/[LeafOK_CVS]/lbbs/src/str_process.c
ViewVC logotype

Contents of /lbbs/src/str_process.c

Parent Directory | Revision Log


Revision 1.28 - (show annotations)
Sat Nov 8 08:21:31 2025 UTC (4 months, 1 week ago) by sysadm
Branch: MAIN
Changes since 1.27: +41 -10 lines
Content type: text/x-csrc
Support dynamic wide-character display width in str_process and editor related functions
Add dynamic / fixed wide-character display width selection

1 /* SPDX-License-Identifier: GPL-3.0-or-later */
2 /*
3 * str_process
4 * - common string processing features with UTF-8 support
5 *
6 * Copyright (C) 2004-2025 Leaflet <leaflet@leafok.com>
7 */
8
9 #include "common.h"
10 #include "log.h"
11 #include "str_process.h"
12 #include <ctype.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <wchar.h>
17
/*
 * Width policy for multi-byte (UTF-8) characters, read by str_length() and
 * split_line() in this file:
 *   non-zero - every multi-byte character is counted as 2 display columns;
 *   zero     - the width is taken from wcwidth() for the decoded character.
 * Defaults to 1 (fixed width).
 */
18 int UTF8_fixed_width = 1;
19
20 int str_length(const char *str, int skip_ctrl_seq)
21 {
22 int str_len;
23 char input_str[5];
24 wchar_t wcs[2];
25 int wc_len;
26 int i;
27 char c;
28 int ret = 0;
29
30 for (i = 0; str[i] != '\0'; i++)
31 {
32 c = str[i];
33
34 if (c == '\r' || c == '\7') // skip
35 {
36 continue;
37 }
38
39 if (skip_ctrl_seq && c == '\033' && str[i + 1] == '[') // Skip control sequence
40 {
41 for (i = i + 2; isdigit(str[i]) || str[i] == ';' || str[i] == '?'; i++)
42 ;
43
44 if (str[i] == 'm') // valid
45 {
46 // skip
47 }
48 else if (isalpha(str[i]))
49 {
50 // unsupported ANSI CSI command
51 }
52 else
53 {
54 i--;
55 }
56
57 continue;
58 }
59
60 // Process UTF-8 Chinese characters
61 if (c & 0x80) // head of multi-byte character
62 {
63 str_len = 0;
64 c = (char)(c & 0xf0);
65 while (c & 0x80)
66 {
67 input_str[str_len] = str[i + str_len];
68 str_len++;
69 c = (c & 0x7f) << 1;
70 }
71 input_str[str_len] = '\0';
72
73 if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
74 {
75 log_error("mbstowcs(%s) error\n", input_str);
76 }
77 wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
78
79 i += (str_len - 1);
80 ret += wc_len;
81 }
82 else
83 {
84 ret++;
85 }
86 }
87
88 return ret;
89 }
90
91 int split_line(const char *buffer, int max_display_len, int *p_eol, int *p_display_len, int skip_ctrl_seq)
92 {
93 int i;
94 *p_eol = 0;
95 *p_display_len = 0;
96 char c;
97 int str_len;
98 char input_str[5];
99 wchar_t wcs[2];
100 int wc_len;
101
102 for (i = 0; buffer[i] != '\0'; i++)
103 {
104 c = buffer[i];
105
106 if (c == '\r' || c == '\7') // skip
107 {
108 continue;
109 }
110
111 if (skip_ctrl_seq && c == '\033' && buffer[i + 1] == '[') // Skip control sequence
112 {
113 i += 2;
114 while (buffer[i] != '\0' && buffer[i] != 'm')
115 {
116 i++;
117 }
118 continue;
119 }
120
121 if (c & 0x80) // head of multi-byte character
122 {
123 str_len = 0;
124 c = (char)(c & 0xf0);
125 while (c & 0x80)
126 {
127 input_str[str_len] = buffer[i + str_len];
128 str_len++;
129 c = (c & 0x7f) << 1;
130 }
131 input_str[str_len] = '\0';
132
133 if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
134 {
135 log_error("mbstowcs(%s) error\n", input_str);
136 }
137 wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
138 if (*p_display_len + wc_len > max_display_len)
139 {
140 break;
141 }
142
143 i += (str_len - 1);
144 (*p_display_len) += wc_len;
145 }
146 else
147 {
148 if (*p_display_len + 1 > max_display_len)
149 {
150 break;
151 }
152 (*p_display_len)++;
153
154 // \n is regarded as 1 character wide in terminal editor, which is different from Web version
155 if (c == '\n')
156 {
157 i++;
158 *p_eol = 1;
159 break;
160 }
161 }
162 }
163
164 return i;
165 }
166
/**
 * Break a NUL-terminated text into display lines by calling split_line()
 * repeatedly.
 *
 * p_line_offsets[k] receives the byte offset, relative to p_buf, at which
 * line k starts; the entry after the last line holds the offset at which
 * that line ends. When p_line_widths is not NULL, p_line_widths[k] receives
 * the display width reported by split_line() for line k.
 *
 * Splitting stops once the offset table cannot hold another entry, or when
 * the text is exhausted and the last line did not end with a newline (a
 * trailing newline therefore produces one extra, empty line).
 *
 * @param p_buf               Text to split.
 * @param max_display_len     Maximum display width passed to split_line().
 * @param p_line_offsets      Out: offset table.
 * @param line_offsets_count  Number of entries available in p_line_offsets.
 * @param skip_ctrl_seq       Passed through to split_line().
 * @param p_line_widths       Out (optional, may be NULL): width of each line.
 * @return                    Number of lines recorded in p_line_offsets.
 */
long split_data_lines(const char *p_buf, int max_display_len, long *p_line_offsets, long line_offsets_count,
					  int skip_ctrl_seq, int *p_line_widths)
{
	const char *cursor = p_buf;
	int lines = 0;
	int hit_eol = 0;
	int width = 0;

	p_line_offsets[0] = 0L;

	for (;;)
	{
		int consumed = split_line(cursor, max_display_len, &hit_eol, &width, skip_ctrl_seq);

		if (p_line_widths != NULL)
		{
			p_line_widths[lines] = width;
		}

		// No room left in the offset table for the end of this line
		if (lines + 1 >= line_offsets_count)
		{
			break;
		}

		p_line_offsets[lines + 1] = p_line_offsets[lines] + consumed;
		lines++;
		cursor += consumed;

		// Finished once the text is exhausted, unless the last line ended
		// with a newline, in which case one more (empty) line follows.
		if (cursor[0] == '\0' && !hit_eol)
		{
			break;
		}
	}

	return lines;
}
200
/**
 * Strip non-displayable bytes from a string in place.
 *
 * Removes every '\r' and '\7' (BEL). When skip_ctrl_seq is non-zero, also
 * removes each "ESC [" sequence up to and including the next 'm'; a sequence
 * that is never closed by 'm' is removed through the end of the string.
 *
 * @param buffer         NUL-terminated string, rewritten in place.
 * @param skip_ctrl_seq  Non-zero to remove ANSI control sequences as well.
 * @return               Length in bytes of the filtered string.
 */
int str_filter(char *buffer, int skip_ctrl_seq)
{
	int i;
	int j = 0;

	for (i = 0; buffer[i] != '\0'; i++)
	{
		if (buffer[i] == '\r' || buffer[i] == '\7') // skip
		{
			continue;
		}

		if (skip_ctrl_seq && buffer[i] == '\033' && buffer[i + 1] == '[') // Skip control sequence
		{
			i += 2;
			while (buffer[i] != '\0' && buffer[i] != 'm')
			{
				i++;
			}
			if (buffer[i] == '\0')
			{
				// Unterminated sequence: stop on the NUL so that the loop
				// increment cannot step past the end of the string.
				break;
			}
			continue;
		}

		buffer[j] = buffer[i];
		j++;
	}

	buffer[j] = '\0';

	return j;
}

webmaster@leafok.com
ViewVC Help
Powered by ViewVC 1.3.0-beta1