/[LeafOK_CVS]/lbbs/src/str_process.c
ViewVC logotype

Annotation of /lbbs/src/str_process.c

Parent Directory | Revision Log


Revision 1.28 - (hide annotations)
Sat Nov 8 08:21:31 2025 UTC (4 months, 1 week ago) by sysadm
Branch: MAIN
Changes since 1.27: +41 -10 lines
Content type: text/x-csrc
Support dynamic wide-character display width in str_process and editor related functions
Add dynamic / fixed wide-character display width selection

1 sysadm 1.23 /* SPDX-License-Identifier: GPL-3.0-or-later */
2     /*
3     * str_process
4     * - common string processing features with UTF-8 support
5     *
6 sysadm 1.24 * Copyright (C) 2004-2025 Leaflet <leaflet@leafok.com>
7 sysadm 1.23 */
8 sysadm 1.1
9     #include "common.h"
10     #include "log.h"
11 sysadm 1.18 #include "str_process.h"
12 sysadm 1.22 #include <ctype.h>
13 sysadm 1.1 #include <stdio.h>
14 sysadm 1.28 #include <stdlib.h>
15 sysadm 1.1 #include <string.h>
16 sysadm 1.28 #include <wchar.h>
17    
/*
 * Selects how the display width of a multi-byte UTF-8 character is measured
 * by the functions in this file:
 *   nonzero (default) - every multi-byte character counts as 2 columns;
 *   zero              - the width is taken from wcwidth() per character.
 */
int UTF8_fixed_width = 1;
19 sysadm 1.1
20 sysadm 1.20 int str_length(const char *str, int skip_ctrl_seq)
21     {
22 sysadm 1.28 int str_len;
23     char input_str[5];
24     wchar_t wcs[2];
25     int wc_len;
26 sysadm 1.20 int i;
27     char c;
28     int ret = 0;
29    
30     for (i = 0; str[i] != '\0'; i++)
31     {
32     c = str[i];
33    
34     if (c == '\r' || c == '\7') // skip
35     {
36     continue;
37     }
38    
39     if (skip_ctrl_seq && c == '\033' && str[i + 1] == '[') // Skip control sequence
40     {
41 sysadm 1.22 for (i = i + 2; isdigit(str[i]) || str[i] == ';' || str[i] == '?'; i++)
42     ;
43    
44     if (str[i] == 'm') // valid
45     {
46     // skip
47     }
48     else if (isalpha(str[i]))
49     {
50     // unsupported ANSI CSI command
51     }
52     else
53 sysadm 1.20 {
54 sysadm 1.22 i--;
55 sysadm 1.20 }
56 sysadm 1.22
57 sysadm 1.20 continue;
58     }
59    
60     // Process UTF-8 Chinese characters
61 sysadm 1.21 if (c & 0x80) // head of multi-byte character
62 sysadm 1.20 {
63 sysadm 1.28 str_len = 0;
64     c = (char)(c & 0xf0);
65 sysadm 1.21 while (c & 0x80)
66 sysadm 1.20 {
67 sysadm 1.28 input_str[str_len] = str[i + str_len];
68     str_len++;
69 sysadm 1.21 c = (c & 0x7f) << 1;
70 sysadm 1.20 }
71 sysadm 1.28 input_str[str_len] = '\0';
72 sysadm 1.20
73 sysadm 1.28 if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
74     {
75     log_error("mbstowcs(%s) error\n", input_str);
76     }
77     wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
78    
79     i += (str_len - 1);
80     ret += wc_len;
81 sysadm 1.20 }
82     else
83     {
84     ret++;
85     }
86     }
87    
88     return ret;
89     }
90    
91 sysadm 1.17 int split_line(const char *buffer, int max_display_len, int *p_eol, int *p_display_len, int skip_ctrl_seq)
92 sysadm 1.1 {
93 sysadm 1.7 int i;
94 sysadm 1.1 *p_eol = 0;
95 sysadm 1.4 *p_display_len = 0;
96 sysadm 1.11 char c;
97 sysadm 1.28 int str_len;
98     char input_str[5];
99     wchar_t wcs[2];
100     int wc_len;
101 sysadm 1.1
102 sysadm 1.7 for (i = 0; buffer[i] != '\0'; i++)
103 sysadm 1.1 {
104 sysadm 1.11 c = buffer[i];
105 sysadm 1.1
106     if (c == '\r' || c == '\7') // skip
107     {
108     continue;
109     }
110    
111 sysadm 1.17 if (skip_ctrl_seq && c == '\033' && buffer[i + 1] == '[') // Skip control sequence
112 sysadm 1.1 {
113     i += 2;
114 sysadm 1.7 while (buffer[i] != '\0' && buffer[i] != 'm')
115 sysadm 1.1 {
116     i++;
117     }
118     continue;
119     }
120    
121 sysadm 1.21 if (c & 0x80) // head of multi-byte character
122 sysadm 1.1 {
123 sysadm 1.28 str_len = 0;
124     c = (char)(c & 0xf0);
125     while (c & 0x80)
126 sysadm 1.1 {
127 sysadm 1.28 input_str[str_len] = buffer[i + str_len];
128     str_len++;
129     c = (c & 0x7f) << 1;
130 sysadm 1.1 }
131 sysadm 1.28 input_str[str_len] = '\0';
132 sysadm 1.20
133 sysadm 1.28 if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
134     {
135     log_error("mbstowcs(%s) error\n", input_str);
136     }
137     wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
138     if (*p_display_len + wc_len > max_display_len)
139 sysadm 1.20 {
140 sysadm 1.28 break;
141 sysadm 1.20 }
142    
143 sysadm 1.28 i += (str_len - 1);
144     (*p_display_len) += wc_len;
145 sysadm 1.1 }
146     else
147     {
148 sysadm 1.6 if (*p_display_len + 1 > max_display_len)
149 sysadm 1.1 {
150     break;
151     }
152 sysadm 1.4 (*p_display_len)++;
153 sysadm 1.12
154 sysadm 1.14 // \n is regarded as 1 character wide in terminal editor, which is different from Web version
155 sysadm 1.12 if (c == '\n')
156     {
157     i++;
158     *p_eol = 1;
159     break;
160     }
161 sysadm 1.1 }
162     }
163    
164     return i;
165     }
166    
/*
 * Split a NUL-terminated buffer into display lines using split_line().
 *
 * p_line_offsets[k] receives the byte offset at which line k starts;
 * after a complete split, p_line_offsets[<return value>] holds the offset
 * just past the last line. p_line_offsets has room for line_offsets_count
 * entries. If p_line_widths is not NULL, p_line_widths[k] receives the
 * display width of line k.
 *
 * When the text ends with '\n', one extra empty line is produced after it.
 *
 * Returns the number of lines recorded. If the offset table fills up, the
 * split stops early and the count of lines recorded so far is returned.
 */
long split_data_lines(const char *p_buf, int max_display_len, long *p_line_offsets, long line_offsets_count,
					  int skip_ctrl_seq, int *p_line_widths)
{
	long line_cnt = 0;
	int len;
	int end_of_line = 0;
	int display_len = 0;

	// No room for even the first offset: do not touch the table
	if (line_offsets_count <= 0)
	{
		return 0;
	}

	p_line_offsets[line_cnt] = 0L;

	do
	{
		len = split_line(p_buf, max_display_len, &end_of_line, &display_len, skip_ctrl_seq);

		if (p_line_widths)
		{
			p_line_widths[line_cnt] = display_len;
		}

		// Exceed max_line_cnt: no slot left for the next line's offset
		if (line_cnt + 1 >= line_offsets_count)
		{
			return line_cnt;
		}

		p_line_offsets[line_cnt + 1] = p_line_offsets[line_cnt] + len;
		line_cnt++;
		p_buf += len;
	} while (p_buf[0] != '\0' || end_of_line); // a trailing '\n' yields one more (empty) line

	return line_cnt;
}
200 sysadm 1.14
/*
 * Remove non-displayable bytes from a NUL-terminated string, in place.
 *
 * - '\r' and BEL ('\7') are always dropped.
 * - When skip_ctrl_seq is nonzero, everything from "ESC [" up to and
 *   including the next 'm' is dropped; an unterminated sequence is dropped
 *   through the end of the string.
 *
 * Returns the length of the filtered string (excluding the terminator).
 */
int str_filter(char *buffer, int skip_ctrl_seq)
{
	int i;
	int j;

	for (i = 0, j = 0; buffer[i] != '\0'; i++)
	{
		if (buffer[i] == '\r' || buffer[i] == '\7') // skip
		{
			continue;
		}

		if (skip_ctrl_seq && buffer[i] == '\033' && buffer[i + 1] == '[') // Skip control sequence
		{
			i += 2;
			while (buffer[i] != '\0' && buffer[i] != 'm')
			{
				i++;
			}
			if (buffer[i] == '\0')
			{
				// Unterminated sequence: stop on the terminator instead of
				// letting the loop increment step past it.
				break;
			}
			continue;
		}

		buffer[j] = buffer[i];
		j++;
	}

	buffer[j] = '\0';

	return j;
}

webmaster@leafok.com
ViewVC Help
Powered by ViewVC 1.3.0-beta1