/[LeafOK_CVS]/lbbs/src/str_process.c
ViewVC logotype

Annotation of /lbbs/src/str_process.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (hide annotations)
Sat Nov 22 10:36:37 2025 UTC (3 months, 3 weeks ago) by sysadm
Branch: MAIN
Changes since 1.31: +3 -1 lines
Content type: text/x-csrc
Refine error log

1 sysadm 1.23 /* SPDX-License-Identifier: GPL-3.0-or-later */
2     /*
3     * str_process
4     * - common string processing features with UTF-8 support
5     *
6 sysadm 1.24 * Copyright (C) 2004-2025 Leaflet <leaflet@leafok.com>
7 sysadm 1.23 */
8 sysadm 1.1
9 sysadm 1.30 #ifdef HAVE_CONFIG_H
10     #include "config.h"
11     #endif
12    
13 sysadm 1.1 #include "common.h"
14     #include "log.h"
15 sysadm 1.18 #include "str_process.h"
16 sysadm 1.22 #include <ctype.h>
17 sysadm 1.1 #include <stdio.h>
18 sysadm 1.28 #include <stdlib.h>
19 sysadm 1.1 #include <string.h>
20 sysadm 1.28 #include <wchar.h>
21    
/* Width policy for multi-byte UTF-8 characters in str_length() and split_line():
 * non-zero counts every such character as 2 display columns; zero asks wcwidth()
 * (with a 1-column fallback when the character cannot be converted). */
22     int UTF8_fixed_width = 1;
23 sysadm 1.1
24 sysadm 1.20 int str_length(const char *str, int skip_ctrl_seq)
25     {
26 sysadm 1.28 int str_len;
27     char input_str[5];
28     wchar_t wcs[2];
29     int wc_len;
30 sysadm 1.20 int i;
31     char c;
32     int ret = 0;
33    
34     for (i = 0; str[i] != '\0'; i++)
35     {
36     c = str[i];
37    
38     if (c == '\r' || c == '\7') // skip
39     {
40     continue;
41     }
42    
43     if (skip_ctrl_seq && c == '\033' && str[i + 1] == '[') // Skip control sequence
44     {
45 sysadm 1.31 for (i = i + 2; isdigit((int)str[i]) || str[i] == ';' || str[i] == '?'; i++)
46 sysadm 1.22 ;
47    
48     if (str[i] == 'm') // valid
49     {
50     // skip
51     }
52 sysadm 1.31 else if (isalpha((int)str[i]))
53 sysadm 1.22 {
54     // unsupported ANSI CSI command
55     }
56     else
57 sysadm 1.20 {
58 sysadm 1.22 i--;
59 sysadm 1.20 }
60 sysadm 1.22
61 sysadm 1.20 continue;
62     }
63    
64     // Process UTF-8 Chinese characters
65 sysadm 1.21 if (c & 0x80) // head of multi-byte character
66 sysadm 1.20 {
67 sysadm 1.28 str_len = 0;
68     c = (char)(c & 0xf0);
69 sysadm 1.21 while (c & 0x80)
70 sysadm 1.20 {
71 sysadm 1.28 input_str[str_len] = str[i + str_len];
72     str_len++;
73 sysadm 1.21 c = (c & 0x7f) << 1;
74 sysadm 1.20 }
75 sysadm 1.28 input_str[str_len] = '\0';
76 sysadm 1.20
77 sysadm 1.28 if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
78     {
79 sysadm 1.29 #ifdef _DEBUG
80 sysadm 1.28 log_error("mbstowcs(%s) error\n", input_str);
81 sysadm 1.29 #endif
82     wc_len = (UTF8_fixed_width ? 2 : 1); // Fallback
83     }
84     else
85     {
86     wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
87 sysadm 1.28 }
88    
89     i += (str_len - 1);
90     ret += wc_len;
91 sysadm 1.20 }
92     else
93     {
94     ret++;
95     }
96     }
97    
98     return ret;
99     }
100    
101 sysadm 1.17 int split_line(const char *buffer, int max_display_len, int *p_eol, int *p_display_len, int skip_ctrl_seq)
102 sysadm 1.1 {
103 sysadm 1.7 int i;
104 sysadm 1.1 *p_eol = 0;
105 sysadm 1.4 *p_display_len = 0;
106 sysadm 1.11 char c;
107 sysadm 1.28 int str_len;
108     char input_str[5];
109     wchar_t wcs[2];
110     int wc_len;
111 sysadm 1.1
112 sysadm 1.7 for (i = 0; buffer[i] != '\0'; i++)
113 sysadm 1.1 {
114 sysadm 1.11 c = buffer[i];
115 sysadm 1.1
116     if (c == '\r' || c == '\7') // skip
117     {
118     continue;
119     }
120    
121 sysadm 1.17 if (skip_ctrl_seq && c == '\033' && buffer[i + 1] == '[') // Skip control sequence
122 sysadm 1.1 {
123     i += 2;
124 sysadm 1.7 while (buffer[i] != '\0' && buffer[i] != 'm')
125 sysadm 1.1 {
126     i++;
127     }
128     continue;
129     }
130    
131 sysadm 1.21 if (c & 0x80) // head of multi-byte character
132 sysadm 1.1 {
133 sysadm 1.28 str_len = 0;
134     c = (char)(c & 0xf0);
135     while (c & 0x80)
136 sysadm 1.1 {
137 sysadm 1.28 input_str[str_len] = buffer[i + str_len];
138     str_len++;
139     c = (c & 0x7f) << 1;
140 sysadm 1.1 }
141 sysadm 1.28 input_str[str_len] = '\0';
142 sysadm 1.20
143 sysadm 1.28 if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
144     {
145 sysadm 1.29 #ifdef _DEBUG
146 sysadm 1.28 log_error("mbstowcs(%s) error\n", input_str);
147 sysadm 1.29 #endif
148     wc_len = (UTF8_fixed_width ? 2 : 1); // Fallback
149 sysadm 1.28 }
150 sysadm 1.29 else
151     {
152     wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
153     }
154    
155 sysadm 1.28 if (*p_display_len + wc_len > max_display_len)
156 sysadm 1.20 {
157 sysadm 1.28 break;
158 sysadm 1.20 }
159    
160 sysadm 1.28 i += (str_len - 1);
161     (*p_display_len) += wc_len;
162 sysadm 1.1 }
163     else
164     {
165 sysadm 1.6 if (*p_display_len + 1 > max_display_len)
166 sysadm 1.1 {
167     break;
168     }
169 sysadm 1.4 (*p_display_len)++;
170 sysadm 1.12
171 sysadm 1.14 // \n is regarded as 1 character wide in terminal editor, which is different from Web version
172 sysadm 1.12 if (c == '\n')
173     {
174     i++;
175     *p_eol = 1;
176     break;
177     }
178 sysadm 1.1 }
179     }
180    
181     return i;
182     }
183    
/**
 * Split a NUL-terminated buffer into display lines using split_line().
 *
 * p_line_offsets[k] receives the byte offset (relative to p_buf) at which line
 * k starts; p_line_offsets[0] is always 0 and p_line_offsets[count] is the
 * offset just past the last counted line. When p_line_widths is not NULL,
 * p_line_widths[k] receives the display width of line k.
 *
 * A buffer ending in '\n' yields one more (empty) line after the newline, and
 * an empty buffer yields a single empty line.
 *
 * @param p_buf               NUL-terminated input.
 * @param max_display_len     Maximum display width of each line.
 * @param p_line_offsets      Out: array of at least line_offsets_count entries.
 * @param line_offsets_count  Capacity of p_line_offsets; splitting stops once
 *                            the next offset would not fit.
 * @param skip_ctrl_seq       Passed through to split_line().
 * @param p_line_widths       Optional out array of per-line widths (may be NULL).
 * @return Number of lines recorded; 0 if line_offsets_count is not positive.
 */
long split_data_lines(const char *p_buf, int max_display_len, long *p_line_offsets, long line_offsets_count,
                      int skip_ctrl_seq, int *p_line_widths)
{
    long line_cnt = 0;
    int len;
    int end_of_line = 0;
    int display_len = 0;

    // Without room for even the first offset, nothing may be written.
    if (line_offsets_count <= 0)
    {
        return 0;
    }

    p_line_offsets[line_cnt] = 0L;

    do
    {
        len = split_line(p_buf, max_display_len, &end_of_line, &display_len, skip_ctrl_seq);

        if (p_line_widths)
        {
            p_line_widths[line_cnt] = display_len;
        }

        // Exceed max_line_cnt
        if (line_cnt + 1 >= line_offsets_count)
        {
#ifdef _DEBUG
            // Both arguments are long, so the conversions must be %ld.
            log_error("Line count %ld reaches limit %ld\n", line_cnt + 1, line_offsets_count);
#endif
            return line_cnt;
        }

        p_line_offsets[line_cnt + 1] = p_line_offsets[line_cnt] + len;
        line_cnt++;
        p_buf += len;
        // Keep going after a trailing '\n' so the empty last line is recorded.
    } while (p_buf[0] != '\0' || end_of_line);

    return line_cnt;
}
219 sysadm 1.14
/**
 * Remove non-display bytes from a string in place.
 *
 * '\r' and BEL ('\7') bytes are always dropped. When skip_ctrl_seq is
 * non-zero, "ESC [" and everything up to and including the next 'm' is dropped
 * as well; if no 'm' follows, the rest of the string is dropped.
 *
 * @param buffer         NUL-terminated string, modified in place.
 * @param skip_ctrl_seq  Non-zero to strip "ESC [ ... m" sequences.
 * @return Length in bytes of the filtered string.
 */
int str_filter(char *buffer, int skip_ctrl_seq)
{
    int i;
    int j = 0;

    for (i = 0; buffer[i] != '\0'; i++)
    {
        if (buffer[i] == '\r' || buffer[i] == '\7') // skip
        {
            continue;
        }

        if (skip_ctrl_seq && buffer[i] == '\033' && buffer[i + 1] == '[') // Skip control sequence
        {
            i += 2;
            while (buffer[i] != '\0' && buffer[i] != 'm')
            {
                i++;
            }
            if (buffer[i] == '\0')
            {
                // Unterminated sequence: stop on the terminator. Continuing
                // would let the loop increment step past the end of the string
                // and copy whatever bytes follow it.
                break;
            }
            continue;
        }

        buffer[j] = buffer[i];
        j++;
    }

    buffer[j] = '\0';

    return j;
}

webmaster@leafok.com
ViewVC Help
Powered by ViewVC 1.3.0-beta1