/*
 * String helpers for terminal text: display-width measurement, line
 * splitting and filtering of control bytes.
 *
 * Conventions shared by every function below:
 *   - '\r' and '\7' (BEL) are ignored: zero width, never copied.
 *   - When skip_ctrl_seq is non-zero, a sequence starting with ESC '['
 *     and ending with 'm' is ignored as well.
 *   - Any byte with the high bit set starts a multi-byte UTF-8 character
 *     that is counted as 2 display columns; every other byte counts as 1.
 */

// Number of continuation bytes announced by a UTF-8 lead byte (0..3).
// Only bits 6..4 are inspected, so a stray continuation byte yields 0.
static int sp_utf8_tail_len(unsigned char lead)
{
	int count = 0;
	unsigned char mask;

	for (mask = 0x40; mask >= 0x10 && (lead & mask); mask >>= 1)
	{
		count++;
	}

	return count;
}

// s[i] is the lead byte of a multi-byte character. Returns the index of the
// last byte that belongs to it. Stops early at the terminator or at any byte
// that is not a continuation byte (10xxxxxx), so a truncated sequence can
// never move the index beyond the end of the string.
static int sp_skip_utf8_tail(const char *s, int i)
{
	int remaining = sp_utf8_tail_len((unsigned char)s[i]);

	while (remaining > 0 && ((unsigned char)s[i + 1] & 0xC0) == 0x80)
	{
		i++;
		remaining--;
	}

	return i;
}

// s[i] is ESC and s[i + 1] is '['. Returns the index of the last byte of the
// sequence: the closing 'm', or the byte just before the terminator when the
// sequence is unterminated. The caller's loop increment then lands either on
// the byte after 'm' or exactly on the terminator.
static int sp_skip_ctrl_seq(const char *s, int i)
{
	i += 2;
	while (s[i] != '\0' && s[i] != 'm')
	{
		i++;
	}
	if (s[i] == '\0')
	{
		i--;
	}

	return i;
}

/**
 * Compute the display width of a NUL-terminated string.
 *
 * @param str            string to measure
 * @param skip_ctrl_seq  non-zero to ignore ESC '[' ... 'm' sequences
 * @return display width in columns ('\n' counts as 1 column)
 */
int str_length(const char *str, int skip_ctrl_seq)
{
	int i;
	int ret = 0;

	for (i = 0; str[i] != '\0'; i++)
	{
		unsigned char c = (unsigned char)str[i];

		if (c == '\r' || c == '\7') // skip
		{
			continue;
		}

		if (skip_ctrl_seq && c == '\033' && str[i + 1] == '[') // Skip control sequence
		{
			i = sp_skip_ctrl_seq(str, i);
			continue;
		}

		if (c & 0x80) // head of multi-byte character
		{
			i = sp_skip_utf8_tail(str, i);
			ret += 2;
		}
		else
		{
			ret++;
		}
	}

	return ret;
}

/**
 * Find the end of the first display line in buffer.
 *
 * Scanning stops at the end of the string, right after a '\n', or before the
 * first character that would make the line wider than max_display_len.
 *
 * @param buffer           NUL-terminated text
 * @param max_display_len  maximum width of one line, in columns
 * @param p_eol            set to 1 if the line ended with '\n', otherwise 0
 * @param p_display_len    set to the display width of the line
 * @param skip_ctrl_seq    non-zero to ignore ESC '[' ... 'm' sequences
 * @return number of bytes of buffer that belong to the line
 */
int split_line(const char *buffer, int max_display_len, int *p_eol, int *p_display_len, int skip_ctrl_seq)
{
	int i;

	*p_eol = 0;
	*p_display_len = 0;

	for (i = 0; buffer[i] != '\0'; i++)
	{
		unsigned char c = (unsigned char)buffer[i];

		if (c == '\r' || c == '\7') // skip
		{
			continue;
		}

		if (skip_ctrl_seq && c == '\033' && buffer[i + 1] == '[') // Skip control sequence
		{
			i = sp_skip_ctrl_seq(buffer, i);
			continue;
		}

		if (c & 0x80) // head of multi-byte character
		{
			if (*p_display_len + 2 > max_display_len)
			{
				break;
			}

			i = sp_skip_utf8_tail(buffer, i);
			(*p_display_len) += 2;
		}
		else
		{
			if (*p_display_len + 1 > max_display_len)
			{
				break;
			}
			(*p_display_len)++;

			// \n is regarded as 1 character wide in terminal editor, which is different from Web version
			if (c == '\n')
			{
				i++;
				*p_eol = 1;
				break;
			}
		}
	}

	return i;
}

/**
 * Split a text buffer into display lines.
 *
 * On return p_line_offsets[k] is the byte offset of line k for
 * k = 0 .. return value (when line_offsets_count > 0).
 *
 * @param p_buf               NUL-terminated text
 * @param max_display_len     maximum width of one line, in columns
 * @param p_line_offsets      receives the byte offset of each line start
 * @param line_offsets_count  number of elements available in p_line_offsets
 * @param skip_ctrl_seq       non-zero to ignore ESC '[' ... 'm' sequences
 * @param p_line_widths       optional (may be NULL); receives the display
 *                            width of each line and is indexed like
 *                            p_line_offsets
 * @return number of lines recorded. Splitting stops early when the offsets
 *         array is full or when max_display_len is too small to fit the next
 *         character.
 */
long split_data_lines(const char *p_buf, int max_display_len, long *p_line_offsets, long line_offsets_count,
					  int skip_ctrl_seq, int *p_line_widths)
{
	long line_cnt = 0;
	int len;
	int end_of_line = 0;
	int display_len = 0;

	// Nothing can be stored, not even the offset of the first line
	if (line_offsets_count <= 0)
	{
		return 0;
	}

	p_line_offsets[line_cnt] = 0L;

	do
	{
		len = split_line(p_buf, max_display_len, &end_of_line, &display_len, skip_ctrl_seq);

		// No byte consumed although data remains: the next character does not
		// fit into max_display_len. Stop instead of emitting empty lines until
		// the offsets array is full.
		if (len == 0 && p_buf[0] != '\0')
		{
			break;
		}

		if (p_line_widths)
		{
			p_line_widths[line_cnt] = display_len;
		}

		// Exceed max_line_cnt
		if (line_cnt + 1 >= line_offsets_count)
		{
			return line_cnt;
		}

		p_line_offsets[line_cnt + 1] = p_line_offsets[line_cnt] + len;
		line_cnt++;
		p_buf += len;
	} while (p_buf[0] != '\0');

	return line_cnt;
}

/**
 * Remove '\r' and '\7' bytes, and optionally ESC '[' ... 'm' sequences, from
 * buffer in place.
 *
 * @param buffer         NUL-terminated string, modified in place
 * @param skip_ctrl_seq  non-zero to remove ESC '[' ... 'm' sequences as well
 * @return length in bytes of the filtered string
 */
int str_filter(char *buffer, int skip_ctrl_seq)
{
	int i;
	int j = 0;

	for (i = 0; buffer[i] != '\0'; i++)
	{
		if (buffer[i] == '\r' || buffer[i] == '\7') // skip
		{
			continue;
		}

		if (skip_ctrl_seq && buffer[i] == '\033' && buffer[i + 1] == '[') // Skip control sequence
		{
			i = sp_skip_ctrl_seq(buffer, i);
			continue;
		}

		buffer[j] = buffer[i];
		j++;
	}

	buffer[j] = '\0';

	return j;
}