--- lbbs/src/str_process.c 2025/10/18 12:06:10 1.21 +++ lbbs/src/str_process.c 2025/12/18 02:56:01 1.33 @@ -1,27 +1,32 @@ -/*************************************************************************** - str_process.c - description - ------------------- - Copyright : (C) 2004-2025 by Leaflet - Email : leaflet@leafok.com - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 3 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +/* + * str_process + * - common string processing features with UTF-8 support + * + * Copyright (C) 2004-2025 Leaflet + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include "common.h" #include "log.h" #include "str_process.h" +#include #include +#include #include +#include + +int UTF8_fixed_width = 1; int str_length(const char *str, int skip_ctrl_seq) { + int str_len; + char input_str[5]; + wchar_t wcs[2]; + int wc_len; int i; char c; int ret = 0; @@ -37,25 +42,50 @@ int str_length(const char *str, int skip if (skip_ctrl_seq && c == '\033' && str[i + 1] == '[') // Skip control sequence { - i += 2; - while (str[i] != '\0' && str[i] != 'm') + for (i = i + 2; isdigit((int)str[i]) || str[i] == ';' || str[i] == '?'; i++) + ; + + if (str[i] == 'm') // valid { - i++; + // skip + } + else if (isalpha((int)str[i])) + { + // unsupported ANSI CSI command + } + else + { + i--; } + continue; } // Process UTF-8 Chinese characters if (c & 0x80) // head of multi-byte character { - c = (c & 0x70) << 1; + str_len = 0; + c = (char)(c & 0xf0); while (c & 0x80) { - i++; + input_str[str_len] = str[i + str_len]; + str_len++; c = (c & 0x7f) << 1; } + input_str[str_len] = '\0'; - ret += 2; + if (mbstowcs(wcs, input_str, 1) == (size_t)-1) + { + log_debug("mbstowcs(%s) error\n", input_str); + wc_len = (UTF8_fixed_width ? 2 : 1); // Fallback + } + else + { + wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0])); + } + + i += (str_len - 1); + ret += wc_len; } else { @@ -72,6 +102,10 @@ int split_line(const char *buffer, int m *p_eol = 0; *p_display_len = 0; char c; + int str_len; + char input_str[5]; + wchar_t wcs[2]; + int wc_len; for (i = 0; buffer[i] != '\0'; i++) { @@ -94,19 +128,33 @@ int split_line(const char *buffer, int m if (c & 0x80) // head of multi-byte character { - if (*p_display_len + 2 > max_display_len) + str_len = 0; + c = (char)(c & 0xf0); + while (c & 0x80) { - break; + input_str[str_len] = buffer[i + str_len]; + str_len++; + c = (c & 0x7f) << 1; } + input_str[str_len] = '\0'; - c = (c & 0x70) << 1; - while (c & 0x80) + if (mbstowcs(wcs, input_str, 1) == (size_t)-1) { - i++; - c = (c & 0x7f) << 1; + log_debug("mbstowcs(%s) error\n", input_str); + wc_len = (UTF8_fixed_width ? 2 : 1); // Fallback + } + else + { + wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0])); + } + + if (*p_display_len + wc_len > max_display_len) + { + break; } - (*p_display_len) += 2; + i += (str_len - 1); + (*p_display_len) += wc_len; } else { @@ -151,14 +199,14 @@ long split_data_lines(const char *p_buf, // Exceed max_line_cnt if (line_cnt + 1 >= line_offsets_count) { - // log_error("Line count %d reaches limit %d\n", line_cnt + 1, line_offsets_count); + log_debug("Line count %d reaches limit %d\n", line_cnt + 1, line_offsets_count); return line_cnt; } p_line_offsets[line_cnt + 1] = p_line_offsets[line_cnt] + len; line_cnt++; p_buf += len; - } while (p_buf[0] != '\0'); + } while (p_buf[0] != '\0' || end_of_line); return line_cnt; }