--- lbbs/src/str_process.c	2025/06/21 02:15:18	1.18
+++ lbbs/src/str_process.c	2025/11/10 12:53:16	1.29
@@ -1,24 +1,98 @@
-/***************************************************************************
-						  str_process.c  -  description
-							 -------------------
-	Copyright            : (C) 2004-2025 by Leaflet
-	Email                : leaflet@leafok.com
- ***************************************************************************/
-
-/***************************************************************************
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 3 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- ***************************************************************************/
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * str_process
+ *   - common string processing features with UTF-8 support
+ *
+ * Copyright (C) 2004-2025  Leaflet <leaflet@leafok.com>
+ */
 
 #include "common.h"
 #include "log.h"
 #include "str_process.h"
+#include <ctype.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
+
+int UTF8_fixed_width = 1; // non-zero: every multi-byte character counts as 2 columns; zero: measure with wcwidth()
+
+/**
+ * Compute the display width, in terminal columns, of a NUL-terminated string.
+ * '\r' and '\7' add nothing; with skip_ctrl_seq set, neither does an ESC '['
+ * sequence (digits, ';' or '?' parameters plus one final letter). A byte with
+ * the high bit set starts a multi-byte character: 2 columns when
+ * UTF8_fixed_width is non-zero, else what wcwidth() reports (1 if the character
+ * cannot be decoded or measured). Every other byte is 1 column.
+ *
+ * @param str           NUL-terminated input string
+ * @param skip_ctrl_seq non-zero to ignore ESC '[' control sequences
+ * @return total display width of str
+ */
+int str_length(const char *str, int skip_ctrl_seq)
+{
+	char input_str[5]; // one multi-byte sequence (at most 4 bytes) plus NUL
+	wchar_t wcs[2];
+	int str_len, wc_len, i, ret = 0;
+
+	for (i = 0; str[i] != '\0'; i++)
+	{
+		unsigned char c = (unsigned char)str[i]; // unsigned for bit tests
+
+		if (c == '\r' || c == '\7') // skip
+		{
+			continue;
+		}
+
+		if (skip_ctrl_seq && c == '\033' && str[i + 1] == '[') // Skip control sequence
+		{
+			for (i = i + 2; isdigit((unsigned char)str[i]) || str[i] == ';' || str[i] == '?'; i++)
+				;
+
+			if (!isalpha((unsigned char)str[i])) // no final letter: re-examine str[i] next round
+			{
+				i--;
+			}
+
+			continue;
+		}
+
+		if (c & 0x80) // head of multi-byte character
+		{
+			// Lead byte 110xxxxx / 1110xxxx / 1111xxxx announces 2 / 3 / 4 bytes; any other
+			// high-bit byte is taken alone. Stop at the terminator of a truncated sequence.
+			int need = (c >= 0xf0 ? 4 : (c >= 0xe0 ? 3 : (c >= 0xc0 ? 2 : 1)));
+
+			for (str_len = 0; str_len < need && str[i + str_len] != '\0'; str_len++)
+			{
+				input_str[str_len] = str[i + str_len];
+			}
+			input_str[str_len] = '\0';
+
+			if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
+			{
+#ifdef _DEBUG
+				log_error("mbstowcs(%s) error\n", input_str);
+#endif
+				wc_len = (UTF8_fixed_width ? 2 : 1); // Fallback
+			}
+			else
+			{
+				wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
+				wc_len = (wc_len < 0 ? 1 : wc_len); // wcwidth() is -1 for non-printable characters
+			}
+
+			i += (str_len - 1);
+			ret += wc_len;
+		}
+		else
+		{
+			ret++;
+		}
+	}
+
+	return ret;
+}
 
 int split_line(const char *buffer, int max_display_len, int *p_eol, int *p_display_len, int skip_ctrl_seq)
 {
@@ -26,6 +100,10 @@ int split_line(const char *buffer, int m
 	*p_eol = 0;
 	*p_display_len = 0;
 	char c;
+	int str_len;
+	char input_str[5];
+	wchar_t wcs[2];
+	int wc_len;
 
 	for (i = 0; buffer[i] != '\0'; i++)
 	{
@@ -46,14 +124,37 @@ int split_line(const char *buffer, int m
 			continue;
 		}
 
-		if (c < 0 || c > 127) // GBK chinese character
+		if (c & 0x80) // head of multi-byte character
 		{
-			if (*p_display_len + 2 > max_display_len)
+			str_len = 0;
+			c = (char)(c & 0xf0);
+			while (c & 0x80)
+			{
+				input_str[str_len] = buffer[i + str_len];
+				str_len++;
+				c = (c & 0x7f) << 1;
+			}
+			input_str[str_len] = '\0';
+
+			if (mbstowcs(wcs, input_str, 1) == (size_t)-1)
+			{
+#ifdef _DEBUG
+				log_error("mbstowcs(%s) error\n", input_str);
+#endif
+				wc_len = (UTF8_fixed_width ? 2 : 1); // Fallback
+			}
+			else
+			{
+				wc_len = (UTF8_fixed_width ? 2 : wcwidth(wcs[0]));
+			}
+
+			if (*p_display_len + wc_len > max_display_len)
 			{
 				break;
 			}
-			i++;
-			(*p_display_len) += 2;
+
+			i += (str_len - 1);
+			(*p_display_len) += wc_len;
 		}
 		else
 		{
@@ -76,7 +177,8 @@ int split_line(const char *buffer, int m
 	return i;
 }
 
-long split_data_lines(const char *p_buf, int max_display_len, long *p_line_offsets, long line_offsets_count, int skip_ctrl_seq)
+long split_data_lines(const char *p_buf, int max_display_len, long *p_line_offsets, long line_offsets_count,
+					  int skip_ctrl_seq, int *p_line_widths)
 {
 	int line_cnt = 0;
 	int len;
@@ -89,6 +191,11 @@ long split_data_lines(const char *p_buf,
 	{
 		len = split_line(p_buf, max_display_len, &end_of_line, &display_len, skip_ctrl_seq);
 
+		if (p_line_widths)
+		{
+			p_line_widths[line_cnt] = display_len;
+		}
+
 		// Exceed max_line_cnt
 		if (line_cnt + 1 >= line_offsets_count)
 		{
@@ -99,7 +206,7 @@ long split_data_lines(const char *p_buf,
 		p_line_offsets[line_cnt + 1] = p_line_offsets[line_cnt] + len;
 		line_cnt++;
 		p_buf += len;
-	} while (p_buf[0] != '\0');
+	} while (p_buf[0] != '\0' || end_of_line);
 
 	return line_cnt;
 }