1
1

miscellaneous cleanups for the multibyte parsing functions

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2242 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
Этот коммит содержится в:
David Lawrence Ramsey 2005-01-07 22:39:43 +00:00
родитель 12054fe11b
Коммит d96851f534
4 изменённых файла: 114 добавлений и 117 удалений

Просмотреть файл

@ -1178,12 +1178,11 @@ void do_delete(void)
placewewant = xplustabs();
if (current->data[current_x] != '\0') {
int char_len = parse_char(current->data + current_x, NULL,
NULL
int char_len = parse_char(current->data + current_x, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, NULL);
size_t line_len = strlen(current->data + current_x);
assert(current_x < strlen(current->data));
@ -2501,11 +2500,11 @@ bool breakable(const char *line, ssize_t goal)
if (isblank(*line))
return TRUE;
line += parse_char(line, NULL, &pos
line += parse_char(line, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, &pos);
goal -= pos;
}
@ -2539,11 +2538,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
assert(*line != '\t');
line_len = parse_char(line, NULL, &pos
line_len = parse_char(line, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, &pos);
goal -= pos;
line += line_len;

Просмотреть файл

@ -478,13 +478,13 @@ bool is_byte_char(int c);
int num_of_digits(int n);
unsigned char control_rep(unsigned char c);
bool parse_num(const char *str, ssize_t *val);
int parse_char(const char *str, int *chr, size_t *col
int parse_char(const char *buf, int *chr
#ifdef NANO_WIDE
, bool *bad_char
, bool *bad_chr
#endif
);
size_t move_left(const char *str, size_t pos);
size_t move_right(const char *str, size_t pos);
, size_t *col);
size_t move_left(const char *buf, size_t pos);
size_t move_right(const char *buf, size_t pos);
void align(char **strp);
void null_at(char **data, size_t index);
void unsunder(char *str, size_t true_len);

Просмотреть файл

@ -128,66 +128,66 @@ bool parse_num(const char *str, ssize_t *val)
return TRUE;
}
/* Parse a multi-byte character from str. Return the number of bytes
* used. If chr isn't NULL, store the wide character in it. If col
* isn't NULL, store the new display width in it. If *str is '\t', we
* expect col to have the current display width. If bad_char isn't
* NULL, set it to TRUE if we have a null byte or a bad multibyte
* character. */
int parse_char(const char *str, int *chr, size_t *col
/* Parse a multibyte character from buf. Return the number of bytes
* used. If chr isn't NULL, store the wide character in it. If
* bad_chr isn't NULL, set it to TRUE if we have a null byte or a bad
* multibyte character. If col isn't NULL, store the new display width
* in it. If *buf is '\t', we expect col to have the current display
* width. */
int parse_char(const char *buf, int *chr
#ifdef NANO_WIDE
, bool *bad_char
, bool *bad_chr
#endif
)
, size_t *col)
{
int wide_str, wide_str_len;
int wide_buf, mb_buf_len;
assert(str != NULL);
assert(buf != NULL);
#ifdef NANO_WIDE
if (bad_char != NULL)
*bad_char = FALSE;
if (bad_chr != NULL)
*bad_chr = FALSE;
if (!ISSET(NO_UTF8)) {
wchar_t tmp;
/* Get the wide character equivalent of the multibyte
* character. */
wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX);
wide_str = (int)tmp;
mb_buf_len = mbtowc(&tmp, buf, MB_CUR_MAX);
wide_buf = (int)tmp;
/* If str contains a null byte or an invalid multibyte
* character, interpret str's first byte as a single-byte
* sequence and set bad_char to TRUE. */
if (wide_str_len <= 0) {
wide_str_len = 1;
wide_str = (unsigned char)*str;
if (bad_char != NULL)
*bad_char = TRUE;
/* If buf contains a null byte or an invalid multibyte
* character, interpret buf's first byte as a single-byte
* sequence and set bad_chr to TRUE. */
if (mb_buf_len <= 0) {
mb_buf_len = 1;
wide_buf = (unsigned char)*buf;
if (bad_chr != NULL)
*bad_chr = TRUE;
}
/* Save the wide character in chr. */
if (chr != NULL)
*chr = wide_str;
*chr = wide_buf;
/* Save the column width of the wide character in col. */
if (col != NULL) {
/* If we have a tab, get its width in columns using the
* current value of col. */
if (wide_str == '\t')
if (wide_buf == '\t')
*col += tabsize - *col % tabsize;
/* If we have a control character, get its width using one
* column for the "^" that will be displayed in front of it,
* and the width in columns of its visible equivalent as
* returned by control_rep(). */
else if (is_cntrl_char(wide_str)) {
char *ctrl_wide_str = charalloc(MB_CUR_MAX);
else if (is_cntrl_char(wide_buf)) {
char *ctrl_mb_buf = charalloc(MB_CUR_MAX);
(*col)++;
wide_str = control_rep((unsigned char)wide_str);
wide_buf = control_rep((unsigned char)wide_buf);
if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1) {
int width = wcwidth(wide_str);
if (wctomb(ctrl_mb_buf, (wchar_t)wide_buf) != -1) {
int width = wcwidth((wchar_t)wide_buf);
if (width != -1)
*col += width;
@ -195,11 +195,11 @@ int parse_char(const char *str, int *chr, size_t *col
else
(*col)++;
free(ctrl_wide_str);
free(ctrl_mb_buf);
/* If we have a normal character, get its width in columns
* normally. */
} else {
int width = wcwidth(wide_str);
int width = wcwidth((wchar_t)wide_buf);
if (width != -1)
*col += width;
@ -207,25 +207,25 @@ int parse_char(const char *str, int *chr, size_t *col
}
} else {
#endif
/* Interpret str's first character as a single-byte sequence. */
wide_str_len = 1;
wide_str = (unsigned char)*str;
/* Interpret buf's first character as a single-byte sequence. */
mb_buf_len = 1;
wide_buf = (unsigned char)*buf;
/* Save the single-byte sequence in chr as though it's a wide
* character. */
if (chr != NULL)
*chr = wide_str;
*chr = wide_buf;
if (col != NULL) {
/* If we have a tab, get its width in columns using the
* current value of col. */
if (wide_str == '\t')
if (wide_buf == '\t')
*col += tabsize - *col % tabsize;
/* If we have a control character, it's two columns wide:
* one column for the "^" that will be displayed in front of
* it, and one column for its visible equivalent as returned
* by control_rep(). */
else if (is_cntrl_char(wide_str))
else if (is_cntrl_char(wide_buf))
*col += 2;
/* If we have a normal character, it's one column wide. */
else
@ -235,44 +235,44 @@ int parse_char(const char *str, int *chr, size_t *col
}
#endif
return wide_str_len;
return mb_buf_len;
}
/* Return the index in str of the beginning of the character before the
/* Return the index in buf of the beginning of the character before the
* one at pos. */
size_t move_left(const char *str, size_t pos)
size_t move_left(const char *buf, size_t pos)
{
size_t pos_prev = pos;
assert(str != NULL && pos <= strlen(str));
assert(str != NULL && pos <= strlen(buf));
/* There is no library function to move backward one multibyte
* character. Here is the naive, O(pos) way to do it. */
while (TRUE) {
int str_len = parse_char(str + pos - pos_prev, NULL, NULL
int mb_buf_len = parse_char(buf + pos - pos_prev, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, NULL);
if (pos_prev <= str_len)
if (pos_prev <= mb_buf_len)
break;
pos_prev -= str_len;
pos_prev -= mb_buf_len;
}
return pos - pos_prev;
}
/* Return the index in str of the beginning of the character after the
/* Return the index in buf of the beginning of the character after the
* one at pos. */
size_t move_right(const char *str, size_t pos)
size_t move_right(const char *buf, size_t pos)
{
return pos + parse_char(str + pos, NULL, NULL
return pos + parse_char(buf + pos, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, NULL);
}
/* Fix the memory allocation for a string. */

Просмотреть файл

@ -1936,11 +1936,11 @@ void do_statusbar_backspace(void)
void do_statusbar_delete(void)
{
if (statusbar_x < statusbar_xend) {
int char_len = parse_char(answer + statusbar_x, NULL, NULL
int char_len = parse_char(answer + statusbar_x, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, NULL);
charmove(answer + statusbar_x, answer + statusbar_x + char_len,
statusbar_xend - statusbar_x - char_len + 1);
@ -2056,11 +2056,11 @@ size_t actual_x(const char *str, size_t xplus)
assert(str != NULL);
while (*str != '\0') {
int str_len = parse_char(str, NULL, &length
int str_len = parse_char(str, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, &length);
if (length > xplus)
break;
@ -2085,11 +2085,11 @@ size_t strnlenpt(const char *str, size_t size)
assert(str != NULL);
while (*str != '\0') {
int str_len = parse_char(str, NULL, &length
int str_len = parse_char(str, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, &length);
str += str_len;
@ -2160,25 +2160,25 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
/* Throughout the loop, we maintain the fact that *buf displays at
* column start_col. */
while (start_col <= end_col && *buf != '\0') {
int wide_buf, wide_buf_len;
size_t old_col = start_col;
int wide_buf, mb_buf_len;
#ifdef NANO_WIDE
bool bad_char;
#endif
size_t old_col = start_col;
wide_buf_len = parse_char(buf, &wide_buf, &start_col
mb_buf_len = parse_char(buf, &wide_buf
#ifdef NANO_WIDE
, &bad_char
#endif
);
, &start_col);
#ifdef NANO_WIDE
/* If buf contains a null byte or an invalid multibyte
* character, interpret that character as though it's a wide
* character. */
if (!ISSET(NO_UTF8) && bad_char) {
char *bad_wide_buf = charalloc(MB_CUR_MAX);
int bad_wide_buf_len;
char *bad_mb_buf = charalloc(MB_CUR_MAX);
int bad_mb_buf_len;
/* If we have a control character, add one byte to account
* for the "^" that will be displayed in front of it, and
@ -2191,12 +2191,12 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
/* Translate the wide character to its multibyte
* equivalent. */
bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf);
if (bad_wide_buf_len != -1)
retval += bad_wide_buf_len;
if (bad_mb_buf_len != -1)
retval += bad_mb_buf_len;
free(bad_wide_buf);
free(bad_mb_buf);
} else {
#endif
/* If we have a tab, get its width in bytes using the
@ -2208,23 +2208,22 @@ size_t display_string_len(const char *buf, size_t start_col, size_t
* then add the number of bytes for its visible equivalent
* as returned by control_rep(). */
else if (is_cntrl_char(wide_buf)) {
char ctrl_wide_buf =
control_rep((unsigned char)wide_buf);
char ctrl_mb_buf = control_rep((unsigned char)wide_buf);
retval++;
retval += parse_char(&ctrl_wide_buf, NULL, NULL
retval += parse_char(&ctrl_mb_buf, NULL
#ifdef NANO_WIDE
, NULL
#endif
);
, NULL);
/* If we have a normal character, add its width in bytes
* normally. */
} else
retval += wide_buf_len;
retval += mb_buf_len;
#ifdef NANO_WIDE
}
buf += wide_buf_len;
buf += mb_buf_len;
#endif
}
@ -2279,43 +2278,43 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
if (column < start_col || (dollars && column > 0 &&
buf[start_index] != '\t')) {
int wide_buf, wide_buf_len;
int wide_buf, mb_buf_len;
/* We don't display all of buf[start_index] since it starts to
* the left of the screen. */
wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
mb_buf_len = parse_char(buf + start_index, &wide_buf
#ifdef NANO_WIDE
, NULL
#endif
);
, NULL);
if (is_cntrl_char(wide_buf)) {
if (column < start_col) {
char *ctrl_wide_buf =
char *ctrl_mb_buf =
#ifdef NANO_WIDE
!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
#endif
charalloc(1);
int ctrl_wide_buf_len, i;
int ctrl_mb_buf_len, i;
wide_buf = control_rep((unsigned char)wide_buf);
#ifdef NANO_WIDE
if (!ISSET(NO_UTF8))
ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
ctrl_mb_buf_len = wctomb(ctrl_mb_buf,
(wchar_t)wide_buf);
else {
#endif
ctrl_wide_buf_len = 1;
ctrl_wide_buf[0] = (unsigned char)wide_buf;
ctrl_mb_buf_len = 1;
ctrl_mb_buf[0] = (unsigned char)wide_buf;
#ifdef NANO_WIDE
}
#endif
for (i = 0; i < ctrl_wide_buf_len; i++)
converted[index++] = ctrl_wide_buf[i];
for (i = 0; i < ctrl_mb_buf_len; i++)
converted[index++] = ctrl_mb_buf[i];
free(ctrl_wide_buf);
free(ctrl_mb_buf);
#ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) {
@ -2327,7 +2326,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
#endif
start_col++;
start_index += wide_buf_len;
start_index += mb_buf_len;
}
}
#ifdef NANO_WIDE
@ -2335,22 +2334,22 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
converted[index++] = ' ';
start_col++;
start_index += wide_buf_len;
start_index += mb_buf_len;
}
#endif
}
while (index < alloc_len - 1 && buf[start_index] != '\0') {
int wide_buf, wide_buf_len;
int wide_buf, mb_buf_len;
#ifdef NANO_WIDE
bool bad_char;
#endif
wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
mb_buf_len = parse_char(buf + start_index, &wide_buf
#ifdef NANO_WIDE
, &bad_char
#endif
);
, NULL);
if (wide_buf == '\t') {
converted[index++] =
@ -2367,12 +2366,12 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
* contains an invalid multibyte control character, interpret
* that character as though it's a normal control character. */
} else if (is_cntrl_char(wide_buf)) {
char *ctrl_wide_buf =
char *ctrl_mb_buf =
#ifdef NANO_WIDE
!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
#endif
charalloc(1);
int ctrl_wide_buf_len, i;
int ctrl_mb_buf_len, i;
converted[index++] = '^';
start_col++;
@ -2380,20 +2379,20 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
#ifdef NANO_WIDE
if (!ISSET(NO_UTF8))
ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
ctrl_mb_buf_len = wctomb(ctrl_mb_buf,
(wchar_t)wide_buf);
else {
#endif
ctrl_wide_buf_len = 1;
ctrl_wide_buf[0] = (unsigned char)wide_buf;
ctrl_mb_buf_len = 1;
ctrl_mb_buf[0] = (unsigned char)wide_buf;
#ifdef NANO_WIDE
}
#endif
for (i = 0; i < ctrl_wide_buf_len; i++)
converted[index++] = ctrl_wide_buf[i];
for (i = 0; i < ctrl_mb_buf_len; i++)
converted[index++] = ctrl_mb_buf[i];
free(ctrl_wide_buf);
free(ctrl_mb_buf);
#ifdef NANO_WIDE
if (!ISSET(NO_UTF8)) {
@ -2419,19 +2418,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
* character, interpret that character as though it's a
* normal non-control character. */
if (!ISSET(NO_UTF8) && bad_char) {
char *bad_wide_buf = charalloc(MB_CUR_MAX);
int bad_wide_buf_len;
char *bad_mb_buf = charalloc(MB_CUR_MAX);
int bad_mb_buf_len;
bad_wide_buf_len = wctomb(bad_wide_buf,
(wchar_t)wide_buf);
bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf);
for (i = 0; i < bad_wide_buf_len; i++)
converted[index++] = bad_wide_buf[i];
for (i = 0; i < bad_mb_buf_len; i++)
converted[index++] = bad_mb_buf[i];
free(bad_wide_buf);
free(bad_mb_buf);
} else {
#endif
for (i = 0; i < wide_buf_len; i++)
for (i = 0; i < mb_buf_len; i++)
converted[index++] = buf[start_index + i];
#ifdef NANO_WIDE
}
@ -2446,7 +2444,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
start_col++;
}
start_index += wide_buf_len;
start_index += mb_buf_len;
}
if (index < alloc_len - 1)