massive updates to multibyte/wide character support; deal with multibyte characters and strings instead of wide characters and strings as much as possible, and move multibyte/wide character-specific functions into their own source file, chars.c

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2248 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
2005-01-12 03:25:57 +00:00 · 2005-01-12 03:25:57 +00:00 · b54155c4a4
--- a/20
+++ b/20
@ -87,6 +87,23 @@ CVS code -
 	  do_statusbar_input() and do_statusbar_output(); new functions
 	  keys_to_buffer(), unparse_kbinput(), and
 	  do_statusbar_verbatim_input(). (DLR)
+	- Yet more steps toward full wide character/multibyte character
+	  support.  Overhaul the functions that already have support for
+	  them to work with multibyte strings as much as possible, add
+	  support to a few more functions as well, and move multibyte
+	  character-specific functions to their own source file.  New
+	  file chars.c; new functions is_blank_mbchar(),
+	  is_blank_wchar(), is_cntrl_mbchar(), is_cntrl_wchar(),
+	  control_mbrep(), control_wrep(), mbwidth(), mb_cur_max(), and
+	  make_mbchar(); changes to is_blank_char() (moved to chars.c),
+	  is_cntrl_char() (moved to chars.c), parse_char() (renamed
+	  parse_mbchar() and moved to chars.c), do_verbatim_input(),
+	  do_delete(), do_tab(), do_input(), do_output(), get_buffer(),
+	  unget_input(), unget_kbinput(), get_input(), parse_kbinput(),
+	  unparse_kbinput(), parse_verbatim_kbinput(),
+	  do_statusbar_input(), do_statusbar_verbatim_input(),
+	  do_statusbar_output(), and display_string(); removal of
+	  buffer_to_keys() and keys_to_buffer(). (DLR)
 - cut.c:
  do_cut_text()
 	- If keep_cutbuffer is FALSE, only blow away the text in the
@ -209,6 +226,7 @@ CVS code -
 	  obsolete and it defines a struct termio that we don't use
 	  anywhere. (DLR)
 	- Typo fixes. (DLR)
+	- Add checks for iswblank(), mblen(), and wctype.h. (DLR)
 - doc/faq.html:
 	- Remove now-inaccurate note about verbatim input's not working
 	  at prompts, and update its description to mention that it
@ -219,6 +237,8 @@ CVS code -
 	  display.  Since ASCII is technically only seven bits wide,
 	  characters 128-255 aren't ASCII. (DLR, suggested by Michael
 	  Piefel)
+- src/Makefile.am:
+	- Add chars.c to nano_SOURCES. (DLR)

 GNU nano 1.3.5 - 2004.11.22
 - General:
--- a/configure.ac
+++ b/configure.ac
@ -40,7 +40,7 @@ AM_GNU_GETTEXT([external], [need-ngettext])

 dnl Checks for header files.
 AC_HEADER_STDC
-AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termios.h wchar.h)
+AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termios.h wchar.h wctype.h)
 AC_CHECK_HEADER(regex.h,
    AC_MSG_CHECKING([for broken regexec])
    AC_TRY_RUN([
@ -291,7 +291,7 @@ AC_MSG_WARN([*** Can not use slang when cross-compiling])),
    esac], [AC_MSG_RESULT(no)])

 dnl Checks for functions
-AC_CHECK_FUNCS(snprintf vsnprintf isblank strcasecmp strncasecmp strcasestr strnlen getline getdelim mbtowc wctomb wcwidth)
+AC_CHECK_FUNCS(snprintf vsnprintf isblank iswblank strcasecmp strncasecmp strcasestr strnlen getline getdelim mblen mbtowc wctomb wcwidth)
 if test "x$ac_cv_func_snprintf" = "xno" -o "x$ac_cv_func_vsnprintf" = "xno"
 then
 	AM_PATH_GLIB_2_0(2.0.0,,
@ -357,9 +357,9 @@ then
 	LDFLAGS="$LDFLAGS $GLIB_LIBS"
 fi

-if test "x$CURSES_LIB_WIDE" = "xyes" -a "x$ac_cv_func_mbtowc" = "xyes" -a "x$ac_cv_func_wctomb" = "xyes" -a "x$ac_cv_func_wcwidth" = "xyes"
+if test "x$CURSES_LIB_WIDE" = "xyes" -a "x$ac_cv_func_mblen" = "xyes" -a "x$ac_cv_func_mbtowc" = "xyes" -a "x$ac_cv_func_wctomb" = "xyes" -a "x$ac_cv_func_wcwidth" = "xyes"
 then
-	AC_DEFINE(NANO_WIDE, 1, [Define this if your system has wide character support (a wide curses library, mbtowc(), wctomb(), and wcwidth()).])
+	AC_DEFINE(NANO_WIDE, 1, [Define this if your system has wide character support (a wide curses library, mblen(), mbtowc(), wctomb(), and wcwidth()).])
 else
 	AC_MSG_WARN([Insufficient wide character support found.  nano will not be able to support UTF-8.])
 fi
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -4,7 +4,8 @@ INCLUDES = -Iintl -DLOCALEDIR=\"$(localedir)\" -DSYSCONFDIR=\"$(sysconfdir)\"
 ACLOCAL_AMFLAGS = -I m4

 bin_PROGRAMS = 	nano
-nano_SOURCES =	color.c \
+nano_SOURCES =	chars.c \
+		color.c \
 		cut.c \
 		files.c \
 		global.c \
--- a/src/chars.c
+++ b/src/chars.c
@ -0,0 +1,352 @@
+/* $Id$ */
+/**************************************************************************
+ *   chars.c                                                              *
+ *                                                                        *
+ *   Copyright (C) 2005 Chris Allegretta                                  *
+ *   This program is free software; you can redistribute it and/or modify *
+ *   it under the terms of the GNU General Public License as published by *
+ *   the Free Software Foundation; either version 2, or (at your option)  *
+ *   any later version.                                                   *
+ *                                                                        *
+ *   This program is distributed in the hope that it will be useful,      *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of       *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        *
+ *   GNU General Public License for more details.                         *
+ *                                                                        *
+ *   You should have received a copy of the GNU General Public License    *
+ *   along with this program; if not, write to the Free Software          *
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.            *
+ *                                                                        *
+ **************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <assert.h>
+#include "proto.h"
+#include "nano.h"
+
+#if defined(HAVE_WCHAR_H) && defined(NANO_WIDE)
+#include <wchar.h>
+#endif
+
+#if defined(HAVE_WCTYPE_H) && defined(NANO_WIDE)
+#include <wctype.h>
+#endif
+
+/* Report whether the byte c is a blank character.  Use the C library's
+ * isblank() when it's available; otherwise, count c as blank if it's
+ * whitespace and is either a tab or not a control character. */
+bool is_blank_char(unsigned char c)
+{
+#ifdef HAVE_ISBLANK
+    return isblank(c);
+#else
+    /* Anything that isn't whitespace can't be blank. */
+    if (!isspace(c))
+	return FALSE;
+
+    return (c == '\t' || !is_cntrl_char(c));
+#endif
+}
+
+/* Report whether the multibyte character starting at c is blank.  In
+ * UTF-8 mode, c is converted to a wide character and checked with
+ * is_blank_wchar(); otherwise, only its first byte is checked, with
+ * is_blank_char(). */
+bool is_blank_mbchar(const char *c)
+{
+    assert(c != NULL);
+
+#ifdef NANO_WIDE
+    if (!ISSET(NO_UTF8)) {
+	wchar_t wide;
+
+	/* If c starts with a null byte or isn't a valid multibyte
+	 * character, reset the conversion state and classify c's first
+	 * byte instead. */
+	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
+	    mbtowc(NULL, NULL, 0);
+	    wide = (unsigned char)*c;
+	}
+
+	return is_blank_wchar(wide);
+    }
+#endif
+
+    return is_blank_char((unsigned char)*c);
+}
+
+#ifdef NANO_WIDE
+/* Report whether the wide character wc is blank.  Use the C library's
+ * iswblank() when it's available; otherwise, count wc as blank if it's
+ * whitespace and is either a tab or not a control character. */
+bool is_blank_wchar(wchar_t wc)
+{
+#ifdef HAVE_ISWBLANK
+    return iswblank(wc);
+#else
+    /* Anything that isn't whitespace can't be blank. */
+    if (!iswspace(wc))
+	return FALSE;
+
+    return (wc == '\t' || !is_cntrl_wchar(wc));
+#endif
+}
+#endif
+
+/* Report whether the byte c is a control character.  Unlike iscntrl(),
+ * this also covers the control characters with their high bits set:
+ * c counts as a control character if it's in the range 0-31 or
+ * 127-159. */
+bool is_cntrl_char(unsigned char c)
+{
+    /* The low control characters. */
+    if (c < 32)
+	return TRUE;
+
+    /* Delete, plus the control characters with their high bits set. */
+    return (c >= 127 && c < 160);
+}
+
+/* Report whether the multibyte character starting at c is a control
+ * character, including the control characters with their high bits
+ * set.  In UTF-8 mode, c is converted to a wide character and checked
+ * with is_cntrl_wchar(); otherwise, only its first byte is checked,
+ * with is_cntrl_char(). */
+bool is_cntrl_mbchar(const char *c)
+{
+    assert(c != NULL);
+
+#ifdef NANO_WIDE
+    if (!ISSET(NO_UTF8)) {
+	wchar_t wide;
+
+	/* If c starts with a null byte or isn't a valid multibyte
+	 * character, reset the conversion state and classify c's first
+	 * byte instead. */
+	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
+	    mbtowc(NULL, NULL, 0);
+	    wide = (unsigned char)*c;
+	}
+
+	return is_cntrl_wchar(wide);
+    }
+#endif
+
+    return is_cntrl_char((unsigned char)*c);
+}
+
+#ifdef NANO_WIDE
+/* Report whether the wide character wc is a control character.  Unlike
+ * iswcntrl(), this also covers the control characters with their high
+ * bits set: wc counts as a control character if it's in the range 0-31
+ * or 127-159. */
+bool is_cntrl_wchar(wchar_t wc)
+{
+    /* The low control characters. */
+    if (wc >= 0 && wc < 32)
+	return TRUE;
+
+    /* Delete, plus the control characters with their high bits set. */
+    return (wc >= 127 && wc < 160);
+}
+#endif
+
+/* Return the visible character that stands for the control character
+ * c when it's displayed after a "^": '@' for a newline, '?' for
+ * NANO_CONTROL_8, and c + 64 for everything else. */
+unsigned char control_rep(unsigned char c)
+{
+    /* A newline embedded in a line is an encoded null, so show it the
+     * way a null would be shown. */
+    if (c == '\n')
+	return '@';
+
+    if (c == NANO_CONTROL_8)
+	return '?';
+
+    return c + 64;
+}
+
+/* c is a multibyte control character.  It displays as ^@, ^?, or ^[ch]
+ * where ch is c + 64.  Store that multibyte character in crep (it is
+ * not null-terminated), store its length in bytes in *crep_len, and
+ * return crep.  In UTF-8 mode, crep must have room for MB_CUR_MAX
+ * bytes, and *crep_len is set to zero if the visible character has no
+ * multibyte representation; otherwise, exactly one byte is stored. */
+char *control_mbrep(const char *c, char *crep, int *crep_len)
+{
+    assert(c != NULL && crep != NULL && crep_len != NULL);
+
+#ifdef NANO_WIDE
+    if (!ISSET(NO_UTF8)) {
+	wchar_t wc;
+
+	/* If c starts with a null byte or isn't a valid multibyte
+	 * character, reset the conversion state and use c's first byte
+	 * instead.  The byte must be read as unsigned, as the other
+	 * multibyte functions do: where char is signed, a byte with
+	 * its high bit set would otherwise become a negative wide
+	 * character that wctomb() can't convert. */
+	if (mbtowc(&wc, c, MB_CUR_MAX) <= 0) {
+	    mbtowc(NULL, NULL, 0);
+	    wc = (unsigned char)*c;
+	}
+
+	*crep_len = wctomb(crep, control_wrep(wc));
+
+	/* If the conversion failed, reset wctomb()'s state and report
+	 * an empty result. */
+	if (*crep_len <= 0) {
+	    wctomb(NULL, 0);
+	    *crep_len = 0;
+	}
+
+	return crep;
+    }
+#endif
+
+    *crep_len = 1;
+    crep[0] = control_rep((unsigned char)*c);
+
+    return crep;
+}
+
+#ifdef NANO_WIDE
+/* Return the visible wide character that stands for the wide control
+ * character wc when it's displayed after a "^": '@' for a newline, '?'
+ * for NANO_CONTROL_8, and wc + 64 for everything else. */
+wchar_t control_wrep(wchar_t wc)
+{
+    /* A newline embedded in a line is an encoded null, so show it the
+     * way a null would be shown. */
+    if (wc == '\n')
+	return '@';
+
+    if (wc == NANO_CONTROL_8)
+	return '?';
+
+    return wc + 64;
+}
+#endif
+
+/* Return the width in columns of the multibyte character starting at
+ * c.  In UTF-8 mode, this is what wcwidth() reports for it, except
+ * that a result of -1 is turned into zero; otherwise, it's always
+ * one. */
+int mbwidth(const char *c)
+{
+    assert(c != NULL);
+
+#ifdef NANO_WIDE
+    if (!ISSET(NO_UTF8)) {
+	wchar_t wide;
+	int columns;
+
+	/* If c starts with a null byte or isn't a valid multibyte
+	 * character, reset the conversion state and measure c's first
+	 * byte instead. */
+	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
+	    mbtowc(NULL, NULL, 0);
+	    wide = (unsigned char)*c;
+	}
+
+	columns = wcwidth(wide);
+
+	return (columns == -1) ? 0 : columns;
+    }
+#endif
+
+    return 1;
+}
+
+/* Return the largest number of bytes that one multibyte character can
+ * take up: MB_CUR_MAX in UTF-8 mode, and one otherwise. */
+int mb_cur_max(void)
+{
+    return
+#ifdef NANO_WIDE
+	!ISSET(NO_UTF8) ? MB_CUR_MAX :
+#endif
+	1;
+}
+
+/* Convert the value in chr to a multibyte character with the same
+ * wide character value as chr.  Store its bytes in chr_mb (they are
+ * not null-terminated), store its length in bytes in *chr_mb_len, and
+ * return chr_mb.  In UTF-8 mode, chr_mb must have room for MB_CUR_MAX
+ * bytes.  If chr has no multibyte representation, or if we're not in
+ * UTF-8 mode, store chr truncated to a single byte instead. */
+char *make_mbchar(unsigned int chr, char *chr_mb, int *chr_mb_len)
+{
+    assert(chr_mb != NULL && chr_mb_len != NULL);
+
+#ifdef NANO_WIDE
+    if (!ISSET(NO_UTF8)) {
+	*chr_mb_len = wctomb(chr_mb, (wchar_t)chr);
+
+	if (*chr_mb_len > 0)
+	    return chr_mb;
+
+	/* The conversion failed.  It was wctomb() that failed, so it's
+	 * wctomb()'s conversion state that has to be reset, not
+	 * mbtowc()'s.  Then fall through to the single-byte case. */
+	wctomb(NULL, 0);
+    }
+#endif
+
+    *chr_mb_len = 1;
+    chr_mb[0] = (unsigned char)chr;
+
+    return chr_mb;
+}
+
+/* Parse a multibyte character from buf.  Return the number of bytes
+ * used.  If chr isn't NULL, store the multibyte character in it (that
+ * many bytes, with no terminating null).  If bad_chr (which exists
+ * only when NANO_WIDE is defined) isn't NULL, set it to TRUE if we
+ * have a null byte or a bad multibyte character, and to FALSE
+ * otherwise.  If col isn't NULL, add the character's display width to
+ * it.  If *buf is '\t', we expect col to have the current display
+ * width. */
+int parse_mbchar(const char *buf, char *chr
+#ifdef NANO_WIDE
+	, bool *bad_chr
+#endif
+	, size_t *col)
+{
+    int buf_mb_len;
+
+    assert(buf != NULL);
+
+#ifdef NANO_WIDE
+    if (bad_chr != NULL)
+	*bad_chr = FALSE;
+
+    if (!ISSET(NO_UTF8)) {
+	/* Get the number of bytes in the multibyte character. */
+	buf_mb_len = mblen(buf, MB_CUR_MAX);
+
+	/* If buf contains a null byte or an invalid multibyte
+	 * character, reset mblen()'s conversion state, interpret buf's
+	 * first byte as a one-byte character, and set bad_chr to
+	 * TRUE. */
+	if (buf_mb_len <= 0) {
+	    mblen(NULL, 0);
+	    buf_mb_len = 1;
+	    if (bad_chr != NULL)
+		*bad_chr = TRUE;
+	}
+
+	/* Save the multibyte character in chr. */
+	if (chr != NULL) {
+	    int i;
+	    for (i = 0; i < buf_mb_len; i++)
+		chr[i] = buf[i];
+	}
+
+	/* Add the column width of the multibyte character to col. */
+	if (col != NULL) {
+	    /* If we have a tab, get its width in columns using the
+	     * current value of col. */
+	    if (*buf == '\t')
+		*col += tabsize - *col % tabsize;
+	    /* If we have a control character, get its width using one
+	     * column for the "^" that will be displayed in front of it,
+	     * and the width in columns of its visible equivalent as
+	     * returned by control_mbrep(). */
+	    else if (is_cntrl_mbchar(buf)) {
+		char *ctrl_buf_mb = charalloc(mb_cur_max());
+		int ctrl_buf_mb_len;
+
+		(*col)++;
+
+		ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb,
+			&ctrl_buf_mb_len);
+
+		*col += mbwidth(ctrl_buf_mb);
+
+		free(ctrl_buf_mb);
+	    /* If we have a normal character, get its width in columns
+	     * normally. */
+	    } else
+		*col += mbwidth(buf);
+	}
+    } else {
+#endif
+	/* Get the number of bytes in the byte character. */
+	buf_mb_len = 1;
+
+	/* Save the byte character in chr. */
+	if (chr != NULL)
+	    *chr = *buf;
+
+	if (col != NULL) {
+	    /* If we have a tab, get its width in columns using the
+	     * current value of col. */
+	    if (*buf == '\t')
+		*col += tabsize - *col % tabsize;
+	    /* If we have a control character, it's two columns wide:
+	     * one column for the "^" that will be displayed in front of
+	     * it, and one column for its visible equivalent as returned
+	     * by control_rep(). */
+	    else if (is_cntrl_char((unsigned char)*buf))
+		*col += 2;
+	    /* If we have a normal character, it's one column wide. */
+	    else
+		(*col)++;
+	}
+#ifdef NANO_WIDE
+    }
+#endif
+
+    return buf_mb_len;
+}
--- a/src/files.c
+++ b/src/files.c
@ -2175,7 +2175,7 @@ char *input_tab(char *buf, int place, bool *lastwastab, int *newplace,
 	tmp = matchbuf;

 	/* skip any leading white space */
-	while (*tmp && isblank(*tmp))
+	while (*tmp && is_blank_char(*tmp))
 	    ++tmp;

 	/* Free up any memory already allocated */
--- a/src/move.c
+++ b/src/move.c
@ -2,7 +2,7 @@
 /**************************************************************************
 *   move.c                                                               *
 *                                                                        *
- *   Copyright (C) 1999-2004 Chris Allegretta                             *
+ *   Copyright (C) 1999-2005 Chris Allegretta                             *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 2, or (at your option)  *
@ -58,7 +58,7 @@ void do_home(void)
    if (ISSET(SMART_HOME)) {
 	size_t current_x_save = current_x;

-	for (current_x = 0; isblank(current->data[current_x]) &&
+	for (current_x = 0; is_blank_char(current->data[current_x]) &&
 		current->data[current_x] != '\0'; current_x++)
 	    ;

--- a/src/nano.c
+++ b/src/nano.c
@ -1144,8 +1144,9 @@ bool open_pipe(const char *command)

 void do_verbatim_input(void)
 {
-    int *kbinput;	/* Used to hold verbatim input. */
-    size_t kbinput_len;	/* Length of verbatim input. */
+    int *kbinput;
+    size_t kbinput_len, i;
+    char *output;

    statusbar(_("Verbatim input"));

@ -1153,9 +1154,15 @@ void do_verbatim_input(void)
    kbinput = get_verbatim_kbinput(edit, &kbinput_len);

    /* Display all the verbatim characters at once. */
-    do_output(kbinput, kbinput_len);
+    output = charalloc(kbinput_len + 1);

-    free(kbinput);
+    for (i = 0; i < kbinput_len; i++)
+	output[i] = (char)kbinput[i];
+    output[i] = '\0';
+
+    do_output(output, kbinput_len);
+
+    free(output);
 }

 void do_backspace(void)
@ -1178,7 +1185,7 @@ void do_delete(void)
    placewewant = xplustabs();

    if (current->data[current_x] != '\0') {
-	int char_len = parse_char(current->data + current_x, NULL
+	int char_buf_len = parse_mbchar(current->data + current_x, NULL
 #ifdef NANO_WIDE
 		, NULL
 #endif
@ -1189,15 +1196,15 @@ void do_delete(void)

 	/* Let's get dangerous. */
 	charmove(&current->data[current_x],
-		&current->data[current_x + char_len],
-		line_len - char_len + 1);
+		&current->data[current_x + char_buf_len],
+		line_len - char_buf_len + 1);

-	null_at(&current->data, current_x + line_len - char_len);
+	null_at(&current->data, current_x + line_len - char_buf_len);
 #ifndef NANO_SMALL
 	if (current_x < mark_beginx && mark_beginbuf == current)
-	    mark_beginx -= char_len;
+	    mark_beginx -= char_buf_len;
 #endif
-	totsize -= char_len;
+	totsize -= char_buf_len;
    } else if (current != filebot && (current->next != filebot ||
 	current->data[0] == '\0')) {
 	/* We can delete the line before filebot only if it is blank: it
@ -1251,8 +1258,9 @@ void do_delete(void)

 void do_tab(void)
 {
-    int kbinput = '\t';
-    do_output(&kbinput, 1);
+    char *kbinput = "\t";
+
+    do_output(kbinput, 1);
 }

 /* Someone hits return *gasp!* */
@ -1455,7 +1463,7 @@ bool do_wrap(filestruct *inptr)
    wrap_line = inptr->data + i;
    for (; i < len; i++, wrap_line++) {
 	/* Record where the last word ended. */
-	if (!isblank(*wrap_line))
+	if (!is_blank_char(*wrap_line))
 	    word_back = i;
 	/* If we have found a legal wrap point and the current word
 	 * extends too far, then we stop. */
@ -1463,7 +1471,7 @@ bool do_wrap(filestruct *inptr)
 		strnlenpt(inptr->data, word_back + 1) > fill)
 	    break;
 	/* We record the latest legal wrap point. */
-	if (word_back != i && !isblank(wrap_line[1]))
+	if (word_back != i && !is_blank_char(wrap_line[1]))
 	    wrap_loc = i;
    }
    if (i == len)
@ -1536,7 +1544,7 @@ bool do_wrap(filestruct *inptr)
 	 * between after_break and wrap_line.  If the line already ends
 	 * in a tab or a space, we don't add a space and decrement
 	 * totsize to account for that. */
-	if (!isblank(newline[new_line_len - 1]))
+	if (!is_blank_char(newline[new_line_len - 1]))
 	    strcat(newline, " ");
 	else
 	    totsize--;
@ -2172,7 +2180,7 @@ size_t indent_length(const char *line)
    size_t len = 0;

    assert(line != NULL);
-    while (isblank(*line)) {
+    while (is_blank_char(*line)) {
 	line++;
 	len++;
    }
@ -2200,7 +2208,7 @@ void justify_format(filestruct *line, size_t skip)
    assert(line != NULL);
    assert(line->data != NULL);
    assert(skip < strlen(line->data));
-    assert(!isblank(line->data[skip]));
+    assert(!is_blank_char(line->data[skip]));

    back = line->data + skip;
    for (front = back; ; front++) {
@ -2497,10 +2505,10 @@ bool breakable(const char *line, ssize_t goal)
    while (*line != '\0' && goal >= 0) {
 	size_t pos = 0;

-	if (isblank(*line))
+	if (is_blank_char(*line))
 	    return TRUE;

-	line += parse_char(line, NULL
+	line += parse_mbchar(line, NULL
 #ifdef NANO_WIDE
 		, NULL
 #endif
@ -2538,7 +2546,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)

 	assert(*line != '\t');

-	line_len = parse_char(line, NULL
+	line_len = parse_mbchar(line, NULL
 #ifdef NANO_WIDE
 		, NULL
 #endif
@ -3468,7 +3476,16 @@ int do_input(bool *meta_key, bool *func_key, bool *s_or_t, bool
 	    if (kbinput != NULL) {
 		/* Display all the characters in the input buffer at
 		 * once. */
-		do_output(kbinput, kbinput_len);
+		char *output = charalloc(kbinput_len + 1);
+		size_t i;
+
+		for (i = 0; i < kbinput_len; i++)
+		    output[i] = (char)kbinput[i];
+		output[i] = '\0';
+
+		do_output(output, kbinput_len);
+
+		free(output);

 		/* Empty the input buffer. */
 		kbinput_len = 0;
@ -3588,55 +3605,45 @@ bool do_mouse(void)
 }
 #endif /* !DISABLE_MOUSE */

-/* The user typed kbinput_len wide characters.  Add them to the edit
- * buffer as multibyte characters. */
-void do_output(int *kbinput, size_t kbinput_len)
+/* The user typed kbinput_len multibyte characters.  Add them to the
+ * edit buffer. */
+void do_output(char *output, size_t output_len)
 {
-    size_t i, current_len = strlen(current->data);
+    size_t current_len = strlen(current->data), i = 0;
    bool old_constupdate = ISSET(CONSTUPDATE);
    bool do_refresh = FALSE;
 	/* Do we have to call edit_refresh(), or can we get away with
 	 * update_line()? */

-    char *key =
-#ifdef NANO_WIDE
-	!ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) :
-#endif
-	charalloc(1);
+    char *char_buf = charalloc(mb_cur_max());
+    int char_buf_len;

    assert(current != NULL && current->data != NULL);

    /* Turn off constant cursor position display. */
    UNSET(CONSTUPDATE);

-    for (i = 0; i < kbinput_len; i++) {
-	int key_len;
-
+    while (i < output_len) {
 	/* Null to newline, if needed. */
-	if (kbinput[i] == '\0')
-	    kbinput[i] = '\n';
+	if (output[i] == '\0')
+	    output[i] = '\n';
 	/* Newline to Enter, if needed. */
-	else if (kbinput[i] == '\n') {
+	else if (output[i] == '\n') {
 	    do_enter();
+	    i++;
 	    continue;
 	}

+	/* Interpret the next multibyte character.  If it's an invalid
+	 * multibyte character, interpret it as though it's a byte
+	 * character. */
+	char_buf_len = parse_mbchar(output + i, char_buf
 #ifdef NANO_WIDE
-	/* Change the wide character to its multibyte value.  If it's
-	 * invalid, go on to the next character. */
-	if (!ISSET(NO_UTF8)) {
-	    key_len = wctomb(key, (wchar_t)kbinput[i]);
+		, NULL
+#endif
+		, NULL);

-	    if (key_len == -1)
-		continue;
-	/* Interpret the character as a single-byte sequence. */
-	} else {
-#endif
-	    key_len = 1;
-	    key[0] = (unsigned char)kbinput[i];
-#ifdef NANO_WIDE
-	}
-#endif
+	i += char_buf_len;

 	/* When a character is inserted on the current magicline, it
 	 * means we need a new one! */
@ -3644,30 +3651,30 @@ void do_output(int *kbinput, size_t kbinput_len)
 	    new_magicline();

 	/* More dangerousness fun =) */
-	current->data = charealloc(current->data,
-		current_len + (key_len * 2));
+	current->data = charealloc(current->data, current_len +
+		(char_buf_len * 2));

 	assert(current_x <= current_len);

-	charmove(&current->data[current_x + key_len],
+	charmove(&current->data[current_x + char_buf_len],
 		&current->data[current_x],
-		current_len - current_x + key_len);
-	charcpy(&current->data[current_x], key, key_len);
-	current_len += key_len;
-	totsize += key_len;
+		current_len - current_x + char_buf_len);
+	charcpy(&current->data[current_x], char_buf, char_buf_len);
+	current_len += char_buf_len;
+	totsize += char_buf_len;
 	set_modified();

 #ifndef NANO_SMALL
 	/* Note that current_x has not yet been incremented. */
 	if (current == mark_beginbuf && current_x < mark_beginx)
-	    mark_beginx += key_len;
+	    mark_beginx += char_buf_len;
 #endif

 	do_right(FALSE);

 #ifndef DISABLE_WRAPPING
 	/* If we're wrapping text, we need to call edit_refresh(). */
-	if (!ISSET(NO_WRAP) && kbinput[i] != '\t') {
+	if (!ISSET(NO_WRAP) && output[i] != '\t') {
 	    bool do_refresh_save = do_refresh;

 	    do_refresh = do_wrap(current);
@ -3692,7 +3699,7 @@ void do_output(int *kbinput, size_t kbinput_len)
    if (old_constupdate)
 	SET(CONSTUPDATE);

-    free(key);
+    free(char_buf);

    if (do_refresh)
 	edit_refresh();
--- a/src/nano.h
+++ b/src/nano.h
@ -100,12 +100,8 @@
 # endif
 #endif

-/* If no isblank(), strcasecmp(), strncasecmp(), strcasestr(),
- * strnlen(), getdelim(), or getline(), use the versions we have. */
-#ifndef HAVE_ISBLANK
-#define isblank is_blank_char
-#endif
-
+/* If no strcasecmp(), strncasecmp(), strcasestr(), strnlen(),
+ * getdelim(), or getline(), use the versions we have. */
 #ifndef HAVE_STRCASECMP
 #define strcasecmp nstricmp
 #endif
@ -161,11 +157,6 @@ typedef enum {
 } topmidnone;

 /* Structure types. */
-typedef struct buffer {
-    int key;
-    bool key_code;
-} buffer;
-
 typedef struct filestruct {
    char *data;
    struct filestruct *next;	/* Next node. */
--- a/src/proto.h
+++ b/src/proto.h
@ -150,6 +150,31 @@ extern char *homedir;

 /* Functions we want available. */

+/* Public functions in chars.c. */
+bool is_blank_char(unsigned char c);
+bool is_blank_mbchar(const char *c);
+#ifdef NANO_WIDE
+bool is_blank_wchar(wchar_t wc);
+#endif
+bool is_cntrl_char(unsigned char c);
+bool is_cntrl_mbchar(const char *c);
+#ifdef NANO_WIDE
+bool is_cntrl_wchar(wchar_t wc);
+#endif
+unsigned char control_rep(unsigned char c);
+char *control_mbrep(const char *c, char *crep, int *crep_len);
+#ifdef NANO_WIDE
+wchar_t control_wrep(wchar_t c);
+#endif
+int mbwidth(const char *c);
+int mb_cur_max(void);
+char *make_mbchar(unsigned int chr, char *chr_mb, int *chr_mb_len);
+int parse_mbchar(const char *buf, char *chr
+#ifdef NANO_WIDE
+	, bool *bad_chr
+#endif
+	, size_t *col);
+
 /* Public functions in color.c. */
 #ifdef ENABLE_COLOR
 void set_colorpairs(void);
@ -396,7 +421,7 @@ int do_input(bool *meta_key, bool *func_key, bool *s_or_t, bool
 #ifndef DISABLE_MOUSE
 bool do_mouse(void);
 #endif
-void do_output(int *kbinput, size_t kbinput_len);
+void do_output(char *output, size_t output_len);

 /* Public functions in rcfile.c. */
 #ifdef ENABLE_NANORC
@ -470,19 +495,9 @@ int regexec_safe(const regex_t *preg, const char *string, size_t nmatch,
 #endif
 int regexp_bol_or_eol(const regex_t *preg, const char *string);
 #endif
-#ifndef HAVE_ISBLANK
-int is_blank_char(int c);
-#endif
-int is_cntrl_char(int c);
-bool is_byte_char(int c);
 int num_of_digits(int n);
-unsigned char control_rep(unsigned char c);
+bool is_byte(int c);
 bool parse_num(const char *str, ssize_t *val);
-int parse_char(const char *buf, int *chr
-#ifdef NANO_WIDE
-	, bool *bad_chr
-#endif
-	, size_t *col);
 size_t move_left(const char *buf, size_t pos);
 size_t move_right(const char *buf, size_t pos);
 void align(char **strp);
@ -541,18 +556,16 @@ void reset_kbinput(void);
 #endif
 void get_buffer(WINDOW *win);
 size_t get_buffer_len(void);
-int *buffer_to_keys(buffer *input, size_t input_len);
-buffer *keys_to_buffer(int *input, size_t input_len);
-void unget_input(buffer *input, size_t input_len);
+void unget_input(int *input, size_t input_len);
 void unget_kbinput(int kbinput, bool meta_key, bool func_key);
-buffer *get_input(WINDOW *win, size_t input_len);
+int *get_input(WINDOW *win, size_t input_len);
 int get_kbinput(WINDOW *win, bool *meta_key, bool *func_key);
 int parse_kbinput(WINDOW *win, bool *meta_key, bool *func_key
 #ifndef NANO_SMALL
 	, bool reset
 #endif
 	);
-int get_escape_seq_kbinput(const int *sequence, size_t seq_len, bool
+int get_escape_seq_kbinput(const int *seq, size_t seq_len, bool
 	*ignore_seq);
 int get_escape_seq_abcd(int kbinput);
 int get_byte_kbinput(int kbinput
@ -566,7 +579,7 @@ int get_word_kbinput(int kbinput
 #endif
 	);
 int get_control_kbinput(int kbinput);
-void unparse_kbinput(size_t pos, int *kbinput, size_t kbinput_len);
+void unparse_kbinput(char *output, size_t output_len);
 int *get_verbatim_kbinput(WINDOW *win, size_t *kbinput_len);
 int *parse_verbatim_kbinput(WINDOW *win, size_t *kbinput_len);
 #ifndef DISABLE_MOUSE
@ -590,7 +603,7 @@ void do_statusbar_backspace(void);
 void do_statusbar_delete(void);
 void do_statusbar_cut_text(void);
 void do_statusbar_verbatim_input(bool *got_enter);
-void do_statusbar_output(int *kbinput, size_t kbinput_len, bool
+void do_statusbar_output(char *output, size_t output_len, bool
 	*got_enter);
 size_t xplustabs(void);
 size_t actual_x(const char *str, size_t xplus);
--- a/src/rcfile.c
+++ b/src/rcfile.c
@ -126,7 +126,7 @@ void rcfile_error(const char *msg, ...)
 /* Parse the next word from the string.  Returns NULL if we hit EOL. */
 char *parse_next_word(char *ptr)
 {
-    while (!isblank(*ptr) && *ptr != '\n' && *ptr != '\0')
+    while (!is_blank_char(*ptr) && *ptr != '\n' && *ptr != '\0')
 	ptr++;

    if (*ptr == '\0')
@ -135,7 +135,7 @@ char *parse_next_word(char *ptr)
    /* Null terminate and advance ptr */
    *ptr++ = 0;

-    while (isblank(*ptr))
+    while (is_blank_char(*ptr))
 	ptr++;

    return ptr;
@ -175,7 +175,7 @@ char *parse_argument(char *ptr)
 	ptr = last_quote + 1;
    }
    if (ptr != NULL)
-	while (isblank(*ptr))
+	while (is_blank_char(*ptr))
 	    ptr++;
    return ptr;
 }
@ -233,7 +233,7 @@ char *parse_next_regex(char *ptr)
    /* Null terminate and advance ptr. */
    *ptr++ = '\0';

-    while (isblank(*ptr))
+    while (is_blank_char(*ptr))
 	ptr++;

    return ptr;
@ -477,7 +477,7 @@ void parse_rcfile(FILE *rcstream)
    while (fgets(buf, 1023, rcstream) != 0) {
 	lineno++;
 	ptr = buf;
-	while (isblank(*ptr))
+	while (is_blank_char(*ptr))
 	    ptr++;

 	if (*ptr == '\n' || *ptr == '\0')
--- a/src/utils.c
+++ b/src/utils.c
@ -33,10 +33,6 @@
 #include "proto.h"
 #include "nano.h"

-#if defined(HAVE_WCHAR_H) && defined(NANO_WIDE)
-#include <wchar.h>
-#endif
-
 #ifdef HAVE_REGEX_H
 #ifdef BROKEN_REGEXEC
 int regexec_safe(const regex_t *preg, const char *string, size_t nmatch,
@ -56,29 +52,6 @@ int regexp_bol_or_eol(const regex_t *preg, const char *string)
 }
 #endif /* HAVE_REGEX_H */

-#ifndef HAVE_ISBLANK
-/* This function is equivalent to isblank(). */
-int is_blank_char(int c)
-{
-    return isspace(c) && (!is_cntrl_char(c) || c == '\t');
-}
-#endif
-
-/* This function is equivalent to iscntrl(), except in that it also
- * handles control characters with their high bits set. */
-int is_cntrl_char(int c)
-{
-    return (-128 <= c && c < -96) || (0 <= c && c < 32) ||
-	(127 <= c && c < 160);
-}
-
-/* Return TRUE if the character c is in byte range, and FALSE
- * otherwise. */
-bool is_byte_char(int c)
-{
-    return (unsigned int)c == (unsigned char)c;
-}
-
 int num_of_digits(int n)
 {
    int i = 1;
@ -94,17 +67,9 @@ int num_of_digits(int n)
    return i;
 }

-/* c is a control character.  It displays as ^@, ^?, or ^[ch] where ch
- * is c + 64.  We return that character. */
-unsigned char control_rep(unsigned char c)
+bool is_byte(int c)
 {
-    /* Treat newlines embedded in a line as encoded nulls. */
-    if (c == '\n')
-	return '@';
-    else if (c == NANO_CONTROL_8)
-	return '?';
-    else
-	return c + 64;
+    return ((unsigned int)c == (unsigned char)c);
 }

 /* Read a ssize_t from str, and store it in *val (if val is not NULL).
@ -128,116 +93,6 @@ bool parse_num(const char *str, ssize_t *val)
    return TRUE;
 }

-/* Parse a multibyte character from buf.  Return the number of bytes
- * used.  If chr isn't NULL, store the wide character in it.  If
- * bad_chr isn't NULL, set it to TRUE if we have a null byte or a bad
- * multibyte character.  If col isn't NULL, store the new display width
- * in it.  If *str is '\t', we expect col to have the current display
- * width. */
-int parse_char(const char *buf, int *chr
-#ifdef NANO_WIDE
-	, bool *bad_chr
-#endif
-	, size_t *col)
-{
-    int wide_buf, mb_buf_len;
-
-    assert(buf != NULL);
-
-#ifdef NANO_WIDE
-    if (bad_chr != NULL)
-	*bad_chr = FALSE;
-
-    if (!ISSET(NO_UTF8)) {
-	wchar_t tmp;
-
-	/* Get the wide character equivalent of the multibyte
-	 * character. */
-	mb_buf_len = mbtowc(&tmp, buf, MB_CUR_MAX);
-	wide_buf = (int)tmp;
-
-	/* If buf contains a null byte or an invalid multibyte
-	 * character, interpret buf's first byte as a single-byte
-	 * sequence and set bad_chr to TRUE. */
-	if (mb_buf_len <= 0) {
-	    mb_buf_len = 1;
-	    wide_buf = (unsigned char)*buf;
-	    if (bad_chr != NULL)
-		*bad_chr = TRUE;
-	}
-
-	/* Save the wide character in chr. */
-	if (chr != NULL)
-	    *chr = wide_buf;
-
-	/* Save the column width of the wide character in col. */
-	if (col != NULL) {
-	    /* If we have a tab, get its width in columns using the
-	     * current value of col. */
-	    if (wide_buf == '\t')
-		*col += tabsize - *col % tabsize;
-	    /* If we have a control character, get its width using one
-	     * column for the "^" that will be displayed in front of it,
-	     * and the width in columns of its visible equivalent as
-	     * returned by control_rep(). */
-	    else if (is_cntrl_char(wide_buf)) {
-		char *ctrl_mb_buf = charalloc(MB_CUR_MAX);
-
-		(*col)++;
-		wide_buf = control_rep((unsigned char)wide_buf);
-
-		if (wctomb(ctrl_mb_buf, (wchar_t)wide_buf) != -1) {
-		    int width = wcwidth((wchar_t)wide_buf);
-
-		    if (width != -1)
-			*col += width;
-		}
-		else
-		    (*col)++;
-
-		free(ctrl_mb_buf);
-	    /* If we have a normal character, get its width in columns
-	     * normally. */
-	    } else {
-		int width = wcwidth((wchar_t)wide_buf);
-
-		if (width != -1)
-		    *col += width;
-	    }
-	}
-    } else {
-#endif
-	/* Interpret buf's first character as a single-byte sequence. */
-	mb_buf_len = 1;
-	wide_buf = (unsigned char)*buf;
-
-	/* Save the single-byte sequence in chr as though it's a wide
-	 * character. */
-	if (chr != NULL)
-	    *chr = wide_buf;
-
-	if (col != NULL) {
-	    /* If we have a tab, get its width in columns using the
-	     * current value of col. */
-	    if (wide_buf == '\t')
-		*col += tabsize - *col % tabsize;
-	    /* If we have a control character, it's two columns wide:
-	     * one column for the "^" that will be displayed in front of
-	     * it, and one column for its visible equivalent as returned
-	     * by control_rep(). */
-	    else if (is_cntrl_char(wide_buf))
-		*col += 2;
-	    /* If we have a normal character, it's one column wide. */
-	    else
-		(*col)++;
-	}
-#ifdef NANO_WIDE
-    }
-#endif
-
-    return mb_buf_len;
-}
-
 /* Return the index in buf of the beginning of the character before the
 * one at pos. */
 size_t move_left(const char *buf, size_t pos)
@ -249,16 +104,16 @@ size_t move_left(const char *buf, size_t pos)
    /* There is no library function to move backward one multibyte
     * character.  Here is the naive, O(pos) way to do it. */
    while (TRUE) {
-	int mb_buf_len = parse_char(buf + pos - pos_prev, NULL
+	int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL
 #ifdef NANO_WIDE
 		, NULL
 #endif
 		, NULL);

-	if (pos_prev <= mb_buf_len)
+	if (pos_prev <= buf_mb_len)
 	    break;

-	pos_prev -= mb_buf_len;
+	pos_prev -= buf_mb_len;
    }

    return pos - pos_prev;
@ -268,7 +123,7 @@ size_t move_left(const char *buf, size_t pos)
 * one at pos. */
 size_t move_right(const char *buf, size_t pos)
 {
-    return pos + parse_char(buf + pos, NULL
+    return pos + parse_mbchar(buf + pos, NULL
 #ifdef NANO_WIDE
 	, NULL
 #endif
--- a/src/winio.c
+++ b/src/winio.c