handle invalid multibyte characters more efficiently

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2941 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
2005-07-26 06:13:45 +00:00 · 2005-07-26 06:13:45 +00:00 · 96452cb60c
--- a/11
+++ b/11
@ -118,6 +118,11 @@ CVS code -
 - color.c:
 	- Remove unneeded string.h and fcntl.h includes. (DLR)
 - chars.c:
+  mbrep()
+	- New function, the equivalent of control_mbrep() for non-control
+	  characters. (DLR)
+  parse_mbchar()
+	- Remove now-unneeded bad_chr parameter. (DLR)
  mbstrchr()
 	- Don't count matches between valid and invalid multibyte
 	  sequences anymore, for consistency. (DLR)
@ -200,9 +205,6 @@ CVS code -
 	  (DLR)
 	- Move stdlib.h, dirent.h, regex.h, and assert.h includes here,
 	  as every source file needs them. (DLR)
-  proto.h:
-	- Add declarations for bad_mbchar and bad_mbchar_len, so that we
-	  can use them in display_string() as well as chars.c. (DLR)
 - rcfile.c:
  nregcomp()
 	- Return TRUE when the compilation succeeds and FALSE otherwise,
@ -237,6 +239,9 @@ CVS code -
 	  the number of lines and characters in the file or selection,
 	  as wc does. (DLR)
 - winio.c:
+  display_string()
+	- Instead of using parse_mbchar()'s bad_chr parameter, use
+	  mbrep() to get the representation of a bad character. (DLR)
  edit_redraw(), edit_refresh()
 	- Clean up and simplify. (DLR)
  edit_update()
--- a/src/chars.c
+++ b/src/chars.c
@ -41,8 +41,8 @@ static const wchar_t bad_wchar = 0xFFFD;
 	 * Unicode FFFD (Replacement Character), unless we're
 	 * determining if it's a control character or searching for a
 	 * match to it. */
-const char *bad_mbchar = "\xEF\xBF\xBD";
-const int bad_mbchar_len = 3;
+static const char *bad_mbchar = "\xEF\xBF\xBD";
+static const int bad_mbchar_len = 3;
 #endif

 #ifndef HAVE_ISBLANK
@ -241,6 +241,39 @@ char *control_mbrep(const char *c, char *crep, int *crep_len)
    return crep;
 }

+/* c is a multibyte non-control character.  Copy its representation
+ * (bad_mbchar if c is invalid) into crep, which must have room for it,
+ * store its length in *crep_len, and return crep.  crep is written to
+ * but never repointed, so the caller can still free it. */
+char *mbrep(const char *c, char *crep, int *crep_len)
+{
+    assert(c != NULL && crep != NULL && crep_len != NULL);
+
+#ifdef ENABLE_UTF8
+    if (ISSET(USE_UTF8)) {
+	wchar_t wc;
+	int i;
+
+	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+	    mbtowc(NULL, NULL, 0);
+	    *crep_len = bad_mbchar_len;
+	    for (i = 0; i < bad_mbchar_len; i++)
+		crep[i] = bad_mbchar[i];
+	} else if ((*crep_len = wctomb(crep, wc)) < 0) {
+	    wctomb(NULL, 0);
+	    *crep_len = 0;
+	}
+    } else {
+#endif
+	*crep_len = 1;
+	*crep = *c;
+#ifdef ENABLE_UTF8
+    }
+#endif
+
+    return crep;
+}
+
 /* This function is equivalent to wcwidth() for multibyte characters. */
 int mbwidth(const char *c)
 {
@ -310,19 +343,14 @@ char *make_mbchar(int chr, int *chr_mb_len)

 /* Parse a multibyte character from buf.  Return the number of bytes
 * used.  If chr isn't NULL, store the multibyte character in it.  If
- * bad_chr isn't NULL, set it to TRUE if we have a bad multibyte
- * character.  If col isn't NULL, store the new display width in it.  If
- * *str is '\t', we expect col to have the current display width. */
-int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t
-	*col)
+ * col isn't NULL, store the new display width in it.  If *buf is '\t',
+ * we expect col to have the current display width. */
+int parse_mbchar(const char *buf, char *chr, size_t *col)
 {
    int buf_mb_len;

    assert(buf != NULL);

-    if (bad_chr != NULL)
-	*bad_chr = FALSE;
-
 #ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
 	/* Get the number of bytes in the multibyte character. */
@ -332,8 +360,6 @@ int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t
 	 * to TRUE and interpret buf's first byte. */
 	if (buf_mb_len < 0) {
 	    mblen(NULL, 0);
-	    if (bad_chr != NULL)
-		*bad_chr = TRUE;
 	    buf_mb_len = 1;
 	} else if (buf_mb_len == 0)
 	    buf_mb_len++;
@ -415,8 +441,7 @@ size_t move_mbleft(const char *buf, size_t pos)
    /* There is no library function to move backward one multibyte
     * character.  Here is the naive, O(pos) way to do it. */
    while (TRUE) {
-	int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL,
-		NULL);
+	int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL);

 	if (pos_prev <= (size_t)buf_mb_len)
 	    break;
@ -431,7 +456,7 @@ size_t move_mbleft(const char *buf, size_t pos)
 * after the one at pos. */
 size_t move_mbright(const char *buf, size_t pos)
 {
-    return pos + parse_mbchar(buf + pos, NULL, NULL, NULL);
+    return pos + parse_mbchar(buf + pos, NULL, NULL);
 }

 #ifndef HAVE_STRCASECMP
@ -482,7 +507,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
 	    bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
 	    int s1_mb_len, s2_mb_len;

-	    s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
+	    s1_mb_len = parse_mbchar(s1, s1_mb, NULL);

 	    if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) {
 		mbtowc(NULL, NULL, 0);
@ -490,7 +515,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
 		bad_s1_mb = TRUE;
 	    }

-	    s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
+	    s2_mb_len = parse_mbchar(s2, s2_mb, NULL);

 	    if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) {
 		mbtowc(NULL, NULL, 0);
@ -556,7 +581,7 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
 	    while (*q != '\0') {
 		bool bad_r_mb = FALSE, bad_q_mb = FALSE;

-		r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
+		r_mb_len = parse_mbchar(r, r_mb, NULL);

 		if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
 		    mbtowc(NULL, NULL, 0);
@ -564,7 +589,7 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
 		    bad_r_mb = TRUE;
 		}

-		q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
+		q_mb_len = parse_mbchar(q, q_mb, NULL);

 		if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
 		    mbtowc(NULL, NULL, 0);
@ -662,7 +687,7 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
 	    while (*q != '\0') {
 		bool bad_r_mb = FALSE, bad_q_mb = FALSE;

-		r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
+		r_mb_len = parse_mbchar(r, r_mb, NULL);

 		if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
 		    mbtowc(NULL, NULL, 0);
@ -670,7 +695,7 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
 		    bad_r_mb = TRUE;
 		}

-		q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
+		q_mb_len = parse_mbchar(q, q_mb, NULL);

 		if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
 		    mbtowc(NULL, NULL, 0);
@ -740,7 +765,7 @@ size_t mbstrnlen(const char *s, size_t maxlen)
 	int s_mb_len;

 	while (*s != '\0') {
-	    s_mb_len = parse_mbchar(s, NULL, NULL, NULL);
+	    s_mb_len = parse_mbchar(s, NULL, NULL);

 	    if (maxlen == 0)
 		break;
@ -777,7 +802,7 @@ char *mbstrchr(const char *s, char *c)
 	}

 	while (*s != '\0') {
-	    int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);
+	    int s_mb_len = parse_mbchar(s, s_mb, NULL);

 	    if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
 		mbtowc(NULL, NULL, 0);
@ -832,7 +857,7 @@ bool has_blank_mbchars(const char *s)
 	while (*s != '\0') {
 	    int chr_mb_len;

-	    chr_mb_len = parse_mbchar(s, chr_mb, NULL, NULL);
+	    chr_mb_len = parse_mbchar(s, chr_mb, NULL);

 	    if (is_blank_mbchar(chr_mb)) {
 		retval = TRUE;
--- a/src/files.c
+++ b/src/files.c
@ -2033,9 +2033,9 @@ char *input_tab(char *buf, size_t *place, bool *lastwastab, bool *list)
 		/* Get the number of single-byte characters that all the
 		 * matches have in common. */
 		match1_mb_len = parse_mbchar(matches[0] + common_len,
-			match1_mb, NULL, NULL);
+			match1_mb, NULL);
 		match2_mb_len = parse_mbchar(matches[match] +
-			common_len, match2_mb, NULL, NULL);
+			common_len, match2_mb, NULL);
 		match1_mb[match1_mb_len] = '\0';
 		match2_mb[match2_mb_len] = '\0';
 		if (strcmp(match1_mb, match2_mb) != 0)
@ -2045,8 +2045,7 @@ char *input_tab(char *buf, size_t *place, bool *lastwastab, bool *list)
 	    if (match < num_matches || matches[0][common_len] == '\0')
 		break;

-	    common_len += parse_mbchar(buf + common_len, NULL, NULL,
-		NULL);
+	    common_len += parse_mbchar(buf + common_len, NULL, NULL);
 	}

 	free(match1_mb);
--- a/src/move.c
+++ b/src/move.c
@ -227,7 +227,7 @@ bool do_next_word(bool allow_punct, bool allow_update)
     * the current word. */
    while (!end_line) {
 	char_mb_len = parse_mbchar(openfile->current->data +
-		openfile->current_x, char_mb, NULL, NULL);
+		openfile->current_x, char_mb, NULL);

 	/* If we've found it, stop moving forward through the current
 	 * line. */
@ -254,7 +254,7 @@ bool do_next_word(bool allow_punct, bool allow_update)
 	openfile->current = openfile->current->next) {
 	while (!end_line) {
 	    char_mb_len = parse_mbchar(openfile->current->data +
-		openfile->current_x, char_mb, NULL, NULL);
+		openfile->current_x, char_mb, NULL);

 	    /* If we've found it, stop moving forward through the
 	     * current line. */
@ -322,7 +322,7 @@ bool do_prev_word(bool allow_punct, bool allow_update)
     * of the current word. */
    while (!begin_line) {
 	char_mb_len = parse_mbchar(openfile->current->data +
-		openfile->current_x, char_mb, NULL, NULL);
+		openfile->current_x, char_mb, NULL);

 	/* If we've found it, stop moving backward through the current
 	 * line. */
@ -352,7 +352,7 @@ bool do_prev_word(bool allow_punct, bool allow_update)
 	openfile->current = openfile->current->prev) {
 	while (!begin_line) {
 	    char_mb_len = parse_mbchar(openfile->current->data +
-		openfile->current_x, char_mb, NULL, NULL);
+		openfile->current_x, char_mb, NULL);

 	    /* If we've found it, stop moving backward through the
 	     * current line. */
@ -392,9 +392,8 @@ bool do_prev_word(bool allow_punct, bool allow_update)
 		openfile->current_x);

 	while (!begin_line) {
-	    char_mb_len =
-		parse_mbchar(openfile->current->data +
-		openfile->current_x, char_mb, NULL, NULL);
+	    char_mb_len = parse_mbchar(openfile->current->data +
+		openfile->current_x, char_mb, NULL);

 	    /* If we've found it, stop moving backward through the
 	     * current line. */
--- a/src/nano.c
+++ b/src/nano.c
@ -1778,10 +1778,8 @@ void do_output(char *output, size_t output_len, bool allow_cntrls)
 	    }
 	}

-	/* Interpret the next multibyte character.  If it's an invalid
-	 * multibyte character, interpret it as though it's a byte
-	 * character. */
-	char_buf_len = parse_mbchar(output + i, char_buf, NULL, NULL);
+	/* Interpret the next multibyte character. */
+	char_buf_len = parse_mbchar(output + i, char_buf, NULL);

 	i += char_buf_len;

--- a/src/proto.h
+++ b/src/proto.h
@ -132,11 +132,6 @@ extern bool curses_ended;

 extern char *homedir;

-#ifdef ENABLE_UTF8
-extern const char *bad_mbchar;
-extern const int bad_mbchar_len;
-#endif
-
 /* The functions we want available. */

 /* Public functions in chars.c. */
@ -161,11 +156,11 @@ char control_rep(char c);
 wchar_t control_wrep(wchar_t c);
 #endif
 char *control_mbrep(const char *c, char *crep, int *crep_len);
+char *mbrep(const char *c, char *crep, int *crep_len);
 int mbwidth(const char *c);
 int mb_cur_max(void);
 char *make_mbchar(int chr, int *chr_mb_len);
-int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t
-	*col);
+int parse_mbchar(const char *buf, char *chr, size_t *col);
 size_t move_mbleft(const char *buf, size_t pos);
 size_t move_mbright(const char *buf, size_t pos);
 #ifndef HAVE_STRCASECMP
--- a/src/rcfile.c
+++ b/src/rcfile.c
@ -625,11 +625,10 @@ void parse_rcfile(FILE *rcstream)
 			    } else {
 				whitespace_len[0] =
 					parse_mbchar(whitespace, NULL,
-					NULL, NULL);
+					NULL);
 				whitespace_len[1] =
 					parse_mbchar(whitespace +
-					whitespace_len[0], NULL,
-					NULL, NULL);
+					whitespace_len[0], NULL, NULL);
 			    }
 			} else
 #endif
--- a/src/text.c
+++ b/src/text.c
@ -76,7 +76,7 @@ void do_delete(void)

    if (openfile->current->data[openfile->current_x] != '\0') {
 	int char_buf_len = parse_mbchar(openfile->current->data +
-		openfile->current_x, NULL, NULL, NULL);
+		openfile->current_x, NULL, NULL);
 	size_t line_len = strlen(openfile->current->data +
 		openfile->current_x);

@ -576,7 +576,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool newline)
    while (*line != '\0' && goal >= 0) {
 	size_t pos = 0;

-	line_len = parse_mbchar(line, NULL, NULL, &pos);
+	line_len = parse_mbchar(line, NULL, &pos);

 	if (is_blank_mbchar(line) || (newline && *line == '\n')) {
 	    blank_loc = cur_loc;
@ -599,7 +599,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool newline)
 	bool found_blank = FALSE;

 	while (*line != '\0') {
-	    line_len = parse_mbchar(line, NULL, NULL, NULL);
+	    line_len = parse_mbchar(line, NULL, NULL);

 	    if (is_blank_mbchar(line) || (newline && *line == '\n')) {
 		if (!found_blank)
@ -617,12 +617,12 @@ ssize_t break_line(const char *line, ssize_t goal, bool newline)
    /* Move to the last blank after blank_loc, if there is one. */
    line -= cur_loc;
    line += blank_loc;
-    line_len = parse_mbchar(line, NULL, NULL, NULL);
+    line_len = parse_mbchar(line, NULL, NULL);
    line += line_len;

    while (*line != '\0' && (is_blank_mbchar(line) ||
 	(newline && *line == '\n'))) {
-	line_len = parse_mbchar(line, NULL, NULL, NULL);
+	line_len = parse_mbchar(line, NULL, NULL);

 	line += line_len;
 	blank_loc += line_len;
@ -646,7 +646,7 @@ size_t indent_length(const char *line)
    blank_mb = charalloc(mb_cur_max());

    while (*line != '\0') {
-	blank_mb_len = parse_mbchar(line, blank_mb, NULL, NULL);
+	blank_mb_len = parse_mbchar(line, blank_mb, NULL);

 	if (!is_blank_mbchar(blank_mb))
 	    break;
@ -697,14 +697,14 @@ void justify_format(filestruct *paragraph, size_t skip)
 	/* If this character is blank, make sure that it's a space with
 	 * no blanks after it. */
 	if (is_blank_mbchar(end)) {
-	    end_len = parse_mbchar(end, NULL, NULL, NULL);
+	    end_len = parse_mbchar(end, NULL, NULL);

 	    *new_end = ' ';
 	    new_end++;
 	    end += end_len;

 	    while (*end != '\0' && is_blank_mbchar(end)) {
-		end_len = parse_mbchar(end, NULL, NULL, NULL);
+		end_len = parse_mbchar(end, NULL, NULL);

 		end += end_len;
 		shift += end_len;
@ -722,7 +722,7 @@ void justify_format(filestruct *paragraph, size_t skip)
 	 * more than two blanks after it, and make sure that the blanks
 	 * are spaces. */
 	} else if (mbstrchr(punct, end) != NULL) {
-	    end_len = parse_mbchar(end, NULL, NULL, NULL);
+	    end_len = parse_mbchar(end, NULL, NULL);

 	    while (end_len > 0) {
 		*new_end = *end;
@ -732,7 +732,7 @@ void justify_format(filestruct *paragraph, size_t skip)
 	    }

 	    if (*end != '\0' && mbstrchr(brackets, end) != NULL) {
-		end_len = parse_mbchar(end, NULL, NULL, NULL);
+		end_len = parse_mbchar(end, NULL, NULL);

 		while (end_len > 0) {
 		    *new_end = *end;
@ -743,7 +743,7 @@ void justify_format(filestruct *paragraph, size_t skip)
 	    }

 	    if (*end != '\0' && is_blank_mbchar(end)) {
-		end_len = parse_mbchar(end, NULL, NULL, NULL);
+		end_len = parse_mbchar(end, NULL, NULL);

 		*new_end = ' ';
 		new_end++;
@ -751,7 +751,7 @@ void justify_format(filestruct *paragraph, size_t skip)
 	    }

 	    if (*end != '\0' && is_blank_mbchar(end)) {
-		end_len = parse_mbchar(end, NULL, NULL, NULL);
+		end_len = parse_mbchar(end, NULL, NULL);

 		*new_end = ' ';
 		new_end++;
@ -759,7 +759,7 @@ void justify_format(filestruct *paragraph, size_t skip)
 	    }

 	    while (*end != '\0' && is_blank_mbchar(end)) {
-		end_len = parse_mbchar(end, NULL, NULL, NULL);
+		end_len = parse_mbchar(end, NULL, NULL);

 		end += end_len;
 		shift += end_len;
@ -775,7 +775,7 @@ void justify_format(filestruct *paragraph, size_t skip)
 	/* If this character is neither blank nor punctuation, leave it
 	 * alone. */
 	} else {
-	    end_len = parse_mbchar(end, NULL, NULL, NULL);
+	    end_len = parse_mbchar(end, NULL, NULL);

 	    while (end_len > 0) {
 		*new_end = *end;
--- a/src/utils.c
+++ b/src/utils.c
@ -247,8 +247,8 @@ bool is_whole_word(size_t pos, const char *buf, const char *word)

    assert(buf != NULL && pos <= strlen(buf) && word != NULL);

-    parse_mbchar(buf + move_mbleft(buf, pos), p, NULL, NULL);
-    parse_mbchar(buf + word_end, r, NULL, NULL);
+    parse_mbchar(buf + move_mbleft(buf, pos), p, NULL);
+    parse_mbchar(buf + word_end, r, NULL);

    /* If we're at the beginning of the line or the character before the
     * word isn't a non-punctuation "word" character, and if we're at
--- a/src/winio.c
+++ b/src/winio.c
@ -1862,10 +1862,8 @@ void do_statusbar_output(char *output, size_t output_len, bool
 	    }
 	}

-	/* Interpret the next multibyte character.  If it's an invalid
-	 * multibyte character, interpret it as though it's a byte
-	 * character. */
-	char_buf_len = parse_mbchar(output + i, char_buf, NULL, NULL);
+	/* Interpret the next multibyte character. */
+	char_buf_len = parse_mbchar(output + i, char_buf, NULL);

 	i += char_buf_len;

@ -1935,7 +1933,7 @@ void do_statusbar_delete(void)
 {
    if (answer[statusbar_x] != '\0') {
 	int char_buf_len = parse_mbchar(answer + statusbar_x, NULL,
-		NULL, NULL);
+		NULL);
 	size_t line_len = strlen(answer + statusbar_x);

 	assert(statusbar_x < strlen(answer));
@ -1982,8 +1980,7 @@ bool do_statusbar_next_word(bool allow_punct)
    /* Move forward until we find the character after the last letter of
     * the current word. */
    while (!end_line) {
-	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
-		NULL);
+	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);

 	/* If we've found it, stop moving forward through the current
 	 * line. */
@ -2007,8 +2004,7 @@ bool do_statusbar_next_word(bool allow_punct)
 	statusbar_x += char_mb_len;

    while (!end_line) {
-	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
-		NULL);
+	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);

 	/* If we've found it, stop moving forward through the current
 	 * line. */
@ -2043,8 +2039,7 @@ bool do_statusbar_prev_word(bool allow_punct)
    /* Move backward until we find the character before the first letter
     * of the current word. */
    while (!begin_line) {
-	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
-		NULL);
+	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);

 	/* If we've found it, stop moving backward through the current
 	 * line. */
@ -2069,8 +2064,7 @@ bool do_statusbar_prev_word(bool allow_punct)
 	statusbar_x = move_mbleft(answer, statusbar_x);

    while (!begin_line) {
-	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
-		NULL);
+	char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);

 	/* If we've found it, stop moving backward through the current
 	 * line. */
@ -2093,7 +2087,7 @@ bool do_statusbar_prev_word(bool allow_punct)

 	while (!begin_line) {
 	    char_mb_len = parse_mbchar(answer + statusbar_x, char_mb,
-		NULL, NULL);
+		NULL);

 	    /* If we've found it, stop moving backward through the
 	     * current line. */
@ -2164,7 +2158,7 @@ size_t actual_x(const char *str, size_t xplus)
    assert(str != NULL);

    while (*str != '\0') {
-	int str_len = parse_mbchar(str, NULL, NULL, &length);
+	int str_len = parse_mbchar(str, NULL, &length);

 	if (length > xplus)
 	    break;
@ -2189,7 +2183,7 @@ size_t strnlenpt(const char *str, size_t size)
    assert(str != NULL);

    while (*str != '\0') {
-	int str_len = parse_mbchar(str, NULL, NULL, &length);
+	int str_len = parse_mbchar(str, NULL, &length);

 	str += str_len;

@ -2281,8 +2275,6 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 	/* The string we return. */
    size_t index;
 	/* Current position in converted. */
-    bool bad_char;
-	/* Whether we have an invalid multibyte character. */

    char *buf_mb = charalloc(mb_cur_max());
    int buf_mb_len;
@ -2311,8 +2303,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 	buf[start_index] != '\t')) {
 	/* We don't display all of buf[start_index] since it starts to
 	 * the left of the screen. */
-	buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL,
-		NULL);
+	buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);

 	if (is_cntrl_mbchar(buf_mb)) {
 	    if (column < start_col) {
@ -2343,8 +2334,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
    }

    while (index < alloc_len - 1 && buf[start_index] != '\0') {
-	buf_mb_len = parse_mbchar(buf + start_index, buf_mb, &bad_char,
-		NULL);
+	buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);

 	/* If buf contains a tab character, interpret it. */
 	if (*buf_mb == '\t') {
@ -2394,27 +2384,22 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 #endif
 		converted[index++] = ' '; 
 	    start_col++;
-	/* If buf contains a non-control character, interpret it. */
+	/* If buf contains a non-control character, interpret it.  If
+	 * buf contains an invalid multibyte non-control character,
+	 * display it as such. */
 	} else {
-	    int i;
+	    char *nctrl_buf_mb = charalloc(mb_cur_max());
+	    int nctrl_buf_mb_len, i;

-#ifdef ENABLE_UTF8
-	    /* If buf contains an invalid multibyte non-control
-	     * character, display it as such. */
-	    if (ISSET(USE_UTF8) && bad_char) {
-		for (i = 0; i < bad_mbchar_len; i++)
-		    converted[index++] = bad_mbchar[i];
+	    nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb,
+		&nctrl_buf_mb_len);

-		start_col += mbwidth(bad_mbchar);
-	    } else {
-#endif
-		for (i = 0; i < buf_mb_len; i++)
-		    converted[index++] = buf[start_index + i];
+	    for (i = 0; i < nctrl_buf_mb_len; i++)
+		converted[index++] = nctrl_buf_mb[i];

-		start_col += mbwidth(buf_mb);
-#ifdef ENABLE_UTF8
-	    }
-#endif
+	    start_col += mbwidth(nctrl_buf_mb);
+
+	    free(nctrl_buf_mb);
 	}

 	start_index += buf_mb_len;