Merge branch 'm-utf-8' of ssh://www.midnight-commander.org:2222/git/mc into m-utf-8

* 'm-utf-8' of ssh://www.midnight-commander.org:2222/git/mc:
  - fix by andrew_b: uninitialised tool->compose
  - fix incorrect drawing of utf-8 text in 8-bit locale
  - fix some warnings
  - add convert_from_utf_to_current_c, fix little warnings
  - add utf8_display global variable; if display bits select utf-8, utf8_display = 1
  - add more conversion before drawing text
  - change charbuf to char from unsigned char
  - add: convert input in utf locale to needle
  - fix incorrect conversion from utf to display CP
  - fix: width of utf buffer
  - add codepage selection from list in core viewer
  - fix: editor status bar
  - fix: edit_get_prev_utf segfault, add comparison of str with start of buf

Conflicts: edit/edit.h
2009-04-17 11:52:20 +03:00 · 2009-04-17 11:52:20 +03:00 · 0730efa2fc
--- a/edit/edit-widget.h
+++ b/edit/edit-widget.h
@ -46,7 +46,7 @@ struct WEdit {
    unsigned char *buffers2[MAXBUFF + 1];	/* all data from end of file down to curs2 */

    /* UTF8 */
-    unsigned char charbuf[MB_LEN_MAX];
+    char charbuf[4 + 1];
    int charpoint;
    /* search variables */
    long search_start;		/* First character to start searching from */
--- a/edit/edit.c
+++ b/edit/edit.c
@ -168,6 +168,7 @@ int edit_get_utf (WEdit * edit, long byte_index, int *char_width)
        return '\n';
    }

+
    str = edit_get_byte_ptr (edit, byte_index);
    res = g_utf8_get_char_validated (str, -1);

@ -200,14 +201,12 @@ int edit_get_prev_utf (WEdit * edit, long byte_index, int *char_width)
    gunichar ch;
    gchar *next_ch = NULL;
    int width = 0;
-    gchar *prn_buf=NULL;

    if ( byte_index > 0 ) {
        byte_index--;
    }

    ch = edit_get_utf (edit, byte_index, &width);
-
    if ( width == 1 ) {
        *char_width = width;
        return ch;
@ -277,7 +276,9 @@ edit_load_file_fast (WEdit *edit, const char *filename)

    edit->curs2 = edit->last_byte;
    buf2 = edit->curs2 >> S_EDIT_BUF_SIZE;
-    edit->utf8 = str_isutf8 (get_codepage_id( source_codepage ));
+    edit->utf8 = 0;
+    if ( get_codepage_id( source_codepage ) )
+        edit->utf8 = str_isutf8 (get_codepage_id( source_codepage ));

    if ((file = mc_open (filename, O_RDONLY | O_BINARY)) == -1) {
 	GString *errmsg = g_string_new(NULL);
--- a/edit/edit.h
+++ b/edit/edit.h
@ -130,7 +130,10 @@ int edit_raw_key_query (const char *heading, const char *query, int cancel);
 int edit_file (const char *_file, int line);
 int edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch);
 int edit_get_byte (WEdit * edit, long byte_index);
-char *edit_get_byte_ptr (WEdit *, long);
+char *edit_get_byte_ptr (WEdit * edit, long byte_index);
+char *edit_get_buf_ptr (WEdit * edit, long byte_index);
+int edit_get_utf (WEdit * edit, long byte_index, int *char_width);
+int edit_get_prev_utf (WEdit * edit, long byte_index, int *char_width);
 int edit_count_lines (WEdit * edit, long current, int upto);
 long edit_move_forward (WEdit * edit, long current, int lines, long upto);
 long edit_move_forward3 (WEdit * edit, long current, int cols, long upto);
--- a/edit/editcmd.c
+++ b/edit/editcmd.c
@ -3019,7 +3019,8 @@ edit_select_codepage_cmd (WEdit *edit)
 {
 #ifdef HAVE_CHARSET
    do_select_codepage ();
-    edit->utf8 = str_isutf8 (get_codepage_id (source_codepage));
+    if ( get_codepage_id (source_codepage) )
+        edit->utf8 = str_isutf8 (get_codepage_id (source_codepage));
    edit->force = REDRAW_COMPLETELY;
    edit_refresh_cmd (edit);
 #endif
--- a/edit/editdraw.c
+++ b/edit/editdraw.c
@ -330,7 +330,7 @@ edit_draw_this_line (WEdit *edit, long b, long row, long start_col,
 	eval_marks (edit, &m1, &m2);

 	if (row <= edit->total_lines - edit->start_line) {
-		long tws;
+		long tws = 0;
 	    if (use_colors && visible_tws) {
 		tws = edit_eol (edit, b);
 		while (tws > b && ((c = edit_get_byte (edit, tws - 1)) == ' '
@ -432,10 +432,16 @@ edit_draw_this_line (WEdit *edit, long b, long row, long start_col,
 		    }
 		    /* fallthrough */
 		default:
-		    if (!edit->utf8) {
-		        c = convert_to_display_c (c);
+		    if ( utf8_display ) {
+		        if ( !edit->utf8 ) {
+		        }
 		    } else {
-		        //FIXME: if need
+		        if ( edit->utf8 ) {
+		            c = convert_from_utf_to_current_c (c);
+		            //c = convert_to_utf (c);
+		        } else {
+		            c = convert_to_display_c (c);
+		        }
 		    }
 		    /* Caret notation for control characters */
 		    if (c < 32) {
--- a/edit/editkeys.c
+++ b/edit/editkeys.c
@ -44,6 +44,8 @@
 #include "../src/tty.h"		/* keys */
 #include "../src/charsets.h"	/* convert_from_input_c() */
 #include "../src/selcodepage.h"	/* do_select_codepage() */
+#include "../src/main.h"	/* display_codepage */
+#include "../src/strutil.h"	/* str_isutf8 () */

 /*
 * Ordinary translations.  Note that the keys listed first take priority
@ -191,6 +193,8 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)
    int char_for_insertion = -1;
    int i = 0;
    int extmod = 0;
+    int c;
+
    const edit_key_map_type *key_map = NULL;
    switch (edit_key_emulation) {
    case EDIT_KEY_EMULATION_NORMAL:
@ -243,23 +247,55 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)

    /* an ordinary insertable character */
    if (x_key < 256 && !extmod) {
+
+        if ( edit->charpoint >= 4 ) {
+            edit->charpoint = 0;
+            edit->charbuf[edit->charpoint] = '\0';
+        }
+        if ( edit->charpoint < 4 ) {
+            edit->charbuf[edit->charpoint++] = x_key;
+            edit->charbuf[edit->charpoint] = '\0';
+        }
+
        if (!edit->utf8) {
-            int c = convert_from_input_c (x_key);
-            if (is_printable (c)) {
-                char_for_insertion = c;
-                goto fin;
-            }
-        } else {
-            if (edit->charpoint >= MB_LEN_MAX) {
-                goto fin;
-                edit->charpoint = 0;
+            /* input from 8-bit locale */
+            if ( utf8_display ) {
+                c = convert_from_input_c (x_key);
+                if (is_printable (c)) {
+                    char_for_insertion = c;
+                    goto fin;
+                }
+            } else {
+                edit->charbuf[edit->charpoint + 1] = '\0';
+                int res = str_is_valid_char (edit->charbuf, edit->charpoint);
+                if (res < 0) {
+                    if (res != -2) {
+                        edit->charpoint = 0; /* broken multibyte char, skip */
+                        goto fin;
+                    }
+                    /* unfinished multibyte input (in the middle of a multibyte utf-8 char) */
+                    goto fin;
+                } else {
+                    if ( g_unichar_isprint (g_utf8_get_char(edit->charbuf)) ) {
+                        c = convert_from_utf_to_current ( edit->charbuf );
+                        edit->charbuf[0] = '\0';
+                        edit->charpoint = 0;
+                        if (is_printable (c)) {
+                            char_for_insertion = c;
+                            goto fin;
+                        }
+                    }
+                    /* unprintable utf input, skip it */
+                    edit->charbuf[0] = '\0';
+                    edit->charpoint = 0;
+                    goto fin;
+                }
            }

-            edit->charbuf[edit->charpoint] = x_key;
-            edit->charpoint++;
+        } else {

            int res = str_is_valid_char (edit->charbuf, edit->charpoint);
-            mc_log("res:%i, edit->charpoint : %i\n",res, edit->charpoint);
+
            if (res < 0) {
                if (res != -2) {
                    edit->charpoint = 0; /* broken multibyte char, skip */
@ -271,6 +307,7 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)
                edit->charbuf[edit->charpoint]='\0';
                edit->charpoint = 0;
                if ( g_unichar_isprint (g_utf8_get_char(edit->charbuf))) {
+                    mc_log("input:%s \n", edit->charbuf);
                    char_for_insertion = x_key;
                    goto fin;
                }
--- a/src/boxes.c
+++ b/src/boxes.c
@ -569,7 +569,8 @@ sel_charset_button (int action)
    cpname = (new_display_codepage < 0)
 	? _("Other 8 bit")
 	: codepages[new_display_codepage].name;
-
+    if ( cpname )
+        utf8_display = str_isutf8 (cpname);
    /* avoid strange bug with label repainting */
    g_snprintf (buf, sizeof (buf), "%-27s", cpname);
    label_set_text (cplabel, buf);
--- a/src/charsets.c
+++ b/src/charsets.c
@ -29,6 +29,8 @@

 #include "global.h"
 #include "charsets.h"
+#include "strutil.h"		/* utf-8 functions */
+#include "main.h"

 int n_codepages = 0;

@ -249,4 +251,69 @@ convert_from_input (char *str)
 	str++;
    }
 }
+
+/* Convert str (expected to hold UTF-8 text) to the codepage selected by
+ * display_codepage and return the first byte of the converted text.
+ * Returns '.' when str is NULL, no converter exists, or conversion fails. */
+unsigned char
+convert_from_utf_to_current (const char *str)
+{
+    unsigned char ch = '.';	/* fallback, also used when no converter exists */
+    char *cp_to = NULL;
+    GIConv conv;
+    GString *translated_data;
+
+    if (!str)
+        return ch;
+
+    translated_data = g_string_new ("");
+    cp_to = g_strdup ( get_codepage_id ( display_codepage ) );
+    conv = str_crt_conv_to (cp_to);
+
+    if (conv != INVALID_CONV) {
+        if (str_convert (conv, str, translated_data) != ESTR_FAILURE) {
+            ch = translated_data->str[0];
+        }
+        str_close_conv (conv);
+    }
+    g_free (cp_to);
+    g_string_free (translated_data, TRUE);
+    return ch;
+}
+
+/* Encode the Unicode code point input_char as UTF-8, convert it to the
+ * codepage selected by display_codepage and return the first byte of the
+ * converted text.  Returns '.' when no converter exists or conversion fails. */
+unsigned char
+convert_from_utf_to_current_c (const int input_char)
+{
+    char str[6 + 1];		/* g_unichar_to_utf8 () writes at most 6 bytes */
+    unsigned char ch = '.';
+    char *cp_to = NULL;
+    GIConv conv;
+    GString *translated_data;
+    int res = 0;
+
+    res = g_unichar_to_utf8 (input_char, str);
+    if ( res <= 0 ) {
+        return ch;
+    }
+    /* not NUL-terminated by g_unichar_to_utf8 (): end after the encoded bytes */
+    str[res] = '\0';
+
+    translated_data = g_string_new ("");
+    cp_to = g_strdup ( get_codepage_id ( display_codepage ) );
+    conv = str_crt_conv_to (cp_to);
+
+    if (conv != INVALID_CONV) {
+        if (str_convert (conv, str, translated_data) != ESTR_FAILURE) {
+            ch = translated_data->str[0];
+        }
+        str_close_conv (conv);
+    }
+    g_free (cp_to);
+    g_string_free (translated_data, TRUE);
+    return ch;
+}
+
 #endif				/* HAVE_CHARSET */
--- a/src/charsets.h
+++ b/src/charsets.h
@ -6,7 +6,6 @@
 #define UNKNCHAR '\001'

 #define CHARSETS_INDEX "mc.charsets"
-
 extern int n_codepages;

 extern unsigned char conv_displ[256];
@ -27,7 +26,8 @@ const char *init_translation_table (int cpsource, int cpdisplay);
 void convert_to_display (char *str);
 void convert_from_input (char *str);
 void convert_string (unsigned char *str);
-
+unsigned char convert_from_utf_to_current (const char *str);
+unsigned char convert_from_utf_to_current_c (const int input_char);
 /* Convert single characters */
 static inline int
 convert_to_display_c (int c)
--- a/src/cmd.c
+++ b/src/cmd.c
@ -63,6 +63,9 @@
 #include "execute.h"		/* toggle_panels() */
 #include "history.h"
 #include "strutil.h"
+#include "selcodepage.h"	/* do_select_codepage () */
+#include "charsets.h"		/* get_codepage_id () */
+

 #ifndef MAP_FILE
 #   define MAP_FILE 0
--- a/src/main.c
+++ b/src/main.c
@ -152,8 +152,16 @@ int eight_bit_clean = 1;
 * This is reported to break on many terminals (xterm, qansi-m).
 */
 int full_eight_bits = 0;
+
 #endif				/* !HAVE_CHARSET */

+/*
+ * utf8_display is 1 when the display codepage ("Display bits") is UTF-8,
+ * i.e. on a UTF-8 terminal; otherwise it is 0.
+ *
+*/
+int utf8_display = 0;
+
 /* If true use the internal viewer */
 int use_internal_view = 1;

--- a/src/main.h
+++ b/src/main.h
@ -50,6 +50,7 @@ struct edit_stack_type edit_history_moveto[MAX_HISTORY_MOVETO];
 #ifdef HAVE_CHARSET
 extern int source_codepage;
 extern int display_codepage;
+extern int utf8_display;
 #else
 extern int eight_bit_clean;
 extern int full_eight_bits;
--- a/src/setup.c
+++ b/src/setup.c
@ -59,6 +59,8 @@
 #   include "../edit/edit.h"
 #endif

+#include "../src/strutil.h"	/* str_isutf8 () */
+

 extern char *find_ignore_dirs;

@ -589,6 +591,8 @@ load_setup (void)
 	    source_codepage = get_codepage_index( cpname );
    }
    init_translation_table( source_codepage, display_codepage );
+    if ( get_codepage_id( display_codepage ) )
+        utf8_display = str_isutf8 (get_codepage_id( display_codepage ));
 #endif /* HAVE_CHARSET */
 }

--- a/src/strutil.c
+++ b/src/strutil.c
@ -304,6 +304,8 @@ str_test_encoding_class (const char *encoding, const char **table)
 {
    int t;
    int result = 0;
+    if ( encoding == NULL )
+        return result;

    for (t = 0; table[t] != NULL; t++)
    {
@ -331,7 +333,7 @@ str_choose_str_functions ()
 }

 int
-str_isutf8 (char *codeset_name)
+str_isutf8 (const char *codeset_name)
 {
    int result = 0;
    if (str_test_encoding_class (codeset_name, str_utf8_encodings))
--- a/src/strutil.h
+++ b/src/strutil.h
@ -486,10 +486,9 @@ int str_key_collate (const char *t1, const char *t2, int case_sen);
 */
 void str_release_key (char *key, int case_sen);

+#endif
+
 /* return 1 if codeset_name is utf8 or utf-8 
 * I
 */
-int str_isutf8 (char *codeset_name);
-
-#endif
-
+int str_isutf8 (const char *codeset_name);
--- a/src/strutilutf8.c
+++ b/src/strutilutf8.c
@ -454,6 +454,8 @@ utf8_tool_copy_chars_to_end (struct utf8_tool *tool)
    size_t left;
    gunichar uni;

+    tool->compose = 0;
+
    while (tool->cheked[0] != '\0')
    {
 	uni = g_utf8_get_char (tool->cheked);
@ -478,6 +480,8 @@ utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident)
    gunichar uni;
    int w;

+    tool->compose = 0;
+
    while (tool->cheked[0] != '\0')
    {
 	uni = g_utf8_get_char (tool->cheked);
@ -581,6 +585,7 @@ str_utf8_fit_to_term (const char *text, int width, int just_mode)
    tool.cheked = pre_form->text;
    tool.actual = result;
    tool.remain = sizeof (result);
+    tool.compose = 0;

    if (pre_form->width <= width)
    {
@ -653,6 +658,7 @@ str_utf8_term_trim (const char *text, int width)
    tool.cheked = pre_form->text;
    tool.actual = result;
    tool.remain = sizeof (result);
+    tool.compose = 0;

    if (width < pre_form->width)
    {
@ -753,6 +759,7 @@ str_utf8_term_substring (const char *text, int start, int width)
    tool.cheked = pre_form->text;
    tool.actual = result;
    tool.remain = sizeof (result);
+    tool.compose = 0;

    tool.ident = -start;
    utf8_tool_skip_chars_to (&tool, 0);
@ -781,6 +788,7 @@ str_utf8_trunc (const char *text, int width)
    tool.cheked = pre_form->text;
    tool.actual = result;
    tool.remain = sizeof (result);
+    tool.compose = 0;

    if (pre_form->width > width)
    {