1
1

Merge branch 'm-utf-8' of ssh://www.midnight-commander.org:2222/git/mc into m-utf-8

* 'm-utf-8' of ssh://www.midnight-commander.org:2222/git/mc:
  fix by andrew_b uninitialised tool->compose.
  fix incorrect draw utf-8 text in 8-bit locale
  fix some warnings
  add convert_from_utf_to_current_c, fix little warnings
  add utf8_display global variable, if display bits select utf-8 utf8_display = 1
  add more conversion before drawing text
  change charbuf to char from unsigned char
  add: convert input in utf locale to needle
  fix incorrect conversion from utf to display CP
  fix: width utf buffer
  add in core viewer codepage selection from list
  fix: editor status bar
  fix: edit_get_prev_utf segfault, add compare str with start of buf

Conflicts:
	edit/edit.h
Этот коммит содержится в:
Slava Zanko 2009-04-17 11:52:20 +03:00
родитель 1e5a201abd 92dac8d7e3
Коммит 0730efa2fc
16 изменённых файлов: 171 добавлений и 30 удалений

Просмотреть файл

@ -46,7 +46,7 @@ struct WEdit {
unsigned char *buffers2[MAXBUFF + 1]; /* all data from end of file down to curs2 */
/* UTF8 */
unsigned char charbuf[MB_LEN_MAX];
char charbuf[4 + 1];
int charpoint;
/* search variables */
long search_start; /* First character to start searching from */

Просмотреть файл

@ -168,6 +168,7 @@ int edit_get_utf (WEdit * edit, long byte_index, int *char_width)
return '\n';
}
str = edit_get_byte_ptr (edit, byte_index);
res = g_utf8_get_char_validated (str, -1);
@ -200,14 +201,12 @@ int edit_get_prev_utf (WEdit * edit, long byte_index, int *char_width)
gunichar ch;
gchar *next_ch = NULL;
int width = 0;
gchar *prn_buf=NULL;
if ( byte_index > 0 ) {
byte_index--;
}
ch = edit_get_utf (edit, byte_index, &width);
if ( width == 1 ) {
*char_width = width;
return ch;
@ -277,7 +276,9 @@ edit_load_file_fast (WEdit *edit, const char *filename)
edit->curs2 = edit->last_byte;
buf2 = edit->curs2 >> S_EDIT_BUF_SIZE;
edit->utf8 = str_isutf8 (get_codepage_id( source_codepage ));
edit->utf8 = 0;
if ( get_codepage_id( source_codepage ) )
edit->utf8 = str_isutf8 (get_codepage_id( source_codepage ));
if ((file = mc_open (filename, O_RDONLY | O_BINARY)) == -1) {
GString *errmsg = g_string_new(NULL);

Просмотреть файл

@ -130,7 +130,10 @@ int edit_raw_key_query (const char *heading, const char *query, int cancel);
int edit_file (const char *_file, int line);
int edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch);
int edit_get_byte (WEdit * edit, long byte_index);
char *edit_get_byte_ptr (WEdit *, long);
char *edit_get_byte_ptr (WEdit * edit, long byte_index);
char *edit_get_buf_ptr (WEdit * edit, long byte_index);
int edit_get_utf (WEdit * edit, long byte_index, int *char_width);
int edit_get_prev_utf (WEdit * edit, long byte_index, int *char_width);
int edit_count_lines (WEdit * edit, long current, int upto);
long edit_move_forward (WEdit * edit, long current, int lines, long upto);
long edit_move_forward3 (WEdit * edit, long current, int cols, long upto);

Просмотреть файл

@ -3019,7 +3019,8 @@ edit_select_codepage_cmd (WEdit *edit)
{
#ifdef HAVE_CHARSET
do_select_codepage ();
edit->utf8 = str_isutf8 (get_codepage_id (source_codepage));
if ( get_codepage_id (source_codepage) )
edit->utf8 = str_isutf8 (get_codepage_id (source_codepage));
edit->force = REDRAW_COMPLETELY;
edit_refresh_cmd (edit);
#endif

Просмотреть файл

@ -330,7 +330,7 @@ edit_draw_this_line (WEdit *edit, long b, long row, long start_col,
eval_marks (edit, &m1, &m2);
if (row <= edit->total_lines - edit->start_line) {
long tws;
long tws = 0;
if (use_colors && visible_tws) {
tws = edit_eol (edit, b);
while (tws > b && ((c = edit_get_byte (edit, tws - 1)) == ' '
@ -432,10 +432,16 @@ edit_draw_this_line (WEdit *edit, long b, long row, long start_col,
}
/* fallthrough */
default:
if (!edit->utf8) {
c = convert_to_display_c (c);
if ( utf8_display ) {
if ( !edit->utf8 ) {
}
} else {
//FIXME: if need
if ( edit->utf8 ) {
c = convert_from_utf_to_current_c (c);
//c = convert_to_utf (c);
} else {
c = convert_to_display_c (c);
}
}
/* Caret notation for control characters */
if (c < 32) {

Просмотреть файл

@ -44,6 +44,8 @@
#include "../src/tty.h" /* keys */
#include "../src/charsets.h" /* convert_from_input_c() */
#include "../src/selcodepage.h" /* do_select_codepage() */
#include "../src/main.h" /* display_codepage */
#include "../src/strutil.h" /* str_isutf8 () */
/*
* Ordinary translations. Note that the keys listed first take priority
@ -191,6 +193,8 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)
int char_for_insertion = -1;
int i = 0;
int extmod = 0;
int c;
const edit_key_map_type *key_map = NULL;
switch (edit_key_emulation) {
case EDIT_KEY_EMULATION_NORMAL:
@ -243,23 +247,55 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)
/* an ordinary insertable character */
if (x_key < 256 && !extmod) {
if ( edit->charpoint >= 4 ) {
edit->charpoint = 0;
edit->charbuf[edit->charpoint] = '\0';
}
if ( edit->charpoint < 4 ) {
edit->charbuf[edit->charpoint++] = x_key;
edit->charbuf[edit->charpoint] = '\0';
}
if (!edit->utf8) {
int c = convert_from_input_c (x_key);
if (is_printable (c)) {
char_for_insertion = c;
goto fin;
}
} else {
if (edit->charpoint >= MB_LEN_MAX) {
goto fin;
edit->charpoint = 0;
/* input from 8-bit locale */
if ( utf8_display ) {
c = convert_from_input_c (x_key);
if (is_printable (c)) {
char_for_insertion = c;
goto fin;
}
} else {
edit->charbuf[edit->charpoint + 1] = '\0';
int res = str_is_valid_char (edit->charbuf, edit->charpoint);
if (res < 0) {
if (res != -2) {
edit->charpoint = 0; /* broken multibyte char, skip */
goto fin;
}
/* unfinished multibyte input (in the middle of a multibyte utf-8 char) */
goto fin;
} else {
if ( g_unichar_isprint (g_utf8_get_char(edit->charbuf)) ) {
c = convert_from_utf_to_current ( edit->charbuf );
edit->charbuf[0] = '\0';
edit->charpoint = 0;
if (is_printable (c)) {
char_for_insertion = c;
goto fin;
}
}
/* unprintable utf input, skip it */
edit->charbuf[0] = '\0';
edit->charpoint = 0;
goto fin;
}
}
edit->charbuf[edit->charpoint] = x_key;
edit->charpoint++;
} else {
int res = str_is_valid_char (edit->charbuf, edit->charpoint);
mc_log("res:%i, edit->charpoint : %i\n",res, edit->charpoint);
if (res < 0) {
if (res != -2) {
edit->charpoint = 0; /* broken multibyte char, skip */
@ -271,6 +307,7 @@ edit_translate_key (WEdit *edit, long x_key, int *cmd, int *ch)
edit->charbuf[edit->charpoint]='\0';
edit->charpoint = 0;
if ( g_unichar_isprint (g_utf8_get_char(edit->charbuf))) {
mc_log("input:%s \n", edit->charbuf);
char_for_insertion = x_key;
goto fin;
}

Просмотреть файл

@ -569,7 +569,8 @@ sel_charset_button (int action)
cpname = (new_display_codepage < 0)
? _("Other 8 bit")
: codepages[new_display_codepage].name;
if ( cpname )
utf8_display = str_isutf8 (cpname);
/* avoid strange bug with label repainting */
g_snprintf (buf, sizeof (buf), "%-27s", cpname);
label_set_text (cplabel, buf);

Просмотреть файл

@ -29,6 +29,8 @@
#include "global.h"
#include "charsets.h"
#include "strutil.h" /* utf-8 functions */
#include "main.h"
int n_codepages = 0;
@ -249,4 +251,69 @@ convert_from_input (char *str)
str++;
}
}
/*
 * Convert the UTF-8 string str to the display codepage and return the
 * first byte of the converted text.
 *
 * Returns '.' when str is NULL, when no converter to the display codepage
 * can be created, or when the conversion itself fails.
 */
unsigned char
convert_from_utf_to_current (const char *str)
{
    /* Default to the placeholder so every failure path returns a defined
     * value (previously ch was left uninitialised on INVALID_CONV). */
    unsigned char ch = '.';
    char *cp_to = NULL;
    GIConv conv;
    GString *translated_data;

    if (!str)
        return ch;

    translated_data = g_string_new ("");
    cp_to = g_strdup ( get_codepage_id ( display_codepage ) );
    conv = str_crt_conv_to (cp_to);

    if (conv != INVALID_CONV) {
        if (str_convert (conv, str, translated_data) != ESTR_FAILURE) {
            /* only the first converted byte is reported to the caller */
            ch = translated_data->str[0];
        }
        str_close_conv (conv);
    }

    g_free (cp_to);
    g_string_free (translated_data, TRUE);
    return ch;
}
/*
 * Convert the single Unicode code point input_char to the display codepage
 * and return the first byte of the converted text.
 *
 * Returns '.' when the code point cannot be encoded as UTF-8, when no
 * converter to the display codepage can be created, or when the conversion
 * fails.
 */
unsigned char
convert_from_utf_to_current_c (const int input_char)
{
    /* g_unichar_to_utf8 () writes at most 6 bytes; +1 for the terminator */
    char str[6 + 1];
    unsigned char ch = '.';
    char *cp_to = NULL;
    GIConv conv;
    GString *translated_data;
    int res;

    res = g_unichar_to_utf8 (input_char, str);
    if ( res <= 0 ) {
        return ch;
    }
    /* g_unichar_to_utf8 () does not NUL-terminate its output: terminate
     * right after the bytes it wrote, not at the end of the buffer, so that
     * no uninitialised bytes are handed to the converter. */
    str[res] = '\0';

    translated_data = g_string_new ("");
    cp_to = g_strdup ( get_codepage_id ( display_codepage ) );
    conv = str_crt_conv_to (cp_to);

    if (conv != INVALID_CONV) {
        if (str_convert (conv, str, translated_data) != ESTR_FAILURE) {
            /* only the first converted byte is reported to the caller */
            ch = translated_data->str[0];
        }
        str_close_conv (conv);
    }

    g_free (cp_to);
    g_string_free (translated_data, TRUE);
    return ch;
}
#endif /* HAVE_CHARSET */

Просмотреть файл

@ -6,7 +6,6 @@
#define UNKNCHAR '\001'
#define CHARSETS_INDEX "mc.charsets"
extern int n_codepages;
extern unsigned char conv_displ[256];
@ -27,7 +26,8 @@ const char *init_translation_table (int cpsource, int cpdisplay);
void convert_to_display (char *str);
void convert_from_input (char *str);
void convert_string (unsigned char *str);
unsigned char convert_from_utf_to_current (const char *str);
unsigned char convert_from_utf_to_current_c (const int input_char);
/* Convert single characters */
static inline int
convert_to_display_c (int c)

Просмотреть файл

@ -63,6 +63,9 @@
#include "execute.h" /* toggle_panels() */
#include "history.h"
#include "strutil.h"
#include "selcodepage.h" /* do_select_codepage () */
#include "charsets.h" /* get_codepage_id () */
#ifndef MAP_FILE
# define MAP_FILE 0

Просмотреть файл

@ -152,8 +152,16 @@ int eight_bit_clean = 1;
* This is reported to break on many terminals (xterm, qansi-m).
*/
int full_eight_bits = 0;
#endif /* !HAVE_CHARSET */
/*
* Set to 1 when the display codepage ("Display bits") is UTF-8,
* i.e. the terminal is a UTF-8 terminal; 0 otherwise.
*/
int utf8_display = 0;
/* If true use the internal viewer */
int use_internal_view = 1;

Просмотреть файл

@ -50,6 +50,7 @@ struct edit_stack_type edit_history_moveto[MAX_HISTORY_MOVETO];
#ifdef HAVE_CHARSET
extern int source_codepage;
extern int display_codepage;
extern int utf8_display;
#else
extern int eight_bit_clean;
extern int full_eight_bits;

Просмотреть файл

@ -59,6 +59,8 @@
# include "../edit/edit.h"
#endif
#include "../src/strutil.h" /* str_isutf8 () */
extern char *find_ignore_dirs;
@ -589,6 +591,8 @@ load_setup (void)
source_codepage = get_codepage_index( cpname );
}
init_translation_table( source_codepage, display_codepage );
if ( get_codepage_id( display_codepage ) )
utf8_display = str_isutf8 (get_codepage_id( display_codepage ));
#endif /* HAVE_CHARSET */
}

Просмотреть файл

@ -304,6 +304,8 @@ str_test_encoding_class (const char *encoding, const char **table)
{
int t;
int result = 0;
if ( encoding == NULL )
return result;
for (t = 0; table[t] != NULL; t++)
{
@ -331,7 +333,7 @@ str_choose_str_functions ()
}
int
str_isutf8 (char *codeset_name)
str_isutf8 (const char *codeset_name)
{
int result = 0;
if (str_test_encoding_class (codeset_name, str_utf8_encodings))

Просмотреть файл

@ -486,10 +486,9 @@ int str_key_collate (const char *t1, const char *t2, int case_sen);
*/
void str_release_key (char *key, int case_sen);
#endif
/* return 1 if codeset_name is utf8 or utf-8
* I
*/
int str_isutf8 (char *codeset_name);
#endif
int str_isutf8 (const char *codeset_name);

Просмотреть файл

@ -454,6 +454,8 @@ utf8_tool_copy_chars_to_end (struct utf8_tool *tool)
size_t left;
gunichar uni;
tool->compose = 0;
while (tool->cheked[0] != '\0')
{
uni = g_utf8_get_char (tool->cheked);
@ -478,6 +480,8 @@ utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident)
gunichar uni;
int w;
tool->compose = 0;
while (tool->cheked[0] != '\0')
{
uni = g_utf8_get_char (tool->cheked);
@ -581,6 +585,7 @@ str_utf8_fit_to_term (const char *text, int width, int just_mode)
tool.cheked = pre_form->text;
tool.actual = result;
tool.remain = sizeof (result);
tool.compose = 0;
if (pre_form->width <= width)
{
@ -653,6 +658,7 @@ str_utf8_term_trim (const char *text, int width)
tool.cheked = pre_form->text;
tool.actual = result;
tool.remain = sizeof (result);
tool.compose = 0;
if (width < pre_form->width)
{
@ -753,6 +759,7 @@ str_utf8_term_substring (const char *text, int start, int width)
tool.cheked = pre_form->text;
tool.actual = result;
tool.remain = sizeof (result);
tool.compose = 0;
tool.ident = -start;
utf8_tool_skip_chars_to (&tool, 0);
@ -781,6 +788,7 @@ str_utf8_trunc (const char *text, int width)
tool.cheked = pre_form->text;
tool.actual = result;
tool.remain = sizeof (result);
tool.compose = 0;
if (pre_form->width > width)
{