diff --git a/src/Makefile.am b/src/Makefile.am index a4713a625..f1391787e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -63,7 +63,8 @@ SRCS = achown.c achown.h background.c background.h boxes.c boxes.h \ tree.c tree.h treestore.c treestore.h timefmt.h tty.c tty.h user.c \ user.h util.c util.h utilunix.c view.c view.h vfsdummy.h widget.c \ widget.h win.c win.h wtools.c wtools.h unixcompat.h \ - x11conn.h x11conn.c ecs.h ecs.c + x11conn.h x11conn.c ecs.h ecs.c \ + strutil.h strutil.c strutilascii.c strutil8bit.c strutilutf8.c if CHARSET mc_SOURCES = $(SRCS) $(CHARSET_SRC) diff --git a/src/main.c b/src/main.c index cb83baedf..c16d2b3be 100644 --- a/src/main.c +++ b/src/main.c @@ -61,6 +61,7 @@ #include "listmode.h" #include "execute.h" #include "ext.h" /* For flush_extension_file() */ +#include "strutil.h" /* Listbox for the command history feature */ #include "widget.h" @@ -2134,6 +2135,8 @@ main (int argc, char *argv[]) home_dir = mc_home; } + str_init_strings (NULL); + vfs_init (); #ifdef HAVE_SLANG @@ -2253,6 +2256,8 @@ main (int argc, char *argv[]) #ifdef HAVE_CHARSET free_codepages_list (); #endif + str_uninit_strings (); + g_free (this_dir); g_free (other_dir); diff --git a/src/strutil.c b/src/strutil.c new file mode 100644 index 000000000..d1527e4ee --- /dev/null +++ b/src/strutil.c @@ -0,0 +1,802 @@ +/* common strings utilities + Copyright (C) 2007 Free Software Foundation, Inc. + + Written 2007 by: + Rostislav Benes + + The file_date routine is mostly from GNU's fileutils package, + written by Richard Stallman and David MacKenzie. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "global.h" +#include "strutil.h" + +//names, that are used for utf-8 +static const char *str_utf8_encodings[] = { + "utf-8", + "utf8", + NULL}; + +// standard 8bit encodings, no wide or multibytes characters +static const char *str_8bit_encodings[] = { + "iso-8859", + "iso8859", + NULL +}; + +// terminal encoding +static char *codeset; +// function for encoding specific operations +static struct str_class used_class; +// linked list of string buffers +static struct str_buffer *buffer_list = NULL; + +iconv_t str_cnv_to_term; +iconv_t str_cnv_from_term; +iconv_t str_cnv_not_convert; + +// if enc is same encoding like on terminal +static int +str_test_not_convert (const char *enc) +{ + return g_ascii_strcasecmp (enc, codeset) == 0; +} + +str_conv_t +str_crt_conv_to (const char *to_enc) +{ + return (!str_test_not_convert (to_enc)) ? iconv_open (to_enc, codeset) : + str_cnv_not_convert; +} + +str_conv_t +str_crt_conv_from (const char *from_enc) +{ + return (!str_test_not_convert (from_enc)) ? 
iconv_open (codeset, from_enc) : + str_cnv_not_convert; +} + +void +str_close_conv (str_conv_t conv) +{ + if (conv != str_cnv_not_convert) + iconv_close (conv); +} + +struct str_buffer * +str_get_buffer () +{ + struct str_buffer *result; + + result = buffer_list; + + while (result != NULL) { + if (!result->used) { + str_reset_buffer (result); + result->used = 1; + return result; + } + result = result->next; + } + + result = g_new (struct str_buffer, 1); + result->size = BUF_TINY; + result->data = g_new0 (char, result->size); + result->data[0] = '\0'; + result->actual = result->data; + result->remain = result->size; + + result->next = buffer_list; + buffer_list = result; + + result->used = 1; + + return result; +} + +void +str_release_buffer (struct str_buffer *buffer) +{ + buffer->used = 0; +} + +void +str_incrase_buffer (struct str_buffer *buffer) +{ + size_t offset; + + offset = buffer->actual - buffer->data; + buffer->remain+= buffer->size; + buffer->size*= 2; + buffer->data = g_renew (char, buffer->data, buffer->size); + buffer->actual = buffer->data + offset; +} + +void +str_reset_buffer (struct str_buffer *buffer) +{ + buffer->data[0] = '\0'; + buffer->actual = buffer->data; + buffer->remain = buffer->size; +} + +static int +_str_convert (str_conv_t coder, char *string, struct str_buffer *buffer) +{ + int state; + size_t left; + size_t nconv; + + errno = 0; + + if (used_class.is_valid_string (string)) { + state = 0; + + left = strlen (string); + + if (coder == (iconv_t) (-1)) return ESTR_FAILURE; + + iconv(coder, NULL, NULL, NULL, NULL); + + while (((int)left) > 0) { + nconv = iconv(coder, &string, &left, + &(buffer->actual), &(buffer->remain)); + if (nconv == (size_t) (-1)) { + switch (errno) { + case EINVAL: + return ESTR_FAILURE; + case EILSEQ: + string++; + left--; + if (buffer->remain <= 0) { + str_incrase_buffer (buffer); + } + buffer->actual[0] = '?'; + buffer->actual++; + buffer->remain--; + state = ESTR_PROBLEM; + break; + case E2BIG: + 
str_incrase_buffer (buffer); + break; + } + } + }; + + return state; + } else return ESTR_FAILURE; +} + +int +str_convert (str_conv_t coder, char *string, struct str_buffer *buffer) +{ + int result; + + result = _str_convert (coder, string, buffer); + buffer->actual[0] = '\0'; + + return result; +} + +static int +_str_vfs_convert_from (str_conv_t coder, char *string, + struct str_buffer *buffer) +{ + size_t left; + size_t nconv; + + left = strlen (string); + + if (coder == (iconv_t) (-1)) return ESTR_FAILURE; + + iconv(coder, NULL, NULL, NULL, NULL); + + do { + nconv = iconv(coder, &string, &left, + &(buffer->actual), &(buffer->remain)); + if (nconv == (size_t) (-1)) { + switch (errno) { + case EINVAL: + return ESTR_FAILURE; + case EILSEQ: + return ESTR_FAILURE; + case E2BIG: + str_incrase_buffer (buffer); + break; + } + } + } while (left > 0); + + return 0; +} + +int +str_vfs_convert_from (str_conv_t coder, char *string, struct str_buffer *buffer) +{ + int result; + + if (coder == str_cnv_not_convert) { + str_insert_string (string, buffer); + result = 0; + } else result = _str_vfs_convert_from (coder, string, buffer); + buffer->actual[0] = '\0'; + + return result; +} + +int +str_vfs_convert_to (str_conv_t coder, const char *string, + int size, struct str_buffer *buffer) +{ + return used_class.vfs_convert_to (coder, string, size, buffer); +} + +void +str_insert_string (const char *string, struct str_buffer *buffer) +{ + size_t s; + + s = strlen (string); + while (buffer->remain < s) str_incrase_buffer (buffer); + + memcpy (buffer->actual, string, s); + buffer->actual+= s; + buffer->remain-= s; + buffer->actual[0] = '\0'; +} + +void +str_insert_string2 (const char *string, int size, struct str_buffer *buffer) +{ + size_t s; + + s = (size >= 0) ? 
size : strlen (string);
+    /* "<=" keeps one byte free for the terminator written below */
+    while (buffer->remain <= s) str_incrase_buffer (buffer);
+
+    memcpy (buffer->actual, string, s);
+    buffer->actual+= s;
+    buffer->remain-= s;
+    buffer->actual[0] = '\0';
+}
+
+/* printf for str_buffer: format the arguments and append the text at
+ * buffer->actual, enlarging the buffer until the whole text fits.
+ * If vsnprintf reports an error nothing is appended.
+ */
+void
+str_printf (struct str_buffer *buffer, const char *format, ...)
+{
+    int size;
+    va_list ap;
+
+    for (;;) {
+        /* a va_list is indeterminate after vsnprintf has consumed it,
+         * so it is restarted for every attempt */
+        va_start (ap, format);
+        size = vsnprintf (buffer->actual, buffer->remain, format, ap);
+        va_end (ap);
+
+        if (size < 0) {
+            /* output error: drop whatever was written and keep the
+             * buffer terminated at its previous end */
+            if (buffer->remain > 0) buffer->actual[0] = '\0';
+            return;
+        }
+        /* text and terminator fit only when size < remain */
+        if ((size_t) size < buffer->remain) break;
+        str_incrase_buffer (buffer);
+    }
+    buffer->actual+= size;
+    buffer->remain-= size;
+}
+
+/* Append the single byte "ch" to "buffer" and terminate it.
+ */
+void
+str_insert_char (char ch, struct str_buffer *buffer)
+{
+    /* two bytes are needed: the char itself and the terminator */
+    if (buffer->remain <= 1) str_incrase_buffer (buffer);
+
+    buffer->actual[0] = ch;
+    buffer->actual++;
+    buffer->remain--;
+    buffer->actual[0] = '\0';
+}
+
+/* Forward to the insert_replace_char implementation of the active
+ * encoding class.
+ */
+void
+str_insert_replace_char (struct str_buffer *buffer)
+{
+    used_class.insert_replace_char (buffer);
+}
+
+/* Move the end of "buffer" back by up to "count" characters (as found by
+ * str_get_prev_char), stopping at the start of the data. The bytes given
+ * up are added back to buffer->remain and the buffer is re-terminated.
+ */
+void
+str_backward_buffer (struct str_buffer *buffer, int count)
+{
+    char *prev;
+
+    while ((count > 0) && (buffer->actual > buffer->data)) {
+        prev = str_get_prev_char (buffer->actual);
+        buffer->remain+= buffer->actual - prev;
+        buffer->actual = prev;
+        buffer->actual[0] = '\0';
+        count--;
+    }
+}
+
+
+int
+str_translate_char (str_conv_t conv, char *keys, size_t ch_size,
+                    char *output, size_t out_size)
+{
+    size_t left;
+    size_t cnv;
+
+    iconv (conv, NULL, NULL, NULL, NULL);
+
+    left = (ch_size == (size_t)(-1)) ? 
strlen (keys) : ch_size; + + cnv = iconv (conv, &keys, &left, &output, &out_size); + if (cnv == (size_t)(-1)) { + if (errno == EINVAL) return ESTR_PROBLEM; else return ESTR_FAILURE; + } else { + output[0] = '\0'; + return 0; + } +} + + +static const char * +str_detect_termencoding () +{ + return (nl_langinfo(CODESET)); +} + +static int +str_test_encoding_class (const char *encoding, const char **table) +{ + int t; + int result = 0; + + for (t = 0; table[t] != NULL; t++) { + result+= (g_ascii_strncasecmp (encoding, table[t], + strlen (table[t])) == 0); + } + + return result; +} + +static void +str_choose_str_functions () +{ + if (str_test_encoding_class (codeset, str_utf8_encodings)) { + used_class = str_utf8_init (); + } else if (str_test_encoding_class (codeset, str_8bit_encodings)) { + used_class = str_8bit_init (); + } else { + used_class = str_ascii_init (); + } +} + +void +str_init_strings (const char *termenc) +{ + codeset = g_strdup ((termenc != NULL) + ? termenc + : str_detect_termencoding ()); + + str_cnv_not_convert = iconv_open (codeset, codeset); + if (str_cnv_not_convert == INVALID_CONV) { + if (termenc != NULL) { + g_free (codeset); + codeset = g_strdup (str_detect_termencoding ()); + str_cnv_not_convert = iconv_open (codeset, codeset); + } + + if (str_cnv_not_convert == INVALID_CONV) { + g_free (codeset); + codeset = g_strdup ("ascii"); + str_cnv_not_convert = iconv_open (codeset, codeset); + } + } + + str_cnv_to_term = str_cnv_not_convert; + str_cnv_from_term = str_cnv_not_convert; + + str_choose_str_functions (); +} + +static void +str_release_buffer_list () +{ + struct str_buffer *buffer; + struct str_buffer *next; + + buffer = buffer_list; + while (buffer != NULL) { + next = buffer->next; + g_free (buffer->data); + g_free (buffer); + buffer = next; + } +} + +void +str_uninit_strings () +{ + str_release_buffer_list (); + + iconv_close (str_cnv_not_convert); +} + +const char * +str_term_form (const char *text) +{ + return used_class.term_form 
(text); +} + +const char * +str_fit_to_term (const char *text, int width, int just_mode) +{ + return used_class.fit_to_term (text, width, just_mode); +} + +const char * +str_term_trim (const char *text, int width) +{ + return used_class.term_trim (text, width); +} + +void +str_msg_term_size (const char *text, int *lines, int *columns) +{ + return used_class.msg_term_size (text, lines, columns); +} + +const char * +str_term_substring (const char *text, int start, int width) +{ + return used_class.term_substring (text, start, width); +} + +char * +str_get_next_char (char *text) +{ + + used_class.cnext_char ((const char **)&text); + return text; +} + +const char * +str_cget_next_char (const char *text) +{ + used_class.cnext_char (&text); + return text; +} + +void +str_next_char (char **text) +{ + used_class.cnext_char ((const char **) text); +} + +void +str_cnext_char (const char **text) +{ + used_class.cnext_char (text); +} + +char * +str_get_prev_char (char *text) +{ + used_class.cprev_char ((const char **) &text); + return text; +} + +const char * +str_cget_prev_char (const char *text) +{ + used_class.cprev_char (&text); + return text; +} + +void +str_prev_char (char **text) +{ + used_class.cprev_char ((const char **) text); +} + +void +str_cprev_char (const char **text) +{ + used_class.cprev_char (text); +} + +char * +str_get_next_char_safe (char *text) +{ + used_class.cnext_char_safe ((const char **) &text); + return text; +} + +const char * +str_cget_next_char_safe (const char *text) +{ + used_class.cnext_char_safe (&text); + return text; +} + +void +str_next_char_safe (char **text) +{ + used_class.cnext_char_safe ((const char **) text); +} + +void +str_cnext_char_safe (const char **text) +{ + used_class.cnext_char_safe (text); +} + +char * +str_get_prev_char_safe (char *text) +{ + used_class.cprev_char_safe ((const char **) &text); + return text; +} + +const char * +str_cget_prev_char_safe (const char *text) +{ + used_class.cprev_char_safe (&text); + return 
text; +} + +void +str_prev_char_safe (char **text) +{ + used_class.cprev_char_safe ((const char **) text); +} + +void +str_cprev_char_safe (const char **text) +{ + used_class.cprev_char_safe (text); +} + +int +str_next_noncomb_char (char **text) +{ + return used_class.cnext_noncomb_char ((const char **) text); +} + +int +str_cnext_noncomb_char (const char **text) +{ + return used_class.cnext_noncomb_char (text); +} + +int +str_prev_noncomb_char (char **text, const char *begin) +{ + return used_class.cprev_noncomb_char ((const char **) text, begin); +} + +int +str_cprev_noncomb_char (const char **text, const char *begin) +{ + return used_class.cprev_noncomb_char (text, begin); +} + +int +str_is_valid_char (const char *ch, size_t size) +{ + return used_class.is_valid_char (ch, size); +} + +int +str_term_width1 (const char *text) +{ + return used_class.term_width1 (text); +} + +int +str_term_width2 (const char *text, size_t length) +{ + return used_class.term_width2 (text, length); +} + +int +str_term_char_width (const char *text) +{ + return used_class.term_char_width (text); +} + +int +str_offset_to_pos (const char* text, size_t length) +{ + return used_class.offset_to_pos (text, length); +} + +int +str_length (const char* text) +{ + return used_class.length (text); +} + +int +str_length2 (const char* text, int size) +{ + return used_class.length2 (text, size); +} + +int +str_length_noncomb (const char* text) +{ + return used_class.length_noncomb (text); +} + +int +str_column_to_pos (const char *text, size_t pos) +{ + return used_class.column_to_pos (text, pos); +} + +int +str_isspace (const char *ch) +{ + return used_class.isspace (ch); +} + +int +str_ispunct (const char *ch) +{ + return used_class.ispunct (ch); +} + +int +str_isalnum (const char *ch) +{ + return used_class.isalnum (ch); +} + +int +str_isdigit (const char *ch) +{ + return used_class.isdigit (ch); +} + +int +str_toupper (const char *ch, char **out, size_t *remain) +{ + return used_class.toupper (ch, 
out, remain); +} + +int +str_tolower (const char *ch, char **out, size_t *remain) +{ + return used_class.tolower (ch, out, remain); +} + +int +str_isprint (const char *ch) +{ + return used_class.isprint (ch); +} + +int +str_iscombiningmark (const char *ch) +{ + return used_class.iscombiningmark (ch); +} + +const char * +str_trunc (const char *text, int width) +{ + return used_class.trunc (text, width); +} + +char * +str_create_search_needle (const char *needle, int case_sen) +{ + return used_class.create_search_needle (needle, case_sen); +} + + +void +str_release_search_needle (char *needle, int case_sen) +{ + used_class.release_search_needle (needle, case_sen); +} + +const char * +str_search_first (const char *text, const char *search, int case_sen) +{ + return used_class.search_first (text, search, case_sen); +} + +const char * +str_search_last (const char *text, const char *search, int case_sen) +{ + return used_class.search_last (text, search, case_sen); +} + +int +str_is_valid_string (const char *text) +{ + return used_class.is_valid_string (text); +} + +int +str_compare (const char *t1, const char *t2) +{ + return used_class.compare (t1, t2); +} + +int +str_ncompare (const char *t1, const char *t2) +{ + return used_class.ncompare (t1, t2); +} + +int +str_casecmp (const char *t1, const char *t2) +{ + return used_class.casecmp (t1, t2); +} + +int +str_ncasecmp (const char *t1, const char *t2) +{ + return used_class.ncasecmp (t1, t2); +} + +int +str_prefix (const char *text, const char *prefix) +{ + return used_class.prefix (text, prefix); +} + +int +str_caseprefix (const char *text, const char *prefix) +{ + return used_class.caseprefix (text, prefix); +} + +void +str_fix_string (char *text) +{ + used_class.fix_string (text); +} + +char * +str_create_key (const char *text, int case_sen) +{ + return used_class.create_key (text, case_sen); +} + +char * +str_create_key_for_filename (const char *text, int case_sen) +{ + return used_class.create_key_for_filename 
(text, case_sen); +} + +int +str_key_collate (const char *t1, const char *t2, int case_sen) +{ + return used_class.key_collate (t1, t2, case_sen); +} + +void +str_release_key (char *key, int case_sen) +{ + used_class.release_key (key, case_sen); +} + diff --git a/src/strutil.h b/src/strutil.h new file mode 100644 index 000000000..8fabcd81c --- /dev/null +++ b/src/strutil.h @@ -0,0 +1,537 @@ +#ifndef MC_STRUTIL_H +#define MC_STRUTIL_H + +/* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c. + * There are two sort of functions: + * 1. functions for working with growing strings and conversion strings between + * different encodings. + * (implemented directly in strutil.c) + * 2. functions, that hide differences between encodings derived from ASCII. + * (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c) + * documentation is made for UTF-8 version of functions. + */ + +/* invalid strings + * function, that works with invalid strings are marked with "I" + * in documentation + * invalid bytes of string are handled as one byte characters with width 1, they + * are displayed as questionmarks, I-maked comparing functions try to keep + * the original value of these bytes. + */ + +/* combining characters + * displaynig: all handled as zero with characters, expect combing character + * at the begin of string, this character has with one (space add before), + * so str_term_width is not good for computing width of singles characters + * (never return zero, expect emtpy string) + * for compatibility are strings composed before displaynig + * comparing: comparing decompose all string before comparing, n-compare + * functions do not work as is usual, because same strings do not have to be + * same length in UTF-8. So they return 0 if one string is prefix of the other + * one. + * str_prefix is used to determine, how many characters from one string are + * prefix in second string. 
However, str_prefix return number of characters in + * decompose form. (used in do_search (screen.c)) + */ +#include + +/* errors for conversion function: + * problem means, that not every characters was successfully converted (They are + * replaced with questionmark). So is impossible convert string back. + * failure means, that conversion is not possible (example: wrong encoding + * of input string) + */ +#define ESTR_PROBLEM 1 +#define ESTR_FAILURE 2 + +/* constanst originally from screen.c + * used for alignment strings on terminal + */ +#define J_LEFT 0x01 +#define J_RIGHT 0x02 +#define J_CENTER 0x03 +// if there is enough space for string on terminal, string is centered +// otherwise is aligned to left +#define J_CENTER_LEFT 0x04 + +#define IS_FIT(x) ((x) & 0x0010) +#define MAKE_FIT(x) ((x) | 0x0010) +#define HIDE_FIT(x) ((x) & 0x000f) + +// fit alignment, if string is to long, is truncated with '~' +#define J_LEFT_FIT 0x11 +#define J_RIGHT_FIT 0x12 +#define J_CENTER_FIT 0x13 +#define J_CENTER_LEFT_FIT 0x14 + +// redefinition of iconv_t, so is not needed include iconv.h in other files. 
+typedef iconv_t str_conv_t; +#define INVALID_CONV ((iconv_t) (-1)) + +// standard convertors +extern str_conv_t str_cnv_to_term; +extern str_conv_t str_cnv_from_term; +// from terminal encoding to terminal encoding +extern str_conv_t str_cnv_not_convert; + +/* structure for growing strings + * try to avoid set any members manually + */ +struct str_buffer { + // all buffers are stored in linked list + struct str_buffer *next; + // if is buffer in use or not + int used; + // whole string + char *data; + // size of string + size_t size; + // end of string, actual[0] is always '\0' + char *actual; + // how many (chars)bytes remain after actual + size_t remain; +}; + +// all functions in str_class must be defined for every encoding +struct str_class { + int (*vfs_convert_to) (str_conv_t coder, const char *string, + int size, struct str_buffer *buffer); //I + void (*insert_replace_char) (struct str_buffer *buffer); + int (*is_valid_string) (const char *); //I + int (*is_valid_char) (const char *, size_t); //I + void (*cnext_char) (const char **); + void (*cprev_char) (const char **); + void (*cnext_char_safe) (const char **); //I + void (*cprev_char_safe) (const char **); //I + int (*cnext_noncomb_char) (const char **text); //I + int (*cprev_noncomb_char) (const char **text, const char *begin); //I + int (*isspace) (const char *); //I + int (*ispunct) (const char *); //I + int (*isalnum) (const char *); //I + int (*isdigit) (const char *); //I + int (*isprint) (const char *); //I + int (*iscombiningmark) (const char *); //I + int (*length) (const char *); //I + int (*length2) (const char *, int); //I + int (*length_noncomb) (const char *); //I + int (*toupper) (const char *, char **, size_t *); + int (*tolower) (const char *, char **, size_t *); + void (*fix_string) (char *); //I + const char *(*term_form) (const char *); //I + const char *(*fit_to_term) (const char *, int, int); //I + const char *(*term_trim) (const char *text, int width); //I + void (*msg_term_size) 
(const char *, int *, int *); //I + const char *(*term_substring) (const char *, int, int); //I + int (*term_width1) (const char *); //I + int (*term_width2) (const char *, size_t); //I + int (*term_char_width) (const char *); //I + const char *(*trunc) (const char *, int); //I + int (*offset_to_pos) (const char *, size_t); //I + int (*column_to_pos) (const char *, size_t); //I + char *(*create_search_needle) (const char *, int); + void (*release_search_needle) (char *, int); + const char *(*search_first) (const char *, const char *, int); + const char *(*search_last) (const char *, const char *, int); + int (*compare) (const char *, const char *); //I + int (*ncompare) (const char *, const char *); //I + int (*casecmp) (const char *, const char *); //I + int (*ncasecmp) (const char *, const char *); //I + int (*prefix) (const char *, const char *); //I + int (*caseprefix) (const char *, const char *); //I + char *(*create_key) (const char *text, int case_sen); //I + char *(*create_key_for_filename) (const char *text, int case_sen); //I + int (*key_collate) (const char *t1, const char *t2, int case_sen); //I + void (*release_key) (char *key, int case_sen); //I +}; + +struct str_class str_utf8_init (); +struct str_class str_8bit_init (); +struct str_class str_ascii_init (); + +/* create convertor from "from_enc" to terminal encoding + * if "from_enc" is not supported return INVALID_CONV + */ +str_conv_t str_crt_conv_from (const char *from_enc); + +/* create convertor from terminal encoding to "to_enc" + * if "to_enc" is not supported return INVALID_CONV + */ +str_conv_t str_crt_conv_to (const char *to_enc); + +/* close convertor, do not close str_cnv_to_term, str_cnv_from_term, + * str_cnv_not_convert + */ +void str_close_conv (str_conv_t conv); + +/* return on of not used buffers (.used == 0) or create new + * returned buffer has set .used to 1 + */ +struct str_buffer *str_get_buffer (); + +/* clear buffer, in .data is empty string, .actual = .data, .remain = .size 
+ * do not set .used + */ +void str_reset_buffer (struct str_buffer *buffer); + +/* set .used of buffer to 0, so can be returned by str_get_buffer again + * data in buffer may stay valid after function return + */ +void str_release_buffer (struct str_buffer *buffer); + +/* incrase capacity of buffer + */ +void str_incrase_buffer (struct str_buffer *buffer); + +/* convert string using coder, result of conversion is appended at end of buffer + * return 0 if there was no problem. + * otherwise return ESTR_PROBLEM or ESTR_FAILURE + */ +int str_convert (str_conv_t coder, char *string, + struct str_buffer *buffer); + +/* return only 0 or ESTR_FAILURE, because vfs must be able to convert result to + * original string. (so no replace with questionmark) + * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied, + * so is possible to show file, that is not valid in terminal encoding + */ +int str_vfs_convert_from (str_conv_t coder, char *string, + struct str_buffer *buffer); + +/* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied, + * does replace with questionmark + * I + */ +int str_vfs_convert_to (str_conv_t coder, const char *string, + int size, struct str_buffer *buffer); + +/* append string at the end of buffer + */ +void str_insert_string (const char *string, struct str_buffer *buffer); + +/* append string at the end of buffer, limit to size + */ +void +str_insert_string2 (const char *string, int size, struct str_buffer *buffer); + +/* printf functin for str_buffer, append result of printf at the end of buffer + */ +void +str_printf (struct str_buffer *buffer, const char *format, ...); + +/* append char at the end of buffer + */ +void str_insert_char (char ch, struct str_buffer *buffer); + +/* add standard replacement character in terminal encoding + */ +void str_insert_replace_char (struct str_buffer *buffer); + +/* rewind "count" characters buffer back + */ +void str_backward_buffer (struct str_buffer *buffer, int 
count); + +/* init strings and set terminal encoding, + * if is termenc NULL, detect terminal encoding + * create all str_cnv_* and set functions for terminal encoding + */ +void str_init_strings (const char *termenc); + +/* free all str_buffer and all str_cnv_* + */ +void str_uninit_strings (); + +/* try convert characters in ch to output using conv + * ch_size is size of ch, can by (size_t)(-1) (-1 only for ASCII + * compatible encoding, for other must be set) + * return 0 if conversion was successfully, ESTR_PROBLEM if ch contains only + * part of characters, ESTR_FAILURE if conversion is not possible + */ +int str_translate_char (str_conv_t conv, char *ch, size_t ch_size, + char *output, size_t out_size); + +/* test, if text is valid in terminal encoding + * I + */ +int str_is_valid_string (const char *text); + +/* test, if first char of ch is valid + * size, how many bytes characters occupied, could be (size_t)(-1) + * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of + * multibyte character + * I + */ +int str_is_valid_char (const char *ch, size_t size); + +/* return next characters after text, do not call on the end of string + */ +char *str_get_next_char (char *text); +const char *str_cget_next_char (const char *text); + +/* return previous characters before text, do not call on the start of strings + */ +char *str_get_prev_char (char *text); +const char *str_cget_prev_char (const char *text); + +/* set text to next characters, do not call on the end of string + */ +void str_next_char (char **text); +void str_cnext_char (const char **text); + +/* set text to previous characters, do not call on the start of strings + */ +void str_prev_char (char **text); +void str_cprev_char (const char **text); + +/* return next characters after text, do not call on the end of string + * works with invalid string + * I + */ +char *str_get_next_char_safe (char *text); +const char *str_cget_next_char_safe (const char *text); + +/* return previous 
characters before text, do not call on the start of strings
+ * works with invalid string
+ * I
+ */
+char *str_get_prev_char_safe (char *text);
+const char *str_cget_prev_char_safe (const char *text);
+
+/* set text to the next character, do not call on the end of string
+ * works with invalid string
+ * I
+ */
+void str_next_char_safe (char **text);
+void str_cnext_char_safe (const char **text);
+
+/* set text to the previous character, do not call on the start of strings
+ * works with invalid string
+ * I
+ */
+void str_prev_char_safe (char **text);
+void str_cprev_char_safe (const char **text);
+
+/* set text to the next noncombining character, checks for the end of text
+ * return how many characters were skipped
+ * works with invalid string
+ * I
+ */
+int str_next_noncomb_char (char **text);
+int str_cnext_noncomb_char (const char **text);
+
+/* set text to the previous noncombining character, search stops at begin
+ * return how many characters were skipped
+ * works with invalid string
+ * I
+ */
+int str_prev_noncomb_char (char **text, const char *begin);
+int str_cprev_noncomb_char (const char **text, const char *begin);
+
+/* test if the first character in ch is a space, tabulator or new line
+ * I
+ */
+int str_isspace (const char *ch);
+
+/* test if the first character in ch is punctuation or a symbol
+ * I
+ */
+int str_ispunct (const char *ch);
+
+/* test if the first character in ch is alphanumeric
+ * I
+ */
+int str_isalnum (const char *ch);
+
+/* test if the first character in ch is a digit
+ * I
+ */
+int str_isdigit (const char *ch);
+
+/* test if the first character in ch is printable
+ * I
+ */
+int str_isprint (const char *ch);
+
+/* test if the first character in ch is a combining mark (only in utf-8)
+ * combining marks are assumed to be zero width
+ * I
+ */
+int str_iscombiningmark (const char *ch);
+
+/* write the upper case form of the first character in ch into out
+ * decrease remain by the size of the written character
+ * if out is not big enough, do nothing
+ */
+int str_toupper (const char *ch, char **out, size_t *remain);
+
+/* write the lower case form of the first character in ch into out
+ * decrease remain by the size of the written character
+ * if out is not big enough, do nothing
+ */
+int str_tolower (const char *ch, char **out, size_t *remain);
+
+/* return length of text in characters
+ * I
+ */
+int str_length (const char* text);
+
+/* return length of text in characters, limit to size
+ * I
+ */
+int str_length2 (const char* text, int size);
+
+/* return length of text in characters, count only noncombining characters
+ * I
+ */
+int str_length_noncomb (const char* text);
+
+/* replace all invalid characters in text with a question mark
+ * after return, text is a valid string in terminal encoding
+ * I
+ */
+void str_fix_string (char* text);
+
+/* replace all invalid characters in text with a question mark
+ * replace all unprintable characters with '.'
+ * return a statically allocated string, "text" is not changed
+ * the returned string does not need to be freed
+ * I
+ */
+const char *str_term_form (const char *text);
+
+/* like str_term_form, but text can be aligned to width
+ * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...) 
+ * result is completed with spaces to width + * I + */ +const char *str_fit_to_term (const char *text, int width, int just_mode); + +/* like str_term_form, but when text is wider than width, three dots are + * inserted at begin and result is completed with suffix of text + * no additional spaces are inserted + * I + */ +const char *str_term_trim (const char *text, int width); + +/* return how many lines and columns will text occupy on terminal + * I + */ +void str_msg_term_size (const char *text, int *lines, int *columns); + +/* like str_term_form, but return only specified substring + * start - column (position) on terminal, where substring begin + * result is completed with spaces to width + * I + */ +const char *str_term_substring (const char *text, int start, int width); + +/* return width, that will be text occupied on terminal + * I + */ +int str_term_width1 (const char *text); + +/* return width, that will be text occupied on terminal + * text is limited by length in characters + * I + */ +int str_term_width2 (const char *text, size_t length); + +/* return width, that will be character occupied on terminal + * combining characters are always zero width + * I + */ +int str_term_char_width (const char *text); + +/* convert position in characters to position in bytes + * I + */ +int str_offset_to_pos (const char* text, size_t length); + +/* convert position on terminal to position in characters + * I + */ +int str_column_to_pos (const char *text, size_t pos); + +/* like str_fit_to_term width just_mode = J_LEFT_FIT, + * but do not insert additional spaces + * I + */ +const char *str_trunc (const char *text, int width); + +/* create needle, that will be searched in str_search_fist/last, + * so needle can be reused + * in UTF-8 return normalized form of needle + */ +char *str_create_search_needle (const char *needle, int case_sen); + +/* free needle returned by str_create_search_needle + */ +void str_release_search_needle (char *needle, int case_sen); + +/* 
search for first occurrence of needle in text + */ +const char *str_search_first (const char *text, const char *needle, int case_sen); + +/* search for last occurrence of needle in text + */ +const char *str_search_last (const char *text, const char *needle, int case_sen); + +/* case sensitive compare two strings + * I + */ +int str_compare (const char *t1, const char *t2); + +/* case sensitive compare two strings + * if one string is prefix of the other string, return 0 + * I + */ +int str_ncompare (const char *t1, const char *t2); + +/* case insensitive compare two strings + * I + */ +int str_casecmp (const char *t1, const char *t2); + +/* case insensitive compare two strings + * if one string is prefix of the other string, return 0 + * I + */ +int str_ncasecmp (const char *t1, const char *t2); + +/* return how many bytes are the same from start in text and prefix + * both strings are decomposed before comparing and return value is counted + * in decomposed form, too. calling with prefix, prefix, you get size in bytes + * of prefix in decomposed form + * I + */ +int str_prefix (const char *text, const char *prefix); + +/* case insensitive version of str_prefix + * I + */ +int str_caseprefix (const char *text, const char *prefix); + +/* create a key that is used by str_key_collate + * I + */ +char *str_create_key (const char *text, int case_sen); + +/* create a key that is used by str_key_collate + * should be aware of dot '.' 
in text + * I + */ +char *str_create_key_for_filename (const char *text, int case_sen); + +/* compare two strings using LC_COLLATE, if possible + * if case_sen is set, comparing is case sensitive, + * case_sen must be same for str_create_key, str_key_collate and str_release_key + * I + */ +int str_key_collate (const char *t1, const char *t2, int case_sen); + +/* release key created by str_create_key, only right way to release key + * I + */ +void str_release_key (char *key, int case_sen); + +#endif + diff --git a/src/strutil8bit.c b/src/strutil8bit.c new file mode 100644 index 000000000..fd3d4b327 --- /dev/null +++ b/src/strutil8bit.c @@ -0,0 +1,673 @@ +/* 8bit strings utilities + Copyright (C) 2007 Free Software Foundation, Inc. + + Written 2007 by: + Rostislav Benes + + The file_date routine is mostly from GNU's fileutils package, + written by Richard Stallman and David MacKenzie. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include + +#include "global.h" +#include "strutil.h" + +/* functions for singlebyte encodings, all characters have width 1 + * using standard system functions; ctype arguments are cast to unsigned char + * there are only small differences between functions in strutil8bit.c + * and strutilascii.c + */ + +static const char replch = '?'; + +static void +str_8bit_insert_replace_char (struct str_buffer *buffer) +{ + str_insert_char (replch, buffer); +} + +static int +str_8bit_is_valid_string (const char *text) +{ + return 1; +} + +static int +str_8bit_is_valid_char (const char *ch, size_t size) +{ + return 1; +} + +static void +str_8bit_cnext_char (const char **text) +{ + (*text)++; +} + +static void +str_8bit_cprev_char (const char **text) +{ + (*text)--; +} + +static int +str_8bit_cnext_noncomb_char (const char **text) +{ + if (*text[0] != '\0') { + (*text)++; + return 1; + } else return 0; +} + +static int +str_8bit_cprev_noncomb_char (const char **text, const char *begin) +{ + if ((*text) != begin) { + (*text)--; + return 1; + } else return 0; +} + +static int +str_8bit_isspace (const char *text) +{ + return isspace ((unsigned char) text[0]); /* bytes >= 0x80 must not reach ctype as negative values */ +} + +static int +str_8bit_ispunct (const char *text) +{ + return ispunct ((unsigned char) text[0]); +} + +static int +str_8bit_isalnum (const char *text) +{ + return isalnum ((unsigned char) text[0]); +} + +static int +str_8bit_isdigit (const char *text) +{ + return isdigit ((unsigned char) text[0]); +} + +static int +str_8bit_isprint (const char *text) +{ + return isprint ((unsigned char) text[0]); +} + +static int +str_8bit_iscombiningmark (const char *text) +{ + return 0; +} + +static int +str_8bit_toupper (const char *text, char **out, size_t *remain) +{ + if (*remain <= 1) return 0; + (*out)[0] = toupper ((unsigned char) text[0]); + (*out)++; + (*remain)--; + return 1; +} + +static int +str_8bit_tolower (const char *text, char **out, size_t *remain) +{ + if (*remain <= 1) return 0; + (*out)[0] = tolower ((unsigned char) text[0]); + (*out)++; + (*remain)--; + return 1; +} + +static int 
+str_8bit_length (const char *text) +{ + return strlen (text); +} + +static int +str_8bit_length2 (const char *text, int size) +{ + return (size >= 0) ? min (strlen (text), size) : strlen (text); +} + +static int +_str_8bit_vfs_convert_to (str_conv_t coder, char *string, + int size, struct str_buffer *buffer) +{ + int state; + size_t left; + size_t nconv; + + errno = 0; + + state = 0; + + left = (size >= 0) ? size : strlen (string); + + if (coder == (iconv_t) (-1)) return ESTR_FAILURE; + + iconv(coder, NULL, NULL, NULL, NULL); + + while (((int)left) > 0) { + nconv = iconv(coder, &string, &left, + &(buffer->actual), &(buffer->remain)); + if (nconv == (size_t) (-1)) { + switch (errno) { + case EINVAL: + return ESTR_FAILURE; + case EILSEQ: + string++; + left--; + str_insert_char ('?', buffer); + state = ESTR_PROBLEM; + break; + case E2BIG: + str_incrase_buffer (buffer); + break; + } + } + } + return state; +} + +int +str_8bit_vfs_convert_to (str_conv_t coder, const char *string, + int size, struct str_buffer *buffer) +{ + int result; + + if (coder == str_cnv_not_convert) { + str_insert_string2 (string, size, buffer); + result = 0; + } else result = _str_8bit_vfs_convert_to (coder, (char*)string, size, buffer); + buffer->actual[0] = '\0'; + + return result; +} + + +static const char * +str_8bit_term_form (const char *text) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + size_t length; + size_t pos = 0; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + for (; pos < length && remain > 1; pos++, actual++, remain--) { /* unprintable bytes become '.'; ctype needs unsigned char */ + actual[0] = isprint ((unsigned char) text[pos]) ? 
text[pos] : '.'; + } + + actual[0] = '\0'; + return result; +} + +static const char * +str_8bit_fit_to_term (const char *text, int width, int just_mode) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + int ident; + size_t length; + size_t pos = 0; + + length = strlen (text); + actual = result; + remain = sizeof(result); + + if (length <= width) { + ident = 0; + switch (HIDE_FIT (just_mode)) { + case J_CENTER_LEFT: + case J_CENTER: + ident = (width - length) / 2; + break; + case J_RIGHT: + ident = width - length; + break; + } + + if (remain <= ident) goto finally; + memset (actual, ' ', ident); + actual+= ident; + remain-= ident; + + for (; pos < length && remain > 1; pos++, actual++, remain--) { /* ctype needs unsigned char values */ + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + if (width - length - ident > 0) { + if (remain <= width - length - ident) goto finally; + memset (actual, ' ', width - length - ident); + actual+= width - length - ident; + remain-= width - length - ident; + } + } else { + if (IS_FIT (just_mode)) { + for (; pos + 1 <= width / 2 && remain > 1; + actual++, pos++, remain--) { + + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + + if (remain <= 1) goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos+= length - width + 1; + + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + } else { + ident = 0; + switch (HIDE_FIT (just_mode)) { + case J_CENTER: + ident = (length - width) / 2; + break; + case J_RIGHT: + ident = length - width; + break; + } + + pos+= ident; + for (; pos < ident + width && remain > 1; + pos++, actual++, remain--) { + + actual[0] = isprint ((unsigned char) text[pos]) ? 
text[pos] : '.'; + } + + } + } + finally: + actual[0] = '\0'; + return result; +} + +static const char * +str_8bit_term_trim (const char *text, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t pos = 0; + size_t length; + + length = strlen (text); + actual = result; + remain = sizeof (result); + + if (width < length) { + if (width <= 3) { + memset (actual, '.', width); + actual+= width; + remain-= width; + } else { + memset (actual, '.', 3); + actual+= 3; + remain-= 3; + + pos+= length - width + 3; + + for (; pos < length && remain > 1; pos++, actual++, remain--) { /* ctype needs unsigned char values */ + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + } + } else { + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + } + + actual[0] = '\0'; + return result; +} + +static int +str_8bit_term_width2 (const char *text, size_t length) +{ + return (length != (size_t)(-1)) + ? min (strlen (text), length) + : strlen (text); +} + +static int +str_8bit_term_width1 (const char *text) +{ + return str_8bit_term_width2 (text, (size_t)(-1)); +} + +static int +str_8bit_term_char_width (const char *text) +{ + return 1; +} + +static void +str_8bit_msg_term_size (const char *text, int *lines, int *columns) +{ + (*lines) = 1; + (*columns) = 0; + + char *p, *tmp = g_strdup (text); + char *q; + char c = '\0'; + int width; + p = tmp; + + for (;;) { + q = strchr (p, '\n'); + if (q != NULL) { + c = q[0]; + q[0] = '\0'; + } + + width = str_8bit_term_width1 (p); + if (width > (*columns)) (*columns) = width; + + if (q == NULL) + break; + q[0] = c; + p = q + 1; + (*lines)++; + } + g_free (tmp); +} + +static const char * +str_8bit_term_substring (const char *text, int start, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t pos = 0; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if (start < length) { + pos+= start; + for (; pos 
< length && width > 0 && remain > 1; + pos++, width--, actual++, remain--) { + + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; /* ctype needs unsigned char values */ + } + } + + for (; width > 0 && remain > 1; actual++, remain--, width--) { + actual[0] = ' '; + } + + actual[0] = '\0'; + return result; +} + +static const char * +str_8bit_trunc (const char *text, int width) +{ + static char result[MC_MAXPATHLEN]; + int remain; + char *actual; + size_t pos = 0; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if (length > width) { + for (; pos + 1 <= width / 2 && remain > 1; actual++, pos++, remain--) { + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + + if (remain <= 1) goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos+= length - width + 1; + + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + } else { + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isprint ((unsigned char) text[pos]) ? text[pos] : '.'; + } + } + + finally: + actual[0] = '\0'; + return result; +} + +static int +str_8bit_offset_to_pos (const char *text, size_t length) +{ + return (int)length; +} + +static int +str_8bit_column_to_pos (const char *text, size_t pos) +{ + return (int)pos; +} + +static char * +str_8bit_create_search_needle (const char *needle, int case_sen) +{ + return (char*) needle; +} + +static void +str_8bit_release_search_needle (char *needle, int case_sen) +{ +} + +static const char * +str_8bit_search_first (const char *text, const char *search, int case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + size_t offsset; + + fold_text = (case_sen) ? (char*) text : g_strdown (g_strdup (text)); + fold_search = (case_sen) ? 
(char*) search : g_strdown (g_strdup (search)); /* the needle, not the haystack */ + + match = g_strstr_len (fold_text, -1, fold_search); + if (match != NULL) { + offsset = match - fold_text; + match = text + offsset; + } + + if (!case_sen) { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +static const char * +str_8bit_search_last (const char *text, const char *search, int case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + size_t offsset; + + fold_text = (case_sen) ? (char*) text : g_strdown (g_strdup (text)); + fold_search = (case_sen) ? (char*) search : g_strdown (g_strdup (search)); /* the needle, not the haystack */ + + match = g_strrstr_len (fold_text, -1, fold_search); + if (match != NULL) { + offsset = match - fold_text; /* map the hit in the folded copy back onto text */ + match = text + offsset; + } + + if (!case_sen) { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +static int +str_8bit_compare (const char *t1, const char *t2) +{ + return strcmp (t1, t2); +} + +static int +str_8bit_ncompare (const char *t1, const char *t2) +{ + return strncmp (t1, t2, min (strlen (t1), strlen (t2))); +} + +static int +str_8bit_casecmp (const char *t1, const char *t2) +{ + return g_strcasecmp (t1, t2); +} + +static int +str_8bit_ncasecmp (const char *t1, const char *t2) +{ + return g_strncasecmp (t1, t2, min (strlen (t1), strlen (t2))); +} + +static int +str_8bit_prefix (const char *text, const char *prefix) +{ + int result; + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && text[result] == prefix[result]; result++); + return result; +} + +static int +str_8bit_caseprefix (const char *text, const char *prefix) +{ + int result; + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && toupper ((unsigned char) text[result]) == toupper ((unsigned char) prefix[result]); + result++); + return result; +} + + + +static void +str_8bit_fix_string (char *text) +{ +} + +static char * +str_8bit_create_key (const char *text, int case_sen) +{ + return (case_sen) ? 
(char*)text : g_strdown (g_strdup (text)); +} + +static int +str_8bit_key_collate (const char *t1, const char *t2, int case_sen) +{ + if (case_sen) return strcmp (t1, t2); + else return strcoll (t1, t2); +} + +static void +str_8bit_release_key (char *key, int case_sen) +{ + if (!case_sen) g_free (key); +} + +struct str_class +str_8bit_init () +{ + struct str_class result; + + result.vfs_convert_to = str_8bit_vfs_convert_to; + result.insert_replace_char = str_8bit_insert_replace_char; + result.is_valid_string = str_8bit_is_valid_string; + result.is_valid_char = str_8bit_is_valid_char; + result.cnext_char = str_8bit_cnext_char; + result.cprev_char = str_8bit_cprev_char; + result.cnext_char_safe = str_8bit_cnext_char; + result.cprev_char_safe = str_8bit_cprev_char; + result.cnext_noncomb_char = str_8bit_cnext_noncomb_char; + result.cprev_noncomb_char = str_8bit_cprev_noncomb_char; + result.isspace = str_8bit_isspace; + result.ispunct = str_8bit_ispunct; + result.isalnum = str_8bit_isalnum; + result.isdigit = str_8bit_isdigit; + result.isprint = str_8bit_isprint; + result.iscombiningmark = str_8bit_iscombiningmark; + result.toupper = str_8bit_toupper; + result.tolower = str_8bit_tolower; + result.length = str_8bit_length; + result.length2 = str_8bit_length2; + result.length_noncomb = str_8bit_length; + result.fix_string = str_8bit_fix_string; + result.term_form = str_8bit_term_form; + result.fit_to_term = str_8bit_fit_to_term; + result.term_trim = str_8bit_term_trim; + result.term_width2 = str_8bit_term_width2; + result.term_width1 = str_8bit_term_width1; + result.term_char_width = str_8bit_term_char_width; + result.msg_term_size = str_8bit_msg_term_size; + result.term_substring = str_8bit_term_substring; + result.trunc = str_8bit_trunc; + result.offset_to_pos = str_8bit_offset_to_pos; + result.column_to_pos = str_8bit_column_to_pos; + result.create_search_needle = str_8bit_create_search_needle; + result.release_search_needle = str_8bit_release_search_needle; + 
result.search_first = str_8bit_search_first; + result.search_last = str_8bit_search_last; + result.compare = str_8bit_compare; + result.ncompare = str_8bit_ncompare; + result.casecmp = str_8bit_casecmp; + result.ncasecmp = str_8bit_ncasecmp; + result.prefix = str_8bit_prefix; + result.caseprefix = str_8bit_caseprefix; + result.create_key = str_8bit_create_key; + result.create_key_for_filename = str_8bit_create_key; + result.key_collate = str_8bit_key_collate; + result.release_key = str_8bit_release_key; + + return result; +} diff --git a/src/strutilascii.c b/src/strutilascii.c new file mode 100644 index 000000000..5c70b75e4 --- /dev/null +++ b/src/strutilascii.c @@ -0,0 +1,657 @@ +/* ASCII strings utilities + Copyright (C) 2007 Free Software Foundation, Inc. + + Written 2007 by: + Rostislav Benes + + The file_date routine is mostly from GNU's fileutils package, + written by Richard Stallman and David MacKenzie. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include + +#include "global.h" +#include "strutil.h" + +/* using g_ascii function from glib + * on terminal are showed only ascii characters (lower then 0x80) + */ + +static const char replch = '?'; + +static void +str_ascii_insert_replace_char (struct str_buffer *buffer) +{ + str_insert_char (replch, buffer); +} + +static int +str_ascii_is_valid_string (const char *text) +{ + return 1; +} + +static int +str_ascii_is_valid_char (const char *ch, size_t size) +{ + return 1; +} + +static void +str_ascii_cnext_char (const char **text) +{ + (*text)++; +} + +static void +str_ascii_cprev_char (const char **text) +{ + (*text)--; +} + +static int +str_ascii_cnext_noncomb_char (const char **text) +{ + if (*text[0] != '\0') { + (*text)++; + return 1; + } else return 0; +} + +static int +str_ascii_cprev_noncomb_char (const char **text, const char *begin) +{ + if ((*text) != begin) { + (*text)--; + return 1; + } else return 0; +} + +static int +str_ascii_isspace (const char *text) +{ + return g_ascii_isspace ((gchar) text[0]); +} + +static int +str_ascii_ispunct (const char *text) +{ + return g_ascii_ispunct ((gchar) text[0]); +} + +static int +str_ascii_isalnum (const char *text) +{ + return g_ascii_isalnum ((gchar) text[0]); +} + +static int +str_ascii_isdigit (const char *text) +{ + return g_ascii_isdigit ((gchar) text[0]); +} + +static int +str_ascii_isprint (const char *text) +{ + return g_ascii_isprint ((gchar) text[0]); +} + +static int +str_ascii_iscombiningmark (const char *text) +{ + return 0; +} + +static int +str_ascii_toupper (const char *text, char **out, size_t *remain) +{ + if (*remain <= 1) return 0; + (*out)[0] = (char) g_ascii_toupper ((gchar) text[0]); + (*out)++; + (*remain)--; + return 1; +} + +static int +str_ascii_tolower (const char *text, char **out, size_t *remain) +{ + if (*remain <= 1) return 0; + (*out)[0] = (char) g_ascii_tolower ((gchar) text[0]); + (*out)++; + (*remain)--; + return 1; +} + +static 
int +str_ascii_length (const char *text) +{ + return strlen (text); +} + +static int +str_ascii_length2 (const char *text, int size) +{ + return (size >= 0) ? min (strlen (text), size) : strlen (text); +} + +int +str_ascii_vfs_convert_to (str_conv_t coder, const char *string, + int size, struct str_buffer *buffer) +{ + str_insert_string2 (string, size, buffer); + return 0; +} + + +static const char * +str_ascii_term_form (const char *text) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + size_t length; + size_t pos = 0; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + /* go throw all characters and check, if they are ascii and printable */ + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + + actual[0] = '\0'; + return result; +} + +static const char * +str_ascii_fit_to_term (const char *text, int width, int just_mode) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + int ident; + size_t length; + size_t pos = 0; + + length = strlen (text); + actual = result; + remain = sizeof(result); + + if (length <= width) { + ident = 0; + switch (HIDE_FIT (just_mode)) { + case J_CENTER_LEFT: + case J_CENTER: + ident = (width - length) / 2; + break; + case J_RIGHT: + ident = width - length; + break; + } + + /* add space before text */ + if (remain <= ident) goto finally; + memset (actual, ' ', ident); + actual+= ident; + remain-= ident; + + /* copy all characters */ + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? 
actual[0] : '.'; + } + + /* add space after text */ + if (width - length - ident > 0) { + if (remain <= width - length - ident) goto finally; + memset (actual, ' ', width - length - ident); + actual+= width - length - ident; + remain-= width - length - ident; + } + } else { + if (IS_FIT (just_mode)) { + /* copy prefix of text, that is not wider than width / 2 */ + for (; pos + 1 <= width / 2 && remain > 1; + actual++, pos++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) + ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) + ? actual[0] : '.'; + } + + if (remain <= 1) goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos+= length - width + 1; + + /* copy suffix of text */ + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) + ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) + ? actual[0] : '.'; + } + } else { + ident = 0; + switch (HIDE_FIT (just_mode)) { + case J_CENTER: + ident = (length - width) / 2; + break; + case J_RIGHT: + ident = length - width; + break; + } + + /* copy substring text, substring start from ident and take width + * characters from text */ + pos+= ident; + for (; pos < ident + width && remain > 1; + pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) + ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) + ? 
actual[0] : '.'; + } + + } + } + finally: + actual[0] = '\0'; + return result; +} + +static const char * +str_ascii_term_trim (const char *text, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t pos = 0; + size_t length; + + length = strlen (text); + actual = result; + remain = sizeof (result); + + if (width < length) { + if (width <= 3) { + memset (actual, '.', width); + actual+= width; + remain-= width; + } else { + memset (actual, '.', 3); + actual+= 3; + remain-= 3; + + pos+= length - width + 3; + + /* copy suffix of text*/ + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) + ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) + ? actual[0] : '.'; + } + } + } else { + /* copy all characters */ + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + + actual[0] = '\0'; + return result; +} + +static int +str_ascii_term_width2 (const char *text, size_t length) +{ + return (length != (size_t)(-1)) + ? 
min (strlen (text), length) + : strlen (text); +} + +static int +str_ascii_term_width1 (const char *text) +{ + return str_ascii_term_width2 (text, (size_t)(-1)); +} + +static int +str_ascii_term_char_width (const char *text) +{ + return 1; +} + +static void +str_ascii_msg_term_size (const char *text, int *lines, int *columns) +{ + (*lines) = 1; + (*columns) = 0; + + char *p, *tmp = g_strdup (text); + char *q; + char c = '\0'; + int width; + p = tmp; + + for (;;) { + q = strchr (p, '\n'); + if (q != NULL) { + c = q[0]; + q[0] = '\0'; + } + + width = str_ascii_term_width1 (p); + if (width > (*columns)) (*columns) = width; + + if (q == NULL) + break; + q[0] = c; + p = q + 1; + (*lines)++; + } + g_free (tmp); +} + +static const char * +str_ascii_term_substring (const char *text, int start, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t pos = 0; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if (start < length) { + pos+= start; + /* copy at most width characters from text from start */ + for (; pos < length && width > 0 && remain > 1; + pos++, width--, actual++, remain--) { + + actual[0] = isascii((unsigned char)text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + + /* if text is shorter then width, add space to the end */ + for (; width > 0 && remain > 1; actual++, remain--, width--) { + actual[0] = ' '; + } + + actual[0] = '\0'; + return result; +} + +static const char * +str_ascii_trunc (const char *text, int width) +{ + static char result[MC_MAXPATHLEN]; + int remain; + char *actual; + size_t pos = 0; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if (length > width) { + /* copy prefix of text */ + for (; pos + 1 <= width / 2 && remain > 1; actual++, pos++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) ? 
text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + + if (remain <= 1) goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos+= length - width + 1; + + /* copy suffix of text */ + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } else { + /* copy all characters */ + for (; pos < length && remain > 1; pos++, actual++, remain--) { + actual[0] = isascii((unsigned char)text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + + finally: + actual[0] = '\0'; + return result; +} + +static int +str_ascii_offset_to_pos (const char *text, size_t length) +{ + return (int)length; +} + +static int +str_ascii_column_to_pos (const char *text, size_t pos) +{ + return (int)pos; +} + +static char * +str_ascii_create_search_needle (const char *needle, int case_sen) +{ + return (char*) needle; +} + +static void +str_ascii_release_search_needle (char *needle, int case_sen) +{ +} + +static const char * +str_ascii_search_first (const char *text, const char *search, int case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + size_t offset; + + fold_text = (case_sen) ? (char*) text : g_ascii_strdown (text, -1); + fold_search = (case_sen) ? (char*) search : g_ascii_strdown (search, -1); + + match = g_strstr_len (fold_text, -1, fold_search); + if (match != NULL) { + offset = match - fold_text; + match = text + offset; + } + + if (!case_sen) { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +static const char * +str_ascii_search_last (const char *text, const char *search, int case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + size_t offset; + + fold_text = (case_sen) ? (char*) text : g_ascii_strdown (text, -1); + fold_search = (case_sen) ? 
(char*) search : g_ascii_strdown (search, -1); + + match = g_strrstr_len (fold_text, -1, fold_search); + if (match != NULL) { + offset = match - fold_text; + match = text + offset; + } + + if (!case_sen) { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +static int +str_ascii_compare (const char *t1, const char *t2) +{ + return strcmp (t1, t2); +} + +static int +str_ascii_ncompare (const char *t1, const char *t2) +{ + return strncmp (t1, t2, min (strlen (t1), strlen(t2))); +} + +static int +str_ascii_casecmp (const char *t1, const char *t2) +{ + return g_ascii_strcasecmp (t1, t2); +} + +static int +str_ascii_ncasecmp (const char *t1, const char *t2) +{ + return g_ascii_strncasecmp (t1, t2, min (strlen (t1), strlen (t2))); +} + +static void +str_ascii_fix_string (char *text) +{ + for (; text[0] != '\0'; text++) { + text[0] = ((unsigned char)text[0] < 128) ? text[0] : '?'; + } +} + +static char * +str_ascii_create_key (const char *text, int case_sen) +{ + return (char*)text; +} + +static int +str_ascii_key_collate (const char *t1, const char *t2, int case_sen) +{ + return (case_sen) ? 
strcmp (t1, t2) : g_ascii_strcasecmp (t1, t2); +} + +static void +str_ascii_release_key (char *key, int case_sen) +{ +} + +static int +str_ascii_prefix (const char *text, const char *prefix) +{ + int result; + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && text[result] == prefix[result]; result++); + return result; +} + +static int +str_ascii_caseprefix (const char *text, const char *prefix) +{ + int result; + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && g_ascii_toupper (text[result]) == g_ascii_toupper (prefix[result]); + result++); + return result; +} + + +struct str_class +str_ascii_init () +{ + struct str_class result; + + result.vfs_convert_to = str_ascii_vfs_convert_to; + result.insert_replace_char = str_ascii_insert_replace_char; + result.is_valid_string = str_ascii_is_valid_string; + result.is_valid_char = str_ascii_is_valid_char; + result.cnext_char = str_ascii_cnext_char; + result.cprev_char = str_ascii_cprev_char; + result.cnext_char_safe = str_ascii_cnext_char; + result.cprev_char_safe = str_ascii_cprev_char; + result.cnext_noncomb_char = str_ascii_cnext_noncomb_char; + result.cprev_noncomb_char = str_ascii_cprev_noncomb_char; + result.isspace = str_ascii_isspace; + result.ispunct = str_ascii_ispunct; + result.isalnum = str_ascii_isalnum; + result.isdigit = str_ascii_isdigit; + result.isprint = str_ascii_isprint; + result.iscombiningmark = str_ascii_iscombiningmark; + result.toupper = str_ascii_toupper; + result.tolower = str_ascii_tolower; + result.length = str_ascii_length; + result.length2 = str_ascii_length2; + result.length_noncomb = str_ascii_length; + result.fix_string = str_ascii_fix_string; + result.term_form = str_ascii_term_form; + result.fit_to_term = str_ascii_fit_to_term; + result.term_trim = str_ascii_term_trim; + result.term_width2 = str_ascii_term_width2; + result.term_width1 = str_ascii_term_width1; + result.term_char_width = str_ascii_term_char_width; + result.msg_term_size = 
str_ascii_msg_term_size; + result.term_substring = str_ascii_term_substring; + result.trunc = str_ascii_trunc; + result.offset_to_pos = str_ascii_offset_to_pos; + result.column_to_pos = str_ascii_column_to_pos; + result.create_search_needle = str_ascii_create_search_needle; + result.release_search_needle = str_ascii_release_search_needle; + result.search_first = str_ascii_search_first; + result.search_last = str_ascii_search_last; + result.compare = str_ascii_compare; + result.ncompare = str_ascii_ncompare; + result.casecmp = str_ascii_casecmp; + result.ncasecmp = str_ascii_ncasecmp; + result.prefix = str_ascii_prefix; + result.caseprefix = str_ascii_caseprefix; + result.create_key = str_ascii_create_key; + result.create_key_for_filename = str_ascii_create_key; + result.key_collate = str_ascii_key_collate; + result.release_key = str_ascii_release_key; + + return result; +} diff --git a/src/strutilutf8.c b/src/strutilutf8.c new file mode 100644 index 000000000..dca01ab92 --- /dev/null +++ b/src/strutilutf8.c @@ -0,0 +1,1241 @@ +/* UTF-8 strings utilities + Copyright (C) 2007 Free Software Foundation, Inc. + + Written 2007 by: + Rostislav Benes + + The file_date routine is mostly from GNU's fileutils package, + written by Richard Stallman and David MacKenzie. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "global.h" +#include "strutil.h" + +/* using function for utf-8 from glib */ + +static const char replch[] = "\xEF\xBF\xBD"; + +static int +str_unichar_iscombiningmark (gunichar uni) { + int type = g_unichar_type (uni); + return (type == G_UNICODE_COMBINING_MARK) + || (type == G_UNICODE_ENCLOSING_MARK) + || (type == G_UNICODE_NON_SPACING_MARK); +} + +static void +str_utf8_insert_replace_char (struct str_buffer *buffer) +{ + str_insert_string (replch, buffer); +} + +static int +str_utf8_is_valid_string (const char *text) +{ + return g_utf8_validate (text, -1, NULL); +} + +static int +str_utf8_is_valid_char (const char *ch, size_t size) +{ + switch (g_utf8_get_char_validated (ch, size)) { + case (gunichar)(-2): return -2; + case (gunichar)(-1): return -1; + default : return 1; + } +} + +static void +str_utf8_cnext_char (const char **text) +{ + (*text) = g_utf8_next_char (*text); +} + +static void +str_utf8_cprev_char (const char **text) +{ + (*text) = g_utf8_prev_char (*text); +} + +static void +str_utf8_cnext_char_safe (const char **text) +{ + if (str_utf8_is_valid_char (*text, -1) == 1) + (*text) = g_utf8_next_char (*text); + else + (*text)++; +} + +static void +str_utf8_cprev_char_safe (const char **text) +{ + const char *result = g_utf8_prev_char (*text); + const char *t = result; + str_utf8_cnext_char_safe (&t); + if (t == *text) + (*text) = result; + else + (*text)--; +} + +static void +str_utf8_fix_string (char *text) +{ + gunichar uni; + + while (text[0] != '\0') { + uni = g_utf8_get_char_validated (text, -1); + if ((uni != (gunichar)(-1)) && (uni != (gunichar)(-2))) { + text = g_utf8_next_char (text); + } else { + text[0] = '?'; + text++; + } + } +} + +static int +str_utf8_isspace (const char *text) +{ + gunichar uni = g_utf8_get_char_validated (text, -1); + return g_unichar_isspace (uni); +} + +static int +str_utf8_ispunct (const char *text) +{ + gunichar uni 
= g_utf8_get_char_validated (text, -1); + return g_unichar_ispunct (uni); +} + +static int +str_utf8_isalnum (const char *text) +{ + gunichar uni = g_utf8_get_char_validated (text, -1); + return g_unichar_isalnum (uni); +} + +static int +str_utf8_isdigit (const char *text) +{ + gunichar uni = g_utf8_get_char_validated (text, -1); + return g_unichar_isdigit (uni); +} + +static int +str_utf8_isprint (const char *ch) +{ + gunichar uni = g_utf8_get_char_validated (ch, -1); + return g_unichar_isprint (uni); +} + +static int +str_utf8_iscombiningmark (const char *ch) +{ + gunichar uni = g_utf8_get_char_validated (ch, -1); + return str_unichar_iscombiningmark (uni); +} + +static int +str_utf8_cnext_noncomb_char (const char **text) +{ + int count = 0; + while ((*text)[0] != '\0') { + str_utf8_cnext_char_safe (text); + count++; + if (!str_utf8_iscombiningmark (*text)) break; + } + return count; +} + +static int +str_utf8_cprev_noncomb_char (const char **text, const char *begin) +{ + int count = 0; + while ((*text) != begin) { + str_utf8_cprev_char_safe (text); + count++; + if (!str_utf8_iscombiningmark (*text)) break; + } + return count; +} + +static int +str_utf8_toupper (const char *text, char **out, size_t *remain) +{ + gunichar uni; + size_t left; + + uni = g_utf8_get_char_validated (text, -1); + if (uni == (gunichar)(-1) || uni == (gunichar)(-2)) return 0; + + uni = g_unichar_toupper (uni); + left = g_unichar_to_utf8 (uni, NULL); + if (left >= *remain) return 0; + + left = g_unichar_to_utf8 (uni, *out); + (*out)+= left; + (*remain)-= left; + return 1; +} + +static int +str_utf8_tolower (const char *text, char **out, size_t *remain) +{ + gunichar uni; + size_t left; + + uni = g_utf8_get_char_validated (text, -1); + if (uni == (gunichar)(-1) || uni == (gunichar)(-2)) return 0; + + uni = g_unichar_tolower (uni); + left = g_unichar_to_utf8 (uni, NULL); + if (left >= *remain) return 0; + + left = g_unichar_to_utf8 (uni, *out); + (*out)+= left; + (*remain)-= left; + return 
1; +} + +static int +str_utf8_length (const char* text) +{ + int result = 0; + const char *start; + const char *end; + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') { + if (start != end) { + result+= g_utf8_strlen (start, end - start); + } + result++; + start = end + 1; + } + + if (start == text) { + result = g_utf8_strlen (text, -1); + } else { + if (start[0] != '\0' && start != end) { + result+= g_utf8_strlen (start, end - start); + } + } + + return result; +} + +static int +str_utf8_length2 (const char* text, int size) +{ + int result = 0; + const char *start; + const char *end; + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0' && size > 0) { + if (start != end) { + result+= g_utf8_strlen (start, min (end - start, size)); + size-= end - start; + } + result+= (size > 0); + size--; + start = end + 1; + } + + if (start == text) { + result = g_utf8_strlen (text, size); + } else { + if (start[0] != '\0' && start != end && size > 0) { + result+= g_utf8_strlen (start, min (end - start, size)); + } + } + + return result; +} + +static int +str_utf8_length_noncomb (const char *text) +{ + int result = 0; + const char *t = text; + + while (t[0] != '\0') { + str_utf8_cnext_noncomb_char (&t); + result++; + } + + return result; +} + +static void +str_utf8_questmark_sustb (char **string, size_t *left, struct str_buffer *buffer) +{ + char *next = g_utf8_next_char (*string); + (*left)-= next - (*string); + (*string) = next; + str_insert_char ('?', buffer); +} + +static int +_str_utf8_vfs_convert_to (str_conv_t coder, const char *string, + int size, struct str_buffer *buffer) +{ + int state = 0; + size_t left; + size_t nconv; + char *composed, *c; + const char *start, *end; + + errno = 0; + + size = (size >= 0) ? 
size : strlen (string); + if (coder == (iconv_t) (-1)) return ESTR_FAILURE; + iconv(coder, NULL, NULL, NULL, NULL); + + start = string; + while (size > 0) { + end = strchr (start, PATH_SEP); + end = (end == NULL || end >= start + size) ? start + size : end + 1; + if (g_utf8_validate (start, end - start, NULL)) { + c = composed = g_utf8_normalize (start, end - start, G_NORMALIZE_DEFAULT_COMPOSE); + left = strlen (composed); + while (((int)left) > 0) { + nconv = iconv(coder, &c, &left, &(buffer->actual), &(buffer->remain)); + if (nconv == (size_t) (-1)) { + switch (errno) { + case EINVAL: + g_free (composed); + return ESTR_FAILURE; + case EILSEQ: + str_utf8_questmark_sustb (&c, &left, buffer); + state = ESTR_PROBLEM; + break; + case E2BIG: + str_incrase_buffer (buffer); + break; + } + } + } + g_free (composed); + } else { + str_insert_string2 (start, end - start, buffer); + } + size-= end - start; + start = end; + } + return state; +} + +static int +str_utf8_vfs_convert_to (str_conv_t coder, const char *string, + int size, struct str_buffer *buffer) +{ + int result; + + if (coder == str_cnv_not_convert) { + str_insert_string2 (string, size, buffer); + result = 0; + } else result = _str_utf8_vfs_convert_to (coder, string, size, buffer); + buffer->actual[0] = '\0'; + +return result; +} + +struct term_form { + char text[BUF_MEDIUM * 6]; + size_t width; + int compose; +}; + +/* utiliti function, that make string valid in utf8 and all characters printable + * return width of string too*/ +static const struct term_form * +str_utf8_make_make_term_form (const char *text, size_t length) +{ + static struct term_form result; + gunichar uni; + size_t left; + char *actual; + + result.text[0] = '\0'; + result.width = 0; + result.compose = 0; + actual = result.text; + + /* check if text start with combining character, + * add space at begin in this case */ + if (length != 0 && text[0] != '\0') { + uni = g_utf8_get_char_validated (text, -1); + if ((uni != (gunichar)(-1)) && (uni != 
(gunichar)(-2))) { + if (str_unichar_iscombiningmark (uni)) { + actual[0] = ' '; + actual++; + result.width++; + result.compose = 1; + } + } + } + + while (length != 0 && text[0] != '\0') { + uni = g_utf8_get_char_validated (text, -1); + if ((uni != (gunichar)(-1)) && (uni != (gunichar)(-2))) { + if (g_unichar_isprint(uni)) { + left = g_unichar_to_utf8 (uni, actual); + actual+= left; + if (!str_unichar_iscombiningmark (uni)) { + result.width++; + if (g_unichar_iswide(uni)) result.width++; + } else result.compose = 1; + } else { + actual[0] = '.'; + actual++; + result.width++; + } + text = g_utf8_next_char (text); + } else { + text++; + //actual[0] = '?'; + memcpy (actual, replch, strlen (replch)); + actual+= strlen (replch); + result.width++; + } + if (length != (size_t) (-1)) length--; + } + actual[0] = '\0'; + + return &result; +} + +static const char * +str_utf8_term_form (const char *text) +{ + static char result[BUF_MEDIUM * 6]; + const struct term_form *pre_form; + char *composed; + + pre_form = str_utf8_make_make_term_form (text, (size_t)(-1)); + if (pre_form->compose) { + composed = g_utf8_normalize (pre_form->text, -1, G_NORMALIZE_DEFAULT_COMPOSE); + g_strlcpy (result, composed, sizeof (result)); + g_free (composed); + } else { + g_strlcpy (result, pre_form->text, sizeof (result)); + } + return result; +} + +struct utf8_tool { + char *actual; + size_t remain; + const char *cheked; + int ident; + int compose; +}; + +/* utiliti function, that copy all characters from cheked to actual */ +static int +utf8_tool_copy_chars_to_end (struct utf8_tool *tool) +{ + size_t left; + gunichar uni; + + while (tool->cheked[0] != '\0') { + uni = g_utf8_get_char (tool->cheked); + tool->compose|= str_unichar_iscombiningmark (uni); + left = g_unichar_to_utf8 (uni, NULL); + if (tool->remain <= left) return 0; + left = g_unichar_to_utf8 (uni, tool->actual); + tool->actual+= left; + tool->remain-= left; + tool->cheked = g_utf8_next_char (tool->cheked); + } + return 1; +} + +/* 
utiliti function, that copy characters from cheked to actual until ident is + * smaller than to_ident */ +static int +utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident) +{ + size_t left; + gunichar uni; + int w; + + while (tool->cheked[0] != '\0') { + uni = g_utf8_get_char (tool->cheked); + if (!str_unichar_iscombiningmark (uni)) { + w = 1; + if (g_unichar_iswide (uni)) w++; + if (tool->ident + w > to_ident) return 1; + } else { + w = 0; + tool->compose = 1; + } + + left = g_unichar_to_utf8 (uni, NULL); + if (tool->remain <= left) return 0; + left = g_unichar_to_utf8 (uni, tool->actual); + tool->actual+= left; + tool->remain-= left; + tool->cheked = g_utf8_next_char (tool->cheked); + tool->ident+= w; + } + return 1; +} + +/* utiliti function, add count spaces to actual */ +static int +utf8_tool_insert_space (struct utf8_tool *tool, int count) +{ + if (count <= 0) return 1; + if (tool->remain <= count) return 0; + memset (tool->actual, ' ', count); + tool->actual+= count; + tool->remain-= count; + return 1; +} + +/* utiliti function, add one characters to actual */ +static int +utf8_tool_insert_char (struct utf8_tool *tool, char ch) +{ + if (tool->remain <= 1) return 0; + tool->actual[0] = ch; + tool->actual++; + tool->remain--; + return 1; +} + +/* utiliti function, thah skip characters from cheked until ident is greater or + * equal to to_ident */ +static int +utf8_tool_skip_chars_to (struct utf8_tool *tool, int to_ident) +{ + gunichar uni; + + while (to_ident > tool->ident && tool->cheked[0] != '\0') { + uni = g_utf8_get_char (tool->cheked); + if (!str_unichar_iscombiningmark (uni)) { + tool->ident++; + if (g_unichar_iswide (uni)) tool->ident++; + } + tool->cheked = g_utf8_next_char (tool->cheked); + } + uni = g_utf8_get_char (tool->cheked); + while (str_unichar_iscombiningmark (uni)) { + tool->cheked = g_utf8_next_char (tool->cheked); + uni = g_utf8_get_char (tool->cheked); + } + return 1; +} + +static void +utf8_tool_compose (char *buffer, size_t 
size) +{ + char *composed = g_utf8_normalize (buffer, -1, G_NORMALIZE_DEFAULT_COMPOSE); + g_strlcpy (buffer, composed, size); + g_free (composed); +} + + +static const char * +str_utf8_fit_to_term (const char *text, int width, int just_mode) +{ + static char result[BUF_MEDIUM * 6]; + const struct term_form *pre_form; + struct utf8_tool tool; + + pre_form = str_utf8_make_make_term_form (text, (size_t)(-1)); + tool.cheked = pre_form->text; + tool.actual = result; + tool.remain = sizeof(result); + + if (pre_form->width <= width) { + tool.ident = 0; + switch (HIDE_FIT (just_mode)) { + case J_CENTER_LEFT: + case J_CENTER: + tool.ident = (width - pre_form->width) / 2; + break; + case J_RIGHT: + tool.ident = width - pre_form->width; + break; + } + + utf8_tool_insert_space (&tool, tool.ident); + utf8_tool_copy_chars_to_end (&tool); + utf8_tool_insert_space (&tool, width - pre_form->width - tool.ident); + } else { + if (IS_FIT (just_mode)) { + tool.ident = 0; + utf8_tool_copy_chars_to (&tool, width / 2); + utf8_tool_insert_char (&tool, '~'); + + tool.ident = 0; + utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1); + utf8_tool_copy_chars_to_end (&tool); + utf8_tool_insert_space (&tool, + width - (pre_form->width - tool.ident + 1)); + } else { + tool.ident = 0; + switch (HIDE_FIT (just_mode)) { + case J_CENTER: + tool.ident = (width - pre_form->width) / 2; + break; + case J_RIGHT: + tool.ident = width - pre_form->width; + break; + } + + utf8_tool_skip_chars_to (&tool, 0); + utf8_tool_insert_space (&tool, tool.ident); + utf8_tool_copy_chars_to (&tool, width); + utf8_tool_insert_space (&tool, width - tool.ident); + } + } + + tool.actual[0] = '\0'; + if (tool.compose) utf8_tool_compose (result, sizeof (result)); + return result; +} + +static const char * +str_utf8_term_trim (const char *text, int width) +{ + static char result[BUF_MEDIUM * 6]; + const struct term_form *pre_form; + struct utf8_tool tool; + + pre_form = str_utf8_make_make_term_form (text, (size_t)(-1)); 
+ + tool.cheked = pre_form->text; + tool.actual = result; + tool.remain = sizeof(result); + + if (width < pre_form->width) { + if (width <= 3) { + memset (tool.actual, '.', width); + tool.actual+= width; + tool.remain-= width; + } else { + memset (tool.actual, '.', 3); + tool.actual+= 3; + tool.remain-= 3; + + tool.ident = 0; + utf8_tool_skip_chars_to (&tool, pre_form->width - width + 3); + utf8_tool_copy_chars_to_end (&tool); + } + } else { + utf8_tool_copy_chars_to_end (&tool); + } + + tool.actual[0] = '\0'; + if (tool.compose) utf8_tool_compose (result, sizeof (result)); + return result; +} + +static int +str_utf8_term_width2 (const char *text, size_t length) +{ + const struct term_form *result; + + result = str_utf8_make_make_term_form (text, length); + return result->width; +} + +static int +str_utf8_term_width1 (const char *text) +{ + return str_utf8_term_width2 (text, (size_t)(-1)); +} + +static int +str_utf8_term_char_width (const char *text) +{ + gunichar uni = g_utf8_get_char_validated (text, -1); + return (str_unichar_iscombiningmark (uni)) ? 0 + : ((g_unichar_iswide (uni)) ? 
2 : 1); +} + +static void +str_utf8_msg_term_size (const char *text, int *lines, int *columns) +{ + (*lines) = 1; + (*columns) = 0; + + char *p, *tmp = g_strdup (text); + char *q; + char c = '\0'; + int width; + + p = tmp; + for (;;) { + q = strchr (p, '\n'); + if (q != NULL) { + c = q[0]; + q[0] = '\0'; + } + + width = str_utf8_term_width1 (p); + if (width > (*columns)) (*columns) = width; + + if (q == NULL) + break; + q[0] = c; + p = q + 1; + (*lines)++; + } + g_free (tmp); +} + +static const char * +str_utf8_term_substring (const char *text, int start, int width) +{ + static char result[BUF_MEDIUM * 6]; + const struct term_form *pre_form; + struct utf8_tool tool; + + pre_form = str_utf8_make_make_term_form (text, (size_t)(-1)); + + tool.cheked = pre_form->text; + tool.actual = result; + tool.remain = sizeof(result); + + tool.ident = -start; + utf8_tool_skip_chars_to (&tool, 0); + if (tool.ident < 0) tool.ident = 0; + utf8_tool_insert_space (&tool, tool.ident); + + utf8_tool_copy_chars_to (&tool, width); + utf8_tool_insert_space (&tool, width - tool.ident); + + tool.actual[0] = '\0'; + if (tool.compose) utf8_tool_compose (result, sizeof (result)); + return result; +} + +static const char * +str_utf8_trunc (const char *text, int width) +{ + static char result[MC_MAXPATHLEN * 6 * 2]; + const struct term_form *pre_form; + struct utf8_tool tool; + + pre_form = str_utf8_make_make_term_form (text, (size_t)(-1)); + + tool.cheked = pre_form->text; + tool.actual = result; + tool.remain = sizeof(result); + + if (pre_form->width > width) { + tool.ident = 0; + utf8_tool_copy_chars_to (&tool, width / 2); + utf8_tool_insert_char (&tool, '~'); + + tool.ident = 0; + utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1); + utf8_tool_copy_chars_to_end (&tool); + } else { + utf8_tool_copy_chars_to_end (&tool); + } + + tool.actual[0] = '\0'; + if (tool.compose) utf8_tool_compose (result, sizeof (result)); + return result; +} + +static int +str_utf8_offset_to_pos (const char 
*text, size_t length) +{ + if (str_utf8_is_valid_string (text)) + return g_utf8_offset_to_pointer (text, length) - text; + else { + int result; + struct str_buffer *buffer = str_get_buffer (); + str_insert_string (text, buffer); + str_utf8_fix_string (buffer->data); + result = g_utf8_offset_to_pointer (buffer->data, length) - buffer->data; + str_release_buffer (buffer); + return result; + } +} + +static int +str_utf8_column_to_pos (const char *text, size_t pos) +{ + static int result; + gunichar uni; + int width; + + width = 0; + result = 0; + + while (text[0] != '\0') { + uni = g_utf8_get_char_validated (text, 6); + if ((uni != (gunichar)(-1)) && (uni != (gunichar)(-2))) { + if (g_unichar_isprint(uni)) { + if (!str_unichar_iscombiningmark (uni)) { + width++; + if (g_unichar_iswide (uni)) width++; + } + } else { + width++; + } + text = g_utf8_next_char (text); + } else { + text++; + width++; + } + if (width > pos) return result; + + result++; + } + + return result; +} + +static char * +str_utf8_create_search_needle (const char *needle, int case_sen) +{ + if (needle != NULL) { + if (case_sen) { + return g_utf8_normalize (needle, -1, G_NORMALIZE_ALL); + } else { + char *fold = g_utf8_casefold (needle, -1); + char *result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + g_free (fold); + return result; + } + } else return NULL; +} + +static void +str_utf8_release_search_needle (char *needle, int case_sen) +{ + if (needle != NULL) g_free (needle); +} + +static const char * +str_utf8_search_first (const char *text, const char *search, int case_sen) +{ + char *fold_text; + char *deco_text; + const char *match; + const char *result = NULL; + const char *m; + + fold_text = (case_sen) ? 
(char*)text : g_utf8_casefold (text, -1); + deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL); + + match = deco_text; + do { + match = g_strstr_len (match, -1, search); + if (match != NULL) { + if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) && + !str_utf8_iscombiningmark (match + strlen (search))) { + + result = text; + m = deco_text; + while (m < match) { + str_utf8_cnext_noncomb_char (&m); + str_utf8_cnext_noncomb_char (&result); + } + } else { + str_utf8_cnext_char (&match); + } + } + } while (match != NULL && result == NULL); + + g_free (deco_text); + if (!case_sen) g_free (fold_text); + + return result; +} + +static const char * +str_utf8_search_last (const char *text, const char *search, int case_sen) +{ + char *fold_text; + char *deco_text; + char *match; + const char *result = NULL; + const char *m; + + fold_text = (case_sen) ? (char*)text : g_utf8_casefold (text, -1); + deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL); + + do { + match = g_strrstr_len (deco_text, -1, search); + if (match != NULL) { + if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) && + !str_utf8_iscombiningmark (match + strlen (search))) { + + result = text; + m = deco_text; + while (m < match) { + str_utf8_cnext_noncomb_char (&m); + str_utf8_cnext_noncomb_char (&result); + } + } else { + match[0] = '\0'; + } + } + } while (match != NULL && result == NULL); + + g_free (deco_text); + if (!case_sen) g_free (fold_text); + + return result; +} + +static char * +str_utf8_normalize (const char *text) +{ + struct str_buffer *fixed = str_get_buffer (); + char *tmp; + char *result; + const char *start; + const char *end; + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') { + if (start != end) { + tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL); + str_insert_string (tmp, fixed); + g_free (tmp); + } + str_insert_char (end[0], fixed); + start = end + 1; + } + + if (start == text) { + result = 
g_utf8_normalize (text, -1, G_NORMALIZE_ALL); + } else { + if (start[0] != '\0' && start != end) { + tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL); + str_insert_string (tmp, fixed); + g_free (tmp); + } + result = g_strdup (fixed->data); + } + + str_release_buffer (fixed); + + return result; +} + +static char * +str_utf8_casefold_normalize (const char *text) +{ + struct str_buffer *fixed = str_get_buffer (); + char *tmp, *fold; + char *result; + const char *start; + const char *end; + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') { + if (start != end) { + fold = g_utf8_casefold (start, end - start); + tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + str_insert_string (tmp, fixed); + g_free (tmp); + g_free (fold); + } + str_insert_char (end[0], fixed); + start = end + 1; + } + + if (start == text) { + fold = g_utf8_casefold (text, -1); + result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + g_free (fold); + } else { + if (start[0] != '\0' && start != end) { + fold = g_utf8_casefold (start, end - start); + tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + str_insert_string (tmp, fixed); + g_free (tmp); + g_free (fold); + } + result = g_strdup (fixed->data); + } + + str_release_buffer (fixed); + + return result; +} + +static int +str_utf8_compare (const char *t1, const char *t2) +{ + char *n1, *n2; + int result; + + n1 = str_utf8_normalize (t1); + n2 = str_utf8_normalize (t2); + + result = strcmp (n1, n2); + + g_free (n1); + g_free (n2); + + return result; +} + +static int +str_utf8_ncompare (const char *t1, const char *t2) +{ + char *n1, *n2; + int result; + + n1 = str_utf8_normalize (t1); + n2 = str_utf8_normalize (t2); + + result = strncmp (n1, n2, min (strlen (n1), strlen (n2))); + + g_free (n1); + g_free (n2); + + return result; +} + +static int +str_utf8_casecmp (const char *t1, const char *t2) +{ + char *n1, *n2; + int result; + + n1 = str_utf8_casefold_normalize (t1); + n2 = 
str_utf8_casefold_normalize (t2); + + result = strcmp (n1, n2); + + g_free (n1); + g_free (n2); + + return result; +} + +static int +str_utf8_ncasecmp (const char *t1, const char *t2) +{ + char *n1, *n2; + int result; + + n1 = str_utf8_casefold_normalize (t1); + n2 = str_utf8_casefold_normalize (t2); + + result = strncmp (n1, n2, min (strlen (n1), strlen (n2))); + + g_free (n1); + g_free (n2); + + return result; +} + +static int +str_utf8_prefix (const char *text, const char *prefix) +{ + char *t = str_utf8_normalize (text); + char *p = str_utf8_normalize (prefix); + const char *nt = t; + const char *np = p; + const char *nnt = t; + const char *nnp = p; + int result; + + while (nt[0] != '\0' && np[0] != '\0') { + str_utf8_cnext_char_safe (&nnt); + str_utf8_cnext_char_safe (&nnp); + if (nnt - nt != nnp - np) break; + if (strncmp (nt, np, nnt - nt) != 0) break; + nt = nnt; + np = nnp; + } + + result = np - p; + + g_free (t); + g_free (p); + + return result; +} + +static int +str_utf8_caseprefix (const char *text, const char *prefix) +{ + char *t = str_utf8_casefold_normalize (text); + char *p = str_utf8_casefold_normalize (prefix); + const char *nt = t; + const char *np = p; + const char *nnt = t; + const char *nnp = p; + int result; + + while (nt[0] != '\0' && np[0] != '\0') { + str_utf8_cnext_char_safe (&nnt); + str_utf8_cnext_char_safe (&nnp); + if (nnt - nt != nnp - np) break; + if (strncmp (nt, np, nnt - nt) != 0) break; + nt = nnt; + np = nnp; + } + + result = np - p; + + g_free (t); + g_free (p); + + return result; +} + +static char * +str_utf8_create_key_gen (const char *text, int case_sen, + gchar *(*keygen) (const gchar *, gssize size)) +{ + char *result; + + if (case_sen) { + result = str_utf8_normalize (text); + } else { + const char *start, *end; + char *fold, *key; + struct str_buffer *fixed = str_get_buffer (); + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') { + if (start != end) { + fold = g_utf8_casefold (start, end 
- start); + key = keygen (fold, -1); + str_insert_string (key, fixed); + g_free (key); + g_free (fold); + } + str_insert_char (end[0], fixed); + start = end + 1; + } + + if (start == text) { + fold = g_utf8_casefold (text, -1); + result = keygen (fold, -1); + g_free (fold); + } else { + if (start[0] != '\0' && start != end) { + fold = g_utf8_casefold (start, end - start); + key = keygen (fold, -1); + str_insert_string (key, fixed); + g_free (key); + g_free (fold); + } + result = g_strdup (fixed->data); + } + str_release_buffer (fixed); + } + return result; +} + +static char * +str_utf8_create_key (const char *text, int case_sen) +{ + return str_utf8_create_key_gen (text, case_sen, g_utf8_collate_key); +} + +static char * +str_utf8_create_key_for_filename (const char *text, int case_sen) +{ + return str_utf8_create_key_gen (text, case_sen, g_utf8_collate_key_for_filename); +} + +static int +str_utf8_key_collate (const char *t1, const char *t2, int case_sen) +{ + return strcmp (t1, t2); +} + +static void +str_utf8_release_key (char *key, int case_sen) +{ + g_free (key); +} + +struct str_class +str_utf8_init () +{ + struct str_class result; + + result.vfs_convert_to = str_utf8_vfs_convert_to; + result.insert_replace_char = str_utf8_insert_replace_char; + result.is_valid_string = str_utf8_is_valid_string; + result.is_valid_char = str_utf8_is_valid_char; + result.cnext_char = str_utf8_cnext_char; + result.cprev_char = str_utf8_cprev_char; + result.cnext_char_safe = str_utf8_cnext_char_safe; + result.cprev_char_safe = str_utf8_cprev_char_safe; + result.cnext_noncomb_char = str_utf8_cnext_noncomb_char; + result.cprev_noncomb_char = str_utf8_cprev_noncomb_char; + result.isspace = str_utf8_isspace; + result.ispunct = str_utf8_ispunct; + result.isalnum = str_utf8_isalnum; + result.isdigit = str_utf8_isdigit; + result.isprint = str_utf8_isprint; + result.iscombiningmark = str_utf8_iscombiningmark; + result.toupper = str_utf8_toupper; + result.tolower = str_utf8_tolower; + 
result.length = str_utf8_length; + result.length2 = str_utf8_length2; + result.length_noncomb = str_utf8_length_noncomb; + result.fix_string = str_utf8_fix_string; + result.term_form = str_utf8_term_form; + result.fit_to_term = str_utf8_fit_to_term; + result.term_trim = str_utf8_term_trim; + result.term_width2 = str_utf8_term_width2; + result.term_width1 = str_utf8_term_width1; + result.term_char_width = str_utf8_term_char_width; + result.msg_term_size = str_utf8_msg_term_size; + result.term_substring = str_utf8_term_substring; + result.trunc = str_utf8_trunc; + result.offset_to_pos = str_utf8_offset_to_pos; + result.column_to_pos = str_utf8_column_to_pos; + result.create_search_needle = str_utf8_create_search_needle; + result.release_search_needle = str_utf8_release_search_needle; + result.search_first = str_utf8_search_first; + result.search_last = str_utf8_search_last; + result.compare = str_utf8_compare; + result.ncompare = str_utf8_ncompare; + result.casecmp = str_utf8_casecmp; + result.ncasecmp = str_utf8_ncasecmp; + result.prefix = str_utf8_prefix; + result.caseprefix = str_utf8_caseprefix; + result.create_key = str_utf8_create_key; + result.create_key_for_filename = str_utf8_create_key_for_filename; + result.key_collate = str_utf8_key_collate; + result.release_key = str_utf8_release_key; + + return result; +}