From 3934c6fb8e08914e3d45fe1d0a1091e1c449dc0b Mon Sep 17 00:00:00 2001 From: Slava Zanko Date: Fri, 22 Oct 2010 10:01:48 +0300 Subject: [PATCH] Ticket #2396 (Find File "Whole words" search bug) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When searching in files for a non-English word with "Whole words" set "on", nothing will be found. Try searching for the word "время" and also "time" in the following example: 'time' Время 'Time' 'время' Signed-off-by: Slava Zanko Signed-off-by: Andrew Borodin --- lib/search/normal.c | 9 ++++++--- lib/search/regex.c | 23 +++++++++++++++++++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/lib/search/normal.c b/lib/search/normal.c index c2c8ea734..fbb6790cc 100644 --- a/lib/search/normal.c +++ b/lib/search/normal.c @@ -92,9 +92,12 @@ mc_search__cond_struct_new_init_normal (const char *charset, mc_search_t * lc_mc mc_search__normal_translate_to_regex (mc_search_cond->str->str, &mc_search_cond->len); g_string_free (mc_search_cond->str, TRUE); - if (lc_mc_search->whole_words) { - g_string_prepend (tmp, "\\b"); - g_string_append (tmp, "\\b"); + if (lc_mc_search->whole_words) + { + /* NOTE: \b as word boundary doesn't allow search + * whole words with non-ASCII symbols */ + g_string_prepend (tmp, "(^|[^\\p{L}\\p{N}_])("); + g_string_append (tmp, ")([^\\p{L}\\p{N}_]|$)"); } mc_search_cond->str = tmp; diff --git a/lib/search/regex.c b/lib/search/regex.c index 40e9599a2..f19e1a2ca 100644 --- a/lib/search/regex.c +++ b/lib/search/regex.c @@ -58,7 +58,7 @@ typedef enum /*** file scope functions ************************************************************************/ static gboolean -mc_search__regex_str_append_if_special (GString * copy_to, GString * regex_str, gsize * offset) +mc_search__regex_str_append_if_special (GString * copy_to, const GString * regex_str, gsize * offset) { char *tmp_regex_str; gsize spec_chr_len; @@ -622,10 +622,25 @@ mc_search__run_regex (mc_search_t * lc_mc_search, 
const void *user_data, { case COND__FOUND_OK: #ifdef SEARCH_TYPE_GLIB - g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos); + if (lc_mc_search->whole_words) + { + g_match_info_fetch_pos (lc_mc_search->regex_match_info, 2, &start_pos, &end_pos); + } + else + { + g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos); + } #else /* SEARCH_TYPE_GLIB */ - start_pos = lc_mc_search->iovector[0]; - end_pos = lc_mc_search->iovector[1]; + if (lc_mc_search->whole_words) + { + start_pos = lc_mc_search->iovector[4]; + end_pos = lc_mc_search->iovector[5]; + } + else + { + start_pos = lc_mc_search->iovector[0]; + end_pos = lc_mc_search->iovector[1]; + } #endif /* SEARCH_TYPE_GLIB */ if (found_len) *found_len = end_pos - start_pos;