chars: implement mblen() ourselves, for efficiency
Most implementations of mblen() do a call to mbtowc(), which is a waste of time when all we want to know is the number of bytes (and when we already know that we're using UTF-8 and that the first byte is at least 0xC2). (This also avoids burdening correct implementations with the workaround that was needed only for glibc.) Code was written after looking at gnulib/lib/mbrtowc-impl-utf8.h.
Этот коммит содержится в:
родитель
e3f46b066a
Коммит
b020937475
34
src/chars.c
34
src/chars.c
@@ -233,21 +233,39 @@ char *make_mbchar(long code, int *length)
|
||||
}
|
||||
#endif /* ENABLE_UTF8 */
|
||||
|
||||
/* Return the length (in bytes) of the character located at *pointer. */
|
||||
/* Return the number of bytes in the character that starts at *pointer. */
|
||||
int char_length(const char *pointer)
|
||||
{
|
||||
#ifdef ENABLE_UTF8
|
||||
/* If possibly a multibyte character, get its length; otherwise, it's 1. */
|
||||
if ((unsigned char)*pointer > 0xC1 && use_utf8) {
|
||||
int length = mblen(pointer, MAXCHARLEN);
|
||||
unsigned char c1 = (unsigned char)pointer[0];
|
||||
unsigned char c2 = (unsigned char)pointer[1];
|
||||
|
||||
/* Codes beyond U+10FFFF are invalid, even when glibc thinks otherwise. */
|
||||
if ((unsigned char)*pointer > 0xF4 || ((unsigned char)*pointer == 0xF4 &&
|
||||
(unsigned char)*(pointer + 1) > 0x8F))
|
||||
if ((c2 ^ 0x80) > 0x3F)
|
||||
return 1;
|
||||
|
||||
return (length < 0 ? 1 : length);
|
||||
} else
|
||||
if (c1 < 0xE0)
|
||||
return 2;
|
||||
|
||||
if (((unsigned char)pointer[2] ^ 0x80) > 0x3F)
|
||||
return 1;
|
||||
|
||||
if (c1 < 0xF0) {
|
||||
if ((c1 > 0xE0 || c2 >= 0xA0) && (c1 != 0xED || c2 < 0xA0))
|
||||
return 3;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (((unsigned char)pointer[3] ^ 0x80) > 0x3F)
|
||||
return 1;
|
||||
|
||||
if (c1 > 0xF4)
|
||||
return 1;
|
||||
|
||||
if ((c1 > 0xF0 || c2 >= 0x90) && (c1 != 0xF4 || c2 < 0x90))
|
||||
return 4;
|
||||
}
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user