42,6 → 42,75 |
#include <arch.h> |
#include <console/kconsole.h> |
|
/** Decode a single UTF-8 character from a NULL-terminated string. |
* |
* Decode a single UTF-8 character from a plain char NULL-terminated |
* string. Decoding starts at @index and this index is incremented |
* if the current UTF-8 string is encoded in more than a single byte. |
* |
* @param str Plain character NULL-terminated string. |
* @param index Index (counted in plain characters) where to start |
* the decoding. |
* |
* @return Decoded character in UTF-32 or '?' if the encoding is wrong. |
* |
*/ |
wchar_t utf8_decode(const char *str, index_t *index) |
{ |
uint8_t c1; /* First plain character from str */ |
uint8_t c2; /* Second plain character from str */ |
uint8_t c3; /* Third plain character from str */ |
uint8_t c4; /* Fourth plain character from str */ |
|
c1 = (uint8_t) str[*index]; |
|
if ((c1 & 0x80) == 0) { |
/* Plain ASCII (code points 0 .. 127) */ |
return (wchar_t) c1; |
} else if ((c1 & 0xe0) == 0xc0) { |
/* Code points 128 .. 2047 */ |
c2 = (uint8_t) str[*index + 1]; |
if ((c2 & 0xc0) == 0x80) { |
(*index)++; |
return ((wchar_t) ((c1 & 0x1f) << 6) | (c2 & 0x3f)); |
} else |
return ((wchar_t) '?'); |
} else if ((c1 & 0xf0) == 0xe0) { |
/* Code points 2048 .. 65535 */ |
c2 = (uint8_t) str[*index + 1]; |
if ((c2 & 0xc0) == 0x80) { |
(*index)++; |
c3 = (uint8_t) str[*index + 1]; |
if ((c3 & 0xc0) == 0x80) { |
(*index)++; |
return ((wchar_t) ((c1 & 0x0f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f)); |
} else |
return ((wchar_t) '?'); |
} else |
return ((wchar_t) '?'); |
} else if ((c1 & 0xf8) == 0xf0) { |
/* Code points 65536 .. 1114111 */ |
c2 = (uint8_t) str[*index + 1]; |
if ((c2 & 0xc0) == 0x80) { |
(*index)++; |
c3 = (uint8_t) str[*index + 1]; |
if ((c3 & 0xc0) == 0x80) { |
(*index)++; |
c4 = (uint8_t) str[*index + 1]; |
if ((c4 & 0xc0) == 0x80) { |
(*index)++; |
return ((wchar_t) ((c1 & 0x07) << 18) | ((c2 & 0x3f) << 12) | ((c3 & 0x3f) << 6) | (c4 & 0x3f)); |
} else |
return ((wchar_t) '?'); |
} else |
return ((wchar_t) '?'); |
} else |
return ((wchar_t) '?'); |
} |
|
return ((wchar_t) '?'); |
} |
|
/** Return number of characters in a string. |
* |
* @param str NULL terminated string. |