Rev 4014 | Rev 4179 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 4014 | Rev 4175 | ||
---|---|---|---|
Line 40... | Line 40... | ||
40 | #include <cpu.h> |
40 | #include <cpu.h> |
41 | #include <arch/asm.h> |
41 | #include <arch/asm.h> |
42 | #include <arch.h> |
42 | #include <arch.h> |
43 | #include <console/kconsole.h> |
43 | #include <console/kconsole.h> |
44 | 44 | ||
- | 45 | /** Decode a single UTF-8 character from a NULL-terminated string. |
|
- | 46 | * |
|
- | 47 | * Decode a single UTF-8 character from a plain char NULL-terminated |
|
- | 48 | * string. Decoding starts at @index and this index is incremented |
|
- | 49 | * if the current UTF-8 character is encoded in more than a single byte.
|
- | 50 | * |
|
- | 51 | * @param str Plain character NULL-terminated string. |
|
- | 52 | * @param index Index (counted in plain characters) where to start |
|
- | 53 | * the decoding. |
|
- | 54 | * |
|
- | 55 | * @return Decoded character in UTF-32 or '?' if the encoding is wrong. |
|
- | 56 | * |
|
- | 57 | */ |
|
- | 58 | wchar_t utf8_decode(const char *str, index_t *index) |
|
- | 59 | { |
|
- | 60 | uint8_t c1; /* First plain character from str */ |
|
- | 61 | uint8_t c2; /* Second plain character from str */ |
|
- | 62 | uint8_t c3; /* Third plain character from str */ |
|
- | 63 | uint8_t c4; /* Fourth plain character from str */ |
|
- | 64 | ||
- | 65 | c1 = (uint8_t) str[*index]; |
|
- | 66 | ||
- | 67 | if ((c1 & 0x80) == 0) { |
|
- | 68 | /* Plain ASCII (code points 0 .. 127) */ |
|
- | 69 | return (wchar_t) c1; |
|
- | 70 | } else if ((c1 & 0xe0) == 0xc0) { |
|
- | 71 | /* Code points 128 .. 2047 */ |
|
- | 72 | c2 = (uint8_t) str[*index + 1]; |
|
- | 73 | if ((c2 & 0xc0) == 0x80) { |
|
- | 74 | (*index)++; |
|
- | 75 | return ((wchar_t) ((c1 & 0x1f) << 6) | (c2 & 0x3f)); |
|
- | 76 | } else |
|
- | 77 | return ((wchar_t) '?'); |
|
- | 78 | } else if ((c1 & 0xf0) == 0xe0) { |
|
- | 79 | /* Code points 2048 .. 65535 */ |
|
- | 80 | c2 = (uint8_t) str[*index + 1]; |
|
- | 81 | if ((c2 & 0xc0) == 0x80) { |
|
- | 82 | (*index)++; |
|
- | 83 | c3 = (uint8_t) str[*index + 1]; |
|
- | 84 | if ((c3 & 0xc0) == 0x80) { |
|
- | 85 | (*index)++; |
|
- | 86 | return ((wchar_t) ((c1 & 0x0f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f)); |
|
- | 87 | } else |
|
- | 88 | return ((wchar_t) '?'); |
|
- | 89 | } else |
|
- | 90 | return ((wchar_t) '?'); |
|
- | 91 | } else if ((c1 & 0xf8) == 0xf0) { |
|
- | 92 | /* Code points 65536 .. 1114111 */ |
|
- | 93 | c2 = (uint8_t) str[*index + 1]; |
|
- | 94 | if ((c2 & 0xc0) == 0x80) { |
|
- | 95 | (*index)++; |
|
- | 96 | c3 = (uint8_t) str[*index + 1]; |
|
- | 97 | if ((c3 & 0xc0) == 0x80) { |
|
- | 98 | (*index)++; |
|
- | 99 | c4 = (uint8_t) str[*index + 1]; |
|
- | 100 | if ((c4 & 0xc0) == 0x80) { |
|
- | 101 | (*index)++; |
|
- | 102 | return ((wchar_t) ((c1 & 0x07) << 18) | ((c2 & 0x3f) << 12) | ((c3 & 0x3f) << 6) | (c4 & 0x3f)); |
|
- | 103 | } else |
|
- | 104 | return ((wchar_t) '?'); |
|
- | 105 | } else |
|
- | 106 | return ((wchar_t) '?'); |
|
- | 107 | } else |
|
- | 108 | return ((wchar_t) '?'); |
|
- | 109 | } |
|
- | 110 | ||
- | 111 | return ((wchar_t) '?'); |
|
- | 112 | } |
|
- | 113 | ||
45 | /** Return number of characters in a string. |
114 | /** Return number of characters in a string. |
46 | * |
115 | * |
47 | * @param str NULL terminated string. |
116 | * @param str NULL terminated string. |
48 | * |
117 | * |
49 | * @return Number of characters in str. |
118 | * @return Number of characters in str. |