Subversion Repositories HelenOS

Rev

Rev 4014 | Rev 4179 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4014 Rev 4175
Line 40... Line 40...
40
#include <cpu.h>
40
#include <cpu.h>
41
#include <arch/asm.h>
41
#include <arch/asm.h>
42
#include <arch.h>
42
#include <arch.h>
43
#include <console/kconsole.h>
43
#include <console/kconsole.h>
44
 
44
 
-
 
45
/** Decode a single UTF-8 character from a NULL-terminated string.
-
 
46
 *
-
 
47
 * Decode a single UTF-8 character from a plain char NULL-terminated
-
 
48
 * string. Decoding starts at @index and this index is incremented
-
 
49
 * if the current UTF-8 string is encoded in more than a single byte.
-
 
50
 *
-
 
51
 * @param str   Plain character NULL-terminated string.
-
 
52
 * @param index Index (counted in plain characters) where to start
-
 
53
 *              the decoding.
-
 
54
 *
-
 
55
 * @return Decoded character in UTF-32 or '?' if the encoding is wrong.
-
 
56
 *
-
 
57
 */
-
 
58
wchar_t utf8_decode(const char *str, index_t *index)
-
 
59
{
-
 
60
    uint8_t c1;           /* First plain character from str */
-
 
61
    uint8_t c2;           /* Second plain character from str */
-
 
62
    uint8_t c3;           /* Third plain character from str */
-
 
63
    uint8_t c4;           /* Fourth plain character from str */
-
 
64
   
-
 
65
    c1 = (uint8_t) str[*index];
-
 
66
   
-
 
67
    if ((c1 & 0x80) == 0) {
-
 
68
        /* Plain ASCII (code points 0 .. 127) */
-
 
69
        return (wchar_t) c1;
-
 
70
    } else if ((c1 & 0xe0) == 0xc0) {
-
 
71
        /* Code points 128 .. 2047 */
-
 
72
        c2 = (uint8_t) str[*index + 1];
-
 
73
        if ((c2 & 0xc0) == 0x80) {
-
 
74
            (*index)++;
-
 
75
            return ((wchar_t) ((c1 & 0x1f) << 6) | (c2 & 0x3f));
-
 
76
        } else
-
 
77
            return ((wchar_t) '?');
-
 
78
    } else if ((c1 & 0xf0) == 0xe0) {
-
 
79
        /* Code points 2048 .. 65535 */
-
 
80
        c2 = (uint8_t) str[*index + 1];
-
 
81
        if ((c2 & 0xc0) == 0x80) {
-
 
82
            (*index)++;
-
 
83
            c3 = (uint8_t) str[*index + 1];
-
 
84
            if ((c3 & 0xc0) == 0x80) {
-
 
85
                (*index)++;
-
 
86
                return ((wchar_t) ((c1 & 0x0f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f));
-
 
87
            } else
-
 
88
                return ((wchar_t) '?');
-
 
89
        } else
-
 
90
            return ((wchar_t) '?');
-
 
91
    } else if ((c1 & 0xf8) == 0xf0) {
-
 
92
        /* Code points 65536 .. 1114111 */
-
 
93
        c2 = (uint8_t) str[*index + 1];
-
 
94
        if ((c2 & 0xc0) == 0x80) {
-
 
95
            (*index)++;
-
 
96
            c3 = (uint8_t) str[*index + 1];
-
 
97
            if ((c3 & 0xc0) == 0x80) {
-
 
98
                (*index)++;
-
 
99
                c4 = (uint8_t) str[*index + 1];
-
 
100
                if ((c4 & 0xc0) == 0x80) {
-
 
101
                    (*index)++;
-
 
102
                    return ((wchar_t) ((c1 & 0x07) << 18) | ((c2 & 0x3f) << 12) | ((c3 & 0x3f) << 6) | (c4 & 0x3f));
-
 
103
                } else
-
 
104
                    return ((wchar_t) '?');
-
 
105
            } else
-
 
106
                return ((wchar_t) '?');
-
 
107
        } else
-
 
108
            return ((wchar_t) '?');
-
 
109
    }
-
 
110
   
-
 
111
    return ((wchar_t) '?');
-
 
112
}
-
 
113
 
45
/** Return number of characters in a string.
114
/** Return number of characters in a string.
46
 *
115
 *
47
 * @param str NULL terminated string.
116
 * @param str NULL terminated string.
48
 *
117
 *
49
 * @return Number of characters in str.
118
 * @return Number of characters in str.