Subversion Repositories HelenOS

Rev

Rev 4198 | Rev 4200 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4198 Rev 4199
Line 57... Line 57...
57
#define CONT_BITS 6
57
#define CONT_BITS 6
58
 
58
 
59
/** Decode a single UTF-8 character from a NULL-terminated string.
59
/** Decode a single UTF-8 character from a NULL-terminated string.
60
 *
60
 *
61
 * Decode a single UTF-8 character from a plain char NULL-terminated
61
 * Decode a single UTF-8 character from a plain char NULL-terminated
62
 * string. Decoding starts at @index and this index is incremented
62
 * string. Decoding starts at @index and this index is moved to the
-
 
63
 * beginning of the next character. In case of decoding error,
63
 * if the current UTF-8 string is encoded in more than a single byte.
64
 * index advances. However, index is never moved beyond (str+limit).
64
 *
65
 *
65
 * @param str   Plain character NULL-terminated string.
66
 * @param str   Plain character NULL-terminated string.
66
 * @param index Index (counted in plain characters) where to start
67
 * @param index Index (counted in plain characters) where to start
67
 *              the decoding.
68
 *              the decoding.
68
 * @param limit Maximal allowed value of index.
69
 * @param limit Maximal allowed value of index.
Line 76... Line 77...
76
    wchar_t ch;
77
    wchar_t ch;
77
 
78
 
78
    int b0_bits;        /* Data bits in first byte. */
79
    int b0_bits;        /* Data bits in first byte. */
79
    int cbytes;     /* Number of continuation bytes. */
80
    int cbytes;     /* Number of continuation bytes. */
80
 
81
 
81
    if (*index > limit)
82
    if (*index + 1 > limit)
82
        return invalch;
83
        return invalch;
83
 
84
 
84
    b0 = (uint8_t) str[*index];
85
    b0 = (uint8_t) str[(*index)++];
85
 
86
 
86
    /* Determine code length. */
87
    /* Determine code length. */
87
 
88
 
88
    if ((b0 & 0x80) == 0) {
89
    if ((b0 & 0x80) == 0) {
89
        /* 0xxxxxxx (Plain ASCII) */
90
        /* 0xxxxxxx (Plain ASCII) */
Line 112... Line 113...
112
 
113
 
113
    ch = b0 & LO_MASK_8(b0_bits);
114
    ch = b0 & LO_MASK_8(b0_bits);
114
 
115
 
115
    /* Decode continuation bytes. */
116
    /* Decode continuation bytes. */
116
    while (cbytes > 0) {
117
    while (cbytes > 0) {
117
        b = (uint8_t) str[*index + 1];
118
        b = (uint8_t) str[(*index)++];
118
        ++(*index);
-
 
119
 
119
 
120
        /* Must be 10xxxxxx. */
120
        /* Must be 10xxxxxx. */
121
        if ((b & 0xc0) != 0x80) {
121
        if ((b & 0xc0) != 0x80) {
122
            return invalch;
122
            return invalch;
123
        }
123
        }
Line 132... Line 132...
132
 
132
 
133
/** Encode a single UTF-32 character as UTF-8
133
/** Encode a single UTF-32 character as UTF-8
134
 *
134
 *
135
 * Encode a single UTF-32 character as UTF-8 and store it into
135
 * Encode a single UTF-32 character as UTF-8 and store it into
136
 * the given buffer at @index. Encoding starts at @index and
136
 * the given buffer at @index. Encoding starts at @index and
137
 * this index is incremented if the UTF-8 character takes
137
 * this index is moved at the position where the next character
138
 * more than a single byte.
138
 * can be written to.
139
 *
139
 *
140
 * @param ch    Input UTF-32 character.
140
 * @param ch    Input UTF-32 character.
141
 * @param str   Output buffer.
141
 * @param str   Output buffer.
142
 * @param index Index (counted in plain characters) where to start
142
 * @param index Index (counted in plain characters) where to start
143
 *              the encoding
143
 *              the encoding
Line 154... Line 154...
154
 
154
 
155
    int cbytes;     /* Number of continuation bytes. */
155
    int cbytes;     /* Number of continuation bytes. */
156
    int b0_bits;        /* Number of data bits in first byte. */
156
    int b0_bits;        /* Number of data bits in first byte. */
157
    int i;
157
    int i;
158
 
158
 
159
    if (*index > limit)
159
    if (*index >= limit)
160
        return false;
160
        return false;
161
 
161
 
162
    if (ch < 0)
162
    if (ch < 0)
163
        return false;
163
        return false;
164
 
164
 
Line 182... Line 182...
182
        /* Codes longer than 21 bits are not supported. */
182
        /* Codes longer than 21 bits are not supported. */
183
        return false;
183
        return false;
184
    }
184
    }
185
 
185
 
186
    /* Check for available space in buffer. */
186
    /* Check for available space in buffer. */
187
    if (*index + cbytes > limit)
187
    if (*index + cbytes >= limit)
188
        return false;
188
        return false;
189
 
189
 
190
    /* Encode continuation bytes. */
190
    /* Encode continuation bytes. */
191
    for (i = cbytes; i > 0; --i) {
191
    for (i = cbytes; i > 0; --i) {
192
        str[*index + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
192
        str[*index + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
Line 195... Line 195...
195
 
195
 
196
    /* Encode first byte. */
196
    /* Encode first byte. */
197
    str[*index] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
197
    str[*index] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
198
 
198
 
199
    /* Advance index. */
199
    /* Advance index. */
200
    *index += cbytes;
200
    *index += (1 + cbytes);
201
   
201
   
202
    return true;
202
    return true;
203
}
203
}
204
 
204
 
205
/** Get bytes used by UTF-8 characters.
205
/** Get bytes used by UTF-8 characters.
Line 217... Line 217...
217
 */
217
 */
218
size_t utf8_count_bytes(const char *str, count_t count)
218
size_t utf8_count_bytes(const char *str, count_t count)
219
{
219
{
220
    size_t size = 0;
220
    size_t size = 0;
221
    index_t index = 0;
221
    index_t index = 0;
-
 
222
    index_t iprev;
-
 
223
    wchar_t ch;
222
   
224
   
-
 
225
    while (true) {
-
 
226
        iprev = index;
-
 
227
        if (size >= count)
-
 
228
            break;
223
    while ((utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) && (size < count)) {
229
        ch = utf8_decode(str, &index, UTF8_NO_LIMIT);
-
 
230
        if (ch == '\0') break;
-
 
231
 
224
        size++;
232
        size++;
225
        index++;
-
 
226
    }
233
    }
227
   
234
   
228
    return index;
235
    return iprev;
229
}
236
}
230
 
237
 
231
/** Check whether character is plain ASCII.
238
/** Check whether character is plain ASCII.
232
 *
239
 *
233
 * @return True if character is plain ASCII.
240
 * @return True if character is plain ASCII.
Line 281... Line 288...
281
    size_t size = 0;
288
    size_t size = 0;
282
    index_t index = 0;
289
    index_t index = 0;
283
   
290
   
284
    while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
291
    while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
285
        size++;
292
        size++;
286
        index++;
-
 
287
    }
293
    }
288
   
294
   
289
    return size;
295
    return size;
290
}
296
}
291
 
297