Subversion Repositories HelenOS

Rev

Rev 4208 | Rev 4212 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4208 Rev 4209
Line 30... Line 30...
30
 * @{
30
 * @{
31
 */
31
 */
32
 
32
 
33
/**
33
/**
34
 * @file
34
 * @file
35
 * @brief Miscellaneous functions.
35
 * @brief String functions.
-
 
36
 *
-
 
37
 * Strings and characters use the Universal Character Set (UCS). The standard
-
 
38
 * strings, called just strings, are encoded in UTF-8. Wide strings (encoded
-
 
39
 * in UTF-32) are supported to a limited degree. A single character is
-
 
40
 * represented as wchar_t.
-
 
41
 *
-
 
42
 * Strings have the following metrics:
-
 
43
 *
-
 
44
 *  Metric  Abbrev. Meaning
-
 
45
 *  ------  ------  -------
-
 
46
 *  size    n   Number of bytes the string is encoded into, excluding
-
 
47
 *          the null terminator.
-
 
48
 *  length  l   The number of characters in the string, excluding
-
 
49
 *          the null terminator.
-
 
50
 *  width   w   The number of character cells the string takes up on a
-
 
51
 *          monospace display.
-
 
52
 *
-
 
53
 * Naming scheme:
-
 
54
 *
-
 
55
 *  chr_xxx     operate on characters
-
 
56
 *  str_xxx     operate on strings
-
 
57
 *  wstr_xxx    operate on wide strings
-
 
58
 *
-
 
59
 *  [w]str_[n|l|w]xxx   operate on a prefix limited by size, length
-
 
60
 *              or width.
36
 */
61
 */
37
 
62
 
38
#include <string.h>
63
#include <string.h>
39
#include <print.h>
64
#include <print.h>
40
#include <cpu.h>
65
#include <cpu.h>
Line 143... Line 168...
143
 *
168
 *
144
 * @return EOK if the character was encoded successfully, EOVERFLOW if there
169
 * @return EOK if the character was encoded successfully, EOVERFLOW if there
145
 *     was not enough space in the output buffer or EINVAL if the character
170
 *     was not enough space in the output buffer or EINVAL if the character
146
 *     code was invalid.
171
 *     code was invalid.
147
 */
172
 */
148
int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t sz)
173
int chr_encode(wchar_t ch, char *str, size_t *offset, size_t sz)
149
{
174
{
150
    uint32_t cc;        /* Unsigned version of ch. */
175
    uint32_t cc;        /* Unsigned version of ch. */
151
 
176
 
152
    int cbytes;     /* Number of continuation bytes. */
177
    int cbytes;     /* Number of continuation bytes. */
153
    int b0_bits;        /* Number of data bits in first byte. */
178
    int b0_bits;        /* Number of data bits in first byte. */
Line 197... Line 222...
197
    *offset += (1 + cbytes);
222
    *offset += (1 + cbytes);
198
   
223
   
199
    return EOK;
224
    return EOK;
200
}
225
}
201
 
226
 
-
 
227
/** Get display width of character.
-
 
228
 *
-
 
229
 * @param ch    The character.
-
 
230
 * @return  Character width in display cells.
-
 
231
 */
-
 
232
count_t chr_width(wchar_t ch)
-
 
233
{
-
 
234
    return 1;
-
 
235
}
-
 
236
 
202
/** Get size of string, with length limit.
237
/** Get size of string, with length limit.
203
 *
238
 *
204
 * Get the number of bytes which are used by up to @a max_len first
239
 * Get the number of bytes which are used by up to @a max_len first
205
 * characters in the string @a str. If @a max_len is greater than
240
 * characters in the string @a str. If @a max_len is greater than
206
 * the length of @a str, the entire string is measured.
241
 * the length of @a str, the entire string is measured.
207
 *
242
 *
208
 * @param str   String to consider.
243
 * @param str   String to consider.
209
 * @param count Maximum number of characters to measure.
244
 * @param max_len Maximum number of characters to measure.
210
 *
245
 *
211
 * @return Number of bytes used by the characters.
246
 * @return  Number of bytes used by the characters.
212
 */
247
 */
213
size_t str_lsize(const char *str, count_t max_len)
248
size_t str_lsize(const char *str, count_t max_len)
214
{
249
{
215
    count_t len = 0;
250
    count_t len = 0;
216
    size_t cur = 0;
251
    size_t cur = 0;
Line 228... Line 263...
228
    }
263
    }
229
 
264
 
230
    return prev;
265
    return prev;
231
}
266
}
232
 
267
 
-
 
268
/** Get size of string, with width limit.
-
 
269
 *
-
 
270
 * Get the number of bytes which are used by the longest prefix of @a str
-
 
271
 * that can fit into @a max_width display cells.
-
 
272
 *
-
 
273
 * @param str   String to consider.
-
 
274
 * @param max_width Maximum number of display cells.
-
 
275
 *
-
 
276
 * @return  Number of bytes used by the characters that fit.
-
 
277
 */
-
 
278
size_t str_wsize(const char *str, count_t max_width)
-
 
279
{
-
 
280
    count_t width = 0;
-
 
281
    size_t cur = 0;
-
 
282
    size_t prev;
-
 
283
    wchar_t ch;
-
 
284
 
-
 
285
    while (true) {
-
 
286
        prev = cur;
-
 
287
        if (width >= max_width)
-
 
288
            break;
-
 
289
        ch = chr_decode(str, &cur, UTF8_NO_LIMIT);
-
 
290
        if (ch == '\0') break;
-
 
291
 
-
 
292
        width += chr_width(ch);
-
 
293
    }
-
 
294
 
-
 
295
    return prev;
-
 
296
}
-
 
297
 
-
 
298
 
-
 
299
/** Get length of wide string, with width limit.
-
 
300
 *
-
 
301
 * Get the number of characters in a wide string that can fit into @a max_width
-
 
302
 * display cells.
-
 
303
 *
-
 
304
 * @param wstr   Wide string to consider.
-
 
305
 * @param max_width Maximum number of display cells.
-
 
306
 *
-
 
307
 * @return  Number of characters that fit.
-
 
308
 */
-
 
309
count_t wstr_wlength(const wchar_t *wstr, count_t max_width)
-
 
310
{
-
 
311
    count_t width = 0;
-
 
312
    index_t cur = 0;
-
 
313
 
-
 
314
    while (true) {
-
 
315
        if (width >= max_width)
-
 
316
            break;
-
 
317
        if (wstr[cur] == '\0') break;
-
 
318
 
-
 
319
        width += chr_width(wstr[cur]);
-
 
320
        ++cur;
-
 
321
    }
-
 
322
 
-
 
323
    return (count_t) cur;
-
 
324
}
-
 
325
 
233
/** Check whether character is plain ASCII.
326
/** Check whether character is plain ASCII.
234
 *
327
 *
235
 * @return True if character is plain ASCII.
328
 * @return True if character is plain ASCII.
236
 *
329
 *
237
 */
330
 */
Line 288... Line 381...
288
    return len;
381
    return len;
289
}
382
}
290
 
383
 
291
/** Return number of characters in a wide string.
384
/** Return number of characters in a wide string.
292
 *
385
 *
293
 * @param str NULL-terminated wide string.
386
 * @param   wstr NULL-terminated wide string.
294
 * @return Number of characters in @a str.
387
 * @return  Number of characters in @a wstr.
295
 */
388
 */
296
count_t wstr_length(const wchar_t *wstr)
389
count_t wstr_length(const wchar_t *wstr)
297
{
390
{
298
    count_t len;
391
    count_t len;
299
 
392