Subversion Repositories HelenOS

Compare Revisions

Ignore whitespace Rev 4199 → Rev 4200

/trunk/kernel/generic/include/string.h
37,12 → 37,12
 
#include <typedefs.h>
 
#define UTF8_NO_LIMIT ((index_t) -1)
#define UTF8_NO_LIMIT ((size_t) -1)
 
extern char invalch;
 
extern wchar_t utf8_decode(const char *str, index_t *index, index_t limit);
extern bool utf8_encode(const wchar_t ch, char *str, index_t *index, index_t limit);
extern wchar_t chr_decode(const char *, size_t *, size_t);
extern bool chr_encode(const wchar_t, char *, size_t *, size_t limit);
extern size_t utf8_count_bytes(const char *str, count_t count);
extern bool ascii_check(const wchar_t ch);
extern bool unicode_check(const wchar_t ch);
/trunk/kernel/generic/src/printf/vprintf.c
49,7 → 49,7
index_t chars = 0;
while (index < size) {
putchar(utf8_decode(str, &index, size));
putchar(chr_decode(str, &index, size));
chars++;
}
74,7 → 74,7
index_t chars = 0;
wchar_t uc;
while ((uc = utf8_decode(str, &index, UTF8_NO_LIMIT)) != 0) {
while ((uc = chr_decode(str, &index, UTF8_NO_LIMIT)) != 0) {
putchar(uc);
chars++;
}
/trunk/kernel/generic/src/printf/vsnprintf.c
84,9 → 84,9
index_t index = 0;
while (index < size) {
wchar_t uc = utf8_decode(str, &index, size);
wchar_t uc = chr_decode(str, &index, size);
 
if (!utf8_encode(uc, data->dst, &data->len, data->size - 1))
if (!chr_encode(uc, data->dst, &data->len, data->size - 1))
break;
}
146,7 → 146,7
return ((int) size);
}
if (!utf8_encode(str[index], data->dst, &data->len, data->size - 1))
if (!chr_encode(str[index], data->dst, &data->len, data->size - 1))
break;
index++;
/trunk/kernel/generic/src/printf/printf_core.c
594,7 → 594,7
while (true) {
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
 
if (uc == '\0') break;
 
618,7 → 618,7
do {
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
switch (uc) {
case '#':
flags |= __PRINTF_FLAG_PREFIX;
648,7 → 648,7
width += uc - '0';
 
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
if (uc == '\0')
break;
if (!isdigit(uc))
657,7 → 657,7
} else if (uc == '*') {
/* Get width value from argument list */
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
width = (int) va_arg(ap, int);
if (width < 0) {
/* Negative width sets '-' flag */
670,7 → 670,7
int precision = 0;
if (uc == '.') {
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
if (isdigit(uc)) {
while (true) {
precision *= 10;
677,7 → 677,7
precision += uc - '0';
 
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
if (uc == '\0')
break;
if (!isdigit(uc))
686,7 → 686,7
} else if (uc == '*') {
/* Get precision value from the argument list */
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
precision = (int) va_arg(ap, int);
if (precision < 0) {
/* Ignore negative precision */
705,10 → 705,10
/* Char or short */
qualifier = PrintfQualifierShort;
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
if (uc == 'h') {
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
qualifier = PrintfQualifierByte;
}
break;
716,10 → 716,10
/* Long or long long */
qualifier = PrintfQualifierLong;
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
if (uc == 'l') {
i = nxt;
uc = utf8_decode(fmt, &nxt, UTF8_NO_LIMIT);
uc = chr_decode(fmt, &nxt, UTF8_NO_LIMIT);
qualifier = PrintfQualifierLongLong;
}
break;
/trunk/kernel/generic/src/lib/string.c
56,22 → 56,22
/** Number of data bits in a UTF-8 continuation byte. */
#define CONT_BITS 6
 
/** Decode a single UTF-8 character from a NULL-terminated string.
/** Decode a single character from a substring.
*
* Decode a single UTF-8 character from a plain char NULL-terminated
* string. Decoding starts at @index and this index is moved to the
* beginning of the next character. In case of decoding error,
* index advances. However, index is never moved beyond (str+limit).
* Decode a single character from a substring of size @a sz. Decoding starts
* at @a offset and this offset is moved to the beginning of the next
* character. In case of decoding error, offset generally advances at least
* by one. However, offset is never moved beyond (str + sz).
*
* @param str Plain character NULL-terminated string.
* @param str String (not necessarily NULL-terminated).
* @param index Index (counted in plain characters) where to start
* the decoding.
* @param limit Maximal allowed value of index.
* @param limit Size of the substring.
*
* @return Decoded character in UTF-32 or '?' if the encoding is wrong.
* @return Value of decoded character or '?' on decoding error.
*
*/
wchar_t utf8_decode(const char *str, index_t *index, index_t limit)
wchar_t chr_decode(const char *str, size_t *offset, size_t sz)
{
uint8_t b0, b; /* Bytes read from str. */
wchar_t ch;
79,10 → 79,10
int b0_bits; /* Data bits in first byte. */
int cbytes; /* Number of continuation bytes. */
 
if (*index + 1 > limit)
if (*offset + 1 > sz)
return invalch;
 
b0 = (uint8_t) str[(*index)++];
b0 = (uint8_t) str[(*offset)++];
 
/* Determine code length. */
 
107,7 → 107,7
return invalch;
}
 
if (*index + cbytes > limit) {
if (*offset + cbytes > sz) {
return invalch;
}
 
115,7 → 115,7
 
/* Decode continuation bytes. */
while (cbytes > 0) {
b = (uint8_t) str[(*index)++];
b = (uint8_t) str[(*offset)++];
 
/* Must be 10xxxxxx. */
if ((b & 0xc0) != 0x80) {
130,25 → 130,22
return ch;
}
 
/** Encode a single UTF-32 character as UTF-8
/** Encode a single character to string representation.
*
* Encode a single UTF-32 character as UTF-8 and store it into
* the given buffer at @index. Encoding starts at @index and
* this index is moved at the position where the next character
* can be written to.
* Encode a single character to string representation (i.e. UTF-8) and store
* it into a buffer at @a offset. Encoding starts at @a offset and this offset
* is moved to the position where the next character can be written to.
*
* @param ch Input UTF-32 character.
* @param str Output buffer.
* @param index Index (counted in plain characters) where to start
* the encoding
* @param limit Maximal allowed value of index.
* @param ch Input character.
* @param str Output buffer.
* @param offset Offset (in bytes) where to start writing.
* @param sz Size of the output buffer.
*
* @return True if the character was encoded or false if there is not
* enought space in the output buffer or the character is invalid
* Unicode code point.
*
* @return True if the character was encoded successfully or false if there
* was not enough space in the output buffer or the character code
* was invalid.
*/
bool utf8_encode(const wchar_t ch, char *str, index_t *index, index_t limit)
bool chr_encode(const wchar_t ch, char *str, size_t *offset, size_t sz)
{
uint32_t cc; /* Unsigned version of ch. */
 
156,7 → 153,7
int b0_bits; /* Number of data bits in first byte. */
int i;
 
if (*index >= limit)
if (*offset >= sz)
return false;
 
if (ch < 0)
184,20 → 181,20
}
 
/* Check for available space in buffer. */
if (*index + cbytes >= limit)
if (*offset + cbytes >= sz)
return false;
 
/* Encode continuation bytes. */
for (i = cbytes; i > 0; --i) {
str[*index + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
cc = cc >> CONT_BITS;
}
 
/* Encode first byte. */
str[*index] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
 
/* Advance index. */
*index += (1 + cbytes);
/* Advance offset. */
*offset += (1 + cbytes);
return true;
}
226,7 → 223,7
iprev = index;
if (size >= count)
break;
ch = utf8_decode(str, &index, UTF8_NO_LIMIT);
ch = chr_decode(str, &index, UTF8_NO_LIMIT);
if (ch == '\0') break;
 
size++;
288,7 → 285,7
size_t size = 0;
index_t index = 0;
while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
while (chr_decode(str, &index, UTF8_NO_LIMIT) != 0) {
size++;
}