WebSVN – HelenOS – Diff – //trunk/kernel/generic/src/lib/string.c

 #define CONT_BITS 6
 /** Decode a single UTF-8 character from a NULL-terminated string.
+ *
  * Decode a single UTF-8 character from a plain char NULL-terminated
- * string. Decoding starts at @index and this index is incremented
+ * string. Decoding starts at @index and this index is moved to the
+ * beginning of the next character. In case of decoding error,
- * if the current UTF-8 string is encoded in more than a single byte.
+ * index advances. However, index is never moved beyond (str+limit).
+ *
  * @param str   Plain character NULL-terminated string.
  * @param index Index (counted in plain characters) where to start
  *              the decoding.
  * @param limit Maximal allowed value of index.
     wchar_t ch;
     int b0_bits;        /* Data bits in first byte. */
     int cbytes;     /* Number of continuation bytes. */
-    if (*index > limit)
+    if (*index + 1 > limit)
         return invalch;
-    b0 = (uint8_t) str[*index];
+    b0 = (uint8_t) str[(*index)++];
     /* Determine code length. */
     if ((b0 & 0x80) == 0) {
         /* 0xxxxxxx (Plain ASCII) */
     ch = b0 & LO_MASK_8(b0_bits);
     /* Decode continuation bytes. */
     while (cbytes > 0) {
-        b = (uint8_t) str[*index + 1];
+        b = (uint8_t) str[(*index)++];
-        ++(*index);
         /* Must be 10xxxxxx. */
         if ((b & 0xc0) != 0x80) {
             return invalch;
+        }
 /** Encode a single UTF-32 character as UTF-8
+ *
  * Encode a single UTF-32 character as UTF-8 and store it into
  * the given buffer at @index. Encoding starts at @index and
- * this index is incremented if the UTF-8 character takes
+ * this index is moved to the position where the next character
- * more than a single byte.
+ * can be written to.
+ *
  * @param ch    Input UTF-32 character.
  * @param str   Output buffer.
  * @param index Index (counted in plain characters) where to start
  *              the encoding
     int cbytes;     /* Number of continuation bytes. */
     int b0_bits;        /* Number of data bits in first byte. */
     int i;
-    if (*index > limit)
+    if (*index >= limit)
         return false;
     if (ch < 0)
         return false;
         /* Codes longer than 21 bits are not supported. */
         return false;
+    }
     /* Check for available space in buffer. */
-    if (*index + cbytes > limit)
+    if (*index + cbytes >= limit)
         return false;
     /* Encode continuation bytes. */
     for (i = cbytes; i > 0; --i) {
         str[*index + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
     /* Encode first byte. */
     str[*index] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
     /* Advance index. */
-    *index += cbytes;
+    *index += (1 + cbytes);
     return true;
+}
 /** Get bytes used by UTF-8 characters.
  */
 size_t utf8_count_bytes(const char *str, count_t count)
+{
     size_t size = 0;
     index_t index = 0;
+    index_t iprev;
+    wchar_t ch;
+    while (true) {
+        iprev = index;
+        if (size >= count)
+            break;
-    while ((utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) && (size < count)) {
+        ch = utf8_decode(str, &index, UTF8_NO_LIMIT);
+        if (ch == '\0') break;
         size++;
-        index++;
+    }
-    return index;
+    return iprev;
+}
 /** Check whether character is plain ASCII.
+ *
  * @return True if character is plain ASCII.
     size_t size = 0;
     index_t index = 0;
     while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
         size++;
-        index++;
+    }
     return size;
+}

Subversion Repositories HelenOS

(root)//trunk/kernel/generic/src/lib/string.c – Rev 4198 → 4199