WebSVN – HelenOS – Diff – /trunk/uspace/lib/libc/generic/string.c/


/*
 * Copyright (c) 2005 Martin Decky
 * Copyright (c) 2008 Jiri Svoboda
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
/** @addtogroup libc
 * @{
 */
/** @file
 */
 
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <ctype.h>
#include <malloc.h>
#include <errno.h>
#include <align.h>
#include <mem.h>
#include <string.h>
 
/** Mask selecting the lowest @a n bits of a byte, narrowed to uint8_t. */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/** Mask selecting the lowest @a n bits of a 32-bit word, as uint32_t.
 *
 * The shift is performed on a plain int constant, so @a n must stay well
 * below the width of int; the uses visible in this file pass at most 21.
 */
#define LO_MASK_32(n)  ((uint32_t) ((1 << (n)) - 1))

/** Mask selecting the highest @a n bits of a byte.
 *
 * The complement is applied after integer promotion, so the value is an int
 * whose bits above bit 7 are set as well; it is only meaningful once it has
 * been narrowed back to 8 bits (e.g. by storing it into a char).
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte (10xxxxxx) */
#define CONT_BITS  6
 
/** Decode a single character from a string.
 *
 * Decode a single character from a string of size @a size. Decoding starts
 * at @a offset and this offset is moved to the beginning of the next
 * character. In case of decoding error, offset generally advances at least
 * by one. However, offset is never moved beyond size.
 *
 * @param str    String (not necessarily NULL-terminated).
 * @param offset Byte offset in string where to start decoding.
 * @param size   Size of the string (in bytes).
 *
 * @return Value of decoded character, U_SPECIAL on decoding error or
 *         NULL if attempt to decode beyond @a size.
 *
 */
wchar_t str_decode(const char *str, size_t *offset, size_t size)
{
    if (*offset + 1 > size)
        return 0;
   
    /* First byte read from string */
    uint8_t b0 = (uint8_t) str[(*offset)++];
   
    /* Determine code length */
   
    unsigned int b0_bits;  /* Data bits in first byte */
    unsigned int cbytes;   /* Number of continuation bytes */
   
    if ((b0 & 0x80) == 0) {
        /* 0xxxxxxx (Plain ASCII) */
        b0_bits = 7;
        cbytes = 0;
    } else if ((b0 & 0xe0) == 0xc0) {
        /* 110xxxxx 10xxxxxx */
        b0_bits = 5;
        cbytes = 1;
    } else if ((b0 & 0xf0) == 0xe0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        b0_bits = 4;
        cbytes = 2;
    } else if ((b0 & 0xf8) == 0xf0) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        b0_bits = 3;
        cbytes = 3;
    } else {
        /* 10xxxxxx -- unexpected continuation byte */
        return U_SPECIAL;
    }
   
    if (*offset + cbytes > size)
        return U_SPECIAL;
   
    wchar_t ch = b0 & LO_MASK_8(b0_bits);
   
    /* Decode continuation bytes */
    while (cbytes > 0) {
        uint8_t b = (uint8_t) str[(*offset)++];
       
        /* Must be 10xxxxxx */
        if ((b & 0xc0) != 0x80)
            return U_SPECIAL;
       
        /* Shift data bits to ch */
        ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
        cbytes--;
    }
   
    return ch;
}
 
/** Encode a single character to string representation.
 *
 * Encode a single character to string representation (i.e. UTF-8) and store
 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
 * is moved to the position where the next character can be written to.
 *
 * @param ch     Input character.
 * @param str    Output buffer.
 * @param offset Byte offset where to start writing.
 * @param size   Size of the output buffer (in bytes).
 *
 * @return EOK if the character was encoded successfully, EOVERFLOW if there
 *         was not enough space in the output buffer or EINVAL if the character
 *         code was invalid.
 */
int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
{
    if (*offset >= size)
        return EOVERFLOW;
   
    if (!chr_check(ch))
        return EINVAL;
   
    /* Unsigned version of ch (bit operations should only be done
       on unsigned types). */
    uint32_t cc = (uint32_t) ch;
   
    /* Determine how many continuation bytes are needed */
   
    unsigned int b0_bits;  /* Data bits in first byte */
    unsigned int cbytes;   /* Number of continuation bytes */
   
    if ((cc & ~LO_MASK_32(7)) == 0) {
        b0_bits = 7;
        cbytes = 0;
    } else if ((cc & ~LO_MASK_32(11)) == 0) {
        b0_bits = 5;
        cbytes = 1;
    } else if ((cc & ~LO_MASK_32(16)) == 0) {
        b0_bits = 4;
        cbytes = 2;
    } else if ((cc & ~LO_MASK_32(21)) == 0) {
        b0_bits = 3;
        cbytes = 3;
    } else {
        /* Codes longer than 21 bits are not supported */
        return EINVAL;
    }
   
    /* Check for available space in buffer */
    if (*offset + cbytes >= size)
        return EOVERFLOW;
   
    /* Encode continuation bytes */
    unsigned int i;
    for (i = cbytes; i > 0; i--) {
        str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
        cc = cc >> CONT_BITS;
    }
   
    /* Encode first byte */
    str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
   
    /* Advance offset */
    *offset += cbytes + 1;
   
    return EOK;
}
 
/** Get size of string in bytes.
 *
 * Counts the bytes of @a str that precede the first zero byte; the
 * terminator itself is not counted.
 *
 * @param str Zero-terminated string to measure.
 *
 * @return Number of bytes in front of the terminator.
 *
 */
size_t str_size(const char *str)
{
    const char *end = str;

    /* Walk forward until the terminator is reached */
    for (; *end != 0; end++)
        ;

    return (size_t) (end - str);
}
 
/** Get size of wide string in bytes.
 *
 * The result is the number of wide characters reported by wstr_length()
 * multiplied by sizeof(wchar_t); the terminator is not included.
 *
 * @param str Zero-terminated wide string to measure.
 *
 * @return Number of bytes occupied by the characters of @a str.
 *
 */
size_t wstr_size(const wchar_t *str)
{
    size_t chars = wstr_length(str);

    return chars * sizeof(wchar_t);
}
 
/** Get size of string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * characters in the string @a str. If @a max_len is greater than
 * the length of @a str, the entire string is measured (excluding the
 * terminating zero byte).
 *
 * @param str     String to consider.
 * @param max_len Maximum number of characters to measure.
 *
 * @return Number of bytes used by the characters.
 *
 */
size_t str_lsize(const char *str, size_t max_len)
{
    size_t len = 0;
    size_t offset = 0;

    while (len < max_len) {
        /*
         * Decode through a scratch position: str_decode() steps over
         * the terminator before reporting it, and that byte must not
         * be counted in the result.
         */
        size_t next = offset;

        if (str_decode(str, &next, STR_NO_LIMIT) == 0)
            break;

        offset = next;
        len++;
    }

    return offset;
}
 
/** Get size of wide string with length limit.
 *
 * Measures at most @a max_len leading wide characters of @a str and
 * reports how many bytes they occupy. A shorter string is measured up to
 * (and not including) its terminator.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the measured wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
    /* wstr_nlength() takes its limit in bytes, not in characters */
    size_t byte_limit = max_len * sizeof(wchar_t);
    size_t chars = wstr_nlength(str, byte_limit);

    return chars * sizeof(wchar_t);
}
 
/** Get number of characters in a string.
 *
 * Decodes @a str character by character with str_decode() and counts
 * every character that precedes the first one decoding to 0.
 *
 * @param str Zero-terminated string.
 *
 * @return Number of characters in @a str.
 *
 */
size_t str_length(const char *str)
{
    size_t pos = 0;
    size_t count = 0;

    for (;;) {
        if (str_decode(str, &pos, STR_NO_LIMIT) == 0)
            return count;

        count++;
    }
}
 
/** Get number of characters in a wide string.
 *
 * @param wstr Zero-terminated wide string.
 *
 * @return Number of wide characters in front of the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
    size_t len;

    /* Index forward until the terminating zero element is found */
    for (len = 0; wstr[len] != 0; len++)
        ;

    return len;
}
 
/** Get number of characters in a string with size limit.
 *
 * Decodes @a str with str_decode(), never looking past @a size bytes, and
 * counts the characters found before decoding yields 0 (which happens at
 * a zero byte or once @a size is exhausted).
 *
 * @param str  String to measure.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters counted.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
    size_t pos = 0;
    size_t count = 0;

    for (;;) {
        if (str_decode(str, &pos, size) == 0)
            return count;

        count++;
    }
}
 
/** Get number of characters in a wide string with size limit.
 *
 * Counts wide characters of @a str until a zero element is met or the
 * byte budget is used up. The budget is @a size passed through ALIGN_DOWN
 * (presumably rounding it down to a multiple of sizeof(wchar_t) - confirm
 * against align.h).
 *
 * @param str  Wide string to measure.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters counted.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
    size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
    size_t len = 0;
    size_t used;

    /* Each counted character consumes sizeof(wchar_t) bytes of the budget */
    for (used = 0; used < limit; used += sizeof(wchar_t)) {
        if (str[len] == 0)
            break;

        len++;
    }

    return len;
}
 
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
    return (ch >= 0) && (ch <= 127);
}
 
/** Check whether character is within the Unicode code space.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff (1114111)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
    return (ch >= 0) && (ch <= 0x10ffff);
}
 
/** Compare two zero-terminated strings.
 *
 * Both strings are decoded with str_decode() and compared character by
 * character. The terminator takes part in the comparison, so two strings
 * are equal only if they consist of the same characters and end at the
 * same position.
 *
 * @param s1 First string to compare.
 * @param s2 Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if the first differing character
 *         is smaller in @a s1, 1 if it is smaller in @a s2.
 *
 */
int str_cmp(const char *s1, const char *s2)
{
    size_t pos1 = 0;
    size_t pos2 = 0;
    wchar_t a;
    wchar_t b;

    do {
        a = str_decode(s1, &pos1, STR_NO_LIMIT);
        b = str_decode(s2, &pos2, STR_NO_LIMIT);

        if (a != b)
            return (a < b) ? -1 : 1;

        /* Characters match; stop once that character was the terminator */
    } while (a != 0);

    return 0;
}
 
/** Compare two zero-terminated strings with length limit.
 *
 * Works like a character-by-character comparison of the decoded strings,
 * except that at most @a max_len characters are examined. Strings that
 * agree on all examined characters are reported as equal.
 *
 * @param s1      First string to compare.
 * @param s2      Second string to compare.
 * @param max_len Maximum number of characters to consider.
 *
 * @return 0 if the examined parts are equal, -1 if the first differing
 *         character is smaller in @a s1, 1 if it is smaller in @a s2.
 *
 */
int str_lcmp(const char *s1, const char *s2, size_t max_len)
{
    size_t pos1 = 0;
    size_t pos2 = 0;
    size_t seen;

    for (seen = 0; seen < max_len; seen++) {
        wchar_t a = str_decode(s1, &pos1, STR_NO_LIMIT);
        wchar_t b = str_decode(s2, &pos2, STR_NO_LIMIT);

        if (a != b)
            return (a < b) ? -1 : 1;

        /* Both strings ended at the same place */
        if (a == 0)
            break;
    }

    return 0;
}
 
/** Copy string.
 *
 * Copy source string @a src to destination buffer @a dest, re-encoding it
 * character by character. No more than @a size bytes are written. One byte
 * is always kept back for the terminator, so the result is zero-terminated
 * and holds only complete characters. Copying stops at the end of @a src
 * or as soon as chr_encode() reports an error (e.g. the next character
 * does not fit).
 *
 * @param dest  Destination buffer.
 * @param size  Size of the destination buffer in bytes (must be > 0).
 * @param src   Source string.
 */
void str_cpy(char *dest, size_t size, const char *src)
{
    /* Without at least one byte there is no room for the terminator */
    assert(size > 0);

    size_t rd = 0;
    size_t wr = 0;

    for (;;) {
        wchar_t ch = str_decode(src, &rd, STR_NO_LIMIT);

        if (ch == 0)
            break;

        /* The last byte of the buffer is reserved for the terminator */
        if (chr_encode(ch, dest, &wr, size - 1) != EOK)
            break;
    }

    dest[wr] = '\0';
}
 
/** Copy size-limited substring.
 *
 * Copy a prefix of @a src, at most @a n bytes long, to destination buffer
 * @a dest. No more than @a size bytes are written. One byte is always kept
 * back for the terminator, so the result is zero-terminated and holds only
 * complete characters.
 *
 * No more than @a n bytes are read from the input string, so it does not
 * have to be zero-terminated.
 *
 * @param dest  Destination buffer.
 * @param size  Size of the destination buffer in bytes (must be > 0).
 * @param src   Source string.
 * @param n     Maximum number of bytes to read from @a src.
 */
void str_ncpy(char *dest, size_t size, const char *src, size_t n)
{
    /* Without at least one byte there is no room for the terminator */
    assert(size > 0);

    size_t rd = 0;
    size_t wr = 0;

    for (;;) {
        /* Decoding yields 0 at a zero byte or once n bytes were consumed */
        wchar_t ch = str_decode(src, &rd, n);

        if (ch == 0)
            break;

        /* The last byte of the buffer is reserved for the terminator */
        if (chr_encode(ch, dest, &wr, size - 1) != EOK)
            break;
    }

    dest[wr] = '\0';
}
 
/** Append one string to another.
 *
 * Append source string @a src to the string already stored in destination
 * buffer @a dest. Size of the destination buffer is @a size. The output
 * string stays well-formed, i.e. zero-terminated and containing only
 * complete characters; characters of @a src that do not fit are dropped.
 *
 * If the string already in @a dest occupies @a size bytes or more (so not
 * even a terminator could be placed after it), the buffer is left
 * untouched.
 *
 * @param dest  Destination buffer holding a zero-terminated string.
 * @param size  Size of the destination buffer in bytes.
 * @param src   Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
    size_t dstr_size;

    dstr_size = str_size(dest);

    /*
     * Guard the subtraction below: without it a full buffer would hand
     * str_cpy() a size of zero (tripping its assertion) or a wrapped-around
     * huge value.
     */
    if (dstr_size >= size)
        return;

    str_cpy(dest + dstr_size, size - dstr_size, src);
}
 
/** Convert zero-terminated wide string to string.
 *
 * Encode source wide string @a src into destination buffer @a dst.
 * No more than @a size bytes are written. One byte is kept back for the
 * terminator, so whenever @a size is at least 1 the output is
 * zero-terminated and contains only complete characters. Conversion stops
 * at the end of @a src or as soon as chr_encode() reports an error (e.g.
 * the next character does not fit).
 *
 * @param dst   Destination buffer.
 * @param src   Source wide string.
 * @param size  Size of the destination buffer in bytes. If 0, nothing is
 *              written.
 *
 */
void wstr_nstr(char *dst, const wchar_t *src, size_t size)
{
    /* No space for the terminator in the buffer */
    if (size == 0)
        return;

    wchar_t ch;
    size_t src_idx = 0;
    size_t dst_off = 0;

    while ((ch = src[src_idx++]) != 0) {
        /*
         * Encode into size - 1 bytes only. Letting a character use the
         * final byte would force the terminator to overwrite its last
         * byte and leave a truncated multi-byte sequence behind.
         */
        if (chr_encode(ch, dst, &dst_off, size - 1) != EOK)
            break;
    }

    /* dst_off never exceeds size - 1 here */
    dst[dst_off] = '\0';
}
 
/** Find first occurrence of character in string.
 *
 * The string is decoded with str_decode() and every decoded character is
 * compared with @a ch. The search ends at the first character decoding to
 * 0, so the terminator itself can never be found.
 *
 * @param str String to search.
 * @param ch  Character to look for.
 *
 * @return Pointer to the first byte of the matching character in @a str
 *         or NULL if not found.
 */
const char *str_chr(const char *str, wchar_t ch)
{
    size_t next = 0;

    for (;;) {
        /* Remember where the character about to be decoded begins */
        size_t start = next;
        wchar_t cur = str_decode(str, &next, STR_NO_LIMIT);

        if (cur == 0)
            return NULL;

        if (cur == ch)
            return str + start;
    }
}
 
/** Find last occurrence of character in string.
 *
 * The whole string is decoded with str_decode(); the position of the most
 * recent character equal to @a ch is remembered. The search ends at the
 * first character decoding to 0, so the terminator itself can never be
 * found.
 *
 * @param str String to search.
 * @param ch  Character to look for.
 *
 * @return Pointer to the first byte of the last matching character in
 *         @a str or NULL if not found.
 */
const char *str_rchr(const char *str, wchar_t ch)
{
    const char *found = NULL;
    size_t next = 0;

    for (;;) {
        /* Remember where the character about to be decoded begins */
        size_t start = next;
        wchar_t cur = str_decode(str, &next, STR_NO_LIMIT);

        if (cur == 0)
            break;

        if (cur == ch)
            found = str + start;
    }

    return found;
}
 
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos. The
 * characters from that position on, including the terminator, are shifted
 * one element towards the end.
 *
 * NOTE(review): only @a pos is checked against @a max_pos, not the length
 * of the string after insertion - confirm that callers guarantee room for
 * one more element.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer; insertion is refused unless
 *                @a pos is smaller than this value.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
    size_t len = wstr_length(str);

    if (pos > len)
        return false;

    if (pos + 1 > max_pos)
        return false;

    /*
     * Open a gap at pos by moving elements pos..len (terminator included)
     * one slot up, starting from the end so nothing is overwritten early.
     */
    size_t dst;
    for (dst = len + 1; dst > pos; dst--)
        str[dst] = str[dst - 1];

    str[pos] = ch;

    return true;
}
 
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters behind it,
 * including the terminator, are shifted one element towards the start.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds (at or beyond the terminator).
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
    size_t len = wstr_length(str);

    if (pos >= len)
        return false;

    /* Close the gap; the final step copies the terminator down as well */
    size_t dst;
    for (dst = pos; dst < len; dst++)
        str[dst] = str[dst + 1];

    return true;
}
 
/** Compare two zero-terminated strings byte by byte, ignoring case.
 *
 * Each byte is folded with tolower() before comparison. Bytes are treated
 * as unsigned char values, both because tolower() is only defined for
 * such values (and EOF) and so that bytes above 127 order after ASCII.
 *
 * @param a First string to compare.
 * @param b Second string to compare.
 *
 * @return 0 if the strings are equal ignoring case, a negative value if
 *         @a a orders before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
    size_t i = 0;

    for (;;) {
        int ca = tolower((unsigned char) a[i]);
        int cb = tolower((unsigned char) b[i]);

        /* Stop at the first difference or at the end of either string */
        if ((ca != cb) || (a[i] == '\0') || (b[i] == '\0'))
            return ca - cb;

        i++;
    }
}
 
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and one optional sign are skipped, then the longest
 * run of digits valid in the selected base is converted. With base 0 the
 * base is derived from the prefix: "0x"/"0X" followed by a hex digit means
 * 16, a leading "0" means 8, anything else 10. With base 16 a "0x"/"0X"
 * prefix followed by a hex digit is skipped.
 *
 * @param nptr      Pointer to string.
 * @param endptr    If not NULL, function stores here pointer to the first
 *          character that is not part of the number, or @a nptr if no
 *          digits were found. It is left untouched if @a base is
 *          invalid.
 * @param base      Zero or number between 2 and 36 inclusive.
 * @param sgn       It's set to 1 if minus found; otherwise it is not
 *          written.
 * @return      Magnitude of the converted number, 0 if no conversion
 *          could be performed, ULONG_MAX if the value does not fit.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
    const char *str = nptr;
    unsigned long result = 0;
    bool overflow = false;

    /* <ctype.h> functions require an unsigned char value */
    while (isspace((unsigned char) *str))
        str++;

    if (*str == '-') {
        *sgn = 1;
        ++str;
    } else if (*str == '+')
        ++str;

    if ((base != 0) && ((base < 2) || (base > 36))) {
        /* FIXME: set errno to EINVAL */
        return 0;
    }

    /*
     * A hexadecimal prefix only counts when a hex digit follows it;
     * otherwise the leading "0" is a number on its own.
     */
    bool hex_prefix = (str[0] == '0') &&
        ((str[1] == 'x') || (str[1] == 'X')) &&
        isxdigit((unsigned char) str[2]);

    if (base == 0) {
        if (hex_prefix)
            base = 16;
        else if (*str == '0')
            base = 8;
        else
            base = 10;
    }

    if ((base == 16) && hex_prefix)
        str += 2;

    const char *digits = str;

    while (*str) {
        unsigned char c = (unsigned char) *str;
        unsigned int digit;

        if ((c >= '0') && (c <= '9'))
            digit = c - '0';
        else if ((c >= 'a') && (c <= 'z'))
            digit = c - 'a' + 10;
        else if ((c >= 'A') && (c <= 'Z'))
            digit = c - 'A' + 10;
        else
            break;

        /* Valid digits are strictly smaller than the base */
        if (digit >= (unsigned int) base)
            break;

        /*
         * After an overflow keep consuming digits so that endptr ends
         * up behind the whole number, but stop accumulating.
         */
        if (!overflow) {
            if (result > (ULONG_MAX - digit) / (unsigned long) base)
                overflow = true;
            else
                result = result * (unsigned long) base + digit;
        }

        ++str;
    }

    if (str == digits) {
        /*
         * No number was found => first invalid character is the first
         * character of the string.
         */
        /* FIXME: set errno to EINVAL */
        if (endptr)
            *endptr = (char *) nptr;

        return 0;
    }

    if (endptr)
        *endptr = (char *) str;

    if (overflow) {
        /* FIXME: errno = ERANGE */
        return ULONG_MAX;
    }

    return result;
}
 
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * Values outside the range of long int are clamped to LONG_MIN or
 * LONG_MAX according to the sign.
 *
 * @param nptr      Pointer to string.
 * @param endptr    If not NULL, function stores here pointer to the first
 *          invalid character.
 * @param base      Zero or number between 2 and 36 inclusive.
 * @return      Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
    char sgn = 0;
    unsigned long number = _strtoul(nptr, endptr, base, &sgn);

    if (number > LONG_MAX) {
        /*
         * The magnitude does not fit into a positive long. The only
         * in-range case is a negative number whose magnitude is
         * exactly LONG_MAX + 1, i.e. LONG_MIN; it yields the same
         * value as clamping, so both cases return here without any
         * out-of-range conversion to a signed type.
         */
        /* FIXME: set ERANGE to errno unless the value is exactly LONG_MIN */
        return (sgn ? LONG_MIN : LONG_MAX);
    }

    /* The magnitude fits, so it can be converted and negated as a long */
    long value = (long) number;

    return (sgn ? -value : value);
}
 
 
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * A leading minus sign negates the converted value in unsigned arithmetic.
 *
 * @param nptr      Pointer to string.
 * @param endptr    If not NULL, function stores here pointer to the first
 *          invalid character
 * @param base      Zero or number between 2 and 36 inclusive.
 * @return      Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
    char negative = 0;
    unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

    if (negative)
        return -magnitude;

    return magnitude;
}
 
/** Duplicate a string.
 *
 * Allocates a buffer with malloc() that is large enough for @a src and its
 * terminator and copies the string into it. The caller owns the returned
 * buffer.
 *
 * @param src Zero-terminated string to duplicate.
 *
 * @return Newly allocated copy of @a src or NULL if the allocation failed.
 */
char *str_dup(const char *src)
{
    /* Bytes to copy: the string itself plus its terminator */
    size_t bytes = str_size(src) + 1;
    char *copy = malloc(bytes);

    if (copy == NULL)
        return NULL;

    memcpy(copy, src, bytes);

    return copy;
}
 
/** Split a string into tokens.
 *
 * Thin wrapper around strtok_r() that keeps the continuation position in a
 * function-local static variable, so all callers share one tokenization
 * state.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Whatever strtok_r() returns for the same arguments.
 */
char *strtok(char *s, const char *delim)
{
    static char *resume;

    return strtok_r(s, delim, &resume);
}
 
/** Split a string into tokens (caller-provided state).
 *
 * Skips leading delimiter characters, then scans up to the next delimiter
 * or the end of the string. The delimiter that ends the token is
 * overwritten with a null terminator, so @a s is modified in place.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Storage for the continuation pointer; updated on each call
 *              that scans a string.
 * @return      Pointer to the token, or NULL if there are no more tokens
 *              (or if both @a s and @a *next are NULL).
 */
char *strtok_r(char *s, const char *delim, char **next)
{
    /* A NULL string means: resume from the saved position. */
    if (s == NULL)
        s = *next;

    /*
     * Nothing to resume from (e.g. continuation requested before any
     * string was ever supplied). Report "no token" instead of
     * dereferencing a NULL pointer below.
     */
    if (s == NULL)
        return NULL;

    /* Skip over leading delimiters. */
    while (*s && (str_chr(delim, *s) != NULL))
        ++s;
    char *start = s;

    /* Skip over token characters. */
    while (*s && (str_chr(delim, *s) == NULL))
        ++s;
    char *end = s;

    /*
     * Resume after the delimiter that ended the token; when the end of
     * the string was reached, stay on the terminator.
     */
    *next = (*s ? s + 1 : s);

    if (start == end) {
        return NULL;    /* No more tokens. */
    }

    /* Overwrite delimiter with null terminator. */
    *end = '\0';
    return start;
}
 
/** @}
 */
 

Rev 4312	Rev 4472
1	/*	1	/*
2	* Copyright (c) 2005 Martin Decky	2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda	3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.	4	* All rights reserved.
5	*	5	*
6	* Redistribution and use in source and binary forms, with or without	6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions	7	* modification, are permitted provided that the following conditions
8	* are met:	8	* are met:
9	*	9	*
10	* - Redistributions of source code must retain the above copyright	10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.	11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright	12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the	13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.	14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products	15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.	16	* derived from this software without specific prior written permission.
17	*	17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR	18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES	19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.	20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,	21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT	22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF	26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/	28	*/
29		29
30	/** @addtogroup libc	30	/** @addtogroup libc
31	* @{	31	* @{
32	*/	32	*/
33	/** @file	33	/** @file
34	*/	34	*/
35		35
36	#include <string.h>	36	#include <string.h>
37	#include <stdlib.h>	37	#include <stdlib.h>
38	#include <assert.h>	38	#include <assert.h>
39	#include <limits.h>	39	#include <limits.h>
40	#include <ctype.h>	40	#include <ctype.h>
41	#include <malloc.h>	41	#include <malloc.h>
42	#include <errno.h>	42	#include <errno.h>
43	#include <align.h>	43	#include <align.h>
44	#include <mem.h>	44	#include <mem.h>
45	#include <string.h>	45	#include <string.h>
46		46
47	/** Byte mask consisting of lowest @n bits (out of 8) */	47	/** Byte mask consisting of lowest @n bits (out of 8) */
48	#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))	48	#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))
49		49
50	/** Byte mask consisting of lowest @n bits (out of 32) */	50	/** Byte mask consisting of lowest @n bits (out of 32) */
51	#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))	51	#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))
52		52
53	/** Byte mask consisting of highest @n bits (out of 8) */	53	/** Byte mask consisting of highest @n bits (out of 8) */
54	#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))	54	#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))
55		55
56	/** Number of data bits in a UTF-8 continuation byte */	56	/** Number of data bits in a UTF-8 continuation byte */
57	#define CONT_BITS 6	57	#define CONT_BITS 6
58		58
59	/** Decode a single character from a string.	59	/** Decode a single character from a string.
60	*	60	*
61	* Decode a single character from a string of size @a size. Decoding starts	61	* Decode a single character from a string of size @a size. Decoding starts
62	* at @a offset and this offset is moved to the beginning of the next	62	* at @a offset and this offset is moved to the beginning of the next
63	* character. In case of decoding error, offset generally advances at least	63	* character. In case of decoding error, offset generally advances at least
64	* by one. However, offset is never moved beyond size.	64	* by one. However, offset is never moved beyond size.
65	*	65	*
66	* @param str String (not necessarily NULL-terminated).	66	* @param str String (not necessarily NULL-terminated).
67	* @param offset Byte offset in string where to start decoding.	67	* @param offset Byte offset in string where to start decoding.
68	* @param size Size of the string (in bytes).	68	* @param size Size of the string (in bytes).
69	*	69	*
70	* @return Value of decoded character, U_SPECIAL on decoding error or	70	* @return Value of decoded character, U_SPECIAL on decoding error or
71	* NULL if attempt to decode beyond @a size.	71	* NULL if attempt to decode beyond @a size.
72	*	72	*
73	*/	73	*/
74	wchar_t str_decode(const char str, size_t offset, size_t size)	74	wchar_t str_decode(const char str, size_t offset, size_t size)
75	{	75	{
76	if (*offset + 1 > size)	76	if (*offset + 1 > size)
77	return 0;	77	return 0;
78		78
79	/* First byte read from string */	79	/* First byte read from string */
80	uint8_t b0 = (uint8_t) str[(*offset)++];	80	uint8_t b0 = (uint8_t) str[(*offset)++];
81		81
82	/* Determine code length */	82	/* Determine code length */
83		83
84	unsigned int b0_bits; /* Data bits in first byte */	84	unsigned int b0_bits; /* Data bits in first byte */
85	unsigned int cbytes; /* Number of continuation bytes */	85	unsigned int cbytes; /* Number of continuation bytes */
86		86
87	if ((b0 & 0x80) == 0) {	87	if ((b0 & 0x80) == 0) {
88	/* 0xxxxxxx (Plain ASCII) */	88	/* 0xxxxxxx (Plain ASCII) */
89	b0_bits = 7;	89	b0_bits = 7;
90	cbytes = 0;	90	cbytes = 0;
91	} else if ((b0 & 0xe0) == 0xc0) {	91	} else if ((b0 & 0xe0) == 0xc0) {
92	/* 110xxxxx 10xxxxxx */	92	/* 110xxxxx 10xxxxxx */
93	b0_bits = 5;	93	b0_bits = 5;
94	cbytes = 1;	94	cbytes = 1;
95	} else if ((b0 & 0xf0) == 0xe0) {	95	} else if ((b0 & 0xf0) == 0xe0) {
96	/* 1110xxxx 10xxxxxx 10xxxxxx */	96	/* 1110xxxx 10xxxxxx 10xxxxxx */
97	b0_bits = 4;	97	b0_bits = 4;
98	cbytes = 2;	98	cbytes = 2;
99	} else if ((b0 & 0xf8) == 0xf0) {	99	} else if ((b0 & 0xf8) == 0xf0) {
100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */	100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101	b0_bits = 3;	101	b0_bits = 3;
102	cbytes = 3;	102	cbytes = 3;
103	} else {	103	} else {
104	/* 10xxxxxx -- unexpected continuation byte */	104	/* 10xxxxxx -- unexpected continuation byte */
105	return U_SPECIAL;	105	return U_SPECIAL;
106	}	106	}
107		107
108	if (*offset + cbytes > size)	108	if (*offset + cbytes > size)
109	return U_SPECIAL;	109	return U_SPECIAL;
110		110
111	wchar_t ch = b0 & LO_MASK_8(b0_bits);	111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
112		112
113	/* Decode continuation bytes */	113	/* Decode continuation bytes */
114	while (cbytes > 0) {	114	while (cbytes > 0) {
115	uint8_t b = (uint8_t) str[(*offset)++];	115	uint8_t b = (uint8_t) str[(*offset)++];
116		116
117	/* Must be 10xxxxxx */	117	/* Must be 10xxxxxx */
118	if ((b & 0xc0) != 0x80)	118	if ((b & 0xc0) != 0x80)
119	return U_SPECIAL;	119	return U_SPECIAL;
120		120
121	/* Shift data bits to ch */	121	/* Shift data bits to ch */
122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));	122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
123	cbytes--;	123	cbytes--;
124	}	124	}
125		125
126	return ch;	126	return ch;
127	}	127	}
128		128
129	/** Encode a single character to string representation.	129	/** Encode a single character to string representation.
130	*	130	*
131	* Encode a single character to string representation (i.e. UTF-8) and store	131	* Encode a single character to string representation (i.e. UTF-8) and store
132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset	132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
133	* is moved to the position where the next character can be written to.	133	* is moved to the position where the next character can be written to.
134	*	134	*
135	* @param ch Input character.	135	* @param ch Input character.
136	* @param str Output buffer.	136	* @param str Output buffer.
137	* @param offset Byte offset where to start writing.	137	* @param offset Byte offset where to start writing.
138	* @param size Size of the output buffer (in bytes).	138	* @param size Size of the output buffer (in bytes).
139	*	139	*
140	* @return EOK if the character was encoded successfully, EOVERFLOW if there	140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
141	* was not enough space in the output buffer or EINVAL if the character	141	* was not enough space in the output buffer or EINVAL if the character
142	* code was invalid.	142	* code was invalid.
143	*/	143	*/
144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)	144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
145	{	145	{
146	if (*offset >= size)	146	if (*offset >= size)
147	return EOVERFLOW;	147	return EOVERFLOW;
148		148
149	if (!chr_check(ch))	149	if (!chr_check(ch))
150	return EINVAL;	150	return EINVAL;
151		151
152	/* Unsigned version of ch (bit operations should only be done	152	/* Unsigned version of ch (bit operations should only be done
153	on unsigned types). */	153	on unsigned types). */
154	uint32_t cc = (uint32_t) ch;	154	uint32_t cc = (uint32_t) ch;
155		155
156	/* Determine how many continuation bytes are needed */	156	/* Determine how many continuation bytes are needed */
157		157
158	unsigned int b0_bits; /* Data bits in first byte */	158	unsigned int b0_bits; /* Data bits in first byte */
159	unsigned int cbytes; /* Number of continuation bytes */	159	unsigned int cbytes; /* Number of continuation bytes */
160		160
161	if ((cc & ~LO_MASK_32(7)) == 0) {	161	if ((cc & ~LO_MASK_32(7)) == 0) {
162	b0_bits = 7;	162	b0_bits = 7;
163	cbytes = 0;	163	cbytes = 0;
164	} else if ((cc & ~LO_MASK_32(11)) == 0) {	164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
165	b0_bits = 5;	165	b0_bits = 5;
166	cbytes = 1;	166	cbytes = 1;
167	} else if ((cc & ~LO_MASK_32(16)) == 0) {	167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
168	b0_bits = 4;	168	b0_bits = 4;
169	cbytes = 2;	169	cbytes = 2;
170	} else if ((cc & ~LO_MASK_32(21)) == 0) {	170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
171	b0_bits = 3;	171	b0_bits = 3;
172	cbytes = 3;	172	cbytes = 3;
173	} else {	173	} else {
174	/* Codes longer than 21 bits are not supported */	174	/* Codes longer than 21 bits are not supported */
175	return EINVAL;	175	return EINVAL;
176	}	176	}
177		177
178	/* Check for available space in buffer */	178	/* Check for available space in buffer */
179	if (*offset + cbytes >= size)	179	if (*offset + cbytes >= size)
180	return EOVERFLOW;	180	return EOVERFLOW;
181		181
182	/* Encode continuation bytes */	182	/* Encode continuation bytes */
183	unsigned int i;	183	unsigned int i;
184	for (i = cbytes; i > 0; i--) {	184	for (i = cbytes; i > 0; i--) {
185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));	185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
186	cc = cc >> CONT_BITS;	186	cc = cc >> CONT_BITS;
187	}	187	}
188		188
189	/* Encode first byte */	189	/* Encode first byte */
190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);	190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
191		191
192	/* Advance offset */	192	/* Advance offset */
193	*offset += cbytes + 1;	193	*offset += cbytes + 1;
194		194
195	return EOK;	195	return EOK;
196	}	196	}
197		197
198	/** Get size of string.	198	/** Get size of string.
199	*	199	*
200	* Get the number of bytes which are used by the string @a str (excluding the	200	* Get the number of bytes which are used by the string @a str (excluding the
201	* NULL-terminator).	201	* NULL-terminator).
202	*	202	*
203	* @param str String to consider.	203	* @param str String to consider.
204	*	204	*
205	* @return Number of bytes used by the string	205	* @return Number of bytes used by the string
206	*	206	*
207	*/	207	*/
208	size_t str_size(const char *str)	208	size_t str_size(const char *str)
209	{	209	{
210	size_t size = 0;	210	size_t size = 0;
211		211
212	while (*str++ != 0)	212	while (*str++ != 0)
213	size++;	213	size++;
214		214
215	return size;	215	return size;
216	}	216	}
217		217
218	/** Get size of wide string.	218	/** Get size of wide string.
219	*	219	*
220	* Get the number of bytes which are used by the wide string @a str (excluding the	220	* Get the number of bytes which are used by the wide string @a str (excluding the
221	* NULL-terminator).	221	* NULL-terminator).
222	*	222	*
223	* @param str Wide string to consider.	223	* @param str Wide string to consider.
224	*	224	*
225	* @return Number of bytes used by the wide string	225	* @return Number of bytes used by the wide string
226	*	226	*
227	*/	227	*/
228	size_t wstr_size(const wchar_t *str)	228	size_t wstr_size(const wchar_t *str)
229	{	229	{
230	return (wstr_length(str) * sizeof(wchar_t));	230	return (wstr_length(str) * sizeof(wchar_t));
231	}	231	}
232		232
233	/** Get size of string with length limit.	233	/** Get size of string with length limit.
234	*	234	*
235	* Get the number of bytes which are used by up to @a max_len first	235	* Get the number of bytes which are used by up to @a max_len first
236	* characters in the string @a str. If @a max_len is greater than	236	* characters in the string @a str. If @a max_len is greater than
237	* the length of @a str, the entire string is measured (excluding the	237	* the length of @a str, the entire string is measured (excluding the
238	* NULL-terminator).	238	* NULL-terminator).
239	*	239	*
240	* @param str String to consider.	240	* @param str String to consider.
241	* @param max_len Maximum number of characters to measure.	241	* @param max_len Maximum number of characters to measure.
242	*	242	*
243	* @return Number of bytes used by the characters.	243	* @return Number of bytes used by the characters.
244	*	244	*
245	*/	245	*/
246	size_t str_lsize(const char *str, count_t max_len)	246	size_t str_lsize(const char *str, size_t max_len)
247	{	247	{
248	count_t len = 0;	248	size_t len = 0;
249	size_t offset = 0;	249	size_t offset = 0;
250		250
251	while (len < max_len) {	251	while (len < max_len) {
252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)	252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253	break;	253	break;
254		254
255	len++;	255	len++;
256	}	256	}
257		257
258	return offset;	258	return offset;
259	}	259	}
260		260
261	/** Get size of wide string with length limit.	261	/** Get size of wide string with length limit.
262	*	262	*
263	* Get the number of bytes which are used by up to @a max_len first	263	* Get the number of bytes which are used by up to @a max_len first
264	* wide characters in the wide string @a str. If @a max_len is greater than	264	* wide characters in the wide string @a str. If @a max_len is greater than
265	* the length of @a str, the entire wide string is measured (excluding the	265	* the length of @a str, the entire wide string is measured (excluding the
266	* NULL-terminator).	266	* NULL-terminator).
267	*	267	*
268	* @param str Wide string to consider.	268	* @param str Wide string to consider.
269	* @param max_len Maximum number of wide characters to measure.	269	* @param max_len Maximum number of wide characters to measure.
270	*	270	*
271	* @return Number of bytes used by the wide characters.	271	* @return Number of bytes used by the wide characters.
272	*	272	*
273	*/	273	*/
274	size_t wstr_lsize(const wchar_t *str, count_t max_len)	274	size_t wstr_lsize(const wchar_t *str, size_t max_len)
275	{	275	{
276	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));	276	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
277	}	277	}
278		278
279	/** Get number of characters in a string.	279	/** Get number of characters in a string.
280	*	280	*
281	* @param str NULL-terminated string.	281	* @param str NULL-terminated string.
282	*	282	*
283	* @return Number of characters in string.	283	* @return Number of characters in string.
284	*	284	*
285	*/	285	*/
286	count_t str_length(const char *str)	286	size_t str_length(const char *str)
287	{	287	{
288	count_t len = 0;	288	size_t len = 0;
289	size_t offset = 0;	289	size_t offset = 0;
290		290
291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)	291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292	len++;	292	len++;
293		293
294	return len;	294	return len;
295	}	295	}
296		296
297	/** Get number of characters in a wide string.	297	/** Get number of characters in a wide string.
298	*	298	*
299	* @param str NULL-terminated wide string.	299	* @param str NULL-terminated wide string.
300	*	300	*
301	* @return Number of characters in @a str.	301	* @return Number of characters in @a str.
302	*	302	*
303	*/	303	*/
304	count_t wstr_length(const wchar_t *wstr)	304	size_t wstr_length(const wchar_t *wstr)
305	{	305	{
306	count_t len = 0;	306	size_t len = 0;
307		307
308	while (*wstr++ != 0)	308	while (*wstr++ != 0)
309	len++;	309	len++;
310		310
311	return len;	311	return len;
312	}	312	}
313		313
314	/** Get number of characters in a string with size limit.	314	/** Get number of characters in a string with size limit.
315	*	315	*
316	* @param str NULL-terminated string.	316	* @param str NULL-terminated string.
317	* @param size Maximum number of bytes to consider.	317	* @param size Maximum number of bytes to consider.
318	*	318	*
319	* @return Number of characters in string.	319	* @return Number of characters in string.
320	*	320	*
321	*/	321	*/
322	count_t str_nlength(const char *str, size_t size)	322	size_t str_nlength(const char *str, size_t size)
323	{	323	{
324	count_t len = 0;	324	size_t len = 0;
325	size_t offset = 0;	325	size_t offset = 0;
326		326
327	while (str_decode(str, &offset, size) != 0)	327	while (str_decode(str, &offset, size) != 0)
328	len++;	328	len++;
329		329
330	return len;	330	return len;
331	}	331	}
332		332
333	/** Get number of characters in a string with size limit.	333	/** Get number of characters in a string with size limit.
334	*	334	*
335	* @param str NULL-terminated string.	335	* @param str NULL-terminated string.
336	* @param size Maximum number of bytes to consider.	336	* @param size Maximum number of bytes to consider.
337	*	337	*
338	* @return Number of characters in string.	338	* @return Number of characters in string.
339	*	339	*
340	*/	340	*/
341	count_t wstr_nlength(const wchar_t *str, size_t size)	341	size_t wstr_nlength(const wchar_t *str, size_t size)
342	{	342	{
343	count_t len = 0;	343	size_t len = 0;
344	count_t limit = ALIGN_DOWN(size, sizeof(wchar_t));	344	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
345	count_t offset = 0;	345	size_t offset = 0;
346		346
347	while ((offset < limit) && (*str++ != 0)) {	347	while ((offset < limit) && (*str++ != 0)) {
348	len++;	348	len++;
349	offset += sizeof(wchar_t);	349	offset += sizeof(wchar_t);
350	}	350	}
351		351
352	return len;	352	return len;
353	}	353	}
354		354
355	/** Check whether character is plain ASCII.	355	/** Check whether character is plain ASCII.
356	*	356	*
357	* @return True if character is plain ASCII.	357	* @return True if character is plain ASCII.
358	*	358	*
359	*/	359	*/
360	bool ascii_check(wchar_t ch)	360	bool ascii_check(wchar_t ch)
361	{	361	{
362	if ((ch >= 0) && (ch <= 127))	362	if ((ch >= 0) && (ch <= 127))
363	return true;	363	return true;
364		364
365	return false;	365	return false;
366	}	366	}
367		367
368	/** Check whether character is valid	368	/** Check whether character is valid
369	*	369	*
370	* @return True if character is a valid Unicode code point.	370	* @return True if character is a valid Unicode code point.
371	*	371	*
372	*/	372	*/
373	bool chr_check(wchar_t ch)	373	bool chr_check(wchar_t ch)
374	{	374	{
375	if ((ch >= 0) && (ch <= 1114111))	375	if ((ch >= 0) && (ch <= 1114111))
376	return true;	376	return true;
377		377
378	return false;	378	return false;
379	}	379	}
380		380
381	/** Compare two NULL terminated strings.	381	/** Compare two NULL terminated strings.
382	*	382	*
383	* Do a char-by-char comparison of two NULL-terminated strings.	383	* Do a char-by-char comparison of two NULL-terminated strings.
384	* The strings are considered equal iff they consist of the same	384	* The strings are considered equal iff they consist of the same
385	* characters on the minimum of their lengths.	385	* characters on the minimum of their lengths.
386	*	386	*
387	* @param s1 First string to compare.	387	* @param s1 First string to compare.
388	* @param s2 Second string to compare.	388	* @param s2 Second string to compare.
389	*	389	*
390	* @return 0 if the strings are equal, -1 if first is smaller,	390	* @return 0 if the strings are equal, -1 if first is smaller,
391	* 1 if second smaller.	391	* 1 if second smaller.
392	*	392	*
393	*/	393	*/
394	int str_cmp(const char s1, const char s2)	394	int str_cmp(const char s1, const char s2)
395	{	395	{
396	wchar_t c1 = 0;	396	wchar_t c1 = 0;
397	wchar_t c2 = 0;	397	wchar_t c2 = 0;
398		398
399	size_t off1 = 0;	399	size_t off1 = 0;
400	size_t off2 = 0;	400	size_t off2 = 0;
401		401
402	while (true) {	402	while (true) {
403	c1 = str_decode(s1, &off1, STR_NO_LIMIT);	403	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
404	c2 = str_decode(s2, &off2, STR_NO_LIMIT);	404	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
405		405
406	if (c1 < c2)	406	if (c1 < c2)
407	return -1;	407	return -1;
408		408
409	if (c1 > c2)	409	if (c1 > c2)
410	return 1;	410	return 1;
411		411
412	if (c1 == 0 \|\| c2 == 0)	412	if (c1 == 0 \|\| c2 == 0)
413	break;	413	break;
414	}	414	}
415		415
416	return 0;	416	return 0;
417	}	417	}
418		418
419	/** Compare two NULL terminated strings with length limit.	419	/** Compare two NULL terminated strings with length limit.
420	*	420	*
421	* Do a char-by-char comparison of two NULL-terminated strings.	421	* Do a char-by-char comparison of two NULL-terminated strings.
422	* The strings are considered equal iff they consist of the same	422	* The strings are considered equal iff they consist of the same
423	* characters on the minimum of their lengths and the length limit.	423	* characters on the minimum of their lengths and the length limit.
424	*	424	*
425	* @param s1 First string to compare.	425	* @param s1 First string to compare.
426	* @param s2 Second string to compare.	426	* @param s2 Second string to compare.
427	* @param max_len Maximum number of characters to consider.	427	* @param max_len Maximum number of characters to consider.
428	*	428	*
429	* @return 0 if the strings are equal, -1 if first is smaller,	429	* @return 0 if the strings are equal, -1 if first is smaller,
430	* 1 if second smaller.	430	* 1 if second smaller.
431	*	431	*
432	*/	432	*/
433	int str_lcmp(const char s1, const char s2, count_t max_len)	433	int str_lcmp(const char s1, const char s2, size_t max_len)
434	{	434	{
435	wchar_t c1 = 0;	435	wchar_t c1 = 0;
436	wchar_t c2 = 0;	436	wchar_t c2 = 0;
437		437
438	size_t off1 = 0;	438	size_t off1 = 0;
439	size_t off2 = 0;	439	size_t off2 = 0;
440		440
441	count_t len = 0;	441	size_t len = 0;
442		442
443	while (true) {	443	while (true) {
444	if (len >= max_len)	444	if (len >= max_len)
445	break;	445	break;
446		446
447	c1 = str_decode(s1, &off1, STR_NO_LIMIT);	447	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
448	c2 = str_decode(s2, &off2, STR_NO_LIMIT);	448	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
449		449
450	if (c1 < c2)	450	if (c1 < c2)
451	return -1;	451	return -1;
452		452
453	if (c1 > c2)	453	if (c1 > c2)
454	return 1;	454	return 1;
455		455
456	if (c1 == 0 \|\| c2 == 0)	456	if (c1 == 0 \|\| c2 == 0)
457	break;	457	break;
458		458
459	++len;	459	++len;
460	}	460	}
461		461
462	return 0;	462	return 0;
463		463
464	}	464	}
465		465
466	/** Copy string.	466	/** Copy string.
467	*	467	*
468	* Copy source string @a src to destination buffer @a dest.	468	* Copy source string @a src to destination buffer @a dest.
469	* No more than @a size bytes are written. If the size of the output buffer	469	* No more than @a size bytes are written. If the size of the output buffer
470	* is at least one byte, the output string will always be well-formed, i.e.	470	* is at least one byte, the output string will always be well-formed, i.e.
471	* null-terminated and containing only complete characters.	471	* null-terminated and containing only complete characters.
472	*	472	*
473	* @param dst Destination buffer.	473	* @param dst Destination buffer.
474	* @param count Size of the destination buffer (must be > 0).	474	* @param count Size of the destination buffer (must be > 0).
475	* @param src Source string.	475	* @param src Source string.
476	*/	476	*/
477	void str_cpy(char dest, size_t size, const char src)	477	void str_cpy(char dest, size_t size, const char src)
478	{	478	{
479	wchar_t ch;	479	wchar_t ch;
480	size_t src_off;	480	size_t src_off;
481	size_t dest_off;	481	size_t dest_off;
482		482
483	/* There must be space for a null terminator in the buffer. */	483	/* There must be space for a null terminator in the buffer. */
484	assert(size > 0);	484	assert(size > 0);
485		485
486	src_off = 0;	486	src_off = 0;
487	dest_off = 0;	487	dest_off = 0;
488		488
489	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {	489	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
490	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)	490	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
491	break;	491	break;
492	}	492	}
493		493
494	dest[dest_off] = '\0';	494	dest[dest_off] = '\0';
495	}	495	}
496		496
497	/** Copy size-limited substring.	497	/** Copy size-limited substring.
498	*	498	*
499	* Copy prefix of string @a src of max. size @a size to destination buffer	499	* Copy prefix of string @a src of max. size @a size to destination buffer
500	* @a dest. No more than @a size bytes are written. The output string will	500	* @a dest. No more than @a size bytes are written. The output string will

Subversion Repositories HelenOS

(root)/trunk/uspace/lib/libc/generic/string.c/ @ 4233 – Rev 4312 → 4472