WebSVN – HelenOS – Diff – /trunk/uspace/lib/libc/generic/string.c


/*
 * Copyright (c) 2005 Martin Decky
 * Copyright (c) 2008 Jiri Svoboda
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
/** @addtogroup libc
 * @{
 */
/** @file
 */
 
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <ctype.h>
#include <malloc.h>
#include <errno.h>
#include <align.h>
#include <mem.h>
#include <string.h>
 
/** Byte mask consisting of the lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of the lowest @a n bits (out of 32), 0 <= n <= 31
 *
 * The shift is done on an unsigned 32-bit operand so that no signed
 * overflow can occur for large @a n.
 */
#define LO_MASK_32(n)  ((uint32_t) (((uint32_t) 1 << (n)) - 1))

/** Byte mask consisting of the highest @a n bits (out of 8), 0 <= n <= 8
 *
 * The complement is cast back to uint8_t because integer promotion would
 * otherwise turn the result into a negative int.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
 
/** Decode a single character from a string.
 *
 * Decode a single character from a string of size @a size. Decoding starts
 * at @a offset and this offset is moved to the beginning of the next
 * character. In case of decoding error, offset generally advances at least
 * by one. However, offset is never moved beyond size.
 *
 * @param str    String (not necessarily NULL-terminated).
 * @param offset Byte offset in string where to start decoding.
 * @param size   Size of the string (in bytes).
 *
 * @return Value of decoded character, U_SPECIAL on decoding error or
 *         NULL if attempt to decode beyond @a size.
 *
 */
wchar_t str_decode(const char *str, size_t *offset, size_t size)
{
    if (*offset + 1 > size)
        return 0;
   
    /* First byte read from string */
    uint8_t b0 = (uint8_t) str[(*offset)++];
   
    /* Determine code length */
   
    unsigned int b0_bits;  /* Data bits in first byte */
    unsigned int cbytes;   /* Number of continuation bytes */
   
    if ((b0 & 0x80) == 0) {
        /* 0xxxxxxx (Plain ASCII) */
        b0_bits = 7;
        cbytes = 0;
    } else if ((b0 & 0xe0) == 0xc0) {
        /* 110xxxxx 10xxxxxx */
        b0_bits = 5;
        cbytes = 1;
    } else if ((b0 & 0xf0) == 0xe0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        b0_bits = 4;
        cbytes = 2;
    } else if ((b0 & 0xf8) == 0xf0) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        b0_bits = 3;
        cbytes = 3;
    } else {
        /* 10xxxxxx -- unexpected continuation byte */
        return U_SPECIAL;
    }
   
    if (*offset + cbytes > size)
        return U_SPECIAL;
   
    wchar_t ch = b0 & LO_MASK_8(b0_bits);
   
    /* Decode continuation bytes */
    while (cbytes > 0) {
        uint8_t b = (uint8_t) str[(*offset)++];
       
        /* Must be 10xxxxxx */
        if ((b & 0xc0) != 0x80)
            return U_SPECIAL;
       
        /* Shift data bits to ch */
        ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
        cbytes--;
    }
   
    return ch;
}
 
/** Encode a single character to string representation.
 *
 * Encode a single character to string representation (i.e. UTF-8) and store
 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
 * is moved to the position where the next character can be written to.
 *
 * @param ch     Input character.
 * @param str    Output buffer.
 * @param offset Byte offset where to start writing.
 * @param size   Size of the output buffer (in bytes).
 *
 * @return EOK if the character was encoded successfully, EOVERFLOW if there
 *     was not enough space in the output buffer or EINVAL if the character
 *     code was invalid.
 */
int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
{
    if (*offset >= size)
        return EOVERFLOW;
   
    if (!chr_check(ch))
        return EINVAL;
   
    /* Unsigned version of ch (bit operations should only be done
       on unsigned types). */
    uint32_t cc = (uint32_t) ch;
   
    /* Determine how many continuation bytes are needed */
   
    unsigned int b0_bits;  /* Data bits in first byte */
    unsigned int cbytes;   /* Number of continuation bytes */
   
    if ((cc & ~LO_MASK_32(7)) == 0) {
        b0_bits = 7;
        cbytes = 0;
    } else if ((cc & ~LO_MASK_32(11)) == 0) {
        b0_bits = 5;
        cbytes = 1;
    } else if ((cc & ~LO_MASK_32(16)) == 0) {
        b0_bits = 4;
        cbytes = 2;
    } else if ((cc & ~LO_MASK_32(21)) == 0) {
        b0_bits = 3;
        cbytes = 3;
    } else {
        /* Codes longer than 21 bits are not supported */
        return EINVAL;
    }
   
    /* Check for available space in buffer */
    if (*offset + cbytes >= size)
        return EOVERFLOW;
   
    /* Encode continuation bytes */
    unsigned int i;
    for (i = cbytes; i > 0; i--) {
        str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
        cc = cc >> CONT_BITS;
    }
   
    /* Encode first byte */
    str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
   
    /* Advance offset */
    *offset += cbytes + 1;
   
    return EOK;
}
 
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating NUL byte.
 *
 * @param str NUL-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
    const char *end = str;

    /* Walk to the terminator; the distance travelled is the size. */
    while (*end != 0)
        end++;

    return (size_t) (end - str);
}
 
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str (excluding the
 * NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
    return (wstr_length(str) * sizeof(wchar_t));
}
 
/** Get size of string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * characters in the string @a str. If @a max_len is greater than
 * the length of @a str, the entire string is measured (excluding the
 * NUL terminator).
 *
 * @param str     String to consider.
 * @param max_len Maximum number of characters to measure.
 *
 * @return Number of bytes used by the characters.
 *
 */
size_t str_lsize(const char *str, count_t max_len)
{
    count_t len = 0;
    size_t offset = 0;

    while (len < max_len) {
        /*
         * Decode through a scratch offset: str_decode() also steps
         * over the terminator when it returns 0, and that byte must
         * not be counted in the result.
         */
        size_t next = offset;

        if (str_decode(str, &next, STR_NO_LIMIT) == 0)
            break;

        offset = next;
        len++;
    }

    return offset;
}
 
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most the first @a max_len
 * wide characters of @a str. If the wide string is shorter than
 * @a max_len, the whole string is measured (not counting the terminating
 * null wide character).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, count_t max_len)
{
    /* wstr_nlength() takes its limit in bytes, not in characters. */
    size_t byte_limit = max_len * sizeof(wchar_t);
    count_t chars = wstr_nlength(str, byte_limit);

    return (chars * sizeof(wchar_t));
}
 
/** Get number of characters in a string.
 *
 * Characters are counted by decoding the string one character at a time
 * with str_decode() until it yields 0.
 *
 * @param str NUL-terminated string.
 *
 * @return Number of characters in string.
 *
 */
count_t str_length(const char *str)
{
    size_t pos = 0;
    count_t chars;

    for (chars = 0; str_decode(str, &pos, STR_NO_LIMIT) != 0; chars++)
        ;

    return chars;
}
 
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters in @a wstr preceding the terminator.
 *
 */
count_t wstr_length(const wchar_t *wstr)
{
    count_t idx = 0;

    /* The index of the terminating null character equals the length. */
    while (wstr[idx] != 0)
        idx++;

    return idx;
}
 
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding with str_decode() until it yields 0,
 * which happens at a NUL byte or once @a size bytes have been consumed.
 *
 * @param str  NUL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
count_t str_nlength(const char *str, size_t size)
{
    size_t pos = 0;
    count_t chars;

    for (chars = 0; str_decode(str, &pos, size) != 0; chars++)
        ;

    return chars;
}
 
/** Get number of characters in a wide string with size limit.
 *
 * @param str  Null-terminated wide string.
 * @param size Maximum number of bytes to consider. The value is rounded
 *             down to a multiple of sizeof(wchar_t).
 *
 * @return Number of wide characters found before the terminator or
 *         before the size limit, whichever comes first.
 *
 */
count_t wstr_nlength(const wchar_t *str, size_t size)
{
    /* Only whole wide characters are taken into account. */
    count_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
    count_t chars = 0;
    count_t consumed;

    for (consumed = 0; consumed < limit; consumed += sizeof(wchar_t)) {
        if (str[chars] == 0)
            break;

        chars++;
    }

    return chars;
}
 
/** Check whether character is plain ASCII.
 *
 * @param ch Character to examine.
 *
 * @return True if @a ch lies in the range 0 to 127 (inclusive).
 *
 */
bool ascii_check(wchar_t ch)
{
    return ((ch >= 0) && (ch <= 127));
}
 
/** Check whether character is valid.
 *
 * @param ch Character to examine.
 *
 * @return True if @a ch lies in the Unicode code point range
 *         0 to 0x10ffff (inclusive).
 *
 */
bool chr_check(wchar_t ch)
{
    return ((ch >= 0) && (ch <= 0x10ffff));
}
 
/** Compare two NUL-terminated strings.
 *
 * The strings are decoded and compared character by character. The
 * first differing pair of characters decides the result; the terminator
 * decodes as 0, so a string that is a proper prefix of the other one
 * compares as smaller.
 *
 * @param s1 First string to compare.
 * @param s2 Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int str_cmp(const char *s1, const char *s2)
{
    size_t pos1 = 0;
    size_t pos2 = 0;

    for (;;) {
        wchar_t a = str_decode(s1, &pos1, STR_NO_LIMIT);
        wchar_t b = str_decode(s2, &pos2, STR_NO_LIMIT);

        if (a != b)
            return ((a < b) ? -1 : 1);

        /* Both characters are equal here; 0 means both strings ended. */
        if (a == 0)
            return 0;
    }
}
 
/** Compare two NUL-terminated strings with length limit.
 *
 * The strings are decoded and compared character by character, looking
 * at no more than @a max_len characters. The first differing pair of
 * characters decides the result; if none is found before the limit or
 * before both strings end, the strings are considered equal.
 *
 * @param s1      First string to compare.
 * @param s2      Second string to compare.
 * @param max_len Maximum number of characters to consider.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int str_lcmp(const char *s1, const char *s2, count_t max_len)
{
    size_t pos1 = 0;
    size_t pos2 = 0;
    count_t seen;

    for (seen = 0; seen < max_len; seen++) {
        wchar_t a = str_decode(s1, &pos1, STR_NO_LIMIT);
        wchar_t b = str_decode(s2, &pos2, STR_NO_LIMIT);

        if (a != b)
            return ((a < b) ? -1 : 1);

        /* Both characters are equal here; 0 means both strings ended. */
        if (a == 0)
            break;
    }

    return 0;
}
 
/** Copy NUL-terminated string.
 *
 * Copy source string @a src to destination buffer @a dst.
 * No more than @a size bytes are written. The NUL terminator is always
 * written after the last successfully copied character (i.e. if the
 * destination buffer has at least 1 byte, it will always be
 * NUL-terminated). Only whole characters are copied: when the buffer is
 * too small, the result is truncated at a character boundary.
 *
 * @param dst  Destination buffer.
 * @param src  Source string.
 * @param size Size of the destination buffer (in bytes).
 *
 */
void str_ncpy(char *dst, const char *src, size_t size)
{
    /* No space for the NUL terminator in the buffer */
    if (size == 0)
        return;

    wchar_t ch;
    size_t src_off = 0;
    size_t dst_off = 0;

    /*
     * The encoder is only offered size - 1 bytes so that the last byte
     * stays reserved for the terminator. Otherwise a multi-byte character
     * ending exactly at the end of the buffer would have its final byte
     * overwritten by the terminator, leaving an incomplete sequence.
     */
    while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
        if (chr_encode(ch, dst, &dst_off, size - 1) != EOK)
            break;
    }

    /* dst_off <= size - 1 holds here */
    dst[dst_off] = 0;
}
 
/** Copy NUL-terminated wide string to string
 *
 * Convert source wide string @a src to its string representation and
 * store it in destination buffer @a dst.
 * No more than @a size bytes are written. The NUL terminator is always
 * written after the last successfully converted character (i.e. if the
 * destination buffer has at least 1 byte, it will always be
 * NUL-terminated). Only whole characters are written: when the buffer is
 * too small, the result is truncated at a character boundary. Conversion
 * also stops at the first wide character that can not be encoded.
 *
 * @param dst  Destination buffer.
 * @param src  Source wide string.
 * @param size Size of the destination buffer (in bytes).
 *
 */
void wstr_nstr(char *dst, const wchar_t *src, size_t size)
{
    /* No space for the NUL terminator in the buffer */
    if (size == 0)
        return;

    wchar_t ch;
    count_t src_idx = 0;
    size_t dst_off = 0;

    /*
     * The encoder is only offered size - 1 bytes so that the last byte
     * stays reserved for the terminator. Otherwise a multi-byte character
     * ending exactly at the end of the buffer would have its final byte
     * overwritten by the terminator, leaving an incomplete sequence.
     */
    while ((ch = src[src_idx++]) != 0) {
        if (chr_encode(ch, dst, &dst_off, size - 1) != EOK)
            break;
    }

    /* dst_off <= size - 1 holds here */
    dst[dst_off] = 0;
}
 
/** Find first occurrence of character in string.
 *
 * @param str String to search.
 * @param ch  Character to look for.
 *
 * @return Pointer to the first byte of the matching character in @a str
 *         or NULL if not found.
 */
const char *str_chr(const char *str, wchar_t ch)
{
    wchar_t acc;
    size_t off = 0;
    size_t start = 0;  /* Offset at which the current character begins */

    while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
        /*
         * off already points past the decoded character, so the
         * match has to be reported using its starting offset.
         */
        if (acc == ch)
            return (str + start);

        start = off;
    }

    return NULL;
}
 
/** Find last occurrence of character in string.
 *
 * @param str String to search.
 * @param ch  Character to look for.
 *
 * @return Pointer to the first byte of the last matching character in
 *         @a str or NULL if not found.
 */
const char *str_rchr(const char *str, wchar_t ch)
{
    wchar_t acc;
    size_t off = 0;
    size_t start = 0;  /* Offset at which the current character begins */
    const char *res = NULL;

    while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
        /*
         * off already points past the decoded character, so the
         * match has to be recorded using its starting offset.
         */
        if (acc == ch)
            res = (str + start);

        start = off;
    }

    return res;
}
 
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters from that position on (including the
 * terminator) are shifted by one place towards the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, count_t pos, count_t max_pos)
{
    count_t len = wstr_length(str);

    if (pos > len)
        return false;

    if (pos + 1 > max_pos)
        return false;

    /*
     * Move the tail one slot up, starting from the terminator so that
     * no character is overwritten before it has been copied.
     */
    count_t dst;
    for (dst = len + 1; dst > pos; dst--)
        str[dst] = str[dst - 1];

    str[pos] = ch;

    return true;
}
 
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after
 * that position (including the terminator) are shifted by one place
 * towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, count_t pos)
{
    count_t len = wstr_length(str);

    if (pos >= len)
        return false;

    /* Pull every following element (terminator included) one slot down. */
    count_t dst;
    for (dst = pos; dst < len; dst++)
        str[dst] = str[dst + 1];

    return true;
}
 
/** Compare at most @a n bytes of two NUL-terminated strings.
 *
 * Bytes are compared as unsigned char, so bytes with the highest bit set
 * sort after plain ASCII regardless of the signedness of char.
 *
 * @param a First string.
 * @param b Second string.
 * @param n Maximum number of bytes to compare.
 *
 * @return Zero if the compared parts are equal, a negative value if @a a
 *         sorts before @a b, a positive value if @a a sorts after @a b.
 */
int strncmp(const char *a, const char *b, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        unsigned char ca = (unsigned char) a[i];
        unsigned char cb = (unsigned char) b[i];

        if (ca != cb)
            return ((int) ca - (int) cb);

        /* Equal bytes; a terminator means both strings ended here. */
        if (ca == '\0')
            break;
    }

    return 0;
}
 
/** Compare two NUL-terminated strings ignoring case.
 *
 * Each byte is converted with tolower() before comparison. Bytes are
 * passed to tolower() as unsigned char because negative arguments
 * (other than EOF) are not valid input for the <ctype.h> functions.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, a negative value
 *         if @a a sorts before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
    size_t i = 0;
    int la;
    int lb;

    for (;;) {
        la = tolower((unsigned char) a[i]);
        lb = tolower((unsigned char) b[i]);

        /* Stop at the first difference or at the common terminator. */
        if ((la != lb) || (la == 0))
            break;

        i++;
    }

    return (la - lb);
}
 
/** Get the numeric value of a digit character.
 *
 * Assumes an ASCII-compatible character set (letters are contiguous).
 *
 * @param ch Character to convert.
 * @return   0-9 for decimal digits, 10-35 for letters (either case),
 *           UINT_MAX for any other character (including NUL).
 */
static unsigned int digit_value(char ch)
{
    unsigned char c = (unsigned char) ch;

    if ((c >= '0') && (c <= '9'))
        return (unsigned int) (c - '0');

    if ((c >= 'a') && (c <= 'z'))
        return (unsigned int) (c - 'a') + 10;

    if ((c >= 'A') && (c <= 'Z'))
        return (unsigned int) (c - 'A') + 10;

    return UINT_MAX;
}

/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. A `0x'/`0X' prefix
 * is accepted for base 16 (and selects base 16 when base is 0) only if
 * it is followed by a hexadecimal digit; otherwise the leading `0' is
 * parsed as a number of its own. If the value does not fit into
 * unsigned long, all remaining digits are still consumed and ULONG_MAX
 * is returned.
 *
 * @param nptr      Pointer to string.
 * @param endptr    If not NULL, function stores here pointer to the first
 *          invalid character (or @a nptr if no number was found or
 *          the base is invalid).
 * @param base      Zero or number between 2 and 36 inclusive.
 * @param sgn       It's set to 1 if minus found (left untouched otherwise).
 * @return      Result of conversion (magnitude only, sign is reported
 *          through @a sgn).
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
    unsigned long result = 0;
    const char *str = nptr;
    const char *first_digit;
    bool overflow = false;

    /* <ctype.h> functions must not be given negative char values. */
    while (isspace((unsigned char) *str))
        str++;

    if (*str == '-') {
        *sgn = 1;
        ++str;
    } else if (*str == '+')
        ++str;

    if ((base != 0) && ((base < 2) || (base > 36))) {
        /* FIXME: set errno to EINVAL */
        if (endptr)
            *endptr = (char *) nptr;
        return 0;
    }

    /*
     * str[2] is only inspected when str[0] and str[1] are non-NUL,
     * so the read never goes past the terminator.
     */
    bool hex_prefix = (str[0] == '0') &&
        ((str[1] == 'x') || (str[1] == 'X')) &&
        (digit_value(str[2]) < 16);

    if (base == 0) {
        if (hex_prefix)
            base = 16;
        else if (*str == '0')
            base = 8;
        else
            base = 10;
    }

    if ((base == 16) && hex_prefix)
        str += 2;

    first_digit = str;

    for (;; ++str) {
        unsigned int digit = digit_value(*str);

        /* Valid digits are 0 .. base - 1; NUL maps to UINT_MAX. */
        if (digit >= (unsigned int) base)
            break;

        /* After an overflow just consume the rest of the number. */
        if (overflow)
            continue;

        if (result > (ULONG_MAX - digit) / (unsigned long) base) {
            /* FIXME: errno = ERANGE */
            overflow = true;
            result = ULONG_MAX;
            continue;
        }

        result = result * (unsigned long) base + digit;
    }

    if (str == first_digit) {
        /*
         * No number was found => first invalid character is the first
         * character of the string.
         */
        /* FIXME: set errno to EINVAL */
        str = nptr;
        result = 0;
    }

    if (endptr)
        *endptr = (char *) str;

    return result;
}
 
/** Convert initial part of string to long int according to given base.
 *
 * The number may be preceded by any amount of whitespace and an optional
 * sign (`+' or `-'). With base 0 or 16 a `0x' prefix may be present and
 * the number is then read as hexadecimal. With base 0 a number starting
 * with a zero is read as octal (as with base 8); any other number is read
 * as decimal. Values outside the range of long int are clamped to
 * LONG_MIN or LONG_MAX.
 *
 * @param nptr      Pointer to string.
 * @param endptr    If not NULL, function stores here pointer to the first
 *          invalid character.
 * @param base      Zero or number between 2 and 36 inclusive.
 * @return      Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
    char negative = 0;
    unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

    if (magnitude <= LONG_MAX) {
        /* Fits into long int; only the sign remains to be applied. */
        if (negative)
            return (long int) -magnitude;

        return (long int) magnitude;
    }

    /* The magnitude of LONG_MIN is the single value above LONG_MAX
       that is still representable (when negative). */
    if (negative && (magnitude == (unsigned long) (LONG_MAX) + 1)) {
        /* FIXME: set 0 to errno */
        return (long int) magnitude;
    }

    /* FIXME: set ERANGE to errno */
    if (negative)
        return LONG_MIN;

    return LONG_MAX;
}
 
 
/** Convert initial part of string to unsigned long according to given base.
 *
 * The number may be preceded by any amount of whitespace and an optional
 * sign (`+' or `-'). With base 0 or 16 a `0x' prefix may be present and
 * the number is then read as hexadecimal. With base 0 a number starting
 * with a zero is read as octal (as with base 8); any other number is read
 * as decimal. A leading minus sign negates the result in unsigned
 * arithmetic.
 *
 * @param nptr      Pointer to string.
 * @param endptr    If not NULL, function stores here pointer to the first
 *          invalid character.
 * @param base      Zero or number between 2 and 36 inclusive.
 * @return      Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
    char negative = 0;
    unsigned long value = _strtoul(nptr, endptr, base, &negative);

    if (negative)
        return -value;

    return value;
}
 
/** Copy a NUL-terminated string including its terminator.
 *
 * No bounds checking is done; @a dest must be large enough to hold
 * the whole of @a src.
 *
 * @param dest Destination buffer.
 * @param src  Source string.
 *
 * @return @a dest.
 */
char *strcpy(char *dest, const char *src)
{
    size_t i = 0;
    char c;

    /* The terminator is copied as well, then the loop stops. */
    do {
        c = src[i];
        dest[i] = c;
        i++;
    } while (c != '\0');

    return dest;
}
 
 
 
 
 
 
 
 
 
 
/** Append a NUL-terminated string to another one.
 *
 * No bounds checking is done; @a dest must be large enough to hold
 * both strings and the terminator.
 *
 * @param dest NUL-terminated destination string.
 * @param src  String to append.
 *
 * @return @a dest.
 */
char *strcat(char *dest, const char *src)
{
    char *tail = dest;

    /* Locate the terminator of the destination string. */
    while (*tail != '\0')
        tail++;

    /* Copy the source over it, terminator included. */
    for (;;) {
        char c = *src++;

        *tail++ = c;
        if (c == '\0')
            break;
    }

    return dest;
}
 
/** Duplicate a NUL-terminated string.
 *
 * The copy is placed in memory obtained from malloc().
 *
 * @param src String to duplicate.
 *
 * @return Newly allocated copy of @a src or NULL if the allocation failed.
 */
char *str_dup(const char *src)
{
    /* One extra byte for the terminator. */
    size_t bytes = str_size(src) + 1;
    void *copy = malloc(bytes);

    if (copy == NULL)
        return (char *) NULL;

    memcpy(copy, src, bytes);

    return (char *) copy;
}
 
/** Split a string into tokens.
 *
 * Wrapper around strtok_r() that keeps the continuation pointer in a
 * function-local static variable.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
    static char *saved;
    char *token = strtok_r(s, delim, &saved);

    return token;
}
 
/** Split a string into tokens using a caller-supplied scan position.
 *
 * Leading delimiters are skipped, the following run of non-delimiter
 * characters becomes the token and the delimiter that ends it (if any) is
 * overwritten with a terminator. The input string is modified in place.
 *
 * @param s         String to tokenize, or NULL to continue from @a *next.
 * @param delim     Delimiter characters.
 * @param next      Storage for the position where the next call resumes.
 * @return          Start of the token, or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
    char *cursor = (s != NULL) ? s : *next;

    /* Step over any delimiters in front of the token. */
    for (; *cursor != '\0'; cursor++) {
        if (str_chr(delim, *cursor) == NULL)
            break;
    }

    char *token = cursor;

    /* Advance to the first delimiter (or the terminator) after the token. */
    for (; *cursor != '\0'; cursor++) {
        if (str_chr(delim, *cursor) != NULL)
            break;
    }

    /* Resume just past the delimiter, or stay on the terminator. */
    if (*cursor == '\0')
        *next = cursor;
    else
        *next = cursor + 1;

    /* An empty span means the string is exhausted. */
    if (token == cursor)
        return NULL;

    /* Cut the token off from the rest of the string. */
    *cursor = '\0';
    return token;
}
 
/** @}
 */
 

Rev 4265	Rev 4266
1	/*	1	/*
2	* Copyright (c) 2005 Martin Decky	2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda	3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.	4	* All rights reserved.
5	*	5	*
6	* Redistribution and use in source and binary forms, with or without	6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions	7	* modification, are permitted provided that the following conditions
8	* are met:	8	* are met:
9	*	9	*
10	* - Redistributions of source code must retain the above copyright	10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.	11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright	12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the	13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.	14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products	15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.	16	* derived from this software without specific prior written permission.
17	*	17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR	18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES	19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.	20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,	21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT	22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF	26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/	28	*/
29		29
30	/** @addtogroup libc	30	/** @addtogroup libc
31	* @{	31	* @{
32	*/	32	*/
33	/** @file	33	/** @file
34	*/	34	*/
35		35
36	#include <string.h>	36	#include <string.h>
37	#include <stdlib.h>	37	#include <stdlib.h>
38	#include <limits.h>	38	#include <limits.h>
39	#include <ctype.h>	39	#include <ctype.h>
40	#include <malloc.h>	40	#include <malloc.h>
41	#include <errno.h>	41	#include <errno.h>
42	#include <align.h>	42	#include <align.h>
-		43	#include <mem.h>
43	#include <string.h>	44	#include <string.h>
44		45
45	/** Byte mask consisting of lowest @n bits (out of 8) */	46	/** Byte mask consisting of lowest @n bits (out of 8) */
46	#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))	47	#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))
47		48
48	/** Byte mask consisting of lowest @n bits (out of 32) */	49	/** Byte mask consisting of lowest @n bits (out of 32) */
49	#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))	50	#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))
50		51
51	/** Byte mask consisting of highest @n bits (out of 8) */	52	/** Byte mask consisting of highest @n bits (out of 8) */
52	#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))	53	#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))
53		54
54	/** Number of data bits in a UTF-8 continuation byte */	55	/** Number of data bits in a UTF-8 continuation byte */
55	#define CONT_BITS 6	56	#define CONT_BITS 6
56		57
57	/** Decode a single character from a string.	58	/** Decode a single character from a string.
58	*	59	*
59	* Decode a single character from a string of size @a size. Decoding starts	60	* Decode a single character from a string of size @a size. Decoding starts
60	* at @a offset and this offset is moved to the beginning of the next	61	* at @a offset and this offset is moved to the beginning of the next
61	* character. In case of decoding error, offset generally advances at least	62	* character. In case of decoding error, offset generally advances at least
62	* by one. However, offset is never moved beyond size.	63	* by one. However, offset is never moved beyond size.
63	*	64	*
64	* @param str String (not necessarily NULL-terminated).	65	* @param str String (not necessarily NULL-terminated).
65	* @param offset Byte offset in string where to start decoding.	66	* @param offset Byte offset in string where to start decoding.
66	* @param size Size of the string (in bytes).	67	* @param size Size of the string (in bytes).
67	*	68	*
68	* @return Value of decoded character, U_SPECIAL on decoding error or	69	* @return Value of decoded character, U_SPECIAL on decoding error or
69	* NULL if attempt to decode beyond @a size.	70	* NULL if attempt to decode beyond @a size.
70	*	71	*
71	*/	72	*/
72	wchar_t str_decode(const char str, size_t offset, size_t size)	73	wchar_t str_decode(const char str, size_t offset, size_t size)
73	{	74	{
74	if (*offset + 1 > size)	75	if (*offset + 1 > size)
75	return 0;	76	return 0;
76		77
77	/* First byte read from string */	78	/* First byte read from string */
78	uint8_t b0 = (uint8_t) str[(*offset)++];	79	uint8_t b0 = (uint8_t) str[(*offset)++];
79		80
80	/* Determine code length */	81	/* Determine code length */
81		82
82	unsigned int b0_bits; /* Data bits in first byte */	83	unsigned int b0_bits; /* Data bits in first byte */
83	unsigned int cbytes; /* Number of continuation bytes */	84	unsigned int cbytes; /* Number of continuation bytes */
84		85
85	if ((b0 & 0x80) == 0) {	86	if ((b0 & 0x80) == 0) {
86	/* 0xxxxxxx (Plain ASCII) */	87	/* 0xxxxxxx (Plain ASCII) */
87	b0_bits = 7;	88	b0_bits = 7;
88	cbytes = 0;	89	cbytes = 0;
89	} else if ((b0 & 0xe0) == 0xc0) {	90	} else if ((b0 & 0xe0) == 0xc0) {
90	/* 110xxxxx 10xxxxxx */	91	/* 110xxxxx 10xxxxxx */
91	b0_bits = 5;	92	b0_bits = 5;
92	cbytes = 1;	93	cbytes = 1;
93	} else if ((b0 & 0xf0) == 0xe0) {	94	} else if ((b0 & 0xf0) == 0xe0) {
94	/* 1110xxxx 10xxxxxx 10xxxxxx */	95	/* 1110xxxx 10xxxxxx 10xxxxxx */
95	b0_bits = 4;	96	b0_bits = 4;
96	cbytes = 2;	97	cbytes = 2;
97	} else if ((b0 & 0xf8) == 0xf0) {	98	} else if ((b0 & 0xf8) == 0xf0) {
98	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */	99	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
99	b0_bits = 3;	100	b0_bits = 3;
100	cbytes = 3;	101	cbytes = 3;
101	} else {	102	} else {
102	/* 10xxxxxx -- unexpected continuation byte */	103	/* 10xxxxxx -- unexpected continuation byte */
103	return U_SPECIAL;	104	return U_SPECIAL;
104	}	105	}
105		106
106	if (*offset + cbytes > size)	107	if (*offset + cbytes > size)
107	return U_SPECIAL;	108	return U_SPECIAL;
108		109
109	wchar_t ch = b0 & LO_MASK_8(b0_bits);	110	wchar_t ch = b0 & LO_MASK_8(b0_bits);
110		111
111	/* Decode continuation bytes */	112	/* Decode continuation bytes */
112	while (cbytes > 0) {	113	while (cbytes > 0) {
113	uint8_t b = (uint8_t) str[(*offset)++];	114	uint8_t b = (uint8_t) str[(*offset)++];
114		115
115	/* Must be 10xxxxxx */	116	/* Must be 10xxxxxx */
116	if ((b & 0xc0) != 0x80)	117	if ((b & 0xc0) != 0x80)
117	return U_SPECIAL;	118	return U_SPECIAL;
118		119
119	/* Shift data bits to ch */	120	/* Shift data bits to ch */
120	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));	121	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
121	cbytes--;	122	cbytes--;
122	}	123	}
123		124
124	return ch;	125	return ch;
125	}	126	}
126		127
127	/** Encode a single character to string representation.	128	/** Encode a single character to string representation.
128	*	129	*
129	* Encode a single character to string representation (i.e. UTF-8) and store	130	* Encode a single character to string representation (i.e. UTF-8) and store
130	* it into a buffer at @a offset. Encoding starts at @a offset and this offset	131	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
131	* is moved to the position where the next character can be written to.	132	* is moved to the position where the next character can be written to.
132	*	133	*
133	* @param ch Input character.	134	* @param ch Input character.
134	* @param str Output buffer.	135	* @param str Output buffer.
135	* @param offset Byte offset where to start writing.	136	* @param offset Byte offset where to start writing.
136	* @param size Size of the output buffer (in bytes).	137	* @param size Size of the output buffer (in bytes).
137	*	138	*
138	* @return EOK if the character was encoded successfully, EOVERFLOW if there	139	* @return EOK if the character was encoded successfully, EOVERFLOW if there
139	* was not enough space in the output buffer or EINVAL if the character	140	* was not enough space in the output buffer or EINVAL if the character
140	* code was invalid.	141	* code was invalid.
141	*/	142	*/
142	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)	143	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
143	{	144	{
144	if (*offset >= size)	145	if (*offset >= size)
145	return EOVERFLOW;	146	return EOVERFLOW;
146		147
147	if (!chr_check(ch))	148	if (!chr_check(ch))
148	return EINVAL;	149	return EINVAL;
149		150
150	/* Unsigned version of ch (bit operations should only be done	151	/* Unsigned version of ch (bit operations should only be done
151	on unsigned types). */	152	on unsigned types). */
152	uint32_t cc = (uint32_t) ch;	153	uint32_t cc = (uint32_t) ch;
153		154
154	/* Determine how many continuation bytes are needed */	155	/* Determine how many continuation bytes are needed */
155		156
156	unsigned int b0_bits; /* Data bits in first byte */	157	unsigned int b0_bits; /* Data bits in first byte */
157	unsigned int cbytes; /* Number of continuation bytes */	158	unsigned int cbytes; /* Number of continuation bytes */
158		159
159	if ((cc & ~LO_MASK_32(7)) == 0) {	160	if ((cc & ~LO_MASK_32(7)) == 0) {
160	b0_bits = 7;	161	b0_bits = 7;
161	cbytes = 0;	162	cbytes = 0;
162	} else if ((cc & ~LO_MASK_32(11)) == 0) {	163	} else if ((cc & ~LO_MASK_32(11)) == 0) {
163	b0_bits = 5;	164	b0_bits = 5;
164	cbytes = 1;	165	cbytes = 1;
165	} else if ((cc & ~LO_MASK_32(16)) == 0) {	166	} else if ((cc & ~LO_MASK_32(16)) == 0) {
166	b0_bits = 4;	167	b0_bits = 4;
167	cbytes = 2;	168	cbytes = 2;
168	} else if ((cc & ~LO_MASK_32(21)) == 0) {	169	} else if ((cc & ~LO_MASK_32(21)) == 0) {
169	b0_bits = 3;	170	b0_bits = 3;
170	cbytes = 3;	171	cbytes = 3;
171	} else {	172	} else {
172	/* Codes longer than 21 bits are not supported */	173	/* Codes longer than 21 bits are not supported */
173	return EINVAL;	174	return EINVAL;
174	}	175	}
175		176
176	/* Check for available space in buffer */	177	/* Check for available space in buffer */
177	if (*offset + cbytes >= size)	178	if (*offset + cbytes >= size)
178	return EOVERFLOW;	179	return EOVERFLOW;
179		180
180	/* Encode continuation bytes */	181	/* Encode continuation bytes */
181	unsigned int i;	182	unsigned int i;
182	for (i = cbytes; i > 0; i--) {	183	for (i = cbytes; i > 0; i--) {
183	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));	184	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
184	cc = cc >> CONT_BITS;	185	cc = cc >> CONT_BITS;
185	}	186	}
186		187
187	/* Encode first byte */	188	/* Encode first byte */
188	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);	189	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
189		190
190	/* Advance offset */	191	/* Advance offset */
191	*offset += cbytes + 1;	192	*offset += cbytes + 1;
192		193
193	return EOK;	194	return EOK;
194	}	195	}
195		196
196	/** Get size of string.	197	/** Get size of string.
197	*	198	*
198	* Get the number of bytes which are used by the string @a str (excluding the	199	* Get the number of bytes which are used by the string @a str (excluding the
199	* NULL-terminator).	200	* NULL-terminator).
200	*	201	*
201	* @param str String to consider.	202	* @param str String to consider.
202	*	203	*
203	* @return Number of bytes used by the string	204	* @return Number of bytes used by the string
204	*	205	*
205	*/	206	*/
206	size_t str_size(const char *str)	207	size_t str_size(const char *str)
207	{	208	{
208	size_t size = 0;	209	size_t size = 0;
209		210
210	while (*str++ != 0)	211	while (*str++ != 0)
211	size++;	212	size++;
212		213
213	return size;	214	return size;
214	}	215	}
215		216
216	/** Get size of wide string.	217	/** Get size of wide string.
217	*	218	*
218	* Get the number of bytes which are used by the wide string @a str (excluding the	219	* Get the number of bytes which are used by the wide string @a str (excluding the
219	* NULL-terminator).	220	* NULL-terminator).
220	*	221	*
221	* @param str Wide string to consider.	222	* @param str Wide string to consider.
222	*	223	*
223	* @return Number of bytes used by the wide string	224	* @return Number of bytes used by the wide string
224	*	225	*
225	*/	226	*/
226	size_t wstr_size(const wchar_t *str)	227	size_t wstr_size(const wchar_t *str)
227	{	228	{
228	return (wstr_length(str) * sizeof(wchar_t));	229	return (wstr_length(str) * sizeof(wchar_t));
229	}	230	}
230		231
231	/** Get size of string with length limit.	232	/** Get size of string with length limit.
232	*	233	*
233	* Get the number of bytes which are used by up to @a max_len first	234	* Get the number of bytes which are used by up to @a max_len first
234	* characters in the string @a str. If @a max_len is greater than	235	* characters in the string @a str. If @a max_len is greater than
235	* the length of @a str, the entire string is measured (excluding the	236	* the length of @a str, the entire string is measured (excluding the
236	* NULL-terminator).	237	* NULL-terminator).
237	*	238	*
238	* @param str String to consider.	239	* @param str String to consider.
239	* @param max_len Maximum number of characters to measure.	240	* @param max_len Maximum number of characters to measure.
240	*	241	*
241	* @return Number of bytes used by the characters.	242	* @return Number of bytes used by the characters.
242	*	243	*
243	*/	244	*/
244	size_t str_lsize(const char *str, count_t max_len)	245	size_t str_lsize(const char *str, count_t max_len)
245	{	246	{
246	count_t len = 0;	247	count_t len = 0;
247	size_t offset = 0;	248	size_t offset = 0;
248		249
249	while (len < max_len) {	250	while (len < max_len) {
250	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)	251	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
251	break;	252	break;
252		253
253	len++;	254	len++;
254	}	255	}
255		256
256	return offset;	257	return offset;
257	}	258	}
258		259
259	/** Get size of wide string with length limit.	260	/** Get size of wide string with length limit.
260	*	261	*
261	* Get the number of bytes which are used by up to @a max_len first	262	* Get the number of bytes which are used by up to @a max_len first
262	* wide characters in the wide string @a str. If @a max_len is greater than	263	* wide characters in the wide string @a str. If @a max_len is greater than
263	* the length of @a str, the entire wide string is measured (excluding the	264	* the length of @a str, the entire wide string is measured (excluding the
264	* NULL-terminator).	265	* NULL-terminator).
265	*	266	*
266	* @param str Wide string to consider.	267	* @param str Wide string to consider.
267	* @param max_len Maximum number of wide characters to measure.	268	* @param max_len Maximum number of wide characters to measure.
268	*	269	*
269	* @return Number of bytes used by the wide characters.	270	* @return Number of bytes used by the wide characters.
270	*	271	*
271	*/	272	*/
272	size_t wstr_lsize(const wchar_t *str, count_t max_len)	273	size_t wstr_lsize(const wchar_t *str, count_t max_len)
273	{	274	{
274	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));	275	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
275	}	276	}
276		277
277	/** Get number of characters in a string.	278	/** Get number of characters in a string.
278	*	279	*
279	* @param str NULL-terminated string.	280	* @param str NULL-terminated string.
280	*	281	*
281	* @return Number of characters in string.	282	* @return Number of characters in string.
282	*	283	*
283	*/	284	*/
284	count_t str_length(const char *str)	285	count_t str_length(const char *str)
285	{	286	{
286	count_t len = 0;	287	count_t len = 0;
287	size_t offset = 0;	288	size_t offset = 0;
288		289
289	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)	290	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
290	len++;	291	len++;
291		292
292	return len;	293	return len;
293	}	294	}
294		295
295	/** Get number of characters in a wide string.	296	/** Get number of characters in a wide string.
296	*	297	*
297	* @param str NULL-terminated wide string.	298	* @param str NULL-terminated wide string.
298	*	299	*
299	* @return Number of characters in @a str.	300	* @return Number of characters in @a str.
300	*	301	*
301	*/	302	*/
302	count_t wstr_length(const wchar_t *wstr)	303	count_t wstr_length(const wchar_t *wstr)
303	{	304	{
304	count_t len = 0;	305	count_t len = 0;
305		306
306	while (*wstr++ != 0)	307	while (*wstr++ != 0)
307	len++;	308	len++;
308		309
309	return len;	310	return len;
310	}	311	}
311		312
312	/** Get number of characters in a string with size limit.	313	/** Get number of characters in a string with size limit.
313	*	314	*
314	* @param str NULL-terminated string.	315	* @param str NULL-terminated string.
315	* @param size Maximum number of bytes to consider.	316	* @param size Maximum number of bytes to consider.
316	*	317	*
317	* @return Number of characters in string.	318	* @return Number of characters in string.
318	*	319	*
319	*/	320	*/
320	count_t str_nlength(const char *str, size_t size)	321	count_t str_nlength(const char *str, size_t size)
321	{	322	{
322	count_t len = 0;	323	count_t len = 0;
323	size_t offset = 0;	324	size_t offset = 0;
324		325
325	while (str_decode(str, &offset, size) != 0)	326	while (str_decode(str, &offset, size) != 0)
326	len++;	327	len++;
327		328
328	return len;	329	return len;
329	}	330	}
330		331
331	/** Get number of characters in a string with size limit.	332	/** Get number of characters in a string with size limit.
332	*	333	*
333	* @param str NULL-terminated string.	334	* @param str NULL-terminated string.
334	* @param size Maximum number of bytes to consider.	335	* @param size Maximum number of bytes to consider.
335	*	336	*
336	* @return Number of characters in string.	337	* @return Number of characters in string.
337	*	338	*
338	*/	339	*/
339	count_t wstr_nlength(const wchar_t *str, size_t size)	340	count_t wstr_nlength(const wchar_t *str, size_t size)
340	{	341	{
341	count_t len = 0;	342	count_t len = 0;
342	count_t limit = ALIGN_DOWN(size, sizeof(wchar_t));	343	count_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
343	count_t offset = 0;	344	count_t offset = 0;
344		345
345	while ((offset < limit) && (*str++ != 0)) {	346	while ((offset < limit) && (*str++ != 0)) {
346	len++;	347	len++;
347	offset += sizeof(wchar_t);	348	offset += sizeof(wchar_t);
348	}	349	}
349		350
350	return len;	351	return len;
351	}	352	}
352		353
353	/** Check whether character is plain ASCII.	354	/** Check whether character is plain ASCII.
354	*	355	*
355	* @return True if character is plain ASCII.	356	* @return True if character is plain ASCII.
356	*	357	*
357	*/	358	*/
358	bool ascii_check(wchar_t ch)	359	bool ascii_check(wchar_t ch)
359	{	360	{
360	if ((ch >= 0) && (ch <= 127))	361	if ((ch >= 0) && (ch <= 127))
361	return true;	362	return true;
362		363
363	return false;	364	return false;
364	}	365	}
365		366
366	/** Check whether character is valid	367	/** Check whether character is valid
367	*	368	*
368	* @return True if character is a valid Unicode code point.	369	* @return True if character is a valid Unicode code point.
369	*	370	*
370	*/	371	*/
371	bool chr_check(wchar_t ch)	372	bool chr_check(wchar_t ch)
372	{	373	{
373	if ((ch >= 0) && (ch <= 1114111))	374	if ((ch >= 0) && (ch <= 1114111))
374	return true;	375	return true;
375		376
376	return false;	377	return false;
377	}	378	}
378		379
379	/** Compare two NULL terminated strings.	380	/** Compare two NULL terminated strings.
380	*	381	*
381	* Do a char-by-char comparison of two NULL-terminated strings.	382	* Do a char-by-char comparison of two NULL-terminated strings.
382	* The strings are considered equal iff they consist of the same	383	* The strings are considered equal iff they consist of the same
383	* characters on the minimum of their lengths.	384	* characters on the minimum of their lengths.
384	*	385	*
385	* @param s1 First string to compare.	386	* @param s1 First string to compare.
386	* @param s2 Second string to compare.	387	* @param s2 Second string to compare.
387	*	388	*
388	* @return 0 if the strings are equal, -1 if first is smaller,	389	* @return 0 if the strings are equal, -1 if first is smaller,
389	* 1 if second smaller.	390	* 1 if second smaller.
390	*	391	*
391	*/	392	*/
392	int str_cmp(const char s1, const char s2)	393	int str_cmp(const char s1, const char s2)
393	{	394	{
394	wchar_t c1 = 0;	395	wchar_t c1 = 0;
395	wchar_t c2 = 0;	396	wchar_t c2 = 0;
396		397
397	size_t off1 = 0;	398	size_t off1 = 0;
398	size_t off2 = 0;	399	size_t off2 = 0;
399		400
400	while (true) {	401	while (true) {
401	c1 = str_decode(s1, &off1, STR_NO_LIMIT);	402	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
402	c2 = str_decode(s2, &off2, STR_NO_LIMIT);	403	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
403		404
404	if (c1 < c2)	405	if (c1 < c2)
405	return -1;	406	return -1;
406		407
407	if (c1 > c2)	408	if (c1 > c2)
408	return 1;	409	return 1;
409		410
410	if (c1 == 0 \|\| c2 == 0)	411	if (c1 == 0 \|\| c2 == 0)
411	break;	412	break;
412	}	413	}
413		414
414	return 0;	415	return 0;
415	}	416	}
416		417
417	/** Compare two NULL terminated strings with length limit.	418	/** Compare two NULL terminated strings with length limit.
418	*	419	*
419	* Do a char-by-char comparison of two NULL-terminated strings.	420	* Do a char-by-char comparison of two NULL-terminated strings.
420	* The strings are considered equal iff they consist of the same	421	* The strings are considered equal iff they consist of the same
421	* characters on the minimum of their lengths and the length limit.	422	* characters on the minimum of their lengths and the length limit.
422	*	423	*
423	* @param s1 First string to compare.	424	* @param s1 First string to compare.
424	* @param s2 Second string to compare.	425	* @param s2 Second string to compare.
425	* @param max_len Maximum number of characters to consider.	426	* @param max_len Maximum number of characters to consider.
426	*	427	*
427	* @return 0 if the strings are equal, -1 if first is smaller,	428	* @return 0 if the strings are equal, -1 if first is smaller,
428	* 1 if second smaller.	429	* 1 if second smaller.
429	*	430	*
430	*/	431	*/
431	int str_lcmp(const char s1, const char s2, count_t max_len)	432	int str_lcmp(const char s1, const char s2, count_t max_len)
432	{	433	{
433	wchar_t c1 = 0;	434	wchar_t c1 = 0;
434	wchar_t c2 = 0;	435	wchar_t c2 = 0;
435		436
436	size_t off1 = 0;	437	size_t off1 = 0;
437	size_t off2 = 0;	438	size_t off2 = 0;
438		439
439	count_t len = 0;	440	count_t len = 0;
440		441
441	while (true) {	442	while (true) {
442	if (len >= max_len)	443	if (len >= max_len)
443	break;	444	break;
444		445
445	c1 = str_decode(s1, &off1, STR_NO_LIMIT);	446	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
446	c2 = str_decode(s2, &off2, STR_NO_LIMIT);	447	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
447		448
448	if (c1 < c2)	449	if (c1 < c2)
449	return -1;	450	return -1;
450		451
451	if (c1 > c2)	452	if (c1 > c2)
452	return 1;	453	return 1;
453		454
454	if (c1 == 0 \|\| c2 == 0)	455	if (c1 == 0 \|\| c2 == 0)
455	break;	456	break;
456		457
457	++len;	458	++len;
458	}	459	}
459		460
460	return 0;	461	return 0;
461		462
462	}	463	}
463		464
464	/** Copy NULL-terminated string.	465	/** Copy NULL-terminated string.
465	*	466	*
466	* Copy source string @a src to destination buffer @a dst.	467	* Copy source string @a src to destination buffer @a dst.
467	* No more than @a size bytes are written. NULL-terminator is always	468	* No more than @a size bytes are written. NULL-terminator is always
468	* written after the last succesfully copied character (i.e. if the	469	* written after the last succesfully copied character (i.e. if the
469	* destination buffer is has at least 1 byte, it will be always	470	* destination buffer is has at least 1 byte, it will be always
470	* NULL-terminated).	471	* NULL-terminated).
471	*	472	*
472	* @param src Source string.	473	* @param src Source string.
473	* @param dst Destination buffer.	474	* @param dst Destination buffer.
474	* @param count Size of the destination buffer.	475	* @param count Size of the destination buffer.
475	*	476	*
476	*/	477	*/
477	void str_ncpy(char dst, const char src, size_t size)	478	void str_ncpy(char dst, const char src, size_t size)
478	{	479	{
479	/* No space for the NULL-terminator in the buffer */	480	/* No space for the NULL-terminator in the buffer */
480	if (size == 0)	481	if (size == 0)
481	return;	482	return;
482		483
483	wchar_t ch;	484	wchar_t ch;
484	size_t str_off = 0;	485	size_t str_off = 0;
485	size_t dst_off = 0;	486	size_t dst_off = 0;
486		487
487	while ((ch = str_decode(src, &str_off, STR_NO_LIMIT)) != 0) {	488	while ((ch = str_decode(src, &str_off, STR_NO_LIMIT)) != 0) {
488	if (chr_encode(ch, dst, &dst_off, size) != EOK)	489	if (chr_encode(ch, dst, &dst_off, size) != EOK)
489	break;	490	break;
490	}	491	}
491		492
492	if (dst_off >= size)	493	if (dst_off >= size)
493	dst[size - 1] = 0;	494	dst[size - 1] = 0;
494	else	495	else
495	dst[dst_off] = 0;	496	dst[dst_off] = 0;
496	}	497	}
497		498
498	/** Copy NULL-terminated wide string to string	499	/** Copy NULL-terminated wide string to string
499	*	500	*

Subversion Repositories HelenOS

(root)/trunk/uspace/lib/libc/generic/string.c @ 4591 – Rev 4265 → 4266