Subversion Repositories HelenOS

Rev

Rev 3731 | Rev 4234 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright (c) 2005 Martin Decky
  3.  * Copyright (c) 2008 Jiri Svoboda
  4.  * All rights reserved.
  5.  *
  6.  * Redistribution and use in source and binary forms, with or without
  7.  * modification, are permitted provided that the following conditions
  8.  * are met:
  9.  *
  10.  * - Redistributions of source code must retain the above copyright
  11.  *   notice, this list of conditions and the following disclaimer.
  12.  * - Redistributions in binary form must reproduce the above copyright
  13.  *   notice, this list of conditions and the following disclaimer in the
  14.  *   documentation and/or other materials provided with the distribution.
  15.  * - The name of the author may not be used to endorse or promote products
  16.  *   derived from this software without specific prior written permission.
  17.  *
  18.  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  19.  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  20.  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  21.  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  22.  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  23.  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24.  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  27.  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28.  */
  29.  
  30. /** @addtogroup libc
  31.  * @{
  32.  */
  33. /** @file
  34.  */
  35.  
  36. #include <string.h>
  37. #include <stdlib.h>
  38. #include <limits.h>
  39. #include <ctype.h>
  40. #include <malloc.h>
  41. #include <errno.h>
  42. #include <string.h>
  43.  
  /** Byte mask consisting of the lowest @a n bits (out of 8).
   *
   * The shift is done on an unsigned operand so that no signed arithmetic
   * is involved. Valid for @a n in the range 0..8.
   */
  #define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

  /** Mask consisting of the lowest @a n bits (out of 32).
   *
   * The shift is done in a 32-bit unsigned type. @a n must be less than 32.
   */
  #define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

  /** Byte mask consisting of the highest @a n bits (out of 8).
   *
   * The result is cast back to uint8_t: without the cast the operand of ~
   * is promoted to int and the complement has all bits above bit 7 set
   * (e.g. HI_MASK_8(2) would evaluate to -64 instead of 0xc0).
   */
  #define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

  /** Number of data bits in a UTF-8 continuation byte */
  #define CONT_BITS  6
  55.  
  56. /** Decode a single character from a string.
  57.  *
  58.  * Decode a single character from a string of size @a size. Decoding starts
  59.  * at @a offset and this offset is moved to the beginning of the next
  60.  * character. In case of decoding error, offset generally advances at least
  61.  * by one. However, offset is never moved beyond size.
  62.  *
  63.  * @param str    String (not necessarily NULL-terminated).
  64.  * @param offset Byte offset in string where to start decoding.
  65.  * @param size   Size of the string (in bytes).
  66.  *
  67.  * @return Value of decoded character, U_SPECIAL on decoding error or
  68.  *         NULL if attempt to decode beyond @a size.
  69.  *
  70.  */
  71. wchar_t str_decode(const char *str, size_t *offset, size_t size)
  72. {
  73.     if (*offset + 1 > size)
  74.         return 0;
  75.    
  76.     /* First byte read from string */
  77.     uint8_t b0 = (uint8_t) str[(*offset)++];
  78.    
  79.     /* Determine code length */
  80.    
  81.     unsigned int b0_bits;  /* Data bits in first byte */
  82.     unsigned int cbytes;   /* Number of continuation bytes */
  83.    
  84.     if ((b0 & 0x80) == 0) {
  85.         /* 0xxxxxxx (Plain ASCII) */
  86.         b0_bits = 7;
  87.         cbytes = 0;
  88.     } else if ((b0 & 0xe0) == 0xc0) {
  89.         /* 110xxxxx 10xxxxxx */
  90.         b0_bits = 5;
  91.         cbytes = 1;
  92.     } else if ((b0 & 0xf0) == 0xe0) {
  93.         /* 1110xxxx 10xxxxxx 10xxxxxx */
  94.         b0_bits = 4;
  95.         cbytes = 2;
  96.     } else if ((b0 & 0xf8) == 0xf0) {
  97.         /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
  98.         b0_bits = 3;
  99.         cbytes = 3;
  100.     } else {
  101.         /* 10xxxxxx -- unexpected continuation byte */
  102.         return U_SPECIAL;
  103.     }
  104.    
  105.     if (*offset + cbytes > size)
  106.         return U_SPECIAL;
  107.    
  108.     wchar_t ch = b0 & LO_MASK_8(b0_bits);
  109.    
  110.     /* Decode continuation bytes */
  111.     while (cbytes > 0) {
  112.         uint8_t b = (uint8_t) str[(*offset)++];
  113.        
  114.         /* Must be 10xxxxxx */
  115.         if ((b & 0xc0) != 0x80)
  116.             return U_SPECIAL;
  117.        
  118.         /* Shift data bits to ch */
  119.         ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
  120.         cbytes--;
  121.     }
  122.    
  123.     return ch;
  124. }
  125.  
  126. /** Encode a single character to string representation.
  127.  *
  128.  * Encode a single character to string representation (i.e. UTF-8) and store
  129.  * it into a buffer at @a offset. Encoding starts at @a offset and this offset
  130.  * is moved to the position where the next character can be written to.
  131.  *
  132.  * @param ch     Input character.
  133.  * @param str    Output buffer.
  134.  * @param offset Byte offset where to start writing.
  135.  * @param size   Size of the output buffer (in bytes).
  136.  *
  137.  * @return EOK if the character was encoded successfully, EOVERFLOW if there
  138.  *     was not enough space in the output buffer or EINVAL if the character
  139.  *     code was invalid.
  140.  */
  141. int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
  142. {
  143.     if (*offset >= size)
  144.         return EOVERFLOW;
  145.    
  146.     if (!chr_check(ch))
  147.         return EINVAL;
  148.    
  149.     /* Unsigned version of ch (bit operations should only be done
  150.        on unsigned types). */
  151.     uint32_t cc = (uint32_t) ch;
  152.    
  153.     /* Determine how many continuation bytes are needed */
  154.    
  155.     unsigned int b0_bits;  /* Data bits in first byte */
  156.     unsigned int cbytes;   /* Number of continuation bytes */
  157.    
  158.     if ((cc & ~LO_MASK_32(7)) == 0) {
  159.         b0_bits = 7;
  160.         cbytes = 0;
  161.     } else if ((cc & ~LO_MASK_32(11)) == 0) {
  162.         b0_bits = 5;
  163.         cbytes = 1;
  164.     } else if ((cc & ~LO_MASK_32(16)) == 0) {
  165.         b0_bits = 4;
  166.         cbytes = 2;
  167.     } else if ((cc & ~LO_MASK_32(21)) == 0) {
  168.         b0_bits = 3;
  169.         cbytes = 3;
  170.     } else {
  171.         /* Codes longer than 21 bits are not supported */
  172.         return EINVAL;
  173.     }
  174.    
  175.     /* Check for available space in buffer */
  176.     if (*offset + cbytes >= size)
  177.         return EOVERFLOW;
  178.    
  179.     /* Encode continuation bytes */
  180.     unsigned int i;
  181.     for (i = cbytes; i > 0; i--) {
  182.         str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
  183.         cc = cc >> CONT_BITS;
  184.     }
  185.    
  186.     /* Encode first byte */
  187.     str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
  188.    
  189.     /* Advance offset */
  190.     *offset += cbytes + 1;
  191.    
  192.     return EOK;
  193. }
  194.  
  /** Tell whether a character value lies in the Unicode code space.
   *
   * @param ch Character value to test.
   *
   * @return True if @a ch is within 0 .. 0x10ffff (1114111) inclusive,
   *         false otherwise.
   *
   */
  bool chr_check(const wchar_t ch)
  {
      return (ch >= 0) && (ch <= 0x10ffff);
  }
  207.  
  /** Measure the length of a zero-terminated string.
   *
   * @param str       String to measure.
   * @return      Number of bytes that precede the terminating 0.
   */
  size_t strlen(const char *str)
  {
      const char *end = str;

      /* Walk forward until the terminator is reached */
      while (*end != '\0')
          end++;

      return (size_t) (end - str);
  }
  222.  
  /** Compare two zero-terminated strings byte by byte.
   *
   * Bytes are compared as unsigned char values, so that bytes with the top
   * bit set (e.g. parts of UTF-8 sequences) sort after plain ASCII no matter
   * whether plain char is signed on the target.
   *
   * @param a     First string.
   * @param b     Second string.
   * @return      Zero if the strings are equal, a negative value if @a a
   *          sorts before @a b, a positive value if it sorts after.
   */
  int strcmp(const char *a, const char *b)
  {
      size_t i = 0;

      /* If a[i] is non-zero and equal to b[i], b[i] is non-zero as well */
      while ((a[i] != '\0') && (a[i] == b[i]))
          i++;

      return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
  }
  232.  
  /** Compare at most @a n leading bytes of two zero-terminated strings.
   *
   * Comparison stops at the first difference, at the end of either string
   * or after @a n bytes, whichever comes first. Bytes are compared as
   * unsigned char values so that the ordering does not depend on the
   * signedness of plain char.
   *
   * @param a     First string.
   * @param b     Second string.
   * @param n     Maximum number of bytes to compare.
   * @return      Zero if the compared parts are equal (always when @a n
   *          is 0), a negative value if @a a sorts before @a b, a
   *          positive value if it sorts after.
   */
  int strncmp(const char *a, const char *b, size_t n)
  {
      size_t i = 0;

      while ((i < n) && (a[i] != '\0') && (a[i] == b[i]))
          i++;

      /* All n bytes matched */
      if (i >= n)
          return 0;

      return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
  }
  243.  
  /** Compare two zero-terminated strings, ignoring letter case.
   *
   * Each byte is converted to unsigned char before it is handed to
   * tolower(), so that bytes with the top bit set are never passed as
   * negative values and sort after plain ASCII.
   *
   * @param a     First string.
   * @param b     Second string.
   * @return      Zero if the strings are equal apart from letter case,
   *          a negative value if @a a sorts before @a b, a positive
   *          value if it sorts after.
   */
  int stricmp(const char *a, const char *b)
  {
      size_t i = 0;

      while ((a[i] != '\0') && (b[i] != '\0') &&
          (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
          i++;

      return tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]);
  }
  253.  
  /** Return pointer to the first occurrence of character c in string.
   *
   * The terminating zero is treated as part of the string, so searching
   * for '\0' yields a pointer to the terminator rather than NULL.
   *
   * @param str       Scanned string.
   * @param c     Searched character (taken as one byte).
   * @return      Pointer to the matched character or NULL if it is not
   *          found in given string.
   */
  char *strchr(const char *str, int c)
  {
      const char wanted = (char) c;

      for (;; str++) {
          /* Test for a match first so that the terminator can match too */
          if (*str == wanted)
              return (char *) str;

          if (*str == '\0')
              return NULL;
      }
  }
  271.  
  /** Return pointer to the last occurrence of character c in string.
   *
   * The terminating zero is treated as part of the string, so searching
   * for '\0' yields a pointer to the terminator rather than NULL.
   *
   * @param str       Scanned string.
   * @param c     Searched character (taken as one byte).
   * @return      Pointer to the matched character or NULL if it is not
   *          found in given string.
   */
  char *strrchr(const char *str, int c)
  {
      const char wanted = (char) c;
      char *last = NULL;

      for (;; str++) {
          /* Remember every match; the most recent one wins */
          if (*str == wanted)
              last = (char *) str;

          if (*str == '\0')
              return last;
      }
  }
  291.  
  /** Map a character to the digit value it represents.
   *
   * Assumes the letters 'a'-'z' and 'A'-'Z' are contiguous in the execution
   * character set (true for ASCII).
   *
   * @param ch        Character to classify.
   * @return      0..9 for '0'-'9', 10..35 for letters (case is
   *          ignored), -1 for any other character.
   */
  static int strtoul_digit_value(char ch)
  {
      if ((ch >= '0') && (ch <= '9'))
          return ch - '0';

      if ((ch >= 'a') && (ch <= 'z'))
          return ch - 'a' + 10;

      if ((ch >= 'A') && (ch <= 'Z'))
          return ch - 'A' + 10;

      return -1;
  }

  /** Convert string to a number.
   * Core of strtol and strtoul functions.
   *
   * Skips leading whitespace, an optional sign and (for base 16 or 0) an
   * optional `0x'/`0X' prefix, then accumulates digits for as long as they
   * are valid in the given base. The prefix is skipped only when a
   * hexadecimal digit follows it; otherwise the leading `0' is converted on
   * its own.
   *
   * @param nptr      Pointer to string.
   * @param endptr    If not NULL, function stores here pointer to the first
   *          character that is not part of the number. If no digits
   *          were converted or the base is invalid, @a nptr is
   *          stored.
   * @param base      Zero or number between 2 and 36 inclusive.
   * @param sgn       It's set to 1 if minus found. It is left untouched
   *          otherwise, so the caller must initialize it to 0.
   * @return      Magnitude of the converted number, ULONG_MAX if it
   *          does not fit into unsigned long, 0 if nothing was
   *          converted or the base is invalid.
   */
  static unsigned long
  _strtoul(const char *nptr, char **endptr, int base, char *sgn)
  {
      const char *str = nptr;

      while (isspace((unsigned char) *str))
          str++;

      if (*str == '-') {
          *sgn = 1;
          ++str;
      } else if (*str == '+')
          ++str;

      /*
       * A hexadecimal prefix counts only when at least one hexadecimal
       * digit follows it. str[2] is read only if str[1] is 'x' or 'X',
       * so the access never goes past the terminating zero.
       */
      int hex_prefix = 0;
      if ((str[0] == '0') && ((str[1] == 'x') || (str[1] == 'X'))) {
          int first = strtoul_digit_value(str[2]);
          hex_prefix = ((first >= 0) && (first < 16));
      }

      if (base != 0) {
          if ((base < 2) || (base > 36)) {
              /* FIXME: set errno to EINVAL */
              if (endptr)
                  *endptr = (char *) nptr;
              return 0;
          }
          if ((base == 16) && hex_prefix)
              str += 2;
      } else if (hex_prefix) {
          base = 16;
          str += 2;
      } else if (*str == '0') {
          base = 8;
      } else {
          base = 10;
      }

      const char *digits = str;  /* Where the digits are expected to start */
      unsigned long result = 0;
      int overflow = 0;

      for (;; str++) {
          int digit = strtoul_digit_value(*str);

          /* A digit must be strictly smaller than the base */
          if ((digit < 0) || (digit >= base))
              break;

          /* After an overflow just consume the remaining digits */
          if (overflow)
              continue;

          /*
           * Compute result * base + digit in two parts (low 8 bits and
           * the rest) so that an overflow can be detected without
           * actually overflowing.
           */
          unsigned long lo = (result & 0xff) * base + digit;
          unsigned long hi = (result >> 8) * base + (lo >> 8);

          if (hi > (ULONG_MAX >> 8)) {
              /* FIXME: errno = ERANGE */
              overflow = 1;
              continue;
          }

          result = (hi << 8) + (lo & 0xff);
      }

      if (str == digits) {
          /*
           * No number was found => first invalid character is the first
           * character of the string.
           */
          /* FIXME: set errno to EINVAL */
          str = nptr;
          result = 0;
      }

      if (endptr)
          *endptr = (char *) str;

      if (overflow)
          return ULONG_MAX;

      return result;
  }
  384.  
  /** Convert the initial part of a string to long int in the given base.
   *
   * The number may be preceded by any amount of whitespace and by an optional
   * sign (`+' or `-'). With base 0 or 16 a `0x' prefix may be present and the
   * number is then read as hexadecimal. With base 0 a number starting with a
   * zero is read as octal (as with base 8); any other number is read as
   * decimal.
   *
   * A value that does not fit into long int is clamped to LONG_MIN or
   * LONG_MAX according to its sign.
   *
   * @param nptr      Pointer to string.
   * @param endptr    If not NULL, function stores here pointer to the first
   *          invalid character.
   * @param base      Zero or number between 2 and 36 inclusive.
   * @return      Result of conversion.
   */
  long int strtol(const char *nptr, char **endptr, int base)
  {
      char negative = 0;
      const unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

      /* Common case: the magnitude is representable as a positive long */
      if (magnitude <= LONG_MAX)
          return (negative ? -magnitude : magnitude);

      if (negative && (magnitude == (unsigned long) (LONG_MAX) + 1)) {
          /*
           * Magnitude of the most negative value; on two's complement
           * targets the conversion below yields LONG_MIN.
           */
          /* FIXME: set 0 to errno */
          return magnitude;
      }

      /* Out of range: clamp */
      /* FIXME: set ERANGE to errno */
      return (negative ? LONG_MIN : LONG_MAX);
  }
  416.  
  417.  
  /** Convert the initial part of a string to unsigned long in the given base.
   *
   * The number may be preceded by any amount of whitespace and by an optional
   * sign (`+' or `-'). With base 0 or 16 a `0x' prefix may be present and the
   * number is then read as hexadecimal. With base 0 a number starting with a
   * zero is read as octal (as with base 8); any other number is read as
   * decimal.
   *
   * When a minus sign is present the converted magnitude is negated in
   * unsigned arithmetic before it is returned.
   *
   * @param nptr      Pointer to string.
   * @param endptr    If not NULL, function stores here pointer to the first
   *          invalid character
   * @param base      Zero or number between 2 and 36 inclusive.
   * @return      Result of conversion.
   */
  unsigned long strtoul(const char *nptr, char **endptr, int base)
  {
      char negative = 0;
      unsigned long value = _strtoul(nptr, endptr, base, &negative);

      if (negative)
          value = -value;

      return value;
  }
  440.  
  /** Copy a zero-terminated string, terminator included.
   *
   * @param dest      Destination buffer; no bounds checking is done.
   * @param src       String to copy.
   * @return      @a dest.
   */
  char *strcpy(char *dest, const char *src)
  {
      size_t i = 0;
      char c;

      /* Copy byte by byte; the terminating zero is copied last */
      do {
          c = src[i];
          dest[i] = c;
          i++;
      } while (c != '\0');

      return dest;
  }
  449.  
  /** Copy at most @a n bytes of a zero-terminated string.
   *
   * Copying stops after the terminating zero has been copied or after
   * @a n bytes, whichever comes first. With @a n equal to 0 nothing is
   * written at all.
   *
   * Note that if @a src holds @a n or more bytes before its terminator, the
   * destination is NOT zero-terminated. Unlike ISO C strncpy(), this
   * implementation does not fill the rest of the destination with zeros when
   * @a src is shorter than @a n.
   *
   * @param dest      Destination buffer.
   * @param src       String to copy.
   * @param n     Maximum number of bytes to write to @a dest.
   * @return      @a dest.
   */
  char *strncpy(char *dest, const char *src, size_t n)
  {
      for (size_t i = 0; i < n; i++) {
          dest[i] = src[i];

          /* Terminator copied, we are done */
          if (src[i] == '\0')
              break;
      }

      return dest;
  }
  458.  
  /** Append one zero-terminated string to another.
   *
   * @param dest      String to append to; no bounds checking is done.
   * @param src       String to append.
   * @return      @a dest.
   */
  char *strcat(char *dest, const char *src)
  {
      char *tail = dest;

      /* Locate the terminator of the destination string */
      while (*tail != '\0')
          tail++;

      /* Copy the source over it, terminator included */
      for (;;) {
          const char c = *src++;

          *tail++ = c;
          if (c == '\0')
              break;
      }

      return dest;
  }
  469.  
  /** Duplicate a zero-terminated string into newly allocated memory.
   *
   * @param s1        String to duplicate.
   * @return      Copy of @a s1 obtained from malloc(), or NULL if
   *          the allocation failed.
   */
  char *strdup(const char *s1)
  {
      /* Size of the copy, terminating zero included */
      const size_t bytes = strlen(s1) + 1;
      char *copy = malloc(bytes);

      if (copy == NULL)
          return NULL;

      return memcpy(copy, s1, bytes);
  }
  480.  
  /** Split a string into tokens.
   *
   * Thin wrapper around strtok_r(). The continuation position is kept in a
   * single function-local static variable, so all callers share it.
   *
   * @param s     String to tokenize, or NULL to continue with the
   *          string from the previous call.
   * @param delim     Set of delimiter characters.
   * @return      Whatever strtok_r() returns.
   */
  char *strtok(char *s, const char *delim)
  {
      static char *saved_pos;

      return strtok_r(s, delim, &saved_pos);
  }
  487.  
  /** Split a string into tokens, keeping the position in caller's storage.
   *
   * The input string is modified: the delimiter that ends the returned token
   * is overwritten with a zero byte.
   *
   * @param s     String to tokenize, or NULL to continue from the
   *          position stored in @a *next.
   * @param delim     Set of delimiter characters.
   * @param next      Storage for the continuation position; it is updated
   *          on every call.
   * @return      Pointer to the next token or NULL if there are no
   *          more tokens.
   */
  char *strtok_r(char *s, const char *delim, char **next)
  {
      char *cursor = (s != NULL) ? s : *next;

      /* Step over delimiters preceding the token */
      while ((*cursor != '\0') && (strchr(delim, *cursor) != NULL))
          cursor++;

      char *token = cursor;

      /* Step over the token itself */
      while ((*cursor != '\0') && (strchr(delim, *cursor) == NULL))
          cursor++;

      /* Resume after the delimiter, or stay on the end of the string */
      *next = (*cursor != '\0') ? cursor + 1 : cursor;

      /* Empty token means the string is exhausted */
      if (cursor == token)
          return NULL;

      /* Terminate the token in place */
      *cursor = '\0';
      return token;
  }
  512.  
  513. /** @}
  514.  */
  515.