Subversion Repositories HelenOS

Rev

Rev 3731 | Rev 4234 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
999 palkovsky 1
/*
 * Copyright (c) 2005 Martin Decky
 * Copyright (c) 2008 Jiri Svoboda
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29
 
1719 decky 30
/** @addtogroup libc
 * @{
 */
/** @file
 */
35
 
999 palkovsky 36
#include <string.h>
3731 svoboda 37
#include <stdlib.h>
38
#include <limits.h>
1314 cejka 39
#include <ctype.h>
3238 jermar 40
#include <malloc.h>
4226 svoboda 41
#include <errno.h>
42
#include <string.h>
999 palkovsky 43
 
4226 svoboda 44
/** Byte mask consisting of lowest @n bits (out of 8).
 *
 * Valid for @n in the range 0..8. The shift is done on an unsigned
 * operand and the result is truncated to 8 bits.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Bit mask consisting of lowest @n bits (out of 32).
 *
 * Valid for @n in the range 0..31. The shifted operand is a 32-bit
 * unsigned value, so setting bit 31 does not overflow a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

/** Byte mask consisting of highest @n bits (out of 8).
 *
 * Valid for @n in the range 0..8. The complement is computed after
 * integer promotion, so it is cast back to uint8_t to keep the result
 * an 8-bit mask instead of a negative int with all upper bits set.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
55
 
56
/** Decode a single character from a string.
57
 *
58
 * Decode a single character from a string of size @a size. Decoding starts
59
 * at @a offset and this offset is moved to the beginning of the next
60
 * character. In case of decoding error, offset generally advances at least
61
 * by one. However, offset is never moved beyond size.
62
 *
63
 * @param str    String (not necessarily NULL-terminated).
64
 * @param offset Byte offset in string where to start decoding.
65
 * @param size   Size of the string (in bytes).
66
 *
67
 * @return Value of decoded character, U_SPECIAL on decoding error or
68
 *         NULL if attempt to decode beyond @a size.
69
 *
70
 */
71
wchar_t str_decode(const char *str, size_t *offset, size_t size)
72
{
73
	if (*offset + 1 > size)
74
		return 0;
75
 
76
	/* First byte read from string */
77
	uint8_t b0 = (uint8_t) str[(*offset)++];
78
 
79
	/* Determine code length */
80
 
81
	unsigned int b0_bits;  /* Data bits in first byte */
82
	unsigned int cbytes;   /* Number of continuation bytes */
83
 
84
	if ((b0 & 0x80) == 0) {
85
		/* 0xxxxxxx (Plain ASCII) */
86
		b0_bits = 7;
87
		cbytes = 0;
88
	} else if ((b0 & 0xe0) == 0xc0) {
89
		/* 110xxxxx 10xxxxxx */
90
		b0_bits = 5;
91
		cbytes = 1;
92
	} else if ((b0 & 0xf0) == 0xe0) {
93
		/* 1110xxxx 10xxxxxx 10xxxxxx */
94
		b0_bits = 4;
95
		cbytes = 2;
96
	} else if ((b0 & 0xf8) == 0xf0) {
97
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
98
		b0_bits = 3;
99
		cbytes = 3;
100
	} else {
101
		/* 10xxxxxx -- unexpected continuation byte */
102
		return U_SPECIAL;
103
	}
104
 
105
	if (*offset + cbytes > size)
106
		return U_SPECIAL;
107
 
108
	wchar_t ch = b0 & LO_MASK_8(b0_bits);
109
 
110
	/* Decode continuation bytes */
111
	while (cbytes > 0) {
112
		uint8_t b = (uint8_t) str[(*offset)++];
113
 
114
		/* Must be 10xxxxxx */
115
		if ((b & 0xc0) != 0x80)
116
			return U_SPECIAL;
117
 
118
		/* Shift data bits to ch */
119
		ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
120
		cbytes--;
121
	}
122
 
123
	return ch;
124
}
125
 
126
/** Encode a single character to string representation.
127
 *
128
 * Encode a single character to string representation (i.e. UTF-8) and store
129
 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
130
 * is moved to the position where the next character can be written to.
131
 *
132
 * @param ch     Input character.
133
 * @param str    Output buffer.
134
 * @param offset Byte offset where to start writing.
135
 * @param size   Size of the output buffer (in bytes).
136
 *
137
 * @return EOK if the character was encoded successfully, EOVERFLOW if there
138
 *	   was not enough space in the output buffer or EINVAL if the character
139
 *	   code was invalid.
140
 */
141
int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
142
{
143
	if (*offset >= size)
144
		return EOVERFLOW;
145
 
146
	if (!chr_check(ch))
147
		return EINVAL;
148
 
149
	/* Unsigned version of ch (bit operations should only be done
150
	   on unsigned types). */
151
	uint32_t cc = (uint32_t) ch;
152
 
153
	/* Determine how many continuation bytes are needed */
154
 
155
	unsigned int b0_bits;  /* Data bits in first byte */
156
	unsigned int cbytes;   /* Number of continuation bytes */
157
 
158
	if ((cc & ~LO_MASK_32(7)) == 0) {
159
		b0_bits = 7;
160
		cbytes = 0;
161
	} else if ((cc & ~LO_MASK_32(11)) == 0) {
162
		b0_bits = 5;
163
		cbytes = 1;
164
	} else if ((cc & ~LO_MASK_32(16)) == 0) {
165
		b0_bits = 4;
166
		cbytes = 2;
167
	} else if ((cc & ~LO_MASK_32(21)) == 0) {
168
		b0_bits = 3;
169
		cbytes = 3;
170
	} else {
171
		/* Codes longer than 21 bits are not supported */
172
		return EINVAL;
173
	}
174
 
175
	/* Check for available space in buffer */
176
	if (*offset + cbytes >= size)
177
		return EOVERFLOW;
178
 
179
	/* Encode continuation bytes */
180
	unsigned int i;
181
	for (i = cbytes; i > 0; i--) {
182
		str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
183
		cc = cc >> CONT_BITS;
184
	}
185
 
186
	/* Encode first byte */
187
	str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
188
 
189
	/* Advance offset */
190
	*offset += cbytes + 1;
191
 
192
	return EOK;
193
}
194
 
195
/** Check whether a character is valid.
 *
 * @param ch Character code to check.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff (inclusive),
 *         false otherwise.
 *
 */
bool chr_check(const wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
207
 
1314 cejka 208
/** Get the length of a string in bytes.
 *
 * The terminating zero byte is not counted.
 *
 * @param str		String.
 * @return		Number of bytes preceding the terminating zero.
 */
size_t strlen(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the length. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
1314 cejka 222
 
1719 decky 223
/** Compare two strings.
 *
 * Bytes are compared as unsigned char values, so the sign of the result
 * does not depend on whether plain char is signed.
 *
 * @param a		First string.
 * @param b		Second string.
 * @return		Zero if the strings are equal, a negative value if
 *			@a a sorts before @a b, a positive value otherwise.
 */
int strcmp(const char *a, const char *b)
{
	size_t i = 0;

	/* If a[i] is non-zero and equal to b[i], b[i] is non-zero too. */
	while ((a[i] != '\0') && (a[i] == b[i]))
		i++;

	return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
}
232
 
2640 cejka 233
/** Compare at most @a n bytes of two strings.
 *
 * Bytes are compared as unsigned char values, so the sign of the result
 * does not depend on whether plain char is signed.
 *
 * @param a		First string.
 * @param b		Second string.
 * @param n		Maximum number of bytes to compare.
 * @return		Zero if the first @a n bytes (or the whole strings,
 *			if shorter) are equal, a negative value if @a a sorts
 *			before @a b, a positive value otherwise.
 */
int strncmp(const char *a, const char *b, size_t n)
{
	size_t i = 0;

	while ((i < n) && (a[i] != '\0') && (a[i] == b[i]))
		i++;

	/* All n bytes matched; nothing beyond them is inspected. */
	if (i == n)
		return 0;

	return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
}
243
 
3271 jermar 244
/** Compare two strings, ignoring case.
 *
 * Each byte is converted to unsigned char before being passed to
 * tolower(), so bytes with the high bit set are never passed as negative
 * values and compare as values above all ASCII characters.
 *
 * @param a		First string.
 * @param b		Second string.
 * @return		Zero if the strings are equal ignoring case, a negative
 *			value if @a a sorts before @a b, a positive value
 *			otherwise.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop at the first difference or at the common end. */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
253
 
3261 jermar 254
/** Return pointer to the first occurrence of character c in string.
 *
 * The terminating zero byte is considered part of the string, so
 * searching for 0 returns a pointer to the terminator.
 *
 * @param str		Scanned string.
 * @param c		Searched character (taken as one byte).
 * @return		Pointer to the matched character or NULL if it is not
 * 			found in given string.
 */
char *strchr(const char *str, int c)
{
	const char wanted = (char) c;

	for (;; str++) {
		/* Test for a match first so that the terminator can match. */
		if (*str == wanted)
			return (char *) str;

		if (*str == '\0')
			return NULL;
	}
}
271
 
3261 jermar 272
/** Return pointer to the last occurrence of character c in string.
 *
 * The terminating zero byte is considered part of the string, so
 * searching for 0 returns a pointer to the terminator.
 *
 * @param str		Scanned string.
 * @param c		Searched character (taken as one byte).
 * @return		Pointer to the matched character or NULL if it is not
 * 			found in given string.
 */
char *strrchr(const char *str, int c)
{
	const char wanted = (char) c;
	const char *last = NULL;

	for (;; str++) {
		/* Test for a match first so that the terminator can match. */
		if (*str == wanted)
			last = str;

		if (*str == '\0')
			break;
	}

	return (char *) last;
}
291
 
292
/** Get the numeric value of a digit character.
 *
 * @param ch		Character to convert.
 * @return		0-9 for decimal digits, 10-35 for letters (either
 *			case), -1 for any other character.
 */
static int strtoul_digit_value(char ch)
{
	if ((ch >= '0') && (ch <= '9'))
		return ch - '0';

	if ((ch >= 'a') && (ch <= 'z'))
		return ch - 'a' + 10;

	if ((ch >= 'A') && (ch <= 'Z'))
		return ch - 'A' + 10;

	return -1;
}

/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and one optional sign are skipped. The sign is only
 * reported through @a sgn; the returned magnitude is never negated here.
 * A "0x"/"0X" prefix is consumed (for base 0 or 16) only when a hexadecimal
 * digit follows it; otherwise just the leading "0" is converted.
 *
 * @param nptr		Pointer to string.
 * @param endptr	If not NULL, function stores here pointer to the first
 * 			invalid character, or @a nptr if no conversion was
 * 			performed. On overflow it points past all the digits.
 * @param base		Zero or number between 2 and 36 inclusive.
 * @param sgn		It's set to 1 if minus found (otherwise left as is).
 * @return		Result of conversion, 0 if no conversion was performed
 * 			or @a base is invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	if ((base < 0) || (base == 1) || (base > 36)) {
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	/* Accept the prefix only if at least one hex digit follows it. */
	bool hex_prefix = false;
	if ((str[0] == '0') && ((str[1] == 'x') || (str[1] == 'X'))) {
		int first = strtoul_digit_value(str[2]);
		hex_prefix = (first >= 0) && (first < 16);
	}

	if (base == 0) {
		if (hex_prefix)
			base = 16;
		else if (*str == '0')
			base = 8;
		else
			base = 10;
	}

	if ((base == 16) && hex_prefix)
		str += 2;

	const char *digits = str;
	unsigned long result = 0;
	bool overflow = false;

	for (;; str++) {
		int digit = strtoul_digit_value(*str);

		/* A digit is valid only if strictly smaller than the base. */
		if ((digit < 0) || (digit >= base))
			break;

		/* After an overflow the remaining digits are only skipped. */
		if (overflow)
			continue;

		/* Would result * base + digit exceed ULONG_MAX? */
		if (result > (ULONG_MAX - (unsigned long) digit) /
		    (unsigned long) base) {
			overflow = true;
			continue;
		}

		result = result * (unsigned long) base + (unsigned long) digit;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (overflow) {
		/* FIXME: errno = ERANGE */
		return ULONG_MAX;
	}

	return result;
}
384
 
385
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * Values whose magnitude does not fit into long int are clamped to
 * LONG_MIN or LONG_MAX according to the sign.
 *
 * @param nptr		Pointer to string.
 * @param endptr	If not NULL, function stores here pointer to the first
 * 			invalid character.
 * @param base		Zero or number between 2 and 36 inclusive.
 * @return		Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* FIXME: set 0 to errno */
			/*
			 * Return LONG_MIN explicitly: converting an
			 * out-of-range unsigned value to long is
			 * implementation-defined.
			 */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* The magnitude fits, so negate in the signed type. */
	long int value = (long int) number;

	return (sgn ? -value : value);
}
416
 
417
 
418
/** Convert initial part of string to unsigned long according to given base.
 * Leading whitespace is skipped and an optional sign (`+' or `-') is
 * accepted. With base 0 or 16 a `0x' prefix selects hexadecimal; with
 * base 0 a leading zero selects octal and anything else is decimal.
 * If a minus sign was present, the converted value is negated in the
 * unsigned long type before being returned.
 *
 * @param nptr		Pointer to string.
 * @param endptr	If not NULL, function stores here pointer to the first
 * 			invalid character
 * @param base		Zero or number between 2 and 36 inclusive.
 * @return		Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -magnitude;

	return magnitude;
}
1472 palkovsky 440
 
441
/** Copy a string including its terminating zero byte.
 *
 * @param dest		Destination buffer; it must be large enough to hold
 *			the whole string, no bound is checked.
 * @param src		Source string.
 * @return		@a dest.
 */
char *strcpy(char *dest, const char *src)
{
	for (size_t i = 0; ; i++) {
		const char ch = src[i];

		dest[i] = ch;

		/* The terminator has just been copied as well. */
		if (ch == '\0')
			break;
	}

	return dest;
}
449
 
450
/** Copy at most @a n bytes of a string.
 *
 * Exactly @a n bytes of @a dest are written. If @a src is shorter than
 * @a n bytes, the remainder of @a dest is filled with zero bytes. If
 * @a src is @a n bytes or longer, @a dest is NOT zero-terminated.
 * With @a n equal to zero nothing is read or written.
 *
 * @param dest		Destination buffer of at least @a n bytes.
 * @param src		Source string.
 * @param n		Number of bytes to write to @a dest.
 * @return		@a dest.
 */
char *strncpy(char *dest, const char *src, size_t n)
{
	size_t i = 0;

	/* Copy the string body, never going past n bytes. */
	while ((i < n) && (src[i] != '\0')) {
		dest[i] = src[i];
		i++;
	}

	/* Pad the rest (including the terminator, if it fits) with zeros. */
	while (i < n)
		dest[i++] = '\0';

	return dest;
}
1653 cejka 458
 
2754 jermar 459
/** Append a string to the end of another string.
 *
 * @param dest		Zero-terminated destination string; its buffer must
 *			have room for the appended bytes, no bound is checked.
 * @param src		String to append.
 * @return		@a dest.
 */
char *strcat(char *dest, const char *src)
{
	char *tail = dest;

	/* Find the terminator of the destination string. */
	while (*tail != '\0')
		tail++;

	/* Copy src over it, terminator included. */
	for (;;) {
		const char ch = *src++;

		*tail++ = ch;
		if (ch == '\0')
			break;
	}

	return dest;
}
469
 
3238 jermar 470
/** Duplicate a string into newly allocated memory.
 *
 * @param s1		String to duplicate.
 * @return		Pointer to a copy obtained from malloc(), or NULL if
 *			the allocation failed.
 */
char *strdup(const char *s1)
{
	/* Size of the copy, terminating zero included. */
	size_t bytes = strlen(s1) + 1;
	char *copy = malloc(bytes);

	if (copy == NULL)
		return NULL;

	memcpy(copy, s1, bytes);
	return copy;
}
480
 
3730 svoboda 481
/** Split a string into tokens, keeping the scan position internally.
 *
 * Thin wrapper around strtok_r() that stores the position in a static
 * variable shared by all callers.
 *
 * @param s		String to tokenize, or NULL to continue with the
 *			string from the previous call.
 * @param delim		Set of delimiter characters.
 * @return		Next token, or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	/* Scan position remembered between calls. */
	static char *resume_at;

	char *token = strtok_r(s, delim, &resume_at);

	return token;
}
3427 post 487
 
3730 svoboda 488
/** Split a string into tokens, with the scan position kept by the caller.
 *
 * The input string is modified: the delimiter that ends the returned
 * token is overwritten with a zero byte.
 *
 * @param s		String to tokenize, or NULL to continue from the
 *			position stored in @a next.
 * @param delim		Set of delimiter characters.
 * @param next		Where the position for the following call is stored.
 * @return		Next token, or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cursor = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cursor != '\0') && (strchr(delim, *cursor) != NULL))
		cursor++;

	char *token = cursor;

	/* Skip over token characters. */
	while ((*cursor != '\0') && (strchr(delim, *cursor) == NULL))
		cursor++;

	/* Resume after the delimiter, or stay on the end of the string. */
	if (*cursor != '\0')
		*next = cursor + 1;
	else
		*next = cursor;

	/* No more tokens. */
	if (token == cursor)
		return NULL;

	/* Overwrite delimiter with the zero terminator. */
	*cursor = '\0';
	return token;
}
512
 
1719 decky 513
/** @}
 */