WebSVN – HelenOS – Blame – /trunk/uspace/lib/libc/generic/string.c/

Rev	Author	Line No.	Line
999	palkovsky	1	/*
2071	jermar	2	* Copyright (c) 2005 Martin Decky
3730	svoboda	3	* Copyright (c) 2008 Jiri Svoboda
999	palkovsky	4	* All rights reserved.
		5	*
		6	* Redistribution and use in source and binary forms, with or without
		7	* modification, are permitted provided that the following conditions
		8	* are met:
		9	*
		10	* - Redistributions of source code must retain the above copyright
		11	* notice, this list of conditions and the following disclaimer.
		12	* - Redistributions in binary form must reproduce the above copyright
		13	* notice, this list of conditions and the following disclaimer in the
		14	* documentation and/or other materials provided with the distribution.
		15	* - The name of the author may not be used to endorse or promote products
		16	* derived from this software without specific prior written permission.
		17	*
		18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
		19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
		20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
		21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
		22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
		23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
		24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
		25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
		26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
		27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
		28	*/
		29
1719	decky	30	/** @addtogroup libc
1653	cejka	31	* @{
		32	*/
		33	/** @file
		34	*/
		35
999	palkovsky	36	#include <string.h>
3731	svoboda	37	#include <stdlib.h>
		38	#include <limits.h>
1314	cejka	39	#include <ctype.h>
3238	jermar	40	#include <malloc.h>
4226	svoboda	41	#include <errno.h>
		42	#include <string.h>
999	palkovsky	43
/**
 * Byte mask consisting of lowest @n bits (out of 8).
 *
 * The shift is done on an unsigned operand so that no signed overflow
 * can occur. Valid for 0 <= n <= 8.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Mask consisting of lowest @n bits (out of 32).
 *
 * The shift is done on a 32-bit unsigned operand. Valid for 0 <= n <= 31.
 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8).
 *
 * The result is cast back to uint8_t because the complement operator
 * promotes its operand to int, which would otherwise set all the bits
 * above the lowest eight.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
		55
		56	/** Decode a single character from a string.
		57	*
		58	* Decode a single character from a string of size @a size. Decoding starts
		59	* at @a offset and this offset is moved to the beginning of the next
		60	* character. In case of decoding error, offset generally advances at least
		61	* by one. However, offset is never moved beyond size.
		62	*
		63	* @param str String (not necessarily NULL-terminated).
		64	* @param offset Byte offset in string where to start decoding.
		65	* @param size Size of the string (in bytes).
		66	*
		67	* @return Value of decoded character, U_SPECIAL on decoding error or
		68	* NULL if attempt to decode beyond @a size.
		69	*
		70	*/
		71	wchar_t str_decode(const char str, size_t offset, size_t size)
		72	{
		73	if (*offset + 1 > size)
		74	return 0;
		75
		76	/* First byte read from string */
		77	uint8_t b0 = (uint8_t) str[(*offset)++];
		78
		79	/* Determine code length */
		80
		81	unsigned int b0_bits; /* Data bits in first byte */
		82	unsigned int cbytes; /* Number of continuation bytes */
		83
		84	if ((b0 & 0x80) == 0) {
		85	/* 0xxxxxxx (Plain ASCII) */
		86	b0_bits = 7;
		87	cbytes = 0;
		88	} else if ((b0 & 0xe0) == 0xc0) {
		89	/* 110xxxxx 10xxxxxx */
		90	b0_bits = 5;
		91	cbytes = 1;
		92	} else if ((b0 & 0xf0) == 0xe0) {
		93	/* 1110xxxx 10xxxxxx 10xxxxxx */
		94	b0_bits = 4;
		95	cbytes = 2;
		96	} else if ((b0 & 0xf8) == 0xf0) {
		97	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		98	b0_bits = 3;
		99	cbytes = 3;
		100	} else {
		101	/* 10xxxxxx -- unexpected continuation byte */
		102	return U_SPECIAL;
		103	}
		104
		105	if (*offset + cbytes > size)
		106	return U_SPECIAL;
		107
		108	wchar_t ch = b0 & LO_MASK_8(b0_bits);
		109
		110	/* Decode continuation bytes */
		111	while (cbytes > 0) {
		112	uint8_t b = (uint8_t) str[(*offset)++];
		113
		114	/* Must be 10xxxxxx */
		115	if ((b & 0xc0) != 0x80)
		116	return U_SPECIAL;
		117
		118	/* Shift data bits to ch */
		119	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
		120	cbytes--;
		121	}
		122
		123	return ch;
		124	}
		125
		126	/** Encode a single character to string representation.
		127	*
		128	* Encode a single character to string representation (i.e. UTF-8) and store
		129	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
		130	* is moved to the position where the next character can be written to.
		131	*
		132	* @param ch Input character.
		133	* @param str Output buffer.
		134	* @param offset Byte offset where to start writing.
		135	* @param size Size of the output buffer (in bytes).
		136	*
		137	* @return EOK if the character was encoded successfully, EOVERFLOW if there
		138	* was not enough space in the output buffer or EINVAL if the character
		139	* code was invalid.
		140	*/
		141	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
		142	{
		143	if (*offset >= size)
		144	return EOVERFLOW;
		145
		146	if (!chr_check(ch))
		147	return EINVAL;
		148
		149	/* Unsigned version of ch (bit operations should only be done
		150	on unsigned types). */
		151	uint32_t cc = (uint32_t) ch;
		152
		153	/* Determine how many continuation bytes are needed */
		154
		155	unsigned int b0_bits; /* Data bits in first byte */
		156	unsigned int cbytes; /* Number of continuation bytes */
		157
		158	if ((cc & ~LO_MASK_32(7)) == 0) {
		159	b0_bits = 7;
		160	cbytes = 0;
		161	} else if ((cc & ~LO_MASK_32(11)) == 0) {
		162	b0_bits = 5;
		163	cbytes = 1;
		164	} else if ((cc & ~LO_MASK_32(16)) == 0) {
		165	b0_bits = 4;
		166	cbytes = 2;
		167	} else if ((cc & ~LO_MASK_32(21)) == 0) {
		168	b0_bits = 3;
		169	cbytes = 3;
		170	} else {
		171	/* Codes longer than 21 bits are not supported */
		172	return EINVAL;
		173	}
		174
		175	/* Check for available space in buffer */
		176	if (*offset + cbytes >= size)
		177	return EOVERFLOW;
		178
		179	/* Encode continuation bytes */
		180	unsigned int i;
		181	for (i = cbytes; i > 0; i--) {
		182	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
		183	cc = cc >> CONT_BITS;
		184	}
		185
		186	/* Encode first byte */
		187	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
		188
		189	/* Advance offset */
		190	*offset += cbytes + 1;
		191
		192	return EOK;
		193	}
		194
		195	/** Check whether character is valid
		196	*
		197	* @return True if character is a valid Unicode code point.
		198	*
		199	*/
		200	bool chr_check(const wchar_t ch)
		201	{
		202	if ((ch >= 0) && (ch <= 1114111))
		203	return true;
		204
		205	return false;
		206	}
		207
/** Count the number of bytes in the string, not including terminating 0.
 *
 * @param str NULL-terminated string.
 *
 * @return Number of bytes preceding the terminating 0.
 */
size_t strlen(const char *str)
{
	const char *end = str;
	
	/* Walk to the terminator; the distance travelled is the length */
	while (*end != 0)
		end++;
	
	return (size_t) (end - str);
}
1314	cejka	222
/** Compare two NULL-terminated strings.
 *
 * Bytes are compared as unsigned char so that the sign of the result
 * does not depend on the signedness of plain char.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal, a negative value if @a a sorts
 *         before @a b, a positive value otherwise.
 */
int strcmp(const char *a, const char *b)
{
	size_t i = 0;
	
	/* If a[i] is non-zero and equal to b[i], b[i] is non-zero as well */
	while ((a[i] != '\0') && (a[i] == b[i]))
		i++;
	
	return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
}
		232
/** Compare at most @a n bytes of two NULL-terminated strings.
 *
 * Bytes are compared as unsigned char so that the sign of the result
 * does not depend on the signedness of plain char.
 *
 * @param a First string.
 * @param b Second string.
 * @param n Maximum number of bytes to compare.
 *
 * @return Zero if the first @a n bytes (or the whole strings, if shorter)
 *         are equal, a negative value if @a a sorts before @a b, a positive
 *         value otherwise.
 */
int strncmp(const char *a, const char *b, size_t n)
{
	size_t i = 0;
	
	while ((i < n) && (a[i] != '\0') && (a[i] == b[i]))
		i++;
	
	/* All n bytes matched */
	if (i >= n)
		return 0;
	
	return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
}
		243
/** Compare two NULL-terminated strings, ignoring case.
 *
 * Each byte is converted with tolower() before comparison. Bytes are passed
 * to tolower() as unsigned char, since negative arguments are not valid
 * input for the ctype functions.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, a negative value
 *         if @a a sorts before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;
	
	while ((a[i] != '\0') && (b[i] != '\0') &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;
	
	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
		253
/** Return pointer to the first occurrence of character c in string.
 *
 * The terminating 0 is considered part of the string, so searching
 * for 0 returns a pointer to the terminator.
 *
 * @param str Scanned string.
 * @param c   Searched character (taken as one byte).
 *
 * @return Pointer to the matched character or NULL if it is not
 *         found in given string.
 */
char *strchr(const char *str, int c)
{
	const char wanted = (char) c;
	
	for (;; str++) {
		/* Test for a match first so that the terminator can match too */
		if (*str == wanted)
			return (char *) str;
		
		if (*str == '\0')
			return NULL;
	}
}
		271
/** Return pointer to the last occurrence of character c in string.
 *
 * The terminating 0 is considered part of the string, so searching
 * for 0 returns a pointer to the terminator.
 *
 * @param str Scanned string.
 * @param c   Searched character (taken as one byte).
 *
 * @return Pointer to the matched character or NULL if it is not
 *         found in given string.
 */
char *strrchr(const char *str, int c)
{
	const char wanted = (char) c;
	const char *last = NULL;
	
	for (;; str++) {
		/* Remember every match; the final one wins */
		if (*str == wanted)
			last = str;
		
		if (*str == '\0')
			break;
	}
	
	return (char *) last;
}
		291
		292	/** Convert string to a number.
		293	* Core of strtol and strtoul functions.
3261	jermar	294	*
		295	* @param nptr Pointer to string.
		296	* @param endptr If not NULL, function stores here pointer to the first
		297	* invalid character.
		298	* @param base Zero or number between 2 and 36 inclusive.
		299	* @param sgn It's set to 1 if minus found.
		300	* @return Result of conversion.
1314	cejka	301	*/
3261	jermar	302	static unsigned long
		303	_strtoul(const char nptr, char endptr, int base, char sgn)
1314	cejka	304	{
		305	unsigned char c;
		306	unsigned long result = 0;
		307	unsigned long a, b;
		308	const char *str = nptr;
		309	const char *tmpptr;
		310
		311	while (isspace(*str))
		312	str++;
		313
		314	if (*str == '-') {
		315	*sgn = 1;
		316	++str;
		317	} else if (*str == '+')
		318	++str;
		319
		320	if (base) {
		321	if ((base == 1) \|\| (base > 36)) {
		322	/* FIXME: set errno to EINVAL */
		323	return 0;
		324	}
3261	jermar	325	if ((base == 16) && (*str == '0') && ((str[1] == 'x') \|\|
		326	(str[1] == 'X'))) {
1314	cejka	327	str += 2;
		328	}
		329	} else {
		330	base = 10;
		331
		332	if (*str == '0') {
		333	base = 8;
		334	if ((str[1] == 'X') \|\| (str[1] == 'x')) {
		335	base = 16;
		336	str += 2;
		337	}
		338	}
		339	}
		340
		341	tmpptr = str;
		342
		343	while (*str) {
		344	c = *str;
3261	jermar	345	c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		346	(c <= '9' ? c - '0' : 0xff)));
1314	cejka	347	if (c > base) {
		348	break;
		349	}
		350
		351	a = (result & 0xff) * base + c;
		352	b = (result >> 8) * base + (a >> 8);
		353
		354	if (b > (ULONG_MAX >> 8)) {
		355	/* overflow */
		356	/* FIXME: errno = ERANGE*/
		357	return ULONG_MAX;
		358	}
		359
		360	result = (b << 8) + (a & 0xff);
		361	++str;
		362	}
		363
		364	if (str == tmpptr) {
3261	jermar	365	/*
		366	* No number was found => first invalid character is the first
		367	* character of the string.
		368	*/
1314	cejka	369	/* FIXME: set errno to EINVAL */
		370	str = nptr;
		371	result = 0;
		372	}
		373
		374	if (endptr)
1719	decky	375	endptr = (char ) str;
1314	cejka	376
		377	if (nptr == str) {
		378	/FIXME: errno = EINVAL/
		379	return 0;
		380	}
		381
		382	return result;
		383	}
		384
		385	/** Convert initial part of string to long int according to given base.
3261	jermar	386	* The number may begin with an arbitrary number of whitespaces followed by
		387	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
		388	* inserted and the number will be taken as hexadecimal one. If the base is 0
		389	* and the number begin with a zero, number will be taken as octal one (as with
		390	* base 8). Otherwise the base 0 is taken as decimal.
		391	*
		392	* @param nptr Pointer to string.
		393	* @param endptr If not NULL, function stores here pointer to the first
		394	* invalid character.
		395	* @param base Zero or number between 2 and 36 inclusive.
		396	* @return Result of conversion.
1314	cejka	397	*/
		398	long int strtol(const char nptr, char *endptr, int base)
		399	{
		400	char sgn = 0;
		401	unsigned long number = 0;
		402
		403	number = _strtoul(nptr, endptr, base, &sgn);
		404
		405	if (number > LONG_MAX) {
1719	decky	406	if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
1314	cejka	407	/* FIXME: set 0 to errno */
		408	return number;
		409	}
		410	/* FIXME: set ERANGE to errno */
1719	decky	411	return (sgn ? LONG_MIN : LONG_MAX);
1314	cejka	412	}
		413
1719	decky	414	return (sgn ? -number : number);
1314	cejka	415	}
		416
		417
		418	/** Convert initial part of string to unsigned long according to given base.
3261	jermar	419	* The number may begin with an arbitrary number of whitespaces followed by
		420	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
		421	* inserted and the number will be taken as hexadecimal one. If the base is 0
		422	* and the number begin with a zero, number will be taken as octal one (as with
		423	* base 8). Otherwise the base 0 is taken as decimal.
		424	*
		425	* @param nptr Pointer to string.
		426	* @param endptr If not NULL, function stores here pointer to the first
		427	* invalid character
		428	* @param base Zero or number between 2 and 36 inclusive.
		429	* @return Result of conversion.
1314	cejka	430	*/
		431	unsigned long strtoul(const char nptr, char *endptr, int base)
		432	{
		433	char sgn = 0;
		434	unsigned long number = 0;
		435
		436	number = _strtoul(nptr, endptr, base, &sgn);
		437
1719	decky	438	return (sgn ? -number : number);
1314	cejka	439	}
1472	palkovsky	440
		441	char strcpy(char dest, const char *src)
		442	{
1719	decky	443	char *orig = dest;
		444
2754	jermar	445	while (((dest++) = (src++)))
		446	;
1719	decky	447	return orig;
1472	palkovsky	448	}
		449
		450	char strncpy(char dest, const char *src, size_t n)
		451	{
1719	decky	452	char *orig = dest;
		453
2754	jermar	454	while (((dest++) = (src++)) && --n)
		455	;
1719	decky	456	return orig;
1472	palkovsky	457	}
1653	cejka	458
/** Append a NULL-terminated string to another one.
 *
 * The destination must have room for the combined string including
 * the terminator and the areas must not overlap; neither is checked.
 *
 * @param dest Destination string to append to.
 * @param src  String to append.
 *
 * @return @a dest.
 */
char *strcat(char *dest, const char *src)
{
	char *tail = dest;
	
	/* Find the terminator of the destination string */
	while (*tail != '\0')
		tail++;
	
	/* Copy the source over it, including the new terminator */
	while ((*tail++ = *src++) != '\0')
		;
	
	return dest;
}
		469
/** Duplicate a NULL-terminated string into newly allocated memory.
 *
 * The copy is obtained from malloc(), so the caller is responsible
 * for releasing it with free().
 *
 * @param s1 String to duplicate.
 *
 * @return Pointer to the copy or NULL if the allocation failed.
 */
char *strdup(const char *s1)
{
	/* Size of the string including the terminating 0 */
	size_t size = strlen(s1) + 1;
	char *copy = malloc(size);
	
	if (copy != NULL)
		memcpy(copy, s1, size);
	
	return copy;
}
		480
/** Split a string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the scanning position in
 * a static variable shared by all callers.
 *
 * @param s     String to tokenize on the first call, NULL to continue
 *              with the string from the previous call. The string is
 *              modified.
 * @param delim Set of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;
	
	return strtok_r(s, delim, &next);
}
3427	post	487
/** Split a string into tokens (reentrant variant).
 *
 * Leading delimiters are skipped, then the token extends up to the next
 * delimiter, which is overwritten with 0. The position where the scan
 * should resume is stored in @a next.
 *
 * @param s     String to tokenize on the first call, NULL to continue
 *              from the position saved in @a next. The string is modified.
 * @param delim Set of delimiter characters.
 * @param next  Storage for the scanning position between calls.
 *
 * @return Pointer to the next token or NULL if there are no more tokens
 *         (also when both @a s and the saved position are NULL).
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	if (s == NULL)
		s = *next;
	
	/* Nothing to continue from */
	if (s == NULL)
		return NULL;
	
	/* Skip over leading delimiters. */
	while ((*s != '\0') && (strchr(delim, *s) != NULL))
		s++;
	char *start = s;
	
	/* Skip over token characters. */
	while ((*s != '\0') && (strchr(delim, *s) == NULL))
		s++;
	char *end = s;
	
	/* Resume behind the delimiter, or stay on the terminator */
	*next = (*s != '\0') ? s + 1 : s;
	
	if (start == end)
		return NULL;  /* No more tokens. */
	
	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
		512
1719	decky	513	/** @}
1653	cejka	514	*/

Subversion Repositories HelenOS

(root)/trunk/uspace/lib/libc/generic/string.c/ – Rev 4226