WebSVN – HelenOS – Blame – /branches/dd/uspace/lib/libc/generic/string.c

Rev	Author	Line No.	Line
999	palkovsky	1	/*
2071	jermar	2	* Copyright (c) 2005 Martin Decky
4055	trochtova	3	* Copyright (c) 2008 Jiri Svoboda
999	palkovsky	4	* All rights reserved.
		5	*
		6	* Redistribution and use in source and binary forms, with or without
		7	* modification, are permitted provided that the following conditions
		8	* are met:
		9	*
		10	* - Redistributions of source code must retain the above copyright
		11	* notice, this list of conditions and the following disclaimer.
		12	* - Redistributions in binary form must reproduce the above copyright
		13	* notice, this list of conditions and the following disclaimer in the
		14	* documentation and/or other materials provided with the distribution.
		15	* - The name of the author may not be used to endorse or promote products
		16	* derived from this software without specific prior written permission.
		17	*
		18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
		19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
		20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
		21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
		22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
		23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
		24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
		25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
		26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
		27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
		28	*/
		29
1719	decky	30	/** @addtogroup libc
1653	cejka	31	* @{
		32	*/
		33	/** @file
		34	*/
		35
999	palkovsky	36	#include <string.h>
4055	trochtova	37	#include <stdlib.h>
4296	trochtova	38	#include <assert.h>
4055	trochtova	39	#include <limits.h>
1314	cejka	40	#include <ctype.h>
4055	trochtova	41	#include <malloc.h>
4296	trochtova	42	#include <errno.h>
		43	#include <align.h>
		44	#include <mem.h>
		45	#include <string.h>
999	palkovsky	46
/** Byte mask consisting of lowest @a n bits (out of 8), for 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/**
 * Bit mask consisting of lowest @a n bits (out of 32), for 0 <= n <= 31.
 *
 * The shift is done on an unsigned 32-bit operand so that no signed
 * overflow can occur for large @a n.
 */
#define LO_MASK_32(n)  ((uint32_t) (((uint32_t) 1 << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of 8), for 0 <= n <= 8.
 *
 * The complement is narrowed back to 8 bits; without the cast the
 * integer promotion of the operand would yield a negative int.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
		58
		59	/** Decode a single character from a string.
2072	jermar	60	*
4296	trochtova	61	* Decode a single character from a string of size @a size. Decoding starts
		62	* at @a offset and this offset is moved to the beginning of the next
		63	* character. In case of decoding error, offset generally advances at least
		64	* by one. However, offset is never moved beyond size.
		65	*
		66	* @param str String (not necessarily NULL-terminated).
		67	* @param offset Byte offset in string where to start decoding.
		68	* @param size Size of the string (in bytes).
		69	*
		70	* @return Value of decoded character, U_SPECIAL on decoding error or
		71	* NULL if attempt to decode beyond @a size.
		72	*
2072	jermar	73	*/
4296	trochtova	74	wchar_t str_decode(const char str, size_t offset, size_t size)
1173	cejka	75	{
4296	trochtova	76	if (*offset + 1 > size)
		77	return 0;
		78
		79	/* First byte read from string */
		80	uint8_t b0 = (uint8_t) str[(*offset)++];
		81
		82	/* Determine code length */
		83
		84	unsigned int b0_bits; /* Data bits in first byte */
		85	unsigned int cbytes; /* Number of continuation bytes */
		86
		87	if ((b0 & 0x80) == 0) {
		88	/* 0xxxxxxx (Plain ASCII) */
		89	b0_bits = 7;
		90	cbytes = 0;
		91	} else if ((b0 & 0xe0) == 0xc0) {
		92	/* 110xxxxx 10xxxxxx */
		93	b0_bits = 5;
		94	cbytes = 1;
		95	} else if ((b0 & 0xf0) == 0xe0) {
		96	/* 1110xxxx 10xxxxxx 10xxxxxx */
		97	b0_bits = 4;
		98	cbytes = 2;
		99	} else if ((b0 & 0xf8) == 0xf0) {
		100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		101	b0_bits = 3;
		102	cbytes = 3;
		103	} else {
		104	/* 10xxxxxx -- unexpected continuation byte */
		105	return U_SPECIAL;
		106	}
		107
		108	if (*offset + cbytes > size)
		109	return U_SPECIAL;
		110
		111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
		112
		113	/* Decode continuation bytes */
		114	while (cbytes > 0) {
		115	uint8_t b = (uint8_t) str[(*offset)++];
		116
		117	/* Must be 10xxxxxx */
		118	if ((b & 0xc0) != 0x80)
		119	return U_SPECIAL;
		120
		121	/* Shift data bits to ch */
		122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
		123	cbytes--;
		124	}
		125
		126	return ch;
		127	}
1173	cejka	128
4296	trochtova	129	/** Encode a single character to string representation.
		130	*
		131	* Encode a single character to string representation (i.e. UTF-8) and store
		132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
		133	* is moved to the position where the next character can be written to.
		134	*
		135	* @param ch Input character.
		136	* @param str Output buffer.
		137	* @param offset Byte offset where to start writing.
		138	* @param size Size of the output buffer (in bytes).
		139	*
		140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
4537	trochtova	141	* was not enough space in the output buffer or EINVAL if the character
		142	* code was invalid.
4296	trochtova	143	*/
		144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
		145	{
		146	if (*offset >= size)
		147	return EOVERFLOW;
		148
		149	if (!chr_check(ch))
		150	return EINVAL;
		151
		152	/* Unsigned version of ch (bit operations should only be done
		153	on unsigned types). */
		154	uint32_t cc = (uint32_t) ch;
		155
		156	/* Determine how many continuation bytes are needed */
		157
		158	unsigned int b0_bits; /* Data bits in first byte */
		159	unsigned int cbytes; /* Number of continuation bytes */
		160
		161	if ((cc & ~LO_MASK_32(7)) == 0) {
		162	b0_bits = 7;
		163	cbytes = 0;
		164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
		165	b0_bits = 5;
		166	cbytes = 1;
		167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
		168	b0_bits = 4;
		169	cbytes = 2;
		170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
		171	b0_bits = 3;
		172	cbytes = 3;
		173	} else {
		174	/* Codes longer than 21 bits are not supported */
		175	return EINVAL;
		176	}
		177
		178	/* Check for available space in buffer */
		179	if (*offset + cbytes >= size)
		180	return EOVERFLOW;
		181
		182	/* Encode continuation bytes */
		183	unsigned int i;
		184	for (i = cbytes; i > 0; i--) {
		185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
		186	cc = cc >> CONT_BITS;
		187	}
		188
		189	/* Encode first byte */
		190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
		191
		192	/* Advance offset */
		193	*offset += cbytes + 1;
		194
		195	return EOK;
		196	}
1173	cejka	197
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
1314	cejka	217
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	return wstr_length(str) * sizeof(wchar_t);
}
		232
		233	/** Get size of string with length limit.
		234	*
		235	* Get the number of bytes which are used by up to @a max_len first
		236	* characters in the string @a str. If @a max_len is greater than
		237	* the length of @a str, the entire string is measured (excluding the
		238	* NULL-terminator).
		239	*
		240	* @param str String to consider.
		241	* @param max_len Maximum number of characters to measure.
		242	*
		243	* @return Number of bytes used by the characters.
		244	*
		245	*/
4537	trochtova	246	size_t str_lsize(const char *str, size_t max_len)
4296	trochtova	247	{
4537	trochtova	248	size_t len = 0;
4296	trochtova	249	size_t offset = 0;
1319	vana	250
4296	trochtova	251	while (len < max_len) {
		252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
		253	break;
		254
		255	len++;
		256	}
1319	vana	257
4296	trochtova	258	return offset;
1319	vana	259	}
		260
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, hence the scaling. */
	size_t chars = wstr_nlength(str, max_len * sizeof(wchar_t));

	return chars * sizeof(wchar_t);
}
1319	vana	278
4296	trochtova	279	/** Get number of characters in a string.
		280	*
		281	* @param str NULL-terminated string.
		282	*
		283	* @return Number of characters in string.
		284	*
		285	*/
4537	trochtova	286	size_t str_length(const char *str)
4296	trochtova	287	{
4537	trochtova	288	size_t len = 0;
4296	trochtova	289	size_t offset = 0;
2640	cejka	290
4296	trochtova	291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
		292	len++;
2640	cejka	293
4296	trochtova	294	return len;
2640	cejka	295	}
		296
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len = 0;

	while (wstr[len] != 0)
		len++;

	return len;
}
		313
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the NULL-terminator or as soon as @a size bytes
 * have been consumed, whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t len = 0;
	size_t offset = 0;

	while (str_decode(str, &offset, size) != 0)
		len++;

	return len;
}
		332
		333	/** Get number of characters in a string with size limit.
		334	*
		335	* @param str NULL-terminated string.
		336	* @param size Maximum number of bytes to consider.
		337	*
		338	* @return Number of characters in string.
		339	*
		340	*/
4537	trochtova	341	size_t wstr_nlength(const wchar_t *str, size_t size)
4296	trochtova	342	{
4537	trochtova	343	size_t len = 0;
		344	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
		345	size_t offset = 0;
4296	trochtova	346
		347	while ((offset < limit) && (*str++ != 0)) {
		348	len++;
		349	offset += sizeof(wchar_t);
1314	cejka	350	}
4296	trochtova	351
		352	return len;
		353	}
1314	cejka	354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII (0 to 127 inclusive).
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
		367
		368	/** Check whether character is valid
		369	*
		370	* @return True if character is a valid Unicode code point.
		371	*
		372	*/
		373	bool chr_check(wchar_t ch)
		374	{
		375	if ((ch >= 0) && (ch <= 1114111))
		376	return true;
		377
		378	return false;
		379	}
		380
		381	/** Compare two NULL terminated strings.
		382	*
		383	* Do a char-by-char comparison of two NULL-terminated strings.
		384	* The strings are considered equal iff they consist of the same
		385	* characters on the minimum of their lengths.
		386	*
		387	* @param s1 First string to compare.
		388	* @param s2 Second string to compare.
		389	*
		390	* @return 0 if the strings are equal, -1 if first is smaller,
		391	* 1 if second smaller.
		392	*
		393	*/
		394	int str_cmp(const char s1, const char s2)
		395	{
		396	wchar_t c1 = 0;
		397	wchar_t c2 = 0;
		398
		399	size_t off1 = 0;
		400	size_t off2 = 0;
		401
		402	while (true) {
		403	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
		404	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
		405
		406	if (c1 < c2)
		407	return -1;
		408
		409	if (c1 > c2)
		410	return 1;
		411
		412	if (c1 == 0 \|\| c2 == 0)
		413	break;
		414	}
		415
		416	return 0;
		417	}
		418
		419	/** Compare two NULL terminated strings with length limit.
		420	*
		421	* Do a char-by-char comparison of two NULL-terminated strings.
		422	* The strings are considered equal iff they consist of the same
		423	* characters on the minimum of their lengths and the length limit.
		424	*
		425	* @param s1 First string to compare.
		426	* @param s2 Second string to compare.
		427	* @param max_len Maximum number of characters to consider.
		428	*
		429	* @return 0 if the strings are equal, -1 if first is smaller,
		430	* 1 if second smaller.
		431	*
		432	*/
4537	trochtova	433	int str_lcmp(const char s1, const char s2, size_t max_len)
4296	trochtova	434	{
		435	wchar_t c1 = 0;
		436	wchar_t c2 = 0;
		437
		438	size_t off1 = 0;
		439	size_t off2 = 0;
		440
4537	trochtova	441	size_t len = 0;
4296	trochtova	442
		443	while (true) {
		444	if (len >= max_len)
		445	break;
		446
		447	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
		448	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
		449
		450	if (c1 < c2)
		451	return -1;
		452
		453	if (c1 > c2)
		454	return 1;
		455
		456	if (c1 == 0 \|\| c2 == 0)
		457	break;
		458
		459	++len;
		460	}
		461
		462	return 0;
		463
		464	}
		465
		466	/** Copy string.
		467	*
		468	* Copy source string @a src to destination buffer @a dest.
		469	* No more than @a size bytes are written. If the size of the output buffer
		470	* is at least one byte, the output string will always be well-formed, i.e.
		471	* null-terminated and containing only complete characters.
		472	*
		473	* @param dst Destination buffer.
		474	* @param count Size of the destination buffer (must be > 0).
		475	* @param src Source string.
		476	*/
		477	void str_cpy(char dest, size_t size, const char src)
		478	{
		479	wchar_t ch;
		480	size_t src_off;
		481	size_t dest_off;
		482
		483	/* There must be space for a null terminator in the buffer. */
		484	assert(size > 0);
		485
		486	src_off = 0;
		487	dest_off = 0;
		488
		489	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
		490	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
		491	break;
		492	}
		493
		494	dest[dest_off] = '\0';
		495	}
		496
		497	/** Copy size-limited substring.
		498	*
		499	* Copy prefix of string @a src of max. size @a size to destination buffer
		500	* @a dest. No more than @a size bytes are written. The output string will
		501	* always be well-formed, i.e. null-terminated and containing only complete
		502	* characters.
		503	*
		504	* No more than @a n bytes are read from the input string, so it does not
		505	* have to be null-terminated.
		506	*
		507	* @param dst Destination buffer.
		508	* @param count Size of the destination buffer (must be > 0).
		509	* @param src Source string.
		510	* @param n Maximum number of bytes to read from @a src.
		511	*/
		512	void str_ncpy(char dest, size_t size, const char src, size_t n)
		513	{
		514	wchar_t ch;
		515	size_t src_off;
		516	size_t dest_off;
		517
		518	/* There must be space for a null terminator in the buffer. */
		519	assert(size > 0);
		520
		521	src_off = 0;
		522	dest_off = 0;
		523
		524	while ((ch = str_decode(src, &src_off, n)) != 0) {
		525	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
		526	break;
		527	}
		528
		529	dest[dest_off] = '\0';
		530	}
		531
		532	/** Append one string to another.
		533	*
		534	* Append source string @a src to string in destination buffer @a dest.
		535	* Size of the destination buffer is @a dest. If the size of the output buffer
		536	* is at least one byte, the output string will always be well-formed, i.e.
		537	* null-terminated and containing only complete characters.
		538	*
		539	* @param dst Destination buffer.
		540	* @param count Size of the destination buffer.
		541	* @param src Source string.
		542	*/
		543	void str_append(char dest, size_t size, const char src)
		544	{
		545	size_t dstr_size;
		546
		547	dstr_size = str_size(dest);
		548	str_cpy(dest + dstr_size, size - dstr_size, src);
		549	}
		550
		551	/** Copy NULL-terminated wide string to string
		552	*
		553	* Copy source wide string @a src to destination buffer @a dst.
		554	* No more than @a size bytes are written. NULL-terminator is always
		555	* written after the last succesfully copied character (i.e. if the
		556	* destination buffer is has at least 1 byte, it will be always
		557	* NULL-terminated).
		558	*
		559	* @param src Source wide string.
		560	* @param dst Destination buffer.
		561	* @param count Size of the destination buffer.
		562	*
		563	*/
		564	void wstr_nstr(char dst, const wchar_t src, size_t size)
		565	{
		566	/* No space for the NULL-terminator in the buffer */
		567	if (size == 0)
		568	return;
		569
		570	wchar_t ch;
4537	trochtova	571	size_t src_idx = 0;
4296	trochtova	572	size_t dst_off = 0;
		573
		574	while ((ch = src[src_idx++]) != 0) {
		575	if (chr_encode(ch, dst, &dst_off, size) != EOK)
		576	break;
		577	}
		578
		579	if (dst_off >= size)
		580	dst[size - 1] = 0;
		581	else
		582	dst[dst_off] = 0;
		583	}
		584
		585	/** Find first occurence of character in string.
		586	*
		587	* @param str String to search.
		588	* @param ch Character to look for.
		589	*
		590	* @return Pointer to character in @a str or NULL if not found.
		591	*/
4688	trochtova	592	char str_chr(const char str, wchar_t ch)
4296	trochtova	593	{
		594	wchar_t acc;
		595	size_t off = 0;
4420	trochtova	596	size_t last = 0;
4296	trochtova	597
		598	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
		599	if (acc == ch)
4688	trochtova	600	return (char *) (str + last);
4420	trochtova	601	last = off;
4296	trochtova	602	}
		603
1314	cejka	604	return NULL;
		605	}
		606
4296	trochtova	607	/** Find last occurence of character in string.
4055	trochtova	608	*
4296	trochtova	609	* @param str String to search.
		610	* @param ch Character to look for.
		611	*
		612	* @return Pointer to character in @a str or NULL if not found.
1314	cejka	613	*/
4688	trochtova	614	char str_rchr(const char str, wchar_t ch)
1314	cejka	615	{
4296	trochtova	616	wchar_t acc;
		617	size_t off = 0;
4420	trochtova	618	size_t last = 0;
4537	trochtova	619	const char *res = NULL;
4420	trochtova	620
4296	trochtova	621	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
		622	if (acc == ch)
4420	trochtova	623	res = (str + last);
		624	last = off;
1314	cejka	625	}
4420	trochtova	626
4688	trochtova	627	return (char *) res;
1314	cejka	628	}
		629
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the
 * NULL-terminator) are shifted by one.
 *
 * NOTE(review): only @a pos is checked against @a max_pos; the shifted
 * terminator ends up at index length + 1. Confirm that callers guarantee
 * the buffer is large enough for that.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/* Shift str[pos..len] one slot up, starting with the terminator. */
	size_t i;
	for (i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
		659
		660	/** Remove a wide character from a wide string.
		661	*
		662	* Remove a wide character from a wide string at position
		663	* @a pos. The characters after the position are shifted.
		664	*
		665	* @param str String to remove from.
		666	* @param pos Character index to remove.
		667	*
		668	* @return True if the removal was sucessful, false if the position
		669	* is out of bounds.
		670	*
		671	*/
4537	trochtova	672	bool wstr_remove(wchar_t *str, size_t pos)
4296	trochtova	673	{
4537	trochtova	674	size_t len = wstr_length(str);
4296	trochtova	675
		676	if (pos >= len)
		677	return false;
		678
4537	trochtova	679	size_t i;
4296	trochtova	680	for (i = pos + 1; i <= len; i++)
		681	str[i - 1] = str[i];
		682
		683	return true;
		684	}
		685
		686	int stricmp(const char a, const char b)
		687	{
		688	int c = 0;
		689
		690	while (a[c] && b[c] && (!(tolower(a[c]) - tolower(b[c]))))
		691	c++;
		692
		693	return (tolower(a[c]) - tolower(b[c]));
		694	}
		695
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped, then as many
 * digits valid in @a base as possible are consumed.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (or @a nptr if no number was found or
 *               @a base is invalid).
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found; it is not written otherwise.
 * @return Result of conversion, 0 if no number was found or @a base is
 *         invalid, ULONG_MAX if the value does not fit.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+') {
		++str;
	}

	/*
	 * A "0x"/"0X" prefix is only skipped when a hex digit follows it;
	 * otherwise the leading "0" is itself the number (as in "0xg").
	 */
	int has_hex_prefix = (*str == '0') &&
	    ((str[1] == 'x') || (str[1] == 'X')) &&
	    isxdigit((unsigned char) str[2]);

	if (base != 0) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && has_hex_prefix)
			str += 2;
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if (has_hex_prefix) {
				base = 16;
				str += 2;
			}
		}
	}

	const char *first_digit = str;
	unsigned long ubase = (unsigned long) base;
	unsigned long result = 0;
	int overflow = 0;

	while (*str != '\0') {
		unsigned char c = (unsigned char) *str;
		unsigned long digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1. */
		if (digit >= ubase)
			break;

		/*
		 * On overflow keep consuming digits so that endptr still
		 * ends up behind the whole number.
		 */
		if (overflow || (result > (ULONG_MAX - digit) / ubase))
			overflow = 1;
		else
			result = result * ubase + digit;

		++str;
	}

	if (str == first_digit) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (overflow) {
		/* FIXME: errno = ERANGE */
		return ULONG_MAX;
	}

	return result;
}
		788
		789	/** Convert initial part of string to long int according to given base.
4055	trochtova	790	* The number may begin with an arbitrary number of whitespaces followed by
		791	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
		792	* inserted and the number will be taken as hexadecimal one. If the base is 0
		793	* and the number begin with a zero, number will be taken as octal one (as with
		794	* base 8). Otherwise the base 0 is taken as decimal.
		795	*
		796	* @param nptr Pointer to string.
		797	* @param endptr If not NULL, function stores here pointer to the first
		798	* invalid character.
		799	* @param base Zero or number between 2 and 36 inclusive.
		800	* @return Result of conversion.
1314	cejka	801	*/
		802	long int strtol(const char nptr, char *endptr, int base)
		803	{
		804	char sgn = 0;
		805	unsigned long number = 0;
		806
		807	number = _strtoul(nptr, endptr, base, &sgn);
		808
		809	if (number > LONG_MAX) {
1719	decky	810	if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
1314	cejka	811	/* FIXME: set 0 to errno */
		812	return number;
		813	}
		814	/* FIXME: set ERANGE to errno */
1719	decky	815	return (sgn ? LONG_MIN : LONG_MAX);
1314	cejka	816	}
		817
1719	decky	818	return (sgn ? -number : number);
1314	cejka	819	}
		820
		821
		822	/** Convert initial part of string to unsigned long according to given base.
4055	trochtova	823	* The number may begin with an arbitrary number of whitespaces followed by
		824	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
		825	* inserted and the number will be taken as hexadecimal one. If the base is 0
		826	* and the number begin with a zero, number will be taken as octal one (as with
		827	* base 8). Otherwise the base 0 is taken as decimal.
		828	*
		829	* @param nptr Pointer to string.
		830	* @param endptr If not NULL, function stores here pointer to the first
		831	* invalid character
		832	* @param base Zero or number between 2 and 36 inclusive.
		833	* @return Result of conversion.
1314	cejka	834	*/
		835	unsigned long strtoul(const char nptr, char *endptr, int base)
		836	{
		837	char sgn = 0;
		838	unsigned long number = 0;
		839
		840	number = _strtoul(nptr, endptr, base, &sgn);
		841
1719	decky	842	return (sgn ? -number : number);
1314	cejka	843	}
1472	palkovsky	844
/** Duplicate a string.
 *
 * Allocate a new buffer with malloc() and copy @a src into it, including
 * the NULL-terminator. The caller owns the returned buffer.
 *
 * @param src String to duplicate.
 *
 * @return Newly allocated copy of @a src or NULL if the allocation failed.
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src);
	char *dest = malloc(size + 1);

	if (dest == NULL)
		return NULL;

	/* size + 1 bytes: the terminator is copied along. */
	memcpy(dest, src, size + 1);

	return dest;
}
		855
		856	char strtok(char s, const char *delim)
		857	{
		858	static char *next;
		859
		860	return strtok_r(s, delim, &next);
		861	}
		862
		863	char strtok_r(char s, const char delim, char *next)
		864	{
		865	char start, end;
		866
		867	if (s == NULL)
		868	s = *next;
		869
		870	/* Skip over leading delimiters. */
4296	trochtova	871	while (s && (str_chr(delim, s) != NULL)) ++s;
4055	trochtova	872	start = s;
		873
		874	/* Skip over token characters. */
4296	trochtova	875	while (s && (str_chr(delim, s) == NULL)) ++s;
4055	trochtova	876	end = s;
		877	next = (s ? s + 1 : s);
		878
		879	if (start == end) {
		880	return NULL; /* No more tokens. */
		881	}
		882
		883	/* Overwrite delimiter with NULL terminator. */
		884	*end = '\0';
		885	return start;
		886	}
		887
1719	decky	888	/** @}
1653	cejka	889	*/

Subversion Repositories HelenOS

(root)/branches/dd/uspace/lib/libc/generic/string.c – Rev 4688