WebSVN – HelenOS – Blame – /trunk/kernel/generic/src/lib/string.c

Rev	Author	Line No.	Line
4011	svoboda	1	/*
		2	* Copyright (c) 2001-2004 Jakub Jermar
		3	* All rights reserved.
		4	*
		5	* Redistribution and use in source and binary forms, with or without
		6	* modification, are permitted provided that the following conditions
		7	* are met:
		8	*
		9	* - Redistributions of source code must retain the above copyright
		10	* notice, this list of conditions and the following disclaimer.
		11	* - Redistributions in binary form must reproduce the above copyright
		12	* notice, this list of conditions and the following disclaimer in the
		13	* documentation and/or other materials provided with the distribution.
		14	* - The name of the author may not be used to endorse or promote products
		15	* derived from this software without specific prior written permission.
		16	*
		17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
		18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
		19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
		20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
		21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
		22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
		23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
		24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
		25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
		26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
		27	*/
		28
4014	decky	29	/** @addtogroup generic
4011	svoboda	30	* @{
		31	*/
		32
		33	/**
		34	* @file
4014	decky	35	* @brief Miscellaneous functions.
4011	svoboda	36	*/
		37
		38	#include <string.h>
		39	#include <print.h>
		40	#include <cpu.h>
		41	#include <arch/asm.h>
		42	#include <arch.h>
		43	#include <console/kconsole.h>
		44
/** Character returned by the decoder in place of an invalid sequence. */
char invalch = '?';

/** Byte mask consisting of lowest @n bits (out of eight). */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Mask consisting of lowest @n bits (out of 32).
 *
 * The shift is done on an unsigned 32-bit operand so that no signed
 * overflow can occur; @n must be less than 32.
 */
#define LO_MASK_32(n)  ((uint32_t) (((uint32_t) 1 << (n)) - 1))

/** Byte mask consisting of highest @n bits (out of eight).
 *
 * The result is cast back to uint8_t because operator ~ works on the
 * promoted int and would otherwise set all bits above the lowest eight.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte. */
#define CONT_BITS  6
		58
4175	decky	59	/** Decode a single UTF-8 character from a NULL-terminated string.
		60	*
		61	* Decode a single UTF-8 character from a plain char NULL-terminated
4199	svoboda	62	* string. Decoding starts at @index and this index is moved to the
		63	* beginning of the next character. In case of decoding error,
		64	* index advances. However, index is never moved beyond (str+limit).
4175	decky	65	*
		66	* @param str Plain character NULL-terminated string.
		67	* @param index Index (counted in plain characters) where to start
		68	* the decoding.
4179	decky	69	* @param limit Maximal allowed value of index.
4175	decky	70	*
		71	* @return Decoded character in UTF-32 or '?' if the encoding is wrong.
		72	*
		73	*/
4179	decky	74	wchar_t utf8_decode(const char str, index_t index, index_t limit)
4175	decky	75	{
4196	svoboda	76	uint8_t b0, b; /* Bytes read from str. */
		77	wchar_t ch;
		78
		79	int b0_bits; /* Data bits in first byte. */
		80	int cbytes; /* Number of continuation bytes. */
		81
4199	svoboda	82	if (*index + 1 > limit)
4179	decky	83	return invalch;
4196	svoboda	84
4199	svoboda	85	b0 = (uint8_t) str[(*index)++];
4196	svoboda	86
		87	/* Determine code length. */
		88
		89	if ((b0 & 0x80) == 0) {
		90	/* 0xxxxxxx (Plain ASCII) */
		91	b0_bits = 7;
		92	cbytes = 0;
		93	} else if ((b0 & 0xe0) == 0xc0) {
		94	/* 110xxxxx 10xxxxxx */
		95	b0_bits = 5;
		96	cbytes = 1;
		97	} else if ((b0 & 0xf0) == 0xe0) {
		98	/* 1110xxxx 10xxxxxx 10xxxxxx */
		99	b0_bits = 4;
		100	cbytes = 2;
		101	} else if ((b0 & 0xf8) == 0xf0) {
		102	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		103	b0_bits = 3;
		104	cbytes = 3;
		105	} else {
		106	/* 10xxxxxx -- unexpected continuation byte. */
		107	return invalch;
4179	decky	108	}
4196	svoboda	109
		110	if (*index + cbytes > limit) {
		111	return invalch;
4179	decky	112	}
4196	svoboda	113
		114	ch = b0 & LO_MASK_8(b0_bits);
		115
		116	/* Decode continuation bytes. */
		117	while (cbytes > 0) {
4199	svoboda	118	b = (uint8_t) str[(*index)++];
4196	svoboda	119
		120	/* Must be 10xxxxxx. */
		121	if ((b & 0xc0) != 0x80) {
4179	decky	122	return invalch;
4196	svoboda	123	}
		124
		125	/* Shift data bits to ch. */
		126	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
		127	--cbytes;
4179	decky	128	}
4196	svoboda	129
		130	return ch;
4175	decky	131	}
		132
4179	decky	133	/** Encode a single UTF-32 character as UTF-8
4011	svoboda	134	*
4179	decky	135	* Encode a single UTF-32 character as UTF-8 and store it into
		136	* the given buffer at @index. Encoding starts at @index and
4199	svoboda	137	* this index is moved at the position where the next character
		138	* can be written to.
4011	svoboda	139	*
4179	decky	140	* @param ch Input UTF-32 character.
		141	* @param str Output buffer.
		142	* @param index Index (counted in plain characters) where to start
		143	* the encoding
		144	* @param limit Maximal allowed value of index.
		145	*
		146	* @return True if the character was encoded or false if there is not
		147	* enought space in the output buffer or the character is invalid
		148	* Unicode code point.
		149	*
		150	*/
		151	bool utf8_encode(const wchar_t ch, char str, index_t index, index_t limit)
		152	{
4198	svoboda	153	uint32_t cc; /* Unsigned version of ch. */
		154
		155	int cbytes; /* Number of continuation bytes. */
		156	int b0_bits; /* Number of data bits in first byte. */
		157	int i;
		158
4199	svoboda	159	if (*index >= limit)
4179	decky	160	return false;
4198	svoboda	161
		162	if (ch < 0)
		163	return false;
		164
		165	/* Bit operations should only be done on unsigned numbers. */
		166	cc = (uint32_t) ch;
		167
		168	/* Determine how many continuation bytes are needed. */
		169	if ((cc & ~LO_MASK_32(7)) == 0) {
		170	b0_bits = 7;
		171	cbytes = 0;
		172	} else if ((cc & ~LO_MASK_32(11)) == 0) {
		173	b0_bits = 5;
		174	cbytes = 1;
		175	} else if ((cc & ~LO_MASK_32(16)) == 0) {
		176	b0_bits = 4;
		177	cbytes = 2;
		178	} else if ((cc & ~LO_MASK_32(21)) == 0) {
		179	b0_bits = 3;
		180	cbytes = 3;
		181	} else {
		182	/* Codes longer than 21 bits are not supported. */
		183	return false;
4179	decky	184	}
4198	svoboda	185
		186	/* Check for available space in buffer. */
4199	svoboda	187	if (*index + cbytes >= limit)
4198	svoboda	188	return false;
		189
		190	/* Encode continuation bytes. */
		191	for (i = cbytes; i > 0; --i) {
		192	str[*index + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
		193	cc = cc >> CONT_BITS;
4179	decky	194	}
4198	svoboda	195
		196	/* Encode first byte. */
		197	str[*index] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
		198
		199	/* Advance index. */
4199	svoboda	200	*index += (1 + cbytes);
4179	decky	201
4198	svoboda	202	return true;
4179	decky	203	}
		204
		205	/** Get bytes used by UTF-8 characters.
		206	*
		207	* Get the number of bytes (count of plain characters) which
		208	* are used by a given count of UTF-8 characters in a string.
		209	* As UTF-8 encoding is multibyte, there is no constant
		210	* correspondence between number of characters and used bytes.
		211	*
		212	* @param str UTF-8 string to consider.
		213	* @param count Number of UTF-8 characters to count.
		214	*
		215	* @return Number of bytes used by the characters.
		216	*
		217	*/
		218	size_t utf8_count_bytes(const char *str, count_t count)
		219	{
		220	size_t size = 0;
		221	index_t index = 0;
4199	svoboda	222	index_t iprev;
		223	wchar_t ch;
4179	decky	224
4199	svoboda	225	while (true) {
		226	iprev = index;
		227	if (size >= count)
		228	break;
		229	ch = utf8_decode(str, &index, UTF8_NO_LIMIT);
		230	if (ch == '\0') break;
		231
4179	decky	232	size++;
		233	}
		234
4199	svoboda	235	return iprev;
4179	decky	236	}
		237
		238	/** Check whether character is plain ASCII.
		239	*
		240	* @return True if character is plain ASCII.
		241	*
		242	*/
		243	bool ascii_check(const wchar_t ch)
		244	{
		245	if ((ch >= 0) && (ch <= 127))
		246	return true;
		247
		248	return false;
		249	}
		250
		251	/** Check whether character is Unicode.
		252	*
		253	* @return True if character is valid Unicode code point.
		254	*
		255	*/
		256	bool unicode_check(const wchar_t ch)
		257	{
		258	if ((ch >= 0) && (ch <= 1114111))
		259	return true;
		260
		261	return false;
		262	}
		263
		264	/** Return number of plain characters in a string.
		265	*
		266	* @param str NULL-terminated string.
		267	*
4011	svoboda	268	* @return Number of characters in str.
4014	decky	269	*
4011	svoboda	270	*/
		271	size_t strlen(const char *str)
		272	{
4179	decky	273	size_t size;
		274	for (size = 0; str[size]; size++);
4011	svoboda	275
4179	decky	276	return size;
		277	}
		278
		279	/** Return number of UTF-8 characters in a string.
		280	*
		281	* @param str NULL-terminated UTF-8 string.
		282	*
		283	* @return Number of UTF-8 characters in str.
		284	*
		285	*/
		286	size_t strlen_utf8(const char *str)
		287	{
		288	size_t size = 0;
		289	index_t index = 0;
4011	svoboda	290
4179	decky	291	while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
		292	size++;
		293	}
		294
		295	return size;
4011	svoboda	296	}
		297
/** Return number of UTF-32 characters in a string.
 *
 * @param str NULL-terminated UTF-32 string.
 *
 * @return Number of UTF-32 characters in front of the terminator.
 *
 */
size_t strlen_utf32(const wchar_t *str)
{
	const wchar_t *end = str;

	/* Find the terminator. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
		312
/** Compare two NULL terminated strings
 *
 * Do a char-by-char comparison of two NULL terminated strings.
 * The strings are equal iff they have the same length and consist
 * of the same characters. If one string is a proper prefix of the
 * other, the shorter one is considered smaller.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int strcmp(const char *src, const char *dst)
{
	/* Walk both strings until they differ or one of them ends. */
	for (; *src && *dst; src++, dst++) {
		if (*src < *dst)
			return -1;
		if (*src > *dst)
			return 1;
	}

	/* At least one string ended; equal only if both did. */
	if (*src == *dst)
		return 0;

	if (!*src)
		return -1;

	return 1;
}
		341
		342
		343	/** Compare two NULL terminated strings
		344	*
		345	* Do a char-by-char comparison of two NULL terminated strings.
		346	* The strings are considered equal iff they consist of the same
		347	* characters on the minimum of their lengths and specified maximal
		348	* length.
		349	*
		350	* @param src First string to compare.
		351	* @param dst Second string to compare.
		352	* @param len Maximal length for comparison.
		353	*
		354	* @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
		355	*
		356	*/
		357	int strncmp(const char src, const char dst, size_t len)
		358	{
		359	unsigned int i;
		360
		361	for (i = 0; (src) && (dst) && (i < len); src++, dst++, i++) {
		362	if (src < dst)
		363	return -1;
4014	decky	364
4011	svoboda	365	if (src > dst)
		366	return 1;
		367	}
4014	decky	368
4011	svoboda	369	if (i == len \|\| src == dst)
		370	return 0;
4014	decky	371
4011	svoboda	372	if (!*src)
		373	return -1;
4014	decky	374
4011	svoboda	375	return 1;
		376	}
		377
		378
		379
		380	/** Copy NULL terminated string.
		381	*
		382	* Copy at most 'len' characters from string 'src' to 'dest'.
		383	* If 'src' is shorter than 'len', '\0' is inserted behind the
		384	* last copied character.
		385	*
4014	decky	386	* @param src Source string.
4011	svoboda	387	* @param dest Destination buffer.
4014	decky	388	* @param len Size of destination buffer.
		389	*
4011	svoboda	390	*/
		391	void strncpy(char dest, const char src, size_t len)
		392	{
		393	unsigned int i;
4014	decky	394
4011	svoboda	395	for (i = 0; i < len; i++) {
		396	if (!(dest[i] = src[i]))
		397	return;
		398	}
4014	decky	399
4011	svoboda	400	dest[i - 1] = '\0';
		401	}
		402
/** Find first occurrence of character in string.
 *
 * Only the characters in front of the terminator are examined, so
 * searching for '\0' yields NULL.
 *
 * @param s String to search.
 * @param i Character to look for.
 *
 * @return Pointer to character in @a s or NULL if not found.
 */
char *strchr(const char *s, int i)
{
	while (*s != '\0') {
		if (*s == i)
			return (char *) s;
		++s;
	}

	return NULL;
}
		420
4011	svoboda	421	/** @}
		422	*/

Subversion Repositories HelenOS

(root)/trunk/kernel/generic/src/lib/string.c – Rev 4199