WebSVN – HelenOS – Blame – /trunk/kernel/generic/src/lib/string.c

Rev	Author	Line No.	Line
4011	svoboda	1	/*
		2	* Copyright (c) 2001-2004 Jakub Jermar
		3	* All rights reserved.
		4	*
		5	* Redistribution and use in source and binary forms, with or without
		6	* modification, are permitted provided that the following conditions
		7	* are met:
		8	*
		9	* - Redistributions of source code must retain the above copyright
		10	* notice, this list of conditions and the following disclaimer.
		11	* - Redistributions in binary form must reproduce the above copyright
		12	* notice, this list of conditions and the following disclaimer in the
		13	* documentation and/or other materials provided with the distribution.
		14	* - The name of the author may not be used to endorse or promote products
		15	* derived from this software without specific prior written permission.
		16	*
		17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
		18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
		19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
		20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
		21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
		22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
		23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
		24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
		25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
		26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
		27	*/
		28
4014	decky	29	/** @addtogroup generic
4011	svoboda	30	* @{
		31	*/
		32
		33	/**
		34	* @file
4014	decky	35	* @brief Miscellaneous functions.
4011	svoboda	36	*/
		37
		38	#include <string.h>
		39	#include <print.h>
		40	#include <cpu.h>
		41	#include <arch/asm.h>
		42	#include <arch.h>
		43	#include <console/kconsole.h>
		44
/** Replacement character returned by utf8_decode() for undecodable input. */
char invalch = '?';

/** Byte mask consisting of bits 0 - (@n - 1)
 *
 * The result is truncated to 8 bits, so @n is meant to be in the
 * range 0 .. 8.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/** Number of data bits in a UTF-8 continuation byte (10xxxxxx). */
#define CONT_BITS  6
		52
4175	decky	53	/** Decode a single UTF-8 character from a NULL-terminated string.
		54	*
		55	* Decode a single UTF-8 character from a plain char NULL-terminated
		56	* string. Decoding starts at @index and this index is incremented
		57	* if the current UTF-8 string is encoded in more than a single byte.
		58	*
		59	* @param str Plain character NULL-terminated string.
		60	* @param index Index (counted in plain characters) where to start
		61	* the decoding.
4179	decky	62	* @param limit Maximal allowed value of index.
4175	decky	63	*
		64	* @return Decoded character in UTF-32 or '?' if the encoding is wrong.
		65	*
		66	*/
4179	decky	67	wchar_t utf8_decode(const char str, index_t index, index_t limit)
4175	decky	68	{
4196	svoboda	69	uint8_t b0, b; /* Bytes read from str. */
		70	wchar_t ch;
		71
		72	int b0_bits; /* Data bits in first byte. */
		73	int cbytes; /* Number of continuation bytes. */
		74
4179	decky	75	if (*index > limit)
		76	return invalch;
4196	svoboda	77
		78	b0 = (uint8_t) str[*index];
		79
		80	/* Determine code length. */
		81
		82	if ((b0 & 0x80) == 0) {
		83	/* 0xxxxxxx (Plain ASCII) */
		84	b0_bits = 7;
		85	cbytes = 0;
		86	} else if ((b0 & 0xe0) == 0xc0) {
		87	/* 110xxxxx 10xxxxxx */
		88	b0_bits = 5;
		89	cbytes = 1;
		90	} else if ((b0 & 0xf0) == 0xe0) {
		91	/* 1110xxxx 10xxxxxx 10xxxxxx */
		92	b0_bits = 4;
		93	cbytes = 2;
		94	} else if ((b0 & 0xf8) == 0xf0) {
		95	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		96	b0_bits = 3;
		97	cbytes = 3;
		98	} else {
		99	/* 10xxxxxx -- unexpected continuation byte. */
		100	return invalch;
4179	decky	101	}
4196	svoboda	102
		103	if (*index + cbytes > limit) {
		104	return invalch;
4179	decky	105	}
4196	svoboda	106
		107	ch = b0 & LO_MASK_8(b0_bits);
		108
		109	/* Decode continuation bytes. */
		110	while (cbytes > 0) {
		111	b = (uint8_t) str[*index + 1];
		112	++(*index);
		113
		114	/* Must be 10xxxxxx. */
		115	if ((b & 0xc0) != 0x80) {
4179	decky	116	return invalch;
4196	svoboda	117	}
		118
		119	/* Shift data bits to ch. */
		120	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
		121	--cbytes;
4179	decky	122	}
4196	svoboda	123
		124	return ch;
4175	decky	125	}
		126
4179	decky	127	/** Encode a single UTF-32 character as UTF-8
4011	svoboda	128	*
4179	decky	129	* Encode a single UTF-32 character as UTF-8 and store it into
		130	* the given buffer at @index. Encoding starts at @index and
		131	* this index is incremented if the UTF-8 character takes
		132	* more than a single byte.
4011	svoboda	133	*
4179	decky	134	* @param ch Input UTF-32 character.
		135	* @param str Output buffer.
		136	* @param index Index (counted in plain characters) where to start
		137	* the encoding
		138	* @param limit Maximal allowed value of index.
		139	*
		140	* @return True if the character was encoded or false if there is not
		141	* enought space in the output buffer or the character is invalid
		142	* Unicode code point.
		143	*
		144	*/
		145	bool utf8_encode(const wchar_t ch, char str, index_t index, index_t limit)
		146	{
		147	if (*index > limit)
		148	return false;
		149
		150	if ((ch >= 0) && (ch <= 127)) {
		151	/* Plain ASCII (code points 0 .. 127) */
		152	str[*index] = ch & 0x7f;
		153	return true;
		154	}
		155
		156	if ((ch >= 128) && (ch <= 2047)) {
		157	/* Code points 128 .. 2047 */
		158	if (*index + 1 > limit)
		159	return false;
		160
		161	str[*index] = 0xc0 \| ((ch >> 6) & 0x1f);
		162	(*index)++;
		163	str[*index] = 0x80 \| (ch & 0x3f);
		164	return true;
		165	}
		166
		167	if ((ch >= 2048) && (ch <= 65535)) {
		168	/* Code points 2048 .. 65535 */
		169	if (*index + 2 > limit)
		170	return false;
		171
		172	str[*index] = 0xe0 \| ((ch >> 12) & 0x0f);
		173	(*index)++;
		174	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
		175	(*index)++;
		176	str[*index] = 0x80 \| (ch & 0x3f);
		177	return true;
		178	}
		179
		180	if ((ch >= 65536) && (ch <= 1114111)) {
		181	/* Code points 65536 .. 1114111 */
		182	if (*index + 3 > limit)
		183	return false;
		184
		185	str[*index] = 0xf0 \| ((ch >> 18) & 0x07);
		186	(*index)++;
		187	str[*index] = 0x80 \| ((ch >> 12) & 0x3f);
		188	(*index)++;
		189	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
		190	(*index)++;
		191	str[*index] = 0x80 \| (ch & 0x3f);
		192	return true;
		193	}
		194
		195	return false;
		196	}
		197
		198	/** Get bytes used by UTF-8 characters.
		199	*
		200	* Get the number of bytes (count of plain characters) which
		201	* are used by a given count of UTF-8 characters in a string.
		202	* As UTF-8 encoding is multibyte, there is no constant
		203	* correspondence between number of characters and used bytes.
		204	*
		205	* @param str UTF-8 string to consider.
		206	* @param count Number of UTF-8 characters to count.
		207	*
		208	* @return Number of bytes used by the characters.
		209	*
		210	*/
		211	size_t utf8_count_bytes(const char *str, count_t count)
		212	{
		213	size_t size = 0;
		214	index_t index = 0;
		215
		216	while ((utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) && (size < count)) {
		217	size++;
		218	index++;
		219	}
		220
		221	return index;
		222	}
		223
		224	/** Check whether character is plain ASCII.
		225	*
		226	* @return True if character is plain ASCII.
		227	*
		228	*/
		229	bool ascii_check(const wchar_t ch)
		230	{
		231	if ((ch >= 0) && (ch <= 127))
		232	return true;
		233
		234	return false;
		235	}
		236
		237	/** Check whether character is Unicode.
		238	*
		239	* @return True if character is valid Unicode code point.
		240	*
		241	*/
		242	bool unicode_check(const wchar_t ch)
		243	{
		244	if ((ch >= 0) && (ch <= 1114111))
		245	return true;
		246
		247	return false;
		248	}
		249
		250	/** Return number of plain characters in a string.
		251	*
		252	* @param str NULL-terminated string.
		253	*
4011	svoboda	254	* @return Number of characters in str.
4014	decky	255	*
4011	svoboda	256	*/
		257	size_t strlen(const char *str)
		258	{
4179	decky	259	size_t size;
		260	for (size = 0; str[size]; size++);
4011	svoboda	261
4179	decky	262	return size;
		263	}
		264
		265	/** Return number of UTF-8 characters in a string.
		266	*
		267	* @param str NULL-terminated UTF-8 string.
		268	*
		269	* @return Number of UTF-8 characters in str.
		270	*
		271	*/
		272	size_t strlen_utf8(const char *str)
		273	{
		274	size_t size = 0;
		275	index_t index = 0;
4011	svoboda	276
4179	decky	277	while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
		278	size++;
		279	index++;
		280	}
		281
		282	return size;
4011	svoboda	283	}
		284
/** Return number of UTF-32 characters in a string.
 *
 * @param str NULL-terminated UTF-32 string.
 *
 * @return Number of UTF-32 characters in str, not counting the
 *         terminating zero.
 *
 */
size_t strlen_utf32(const wchar_t *str)
{
	size_t len = 0;

	while (str[len] != 0)
		len++;

	return len;
}
		299
/** Compare two NULL terminated strings
 *
 * Do a char-by-char comparison of two NULL terminated strings.
 * The strings are equal iff they have the same length and consist
 * of the same characters. If one string is a proper prefix of the
 * other, the shorter one is considered smaller.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int strcmp(const char *src, const char *dst)
{
	/* Walk both strings while neither has ended. */
	for (; *src && *dst; src++, dst++) {
		if (*src < *dst)
			return -1;
		if (*src > *dst)
			return 1;
	}

	/* At least one string ended; equal only if both did. */
	if (*src == *dst)
		return 0;

	if (!*src)
		return -1;

	return 1;
}
		328
		329
		330	/** Compare two NULL terminated strings
		331	*
		332	* Do a char-by-char comparison of two NULL terminated strings.
		333	* The strings are considered equal iff they consist of the same
		334	* characters on the minimum of their lengths and specified maximal
		335	* length.
		336	*
		337	* @param src First string to compare.
		338	* @param dst Second string to compare.
		339	* @param len Maximal length for comparison.
		340	*
		341	* @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
		342	*
		343	*/
		344	int strncmp(const char src, const char dst, size_t len)
		345	{
		346	unsigned int i;
		347
		348	for (i = 0; (src) && (dst) && (i < len); src++, dst++, i++) {
		349	if (src < dst)
		350	return -1;
4014	decky	351
4011	svoboda	352	if (src > dst)
		353	return 1;
		354	}
4014	decky	355
4011	svoboda	356	if (i == len \|\| src == dst)
		357	return 0;
4014	decky	358
4011	svoboda	359	if (!*src)
		360	return -1;
4014	decky	361
4011	svoboda	362	return 1;
		363	}
		364
		365
		366
		367	/** Copy NULL terminated string.
		368	*
		369	* Copy at most 'len' characters from string 'src' to 'dest'.
		370	* If 'src' is shorter than 'len', '\0' is inserted behind the
		371	* last copied character.
		372	*
4014	decky	373	* @param src Source string.
4011	svoboda	374	* @param dest Destination buffer.
4014	decky	375	* @param len Size of destination buffer.
		376	*
4011	svoboda	377	*/
		378	void strncpy(char dest, const char src, size_t len)
		379	{
		380	unsigned int i;
4014	decky	381
4011	svoboda	382	for (i = 0; i < len; i++) {
		383	if (!(dest[i] = src[i]))
		384	return;
		385	}
4014	decky	386
4011	svoboda	387	dest[i - 1] = '\0';
		388	}
		389
/** Find first occurrence of character in string.
 *
 * The terminating zero is not searched, so looking for '\0'
 * yields NULL.
 *
 * @param s String to search.
 * @param i Character to look for.
 *
 * @return Pointer to character in @a s or NULL if not found.
 */
char *strchr(const char *s, int i)
{
	while (*s != '\0') {
		if (*s == i)
			return (char *) s;
		++s;
	}

	return NULL;
}
		407
4011	svoboda	408	/** @}
		409	*/

Subversion Repositories HelenOS

(root)/trunk/kernel/generic/src/lib/string.c – Rev 4196