Browse Source

libc: working wcstombs and mbstowcs

K. Lange 4 years ago
parent
commit
3d780117c3
5 changed files with 107 additions and 30 deletions
  1. 18 0
      apps/t_mbstowcs.c
  2. 3 0
      base/usr/include/stdlib.h
  3. 0 3
      base/usr/include/wchar.h
  4. 86 0
      libc/stdlib/mbstowcs.c
  5. 0 27
      libc/wchar/wcs.c

+ 18 - 0
apps/t_mbstowcs.c

@@ -0,0 +1,18 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Test tool for mbstowcs() / wcstombs().
+ *
+ * Converts argv[1] to a wide string, then prints each wide character as
+ * "U+<hex> <character>", converting the character back to a multibyte
+ * string for display.
+ *
+ * Returns 0 on success, 1 on a usage error, an undecodable argument or
+ * an allocation failure.
+ */
+int main(int argc, char * argv[]) {
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s string\n", argv[0]);
+		return 1;
+	}
+
+	/* A NULL destination only counts the wide characters needed. */
+	size_t req = mbstowcs(NULL, argv[1], 0);
+	if (req == (size_t)-1) {
+		fprintf(stderr, "%s: invalid multibyte sequence\n", argv[0]);
+		return 1;
+	}
+
+	/* req + 1 elements: the conversion below also stores the terminator. */
+	wchar_t * dest = malloc(sizeof(wchar_t) * (req + 1));
+	if (!dest) {
+		fprintf(stderr, "%s: out of memory\n", argv[0]);
+		return 1;
+	}
+	mbstowcs(dest, argv[1], req+1);
+
+	for (size_t i = 0; i < req; ++i) {
+		char tmp[8];
+		wchar_t in[] = {dest[i], L'\0'};
+		wcstombs(tmp, in, 8);
+		/* %x expects an unsigned int; wchar_t is not guaranteed to be one. */
+		fprintf(stdout, "U+%4x %s\n", (unsigned int)dest[i], tmp);
+	}
+
+	free(dest);
+	return 0;
+}

+ 3 - 0
base/usr/include/stdlib.h

@@ -50,3 +50,6 @@ extern void *bsearch(const void *key, const void *base, size_t nmemb, size_t siz
 	int (*compar)(const void *, const void *));
 
 extern char * mktemp(char * template);
+
+extern size_t mbstowcs(wchar_t *dest, const char *src, size_t n);
+extern size_t wcstombs(char * dest, const wchar_t *src, size_t n);

+ 0 - 3
base/usr/include/wchar.h

@@ -14,7 +14,4 @@ extern wchar_t * wcschr(const wchar_t *wcs, wchar_t wc);
 extern wchar_t * wcsrchr(const wchar_t *wcs, wchar_t wc);
 extern wchar_t * wcsncat(wchar_t *dest, const wchar_t * src, size_t n);
 
-/* TODO */
-extern size_t wcstombs(char * dest, const wchar_t *src, size_t n);
-
 typedef unsigned int wint_t;

+ 86 - 0
libc/stdlib/mbstowcs.c

@@ -0,0 +1,86 @@
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <toaru/decodeutf8.h>
+
+/*
+ * Encode `codepoint` as a UTF-8 byte sequence into `out`, using the
+ * original 1-to-6 byte encoding scheme.
+ *
+ * `out` must have room for at least 7 bytes: all 7 are zeroed first, so
+ * the result is always NUL-terminated.
+ *
+ * Returns the length of the sequence as measured by strlen(), which
+ * means a codepoint of 0 yields 0.
+ */
+static int to_eight(uint32_t codepoint, char * out) {
+	memset(out, 0x00, 7);
+
+	if (codepoint < 0x0080) {
+		out[0] = (char)codepoint;
+	} else if (codepoint < 0x0800) {
+		out[0] = 0xC0 | (codepoint >> 6);
+		out[1] = 0x80 | (codepoint & 0x3F);
+	} else if (codepoint < 0x10000) {
+		out[0] = 0xE0 | (codepoint >> 12);
+		out[1] = 0x80 | ((codepoint >> 6) & 0x3F);
+		out[2] = 0x80 | (codepoint & 0x3F);
+	} else if (codepoint < 0x200000) {
+		out[0] = 0xF0 | (codepoint >> 18);
+		out[1] = 0x80 | ((codepoint >> 12) & 0x3F);
+		out[2] = 0x80 | ((codepoint >> 6) & 0x3F);
+		out[3] = 0x80 | ((codepoint) & 0x3F);
+	} else if (codepoint < 0x4000000) {
+		out[0] = 0xF8 | (codepoint >> 24);
+		/* Continuation bytes carry 6 bits each; mask off the bits that
+		 * already went into the lead byte. */
+		out[1] = 0x80 | ((codepoint >> 18) & 0x3F);
+		out[2] = 0x80 | ((codepoint >> 12) & 0x3F);
+		out[3] = 0x80 | ((codepoint >> 6) & 0x3F);
+		out[4] = 0x80 | ((codepoint) & 0x3F);
+	} else {
+		/* Six-byte sequences use the 1111110x lead byte (0xFC); 0xF8 is
+		 * the five-byte prefix. */
+		out[0] = 0xFC | (codepoint >> 30);
+		out[1] = 0x80 | ((codepoint >> 24) & 0x3F);
+		out[2] = 0x80 | ((codepoint >> 18) & 0x3F);
+		out[3] = 0x80 | ((codepoint >> 12) & 0x3F);
+		out[4] = 0x80 | ((codepoint >> 6) & 0x3F);
+		out[5] = 0x80 | ((codepoint) & 0x3F);
+	}
+
+	return (int)strlen(out);
+}
+
+/*
+ * Convert the multibyte (UTF-8) string `src` into wide characters.
+ *
+ * If `dest` is NULL, nothing is stored, `n` is ignored, and the number
+ * of wide characters the whole string would produce is returned.
+ * Otherwise at most `n` wide characters are stored in `dest`; a
+ * terminating L'\0' is added only if the whole string was converted and
+ * there is still room for it.
+ *
+ * Returns the number of wide characters converted, not counting the
+ * terminator, or (size_t)-1 if the decoder rejects a byte or the input
+ * ends in the middle of a multibyte sequence.
+ */
+size_t mbstowcs(wchar_t *dest, const char *src, size_t n) {
+	size_t count = 0;
+	uint32_t state = 0;
+	uint32_t codepoint = 0;
+	/* Set while bytes of an unfinished sequence have been consumed. */
+	int pending = 0;
+
+	while ((!dest || count < n) && *src) {
+		/* decode() returning 0 signals that a full codepoint is ready. */
+		if (!decode(&state, &codepoint, *(const unsigned char *)src)) {
+			if (dest) {
+				dest[count] = codepoint;
+			}
+			count++;
+			codepoint = 0;
+			pending = 0;
+		} else if (state == UTF8_REJECT) {
+			return (size_t)-1;
+		} else {
+			pending = 1;
+		}
+		src++;
+	}
+
+	/* The string ended before the current sequence was complete. */
+	if (!*src && pending) {
+		return (size_t)-1;
+	}
+
+	if (dest && !*src && count < n) {
+		dest[count] = L'\0';
+	}
+
+	return count;
+}
+
+/*
+ * Convert the wide string `src` into a multibyte (UTF-8) string.
+ *
+ * If `dest` is NULL, nothing is stored, `n` is ignored, and the number
+ * of bytes the whole string would need (without the terminator) is
+ * returned. Otherwise at most `n` bytes are stored in `dest`, and
+ * conversion stops before any character whose encoding would not fit
+ * completely. A terminating '\0' is added only if the whole string was
+ * converted and there is still room for it.
+ *
+ * Returns the number of bytes produced, not counting the terminator.
+ */
+size_t wcstombs(char * dest, const wchar_t *src, size_t n) {
+	size_t count = 0;
+
+	while (*src) {
+		char tmp[7];
+		size_t size = (size_t)to_eight(*src, tmp);
+		if (dest) {
+			/* count never exceeds n here, so n - count cannot wrap. */
+			if (size > n - count) break;
+			memcpy(&dest[count], tmp, size);
+		}
+		count += size;
+		src++;
+	}
+
+	if (dest && !*src && count < n) {
+		dest[count] = '\0';
+	}
+
+	return count;
+}
+

+ 0 - 27
libc/wchar/wcs.c

@@ -1,27 +0,0 @@
-#include <wchar.h>
-
-size_t wcstombs(char * dest, const wchar_t *src, size_t n) {
-	/* TODO */
-	size_t c = 0;
-	while (c < n && *src) {
-		*dest = *src;
-		c++;
-		src++;
-		dest++;
-	}
-	*dest = 0;
-	return c;
-}
-
-size_t mbstowcs(wchar_t * dest, const char *src, size_t n) {
-	/* TODO */
-	size_t c = 0;
-	while (c < n && *src) {
-		*dest = *src;
-		c++;
-		src++;
-		dest++;
-	}
-	*dest = 0;
-	return c;
-}