Browse Source

Replace utf8decode with our own thing

K. Lange 3 years ago
parent
commit
ca08d442d6
5 changed files with 52 additions and 64 deletions
  1. 3 3
      apps/bim.c
  2. 1 1
      apps/terminal-vga.c
  3. 1 1
      apps/terminal.c
  4. 47 0
      base/usr/include/toaru/decodeutf8.h
  5. 0 59
      base/usr/include/toaru/utf8decode.h

+ 3 - 3
apps/bim.c

@@ -49,13 +49,13 @@
 
 #ifdef __toaru__
 #include <sys/fswait.h>
-#include <toaru/utf8decode.h>
+#include <toaru/decodeutf8.h>
 #else
 #include <poll.h>
-#include "../base/usr/include/toaru/utf8decode.h"
+#include "../base/usr/include/toaru/decodeutf8.h"
 #endif
 
-#define BLOCK_SIZE 256
+#define BLOCK_SIZE 4096
 #define ENTER_KEY     '\n'
 #define BACKSPACE_KEY 0x08
 #define DELETE_KEY    0x7F

+ 1 - 1
apps/terminal-vga.c

@@ -25,7 +25,7 @@
 
 #include <wchar.h>
 
-#include <toaru/utf8decode.h>
+#include <toaru/decodeutf8.h>
 #include <toaru/kbd.h>
 #include <toaru/graphics.h>
 #include <toaru/termemu.h>

+ 1 - 1
apps/terminal.c

@@ -36,7 +36,7 @@
 
 #define TRACE_APP_NAME "terminal"
 #include <toaru/trace.h>
-#include <toaru/utf8decode.h>
+#include <toaru/decodeutf8.h>
 #include <toaru/yutani.h>
 #include <toaru/decorations.h>
 #include <toaru/graphics.h>

+ 47 - 0
base/usr/include/toaru/decodeutf8.h

@@ -0,0 +1,47 @@
+/*
+ * Stateful UTF-8 decoder
+ *
+ * Bytes are fed one at a time to decode(); the caller keeps the state and
+ * the partially assembled code point between calls.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+
+/*
+ * Advance the decoder by one input byte.
+ *
+ * state: decoder state owned by the caller. It must be UTF8_ACCEPT before
+ *        the first byte of a sequence.
+ * codep: receives the code point; only meaningful when the function
+ *        returns UTF8_ACCEPT.
+ * byte:  next input byte. Only the low eight bits are used, so a
+ *        sign-extended char cannot index outside the lookup tables.
+ *
+ * Returns the new *state: UTF8_ACCEPT when *codep holds a complete code
+ * point, UTF8_REJECT when the input is malformed, any other value when
+ * more continuation bytes are expected.
+ *
+ * Once the state is UTF8_REJECT it stays there; the caller has to set
+ * *state back to UTF8_ACCEPT to resume decoding.
+ *
+ * No check is made for overlong encodings, UTF-16 surrogates, or values
+ * above U+10FFFF.
+ */
+static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
+	/*
+	 * State entered after the first byte of a sequence, indexed by the
+	 * top five bits of that byte. States 2, 3 and 4 mean that one, two
+	 * or three continuation bytes are still expected.
+	 */
+	static const uint8_t state_table[32] = {
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xxxxxxx */
+		1,1,1,1,1,1,1,1,                 /* 10xxxxxx */
+		2,2,2,2,                         /* 110xxxxx */
+		3,3,                             /* 1110xxxx */
+		4,                               /* 11110xxx */
+		1                                /* 11111xxx */
+	};
+
+	/* Payload bits carried by the first byte, same indexing as above. */
+	static const uint8_t mask_bytes[32] = {
+		0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
+		0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
+		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+		0x1F,0x1F,0x1F,0x1F,
+		0x0F,0x0F,
+		0x07,
+		0x00
+	};
+
+	/* State reached after consuming one continuation byte. */
+	static const uint8_t next[5] = {
+		UTF8_ACCEPT,
+		UTF8_REJECT,
+		UTF8_ACCEPT,
+		2,
+		3
+	};
+
+	/* Keep the table index (byte >> 3) within 0..31. */
+	byte &= 0xFF;
+
+	if (*state == UTF8_ACCEPT) {
+		*codep = byte & mask_bytes[byte >> 3];
+		*state = state_table[byte >> 3];
+	} else if (*state > 4 || (byte & 0xC0) != 0x80) {
+		/*
+		 * Either the state is not one this decoder produces, or the
+		 * sequence was cut short by a byte that is not 10xxxxxx.
+		 */
+		*state = UTF8_REJECT;
+	} else {
+		*codep = (byte & 0x3F) | (*codep << 6);
+		*state = next[*state];
+	}
+	return *state;
+}

+ 0 - 59
base/usr/include/toaru/utf8decode.h

@@ -1,59 +0,0 @@
-/*
-
-   Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
-
-   Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
-   The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-#pragma once
-
-#include <stdint.h>
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 1
-
-static const uint8_t utf8d[] = {
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
-	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
-	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
-	0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
-	0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
-	0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
-	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
-	1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
-	1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
-	1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
-};
-
-static inline uint32_t
-decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
-	uint32_t type = utf8d[byte];
-
-	*codep = (*state != UTF8_ACCEPT) ?
-		(byte & 0x3fu) | (*codep << 6) :
-		(0xff >> type) & (byte);
-
-	*state = utf8d[256 + *state*16 + type];
-	return *state;
-}
-