Use utf8 helper functions for keyboard input

ttgetc() used a homebrew UTF-8 to Unicode translation that was limited to just the normal Latin-1 characters. Use the utf8 helper functions to get it right for the more complex cases.

NOTE! We don't actually handle characters > 0xff correctly anyway, and we still end up doing Latin-1 in the buffers on input. One small step at a time.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-10 17:36:30 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-07-10 17:36:30 -0700
commit: ec6f4f36ec679a9434c71915f22c45b165cd227a (patch)
tree: b5c8f2c3bd2908446be767c23b0ae9dee48a7563
parent: 6b793211c2aec69115dd2769892be0524801f7d8 (diff)
download: uemacs-ec6f4f36ec679a9434c71915f22c45b165cd227a.tar.gz
1 file changed, 17 insertions, 49 deletions
diff --git a/posix.c b/posix.c
index 445724e..e6ebc83 100644
--- a/posix.c
+++ b/posix.c
@@ -150,10 +150,10 @@ void ttflush(void)
  */
 int ttgetc(void)
 {
-	static unsigned char buffer[32];
+	static char buffer[32];
 	static int pending;
-	unsigned char c, second;
-	int count;
+	unicode_t c;
+	int count, bytes = 1;
 
 	count = pending;
 	if (!count) {
@@ -163,7 +163,7 @@ int ttgetc(void)
 		pending = count;
 	}
 
-	c = buffer[0];
+	c = (unsigned char) buffer[0];
 	if (c >= 32 && c < 128)
 		goto done;
 
@@ -181,56 +181,24 @@ int ttgetc(void)
 		ntermios.c_cc[VTIME] = 0;
 		tcsetattr(0, TCSANOW, &ntermios);
 
-		if (n <= 0)
-			goto done;
-		pending += n;
+		if (n > 0)
+			pending += n;
 	}
-	second = buffer[1];
+	if (pending > 1) {
+		unsigned char second = buffer[1];
 
-	/* Turn ESC+'[' into CSI */
-	if (c == 27 && second == '[') {
-		pending -= 2;
-		memmove(buffer, buffer+2, pending);
-		return 128+27;
+		/* Turn ESC+'[' into CSI */
+		if (c == 27 && second == '[') {
+			bytes = 2;
+			c = 128+27;
+			goto done;
+		}
 	}
-
-	/* Normal 7-bit? */
-	if (!(c & 0x80))
-		goto done;
-
-	/*
-	 * Unexpected UTF-8 continuation character? Maybe
-	 * we're in non-UTF mode, or maybe it's a control
-	 * character.. Regardless, just pass it on.
-	 */
-	if (!(c & 0x40))
-		goto done;
-
-	/*
-	 * Multi-byte sequences.. Right now we only
-	 * want to get characters that can be represented
-	 * in a single byte, so we're not interested in
-	 * anything else..
-	 */
-	if (c & 0x3c)
-		goto done;
-
-	if ((second & 0xc0) != 0x80)
-		goto done;
-
-	/*
-	 * Ok, it's a two-byte UTF-8 character that can be represented
-	 * as a single-byte Latin1 character!
-	 */
-	c = (c << 6) | (second & 0x3f);
-	pending -= 2;
-	memmove(buffer, buffer+2, pending);
-
-	return c;
+	bytes = utf8_to_unicode(buffer, 0, pending, &c);
 
 done:
-	pending--;
-	memmove(buffer, buffer+1, pending);
+	pending -= bytes;
+	memmove(buffer, buffer+bytes, pending);
 	return c;
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-10 17:36:30 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-10 17:36:30 -0700
commit	ec6f4f36ec679a9434c71915f22c45b165cd227a (patch)
tree	b5c8f2c3bd2908446be767c23b0ae9dee48a7563
parent	6b793211c2aec69115dd2769892be0524801f7d8 (diff)
download	uemacs-ec6f4f36ec679a9434c71915f22c45b165cd227a.tar.gz