aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2012-07-10 16:40:36 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2012-07-10 16:40:36 -0700
commit: 6b793211c2aec69115dd2769892be0524801f7d8 (patch)
tree: 4a9cab1b1a97c7c66ab90353ad6aab56abad30e0
parent: e62cdf04cff63381121364cd6ef077f00d72307a (diff)
download: uemacs-6b793211c2aec69115dd2769892be0524801f7d8.tar.gz
Make cursor movement (largely) understand UTF-8 character boundaries
Ok, so it may do odd things if it's not truly utf-8, and when moving up and down lines that have utf-8 the cursor moves oddly (because the byte offset within the line stays constant, rather than the character offset), but with this you can actually open the UTF8 example file and move around it, and at least some of the movement makes sense.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- basic.c 26
-rw-r--r-- display.c 11
-rw-r--r-- utf8.h 5
3 files changed, 31 insertions, 11 deletions
diff --git a/basic.c b/basic.c
index 3bf0227..3a7d6f7 100644
--- a/basic.c
+++ b/basic.c
@@ -15,6 +15,7 @@
#include "edef.h"
#include "efunc.h"
#include "line.h"
+#include "utf8.h"
/*
* This routine, given a pointer to a struct line, and the current cursor goal
@@ -74,8 +75,15 @@ int backchar(int f, int n)
curwp->w_dotp = lp;
curwp->w_doto = llength(lp);
curwp->w_flag |= WFMOVE;
- } else
- curwp->w_doto--;
+ } else {
+ do {
+ unsigned char c;
+ curwp->w_doto--;
+ c = lgetc(curwp->w_dotp, curwp->w_doto);
+ if (is_beginning_utf8(c))
+ break;
+ } while (curwp->w_doto);
+ }
}
return TRUE;
}
@@ -100,14 +108,22 @@ int forwchar(int f, int n)
if (n < 0)
return backchar(f, -n);
while (n--) {
- if (curwp->w_doto == llength(curwp->w_dotp)) {
+ int len = llength(curwp->w_dotp);
+ if (curwp->w_doto == len) {
if (curwp->w_dotp == curbp->b_linep)
return FALSE;
curwp->w_dotp = lforw(curwp->w_dotp);
curwp->w_doto = 0;
curwp->w_flag |= WFMOVE;
- } else
- curwp->w_doto++;
+ } else {
+ do {
+ unsigned char c;
+ curwp->w_doto++;
+ c = lgetc(curwp->w_dotp, curwp->w_doto);
+ if (is_beginning_utf8(c))
+ break;
+ } while (curwp->w_doto < len);
+ }
}
return TRUE;
}
diff --git a/display.c b/display.c
index 82b4f84..676514d 100644
--- a/display.c
+++ b/display.c
@@ -528,7 +528,6 @@ static void updall(struct window *wp)
void updpos(void)
{
struct line *lp;
- int c;
int i;
/* find the current row */
@@ -543,13 +542,13 @@ void updpos(void)
curcol = 0;
i = 0;
while (i < curwp->w_doto) {
- c = lgetc(lp, i++);
+ unicode_t c;
+ int bytes;
+
+ bytes = utf8_to_unicode(lp->l_text, i, curwp->w_doto, &c);
+ i += bytes;
if (c == '\t')
curcol |= tabmask;
- else if (c < 0x20 || c == 0x7f)
- ++curcol;
- else if (c >= 0x80 && c <= 0xa0)
- curcol+=2;
++curcol;
}
diff --git a/utf8.h b/utf8.h
index b60ccd2..c317a6a 100644
--- a/utf8.h
+++ b/utf8.h
@@ -6,4 +6,9 @@ typedef unsigned int unicode_t;
unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res);
unsigned unicode_to_utf8(unsigned int c, char *utf8);
+static inline int is_beginning_utf8(unsigned char c)	/* nonzero unless c is a UTF-8 continuation byte */
+{
+ return (c & 0xc0) != 0x80;	/* top two bits 10 (10xxxxxx) mark a continuation byte; anything else can start a character */
+}
+
#endif