diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-10 16:40:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-10 16:40:36 -0700 |
commit | 6b793211c2aec69115dd2769892be0524801f7d8 (patch) | |
tree | 4a9cab1b1a97c7c66ab90353ad6aab56abad30e0 | |
parent | e62cdf04cff63381121364cd6ef077f00d72307a (diff) | |
download | uemacs-6b793211c2aec69115dd2769892be0524801f7d8.tar.gz |
Make cursor movement (largely) understand UTF-8 character boundaries
Ok, so it may do odd things if it's not truly utf-8, and when moving up
and down lines that have utf-8 the cursor moves oddly (because the byte
offset within the line stays constant, rather than the character
offset), but with this you can actually open the UTF8 example file and
move around it, and at least some of the movement makes sense.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | basic.c | 26 | ||||
-rw-r--r-- | display.c | 11 | ||||
-rw-r--r-- | utf8.h | 5 |
3 files changed, 31 insertions, 11 deletions
```diff
@@ -15,6 +15,7 @@
 #include "edef.h"
 #include "efunc.h"
 #include "line.h"
+#include "utf8.h"
 
 /*
  * This routine, given a pointer to a struct line, and the current cursor goal
@@ -74,8 +75,15 @@ int backchar(int f, int n)
 			curwp->w_dotp = lp;
 			curwp->w_doto = llength(lp);
 			curwp->w_flag |= WFMOVE;
-		} else
-			curwp->w_doto--;
+		} else {
+			do {
+				unsigned char c;
+				curwp->w_doto--;
+				c = lgetc(curwp->w_dotp, curwp->w_doto);
+				if (is_beginning_utf8(c))
+					break;
+			} while (curwp->w_doto);
+		}
 	}
 	return TRUE;
 }
@@ -100,14 +108,22 @@ int forwchar(int f, int n)
 	if (n < 0)
 		return backchar(f, -n);
 	while (n--) {
-		if (curwp->w_doto == llength(curwp->w_dotp)) {
+		int len = llength(curwp->w_dotp);
+		if (curwp->w_doto == len) {
 			if (curwp->w_dotp == curbp->b_linep)
 				return FALSE;
 			curwp->w_dotp = lforw(curwp->w_dotp);
 			curwp->w_doto = 0;
 			curwp->w_flag |= WFMOVE;
-		} else
-			curwp->w_doto++;
+		} else {
+			do {
+				unsigned char c;
+				curwp->w_doto++;
+				c = lgetc(curwp->w_dotp, curwp->w_doto);
+				if (is_beginning_utf8(c))
+					break;
+			} while (curwp->w_doto < len);
+		}
 	}
 	return TRUE;
 }
@@ -528,7 +528,6 @@ static void updall(struct window *wp)
 void updpos(void)
 {
 	struct line *lp;
-	int c;
 	int i;
 
 	/* find the current row */
@@ -543,13 +542,13 @@ void updpos(void)
 	curcol = 0;
 	i = 0;
 	while (i < curwp->w_doto) {
-		c = lgetc(lp, i++);
+		unicode_t c;
+		int bytes;
+
+		bytes = utf8_to_unicode(lp->l_text, i, curwp->w_doto, &c);
+		i += bytes;
 		if (c == '\t')
 			curcol |= tabmask;
-		else if (c < 0x20 || c == 0x7f)
-			++curcol;
-		else if (c >= 0x80 && c <= 0xa0)
-			curcol+=2;
 		++curcol;
 	}
 
@@ -6,4 +6,9 @@ typedef unsigned int unicode_t;
 unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res);
 unsigned unicode_to_utf8(unsigned int c, char *utf8);
 
+static inline int is_beginning_utf8(unsigned char c)
+{
+	return (c & 0xc0) != 0x80;
+}
+
 #endif
```