switch to delayed handling of escape sequences

#define A(x) #x
A('\12')
should *not* yield "'\\n'"; the problem is that we are doing the conversion
too early.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
author: Al Viro <viro@zeniv.linux.org.uk> 2013-01-07 14:00:48 -0500
committer: Al Viro <viro@zeniv.linux.org.uk> 2013-02-12 10:05:31 -0500
commit: 959bd8973bfcfced69715a522007662929ae6d48 (patch)
tree: 18c00b9c8fde48546638c613e4db9d0fa84d06c6
parent: 15cfba61d99668e9c14782779766f48834490ead (diff)
download: sparse-959bd8973bfcfced69715a522007662929ae6d48.tar.gz
9 files changed, 388 insertions, 222 deletions
diff --git a/Makefile b/Makefile
index 79cadb09..8a8bed7b 100644
--- a/Makefile
+++ b/Makefile
@@ -69,7 +69,7 @@ LIB_H=    token.h parse.h lib.h symbol.h scope.h expression.h target.h \
 
 LIB_OBJS= target.o parse.o tokenize.o pre-process.o symbol.o lib.o scope.o \
 	  expression.o show-parse.o evaluate.o expand.o inline.o linearize.o \
-	  sort.o allocate.o compat-$(OS).o ptrlist.o \
+	  char.o sort.o allocate.o compat-$(OS).o ptrlist.o \
 	  flow.o cse.o simplify.o memops.o liveness.o storage.o unssa.o dissect.o
 
 LIB_FILE= libsparse.a
diff --git a/char.c b/char.c
new file mode 100644
index 00000000..92674565
--- /dev/null
+++ b/char.c
@@ -0,0 +1,131 @@
+#include <string.h>
+#include "target.h"
+#include "lib.h"
+#include "allocate.h"
+#include "token.h"
+#include "expression.h"
+
+static const char *parse_escape(const char *p, unsigned *val, const char *end, int bits, struct position pos)
+{
+	unsigned c = *p++;
+	unsigned d;
+	if (c != '\\') {
+		*val = c;
+		return p;
+	}
+
+	c = *p++;
+	switch (c) {
+	case 'a': c = '\a'; break;
+	case 'b': c = '\b'; break;
+	case 't': c = '\t'; break;
+	case 'n': c = '\n'; break;
+	case 'v': c = '\v'; break;
+	case 'f': c = '\f'; break;
+	case 'r': c = '\r'; break;
+	case 'e': c = '\e'; break;
+	case 'x': {
+		unsigned mask = -(1U << (bits - 4));
+		for (c = 0; p < end; c = (c << 4) + d) {
+			d = hexval(*p++);
+			if (d > 16)
+				break;
+			if (c & mask) {
+				warning(pos,
+					"hex escape sequence out of range");
+				mask = 0;
+			}
+		}
+		break;
+	}
+	case '0'...'7': {
+		if (p + 2 < end)
+			end = p + 2;
+		c -= '0';
+		while (p < end && (d = *p++ - '0') < 8)
+			c = (c << 3) + d;
+		if ((c & 0400) && bits < 9)
+			warning(pos,
+				"octal escape sequence out of range");
+		break;
+	}
+	default:	/* everything else is left as is */
+		break;
+	}
+	*val = c & ~((~0U << (bits - 1)) << 1);
+	return p;
+}
+
+void get_char_constant(struct token *token, unsigned long long *val)
+{
+	const char *p = token->embedded, *end;
+	unsigned v;
+	int type = token_type(token);
+	switch (type) {
+	case TOKEN_CHAR:
+	case TOKEN_WIDE_CHAR:
+		p = token->string->data;
+		end = p + token->string->length;
+		break;
+	case TOKEN_CHAR + 1 ... TOKEN_CHAR + 4:
+		end = p + type - TOKEN_CHAR;
+		break;
+	default:
+		end = p + type - TOKEN_WIDE_CHAR;
+	}
+	p = parse_escape(p, &v, end,
+			type < TOKEN_WIDE_CHAR ? bits_in_char : 32, token->pos);
+	if (p != end)
+		warning(token->pos,
+			"multi-character character constant");
+	*val = v;
+}
+
+struct token *get_string_constant(struct token *token, struct expression *expr)
+{
+	struct string *string = token->string;
+	struct token *next = token->next, *done = NULL;
+	int stringtype = token_type(token);
+	int is_wide = stringtype == TOKEN_WIDE_STRING;
+	static char buffer[MAX_STRING];
+	int len = 0;
+	int bits;
+
+	while (!done) {
+		switch (token_type(next)) {
+		case TOKEN_WIDE_STRING:
+			is_wide = 1;
+		case TOKEN_STRING:
+			next = next->next;
+			break;
+		default:
+			done = next;
+		}
+	}
+	bits = is_wide ? 32 : bits_in_char;
+	while (token != done) {
+		unsigned v;
+		const char *p = token->string->data;
+		const char *end = p + token->string->length - 1;
+		while (p < end) {
+			p = parse_escape(p, &v, end, bits, token->pos);
+			if (len < MAX_STRING)
+				buffer[len] = v;
+			len++;
+		}
+		token = token->next;
+	}
+	if (len > MAX_STRING) {
+		warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", len, MAX_STRING);
+		len = MAX_STRING;
+	}
+
+	if (len >= string->length)	/* can't cannibalize */
+		string = __alloc_string(len+1);
+	string->length = len+1;
+	memcpy(string->data, buffer, len);
+	string->data[len] = '\0';
+	expr->string = string;
+	expr->wide = is_wide;
+	return token;
+}
diff --git a/char.h b/char.h
new file mode 100644
index 00000000..54be6b74
--- /dev/null
+++ b/char.h
@@ -0,0 +1,2 @@
+extern void get_char_constant(struct token *, unsigned long long *);
+extern struct token *get_string_constant(struct token *, struct expression *);
diff --git a/expression.c b/expression.c
index b8fab8f3..482e2b17 100644
--- a/expression.c
+++ b/expression.c
@@ -26,6 +26,7 @@
 #include "scope.h"
 #include "expression.h"
 #include "target.h"
+#include "char.h"
 
 static int match_oplist(int op, ...)
 {
@@ -217,48 +218,6 @@ static struct token *builtin_offsetof_expr(struct token *token,
 	}
 }
 
-static struct token *string_expression(struct token *token, struct expression *expr)
-{
-	struct string *string = token->string;
-	struct token *next = token->next;
-	int stringtype = token_type(token);
-
-	if (token_type(next) == stringtype) {
-		int totlen = string->length-1;
-		char *data;
-
-		do {
-			totlen += next->string->length-1;
-			next = next->next;
-		} while (token_type(next) == stringtype);
-
-		if (totlen > MAX_STRING) {
-			warning(token->pos, "trying to concatenate %d-character string (%d bytes max)", totlen, MAX_STRING);
-			totlen = MAX_STRING;
-		}
-
-		string = __alloc_string(totlen+1);
-		string->length = totlen+1;
-		data = string->data;
-		next = token;
-		do {
-			struct string *s = next->string;
-			int len = s->length-1;
-
-			if (len > totlen)
-				len = totlen;
-			totlen -= len;
-
-			next = next->next;
-			memcpy(data, s->data, len);
-			data += len;
-		} while (token_type(next) == stringtype);
-		*data = '\0';
-	}
-	expr->string = string;
-	return next;
-}
-
 #ifndef ULLONG_MAX
 #define ULLONG_MAX (~0ULL)
 #endif
@@ -399,12 +358,11 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 	struct expression *expr = NULL;
 
 	switch (token_type(token)) {
-	case TOKEN_CHAR:
-	case TOKEN_WIDE_CHAR:
+	case TOKEN_CHAR ... TOKEN_WIDE_CHAR + 4:
 		expr = alloc_expression(token->pos, EXPR_VALUE);   
 		expr->flags = Int_const_expr;
-		expr->ctype = token_type(token) == TOKEN_CHAR ? &int_ctype : &long_ctype;
-		expr->value = (unsigned char) token->character;
+		expr->ctype = token_type(token) < TOKEN_WIDE_CHAR ? &int_ctype : &long_ctype;
+		get_char_constant(token, &expr->value);
 		token = token->next;
 		break;
 
@@ -469,8 +427,7 @@ struct token *primary_expression(struct token *token, struct expression **tree)
 	case TOKEN_STRING:
 	case TOKEN_WIDE_STRING:
 		expr = alloc_expression(token->pos, EXPR_STRING);
-		expr->wide = token_type(token) == TOKEN_WIDE_STRING;
-		token = string_expression(token, expr);
+		token = get_string_constant(token, expr);
 		break;
 
 	case TOKEN_SPECIAL:
diff --git a/pre-process.c b/pre-process.c
index d5b19220..9e5df5c3 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -82,8 +82,6 @@ static struct token *alloc_token(struct position *pos)
 	return token;
 }
 
-static const char *show_token_sequence(struct token *token);
-
 /* Expand symbol 'sym' at '*list' */
 static int expand(struct token **, struct symbol *);
 
@@ -340,9 +338,35 @@ static struct token *dup_list(struct token *list)
 	return res;
 }
 
+static const char *quote_token_sequence(struct token *token)
+{
+	static char buffer[1024];
+	char *ptr = buffer;
+	int whitespace = 0;
+
+	while (!eof_token(token)) {
+		const char *val = quote_token(token);
+		int len = strlen(val);
+
+		if (ptr + whitespace + len >= buffer + sizeof(buffer)) {
+			sparse_error(token->pos, "too long token expansion");
+			break;
+		}
+
+		if (whitespace)
+			*ptr++ = ' ';
+		memcpy(ptr, val, len);
+		ptr += len;
+		token = token->next;
+		whitespace = token->pos.whitespace;
+	}
+	*ptr = 0;
+	return buffer;
+}
+
 static struct token *stringify(struct token *arg)
 {
-	const char *s = show_token_sequence(arg);
+	const char *s = quote_token_sequence(arg);
 	int size = strlen(s)+1;
 	struct token *token = __alloc_token(0);
 	struct string *string = __alloc_string(size);
@@ -907,10 +931,12 @@ static int token_different(struct token *t1, struct token *t2)
 	case TOKEN_STR_ARGUMENT:
 		different = t1->argnum != t2->argnum;
 		break;
+	case TOKEN_CHAR + 1 ... TOKEN_CHAR + 4:
+	case TOKEN_WIDE_CHAR + 1 ... TOKEN_WIDE_CHAR + 4:
+		different = memcmp(t1->embedded, t2->embedded, 4);
+		break;
 	case TOKEN_CHAR:
 	case TOKEN_WIDE_CHAR:
-		different = t1->character != t2->character;
-		break;
 	case TOKEN_STRING:
 	case TOKEN_WIDE_STRING: {
 		struct string *s1, *s2;
@@ -1385,6 +1411,8 @@ static int handle_ifndef(struct stream *stream, struct token **line, struct toke
 	return preprocessor_if(stream, token, arg);
 }
 
+static const char *show_token_sequence(struct token *token);
+
 /*
  * Expression handling for #if and #elif; it differs from normal expansion
  * due to special treatment of "defined".
diff --git a/token.h b/token.h
index cd292331..20c23268 100644
--- a/token.h
+++ b/token.h
@@ -68,8 +68,8 @@ enum token_type {
 	TOKEN_ZERO_IDENT,
 	TOKEN_NUMBER,
 	TOKEN_CHAR,
-	TOKEN_WIDE_CHAR,
-	TOKEN_STRING,
+	TOKEN_WIDE_CHAR = TOKEN_CHAR + 5,
+	TOKEN_STRING = TOKEN_WIDE_CHAR + 5,
 	TOKEN_WIDE_STRING,
 	TOKEN_SPECIAL,
 	TOKEN_STREAMBEGIN,
@@ -165,9 +165,9 @@ struct token {
 		struct ident *ident;
 		unsigned int special;
 		struct string *string;
-		int character;
 		int argnum;
 		struct argcount count;
+		char embedded[4];
 	};
 };
 
@@ -198,6 +198,7 @@ extern const char *show_special(int);
 extern const char *show_ident(const struct ident *);
 extern const char *show_string(const struct string *string);
 extern const char *show_token(const struct token *);
+extern const char *quote_token(const struct token *);
 extern struct token * tokenize(const char *, int, struct token *, const char **next_path);
 extern struct token * tokenize_buffer(void *, unsigned long, struct token **);
 
diff --git a/tokenize.c b/tokenize.c
index 42630212..95f308e0 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -121,6 +121,42 @@ const char *show_string(const struct string *string)
 	return buffer;
 }
 
+static const char *show_char(const char *s, size_t len, char prefix, char delim)
+{
+	static char buffer[MAX_STRING + 4];
+	char *p = buffer;
+	if (prefix)
+		*p++ = prefix;
+	*p++ = delim;
+	memcpy(p, s, len);
+	p += len;
+	*p++ = delim;
+	*p++ = '\0';
+	return buffer;
+}
+
+static const char *quote_char(const char *s, size_t len, char prefix, char delim)
+{
+	static char buffer[2*MAX_STRING + 6];
+	size_t i;
+	char *p = buffer;
+	if (prefix)
+		*p++ = prefix;
+	if (delim == '"')
+		*p++ = '\\';
+	*p++ = delim;
+	for (i = 0; i < len; i++) {
+		if (s[i] == '"' || s[i] == '\\')
+			*p++ = '\\';
+		*p++ = s[i];
+	}
+	if (delim == '"')
+		*p++ = '\\';
+	*p++ = delim;
+	*p++ = '\0';
+	return buffer;
+}
+
 const char *show_token(const struct token *token)
 {
 	static char buffer[256];
@@ -137,10 +173,6 @@ const char *show_token(const struct token *token)
 	case TOKEN_IDENT:
 		return show_ident(token->ident);
 
-	case TOKEN_STRING:
-	case TOKEN_WIDE_STRING:
-		return show_string(token->string);
-
 	case TOKEN_NUMBER:
 		return token->number;
 
@@ -148,15 +180,23 @@ const char *show_token(const struct token *token)
 		return show_special(token->special);
 
 	case TOKEN_CHAR: 
-	case TOKEN_WIDE_CHAR: {
-		char *ptr = buffer;
-		int c = token->character;
-		*ptr++ = '\'';
-		ptr = charstr(ptr, c, '\'', 0);
-		*ptr++ = '\'';
-		*ptr++ = '\0';
-		return buffer;
-	}
+		return show_char(token->string->data,
+			token->string->length - 1, 0, '\'');
+	case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+		return show_char(token->embedded,
+			token_type(token) - TOKEN_CHAR, 0, '\'');
+	case TOKEN_WIDE_CHAR: 
+		return show_char(token->string->data,
+			token->string->length - 1, 'L', '\'');
+	case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+		return show_char(token->embedded,
+			token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
+	case TOKEN_STRING: 
+		return show_char(token->string->data,
+			token->string->length - 1, 0, '"');
+	case TOKEN_WIDE_STRING: 
+		return show_char(token->string->data,
+			token->string->length - 1, 'L', '"');
 
 	case TOKEN_STREAMBEGIN:
 		sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
@@ -180,6 +220,47 @@ const char *show_token(const struct token *token)
 	}
 }
 
+const char *quote_token(const struct token *token)
+{
+	static char buffer[256];
+
+	switch (token_type(token)) {
+	case TOKEN_ERROR:
+		return "syntax error";
+
+	case TOKEN_IDENT:
+		return show_ident(token->ident);
+
+	case TOKEN_NUMBER:
+		return token->number;
+
+	case TOKEN_SPECIAL:
+		return show_special(token->special);
+
+	case TOKEN_CHAR: 
+		return quote_char(token->string->data,
+			token->string->length - 1, 0, '\'');
+	case TOKEN_CHAR+1 ... TOKEN_CHAR+4:
+		return quote_char(token->embedded,
+			token_type(token) - TOKEN_CHAR, 0, '\'');
+	case TOKEN_WIDE_CHAR: 
+		return quote_char(token->string->data,
+			token->string->length - 1, 'L', '\'');
+	case TOKEN_WIDE_CHAR+1 ... TOKEN_WIDE_CHAR+4:
+		return quote_char(token->embedded,
+			token_type(token) - TOKEN_WIDE_CHAR, 'L', '\'');
+	case TOKEN_STRING: 
+		return quote_char(token->string->data,
+			token->string->length - 1, 0, '"');
+	case TOKEN_WIDE_STRING: 
+		return quote_char(token->string->data,
+			token->string->length - 1, 'L', '"');
+	default:
+		sprintf(buffer, "unhandled token type '%d' ", token_type(token));
+		return buffer;
+	}
+}
+
 #define HASHED_INPUT_BITS (6)
 #define HASHED_INPUT (1 << HASHED_INPUT_BITS)
 #define HASH_PRIME 0x9e370001UL
@@ -384,22 +465,35 @@ enum {
 	Dot = 16,
 	ValidSecond = 32,
 	Quote = 64,
+	Escape = 128,
 };
 
 static const long cclass[257] = {
-	['0' + 1 ... '9' + 1] = Digit | Hex,
+	['0' + 1 ... '7' + 1] = Digit | Hex | Escape,	/* \<octal> */
+	['8' + 1 ... '9' + 1] = Digit | Hex,
 	['A' + 1 ... 'D' + 1] = Letter | Hex,
-	['E' + 1] = Letter | Hex | Exp,
+	['E' + 1] = Letter | Hex | Exp,	/* E<exp> */
 	['F' + 1] = Letter | Hex,
 	['G' + 1 ... 'O' + 1] = Letter,
-	['P' + 1] = Letter | Exp,
+	['P' + 1] = Letter | Exp,	/* P<exp> */
 	['Q' + 1 ... 'Z' + 1] = Letter,
-	['a' + 1 ... 'd' + 1] = Letter | Hex,
-	['e' + 1] = Letter | Hex | Exp,
-	['f' + 1] = Letter | Hex,
-	['g' + 1 ... 'o' + 1] = Letter,
-	['p' + 1] = Letter | Exp,
-	['q' + 1 ... 'z' + 1] = Letter,
+	['a' + 1 ... 'b' + 1] = Letter | Hex | Escape, /* \a, \b */
+	['c' + 1 ... 'd' + 1] = Letter | Hex,
+	['e' + 1] = Letter | Hex | Exp | Escape,/* \e, e<exp> */
+	['f' + 1] = Letter | Hex | Escape,	/* \f */
+	['g' + 1 ... 'm' + 1] = Letter,
+	['n' + 1] = Letter | Escape,	/* \n */
+	['o' + 1] = Letter,
+	['p' + 1] = Letter | Exp,	/* p<exp> */
+	['q' + 1] = Letter,
+	['r' + 1] = Letter | Escape,	/* \r */
+	['s' + 1] = Letter,
+	['t' + 1] = Letter | Escape,	/* \t */
+	['u' + 1] = Letter,
+	['v' + 1] = Letter | Escape,	/* \v */
+	['w' + 1] = Letter,
+	['x' + 1] = Letter | Escape,	/* \x<hex> */
+	['y' + 1 ... 'z' + 1] = Letter,
 	['_' + 1] = Letter,
 	['.' + 1] = Dot | ValidSecond,
 	['=' + 1] = ValidSecond,
@@ -410,8 +504,10 @@ static const long cclass[257] = {
 	['&' + 1] = ValidSecond,
 	['|' + 1] = ValidSecond,
 	['#' + 1] = ValidSecond,
-	['\'' + 1] = Quote,
-	['"' + 1] = Quote,
+	['\'' + 1] = Quote | Escape,
+	['"' + 1] = Quote | Escape,
+	['\\' + 1] = Escape,
+	['?' + 1] = Escape,
 };
 
 /*
@@ -471,151 +567,74 @@ static int get_one_number(int c, int next, stream_t *stream)
 	return next;
 }
 
-static int escapechar(int first, int type, stream_t *stream, int *valp)
-{
-	int next, value;
-
-	next = nextchar(stream);
-	value = first;
-
-	if (first == '\n')
-		warning(stream_pos(stream), "Newline in string or character constant");
-
-	if (first == '\\' && next != EOF) {
-		value = next;
-		next = nextchar(stream);
-		if (value != type) {
-			switch (value) {
-			case 'a':
-				value = '\a';
-				break;
-			case 'b':
-				value = '\b';
-				break;
-			case 't':
-				value = '\t';
-				break;
-			case 'n':
-				value = '\n';
-				break;
-			case 'v':
-				value = '\v';
-				break;
-			case 'f':
-				value = '\f';
-				break;
-			case 'r':
-				value = '\r';
-				break;
-			case 'e':
-				value = '\e';
-				break;
-			case '\\':
-				break;
-			case '?':
-				break;
-			case '\'':
-				break;
-			case '"':
-				break;
-			case '\n':
-				warning(stream_pos(stream), "Newline in string or character constant");
-				break;
-			case '0'...'7': {
-				int nr = 2;
-				value -= '0';
-				while (next >= '0' && next <= '7') {
-					value = (value << 3) + (next-'0');
-					next = nextchar(stream);
-					if (!--nr)
-						break;
-				}
-				value &= 0xff;
-				break;
-			}
-			case 'x': {
-				int hex = hexval(next);
-				if (hex < 16) {
-					value = hex;
-					next = nextchar(stream);
-					while ((hex = hexval(next)) < 16) {
-						value = (value << 4) + hex;
-						next = nextchar(stream);
-					}
-					value &= 0xff;
-					break;
-				}
-			}
-			/* Fall through */
-			default:
-				warning(stream_pos(stream), "Unknown escape '%c'", value);
-			}
-		}
-		/* Mark it as escaped */
-		value |= 0x100;
-	}
-	*valp = value;
-	return next;
-}
-
-static int get_char_token(int next, stream_t *stream, enum token_type type)
-{
-	int value;
-	struct token *token;
-
-	next = escapechar(next, '\'', stream, &value);
-	if (value == '\'' || next != '\'') {
-		sparse_error(stream_pos(stream), "Bad character constant");
-		drop_token(stream);
-		return next;
-	}
-
-	token = stream->token;
-	token_type(token) = type;
-	token->character = value & 0xff;
-
-	add_token(stream);
-	return nextchar(stream);
-}
-
-static int get_string_token(int next, stream_t *stream, enum token_type type)
+static int eat_string(int next, stream_t *stream, enum token_type type)
 {
 	static char buffer[MAX_STRING];
 	struct string *string;
-	struct token *token;
+	struct token *token = stream->token;
 	int len = 0;
+	int escape;
+	int want_hex = 0;
+	char delim = type < TOKEN_STRING ? '\'' : '"';
 
-	for (;;) {
-		int val;
-		next = escapechar(next, '"', stream, &val);
-		if (val == '"')
-			break;
+	for (escape = 0; escape || next != delim; next = nextchar(stream)) {
+		if (len < MAX_STRING)
+			buffer[len] = next;
+		len++;
+		if (next == '\n') {
+			warning(stream_pos(stream),
+				"Newline in string or character constant");
+			if (delim == '\'') /* assume it's lost ' */
+				break;
+		}
 		if (next == EOF) {
-			warning(stream_pos(stream), "End of file in middle of string");
+			warning(stream_pos(stream),
+				"End of file in middle of string");
 			return next;
 		}
-		if (len < MAX_STRING)
-			buffer[len] = val;
-		len++;
+		if (!escape) {
+			if (want_hex && !(cclass[next + 1] & Hex))
+				warning(stream_pos(stream),
+					"\\x used with no following hex digits");
+			want_hex = 0;
+			escape = next == '\\';
+		} else {
+			if (!(cclass[next + 1] & Escape))
+				warning(stream_pos(stream),
+					"Unknown escape '%c'", next);
+			escape = 0;
+			want_hex = next == 'x';
+		}
 	}
-
+	if (want_hex)
+		warning(stream_pos(stream),
+			"\\x used with no following hex digits");
 	if (len > MAX_STRING) {
 		warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
 		len = MAX_STRING;
 	}
-
-	string = __alloc_string(len+1);
-	memcpy(string->data, buffer, len);
-	string->data[len] = '\0';
-	string->length = len+1;
+	if (delim == '\'' && len <= 4) {
+		if (len == 0) {
+			sparse_error(stream_pos(stream),
+				"empty character constant");
+			return nextchar(stream);
+		}
+		token_type(token) = type + len;
+		memset(buffer + len, '\0', 4 - len);
+		memcpy(token->embedded, buffer, 4);
+	} else {
+		token_type(token) = type;
+		string = __alloc_string(len+1);
+		memcpy(string->data, buffer, len);
+		string->data[len] = '\0';
+		string->length = len+1;
+		token->string = string;
+	}
 
 	/* Pass it on.. */
 	token = stream->token;
-	token_type(token) = type;
-	token->string = string;
 	add_token(stream);
-	
-	return next;
+	return nextchar(stream);
 }
 
 static int drop_stream_eoln(stream_t *stream)
@@ -731,9 +750,9 @@ static int get_one_special(int c, stream_t *stream)
 			return get_one_number(c, next, stream);
 		break;
 	case '"':
-		return get_string_token(next, stream, TOKEN_STRING);
+		return eat_string(next, stream, TOKEN_STRING);
 	case '\'':
-		return get_char_token(next, stream, TOKEN_CHAR);
+		return eat_string(next, stream, TOKEN_CHAR);
 	case '/':
 		if (next == '/')
 			return drop_stream_eoln(stream);
@@ -910,10 +929,10 @@ static int get_one_identifier(int c, stream_t *stream)
 	if (cclass[next + 1] & Quote) {
 		if (len == 1 && buf[0] == 'L') {
 			if (next == '\'')
-				return get_char_token(nextchar(stream), stream,
+				return eat_string(nextchar(stream), stream,
 							TOKEN_WIDE_CHAR);
 			else
-				return get_string_token(nextchar(stream), stream,
+				return eat_string(nextchar(stream), stream,
 							TOKEN_WIDE_STRING);
 		}
 	}
diff --git a/validation/escapes.c b/validation/escapes.c
index 13f8f9c8..4a1b030e 100644
--- a/validation/escapes.c
+++ b/validation/escapes.c
@@ -8,14 +8,13 @@ static int bad_e[] = { '\c', '\0123', '\789', '\xdefg' };
  * check-name: Character escape sequences
  *
  * check-error-start
-escapes.c:6:27: warning: Unknown escape 'c'
-escapes.c:6:35: error: Bad character constant
-escapes.c:6:38: error: Bad character constant
-escapes.c:6:42: error: Bad character constant
-escapes.c:6:46: error: Bad character constant
-escapes.c:6:53: error: Bad character constant
-escapes.c:6:56: error: Bad character constant
-escapes.c:6:42: error: Expected } at end of initializer
-escapes.c:6:42: error: got 89
+escapes.c:6:26: warning: Unknown escape 'c'
+escapes.c:3:34: warning: hex escape sequence out of range
+escapes.c:3:44: warning: hex escape sequence out of range
+escapes.c:4:18: warning: hex escape sequence out of range
+escapes.c:6:30: warning: multi-character character constant
+escapes.c:6:39: warning: multi-character character constant
+escapes.c:6:47: warning: hex escape sequence out of range
+escapes.c:6:47: warning: multi-character character constant
  * check-error-end
  */
diff --git a/validation/preprocessor/stringify.c b/validation/preprocessor/stringify.c
new file mode 100644
index 00000000..7fe965d5
--- /dev/null
+++ b/validation/preprocessor/stringify.c
@@ -0,0 +1,29 @@
+#define A(x) #x
+A('a')
+A("a")
+A(a)
+A(\n)
+A('\n')
+A("\n")
+A('"')
+A("a\nb")
+A(L"a\nb")
+A('\12')
+/*
+ * check-name: Preprocessor #14
+ * check-command: sparse -E $file
+ *
+ * check-output-start
+
+"'a'"
+"\"a\""
+"a"
+"\n"
+"'\\n'"
+"\"\\n\""
+"'\"'"
+"\"a\\nb\""
+"L\"a\\nb\""
+"'\\12'"
+ * check-output-end
+ */
author	Al Viro <viro@zeniv.linux.org.uk>	2013-01-07 14:00:48 -0500
committer	Al Viro <viro@zeniv.linux.org.uk>	2013-02-12 10:05:31 -0500
commit	959bd8973bfcfced69715a522007662929ae6d48 (patch)
tree	18c00b9c8fde48546638c613e4db9d0fa84d06c6
parent	15cfba61d99668e9c14782779766f48834490ead (diff)
download	sparse-959bd8973bfcfced69715a522007662929ae6d48.tar.gz