diff options
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | builtin.c | 2 | ||||
-rw-r--r-- | evaluate.c | 12 | ||||
-rw-r--r-- | evaluate.h | 11 | ||||
-rw-r--r-- | options.c | 2 | ||||
-rw-r--r-- | options.h | 1 | ||||
-rw-r--r-- | sparse.1 | 9 | ||||
-rw-r--r-- | validation/varargs-format-addrspace1.c | 1 | ||||
-rw-r--r-- | validation/varargs-format-dyn-prec.c | 11 | ||||
-rw-r--r-- | validation/varargs-format-prefix.c | 1 | ||||
-rw-r--r-- | validation/varargs-format-tests.c | 1 | ||||
-rw-r--r-- | verify-format.c | 504 | ||||
-rw-r--r-- | verify-format.h | 6 |
13 files changed, 553 insertions, 9 deletions
@@ -90,6 +90,7 @@ LIB_OBJS += target-xtensa.o LIB_OBJS += tokenize.o LIB_OBJS += unssa.o LIB_OBJS += utils.o +LIB_OBJS += verify-format.o PROGRAMS := PROGRAMS += compile @@ -396,7 +396,7 @@ static int eval_sync_compare_and_swap(struct expression *expr) if (!expr->ctype) // __sync_val_compare_and_swap() expr->ctype = ctype; - return evaluate_arguments(types, expr->args); + return evaluate_arguments(NULL, types, expr->args); err: sparse_error(arg->pos, "invalid type for argument %d:", n); @@ -42,6 +42,7 @@ #include "symbol.h" #include "target.h" #include "expression.h" +#include "verify-format.h" struct symbol *current_fn; @@ -1411,8 +1412,8 @@ static int whitelist_pointers(struct symbol *t1, struct symbol *t2) return !Wtypesign; } -static int check_assignment_types(struct symbol *target, struct expression **rp, - const char **typediff) +int check_assignment_types(struct symbol *target, struct expression **rp, + const char **typediff) { struct symbol *source = degenerate(*rp); struct symbol *t, *s; @@ -2344,7 +2345,8 @@ static struct symbol *evaluate_alignof(struct expression *expr) return size_t_ctype; } -int evaluate_arguments(struct symbol_list *argtypes, struct expression_list *head) +int evaluate_arguments(struct symbol *fn, struct symbol_list *argtypes, + struct expression_list *head) { struct expression *expr; struct symbol *argtype; @@ -2385,6 +2387,8 @@ int evaluate_arguments(struct symbol_list *argtypes, struct expression_list *hea NEXT_PTR_LIST(argtype); } END_FOR_EACH_PTR(expr); FINISH_PTR_LIST(argtype); + + verify_format_attribute(fn, head); return 1; } @@ -3208,7 +3212,7 @@ static struct symbol *evaluate_call(struct expression *expr) if (!sym->op->args(expr)) return NULL; } else { - if (!evaluate_arguments(ctype->arguments, arglist)) + if (!evaluate_arguments(ctype, ctype->arguments, arglist)) return NULL; args = expression_list_size(expr->args); fnargs = symbol_list_size(ctype->arguments); @@ -28,8 +28,17 @@ void evaluate_symbol_list(struct symbol_list *list); /// // evaluate the arguments of a function +// @fn: the symbol of the prototype // @argtypes: the list of the types in the prototype // @args: the list of the effective arguments -int evaluate_arguments(struct symbol_list *argtypes, struct expression_list *args); +int evaluate_arguments(struct symbol *fn, struct symbol_list *argtypes, struct expression_list *args); + +/// +// check if assignment types are compatible +// @target: the type of the right-hand side +// @rp: the expression corresponding to the left-hand side +// @typediff: a pointer for details about the type difference, if any +// @return: a non-zero value of the types are compatible, 0 otherwise +int check_assignment_types(struct symbol *target, struct expression **rp, const char **typediff); #endif @@ -100,6 +100,7 @@ int Wdesignated_init = 1; int Wdo_while = 0; int Wenum_mismatch = 1; int Wexternal_function_has_definition = 1; +int Wformat = 0; int Wimplicit_int = 1; int Winit_cstring = 0; int Wint_to_pointer_cast = 1; @@ -840,6 +841,7 @@ static const struct flag warnings[] = { { "do-while", &Wdo_while }, { "enum-mismatch", &Wenum_mismatch }, { "external-function-has-definition", &Wexternal_function_has_definition }, + { "format", &Wformat }, { "implicit-int", &Wimplicit_int }, { "init-cstring", &Winit_cstring }, { "int-to-pointer-cast", &Wint_to_pointer_cast }, @@ -99,6 +99,7 @@ extern int Wdesignated_init; extern int Wdo_while; extern int Wenum_mismatch; extern int Wexternal_function_has_definition; +extern int Wformat; extern int Wimplicit_int; extern int Winit_cstring; extern int Wint_to_pointer_cast; @@ -257,6 +257,15 @@ Sparse issues these warnings by default. To turn them off, use \fB\-Wno\-external\-function\-has\-definition\fR. . .TP +.B \-Wformat +Warn about parameter mismatch to any variadic function which specifies +where the format string is specified with the +.BI __attribute__((format(type, message, va_start))) +attribute. + +Sparse does not issue these warnings by default. +. +.TP .B \-Winit\-cstring Warn about initialization of a char array with a too long constant C string. diff --git a/validation/varargs-format-addrspace1.c b/validation/varargs-format-addrspace1.c index 99b61196..3370ac67 100644 --- a/validation/varargs-format-addrspace1.c +++ b/validation/varargs-format-addrspace1.c @@ -18,7 +18,6 @@ static void test(void) { /* * check-name: variadic formatting test with address-space to %s * check-command: sparse -Wformat $file - * check-known-to-fail * * check-error-start varargs-format-addrspace1.c:10:26: warning: incorrect type in argument 2 (different address spaces) diff --git a/validation/varargs-format-dyn-prec.c b/validation/varargs-format-dyn-prec.c new file mode 100644 index 00000000..a143f166 --- /dev/null +++ b/validation/varargs-format-dyn-prec.c @@ -0,0 +1,11 @@ +extern void pf(const char *msg, ...) __attribute__((format(printf, 1, 2))); + +static void test(int prec) +{ + pf("%*s\n", prec, "xyz"); +} + +/* + * check-name: variadic formatting test dynamic precision + * check-command: sparse -Wformat $file + */ diff --git a/validation/varargs-format-prefix.c b/validation/varargs-format-prefix.c index 96e66d5d..8e2456e6 100644 --- a/validation/varargs-format-prefix.c +++ b/validation/varargs-format-prefix.c @@ -10,7 +10,6 @@ static int test(void) { /* * check-name: variadic formatting test prefix based __attribute__ * check-command: sparse -Wformat $file - * check-known-to-fail * * check-error-start varargs-format-prefix.c:7:26: warning: incorrect type in argument 2 (different address spaces) diff --git a/validation/varargs-format-tests.c b/validation/varargs-format-tests.c index 4d80a9ce..659bbe94 100644 --- a/validation/varargs-format-tests.c +++ b/validation/varargs-format-tests.c @@ -46,7 +46,6 @@ static void test4(int i, unsigned int u) /* * check-name: variadic formatting tests for width/precisions * check-command: sparse -Wformat $file - * check-known-to-fail * * check-error-start varargs-format-tests.c:10:12: warning: cannot evaluate type '%3$*2$-d' diff --git a/verify-format.c b/verify-format.c new file mode 100644 index 00000000..4b473028 --- /dev/null +++ b/verify-format.c @@ -0,0 +1,504 @@ +/* + * sparse/verify-format.c + * + * Copyright (C) 2019 Codethink Ltd. + * Written by Ben Dooks <ben.dooks@codethink.co.uk> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * Verification code for format-attributes (currently printf) + */ +#include <stdlib.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> + +#include "evaluate.h" +#include "lib.h" +#include "allocate.h" +#include "parse.h" +#include "token.h" +#include "symbol.h" +#include "target.h" +#include "expression.h" +#include "verify-format.h" + +struct format_type { + const char *format; + int (*test)(struct format_type *fmt, + struct expression **expr, + struct symbol *ctype, + struct symbol **target, + const char **typediff); + struct symbol *type; +}; + +struct format_state { + struct expression *expr; + unsigned int first; + unsigned int fmt_index; + unsigned int arg_index; + unsigned int used_position: 1; +}; + +static int printf_fmt_numtype(struct format_type *fmt, + struct expression **expr, + struct symbol *ctype, + struct symbol **target, const char **typediff) +{ + struct symbol *type = fmt->type; + *target = type; + return check_assignment_types(*target, expr, typediff); +} + +static int printf_fmt_string(struct format_type *fmt, + struct expression **expr, + struct symbol *ctype, + struct symbol **target, const char **typediff) +{ + *target = fmt->type; + return check_assignment_types(*target, expr, typediff); +} + +static int printf_fmt_pointer(struct format_type *fmt, + struct expression **expr, + struct symbol *ctype, + struct symbol **target, const char **typediff) +{ + *target = &const_ptr_ctype; + return check_assignment_types(*target, expr, typediff); +} + +static int printf_fmt_print_pointer(struct format_type *fmt, + struct expression **expr, + struct symbol *ctype, + struct symbol **target, + const char **typediff) +{ + // TODO TODO: fix this here!!! + int ret; + *target = &const_ptr_ctype; + ret = check_assignment_types(*target, expr, typediff); + if (ret == 0) { + /* if just printing, ignore address-space mismatches */ + if (strcmp(*typediff, "different address spaces") == 0) + ret = 1; + } + return ret; +} + +static struct format_type printf_fmt_ptr_ref = { + .format = "p", + .test = printf_fmt_pointer, +}; + +static int is_float_spec(char t) +{ + return t == 'f' || t == 'g' || t == 'F' || t == 'G'; +} + +static struct format_type *parse_printf_get_fmt(struct format_type *type, + const char *msg, const char **msgout) +{ + const char *ptr = msg; + int szmod=0; + + type->test = NULL; + *msgout = ptr; + + if (*ptr == 's') { + ptr++; + type->test = printf_fmt_string; + type->type = &const_string_ctype; + } else if (*ptr == 'c') { + ptr++; + type->test = printf_fmt_numtype; + type->type = &char_ctype; + } else if (*ptr == 'p') { + ptr++; + type->test = printf_fmt_print_pointer; + /* check for pointer being printed as hex explicitly */ + if (*ptr == 'x' || *ptr == 'X') { + ptr++; + } else if (isalpha(*ptr)) { + /* probably some extra specifiers after %p */ + ptr++; + type->test = printf_fmt_pointer; + } + } else if (*ptr == 'z') { + // todo - we should construct pointer to int/etc // + + ptr++; + if (*ptr == 'd' || *ptr == 'i') { + ptr++; + type->test = printf_fmt_numtype; + type->type = ssize_t_ctype; + } else if (*ptr == 'u' || *ptr == 'x' || *ptr == 'X' || + *ptr == 'o') { + ptr++; + type->test = printf_fmt_numtype; + type->type = size_t_ctype; + } + } else { + if (*ptr == 'l') { + szmod++; + ptr++; + if (*ptr == 'l') { + szmod++; + ptr++; + } + } else if (*ptr == 'L') { + szmod++; + ptr++; + } else { + if (*ptr == 'h') { // short/char to int + szmod = -1; + ptr++; + if (*ptr == 'h') // promotion from char + ptr++; + } + if (*ptr == 't') { // ptrdiff_t + szmod = 2; + ptr++; + } + if (*ptr == 'j') { // intmax_t + // todo - replace iwth intmax_ctype when added + szmod = 1; + ptr++; + } + } + + if (*ptr == 'x' || *ptr == 'X' || *ptr == 'u' || *ptr == 'o') { + ptr++; + type->test = printf_fmt_numtype; + switch (szmod) { + case -1: + type->type = &ushort_ctype; + break; + case 0: + type->type = &uint_ctype; + break; + case 1: + type->type = &ulong_ctype; + break; + case 2: + type->type = &ullong_ctype; + break; + default: + type->test = NULL; + } + } else if (*ptr == 'i' || *ptr == 'd') { + ptr++; + type->test = printf_fmt_numtype; + switch (szmod) { + case -1: + type->type = &short_ctype; + break; + case 0: + type->type = &int_ctype; + break; + case 1: + type->type = &long_ctype; + break; + case 2: + type->type = &llong_ctype; + break; + default: + type->test = NULL; + } + } else if (*ptr == 'L' && is_float_spec(ptr[1])) { + type->test = printf_fmt_numtype; + type->type = &ldouble_ctype; + ptr += 2; + } else if (is_float_spec(*ptr)) { + type->test = printf_fmt_numtype; + type->type = szmod == 1 ? &ldouble_ctype : &double_ctype; + ptr++; + } else if (*ptr == 's') { + type->test = printf_fmt_string; + type->type = &const_wstring_ctype; + ptr++; + } else if (*ptr == 'n') { /* pointer to an de-referenced int/etc */ + // todo - we should construct pointer to int/etc // + // also should not have any flags or widths for this + type->test = printf_fmt_pointer; + ptr++; + } else { + // anything else here? + } + } + + if (type->test == NULL) + return NULL; + + *msgout = ptr; + return type; +} + +static int is_printf_flag(char ch) +{ + return ch == '0' || ch == '+' || ch == '-' || ch == ' ' || ch == '#'; +} + +static int printf_check_position(const char **fmt) +{ + const char *ptr= *fmt; + + if (!isdigit(*ptr)) + return -1; + while (isdigit(*ptr)) + ptr++; + if (*ptr == '$') { + const char *pos = *fmt; + *fmt = ptr+1; + return strtoul(pos, NULL, 10); + } + return -1; +} + +static void parse_format_printf_checkpos(struct format_state *state, const char *which) +{ + if (state->used_position) { + warning(state->expr->pos, + "format %d: %s: no position specified", + state->arg_index-1, which); + } +} + +static int parse_format_printf_argfield(const char **fmtptr, + struct format_state *state, + struct expression_list *args, + int *pos, const char *which) +{ + struct expression *expr; + struct symbol *ctype; + const char *fmt = *fmtptr; + int argpos = -1; + + /* check for simple digit-string width/precision specifier first */ + if (*fmt != '*') { + while (isdigit(*fmt)) + fmt++; + *fmtptr = fmt; + return 0; + } + + fmt++; + argpos = printf_check_position(&fmt); + + if (argpos > 0) { + argpos += state->first - 1; + state->used_position = 1; + } else { + argpos = (*pos)++; + state->arg_index++; + parse_format_printf_checkpos(state, which); + } + + *fmtptr = fmt; + expr = get_nth_expression(args, argpos-1); + if (!expr) { + warning(state->expr->pos, "%s: no argument at position %d", which, argpos); + return 1; + } + + /* check the value we got was int/uint type */ + ctype = expr->ctype; + if (ctype) { + struct symbol *target = &int_ctype; + + if (ctype->type == SYM_NODE) + ctype = ctype->ctype.base_type; + + if (ctype != &int_ctype && ctype != &uint_ctype) { + warning(expr->pos, "incorrect type for %s argument %d", which, argpos); + info(expr->pos, " expected %s", show_typename(target)); + info(expr->pos, " got %s", show_typename(ctype)); + } + } + + return 0; +} + +/* + * printf format parsing code + * + * this code currently does not: + * - check castable types (such as int vs long vs long long) + * - validate all arguments specified are also used... + */ +static int parse_format_printf(const char **fmtstring, + struct format_state *state, + struct expression_list *args) +{ + struct format_type ftype; /* temp storage for format info */ + struct format_type *type; /* type found from the parse */ + struct expression *expr; + const char *fmt = *fmtstring; /* pointer to parse position */ + const char *fmtpost = NULL; /* moved to end of the parsed format */ + int pos = state->arg_index; /* position of the argument */ + int error = 0; + int ret; + + if (!fmt) { + warning(state->expr->pos, "no format string passed"); + return -1; + } + + /* trivial check for %% */ + fmt++; + if (fmt[0] == '%') { + *fmtstring = fmt+1; + return 0; + } + + state->arg_index++; + state->fmt_index++; + + ret = printf_check_position(&fmt); + if (ret == 0) { + /* we got an invalid position argument */ + error++; + } else if (ret < 0) { + parse_format_printf_checkpos(state, "position"); + } else { + state->used_position = 1; + pos = ret + state->first - 1; + } + + /* get rid of any formatting flag bits */ + while (is_printf_flag(*fmt)) + fmt++; + + /* now there is the posibility of a width specifier */ + if (parse_format_printf_argfield(&fmt, state, args, &pos, "width")) + error++; + + /* now we might have the precision specifier */ + if (*fmt == '.') { + fmt++; + if (parse_format_printf_argfield(&fmt, state, args, &pos, "position")) + error++; + } + + type = parse_printf_get_fmt(&ftype, fmt, &fmtpost); + + if (!type && fmt[0] == 'p') + type = &printf_fmt_ptr_ref; /* probably some extension */ + + if (type) { + struct symbol *ctype, *target = NULL; + const char *typediff = "different types"; + int ret; + + *fmtstring = fmtpost; + expr = get_nth_expression(args, pos-1); + if (!expr) { + /* no argument, but otherwise valid argument string */ + warning(state->expr->pos, "no argument at position '%d'", pos); + return 0; + } + + ctype = expr->ctype; + if (!ctype) + return -3; + + ret = type->test(type, &expr, ctype, &target, &typediff); + if (!target) /* shouldn't happen, but catch anyway */ + return -4; + + if (ret == 0) { + warning(expr->pos, "incorrect type in argument %d (%s)", pos, typediff); + info(expr->pos, " expected %s", show_typename(target)); + info(expr->pos, " got %s", show_typename(ctype)); + } + } else { + /* try and find the end of this format string by looking for a space*/ + fmtpost = *fmtstring; + while (*fmtpost > ' ') + fmtpost++; + warning(state->expr->pos, "cannot evaluate type '%.*s'", + (int)(fmtpost - *fmtstring), *fmtstring); + *fmtstring += 1; + return -1; + } + + return 1; +} + +/* + * attempt to run through a printf format string and work out the types + * it specifies. The format is parsed from the __attribute__(format()) + * in the parser code which stores the positions of the message and arg + * start in the ctype. + */ +void verify_format_attribute(struct symbol *fn, struct expression_list *args) +{ + struct format_state state = { }; + struct expression *expr; + struct expression *init; + const char *fmt_string; + + if (!fn || !Wformat) + return; + if (!fn->ctype.format.index) + return; + + expr = get_nth_expression(args, fn->ctype.format.index-1); + if (!expr) + return; + + if (expr->type != EXPR_SYMBOL || expr->symbol->ident) + return; // not a literal + init = expr->symbol->initializer; + if (!init || init->type != EXPR_STRING) + return; // not a string + fmt_string = init->string->data; + + state.expr = expr; + state.first = fn->ctype.format.first; + state.arg_index = fn->ctype.format.first; + + if (!fmt_string) { + warning(expr->pos, "not a format string?"); + } else { + const char *string = fmt_string; + int fail = 0; + + while (string[0]) { + if (string[0] != '%') { + /* strip anything before the '%' */ + string++; + continue; + } + + if (parse_format_printf(&string, &state, args) < 0) + fail++; + } + + if (fail > 0) + /* format string may have '\n' etc embedded in it */ + warning(expr->pos, "cannot evaluate format string"); + } +} diff --git a/verify-format.h b/verify-format.h new file mode 100644 index 00000000..4a7ef79d --- /dev/null +++ b/verify-format.h @@ -0,0 +1,6 @@ +#ifndef VERIFY_FORMAT_H +#define VERIFY_FORMAT_H + +void verify_format_attribute(struct symbol *fn, struct expression_list *args); + +#endif |