diff options
39 files changed, 1201 insertions, 497 deletions
@@ -8,25 +8,26 @@ .*.swp # generated -version.h +/version.h # programs -c2xml -compile -ctags -example -graph -obfuscate -sparse -sparse-llvm -semind -test-dissect -test-inspect -test-lexing -test-linearize -test-parsing -test-show-type -test-unssa +/c2xml +/compile +/ctags +/example +/graph +/obfuscate +/scheck +/semind +/sparse +/sparse-llvm +/test-dissect +/test-inspect +/test-lexing +/test-linearize +/test-parsing +/test-show-type +/test-unssa # tags tags diff --git a/Documentation/TODO.md b/Documentation/TODO.md index 3f00bb11..a2763895 100644 --- a/Documentation/TODO.md +++ b/Documentation/TODO.md @@ -56,6 +56,17 @@ Optimization IR -- +* pseudos are untyped, it's usually OK but often it complicates things: + + - PSEUDO_REGs are defined by instructions and their type is normally + retrievable via this defining instruction but in some cases they're not: + for example, pseudos defined by ASM output. + - PSEUDO_ARGs are considered as defined by OP_ENTRY and are used like + this for liveness trackability but their type can't simply be + retrieved via this instruction like PSEUDO_REGs are (with ->def->type). + - PSEUDO_VALs are completely typeless. + + Maybe a few bits should be used to store some kind of low-level type. * OP_SET should return a bool, always * add IR instructions for va_arg() & friends * add a possibility to import of file in "IR assembly" @@ -63,7 +63,6 @@ LIB_OBJS += show-parse.o LIB_OBJS += simplify.o LIB_OBJS += sort.o LIB_OBJS += ssa.o -LIB_OBJS += sset.o LIB_OBJS += stats.o LIB_OBJS += storage.o LIB_OBJS += symbol.o @@ -227,6 +226,13 @@ else $(warning Your system does not have llvm, disabling sparse-llvm) endif +ifeq ($(HAVE_BOOLECTOR),yes) +PROGRAMS += scheck +scheck-cflags := -I${BOOLECTORDIR}/include/boolector +scheck-ldflags := -L${BOOLECTORDIR}/lib +scheck-ldlibs := -lboolector -llgl -lbtor2parser +endif + ######################################################################## LIBS := libsparse.a OBJS := $(LIB_OBJS) $(EXTRA_OBJS) $(PROGRAMS:%=%.o) @@ -390,6 +390,69 @@ static struct symbol_op overflow_p_op = { }; +/// +// Evaluate the arguments of 'generic' integer operators. +// +// Parameters with a complete type are used like in a normal prototype. +// The first parameter with a 'dynamic' type will be consider +// as polymorphic and for each calls will be instancied with the type +// of its effective argument. +// The next dynamic parameters will the use this polymorphic type. +// This allows to declare functions with some parameters having +// a type variably defined at call time: +// int foo(int, T, T); +static int evaluate_generic_int_op(struct expression *expr) +{ + struct symbol *fntype = expr->fn->ctype->ctype.base_type; + struct symbol_list *types = NULL; + struct symbol *ctype = NULL; + struct expression *arg; + struct symbol *t; + int n = 0; + + PREPARE_PTR_LIST(fntype->arguments, t); + FOR_EACH_PTR(expr->args, arg) { + n++; + + if (!is_dynamic_type(t)) { + ; + } else if (!ctype) { + // first 'dynamic' type, check that it's an integer + t = arg->ctype; + if (!t) + return 0; + if (t->type == SYM_NODE) + t = t->ctype.base_type; + if (!t) + return 0; + if (t->ctype.base_type != &int_type) + goto err; + + // next 'dynamic' arguments will use this type + ctype = t; + } else { + // use the previous 'dynamic' type + t = ctype; + } + add_ptr_list(&types, t); + NEXT_PTR_LIST(t); + } END_FOR_EACH_PTR(arg); + FINISH_PTR_LIST(t); + return evaluate_arguments(types, expr->args); + +err: + sparse_error(arg->pos, "non-integer type for argument %d:", n); + info(arg->pos, " %s", show_typename(arg->ctype)); + expr->ctype = &bad_ctype; + return 0; +} + +struct symbol_op generic_int_op = { + .args = args_prototype, + .evaluate = evaluate_generic_int_op, +}; + + static int eval_atomic_common(struct expression *expr) { struct symbol *fntype = expr->fn->ctype->ctype.base_type; @@ -559,7 +622,7 @@ static void declare_builtin(int stream, const struct builtin_fn *entry) } } -static void declare_builtins(int stream, const struct builtin_fn tbl[]) +void declare_builtins(int stream, const struct builtin_fn tbl[]) { if (!tbl) return; @@ -12,4 +12,8 @@ struct builtin_fn { struct symbol_op *op; }; +void declare_builtins(int stream, const struct builtin_fn tbl[]); + +extern struct symbol_op generic_int_op; + #endif @@ -298,14 +298,6 @@ static inline void remove_instruction(struct instruction_list **list, struct ins delete_ptr_list_entry((struct ptr_list **)list, insn, count); } -static void add_instruction_to_end(struct instruction *insn, struct basic_block *bb) -{ - struct instruction *br = delete_last_instruction(&bb->insns); - insn->bb = bb; - add_instruction(&bb->insns, insn); - add_instruction(&bb->insns, br); -} - static struct instruction * try_to_cse(struct entrypoint *ep, struct instruction *i1, struct instruction *i2) { struct basic_block *b1, *b2, *common; @@ -343,7 +335,7 @@ static struct instruction * try_to_cse(struct entrypoint *ep, struct instruction if (common) { i1 = cse_one_instruction(i2, i1); remove_instruction(&b1->insns, i1, 1); - add_instruction_to_end(i1, common); + insert_last_instruction(common, i1); } else { i1 = i2; } @@ -226,12 +226,6 @@ static struct symbol *bigger_int_type(struct symbol *left, struct symbol *right) return utype; } -static int same_cast_type(struct symbol *orig, struct symbol *new) -{ - return orig->bit_size == new->bit_size && - orig->bit_offset == new->bit_offset; -} - static struct symbol *base_type(struct symbol *node, unsigned long *modp, struct ident **asp) { unsigned long mod = 0; @@ -316,10 +310,7 @@ static struct symbol *cast_to_bool(struct expression *expr); /* * This gets called for implicit casts in assignments and - * integer promotion. We often want to try to move the - * cast down, because the ops involved may have been - * implicitly cast up, and we can get rid of the casts - * early. + * integer promotion. */ static struct expression * cast_to(struct expression *old, struct symbol *type) { @@ -330,39 +321,6 @@ static struct expression * cast_to(struct expression *old, struct symbol *type) if (old->ctype != &null_ctype && is_same_type(old, type)) return old; - /* - * See if we can simplify the op. Move the cast down. - */ - switch (old->type) { - case EXPR_PREOP: - if (old->ctype->bit_size < type->bit_size) - break; - if (old->op == '~') { - old->ctype = type; - old->unop = cast_to(old->unop, type); - return old; - } - break; - - case EXPR_IMPLIED_CAST: - warn_for_different_enum_types(old->pos, old->ctype, type); - - if (old->ctype->bit_size >= type->bit_size) { - struct expression *orig = old->cast_expression; - if (same_cast_type(orig->ctype, type)) - return orig; - if (old->ctype->bit_offset == type->bit_offset) { - old->ctype = type; - old->cast_type = type; - return old; - } - } - break; - - default: - /* nothing */; - } - expr = alloc_expression(old->pos, EXPR_IMPLIED_CAST); expr->ctype = type; expr->cast_type = type; @@ -19,10 +19,69 @@ #include "simplify.h" #include "flow.h" #include "target.h" -#include "flowgraph.h" unsigned long bb_generation; +/// +// remove phi-sources from a removed edge +// +// :note: It's possible to have several edges between the same BBs. +// It's common with switches but it's also possible with branches. +// This function will only remove a single phi-source per edge. +int remove_phisources(struct basic_block *par, struct basic_block *old) +{ + struct instruction *insn; + int changed = 0; + + FOR_EACH_PTR(old->insns, insn) { + pseudo_t phi; + + if (!insn->bb) + continue; + if (insn->opcode != OP_PHI) + return changed; + + // found a phi-node in the target BB, + // now look after its phi-sources. + FOR_EACH_PTR(insn->phi_list, phi) { + struct instruction *phisrc = phi->def; + + if (phi == VOID) + continue; + assert(phisrc->phi_node == insn); + if (phisrc->bb != par) + continue; + // found a phi-source corresponding to this edge: + // remove it but avoid the recursive killing. + REPLACE_CURRENT_PTR(phi, VOID); + remove_use(&phisrc->src); + phisrc->bb = NULL; + changed |= REPEAT_CSE; + // Only the first one must be removed. + goto next; + } END_FOR_EACH_PTR(phi); +next: ; + } END_FOR_EACH_PTR(insn); + return changed; +} + +/// +// remove all phisources but the one corresponding to the given target +static int remove_other_phisources(struct basic_block *bb, struct multijmp_list *list, struct basic_block *target) +{ + struct multijmp *jmp; + int changed = 0; + + FOR_EACH_PTR(list, jmp) { + if (jmp->target == target) { + target = NULL; + continue; + } + changed |= remove_phisources(bb, jmp->target); + } END_FOR_EACH_PTR(jmp); + return changed; +} + /* * Dammit, if we have a phi-node followed by a conditional * branch on that phi-node, we should damn well be able to @@ -69,34 +128,6 @@ static int pseudo_truth_value(pseudo_t pseudo) } } -/// -// check if the BB is empty or only contains phi-sources -static int bb_is_trivial(struct basic_block *bb) -{ - struct instruction *insn; - int n = 0; - - FOR_EACH_PTR(bb->insns, insn) { - if (!insn->bb) - continue; - switch (insn->opcode) { - case OP_TERMINATOR ... OP_TERMINATOR_END: - return n ? -1 : 1; - case OP_NOP: - case OP_INLINED_CALL: - continue; - case OP_PHISOURCE: - n++; - continue; - default: - goto out; - } - } END_FOR_EACH_PTR(insn); - -out: - return 0; -} - /* * Does a basic block depend on the pseudos that "src" defines? */ @@ -159,78 +190,24 @@ out: } /// -// do jump threading in dominated BBs -// @dom: the BB which must dominate the modified BBs. -// @old: the old target BB -// @new: the new target BB -// @return: 0 if no chnages have been made, 1 otherwise. -// -// In all BB dominated by @dom, rewrite branches to @old into branches to @new -static int retarget_bb(struct basic_block *dom, struct basic_block *old, struct basic_block *new) +// check if the sources of a phi-node match with the parent BBs +static bool phi_check(struct instruction *node) { struct basic_block *bb; - int changed = 0; - - if (new == old) - return 0; - -restart: - FOR_EACH_PTR(old->parents, bb) { - struct instruction *last; - struct multijmp *jmp; + pseudo_t phi; - if (!domtree_dominates(dom, bb)) + PREPARE_PTR_LIST(node->bb->parents, bb); + FOR_EACH_PTR(node->phi_list, phi) { + if (phi == VOID || !phi->def) continue; - last = last_instruction(bb->insns); - switch (last->opcode) { - case OP_BR: - changed |= rewrite_branch(bb, &last->bb_true, old, new); - break; - case OP_CBR: - changed |= rewrite_branch(bb, &last->bb_true, old, new); - changed |= rewrite_branch(bb, &last->bb_false, old, new); - break; - case OP_SWITCH: - case OP_COMPUTEDGOTO: - FOR_EACH_PTR(last->multijmp_list, jmp) { - changed |= rewrite_branch(bb, &jmp->target, old, new); - } END_FOR_EACH_PTR(jmp); - break; - default: - continue; - } - - // since rewrite_branch() will modify old->parents() the list - // iteration won't work correctly. Several solution exist for - // this but in this case the simplest is to restart the loop. - goto restart; - } END_FOR_EACH_PTR(bb); - return changed; -} - -static int simplify_cbr_cbr(struct instruction *insn) -{ - struct instruction *last; - struct basic_block *bot = insn->bb; - struct basic_block *top = bot->idom; - int changed = 0; - int trivial; - - if (!top) - return 0; - - trivial = bb_is_trivial(bot); - if (trivial == 0) - return 0; - if (trivial < 0) - return 0; - last = last_instruction(top->insns); - if (last->opcode != OP_CBR || last->cond != insn->cond) - return 0; - - changed |= retarget_bb(last->bb_true , bot, insn->bb_true); - changed |= retarget_bb(last->bb_false, bot, insn->bb_false); - return changed; + if (phi->def->bb != bb) + return false; + NEXT_PTR_LIST(bb); + } END_FOR_EACH_PTR(phi); + if (bb) + return false; + FINISH_PTR_LIST(bb); + return true; } /* @@ -255,7 +232,7 @@ static int try_to_simplify_bb(struct basic_block *bb, struct instruction *first, * simplify_symbol_usage()/conversion to SSA form. * No sane simplification can be done when we have this. */ - bogus = bb_list_size(bb->parents) != pseudo_list_size(first->phi_list); + bogus = !phi_check(first); FOR_EACH_PTR(first->phi_list, phi) { struct instruction *def = phi->def; @@ -372,7 +349,7 @@ try_to_rewrite_target: */ if (bb_list_size(target->parents) != 1) return retval; - insert_branch(target, insn, final); + convert_to_jump(insn, final); return 1; } @@ -380,8 +357,6 @@ static int simplify_one_branch(struct basic_block *bb, struct instruction *br) { if (simplify_phi_branch(bb, br)) return 1; - if (simplify_cbr_cbr(br)) - return 1; return simplify_branch_branch(bb, br, &br->bb_true, 1) | simplify_branch_branch(bb, br, &br->bb_false, 0); } @@ -469,7 +444,7 @@ static inline int distinct_symbols(pseudo_t a, pseudo_t b) * * Return 0 if it doesn't, and -1 if you don't know. */ -int dominates(pseudo_t pseudo, struct instruction *insn, struct instruction *dom, int local) +int dominates(struct instruction *insn, struct instruction *dom, int local) { switch (dom->opcode) { case OP_CALL: case OP_ENTRY: @@ -486,7 +461,7 @@ int dominates(pseudo_t pseudo, struct instruction *insn, struct instruction *dom return 0; } - if (dom->src != pseudo) { + if (dom->src != insn->src) { if (local) return 0; /* We don't think two explicitly different symbols ever alias */ @@ -815,6 +790,43 @@ void vrfy_flow(struct entrypoint *ep) assert(!entry); } +/// +// change a switch or a conditional branch into a branch +int convert_to_jump(struct instruction *insn, struct basic_block *target) +{ + struct basic_block *bb = insn->bb; + struct basic_block *child; + int changed = REPEAT_CSE; + + switch (insn->opcode) { + case OP_CBR: + changed |= remove_phisources(insn->bb, insn->bb_true == target ? insn->bb_false : insn->bb_true); + break; + case OP_SWITCH: + changed |= remove_other_phisources(insn->bb, insn->multijmp_list, target); + break; + } + kill_use(&insn->cond); + insn->bb_true = target; + insn->bb_false = NULL; + insn->cond = NULL; + insn->size = 0; + insn->opcode = OP_BR; + + FOR_EACH_PTR(bb->children, child) { + if (child == target) { + target = NULL; // leave first occurence + continue; + } + DELETE_CURRENT_PTR(child); + remove_bb_from_list(&child->parents, bb, 1); + changed |= REPEAT_CFG_CLEANUP; + } END_FOR_EACH_PTR(child); + PACK_PTR_LIST(&bb->children); + repeat_phase |= changed; + return changed; +} + static int retarget_parents(struct basic_block *bb, struct basic_block *target) { struct basic_block *parent; @@ -831,21 +843,26 @@ static int retarget_parents(struct basic_block *bb, struct basic_block *target) return REPEAT_CFG_CLEANUP; } -static void remove_merging_phisrc(struct basic_block *top, struct instruction *insn) +static void remove_merging_phisrc(struct instruction *insn, struct basic_block *bot) { - struct instruction *user = insn->phi_node; + struct instruction *node = insn->phi_node; pseudo_t phi; - FOR_EACH_PTR(user->phi_list, phi) { + if (!node) { + kill_instruction(insn); + return; + } + + FOR_EACH_PTR(node->phi_list, phi) { struct instruction *phisrc; if (phi == VOID) continue; phisrc = phi->def; - if (phisrc->bb != top) - continue; - REPLACE_CURRENT_PTR(phi, VOID); - kill_instruction(phisrc); + if (phisrc->bb == bot) { + kill_instruction(insn); + return; + } } END_FOR_EACH_PTR(phi); } @@ -889,6 +906,14 @@ static int merge_bb(struct basic_block *top, struct basic_block *bot) replace_bb_in_list(&bb->parents, bot, top, 1); } END_FOR_EACH_PTR(bb); + FOR_EACH_PTR(top->insns, insn) { + if (!insn->bb) + continue; + if (insn->opcode != OP_PHISOURCE) + continue; + remove_merging_phisrc(insn, bot); + } END_FOR_EACH_PTR(insn); + kill_instruction(delete_last_instruction(&top->insns)); FOR_EACH_PTR(bot->insns, insn) { if (!insn->bb) @@ -898,9 +923,6 @@ static int merge_bb(struct basic_block *top, struct basic_block *bot) case OP_PHI: remove_merging_phi(top, insn); continue; - case OP_PHISOURCE: - remove_merging_phisrc(top, insn); - break; } insn->bb = top; add_instruction(&top->insns, insn); @@ -11,6 +11,8 @@ extern unsigned long bb_generation; struct entrypoint; struct instruction; +extern int remove_phisources(struct basic_block *par, struct basic_block *old); + extern int simplify_flow(struct entrypoint *ep); extern void kill_dead_stores(struct entrypoint *ep, pseudo_t addr, int local); @@ -18,6 +20,7 @@ extern void simplify_symbol_usage(struct entrypoint *ep); extern void simplify_memops(struct entrypoint *ep); extern void pack_basic_blocks(struct entrypoint *ep); extern int simplify_cfg_early(struct entrypoint *ep); +extern int convert_to_jump(struct instruction *insn, struct basic_block *target); extern void convert_instruction_target(struct instruction *insn, pseudo_t src); extern void remove_dead_insns(struct entrypoint *); @@ -38,7 +41,7 @@ static inline int kill_instruction_force(struct instruction *insn) } void check_access(struct instruction *insn); -int dominates(pseudo_t pseudo, struct instruction *insn, struct instruction *dom, int local); +int dominates(struct instruction *insn, struct instruction *dom, int local); extern void vrfy_flow(struct entrypoint *ep); extern int pseudo_in_list(struct pseudo_list *list, pseudo_t pseudo); diff --git a/ident-list.h b/ident-list.h index 8049b694..3c08e8ca 100644 --- a/ident-list.h +++ b/ident-list.h @@ -78,6 +78,12 @@ IDENT(memset); IDENT(memcpy); IDENT(copy_to_user); IDENT(copy_from_user); IDENT(main); +/* used by the symbolic checker */ +IDENT(__assume); +IDENT(__assert); +IDENT(__assert_eq); +IDENT(__assert_const); + #undef __IDENT #undef IDENT #undef IDENT_RESERVED diff --git a/linearize.c b/linearize.c index 7248fa56..d9aed61b 100644 --- a/linearize.c +++ b/linearize.c @@ -692,52 +692,12 @@ static void set_activeblock(struct entrypoint *ep, struct basic_block *bb) add_bb(&ep->bbs, bb); } -static void remove_parent(struct basic_block *child, struct basic_block *parent) -{ - remove_bb_from_list(&child->parents, parent, 1); - if (!child->parents) - repeat_phase |= REPEAT_CFG_CLEANUP; -} - -/* Change a "switch" or a conditional branch into a branch */ -void insert_branch(struct basic_block *bb, struct instruction *jmp, struct basic_block *target) -{ - struct instruction *br, *old; - struct basic_block *child; - - /* Remove the switch */ - old = delete_last_instruction(&bb->insns); - assert(old == jmp); - kill_instruction(old); - - br = alloc_instruction(OP_BR, 0); - br->bb = bb; - br->bb_true = target; - add_instruction(&bb->insns, br); - - FOR_EACH_PTR(bb->children, child) { - if (child == target) { - target = NULL; /* Trigger just once */ - continue; - } - DELETE_CURRENT_PTR(child); - remove_parent(child, bb); - } END_FOR_EACH_PTR(child); - PACK_PTR_LIST(&bb->children); - repeat_phase |= REPEAT_CFG_CLEANUP; -} - - void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi_node, pseudo_t if_true, pseudo_t if_false) { pseudo_t target; struct instruction *select; - /* Remove the 'br' */ - delete_last_instruction(&bb->insns); - select = alloc_typed_instruction(OP_SEL, phi_node->type); - select->bb = bb; assert(br->cond); use_pseudo(select, br->cond, &select->src1); @@ -750,8 +710,7 @@ void insert_select(struct basic_block *bb, struct instruction *br, struct instru use_pseudo(select, if_true, &select->src2); use_pseudo(select, if_false, &select->src3); - add_instruction(&bb->insns, select); - add_instruction(&bb->insns, br); + insert_last_instruction(bb, select); } static inline int bb_empty(struct basic_block *bb) @@ -1533,7 +1492,7 @@ static pseudo_t linearize_assignment(struct entrypoint *ep, struct expression *e static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expression *expr) { struct expression *arg, *fn; - struct instruction *insn = alloc_typed_instruction(OP_CALL, expr->ctype); + struct instruction *insn; pseudo_t retval, call; struct ctype *ctype = NULL; struct symbol *fntype; @@ -1554,6 +1513,7 @@ static pseudo_t linearize_call_expression(struct entrypoint *ep, struct expressi ctype = &fntype->ctype; + insn = alloc_typed_instruction(OP_CALL, expr->ctype); add_symbol(&insn->fntypes, fntype); FOR_EACH_PTR(expr->args, arg) { pseudo_t new = linearize_expression(ep, arg); @@ -1749,10 +1709,9 @@ static void insert_phis(struct basic_block *bb, pseudo_t src, struct symbol *cty struct basic_block *parent; FOR_EACH_PTR(bb->parents, parent) { - struct instruction *br = delete_last_instruction(&parent->insns); - pseudo_t phi = alloc_phi(parent, src, ctype); - add_instruction(&parent->insns, br); - link_phi(node, phi); + struct instruction *phisrc = alloc_phisrc(src, ctype); + insert_last_instruction(parent, phisrc); + link_phi(node, phisrc->target); } END_FOR_EACH_PTR(parent); } diff --git a/linearize.h b/linearize.h index 65f54c28..cac6082f 100644 --- a/linearize.h +++ b/linearize.h @@ -18,7 +18,7 @@ struct pseudo_user { DECLARE_ALLOCATOR(pseudo_user); DECLARE_PTR_LIST(pseudo_user_list, struct pseudo_user); -DECLARE_PTRMAP(phi_map, struct symbol *, pseudo_t); +DECLARE_PTRMAP(phi_map, struct symbol *, struct instruction *); enum pseudo_type { @@ -200,6 +200,14 @@ static inline void add_instruction(struct instruction_list **list, struct instru add_ptr_list(list, insn); } +static inline void insert_last_instruction(struct basic_block *bb, struct instruction *insn) +{ + struct instruction *last = delete_last_instruction(&bb->insns); + add_instruction(&bb->insns, insn); + add_instruction(&bb->insns, last); + insn->bb = bb; +} + static inline void add_multijmp(struct multijmp_list **list, struct multijmp *multijmp) { add_ptr_list(list, multijmp); @@ -324,7 +332,6 @@ struct entrypoint { }; extern void insert_select(struct basic_block *bb, struct instruction *br, struct instruction *phi, pseudo_t if_true, pseudo_t if_false); -extern void insert_branch(struct basic_block *bb, struct instruction *br, struct basic_block *target); struct instruction *alloc_phisrc(pseudo_t pseudo, struct symbol *type); struct instruction *alloc_phi_node(struct basic_block *bb, struct symbol *type, struct ident *ident); @@ -17,23 +17,20 @@ #include "simplify.h" #include "flow.h" -/* - * We should probably sort the phi list just to make it easier to compare - * later for equality. - */ static void rewrite_load_instruction(struct instruction *insn, struct pseudo_list *dominators) { - pseudo_t new, phi; + pseudo_t new = NULL; + pseudo_t phi; /* * Check for somewhat common case of duplicate * phi nodes. */ - new = first_pseudo(dominators)->def->phi_src; FOR_EACH_PTR(dominators, phi) { - if (new != phi->def->phi_src) + if (!new) + new = phi->def->phi_src; + else if (new != phi->def->phi_src) goto complex_phi; - new->ident = new->ident ? : phi->ident; } END_FOR_EACH_PTR(phi); /* @@ -48,9 +45,7 @@ static void rewrite_load_instruction(struct instruction *insn, struct pseudo_lis goto end; complex_phi: - /* We leave symbol pseudos with a bogus usage list here */ - if (insn->src->type != PSEUDO_SYM) - kill_use(&insn->src); + kill_use(&insn->src); insn->opcode = OP_PHI; insn->phi_list = dominators; @@ -58,15 +53,15 @@ end: repeat_phase |= REPEAT_CSE; } -static int find_dominating_parents(pseudo_t pseudo, struct instruction *insn, - struct basic_block *bb, unsigned long generation, struct pseudo_list **dominators, +static int find_dominating_parents(struct instruction *insn, + struct basic_block *bb, struct pseudo_list **dominators, int local) { struct basic_block *parent; FOR_EACH_PTR(bb->parents, parent) { + struct instruction *phisrc; struct instruction *one; - struct instruction *br; pseudo_t phi; FOR_EACH_PTR_REVERSE(parent->insns, one) { @@ -75,7 +70,7 @@ static int find_dominating_parents(pseudo_t pseudo, struct instruction *insn, continue; if (one == insn) goto no_dominance; - dominance = dominates(pseudo, insn, one, local); + dominance = dominates(insn, one, local); if (dominance < 0) { if (one->opcode == OP_LOAD) continue; @@ -86,21 +81,21 @@ static int find_dominating_parents(pseudo_t pseudo, struct instruction *insn, goto found_dominator; } END_FOR_EACH_PTR_REVERSE(one); no_dominance: - if (parent->generation == generation) + if (parent->generation == bb->generation) continue; - parent->generation = generation; + parent->generation = bb->generation; - if (!find_dominating_parents(pseudo, insn, parent, generation, dominators, local)) + if (!find_dominating_parents(insn, parent, dominators, local)) return 0; continue; found_dominator: - br = delete_last_instruction(&parent->insns); - phi = alloc_phi(parent, one->target, one->type); + phisrc = alloc_phisrc(one->target, one->type); + phisrc->phi_node = insn; + insert_last_instruction(parent, phisrc); + phi = phisrc->target; phi->ident = phi->ident ? : one->target->ident; - add_instruction(&parent->insns, br); use_pseudo(insn, phi, add_pseudo(dominators, phi)); - phi->def->phi_node = insn; } END_FOR_EACH_PTR(parent); return 1; } @@ -146,7 +141,6 @@ static void simplify_loads(struct basic_block *bb) pseudo_t pseudo = insn->src; int local = local_pseudo(pseudo); struct pseudo_list *dominators; - unsigned long generation; if (insn->is_volatile) continue; @@ -160,7 +154,7 @@ static void simplify_loads(struct basic_block *bb) int dominance; if (!dom->bb) continue; - dominance = dominates(pseudo, insn, dom, local); + dominance = dominates(insn, dom, local); if (dominance) { /* possible partial dominance? */ if (dominance < 0) { @@ -177,10 +171,9 @@ static void simplify_loads(struct basic_block *bb) } END_FOR_EACH_PTR_REVERSE(dom); /* OK, go find the parents */ - generation = ++bb_generation; - bb->generation = generation; + bb->generation = ++bb_generation; dominators = NULL; - if (find_dominating_parents(pseudo, insn, bb, generation, &dominators, local)) { + if (find_dominating_parents(insn, bb, &dominators, local)) { /* This happens with initial assignments to structures etc.. */ if (!dominators) { if (local) { @@ -204,6 +197,30 @@ next_load: } END_FOR_EACH_PTR_REVERSE(insn); } +static bool try_to_kill_store(struct instruction *insn, + struct instruction *dom, int local) +{ + int dominance = dominates(insn, dom, local); + + if (dominance) { + /* possible partial dominance? */ + if (dominance < 0) + return false; + if (insn->target == dom->target && insn->bb == dom->bb) { + // found a memop which makes the store redundant + kill_instruction_force(insn); + return false; + } + if (dom->opcode == OP_LOAD) + return false; + if (dom->is_volatile) + return false; + /* Yeehaa! Found one! */ + kill_instruction_force(dom); + } + return true; +} + static void kill_dominated_stores(struct basic_block *bb) { struct instruction *insn; @@ -212,6 +229,7 @@ static void kill_dominated_stores(struct basic_block *bb) if (!insn->bb) continue; if (insn->opcode == OP_STORE) { + struct basic_block *par; struct instruction *dom; pseudo_t pseudo = insn->src; int local; @@ -223,22 +241,28 @@ static void kill_dominated_stores(struct basic_block *bb) local = local_pseudo(pseudo); RECURSE_PTR_REVERSE(insn, dom) { - int dominance; if (!dom->bb) continue; - dominance = dominates(pseudo, insn, dom, local); - if (dominance) { - /* possible partial dominance? */ - if (dominance < 0) - goto next_store; - if (dom->opcode == OP_LOAD) - goto next_store; - /* Yeehaa! Found one! */ - kill_instruction_force(dom); - } + if (!try_to_kill_store(insn, dom, local)) + goto next_store; } END_FOR_EACH_PTR_REVERSE(dom); /* OK, we should check the parents now */ + FOR_EACH_PTR(bb->parents, par) { + + if (bb_list_size(par->children) != 1) + goto next_parent; + FOR_EACH_PTR(par->insns, dom) { + if (!dom->bb) + continue; + if (dom == insn) + goto next_parent; + if (!try_to_kill_store(insn, dom, local)) + goto next_parent; + } END_FOR_EACH_PTR(dom); +next_parent: + ; + } END_FOR_EACH_PTR(par); } next_store: /* Do the next one */; @@ -1653,7 +1653,7 @@ static bool match_attribute(struct token *token) if (token_type(token) != TOKEN_IDENT) return false; sym = lookup_keyword(token->ident, NS_TYPEDEF); - if (!sym) + if (!sym || !sym->op) return false; return sym->op->type & KW_ATTRIBUTE; } diff --git a/scheck.c b/scheck.c new file mode 100644 index 00000000..754fe76f --- /dev/null +++ b/scheck.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: MIT +// Copyright (C) 2021 Luc Van Oostenryck + +/// +// Symbolic checker for Sparse's IR +// -------------------------------- +// +// This is an example client program with a dual purpose: +// # It shows how to translate Sparse's IR into the language +// of SMT solvers (only the part concerning integers, +// floating-point and memory is ignored). +// # It's used as a simple symbolic checker for the IR. +// The idea is to create a mini-language that allows to +// express some assertions with some pre-conditions. + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> +#include <fcntl.h> + +#include <boolector.h> +#include "lib.h" +#include "expression.h" +#include "linearize.h" +#include "symbol.h" +#include "builtin.h" + + +#define dyntype incomplete_ctype +static const struct builtin_fn builtins_scheck[] = { + { "__assume", &void_ctype, 0, { &dyntype }, .op = &generic_int_op }, + { "__assert", &void_ctype, 0, { &bool_ctype }, .op = &generic_int_op }, + { "__assert_eq", &void_ctype, 0, { &dyntype, &dyntype }, .op = &generic_int_op }, + { "__assert_const", &void_ctype, 0, { &dyntype, &dyntype }, .op = &generic_int_op }, + {} +}; + + +static BoolectorSort get_sort(Btor *btor, struct symbol *type, struct position pos) +{ + if (!is_int_type(type)) { + sparse_error(pos, "invalid type"); + return NULL; + } + return boolector_bitvec_sort(btor, type->bit_size); +} + +static BoolectorNode *mkvar(Btor *btor, BoolectorSort s, pseudo_t pseudo) +{ + static char buff[33]; + BoolectorNode *n; + + if (pseudo->priv) + return pseudo->priv; + + switch (pseudo->type) { + case PSEUDO_VAL: + sprintf(buff, "%llx", pseudo->value); + return boolector_consth(btor, s, buff); + case PSEUDO_ARG: + case PSEUDO_REG: + n = boolector_var(btor, s, show_pseudo(pseudo)); + break; + default: + fprintf(stderr, "invalid pseudo: %s\n", show_pseudo(pseudo)); + return NULL; + } + return pseudo->priv = n; +} + +static BoolectorNode *get_arg(Btor *btor, struct instruction *insn, int idx) +{ + pseudo_t arg = ptr_list_nth(insn->arguments, idx); + struct symbol *type = ptr_list_nth(insn->fntypes, idx + 1); + BoolectorSort s = get_sort(btor, type, insn->pos); + + return mkvar(btor, s, arg); +} + +static BoolectorNode *zext(Btor *btor, struct instruction *insn, BoolectorNode *s) +{ + int old = boolector_get_width(btor, s); + int new = insn->type->bit_size; + return boolector_uext(btor, s, new - old); +} + +static BoolectorNode *sext(Btor *btor, struct instruction *insn, BoolectorNode *s) +{ + int old = boolector_get_width(btor, s); + int new = insn->type->bit_size; + return boolector_sext(btor, s, new - old); +} + +static BoolectorNode *slice(Btor *btor, struct instruction *insn, BoolectorNode *s) +{ + int old = boolector_get_width(btor, s); + int new = insn->type->bit_size; + return boolector_slice(btor, s, old - new - 1, 0); +} + +static void binary(Btor *btor, BoolectorSort s, struct instruction *insn) +{ + BoolectorNode *t, *a, *b; + + a = mkvar(btor, s, insn->src1); + b = mkvar(btor, s, insn->src2); + if (!a || !b) + return; + switch (insn->opcode) { + case OP_ADD: t = boolector_add(btor, a, b); break; + case OP_SUB: t = boolector_sub(btor, a, b); break; + case OP_MUL: t = boolector_mul(btor, a, b); break; + case OP_AND: t = boolector_and(btor, a, b); break; + case OP_OR: t = boolector_or (btor, a, b); break; + case OP_XOR: t = boolector_xor(btor, a, b); break; + case OP_SHL: t = boolector_sll(btor, a, b); break; + case OP_LSR: t = boolector_srl(btor, a, b); break; + case OP_ASR: t = boolector_sra(btor, a, b); break; + case OP_DIVS: t = boolector_sdiv(btor, a, b); break; + case OP_DIVU: t = boolector_udiv(btor, a, b); break; + case OP_MODS: t = boolector_srem(btor, a, b); break; + case OP_MODU: t = boolector_urem(btor, a, b); break; + case OP_SET_EQ: t = zext(btor, insn, boolector_eq(btor, a, b)); break; + case OP_SET_NE: t = zext(btor, insn, boolector_ne(btor, a, b)); break; + case OP_SET_LT: t = zext(btor, insn, boolector_slt(btor, a, b)); break; + case OP_SET_LE: t = zext(btor, insn, boolector_slte(btor, a, b)); break; + case OP_SET_GE: t = zext(btor, insn, boolector_sgte(btor, a, b)); break; + case OP_SET_GT: t = zext(btor, insn, boolector_sgt(btor, a, b)); break; + case OP_SET_B: t = zext(btor, insn, boolector_ult(btor, a, b)); break; + case OP_SET_BE: t = zext(btor, insn, boolector_ulte(btor, a, b)); break; + case OP_SET_AE: t = zext(btor, insn, boolector_ugte(btor, a, b)); break; + case OP_SET_A: t = zext(btor, insn, boolector_ugt(btor, a, b)); break; + default: + fprintf(stderr, "unsupported insn\n"); + return; + } + insn->target->priv = t; +} + +static void binop(Btor *btor, struct instruction *insn) +{ + BoolectorSort s = get_sort(btor, insn->type, insn->pos); + binary(btor, s, insn); +} + +static void icmp(Btor *btor, struct instruction *insn) +{ + BoolectorSort s = get_sort(btor, insn->itype, insn->pos); + binary(btor, s, insn); +} + +static void unop(Btor *btor, struct instruction *insn) +{ + BoolectorSort s = get_sort(btor, insn->type, insn->pos); + BoolectorNode *t, *a; + + a = mkvar(btor, s, insn->src1); + if (!a) + return; + switch (insn->opcode) { + case OP_NEG: t = boolector_neg(btor, a); break; + case OP_NOT: t = boolector_not(btor, a); break; + case OP_SEXT: t = sext(btor, insn, a); break; + case OP_ZEXT: t = zext(btor, insn, a); break; + case OP_TRUNC: t = slice(btor, insn, a); break; + default: + fprintf(stderr, "unsupported insn\n"); + return; + } + insn->target->priv = t; +} + +static void ternop(Btor *btor, struct instruction *insn) +{ + BoolectorSort s = get_sort(btor, insn->type, insn->pos); + BoolectorNode *t, *a, *b, *c, *z, *d; + + a = mkvar(btor, s, insn->src1); + b = mkvar(btor, s, insn->src2); + c = mkvar(btor, s, insn->src3); + if (!a || !b || !c) + return; + switch (insn->opcode) { + case OP_SEL: + z = boolector_zero(btor, s); + d = boolector_ne(btor, a, z); + t = boolector_cond(btor, d, b, c); + break; + default: + fprintf(stderr, "unsupported insn\n"); + return; + } + insn->target->priv = t; +} + +static bool add_precondition(Btor *btor, BoolectorNode **pre, struct instruction *insn) +{ + BoolectorNode *a = get_arg(btor, insn, 0); + BoolectorNode *z = boolector_zero(btor, boolector_get_sort(btor, a)); + BoolectorNode *n = boolector_ne(btor, a, z); + BoolectorNode *p = boolector_and(btor, *pre, n); + *pre = p; + return true; +} + +static bool check_btor(Btor *btor, BoolectorNode *p, BoolectorNode *n, struct instruction *insn) +{ + char model_format[] = "btor"; + int res; + + n = boolector_implies(btor, p, n); + boolector_assert(btor, boolector_not(btor, n)); + res = boolector_sat(btor); + switch (res) { + case BOOLECTOR_UNSAT: + return 1; + case BOOLECTOR_SAT: + sparse_error(insn->pos, "assertion failed"); + show_entry(insn->bb->ep); + boolector_dump_btor(btor, stdout); + boolector_print_model(btor, model_format, stdout); + break; + default: + sparse_error(insn->pos, "SMT failure"); + break; + } + return 0; +} + +static bool check_assert(Btor *btor, BoolectorNode *pre, struct instruction *insn) +{ + BoolectorNode *a = get_arg(btor, insn, 0); + BoolectorNode *z = boolector_zero(btor, boolector_get_sort(btor, a)); + BoolectorNode *n = boolector_ne(btor, a, z); + return check_btor(btor, pre, n, insn); +} + +static bool check_equal(Btor *btor, BoolectorNode *pre, struct instruction *insn) +{ + BoolectorNode *a = get_arg(btor, insn, 0); + BoolectorNode *b = get_arg(btor, insn, 1); + BoolectorNode *n = boolector_eq(btor, a, b); + return check_btor(btor, pre, n, insn); +} + +static bool check_const(Btor *ctxt, struct instruction *insn) +{ + pseudo_t src1 = ptr_list_nth(insn->arguments, 0); + pseudo_t src2 = ptr_list_nth(insn->arguments, 1); + + if (src2->type != PSEUDO_VAL) + sparse_error(insn->pos, "should be a constant: %s", show_pseudo(src2)); + if (src1 == src2) + return 1; + if (src1->type != PSEUDO_VAL) + sparse_error(insn->pos, "not a constant: %s", show_pseudo(src1)); + else + sparse_error(insn->pos, "invalid value: %s != %s", show_pseudo(src1), show_pseudo(src2)); + return 0; +} + +static bool check_call(Btor *btor, BoolectorNode **pre, struct instruction *insn) +{ + pseudo_t func = insn->func; + struct ident *ident = func->ident; + + if (ident == &__assume_ident) + return add_precondition(btor, pre, insn); + if (ident == &__assert_ident) + return check_assert(btor, *pre, insn); + if (ident == &__assert_eq_ident) + return check_equal(btor, *pre, insn); + if (ident == &__assert_const_ident) + return check_const(btor, insn); + return 0; +} + +static bool check_function(struct entrypoint *ep) +{ + Btor *btor = boolector_new(); + BoolectorNode *pre = boolector_true(btor); + struct basic_block *bb; + int rc = 0; + + boolector_set_opt(btor, BTOR_OPT_MODEL_GEN, 1); + boolector_set_opt(btor, BTOR_OPT_INCREMENTAL, 1); + + FOR_EACH_PTR(ep->bbs, bb) { + struct instruction *insn; + FOR_EACH_PTR(bb->insns, insn) { + if (!insn->bb) + continue; + switch (insn->opcode) { + case OP_ENTRY: + continue; + case OP_BINARY ... OP_BINARY_END: + binop(btor, insn); + break; + case OP_BINCMP ... OP_BINCMP_END: + icmp(btor, insn); + break; + case OP_UNOP ... OP_UNOP_END: + unop(btor, insn); + break; + case OP_SEL: + ternop(btor, insn); + break; + case OP_CALL: + rc &= check_call(btor, &pre, insn); + break; + case OP_RET: + goto out; + default: + fprintf(stderr, "unsupported insn\n"); + goto out; + } + } END_FOR_EACH_PTR(insn); + } END_FOR_EACH_PTR(bb); + fprintf(stderr, "unterminated function\n"); + +out: + boolector_release_all(btor); + boolector_delete(btor); + return rc; +} + +static void check_functions(struct symbol_list *list) +{ + struct symbol *sym; + + FOR_EACH_PTR(list, sym) { + struct entrypoint *ep; + + expand_symbol(sym); + ep = linearize_symbol(sym); + if (!ep || !ep->entry) + continue; + check_function(ep); + } END_FOR_EACH_PTR(sym); +} + +int main(int argc, char **argv) +{ + struct string_list *filelist = NULL; + char *file; + + Wdecl = 0; + + sparse_initialize(argc, argv, &filelist); + + declare_builtins(0, builtins_scheck); + predefine_strong("__SYMBOLIC_CHECKER__"); + + // Expand, linearize and check. + FOR_EACH_PTR(filelist, file) { + check_functions(sparse(file)); + } END_FOR_EACH_PTR(file); + return 0; +} @@ -1280,10 +1280,14 @@ static int simplify_compare_constant(struct instruction *insn, long long value) case OP_SET_EQ: if ((value & bits) != value) return replace_with_value(insn, 0); + if (value == bits && is_power_of_2(bits)) + return replace_binop_value(insn, OP_SET_NE, 0); break; case OP_SET_NE: if ((value & bits) != value) return replace_with_value(insn, 1); + if (value == bits && is_power_of_2(bits)) + return replace_binop_value(insn, OP_SET_EQ, 0); break; case OP_SET_LE: case OP_SET_LT: value = sign_extend(value, def->size); @@ -1414,6 +1418,20 @@ static int simplify_compare_constant(struct instruction *insn, long long value) break; } break; + case OP_TRUNC: + osize = def->orig_type->bit_size; + switch (insn->opcode) { + case OP_SET_EQ: case OP_SET_NE: + if (one_use(def->target)) { + insn->itype = def->orig_type; + def->type = def->orig_type; + def->size = osize; + def->src2 = value_pseudo(bits); + return replace_opcode(def, OP_AND); + } + break; + } + break; case OP_ZEXT: osize = def->orig_type->bit_size; bits = bits_mask(osize); @@ -2321,6 +2339,21 @@ static int simplify_cast(struct instruction *insn) return replace_pseudo(insn, &insn->src1, def->src1); } break; + case OP_NOT: + switch (insn->opcode) { + case OP_TRUNC: + if (one_use(src)) { + // TRUNC(NOT(x)) --> NOT(TRUNC(x)) + insn->opcode = OP_NOT; + def->orig_type = def->type; + def->opcode = OP_TRUNC; + def->type = insn->type; + def->size = insn->size; + return REPEAT_CSE; + } + break; + } + break; case OP_OR: switch (insn->opcode) { case OP_TRUNC: @@ -2571,23 +2604,12 @@ static int simplify_branch(struct instruction *insn) pseudo_t cond = insn->cond; /* Constant conditional */ - if (constant(cond)) { - insert_branch(insn->bb, insn, cond->value ? insn->bb_true : insn->bb_false); - return REPEAT_CSE; - } + if (constant(cond)) + return convert_to_jump(insn, cond->value ? insn->bb_true : insn->bb_false); /* Same target? */ - if (insn->bb_true == insn->bb_false) { - struct basic_block *bb = insn->bb; - struct basic_block *target = insn->bb_false; - remove_bb_from_list(&target->parents, bb, 1); - remove_bb_from_list(&bb->children, target, 1); - insn->bb_false = NULL; - kill_use(&insn->cond); - insn->cond = NULL; - insn->opcode = OP_BR; - return REPEAT_CSE|REPEAT_CFG_CLEANUP; - } + if (insn->bb_true == insn->bb_false) + return convert_to_jump(insn, insn->bb_true); /* Conditional on a SETNE $0 or SETEQ $0 */ if (cond->type == PSEUDO_REG) { @@ -2603,14 +2625,10 @@ static int simplify_branch(struct instruction *insn) if (constant(def->src2) && constant(def->src3)) { long long val1 = def->src2->value; long long val2 = def->src3->value; - if (!val1 && !val2) { - insert_branch(insn->bb, insn, insn->bb_false); - return REPEAT_CSE; - } - if (val1 && val2) { - insert_branch(insn->bb, insn, insn->bb_true); - return REPEAT_CSE; - } + if (!val1 && !val2) + return convert_to_jump(insn, insn->bb_false); + if (val1 && val2) + return convert_to_jump(insn, insn->bb_true); if (val2) { struct basic_block *tmp = insn->bb_true; insn->bb_true = insn->bb_false; @@ -2646,8 +2664,7 @@ static int simplify_switch(struct instruction *insn) return 0; found: - insert_branch(insn->bb, insn, jmp->target); - return REPEAT_CSE; + return convert_to_jump(insn, jmp->target); } static struct basic_block *is_label(pseudo_t pseudo) @@ -2684,7 +2701,7 @@ static int simplify_cgoto(struct instruction *insn) continue; remove_bb_from_list(&jmp->target->parents, bb, 1); remove_bb_from_list(&bb->children, jmp->target, 1); - MARK_CURRENT_DELETED(jmp); + DELETE_CURRENT_PTR(jmp); } END_FOR_EACH_PTR(jmp); kill_use(&insn->src); insn->opcode = OP_BR; @@ -7,7 +7,6 @@ #include <assert.h> #include "ssa.h" #include "lib.h" -#include "sset.h" #include "dominate.h" #include "flowgraph.h" #include "linearize.h" @@ -33,6 +32,9 @@ static inline bool is_promotable(struct symbol *type) case SYM_STRUCT: // we allow a single scalar field // but a run of bitfields count for 1 + // (and packed bifields are excluded). + if (type->packed) + return 0; nbr = 0; bf_seen = 0; FOR_EACH_PTR(type->symbol_list, member) { @@ -72,21 +74,6 @@ static inline bool is_promotable(struct symbol *type) return 0; } -static bool insn_before(struct instruction *a, struct instruction *b) -{ - struct basic_block *bb = a->bb; - struct instruction *insn; - - assert(b->bb == bb); - FOR_EACH_PTR(bb->insns, insn) { - if (insn == a) - return true; - if (insn == b) - return false; - } END_FOR_EACH_PTR(insn); - assert(0); -} - static void kill_store(struct instruction *insn) { remove_use(&insn->src); @@ -94,10 +81,22 @@ static void kill_store(struct instruction *insn) insn->bb = NULL; } +static bool same_memop(struct instruction *a, struct instruction *b) +{ + if (a->size != b->size || a->offset != b->offset) + return false; + if (is_integral_type(a->type) && is_float_type(b->type)) + return false; + if (is_float_type(a->type) && is_integral_type(b->type)) + return false; + return true; +} + static void rewrite_local_var(struct basic_block *bb, pseudo_t addr, int nbr_stores, int nbr_uses) { + struct instruction *store = NULL; struct instruction *insn; - pseudo_t val = NULL; + bool remove = false; if (!bb) return; @@ -108,71 +107,32 @@ static void rewrite_local_var(struct basic_block *bb, pseudo_t addr, int nbr_sto continue; switch (insn->opcode) { case OP_LOAD: - if (!val) - val = undef_pseudo(); - replace_with_pseudo(insn, val); + if (!store) + replace_with_pseudo(insn, undef_pseudo()); + else if (same_memop(store, insn)) + replace_with_pseudo(insn, store->target); + else + remove = false; break; case OP_STORE: - val = insn->target; - // can't use kill_instruction() unless - // we add a fake user to val - kill_store(insn); + store = insn; + remove = true; break; } } END_FOR_EACH_PTR(insn); + if (remove) + kill_store(store); } -static bool rewrite_single_store(struct instruction *store) -{ - pseudo_t addr = store->src; - struct pseudo_user *pu; - - FOR_EACH_PTR(addr->users, pu) { - struct instruction *insn = pu->insn; - - if (insn->opcode != OP_LOAD) - continue; - - // Let's try to replace the value of the load - // by the value from the store. This is only valid - // if the store dominate the load. - - if (insn->bb == store->bb) { - // the load and the store are in the same BB - // we can convert if the load is after the store. - if (!insn_before(store, insn)) - continue; - } else if (!domtree_dominates(store->bb, insn->bb)) { - // we can't convert this load - continue; - } - - // OK, we can rewrite this load - - // undefs ? - - replace_with_pseudo(insn, store->target); - } END_FOR_EACH_PTR(pu); - - // is there some unconverted loads? - if (pseudo_user_list_size(addr->users) > 1) - return false; - - kill_store(store); - return true; -} - -static struct sset *processed; - // we would like to know: // is there one or more stores? // are all loads & stores local/done in a single block? static void ssa_convert_one_var(struct entrypoint *ep, struct symbol *var) { + unsigned long generation = ++bb_generation; struct basic_block_list *alpha = NULL; struct basic_block_list *idf = NULL; struct basic_block *samebb = NULL; - struct instruction *store = NULL; struct basic_block *bb; struct pseudo_user *pu; unsigned long mod = var->ctype.modifiers; @@ -199,7 +159,6 @@ static void ssa_convert_one_var(struct entrypoint *ep, struct symbol *var) return; // 1) insert in the worklist all BBs that may modify var - sset_reset(processed); FOR_EACH_PTR(addr->users, pu) { struct instruction *insn = pu->insn; struct basic_block *bb = insn->bb; @@ -207,9 +166,10 @@ static void ssa_convert_one_var(struct entrypoint *ep, struct symbol *var) switch (insn->opcode) { case OP_STORE: nbr_stores++; - store = insn; - if (!sset_testset(processed, bb->nr)) + if (bb->generation != generation) { + bb->generation = generation; add_bb(&alpha, bb); + } /* fall through */ case OP_LOAD: if (local) { @@ -229,11 +189,6 @@ static void ssa_convert_one_var(struct entrypoint *ep, struct symbol *var) } } END_FOR_EACH_PTR(pu); - if (nbr_stores == 1) { - if (rewrite_single_store(store)) - return; - } - // if all uses are local to a single block // they can easily be rewritten and doesn't need phi-nodes // FIXME: could be done for extended BB too @@ -255,21 +210,40 @@ external_visibility: kill_dead_stores(ep, addr, !mod); } -static pseudo_t lookup_var(struct basic_block *bb, struct symbol *var) +static struct instruction *lookup_var(struct basic_block *bb, struct symbol *var) { do { - pseudo_t val = phi_map_lookup(bb->phi_map, var); - if (val) - return val; + struct instruction *insn = phi_map_lookup(bb->phi_map, var); + if (insn) + return insn; } while ((bb = bb->idom)); - return undef_pseudo(); + return NULL; } static struct instruction_list *phis_all; static struct instruction_list *phis_used; +static struct instruction_list *stores; + +static bool matching_load(struct instruction *def, struct instruction *insn) +{ + if (insn->size != def->size) + return false; + switch (def->opcode) { + case OP_STORE: + case OP_LOAD: + if (insn->offset != def->offset) + return false; + case OP_PHI: + break; + default: + return false; + } + return true; +} static void ssa_rename_insn(struct basic_block *bb, struct instruction *insn) { + struct instruction *def; struct symbol *var; pseudo_t addr; pseudo_t val; @@ -282,8 +256,8 @@ static void ssa_rename_insn(struct basic_block *bb, struct instruction *insn) var = addr->sym; if (!var || !var->torename) break; - phi_map_update(&bb->phi_map, var, insn->target); - kill_store(insn); + phi_map_update(&bb->phi_map, var, insn); + add_instruction(&stores, insn); break; case OP_LOAD: addr = insn->src; @@ -292,14 +266,22 @@ static void ssa_rename_insn(struct basic_block *bb, struct instruction *insn) var = addr->sym; if (!var || !var->torename) break; - val = lookup_var(bb, var); + def = lookup_var(bb, var); + if (!def) { + val = undef_pseudo(); + } else if (!matching_load(def, insn)) { + var->torename = false; + break; + } else { + val = def->target; + } replace_with_pseudo(insn, val); break; case OP_PHI: var = insn->type; if (!var || !var->torename) break; - phi_map_update(&bb->phi_map, var, insn->target); + phi_map_update(&bb->phi_map, var, insn); add_instruction(&phis_all, insn); break; } @@ -345,11 +327,12 @@ static void ssa_rename_phi(struct instruction *insn) if (!var->torename) return; FOR_EACH_PTR(insn->bb->parents, par) { - struct instruction *term = delete_last_instruction(&par->insns); - pseudo_t val = lookup_var(par, var); - pseudo_t phi = alloc_phi(par, val, var); + struct instruction *def = lookup_var(par, var); + pseudo_t val = def ? def->target : undef_pseudo(); + struct instruction *phisrc = alloc_phisrc(val, var); + pseudo_t phi = phisrc->target; phi->ident = var->ident; - add_instruction(&par->insns, term); + insert_last_instruction(par, phisrc); link_phi(insn, phi); mark_phi_used(val); } END_FOR_EACH_PTR(par); @@ -374,6 +357,18 @@ static void ssa_rename_phis(struct entrypoint *ep) } END_FOR_EACH_PTR(phi); } +static void remove_dead_stores(struct instruction_list *stores) +{ + struct instruction *store; + + FOR_EACH_PTR(stores, store) { + struct symbol *var = store->addr->sym; + + if (var->torename) + kill_store(store); + } END_FOR_EACH_PTR(store); +} + void ssa_convert(struct entrypoint *ep) { struct basic_block *bb; @@ -390,9 +385,8 @@ void ssa_convert(struct entrypoint *ep) bb->phi_map = NULL; } END_FOR_EACH_PTR(bb); - processed = sset_init(first, last); - // try to promote memory accesses to pseudos + stores = NULL; FOR_EACH_PTR(ep->accesses, pseudo) { ssa_convert_one_var(ep, pseudo->sym); } END_FOR_EACH_PTR(pseudo); @@ -401,4 +395,7 @@ void ssa_convert(struct entrypoint *ep) phis_all = phis_used = NULL; ssa_rename_insns(ep); ssa_rename_phis(ep); + + // remove now dead stores + remove_dead_stores(stores); } diff --git a/sset.c b/sset.c deleted file mode 100644 index e9681e00..00000000 --- a/sset.c +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// sset.c - an all O(1) implementation of sparse sets as presented in: -// "An Efficient Representation for Sparse Sets" -// by Preston Briggs and Linda Torczon -// -// Copyright (C) 2017 - Luc Van Oostenryck - -#include "sset.h" -#include "lib.h" -#include <stdlib.h> - - -struct sset *sset_init(unsigned int first, unsigned int last) -{ - unsigned int size = last - first + 1; - struct sset *s = malloc(sizeof(*s) + size * 2 * sizeof(s->sets[0])); - - s->size = size; - s->off = first; - s->nbr = 0; - return s; -} - -void sset_free(struct sset *s) -{ - free(s); -} diff --git a/sset.h b/sset.h deleted file mode 100644 index 69cee18a..00000000 --- a/sset.h +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: MIT - -#ifndef SSET_H -#define SSET_H - -/* - * sset.h - an all O(1) implementation of sparse sets as presented in: - * "An Efficient Representation for Sparse Sets" - * by Preston Briggs and Linda Torczon - * - * Copyright (C) 2017 - Luc Van Oostenryck - */ - -#include <stdbool.h> - -struct sset { - unsigned int nbr; - unsigned int off; - unsigned int size; - unsigned int sets[0]; -}; - -extern struct sset *sset_init(unsigned int size, unsigned int off); -extern void sset_free(struct sset *s); - - -static inline void sset_reset(struct sset *s) -{ - s->nbr = 0; -} - -static inline void sset_add(struct sset *s, unsigned int idx) -{ - unsigned int __idx = idx - s->off; - unsigned int n = s->nbr++; - s->sets[__idx] = n; - s->sets[s->size + n] = __idx; -} - -static inline bool sset_test(struct sset *s, unsigned int idx) -{ - unsigned int __idx = idx - s->off; - unsigned int n = s->sets[__idx]; - - return (n < s->nbr) && (s->sets[s->size + n] == __idx); -} - -static inline bool sset_testset(struct sset *s, unsigned int idx) -{ - if (sset_test(s, idx)) - return true; - sset_add(s, idx); - return false; -} - -#endif diff --git a/validation/eval/not-cast-bool.c b/validation/eval/not-cast-bool.c new file mode 100644 index 00000000..acd8bbf2 --- /dev/null +++ b/validation/eval/not-cast-bool.c @@ -0,0 +1,14 @@ +static _Bool foo(void) +{ + unsigned char c = 1; + _Bool b = ~c; + return b; +} + +/* + * check-name: not-cast-bool + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-returns: 1 + */ diff --git a/validation/eval/not-cast-float.c b/validation/eval/not-cast-float.c new file mode 100644 index 00000000..d474d69b --- /dev/null +++ b/validation/eval/not-cast-float.c @@ -0,0 +1,14 @@ +static int foo(void) +{ + int i = 123; + float x = ~i; + return (x < 0); +} + +/* + * check-name: eval-bool-zext-neg + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-returns: 1 + */ diff --git a/validation/knr-attr-crash.c b/validation/knr-attr-crash.c new file mode 100644 index 00000000..176ff503 --- /dev/null +++ b/validation/knr-attr-crash.c @@ -0,0 +1,12 @@ +typedef int word; + +void foo(word x); + +void foo(x) + word x; +{ } + +/* + * check-name: knr-attr-crash + * check-command: sparse -Wno-old-style-definition $file + */ diff --git a/validation/mem2reg/not-same-memop0.c b/validation/mem2reg/not-same-memop0.c new file mode 100644 index 00000000..4de98195 --- /dev/null +++ b/validation/mem2reg/not-same-memop0.c @@ -0,0 +1,48 @@ +struct s { + int:16; + short f:6; +}; + +static short local(struct s s) +{ + return s.f; +} + +static void foo(struct s s) +{ + while (s.f) ; +} + +/* + * check-name: not-same-memop0 + * check-command: test-linearize -Wno-decl -fdump-ir=mem2reg $file + * + * check-output-start +local: +.L0: + <entry-point> + store.32 %arg1 -> 0[s] + load.16 %r1 <- 2[s] + trunc.6 %r2 <- (16) %r1 + sext.16 %r3 <- (6) %r2 + ret.16 %r3 + + +foo: +.L2: + <entry-point> + store.32 %arg1 -> 0[s] + br .L6 + +.L6: + load.16 %r5 <- 2[s] + trunc.6 %r6 <- (16) %r5 + setne.1 %r7 <- %r6, $0 + cbr %r7, .L6, .L5 + +.L5: + ret + + + * check-output-end + */ diff --git a/validation/mem2reg/packed-bitfield.c b/validation/mem2reg/packed-bitfield.c new file mode 100644 index 00000000..f3ee259a --- /dev/null +++ b/validation/mem2reg/packed-bitfield.c @@ -0,0 +1,35 @@ +struct s { + int:16; + int f:16; +} __attribute__((__packed__)); + +static void foo(struct s s) +{ + while (s.f) + ; +} + +/* + * check-name: packed-bitfield + * check-command: test-linearize -fmem2reg $file + * + * check-output-contains: store.32 + * check-output-contains: load.16 + * + * check-output-start +foo: +.L0: + <entry-point> + store.32 %arg1 -> 0[s] + br .L4 + +.L4: + load.16 %r1 <- 2[s] + cbr %r1, .L4, .L3 + +.L3: + ret + + + * check-output-end + */ diff --git a/validation/memops/kill-dead-store-parent0.c b/validation/memops/kill-dead-store-parent0.c new file mode 100644 index 00000000..c1b2466c --- /dev/null +++ b/validation/memops/kill-dead-store-parent0.c @@ -0,0 +1,14 @@ +void foo(int *ptr, int p) +{ + if (p) + *ptr = 1; + *ptr = 0; +} + +/* + * check-name: kill-dead-store-parent0 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-pattern(1): store + */ diff --git a/validation/memops/kill-dead-store-parent2.c b/validation/memops/kill-dead-store-parent2.c new file mode 100644 index 00000000..4f7b9dd9 --- /dev/null +++ b/validation/memops/kill-dead-store-parent2.c @@ -0,0 +1,25 @@ +int ladder02(int *ptr, int p, int x) +{ + *ptr = x++; + if (p) + goto l11; + else + goto l12; +l11: + *ptr = x++; + goto l20; +l12: + *ptr = x++; + goto l20; +l20: + *ptr = x++; + return *ptr; +} + +/* + * check-name: kill-dead-store-parent2 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-pattern(1): store + */ diff --git a/validation/memops/kill-redundant-store0.c b/validation/memops/kill-redundant-store0.c new file mode 100644 index 00000000..8819938f --- /dev/null +++ b/validation/memops/kill-redundant-store0.c @@ -0,0 +1,13 @@ +void foo(int *ptr) +{ + int i = *ptr; + *ptr = i; +} + +/* + * check-name: kill-redundant-store0 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-excludes: store + */ diff --git a/validation/optim/and-extendx.c b/validation/optim/and-extendx.c deleted file mode 100644 index 5c181c93..00000000 --- a/validation/optim/and-extendx.c +++ /dev/null @@ -1,24 +0,0 @@ -typedef unsigned short u16; -typedef short s16; -typedef unsigned int u32; -typedef int s32; -typedef unsigned long long u64; -typedef long long s64; - -u64 ufoo(int x) -{ - return x & 0x7fff; -} - -u64 sfoo(int x) -{ - return x & 0x7fff; -} - -/* - * check-name: and-extend - * check-command: test-linearize -Wno-decl $file - * - * check-output-ignore - * check-output-contains: and\\.64.*0x7fff - */ diff --git a/validation/optim/bad-phisrc1.c b/validation/optim/bad-phisrc1.c new file mode 100644 index 00000000..aa12dd0a --- /dev/null +++ b/validation/optim/bad-phisrc1.c @@ -0,0 +1,15 @@ +void foo(int a, int b) +{ + if (b) + while ((a += 5) > a) + ; +} + +/* + * check-name: bad-phisrc1 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-excludes: phi\\. + * check-output-excludes: phisource\\. + */ diff --git a/validation/optim/bad-phisrc1a.c b/validation/optim/bad-phisrc1a.c new file mode 100644 index 00000000..b7519ee7 --- /dev/null +++ b/validation/optim/bad-phisrc1a.c @@ -0,0 +1,23 @@ +int def(void); + +int fun4(struct xfrm_state *net, int cnt) +{ + int err = 0; + if (err) + goto out; + for (; net;) + err = def(); + if (cnt) +out: + return err; + return 0; +} + +/* + * check-name: bad-phisrc1a + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-contains: select\\. + */ + diff --git a/validation/optim/bad-phisrc2.c b/validation/optim/bad-phisrc2.c new file mode 100644 index 00000000..78eae288 --- /dev/null +++ b/validation/optim/bad-phisrc2.c @@ -0,0 +1,16 @@ +int bad_phisrc2(int p, int a, int r) +{ + if (p) + r = a; + else if (r) + ; + return r; +} + +/* + * check-name: bad-phisrc2 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-contains: select\\. + */ diff --git a/validation/optim/bad-phisrc3.c b/validation/optim/bad-phisrc3.c new file mode 100644 index 00000000..41537420 --- /dev/null +++ b/validation/optim/bad-phisrc3.c @@ -0,0 +1,20 @@ +void foo(void) +{ + int c = 1; + switch (3) { + case 0: + do { + ; + case 3: ; + } while (c++); + } +} + +/* + * check-name: bad-phisrc3 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-pattern(2): phisrc\\. + * check-output-pattern(1): phi\\. + */ diff --git a/validation/optim/cmp-and-pow2.c b/validation/optim/cmp-and-pow2.c new file mode 100644 index 00000000..01ba2537 --- /dev/null +++ b/validation/optim/cmp-and-pow2.c @@ -0,0 +1,12 @@ +#define M 32 + +_Bool eq(int a) { return ((a & M) != M) == ((a & M) == 0); } +_Bool ne(int a) { return ((a & M) == M) == ((a & M) != 0); } + +/* + * check-name: cmp-and-pow2 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-returns: 1 + */ diff --git a/validation/optim/multi-phisrc.c b/validation/optim/multi-phisrc.c new file mode 100644 index 00000000..ff31c083 --- /dev/null +++ b/validation/optim/multi-phisrc.c @@ -0,0 +1,23 @@ +void fun(void); + +void foo(int p, int a) +{ + if (p == p) { + switch (p) { + case 0: + break; + case 1: + a = 0; + } + } + if (a) + fun(); +} + +/* + * check-name: multi-phisrc + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-excludes: phi + */ diff --git a/validation/optim/phi-count00.c b/validation/optim/phi-count00.c new file mode 100644 index 00000000..38db0eda --- /dev/null +++ b/validation/optim/phi-count00.c @@ -0,0 +1,27 @@ +inline int inl(int d, int e, int f) +{ + switch (d) { + case 0: + return e; + case 1: + return f; + default: + return 0; + } +} + +void foo(int a, int b, int c) +{ + while (1) { + if (inl(a, b, c)) + break; + } +} + +/* + * check-name: phi-count00 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-pattern(0,2): phisrc + */ diff --git a/validation/optim/trunc-not0.c b/validation/optim/trunc-not0.c new file mode 100644 index 00000000..882b446d --- /dev/null +++ b/validation/optim/trunc-not0.c @@ -0,0 +1,20 @@ +typedef __INT32_TYPE__ int32; +typedef __INT64_TYPE__ int64; + +static _Bool sfoo(int64 a) { return ((int32) ~a) == (~ (int32)a); } +static _Bool sbar(int64 a) { return (~(int32) ~a) == (int32)a; } + + +typedef __UINT32_TYPE__ uint32; +typedef __UINT64_TYPE__ uint64; + +static _Bool ufoo(uint64 a) { return ((uint32) ~a) == (~ (uint32)a); } +static _Bool ubar(uint64 a) { return (~(uint32) ~a) == (uint32)a; } + +/* + * check-name: trunc-not0 + * check-command: test-linearize -Wno-decl $file + * + * check-output-ignore + * check-output-returns: 1 + */ diff --git a/validation/scheck/ko.c b/validation/scheck/ko.c new file mode 100644 index 00000000..dbd861ea --- /dev/null +++ b/validation/scheck/ko.c @@ -0,0 +1,10 @@ +static void ko(int x) +{ + __assert((~x) == (~0 + x)); +} + +/* + * check-name: scheck-ko + * check-command: scheck $file + * check-known-to-fail + */ diff --git a/validation/scheck/ok.c b/validation/scheck/ok.c new file mode 100644 index 00000000..1e5314f2 --- /dev/null +++ b/validation/scheck/ok.c @@ -0,0 +1,22 @@ +static void ok(int x) +{ + __assert((~x) == (~0 - x)); // true but not simplified yet + __assert_eq(~x, ~0 - x); + __assert_const(x & 0, 0); +} + +static void always(int x) +{ + __assert((x - x) == 0); // true and simplified +} + +static void assumed(int x, int a, int b) +{ + __assume((a & ~b) == 0); + __assert_eq((x ^ a) | b, x | b); +} + +/* + * check-name: scheck-ok + * check-command: scheck $file + */ diff --git a/validation/test-suite b/validation/test-suite index 1b05c75e..305edd1f 100755 --- a/validation/test-suite +++ b/validation/test-suite @@ -13,6 +13,9 @@ prog_name=`basename $0` if [ ! -x "$default_path/sparse-llvm" ]; then disabled_cmds="sparsec sparsei sparse-llvm sparse-llvm-dis" fi +if [ ! -x "$default_path/scheck" ]; then + disabled_cmds="$disabled_cmds scheck" +fi # flags: # - some tests gave an unexpected result @@ -512,7 +515,9 @@ echo "options:" echo " -a append the created test to the input file" echo " -f write a test known to fail" echo " -l write a test for linearized code" +echo " -r write a test for linearized code returning 1" echo " -p write a test for pre-processing" +echo " -s write a test for symbolic checking" echo echo "argument(s):" echo " file file containing the test case(s)" @@ -528,6 +533,7 @@ do_format() append=0 linear=0 fail=0 + ret='' while [ $# -gt 0 ] ; do case "$1" in @@ -538,8 +544,13 @@ do_format() -l) def_cmd='test-linearize -Wno-decl $file' linear=1 ;; + -r) + def_cmd='test-linearize -Wno-decl $file' + ret=1 ;; -p) def_cmd='sparse -E $file' ;; + -s) + def_cmd='scheck $file' ;; help|-*) do_format_help @@ -582,6 +593,12 @@ _EOF if [ $fail != 0 ]; then echo " * check-known-to-fail" fi + if [ "$ret" != '' ]; then + echo ' *' + echo ' * check-output-ignore' + echo " * check-output-returns: $ret" + rm -f "$file.output.got" + fi if [ $linear != 0 ]; then echo ' *' echo ' * check-output-ignore' |