aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorLuc Van Oostenryck <luc.vanoostenryck@gmail.com>2020-11-12 22:01:10 +0100
committerLuc Van Oostenryck <luc.vanoostenryck@gmail.com>2020-11-17 18:03:24 +0100
commit28677f8ac6efd939b2bd306a2b1af0f95ef44136 (patch)
treedd406eaa21360a4cd21aa206ccf5bb9f6cf3b298
parent6b5e7cf5ac390f12472f914737c8a947eed0afe0 (diff)
downloadsparse-28677f8ac6efd939b2bd306a2b1af0f95ef44136.tar.gz
cfg: early CFG simplification
The linearization step sometimes creates a lot of intermediate basic blocks, often containing just a branch. Their presence often makes things more complicated than needed (more work to do in later phases, visual clutter when inspecting the IR 'by hand') and they can sometimes indirectly hinder some optimizations. Happily, most of them can trivially be optimized away. So, add a CFG simplification phase running very early and doing: *) jump threading (eliminate jumps to jumps) *) merging of single-child/single-parent basic blocks. These changes slightly decrease the number of 'context imbalance' warnings (32 fewer out of a total of 995 warnings) and the size of the generated IR (only ~0.4%, but this is very significant relative to most other simplifications). They also seem to improve the kernel tests' running time: before: real 4m19.261s, user 72m03.634s, sys 29m05.573s; after: real 4m17.548s, user 71m34.642s, sys 29m01.856s; but it's probably just noise. Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
-rw-r--r--flow.c78
-rw-r--r--flow.h1
-rw-r--r--optimize.c5
-rw-r--r--validation/call-inlined.c4
-rw-r--r--validation/expand/builtin_constant_inline0.c1
-rw-r--r--validation/inline_base0.c3
-rw-r--r--validation/linear/builtin_unreachable0.c4
-rw-r--r--validation/linear/builtin_unreachable1.c4
-rw-r--r--validation/linear/call-inline.c2
-rw-r--r--validation/mem2reg/cond-expr.c4
-rw-r--r--validation/mem2reg/cond-expr5.c5
-rw-r--r--validation/optim/cse-size.c2
12 files changed, 102 insertions, 11 deletions
diff --git a/flow.c b/flow.c
index f052cdb1..2e20ab75 100644
--- a/flow.c
+++ b/flow.c
@@ -121,6 +121,31 @@ static int bb_depends_on_phi(struct basic_block *target, struct basic_block *src
return 0;
}
+///
+// does the BB contain only ignorable instructions followed by a branch?
+// :note: something could be done for phi-sources but ... we'll see.
+static bool bb_is_forwarder(struct basic_block *bb)
+{
+ struct instruction *insn;
+
+ FOR_EACH_PTR(bb->insns, insn) {
+ if (!insn->bb) // no owning BB (presumably a removed instruction): skip it
+ continue;
+ switch (insn->opcode) {
+ case OP_NOP:
+ case OP_INLINED_CALL: // ignorable opcodes: keep scanning
+ continue;
+ case OP_CBR:
+ case OP_BR: // a branch reached after ignorable instructions only
+ return true;
+ default: // any other opcode disqualifies the BB
+ goto out;
+ }
+ } END_FOR_EACH_PTR(insn);
+out: // also reached when the list holds no branch at all
+ return false;
+}
+
/*
* When we reach here, we have:
* - a basic block that ends in a conditional branch and
@@ -742,6 +767,22 @@ void vrfy_flow(struct entrypoint *ep)
assert(!entry);
}
+static int retarget_parents(struct basic_block *bb, struct basic_block *target)
+{
+ struct basic_block *parent;
+
+ /*
+ * Redirect every parent of bb to target, then kill bb. We can't do FOR_EACH_PTR()
+ * here, because the parent list may change when we rewrite the parent.
+ */
+ while ((parent = first_basic_block(bb->parents))) { // presumably each successful rewrite drops parent from bb->parents -- TODO confirm
+ if (!rewrite_parent_branch(parent, bb, target))
+ return 0; // rewrite returned 0: stop here, bb is not killed
+ }
+ kill_bb(bb); // first_basic_block() found no parent left
+ return REPEAT_CFG_CLEANUP;
+}
+
static void remove_merging_phisrc(struct basic_block *top, struct instruction *insn)
{
struct instruction *user = get_phinode(insn);
@@ -821,6 +862,43 @@ static int merge_bb(struct basic_block *top, struct basic_block *bot)
return REPEAT_CFG_CLEANUP;
}
+///
+// early simplification of the CFG; the result ORs the change flags of each step (0 if none was set)
+// Three things are done here:
+// # inactive BBs (those whose ->ep is not set) are removed from ep->bbs.
+// # branches to a 'forwarder' BB are redirected to the forwardee.
+// # single-child/single-parent BBs are merged.
+int simplify_cfg_early(struct entrypoint *ep)
+{
+ struct basic_block *bb;
+ int changed = 0;
+
+ FOR_EACH_PTR_REVERSE(ep->bbs, bb) { // walk the BB list in reverse
+ struct instruction *insn;
+ struct basic_block *tgt;
+
+ if (!bb->ep) { // inactive BB: drop it from the list
+ DELETE_CURRENT_PTR(bb);
+ changed = REPEAT_CFG_CLEANUP;
+ continue;
+ }
+
+ insn = last_instruction(bb->insns);
+ if (!insn) // no last instruction: nothing to examine
+ continue;
+ switch (insn->opcode) {
+ case OP_BR: // only BBs ending in OP_BR are handled here
+ tgt = insn->bb_true;
+ if (bb_is_forwarder(bb)) // bb does nothing but branch: bypass it
+ changed |= retarget_parents(bb, tgt);
+ else if (bb_list_size(tgt->parents) == 1) // tgt has a single parent (presumably bb itself)
+ changed |= merge_bb(bb, tgt);
+ break;
+ }
+ } END_FOR_EACH_PTR_REVERSE(bb);
+ return changed;
+}
+
void pack_basic_blocks(struct entrypoint *ep)
{
struct basic_block *bb;
diff --git a/flow.h b/flow.h
index 099767d4..19a743c8 100644
--- a/flow.h
+++ b/flow.h
@@ -18,6 +18,7 @@ extern void kill_dead_stores(struct entrypoint *ep, pseudo_t addr, int local);
extern void simplify_symbol_usage(struct entrypoint *ep);
extern void simplify_memops(struct entrypoint *ep);
extern void pack_basic_blocks(struct entrypoint *ep);
+extern int simplify_cfg_early(struct entrypoint *ep);
extern void convert_instruction_target(struct instruction *insn, pseudo_t src);
extern void remove_dead_insns(struct entrypoint *);
diff --git a/optimize.c b/optimize.c
index 8ab105bc..338714c7 100644
--- a/optimize.c
+++ b/optimize.c
@@ -57,6 +57,11 @@ void optimize(struct entrypoint *ep)
kill_unreachable_bbs(ep);
ir_validate(ep);
+ cfg_postorder(ep);
+ if (simplify_cfg_early(ep))
+ kill_unreachable_bbs(ep);
+ ir_validate(ep);
+
domtree_build(ep);
/*
diff --git a/validation/call-inlined.c b/validation/call-inlined.c
index 3612c5c4..a6cb4b5b 100644
--- a/validation/call-inlined.c
+++ b/validation/call-inlined.c
@@ -28,12 +28,14 @@ foo:
<entry-point>
add.32 %r3 <- %arg1, %arg2
add.32 %r5 <- %r3, $1
+ # call %r6 <- add, %r3, $1
ret.32 %r5
bar:
.L3:
<entry-point>
+ # call %r13 <- add, %r10, $1
ret
@@ -41,6 +43,7 @@ bas:
.L6:
<entry-point>
add.64 %r16 <- "abc", $1
+ # call %r17 <- lstrip, %r14
ret.64 %r16
@@ -48,6 +51,7 @@ qus:
.L9:
<entry-point>
add.64 %r21 <- messg, $1
+ # call %r22 <- lstrip, %r19
ret.64 %r21
diff --git a/validation/expand/builtin_constant_inline0.c b/validation/expand/builtin_constant_inline0.c
index a0057f20..d72a211f 100644
--- a/validation/expand/builtin_constant_inline0.c
+++ b/validation/expand/builtin_constant_inline0.c
@@ -16,6 +16,7 @@ int foo(void)
foo:
.L0:
<entry-point>
+ # call %r1 <- is_const, $42
ret.32 $42
diff --git a/validation/inline_base0.c b/validation/inline_base0.c
index 517ee972..698c760f 100644
--- a/validation/inline_base0.c
+++ b/validation/inline_base0.c
@@ -27,6 +27,7 @@ foo0:
.L0:
<entry-point>
add.32 %r5 <- %arg1, %arg2
+ # call %r6 <- add, %r1, %r2
ret.32 %r5
@@ -34,12 +35,14 @@ foo1:
.L3:
<entry-point>
add.32 %r10 <- %arg1, $1
+ # call %r11 <- add, %r8, $1
ret.32 %r10
foo2:
.L6:
<entry-point>
+ # call %r13 <- add, $1, $2
ret.32 $3
diff --git a/validation/linear/builtin_unreachable0.c b/validation/linear/builtin_unreachable0.c
index 911ed7f9..4fc56473 100644
--- a/validation/linear/builtin_unreachable0.c
+++ b/validation/linear/builtin_unreachable0.c
@@ -14,12 +14,12 @@ foo:
.L0:
<entry-point>
seteq.32 %r2 <- %arg1, $3
- cbr %r2, .L1, .L3
+ cbr %r2, .L1, .L2
.L1:
unreachable
-.L3:
+.L2:
ret.32 %arg1
diff --git a/validation/linear/builtin_unreachable1.c b/validation/linear/builtin_unreachable1.c
index 70f6674c..2fc1d728 100644
--- a/validation/linear/builtin_unreachable1.c
+++ b/validation/linear/builtin_unreachable1.c
@@ -16,9 +16,9 @@ int foo(int c)
foo:
.L0:
<entry-point>
- cbr %arg1, .L3, .L2
+ cbr %arg1, .L1, .L2
-.L3:
+.L1:
ret.32 $1
.L2:
diff --git a/validation/linear/call-inline.c b/validation/linear/call-inline.c
index dfd49b62..1ad785ee 100644
--- a/validation/linear/call-inline.c
+++ b/validation/linear/call-inline.c
@@ -13,6 +13,6 @@ int i3(void) { return (***fun)(); } // C99,C11 6.5.3.2p4
*
* check-output-ignore
* check-output-excludes: load
- * check-output-excludes: call
+ * check-output-excludes: \\tcall
* check-output-pattern(5): ret\\..* \\$42
*/
diff --git a/validation/mem2reg/cond-expr.c b/validation/mem2reg/cond-expr.c
index 8acb00ac..2474d65d 100644
--- a/validation/mem2reg/cond-expr.c
+++ b/validation/mem2reg/cond-expr.c
@@ -9,6 +9,6 @@ int foo(int a, int b, int c)
* check-name: cond-expr
* check-command: test-linearize -Wno-decl -fdump-ir=mem2reg $file
* check-output-ignore
- * check-output-pattern(2): phi\\.
- * check-output-pattern(3): phisrc\\.
+ * check-output-pattern(1): phi\\.
+ * check-output-pattern(2): phisrc\\.
*/
diff --git a/validation/mem2reg/cond-expr5.c b/validation/mem2reg/cond-expr5.c
index a3ce5e3a..beef8f25 100644
--- a/validation/mem2reg/cond-expr5.c
+++ b/validation/mem2reg/cond-expr5.c
@@ -15,7 +15,6 @@ int foo(int p, int q, int a)
* check-output-ignore
* check-output-excludes: load\\.
* check-output-excludes: store\\.
- * check-output-excludes: phi\\..*, .*, .*
- * check-output-pattern(3): phi\\.
- * check-output-pattern(5): phisrc\\.
+ * check-output-pattern(2): phi\\.
+ * check-output-pattern(4): phisrc\\.
*/
diff --git a/validation/optim/cse-size.c b/validation/optim/cse-size.c
index e1a5d492..0c0c2d14 100644
--- a/validation/optim/cse-size.c
+++ b/validation/optim/cse-size.c
@@ -13,6 +13,6 @@ static void foo(void)
* check-command: test-linearize -Wno-decl $file
*
* check-output-ignore
- * check-output-pattern(0,1): phi\\.
+ * check-output-excludes: phi\\.
* check-output-excludes: cbr
*/