From ca7d65cd6f5923e006a1552fe14a1e1724c73a0b Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 17 Oct 2012 17:12:05 +0000 Subject: * tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Add edge_to_cancel parameter and use it to estimate code optimized out in the final iteration. (loop_edge_to_cancel): New function. (try_unroll_loop_completely): New IRRED_INVALIDATED parameter; handle unrolling loops with bounds given via max_loop_iterations; handle unrolling non-inner loops when code size shrinks; tidy dump output; when the last iteration loop still stays as a loop in the CFG, forcibly redirect the latch to __builtin_unreachable. (canonicalize_loop_induction_variables): Add irred_invalidated parameter; record niter bound derived; dump max_loop_iterations bounds; call try_unroll_loop_completely even if no niter bound is given. (canonicalize_induction_variables): Handle irred_invalidated. (tree_unroll_loops_completely): Handle non-innermost loops; handle irred_invalidated. * cfgloop.h (unloop): Declare. * cfgloopmanip.c (unloop): Export. * tree.c (build_common_builtin_nodes): Build BUILT_IN_UNREACHABLE. * gcc.target/i386/l_fma_float_?.c: Update. * gcc.target/i386/l_fma_double_?.c: Update. * gfortran.dg/do_1.f90: XFAIL. * gcc.dg/tree-ssa/cunroll-1.c: New testcase. * gcc.dg/tree-ssa/cunroll-2.c: New testcase. * gcc.dg/tree-ssa/cunroll-3.c: New testcase. * gcc.dg/tree-ssa/cunroll-4.c: New testcase. * gcc.dg/tree-ssa/cunroll-5.c: New testcase. * gcc.dg/tree-ssa/ldist-17.c: Block cunroll to make testcase still valid. 
git-svn-id: https://gcc.gnu.org/svn/gcc/trunk@192538 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 22 ++ gcc/cfgloop.h | 3 +- gcc/cfgloopmanip.c | 3 +- gcc/testsuite/ChangeLog | 13 ++ gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c | 2 +- gcc/testsuite/gcc.target/i386/l_fma_double_1.c | 16 +- gcc/testsuite/gcc.target/i386/l_fma_double_2.c | 8 +- gcc/testsuite/gcc.target/i386/l_fma_double_3.c | 16 +- gcc/testsuite/gcc.target/i386/l_fma_double_4.c | 8 +- gcc/testsuite/gcc.target/i386/l_fma_double_5.c | 8 +- gcc/testsuite/gcc.target/i386/l_fma_double_6.c | 8 +- gcc/testsuite/gcc.target/i386/l_fma_float_1.c | 16 +- gcc/testsuite/gcc.target/i386/l_fma_float_2.c | 8 +- gcc/testsuite/gcc.target/i386/l_fma_float_3.c | 16 +- gcc/testsuite/gcc.target/i386/l_fma_float_4.c | 8 +- gcc/testsuite/gcc.target/i386/l_fma_float_5.c | 8 +- gcc/testsuite/gcc.target/i386/l_fma_float_6.c | 8 +- gcc/testsuite/gfortran.dg/do_1.f90 | 3 +- gcc/tree-ssa-loop-ivcanon.c | 283 +++++++++++++++++++++---- gcc/tree.c | 9 + 20 files changed, 360 insertions(+), 106 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 992322e1692..63635b8f019 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2012-10-17 Jan Hubicka + + * tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Add edge_to_cancel + parameter and use it to estimate code optimized out in the final iteration. + (loop_edge_to_cancel): New function. + (try_unroll_loop_completely): New IRRED_IVALIDATED parameter; + handle unrolling loops with bounds given via max_loop_iteratins; + handle unrolling non-inner loops when code size shrinks; + tidy dump output; when the last iteration loop still stays + as loop in the CFG forcongly redirect the latch to + __builtin_unreachable. + (canonicalize_loop_induction_variables): Add irred_invlaidated + parameter; record niter bound derrived; dump + max_loop_iterations bounds; call try_unroll_loop_completely + even if no niter bound is given. 
+ (canonicalize_induction_variables): Handle irred_invalidated. + (tree_unroll_loops_completely): Handle non-innermost loops; + handle irred_invalidated. + * cfgloop.h (unlop): Declare. + * cfgloopmanip.c (unloop): Export. + * tree.c (build_common_builtin_nodes): Build BULTIN_UNREACHABLE. + 2012-10-17 Michael Meissner * opth-gen.awk (TARGET_* generation): Always generate TARGET_ diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 95f5d5313da..97660663781 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -320,7 +320,8 @@ extern struct loop *loopify (edge, edge, struct loop * loop_version (struct loop *, void *, basic_block *, unsigned, unsigned, unsigned, bool); extern bool remove_path (edge); -void scale_loop_frequencies (struct loop *, int, int); +extern void unloop (struct loop *, bool *); +extern void scale_loop_frequencies (struct loop *, int, int); /* Induction variable analysis. */ diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 8a44a0b6f93..98f359e086c 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -37,7 +37,6 @@ static int find_path (edge, basic_block **); static void fix_loop_placements (struct loop *, bool *); static bool fix_bb_placement (basic_block); static void fix_bb_placements (basic_block, bool *); -static void unloop (struct loop *, bool *); /* Checks whether basic block BB is dominated by DATA. */ static bool @@ -895,7 +894,7 @@ loopify (edge latch_edge, edge header_edge, If this may cause the information about irreducible regions to become invalid, IRRED_INVALIDATED is set to true. */ -static void +void unloop (struct loop *loop, bool *irred_invalidated) { basic_block *body; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6c6bf05ef70..da34d4d6c8a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2012-10-16 Jan Hubicka + + * gcc.target/i386/l_fma_float_?.c: Update. + * gcc.target/i386/l_fma_double_?.c: Update. 
+ * gfortran.dg/do_1.f90: XFAIL + * gcc.dg/tree-ssa/cunroll-1.c: New testcase. + * gcc.dg/tree-ssa/cunroll-2.c: New testcase. + * gcc.dg/tree-ssa/cunroll-3.c: New testcase. + * gcc.dg/tree-ssa/cunroll-4.c: New testcase. + * gcc.dg/tree-ssa/cunroll-5.c: New testcase. + * gcc.dg/tree-ssa/ldist-17.c: Block cunroll to make testcase still + valid. + 2012-10-16 Manuel López-Ibáñez PR c/53063 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c index fe40bed5811..5c280b3f0c4 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details -fdisable-tree-cunroll -fdisable-tree-cunrolli" } */ typedef int mad_fixed_t; struct mad_pcm diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c index 87225ba3e5d..716acfef65c 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd213sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmsub213sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213sd" 8 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */ +/* { dg-final { 
scan-assembler-times "vfmadd213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c index 8b00fe1ef85..01173afb223 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c index 37d062c3a2a..8cda521a870 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd213sd" 8 } } */ -/* { dg-final { scan-assembler-times 
"vfmsub132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmsub213sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213sd" 8 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfmadd213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfmsub213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 20 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213sd" 20 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c index 7311913e837..9f2331b51e8 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c index a7a337be1d9..9e33975b1e4 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c +++ 
b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c index fcb596c550d..28d264dd20d 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfmsub132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132sd" 40 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132sd" 40 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c index b85971ddb34..fea0b20619d 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c @@ -16,11 +16,11 @@ /* { dg-final { 
scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd213ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213ss" 8 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c index 9cd02495b1b..dd5f543f58c 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ +/* { dg-final { 
scan-assembler-times "vfnmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c index 8388cfe03f3..38853353b01 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c @@ -16,11 +16,11 @@ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd213ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfmsub213ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmadd213ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 8 } } */ -/* { dg-final { scan-assembler-times "vfnmsub213ss" 8 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmadd213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfmsub213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmadd213ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 36 } } */ +/* { dg-final { scan-assembler-times "vfnmsub213ss" 36 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c index bb8df69893b..5a7bb217836 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } 
*/ -/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c index 3adf99f57fc..0b0454ed336 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfmsub132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */ diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c index ddf05e200d8..03bf8e84835 100644 --- a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c +++ b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c @@ -12,7 +12,7 @@ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ -/* { dg-final { scan-assembler-times "vfmadd132ss" 16 } } */ -/* { dg-final { 
scan-assembler-times "vfmsub132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmadd132ss" 16 } } */ -/* { dg-final { scan-assembler-times "vfnmsub132ss" 16 } } */ +/* { dg-final { scan-assembler-times "vfmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfmsub132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132ss" 72 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132ss" 72 } } */ diff --git a/gcc/testsuite/gfortran.dg/do_1.f90 b/gcc/testsuite/gfortran.dg/do_1.f90 index 171275af3f2..8ed0f7fb6c2 100644 --- a/gcc/testsuite/gfortran.dg/do_1.f90 +++ b/gcc/testsuite/gfortran.dg/do_1.f90 @@ -1,4 +1,5 @@ -! { dg-do run } +! { dg-do run { xfail *-*-* } } +! XFAIL is tracked in PR 54932 ! Program to check corner cases for DO statements. program do_1 implicit none diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index b790e1f43cc..81bf09e9f8a 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -192,7 +192,7 @@ constant_after_peeling (tree op, gimple stmt, struct loop *loop) Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT. 
*/ static void -tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size) +tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel, struct loop_size *size) { basic_block *body = get_loop_body (loop); gimple_stmt_iterator gsi; @@ -208,8 +208,8 @@ tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size) fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num); for (i = 0; i < loop->num_nodes; i++) { - if (exit && body[i] != exit->src - && dominated_by_p (CDI_DOMINATORS, body[i], exit->src)) + if (edge_to_cancel && body[i] != edge_to_cancel->src + && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src)) after_exit = true; else after_exit = false; @@ -231,7 +231,7 @@ tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size) /* Look for reasons why we might optimize this stmt away. */ /* Exit conditional. */ - if (body[i] == exit->src && stmt == last_stmt (exit->src)) + if (exit && body[i] == exit->src && stmt == last_stmt (exit->src)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Exit condition will be eliminated.\n"); @@ -314,36 +314,161 @@ estimated_unrolled_size (struct loop_size *size, return unr_insns; } +/* Loop LOOP is known to not loop. See if there is an edge in the loop + body that can be remove to make the loop to always exit and at + the same time it does not make any code potentially executed + during the last iteration dead. + + After complette unrolling we still may get rid of the conditional + on the exit in the last copy even if we have no idea what it does. + This is quite common case for loops of form + + int a[5]; + for (i=0;ilatch->preds) > 1) + return NULL; + + exits = get_loop_exit_edges (loop); + + FOR_EACH_VEC_ELT (edge, exits, i, edge_to_cancel) + { + /* Find the other edge than the loop exit + leaving the conditoinal. 
*/ + if (EDGE_COUNT (edge_to_cancel->src->succs) != 2) + continue; + if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel) + edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1); + else + edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0); + + /* We should never have conditionals in the loop latch. */ + gcc_assert (edge_to_cancel->dest != loop->header); + + /* Check that it leads to loop latch. */ + if (edge_to_cancel->dest != loop->latch) + continue; + + VEC_free (edge, heap, exits); + + /* Verify that the code in loop latch does nothing that may end program + execution without really reaching the exit. This may include + non-pure/const function calls, EH statements, volatile ASMs etc. */ + for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi)) + if (gimple_has_side_effects (gsi_stmt (gsi))) + return NULL; + return edge_to_cancel; + } + VEC_free (edge, heap, exits); + return NULL; +} + /* Tries to unroll LOOP completely, i.e. NITER times. UL determines which loops we are allowed to unroll. - EXIT is the exit of the loop that should be eliminated. */ + EXIT is the exit of the loop that should be eliminated. + IRRED_INVALIDATED is used to bookkeep if information about + irreducible regions may become invalid as a result + of the transformation. */ static bool try_unroll_loop_completely (struct loop *loop, edge exit, tree niter, - enum unroll_level ul) + enum unroll_level ul, + bool *irred_invalidated) { unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; gimple cond; struct loop_size size; + bool n_unroll_found = false; + HOST_WIDE_INT maxiter; + basic_block latch; + edge latch_edge; + location_t locus; + int flags; + gimple stmt; + gimple_stmt_iterator gsi; + edge edge_to_cancel = NULL; + int num = loop->num; - if (loop->inner) - return false; + /* See if we proved number of iterations to be low constant. - if (!host_integerp (niter, 1)) + EXIT is an edge that will be removed in all but last iteration of + the loop. 
+ + EDGE_TO_CACNEL is an edge that will be removed from the last iteration + of the unrolled sequence and is expected to make the final loop not + rolling. + + If the number of execution of loop is determined by standard induction + variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving + from the iv test. */ + if (host_integerp (niter, 1)) + { + n_unroll = tree_low_cst (niter, 1); + n_unroll_found = true; + edge_to_cancel = EDGE_SUCC (exit->src, 0); + if (edge_to_cancel == exit) + edge_to_cancel = EDGE_SUCC (exit->src, 1); + } + /* We do not know the number of iterations and thus we can not eliminate + the EXIT edge. */ + else + exit = NULL; + + /* See if we can improve our estimate by using recorded loop bounds. */ + maxiter = max_loop_iterations_int (loop); + if (maxiter >= 0 + && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll)) + { + n_unroll = maxiter; + n_unroll_found = true; + /* Loop terminates before the IV variable test, so we can not + remove it in the last iteration. */ + edge_to_cancel = NULL; + } + + if (!n_unroll_found) return false; - n_unroll = tree_low_cst (niter, 1); max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); if (n_unroll > max_unroll) return false; + if (!edge_to_cancel) + edge_to_cancel = loop_edge_to_cancel (loop); + if (n_unroll) { + sbitmap wont_exit; + edge e; + unsigned i; + VEC (edge, heap) *to_remove = NULL; if (ul == UL_SINGLE_ITER) return false; - tree_estimate_loop_size (loop, exit, &size); + tree_estimate_loop_size (loop, exit, edge_to_cancel, &size); ninsns = size.overall; unr_insns = estimated_unrolled_size (&size, n_unroll); @@ -354,6 +479,18 @@ try_unroll_loop_completely (struct loop *loop, (int) unr_insns); } + /* We unroll only inner loops, because we do not consider it profitable + otheriwse. We still can cancel loopback edge of not rolling loop; + this is always a good idea. 
*/ + if (loop->inner && unr_insns > ninsns) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d:" + "it is not innermost and code would grow.\n", + loop->num); + return false; + } + if (unr_insns > ninsns && (unr_insns > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))) @@ -369,17 +506,10 @@ try_unroll_loop_completely (struct loop *loop, && unr_insns > ninsns) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d.\n", loop->num); + fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", + loop->num); return false; } - } - - if (n_unroll) - { - sbitmap wont_exit; - edge e; - unsigned i; - VEC (edge, heap) *to_remove = NULL; initialize_original_copy_tables (); wont_exit = sbitmap_alloc (n_unroll + 1); @@ -408,15 +538,67 @@ try_unroll_loop_completely (struct loop *loop, free_original_copy_tables (); } - cond = last_stmt (exit->src); - if (exit->flags & EDGE_TRUE_VALUE) - gimple_cond_make_true (cond); + /* Remove the conditional from the last copy of the loop. */ + if (edge_to_cancel) + { + cond = last_stmt (edge_to_cancel->src); + if (edge_to_cancel->flags & EDGE_TRUE_VALUE) + gimple_cond_make_false (cond); + else + gimple_cond_make_true (cond); + update_stmt (cond); + /* Do not remove the path. Doing so may remove outer loop + and confuse bookkeeping code in tree_unroll_loops_completelly. */ + } + /* We did not manage to cancel the loop. + The loop latch remains reachable even if it will never be reached + at runtime. We must redirect it to somewhere, so create basic + block containg __builtin_unreachable call for this reason. */ else - gimple_cond_make_false (cond); - update_stmt (cond); + { + latch = loop->latch; + latch_edge = loop_latch_edge (loop); + flags = latch_edge->flags; + locus = latch_edge->goto_locus; + + /* Unloop destroys the latch edge. 
*/ + unloop (loop, irred_invalidated); + + /* Create new basic block for the latch edge destination and wire + it in. */ + stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); + latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); + latch_edge->probability = 0; + latch_edge->count = 0; + latch_edge->flags |= flags; + latch_edge->goto_locus = locus; + + latch_edge->dest->loop_father = current_loops->tree_root; + latch_edge->dest->count = 0; + latch_edge->dest->frequency = 0; + set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); + + gsi = gsi_start_bb (latch_edge->dest); + gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); + } if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num); + { + if (!n_unroll) + fprintf (dump_file, "Turned loop %d to non-loop; it never loops.\n", + num); + else + fprintf (dump_file, "Unrolled loop %d completely " + "(duplicated %i times).\n", num, (int)n_unroll); + if (exit) + fprintf (dump_file, "Exit condition of peeled iterations was " + "eliminated.\n"); + if (edge_to_cancel) + fprintf (dump_file, "Last iteration exit edge was proved true.\n"); + else + fprintf (dump_file, "Latch of last iteration was marked by " + "__builtin_unreachable ().\n"); + } return true; } @@ -425,12 +607,15 @@ try_unroll_loop_completely (struct loop *loop, CREATE_IV is true if we may create a new iv. UL determines which loops we are allowed to completely unroll. If TRY_EVAL is true, we try to determine the number of iterations of a loop by direct evaluation. - Returns true if cfg is changed. */ + Returns true if cfg is changed. + + IRRED_INVALIDATED is used to keep if irreducible reginos needs to be recomputed. 
*/ static bool canonicalize_loop_induction_variables (struct loop *loop, bool create_iv, enum unroll_level ul, - bool try_eval) + bool try_eval, + bool *irred_invalidated) { edge exit = NULL; tree niter; @@ -455,22 +640,34 @@ canonicalize_loop_induction_variables (struct loop *loop, || TREE_CODE (niter) != INTEGER_CST)) niter = find_loop_niter_by_eval (loop, &exit); - if (chrec_contains_undetermined (niter) - || TREE_CODE (niter) != INTEGER_CST) - return false; + if (TREE_CODE (niter) != INTEGER_CST) + exit = NULL; } - if (dump_file && (dump_flags & TDF_DETAILS)) + /* We work exceptionally hard here to estimate the bound + by find_loop_niter_by_eval. Be sure to keep it for future. */ + if (niter && TREE_CODE (niter) == INTEGER_CST) + record_niter_bound (loop, tree_to_double_int (niter), false, true); + + if (dump_file && (dump_flags & TDF_DETAILS) + && TREE_CODE (niter) == INTEGER_CST) { fprintf (dump_file, "Loop %d iterates ", loop->num); print_generic_expr (dump_file, niter, TDF_SLIM); fprintf (dump_file, " times.\n"); } + if (dump_file && (dump_flags & TDF_DETAILS) + && max_loop_iterations_int (loop) >= 0) + { + fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num, + (int)max_loop_iterations_int (loop)); + } - if (try_unroll_loop_completely (loop, exit, niter, ul)) + if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated)) return true; - if (create_iv) + if (create_iv + && niter && !chrec_contains_undetermined (niter)) create_canonical_iv (loop, exit, niter); return false; @@ -485,15 +682,21 @@ canonicalize_induction_variables (void) loop_iterator li; struct loop *loop; bool changed = false; + bool irred_invalidated = false; FOR_EACH_LOOP (li, loop, 0) { changed |= canonicalize_loop_induction_variables (loop, true, UL_SINGLE_ITER, - true); + true, + &irred_invalidated); } gcc_assert (!need_ssa_update_p (cfun)); + if (irred_invalidated + && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) + mark_irreducible_loops 
(); + /* Clean up the information about numbers of iterations, since brute force evaluation could reveal new information. */ scev_reset (); @@ -594,9 +797,10 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) do { + bool irred_invalidated = false; changed = false; - FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) + FOR_EACH_LOOP (li, loop, 0) { struct loop *loop_father = loop_outer (loop); @@ -609,7 +813,8 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) ul = UL_NO_GROWTH; if (canonicalize_loop_induction_variables (loop, false, ul, - !flag_tree_loop_ivcanon)) + !flag_tree_loop_ivcanon, + &irred_invalidated)) { changed = true; /* If we'll continue unrolling, we need to propagate constants @@ -629,6 +834,10 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) struct loop **iter; unsigned i; + if (irred_invalidated + && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) + mark_irreducible_loops (); + update_ssa (TODO_update_ssa); /* Propagate the constants within the new basic blocks. */ diff --git a/gcc/tree.c b/gcc/tree.c index 8df1b86d2be..d974362de27 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -9524,6 +9524,15 @@ build_common_builtin_nodes (void) tree tmp, ftype; int ecf_flags; + if (!builtin_decl_explicit_p (BUILT_IN_UNREACHABLE)) + { + ftype = build_function_type (void_type_node, void_list_node); + local_define_builtin ("__builtin_unreachable", ftype, BUILT_IN_UNREACHABLE, + "__builtin_unreachable", + ECF_NOTHROW | ECF_LEAF | ECF_NORETURN + | ECF_CONST | ECF_LEAF); + } + if (!builtin_decl_explicit_p (BUILT_IN_MEMCPY) || !builtin_decl_explicit_p (BUILT_IN_MEMMOVE)) { -- cgit v1.2.3