crosstool-ng/patches/gcc/4.3.2/360-fix-expensive-optimize.patch
Yann E. MORIN" 380ac20c2d On 20090131.1659+0100, Vincent Sanders <vince@kyllikki.org> wrote:
[This]patch is a bit more involved. The patch addresses a gcc
  regression in the 4.3 series (specifically this patch is against 4.3.2
  which does *not* have a lot of other issues which affect kernel building)

  GCC bug tracker has this issue as
  #38453 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38453
  #32044 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32044

  comment 65 of #32044 has the fix being applied to gcc trunk as revision #142719

  The attached patch is a backport to gcc 4.3.2 which allows this
  version to be used to generate correct output for various ARM kernel
  build (and indeed is teh correct answer in general).

 /trunk/patches/gcc/4.3.2/360-fix-expensive-optimize.patch |  207   207     0     0 +++++++++++++++++++++
 1 file changed, 207 insertions(+)
2009-01-31 17:49:27 +00:00

208 lines
6.4 KiB
Diff

PR tree-optimization/32044
From: rakdver
Date: 2008-12-12 21:32:47 +0100
* tree-scalar-evolution.h (expression_expensive_p): Declare.
* tree-scalar-evolution.c (expression_expensive_p): New function.
(scev_const_prop): Avoid introducing expensive expressions.
* tree-ssa-loop-ivopts.c (may_eliminate_iv): Ditto.
* gcc.dg/pr34027-1.c: Change outcome.
* gcc.dg/tree-ssa/pr32044.c: New test.
cherry picked from svn://gcc.gnu.org/svn/gcc/trunk, rev 142719 and adapted to
apply on gcc 4.3.2
------------------------------------------------------------------------
Index: gcc-4.3.2/gcc/tree-scalar-evolution.c
===================================================================
--- gcc-4.3.2.orig/gcc/tree-scalar-evolution.c 2009-01-28 10:14:37.000000000 +0100
+++ gcc-4.3.2/gcc/tree-scalar-evolution.c 2009-01-28 10:17:50.000000000 +0100
@@ -2716,6 +2716,50 @@
scalar_evolution_info = NULL;
}
+/* Returns true if the expression EXPR is considered to be too expensive
+ for scev_const_prop. */
+
+bool
+expression_expensive_p (tree expr)
+{
+ enum tree_code code;
+
+ if (is_gimple_val (expr))
+ return false;
+
+ code = TREE_CODE (expr);
+ if (code == TRUNC_DIV_EXPR
+ || code == CEIL_DIV_EXPR
+ || code == FLOOR_DIV_EXPR
+ || code == ROUND_DIV_EXPR
+ || code == TRUNC_MOD_EXPR
+ || code == CEIL_MOD_EXPR
+ || code == FLOOR_MOD_EXPR
+ || code == ROUND_MOD_EXPR
+ || code == EXACT_DIV_EXPR)
+ {
+ /* Division by power of two is usually cheap, so we allow it.
+ Forbid anything else. */
+ if (!integer_pow2p (TREE_OPERAND (expr, 1)))
+ return true;
+ }
+
+ switch (TREE_CODE_CLASS (code))
+ {
+ case tcc_binary:
+ case tcc_comparison:
+ if (expression_expensive_p (TREE_OPERAND (expr, 1)))
+ return true;
+
+ /* Fallthru. */
+ case tcc_unary:
+ return expression_expensive_p (TREE_OPERAND (expr, 0));
+
+ default:
+ return true;
+ }
+}
+
/* Replace ssa names for that scev can prove they are constant by the
appropriate constants. Also perform final value replacement in loops,
in case the replacement expressions are cheap.
@@ -2802,12 +2846,6 @@
continue;
niter = number_of_latch_executions (loop);
- /* We used to check here whether the computation of NITER is expensive,
- and avoided final value elimination if that is the case. The problem
- is that it is hard to evaluate whether the expression is too
- expensive, as we do not know what optimization opportunities the
- the elimination of the final value may reveal. Therefore, we now
- eliminate the final values of induction variables unconditionally. */
if (niter == chrec_dont_know)
continue;
@@ -2838,7 +2876,15 @@
/* Moving the computation from the loop may prolong life range
of some ssa names, which may cause problems if they appear
on abnormal edges. */
- || contains_abnormal_ssa_name_p (def))
+ || contains_abnormal_ssa_name_p (def)
+ /* Do not emit expensive expressions. The rationale is that
+ when someone writes a code like
+
+ while (n > 45) n -= 45;
+
+ he probably knows that n is not large, and does not want it
+ to be turned into n %= 45. */
+ || expression_expensive_p (def))
continue;
/* Eliminate the PHI node and replace it by a computation outside
Index: gcc-4.3.2/gcc/tree-scalar-evolution.h
===================================================================
--- gcc-4.3.2.orig/gcc/tree-scalar-evolution.h 2009-01-28 10:22:47.000000000 +0100
+++ gcc-4.3.2/gcc/tree-scalar-evolution.h 2009-01-28 10:23:10.000000000 +0100
@@ -35,6 +35,7 @@
extern void scev_analysis (void);
unsigned int scev_const_prop (void);
+bool expression_expensive_p (tree);
extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
/* Returns the loop of the polynomial chrec CHREC. */
Index: gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c
===================================================================
--- gcc-4.3.2.orig/gcc/testsuite/gcc.dg/pr34027-1.c 2009-01-28 10:24:09.000000000 +0100
+++ gcc-4.3.2/gcc/testsuite/gcc.dg/pr34027-1.c 2009-01-28 10:24:43.000000000 +0100
@@ -8,5 +8,9 @@
return ns;
}
-/* { dg-final { scan-tree-dump "ns % 10000" "optimized" } } */
+/* This test was originally introduced to test that we transform
+ to ns % 10000. See the discussion of PR 32044 why we do not do
+ that anymore. */
+/* { dg-final { scan-tree-dump-times "%" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "/" 0 "optimized" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ gcc-4.3.2/gcc/testsuite/gcc.dg/tree-ssa/pr32044.c 2009-01-28 10:25:50.000000000 +0100
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-empty -fdump-tree-final_cleanup" } */
+
+int foo (int n)
+{
+ while (n >= 45)
+ n -= 45;
+
+ return n;
+}
+
+int bar (int n)
+{
+ while (n >= 64)
+ n -= 64;
+
+ return n;
+}
+
+int bla (int n)
+{
+ int i = 0;
+
+ while (n >= 45)
+ {
+ i++;
+ n -= 45;
+ }
+
+ return i;
+}
+
+int baz (int n)
+{
+ int i = 0;
+
+ while (n >= 64)
+ {
+ i++;
+ n -= 64;
+ }
+
+ return i;
+}
+
+/* The loops computing division/modulo by 64 should be eliminated. */
+/* { dg-final { scan-tree-dump-times "Removing empty loop" 2 "empty" } } */
+
+/* There should be no division/modulo in the final dump (division and modulo
+ by 64 are done using bit operations). */
+/* { dg-final { scan-tree-dump-times "/" 0 "final_cleanup" } } */
+/* { dg-final { scan-tree-dump-times "%" 0 "final_cleanup" } } */
+
+/* { dg-final { cleanup-tree-dump "empty" } } */
+/* { dg-final { cleanup-tree-dump "final_cleanup" } } */
Index: gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc-4.3.2.orig/gcc/tree-ssa-loop-ivopts.c 2009-01-28 10:26:04.000000000 +0100
+++ gcc-4.3.2/gcc/tree-ssa-loop-ivopts.c 2009-01-28 10:27:09.000000000 +0100
@@ -3778,7 +3778,12 @@
return false;
cand_value_at (loop, cand, use->stmt, nit, &bnd);
+
*bound = aff_combination_to_tree (&bnd);
+ /* It is unlikely that computing the number of iterations using division
+ would be more profitable than keeping the original induction variable. */
+ if (expression_expensive_p (*bound))
+ return false;
return true;
}