about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2020-10-13 09:30:47 +0200
committer Kwok Cheung Yeung <kcy@codesourcery.com> 2021-02-09 10:09:02 -0800
commit 88528328ea560230f728af97110e89396c8267d2 (patch)
tree 94d607a1b184ed23b70ac0442d7374b3b7f7ec89 /gcc
parent 758fdf6514348a40ed424f3244cb25b92a005095 (diff)
openmp: Improve composite triangular loop lowering and expansion
This propagates needed values from the point where number of iterations is calculated on composite loops to the places where that information is needed to use the more efficient square root discovery to compute the starting iterator values from the logical iteration number.

2020-10-13 Jakub Jelinek <jakub@redhat.com>

* omp-low.c (add_taskreg_looptemp_clauses): For triangular loops with non-constant number of iterations add another 4 _looptemp_ clauses before the (optional) one for lastprivate.
(lower_omp_for_lastprivate): Skip those clauses when looking for the lastprivate clause.
(lower_omp_for): For triangular loops with non-constant number of iterations add another 4 _looptemp_ clauses.
* omp-expand.c (expand_omp_for_init_counts): For triangular loops with non-constant number of iterations set counts[0], fd->first_inner_iterations, fd->factor and fd->adjn1 from the newly added _looptemp_ clauses.
(expand_omp_for_init_vars): Initialize the newly added _looptemp_ clauses.
(find_lastprivate_looptemp): New function.
(expand_omp_for_static_nochunk, expand_omp_for_static_chunk, expand_omp_taskloop_for_outer): Use it instead of manually skipping _looptemp_ clauses.

(cherry picked from commit 14707c896a207606f13886d3b3251e8db1f3c9c0)
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog.omp 23
-rw-r--r-- gcc/omp-expand.c 102
-rw-r--r-- gcc/omp-low.c 64
3 files changed, 147 insertions, 42 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index c924dfd2565..94c0957318b 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,6 +1,29 @@
2021-02-09 Kwok Cheung Yeung <kcy@codesourcery.com>
Backport from mainline
+ 2020-10-13 Jakub Jelinek <jakub@redhat.com>
+
+ * omp-low.c (add_taskreg_looptemp_clauses): For triangular loops
+ with non-constant number of iterations add another 4 _looptemp_
+ clauses before the (optional) one for lastprivate.
+ (lower_omp_for_lastprivate): Skip those clauses when looking for
+ the lastprivate clause.
+ (lower_omp_for): For triangular loops with non-constant number of
+ iterations add another 4 _looptemp_ clauses.
+ * omp-expand.c (expand_omp_for_init_counts): For triangular loops
+ with non-constant number of iterations set counts[0],
+ fd->first_inner_iterations, fd->factor and fd->adjn1 from the newly
+ added _looptemp_ clauses.
+ (expand_omp_for_init_vars): Initialize the newly added _looptemp_
+ clauses.
+ (find_lastprivate_looptemp): New function.
+ (expand_omp_for_static_nochunk, expand_omp_for_static_chunk,
+ expand_omp_taskloop_for_outer): Use it instead of manually skipping
+ _looptemp_ clauses.
+
+2021-02-09 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ Backport from mainline
2020-08-13 Jakub Jelinek <jakub@redhat.com>
* gimplify.c (gimplify_omp_taskloop_expr): New function.
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 4c4d86bf489..71bf80308c9 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -1842,6 +1842,23 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
else
counts[0] = NULL_TREE;
}
+ if (fd->non_rect
+ && fd->last_nonrect == fd->first_nonrect + 1
+ && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+ {
+ tree c[4];
+ for (i = 0; i < 4; i++)
+ {
+ innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+ gcc_assert (innerc);
+ c[i] = OMP_CLAUSE_DECL (innerc);
+ }
+ counts[0] = c[0];
+ fd->first_inner_iterations = c[1];
+ fd->factor = c[2];
+ fd->adjn1 = c[3];
+ }
return;
}
@@ -2486,7 +2503,12 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
use it. */
tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
- for (i = 0; i < fd->collapse; i++)
+ int count = 0;
+ if (fd->non_rect
+ && fd->last_nonrect == fd->first_nonrect + 1
+ && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+ count = 4;
+ for (i = 0; i < fd->collapse + count; i++)
{
innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
OMP_CLAUSE__LOOPTEMP_);
@@ -2494,7 +2516,19 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
if (i)
{
tree tem = OMP_CLAUSE_DECL (innerc);
- tree t = fold_convert (TREE_TYPE (tem), counts[i]);
+ tree t;
+ if (i < fd->collapse)
+ t = counts[i];
+ else
+ switch (i - fd->collapse)
+ {
+ case 0: t = counts[0]; break;
+ case 1: t = fd->first_inner_iterations; break;
+ case 2: t = fd->factor; break;
+ case 3: t = fd->adjn1; break;
+ default: gcc_unreachable ();
+ }
+ t = fold_convert (TREE_TYPE (tem), t);
t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
gassign *stmt = gimple_build_assign (tem, t);
@@ -2530,10 +2564,7 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
basic_block bb_triang = NULL, bb_triang_dom = NULL;
if (fd->first_nonrect + 1 == fd->last_nonrect
&& (TREE_CODE (fd->loop.n2) == INTEGER_CST
- || (fd->first_inner_iterations
- /* For now. Later add clauses to propagate the
- values. */
- && !gimple_omp_for_combined_into_p (fd->for_stmt)))
+ || fd->first_inner_iterations)
&& (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
!= CODE_FOR_nothing))
{
@@ -4718,6 +4749,35 @@ expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
return ptr;
}
+/* Return the last _looptemp_ clause if one has been created for
+ lastprivate on distribute parallel for{, simd} or taskloop.
+ FD is the loop data and INNERC should be the second _looptemp_
+ clause (the one holding the end of the range).
+ This is followed by collapse - 1 _looptemp_ clauses for the
+ counts[1] and up, and for triangular loops followed by 4
+ further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
+ one factor and one adjn1). After this there is optionally one
+ _looptemp_ clause that this function returns. */
+
+static tree
+find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
+{
+ gcc_assert (innerc);
+ int count = fd->collapse - 1;
+ if (fd->non_rect
+ && fd->last_nonrect == fd->first_nonrect + 1
+ && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+ count += 4;
+ for (int i = 0; i < count; i++)
+ {
+ innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+ gcc_assert (innerc);
+ }
+ return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+}
+
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with static schedule and no specified chunk size. Given
parameters:
@@ -5142,15 +5202,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
&& gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
{
- int i;
- for (i = 1; i < fd->collapse; i++)
- {
- innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
- OMP_CLAUSE__LOOPTEMP_);
- gcc_assert (innerc);
- }
- innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
- OMP_CLAUSE__LOOPTEMP_);
+ innerc = find_lastprivate_looptemp (fd, innerc);
if (innerc)
{
/* If needed (distribute parallel for with lastprivate),
@@ -5867,15 +5919,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
&& gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
{
- int i;
- for (i = 1; i < fd->collapse; i++)
- {
- innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
- OMP_CLAUSE__LOOPTEMP_);
- gcc_assert (innerc);
- }
- innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
- OMP_CLAUSE__LOOPTEMP_);
+ innerc = find_lastprivate_looptemp (fd, innerc);
if (innerc)
{
/* If needed (distribute parallel for with lastprivate),
@@ -6808,15 +6852,7 @@ expand_omp_taskloop_for_outer (struct omp_region *region,
tree endvar = OMP_CLAUSE_DECL (innerc);
if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
{
- gcc_assert (innerc);
- for (i = 1; i < fd->collapse; i++)
- {
- innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
- OMP_CLAUSE__LOOPTEMP_);
- gcc_assert (innerc);
- }
- innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
- OMP_CLAUSE__LOOPTEMP_);
+ innerc = find_lastprivate_looptemp (fd, innerc);
if (innerc)
{
/* If needed (inner taskloop has lastprivate clause), propagate
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 03372f93f4e..66519ad9f90 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -2258,12 +2258,38 @@ add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt,
GIMPLE_OMP_FOR, add one more temporaries for the total number
of iterations (product of count1 ... countN-1). */
if (omp_find_clause (gimple_omp_for_clauses (for_stmt),
- OMP_CLAUSE_LASTPRIVATE))
- count++;
- else if (msk == GF_OMP_FOR_KIND_FOR
- && omp_find_clause (gimple_omp_parallel_clauses (stmt),
- OMP_CLAUSE_LASTPRIVATE))
- count++;
+ OMP_CLAUSE_LASTPRIVATE)
+ || (msk == GF_OMP_FOR_KIND_FOR
+ && omp_find_clause (gimple_omp_parallel_clauses (stmt),
+ OMP_CLAUSE_LASTPRIVATE)))
+ {
+ tree temp = create_tmp_var (type);
+ tree c = build_omp_clause (UNKNOWN_LOCATION,
+ OMP_CLAUSE__LOOPTEMP_);
+ insert_decl_map (&outer_ctx->cb, temp, temp);
+ OMP_CLAUSE_DECL (c) = temp;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_taskreg_clauses (stmt);
+ gimple_omp_taskreg_set_clauses (stmt, c);
+ }
+ if (fd.non_rect
+ && fd.last_nonrect == fd.first_nonrect + 1)
+ if (tree v = gimple_omp_for_index (for_stmt, fd.last_nonrect))
+ if (!TYPE_UNSIGNED (TREE_TYPE (v)))
+ {
+ v = gimple_omp_for_index (for_stmt, fd.first_nonrect);
+ tree type2 = TREE_TYPE (v);
+ count++;
+ for (i = 0; i < 3; i++)
+ {
+ tree temp = create_tmp_var (type2);
+ tree c = build_omp_clause (UNKNOWN_LOCATION,
+ OMP_CLAUSE__LOOPTEMP_);
+ insert_decl_map (&outer_ctx->cb, temp, temp);
+ OMP_CLAUSE_DECL (c) = temp;
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_taskreg_clauses (stmt);
+ gimple_omp_taskreg_set_clauses (stmt, c);
+ }
+ }
}
for (i = 0; i < count; i++)
{
@@ -10155,7 +10181,13 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
tree innerc = omp_find_clause (taskreg_clauses,
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
- for (i = 0; i < fd->collapse; i++)
+ int count = fd->collapse;
+ if (fd->non_rect
+ && fd->last_nonrect == fd->first_nonrect + 1)
+ if (tree v = gimple_omp_for_index (fd->for_stmt, fd->last_nonrect))
+ if (!TYPE_UNSIGNED (TREE_TYPE (v)))
+ count += 4;
+ for (i = 0; i < count; i++)
{
innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
OMP_CLAUSE__LOOPTEMP_);
@@ -11136,12 +11168,26 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (fd.collapse > 1
&& TREE_CODE (fd.loop.n2) != INTEGER_CST)
count += fd.collapse - 1;
+ size_t count2 = 0;
+ tree type2 = NULL_TREE;
bool taskreg_for
= (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR
|| gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP);
tree outerc = NULL, *pc = gimple_omp_for_clauses_ptr (stmt);
tree simtc = NULL;
tree clauses = *pc;
+ if (fd.collapse > 1
+ && fd.non_rect
+ && fd.last_nonrect == fd.first_nonrect + 1
+ && TREE_CODE (fd.loop.n2) != INTEGER_CST)
+ if (tree v = gimple_omp_for_index (stmt, fd.last_nonrect))
+ if (!TYPE_UNSIGNED (TREE_TYPE (v)))
+ {
+ v = gimple_omp_for_index (stmt, fd.first_nonrect);
+ type2 = TREE_TYPE (v);
+ count++;
+ count2 = 3;
+ }
if (taskreg_for)
outerc
= omp_find_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt),
@@ -11149,7 +11195,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (ctx->simt_stmt)
simtc = omp_find_clause (gimple_omp_for_clauses (ctx->simt_stmt),
OMP_CLAUSE__LOOPTEMP_);
- for (i = 0; i < count; i++)
+ for (i = 0; i < count + count2; i++)
{
tree temp;
if (taskreg_for)
@@ -11168,7 +11214,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (ctx->simt_stmt)
temp = OMP_CLAUSE_DECL (simtc);
else
- temp = create_tmp_var (type);
+ temp = create_tmp_var (i >= count ? type2 : type);
insert_decl_map (&ctx->outer->cb, temp, temp);
}
*pc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);