Line data Source code
1 : /* Loop unroll-and-jam.
2 : Copyright (C) 2017-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "tree-pass.h"
24 : #include "backend.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "ssa.h"
28 : #include "fold-const.h"
29 : #include "tree-cfg.h"
30 : #include "tree-ssa.h"
31 : #include "tree-ssa-loop-niter.h"
32 : #include "tree-ssa-loop.h"
33 : #include "tree-ssa-loop-manip.h"
34 : #include "cfgloop.h"
35 : #include "tree-scalar-evolution.h"
36 : #include "gimple-iterator.h"
37 : #include "cfghooks.h"
38 : #include "tree-data-ref.h"
39 : #include "tree-ssa-loop-ivopts.h"
40 : #include "tree-vectorizer.h"
41 : #include "tree-ssa-sccvn.h"
42 : #include "tree-cfgcleanup.h"
43 :
44 : /* Unroll and Jam transformation
45 :
46 : This is a combination of two transformations, where the second
47 : is not always valid. It's applicable if a loop nest has redundancies
48 : over the iterations of an outer loop while not having that with
49 : an inner loop.
50 :
51 : Given this nest:
52 : for (i) {
53 : for (j) {
54 : B(i,j)
55 : }
56 : }
57 :
58 : first unroll:
59 : for (i by 2) {
60 : for (j) {
61 : B(i,j)
62 : }
63 : for (j) {
64 : B(i+1,j)
65 : }
66 : }
67 :
68 : then fuse the two adjacent inner loops resulting from that:
69 : for (i by 2) {
70 : for (j) {
71 : B(i,j)
72 : B(i+1,j)
73 : }
74 : }
75 :
76 : As the order of evaluations of the body B changes this is valid
77 : only in certain situations: all distance vectors need to be forward.
78 : Additionally if there are multiple induction variables than just
79 : a counting control IV (j above) we can also deal with some situations.
80 :
81 : The validity is checked by unroll_jam_possible_p, and the data-dep
82 : testing below.
83 :
84 : A trivial example where the fusion is wrong would be when
85 : B(i,j) == x[j-1] = x[j];
86 : for (i by 2) {
87 : for (j) {
88 : x[j-1] = x[j];
89 : }
90 : for (j) {
91 : x[j-1] = x[j];
92 : }
93 : } effect: move content to front by two elements
94 : -->
95 : for (i by 2) {
96 : for (j) {
97 : x[j-1] = x[j];
98 : x[j-1] = x[j];
99 : }
100 : } effect: move content to front by one element
101 : */
102 :
103 : /* Modify the loop tree for the fact that all code once belonging
104 : to the OLD loop or the outer loop of OLD now is inside LOOP. */
105 :
106 : static void
107 58 : merge_loop_tree (class loop *loop, class loop *old)
108 : {
109 58 : basic_block *bbs;
110 58 : int i, n;
111 58 : class loop *subloop;
112 58 : edge e;
113 58 : edge_iterator ei;
114 :
115 : /* Find its nodes. */
116 58 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
117 58 : n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
118 :
119 467 : for (i = 0; i < n; i++)
120 : {
121 : /* If the block was direct child of OLD loop it's now part
122 : of LOOP. If it was outside OLD, then it moved into LOOP
123 : as well. This avoids changing the loop father for BBs
124 : in inner loops of OLD. */
125 752 : if (bbs[i]->loop_father == old
126 694 : || loop_depth (bbs[i]->loop_father) < loop_depth (old))
127 : {
128 343 : remove_bb_from_loops (bbs[i]);
129 343 : add_bb_to_loop (bbs[i], loop);
130 343 : continue;
131 : }
132 :
133 : /* If we find a direct subloop of OLD, move it to LOOP. */
134 66 : subloop = bbs[i]->loop_father;
135 66 : if (loop_outer (subloop) == old && subloop->header == bbs[i])
136 : {
137 0 : flow_loop_tree_node_remove (subloop);
138 0 : flow_loop_tree_node_add (loop, subloop);
139 : }
140 : }
141 :
142 : /* Update the information about loop exit edges. */
143 467 : for (i = 0; i < n; i++)
144 : {
145 884 : FOR_EACH_EDGE (e, ei, bbs[i]->succs)
146 : {
147 475 : rescan_loop_exit (e, false, false);
148 : }
149 : }
150 :
151 58 : loop->num_nodes = n;
152 :
153 58 : free (bbs);
154 58 : }
155 :
156 : /* BB is part of the outer loop of an unroll-and-jam situation.
157 : Check if any statements therein would prevent the transformation. */
158 :
159 : static bool
160 8283 : bb_prevents_fusion_p (basic_block bb)
161 : {
162 8283 : gimple_stmt_iterator gsi;
163 : /* BB is duplicated by outer unrolling and then all N-1 first copies
164 : move into the body of the fused inner loop. If BB exits the outer loop
165 : the last copy still does so, and the first N-1 copies are cancelled
166 : by loop unrolling, so also after fusion it's the exit block.
167 : But there might be other reasons that prevent fusion:
168 : * stores or unknown side-effects prevent fusion
169 : * loads don't
170 : * computations into SSA names: these aren't problematic. Their
171 : result will be unused on the exit edges of the first N-1 copies
172 : (those aren't taken after unrolling). If they are used on the
173 : other edge (the one leading to the outer latch block) they are
174 : loop-carried (on the outer loop) and the Nth copy of BB will
175 : compute them again (i.e. the first N-1 copies will be dead). */
176 31376 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
177 : {
178 15090 : gimple *g = gsi_stmt (gsi);
179 25463 : if (gimple_vdef (g) || gimple_has_side_effects (g))
180 280 : return true;
181 : }
182 : return false;
183 : }
184 :
185 : /* Given an inner loop LOOP (of some OUTER loop) determine if
186 : we can safely fuse copies of it (generated by outer unrolling).
187 : If so return true, otherwise return false. */
188 :
189 : static bool
190 16254 : unroll_jam_possible_p (class loop *outer, class loop *loop)
191 : {
192 16254 : basic_block *bbs;
193 16254 : int i, n;
194 16254 : class tree_niter_desc niter;
195 :
196 : /* When fusing the loops we skip the latch block
197 : of the first one, so it mustn't have any effects to
198 : preserve. */
199 16254 : if (!empty_block_p (loop->latch))
200 : return false;
201 :
202 15429 : edge exit;
203 15429 : if (!(exit = single_exit (loop)))
204 : return false;
205 :
206 : /* We need a perfect nest. Quick check for adjacent inner loops. */
207 11118 : if (outer->inner != loop || loop->next)
208 : return false;
209 :
210 : /* Prevent head-controlled inner loops, that we usually have.
211 : The guard block would need to be accepted
212 : (invariant condition either entering or skipping the loop),
213 : without also accepting arbitrary control flow. When unswitching
214 : ran before us (as with -O3) this won't be a problem because its
215 : outer loop unswitching will have moved out the invariant condition.
216 :
217 : If we do that we need to extend fuse_loops() to cope with this
218 : by threading through the (still invariant) copied condition
219 : between the two loop copies. */
220 6743 : if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
221 : return false;
222 :
223 : /* The number of iterations of the inner loop must be loop invariant
224 : with respect to the outer loop. */
225 4359 : if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
226 : false, true)
227 3993 : || niter.cmp == ERROR_MARK
228 3993 : || !integer_zerop (niter.may_be_zero)
229 8142 : || !expr_invariant_in_loop_p (outer, niter.niter))
230 1076 : return false;
231 :
232 : /* If the inner loop produces any values that are used inside the
233 : outer loop (except the virtual op) then it can flow
234 : back (perhaps indirectly) into the inner loop. This prevents
235 : fusion: without fusion the value at the last iteration is used,
236 : with fusion the value after the initial iteration is used.
237 :
238 : If all uses are outside the outer loop this doesn't prevent fusion;
239 : the value of the last iteration is still used (and the values from
240 : all intermediate iterations are dead). */
241 3283 : gphi_iterator psi;
242 3283 : for (psi = gsi_start_phis (single_exit (loop)->dest);
243 5836 : !gsi_end_p (psi); gsi_next (&psi))
244 : {
245 3037 : imm_use_iterator imm_iter;
246 3037 : use_operand_p use_p;
247 3037 : tree op = gimple_phi_result (psi.phi ());
248 6074 : if (virtual_operand_p (op))
249 2462 : continue;
250 1259 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
251 : {
252 593 : gimple *use_stmt = USE_STMT (use_p);
253 593 : if (!is_gimple_debug (use_stmt)
254 593 : && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
255 484 : return false;
256 575 : }
257 : }
258 :
259 : /* And check blocks belonging to just outer loop. */
260 2799 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
261 2799 : n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
262 :
263 16445 : for (i = 0; i < n; i++)
264 13961 : if (bbs[i]->loop_father == outer
265 13961 : && (bb_prevents_fusion_p (bbs[i])
266 : /* Outer loop exits must come after the inner loop, otherwise
267 : we'll put the outer loop exit into the fused inner loop. */
268 8003 : || (loop_exits_from_bb_p (outer, bbs[i])
269 2529 : && !dominated_by_p (CDI_DOMINATORS, bbs[i], exit->src))))
270 : break;
271 2799 : free (bbs);
272 2799 : if (i != n)
273 : return false;
274 :
275 : /* For now we can safely fuse copies of LOOP only if all
276 : loop carried variables are inductions (or the virtual op).
277 :
278 : We could handle reductions as well (the initial value in the second
279 : body would be the after-iter value of the first body) if it's over
280 : an associative and commutative operation. We wouldn't
281 : be able to handle unknown cycles. */
282 2484 : bool inner_vdef = false;
283 6388 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
284 : {
285 4300 : affine_iv iv;
286 4300 : tree op = gimple_phi_result (psi.phi ());
287 :
288 8600 : if (virtual_operand_p (op))
289 : {
290 1737 : inner_vdef = true;
291 1737 : continue;
292 : }
293 2563 : if (!simple_iv (loop, loop, op, &iv, true))
294 396 : return false;
295 : /* The inductions must be regular, loop invariant step and initial
296 : value. */
297 2537 : if (!expr_invariant_in_loop_p (outer, iv.step)
298 2537 : || !expr_invariant_in_loop_p (outer, iv.base))
299 370 : return false;
300 : /* XXX With more effort we could also be able to deal with inductions
301 : where the initial value is loop variant but a simple IV in the
302 : outer loop. The initial value for the second body would be
303 : the original initial value plus iv.base.step. The next value
304 : for the fused loop would be the original next value of the first
305 : copy, _not_ the next value of the second body. */
306 : }
307 :
308 : /* When there's no inner loop virtual PHI IV we cannot handle the update
309 : required to the inner loop if that doesn't already have one. See
310 : PR117113. */
311 2088 : if (!inner_vdef && get_virtual_phi (outer->header))
312 : return false;
313 :
314 : return true;
315 16254 : }
316 :
317 : /* Fuse LOOP with all further neighbors. The loops are expected to
318 : be in appropriate form. */
319 :
320 : static void
321 58 : fuse_loops (class loop *loop)
322 : {
323 58 : class loop *next = loop->next;
324 :
325 116 : while (next)
326 : {
327 58 : edge e;
328 :
329 58 : remove_branch (single_pred_edge (loop->latch));
330 : /* Make delete_basic_block not fiddle with the loop structure. */
331 58 : basic_block oldlatch = loop->latch;
332 58 : loop->latch = NULL;
333 58 : delete_basic_block (oldlatch);
334 58 : e = redirect_edge_and_branch (loop_latch_edge (next),
335 : loop->header);
336 58 : loop->latch = e->src;
337 58 : flush_pending_stmts (e);
338 :
339 58 : gcc_assert (EDGE_COUNT (next->header->preds) == 1);
340 :
341 : /* The PHI nodes of the second body (single-argument now)
342 : need adjustments to use the right values: either directly
343 : the value of the corresponding PHI in the first copy or
344 : the one leaving the first body which unrolling did for us.
345 :
346 : See also unroll_jam_possible_p() for further possibilities. */
347 58 : gphi_iterator psi_first, psi_second;
348 58 : e = single_pred_edge (next->header);
349 58 : for (psi_first = gsi_start_phis (loop->header),
350 58 : psi_second = gsi_start_phis (next->header);
351 175 : !gsi_end_p (psi_first);
352 117 : gsi_next (&psi_first), gsi_next (&psi_second))
353 : {
354 117 : gphi *phi_first = psi_first.phi ();
355 117 : gphi *phi_second = psi_second.phi ();
356 117 : tree firstop = gimple_phi_result (phi_first);
357 : /* The virtual operand is correct already as it's
358 : always live at exit, hence has a LCSSA node and outer
359 : loop unrolling updated SSA form. */
360 234 : if (virtual_operand_p (firstop))
361 58 : continue;
362 :
363 : /* Due to unroll_jam_possible_p() we know that this is
364 : an induction. The second body goes over the same
365 : iteration space. */
366 59 : add_phi_arg (phi_second, firstop, e,
367 : gimple_location (phi_first));
368 : }
369 58 : gcc_assert (gsi_end_p (psi_second));
370 :
371 58 : merge_loop_tree (loop, next);
372 58 : gcc_assert (!next->num_nodes);
373 58 : class loop *ln = next->next;
374 58 : delete_loop (next);
375 58 : next = ln;
376 : }
377 58 : }
378 :
379 : /* Return true if any of the access functions for dataref A
380 : isn't invariant with respect to loop LOOP_NEST. */
381 : static bool
382 3738 : any_access_function_variant_p (const struct data_reference *a,
383 : const class loop *loop_nest)
384 : {
385 3738 : vec<tree> fns = DR_ACCESS_FNS (a);
386 :
387 11781 : for (tree t : fns)
388 4291 : if (!evolution_function_is_invariant_p (t, loop_nest->num))
389 : return true;
390 :
391 : return false;
392 : }
393 :
394 : /* Returns true if the distance in DDR can be determined and adjusts
395 : the unroll factor in *UNROLL to make unrolling valid for that distance.
396 : Otherwise return false. DDR is with respect to the outer loop of INNER.
397 :
398 : If this data dep can lead to a removed memory reference, increment
399 : *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
400 : for this to happen. */
401 :
402 : static bool
403 3719 : adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr,
404 : unsigned *unroll, unsigned *profit_unroll,
405 : unsigned *removed)
406 : {
407 3719 : bool ret = false;
408 3719 : if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
409 : {
410 3719 : if (DDR_NUM_DIST_VECTS (ddr) == 0)
411 3719 : return false;
412 : unsigned i;
413 : lambda_vector dist_v;
414 3769 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
415 : {
416 : /* A distance (a,b) is at worst transformed into (a/N,b) by the
417 : unrolling (factor N), so the transformation is valid if
418 : a >= N, or b > 0, or b is zero and a > 0. Otherwise the unroll
419 : factor needs to be limited so that the first condition holds.
420 : That may limit the factor down to zero in the worst case. */
421 2450 : lambda_int dist = dist_v[0];
422 2450 : if (dist < 0)
423 0 : gcc_unreachable ();
424 2450 : else if (dist >= (lambda_int)*unroll)
425 : ;
426 7152 : else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
427 : {
428 : /* We have (a,0) with a < N, so this will be transformed into
429 : (0,0) after unrolling by N. This might potentially be a
430 : problem, if it's not a read-read dependency. */
431 2320 : if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))
432 : ;
433 : else
434 : {
435 : /* So, at least one is a write, and we might reduce the
436 : distance vector to (0,0). This is still no problem
437 : if both data-refs are affine with respect to the inner
438 : loops. But if one of them is invariant with respect
439 : to an inner loop our reordering implicit in loop fusion
440 : corrupts the program, as our data dependences don't
441 : capture this. E.g. for:
442 : for (0 <= i < n)
443 : for (0 <= j < m)
444 : a[i][0] = a[i+1][0] + 2; // (1)
445 : b[i][j] = b[i+1][j] + 2; // (2)
446 : the distance vector for both statements is (-1,0),
447 : but exchanging the order for (2) is okay, while
448 : for (1) it is not. To see this, write out the original
449 : accesses (assume m is 2):
450 : a i j original
451 : 0 0 0 r a[1][0] b[1][0]
452 : 1 0 0 w a[0][0] b[0][0]
453 : 2 0 1 r a[1][0] b[1][1]
454 : 3 0 1 w a[0][0] b[0][1]
455 : 4 1 0 r a[2][0] b[2][0]
456 : 5 1 0 w a[1][0] b[1][0]
457 : after unroll-by-2 and fusion the accesses are done in
458 : this order (from column a): 0,1, 4,5, 2,3, i.e. this:
459 : a i j transformed
460 : 0 0 0 r a[1][0] b[1][0]
461 : 1 0 0 w a[0][0] b[0][0]
462 : 4 1 0 r a[2][0] b[2][0]
463 : 5 1 0 w a[1][0] b[1][0]
464 : 2 0 1 r a[1][0] b[1][1]
465 : 3 0 1 w a[0][0] b[0][1]
466 : Note how access 2 accesses the same element as access 5
467 : for array 'a' but not for array 'b'. */
468 1876 : if (any_access_function_variant_p (DDR_A (ddr), inner)
469 1876 : && any_access_function_variant_p (DDR_B (ddr), inner))
470 : ;
471 : else
472 : /* And if any dataref of this pair is invariant with
473 : respect to the inner loop, we have no chance than
474 : to reduce the unroll factor. */
475 14 : *unroll = dist;
476 : }
477 : }
478 2496 : else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
479 : ;
480 : else
481 46 : *unroll = dist;
482 :
483 : /* With a distance (a,0) it's always profitable to unroll-and-jam
484 : (by a+1), because one memory reference will go away. With
485 : (a,b) and b != 0 that's less clear. We will increase the
486 : number of streams without lowering the number of mem refs.
487 : So for now only handle the first situation. */
488 7350 : if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
489 : {
490 2336 : *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1);
491 2336 : (*removed)++;
492 : }
493 :
494 2450 : ret = true;
495 : }
496 : }
497 : return ret;
498 : }
499 :
500 : /* Main entry point for the unroll-and-jam transformation
501 : described above. */
502 :
503 : static unsigned int
504 28047 : tree_loop_unroll_and_jam (void)
505 : {
506 28047 : unsigned int todo = 0;
507 :
508 28047 : gcc_assert (scev_initialized_p ());
509 :
510 : /* Go through all innermost loops. */
511 153520 : for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
512 : {
513 69379 : class loop *outer = loop_outer (loop);
514 :
515 69379 : if (loop_depth (loop) < 2
516 69379 : || optimize_loop_nest_for_size_p (outer))
517 68086 : continue;
518 :
519 16254 : if (!unroll_jam_possible_p (outer, loop))
520 14196 : continue;
521 :
522 2058 : vec<data_reference_p> datarefs = vNULL;
523 2058 : vec<ddr_p> dependences = vNULL;
524 2058 : unsigned unroll_factor, profit_unroll, removed;
525 2058 : class tree_niter_desc desc;
526 2058 : bool unroll = false;
527 :
528 2058 : auto_vec<loop_p, 3> loop_nest;
529 2058 : if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
530 : &datarefs, &dependences))
531 : {
532 345 : if (dump_file && (dump_flags & TDF_DETAILS))
533 0 : fprintf (dump_file, "Cannot analyze data dependencies\n");
534 345 : free_data_refs (datarefs);
535 345 : free_dependence_relations (dependences);
536 345 : continue;
537 : }
538 1713 : if (!datarefs.length ())
539 420 : continue;
540 :
541 1293 : if (dump_file && (dump_flags & TDF_DETAILS))
542 14 : dump_data_dependence_relations (dump_file, dependences);
543 :
544 1293 : unroll_factor = (unsigned)-1;
545 1293 : profit_unroll = 1;
546 1293 : removed = 0;
547 :
548 : /* Check all dependencies. */
549 1293 : unsigned i;
550 1293 : struct data_dependence_relation *ddr;
551 40693 : FOR_EACH_VEC_ELT (dependences, i, ddr)
552 : {
553 39792 : struct data_reference *dra, *drb;
554 :
555 : /* If the refs are independend there's nothing to do. */
556 39792 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
557 32086 : continue;
558 :
559 7706 : dra = DDR_A (ddr);
560 7706 : drb = DDR_B (ddr);
561 :
562 : /* Nothing interesting for the self dependencies, except for WAW if
563 : the access function is not affine or constant because we may end
564 : up reordering writes to the same location. */
565 7706 : if (dra == drb)
566 : {
567 7802 : if (DR_IS_WRITE (dra)
568 2122 : && !DR_ACCESS_FNS (dra).is_empty ()
569 6094 : && DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
570 : {
571 172 : unroll_factor = 0;
572 172 : break;
573 : }
574 : else
575 3815 : continue;
576 : }
577 :
578 : /* Now check the distance vector, for determining a sensible
579 : outer unroll factor, and for validity of merging the inner
580 : loop copies. */
581 3719 : if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll,
582 : &removed))
583 : {
584 : /* Couldn't get the distance vector. For two reads that's
585 : harmless (we assume we should unroll). For at least
586 : one write this means we can't check the dependence direction
587 : and hence can't determine safety. */
588 :
589 2400 : if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
590 : {
591 220 : unroll_factor = 0;
592 220 : break;
593 : }
594 : }
595 : }
596 :
597 : /* We regard a user-specified minimum percentage of zero as a request
598 : to ignore all profitability concerns and apply the transformation
599 : always. */
600 1293 : if (!param_unroll_jam_min_percent)
601 27 : profit_unroll = MAX(2, profit_unroll);
602 1266 : else if (removed * 100 / datarefs.length ()
603 1266 : < (unsigned)param_unroll_jam_min_percent)
604 1006 : profit_unroll = 1;
605 1293 : if (unroll_factor > profit_unroll)
606 853 : unroll_factor = profit_unroll;
607 1293 : if (unroll_factor > (unsigned)param_unroll_jam_max_unroll)
608 0 : unroll_factor = param_unroll_jam_max_unroll;
609 2644 : unroll = (unroll_factor > 1
610 1293 : && can_unroll_loop_p (outer, unroll_factor, &desc));
611 :
612 58 : if (unroll)
613 : {
614 58 : if (dump_enabled_p ())
615 10 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
616 10 : find_loop_location (outer),
617 : "applying unroll and jam with factor %d\n",
618 : unroll_factor);
619 58 : initialize_original_copy_tables ();
620 58 : tree_unroll_loop (outer, unroll_factor, &desc);
621 58 : free_original_copy_tables ();
622 58 : fuse_loops (outer->inner);
623 58 : todo |= TODO_cleanup_cfg;
624 :
625 58 : auto_bitmap exit_bbs;
626 58 : bitmap_set_bit (exit_bbs, single_exit (outer)->dest->index);
627 58 : todo |= do_rpo_vn (cfun, loop_preheader_edge (outer), exit_bbs);
628 58 : }
629 :
630 1293 : loop_nest.release ();
631 1293 : free_dependence_relations (dependences);
632 1293 : free_data_refs (datarefs);
633 2058 : }
634 :
635 28047 : if (todo)
636 : {
637 52 : free_dominance_info (CDI_DOMINATORS);
638 : /* We need to cleanup the CFG first since otherwise SSA form can
639 : be not up-to-date from the VN run. */
640 52 : if (todo & TODO_cleanup_cfg)
641 : {
642 52 : cleanup_tree_cfg ();
643 52 : todo &= ~TODO_cleanup_cfg;
644 52 : todo |= loop_invariant_motion_in_fun (cfun, false);
645 : }
646 52 : rewrite_into_loop_closed_ssa (NULL, 0);
647 52 : scev_reset ();
648 : }
649 28047 : return todo;
650 : }
651 :
652 : /* Pass boilerplate */
653 :
654 : namespace {
655 :
656 : const pass_data pass_data_loop_jam =
657 : {
658 : GIMPLE_PASS, /* type */
659 : "unrolljam", /* name */
660 : OPTGROUP_LOOP, /* optinfo_flags */
661 : TV_LOOP_JAM, /* tv_id */
662 : PROP_cfg, /* properties_required */
663 : 0, /* properties_provided */
664 : 0, /* properties_destroyed */
665 : 0, /* todo_flags_start */
666 : 0, /* todo_flags_finish */
667 : };
668 :
669 : class pass_loop_jam : public gimple_opt_pass
670 : {
671 : public:
672 285722 : pass_loop_jam (gcc::context *ctxt)
673 571444 : : gimple_opt_pass (pass_data_loop_jam, ctxt)
674 : {}
675 :
676 : /* opt_pass methods: */
677 241458 : bool gate (function *) final override { return flag_unroll_jam != 0; }
678 : unsigned int execute (function *) final override;
679 :
680 : };
681 :
682 : unsigned int
683 28047 : pass_loop_jam::execute (function *fun)
684 : {
685 56094 : if (number_of_loops (fun) <= 1)
686 : return 0;
687 :
688 28047 : return tree_loop_unroll_and_jam ();
689 : }
690 :
691 : }
692 :
693 : gimple_opt_pass *
694 285722 : make_pass_loop_jam (gcc::context *ctxt)
695 : {
696 285722 : return new pass_loop_jam (ctxt);
697 : }
|