LCOV - code coverage report
Current view: top level - gcc - loop-doloop.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 0.0 % 347 0
Test Date: 2026-02-28 14:20:25 Functions: 0.0 % 8 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Perform doloop optimizations
       2              :    Copyright (C) 2004-2026 Free Software Foundation, Inc.
       3              :    Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
       4              : 
       5              : This file is part of GCC.
       6              : 
       7              : GCC is free software; you can redistribute it and/or modify it under
       8              : the terms of the GNU General Public License as published by the Free
       9              : Software Foundation; either version 3, or (at your option) any later
      10              : version.
      11              : 
      12              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15              : for more details.
      16              : 
      17              : You should have received a copy of the GNU General Public License
      18              : along with GCC; see the file COPYING3.  If not see
      19              : <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "target.h"
      26              : #include "rtl.h"
      27              : #include "tree.h"
      28              : #include "cfghooks.h"
      29              : #include "memmodel.h"
      30              : #include "emit-rtl.h"
      31              : #include "dojump.h"
      32              : #include "expr.h"
      33              : #include "cfgloop.h"
      34              : #include "cfgrtl.h"
      35              : #include "dumpfile.h"
      36              : #include "loop-unroll.h"
      37              : #include "regs.h"
      38              : #include "df.h"
      39              : #include "targhooks.h"
      40              : 
      41              : /* This module is used to modify loops with a determinable number of
      42              :    iterations to use special low-overhead looping instructions.
      43              : 
      44              :    It first validates whether the loop is well behaved and has a
      45              :    determinable number of iterations (either at compile or run-time).
      46              :    It then modifies the loop to use a low-overhead looping pattern as
      47              :    follows:
      48              : 
      49              :    1. A pseudo register is allocated as the loop iteration counter.
      50              : 
      51              :    2. The number of loop iterations is calculated and is stored
      52              :       in the loop counter.
      53              : 
      54              :    3. At the end of the loop, the jump insn is replaced by the
      55              :       doloop_end pattern.  The compare must remain because it might be
      56              :       used elsewhere.  If the loop-variable or condition register are
      57              :       used elsewhere, they will be eliminated by flow.
      58              : 
      59              :    4. An optional doloop_begin pattern is inserted at the top of the
      60              :       loop.
      61              : 
      62              :    TODO The optimization should only performed when either the biv used for exit
      63              :    condition is unused at all except for the exit test, or if we do not have to
      64              :    change its value, since otherwise we have to add a new induction variable,
      65              :    which usually will not pay up (unless the cost of the doloop pattern is
      66              :    somehow extremely lower than the cost of compare & jump, or unless the bct
      67              :    register cannot be used for anything else but doloop -- ??? detect these
      68              :    cases).  */
      69              : 
      70              : /* Return the loop termination condition for PATTERN or zero
      71              :    if it is not a decrement and branch jump insn.  */
      72              : 
      73              : rtx
      74            0 : doloop_condition_get (rtx_insn *doloop_pat)
      75              : {
      76            0 :   rtx cmp;
      77            0 :   rtx inc;
      78            0 :   rtx reg;
      79            0 :   rtx inc_src;
      80            0 :   rtx condition;
      81            0 :   rtx pattern;
      82            0 :   rtx cc_reg = NULL_RTX;
      83            0 :   rtx reg_orig = NULL_RTX;
      84              : 
      85              :   /* The canonical doloop pattern we expect has one of the following
      86              :      forms:
      87              : 
      88              :      1)  (parallel [(set (pc) (if_then_else (condition)
      89              :                                             (label_ref (label))
      90              :                                             (pc)))
      91              :                      (set (reg) (plus (reg) (const_int -1)))
      92              :                      (additional clobbers and uses)])
      93              : 
      94              :      The branch must be the first entry of the parallel (also required
      95              :      by jump.cc), and the second entry of the parallel must be a set of
      96              :      the loop counter register.  Some targets (IA-64) wrap the set of
      97              :      the loop counter in an if_then_else too.
      98              : 
      99              :      2)  (set (reg) (plus (reg) (const_int -1))
     100              :          (set (pc) (if_then_else (reg != 0)
     101              :                                  (label_ref (label))
     102              :                                  (pc))).
     103              : 
     104              :      3) Some targets (Arm) do the comparison before the branch, as in the
     105              :      following form:
     106              : 
     107              :      (parallel [(set (cc) (compare (plus (reg) (const_int -1)) 0))
     108              :                 (set (reg) (plus (reg) (const_int -1)))])
     109              :      (set (pc) (if_then_else (cc == NE)
     110              :                              (label_ref (label))
     111              :                              (pc)))
     112              : 
     113              :       4) This form supports a construct that is used to represent a vectorized
     114              :       do loop with predication, however we do not need to care about the
     115              :       details of the predication here.
     116              :       Arm uses this construct to support MVE tail predication.
     117              : 
     118              :       (parallel
     119              :        [(set (pc)
     120              :              (if_then_else (gtu (plus (reg) (const_int -n))
     121              :                                 (const_int n-1))
     122              :                            (label_ref)
     123              :                            (pc)))
     124              :         (set (reg) (plus (reg) (const_int -n)))
     125              :         (additional clobbers and uses)])
     126              :      */
     127            0 :   pattern = PATTERN (doloop_pat);
     128              : 
     129            0 :   if (GET_CODE (pattern) != PARALLEL)
     130              :     {
     131            0 :       rtx cond;
     132            0 :       rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
     133            0 :       rtx cmp_arg1, cmp_arg2;
     134            0 :       rtx cmp_orig;
     135              : 
     136              :       /* In case the pattern is not PARALLEL we expect two forms
     137              :          of doloop which are cases 2) and 3) above: in case 2) the
     138              :          decrement immediately precedes the branch, while in case 3)
     139              :          the compare and decrement instructions immediately precede
     140              :          the branch.  */
     141              : 
     142            0 :       if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
     143              :         return 0;
     144              : 
     145            0 :       cmp = pattern;
     146            0 :       if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
     147              :         {
     148              :           /* The third case: the compare and decrement instructions
     149              :              immediately precede the branch.  */
     150            0 :           cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
     151            0 :           if (GET_CODE (cmp_orig) != SET)
     152              :             return 0;
     153            0 :           if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
     154              :             return 0;
     155            0 :           cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
     156            0 :           cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
     157            0 :           if (cmp_arg2 != const0_rtx
     158            0 :               || GET_CODE (cmp_arg1) != PLUS)
     159              :             return 0;
     160            0 :           reg_orig = XEXP (cmp_arg1, 0);
     161            0 :           if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
     162            0 :               || !REG_P (reg_orig))
     163              :             return 0;
     164            0 :           cc_reg = SET_DEST (cmp_orig);
     165              : 
     166            0 :           inc = XVECEXP (PATTERN (prev_insn), 0, 1);
     167              :         }
     168              :       else
     169              :         inc = PATTERN (prev_insn);
     170            0 :       if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE)
     171              :         {
     172              :           /* We expect the condition to be of the form (reg != 0)  */
     173            0 :           cond = XEXP (SET_SRC (cmp), 0);
     174            0 :           if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
     175              :             return 0;
     176              :         }
     177              :     }
     178              :   else
     179              :     {
     180            0 :       cmp = XVECEXP (pattern, 0, 0);
     181            0 :       inc = XVECEXP (pattern, 0, 1);
     182              :     }
     183              : 
     184              :   /* Check for (set (reg) (something)).  */
     185            0 :   if (GET_CODE (inc) != SET)
     186              :     return 0;
     187            0 :   reg = SET_DEST (inc);
     188            0 :   if (! REG_P (reg))
     189              :     return 0;
     190              : 
     191              :   /* Check if something = (plus (reg) (const_int -n)).
     192              :      On IA-64, this decrement is wrapped in an if_then_else.  */
     193            0 :   inc_src = SET_SRC (inc);
     194            0 :   if (GET_CODE (inc_src) == IF_THEN_ELSE)
     195            0 :     inc_src = XEXP (inc_src, 1);
     196            0 :   if (GET_CODE (inc_src) != PLUS
     197            0 :       || !rtx_equal_p (XEXP (inc_src, 0), reg)
     198            0 :       || !CONST_INT_P (XEXP (inc_src, 1))
     199            0 :       || INTVAL (XEXP (inc_src, 1)) >= 0)
     200            0 :     return 0;
     201            0 :   int dec_num = -INTVAL (XEXP (inc_src, 1));
     202              : 
     203              :   /* Check for (set (pc) (if_then_else (condition)
     204              :                                        (label_ref (label))
     205              :                                        (pc))).  */
     206            0 :   if (GET_CODE (cmp) != SET
     207            0 :       || SET_DEST (cmp) != pc_rtx
     208            0 :       || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
     209            0 :       || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
     210            0 :       || XEXP (SET_SRC (cmp), 2) != pc_rtx)
     211              :     return 0;
     212              : 
     213              :   /* Extract loop termination condition.  */
     214            0 :   condition = XEXP (SET_SRC (cmp), 0);
     215              : 
     216              :   /* We expect a GE or NE comparison with 0 or 1, or a GTU comparison with
     217              :      dec_num - 1.  */
     218            0 :   if (!((GET_CODE (condition) == GE
     219            0 :          || GET_CODE (condition) == NE)
     220            0 :         && (XEXP (condition, 1) == const0_rtx
     221            0 :             || XEXP (condition, 1) == const1_rtx ))
     222            0 :       &&!(GET_CODE (condition) == GTU
     223            0 :           && ((INTVAL (XEXP (condition, 1))) == (dec_num - 1))))
     224              :     return 0;
     225              : 
     226            0 :   if (rtx_equal_p (XEXP (condition, 0), reg)
     227              :       /* For the third case:  */
     228            0 :       || ((cc_reg != NULL_RTX)
     229            0 :           && (XEXP (condition, 0) == cc_reg)
     230            0 :           && (rtx_equal_p (reg_orig, reg)))
     231            0 :       || (GET_CODE (XEXP (condition, 0)) == PLUS
     232            0 :           && rtx_equal_p (XEXP (XEXP (condition, 0), 0), reg, NULL)))
     233              :     {
     234            0 :       if (GET_CODE (pattern) != PARALLEL)
     235              :       /* For the second form we expect:
     236              : 
     237              :          (set (reg) (plus (reg) (const_int -1))
     238              :          (set (pc) (if_then_else (reg != 0)
     239              :                                  (label_ref (label))
     240              :                                  (pc))).
     241              : 
     242              :          is equivalent to the following:
     243              : 
     244              :          (parallel [(set (pc) (if_then_else (reg != 1)
     245              :                                             (label_ref (label))
     246              :                                             (pc)))
     247              :                      (set (reg) (plus (reg) (const_int -1)))
     248              :                      (additional clobbers and uses)])
     249              : 
     250              :         For the third form we expect:
     251              : 
     252              :         (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
     253              :                    (set (reg) (plus (reg) (const_int -1)))])
     254              :         (set (pc) (if_then_else (cc == NE)
     255              :                                 (label_ref (label))
     256              :                                 (pc)))
     257              : 
     258              :         which is equivalent to the following:
     259              : 
     260              :         (parallel [(set (cc) (compare (reg,  1))
     261              :                    (set (reg) (plus (reg) (const_int -1)))
     262              :                    (set (pc) (if_then_else (NE == cc)
     263              :                                            (label_ref (label))
     264              :                                            (pc))))])
     265              : 
     266              :         So we return the second form instead for the two cases.
     267              : 
     268              :      */
     269            0 :         condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
     270              : 
     271            0 :     return condition;
     272              :     }
     273              : 
     274              :   /* ??? If a machine uses a funny comparison, we could return a
     275              :      canonicalized form here.  */
     276              : 
     277              :   return 0;
     278              : }
     279              : 
     280              : /* Return nonzero if the loop specified by LOOP is suitable for
     281              :    the use of special low-overhead looping instructions.  DESC
     282              :    describes the number of iterations of the loop.  */
     283              : 
     284              : static bool
     285            0 : doloop_valid_p (class loop *loop, class niter_desc *desc)
     286              : {
     287            0 :   basic_block *body = get_loop_body (loop), bb;
     288            0 :   rtx_insn *insn;
     289            0 :   unsigned i;
     290            0 :   bool result = true;
     291              : 
     292              :   /* Check for loops that may not terminate under special conditions.  */
     293            0 :   if (!desc->simple_p
     294            0 :       || desc->assumptions
     295            0 :       || desc->infinite)
     296              :     {
     297              :       /* There are some cases that would require a special attention.
     298              :          For example if the comparison is LEU and the comparison value
     299              :          is UINT_MAX then the loop will not terminate.  Similarly, if the
     300              :          comparison code is GEU and the comparison value is 0, the
     301              :          loop will not terminate.
     302              : 
     303              :          If the absolute increment is not 1, the loop can be infinite
     304              :          even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
     305              : 
     306              :          ??? We could compute these conditions at run-time and have a
     307              :          additional jump around the loop to ensure an infinite loop.
     308              :          However, it is very unlikely that this is the intended
     309              :          behavior of the loop and checking for these rare boundary
     310              :          conditions would pessimize all other code.
     311              : 
     312              :          If the loop is executed only a few times an extra check to
     313              :          restart the loop could use up most of the benefits of using a
     314              :          count register loop.  Note however, that normally, this
     315              :          restart branch would never execute, so it could be predicted
     316              :          well by the CPU.  We should generate the pessimistic code by
     317              :          default, and have an option, e.g. -funsafe-loops that would
     318              :          enable count-register loops in this case.  */
     319            0 :       if (dump_file)
     320            0 :         fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
     321            0 :       result = false;
     322            0 :       goto cleanup;
     323              :     }
     324              : 
     325            0 :   for (i = 0; i < loop->num_nodes; i++)
     326              :     {
     327            0 :       bb = body[i];
     328              : 
     329            0 :       for (insn = BB_HEAD (bb);
     330            0 :            insn != NEXT_INSN (BB_END (bb));
     331            0 :            insn = NEXT_INSN (insn))
     332              :         {
     333              :           /* Different targets have different necessities for low-overhead
     334              :              looping.  Call the back end for each instruction within the loop
     335              :              to let it decide whether the insn prohibits a low-overhead loop.
     336              :              It will then return the cause for it to emit to the dump file.  */
     337            0 :           const char * invalid = targetm.invalid_within_doloop (insn);
     338            0 :           if (invalid)
     339              :             {
     340            0 :               if (dump_file)
     341            0 :                 fprintf (dump_file, "Doloop: %s\n", invalid);
     342            0 :               result = false;
     343            0 :               goto cleanup;
     344              :             }
     345              :         }
     346              :     }
     347              :   result = true;
     348              : 
     349            0 : cleanup:
     350            0 :   free (body);
     351              : 
     352            0 :   return result;
     353              : }
     354              : 
     355              : /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
     356              :    edge.  If the condition is always false, do not do anything.  If it is always
     357              :    true, redirect E to DEST and return false.  In all other cases, true is
     358              :    returned.  */
     359              : 
     360              : static bool
     361            0 : add_test (rtx cond, edge *e, basic_block dest)
     362              : {
     363            0 :   rtx_insn *seq, *jump;
     364            0 :   rtx_code_label *label;
     365            0 :   machine_mode mode;
     366            0 :   rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
     367            0 :   enum rtx_code code = GET_CODE (cond);
     368            0 :   basic_block bb;
     369              :   /* The jump is supposed to handle an unlikely special case.  */
     370            0 :   profile_probability prob = profile_probability::guessed_never ();
     371              : 
     372            0 :   mode = GET_MODE (XEXP (cond, 0));
     373            0 :   if (mode == VOIDmode)
     374            0 :     mode = GET_MODE (XEXP (cond, 1));
     375              : 
     376            0 :   start_sequence ();
     377            0 :   op0 = force_operand (op0, NULL_RTX);
     378            0 :   op1 = force_operand (op1, NULL_RTX);
     379            0 :   label = block_label (dest);
     380            0 :   do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label,
     381              :                            prob);
     382              : 
     383            0 :   jump = get_last_insn ();
     384            0 :   if (!jump || !JUMP_P (jump))
     385              :     {
     386              :       /* The condition is always false and the jump was optimized out.  */
     387            0 :       end_sequence ();
     388            0 :       return true;
     389              :     }
     390              : 
     391            0 :   seq = get_insns ();
     392            0 :   unshare_all_rtl_in_chain (seq);
     393            0 :   end_sequence ();
     394              : 
     395              :   /* There always is at least the jump insn in the sequence.  */
     396            0 :   gcc_assert (seq != NULL_RTX);
     397              : 
     398            0 :   bb = split_edge_and_insert (*e, seq);
     399            0 :   *e = single_succ_edge (bb);
     400              : 
     401            0 :   if (any_uncondjump_p (jump) && onlyjump_p (jump))
     402              :     {
     403              :       /* The condition is always true.  */
     404            0 :       delete_insn (jump);
     405            0 :       redirect_edge_and_branch_force (*e, dest);
     406            0 :       return false;
     407              :     }
     408              : 
     409            0 :   JUMP_LABEL (jump) = label;
     410              : 
     411            0 :   LABEL_NUSES (label)++;
     412              : 
     413            0 :   edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
     414            0 :   e2->probability = prob;
     415            0 :   (*e)->probability = prob.invert ();
     416            0 :   update_br_prob_note (e2->src);
     417            0 :   return true;
     418              : }
     419              : 
     420              : /* Fold (add -1; zero_ext; add +1) operations to zero_ext if not wrapping. i.e:
     421              : 
     422              :    73: r145:SI=r123:DI#0-0x1
     423              :    74: r144:DI=zero_extend (r145:SI)
     424              :    75: r143:DI=r144:DI+0x1
     425              :    ...
     426              :    31: r135:CC=cmp (r123:DI,0)
     427              :    72: {pc={(r143:DI!=0x1)?L70:pc};r143:DI=r143:DI-0x1;...}
     428              : 
     429              :    r123:DI#0-0x1 is param count derived from loop->niter_expr equal to number of
     430              :    loop iterations, if loop iterations expression doesn't overflow, then
     431              :    (zero_extend (r123:DI#0-1))+1 can be simplified to zero_extend.  */
     432              : 
     433              : static rtx
     434            0 : doloop_simplify_count (class loop *loop, scalar_int_mode mode, rtx count)
     435              : {
     436            0 :   widest_int iterations;
     437            0 :   if (GET_CODE (count) == ZERO_EXTEND)
     438              :     {
     439            0 :       rtx extop0 = XEXP (count, 0);
     440            0 :       if (GET_CODE (extop0) == PLUS)
     441              :         {
     442            0 :           rtx addop0 = XEXP (extop0, 0);
     443            0 :           rtx addop1 = XEXP (extop0, 1);
     444              : 
     445            0 :           if (get_max_loop_iterations (loop, &iterations)
     446            0 :               && wi::ltu_p (iterations, GET_MODE_MASK (GET_MODE (addop0)))
     447            0 :               && addop1 == constm1_rtx)
     448            0 :             return simplify_gen_unary (ZERO_EXTEND, mode, addop0,
     449            0 :                                        GET_MODE (addop0));
     450              :         }
     451              :     }
     452              : 
     453            0 :   return simplify_gen_binary (PLUS, mode, count, const1_rtx);
     454            0 : }
     455              : 
     456              : /* Modify the loop to use the low-overhead looping insn where LOOP
     457              :    describes the loop, DESC describes the number of iterations of the
     458              :    loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
     459              :    end of the loop.  CONDITION is the condition separated from the
     460              :    DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
     461              : 
     462              : static void
     463            0 : doloop_modify (class loop *loop, class niter_desc *desc,
     464              :                rtx_insn *doloop_seq, rtx condition, rtx count)
     465              : {
     466            0 :   rtx counter_reg;
     467            0 :   rtx tmp, noloop = NULL_RTX;
     468            0 :   rtx_insn *sequence;
     469            0 :   rtx_insn *jump_insn;
     470            0 :   rtx_code_label *jump_label;
     471            0 :   int nonneg = 0;
     472            0 :   bool increment_count;
     473            0 :   basic_block loop_end = desc->out_edge->src;
     474            0 :   scalar_int_mode mode;
     475            0 :   widest_int iterations;
     476              : 
     477            0 :   jump_insn = BB_END (loop_end);
     478              : 
     479            0 :   if (dump_file)
     480              :     {
     481            0 :       fprintf (dump_file, "Doloop: Inserting doloop pattern (");
     482            0 :       if (desc->const_iter)
     483            0 :         fprintf (dump_file, "%" PRId64, desc->niter);
     484              :       else
     485            0 :         fputs ("runtime", dump_file);
     486            0 :       fputs (" iterations).\n", dump_file);
     487              :     }
     488              : 
     489              :   /* Discard original jump to continue loop.  The original compare
     490              :      result may still be live, so it cannot be discarded explicitly.  */
     491            0 :   delete_insn (jump_insn);
     492              : 
     493            0 :   counter_reg = XEXP (condition, 0);
     494            0 :   if (GET_CODE (counter_reg) == PLUS)
     495            0 :     counter_reg = XEXP (counter_reg, 0);
     496              :   /* These patterns must operate on integer counters.  */
     497            0 :   mode = as_a <scalar_int_mode> (GET_MODE (counter_reg));
     498              : 
     499            0 :   increment_count = false;
     500            0 :   switch (GET_CODE (condition))
     501              :     {
     502            0 :     case NE:
     503              :       /* Currently only NE tests against zero and one are supported.  */
     504            0 :       noloop = XEXP (condition, 1);
     505            0 :       if (noloop != const0_rtx)
     506              :         {
     507            0 :           gcc_assert (noloop == const1_rtx);
     508              :           increment_count = true;
     509              :         }
     510              :       break;
     511              : 
     512            0 :     case GE:
     513              :       /* Currently only GE tests against zero are supported.  */
     514            0 :       gcc_assert (XEXP (condition, 1) == const0_rtx);
     515              : 
     516            0 :       noloop = constm1_rtx;
     517              : 
     518              :       /* The iteration count does not need incrementing for a GE test.  */
     519            0 :       increment_count = false;
     520              : 
     521              :       /* Determine if the iteration counter will be non-negative.
     522              :          Note that the maximum value loaded is iterations_max - 1.  */
     523            0 :       if (get_max_loop_iterations (loop, &iterations)
     524            0 :           && wi::leu_p (iterations,
     525              :                         wi::set_bit_in_zero <widest_int>
     526            0 :                         (GET_MODE_PRECISION (mode) - 1)))
     527              :         nonneg = 1;
     528              :       break;
     529              : 
     530              :     case GTU:
     531              :       /* The iteration count does not need incrementing for a GTU test.  */
     532              :       increment_count = false;
     533              :       break;
     534              : 
     535              :       /* Abort if an invalid doloop pattern has been generated.  */
     536            0 :     default:
     537            0 :       gcc_unreachable ();
     538              :     }
     539              : 
     540            0 :   if (increment_count)
     541            0 :     count = doloop_simplify_count (loop, mode, count);
     542              : 
     543              :   /* Insert initialization of the count register into the loop header.  */
     544            0 :   start_sequence ();
     545              :   /* count has been already copied through copy_rtx.  */
     546            0 :   reset_used_flags (count);
     547            0 :   set_used_flags (condition);
     548            0 :   tmp = force_operand (count, counter_reg);
     549            0 :   convert_move (counter_reg, tmp, 1);
     550            0 :   sequence = get_insns ();
     551            0 :   unshare_all_rtl_in_chain (sequence);
     552            0 :   end_sequence ();
     553            0 :   emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
     554              : 
     555            0 :   if (desc->noloop_assumptions)
     556              :     {
     557              :       /* The GTU case has only been implemented for Arm, where
     558              :          noloop_assumptions gets explicitly set to NULL for that case, so
     559              :          assert here for safety.  */
     560            0 :       gcc_assert (GET_CODE (condition) != GTU);
     561            0 :       rtx ass = copy_rtx (desc->noloop_assumptions);
     562            0 :       basic_block preheader = loop_preheader_edge (loop)->src;
     563            0 :       basic_block set_zero = split_edge (loop_preheader_edge (loop));
     564            0 :       basic_block new_preheader = split_edge (loop_preheader_edge (loop));
     565            0 :       edge te;
     566              : 
     567              :       /* Expand the condition testing the assumptions and if it does not pass,
     568              :          reset the count register to 0.  */
     569            0 :       redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
     570            0 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
     571              : 
     572            0 :       set_zero->count = profile_count::uninitialized ();
     573              : 
     574            0 :       te = single_succ_edge (preheader);
     575            0 :       for (; ass; ass = XEXP (ass, 1))
     576            0 :         if (!add_test (XEXP (ass, 0), &te, set_zero))
     577              :           break;
     578              : 
     579            0 :       if (ass)
     580              :         {
     581              :           /* We reached a condition that is always true.  This is very hard to
     582              :              reproduce (such a loop does not roll, and thus it would most
     583              :              likely get optimized out by some of the preceding optimizations).
     584              :              In fact, I do not have any testcase for it.  However, it would
     585              :              also be very hard to show that it is impossible, so we must
     586              :              handle this case.  */
     587            0 :           set_zero->count = preheader->count;
     588              :         }
     589              : 
     590            0 :       if (EDGE_COUNT (set_zero->preds) == 0)
     591              :         {
     592              :           /* All the conditions were simplified to false, remove the
     593              :              unreachable set_zero block.  */
     594            0 :           delete_basic_block (set_zero);
     595              :         }
     596              :       else
     597              :         {
     598              :           /* Reset the counter to zero in the set_zero block.  */
     599            0 :           start_sequence ();
     600            0 :           convert_move (counter_reg, noloop, 0);
     601            0 :           sequence = end_sequence ();
     602            0 :           emit_insn_after (sequence, BB_END (set_zero));
     603              : 
     604            0 :           set_immediate_dominator (CDI_DOMINATORS, set_zero,
     605              :                                    recompute_dominator (CDI_DOMINATORS,
     606              :                                                         set_zero));
     607              :         }
     608              : 
     609            0 :       set_immediate_dominator (CDI_DOMINATORS, new_preheader,
     610              :                                recompute_dominator (CDI_DOMINATORS,
     611              :                                                     new_preheader));
     612              :     }
     613              : 
     614              :   /* Some targets (eg, C4x) need to initialize special looping
     615              :      registers.  */
     616            0 :   if (targetm.have_doloop_begin ())
     617            0 :     if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq))
     618            0 :       emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src));
     619              : 
     620              :   /* Insert the new low-overhead looping insn.  */
     621            0 :   emit_jump_insn_after (doloop_seq, BB_END (loop_end));
     622            0 :   jump_insn = BB_END (loop_end);
     623            0 :   jump_label = block_label (desc->in_edge->dest);
     624            0 :   JUMP_LABEL (jump_insn) = jump_label;
     625            0 :   LABEL_NUSES (jump_label)++;
     626              : 
     627              :   /* Ensure the right fallthru edge is marked, for case we have reversed
     628              :      the condition.  */
     629            0 :   desc->in_edge->flags &= ~EDGE_FALLTHRU;
     630            0 :   desc->out_edge->flags |= EDGE_FALLTHRU;
     631              : 
     632              :   /* Add a REG_NONNEG note if the actual or estimated maximum number
     633              :      of iterations is non-negative.  */
     634            0 :   if (nonneg)
     635            0 :     add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
     636              : 
     637              :   /* Update the REG_BR_PROB note.  */
     638            0 :   if (desc->in_edge->probability.initialized_p ())
     639            0 :     add_reg_br_prob_note (jump_insn, desc->in_edge->probability);
     640            0 : }
     641              : 
     642              : /* Called through note_stores.  */
     643              : 
     644              : static void
     645            0 : record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
     646              : {
     647            0 :   bitmap mod = (bitmap)data;
     648            0 :   if (REG_P (x))
     649              :     {
     650            0 :       unsigned int regno = REGNO (x);
     651            0 :       if (HARD_REGISTER_P (x))
     652              :         {
     653            0 :           unsigned int end_regno = end_hard_regno (GET_MODE (x), regno);
     654            0 :           do
     655            0 :             bitmap_set_bit (mod, regno);
     656            0 :           while (++regno < end_regno);
     657              :         }
     658              :       else
     659            0 :         bitmap_set_bit (mod, regno);
     660              :     }
     661            0 : }
     662              : 
     663              : /* Process loop described by LOOP validating that the loop is suitable for
     664              :    conversion to use a low overhead looping instruction, replacing the jump
     665              :    insn where suitable.  Returns true if the loop was successfully
     666              :    modified.  */
     667              : 
     668              : static bool
     669            0 : doloop_optimize (class loop *loop)
     670              : {
     671            0 :   scalar_int_mode mode;
     672            0 :   rtx doloop_reg;
     673            0 :   rtx count = NULL_RTX;
     674            0 :   widest_int iterations, iterations_max;
     675            0 :   rtx_code_label *start_label;
     676            0 :   rtx condition;
     677            0 :   unsigned level;
     678            0 :   HOST_WIDE_INT est_niter;
     679            0 :   int max_cost;
     680            0 :   class niter_desc *desc;
     681            0 :   unsigned word_mode_size;
     682            0 :   unsigned HOST_WIDE_INT word_mode_max;
     683            0 :   int entered_at_top;
     684              : 
     685            0 :   if (dump_file)
     686            0 :     fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
     687              : 
     688            0 :   iv_analysis_loop_init (loop);
     689              : 
     690              :   /* Find the simple exit of a LOOP.  */
     691            0 :   desc = get_simple_loop_desc (loop);
     692              : 
     693              :   /* Check that loop is a candidate for a low-overhead looping insn.  */
     694            0 :   if (!doloop_valid_p (loop, desc))
     695              :     {
     696            0 :       if (dump_file)
     697            0 :         fprintf (dump_file,
     698              :                  "Doloop: The loop is not suitable.\n");
     699            0 :       return false;
     700              :     }
     701            0 :   mode = desc->mode;
     702              : 
     703            0 :   est_niter = get_estimated_loop_iterations_int (loop);
     704            0 :   if (est_niter == -1)
     705            0 :     est_niter = get_likely_max_loop_iterations_int (loop);
     706              : 
     707            0 :   if (est_niter >= 0 && est_niter < 3)
     708              :     {
     709            0 :       if (dump_file)
     710            0 :         fprintf (dump_file,
     711              :                  "Doloop: Too few iterations (%u) to be profitable.\n",
     712              :                  (unsigned int)est_niter);
     713            0 :       return false;
     714              :     }
     715              : 
     716            0 :   if (desc->const_iter)
     717            0 :     iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode),
     718            0 :                                    UNSIGNED);
     719              :   else
     720            0 :     iterations = 0;
     721            0 :   if (!get_max_loop_iterations (loop, &iterations_max))
     722            0 :     iterations_max = 0;
     723            0 :   level = get_loop_level (loop) + 1;
     724            0 :   entered_at_top = (loop->latch == desc->in_edge->dest
     725            0 :                     && contains_no_active_insn_p (loop->latch));
     726            0 :   if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
     727              :                                  entered_at_top))
     728              :     {
     729            0 :       if (dump_file)
     730            0 :         fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
     731            0 :       return false;
     732              :     }
     733              : 
     734              :   /* Generate looping insn.  If the pattern FAILs then give up trying
     735              :      to modify the loop since there is some aspect the back-end does
     736              :      not like.  If this succeeds, there is a chance that the loop
     737              :      desc->niter_expr has been altered by the backend, so only extract
     738              :      that data after the gen_doloop_end.  */
     739            0 :   start_label = block_label (desc->in_edge->dest);
     740            0 :   doloop_reg = gen_reg_rtx (mode);
     741            0 :   rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
     742              : 
     743            0 :   max_cost
     744            0 :     = COSTS_N_INSNS (param_max_iterations_computation_cost);
     745            0 :   if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
     746              :       > max_cost)
     747              :     {
     748            0 :       if (dump_file)
     749            0 :         fprintf (dump_file,
     750              :                  "Doloop: number of iterations too costly to compute.\n");
     751            0 :       return false;
     752              :     }
     753              : 
     754            0 :   count = copy_rtx (desc->niter_expr);
     755            0 :   word_mode_size = GET_MODE_PRECISION (word_mode);
     756            0 :   word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1;
     757            0 :   if (! doloop_seq
     758            0 :       && mode != word_mode
     759              :       /* Before trying mode different from the one in that # of iterations is
     760              :          computed, we must be sure that the number of iterations fits into
     761              :          the new mode.  */
     762            0 :       && (word_mode_size >= GET_MODE_PRECISION (mode)
     763            0 :           || wi::leu_p (iterations_max, word_mode_max)))
     764              :     {
     765            0 :       if (word_mode_size > GET_MODE_PRECISION (mode))
     766            0 :         count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
     767              :       else
     768            0 :         count = lowpart_subreg (word_mode, count, mode);
     769            0 :       PUT_MODE (doloop_reg, word_mode);
     770            0 :       doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
     771              :     }
     772            0 :   if (! doloop_seq)
     773              :     {
     774            0 :       if (dump_file)
     775            0 :         fprintf (dump_file,
     776              :                  "Doloop: Target unwilling to use doloop pattern!\n");
     777            0 :       return false;
     778              :     }
     779              : 
     780              :   /* If multiple instructions were created, the last must be the
     781              :      jump instruction.  */
     782              :   rtx_insn *doloop_insn = doloop_seq;
     783            0 :   while (NEXT_INSN (doloop_insn) != NULL_RTX)
     784              :     doloop_insn = NEXT_INSN (doloop_insn);
     785            0 :   if (!JUMP_P (doloop_insn)
     786            0 :       || !(condition = doloop_condition_get (doloop_insn)))
     787              :     {
     788            0 :       if (dump_file)
     789            0 :         fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
     790            0 :       return false;
     791              :     }
     792              : 
     793              :   /* Ensure that the new sequence doesn't clobber a register that
     794              :      is live at the end of the block.  */
     795            0 :   {
     796            0 :     bitmap modified = BITMAP_ALLOC (NULL);
     797              : 
     798            0 :     for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i))
     799            0 :       note_stores (i, record_reg_sets, modified);
     800              : 
     801            0 :     basic_block loop_end = desc->out_edge->src;
     802            0 :     bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
     803              :     /* iv_analysis_loop_init calls df_analyze_loop, which computes just
     804              :        partial df for blocks of the loop only.  The above will catch if
     805              :        any of the modified registers are use inside of the loop body, but
     806              :        it will most likely not have accurate info on registers used
     807              :        at the destination of the out_edge.  We call df_analyze on the
     808              :        whole function at the start of the pass though and iterate only
     809              :        on innermost loops or from innermost loops, so
     810              :        live in on desc->out_edge->dest should be still unmodified from
     811              :        the initial df_analyze.  */
     812            0 :     if (!fail)
     813            0 :       fail = bitmap_intersect_p (df_get_live_in (desc->out_edge->dest),
     814              :                                  modified);
     815            0 :     BITMAP_FREE (modified);
     816              : 
     817            0 :     if (fail)
     818              :       {
     819            0 :         if (dump_file)
     820            0 :           fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n");
     821            0 :         return false;
     822              :       }
     823              :   }
     824              : 
     825            0 :   doloop_modify (loop, desc, doloop_seq, condition, count);
     826            0 :   return true;
     827            0 : }
     828              : 
     829              : /* This is the main entry point.  Process all loops using doloop_optimize.  */
     830              : 
     831              : void
     832            0 : doloop_optimize_loops (void)
     833              : {
     834            0 :   if (optimize == 1)
     835              :     {
     836            0 :       df_live_add_problem ();
     837            0 :       df_live_set_all_dirty ();
     838              :     }
     839              : 
     840            0 :   df_analyze ();
     841              : 
     842            0 :   for (auto loop : loops_list (cfun,
     843            0 :                                targetm.can_use_doloop_p
     844              :                                == can_use_doloop_if_innermost
     845            0 :                                ? LI_ONLY_INNERMOST : LI_FROM_INNERMOST))
     846            0 :     doloop_optimize (loop);
     847              : 
     848            0 :   if (optimize == 1)
     849            0 :     df_remove_problem (df_live);
     850              : 
     851            0 :   iv_analysis_done ();
     852              : 
     853            0 :   checking_verify_loop_structure ();
     854            0 : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.