Line data Source code
1 : /* Branch prediction routines for the GNU compiler.
2 : Copyright (C) 2000-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* References:
21 :
22 : [1] "Branch Prediction for Free"
23 : Ball and Larus; PLDI '93.
24 : [2] "Static Branch Frequency and Program Profile Analysis"
25 : Wu and Larus; MICRO-27.
26 : [3] "Corpus-based Static Branch Prediction"
27 : Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95. */
28 :
29 : #include "config.h"
30 : #include "system.h"
31 : #include "coretypes.h"
32 : #include "backend.h"
33 : #include "rtl.h"
34 : #include "tree.h"
35 : #include "gimple.h"
36 : #include "cfghooks.h"
37 : #include "tree-pass.h"
38 : #include "ssa.h"
39 : #include "memmodel.h"
40 : #include "emit-rtl.h"
41 : #include "cgraph.h"
42 : #include "coverage.h"
43 : #include "diagnostic-core.h"
44 : #include "gimple-predict.h"
45 : #include "fold-const.h"
46 : #include "calls.h"
47 : #include "cfganal.h"
48 : #include "profile.h"
49 : #include "sreal.h"
50 : #include "cfgloop.h"
51 : #include "gimple-iterator.h"
52 : #include "tree-cfg.h"
53 : #include "tree-ssa-loop-niter.h"
54 : #include "tree-ssa-loop.h"
55 : #include "tree-scalar-evolution.h"
56 : #include "ipa-utils.h"
57 : #include "gimple-pretty-print.h"
58 : #include "selftest.h"
59 : #include "cfgrtl.h"
60 : #include "stringpool.h"
61 : #include "attribs.h"
62 :
/* Enum with reasons why a predictor is ignored.  Used as index into
   reason_messages below, so the two must be kept in sync.  */

enum predictor_reason
{
  REASON_NONE,				/* Prediction used as-is.  */
  REASON_IGNORED,			/* Prediction dropped.  */
  REASON_SINGLE_EDGE_DUPLICATE,		/* Duplicate for a single edge.  */
  REASON_EDGE_PAIR_DUPLICATE		/* Duplicate for an edge pair.  */
};
72 :
/* String messages for the aforementioned enum.  Indexed by
   predictor_reason (see dump_prediction); keep order in sync.  */

static const char *reason_messages[] = {"", " (ignored)",
    " (single edge duplicate)", " (edge pair duplicate)"};
77 :
78 :
79 : static void combine_predictions_for_insn (rtx_insn *, basic_block);
80 : static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
81 : enum predictor_reason, edge);
82 : static void predict_paths_leading_to (basic_block, enum br_predictor,
83 : enum prediction,
84 : class loop *in_loop = NULL);
85 : static void predict_paths_leading_to_edge (edge, enum br_predictor,
86 : enum prediction,
87 : class loop *in_loop = NULL);
88 : static bool can_predict_insn_p (const rtx_insn *);
89 : static HOST_WIDE_INT get_predictor_value (br_predictor, HOST_WIDE_INT);
90 : static void determine_unlikely_bbs ();
91 : static void estimate_bb_frequencies ();
92 :
/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call, in
				   REG_BR_PROB_BASE units.  */
  const int flags;		/* Bitmask of PRED_FLAG_* values.  */
};
103 :
104 : /* Use given predictor without Dempster-Shaffer theory if it matches
105 : using first_match heuristics. */
106 : #define PRED_FLAG_FIRST_MATCH 1
107 :
108 : /* Recompute hitrate in percent to our representation. */
109 :
110 : #define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
111 :
/* Expand every DEF_PREDICTOR entry in predict.def into an initializer
   for the table below; the table is indexed by enum br_predictor.  */
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR
120 :
121 : static gcov_type min_count = -1;
122 :
123 : /* Determine the threshold for hot BB counts. */
124 :
gcov_type
get_hot_bb_threshold ()
{
  /* Compute lazily on first use; -1 marks "not yet determined".  */
  if (min_count == -1)
    {
      const int hot_frac = param_hot_bb_count_fraction;
      /* A zero fraction disables hotness detection: use the maximal
	 count so no block can reach the threshold.  */
      const gcov_type min_hot_count
	= hot_frac
	  ? profile_info->sum_max / hot_frac
	  : (gcov_type)profile_count::max_count;
      set_hot_bb_threshold (min_hot_count);
      if (dump_file)
	fprintf (dump_file, "Setting hotness threshold to %" PRId64 ".\n",
		 min_hot_count);
    }
  return min_count;
}
142 :
143 : /* Set the threshold for hot BB counts. */
144 :
void
set_hot_bb_threshold (gcov_type min)
{
  /* Cached; also short-circuits the lazy init in get_hot_bb_threshold.  */
  min_count = min;
}
150 :
151 : /* Return TRUE if COUNT is considered to be hot in function FUN. */
152 :
bool
maybe_hot_count_p (struct function *fun, profile_count count)
{
  /* Unknown counts are optimistically considered hot.  */
  if (!count.initialized_p ())
    return true;
  if (count.ipa () == profile_count::zero ())
    return false;
  /* For statically estimated (non-IPA) profiles fall back to
     node-frequency and entry-count heuristics.  */
  if (!count.ipa_p ())
    {
      struct cgraph_node *node = cgraph_node::get (fun->decl);
      if (!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
	{
	  if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
	    return false;
	  if (node->frequency == NODE_FREQUENCY_HOT)
	    return true;
	}
      if (profile_status_for_fn (fun) == PROFILE_ABSENT)
	return true;
      /* In a function executed once, blocks run markedly less often
	 than the entry block are not hot.  */
      if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
	  && count < (ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (2, 3)))
	return false;
      if (count * param_hot_bb_frequency_fraction
	  < ENTRY_BLOCK_PTR_FOR_FN (fun)->count)
	return false;
      return true;
    }
  /* Code executed at most once is not hot.  */
  if (count <= MAX (profile_info ? profile_info->runs : 1, 1))
    return false;
  return (count >= get_hot_bb_threshold ());
}
185 :
186 : /* Return true if basic block BB of function FUN can be CPU intensive
187 : and should thus be optimized for maximum performance. */
188 :
189 : bool
190 1017697212 : maybe_hot_bb_p (struct function *fun, const_basic_block bb)
191 : {
192 1017697212 : gcc_checking_assert (fun);
193 1017697212 : return maybe_hot_count_p (fun, bb->count);
194 : }
195 :
196 : /* Return true if edge E can be CPU intensive and should thus be optimized
197 : for maximum performance. */
198 :
bool
maybe_hot_edge_p (edge e)
{
  /* Edge hotness is judged by its (possibly estimated) count.  */
  return maybe_hot_count_p (cfun, e->count ());
}
204 :
205 : /* Return true if COUNT is considered to be never executed in function FUN
206 : or if function FUN is considered so in the static profile. */
207 :
static bool
probably_never_executed (struct function *fun, profile_count count)
{
  gcc_checking_assert (fun);
  if (count.ipa () == profile_count::zero ())
    return true;
  /* Do not trust adjusted counts.  This will make us to drop int cold section
     code with low execution count as a result of inlining. These low counts
     are not safe even with read profile and may lead us to dropping
     code which actually gets executed into cold section of binary that is not
     desirable.  */
  if (count.precise_p () && profile_status_for_fn (fun) == PROFILE_READ)
    {
      const int unlikely_frac = param_unlikely_bb_count_fraction;
      /* Executed often enough relative to train runs => not never.  */
      if (count * unlikely_frac >= profile_info->runs)
	return false;
      return true;
    }
  /* Without a read profile, trust the node-level "unlikely" marking.  */
  if ((!profile_info || profile_status_for_fn (fun) != PROFILE_READ)
      && (cgraph_node::get (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}
232 :
233 : /* Return true if basic block BB of function FUN is probably never executed. */
234 :
bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  /* Judge the block by its execution count.  */
  return probably_never_executed (fun, bb->count);
}
240 :
241 : /* Return true if edge E is unlikely executed for obvious reasons. */
242 :
static bool
unlikely_executed_edge_p (edge e)
{
  /* An edge is obviously unlikely when its source never runs, its
     probability is "never", or it is a fake edge.  */
  return (e->src->count == profile_count::zero ()
	  || e->probability == profile_probability::never ())
	 || (e->flags & EDGE_FAKE)
	 /* If we read profile and know EH edge is executed, trust it.
	    Otherwise we consider EH edges never executed.  */
	 || ((e->flags & EDGE_EH) && !e->probability.reliable_p ());
}
253 :
254 : /* Return true if edge E of function FUN is probably never executed. */
255 :
256 : bool
257 41405406 : probably_never_executed_edge_p (struct function *fun, edge e)
258 : {
259 41405406 : if (unlikely_executed_edge_p (e))
260 : return true;
261 39312000 : return probably_never_executed (fun, e->count ());
262 : }
263 :
264 : /* Return true if function FUN should always be optimized for size. */
265 :
optimize_size_level
optimize_function_for_size_p (struct function *fun)
{
  /* With no function context, fall back to the global -Os setting.  */
  if (!fun || !fun->decl)
    return optimize_size ? OPTIMIZE_SIZE_MAX : OPTIMIZE_SIZE_NO;
  cgraph_node *n = cgraph_node::get (fun->decl);
  if (n)
    return n->optimize_for_size_p ();
  return OPTIMIZE_SIZE_NO;
}
276 :
277 : /* Return true if function FUN should always be optimized for speed. */
278 :
bool
optimize_function_for_speed_p (struct function *fun)
{
  /* Speed is simply the negation of any size preference.  */
  return !optimize_function_for_size_p (fun);
}
284 :
285 : /* Return the optimization type that should be used for function FUN. */
286 :
287 : optimization_type
288 0 : function_optimization_type (struct function *fun)
289 : {
290 0 : return (optimize_function_for_speed_p (fun)
291 0 : ? OPTIMIZE_FOR_SPEED
292 0 : : OPTIMIZE_FOR_SIZE);
293 : }
294 :
295 : /* Return TRUE if basic block BB should be optimized for size. */
296 :
optimize_size_level
optimize_bb_for_size_p (const_basic_block bb)
{
  /* Start from the function-wide setting and only strengthen it.  */
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);

  if (bb && ret < OPTIMIZE_SIZE_MAX && bb->count == profile_count::zero ())
    ret = OPTIMIZE_SIZE_MAX;
  if (bb && ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_bb_p (cfun, bb))
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}
308 :
309 : /* Return TRUE if basic block BB should be optimized for speed. */
310 :
bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  /* Speed is the negation of any size preference for BB.  */
  return !optimize_bb_for_size_p (bb);
}
316 :
317 : /* Return the optimization type that should be used for basic block BB. */
318 :
319 : optimization_type
320 5358767 : bb_optimization_type (const_basic_block bb)
321 : {
322 5358767 : return (optimize_bb_for_speed_p (bb)
323 5358767 : ? OPTIMIZE_FOR_SPEED
324 5358767 : : OPTIMIZE_FOR_SIZE);
325 : }
326 :
327 : /* Return TRUE if edge E should be optimized for size. */
328 :
optimize_size_level
optimize_edge_for_size_p (edge e)
{
  /* Start from the function-wide setting and only strengthen it.  */
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);

  if (ret < OPTIMIZE_SIZE_MAX && unlikely_executed_edge_p (e))
    ret = OPTIMIZE_SIZE_MAX;
  if (ret < OPTIMIZE_SIZE_BALANCED && !maybe_hot_edge_p (e))
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}
340 :
341 : /* Return TRUE if edge E should be optimized for speed. */
342 :
bool
optimize_edge_for_speed_p (edge e)
{
  /* Speed is the negation of any size preference for E.  */
  return !optimize_edge_for_size_p (e);
}
348 :
349 : /* Return TRUE if the current function is optimized for size. */
350 :
optimize_size_level
optimize_insn_for_size_p (void)
{
  /* Combine the function-wide setting with the RTL-expansion hotness
     flag set by rtl_profile_for_bb / rtl_profile_for_edge.  */
  enum optimize_size_level ret = optimize_function_for_size_p (cfun);
  if (ret < OPTIMIZE_SIZE_BALANCED && !crtl->maybe_hot_insn_p)
    ret = OPTIMIZE_SIZE_BALANCED;
  return ret;
}
359 :
360 : /* Return TRUE if the current function is optimized for speed. */
361 :
bool
optimize_insn_for_speed_p (void)
{
  /* Speed is the negation of any size preference for the insn.  */
  return !optimize_insn_for_size_p ();
}
367 :
368 : /* Return the optimization type that should be used for the current
369 : instruction. */
370 :
371 : optimization_type
372 7252 : insn_optimization_type ()
373 : {
374 7252 : return (optimize_insn_for_speed_p ()
375 7252 : ? OPTIMIZE_FOR_SPEED
376 7252 : : OPTIMIZE_FOR_SIZE);
377 : }
378 :
379 : /* Return TRUE if LOOP should be optimized for size. */
380 :
optimize_size_level
optimize_loop_for_size_p (class loop *loop)
{
  /* A loop is judged by its header block.  */
  return optimize_bb_for_size_p (loop->header);
}
386 :
387 : /* Return TRUE if LOOP should be optimized for speed. */
388 :
bool
optimize_loop_for_speed_p (class loop *loop)
{
  /* A loop is judged by its header block.  */
  return optimize_bb_for_speed_p (loop->header);
}
394 :
395 : /* Return TRUE if nest rooted at LOOP should be optimized for speed. */
396 :
bool
optimize_loop_nest_for_speed_p (class loop *loop)
{
  class loop *l = loop;
  /* The whole nest is "for speed" if any loop in it is.  */
  if (optimize_loop_for_speed_p (loop))
    return true;
  /* Walk the loop tree rooted at LOOP: descend into inner loops
     first, then siblings, climbing back out when a subtree ends.  */
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
	return true;
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  /* No inner loop and no sibling: climb until a sibling exists
	     or we are back at the nest root.  */
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}
422 :
423 : /* Return TRUE if nest rooted at LOOP should be optimized for size. */
424 :
optimize_size_level
optimize_loop_nest_for_size_p (class loop *loop)
{
  /* The nest's size level is the minimum over all loops in the nest;
     OPTIMIZE_SIZE_NO is the bottom, so we can stop there early.  */
  enum optimize_size_level ret = optimize_loop_for_size_p (loop);
  class loop *l = loop;

  /* Same loop-tree walk as in optimize_loop_nest_for_speed_p.  */
  l = loop->inner;
  while (l && l != loop)
    {
      if (ret == OPTIMIZE_SIZE_NO)
	break;
      ret = MIN (optimize_loop_for_size_p (l), ret);
      if (l->inner)
	l = l->inner;
      else if (l->next)
	l = l->next;
      else
	{
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return ret;
}
451 :
452 : /* Return true if edge E is likely to be well predictable by branch
453 : predictor. */
454 :
455 : bool
456 5778536 : predictable_edge_p (edge e)
457 : {
458 5778536 : if (!e->probability.initialized_p ())
459 : return false;
460 5777769 : if ((e->probability.to_reg_br_prob_base ()
461 5777769 : <= param_predictable_branch_outcome * REG_BR_PROB_BASE / 100)
462 5777769 : || (REG_BR_PROB_BASE - e->probability.to_reg_br_prob_base ()
463 : <= param_predictable_branch_outcome * REG_BR_PROB_BASE / 100))
464 624645 : return true;
465 : return false;
466 : }
467 :
468 :
469 : /* Set RTL expansion for BB profile. */
470 :
void
rtl_profile_for_bb (basic_block bb)
{
  /* Record BB's hotness so optimize_insn_for_size_p can consult it.  */
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}
476 :
477 : /* Set RTL expansion for edge profile. */
478 :
void
rtl_profile_for_edge (edge e)
{
  /* Record E's hotness so optimize_insn_for_size_p can consult it.  */
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}
484 :
485 : /* Set RTL expansion to default mode (i.e. when profile info is not known). */
void
default_rtl_profile (void)
{
  /* Without profile information, assume code is hot.  */
  crtl->maybe_hot_insn_p = true;
}
491 :
492 : /* Return true if the one of outgoing edges is already predicted by
493 : PREDICTOR. */
494 :
bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  /* Predictions on RTL are stored as REG_BR_PRED notes on the block's
     final jump; the note's first operand is the predictor number.  */
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}
507 :
508 : /* Structure representing predictions in tree level. */
509 :
/* Structure representing predictions in tree level.  A singly linked
   list of these hangs off each basic block in bb_predictions.  */

struct edge_prediction {
  struct edge_prediction *ep_next;	/* Next prediction in the list.  */
  edge ep_edge;				/* Edge being predicted.  */
  enum br_predictor ep_predictor;	/* Heuristic making the prediction.  */
  int ep_probability;			/* Probability of EP_EDGE, in
					   REG_BR_PROB_BASE units.  */
};
516 :
/* This map contains for a basic block the list of predictions for the
   outgoing edges.  NULL when no prediction pass is active (callers
   check before dereferencing).  */

static hash_map<const_basic_block, edge_prediction *> *bb_predictions;
521 :
/* Global cache for expr_expected_value, keyed by SSA name version.  */

struct expected_value
{
  tree val;			/* Cached expected value.  */
  enum br_predictor predictor;	/* Predictor that supplied it.  */
  HOST_WIDE_INT probability;	/* Its associated probability.  */
};
static hash_map<int_hash<unsigned, 0>, expected_value> *ssa_expected_value;
531 :
532 : /* Return true if the one of outgoing edges is already predicted by
533 : PREDICTOR. */
534 :
535 : bool
536 3180986 : gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
537 : {
538 3180986 : struct edge_prediction *i;
539 3180986 : edge_prediction **preds = bb_predictions->get (bb);
540 :
541 3180986 : if (!preds)
542 : return false;
543 :
544 1583643 : for (i = *preds; i; i = i->ep_next)
545 823710 : if (i->ep_predictor == predictor)
546 : return true;
547 : return false;
548 : }
549 :
550 : /* Return true if the one of outgoing edges is already predicted by
551 : PREDICTOR for edge E predicted as TAKEN. */
552 :
bool
edge_predicted_by_p (edge e, enum br_predictor predictor, bool taken)
{
  struct edge_prediction *i;
  basic_block bb = e->src;
  edge_prediction **preds = bb_predictions->get (bb);
  if (!preds)
    return false;

  /* Reconstruct the probability the predictor would have recorded for
     this TAKEN direction, then look for an exact match.  */
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  for (i = *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor
	&& i->ep_edge == e
	&& i->ep_probability == probability)
      return true;
  return false;
}
574 :
575 : /* Same predicate as above, working on edges. */
bool
edge_probability_reliable_p (const_edge e)
{
  /* Delegate to the probability's own quality tracking.  */
  return e->probability.probably_reliable_p ();
}
581 :
582 : /* Same predicate as edge_probability_reliable_p, working on notes. */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  /* Decode the REG_BR_PROB note back to a profile_probability and ask
     it about its quality.  */
  return profile_probability::from_reg_br_prob_note
		 (XINT (note, 0)).probably_reliable_p ();
}
590 :
/* Attach a REG_BR_PRED note recording PREDICTOR and PROBABILITY to
   conditional jump INSN.  No-op when guessing is disabled.  */

static void
predict_insn (rtx_insn *insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}
603 :
604 : /* Predict insn by given predictor. */
605 :
606 : void
607 87455 : predict_insn_def (rtx_insn *insn, enum br_predictor predictor,
608 : enum prediction taken)
609 : {
610 87455 : int probability = predictor_info[(int) predictor].hitrate;
611 87455 : gcc_assert (probability != PROB_UNINITIALIZED);
612 :
613 87455 : if (taken != TAKEN)
614 5841 : probability = REG_BR_PROB_BASE - probability;
615 :
616 87455 : predict_insn (insn, predictor, probability);
617 87455 : }
618 :
619 : /* Predict edge E with given probability if possible. */
620 :
void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx_insn *last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}
638 :
639 : /* Predict edge E with the given PROBABILITY. */
/* Predict edge E with the given PROBABILITY.  Records the prediction in
   bb_predictions; silently does nothing for entry-block edges, blocks
   with a single successor, or when guessing is disabled.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob
      && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      edge_prediction *&preds = bb_predictions->get_or_insert (e->src);

      /* Push onto the front of the source block's prediction list.  */
      i->ep_next = preds;
      preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}
658 :
659 : /* Filter edge predictions PREDS by a function FILTER: if FILTER return false
660 : the prediction is removed.
661 : DATA are passed to the filter function. */
662 :
663 : static void
664 2844667 : filter_predictions (edge_prediction **preds,
665 : bool (*filter) (edge_prediction *, void *), void *data)
666 : {
667 2844667 : if (!bb_predictions)
668 : return;
669 :
670 2844667 : if (preds)
671 : {
672 : struct edge_prediction **prediction = preds;
673 : struct edge_prediction *next;
674 :
675 7871877 : while (*prediction)
676 : {
677 5027210 : if ((*filter) (*prediction, data))
678 4517741 : prediction = &((*prediction)->ep_next);
679 : else
680 : {
681 509469 : next = (*prediction)->ep_next;
682 509469 : free (*prediction);
683 509469 : *prediction = next;
684 : }
685 : }
686 : }
687 : }
688 :
689 : /* Filter function predicate that returns true for a edge predicate P
690 : if its edge is equal to DATA. */
691 :
692 : static bool
693 0 : not_equal_edge_p (edge_prediction *p, void *data)
694 : {
695 0 : return p->ep_edge != (edge)data;
696 : }
697 :
698 : /* Remove all predictions on given basic block that are attached
699 : to edge E. */
700 : void
701 84116832 : remove_predictions_associated_with_edge (edge e)
702 : {
703 84116832 : if (!bb_predictions)
704 : return;
705 :
706 0 : edge_prediction **preds = bb_predictions->get (e->src);
707 0 : filter_predictions (preds, not_equal_edge_p, e);
708 : }
709 :
710 : /* Clears the list of predictions stored for BB. */
711 :
712 : static void
713 11405265 : clear_bb_predictions (basic_block bb)
714 : {
715 11405265 : edge_prediction **preds = bb_predictions->get (bb);
716 11405265 : struct edge_prediction *pred, *next;
717 :
718 11405265 : if (!preds)
719 : return;
720 :
721 9231800 : for (pred = *preds; pred; pred = next)
722 : {
723 5657412 : next = pred->ep_next;
724 5657412 : free (pred);
725 : }
726 3574388 : *preds = NULL;
727 : }
728 :
729 : /* Return true when we can store prediction on insn INSN.
730 : At the moment we represent predictions only on conditional
731 : jumps, not at computed jump or other complicated cases. */
/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jump or other complicated cases.  */
static bool
can_predict_insn_p (const rtx_insn *insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}
739 :
740 : /* Predict edge E by given predictor if possible. */
741 :
742 : void
743 5073078 : predict_edge_def (edge e, enum br_predictor predictor,
744 : enum prediction taken)
745 : {
746 5073078 : int probability = predictor_info[(int) predictor].hitrate;
747 :
748 5073078 : if (taken != TAKEN)
749 4088046 : probability = REG_BR_PROB_BASE - probability;
750 :
751 5073078 : predict_edge (e, predictor, probability);
752 5073078 : }
753 :
754 : /* Invert all branch predictions or probability notes in the INSN. This needs
755 : to be done each time we invert the condition used by the jump. */
756 :
757 : void
758 12841169 : invert_br_probabilities (rtx insn)
759 : {
760 12841169 : rtx note;
761 :
762 37978443 : for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
763 25137274 : if (REG_NOTE_KIND (note) == REG_BR_PROB)
764 25522824 : XINT (note, 0) = profile_probability::from_reg_br_prob_note
765 12761412 : (XINT (note, 0)).invert ().to_reg_br_prob_note ();
766 12375862 : else if (REG_NOTE_KIND (note) == REG_BR_PRED)
767 0 : XEXP (XEXP (note, 0), 1)
768 0 : = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
769 12841169 : }
770 :
771 : /* Dump information about the branch prediction to the output file. */
772 :
773 : static void
774 12035514 : dump_prediction (FILE *file, enum br_predictor predictor, int probability,
775 : basic_block bb, enum predictor_reason reason = REASON_NONE,
776 : edge ep_edge = NULL)
777 : {
778 12035514 : edge e = ep_edge;
779 12035514 : edge_iterator ei;
780 :
781 12035514 : if (!file)
782 12033647 : return;
783 :
784 1867 : if (e == NULL)
785 1247 : FOR_EACH_EDGE (e, ei, bb->succs)
786 1247 : if (! (e->flags & EDGE_FALLTHRU))
787 : break;
788 :
789 1867 : char edge_info_str[128];
790 1867 : if (ep_edge)
791 620 : sprintf (edge_info_str, " of edge %d->%d", ep_edge->src->index,
792 620 : ep_edge->dest->index);
793 : else
794 1247 : edge_info_str[0] = '\0';
795 :
796 1867 : fprintf (file, " %s heuristics%s%s: %.2f%%",
797 1867 : predictor_info[predictor].name,
798 1867 : edge_info_str, reason_messages[reason],
799 1867 : probability * 100.0 / REG_BR_PROB_BASE);
800 :
801 1867 : if (bb->count.initialized_p ())
802 : {
803 264 : fprintf (file, " exec ");
804 264 : bb->count.dump (file);
805 264 : if (e && e->count ().initialized_p () && bb->count.to_gcov_type ())
806 : {
807 183 : fprintf (file, " hit ");
808 183 : e->count ().dump (file);
809 366 : fprintf (file, " (%.1f%%)", e->count ().to_gcov_type() * 100.0
810 183 : / bb->count.to_gcov_type ());
811 : }
812 : }
813 :
814 1867 : fprintf (file, "\n");
815 :
816 : /* Print output that be easily read by analyze_brprob.py script. We are
817 : interested only in counts that are read from GCDA files. */
818 1867 : if (dump_file && (dump_flags & TDF_DETAILS)
819 1116 : && bb->count.precise_p ()
820 1890 : && reason == REASON_NONE)
821 : {
822 15 : fprintf (file, ";;heuristics;%s;%" PRId64 ";%" PRId64 ";%.1f;\n",
823 : predictor_info[predictor].name,
824 30 : bb->count.to_gcov_type (), e->count ().to_gcov_type (),
825 : probability * 100.0 / REG_BR_PROB_BASE);
826 : }
827 : }
828 :
829 : /* Return true if STMT is known to be unlikely executed. */
830 :
831 : static bool
832 172708789 : unlikely_executed_stmt_p (gimple *stmt)
833 : {
834 172708789 : if (!is_gimple_call (stmt))
835 : return false;
836 :
837 : /* Those calls are inserted by optimizers when code is known to be
838 : unreachable or undefined. */
839 12283880 : if (gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE)
840 12237247 : || gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE_TRAP)
841 24521121 : || gimple_call_builtin_p (stmt, BUILT_IN_TRAP))
842 79887 : return true;
843 :
844 : /* Checks below do not need to be fully reliable. Cold attribute may be
845 : misplaced by user and in the presence of comdat we may result in call to
846 : function with 0 profile having non-zero profile.
847 :
848 : We later detect that profile is lost and will drop the profile of the
849 : comdat.
850 :
851 : So if we think profile count is reliable, do not try to apply these
852 : heuristics. */
853 12203993 : if (gimple_bb (stmt)->count.reliable_p ()
854 181269886 : && gimple_bb (stmt)->count.nonzero_p ())
855 : return false;
856 : /* NORETURN attribute alone is not strong enough: exit() may be quite
857 : likely executed once during program run. */
858 12203727 : if (gimple_call_fntype (stmt)
859 11700250 : && lookup_attribute ("cold",
860 11700250 : TYPE_ATTRIBUTES (gimple_call_fntype (stmt)))
861 12203727 : && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
862 : return true;
863 12203727 : tree decl = gimple_call_fndecl (stmt);
864 12203727 : if (!decl)
865 : return false;
866 11457732 : if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))
867 11457732 : && !lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl)))
868 : return true;
869 :
870 11155062 : cgraph_node *n = cgraph_node::get (decl);
871 11155062 : if (!n)
872 : return false;
873 :
874 11148866 : availability avail;
875 11148866 : n = n->ultimate_alias_target (&avail);
876 11148866 : if (avail < AVAIL_AVAILABLE)
877 : return false;
878 3278385 : if (!n->analyzed
879 3278348 : || n->decl == current_function_decl)
880 : return false;
881 3260339 : return n->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED;
882 : }
883 :
884 : /* Return true if BB is unlikely executed. */
885 :
886 : static bool
887 32504725 : unlikely_executed_bb_p (basic_block bb)
888 : {
889 32504725 : if (bb->count == profile_count::zero ())
890 0 : return true;
891 32504725 : if (bb == ENTRY_BLOCK_PTR_FOR_FN (cfun) || bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
892 : return false;
893 65009450 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
894 195520810 : !gsi_end_p (gsi); gsi_next (&gsi))
895 : {
896 172708789 : if (unlikely_executed_stmt_p (gsi_stmt (gsi)))
897 : return true;
898 172311526 : if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
899 : return false;
900 : }
901 : return false;
902 : }
903 :
904 : /* We cannot predict the probabilities of outgoing edges of bb. Set them
905 : evenly and hope for the best. If UNLIKELY_EDGES is not null, distribute
906 : even probability for all edges not mentioned in the set. These edges
907 : are given PROB_VERY_UNLIKELY probability. Similarly for LIKELY_EDGES,
908 : if we have exactly one likely edge, make the other edges predicted
909 : as not probable. */
910 :
911 : static void
912 8510985 : set_even_probabilities (basic_block bb,
913 : hash_set<edge> *unlikely_edges = NULL,
914 : hash_set<edge_prediction *> *likely_edges = NULL)
915 : {
916 8510985 : unsigned nedges = 0, unlikely_count = 0;
917 8510985 : edge e = NULL;
918 8510985 : edge_iterator ei;
919 8510985 : profile_probability all = profile_probability::always ();
920 :
921 18655488 : FOR_EACH_EDGE (e, ei, bb->succs)
922 10144503 : if (e->probability.initialized_p ())
923 9117435 : all -= e->probability;
924 1027068 : else if (!unlikely_executed_edge_p (e))
925 : {
926 622357 : nedges++;
927 622357 : if (unlikely_edges != NULL && unlikely_edges->contains (e))
928 : {
929 5097 : all -= profile_probability::very_unlikely ();
930 5097 : unlikely_count++;
931 : }
932 : }
933 :
934 : /* Make the distribution even if all edges are unlikely. */
935 8510985 : unsigned likely_count = likely_edges ? likely_edges->elements () : 0;
936 8510985 : if (unlikely_count == nedges)
937 : {
938 8006609 : unlikely_edges = NULL;
939 8006609 : unlikely_count = 0;
940 : }
941 :
942 : /* If we have one likely edge, then use its probability and distribute
943 : remaining probabilities as even. */
944 8510985 : if (likely_count == 1)
945 : {
946 5217 : FOR_EACH_EDGE (e, ei, bb->succs)
947 3482 : if (e->probability.initialized_p ())
948 : ;
949 24 : else if (!unlikely_executed_edge_p (e))
950 : {
951 24 : edge_prediction *prediction = *likely_edges->begin ();
952 24 : int p = prediction->ep_probability;
953 24 : profile_probability prob
954 24 : = profile_probability::from_reg_br_prob_base (p);
955 :
956 24 : if (prediction->ep_edge == e)
957 6 : e->probability = prob;
958 18 : else if (unlikely_edges != NULL && unlikely_edges->contains (e))
959 1 : e->probability = profile_probability::very_unlikely ();
960 : else
961 : {
962 17 : profile_probability remainder = prob.invert ();
963 17 : remainder -= (profile_probability::very_unlikely ()
964 34 : * unlikely_count);
965 17 : int count = nedges - unlikely_count - 1;
966 17 : gcc_assert (count >= 0);
967 :
968 17 : e->probability = remainder / count;
969 : }
970 : }
971 : else
972 0 : e->probability = profile_probability::never ();
973 : }
974 : else
975 : {
976 : /* Make all unlikely edges unlikely and the rest will have even
977 : probability. */
978 8509250 : unsigned scale = nedges - unlikely_count;
979 18650271 : FOR_EACH_EDGE (e, ei, bb->succs)
980 10141021 : if (e->probability.initialized_p ())
981 : ;
982 1027044 : else if (!unlikely_executed_edge_p (e))
983 : {
984 622333 : if (unlikely_edges != NULL && unlikely_edges->contains (e))
985 5096 : e->probability = profile_probability::very_unlikely ();
986 : else
987 617237 : e->probability = (all / scale).guessed ();
988 : }
989 : else
990 404711 : e->probability = profile_probability::never ();
991 : }
992 8510985 : }
993 :
/* Attach a REG_BR_PROB note carrying probability PROB to the jump
   instruction JUMP.  JUMP must not already carry such a note.  */

void
add_reg_br_prob_note (rtx_insn *jump, profile_probability prob)
{
  gcc_checking_assert (JUMP_P (jump) && !find_reg_note (jump, REG_BR_PROB, 0));
  add_int_reg_note (jump, REG_BR_PROB, prob.to_reg_br_prob_note ());
}
1002 :
1003 : /* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
1004 : note if not already present. Remove now useless REG_BR_PRED notes. */
1005 :
static void
combine_predictions_for_insn (rtx_insn *insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  /* Insns we cannot predict (e.g. not a conditional jump) just get an
     even probability distribution on their successor edges.  */
  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor
	    && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	  best_probability = probability, best_predictor = predictor;

	/* Dempster-Shafer style evidence combination of the current
	   combined probability with this predictor's probability.  */
	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
			 bb, !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
			 bb, first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  /* Remove now useless REG_BR_PRED notes, dumping each one as we go.  */
  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      profile_probability p
	 = profile_probability::from_reg_br_prob_base (combined_probability);
      add_reg_br_prob_note (insn, p);

      /* Save the prediction into CFG in case we are seeing non-degenerated
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = p;
	  FALLTHRU_EDGE (bb)->probability
	    = BRANCH_EDGE (bb)->probability.invert ();
	}
    }
  else if (!single_succ_p (bb))
    {
      /* A REG_BR_PROB note already exists; propagate it to the edges.  */
      profile_probability prob = profile_probability::from_reg_br_prob_note
					(XINT (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = prob.invert ();
    }
  else
    single_succ_edge (bb)->probability = profile_probability::always ();
}
1125 :
/* Edge prediction hash traits.  */

struct predictor_hash: pointer_hash <edge_prediction>
{

  /* Hash a prediction by its predictor and normalized probability.  */
  static inline hashval_t hash (const edge_prediction *);
  /* Compare two predictions for equality of predictor and (possibly
     inverted) probability.  */
  static inline bool equal (const edge_prediction *, const edge_prediction *);
};
1134 :
1135 : /* Calculate hash value of an edge prediction P based on predictor and
1136 : normalized probability. */
1137 :
1138 : inline hashval_t
1139 12791894 : predictor_hash::hash (const edge_prediction *p)
1140 : {
1141 12791894 : inchash::hash hstate;
1142 12791894 : hstate.add_int (p->ep_predictor);
1143 :
1144 12791894 : int prob = p->ep_probability;
1145 12791894 : if (prob > REG_BR_PROB_BASE / 2)
1146 1647578 : prob = REG_BR_PROB_BASE - prob;
1147 :
1148 12791894 : hstate.add_int (prob);
1149 :
1150 12791894 : return hstate.end ();
1151 : }
1152 :
1153 : /* Return true whether edge predictions P1 and P2 use the same predictor and
1154 : have equal (or opposed probability). */
1155 :
1156 : inline bool
1157 2938404 : predictor_hash::equal (const edge_prediction *p1, const edge_prediction *p2)
1158 : {
1159 2938404 : return (p1->ep_predictor == p2->ep_predictor
1160 2938404 : && (p1->ep_probability == p2->ep_probability
1161 0 : || p1->ep_probability == REG_BR_PROB_BASE - p2->ep_probability));
1162 : }
1163 :
/* Hash traits for edge_prediction pointers: hash/equality comes from
   predictor_hash; slot removal performs no action (typed_noop_remove).  */
struct predictor_hash_traits: predictor_hash,
  typed_noop_remove <edge_prediction *> {};
1166 :
1167 : /* Return true if edge prediction P is not in DATA hash set. */
1168 :
1169 : static bool
1170 5027202 : not_removed_prediction_p (edge_prediction *p, void *data)
1171 : {
1172 5027202 : hash_set<edge_prediction *> *remove = (hash_set<edge_prediction *> *) data;
1173 5027202 : return !remove->contains (p);
1174 : }
1175 :
1176 : /* Prune predictions for a basic block BB. Currently we do following
1177 : clean-up steps:
1178 :
1179 : 1) remove duplicate prediction that is guessed with the same probability
1180 : (different than 1/2) to both edge
1181 : 2) remove duplicates for a prediction that belongs with the same probability
1182 : to a single edge
1183 :
1184 : */
1185 :
static void
prune_predictions_for_bb (basic_block bb)
{
  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      /* Hash table keyed by predictor + normalized probability (see
	 predictor_hash), so a prediction and its inverse collide.  */
      hash_table <predictor_hash_traits> s (13);
      hash_set <edge_prediction *> remove;

      /* Step 1: identify predictors that should be removed.  */
      for (edge_prediction *pred = *preds; pred; pred = pred->ep_next)
	{
	  edge_prediction *existing = s.find (pred);
	  if (existing)
	    {
	      if (pred->ep_edge == existing->ep_edge
		  && pred->ep_probability == existing->ep_probability)
		{
		  /* Remove a duplicate predictor.  */
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_SINGLE_EDGE_DUPLICATE, pred->ep_edge);

		  remove.add (pred);
		}
	      else if (pred->ep_edge != existing->ep_edge
		       && pred->ep_probability == existing->ep_probability
		       && pred->ep_probability != REG_BR_PROB_BASE / 2)
		{
		  /* Remove both predictors as they predict the same
		     for both edges.  */
		  dump_prediction (dump_file, existing->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   existing->ep_edge);
		  dump_prediction (dump_file, pred->ep_predictor,
				   pred->ep_probability, bb,
				   REASON_EDGE_PAIR_DUPLICATE,
				   pred->ep_edge);

		  remove.add (existing);
		  remove.add (pred);
		}
	    }

	  /* Record PRED (possibly overwriting a colliding entry) so later
	     predictions are compared against it.  */
	  edge_prediction **slot2 = s.find_slot (pred, INSERT);
	  *slot2 = pred;
	}

      /* Step 2: Remove predictors.  */
      filter_predictions (preds, not_removed_prediction_p, &remove);
    }
}
1240 :
1241 : /* Combine predictions into single probability and store them into CFG.
1242 : Remove now useless prediction entries.
1243 : If DRY_RUN is set, only produce dumps and do not modify profile. */
1244 :
static void
combine_predictions_for_bb (basic_block bb, bool dry_run)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  int nzero = 0;
  int nunknown = 0;

  /* Count successors that may be executed (NEDGES), remembering the first
     two, and count edges with unknown (NUNKNOWN) or zero (NZERO)
     probability.  Unlikely-executed edges get probability never.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!unlikely_executed_edge_p (e))
	{
	  nedges ++;
	  if (first && !second)
	    second = e;
	  if (!first)
	    first = e;
	}
      else if (!e->probability.initialized_p ())
	e->probability = profile_probability::never ();
      if (!e->probability.initialized_p ())
	nunknown++;
      else if (e->probability == profile_probability::never ())
	nzero++;
    }

  /* When there is no successor or only one choice, prediction is easy.

     When we have a basic block with more than 2 successors, the situation
     is more complicated as DS theory cannot be used literally.
     More precisely, let's assume we predicted edge e1 with probability p1,
     thus: m1({b1}) = p1.  As we're going to combine more than 2 edges, we
     need to find probability of e.g. m1({b2}), which we don't know.
     The only approximation is to equally distribute 1-p1 to all edges
     different from b1.

     According to numbers we've got from SPEC2006 benchmark, there's only
     one interesting reliable predictor (noreturn call), which can be
     handled with a bit easier approach.  */
  if (nedges != 2)
    {
      hash_set<edge> unlikely_edges (4);
      hash_set<edge_prediction *> likely_edges (4);

      /* Identify all edges that have a probability close to very unlikely.
	 Doing more for the very-unlikely case isn't worthwhile as there's
	 no such probability in the SPEC2006 benchmark.  */
      edge_prediction **preds = bb_predictions->get (bb);
      if (preds)
	for (pred = *preds; pred; pred = pred->ep_next)
	  {
	    if (pred->ep_probability <= PROB_VERY_UNLIKELY
		|| pred->ep_predictor == PRED_COLD_LABEL)
	      unlikely_edges.add (pred->ep_edge);
	    else if (pred->ep_probability >= PROB_VERY_LIKELY
		     || pred->ep_predictor == PRED_BUILTIN_EXPECT
		     || pred->ep_predictor == PRED_HOT_LABEL)
	      likely_edges.add (pred);
	  }

      /* It can happen that an edge is both in likely_edges and unlikely_edges.
	 Clear both sets in that situation.  */
      for (hash_set<edge_prediction *>::iterator it = likely_edges.begin ();
	   it != likely_edges.end (); ++it)
	if (unlikely_edges.contains ((*it)->ep_edge))
	  {
	    likely_edges.empty ();
	    unlikely_edges.empty ();
	    break;
	  }

      if (!dry_run)
	set_even_probabilities (bb, &unlikely_edges, &likely_edges);
      clear_bb_predictions (bb);
      if (dump_file)
	{
	  fprintf (dump_file, "Predictions for bb %i\n", bb->index);
	  if (unlikely_edges.is_empty ())
	    fprintf (dump_file,
		     "%i edges in bb %i predicted to even probabilities\n",
		     nedges, bb->index);
	  else
	    {
	      fprintf (dump_file,
		       "%i edges in bb %i predicted with some unlikely edges\n",
		       nedges, bb->index);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (!unlikely_executed_edge_p (e))
		  dump_prediction (dump_file, PRED_COMBINED,
		   e->probability.to_reg_br_prob_base (), bb, REASON_NONE, e);
	    }
	}
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  prune_predictions_for_bb (bb);

  edge_prediction **preds = bb_predictions->get (bb);

  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  /* Normalize all probabilities to be relative to FIRST.  */
	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor
	      && predictor_info[predictor].flags & PRED_FLAG_FIRST_MATCH)
	    {
	      struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
		if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
		  {
		    int probability2 = pred2->ep_probability;

		    if (pred2->ep_edge != first)
		      probability2 = REG_BR_PROB_BASE - probability2;

		    /* Same predictor fired in both directions: reject it
		       for first-match purposes.  */
		    if ((probability < REG_BR_PROB_BASE / 2) !=
			(probability2 < REG_BR_PROB_BASE / 2))
		      break;

		    /* If the same predictor later gave better result, go for it! */
		    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
			|| (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		      prob = probability2;
		  }
	      if (!pred2)
		best_probability = prob, best_predictor = predictor;
	    }

	  /* Dempster-Shafer style combination of the probabilities.  */
	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (best_predictor != END_PREDICTORS)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb);
  else
    {
      if (!first_match)
	dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
			 !first_match ? REASON_NONE : REASON_IGNORED);
      else
	dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
			 first_match ? REASON_NONE : REASON_IGNORED);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  dump_prediction (dump_file, predictor, probability, bb,
			   (!first_match || best_predictor == predictor)
			   ? REASON_NONE : REASON_IGNORED, pred->ep_edge);
	}
    }
  clear_bb_predictions (bb);


  /* If we have only one successor which is unknown, we can compute missing
     probability.  */
  if (nunknown == 1)
    {
      profile_probability prob = profile_probability::always ();
      edge missing = NULL;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability.initialized_p ())
	  prob -= e->probability;
	else if (missing == NULL)
	  missing = e;
	else
	  gcc_unreachable ();
       missing->probability = prob;
    }
  /* If nothing is unknown, we have nothing to update.  */
  else if (!nunknown && nzero != (int)EDGE_COUNT (bb->succs))
    ;
  else if (!dry_run)
    {
      first->probability
	 = profile_probability::from_reg_br_prob_base (combined_probability);
      second->probability = first->probability.invert ();
    }
}
1477 :
1478 : /* Check if T1 and T2 satisfy the IV_COMPARE condition.
1479 : Return the SSA_NAME if the condition satisfies, NULL otherwise.
1480 :
1481 : T1 and T2 should be one of the following cases:
1482 : 1. T1 is SSA_NAME, T2 is NULL
1483 : 2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
1484 : 3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4] */
1485 :
1486 : static tree
1487 9370 : strips_small_constant (tree t1, tree t2)
1488 : {
1489 9370 : tree ret = NULL;
1490 9370 : int value = 0;
1491 :
1492 9370 : if (!t1)
1493 : return NULL;
1494 9370 : else if (TREE_CODE (t1) == SSA_NAME)
1495 : ret = t1;
1496 1090 : else if (tree_fits_shwi_p (t1))
1497 93 : value = tree_to_shwi (t1);
1498 : else
1499 : return NULL;
1500 :
1501 8373 : if (!t2)
1502 : return ret;
1503 8373 : else if (tree_fits_shwi_p (t2))
1504 7824 : value = tree_to_shwi (t2);
1505 549 : else if (TREE_CODE (t2) == SSA_NAME)
1506 : {
1507 370 : if (ret)
1508 : return NULL;
1509 : else
1510 : ret = t2;
1511 : }
1512 :
1513 8096 : if (value <= 4 && value >= -4)
1514 : return ret;
1515 : else
1516 631 : return NULL;
1517 : }
1518 :
1519 : /* Return the SSA_NAME in T or T's operands.
1520 : Return NULL if SSA_NAME cannot be found. */
1521 :
1522 : static tree
1523 235723 : get_base_value (tree t)
1524 : {
1525 235723 : if (TREE_CODE (t) == SSA_NAME)
1526 : return t;
1527 :
1528 11614 : if (!BINARY_CLASS_P (t))
1529 : return NULL;
1530 :
1531 9370 : switch (TREE_OPERAND_LENGTH (t))
1532 : {
1533 0 : case 1:
1534 0 : return strips_small_constant (TREE_OPERAND (t, 0), NULL);
1535 9370 : case 2:
1536 9370 : return strips_small_constant (TREE_OPERAND (t, 0),
1537 18740 : TREE_OPERAND (t, 1));
1538 : default:
1539 : return NULL;
1540 : }
1541 : }
1542 :
1543 : /* Check the compare STMT in LOOP. If it compares an induction
1544 : variable to a loop invariant, return true, and save
1545 : LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */
1547 :
static bool
is_comparison_with_loop_invariant_p (gcond *stmt, class loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  /* Only ordinary comparisons are handled.  */
  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
      || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  /* Both operands must be affine IVs with constant steps, and exactly one
     of them must actually vary (nonzero step) — the other one is the loop
     invariant.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      /* The IV is on the RHS; canonicalize the comparison as if the IV
	 were on the LHS (EQ/NE are symmetric and need no inversion).  */
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  /* Strip small constants to expose the underlying SSA_NAMEs.  */
  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}
1630 :
1631 : /* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent. */
1632 :
1633 : static bool
1634 6892 : expr_coherent_p (tree t1, tree t2)
1635 : {
1636 6892 : gimple *stmt;
1637 6892 : tree ssa_name_1 = NULL;
1638 6892 : tree ssa_name_2 = NULL;
1639 :
1640 6892 : gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
1641 6892 : gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
1642 :
1643 6892 : if (t1 == t2)
1644 : return true;
1645 :
1646 2953 : if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
1647 : return true;
1648 1742 : if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
1649 : return false;
1650 :
1651 : /* Check to see if t1 is expressed/defined with t2. */
1652 318 : stmt = SSA_NAME_DEF_STMT (t1);
1653 318 : gcc_assert (stmt != NULL);
1654 318 : if (is_gimple_assign (stmt))
1655 : {
1656 194 : ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1657 194 : if (ssa_name_1 && ssa_name_1 == t2)
1658 : return true;
1659 : }
1660 :
1661 : /* Check to see if t2 is expressed/defined with t1. */
1662 318 : stmt = SSA_NAME_DEF_STMT (t2);
1663 318 : gcc_assert (stmt != NULL);
1664 318 : if (is_gimple_assign (stmt))
1665 : {
1666 194 : ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1667 194 : if (ssa_name_2 && ssa_name_2 == t1)
1668 : return true;
1669 : }
1670 :
1671 : /* Compare if t1 and t2's def_stmts are identical. */
1672 318 : if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
1673 : return true;
1674 : else
1675 : return false;
1676 : }
1677 :
1678 : /* Return true if E is predicted by one of loop heuristics. */
1679 :
1680 : static bool
1681 6073863 : predicted_by_loop_heuristics_p (basic_block bb)
1682 : {
1683 6073863 : struct edge_prediction *i;
1684 6073863 : edge_prediction **preds = bb_predictions->get (bb);
1685 :
1686 6073863 : if (!preds)
1687 : return false;
1688 :
1689 2123356 : for (i = *preds; i; i = i->ep_next)
1690 1808599 : if (i->ep_predictor == PRED_LOOP_ITERATIONS_GUESSED
1691 1808599 : || i->ep_predictor == PRED_LOOP_ITERATIONS_MAX
1692 : || i->ep_predictor == PRED_LOOP_ITERATIONS
1693 : || i->ep_predictor == PRED_LOOP_EXIT
1694 : || i->ep_predictor == PRED_LOOP_EXIT_WITH_RECURSION
1695 : || i->ep_predictor == PRED_LOOP_EXTRA_EXIT)
1696 : return true;
1697 : return false;
1698 : }
1699 :
1700 : /* Predict branch probability of BB when BB contains a branch that compares
1701 : an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
1702 : loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
1703 :
1704 : E.g.
1705 : for (int i = 0; i < bound; i++) {
1706 : if (i < bound - 2)
1707 : computation_1();
1708 : else
1709 : computation_2();
1710 : }
1711 :
1712 : In this loop, we will predict the branch inside the loop to be taken. */
1713 :
static void
predict_iv_comparison (class loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  /* Don't override predictions made by the loop heuristics.  */
  if (predicted_by_loop_heuristics_p (bb))
    return;

  gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  if (!stmt)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt,
					    loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  /* The IV bases of the loop bound and the comparison must be coherent
     for the relational reasoning below to be meaningful.  */
  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      wi::overflow_type overflow;
      bool overall_overflow = false;
      widest_int compare_count, tem;

      /* (loop_bound - base) / compare_step */
      tem = wi::sub (wi::to_widest (loop_bound_var),
		     wi::to_widest (compare_base), SIGNED, &overflow);
      overall_overflow |= overflow;
      widest_int loop_count = wi::div_trunc (tem,
					     wi::to_widest (compare_step_var),
					     SIGNED, &overflow);
      overall_overflow |= overflow;

      if (!wi::neg_p (wi::to_widest (compare_step_var))
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = wi::sub (wi::to_widest (loop_bound_var),
			 wi::to_widest (compare_var), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      else
	{
	  /* (compare_bound - base) / compare_step */
	  tem = wi::sub (wi::to_widest (compare_var),
			 wi::to_widest (compare_base), SIGNED, &overflow);
	  overall_overflow |= overflow;
	  compare_count = wi::div_trunc (tem, wi::to_widest (compare_step_var),
					 SIGNED, &overflow);
	  overall_overflow |= overflow;
	}
      /* Inclusive bounds contribute one extra iteration.  */
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (wi::neg_p (compare_count))
        compare_count = 0;
      if (wi::neg_p (loop_count))
        loop_count = 0;
      /* Probability of the branch is the fraction of loop iterations for
	 which the comparison holds.  */
      if (loop_count == 0)
	probability = 0;
      else if (wi::cmps (compare_count, loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
	{
	  tem = compare_count * REG_BR_PROB_BASE;
	  tem = wi::udiv_trunc (tem, loop_count);
	  probability = tem.to_uhwi ();
	}

      /* FIXME: The branch prediction seems broken. It has only 20% hitrate.  */
      if (!overall_overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite with compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	 for (i = s; i < h; i++)
	   if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}
1875 :
1876 : /* Predict for extra loop exits that will lead to EXIT_EDGE. The extra loop
1877 : exits are resulted from short-circuit conditions that will generate an
1878 : if_tmp. E.g.:
1879 :
1880 : if (foo() || global > 10)
1881 : break;
1882 :
1883 : This will be translated into:
1884 :
1885 : BB3:
1886 : loop header...
1887 : BB4:
1888 : if foo() goto BB6 else goto BB5
1889 : BB5:
1890 : if global > 10 goto BB6 else goto BB7
1891 : BB6:
1892 : goto BB7
1893 : BB7:
1894 : iftmp = (PHI 0(BB5), 1(BB6))
1895 : if iftmp == 1 goto BB8 else goto BB3
1896 : BB8:
1897 : outside of the loop...
1898 :
1899 : The edge BB7->BB8 is loop exit because BB8 is outside of the loop.
1900 : From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
1901 : exits. This function takes BB7->BB8 as input, and finds out the extra loop
1902 : exits to predict them using PRED_LOOP_EXTRA_EXIT. */
1903 :
1904 : static void
1905 832016 : predict_extra_loop_exits (class loop *loop, edge exit_edge)
1906 : {
1907 832016 : unsigned i;
1908 832016 : bool check_value_one;
1909 832016 : gimple *lhs_def_stmt;
1910 832016 : gphi *phi_stmt;
1911 832016 : tree cmp_rhs, cmp_lhs;
1912 :
 : /* The exit must be taken on a conditional jump (a GIMPLE_COND ending
 : exit_edge->src); anything else cannot match the iftmp pattern above. */
1913 1664032 : gcond *cmp_stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (exit_edge->src));
1914 818623 : if (!cmp_stmt)
1915 : return;
1916 :
 : /* Only conditions of the form "iftmp ==/!= 0-or-1" are handled: the RHS
 : must be the constant 0 or 1 and the LHS must be an SSA name. */
1917 818623 : cmp_rhs = gimple_cond_rhs (cmp_stmt);
1918 818623 : cmp_lhs = gimple_cond_lhs (cmp_stmt);
1919 818623 : if (!TREE_CONSTANT (cmp_rhs)
1920 818623 : || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1921 603076 : return;
1922 215547 : if (TREE_CODE (cmp_lhs) != SSA_NAME)
1923 : return;
1924 :
1925 : /* If check_value_one is true, only the phi_args with value '1' will lead
1926 : to loop exit. Otherwise, only the phi_args with value '0' will lead to
1927 : loop exit. */
1928 215547 : check_value_one = (((integer_onep (cmp_rhs))
1929 215547 : ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1930 215547 : ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));
1931 :
 : /* The compared SSA name must be merged by a PHI node (the iftmp of the
 : example); otherwise there are no short-circuit edges to discover. */
1932 215547 : lhs_def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1933 215547 : if (!lhs_def_stmt)
1934 : return;
1935 :
1936 215547 : phi_stmt = dyn_cast <gphi *> (lhs_def_stmt);
1937 : if (!phi_stmt)
1938 : return;
1939 :
 : /* Walk the PHI arguments; every constant argument equal to the exiting
 : value identifies an incoming edge that is effectively a loop exit. */
1940 223443 : for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1941 : {
1942 150464 : edge e1;
1943 150464 : edge_iterator ei;
1944 150464 : tree val = gimple_phi_arg_def (phi_stmt, i);
1945 150464 : edge e = gimple_phi_arg_edge (phi_stmt, i);
1946 :
1947 150464 : if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
1948 136064 : continue;
1949 39839 : if ((check_value_one ^ integer_onep (val)) == 1)
1950 19711 : continue;
 : /* If the argument's source block ends in a branch, the PHI edge itself
 : is the extra exit to predict; otherwise the block is a plain
 : forwarder (BB6 in the example) and its incoming edges are the real
 : exits. */
1951 20128 : if (EDGE_COUNT (e->src->succs) != 1)
1952 : {
1953 5728 : predict_paths_leading_to_edge (e, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN,
1954 : loop);
1955 5728 : continue;
1956 : }
1957 :
1958 31370 : FOR_EACH_EDGE (e1, ei, e->src->preds)
1959 16970 : predict_paths_leading_to_edge (e1, PRED_LOOP_EXTRA_EXIT, NOT_TAKEN,
1960 : loop);
1961 : }
1962 : }
1963 :
1964 :
1965 : /* Predict edge probabilities by exploiting loop structure. */
1966 :
1967 : static void
1968 273297 : predict_loops (void)
1969 : {
1970 273297 : basic_block bb;
1971 273297 : hash_set <class loop *> with_recursion(10);
1972 :
 : /* First collect every loop that contains a recursive call; such loops
 : use the *_WITH_RECURSION variants of the loop predictors below. */
1973 5507660 : FOR_EACH_BB_FN (bb, cfun)
1974 : {
1975 5234363 : gimple_stmt_iterator gsi;
1976 5234363 : tree decl;
1977 :
1978 34785413 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1979 24316687 : if (is_gimple_call (gsi_stmt (gsi))
1980 2231198 : && (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
1981 26364418 : && recursive_call_p (current_function_decl, decl))
1982 : {
 : /* Mark this loop and all enclosing loops; hash_set::add returns
 : true when already present, which stops the upward walk. */
1983 5858 : class loop *loop = bb->loop_father;
1984 12057 : while (loop && !with_recursion.add (loop))
1985 6199 : loop = loop_outer (loop);
1986 : }
1987 : }
1988 :
1989 : /* Try to predict out blocks in a loop that are not part of a
1990 : natural loop. */
1991 1465851 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1992 : {
1993 645960 : basic_block bb, *bbs;
1994 645960 : unsigned j, n_exits = 0;
1995 645960 : class tree_niter_desc niter_desc;
1996 645960 : edge ex;
1997 645960 : class nb_iter_bound *nb_iter;
1998 645960 : enum tree_code loop_bound_code = ERROR_MARK;
1999 645960 : tree loop_bound_step = NULL;
2000 645960 : tree loop_bound_var = NULL;
2001 645960 : tree loop_iv_base = NULL;
2002 645960 : gcond *stmt = NULL;
2003 645960 : bool recursion = with_recursion.contains (loop);
2004 :
 : /* Count exits that can plausibly be taken; edges already known to be
 : unlikely or abnormal-call edges do not participate. */
2005 645960 : auto_vec<edge> exits = get_loop_exit_edges (loop);
2006 2326799 : FOR_EACH_VEC_ELT (exits, j, ex)
2007 1034879 : if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL))
2008 879029 : n_exits ++;
2009 645960 : if (!n_exits)
2010 15952 : continue;
2011 :
2012 630008 : if (dump_file && (dump_flags & TDF_DETAILS))
2013 542 : fprintf (dump_file, "Predicting loop %i%s with %i exits.\n",
2014 : loop->num, recursion ? " (with recursion)" : "", n_exits);
2015 331 : if (dump_file && (dump_flags & TDF_DETAILS)
2016 630279 : && max_loop_iterations_int (loop) >= 0)
2017 : {
2018 201 : fprintf (dump_file,
2019 : "Loop %d iterates at most %i times.\n", loop->num,
2020 201 : (int)max_loop_iterations_int (loop));
2021 : }
2022 331 : if (dump_file && (dump_flags & TDF_DETAILS)
2023 630279 : && likely_max_loop_iterations_int (loop) >= 0)
2024 : {
2025 201 : fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
2026 201 : loop->num, (int)likely_max_loop_iterations_int (loop));
2027 : }
2028 :
 : /* Predict each usable exit edge from the best iteration-count
 : information available: an exact niter expression, an estimated
 : statement-execution count, or a likely upper bound. */
2029 1648758 : FOR_EACH_VEC_ELT (exits, j, ex)
2030 : {
2031 1018750 : tree niter = NULL;
2032 1018750 : HOST_WIDE_INT nitercst;
2033 1018750 : int max = param_max_predicted_iterations;
2034 1018750 : int probability;
2035 1018750 : enum br_predictor predictor;
2036 1018750 : widest_int nit;
2037 :
2038 1018750 : if (unlikely_executed_edge_p (ex)
2039 1018750 : || (ex->flags & EDGE_ABNORMAL_CALL))
2040 139721 : continue;
2041 : /* Loop heuristics do not expect exit conditional to be inside
2042 : inner loop. We predict from innermost to outermost loop. */
2043 879029 : if (predicted_by_loop_heuristics_p (ex->src))
2044 : {
2045 47013 : if (dump_file && (dump_flags & TDF_DETAILS))
2046 0 : fprintf (dump_file, "Skipping exit %i->%i because "
2047 : "it is already predicted.\n",
2048 0 : ex->src->index, ex->dest->index);
2049 47013 : continue;
2050 : }
2051 832016 : predict_extra_loop_exits (loop, ex);
2052 :
 : /* Prefer the symbolic niter analysis; fall back to brute-force
 : evaluation when it does not yield a constant. */
2053 832016 : if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
2054 437064 : niter = niter_desc.niter;
2055 437064 : if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
2056 558141 : niter = loop_niter_by_eval (loop, ex);
2057 336 : if (dump_file && (dump_flags & TDF_DETAILS)
2058 832287 : && TREE_CODE (niter) == INTEGER_CST)
2059 : {
2060 190 : fprintf (dump_file, "Exit %i->%i %d iterates ",
2061 190 : ex->src->index, ex->dest->index,
2062 : loop->num);
2063 190 : print_generic_expr (dump_file, niter, TDF_SLIM);
2064 190 : fprintf (dump_file, " times.\n");
2065 : }
2066 :
2067 832016 : if (TREE_CODE (niter) == INTEGER_CST)
2068 : {
 : /* Known constant iteration count, clamped to the
 : max-predicted-iterations parameter. */
2069 286576 : if (tree_fits_uhwi_p (niter)
2070 286576 : && max
2071 573146 : && compare_tree_int (niter, max - 1) == -1)
2072 203802 : nitercst = tree_to_uhwi (niter) + 1;
2073 : else
2074 82774 : nitercst = max;
2075 : predictor = PRED_LOOP_ITERATIONS;
2076 : }
2077 : /* If we have just one exit and we can derive some information about
2078 : the number of iterations of the loop from the statements inside
2079 : the loop, use it to predict this exit. */
2080 545440 : else if (n_exits == 1
2081 545440 : && estimated_stmt_executions (loop, &nit))
2082 : {
2083 9 : if (wi::gtu_p (nit, max))
2084 1 : nitercst = max;
2085 : else
2086 8 : nitercst = nit.to_shwi ();
2087 : predictor = PRED_LOOP_ITERATIONS_GUESSED;
2088 : }
2089 : /* If we have likely upper bound, trust it for very small iteration
2090 : counts. Such loops would otherwise get mispredicted by standard
2091 : LOOP_EXIT heuristics. */
2092 1085292 : else if (n_exits == 1
2093 254748 : && likely_max_stmt_executions (loop, &nit)
2094 689423 : && wi::ltu_p (nit,
2095 287582 : RDIV (REG_BR_PROB_BASE,
2096 : REG_BR_PROB_BASE
2097 : - predictor_info
2098 : [recursion
2099 : ? PRED_LOOP_EXIT_WITH_RECURSION
2100 : : PRED_LOOP_EXIT].hitrate)))
2101 : {
2102 5570 : nitercst = nit.to_shwi ();
2103 5570 : predictor = PRED_LOOP_ITERATIONS_MAX;
2104 : }
2105 : else
2106 : {
2107 539861 : if (dump_file && (dump_flags & TDF_DETAILS))
2108 79 : fprintf (dump_file, "Nothing known about exit %i->%i.\n",
2109 79 : ex->src->index, ex->dest->index);
2110 539861 : continue;
2111 : }
2112 :
2113 292155 : if (dump_file && (dump_flags & TDF_DETAILS))
2114 192 : fprintf (dump_file, "Recording prediction to %i iterations by %s.\n",
2115 192 : (int)nitercst, predictor_info[predictor].name);
2116 : /* If the prediction for number of iterations is zero, do not
2117 : predict the exit edges. */
2118 292155 : if (nitercst == 0)
2119 6 : continue;
2120 :
 : /* The exit is taken roughly once per nitercst iterations. */
2121 292149 : probability = RDIV (REG_BR_PROB_BASE, nitercst);
2122 292149 : predict_edge (ex, predictor, probability);
2123 1018750 : }
2124 :
2125 : /* Find information about loop bound variables. */
2126 888048 : for (nb_iter = loop->bounds; nb_iter;
2127 258040 : nb_iter = nb_iter->next)
2128 387406 : if (nb_iter->stmt
2129 387406 : && gimple_code (nb_iter->stmt) == GIMPLE_COND)
2130 : {
2131 129366 : stmt = as_a <gcond *> (nb_iter->stmt);
2132 129366 : break;
2133 : }
2134 630008 : if (!stmt)
2135 1001284 : stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (loop->header));
2136 129366 : if (stmt)
2137 606172 : is_comparison_with_loop_invariant_p (stmt, loop,
2138 : &loop_bound_var,
2139 : &loop_bound_code,
2140 : &loop_bound_step,
2141 : &loop_iv_base);
2142 :
 : /* Walk every block of the loop attaching LOOP_EXIT predictions and
 : IV-comparison predictions where applicable. */
2143 630008 : bbs = get_loop_body (loop);
2144 :
2145 3810994 : for (j = 0; j < loop->num_nodes; j++)
2146 : {
2147 3180986 : edge e;
2148 3180986 : edge_iterator ei;
2149 :
2150 3180986 : bb = bbs[j];
2151 :
2152 : /* Bypass loop heuristics on continue statement. These
2153 : statements construct loops via "non-loop" constructs
2154 : in the source language and are better to be handled
2155 : separately. */
2156 3180986 : if (predicted_by_p (bb, PRED_CONTINUE))
2157 : {
2158 8308 : if (dump_file && (dump_flags & TDF_DETAILS))
2159 0 : fprintf (dump_file, "BB %i predicted by continue.\n",
2160 : bb->index);
2161 8308 : continue;
2162 : }
2163 :
2164 : /* If we already used more reliable loop exit predictors, do not
2165 : bother with PRED_LOOP_EXIT. */
2166 3172678 : if (!predicted_by_loop_heuristics_p (bb))
2167 : {
2168 : /* For loop with many exits we don't want to predict all exits
2169 : with the pretty large probability, because if all exits are
2170 : considered in row, the loop would be predicted to iterate
2171 : almost never. The code to divide probability by number of
2172 : exits is very rough. It should compute the number of exits
2173 : taken in each patch through function (not the overall number
2174 : of exits that might be a lot higher for loops with wide switch
2175 : statements in them) and compute n-th square root.
2176 :
2177 : We limit the minimal probability by 2% to avoid
2178 : EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
2179 : as this was causing regression in perl benchmark containing such
2180 : a wide loop. */
2181 :
2182 5154248 : int probability = ((REG_BR_PROB_BASE
2183 2577124 : - predictor_info
2184 : [recursion
2185 2577124 : ? PRED_LOOP_EXIT_WITH_RECURSION
2186 2577124 : : PRED_LOOP_EXIT].hitrate)
2187 2577124 : / n_exits);
2188 2577124 : if (probability < HITRATE (2))
2189 : probability = HITRATE (2);
2190 6259934 : FOR_EACH_EDGE (e, ei, bb->succs)
2191 3682810 : if (e->dest->index < NUM_FIXED_BLOCKS
2192 3682810 : || !flow_bb_inside_loop_p (loop, e->dest))
2193 : {
2194 660534 : if (dump_file && (dump_flags & TDF_DETAILS))
2195 89 : fprintf (dump_file,
2196 : "Predicting exit %i->%i with prob %i.\n",
2197 89 : e->src->index, e->dest->index, probability);
2198 1314247 : predict_edge (e,
2199 : recursion ? PRED_LOOP_EXIT_WITH_RECURSION
2200 : : PRED_LOOP_EXIT, probability);
2201 : }
2202 : }
2203 3172678 : if (loop_bound_var)
2204 2022156 : predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
2205 : loop_bound_code,
2206 2022156 : tree_to_shwi (loop_bound_step));
2207 : }
2208 :
2209 : /* In the following code
2210 : for (loop1)
2211 : if (cond)
2212 : for (loop2)
2213 : body;
2214 : guess that cond is unlikely. */
2215 630008 : if (loop_outer (loop)->num)
2216 : {
2217 134236 : basic_block bb = NULL;
2218 134236 : edge preheader_edge = loop_preheader_edge (loop);
2219 :
2220 134236 : if (single_pred_p (preheader_edge->src)
2221 246247 : && single_succ_p (preheader_edge->src))
2222 112011 : preheader_edge = single_pred_edge (preheader_edge->src);
2223 :
2224 : /* Pattern match fortran loop preheader:
2225 : _16 = BUILTIN_EXPECT (_15, 1, PRED_FORTRAN_LOOP_PREHEADER);
2226 : _17 = (logical(kind=4)) _16;
2227 : if (_17 != 0)
2228 : goto <bb 11>;
2229 : else
2230 : goto <bb 13>;
2231 :
2232 : Loop guard branch prediction says nothing about duplicated loop
2233 : headers produced by fortran frontend and in this case we want
2234 : to predict paths leading to this preheader. */
2235 :
2236 134236 : gcond *stmt
2237 268472 : = safe_dyn_cast <gcond *> (*gsi_last_bb (preheader_edge->src));
2238 109557 : if (stmt
2239 109557 : && gimple_cond_code (stmt) == NE_EXPR
2240 35525 : && TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
2241 35525 : && integer_zerop (gimple_cond_rhs (stmt)))
2242 : {
2243 14954 : gimple *call_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
2244 14954 : if (gimple_code (call_stmt) == GIMPLE_ASSIGN
2245 6331 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (call_stmt))
2246 15810 : && TREE_CODE (gimple_assign_rhs1 (call_stmt)) == SSA_NAME)
2247 856 : call_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (call_stmt));
2248 14954 : if (gimple_call_internal_p (call_stmt, IFN_BUILTIN_EXPECT)
2249 830 : && TREE_CODE (gimple_call_arg (call_stmt, 2)) == INTEGER_CST
2250 830 : && tree_fits_uhwi_p (gimple_call_arg (call_stmt, 2))
2251 15784 : && tree_to_uhwi (gimple_call_arg (call_stmt, 2))
2252 : == PRED_FORTRAN_LOOP_PREHEADER
2253 22 : bb = preheader_edge->src;
2254 : }
 : /* Only emit the guard prediction when the outer loop's latch is not
 : dominated by this position; otherwise the guard must be taken. */
2255 22 : if (!bb)
2256 : {
2257 134214 : if (!dominated_by_p (CDI_DOMINATORS,
2258 134214 : loop_outer (loop)->latch, loop->header))
2259 56387 : predict_paths_leading_to_edge (loop_preheader_edge (loop),
2260 : recursion
2261 : ? PRED_LOOP_GUARD_WITH_RECURSION
2262 : : PRED_LOOP_GUARD,
2263 : NOT_TAKEN,
2264 : loop_outer (loop));
2265 : }
2266 : else
2267 : {
2268 22 : if (!dominated_by_p (CDI_DOMINATORS,
2269 22 : loop_outer (loop)->latch, bb))
2270 0 : predict_paths_leading_to (bb,
2271 : recursion
2272 : ? PRED_LOOP_GUARD_WITH_RECURSION
2273 : : PRED_LOOP_GUARD,
2274 : NOT_TAKEN,
2275 : loop_outer (loop));
2276 : }
2277 : }
2278 :
2279 : /* Free basic blocks from get_loop_body. */
2280 630008 : free (bbs);
2281 919257 : }
2282 273297 : }
2283 :
2284 : /* Attempt to predict probabilities of BB outgoing edges using local
2285 : properties. */
2286 : static void
2287 566196 : bb_estimate_probability_locally (basic_block bb)
2288 : {
2289 566196 : rtx_insn *last_insn = BB_END (bb);
2290 566196 : rtx cond;
2291 :
 : /* Only a conditional jump whose outcome is not already fixed can be
 : predicted; bail out early otherwise. */
2292 566196 : if (! can_predict_insn_p (last_insn))
2293 : return;
2294 168156 : cond = get_condition (last_insn, NULL, false, false);
2295 168156 : if (! cond)
2296 : return;
2297 :
2298 : /* Try "pointer heuristic."
2299 : A comparison ptr == 0 is predicted as false.
2300 : Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2301 145338 : if (COMPARISON_P (cond)
2302 145338 : && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
2303 143833 : || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
2304 : {
2305 1506 : if (GET_CODE (cond) == EQ)
2306 33 : predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
2307 1473 : else if (GET_CODE (cond) == NE)
2308 19 : predict_insn_def (last_insn, PRED_POINTER, TAKEN);
2309 : }
2310 : else
2311 :
2312 : /* Try "opcode heuristic."
2313 : EQ tests are usually false and NE tests are usually true. Also,
2314 : most quantities are positive, so we can make the appropriate guesses
2315 : about signed comparisons against zero. */
2316 143832 : switch (GET_CODE (cond))
2317 : {
2318 0 : case CONST_INT:
2319 : /* Unconditional branch. */
2320 0 : predict_insn_def (last_insn, PRED_UNCONDITIONAL,
2321 0 : cond == const0_rtx ? NOT_TAKEN : TAKEN);
2322 0 : break;
2323 :
2324 21225 : case EQ:
2325 21225 : case UNEQ:
2326 : /* Floating point comparisons appears to behave in a very
2327 : unpredictable way because of special role of = tests in
2328 : FP code. */
2329 21225 : if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2330 : ;
2331 : /* Comparisons with 0 are often used for booleans and there is
2332 : nothing useful to predict about them. */
2333 21225 : else if (XEXP (cond, 1) == const0_rtx
2334 578 : || XEXP (cond, 0) == const0_rtx)
2335 : ;
2336 : else
2337 578 : predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
2338 : break;
2339 :
2340 84369 : case NE:
2341 84369 : case LTGT:
2342 : /* Floating point comparisons appears to behave in a very
2343 : unpredictable way because of special role of = tests in
2344 : FP code. */
2345 84369 : if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
2346 : ;
2347 : /* Comparisons with 0 are often used for booleans and there is
2348 : nothing useful to predict about them. */
2349 84369 : else if (XEXP (cond, 1) == const0_rtx
2350 79083 : || XEXP (cond, 0) == const0_rtx)
2351 : ;
2352 : else
2353 79083 : predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
2354 : break;
2355 :
2356 0 : case ORDERED:
2357 0 : predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
2358 0 : break;
2359 :
2360 52 : case UNORDERED:
2361 52 : predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
2362 52 : break;
2363 :
 : /* Signed comparisons against 0, 1 or -1 are treated as sign tests;
 : values are assumed to be mostly non-negative. */
2364 5178 : case LE:
2365 5178 : case LT:
2366 5178 : if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2367 0 : || XEXP (cond, 1) == constm1_rtx)
2368 5178 : predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
2369 : break;
2370 :
2371 5582 : case GE:
2372 5582 : case GT:
2373 5582 : if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
2374 5515 : || XEXP (cond, 1) == constm1_rtx)
2375 2248 : predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
2376 : break;
2377 :
2378 : default:
2379 : break;
2380 : }
2381 : }
2382 :
2383 : /* Set edge->probability for each successor edge of BB. */
2384 : void
2385 566196 : guess_outgoing_edge_probabilities (basic_block bb)
2386 : {
 : /* Attach local heuristic predictions to BB's final insn, then fold all
 : predictions recorded on that insn into actual edge probabilities. */
2387 566196 : bb_estimate_probability_locally (bb);
2388 566196 : combine_predictions_for_insn (BB_END (bb), bb);
2389 566196 : }
2390 :
2391 : static tree expr_expected_value (tree, enum br_predictor *predictor,
2392 : HOST_WIDE_INT *probability);
2393 :
2394 : /* Helper function for expr_expected_value. */
2395 :
2396 : static tree
2397 16011926 : expr_expected_value_1 (tree type, tree op0, enum tree_code code,
2398 : tree op1, enum br_predictor *predictor,
2399 : HOST_WIDE_INT *probability)
2400 : {
2401 16011926 : gimple *def;
2402 :
2403 : /* Reset returned probability value. */
2404 16011926 : *probability = -1;
2405 16011926 : *predictor = PRED_UNCONDITIONAL;
2406 :
2407 16011926 : if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
2408 : {
2409 10215223 : if (TREE_CONSTANT (op0))
2410 : return op0;
2411 :
2412 10211721 : if (code == IMAGPART_EXPR)
2413 : {
2414 51406 : if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
2415 : {
2416 49797 : def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
2417 49797 : if (is_gimple_call (def)
2418 47659 : && gimple_call_internal_p (def)
2419 97400 : && (gimple_call_internal_fn (def)
2420 : == IFN_ATOMIC_COMPARE_EXCHANGE))
2421 : {
2422 : /* Assume that any given atomic operation has low contention,
2423 : and thus the compare-and-swap operation succeeds. */
2424 5589 : *predictor = PRED_COMPARE_AND_SWAP;
2425 5589 : return build_one_cst (TREE_TYPE (op0));
2426 : }
2427 : }
2428 : }
2429 :
2430 10206132 : if (code != SSA_NAME)
2431 : return NULL_TREE;
2432 :
2433 7983388 : def = SSA_NAME_DEF_STMT (op0);
2434 :
2435 : /* If we were already here, break the infinite cycle. */
2436 7983388 : bool existed_p;
2437 7983388 : expected_value *res
2438 7983388 : = &ssa_expected_value->get_or_insert (SSA_NAME_VERSION (op0),
2439 : &existed_p);
2440 7983388 : if (existed_p)
2441 : {
2442 1672745 : *probability = res->probability;
2443 1672745 : *predictor = res->predictor;
2444 1672745 : return res->val;
2445 : }
2446 6310643 : res->val = NULL_TREE;
2447 6310643 : res->predictor = *predictor;
2448 6310643 : res->probability = *probability;
2449 :
2450 6310643 : if (gphi *phi = dyn_cast <gphi *> (def))
2451 : {
2452 : /* All the arguments of the PHI node must have the same constant
2453 : length. */
2454 909417 : int i, n = gimple_phi_num_args (phi);
2455 909417 : tree val = NULL;
2456 909417 : bool has_nonzero_edge = false;
2457 :
2458 : /* If we already proved that given edge is unlikely, we do not need
2459 : to handle merging of the probabilities. */
2460 1826034 : for (i = 0; i < n && !has_nonzero_edge; i++)
2461 : {
2462 916617 : tree arg = PHI_ARG_DEF (phi, i);
2463 916617 : if (arg == PHI_RESULT (phi))
2464 0 : continue;
2465 916617 : profile_count cnt = gimple_phi_arg_edge (phi, i)->count ();
2466 927293 : if (!cnt.initialized_p () || cnt.nonzero_p ())
2467 : has_nonzero_edge = true;
2468 : }
2469 :
2470 1487076 : for (i = 0; i < n; i++)
2471 : {
2472 1484074 : tree arg = PHI_ARG_DEF (phi, i);
2473 1484074 : enum br_predictor predictor2;
2474 :
2475 : /* Skip self-referring parameters, since they does not change
2476 : expected value. */
2477 1484074 : if (arg == PHI_RESULT (phi))
2478 577652 : continue;
2479 :
2480 : /* Skip edges which we already predicted as executing
2481 : zero times. */
2482 1484074 : if (has_nonzero_edge)
2483 : {
2484 1479566 : profile_count cnt = gimple_phi_arg_edge (phi, i)->count ();
2485 1479566 : if (cnt.initialized_p () && !cnt.nonzero_p ())
2486 216 : continue;
2487 : }
2488 1483858 : HOST_WIDE_INT probability2;
2489 1483858 : tree new_val = expr_expected_value (arg, &predictor2,
2490 : &probability2);
2491 : /* If we know nothing about value, give up. */
2492 1483858 : if (!new_val)
2493 906415 : return NULL;
2494 :
2495 : /* If this is a first edge, trust its prediction. */
2496 645032 : if (!val)
2497 : {
2498 568980 : val = new_val;
2499 568980 : *predictor = predictor2;
2500 568980 : *probability = probability2;
2501 568980 : continue;
2502 : }
2503 : /* If there are two different values, give up. */
2504 76052 : if (!operand_equal_p (val, new_val, false))
2505 : return NULL;
2506 :
2507 8463 : int p1 = get_predictor_value (*predictor, *probability);
2508 8463 : int p2 = get_predictor_value (predictor2, probability2);
2509 : /* If both predictors agree, it does not matter from which
2510 : edge we enter the basic block. */
2511 8463 : if (*predictor == predictor2 && p1 == p2)
2512 8456 : continue;
2513 : /* The general case has no precise solution, since we do not
2514 : know probabilities of incomming edges, yet.
2515 : Still if value is predicted over all incomming edges, we
2516 : can hope it will be indeed the case. Conservatively
2517 : downgrade prediction quality (so first match merging is not
2518 : performed) and take least successful prediction. */
2519 :
2520 7 : *predictor = PRED_COMBINED_VALUE_PREDICTIONS_PHI;
2521 7 : *probability = MIN (p1, p2);
2522 : }
2523 :
2524 3002 : res = ssa_expected_value->get (SSA_NAME_VERSION (op0));
2525 3002 : res->val = val;
2526 3002 : res->predictor = *predictor;
2527 3002 : res->probability = *probability;
2528 3002 : return val;
2529 : }
2530 5401226 : if (is_gimple_assign (def))
2531 : {
2532 4136878 : if (gimple_assign_lhs (def) != op0)
2533 : return NULL;
2534 :
2535 4136878 : tree val = expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
2536 : gimple_assign_rhs1 (def),
2537 : gimple_assign_rhs_code (def),
2538 : gimple_assign_rhs2 (def),
2539 : predictor, probability);
2540 4136878 : if (val)
2541 : {
2542 49159 : res = ssa_expected_value->get (SSA_NAME_VERSION (op0));
2543 49159 : res->val = val;
2544 49159 : res->predictor = *predictor;
2545 49159 : res->probability = *probability;
2546 : }
2547 4136878 : return val;
2548 : }
2549 :
2550 1264348 : if (is_gimple_call (def))
2551 : {
2552 862623 : tree decl = gimple_call_fndecl (def);
2553 862623 : if (!decl)
2554 : {
2555 78770 : if (gimple_call_internal_p (def)
2556 78770 : && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
2557 : {
2558 32954 : gcc_assert (gimple_call_num_args (def) == 3);
2559 32954 : tree val = gimple_call_arg (def, 0);
2560 32954 : if (TREE_CONSTANT (val))
2561 : return val;
2562 32954 : tree val2 = gimple_call_arg (def, 2);
2563 32954 : gcc_assert (TREE_CODE (val2) == INTEGER_CST
2564 : && tree_fits_uhwi_p (val2)
2565 : && tree_to_uhwi (val2) < END_PREDICTORS);
2566 32954 : *predictor = (enum br_predictor) tree_to_uhwi (val2);
2567 32954 : if (*predictor == PRED_BUILTIN_EXPECT)
2568 8748 : *probability
2569 8748 : = HITRATE (param_builtin_expect_probability);
2570 32954 : val = gimple_call_arg (def, 1);
2571 32954 : res->val = val;
2572 32954 : res->predictor = *predictor;
2573 32954 : res->probability = *probability;
2574 32954 : return val;
2575 : }
2576 : return NULL;
2577 : }
2578 :
2579 783853 : if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW_P (decl))
2580 : {
2581 11974 : if (predictor)
2582 11974 : *predictor = PRED_MALLOC_NONNULL;
2583 : /* FIXME: This is wrong and we need to convert the logic
2584 : to value ranges. This makes predictor to assume that
2585 : malloc always returns (size_t)1 which is not the same
2586 : as returning non-NULL. */
2587 11974 : tree val = fold_convert (type, boolean_true_node);
2588 11974 : res->val = val;
2589 11974 : res->predictor = *predictor;
2590 11974 : res->probability = *probability;
2591 11974 : return val;
2592 : }
2593 :
2594 771879 : if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
2595 417144 : switch (DECL_FUNCTION_CODE (decl))
2596 : {
2597 79544 : case BUILT_IN_EXPECT:
2598 79544 : {
2599 79544 : tree val;
2600 79544 : if (gimple_call_num_args (def) != 2)
2601 : return NULL;
2602 79544 : val = gimple_call_arg (def, 0);
2603 79544 : if (TREE_CONSTANT (val))
2604 : return val;
2605 79544 : *predictor = PRED_BUILTIN_EXPECT;
2606 79544 : *probability
2607 79544 : = HITRATE (param_builtin_expect_probability);
2608 79544 : val = gimple_call_arg (def, 1);
2609 79544 : res->val = val;
2610 79544 : res->predictor = *predictor;
2611 79544 : res->probability = *probability;
2612 79544 : return val;
2613 : }
2614 19 : case BUILT_IN_EXPECT_WITH_PROBABILITY:
2615 19 : {
2616 19 : tree val;
2617 19 : if (gimple_call_num_args (def) != 3)
2618 : return NULL;
2619 19 : val = gimple_call_arg (def, 0);
2620 19 : if (TREE_CONSTANT (val))
2621 : {
2622 0 : res->val = val;
2623 0 : res->predictor = *predictor;
2624 0 : res->probability = *probability;
2625 0 : return val;
2626 : }
2627 : /* Compute final probability as:
2628 : probability * REG_BR_PROB_BASE. */
2629 19 : tree prob = gimple_call_arg (def, 2);
2630 19 : tree t = TREE_TYPE (prob);
2631 19 : tree base = build_int_cst (integer_type_node,
2632 : REG_BR_PROB_BASE);
2633 19 : base = build_real_from_int_cst (t, base);
2634 19 : tree r = fold_build2_initializer_loc (UNKNOWN_LOCATION,
2635 : MULT_EXPR, t, prob, base);
2636 19 : if (TREE_CODE (r) != REAL_CST)
2637 : {
2638 1 : error_at (gimple_location (def),
2639 : "probability %qE must be "
2640 : "constant floating-point expression", prob);
2641 1 : return NULL;
2642 : }
2643 18 : HOST_WIDE_INT probi
2644 18 : = real_to_integer (TREE_REAL_CST_PTR (r));
2645 18 : if (probi >= 0 && probi <= REG_BR_PROB_BASE)
2646 : {
2647 17 : *predictor = PRED_BUILTIN_EXPECT_WITH_PROBABILITY;
2648 17 : *probability = probi;
2649 : }
2650 : else
2651 1 : error_at (gimple_location (def),
2652 : "probability %qE is outside "
2653 : "the range [0.0, 1.0]", prob);
2654 :
2655 18 : val = gimple_call_arg (def, 1);
2656 18 : res->val = val;
2657 18 : res->predictor = *predictor;
2658 18 : res->probability = *probability;
2659 18 : return val;
2660 : }
2661 :
2662 5172 : case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
2663 5172 : case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
2664 5172 : case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
2665 5172 : case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
2666 5172 : case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
2667 5172 : case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
2668 5172 : case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
2669 5172 : case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
2670 5172 : case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
2671 5172 : case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
2672 5172 : case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
2673 5172 : case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
2674 5172 : case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
2675 : /* Assume that any given atomic operation has low contention,
2676 : and thus the compare-and-swap operation succeeds. */
2677 5172 : *predictor = PRED_COMPARE_AND_SWAP;
2678 5172 : res->val = boolean_true_node;
2679 5172 : res->predictor = *predictor;
2680 5172 : res->probability = *probability;
2681 5172 : return boolean_true_node;
2682 1944 : case BUILT_IN_REALLOC:
2683 1944 : case BUILT_IN_GOMP_REALLOC:
2684 1944 : if (predictor)
2685 1944 : *predictor = PRED_MALLOC_NONNULL;
2686 : /* FIXME: This is wrong and we need to convert the logic
2687 : to value ranges. */
2688 1944 : res->val = fold_convert (type, boolean_true_node);
2689 1944 : res->predictor = *predictor;
2690 1944 : res->probability = *probability;
2691 1944 : return res->val;
2692 : default:
2693 : break;
2694 : }
2695 : }
2696 :
2697 1086925 : return NULL;
2698 : }
2699 :
2700 5796703 : if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2701 : {
2702 5382710 : tree res;
2703 5382710 : tree nop0 = op0;
2704 5382710 : tree nop1 = op1;
2705 :
2706 : /* First handle situation where single op is enough to determine final
2707 : value. In this case we can do better job by avoiding the prediction
2708 : merging. */
2709 5382710 : if (TREE_CODE (op0) != INTEGER_CST)
2710 : {
2711 : /* See if expected value of op0 is good enough to determine the result. */
2712 5358654 : nop0 = expr_expected_value (op0, predictor, probability);
2713 5358654 : if (nop0
2714 146425 : && (res = fold_build2 (code, type, nop0, op1)) != NULL
2715 5505079 : && TREE_CODE (res) == INTEGER_CST)
2716 : /* We are now getting conservative probability. Consider for
2717 : example:
2718 : op0 * op1
2719 : If op0 is 0 with probability p, then we will ignore the
2720 : posibility that op0 != 0 and op1 == 0. It does not seem to be
2721 : worthwhile to downgrade prediciton quality for this. */
2722 : return res;
2723 5218031 : if (!nop0)
2724 5236285 : nop0 = op0;
2725 : }
2726 5242087 : enum br_predictor predictor2 = PRED_UNCONDITIONAL;
2727 5242087 : HOST_WIDE_INT probability2 = -1;
2728 5242087 : if (TREE_CODE (op1) != INTEGER_CST)
2729 : {
2730 : /* See if expected value of op1 is good enough to determine the result. */
2731 1613338 : nop1 = expr_expected_value (op1, &predictor2, &probability2);
2732 1613338 : if (nop1
2733 225824 : && (res = fold_build2 (code, type, op0, nop1)) != NULL
2734 1839162 : && TREE_CODE (res) == INTEGER_CST)
2735 : {
2736 : /* Similarly as above we now get conservative probability. */
2737 57 : *predictor = predictor2;
2738 57 : *probability = probability2;
2739 57 : return res;
2740 : }
2741 1613281 : if (!nop1)
2742 5016263 : nop1 = op1;
2743 : }
2744 : /* We already checked if folding one of arguments to constant is good
2745 : enough. Consequently failing to fold both means that we will not
2746 : succeed determining the value. */
2747 5242030 : if (nop0 == op0 || nop1 == op1)
2748 : return NULL;
2749 : /* Finally see if we have two known values. */
2750 236 : res = fold_build2 (code, type, nop0, nop1);
2751 236 : if (TREE_CODE (res) == INTEGER_CST)
2752 : {
2753 161 : HOST_WIDE_INT p1 = get_predictor_value (*predictor, *probability);
2754 161 : HOST_WIDE_INT p2 = get_predictor_value (predictor2, probability2);
2755 :
2756 : /* If one of predictions is sure, such as PRED_UNCONDITIONAL, we
2757 : can ignore it. */
2758 161 : if (p2 == PROB_ALWAYS)
2759 : return res;
2760 141 : if (p1 == PROB_ALWAYS)
2761 : {
2762 1 : *predictor = predictor2;
2763 1 : *probability = probability2;
2764 1 : return res;
2765 : }
2766 : /* Combine binary predictions.
2767 : Since we do not know about independence of predictors, we
2768 : can not determine value precisely. */
2769 140 : *probability = RDIV (p1 * p2, REG_BR_PROB_BASE);
2770 : /* If we no longer track useful information, give up. */
2771 140 : if (!*probability)
2772 : return NULL;
2773 : /* Otherwise mark that prediction is a result of combining
2774 : different heuristics, since we do not want it to participate
2775 : in first match merging. It is no longer reliable since
2776 : we do not know if the probabilities are indpenendet. */
2777 140 : *predictor = PRED_COMBINED_VALUE_PREDICTIONS;
2778 :
2779 140 : return res;
2780 : }
2781 : return NULL;
2782 : }
2783 413993 : if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
2784 : {
2785 412881 : tree res;
2786 412881 : op0 = expr_expected_value (op0, predictor, probability);
2787 412881 : if (!op0)
2788 : return NULL;
2789 37842 : res = fold_build1 (code, type, op0);
2790 37842 : if (TREE_CONSTANT (res))
2791 : return res;
2792 : return NULL;
2793 : }
2794 : return NULL;
2795 : }
2796 :
2797 : /* Return constant EXPR will likely have at execution time, NULL if unknown.
2798 : The function is used by builtin_expect branch predictor so the evidence
2799 : must come from this construct and additional possible constant folding.
2800 :
2801 : We may want to implement more involved value guess (such as value range
2802 : propagation based prediction), but such tricks shall go to new
2803 : implementation. */
 : /* On success *PREDICTOR is set to the heuristic justifying the value and
 : *PROBABILITY to its hit rate; -1 means "use the predictor's static
 : hit rate" (see get_predictor_value). */
2804 :
2805 : static tree
2806 8896515 : expr_expected_value (tree expr, enum br_predictor *predictor,
2807 : HOST_WIDE_INT *probability)
2808 : {
2809 8896515 : enum tree_code code;
2810 8896515 : tree op0, op1;
2811 :
 : /* A constant is trivially its own expected value and the prediction
 : can never be wrong. */
2812 8896515 : if (TREE_CONSTANT (expr))
2813 : {
2814 867999 : *predictor = PRED_UNCONDITIONAL;
2815 867999 : *probability = -1;
2816 867999 : return expr;
2817 : }
2818 :
 : /* Otherwise split EXPR into an operation code and operands and let
 : expr_expected_value_1 do the real work. */
2819 8028516 : extract_ops_from_tree (expr, &code, &op0, &op1);
2820 8028516 : return expr_expected_value_1 (TREE_TYPE (expr),
2821 8028516 : op0, code, op1, predictor, probability);
2822 : }
2823 :
2824 :
2825 : /* Return probability of a PREDICTOR. If the predictor has variable
2826 : probability return passed PROBABILITY. */
2827 :
2828 : static HOST_WIDE_INT
2829 155906 : get_predictor_value (br_predictor predictor, HOST_WIDE_INT probability)
2830 : {
2831 155906 : switch (predictor)
2832 : {
 : /* These predictors carry a caller-supplied probability (from
 : __builtin_expect_with_probability or from merging several value
 : predictions), so PROBABILITY must have been provided. */
2833 88636 : case PRED_BUILTIN_EXPECT:
2834 88636 : case PRED_BUILTIN_EXPECT_WITH_PROBABILITY:
2835 88636 : case PRED_COMBINED_VALUE_PREDICTIONS_PHI:
2836 88636 : case PRED_COMBINED_VALUE_PREDICTIONS:
2837 88636 : gcc_assert (probability != -1);
2838 : return probability;
 : /* Everything else uses the static hit rate from the predictor table. */
2839 67270 : default:
2840 67270 : gcc_assert (probability == -1);
2841 67270 : return predictor_info[(int) predictor].hitrate;
2842 : }
2843 : }
2844 :
2845 : /* Predict using opcode of the last statement in basic block. */
 : /* This covers the __builtin_expect-style value predictions (via
 : expr_expected_value/expr_expected_value_1) for GIMPLE_SWITCH and
 : GIMPLE_COND, plus the classic pointer and opcode heuristics. */
2846 : static void
2847 11405265 : tree_predict_by_opcode (basic_block bb)
2848 : {
2849 11405265 : edge then_edge;
2850 11405265 : tree op0, op1;
2851 11405265 : tree type;
2852 11405265 : tree val;
2853 11405265 : enum tree_code cmp;
2854 11405265 : edge_iterator ei;
2855 11405265 : enum br_predictor predictor;
2856 11405265 : HOST_WIDE_INT probability;
2857 :
2858 11405265 : gimple *stmt = *gsi_last_bb (bb);
2859 11405265 : if (!stmt)
2860 7558733 : return;
2861 :
 : /* For a switch, predict the case edge matching the expected value of
 : the index expression, if one is known. */
2862 10937408 : if (gswitch *sw = dyn_cast <gswitch *> (stmt))
2863 : {
2864 27784 : tree index = gimple_switch_index (sw);
2865 27784 : tree val = expr_expected_value (index, &predictor, &probability);
2866 27784 : if (val && TREE_CODE (val) == INTEGER_CST)
2867 : {
2868 4 : edge e = find_taken_edge_switch_expr (sw, val);
2869 4 : if (predictor == PRED_BUILTIN_EXPECT)
2870 : {
2871 4 : int percent = param_builtin_expect_probability;
2872 4 : gcc_assert (percent >= 0 && percent <= 100);
2873 4 : predict_edge (e, PRED_BUILTIN_EXPECT,
2874 4 : HITRATE (percent));
2875 : }
2876 : else
2877 0 : predict_edge_def (e, predictor, TAKEN);
2878 : }
2879 : }
2880 :
2881 10937408 : if (gimple_code (stmt) != GIMPLE_COND)
2882 : return;
 : /* Locate the edge taken when the condition evaluates to true. */
2883 3978090 : FOR_EACH_EDGE (then_edge, ei, bb->succs)
2884 3978090 : if (then_edge->flags & EDGE_TRUE_VALUE)
2885 : break;
2886 3846532 : op0 = gimple_cond_lhs (stmt);
2887 3846532 : op1 = gimple_cond_rhs (stmt);
2888 3846532 : cmp = gimple_cond_code (stmt);
2889 3846532 : type = TREE_TYPE (op0);
2890 3846532 : val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1,
2891 : &predictor, &probability);
2892 3846532 : if (val && TREE_CODE (val) == INTEGER_CST)
2893 : {
2894 138658 : HOST_WIDE_INT prob = get_predictor_value (predictor, probability);
2895 138658 : if (integer_zerop (val))
2896 113010 : prob = REG_BR_PROB_BASE - prob;
2897 138658 : predict_edge (then_edge, predictor, prob);
2898 : }
2899 : /* Try "pointer heuristic."
2900 : A comparison ptr == 0 is predicted as false.
2901 : Similarly, a comparison ptr1 == ptr2 is predicted as false. */
2902 3846532 : if (POINTER_TYPE_P (type))
2903 : {
2904 565733 : if (cmp == EQ_EXPR)
2905 230260 : predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
2906 335473 : else if (cmp == NE_EXPR)
2907 316154 : predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
2908 : }
2909 : else
2910 :
2911 : /* Try "opcode heuristic."
2912 : EQ tests are usually false and NE tests are usually true. Also,
2913 : most quantities are positive, so we can make the appropriate guesses
2914 : about signed comparisons against zero. */
2915 3280799 : switch (cmp)
2916 : {
2917 881225 : case EQ_EXPR:
2918 881225 : case UNEQ_EXPR:
2919 : /* Floating point comparisons appear to behave in a very
2920 : unpredictable way because of special role of = tests in
2921 : FP code. */
2922 881225 : if (FLOAT_TYPE_P (type))
2923 : ;
2924 : /* Comparisons with 0 are often used for booleans and there is
2925 : nothing useful to predict about them. */
2926 869459 : else if (integer_zerop (op0) || integer_zerop (op1))
2927 : ;
2928 : else
2929 347012 : predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
2930 : break;
2931 :
2932 1558660 : case NE_EXPR:
2933 1558660 : case LTGT_EXPR:
2934 : /* Floating point comparisons appear to behave in a very
2935 : unpredictable way because of special role of = tests in
2936 : FP code. */
2937 1558660 : if (FLOAT_TYPE_P (type))
2938 : ;
2939 : /* Comparisons with 0 are often used for booleans and there is
2940 : nothing useful to predict about them. */
2941 1442923 : else if (integer_zerop (op0)
2942 1442923 : || integer_zerop (op1))
2943 : ;
2944 : else
2945 530696 : predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
2946 : break;
2947 :
2948 2305 : case ORDERED_EXPR:
2949 2305 : predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
2950 2305 : break;
2951 :
2952 2502 : case UNORDERED_EXPR:
2953 2502 : predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
2954 2502 : break;
2955 :
 : /* Comparisons against 0/1/-1 bound a signed quantity from one side;
 : assume the quantity is positive. */
2956 346667 : case LE_EXPR:
2957 346667 : case LT_EXPR:
2958 346667 : if (integer_zerop (op1)
2959 284003 : || integer_onep (op1)
2960 276862 : || integer_all_onesp (op1)
2961 276807 : || real_zerop (op1)
2962 273594 : || real_onep (op1)
2963 619756 : || real_minus_onep (op1))
2964 73582 : predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
2965 : break;
2966 :
2967 481326 : case GE_EXPR:
2968 481326 : case GT_EXPR:
2969 481326 : if (integer_zerop (op1)
2970 417643 : || integer_onep (op1)
2971 406122 : || integer_all_onesp (op1)
2972 405848 : || real_zerop (op1)
2973 404297 : || real_onep (op1)
2974 884834 : || real_minus_onep (op1))
2975 77842 : predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
2976 : break;
2977 :
2978 : default:
2979 : break;
2980 : }
2981 : }
2982 :
2983 : /* Returns TRUE if the STMT is exit(0) like statement. */
2984 :
2985 : static bool
2986 830264 : is_exit_with_zero_arg (const gimple *stmt)
2987 : {
2988 : /* This is not exit, _exit or _Exit. */
2989 830264 : if (!gimple_call_builtin_p (stmt, BUILT_IN_EXIT)
2990 826406 : && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT)
2991 1656648 : && !gimple_call_builtin_p (stmt, BUILT_IN__EXIT2))
2992 : return false;
2993 :
2994 : /* Argument is an interger zero. */
2995 3880 : return integer_zerop (gimple_call_arg (stmt, 0));
2996 : }
2997 :
2998 : /* Try to guess whether the value of return means error code. */
 : /* Returns the applicable predictor, or PRED_NO_PREDICTION when VAL
 : carries no usable signal. On success *PREDICTION is set to the
 : expected branch direction towards the return (NOT_TAKEN, since the
 : matched values look like error/uncommon results). */
2999 :
3000 : static enum br_predictor
3001 707319 : return_prediction (tree val, enum prediction *prediction)
3002 : {
3003 : /* VOID. */
3004 707319 : if (!val)
3005 : return PRED_NO_PREDICTION;
3006 : /* Different heuristics for pointers and scalars. */
3007 707319 : if (POINTER_TYPE_P (TREE_TYPE (val)))
3008 : {
3009 : /* NULL is usually not returned. */
3010 144537 : if (integer_zerop (val))
3011 : {
3012 32308 : *prediction = NOT_TAKEN;
3013 32308 : return PRED_NULL_RETURN;
3014 : }
3015 : }
3016 562782 : else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
3017 : {
3018 : /* Negative return values are often used to indicate
3019 : errors. */
3020 460931 : if (TREE_CODE (val) == INTEGER_CST
3021 460931 : && tree_int_cst_sgn (val) < 0)
3022 : {
3023 12984 : *prediction = NOT_TAKEN;
3024 12984 : return PRED_NEGATIVE_RETURN;
3025 : }
3026 : /* Constant return values seems to be commonly taken.
3027 : Zero/one often represent booleans so exclude them from the
3028 : heuristics. */
3029 447947 : if (TREE_CONSTANT (val)
3030 447947 : && (!integer_zerop (val) && !integer_onep (val)))
3031 : {
3032 75065 : *prediction = NOT_TAKEN;
3033 75065 : return PRED_CONST_RETURN;
3034 : }
3035 : }
3036 : return PRED_NO_PREDICTION;
3037 : }
3038 :
3039 : /* Return zero if phi result could have values other than -1, 0 or 1,
3040 : otherwise return a bitmask, with bits 0, 1 and 2 set if -1, 0 and 1
3041 : values are used or likely. */
 : /* LIMIT bounds the recursion depth when PHI arguments are themselves
 : results of other PHI nodes. */
3042 :
3043 : static int
3044 62819 : zero_one_minusone (gphi *phi, int limit)
3045 : {
3046 62819 : int phi_num_args = gimple_phi_num_args (phi);
3047 62819 : int ret = 0;
 : /* First pass: every constant argument must be -1, 0 or 1; record
 : which of those values occur. */
3048 210005 : for (int i = 0; i < phi_num_args; i++)
3049 : {
3050 157739 : tree t = PHI_ARG_DEF (phi, i);
3051 157739 : if (TREE_CODE (t) != INTEGER_CST)
3052 71630 : continue;
3053 86109 : wide_int w = wi::to_wide (t);
3054 86109 : if (w == -1)
3055 5293 : ret |= 1;
3056 80816 : else if (w == 0)
3057 40315 : ret |= 2;
3058 40501 : else if (w == 1)
3059 29948 : ret |= 4;
3060 : else
3061 10553 : return 0;
3062 86109 : }
 : /* Second pass: each non-constant argument must itself be provably
 : restricted to {-1, 0, 1} — a nested PHI with that property, a cast
 : from a 1-bit unsigned value, or a comparison result (both of which
 : only produce 0 or 1). */
3063 120052 : for (int i = 0; i < phi_num_args; i++)
3064 : {
3065 103241 : tree t = PHI_ARG_DEF (phi, i);
3066 103241 : if (TREE_CODE (t) == INTEGER_CST)
3067 66054 : continue;
3068 37187 : if (TREE_CODE (t) != SSA_NAME)
3069 : return 0;
3070 37187 : gimple *g = SSA_NAME_DEF_STMT (t);
3071 37187 : if (gimple_code (g) == GIMPLE_PHI && limit > 0)
3072 12340 : if (int r = zero_one_minusone (as_a <gphi *> (g), limit - 1))
3073 : {
3074 443 : ret |= r;
3075 443 : continue;
3076 : }
3077 36744 : if (!is_gimple_assign (g))
3078 : return 0;
3079 13577 : if (gimple_assign_cast_p (g))
3080 : {
3081 3520 : tree rhs1 = gimple_assign_rhs1 (g);
3082 3520 : if (TREE_CODE (rhs1) != SSA_NAME
3083 3520 : || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
3084 3442 : || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
3085 4810 : || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3086 : return 0;
3087 1289 : ret |= (2 | 4);
3088 1289 : continue;
3089 1289 : }
3090 10057 : if (TREE_CODE_CLASS (gimple_assign_rhs_code (g)) != tcc_comparison)
3091 : return 0;
3092 0 : ret |= (2 | 4);
3093 : }
3094 : return ret;
3095 : }
3096 :
3097 : /* Find the basic block with return expression and look up for possible
3098 : return value trying to apply RETURN_PREDICTION heuristics. */
3099 : static void
3100 2411524 : apply_return_prediction (void)
3101 : {
3102 2411524 : greturn *return_stmt = NULL;
3103 2411524 : tree return_val;
3104 2411524 : edge e;
3105 2411524 : gphi *phi;
3106 2411524 : int phi_num_args, i;
3107 2411524 : enum br_predictor pred;
3108 2411524 : enum prediction direction;
3109 2411524 : edge_iterator ei;
3110 :
 : /* Find some block ending in a return statement. */
3111 2474700 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3112 : {
3113 4953548 : if (greturn *last = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src)))
3114 : {
3115 : return_stmt = last;
3116 : break;
3117 : }
3118 : }
3119 2411524 : if (!e)
3120 2231553 : return;
3121 2382380 : return_val = gimple_return_retval (return_stmt);
3122 2382380 : if (!return_val)
3123 : return;
 : /* Only a PHI-defined return value lets us predict the individual
 : incoming paths differently. */
3124 1340843 : if (TREE_CODE (return_val) != SSA_NAME
3125 987025 : || !SSA_NAME_DEF_STMT (return_val)
3126 2327868 : || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
3127 : return;
3128 180575 : phi = as_a <gphi *> (SSA_NAME_DEF_STMT (return_val));
3129 180575 : phi_num_args = gimple_phi_num_args (phi);
3130 180575 : pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
3131 :
3132 : /* Avoid the case where the function returns -1, 0 and 1 values and
3133 : nothing else. Those could be qsort etc. comparison functions
3134 : where the negative return isn't less probable than positive.
3135 : For this require that the function returns at least -1 or 1
3136 : or -1 and a boolean value or comparison result, so that functions
3137 : returning just -1 and 0 are treated as if -1 represents error value. */
3138 359594 : if (INTEGRAL_TYPE_P (TREE_TYPE (return_val))
3139 127239 : && !TYPE_UNSIGNED (TREE_TYPE (return_val))
3140 231054 : && TYPE_PRECISION (TREE_TYPE (return_val)) > 1)
3141 50479 : if (int r = zero_one_minusone (phi, 3))
3142 16368 : if ((r & (1 | 4)) == (1 | 4))
3143 : return;
3144 :
3145 : /* Avoid the degenerate case where all return values from the function
3146 : belong to the same category (i.e. they are all positive constants)
3147 : so we can hardly say something about them. */
3148 577712 : for (i = 1; i < phi_num_args; i++)
3149 429636 : if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
3150 : break;
3151 179971 : if (i != phi_num_args)
3152 129003 : for (i = 0; i < phi_num_args; i++)
3153 : {
3154 97108 : pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
3155 97108 : if (pred != PRED_NO_PREDICTION)
3156 50763 : predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
3157 : direction);
3158 : }
3159 : }
3160 :
3161 : /* Look for basic block that contains unlikely to happen events
3162 : (such as noreturn calls) and mark all paths leading to execution
3163 : of this basic blocks as unlikely. */
3164 :
3165 : static void
3166 2411524 : tree_bb_level_predictions (void)
3167 : {
3168 2411524 : basic_block bb;
3169 2411524 : bool has_return_edges = false;
3170 2411524 : edge e;
3171 2411524 : edge_iterator ei;
3172 :
 : /* Check whether the function can return normally at all; the noreturn
 : heuristic below only makes sense when it can. */
3173 2480847 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3174 2450441 : if (!unlikely_executed_edge_p (e) && !(e->flags & EDGE_ABNORMAL_CALL))
3175 : {
3176 : has_return_edges = true;
3177 : break;
3178 : }
3179 :
3180 2411524 : apply_return_prediction ();
3181 :
3182 13678025 : FOR_EACH_BB_FN (bb, cfun)
3183 : {
3184 11266501 : gimple_stmt_iterator gsi;
3185 :
3186 89053495 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3187 : {
3188 66520493 : gimple *stmt = gsi_stmt (gsi);
3189 66520493 : tree decl;
3190 :
3191 66520493 : if (is_gimple_call (stmt))
3192 : {
 : /* Paths to noreturn calls (except exit (0)) are unlikely. */
3193 6091228 : if (gimple_call_noreturn_p (stmt)
3194 894343 : && has_return_edges
3195 6921492 : && !is_exit_with_zero_arg (stmt))
3196 827730 : predict_paths_leading_to (bb, PRED_NORETURN,
3197 : NOT_TAKEN);
3198 6091228 : decl = gimple_call_fndecl (stmt);
3199 6091228 : if (decl
3200 11806854 : && lookup_attribute ("cold",
3201 5715626 : DECL_ATTRIBUTES (decl)))
3202 515032 : predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
3203 : NOT_TAKEN);
3204 6091228 : if (decl && recursive_call_p (current_function_decl, decl))
3205 9671 : predict_paths_leading_to (bb, PRED_RECURSIVE_CALL,
3206 : NOT_TAKEN);
3207 : }
3208 60429265 : else if (gimple_code (stmt) == GIMPLE_PREDICT)
3209 : {
3210 459459 : predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
3211 : gimple_predict_outcome (stmt));
3212 : /* Keep GIMPLE_PREDICT around so early inlining will propagate
3213 : hints to callers. */
3214 : }
3215 : }
3216 : }
3217 : }
3218 :
3219 : /* Callback for hash_map::traverse, asserts that the pointer map is
3220 : empty. */
3221 :
3222 : bool
3223 3574364 : assert_is_empty (const_basic_block const &, edge_prediction *const &value,
3224 : void *)
3225 : {
3226 3574364 : gcc_assert (!value);
3227 3574364 : return true;
3228 : }
3229 :
3230 : /* Predict branch probabilities and estimate profile for basic block BB.
3231 : When LOCAL_ONLY is set do not use any global properties of CFG. */
 : /* With LOCAL_ONLY clear, dominance information must be up to date since
 : the call heuristic below relies on it. */
3232 :
3233 : static void
3234 11405265 : tree_estimate_probability_bb (basic_block bb, bool local_only)
3235 : {
3236 11405265 : edge e;
3237 11405265 : edge_iterator ei;
3238 :
3239 27337810 : FOR_EACH_EDGE (e, ei, bb->succs)
3240 : {
3241 : /* Look for block we are guarding (ie we dominate it,
3242 : but it doesn't postdominate us). */
3243 12590452 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
3244 12590436 : && !local_only
3245 12422485 : && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
3246 24196032 : && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
3247 : {
3248 6415791 : gimple_stmt_iterator bi;
3249 :
3250 : /* The call heuristic claims that a guarded function call
3251 : is improbable. This is because such calls are often used
3252 : to signal exceptional situations such as printing error
3253 : messages. */
3254 36460032 : for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
3255 23628450 : gsi_next (&bi))
3256 : {
3257 26050538 : gimple *stmt = gsi_stmt (bi);
3258 26050538 : if (is_gimple_call (stmt)
3259 3033111 : && !gimple_inexpensive_call_p (as_a <gcall *> (stmt))
3260 : /* Constant and pure calls are hardly used to signalize
3261 : something exceptional. */
3262 28824477 : && gimple_has_side_effects (stmt))
3263 : {
 : /* Direct, polymorphic and indirect calls get distinct
 : predictors with different hit rates. */
3264 2422088 : if (gimple_call_fndecl (stmt))
3265 2359282 : predict_edge_def (e, PRED_CALL, NOT_TAKEN);
3266 62806 : else if (virtual_method_call_p (gimple_call_fn (stmt)))
3267 13464 : predict_edge_def (e, PRED_POLYMORPHIC_CALL, NOT_TAKEN);
3268 : else
3269 49342 : predict_edge_def (e, PRED_INDIR_CALL, TAKEN);
3270 : break;
3271 : }
3272 : }
3273 : }
3274 : }
3275 11405265 : tree_predict_by_opcode (bb);
3276 : }
3277 :
3278 : /* Predict branch probabilities and estimate profile of the tree CFG.
3279 : This function can be called from the loop optimizers to recompute
3280 : the profile information.
3281 : If DRY_RUN is set, do not modify CFG and only produce dump files. */
3282 :
3283 : void
3284 2411524 : tree_estimate_probability (bool dry_run)
3285 : {
3286 2411524 : basic_block bb;
3287 :
 : /* Fake edges from infinite loops to exit keep the post-dominator tree
 : well defined for the path predictions below. They are removed again
 : at the end of this function. */
3288 2411524 : connect_infinite_loops_to_exit ();
3289 : /* We use loop_niter_by_eval, which requires that the loops have
3290 : preheaders. */
3291 2411524 : create_preheaders (CP_SIMPLE_PREHEADERS);
3292 2411524 : calculate_dominance_info (CDI_POST_DOMINATORS);
3293 : /* Decide which edges are known to be unlikely. This improves later
3294 : branch prediction. */
3295 2411524 : if (!dry_run)
3296 2411512 : determine_unlikely_bbs ();
3297 :
3298 2411524 : bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
3299 2411524 : ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>;
3300 :
3301 2411524 : tree_bb_level_predictions ();
3302 2411524 : record_loop_exits ();
3303 :
3304 4823048 : if (number_of_loops (cfun) > 1)
3305 273297 : predict_loops ();
3306 :
 : /* First collect predictions for every block, then combine them into
 : edge probabilities; combining consumes the entries of bb_predictions,
 : which is why the map must be empty afterwards. */
3307 13678025 : FOR_EACH_BB_FN (bb, cfun)
3308 11266501 : tree_estimate_probability_bb (bb, false);
3309 :
3310 13678025 : FOR_EACH_BB_FN (bb, cfun)
3311 11266501 : combine_predictions_for_bb (bb, dry_run);
3312 :
3313 2411524 : if (flag_checking)
3314 5985860 : bb_predictions->traverse<void *, assert_is_empty> (NULL);
3315 :
3316 4823048 : delete bb_predictions;
3317 2411524 : bb_predictions = NULL;
3318 4823048 : delete ssa_expected_value;
3319 2411524 : ssa_expected_value = NULL;
3320 :
3321 2411524 : if (!dry_run
3322 2411512 : && profile_status_for_fn (cfun) != PROFILE_READ)
3323 2411512 : estimate_bb_frequencies ();
3324 2411524 : free_dominance_info (CDI_POST_DOMINATORS);
3325 2411524 : remove_fake_exit_edges ();
3326 : }
3327 :
3328 : /* Set edge->probability for each successor edge of BB. */
 : /* Stand-alone variant of tree_estimate_probability for a single block:
 : the prediction maps are created and destroyed locally and only the
 : local (non-dominance-based) heuristics are applied. */
3329 : void
3330 138764 : tree_guess_outgoing_edge_probabilities (basic_block bb)
3331 : {
3332 138764 : bb_predictions = new hash_map<const_basic_block, edge_prediction *>;
3333 138764 : ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>;
3334 138764 : tree_estimate_probability_bb (bb, true);
3335 138764 : combine_predictions_for_bb (bb, false);
3336 138764 : if (flag_checking)
3337 138764 : bb_predictions->traverse<void *, assert_is_empty> (NULL);
3338 277528 : delete bb_predictions;
3339 138764 : bb_predictions = NULL;
3340 277528 : delete ssa_expected_value;
3341 138764 : ssa_expected_value = NULL;
3342 : }
3343 :
3344 : /* Filter function predicate that returns true for a edge predicate P
3345 : if its edge is equal to DATA. */
3346 :
3347 : static bool
3348 8 : not_loop_guard_equal_edge_p (edge_prediction *p, void *data)
3349 : {
3350 8 : return p->ep_edge != (edge)data || p->ep_predictor != PRED_LOOP_GUARD;
3351 : }
3352 :
3353 : /* Predict edge E with PRED unless it is already predicted by some predictor
3354 : considered equivalent. */
3355 :
3356 : static void
3357 1067485 : maybe_predict_edge (edge e, enum br_predictor pred, enum prediction taken)
3358 : {
3359 1067485 : if (edge_predicted_by_p (e, pred, taken))
3360 : return;
 : /* A LOOP_GUARD prediction adds nothing when the stronger
 : LOOP_GUARD_WITH_RECURSION is already present. */
3361 1063091 : if (pred == PRED_LOOP_GUARD
3362 1063091 : && edge_predicted_by_p (e, PRED_LOOP_GUARD_WITH_RECURSION, taken))
3363 : return;
3364 : /* Consider PRED_LOOP_GUARD_WITH_RECURSION superior to LOOP_GUARD. */
3365 1063083 : if (pred == PRED_LOOP_GUARD_WITH_RECURSION)
3366 : {
 : /* Drop any weaker LOOP_GUARD prediction already recorded for E. */
3367 15 : edge_prediction **preds = bb_predictions->get (e->src);
3368 15 : if (preds)
3369 4 : filter_predictions (preds, not_loop_guard_equal_edge_p, e);
3370 : }
3371 1063083 : predict_edge_def (e, pred, taken);
3372 : }
3373 : /* Predict edges to successors of CUR whose sources are not postdominated by
3374 : BB by PRED and recurse to all postdominators. */
 : /* VISITED guards against revisiting blocks; IN_LOOP, when non-NULL,
 : restricts the walk to blocks of that loop. */
3375 :
3376 : static void
3377 2366546 : predict_paths_for_bb (basic_block cur, basic_block bb,
3378 : enum br_predictor pred,
3379 : enum prediction taken,
3380 : bitmap visited, class loop *in_loop = NULL)
3381 : {
3382 2366546 : edge e;
3383 2366546 : edge_iterator ei;
3384 2366546 : basic_block son;
3385 :
3386 : /* If we exited the loop or CUR is unconditional in the loop, there is
3387 : nothing to do. */
3388 2366546 : if (in_loop
3389 2366546 : && (!flow_bb_inside_loop_p (in_loop, cur)
3390 72825 : || dominated_by_p (CDI_DOMINATORS, in_loop->latch, cur)))
3391 10948 : return;
3392 :
3393 : /* We are looking for all edges forming edge cut induced by
3394 : set of all blocks postdominated by BB. */
3395 5224009 : FOR_EACH_EDGE (e, ei, cur->preds)
3396 2868411 : if (e->src->index >= NUM_FIXED_BLOCKS
3397 2868411 : && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
3398 : {
3399 2212043 : edge e2;
3400 2212043 : edge_iterator ei2;
3401 2212043 : bool found = false;
3402 :
3403 : /* Ignore fake edges and eh, we predict them as not taken anyway. */
3404 2212043 : if (unlikely_executed_edge_p (e))
3405 1148075 : continue;
3406 1063968 : gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
3407 :
3408 : /* See if there is an edge from e->src that is not abnormal
3409 : and does not lead to BB and does not exit the loop. */
3410 1940496 : FOR_EACH_EDGE (e2, ei2, e->src->succs)
3411 1911058 : if (e2 != e
3412 1070768 : && !unlikely_executed_edge_p (e2)
3413 1043389 : && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb)
3414 2948271 : && (!in_loop || !loop_exit_edge_p (in_loop, e2)))
3415 : {
3416 : found = true;
3417 : break;
3418 : }
3419 :
3420 : /* If there is non-abnormal path leaving e->src, predict edge
3421 : using predictor. Otherwise we need to look for paths
3422 : leading to e->src.
3423 :
3424 : The second may lead to infinite loop in the case we are predicting
3425 : regions that are only reachable by abnormal edges. We simply
3426 : prevent visiting given BB twice. */
3427 1063968 : if (found)
3428 1034530 : maybe_predict_edge (e, pred, taken);
3429 29438 : else if (bitmap_set_bit (visited, e->src->index))
3430 29394 : predict_paths_for_bb (e->src, e->src, pred, taken, visited, in_loop);
3431 : }
 : /* Recurse into blocks postdominated by CUR; they lie on every path
 : reaching BB as well. */
3432 2355598 : for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
3433 2812151 : son;
3434 456553 : son = next_dom_son (CDI_POST_DOMINATORS, son))
3435 456553 : predict_paths_for_bb (son, bb, pred, taken, visited, in_loop);
3436 : }
3437 :
3438 : /* Sets branch probabilities according to PREDiction and
3439 : FLAGS. */
3440 :
3441 : static void
3442 1811892 : predict_paths_leading_to (basic_block bb, enum br_predictor pred,
3443 : enum prediction taken, class loop *in_loop)
3444 : {
3445 1811892 : predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop);
3446 1811892 : }
3447 :
3448 : /* Like predict_paths_leading_to but take edge instead of basic block. */
3449 :
3450 : static void
3451 101662 : predict_paths_leading_to_edge (edge e, enum br_predictor pred,
3452 : enum prediction taken, class loop *in_loop)
3453 : {
3454 101662 : bool has_nonloop_edge = false;
3455 101662 : edge_iterator ei;
3456 101662 : edge e2;
3457 :
 : /* Check whether E's source has another meaningful successor; if not,
 : all paths into the source block necessarily flow through E and we
 : can predict the whole path set instead of just the edge. */
3458 101662 : basic_block bb = e->src;
3459 191053 : FOR_EACH_EDGE (e2, ei, bb->succs)
3460 122346 : if (e2->dest != e->src && e2->dest != e->dest
3461 44558 : && !unlikely_executed_edge_p (e2)
3462 164306 : && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
3463 : {
3464 : has_nonloop_edge = true;
3465 : break;
3466 : }
3467 :
3468 101662 : if (!has_nonloop_edge)
3469 68707 : predict_paths_for_bb (bb, bb, pred, taken, auto_bitmap (), in_loop)
3470 : else
3471 32955 : maybe_predict_edge (e, pred, taken);
3472 : }
3473 :
3474 : /* This is used to carry information about basic blocks. It is
3475 : attached to the AUX field of the standard CFG block. */
 : /* See propagate_freq, which threads its work list through NEXT and
 : counts down NPREDECESSORS to detect when a block is ready. */
3476 :
3477 : class block_info
3478 : {
3479 : public:
3480 : /* Estimated frequency of execution of basic_block. */
3481 : sreal frequency;
3482 :
3483 : /* To keep queue of basic blocks to process. */
3484 : basic_block next;
3485 :
3486 : /* Number of predecessors we need to visit first. */
3487 : int npredecessors;
3488 : };
3489 :
3490 : /* Similar information for edges. */
 : /* Attached to the AUX field of edges during frequency estimation. */
3491 : class edge_prob_info
3492 : {
3493 : public:
3494 : /* In case edge is a loopback edge, the probability edge will be reached
3495 : in case header is. Estimated number of iterations of the loop can be
3496 : then computed as 1 / (1 - back_edge_prob). */
3497 : sreal back_edge_prob;
3498 : /* True if the edge is a loopback edge in the natural loop. */
3499 : unsigned int back_edge:1;
3500 : };
3501 :
3502 : #define BLOCK_INFO(B) ((block_info *) (B)->aux)
3503 : #undef EDGE_INFO
3504 : #define EDGE_INFO(E) ((edge_prob_info *) (E)->aux)
3505 :
3506 : /* Helper function for estimate_bb_frequencies.
3507 : Propagate the frequencies in blocks marked in
3508 : TOVISIT, starting in HEAD. */
 : /* MAX_CYCLIC_PROB caps the cyclic probability of a loop so that the
 : computed iteration count stays bounded. */
3509 :
3510 : static void
3511 3265910 : propagate_freq (basic_block head, bitmap tovisit,
3512 : sreal max_cyclic_prob)
3513 : {
3514 3265910 : basic_block bb;
3515 3265910 : basic_block last;
3516 3265910 : unsigned i;
3517 3265910 : edge e;
3518 3265910 : basic_block nextbb;
3519 3265910 : bitmap_iterator bi;
3520 :
3521 : /* For each basic block we need to visit count number of its predecessors
3522 : we need to visit first. */
3523 26689601 : EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
3524 : {
3525 23423691 : edge_iterator ei;
3526 23423691 : int count = 0;
3527 :
3528 23423691 : bb = BASIC_BLOCK_FOR_FN (cfun, i);
3529 :
3530 52213713 : FOR_EACH_EDGE (e, ei, bb->preds)
3531 : {
3532 28790022 : bool visit = bitmap_bit_p (tovisit, e->src->index);
3533 :
3534 28790022 : if (visit && !(e->flags & EDGE_DFS_BACK))
3535 26100980 : count++;
3536 1895888 : else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
3537 3 : fprintf (dump_file,
3538 : "Irreducible region hit, ignoring edge to %i->%i\n",
3539 3 : e->src->index, bb->index);
3540 : }
3541 23423691 : BLOCK_INFO (bb)->npredecessors = count;
3542 : /* When function never returns, we will never process exit block. */
3543 23423691 : if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
3544 0 : bb->count = profile_count::zero ();
3545 : }
3546 :
 : /* Process blocks in topological order via a work list threaded through
 : BLOCK_INFO->next; a block is appended once all its non-back-edge
 : predecessors have been processed. */
3547 3265910 : BLOCK_INFO (head)->frequency = 1;
3548 3265910 : last = head;
3549 26689601 : for (bb = head; bb; bb = nextbb)
3550 : {
3551 23423691 : edge_iterator ei;
3552 23423691 : sreal cyclic_probability = 0;
3553 23423691 : sreal frequency = 0;
3554 :
3555 23423691 : nextbb = BLOCK_INFO (bb)->next;
3556 23423691 : BLOCK_INFO (bb)->next = NULL;
3557 :
3558 : /* Compute frequency of basic block. */
3559 23423691 : if (bb != head)
3560 : {
3561 20157781 : if (flag_checking)
3562 47360807 : FOR_EACH_EDGE (e, ei, bb->preds)
3563 27203415 : gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
3564 : || (e->flags & EDGE_DFS_BACK));
3565 :
 : /* Sum the incoming frequencies; back edges contribute to the
 : cyclic probability instead. */
3566 47361757 : FOR_EACH_EDGE (e, ei, bb->preds)
3567 27203976 : if (EDGE_INFO (e)->back_edge)
3568 1097945 : cyclic_probability += EDGE_INFO (e)->back_edge_prob;
3569 26106031 : else if (!(e->flags & EDGE_DFS_BACK))
3570 : {
3571 : /* FIXME: Graphite is producing edges with no profile. Once
3572 : this is fixed, drop this. */
3573 26100980 : sreal tmp = e->probability.initialized_p () ?
3574 26100980 : e->probability.to_sreal () : 0;
3575 26100980 : frequency += tmp * BLOCK_INFO (e->src)->frequency;
3576 : }
3577 :
3578 20157781 : if (cyclic_probability == 0)
3579 : {
3580 19087024 : BLOCK_INFO (bb)->frequency = frequency;
3581 : }
3582 : else
3583 : {
3584 1070757 : if (cyclic_probability > max_cyclic_prob)
3585 : {
3586 9771 : if (dump_file)
3587 252 : fprintf (dump_file,
3588 : "cyclic probability of bb %i is %f (capped to %f)"
3589 : "; turning freq %f",
3590 : bb->index, cyclic_probability.to_double (),
3591 : max_cyclic_prob.to_double (),
3592 : frequency.to_double ());
3593 :
3594 9771 : cyclic_probability = max_cyclic_prob;
3595 : }
3596 1060986 : else if (dump_file)
3597 111 : fprintf (dump_file,
3598 : "cyclic probability of bb %i is %f; turning freq %f",
3599 : bb->index, cyclic_probability.to_double (),
3600 : frequency.to_double ());
3601 :
 : /* Loop header frequency is the entry frequency divided by the
 : probability of leaving the loop, i.e. 1 - cyclic prob. */
3602 1070757 : BLOCK_INFO (bb)->frequency = frequency
3603 1070757 : / (sreal (1) - cyclic_probability);
3604 1070757 : if (dump_file)
3605 363 : fprintf (dump_file, " to %f\n",
3606 363 : BLOCK_INFO (bb)->frequency.to_double ());
3607 : }
3608 : }
3609 :
3610 23423691 : bitmap_clear_bit (tovisit, bb->index);
3611 :
 : /* If BB closes a loop back to HEAD, record the probability that the
 : back edge is reached given HEAD was entered. */
3612 23423691 : e = find_edge (bb, head);
3613 23423691 : if (e)
3614 : {
3615 : /* FIXME: Graphite is producing edges with no profile. Once
3616 : this is fixed, drop this. */
3617 792892 : sreal tmp = e->probability.initialized_p () ?
3618 792892 : e->probability.to_sreal () : 0;
3619 792892 : EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
3620 : }
3621 :
3622 : /* Propagate to successor blocks. */
3623 52777819 : FOR_EACH_EDGE (e, ei, bb->succs)
3624 29354128 : if (!(e->flags & EDGE_DFS_BACK)
3625 27457015 : && BLOCK_INFO (e->dest)->npredecessors)
3626 : {
3627 26100980 : BLOCK_INFO (e->dest)->npredecessors--;
3628 26100980 : if (!BLOCK_INFO (e->dest)->npredecessors)
3629 : {
3630 20157781 : if (!nextbb)
3631 : nextbb = e->dest;
3632 : else
3633 7600959 : BLOCK_INFO (last)->next = e->dest;
3634 :
3635 : last = e->dest;
3636 : }
3637 : }
3638 : }
3639 : }
3640 :
3641 : /* Estimate frequencies in loops at same nest level. */
3642 :
3643 : static void
3644 1102005 : estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
3645 : {
3646 1102005 : class loop *loop;
3647 :
3648 1894897 : for (loop = first_loop; loop; loop = loop->next)
3649 : {
3650 792892 : edge e;
3651 792892 : basic_block *bbs;
3652 792892 : unsigned i;
3653 792892 : auto_bitmap tovisit;
3654 :
3655 792892 : estimate_loops_at_level (loop->inner, max_cyclic_prob);
3656 :
3657 : /* Find current loop back edge and mark it. */
3658 792892 : e = loop_latch_edge (loop);
3659 792892 : EDGE_INFO (e)->back_edge = 1;
3660 :
3661 792892 : bbs = get_loop_body (loop);
3662 5923367 : for (i = 0; i < loop->num_nodes; i++)
3663 4337583 : bitmap_set_bit (tovisit, bbs[i]->index);
3664 792892 : free (bbs);
3665 792892 : propagate_freq (loop->header, tovisit, max_cyclic_prob);
3666 792892 : }
3667 1102005 : }
3668 :
3669 : /* Propagates frequencies through structure of loops. */
3670 :
3671 : static void
3672 2473018 : estimate_loops (void)
3673 : {
3674 2473018 : auto_bitmap tovisit;
3675 2473018 : basic_block bb;
3676 7419054 : sreal max_cyclic_prob = (sreal)1
3677 2473018 : - (sreal)1 / (param_max_predicted_iterations + 1);
3678 :
3679 : /* Start by estimating the frequencies in the loops. */
3680 4946036 : if (number_of_loops (cfun) > 1)
3681 309113 : estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
3682 :
3683 : /* Now propagate the frequencies through all the blocks. */
3684 21559126 : FOR_ALL_BB_FN (bb, cfun)
3685 : {
3686 19086108 : bitmap_set_bit (tovisit, bb->index);
3687 : }
3688 2473018 : propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
3689 2473018 : }
3690 :
3691 : /* Drop the profile for NODE to guessed, and update its frequency based on
3692 : whether it is expected to be hot given the CALL_COUNT. */
3693 :
static void
drop_profile (struct cgraph_node *node, profile_count call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0.  Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
	     "Dropping 0 profile for %s. %s based on calls.\n",
	     node->dump_name (),
	     hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates). See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs.  In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
	  /* With -fprofile-correction only note the mismatch in the dump,
	     do not warn the user.  */
          if (dump_file)
            fprintf (dump_file,
		     "Missing counts for called function %s\n",
		     node->dump_name ());
        }
      else
	warning (0, "Missing counts for called function %s",
		 node->dump_name ());
    }

  basic_block bb;
  if (opt_for_fn (node->decl, flag_guess_branch_prob))
    {
      /* Demote the read profile to locally guessed counts.  Known-zero
	 blocks keep their quality unless the whole function (entry
	 block) had a zero count, in which case zeros are demoted too.  */
      bool clear_zeros
	 = !ENTRY_BLOCK_PTR_FOR_FN (fn)->count.nonzero_p ();
      FOR_ALL_BB_FN (bb, fn)
	if (clear_zeros || !(bb->count == profile_count::zero ()))
	  bb->count = bb->count.guessed_local ();
      fn->cfg->count_max = fn->cfg->count_max.guessed_local ();
    }
  else
    {
      /* Without guessed branch probabilities there is nothing to base
	 counts on; wipe them completely.  */
      FOR_ALL_BB_FN (bb, fn)
	bb->count = profile_count::uninitialized ();
      fn->cfg->count_max = profile_count::uninitialized ();
    }

  /* Re-derive outgoing call edge counts and the node count from the
     updated basic block counts.  */
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    e->count = gimple_bb (e->call_stmt)->count;
  for (e = node->indirect_calls; e; e = e->next_callee)
    e->count = gimple_bb (e->call_stmt)->count;
  node->count = ENTRY_BLOCK_PTR_FOR_FN (fn)->count;

  profile_status_for_fn (fn)
    = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
    = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}
3761 :
3762 : /* In the case of COMDAT routines, multiple object files will contain the same
3763 : function and the linker will select one for the binary. In that case
3764 : all the other copies from the profile instrument binary will be missing
3765 : profile counts. Look for cases where this happened, due to non-zero
3766 : call counts going to 0-count functions, and drop the profile to guessed
3767 : so that we can use the estimated probabilities and avoid optimizing only
3768 : for size.
3769 :
3770 : The other case where the profile may be missing is when the routine
3771 : is not going to be emitted to the object file, e.g. for "extern template"
3772 : class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
3773 : all other cases of non-zero calls to 0-count functions. */
3774 :
3775 : void
3776 601 : handle_missing_profiles (void)
3777 : {
3778 601 : const int unlikely_frac = param_unlikely_bb_count_fraction;
3779 601 : struct cgraph_node *node;
3780 601 : auto_vec<struct cgraph_node *, 64> worklist;
3781 :
3782 : /* See if 0 count function has non-0 count callers. In this case we
3783 : lost some profile. Drop its function profile to PROFILE_GUESSED. */
3784 3386 : FOR_EACH_DEFINED_FUNCTION (node)
3785 : {
3786 2785 : struct cgraph_edge *e;
3787 2785 : profile_count call_count = profile_count::zero ();
3788 2785 : gcov_type max_tp_first_run = 0;
3789 2785 : struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
3790 :
3791 2785 : if (node->count.ipa ().nonzero_p ())
3792 346 : continue;
3793 4661 : for (e = node->callers; e; e = e->next_caller)
3794 2222 : if (e->count.ipa ().initialized_p () && e->count.ipa () > 0)
3795 : {
3796 8 : call_count = call_count + e->count.ipa ();
3797 :
3798 8 : if (e->caller->tp_first_run > max_tp_first_run)
3799 2222 : max_tp_first_run = e->caller->tp_first_run;
3800 : }
3801 :
3802 : /* If time profile is missing, let assign the maximum that comes from
3803 : caller functions. */
3804 2439 : if (!node->tp_first_run && max_tp_first_run)
3805 4 : node->tp_first_run = max_tp_first_run + 1;
3806 :
3807 2439 : if (call_count > 0
3808 6 : && fn && fn->cfg
3809 2445 : && call_count * unlikely_frac >= profile_info->runs)
3810 : {
3811 0 : drop_profile (node, call_count);
3812 0 : worklist.safe_push (node);
3813 : }
3814 : }
3815 :
3816 : /* Propagate the profile dropping to other 0-count COMDATs that are
3817 : potentially called by COMDATs we already dropped the profile on. */
3818 601 : while (worklist.length () > 0)
3819 : {
3820 0 : struct cgraph_edge *e;
3821 :
3822 0 : node = worklist.pop ();
3823 0 : for (e = node->callees; e; e = e->next_caller)
3824 : {
3825 0 : struct cgraph_node *callee = e->callee;
3826 0 : struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
3827 :
3828 0 : if (!(e->count.ipa () == profile_count::zero ())
3829 0 : && callee->count.ipa ().nonzero_p ())
3830 0 : continue;
3831 0 : if ((DECL_COMDAT (callee->decl) || DECL_EXTERNAL (callee->decl))
3832 0 : && fn && fn->cfg
3833 0 : && profile_status_for_fn (fn) == PROFILE_READ)
3834 : {
3835 0 : drop_profile (node, profile_count::zero ());
3836 0 : worklist.safe_push (callee);
3837 : }
3838 : }
3839 : }
3840 601 : }
3841 :
3842 : /* Convert counts measured by profile driven feedback to frequencies.
3843 : Return nonzero iff there was any nonzero execution count. */
3844 :
3845 : bool
3846 1717007 : update_max_bb_count (void)
3847 : {
3848 1717007 : profile_count true_count_max = profile_count::uninitialized ();
3849 1717007 : basic_block bb;
3850 :
3851 36085050 : FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
3852 34368043 : true_count_max = profile_count::max_prefer_initialized (true_count_max, bb->count);
3853 :
3854 1717007 : cfun->cfg->count_max = true_count_max;
3855 :
3856 1717007 : return true_count_max.ipa ().nonzero_p ();
3857 : }
3858 :
3859 : /* Return true if function is likely to be expensive, so there is no point to
3860 : optimize performance of prologue, epilogue or do inlining at the expense
3861 : of code size growth. THRESHOLD is the limit of number of instructions
3862 : function can execute at average to be still considered not expensive. */
3863 :
bool
expensive_function_p (int threshold)
{
  basic_block bb;

  /* If profile was scaled in a way entry block has count 0, then the function
     is definitely taking a lot of time.  */
  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.nonzero_p ())
    return true;

  /* LIMIT is THRESHOLD instructions executed per invocation on average.  */
  profile_count limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count * threshold;
  profile_count sum = profile_count::zero ();
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      /* Be conservative: a block with unknown count makes the whole
	 function count as expensive.  */
      if (!bb->count.initialized_p ())
	{
	  if (dump_file)
	    fprintf (dump_file, "Function is considered expensive because"
		     " count of bb %i is not initialized\n", bb->index);
	  return true;
	}

      /* Accumulate block count once per active insn; bail out as soon
	 as the running total exceeds the limit.  */
      FOR_BB_INSNS (bb, insn)
	if (active_insn_p (insn))
	  {
	    sum += bb->count;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}
3899 :
3900 : /* All basic blocks that are reachable only from unlikely basic blocks are
3901 : unlikely. */
3902 :
void
propagate_unlikely_bbs_forward (void)
{
  auto_vec<basic_block, 64> worklist;
  basic_block bb;
  edge_iterator ei;
  edge e;

  /* NOTE: throughout this function !(x == profile_count::zero ()) is used
     instead of x != zero; the comparison appears to be partial for
     uninitialized counts — confirm in profile-count.h before rewriting.  */
  if (!(ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ()))
    {
      /* Breadth-first walk from the entry block following only edges
	 that can be executed; bb->aux serves as the visited flag.  */
      ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(size_t) 1;
      worklist.safe_push (ENTRY_BLOCK_PTR_FOR_FN (cfun));

      while (worklist.length () > 0)
	{
	  bb = worklist.pop ();
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (!(e->count () == profile_count::zero ())
		&& !(e->dest->count == profile_count::zero ())
		&& !e->dest->aux)
	      {
		e->dest->aux = (void *)(size_t) 1;
		worklist.safe_push (e->dest);
	      }
	}
    }

  /* Any block never reached above is only reachable through unlikely
     paths: force its count to zero.  Reached blocks get their aux
     marker cleared so the field is NULL again on exit.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      if (!bb->aux)
	{
	  if (!(bb->count == profile_count::zero ())
	      && (dump_file && (dump_flags & TDF_DETAILS)))
	    fprintf (dump_file,
		     "Basic block %i is marked unlikely by forward prop\n",
		     bb->index);
	  bb->count = profile_count::zero ();
	}
      else
	bb->aux = NULL;
    }
}
3945 :
3946 : /* Determine basic blocks/edges that are known to be unlikely executed and set
3947 : their counters to zero.
3948 : This is done with first identifying obviously unlikely BBs/edges and then
3949 : propagating in both directions. */
3950 :
static void
determine_unlikely_bbs ()
{
  basic_block bb;
  auto_vec<basic_block, 64> worklist;
  edge_iterator ei;
  edge e;

  /* Phase 1: locally detect obviously unlikely blocks and edges and
     zero them out.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (!(bb->count == profile_count::zero ())
	  && unlikely_executed_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Basic block %i is locally unlikely\n",
		     bb->index);
	  bb->count = profile_count::zero ();
	}

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->probability == profile_probability::never ())
	    && unlikely_executed_edge_p (e))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Edge %i->%i is locally unlikely\n",
		       bb->index, e->dest->index);
	    e->probability = profile_probability::never ();
	  }

      /* propagate_unlikely_bbs_forward uses bb->aux as a visited flag;
	 it must be clear on entry.  */
      gcc_checking_assert (!bb->aux);
    }
  /* Phase 2: forward propagation — blocks reachable only through
     unlikely blocks/edges become unlikely themselves.  */
  propagate_unlikely_bbs_forward ();

  /* Phase 3: backward propagation.  nsuccs[i] counts the successors of
     block i that may still be executed; a block whose executable
     successors all disappear can itself only be unlikely, unless it can
     terminate execution (noreturn call etc.).  */
  auto_vec<int, 64> nsuccs;
  nsuccs.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
  FOR_ALL_BB_FN (bb, cfun)
    if (!(bb->count == profile_count::zero ())
	&& bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
      {
	nsuccs[bb->index] = 0;
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!(e->probability == profile_probability::never ())
	      && !(e->dest->count == profile_count::zero ()))
	    nsuccs[bb->index]++;
	if (!nsuccs[bb->index])
	  worklist.safe_push (bb);
      }
  while (worklist.length () > 0)
    {
      bb = worklist.pop ();
      if (bb->count == profile_count::zero ())
	continue;
      if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  /* A block containing a statement that may end execution is
	     allowed to keep its count even with no live successors.  */
	  bool found = false;
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    if (stmt_can_terminate_bb_p (gsi_stmt (gsi))
		/* stmt_can_terminate_bb_p special cases noreturns because it
		   assumes that fake edges are created.  We want to know that
		   noreturn alone does not imply BB to be unlikely.  */
		|| (is_gimple_call (gsi_stmt (gsi))
		    && (gimple_call_flags (gsi_stmt (gsi)) & ECF_NORETURN)))
	      {
		found = true;
		break;
	      }
	  if (found)
	    continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Basic block %i is marked unlikely by backward prop\n",
		 bb->index);
      bb->count = profile_count::zero ();
      /* This block is now unlikely: decrement the live-successor count
	 of each predecessor and queue those that hit zero.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (!(e->probability == profile_probability::never ()))
	  {
	    if (!(e->src->count == profile_count::zero ()))
	      {
		gcc_checking_assert (nsuccs[e->src->index] > 0);
		nsuccs[e->src->index]--;
		if (!nsuccs[e->src->index])
		  worklist.safe_push (e->src);
	      }
	  }
    }
  /* Finally all edges from non-0 regions to 0 are unlikely.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      if (!(bb->count == profile_count::zero ()))
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!(e->probability == profile_probability::never ())
	      && e->dest->count == profile_count::zero ())
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Edge %i->%i is unlikely because "
			 "it enters unlikely block\n",
			 bb->index, e->dest->index);
	      e->probability = profile_probability::never ();
	    }

      /* If exactly one successor edge survives as possibly executed,
	 it must take the whole probability.  */
      edge other = NULL;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->probability == profile_probability::never ())
	  ;
	else if (other)
	  {
	    other = NULL;
	    break;
	  }
	else
	  other = e;
      if (other
	  && !(other->probability == profile_probability::always ()))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Edge %i->%i is locally likely\n",
		     bb->index, other->dest->index);
	  other->probability = profile_probability::always ();
	}
    }
  /* If even the entry block is unlikely, reflect that on the callgraph
     node of the current function.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count == profile_count::zero ())
    cgraph_node::get (current_function_decl)->count = profile_count::zero ();
}
4077 :
4078 : /* Estimate and propagate basic block frequencies using the given branch
4079 : probabilities. */
4080 :
static void
estimate_bb_frequencies ()
{
  basic_block bb;
  sreal freq_max;

  /* Zero out blocks/edges known to never execute first, so the
     propagation below only distributes frequency to likely code.  */
  determine_unlikely_bbs ();

  mark_dfs_back_edges ();

  single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
     profile_probability::always ();

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (block_info));
  alloc_aux_for_edges (sizeof (edge_prob_info));
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* FIXME: Graphite is producing edges with no profile. Once
	     this is fixed, drop this.  */
	  if (e->probability.initialized_p ())
	    EDGE_INFO (e)->back_edge_prob
	       = e->probability.to_sreal ();
	  else
	    /* back_edge_prob = 0.5 */
	    EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
	}
    }

  /* First compute frequencies locally for each loop from innermost
     to outermost to examine frequencies for back edges.  */
  estimate_loops ();

  freq_max = 0;
  FOR_EACH_BB_FN (bb, cfun)
    if (freq_max < BLOCK_INFO (bb)->frequency)
      freq_max = BLOCK_INFO (bb)->frequency;

  /* Scaling frequencies up to maximal profile count may result in
     frequent overflows especially when inlining loops.
     Small scaling results in unnecessary precision loss.  Stay in
     the half of the (exponential) range.  */
  freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
  if (freq_max < 16)
    freq_max = 16;
  profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
  cfun->cfg->count_max = profile_count::uninitialized ();
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    {
      sreal tmp = BLOCK_INFO (bb)->frequency;
      if (tmp >= 1)
	{
	  gimple_stmt_iterator gsi;
	  tree decl;

	  /* Self recursive calls can not have frequency greater than 1
	     or program will never terminate.  This will result in an
	     inconsistent bb profile but it is better than greatly confusing
	     IPA cost metrics.  */
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    if (is_gimple_call (gsi_stmt (gsi))
		&& (decl = gimple_call_fndecl (gsi_stmt (gsi))) != NULL
		&& recursive_call_p (current_function_decl, decl))
	      {
		if (dump_file)
		  fprintf (dump_file, "Dropping frequency of recursive call"
			   " in bb %i from %f\n", bb->index,
			   tmp.to_double ());
		tmp = (sreal)9 / (sreal)10;
		break;
	      }
	}
      /* Convert the relative frequency into a guessed-local count.  */
      tmp = tmp * freq_max;
      profile_count count = profile_count::from_gcov_type (tmp.to_nearest_int ());

      /* If we have profile feedback in which this function was never
	 executed, then preserve this info.  */
      if (!(bb->count == profile_count::zero ()))
	bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count);
      cfun->cfg->count_max
	= profile_count::max_prefer_initialized (cfun->cfg->count_max,
						 bb->count);
    }

  free_aux_for_blocks ();
  free_aux_for_edges ();
  compute_function_frequency ();
}
4174 :
4175 : /* Decide whether function is hot, cold or unlikely executed. */
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  /* Static constructors and main run exactly once, at startup;
     static destructors run at exit.  */
  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  /* Without IPA profile feedback, classify from declaration attributes
     and flags only.  */
  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa_p ())
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* With feedback: start pessimistic and upgrade based on block counts.  */
  node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
      == NULL)
    warn_function_cold (current_function_decl);
  /* A function whose entry was never executed stays unlikely.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa() == profile_count::zero ())
    return;
  /* One hot block makes the whole function hot; any not-never-executed
     block lifts it at least to normal.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}
4224 :
4225 : /* Build PREDICT_EXPR. */
4226 : tree
4227 1856586 : build_predict_expr (enum br_predictor predictor, enum prediction taken)
4228 : {
4229 1856586 : tree t = build1 (PREDICT_EXPR, void_type_node,
4230 1856586 : build_int_cst (integer_type_node, predictor));
4231 1856586 : SET_PREDICT_EXPR_OUTCOME (t, taken);
4232 1856586 : return t;
4233 : }
4234 :
/* Return the human-readable name of PREDICTOR from the predictor table.  */
const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}
4240 :
4241 : /* Predict branch probabilities and estimate profile of the tree CFG. */
4242 :
4243 : namespace {
4244 :
/* Pass metadata for the branch-probability estimation pass.  */
const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
4257 :
/* GIMPLE pass estimating branch probabilities and the BB profile;
   only run when branch probability guessing is enabled.  */
class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_guess_branch_prob; }
  unsigned int execute (function *) final override;

}; // class pass_profile
4270 :
unsigned int
pass_profile::execute (function *fun)
{
  unsigned nb_loops;

  /* Nothing to do if a guessed profile is already in place.  */
  if (profile_status_for_fn (cfun) == PROFILE_GUESSED)
    return 0;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  /* Scalar evolution analysis is only worth initializing when real
     loops exist (number_of_loops counts the function pseudo-loop).  */
  nb_loops = number_of_loops (fun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability (false);
  cfun->cfg->full_profile = true;

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    profile_status_for_fn (fun) = PROFILE_GUESSED;
  /* With detailed dumping, report the predicted iteration count of
     every loop, innermost first.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      sreal iterations;
      for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
	if (expected_loop_iterations_by_profile (loop, &iterations))
	  fprintf (dump_file, "Loop %d got predicted to iterate %f times.\n",
		   loop->num, iterations.to_double ());
    }
  return 0;
}
4308 :
4309 : } // anon namespace
4310 :
/* Factory used by the pass manager to instantiate the pass.  */
gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}
4316 :
4317 : /* Return true when PRED predictor should be removed after early
4318 : tree passes. Most of the predictors are beneficial to survive
4319 : as early inlining can also distribute then into caller's bodies. */
4320 :
4321 : static bool
4322 369748 : strip_predictor_early (enum br_predictor pred)
4323 : {
4324 0 : switch (pred)
4325 : {
4326 : case PRED_TREE_EARLY_RETURN:
4327 : return true;
4328 0 : default:
4329 0 : return false;
4330 : }
4331 : }
4332 :
4333 : /* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
4334 : we no longer need. EARLY is set to true when called from early
4335 : optimizations. */
4336 :
unsigned int
strip_predict_hints (function *fun, bool early)
{
  basic_block bb;
  gimple *ass_stmt;
  tree var;
  bool changed = false;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator bi;
      /* Manual iteration: gsi_next is only done when the current
	 statement was NOT removed/replaced-with-removal, since
	 gsi_remove already advances the iterator.  */
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple *stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      /* Late run drops every GIMPLE_PREDICT; the early run only
		 drops those strip_predictor_early approves of.  */
	      if (!early
		  || strip_predictor_early (gimple_predict_predictor (stmt)))
		{
		  gsi_remove (&bi, true);
		  changed = true;
		  continue;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      /* In the late run, lower __builtin_expect,
		 __builtin_expect_with_probability and the internal
		 IFN_BUILTIN_EXPECT to a plain copy of their first
		 argument.  */
	      if (!early
		  && ((fndecl != NULL_TREE
		       && fndecl_built_in_p (fndecl, BUILT_IN_EXPECT)
		       && gimple_call_num_args (stmt) == 2)
		      || (fndecl != NULL_TREE
			  && fndecl_built_in_p (fndecl,
						BUILT_IN_EXPECT_WITH_PROBABILITY)
			  && gimple_call_num_args (stmt) == 3)
		      || (gimple_call_internal_p (stmt)
			  && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT)))
		{
		  var = gimple_call_lhs (stmt);
		  changed = true;
		  if (var)
		    {
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      /* Result unused: the call can simply go away.  */
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  /* Removing statements may leave dead control flow behind.  */
  return changed ? TODO_cleanup_cfg : 0;
}
4397 :
4398 : namespace {
4399 :
/* Pass metadata for the predict-hint stripping pass.  */
const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
4412 :
/* Pass removing predict hints; instantiated twice (early and late),
   distinguished by the boolean pass parameter stored in early_p.  */
class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override
  {
    return new pass_strip_predict_hints (m_ctxt);
  }
  void set_pass_param (unsigned int n, bool param) final override
  {
    /* Single parameter: whether this is the early instance.  */
    gcc_assert (n == 0);
    early_p = param;
  }

  unsigned int execute (function *) final override;

private:
  bool early_p;

}; // class pass_strip_predict_hints
4437 :
/* Delegate to strip_predict_hints with the early/late mode selected
   when the pass instance was parameterized.  */
unsigned int
pass_strip_predict_hints::execute (function *fun)
{
  return strip_predict_hints (fun, early_p);
}
4443 :
4444 : } // anon namespace
4445 :
/* Factory used by the pass manager to instantiate the pass.  */
gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}
4451 :
4452 : /* Rebuild function frequencies. Passes are in general expected to
4453 : maintain profile by hand, however in some cases this is not possible:
   for example when inlining several functions with loops frequencies might run
4455 : out of scale and thus needs to be recomputed. */
4456 :
4457 : void
4458 1090904 : rebuild_frequencies (void)
4459 : {
4460 : /* If we have no profile, do nothing. Note that after inlining
4461 : profile_status_for_fn may not represent the actual presence/absence of
4462 : profile. */
4463 1090904 : if (profile_status_for_fn (cfun) == PROFILE_ABSENT
4464 1090904 : && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
4465 : return;
4466 :
4467 :
4468 : /* See if everything is OK and update count_max. */
4469 1090656 : basic_block bb;
4470 1090656 : bool inconsistency_found = false;
4471 1090656 : bool uninitialized_probablity_found = false;
4472 1090656 : bool uninitialized_count_found = false;
4473 1090656 : bool feedback_found = false;
4474 :
4475 1090656 : cfun->cfg->count_max = profile_count::uninitialized ();
4476 15133876 : FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
4477 : {
4478 14043220 : cfun->cfg->count_max
4479 14043220 : = profile_count::max_prefer_initialized (cfun->cfg->count_max,
4480 : bb->count);
4481 26583353 : if (bb->count.nonzero_p () && bb->count.quality () >= AFDO)
4482 : feedback_found = true;
4483 : /* Uninitialized count may be result of inlining or an omision in an
4484 : optimization pass. */
4485 14043220 : if (!bb->count.initialized_p ())
4486 : {
4487 18 : uninitialized_count_found = true;
4488 18 : if (dump_file)
4489 0 : fprintf (dump_file, "BB %i has uninitialized count\n",
4490 : bb->index);
4491 : }
4492 14043220 : if (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
4493 : && (!uninitialized_probablity_found || !inconsistency_found))
4494 : {
4495 12952564 : profile_count sum = profile_count::zero ();
4496 12952564 : edge e;
4497 12952564 : edge_iterator ei;
4498 :
4499 30449401 : FOR_EACH_EDGE (e, ei, bb->preds)
4500 : {
4501 17496837 : sum += e->count ();
4502 : /* Uninitialized probability may be result of inlining or an
4503 : omision in an optimization pass. */
4504 17496837 : if (!e->probability.initialized_p ())
4505 : {
4506 36 : if (dump_file)
4507 0 : fprintf (dump_file,
4508 : "Edge %i->%i has uninitialized probability\n",
4509 0 : e->src->index, e->dest->index);
4510 : }
4511 : }
4512 12952564 : if (sum.differs_from_p (bb->count))
4513 : {
4514 195620 : if (dump_file)
4515 18 : fprintf (dump_file,
4516 : "BB %i has invalid sum of incomming counts\n",
4517 : bb->index);
4518 : inconsistency_found = true;
4519 : }
4520 : }
4521 : }
4522 :
4523 : /* If everything is OK, do not re-propagate frequencies. */
4524 1090656 : if (!inconsistency_found
4525 1029186 : && (!uninitialized_count_found || uninitialized_probablity_found)
4526 2119842 : && !cfun->cfg->count_max.very_large_p ())
4527 : {
4528 : /* Propagating zero counts should be safe and may
4529 : help hot/cold splitting. */
4530 1029141 : determine_unlikely_bbs ();
4531 1029141 : if (dump_file)
4532 31 : fprintf (dump_file, "Profile is consistent\n");
4533 1029141 : return;
4534 : }
4535 : /* Do not re-propagate if we have profile feedback. Even if the profile is
4536 : inconsistent from previous transofrmations, it is probably more realistic
4537 : for hot part of the program than result of repropagating.
4538 :
4539 : Consider example where we previously has
4540 :
4541 : if (test)
4542 : then [large probability for true]
4543 :
4544 : and we later proved that test is always 0. In this case, if profile was
4545 : read correctly, we must have duplicated the conditional (for example by
4546 : inlining) in to a context where test is false. From profile feedback
4547 : we know that most executions if the conditionals were true, so the
4548 : important copy is not the one we look on.
4549 :
4550 : Propagating from probabilities would make profile look consistent, but
4551 : because probablities after code duplication may not be representative
4552 : for a given run, we would only propagate the error further. */
4553 61515 : if (feedback_found && !uninitialized_count_found)
4554 : {
4555 : /* Propagating zero counts should be safe and may
4556 : help hot/cold splitting. */
4557 9 : determine_unlikely_bbs ();
4558 9 : if (dump_file)
4559 0 : fprintf (dump_file,
4560 : "Profile is inconsistent but read from profile feedback;"
4561 : " not rebuilding\n");
4562 9 : return;
4563 : }
4564 :
4565 61506 : loop_optimizer_init (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
4566 61506 : connect_infinite_loops_to_exit ();
4567 61506 : estimate_bb_frequencies ();
4568 61506 : remove_fake_exit_edges ();
4569 61506 : loop_optimizer_finalize ();
4570 61506 : if (dump_file)
4571 5 : fprintf (dump_file, "Rebuilt basic block counts\n");
4572 :
4573 : return;
4574 : }
4575 :
4576 : namespace {
4577 :
/* Static pass descriptor for the rebuild_frequencies pass; handed to the
   gimple_opt_pass constructor below and consumed by the pass manager.  */

const pass_data pass_data_rebuild_frequencies =
{
  GIMPLE_PASS, /* type */
  "rebuild_frequencies", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_REBUILD_FREQUENCIES, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
4590 :
/* Pass wrapper that recomputes the profile of the current function by
   calling rebuild_frequencies.  */

class pass_rebuild_frequencies : public gimple_opt_pass
{
public:
  pass_rebuild_frequencies (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_rebuild_frequencies, ctxt)
  {}

  /* opt_pass methods: */

  /* The pass is scheduled more than once; each occurrence needs its own
     object.  */
  opt_pass * clone () final override
  {
    return new pass_rebuild_frequencies (m_ctxt);
  }

  /* Pass parameter 0 selects the early variant of the pass.  */
  void set_pass_param (unsigned int n, bool param) final override
  {
    gcc_assert (n == 0);
    early_p = param;
  }

  unsigned int execute (function *) final override
  {
    rebuild_frequencies ();
    return 0;
  }

private:
  /* NOTE(review): early_p is recorded by set_pass_param but never read by
     execute here -- presumably kept for interface parity with
     pass_strip_predict_hints; confirm before removing.  */
  bool early_p;

}; // class pass_rebuild_frequencies
4619 :
4620 : } // anon namespace
4621 :
/* Factory used by the pass manager to instantiate the rebuild_frequencies
   pass in context CTXT.  */

gimple_opt_pass *
make_pass_rebuild_frequencies (gcc::context *ctxt)
{
  return new pass_rebuild_frequencies (ctxt);
}
4627 :
/* Perform a dry run of the branch prediction pass and report comparison of
   the predicted and real profile into the dump file.  */
4630 :
4631 : void
4632 12 : report_predictor_hitrates (void)
4633 : {
4634 12 : unsigned nb_loops;
4635 :
4636 12 : loop_optimizer_init (LOOPS_NORMAL);
4637 12 : if (dump_file && (dump_flags & TDF_DETAILS))
4638 12 : flow_loops_dump (dump_file, NULL, 0);
4639 :
4640 12 : nb_loops = number_of_loops (cfun);
4641 12 : if (nb_loops > 1)
4642 5 : scev_initialize ();
4643 :
4644 12 : tree_estimate_probability (true);
4645 :
4646 12 : if (nb_loops > 1)
4647 5 : scev_finalize ();
4648 :
4649 12 : loop_optimizer_finalize ();
4650 12 : }
4651 :
/* Force edge E to be cold.
   If IMPOSSIBLE is true, force edge to have count and probability 0; otherwise
   keep a low probability to represent possible error in a guess.  This is used
   i.e. in case we predict loop to likely iterate given number of times but
   we are not 100% sure.

   This function locally updates profile without attempt to keep global
   consistency which cannot be reached in full generality without full profile
   rebuild from probabilities alone.  Doing so is not necessarily a good idea
   because frequencies and counts may be more realistic than probabilities.

   In some cases (such as for elimination of early exits during full loop
   unrolling) the caller can ensure that profile will get consistent
   afterwards.  */
4666 :
4667 : void
4668 571272 : force_edge_cold (edge e, bool impossible)
4669 : {
4670 571272 : profile_count count_sum = profile_count::zero ();
4671 571272 : profile_probability prob_sum = profile_probability::never ();
4672 571272 : edge_iterator ei;
4673 571272 : edge e2;
4674 571272 : bool uninitialized_exit = false;
4675 :
4676 : /* When branch probability guesses are not known, then do nothing. */
4677 571506 : if (!impossible && !e->count ().initialized_p ())
4678 2030 : return;
4679 :
4680 571272 : profile_probability goal = (impossible ? profile_probability::never ()
4681 234 : : profile_probability::very_unlikely ());
4682 :
4683 : /* If edge is already improbably or cold, just return. */
4684 571272 : if (e->probability <= goal
4685 55140 : && (!impossible || e->count () == profile_count::zero ()))
4686 1170 : return;
4687 1709801 : FOR_EACH_EDGE (e2, ei, e->src->succs)
4688 1139699 : if (e2 != e)
4689 : {
4690 569597 : if (e->flags & EDGE_FAKE)
4691 0 : continue;
4692 569597 : if (e2->count ().initialized_p ())
4693 569396 : count_sum += e2->count ();
4694 569597 : if (e2->probability.initialized_p ())
4695 569396 : prob_sum += e2->probability;
4696 : else
4697 : uninitialized_exit = true;
4698 : }
4699 :
4700 : /* If we are not guessing profiles but have some other edges out,
4701 : just assume the control flow goes elsewhere. */
4702 570102 : if (uninitialized_exit)
4703 201 : e->probability = goal;
4704 : /* If there are other edges out of e->src, redistribute probabilitity
4705 : there. */
4706 569901 : else if (prob_sum > profile_probability::never ())
4707 : {
4708 568749 : if (dump_file && (dump_flags & TDF_DETAILS))
4709 : {
4710 998 : fprintf (dump_file, "Making edge %i->%i %s by redistributing "
4711 : "probability to other edges. Original probability: ",
4712 998 : e->src->index, e->dest->index,
4713 : impossible ? "impossible" : "cold");
4714 998 : e->probability.dump (dump_file);
4715 998 : fprintf (dump_file, "\n");
4716 : }
4717 568749 : set_edge_probability_and_rescale_others (e, goal);
4718 568749 : if (current_ir_type () != IR_GIMPLE
4719 568749 : && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4720 144162 : update_br_prob_note (e->src);
4721 : }
4722 : /* If all edges out of e->src are unlikely, the basic block itself
4723 : is unlikely. */
4724 : else
4725 : {
4726 1152 : if (prob_sum == profile_probability::never ())
4727 928 : e->probability = profile_probability::always ();
4728 : else
4729 : {
4730 224 : if (impossible)
4731 224 : e->probability = profile_probability::never ();
4732 : /* If BB has some edges out that are not impossible, we cannot
4733 : assume that BB itself is. */
4734 : impossible = false;
4735 : }
4736 1152 : if (current_ir_type () != IR_GIMPLE
4737 1152 : && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun))
4738 28 : update_br_prob_note (e->src);
4739 1152 : if (e->src->count == profile_count::zero ())
4740 168 : return;
4741 1968 : if (count_sum == profile_count::zero () && impossible)
4742 : {
4743 738 : bool found = false;
4744 738 : if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
4745 : ;
4746 708 : else if (current_ir_type () == IR_GIMPLE)
4747 1416 : for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
4748 2710 : !gsi_end_p (gsi); gsi_next (&gsi))
4749 : {
4750 2048 : if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
4751 : {
4752 : found = true;
4753 : break;
4754 : }
4755 : }
4756 : /* FIXME: Implement RTL path. */
4757 : else
4758 : found = true;
4759 708 : if (!found)
4760 : {
4761 692 : if (dump_file && (dump_flags & TDF_DETAILS))
4762 0 : fprintf (dump_file,
4763 : "Making bb %i impossible and dropping count to 0.\n",
4764 0 : e->src->index);
4765 692 : e->src->count = profile_count::zero ();
4766 1467 : FOR_EACH_EDGE (e2, ei, e->src->preds)
4767 775 : force_edge_cold (e2, impossible);
4768 : return;
4769 : }
4770 : }
4771 :
4772 : /* If we did not adjusting, the source basic block has no likely edeges
4773 : leaving other direction. In that case force that bb cold, too.
4774 : This in general is difficult task to do, but handle special case when
4775 : BB has only one predecestor. This is common case when we are updating
4776 : after loop transforms. */
4777 584 : if (!(prob_sum > profile_probability::never ())
4778 292 : && count_sum == profile_count::zero ()
4779 122 : && single_pred_p (e->src) && e->src->count.to_frequency (cfun)
4780 52 : > (impossible ? 0 : 1))
4781 : {
4782 51 : int old_frequency = e->src->count.to_frequency (cfun);
4783 51 : if (dump_file && (dump_flags & TDF_DETAILS))
4784 0 : fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
4785 : impossible ? "impossible" : "cold");
4786 51 : int new_frequency = MIN (e->src->count.to_frequency (cfun),
4787 : impossible ? 0 : 1);
4788 0 : if (impossible)
4789 28 : e->src->count = profile_count::zero ();
4790 : else
4791 23 : e->src->count = e->count ().apply_scale (new_frequency,
4792 : old_frequency);
4793 51 : force_edge_cold (single_pred_edge (e->src), impossible);
4794 : }
4795 2 : else if (dump_file && (dump_flags & TDF_DETAILS)
4796 241 : && maybe_hot_bb_p (cfun, e->src))
4797 0 : fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
4798 : impossible ? "impossible" : "cold");
4799 : }
4800 : }
4801 :
4802 : #if CHECKING_P
4803 :
4804 : namespace selftest {
4805 :
4806 : /* Test that value range of predictor values defined in predict.def is
4807 : within range (50, 100]. */
4808 :
/* One name/hitrate pair expanded from a predict.def entry; an entry with a
   NULL name terminates the selftest array below.  */

struct branch_predictor
{
  const char *name;       /* Predictor name from predict.def.  */
  int probability;        /* Hitrate, scaled by REG_BR_PROB_BASE.  */
};
4814 :
4815 : #define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) { NAME, HITRATE },
4816 :
4817 : static void
4818 4 : test_prediction_value_range ()
4819 : {
4820 4 : branch_predictor predictors[] = {
4821 : #include "predict.def"
4822 : { NULL, PROB_UNINITIALIZED }
4823 : };
4824 :
4825 216 : for (unsigned i = 0; predictors[i].name != NULL; i++)
4826 : {
4827 212 : if (predictors[i].probability == PROB_UNINITIALIZED)
4828 28 : continue;
4829 :
4830 184 : unsigned p = 100 * predictors[i].probability / REG_BR_PROB_BASE;
4831 212 : ASSERT_TRUE (p >= 50 && p <= 100);
4832 : }
4833 4 : }
4834 :
4835 : #undef DEF_PREDICTOR
4836 :
/* Run all of the selftests within this file.  */

void
predict_cc_tests ()
{
  test_prediction_value_range ();
}
4844 :
4845 : } // namespace selftest
4846 : #endif /* CHECKING_P. */
|