Line data Source code
1 : /* Calculate branch probabilities, and basic block execution counts.
2 : Copyright (C) 1990-2026 Free Software Foundation, Inc.
3 : Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 : based on some ideas from Dain Samples of UC Berkeley.
5 : Further mangling by Bob Manson, Cygnus Support.
6 : Converted to use trees by Dale Johannesen, Apple Computer.
7 :
8 : This file is part of GCC.
9 :
10 : GCC is free software; you can redistribute it and/or modify it under
11 : the terms of the GNU General Public License as published by the Free
12 : Software Foundation; either version 3, or (at your option) any later
13 : version.
14 :
15 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 : for more details.
19 :
20 : You should have received a copy of the GNU General Public License
21 : along with GCC; see the file COPYING3. If not see
22 : <http://www.gnu.org/licenses/>. */
23 :
24 : /* Generate basic block profile instrumentation and auxiliary files.
25 : Tree-based version. See profile.cc for overview. */
26 :
27 : #include "config.h"
28 : #include "system.h"
29 : #include "coretypes.h"
30 : #include "memmodel.h"
31 : #include "backend.h"
32 : #include "target.h"
33 : #include "tree.h"
34 : #include "gimple.h"
35 : #include "cfghooks.h"
36 : #include "tree-pass.h"
37 : #include "ssa.h"
38 : #include "cgraph.h"
39 : #include "coverage.h"
40 : #include "diagnostic-core.h"
41 : #include "fold-const.h"
42 : #include "varasm.h"
43 : #include "tree-nested.h"
44 : #include "gimplify.h"
45 : #include "gimple-iterator.h"
46 : #include "gimplify-me.h"
47 : #include "tree-cfg.h"
48 : #include "tree-into-ssa.h"
49 : #include "value-prof.h"
50 : #include "profile.h"
51 : #include "tree-cfgcleanup.h"
52 : #include "stringpool.h"
53 : #include "attribs.h"
54 : #include "tree-pretty-print.h"
55 : #include "langhooks.h"
56 : #include "stor-layout.h"
57 : #include "xregex.h"
58 : #include "alloc-pool.h"
59 : #include "symbol-summary.h"
60 : #include "symtab-thunks.h"
61 : #include "cfganal.h"
62 :
63 : static GTY(()) tree gcov_type_node;
64 : static GTY(()) tree tree_interval_profiler_fn;
65 : static GTY(()) tree tree_pow2_profiler_fn;
66 : static GTY(()) tree tree_topn_values_profiler_fn;
67 : static GTY(()) tree tree_indirect_call_profiler_fn;
68 : static GTY(()) tree tree_average_profiler_fn;
69 : static GTY(()) tree tree_ior_profiler_fn;
70 : static GTY(()) tree tree_time_profiler_counter;
71 :
72 :
73 : static GTY(()) tree ic_tuple_var;
74 : static GTY(()) tree ic_tuple_counters_field;
75 : static GTY(()) tree ic_tuple_callee_field;
76 :
77 : /* Types of counter update methods.
78 :
79 : By default, the counter updates are done for a single threaded system
80 : (COUNTER_UPDATE_SINGLE_THREAD).
81 :
82 : If the user selected atomic profile counter updates
83 : (-fprofile-update=atomic), then the counter updates will be done atomically
84 : on a best-effort basis. One of three methods to do the counter updates is
85 : selected according to the target capabilities.
86 :
87 : Ideally, the counter updates are done through atomic operations in hardware
88 : (COUNTER_UPDATE_ATOMIC_BUILTIN).
89 :
90 : If the target supports only 32-bit atomic increments and gcov_type_node is a
91 : 64-bit integer type, then for the profile edge counters the increment is
92 : performed through two separate 32-bit atomic increments
93 : (COUNTER_UPDATE_ATOMIC_SPLIT or COUNTER_UPDATE_ATOMIC_PARTIAL). If the
94 : target supports libatomic (targetm.have_libatomic), then other counter
95 : updates are carried out by libatomic calls (COUNTER_UPDATE_ATOMIC_SPLIT).
96 : If the target does not support libatomic, then the other counter updates are
97 : not done atomically (COUNTER_UPDATE_ATOMIC_PARTIAL) and a warning is
98 : issued.
99 :
100 : If the target does not support atomic operations in hardware, however, it
101 : supports libatomic, then all updates are carried out by libatomic calls
102 : (COUNTER_UPDATE_ATOMIC_BUILTIN). */
103 : enum counter_update_method {
104 : COUNTER_UPDATE_SINGLE_THREAD,
105 : COUNTER_UPDATE_ATOMIC_BUILTIN,
106 : COUNTER_UPDATE_ATOMIC_SPLIT,
107 : COUNTER_UPDATE_ATOMIC_PARTIAL
108 : };
109 :
110 : static counter_update_method counter_update = COUNTER_UPDATE_SINGLE_THREAD;
111 :
112 : /* These functions support measuring modified conditition/decision coverage
113 : (MC/DC). MC/DC requires all of the below during testing:
114 :
115 : - Each entry and exit point is invoked
116 : - Each decision takes every possible outcome
117 : - Each condition in a decision takes every possible outcome
118 : - Each condition in a decision is shown to independently affect the outcome
119 : of the decision
120 :
121 : Independence of a condition is shown by recording it being evaluated to a
122 : value (true/false) and not being made irrelevant ("masked") by a later term.
123 : This feature adds some instrumentation code, a few bitwise operators, that
124 : records the branches taken in conditions and applies a filter for the
125 : masking effect. Masking is essentially short-circuiting in reverse: a
126 : condition does not contribute to the outcome if it would short circuit the
127 : (sub) expression if it was evaluated right-to-left, (_ && false) and (_ ||
128 : true).
129 :
130 : The program is essentially rewritten this way:
131 :
132 : - if (a || b) { fn () }
133 : + if (a) { _t |= 0x1; goto _then; }
134 : + else { _f |= 0x1;
135 : + if (b) { _t |= 0x2; _mask |= 0x1; goto _then; }
136 : + else { _f |= 0x2; goto _else; }
137 : + _then:
138 : + _gcov_t |= (_t & _mask);
139 : + _gcov_f |= (_f & _mask);
140 : + fn (); goto _end;
141 : + _else:
142 : + _gcov_t |= (_t & _mask);
143 : + _gcov_f |= (_f & _mask);
144 : + fn ();
145 : + _end:
146 :
147 : It is assumed the front end will provide discrimnators so that conditional
148 : basic blocks (basic block with a conditional jump and outgoing true/false
149 : edges) that belong to the same Boolean expression have the same
150 : discriminator. Masking is determined by analyzing these expressions as a
151 : reduced order binary decision diagram. */
152 : namespace
153 : {
154 : /* Some context and reused instances between function calls. Large embedded
155 : buffers are used to up-front request enough memory for most programs and
156 : merge them into a single allocation at the cost of using more memory in the
157 : average case. Some numbers from linux v5.13 which is assumed to be a
158 : reasonably diverse code base: 75% of the functions in linux have less than
159 : 16 nodes in the CFG and approx 2.5% have more than 64 nodes. The functions
160 : that go beyond a few dozen nodes tend to be very large (>100) and so 64
161 : seems like a good balance.
162 :
163 : This is really just a performance balance of the cost of allocation and
164 : wasted memory. */
165 : struct conds_ctx
166 : {
167 : /* This is both a reusable shared allocation which is also used to return
168 : single expressions, which means it for most code should only hold a
169 : couple of elements. */
170 : auto_vec<basic_block, 64> blocks;
171 :
172 : /* Index for the topological order indexed by basic_block->index to an
173 : ordering so that expression (a || b && c) => top_index[a] < top_index[b]
174 : < top_index[c]. */
175 : auto_vec<int, 256> top_index;
176 :
177 : /* Pre-allocate bitmaps and vectors for per-function book keeping. This is
178 : pure instance reuse and the bitmaps carry no data between function
179 : calls. */
180 : auto_vec<basic_block, 64> B1;
181 : auto_vec<basic_block, 64> B2;
182 : auto_sbitmap G1;
183 : auto_sbitmap G2;
184 : auto_sbitmap G3;
185 :
186 156 : explicit conds_ctx (unsigned size) noexcept (true) : G1 (size), G2 (size),
187 312 : G3 (size)
188 : {
189 156 : }
190 : };
191 :
192 : /* Only instrument terms with fewer than number of bits in a (wide) gcov
193 : integer, which is probably 64. The algorithm itself does not impose this
194 : limitation, but it makes for a simpler implementation.
195 :
196 : * Allocating the output data structure (coverage_counter_alloc ()) can
197 : assume pairs of gcov_type_unsigned and not use a separate length field.
198 : * A pair gcov_type_unsigned can be used as accumulators.
199 : * Updating accumulators is can use the bitwise operations |=, &= and not
200 : custom operators that work for arbitrary-sized bit-sets.
201 :
202 : Most real-world code should be unaffected by this, but it is possible
203 : (especially for generated code) to exceed this limit. */
204 : #define CONDITIONS_MAX_TERMS (TYPE_PRECISION (gcov_type_node))
205 : #define EDGE_CONDITION (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
206 :
207 : /* Compare two basic blocks by their order in the expression i.e. for (a || b)
208 : then topological_cmp (a, b, ...) < 0. The result is undefined if LHS, RHS
209 : belong to different expressions. The TOP_INDEX argument should be the
210 : top_index vector from ctx. */
211 : int
212 8028 : topological_cmp (const void *lhs, const void *rhs, void *top_index)
213 : {
214 8028 : const_basic_block l = *(const basic_block*) lhs;
215 8028 : const_basic_block r = *(const basic_block*) rhs;
216 8028 : const vec<int>* im = (const vec<int>*) top_index;
217 8028 : return (*im)[l->index] - (*im)[r->index];
218 : }
219 :
220 : /* Find the index of NEEDLE in BLOCKS; return -1 if not found. This has two
221 : uses, sometimes for the index and sometimes for set member checks. Sets are
222 : typically very small (number of conditions, >8 is uncommon) so linear search
223 : should be very fast. */
224 : int
225 2927564 : index_of (const basic_block needle, array_slice<basic_block> blocks)
226 : {
227 51732229 : for (size_t i = 0; i < blocks.size (); i++)
228 51732229 : if (blocks[i] == needle)
229 2927564 : return int (i);
230 : return -1;
231 : }
232 :
233 : /* Special cases of the single_*_p and single_*_edge functions in basic-block.h
234 : that don't consider exception handling or other complex edges. This helps
235 : create a view of the CFG with only normal edges - if a basic block has both
236 : an outgoing fallthrough and exceptional edge, it should be considered a
237 : single-successor. */
238 : bool
239 6513870 : single_p (const vec<edge, va_gc> *edges)
240 : {
241 6513870 : int n = EDGE_COUNT (edges);
242 6384240 : if (n == 0)
243 : return false;
244 :
245 15968959 : for (edge e : edges)
246 9584719 : if (e->flags & EDGE_COMPLEX)
247 17 : n -= 1;
248 :
249 6384240 : return n == 1;
250 : }
251 :
252 : /* Get the single, non-complex edge. Behavior is undefined edges have more
253 : than 1 non-complex edges. */
254 : edge
255 1725 : single_edge (const vec<edge, va_gc> *edges)
256 : {
257 1725 : gcc_checking_assert (single_p (edges));
258 1725 : for (edge e : edges)
259 : {
260 1725 : if (e->flags & EDGE_COMPLEX)
261 0 : continue;
262 : return e;
263 : }
264 : return NULL;
265 : }
266 :
267 : /* Sometimes, for example with function calls, goto labels, and C++
268 : destructors, the CFG gets extra nodes that are essentially single-entry
269 : single-exit in the middle of boolean expressions. For example:
270 :
271 : x || can_throw (y)
272 :
273 : A
274 : /|
275 : / |
276 : B |
277 : | |
278 : C |
279 : / \ |
280 : / \|
281 : F T
282 :
283 : Without the extra node inserted by the function + exception it becomes a
284 : proper 2-term graph, not 2 single-term graphs.
285 :
286 : A
287 : /|
288 : C |
289 : / \|
290 : F T
291 :
292 : This function finds the source edge of these paths. This is often the
293 : identity function. */
294 : edge
295 3320631 : contract_edge_up (edge e)
296 : {
297 3322611 : while (true)
298 : {
299 3321621 : basic_block src = e->src;
300 3321621 : if (!single_p (src->preds))
301 : return e;
302 3189380 : if (!single_p (src->succs))
303 : return e;
304 990 : e = single_edge (src->preds);
305 990 : }
306 : }
307 :
308 : /* A simple struct for storing/returning outcome block pairs. Either both
309 : blocks are set or both are NULL. */
310 : struct outcomes
311 : {
312 : basic_block t = NULL;
313 : basic_block f = NULL;
314 :
315 130022 : operator bool () const noexcept (true)
316 : {
317 130022 : return t && f;
318 : }
319 : };
320 :
321 : /* Get the true/false successors of a basic block. If b is not a conditional
322 : block both edges are NULL. */
323 : outcomes
324 2928304 : conditional_succs (const basic_block b)
325 : {
326 2928304 : outcomes c;
327 14641520 : for (edge e : b->succs)
328 : {
329 5856608 : if (e->flags & EDGE_TRUE_VALUE)
330 2928304 : c.t = e->dest;
331 5856608 : if (e->flags & EDGE_FALSE_VALUE)
332 2928304 : c.f = e->dest;
333 : }
334 :
335 2928304 : gcc_assert ((c.t && c.f) || (!c.t && !c.f));
336 2928304 : return c;
337 : }
338 :
339 : /* Get the index or offset of a conditional flag, 0 for true and 1 for false.
340 : These indices carry no semantics but must be consistent as they are used to
341 : index into data structures in code generation and gcov. */
342 : unsigned
343 131134 : condition_index (unsigned flag)
344 : {
345 131134 : return (flag & EDGE_CONDITION) == EDGE_TRUE_VALUE ? 0 : 1;
346 : }
347 :
348 : /* Returns the condition identifier for the basic block if set, otherwise 0.
349 : This is only meaningful in GIMPLE and is used for condition coverage.
350 :
351 : There may be conditions created that did not get an uid, such as those
352 : implicitly created by destructors. We could include them in the condition
353 : coverage for completeness (i.e. condition coverage implies (implicit) branch
354 : coverage), but they have no natural buckets and should all be single-term.
355 : For now these are ignored and given uid = 0, and branch coverage is left to
356 : -fprofile-arcs.
357 :
358 : Under optimization, COND_EXPRs may be folded, replaced with switches,
359 : min-max, etc., which leaves ghost identifiers in basic blocks that do not
360 : end with a conditional jump. They are not really meaningful for condition
361 : coverage anymore, but since coverage is unreliable under optimization anyway
362 : this is not a big problem.
363 :
364 : The cond_uids map in FN cannot be expected to exist. It will only be
365 : created if it is needed, and a function may have gconds even though there
366 : are none in source. This can be seen in PR gcov-profile/114601, when
367 : -finstrument-functions-once is used and the function has no conditions. */
368 : unsigned
369 1852 : condition_uid (struct function *fn, basic_block b)
370 : {
371 1852 : gimple *stmt = gsi_stmt (gsi_last_bb (b));
372 1854 : if (!safe_is_a <gcond*> (stmt) || !fn->cond_uids)
373 : return 0;
374 :
375 623 : unsigned *v = fn->cond_uids->get (as_a <gcond*> (stmt));
376 623 : return v ? *v : 0;
377 : }
378 :
379 : /* Compute the masking table.
380 :
381 : Masking and short circuiting are deeply connected - masking occurs when
382 : control flow reaches a state that is also reachable with short circuiting.
383 : In fact, masking corresponds to short circuiting for the reversed
384 : expression. This means we can find the limits, the last term in preceeding
385 : subexpressions, by following the edges that short circuit to the same
386 : outcome. The algorithm treats the CFG as a reduced order binary decision
387 : diagram (see Randall E. Bryant's Graph Based Algorithms for Boolean
388 : Function Manipulation (1987)).
389 :
390 : In the simplest case a || b:
391 :
392 : a
393 : |\
394 : | b
395 : |/ \
396 : T F
397 :
398 : T has multiple incoming edges and is the outcome of a short circuit,
399 : with top = a, bot = b. The top node (a) is masked when the edge (b, T) is
400 : taken.
401 :
402 : The names "top" and "bot" refer to a pair of nodes with a shared
403 : successor. The top is always the node corresponding to the left-most
404 : operand of the two, and it holds that top < bot in a topological ordering.
405 :
406 : Now consider (a && b) || (c && d) and its masking table:
407 :
408 : a
409 : |\
410 : b \
411 : |\|
412 : | c
413 : | |\
414 : | d \
415 : |/ \|
416 : T F
417 :
418 : a[0] = {}
419 : a[1] = {}
420 : b[0] = {a}
421 : b[1] = {}
422 : c[0] = {}
423 : c[1] = {}
424 : d[0] = {c}
425 : d[1] = {a,b}
426 :
427 : Note that 0 and 1 are indices and not boolean values - a[0] is the index in
428 : the masking vector when a takes the true edge.
429 :
430 : b[0] and d[0] are identical to the a || b example, and d[1] is the bot in
431 : the triangle [d, b] -> T. b is the top node in the [d, b] relationship and
432 : last term in (a && b). To find the other terms masked we use the fact that
433 : all paths in an expression go through either of the outcomes, found by
434 : collecting all non-complex edges that go out of the expression (the
435 : neighborhood). In some cases the outgoing edge go through intermediate (or
436 : bypass) nodes, and we collect these paths too (see contract_edge_up).
437 :
438 : We find the terms by marking the outcomes (in this case c, T) and walk the
439 : predecessors starting at top (in this case b) and masking nodes when both
440 : successors are marked.
441 :
442 : The masking table is represented as two bitfields per term in the expression
443 : with the index corresponding to the term in the Boolean expression.
444 : a || b && c becomes the term vector [a b c] and the masking table [a[0]
445 : a[1] b[0] ...]. The kth bit of a masking vector is set if the kth term
446 : is masked by taking the edge.
447 :
448 : The out masks are in uint64_t (the practical maximum for gcov_type_node for
449 : any target) as it has to be big enough to store the target size gcov types
450 : independent of the host. */
451 : void
452 289 : masking_vectors (conds_ctx& ctx, array_slice<basic_block> blocks,
453 : array_slice<sbitmap> maps, array_slice<uint64_t> masks)
454 : {
455 289 : gcc_assert (blocks.is_valid ());
456 289 : gcc_assert (!blocks.empty ());
457 289 : gcc_assert (maps.is_valid ());
458 289 : gcc_assert (masks.is_valid ());
459 289 : gcc_assert (sizeof (masks[0]) * BITS_PER_UNIT >= CONDITIONS_MAX_TERMS);
460 :
461 289 : if (bitmap_count_bits (maps[0]) == 1)
462 : return;
463 :
464 94 : sbitmap marks = ctx.G1;
465 94 : const sbitmap core = maps[0];
466 94 : const sbitmap allg = maps[1];
467 94 : vec<basic_block>& queue = ctx.B1;
468 94 : vec<basic_block>& body = ctx.B2;
469 94 : const vec<int>& top_index = ctx.top_index;
470 :
471 : /* Set up for the iteration - include the outcome nodes in the traversal.
472 : The algorithm compares pairs of nodes and is not really sensitive to
473 : traversal order, but need to maintain topological order because the
474 : index of masking nodes maps to the index in the accumulators. We must
475 : also check the incoming-to-outcome pairs. These edges may in turn be
476 : split (this happens with labels on top of then/else blocks) so we must
477 : follow any single-in single-out path. The non-condition blocks do not
478 : have to be in order as they are non-condition blocks and will not be
479 : considered for the set-bit index. */
480 94 : body.truncate (0);
481 94 : body.reserve (blocks.size () + 2);
482 497 : for (const basic_block b : blocks)
483 403 : if (bitmap_bit_p (core, b->index))
484 361 : body.quick_push (b);
485 :
486 497 : for (basic_block b : blocks)
487 : {
488 403 : if (!bitmap_bit_p (core, b->index))
489 42 : continue;
490 :
491 1805 : for (edge e : b->succs)
492 : {
493 722 : if (e->flags & EDGE_COMPLEX)
494 0 : continue;
495 722 : if (bitmap_bit_p (allg, e->dest->index))
496 307 : continue;
497 415 : body.safe_push (e->dest);
498 :
499 : /* There may be multiple nodes between the condition edge and the
500 : actual outcome, and we need to know when these paths join to
501 : determine if there is short circuit/masking. This is
502 : effectively creating a virtual edge from the condition node to
503 : the real outcome. */
504 1565 : while (!(e->flags & EDGE_DFS_BACK) && single_p (e->dest->succs))
505 : {
506 735 : e = single_edge (e->dest->succs);
507 735 : body.safe_push (e->dest);
508 : }
509 : }
510 : }
511 :
512 : /* Find the masking. The leftmost element cannot mask anything, so
513 : start at 1. */
514 3022 : for (size_t i = 1; i != body.length (); i++)
515 : {
516 1417 : const basic_block b = body[i];
517 10717 : for (edge e1 : b->preds)
518 287044 : for (edge e2 : b->preds)
519 : {
520 267646 : if (e1 == e2)
521 137624 : continue;
522 261180 : if ((e1->flags | e2->flags) & EDGE_COMPLEX)
523 0 : continue;
524 :
525 261180 : edge etop = contract_edge_up (e1);
526 261180 : edge ebot = contract_edge_up (e2);
527 261180 : gcc_assert (etop != ebot);
528 :
529 261180 : const basic_block top = etop->src;
530 261180 : const basic_block bot = ebot->src;
531 261180 : const unsigned cond = etop->flags & ebot->flags & EDGE_CONDITION;
532 261180 : if (!cond)
533 996 : continue;
534 260184 : if (top_index[top->index] > top_index[bot->index])
535 130092 : continue;
536 130092 : if (!bitmap_bit_p (core, top->index))
537 50 : continue;
538 130042 : if (!bitmap_bit_p (core, bot->index))
539 20 : continue;
540 :
541 130022 : outcomes out = conditional_succs (top);
542 130022 : gcc_assert (out);
543 130022 : bitmap_clear (marks);
544 130022 : bitmap_set_bit (marks, out.t->index);
545 130022 : bitmap_set_bit (marks, out.f->index);
546 130022 : queue.truncate (0);
547 130022 : queue.safe_push (top);
548 :
549 : // The edge bot -> outcome triggers the masking
550 260044 : const int m = 2*index_of (bot, body) + condition_index (cond);
551 130022 : gcc_assert (m >= 0);
552 3058571 : while (!queue.is_empty ())
553 : {
554 2798527 : basic_block q = queue.pop ();
555 : /* q may have been processed & completed by being added to the
556 : queue multiple times, so check that there is still work to
557 : do before continuing. */
558 2798527 : if (bitmap_bit_p (marks, q->index))
559 985 : continue;
560 :
561 2798282 : outcomes succs = conditional_succs (q);
562 2798282 : if (!bitmap_bit_p (marks, succs.t->index))
563 639 : continue;
564 2797643 : if (!bitmap_bit_p (marks, succs.f->index))
565 101 : continue;
566 :
567 5595084 : const int index = index_of (q, body);
568 2797542 : gcc_assert (index != -1);
569 2797542 : masks[m] |= uint64_t (1) << index;
570 2797542 : bitmap_set_bit (marks, q->index);
571 :
572 11190897 : for (edge e : q->preds)
573 : {
574 2798271 : e = contract_edge_up (e);
575 2798271 : if (e->flags & EDGE_DFS_BACK)
576 13 : continue;
577 2798258 : if (bitmap_bit_p (marks, e->src->index))
578 5 : continue;
579 2798253 : if (!bitmap_bit_p (core, e->src->index))
580 129748 : continue;
581 2668505 : queue.safe_push (e->src);
582 : }
583 : }
584 : }
585 : }
586 : }
587 :
588 : /* Emit LHS = RHS on edges. This is just a short hand that automates the
589 : building of the assign and immediately puts it on the edge, which becomes
590 : noisy. */
591 : tree
592 3216 : emit_assign (edge e, tree lhs, tree rhs)
593 : {
594 1608 : gassign *w = gimple_build_assign (lhs, rhs);
595 3216 : gsi_insert_on_edge (e, w);
596 3216 : return lhs;
597 : }
598 :
599 : /* Emit lhs = RHS on edges. The lhs is created. */
600 : tree
601 1608 : emit_assign (edge e, tree rhs)
602 : {
603 1608 : return emit_assign (e, make_ssa_name (gcov_type_node), rhs);
604 : }
605 :
606 : /* Emit LHS = OP1 <OP> OP2 on edges. */
607 : tree
608 5406 : emit_bitwise_op (edge e, tree op1, tree_code op, tree op2 = NULL_TREE)
609 : {
610 5406 : tree lhs = make_ssa_name (gcov_type_node);
611 5406 : gassign *w = gimple_build_assign (lhs, op, op1, op2);
612 5406 : gsi_insert_on_edge (e, w);
613 5406 : return lhs;
614 : }
615 :
616 : /* Visitor for make_top_index. */
617 : void
618 4147 : make_top_index_visit (basic_block b, vec<basic_block>& L, vec<int>& marks)
619 : {
620 4147 : if (marks[b->index])
621 : return;
622 :
623 : /* Follow the false edge first, if it exists, so that true paths are given
624 : the lower index in the ordering. Any iteration order
625 : would yield a valid and useful topological ordering, but making sure the
626 : true branch has the lower index first makes reporting work better for
627 : expressions with ternaries. Walk the false branch first because the
628 : array will be reversed to finalize the topological order.
629 :
630 : With the wrong ordering (a ? b : c) && d could become [a c b d], but the
631 : (expected) order is really [a b c d]. */
632 :
633 1852 : const unsigned false_fwd = EDGE_DFS_BACK | EDGE_FALSE_VALUE;
634 7585 : for (edge e : b->succs)
635 2341 : if ((e->flags & false_fwd) == EDGE_FALSE_VALUE)
636 624 : make_top_index_visit (e->dest, L, marks);
637 :
638 7585 : for (edge e : b->succs)
639 2341 : if (!(e->flags & false_fwd))
640 1671 : make_top_index_visit (e->dest, L, marks);
641 :
642 1852 : marks[b->index] = 1;
643 1852 : L.quick_push (b);
644 : }
645 :
646 : /* Find a topological sorting of the blocks in a function so that left operands
647 : are before right operands including subexpressions. Sorting on block index
648 : does not guarantee this property and the syntactical order of terms is very
649 : important to the condition coverage. The sorting algorithm is from Cormen
650 : et al (2001) but with back-edges ignored and thus there is no need for
651 : temporary marks (for cycle detection). The L argument is a buffer/working
652 : memory, and the output will be written to TOP_INDEX.
653 :
654 : For the expression (a || (b && c) || d) the blocks should be [a b c d]. */
655 : void
656 156 : make_top_index (array_slice<basic_block> blocks, vec<basic_block>& L,
657 : vec<int>& top_index)
658 : {
659 156 : L.truncate (0);
660 156 : L.reserve (blocks.size ());
661 :
662 : /* Use of the output map as a temporary for tracking visited status. */
663 156 : top_index.truncate (0);
664 156 : top_index.safe_grow_cleared (blocks.size ());
665 2008 : for (const basic_block b : blocks)
666 1852 : make_top_index_visit (b, L, top_index);
667 :
668 : /* Insert canaries - if there are unreachable nodes (for example infinite
669 : loops) then the unreachable nodes should never be needed for comparison,
670 : and L.length () < max_index. An index mapping should also never be
671 : recorded twice. */
672 4016 : for (unsigned i = 0; i != top_index.length (); i++)
673 1852 : top_index[i] = -1;
674 :
675 312 : gcc_assert (blocks.size () == L.length ());
676 156 : L.reverse ();
677 156 : const unsigned nblocks = L.length ();
678 2008 : for (unsigned i = 0; i != nblocks; i++)
679 : {
680 1852 : gcc_assert (L[i]->index != -1);
681 1852 : top_index[L[i]->index] = int (i);
682 : }
683 156 : }
684 :
685 : /* Find all nodes including non-conditions in a Boolean expression. We need to
686 : know the paths through the expression so that the masking and
687 : instrumentation phases can limit searches and know what subgraphs must be
688 : threaded through, but not counted, such as the (b || c) in
689 : a && fn (b || c) && d.
690 :
691 : It is essentially the intersection of downwards paths from the expression
692 : nodes EXPR to the post-dominator and upwards from the post-dominator.
693 : Finding the dominator is slightly more involved than picking the first/last,
694 : particularly under optimization, because both incoming and outgoing paths
695 : may have multiple entries/exits.
696 :
697 : It is assumed GRAPH is an array_slice of the basic blocks of this function
698 : sorted by the basic block index. */
699 : vec<basic_block>&
700 289 : paths_between (conds_ctx &ctx, array_slice<basic_block> graph,
701 : const vec<basic_block>& expr)
702 : {
703 289 : if (expr.length () == 1)
704 : {
705 195 : ctx.blocks.truncate (0);
706 195 : ctx.blocks.safe_push (expr[0]);
707 195 : return ctx.blocks;
708 : }
709 :
710 94 : basic_block dom;
711 94 : sbitmap up = ctx.G1;
712 94 : sbitmap down = ctx.G2;
713 94 : sbitmap paths = ctx.G3;
714 94 : vec<basic_block>& queue = ctx.B1;
715 :
716 94 : queue.truncate (0);
717 94 : bitmap_clear (down);
718 94 : dom = get_immediate_dominator (CDI_POST_DOMINATORS, expr[0]);
719 643 : for (basic_block b : expr)
720 361 : if (dom != b)
721 361 : dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, b);
722 94 : queue.safe_splice (expr);
723 1566 : while (!queue.is_empty ())
724 : {
725 1378 : basic_block b = queue.pop ();
726 1378 : if (!bitmap_set_bit (down, b->index))
727 656 : continue;
728 722 : if (b == dom)
729 94 : continue;
730 2913 : for (edge e : b->succs)
731 1029 : if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
732 1017 : queue.safe_push (e->dest);
733 : }
734 :
735 94 : queue.truncate (0);
736 94 : bitmap_clear (up);
737 94 : dom = expr[0];
738 455 : for (basic_block b : expr)
739 361 : if (dom != b)
740 267 : dom = nearest_common_dominator (CDI_DOMINATORS, dom, b);
741 94 : queue.safe_splice (expr);
742 917 : while (!queue.is_empty ())
743 : {
744 729 : basic_block b = queue.pop ();
745 729 : if (!bitmap_set_bit (up, b->index))
746 320 : continue;
747 409 : if (b == dom)
748 94 : continue;
749 1315 : for (edge e : b->preds)
750 370 : if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
751 368 : queue.safe_push (e->src);
752 : }
753 :
754 94 : bitmap_and (paths, up, down);
755 94 : vec<basic_block>& blocks = ctx.blocks;
756 94 : blocks.truncate (0);
757 94 : blocks.reserve (graph.size ());
758 94 : sbitmap_iterator itr;
759 94 : unsigned index;
760 591 : EXECUTE_IF_SET_IN_BITMAP (paths, 0, index, itr)
761 403 : blocks.quick_push (graph[index]);
762 : return blocks;
763 : }
764 :
765 : }
766 :
767 : /* Context object for the condition coverage. This stores conds_ctx (the
768 : buffers reused when analyzing the cfg) and the output arrays. This is
769 : designed to be heap allocated and aggressively preallocates large buffers to
770 : avoid having to reallocate for most programs. */
771 : struct condcov
772 : {
773 156 : explicit condcov (unsigned nblocks) noexcept (true) : ctx (nblocks),
774 156 : m_maps (sbitmap_vector_alloc (2 * nblocks, nblocks))
775 : {
776 156 : bitmap_vector_clear (m_maps, 2 * nblocks);
777 156 : }
778 : auto_vec<size_t, 128> m_index;
779 : auto_vec<basic_block, 256> m_blocks;
780 : auto_vec<uint64_t, 512> m_masks;
781 : conds_ctx ctx;
782 : sbitmap *m_maps;
783 : };
784 :
785 : /* Get the length, that is the number of Boolean expression found. cov_length
786 : is the one-past index for cov_{blocks,masks,maps}. */
787 : size_t
788 312 : cov_length (const struct condcov* cov)
789 : {
790 312 : if (cov->m_index.is_empty ())
791 : return 0;
792 312 : return cov->m_index.length () - 1;
793 : }
794 :
795 : /* The subgraph, exluding intermediates, for the nth Boolean expression. */
796 : array_slice<basic_block>
797 578 : cov_blocks (struct condcov* cov, size_t n)
798 : {
799 578 : if (n >= cov->m_index.length ())
800 0 : return array_slice<basic_block>::invalid ();
801 :
802 1156 : basic_block *begin = cov->m_blocks.begin () + cov->m_index[n];
803 578 : basic_block *end = cov->m_blocks.begin () + cov->m_index[n + 1];
804 578 : return array_slice<basic_block> (begin, end - begin);
805 : }
806 :
807 : /* The masks for the nth Boolean expression. */
808 : array_slice<uint64_t>
809 578 : cov_masks (struct condcov* cov, size_t n)
810 : {
811 578 : if (n >= cov->m_index.length ())
812 0 : return array_slice<uint64_t>::invalid ();
813 :
814 1156 : uint64_t *begin = cov->m_masks.begin () + 2*cov->m_index[n];
815 578 : uint64_t *end = cov->m_masks.begin () + 2*cov->m_index[n + 1];
816 578 : return array_slice<uint64_t> (begin, end - begin);
817 : }
818 :
819 : /* The maps for the nth Boolean expression. */
820 : array_slice<sbitmap>
821 578 : cov_maps (struct condcov* cov, size_t n)
822 : {
823 578 : if (n >= cov->m_index.length ())
824 0 : return array_slice<sbitmap>::invalid ();
825 :
826 578 : sbitmap *begin = cov->m_maps + 2*n;
827 578 : sbitmap *end = begin + 2;
828 578 : return array_slice<sbitmap> (begin, end - begin);
829 : }
830 :
831 : /* Deleter for condcov. */
832 : void
833 156 : cov_free (struct condcov* cov)
834 : {
835 156 : sbitmap_vector_free (cov->m_maps);
836 156 : delete cov;
837 156 : }
838 :
839 : /* Condition coverage (MC/DC)
840 :
841 : Whalen, Heimdahl, De Silva in "Efficient Test Coverage Measurement for
842 : MC/DC" describe an algorithm for modified condition/decision coverage based
843 : on AST analysis. This algorithm does analyzes the control flow graph
844 : (interpreted as a binary decision diagram) to determine the masking vectors.
845 : The individual phases are described in more detail closer to the
846 : implementation.
847 :
848 : The coverage only considers the positions, not the symbols, in a
849 : conditional, e.g. !A || (!B && A) is a 3-term conditional even though A
850 : appears twice. Subexpressions have no effect on term ordering:
851 : (a && (b || (c && d)) || e) comes out as [a b c d e]. Functions whose
852 : arguments are Boolean expressions are treated as separate expressions, that
853 : is, a && fn (b || c) && d is treated as [a _fn d] and [b c], not [a b c d].
854 :
855 : The output for gcov is a vector of pairs of unsigned integers, interpreted
856 : as bit-sets, where the bit index corresponds to the index of the condition
857 : in the expression.
858 :
859 : The returned condcov should be released by the caller with cov_free. */
860 : struct condcov*
861 156 : find_conditions (struct function *fn)
862 : {
863 156 : mark_dfs_back_edges (fn);
864 156 : const bool have_dom = dom_info_available_p (fn, CDI_DOMINATORS);
865 156 : const bool have_post_dom = dom_info_available_p (fn, CDI_POST_DOMINATORS);
866 156 : if (!have_dom)
867 153 : calculate_dominance_info (CDI_DOMINATORS);
868 156 : if (!have_post_dom)
869 156 : calculate_dominance_info (CDI_POST_DOMINATORS);
870 :
871 156 : const unsigned nblocks = n_basic_blocks_for_fn (fn);
872 156 : basic_block *fnblocksp = basic_block_info_for_fn (fn)->address ();
873 156 : condcov *cov = new condcov (nblocks);
874 156 : conds_ctx& ctx = cov->ctx;
875 156 : array_slice<basic_block> fnblocks (fnblocksp, nblocks);
876 156 : make_top_index (fnblocks, ctx.B1, ctx.top_index);
877 :
878 : /* Bin the Boolean expressions so that exprs[id] -> [x1, x2, ...]. */
879 156 : hash_map<int_hash<unsigned, 0>, auto_vec<basic_block>> exprs;
880 2008 : for (basic_block b : fnblocks)
881 : {
882 1852 : const unsigned uid = condition_uid (fn, b);
883 1852 : if (uid == 0)
884 1231 : continue;
885 621 : exprs.get_or_insert (uid).safe_push (b);
886 : }
887 :
888 : /* Visit all reachable nodes and collect conditions. Topological order is
889 : important so the first node of a boolean expression is visited first
890 : (it will mark subsequent terms). */
891 156 : cov->m_index.safe_push (0);
892 736 : for (auto expr : exprs)
893 : {
894 290 : vec<basic_block>& conds = expr.second;
895 580 : if (conds.length () > CONDITIONS_MAX_TERMS)
896 : {
897 2 : location_t loc = gimple_location (gsi_stmt (gsi_last_bb (conds[0])));
898 1 : warning_at (loc, OPT_Wcoverage_too_many_conditions,
899 : "Too many conditions (found %u); giving up coverage",
900 : conds.length ());
901 1 : continue;
902 1 : }
903 289 : conds.sort (topological_cmp, &ctx.top_index);
904 289 : vec<basic_block>& subgraph = paths_between (ctx, fnblocks, conds);
905 289 : subgraph.sort (topological_cmp, &ctx.top_index);
906 289 : const unsigned index = cov->m_index.length () - 1;
907 289 : sbitmap condm = cov->m_maps[0 + 2*index];
908 289 : sbitmap subgm = cov->m_maps[1 + 2*index];
909 1423 : for (basic_block b : conds)
910 556 : bitmap_set_bit (condm, b->index);
911 1465 : for (basic_block b : subgraph)
912 598 : bitmap_set_bit (subgm, b->index);
913 289 : cov->m_blocks.safe_splice (subgraph);
914 578 : cov->m_index.safe_push (cov->m_blocks.length ());
915 : }
916 :
917 156 : if (!have_dom)
918 153 : free_dominance_info (fn, CDI_DOMINATORS);
919 156 : if (!have_post_dom)
920 156 : free_dominance_info (fn, CDI_POST_DOMINATORS);
921 :
922 156 : cov->m_masks.safe_grow_cleared (2 * cov->m_index.last ());
923 156 : const size_t length = cov_length (cov);
924 445 : for (size_t i = 0; i != length; i++)
925 289 : masking_vectors (ctx, cov_blocks (cov, i), cov_maps (cov, i),
926 : cov_masks (cov, i));
927 :
928 156 : return cov;
929 156 : }
930 :
931 : namespace
932 : {
933 :
934 : /* Stores the incoming edge and previous counters (in SSA form) on that edge
935 : for the node e->deston that edge for the node e->dest. The counters record
936 : the seen-true (0), seen-false (1), and current-mask (2). They are stored in
937 : an array rather than proper members for access-by-index as the code paths
938 : tend to be identical for the different counters. */
939 : struct counters
940 : {
941 : edge e;
942 : tree counter[3];
943 5785 : tree& operator [] (size_t i) { return counter[i]; }
944 : };
945 :
946 : /* Find the counters for the incoming edge e, or NULL if the edge has not been
947 : recorded (could be for complex incoming edges). */
948 : counters*
949 1122 : find_counters (vec<counters>& candidates, edge e)
950 : {
951 5807 : for (counters& candidate : candidates)
952 3557 : if (candidate.e == e)
953 : return &candidate;
954 : return NULL;
955 : }
956 :
957 : /* Resolve the SSA for a specific counter KIND. If it is not modified by any
958 : incoming edges, simply forward it, otherwise create a phi node of all the
959 : candidate counters and return it. */
960 : tree
961 1794 : resolve_counter (vec<counters>& cands, size_t kind)
962 : {
963 1794 : gcc_assert (!cands.is_empty ());
964 1794 : gcc_assert (kind < 3);
965 :
966 1794 : counters& fst = cands[0];
967 :
968 1794 : if (!fst.e || fst.e->dest->preds->length () == 1)
969 : {
970 1638 : gcc_assert (cands.length () == 1);
971 1638 : return fst[kind];
972 : }
973 :
974 156 : tree zero0 = build_int_cst (gcov_type_node, 0);
975 156 : tree ssa = make_ssa_name (gcov_type_node);
976 156 : gphi *phi = create_phi_node (ssa, fst.e->dest);
977 783 : for (edge e : fst.e->dest->preds)
978 : {
979 315 : counters *prev = find_counters (cands, e);
980 315 : if (prev)
981 309 : add_phi_arg (phi, (*prev)[kind], e, UNKNOWN_LOCATION);
982 : else
983 : {
984 6 : tree zero = make_ssa_name (gcov_type_node);
985 6 : gimple_stmt_iterator gsi = gsi_after_labels (e->src);
986 6 : gassign *set = gimple_build_assign (zero, zero0);
987 6 : gsi_insert_before (&gsi, set, GSI_NEW_STMT);
988 6 : add_phi_arg (phi, zero, e, UNKNOWN_LOCATION);
989 : }
990 : }
991 : return ssa;
992 : }
993 :
994 : /* Resolve all the counters for a node. Note that the edge is undefined, as
995 : the counters are intended to form the base to push to the successors, and
996 : because the is only meaningful for nodes with a single predecessor. */
997 : counters
998 598 : resolve_counters (vec<counters>& cands)
999 : {
1000 598 : counters next;
1001 598 : next[0] = resolve_counter (cands, 0);
1002 598 : next[1] = resolve_counter (cands, 1);
1003 598 : next[2] = resolve_counter (cands, 2);
1004 598 : return next;
1005 : }
1006 :
1007 : }
1008 :
1009 : /* Add instrumentation to a decision subgraph. EXPR should be the
1010 : (topologically sorted) block of nodes returned by cov_blocks, MAPS the
1011 : bitmaps returned by cov_maps, and MASKS the block of bitsets returned by
1012 : cov_masks. CONDNO should be the index of this condition in the function,
1013 : i.e. the same argument given to cov_{masks,graphs}. EXPR may contain nodes
1014 : in-between the conditions, e.g. when an operand contains a function call,
1015 : or there is a setjmp and the cfg is filled with complex edges.
1016 :
1017 : Every node is annotated with three counters; the true, false, and mask
1018 : value. First, walk the graph and determine what if there are multiple
1019 : possible values for either accumulator depending on the path taken, in which
1020 : case a phi node is created and registered as the accumulator. Then, those
1021 : values are pushed as accumulators to the immediate successors. For some
1022 : very particular programs there may be multiple paths into the expression
1023 : (e.g. when prior terms are determined by a surrounding conditional) in which
1024 : case the default zero-counter is pushed, otherwise all predecessors will
1025 : have been considered before the successor because of topologically ordered
1026 : traversal. Finally, expr is traversed again to look for edges to the
1027 : outcomes, that is, edges with a destination outside of expr, and the local
1028 : accumulators are flushed to the global gcov counters on these edges. In
1029 : some cases there are edge splits that cause 3+ edges to the two outcome
1030 : nodes.
1031 :
1032 : If a complex edge is taken (e.g. on a longjmp) the accumulators are
1033 : attempted poisoned so that there would be no change to the global counters,
1034 : but this has proven unreliable in the presence of undefined behavior, see
1035 : the setjmp003 test.
1036 :
1037 : It is important that the flushes happen on the basic condition outgoing
1038 : edge, otherwise flushes could be lost to exception handling or other
1039 : abnormal control flow. */
1040 : size_t
1041 289 : instrument_decisions (array_slice<basic_block> expr, size_t condno,
1042 : array_slice<sbitmap> maps, array_slice<uint64_t> masks)
1043 : {
1044 289 : tree zero = build_int_cst (gcov_type_node, 0);
1045 289 : tree poison = build_int_cst (gcov_type_node, ~0ULL);
1046 289 : const sbitmap core = maps[0];
1047 289 : const sbitmap allg = maps[1];
1048 :
1049 289 : hash_map<basic_block, vec<counters>> table;
1050 289 : counters zerocounter;
1051 289 : zerocounter.e = NULL;
1052 289 : zerocounter[0] = zero;
1053 289 : zerocounter[1] = zero;
1054 289 : zerocounter[2] = zero;
1055 :
1056 289 : unsigned xi = 0;
1057 289 : bool increment = false;
1058 289 : tree rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1059 887 : for (basic_block current : expr)
1060 : {
1061 598 : vec<counters>& candidates = table.get_or_insert (current);
1062 598 : if (candidates.is_empty ())
1063 290 : candidates.safe_push (zerocounter);
1064 598 : counters prev = resolve_counters (candidates);
1065 :
1066 598 : if (increment)
1067 : {
1068 267 : xi += 1;
1069 267 : gcc_checking_assert (xi < sizeof (uint64_t) * BITS_PER_UNIT);
1070 267 : rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1071 267 : increment = false;
1072 : }
1073 :
1074 2962 : for (edge e : current->succs)
1075 : {
1076 1168 : counters next = prev;
1077 1168 : next.e = e;
1078 :
1079 1168 : if (bitmap_bit_p (core, e->src->index) && (e->flags & EDGE_CONDITION))
1080 : {
1081 1112 : const int k = condition_index (e->flags);
1082 1112 : next[k] = emit_bitwise_op (e, prev[k], BIT_IOR_EXPR, rhs);
1083 1112 : if (masks[2*xi + k])
1084 : {
1085 265 : tree m = build_int_cst (gcov_type_node, masks[2*xi + k]);
1086 265 : next[2] = emit_bitwise_op (e, prev[2], BIT_IOR_EXPR, m);
1087 : }
1088 : increment = true;
1089 : }
1090 56 : else if (e->flags & EDGE_COMPLEX)
1091 : {
1092 : /* A complex edge has been taken - wipe the accumulators and
1093 : poison the mask so that this path does not contribute to
1094 : coverage. */
1095 2 : next[0] = poison;
1096 2 : next[1] = poison;
1097 2 : next[2] = poison;
1098 : }
1099 1168 : table.get_or_insert (e->dest).safe_push (next);
1100 : }
1101 : }
1102 :
1103 : /* Since this is also the return value, the number of conditions, make sure
1104 : to include the increment of the last basic block. */
1105 289 : if (increment)
1106 289 : xi += 1;
1107 :
1108 289 : gcc_assert (xi == bitmap_count_bits (core));
1109 :
1110 289 : const tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1111 289 : const bool atomic = flag_profile_update == PROFILE_UPDATE_ATOMIC;
1112 289 : const tree atomic_ior = builtin_decl_explicit
1113 289 : (TYPE_PRECISION (gcov_type_node) > 32
1114 : ? BUILT_IN_ATOMIC_FETCH_OR_8
1115 : : BUILT_IN_ATOMIC_FETCH_OR_4);
1116 :
1117 : /* Flush to the gcov accumulators. */
1118 887 : for (const basic_block b : expr)
1119 : {
1120 598 : if (!bitmap_bit_p (core, b->index))
1121 42 : continue;
1122 :
1123 2780 : for (edge e : b->succs)
1124 : {
1125 : /* Flush the accumulators on leaving the Boolean function. The
1126 : destination may be inside the function only when it returns to
1127 : the loop header, such as do { ... } while (x); */
1128 1112 : if (bitmap_bit_p (allg, e->dest->index)) {
1129 309 : if (!(e->flags & EDGE_DFS_BACK))
1130 305 : continue;
1131 4 : if (e->dest != expr[0])
1132 0 : continue;
1133 : }
1134 :
1135 807 : vec<counters> *cands = table.get (e->dest);
1136 807 : gcc_assert (cands);
1137 807 : counters *prevp = find_counters (*cands, e);
1138 807 : gcc_assert (prevp);
1139 807 : counters prev = *prevp;
1140 :
1141 : /* _true &= ~mask, _false &= ~mask */
1142 807 : counters next;
1143 807 : next[2] = emit_bitwise_op (e, prev[2], BIT_NOT_EXPR);
1144 807 : next[0] = emit_bitwise_op (e, prev[0], BIT_AND_EXPR, next[2]);
1145 807 : next[1] = emit_bitwise_op (e, prev[1], BIT_AND_EXPR, next[2]);
1146 :
1147 : /* _global_true |= _true, _global_false |= _false */
1148 2421 : for (size_t k = 0; k != 2; ++k)
1149 : {
1150 1614 : tree ref = tree_coverage_counter_ref (GCOV_COUNTER_CONDS,
1151 : 2*condno + k);
1152 1614 : if (atomic)
1153 : {
1154 6 : ref = unshare_expr (ref);
1155 6 : gcall *flush = gimple_build_call (atomic_ior, 3,
1156 : build_addr (ref),
1157 6 : next[k], relaxed);
1158 6 : gsi_insert_on_edge (e, flush);
1159 : }
1160 : else
1161 : {
1162 1608 : tree get = emit_assign (e, ref);
1163 1608 : tree put = emit_bitwise_op (e, next[k], BIT_IOR_EXPR, get);
1164 1608 : emit_assign (e, unshare_expr (ref), put);
1165 : }
1166 : }
1167 : }
1168 : }
1169 :
1170 289 : return xi;
1171 289 : }
1172 :
1173 : #undef CONDITIONS_MAX_TERMS
1174 : #undef EDGE_CONDITION
1175 :
1176 : /* Do initialization work for the edge profiler. */
1177 :
1178 : /* Add code:
1179 : __thread gcov* __gcov_indirect_call.counters; // pointer to actual counter
1180 : __thread void* __gcov_indirect_call.callee; // actual callee address
1181 : __thread int __gcov_function_counter; // time profiler function counter
1182 : */
1183 : static void
1184 424 : init_ic_make_global_vars (void)
1185 : {
1186 424 : tree gcov_type_ptr;
1187 :
1188 424 : gcov_type_ptr = build_pointer_type (get_gcov_type ());
1189 :
1190 424 : tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
1191 :
1192 : /* callee */
1193 424 : ic_tuple_callee_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
1194 : ptr_type_node);
1195 :
1196 : /* counters */
1197 424 : ic_tuple_counters_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
1198 : NULL_TREE, gcov_type_ptr);
1199 424 : DECL_CHAIN (ic_tuple_counters_field) = ic_tuple_callee_field;
1200 :
1201 424 : finish_builtin_struct (tuple_type, "indirect_call_tuple",
1202 : ic_tuple_counters_field, NULL_TREE);
1203 :
1204 424 : ic_tuple_var
1205 424 : = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1206 : get_identifier ("__gcov_indirect_call"), tuple_type);
1207 424 : TREE_PUBLIC (ic_tuple_var) = 1;
1208 424 : DECL_ARTIFICIAL (ic_tuple_var) = 1;
1209 424 : DECL_INITIAL (ic_tuple_var) = NULL;
1210 424 : DECL_EXTERNAL (ic_tuple_var) = 1;
1211 424 : if (targetm.have_tls)
1212 424 : set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var));
1213 424 : }
1214 :
1215 : /* Create the type and function decls for the interface with gcov. */
1216 :
1217 : void
1218 2503 : gimple_init_gcov_profiler (void)
1219 : {
1220 2503 : tree interval_profiler_fn_type;
1221 2503 : tree pow2_profiler_fn_type;
1222 2503 : tree topn_values_profiler_fn_type;
1223 2503 : tree gcov_type_ptr;
1224 2503 : tree ic_profiler_fn_type;
1225 2503 : tree average_profiler_fn_type;
1226 2503 : const char *fn_name;
1227 :
1228 2503 : if (!gcov_type_node)
1229 : {
1230 409 : const char *fn_suffix
1231 424 : = flag_profile_update == PROFILE_UPDATE_ATOMIC ? "_atomic" : "";
1232 :
1233 424 : gcov_type_node = get_gcov_type ();
1234 424 : gcov_type_ptr = build_pointer_type (gcov_type_node);
1235 :
1236 : /* void (*) (gcov_type *, gcov_type, int, unsigned) */
1237 424 : interval_profiler_fn_type
1238 424 : = build_function_type_list (void_type_node,
1239 : gcov_type_ptr, gcov_type_node,
1240 : integer_type_node,
1241 : unsigned_type_node, NULL_TREE);
1242 424 : fn_name = concat ("__gcov_interval_profiler", fn_suffix, NULL);
1243 424 : tree_interval_profiler_fn = build_fn_decl (fn_name,
1244 : interval_profiler_fn_type);
1245 424 : free (const_cast<char *> (fn_name));
1246 424 : TREE_NOTHROW (tree_interval_profiler_fn) = 1;
1247 424 : DECL_ATTRIBUTES (tree_interval_profiler_fn)
1248 424 : = tree_cons (get_identifier ("leaf"), NULL,
1249 424 : DECL_ATTRIBUTES (tree_interval_profiler_fn));
1250 :
1251 : /* void (*) (gcov_type *, gcov_type) */
1252 424 : pow2_profiler_fn_type
1253 424 : = build_function_type_list (void_type_node,
1254 : gcov_type_ptr, gcov_type_node,
1255 : NULL_TREE);
1256 424 : fn_name = concat ("__gcov_pow2_profiler", fn_suffix, NULL);
1257 424 : tree_pow2_profiler_fn = build_fn_decl (fn_name, pow2_profiler_fn_type);
1258 424 : free (const_cast<char *> (fn_name));
1259 424 : TREE_NOTHROW (tree_pow2_profiler_fn) = 1;
1260 424 : DECL_ATTRIBUTES (tree_pow2_profiler_fn)
1261 424 : = tree_cons (get_identifier ("leaf"), NULL,
1262 424 : DECL_ATTRIBUTES (tree_pow2_profiler_fn));
1263 :
1264 : /* void (*) (gcov_type *, gcov_type) */
1265 424 : topn_values_profiler_fn_type
1266 424 : = build_function_type_list (void_type_node,
1267 : gcov_type_ptr, gcov_type_node,
1268 : NULL_TREE);
1269 424 : fn_name = concat ("__gcov_topn_values_profiler", fn_suffix, NULL);
1270 424 : tree_topn_values_profiler_fn
1271 424 : = build_fn_decl (fn_name, topn_values_profiler_fn_type);
1272 424 : free (const_cast<char *> (fn_name));
1273 :
1274 424 : TREE_NOTHROW (tree_topn_values_profiler_fn) = 1;
1275 424 : DECL_ATTRIBUTES (tree_topn_values_profiler_fn)
1276 424 : = tree_cons (get_identifier ("leaf"), NULL,
1277 424 : DECL_ATTRIBUTES (tree_topn_values_profiler_fn));
1278 :
1279 424 : init_ic_make_global_vars ();
1280 :
1281 : /* void (*) (gcov_type, void *) */
1282 424 : ic_profiler_fn_type
1283 424 : = build_function_type_list (void_type_node,
1284 : gcov_type_node,
1285 : ptr_type_node,
1286 : NULL_TREE);
1287 424 : fn_name = concat ("__gcov_indirect_call_profiler_v4", fn_suffix, NULL);
1288 424 : tree_indirect_call_profiler_fn
1289 424 : = build_fn_decl (fn_name, ic_profiler_fn_type);
1290 424 : free (const_cast<char *> (fn_name));
1291 :
1292 424 : TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
1293 424 : DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
1294 424 : = tree_cons (get_identifier ("leaf"), NULL,
1295 424 : DECL_ATTRIBUTES (tree_indirect_call_profiler_fn));
1296 :
1297 424 : tree_time_profiler_counter
1298 424 : = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1299 : get_identifier ("__gcov_time_profiler_counter"),
1300 : get_gcov_type ());
1301 424 : TREE_PUBLIC (tree_time_profiler_counter) = 1;
1302 424 : DECL_EXTERNAL (tree_time_profiler_counter) = 1;
1303 424 : TREE_STATIC (tree_time_profiler_counter) = 1;
1304 424 : DECL_ARTIFICIAL (tree_time_profiler_counter) = 1;
1305 424 : DECL_INITIAL (tree_time_profiler_counter) = NULL;
1306 :
1307 : /* void (*) (gcov_type *, gcov_type) */
1308 424 : average_profiler_fn_type
1309 424 : = build_function_type_list (void_type_node,
1310 : gcov_type_ptr, gcov_type_node, NULL_TREE);
1311 424 : fn_name = concat ("__gcov_average_profiler", fn_suffix, NULL);
1312 424 : tree_average_profiler_fn = build_fn_decl (fn_name,
1313 : average_profiler_fn_type);
1314 424 : free (const_cast<char *> (fn_name));
1315 424 : TREE_NOTHROW (tree_average_profiler_fn) = 1;
1316 424 : DECL_ATTRIBUTES (tree_average_profiler_fn)
1317 424 : = tree_cons (get_identifier ("leaf"), NULL,
1318 424 : DECL_ATTRIBUTES (tree_average_profiler_fn));
1319 424 : fn_name = concat ("__gcov_ior_profiler", fn_suffix, NULL);
1320 424 : tree_ior_profiler_fn = build_fn_decl (fn_name, average_profiler_fn_type);
1321 424 : free (const_cast<char *> (fn_name));
1322 424 : TREE_NOTHROW (tree_ior_profiler_fn) = 1;
1323 424 : DECL_ATTRIBUTES (tree_ior_profiler_fn)
1324 424 : = tree_cons (get_identifier ("leaf"), NULL,
1325 424 : DECL_ATTRIBUTES (tree_ior_profiler_fn));
1326 :
1327 : /* LTO streamer needs assembler names. Because we create these decls
1328 : late, we need to initialize them by hand. */
1329 424 : DECL_ASSEMBLER_NAME (tree_interval_profiler_fn);
1330 424 : DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn);
1331 424 : DECL_ASSEMBLER_NAME (tree_topn_values_profiler_fn);
1332 424 : DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn);
1333 424 : DECL_ASSEMBLER_NAME (tree_average_profiler_fn);
1334 424 : DECL_ASSEMBLER_NAME (tree_ior_profiler_fn);
1335 : }
1336 2503 : }
1337 :
1338 : /* If RESULT is not null, then output instructions as GIMPLE trees to assign
1339 : the updated counter from CALL of FUNC to RESULT. Insert the CALL and the
1340 : optional assignment instructions to GSI. Use NAME for temporary values. */
1341 :
1342 : static inline void
1343 545 : gen_assign_counter_update (gimple_stmt_iterator *gsi, gcall *call, tree func,
1344 : tree result, const char *name)
1345 : {
1346 545 : if (result)
1347 : {
1348 15 : tree result_type = TREE_TYPE (TREE_TYPE (func));
1349 15 : tree tmp1 = make_temp_ssa_name (result_type, NULL, name);
1350 15 : gimple_set_lhs (call, tmp1);
1351 15 : gsi_insert_after (gsi, call, GSI_NEW_STMT);
1352 15 : tree tmp2 = make_temp_ssa_name (TREE_TYPE (result), NULL, name);
1353 15 : gassign *assign = gimple_build_assign (tmp2, NOP_EXPR, tmp1);
1354 15 : gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1355 15 : assign = gimple_build_assign (result, tmp2);
1356 15 : gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1357 : }
1358 : else
1359 530 : gsi_insert_after (gsi, call, GSI_NEW_STMT);
1360 545 : }
1361 :
1362 : /* Output instructions as GIMPLE trees to increment the COUNTER. If RESULT is
1363 : not null, then assign the updated counter value to RESULT. Insert the
1364 : instructions to GSI. Use NAME for temporary values. */
1365 :
1366 : static inline void
1367 6829 : gen_counter_update (gimple_stmt_iterator *gsi, tree counter, tree result,
1368 : const char *name)
1369 : {
1370 6829 : tree type = gcov_type_node;
1371 6829 : tree addr = build_fold_addr_expr (counter);
1372 6829 : tree one = build_int_cst (type, 1);
1373 6829 : tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1374 :
1375 6829 : if (counter_update == COUNTER_UPDATE_ATOMIC_BUILTIN
1376 6284 : || (result && counter_update == COUNTER_UPDATE_ATOMIC_SPLIT))
1377 : {
1378 : /* __atomic_fetch_add (&counter, 1, MEMMODEL_RELAXED); */
1379 545 : tree f = builtin_decl_explicit (TYPE_PRECISION (type) > 32
1380 : ? BUILT_IN_ATOMIC_ADD_FETCH_8
1381 : : BUILT_IN_ATOMIC_ADD_FETCH_4);
1382 545 : gcall *call = gimple_build_call (f, 3, addr, one, relaxed);
1383 545 : gen_assign_counter_update (gsi, call, f, result, name);
1384 545 : }
1385 5734 : else if (!result && (counter_update == COUNTER_UPDATE_ATOMIC_SPLIT
1386 5734 : || counter_update == COUNTER_UPDATE_ATOMIC_PARTIAL))
1387 : {
1388 : /* low = __atomic_add_fetch_4 (addr, 1, MEMMODEL_RELAXED);
1389 : high_inc = low == 0 ? 1 : 0;
1390 : __atomic_add_fetch_4 (addr_high, high_inc, MEMMODEL_RELAXED); */
1391 0 : tree zero32 = build_zero_cst (uint32_type_node);
1392 0 : tree one32 = build_one_cst (uint32_type_node);
1393 0 : tree addr_high = make_temp_ssa_name (TREE_TYPE (addr), NULL, name);
1394 0 : tree four = build_int_cst (size_type_node, 4);
1395 0 : gassign *assign1 = gimple_build_assign (addr_high, POINTER_PLUS_EXPR,
1396 : addr, four);
1397 0 : gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1398 0 : if (WORDS_BIG_ENDIAN)
1399 : std::swap (addr, addr_high);
1400 0 : tree f = builtin_decl_explicit (BUILT_IN_ATOMIC_ADD_FETCH_4);
1401 0 : gcall *call1 = gimple_build_call (f, 3, addr, one, relaxed);
1402 0 : tree low = make_temp_ssa_name (uint32_type_node, NULL, name);
1403 0 : gimple_call_set_lhs (call1, low);
1404 0 : gsi_insert_after (gsi, call1, GSI_NEW_STMT);
1405 0 : tree is_zero = make_temp_ssa_name (boolean_type_node, NULL, name);
1406 0 : gassign *assign2 = gimple_build_assign (is_zero, EQ_EXPR, low,
1407 : zero32);
1408 0 : gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1409 0 : tree high_inc = make_temp_ssa_name (uint32_type_node, NULL, name);
1410 0 : gassign *assign3 = gimple_build_assign (high_inc, COND_EXPR,
1411 : is_zero, one32, zero32);
1412 0 : gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1413 0 : gcall *call2 = gimple_build_call (f, 3, addr_high, high_inc,
1414 : relaxed);
1415 0 : gsi_insert_after (gsi, call2, GSI_NEW_STMT);
1416 0 : }
1417 : else
1418 : {
1419 6284 : tree tmp1 = make_temp_ssa_name (type, NULL, name);
1420 6284 : gassign *assign1 = gimple_build_assign (tmp1, counter);
1421 6284 : gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1422 6284 : tree tmp2 = make_temp_ssa_name (type, NULL, name);
1423 6284 : gassign *assign2 = gimple_build_assign (tmp2, PLUS_EXPR, tmp1, one);
1424 6284 : gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1425 6284 : gassign *assign3 = gimple_build_assign (unshare_expr (counter), tmp2);
1426 6284 : gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1427 6284 : if (result)
1428 : {
1429 550 : gassign *assign4 = gimple_build_assign (result, tmp2);
1430 550 : gsi_insert_after (gsi, assign4, GSI_NEW_STMT);
1431 : }
1432 : }
1433 6829 : }
1434 :
1435 : /* Output instructions as GIMPLE trees to increment the edge
1436 : execution count, and insert them on E. */
1437 :
1438 : void
1439 6264 : gimple_gen_edge_profiler (int edgeno, edge e)
1440 : {
1441 6264 : gimple_stmt_iterator gsi = gsi_last (PENDING_STMT (e));
1442 6264 : tree counter = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1443 6264 : gen_counter_update (&gsi, counter, NULL_TREE, "PROF_edge_counter");
1444 6264 : }
1445 :
1446 : /* Emits code to get VALUE to instrument at GSI, and returns the
1447 : variable containing the value. */
1448 :
1449 : static tree
1450 110 : prepare_instrumented_value (gimple_stmt_iterator *gsi, histogram_value value)
1451 : {
1452 110 : tree val = value->hvalue.value;
1453 110 : if (POINTER_TYPE_P (TREE_TYPE (val)))
1454 29 : val = fold_convert (build_nonstandard_integer_type
1455 : (TYPE_PRECISION (TREE_TYPE (val)), 1), val);
1456 110 : return force_gimple_operand_gsi (gsi, fold_convert (gcov_type_node, val),
1457 110 : true, NULL_TREE, true, GSI_SAME_STMT);
1458 : }
1459 :
1460 : /* Output instructions as GIMPLE trees to increment the interval histogram
1461 : counter. VALUE is the expression whose value is profiled. TAG is the
1462 : tag of the section for counters, BASE is offset of the counter position. */
1463 :
1464 : void
1465 5 : gimple_gen_interval_profiler (histogram_value value, unsigned tag)
1466 : {
1467 5 : gimple *stmt = value->hvalue.stmt;
1468 5 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1469 5 : tree ref = tree_coverage_counter_ref (tag, 0), ref_ptr;
1470 5 : gcall *call;
1471 5 : tree val;
1472 10 : tree start = build_int_cst_type (integer_type_node,
1473 5 : value->hdata.intvl.int_start);
1474 10 : tree steps = build_int_cst_type (unsigned_type_node,
1475 5 : value->hdata.intvl.steps);
1476 :
1477 5 : ref_ptr = force_gimple_operand_gsi (&gsi,
1478 : build_addr (ref),
1479 : true, NULL_TREE, true, GSI_SAME_STMT);
1480 5 : val = prepare_instrumented_value (&gsi, value);
1481 5 : call = gimple_build_call (tree_interval_profiler_fn, 4,
1482 : ref_ptr, val, start, steps);
1483 5 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1484 5 : }
1485 :
1486 : /* Output instructions as GIMPLE trees to increment the power of two histogram
1487 : counter. VALUE is the expression whose value is profiled. TAG is the tag
1488 : of the section for counters. */
1489 :
1490 : void
1491 5 : gimple_gen_pow2_profiler (histogram_value value, unsigned tag)
1492 : {
1493 5 : gimple *stmt = value->hvalue.stmt;
1494 5 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1495 5 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1496 5 : gcall *call;
1497 5 : tree val;
1498 :
1499 5 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1500 : true, NULL_TREE, true, GSI_SAME_STMT);
1501 5 : val = prepare_instrumented_value (&gsi, value);
1502 5 : call = gimple_build_call (tree_pow2_profiler_fn, 2, ref_ptr, val);
1503 5 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1504 5 : }
1505 :
1506 : /* Output instructions as GIMPLE trees for code to find the most N common
1507 : values. VALUE is the expression whose value is profiled. TAG is the tag
1508 : of the section for counters. */
1509 :
1510 : void
1511 42 : gimple_gen_topn_values_profiler (histogram_value value, unsigned tag)
1512 : {
1513 42 : gimple *stmt = value->hvalue.stmt;
1514 42 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1515 42 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1516 42 : gcall *call;
1517 42 : tree val;
1518 :
1519 42 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1520 : true, NULL_TREE, true, GSI_SAME_STMT);
1521 42 : val = prepare_instrumented_value (&gsi, value);
1522 42 : call = gimple_build_call (tree_topn_values_profiler_fn, 2, ref_ptr, val);
1523 42 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1524 42 : }
1525 :
1526 :
1527 : /* Output instructions as GIMPLE trees for code to find the most
1528 : common called function in indirect call.
1529 : VALUE is the call expression whose indirect callee is profiled.
1530 : TAG is the tag of the section for counters. */
1531 :
1532 : void
1533 49 : gimple_gen_ic_profiler (histogram_value value, unsigned tag)
1534 : {
1535 49 : tree tmp1;
1536 49 : gassign *stmt1, *stmt2, *stmt3;
1537 49 : gimple *stmt = value->hvalue.stmt;
1538 49 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1539 49 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1540 :
1541 49 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1542 : true, NULL_TREE, true, GSI_SAME_STMT);
1543 :
1544 : /* Insert code:
1545 :
1546 : stmt1: __gcov_indirect_call.counters = get_relevant_counter_ptr ();
1547 : stmt2: tmp1 = (void *) (indirect call argument value)
1548 : stmt3: __gcov_indirect_call.callee = tmp1;
1549 :
1550 : Example:
1551 : f_1 = foo;
1552 : __gcov_indirect_call.counters = &__gcov4.main[0];
1553 : PROF_fn_9 = f_1;
1554 : __gcov_indirect_call.callee = PROF_fn_9;
1555 : _4 = f_1 ();
1556 : */
1557 :
1558 49 : tree gcov_type_ptr = build_pointer_type (get_gcov_type ());
1559 :
1560 49 : tree counter_ref = build3 (COMPONENT_REF, gcov_type_ptr,
1561 : ic_tuple_var, ic_tuple_counters_field, NULL_TREE);
1562 :
1563 49 : stmt1 = gimple_build_assign (counter_ref, ref_ptr);
1564 49 : tmp1 = make_temp_ssa_name (ptr_type_node, NULL, "PROF_fn");
1565 49 : stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value));
1566 49 : tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1567 : ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1568 49 : stmt3 = gimple_build_assign (callee_ref, tmp1);
1569 :
1570 49 : gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1571 49 : gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1572 49 : gsi_insert_before (&gsi, stmt3, GSI_SAME_STMT);
1573 49 : }
1574 :
1575 :
1576 : /* Output instructions as GIMPLE trees for code to find the most
1577 : common called function in indirect call. Insert instructions at the
1578 : beginning of every possible called function.
1579 : */
1580 :
1581 : void
1582 564 : gimple_gen_ic_func_profiler (void)
1583 : {
1584 564 : struct cgraph_node * c_node = cgraph_node::get (current_function_decl);
1585 564 : gcall *stmt1;
1586 564 : tree tree_uid, cur_func, void0;
1587 :
1588 : /* Disable indirect call profiling for an IFUNC resolver and its
1589 : callees since it requires TLS which hasn't been set up yet when
1590 : the dynamic linker is resolving IFUNC symbols. See
1591 : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
1592 : */
1593 564 : if (c_node->only_called_directly_p ()
1594 564 : || c_node->called_by_ifunc_resolver)
1595 36 : return;
1596 :
1597 528 : gimple_init_gcov_profiler ();
1598 :
1599 528 : basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1600 528 : basic_block cond_bb = split_edge (single_succ_edge (entry));
1601 528 : basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1602 :
1603 : /* We need to do an extra split in order to not create an input
1604 : for a possible PHI node. */
1605 528 : split_edge (single_succ_edge (update_bb));
1606 :
1607 528 : edge true_edge = single_succ_edge (cond_bb);
1608 528 : true_edge->flags = EDGE_TRUE_VALUE;
1609 :
1610 528 : profile_probability probability;
1611 528 : if (DECL_VIRTUAL_P (current_function_decl))
1612 11 : probability = profile_probability::very_likely ();
1613 : else
1614 517 : probability = profile_probability::unlikely ();
1615 :
1616 528 : true_edge->probability = probability;
1617 528 : edge e = make_edge (cond_bb, single_succ_edge (update_bb)->dest,
1618 : EDGE_FALSE_VALUE);
1619 528 : e->probability = true_edge->probability.invert ();
1620 :
1621 : /* Insert code:
1622 :
1623 : if (__gcov_indirect_call.callee != NULL)
1624 : __gcov_indirect_call_profiler_v3 (profile_id, ¤t_function_decl);
1625 :
1626 : The function __gcov_indirect_call_profiler_v3 is responsible for
1627 : resetting __gcov_indirect_call.callee to NULL. */
1628 :
1629 528 : gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1630 528 : void0 = build_int_cst (ptr_type_node, 0);
1631 :
1632 528 : tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1633 : ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1634 :
1635 528 : tree ref = force_gimple_operand_gsi (&gsi, callee_ref, true, NULL_TREE,
1636 : true, GSI_SAME_STMT);
1637 :
1638 528 : gcond *cond = gimple_build_cond (NE_EXPR, ref,
1639 : void0, NULL, NULL);
1640 528 : gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1641 :
1642 528 : gsi = gsi_after_labels (update_bb);
1643 :
1644 528 : cur_func = force_gimple_operand_gsi (&gsi,
1645 : build_addr (current_function_decl),
1646 : true, NULL_TREE,
1647 : true, GSI_SAME_STMT);
1648 528 : tree_uid = build_int_cst
1649 528 : (gcov_type_node,
1650 528 : cgraph_node::get (current_function_decl)->profile_id);
1651 528 : stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
1652 : tree_uid, cur_func);
1653 528 : gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1654 : }
1655 :
1656 : /* Output instructions as GIMPLE tree at the beginning for each function.
1657 : TAG is the tag of the section for counters, BASE is offset of the
1658 : counter position and GSI is the iterator we place the counter. */
1659 :
1660 : void
1661 565 : gimple_gen_time_profiler (unsigned tag)
1662 : {
1663 565 : tree type = get_gcov_type ();
1664 565 : basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1665 565 : basic_block cond_bb = split_edge (single_succ_edge (entry));
1666 565 : basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1667 :
1668 : /* We need to do an extra split in order to not create an input
1669 : for a possible PHI node. */
1670 565 : split_edge (single_succ_edge (update_bb));
1671 :
1672 565 : edge true_edge = single_succ_edge (cond_bb);
1673 565 : true_edge->flags = EDGE_TRUE_VALUE;
1674 565 : true_edge->probability = profile_probability::unlikely ();
1675 565 : edge e
1676 565 : = make_edge (cond_bb, single_succ_edge (update_bb)->dest, EDGE_FALSE_VALUE);
1677 565 : e->probability = true_edge->probability.invert ();
1678 :
1679 565 : gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1680 565 : tree original_ref = tree_coverage_counter_ref (tag, 0);
1681 565 : tree ref = force_gimple_operand_gsi (&gsi, original_ref, true, NULL_TREE,
1682 : true, GSI_SAME_STMT);
1683 :
1684 : /* Emit: if (counters[0] != 0). */
1685 565 : gcond *cond = gimple_build_cond (EQ_EXPR, ref, build_int_cst (type, 0),
1686 : NULL, NULL);
1687 565 : gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1688 :
1689 : /* Emit: counters[0] = ++__gcov_time_profiler_counter. */
1690 565 : gsi = gsi_start_bb (update_bb);
1691 565 : gen_counter_update (&gsi, tree_time_profiler_counter, original_ref,
1692 : "PROF_time_profile");
1693 565 : }
1694 :
1695 : /* Output instructions as GIMPLE trees to increment the average histogram
1696 : counter. VALUE is the expression whose value is profiled. TAG is the
1697 : tag of the section for counters, BASE is offset of the counter position. */
1698 :
1699 : void
1700 29 : gimple_gen_average_profiler (histogram_value value, unsigned tag)
1701 : {
1702 29 : gimple *stmt = value->hvalue.stmt;
1703 29 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1704 29 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1705 29 : gcall *call;
1706 29 : tree val;
1707 :
1708 29 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1709 : true, NULL_TREE,
1710 : true, GSI_SAME_STMT);
1711 29 : val = prepare_instrumented_value (&gsi, value);
1712 29 : call = gimple_build_call (tree_average_profiler_fn, 2, ref_ptr, val);
1713 29 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1714 29 : }
1715 :
1716 : /* Output instructions as GIMPLE trees to increment the ior histogram
1717 : counter. VALUE is the expression whose value is profiled. TAG is the
1718 : tag of the section for counters, BASE is offset of the counter position. */
1719 :
1720 : void
1721 29 : gimple_gen_ior_profiler (histogram_value value, unsigned tag)
1722 : {
1723 29 : gimple *stmt = value->hvalue.stmt;
1724 29 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1725 29 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1726 29 : gcall *call;
1727 29 : tree val;
1728 :
1729 29 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1730 : true, NULL_TREE, true, GSI_SAME_STMT);
1731 29 : val = prepare_instrumented_value (&gsi, value);
1732 29 : call = gimple_build_call (tree_ior_profiler_fn, 2, ref_ptr, val);
1733 29 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1734 29 : }
1735 :
1736 : static vec<regex_t> profile_filter_files;
1737 : static vec<regex_t> profile_exclude_files;
1738 :
1739 : /* Parse list of provided REGEX (separated with semi-collon) and
1740 : create expressions (of type regex_t) and save them into V vector.
1741 : If there is a regular expression parsing error, error message is
1742 : printed for FLAG_NAME. */
1743 :
1744 : static void
1745 1202 : parse_profile_filter (const char *regex, vec<regex_t> *v,
1746 : const char *flag_name)
1747 : {
1748 1202 : v->create (4);
1749 1202 : if (regex != NULL)
1750 : {
1751 3 : char *str = xstrdup (regex);
1752 6 : for (char *p = strtok (str, ";"); p != NULL; p = strtok (NULL, ";"))
1753 : {
1754 3 : regex_t r;
1755 3 : if (regcomp (&r, p, REG_EXTENDED | REG_NOSUB) != 0)
1756 : {
1757 0 : error ("invalid regular expression %qs in %qs",
1758 : p, flag_name);
1759 0 : return;
1760 : }
1761 :
1762 3 : v->safe_push (r);
1763 : }
1764 : }
1765 : }
1766 :
1767 : /* Parse values of -fprofile-filter-files and -fprofile-exclude-files
1768 : options. */
1769 :
1770 : static void
1771 601 : parse_profile_file_filtering ()
1772 : {
1773 601 : parse_profile_filter (flag_profile_filter_files, &profile_filter_files,
1774 : "-fprofile-filter-files");
1775 601 : parse_profile_filter (flag_profile_exclude_files, &profile_exclude_files,
1776 : "-fprofile-exclude-files");
1777 601 : }
1778 :
1779 : /* Parse vectors of regular expressions. */
1780 :
1781 : static void
1782 601 : release_profile_file_filtering ()
1783 : {
1784 601 : profile_filter_files.release ();
1785 601 : profile_exclude_files.release ();
1786 601 : }
1787 :
1788 : /* Return true when FILENAME should be instrumented based on
1789 : -fprofile-filter-files and -fprofile-exclude-files options. */
1790 :
1791 : static bool
1792 2555 : include_source_file_for_profile (const char *filename)
1793 : {
1794 : /* First check whether file is included in flag_profile_exclude_files. */
1795 2555 : for (unsigned i = 0; i < profile_exclude_files.length (); i++)
1796 2 : if (regexec (&profile_exclude_files[i],
1797 : filename, 0, NULL, 0) == REG_NOERROR)
1798 : return false;
1799 :
1800 : /* For non-empty flag_profile_filter_files include only files matching a
1801 : regex in the flag. */
1802 5106 : if (profile_filter_files.is_empty ())
1803 : return true;
1804 :
1805 2 : for (unsigned i = 0; i < profile_filter_files.length (); i++)
1806 2 : if (regexec (&profile_filter_files[i], filename, 0, NULL, 0) == REG_NOERROR)
1807 : return true;
1808 :
1809 : return false;
1810 : }
1811 :
1812 : #ifndef HAVE_sync_compare_and_swapsi
1813 : #define HAVE_sync_compare_and_swapsi 0
1814 : #endif
1815 : #ifndef HAVE_atomic_compare_and_swapsi
1816 : #define HAVE_atomic_compare_and_swapsi 0
1817 : #endif
1818 :
1819 : #ifndef HAVE_sync_compare_and_swapdi
1820 : #define HAVE_sync_compare_and_swapdi 0
1821 : #endif
1822 : #ifndef HAVE_atomic_compare_and_swapdi
1823 : #define HAVE_atomic_compare_and_swapdi 0
1824 : #endif
1825 :
1826 : /* Profile all functions in the callgraph. */
1827 :
1828 : static unsigned int
1829 601 : tree_profiling (void)
1830 : {
1831 601 : struct cgraph_node *node;
1832 :
1833 : /* Verify whether we can utilize atomic update operations. */
1834 601 : bool can_support_atomic = targetm.have_libatomic;
1835 601 : unsigned HOST_WIDE_INT gcov_type_size
1836 601 : = tree_to_uhwi (TYPE_SIZE_UNIT (get_gcov_type ()));
1837 601 : bool have_atomic_4
1838 601 : = HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi;
1839 1202 : bool have_atomic_8
1840 601 : = HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi;
1841 601 : bool needs_split = gcov_type_size == 8 && !have_atomic_8 && have_atomic_4;
1842 601 : if (!can_support_atomic)
1843 : {
1844 601 : if (gcov_type_size == 4)
1845 : can_support_atomic = have_atomic_4;
1846 601 : else if (gcov_type_size == 8)
1847 601 : can_support_atomic = have_atomic_8;
1848 : }
1849 :
1850 601 : if (flag_profile_update == PROFILE_UPDATE_ATOMIC
1851 9 : && !can_support_atomic)
1852 : {
1853 0 : if (needs_split)
1854 : {
1855 0 : warning (0, "target does not fully support atomic profile "
1856 : "update, single mode is selected with partial "
1857 : "atomic updates");
1858 0 : counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1859 : }
1860 : else
1861 0 : warning (0, "target does not support atomic profile update, "
1862 : "single mode is selected");
1863 0 : flag_profile_update = PROFILE_UPDATE_SINGLE;
1864 : }
1865 601 : else if (flag_profile_update == PROFILE_UPDATE_PREFER_ATOMIC)
1866 : {
1867 8 : if (can_support_atomic)
1868 8 : flag_profile_update = PROFILE_UPDATE_ATOMIC;
1869 : else
1870 : {
1871 0 : if (needs_split)
1872 0 : counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1873 0 : flag_profile_update = PROFILE_UPDATE_SINGLE;
1874 : }
1875 : }
1876 :
1877 601 : if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
1878 : {
1879 17 : if (needs_split)
1880 0 : counter_update = COUNTER_UPDATE_ATOMIC_SPLIT;
1881 : else
1882 17 : counter_update = COUNTER_UPDATE_ATOMIC_BUILTIN;
1883 : }
1884 :
1885 : /* This is a small-ipa pass that gets called only once, from
1886 : cgraphunit.cc:ipa_passes(). */
1887 601 : gcc_assert (symtab->state == IPA_SSA);
1888 :
1889 601 : init_node_map (true);
1890 601 : parse_profile_file_filtering ();
1891 :
1892 3386 : FOR_EACH_DEFINED_FUNCTION (node)
1893 : {
1894 2785 : bool thunk = false;
1895 2785 : if (!gimple_has_body_p (node->decl) && !node->thunk)
1896 220 : continue;
1897 :
1898 : /* Don't profile functions produced for builtin stuff. */
1899 2565 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1900 0 : continue;
1901 :
1902 2565 : if (lookup_attribute ("no_profile_instrument_function",
1903 2565 : DECL_ATTRIBUTES (node->decl)))
1904 3 : continue;
1905 : /* Do not instrument extern inline functions when testing coverage.
1906 : While this is not perfectly consistent (early inlined extern inlines
1907 : will get acocunted), testsuite expects that. */
1908 2562 : if (DECL_EXTERNAL (node->decl)
1909 2562 : && flag_test_coverage)
1910 7 : continue;
1911 :
1912 2555 : const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl));
1913 2555 : if (!include_source_file_for_profile (file))
1914 2 : continue;
1915 :
1916 2553 : if (node->thunk)
1917 : {
1918 : /* We cannot expand variadic thunks to Gimple. */
1919 14 : if (stdarg_p (TREE_TYPE (node->decl)))
1920 0 : continue;
1921 14 : thunk = true;
1922 : /* When generate profile, expand thunk to gimple so it can be
1923 : instrumented same way as other functions. */
1924 14 : if (coverage_instrumentation_p ())
1925 7 : expand_thunk (node, false, true);
1926 : /* Read cgraph profile but keep function as thunk at profile-use
1927 : time. */
1928 : else
1929 : {
1930 7 : read_thunk_profile (node);
1931 7 : continue;
1932 : }
1933 : }
1934 :
1935 2546 : push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1936 :
1937 2546 : if (dump_file)
1938 143 : dump_function_header (dump_file, cfun->decl, dump_flags);
1939 :
1940 : /* Local pure-const may imply need to fixup the cfg. */
1941 2546 : if (gimple_has_body_p (node->decl)
1942 2546 : && (execute_fixup_cfg () & TODO_cleanup_cfg))
1943 225 : cleanup_tree_cfg ();
1944 :
1945 2546 : branch_prob (thunk);
1946 :
1947 2546 : if (! flag_branch_probabilities
1948 1986 : && flag_profile_values)
1949 564 : gimple_gen_ic_func_profiler ();
1950 :
1951 2546 : if (flag_branch_probabilities
1952 560 : && !thunk
1953 560 : && flag_profile_values
1954 414 : && flag_value_profile_transformations
1955 414 : && profile_status_for_fn (cfun) == PROFILE_READ)
1956 333 : gimple_value_profile_transformations ();
1957 :
1958 : /* The above could hose dominator info. Currently there is
1959 : none coming in, this is a safety valve. It should be
1960 : easy to adjust it, if and when there is some. */
1961 2546 : free_dominance_info (CDI_DOMINATORS);
1962 2546 : free_dominance_info (CDI_POST_DOMINATORS);
1963 2546 : pop_cfun ();
1964 : }
1965 :
1966 601 : release_profile_file_filtering ();
1967 :
1968 : /* Drop pure/const flags from instrumented functions. */
1969 601 : if (coverage_instrumentation_p () || flag_test_coverage)
1970 2639 : FOR_EACH_DEFINED_FUNCTION (node)
1971 : {
1972 2199 : if (!gimple_has_body_p (node->decl)
1973 2199 : || !(!node->clone_of
1974 0 : || node->decl != node->clone_of->decl))
1975 200 : continue;
1976 :
1977 : /* Don't profile functions produced for builtin stuff. */
1978 1999 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1979 0 : continue;
1980 :
1981 1999 : node->set_const_flag (false, false);
1982 1999 : node->set_pure_flag (false, false);
1983 : }
1984 :
1985 : /* Update call statements and rebuild the cgraph. */
1986 3386 : FOR_EACH_DEFINED_FUNCTION (node)
1987 : {
1988 2785 : basic_block bb;
1989 :
1990 2785 : if (!gimple_has_body_p (node->decl)
1991 2785 : || !(!node->clone_of
1992 0 : || node->decl != node->clone_of->decl))
1993 227 : continue;
1994 :
1995 : /* Don't profile functions produced for builtin stuff. */
1996 2558 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1997 0 : continue;
1998 :
1999 2558 : push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2000 :
2001 2558 : if (coverage_instrumentation_p () || flag_test_coverage)
2002 17527 : FOR_EACH_BB_FN (bb, cfun)
2003 : {
2004 15528 : gimple_stmt_iterator gsi;
2005 86055 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2006 : {
2007 54999 : gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2008 4677 : if (!call || gimple_call_internal_p (call))
2009 50403 : continue;
2010 :
2011 : /* We do not clear pure/const on decls without body. */
2012 4596 : tree fndecl = gimple_call_fndecl (call);
2013 4596 : cgraph_node *callee;
2014 6574 : if (fndecl
2015 4538 : && (callee = cgraph_node::get (fndecl))
2016 8747 : && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
2017 1978 : continue;
2018 :
2019 : /* Drop the const attribute from the call type (the pure
2020 : attribute is not available on types). */
2021 2618 : tree fntype = gimple_call_fntype (call);
2022 2618 : if (fntype && TYPE_READONLY (fntype))
2023 : {
2024 1 : int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST;
2025 1 : fntype = build_qualified_type (fntype, quals);
2026 1 : gimple_call_set_fntype (call, fntype);
2027 : }
2028 :
2029 : /* Update virtual operands of calls to no longer const/pure
2030 : functions. */
2031 2618 : update_stmt (call);
2032 : }
2033 : }
2034 :
2035 : /* re-merge split blocks. */
2036 2558 : cleanup_tree_cfg ();
2037 2558 : update_ssa (TODO_update_ssa);
2038 :
2039 2558 : cgraph_edge::rebuild_edges ();
2040 :
2041 2558 : pop_cfun ();
2042 : }
2043 :
2044 601 : handle_missing_profiles ();
2045 :
2046 601 : del_node_map ();
2047 601 : end_branch_prob ();
2048 601 : return 0;
2049 : }
2050 :
2051 : namespace {
2052 :
2053 : const pass_data pass_data_ipa_tree_profile =
2054 : {
2055 : SIMPLE_IPA_PASS, /* type */
2056 : "profile", /* name */
2057 : OPTGROUP_NONE, /* optinfo_flags */
2058 : TV_IPA_PROFILE, /* tv_id */
2059 : 0, /* properties_required */
2060 : 0, /* properties_provided */
2061 : 0, /* properties_destroyed */
2062 : 0, /* todo_flags_start */
2063 : TODO_dump_symtab, /* todo_flags_finish */
2064 : };
2065 :
2066 : class pass_ipa_tree_profile : public simple_ipa_opt_pass
2067 : {
2068 : public:
2069 285722 : pass_ipa_tree_profile (gcc::context *ctxt)
2070 571444 : : simple_ipa_opt_pass (pass_data_ipa_tree_profile, ctxt)
2071 : {}
2072 :
2073 : /* opt_pass methods: */
2074 : bool gate (function *) final override;
2075 601 : unsigned int execute (function *) final override { return tree_profiling (); }
2076 :
2077 : }; // class pass_ipa_tree_profile
2078 :
2079 : bool
2080 229960 : pass_ipa_tree_profile::gate (function *)
2081 : {
2082 : /* When profile instrumentation, use or test coverage shall be performed. */
2083 229960 : return (!in_lto_p
2084 229960 : && (flag_branch_probabilities || flag_test_coverage
2085 229634 : || coverage_instrumentation_p ())
2086 230565 : && !seen_error ());
2087 : }
2088 :
2089 : } // anon namespace
2090 :
2091 : simple_ipa_opt_pass *
2092 285722 : make_pass_ipa_tree_profile (gcc::context *ctxt)
2093 : {
2094 285722 : return new pass_ipa_tree_profile (ctxt);
2095 : }
2096 :
2097 : #include "gt-tree-profile.h"
|