Line data Source code
1 : /* Calculate branch probabilities, and basic block execution counts.
2 : Copyright (C) 1990-2026 Free Software Foundation, Inc.
3 : Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 : based on some ideas from Dain Samples of UC Berkeley.
5 : Further mangling by Bob Manson, Cygnus Support.
6 : Converted to use trees by Dale Johannesen, Apple Computer.
7 :
8 : This file is part of GCC.
9 :
10 : GCC is free software; you can redistribute it and/or modify it under
11 : the terms of the GNU General Public License as published by the Free
12 : Software Foundation; either version 3, or (at your option) any later
13 : version.
14 :
15 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 : for more details.
19 :
20 : You should have received a copy of the GNU General Public License
21 : along with GCC; see the file COPYING3. If not see
22 : <http://www.gnu.org/licenses/>. */
23 :
24 : /* Generate basic block profile instrumentation and auxiliary files.
25 : Tree-based version. See profile.cc for overview. */
26 :
27 : #include "config.h"
28 : #include "system.h"
29 : #include "coretypes.h"
30 : #include "memmodel.h"
31 : #include "backend.h"
32 : #include "target.h"
33 : #include "tree.h"
34 : #include "gimple.h"
35 : #include "cfghooks.h"
36 : #include "tree-pass.h"
37 : #include "ssa.h"
38 : #include "cgraph.h"
39 : #include "coverage.h"
40 : #include "diagnostic-core.h"
41 : #include "fold-const.h"
42 : #include "varasm.h"
43 : #include "tree-nested.h"
44 : #include "gimplify.h"
45 : #include "gimple-iterator.h"
46 : #include "gimplify-me.h"
47 : #include "tree-cfg.h"
48 : #include "tree-into-ssa.h"
49 : #include "value-prof.h"
50 : #include "profile.h"
51 : #include "tree-cfgcleanup.h"
52 : #include "stringpool.h"
53 : #include "attribs.h"
54 : #include "tree-pretty-print.h"
55 : #include "langhooks.h"
56 : #include "stor-layout.h"
57 : #include "xregex.h"
58 : #include "alloc-pool.h"
59 : #include "symbol-summary.h"
60 : #include "symtab-thunks.h"
61 : #include "cfganal.h"
62 :
/* The gcov counter type; used when building counter updates and for
   CONDITIONS_MAX_TERMS (TYPE_PRECISION) below.  */
static GTY(()) tree gcov_type_node;
/* Cached decls for the value-profiling entry points (interval, pow2, top-n
   values, indirect call, average, ior); GTY-rooted so they survive garbage
   collection across functions.  */
static GTY(()) tree tree_interval_profiler_fn;
static GTY(()) tree tree_pow2_profiler_fn;
static GTY(()) tree tree_topn_values_profiler_fn;
static GTY(()) tree tree_indirect_call_profiler_fn;
static GTY(()) tree tree_average_profiler_fn;
static GTY(()) tree tree_ior_profiler_fn;
/* Counter variable used by the time profiler.  */
static GTY(()) tree tree_time_profiler_counter;


/* Variable and field decls for the indirect-call tuple - presumably the
   {counters, callee} record shared with the profiling runtime; confirm
   against the uses later in this file.  */
static GTY(()) tree ic_tuple_var;
static GTY(()) tree ic_tuple_counters_field;
static GTY(()) tree ic_tuple_callee_field;
76 :
77 : /* Types of counter update methods.
78 :
79 : By default, the counter updates are done for a single threaded system
80 : (COUNTER_UPDATE_SINGLE_THREAD).
81 :
82 : If the user selected atomic profile counter updates
83 : (-fprofile-update=atomic), then the counter updates will be done atomically
84 : on a best-effort basis. One of three methods to do the counter updates is
85 : selected according to the target capabilities.
86 :
87 : Ideally, the counter updates are done through atomic operations in hardware
88 : (COUNTER_UPDATE_ATOMIC_BUILTIN).
89 :
90 : If the target supports only 32-bit atomic increments and gcov_type_node is a
91 : 64-bit integer type, then for the profile edge counters the increment is
92 : performed through two separate 32-bit atomic increments
93 : (COUNTER_UPDATE_ATOMIC_SPLIT or COUNTER_UPDATE_ATOMIC_PARTIAL). If the
94 : target supports libatomic (targetm.have_libatomic), then other counter
95 : updates are carried out by libatomic calls (COUNTER_UPDATE_ATOMIC_SPLIT).
96 : If the target does not support libatomic, then the other counter updates are
97 : not done atomically (COUNTER_UPDATE_ATOMIC_PARTIAL) and a warning is
98 : issued.
99 :
100 : If the target does not support atomic operations in hardware, however, it
101 : supports libatomic, then all updates are carried out by libatomic calls
102 : (COUNTER_UPDATE_ATOMIC_BUILTIN). */
enum counter_update_method {
  /* Plain non-atomic load/add/store updates (the default).  */
  COUNTER_UPDATE_SINGLE_THREAD,
  /* All updates are performed atomically (hardware ops or libatomic).  */
  COUNTER_UPDATE_ATOMIC_BUILTIN,
  /* 64-bit edge counters are updated as two 32-bit atomic increments; the
     remaining updates go through libatomic calls.  */
  COUNTER_UPDATE_ATOMIC_SPLIT,
  /* Like COUNTER_UPDATE_ATOMIC_SPLIT, but updates that cannot be made
     atomic are done non-atomically (and a warning is issued).  */
  COUNTER_UPDATE_ATOMIC_PARTIAL
};

/* The update method selected for this compilation; see the comment above
   for how -fprofile-update and target capabilities pick a value.  */
static counter_update_method counter_update = COUNTER_UPDATE_SINGLE_THREAD;
111 :
/* These functions support measuring modified condition/decision coverage
113 : (MC/DC). MC/DC requires all of the below during testing:
114 :
115 : - Each entry and exit point is invoked
116 : - Each decision takes every possible outcome
117 : - Each condition in a decision takes every possible outcome
118 : - Each condition in a decision is shown to independently affect the outcome
119 : of the decision
120 :
121 : Independence of a condition is shown by recording it being evaluated to a
122 : value (true/false) and not being made irrelevant ("masked") by a later term.
123 : This feature adds some instrumentation code, a few bitwise operators, that
124 : records the branches taken in conditions and applies a filter for the
125 : masking effect. Masking is essentially short-circuiting in reverse: a
126 : condition does not contribute to the outcome if it would short circuit the
127 : (sub) expression if it was evaluated right-to-left, (_ && false) and (_ ||
128 : true).
129 :
130 : The program is essentially rewritten this way:
131 :
132 : - if (a || b) { fn () }
133 : + if (a) { _t |= 0x1; goto _then; }
134 : + else { _f |= 0x1;
135 : + if (b) { _t |= 0x2; _mask |= 0x1; goto _then; }
136 : + else { _f |= 0x2; goto _else; }
137 : + _then:
138 : + _gcov_t |= (_t & _mask);
139 : + _gcov_f |= (_f & _mask);
140 : + fn (); goto _end;
141 : + _else:
142 : + _gcov_t |= (_t & _mask);
143 : + _gcov_f |= (_f & _mask);
144 : + fn ();
145 : + _end:
146 :
   It is assumed the front end will provide discriminators so that conditional
148 : basic blocks (basic block with a conditional jump and outgoing true/false
149 : edges) that belong to the same Boolean expression have the same
150 : discriminator. Masking is determined by analyzing these expressions as a
151 : reduced order binary decision diagram. */
152 : namespace
153 : {
/* Some context and reused instances between function calls.  Large embedded
   buffers are used to up-front request enough memory for most programs and
   merge them into a single allocation at the cost of using more memory in the
   average case.  Some numbers from linux v5.13 which is assumed to be a
   reasonably diverse code base: 75% of the functions in linux have less than
   16 nodes in the CFG and approx 2.5% have more than 64 nodes.  The functions
   that go beyond a few dozen nodes tend to be very large (>100) and so 64
   seems like a good balance.

   This is really just a performance balance of the cost of allocation and
   wasted memory.  */
struct conds_ctx
{
  /* This is both a reusable shared allocation which is also used to return
     single expressions, which means it for most code should only hold a
     couple of elements.  */
  auto_vec<basic_block, 64> blocks;

  /* Index for the topological order indexed by basic_block->index to an
     ordering so that expression (a || b && c) => top_index[a] < top_index[b]
     < top_index[c].  */
  auto_vec<int, 256> top_index;

  /* Pre-allocate bitmaps and vectors for per-function book keeping.  This is
     pure instance reuse and the bitmaps carry no data between function
     calls.  */
  auto_vec<basic_block, 64> b1;
  auto_vec<basic_block, 64> b2;
  auto_sbitmap g1;
  auto_sbitmap g2;
  auto_sbitmap g3;
  auto_vec<edge, 64> edges;

  /* SIZE is the number of basic blocks in the function; the sbitmaps need
     their capacity fixed at construction (one bit per block).  */
  explicit conds_ctx (unsigned size) noexcept (true) : g1 (size), g2 (size),
    g3 (size)
  {
  }
};
192 :
/* Only instrument terms with fewer than number of bits in a (wide) gcov
   integer, which is probably 64.  The algorithm itself does not impose this
   limitation, but it makes for a simpler implementation.

   * Allocating the output data structure (coverage_counter_alloc ()) can
     assume pairs of gcov_type_unsigned and not use a separate length field.
   * A pair gcov_type_unsigned can be used as accumulators.
   * Updating accumulators can use the bitwise operations |=, &= and not
     custom operators that work for arbitrary-sized bit-sets.

   Most real-world code should be unaffected by this, but it is possible
   (especially for generated code) to exceed this limit.  */
#define CONDITIONS_MAX_TERMS (TYPE_PRECISION (gcov_type_node))
/* The flag bits that mark an edge as a conditional (true or false) edge.  */
#define EDGE_CONDITION (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
207 :
208 : /* Compare two basic blocks by their order in the expression i.e. for (a || b)
209 : then topological_cmp (a, b, ...) < 0. The result is undefined if LHS, RHS
210 : belong to different expressions. The TOP_INDEX argument should be the
211 : top_index vector from ctx. */
212 : int
213 141691 : topological_cmp (const void *lhs, const void *rhs, void *top_index)
214 : {
215 141691 : const_basic_block l = *(const basic_block *) lhs;
216 141691 : const_basic_block r = *(const basic_block *) rhs;
217 141691 : const vec<int> *im = (const vec<int> *) top_index;
218 141691 : return (*im)[l->index] - (*im)[r->index];
219 : }
220 :
221 : /* topological_cmp of the src block of LHS and RHS. The TOP_INDEX argument
222 : should be the top_index vector from ctx. */
223 : int
224 133504 : topological_src_cmp (const void *lhs, const void *rhs, void *top_index)
225 : {
226 133504 : const_edge l = *(const edge *) lhs;
227 133504 : const_edge r = *(const edge *) rhs;
228 133504 : return topological_cmp (&l->src, &r->src, top_index);
229 : }
230 :
231 : /* Find the index of NEEDLE in BLOCKS; return -1 if not found. This has two
232 : uses, sometimes for the index and sometimes for set member checks. Sets are
233 : typically very small (number of conditions, >8 is uncommon) so linear search
234 : should be very fast. */
235 : int
236 134839 : index_of (const basic_block needle, array_slice<basic_block> blocks)
237 : {
238 2936654 : for (size_t i = 0; i < blocks.size (); i++)
239 2936654 : if (blocks[i] == needle)
240 134839 : return int (i);
241 : return -1;
242 : }
243 :
244 : /* Special cases of the single_*_p and single_*_edge functions in basic-block.h
245 : that don't consider exception handling or other complex edges. This helps
246 : create a view of the CFG with only normal edges - if a basic block has both
247 : an outgoing fallthrough and exceptional edge, it should be considered a
248 : single-successor. */
249 : bool
250 271042 : single_p (const vec<edge, va_gc> *edges)
251 : {
252 271042 : int n = EDGE_COUNT (edges);
253 266523 : if (n == 0)
254 : return false;
255 :
256 669938 : for (edge e : edges)
257 403421 : if (e->flags & EDGE_COMPLEX)
258 55 : n -= 1;
259 :
260 266517 : return n == 1;
261 : }
262 :
263 : /* Get the single, non-complex edge. Behavior is undefined edges have more
264 : than 1 non-complex edges. */
265 : edge
266 1250 : single_edge (const vec<edge, va_gc> *edges)
267 : {
268 1250 : gcc_checking_assert (single_p (edges));
269 1250 : for (edge e : edges)
270 : {
271 1250 : if (e->flags & EDGE_COMPLEX)
272 0 : continue;
273 : return e;
274 : }
275 : return NULL;
276 : }
277 :
278 : /* Sometimes, for example with function calls, goto labels, and C++
279 : destructors, the CFG gets extra nodes that are essentially single-entry
280 : single-exit in the middle of boolean expressions. For example:
281 :
282 : x || can_throw (y)
283 :
284 : A
285 : /|
286 : / |
287 : B |
288 : | |
289 : C |
290 : / \ |
291 : / \|
292 : F T
293 :
294 : Without the extra node inserted by the function + exception it becomes a
295 : proper 2-term graph, not 2 single-term graphs.
296 :
297 : A
298 : /|
299 : C |
300 : / \|
301 : F T
302 :
303 : This function finds the source edge of these paths. This is often the
304 : identity function. */
305 : edge
306 136488 : contract_edge_up (edge e)
307 : {
308 137488 : while (true)
309 : {
310 136988 : basic_block src = e->src;
311 136988 : if (!single_p (src->preds))
312 : return e;
313 131627 : if (!single_p (src->succs))
314 : return e;
315 500 : e = single_edge (src->preds);
316 500 : }
317 : }
318 :
/* A simple struct for storing/returning outcome block pairs.  Either both
   blocks are set or both are NULL.  */
struct outcomes
{
  basic_block t = NULL;
  basic_block f = NULL;

  /* True when both outcome blocks are set, i.e. when the block these were
     collected from really was a conditional.  */
  operator bool () const noexcept (true)
  {
    return t && f;
  }
};
331 :
332 : /* Get the true/false successors of a basic block. If b is not a conditional
333 : block both edges are NULL. */
334 : outcomes
335 135169 : conditional_succs (const basic_block b)
336 : {
337 135169 : outcomes c;
338 675845 : for (edge e : b->succs)
339 : {
340 270338 : if (e->flags & EDGE_TRUE_VALUE)
341 135169 : c.t = e->dest;
342 270338 : if (e->flags & EDGE_FALSE_VALUE)
343 135169 : c.f = e->dest;
344 : }
345 :
346 135169 : gcc_assert ((c.t && c.f) || (!c.t && !c.f));
347 135169 : return c;
348 : }
349 :
350 : /* Get the index or offset of a conditional flag, 0 for true and 1 for false.
351 : These indices carry no semantics but must be consistent as they are used to
352 : index into data structures in code generation and gcov. */
353 : unsigned
354 5733 : condition_index (unsigned flag)
355 : {
356 5733 : return (flag & EDGE_CONDITION) == EDGE_TRUE_VALUE ? 0 : 1;
357 : }
358 :
359 : /* Returns the condition identifier for the basic block if set, otherwise 0.
360 : This is only meaningful in GIMPLE and is used for condition coverage.
361 :
362 : There may be conditions created that did not get an uid, such as those
363 : implicitly created by destructors. We could include them in the condition
364 : coverage for completeness (i.e. condition coverage implies (implicit) branch
365 : coverage), but they have no natural buckets and should all be single-term.
366 : For now these are ignored and given uid = 0, and branch coverage is left to
367 : -fprofile-arcs.
368 :
369 : Under optimization, COND_EXPRs may be folded, replaced with switches,
370 : min-max, etc., which leaves ghost identifiers in basic blocks that do not
371 : end with a conditional jump. They are not really meaningful for condition
372 : coverage anymore, but since coverage is unreliable under optimization anyway
373 : this is not a big problem.
374 :
375 : The cond_uids map in FN cannot be expected to exist. It will only be
376 : created if it is needed, and a function may have gconds even though there
377 : are none in source. This can be seen in PR gcov-profile/114601, when
378 : -finstrument-functions-once is used and the function has no conditions. */
379 : unsigned
380 1897 : condition_uid (struct function *fn, basic_block b)
381 : {
382 1897 : gimple *stmt = gsi_stmt (gsi_last_bb (b));
383 1899 : if (!safe_is_a <gcond *> (stmt) || !fn->cond_uids)
384 : return 0;
385 :
386 635 : unsigned *v = fn->cond_uids->get (as_a <gcond *> (stmt));
387 635 : return v ? *v : 0;
388 : }
389 :
390 : /* Compute the masking table.
391 :
392 : Masking and short circuiting are deeply connected - masking occurs when
393 : control flow reaches a state that is also reachable with short circuiting.
394 : In fact, masking corresponds to short circuiting for the reversed
395 : expression. This means we can find the limits, the last term in preceeding
396 : subexpressions, by following the edges that short circuit to the same
397 : outcome. The algorithm treats the CFG as a reduced order binary decision
398 : diagram (see Randall E. Bryant's Graph Based Algorithms for Boolean
399 : Function Manipulation (1987)).
400 :
401 : In the simplest case a || b:
402 :
403 : a
404 : |\
405 : | b
406 : |/ \
407 : T F
408 :
409 : T has multiple incoming edges and is the outcome of a short circuit,
410 : with top = a, bot = b. The top node (a) is masked when the edge (b, T) is
411 : taken.
412 :
413 : The names "top" and "bot" refer to a pair of nodes with a shared
414 : successor. The top is always the node corresponding to the left-most
415 : operand of the two, and it holds that top < bot in a topological ordering.
416 :
417 : Now consider (a && b) || (c && d) and its masking table:
418 :
419 : a
420 : |\
421 : b \
422 : |\|
423 : | c
424 : | |\
425 : | d \
426 : |/ \|
427 : T F
428 :
429 : a[0] = {}
430 : a[1] = {}
431 : b[0] = {a}
432 : b[1] = {}
433 : c[0] = {}
434 : c[1] = {}
435 : d[0] = {c}
436 : d[1] = {a,b}
437 :
438 : Note that 0 and 1 are indices and not boolean values - a[0] is the index in
439 : the masking vector when a takes the true edge.
440 :
441 : b[0] and d[0] are identical to the a || b example, and d[1] is the bot in
442 : the triangle [d, b] -> T. b is the top node in the [d, b] relationship and
443 : last term in (a && b). To find the other terms masked we use the fact that
444 : all paths in an expression go through either of the outcomes, found by
445 : collecting all non-complex edges that go out of the expression (the
   neighborhood). In some cases the outgoing edges go through intermediate (or
447 : bypass) nodes, and we collect these paths too (see contract_edge_up).
448 :
449 : We find the terms by marking the outcomes (in this case c, T) and walk the
450 : predecessors starting at top (in this case b) and masking nodes when both
451 : successors are marked. This is equivalent to removing the two outcome nodes
452 : of the subexpression and finding the nodes not in the inverse reachability
453 : set.
454 :
   We only have to consider the pairs of top, bot where top is the closest
456 : (highest-index'd) candidate that still satisfies top < bot in the
457 : topological order, as this will be the immediate left operand. The nodes of
   the other left operands will also be found when going through the rightmost
459 : term, and a lower-index'd top would just find subsets. This has a
460 : significant performance impact, 15-20x faster for the worst cases of (x && y
461 : && ..) with no nesting.
462 :
463 : The masking table is represented as two bitfields per term in the expression
464 : with the index corresponding to the term in the Boolean expression.
465 : a || b && c becomes the term vector [a b c] and the masking table [a[0]
466 : a[1] b[0] ...]. The kth bit of a masking vector is set if the kth term
467 : is masked by taking the edge.
468 :
469 : The out masks are in uint64_t (the practical maximum for gcov_type_node for
470 : any target) as it has to be big enough to store the target size gcov types
471 : independent of the host. */
void
masking_vectors (conds_ctx& ctx, array_slice<basic_block> blocks,
		 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
{
  gcc_assert (blocks.is_valid ());
  gcc_assert (!blocks.empty ());
  gcc_assert (maps.is_valid ());
  gcc_assert (masks.is_valid ());
  /* The masks are fixed-width bitfields, so the expression must fit.  */
  gcc_assert (sizeof (masks[0]) * BITS_PER_UNIT >= CONDITIONS_MAX_TERMS);

  /* A single-term expression cannot have any masking.  */
  if (bitmap_count_bits (maps[0]) == 1)
    return;

  sbitmap marks = ctx.g1;
  /* maps[0]/maps[1] are the expression's two per-block bitmaps (see
     cov_maps); from their use below, core holds the condition blocks and
     allg the blocks of the whole expression - confirm against the caller.  */
  const sbitmap core = maps[0];
  const sbitmap allg = maps[1];
  vec<basic_block> &queue = ctx.b1;
  vec<basic_block> &body = ctx.b2;
  const vec<int> &top_index = ctx.top_index;

  /* Set up for the iteration - include the outcome nodes in the traversal.
     The algorithm compares pairs of nodes and is not really sensitive to
     traversal order, but need to maintain topological order because the
     index of masking nodes maps to the index in the accumulators.  We must
     also check the incoming-to-outcome pairs.  These edges may in turn be
     split (this happens with labels on top of then/else blocks) so we must
     follow any single-in single-out path.  The non-condition blocks do not
     have to be in order as they are non-condition blocks and will not be
     considered for the set-bit index.  */
  body.truncate (0);
  body.reserve (blocks.size () + 2);
  /* First the condition blocks themselves, in topological order.  */
  for (const basic_block b : blocks)
    if (bitmap_bit_p (core, b->index))
      body.quick_push (b);

  /* Then the outcome nodes - successors that leave the expression.  */
  for (basic_block b : blocks)
    {
      if (!bitmap_bit_p (core, b->index))
	continue;

      for (edge e : b->succs)
	{
	  if (e->flags & EDGE_COMPLEX)
	    continue;
	  if (bitmap_bit_p (allg, e->dest->index))
	    continue;
	  body.safe_push (e->dest);

	  /* There may be multiple nodes between the condition edge and the
	     actual outcome, and we need to know when these paths join to
	     determine if there is short circuit/masking.  This is
	     effectively creating a virtual edge from the condition node to
	     the real outcome.  */
	  while (!(e->flags & EDGE_DFS_BACK) && single_p (e->dest->succs))
	    {
	      e = single_edge (e->dest->succs);
	      body.safe_push (e->dest);
	    }
	}
    }

  /* Find the masking.  The leftmost element cannot mask anything, so
     start at 1.  */
  for (size_t i = 1; i != body.length (); i++)
    {
      const basic_block b = body[i];
      /* Only join points (>= 2 predecessors) can be short-circuit
	 destinations and thus trigger masking.  */
      if (b->preds->length () < 2)
	continue;
      ctx.edges.truncate (0);
      ctx.edges.reserve (b->preds->length ());
      for (edge e : b->preds)
	if (!(e->flags & EDGE_COMPLEX))
	  ctx.edges.quick_push (contract_edge_up (e));
      if (ctx.edges.length () < 2)
	continue;
      ctx.edges.sort (topological_src_cmp, &ctx.top_index);

      /* Consider adjacent (top, bot) pairs in topological order; see the
	 function comment for why only adjacent pairs are needed.  */
      for (size_t i0 = 0, i1 = 1; i1 != ctx.edges.length (); ++i0, ++i1)
	{
	  edge etop = ctx.edges[i0];
	  edge ebot = ctx.edges[i1];
	  gcc_assert (etop != ebot);

	  const basic_block top = etop->src;
	  const basic_block bot = ebot->src;
	  /* Masking only happens when both edges take the same condition
	     outcome (both true or both false).  */
	  const unsigned cond = etop->flags & ebot->flags & EDGE_CONDITION;
	  if (!cond)
	    continue;
	  if (top_index[top->index] > top_index[bot->index])
	    continue;
	  if (!bitmap_bit_p (core, top->index))
	    continue;
	  if (!bitmap_bit_p (core, bot->index))
	    continue;

	  outcomes out = conditional_succs (top);
	  gcc_assert (out);
	  bitmap_clear (marks);
	  bitmap_set_bit (marks, out.t->index);
	  bitmap_set_bit (marks, out.f->index);
	  queue.truncate (0);
	  queue.safe_push (top);

	  // The edge bot -> outcome triggers the masking
	  const int m = 2 * index_of (bot, body) + condition_index (cond);
	  gcc_assert (m >= 0);
	  /* Walk the predecessors from top; a node whose both successors are
	     marked is masked - mark it and continue upwards.  */
	  while (!queue.is_empty ())
	    {
	      basic_block q = queue.pop ();
	      /* q may have been processed & completed by being added to the
		 queue multiple times, so check that there is still work to
		 do before continuing.  */
	      if (bitmap_bit_p (marks, q->index))
		continue;

	      outcomes succs = conditional_succs (q);
	      if (!bitmap_bit_p (marks, succs.t->index))
		continue;
	      if (!bitmap_bit_p (marks, succs.f->index))
		continue;

	      /* Record this term as masked by edge m.  */
	      const int index = index_of (q, body);
	      gcc_assert (index != -1);
	      masks[m] |= uint64_t (1) << index;
	      bitmap_set_bit (marks, q->index);

	      for (edge e : q->preds)
		{
		  e = contract_edge_up (e);
		  if (e->flags & EDGE_DFS_BACK)
		    continue;
		  if (bitmap_bit_p (marks, e->src->index))
		    continue;
		  if (!bitmap_bit_p (core, e->src->index))
		    continue;
		  queue.safe_push (e->src);
		}
	    }
	}
    }
}
613 :
614 : /* Emit LHS = RHS on edges. This is just a short hand that automates the
615 : building of the assign and immediately puts it on the edge, which becomes
616 : noisy. */
617 : tree
618 3288 : emit_assign (edge e, tree lhs, tree rhs)
619 : {
620 1644 : gassign *w = gimple_build_assign (lhs, rhs);
621 3288 : gsi_insert_on_edge (e, w);
622 3288 : return lhs;
623 : }
624 :
625 : /* Emit lhs = RHS on edges. The lhs is created. */
626 : tree
627 1644 : emit_assign (edge e, tree rhs)
628 : {
629 1644 : return emit_assign (e, make_ssa_name (gcov_type_node), rhs);
630 : }
631 :
632 : /* Emit LHS = OP1 <OP> OP2 on edges. */
633 : tree
634 5518 : emit_bitwise_op (edge e, tree op1, tree_code op, tree op2 = NULL_TREE)
635 : {
636 5518 : tree lhs = make_ssa_name (gcov_type_node);
637 5518 : gassign *w = gimple_build_assign (lhs, op, op1, op2);
638 5518 : gsi_insert_on_edge (e, w);
639 5518 : return lhs;
640 : }
641 :
/* Visitor for make_top_index.  Depth-first walk along non-back edges from B,
   pushing B onto L only after all its forward successors have been visited
   (post-order).  MARKS doubles as the visited set, indexed by
   basic_block->index.  */
void
make_top_index_visit (basic_block b, vec<basic_block> &l, vec<int> &marks)
{
  if (marks[b->index])
    return;

  /* Follow the false edge first, if it exists, so that true paths are given
     the lower index in the ordering.  Any iteration order
     would yield a valid and useful topological ordering, but making sure the
     true branch has the lower index first makes reporting work better for
     expressions with ternaries.  Walk the false branch first because the
     array will be reversed to finalize the topological order.

     With the wrong ordering (a ? b : c) && d could become [a c b d], but the
     (expected) order is really [a b c d].  */

  const unsigned false_fwd = EDGE_DFS_BACK | EDGE_FALSE_VALUE;
  for (edge e : b->succs)
    if ((e->flags & false_fwd) == EDGE_FALSE_VALUE)
      make_top_index_visit (e->dest, l, marks);

  /* Then every successor that is neither a back edge nor a false edge
     (i.e. the true edge and unconditional successors).  */
  for (edge e : b->succs)
    if (!(e->flags & false_fwd))
      make_top_index_visit (e->dest, l, marks);

  /* All successors done - mark B visited and emit it.  */
  marks[b->index] = 1;
  l.quick_push (b);
}
671 :
672 : /* Find a topological sorting of the blocks in a function so that left operands
673 : are before right operands including subexpressions. Sorting on block index
674 : does not guarantee this property and the syntactical order of terms is very
675 : important to the condition coverage. The sorting algorithm is from Cormen
676 : et al (2001) but with back-edges ignored and thus there is no need for
677 : temporary marks (for cycle detection). The L argument is a buffer/working
678 : memory, and the output will be written to TOP_INDEX.
679 :
680 : For the expression (a || (b && c) || d) the blocks should be [a b c d]. */
681 : void
682 159 : make_top_index (array_slice<basic_block> blocks, vec<basic_block> &l,
683 : vec<int> &top_index)
684 : {
685 159 : l.truncate (0);
686 159 : l.reserve (blocks.size ());
687 :
688 : /* Use of the output map as a temporary for tracking visited status. */
689 159 : top_index.truncate (0);
690 159 : top_index.safe_grow_cleared (blocks.size ());
691 2056 : for (const basic_block b : blocks)
692 1897 : make_top_index_visit (b, l, top_index);
693 :
694 : /* Insert canaries - if there are unreachable nodes (for example infinite
695 : loops) then the unreachable nodes should never be needed for comparison,
696 : and l.length () < max_index. An index mapping should also never be
697 : recorded twice. */
698 4112 : for (unsigned i = 0; i != top_index.length (); i++)
699 1897 : top_index[i] = -1;
700 :
701 318 : gcc_assert (blocks.size () == l.length ());
702 159 : l.reverse ();
703 159 : const unsigned nblocks = l.length ();
704 2056 : for (unsigned i = 0; i != nblocks; i++)
705 : {
706 1897 : gcc_assert (l[i]->index != -1);
707 1897 : top_index[l[i]->index] = int (i);
708 : }
709 159 : }
710 :
/* Find all nodes including non-conditions in a Boolean expression.  We need to
   know the paths through the expression so that the masking and
   instrumentation phases can limit searches and know what subgraphs must be
   threaded through, but not counted, such as the (b || c) in
   a && fn (b || c) && d.

   It is essentially the intersection of downwards paths from the expression
   nodes EXPR to the post-dominator and upwards from the post-dominator.
   Finding the dominator is slightly more involved than picking the first/last,
   particularly under optimization, because both incoming and outgoing paths
   may have multiple entries/exits.

   It is assumed GRAPH is an array_slice of the basic blocks of this function
   sorted by the basic block index.  */
vec<basic_block> &
paths_between (conds_ctx &ctx, array_slice<basic_block> graph,
	       const vec<basic_block> &expr)
{
  if (expr.length () == 1)
    {
      /* A single-term expression is its own path; reuse the shared buffer
	 to return it.  */
      ctx.blocks.truncate (0);
      ctx.blocks.safe_push (expr[0]);
      return ctx.blocks;
    }

  basic_block dom;
  sbitmap up = ctx.g1;
  sbitmap down = ctx.g2;
  sbitmap paths = ctx.g3;
  vec<basic_block> &queue = ctx.b1;

  /* Downwards reachability: mark everything reachable from the expression
     nodes along normal forward edges, stopping at the nearest common
     post-dominator.  */
  queue.truncate (0);
  bitmap_clear (down);
  dom = get_immediate_dominator (CDI_POST_DOMINATORS, expr[0]);
  for (basic_block b : expr)
    if (dom != b)
      dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, b);
  queue.safe_splice (expr);
  while (!queue.is_empty ())
    {
      basic_block b = queue.pop ();
      /* bitmap_set_bit returns false if already set - already visited.  */
      if (!bitmap_set_bit (down, b->index))
	continue;
      if (b == dom)
	continue;
      for (edge e : b->succs)
	if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
	  queue.safe_push (e->dest);
    }

  /* Upwards reachability: mark everything that reaches the expression nodes
     along normal edges, stopping at the nearest common dominator.  */
  queue.truncate (0);
  bitmap_clear (up);
  dom = expr[0];
  for (basic_block b : expr)
    if (dom != b)
      dom = nearest_common_dominator (CDI_DOMINATORS, dom, b);
  queue.safe_splice (expr);
  while (!queue.is_empty ())
    {
      basic_block b = queue.pop ();
      if (!bitmap_set_bit (up, b->index))
	continue;
      if (b == dom)
	continue;
      for (edge e : b->preds)
	if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
	  queue.safe_push (e->src);
    }

  /* The intersection of the two sets is the set of blocks on some path
     through the expression; emit them in block-index order (the order of
     GRAPH).  */
  bitmap_and (paths, up, down);
  vec<basic_block> &blocks = ctx.blocks;
  blocks.truncate (0);
  blocks.reserve (graph.size ());
  sbitmap_iterator itr;
  unsigned index;
  EXECUTE_IF_SET_IN_BITMAP (paths, 0, index, itr)
    blocks.quick_push (graph[index]);
  return blocks;
}
790 :
791 : }
792 :
/* Context object for the condition coverage.  This stores conds_ctx (the
   buffers reused when analyzing the cfg) and the output arrays.  This is
   designed to be heap allocated and aggressively preallocates large buffers to
   avoid having to reallocate for most programs.  */
struct condcov
{
  /* NBLOCKS is the number of basic blocks in the function.  Two sbitmaps
     (of NBLOCKS bits each) are allocated per block and cleared up front;
     cov_maps hands out a pair of them per Boolean expression.  */
  explicit condcov (unsigned nblocks) noexcept (true) : ctx (nblocks),
    m_maps (sbitmap_vector_alloc (2 * nblocks, nblocks))
  {
    bitmap_vector_clear (m_maps, 2 * nblocks);
  }
  /* Offsets into m_blocks/m_masks; expression N occupies the half-open
     range [m_index[N], m_index[N+1]) (see cov_blocks/cov_masks).  */
  auto_vec<size_t, 128> m_index;
  /* The basic blocks of all expressions, concatenated.  */
  auto_vec<basic_block, 256> m_blocks;
  /* The masking bitfields of all expressions, concatenated (two per
     term).  */
  auto_vec<uint64_t, 512> m_masks;
  /* Reusable working buffers for the CFG analysis.  */
  conds_ctx ctx;
  /* Raw pointer, not owned by the destructor - freed by cov_free.  */
  sbitmap *m_maps;
};
810 :
811 : /* Get the length, that is the number of Boolean expression found. cov_length
812 : is the one-past index for cov_{blocks,masks,maps}. */
813 : size_t
814 318 : cov_length (const struct condcov *cov)
815 : {
816 318 : if (cov->m_index.is_empty ())
817 : return 0;
818 318 : return cov->m_index.length () - 1;
819 : }
820 :
821 : /* The subgraph, exluding intermediates, for the nth Boolean expression. */
822 : array_slice<basic_block>
823 584 : cov_blocks (struct condcov *cov, size_t n)
824 : {
825 584 : if (n >= cov->m_index.length ())
826 0 : return array_slice<basic_block>::invalid ();
827 :
828 1168 : basic_block *begin = cov->m_blocks.begin () + cov->m_index[n];
829 584 : basic_block *end = cov->m_blocks.begin () + cov->m_index[n + 1];
830 584 : return array_slice<basic_block> (begin, end - begin);
831 : }
832 :
833 : /* The masks for the nth Boolean expression. */
834 : array_slice<uint64_t>
835 584 : cov_masks (struct condcov *cov, size_t n)
836 : {
837 584 : if (n >= cov->m_index.length ())
838 0 : return array_slice<uint64_t>::invalid ();
839 :
840 1168 : uint64_t *begin = cov->m_masks.begin () + 2 * cov->m_index[n];
841 584 : uint64_t *end = cov->m_masks.begin () + 2 * cov->m_index[n + 1];
842 584 : return array_slice<uint64_t> (begin, end - begin);
843 : }
844 :
845 : /* The maps for the nth Boolean expression. */
846 : array_slice<sbitmap>
847 584 : cov_maps (struct condcov *cov, size_t n)
848 : {
849 584 : if (n >= cov->m_index.length ())
850 0 : return array_slice<sbitmap>::invalid ();
851 :
852 584 : sbitmap *begin = cov->m_maps + 2 * n;
853 584 : sbitmap *end = begin + 2;
854 584 : return array_slice<sbitmap> (begin, end - begin);
855 : }
856 :
857 : /* Deleter for condcov. */
void
cov_free (struct condcov *cov)
{
  /* m_maps is a raw sbitmap vector with no owning destructor, so it must be
     released explicitly before the object itself is deleted.  */
  sbitmap_vector_free (cov->m_maps);
  delete cov;
}
864 :
865 : /* Condition coverage (MC/DC)
866 :
867 : Whalen, Heimdahl, De Silva in "Efficient Test Coverage Measurement for
868 : MC/DC" describe an algorithm for modified condition/decision coverage based
   on AST analysis.  This algorithm analyzes the control flow graph
870 : (interpreted as a binary decision diagram) to determine the masking vectors.
871 : The individual phases are described in more detail closer to the
872 : implementation.
873 :
874 : The coverage only considers the positions, not the symbols, in a
875 : conditional, e.g. !A || (!B && A) is a 3-term conditional even though A
876 : appears twice. Subexpressions have no effect on term ordering:
877 : (a && (b || (c && d)) || e) comes out as [a b c d e]. Functions whose
878 : arguments are Boolean expressions are treated as separate expressions, that
879 : is, a && fn (b || c) && d is treated as [a _fn d] and [b c], not [a b c d].
880 :
881 : The output for gcov is a vector of pairs of unsigned integers, interpreted
882 : as bit-sets, where the bit index corresponds to the index of the condition
883 : in the expression.
884 :
885 : The returned condcov should be released by the caller with cov_free. */
struct condcov *
find_conditions (struct function *fn)
{
  /* The analysis relies on DFS-back-edge marks and both dominator trees.
     Compute whatever is missing and free it again on exit so the pass does
     not change what information is available to later passes.  */
  mark_dfs_back_edges (fn);
  const bool have_dom = dom_info_available_p (fn, CDI_DOMINATORS);
  const bool have_post_dom = dom_info_available_p (fn, CDI_POST_DOMINATORS);
  if (!have_dom)
    calculate_dominance_info (CDI_DOMINATORS);
  if (!have_post_dom)
    calculate_dominance_info (CDI_POST_DOMINATORS);

  const unsigned nblocks = n_basic_blocks_for_fn (fn);
  basic_block *fnblocksp = basic_block_info_for_fn (fn)->address ();
  condcov *cov = new condcov (nblocks);
  conds_ctx &ctx = cov->ctx;
  array_slice<basic_block> fnblocks (fnblocksp, nblocks);
  make_top_index (fnblocks, ctx.b1, ctx.top_index);

  /* Bin the Boolean expressions so that exprs[id] -> [x1, x2, ...].
     condition_uid returning 0 means the block belongs to no expression.  */
  hash_map<int_hash<unsigned, 0>, auto_vec<basic_block>> exprs;
  for (basic_block b : fnblocks)
    {
      const unsigned uid = condition_uid (fn, b);
      if (uid == 0)
	continue;
      exprs.get_or_insert (uid).safe_push (b);
    }

  /* Visit all reachable nodes and collect conditions.  Topological order is
     important so the first node of a boolean expression is visited first
     (it will mark subsequent terms).  */
  cov->m_index.safe_push (0);
  for (auto expr : exprs)
    {
      vec<basic_block> &conds = expr.second;
      /* The gcov masks are 64-bit pairs, so expressions with more terms
	 than that cannot be instrumented; warn and skip.  */
      if (conds.length () > CONDITIONS_MAX_TERMS)
	{
	  location_t loc = gimple_location (gsi_stmt (gsi_last_bb (conds[0])));
	  warning_at (loc, OPT_Wcoverage_too_many_conditions,
		      "too many conditions (found %u); giving up coverage",
		      conds.length ());
	  continue;
	}
      conds.sort (topological_cmp, &ctx.top_index);
      vec<basic_block> &subgraph = paths_between (ctx, fnblocks, conds);
      subgraph.sort (topological_cmp, &ctx.top_index);
      /* Record this expression's condition and subgraph membership in the
	 per-expression bitmap pair (see cov_maps).  */
      const unsigned index = cov->m_index.length () - 1;
      sbitmap condm = cov->m_maps[0 + 2 * index];
      sbitmap subgm = cov->m_maps[1 + 2 * index];
      for (basic_block b : conds)
	bitmap_set_bit (condm, b->index);
      for (basic_block b : subgraph)
	bitmap_set_bit (subgm, b->index);
      cov->m_blocks.safe_splice (subgraph);
      cov->m_index.safe_push (cov->m_blocks.length ());
    }

  if (!have_dom)
    free_dominance_info (fn, CDI_DOMINATORS);
  if (!have_post_dom)
    free_dominance_info (fn, CDI_POST_DOMINATORS);

  /* Two masks per condition; m_index.last () is the total condition count
     accumulated above.  */
  cov->m_masks.safe_grow_cleared (2 * cov->m_index.last ());
  const size_t length = cov_length (cov);
  for (size_t i = 0; i != length; i++)
    masking_vectors (ctx, cov_blocks (cov, i), cov_maps (cov, i),
		     cov_masks (cov, i));

  return cov;
}
956 :
957 : namespace
958 : {
959 :
/* Stores, for the node e->dest, the incoming edge E and the counters (in SSA
   form) seen on that edge.  The counters record the seen-true (0), seen-false
   (1), and current-mask (2).  They are stored in an array rather than proper
   members for access-by-index as the code paths tend to be identical for the
   different counters.  */
struct counters
{
  /* The incoming edge these counters were recorded on; NULL for the
     zero-initialized base counter.  */
  edge e;
  /* counter[0] = seen-true, counter[1] = seen-false, counter[2] = mask.  */
  tree counter[3];
  tree &operator[] (size_t i) { return counter[i]; }
};
971 :
972 : /* Find the counters for the incoming edge e, or NULL if the edge has not been
973 : recorded (could be for complex incoming edges). */
974 : counters *
975 1167 : find_counters (vec<counters> &candidates, edge e)
976 : {
977 5957 : for (counters &candidate : candidates)
978 3611 : if (candidate.e == e)
979 : return &candidate;
980 : return NULL;
981 : }
982 :
983 : /* Resolve the SSA for a specific counter KIND. If it is not modified by any
984 : incoming edges, simply forward it, otherwise create a phi node of all the
985 : candidate counters and return it. */
tree
resolve_counter (vec<counters> &cands, size_t kind)
{
  gcc_assert (!cands.is_empty ());
  gcc_assert (kind < 3);

  counters &fst = cands[0];

  /* With no recorded edge (the zero-initialized base) or a single
     predecessor there is nothing to merge -- forward the sole candidate's
     counter unchanged.  */
  if (!fst.e || fst.e->dest->preds->length () == 1)
    {
      gcc_assert (cands.length () == 1);
      return fst[kind];
    }

  /* Multiple predecessors: merge the per-edge counters through a phi node.
     Predecessor edges with no recorded counter contribute a freshly-zeroed
     SSA name, materialized in the predecessor block.  */
  tree zero0 = build_int_cst (gcov_type_node, 0);
  tree ssa = make_ssa_name (gcov_type_node);
  gphi *phi = create_phi_node (ssa, fst.e->dest);
  for (edge e : fst.e->dest->preds)
    {
      counters *prev = find_counters (cands, e);
      if (prev)
	add_phi_arg (phi, (*prev)[kind], e, UNKNOWN_LOCATION);
      else
	{
	  tree zero = make_ssa_name (gcov_type_node);
	  gimple_stmt_iterator gsi = gsi_after_labels (e->src);
	  gassign *set = gimple_build_assign (zero, zero0);
	  gsi_insert_before (&gsi, set, GSI_NEW_STMT);
	  add_phi_arg (phi, zero, e, UNKNOWN_LOCATION);
	}
    }
  return ssa;
}
1019 :
1020 : /* Resolve all the counters for a node. Note that the edge is undefined, as
1021 : the counters are intended to form the base to push to the successors, and
   because the edge is only meaningful for nodes with a single predecessor.  */
1023 : counters
1024 613 : resolve_counters (vec<counters> &cands)
1025 : {
1026 613 : counters next;
1027 613 : next[0] = resolve_counter (cands, 0);
1028 613 : next[1] = resolve_counter (cands, 1);
1029 613 : next[2] = resolve_counter (cands, 2);
1030 613 : return next;
1031 : }
1032 :
1033 : }
1034 :
1035 : /* Add instrumentation to a decision subgraph. EXPR should be the
1036 : (topologically sorted) block of nodes returned by cov_blocks, MAPS the
1037 : bitmaps returned by cov_maps, and MASKS the block of bitsets returned by
1038 : cov_masks. CONDNO should be the index of this condition in the function,
1039 : i.e. the same argument given to cov_{masks,graphs}. EXPR may contain nodes
1040 : in-between the conditions, e.g. when an operand contains a function call,
1041 : or there is a setjmp and the cfg is filled with complex edges.
1042 :
1043 : Every node is annotated with three counters; the true, false, and mask
   value.  First, walk the graph and determine if there are multiple
1045 : possible values for either accumulator depending on the path taken, in which
1046 : case a phi node is created and registered as the accumulator. Then, those
1047 : values are pushed as accumulators to the immediate successors. For some
1048 : very particular programs there may be multiple paths into the expression
1049 : (e.g. when prior terms are determined by a surrounding conditional) in which
1050 : case the default zero-counter is pushed, otherwise all predecessors will
1051 : have been considered before the successor because of topologically ordered
1052 : traversal. Finally, expr is traversed again to look for edges to the
1053 : outcomes, that is, edges with a destination outside of expr, and the local
1054 : accumulators are flushed to the global gcov counters on these edges. In
1055 : some cases there are edge splits that cause 3+ edges to the two outcome
1056 : nodes.
1057 :
1058 : If a complex edge is taken (e.g. on a longjmp) the accumulators are
1059 : attempted poisoned so that there would be no change to the global counters,
1060 : but this has proven unreliable in the presence of undefined behavior, see
1061 : the setjmp003 test.
1062 :
1063 : It is important that the flushes happen on the basic condition outgoing
1064 : edge, otherwise flushes could be lost to exception handling or other
1065 : abnormal control flow. */
size_t
instrument_decisions (array_slice<basic_block> expr, size_t condno,
		      array_slice<sbitmap> maps, array_slice<uint64_t> masks)
{
  tree zero = build_int_cst (gcov_type_node, 0);
  tree poison = build_int_cst (gcov_type_node, ~0ULL);
  /* maps[0] marks the condition (decision) nodes, maps[1] the whole
     subgraph, as built by find_conditions.  */
  const sbitmap core = maps[0];
  const sbitmap allg = maps[1];

  /* table[bb] collects, per incoming edge, the accumulator values that
     reach bb; the zerocounter is the all-zero starting state.  */
  hash_map<basic_block, vec<counters>> table;
  counters zerocounter;
  zerocounter.e = NULL;
  zerocounter[0] = zero;
  zerocounter[1] = zero;
  zerocounter[2] = zero;

  /* xi is the index of the current term; rhs is the single-bit constant
     1 << xi ORed into the accumulator when term xi is evaluated.  */
  unsigned xi = 0;
  bool increment = false;
  tree rhs = build_int_cst (gcov_type_node, 1ULL << xi);
  for (basic_block current : expr)
    {
      vec<counters> &candidates = table.get_or_insert (current);
      if (candidates.is_empty ())
	candidates.safe_push (zerocounter);
      counters prev = resolve_counters (candidates);

      /* The previous iteration found a condition node, so this node is the
	 next term; advance the bit index lazily here rather than at the
	 point of detection so intermediate nodes do not consume bits.  */
      if (increment)
	{
	  xi += 1;
	  gcc_checking_assert (xi < sizeof (uint64_t) * BITS_PER_UNIT);
	  rhs = build_int_cst (gcov_type_node, 1ULL << xi);
	  increment = false;
	}

      for (edge e : current->succs)
	{
	  counters next = prev;
	  next.e = e;

	  if (bitmap_bit_p (core, e->src->index) && (e->flags & EDGE_CONDITION))
	    {
	      /* A true/false outcome of term xi: record the outcome bit and,
		 when this term masks earlier terms, OR the masking vector
		 into the mask accumulator.  */
	      const int k = condition_index (e->flags);
	      next[k] = emit_bitwise_op (e, prev[k], BIT_IOR_EXPR, rhs);
	      if (masks[2 * xi + k])
		{
		  tree m = build_int_cst (gcov_type_node, masks[2 * xi + k]);
		  next[2] = emit_bitwise_op (e, prev[2], BIT_IOR_EXPR, m);
		}
	      increment = true;
	    }
	  else if (e->flags & EDGE_COMPLEX)
	    {
	      /* A complex edge has been taken - wipe the accumulators and
		 poison the mask so that this path does not contribute to
		 coverage.  */
	      next[0] = poison;
	      next[1] = poison;
	      next[2] = poison;
	    }
	  table.get_or_insert (e->dest).safe_push (next);
	}
    }

  /* Since this is also the return value, the number of conditions, make sure
     to include the increment of the last basic block.  */
  if (increment)
    xi += 1;

  gcc_assert (xi == bitmap_count_bits (core));

  const tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
  const bool atomic = flag_profile_update == PROFILE_UPDATE_ATOMIC;
  const tree atomic_ior
    = builtin_decl_explicit (TYPE_PRECISION (gcov_type_node) > 32
			     ? BUILT_IN_ATOMIC_FETCH_OR_8
			     : BUILT_IN_ATOMIC_FETCH_OR_4);

  /* Flush to the gcov accumulators.  */
  for (const basic_block b : expr)
    {
      if (!bitmap_bit_p (core, b->index))
	continue;

      for (edge e : b->succs)
	{
	  /* Flush the accumulators on leaving the Boolean function.  The
	     destination may be inside the function only when it returns to
	     the loop header, such as do { ... } while (x);  */
	  if (bitmap_bit_p (allg, e->dest->index))
	    {
	      if (!(e->flags & EDGE_DFS_BACK))
		continue;
	      if (e->dest != expr[0])
		continue;
	    }

	  vec<counters> *cands = table.get (e->dest);
	  gcc_assert (cands);
	  counters *prevp = find_counters (*cands, e);
	  gcc_assert (prevp);
	  counters prev = *prevp;

	  /* _true &= ~mask, _false &= ~mask */
	  counters next;
	  next[2] = emit_bitwise_op (e, prev[2], BIT_NOT_EXPR);
	  next[0] = emit_bitwise_op (e, prev[0], BIT_AND_EXPR, next[2]);
	  next[1] = emit_bitwise_op (e, prev[1], BIT_AND_EXPR, next[2]);

	  /* _global_true |= _true, _global_false |= _false */
	  for (size_t k = 0; k != 2; ++k)
	    {
	      tree ref = tree_coverage_counter_ref (GCOV_COUNTER_CONDS,
						    2 * condno + k);
	      if (atomic)
		{
		  ref = unshare_expr (ref);
		  gcall *flush = gimple_build_call (atomic_ior, 3,
						    build_addr (ref),
						    next[k], relaxed);
		  gsi_insert_on_edge (e, flush);
		}
	      else
		{
		  /* Non-atomic read-modify-write of the global counter.  */
		  tree get = emit_assign (e, ref);
		  tree put = emit_bitwise_op (e, next[k], BIT_IOR_EXPR, get);
		  emit_assign (e, unshare_expr (ref), put);
		}
	    }
	}
    }

  /* xi is the number of conditions instrumented, as promised to callers.  */
  return xi;
}
1199 :
1200 : #undef CONDITIONS_MAX_TERMS
1201 : #undef EDGE_CONDITION
1202 :
1203 : /* Do initialization work for the edge profiler. */
1204 :
1205 : /* Add code:
1206 : __thread gcov *__gcov_indirect_call.counters; // pointer to actual counter
1207 : __thread void *__gcov_indirect_call.callee; // actual callee address
1208 : __thread int __gcov_function_counter; // time profiler function counter */
1209 : static void
1210 429 : init_ic_make_global_vars (void)
1211 : {
1212 429 : tree gcov_type_ptr;
1213 :
1214 429 : gcov_type_ptr = build_pointer_type (get_gcov_type ());
1215 :
1216 429 : tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
1217 :
1218 : /* callee */
1219 429 : ic_tuple_callee_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
1220 : ptr_type_node);
1221 :
1222 : /* counters */
1223 429 : ic_tuple_counters_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
1224 : NULL_TREE, gcov_type_ptr);
1225 429 : DECL_CHAIN (ic_tuple_counters_field) = ic_tuple_callee_field;
1226 :
1227 429 : finish_builtin_struct (tuple_type, "indirect_call_tuple",
1228 : ic_tuple_counters_field, NULL_TREE);
1229 :
1230 429 : ic_tuple_var
1231 429 : = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1232 : get_identifier ("__gcov_indirect_call"), tuple_type);
1233 429 : TREE_PUBLIC (ic_tuple_var) = 1;
1234 429 : DECL_ARTIFICIAL (ic_tuple_var) = 1;
1235 429 : DECL_INITIAL (ic_tuple_var) = NULL;
1236 429 : DECL_EXTERNAL (ic_tuple_var) = 1;
1237 429 : if (targetm.have_tls)
1238 429 : set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var));
1239 429 : }
1240 :
1241 : /* Create the type and function decls for the interface with gcov. */
1242 :
1243 : void
1244 2510 : gimple_init_gcov_profiler (void)
1245 : {
1246 2510 : tree interval_profiler_fn_type;
1247 2510 : tree pow2_profiler_fn_type;
1248 2510 : tree topn_values_profiler_fn_type;
1249 2510 : tree gcov_type_ptr;
1250 2510 : tree ic_profiler_fn_type;
1251 2510 : tree average_profiler_fn_type;
1252 2510 : const char *fn_name;
1253 :
1254 2510 : if (!gcov_type_node)
1255 : {
1256 412 : const char *fn_suffix
1257 429 : = flag_profile_update == PROFILE_UPDATE_ATOMIC ? "_atomic" : "";
1258 :
1259 429 : gcov_type_node = get_gcov_type ();
1260 429 : gcov_type_ptr = build_pointer_type (gcov_type_node);
1261 :
1262 : /* void (*) (gcov_type *, gcov_type, int, unsigned) */
1263 429 : interval_profiler_fn_type
1264 429 : = build_function_type_list (void_type_node,
1265 : gcov_type_ptr, gcov_type_node,
1266 : integer_type_node,
1267 : unsigned_type_node, NULL_TREE);
1268 429 : fn_name = concat ("__gcov_interval_profiler", fn_suffix, NULL);
1269 429 : tree_interval_profiler_fn = build_fn_decl (fn_name,
1270 : interval_profiler_fn_type);
1271 429 : free (const_cast<char *> (fn_name));
1272 429 : TREE_NOTHROW (tree_interval_profiler_fn) = 1;
1273 429 : DECL_ATTRIBUTES (tree_interval_profiler_fn)
1274 429 : = tree_cons (get_identifier ("leaf"), NULL,
1275 429 : DECL_ATTRIBUTES (tree_interval_profiler_fn));
1276 :
1277 : /* void (*) (gcov_type *, gcov_type) */
1278 429 : pow2_profiler_fn_type
1279 429 : = build_function_type_list (void_type_node,
1280 : gcov_type_ptr, gcov_type_node,
1281 : NULL_TREE);
1282 429 : fn_name = concat ("__gcov_pow2_profiler", fn_suffix, NULL);
1283 429 : tree_pow2_profiler_fn = build_fn_decl (fn_name, pow2_profiler_fn_type);
1284 429 : free (const_cast<char *> (fn_name));
1285 429 : TREE_NOTHROW (tree_pow2_profiler_fn) = 1;
1286 429 : DECL_ATTRIBUTES (tree_pow2_profiler_fn)
1287 429 : = tree_cons (get_identifier ("leaf"), NULL,
1288 429 : DECL_ATTRIBUTES (tree_pow2_profiler_fn));
1289 :
1290 : /* void (*) (gcov_type *, gcov_type) */
1291 429 : topn_values_profiler_fn_type
1292 429 : = build_function_type_list (void_type_node,
1293 : gcov_type_ptr, gcov_type_node,
1294 : NULL_TREE);
1295 429 : fn_name = concat ("__gcov_topn_values_profiler", fn_suffix, NULL);
1296 429 : tree_topn_values_profiler_fn
1297 429 : = build_fn_decl (fn_name, topn_values_profiler_fn_type);
1298 429 : free (const_cast<char *> (fn_name));
1299 :
1300 429 : TREE_NOTHROW (tree_topn_values_profiler_fn) = 1;
1301 429 : DECL_ATTRIBUTES (tree_topn_values_profiler_fn)
1302 429 : = tree_cons (get_identifier ("leaf"), NULL,
1303 429 : DECL_ATTRIBUTES (tree_topn_values_profiler_fn));
1304 :
1305 429 : init_ic_make_global_vars ();
1306 :
1307 : /* void (*) (gcov_type, void *) */
1308 429 : ic_profiler_fn_type
1309 429 : = build_function_type_list (void_type_node,
1310 : gcov_type_node,
1311 : ptr_type_node,
1312 : NULL_TREE);
1313 429 : fn_name = concat ("__gcov_indirect_call_profiler_v4", fn_suffix, NULL);
1314 429 : tree_indirect_call_profiler_fn
1315 429 : = build_fn_decl (fn_name, ic_profiler_fn_type);
1316 429 : free (const_cast<char *> (fn_name));
1317 :
1318 429 : TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
1319 429 : DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
1320 429 : = tree_cons (get_identifier ("leaf"), NULL,
1321 429 : DECL_ATTRIBUTES (tree_indirect_call_profiler_fn));
1322 :
1323 429 : tree_time_profiler_counter
1324 429 : = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1325 : get_identifier ("__gcov_time_profiler_counter"),
1326 : get_gcov_type ());
1327 429 : TREE_PUBLIC (tree_time_profiler_counter) = 1;
1328 429 : DECL_EXTERNAL (tree_time_profiler_counter) = 1;
1329 429 : TREE_STATIC (tree_time_profiler_counter) = 1;
1330 429 : DECL_ARTIFICIAL (tree_time_profiler_counter) = 1;
1331 429 : DECL_INITIAL (tree_time_profiler_counter) = NULL;
1332 :
1333 : /* void (*) (gcov_type *, gcov_type) */
1334 429 : average_profiler_fn_type
1335 429 : = build_function_type_list (void_type_node,
1336 : gcov_type_ptr, gcov_type_node, NULL_TREE);
1337 429 : fn_name = concat ("__gcov_average_profiler", fn_suffix, NULL);
1338 429 : tree_average_profiler_fn = build_fn_decl (fn_name,
1339 : average_profiler_fn_type);
1340 429 : free (const_cast<char *> (fn_name));
1341 429 : TREE_NOTHROW (tree_average_profiler_fn) = 1;
1342 429 : DECL_ATTRIBUTES (tree_average_profiler_fn)
1343 429 : = tree_cons (get_identifier ("leaf"), NULL,
1344 429 : DECL_ATTRIBUTES (tree_average_profiler_fn));
1345 429 : fn_name = concat ("__gcov_ior_profiler", fn_suffix, NULL);
1346 429 : tree_ior_profiler_fn = build_fn_decl (fn_name, average_profiler_fn_type);
1347 429 : free (const_cast<char *> (fn_name));
1348 429 : TREE_NOTHROW (tree_ior_profiler_fn) = 1;
1349 429 : DECL_ATTRIBUTES (tree_ior_profiler_fn)
1350 429 : = tree_cons (get_identifier ("leaf"), NULL,
1351 429 : DECL_ATTRIBUTES (tree_ior_profiler_fn));
1352 :
1353 : /* LTO streamer needs assembler names. Because we create these decls
1354 : late, we need to initialize them by hand. */
1355 429 : DECL_ASSEMBLER_NAME (tree_interval_profiler_fn);
1356 429 : DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn);
1357 429 : DECL_ASSEMBLER_NAME (tree_topn_values_profiler_fn);
1358 429 : DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn);
1359 429 : DECL_ASSEMBLER_NAME (tree_average_profiler_fn);
1360 429 : DECL_ASSEMBLER_NAME (tree_ior_profiler_fn);
1361 : }
1362 2510 : }
1363 :
1364 : /* If RESULT is not null, then output instructions as GIMPLE trees to assign
1365 : the updated counter from CALL of FUNC to RESULT. Insert the CALL and the
1366 : optional assignment instructions to GSI. Use NAME for temporary values. */
1367 :
1368 : static inline void
1369 556 : gen_assign_counter_update (gimple_stmt_iterator *gsi, gcall *call, tree func,
1370 : tree result, const char *name)
1371 : {
1372 556 : if (result)
1373 : {
1374 17 : tree result_type = TREE_TYPE (TREE_TYPE (func));
1375 17 : tree tmp1 = make_temp_ssa_name (result_type, NULL, name);
1376 17 : gimple_set_lhs (call, tmp1);
1377 17 : gsi_insert_after (gsi, call, GSI_NEW_STMT);
1378 17 : tree tmp2 = make_temp_ssa_name (TREE_TYPE (result), NULL, name);
1379 17 : gassign *assign = gimple_build_assign (tmp2, NOP_EXPR, tmp1);
1380 17 : gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1381 17 : assign = gimple_build_assign (result, tmp2);
1382 17 : gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1383 : }
1384 : else
1385 539 : gsi_insert_after (gsi, call, GSI_NEW_STMT);
1386 556 : }
1387 :
1388 : /* Output instructions as GIMPLE trees to increment the COUNTER. If RESULT is
1389 : not null, then assign the updated counter value to RESULT. Insert the
1390 : instructions to GSI. Use NAME for temporary values. */
1391 :
static inline void
gen_counter_update (gimple_stmt_iterator *gsi, tree counter, tree result,
		    const char *name)
{
  tree type = gcov_type_node;
  tree addr = build_fold_addr_expr (counter);
  tree one = build_int_cst (type, 1);
  tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);

  /* A full-width atomic builtin is used either when requested outright, or
     when a result is needed under the split strategy (the split sequence
     below cannot produce the updated 64-bit value).  */
  if (counter_update == COUNTER_UPDATE_ATOMIC_BUILTIN
      || (result && counter_update == COUNTER_UPDATE_ATOMIC_SPLIT))
    {
      /* __atomic_fetch_add (&counter, 1, MEMMODEL_RELAXED); */
      tree f = builtin_decl_explicit (TYPE_PRECISION (type) > 32
				      ? BUILT_IN_ATOMIC_ADD_FETCH_8
				      : BUILT_IN_ATOMIC_ADD_FETCH_4);
      gcall *call = gimple_build_call (f, 3, addr, one, relaxed);
      gen_assign_counter_update (gsi, call, f, result, name);
    }
  else if (!result && (counter_update == COUNTER_UPDATE_ATOMIC_SPLIT
		       || counter_update == COUNTER_UPDATE_ATOMIC_PARTIAL))
    {
      /* Emulate a 64-bit increment with two 32-bit atomics: bump the low
	 word, and carry into the high word when the low word wrapped to 0.

	 low = __atomic_add_fetch_4 (addr, 1, MEMMODEL_RELAXED);
	 high_inc = low == 0 ? 1 : 0;
	 __atomic_add_fetch_4 (addr_high, high_inc, MEMMODEL_RELAXED); */
      tree zero32 = build_zero_cst (uint32_type_node);
      tree one32 = build_one_cst (uint32_type_node);
      tree addr_high = make_temp_ssa_name (TREE_TYPE (addr), NULL, name);
      tree four = build_int_cst (size_type_node, 4);
      gassign *assign1 = gimple_build_assign (addr_high, POINTER_PLUS_EXPR,
					      addr, four);
      gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
      /* On big-endian targets the low word lives at the higher address.  */
      if (WORDS_BIG_ENDIAN)
	std::swap (addr, addr_high);
      tree f = builtin_decl_explicit (BUILT_IN_ATOMIC_ADD_FETCH_4);
      gcall *call1 = gimple_build_call (f, 3, addr, one, relaxed);
      tree low = make_temp_ssa_name (uint32_type_node, NULL, name);
      gimple_call_set_lhs (call1, low);
      gsi_insert_after (gsi, call1, GSI_NEW_STMT);
      tree is_zero = make_temp_ssa_name (boolean_type_node, NULL, name);
      gassign *assign2 = gimple_build_assign (is_zero, EQ_EXPR, low,
					      zero32);
      gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
      tree high_inc = make_temp_ssa_name (uint32_type_node, NULL, name);
      gassign *assign3 = gimple_build_assign (high_inc, COND_EXPR,
					      is_zero, one32, zero32);
      gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
      gcall *call2 = gimple_build_call (f, 3, addr_high, high_inc,
					relaxed);
      gsi_insert_after (gsi, call2, GSI_NEW_STMT);
    }
  else
    {
      /* Plain non-atomic load / add 1 / store, with an extra copy into
	 RESULT when the caller wants the updated value.  */
      tree tmp1 = make_temp_ssa_name (type, NULL, name);
      gassign *assign1 = gimple_build_assign (tmp1, counter);
      gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
      tree tmp2 = make_temp_ssa_name (type, NULL, name);
      gassign *assign2 = gimple_build_assign (tmp2, PLUS_EXPR, tmp1, one);
      gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
      gassign *assign3 = gimple_build_assign (unshare_expr (counter), tmp2);
      gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
      if (result)
	{
	  gassign *assign4 = gimple_build_assign (result, tmp2);
	  gsi_insert_after (gsi, assign4, GSI_NEW_STMT);
	}
    }
}
1460 :
1461 : /* Output instructions as GIMPLE trees to increment the edge
1462 : execution count, and insert them on E. */
1463 :
1464 : void
1465 6273 : gimple_gen_edge_profiler (int edgeno, edge e)
1466 : {
1467 6273 : gimple_stmt_iterator gsi = gsi_last (PENDING_STMT (e));
1468 6273 : tree counter = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1469 6273 : gen_counter_update (&gsi, counter, NULL_TREE, "PROF_edge_counter");
1470 6273 : }
1471 :
1472 : /* Emits code to get VALUE to instrument at GSI, and returns the
1473 : variable containing the value. */
1474 :
1475 : static tree
1476 110 : prepare_instrumented_value (gimple_stmt_iterator *gsi, histogram_value value)
1477 : {
1478 110 : tree val = value->hvalue.value;
1479 110 : if (POINTER_TYPE_P (TREE_TYPE (val)))
1480 29 : val = fold_convert (build_nonstandard_integer_type
1481 : (TYPE_PRECISION (TREE_TYPE (val)), 1), val);
1482 110 : return force_gimple_operand_gsi (gsi, fold_convert (gcov_type_node, val),
1483 110 : true, NULL_TREE, true, GSI_SAME_STMT);
1484 : }
1485 :
1486 : /* Output instructions as GIMPLE trees to increment the interval histogram
1487 : counter. VALUE is the expression whose value is profiled. TAG is the
1488 : tag of the section for counters, BASE is offset of the counter position. */
1489 :
1490 : void
1491 5 : gimple_gen_interval_profiler (histogram_value value, unsigned tag)
1492 : {
1493 5 : gimple *stmt = value->hvalue.stmt;
1494 5 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1495 5 : tree ref = tree_coverage_counter_ref (tag, 0), ref_ptr;
1496 5 : gcall *call;
1497 5 : tree val;
1498 10 : tree start = build_int_cst_type (integer_type_node,
1499 5 : value->hdata.intvl.int_start);
1500 10 : tree steps = build_int_cst_type (unsigned_type_node,
1501 5 : value->hdata.intvl.steps);
1502 :
1503 5 : ref_ptr = force_gimple_operand_gsi (&gsi,
1504 : build_addr (ref),
1505 : true, NULL_TREE, true, GSI_SAME_STMT);
1506 5 : val = prepare_instrumented_value (&gsi, value);
1507 5 : call = gimple_build_call (tree_interval_profiler_fn, 4,
1508 : ref_ptr, val, start, steps);
1509 5 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1510 5 : }
1511 :
1512 : /* Output instructions as GIMPLE trees to increment the power of two histogram
1513 : counter. VALUE is the expression whose value is profiled. TAG is the tag
1514 : of the section for counters. */
1515 :
1516 : void
1517 5 : gimple_gen_pow2_profiler (histogram_value value, unsigned tag)
1518 : {
1519 5 : gimple *stmt = value->hvalue.stmt;
1520 5 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1521 5 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1522 5 : gcall *call;
1523 5 : tree val;
1524 :
1525 5 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1526 : true, NULL_TREE, true, GSI_SAME_STMT);
1527 5 : val = prepare_instrumented_value (&gsi, value);
1528 5 : call = gimple_build_call (tree_pow2_profiler_fn, 2, ref_ptr, val);
1529 5 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1530 5 : }
1531 :
1532 : /* Output instructions as GIMPLE trees for code to find the most N common
1533 : values. VALUE is the expression whose value is profiled. TAG is the tag
1534 : of the section for counters. */
1535 :
1536 : void
1537 42 : gimple_gen_topn_values_profiler (histogram_value value, unsigned tag)
1538 : {
1539 42 : gimple *stmt = value->hvalue.stmt;
1540 42 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1541 42 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1542 42 : gcall *call;
1543 42 : tree val;
1544 :
1545 42 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1546 : true, NULL_TREE, true, GSI_SAME_STMT);
1547 42 : val = prepare_instrumented_value (&gsi, value);
1548 42 : call = gimple_build_call (tree_topn_values_profiler_fn, 2, ref_ptr, val);
1549 42 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1550 42 : }
1551 :
1552 :
1553 : /* Output instructions as GIMPLE trees for code to find the most
1554 : common called function in indirect call.
1555 : VALUE is the call expression whose indirect callee is profiled.
1556 : TAG is the tag of the section for counters. */
1557 :
void
gimple_gen_ic_profiler (histogram_value value, unsigned tag)
{
  tree tmp1;
  gassign *stmt1, *stmt2, *stmt3;
  gimple *stmt = value->hvalue.stmt;
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  tree ref_ptr = tree_coverage_counter_addr (tag, 0);

  ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
				      true, NULL_TREE, true, GSI_SAME_STMT);

  /* Insert code:

    stmt1: __gcov_indirect_call.counters = get_relevant_counter_ptr ();
    stmt2: tmp1 = (void *) (indirect call argument value)
    stmt3: __gcov_indirect_call.callee = tmp1;

    Example:
      f_1 = foo;
      __gcov_indirect_call.counters = &__gcov4.main[0];
      PROF_fn_9 = f_1;
      __gcov_indirect_call.callee = PROF_fn_9;
      _4 = f_1 ();
   */

  tree gcov_type_ptr = build_pointer_type (get_gcov_type ());

  /* References into the TLS tuple declared by init_ic_make_global_vars.  */
  tree counter_ref = build3 (COMPONENT_REF, gcov_type_ptr,
			     ic_tuple_var, ic_tuple_counters_field, NULL_TREE);

  stmt1 = gimple_build_assign (counter_ref, ref_ptr);
  tmp1 = make_temp_ssa_name (ptr_type_node, NULL, "PROF_fn");
  stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value));
  tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
			    ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
  stmt3 = gimple_build_assign (callee_ref, tmp1);

  /* All three stores must precede the indirect call itself.  */
  gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
  gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
  gsi_insert_before (&gsi, stmt3, GSI_SAME_STMT);
}
1600 :
1601 :
1602 : /* Output instructions as GIMPLE trees for code to find the most
1603 : common called function in indirect call. Insert instructions at the
1604 : beginning of every possible called function.
1605 : */
1606 :
1607 : void
1608 566 : gimple_gen_ic_func_profiler (void)
1609 : {
1610 566 : struct cgraph_node * c_node = cgraph_node::get (current_function_decl);
1611 566 : gcall *stmt1;
1612 566 : tree tree_uid, cur_func, void0;
1613 :
1614 : /* Disable indirect call profiling for an IFUNC resolver and its
1615 : callees since it requires TLS which hasn't been set up yet when
1616 : the dynamic linker is resolving IFUNC symbols. See
1617 : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
1618 : */
1619 566 : if (c_node->only_called_directly_p ()
1620 566 : || c_node->called_by_ifunc_resolver)
1621 36 : return;
1622 :
1623 530 : gimple_init_gcov_profiler ();
1624 :
1625 530 : basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1626 530 : basic_block cond_bb = split_edge (single_succ_edge (entry));
1627 530 : basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1628 :
1629 : /* We need to do an extra split in order to not create an input
1630 : for a possible PHI node. */
1631 530 : split_edge (single_succ_edge (update_bb));
1632 :
1633 530 : edge true_edge = single_succ_edge (cond_bb);
1634 530 : true_edge->flags = EDGE_TRUE_VALUE;
1635 :
     : /* NOTE(review): virtual methods are presumably reached mostly via
     : indirect calls, so the callee-set path is treated as hot for
     : them and cold otherwise -- confirm against profile data. */
1636 530 : profile_probability probability;
1637 530 : if (DECL_VIRTUAL_P (current_function_decl))
1638 11 : probability = profile_probability::very_likely ();
1639 : else
1640 519 : probability = profile_probability::unlikely ();
1641 :
1642 530 : true_edge->probability = probability;
1643 530 : edge e = make_edge (cond_bb, single_succ_edge (update_bb)->dest,
1644 : EDGE_FALSE_VALUE);
1645 530 : e->probability = true_edge->probability.invert ();
1646 :
1647 : /* Insert code:
1648 :
1649 : if (__gcov_indirect_call.callee != NULL)
1650 : __gcov_indirect_call_profiler_v3 (profile_id, &current_function_decl);
1651 :
1652 : The function __gcov_indirect_call_profiler_v3 is responsible for
1653 : resetting __gcov_indirect_call.callee to NULL. */
1654 :
1655 530 : gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1656 530 : void0 = build_int_cst (ptr_type_node, 0);
1657 :
1658 530 : tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1659 : ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1660 :
1661 530 : tree ref = force_gimple_operand_gsi (&gsi, callee_ref, true, NULL_TREE,
1662 : true, GSI_SAME_STMT);
1663 :
1664 530 : gcond *cond = gimple_build_cond (NE_EXPR, ref,
1665 : void0, NULL, NULL);
1666 530 : gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1667 :
1668 530 : gsi = gsi_after_labels (update_bb);
1669 :
1670 530 : cur_func = force_gimple_operand_gsi (&gsi,
1671 : build_addr (current_function_decl),
1672 : true, NULL_TREE,
1673 : true, GSI_SAME_STMT);
1674 530 : tree_uid = build_int_cst
1675 530 : (gcov_type_node,
1676 530 : cgraph_node::get (current_function_decl)->profile_id);
1677 530 : stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
1678 : tree_uid, cur_func);
1679 530 : gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1680 : }
1681 :
1682 : /* Output instructions as GIMPLE trees at the beginning of each function.
1683 : TAG is the tag of the section for counters; the counter at offset 0
1684 : records the order in which functions are first executed. */
1685 :
1686 : void
1687 567 : gimple_gen_time_profiler (unsigned tag)
1688 : {
1689 567 : tree type = get_gcov_type ();
1690 567 : basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1691 567 : basic_block cond_bb = split_edge (single_succ_edge (entry));
1692 567 : basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1693 :
1694 : /* We need to do an extra split in order to not create an input
1695 : for a possible PHI node. */
1696 567 : split_edge (single_succ_edge (update_bb));
1697 :
1698 567 : edge true_edge = single_succ_edge (cond_bb);
1699 567 : true_edge->flags = EDGE_TRUE_VALUE;
1700 567 : true_edge->probability = profile_probability::unlikely ();
1701 567 : edge e
1702 567 : = make_edge (cond_bb, single_succ_edge (update_bb)->dest, EDGE_FALSE_VALUE);
1703 567 : e->probability = true_edge->probability.invert ();
1704 :
1705 567 : gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1706 567 : tree original_ref = tree_coverage_counter_ref (tag, 0);
1707 567 : tree ref = force_gimple_operand_gsi (&gsi, original_ref, true, NULL_TREE,
1708 : true, GSI_SAME_STMT);
1709 :
1710 : /* Emit: if (counters[0] == 0) -- the true edge leads to UPDATE_BB,
     : so the counter is only written on the function's first execution. */
1711 567 : gcond *cond = gimple_build_cond (EQ_EXPR, ref, build_int_cst (type, 0),
1712 : NULL, NULL);
1713 567 : gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1714 :
1715 : /* Emit: counters[0] = ++__gcov_time_profiler_counter. */
1716 567 : gsi = gsi_start_bb (update_bb);
1717 567 : gen_counter_update (&gsi, tree_time_profiler_counter, original_ref,
1718 : "PROF_time_profile");
1719 567 : }
1720 :
1721 : /* Output instructions as GIMPLE trees to increment the average histogram
1722 : counter. VALUE is the expression whose value is profiled. TAG is the
1723 : tag of the section for counters; the counter used is at offset 0. */
1724 :
1725 : void
1726 29 : gimple_gen_average_profiler (histogram_value value, unsigned tag)
1727 : {
1728 29 : gimple *stmt = value->hvalue.stmt;
1729 29 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1730 29 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1731 29 : gcall *call;
1732 29 : tree val;
1733 :
     : /* Gimplify the counter address and the profiled value, then hand
     : both to the libgcov runtime helper right before STMT. */
1734 29 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1735 : true, NULL_TREE,
1736 : true, GSI_SAME_STMT);
1737 29 : val = prepare_instrumented_value (&gsi, value);
1738 29 : call = gimple_build_call (tree_average_profiler_fn, 2, ref_ptr, val);
1739 29 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1740 29 : }
1741 :
1742 : /* Output instructions as GIMPLE trees to increment the ior histogram
1743 : counter. VALUE is the expression whose value is profiled. TAG is the
1744 : tag of the section for counters; the counter used is at offset 0. */
1745 :
1746 : void
1747 29 : gimple_gen_ior_profiler (histogram_value value, unsigned tag)
1748 : {
1749 29 : gimple *stmt = value->hvalue.stmt;
1750 29 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1751 29 : tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1752 29 : gcall *call;
1753 29 : tree val;
1754 :
     : /* Same shape as gimple_gen_average_profiler, but calls the ior
     : runtime helper instead. */
1755 29 : ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1756 : true, NULL_TREE, true, GSI_SAME_STMT);
1757 29 : val = prepare_instrumented_value (&gsi, value);
1758 29 : call = gimple_build_call (tree_ior_profiler_fn, 2, ref_ptr, val);
1759 29 : gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1760 29 : }
1761 :
     : /* Regexes compiled by parse_profile_file_filtering from the
     : -fprofile-filter-files and -fprofile-exclude-files options;
     : consulted by include_source_file_for_profile. */
1762 : static vec<regex_t> profile_filter_files;
1763 : static vec<regex_t> profile_exclude_files;
1764 :
1765 : /* Parse the list of provided REGEX patterns (separated by semicolons),
1766 : compile them (into expressions of type regex_t) and save them into the
1767 : V vector. If a regular expression fails to parse, an error message is
1768 : printed for FLAG_NAME. */
1769 :
1770 : static void
1771 1210 : parse_profile_filter (const char *regex, vec<regex_t> *v,
1772 : const char *flag_name)
1773 : {
1774 1210 : v->create (4);
1775 1210 : if (regex != NULL)
1776 : {
     : /* NOTE(review): STR is never freed (regcomp copies the pattern, so
     : this is a small once-per-compilation leak), and on a regcomp
     : failure the regexes already pushed remain in V. */
1777 3 : char *str = xstrdup (regex);
1778 6 : for (char *p = strtok (str, ";"); p != NULL; p = strtok (NULL, ";"))
1779 : {
1780 3 : regex_t r;
1781 3 : if (regcomp (&r, p, REG_EXTENDED | REG_NOSUB) != 0)
1782 : {
1783 0 : error ("invalid regular expression %qs in %qs",
1784 : p, flag_name);
1785 0 : return;
1786 : }
1787 :
1788 3 : v->safe_push (r);
1789 : }
1790 : }
1791 : }
1792 :
1793 : /* Parse values of -fprofile-filter-files and -fprofile-exclude-files
1794 : options. */
1795 :
1796 : static void
1797 605 : parse_profile_file_filtering ()
1798 : {
     : /* Populates the two file-scope vectors; paired with
     : release_profile_file_filtering at the end of tree_profiling. */
1799 605 : parse_profile_filter (flag_profile_filter_files, &profile_filter_files,
1800 : "-fprofile-filter-files");
1801 605 : parse_profile_filter (flag_profile_exclude_files, &profile_exclude_files,
1802 : "-fprofile-exclude-files");
1803 605 : }
1804 :
1805 : /* Release the vectors of compiled regular expressions. */
1806 :
1807 : static void
1808 605 : release_profile_file_filtering ()
1809 : {
     : /* NOTE(review): this releases the vectors' storage but never calls
     : regfree on the compiled patterns, leaking regcomp's internal
     : allocations -- harmless once per compilation, but worth noting. */
1810 605 : profile_filter_files.release ();
1811 605 : profile_exclude_files.release ();
1812 605 : }
1813 :
1814 : /* Return true when FILENAME should be instrumented based on
1815 : -fprofile-filter-files and -fprofile-exclude-files options. */
1816 :
1817 : static bool
1818 2559 : include_source_file_for_profile (const char *filename)
1819 : {
1820 : /* First check whether file is included in flag_profile_exclude_files.
     : Exclusion wins over inclusion: a file matched here is rejected even
     : if it also matches a -fprofile-filter-files regex. */
1821 2559 : for (unsigned i = 0; i < profile_exclude_files.length (); i++)
1822 2 : if (regexec (&profile_exclude_files[i],
1823 : filename, 0, NULL, 0) == REG_NOERROR)
1824 : return false;
1825 :
1826 : /* For non-empty flag_profile_filter_files include only files matching a
1827 : regex in the flag. */
1828 5114 : if (profile_filter_files.is_empty ())
1829 : return true;
1830 :
1831 2 : for (unsigned i = 0; i < profile_filter_files.length (); i++)
1832 2 : if (regexec (&profile_filter_files[i], filename, 0, NULL, 0) == REG_NOERROR)
1833 : return true;
1834 :
1835 : return false;
1836 : }
1837 :
     : /* Fallback definitions so tree_profiling can test the SI/DI
     : compare-and-swap insn availability as plain booleans on targets
     : that do not define these HAVE_* macros. */
1838 : #ifndef HAVE_sync_compare_and_swapsi
1839 : #define HAVE_sync_compare_and_swapsi 0
1840 : #endif
1841 : #ifndef HAVE_atomic_compare_and_swapsi
1842 : #define HAVE_atomic_compare_and_swapsi 0
1843 : #endif
1844 :
1845 : #ifndef HAVE_sync_compare_and_swapdi
1846 : #define HAVE_sync_compare_and_swapdi 0
1847 : #endif
1848 : #ifndef HAVE_atomic_compare_and_swapdi
1849 : #define HAVE_atomic_compare_and_swapdi 0
1850 : #endif
1851 :
1852 : /* Profile all functions in the callgraph. */
1853 :
1854 : static unsigned int
1855 605 : tree_profiling (void)
1856 : {
1857 605 : struct cgraph_node *node;
1858 :
1859 605 : coverage_init_file ();
1860 :
1861 : /* Verify whether we can utilize atomic update operations. */
1862 605 : bool can_support_atomic = targetm.have_libatomic;
1863 605 : unsigned HOST_WIDE_INT gcov_type_size
1864 605 : = tree_to_uhwi (TYPE_SIZE_UNIT (get_gcov_type ()))
1865 605 : ;
1866 605 : bool have_atomic_4
1867 605 : = HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi;
1868 1210 : bool have_atomic_8
1869 605 : = HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi;
     : /* NEEDS_SPLIT: 64-bit counters but only 4-byte CAS available, so
     : counter updates can at best be partially atomic. */
1870 605 : bool needs_split = gcov_type_size == 8 && !have_atomic_8 && have_atomic_4;
1871 605 : if (!can_support_atomic)
1872 : {
1873 605 : if (gcov_type_size == 4)
1874 : can_support_atomic = have_atomic_4;
1875 605 : else if (gcov_type_size == 8)
1876 605 : can_support_atomic = have_atomic_8;
1877 : }
1878 :
1879 605 : if (flag_profile_update == PROFILE_UPDATE_ATOMIC
1880 10 : && !can_support_atomic)
1881 : {
1882 0 : if (needs_split)
1883 : {
1884 0 : warning (0, "target does not fully support atomic profile "
1885 : "update, single mode is selected with partial "
1886 : "atomic updates");
1887 0 : counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1888 : }
1889 : else
1890 0 : warning (0, "target does not support atomic profile update, "
1891 : "single mode is selected");
1892 0 : flag_profile_update = PROFILE_UPDATE_SINGLE;
1893 : }
1894 605 : else if (flag_profile_update == PROFILE_UPDATE_PREFER_ATOMIC)
1895 : {
1896 9 : if (can_support_atomic)
1897 9 : flag_profile_update = PROFILE_UPDATE_ATOMIC;
1898 : else
1899 : {
1900 0 : if (needs_split)
1901 0 : counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1902 0 : flag_profile_update = PROFILE_UPDATE_SINGLE;
1903 : }
1904 : }
1905 :
1906 605 : if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
1907 : {
1908 19 : if (needs_split)
1909 0 : counter_update = COUNTER_UPDATE_ATOMIC_SPLIT;
1910 : else
1911 19 : counter_update = COUNTER_UPDATE_ATOMIC_BUILTIN;
1912 : }
1913 :
1914 : /* This is a small-ipa pass that gets called only once, from
1915 : cgraphunit.cc:ipa_passes(). */
1916 605 : gcc_assert (symtab->state == IPA_SSA);
1917 :
1918 605 : init_node_map (true);
1919 605 : parse_profile_file_filtering ();
1920 :
1921 3394 : FOR_EACH_DEFINED_FUNCTION (node)
1922 : {
1923 2789 : bool thunk = false;
1924 2789 : if (!gimple_has_body_p (node->decl) && !node->thunk)
1925 220 : continue;
1926 :
1927 : /* Don't profile functions produced for builtin stuff. */
1928 2569 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1929 0 : continue;
1930 :
1931 2569 : if (lookup_attribute ("no_profile_instrument_function",
1932 2569 : DECL_ATTRIBUTES (node->decl)))
1933 3 : continue;
1934 : /* Do not instrument extern inline functions when testing coverage.
1935 : While this is not perfectly consistent (early inlined extern inlines
1936 : will get accounted), testsuite expects that. */
1937 2566 : if (DECL_EXTERNAL (node->decl)
1938 2566 : && flag_test_coverage)
1939 7 : continue;
1940 :
1941 2559 : const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl));
1942 2559 : if (!include_source_file_for_profile (file))
1943 2 : continue;
1944 :
1945 2557 : if (node->thunk)
1946 : {
1947 : /* We cannot expand variadic thunks to Gimple. */
1948 14 : if (stdarg_p (TREE_TYPE (node->decl)))
1949 0 : continue;
1950 14 : thunk = true;
1951 : /* When generate profile, expand thunk to gimple so it can be
1952 : instrumented same way as other functions. */
1953 14 : if (coverage_instrumentation_p ())
1954 7 : expand_thunk (node, false, true);
1955 : /* Read cgraph profile but keep function as thunk at profile-use
1956 : time. */
1957 : else
1958 : {
1959 7 : read_thunk_profile (node);
1960 7 : continue;
1961 : }
1962 : }
1963 :
1964 2550 : push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1965 :
1966 2550 : if (dump_file)
1967 143 : dump_function_header (dump_file, cfun->decl, dump_flags);
1968 :
1969 : /* Local pure-const may imply need to fixup the cfg. */
1970 2550 : if (gimple_has_body_p (node->decl)
1971 2550 : && (execute_fixup_cfg () & TODO_cleanup_cfg))
1972 225 : cleanup_tree_cfg ();
1973 :
1974 2550 : branch_prob (thunk);
1975 :
1976 2550 : if (! flag_branch_probabilities
1977 1991 : && flag_profile_values)
1978 566 : gimple_gen_ic_func_profiler ();
1979 :
1980 2550 : if (flag_branch_probabilities
1981 559 : && !thunk
1982 559 : && flag_profile_values
1983 413 : && flag_value_profile_transformations
1984 413 : && profile_status_for_fn (cfun) == PROFILE_READ)
1985 333 : gimple_value_profile_transformations ();
1986 :
1987 : /* The above could hose dominator info. Currently there is
1988 : none coming in, this is a safety valve. It should be
1989 : easy to adjust it, if and when there is some. */
1990 2550 : free_dominance_info (CDI_DOMINATORS);
1991 2550 : free_dominance_info (CDI_POST_DOMINATORS);
1992 2550 : pop_cfun ();
1993 : }
1994 :
1995 605 : release_profile_file_filtering ();
1996 :
1997 : /* Drop pure/const flags from instrumented functions. */
1998 605 : if (coverage_instrumentation_p () || flag_test_coverage)
1999 2649 : FOR_EACH_DEFINED_FUNCTION (node)
2000 : {
2001 2204 : if (!gimple_has_body_p (node->decl)
2002 2204 : || !(!node->clone_of
2003 0 : || node->decl != node->clone_of->decl))
2004 200 : continue;
2005 :
2006 : /* Don't profile functions produced for builtin stuff. */
2007 2004 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
2008 0 : continue;
2009 :
2010 2004 : node->set_const_flag (false, false);
2011 2004 : node->set_pure_flag (false, false);
2012 : }
2013 :
2014 : /* Update call statements and rebuild the cgraph. */
2015 3394 : FOR_EACH_DEFINED_FUNCTION (node)
2016 : {
2017 2789 : basic_block bb;
2018 :
2019 2789 : if (!gimple_has_body_p (node->decl)
2020 2789 : || !(!node->clone_of
2021 0 : || node->decl != node->clone_of->decl))
2022 227 : continue;
2023 :
2024 : /* Don't profile functions produced for builtin stuff. */
2025 2562 : if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
2026 0 : continue;
2027 :
2028 2562 : push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2029 :
2030 2562 : if (coverage_instrumentation_p () || flag_test_coverage)
2031 17606 : FOR_EACH_BB_FN (bb, cfun)
2032 : {
2033 15602 : gimple_stmt_iterator gsi;
2034 86497 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2035 : {
2036 55293 : gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
2037 4719 : if (!call || gimple_call_internal_p (call))
2038 50659 : continue;
2039 :
2040 : /* We do not clear pure/const on decls without body. */
2041 4634 : tree fndecl = gimple_call_fndecl (call);
2042 4634 : cgraph_node *callee;
2043 6637 : if (fndecl
2044 4576 : && (callee = cgraph_node::get (fndecl))
2045 8810 : && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
2046 2003 : continue;
2047 :
2048 : /* Drop the const attribute from the call type (the pure
2049 : attribute is not available on types). */
2050 2631 : tree fntype = gimple_call_fntype (call);
2051 2631 : if (fntype && TYPE_READONLY (fntype))
2052 : {
2053 1 : int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST;
2054 1 : fntype = build_qualified_type (fntype, quals);
2055 1 : gimple_call_set_fntype (call, fntype);
2056 : }
2057 :
2058 : /* Update virtual operands of calls to no longer const/pure
2059 : functions. */
2060 2631 : update_stmt (call);
2061 : }
2062 : }
2063 :
2064 : /* re-merge split blocks. */
2065 2564 : cleanup_tree_cfg ();
2066 2562 : update_ssa (TODO_update_ssa);
2067 :
2068 2562 : cgraph_edge::rebuild_edges ();
2069 :
2070 2562 : pop_cfun ();
2071 : }
2072 :
2073 605 : handle_missing_profiles ();
2074 :
2075 605 : del_node_map ();
2076 605 : end_branch_prob ();
2077 605 : coverage_finish_file ();
2078 605 : return 0;
2079 : }
2079 :
2080 : namespace {
2081 :
2082 : const pass_data pass_data_ipa_tree_profile =
2083 : {
2084 : SIMPLE_IPA_PASS, /* type */
2085 : "profile", /* name */
2086 : OPTGROUP_NONE, /* optinfo_flags */
2087 : TV_IPA_PROFILE, /* tv_id */
2088 : 0, /* properties_required */
2089 : 0, /* properties_provided */
2090 : 0, /* properties_destroyed */
2091 : 0, /* todo_flags_start */
2092 : TODO_dump_symtab, /* todo_flags_finish */
2093 : };
2094 :
     : /* Simple IPA pass wrapper that runs tree_profiling once over the
     : whole call graph. */
2095 : class pass_ipa_tree_profile : public simple_ipa_opt_pass
2096 : {
2097 : public:
2098 287872 : pass_ipa_tree_profile (gcc::context *ctxt)
2099 575744 : : simple_ipa_opt_pass (pass_data_ipa_tree_profile, ctxt)
2100 : {}
2101 :
2102 : /* opt_pass methods: */
2103 : bool gate (function *) final override;
2104 605 : unsigned int execute (function *) final override { return tree_profiling (); }
2105 :
2106 : }; // class pass_ipa_tree_profile
2107 :
2108 : bool
2109 232112 : pass_ipa_tree_profile::gate (function *)
2110 : {
2111 : /* Run only when profile instrumentation, profile use, or test
2112 : coverage is requested, outside of LTO, and no errors were seen. */
2113 232112 : return (!in_lto_p
2114 232112 : && (flag_branch_probabilities || flag_test_coverage
2115 231784 : || coverage_instrumentation_p ())
2116 232721 : && !seen_error ());
2117 : }
2118 :
2119 : } // anon namespace
2119 :
     : /* Factory for the pass object; ownership passes to the pass manager. */
2120 : simple_ipa_opt_pass *
2121 287872 : make_pass_ipa_tree_profile (gcc::context *ctxt)
2122 : {
2123 287872 : return new pass_ipa_tree_profile (ctxt);
2124 : }
2125 :
2126 : #include "gt-tree-profile.h"
|