Line data Source code
1 : /* Detect paths through the CFG which can never be executed in a conforming
2 : program and isolate them.
3 :
4 : Copyright (C) 2013-2026 Free Software Foundation, Inc.
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify
9 : it under the terms of the GNU General Public License as published by
10 : the Free Software Foundation; either version 3, or (at your option)
11 : any later version.
12 :
13 : GCC is distributed in the hope that it will be useful,
14 : but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : GNU General Public License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #include "config.h"
23 : #include "system.h"
24 : #include "coretypes.h"
25 : #include "backend.h"
26 : #include "tree.h"
27 : #include "gimple.h"
28 : #include "cfghooks.h"
29 : #include "tree-pass.h"
30 : #include "ssa.h"
31 : #include "diagnostic-core.h"
32 : #include "fold-const.h"
33 : #include "gimple-iterator.h"
34 : #include "gimple-walk.h"
35 : #include "tree-ssa.h"
36 : #include "cfgloop.h"
37 : #include "tree-cfg.h"
38 : #include "cfganal.h"
39 : #include "intl.h"
40 :
41 :
42 : static bool cfg_altered;
43 :
44 : /* Callback for walk_stmt_load_store_ops.
45 :
46 : Return TRUE if OP will dereference the tree stored in DATA, FALSE
47 : otherwise.
48 :
49 : This routine only makes a superficial check for a dereference. Thus,
50 : it must only be used if it is safe to return a false negative. */
51 : static bool
52 2912 : check_loadstore (gimple *stmt, tree op, tree, void *data)
53 : {
54 2912 : if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF)
55 2912 : && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0))
56 : {
57 2807 : TREE_THIS_VOLATILE (op) = 1;
58 2807 : TREE_SIDE_EFFECTS (op) = 1;
59 2807 : gimple_set_has_volatile_ops (stmt, true);
60 2807 : return true;
61 : }
62 : return false;
63 : }
64 :
/* Statements after which a trap was inserted while post-dominator info
   was live; the block split is deferred in that case (see insert_trap)
   and the caller is expected to perform it later.  TODO confirm: the
   consumer of this worklist is outside this view.  */
static vec<gimple *> *bb_split_points;

/* Insert a trap after SI and split the block after the trap.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.  Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.

     Note this first walk also marks the matching memory reference
     volatile via check_loadstore, which the second walk below relies
     on being already done.  */
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
	 type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  /* Build the __builtin_trap call.  */
  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement, for the other cases we want
     to insert before the statement.  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
				check_loadstore,
				check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
	{
	  /* Defer the block split when post-dominator info is live so
	     we do not invalidate it mid-traversal; otherwise split
	     immediately.  */
	  if (dom_info_available_p (CDI_POST_DOMINATORS))
	    bb_split_points->safe_push (stmt);
	  else
	    split_block (gimple_bb (stmt), stmt);
	  return;
	}
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  if (dom_info_available_p (CDI_POST_DOMINATORS))
    bb_split_points->safe_push (new_stmt);
  else
    split_block (gimple_bb (new_stmt), new_stmt);
  /* Re-acquire an iterator for STMT; the insertions/splits above may
     have invalidated *SI_P.  */
  *si_p = gsi_for_stmt (stmt);
}
125 :
/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB' (which may be equal to DUPLICATE).  */

ATTRIBUTE_RETURNS_NONNULL basic_block
isolate_path (basic_block bb, basic_block duplicate,
	      edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;
  bool impossible = true;
  /* Capture E's count now; redirecting E below changes it.  */
  profile_count count = e->count ();

  /* If no statement before STMT can terminate the block, then reaching
     STMT (and thus the undefined behavior) is unavoidable, so the edge
     is marked as impossible/cold.  */
  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
    if (stmt_can_terminate_bb_p (gsi_stmt (si)))
      {
	impossible = false;
	break;
      }
  force_edge_cold (e, impossible);

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      duplicate->count = profile_count::zero ();
      if (!ret_zero)
	for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
	  remove_edge (e2);
    }
  bb->count -= count;

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    {
      flush_pending_stmts (e2);

      /* Update profile only when redirection is really processed.  */
      bb->count += e->count ();
    }

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     duplicate.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
	{
	  /* Instead of trapping, rewrite the duplicated return to
	     return a zero of the appropriate type.  */
	  greturn *ret = as_a <greturn *> (gsi_stmt (si2));
	  tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
	  gimple_return_set_retval (ret, zero);
	  update_stmt (ret);
	}
      else
	insert_trap (&si2, op);
    }

  return duplicate;
}
233 :
234 : /* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor.
235 : FALSE otherwise. */
236 :
237 : static bool
238 63082217 : is_divmod_with_given_divisor (gimple *stmt, tree divisor)
239 : {
240 : /* Only assignments matter. */
241 63082217 : if (!is_gimple_assign (stmt))
242 : return false;
243 :
244 : /* Check for every DIV/MOD expression. */
245 14516957 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
246 14516957 : if (rhs_code == TRUNC_DIV_EXPR
247 14516957 : || rhs_code == FLOOR_DIV_EXPR
248 14468166 : || rhs_code == CEIL_DIV_EXPR
249 14468166 : || rhs_code == EXACT_DIV_EXPR
250 : || rhs_code == ROUND_DIV_EXPR
251 14421694 : || rhs_code == TRUNC_MOD_EXPR
252 : || rhs_code == FLOOR_MOD_EXPR
253 14377894 : || rhs_code == CEIL_MOD_EXPR
254 14377445 : || rhs_code == ROUND_MOD_EXPR)
255 : {
256 : /* Pointer equality is fine when DIVISOR is an SSA_NAME, but
257 : not sufficient for constants which may have different types. */
258 139530 : if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0))
259 : return true;
260 : }
261 : return false;
262 : }
263 :
264 : /* NAME is an SSA_NAME that we have already determined has the value 0 or NULL.
265 :
266 : Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results
267 : in undefined behavior, FALSE otherwise
268 :
269 : LOC is used for issuing diagnostics. This case represents potential
270 : undefined behavior exposed by path splitting and that's reflected in
271 : the diagnostic. */
272 :
273 : bool
274 733775 : stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc)
275 : {
276 : /* If we are working with a non pointer type, then see
277 : if this use is a DIV/MOD operation using NAME as the
278 : divisor. */
279 733775 : if (!POINTER_TYPE_P (TREE_TYPE (name)))
280 : {
281 613063 : if (!cfun->can_throw_non_call_exceptions)
282 404429 : return is_divmod_with_given_divisor (use_stmt, name);
283 : return false;
284 : }
285 :
286 : /* NAME is a pointer, so see if it's used in a context where it must
287 : be non-NULL. */
288 120712 : bool by_dereference
289 120712 : = infer_nonnull_range_by_dereference (use_stmt, name);
290 :
291 120712 : if (by_dereference
292 120712 : || infer_nonnull_range_by_attribute (use_stmt, name))
293 : {
294 :
295 1280 : if (by_dereference)
296 : {
297 1270 : warning_at (loc, OPT_Wnull_dereference,
298 : "potential null pointer dereference");
299 1270 : if (!flag_isolate_erroneous_paths_dereference)
300 : return false;
301 : }
302 : else
303 : {
304 10 : if (!flag_isolate_erroneous_paths_attribute)
305 : return false;
306 : }
307 1272 : return true;
308 : }
309 : return false;
310 : }
311 :
312 : /* Return TRUE if USE_STMT uses 0 or NULL in a context which results in
313 : undefined behavior, FALSE otherwise.
314 :
315 : These cases are explicit in the IL. */
316 :
317 : bool
318 73591436 : stmt_uses_0_or_null_in_undefined_way (gimple *stmt)
319 : {
320 73591436 : if (!cfun->can_throw_non_call_exceptions
321 73591436 : && is_divmod_with_given_divisor (stmt, integer_zero_node))
322 : return true;
323 :
324 : /* By passing null_pointer_node, we can use the
325 : infer_nonnull_range functions to detect explicit NULL
326 : pointer dereferences and other uses where a non-NULL
327 : value is required. */
328 :
329 73591187 : bool by_dereference
330 73591187 : = infer_nonnull_range_by_dereference (stmt, null_pointer_node);
331 73591187 : if (by_dereference
332 73591187 : || infer_nonnull_range_by_attribute (stmt, null_pointer_node))
333 : {
334 1235 : if (by_dereference)
335 : {
336 919 : location_t loc = gimple_location (stmt);
337 919 : warning_at (loc, OPT_Wnull_dereference,
338 : "null pointer dereference");
339 919 : if (!flag_isolate_erroneous_paths_dereference)
340 : return false;
341 : }
342 : else
343 : {
344 316 : if (!flag_isolate_erroneous_paths_attribute)
345 : return false;
346 : }
347 920 : return true;
348 : }
349 : return false;
350 : }
351 :
/* Describes the property of a return statement that may return
   the address of one or more local variables.  The type must
   be safely assignable and copyable so that it can be stored in
   a hash_map.  */
class args_loc_t
{
 public:

  args_loc_t (): nargs (), locvec (), ptr (&ptr)
  {
    locvec.create (4);
  }

  /* Copy constructor; deep-copies LOCVEC and re-seats PTR so the
     destructor's self-pointer check still holds for the copy.  */
  args_loc_t (const args_loc_t &rhs)
    : nargs (rhs.nargs), locvec (rhs.locvec.copy ()), ptr (&ptr) { }

  args_loc_t& operator= (const args_loc_t &rhs)
  {
    nargs = rhs.nargs;
    locvec.release ();
    locvec = rhs.locvec.copy ();
    return *this;
  }

  ~args_loc_t ()
  {
    locvec.release ();
    /* PTR must still point at itself; a bitwise copy (which would
       double-free LOCVEC) is caught here.  */
    gcc_assert (ptr == &ptr);
  }

  /* For a PHI in a return statement its number of arguments.  When greater
     than LOCVEC.LENGTH () implies that an address of one of the locals in
     LOCVEC may but need not be returned by the statement.  Otherwise,
     unless both are zero, it implies it definitely is returned.  */
  unsigned nargs;
  /* The locations of local variables/alloca calls returned by the return
     statement.  Avoid using auto_vec here since it's not safe to copy due
     to pr90904.  */
  vec <location_t> locvec;
  /* Self-pointer sentinel used to detect bitwise copies; see dtor.  */
  void *ptr;
};

/* A mapping from a return statement to the locations of local variables
   whose addresses it may return.  */
typedef hash_map <gimple *, args_loc_t> locmap_t;
397 :
/* Given the LOCMAP mapping, issue diagnostics about returning addresses
   of local variables.  When MAYBE is set, all diagnostics will be of
   the "may return" kind.  Otherwise each will be determined based on
   the equality of the corresponding NARGS and LOCVEC.LENGTH () values.  */

static void
diag_returned_locals (bool maybe, const locmap_t &locmap)
{
  for (locmap_t::iterator it = locmap.begin (); it != locmap.end (); ++it)
    {
      gimple *stmt = (*it).first;
      const args_loc_t &argsloc = (*it).second;
      location_t stmtloc = gimple_location (stmt);
      if (stmtloc == UNKNOWN_LOCATION)
	/* When multiple return statements are merged into one it
	   may not have an associated location.  Use the location
	   of the closing brace instead.  */
	stmtloc = cfun->function_end_locus;

      auto_diagnostic_group d;
      unsigned nargs = argsloc.locvec.length ();
      /* NARGS > LOCVEC.LENGTH () means only some of the statement's
	 operands are known to be local addresses, hence "may return".  */
      if (warning_at (stmtloc, OPT_Wreturn_local_addr,
		      (maybe || argsloc.nargs > nargs
		       ? G_("function may return address of local variable")
		       : G_("function returns address of local variable"))))
	{
	  /* Point at each local whose address may be returned.  */
	  for (unsigned i = 0; i != nargs; ++i)
	    inform (argsloc.locvec[i], "declared here");
	}
    }
}
429 :
/* Return true if EXPR is an expression of pointer type that refers
   to the address of one or more variables with automatic storage
   duration.  If so, add an entry to *PLOCMAP and insert into
   PLOCMAP->LOCVEC the locations of the corresponding local variables
   whose address is returned by the RETURN_STMT (which may be set to
   (gimple*)-1 as a placeholder for such a statement).  VISITED is
   a bitmap of PHI nodes already visited by recursive calls.  When
   null, PHI expressions are not considered.  */

static bool
is_addr_local (gimple *return_stmt, tree exp, locmap_t *plocmap,
	       hash_set<gphi *> *visited)
{
  /* Case 1: a direct address-of expression.  */
  if (TREE_CODE (exp) == ADDR_EXPR)
    {
      tree baseaddr = get_base_address (TREE_OPERAND (exp, 0));
      if (TREE_CODE (baseaddr) == MEM_REF)
	/* &*p — recurse on the underlying pointer.  */
	return is_addr_local (return_stmt, TREE_OPERAND (baseaddr, 0),
			      plocmap, visited);

      /* Only local variables and parameters are of interest.  */
      if ((!VAR_P (baseaddr)
	   || is_global_var (baseaddr))
	  && TREE_CODE (baseaddr) != PARM_DECL)
	return false;

      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
      argsloc.locvec.safe_push (DECL_SOURCE_LOCATION (baseaddr));
      return true;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (exp)))
    return false;

  /* Case 2: an SSA_NAME — inspect its defining statement.  */
  if (TREE_CODE (exp) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
      enum gimple_code code = gimple_code (def_stmt);

      if (is_gimple_assign (def_stmt))
	{
	  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
	  if (POINTER_TYPE_P (type))
	    {
	      /* NB: shadows the enclosing gimple_code CODE above.  */
	      tree_code code = gimple_assign_rhs_code (def_stmt);
	      tree ptr1 = NULL_TREE, ptr2 = NULL_TREE;

	      /* Set to the number of arguments examined that should
		 be added to ARGSLOC->NARGS to identify expressions
		 only some but not all of whose operands refer to local
		 addresses.  */
	      unsigned nargs = 0;
	      if (code == COND_EXPR)
		{
		  ptr1 = gimple_assign_rhs2 (def_stmt);
		  ptr2 = gimple_assign_rhs3 (def_stmt);
		  nargs = 2;
		}
	      else if (code == MAX_EXPR || code == MIN_EXPR)
		{
		  ptr1 = gimple_assign_rhs1 (def_stmt);
		  ptr2 = gimple_assign_rhs2 (def_stmt);
		  nargs = 2;
		}
	      else if (code == ADDR_EXPR
		       || code == NOP_EXPR
		       || code == POINTER_PLUS_EXPR)
		/* Leave NARGS at zero and let the recursive call set it.  */
		ptr1 = gimple_assign_rhs1 (def_stmt);

	      /* Avoid short-circuiting the logical OR result in case
		 both operands refer to local variables, in which case
		 both should be considered and identified in the warning.  */
	      bool res1 = false, res2 = false;
	      if (ptr1)
		res1 = is_addr_local (return_stmt, ptr1, plocmap, visited);
	      if (ptr2)
		res2 = is_addr_local (return_stmt, ptr2, plocmap, visited);

	      if (nargs)
		if (args_loc_t *argsloc = plocmap->get (return_stmt))
		  argsloc->nargs += nargs;

	      return res1 || res2;
	    }
	  return false;
	}

      if (code == GIMPLE_CALL
	  && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
	{
	  /* Handle alloca and friends that return pointers to automatic
	     storage.  */
	  tree fn = gimple_call_fndecl (def_stmt);
	  /* NB: another shadow of CODE, here a built-in function code.  */
	  int code = DECL_FUNCTION_CODE (fn);
	  if (code == BUILT_IN_ALLOCA
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX)
	    {
	      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	      argsloc.locvec.safe_push (gimple_location (def_stmt));
	      return true;
	    }

	  if (gimple_call_num_args (def_stmt) < 1)
	    return false;

	  /* Recursively examine the first argument of calls to built-ins
	     that return it.  */
	  switch (code)
	    {
	    case BUILT_IN_MEMCPY:
	    case BUILT_IN_MEMCPY_CHK:
	    case BUILT_IN_MEMPCPY:
	    case BUILT_IN_MEMPCPY_CHK:
	    case BUILT_IN_MEMMOVE:
	    case BUILT_IN_MEMMOVE_CHK:
	    case BUILT_IN_STPCPY:
	    case BUILT_IN_STPCPY_CHK:
	    case BUILT_IN_STPNCPY:
	    case BUILT_IN_STPNCPY_CHK:
	    case BUILT_IN_STRCAT:
	    case BUILT_IN_STRCAT_CHK:
	    case BUILT_IN_STRCHR:
	    case BUILT_IN_STRCPY:
	    case BUILT_IN_STRCPY_CHK:
	    case BUILT_IN_STRNCAT:
	    case BUILT_IN_STRNCAT_CHK:
	    case BUILT_IN_STRNCPY:
	    case BUILT_IN_STRNCPY_CHK:
	    case BUILT_IN_STRRCHR:
	    case BUILT_IN_STRSTR:
	      return is_addr_local (return_stmt,
				    gimple_call_arg (def_stmt, 0),
				    plocmap, visited);
	    default:
	      return false;
	    }
	}

      if (code == GIMPLE_PHI && visited)
	{
	  gphi *phi_stmt = as_a <gphi *> (def_stmt);
	  /* Guard against PHI cycles; a repeat visit contributes
	     nothing new.  */
	  if (visited->add (phi_stmt))
	    return false;

	  unsigned count = 0;
	  unsigned nargs = gimple_phi_num_args (phi_stmt);
	  args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	  /* Bump up the number of operands examined by the number of
	     operands of this PHI.  */
	  argsloc.nargs += nargs;
	  for (unsigned i = 0; i < gimple_phi_num_args (phi_stmt); ++i)
	    {
	      tree arg = gimple_phi_arg_def (phi_stmt, i);
	      if (is_addr_local (return_stmt, arg, plocmap, visited))
		++count;
	    }
	  return count != 0;
	}
    }

  return false;
}
593 :
/* Detect returning the address of a local variable in a PHI result LHS
   and argument ARG and PHI edge E in basic block BB.  Add an entry for
   each use to LOCMAP, setting its NARGS member to the NARGS argument
   (the number of PHI operands) plus the number of arguments in binary
   expressions referenced by ARG.  Call isolate_path for each returned
   address and set *ISOLATED to true if called.
   Return either DUPLICATE or the most recent result of isolate_path.  */

static basic_block
handle_return_addr_local_phi_arg (basic_block bb, basic_block duplicate,
				  tree lhs, tree arg, edge e, locmap_t &locmap,
				  unsigned nargs, bool *isolated)
{
  /* Use (gimple*)-1 as a temporary placeholder and replace it with
     the return statement below once it is known.  Using a null doesn't
     work because it's used by the hash_map to mean "no-entry."  Pass
     null instead of a visited_phis bitmap to avoid descending into
     PHIs since they are being processed by the caller.  Those that
     remain will be checked again later.  */
  if (!is_addr_local ((gimple*)-1, arg, &locmap, NULL))
    {
      /* Remove the placeholder regardless of success or failure.  */
      locmap.remove ((gimple*)-1);
      return duplicate;
    }

  const args_loc_t* const placeargsloc = locmap.get ((gimple*)-1);
  const unsigned nlocs = placeargsloc->locvec.length ();
  gcc_assert (nlocs);

  /* Add to the number of PHI arguments determined by the caller
     the number of operands of the expressions referenced by ARG.
     This lets the caller determine whether it's dealing with
     a "may return" or "definitely returns."  */
  nargs += placeargsloc->nargs;

  /* Set to true if any expressions referenced by ARG involve
     multiple addresses only some of which are those of locals.  */
  bool maybe = placeargsloc->nargs > placeargsloc->locvec.length ();

  gimple *use_stmt;
  imm_use_iterator iter;

  /* Look for uses of the PHI result LHS in return statements.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      greturn *return_stmt = dyn_cast <greturn *> (use_stmt);
      if (!return_stmt)
	continue;

      if (gimple_return_retval (return_stmt) != lhs
	  || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), bb))
	continue;

      /* Add an entry for the return statement and the locations
	 of the PHI arguments obtained above to the map.  */
      args_loc_t &argsloc = locmap.get_or_insert (use_stmt);
      argsloc.nargs = nargs;
      unsigned nelts = argsloc.locvec.length () + nlocs;
      argsloc.locvec.reserve (nelts);
      argsloc.locvec.splice (placeargsloc->locvec);

      /* Only isolate paths that definitely return a local's address,
	 when isolation is enabled and the block is duplicatable.  */
      if (!maybe
	  && (flag_isolate_erroneous_paths_dereference
	      || flag_isolate_erroneous_paths_attribute)
	  && gimple_bb (use_stmt) == bb
	  && (duplicate || can_duplicate_block_p (bb)))
	{
	  duplicate = isolate_path (bb, duplicate, e,
				    use_stmt, lhs, true);

	  /* Let caller know the path has been isolated.  */
	  *isolated = true;
	}
    }

  locmap.remove ((gimple*)-1);

  return duplicate;
}
674 :
/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
static void
find_implicit_erroneous_behavior (void)
{
  locmap_t locmap;

  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;


      /* If BB has an edge to itself, then duplication of BB below
	 could result in reallocation of BB's PHI nodes.   If that happens
	 then the loop below over the PHIs would use the old PHI and
	 thus invalid information.  We don't have a good way to know
	 if a PHI has been reallocated, so just avoid isolation in
	 this case.  */
      if (find_edge (bb, bb))
	continue;

      /* First look for a PHI which sets a pointer to NULL and which
 	 is then dereferenced within BB.  This is somewhat overly
	 conservative, but probably catches most of the interesting
	 cases.  */
      basic_block duplicate = NULL;
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* PHI produces a pointer result.  See if any of the PHI's
	     arguments are NULL.

	     When we remove an edge, we want to reprocess the current
	     index since the argument at that index will have been
	     removed, hence the ugly way we update I for each iteration.  */
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi); i = next_i)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);

	      /* Advance the argument index unless a path involving
		 the current argument has been isolated.  */
	      next_i = i + 1;

	      if (!integer_zerop (arg))
		continue;

	      location_t phi_arg_loc = gimple_phi_arg_location (phi, i);

	      imm_use_iterator iter;
	      gimple *use_stmt;

	      /* We've got a NULL PHI argument.  Now see if the
 	         PHI's result is dereferenced within BB.  */
	      auto_vec <gimple *, 4> uses_in_bb;
	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
		{
	          /* We only care about uses in BB.  Catching cases in
		     in other blocks would require more complex path
		     isolation code.   */
		  if (gimple_bb (use_stmt) != bb)
		    continue;

		  /* Prefer the use's own location; fall back to the
		     PHI argument's location when the use has none.  */
		  location_t loc = gimple_location (use_stmt)
		    ? gimple_location (use_stmt)
		    : phi_arg_loc;

		  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc))
		    {
		      /* If BB cannot be duplicated, no isolation is
			 possible for any use; stop collecting.  */
		      if (!can_duplicate_block_p (bb))
			break;
		      uses_in_bb.safe_push (use_stmt);
		    }
		}
	      /* Isolate outside the immediate-use walk: isolate_path
		 mutates the IL, which must not happen mid-iteration.  */
	      for (gimple *use_stmt : uses_in_bb)
		{
		  duplicate = isolate_path (bb, duplicate, e,
					    use_stmt, lhs, false);

		  /* When we remove an incoming edge, we need to
		     reprocess the Ith element.  */
		  next_i = i;
		  cfg_altered = true;
		}
	    }
	}

      /* Then look for a PHI which have addresses of locals that
	 are then returned.  */
      duplicate = NULL;
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* Initial number of PHI arguments.  The result may change
	     from one iteration of the loop below to the next in
	     response to changes to the CFG but only the initial
	     value is stored below for use by diagnostics.  */
	  unsigned nargs = gimple_phi_num_args (phi);

	  /* See if any of the PHI's arguments are addresses of locals
	     that flow into a return statement.

	     When we remove an edge, we want to reprocess the current
	     index since the argument at that index will have been
	     removed, hence the ugly way we update I for each iteration.  */
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi); i = next_i)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);

	      /* Advance the argument index unless a path involving
		 the current argument has been isolated.  */
	      next_i = i + 1;
	      bool isolated = false;
	      duplicate = handle_return_addr_local_phi_arg (bb, duplicate, lhs,
							    arg, e, locmap,
							    nargs, &isolated);
	      if (isolated)
		{
		  cfg_altered = true;
		  next_i = i;
		}
	    }
	}

    }

  diag_returned_locals (false, locmap);
}
829 :
/* Detect and diagnose returning the address of a local variable
   in RETURN_STMT in basic block BB.  This only becomes undefined
   behavior if the result is used, so we do not insert a trap and
   only return NULL instead.  */

static void
warn_return_addr_local (basic_block bb, greturn *return_stmt)
{
  tree val = gimple_return_retval (return_stmt);
  if (!val)
    return;

  locmap_t locmap;
  hash_set<gphi *> visited_phis;
  if (!is_addr_local (return_stmt, val, &locmap, &visited_phis))
    return;

  /* We only need it for this particular case.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);

  const args_loc_t *argsloc = locmap.get (return_stmt);
  gcc_assert (argsloc);

  bool maybe = argsloc->nargs > argsloc->locvec.length ();
  if (!maybe)
    /* Even a definite local address is only "maybe" returned if this
       return does not post-dominate the function entry.  */
    maybe = !dominated_by_p (CDI_POST_DOMINATORS,
			     single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);

  diag_returned_locals (maybe, locmap);

  /* Bail if the statement isn't certain to return the address
     of a local (e.g., if it involves a conditional expression
     that wasn't transformed into a PHI or if it involves
     a MAX_EXPR or MIN_EXPR only one of whose operands is a local
     (even though such an expression isn't valid in C or has
     defined semantics in C++).  */
  if (maybe)
    return;

  /* Do not modify code if the user only asked for warnings.  */
  if (flag_isolate_erroneous_paths_dereference
      || flag_isolate_erroneous_paths_attribute)
    {
      tree zero = build_zero_cst (TREE_TYPE (val));
      gimple_return_set_retval (return_stmt, zero);
      update_stmt (return_stmt);
    }
}
878 :
/* Look for statements which exhibit erroneous behavior.  For example
   a NULL pointer dereference.

   When found, optimize the block containing the erroneous behavior.  */
static void
find_explicit_erroneous_behavior (void)
{
  basic_block bb;
  /* Statements at which blocks must be split are recorded during the
     walk and the splits performed only afterwards, since splitting a
     block while iterating over it would invalidate the traversal.  */
  auto_vec<gimple *> local_bb_split_points;
  bb_split_points = &local_bb_split_points;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;

      /* Now look at the statements in the block and see if any of
	 them explicitly dereference a NULL pointer.  This happens
	 because of jump threading and constant propagation.  */
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);

	  if (stmt_uses_0_or_null_in_undefined_way (stmt))
	    {
	      insert_trap (&si, null_pointer_node);
	      /* insert_trap may have moved the statement into a new
		 block; refresh BB from the iterator's statement.  */
	      bb = gimple_bb (gsi_stmt (si));

	      /* Ignore any more operands on this statement and
		 continue the statement iterator (which should
		 terminate its loop immediately).  */
	      cfg_altered = true;
	      break;
	    }

	  /* Look for a return statement that returns the address
	     of a local variable or the result of alloca.  */
	  if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
	    warn_return_addr_local (bb, return_stmt);
	}
    }

  /* warn_return_addr_local may have computed post-dominators.  */
  free_dominance_info (CDI_POST_DOMINATORS);

  /* Perform delayed splitting of blocks.  */
  for (gimple *stmt : local_bb_split_points)
    split_block (gimple_bb (stmt), stmt);

  bb_split_points = NULL;
}
939 :
940 : /* Search the function for statements which, if executed, would cause
941 : the program to fault such as a dereference of a NULL pointer.
942 :
943 : Such a program can't be valid if such a statement was to execute
944 : according to ISO standards.
945 :
946 : We detect explicit NULL pointer dereferences as well as those implied
947 : by a PHI argument having a NULL value which unconditionally flows into
948 : a dereference in the same block as the PHI.
949 :
950 : In the former case we replace the offending statement with an
951 : unconditional trap and eliminate the outgoing edges from the statement's
952 : basic block. This may expose secondary optimization opportunities.
953 :
954 : In the latter case, we isolate the path(s) with the NULL PHI
955 : feeding the dereference. We can then replace the offending statement
956 : and eliminate the outgoing edges in the duplicate. Again, this may
957 : expose secondary optimization opportunities.
958 :
959 : A warning for both cases may be advisable as well.
960 :
961 : Other statically detectable violations of the ISO standard could be
962 : handled in a similar way, such as out-of-bounds array indexing. */
963 :
964 : static unsigned int
965 965668 : gimple_ssa_isolate_erroneous_paths (void)
966 : {
967 965668 : initialize_original_copy_tables ();
968 :
969 : /* Search all the blocks for edges which, if traversed, will
970 : result in undefined behavior. */
971 965668 : cfg_altered = false;
972 :
973 : /* First handle cases where traversal of a particular edge
974 : triggers undefined behavior. These cases require creating
975 : duplicate blocks and thus new SSA_NAMEs.
976 :
977 : We want that process complete prior to the phase where we start
978 : removing edges from the CFG. Edge removal may ultimately result in
979 : removal of PHI nodes and thus releasing SSA_NAMEs back to the
980 : name manager.
981 :
982 : If the two processes run in parallel we could release an SSA_NAME
983 : back to the manager but we could still have dangling references
984 : to the released SSA_NAME in unreachable blocks.
985 : that any released names not have dangling references in the IL. */
986 965668 : find_implicit_erroneous_behavior ();
987 965668 : find_explicit_erroneous_behavior ();
988 :
989 965668 : free_original_copy_tables ();
990 :
991 : /* We scramble the CFG and loop structures a bit, clean up
992 : appropriately. We really should incrementally update the
993 : loop structures, in theory it shouldn't be that hard. */
994 965668 : if (cfg_altered)
995 : {
996 1301 : free_dominance_info (CDI_DOMINATORS);
997 1301 : loops_state_set (LOOPS_NEED_FIXUP);
998 1301 : return TODO_cleanup_cfg | TODO_update_ssa;
999 : }
1000 : return 0;
1001 : }
1002 :
namespace {
/* Registration data for the erroneous-path isolation pass.  */
const pass_data pass_data_isolate_erroneous_paths =
{
  GIMPLE_PASS, /* type */
  "isolate-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_isolate_erroneous_paths : public gimple_opt_pass
{
public:
  pass_isolate_erroneous_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt)
  {}

  /* opt_pass methods: */
  /* Cloneable because the pass is scheduled more than once in the
     pipeline.  */
  opt_pass * clone () final override
  {
    return new pass_isolate_erroneous_paths (m_ctxt);
  }
  bool gate (function *) final override
  {
    /* If we do not have a suitable builtin function for the trap statement,
       then do not perform the optimization.  */
    /* NOTE(review): the comment above doesn't match the condition below,
       which gates on the -fisolate-erroneous-paths-* flags and
       -Wnull-dereference rather than trap-builtin availability — confirm
       and update one of the two.  */
    return (flag_isolate_erroneous_paths_dereference != 0
	    || flag_isolate_erroneous_paths_attribute != 0
	    || warn_null_dereference);
  }

  unsigned int execute (function *) final override
  {
    return gimple_ssa_isolate_erroneous_paths ();
  }

}; // class pass_isolate_erroneous_paths
}
1045 :
1046 : gimple_opt_pass *
1047 288775 : make_pass_isolate_erroneous_paths (gcc::context *ctxt)
1048 : {
1049 288775 : return new pass_isolate_erroneous_paths (ctxt);
1050 : }
|