Line data Source code
1 : /* Detect paths through the CFG which can never be executed in a conforming
2 : program and isolate them.
3 :
4 : Copyright (C) 2013-2026 Free Software Foundation, Inc.
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify
9 : it under the terms of the GNU General Public License as published by
10 : the Free Software Foundation; either version 3, or (at your option)
11 : any later version.
12 :
13 : GCC is distributed in the hope that it will be useful,
14 : but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : GNU General Public License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #include "config.h"
23 : #include "system.h"
24 : #include "coretypes.h"
25 : #include "backend.h"
26 : #include "tree.h"
27 : #include "gimple.h"
28 : #include "cfghooks.h"
29 : #include "tree-pass.h"
30 : #include "ssa.h"
31 : #include "diagnostic-core.h"
32 : #include "fold-const.h"
33 : #include "gimple-iterator.h"
34 : #include "gimple-walk.h"
35 : #include "tree-ssa.h"
36 : #include "cfgloop.h"
37 : #include "tree-cfg.h"
38 : #include "cfganal.h"
39 : #include "intl.h"
40 :
41 :
42 : static bool cfg_altered;
43 :
44 : /* Callback for walk_stmt_load_store_ops.
45 :
46 : Return TRUE if OP will dereference the tree stored in DATA, FALSE
47 : otherwise.
48 :
49 : This routine only makes a superficial check for a dereference. Thus,
50 : it must only be used if it is safe to return a false negative. */
51 : static bool
52 2911 : check_loadstore (gimple *stmt, tree op, tree, void *data)
53 : {
54 2911 : if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF)
55 2911 : && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0))
56 : {
57 2806 : TREE_THIS_VOLATILE (op) = 1;
58 2806 : TREE_SIDE_EFFECTS (op) = 1;
59 2806 : gimple_set_has_volatile_ops (stmt, true);
60 2806 : return true;
61 : }
62 : return false;
63 : }
64 :
65 : static vec<gimple *> *bb_split_points;
66 :
/* Insert a __builtin_trap call adjacent to the statement at *SI_P, which
   uses pointer OP (known to be NULL on this path), and split the block
   after the trap.

   On return *SI_P points at the original statement again unless the
   statement already ended its block, in which case the split (or a
   deferred split request) has been recorded and *SI_P is untouched.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.   Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.  */
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
	 type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  /* Build the __builtin_trap call as a one-statement sequence.  */
  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement, for the other cases we want
     to insert before the statement.  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
				check_loadstore,
				check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
	{
	  /* Splitting now would invalidate post-dominator info, so if
	     it is live, defer the split via BB_SPLIT_POINTS and let the
	     caller perform it after freeing that info.  */
	  if (dom_info_available_p (CDI_POST_DOMINATORS))
	    bb_split_points->safe_push (stmt);
	  else
	    split_block (gimple_bb (stmt), stmt);
	  return;
	}
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  /* Same deferral logic for the common (non-block-ending) case.  */
  if (dom_info_available_p (CDI_POST_DOMINATORS))
    bb_split_points->safe_push (new_stmt);
  else
    split_block (gimple_bb (new_stmt), new_stmt);
  *si_p = gsi_for_stmt (stmt);
}
125 :
/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB' (which may be equal to DUPLICATE).  */

ATTRIBUTE_RETURNS_NONNULL basic_block
isolate_path (basic_block bb, basic_block duplicate,
	      edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;
  bool impossible = true;
  profile_count count = e->count ();

  /* If no statement before STMT can terminate the block, then reaching
     STMT means undefined behavior is unavoidable, so mark E cold as an
     "impossible" edge for profile purposes.  */
  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
    if (stmt_can_terminate_bb_p (gsi_stmt (si)))
      {
	impossible = false;
	break;
      }
  force_edge_cold (e, impossible);

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      duplicate->count = profile_count::zero ();
      if (!ret_zero)
	for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
	  remove_edge (e2);
    }
  bb->count -= count;

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    {
      flush_pending_stmts (e2);

      /* Update profile only when redirection is really processed.  */
      bb->count += e->count ();
    }

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     duplicate.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
	{
	  /* For the return-local-address case, rewrite the duplicated
	     return statement to return zero instead of trapping.  */
	  greturn *ret = as_a <greturn *> (gsi_stmt (si2));
	  tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
	  gimple_return_set_retval (ret, zero);
	  update_stmt (ret);
	}
      else
	insert_trap (&si2, op);
    }

  return duplicate;
}
233 :
234 : /* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor.
235 : FALSE otherwise. */
236 :
237 : static bool
238 62956081 : is_divmod_with_given_divisor (gimple *stmt, tree divisor)
239 : {
240 : /* Only assignments matter. */
241 62956081 : if (!is_gimple_assign (stmt))
242 : return false;
243 :
244 : /* Check for every DIV/MOD expression. */
245 14618948 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
246 14618948 : if (rhs_code == TRUNC_DIV_EXPR
247 14618948 : || rhs_code == FLOOR_DIV_EXPR
248 14568977 : || rhs_code == CEIL_DIV_EXPR
249 14568977 : || rhs_code == EXACT_DIV_EXPR
250 : || rhs_code == ROUND_DIV_EXPR
251 14521922 : || rhs_code == TRUNC_MOD_EXPR
252 : || rhs_code == FLOOR_MOD_EXPR
253 14478179 : || rhs_code == CEIL_MOD_EXPR
254 14477730 : || rhs_code == ROUND_MOD_EXPR)
255 : {
256 : /* Pointer equality is fine when DIVISOR is an SSA_NAME, but
257 : not sufficient for constants which may have different types. */
258 141236 : if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0))
259 : return true;
260 : }
261 : return false;
262 : }
263 :
264 : /* NAME is an SSA_NAME that we have already determined has the value 0 or NULL.
265 :
266 : Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results
267 : in undefined behavior, FALSE otherwise
268 :
269 : LOC is used for issuing diagnostics. This case represents potential
270 : undefined behavior exposed by path splitting and that's reflected in
271 : the diagnostic. */
272 :
273 : bool
274 740796 : stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc)
275 : {
276 : /* If we are working with a non pointer type, then see
277 : if this use is a DIV/MOD operation using NAME as the
278 : divisor. */
279 740796 : if (!POINTER_TYPE_P (TREE_TYPE (name)))
280 : {
281 620353 : if (!cfun->can_throw_non_call_exceptions)
282 411726 : return is_divmod_with_given_divisor (use_stmt, name);
283 : return false;
284 : }
285 :
286 : /* NAME is a pointer, so see if it's used in a context where it must
287 : be non-NULL. */
288 120443 : bool by_dereference
289 120443 : = infer_nonnull_range_by_dereference (use_stmt, name);
290 :
291 120443 : if (by_dereference
292 120443 : || infer_nonnull_range_by_attribute (use_stmt, name))
293 : {
294 :
295 1272 : if (by_dereference)
296 : {
297 1262 : warning_at (loc, OPT_Wnull_dereference,
298 : "potential null pointer dereference");
299 1262 : if (!flag_isolate_erroneous_paths_dereference)
300 : return false;
301 : }
302 : else
303 : {
304 10 : if (!flag_isolate_erroneous_paths_attribute)
305 : return false;
306 : }
307 1264 : return true;
308 : }
309 : return false;
310 : }
311 :
312 : /* Return TRUE if USE_STMT uses 0 or NULL in a context which results in
313 : undefined behavior, FALSE otherwise.
314 :
315 : These cases are explicit in the IL. */
316 :
317 : bool
318 73458002 : stmt_uses_0_or_null_in_undefined_way (gimple *stmt)
319 : {
320 73458002 : if (!cfun->can_throw_non_call_exceptions
321 73458002 : && is_divmod_with_given_divisor (stmt, integer_zero_node))
322 : return true;
323 :
324 : /* By passing null_pointer_node, we can use the
325 : infer_nonnull_range functions to detect explicit NULL
326 : pointer dereferences and other uses where a non-NULL
327 : value is required. */
328 :
329 73457757 : bool by_dereference
330 73457757 : = infer_nonnull_range_by_dereference (stmt, null_pointer_node);
331 73457757 : if (by_dereference
332 73457757 : || infer_nonnull_range_by_attribute (stmt, null_pointer_node))
333 : {
334 1240 : if (by_dereference)
335 : {
336 924 : location_t loc = gimple_location (stmt);
337 924 : warning_at (loc, OPT_Wnull_dereference,
338 : "null pointer dereference");
339 924 : if (!flag_isolate_erroneous_paths_dereference)
340 : return false;
341 : }
342 : else
343 : {
344 316 : if (!flag_isolate_erroneous_paths_attribute)
345 : return false;
346 : }
347 925 : return true;
348 : }
349 : return false;
350 : }
351 :
/* Describes the property of a return statement that may return
   the address of one or more local variables.  The type must
   be safely assignable and copyable so that it can be stored in
   a hash_map.  */
class args_loc_t
{
 public:

  /* Default-construct with an empty location vector.  PTR points at
     itself and serves as a canary: a shallow (memcpy-style) copy would
     leave it pointing into the source object, which the destructor's
     assert would catch.  */
  args_loc_t (): nargs (), locvec (), ptr (&ptr)
  {
    locvec.create (4);
  }

  /* Copy constructor: deep-copy LOCVEC and reset the canary.  */
  args_loc_t (const args_loc_t &rhs)
    : nargs (rhs.nargs), locvec (rhs.locvec.copy ()), ptr (&ptr) { }

  /* Copy assignment: release the old vector before taking a deep copy
     of RHS's so no locations leak.  */
  args_loc_t& operator= (const args_loc_t &rhs)
  {
    nargs = rhs.nargs;
    locvec.release ();
    locvec = rhs.locvec.copy ();
    return *this;
  }

  ~args_loc_t ()
  {
    locvec.release ();
    /* Verify this object was never shallow-copied (see PTR above).  */
    gcc_assert (ptr == &ptr);
  }

  /* For a PHI in a return statement its number of arguments.  When greater
     than LOCVEC.LENGTH () implies that an address of one of the locals in
     LOCVEC may but need not be returned by the statement.  Otherwise,
     unless both are zero, it implies it definitely is returned.  */
  unsigned nargs;
  /* The locations of local variables/alloca calls returned by the return
     statement.  Avoid using auto_vec here since it's not safe to copy due
     to pr90904.  */
  vec <location_t> locvec;
  /* Self-pointing canary used to detect invalid shallow copies.  */
  void *ptr;
};
393 :
394 : /* A mapping from a return statement to the locations of local variables
395 : whose addresses it may return. */
396 : typedef hash_map <gimple *, args_loc_t> locmap_t;
397 :
398 : /* Given the LOCMAP mapping, issue diagnostics about returning addresses
399 : of local variables. When MAYBE is set, all diagnostics will be of
400 : the "may return" kind. Otherwise each will be determined based on
401 : the equality of the corresponding NARGS and LOCVEC.LENGTH () values. */
402 :
403 : static void
404 964404 : diag_returned_locals (bool maybe, const locmap_t &locmap)
405 : {
406 965026 : for (locmap_t::iterator it = locmap.begin (); it != locmap.end (); ++it)
407 : {
408 311 : gimple *stmt = (*it).first;
409 311 : const args_loc_t &argsloc = (*it).second;
410 311 : location_t stmtloc = gimple_location (stmt);
411 311 : if (stmtloc == UNKNOWN_LOCATION)
412 : /* When multiple return statements are merged into one it
413 : may not have an associated location. Use the location
414 : of the closing brace instead. */
415 18 : stmtloc = cfun->function_end_locus;
416 :
417 311 : auto_diagnostic_group d;
418 311 : unsigned nargs = argsloc.locvec.length ();
419 622 : if (warning_at (stmtloc, OPT_Wreturn_local_addr,
420 143 : (maybe || argsloc.nargs > nargs
421 : ? G_("function may return address of local variable")
422 : : G_("function returns address of local variable"))))
423 : {
424 562 : for (unsigned i = 0; i != nargs; ++i)
425 314 : inform (argsloc.locvec[i], "declared here");
426 : }
427 311 : }
428 964404 : }
429 :
/* Return true if EXPR is an expression of pointer type that refers
   to the address of one or more variables with automatic storage
   duration.  If so, add an entry to *PLOCMAP and insert into
   PLOCMAP->LOCVEC the locations of the corresponding local variables
   whose address is returned by the RETURN_STMT (which may be set to
   (gimple*)-1 as a placeholder for such a statement).  VISITED is
   a bitmap of PHI nodes already visited by recursive calls.  When
   null, PHI expressions are not considered.  */

static bool
is_addr_local (gimple *return_stmt, tree exp, locmap_t *plocmap,
	       hash_set<gphi *> *visited)
{
  if (TREE_CODE (exp) == ADDR_EXPR)
    {
      tree baseaddr = get_base_address (TREE_OPERAND (exp, 0));
      /* &*p — recurse on the pointer P itself.  */
      if (TREE_CODE (baseaddr) == MEM_REF)
	return is_addr_local (return_stmt, TREE_OPERAND (baseaddr, 0),
			      plocmap, visited);

      /* Only local variables and parameters are interesting.  */
      if ((!VAR_P (baseaddr)
	   || is_global_var (baseaddr))
	  && TREE_CODE (baseaddr) != PARM_DECL)
	return false;

      /* Record where the local was declared for the diagnostic.  */
      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
      argsloc.locvec.safe_push (DECL_SOURCE_LOCATION (baseaddr));
      return true;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (exp)))
    return false;

  if (TREE_CODE (exp) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
      enum gimple_code code = gimple_code (def_stmt);

      if (is_gimple_assign (def_stmt))
	{
	  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
	  if (POINTER_TYPE_P (type))
	    {
	      /* NOTE: this CODE shadows the gimple_code above.  */
	      tree_code code = gimple_assign_rhs_code (def_stmt);
	      tree ptr1 = NULL_TREE, ptr2 = NULL_TREE;

	      /* Set to the number of arguments examined that should
		 be added to ARGSLOC->NARGS to identify expressions
		 only some but not all of whose operands refer to local
		 addresses.  */
	      unsigned nargs = 0;
	      if (code == COND_EXPR)
		{
		  ptr1 = gimple_assign_rhs2 (def_stmt);
		  ptr2 = gimple_assign_rhs3 (def_stmt);
		  nargs = 2;
		}
	      else if (code == MAX_EXPR || code == MIN_EXPR)
		{
		  ptr1 = gimple_assign_rhs1 (def_stmt);
		  ptr2 = gimple_assign_rhs2 (def_stmt);
		  nargs = 2;
		}
	      else if (code == ADDR_EXPR
		       || code == NOP_EXPR
		       || code == POINTER_PLUS_EXPR)
		/* Leave NARGS at zero and let the recursive call set it.  */
		ptr1 = gimple_assign_rhs1 (def_stmt);

	      /* Avoid short-circuiting the logical OR result in case
		 both operands refer to local variables, in which case
		 both should be considered and identified in the warning.  */
	      bool res1 = false, res2 = false;
	      if (ptr1)
		res1 = is_addr_local (return_stmt, ptr1, plocmap, visited);
	      if (ptr2)
		res2 = is_addr_local (return_stmt, ptr2, plocmap, visited);

	      if (nargs)
		if (args_loc_t *argsloc = plocmap->get (return_stmt))
		  argsloc->nargs += nargs;

	      return res1 || res2;
	    }
	  return false;
	}

      if (code == GIMPLE_CALL
	  && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
	{
	  /* Handle alloca and friends that return pointers to automatic
	     storage.  */
	  tree fn = gimple_call_fndecl (def_stmt);
	  int code = DECL_FUNCTION_CODE (fn);
	  if (code == BUILT_IN_ALLOCA
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX)
	    {
	      /* alloca storage has no declaration; point at the call.  */
	      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	      argsloc.locvec.safe_push (gimple_location (def_stmt));
	      return true;
	    }

	  if (gimple_call_num_args (def_stmt) < 1)
	    return false;

	  /* Recursively examine the first argument of calls to built-ins
	     that return it.  */
	  switch (code)
	    {
	    case BUILT_IN_MEMCPY:
	    case BUILT_IN_MEMCPY_CHK:
	    case BUILT_IN_MEMPCPY:
	    case BUILT_IN_MEMPCPY_CHK:
	    case BUILT_IN_MEMMOVE:
	    case BUILT_IN_MEMMOVE_CHK:
	    case BUILT_IN_STPCPY:
	    case BUILT_IN_STPCPY_CHK:
	    case BUILT_IN_STPNCPY:
	    case BUILT_IN_STPNCPY_CHK:
	    case BUILT_IN_STRCAT:
	    case BUILT_IN_STRCAT_CHK:
	    case BUILT_IN_STRCHR:
	    case BUILT_IN_STRCPY:
	    case BUILT_IN_STRCPY_CHK:
	    case BUILT_IN_STRNCAT:
	    case BUILT_IN_STRNCAT_CHK:
	    case BUILT_IN_STRNCPY:
	    case BUILT_IN_STRNCPY_CHK:
	    case BUILT_IN_STRRCHR:
	    case BUILT_IN_STRSTR:
	      return is_addr_local (return_stmt,
				    gimple_call_arg (def_stmt, 0),
				    plocmap, visited);
	    default:
	      return false;
	    }
	}

      if (code == GIMPLE_PHI && visited)
	{
	  gphi *phi_stmt = as_a <gphi *> (def_stmt);
	  /* hash_set::add returns true if already present — cycle guard.  */
	  if (visited->add (phi_stmt))
	    return false;

	  unsigned count = 0;
	  unsigned nargs = gimple_phi_num_args (phi_stmt);
	  args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	  /* Bump up the number of operands examined by the number of
	     operands of this PHI.  */
	  argsloc.nargs += nargs;
	  for (unsigned i = 0; i < gimple_phi_num_args (phi_stmt); ++i)
	    {
	      tree arg = gimple_phi_arg_def (phi_stmt, i);
	      if (is_addr_local (return_stmt, arg, plocmap, visited))
		++count;
	    }
	  return count != 0;
	}
    }

  return false;
}
593 :
/* Detect returning the address of a local variable in a PHI result LHS
   and argument ARG and PHI edge E in basic block BB.  Add an entry for
   each use to LOCMAP, setting its NARGS member to the NARGS argument
   (the number of PHI operands) plus the number of arguments in binary
   expressions referenced by ARG.  Call isolate_path for each returned
   address and set *ISOLATED to true if called.
   Return either DUPLICATE or the most recent result of isolate_path.  */

static basic_block
handle_return_addr_local_phi_arg (basic_block bb, basic_block duplicate,
				  tree lhs, tree arg, edge e, locmap_t &locmap,
				  unsigned nargs, bool *isolated)
{
  /* Use (gimple*)-1 as a temporary placeholder and replace it with
     the return statement below once it is known.  Using a null doesn't
     work because it's used by the hash_map to mean "no-entry."  Pass
     null instead of a visited_phis bitmap to avoid descending into
     PHIs since they are being processed by the caller.  Those that
     remain will be checked again later.  */
  if (!is_addr_local ((gimple*)-1, arg, &locmap, NULL))
    {
      /* Remove the placeholder regardless of success or failure.  */
      locmap.remove ((gimple*)-1);
      return duplicate;
    }

  const args_loc_t* const placeargsloc = locmap.get ((gimple*)-1);
  const unsigned nlocs = placeargsloc->locvec.length ();
  gcc_assert (nlocs);

  /* Add to the number of PHI arguments determined by the caller
     the number of operands of the expressions referenced by ARG.
     This lets the caller determine whether it's dealing with
     a "may return" or "definitely returns."  */
  nargs += placeargsloc->nargs;

  /* Set to true if any expressions referenced by ARG involve
     multiple addresses only some of which are those of locals.  */
  bool maybe = placeargsloc->nargs > placeargsloc->locvec.length ();

  gimple *use_stmt;
  imm_use_iterator iter;

  /* Look for uses of the PHI result LHS in return statements.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      greturn *return_stmt = dyn_cast <greturn *> (use_stmt);
      if (!return_stmt)
	continue;

      if (gimple_return_retval (return_stmt) != lhs)
	continue;

      /* Add an entry for the return statement and the locations
	 of the PHI arguments obtained above to the map.  */
      args_loc_t &argsloc = locmap.get_or_insert (use_stmt);
      argsloc.nargs = nargs;
      unsigned nelts = argsloc.locvec.length () + nlocs;
      argsloc.locvec.reserve (nelts);
      argsloc.locvec.splice (placeargsloc->locvec);

      /* Only isolate a path that definitely returns a local's address,
	 and only when isolation is enabled and BB can be duplicated.  */
      if (!maybe
	  && (flag_isolate_erroneous_paths_dereference
	      || flag_isolate_erroneous_paths_attribute)
	  && gimple_bb (use_stmt) == bb
	  && (duplicate || can_duplicate_block_p (bb)))
	{
	  duplicate = isolate_path (bb, duplicate, e,
				    use_stmt, lhs, true);

	  /* Let caller know the path has been isolated.  */
	  *isolated = true;
	}
    }

  locmap.remove ((gimple*)-1);

  return duplicate;
}
673 :
/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
static void
find_implicit_erroneous_behavior (void)
{
  locmap_t locmap;

  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;


      /* If BB has an edge to itself, then duplication of BB below
	 could result in reallocation of BB's PHI nodes.   If that happens
	 then the loop below over the PHIs would use the old PHI and
	 thus invalid information.  We don't have a good way to know
	 if a PHI has been reallocated, so just avoid isolation in
	 this case.  */
      if (find_edge (bb, bb))
	continue;

      /* First look for a PHI which sets a pointer to NULL and which
 	 is then dereferenced within BB.  This is somewhat overly
	 conservative, but probably catches most of the interesting
	 cases.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* Initial number of PHI arguments.  The result may change
	     from one iteration of the loop below to the next in
	     response to changes to the CFG but only the initial
	     value is stored below for use by diagnostics.  */
	  unsigned nargs = gimple_phi_num_args (phi);

	  /* PHI produces a pointer result.  See if any of the PHI's
	     arguments are NULL.

	     When we remove an edge, we want to reprocess the current
	     index since the argument at that index will have been
	     removed, hence the ugly way we update I for each iteration.  */
	  basic_block duplicate = NULL;
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi); i = next_i)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);

	      /* Advance the argument index unless a path involving
		 the current argument has been isolated.  */
	      next_i = i + 1;
	      bool isolated = false;
	      duplicate = handle_return_addr_local_phi_arg (bb, duplicate, lhs,
							    arg, e, locmap,
							    nargs, &isolated);
	      if (isolated)
		{
		  cfg_altered = true;
		  next_i = i;
		}

	      if (!integer_zerop (arg))
		continue;

	      location_t phi_arg_loc = gimple_phi_arg_location (phi, i);

	      imm_use_iterator iter;
	      gimple *use_stmt;

	      /* We've got a NULL PHI argument.  Now see if the
 	         PHI's result is dereferenced within BB.  */
	      auto_vec <gimple *, 4> uses_in_bb;
	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
		{
	          /* We only care about uses in BB.  Catching cases in
		     in other blocks would require more complex path
		     isolation code.   */
		  if (gimple_bb (use_stmt) != bb)
		    continue;

		  /* Prefer the use's own location; fall back to the
		     PHI argument's location when the use has none.  */
		  location_t loc = gimple_location (use_stmt)
		    ? gimple_location (use_stmt)
		    : phi_arg_loc;

		  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc))
		    {
		      /* Give up on this PHI arg if BB can't be duplicated.  */
		      if (!can_duplicate_block_p (bb))
			break;
		      uses_in_bb.safe_push (use_stmt);
		    }
		}
	      for (gimple *use_stmt : uses_in_bb)
		{
		  duplicate = isolate_path (bb, duplicate, e,
					    use_stmt, lhs, false);

		  /* When we remove an incoming edge, we need to
		     reprocess the Ith element.  */
		  next_i = i;
		  cfg_altered = true;
		}
	    }
	}
    }

  /* Emit any -Wreturn-local-addr diagnostics collected above.  */
  diag_returned_locals (false, locmap);
}
801 :
/* Detect and diagnose returning the address of a local variable
   in RETURN_STMT in basic block BB.  This only becomes undefined
   behavior if the result is used, so we do not insert a trap and
   only return NULL instead.  */

static void
warn_return_addr_local (basic_block bb, greturn *return_stmt)
{
  tree val = gimple_return_retval (return_stmt);
  if (!val)
    return;

  locmap_t locmap;
  hash_set<gphi *> visited_phis;
  if (!is_addr_local (return_stmt, val, &locmap, &visited_phis))
    return;

  /* We only need it for this particular case.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);

  const args_loc_t *argsloc = locmap.get (return_stmt);
  gcc_assert (argsloc);

  /* "May return" either because only some operands are locals, or
     because the return isn't certain to execute (it doesn't
     post-dominate the function's entry).  */
  bool maybe = argsloc->nargs > argsloc->locvec.length ();
  if (!maybe)
    maybe = !dominated_by_p (CDI_POST_DOMINATORS,
			     single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);

  diag_returned_locals (maybe, locmap);

  /* Bail if the statement isn't certain to return the address
     of a local (e.g., if it involves a conditional expression
     that wasn't transformed into a PHI or if it involves
     a MAX_EXPR or MIN_EXPR only one of whose operands is a local
     (even though such an expression isn't valid in C or has
     defined semantics in C++).  */
  if (maybe)
    return;

  /* Do not modify code if the user only asked for warnings.  */
  if (flag_isolate_erroneous_paths_dereference
      || flag_isolate_erroneous_paths_attribute)
    {
      tree zero = build_zero_cst (TREE_TYPE (val));
      gimple_return_set_retval (return_stmt, zero);
      update_stmt (return_stmt);
    }
}
850 :
/* Look for statements which exhibit erroneous behavior.  For example
   a NULL pointer dereference.

   When found, optimize the block containing the erroneous behavior.  */
static void
find_explicit_erroneous_behavior (void)
{
  basic_block bb;
  /* Blocks whose split must be deferred while post-dominator info is
     live; insert_trap pushes onto this via BB_SPLIT_POINTS.  */
  auto_vec<gimple *> local_bb_split_points;
  bb_split_points = &local_bb_split_points;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;

      /* Now look at the statements in the block and see if any of
	 them explicitly dereference a NULL pointer.  This happens
	 because of jump threading and constant propagation.  */
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);

	  if (stmt_uses_0_or_null_in_undefined_way (stmt))
	    {
	      insert_trap (&si, null_pointer_node);
	      bb = gimple_bb (gsi_stmt (si));

	      /* Ignore any more operands on this statement and
		 continue the statement iterator (which should
		 terminate its loop immediately.  */
	      cfg_altered = true;
	      break;
	    }

	  /* Look for a return statement that returns the address
	     of a local variable or the result of alloca.  */
	  if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
	    warn_return_addr_local (bb, return_stmt);
	}
    }

  free_dominance_info (CDI_POST_DOMINATORS);

  /* Perform delayed splitting of blocks.  */
  for (gimple *stmt : local_bb_split_points)
    split_block (gimple_bb (stmt), stmt);

  bb_split_points = NULL;
}
911 :
/* Search the function for statements which, if executed, would cause
   the program to fault such as a dereference of a NULL pointer.

   Such a program can't be valid if such a statement was to execute
   according to ISO standards.

   We detect explicit NULL pointer dereferences as well as those implied
   by a PHI argument having a NULL value which unconditionally flows into
   a dereference in the same block as the PHI.

   In the former case we replace the offending statement with an
   unconditional trap and eliminate the outgoing edges from the statement's
   basic block.  This may expose secondary optimization opportunities.

   In the latter case, we isolate the path(s) with the NULL PHI
   feeding the dereference.  We can then replace the offending statement
   and eliminate the outgoing edges in the duplicate.  Again, this may
   expose secondary optimization opportunities.

   A warning for both cases may be advisable as well.

   Other statically detectable violations of the ISO standard could be
   handled in a similar way, such as out-of-bounds array indexing.

   Returns TODO_cleanup_cfg | TODO_update_ssa when the CFG was altered,
   zero otherwise.  */

static unsigned int
gimple_ssa_isolate_erroneous_paths (void)
{
  initialize_original_copy_tables ();

  /* Search all the blocks for edges which, if traversed, will
     result in undefined behavior.  */
  cfg_altered = false;

  /* First handle cases where traversal of a particular edge
     triggers undefined behavior.  These cases require creating
     duplicate blocks and thus new SSA_NAMEs.

     We want that process complete prior to the phase where we start
     removing edges from the CFG.  Edge removal may ultimately result in
     removal of PHI nodes and thus releasing SSA_NAMEs back to the
     name manager.

     If the two processes run in parallel we could release an SSA_NAME
     back to the manager but we could still have dangling references
     to the released SSA_NAME in unreachable blocks.  Running the
     duplication phase to completion first ensures that any released
     names do not have dangling references in the IL.  */
  find_implicit_erroneous_behavior ();
  find_explicit_erroneous_behavior ();

  free_original_copy_tables ();

  /* We scramble the CFG and loop structures a bit, clean up
     appropriately.  We really should incrementally update the
     loop structures, in theory it shouldn't be that hard.  */
  if (cfg_altered)
    {
      free_dominance_info (CDI_DOMINATORS);
      loops_state_set (LOOPS_NEED_FIXUP);
      return TODO_cleanup_cfg | TODO_update_ssa;
    }
  return 0;
}
974 :
namespace {
/* Pass descriptor for the erroneous-path isolation pass
   ("isolate-paths").  Requires a CFG and SSA form; provides and
   destroys no properties itself.  */
const pass_data pass_data_isolate_erroneous_paths =
{
  GIMPLE_PASS, /* type */
  "isolate-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE optimization pass wrapping gimple_ssa_isolate_erroneous_paths.  */
class pass_isolate_erroneous_paths : public gimple_opt_pass
{
public:
  pass_isolate_erroneous_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt)
  {}

  /* opt_pass methods: */

  /* Support multiple instances of this pass in the pass pipeline.  */
  opt_pass * clone () final override
  {
    return new pass_isolate_erroneous_paths (m_ctxt);
  }

  /* Run when either isolation flag is enabled, or when only the
     -Wnull-dereference diagnostics are wanted.  */
  bool gate (function *) final override
  {
    /* If we do not have a suitable builtin function for the trap statement,
       then do not perform the optimization.  */
    return (flag_isolate_erroneous_paths_dereference != 0
	    || flag_isolate_erroneous_paths_attribute != 0
	    || warn_null_dereference);
  }

  unsigned int execute (function *) final override
  {
    return gimple_ssa_isolate_erroneous_paths ();
  }

}; // class pass_isolate_erroneous_paths
}
1017 :
1018 : gimple_opt_pass *
1019 285722 : make_pass_isolate_erroneous_paths (gcc::context *ctxt)
1020 : {
1021 285722 : return new pass_isolate_erroneous_paths (ctxt);
1022 : }
|