Branch data Line data Source code
1 : : /* Detect paths through the CFG which can never be executed in a conforming
2 : : program and isolate them.
3 : :
4 : : Copyright (C) 2013-2024 Free Software Foundation, Inc.
5 : :
6 : : This file is part of GCC.
7 : :
8 : : GCC is free software; you can redistribute it and/or modify
9 : : it under the terms of the GNU General Public License as published by
10 : : the Free Software Foundation; either version 3, or (at your option)
11 : : any later version.
12 : :
13 : : GCC is distributed in the hope that it will be useful,
14 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : : GNU General Public License for more details.
17 : :
18 : : You should have received a copy of the GNU General Public License
19 : : along with GCC; see the file COPYING3. If not see
20 : : <http://www.gnu.org/licenses/>. */
21 : :
22 : : #include "config.h"
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "backend.h"
26 : : #include "tree.h"
27 : : #include "gimple.h"
28 : : #include "cfghooks.h"
29 : : #include "tree-pass.h"
30 : : #include "ssa.h"
31 : : #include "diagnostic-core.h"
32 : : #include "fold-const.h"
33 : : #include "gimple-iterator.h"
34 : : #include "gimple-walk.h"
35 : : #include "tree-ssa.h"
36 : : #include "cfgloop.h"
37 : : #include "tree-cfg.h"
38 : : #include "cfganal.h"
39 : : #include "intl.h"
40 : :
41 : :
42 : : static bool cfg_altered;
43 : :
44 : : /* Callback for walk_stmt_load_store_ops.
45 : :
46 : : Return TRUE if OP will dereference the tree stored in DATA, FALSE
47 : : otherwise.
48 : :
49 : : This routine only makes a superficial check for a dereference. Thus,
50 : : it must only be used if it is safe to return a false negative. */
51 : : static bool
52 : 2570 : check_loadstore (gimple *stmt, tree op, tree, void *data)
53 : : {
54 : 2570 : if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF)
55 : 2570 : && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0))
56 : : {
57 : 2490 : TREE_THIS_VOLATILE (op) = 1;
58 : 2490 : TREE_SIDE_EFFECTS (op) = 1;
59 : 2490 : update_stmt (stmt);
60 : 2490 : return true;
61 : : }
62 : : return false;
63 : : }
64 : :
65 : : static vec<gimple *> *bb_split_points;
66 : :
/* Insert a trap after SI and split the block after the trap.

   OP is the tree whose dereference (through check_loadstore) makes the
   statement at SI exhibit undefined behavior.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.  Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.

     Note the first walk below passes NULL as the load callback, so it
     only fires when OP is dereferenced as a store destination.  */
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
         type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement, for the other cases we want
     to insert before the statement.  This second walk checks both the
     load and store operands of STMT.  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
                                check_loadstore,
                                check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
        {
          /* When post-dominator info is live, record the split point and
             defer the actual split — presumably performed later by the
             caller via BB_SPLIT_POINTS (the consumer is outside this
             view; confirm) — so the dominance info stays valid here.  */
          if (dom_info_available_p (CDI_POST_DOMINATORS))
            bb_split_points->safe_push (stmt);
          else
            split_block (gimple_bb (stmt), stmt);
          return;
        }
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  /* Split (or record a deferred split) after the trap so everything
     following it becomes unreachable.  */
  if (dom_info_available_p (CDI_POST_DOMINATORS))
    bb_split_points->safe_push (new_stmt);
  else
    split_block (gimple_bb (new_stmt), new_stmt);
  /* Re-acquire an iterator for STMT; the split may have invalidated
     the one we were given.  */
  *si_p = gsi_for_stmt (stmt);
}
125 : :
/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB' (which may be equal to DUPLICATE).  */

ATTRIBUTE_RETURNS_NONNULL basic_block
isolate_path (basic_block bb, basic_block duplicate,
              edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;
  bool impossible = true;
  profile_count count = e->count ();

  /* If no statement before STMT can transfer control out of BB, then
     reaching BB via E guarantees STMT executes, making E an impossible
     (never-taken in a conforming program) edge; mark it cold.  */
  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
    if (stmt_can_terminate_bb_p (gsi_stmt (si)))
      {
        impossible = false;
        break;
      }
  force_edge_cold (e, impossible);

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      duplicate->count = profile_count::zero ();
      if (!ret_zero)
        for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
          remove_edge (e2);
    }
  bb->count -= count;

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    {
      flush_pending_stmts (e2);

      /* Update profile only when redirection is really processed.  */
      bb->count += e->count ();
    }

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     duplicate.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
        {
          /* For the address-of-local case, replace the returned value
             with zero instead of trapping.  */
          greturn *ret = as_a <greturn *> (gsi_stmt (si2));
          tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
          gimple_return_set_retval (ret, zero);
          update_stmt (ret);
        }
      else
        insert_trap (&si2, op);
    }

  return duplicate;
}
233 : :
234 : : /* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor.
235 : : FALSE otherwise. */
236 : :
237 : : static bool
238 : 52592139 : is_divmod_with_given_divisor (gimple *stmt, tree divisor)
239 : : {
240 : : /* Only assignments matter. */
241 : 52592139 : if (!is_gimple_assign (stmt))
242 : : return false;
243 : :
244 : : /* Check for every DIV/MOD expression. */
245 : 13760799 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
246 : 13760799 : if (rhs_code == TRUNC_DIV_EXPR
247 : 13760799 : || rhs_code == FLOOR_DIV_EXPR
248 : 13711220 : || rhs_code == CEIL_DIV_EXPR
249 : 13711220 : || rhs_code == EXACT_DIV_EXPR
250 : : || rhs_code == ROUND_DIV_EXPR
251 : 13681604 : || rhs_code == TRUNC_MOD_EXPR
252 : : || rhs_code == FLOOR_MOD_EXPR
253 : 13641563 : || rhs_code == CEIL_MOD_EXPR
254 : 13641250 : || rhs_code == ROUND_MOD_EXPR)
255 : : {
256 : : /* Pointer equality is fine when DIVISOR is an SSA_NAME, but
257 : : not sufficient for constants which may have different types. */
258 : 119549 : if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0))
259 : : return true;
260 : : }
261 : : return false;
262 : : }
263 : :
264 : : /* NAME is an SSA_NAME that we have already determined has the value 0 or NULL.
265 : :
266 : : Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results
267 : : in undefined behavior, FALSE otherwise
268 : :
269 : : LOC is used for issuing diagnostics. This case represents potential
270 : : undefined behavior exposed by path splitting and that's reflected in
271 : : the diagnostic. */
272 : :
273 : : bool
274 : 725313 : stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc)
275 : : {
276 : : /* If we are working with a non pointer type, then see
277 : : if this use is a DIV/MOD operation using NAME as the
278 : : divisor. */
279 : 725313 : if (!POINTER_TYPE_P (TREE_TYPE (name)))
280 : : {
281 : 619581 : if (!cfun->can_throw_non_call_exceptions)
282 : 419512 : return is_divmod_with_given_divisor (use_stmt, name);
283 : : return false;
284 : : }
285 : :
286 : : /* NAME is a pointer, so see if it's used in a context where it must
287 : : be non-NULL. */
288 : 105732 : bool by_dereference
289 : 105732 : = infer_nonnull_range_by_dereference (use_stmt, name);
290 : :
291 : 105732 : if (by_dereference
292 : 105732 : || infer_nonnull_range_by_attribute (use_stmt, name))
293 : : {
294 : :
295 : 1190 : if (by_dereference)
296 : : {
297 : 1138 : warning_at (loc, OPT_Wnull_dereference,
298 : : "potential null pointer dereference");
299 : 1138 : if (!flag_isolate_erroneous_paths_dereference)
300 : : return false;
301 : : }
302 : : else
303 : : {
304 : 52 : if (!flag_isolate_erroneous_paths_attribute)
305 : : return false;
306 : : }
307 : 1140 : return true;
308 : : }
309 : : return false;
310 : : }
311 : :
312 : : /* Return TRUE if USE_STMT uses 0 or NULL in a context which results in
313 : : undefined behavior, FALSE otherwise.
314 : :
315 : : These cases are explicit in the IL. */
316 : :
317 : : bool
318 : 63203116 : stmt_uses_0_or_null_in_undefined_way (gimple *stmt)
319 : : {
320 : 63203116 : if (!cfun->can_throw_non_call_exceptions
321 : 63203116 : && is_divmod_with_given_divisor (stmt, integer_zero_node))
322 : : return true;
323 : :
324 : : /* By passing null_pointer_node, we can use the
325 : : infer_nonnull_range functions to detect explicit NULL
326 : : pointer dereferences and other uses where a non-NULL
327 : : value is required. */
328 : :
329 : 63202870 : bool by_dereference
330 : 63202870 : = infer_nonnull_range_by_dereference (stmt, null_pointer_node);
331 : 63202870 : if (by_dereference
332 : 63202870 : || infer_nonnull_range_by_attribute (stmt, null_pointer_node))
333 : : {
334 : 1133 : if (by_dereference)
335 : : {
336 : 830 : location_t loc = gimple_location (stmt);
337 : 830 : warning_at (loc, OPT_Wnull_dereference,
338 : : "null pointer dereference");
339 : 830 : if (!flag_isolate_erroneous_paths_dereference)
340 : : return false;
341 : : }
342 : : else
343 : : {
344 : 303 : if (!flag_isolate_erroneous_paths_attribute)
345 : : return false;
346 : : }
347 : 830 : return true;
348 : : }
349 : : return false;
350 : : }
351 : :
352 : : /* Describes the property of a return statement that may return
353 : : the address of one or more local variables. The type must
354 : : be safely assignable and copyable so that it can be stored in
355 : : a hash_map. */
356 : : class args_loc_t
357 : : {
358 : : public:
359 : :
360 : 42507 : args_loc_t (): nargs (), locvec (), ptr (&ptr)
361 : : {
362 : 42507 : locvec.create (4);
363 : 42507 : }
364 : :
365 : 0 : args_loc_t (const args_loc_t &rhs)
366 : 0 : : nargs (rhs.nargs), locvec (rhs.locvec.copy ()), ptr (&ptr) { }
367 : :
368 : : args_loc_t& operator= (const args_loc_t &rhs)
369 : : {
370 : : nargs = rhs.nargs;
371 : : locvec.release ();
372 : : locvec = rhs.locvec.copy ();
373 : : return *this;
374 : : }
375 : :
376 : 42507 : ~args_loc_t ()
377 : : {
378 : 42507 : locvec.release ();
379 : 42507 : gcc_assert (ptr == &ptr);
380 : 42507 : }
381 : :
382 : : /* For a PHI in a return statement its number of arguments. When greater
383 : : than LOCVEC.LENGTH () implies that an address of one of the locals in
384 : : LOCVEC may but need not be returned by the statement. Otherwise,
385 : : unless both are zero, it implies it definitely is returned. */
386 : : unsigned nargs;
387 : : /* The locations of local variables/alloca calls returned by the return
388 : : statement. Avoid using auto_vec here since it's not safe to copy due
389 : : to pr90904. */
390 : : vec <location_t> locvec;
391 : : void *ptr;
392 : : };
393 : :
394 : : /* A mapping from a return statement to the locations of local variables
395 : : whose addresses it may return. */
396 : : typedef hash_map <gimple *, args_loc_t> locmap_t;
397 : :
398 : : /* Given the LOCMAP mapping, issue diagnostics about returning addresses
399 : : of local variables. When MAYBE is set, all diagnostics will be of
400 : : the "may return" kind. Otherwise each will be determined based on
401 : : the equality of the corresponding NARGS and LOCVEC.LENGTH () values. */
402 : :
403 : : static void
404 : 969375 : diag_returned_locals (bool maybe, const locmap_t &locmap)
405 : : {
406 : 969997 : for (locmap_t::iterator it = locmap.begin (); it != locmap.end (); ++it)
407 : : {
408 : 311 : gimple *stmt = (*it).first;
409 : 311 : const args_loc_t &argsloc = (*it).second;
410 : 311 : location_t stmtloc = gimple_location (stmt);
411 : 311 : if (stmtloc == UNKNOWN_LOCATION)
412 : : /* When multiple return statements are merged into one it
413 : : may not have an associated location. Use the location
414 : : of the closing brace instead. */
415 : 18 : stmtloc = cfun->function_end_locus;
416 : :
417 : 311 : auto_diagnostic_group d;
418 : 311 : unsigned nargs = argsloc.locvec.length ();
419 : 622 : if (warning_at (stmtloc, OPT_Wreturn_local_addr,
420 : 143 : (maybe || argsloc.nargs > nargs
421 : : ? G_("function may return address of local variable")
422 : : : G_("function returns address of local variable"))))
423 : : {
424 : 562 : for (unsigned i = 0; i != nargs; ++i)
425 : 314 : inform (argsloc.locvec[i], "declared here");
426 : : }
427 : 311 : }
428 : 969375 : }
429 : :
/* Return true if EXPR is an expression of pointer type that refers
   to the address of one or more variables with automatic storage
   duration.  If so, add an entry to *PLOCMAP and insert into
   PLOCMAP->LOCVEC the locations of the corresponding local variables
   whose address is returned by the RETURN_STMT (which may be set to
   (gimple*)-1 as a placeholder for such a statement).  VISITED is
   a bitmap of PHI nodes already visited by recursive calls.  When
   null, PHI expressions are not considered.  */

static bool
is_addr_local (gimple *return_stmt, tree exp, locmap_t *plocmap,
	       hash_set<gphi *> *visited)
{
  if (TREE_CODE (exp) == ADDR_EXPR)
    {
      tree baseaddr = get_base_address (TREE_OPERAND (exp, 0));
      /* &*p — recurse on the pointer being dereferenced.  */
      if (TREE_CODE (baseaddr) == MEM_REF)
	return is_addr_local (return_stmt, TREE_OPERAND (baseaddr, 0),
			      plocmap, visited);

      /* Only local variables and parameters have automatic storage
	 duration; anything else (globals, statics) is fine to return.  */
      if ((!VAR_P (baseaddr)
	   || is_global_var (baseaddr))
	  && TREE_CODE (baseaddr) != PARM_DECL)
	return false;

      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
      argsloc.locvec.safe_push (DECL_SOURCE_LOCATION (baseaddr));
      return true;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (exp)))
    return false;

  if (TREE_CODE (exp) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
      enum gimple_code code = gimple_code (def_stmt);

      if (is_gimple_assign (def_stmt))
	{
	  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
	  if (POINTER_TYPE_P (type))
	    {
	      /* NOTE: this CODE (a tree_code) intentionally shadows the
		 gimple_code CODE above.  */
	      tree_code code = gimple_assign_rhs_code (def_stmt);
	      tree ptr1 = NULL_TREE, ptr2 = NULL_TREE;

	      /* Set to the number of arguments examined that should
		 be added to ARGSLOC->NARGS to identify expressions
		 only some but not all of whose operands refer to local
		 addresses.  */
	      unsigned nargs = 0;
	      if (code == COND_EXPR)
		{
		  ptr1 = gimple_assign_rhs2 (def_stmt);
		  ptr2 = gimple_assign_rhs3 (def_stmt);
		  nargs = 2;
		}
	      else if (code == MAX_EXPR || code == MIN_EXPR)
		{
		  ptr1 = gimple_assign_rhs1 (def_stmt);
		  ptr2 = gimple_assign_rhs2 (def_stmt);
		  nargs = 2;
		}
	      else if (code == ADDR_EXPR
		       || code == NOP_EXPR
		       || code == POINTER_PLUS_EXPR)
		/* Leave NARGS at zero and let the recursive call set it.  */
		ptr1 = gimple_assign_rhs1 (def_stmt);

	      /* Avoid short-circuiting the logical OR result in case
		 both operands refer to local variables, in which case
		 both should be considered and identified in the warning.  */
	      bool res1 = false, res2 = false;
	      if (ptr1)
		res1 = is_addr_local (return_stmt, ptr1, plocmap, visited);
	      if (ptr2)
		res2 = is_addr_local (return_stmt, ptr2, plocmap, visited);

	      /* Re-fetch the map entry here rather than holding a
		 reference across the recursive calls, which may have
		 resized the map.  */
	      if (nargs)
		if (args_loc_t *argsloc = plocmap->get (return_stmt))
		  argsloc->nargs += nargs;

	      return res1 || res2;
	    }
	  return false;
	}

      if (code == GIMPLE_CALL
	  && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
	{
	  /* Handle alloca and friends that return pointers to automatic
	     storage.  */
	  tree fn = gimple_call_fndecl (def_stmt);
	  int code = DECL_FUNCTION_CODE (fn);
	  if (code == BUILT_IN_ALLOCA
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX)
	    {
	      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	      argsloc.locvec.safe_push (gimple_location (def_stmt));
	      return true;
	    }

	  if (gimple_call_num_args (def_stmt) < 1)
	    return false;

	  /* Recursively examine the first argument of calls to built-ins
	     that return it.  */
	  switch (code)
	    {
	    case BUILT_IN_MEMCPY:
	    case BUILT_IN_MEMCPY_CHK:
	    case BUILT_IN_MEMPCPY:
	    case BUILT_IN_MEMPCPY_CHK:
	    case BUILT_IN_MEMMOVE:
	    case BUILT_IN_MEMMOVE_CHK:
	    case BUILT_IN_STPCPY:
	    case BUILT_IN_STPCPY_CHK:
	    case BUILT_IN_STPNCPY:
	    case BUILT_IN_STPNCPY_CHK:
	    case BUILT_IN_STRCAT:
	    case BUILT_IN_STRCAT_CHK:
	    case BUILT_IN_STRCHR:
	    case BUILT_IN_STRCPY:
	    case BUILT_IN_STRCPY_CHK:
	    case BUILT_IN_STRNCAT:
	    case BUILT_IN_STRNCAT_CHK:
	    case BUILT_IN_STRNCPY:
	    case BUILT_IN_STRNCPY_CHK:
	    case BUILT_IN_STRRCHR:
	    case BUILT_IN_STRSTR:
	      return is_addr_local (return_stmt,
				    gimple_call_arg (def_stmt, 0),
				    plocmap, visited);
	    default:
	      return false;
	    }
	}

      if (code == GIMPLE_PHI && visited)
	{
	  gphi *phi_stmt = as_a <gphi *> (def_stmt);
	  /* hash_set::add returns true if the PHI was already there;
	     this terminates recursion through PHI cycles.  */
	  if (visited->add (phi_stmt))
	    return false;

	  unsigned count = 0;
	  unsigned nargs = gimple_phi_num_args (phi_stmt);
	  args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	  /* Bump up the number of operands examined by the number of
	     operands of this PHI.  */
	  argsloc.nargs += nargs;
	  for (unsigned i = 0; i < gimple_phi_num_args (phi_stmt); ++i)
	    {
	      tree arg = gimple_phi_arg_def (phi_stmt, i);
	      if (is_addr_local (return_stmt, arg, plocmap, visited))
		++count;
	    }
	  return count != 0;
	}
    }

  return false;
}
593 : :
/* Detect returning the address of a local variable in a PHI result LHS
   and argument ARG and PHI edge E in basic block BB.  Add an entry for
   each use to LOCMAP, setting its NARGS member to the NARGS argument
   (the number of PHI operands) plus the number of arguments in binary
   expressions referenced by ARG.  Call isolate_path for each returned
   address and set *ISOLATED to true if called.
   Return either DUPLICATE or the most recent result of isolate_path.  */

static basic_block
handle_return_addr_local_phi_arg (basic_block bb, basic_block duplicate,
				  tree lhs, tree arg, edge e, locmap_t &locmap,
				  unsigned nargs, bool *isolated)
{
  /* Use (gimple*)-1 as a temporary placeholder and replace it with
     the return statement below once it is known.  Using a null doesn't
     work because it's used by the hash_map to mean "no-entry."  Pass
     null instead of a visited_phis bitmap to avoid descending into
     PHIs since they are being processed by the caller.  Those that
     remain will be checked again later.  */
  if (!is_addr_local ((gimple*)-1, arg, &locmap, NULL))
    {
      /* Remove the placeholder regardless of success or failure.  */
      locmap.remove ((gimple*)-1);
      return duplicate;
    }

  const args_loc_t* const placeargsloc = locmap.get ((gimple*)-1);
  const unsigned nlocs = placeargsloc->locvec.length ();
  gcc_assert (nlocs);

  /* Add to the number of PHI arguments determined by the caller
     the number of operands of the expressions referenced by ARG.
     This lets the caller determine whether it's dealing with
     a "may return" or "definitely returns."  */
  nargs += placeargsloc->nargs;

  /* Set to true if any expressions referenced by ARG involve
     multiple addresses only some of which are those of locals.  */
  bool maybe = placeargsloc->nargs > placeargsloc->locvec.length ();

  gimple *use_stmt;
  imm_use_iterator iter;

  /* Look for uses of the PHI result LHS in return statements.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      greturn *return_stmt = dyn_cast <greturn *> (use_stmt);
      if (!return_stmt)
	continue;

      if (gimple_return_retval (return_stmt) != lhs)
	continue;

      /* Add an entry for the return statement and the locations
	 of the PHI arguments obtained above to the map.  */
      args_loc_t &argsloc = locmap.get_or_insert (use_stmt);
      argsloc.nargs = nargs;
      unsigned nelts = argsloc.locvec.length () + nlocs;
      argsloc.locvec.reserve (nelts);
      argsloc.locvec.splice (placeargsloc->locvec);

      /* Only isolate the path when the local's address is definitely
	 returned, the user enabled isolation, and BB can legally be
	 duplicated.  */
      if (!maybe
	  && (flag_isolate_erroneous_paths_dereference
	      || flag_isolate_erroneous_paths_attribute)
	  && gimple_bb (use_stmt) == bb
	  && (duplicate || can_duplicate_block_p (bb)))
	{
	  duplicate = isolate_path (bb, duplicate, e,
				    use_stmt, lhs, true);

	  /* Let caller know the path has been isolated.  */
	  *isolated = true;
	}
    }

  locmap.remove ((gimple*)-1);

  return duplicate;
}
673 : :
/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
static void
find_implicit_erroneous_behavior (void)
{
  locmap_t locmap;

  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;


      /* If BB has an edge to itself, then duplication of BB below
	 could result in reallocation of BB's PHI nodes.   If that happens
	 then the loop below over the PHIs would use the old PHI and
	 thus invalid information.  We don't have a good way to know
	 if a PHI has been reallocated, so just avoid isolation in
	 this case.  */
      if (find_edge (bb, bb))
	continue;

      /* First look for a PHI which sets a pointer to NULL and which
 	 is then dereferenced within BB.  This is somewhat overly
	 conservative, but probably catches most of the interesting
	 cases.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* Initial number of PHI arguments.  The result may change
	     from one iteration of the loop below to the next in
	     response to changes to the CFG but only the initial
	     value is stored below for use by diagnostics.  */
	  unsigned nargs = gimple_phi_num_args (phi);

	  /* PHI produces a pointer result.  See if any of the PHI's
	     arguments are NULL.

	     When we remove an edge, we want to reprocess the current
	     index since the argument at that index will have been
	     removed, hence the ugly way we update I for each iteration.  */
	  basic_block duplicate = NULL;
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi); i = next_i)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);

	      /* Advance the argument index unless a path involving
		 the current argument has been isolated.  */
	      next_i = i + 1;
	      bool isolated = false;
	      duplicate = handle_return_addr_local_phi_arg (bb, duplicate, lhs,
							    arg, e, locmap,
							    nargs, &isolated);
	      if (isolated)
		{
		  cfg_altered = true;
		  next_i = i;
		}

	      /* Only a literal zero/NULL argument is interesting
		 beyond this point.  */
	      if (!integer_zerop (arg))
		continue;

	      location_t phi_arg_loc = gimple_phi_arg_location (phi, i);

	      imm_use_iterator iter;
	      gimple *use_stmt;

	      /* We've got a NULL PHI argument.  Now see if the
 	         PHI's result is dereferenced within BB.  */
	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
	        {
	          /* We only care about uses in BB.  Catching cases in
		     other blocks would require more complex path
		     isolation code.   */
		  if (gimple_bb (use_stmt) != bb)
		    continue;

		  /* Fall back to the PHI argument's location when the
		     use statement has none of its own.  */
		  location_t loc = gimple_location (use_stmt)
		    ? gimple_location (use_stmt)
		    : phi_arg_loc;

		  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc)
		      && (duplicate || can_duplicate_block_p (bb)))
		    {
		      duplicate = isolate_path (bb, duplicate, e,
						use_stmt, lhs, false);

		      /* When we remove an incoming edge, we need to
			 reprocess the Ith element.  */
		      next_i = i;
		      cfg_altered = true;
		    }
		}
	    }
	}
    }

  /* Issue any queued -Wreturn-local-addr diagnostics.  */
  diag_returned_locals (false, locmap);
}
795 : :
/* Detect and diagnose returning the address of a local variable
   in RETURN_STMT in basic block BB.  This only becomes undefined
   behavior if the result is used, so we do not insert a trap and
   only return NULL instead.  */

static void
warn_return_addr_local (basic_block bb, greturn *return_stmt)
{
  tree val = gimple_return_retval (return_stmt);
  if (!val)
    return;

  locmap_t locmap;
  hash_set<gphi *> visited_phis;
  if (!is_addr_local (return_stmt, val, &locmap, &visited_phis))
    return;

  /* We only need it for this particular case.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);

  const args_loc_t *argsloc = locmap.get (return_stmt);
  gcc_assert (argsloc);

  bool maybe = argsloc->nargs > argsloc->locvec.length ();
  /* Even a definite return is only a "may return" when the return
     statement is not certain to execute on every path through
     the function.  */
  if (!maybe)
    maybe = !dominated_by_p (CDI_POST_DOMINATORS,
			     single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);

  diag_returned_locals (maybe, locmap);

  /* Bail if the statement isn't certain to return the address
     of a local (e.g., if it involves a conditional expression
     that wasn't transformed into a PHI or if it involves
     a MAX_EXPR or MIN_EXPR only one of whose operands is a local
     (even though such an expression isn't valid in C or has
     defined semantics in C++).  */
  if (maybe)
    return;

  /* Do not modify code if the user only asked for warnings.  */
  if (flag_isolate_erroneous_paths_dereference
      || flag_isolate_erroneous_paths_attribute)
    {
      tree zero = build_zero_cst (TREE_TYPE (val));
      gimple_return_set_retval (return_stmt, zero);
      update_stmt (return_stmt);
    }
}
844 : :
/* Look for statements which exhibit erroneous behavior.  For example
   a NULL pointer dereference.

   When found, optimize the block containing the erroneous behavior.  */
static void
find_explicit_erroneous_behavior (void)
{
  basic_block bb;
  /* Blocks needing a split are queued here and split only after the
     walk below completes, so the CFG is not mutated mid-iteration.  */
  auto_vec<gimple *> local_bb_split_points;
  bb_split_points = &local_bb_split_points;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;

      /* Now look at the statements in the block and see if any of
	 them explicitly dereference a NULL pointer.  This happens
	 because of jump threading and constant propagation.  */
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);

	  if (stmt_uses_0_or_null_in_undefined_way (stmt))
	    {
	      insert_trap (&si, null_pointer_node);
	      /* Resynchronize BB with the block now holding the
		 statement, in case insert_trap restructured it.  */
	      bb = gimple_bb (gsi_stmt (si));

	      /* Ignore any more operands on this statement and
		 continue the statement iterator (which should
		 terminate its loop immediately).  */
	      cfg_altered = true;
	      break;
	    }

	  /* Look for a return statement that returns the address
	     of a local variable or the result of alloca.  */
	  if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
	    warn_return_addr_local (bb, return_stmt);
	}
    }

  /* warn_return_addr_local computes post-dominators lazily; release
     them once the whole function has been scanned.  */
  free_dominance_info (CDI_POST_DOMINATORS);

  /* Perform delayed splitting of blocks.  */
  for (gimple *stmt : local_bb_split_points)
    split_block (gimple_bb (stmt), stmt);

  bb_split_points = NULL;
}
905 : :
906 : : /* Search the function for statements which, if executed, would cause
907 : : the program to fault such as a dereference of a NULL pointer.
908 : :
909 : : Such a program can't be valid if such a statement was to execute
910 : : according to ISO standards.
911 : :
912 : : We detect explicit NULL pointer dereferences as well as those implied
913 : : by a PHI argument having a NULL value which unconditionally flows into
914 : : a dereference in the same block as the PHI.
915 : :
916 : : In the former case we replace the offending statement with an
917 : : unconditional trap and eliminate the outgoing edges from the statement's
918 : : basic block. This may expose secondary optimization opportunities.
919 : :
920 : : In the latter case, we isolate the path(s) with the NULL PHI
921 : : feeding the dereference. We can then replace the offending statement
922 : : and eliminate the outgoing edges in the duplicate. Again, this may
923 : : expose secondary optimization opportunities.
924 : :
925 : : A warning for both cases may be advisable as well.
926 : :
927 : : Other statically detectable violations of the ISO standard could be
928 : : handled in a similar way, such as out-of-bounds array indexing. */
929 : :
930 : : static unsigned int
931 : 969130 : gimple_ssa_isolate_erroneous_paths (void)
932 : : {
933 : 969130 : initialize_original_copy_tables ();
934 : :
935 : : /* Search all the blocks for edges which, if traversed, will
936 : : result in undefined behavior. */
937 : 969130 : cfg_altered = false;
938 : :
939 : : /* First handle cases where traversal of a particular edge
940 : : triggers undefined behavior. These cases require creating
941 : : duplicate blocks and thus new SSA_NAMEs.
942 : :
943 : : We want that process complete prior to the phase where we start
944 : : removing edges from the CFG. Edge removal may ultimately result in
945 : : removal of PHI nodes and thus releasing SSA_NAMEs back to the
946 : : name manager.
947 : :
948 : : If the two processes run in parallel we could release an SSA_NAME
949 : : back to the manager but we could still have dangling references
950 : : to the released SSA_NAME in unreachable blocks.
951 : : that any released names not have dangling references in the IL. */
952 : 969130 : find_implicit_erroneous_behavior ();
953 : 969130 : find_explicit_erroneous_behavior ();
954 : :
955 : 969130 : free_original_copy_tables ();
956 : :
957 : : /* We scramble the CFG and loop structures a bit, clean up
958 : : appropriately. We really should incrementally update the
959 : : loop structures, in theory it shouldn't be that hard. */
960 : 969130 : if (cfg_altered)
961 : : {
962 : 1199 : free_dominance_info (CDI_DOMINATORS);
963 : 1199 : loops_state_set (LOOPS_NEED_FIXUP);
964 : 1199 : return TODO_cleanup_cfg | TODO_update_ssa;
965 : : }
966 : : return 0;
967 : : }
968 : :
namespace {
/* Static descriptor for the erroneous-path isolation pass: name,
   timing variable, and required IL properties (CFG + SSA form).  */
const pass_data pass_data_isolate_erroneous_paths =
{
  GIMPLE_PASS, /* type */
  "isolate-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Pass wrapper binding the descriptor above to the implementation in
   gimple_ssa_isolate_erroneous_paths.  */
class pass_isolate_erroneous_paths : public gimple_opt_pass
{
public:
  pass_isolate_erroneous_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt)
  {}

  /* opt_pass methods: */
  /* clone () lets the pass manager instantiate this pass at more than
     one point in the pipeline.  */
  opt_pass * clone () final override
  {
    return new pass_isolate_erroneous_paths (m_ctxt);
  }
  /* Run whenever either path-isolation flag is on, or when only the
     -Wnull-dereference diagnostic was requested.  */
  bool gate (function *) final override
  {
    /* If we do not have a suitable builtin function for the trap statement,
       then do not perform the optimization.  */
    return (flag_isolate_erroneous_paths_dereference != 0
	    || flag_isolate_erroneous_paths_attribute != 0
	    || warn_null_dereference);
  }

  unsigned int execute (function *) final override
  {
    return gimple_ssa_isolate_erroneous_paths ();
  }

}; // class pass_isolate_erroneous_paths
}
1011 : :
/* Factory entry point: allocate a new instance of the erroneous-path
   isolation pass for context CTXT.  Caller takes ownership.  */
gimple_opt_pass *
make_pass_isolate_erroneous_paths (gcc::context *ctxt)
{
  return new pass_isolate_erroneous_paths (ctxt);
}
|