Branch data Line data Source code
1 : : /* Detect paths through the CFG which can never be executed in a conforming
2 : : program and isolate them.
3 : :
4 : : Copyright (C) 2013-2024 Free Software Foundation, Inc.
5 : :
6 : : This file is part of GCC.
7 : :
8 : : GCC is free software; you can redistribute it and/or modify
9 : : it under the terms of the GNU General Public License as published by
10 : : the Free Software Foundation; either version 3, or (at your option)
11 : : any later version.
12 : :
13 : : GCC is distributed in the hope that it will be useful,
14 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : : GNU General Public License for more details.
17 : :
18 : : You should have received a copy of the GNU General Public License
19 : : along with GCC; see the file COPYING3. If not see
20 : : <http://www.gnu.org/licenses/>. */
21 : :
22 : : #include "config.h"
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "backend.h"
26 : : #include "tree.h"
27 : : #include "gimple.h"
28 : : #include "cfghooks.h"
29 : : #include "tree-pass.h"
30 : : #include "ssa.h"
31 : : #include "diagnostic-core.h"
32 : : #include "fold-const.h"
33 : : #include "gimple-iterator.h"
34 : : #include "gimple-walk.h"
35 : : #include "tree-ssa.h"
36 : : #include "cfgloop.h"
37 : : #include "tree-cfg.h"
38 : : #include "cfganal.h"
39 : : #include "intl.h"
40 : :
41 : :
42 : : static bool cfg_altered;
43 : :
44 : : /* Callback for walk_stmt_load_store_ops.
45 : :
46 : : Return TRUE if OP will dereference the tree stored in DATA, FALSE
47 : : otherwise.
48 : :
49 : : This routine only makes a superficial check for a dereference. Thus,
50 : : it must only be used if it is safe to return a false negative. */
51 : : static bool
52 : 2597 : check_loadstore (gimple *stmt, tree op, tree, void *data)
53 : : {
54 : 2597 : if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF)
55 : 2597 : && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0))
56 : : {
57 : 2517 : TREE_THIS_VOLATILE (op) = 1;
58 : 2517 : TREE_SIDE_EFFECTS (op) = 1;
59 : 2517 : update_stmt (stmt);
60 : 2517 : return true;
61 : : }
62 : : return false;
63 : : }
64 : :
/* Insert a call to __builtin_trap at (or near) the statement referenced
   by SI_P, which dereferences (or uses) OP, and split the block so the
   trap terminates it.  On return *SI_P references the original
   statement when the trap was inserted after it.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.   Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.

     Note this first walk also has the side effect (via check_loadstore)
     of marking the dereferenced MEM_REF volatile so it is not deleted.  */
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
	 type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  /* Build the trap call as a single-statement sequence.  */
  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement, for the other cases we want
     to insert before the statement.  The first walk above passed NULL
     for the load callback, so a load-only dereference is detected by
     this second walk, which checks both loads and stores.  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
				check_loadstore,
				check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
	{
	  /* STMT already ends the block; split after it and we are done.  */
	  split_block (gimple_bb (stmt), stmt);
	  return;
	}
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  /* Make the trap the last statement of its block and point *SI_P back
     at the original statement.  */
  split_block (gimple_bb (new_stmt), new_stmt);
  *si_p = gsi_for_stmt (stmt);
}
117 : :
/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB' (which may be equal to DUPLICATE).  */

ATTRIBUTE_RETURNS_NONNULL basic_block
isolate_path (basic_block bb, basic_block duplicate,
	      edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;
  bool impossible = true;
  profile_count count = e->count ();

  /* If no statement before STMT can terminate the block, then execution
     along E unavoidably reaches the undefined behavior, so mark the edge
     as impossible; otherwise merely cold.  */
  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
    if (stmt_can_terminate_bb_p (gsi_stmt (si)))
      {
	impossible = false;
	break;
      }
  force_edge_cold (e, impossible);

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      duplicate->count = profile_count::zero ();
      if (!ret_zero)
	for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
	  remove_edge (e2);
    }
  bb->count -= count;

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    {
      flush_pending_stmts (e2);

      /* Update profile only when redirection is really processed.  */
      bb->count += e->count ();
    }

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     duplicate.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
	{
	  /* For the return-local-address case replace the returned value
	     with zero instead of trapping.  */
	  greturn *ret = as_a <greturn *> (gsi_stmt (si2));
	  tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
	  gimple_return_set_retval (ret, zero);
	  update_stmt (ret);
	}
      else
	insert_trap (&si2, op);
    }

  return duplicate;
}
225 : :
226 : : /* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor.
227 : : FALSE otherwise. */
228 : :
229 : : static bool
230 : 48726657 : is_divmod_with_given_divisor (gimple *stmt, tree divisor)
231 : : {
232 : : /* Only assignments matter. */
233 : 48726657 : if (!is_gimple_assign (stmt))
234 : : return false;
235 : :
236 : : /* Check for every DIV/MOD expression. */
237 : 12842363 : enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
238 : 12842363 : if (rhs_code == TRUNC_DIV_EXPR
239 : 12842363 : || rhs_code == FLOOR_DIV_EXPR
240 : 12793966 : || rhs_code == CEIL_DIV_EXPR
241 : 12793966 : || rhs_code == EXACT_DIV_EXPR
242 : : || rhs_code == ROUND_DIV_EXPR
243 : 12764209 : || rhs_code == TRUNC_MOD_EXPR
244 : : || rhs_code == FLOOR_MOD_EXPR
245 : 12724226 : || rhs_code == CEIL_MOD_EXPR
246 : 12723905 : || rhs_code == ROUND_MOD_EXPR)
247 : : {
248 : : /* Pointer equality is fine when DIVISOR is an SSA_NAME, but
249 : : not sufficient for constants which may have different types. */
250 : 118458 : if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0))
251 : : return true;
252 : : }
253 : : return false;
254 : : }
255 : :
256 : : /* NAME is an SSA_NAME that we have already determined has the value 0 or NULL.
257 : :
258 : : Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results
259 : : in undefined behavior, FALSE otherwise
260 : :
261 : : LOC is used for issuing diagnostics. This case represents potential
262 : : undefined behavior exposed by path splitting and that's reflected in
263 : : the diagnostic. */
264 : :
265 : : bool
266 : 640050 : stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc)
267 : : {
268 : : /* If we are working with a non pointer type, then see
269 : : if this use is a DIV/MOD operation using NAME as the
270 : : divisor. */
271 : 640050 : if (!POINTER_TYPE_P (TREE_TYPE (name)))
272 : : {
273 : 528798 : if (!cfun->can_throw_non_call_exceptions)
274 : 320685 : return is_divmod_with_given_divisor (use_stmt, name);
275 : : return false;
276 : : }
277 : :
278 : : /* NAME is a pointer, so see if it's used in a context where it must
279 : : be non-NULL. */
280 : 111252 : bool by_dereference
281 : 111252 : = infer_nonnull_range_by_dereference (use_stmt, name);
282 : :
283 : 111252 : if (by_dereference
284 : 111252 : || infer_nonnull_range_by_attribute (use_stmt, name))
285 : : {
286 : :
287 : 1153 : if (by_dereference)
288 : : {
289 : 1135 : warning_at (loc, OPT_Wnull_dereference,
290 : : "potential null pointer dereference");
291 : 1135 : if (!flag_isolate_erroneous_paths_dereference)
292 : : return false;
293 : : }
294 : : else
295 : : {
296 : 18 : if (!flag_isolate_erroneous_paths_attribute)
297 : : return false;
298 : : }
299 : 1137 : return true;
300 : : }
301 : : return false;
302 : : }
303 : :
304 : : /* Return TRUE if USE_STMT uses 0 or NULL in a context which results in
305 : : undefined behavior, FALSE otherwise.
306 : :
307 : : These cases are explicit in the IL. */
308 : :
309 : : bool
310 : 59511015 : stmt_uses_0_or_null_in_undefined_way (gimple *stmt)
311 : : {
312 : 59511015 : if (!cfun->can_throw_non_call_exceptions
313 : 59511015 : && is_divmod_with_given_divisor (stmt, integer_zero_node))
314 : : return true;
315 : :
316 : : /* By passing null_pointer_node, we can use the
317 : : infer_nonnull_range functions to detect explicit NULL
318 : : pointer dereferences and other uses where a non-NULL
319 : : value is required. */
320 : :
321 : 59510760 : bool by_dereference
322 : 59510760 : = infer_nonnull_range_by_dereference (stmt, null_pointer_node);
323 : 59510760 : if (by_dereference
324 : 59510760 : || infer_nonnull_range_by_attribute (stmt, null_pointer_node))
325 : : {
326 : 1118 : if (by_dereference)
327 : : {
328 : 857 : location_t loc = gimple_location (stmt);
329 : 857 : warning_at (loc, OPT_Wnull_dereference,
330 : : "null pointer dereference");
331 : 857 : if (!flag_isolate_erroneous_paths_dereference)
332 : : return false;
333 : : }
334 : : else
335 : : {
336 : 261 : if (!flag_isolate_erroneous_paths_attribute)
337 : : return false;
338 : : }
339 : 857 : return true;
340 : : }
341 : : return false;
342 : : }
343 : :
344 : : /* Describes the property of a return statement that may return
345 : : the address of one or more local variables. The type must
346 : : be safely assignable and copyable so that it can be stored in
347 : : a hash_map. */
348 : : class args_loc_t
349 : : {
350 : : public:
351 : :
352 : 42467 : args_loc_t (): nargs (), locvec (), ptr (&ptr)
353 : : {
354 : 42467 : locvec.create (4);
355 : 42467 : }
356 : :
357 : 0 : args_loc_t (const args_loc_t &rhs)
358 : 0 : : nargs (rhs.nargs), locvec (rhs.locvec.copy ()), ptr (&ptr) { }
359 : :
360 : : args_loc_t& operator= (const args_loc_t &rhs)
361 : : {
362 : : nargs = rhs.nargs;
363 : : locvec.release ();
364 : : locvec = rhs.locvec.copy ();
365 : : return *this;
366 : : }
367 : :
368 : 42467 : ~args_loc_t ()
369 : : {
370 : 42467 : locvec.release ();
371 : 42467 : gcc_assert (ptr == &ptr);
372 : 42467 : }
373 : :
374 : : /* For a PHI in a return statement its number of arguments. When greater
375 : : than LOCVEC.LENGTH () implies that an address of one of the locals in
376 : : LOCVEC may but need not be returned by the statement. Otherwise,
377 : : unless both are zero, it implies it definitely is returned. */
378 : : unsigned nargs;
379 : : /* The locations of local variables/alloca calls returned by the return
380 : : statement. Avoid using auto_vec here since it's not safe to copy due
381 : : to pr90904. */
382 : : vec <location_t> locvec;
383 : : void *ptr;
384 : : };
385 : :
386 : : /* A mapping from a return statement to the locations of local variables
387 : : whose addresses it may return. */
388 : : typedef hash_map <gimple *, args_loc_t> locmap_t;
389 : :
390 : : /* Given the LOCMAP mapping, issue diagnostics about returning addresses
391 : : of local variables. When MAYBE is set, all diagnostics will be of
392 : : the "may return" kind. Otherwise each will be determined based on
393 : : the equality of the corresponding NARGS and LOCVEC.LENGTH () values. */
394 : :
395 : : static void
396 : 914810 : diag_returned_locals (bool maybe, const locmap_t &locmap)
397 : : {
398 : 915446 : for (locmap_t::iterator it = locmap.begin (); it != locmap.end (); ++it)
399 : : {
400 : 318 : gimple *stmt = (*it).first;
401 : 318 : const args_loc_t &argsloc = (*it).second;
402 : 318 : location_t stmtloc = gimple_location (stmt);
403 : 318 : if (stmtloc == UNKNOWN_LOCATION)
404 : : /* When multiple return statements are merged into one it
405 : : may not have an associated location. Use the location
406 : : of the closing brace instead. */
407 : 18 : stmtloc = cfun->function_end_locus;
408 : :
409 : 318 : auto_diagnostic_group d;
410 : 318 : unsigned nargs = argsloc.locvec.length ();
411 : 636 : if (warning_at (stmtloc, OPT_Wreturn_local_addr,
412 : 151 : (maybe || argsloc.nargs > nargs
413 : : ? G_("function may return address of local variable")
414 : : : G_("function returns address of local variable"))))
415 : : {
416 : 571 : for (unsigned i = 0; i != nargs; ++i)
417 : 319 : inform (argsloc.locvec[i], "declared here");
418 : : }
419 : 318 : }
420 : 914810 : }
421 : :
/* Return true if EXP is an expression of pointer type that refers
   to the address of one or more variables with automatic storage
   duration.  If so, add an entry to *PLOCMAP and insert into
   PLOCMAP->LOCVEC the locations of the corresponding local variables
   whose address is returned by the RETURN_STMT (which may be set to
   (gimple*)-1 as a placeholder for such a statement).  VISITED is
   a bitmap of PHI nodes already visited by recursive calls.  When
   null, PHI expressions are not considered.  */

static bool
is_addr_local (gimple *return_stmt, tree exp, locmap_t *plocmap,
	       hash_set<gphi *> *visited)
{
  if (TREE_CODE (exp) == ADDR_EXPR)
    {
      tree baseaddr = get_base_address (TREE_OPERAND (exp, 0));
      if (TREE_CODE (baseaddr) == MEM_REF)
	/* &*p: look through the MEM_REF and examine the pointer.  */
	return is_addr_local (return_stmt, TREE_OPERAND (baseaddr, 0),
			      plocmap, visited);

      /* Only local variables and parameters are of interest; globals
	 and other declarations have static storage duration.  */
      if ((!VAR_P (baseaddr)
	   || is_global_var (baseaddr))
	  && TREE_CODE (baseaddr) != PARM_DECL)
	return false;

      /* Record the declaration's location for the diagnostic.  */
      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
      argsloc.locvec.safe_push (DECL_SOURCE_LOCATION (baseaddr));
      return true;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (exp)))
    return false;

  if (TREE_CODE (exp) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
      enum gimple_code code = gimple_code (def_stmt);

      if (is_gimple_assign (def_stmt))
	{
	  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
	  if (POINTER_TYPE_P (type))
	    {
	      /* NOTE: this CODE shadows the enum gimple_code above.  */
	      tree_code code = gimple_assign_rhs_code (def_stmt);
	      tree ptr1 = NULL_TREE, ptr2 = NULL_TREE;

	      /* Set to the number of arguments examined that should
		 be added to ARGSLOC->NARGS to identify expressions
		 only some but not all of whose operands refer to local
		 addresses.  */
	      unsigned nargs = 0;
	      if (code == COND_EXPR)
		{
		  ptr1 = gimple_assign_rhs2 (def_stmt);
		  ptr2 = gimple_assign_rhs3 (def_stmt);
		  nargs = 2;
		}
	      else if (code == MAX_EXPR || code == MIN_EXPR)
		{
		  ptr1 = gimple_assign_rhs1 (def_stmt);
		  ptr2 = gimple_assign_rhs2 (def_stmt);
		  nargs = 2;
		}
	      else if (code == ADDR_EXPR
		       || code == NOP_EXPR
		       || code == POINTER_PLUS_EXPR)
		/* Leave NARGS at zero and let the recursive call set it.  */
		ptr1 = gimple_assign_rhs1 (def_stmt);

	      /* Avoid short-circuiting the logical OR result in case
		 both operands refer to local variables, in which case
		 both should be considered and identified in the warning.  */
	      bool res1 = false, res2 = false;
	      if (ptr1)
		res1 = is_addr_local (return_stmt, ptr1, plocmap, visited);
	      if (ptr2)
		res2 = is_addr_local (return_stmt, ptr2, plocmap, visited);

	      if (nargs)
		if (args_loc_t *argsloc = plocmap->get (return_stmt))
		  argsloc->nargs += nargs;

	      return res1 || res2;
	    }
	  return false;
	}

      if (code == GIMPLE_CALL
	  && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
	{
	  /* Handle alloca and friends that return pointers to automatic
	     storage.  */
	  tree fn = gimple_call_fndecl (def_stmt);
	  /* NOTE: this CODE shadows the enum gimple_code above.  */
	  int code = DECL_FUNCTION_CODE (fn);
	  if (code == BUILT_IN_ALLOCA
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX)
	    {
	      /* For alloca the call's own location identifies the
		 "declaration".  */
	      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	      argsloc.locvec.safe_push (gimple_location (def_stmt));
	      return true;
	    }

	  if (gimple_call_num_args (def_stmt) < 1)
	    return false;

	  /* Recursively examine the first argument of calls to built-ins
	     that return it.  */
	  switch (code)
	    {
	    case BUILT_IN_MEMCPY:
	    case BUILT_IN_MEMCPY_CHK:
	    case BUILT_IN_MEMPCPY:
	    case BUILT_IN_MEMPCPY_CHK:
	    case BUILT_IN_MEMMOVE:
	    case BUILT_IN_MEMMOVE_CHK:
	    case BUILT_IN_STPCPY:
	    case BUILT_IN_STPCPY_CHK:
	    case BUILT_IN_STPNCPY:
	    case BUILT_IN_STPNCPY_CHK:
	    case BUILT_IN_STRCAT:
	    case BUILT_IN_STRCAT_CHK:
	    case BUILT_IN_STRCHR:
	    case BUILT_IN_STRCPY:
	    case BUILT_IN_STRCPY_CHK:
	    case BUILT_IN_STRNCAT:
	    case BUILT_IN_STRNCAT_CHK:
	    case BUILT_IN_STRNCPY:
	    case BUILT_IN_STRNCPY_CHK:
	    case BUILT_IN_STRRCHR:
	    case BUILT_IN_STRSTR:
	      return is_addr_local (return_stmt,
				    gimple_call_arg (def_stmt, 0),
				    plocmap, visited);
	    default:
	      return false;
	    }
	}

      if (code == GIMPLE_PHI && visited)
	{
	  gphi *phi_stmt = as_a <gphi *> (def_stmt);
	  /* hash_set::add returns true if the PHI was already present;
	     stop there to avoid infinite recursion on PHI cycles.  */
	  if (visited->add (phi_stmt))
	    return false;

	  unsigned count = 0;
	  unsigned nargs = gimple_phi_num_args (phi_stmt);
	  args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	  /* Bump up the number of operands examined by the number of
	     operands of this PHI.  */
	  argsloc.nargs += nargs;
	  for (unsigned i = 0; i < gimple_phi_num_args (phi_stmt); ++i)
	    {
	      tree arg = gimple_phi_arg_def (phi_stmt, i);
	      if (is_addr_local (return_stmt, arg, plocmap, visited))
		++count;
	    }
	  /* True if at least one PHI argument is a local address.  */
	  return count != 0;
	}
    }

  return false;
}
585 : :
/* Detect returning the address of a local variable in a PHI result LHS
   and argument ARG and PHI edge E in basic block BB.  Add an entry for
   each use to LOCMAP, setting its NARGS member to the NARGS argument
   (the number of PHI operands) plus the number of arguments in binary
   expressions refereced by ARG.  Call isolate_path for each returned
   address and set *ISOLATED to true if called.
   Return either DUPLICATE or the most recent result of isolate_path.  */

static basic_block
handle_return_addr_local_phi_arg (basic_block bb, basic_block duplicate,
				  tree lhs, tree arg, edge e, locmap_t &locmap,
				  unsigned nargs, bool *isolated)
{
  /* Use (gimple*)-1 as a temporary placeholder and replace it with
     the return statement below once it is known.  Using a null doesn't
     work because it's used by the hash_map to mean "no-entry."  Pass
     null instead of a visited_phis bitmap to avoid descending into
     PHIs since they are being processed by the caller.  Those that
     remain will be checked again later.  */
  if (!is_addr_local ((gimple*)-1, arg, &locmap, NULL))
    {
      /* Remove the placeholder regardless of success or failure.  */
      locmap.remove ((gimple*)-1);
      return duplicate;
    }

  /* is_addr_local returned true, so the placeholder entry must exist
     and hold at least one location.  */
  const args_loc_t* const placeargsloc = locmap.get ((gimple*)-1);
  const unsigned nlocs = placeargsloc->locvec.length ();
  gcc_assert (nlocs);

  /* Add to the number of PHI arguments determined by the caller
     the number of operands of the expressions referenced by ARG.
     This lets the caller determine whether it's dealing with
     a "may return" or "definitely returns."  */
  nargs += placeargsloc->nargs;

  /* Set to true if any expressions referenced by ARG involve
     multiple addresses only some of which are those of locals.  */
  bool maybe = placeargsloc->nargs > placeargsloc->locvec.length ();

  gimple *use_stmt;
  imm_use_iterator iter;

  /* Look for uses of the PHI result LHS in return statements.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      greturn *return_stmt = dyn_cast <greturn *> (use_stmt);
      if (!return_stmt)
	continue;

      if (gimple_return_retval (return_stmt) != lhs)
	continue;

      /* Add an entry for the return statement and the locations
	 of the PHI arguments obtained above to the map.  */
      args_loc_t &argsloc = locmap.get_or_insert (use_stmt);
      argsloc.nargs = nargs;
      unsigned nelts = argsloc.locvec.length () + nlocs;
      argsloc.locvec.reserve (nelts);
      argsloc.locvec.splice (placeargsloc->locvec);

      /* Only isolate when the address is definitely returned, isolation
	 is enabled, the return is in BB itself, and BB can (or already
	 has been) duplicated.  */
      if (!maybe
	  && (flag_isolate_erroneous_paths_dereference
	      || flag_isolate_erroneous_paths_attribute)
	  && gimple_bb (use_stmt) == bb
	  && (duplicate || can_duplicate_block_p (bb)))
	{
	  duplicate = isolate_path (bb, duplicate, e,
				    use_stmt, lhs, true);

	  /* Let caller know the path has been isolated.  */
	  *isolated = true;
	}
    }

  locmap.remove ((gimple*)-1);

  return duplicate;
}
665 : :
/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
static void
find_implicit_erroneous_behavior (void)
{
  locmap_t locmap;

  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;


      /* If BB has an edge to itself, then duplication of BB below
	 could result in reallocation of BB's PHI nodes.   If that happens
	 then the loop below over the PHIs would use the old PHI and
	 thus invalid information.  We don't have a good way to know
	 if a PHI has been reallocated, so just avoid isolation in
	 this case.  */
      if (find_edge (bb, bb))
	continue;

      /* First look for a PHI which sets a pointer to NULL and which
 	 is then dereferenced within BB.  This is somewhat overly
	 conservative, but probably catches most of the interesting
	 cases.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* Initial number of PHI arguments.  The result may change
	     from one iteration of the loop below to the next in
	     response to changes to the CFG but only the initial
	     value is stored below for use by diagnostics.  */
	  unsigned nargs = gimple_phi_num_args (phi);

	  /* PHI produces a pointer result.  See if any of the PHI's
	     arguments are NULL.

	     When we remove an edge, we want to reprocess the current
	     index since the argument at that index will have been
	     removed, hence the ugly way we update I for each iteration.  */
	  basic_block duplicate = NULL;
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi); i = next_i)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);

	      /* Advance the argument index unless a path involving
		 the current argument has been isolated.  */
	      next_i = i + 1;
	      bool isolated = false;
	      duplicate = handle_return_addr_local_phi_arg (bb, duplicate, lhs,
							    arg, e, locmap,
							    nargs, &isolated);
	      if (isolated)
		{
		  cfg_altered = true;
		  next_i = i;
		}

	      /* Only a literal zero/NULL PHI argument is of further
		 interest here.  */
	      if (!integer_zerop (arg))
		continue;

	      location_t phi_arg_loc = gimple_phi_arg_location (phi, i);

	      imm_use_iterator iter;
	      gimple *use_stmt;

	      /* We've got a NULL PHI argument.  Now see if the
 		 PHI's result is dereferenced within BB.  */
	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
		{
		  /* We only care about uses in BB.  Catching cases in
		     in other blocks would require more complex path
		     isolation code.   */
		  if (gimple_bb (use_stmt) != bb)
		    continue;

		  /* Prefer the use statement's own location; fall back
		     to the PHI argument's location when it has none.  */
		  location_t loc = gimple_location (use_stmt)
		    ? gimple_location (use_stmt)
		    : phi_arg_loc;

		  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc)
		      && (duplicate || can_duplicate_block_p (bb)))
		    {
		      duplicate = isolate_path (bb, duplicate, e,
						use_stmt, lhs, false);

		      /* When we remove an incoming edge, we need to
			 reprocess the Ith element.  */
		      next_i = i;
		      cfg_altered = true;
		    }
		}
	    }
	}
    }

  /* Emit any queued -Wreturn-local-addr diagnostics.  */
  diag_returned_locals (false, locmap);
}
787 : :
/* Detect and diagnose returning the address of a local variable
   in RETURN_STMT in basic block BB.  This only becomes undefined
   behavior if the result is used, so we do not insert a trap and
   only return NULL instead.  */

static void
warn_return_addr_local (basic_block bb, greturn *return_stmt)
{
  tree val = gimple_return_retval (return_stmt);
  if (!val)
    return;

  locmap_t locmap;
  hash_set<gphi *> visited_phis;
  if (!is_addr_local (return_stmt, val, &locmap, &visited_phis))
    return;

  /* We only need it for this particular case.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);

  const args_loc_t *argsloc = locmap.get (return_stmt);
  gcc_assert (argsloc);

  /* "May return" when only some of the examined operands were local
     addresses, or when not every path through the function reaches
     this return statement.  */
  bool maybe = argsloc->nargs > argsloc->locvec.length ();
  if (!maybe)
    maybe = !dominated_by_p (CDI_POST_DOMINATORS,
			     single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);

  diag_returned_locals (maybe, locmap);

  /* Bail if the statement isn't certain to return the address
     of a local (e.g., if it involves a conditional expression
     that wasn't transformed into a PHI or if it involves
     a MAX_EXPR or MIN_EXPR only one of whose operands is a local
     (even though such an expression isn't valid in C or has
     defined semantics in C++).  */
  if (maybe)
    return;

  /* Do not modify code if the user only asked for warnings.  */
  if (flag_isolate_erroneous_paths_dereference
      || flag_isolate_erroneous_paths_attribute)
    {
      tree zero = build_zero_cst (TREE_TYPE (val));
      gimple_return_set_retval (return_stmt, zero);
      update_stmt (return_stmt);
    }
}
836 : :
837 : : /* Look for statements which exhibit erroneous behavior. For example
838 : : a NULL pointer dereference.
839 : :
840 : : When found, optimize the block containing the erroneous behavior. */
841 : : static void
842 : 914560 : find_explicit_erroneous_behavior (void)
843 : : {
844 : 914560 : basic_block bb;
845 : :
846 : 9039153 : FOR_EACH_BB_FN (bb, cfun)
847 : : {
848 : 8124593 : gimple_stmt_iterator si;
849 : :
850 : : /* Out of an abundance of caution, do not isolate paths to a
851 : : block where the block has any abnormal outgoing edges.
852 : :
853 : : We might be able to relax this in the future. We have to detect
854 : : when we have to split the block with the NULL dereference and
855 : : the trap we insert. We have to preserve abnormal edges out
856 : : of the isolated block which in turn means updating PHIs at
857 : : the targets of those abnormal outgoing edges. */
858 : 8124593 : if (has_abnormal_or_eh_outgoing_edge_p (bb))
859 : 761095 : continue;
860 : :
861 : : /* Now look at the statements in the block and see if any of
862 : : them explicitly dereference a NULL pointer. This happens
863 : : because of jump threading and constant propagation. */
864 : 74236899 : for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
865 : : {
866 : 59511015 : gimple *stmt = gsi_stmt (si);
867 : :
868 : 59511015 : if (stmt_uses_0_or_null_in_undefined_way (stmt))
869 : : {
870 : 1112 : insert_trap (&si, null_pointer_node);
871 : 1112 : bb = gimple_bb (gsi_stmt (si));
872 : :
873 : : /* Ignore any more operands on this statement and
874 : : continue the statement iterator (which should
875 : : terminate its loop immediately. */
876 : 1112 : cfg_altered = true;
877 : 1112 : break;
878 : : }
879 : :
880 : : /* Look for a return statement that returns the address
881 : : of a local variable or the result of alloca. */
882 : 60403113 : if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
883 : 893210 : warn_return_addr_local (bb, return_stmt);
884 : : }
885 : : }
886 : 914560 : }
887 : :
888 : : /* Search the function for statements which, if executed, would cause
889 : : the program to fault such as a dereference of a NULL pointer.
890 : :
891 : : Such a program can't be valid if such a statement was to execute
892 : : according to ISO standards.
893 : :
894 : : We detect explicit NULL pointer dereferences as well as those implied
895 : : by a PHI argument having a NULL value which unconditionally flows into
896 : : a dereference in the same block as the PHI.
897 : :
898 : : In the former case we replace the offending statement with an
899 : : unconditional trap and eliminate the outgoing edges from the statement's
900 : : basic block. This may expose secondary optimization opportunities.
901 : :
902 : : In the latter case, we isolate the path(s) with the NULL PHI
903 : : feeding the dereference. We can then replace the offending statement
904 : : and eliminate the outgoing edges in the duplicate. Again, this may
905 : : expose secondary optimization opportunities.
906 : :
907 : : A warning for both cases may be advisable as well.
908 : :
909 : : Other statically detectable violations of the ISO standard could be
910 : : handled in a similar way, such as out-of-bounds array indexing. */
911 : :
912 : : static unsigned int
913 : 914560 : gimple_ssa_isolate_erroneous_paths (void)
914 : : {
915 : 914560 : initialize_original_copy_tables ();
916 : :
917 : : /* Search all the blocks for edges which, if traversed, will
918 : : result in undefined behavior. */
919 : 914560 : cfg_altered = false;
920 : :
921 : : /* First handle cases where traversal of a particular edge
922 : : triggers undefined behavior. These cases require creating
923 : : duplicate blocks and thus new SSA_NAMEs.
924 : :
925 : : We want that process complete prior to the phase where we start
926 : : removing edges from the CFG. Edge removal may ultimately result in
927 : : removal of PHI nodes and thus releasing SSA_NAMEs back to the
928 : : name manager.
929 : :
930 : : If the two processes run in parallel we could release an SSA_NAME
931 : : back to the manager but we could still have dangling references
932 : : to the released SSA_NAME in unreachable blocks.
933 : : that any released names not have dangling references in the IL. */
934 : 914560 : find_implicit_erroneous_behavior ();
935 : 914560 : find_explicit_erroneous_behavior ();
936 : :
937 : 914560 : free_original_copy_tables ();
938 : :
939 : : /* We scramble the CFG and loop structures a bit, clean up
940 : : appropriately. We really should incrementally update the
941 : : loop structures, in theory it shouldn't be that hard. */
942 : 914560 : free_dominance_info (CDI_POST_DOMINATORS);
943 : 914560 : if (cfg_altered)
944 : : {
945 : 1216 : free_dominance_info (CDI_DOMINATORS);
946 : 1216 : loops_state_set (LOOPS_NEED_FIXUP);
947 : 1216 : return TODO_cleanup_cfg | TODO_update_ssa;
948 : : }
949 : : return 0;
950 : : }
951 : :
namespace {
/* Pass registration data for the "isolate-paths" GIMPLE pass.  */
const pass_data pass_data_isolate_erroneous_paths =
{
  GIMPLE_PASS, /* type */
  "isolate-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Pass wrapper around gimple_ssa_isolate_erroneous_paths.  */
class pass_isolate_erroneous_paths : public gimple_opt_pass
{
public:
  pass_isolate_erroneous_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override
  {
    return new pass_isolate_erroneous_paths (m_ctxt);
  }
  bool gate (function *) final override
  {
    /* Run the pass only when some form of path isolation was requested
       or when NULL-dereference diagnostics are enabled; otherwise there
       is no work for it to do.  */
    return (flag_isolate_erroneous_paths_dereference != 0
	    || flag_isolate_erroneous_paths_attribute != 0
	    || warn_null_dereference);
  }

  unsigned int execute (function *) final override
  {
    return gimple_ssa_isolate_erroneous_paths ();
  }

}; // class pass_isolate_erroneous_paths
}
994 : :
/* Factory function used by the pass manager to create a fresh
   instance of the isolate-paths pass.  */
gimple_opt_pass *
make_pass_isolate_erroneous_paths (gcc::context *ctxt)
{
  return new pass_isolate_erroneous_paths (ctxt);
}
|