Line data Source code
1 : /* A state machine for tracking "taint": unsanitized uses
2 : of data potentially under an attacker's control.
3 :
4 : Copyright (C) 2019-2026 Free Software Foundation, Inc.
5 : Contributed by David Malcolm <dmalcolm@redhat.com>.
6 :
7 : This file is part of GCC.
8 :
9 : GCC is free software; you can redistribute it and/or modify it
10 : under the terms of the GNU General Public License as published by
11 : the Free Software Foundation; either version 3, or (at your option)
12 : any later version.
13 :
14 : GCC is distributed in the hope that it will be useful, but
15 : WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 : General Public License for more details.
18 :
19 : You should have received a copy of the GNU General Public License
20 : along with GCC; see the file COPYING3. If not see
21 : <http://www.gnu.org/licenses/>. */
22 :
23 : #include "analyzer/common.h"
24 :
25 : #include "gimple-iterator.h"
26 : #include "ordered-hash-map.h"
27 : #include "cgraph.h"
28 : #include "cfg.h"
29 : #include "digraph.h"
30 : #include "stringpool.h"
31 : #include "attribs.h"
32 : #include "fold-const.h"
33 : #include "diagnostics/sarif-sink.h"
34 : #include "gcc-urlifier.h"
35 :
36 : #include "analyzer/analyzer-logging.h"
37 : #include "analyzer/supergraph.h"
38 : #include "analyzer/call-string.h"
39 : #include "analyzer/program-point.h"
40 : #include "analyzer/store.h"
41 : #include "analyzer/region-model.h"
42 : #include "analyzer/sm.h"
43 : #include "analyzer/program-state.h"
44 : #include "analyzer/pending-diagnostic.h"
45 : #include "analyzer/constraint-manager.h"
46 :
47 : #if ENABLE_ANALYZER
48 :
49 : namespace ana {
50 :
51 : namespace {
52 :
/* The kinds of bounds that can be known for a tainted value.  */

enum bounds
{
  /* Neither an upper nor a lower bound is known for this tainted
     value.  */
  BOUNDS_NONE = 0,

  /* This tainted value has an upper bound, but no lower bound.  */
  BOUNDS_UPPER = 1,

  /* This tainted value has a lower bound, but no upper bound.  */
  BOUNDS_LOWER = 2
};
66 :
67 : static const char *
68 0 : bounds_to_str (enum bounds b)
69 : {
70 0 : switch (b)
71 : {
72 0 : default:
73 0 : gcc_unreachable ();
74 : case BOUNDS_NONE:
75 : return "BOUNDS_NONE";
76 0 : case BOUNDS_UPPER:
77 0 : return "BOUNDS_UPPER";
78 0 : case BOUNDS_LOWER:
79 0 : return "BOUNDS_LOWER";
80 : }
81 : }
82 :
83 : /* An experimental state machine, for tracking "taint": unsanitized uses
84 : of data potentially under an attacker's control. */
85 :
86 : class taint_state_machine : public state_machine
87 : {
88 : public:
89 : taint_state_machine (logger *logger);
90 :
91 2659038 : bool inherited_state_p () const final override { return true; }
92 :
93 : state_t alt_get_inherited_state (const sm_state_map &map,
94 : const svalue *sval,
95 : const extrinsic_state &ext_state)
96 : const final override;
97 :
98 : bool
99 483724 : has_alt_get_inherited_state_p () const final override
100 : {
101 483724 : return true;
102 : }
103 :
104 : bool on_stmt (sm_context &sm_ctxt,
105 : const gimple *stmt) const final override;
106 :
107 : void on_condition (sm_context &sm_ctxt,
108 : const svalue *lhs,
109 : enum tree_code op,
110 : const svalue *rhs) const final override;
111 : void on_bounded_ranges (sm_context &sm_ctxt,
112 : const svalue &sval,
113 : const bounded_ranges &ranges) const final override;
114 :
115 : bool can_purge_p (state_t s) const final override;
116 :
117 : bool get_taint (state_t s, tree type, enum bounds *out) const;
118 :
119 : state_t combine_states (state_t s0, state_t s1) const;
120 :
121 : private:
122 : void check_control_flow_arg_for_taint (sm_context &sm_ctxt,
123 : tree expr) const;
124 :
125 : void check_for_tainted_size_arg (sm_context &sm_ctxt,
126 : const gcall &call,
127 : tree callee_fndecl) const;
128 : void check_for_tainted_divisor (sm_context &sm_ctxt,
129 : const gassign *assign) const;
130 :
131 : public:
132 : /* State for a "tainted" value: unsanitized data potentially under an
133 : attacker's control. */
134 : state_t m_tainted;
135 :
136 : /* State for a "tainted" value that has a lower bound. */
137 : state_t m_has_lb;
138 :
139 : /* State for a "tainted" value that has an upper bound. */
140 : state_t m_has_ub;
141 :
142 : /* Stop state, for a value we don't want to track any more. */
143 : state_t m_stop;
144 :
145 : /* Global state, for when the last condition had tainted arguments. */
146 : state_t m_tainted_control_flow;
147 : };
148 :
149 : /* Class for diagnostics relating to taint_state_machine. */
150 :
151 0 : class taint_diagnostic : public pending_diagnostic
152 : {
153 : public:
154 218 : taint_diagnostic (const taint_state_machine &sm, tree arg,
155 : enum bounds has_bounds)
156 218 : : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
157 : {}
158 :
159 230 : bool subclass_equal_p (const pending_diagnostic &base_other) const override
160 : {
161 230 : const taint_diagnostic &other = (const taint_diagnostic &)base_other;
162 230 : return (same_tree_p (m_arg, other.m_arg)
163 230 : && m_has_bounds == other.m_has_bounds);
164 : }
165 :
166 : bool
167 10 : describe_state_change (pretty_printer &pp,
168 : const evdesc::state_change &change) override
169 : {
170 10 : if (change.m_new_state == m_sm.m_tainted)
171 : {
172 4 : if (change.m_origin)
173 : {
174 0 : pp_printf (&pp,
175 : "%qE has an unchecked value here (from %qE)",
176 0 : change.m_expr, change.m_origin);
177 0 : return true;
178 : }
179 : else
180 : {
181 4 : pp_printf (&pp,
182 : "%qE gets an unchecked value here",
183 4 : change.m_expr);
184 4 : return true;
185 : }
186 : }
187 6 : else if (change.m_new_state == m_sm.m_has_lb)
188 : {
189 4 : pp_printf (&pp,
190 : "%qE has its lower bound checked here",
191 4 : change.m_expr);
192 4 : return true;
193 : }
194 2 : else if (change.m_new_state == m_sm.m_has_ub)
195 : {
196 2 : pp_printf (&pp,
197 : "%qE has its upper bound checked here",
198 2 : change.m_expr);
199 2 : return true;
200 : }
201 : return false;
202 : }
203 :
204 : diagnostics::paths::event::meaning
205 2 : get_meaning_for_state_change (const evdesc::state_change &change)
206 : const final override
207 : {
208 2 : using event = diagnostics::paths::event;
209 2 : if (change.m_new_state == m_sm.m_tainted)
210 0 : return event::meaning (event::verb::acquire,
211 0 : event::noun::taint);
212 2 : return event::meaning ();
213 : }
214 :
215 : void
216 0 : maybe_add_sarif_properties (diagnostics::sarif_object &result_obj)
217 : const override
218 : {
219 0 : auto &props = result_obj.get_or_create_properties ();
220 : #define PROPERTY_PREFIX "gcc/analyzer/taint_diagnostic/"
221 0 : props.set (PROPERTY_PREFIX "arg", tree_to_json (m_arg));
222 0 : props.set_string (PROPERTY_PREFIX "has_bounds",
223 0 : bounds_to_str (m_has_bounds));
224 : #undef PROPERTY_PREFIX
225 0 : }
226 :
227 : protected:
228 : const taint_state_machine &m_sm;
229 : tree m_arg;
230 : enum bounds m_has_bounds;
231 : };
232 :
233 : /* Concrete taint_diagnostic subclass for reporting attacker-controlled
234 : array index. */
235 :
236 0 : class tainted_array_index : public taint_diagnostic
237 : {
238 : public:
239 120 : tainted_array_index (const taint_state_machine &sm, tree arg,
240 : enum bounds has_bounds)
241 120 : : taint_diagnostic (sm, arg, has_bounds)
242 : {}
243 :
244 1038 : const char *get_kind () const final override { return "tainted_array_index"; }
245 :
246 186 : int get_controlling_option () const final override
247 : {
248 186 : return OPT_Wanalyzer_tainted_array_index;
249 : }
250 :
251 66 : bool emit (diagnostic_emission_context &ctxt) final override
252 : {
253 : /* CWE-129: "Improper Validation of Array Index". */
254 66 : ctxt.add_cwe (129);
255 66 : if (m_arg)
256 66 : switch (m_has_bounds)
257 : {
258 0 : default:
259 0 : gcc_unreachable ();
260 47 : case BOUNDS_NONE:
261 47 : return ctxt.warn ("use of attacker-controlled value %qE"
262 : " in array lookup without bounds checking",
263 47 : m_arg);
264 6 : break;
265 6 : case BOUNDS_UPPER:
266 6 : return ctxt.warn ("use of attacker-controlled value %qE"
267 : " in array lookup without checking for negative",
268 6 : m_arg);
269 13 : break;
270 13 : case BOUNDS_LOWER:
271 13 : return ctxt.warn ("use of attacker-controlled value %qE"
272 : " in array lookup without upper-bounds checking",
273 13 : m_arg);
274 0 : break;
275 : }
276 : else
277 0 : switch (m_has_bounds)
278 : {
279 0 : default:
280 0 : gcc_unreachable ();
281 0 : case BOUNDS_NONE:
282 0 : return ctxt.warn ("use of attacker-controlled value"
283 0 : " in array lookup without bounds checking");
284 0 : break;
285 0 : case BOUNDS_UPPER:
286 0 : return ctxt.warn ("use of attacker-controlled value"
287 : " in array lookup without checking for"
288 0 : " negative");
289 0 : break;
290 0 : case BOUNDS_LOWER:
291 0 : return ctxt.warn ("use of attacker-controlled value"
292 : " in array lookup without upper-bounds"
293 0 : " checking");
294 : break;
295 : }
296 : }
297 :
298 : bool
299 132 : describe_final_event (pretty_printer &pp,
300 : const evdesc::final_event &) final override
301 : {
302 132 : if (m_arg)
303 132 : switch (m_has_bounds)
304 : {
305 0 : default:
306 0 : gcc_unreachable ();
307 94 : case BOUNDS_NONE:
308 94 : {
309 94 : pp_printf (&pp,
310 : "use of attacker-controlled value %qE in array lookup"
311 : " without bounds checking",
312 : m_arg);
313 94 : return true;
314 : }
315 12 : case BOUNDS_UPPER:
316 12 : {
317 12 : pp_printf (&pp,
318 : "use of attacker-controlled value %qE"
319 : " in array lookup without checking for negative",
320 : m_arg);
321 12 : return true;
322 : }
323 26 : case BOUNDS_LOWER:
324 26 : {
325 26 : pp_printf (&pp,
326 : "use of attacker-controlled value %qE"
327 : " in array lookup without upper-bounds checking",
328 : m_arg);
329 26 : return true;
330 : }
331 : }
332 : else
333 0 : switch (m_has_bounds)
334 : {
335 0 : default:
336 0 : gcc_unreachable ();
337 0 : case BOUNDS_NONE:
338 0 : {
339 0 : pp_printf (&pp,
340 : "use of attacker-controlled value in array lookup"
341 : " without bounds checking");
342 0 : return true;
343 : }
344 0 : case BOUNDS_UPPER:
345 0 : {
346 0 : pp_printf (&pp,
347 : "use of attacker-controlled value"
348 : " in array lookup without checking for negative");
349 0 : return true;
350 : }
351 0 : case BOUNDS_LOWER:
352 0 : {
353 0 : pp_printf (&pp,
354 : "use of attacker-controlled value"
355 : " in array lookup without upper-bounds checking");
356 0 : return true;
357 : }
358 : }
359 : }
360 : };
361 :
362 : /* Concrete taint_diagnostic subclass for reporting attacker-controlled
363 : pointer offset. */
364 :
365 0 : class tainted_offset : public taint_diagnostic
366 : {
367 : public:
368 27 : tainted_offset (const taint_state_machine &sm, tree arg,
369 : enum bounds has_bounds,
370 : const svalue *offset)
371 27 : : taint_diagnostic (sm, arg, has_bounds),
372 27 : m_offset (offset)
373 : {}
374 :
375 236 : const char *get_kind () const final override { return "tainted_offset"; }
376 :
377 43 : int get_controlling_option () const final override
378 : {
379 43 : return OPT_Wanalyzer_tainted_offset;
380 : }
381 :
382 16 : bool emit (diagnostic_emission_context &ctxt) final override
383 : {
384 : /* CWE-823: "Use of Out-of-range Pointer Offset". */
385 16 : ctxt.add_cwe (823);
386 16 : if (m_arg)
387 16 : switch (m_has_bounds)
388 : {
389 0 : default:
390 0 : gcc_unreachable ();
391 4 : case BOUNDS_NONE:
392 4 : return ctxt.warn ("use of attacker-controlled value %qE as offset"
393 : " without bounds checking",
394 4 : m_arg);
395 2 : break;
396 2 : case BOUNDS_UPPER:
397 2 : return ctxt.warn ("use of attacker-controlled value %qE as offset"
398 : " without lower-bounds checking",
399 2 : m_arg);
400 10 : break;
401 10 : case BOUNDS_LOWER:
402 10 : return ctxt.warn ("use of attacker-controlled value %qE as offset"
403 : " without upper-bounds checking",
404 10 : m_arg);
405 0 : break;
406 : }
407 : else
408 0 : switch (m_has_bounds)
409 : {
410 0 : default:
411 0 : gcc_unreachable ();
412 0 : case BOUNDS_NONE:
413 0 : return ctxt.warn ("use of attacker-controlled value as offset"
414 0 : " without bounds checking");
415 0 : break;
416 0 : case BOUNDS_UPPER:
417 0 : return ctxt.warn ("use of attacker-controlled value as offset"
418 0 : " without lower-bounds checking");
419 0 : break;
420 0 : case BOUNDS_LOWER:
421 0 : return ctxt.warn ("use of attacker-controlled value as offset"
422 0 : " without upper-bounds checking");
423 : break;
424 : }
425 : }
426 :
427 : bool
428 32 : describe_final_event (pretty_printer &pp,
429 : const evdesc::final_event &) final override
430 : {
431 32 : if (m_arg)
432 32 : switch (m_has_bounds)
433 : {
434 0 : default:
435 0 : gcc_unreachable ();
436 8 : case BOUNDS_NONE:
437 8 : {
438 8 : pp_printf (&pp,
439 : "use of attacker-controlled value %qE"
440 : " as offset without bounds checking",
441 : m_arg);
442 8 : return true;
443 : }
444 4 : case BOUNDS_UPPER:
445 4 : {
446 4 : pp_printf (&pp,
447 : "use of attacker-controlled value %qE"
448 : " as offset without lower-bounds checking",
449 : m_arg);
450 4 : return true;
451 : }
452 20 : case BOUNDS_LOWER:
453 20 : {
454 20 : pp_printf (&pp,
455 : "use of attacker-controlled value %qE"
456 : " as offset without upper-bounds checking",
457 : m_arg);
458 20 : return true;
459 : }
460 : }
461 : else
462 0 : switch (m_has_bounds)
463 : {
464 0 : default:
465 0 : gcc_unreachable ();
466 0 : case BOUNDS_NONE:
467 0 : {
468 0 : pp_printf (&pp,
469 : "use of attacker-controlled value"
470 : " as offset without bounds checking");
471 0 : return true;
472 : }
473 0 : case BOUNDS_UPPER:
474 0 : {
475 0 : pp_printf (&pp,
476 : "use of attacker-controlled value"
477 : " as offset without lower-bounds"
478 : " checking");
479 0 : return true;
480 : }
481 0 : case BOUNDS_LOWER:
482 0 : {
483 0 : pp_printf (&pp,
484 : "use of attacker-controlled value"
485 : " as offset without upper-bounds"
486 : " checking");
487 0 : return true;
488 : }
489 : }
490 : }
491 :
492 : void
493 0 : maybe_add_sarif_properties (diagnostics::sarif_object &result_obj)
494 : const final override
495 : {
496 0 : taint_diagnostic::maybe_add_sarif_properties (result_obj);
497 0 : auto &props = result_obj.get_or_create_properties ();
498 : #define PROPERTY_PREFIX "gcc/analyzer/tainted_offset/"
499 0 : props.set (PROPERTY_PREFIX "offset", m_offset->to_json ());
500 : #undef PROPERTY_PREFIX
501 0 : }
502 :
503 : private:
504 : const svalue *m_offset;
505 : };
506 :
507 : /* Concrete taint_diagnostic subclass for reporting attacker-controlled
508 : size. */
509 :
510 0 : class tainted_size : public taint_diagnostic
511 : {
512 : public:
513 6 : tainted_size (const taint_state_machine &sm, tree arg,
514 : enum bounds has_bounds)
515 6 : : taint_diagnostic (sm, arg, has_bounds)
516 : {}
517 :
518 17 : const char *get_kind () const override { return "tainted_size"; }
519 :
520 10 : int get_controlling_option () const final override
521 : {
522 10 : return OPT_Wanalyzer_tainted_size;
523 : }
524 :
525 4 : bool emit (diagnostic_emission_context &ctxt) override
526 : {
527 : /* "CWE-129: Improper Validation of Array Index". */
528 4 : ctxt.add_cwe (129);
529 4 : if (m_arg)
530 4 : switch (m_has_bounds)
531 : {
532 0 : default:
533 0 : gcc_unreachable ();
534 0 : case BOUNDS_NONE:
535 0 : return ctxt.warn ("use of attacker-controlled value %qE as size"
536 : " without bounds checking",
537 0 : m_arg);
538 0 : break;
539 0 : case BOUNDS_UPPER:
540 0 : return ctxt.warn ("use of attacker-controlled value %qE as size"
541 : " without lower-bounds checking",
542 0 : m_arg);
543 4 : break;
544 4 : case BOUNDS_LOWER:
545 4 : return ctxt.warn ("use of attacker-controlled value %qE as size"
546 : " without upper-bounds checking",
547 4 : m_arg);
548 0 : break;
549 : }
550 : else
551 0 : switch (m_has_bounds)
552 : {
553 0 : default:
554 0 : gcc_unreachable ();
555 0 : case BOUNDS_NONE:
556 0 : return ctxt.warn ("use of attacker-controlled value as size"
557 0 : " without bounds checking");
558 0 : break;
559 0 : case BOUNDS_UPPER:
560 0 : return ctxt.warn ("use of attacker-controlled value as size"
561 0 : " without lower-bounds checking");
562 0 : break;
563 0 : case BOUNDS_LOWER:
564 0 : return ctxt.warn ("use of attacker-controlled value as size"
565 0 : " without upper-bounds checking");
566 : break;
567 : }
568 : }
569 :
570 : bool
571 8 : describe_final_event (pretty_printer &pp,
572 : const evdesc::final_event &) final override
573 : {
574 8 : if (m_arg)
575 8 : switch (m_has_bounds)
576 : {
577 0 : default:
578 0 : gcc_unreachable ();
579 0 : case BOUNDS_NONE:
580 0 : pp_printf (&pp,
581 : "use of attacker-controlled value %qE"
582 : " as size without bounds checking",
583 : m_arg);
584 0 : return true;
585 0 : case BOUNDS_UPPER:
586 0 : pp_printf (&pp,
587 : "use of attacker-controlled value %qE"
588 : " as size without lower-bounds checking",
589 : m_arg);
590 0 : return true;
591 8 : case BOUNDS_LOWER:
592 8 : pp_printf (&pp,
593 : "use of attacker-controlled value %qE"
594 : " as size without upper-bounds checking",
595 : m_arg);
596 8 : return true;
597 : }
598 : else
599 0 : switch (m_has_bounds)
600 : {
601 0 : default:
602 0 : gcc_unreachable ();
603 0 : case BOUNDS_NONE:
604 0 : pp_printf (&pp,
605 : "use of attacker-controlled value"
606 : " as size without bounds checking");
607 0 : return true;
608 0 : case BOUNDS_UPPER:
609 0 : pp_printf (&pp,
610 : "use of attacker-controlled value"
611 : " as size without lower-bounds checking");
612 0 : return true;
613 0 : case BOUNDS_LOWER:
614 0 : pp_printf (&pp,
615 : "use of attacker-controlled value"
616 : " as size without upper-bounds checking");
617 0 : return true;
618 : }
619 : }
620 : };
621 :
622 : /* Subclass of tainted_size for reporting on tainted size values
623 : passed to an external function annotated with attribute "access". */
624 :
625 0 : class tainted_access_attrib_size : public tainted_size
626 : {
627 : public:
628 3 : tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
629 : enum bounds has_bounds, tree callee_fndecl,
630 : unsigned size_argno, const char *access_str)
631 3 : : tainted_size (sm, arg, has_bounds),
632 3 : m_callee_fndecl (callee_fndecl),
633 3 : m_size_argno (size_argno), m_access_str (access_str)
634 : {
635 : }
636 :
637 14 : const char *get_kind () const override
638 : {
639 14 : return "tainted_access_attrib_size";
640 : }
641 :
642 2 : bool emit (diagnostic_emission_context &ctxt) final override
643 : {
644 2 : bool warned = tainted_size::emit (ctxt);
645 2 : if (warned)
646 : {
647 2 : auto_urlify_attributes sentinel;
648 2 : inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
649 : "parameter %i of %qD marked as a size via attribute %qs",
650 2 : m_size_argno + 1, m_callee_fndecl, m_access_str);
651 2 : }
652 2 : return warned;
653 : }
654 :
655 : private:
656 : tree m_callee_fndecl;
657 : unsigned m_size_argno;
658 : const char *m_access_str;
659 : };
660 :
661 : /* Concrete taint_diagnostic subclass for reporting attacker-controlled
662 : divisor (so that an attacker can trigger a divide by zero). */
663 :
664 0 : class tainted_divisor : public taint_diagnostic
665 : {
666 : public:
667 10 : tainted_divisor (const taint_state_machine &sm, tree arg,
668 : enum bounds has_bounds)
669 10 : : taint_diagnostic (sm, arg, has_bounds)
670 : {}
671 :
672 122 : const char *get_kind () const final override { return "tainted_divisor"; }
673 :
674 20 : int get_controlling_option () const final override
675 : {
676 20 : return OPT_Wanalyzer_tainted_divisor;
677 : }
678 :
679 10 : bool emit (diagnostic_emission_context &ctxt) final override
680 : {
681 : /* CWE-369: "Divide By Zero". */
682 10 : ctxt.add_cwe (369);
683 10 : if (m_arg)
684 10 : return ctxt.warn ("use of attacker-controlled value %qE as divisor"
685 : " without checking for zero",
686 10 : m_arg);
687 : else
688 0 : return ctxt.warn ("use of attacker-controlled value as divisor"
689 0 : " without checking for zero");
690 : }
691 :
692 : bool
693 20 : describe_final_event (pretty_printer &pp,
694 : const evdesc::final_event &) final override
695 : {
696 20 : if (m_arg)
697 20 : pp_printf (&pp,
698 : "use of attacker-controlled value %qE as divisor"
699 : " without checking for zero",
700 : m_arg);
701 : else
702 0 : pp_printf (&pp,
703 : "use of attacker-controlled value as divisor"
704 : " without checking for zero");
705 20 : return true;
706 : }
707 : };
708 :
709 : /* Concrete taint_diagnostic subclass for reporting attacker-controlled
710 : size of a dynamic allocation. */
711 :
712 0 : class tainted_allocation_size : public taint_diagnostic
713 : {
714 : public:
715 20 : tainted_allocation_size (const taint_state_machine &sm, tree arg,
716 : const svalue *size_in_bytes,
717 : enum bounds has_bounds, enum memory_space mem_space)
718 20 : : taint_diagnostic (sm, arg, has_bounds),
719 20 : m_size_in_bytes (size_in_bytes),
720 20 : m_mem_space (mem_space)
721 : {
722 : }
723 :
724 216 : const char *get_kind () const final override
725 : {
726 216 : return "tainted_allocation_size";
727 : }
728 :
729 32 : bool subclass_equal_p (const pending_diagnostic &base_other) const override
730 : {
731 32 : if (!taint_diagnostic::subclass_equal_p (base_other))
732 : return false;
733 32 : const tainted_allocation_size &other
734 : = (const tainted_allocation_size &)base_other;
735 32 : return m_mem_space == other.m_mem_space;
736 : }
737 :
738 40 : int get_controlling_option () const final override
739 : {
740 40 : return OPT_Wanalyzer_tainted_allocation_size;
741 : }
742 :
743 20 : bool emit (diagnostic_emission_context &ctxt) final override
744 : {
745 : /* "CWE-789: Memory Allocation with Excessive Size Value". */
746 20 : ctxt.add_cwe (789);
747 :
748 20 : bool warned;
749 20 : if (m_arg)
750 20 : switch (m_has_bounds)
751 : {
752 0 : default:
753 0 : gcc_unreachable ();
754 0 : case BOUNDS_NONE:
755 0 : warned = ctxt.warn ("use of attacker-controlled value %qE as"
756 : " allocation size without bounds checking",
757 : m_arg);
758 0 : break;
759 0 : case BOUNDS_UPPER:
760 0 : warned = ctxt.warn ("use of attacker-controlled value %qE as"
761 : " allocation size without"
762 : " lower-bounds checking",
763 : m_arg);
764 0 : break;
765 20 : case BOUNDS_LOWER:
766 20 : warned = ctxt.warn ("use of attacker-controlled value %qE as"
767 : " allocation size without"
768 : " upper-bounds checking",
769 : m_arg);
770 20 : break;
771 : }
772 : else
773 0 : switch (m_has_bounds)
774 : {
775 0 : default:
776 0 : gcc_unreachable ();
777 0 : case BOUNDS_NONE:
778 0 : warned = ctxt.warn ("use of attacker-controlled value as"
779 : " allocation size without bounds"
780 : " checking");
781 0 : break;
782 0 : case BOUNDS_UPPER:
783 0 : warned = ctxt.warn ("use of attacker-controlled value as"
784 : " allocation size without"
785 : " lower-bounds checking");
786 0 : break;
787 0 : case BOUNDS_LOWER:
788 0 : warned = ctxt.warn ("use of attacker-controlled value as"
789 : " allocation size without"
790 : " upper-bounds checking");
791 0 : break;
792 : }
793 20 : if (warned)
794 : {
795 20 : const location_t loc = ctxt.get_location ();
796 20 : switch (m_mem_space)
797 : {
798 : default:
799 : break;
800 1 : case MEMSPACE_STACK:
801 1 : inform (loc, "stack-based allocation");
802 1 : break;
803 13 : case MEMSPACE_HEAP:
804 13 : inform (loc, "heap-based allocation");
805 13 : break;
806 : }
807 : }
808 20 : return warned;
809 : }
810 :
811 : bool
812 40 : describe_final_event (pretty_printer &pp,
813 : const evdesc::final_event &) final override
814 : {
815 40 : if (m_arg)
816 40 : switch (m_has_bounds)
817 : {
818 0 : default:
819 0 : gcc_unreachable ();
820 0 : case BOUNDS_NONE:
821 0 : pp_printf (&pp,
822 : "use of attacker-controlled value %qE as allocation size"
823 : " without bounds checking",
824 : m_arg);
825 0 : return true;
826 0 : case BOUNDS_UPPER:
827 0 : pp_printf (&pp,
828 : "use of attacker-controlled value %qE as allocation size"
829 : " without lower-bounds checking",
830 : m_arg);
831 0 : return true;
832 40 : case BOUNDS_LOWER:
833 40 : pp_printf (&pp,
834 : "use of attacker-controlled value %qE as allocation size"
835 : " without upper-bounds checking",
836 : m_arg);
837 40 : return true;
838 : }
839 : else
840 0 : switch (m_has_bounds)
841 : {
842 0 : default:
843 0 : gcc_unreachable ();
844 0 : case BOUNDS_NONE:
845 0 : pp_printf (&pp,
846 : "use of attacker-controlled value as allocation size"
847 : " without bounds checking");
848 0 : return true;
849 0 : case BOUNDS_UPPER:
850 0 : pp_printf (&pp,
851 : "use of attacker-controlled value as allocation size"
852 : " without lower-bounds checking");
853 0 : return true;
854 0 : case BOUNDS_LOWER:
855 0 : pp_printf (&pp,
856 : "use of attacker-controlled value as allocation size"
857 : " without upper-bounds checking");
858 0 : return true;
859 : }
860 : }
861 :
862 : void
863 0 : maybe_add_sarif_properties (diagnostics::sarif_object &result_obj)
864 : const final override
865 : {
866 0 : taint_diagnostic::maybe_add_sarif_properties (result_obj);
867 0 : auto &props = result_obj.get_or_create_properties ();
868 : #define PROPERTY_PREFIX "gcc/analyzer/tainted_allocation_size/"
869 0 : props.set (PROPERTY_PREFIX "size_in_bytes", m_size_in_bytes->to_json ());
870 : #undef PROPERTY_PREFIX
871 0 : }
872 :
873 : private:
874 : const svalue *m_size_in_bytes;
875 : enum memory_space m_mem_space;
876 : };
877 :
878 : /* Concrete taint_diagnostic subclass for reporting attacker-controlled
879 : value being used as part of the condition of an assertion. */
880 :
881 0 : class tainted_assertion : public taint_diagnostic
882 : {
883 : public:
884 35 : tainted_assertion (const taint_state_machine &sm, tree arg,
885 : tree assert_failure_fndecl)
886 35 : : taint_diagnostic (sm, arg, BOUNDS_NONE),
887 35 : m_assert_failure_fndecl (assert_failure_fndecl)
888 : {
889 35 : gcc_assert (m_assert_failure_fndecl);
890 35 : }
891 :
892 618 : const char *get_kind () const final override
893 : {
894 618 : return "tainted_assertion";
895 : }
896 :
897 35 : bool subclass_equal_p (const pending_diagnostic &base_other) const override
898 : {
899 35 : if (!taint_diagnostic::subclass_equal_p (base_other))
900 : return false;
901 35 : const tainted_assertion &other
902 : = (const tainted_assertion &)base_other;
903 35 : return m_assert_failure_fndecl == other.m_assert_failure_fndecl;
904 : }
905 :
906 69 : int get_controlling_option () const final override
907 : {
908 69 : return OPT_Wanalyzer_tainted_assertion;
909 : }
910 :
911 34 : bool emit (diagnostic_emission_context &ctxt) final override
912 : {
913 : /* "CWE-617: Reachable Assertion". */
914 34 : ctxt.add_cwe (617);
915 :
916 34 : return ctxt.warn ("use of attacked-controlled value in"
917 34 : " condition for assertion");
918 : }
919 :
920 275 : location_t fixup_location (location_t loc,
921 : bool primary) const final override
922 : {
923 275 : if (primary)
924 : /* For the primary location we want to avoid being in e.g. the
925 : <assert.h> system header, since this would suppress the
926 : diagnostic. */
927 69 : return expansion_point_location_if_in_system_header (loc);
928 206 : else if (in_system_header_at (loc))
929 : /* For events, we want to show the implemenation of the assert
930 : macro when we're describing them. */
931 4 : return linemap_resolve_location (line_table, loc,
932 : LRK_SPELLING_LOCATION,
933 4 : nullptr);
934 : else
935 202 : return pending_diagnostic::fixup_location (loc, primary);
936 : }
937 :
938 : bool
939 68 : describe_state_change (pretty_printer &pp,
940 : const evdesc::state_change &change) override
941 : {
942 68 : if (change.m_new_state == m_sm.m_tainted_control_flow)
943 : {
944 68 : pp_printf (&pp,
945 : "use of attacker-controlled value for control flow");
946 68 : return true;
947 : }
948 0 : return taint_diagnostic::describe_state_change (pp, change);
949 : }
950 :
951 : bool
952 68 : describe_final_event (pretty_printer &pp,
953 : const evdesc::final_event &) final override
954 : {
955 68 : if (mention_noreturn_attribute_p ())
956 58 : pp_printf (&pp,
957 : "treating %qE as an assertion failure handler"
958 : " due to %<__attribute__((__noreturn__))%>",
959 : m_assert_failure_fndecl);
960 : else
961 10 : pp_printf (&pp,
962 : "treating %qE as an assertion failure handler",
963 : m_assert_failure_fndecl);
964 68 : return true;
965 : }
966 :
967 : private:
968 68 : bool mention_noreturn_attribute_p () const
969 : {
970 68 : if (fndecl_built_in_p (m_assert_failure_fndecl, BUILT_IN_UNREACHABLE))
971 10 : return false;
972 : return true;
973 : }
974 :
975 : tree m_assert_failure_fndecl;
976 : };
977 :
978 : /* taint_state_machine's ctor. */
979 :
980 3377 : taint_state_machine::taint_state_machine (logger *logger)
981 : : state_machine ("taint", logger),
982 6754 : m_tainted (add_state ("tainted")),
983 3377 : m_has_lb (add_state ("has_lb")),
984 3377 : m_has_ub (add_state ("has_ub")),
985 3377 : m_stop (add_state ("stop")),
986 6754 : m_tainted_control_flow (add_state ("tainted-control-flow"))
987 : {
988 3377 : }
989 :
990 : state_machine::state_t
991 2391997 : taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
992 : const svalue *sval,
993 : const extrinsic_state &ext_state)
994 : const
995 : {
996 2391997 : switch (sval->get_kind ())
997 : {
998 : default:
999 : break;
1000 89240 : case SK_UNARYOP:
1001 89240 : {
1002 89240 : const unaryop_svalue *unaryop_sval
1003 89240 : = as_a <const unaryop_svalue *> (sval);
1004 89240 : enum tree_code op = unaryop_sval->get_op ();
1005 89240 : const svalue *arg = unaryop_sval->get_arg ();
1006 89240 : switch (op)
1007 : {
1008 85282 : case NOP_EXPR:
1009 85282 : {
1010 85282 : state_t arg_state = map.get_state (arg, ext_state);
1011 85282 : return arg_state;
1012 : }
1013 : default:
1014 : break;
1015 : }
1016 : }
1017 : break;
1018 167761 : case SK_BINOP:
1019 167761 : {
1020 167761 : const binop_svalue *binop_sval = as_a <const binop_svalue *> (sval);
1021 167761 : enum tree_code op = binop_sval->get_op ();
1022 167761 : const svalue *arg0 = binop_sval->get_arg0 ();
1023 167761 : const svalue *arg1 = binop_sval->get_arg1 ();
1024 167761 : switch (op)
1025 : {
1026 : default:
1027 : break;
1028 :
1029 130973 : case EQ_EXPR:
1030 130973 : case GE_EXPR:
1031 130973 : case LE_EXPR:
1032 130973 : case NE_EXPR:
1033 130973 : case GT_EXPR:
1034 130973 : case LT_EXPR:
1035 130973 : case UNORDERED_EXPR:
1036 130973 : case ORDERED_EXPR:
1037 130973 : case PLUS_EXPR:
1038 130973 : case MINUS_EXPR:
1039 130973 : case MULT_EXPR:
1040 130973 : case POINTER_PLUS_EXPR:
1041 130973 : case TRUNC_DIV_EXPR:
1042 130973 : {
1043 130973 : state_t arg0_state = map.get_state (arg0, ext_state);
1044 130973 : state_t arg1_state = map.get_state (arg1, ext_state);
1045 130973 : return combine_states (arg0_state, arg1_state);
1046 : }
1047 730 : break;
1048 :
1049 730 : case TRUNC_MOD_EXPR:
1050 730 : {
1051 : /* The left-hand side of X % Y can be sanitized by
1052 : the operation. */
1053 730 : return map.get_state (arg1, ext_state);
1054 : }
1055 : break;
1056 :
1057 : case BIT_AND_EXPR:
1058 : case RSHIFT_EXPR:
1059 : return nullptr;
1060 : }
1061 : }
1062 : break;
1063 : }
1064 : return nullptr;
1065 : }
1066 :
1067 : /* Return true iff FNDECL should be considered to be an assertion failure
1068 : handler by -Wanalyzer-tainted-assertion. */
1069 :
1070 : static bool
1071 45369 : is_assertion_failure_handler_p (tree fndecl)
1072 : {
1073 : // i.e. "noreturn"
1074 0 : if (TREE_THIS_VOLATILE (fndecl))
1075 0 : return true;
1076 :
1077 : return false;
1078 : }
1079 :
1080 : /* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
1081 :
1082 : bool
1083 263888 : taint_state_machine::on_stmt (sm_context &sm_ctxt,
1084 : const gimple *stmt) const
1085 : {
1086 263888 : if (const gcall *call = dyn_cast <const gcall *> (stmt))
1087 49635 : if (tree callee_fndecl = sm_ctxt.get_fndecl_for_call (*call))
1088 : {
1089 45837 : if (is_named_call_p (callee_fndecl, "fread", *call, 4))
1090 : {
1091 468 : tree arg = gimple_call_arg (call, 0);
1092 :
1093 468 : sm_ctxt.on_transition (arg, m_start, m_tainted);
1094 :
1095 : /* Dereference an ADDR_EXPR. */
1096 : // TODO: should the engine do this?
1097 468 : if (TREE_CODE (arg) == ADDR_EXPR)
1098 318 : sm_ctxt.on_transition (TREE_OPERAND (arg, 0),
1099 318 : m_start, m_tainted);
1100 468 : return true;
1101 : }
1102 :
1103 : /* External function with "access" attribute. */
1104 45369 : if (sm_ctxt.unknown_side_effects_p ())
1105 11575 : check_for_tainted_size_arg (sm_ctxt, *call, callee_fndecl);
1106 :
1107 50043 : if (is_assertion_failure_handler_p (callee_fndecl)
1108 876 : && sm_ctxt.get_global_state () == m_tainted_control_flow)
1109 : {
1110 35 : sm_ctxt.warn (NULL_TREE,
1111 35 : std::make_unique<tainted_assertion> (*this, NULL_TREE,
1112 : callee_fndecl));
1113 : }
1114 : }
1115 : // TODO: ...etc; many other sources of untrusted data
1116 :
1117 263420 : if (const gassign *assign = dyn_cast <const gassign *> (stmt))
1118 : {
1119 166167 : enum tree_code op = gimple_assign_rhs_code (assign);
1120 :
1121 166167 : switch (op)
1122 : {
1123 : default:
1124 : break;
1125 449 : case TRUNC_DIV_EXPR:
1126 449 : case CEIL_DIV_EXPR:
1127 449 : case FLOOR_DIV_EXPR:
1128 449 : case ROUND_DIV_EXPR:
1129 449 : case TRUNC_MOD_EXPR:
1130 449 : case CEIL_MOD_EXPR:
1131 449 : case FLOOR_MOD_EXPR:
1132 449 : case ROUND_MOD_EXPR:
1133 449 : case RDIV_EXPR:
1134 449 : case EXACT_DIV_EXPR:
1135 449 : check_for_tainted_divisor (sm_ctxt, assign);
1136 449 : break;
1137 : }
1138 : }
1139 :
1140 263420 : if (const gcond *cond = dyn_cast <const gcond *> (stmt))
1141 : {
1142 : /* Reset the state of "tainted-control-flow" before each
1143 : control flow statement, so that only the last one before
1144 : an assertion-failure-handler counts. */
1145 38673 : sm_ctxt.set_global_state (m_start);
1146 38673 : check_control_flow_arg_for_taint (sm_ctxt, gimple_cond_lhs (cond));
1147 38673 : check_control_flow_arg_for_taint (sm_ctxt, gimple_cond_rhs (cond));
1148 : }
1149 :
1150 263420 : if (const gswitch *switch_ = dyn_cast <const gswitch *> (stmt))
1151 : {
1152 : /* Reset the state of "tainted-control-flow" before each
1153 : control flow statement, so that only the last one before
1154 : an assertion-failure-handler counts. */
1155 6105 : sm_ctxt.set_global_state (m_start);
1156 6105 : check_control_flow_arg_for_taint (sm_ctxt,
1157 : gimple_switch_index (switch_));
1158 : }
1159 :
1160 : return false;
1161 : }
1162 :
1163 : /* If EXPR is tainted, mark this execution path with the
1164 : "tainted-control-flow" global state, in case we're about
1165 : to call an assertion-failure-handler. */
1166 :
1167 : void
1168 83451 : taint_state_machine::check_control_flow_arg_for_taint (sm_context &sm_ctxt,
1169 : tree expr) const
1170 : {
1171 83451 : const region_model *old_model = sm_ctxt.get_old_region_model ();
1172 83451 : const svalue *sval = old_model->get_rvalue (expr, nullptr);
1173 83451 : state_t state = sm_ctxt.get_state (sval);
1174 83451 : enum bounds b;
1175 83451 : if (get_taint (state, TREE_TYPE (expr), &b))
1176 558 : sm_ctxt.set_global_state (m_tainted_control_flow);
1177 83451 : }
1178 :
1179 : /* Implementation of state_machine::on_condition vfunc for taint_state_machine.
1180 : Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1181 : and states 'has_ub' and 'has_lb' to 'stop'. */
1182 :
1183 : void
1184 34595 : taint_state_machine::on_condition (sm_context &sm_ctxt,
1185 : const svalue *lhs,
1186 : enum tree_code op,
1187 : const svalue *rhs) const
1188 : {
1189 34595 : if (lhs->get_kind () == SK_UNKNOWN
1190 34595 : || rhs->get_kind () == SK_UNKNOWN)
1191 : {
1192 : /* If we have a comparison against UNKNOWN, then
1193 : we've presumably hit the svalue complexity limit,
1194 : and we don't know what is being sanitized.
1195 : Give up on any taint already found on this execution path. */
1196 : // TODO: warn about this
1197 9681 : if (get_logger ())
1198 16 : get_logger ()->log ("comparison against UNKNOWN; removing all taint");
1199 9681 : sm_ctxt.clear_all_per_svalue_state ();
1200 9681 : return;
1201 : }
1202 :
1203 : /* Strip away casts before considering LHS and RHS, to increase the
1204 : chance of detecting places where sanitization of a value may have
1205 : happened. */
1206 24914 : if (const svalue *inner = lhs->maybe_undo_cast ())
1207 1281 : lhs = inner;
1208 24914 : if (const svalue *inner = rhs->maybe_undo_cast ())
1209 1396 : rhs = inner;
1210 :
1211 : // TODO
1212 24914 : switch (op)
1213 : {
1214 : //case NE_EXPR:
1215 : //case EQ_EXPR:
1216 3782 : case GE_EXPR:
1217 3782 : case GT_EXPR:
1218 3782 : {
1219 : /* (LHS >= RHS) or (LHS > RHS)
1220 : LHS gains a lower bound
1221 : RHS gains an upper bound. */
1222 3782 : sm_ctxt.on_transition (lhs, m_tainted, m_has_lb);
1223 3782 : sm_ctxt.on_transition (lhs, m_has_ub, m_stop);
1224 3782 : sm_ctxt.on_transition (rhs, m_tainted, m_has_ub);
1225 3782 : sm_ctxt.on_transition (rhs, m_has_lb, m_stop);
1226 : }
1227 3782 : break;
1228 3219 : case LE_EXPR:
1229 3219 : case LT_EXPR:
1230 3219 : {
1231 : /* Detect where build_range_check has optimized
1232 : (c>=low) && (c<=high)
1233 : into
1234 : (c-low>=0) && (c-low<=high-low)
1235 : and thus into:
1236 : (unsigned)(c - low) <= (unsigned)(high-low). */
1237 6438 : if (const binop_svalue *binop_sval
1238 3219 : = lhs->dyn_cast_binop_svalue ())
1239 : {
1240 687 : const svalue *inner_lhs = binop_sval->get_arg0 ();
1241 687 : enum tree_code inner_op = binop_sval->get_op ();
1242 687 : const svalue *inner_rhs = binop_sval->get_arg1 ();
1243 687 : if (const svalue *before_cast = inner_lhs->maybe_undo_cast ())
1244 150 : inner_lhs = before_cast;
1245 687 : if (tree outer_rhs_cst = rhs->maybe_get_constant ())
1246 563 : if (tree inner_rhs_cst = inner_rhs->maybe_get_constant ())
1247 359 : if (inner_op == PLUS_EXPR
1248 346 : && TREE_CODE (inner_rhs_cst) == INTEGER_CST
1249 346 : && TREE_CODE (outer_rhs_cst) == INTEGER_CST
1250 346 : && TYPE_UNSIGNED (TREE_TYPE (inner_rhs_cst))
1251 449 : && TYPE_UNSIGNED (TREE_TYPE (outer_rhs_cst)))
1252 : {
1253 : /* We have
1254 : (unsigned)(INNER_LHS + CST_A) </<= UNSIGNED_CST_B
1255 : and thus an optimized test of INNER_LHS (before any
1256 : cast to unsigned) against a range.
1257 : Transition any of the tainted states to the stop state.
1258 : We have to special-case this here rather than in
1259 : region_model::on_condition since we can't apply
1260 : both conditions simultaneously (we'd have a transition
1261 : from the old state to has_lb, then a transition from
1262 : the old state *again* to has_ub). */
1263 89 : state_t old_state = sm_ctxt.get_state (inner_lhs);
1264 89 : if (old_state == m_tainted
1265 72 : || old_state == m_has_lb
1266 72 : || old_state == m_has_ub)
1267 17 : sm_ctxt.set_next_state (inner_lhs, m_stop);
1268 89 : return;
1269 : }
1270 : }
1271 :
1272 : /* (LHS <= RHS) or (LHS < RHS)
1273 : LHS gains an upper bound
1274 : RHS gains a lower bound. */
1275 3130 : sm_ctxt.on_transition (lhs, m_tainted, m_has_ub);
1276 3130 : sm_ctxt.on_transition (lhs, m_has_lb, m_stop);
1277 3130 : sm_ctxt.on_transition (rhs, m_tainted, m_has_lb);
1278 3130 : sm_ctxt.on_transition (rhs, m_has_ub, m_stop);
1279 : }
1280 3130 : break;
1281 : default:
1282 : break;
1283 : }
1284 : }
1285 :
1286 : /* Implementation of state_machine::on_bounded_ranges vfunc for
1287 : taint_state_machine, for handling switch statement cases.
1288 : Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
1289 : and states 'has_ub' and 'has_lb' to 'stop'. */
1290 :
1291 : void
1292 6099 : taint_state_machine::on_bounded_ranges (sm_context &sm_ctxt,
1293 : const svalue &sval,
1294 : const bounded_ranges &ranges) const
1295 : {
1296 6099 : gcc_assert (!ranges.empty_p ());
1297 6099 : gcc_assert (ranges.get_count () > 0);
1298 :
1299 : /* We have one or more ranges; this could be a "default:", or one or
1300 : more single or range cases.
1301 :
1302 : Look at the overall endpoints to see if the ranges impose any lower
1303 : bounds or upper bounds beyond those of the underlying numeric type. */
1304 :
1305 6099 : tree lowest_bound = ranges.get_range (0).m_lower;
1306 6099 : tree highest_bound = ranges.get_range (ranges.get_count () - 1).m_upper;
1307 6099 : gcc_assert (lowest_bound);
1308 6099 : gcc_assert (highest_bound);
1309 :
1310 6099 : bool ranges_have_lb
1311 6099 : = (lowest_bound != TYPE_MIN_VALUE (TREE_TYPE (lowest_bound)));
1312 6099 : bool ranges_have_ub
1313 6099 : = (highest_bound != TYPE_MAX_VALUE (TREE_TYPE (highest_bound)));
1314 :
1315 6099 : if (!ranges_have_lb && !ranges_have_ub)
1316 : return;
1317 :
1318 : /* We have new bounds from the ranges; combine them with any
1319 : existing bounds on SVAL. */
1320 5478 : state_t old_state = sm_ctxt.get_state (&sval);
1321 5478 : if (old_state == m_tainted)
1322 : {
1323 143 : if (ranges_have_lb && ranges_have_ub)
1324 139 : sm_ctxt.set_next_state (&sval, m_stop);
1325 4 : else if (ranges_have_lb)
1326 4 : sm_ctxt.set_next_state (&sval, m_has_lb);
1327 0 : else if (ranges_have_ub)
1328 0 : sm_ctxt.set_next_state (&sval, m_has_ub);
1329 : }
1330 5335 : else if (old_state == m_has_ub && ranges_have_lb)
1331 0 : sm_ctxt.set_next_state (&sval, m_stop);
1332 5335 : else if (old_state == m_has_lb && ranges_have_ub)
1333 0 : sm_ctxt.set_next_state (&sval, m_stop);
1334 : }
1335 :
1336 : bool
1337 1556982 : taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
1338 : {
1339 1556982 : if (s == m_has_lb || s == m_has_ub)
1340 656 : return false;
1341 :
1342 : return true;
1343 : }
1344 :
1345 : /* If STATE is a tainted state, write the bounds to *OUT and return true.
1346 : Otherwise return false.
1347 : Use the signedness of TYPE to determine if "has_ub" is tainted. */
1348 :
1349 : bool
1350 125449 : taint_state_machine::get_taint (state_t state, tree type,
1351 : enum bounds *out) const
1352 : {
1353 : /* Unsigned types have an implicit lower bound. */
1354 125449 : bool is_unsigned = false;
1355 125449 : if (type)
1356 125449 : if (INTEGRAL_TYPE_P (type))
1357 103983 : is_unsigned = TYPE_UNSIGNED (type);
1358 :
1359 : /* Can't use a switch as the states are non-const. */
1360 125449 : if (state == m_tainted)
1361 : {
1362 690 : *out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
1363 690 : return true;
1364 : }
1365 124759 : else if (state == m_has_lb)
1366 : {
1367 62 : *out = BOUNDS_LOWER;
1368 62 : return true;
1369 : }
1370 124697 : else if (state == m_has_ub && !is_unsigned)
1371 : {
1372 : /* Missing lower bound. */
1373 12 : *out = BOUNDS_UPPER;
1374 12 : return true;
1375 : }
1376 : return false;
1377 : }
1378 :
1379 : /* Find the most tainted state of S0 and S1. */
1380 :
1381 : state_machine::state_t
1382 130973 : taint_state_machine::combine_states (state_t s0, state_t s1) const
1383 : {
1384 130973 : gcc_assert (s0);
1385 130973 : gcc_assert (s1);
1386 130973 : if (s0 == s1)
1387 : return s0;
1388 528 : if (s0 == m_tainted || s1 == m_tainted)
1389 : return m_tainted;
1390 216 : if (s0 == m_start)
1391 : return s1;
1392 197 : if (s1 == m_start)
1393 : return s0;
1394 48 : if (s0 == m_stop)
1395 : return s1;
1396 32 : if (s1 == m_stop)
1397 : return s0;
1398 : /* The only remaining combinations are one of has_ub and has_lb
1399 : (in either order). */
1400 16 : gcc_assert ((s0 == m_has_lb && s1 == m_has_ub)
1401 : || (s0 == m_has_ub && s1 == m_has_lb));
1402 : return m_tainted;
1403 : }
1404 :
1405 : /* Check for calls to external functions marked with
1406 : __attribute__((access)) with a size-index: complain about
1407 : tainted values passed as a size to such a function. */
1408 :
1409 : void
1410 11575 : taint_state_machine::check_for_tainted_size_arg (sm_context &sm_ctxt,
1411 : const gcall &call,
1412 : tree callee_fndecl) const
1413 : {
1414 11575 : tree fntype = TREE_TYPE (callee_fndecl);
1415 11575 : if (!fntype)
1416 10430 : return;
1417 :
1418 11575 : if (!TYPE_ATTRIBUTES (fntype))
1419 : return;
1420 :
1421 : /* Initialize a map of attribute access specifications for arguments
1422 : to the function call. */
1423 1145 : rdwr_map rdwr_idx;
1424 1145 : init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
1425 :
1426 1145 : unsigned argno = 0;
1427 :
1428 4209 : for (tree iter = TYPE_ARG_TYPES (fntype); iter;
1429 3064 : iter = TREE_CHAIN (iter), ++argno)
1430 : {
1431 3064 : const attr_access* access = rdwr_idx.get (argno);
1432 3064 : if (!access)
1433 2948 : continue;
1434 :
1435 : /* Ignore any duplicate entry in the map for the size argument. */
1436 278 : if (access->ptrarg != argno)
1437 114 : continue;
1438 :
1439 164 : if (access->sizarg == UINT_MAX)
1440 48 : continue;
1441 :
1442 116 : tree size_arg = gimple_call_arg (&call, access->sizarg);
1443 :
1444 116 : state_t state = sm_ctxt.get_state (size_arg);
1445 116 : enum bounds b;
1446 116 : if (get_taint (state, TREE_TYPE (size_arg), &b))
1447 : {
1448 3 : const char* const access_str =
1449 3 : TREE_STRING_POINTER (access->to_external_string ());
1450 3 : tree diag_size = sm_ctxt.get_diagnostic_tree (size_arg);
1451 3 : sm_ctxt.warn (size_arg,
1452 : std::make_unique<tainted_access_attrib_size>
1453 3 : (*this, diag_size, b,
1454 : callee_fndecl,
1455 3 : access->sizarg,
1456 : access_str));
1457 : }
1458 : }
1459 1145 : }
1460 :
1461 : /* Complain if ASSIGN (a division operation) has a tainted divisor
1462 : that could be zero. */
1463 :
1464 : void
1465 449 : taint_state_machine::check_for_tainted_divisor (sm_context &sm_ctxt,
1466 : const gassign *assign) const
1467 : {
1468 449 : const region_model *old_model = sm_ctxt.get_old_region_model ();
1469 449 : if (!old_model)
1470 67 : return;
1471 :
1472 449 : tree divisor_expr = gimple_assign_rhs2 (assign);;
1473 :
1474 : /* Until we track conditions on floating point values, we can't check to
1475 : see if they've been checked against zero. */
1476 449 : if (!INTEGRAL_TYPE_P (TREE_TYPE (divisor_expr)))
1477 : return;
1478 :
1479 389 : const svalue *divisor_sval = old_model->get_rvalue (divisor_expr, nullptr);
1480 :
1481 389 : state_t state = sm_ctxt.get_state (divisor_sval);
1482 389 : enum bounds b;
1483 389 : if (get_taint (state, TREE_TYPE (divisor_expr), &b))
1484 : {
1485 17 : const svalue *zero_sval
1486 : = old_model->get_manager ()->get_or_create_int_cst
1487 17 : (TREE_TYPE (divisor_expr), 0);
1488 17 : tristate ts
1489 17 : = old_model->eval_condition (divisor_sval, NE_EXPR, zero_sval);
1490 17 : if (ts.is_true ())
1491 : /* The divisor is known to not equal 0: don't warn. */
1492 7 : return;
1493 :
1494 10 : tree diag_divisor = sm_ctxt.get_diagnostic_tree (divisor_expr);
1495 10 : sm_ctxt.warn
1496 10 : (divisor_expr,
1497 10 : std::make_unique <tainted_divisor> (*this, diag_divisor, b));
1498 10 : sm_ctxt.set_next_state (divisor_sval, m_stop);
1499 : }
1500 : }
1501 :
1502 : } // anonymous namespace
1503 :
1504 : /* Internal interface to this file. */
1505 :
1506 : std::unique_ptr<state_machine>
1507 3377 : make_taint_state_machine (logger *logger)
1508 : {
1509 3377 : return std::make_unique<taint_state_machine> (logger);
1510 : }
1511 :
1512 : /* A closed concrete range. */
1513 :
1514 : class concrete_range
1515 : {
1516 : public:
1517 : /* Return true iff THIS is fully within OTHER
1518 : i.e.
1519 : - m_min must be >= OTHER.m_min
1520 : - m_max must be <= OTHER.m_max. */
1521 136 : bool within_p (const concrete_range &other) const
1522 : {
1523 136 : if (compare_constants (m_min, GE_EXPR, other.m_min).is_true ())
1524 51 : if (compare_constants (m_max, LE_EXPR, other.m_max).is_true ())
1525 : return true;
1526 : return false;
1527 : }
1528 :
1529 : tree m_min;
1530 : tree m_max;
1531 : };
1532 :
1533 : /* Attempt to get a closed concrete range for SVAL based on types.
1534 : If found, write to *OUT and return true.
1535 : Otherwise return false. */
1536 :
1537 : static bool
1538 212 : get_possible_range (const svalue *sval, concrete_range *out)
1539 : {
1540 212 : if (const svalue *inner = sval->maybe_undo_cast ())
1541 : {
1542 76 : concrete_range inner_range;
1543 76 : if (!get_possible_range (inner, &inner_range))
1544 32 : return false;
1545 :
1546 76 : if (sval->get_type ()
1547 76 : && inner->get_type ()
1548 76 : && INTEGRAL_TYPE_P (sval->get_type ())
1549 76 : && INTEGRAL_TYPE_P (inner->get_type ())
1550 76 : && TYPE_UNSIGNED (inner->get_type ())
1551 120 : && (TYPE_PRECISION (sval->get_type ())
1552 44 : > TYPE_PRECISION (inner->get_type ())))
1553 : {
1554 : /* We have a cast from an unsigned type to a wider integral type.
1555 : Assuming this is zero-extension, we can inherit the range from
1556 : the inner type. */
1557 32 : enum tree_code op = ((const unaryop_svalue *)sval)->get_op ();
1558 32 : out->m_min = fold_unary (op, sval->get_type (), inner_range.m_min);
1559 32 : out->m_max = fold_unary (op, sval->get_type (), inner_range.m_max);
1560 32 : return true;
1561 : }
1562 : }
1563 :
1564 180 : if (sval->get_type ()
1565 180 : && INTEGRAL_TYPE_P (sval->get_type ()))
1566 : {
1567 180 : out->m_min = TYPE_MIN_VALUE (sval->get_type ());
1568 180 : out->m_max = TYPE_MAX_VALUE (sval->get_type ());
1569 180 : return true;
1570 : }
1571 :
1572 : return false;
1573 : }
1574 :
1575 : /* Determine if it's possible for tainted array access ELEMENT_REG to
1576 : actually be a problem.
1577 :
1578 : Check here for index being from e.g. unsigned char when the array
1579 : contains >= 255 elements.
1580 :
1581 : Return true if out-of-bounds is possible, false if it's impossible
1582 : (for suppressing false positives). */
1583 :
1584 : static bool
1585 136 : index_can_be_out_of_bounds_p (const element_region *element_reg)
1586 : {
1587 136 : const svalue *index = element_reg->get_index ();
1588 136 : const region *array_reg = element_reg->get_parent_region ();
1589 :
1590 136 : if (array_reg->get_type ()
1591 136 : && TREE_CODE (array_reg->get_type ()) == ARRAY_TYPE
1592 136 : && TYPE_DOMAIN (array_reg->get_type ())
1593 272 : && INTEGRAL_TYPE_P (TYPE_DOMAIN (array_reg->get_type ())))
1594 : {
1595 136 : concrete_range valid_index_range;
1596 136 : valid_index_range.m_min
1597 136 : = TYPE_MIN_VALUE (TYPE_DOMAIN (array_reg->get_type ()));
1598 136 : valid_index_range.m_max
1599 136 : = TYPE_MAX_VALUE (TYPE_DOMAIN (array_reg->get_type ()));
1600 :
1601 136 : concrete_range possible_index_range;
1602 136 : if (get_possible_range (index, &possible_index_range))
1603 136 : if (possible_index_range.within_p (valid_index_range))
1604 16 : return false;
1605 : }
1606 :
1607 : return true;
1608 : }
1609 :
1610 : /* Complain to CTXT if accessing REG leads could lead to arbitrary
1611 : memory access under an attacker's control (due to taint). */
1612 :
1613 : void
1614 837030 : region_model::check_region_for_taint (const region *reg,
1615 : enum access_direction,
1616 : region_model_context *ctxt) const
1617 : {
1618 837030 : gcc_assert (reg);
1619 837030 : gcc_assert (ctxt);
1620 :
1621 837030 : LOG_SCOPE (ctxt->get_logger ());
1622 :
1623 837030 : sm_state_map *smap;
1624 837030 : const state_machine *sm;
1625 837030 : unsigned sm_idx;
1626 837030 : if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1627 : return;
1628 :
1629 712409 : gcc_assert (smap);
1630 712409 : gcc_assert (sm);
1631 :
1632 712409 : const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1633 :
1634 712409 : const extrinsic_state *ext_state = ctxt->get_ext_state ();
1635 712409 : if (!ext_state)
1636 : return;
1637 :
1638 : const region *iter_region = reg;
1639 3522437 : while (iter_region)
1640 : {
1641 2810028 : switch (iter_region->get_kind ())
1642 : {
1643 : default:
1644 : break;
1645 :
1646 13559 : case RK_ELEMENT:
1647 13559 : {
1648 13559 : const element_region *element_reg
1649 : = (const element_region *)iter_region;
1650 13559 : const svalue *index = element_reg->get_index ();
1651 13559 : const state_machine::state_t
1652 13559 : state = smap->get_state (index, *ext_state);
1653 13559 : gcc_assert (state);
1654 13559 : enum bounds b;
1655 13559 : if (taint_sm.get_taint (state, index->get_type (), &b))
1656 : {
1657 136 : if (index_can_be_out_of_bounds_p (element_reg))
1658 : {
1659 120 : tree arg = get_representative_tree (index);
1660 120 : ctxt->warn (std::make_unique<tainted_array_index> (taint_sm,
1661 : arg, b));
1662 : }
1663 16 : else if (ctxt->get_logger ())
1664 0 : ctxt->get_logger ()->log ("rejecting tainted_array_index as"
1665 : " out of bounds is not possible");
1666 : }
1667 : }
1668 13559 : break;
1669 :
1670 16419 : case RK_OFFSET:
1671 16419 : {
1672 16419 : const offset_region *offset_reg
1673 : = (const offset_region *)iter_region;
1674 16419 : const svalue *offset = offset_reg->get_byte_offset ();
1675 16419 : const state_machine::state_t
1676 16419 : state = smap->get_state (offset, *ext_state);
1677 16419 : gcc_assert (state);
1678 : /* Handle implicit cast to sizetype. */
1679 16419 : tree effective_type = offset->get_type ();
1680 16419 : if (const svalue *cast = offset->maybe_undo_cast ())
1681 176 : if (cast->get_type ())
1682 16419 : effective_type = cast->get_type ();
1683 16419 : enum bounds b;
1684 16419 : if (taint_sm.get_taint (state, effective_type, &b))
1685 : {
1686 27 : tree arg = get_representative_tree (offset);
1687 27 : ctxt->warn (std::make_unique<tainted_offset> (taint_sm, arg, b,
1688 : offset));
1689 : }
1690 : }
1691 16419 : break;
1692 :
1693 3916 : case RK_SIZED:
1694 3916 : {
1695 3916 : const sized_region *sized_reg
1696 : = (const sized_region *)iter_region;
1697 3916 : const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
1698 3916 : const state_machine::state_t
1699 3916 : state = smap->get_state (size_sval, *ext_state);
1700 3916 : gcc_assert (state);
1701 3916 : enum bounds b;
1702 3916 : if (taint_sm.get_taint (state, size_sval->get_type (), &b))
1703 : {
1704 3 : tree arg = get_representative_tree (size_sval);
1705 3 : ctxt->warn (std::make_unique<tainted_size> (taint_sm, arg, b));
1706 : }
1707 : }
1708 3916 : break;
1709 : }
1710 :
1711 2810028 : iter_region = iter_region->get_parent_region ();
1712 : }
1713 837030 : }
1714 :
1715 : /* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
1716 : under an attacker's control (due to taint), where the allocation
1717 : is happening within MEM_SPACE. */
1718 :
1719 : void
1720 7810 : region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
1721 : const svalue *size_in_bytes,
1722 : region_model_context *ctxt) const
1723 : {
1724 7810 : gcc_assert (size_in_bytes);
1725 7810 : gcc_assert (ctxt);
1726 :
1727 7810 : LOG_SCOPE (ctxt->get_logger ());
1728 :
1729 7810 : sm_state_map *smap;
1730 7810 : const state_machine *sm;
1731 7810 : unsigned sm_idx;
1732 7810 : if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1733 : return;
1734 :
1735 7599 : gcc_assert (smap);
1736 7599 : gcc_assert (sm);
1737 :
1738 7599 : const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1739 :
1740 7599 : const extrinsic_state *ext_state = ctxt->get_ext_state ();
1741 7599 : if (!ext_state)
1742 : return;
1743 :
1744 7599 : const state_machine::state_t
1745 7599 : state = smap->get_state (size_in_bytes, *ext_state);
1746 7599 : gcc_assert (state);
1747 7599 : enum bounds b;
1748 7599 : if (taint_sm.get_taint (state, size_in_bytes->get_type (), &b))
1749 : {
1750 20 : tree arg = get_representative_tree (size_in_bytes);
1751 40 : ctxt->warn (std::make_unique<tainted_allocation_size>
1752 20 : (taint_sm, arg, size_in_bytes, b, mem_space));
1753 : }
1754 7810 : }
1755 :
1756 : /* Mark SVAL as TAINTED. CTXT must be non-NULL. */
1757 :
1758 : void
1759 47 : region_model::mark_as_tainted (const svalue *sval,
1760 : region_model_context *ctxt)
1761 : {
1762 47 : gcc_assert (sval);
1763 47 : gcc_assert (ctxt);
1764 :
1765 47 : sm_state_map *smap;
1766 47 : const state_machine *sm;
1767 47 : unsigned sm_idx;
1768 47 : if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
1769 0 : return;
1770 :
1771 47 : gcc_assert (smap);
1772 47 : gcc_assert (sm);
1773 :
1774 47 : const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1775 :
1776 47 : const extrinsic_state *ext_state = ctxt->get_ext_state ();
1777 47 : if (!ext_state)
1778 : return;
1779 :
1780 47 : smap->set_state (this, sval, taint_sm.m_tainted, nullptr, *ext_state);
1781 : }
1782 :
1783 : /* Return true if SVAL could possibly be attacker-controlled. */
1784 :
1785 : bool
1786 35 : region_model_context::possibly_tainted_p (const svalue *sval)
1787 : {
1788 35 : sm_state_map *smap;
1789 35 : const state_machine *sm;
1790 35 : unsigned sm_idx;
1791 35 : if (!get_taint_map (&smap, &sm, &sm_idx))
1792 : return false;
1793 :
1794 35 : const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
1795 :
1796 35 : const extrinsic_state *ext_state = get_ext_state ();
1797 35 : if (!ext_state)
1798 : return false;
1799 :
1800 35 : const state_machine::state_t state = smap->get_state (sval, *ext_state);
1801 35 : gcc_assert (state);
1802 :
1803 35 : return (state == taint_sm.m_tainted
1804 33 : || state == taint_sm.m_has_lb
1805 68 : || state == taint_sm.m_has_ub);
1806 : }
1807 :
1808 : } // namespace ana
1809 :
1810 : #endif /* #if ENABLE_ANALYZER */
|