Branch data Line data Source code
1 : : /* Handling inline asm in the analyzer.
2 : : Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 : : Contributed by David Malcolm <dmalcolm@redhat.com>.
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it
8 : : under the terms of the GNU General Public License as published by
9 : : the Free Software Foundation; either version 3, or (at your option)
10 : : any later version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but
13 : : WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : : General Public License for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #include "config.h"
22 : : #define INCLUDE_VECTOR
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "tree.h"
26 : : #include "function.h"
27 : : #include "basic-block.h"
28 : : #include "gimple.h"
29 : : #include "gimple-iterator.h"
30 : : #include "diagnostic-core.h"
31 : : #include "pretty-print.h"
32 : : #include "analyzer/analyzer.h"
33 : : #include "analyzer/analyzer-logging.h"
34 : : #include "options.h"
35 : : #include "analyzer/call-string.h"
36 : : #include "analyzer/program-point.h"
37 : : #include "analyzer/store.h"
38 : : #include "analyzer/region-model.h"
39 : : #include "analyzer/region-model-reachability.h"
40 : : #include "stmt.h"
41 : :
42 : : #if ENABLE_ANALYZER
43 : :
44 : : namespace ana {
45 : :
46 : : /* Minimal asm support for the analyzer.
47 : :
48 : : The objective of this code is to:
49 : : - minimize false positives from the analyzer on the Linux kernel
50 : : (which makes heavy use of inline asm), whilst
51 : : - avoiding having to "teach" the compiler anything about specific strings
52 : : in asm statements.
53 : :
54 : : Specifically, we want to:
55 : :
56 : : (a) mark asm outputs and certain other regions as having been written to,
57 : : to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
58 : :
59 : : (b) identify some of these stmts as "deterministic" so that we can
60 : : write consistent outputs given consistent inputs, so that we can
61 : : avoid false positives for paths in which an asm is invoked twice
62 : : with the same inputs and is expected to emit the same output.
63 : :
64 : : This file implements heuristics for achieving the above. */
65 : :
66 : : /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
67 : :
68 : : Consider this x86 function taken from the Linux kernel
69 : : (arch/x86/include/asm/barrier.h):
70 : :
71 : : static inline unsigned long array_index_mask_nospec(unsigned long index,
72 : : unsigned long size)
73 : : {
74 : : unsigned long mask;
75 : :
76 : : asm volatile ("cmp %1,%2; sbb %0,%0;"
77 : : :"=r" (mask)
78 : : :"g"(size),"r" (index)
79 : : :"cc");
80 : : return mask;
81 : : }
82 : :
83 : : The above is a mitigation for Spectre-variant-1 attacks, for clamping
84 : : an array access to within the range of [0, size) if the CPU speculates
85 : : past the array bounds.
86 : :
87 : : However, it is ultimately used to implement wdev_to_wvif:
88 : :
89 : : static inline struct wfx_vif *
90 : : wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
91 : : {
92 : : vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
93 : : if (!wdev->vif[vif_id]) {
94 : : return NULL;
95 : : }
96 : : return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
97 : : }
98 : :
99 : : which is used by:
100 : :
101 : : if (wdev_to_wvif(wvif->wdev, 1))
102 : : return wdev_to_wvif(wvif->wdev, 1)->vif;
103 : :
104 : : The code has been written to assume that wdev_to_wvif is deterministic,
105 : : and won't change from returning non-NULL at the "if" clause to
106 : : returning NULL at the "->vif" dereference.
107 : :
108 : : By treating the above specific "asm volatile" as deterministic we avoid
109 : : a false positive from -Wanalyzer-null-dereference. */
110 : :
111 : : static bool
112 : 450 : deterministic_p (const gasm *asm_stmt)
113 : : {
114 : : /* Assume something volatile with no inputs is querying
115 : : changeable state e.g. rdtsc. */
116 : 0 : if (gimple_asm_ninputs (asm_stmt) == 0
117 : 450 : && gimple_asm_volatile_p (asm_stmt))
118 : 0 : return false;
119 : :
120 : : /* Otherwise assume it's purely a function of its inputs. */
121 : : return true;
122 : : }
123 : :
124 : : /* Update this model for the asm STMT, using CTXT to report any
125 : : diagnostics.
126 : :
127 : : Compare with cfgexpand.cc: expand_asm_stmt. */
128 : :
129 : : void
130 : 398 : region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
131 : : {
132 : 398 : logger *logger = ctxt ? ctxt->get_logger () : NULL;
133 : 398 : LOG_SCOPE (logger);
134 : :
135 : 398 : const unsigned noutputs = gimple_asm_noutputs (stmt);
136 : 398 : const unsigned ninputs = gimple_asm_ninputs (stmt);
137 : :
138 : 398 : auto_vec<tree> output_tvec;
139 : 398 : auto_vec<tree> input_tvec;
140 : 398 : auto_vec<const char *> constraints;
141 : :
142 : : /* Copy the gimple vectors into new vectors that we can manipulate. */
143 : 398 : output_tvec.safe_grow (noutputs, true);
144 : 398 : input_tvec.safe_grow (ninputs, true);
145 : 398 : constraints.safe_grow (noutputs + ninputs, true);
146 : :
147 : 848 : for (unsigned i = 0; i < noutputs; ++i)
148 : : {
149 : 450 : tree t = gimple_asm_output_op (stmt, i);
150 : 450 : output_tvec[i] = TREE_VALUE (t);
151 : 450 : constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
152 : : }
153 : 1136 : for (unsigned i = 0; i < ninputs; i++)
154 : : {
155 : 738 : tree t = gimple_asm_input_op (stmt, i);
156 : 738 : input_tvec[i] = TREE_VALUE (t);
157 : 738 : constraints[i + noutputs]
158 : 738 : = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
159 : : }
160 : :
161 : : /* Determine which regions are reachable from the inputs
162 : : to this stmt. */
163 : 398 : reachable_regions reachable_regs (this);
164 : :
165 : 398 : int num_errors = 0;
166 : :
167 : 398 : auto_vec<const region *> output_regions (noutputs);
168 : 848 : for (unsigned i = 0; i < noutputs; ++i)
169 : : {
170 : 450 : tree val = output_tvec[i];
171 : 450 : const char *constraint;
172 : 450 : bool is_inout;
173 : 450 : bool allows_reg;
174 : 450 : bool allows_mem;
175 : :
176 : 450 : const region *dst_reg = get_lvalue (val, ctxt);
177 : 450 : output_regions.quick_push (dst_reg);
178 : 450 : reachable_regs.add (dst_reg, true);
179 : :
180 : : /* Try to parse the output constraint. If that fails, there's
181 : : no point in going further. */
182 : 450 : constraint = constraints[i];
183 : 450 : if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
184 : : &allows_mem, &allows_reg, &is_inout))
185 : : {
186 : 0 : if (logger)
187 : 0 : logger->log ("error parsing constraint for output %i: %qs",
188 : : i, constraint);
189 : 0 : num_errors++;
190 : 0 : continue;
191 : : }
192 : :
193 : 450 : if (logger)
194 : : {
195 : 0 : logger->log ("output %i: %qs %qE"
196 : : " is_inout: %i allows_reg: %i allows_mem: %i",
197 : : i, constraint, val,
198 : : (int)is_inout, (int)allows_reg, (int)allows_mem);
199 : 0 : logger->start_log_line ();
200 : 0 : logger->log_partial (" region: ");
201 : 0 : dst_reg->dump_to_pp (logger->get_printer (), true);
202 : 0 : logger->end_log_line ();
203 : : }
204 : :
205 : : }
206 : :
207 : : /* Ideally should combine with inout_svals to determine the
208 : : "effective inputs" and use this for the asm_output_svalue. */
209 : :
210 : 398 : auto_vec<const svalue *> input_svals (ninputs);
211 : 1136 : for (unsigned i = 0; i < ninputs; i++)
212 : : {
213 : 738 : tree val = input_tvec[i];
214 : 738 : const char *constraint = constraints[i + noutputs];
215 : 738 : bool allows_reg, allows_mem;
216 : 738 : if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
217 : 738 : constraints.address (),
218 : : &allows_mem, &allows_reg))
219 : : {
220 : 0 : if (logger)
221 : 0 : logger->log ("error parsing constraint for input %i: %qs",
222 : : i, constraint);
223 : 0 : num_errors++;
224 : 0 : continue;
225 : : }
226 : :
227 : 738 : tree src_expr = input_tvec[i];
228 : 738 : const svalue *src_sval = get_rvalue (src_expr, ctxt);
229 : 738 : check_for_poison (src_sval, src_expr, NULL, ctxt);
230 : 738 : input_svals.quick_push (src_sval);
231 : 738 : reachable_regs.handle_sval (src_sval);
232 : :
233 : 738 : if (logger)
234 : : {
235 : 0 : logger->log ("input %i: %qs %qE"
236 : : " allows_reg: %i allows_mem: %i",
237 : : i, constraint, val,
238 : : (int)allows_reg, (int)allows_mem);
239 : 0 : logger->start_log_line ();
240 : 0 : logger->log_partial (" sval: ");
241 : 0 : src_sval->dump_to_pp (logger->get_printer (), true);
242 : 0 : logger->end_log_line ();
243 : : }
244 : : }
245 : :
246 : 398 : if (num_errors > 0)
247 : 0 : gcc_unreachable ();
248 : :
249 : 398 : if (logger)
250 : : {
251 : 0 : logger->log ("reachability: ");
252 : 0 : reachable_regs.dump_to_pp (logger->get_printer ());
253 : 0 : logger->end_log_line ();
254 : : }
255 : :
256 : : /* Given the regions that were reachable from the inputs we
257 : : want to clobber them.
258 : : This is similar to region_model::handle_unrecognized_call,
259 : : but the unknown call policies seems too aggressive (e.g. purging state
260 : : from anything that's ever escaped). Instead, clobber any clusters
261 : : that were reachable in *this* asm stmt, rather than those that
262 : : escaped, and we don't treat the values as having escaped.
263 : : We also assume that asm stmts don't affect sm-state. */
264 : 973 : for (auto iter = reachable_regs.begin_mutable_base_regs ();
265 : 1548 : iter != reachable_regs.end_mutable_base_regs (); ++iter)
266 : : {
267 : 575 : const region *base_reg = *iter;
268 : 575 : if (base_reg->symbolic_for_unknown_ptr_p ()
269 : 575 : || !base_reg->tracked_p ())
270 : 53 : continue;
271 : :
272 : 522 : binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
273 : 522 : cluster->on_asm (stmt, m_mgr->get_store_manager (),
274 : 522 : conjured_purge (this, ctxt));
275 : : }
276 : :
277 : : /* Update the outputs. */
278 : 848 : for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
279 : : {
280 : 450 : tree dst_expr = output_tvec[output_idx];
281 : 450 : const region *dst_reg = output_regions[output_idx];
282 : :
283 : 450 : const svalue *sval;
284 : 466 : if (deterministic_p (stmt)
285 : 429 : && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
286 : 301 : sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
287 : : stmt,
288 : : output_idx,
289 : : input_svals);
290 : : else
291 : : {
292 : 149 : sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
293 : : stmt,
294 : : dst_reg,
295 : 149 : conjured_purge (this,
296 : 149 : ctxt));
297 : : }
298 : 450 : set_value (dst_reg, sval, ctxt);
299 : : }
300 : 398 : }
301 : :
302 : : } // namespace ana
303 : :
304 : : #endif /* #if ENABLE_ANALYZER */
|