Line data Source code
1 : /* Handling inline asm in the analyzer.
2 : Copyright (C) 2021-2026 Free Software Foundation, Inc.
3 : Contributed by David Malcolm <dmalcolm@redhat.com>.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it
8 : under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful, but
13 : WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "analyzer/common.h"
22 :
23 : #include "stmt.h"
24 :
25 : #include "analyzer/analyzer-logging.h"
26 : #include "analyzer/call-string.h"
27 : #include "analyzer/program-point.h"
28 : #include "analyzer/store.h"
29 : #include "analyzer/region-model.h"
30 : #include "analyzer/region-model-reachability.h"
31 :
32 : #if ENABLE_ANALYZER
33 :
34 : namespace ana {
35 :
36 : /* Minimal asm support for the analyzer.
37 :
38 : The objective of this code is to:
39 : - minimize false positives from the analyzer on the Linux kernel
40 : (which makes heavy use of inline asm), whilst
41 : - avoiding having to "teach" the compiler anything about specific strings
42 : in asm statements.
43 :
44 : Specifically, we want to:
45 :
46 : (a) mark asm outputs and certain other regions as having been written to,
47 : to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
48 :
49 : (b) identify some of these stmts as "deterministic" so that we can
50 : write consistent outputs given consistent inputs, so that we can
51 : avoid false positives for paths in which an asm is invoked twice
52 : with the same inputs and is expected to emit the same output.
53 :
54 : This file implements heuristics for achieving the above. */
55 :
56 : /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
57 :
58 : Consider this x86 function taken from the Linux kernel
59 : (arch/x86/include/asm/barrier.h):
60 :
61 : static inline unsigned long array_index_mask_nospec(unsigned long index,
62 : unsigned long size)
63 : {
64 : unsigned long mask;
65 :
66 : asm volatile ("cmp %1,%2; sbb %0,%0;"
67 : :"=r" (mask)
68 : :"g"(size),"r" (index)
69 : :"cc");
70 : return mask;
71 : }
72 :
73 : The above is a mitigation for Spectre-variant-1 attacks, for clamping
74 : an array access to within the range of [0, size] if the CPU speculates
75 : past the array bounds.
76 :
77 : However, it is ultimately used to implement wdev_to_wvif:
78 :
79 : static inline struct wfx_vif *
80 : wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
81 : {
82 : vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
83 : if (!wdev->vif[vif_id]) {
84 : return NULL;
85 : }
86 : return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
87 : }
88 :
89 : which is used by:
90 :
91 : if (wdev_to_wvif(wvif->wdev, 1))
92 : return wdev_to_wvif(wvif->wdev, 1)->vif;
93 :
94 : The code has been written to assume that wdev_to_wvif is deterministic,
95 : and won't change from returning non-NULL at the "if" clause to
96 : returning NULL at the "->vif" dereference.
97 :
98 : By treating the above specific "asm volatile" as deterministic we avoid
99 : a false positive from -Wanalyzer-null-dereference. */
100 :
101 : static bool
102 438 : deterministic_p (const gasm *asm_stmt)
103 : {
/* Return contract: false only when ASM_STMT is volatile *and* has zero
   input operands; true for every other asm stmt, including a
   non-volatile asm without inputs and a volatile asm with at least
   one input.  */
104 : /* Assume something volatile with no inputs is querying
105 : changeable state e.g. rdtsc. */
106 0 : if (gimple_asm_ninputs (asm_stmt) == 0
107 438 : && gimple_asm_volatile_p (asm_stmt))
108 0 : return false;
109 :
110 : /* Otherwise assume it's purely a function of its inputs. */
111 : return true;
112 : }
113 :
114 : /* Update this model for the asm STMT, using CTXT to report any
115 : diagnostics.
116 :
117 : Compare with cfgexpand.cc: expand_asm_stmt. */
118 :
/* Outline of the body:
   1. Copy the output/input operand trees and their constraint strings
      out of STMT.
   2. Resolve each output to a region and each input to an svalue,
      registering both with a reachable_regions instance.
   3. Call binding_cluster::on_asm on the cluster of every mutable base
      region that instance reports, skipping untracked regions and
      symbolic regions for unknown pointers.
   4. Bind a new svalue to each output region.
   CTXT is tested for null before the logger is fetched from it.  */
119 : void
120 384 : region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
121 : {
122 384 : logger *logger = ctxt ? ctxt->get_logger () : nullptr;
123 384 : LOG_SCOPE (logger);
124 :
125 384 : const unsigned noutputs = gimple_asm_noutputs (stmt);
126 384 : const unsigned ninputs = gimple_asm_ninputs (stmt);
127 :
128 384 : auto_vec<tree> output_tvec;
129 384 : auto_vec<tree> input_tvec;
130 384 : auto_vec<const char *> constraints;
131 :
132 : /* Copy the gimple vectors into new vectors that we can manipulate. */
133 384 : output_tvec.safe_grow (noutputs, true);
134 384 : input_tvec.safe_grow (ninputs, true);
135 384 : constraints.safe_grow (noutputs + ninputs, true);
136 :
/* Layout of CONSTRAINTS: indices [0, noutputs) hold the output
   constraint strings, indices [noutputs, noutputs + ninputs) hold the
   input constraint strings.  */
137 822 : for (unsigned i = 0; i < noutputs; ++i)
138 : {
139 438 : tree t = gimple_asm_output_op (stmt, i);
140 438 : output_tvec[i] = TREE_VALUE (t);
141 438 : constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
142 : }
143 1096 : for (unsigned i = 0; i < ninputs; i++)
144 : {
145 712 : tree t = gimple_asm_input_op (stmt, i);
146 712 : input_tvec[i] = TREE_VALUE (t);
147 712 : constraints[i + noutputs]
148 712 : = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
149 : }
150 :
151 : /* Determine which regions are reachable from the inputs
152 : to this stmt. */
153 384 : reachable_regions reachable_regs (this);
154 :
/* Counts constraint-parse failures across both operand loops; it is
   checked once, after the input loop.  */
155 384 : int num_errors = 0;
156 :
/* The region for output I is pushed before its constraint is parsed,
   so OUTPUT_REGIONS[I] corresponds to output I even when parsing
   fails.
   NOTE(review): the `true' passed to reachable_regs.add presumably
   marks the region as mutable -- confirm against
   reachable_regions::add.  */
157 384 : auto_vec<const region *> output_regions (noutputs);
158 822 : for (unsigned i = 0; i < noutputs; ++i)
159 : {
160 438 : tree val = output_tvec[i];
161 438 : const char *constraint;
162 438 : bool is_inout;
163 438 : bool allows_reg;
164 438 : bool allows_mem;
165 :
166 438 : const region *dst_reg = get_lvalue (val, ctxt);
167 438 : output_regions.quick_push (dst_reg);
168 438 : reachable_regs.add (dst_reg, true);
169 :
170 : /* Try to parse the output constraint. If that fails, there's
171 : no point in going further. */
172 438 : constraint = constraints[i];
173 438 : if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
174 : &allows_mem, &allows_reg, &is_inout,
175 : nullptr))
176 : {
177 0 : if (logger)
178 0 : logger->log ("error parsing constraint for output %i: %qs",
179 : i, constraint);
180 0 : num_errors++;
181 0 : continue;
182 : }
183 :
/* The parsed flags (is_inout, allows_reg, allows_mem) are only used
   for the log output below.  */
184 438 : if (logger)
185 : {
186 0 : logger->log ("output %i: %qs %qE"
187 : " is_inout: %i allows_reg: %i allows_mem: %i",
188 : i, constraint, val,
189 : (int)is_inout, (int)allows_reg, (int)allows_mem);
190 0 : logger->start_log_line ();
191 0 : logger->log_partial (" region: ");
192 0 : dst_reg->dump_to_pp (logger->get_printer (), true);
193 0 : logger->end_log_line ();
194 : }
195 :
196 : }
197 :
198 : /* Ideally should combine with inout_svals to determine the
199 : "effective inputs" and use this for the asm_output_svalue. */
200 :
/* Unlike the output loop, an input's svalue is only pushed once its
   constraint has parsed.  A parse failure bumps NUM_ERRORS, which
   reaches gcc_unreachable below, so past that check INPUT_SVALS holds
   one entry per input, in operand order.  */
201 384 : auto_vec<const svalue *> input_svals (ninputs);
202 1096 : for (unsigned i = 0; i < ninputs; i++)
203 : {
204 712 : tree val = input_tvec[i];
205 712 : const char *constraint = constraints[i + noutputs];
206 712 : bool allows_reg, allows_mem;
207 712 : if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
208 712 : constraints.address (), &allows_mem,
209 : &allows_reg, nullptr))
210 : {
211 0 : if (logger)
212 0 : logger->log ("error parsing constraint for input %i: %qs",
213 : i, constraint);
214 0 : num_errors++;
215 0 : continue;
216 : }
217 :
/* Evaluate the input in this model, run the poison check on the
   resulting svalue, then record it and register it with
   REACHABLE_REGS.  */
218 712 : tree src_expr = input_tvec[i];
219 712 : const svalue *src_sval = get_rvalue (src_expr, ctxt);
220 712 : check_for_poison (src_sval, src_expr, nullptr, ctxt);
221 712 : input_svals.quick_push (src_sval);
222 712 : reachable_regs.handle_sval (src_sval);
223 :
224 712 : if (logger)
225 : {
226 0 : logger->log ("input %i: %qs %qE"
227 : " allows_reg: %i allows_mem: %i",
228 : i, constraint, val,
229 : (int)allows_reg, (int)allows_mem);
230 0 : logger->start_log_line ();
231 0 : logger->log_partial (" sval: ");
232 0 : src_sval->dump_to_pp (logger->get_printer (), true);
233 0 : logger->end_log_line ();
234 : }
235 : }
236 :
/* Any constraint that failed to parse is treated as a "cannot happen"
   condition.
   NOTE(review): presumably malformed constraints are rejected before
   the analyzer sees the stmt -- confirm.  */
237 384 : if (num_errors > 0)
238 0 : gcc_unreachable ();
239 :
240 384 : if (logger)
241 : {
242 0 : logger->log ("reachability: ");
243 0 : reachable_regs.dump_to_pp (logger->get_printer ());
244 0 : logger->end_log_line ();
245 : }
246 :
247 : /* Given the regions that were reachable from the inputs we
248 : want to clobber them.
249 : This is similar to region_model::handle_unrecognized_call,
250 : but the unknown call policy seems too aggressive (e.g. purging state
251 : from anything that's ever escaped). Instead, clobber any clusters
252 : that were reachable in *this* asm stmt, rather than those that
253 : escaped, and we don't treat the values as having escaped.
254 : We also assume that asm stmts don't affect sm-state. */
255 946 : for (auto iter = reachable_regs.begin_mutable_base_regs ();
256 1508 : iter != reachable_regs.end_mutable_base_regs (); ++iter)
257 : {
258 562 : const region *base_reg = *iter;
/* Skip base regions that are symbolic-for-an-unknown-pointer or that
   are not tracked; no cluster is looked up or created for them.  */
259 562 : if (base_reg->symbolic_for_unknown_ptr_p ()
260 562 : || !base_reg->tracked_p ())
261 52 : continue;
262 :
263 510 : binding_cluster *cluster
264 510 : = m_store.get_or_create_cluster (*m_mgr->get_store_manager (),
265 : base_reg);
266 510 : cluster->on_asm (stmt, m_mgr->get_store_manager (),
267 510 : conjured_purge (this, ctxt));
268 : }
269 :
270 : /* Update the outputs. */
/* Neither half of the condition below depends on OUTPUT_IDX, so every
   output of STMT takes the same branch:
   - asm_output_svalue, built from the output's type, STMT, the output
     index and INPUT_SVALS, when STMT is deterministic and the number
     of inputs does not exceed asm_output_svalue::MAX_INPUTS;
   - otherwise a conjured svalue built from the output's type, STMT
     and the destination region.
   NOTE(review): the "get_or_create" names suggest that identical
   arguments yield the same svalue, which is what lets two executions
   with equal inputs agree -- confirm in region_model_manager.  */
271 822 : for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
272 : {
273 438 : tree dst_expr = output_tvec[output_idx];
274 438 : const region *dst_reg = output_regions[output_idx];
275 :
276 438 : const svalue *sval;
277 454 : if (deterministic_p (stmt)
278 417 : && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
279 289 : sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
280 : stmt,
281 : output_idx,
282 : input_svals);
283 : else
284 : {
285 149 : sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
286 : stmt,
287 : dst_reg,
288 149 : conjured_purge (this,
289 149 : ctxt));
290 : }
291 438 : set_value (dst_reg, sval, ctxt);
292 : }
293 384 : }
294 :
295 : } // namespace ana
296 :
297 : #endif /* #if ENABLE_ANALYZER */
|