Branch data Line data Source code
1 : : /* Handling inline asm in the analyzer.
2 : : Copyright (C) 2021-2025 Free Software Foundation, Inc.
3 : : Contributed by David Malcolm <dmalcolm@redhat.com>.
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it
8 : : under the terms of the GNU General Public License as published by
9 : : the Free Software Foundation; either version 3, or (at your option)
10 : : any later version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but
13 : : WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 : : General Public License for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #include "analyzer/common.h"
22 : :
23 : : #include "stmt.h"
24 : :
25 : : #include "analyzer/analyzer-logging.h"
26 : : #include "analyzer/call-string.h"
27 : : #include "analyzer/program-point.h"
28 : : #include "analyzer/store.h"
29 : : #include "analyzer/region-model.h"
30 : : #include "analyzer/region-model-reachability.h"
31 : :
32 : : #if ENABLE_ANALYZER
33 : :
34 : : namespace ana {
35 : :
36 : : /* Minimal asm support for the analyzer.
37 : :
38 : : The objective of this code is to:
39 : : - minimize false positives from the analyzer on the Linux kernel
40 : : (which makes heavy use of inline asm), whilst
41 : : - avoiding having to "teach" the compiler anything about specific strings
42 : : in asm statements.
43 : :
44 : : Specifically, we want to:
45 : :
46 : : (a) mark asm outputs and certain other regions as having been written to,
47 : : to avoid false positives from -Wanalyzer-use-of-uninitialized-value.
48 : :
49 : : (b) identify some of these stmts as "deterministic" so that we can
50 : : write consistent outputs given consistent inputs, so that we can
51 : : avoid false positives for paths in which an asm is invoked twice
52 : : with the same inputs and is expected to emit the same output.
53 : :
54 : : This file implements heuristics for achieving the above. */
55 : :
56 : : /* Determine if ASM_STMT is deterministic, in the sense of (b) above.
57 : :
58 : : Consider this x86 function taken from the Linux kernel
59 : : (arch/x86/include/asm/barrier.h):
60 : :
61 : : static inline unsigned long array_index_mask_nospec(unsigned long index,
62 : : unsigned long size)
63 : : {
64 : : unsigned long mask;
65 : :
66 : : asm volatile ("cmp %1,%2; sbb %0,%0;"
67 : : :"=r" (mask)
68 : : :"g"(size),"r" (index)
69 : : :"cc");
70 : : return mask;
71 : : }
72 : :
73 : : The above is a mitigation for Spectre-variant-1 attacks, for clamping
74 : : an array access to within the range of [0, size) if the CPU speculates
75 : : past the array bounds.
76 : :
77 : : However, it is ultimately used to implement wdev_to_wvif:
78 : :
79 : : static inline struct wfx_vif *
80 : : wdev_to_wvif(struct wfx_dev *wdev, int vif_id)
81 : : {
82 : : vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif));
83 : : if (!wdev->vif[vif_id]) {
84 : : return NULL;
85 : : }
86 : : return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv;
87 : : }
88 : :
89 : : which is used by:
90 : :
91 : : if (wdev_to_wvif(wvif->wdev, 1))
92 : : return wdev_to_wvif(wvif->wdev, 1)->vif;
93 : :
94 : : The code has been written to assume that wdev_to_wvif is deterministic,
95 : : and won't change from returning non-NULL at the "if" clause to
96 : : returning NULL at the "->vif" dereference.
97 : :
98 : : By treating the above specific "asm volatile" as deterministic we avoid
99 : : a false positive from -Wanalyzer-null-dereference. */
100 : :
101 : : static bool
102 : 450 : deterministic_p (const gasm *asm_stmt)
103 : : {
104 : : /* Heuristic: assume a volatile asm with no input operands is querying
105 : : changeable state (e.g. rdtsc), and so is not deterministic. */
106 : 0 : if (gimple_asm_ninputs (asm_stmt) == 0
107 : 450 : && gimple_asm_volatile_p (asm_stmt))
108 : 0 : return false;
109 : :
110 : : /* Otherwise (it has inputs, or is not volatile) assume it's purely a function of its inputs. */
111 : : return true;
112 : : }
113 : :
114 : : /* Update this model for the asm STMT, using CTXT to report any
115 : : diagnostics.
116 : :
117 : : Compare with cfgexpand.cc: expand_asm_stmt. */
118 : :
119 : : void
120 : 398 : region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt)
121 : : {
122 : 398 : logger *logger = ctxt ? ctxt->get_logger () : NULL; /* CTXT may be NULL, in which case nothing is logged. */
123 : 398 : LOG_SCOPE (logger);
124 : :
125 : 398 : const unsigned noutputs = gimple_asm_noutputs (stmt);
126 : 398 : const unsigned ninputs = gimple_asm_ninputs (stmt);
127 : :
128 : 398 : auto_vec<tree> output_tvec;
129 : 398 : auto_vec<tree> input_tvec;
130 : 398 : auto_vec<const char *> constraints;
131 : :
132 : : /* Copy the operands and constraint strings of STMT into local vectors that we can manipulate. */
133 : 398 : output_tvec.safe_grow (noutputs, true);
134 : 398 : input_tvec.safe_grow (ninputs, true);
135 : 398 : constraints.safe_grow (noutputs + ninputs, true);
136 : :
137 : 848 : for (unsigned i = 0; i < noutputs; ++i)
138 : : {
139 : 450 : tree t = gimple_asm_output_op (stmt, i);
140 : 450 : output_tvec[i] = TREE_VALUE (t);
141 : 450 : constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
142 : : }
143 : 1136 : for (unsigned i = 0; i < ninputs; i++)
144 : : {
145 : 738 : tree t = gimple_asm_input_op (stmt, i);
146 : 738 : input_tvec[i] = TREE_VALUE (t);
147 : 738 : constraints[i + noutputs] /* Input constraints are stored after the output constraints. */
148 : 738 : = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
149 : : }
150 : :
151 : : /* Determine which regions are reachable from the operands
152 : : of this stmt: each output region and each input value is added below. */
153 : 398 : reachable_regions reachable_regs (this);
154 : :
155 : 398 : int num_errors = 0; /* Count of constraints (output or input) that failed to parse. */
156 : :
157 : 398 : auto_vec<const region *> output_regions (noutputs);
158 : 848 : for (unsigned i = 0; i < noutputs; ++i)
159 : : {
160 : 450 : tree val = output_tvec[i];
161 : 450 : const char *constraint;
162 : 450 : bool is_inout;
163 : 450 : bool allows_reg;
164 : 450 : bool allows_mem;
165 : :
166 : 450 : const region *dst_reg = get_lvalue (val, ctxt);
167 : 450 : output_regions.quick_push (dst_reg); /* Remembered so the final loop can bind a value to it. */
168 : 450 : reachable_regs.add (dst_reg, true); /* NOTE(review): the "true" presumably marks the region as mutable -- confirm against reachable_regions::add. */
169 : :
170 : : /* Try to parse the output constraint. If that fails, record the
171 : : error and skip the logging for this output. */
172 : 450 : constraint = constraints[i];
173 : 450 : if (!parse_output_constraint (&constraint, i, ninputs, noutputs,
174 : : &allows_mem, &allows_reg, &is_inout))
175 : : {
176 : 0 : if (logger)
177 : 0 : logger->log ("error parsing constraint for output %i: %qs",
178 : : i, constraint);
179 : 0 : num_errors++;
180 : 0 : continue;
181 : : }
182 : :
183 : 450 : if (logger)
184 : : {
185 : 0 : logger->log ("output %i: %qs %qE"
186 : : " is_inout: %i allows_reg: %i allows_mem: %i",
187 : : i, constraint, val,
188 : : (int)is_inout, (int)allows_reg, (int)allows_mem);
189 : 0 : logger->start_log_line ();
190 : 0 : logger->log_partial (" region: ");
191 : 0 : dst_reg->dump_to_pp (logger->get_printer (), true);
192 : 0 : logger->end_log_line ();
193 : : }
194 : :
195 : : }
196 : :
197 : : /* Ideally should combine with inout_svals to determine the
198 : : "effective inputs" and use this for the asm_output_svalue. */
199 : :
200 : 398 : auto_vec<const svalue *> input_svals (ninputs);
201 : 1136 : for (unsigned i = 0; i < ninputs; i++)
202 : : {
203 : 738 : tree val = input_tvec[i];
204 : 738 : const char *constraint = constraints[i + noutputs];
205 : 738 : bool allows_reg, allows_mem;
206 : 738 : if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
207 : 738 : constraints.address (),
208 : : &allows_mem, &allows_reg))
209 : : {
210 : 0 : if (logger)
211 : 0 : logger->log ("error parsing constraint for input %i: %qs",
212 : : i, constraint);
213 : 0 : num_errors++;
214 : 0 : continue; /* No svalue is pushed for this input; NUM_ERRORS is checked after the loop. */
215 : : }
216 : :
217 : 738 : tree src_expr = input_tvec[i]; /* Same tree as VAL above. */
218 : 738 : const svalue *src_sval = get_rvalue (src_expr, ctxt);
219 : 738 : check_for_poison (src_sval, src_expr, NULL, ctxt); /* NOTE(review): presumably reports use of poisoned (e.g. uninitialized) inputs via CTXT -- confirm. */
220 : 738 : input_svals.quick_push (src_sval);
221 : 738 : reachable_regs.handle_sval (src_sval);
222 : :
223 : 738 : if (logger)
224 : : {
225 : 0 : logger->log ("input %i: %qs %qE"
226 : : " allows_reg: %i allows_mem: %i",
227 : : i, constraint, val,
228 : : (int)allows_reg, (int)allows_mem);
229 : 0 : logger->start_log_line ();
230 : 0 : logger->log_partial (" sval: ");
231 : 0 : src_sval->dump_to_pp (logger->get_printer (), true);
232 : 0 : logger->end_log_line ();
233 : : }
234 : : }
235 : :
236 : 398 : if (num_errors > 0) /* NOTE(review): presumably malformed constraints are rejected before the analyzer runs, hence "unreachable" -- confirm. */
237 : 0 : gcc_unreachable ();
238 : :
239 : 398 : if (logger)
240 : : {
241 : 0 : logger->log ("reachability: ");
242 : 0 : reachable_regs.dump_to_pp (logger->get_printer ());
243 : 0 : logger->end_log_line ();
244 : : }
245 : :
246 : : /* Given the regions that were reachable from the operands, we
247 : : want to clobber the mutable ones.
248 : : This is similar to region_model::handle_unrecognized_call,
249 : : but the unknown call policies seem too aggressive (e.g. purging state
250 : : from anything that's ever escaped). Instead, clobber any clusters
251 : : that were reachable in *this* asm stmt, rather than those that
252 : : escaped, and we don't treat the values as having escaped.
253 : : We also assume that asm stmts don't affect sm-state. */
254 : 973 : for (auto iter = reachable_regs.begin_mutable_base_regs ();
255 : 1548 : iter != reachable_regs.end_mutable_base_regs (); ++iter)
256 : : {
257 : 575 : const region *base_reg = *iter;
258 : 575 : if (base_reg->symbolic_for_unknown_ptr_p ()
259 : 575 : || !base_reg->tracked_p ())
260 : 53 : continue; /* Skip regions for unknown pointers and untracked regions: no cluster is created for them. */
261 : :
262 : 522 : binding_cluster *cluster = m_store.get_or_create_cluster (base_reg);
263 : 522 : cluster->on_asm (stmt, m_mgr->get_store_manager (),
264 : 522 : conjured_purge (this, ctxt));
265 : : }
266 : :
267 : : /* Update the outputs: bind a new svalue to each output operand's region. */
268 : 848 : for (unsigned output_idx = 0; output_idx < noutputs; output_idx++)
269 : : {
270 : 450 : tree dst_expr = output_tvec[output_idx];
271 : 450 : const region *dst_reg = output_regions[output_idx];
272 : :
273 : 450 : const svalue *sval; /* An asm_output_svalue built from the inputs if the stmt is deemed deterministic and has at most MAX_INPUTS inputs; otherwise a conjured svalue. */
274 : 466 : if (deterministic_p (stmt)
275 : 429 : && input_svals.length () <= asm_output_svalue::MAX_INPUTS)
276 : 301 : sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr),
277 : : stmt,
278 : : output_idx,
279 : : input_svals);
280 : : else
281 : : {
282 : 149 : sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr),
283 : : stmt,
284 : : dst_reg,
285 : 149 : conjured_purge (this,
286 : 149 : ctxt));
287 : : }
288 : 450 : set_value (dst_reg, sval, ctxt);
289 : : }
290 : 398 : }
291 : :
292 : : } // namespace ana
293 : :
294 : : #endif /* #if ENABLE_ANALYZER */
|