Line data Source code
1 : // Copyright (C) 2020-2026 Free Software Foundation, Inc.
2 :
3 : // This file is part of GCC.
4 :
5 : // GCC is free software; you can redistribute it and/or modify it under
6 : // the terms of the GNU General Public License as published by the Free
7 : // Software Foundation; either version 3, or (at your option) any later
8 : // version.
9 :
10 : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 : // for more details.
14 :
15 : // You should have received a copy of the GNU General Public License
16 : // along with GCC; see the file COPYING3. If not see
17 : // <http://www.gnu.org/licenses/>.
18 :
19 : #include "rust-macro-substitute-ctx.h"
20 : #include "input.h"
21 : #include "rust-hir-map.h"
22 : #include "rust-token.h"
23 :
24 : namespace Rust {
25 :
26 : bool
27 10 : SubstituteCtx::substitute_dollar_crate (
28 : std::vector<std::unique_ptr<AST::Token>> &expanded)
29 : {
30 10 : auto &mappings = Analysis::Mappings::get ();
31 :
32 10 : auto def_crate = mappings.lookup_macro_def_crate (definition.get_node_id ());
33 10 : auto current_crate = mappings.get_current_crate ();
34 :
35 10 : rust_assert (def_crate);
36 :
37 : // If we're expanding a macro defined in the current crate which uses $crate,
38 : // we can just replace the metavar with the `crate` path segment. Otherwise,
39 : // use the fully qualified extern-crate lookup path `::<crate_name>`
40 10 : if (*def_crate == current_crate)
41 : {
42 20 : expanded.push_back (std::make_unique<AST::Token> (
43 20 : Rust::Token::make_identifier (origin, "crate")));
44 : }
45 : else
46 : {
47 0 : auto name = mappings.get_crate_name (*def_crate);
48 :
49 0 : rust_assert (name);
50 :
51 0 : expanded.push_back (std::make_unique<AST::Token> (
52 0 : Rust::Token::make (SCOPE_RESOLUTION, origin)));
53 0 : expanded.push_back (std::make_unique<AST::Token> (
54 0 : Rust::Token::make_identifier (origin, std::string (*name))));
55 : }
56 :
57 10 : return true;
58 : }
59 :
60 : bool
61 11380 : SubstituteCtx::substitute_metavar (
62 : std::unique_ptr<AST::Token> &metavar,
63 : std::vector<std::unique_ptr<AST::Token>> &expanded)
64 : {
65 11380 : auto metavar_name = metavar->get_str ();
66 :
67 11380 : auto it = fragments.find (metavar_name);
68 11380 : if (it == fragments.end ())
69 : {
70 : // fail to substitute, unless we are dealing with a special-case metavar
71 : // like $crate
72 :
73 13 : if (metavar->get_id () == CRATE)
74 10 : return substitute_dollar_crate (expanded);
75 :
76 3 : expanded.push_back (metavar->clone_token ());
77 :
78 3 : return false;
79 : }
80 : else
81 : {
82 : // If we are expanding a metavar which has a lof of matches, we are
83 : // currently expanding a repetition metavar - not a simple metavar. We
84 : // need to error out and inform the user.
85 : // Associated test case for an example: compile/macro-issue1224.rs
86 11367 : if (!it->second->is_single_fragment ())
87 : {
88 1 : rust_error_at (metavar->get_locus (),
89 : "metavariable is still repeating at this depth");
90 1 : rust_inform (
91 : metavar->get_locus (),
92 : "you probably forgot the repetition operator: %<%s%s%s%>", "$(",
93 1 : metavar->as_string ().c_str (), ")*");
94 1 : return true;
95 : }
96 :
97 : // We only care about the vector when expanding repetitions.
98 : // Just access the first element of the vector.
99 11366 : auto &frag = it->second->get_single_fragment ();
100 29398 : for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end;
101 : offs++)
102 : {
103 18032 : auto &tok = input.at (offs);
104 18032 : expanded.push_back (tok->clone_token ());
105 : }
106 : }
107 :
108 : return true;
109 11380 : }
110 :
111 : static bool
112 2017 : is_builtin_metavariable (AST::Token &token)
113 : {
114 2017 : return token.get_id () == CRATE;
115 : }
116 :
117 : bool
118 1642 : SubstituteCtx::check_repetition_amount (size_t pattern_start,
119 : size_t pattern_end,
120 : size_t &expected_repetition_amount)
121 : {
122 1642 : bool first_fragment_found = false;
123 1642 : bool is_valid = true;
124 :
125 22683 : for (size_t i = pattern_start; i < pattern_end; i++)
126 : {
127 21041 : if (macro.at (i)->get_id () == DOLLAR_SIGN)
128 : {
129 2061 : auto &frag_token = macro.at (i + 1);
130 2061 : if (token_id_is_keyword (frag_token->get_id ())
131 2061 : || frag_token->get_id () == IDENTIFIER)
132 : {
133 2017 : auto it = fragments.find (frag_token->get_str ());
134 :
135 2017 : if (is_builtin_metavariable (*frag_token))
136 21042 : continue;
137 :
138 2016 : if (it == fragments.end ())
139 : {
140 : // If the repetition is not anything we know (ie no declared
141 : // metavars, or metavars which aren't present in the
142 : // fragment), we can just error out. No need to paste the
143 : // tokens as if nothing had happened.
144 0 : rust_error_at (frag_token->get_locus (),
145 : "metavar %s used in repetition does not exist",
146 0 : frag_token->get_str ().c_str ());
147 :
148 0 : is_valid = false;
149 0 : continue;
150 : }
151 :
152 2016 : auto &fragment = *it->second;
153 :
154 2016 : if (!fragment.is_single_fragment ())
155 : {
156 2014 : auto &fragment_rep
157 : = static_cast<MatchedFragmentContainerRepetition &> (
158 : fragment);
159 2014 : size_t repeat_amount = fragment_rep.get_match_amount ();
160 2014 : if (!first_fragment_found)
161 : {
162 1642 : first_fragment_found = true;
163 1642 : expected_repetition_amount = repeat_amount;
164 : }
165 : else
166 : {
167 372 : if (repeat_amount != expected_repetition_amount)
168 : {
169 2 : rust_error_at (
170 : frag_token->get_locus (),
171 : "different amount of matches used in merged "
172 : "repetitions: expected %lu, got %lu",
173 : (unsigned long) expected_repetition_amount,
174 : (unsigned long) repeat_amount);
175 2 : is_valid = false;
176 : }
177 : }
178 : }
179 : }
180 : }
181 : }
182 :
183 1642 : return is_valid && first_fragment_found;
184 : }
185 :
186 : std::vector<std::unique_ptr<AST::Token>>
187 1642 : SubstituteCtx::substitute_repetition (
188 : size_t pattern_start, size_t pattern_end,
189 : std::unique_ptr<AST::Token> separator_token)
190 : {
191 1642 : rust_assert (pattern_end < macro.size ());
192 :
193 1642 : size_t repeat_amount = 0;
194 1642 : if (!check_repetition_amount (pattern_start, pattern_end, repeat_amount))
195 2 : return {};
196 :
197 1640 : rust_debug ("repetition amount to use: %lu", (unsigned long) repeat_amount);
198 1640 : std::vector<std::unique_ptr<AST::Token>> expanded;
199 1640 : std::vector<std::unique_ptr<AST::Token>> new_macro;
200 :
201 : // We want to generate a "new macro" to substitute with. This new macro
202 : // should contain only the tokens inside the pattern
203 22657 : for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++)
204 21017 : new_macro.emplace_back (macro.at (tok_idx)->clone_token ());
205 :
206 : // Then, we want to create a subset of the matches so that
207 : // `substitute_tokens()` can only see one fragment per metavar. Let's say we
208 : // have the following user input: (1 145 'h')
209 : // on the following match arm: ($($lit:literal)*)
210 : // which causes the following matches: { "lit": [1, 145, 'h'] }
211 : //
212 : // The pattern (new_macro) is `$lit:literal`
213 : // The first time we expand it, we want $lit to have the following token: 1
214 : // The second time, 145
215 : // The third and final time, 'h'
216 : //
217 : // In order to do so we must create "sub maps", which only contain parts of
218 : // the original matches
219 : // sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ]
220 : //
221 : // and give them to `substitute_tokens` one by one.
222 :
223 5733 : for (size_t i = 0; i < repeat_amount; i++)
224 : {
225 4093 : std::map<std::string, MatchedFragmentContainer *> sub_map;
226 10193 : for (auto &kv_match : fragments)
227 : {
228 6100 : if (kv_match.second->is_single_fragment ())
229 1660 : sub_map.emplace (kv_match.first, kv_match.second);
230 : // Hack: A repeating meta variable might not be present in the new
231 : // macro. Don't include this match if the fragment doesn't have enough
232 : // items, as check_repetition_amount should prevent repetition amount
233 : // mismatches anyway.
234 4440 : else if (kv_match.second->get_fragments ().size () > i)
235 4422 : sub_map.emplace (kv_match.first,
236 4422 : kv_match.second->get_fragments ().at (i).get ());
237 : }
238 :
239 4093 : auto substitute_context
240 4093 : = SubstituteCtx (input, new_macro, sub_map, definition, origin);
241 4093 : auto new_tokens = substitute_context.substitute_tokens ();
242 :
243 : // Skip the first repetition, but add the separator to the expanded
244 : // tokens if it is present
245 4093 : if (i != 0 && separator_token)
246 155 : expanded.emplace_back (separator_token->clone_token ());
247 :
248 96847 : for (auto &new_token : new_tokens)
249 92754 : expanded.emplace_back (new_token->clone_token ());
250 4093 : }
251 :
252 : // FIXME: We also need to make sure that all subsequent fragments
253 : // contain the same amount of repetitions as the first one
254 :
255 1640 : return expanded;
256 1640 : }
257 :
258 : static bool
259 1642 : is_rep_op (std::unique_ptr<AST::Token> &tok)
260 : {
261 1642 : auto id = tok->get_id ();
262 1642 : return id == QUESTION_MARK || id == ASTERISK || id == PLUS;
263 : }
264 :
265 : std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t>
266 13023 : SubstituteCtx::substitute_token (size_t token_idx)
267 : {
268 13023 : auto &token = macro.at (token_idx);
269 :
270 13023 : switch (token->get_id ())
271 : {
272 12 : default:
273 12 : if (token_id_is_keyword (token->get_id ()))
274 : {
275 11380 : case IDENTIFIER:
276 11383 : std::vector<std::unique_ptr<AST::Token>> expanded;
277 :
278 11380 : rust_debug ("expanding metavar: %s", token->get_str ().c_str ());
279 :
280 11380 : if (substitute_metavar (token, expanded))
281 11377 : return {std::move (expanded), 2};
282 : }
283 :
284 : // don't substitute, dollar sign is alone/metavar is unknown
285 4 : return {std::vector<std::unique_ptr<AST::Token>> (), 0};
286 :
287 1642 : case LEFT_PAREN:
288 1642 : {
289 : // We need to parse up until the closing delimiter and expand this
290 : // fragment->n times.
291 1642 : rust_debug ("expanding repetition");
292 :
293 : // We're in a context where macro repetitions have already been
294 : // parsed and validated: This means that
295 : // 1/ There will be no delimiters as that is an error
296 : // 2/ There are no fragment specifiers anymore, which prevents us
297 : // from reusing parser functions.
298 : //
299 : // Repetition patterns are also special in that they cannot contain
300 : // "rogue" delimiters: For example, this is invalid, as they are
301 : // parsed as MacroMatches and must contain a correct amount of
302 : // delimiters.
303 : // `$($e:expr ) )`
304 : // ^ rogue closing parenthesis
305 : //
306 : // With all of that in mind, we can simply skip ahead from one
307 : // parenthesis to the other to find the pattern to expand. Of course,
308 : // pairs of delimiters, including parentheses, are allowed.
309 : // `$($e:expr ( ) )`
310 : // Parentheses are the sole delimiter for which we need a special
311 : // behavior since they delimit the repetition pattern
312 :
313 1642 : size_t pattern_start = token_idx + 1;
314 1642 : size_t pattern_end = pattern_start;
315 1642 : auto parentheses_stack = 0;
316 22683 : for (size_t idx = pattern_start; idx < macro.size (); idx++)
317 : {
318 22683 : if (macro.at (idx)->get_id () == LEFT_PAREN)
319 : {
320 1320 : parentheses_stack++;
321 : }
322 21363 : else if (macro.at (idx)->get_id () == RIGHT_PAREN)
323 : {
324 2962 : if (parentheses_stack == 0)
325 : {
326 : pattern_end = idx;
327 : break;
328 : }
329 1320 : parentheses_stack--;
330 : }
331 : }
332 :
333 : // Unreachable case, but let's make sure we don't ever run into it
334 1642 : rust_assert (pattern_end != pattern_start);
335 :
336 1642 : std::unique_ptr<AST::Token> separator_token = nullptr;
337 1642 : if (pattern_end + 1 <= macro.size ())
338 : {
339 1642 : auto &post_pattern_token = macro.at (pattern_end + 1);
340 1642 : if (!is_rep_op (post_pattern_token))
341 212 : separator_token = post_pattern_token->clone_token ();
342 : }
343 :
344 : // Amount of tokens to skip
345 1642 : auto to_skip = 0;
346 : // Parentheses
347 1642 : to_skip += 2;
348 : // Repetition operator
349 1642 : to_skip += 1;
350 : // Separator
351 1642 : if (separator_token)
352 212 : to_skip += 1;
353 :
354 1642 : return {substitute_repetition (pattern_start, pattern_end,
355 : std::move (separator_token)),
356 1642 : pattern_end - pattern_start + to_skip + 1};
357 1642 : }
358 : }
359 :
360 : rust_unreachable ();
361 : }
362 :
363 : std::vector<std::unique_ptr<AST::Token>>
364 6529 : SubstituteCtx::substitute_tokens ()
365 : {
366 6529 : std::vector<std::unique_ptr<AST::Token>> replaced_tokens;
367 6529 : rust_debug ("expanding tokens");
368 :
369 134722 : for (size_t i = 0; i < macro.size ();)
370 : {
371 128193 : auto &tok = macro.at (i);
372 128193 : if (tok->get_id () == DOLLAR_SIGN)
373 : {
374 : // Aaaaah, if only we had C++17 :)
375 : // auto [expanded, tok_to_skip] = ...
376 13023 : auto p = substitute_token (i + 1);
377 13023 : auto expanded = std::move (p.first);
378 13023 : auto tok_to_skip = p.second;
379 :
380 13023 : if (!tok_to_skip)
381 : {
382 4 : replaced_tokens.emplace_back (tok->clone_token ());
383 4 : tok_to_skip++;
384 : }
385 :
386 13023 : i += tok_to_skip;
387 :
388 123974 : for (auto &token : expanded)
389 110951 : replaced_tokens.emplace_back (token->clone_token ());
390 13023 : }
391 : else
392 : {
393 115170 : replaced_tokens.emplace_back (tok->clone_token ());
394 115170 : i++;
395 : }
396 : }
397 :
398 6529 : return replaced_tokens;
399 : }
400 :
401 : } // namespace Rust
|