Line data Source code
1 : // This file is part of GCC.
2 :
3 : // GCC is free software; you can redistribute it and/or modify it under
4 : // the terms of the GNU General Public License as published by the Free
5 : // Software Foundation; either version 3, or (at your option) any later
6 : // version.
7 :
8 : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 : // for more details.
12 :
13 : // You should have received a copy of the GNU General Public License
14 : // along with GCC; see the file COPYING3. If not see
15 : // <http://www.gnu.org/licenses/>.
16 :
17 : #include "rust-lex.h"
18 : #include "rust-token-converter.h"
19 : #include "bi-map.h"
20 : #include "line-map.h"
21 : #include "rust-system.h"
22 :
23 : namespace Rust {
24 :
25 : static const BiMap<PrimitiveCoreType, std::string> suffixes
26 : = {{{CORETYPE_F32, "f32"},
27 : {CORETYPE_F64, "f64"},
28 : {CORETYPE_U8, "u8"},
29 : {CORETYPE_U16, "u16"},
30 : {CORETYPE_U32, "u32"},
31 : {CORETYPE_U64, "u64"},
32 : {CORETYPE_U128, "u128"},
33 : {CORETYPE_I8, "i8"},
34 : {CORETYPE_I16, "i16"},
35 : {CORETYPE_I32, "i32"},
36 : {CORETYPE_I64, "i64"},
37 : {CORETYPE_I128, "i128"},
38 : {CORETYPE_ISIZE, "isize"},
39 : {CORETYPE_USIZE, "usize"}}};
40 :
41 : static void
42 0 : pop_group (std::vector<ProcMacro::TokenStream> &streams,
43 : ProcMacro::Delimiter delim)
44 : {
45 0 : auto g = ProcMacro::Group::make_group (streams.back (), delim);
46 0 : streams.pop_back ();
47 0 : auto tt = ProcMacro::TokenTree::make_tokentree (g);
48 :
49 0 : streams.back ().push (tt);
50 0 : }
51 :
52 : static ProcMacro::Span
53 0 : convert (location_t location)
54 : {
55 0 : return ProcMacro::Span::make_span (location, 0);
56 : }
57 :
58 : static location_t
59 0 : convert (ProcMacro::Span span)
60 : {
61 0 : return span.start;
62 : }
63 :
64 : static ProcMacro::Literal
65 0 : handle_suffix (const const_TokenPtr &token, ProcMacro::LitKind kind)
66 : {
67 0 : auto str = token->as_string ();
68 0 : auto lookup = suffixes.lookup (token->get_type_hint ());
69 0 : auto suffix = lookup.value_or ("");
70 0 : return ProcMacro::Literal::make_literal (kind, convert (token->get_locus ()),
71 0 : str, suffix);
72 0 : }
73 :
74 : ProcMacro::Literal
75 0 : convert_literal (const_TokenPtr lit)
76 : {
77 0 : auto loc = convert (lit->get_locus ());
78 0 : switch (lit->get_id ())
79 : {
80 0 : case FLOAT_LITERAL:
81 0 : return handle_suffix (lit, ProcMacro::LitKind::make_float ());
82 0 : case INT_LITERAL:
83 0 : return handle_suffix (lit, ProcMacro::LitKind::make_integer ());
84 0 : case CHAR_LITERAL:
85 0 : return ProcMacro::Literal::make_literal (ProcMacro::LitKind::make_char (),
86 0 : loc, lit->as_string ());
87 0 : case STRING_LITERAL:
88 0 : return ProcMacro::Literal::make_literal (ProcMacro::LitKind::make_str (),
89 0 : loc, lit->as_string ());
90 0 : case BYTE_CHAR_LITERAL:
91 0 : return ProcMacro::Literal::make_literal (ProcMacro::LitKind::make_byte (),
92 0 : loc, lit->as_string ());
93 0 : case BYTE_STRING_LITERAL:
94 0 : return ProcMacro::Literal::make_literal (
95 0 : ProcMacro::LitKind::make_byte_str (), loc, lit->as_string ());
96 0 : default:
97 0 : rust_unreachable ();
98 : }
99 : }
100 :
101 : ProcMacro::TokenStream
102 0 : convert (const std::vector<const_TokenPtr> &tokens)
103 : {
104 0 : std::vector<ProcMacro::TokenStream> trees;
105 0 : trees.reserve (tokens.size ());
106 :
107 0 : trees.emplace_back (ProcMacro::TokenStream::make_tokenstream ());
108 0 : for (auto &token : tokens)
109 : {
110 0 : auto loc = convert (token->get_locus ());
111 0 : switch (token->get_id ())
112 : {
113 : // Literals
114 0 : case FLOAT_LITERAL:
115 0 : case INT_LITERAL:
116 0 : case CHAR_LITERAL:
117 0 : case STRING_LITERAL:
118 0 : case BYTE_CHAR_LITERAL:
119 0 : case BYTE_STRING_LITERAL:
120 0 : trees.back ().push (
121 0 : ProcMacro::TokenTree::make_tokentree (convert_literal (token)));
122 0 : break;
123 : // Ident
124 0 : case IDENTIFIER:
125 0 : case ABSTRACT:
126 0 : case AS:
127 0 : case ASYNC:
128 0 : case AUTO:
129 0 : case BECOME:
130 0 : case BOX:
131 0 : case BREAK:
132 0 : case CONST:
133 0 : case CONTINUE:
134 0 : case CRATE:
135 0 : case DO:
136 0 : case DYN:
137 0 : case ELSE:
138 0 : case ENUM_KW:
139 0 : case EXTERN_KW:
140 0 : case FINAL_KW:
141 0 : case FN_KW:
142 0 : case FOR:
143 0 : case IF:
144 0 : case IMPL:
145 0 : case IN:
146 0 : case LET:
147 0 : case LOOP:
148 0 : case MACRO:
149 0 : case MATCH_KW:
150 0 : case MOD:
151 0 : case MOVE:
152 0 : case MUT:
153 0 : case OVERRIDE_KW:
154 0 : case PRIV:
155 0 : case PUB:
156 0 : case REF:
157 0 : case RETURN_KW:
158 0 : case SELF_ALIAS:
159 0 : case SELF:
160 0 : case STATIC_KW:
161 0 : case STRUCT_KW:
162 0 : case SUPER:
163 0 : case TRAIT:
164 0 : case TRY:
165 0 : case TYPE:
166 0 : case TYPEOF:
167 0 : case UNSAFE:
168 0 : case UNSIZED:
169 0 : case USE:
170 0 : case VIRTUAL:
171 0 : case WHERE:
172 0 : case WHILE:
173 0 : case YIELD:
174 : // Underscore is not a Punct, considered as an Ident
175 0 : case UNDERSCORE:
176 : // True and false are idents, not literals
177 : // (https://doc.rust-lang.org/proc_macro/struct.Literal.html)
178 0 : case FALSE_LITERAL:
179 0 : case TRUE_LITERAL:
180 0 : trees.back ().push (ProcMacro::TokenTree::make_tokentree (
181 0 : ProcMacro::Ident::make_ident (token->as_string (), loc)));
182 0 : break;
183 : // Joint punct
184 0 : case OR:
185 0 : case PIPE_EQ:
186 0 : case CARET_EQ:
187 0 : case RIGHT_SHIFT_EQ:
188 0 : case RIGHT_SHIFT:
189 0 : case GREATER_OR_EQUAL:
190 0 : case MATCH_ARROW:
191 0 : case LESS_OR_EQUAL:
192 0 : case LEFT_SHIFT_EQ:
193 0 : case LEFT_SHIFT:
194 0 : case DIV_EQ:
195 0 : case ELLIPSIS:
196 0 : case DOT_DOT_EQ:
197 0 : case DOT_DOT:
198 0 : case RETURN_TYPE:
199 0 : case MINUS_EQ:
200 0 : case PLUS_EQ:
201 0 : case ASTERISK_EQ:
202 0 : case LOGICAL_AND:
203 0 : case AMP_EQ:
204 0 : case PERCENT_EQ:
205 0 : case SCOPE_RESOLUTION:
206 0 : case NOT_EQUAL:
207 0 : case EQUAL_EQUAL:
208 0 : {
209 0 : auto str = token->as_string ();
210 0 : auto it = str.cbegin ();
211 0 : for (; it != str.cend () - 1; it++)
212 0 : trees.back ().push (ProcMacro::TokenTree::make_tokentree (
213 0 : ProcMacro::Punct::make_punct (*it, loc, ProcMacro::JOINT)));
214 0 : trees.back ().push (ProcMacro::TokenTree::make_tokentree (
215 0 : ProcMacro::Punct::make_punct (*it, loc, ProcMacro::ALONE)));
216 0 : }
217 0 : break;
218 : // Alone punct tokens
219 0 : case EQUAL:
220 0 : case RIGHT_ANGLE:
221 0 : case LEFT_ANGLE:
222 0 : case EXCLAM:
223 0 : case TILDE:
224 0 : case PLUS:
225 0 : case MINUS:
226 0 : case ASTERISK:
227 0 : case DIV:
228 0 : case PERCENT:
229 0 : case CARET:
230 0 : case AMP:
231 0 : case PIPE:
232 0 : case PATTERN_BIND:
233 0 : case DOT:
234 0 : case COMMA:
235 0 : case SEMICOLON:
236 0 : case COLON:
237 0 : case HASH:
238 0 : case DOLLAR_SIGN:
239 0 : case QUESTION_MARK:
240 0 : case SINGLE_QUOTE:
241 0 : trees.back ().push (ProcMacro::TokenTree::make_tokentree (
242 0 : ProcMacro::Punct::make_punct (token->as_string ()[0], loc,
243 : ProcMacro::ALONE)));
244 0 : break;
245 0 : case RIGHT_PAREN:
246 0 : pop_group (trees, ProcMacro::PARENTHESIS);
247 0 : break;
248 0 : case RIGHT_CURLY:
249 0 : pop_group (trees, ProcMacro::BRACE);
250 0 : break;
251 0 : case RIGHT_SQUARE:
252 0 : pop_group (trees, ProcMacro::BRACKET);
253 0 : break;
254 0 : case LEFT_SQUARE:
255 0 : case LEFT_CURLY:
256 0 : case LEFT_PAREN:
257 0 : trees.push_back (ProcMacro::TokenStream::make_tokenstream ());
258 0 : break;
259 0 : default:
260 0 : rust_unreachable ();
261 : }
262 : }
263 0 : return trees.back ();
264 0 : }
265 :
266 : static void from_tokenstream (const ProcMacro::TokenStream &ts,
267 : std::vector<const_TokenPtr> &result);
268 :
269 : /**
270 : * Append the token corresponding to a given Ident to a vector.
271 : *
272 : * @param literal Reference to the Ident to convert.
273 : * @param result Reference to the output vector.
274 : */
275 : static void
276 0 : from_ident (const ProcMacro::Ident &ident, std::vector<const_TokenPtr> &result)
277 : {
278 0 : std::string value (ident.value.to_string ());
279 0 : if (ident.is_raw)
280 0 : value = "r#" + value;
281 :
282 0 : Lexer lexer (value, nullptr);
283 0 : auto token = lexer.build_token ();
284 0 : token->set_locus (convert (ident.span));
285 0 : result.push_back (token);
286 0 : }
287 :
288 : /**
289 : * Append the token corresponding to a given Literal to a vector.
290 : *
291 : * @param literal Reference to the Literal to convert.
292 : * @param result Reference to the vector tokens should be appended to.
293 : */
294 : static void
295 0 : from_literal (const ProcMacro::Literal &literal,
296 : std::vector<const_TokenPtr> &result)
297 : {
298 0 : auto lookup = suffixes.lookup (literal.suffix.to_string ());
299 0 : auto loc = convert (literal.span);
300 0 : auto type_hint = lookup.value_or (CORETYPE_UNKNOWN);
301 : // FIXME: Add spans instead of empty locations
302 0 : switch (literal.kind.tag)
303 : {
304 0 : case ProcMacro::BYTE:
305 0 : result.push_back (
306 0 : Token::make_byte_char (loc, literal.text.to_string ()[0]));
307 0 : break;
308 0 : case ProcMacro::CHAR:
309 0 : result.push_back (Token::make_char (loc, literal.text.to_string ()[0]));
310 0 : break;
311 0 : case ProcMacro::INTEGER:
312 0 : {
313 0 : std::string text = literal.text.to_string ();
314 0 : std::string suffix_str = literal.suffix.to_string ();
315 0 : int suffix_start = text.length ();
316 :
317 0 : if (!suffix_str.empty ())
318 : {
319 0 : bool ends_with_suffix
320 0 : = text.size () >= suffix_str.size ()
321 0 : && text.compare (text.size () - suffix_str.size (),
322 : suffix_str.size (), suffix_str)
323 0 : == 0;
324 :
325 0 : if (!ends_with_suffix)
326 0 : text += suffix_str;
327 : else
328 0 : suffix_start = text.length () - suffix_str.length ();
329 : }
330 0 : auto base = IntegerLiteralBase::Decimal;
331 0 : if (suffix_start >= 2 && text[0] == '0')
332 : {
333 0 : if (text[1] == 'x' || text[1] == 'X')
334 : base = Rust::IntegerLiteralBase::Hex;
335 : else if (text[1] == 'o' || text[1] == 'O')
336 : base = Rust::IntegerLiteralBase::Octal;
337 : else if (text[1] == 'b' || text[1] == 'B')
338 : base = Rust::IntegerLiteralBase::Binary;
339 : }
340 :
341 0 : result.push_back (
342 0 : Token::make_int (loc, text, suffix_start, base, type_hint));
343 0 : break;
344 0 : }
345 0 : case ProcMacro::FLOAT:
346 0 : {
347 0 : std::string text = literal.text.to_string ();
348 0 : std::string suffix_str = literal.suffix.to_string ();
349 0 : auto suffix_start = text.length ();
350 0 : if (!suffix_str.empty ())
351 : {
352 0 : bool ends_with_suffix
353 0 : = text.size () >= suffix_str.size ()
354 0 : && text.compare (text.size () - suffix_str.size (),
355 : suffix_str.size (), suffix_str)
356 0 : == 0;
357 :
358 0 : if (!ends_with_suffix)
359 : {
360 0 : text += suffix_str;
361 : }
362 : else
363 : {
364 : suffix_start = text.length () - suffix_str.length ();
365 : }
366 : }
367 :
368 0 : result.push_back (Token::make_float (loc, text, suffix_start, type_hint,
369 : IntegerLiteralBase::Decimal));
370 0 : break;
371 0 : }
372 0 : case ProcMacro::STR:
373 0 : result.push_back (Token::make_string (loc, literal.text.to_string ()));
374 0 : break;
375 0 : case ProcMacro::BYTE_STR:
376 0 : result.push_back (
377 0 : Token::make_byte_string (loc, literal.text.to_string ()));
378 0 : break;
379 : // FIXME: Handle raw string
380 0 : case ProcMacro::STR_RAW:
381 0 : case ProcMacro::BYTE_STR_RAW:
382 0 : default:
383 0 : rust_unreachable ();
384 : }
385 0 : }
386 :
387 : /**
388 : * Accumulate through successive calls multiple Punct until one is tagged
389 : * "Alone", then append the formed token to a given result vector.
390 : *
391 : * @param punct Reference to the Punct to convert.
392 : * @param acc Reference to an accumulator for joined Punct.
393 : * @param result Reference to the output token vector.
394 : */
395 : static void
396 0 : from_punct (const ProcMacro::Punct &punct, std::vector<std::uint32_t> &acc,
397 : std::vector<const_TokenPtr> &result)
398 : {
399 0 : acc.push_back (punct.ch);
400 0 : if (ProcMacro::ALONE == punct.spacing) /* Last punct of a chain */
401 : {
402 : // TODO: UTF-8 string
403 0 : std::string whole (acc.begin (), acc.end ());
404 0 : auto lexer = Lexer (whole, nullptr);
405 0 : auto token = lexer.build_token ();
406 0 : token->set_locus (convert (punct.span));
407 0 : result.push_back (token);
408 0 : acc.clear ();
409 0 : }
410 0 : }
411 :
412 : /**
413 : * Iterate over a Group and append all inner tokens to a vector enclosed by
414 : * its delimiters.
415 : *
416 : * @param g Reference to the Group to convert.
417 : * @param result Reference to the vector tokens should be appended to.
418 : */
419 : static void
420 0 : from_group (const ProcMacro::Group &g, std::vector<const_TokenPtr> &result)
421 : {
422 0 : auto loc = convert (g.span);
423 0 : switch (g.delimiter)
424 : {
425 0 : case ProcMacro::PARENTHESIS:
426 0 : result.push_back (Token::make (LEFT_PAREN, loc));
427 0 : from_tokenstream (g.stream, result);
428 0 : result.push_back (Token::make (RIGHT_PAREN, loc));
429 0 : break;
430 0 : case ProcMacro::BRACE:
431 0 : result.push_back (Token::make (LEFT_CURLY, loc));
432 0 : from_tokenstream (g.stream, result);
433 0 : result.push_back (Token::make (RIGHT_CURLY, loc));
434 0 : break;
435 0 : case ProcMacro::BRACKET:
436 0 : result.push_back (Token::make (LEFT_SQUARE, loc));
437 0 : from_tokenstream (g.stream, result);
438 0 : result.push_back (Token::make (RIGHT_SQUARE, loc));
439 0 : break;
440 0 : case ProcMacro::NONE:
441 0 : from_tokenstream (g.stream, result);
442 0 : break;
443 0 : default:
444 0 : rust_unreachable ();
445 : }
446 0 : }
447 :
448 : /**
449 : * Dispatch TokenTree's conversion to its inner type depending on its tag.
450 : *
451 : * @param tt Reference to the TokenTree.
452 : * @param punct_accumulator Reference to an accumulator for joined Punct.
453 : * @param result Reference to the vector tokens should be appended to.
454 : */
455 : static void
456 0 : from_tokentree (const ProcMacro::TokenTree &tt,
457 : std::vector<std::uint32_t> &punct_accumulator,
458 : std::vector<const_TokenPtr> &result)
459 : {
460 0 : switch (tt.tag)
461 : {
462 0 : case ProcMacro::GROUP:
463 0 : from_group (tt.payload.group, result);
464 0 : break;
465 0 : case ProcMacro::IDENT:
466 0 : from_ident (tt.payload.ident, result);
467 0 : break;
468 0 : case ProcMacro::PUNCT:
469 0 : from_punct (tt.payload.punct, punct_accumulator, result);
470 0 : break;
471 0 : case ProcMacro::LITERAL:
472 0 : from_literal (tt.payload.literal, result);
473 0 : break;
474 0 : default:
475 0 : rust_unreachable ();
476 : }
477 0 : }
478 :
479 : /**
480 : * Iterate over a TokenStream and append all inner tokens to a vector.
481 : *
482 : * @param ts Reference to the TokenStream.
483 : * @param result Reference to the vector tokens should be appended to.
484 : */
485 : static void
486 0 : from_tokenstream (const ProcMacro::TokenStream &ts,
487 : std::vector<const_TokenPtr> &result)
488 : {
489 0 : std::vector<std::uint32_t> punct_accumulator;
490 0 : for (std::uint64_t i = 0; i < ts.size; i++)
491 : {
492 0 : from_tokentree (ts.data[i], punct_accumulator, result);
493 : }
494 0 : }
495 :
496 : std::vector<const_TokenPtr>
497 0 : convert (const ProcMacro::TokenStream &ts)
498 : {
499 0 : std::vector<const_TokenPtr> result;
500 0 : from_tokenstream (ts, result);
501 0 : return result;
502 : }
503 :
504 : } // namespace Rust
|