Line data Source code
1 : // Copyright (C) 2020-2026 Free Software Foundation, Inc.
2 :
3 : // This file is part of GCC.
4 :
5 : // GCC is free software; you can redistribute it and/or modify it under
6 : // the terms of the GNU General Public License as published by the Free
7 : // Software Foundation; either version 3, or (at your option) any later
8 : // version.
9 :
10 : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 : // for more details.
14 :
15 : // You should have received a copy of the GNU General Public License
16 : // along with GCC; see the file COPYING3. If not see
17 : // <http://www.gnu.org/licenses/>.
18 :
19 : #ifndef RUST_LEX_H
20 : #define RUST_LEX_H
21 :
22 : #include "rust-linemap.h"
23 : #include "rust-buffered-queue.h"
24 : #include "rust-token.h"
25 : #include "optional.h"
26 : #include "selftest.h"
27 : #include "rust-input-source.h"
28 :
29 : namespace Rust {
// Move-only owning wrapper around a C FILE* that closes the stream when the
// wrapper is destroyed.  The stdin stream is never closed.  An instance whose
// stream is null reports !ok ().
struct RAIIFile
{
private:
  FILE *file;
  const char *filename;

  // Close the wrapped stream unless there is none or it is stdin.
  void close ()
  {
    if (file != nullptr && file != stdin)
      fclose (file);
  }

  // Only regular files and character devices (/dev/null...) are accepted as
  // input.
  static bool allowed_filetype (const struct stat &statbuf)
  {
    return S_ISREG (statbuf.st_mode) || S_ISCHR (statbuf.st_mode);
  }

public:
  /**
   * Open FILENAME for reading; the name "-" selects stdin.
   *
   * When the file cannot be opened the instance is left in the !ok () state
   * with errno as set by fopen.  When the file opens but is neither a regular
   * file nor a character device (or cannot be stat'ed) it is closed again,
   * the instance is left !ok () and errno is set to EISDIR.
   *
   * The pointer is stored, not copied: the caller keeps the string alive.
   */
  RAIIFile (const char *filename) : file (nullptr), filename (filename)
  {
    if (strcmp (filename, "-") == 0)
      {
        file = stdin;
        return;
      }

    file = fopen (filename, "r");
    if (file == nullptr)
      return;

    struct stat statbuf;
    if (-1 == fstat (fileno (file), &statbuf) || !allowed_filetype (statbuf))
      {
        fclose (file);
        file = nullptr;
        // Assigned after fclose so that fclose cannot clobber the value.
        errno = EISDIR;
      }
  }

  /**
   * Create a RAIIFile from an existing instance of FILE*
   */
  RAIIFile (FILE *raw, const char *filename = nullptr)
    : file (raw), filename (filename)
  {}

  RAIIFile (const RAIIFile &other) = delete;
  RAIIFile &operator= (const RAIIFile &other) = delete;

  // The source must give up its stream, otherwise both wrappers would fclose
  // the same FILE*.
  RAIIFile (RAIIFile &&other) noexcept
    : file (other.file), filename (other.filename)
  {
    other.file = nullptr;
  }

  // Releases the stream currently held, then takes over the one from OTHER.
  // Self-assignment is a no-op: without the guard the stream would be closed
  // and then dropped.
  RAIIFile &operator= (RAIIFile &&other) noexcept
  {
    if (this != &other)
      {
        close ();
        file = other.file;
        filename = other.filename;
        other.file = nullptr;
      }

    return *this;
  }

  // Build an instance holding neither a stream nor a filename (!ok ()).
  static RAIIFile create_error () { return RAIIFile (nullptr, nullptr); }

  ~RAIIFile () { close (); }

  // Borrow the underlying stream; ownership stays with this object.
  FILE *get_raw () const { return file; }
  // May be null when the instance was created without a filename.
  const char *get_filename () const { return filename; }

  // True when a stream is held.
  bool ok () const { return file; }
};
111 :
112 : class Lexer
113 : {
114 : private:
115 : // Request new Location for current column in line_table
116 : location_t get_current_location ();
117 :
118 : // Skips the current input character.
119 : void skip_input ();
120 : // Advances current input character to n + 1 characters ahead of current
121 : // position.
122 : void skip_input (int n);
123 :
124 : // Peeks the current character.
125 : Codepoint peek_input ();
126 : // Returns character n characters ahead of current position.
127 : Codepoint peek_input (int n);
128 :
129 : // Classifies keyword (i.e. gets id for keyword).
130 : TokenId classify_keyword (const std::string &str);
131 :
132 : std::tuple<std::string, int, bool> parse_in_decimal ();
133 : std::pair<std::string, int> parse_in_exponent_part ();
134 : std::pair<PrimitiveCoreType, int> parse_in_type_suffix ();
135 : std::tuple<char, int, bool> parse_escape (char opening_char);
136 : std::tuple<Codepoint, int, bool> parse_utf8_escape ();
137 : int parse_partial_string_continue ();
138 : std::pair<long, int> parse_partial_hex_escape ();
139 : std::pair<Codepoint, int> parse_partial_unicode_escape ();
140 :
141 : void skip_broken_string_input (Codepoint current_char);
142 :
143 : TokenPtr parse_byte_char (location_t loc);
144 : TokenPtr parse_byte_string (location_t loc);
145 : TokenPtr parse_raw_byte_string (location_t loc);
146 : TokenPtr parse_raw_identifier (location_t loc);
147 : TokenPtr parse_string (location_t loc);
148 : TokenPtr maybe_parse_raw_string (location_t loc);
149 : TokenPtr parse_raw_string (location_t loc, int initial_hash_count);
150 : TokenPtr parse_non_decimal_int_literals (location_t loc);
151 : TokenPtr parse_decimal_int_or_float (location_t loc);
152 : TokenPtr parse_char_or_lifetime (location_t loc);
153 : TokenPtr parse_identifier_or_keyword (location_t loc);
154 :
155 : template <typename IsDigitFunc>
156 : TokenPtr parse_non_decimal_int_literal (location_t loc,
157 : IsDigitFunc is_digit_func,
158 : std::string existent_str, int base);
159 :
160 : public:
161 : // Construct lexer with input file and filename provided
162 : Lexer (const char *filename, RAIIFile input, Linemap *linemap,
163 : tl::optional<std::ofstream &> dump_lex_opt = tl::nullopt);
164 :
165 : // Lex the contents of a string instead of a file
166 : Lexer (const std::string &input, Linemap *linemap);
167 :
168 : // dtor
169 : ~Lexer ();
170 :
171 : // don't allow copy semantics (for now, at least)
172 : Lexer (const Lexer &other) = delete;
173 : Lexer &operator= (const Lexer &other) = delete;
174 :
175 : // enable move semantics
176 : Lexer (Lexer &&other) = default;
177 : Lexer &operator= (Lexer &&other) = default;
178 :
179 : bool input_source_is_valid_utf8 ();
180 :
181 : // Returns token n tokens ahead of current position.
182 235322 : const_TokenPtr peek_token (int n) { return token_queue.peek (n); }
183 : // Peeks the current token.
184 2783854 : const_TokenPtr peek_token () { return peek_token (0); }
185 :
186 : // Builds a token from the input queue.
187 : TokenPtr build_token ();
188 :
189 : // Advances current token to n + 1 tokens ahead of current position.
190 : void skip_token (int n);
191 : // Skips the current token.
192 701577 : void skip_token () { skip_token (0); }
193 :
194 : // Dumps and advances by n + 1 tokens.
195 : void dump_and_skip (int n);
196 :
197 : // Replaces the current token with a specified token.
198 : void replace_current_token (TokenPtr replacement);
199 : // FIXME: don't use anymore
200 :
201 : /* Splits the current token into two. Intended for use with nested generics
202 : * closes (i.e. T<U<X>> where >> is wrongly lexed as one token). Note that
203 : * this will only work with "simple" tokens like punctuation. */
204 : void split_current_token (TokenId new_left, TokenId new_right);
205 :
206 : void split_current_token (std::vector<TokenPtr> new_tokens);
207 :
208 : Linemap *get_line_map () { return line_map; }
209 840 : std::string get_filename () { return std::string (input.get_filename ()); }
210 :
211 : private:
212 : void start_line (int current_line, int current_column);
213 :
214 : // File for use as input.
215 : RAIIFile input;
216 : // TODO is this actually required? could just have file storage in InputSource
217 :
218 : // Current line number.
219 : int current_line;
220 : // Current column number.
221 : int current_column;
222 : // Current character.
223 : Codepoint current_char;
224 : // Line map.
225 : Linemap *line_map;
226 :
227 : /* Max column number that can be quickly allocated - higher may require
228 : * allocating new linemap */
229 : static const int max_column_hint = 80;
230 :
231 : tl::optional<std::ofstream &> dump_lex_out;
232 :
233 : // The input source for the lexer.
234 : // InputSource input_source;
235 : // Input file queue.
236 : std::unique_ptr<InputSource> raw_input_source;
237 : buffered_queue<Codepoint, std::reference_wrapper<InputSource>> input_queue;
238 :
239 : // Token source wrapper thing.
240 : struct TokenSource
241 : {
242 : // The lexer object that will use this TokenSource.
243 : Lexer *lexer;
244 :
245 : // Create a new TokenSource with given lexer.
246 4690 : TokenSource (Lexer *parLexer) : lexer (parLexer) {}
247 :
248 : // Used to mimic std::reference_wrapper that is used for InputSource.
249 : TokenSource &get () { return *this; }
250 :
251 : // Overload operator () to build token in lexer.
252 733690 : TokenPtr next () { return lexer->build_token (); }
253 : };
254 :
255 : // The token source for the lexer.
256 : // TokenSource token_source;
257 : // Token stream queue.
258 : buffered_queue<std::shared_ptr<Token>, TokenSource> token_queue;
259 : };
260 :
261 : } // namespace Rust
262 :
263 : #if CHECKING_P
264 :
namespace selftest {
// Self-test entry point.  This declaration is only visible when CHECKING_P is
// non-zero.  NOTE(review): the definition is not in this header; presumably it
// lives next to the input source / lexer implementation -- confirm.
void rust_input_source_test ();

} // namespace selftest
269 :
270 : #endif // CHECKING_P
271 :
272 : #endif
|