Branch data Line data Source code
1 : : // Copyright (C) 2020-2025 Free Software Foundation, Inc.
2 : :
3 : : // This file is part of GCC.
4 : :
5 : : // GCC is free software; you can redistribute it and/or modify it under
6 : : // the terms of the GNU General Public License as published by the Free
7 : : // Software Foundation; either version 3, or (at your option) any later
8 : : // version.
9 : :
10 : : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 : : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 : : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 : : // for more details.
14 : :
15 : : // You should have received a copy of the GNU General Public License
16 : : // along with GCC; see the file COPYING3. If not see
17 : : // <http://www.gnu.org/licenses/>.
18 : :
19 : : #ifndef RUST_NAME_RESOLVER_2_0_H
20 : : #define RUST_NAME_RESOLVER_2_0_H
21 : :
22 : : #include "optional.h"
23 : : #include "rust-forever-stack.h"
24 : : #include "rust-hir-map.h"
25 : : #include "rust-rib.h"
26 : : #include "rust-stacked-contexts.h"
27 : :
28 : : namespace Rust {
29 : : namespace Resolver2_0 {
30 : :
31 : : // TODO: Add missing mappings and data structures
32 : :
33 : : /**
34 : : The data structures we need to develop need to fill in a few roles - like the
35 : : original name resolver, they need to be accessible at multiple points during the
36 : : pipeline to allow compiler passes such as macro expansion or typechecking to
37 : : benefit from them. Unlike the original name resolution, these data structures
38 : : need to be created by multiple compiler passes: Whereas the original name
39 : : resolution of gccrs tries to perform name resolution in a single pass, it fails
40 : : at properly handling more complex name resolution cases such as macro name
41 : : resolution, imports in general, and glob imports in particular. The goal of this
42 : : new name resolution algorithm is to split the name resolution in at least two
43 : : passes - `Early` name resolution, which takes care of macro name resolution and
44 : : import resolution, and `Late` name resolution - your typical name resolution,
45 : : for types, functions, variables...
46 : :
47 : : 1. `Early`
48 : :
49 : : The Early name resolution is tied in snuggly with macro expansion: macro
50 : : expansion cannot happen without some form of name resolution (pointing an
51 : : invocation to its definition) but may also *depend* on name resolution (a macro
52 : : generating another macro... or importing items... and funny other cases like
53 : : these). It needs to work in a fixed-point fashion alongside macro expansion:
54 : : While there are imports to resolve, or macros to expand, we need to keep going
55 : : and resolve them. This is achieved, among other things, by a top-level name
56 : : resolution pass in charge of collection use statements and macro definitions (as
57 : : well as Items, which will be useful for later passes of the name resolution).
58 : :
59 : : This top-level pass exists because Rust enables you to call a function
60 : : before having declared it (at a lexical level, i.e calling `f(15)` at line 3
61 : : while the `f` function is declared at line 1499).
62 : :
63 : : This Early pass needs to build the first part of our "resolution map", which
64 : : will then be used in multiple contexts:
65 : :
66 : : 1. The MacroExpander, in a read-only fashion: fetching macro definitions for
67 : : each invocation and performing the expansion.
68 : : 2. `Late`, which will write more data inside that resolution map, and use it
69 : : to perform its name resolution too.
70 : :
71 : : This is where the first challenge of this data structure lies: The existing
72 : : data structures and name resolution algorithm relies on the name resolution pass
73 : : happening just once. In typical name resolution fashion, when it sees a lexical
74 : : scope (a new module, a function's block, a block expression...), it "pushes" a
75 : : new "Scope" to a stack of these scopes, and "pops" it when exiting said lexical
76 : : scope. However, because we are splitting the name resolution into two passes, we
77 : : would like to avoid re-doing a bunch of work we've already done - which is why
78 : : this data structure needs to allow "re-entrancy", or to at least not keep as
79 : : much state as the existing one, and allow for viewing the same module multiple
80 : : times without throwing a fit.
81 : :
82 : : We will be implementing a "forever stack" of scopes, which allows the user the
83 : : pushing of new scopes onto the stack, but only simulates the popping of a scope:
84 : : When pushing new scopes, more space is allocated on our stack, and we keep
85 : : track of this scope as being the current one - however, when popping this scope,
86 : : we do not actually delete the memory associated with it: we simply mark the
87 : : previous scope (parent) as the current one.
88 : :
89 : : In the example below, each number indicates the "state" of our resolution map,
90 : : and the carret is used to point to the current lexical scope.
91 : :
92 : : ```rust
93 : : // []
94 : : //
95 : : fn main() { // [ `main` scope: {} ]
96 : : // ^
97 : : let a = 15; // [ `main` scope: { Decl(a) } ]
98 : : // ^
99 : : { _PUSH_ // [ `main` scope: { Decl(a) }, anonymous scope: {} ]
100 : : // ^
101 : : let a = 16; // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ]
102 : : // ^
103 : : f(a); // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ]
104 : : // ^
105 : : } _POP_ // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ]
106 : : // ^
107 : : f(a); // [ `main` scope: { Decl(a) }, anonymous scope: { Decl(a) } ]
108 : : // ^
109 : : }
110 : : ```
111 : :
112 : : This allows us to revisit scopes previously visited in later phases of the name
113 : : resolution, and add more information if necessary.
114 : :
115 : : 2. `Late`
116 : :
117 : : `Late` name resolution possesses some unique challenges since Rust's name
118 : : resolution rules are extremely complex - variable shadowing, variable capture in
119 : : closures (but not inner functions!)... You can have a look at a fucked up
120 : : example here:
121 : :
122 : : https://rustc-dev-guide.rust-lang.org/name-resolution.html#scopes-and-ribs
123 : :
124 : : This requires us to think about what exactly to put in our `Scope`s and what to
125 : : do with our `Rib`s - and how it affects our data structures. For example, in the
126 : : above example, `rustc` demonstrates how multiple `Rib`s can be created inside of
127 : : a single lexical scope for variables, as the Rust programming language allows
128 : : shadowing.
129 : :
130 : : TODO: Mention macro hygiene and that it is the same
131 : : TODO: How does this affect our data structures?
132 : : TODO: Last challenge - reuse the same APIs to allow the typechecker to not
133 : : change?
134 : : TODO: Mention that ForeverStack is templated to make sure that behavior is
135 : : correct
136 : : */
137 : :
138 : : // FIXME: Documentation
139 : : class Usage
140 : : {
141 : : public:
142 : 154556 : explicit Usage (NodeId id) : id (id) {}
143 : :
144 : : // TODO: move to name-resolution-ctx.cc
145 : : // storing it as a key in a map
146 : 1819986 : bool operator< (const Usage other) const { return other.id < id; }
147 : :
148 : : NodeId id;
149 : : };
150 : :
151 : : // FIXME: Documentation
152 : : class Definition
153 : : {
154 : : public:
155 : 11959 : explicit Definition (NodeId id) : id (id) {}
156 : :
157 : : NodeId id;
158 : : };
159 : :
160 : 28914 : struct Binding
161 : : {
162 : : enum class Kind
163 : : {
164 : : Product,
165 : : Or,
166 : : } kind;
167 : :
168 : : std::unordered_set<Identifier> set;
169 : :
170 : 4141 : Binding (Binding::Kind kind) : kind (kind) {}
171 : : };
172 : :
173 : : /**
174 : : * Used to identify the source of a binding, and emit the correct error message.
175 : : */
176 : : enum class BindingSource
177 : : {
178 : : Match,
179 : : Let,
180 : : For,
181 : : /* Closure param or function param */
182 : : Param
183 : : };
184 : :
185 : 16421 : class BindingLayer
186 : : {
187 : : BindingSource source;
188 : : std::vector<Binding> bindings;
189 : :
190 : : bool bind_test (Identifier ident, Binding::Kind kind);
191 : :
192 : : public:
193 : : void push (Binding::Kind kind);
194 : :
195 : : BindingLayer (BindingSource source);
196 : :
197 : : /**
198 : : * Identifies if the identifier has been used in a product binding context.
199 : : * eg. `let (a, a) = test();`
200 : : */
201 : : bool is_and_bound (Identifier ident);
202 : :
203 : : /**
204 : : * Identifies if the identifier has been used in a or context.
205 : : * eg. `let (a, 1) | (a, 2) = test()`
206 : : */
207 : : bool is_or_bound (Identifier ident);
208 : :
209 : : void insert_ident (Identifier ident);
210 : :
211 : : void merge ();
212 : :
213 : : BindingSource get_source () const;
214 : : };
215 : :
216 : : // Now our resolver, which keeps track of all the `ForeverStack`s we could want
217 : : class NameResolutionContext
218 : : {
219 : : public:
220 : : NameResolutionContext ();
221 : :
222 : : /**
223 : : * Insert a new value in the current rib.
224 : : *
225 : : * @param name Name of the value to insert.
226 : : * @param id This value's ID, e.g the function definition's node ID.
227 : : * @param ns Namespace in which to insert the value.
228 : : */
229 : : tl::expected<NodeId, DuplicateNameError> insert (Identifier name, NodeId id,
230 : : Namespace ns);
231 : :
232 : : tl::expected<NodeId, DuplicateNameError> insert_variant (Identifier name,
233 : : NodeId id);
234 : :
235 : : tl::expected<NodeId, DuplicateNameError>
236 : : insert_shadowable (Identifier name, NodeId id, Namespace ns);
237 : :
238 : : tl::expected<NodeId, DuplicateNameError>
239 : : insert_globbed (Identifier name, NodeId id, Namespace ns);
240 : :
241 : : /**
242 : : * Run a lambda in a "scoped" context, meaning that a new `Rib` will be pushed
243 : : * before executing the lambda and then popped. This is useful for all kinds
244 : : * of scope in the language, such as a block expression or when entering a
245 : : * function. This variant of the function enters a new scope in *all*
246 : : * namespaces, while the second variant enters a scope in *one* namespace.
247 : : *
248 : : * @param rib_kind New `Rib` to create when entering this scope. A function
249 : : * `Rib`, or an item `Rib`... etc
250 : : * @param scope_id node ID of the scope we are entering, e.g the block's
251 : : * `NodeId`.
252 : : * @param lambda Function to run within that scope
253 : : * @param path Optional path of the scope. This is useful for scopes which
254 : : * affect path resolution, such as modules. Defaults to an empty
255 : : * option.
256 : : */
257 : : // FIXME: Do we want to handle something in particular for expected within the
258 : : // scoped lambda?
259 : : void scoped (Rib::Kind rib_kind, NodeId scope_id,
260 : : std::function<void (void)> lambda,
261 : : tl::optional<Identifier> path = {});
262 : : void scoped (Rib::Kind rib_kind, Namespace ns, NodeId scope_id,
263 : : std::function<void (void)> lambda,
264 : : tl::optional<Identifier> path = {});
265 : :
266 : : ForeverStack<Namespace::Values> values;
267 : : ForeverStack<Namespace::Types> types;
268 : : ForeverStack<Namespace::Macros> macros;
269 : : ForeverStack<Namespace::Labels> labels;
270 : :
271 : : Analysis::Mappings &mappings;
272 : : StackedContexts<BindingLayer> bindings;
273 : :
274 : : // TODO: Rename
275 : : // TODO: Use newtype pattern for Usage and Definition
276 : : void map_usage (Usage usage, Definition definition);
277 : :
278 : : tl::optional<NodeId> lookup (NodeId usage) const;
279 : :
280 : : template <typename S>
281 : : tl::optional<Rib::Definition>
282 : 10213 : resolve_path (const std::vector<S> &segments,
283 : : bool has_opening_scope_resolution,
284 : : std::vector<Error> &collect_errors, Namespace ns)
285 : : {
286 : 10213 : std::function<void (const S &, NodeId)> insert_segment_resolution
287 : 23526 : = [this] (const S &seg, NodeId id) {
288 : 4828 : auto seg_id = unwrap_segment_node_id (seg);
289 : 11545 : if (resolved_nodes.find (Usage (seg_id)) == resolved_nodes.end ())
290 : 9962 : map_usage (Usage (seg_id), Definition (id));
291 : : };
292 : 10213 : switch (ns)
293 : : {
294 : 2309 : case Namespace::Values:
295 : 2309 : return values.resolve_path (segments, has_opening_scope_resolution,
296 : 2309 : insert_segment_resolution, collect_errors);
297 : 7570 : case Namespace::Types:
298 : 7570 : return types.resolve_path (segments, has_opening_scope_resolution,
299 : 7570 : insert_segment_resolution, collect_errors);
300 : 334 : case Namespace::Macros:
301 : 334 : return macros.resolve_path (segments, has_opening_scope_resolution,
302 : 334 : insert_segment_resolution, collect_errors);
303 : 0 : case Namespace::Labels:
304 : 0 : return labels.resolve_path (segments, has_opening_scope_resolution,
305 : 0 : insert_segment_resolution, collect_errors);
306 : 0 : default:
307 : 0 : rust_unreachable ();
308 : : }
309 : 10213 : }
310 : :
311 : : template <typename S, typename... Args>
312 : : tl::optional<Rib::Definition>
313 : 9736 : resolve_path (const std::vector<S> &segments,
314 : : bool has_opening_scope_resolution,
315 : : tl::optional<std::vector<Error> &> collect_errors,
316 : : Namespace ns_first, Args... ns_args)
317 : : {
318 : 9736 : std::initializer_list<Namespace> namespaces = {ns_first, ns_args...};
319 : :
320 : 19949 : for (auto ns : namespaces)
321 : : {
322 : 10213 : std::vector<Error> collect_errors_inner;
323 : 10213 : if (auto ret = resolve_path (segments, has_opening_scope_resolution,
324 : : collect_errors_inner, ns))
325 : : return ret;
326 : 1468 : if (!collect_errors_inner.empty ())
327 : : {
328 : 14 : if (collect_errors.has_value ())
329 : : {
330 : 6 : std::move (collect_errors_inner.begin (),
331 : : collect_errors_inner.end (),
332 : : std::back_inserter (collect_errors.value ()));
333 : : }
334 : : else
335 : : {
336 : 16 : for (auto &e : collect_errors_inner)
337 : 8 : e.emit ();
338 : : }
339 : 14 : return tl::nullopt;
340 : : }
341 : : }
342 : :
343 : 977 : return tl::nullopt;
344 : : }
345 : :
346 : : template <typename... Args>
347 : : tl::optional<Rib::Definition>
348 : 774 : resolve_path (const AST::SimplePath &path,
349 : : tl::optional<std::vector<Error> &> collect_errors,
350 : : Namespace ns_first, Args... ns_args)
351 : : {
352 : : return resolve_path (path.get_segments (),
353 : 872 : path.has_opening_scope_resolution (), collect_errors,
354 : 872 : ns_first, ns_args...);
355 : : }
356 : :
357 : : template <typename... Args>
358 : : tl::optional<Rib::Definition>
359 : 2222 : resolve_path (const AST::PathInExpression &path,
360 : : tl::optional<std::vector<Error> &> collect_errors,
361 : : Namespace ns_first, Args... ns_args)
362 : : {
363 : 2222 : return resolve_path (path.get_segments (), path.opening_scope_resolution (),
364 : 2222 : collect_errors, ns_first, ns_args...);
365 : : }
366 : :
367 : : template <typename... Args>
368 : : tl::optional<Rib::Definition>
369 : 6642 : resolve_path (const AST::TypePath &path,
370 : : tl::optional<std::vector<Error> &> collect_errors,
371 : : Namespace ns_first, Args... ns_args)
372 : : {
373 : : return resolve_path (path.get_segments (),
374 : 6642 : path.has_opening_scope_resolution_op (),
375 : 6642 : collect_errors, ns_first, ns_args...);
376 : : }
377 : :
378 : : template <typename P, typename... Args>
379 : 8962 : tl::optional<Rib::Definition> resolve_path (const P &path, Namespace ns_first,
380 : : Args... ns_args)
381 : : {
382 : 8962 : return resolve_path (path, tl::nullopt, ns_first, ns_args...);
383 : : }
384 : :
385 : : template <typename P, typename... Args>
386 : : tl::optional<Rib::Definition>
387 : : resolve_path (const P &path_segments, bool has_opening_scope_resolution,
388 : : Namespace ns_first, Args... ns_args)
389 : : {
390 : : return resolve_path (path_segments, has_opening_scope_resolution,
391 : : tl::nullopt, ns_first, ns_args...);
392 : : }
393 : :
394 : : private:
395 : : /* Map of "usage" nodes which have been resolved to a "definition" node */
396 : : std::map<Usage, Definition> resolved_nodes;
397 : : };
398 : :
399 : : } // namespace Resolver2_0
400 : : } // namespace Rust
401 : :
402 : : #endif // ! RUST_NAME_RESOLVER_2_0_H
|