Line data Source code
1 : /* Find near-matches for strings and identifiers.
2 : Copyright (C) 2015-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #ifndef GCC_SPELLCHECK_H
21 : #define GCC_SPELLCHECK_H
22 :
23 : typedef unsigned int edit_distance_t;
24 : const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
25 :
26 : /* spellcheck.cc */
27 : extern edit_distance_t
28 : get_edit_distance (const char *s, int len_s,
29 : const char *t, int len_t);
30 :
31 : extern edit_distance_t
32 : get_edit_distance (const char *s, const char *t);
33 :
34 : extern const char *
35 : find_closest_string (const char *target,
36 : const auto_vec<const char *> *candidates);
37 :
38 : /* A traits class for describing a string-like type usable by
39 : class best_match.
40 : Specializations should provide the implementations of the following:
41 :
42 : static size_t get_length (StringlikeType);
43 : static const char *get_string (StringlikeType);
44 :
45 : get_string should return a non-NULL ptr, which does not need to be
46 : 0-terminated. */
47 :
48 : template <typename StringlikeType>
49 : struct edit_distance_traits {};
50 :
51 : /* Specialization of edit_distance_traits for C-style strings. */
52 :
53 : template <>
54 : struct edit_distance_traits<const char *>
55 : {
56 6689077 : static size_t get_length (const char *str)
57 : {
58 6689077 : gcc_assert (str);
59 6689077 : return strlen (str);
60 : }
61 :
62 3450364 : static const char *get_string (const char *str)
63 : {
64 3449166 : gcc_assert (str);
65 3450364 : return str;
66 : }
67 : };
68 :
69 : extern edit_distance_t get_edit_distance_cutoff (size_t goal_len,
70 : size_t candidate_len);
71 :
72 : /* A type for use when determining the best match against a string,
73 : expressed as a template so that we can match against various
74 : string-like types (const char *, frontend identifiers, and preprocessor
75 : macros).
76 :
77 : This type accumulates the best possible match against GoalType for
78 : a sequence of elements of CandidateType, whilst minimizing the
79 : number of calls to get_edit_distance and to
80 : edit_distance_traits<T>::get_length. */
81 :
82 : template <typename GoalType, typename CandidateType>
83 : class best_match
84 : {
85 : public:
86 : typedef GoalType goal_t;
87 : typedef CandidateType candidate_t;
88 : typedef edit_distance_traits<goal_t> goal_traits;
89 : typedef edit_distance_traits<candidate_t> candidate_traits;
90 :
91 : /* Constructor. */
92 :
93 10391 : best_match (goal_t goal,
94 : edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
95 10391 : : m_goal (goal_traits::get_string (goal)),
96 10391 : m_goal_len (goal_traits::get_length (goal)),
97 10391 : m_best_candidate (NULL),
98 10391 : m_best_distance (best_distance_so_far),
99 10391 : m_best_candidate_len (0)
100 10385 : {}
101 :
102 : /* Compare the edit distance between CANDIDATE and m_goal,
103 : and if it's the best so far, record it. */
104 :
105 9650222 : void consider (candidate_t candidate)
106 : {
107 9650222 : size_t candidate_len = candidate_traits::get_length (candidate);
108 :
109 : /* Calculate a lower bound on the candidate's distance to the goal,
110 : based on the difference in lengths; it will require at least
111 : this many insertions/deletions. */
112 9650222 : edit_distance_t min_candidate_distance
113 9650222 : = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
114 :
115 : /* If the candidate's length is sufficiently different to that
116 : of the goal string, then the number of insertions/deletions
117 : may be >= the best distance so far. If so, we can reject
118 : the candidate immediately without needing to compute
119 : the exact distance, since it won't be an improvement. */
120 9650222 : if (min_candidate_distance >= m_best_distance)
121 : return;
122 :
123 : /* If the candidate will be unable to beat the criterion in
124 : get_best_meaningful_candidate, reject it without computing
125 : the exact distance. */
126 6070021 : edit_distance_t cutoff = get_cutoff (candidate_len);
127 6070021 : if (min_candidate_distance > cutoff)
128 : return;
129 :
130 : /* Otherwise, compute the distance and see if the candidate
131 : has beaten the previous best value. */
132 5949114 : const char *candidate_str = candidate_traits::get_string (candidate);
133 : edit_distance_t dist
134 5949114 : = get_edit_distance (m_goal, m_goal_len, candidate_str, candidate_len);
135 :
136 5949114 : bool is_better = false;
137 5949114 : if (dist < m_best_distance)
138 : is_better = true;
139 5933143 : else if (dist == m_best_distance)
140 : {
141 : /* Prefer a candidate that inserts a trailing '=',
142 : so that for
143 : "-ftrivial-auto-var-init"
144 : we suggest
145 : "-ftrivial-auto-var-init="
146 : rather than
147 : "-Wtrivial-auto-var-init". */
148 : /* Prefer a candidate has a difference in trailing sign character. */
149 69634 : if (candidate_str[candidate_len - 1] == '='
150 1317 : && m_goal[m_goal_len - 1] != '=')
151 : is_better = true;
152 : }
153 :
154 : if (is_better)
155 : {
156 17288 : m_best_distance = dist;
157 17288 : m_best_candidate = candidate;
158 17288 : m_best_candidate_len = candidate_len;
159 : }
160 : }
161 :
162 : /* Assuming that BEST_CANDIDATE is known to be better than
163 : m_best_candidate, update (without recomputing the edit distance to
164 : the goal). */
165 :
166 11 : void set_best_so_far (candidate_t best_candidate,
167 : edit_distance_t best_distance,
168 : size_t best_candidate_len)
169 : {
170 0 : gcc_assert (best_distance < m_best_distance);
171 11 : m_best_candidate = best_candidate;
172 11 : m_best_distance = best_distance;
173 11 : m_best_candidate_len = best_candidate_len;
174 11 : }
175 :
176 : /* Generate the maximum edit distance for which we consider a suggestion
177 : to be meaningful, given a candidate of length CANDIDATE_LEN. */
178 :
179 6076769 : edit_distance_t get_cutoff (size_t candidate_len) const
180 : {
181 6070021 : return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
182 : }
183 :
184 : /* Get the best candidate so far, but applying a filter to ensure
185 : that we return NULL if none of the candidates are close to the goal,
186 : to avoid offering nonsensical suggestions to the user. */
187 :
188 10353 : candidate_t get_best_meaningful_candidate () const
189 : {
190 : /* If the edit distance is too high, the suggestion is likely to be
191 : meaningless. */
192 10353 : if (m_best_candidate)
193 : {
194 6748 : edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
195 6748 : if (m_best_distance > cutoff)
196 : return NULL;
197 : }
198 :
199 : /* If the goal string somehow makes it into the candidate list, offering
200 : it as a suggestion will be nonsensical e.g.
201 : 'constexpr' does not name a type; did you mean 'constexpr'?
202 : Ultimately such suggestions are due to bugs in constructing the
203 : candidate list, but as a band-aid, do not offer suggestions for
204 : distance == 0 (where candidate == goal). */
205 4882 : if (m_best_distance == 0)
206 : return NULL;
207 :
208 4752 : return m_best_candidate;
209 : }
210 :
211 : /* Get the closest candidate so far, without applying any filtering. */
212 :
213 74 : candidate_t blithely_get_best_candidate () const
214 : {
215 74 : return m_best_candidate;
216 : }
217 :
218 6144 : edit_distance_t get_best_distance () const { return m_best_distance; }
219 11 : size_t get_best_candidate_length () const { return m_best_candidate_len; }
220 :
221 : private:
222 : const char *m_goal;
223 : size_t m_goal_len;
224 : candidate_t m_best_candidate;
225 : edit_distance_t m_best_distance;
226 : size_t m_best_candidate_len;
227 : };
228 :
229 : #endif /* GCC_SPELLCHECK_H */
|