Branch data Line data Source code
1 : : /* Find near-matches for strings and identifiers.
2 : : Copyright (C) 2015-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #ifndef GCC_SPELLCHECK_H
21 : : #define GCC_SPELLCHECK_H
22 : :
23 : : typedef unsigned int edit_distance_t;
24 : : const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
25 : :
26 : : /* spellcheck.cc */
27 : : extern edit_distance_t
28 : : get_edit_distance (const char *s, int len_s,
29 : : const char *t, int len_t);
30 : :
31 : : extern edit_distance_t
32 : : get_edit_distance (const char *s, const char *t);
33 : :
34 : : extern const char *
35 : : find_closest_string (const char *target,
36 : : const auto_vec<const char *> *candidates);
37 : :
38 : : /* A traits class for describing a string-like type usable by
39 : : class best_match.
40 : : Specializations should provide the implementations of the following:
41 : :
42 : : static size_t get_length (TYPE);
43 : : static const char *get_string (TYPE);
44 : :
45 : : get_string should return a non-NULL ptr, which does not need to be
46 : : 0-terminated. */
47 : :
48 : : template <typename TYPE>
49 : : struct edit_distance_traits {};
50 : :
51 : : /* Specialization of edit_distance_traits for C-style strings. */
52 : :
53 : : template <>
54 : : struct edit_distance_traits<const char *>
55 : : {
56 : 5682789 : static size_t get_length (const char *str)
57 : : {
58 : 5682789 : gcc_assert (str);
59 : 5682789 : return strlen (str);
60 : : }
61 : :
62 : 2463311 : static const char *get_string (const char *str)
63 : : {
64 : 2462521 : gcc_assert (str);
65 : 2463311 : return str;
66 : : }
67 : : };
68 : :
69 : : extern edit_distance_t get_edit_distance_cutoff (size_t goal_len,
70 : : size_t candidate_len);
71 : :
72 : : /* A type for use when determining the best match against a string,
73 : : expressed as a template so that we can match against various
74 : : string-like types (const char *, frontend identifiers, and preprocessor
75 : : macros).
76 : :
77 : : This type accumulates the best possible match against GOAL_TYPE for
78 : : a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
79 : : number of calls to get_edit_distance and to
80 : : edit_distance_traits<T>::get_length. */
81 : :
82 : : template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
83 : : class best_match
84 : : {
85 : : public:
86 : : typedef GOAL_TYPE goal_t;
87 : : typedef CANDIDATE_TYPE candidate_t;
88 : : typedef edit_distance_traits<goal_t> goal_traits;
89 : : typedef edit_distance_traits<candidate_t> candidate_traits;
90 : :
91 : : /* Constructor. */
92 : :
93 : 9197 : best_match (GOAL_TYPE goal,
94 : : edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
95 : 9197 : : m_goal (goal_traits::get_string (goal)),
96 : 9197 : m_goal_len (goal_traits::get_length (goal)),
97 : 9197 : m_best_candidate (NULL),
98 : 9197 : m_best_distance (best_distance_so_far),
99 : 9197 : m_best_candidate_len (0)
100 : 9191 : {}
101 : :
102 : : /* Compare the edit distance between CANDIDATE and m_goal,
103 : : and if it's the best so far, record it. */
104 : :
105 : 8555917 : void consider (candidate_t candidate)
106 : : {
107 : 8555917 : size_t candidate_len = candidate_traits::get_length (candidate);
108 : :
109 : : /* Calculate a lower bound on the candidate's distance to the goal,
110 : : based on the difference in lengths; it will require at least
111 : : this many insertions/deletions. */
112 : 8555917 : edit_distance_t min_candidate_distance
113 : 8555917 : = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
114 : :
115 : : /* If the candidate's length is sufficiently different to that
116 : : of the goal string, then the number of insertions/deletions
117 : : may be >= the best distance so far. If so, we can reject
118 : : the candidate immediately without needing to compute
119 : : the exact distance, since it won't be an improvement. */
120 : 8555917 : if (min_candidate_distance >= m_best_distance)
121 : : return;
122 : :
123 : : /* If the candidate will be unable to beat the criterion in
124 : : get_best_meaningful_candidate, reject it without computing
125 : : the exact distance. */
126 : 4976956 : edit_distance_t cutoff = get_cutoff (candidate_len);
127 : 4976956 : if (min_candidate_distance > cutoff)
128 : : return;
129 : :
130 : : /* Otherwise, compute the distance and see if the candidate
131 : : has beaten the previous best value. */
132 : 4874374 : const char *candidate_str = candidate_traits::get_string (candidate);
133 : : edit_distance_t dist
134 : 4874374 : = get_edit_distance (m_goal, m_goal_len, candidate_str, candidate_len);
135 : :
136 : 4874374 : bool is_better = false;
137 : 4874374 : if (dist < m_best_distance)
138 : : is_better = true;
139 : 4860611 : else if (dist == m_best_distance)
140 : : {
141 : : /* Prefer a candidate that inserts a trailing '=',
142 : : so that for
143 : : "-ftrivial-auto-var-init"
144 : : we suggest
145 : : "-ftrivial-auto-var-init="
146 : : rather than
147 : : "-Wtrivial-auto-var-init". */
148 : : /* Prefer a candidate has a difference in trailing sign character. */
149 : 65207 : if (candidate_str[candidate_len - 1] == '='
150 : 885 : && m_goal[m_goal_len - 1] != '=')
151 : : is_better = true;
152 : : }
153 : :
154 : : if (is_better)
155 : : {
156 : 14648 : m_best_distance = dist;
157 : 14648 : m_best_candidate = candidate;
158 : 14648 : m_best_candidate_len = candidate_len;
159 : : }
160 : : }
161 : :
162 : : /* Assuming that BEST_CANDIDATE is known to be better than
163 : : m_best_candidate, update (without recomputing the edit distance to
164 : : the goal). */
165 : :
166 : 11 : void set_best_so_far (CANDIDATE_TYPE best_candidate,
167 : : edit_distance_t best_distance,
168 : : size_t best_candidate_len)
169 : : {
170 : 0 : gcc_assert (best_distance < m_best_distance);
171 : 11 : m_best_candidate = best_candidate;
172 : 11 : m_best_distance = best_distance;
173 : 11 : m_best_candidate_len = best_candidate_len;
174 : 11 : }
175 : :
176 : : /* Generate the maximum edit distance for which we consider a suggestion
177 : : to be meaningful, given a candidate of length CANDIDATE_LEN. */
178 : :
179 : 4982891 : edit_distance_t get_cutoff (size_t candidate_len) const
180 : : {
181 : 4976956 : return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
182 : : }
183 : :
184 : : /* Get the best candidate so far, but applying a filter to ensure
185 : : that we return NULL if none of the candidates are close to the goal,
186 : : to avoid offering nonsensical suggestions to the user. */
187 : :
188 : 9138 : candidate_t get_best_meaningful_candidate () const
189 : : {
190 : : /* If the edit distance is too high, the suggestion is likely to be
191 : : meaningless. */
192 : 9138 : if (m_best_candidate)
193 : : {
194 : 5935 : edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
195 : 5935 : if (m_best_distance > cutoff)
196 : : return NULL;
197 : : }
198 : :
199 : : /* If the goal string somehow makes it into the candidate list, offering
200 : : it as a suggestion will be nonsensical e.g.
201 : : 'constexpr' does not name a type; did you mean 'constexpr'?
202 : : Ultimately such suggestions are due to bugs in constructing the
203 : : candidate list, but as a band-aid, do not offer suggestions for
204 : : distance == 0 (where candidate == goal). */
205 : 4369 : if (m_best_distance == 0)
206 : : return NULL;
207 : :
208 : 4231 : return m_best_candidate;
209 : : }
210 : :
211 : : /* Get the closest candidate so far, without applying any filtering. */
212 : :
213 : 89 : candidate_t blithely_get_best_candidate () const
214 : : {
215 : 89 : return m_best_candidate;
216 : : }
217 : :
218 : 5497 : edit_distance_t get_best_distance () const { return m_best_distance; }
219 : 11 : size_t get_best_candidate_length () const { return m_best_candidate_len; }
220 : :
221 : : private:
222 : : const char *m_goal;
223 : : size_t m_goal_len;
224 : : candidate_t m_best_candidate;
225 : : edit_distance_t m_best_distance;
226 : : size_t m_best_candidate_len;
227 : : };
228 : :
229 : : #endif /* GCC_SPELLCHECK_H */
|