Branch data Line data Source code
1 : : /* Data and functions related to line maps and input files.
2 : : Copyright (C) 2004-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "intl.h"
24 : : #include "diagnostic.h"
25 : : #include "diagnostics/file-cache.h"
26 : : #include "selftest.h"
27 : : #include "cpplib.h"
28 : :
29 : : #ifndef HAVE_ICONV
30 : : #define HAVE_ICONV 0
31 : : #endif
32 : :
33 : : const char *
34 : 5642307 : special_fname_builtin ()
35 : : {
36 : 5642307 : return _("<built-in>");
37 : : }
38 : :
39 : : /* Current position in real source file; starts out as UNKNOWN_LOCATION. */
40 : :
41 : : location_t input_location = UNKNOWN_LOCATION;
42 : : /* The line_maps instance that the location helpers in this file consult. */
43 : : class line_maps *line_table;
44 : :
45 : : /* A stashed copy of "line_table" for use by selftest::line_table_test.
46 : : This needs to be a global so that it can be a GC root, and thus
47 : : prevent the stashed copy from being garbage-collected if the GC runs
48 : : during a line_table_test. */
49 : :
50 : : class line_maps *saved_line_table;
51 : :
52 : : /* Expand the source location LOC into a human readable location. If
53 : : LOC resolves to a builtin location, the file name of the readable
54 : : location is set to the string "<built-in>". If EXPANSION_POINT_P is
55 : : TRUE and LOC is virtual, then it is resolved to the expansion
56 : : point of the involved macro. Otherwise, it is resolved to the
57 : : spelling location of the token.
58 : :
59 : : When resolving to the spelling location of the token, if the
60 : : resulting location is for a built-in location (that is, it has no
61 : : associated line/column) in the context of a macro expansion, the
62 : : returned location is the first one (while unwinding the macro
63 : : location towards its expansion point) that is in real source
64 : : code.
65 : :
66 : : ASPECT controls which part of the location to use. */
67 : :
68 : : static expanded_location
69 : 764653474 : expand_location_1 (const line_maps *set,
70 : : location_t loc,
71 : : bool expansion_point_p,
72 : : enum location_aspect aspect)
73 : : {
74 : 764653474 : expanded_location xloc;
75 : 764653474 : const line_map_ordinary *map;
76 : 764653474 : enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
77 : 764653474 : tree block = NULL;
78 : :
79 : 764653474 : if (IS_ADHOC_LOC (loc))
80 : : {
81 : 191497293 : block = LOCATION_BLOCK (loc);
82 : 191497293 : loc = LOCATION_LOCUS (loc);
83 : : }
84 : :
85 : 764653474 : memset (&xloc, 0, sizeof (xloc));
86 : :
87 : 764653474 : if (loc >= RESERVED_LOCATION_COUNT)
88 : : {
89 : 735099670 : if (!expansion_point_p)
90 : : {
91 : : /* We want to resolve LOC to its spelling location.
92 : :
93 : : But if that spelling location is a reserved location that
94 : : appears in the context of a macro expansion (like for a
95 : : location for a built-in token), let's consider the first
96 : : location (toward the expansion point) that is not reserved;
97 : : that is, the first location that is in real source code. */
98 : 5181026 : loc = linemap_unwind_to_first_non_reserved_loc (set,
99 : : loc, NULL);
100 : 5181026 : lrk = LRK_SPELLING_LOCATION;
101 : : }
102 : 735099670 : loc = linemap_resolve_location (set, loc, lrk, &map);
103 : :
104 : : /* loc is now either in an ordinary map, or is a reserved location.
105 : : If it is a compound location, the caret is in a spelling location,
106 : : but the start/finish might still be a virtual location.
107 : : Depending of what the caller asked for, we may need to recurse
108 : : one level in order to resolve any virtual locations in the
109 : : end-points. */
110 : 735099670 : switch (aspect)
111 : : {
112 : 0 : default:
113 : 0 : gcc_unreachable ();
114 : : /* Fall through. */
115 : : case LOCATION_ASPECT_CARET:
116 : : break;
117 : 3455046 : case LOCATION_ASPECT_START:
118 : 3455046 : {
119 : 3455046 : location_t start = get_start (loc);
120 : 3455046 : if (start != loc)
121 : 1145 : return expand_location_1 (set, start, expansion_point_p, aspect);
122 : : }
123 : : break;
124 : 99533 : case LOCATION_ASPECT_FINISH:
125 : 99533 : {
126 : 99533 : location_t finish = get_finish (loc);
127 : 99533 : if (finish != loc)
128 : 1103 : return expand_location_1 (set, finish, expansion_point_p, aspect);
129 : : }
130 : : break;
131 : : }
132 : 735097422 : xloc = linemap_expand_location (set, map, loc);
133 : : }
134 : :
135 : 764651226 : xloc.data = block;
136 : 764651226 : if (loc <= BUILTINS_LOCATION)
137 : 29553804 : xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
138 : :
139 : 764651226 : return xloc;
140 : : }
141 : :
142 : : /* Return a NUL-terminated copy of the source text between two locations, or
143 : : NULL if the arguments are invalid. The caller is responsible for freeing
144 : : the return value. */
145 : :
146 : : char *
147 : 832 : get_source_text_between (diagnostics::file_cache &fc,
148 : : location_t start, location_t end)
149 : : {
150 : 832 : expanded_location expstart
151 : 832 : = expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
152 : 832 : expanded_location expend
153 : 832 : = expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
154 : :
155 : : /* If the locations are in different files or the end comes before the
156 : : start, give up and return nothing. */
157 : 832 : if (!expstart.file || !expend.file)
158 : : return NULL;
159 : 811 : if (strcmp (expstart.file, expend.file) != 0)
160 : : return NULL;
161 : 811 : if (expstart.line > expend.line)
162 : : return NULL;
163 : 811 : if (expstart.line == expend.line
164 : 810 : && expstart.column > expend.column)
165 : : return NULL;
166 : : /* These aren't real column numbers, give up. */
167 : 811 : if (expstart.column == 0 || expend.column == 0)
168 : : return NULL;
169 : :
170 : : /* For a single line we need to trim both edges. */
171 : 811 : if (expstart.line == expend.line)
172 : : {
173 : 810 : diagnostics::char_span line
174 : 810 : = fc.get_source_line (expstart.file, expstart.line);
175 : 810 : if (line.length () < 1)
176 : : return NULL;
177 : 808 : int s = expstart.column - 1;
178 : 808 : int len = expend.column - s;
179 : 808 : if (line.length () < (size_t)expend.column)
180 : : return NULL;
181 : 808 : return line.subspan (s, len).xstrdup ();
182 : : }
183 : :
184 : 1 : struct obstack buf_obstack;
185 : 1 : obstack_init (&buf_obstack);
186 : :
187 : : /* Loop through all lines in the range and append each to buf; may trim
188 : : parts of the start and end lines off depending on column values. */
189 : 8 : for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
190 : : {
191 : 7 : diagnostics::char_span line = fc.get_source_line (expstart.file, lnum);
192 : 7 : if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
193 : 0 : continue;
194 : :
195 : : /* For the first line in the range, only start at expstart.column */
196 : 7 : if (lnum == expstart.line)
197 : : {
198 : 1 : unsigned off = expstart.column - 1;
199 : 1 : if (line.length () < off)
200 : 0 : return NULL;
201 : 1 : line = line.subspan (off, line.length() - off);
202 : : }
203 : : /* For the last line, don't go past expend.column */
204 : 6 : else if (lnum == expend.line)
205 : : {
206 : 1 : if (line.length () < (size_t)expend.column)
207 : : return NULL;
208 : 1 : line = line.subspan (0, expend.column);
209 : : }
210 : :
211 : : /* Combine spaces at the beginning of later lines. */
212 : 7 : if (lnum > expstart.line)
213 : : {
214 : : unsigned off;
215 : 30 : for (off = 0; off < line.length(); ++off)
216 : 30 : if (line[off] != ' ' && line[off] != '\t')
217 : : break;
218 : 6 : if (off > 0)
219 : : {
220 : 6 : obstack_1grow (&buf_obstack, ' ');
221 : 6 : line = line.subspan (off, line.length() - off);
222 : : }
223 : : }
224 : :
225 : : /* This does not include any trailing newlines. */
226 : 7 : obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
227 : : }
228 : :
229 : : /* NUL-terminate and finish the buf obstack. */
230 : 1 : obstack_1grow (&buf_obstack, 0);
231 : 1 : const char *buf = (const char *) obstack_finish (&buf_obstack);
232 : :
233 : 1 : return xstrdup (buf);
234 : : }
235 : :
236 : : /* Test if the location originates from the spelling location of a
237 : : builtin-tokens. That is, return TRUE if LOC is a (possibly
238 : : virtual) location of a built-in token that appears in the expansion
239 : : list of a macro. Please note that this function also works on
240 : : tokens that result from built-in tokens. For instance, the
241 : : function would return true if passed a token "4" that is the result
242 : : of the expansion of the built-in __LINE__ macro. */
243 : : bool
244 : 13808 : is_location_from_builtin_token (location_t loc)
245 : : {
246 : 13808 : const line_map_ordinary *map = NULL;
247 : 13808 : loc = linemap_resolve_location (line_table, loc,
248 : : LRK_SPELLING_LOCATION, &map);
249 : 13808 : return loc == BUILTINS_LOCATION;
250 : : }
251 : :
252 : : /* Expand the source location LOC into a human readable location. If
253 : : LOC is virtual, it resolves to the expansion point of the involved
254 : : macro. If LOC resolves to a builtin location, the file name of the
255 : : readable location is set to the string "<built-in>". */
256 : :
257 : : expanded_location
258 : 759468500 : expand_location (location_t loc)
259 : : {
260 : 759468500 : return expand_location_1 (line_table, loc, /*expansion_point_p=*/true,
261 : 759468500 : LOCATION_ASPECT_CARET);
262 : : }
263 : :
264 : : /* Expand the source location LOC into a human readable location. If
265 : : LOC is virtual, it resolves to the expansion location of the
266 : : relevant macro. If LOC resolves to a builtin location, the file
267 : : name of the readable location is set to the string
268 : : "<built-in>". */
269 : :
270 : : expanded_location
271 : 84514 : expand_location_to_spelling_point (location_t loc,
272 : : enum location_aspect aspect)
273 : : {
274 : 84514 : return expand_location_1 (line_table, loc, /*expansion_point_p=*/false,
275 : 84514 : aspect);
276 : : }
277 : :
278 : : /* The rich_location class within libcpp requires a way to expand
279 : : location_t instances, and relies on the client code
280 : : providing a symbol named
281 : : linemap_client_expand_location_to_spelling_point
282 : : to do this.
283 : :
284 : : This is the implementation for libcommon.a (all host binaries),
285 : : which simply calls into expand_location_1. */
286 : :
287 : : expanded_location
288 : 5098212 : linemap_client_expand_location_to_spelling_point (const line_maps *set,
289 : : location_t loc,
290 : : enum location_aspect aspect)
291 : : {
292 : 5098212 : return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect);
293 : : }
294 : :
295 : :
296 : : /* If LOCATION is in a system header and if it is a virtual location
297 : : for a token coming from the expansion of a macro, unwind it to
298 : : the location of the expansion point of the macro. If the expansion
299 : : point is also in a system header return the original LOCATION.
300 : : Otherwise, return the location of the expansion point.
301 : :
302 : : This is used for instance when we want to emit diagnostics about a
303 : : token that may be located in a macro that is itself defined in a
304 : : system header, for example, for the NULL macro. In such a case, if
305 : : LOCATION were passed directly to diagnostic functions such as
306 : : warning_at, the diagnostic would be suppressed (unless
307 : : -Wsystem-headers). */
308 : :
309 : : location_t
310 : 440883342 : expansion_point_location_if_in_system_header (location_t location)
311 : : {
312 : 440883342 : if (!in_system_header_at (location))
313 : : return location;
314 : :
315 : 363652485 : location_t xloc = linemap_resolve_location (line_table, location,
316 : : LRK_MACRO_EXPANSION_POINT,
317 : : NULL);
318 : 363652485 : return in_system_header_at (xloc) ? location : xloc;
319 : : }
320 : :
321 : : /* If LOCATION is a virtual location for a token coming from the expansion
322 : : of a macro, unwind to the location of the expansion point of the macro. */
323 : :
324 : : location_t
325 : 197 : expansion_point_location (location_t location)
326 : : {
327 : 197 : return linemap_resolve_location (line_table, location,
328 : 197 : LRK_MACRO_EXPANSION_POINT, NULL);
329 : : }
330 : :
331 : : /* Construct a location with caret at CARET, ranging from START to
332 : : FINISH.
333 : :
334 : : For example, consider:
335 : :
336 : : 11111111112
337 : : 12345678901234567890
338 : : 522
339 : : 523 return foo + bar;
340 : : ~~~~^~~~~
341 : : 524
342 : :
343 : : The location's caret is at the "+", line 523 column 15, but starts
344 : : earlier, at the "f" of "foo" at column 11. The finish is at the "r"
345 : : of "bar" at column 19. */
346 : :
347 : : location_t
348 : 2367547909 : make_location (location_t caret, location_t start, location_t finish)
349 : : {
350 : 2367547909 : return line_table->make_location (caret, start, finish);
351 : : }
352 : :
353 : : /* Same as above, but taking a source range rather than two locations. */
354 : :
355 : : location_t
356 : 1375270276 : make_location (location_t caret, source_range src_range)
357 : : {
358 : 1375270276 : location_t pure_loc = get_pure_location (caret);
359 : 1375270276 : return line_table->get_or_create_combined_loc (pure_loc, src_range,
360 : 1375270276 : nullptr, 0);
361 : : }
362 : :
363 : : /* An expanded_location stores the column in byte units. This function
364 : : converts that column to display units. That requires reading the associated
365 : : source line in order to calculate the display width. If that cannot be done
366 : : for any reason, then returns the byte column as a fallback. */
367 : : int
368 : 731284 : location_compute_display_column (diagnostics::file_cache &fc,
369 : : expanded_location exploc,
370 : : const cpp_char_column_policy &policy)
371 : : {
372 : 731284 : if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
373 : : return exploc.column;
374 : 695742 : diagnostics::char_span line = fc.get_source_line (exploc.file, exploc.line);
375 : : /* If line is NULL, this function returns exploc.column which is the
376 : : desired fallback. */
377 : 695742 : return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
378 : 695742 : exploc.column, policy);
379 : : }
380 : :
381 : : /* Dump statistics to stderr about the memory usage of the line_table
382 : : set of line maps. This also displays some statistics about macro
383 : : expansion. */
384 : :
385 : : void
386 : 0 : dump_line_table_statistics (void)
387 : : {
388 : 0 : struct linemap_stats s;
389 : 0 : long total_used_map_size,
390 : : macro_maps_size,
391 : : total_allocated_map_size;
392 : :
393 : 0 : memset (&s, 0, sizeof (s));
394 : :
395 : 0 : linemap_get_statistics (line_table, &s);
396 : :
397 : 0 : macro_maps_size = s.macro_maps_used_size
398 : 0 : + s.macro_maps_locations_size;
399 : :
400 : 0 : total_allocated_map_size = s.ordinary_maps_allocated_size
401 : 0 : + s.macro_maps_allocated_size
402 : : + s.macro_maps_locations_size;
403 : :
404 : 0 : total_used_map_size = s.ordinary_maps_used_size
405 : 0 : + s.macro_maps_used_size
406 : : + s.macro_maps_locations_size;
407 : :
408 : 0 : fprintf (stderr, "Number of expanded macros: %5ld\n",
409 : : s.num_expanded_macros);
410 : 0 : if (s.num_expanded_macros != 0)
411 : 0 : fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
412 : 0 : s.num_macro_tokens / s.num_expanded_macros);
413 : 0 : fprintf (stderr,
414 : : "\nLine Table allocations during the "
415 : : "compilation process\n");
416 : 0 : fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
417 : 0 : SIZE_AMOUNT (s.num_ordinary_maps_used));
418 : 0 : fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
419 : 0 : SIZE_AMOUNT (s.ordinary_maps_used_size));
420 : 0 : fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
421 : 0 : SIZE_AMOUNT (s.num_ordinary_maps_allocated));
422 : 0 : fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
423 : 0 : SIZE_AMOUNT (s.ordinary_maps_allocated_size));
424 : 0 : fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
425 : 0 : SIZE_AMOUNT (s.num_macro_maps_used));
426 : 0 : fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
427 : 0 : SIZE_AMOUNT (s.macro_maps_used_size));
428 : 0 : fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
429 : 0 : SIZE_AMOUNT (s.macro_maps_locations_size));
430 : 0 : fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
431 : 0 : SIZE_AMOUNT (macro_maps_size));
432 : 0 : fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
433 : 0 : SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
434 : 0 : fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
435 : 0 : SIZE_AMOUNT (total_allocated_map_size));
436 : 0 : fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
437 : 0 : SIZE_AMOUNT (total_used_map_size));
438 : 0 : fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
439 : 0 : SIZE_AMOUNT (s.adhoc_table_size));
440 : 0 : fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
441 : 0 : SIZE_AMOUNT (s.adhoc_table_entries_used));
442 : 0 : fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
443 : 0 : SIZE_AMOUNT (line_table->m_num_optimized_ranges));
444 : 0 : fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
445 : 0 : SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
446 : :
447 : 0 : fprintf (stderr, "\n");
448 : 0 : }
449 : :
450 : : /* Get location one beyond the final location in ordinary map IDX. */
451 : :
452 : : static location_t
453 : 0 : get_end_location (class line_maps *set, line_map_uint_t idx)
454 : : {
455 : 0 : if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
456 : 0 : return set->highest_location;
457 : :
458 : 0 : struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
459 : 0 : return MAP_START_LOCATION (next_map);
460 : : }
461 : :
/* Helper function for write_digit_row.
   Write the character for the last decimal digit of DIGIT to STREAM.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
469 : :
470 : : /* Helper function for dump_location_info.
471 : : Write a row of numbers to STREAM, numbering a source line,
472 : : giving the units, tens, hundreds etc of the column number. */
473 : :
474 : : static void
475 : 0 : write_digit_row (FILE *stream, int indent,
476 : : const line_map_ordinary *map,
477 : : location_t loc, int max_col, int divisor)
478 : : {
479 : 0 : fprintf (stream, "%*c", indent, ' ');
480 : 0 : fprintf (stream, "|");
481 : 0 : for (int column = 1; column < max_col; column++)
482 : : {
483 : 0 : location_t column_loc = loc + (location_t (column) << map->m_range_bits);
484 : 0 : write_digit (stream, column_loc / divisor);
485 : : }
486 : 0 : fprintf (stream, "\n");
487 : 0 : }
488 : :
489 : : /* Write a half-closed (START) / half-open (END) interval of
490 : : location_t to STREAM. */
491 : :
492 : : static void
493 : 0 : dump_location_range (FILE *stream,
494 : : location_t start, location_t end)
495 : : {
496 : 0 : fprintf (stream,
497 : : " location_t interval: %llu <= loc < %llu\n",
498 : : (unsigned long long) start, (unsigned long long) end);
499 : 0 : }
500 : :
501 : : /* Write a labelled description of a half-closed (START) / half-open (END)
502 : : interval of location_t to STREAM. */
503 : :
504 : : static void
505 : 0 : dump_labelled_location_range (FILE *stream,
506 : : const char *name,
507 : : location_t start, location_t end)
508 : : {
509 : 0 : fprintf (stream, "%s\n", name);
510 : 0 : dump_location_range (stream, start, end);
511 : 0 : fprintf (stream, "\n");
512 : 0 : }
513 : :
514 : : /* Write a visualization of the locations in the line_table to STREAM. */
515 : :
516 : : void
517 : 0 : dump_location_info (FILE *stream)
518 : : {
519 : 0 : diagnostics::file_cache fc;
520 : :
521 : : /* Visualize the reserved locations. */
522 : 0 : dump_labelled_location_range (stream, "RESERVED LOCATIONS",
523 : : 0, RESERVED_LOCATION_COUNT);
524 : :
525 : 0 : using ULL = unsigned long long;
526 : :
527 : : /* Visualize the ordinary line_map instances, rendering the sources. */
528 : 0 : for (line_map_uint_t idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table);
529 : : idx++)
530 : : {
531 : 0 : location_t end_location = get_end_location (line_table, idx);
532 : : /* half-closed: doesn't include this one. */
533 : :
534 : 0 : const line_map_ordinary *map
535 : 0 : = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
536 : 0 : fprintf (stream, "ORDINARY MAP: %llu\n", (ULL) idx);
537 : 0 : dump_location_range (stream,
538 : : MAP_START_LOCATION (map), end_location);
539 : 0 : fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
540 : 0 : fprintf (stream, " starting at line: %i\n",
541 : : ORDINARY_MAP_STARTING_LINE_NUMBER (map));
542 : 0 : fprintf (stream, " column and range bits: %i\n",
543 : 0 : map->m_column_and_range_bits);
544 : 0 : fprintf (stream, " column bits: %i\n",
545 : 0 : map->m_column_and_range_bits - map->m_range_bits);
546 : 0 : fprintf (stream, " range bits: %i\n",
547 : 0 : map->m_range_bits);
548 : 0 : const char * reason;
549 : 0 : switch (map->reason) {
550 : : case LC_ENTER:
551 : : reason = "LC_ENTER";
552 : : break;
553 : 0 : case LC_LEAVE:
554 : 0 : reason = "LC_LEAVE";
555 : 0 : break;
556 : 0 : case LC_RENAME:
557 : 0 : reason = "LC_RENAME";
558 : 0 : break;
559 : 0 : case LC_RENAME_VERBATIM:
560 : 0 : reason = "LC_RENAME_VERBATIM";
561 : 0 : break;
562 : 0 : case LC_ENTER_MACRO:
563 : 0 : reason = "LC_RENAME_MACRO";
564 : 0 : break;
565 : 0 : default:
566 : 0 : reason = "Unknown";
567 : : }
568 : 0 : fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
569 : :
570 : 0 : const line_map_ordinary *includer_map
571 : 0 : = linemap_included_from_linemap (line_table, map);
572 : 0 : fprintf (stream, " included from location: %llu",
573 : 0 : (ULL) linemap_included_from (map));
574 : 0 : if (includer_map) {
575 : 0 : fprintf (stream, " (in ordinary map %llu)",
576 : 0 : ULL (includer_map - line_table->info_ordinary.maps));
577 : : }
578 : 0 : fprintf (stream, "\n");
579 : :
580 : : /* Render the span of source lines that this "map" covers. */
581 : 0 : for (location_t loc = MAP_START_LOCATION (map);
582 : 0 : loc < end_location;
583 : 0 : loc += (location_t (1) << map->m_range_bits))
584 : : {
585 : 0 : gcc_assert (pure_location_p (line_table, loc) );
586 : :
587 : 0 : expanded_location exploc
588 : 0 : = linemap_expand_location (line_table, map, loc);
589 : :
590 : 0 : if (exploc.column == 0)
591 : : {
592 : : /* Beginning of a new source line: draw the line. */
593 : :
594 : 0 : diagnostics::char_span line_text
595 : 0 : = fc.get_source_line (exploc.file, exploc.line);
596 : 0 : if (!line_text)
597 : : break;
598 : 0 : fprintf (stream,
599 : : "%s:%3i|loc:%5llu|%.*s\n",
600 : : exploc.file, exploc.line,
601 : : (ULL) loc,
602 : 0 : (int)line_text.length (), line_text.get_buffer ());
603 : :
604 : : /* "loc" is at column 0, which means "the whole line".
605 : : Render the locations *within* the line, by underlining
606 : : it, showing the location_t numeric values
607 : : at each column. */
608 : 0 : auto max_col = (ULL (1) << map->m_column_and_range_bits) - 1;
609 : 0 : if (max_col > line_text.length ())
610 : 0 : max_col = line_text.length () + 1;
611 : :
612 : 0 : int len_lnum = diagnostics::num_digits (exploc.line);
613 : 0 : if (len_lnum < 3)
614 : : len_lnum = 3;
615 : 0 : int len_loc = diagnostics::num_digits (loc);
616 : 0 : if (len_loc < 5)
617 : : len_loc = 5;
618 : :
619 : 0 : int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
620 : :
621 : : /* Thousands. */
622 : 0 : if (end_location > 999)
623 : 0 : write_digit_row (stream, indent, map, loc, max_col, 1000);
624 : :
625 : : /* Hundreds. */
626 : 0 : if (end_location > 99)
627 : 0 : write_digit_row (stream, indent, map, loc, max_col, 100);
628 : :
629 : : /* Tens. */
630 : 0 : write_digit_row (stream, indent, map, loc, max_col, 10);
631 : :
632 : : /* Units. */
633 : 0 : write_digit_row (stream, indent, map, loc, max_col, 1);
634 : : }
635 : : }
636 : 0 : fprintf (stream, "\n");
637 : : }
638 : :
639 : : /* Visualize unallocated values. */
640 : 0 : dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
641 : : line_table->highest_location,
642 : : LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
643 : :
644 : : /* Visualize the macro line_map instances, rendering the sources. */
645 : 0 : for (line_map_uint_t i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
646 : : {
647 : : /* Each macro map that is allocated owns location_t values
648 : : that are *lower* that the one before them.
649 : : Hence it's meaningful to view them either in order of ascending
650 : : source locations, or in order of ascending macro map index. */
651 : 0 : const bool ascending_location_ts = true;
652 : 0 : auto idx = (ascending_location_ts
653 : 0 : ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
654 : 0 : : i);
655 : 0 : const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
656 : 0 : fprintf (stream, "MACRO %llu: %s (%u tokens)\n",
657 : : (ULL) idx,
658 : : linemap_map_get_macro_name (map),
659 : : MACRO_MAP_NUM_MACRO_TOKENS (map));
660 : 0 : dump_location_range (stream,
661 : 0 : map->start_location,
662 : 0 : (map->start_location
663 : 0 : + MACRO_MAP_NUM_MACRO_TOKENS (map)));
664 : 0 : inform (map->get_expansion_point_location (),
665 : : "expansion point is location %llu",
666 : 0 : (ULL) map->get_expansion_point_location ());
667 : 0 : fprintf (stream, " map->start_location: %llu\n",
668 : 0 : (ULL) map->start_location);
669 : :
670 : 0 : fprintf (stream, " macro_locations:\n");
671 : 0 : for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
672 : : {
673 : 0 : location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
674 : 0 : location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
675 : :
676 : : /* linemap_add_macro_token encodes token numbers in an expansion
677 : : by putting them after MAP_START_LOCATION. */
678 : :
679 : : /* I'm typically seeing 4 uninitialized entries at the end of
680 : : 0xafafafaf.
681 : : This appears to be due to macro.cc:replace_args
682 : : adding 2 extra args for padding tokens; presumably there may
683 : : be a leading and/or trailing padding token injected,
684 : : each for 2 more location slots.
685 : : This would explain there being up to 4 location_ts slots
686 : : that may be uninitialized. */
687 : :
688 : 0 : fprintf (stream, " %u: %llu, %llu\n",
689 : : i,
690 : : (ULL) x,
691 : : (ULL) y);
692 : 0 : if (x == y)
693 : : {
694 : 0 : if (x < MAP_START_LOCATION (map))
695 : 0 : inform (x, "token %u has %<x-location == y-location == %llu%>",
696 : : i, (ULL) x);
697 : : else
698 : 0 : fprintf (stream,
699 : : "x-location == y-location == %llu"
700 : : " encodes token # %u\n",
701 : : (ULL) x,
702 : 0 : (unsigned int)(x - MAP_START_LOCATION (map)));
703 : : }
704 : : else
705 : : {
706 : 0 : inform (x, "token %u has %<x-location == %llu%>", i, (ULL) x);
707 : 0 : inform (x, "token %u has %<y-location == %llu%>", i, (ULL) y);
708 : : }
709 : : }
710 : 0 : fprintf (stream, "\n");
711 : : }
712 : :
713 : : /* It appears that MAX_LOCATION_T itself is never assigned to a
714 : : macro map, presumably due to an off-by-one error somewhere
715 : : between the logic in linemap_enter_macro and
716 : : LINEMAPS_MACRO_LOWEST_LOCATION. */
717 : 0 : dump_labelled_location_range (stream, "MAX_LOCATION_T",
718 : : MAX_LOCATION_T,
719 : : MAX_LOCATION_T + 1);
720 : :
721 : : /* Visualize ad-hoc values. */
722 : 0 : dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
723 : : MAX_LOCATION_T + 1, location_t (-1));
724 : 0 : }
725 : :
726 : : /* string_concat's constructor. */
727 : :
728 : 2954297 : string_concat::string_concat (int num, location_t *locs)
729 : 2954297 : : m_num (num)
730 : : {
731 : 2954297 : m_locs = ggc_vec_alloc <location_t> (num);
732 : 36248627 : for (int i = 0; i < num; i++)
733 : 33294330 : m_locs[i] = locs[i];
734 : 2954297 : }
735 : :
736 : : /* string_concat_db's constructor. */
737 : :
738 : 206281 : string_concat_db::string_concat_db ()
739 : : {
740 : 206281 : m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
741 : 206281 : }
742 : :
743 : : /* Record that a string concatenation occurred, covering NUM
744 : : string literal tokens. LOCS is an array of size NUM, containing the
745 : : locations of the tokens. A copy of LOCS is taken. */
746 : :
747 : : void
748 : 2954303 : string_concat_db::record_string_concatenation (int num, location_t *locs)
749 : : {
750 : 2954303 : gcc_assert (num > 1);
751 : 2954303 : gcc_assert (locs);
752 : :
753 : 2954303 : location_t key_loc = get_key_loc (locs[0]);
754 : : /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
755 : : any data now recorded under key 'key_loc' would be overwritten by a
756 : : subsequent call with the same key 'key_loc'. */
757 : 2954303 : if (RESERVED_LOCATION_P (key_loc))
758 : 6 : return;
759 : :
760 : 2954297 : string_concat *concat
761 : 2954297 : = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
762 : 2954297 : m_table->put (key_loc, concat);
763 : : }
764 : :
765 : : /* Determine if LOC was the location of the initial token of a
766 : : concatenation of string literal tokens.
767 : : If so, *OUT_NUM is written to with the number of tokens, and
768 : : *OUT_LOCS with the location of an array of locations of the
769 : : tokens, and return true. *OUT_LOCS is a borrowed pointer to
770 : : storage owned by the string_concat_db.
771 : : Otherwise, return false. */
772 : :
773 : : bool
774 : 34670 : string_concat_db::get_string_concatenation (location_t loc,
775 : : int *out_num,
776 : : location_t **out_locs)
777 : : {
778 : 34670 : gcc_assert (out_num);
779 : 34670 : gcc_assert (out_locs);
780 : :
781 : 34670 : location_t key_loc = get_key_loc (loc);
782 : : /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
783 : : discussion in 'string_concat_db::record_string_concatenation'. */
784 : 34670 : if (RESERVED_LOCATION_P (key_loc))
785 : : return false;
786 : :
787 : 34668 : string_concat **concat = m_table->get (key_loc);
788 : 34668 : if (!concat)
789 : : return false;
790 : :
791 : 4381 : *out_num = (*concat)->m_num;
792 : 4381 : *out_locs =(*concat)->m_locs;
793 : 4381 : return true;
794 : : }
795 : :
796 : : /* Internal function. Canonicalize LOC into a form suitable for
797 : : use as a key within the database, stripping away macro expansion,
798 : : ad-hoc information, and range information, using the location of
799 : : the start of LOC within an ordinary linemap. */
800 : :
801 : : location_t
802 : 2988973 : string_concat_db::get_key_loc (location_t loc)
803 : : {
804 : 2988973 : loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
805 : : NULL);
806 : :
807 : 2988973 : loc = get_range_from_loc (line_table, loc).m_start;
808 : :
809 : 2988973 : return loc;
810 : : }
811 : :
812 : : /* Helper class for use within get_substring_ranges_for_loc.
813 : : A vec of cpp_string with responsibility for releasing all of the
814 : : str->text for each str in the vector. ALLOC is forwarded to the auto_vec constructor; each text buffer is released with free () when the vec is destroyed. */
815 : :
816 : : class auto_cpp_string_vec : public auto_vec <cpp_string>
817 : : {
818 : : public:
819 : 34670 : auto_cpp_string_vec (int alloc)
820 : 69340 : : auto_vec <cpp_string> (alloc) {}
821 : :
822 : 34670 : ~auto_cpp_string_vec ()
823 : : {
824 : : /* Clean up the copies within this vec. */
825 : 34670 : int i;
826 : 34670 : cpp_string *str;
827 : 70307 : FOR_EACH_VEC_ELT (*this, i, str)
828 : 35637 : free (const_cast <unsigned char *> (str->text));
829 : 34670 : }
830 : : };
831 : :
832 : : /* Attempt to populate RANGES with source location information on the
833 : : individual characters within the string literal found at STRLOC.
834 : : If CONCATS is non-NULL, then any string literals that the token at
835 : : STRLOC was concatenated with are also added to RANGES.
836 : : PFILE must be non-NULL. FC is used to read the source line(s), and TYPE is passed on to cpp_interpret_string_ranges.
837 : : Return NULL if successful, or an error message if any errors occurred (in
838 : : which case RANGES may be only partially populated and should not
839 : : be used).
840 : :
841 : : This is implemented by re-parsing the relevant source line(s). */
842 : :
843 : : static const char *
844 : 36916 : get_substring_ranges_for_loc (cpp_reader *pfile,
845 : : diagnostics::file_cache &fc,
846 : : string_concat_db *concats,
847 : : location_t strloc,
848 : : enum cpp_ttype type,
849 : : cpp_substring_ranges &ranges)
850 : : {
851 : 36916 : gcc_assert (pfile);
852 : :
853 : 36916 : if (strloc == UNKNOWN_LOCATION)
854 : : return "unknown location";
855 : :
856 : : /* Reparsing the strings requires accurate location information.
857 : : If -ftrack-macro-expansion has been overridden from its default
858 : : of 2, then we might have a location of a macro expansion point,
859 : : rather than the location of the literal itself.
860 : : Avoid this by requiring that we have full macro expansion tracking
861 : : for substring locations to be available. */
862 : 36916 : if (cpp_get_options (pfile)->track_macro_expansion != 2)
863 : : return "track_macro_expansion != 2";
864 : :
865 : : /* If #line or # 44 "file"-style directives are present, then there's
866 : : no guarantee that the line numbers we have can be used to locate
867 : : the strings. For example, we might have a .i file with # directives
868 : : pointing back to lines within a .c file, but the .c file might
869 : : have been edited since the .i file was created.
870 : : In such a case, the safest course is to disable on-demand substring
871 : : locations. */
872 : 34673 : if (line_table->seen_line_directive)
873 : : return "seen line directive";
874 : :
875 : : /* If string concatenation has occurred at STRLOC, get the locations
876 : : of all of the literal tokens making up the compound string.
877 : : Otherwise, just use STRLOC. The result of the lookup is not checked: when it fails, NUM_LOCS and STRLOCS keep the single-token defaults set here. */
878 : 34670 : int num_locs = 1;
879 : 34670 : location_t *strlocs = &strloc;
880 : 34670 : if (concats)
881 : 34670 : concats->get_string_concatenation (strloc, &num_locs, &strlocs);
882 : : /* STRS receives a copy of each literal's text; LOC_READERS receives the matching location reader for each literal. */
883 : 34670 : auto_cpp_string_vec strs (num_locs);
884 : 34670 : auto_vec <cpp_string_location_reader> loc_readers (num_locs);
885 : 70300 : for (int i = 0; i < num_locs; i++)
886 : : {
887 : : /* Get range of strloc. We will use it to locate the start and finish
888 : : of the literal token within the line. */
889 : 41724 : source_range src_range = get_range_from_loc (line_table, strlocs[i]);
890 : :
891 : 41724 : if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
892 : : {
893 : : /* If the string token was within a macro expansion, then we can
894 : : cope with it for the simple case where we have a single token.
895 : : Otherwise, bail out. */
896 : 1147 : if (src_range.m_start != src_range.m_finish)
897 : 6094 : return "macro expansion";
898 : : }
899 : : else
900 : : {
901 : 40577 : if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
902 : : /* If so, we can't reliably determine where the token started within
903 : : its line. */
904 : : return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
905 : :
906 : 34905 : if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
907 : : /* If so, we can't reliably determine where the token finished
908 : : within its line. */
909 : : return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
910 : : }
911 : : /* Both endpoints must be spelled on the same line of the same file, with start not after finish. */
912 : 35881 : expanded_location start
913 : 35881 : = expand_location_to_spelling_point (src_range.m_start,
914 : : LOCATION_ASPECT_START);
915 : 35881 : expanded_location finish
916 : 35881 : = expand_location_to_spelling_point (src_range.m_finish,
917 : : LOCATION_ASPECT_FINISH);
918 : 35881 : if (start.file != finish.file)
919 : : return "range endpoints are in different files";
920 : 35881 : if (start.line != finish.line)
921 : : return "range endpoints are on different lines";
922 : 35638 : if (start.column > finish.column)
923 : : return "range endpoints are reversed";
924 : :
925 : 35638 : diagnostics::char_span line = fc.get_source_line (start.file, start.line);
926 : 35638 : if (!line)
927 : : return "unable to read source line";
928 : :
929 : : /* Determine the location of the literal (including quotes
930 : : and leading prefix chars, such as the 'u' in a u""
931 : : token). */
932 : 35638 : size_t literal_length = finish.column - start.column + 1;
933 : :
934 : : /* Ensure that we don't crash if we got the wrong location. */
935 : 35638 : if (start.column < 1)
936 : : return "zero start column";
937 : 35638 : if (line.length () < (start.column - 1 + literal_length))
938 : : return "line is not wide enough";
939 : :
940 : 35637 : diagnostics::char_span literal
941 : 35637 : = line.subspan (start.column - 1, literal_length);
942 : :
943 : 35637 : cpp_string from;
944 : 35637 : from.len = literal_length;
945 : : /* Make a copy of the literal, to avoid having to rely on
946 : : the lifetime of the copy of the line within the cache.
947 : : This will be released by the auto_cpp_string_vec dtor. */
948 : 35637 : from.text = (unsigned char *)literal.xstrdup ();
949 : 35637 : strs.safe_push (from);
950 : :
951 : : /* For very long lines, a new linemap could have started
952 : : halfway through the token.
953 : : Ensure that the loc_reader uses the linemap of the
954 : : *end* of the token for its start location. */
955 : 35637 : const line_map_ordinary *start_ord_map;
956 : 35637 : linemap_resolve_location (line_table, src_range.m_start,
957 : : LRK_SPELLING_LOCATION, &start_ord_map);
958 : 35637 : const line_map_ordinary *final_ord_map;
959 : 35637 : linemap_resolve_location (line_table, src_range.m_finish,
960 : : LRK_SPELLING_LOCATION, &final_ord_map);
961 : 35637 : if (start_ord_map == NULL || final_ord_map == NULL)
962 : : return "failed to get ordinary maps";
963 : : /* Bulletproofing. We ought to only have different ordinary maps
964 : : for start vs finish due to line-length jumps. */
965 : 35636 : if (start_ord_map != final_ord_map
966 : 6865 : && start_ord_map->to_file != final_ord_map->to_file)
967 : : return "start and finish are spelled in different ordinary maps";
968 : : /* The file from linemap_resolve_location ought to match that from
969 : : expand_location_to_spelling_point. */
970 : 35636 : if (start_ord_map->to_file != start.file)
971 : : return "mismatching file after resolving linemap";
972 : :
973 : 35630 : location_t start_loc
974 : 35630 : = linemap_position_for_line_and_column (line_table, final_ord_map,
975 : : start.line, start.column);
976 : :
977 : 35630 : cpp_string_location_reader loc_reader (start_loc, line_table);
978 : 35630 : loc_readers.safe_push (loc_reader);
979 : : }
980 : :
981 : : /* Rerun cpp_interpret_string, or rather, a modified version of it. */
982 : 57152 : const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
983 : : loc_readers.address (),
984 : : num_locs, &ranges, type);
985 : 28576 : if (err)
986 : : return err;
987 : :
988 : : /* Success: "ranges" should now contain information on the string. */
989 : : return NULL;
990 : 34670 : }
991 : :
992 : : /* Attempt to populate *OUT_LOC with source location information on the
993 : : given characters within the string literal found at STRLOC.
994 : : CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
995 : : character set.
996 : :
997 : : For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
998 : : and string literal "012345\n789"
999 : : *OUT_LOC is written to with:
1000 : : "012345\n789"
1001 : : ~^~~~~
1002 : :
1003 : : If CONCATS is non-NULL, then any string literals that the token at
1004 : : STRLOC was concatenated with are also considered.
1005 : :
1006 : : This is implemented by re-parsing the relevant source line(s).
1007 : :
1008 : : Return NULL if successful, or an error message if any errors occurred.
1009 : : Error messages are intended for GCC developers (to help debugging) rather
1010 : : than for end-users. *OUT_LOC is only written to on success. OUT_LOC must be non-NULL, and the three indices are expected to be non-negative (verified by gcc_checking_assert). */
1011 : :
1012 : : const char *
1013 : 11264 : get_location_within_string (cpp_reader *pfile,
1014 : : diagnostics::file_cache &fc,
1015 : : string_concat_db *concats,
1016 : : location_t strloc,
1017 : : enum cpp_ttype type,
1018 : : int caret_idx, int start_idx, int end_idx,
1019 : : location_t *out_loc)
1020 : : {
1021 : 11264 : gcc_checking_assert (caret_idx >= 0);
1022 : 11264 : gcc_checking_assert (start_idx >= 0);
1023 : 11264 : gcc_checking_assert (end_idx >= 0);
1024 : 11264 : gcc_assert (out_loc);
1025 : :
1026 : 11264 : cpp_substring_ranges ranges;
1027 : 11264 : const char *err
1028 : 11264 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1029 : 11264 : if (err)
1030 : : return err;
1031 : : /* Each index must refer to one of the ranges that was obtained. */
1032 : 8557 : if (caret_idx >= ranges.get_num_ranges ())
1033 : : return "caret_idx out of range";
1034 : 8557 : if (start_idx >= ranges.get_num_ranges ())
1035 : : return "start_idx out of range";
1036 : 8557 : if (end_idx >= ranges.get_num_ranges ())
1037 : : return "end_idx out of range";
1038 : : /* The caret is the start of CARET_IDX's range; the span runs from the start of START_IDX's range to the finish of END_IDX's range. */
1039 : 8557 : *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1040 : 8557 : ranges.get_range (start_idx).m_start,
1041 : 8557 : ranges.get_range (end_idx).m_finish);
1042 : 8557 : return NULL;
1043 : 11264 : }
1044 : :
1045 : : /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. The block and source range of LOCUS are carried over to the new locus. If the pure location of LOCUS is UNKNOWN_LOCATION, UNKNOWN_LOCATION is returned instead and DISCRIMINATOR is not recorded. */
1046 : :
1047 : : location_t
1048 : 52541313 : location_with_discriminator (location_t locus, int discriminator)
1049 : : {
1050 : 52541313 : tree block = LOCATION_BLOCK (locus);
1051 : 52541313 : source_range src_range = get_range_from_loc (line_table, locus);
1052 : 52541313 : locus = get_pure_location (locus);
1053 : :
1054 : 52541313 : if (locus == UNKNOWN_LOCATION)
1055 : : return locus;
1056 : :
1057 : 28520650 : return line_table->get_or_create_combined_loc (locus, src_range, block,
1058 : 28520650 : discriminator);
1059 : : }
1060 : :
1061 : : /* Return TRUE if LOCUS represents a location with a discriminator, i.e. if get_discriminator_from_loc (LOCUS) is non-zero. */
1062 : :
1063 : : bool
1064 : 78192191 : has_discriminator (location_t locus)
1065 : : {
1066 : 78192191 : return get_discriminator_from_loc (locus) != 0;
1067 : : }
1068 : :
1069 : : /* Return the discriminator for LOCUS, looked up in the global line_table. */
1070 : :
1071 : : int
1072 : 380684012 : get_discriminator_from_loc (location_t locus)
1073 : : {
1074 : 380684012 : return get_discriminator_from_loc (line_table, locus);
1075 : : }
1076 : :
1077 : : #if CHECKING_P
1078 : :
1079 : : namespace selftest {
1080 : :
1081 : : /* Selftests of location handling. */
1082 : :
1083 : : /* Attempt to populate *OUT_RANGE with source location information on the
1084 : : given character within the string literal found at STRLOC.
1085 : : CHAR_IDX refers to an offset within the execution character set.
1086 : : If CONCATS is non-NULL, then any string literals that the token at
1087 : : STRLOC was concatenated with are also considered.
1088 : :
1089 : : This is implemented by re-parsing the relevant source line(s).
1090 : :
1091 : : Return NULL if successful, or an error message if any errors occurred.
1092 : : Error messages are intended for GCC developers (to help debugging) rather
1093 : : than for end-users. *OUT_RANGE is only written to on success. OUT_RANGE must be non-NULL, and CHAR_IDX is expected to be non-negative (verified by gcc_checking_assert). */
1094 : :
1095 : : static const char *
1096 : 23748 : get_source_range_for_char (cpp_reader *pfile,
1097 : : diagnostics::file_cache &fc,
1098 : : string_concat_db *concats,
1099 : : location_t strloc,
1100 : : enum cpp_ttype type,
1101 : : int char_idx,
1102 : : source_range *out_range)
1103 : : {
1104 : 23748 : gcc_checking_assert (char_idx >= 0);
1105 : 23748 : gcc_assert (out_range);
1106 : :
1107 : 23748 : cpp_substring_ranges ranges;
1108 : 23748 : const char *err
1109 : 23748 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1110 : 23748 : if (err)
1111 : : return err;
1112 : :
1113 : 18652 : if (char_idx >= ranges.get_num_ranges ())
1114 : : return "char_idx out of range";
1115 : :
1116 : 18652 : *out_range = ranges.get_range (char_idx);
1117 : 18652 : return NULL;
1118 : 23748 : }
1119 : :
1120 : : /* As get_source_range_for_char, but write to *OUT the number
1121 : : of ranges that are available. OUT must be non-NULL; *OUT is only written to on success. Return NULL if successful, or the error message from get_substring_ranges_for_loc otherwise. */
1122 : :
1123 : : static const char *
1124 : 1268 : get_num_source_ranges_for_substring (cpp_reader *pfile,
1125 : : diagnostics::file_cache &fc,
1126 : : string_concat_db *concats,
1127 : : location_t strloc,
1128 : : enum cpp_ttype type,
1129 : : int *out)
1130 : : {
1131 : 1268 : gcc_assert (out);
1132 : :
1133 : 1268 : cpp_substring_ranges ranges;
1134 : 1268 : const char *err
1135 : 1268 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1136 : :
1137 : 1268 : if (err)
1138 : : return err;
1139 : :
1140 : 884 : *out = ranges.get_num_ranges ();
1141 : 884 : return NULL;
1142 : 1268 : }
1143 : :
1144 : : /* Selftests of location handling. */
1145 : :
1146 : : /* Verify that compare() on linenum_type handles comparisons over the full
1147 : : range of the type, using the extreme values 0 and 0xffffffff. */
1148 : :
1149 : : static void
1150 : 4 : test_linenum_comparisons ()
1151 : : {
1152 : 4 : linenum_type min_line (0);
1153 : 4 : linenum_type max_line (0xffffffff);
1154 : 4 : ASSERT_EQ (0, compare (min_line, min_line));
1155 : 4 : ASSERT_EQ (0, compare (max_line, max_line));
1156 : : /* The sign of the result must follow the ordering of the two extremes. */
1157 : 4 : ASSERT_GT (compare (max_line, min_line), 0);
1158 : 4 : ASSERT_LT (compare (min_line, max_line), 0);
1159 : 4 : }
1160 : :
1161 : : /* Helper function for verifying location data: when location_t
1162 : : values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1163 : : as having column 0. Return true if LOC, after converting any ad-hoc location to the location it wraps, is <= LINE_MAP_MAX_LOCATION_WITH_COLS. */
1164 : :
1165 : : static bool
1166 : 65136 : should_have_column_data_p (location_t loc)
1167 : : {
1168 : 65136 : if (IS_ADHOC_LOC (loc))
1169 : 20240 : loc = get_location_from_adhoc_loc (line_table, loc);
1170 : 65136 : if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1171 : 6452 : return false;
1172 : : return true;
1173 : : }
1174 : :
1175 : : /* Selftest for should_have_column_data_p: the threshold value LINE_MAP_MAX_LOCATION_WITH_COLS itself still has column data, and the next value does not. */
1176 : :
1177 : : static void
1178 : 4 : test_should_have_column_data_p ()
1179 : : {
1180 : 4 : ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1181 : 4 : ASSERT_TRUE
1182 : : (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1183 : 4 : ASSERT_FALSE
1184 : : (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1185 : 4 : }
1186 : :
1187 : : /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1188 : : on LOC against EXP_FILENAME, EXP_LINENUM and EXP_COLNUM. The column is only checked when should_have_column_data_p (LOC) holds. */
1189 : :
1190 : : static void
1191 : 1068 : assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1192 : : location_t loc)
1193 : : {
1194 : 1068 : ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1195 : 1068 : ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1196 : : /* If location_t values are sufficiently high, then column numbers
1197 : : will be unavailable and LOCATION_COLUMN (loc) will be 0.
1198 : : When close to the threshold, column numbers *may* be present: if
1199 : : the final linemap before the threshold contains a line that straddles
1200 : : the threshold, locations in that line have column information. */
1201 : 1068 : if (should_have_column_data_p (loc))
1202 : 660 : ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1203 : 1068 : }
1204 : :
1205 : : /* Various selftests involve constructing a line table and one or more
1206 : : line maps within it.
1207 : :
1208 : : For maximum test coverage we want to run these tests with a variety
1209 : : of situations:
1210 : : - line_table->default_range_bits: some frontends use a non-zero value
1211 : : and others use zero
1212 : : - the fallback modes within line-map.cc: there are various threshold
1213 : : values for location_t beyond which line-map.cc changes
1214 : : behavior (disabling of the range-packing optimization, disabling
1215 : : of column-tracking). We can exercise these by starting the line_table
1216 : : at interesting values at or near these thresholds.
1217 : :
1218 : : The following class describes a particular case within our test
1219 : : matrix: m_default_range_bits is the value to use for line_table->default_range_bits, and m_base_location, if non-zero, is the location_t value at which the line_table starts. */
1220 : :
1221 : : class line_table_case
1222 : : {
1223 : : public:
1224 : 5860 : line_table_case (int default_range_bits, location_t base_location)
1225 : 5860 : : m_default_range_bits (default_range_bits),
1226 : 5860 : m_base_location (base_location)
1227 : : {}
1228 : :
1229 : : int m_default_range_bits;
1230 : : location_t m_base_location;
1231 : : };
1232 : :
1233 : : /* Constructor. Store the old value of line_table, and create a new
1234 : : one, using sane defaults: default_range_bits is 0, and m_reallocator and m_round_alloc_size are taken from the saved table. Only one line_table_test may be live at a time: saved_line_table must be NULL on entry. */
1235 : :
1236 : 21 : line_table_test::line_table_test ()
1237 : : {
1238 : 21 : gcc_assert (saved_line_table == NULL);
1239 : 21 : saved_line_table = line_table;
1240 : 21 : line_table = ggc_alloc<line_maps> ();
1241 : 21 : linemap_init (line_table, BUILTINS_LOCATION);
1242 : 21 : gcc_assert (saved_line_table->m_reallocator);
1243 : 21 : line_table->m_reallocator = saved_line_table->m_reallocator;
1244 : 21 : gcc_assert (saved_line_table->m_round_alloc_size);
1245 : 21 : line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
1246 : 21 : line_table->default_range_bits = 0;
1247 : 21 : }
1248 : :
1249 : : /* Constructor. Store the old value of line_table, and create a new
1250 : : one, using the situation described in CASE_: default_range_bits comes from CASE_, and if CASE_.m_base_location is non-zero then both highest_location and highest_line start from it. As with the default constructor, saved_line_table must be NULL on entry. */
1251 : :
1252 : 6724 : line_table_test::line_table_test (const line_table_case &case_)
1253 : : {
1254 : 6724 : gcc_assert (saved_line_table == NULL);
1255 : 6724 : saved_line_table = line_table;
1256 : 6724 : line_table = ggc_alloc<line_maps> ();
1257 : 6724 : linemap_init (line_table, BUILTINS_LOCATION);
1258 : 6724 : gcc_assert (saved_line_table->m_reallocator);
1259 : 6724 : line_table->m_reallocator = saved_line_table->m_reallocator;
1260 : 6724 : gcc_assert (saved_line_table->m_round_alloc_size);
1261 : 6724 : line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
1262 : 6724 : line_table->default_range_bits = case_.m_default_range_bits;
1263 : 6724 : if (case_.m_base_location)
1264 : : {
1265 : 6160 : line_table->highest_location = case_.m_base_location;
1266 : 6160 : line_table->highest_line = case_.m_base_location;
1267 : : }
1268 : 6724 : }
1269 : :
1270 : : /* Destructor. Restore the old value of line_table, and reset saved_line_table to NULL so that another line_table_test can be constructed afterwards. */
1271 : :
1272 : 6745 : line_table_test::~line_table_test ()
1273 : : {
1274 : 6745 : gcc_assert (saved_line_table != NULL);
1275 : 6745 : line_table = saved_line_table;
1276 : 6745 : saved_line_table = NULL;
1277 : 6745 : }
1278 : :
1279 : : /* Verify basic operation of ordinary linemaps, for the line table situation described by CASE_: create locations in two files, including very long lines, and check that file, line and column can be recovered from them. */
1280 : :
1281 : : static void
1282 : 96 : test_accessing_ordinary_linemaps (const line_table_case &case_)
1283 : : {
1284 : 96 : line_table_test ltt (case_);
1285 : :
1286 : : /* Build a simple linemap describing some locations. */
1287 : 96 : linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1288 : :
1289 : 96 : linemap_line_start (line_table, 1, 100);
1290 : 96 : location_t loc_a = linemap_position_for_column (line_table, 1);
1291 : 96 : location_t loc_b = linemap_position_for_column (line_table, 23);
1292 : :
1293 : 96 : linemap_line_start (line_table, 2, 100);
1294 : 96 : location_t loc_c = linemap_position_for_column (line_table, 1);
1295 : 96 : location_t loc_d = linemap_position_for_column (line_table, 17);
1296 : :
1297 : : /* Example of a very long line. */
1298 : 96 : linemap_line_start (line_table, 3, 2000);
1299 : 96 : location_t loc_e = linemap_position_for_column (line_table, 700);
1300 : :
1301 : : /* Transitioning back to a short line. */
1302 : 96 : linemap_line_start (line_table, 4, 0);
1303 : 96 : location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1304 : :
1305 : 96 : if (should_have_column_data_p (loc_back_to_short))
1306 : : {
1307 : : /* Verify that we switched to short lines in the linemap: 7 bits should remain once m_range_bits is subtracted from m_column_and_range_bits. */
1308 : 56 : line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1309 : 56 : ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1310 : : }
1311 : :
1312 : : /* Example of a line that will eventually be seen to be longer
1313 : : than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1314 : : below that. */
1315 : 96 : linemap_line_start (line_table, 5, 2000);
1316 : :
1317 : 96 : location_t loc_start_of_very_long_line
1318 : 96 : = linemap_position_for_column (line_table, 2000);
1319 : 96 : location_t loc_too_wide
1320 : 96 : = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 1);
1321 : 96 : location_t loc_too_wide_2
1322 : 96 : = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 2);
1323 : :
1324 : : /* ...and back to a sane line length. */
1325 : 96 : linemap_line_start (line_table, 6, 100);
1326 : 96 : location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1327 : :
1328 : 96 : linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1329 : :
1330 : : /* Multiple files. */
1331 : 96 : linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1332 : 96 : linemap_line_start (line_table, 1, 200);
1333 : 96 : location_t loc_f = linemap_position_for_column (line_table, 150);
1334 : 96 : linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1335 : :
1336 : : /* Verify that we can recover the location info. */
1337 : 96 : assert_loceq ("foo.c", 1, 1, loc_a);
1338 : 96 : assert_loceq ("foo.c", 1, 23, loc_b);
1339 : 96 : assert_loceq ("foo.c", 2, 1, loc_c);
1340 : 96 : assert_loceq ("foo.c", 2, 17, loc_d);
1341 : 96 : assert_loceq ("foo.c", 3, 700, loc_e);
1342 : 96 : assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1343 : :
1344 : : /* In the very wide line, the initial location should be fully tracked. */
1345 : 96 : assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1346 : : /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1347 : : be disabled. */
1348 : 96 : assert_loceq ("foo.c", 5, 0, loc_too_wide);
1349 : 96 : assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1350 : : /*...and column-tracking should be re-enabled for subsequent lines. */
1351 : 96 : assert_loceq ("foo.c", 6, 10, loc_sane_again);
1352 : :
1353 : 96 : assert_loceq ("bar.c", 1, 150, loc_f);
1354 : :
1355 : 96 : ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1356 : 96 : ASSERT_TRUE (pure_location_p (line_table, loc_a));
1357 : :
1358 : : /* Verify using make_location to build a range, and extracting data
1359 : : back from it. */
1360 : 96 : location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1361 : 96 : ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1362 : 96 : ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1363 : 96 : source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1364 : 96 : ASSERT_EQ (loc_b, src_range.m_start);
1365 : 96 : ASSERT_EQ (loc_d, src_range.m_finish);
1366 : 96 : }
1367 : :
1368 : : /* Verify various properties of UNKNOWN_LOCATION: it has a NULL file, line 0 and column 0. */
1369 : :
1370 : : static void
1371 : 4 : test_unknown_location ()
1372 : : {
1373 : 4 : ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1374 : 4 : ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1375 : 4 : ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1376 : 4 : }
1377 : :
1378 : : /* Verify various properties of BUILTINS_LOCATION: its file is special_fname_builtin (), its line and column are 0, and is_location_from_builtin_token holds for it. */
1379 : :
1380 : : static void
1381 : 4 : test_builtins ()
1382 : : {
1383 : 4 : assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
1384 : 4 : ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1385 : 4 : }
1386 : :
1387 : : /* Regression test for make_location.
1388 : : Ensure that we use pure locations for the start/finish of the range,
1389 : : rather than storing a packed or ad-hoc range as the start/finish. */
1390 : :
1391 : : static void
1392 : 96 : test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1393 : : {
1394 : : /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1395 : : with C++ frontend.
1396 : : ....................0000000001111111111222.
1397 : : ....................1234567890123456789012. */
1398 : 96 : const char *content = " r += !aaa == bbb;\n";
1399 : 96 : temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1400 : 96 : line_table_test ltt (case_);
1401 : 96 : linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1402 : :
1403 : 96 : const location_t c11 = linemap_position_for_column (line_table, 11);
1404 : 96 : const location_t c12 = linemap_position_for_column (line_table, 12);
1405 : 96 : const location_t c13 = linemap_position_for_column (line_table, 13);
1406 : 96 : const location_t c14 = linemap_position_for_column (line_table, 14);
1407 : 96 : const location_t c21 = linemap_position_for_column (line_table, 21);
1408 : : /* The checks below rely on column data, which is unavailable above this threshold; skip the test in that case. */
1409 : 96 : if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1410 : 32 : return;
1411 : :
1412 : : /* Use column 13 for the caret location, arbitrarily, to verify that we
1413 : : handle start != caret. */
1414 : 64 : const location_t aaa = make_location (c13, c12, c14);
1415 : 64 : ASSERT_EQ (c13, get_pure_location (aaa));
1416 : 64 : ASSERT_EQ (c12, get_start (aaa));
1417 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1418 : 64 : ASSERT_EQ (c14, get_finish (aaa));
1419 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1420 : :
1421 : : /* Make a location using a location with a range as the start-point. */
1422 : 64 : const location_t not_aaa = make_location (c11, aaa, c14);
1423 : 64 : ASSERT_EQ (c11, get_pure_location (not_aaa));
1424 : : /* It should use the start location of the range, not store the range
1425 : : itself. */
1426 : 64 : ASSERT_EQ (c12, get_start (not_aaa));
1427 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1428 : 64 : ASSERT_EQ (c14, get_finish (not_aaa));
1429 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1430 : :
1431 : : /* Similarly, make a location with a range as the end-point. */
1432 : 64 : const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1433 : 64 : ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1434 : 64 : ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1435 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1436 : 64 : ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1437 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1438 : 64 : const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1439 : : /* It should use the finish location of the range, not store the range
1440 : : itself. */
1441 : 64 : ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1442 : 64 : ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1443 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1444 : 64 : ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1445 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1446 : 96 : }
1447 : :
1448 : : /* Tests of lexing. */
1449 : :
1450 : : /* Verify that token TOK from PARSER has cpp_token_as_text
1451 : : equal to EXPECTED_TEXT, compared as strings with ASSERT_STREQ. */
1452 : :
1453 : : #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
1454 : : SELFTEST_BEGIN_STMT \
1455 : : unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
1456 : : ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
1457 : : SELFTEST_END_STMT
1458 : :
1459 : : /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1460 : : and ranges from EXP_START_COL to EXP_FINISH_COL.
1461 : : Use LOC as the effective location of the selftest. The column checks are skipped when should_have_column_data_p is false for TOK's src_loc. */
1462 : :
1463 : : static void
1464 : 576 : assert_token_loc_eq (const location &loc,
1465 : : const cpp_token *tok,
1466 : : const char *exp_filename, int exp_linenum,
1467 : : int exp_start_col, int exp_finish_col)
1468 : : {
1469 : 576 : location_t tok_loc = tok->src_loc;
1470 : 576 : ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1471 : 576 : ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1472 : :
1473 : : /* If location_t values are sufficiently high, then column numbers
1474 : : will be unavailable. */
1475 : 576 : if (!should_have_column_data_p (tok_loc))
1476 : 196 : return;
1477 : : /* The token's own column and the start of its range must both be EXP_START_COL; the finish of its range must be EXP_FINISH_COL. */
1478 : 380 : ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1479 : 380 : source_range tok_range = get_range_from_loc (line_table, tok_loc);
1480 : 380 : ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1481 : 380 : ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1482 : : }
1483 : :
1484 : : /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1485 : : SELFTEST_LOCATION as the effective location of the selftest, so that a failure is reported at the point where the macro is used. */
1486 : :
1487 : : #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
1488 : : EXP_START_COL, EXP_FINISH_COL) \
1489 : : assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
1490 : : (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
1491 : :
1492 : : /* Test of lexing a file using libcpp, verifying tokens and their
1493 : : location information, for the line table situation described by CASE_. The cpp_reader is created with CLK_GNUC89 and is finished and destroyed explicitly at the end of the test. */
1494 : :
1495 : : static void
1496 : 96 : test_lexer (const line_table_case &case_)
1497 : : {
1498 : : /* Create a tempfile and write some text to it. */
1499 : 96 : const char *content =
1500 : : /*00000000011111111112222222222333333.3333444444444.455555555556
1501 : : 12345678901234567890123456789012345.6789012345678.901234567890. */
1502 : : ("test_name /* c-style comment */\n"
1503 : : " \"test literal\"\n"
1504 : : " // test c++-style comment\n"
1505 : : " 42\n");
1506 : 96 : temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1507 : :
1508 : 96 : line_table_test ltt (case_);
1509 : :
1510 : 96 : cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1511 : :
1512 : 96 : const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1513 : 96 : ASSERT_NE (fname, NULL);
1514 : :
1515 : : /* Verify that we get the expected tokens back, with the correct
1516 : : location information. */
1517 : :
1518 : 96 : location_t loc;
1519 : 96 : const cpp_token *tok;
1520 : 96 : tok = cpp_get_token_with_location (parser, &loc);
1521 : 96 : ASSERT_NE (tok, NULL);
1522 : 96 : ASSERT_EQ (tok->type, CPP_NAME);
1523 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1524 : 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1525 : :
1526 : 96 : tok = cpp_get_token_with_location (parser, &loc);
1527 : 96 : ASSERT_NE (tok, NULL);
1528 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
1529 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1530 : 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1531 : : /* The next token expected is the number on line 4; no token is expected for the comment on line 3. */
1532 : 96 : tok = cpp_get_token_with_location (parser, &loc);
1533 : 96 : ASSERT_NE (tok, NULL);
1534 : 96 : ASSERT_EQ (tok->type, CPP_NUMBER);
1535 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1536 : 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1537 : :
1538 : 96 : tok = cpp_get_token_with_location (parser, &loc);
1539 : 96 : ASSERT_NE (tok, NULL);
1540 : 96 : ASSERT_EQ (tok->type, CPP_EOF);
1541 : :
1542 : 96 : cpp_finish (parser, NULL);
1543 : 96 : cpp_destroy (parser);
1544 : 96 : }
1545 : :
1546 : : /* Forward decls. lexer_test_options::apply takes a lexer_test &, and the lexer_test constructor takes a lexer_test_options *, so each class is declared before either is defined. */
1547 : :
1548 : : class lexer_test;
1549 : : class lexer_test_options;
1550 : :
1551 : : /* A class for specifying options of a lexer_test.
1552 : : The "apply" vfunc is called during the lexer_test constructor. NOTE(review): no virtual destructor is declared; that is fine as long as instances are never deleted through a lexer_test_options * -- confirm against the users of this class. */
1553 : :
1554 : 192 : class lexer_test_options
1555 : : {
1556 : : public:
1557 : : virtual void apply (lexer_test &) = 0;
1558 : : };
1559 : :
1560 : : /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
1561 : : in its dtor.
1562 : :
1563 : : This is needed by struct lexer_test to ensure that the cleanup of the
1564 : : cpp_reader happens *after* the cleanup of the temp_source_file. */
1565 : :
1566 : : class cpp_reader_ptr
1567 : : {
1568 : : public:
1569 : 2304 : cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
1570 : :
1571 : 2304 : ~cpp_reader_ptr ()
1572 : : {
1573 : 2304 : cpp_finish (m_ptr, NULL);
1574 : 2304 : cpp_destroy (m_ptr);
1575 : 2304 : }
1576 : :
1577 : 2304 : operator cpp_reader * () const { return m_ptr; }
1578 : :
1579 : : private:
1580 : : cpp_reader *m_ptr;
1581 : : };
1582 : :
1583 : : /* A struct for writing lexer tests. */
1584 : :
1585 : : class lexer_test
1586 : : {
1587 : : public:
1588 : : lexer_test (const line_table_case &case_, const char *content,
1589 : : lexer_test_options *options);
1590 : : ~lexer_test ();
1591 : :
1592 : : const cpp_token *get_token ();
1593 : :
1594 : : /* The ordering of these fields matters.
1595 : : The line_table_test must be first, since the cpp_reader_ptr
1596 : : uses it.
1597 : : The cpp_reader must be cleaned up *after* the temp_source_file
1598 : : since the filenames in input.cc's input cache are owned by the
1599 : : cpp_reader; in particular, when ~temp_source_file evicts the
1600 : : filename the filenames must still be alive. */
1601 : : line_table_test m_ltt;
1602 : : cpp_reader_ptr m_parser;
1603 : : temp_source_file m_tempfile;
1604 : : diagnostics::file_cache m_file_cache;
1605 : : string_concat_db m_concats;
1606 : : bool m_implicitly_expect_EOF;
1607 : : };
1608 : :
1609 : : /* Use an EBCDIC encoding for the execution charset, specifically
1610 : : IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1611 : :
1612 : : This exercises iconv integration within libcpp.
1613 : : Not every build of iconv supports the given charset,
1614 : : so we need to flag this error and handle it gracefully. */
1615 : :
1616 : : class ebcdic_execution_charset : public lexer_test_options
1617 : : {
1618 : : public:
1619 : 96 : ebcdic_execution_charset () : m_num_iconv_errors (0)
1620 : : {
1621 : 96 : gcc_assert (s_singleton == NULL);
1622 : 96 : s_singleton = this;
1623 : 96 : }
1624 : 96 : ~ebcdic_execution_charset ()
1625 : 96 : {
1626 : 96 : gcc_assert (s_singleton == this);
1627 : 96 : s_singleton = NULL;
1628 : 96 : }
1629 : :
1630 : 96 : void apply (lexer_test &test) final override
1631 : : {
1632 : 96 : cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1633 : 96 : cpp_opts->narrow_charset = "IBM1047";
1634 : :
1635 : 96 : cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1636 : 96 : callbacks->diagnostic = on_diagnostic;
1637 : 96 : }
1638 : :
1639 : 0 : static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
1640 : : enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
1641 : : enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
1642 : : rich_location *richloc ATTRIBUTE_UNUSED,
1643 : : const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
1644 : : ATTRIBUTE_FPTR_PRINTF(5,0)
1645 : : {
1646 : 0 : gcc_assert (s_singleton);
1647 : : /* Avoid exgettext from picking this up, it is translated in libcpp. */
1648 : 0 : const char *msg = "conversion from %s to %s not supported by iconv";
1649 : : #ifdef ENABLE_NLS
1650 : 0 : msg = dgettext ("cpplib", msg);
1651 : : #endif
1652 : : /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
1653 : : when the local iconv build doesn't support the conversion. */
1654 : 0 : if (strcmp (msgid, msg) == 0)
1655 : : {
1656 : 0 : s_singleton->m_num_iconv_errors++;
1657 : 0 : return true;
1658 : : }
1659 : :
1660 : : /* Otherwise, we have an unexpected error. */
1661 : 0 : abort ();
1662 : : }
1663 : :
1664 : 96 : bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
1665 : :
1666 : : private:
1667 : : static ebcdic_execution_charset *s_singleton;
1668 : : int m_num_iconv_errors;
1669 : : };
1670 : :
1671 : : ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
1672 : :
1673 : : /* A lexer_test_options subclass that records a list of diagnostic
1674 : : messages emitted by the lexer. */
1675 : :
1676 : : class lexer_diagnostic_sink : public lexer_test_options
1677 : : {
1678 : : public:
1679 : 96 : lexer_diagnostic_sink ()
1680 : 96 : {
1681 : 96 : gcc_assert (s_singleton == NULL);
1682 : 96 : s_singleton = this;
1683 : 96 : }
1684 : 96 : ~lexer_diagnostic_sink ()
1685 : 96 : {
1686 : 96 : gcc_assert (s_singleton == this);
1687 : 96 : s_singleton = NULL;
1688 : :
1689 : 96 : int i;
1690 : 96 : char *str;
1691 : 192 : FOR_EACH_VEC_ELT (m_diagnostics, i, str)
1692 : 96 : free (str);
1693 : 96 : }
1694 : :
1695 : 96 : void apply (lexer_test &test) final override
1696 : : {
1697 : 96 : cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1698 : 96 : callbacks->diagnostic = on_diagnostic;
1699 : 96 : }
1700 : :
1701 : 96 : static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
1702 : : enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
1703 : : enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
1704 : : rich_location *richloc ATTRIBUTE_UNUSED,
1705 : : const char *msgid, va_list *ap)
1706 : : ATTRIBUTE_FPTR_PRINTF(5,0)
1707 : : {
1708 : 96 : char *msg = xvasprintf (msgid, *ap);
1709 : 96 : s_singleton->m_diagnostics.safe_push (msg);
1710 : 96 : return true;
1711 : : }
1712 : :
1713 : : auto_vec<char *> m_diagnostics;
1714 : :
1715 : : private:
1716 : : static lexer_diagnostic_sink *s_singleton;
1717 : : };
1718 : :
1719 : : lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
1720 : :
1721 : : /* Constructor. Override line_table with a new instance based on CASE_,
1722 : : and write CONTENT to a tempfile. Create a cpp_reader, and use it to
1723 : : start parsing the tempfile. */
1724 : :
1725 : 2304 : lexer_test::lexer_test (const line_table_case &case_, const char *content,
1726 : 2304 : lexer_test_options *options)
1727 : 2304 : : m_ltt (case_),
1728 : 2304 : m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
1729 : : /* Create a tempfile and write the text to it. */
1730 : 2304 : m_tempfile (SELFTEST_LOCATION, ".c", content),
1731 : 2304 : m_concats (),
1732 : 2304 : m_implicitly_expect_EOF (true)
1733 : : {
1734 : 2304 : if (options)
1735 : 192 : options->apply (*this);
1736 : :
1737 : 2304 : cpp_init_iconv (m_parser);
1738 : :
1739 : : /* Parse the file. */
1740 : 2304 : const char *fname = cpp_read_main_file (m_parser,
1741 : : m_tempfile.get_filename ());
1742 : 2304 : ASSERT_NE (fname, NULL);
1743 : 2304 : }
1744 : :
1745 : : /* Destructor. By default, verify that the next token in m_parser is EOF. */
1746 : :
1747 : 2304 : lexer_test::~lexer_test ()
1748 : : {
1749 : 2304 : location_t loc;
1750 : 2304 : const cpp_token *tok;
1751 : :
1752 : 2304 : if (m_implicitly_expect_EOF)
1753 : : {
1754 : 2208 : tok = cpp_get_token_with_location (m_parser, &loc);
1755 : 2208 : ASSERT_NE (tok, NULL);
1756 : 2208 : ASSERT_EQ (tok->type, CPP_EOF);
1757 : : }
1758 : 2304 : }
1759 : :
1760 : : /* Get the next token from m_parser. */
1761 : :
1762 : : const cpp_token *
1763 : 3936 : lexer_test::get_token ()
1764 : : {
1765 : 3936 : location_t loc;
1766 : 3936 : const cpp_token *tok;
1767 : :
1768 : 3936 : tok = cpp_get_token_with_location (m_parser, &loc);
1769 : 3936 : ASSERT_NE (tok, NULL);
1770 : 3936 : return tok;
1771 : : }
1772 : :
1773 : : /* Verify that locations within string literals are correctly handled. */
1774 : :
1775 : : /* Verify get_source_range_for_substring for token(s) at STRLOC,
1776 : : using the string concatenation database for TEST.
1777 : :
1778 : : Assert that the character at index IDX is on EXPECTED_LINE,
1779 : : and that it begins at column EXPECTED_START_COL and ends at
1780 : : EXPECTED_FINISH_COL (unless the locations are beyond
1781 : : LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
1782 : : columns). */
1783 : :
1784 : : static void
1785 : 23740 : assert_char_at_range (const location &loc,
1786 : : lexer_test& test,
1787 : : location_t strloc, enum cpp_ttype type, int idx,
1788 : : int expected_line, int expected_start_col,
1789 : : int expected_finish_col)
1790 : : {
1791 : 23740 : cpp_reader *pfile = test.m_parser;
1792 : 23740 : string_concat_db *concats = &test.m_concats;
1793 : :
1794 : 23740 : source_range actual_range = source_range();
1795 : 23740 : const char *err
1796 : 23740 : = get_source_range_for_char (pfile, test.m_file_cache,
1797 : : concats, strloc, type, idx,
1798 : : &actual_range);
1799 : 23740 : if (should_have_column_data_p (strloc))
1800 : 18652 : ASSERT_EQ_AT (loc, NULL, err);
1801 : : else
1802 : : {
1803 : 5088 : ASSERT_STREQ_AT (loc,
1804 : : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1805 : : err);
1806 : 5088 : return;
1807 : : }
1808 : :
1809 : 18652 : int actual_start_line = LOCATION_LINE (actual_range.m_start);
1810 : 18652 : ASSERT_EQ_AT (loc, expected_line, actual_start_line);
1811 : 18652 : int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
1812 : 18652 : ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
1813 : :
1814 : 18652 : if (should_have_column_data_p (actual_range.m_start))
1815 : : {
1816 : 18652 : int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
1817 : 18652 : ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
1818 : : }
1819 : 18652 : if (should_have_column_data_p (actual_range.m_finish))
1820 : : {
1821 : 18652 : int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
1822 : 18652 : ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
1823 : : }
1824 : : }
1825 : :
1826 : : /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
1827 : : the effective location of any errors. */
1828 : :
1829 : : #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
1830 : : EXPECTED_START_COL, EXPECTED_FINISH_COL) \
1831 : : assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
1832 : : (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
1833 : : (EXPECTED_FINISH_COL))
1834 : :
1835 : : /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
1836 : : using the string concatenation database for TEST.
1837 : :
1838 : : Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
1839 : :
1840 : : static void
1841 : 1268 : assert_num_substring_ranges (const location &loc,
1842 : : lexer_test& test,
1843 : : location_t strloc,
1844 : : enum cpp_ttype type,
1845 : : int expected_num_ranges)
1846 : : {
1847 : 1268 : cpp_reader *pfile = test.m_parser;
1848 : 1268 : string_concat_db *concats = &test.m_concats;
1849 : :
1850 : 1268 : int actual_num_ranges = -1;
1851 : 1268 : const char *err
1852 : 1268 : = get_num_source_ranges_for_substring (pfile, test.m_file_cache,
1853 : : concats, strloc, type,
1854 : : &actual_num_ranges);
1855 : 1268 : if (should_have_column_data_p (strloc))
1856 : 884 : ASSERT_EQ_AT (loc, NULL, err);
1857 : : else
1858 : : {
1859 : 384 : ASSERT_STREQ_AT (loc,
1860 : : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1861 : : err);
1862 : 384 : return;
1863 : : }
1864 : 884 : ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
1865 : : }
1866 : :
1867 : : /* Macro for calling assert_num_substring_ranges, supplying
1868 : : SELFTEST_LOCATION for the effective location of any errors. */
1869 : :
1870 : : #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
1871 : : EXPECTED_NUM_RANGES) \
1872 : : assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
1873 : : (TYPE), (EXPECTED_NUM_RANGES))
1874 : :
1875 : :
1876 : : /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
1877 : : returns an error (using the string concatenation database for TEST). */
1878 : :
1879 : : static void
1880 : 636 : assert_has_no_substring_ranges (const location &loc,
1881 : : lexer_test& test,
1882 : : location_t strloc,
1883 : : enum cpp_ttype type,
1884 : : const char *expected_err)
1885 : : {
1886 : 636 : cpp_reader *pfile = test.m_parser;
1887 : 636 : string_concat_db *concats = &test.m_concats;
1888 : 636 : cpp_substring_ranges ranges;
1889 : 636 : const char *actual_err
1890 : 636 : = get_substring_ranges_for_loc (pfile, test.m_file_cache, concats, strloc,
1891 : : type, ranges);
1892 : 636 : if (should_have_column_data_p (strloc))
1893 : 444 : ASSERT_STREQ_AT (loc, expected_err, actual_err);
1894 : : else
1895 : 192 : ASSERT_STREQ_AT (loc,
1896 : : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1897 : : actual_err);
1898 : 636 : }
1899 : :
1900 : : #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
1901 : : assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
1902 : : (STRLOC), (TYPE), (ERR))
1903 : :
1904 : : /* Lex a simple string literal. Verify the substring location data, before
1905 : : and after running cpp_interpret_string on it. */
1906 : :
1907 : : static void
1908 : 96 : test_lexer_string_locations_simple (const line_table_case &case_)
1909 : : {
1910 : : /* Digits 0-9 (with 0 at column 10), the simple way.
1911 : : ....................000000000.11111111112.2222222223333333333
1912 : : ....................123456789.01234567890.1234567890123456789
1913 : : We add a trailing comment to ensure that we correctly locate
1914 : : the end of the string literal token. */
1915 : 96 : const char *content = " \"0123456789\" /* not a string */\n";
1916 : 96 : lexer_test test (case_, content, NULL);
1917 : :
1918 : : /* Verify that we get the expected token back, with the correct
1919 : : location information. */
1920 : 96 : const cpp_token *tok = test.get_token ();
1921 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
1922 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
1923 : 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
1924 : :
1925 : : /* At this point in lexing, the quote characters are treated as part of
1926 : : the string (they are stripped off by cpp_interpret_string). */
1927 : :
1928 : 96 : ASSERT_EQ (tok->val.str.len, 12);
1929 : :
1930 : : /* Verify that cpp_interpret_string works. */
1931 : 96 : cpp_string dst_string;
1932 : 96 : const enum cpp_ttype type = CPP_STRING;
1933 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
1934 : : &dst_string, type);
1935 : 96 : ASSERT_TRUE (result);
1936 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
1937 : 96 : free (const_cast <unsigned char *> (dst_string.text));
1938 : :
1939 : : /* Verify ranges of individual characters. This no longer includes the
1940 : : opening quote, but does include the closing quote. */
1941 : 1152 : for (int i = 0; i <= 10; i++)
1942 : 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
1943 : : 10 + i, 10 + i);
1944 : :
1945 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
1946 : 96 : }
1947 : :
1948 : : /* As test_lexer_string_locations_simple, but use an EBCDIC execution
1949 : : encoding. */
1950 : :
1951 : : static void
1952 : 96 : test_lexer_string_locations_ebcdic (const line_table_case &case_)
1953 : : {
1954 : : /* EBCDIC support requires iconv. */
1955 : 96 : if (!HAVE_ICONV)
1956 : 0 : return;
1957 : :
1958 : : /* Digits 0-9 (with 0 at column 10), the simple way.
1959 : : ....................000000000.11111111112.2222222223333333333
1960 : : ....................123456789.01234567890.1234567890123456789
1961 : : We add a trailing comment to ensure that we correctly locate
1962 : : the end of the string literal token. */
1963 : 96 : const char *content = " \"0123456789\" /* not a string */\n";
1964 : 96 : ebcdic_execution_charset use_ebcdic;
1965 : 96 : lexer_test test (case_, content, &use_ebcdic);
1966 : :
1967 : : /* Verify that we get the expected token back, with the correct
1968 : : location information. */
1969 : 96 : const cpp_token *tok = test.get_token ();
1970 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
1971 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
1972 : 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
1973 : :
1974 : : /* At this point in lexing, the quote characters are treated as part of
1975 : : the string (they are stripped off by cpp_interpret_string). */
1976 : :
1977 : 96 : ASSERT_EQ (tok->val.str.len, 12);
1978 : :
1979 : : /* The remainder of the test requires an iconv implementation that
1980 : : can convert from UTF-8 to the EBCDIC encoding requested above. */
1981 : 96 : if (use_ebcdic.iconv_errors_occurred_p ())
1982 : 0 : return;
1983 : :
1984 : : /* Verify that cpp_interpret_string works. */
1985 : 96 : cpp_string dst_string;
1986 : 96 : const enum cpp_ttype type = CPP_STRING;
1987 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
1988 : : &dst_string, type);
1989 : 96 : ASSERT_TRUE (result);
1990 : : /* We should now have EBCDIC-encoded text, specifically
1991 : : IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1992 : : The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
1993 : 96 : ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
1994 : : (const char *)dst_string.text);
1995 : 96 : free (const_cast <unsigned char *> (dst_string.text));
1996 : :
1997 : : /* Verify that we don't attempt to record substring location information
1998 : : for such cases. */
1999 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2000 : : (test, tok->src_loc, type,
2001 : : "execution character set != source character set");
2002 : 96 : }
2003 : :
2004 : : /* Lex a string literal containing a hex-escaped character.
2005 : : Verify the substring location data, before and after running
2006 : : cpp_interpret_string on it. */
2007 : :
2008 : : static void
2009 : 96 : test_lexer_string_locations_hex (const line_table_case &case_)
2010 : : {
2011 : : /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2012 : : and with a space in place of digit 6, to terminate the escaped
2013 : : hex code.
2014 : : ....................000000000.111111.11112222.
2015 : : ....................123456789.012345.67890123. */
2016 : 96 : const char *content = " \"01234\\x35 789\"\n";
2017 : 96 : lexer_test test (case_, content, NULL);
2018 : :
2019 : : /* Verify that we get the expected token back, with the correct
2020 : : location information. */
2021 : 96 : const cpp_token *tok = test.get_token ();
2022 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2023 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2024 : 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2025 : :
2026 : : /* At this point in lexing, the quote characters are treated as part of
2027 : : the string (they are stripped off by cpp_interpret_string). */
2028 : 96 : ASSERT_EQ (tok->val.str.len, 15);
2029 : :
2030 : : /* Verify that cpp_interpret_string works. */
2031 : 96 : cpp_string dst_string;
2032 : 96 : const enum cpp_ttype type = CPP_STRING;
2033 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2034 : : &dst_string, type);
2035 : 96 : ASSERT_TRUE (result);
2036 : 96 : ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2037 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2038 : :
2039 : : /* Verify ranges of individual characters. This no longer includes the
2040 : : opening quote, but does include the closing quote. */
2041 : 576 : for (int i = 0; i <= 4; i++)
2042 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2043 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2044 : 576 : for (int i = 6; i <= 10; i++)
2045 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2046 : :
2047 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2048 : 96 : }
2049 : :
2050 : : /* Lex a string literal containing an octal-escaped character.
2051 : : Verify the substring location data after running cpp_interpret_string
2052 : : on it. */
2053 : :
2054 : : static void
2055 : 96 : test_lexer_string_locations_oct (const line_table_case &case_)
2056 : : {
2057 : : /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2058 : : and with a space in place of digit 6, to terminate the escaped
2059 : : octal code.
2060 : : ....................000000000.111111.11112222.2222223333333333444
2061 : : ....................123456789.012345.67890123.4567890123456789012 */
2062 : 96 : const char *content = " \"01234\\065 789\" /* not a string */\n";
2063 : 96 : lexer_test test (case_, content, NULL);
2064 : :
2065 : : /* Verify that we get the expected token back, with the correct
2066 : : location information. */
2067 : 96 : const cpp_token *tok = test.get_token ();
2068 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2069 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2070 : :
2071 : : /* Verify that cpp_interpret_string works. */
2072 : 96 : cpp_string dst_string;
2073 : 96 : const enum cpp_ttype type = CPP_STRING;
2074 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2075 : : &dst_string, type);
2076 : 96 : ASSERT_TRUE (result);
2077 : 96 : ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2078 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2079 : :
2080 : : /* Verify ranges of individual characters. This no longer includes the
2081 : : opening quote, but does include the closing quote. */
2082 : 576 : for (int i = 0; i < 5; i++)
2083 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2084 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2085 : 576 : for (int i = 6; i <= 10; i++)
2086 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2087 : :
2088 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2089 : 96 : }
2090 : :
2091 : : /* Test of string literal containing letter escapes. */
2092 : :
2093 : : static void
2094 : 96 : test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2095 : : {
2096 : : /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2097 : : .....................000000000.1.11111.1.1.11222.22222223333333
2098 : : .....................123456789.0.12345.6.7.89012.34567890123456. */
2099 : 96 : const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2100 : 96 : lexer_test test (case_, content, NULL);
2101 : :
2102 : : /* Verify that we get the expected tokens back. */
2103 : 96 : const cpp_token *tok = test.get_token ();
2104 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2105 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2106 : :
2107 : : /* Verify ranges of individual characters. */
2108 : : /* "\t". */
2109 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2110 : : 0, 1, 10, 11);
2111 : : /* "foo". */
2112 : 384 : for (int i = 1; i <= 3; i++)
2113 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2114 : : i, 1, 11 + i, 11 + i);
2115 : : /* "\\" and "\n". */
2116 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2117 : : 4, 1, 15, 16);
2118 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2119 : : 5, 1, 17, 18);
2120 : :
2121 : : /* "bar" and closing quote for nul-terminator. */
2122 : 480 : for (int i = 6; i <= 9; i++)
2123 : 384 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2124 : : i, 1, 13 + i, 13 + i);
2125 : :
2126 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2127 : 96 : }
2128 : :
2129 : : /* Another test of a string literal containing a letter escape.
2130 : : Based on string seen in
2131 : : printf ("%-%\n");
2132 : : in gcc.dg/format/c90-printf-1.c. */
2133 : :
2134 : : static void
2135 : 96 : test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2136 : : {
2137 : : /* .....................000000000.1111.11.1111.22222222223.
2138 : : .....................123456789.0123.45.6789.01234567890. */
2139 : 96 : const char *content = (" \"%-%\\n\" /* non-str */\n");
2140 : 96 : lexer_test test (case_, content, NULL);
2141 : :
2142 : : /* Verify that we get the expected tokens back. */
2143 : 96 : const cpp_token *tok = test.get_token ();
2144 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2145 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2146 : :
2147 : : /* Verify ranges of individual characters. */
2148 : : /* "%-%". */
2149 : 384 : for (int i = 0; i < 3; i++)
2150 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2151 : : i, 1, 10 + i, 10 + i);
2152 : : /* "\n". */
2153 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2154 : : 3, 1, 13, 14);
2155 : :
2156 : : /* Closing quote for nul-terminator. */
2157 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2158 : : 4, 1, 15, 15);
2159 : :
2160 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2161 : 96 : }
2162 : :
2163 : : /* Lex a string literal containing UCN 4 characters.
2164 : : Verify the substring location data after running cpp_interpret_string
2165 : : on it. */
2166 : :
2167 : : static void
2168 : 96 : test_lexer_string_locations_ucn4 (const line_table_case &case_)
2169 : : {
2170 : : /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2171 : : as UCN 4.
2172 : : ....................000000000.111111.111122.222222223.33333333344444
2173 : : ....................123456789.012345.678901.234567890.12345678901234 */
2174 : 96 : const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2175 : 96 : lexer_test test (case_, content, NULL);
2176 : :
2177 : : /* Verify that we get the expected token back, with the correct
2178 : : location information. */
2179 : 96 : const cpp_token *tok = test.get_token ();
2180 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2181 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2182 : :
2183 : : /* Verify that cpp_interpret_string works.
2184 : : The string should be encoded in the execution character
2185 : : set. Assuming that is UTF-8, we should have the following:
2186 : : ----------- ---- ----- ------- ----------------
2187 : : Byte offset Byte Octal Unicode Source Column(s)
2188 : : ----------- ---- ----- ------- ----------------
2189 : : 0 0x30 '0' 10
2190 : : 1 0x31 '1' 11
2191 : : 2 0x32 '2' 12
2192 : : 3 0x33 '3' 13
2193 : : 4 0x34 '4' 14
2194 : : 5 0xE2 \342 U+2174 15-20
2195 : : 6 0x85 \205 (cont) 15-20
2196 : : 7 0xB4 \264 (cont) 15-20
2197 : : 8 0xE2 \342 U+2175 21-26
2198 : : 9 0x85 \205 (cont) 21-26
2199 : : 10 0xB5 \265 (cont) 21-26
2200 : : 11 0x37 '7' 27
2201 : : 12 0x38 '8' 28
2202 : : 13 0x39 '9' 29
2203 : : 14 0x00 30 (closing quote)
2204 : : ----------- ---- ----- ------- ---------------. */
2205 : :
2206 : 96 : cpp_string dst_string;
2207 : 96 : const enum cpp_ttype type = CPP_STRING;
2208 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2209 : : &dst_string, type);
2210 : 96 : ASSERT_TRUE (result);
2211 : 96 : ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2212 : : (const char *)dst_string.text);
2213 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2214 : :
2215 : : /* Verify ranges of individual characters. This no longer includes the
2216 : : opening quote, but does include the closing quote.
2217 : : '01234'. */
2218 : 576 : for (int i = 0; i <= 4; i++)
2219 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2220 : : /* U+2174. */
2221 : 384 : for (int i = 5; i <= 7; i++)
2222 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2223 : : /* U+2175. */
2224 : 384 : for (int i = 8; i <= 10; i++)
2225 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2226 : : /* '789' and nul terminator */
2227 : 480 : for (int i = 11; i <= 14; i++)
2228 : 384 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2229 : :
2230 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2231 : 96 : }
2232 : :
2233 : : /* Lex a string literal containing UCN 8 characters.
2234 : : Verify the substring location data after running cpp_interpret_string
2235 : : on it. */
2236 : :
2237 : : static void
2238 : 96 : test_lexer_string_locations_ucn8 (const line_table_case &case_)
2239 : : {
2240 : : /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2241 : : ....................000000000.111111.1111222222.2222333333333.344444
2242 : : ....................123456789.012345.6789012345.6789012345678.901234 */
2243 : 96 : const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2244 : 96 : lexer_test test (case_, content, NULL);
2245 : :
2246 : : /* Verify that we get the expected token back, with the correct
2247 : : location information. */
2248 : 96 : const cpp_token *tok = test.get_token ();
2249 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2250 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2251 : : "\"01234\\U00002174\\U00002175789\"");
2252 : :
2253 : : /* Verify that cpp_interpret_string works.
2254 : : The UTF-8 encoding of the string is identical to that from
2255 : : the ucn4 testcase above; the only difference is the column
2256 : : locations. */
2257 : 96 : cpp_string dst_string;
2258 : 96 : const enum cpp_ttype type = CPP_STRING;
2259 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2260 : : &dst_string, type);
2261 : 96 : ASSERT_TRUE (result);
2262 : 96 : ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2263 : : (const char *)dst_string.text);
2264 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2265 : :
2266 : : /* Verify ranges of individual characters. This no longer includes the
2267 : : opening quote, but does include the closing quote.
2268 : : '01234'. */
2269 : 576 : for (int i = 0; i <= 4; i++)
2270 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2271 : : /* U+2174. */
2272 : 384 : for (int i = 5; i <= 7; i++)
2273 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2274 : : /* U+2175. */
2275 : 384 : for (int i = 8; i <= 10; i++)
2276 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2277 : : /* '789' at columns 35-37 */
2278 : 384 : for (int i = 11; i <= 13; i++)
2279 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2280 : : /* Closing quote/nul-terminator at column 38. */
2281 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2282 : :
2283 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2284 : 96 : }
2285 : :
/* Read the 32-bit big-endian value stored at PTR_BE_VALUE, one byte at a
   time, and return it in host endianness.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);

  /* Fold the bytes together, most significant byte first.  */
  uint32_t host_value = 0;
  for (int i = 0; i < 4; i++)
    host_value = (host_value << 8) | bytes[i];
  return host_value;
}
2297 : :
2298 : : /* Lex a wide string literal (L"") and verify that attempts to read substring
2299 : : location data from it fail gracefully. */
2300 : :
2301 : : static void
2302 : 96 : test_lexer_string_locations_wide_string (const line_table_case &case_)
2303 : : {
2304 : : /* Digits 0-9.
2305 : : ....................000000000.11111111112.22222222233333
2306 : : ....................123456789.01234567890.12345678901234 */
2307 : 96 : const char *content = " L\"0123456789\" /* non-str */\n";
2308 : 96 : lexer_test test (case_, content, NULL);
2309 : :
2310 : : /* Verify that we get the expected token back, with the correct
2311 : : location information. */
2312 : 96 : const cpp_token *tok = test.get_token ();
2313 : 96 : ASSERT_EQ (tok->type, CPP_WSTRING);
2314 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2315 : :
2316 : : /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2317 : 96 : cpp_string dst_string;
2318 : 96 : const enum cpp_ttype type = CPP_WSTRING;
2319 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2320 : : &dst_string, type);
2321 : 96 : ASSERT_TRUE (result);
2322 : : /* The cpp_reader defaults to big-endian with
2323 : : CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2324 : : now be encoded as UTF-32BE. */
2325 : 96 : const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2326 : 96 : ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2327 : 96 : ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2328 : 96 : ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2329 : 96 : ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); /* Terminator after the ten digits. */
2330 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2331 : :
2332 : : /* We don't yet support generating substring location information
2333 : : for L"" strings. */
2334 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2335 : : (test, tok->src_loc, type,
2336 : : "execution character set != source character set");
2337 : 96 : }
2338 : :
2339 : : /* Fetch the big-endian 16-bit value stored at PTR_BE_VALUE, reading it a byte at a time, and return it in host endianness. */
2340 : :
2341 : : static uint16_t
2342 : 384 : uint16_from_big_endian (const uint16_t *ptr_be_value)
2343 : : {
2344 : 384 : const unsigned char *buf = (const unsigned char *)ptr_be_value;
2345 : 384 : return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
2346 : : }
2347 : :
2348 : : /* Lex a u"" string literal and verify that attempts to read substring
2349 : : location data from it fail gracefully. */
2350 : :
2351 : : static void
2352 : 96 : test_lexer_string_locations_string16 (const line_table_case &case_)
2353 : : {
2354 : : /* Digits 0-9.
2355 : : ....................000000000.11111111112.22222222233333
2356 : : ....................123456789.01234567890.12345678901234 */
2357 : 96 : const char *content = " u\"0123456789\" /* non-str */\n";
2358 : 96 : lexer_test test (case_, content, NULL);
2359 : :
2360 : : /* Verify that we get the expected token back, with the correct
2361 : : location information. */
2362 : 96 : const cpp_token *tok = test.get_token ();
2363 : 96 : ASSERT_EQ (tok->type, CPP_STRING16);
2364 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2365 : :
2366 : : /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2367 : 96 : cpp_string dst_string;
2368 : 96 : const enum cpp_ttype type = CPP_STRING16;
2369 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2370 : : &dst_string, type);
2371 : 96 : ASSERT_TRUE (result);
2372 : :
2373 : : /* The cpp_reader defaults to big-endian, so dst_string should
2374 : : now be encoded as UTF-16BE. */
2375 : 96 : const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2376 : 96 : ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2377 : 96 : ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2378 : 96 : ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2379 : 96 : ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10])); /* Terminator after the ten digits. */
2380 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2381 : :
2382 : : /* We don't yet support generating substring location information
2383 : : for u"" strings. */
2384 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2385 : : (test, tok->src_loc, type,
2386 : : "execution character set != source character set");
2387 : 96 : }
2388 : :
2389 : : /* Lex a U"" string literal and verify that attempts to read substring
2390 : : location data from it fail gracefully. */
2391 : :
2392 : : static void
2393 : 96 : test_lexer_string_locations_string32 (const line_table_case &case_)
2394 : : {
2395 : : /* Digits 0-9.
2396 : : ....................000000000.11111111112.22222222233333
2397 : : ....................123456789.01234567890.12345678901234 */
2398 : 96 : const char *content = " U\"0123456789\" /* non-str */\n";
2399 : 96 : lexer_test test (case_, content, NULL);
2400 : :
2401 : : /* Verify that we get the expected token back, with the correct
2402 : : location information. */
2403 : 96 : const cpp_token *tok = test.get_token ();
2404 : 96 : ASSERT_EQ (tok->type, CPP_STRING32);
2405 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2406 : :
2407 : : /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2408 : 96 : cpp_string dst_string;
2409 : 96 : const enum cpp_ttype type = CPP_STRING32;
2410 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2411 : : &dst_string, type);
2412 : 96 : ASSERT_TRUE (result);
2413 : :
2414 : : /* The cpp_reader defaults to big-endian, so dst_string should
2415 : : now be encoded as UTF-32BE. */
2416 : 96 : const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2417 : 96 : ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2418 : 96 : ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2419 : 96 : ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2420 : 96 : ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); /* Terminator after the ten digits. */
2421 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2422 : :
2423 : : /* We don't yet support generating substring location information
2424 : : for U"" strings. */
2425 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2426 : : (test, tok->src_loc, type,
2427 : : "execution character set != source character set");
2428 : 96 : }
2429 : :
2430 : : /* Lex a u8-string literal.
2431 : : Verify the substring location data after running cpp_interpret_string
2432 : : on it. */
2433 : :
2434 : : static void
2435 : 96 : test_lexer_string_locations_u8 (const line_table_case &case_)
2436 : : {
2437 : : /* Digits 0-9.
2438 : : ....................000000000.11111111112.22222222233333
2439 : : ....................123456789.01234567890.12345678901234 */
2440 : 96 : const char *content = " u8\"0123456789\" /* non-str */\n";
2441 : 96 : lexer_test test (case_, content, NULL);
2442 : :
2443 : : /* Verify that we get the expected token back, with the correct
2444 : : location information. */
2445 : 96 : const cpp_token *tok = test.get_token ();
2446 : 96 : ASSERT_EQ (tok->type, CPP_UTF8STRING);
2447 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2448 : :
2449 : : /* Verify that cpp_interpret_string works. NOTE(review): TYPE below is CPP_STRING although the token is CPP_UTF8STRING; confirm that this is intentional. */
2450 : 96 : cpp_string dst_string;
2451 : 96 : const enum cpp_ttype type = CPP_STRING;
2452 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2453 : : &dst_string, type);
2454 : 96 : ASSERT_TRUE (result);
2455 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2456 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2457 : :
2458 : : /* Verify ranges of individual characters. This no longer includes the
2459 : : opening quote, but does include the closing quote. */
2460 : 1152 : for (int i = 0; i <= 10; i++)
2461 : 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2462 : 96 : }
2463 : :
2464 : : /* Lex a string literal containing UTF-8 source characters.
2465 : : Verify the substring location data after running cpp_interpret_string
2466 : : on it. */
2467 : :
2468 : : static void
2469 : 96 : test_lexer_string_locations_utf8_source (const line_table_case &case_)
2470 : : {
2471 : : /* This string literal is written out to the source file as UTF-8,
2472 : : and is of the form "before mojibake after", where "mojibake"
2473 : : is written as the following four unicode code points:
2474 : : U+6587 CJK UNIFIED IDEOGRAPH-6587
2475 : : U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2476 : : U+5316 CJK UNIFIED IDEOGRAPH-5316
2477 : : U+3051 HIRAGANA LETTER KE.
2478 : : Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2479 : : "before" and "after" are 1 byte per unicode character.
2480 : :
2481 : : The numbers shown are "columns", which are *byte* numbers within
2482 : : the line, rather than unicode character numbers.
2483 : :
2484 : : .................... 000000000.1111111.
2485 : : .................... 123456789.0123456. */
2486 : 96 : const char *content = (" \"before "
2487 : : /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2488 : : UTF-8: 0xE6 0x96 0x87
2489 : : C octal escaped UTF-8: \346\226\207
2490 : : "column" numbers: 17-19. */
2491 : : "\346\226\207"
2492 : :
2493 : : /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2494 : : UTF-8: 0xE5 0xAD 0x97
2495 : : C octal escaped UTF-8: \345\255\227
2496 : : "column" numbers: 20-22. */
2497 : : "\345\255\227"
2498 : :
2499 : : /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2500 : : UTF-8: 0xE5 0x8C 0x96
2501 : : C octal escaped UTF-8: \345\214\226
2502 : : "column" numbers: 23-25. */
2503 : : "\345\214\226"
2504 : :
2505 : : /* U+3051 HIRAGANA LETTER KE
2506 : : UTF-8: 0xE3 0x81 0x91
2507 : : C octal escaped UTF-8: \343\201\221
2508 : : "column" numbers: 26-28. */
2509 : : "\343\201\221"
2510 : :
2511 : : /* column numbers 29 onwards
2512 : : 2333333.33334444444444
2513 : : 9012345.67890123456789. */
2514 : : " after\" /* non-str */\n");
2515 : 96 : lexer_test test (case_, content, NULL);
2516 : :
2517 : : /* Verify that we get the expected token back, with the correct
2518 : : location information. */
2519 : 96 : const cpp_token *tok = test.get_token ();
2520 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2521 : 96 : ASSERT_TOKEN_AS_TEXT_EQ
2522 : : (test.m_parser, tok,
2523 : : "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2524 : :
2525 : : /* Verify that cpp_interpret_string works. */
2526 : 96 : cpp_string dst_string;
2527 : 96 : const enum cpp_ttype type = CPP_STRING;
2528 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2529 : : &dst_string, type);
2530 : 96 : ASSERT_TRUE (result);
2531 : 96 : ASSERT_STREQ
2532 : : ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2533 : : (const char *)dst_string.text);
2534 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2535 : :
2536 : : /* Verify ranges of individual characters. This no longer includes the
2537 : : opening quote, but does include the closing quote.
2538 : : Assuming that both source and execution encodings are UTF-8, we have
2539 : : a run of 25 octets in each, plus the NUL terminator. */
2540 : 2496 : for (int i = 0; i < 25; i++)
2541 : 2400 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2542 : : /* NUL-terminator should use the closing quote at column 35. */
2543 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
2544 : :
2545 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
2546 : 96 : }
2547 : :
2548 : : /* Test of string literal concatenation: two adjacent literals, each on its own line. */
2549 : :
2550 : : static void
2551 : 96 : test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2552 : : {
2553 : : /* Digits 0-9.
2554 : : .....................000000000.111111.11112222222222
2555 : : .....................123456789.012345.67890123456789. */
2556 : 96 : const char *content = (" \"01234\" /* non-str */\n"
2557 : : " \"56789\" /* non-str */\n");
2558 : 96 : lexer_test test (case_, content, NULL);
2559 : :
2560 : 96 : location_t input_locs[2];
2561 : :
2562 : : /* Verify that we get the expected tokens back. */
2563 : 96 : auto_vec <cpp_string> input_strings;
2564 : 96 : const cpp_token *tok_a = test.get_token ();
2565 : 96 : ASSERT_EQ (tok_a->type, CPP_STRING);
2566 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2567 : 96 : input_strings.safe_push (tok_a->val.str);
2568 : 96 : input_locs[0] = tok_a->src_loc;
2569 : :
2570 : 96 : const cpp_token *tok_b = test.get_token ();
2571 : 96 : ASSERT_EQ (tok_b->type, CPP_STRING);
2572 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2573 : 96 : input_strings.safe_push (tok_b->val.str);
2574 : 96 : input_locs[1] = tok_b->src_loc;
2575 : :
2576 : : /* Verify that cpp_interpret_string works. */
2577 : 96 : cpp_string dst_string;
2578 : 96 : const enum cpp_ttype type = CPP_STRING;
2579 : 96 : bool result = cpp_interpret_string (test.m_parser,
2580 : 96 : input_strings.address (), 2,
2581 : : &dst_string, type);
2582 : 96 : ASSERT_TRUE (result);
2583 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2584 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2585 : :
2586 : : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
2587 : 96 : test.m_concats.record_string_concatenation (2, input_locs);
2588 : :
2589 : 96 : location_t initial_loc = input_locs[0];
2590 : :
2591 : : /* "01234" on line 1. */
2592 : 576 : for (int i = 0; i <= 4; i++)
2593 : 480 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2594 : : /* "56789" in line 2, plus its closing quote for the nul terminator. */
2595 : 672 : for (int i = 5; i <= 10; i++)
2596 : 576 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2597 : :
2598 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2599 : 96 : }
2600 : :
2601 : : /* Another test of string literal concatenation: five adjacent literals, one per line. */
2602 : :
2603 : : static void
2604 : 96 : test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2605 : : {
2606 : : /* Digits 0-9.
2607 : : .....................000000000.111.11111112222222
2608 : : .....................123456789.012.34567890123456. */
2609 : 96 : const char *content = (" \"01\" /* non-str */\n"
2610 : : " \"23\" /* non-str */\n"
2611 : : " \"45\" /* non-str */\n"
2612 : : " \"67\" /* non-str */\n"
2613 : : " \"89\" /* non-str */\n");
2614 : 96 : lexer_test test (case_, content, NULL);
2615 : :
2616 : 96 : auto_vec <cpp_string> input_strings;
2617 : 96 : location_t input_locs[5];
2618 : :
2619 : : /* Verify that we get the expected tokens back. */
2620 : 576 : for (int i = 0; i < 5; i++)
2621 : : {
2622 : 480 : const cpp_token *tok = test.get_token ();
2623 : 480 : ASSERT_EQ (tok->type, CPP_STRING);
2624 : 480 : input_strings.safe_push (tok->val.str);
2625 : 480 : input_locs[i] = tok->src_loc;
2626 : : }
2627 : :
2628 : : /* Verify that cpp_interpret_string works. */
2629 : 96 : cpp_string dst_string;
2630 : 96 : const enum cpp_ttype type = CPP_STRING;
2631 : 96 : bool result = cpp_interpret_string (test.m_parser,
2632 : 96 : input_strings.address (), 5,
2633 : : &dst_string, type);
2634 : 96 : ASSERT_TRUE (result);
2635 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2636 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2637 : :
2638 : : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
2639 : 96 : test.m_concats.record_string_concatenation (5, input_locs);
2640 : :
2641 : 96 : location_t initial_loc = input_locs[0];
2642 : :
2643 : : /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2644 : : detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2645 : : and expect get_source_range_for_substring to fail.
2646 : : However, for a string concatenation test, we can have a case
2647 : : where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2648 : : but subsequent strings can be after it.
2649 : : Attempting to detect this within assert_char_at_range
2650 : : would overcomplicate the logic for the common test cases, so
2651 : : we detect it here. */
2652 : 96 : if (should_have_column_data_p (input_locs[0])
2653 : 96 : && !should_have_column_data_p (input_locs[4]))
2654 : : {
2655 : : /* Verify that get_source_range_for_substring gracefully rejects
2656 : : this case. */
2657 : 8 : source_range actual_range;
2658 : 8 : const char *err
2659 : 8 : = get_source_range_for_char (test.m_parser, test.m_file_cache,
2660 : : &test.m_concats,
2661 : : initial_loc, type, 0, &actual_range);
2662 : 8 : ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2663 : 8 : return;
2664 : : }
2665 : :
2666 : 528 : for (int i = 0; i < 5; i++)
2667 : 1320 : for (int j = 0; j < 2; j++)
2668 : 880 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2669 : : i + 1, 10 + j, 10 + j);
2670 : :
2671 : : /* NUL-terminator should use the final closing quote at line 5 column 12. */
2672 : 88 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
2673 : :
2674 : 88 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2675 : 96 : }
2676 : :
2677 : : /* Another test of string literal concatenation, this time combined with
2678 : : various kinds of escaped characters. */
2679 : :
2680 : : static void
2681 : 96 : test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2682 : : {
2683 : : /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
2684 : : and digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2685 : 96 : const char *content
2686 : : /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2687 : : .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2688 : : = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2689 : 96 : lexer_test test (case_, content, NULL);
2690 : :
2691 : 96 : auto_vec <cpp_string> input_strings;
2692 : 96 : location_t input_locs[4];
2693 : :
2694 : : /* Verify that we get the expected tokens back. */
2695 : 480 : for (int i = 0; i < 4; i++)
2696 : : {
2697 : 384 : const cpp_token *tok = test.get_token ();
2698 : 384 : ASSERT_EQ (tok->type, CPP_STRING);
2699 : 384 : input_strings.safe_push (tok->val.str);
2700 : 384 : input_locs[i] = tok->src_loc;
2701 : : }
2702 : :
2703 : : /* Verify that cpp_interpret_string works. */
2704 : 96 : cpp_string dst_string;
2705 : 96 : const enum cpp_ttype type = CPP_STRING;
2706 : 96 : bool result = cpp_interpret_string (test.m_parser,
2707 : 96 : input_strings.address (), 4,
2708 : : &dst_string, type);
2709 : 96 : ASSERT_TRUE (result);
2710 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2711 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2712 : :
2713 : : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
2714 : 96 : test.m_concats.record_string_concatenation (4, input_locs);
2715 : :
2716 : 96 : location_t initial_loc = input_locs[0];
2717 : :
2718 : 576 : for (int i = 0; i <= 4; i++)
2719 : 480 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2720 : 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
2721 : 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
2722 : 384 : for (int i = 7; i <= 9; i++)
2723 : 288 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
2724 : :
2725 : : /* NUL-terminator should use the location of the final closing quote. */
2726 : 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
2727 : :
2728 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2729 : 96 : }
2730 : :
2731 : : /* Test of a string literal that is the expansion of a macro. */
2732 : :
2733 : : static void
2734 : 96 : test_lexer_string_locations_macro (const line_table_case &case_)
2735 : : {
2736 : : /* Digits 0-9.
2737 : : .....................0000000001111111111.22222222223.
2738 : : .....................1234567890123456789.01234567890. */
2739 : 96 : const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
2740 : : " MACRO");
2741 : 96 : lexer_test test (case_, content, NULL);
2742 : :
2743 : : /* Verify that we get the expected tokens back. */
2744 : 96 : const cpp_token *tok = test.get_token ();
2745 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2746 : :
2747 : 96 : tok = test.get_token ();
2748 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2749 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2750 : :
2751 : : /* Verify ranges of individual characters. We ought to
2752 : : see columns within the macro definition. */
2753 : 1152 : for (int i = 0; i <= 10; i++)
2754 : 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2755 : : i, 1, 20 + i, 20 + i);
2756 : :
2757 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
2758 : :
2759 : 96 : tok = test.get_token ();
2760 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2761 : 96 : }
2762 : :
2763 : : /* Test of stringification of a macro argument. */
2764 : :
2765 : : static void
2766 : 96 : test_lexer_string_locations_stringified_macro_argument
2767 : : (const line_table_case &case_)
2768 : : {
2769 : : /* .....................000000000111111111122222222223.
2770 : : .....................123456789012345678901234567890. */
2771 : 96 : const char *content = ("#define MACRO(X) #X /* non-str */\n"
2772 : : "MACRO(foo)\n");
2773 : 96 : lexer_test test (case_, content, NULL);
2774 : :
2775 : : /* Verify that we get the expected tokens back: padding, then the stringified argument. */
2776 : 96 : const cpp_token *tok = test.get_token ();
2777 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2778 : :
2779 : 96 : tok = test.get_token ();
2780 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2781 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
2782 : :
2783 : : /* We don't support getting the location of a stringified macro
2784 : : argument. Verify that it fails gracefully. */
2785 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2786 : : "cpp_interpret_string_1 failed");
2787 : :
2788 : 96 : tok = test.get_token ();
2789 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2790 : :
2791 : 96 : tok = test.get_token ();
2792 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2793 : 96 : }
2794 : :
2795 : : /* Ensure that we fail gracefully if something attempts to pass
2796 : : in a location that isn't a string literal token. Seen on this code:
2797 : :
2798 : : const char a[] = " %d ";
2799 : : __builtin_printf (a, 0.5);
2800 : : ^
2801 : :
2802 : : when c-format.cc erroneously used the indicated one-character
2803 : : location as the format string location, leading to a read past the
2804 : : end of a string buffer in cpp_interpret_string_1. */
2805 : :
2806 : : static void
2807 : 96 : test_lexer_string_locations_non_string (const line_table_case &case_)
2808 : : {
2809 : : /* .....................000000000111111111122222222223.
2810 : : .....................123456789012345678901234567890. */
2811 : 96 : const char *content = (" a\n");
2812 : 96 : lexer_test test (case_, content, NULL);
2813 : :
2814 : : /* Verify that we get the expected token back. */
2815 : 96 : const cpp_token *tok = test.get_token ();
2816 : 96 : ASSERT_EQ (tok->type, CPP_NAME);
2817 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
2818 : :
2819 : : /* At this point, libcpp is attempting to interpret the name as a
2820 : : string literal, despite it not starting with a quote. We don't detect
2821 : : that, but we should at least fail gracefully. */
2822 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2823 : : "cpp_interpret_string_1 failed");
2824 : 96 : }
2825 : :
2826 : : /* Ensure that we can read substring information for a token which
2827 : : starts in one linemap and ends in another. Adapted from
2828 : : gcc.dg/cpp/pr69985.c. */
2829 : :
2830 : : static void
2831 : 96 : test_lexer_string_locations_long_line (const line_table_case &case_)
2832 : : {
2833 : : /* .....................000000.0001111111111
2834 : : .....................123456.7890123456789. */
2835 : 96 : const char *content = ("/* A very long line, so that we start a new line map. */\n"
2836 : : " \"0123456789012345678901234567890123456789"
2837 : : "0123456789012345678901234567890123456789"
2838 : : "0123456789012345678901234567890123456789"
2839 : : "0123456789\"\n");
2840 : :
2841 : 96 : lexer_test test (case_, content, NULL);
2842 : :
2843 : : /* Verify that we get the expected token back. */
2844 : 96 : const cpp_token *tok = test.get_token ();
2845 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2846 : :
2847 : 96 : if (!should_have_column_data_p (line_table->highest_location))
2848 : 36 : return;
2849 : :
2850 : : /* Verify ranges of individual characters: 130 digits plus the terminator. */
2851 : 60 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
2852 : 7920 : for (int i = 0; i < 131; i++)
2853 : 7860 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2854 : : i, 2, 7 + i, 7 + i);
2855 : 96 : }
2856 : :
2857 : : /* Test of locations within a raw string that doesn't contain a newline. */
2858 : :
2859 : : static void
2860 : 96 : test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
2861 : : {
2862 : : /* .....................00.0000000111111111122.
2863 : : .....................12.3456789012345678901. */
2864 : 96 : const char *content = ("R\"foo(0123456789)foo\"\n");
2865 : 96 : lexer_test test (case_, content, NULL);
2866 : :
2867 : : /* Verify that we get the expected token back. */
2868 : 96 : const cpp_token *tok = test.get_token ();
2869 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2870 : :
2871 : : /* Verify that cpp_interpret_string works. */
2872 : 96 : cpp_string dst_string;
2873 : 96 : const enum cpp_ttype type = CPP_STRING;
2874 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2875 : : &dst_string, type);
2876 : 96 : ASSERT_TRUE (result);
2877 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2878 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2879 : :
2880 : 96 : if (!should_have_column_data_p (line_table->highest_location))
2881 : 32 : return;
2882 : :
2883 : : /* 0-9, plus the nul terminator. */
2884 : 64 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
2885 : 768 : for (int i = 0; i < 11; i++)
2886 : 704 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2887 : : i, 1, 7 + i, 7 + i);
2888 : 96 : }
2889 : :
2890 : : /* Test of locations within a raw string that contains a newline; substring ranges are expected to be unavailable. */
2891 : :
2892 : : static void
2893 : 96 : test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
2894 : : {
2895 : : /* .....................00.0000.
2896 : : .....................12.3456. */
2897 : 96 : const char *content = ("R\"foo(\n"
2898 : : /* .....................00000.
2899 : : .....................12345. */
2900 : : "hello\n"
2901 : : "world\n"
2902 : : /* .....................00000.
2903 : : .....................12345. */
2904 : : ")foo\"\n");
2905 : 96 : lexer_test test (case_, content, NULL);
2906 : :
2907 : : /* Verify that we get the expected token back. */
2908 : 96 : const cpp_token *tok = test.get_token ();
2909 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2910 : :
2911 : : /* Verify that cpp_interpret_string works. */
2912 : 96 : cpp_string dst_string;
2913 : 96 : const enum cpp_ttype type = CPP_STRING;
2914 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2915 : : &dst_string, type);
2916 : 96 : ASSERT_TRUE (result);
2917 : 96 : ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
2918 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2919 : :
2920 : 96 : if (!should_have_column_data_p (line_table->highest_location))
2921 : 36 : return;
2922 : :
2923 : : /* Currently we don't support locations within raw strings that
2924 : : contain newlines. */
2925 : 60 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
2926 : : "range endpoints are on different lines");
2927 : 96 : }
2928 : :
2929 : : /* Test of parsing an unterminated raw string: the closing delimiter "ouCh" does not match the opening delimiter "ouch". */
2930 : :
2931 : : static void
2932 : 96 : test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
2933 : : {
2934 : 96 : const char *content = "R\"ouch()ouCh\" /* etc */";
2935 : :
2936 : 96 : lexer_diagnostic_sink diagnostics;
2937 : 96 : lexer_test test (case_, content, &diagnostics);
2938 : 96 : test.m_implicitly_expect_EOF = false;
2939 : :
2940 : : /* Attempt to parse the raw string. */
2941 : 96 : const cpp_token *tok = test.get_token ();
2942 : 96 : ASSERT_EQ (tok->type, CPP_EOF);
2943 : :
2944 : 96 : ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
2945 : : /* We expect the message "unterminated raw string"
2946 : : in the "cpplib" translation domain.
2947 : : It's not clear that dgettext is available on all supported hosts,
2948 : : so this assertion is commented-out for now.
2949 : : ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
2950 : : diagnostics.m_diagnostics[0]);
2951 : : */
2952 : 96 : }
2953 : :
2954 : : /* Test of lexing char constants: plain, u, U and L prefixed, and multi-character. */
2955 : :
2956 : : static void
2957 : 96 : test_lexer_char_constants (const line_table_case &case_)
2958 : : {
2959 : : /* Various char constants.
2960 : : .....................0000000001111111111.22222222223.
2961 : : .....................1234567890123456789.01234567890. */
2962 : 96 : const char *content = (" 'a'\n"
2963 : : " u'a'\n"
2964 : : " U'a'\n"
2965 : : " L'a'\n"
2966 : : " 'abc'\n");
2967 : 96 : lexer_test test (case_, content, NULL);
2968 : :
2969 : : /* Verify that we get the expected tokens back. */
2970 : : /* 'a'. */
2971 : 96 : const cpp_token *tok = test.get_token ();
2972 : 96 : ASSERT_EQ (tok->type, CPP_CHAR);
2973 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
2974 : :
2975 : 96 : unsigned int chars_seen;
2976 : 96 : int unsignedp;
2977 : 96 : cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
2978 : : &chars_seen, &unsignedp);
2979 : 96 : ASSERT_EQ (cc, 'a');
2980 : 96 : ASSERT_EQ (chars_seen, 1);
2981 : :
2982 : : /* u'a'. */
2983 : 96 : tok = test.get_token ();
2984 : 96 : ASSERT_EQ (tok->type, CPP_CHAR16);
2985 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
2986 : :
2987 : : /* U'a'. */
2988 : 96 : tok = test.get_token ();
2989 : 96 : ASSERT_EQ (tok->type, CPP_CHAR32);
2990 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
2991 : :
2992 : : /* L'a'. */
2993 : 96 : tok = test.get_token ();
2994 : 96 : ASSERT_EQ (tok->type, CPP_WCHAR);
2995 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
2996 : :
2997 : : /* 'abc' (c-char-sequence). */
2998 : 96 : tok = test.get_token ();
2999 : 96 : ASSERT_EQ (tok->type, CPP_CHAR);
3000 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3001 : 96 : }
3002 : : /* A table of interesting location_t values, giving one axis of our test
3003 : : matrix (for_each_line_table_case pairs each with a default_range_bits value). */
3004 : :
3005 : : static const location_t boundary_locations[] = {
3006 : : /* Zero means "don't override the default values for a new line_table". */
3007 : : 0,
3008 : :
3009 : : /* An arbitrary non-zero value that isn't close to one of
3010 : : the boundary values below. */
3011 : : 0x10000,
3012 : :
3013 : : /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3014 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3015 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3016 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3017 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3018 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3019 : :
3020 : : /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3021 : : LINE_MAP_MAX_LOCATION_WITH_COLS - 0x200,
3022 : : LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3023 : : LINE_MAP_MAX_LOCATION_WITH_COLS,
3024 : : LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3025 : : LINE_MAP_MAX_LOCATION_WITH_COLS + 0x200,
3026 : : };
3027 : :
3028 : : /* Run TESTCASE once for each case in our test matrix, i.e. for each combination of a default_range_bits value and a boundary location. */
3029 : :
3030 : : void
3031 : 244 : for_each_line_table_case (void (*testcase) (const line_table_case &))
3032 : : {
3033 : : /* As noted above in the description of struct line_table_case,
3034 : : we want to explore a test matrix of interesting line_table
3035 : : situations, running various selftests for each case within the
3036 : : matrix. */
3037 : :
3038 : : /* Run all tests with:
3039 : : (a) line_table->default_range_bits == 0, and
3040 : : (b) line_table->default_range_bits == line_map_suggested_range_bits. */
3041 : :
3042 : 732 : for (int default_range_bits: {0, line_map_suggested_range_bits})
3043 : : {
3044 : : /* ...and use each of the "interesting" location values as
3045 : : the starting location within line_table. */
3046 : 488 : const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3047 : 6344 : for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3048 : : {
3049 : 5856 : line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3050 : 5856 : testcase (c);
3051 : : }
3052 : : }
3053 : 244 : }
3054 : :
3055 : : /* Verify that when presented with a consecutive pair of locations with
3056 : : a very large line offset, we don't attempt to consolidate them into
3057 : : a single ordinary linemap where the line offsets within the line map
3058 : : would lead to overflow (PR lto/88147). */
3059 : :
3060 : : static void
3061 : 4 : test_line_offset_overflow ()
3062 : : {
/* Use a fixed line_table_case (5, 0) rather than the whole test matrix;
   the m_range_bits assertion below expects the value 5 given here.  */
3063 : 4 : line_table_test ltt (line_table_case (5, 0));
3064 : :
/* Enter "foo.c", start line 1 and then line 2578.  The location for
   line 2578 is expected to compare equal to ("foo.c", 2578, 0);
   NOTE(review): the final argument of linemap_line_start is presumably
   a maximum-column hint -- confirm against its declaration.  */
3065 : 4 : linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3066 : 4 : linemap_line_start (line_table, 1, 100);
3067 : 4 : location_t loc_a = linemap_line_start (line_table, 2578, 255);
3068 : 4 : assert_loceq ("foo.c", 2578, 0, loc_a);
3069 : :
/* Remember which ordinary map is last at this point, and check the
   bit counts it was created with.  */
3070 : 4 : const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3071 : 4 : ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3072 : 4 : ASSERT_EQ (ordmap_a->m_range_bits, 5);
3073 : :
/* Jump from line 2578 to line 404198 within the same file.  */
3074 : 4 : location_t loc_b = linemap_line_start (line_table, 404198, 512);
3075 : 4 : assert_loceq ("foo.c", 404198, 0, loc_b);
3076 : :
3077 : : /* We should have started a new linemap, rather than attempting to store
3078 : : a very large line offset. */
3079 : 4 : const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3080 : 4 : ASSERT_NE (ordmap_a, ordmap_b);
3081 : 4 : }
3082 : :
/* Selftest for cpp_display_width, cpp_byte_column_to_display_column and
   cpp_display_column_to_byte_column on invalid UTF-8, control bytes,
   valid multi-byte UTF-8, tabs, and columns beyond the end of the
   string.  Unless a block builds its own policy, a policy with a tab
   stop of 8 and cpp_wcwidth is used.  */
3083 : 4 : void test_cpp_utf8 ()
3084 : : {
3085 : 4 : const int def_tabstop = 8;
3086 : 4 : cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3087 : :
3088 : : /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3089 : 4 : {
/* The first string is the four bytes of the emoji used further down,
   each followed by '!', so no multi-byte sequence is complete; all 8
   bytes are expected to count as one column each.  */
3090 : 4 : int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3091 : 4 : ASSERT_EQ (8, w_bad);
/* Five control bytes, including an embedded NUL, hence the explicit
   length.  */
3092 : 4 : int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3093 : 4 : ASSERT_EQ (5, w_ctrl);
3094 : : }
3095 : :
3096 : : /* Verify that wcwidth of valid UTF-8 is as expected. */
3097 : 4 : {
3098 : 4 : const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3099 : 4 : ASSERT_EQ (1, w_pi);
3100 : 4 : const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3101 : 4 : ASSERT_EQ (2, w_emoji);
3102 : 4 : const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3103 : : policy);
3104 : 4 : ASSERT_EQ (1, w_umlaut_precomposed);
3105 : 4 : const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3106 : : policy);
3107 : 4 : ASSERT_EQ (1, w_umlaut_combining);
3108 : 4 : const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3109 : 4 : ASSERT_EQ (2, w_han);
3110 : 4 : const int w_ascii = cpp_display_width ("GCC", 3, policy);
3111 : 4 : ASSERT_EQ (3, w_ascii);
/* A 24-byte mix of the pieces tested above plus one stray byte (\x9f).
   Expected width, using the per-piece widths asserted above:
   pi (1) + " = 3.14 " (8) + emoji (2) + stray byte (1) + "! " (2)
   + han (2) + " " (1) + y with combining mark (1) = 18.  */
3112 : 4 : const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3113 : : "\x9f! \xe4\xb8\xba y\xcc\x88",
3114 : : 24, policy);
3115 : 4 : ASSERT_EQ (18, w_mixed);
3116 : : }
3117 : :
3118 : : /* Verify that display width properly expands tabs. */
3119 : 4 : {
/* The same 6-byte string is measured with tab stops of 1, 3 and 8.
   The expected widths (6, 10, 17) are consistent with each tab
   advancing to the next multiple of the tab stop: e.g. for 8 the
   columns reached are 8, 11 ("abc"), 16, 17 ("d").  */
3120 : 4 : const char *tstr = "\tabc\td";
3121 : 4 : ASSERT_EQ (6, cpp_display_width (tstr, 6,
3122 : : cpp_char_column_policy (1, cpp_wcwidth)));
3123 : 4 : ASSERT_EQ (10, cpp_display_width (tstr, 6,
3124 : : cpp_char_column_policy (3, cpp_wcwidth)));
3125 : 4 : ASSERT_EQ (17, cpp_display_width (tstr, 6,
3126 : : cpp_char_column_policy (8, cpp_wcwidth)));
/* Display column 7 lies within the span of the leading tab (tab stop
   8), and is expected to map back to byte column 1.  */
3127 : 4 : ASSERT_EQ (1,
3128 : : cpp_display_column_to_byte_column
3129 : : (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3130 : : }
3131 : :
3132 : : /* Verify that cpp_byte_column_to_display_column can go past the end,
3133 : : and similar edge cases. */
3134 : 4 : {
3135 : 4 : const char *str
3136 : : /* Display columns.
3137 : : 111111112345 */
3138 : : = "\xcf\x80 abc";
3139 : : /* 111122223456
3140 : : Byte columns. */
3141 : :
/* The string is 6 bytes and 5 display columns wide.  Byte column 106
   is 100 past the end of the bytes; the expected display column 105 is
   likewise 100 past the display width.  With a NULL string, a column
   of 10000 is expected back unchanged and a column of 0 stays 0.  */
3142 : 4 : ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3143 : 4 : ASSERT_EQ (105,
3144 : : cpp_byte_column_to_display_column (str, 6, 106, policy));
3145 : 4 : ASSERT_EQ (10000,
3146 : : cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
3147 : 4 : ASSERT_EQ (0,
3148 : : cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
3149 : : }
3150 : :
3151 : : /* Verify that cpp_display_column_to_byte_column can go past the end,
3152 : : and similar edge cases, and check invertibility. */
3153 : 4 : {
3154 : 4 : const char *str
3155 : : /* Display columns.
3156 : : 000000000000000000000000000000000000011
3157 : : 111111112222222234444444455555555678901 */
3158 : : = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3159 : : /* 000000000000000000000000000000000111111
3160 : : 111122223333444456666777788889999012345
3161 : : Byte columns. */
/* The string is 15 bytes and 11 display columns wide (see the rulers
   above).  Display column 111 is 100 past the end and is expected to
   map to byte column 115.  */
3162 : 4 : ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
3163 : 4 : ASSERT_EQ (15,
3164 : : cpp_display_column_to_byte_column (str, 15, 11, policy));
3165 : 4 : ASSERT_EQ (115,
3166 : : cpp_display_column_to_byte_column (str, 15, 111, policy));
3167 : 4 : ASSERT_EQ (10000,
3168 : : cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
3169 : 4 : ASSERT_EQ (0,
3170 : : cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
3171 : :
3172 : : /* Verify that we do not interrupt a UTF-8 sequence. */
3173 : 4 : ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
3174 : :
/* Round-trip every byte column through the display column and back.
   Byte columns 1-3 and 6-8 fall inside the first and second 4-byte
   emoji respectively and are checked against the partial-sequence
   rule described below instead.  */
3175 : 64 : for (int byte_col = 1; byte_col <= 15; ++byte_col)
3176 : : {
3177 : 60 : const int disp_col
3178 : 60 : = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
3179 : 60 : const int byte_col2
3180 : 60 : = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
3181 : :
3182 : : /* If we ask for the display column in the middle of a UTF-8
3183 : : sequence, it will return the length of the partial sequence,
3184 : : matching the behavior of GCC before display column support.
3185 : : Otherwise check the round trip was successful. */
3186 : 60 : if (byte_col < 4)
3187 : 12 : ASSERT_EQ (byte_col, disp_col);
3188 : 48 : else if (byte_col >= 6 && byte_col < 9)
3189 : 12 : ASSERT_EQ (3 + (byte_col - 5), disp_col);
3190 : : else
3191 : 60 : ASSERT_EQ (byte_col2, byte_col);
3192 : : }
3193 : : }
3194 : 4 : }
3195 : :
/* Convenience wrapper: return the result of cpp_valid_utf8_p on the
   NUL-terminated string STR.  The length is taken with strlen, so this
   cannot be used for inputs containing an embedded NUL byte.  */
3196 : : static bool
3197 : 36 : check_cpp_valid_utf8_p (const char *str)
3198 : : {
3199 : 36 : return cpp_valid_utf8_p (str, strlen (str));
3200 : : }
3201 : :
3202 : : /* Check that cpp_valid_utf8_p works as expected.
   Accepted inputs: ASCII, 2-, 3- and 4-byte sequences and control bytes.
   Rejected inputs: broken-up sequences, stray continuation bytes, lead
   bytes not followed by a continuation byte, and bytes that are never
   valid. */
3203 : :
3204 : : static void
3205 : 4 : test_cpp_valid_utf8_p ()
3206 : : {
3207 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
3208 : :
3209 : : /* 2-byte char (pi). */
3210 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
3211 : :
3212 : : /* 3-byte chars (the Japanese word "mojibake"). */
3213 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p
3214 : : (
3215 : : /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3216 : : UTF-8: 0xE6 0x96 0x87
3217 : : C octal escaped UTF-8: \346\226\207. */
3218 : : "\346\226\207"
3219 : : /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3220 : : UTF-8: 0xE5 0xAD 0x97
3221 : : C octal escaped UTF-8: \345\255\227. */
3222 : : "\345\255\227"
3223 : : /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3224 : : UTF-8: 0xE5 0x8C 0x96
3225 : : C octal escaped UTF-8: \345\214\226. */
3226 : : "\345\214\226"
3227 : : /* U+3051 HIRAGANA LETTER KE
3228 : : UTF-8: 0xE3 0x81 0x91
3229 : : C octal escaped UTF-8: \343\201\221. */
3230 : : "\343\201\221"));
3231 : :
3232 : : /* 4-byte char: an emoji. */
3233 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
3234 : :
3235 : : /* Control codes, including the NUL byte.  cpp_valid_utf8_p is called
   directly with an explicit length of 5 here, because the strlen-based
   helper would stop at the embedded NUL. */
3236 : 4 : ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
3237 : :
/* The four bytes of the emoji above, each followed by '!', so that no
   multi-byte sequence is complete.  */
3238 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
3239 : :
3240 : : /* Unexpected continuation bytes.  Every byte in 0x80..0xbf is tried
   on its own as a one-byte input. */
3241 : 4 : for (unsigned char continuation_byte = 0x80;
3242 : 260 : continuation_byte <= 0xbf;
3243 : : continuation_byte++)
3244 : 256 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
3245 : :
3246 : : /* "Lonely start characters" for 2-byte sequences.  In this and the
   following blocks each lead byte is followed by a space rather than a
   continuation byte, and the two-byte buffer is passed with length 2. */
3247 : 4 : {
3248 : 4 : unsigned char buf[2];
3249 : 4 : buf[1] = ' ';
3250 : 4 : for (buf[0] = 0xc0;
3251 : 132 : buf[0] <= 0xdf;
3252 : 128 : buf[0]++)
3253 : 128 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3254 : : }
3255 : :
3256 : : /* "Lonely start characters" for 3-byte sequences. */
3257 : 4 : {
3258 : 4 : unsigned char buf[2];
3259 : 4 : buf[1] = ' ';
3260 : 4 : for (buf[0] = 0xe0;
3261 : 68 : buf[0] <= 0xef;
3262 : 64 : buf[0]++)
3263 : 64 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3264 : : }
3265 : :
3266 : : /* "Lonely start characters" for 4-byte sequences. */
3267 : 4 : {
3268 : 4 : unsigned char buf[2];
3269 : 4 : buf[1] = ' ';
3270 : 4 : for (buf[0] = 0xf0;
3271 : 24 : buf[0] <= 0xf4;
3272 : 20 : buf[0]++)
3273 : 20 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3274 : : }
3275 : :
3276 : : /* Invalid start characters (formerly valid for 5-byte and 6-byte
3277 : : sequences). */
3278 : 4 : {
3279 : 4 : unsigned char buf[2];
3280 : 4 : buf[1] = ' ';
3281 : 4 : for (buf[0] = 0xf5;
3282 : 40 : buf[0] <= 0xfd;
3283 : 36 : buf[0]++)
3284 : 36 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3285 : : }
3286 : :
3287 : : /* Impossible bytes.  Each is passed alone as a one-byte string. */
3288 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
3289 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
3290 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
3291 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
3292 : 4 : }
3293 : :
3294 : : /* Run all of the selftests within this file.
   Tests passed to for_each_line_table_case are repeated for every case
   that function generates; the remaining tests are called directly,
   once each. */
3295 : :
3296 : : void
3297 : 4 : input_cc_tests ()
3298 : : {
3299 : 4 : test_linenum_comparisons ();
3300 : 4 : test_should_have_column_data_p ();
3301 : 4 : test_unknown_location ();
3302 : 4 : test_builtins ();
3303 : 4 : for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3304 : :
3305 : 4 : for_each_line_table_case (test_accessing_ordinary_linemaps);
3306 : 4 : for_each_line_table_case (test_lexer);
3307 : 4 : for_each_line_table_case (test_lexer_string_locations_simple);
3308 : 4 : for_each_line_table_case (test_lexer_string_locations_ebcdic);
3309 : 4 : for_each_line_table_case (test_lexer_string_locations_hex);
3310 : 4 : for_each_line_table_case (test_lexer_string_locations_oct);
3311 : 4 : for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3312 : 4 : for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3313 : 4 : for_each_line_table_case (test_lexer_string_locations_ucn4);
3314 : 4 : for_each_line_table_case (test_lexer_string_locations_ucn8);
3315 : 4 : for_each_line_table_case (test_lexer_string_locations_wide_string);
3316 : 4 : for_each_line_table_case (test_lexer_string_locations_string16);
3317 : 4 : for_each_line_table_case (test_lexer_string_locations_string32);
3318 : 4 : for_each_line_table_case (test_lexer_string_locations_u8);
3319 : 4 : for_each_line_table_case (test_lexer_string_locations_utf8_source);
3320 : 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3321 : 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3322 : 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3323 : 4 : for_each_line_table_case (test_lexer_string_locations_macro);
3324 : 4 : for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3325 : 4 : for_each_line_table_case (test_lexer_string_locations_non_string);
3326 : 4 : for_each_line_table_case (test_lexer_string_locations_long_line);
3327 : 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3328 : 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3329 : 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3330 : 4 : for_each_line_table_case (test_lexer_char_constants);
3331 : :
/* These do not take a line_table_case, so they run once, outside the
   matrix.  */
3332 : 4 : test_line_offset_overflow ();
3333 : :
3334 : 4 : test_cpp_utf8 ();
3335 : 4 : test_cpp_valid_utf8_p ();
3336 : 4 : }
3337 : :
3338 : : } // namespace selftest
3339 : :
3340 : : #endif /* CHECKING_P */
|