Line data Source code
1 : /* Data and functions related to line maps and input files.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "intl.h"
24 : #include "diagnostic.h"
25 : #include "diagnostics/file-cache.h"
26 : #include "selftest.h"
27 : #include "cpplib.h"
28 :
29 : #ifndef HAVE_ICONV
30 : #define HAVE_ICONV 0
31 : #endif
32 :
33 : const char *
34 6865121 : special_fname_builtin ()
35 : {
36 6865121 : return _("<built-in>");
37 : }
38 :
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps that the location helpers in this file consult
   when they are not handed an explicit line_maps pointer.  */

class line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

class line_maps *saved_line_table;
51 :
52 : /* Expand the source location LOC into a human readable location. If
53 : LOC resolves to a builtin location, the file name of the readable
54 : location is set to the string "<built-in>". If EXPANSION_POINT_P is
55 : TRUE and LOC is virtual, then it is resolved to the expansion
56 : point of the involved macro. Otherwise, it is resolved to the
57 : spelling location of the token.
58 :
59 : When resolving to the spelling location of the token, if the
60 : resulting location is for a built-in location (that is, it has no
61 : associated line/column) in the context of a macro expansion, the
62 : returned location is the first one (while unwinding the macro
63 : location towards its expansion point) that is in real source
64 : code.
65 :
66 : ASPECT controls which part of the location to use. */
67 :
68 : static expanded_location
69 964438857 : expand_location_1 (const line_maps *set,
70 : location_t loc,
71 : bool expansion_point_p,
72 : enum location_aspect aspect)
73 : {
74 964438857 : expanded_location xloc;
75 964438857 : const line_map_ordinary *map;
76 964438857 : enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
77 964438857 : tree block = NULL;
78 :
79 964438857 : if (IS_ADHOC_LOC (loc))
80 : {
81 265118978 : block = LOCATION_BLOCK (loc);
82 265118978 : loc = LOCATION_LOCUS (loc);
83 : }
84 :
85 964438857 : memset (&xloc, 0, sizeof (xloc));
86 :
87 964438857 : if (loc >= RESERVED_LOCATION_COUNT)
88 : {
89 907979855 : if (!expansion_point_p)
90 : {
91 : /* We want to resolve LOC to its spelling location.
92 :
93 : But if that spelling location is a reserved location that
94 : appears in the context of a macro expansion (like for a
95 : location for a built-in token), let's consider the first
96 : location (toward the expansion point) that is not reserved;
97 : that is, the first location that is in real source code. */
98 2198310 : loc = linemap_unwind_to_first_non_reserved_loc (set,
99 : loc, NULL);
100 2198310 : lrk = LRK_SPELLING_LOCATION;
101 : }
102 907979855 : loc = linemap_resolve_location (set, loc, lrk, &map);
103 :
104 : /* loc is now either in an ordinary map, or is a reserved location.
105 : If it is a compound location, the caret is in a spelling location,
106 : but the start/finish might still be a virtual location.
107 : Depending of what the caller asked for, we may need to recurse
108 : one level in order to resolve any virtual locations in the
109 : end-points. */
110 907979855 : switch (aspect)
111 : {
112 0 : default:
113 0 : gcc_unreachable ();
114 : /* Fall through. */
115 : case location_aspect::caret:
116 : break;
117 428211 : case location_aspect::start:
118 428211 : {
119 428211 : location_t start = get_start (loc);
120 428211 : if (start != loc)
121 1179 : return expand_location_1 (set, start, expansion_point_p, aspect);
122 : }
123 : break;
124 100378 : case location_aspect::finish:
125 100378 : {
126 100378 : location_t finish = get_finish (loc);
127 100378 : if (finish != loc)
128 1133 : return expand_location_1 (set, finish, expansion_point_p, aspect);
129 : }
130 : break;
131 : }
132 907977543 : xloc = linemap_expand_location (set, map, loc);
133 : }
134 :
135 964436545 : xloc.data = block;
136 964436545 : if (loc <= BUILTINS_LOCATION)
137 56459002 : xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
138 :
139 964436545 : return xloc;
140 : }
141 :
142 : /* Return a NUL-terminated copy of the source text between two locations, or
143 : NULL if the arguments are invalid. The caller is responsible for freeing
144 : the return value. */
145 :
146 : char *
147 996 : get_source_text_between (diagnostics::file_cache &fc,
148 : location_t start, location_t end)
149 : {
150 996 : expanded_location expstart
151 996 : = expand_location_to_spelling_point (start, location_aspect::start);
152 996 : expanded_location expend
153 996 : = expand_location_to_spelling_point (end, location_aspect::finish);
154 :
155 : /* If the locations are in different files or the end comes before the
156 : start, give up and return nothing. */
157 996 : if (!expstart.file || !expend.file)
158 : return NULL;
159 995 : if (strcmp (expstart.file, expend.file) != 0)
160 : return NULL;
161 995 : if (expstart.line > expend.line)
162 : return NULL;
163 995 : if (expstart.line == expend.line
164 993 : && expstart.column > expend.column)
165 : return NULL;
166 : /* These aren't real column numbers, give up. */
167 995 : if (expstart.column == 0 || expend.column == 0)
168 : return NULL;
169 :
170 : /* For a single line we need to trim both edges. */
171 995 : if (expstart.line == expend.line)
172 : {
173 993 : diagnostics::char_span line
174 993 : = fc.get_source_line (expstart.file, expstart.line);
175 993 : if (line.length () < 1)
176 : return NULL;
177 993 : int s = expstart.column - 1;
178 993 : int len = expend.column - s;
179 993 : if (line.length () < (size_t)expend.column)
180 : return NULL;
181 993 : return line.subspan (s, len).xstrdup ();
182 : }
183 :
184 2 : struct obstack buf_obstack;
185 2 : obstack_init (&buf_obstack);
186 :
187 : /* Loop through all lines in the range and append each to buf; may trim
188 : parts of the start and end lines off depending on column values. */
189 22 : for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
190 : {
191 20 : diagnostics::char_span line = fc.get_source_line (expstart.file, lnum);
192 20 : if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
193 0 : continue;
194 :
195 : /* For the first line in the range, only start at expstart.column */
196 20 : if (lnum == expstart.line)
197 : {
198 2 : unsigned off = expstart.column - 1;
199 2 : if (line.length () < off)
200 0 : return NULL;
201 2 : line = line.subspan (off, line.length() - off);
202 : }
203 : /* For the last line, don't go past expend.column */
204 18 : else if (lnum == expend.line)
205 : {
206 2 : if (line.length () < (size_t)expend.column)
207 : return NULL;
208 2 : line = line.subspan (0, expend.column);
209 : }
210 :
211 : /* Combine spaces at the beginning of later lines. */
212 20 : if (lnum > expstart.line)
213 : {
214 : unsigned off;
215 230 : for (off = 0; off < line.length(); ++off)
216 230 : if (line[off] != ' ' && line[off] != '\t')
217 : break;
218 18 : if (off > 0)
219 : {
220 18 : obstack_1grow (&buf_obstack, ' ');
221 18 : line = line.subspan (off, line.length() - off);
222 : }
223 : }
224 :
225 : /* This does not include any trailing newlines. */
226 20 : obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
227 : }
228 :
229 : /* NUL-terminate and finish the buf obstack. */
230 2 : obstack_1grow (&buf_obstack, 0);
231 2 : const char *buf = (const char *) obstack_finish (&buf_obstack);
232 :
233 2 : return xstrdup (buf);
234 : }
235 :
236 : /* Test if the location originates from the spelling location of a
237 : builtin-tokens. That is, return TRUE if LOC is a (possibly
238 : virtual) location of a built-in token that appears in the expansion
239 : list of a macro. Please note that this function also works on
240 : tokens that result from built-in tokens. For instance, the
241 : function would return true if passed a token "4" that is the result
242 : of the expansion of the built-in __LINE__ macro. */
243 : bool
244 13259 : is_location_from_builtin_token (location_t loc)
245 : {
246 13259 : const line_map_ordinary *map = NULL;
247 13259 : loc = linemap_resolve_location (line_table, loc,
248 : LRK_SPELLING_LOCATION, &map);
249 13259 : return loc == BUILTINS_LOCATION;
250 : }
251 :
252 : /* Expand the source location LOC into a human readable location. If
253 : LOC is virtual, it resolves to the expansion point of the involved
254 : macro. If LOC resolves to a builtin location, the file name of the
255 : readable location is set to the string "<built-in>". */
256 :
257 : expanded_location
258 962236236 : expand_location (location_t loc)
259 : {
260 962236236 : return expand_location_1 (line_table, loc, /*expansion_point_p=*/true,
261 962236236 : location_aspect::caret);
262 : }
263 :
264 : /* Expand the source location LOC into a human readable location. If
265 : LOC is virtual, it resolves to the expansion location of the
266 : relevant macro. If LOC resolves to a builtin location, the file
267 : name of the readable location is set to the string
268 : "<built-in>". */
269 :
270 : expanded_location
271 84301 : expand_location_to_spelling_point (location_t loc,
272 : enum location_aspect aspect)
273 : {
274 84301 : return expand_location_1 (line_table, loc, /*expansion_point_p=*/false,
275 84301 : aspect);
276 : }
277 :
278 : /* The rich_location class within libcpp requires a way to expand
279 : location_t instances, and relies on the client code
280 : providing a symbol named
281 : linemap_client_expand_location_to_spelling_point
282 : to do this.
283 :
284 : This is the implementation for libcommon.a (all host binaries),
285 : which simply calls into expand_location_1. */
286 :
287 : expanded_location
288 2116008 : linemap_client_expand_location_to_spelling_point (const line_maps *set,
289 : location_t loc,
290 : enum location_aspect aspect)
291 : {
292 2116008 : return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect);
293 : }
294 :
295 :
296 : /* If LOCATION is in a system header and if it is a virtual location
297 : for a token coming from the expansion of a macro, unwind it to
298 : the location of the expansion point of the macro. If the expansion
299 : point is also in a system header return the original LOCATION.
300 : Otherwise, return the location of the expansion point.
301 :
302 : This is used for instance when we want to emit diagnostics about a
303 : token that may be located in a macro that is itself defined in a
304 : system header, for example, for the NULL macro. In such a case, if
305 : LOCATION were passed directly to diagnostic functions such as
306 : warning_at, the diagnostic would be suppressed (unless
307 : -Wsystem-headers). */
308 :
309 : location_t
310 501676296 : expansion_point_location_if_in_system_header (location_t location)
311 : {
312 501676296 : if (!in_system_header_at (location))
313 : return location;
314 :
315 371966894 : location_t xloc = linemap_resolve_location (line_table, location,
316 : LRK_MACRO_EXPANSION_POINT,
317 : NULL);
318 371966894 : return in_system_header_at (xloc) ? location : xloc;
319 : }
320 :
321 : /* If LOCATION is a virtual location for a token coming from the expansion
322 : of a macro, unwind to the location of the expansion point of the macro. */
323 :
324 : location_t
325 197 : expansion_point_location (location_t location)
326 : {
327 197 : return linemap_resolve_location (line_table, location,
328 197 : LRK_MACRO_EXPANSION_POINT, NULL);
329 : }
330 :
331 : /* Construct a location with caret at CARET, ranging from START to
332 : FINISH.
333 :
334 : For example, consider:
335 :
336 : 11111111112
337 : 12345678901234567890
338 : 522
339 : 523 return foo + bar;
340 : ~~~~^~~~~
341 : 524
342 :
343 : The location's caret is at the "+", line 523 column 15, but starts
344 : earlier, at the "f" of "foo" at column 11. The finish is at the "r"
345 : of "bar" at column 19. */
346 :
347 : location_t
348 2669691412 : make_location (location_t caret, location_t start, location_t finish)
349 : {
350 2669691412 : return line_table->make_location (caret, start, finish);
351 : }
352 :
353 : /* Same as above, but taking a source range rather than two locations. */
354 :
355 : location_t
356 1879040930 : make_location (location_t caret, source_range src_range)
357 : {
358 1879040930 : location_t pure_loc = get_pure_location (caret);
359 1879040930 : return line_table->get_or_create_combined_loc (pure_loc, src_range,
360 1879040930 : nullptr, 0);
361 : }
362 :
363 : /* An expanded_location stores the column in byte units. This function
364 : converts that column to display units. That requires reading the associated
365 : source line in order to calculate the display width. If that cannot be done
366 : for any reason, then returns the byte column as a fallback. */
367 : int
368 748683 : location_compute_display_column (diagnostics::file_cache &fc,
369 : expanded_location exploc,
370 : const cpp_char_column_policy &policy)
371 : {
372 748683 : if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
373 : return exploc.column;
374 713162 : diagnostics::char_span line = fc.get_source_line (exploc.file, exploc.line);
375 : /* If line is NULL, this function returns exploc.column which is the
376 : desired fallback. */
377 713162 : return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
378 713162 : exploc.column, policy);
379 : }
380 :
381 : /* Dump statistics to stderr about the memory usage of the line_table
382 : set of line maps. This also displays some statistics about macro
383 : expansion. */
384 :
385 : void
386 0 : dump_line_table_statistics (void)
387 : {
388 0 : struct linemap_stats s;
389 0 : long total_used_map_size,
390 : macro_maps_size,
391 : total_allocated_map_size;
392 :
393 0 : memset (&s, 0, sizeof (s));
394 :
395 0 : linemap_get_statistics (line_table, &s);
396 :
397 0 : macro_maps_size = s.macro_maps_used_size
398 0 : + s.macro_maps_locations_size;
399 :
400 0 : total_allocated_map_size = s.ordinary_maps_allocated_size
401 0 : + s.macro_maps_allocated_size
402 : + s.macro_maps_locations_size;
403 :
404 0 : total_used_map_size = s.ordinary_maps_used_size
405 0 : + s.macro_maps_used_size
406 : + s.macro_maps_locations_size;
407 :
408 0 : fprintf (stderr, "Number of expanded macros: %5ld\n",
409 : s.num_expanded_macros);
410 0 : if (s.num_expanded_macros != 0)
411 0 : fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
412 0 : s.num_macro_tokens / s.num_expanded_macros);
413 0 : fprintf (stderr,
414 : "\nLine Table allocations during the "
415 : "compilation process\n");
416 0 : fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
417 0 : SIZE_AMOUNT (s.num_ordinary_maps_used));
418 0 : fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
419 0 : SIZE_AMOUNT (s.ordinary_maps_used_size));
420 0 : fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
421 0 : SIZE_AMOUNT (s.num_ordinary_maps_allocated));
422 0 : fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
423 0 : SIZE_AMOUNT (s.ordinary_maps_allocated_size));
424 0 : fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
425 0 : SIZE_AMOUNT (s.num_macro_maps_used));
426 0 : fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
427 0 : SIZE_AMOUNT (s.macro_maps_used_size));
428 0 : fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
429 0 : SIZE_AMOUNT (s.macro_maps_locations_size));
430 0 : fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
431 0 : SIZE_AMOUNT (macro_maps_size));
432 0 : fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
433 0 : SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
434 0 : fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
435 0 : SIZE_AMOUNT (total_allocated_map_size));
436 0 : fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
437 0 : SIZE_AMOUNT (total_used_map_size));
438 0 : fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
439 0 : SIZE_AMOUNT (s.adhoc_table_size));
440 0 : fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
441 0 : SIZE_AMOUNT (s.adhoc_table_entries_used));
442 0 : fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
443 0 : SIZE_AMOUNT (line_table->m_num_optimized_ranges));
444 0 : fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
445 0 : SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
446 :
447 0 : fprintf (stderr, "\n");
448 0 : }
449 :
450 : /* Get location one beyond the final location in ordinary map IDX. */
451 :
452 : static location_t
453 6 : get_end_location (class line_maps *set, line_map_uint_t idx)
454 : {
455 6 : if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
456 1 : return set->highest_location;
457 :
458 5 : struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
459 5 : return MAP_START_LOCATION (next_map);
460 : }
461 :
/* Helper function for write_digit_row: emit the character for the single
   decimal DIGIT to STREAM.  */

static void
write_digit (FILE *stream, int digit)
{
  const int glyph = '0' + digit;
  fputc (glyph, stream);
}
469 :
470 : /* Helper function for dump_location_info.
471 : Write a row of numbers to STREAM, numbering a source line,
472 : giving the units, tens, hundreds etc of the column number. */
473 :
474 : static void
475 296 : write_digit_row (FILE *stream, int indent,
476 : const line_map_ordinary *map,
477 : location_t loc, int max_col, int divisor)
478 : {
479 296 : fprintf (stream, "%*c", indent, ' ');
480 296 : fprintf (stream, "|");
481 11796 : for (int column = 1; column < max_col; column++)
482 : {
483 11500 : location_t column_loc = loc + (location_t (column) << map->m_range_bits);
484 11500 : write_digit (stream, (column_loc / divisor) % 10);
485 : }
486 296 : fprintf (stream, "\n");
487 296 : }
488 :
489 : /* Write a half-closed (START) / half-open (END) interval of
490 : location_t to STREAM. */
491 :
492 : static void
493 12 : dump_location_range (FILE *stream,
494 : location_t start, location_t end)
495 : {
496 6 : fprintf (stream,
497 : " location_t interval: %llu <= loc < %llu\n",
498 : (unsigned long long) start, (unsigned long long) end);
499 0 : }
500 :
/* Write a labelled description of a half-closed (START) / half-open (END)
   interval of location_t to STREAM: first NAME on a line of its own, then
   the interval as printed by dump_location_range, then a blank line.  */

static void
dump_labelled_location_range (FILE *stream,
			      const char *name,
			      location_t start, location_t end)
{
  fprintf (stream, "%s\n", name);
  dump_location_range (stream, start, end);
  fprintf (stream, "\n");
}
513 :
514 : /* Write a visualization of the locations in the line_table to STREAM. */
515 :
516 : void
517 1 : dump_location_info (FILE *stream)
518 : {
519 1 : diagnostics::file_cache fc;
520 :
521 : /* Visualize the reserved locations. */
522 1 : dump_labelled_location_range (stream, "RESERVED LOCATIONS",
523 : 0, RESERVED_LOCATION_COUNT);
524 :
525 1 : using ULL = unsigned long long;
526 :
527 : /* Visualize the ordinary line_map instances, rendering the sources. */
528 7 : for (line_map_uint_t idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table);
529 : idx++)
530 : {
531 6 : location_t end_location = get_end_location (line_table, idx);
532 : /* half-closed: doesn't include this one. */
533 :
534 6 : const line_map_ordinary *map
535 6 : = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
536 6 : fprintf (stream, "ORDINARY MAP: %llu\n", (ULL) idx);
537 6 : dump_location_range (stream,
538 : MAP_START_LOCATION (map), end_location);
539 6 : fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
540 6 : fprintf (stream, " starting at line: %i\n",
541 : ORDINARY_MAP_STARTING_LINE_NUMBER (map));
542 6 : fprintf (stream, " column and range bits: %i\n",
543 6 : map->m_column_and_range_bits);
544 6 : fprintf (stream, " column bits: %i\n",
545 6 : map->m_column_and_range_bits - map->m_range_bits);
546 6 : fprintf (stream, " range bits: %i\n",
547 6 : map->m_range_bits);
548 6 : const char * reason;
549 6 : switch (map->reason) {
550 : case LC_ENTER:
551 : reason = "LC_ENTER";
552 : break;
553 1 : case LC_LEAVE:
554 1 : reason = "LC_LEAVE";
555 1 : break;
556 3 : case LC_RENAME:
557 3 : reason = "LC_RENAME";
558 3 : break;
559 0 : case LC_RENAME_VERBATIM:
560 0 : reason = "LC_RENAME_VERBATIM";
561 0 : break;
562 0 : case LC_ENTER_MACRO:
563 0 : reason = "LC_RENAME_MACRO";
564 0 : break;
565 0 : default:
566 0 : reason = "Unknown";
567 : }
568 6 : fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
569 :
570 6 : const line_map_ordinary *includer_map
571 6 : = linemap_included_from_linemap (line_table, map);
572 6 : fprintf (stream, " included from location: %llu",
573 6 : (ULL) linemap_included_from (map));
574 6 : if (includer_map) {
575 1 : fprintf (stream, " (in ordinary map %llu)",
576 1 : ULL (includer_map - line_table->info_ordinary.maps));
577 : }
578 6 : fprintf (stream, "\n");
579 :
580 : /* Render the span of source lines that this "map" covers. */
581 6 : for (location_t loc = MAP_START_LOCATION (map);
582 9358 : loc < end_location;
583 9352 : loc += (location_t (1) << map->m_range_bits))
584 : {
585 9356 : gcc_assert (pure_location_p (line_table, loc) );
586 :
587 9356 : expanded_location exploc
588 9356 : = linemap_expand_location (line_table, map, loc);
589 :
590 9356 : if (exploc.column == 0)
591 : {
592 : /* Beginning of a new source line: draw the line. */
593 :
594 78 : diagnostics::char_span line_text
595 78 : = fc.get_source_line (exploc.file, exploc.line);
596 78 : if (!line_text)
597 : break;
598 74 : fprintf (stream,
599 : "%s:%3i|loc:%5llu|%.*s\n",
600 : exploc.file, exploc.line,
601 : (ULL) loc,
602 74 : (int)line_text.length (), line_text.get_buffer ());
603 :
604 : /* "loc" is at column 0, which means "the whole line".
605 : Render the locations *within* the line, by underlining
606 : it, showing the location_t numeric values
607 : at each column. */
608 74 : auto max_col = (ULL (1) << map->m_column_and_range_bits) - 1;
609 74 : if (max_col > line_text.length ())
610 74 : max_col = line_text.length () + 1;
611 :
612 74 : int len_lnum = diagnostics::num_digits (exploc.line);
613 74 : if (len_lnum < 3)
614 : len_lnum = 3;
615 74 : int len_loc = diagnostics::num_digits (loc);
616 74 : if (len_loc < 5)
617 : len_loc = 5;
618 :
619 74 : int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
620 :
621 : /* Thousands. */
622 74 : if (end_location > 999)
623 74 : write_digit_row (stream, indent, map, loc, max_col, 1000);
624 :
625 : /* Hundreds. */
626 74 : if (end_location > 99)
627 74 : write_digit_row (stream, indent, map, loc, max_col, 100);
628 :
629 : /* Tens. */
630 74 : write_digit_row (stream, indent, map, loc, max_col, 10);
631 :
632 : /* Units. */
633 74 : write_digit_row (stream, indent, map, loc, max_col, 1);
634 : }
635 : }
636 6 : fprintf (stream, "\n");
637 : }
638 :
639 : /* Visualize unallocated values. */
640 1 : dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
641 : line_table->highest_location,
642 : LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
643 :
644 : /* Visualize the macro line_map instances, rendering the sources. */
645 3 : for (line_map_uint_t i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
646 : {
647 : /* Each macro map that is allocated owns location_t values
648 : that are *lower* that the one before them.
649 : Hence it's meaningful to view them either in order of ascending
650 : source locations, or in order of ascending macro map index. */
651 2 : const bool ascending_location_ts = true;
652 2 : auto idx = (ascending_location_ts
653 2 : ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
654 2 : : i);
655 2 : const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
656 2 : fprintf (stream, "MACRO %llu: %s (%u tokens)\n",
657 : (ULL) idx,
658 : linemap_map_get_macro_name (map),
659 : MACRO_MAP_NUM_MACRO_TOKENS (map));
660 4 : dump_location_range (stream,
661 2 : map->start_location,
662 2 : (map->start_location
663 2 : + MACRO_MAP_NUM_MACRO_TOKENS (map)));
664 2 : inform (map->get_expansion_point_location (),
665 : "expansion point is location %llu",
666 2 : (ULL) map->get_expansion_point_location ());
667 2 : fprintf (stream, " map->start_location: %llu\n",
668 2 : (ULL) map->start_location);
669 :
670 2 : fprintf (stream, " macro_locations:\n");
671 4 : for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
672 : {
673 2 : location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
674 2 : location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
675 :
676 : /* linemap_add_macro_token encodes token numbers in an expansion
677 : by putting them after MAP_START_LOCATION. */
678 :
679 : /* I'm typically seeing 4 uninitialized entries at the end of
680 : 0xafafafaf.
681 : This appears to be due to macro.cc:replace_args
682 : adding 2 extra args for padding tokens; presumably there may
683 : be a leading and/or trailing padding token injected,
684 : each for 2 more location slots.
685 : This would explain there being up to 4 location_ts slots
686 : that may be uninitialized. */
687 :
688 2 : fprintf (stream, " %u: %llu, %llu\n",
689 : i,
690 : (ULL) x,
691 : (ULL) y);
692 2 : if (x == y)
693 : {
694 2 : if (x < MAP_START_LOCATION (map))
695 2 : inform (x, "token %u has %<x-location == y-location == %llu%>",
696 : i, (ULL) x);
697 : else
698 0 : fprintf (stream,
699 : "x-location == y-location == %llu"
700 : " encodes token # %u\n",
701 : (ULL) x,
702 0 : (unsigned int)(x - MAP_START_LOCATION (map)));
703 : }
704 : else
705 : {
706 0 : inform (x, "token %u has %<x-location == %llu%>", i, (ULL) x);
707 0 : inform (x, "token %u has %<y-location == %llu%>", i, (ULL) y);
708 : }
709 : }
710 2 : fprintf (stream, "\n");
711 : }
712 :
713 : /* It appears that MAX_LOCATION_T itself is never assigned to a
714 : macro map, presumably due to an off-by-one error somewhere
715 : between the logic in linemap_enter_macro and
716 : LINEMAPS_MACRO_LOWEST_LOCATION. */
717 1 : dump_labelled_location_range (stream, "MAX_LOCATION_T",
718 : MAX_LOCATION_T,
719 : MAX_LOCATION_T + 1);
720 :
721 : /* Visualize ad-hoc values. */
722 1 : dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
723 : MAX_LOCATION_T + 1, location_t (-1));
724 1 : }
725 :
726 : /* string_concat's constructor. */
727 :
728 3168362 : string_concat::string_concat (int num, location_t *locs)
729 3168362 : : m_num (num)
730 : {
731 3168362 : m_locs = ggc_vec_alloc <location_t> (num);
732 37162920 : for (int i = 0; i < num; i++)
733 33994558 : m_locs[i] = locs[i];
734 3168362 : }
735 :
736 : /* string_concat_db's constructor. */
737 :
738 210762 : string_concat_db::string_concat_db ()
739 : {
740 210762 : m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
741 210762 : }
742 :
743 : /* Record that a string concatenation occurred, covering NUM
744 : string literal tokens. LOCS is an array of size NUM, containing the
745 : locations of the tokens. A copy of LOCS is taken. */
746 :
747 : void
748 3168368 : string_concat_db::record_string_concatenation (int num, location_t *locs)
749 : {
750 3168368 : gcc_assert (num > 1);
751 3168368 : gcc_assert (locs);
752 :
753 3168368 : location_t key_loc = get_key_loc (locs[0]);
754 : /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
755 : any data now recorded under key 'key_loc' would be overwritten by a
756 : subsequent call with the same key 'key_loc'. */
757 3168368 : if (RESERVED_LOCATION_P (key_loc))
758 6 : return;
759 :
760 3168362 : string_concat *concat
761 3168362 : = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
762 3168362 : m_table->put (key_loc, concat);
763 : }
764 :
765 : /* Determine if LOC was the location of the initial token of a
766 : concatenation of string literal tokens.
767 : If so, *OUT_NUM is written to with the number of tokens, and
768 : *OUT_LOCS with the location of an array of locations of the
769 : tokens, and return true. *OUT_LOCS is a borrowed pointer to
770 : storage owned by the string_concat_db.
771 : Otherwise, return false. */
772 :
773 : bool
774 34529 : string_concat_db::get_string_concatenation (location_t loc,
775 : int *out_num,
776 : location_t **out_locs)
777 : {
778 34529 : gcc_assert (out_num);
779 34529 : gcc_assert (out_locs);
780 :
781 34529 : location_t key_loc = get_key_loc (loc);
782 : /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
783 : discussion in 'string_concat_db::record_string_concatenation'. */
784 34529 : if (RESERVED_LOCATION_P (key_loc))
785 : return false;
786 :
787 34527 : string_concat **concat = m_table->get (key_loc);
788 34527 : if (!concat)
789 : return false;
790 :
791 4352 : *out_num = (*concat)->m_num;
792 4352 : *out_locs =(*concat)->m_locs;
793 4352 : return true;
794 : }
795 :
796 : /* Internal function. Canonicalize LOC into a form suitable for
797 : use as a key within the database, stripping away macro expansion,
798 : ad-hoc information, and range information, using the location of
799 : the start of LOC within an ordinary linemap. */
800 :
801 : location_t
802 3202897 : string_concat_db::get_key_loc (location_t loc)
803 : {
804 3202897 : loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
805 : NULL);
806 :
807 3202897 : loc = get_range_from_loc (line_table, loc).m_start;
808 :
809 3202897 : return loc;
810 : }
811 :
812 : /* Helper class for use within get_substring_ranges_for_loc.
813 : An vec of cpp_string with responsibility for releasing all of the
814 : str->text for each str in the vector. */
815 :
816 : class auto_cpp_string_vec : public auto_vec <cpp_string>
817 : {
818 : public:
819 34529 : auto_cpp_string_vec (int alloc)
820 69058 : : auto_vec <cpp_string> (alloc) {}
821 :
822 34529 : ~auto_cpp_string_vec ()
823 : {
824 : /* Clean up the copies within this vec. */
825 34529 : int i;
826 34529 : cpp_string *str;
827 70016 : FOR_EACH_VEC_ELT (*this, i, str)
828 35487 : free (const_cast <unsigned char *> (str->text));
829 34529 : }
830 : };
831 :
832 : /* Attempt to populate RANGES with source location information on the
833 : individual characters within the string literal found at STRLOC.
834 : If CONCATS is non-NULL, then any string literals that the token at
835 : STRLOC was concatenated with are also added to RANGES.
836 :
837 : Return NULL if successful, or an error message if any errors occurred (in
838 : which case RANGES may be only partially populated and should not
839 : be used).
840 :
841 : This is implemented by re-parsing the relevant source line(s). */
842 :
843 : static const char *
844 36775 : get_substring_ranges_for_loc (cpp_reader *pfile,
845 : diagnostics::file_cache &fc,
846 : string_concat_db *concats,
847 : location_t strloc,
848 : enum cpp_ttype type,
849 : cpp_substring_ranges &ranges)
850 : {
851 36775 : gcc_assert (pfile);
852 :
853 36775 : if (strloc == UNKNOWN_LOCATION)
854 : return "unknown location";
855 :
856 : /* Reparsing the strings requires accurate location information.
857 : If -ftrack-macro-expansion has been overridden from its default
858 : of 2, then we might have a location of a macro expansion point,
859 : rather than the location of the literal itself.
860 : Avoid this by requiring that we have full macro expansion tracking
861 : for substring locations to be available. */
862 36775 : if (cpp_get_options (pfile)->track_macro_expansion != 2)
863 : return "track_macro_expansion != 2";
864 :
865 : /* If #line or # 44 "file"-style directives are present, then there's
866 : no guarantee that the line numbers we have can be used to locate
867 : the strings. For example, we might have a .i file with # directives
868 : pointing back to lines within a .c file, but the .c file might
869 : have been edited since the .i file was created.
870 : In such a case, the safest course is to disable on-demand substring
871 : locations. */
872 34532 : if (line_table->seen_line_directive)
873 : return "seen line directive";
874 :
875 : /* If string concatenation has occurred at STRLOC, get the locations
876 : of all of the literal tokens making up the compound string.
877 : Otherwise, just use STRLOC. */
878 34529 : int num_locs = 1;
879 34529 : location_t *strlocs = &strloc;
880 34529 : if (concats)
881 34529 : concats->get_string_concatenation (strloc, &num_locs, &strlocs);
882 :
883 34529 : auto_cpp_string_vec strs (num_locs);
884 34529 : auto_vec <cpp_string_location_reader> loc_readers (num_locs);
885 70009 : for (int i = 0; i < num_locs; i++)
886 : {
887 : /* Get range of strloc. We will use it to locate the start and finish
888 : of the literal token within the line. */
889 41574 : source_range src_range = get_range_from_loc (line_table, strlocs[i]);
890 :
891 41574 : if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
892 : {
893 : /* If the string token was within a macro expansion, then we can
894 : cope with it for the simple case where we have a single token.
895 : Otherwise, bail out. */
896 1171 : if (src_range.m_start != src_range.m_finish)
897 6094 : return "macro expansion";
898 : }
899 : else
900 : {
901 40403 : if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
902 : /* If so, we can't reliably determine where the token started within
903 : its line. */
904 : return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
905 :
906 34731 : if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
907 : /* If so, we can't reliably determine where the token finished
908 : within its line. */
909 : return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
910 : }
911 :
912 35731 : expanded_location start
913 35731 : = expand_location_to_spelling_point (src_range.m_start,
914 : location_aspect::start);
915 35731 : expanded_location finish
916 35731 : = expand_location_to_spelling_point (src_range.m_finish,
917 : location_aspect::finish);
918 35731 : if (start.file != finish.file)
919 : return "range endpoints are in different files";
920 35731 : if (start.line != finish.line)
921 : return "range endpoints are on different lines";
922 35488 : if (start.column > finish.column)
923 : return "range endpoints are reversed";
924 :
925 35488 : diagnostics::char_span line = fc.get_source_line (start.file, start.line);
926 35488 : if (!line)
927 : return "unable to read source line";
928 :
929 : /* Determine the location of the literal (including quotes
930 : and leading prefix chars, such as the 'u' in a u""
931 : token). */
932 35488 : size_t literal_length = finish.column - start.column + 1;
933 :
934 : /* Ensure that we don't crash if we got the wrong location. */
935 35488 : if (start.column < 1)
936 : return "zero start column";
937 35488 : if (line.length () < (start.column - 1 + literal_length))
938 : return "line is not wide enough";
939 :
940 35487 : diagnostics::char_span literal
941 35487 : = line.subspan (start.column - 1, literal_length);
942 :
943 35487 : cpp_string from;
944 35487 : from.len = literal_length;
945 : /* Make a copy of the literal, to avoid having to rely on
946 : the lifetime of the copy of the line within the cache.
947 : This will be released by the auto_cpp_string_vec dtor. */
948 35487 : from.text = (unsigned char *)literal.xstrdup ();
949 35487 : strs.safe_push (from);
950 :
951 : /* For very long lines, a new linemap could have started
952 : halfway through the token.
953 : Ensure that the loc_reader uses the linemap of the
954 : *end* of the token for its start location. */
955 35487 : const line_map_ordinary *start_ord_map;
956 35487 : linemap_resolve_location (line_table, src_range.m_start,
957 : LRK_SPELLING_LOCATION, &start_ord_map);
958 35487 : const line_map_ordinary *final_ord_map;
959 35487 : linemap_resolve_location (line_table, src_range.m_finish,
960 : LRK_SPELLING_LOCATION, &final_ord_map);
961 35487 : if (start_ord_map == NULL || final_ord_map == NULL)
962 : return "failed to get ordinary maps";
963 : /* Bulletproofing. We ought to only have different ordinary maps
964 : for start vs finish due to line-length jumps. */
965 35486 : if (start_ord_map != final_ord_map
966 6865 : && start_ord_map->to_file != final_ord_map->to_file)
967 : return "start and finish are spelled in different ordinary maps";
968 : /* The file from linemap_resolve_location ought to match that from
969 : expand_location_to_spelling_point. */
970 35486 : if (start_ord_map->to_file != start.file)
971 : return "mismatching file after resolving linemap";
972 :
973 35480 : location_t start_loc
974 35480 : = linemap_position_for_line_and_column (line_table, final_ord_map,
975 : start.line, start.column);
976 :
977 35480 : cpp_string_location_reader loc_reader (start_loc, line_table);
978 35480 : loc_readers.safe_push (loc_reader);
979 : }
980 :
981 : /* Rerun cpp_interpret_string, or rather, a modified version of it. */
982 56870 : const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
983 : loc_readers.address (),
984 : num_locs, &ranges, type);
985 28435 : if (err)
986 : return err;
987 :
988 : /* Success: "ranges" should now contain information on the string. */
989 : return NULL;
990 34529 : }
991 :
992 : /* Attempt to populate *OUT_LOC with source location information on the
993 : given characters within the string literal found at STRLOC.
994 : CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
995 : character set.
996 :
997 : For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
998 : and string literal "012345\n789"
999 : *OUT_LOC is written to with:
1000 : "012345\n789"
1001 : ~^~~~~
1002 :
1003 : If CONCATS is non-NULL, then any string literals that the token at
1004 : STRLOC was concatenated with are also considered.
1005 :
1006 : This is implemented by re-parsing the relevant source line(s).
1007 :
1008 : Return NULL if successful, or an error message if any errors occurred.
1009 : Error messages are intended for GCC developers (to help debugging) rather
1010 : than for end-users. */
1011 :
1012 : const char *
1013 11123 : get_location_within_string (cpp_reader *pfile,
1014 : diagnostics::file_cache &fc,
1015 : string_concat_db *concats,
1016 : location_t strloc,
1017 : enum cpp_ttype type,
1018 : int caret_idx, int start_idx, int end_idx,
1019 : location_t *out_loc)
1020 : {
1021 11123 : gcc_checking_assert (caret_idx >= 0);
1022 11123 : gcc_checking_assert (start_idx >= 0);
1023 11123 : gcc_checking_assert (end_idx >= 0);
1024 11123 : gcc_assert (out_loc);
1025 :
1026 11123 : cpp_substring_ranges ranges;
1027 11123 : const char *err
1028 11123 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1029 11123 : if (err)
1030 : return err;
1031 :
1032 8414 : if (caret_idx >= ranges.get_num_ranges ())
1033 : return "caret_idx out of range";
1034 8414 : if (start_idx >= ranges.get_num_ranges ())
1035 : return "start_idx out of range";
1036 8414 : if (end_idx >= ranges.get_num_ranges ())
1037 : return "end_idx out of range";
1038 :
1039 8414 : *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1040 8414 : ranges.get_range (start_idx).m_start,
1041 8414 : ranges.get_range (end_idx).m_finish);
1042 8414 : return NULL;
1043 11123 : }
1044 :
1045 : /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1046 :
1047 : location_t
1048 56196149 : location_with_discriminator (location_t locus, int discriminator)
1049 : {
1050 56196149 : tree block = LOCATION_BLOCK (locus);
1051 56196149 : source_range src_range = get_range_from_loc (line_table, locus);
1052 56196149 : locus = get_pure_location (locus);
1053 :
1054 56196149 : if (locus == UNKNOWN_LOCATION)
1055 : return locus;
1056 :
1057 31932914 : return line_table->get_or_create_combined_loc (locus, src_range, block,
1058 31932914 : discriminator);
1059 : }
1060 :
1061 : /* Return TRUE if LOCUS represents a location with a discriminator. */
1062 :
1063 : bool
1064 77877468 : has_discriminator (location_t locus)
1065 : {
1066 77877468 : return get_discriminator_from_loc (locus) != 0;
1067 : }
1068 :
1069 : /* Return the discriminator for LOCUS. */
1070 :
1071 : int
1072 404179215 : get_discriminator_from_loc (location_t locus)
1073 : {
1074 404179215 : return get_discriminator_from_loc (line_table, locus);
1075 : }
1076 :
1077 : /* Create a location with hierarchical discriminator components. */
1078 :
1079 : location_t
1080 3475088 : location_with_discriminator_components (location_t locus,
1081 : const discriminator_components &comp)
1082 : {
1083 3475088 : gcc_assert (comp.base <= DISCR_BASE_MAX);
1084 3475088 : gcc_assert (comp.multiplicity <= DISCR_MULTIPLICITY_MAX);
1085 3475088 : gcc_assert (comp.copyid <= DISCR_COPYID_MAX);
1086 3475088 : unsigned int discriminator = (comp.base << DISCR_BASE_SHIFT)
1087 3475088 : | (comp.multiplicity << DISCR_MULTIPLICITY_SHIFT)
1088 3475088 : | (comp.copyid << DISCR_COPYID_SHIFT);
1089 3475088 : return location_with_discriminator (locus, discriminator);
1090 : }
1091 :
1092 : /* Get hierarchical discriminator components from a location. */
1093 :
1094 : discriminator_components
1095 3475088 : get_discriminator_components_from_loc (location_t locus)
1096 : {
1097 3475088 : unsigned int discriminator = get_discriminator_from_loc (locus);
1098 3475088 : discriminator_components comp;
1099 3475088 : comp.base = discriminator & DISCR_BASE_MASK;
1100 3475088 : comp.multiplicity = (discriminator >> DISCR_MULTIPLICITY_SHIFT)
1101 3475088 : & DISCR_MULTIPLICITY_MASK;
1102 3475088 : comp.copyid = (discriminator >> DISCR_COPYID_SHIFT) & DISCR_COPYID_MASK;
1103 3475088 : return comp;
1104 : }
1105 :
1106 : #if CHECKING_P
1107 :
1108 : namespace selftest {
1109 :
1110 : /* Selftests of location handling. */
1111 :
1112 : /* Attempt to populate *OUT_RANGE with source location information on the
1113 : given character within the string literal found at STRLOC.
1114 : CHAR_IDX refers to an offset within the execution character set.
1115 : If CONCATS is non-NULL, then any string literals that the token at
1116 : STRLOC was concatenated with are also considered.
1117 :
1118 : This is implemented by re-parsing the relevant source line(s).
1119 :
1120 : Return NULL if successful, or an error message if any errors occurred.
1121 : Error messages are intended for GCC developers (to help debugging) rather
1122 : than for end-users. */
1123 :
1124 : static const char *
1125 23748 : get_source_range_for_char (cpp_reader *pfile,
1126 : diagnostics::file_cache &fc,
1127 : string_concat_db *concats,
1128 : location_t strloc,
1129 : enum cpp_ttype type,
1130 : int char_idx,
1131 : source_range *out_range)
1132 : {
1133 23748 : gcc_checking_assert (char_idx >= 0);
1134 23748 : gcc_assert (out_range);
1135 :
1136 23748 : cpp_substring_ranges ranges;
1137 23748 : const char *err
1138 23748 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1139 23748 : if (err)
1140 : return err;
1141 :
1142 18652 : if (char_idx >= ranges.get_num_ranges ())
1143 : return "char_idx out of range";
1144 :
1145 18652 : *out_range = ranges.get_range (char_idx);
1146 18652 : return NULL;
1147 23748 : }
1148 :
1149 : /* As get_source_range_for_char, but write to *OUT the number
1150 : of ranges that are available. */
1151 :
1152 : static const char *
1153 1268 : get_num_source_ranges_for_substring (cpp_reader *pfile,
1154 : diagnostics::file_cache &fc,
1155 : string_concat_db *concats,
1156 : location_t strloc,
1157 : enum cpp_ttype type,
1158 : int *out)
1159 : {
1160 1268 : gcc_assert (out);
1161 :
1162 1268 : cpp_substring_ranges ranges;
1163 1268 : const char *err
1164 1268 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1165 :
1166 1268 : if (err)
1167 : return err;
1168 :
1169 884 : *out = ranges.get_num_ranges ();
1170 884 : return NULL;
1171 1268 : }
1172 :
1173 : /* Selftests of location handling. */
1174 :
1175 : /* Verify that compare() on linenum_type handles comparisons over the full
1176 : range of the type. */
1177 :
1178 : static void
1179 4 : test_linenum_comparisons ()
1180 : {
1181 4 : linenum_type min_line (0);
1182 4 : linenum_type max_line (0xffffffff);
1183 4 : ASSERT_EQ (0, compare (min_line, min_line));
1184 4 : ASSERT_EQ (0, compare (max_line, max_line));
1185 :
1186 4 : ASSERT_GT (compare (max_line, min_line), 0);
1187 4 : ASSERT_LT (compare (min_line, max_line), 0);
1188 4 : }
1189 :
1190 : /* Helper function for verifying location data: when location_t
1191 : values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1192 : as having column 0. */
1193 :
1194 : static bool
1195 65136 : should_have_column_data_p (location_t loc)
1196 : {
1197 65136 : if (IS_ADHOC_LOC (loc))
1198 20240 : loc = get_location_from_adhoc_loc (line_table, loc);
1199 65136 : if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1200 6452 : return false;
1201 : return true;
1202 : }
1203 :
1204 : /* Selftest for should_have_column_data_p. */
1205 :
1206 : static void
1207 4 : test_should_have_column_data_p ()
1208 : {
1209 4 : ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1210 4 : ASSERT_TRUE
1211 : (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1212 4 : ASSERT_FALSE
1213 : (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1214 4 : }
1215 :
1216 : /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1217 : on LOC. */
1218 :
1219 : static void
1220 1068 : assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1221 : location_t loc)
1222 : {
1223 1068 : ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1224 1068 : ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1225 : /* If location_t values are sufficiently high, then column numbers
1226 : will be unavailable and LOCATION_COLUMN (loc) will be 0.
1227 : When close to the threshold, column numbers *may* be present: if
1228 : the final linemap before the threshold contains a line that straddles
1229 : the threshold, locations in that line have column information. */
1230 1068 : if (should_have_column_data_p (loc))
1231 660 : ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1232 1068 : }
1233 :
1234 : /* Various selftests involve constructing a line table and one or more
1235 : line maps within it.
1236 :
1237 : For maximum test coverage we want to run these tests with a variety
1238 : of situations:
1239 : - line_table->default_range_bits: some frontends use a non-zero value
1240 : and others use zero
1241 : - the fallback modes within line-map.cc: there are various threshold
1242 : values for location_t beyond which line-map.cc changes
1243 : behavior (disabling of the range-packing optimization, disabling
1244 : of column-tracking). We can exercise these by starting the line_table
1245 : at interesting values at or near these thresholds.
1246 :
1247 : The following struct describes a particular case within our test
1248 : matrix. */
1249 :
1250 : class line_table_case
1251 : {
1252 : public:
1253 5860 : line_table_case (int default_range_bits, location_t base_location)
1254 5860 : : m_default_range_bits (default_range_bits),
1255 5860 : m_base_location (base_location)
1256 : {}
1257 :
1258 : int m_default_range_bits;
1259 : location_t m_base_location;
1260 : };
1261 :
1262 : /* Constructor. Store the old value of line_table, and create a new
1263 : one, using sane defaults. */
1264 :
1265 21 : line_table_test::line_table_test ()
1266 : {
1267 21 : gcc_assert (saved_line_table == NULL);
1268 21 : saved_line_table = line_table;
1269 21 : line_table = ggc_alloc<line_maps> ();
1270 21 : linemap_init (line_table, BUILTINS_LOCATION);
1271 21 : gcc_assert (saved_line_table->m_reallocator);
1272 21 : line_table->m_reallocator = saved_line_table->m_reallocator;
1273 21 : gcc_assert (saved_line_table->m_round_alloc_size);
1274 21 : line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
1275 21 : line_table->default_range_bits = 0;
1276 21 : }
1277 :
1278 : /* Constructor. Store the old value of line_table, and create a new
1279 : one, using the situation described in CASE_. */
1280 :
1281 6724 : line_table_test::line_table_test (const line_table_case &case_)
1282 : {
1283 6724 : gcc_assert (saved_line_table == NULL);
1284 6724 : saved_line_table = line_table;
1285 6724 : line_table = ggc_alloc<line_maps> ();
1286 6724 : linemap_init (line_table, BUILTINS_LOCATION);
1287 6724 : gcc_assert (saved_line_table->m_reallocator);
1288 6724 : line_table->m_reallocator = saved_line_table->m_reallocator;
1289 6724 : gcc_assert (saved_line_table->m_round_alloc_size);
1290 6724 : line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
1291 6724 : line_table->default_range_bits = case_.m_default_range_bits;
1292 6724 : if (case_.m_base_location)
1293 : {
1294 6160 : line_table->highest_location = case_.m_base_location;
1295 6160 : line_table->highest_line = case_.m_base_location;
1296 : }
1297 6724 : }
1298 :
1299 : /* Destructor. Restore the old value of line_table. */
1300 :
1301 6745 : line_table_test::~line_table_test ()
1302 : {
1303 6745 : gcc_assert (saved_line_table != NULL);
1304 6745 : line_table = saved_line_table;
1305 6745 : saved_line_table = NULL;
1306 6745 : }
1307 :
1308 : /* Verify basic operation of ordinary linemaps. */
1309 :
1310 : static void
1311 96 : test_accessing_ordinary_linemaps (const line_table_case &case_)
1312 : {
1313 96 : line_table_test ltt (case_);
1314 :
1315 : /* Build a simple linemap describing some locations. */
1316 96 : linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1317 :
1318 96 : linemap_line_start (line_table, 1, 100);
1319 96 : location_t loc_a = linemap_position_for_column (line_table, 1);
1320 96 : location_t loc_b = linemap_position_for_column (line_table, 23);
1321 :
1322 96 : linemap_line_start (line_table, 2, 100);
1323 96 : location_t loc_c = linemap_position_for_column (line_table, 1);
1324 96 : location_t loc_d = linemap_position_for_column (line_table, 17);
1325 :
1326 : /* Example of a very long line. */
1327 96 : linemap_line_start (line_table, 3, 2000);
1328 96 : location_t loc_e = linemap_position_for_column (line_table, 700);
1329 :
1330 : /* Transitioning back to a short line. */
1331 96 : linemap_line_start (line_table, 4, 0);
1332 96 : location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1333 :
1334 96 : if (should_have_column_data_p (loc_back_to_short))
1335 : {
1336 : /* Verify that we switched to short lines in the linemap. */
1337 56 : line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1338 56 : ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1339 : }
1340 :
1341 : /* Example of a line that will eventually be seen to be longer
1342 : than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1343 : below that. */
1344 96 : linemap_line_start (line_table, 5, 2000);
1345 :
1346 96 : location_t loc_start_of_very_long_line
1347 96 : = linemap_position_for_column (line_table, 2000);
1348 96 : location_t loc_too_wide
1349 96 : = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 1);
1350 96 : location_t loc_too_wide_2
1351 96 : = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 2);
1352 :
1353 : /* ...and back to a sane line length. */
1354 96 : linemap_line_start (line_table, 6, 100);
1355 96 : location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1356 :
1357 96 : linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1358 :
1359 : /* Multiple files. */
1360 96 : linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1361 96 : linemap_line_start (line_table, 1, 200);
1362 96 : location_t loc_f = linemap_position_for_column (line_table, 150);
1363 96 : linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1364 :
1365 : /* Verify that we can recover the location info. */
1366 96 : assert_loceq ("foo.c", 1, 1, loc_a);
1367 96 : assert_loceq ("foo.c", 1, 23, loc_b);
1368 96 : assert_loceq ("foo.c", 2, 1, loc_c);
1369 96 : assert_loceq ("foo.c", 2, 17, loc_d);
1370 96 : assert_loceq ("foo.c", 3, 700, loc_e);
1371 96 : assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1372 :
1373 : /* In the very wide line, the initial location should be fully tracked. */
1374 96 : assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1375 : /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1376 : be disabled. */
1377 96 : assert_loceq ("foo.c", 5, 0, loc_too_wide);
1378 96 : assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1379 : /*...and column-tracking should be re-enabled for subsequent lines. */
1380 96 : assert_loceq ("foo.c", 6, 10, loc_sane_again);
1381 :
1382 96 : assert_loceq ("bar.c", 1, 150, loc_f);
1383 :
1384 96 : ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1385 96 : ASSERT_TRUE (pure_location_p (line_table, loc_a));
1386 :
1387 : /* Verify using make_location to build a range, and extracting data
1388 : back from it. */
1389 96 : location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1390 96 : ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1391 96 : ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1392 96 : source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1393 96 : ASSERT_EQ (loc_b, src_range.m_start);
1394 96 : ASSERT_EQ (loc_d, src_range.m_finish);
1395 96 : }
1396 :
1397 : /* Verify various properties of UNKNOWN_LOCATION. */
1398 :
1399 : static void
1400 4 : test_unknown_location ()
1401 : {
1402 4 : ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1403 4 : ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1404 4 : ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1405 4 : }
1406 :
1407 : /* Verify various properties of BUILTINS_LOCATION. */
1408 :
1409 : static void
1410 4 : test_builtins ()
1411 : {
1412 4 : assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
1413 4 : ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1414 4 : }
1415 :
1416 : /* Regression test for make_location.
1417 : Ensure that we use pure locations for the start/finish of the range,
1418 : rather than storing a packed or ad-hoc range as the start/finish. */
1419 :
1420 : static void
1421 96 : test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1422 : {
1423 : /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1424 : with C++ frontend.
1425 : ....................0000000001111111111222.
1426 : ....................1234567890123456789012. */
1427 96 : const char *content = " r += !aaa == bbb;\n";
1428 96 : temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1429 96 : line_table_test ltt (case_);
1430 96 : linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1431 :
1432 96 : const location_t c11 = linemap_position_for_column (line_table, 11);
1433 96 : const location_t c12 = linemap_position_for_column (line_table, 12);
1434 96 : const location_t c13 = linemap_position_for_column (line_table, 13);
1435 96 : const location_t c14 = linemap_position_for_column (line_table, 14);
1436 96 : const location_t c21 = linemap_position_for_column (line_table, 21);
1437 :
1438 96 : if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1439 32 : return;
1440 :
1441 : /* Use column 13 for the caret location, arbitrarily, to verify that we
1442 : handle start != caret. */
1443 64 : const location_t aaa = make_location (c13, c12, c14);
1444 64 : ASSERT_EQ (c13, get_pure_location (aaa));
1445 64 : ASSERT_EQ (c12, get_start (aaa));
1446 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1447 64 : ASSERT_EQ (c14, get_finish (aaa));
1448 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1449 :
1450 : /* Make a location using a location with a range as the start-point. */
1451 64 : const location_t not_aaa = make_location (c11, aaa, c14);
1452 64 : ASSERT_EQ (c11, get_pure_location (not_aaa));
1453 : /* It should use the start location of the range, not store the range
1454 : itself. */
1455 64 : ASSERT_EQ (c12, get_start (not_aaa));
1456 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1457 64 : ASSERT_EQ (c14, get_finish (not_aaa));
1458 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1459 :
1460 : /* Similarly, make a location with a range as the end-point. */
1461 64 : const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1462 64 : ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1463 64 : ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1464 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1465 64 : ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1466 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1467 64 : const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1468 : /* It should use the finish location of the range, not store the range
1469 : itself. */
1470 64 : ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1471 64 : ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1472 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1473 64 : ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1474 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1475 96 : }
1476 :
1477 : /* Tests of lexing. */
1478 :
1479 : /* Verify that token TOK from PARSER has cpp_token_as_text
1480 : equal to EXPECTED_TEXT. */
1481 :
#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    /* Spell the token, then compare it as a plain C string.  */       \
    const char *spelled_tok_                                            \
      = (const char *)cpp_token_as_text ((PARSER), (TOK));              \
    ASSERT_STREQ ((EXPECTED_TEXT), spelled_tok_);                       \
  SELFTEST_END_STMT
1487 :
1488 : /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1489 : and ranges from EXP_START_COL to EXP_FINISH_COL.
1490 : Use LOC as the effective location of the selftest. */
1491 :
1492 : static void
1493 576 : assert_token_loc_eq (const location &loc,
1494 : const cpp_token *tok,
1495 : const char *exp_filename, int exp_linenum,
1496 : int exp_start_col, int exp_finish_col)
1497 : {
1498 576 : location_t tok_loc = tok->src_loc;
1499 576 : ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1500 576 : ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1501 :
1502 : /* If location_t values are sufficiently high, then column numbers
1503 : will be unavailable. */
1504 576 : if (!should_have_column_data_p (tok_loc))
1505 196 : return;
1506 :
1507 380 : ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1508 380 : source_range tok_range = get_range_from_loc (line_table, tok_loc);
1509 380 : ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1510 380 : ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1511 : }
1512 :
1513 : /* Use assert_token_loc_eq to verify the TOK->src_loc, using
1514 : SELFTEST_LOCATION as the effective location of the selftest. */
1515 :
#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK),                                           \
                       (EXP_FILENAME),                                  \
                       (EXP_LINENUM),                                   \
                       (EXP_START_COL),                                 \
                       (EXP_FINISH_COL))
1520 :
1521 : /* Test of lexing a file using libcpp, verifying tokens and their
1522 : location information. */
1523 :
1524 : static void
1525 96 : test_lexer (const line_table_case &case_)
1526 : {
1527 : /* Create a tempfile and write some text to it. */
1528 96 : const char *content =
1529 : /*00000000011111111112222222222333333.3333444444444.455555555556
1530 : 12345678901234567890123456789012345.6789012345678.901234567890. */
1531 : ("test_name /* c-style comment */\n"
1532 : " \"test literal\"\n"
1533 : " // test c++-style comment\n"
1534 : " 42\n");
1535 96 : temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1536 :
1537 96 : line_table_test ltt (case_);
1538 :
1539 96 : cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1540 :
1541 96 : const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1542 96 : ASSERT_NE (fname, NULL);
1543 :
1544 : /* Verify that we get the expected tokens back, with the correct
1545 : location information. */
1546 :
1547 96 : location_t loc;
1548 96 : const cpp_token *tok;
1549 96 : tok = cpp_get_token_with_location (parser, &loc);
1550 96 : ASSERT_NE (tok, NULL);
1551 96 : ASSERT_EQ (tok->type, CPP_NAME);
1552 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1553 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1554 :
1555 96 : tok = cpp_get_token_with_location (parser, &loc);
1556 96 : ASSERT_NE (tok, NULL);
1557 96 : ASSERT_EQ (tok->type, CPP_STRING);
1558 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1559 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1560 :
1561 96 : tok = cpp_get_token_with_location (parser, &loc);
1562 96 : ASSERT_NE (tok, NULL);
1563 96 : ASSERT_EQ (tok->type, CPP_NUMBER);
1564 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1565 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1566 :
1567 96 : tok = cpp_get_token_with_location (parser, &loc);
1568 96 : ASSERT_NE (tok, NULL);
1569 96 : ASSERT_EQ (tok->type, CPP_EOF);
1570 :
1571 96 : cpp_finish (parser, NULL);
1572 96 : cpp_destroy (parser);
1573 96 : }
1574 :
1575 : /* Forward decls. */
1576 :
1577 : class lexer_test;
1578 : class lexer_test_options;
1579 :
1580 : /* A class for specifying options of a lexer_test.
1581 : The "apply" vfunc is called during the lexer_test constructor. */
1582 :
1583 192 : class lexer_test_options
1584 : {
1585 : public:
1586 : virtual void apply (lexer_test &) = 0;
1587 : };
1588 :
1589 : /* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
1590 : in its dtor.
1591 :
1592 : This is needed by struct lexer_test to ensure that the cleanup of the
1593 : cpp_reader happens *after* the cleanup of the temp_source_file. */
1594 :
1595 : class cpp_reader_ptr
1596 : {
1597 : public:
1598 2304 : cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
1599 :
1600 2304 : ~cpp_reader_ptr ()
1601 : {
1602 2304 : cpp_finish (m_ptr, NULL);
1603 2304 : cpp_destroy (m_ptr);
1604 2304 : }
1605 :
1606 2304 : operator cpp_reader * () const { return m_ptr; }
1607 :
1608 : private:
1609 : cpp_reader *m_ptr;
1610 : };
1611 :
1612 : /* A struct for writing lexer tests. */
1613 :
1614 : class lexer_test
1615 : {
1616 : public:
1617 : lexer_test (const line_table_case &case_, const char *content,
1618 : lexer_test_options *options);
1619 : ~lexer_test ();
1620 :
1621 : const cpp_token *get_token ();
1622 :
1623 : /* The ordering of these fields matters.
1624 : The line_table_test must be first, since the cpp_reader_ptr
1625 : uses it.
1626 : The cpp_reader must be cleaned up *after* the temp_source_file
1627 : since the filenames in input.cc's input cache are owned by the
1628 : cpp_reader; in particular, when ~temp_source_file evicts the
1629 : filename the filenames must still be alive. */
1630 : line_table_test m_ltt;
1631 : cpp_reader_ptr m_parser;
1632 : temp_source_file m_tempfile;
1633 : diagnostics::file_cache m_file_cache;
1634 : string_concat_db m_concats;
1635 : bool m_implicitly_expect_EOF;
1636 : };
1637 :
1638 : /* Use an EBCDIC encoding for the execution charset, specifically
1639 : IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
1640 :
1641 : This exercises iconv integration within libcpp.
1642 : Not every build of iconv supports the given charset,
1643 : so we need to flag this error and handle it gracefully. */
1644 :
1645 : class ebcdic_execution_charset : public lexer_test_options
1646 : {
1647 : public:
1648 96 : ebcdic_execution_charset () : m_num_iconv_errors (0)
1649 : {
1650 96 : gcc_assert (s_singleton == NULL);
1651 96 : s_singleton = this;
1652 96 : }
1653 96 : ~ebcdic_execution_charset ()
1654 96 : {
1655 96 : gcc_assert (s_singleton == this);
1656 96 : s_singleton = NULL;
1657 96 : }
1658 :
1659 96 : void apply (lexer_test &test) final override
1660 : {
1661 96 : cpp_options *cpp_opts = cpp_get_options (test.m_parser);
1662 96 : cpp_opts->narrow_charset = "IBM1047";
1663 :
1664 96 : cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1665 96 : callbacks->diagnostic = on_diagnostic;
1666 96 : }
1667 :
1668 0 : static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
1669 : enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
1670 : enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
1671 : rich_location *richloc ATTRIBUTE_UNUSED,
1672 : const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
1673 : ATTRIBUTE_FPTR_PRINTF(5,0)
1674 : {
1675 0 : gcc_assert (s_singleton);
1676 : /* Avoid exgettext from picking this up, it is translated in libcpp. */
1677 0 : const char *msg = "conversion from %s to %s not supported by iconv";
1678 : #ifdef ENABLE_NLS
1679 0 : msg = dgettext ("cpplib", msg);
1680 : #endif
1681 : /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
1682 : when the local iconv build doesn't support the conversion. */
1683 0 : if (strcmp (msgid, msg) == 0)
1684 : {
1685 0 : s_singleton->m_num_iconv_errors++;
1686 0 : return true;
1687 : }
1688 :
1689 : /* Otherwise, we have an unexpected error. */
1690 0 : abort ();
1691 : }
1692 :
1693 96 : bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
1694 :
1695 : private:
1696 : static ebcdic_execution_charset *s_singleton;
1697 : int m_num_iconv_errors;
1698 : };
1699 :
1700 : ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
1701 :
1702 : /* A lexer_test_options subclass that records a list of diagnostic
1703 : messages emitted by the lexer. */
1704 :
1705 : class lexer_diagnostic_sink : public lexer_test_options
1706 : {
1707 : public:
1708 96 : lexer_diagnostic_sink ()
1709 96 : {
1710 96 : gcc_assert (s_singleton == NULL);
1711 96 : s_singleton = this;
1712 96 : }
1713 96 : ~lexer_diagnostic_sink ()
1714 96 : {
1715 96 : gcc_assert (s_singleton == this);
1716 96 : s_singleton = NULL;
1717 :
1718 96 : int i;
1719 96 : char *str;
1720 192 : FOR_EACH_VEC_ELT (m_diagnostics, i, str)
1721 96 : free (str);
1722 96 : }
1723 :
1724 96 : void apply (lexer_test &test) final override
1725 : {
1726 96 : cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
1727 96 : callbacks->diagnostic = on_diagnostic;
1728 96 : }
1729 :
1730 96 : static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
1731 : enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
1732 : enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
1733 : rich_location *richloc ATTRIBUTE_UNUSED,
1734 : const char *msgid, va_list *ap)
1735 : ATTRIBUTE_FPTR_PRINTF(5,0)
1736 : {
1737 96 : char *msg = xvasprintf (msgid, *ap);
1738 96 : s_singleton->m_diagnostics.safe_push (msg);
1739 96 : return true;
1740 : }
1741 :
1742 : auto_vec<char *> m_diagnostics;
1743 :
1744 : private:
1745 : static lexer_diagnostic_sink *s_singleton;
1746 : };
1747 :
1748 : lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
1749 :
1750 : /* Constructor. Override line_table with a new instance based on CASE_,
1751 : and write CONTENT to a tempfile. Create a cpp_reader, and use it to
1752 : start parsing the tempfile. */
1753 :
1754 2304 : lexer_test::lexer_test (const line_table_case &case_, const char *content,
1755 2304 : lexer_test_options *options)
1756 2304 : : m_ltt (case_),
1757 2304 : m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
1758 : /* Create a tempfile and write the text to it. */
1759 2304 : m_tempfile (SELFTEST_LOCATION, ".c", content),
1760 2304 : m_concats (),
1761 2304 : m_implicitly_expect_EOF (true)
1762 : {
1763 2304 : if (options)
1764 192 : options->apply (*this);
1765 :
1766 2304 : cpp_init_iconv (m_parser);
1767 :
1768 : /* Parse the file. */
1769 2304 : const char *fname = cpp_read_main_file (m_parser,
1770 : m_tempfile.get_filename ());
1771 2304 : ASSERT_NE (fname, NULL);
1772 2304 : }
1773 :
1774 : /* Destructor. By default, verify that the next token in m_parser is EOF. */
1775 :
1776 2304 : lexer_test::~lexer_test ()
1777 : {
1778 2304 : location_t loc;
1779 2304 : const cpp_token *tok;
1780 :
1781 2304 : if (m_implicitly_expect_EOF)
1782 : {
1783 2208 : tok = cpp_get_token_with_location (m_parser, &loc);
1784 2208 : ASSERT_NE (tok, NULL);
1785 2208 : ASSERT_EQ (tok->type, CPP_EOF);
1786 : }
1787 2304 : }
1788 :
1789 : /* Get the next token from m_parser. */
1790 :
1791 : const cpp_token *
1792 3936 : lexer_test::get_token ()
1793 : {
1794 3936 : location_t loc;
1795 3936 : const cpp_token *tok;
1796 :
1797 3936 : tok = cpp_get_token_with_location (m_parser, &loc);
1798 3936 : ASSERT_NE (tok, NULL);
1799 3936 : return tok;
1800 : }
1801 :
1802 : /* Verify that locations within string literals are correctly handled. */
1803 :
1804 : /* Verify get_source_range_for_substring for token(s) at STRLOC,
1805 : using the string concatenation database for TEST.
1806 :
1807 : Assert that the character at index IDX is on EXPECTED_LINE,
1808 : and that it begins at column EXPECTED_START_COL and ends at
1809 : EXPECTED_FINISH_COL (unless the locations are beyond
1810 : LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
1811 : columns). */
1812 :
1813 : static void
1814 23740 : assert_char_at_range (const location &loc,
1815 : lexer_test& test,
1816 : location_t strloc, enum cpp_ttype type, int idx,
1817 : int expected_line, int expected_start_col,
1818 : int expected_finish_col)
1819 : {
1820 23740 : cpp_reader *pfile = test.m_parser;
1821 23740 : string_concat_db *concats = &test.m_concats;
1822 :
1823 23740 : source_range actual_range = source_range();
1824 23740 : const char *err
1825 23740 : = get_source_range_for_char (pfile, test.m_file_cache,
1826 : concats, strloc, type, idx,
1827 : &actual_range);
1828 23740 : if (should_have_column_data_p (strloc))
1829 18652 : ASSERT_EQ_AT (loc, NULL, err);
1830 : else
1831 : {
1832 5088 : ASSERT_STREQ_AT (loc,
1833 : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1834 : err);
1835 5088 : return;
1836 : }
1837 :
1838 18652 : int actual_start_line = LOCATION_LINE (actual_range.m_start);
1839 18652 : ASSERT_EQ_AT (loc, expected_line, actual_start_line);
1840 18652 : int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
1841 18652 : ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
1842 :
1843 18652 : if (should_have_column_data_p (actual_range.m_start))
1844 : {
1845 18652 : int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
1846 18652 : ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
1847 : }
1848 18652 : if (should_have_column_data_p (actual_range.m_finish))
1849 : {
1850 18652 : int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
1851 18652 : ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
1852 : }
1853 : }
1854 :
1855 : /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
1856 : the effective location of any errors. */
1857 :
1858 : #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
1859 : EXPECTED_START_COL, EXPECTED_FINISH_COL) \
1860 : assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
1861 : (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
1862 : (EXPECTED_FINISH_COL))
1863 :
1864 : /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
1865 : using the string concatenation database for TEST.
1866 :
1867 : Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */
1868 :
1869 : static void
1870 1268 : assert_num_substring_ranges (const location &loc,
1871 : lexer_test& test,
1872 : location_t strloc,
1873 : enum cpp_ttype type,
1874 : int expected_num_ranges)
1875 : {
1876 1268 : cpp_reader *pfile = test.m_parser;
1877 1268 : string_concat_db *concats = &test.m_concats;
1878 :
1879 1268 : int actual_num_ranges = -1;
1880 1268 : const char *err
1881 1268 : = get_num_source_ranges_for_substring (pfile, test.m_file_cache,
1882 : concats, strloc, type,
1883 : &actual_num_ranges);
1884 1268 : if (should_have_column_data_p (strloc))
1885 884 : ASSERT_EQ_AT (loc, NULL, err);
1886 : else
1887 : {
1888 384 : ASSERT_STREQ_AT (loc,
1889 : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1890 : err);
1891 384 : return;
1892 : }
1893 884 : ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
1894 : }
1895 :
1896 : /* Macro for calling assert_num_substring_ranges, supplying
1897 : SELFTEST_LOCATION for the effective location of any errors. */
1898 :
1899 : #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
1900 : EXPECTED_NUM_RANGES) \
1901 : assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
1902 : (TYPE), (EXPECTED_NUM_RANGES))
1903 :
1904 :
1905 : /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
1906 : returns an error (using the string concatenation database for TEST). */
1907 :
1908 : static void
1909 636 : assert_has_no_substring_ranges (const location &loc,
1910 : lexer_test& test,
1911 : location_t strloc,
1912 : enum cpp_ttype type,
1913 : const char *expected_err)
1914 : {
1915 636 : cpp_reader *pfile = test.m_parser;
1916 636 : string_concat_db *concats = &test.m_concats;
1917 636 : cpp_substring_ranges ranges;
1918 636 : const char *actual_err
1919 636 : = get_substring_ranges_for_loc (pfile, test.m_file_cache, concats, strloc,
1920 : type, ranges);
1921 636 : if (should_have_column_data_p (strloc))
1922 444 : ASSERT_STREQ_AT (loc, expected_err, actual_err);
1923 : else
1924 192 : ASSERT_STREQ_AT (loc,
1925 : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
1926 : actual_err);
1927 636 : }
1928 :
1929 : #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
1930 : assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
1931 : (STRLOC), (TYPE), (ERR))
1932 :
1933 : /* Lex a simple string literal. Verify the substring location data, before
1934 : and after running cpp_interpret_string on it. */
1935 :
1936 : static void
1937 96 : test_lexer_string_locations_simple (const line_table_case &case_)
1938 : {
1939 : /* Digits 0-9 (with 0 at column 10), the simple way.
1940 : ....................000000000.11111111112.2222222223333333333
1941 : ....................123456789.01234567890.1234567890123456789
1942 : We add a trailing comment to ensure that we correctly locate
1943 : the end of the string literal token. */
1944 96 : const char *content = " \"0123456789\" /* not a string */\n";
1945 96 : lexer_test test (case_, content, NULL);
1946 :
1947 : /* Verify that we get the expected token back, with the correct
1948 : location information. */
1949 96 : const cpp_token *tok = test.get_token ();
1950 96 : ASSERT_EQ (tok->type, CPP_STRING);
1951 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
1952 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
1953 :
1954 : /* At this point in lexing, the quote characters are treated as part of
1955 : the string (they are stripped off by cpp_interpret_string). */
1956 :
1957 96 : ASSERT_EQ (tok->val.str.len, 12);
1958 :
1959 : /* Verify that cpp_interpret_string works. */
1960 96 : cpp_string dst_string;
1961 96 : const enum cpp_ttype type = CPP_STRING;
1962 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
1963 : &dst_string, type);
1964 96 : ASSERT_TRUE (result);
1965 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
1966 96 : free (const_cast <unsigned char *> (dst_string.text));
1967 :
1968 : /* Verify ranges of individual characters. This no longer includes the
1969 : opening quote, but does include the closing quote. */
1970 1152 : for (int i = 0; i <= 10; i++)
1971 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
1972 : 10 + i, 10 + i);
1973 :
1974 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
1975 96 : }
1976 :
1977 : /* As test_lexer_string_locations_simple, but use an EBCDIC execution
1978 : encoding. */
1979 :
1980 : static void
1981 96 : test_lexer_string_locations_ebcdic (const line_table_case &case_)
1982 : {
1983 : /* EBCDIC support requires iconv. */
1984 96 : if (!HAVE_ICONV)
1985 0 : return;
1986 :
1987 : /* Digits 0-9 (with 0 at column 10), the simple way.
1988 : ....................000000000.11111111112.2222222223333333333
1989 : ....................123456789.01234567890.1234567890123456789
1990 : We add a trailing comment to ensure that we correctly locate
1991 : the end of the string literal token. */
1992 96 : const char *content = " \"0123456789\" /* not a string */\n";
1993 96 : ebcdic_execution_charset use_ebcdic;
1994 96 : lexer_test test (case_, content, &use_ebcdic);
1995 :
1996 : /* Verify that we get the expected token back, with the correct
1997 : location information. */
1998 96 : const cpp_token *tok = test.get_token ();
1999 96 : ASSERT_EQ (tok->type, CPP_STRING);
2000 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2001 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2002 :
2003 : /* At this point in lexing, the quote characters are treated as part of
2004 : the string (they are stripped off by cpp_interpret_string). */
2005 :
2006 96 : ASSERT_EQ (tok->val.str.len, 12);
2007 :
2008 : /* The remainder of the test requires an iconv implementation that
2009 : can convert from UTF-8 to the EBCDIC encoding requested above. */
2010 96 : if (use_ebcdic.iconv_errors_occurred_p ())
2011 0 : return;
2012 :
2013 : /* Verify that cpp_interpret_string works. */
2014 96 : cpp_string dst_string;
2015 96 : const enum cpp_ttype type = CPP_STRING;
2016 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2017 : &dst_string, type);
2018 96 : ASSERT_TRUE (result);
2019 : /* We should now have EBCDIC-encoded text, specifically
2020 : IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2021 : The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
2022 96 : ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2023 : (const char *)dst_string.text);
2024 96 : free (const_cast <unsigned char *> (dst_string.text));
2025 :
2026 : /* Verify that we don't attempt to record substring location information
2027 : for such cases. */
2028 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2029 : (test, tok->src_loc, type,
2030 : "execution character set != source character set");
2031 96 : }
2032 :
2033 : /* Lex a string literal containing a hex-escaped character.
2034 : Verify the substring location data, before and after running
2035 : cpp_interpret_string on it. */
2036 :
2037 : static void
2038 96 : test_lexer_string_locations_hex (const line_table_case &case_)
2039 : {
2040 : /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2041 : and with a space in place of digit 6, to terminate the escaped
2042 : hex code.
2043 : ....................000000000.111111.11112222.
2044 : ....................123456789.012345.67890123. */
2045 96 : const char *content = " \"01234\\x35 789\"\n";
2046 96 : lexer_test test (case_, content, NULL);
2047 :
2048 : /* Verify that we get the expected token back, with the correct
2049 : location information. */
2050 96 : const cpp_token *tok = test.get_token ();
2051 96 : ASSERT_EQ (tok->type, CPP_STRING);
2052 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2053 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2054 :
2055 : /* At this point in lexing, the quote characters are treated as part of
2056 : the string (they are stripped off by cpp_interpret_string). */
2057 96 : ASSERT_EQ (tok->val.str.len, 15);
2058 :
2059 : /* Verify that cpp_interpret_string works. */
2060 96 : cpp_string dst_string;
2061 96 : const enum cpp_ttype type = CPP_STRING;
2062 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2063 : &dst_string, type);
2064 96 : ASSERT_TRUE (result);
2065 96 : ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2066 96 : free (const_cast <unsigned char *> (dst_string.text));
2067 :
2068 : /* Verify ranges of individual characters. This no longer includes the
2069 : opening quote, but does include the closing quote. */
2070 576 : for (int i = 0; i <= 4; i++)
2071 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2072 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2073 576 : for (int i = 6; i <= 10; i++)
2074 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2075 :
2076 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2077 96 : }
2078 :
2079 : /* Lex a string literal containing an octal-escaped character.
2080 : Verify the substring location data after running cpp_interpret_string
2081 : on it. */
2082 :
2083 : static void
2084 96 : test_lexer_string_locations_oct (const line_table_case &case_)
2085 : {
2086 : /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2087 : and with a space in place of digit 6, to terminate the escaped
2088 : octal code.
2089 : ....................000000000.111111.11112222.2222223333333333444
2090 : ....................123456789.012345.67890123.4567890123456789012 */
2091 96 : const char *content = " \"01234\\065 789\" /* not a string */\n";
2092 96 : lexer_test test (case_, content, NULL);
2093 :
2094 : /* Verify that we get the expected token back, with the correct
2095 : location information. */
2096 96 : const cpp_token *tok = test.get_token ();
2097 96 : ASSERT_EQ (tok->type, CPP_STRING);
2098 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2099 :
2100 : /* Verify that cpp_interpret_string works. */
2101 96 : cpp_string dst_string;
2102 96 : const enum cpp_ttype type = CPP_STRING;
2103 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2104 : &dst_string, type);
2105 96 : ASSERT_TRUE (result);
2106 96 : ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2107 96 : free (const_cast <unsigned char *> (dst_string.text));
2108 :
2109 : /* Verify ranges of individual characters. This no longer includes the
2110 : opening quote, but does include the closing quote. */
2111 576 : for (int i = 0; i < 5; i++)
2112 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2113 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2114 576 : for (int i = 6; i <= 10; i++)
2115 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2116 :
2117 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2118 96 : }
2119 :
2120 : /* Test of string literal containing letter escapes. */
2121 :
2122 : static void
2123 96 : test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2124 : {
2125 : /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2126 : .....................000000000.1.11111.1.1.11222.22222223333333
2127 : .....................123456789.0.12345.6.7.89012.34567890123456. */
2128 96 : const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2129 96 : lexer_test test (case_, content, NULL);
2130 :
2131 : /* Verify that we get the expected tokens back. */
2132 96 : const cpp_token *tok = test.get_token ();
2133 96 : ASSERT_EQ (tok->type, CPP_STRING);
2134 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2135 :
2136 : /* Verify ranges of individual characters. */
2137 : /* "\t". */
2138 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2139 : 0, 1, 10, 11);
2140 : /* "foo". */
2141 384 : for (int i = 1; i <= 3; i++)
2142 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2143 : i, 1, 11 + i, 11 + i);
2144 : /* "\\" and "\n". */
2145 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2146 : 4, 1, 15, 16);
2147 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2148 : 5, 1, 17, 18);
2149 :
2150 : /* "bar" and closing quote for nul-terminator. */
2151 480 : for (int i = 6; i <= 9; i++)
2152 384 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2153 : i, 1, 13 + i, 13 + i);
2154 :
2155 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2156 96 : }
2157 :
2158 : /* Another test of a string literal containing a letter escape.
2159 : Based on string seen in
2160 : printf ("%-%\n");
2161 : in gcc.dg/format/c90-printf-1.c. */
2162 :
2163 : static void
2164 96 : test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2165 : {
2166 : /* .....................000000000.1111.11.1111.22222222223.
2167 : .....................123456789.0123.45.6789.01234567890. */
2168 96 : const char *content = (" \"%-%\\n\" /* non-str */\n");
2169 96 : lexer_test test (case_, content, NULL);
2170 :
2171 : /* Verify that we get the expected tokens back. */
2172 96 : const cpp_token *tok = test.get_token ();
2173 96 : ASSERT_EQ (tok->type, CPP_STRING);
2174 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2175 :
2176 : /* Verify ranges of individual characters. */
2177 : /* "%-%". */
2178 384 : for (int i = 0; i < 3; i++)
2179 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2180 : i, 1, 10 + i, 10 + i);
2181 : /* "\n". */
2182 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2183 : 3, 1, 13, 14);
2184 :
2185 : /* Closing quote for nul-terminator. */
2186 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2187 : 4, 1, 15, 15);
2188 :
2189 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2190 96 : }
2191 :
2192 : /* Lex a string literal containing UCN 4 characters.
2193 : Verify the substring location data after running cpp_interpret_string
2194 : on it. */
2195 :
2196 : static void
2197 96 : test_lexer_string_locations_ucn4 (const line_table_case &case_)
2198 : {
2199 : /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2200 : as UCN 4.
2201 : ....................000000000.111111.111122.222222223.33333333344444
2202 : ....................123456789.012345.678901.234567890.12345678901234 */
2203 96 : const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
2204 96 : lexer_test test (case_, content, NULL);
2205 :
2206 : /* Verify that we get the expected token back, with the correct
2207 : location information. */
2208 96 : const cpp_token *tok = test.get_token ();
2209 96 : ASSERT_EQ (tok->type, CPP_STRING);
2210 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2211 :
2212 : /* Verify that cpp_interpret_string works.
2213 : The string should be encoded in the execution character
2214 : set. Assuming that is UTF-8, we should have the following:
2215 : ----------- ---- ----- ------- ----------------
2216 : Byte offset Byte Octal Unicode Source Column(s)
2217 : ----------- ---- ----- ------- ----------------
2218 : 0 0x30 '0' 10
2219 : 1 0x31 '1' 11
2220 : 2 0x32 '2' 12
2221 : 3 0x33 '3' 13
2222 : 4 0x34 '4' 14
2223 : 5 0xE2 \342 U+2174 15-20
2224 : 6 0x85 \205 (cont) 15-20
2225 : 7 0xB4 \264 (cont) 15-20
2226 : 8 0xE2 \342 U+2175 21-26
2227 : 9 0x85 \205 (cont) 21-26
2228 : 10 0xB5 \265 (cont) 21-26
2229 : 11 0x37 '7' 27
2230 : 12 0x38 '8' 28
2231 : 13 0x39 '9' 29
2232 : 14 0x00 30 (closing quote)
2233 : ----------- ---- ----- ------- ---------------. */
2234 :
2235 96 : cpp_string dst_string;
2236 96 : const enum cpp_ttype type = CPP_STRING;
2237 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2238 : &dst_string, type);
2239 96 : ASSERT_TRUE (result);
2240 96 : ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2241 : (const char *)dst_string.text);
2242 96 : free (const_cast <unsigned char *> (dst_string.text));
2243 :
2244 : /* Verify ranges of individual characters. This no longer includes the
2245 : opening quote, but does include the closing quote.
2246 : '01234'. */
2247 576 : for (int i = 0; i <= 4; i++)
2248 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2249 : /* U+2174. */
2250 384 : for (int i = 5; i <= 7; i++)
2251 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2252 : /* U+2175. */
2253 384 : for (int i = 8; i <= 10; i++)
2254 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2255 : /* '789' and nul terminator */
2256 480 : for (int i = 11; i <= 14; i++)
2257 384 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2258 :
2259 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2260 96 : }
2261 :
2262 : /* Lex a string literal containing UCN 8 characters.
2263 : Verify the substring location data after running cpp_interpret_string
2264 : on it. */
2265 :
2266 : static void
2267 96 : test_lexer_string_locations_ucn8 (const line_table_case &case_)
2268 : {
2269 : /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2270 : ....................000000000.111111.1111222222.2222333333333.344444
2271 : ....................123456789.012345.6789012345.6789012345678.901234 */
2272 96 : const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
2273 96 : lexer_test test (case_, content, NULL);
2274 :
2275 : /* Verify that we get the expected token back, with the correct
2276 : location information. */
2277 96 : const cpp_token *tok = test.get_token ();
2278 96 : ASSERT_EQ (tok->type, CPP_STRING);
2279 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2280 : "\"01234\\U00002174\\U00002175789\"");
2281 :
2282 : /* Verify that cpp_interpret_string works.
2283 : The UTF-8 encoding of the string is identical to that from
2284 : the ucn4 testcase above; the only difference is the column
2285 : locations. */
2286 96 : cpp_string dst_string;
2287 96 : const enum cpp_ttype type = CPP_STRING;
2288 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2289 : &dst_string, type);
2290 96 : ASSERT_TRUE (result);
2291 96 : ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2292 : (const char *)dst_string.text);
2293 96 : free (const_cast <unsigned char *> (dst_string.text));
2294 :
2295 : /* Verify ranges of individual characters. This no longer includes the
2296 : opening quote, but does include the closing quote.
2297 : '01234'. */
2298 576 : for (int i = 0; i <= 4; i++)
2299 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2300 : /* U+2174. */
2301 384 : for (int i = 5; i <= 7; i++)
2302 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2303 : /* U+2175. */
2304 384 : for (int i = 8; i <= 10; i++)
2305 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2306 : /* '789' at columns 35-37 */
2307 384 : for (int i = 11; i <= 13; i++)
2308 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2309 : /* Closing quote/nul-terminator at column 38. */
2310 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2311 :
2312 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2313 96 : }
2314 :
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit value (most
   significant byte first) and return it in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  uint32_t value = 0;
  /* Shift in one byte at a time, starting with the most significant.  */
  for (int i = 0; i < 4; i++)
    value = (value << 8) | bytes[i];
  return value;
}
2326 :
2327 : /* Lex a wide string literal and verify that attempts to read substring
2328 : location data from it fail gracefully. */
2329 :
2330 : static void
2331 96 : test_lexer_string_locations_wide_string (const line_table_case &case_)
2332 : {
2333 : /* Digits 0-9.
2334 : ....................000000000.11111111112.22222222233333
2335 : ....................123456789.01234567890.12345678901234 */
2336 96 : const char *content = " L\"0123456789\" /* non-str */\n";
2337 96 : lexer_test test (case_, content, NULL);
2338 :
2339 : /* Verify that we get the expected token back, with the correct
2340 : location information. */
2341 96 : const cpp_token *tok = test.get_token ();
2342 96 : ASSERT_EQ (tok->type, CPP_WSTRING);
2343 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2344 :
2345 : /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
2346 96 : cpp_string dst_string;
2347 96 : const enum cpp_ttype type = CPP_WSTRING;
2348 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2349 : &dst_string, type);
2350 96 : ASSERT_TRUE (result);
2351 : /* The cpp_reader defaults to big-endian with
2352 : CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2353 : now be encoded as UTF-32BE. */
2354 96 : const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2355 96 : ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2356 96 : ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2357 96 : ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2358 96 : ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2359 96 : free (const_cast <unsigned char *> (dst_string.text));
2360 :
2361 : /* We don't yet support generating substring location information
2362 : for L"" strings. */
2363 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2364 : (test, tok->src_loc, type,
2365 : "execution character set != source character set");
2366 96 : }
2367 :
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit value (most
   significant byte first) and return it in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes
    = reinterpret_cast <const unsigned char *> (ptr_be_value);
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];
  return static_cast <uint16_t> ((high << 8) | low);
}
2376 :
2377 : /* Lex a u"" string literal and verify that attempts to read substring
2378 : location data from it fail gracefully. */
2379 :
2380 : static void
2381 96 : test_lexer_string_locations_string16 (const line_table_case &case_)
2382 : {
2383 : /* Digits 0-9.
2384 : ....................000000000.11111111112.22222222233333
2385 : ....................123456789.01234567890.12345678901234 */
2386 96 : const char *content = " u\"0123456789\" /* non-str */\n";
2387 96 : lexer_test test (case_, content, NULL);
2388 :
2389 : /* Verify that we get the expected token back, with the correct
2390 : location information. */
2391 96 : const cpp_token *tok = test.get_token ();
2392 96 : ASSERT_EQ (tok->type, CPP_STRING16);
2393 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2394 :
2395 : /* Verify that cpp_interpret_string works, using CPP_STRING16. */
2396 96 : cpp_string dst_string;
2397 96 : const enum cpp_ttype type = CPP_STRING16;
2398 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2399 : &dst_string, type);
2400 96 : ASSERT_TRUE (result);
2401 :
2402 : /* The cpp_reader defaults to big-endian, so dst_string should
2403 : now be encoded as UTF-16BE. */
2404 96 : const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2405 96 : ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2406 96 : ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2407 96 : ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2408 96 : ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2409 96 : free (const_cast <unsigned char *> (dst_string.text));
2410 :
2411 : /* We don't yet support generating substring location information
2412 : for L"" strings. */
2413 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2414 : (test, tok->src_loc, type,
2415 : "execution character set != source character set");
2416 96 : }
2417 :
2418 : /* Lex a U"" string literal and verify that attempts to read substring
2419 : location data from it fail gracefully. */
2420 :
2421 : static void
2422 96 : test_lexer_string_locations_string32 (const line_table_case &case_)
2423 : {
2424 : /* Digits 0-9.
2425 : ....................000000000.11111111112.22222222233333
2426 : ....................123456789.01234567890.12345678901234 */
2427 96 : const char *content = " U\"0123456789\" /* non-str */\n";
2428 96 : lexer_test test (case_, content, NULL);
2429 :
2430 : /* Verify that we get the expected token back, with the correct
2431 : location information. */
2432 96 : const cpp_token *tok = test.get_token ();
2433 96 : ASSERT_EQ (tok->type, CPP_STRING32);
2434 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2435 :
2436 : /* Verify that cpp_interpret_string works, using CPP_STRING32. */
2437 96 : cpp_string dst_string;
2438 96 : const enum cpp_ttype type = CPP_STRING32;
2439 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2440 : &dst_string, type);
2441 96 : ASSERT_TRUE (result);
2442 :
2443 : /* The cpp_reader defaults to big-endian, so dst_string should
2444 : now be encoded as UTF-32BE. Spot-check a few code units and the terminator. */
2445 96 : const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2446 96 : ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2447 96 : ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2448 96 : ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2449 96 : ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2450 96 : free (const_cast <unsigned char *> (dst_string.text));
2451 :
2452 : /* We don't yet support generating substring location information
2453 : for U"" strings. */
2454 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
2455 : (test, tok->src_loc, type,
2456 : "execution character set != source character set");
2457 96 : }
2458 :
2459 : /* Lex a u8-string literal.
2460 : Verify the substring location data after running cpp_interpret_string
2461 : on it. */
2462 :
2463 : static void
2464 96 : test_lexer_string_locations_u8 (const line_table_case &case_)
2465 : {
2466 : /* Digits 0-9.
2467 : ....................000000000.11111111112.22222222233333
2468 : ....................123456789.01234567890.12345678901234 */
2469 96 : const char *content = " u8\"0123456789\" /* non-str */\n";
2470 96 : lexer_test test (case_, content, NULL);
2471 :
2472 : /* Verify that we get the expected token back, with the correct
2473 : location information. */
2474 96 : const cpp_token *tok = test.get_token ();
2475 96 : ASSERT_EQ (tok->type, CPP_UTF8STRING);
2476 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2477 :
2478 : /* Verify that cpp_interpret_string works, interpreting the token as a CPP_STRING. */
2479 96 : cpp_string dst_string;
2480 96 : const enum cpp_ttype type = CPP_STRING;
2481 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2482 : &dst_string, type);
2483 96 : ASSERT_TRUE (result);
2484 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2485 96 : free (const_cast <unsigned char *> (dst_string.text));
2486 :
2487 : /* Verify ranges of individual characters. This no longer includes the
2488 : opening quote, but does include the closing quote (index 10). */
2489 1152 : for (int i = 0; i <= 10; i++)
2490 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2491 96 : }
2492 :
2493 : /* Lex a string literal containing UTF-8 source characters.
2494 : Verify the substring location data after running cpp_interpret_string
2495 : on it. */
2496 :
2497 : static void
2498 96 : test_lexer_string_locations_utf8_source (const line_table_case &case_)
2499 : {
2500 : /* This string literal is written out to the source file as UTF-8,
2501 : and is of the form "before mojibake after", where "mojibake"
2502 : is written as the following four unicode code points:
2503 : U+6587 CJK UNIFIED IDEOGRAPH-6587
2504 : U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2505 : U+5316 CJK UNIFIED IDEOGRAPH-5316
2506 : U+3051 HIRAGANA LETTER KE.
2507 : Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2508 : "before" and "after" are 1 byte per unicode character.
2509 :
2510 : The numbers shown are "columns", which are *byte* numbers within
2511 : the line, rather than unicode character numbers.
2512 :
2513 : .................... 000000000.1111111.
2514 : .................... 123456789.0123456. */
2515 96 : const char *content = (" \"before "
2516 : /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2517 : UTF-8: 0xE6 0x96 0x87
2518 : C octal escaped UTF-8: \346\226\207
2519 : "column" numbers: 17-19. */
2520 : "\346\226\207"
2521 :
2522 : /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2523 : UTF-8: 0xE5 0xAD 0x97
2524 : C octal escaped UTF-8: \345\255\227
2525 : "column" numbers: 20-22. */
2526 : "\345\255\227"
2527 :
2528 : /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2529 : UTF-8: 0xE5 0x8C 0x96
2530 : C octal escaped UTF-8: \345\214\226
2531 : "column" numbers: 23-25. */
2532 : "\345\214\226"
2533 :
2534 : /* U+3051 HIRAGANA LETTER KE
2535 : UTF-8: 0xE3 0x81 0x91
2536 : C octal escaped UTF-8: \343\201\221
2537 : "column" numbers: 26-28. */
2538 : "\343\201\221"
2539 :
2540 : /* column numbers 29 onwards
2541 : 2333333.33334444444444
2542 : 9012345.67890123456789. */
2543 : " after\" /* non-str */\n");
2544 96 : lexer_test test (case_, content, NULL);
2545 :
2546 : /* Verify that we get the expected token back, with the correct
2547 : location information. */
2548 96 : const cpp_token *tok = test.get_token ();
2549 96 : ASSERT_EQ (tok->type, CPP_STRING);
2550 96 : ASSERT_TOKEN_AS_TEXT_EQ
2551 : (test.m_parser, tok,
2552 : "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2553 :
2554 : /* Verify that cpp_interpret_string works; the UTF-8 bytes should pass through unchanged. */
2555 96 : cpp_string dst_string;
2556 96 : const enum cpp_ttype type = CPP_STRING;
2557 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2558 : &dst_string, type);
2559 96 : ASSERT_TRUE (result);
2560 96 : ASSERT_STREQ
2561 : ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2562 : (const char *)dst_string.text);
2563 96 : free (const_cast <unsigned char *> (dst_string.text));
2564 :
2565 : /* Verify ranges of individual characters. This no longer includes the
2566 : opening quote, but does include the closing quote.
2567 : Assuming that both source and execution encodings are UTF-8, we have
2568 : a run of 25 octets in each, plus the NUL terminator. */
2569 2496 : for (int i = 0; i < 25; i++)
2570 2400 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2571 : /* NUL-terminator should use the closing quote at column 35. */
2572 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
2573 :
2574 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
2575 96 : }
2576 :
2577 : /* Test of string literal concatenation: two literals on consecutive lines. */
2578 :
2579 : static void
2580 96 : test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
2581 : {
2582 : /* Digits 0-9.
2583 : .....................000000000.111111.11112222222222
2584 : .....................123456789.012345.67890123456789. */
2585 96 : const char *content = (" \"01234\" /* non-str */\n"
2586 : " \"56789\" /* non-str */\n");
2587 96 : lexer_test test (case_, content, NULL);
2588 :
2589 96 : location_t input_locs[2];
2590 :
2591 : /* Verify that we get the expected tokens back. */
2592 96 : auto_vec <cpp_string> input_strings;
2593 96 : const cpp_token *tok_a = test.get_token ();
2594 96 : ASSERT_EQ (tok_a->type, CPP_STRING);
2595 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
2596 96 : input_strings.safe_push (tok_a->val.str);
2597 96 : input_locs[0] = tok_a->src_loc;
2598 :
2599 96 : const cpp_token *tok_b = test.get_token ();
2600 96 : ASSERT_EQ (tok_b->type, CPP_STRING);
2601 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
2602 96 : input_strings.safe_push (tok_b->val.str);
2603 96 : input_locs[1] = tok_b->src_loc;
2604 :
2605 : /* Verify that cpp_interpret_string works on the pair of strings. */
2606 96 : cpp_string dst_string;
2607 96 : const enum cpp_ttype type = CPP_STRING;
2608 96 : bool result = cpp_interpret_string (test.m_parser,
2609 96 : input_strings.address (), 2,
2610 : &dst_string, type);
2611 96 : ASSERT_TRUE (result);
2612 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2613 96 : free (const_cast <unsigned char *> (dst_string.text));
2614 :
2615 : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
2616 96 : test.m_concats.record_string_concatenation (2, input_locs);
2617 :
2618 96 : location_t initial_loc = input_locs[0];
2619 :
2620 : /* "01234" on line 1. */
2621 576 : for (int i = 0; i <= 4; i++)
2622 480 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2623 : /* "56789" on line 2, plus its closing quote for the NUL terminator. */
2624 672 : for (int i = 5; i <= 10; i++)
2625 576 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
2626 :
2627 96 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2628 96 : }
2629 :
2630 : /* Another test of string literal concatenation: five literals, one per line. */
2631 :
2632 : static void
2633 96 : test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
2634 : {
2635 : /* Digits 0-9.
2636 : .....................000000000.111.11111112222222
2637 : .....................123456789.012.34567890123456. */
2638 96 : const char *content = (" \"01\" /* non-str */\n"
2639 : " \"23\" /* non-str */\n"
2640 : " \"45\" /* non-str */\n"
2641 : " \"67\" /* non-str */\n"
2642 : " \"89\" /* non-str */\n");
2643 96 : lexer_test test (case_, content, NULL);
2644 :
2645 96 : auto_vec <cpp_string> input_strings;
2646 96 : location_t input_locs[5];
2647 :
2648 : /* Verify that we get the expected tokens back. */
2649 576 : for (int i = 0; i < 5; i++)
2650 : {
2651 480 : const cpp_token *tok = test.get_token ();
2652 480 : ASSERT_EQ (tok->type, CPP_STRING);
2653 480 : input_strings.safe_push (tok->val.str);
2654 480 : input_locs[i] = tok->src_loc;
2655 : }
2656 :
2657 : /* Verify that cpp_interpret_string works on all five strings. */
2658 96 : cpp_string dst_string;
2659 96 : const enum cpp_ttype type = CPP_STRING;
2660 96 : bool result = cpp_interpret_string (test.m_parser,
2661 96 : input_strings.address (), 5,
2662 : &dst_string, type);
2663 96 : ASSERT_TRUE (result);
2664 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2665 96 : free (const_cast <unsigned char *> (dst_string.text));
2666 :
2667 : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
2668 96 : test.m_concats.record_string_concatenation (5, input_locs);
2669 :
2670 96 : location_t initial_loc = input_locs[0];
2671 :
2672 : /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
2673 : detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
2674 : and expect get_source_range_for_substring to fail.
2675 : However, for a string concatenation test, we can have a case
2676 : where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
2677 : but subsequent strings can be after it.
2678 : Attempting to detect this within assert_char_at_range
2679 : would overcomplicate the logic for the common test cases, so
2680 : we detect it here. */
2681 96 : if (should_have_column_data_p (input_locs[0])
2682 96 : && !should_have_column_data_p (input_locs[4]))
2683 : {
2684 : /* Verify that get_source_range_for_char gracefully rejects
2685 : this case with the expected error message, then stop. */
2686 8 : source_range actual_range;
2687 8 : const char *err
2688 8 : = get_source_range_for_char (test.m_parser, test.m_file_cache,
2689 : &test.m_concats,
2690 : initial_loc, type, 0, &actual_range);
2691 8 : ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
2692 8 : return;
2693 : }
2694 :
2695 528 : for (int i = 0; i < 5; i++)
2696 1320 : for (int j = 0; j < 2; j++)
2697 880 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
2698 : i + 1, 10 + j, 10 + j);
2699 :
2700 : /* NUL-terminator should use the final closing quote at line 5 column 12. */
2701 88 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
2702 :
2703 88 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2704 96 : }
2705 :
2706 : /* Another test of string literal concatenation, this time combined with
2707 : various kinds of escaped characters. */
2708 :
2709 : static void
2710 96 : test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
2711 : {
2712 : /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and
2713 : digit 6 in ASCII as octal "\066", concatenating multiple strings. */
2714 96 : const char *content
2715 : /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
2716 : .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
2717 : = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
2718 96 : lexer_test test (case_, content, NULL);
2719 :
2720 96 : auto_vec <cpp_string> input_strings;
2721 96 : location_t input_locs[4];
2722 :
2723 : /* Verify that we get the expected tokens back. */
2724 480 : for (int i = 0; i < 4; i++)
2725 : {
2726 384 : const cpp_token *tok = test.get_token ();
2727 384 : ASSERT_EQ (tok->type, CPP_STRING);
2728 384 : input_strings.safe_push (tok->val.str);
2729 384 : input_locs[i] = tok->src_loc;
2730 : }
2731 :
2732 : /* Verify that cpp_interpret_string works, decoding both escapes. */
2733 96 : cpp_string dst_string;
2734 96 : const enum cpp_ttype type = CPP_STRING;
2735 96 : bool result = cpp_interpret_string (test.m_parser,
2736 96 : input_strings.address (), 4,
2737 : &dst_string, type);
2738 96 : ASSERT_TRUE (result);
2739 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2740 96 : free (const_cast <unsigned char *> (dst_string.text));
2741 :
2742 : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
2743 96 : test.m_concats.record_string_concatenation (4, input_locs);
2744 :
2745 96 : location_t initial_loc = input_locs[0];
2746 :
2747 576 : for (int i = 0; i <= 4; i++)
2748 480 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
2749 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
2750 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
2751 384 : for (int i = 7; i <= 9; i++)
2752 288 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
2753 :
2754 : /* NUL-terminator should use the location of the final closing quote. */
2755 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
2756 :
2757 96 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
2758 96 : }
2759 :
2760 : /* Test of string literal in a macro. */
2761 :
2762 : static void
2763 96 : test_lexer_string_locations_macro (const line_table_case &case_)
2764 : {
2765 : /* Digits 0-9.
2766 : .....................0000000001111111111.22222222223.
2767 : .....................1234567890123456789.01234567890. */
2768 96 : const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
2769 : " MACRO");
2770 96 : lexer_test test (case_, content, NULL);
2771 :
2772 : /* Verify that we get the expected tokens back: padding, then the string. */
2773 96 : const cpp_token *tok = test.get_token ();
2774 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2775 :
2776 96 : tok = test.get_token ();
2777 96 : ASSERT_EQ (tok->type, CPP_STRING);
2778 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2779 :
2780 : /* Verify ranges of individual characters. We ought to
2781 : see columns within the macro definition (on line 1). */
2782 1152 : for (int i = 0; i <= 10; i++)
2783 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2784 : i, 1, 20 + i, 20 + i);
2785 :
2786 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
2787 :
2788 96 : tok = test.get_token ();
2789 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2790 96 : }
2791 :
2792 : /* Test of stringification of a macro argument. */
2793 :
2794 : static void
2795 96 : test_lexer_string_locations_stringified_macro_argument
2796 : (const line_table_case &case_)
2797 : {
2798 : /* .....................000000000111111111122222222223.
2799 : .....................123456789012345678901234567890. */
2800 96 : const char *content = ("#define MACRO(X) #X /* non-str */\n"
2801 : "MACRO(foo)\n");
2802 96 : lexer_test test (case_, content, NULL);
2803 :
2804 : /* Verify that we get the expected tokens back: padding, then the stringified argument. */
2805 96 : const cpp_token *tok = test.get_token ();
2806 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2807 :
2808 96 : tok = test.get_token ();
2809 96 : ASSERT_EQ (tok->type, CPP_STRING);
2810 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
2811 :
2812 : /* We don't support getting the location of a stringified macro
2813 : argument. Verify that it fails gracefully. */
2814 96 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2815 : "cpp_interpret_string_1 failed");
2816 :
2817 96 : tok = test.get_token ();
2818 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2819 :
2820 96 : tok = test.get_token ();
2821 96 : ASSERT_EQ (tok->type, CPP_PADDING);
2822 96 : }
2823 :
2824 : /* Ensure that we fail gracefully if something attempts to pass
2825 : in a location that isn't a string literal token. Seen on this code:
2826 :
2827 : const char a[] = " %d ";
2828 : __builtin_printf (a, 0.5);
2829 : ^
2830 :
2831 : when c-format.cc erroneously used the indicated one-character
2832 : location as the format string location, leading to a read past the
2833 : end of a string buffer in cpp_interpret_string_1. */
2834 :
2835 : static void
2836 96 : test_lexer_string_locations_non_string (const line_table_case &case_)
2837 : {
2838 : /* .....................000000000111111111122222222223.
2839 : .....................123456789012345678901234567890. */
2840 96 : const char *content = (" a\n");
2841 96 : lexer_test test (case_, content, NULL);
2842 :
2843 : /* Verify that we get the expected token back (a name, not a string). */
2844 96 : const cpp_token *tok = test.get_token ();
2845 96 : ASSERT_EQ (tok->type, CPP_NAME);
2846 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
2847 :
2848 : /* At this point, libcpp is attempting to interpret the name as a
2849 : string literal, despite it not starting with a quote. We don't detect
2850 : that, but we should at least fail gracefully. */
2851 96 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
2852 : "cpp_interpret_string_1 failed");
2853 96 : }
2854 :
2855 : /* Ensure that we can read substring information for a token which
2856 : starts in one linemap and ends in another. Adapted from
2857 : gcc.dg/cpp/pr69985.c. */
2858 :
2859 : static void
2860 96 : test_lexer_string_locations_long_line (const line_table_case &case_)
2861 : {
2862 : /* .....................000000.000111111111
2863 : .....................123456.789012346789. */
2864 96 : const char *content = ("/* A very long line, so that we start a new line map. */\n"
2865 : " \"0123456789012345678901234567890123456789"
2866 : "0123456789012345678901234567890123456789"
2867 : "0123456789012345678901234567890123456789"
2868 : "0123456789\"\n");
2869 :
2870 96 : lexer_test test (case_, content, NULL);
2871 :
2872 : /* Verify that we get the expected token back. */
2873 96 : const cpp_token *tok = test.get_token ();
2874 96 : ASSERT_EQ (tok->type, CPP_STRING);
2875 : /* Skip the range checks if the line_table has run out of column data. */
2876 96 : if (!should_have_column_data_p (line_table->highest_location))
2877 36 : return;
2878 :
2879 : /* Verify ranges of individual characters: 130 digits plus the terminator. */
2880 60 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
2881 7920 : for (int i = 0; i < 131; i++)
2882 7860 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2883 : i, 2, 7 + i, 7 + i);
2884 96 : }
2885 :
2886 : /* Test of locations within a raw string that doesn't contain a newline. */
2887 :
2888 : static void
2889 96 : test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
2890 : {
2891 : /* .....................00.0000000111111111122.
2892 : .....................12.3456789012345678901. */
2893 96 : const char *content = ("R\"foo(0123456789)foo\"\n");
2894 96 : lexer_test test (case_, content, NULL);
2895 :
2896 : /* Verify that we get the expected token back. */
2897 96 : const cpp_token *tok = test.get_token ();
2898 96 : ASSERT_EQ (tok->type, CPP_STRING);
2899 :
2900 : /* Verify that cpp_interpret_string works, stripping the raw-string delimiters. */
2901 96 : cpp_string dst_string;
2902 96 : const enum cpp_ttype type = CPP_STRING;
2903 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2904 : &dst_string, type);
2905 96 : ASSERT_TRUE (result);
2906 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2907 96 : free (const_cast <unsigned char *> (dst_string.text));
2908 :
2909 96 : if (!should_have_column_data_p (line_table->highest_location))
2910 32 : return;
2911 :
2912 : /* 0-9, plus the NUL terminator. */
2913 64 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
2914 768 : for (int i = 0; i < 11; i++)
2915 704 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2916 : i, 1, 7 + i, 7 + i);
2917 96 : }
2918 :
2919 : /* Test of locations within a raw string that contains a newline. */
2920 :
2921 : static void
2922 96 : test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
2923 : {
2924 : /* .....................00.0000.
2925 : .....................12.3456. */
2926 96 : const char *content = ("R\"foo(\n"
2927 : /* .....................00000.
2928 : .....................12345. */
2929 : "hello\n"
2930 : "world\n"
2931 : /* .....................00000.
2932 : .....................12345. */
2933 : ")foo\"\n");
2934 96 : lexer_test test (case_, content, NULL);
2935 :
2936 : /* Verify that we get the expected token back. */
2937 96 : const cpp_token *tok = test.get_token ();
2938 96 : ASSERT_EQ (tok->type, CPP_STRING);
2939 :
2940 : /* Verify that cpp_interpret_string works, preserving the embedded newlines. */
2941 96 : cpp_string dst_string;
2942 96 : const enum cpp_ttype type = CPP_STRING;
2943 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2944 : &dst_string, type);
2945 96 : ASSERT_TRUE (result);
2946 96 : ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
2947 96 : free (const_cast <unsigned char *> (dst_string.text));
2948 :
2949 96 : if (!should_have_column_data_p (line_table->highest_location))
2950 36 : return;
2951 :
2952 : /* Currently we don't support locations within raw strings that
2953 : contain newlines. */
2954 60 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
2955 : "range endpoints are on different lines");
2956 96 : }
2957 :
2958 : /* Test of parsing an unterminated raw string. */
2959 :
2960 : static void
2961 96 : test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
2962 : {
2963 96 : const char *content = "R\"ouch()ouCh\" /* etc */";
2964 :
2965 96 : lexer_diagnostic_sink diagnostics;
2966 96 : lexer_test test (case_, content, &diagnostics);
2967 96 : test.m_implicitly_expect_EOF = false;
2968 :
2969 : /* Attempt to parse the raw string. The closing delimiter "ouCh" differs from the opening "ouch", so we expect CPP_EOF. */
2970 96 : const cpp_token *tok = test.get_token ();
2971 96 : ASSERT_EQ (tok->type, CPP_EOF);
2972 :
2973 96 : ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
2974 : /* We expect the message "unterminated raw string"
2975 : in the "cpplib" translation domain.
2976 : It's not clear that dgettext is available on all supported hosts,
2977 : so this assertion is commented-out for now.
2978 : ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
2979 : diagnostics.m_diagnostics[0]);
2980 : */
2981 96 : }
2982 :
2983 : /* Test of lexing char constants. */
2984 :
2985 : static void
2986 96 : test_lexer_char_constants (const line_table_case &case_)
2987 : {
2988 : /* Various char constants.
2989 : .....................0000000001111111111.22222222223.
2990 : .....................1234567890123456789.01234567890. */
2991 96 : const char *content = (" 'a'\n"
2992 : " u'a'\n"
2993 : " U'a'\n"
2994 : " L'a'\n"
2995 : " 'abc'\n");
2996 96 : lexer_test test (case_, content, NULL);
2997 :
2998 : /* Verify that we get the expected tokens back. */
2999 : /* 'a'. */
3000 96 : const cpp_token *tok = test.get_token ();
3001 96 : ASSERT_EQ (tok->type, CPP_CHAR);
3002 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3003 : /* Interpret the constant: expect the value 'a' from a single character. */
3004 96 : unsigned int chars_seen;
3005 96 : int unsignedp;
3006 96 : cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3007 : &chars_seen, &unsignedp);
3008 96 : ASSERT_EQ (cc, 'a');
3009 96 : ASSERT_EQ (chars_seen, 1);
3010 :
3011 : /* u'a'. */
3012 96 : tok = test.get_token ();
3013 96 : ASSERT_EQ (tok->type, CPP_CHAR16);
3014 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3015 :
3016 : /* U'a'. */
3017 96 : tok = test.get_token ();
3018 96 : ASSERT_EQ (tok->type, CPP_CHAR32);
3019 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3020 :
3021 : /* L'a'. */
3022 96 : tok = test.get_token ();
3023 96 : ASSERT_EQ (tok->type, CPP_WCHAR);
3024 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3025 :
3026 : /* 'abc' (c-char-sequence). */
3027 96 : tok = test.get_token ();
3028 96 : ASSERT_EQ (tok->type, CPP_CHAR);
3029 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3030 96 : }
3031 : /* A table of interesting location_t values, giving one axis of our test
3032 : matrix (for_each_line_table_case supplies the other: default_range_bits). */
3033 :
3034 : static const location_t boundary_locations[] = {
3035 : /* Zero means "don't override the default values for a new line_table". */
3036 : 0,
3037 :
3038 : /* An arbitrary non-zero value that isn't close to one of
3039 : the boundary values below. */
3040 : 0x10000,
3041 :
3042 : /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
3043 : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3044 : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3045 : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3046 : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3047 : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3048 :
3049 : /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
3050 : LINE_MAP_MAX_LOCATION_WITH_COLS - 0x200,
3051 : LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3052 : LINE_MAP_MAX_LOCATION_WITH_COLS,
3053 : LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3054 : LINE_MAP_MAX_LOCATION_WITH_COLS + 0x200,
3055 : };
3056 :
3057 : /* Run TESTCASE multiple times, once for each case in our test matrix, i.e. for each pairing of a default_range_bits value with an entry of boundary_locations. */
3058 :
3059 : void
3060 244 : for_each_line_table_case (void (*testcase) (const line_table_case &))
3061 : {
3062 : /* As noted above in the description of struct line_table_case,
3063 : we want to explore a test matrix of interesting line_table
3064 : situations, running various selftests for each case within the
3065 : matrix. */
3066 :
3067 : /* Run all tests with:
3068 : (a) line_table->default_range_bits == 0, and
3069 : (b) line_table->default_range_bits == line_map_suggested_range_bits. */
3070 :
3071 732 : for (int default_range_bits: {0, line_map_suggested_range_bits})
3072 : {
3073 : /* ...and use each of the "interesting" location values as
3074 : the starting location within line_table. */
3075 488 : const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3076 6344 : for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3077 : {
3078 5856 : line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3079 5856 : testcase (c);
3080 : }
3081 : }
3082 244 : }
3083 :
3084 : /* Verify that when presented with a consecutive pair of locations with
3085 : a very large line offset, we don't attempt to consolidate them into
3086 : a single ordinary linemap where the line offsets within the line map
3087 : would lead to overflow (PR lto/88147). */
3088 :
3089 : static void
3090 4 : test_line_offset_overflow ()
3091 : {
3092 4 : line_table_test ltt (line_table_case (5, 0));
3093 :
3094 4 : linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
3095 4 : linemap_line_start (line_table, 1, 100);
3096 4 : location_t loc_a = linemap_line_start (line_table, 2578, 255);
3097 4 : assert_loceq ("foo.c", 2578, 0, loc_a);
3098 : /* Check the bit allocation of the last ordinary linemap after creating LOC_A. */
3099 4 : const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3100 4 : ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
3101 4 : ASSERT_EQ (ordmap_a->m_range_bits, 5);
3102 :
3103 4 : location_t loc_b = linemap_line_start (line_table, 404198, 512);
3104 4 : assert_loceq ("foo.c", 404198, 0, loc_b);
3105 :
3106 : /* We should have started a new linemap, rather than attempting to store
3107 : a very large line offset. */
3108 4 : const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
3109 4 : ASSERT_NE (ordmap_a, ordmap_b);
3110 4 : }
3111 : /* Selftests for cpp_display_width, cpp_byte_column_to_display_column and cpp_display_column_to_byte_column. */
3112 4 : void test_cpp_utf8 ()
3113 : {
3114 4 : const int def_tabstop = 8;
3115 4 : cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3116 :
3117 : /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
3118 4 : {
3119 4 : int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
3120 4 : ASSERT_EQ (8, w_bad);
3121 4 : int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
3122 4 : ASSERT_EQ (5, w_ctrl);
3123 : }
3124 :
3125 : /* Verify that wcwidth of valid UTF-8 is as expected. */
3126 4 : {
3127 4 : const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
3128 4 : ASSERT_EQ (1, w_pi);
3129 4 : const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
3130 4 : ASSERT_EQ (2, w_emoji);
3131 4 : const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
3132 : policy);
3133 4 : ASSERT_EQ (1, w_umlaut_precomposed);
3134 4 : const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
3135 : policy);
3136 4 : ASSERT_EQ (1, w_umlaut_combining);
3137 4 : const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
3138 4 : ASSERT_EQ (2, w_han);
3139 4 : const int w_ascii = cpp_display_width ("GCC", 3, policy);
3140 4 : ASSERT_EQ (3, w_ascii);
3141 4 : const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
3142 : "\x9f! \xe4\xb8\xba y\xcc\x88",
3143 : 24, policy);
3144 4 : ASSERT_EQ (18, w_mixed);
3145 : }
3146 :
3147 : /* Verify that display width properly expands tabs, for tab stops of 1, 3 and 8. */
3148 4 : {
3149 4 : const char *tstr = "\tabc\td";
3150 4 : ASSERT_EQ (6, cpp_display_width (tstr, 6,
3151 : cpp_char_column_policy (1, cpp_wcwidth)));
3152 4 : ASSERT_EQ (10, cpp_display_width (tstr, 6,
3153 : cpp_char_column_policy (3, cpp_wcwidth)));
3154 4 : ASSERT_EQ (17, cpp_display_width (tstr, 6,
3155 : cpp_char_column_policy (8, cpp_wcwidth)));
3156 4 : ASSERT_EQ (1,
3157 : cpp_display_column_to_byte_column
3158 : (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
3159 : }
3160 :
3161 : /* Verify that cpp_byte_column_to_display_column can go past the end,
3162 : and similar edge cases. */
3163 4 : {
3164 4 : const char *str
3165 : /* Display columns.
3166 : 111111112345 */
3167 : = "\xcf\x80 abc";
3168 : /* 111122223456
3169 : Byte columns. */
3170 :
3171 4 : ASSERT_EQ (5, cpp_display_width (str, 6, policy));
3172 4 : ASSERT_EQ (105,
3173 : cpp_byte_column_to_display_column (str, 6, 106, policy));
3174 4 : ASSERT_EQ (10000,
3175 : cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
3176 4 : ASSERT_EQ (0,
3177 : cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
3178 : }
3179 :
3180 : /* Verify that cpp_display_column_to_byte_column can go past the end,
3181 : and similar edge cases, and check invertibility. */
3182 4 : {
3183 4 : const char *str
3184 : /* Display columns.
3185 : 000000000000000000000000000000000000011
3186 : 111111112222222234444444455555555678901 */
3187 : = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
3188 : /* 000000000000000000000000000000000111111
3189 : 111122223333444456666777788889999012345
3190 : Byte columns. */
3191 4 : ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
3192 4 : ASSERT_EQ (15,
3193 : cpp_display_column_to_byte_column (str, 15, 11, policy));
3194 4 : ASSERT_EQ (115,
3195 : cpp_display_column_to_byte_column (str, 15, 111, policy));
3196 4 : ASSERT_EQ (10000,
3197 : cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
3198 4 : ASSERT_EQ (0,
3199 : cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
3200 :
3201 : /* Verify that we do not interrupt a UTF-8 sequence. */
3202 4 : ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
3203 :
3204 64 : for (int byte_col = 1; byte_col <= 15; ++byte_col)
3205 : {
3206 60 : const int disp_col
3207 60 : = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
3208 60 : const int byte_col2
3209 60 : = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
3210 :
3211 : /* If we ask for the display column in the middle of a UTF-8
3212 : sequence, it will return the length of the partial sequence,
3213 : matching the behavior of GCC before display column support.
3214 : Otherwise check the round trip was successful. */
3215 60 : if (byte_col < 4)
3216 12 : ASSERT_EQ (byte_col, disp_col);
3217 48 : else if (byte_col >= 6 && byte_col < 9)
3218 12 : ASSERT_EQ (3 + (byte_col - 5), disp_col);
3219 : else
3220 60 : ASSERT_EQ (byte_col2, byte_col);
3221 : }
3222 : }
3223 4 : }
3224 :
3225 : static bool
3226 36 : check_cpp_valid_utf8_p (const char *str)
3227 : {
3228 36 : return cpp_valid_utf8_p (str, strlen (str));
3229 : }
3230 :
3231 : /* Check that cpp_valid_utf8_p works as expected. */
3232 :
3233 : static void
3234 4 : test_cpp_valid_utf8_p ()
3235 : {
3236 4 : ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
3237 :
3238 : /* 2-byte char (pi). */
3239 4 : ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
3240 :
3241 : /* 3-byte chars (the Japanese word "mojibake"). */
3242 4 : ASSERT_TRUE (check_cpp_valid_utf8_p
3243 : (
3244 : /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3245 : UTF-8: 0xE6 0x96 0x87
3246 : C octal escaped UTF-8: \346\226\207. */
3247 : "\346\226\207"
3248 : /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3249 : UTF-8: 0xE5 0xAD 0x97
3250 : C octal escaped UTF-8: \345\255\227. */
3251 : "\345\255\227"
3252 : /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3253 : UTF-8: 0xE5 0x8C 0x96
3254 : C octal escaped UTF-8: \345\214\226. */
3255 : "\345\214\226"
3256 : /* U+3051 HIRAGANA LETTER KE
3257 : UTF-8: 0xE3 0x81 0x91
3258 : C octal escaped UTF-8: \343\201\221. */
3259 : "\343\201\221"));
3260 :
3261 : /* 4-byte char: an emoji. */
3262 4 : ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
3263 :
3264 : /* Control codes, including the NUL byte. */
3265 4 : ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
3266 :
3267 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
3268 :
3269 : /* Unexpected continuation bytes. */
3270 4 : for (unsigned char continuation_byte = 0x80;
3271 260 : continuation_byte <= 0xbf;
3272 : continuation_byte++)
3273 256 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
3274 :
3275 : /* "Lonely start characters" for 2-byte sequences. */
3276 4 : {
3277 4 : unsigned char buf[2];
3278 4 : buf[1] = ' ';
3279 4 : for (buf[0] = 0xc0;
3280 132 : buf[0] <= 0xdf;
3281 128 : buf[0]++)
3282 128 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3283 : }
3284 :
3285 : /* "Lonely start characters" for 3-byte sequences. */
3286 4 : {
3287 4 : unsigned char buf[2];
3288 4 : buf[1] = ' ';
3289 4 : for (buf[0] = 0xe0;
3290 68 : buf[0] <= 0xef;
3291 64 : buf[0]++)
3292 64 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3293 : }
3294 :
3295 : /* "Lonely start characters" for 4-byte sequences. */
3296 4 : {
3297 4 : unsigned char buf[2];
3298 4 : buf[1] = ' ';
3299 4 : for (buf[0] = 0xf0;
3300 24 : buf[0] <= 0xf4;
3301 20 : buf[0]++)
3302 20 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3303 : }
3304 :
3305 : /* Invalid start characters (formerly valid for 5-byte and 6-byte
3306 : sequences). */
3307 4 : {
3308 4 : unsigned char buf[2];
3309 4 : buf[1] = ' ';
3310 4 : for (buf[0] = 0xf5;
3311 40 : buf[0] <= 0xfd;
3312 36 : buf[0]++)
3313 36 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
3314 : }
3315 :
3316 : /* Impossible bytes. */
3317 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
3318 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
3319 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
3320 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
3321 4 : }
3322 :
3323 : /* Run all of the selftests within this file. */
3324 :
3325 : void
3326 4 : input_cc_tests ()
3327 : {
3328 4 : test_linenum_comparisons ();
3329 4 : test_should_have_column_data_p ();
3330 4 : test_unknown_location ();
3331 4 : test_builtins ();
3332 4 : for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3333 :
3334 4 : for_each_line_table_case (test_accessing_ordinary_linemaps);
3335 4 : for_each_line_table_case (test_lexer);
3336 4 : for_each_line_table_case (test_lexer_string_locations_simple);
3337 4 : for_each_line_table_case (test_lexer_string_locations_ebcdic);
3338 4 : for_each_line_table_case (test_lexer_string_locations_hex);
3339 4 : for_each_line_table_case (test_lexer_string_locations_oct);
3340 4 : for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3341 4 : for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3342 4 : for_each_line_table_case (test_lexer_string_locations_ucn4);
3343 4 : for_each_line_table_case (test_lexer_string_locations_ucn8);
3344 4 : for_each_line_table_case (test_lexer_string_locations_wide_string);
3345 4 : for_each_line_table_case (test_lexer_string_locations_string16);
3346 4 : for_each_line_table_case (test_lexer_string_locations_string32);
3347 4 : for_each_line_table_case (test_lexer_string_locations_u8);
3348 4 : for_each_line_table_case (test_lexer_string_locations_utf8_source);
3349 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3350 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3351 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3352 4 : for_each_line_table_case (test_lexer_string_locations_macro);
3353 4 : for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3354 4 : for_each_line_table_case (test_lexer_string_locations_non_string);
3355 4 : for_each_line_table_case (test_lexer_string_locations_long_line);
3356 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3357 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3358 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3359 4 : for_each_line_table_case (test_lexer_char_constants);
3360 :
3361 4 : test_line_offset_overflow ();
3362 :
3363 4 : test_cpp_utf8 ();
3364 4 : test_cpp_valid_utf8_p ();
3365 4 : }
3366 :
3367 : } // namespace selftest
3368 :
3369 : #endif /* CHECKING_P */
|