Branch data Line data Source code
1 : : /* Data and functions related to line maps and input files.
2 : : Copyright (C) 2004-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "intl.h"
24 : : #include "diagnostic.h"
25 : : #include "selftest.h"
26 : : #include "cpplib.h"
27 : :
28 : : #ifndef HAVE_ICONV
29 : : #define HAVE_ICONV 0
30 : : #endif
31 : :
32 : : const char *
33 : 7630284 : special_fname_builtin ()
34 : : {
35 : 7630284 : return _("<built-in>");
36 : : }
37 : :
/* Input charset configuration.

   Fallback charset callback, installed when the client supplies none.
   It ignores the file name and always answers nullptr.  */

static const char *
default_charset_callback (const char *)
{
  return nullptr;
}
43 : :
44 : : void
45 : 914791 : file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
46 : : bool should_skip_bom)
47 : : {
48 : 914791 : m_input_context.ccb = (ccb ? ccb : default_charset_callback);
49 : 914791 : m_input_context.should_skip_bom = should_skip_bom;
50 : 914791 : }
51 : :
52 : : /* This is a cache used by get_next_line to store the content of a
53 : : file to be searched for file lines. */
54 : : class file_cache_slot
55 : : {
56 : : public:
57 : : file_cache_slot ();
58 : : ~file_cache_slot ();
59 : :
60 : : void dump (FILE *out, int indent) const;
61 : 0 : void DEBUG_FUNCTION dump () const { dump (stderr, 0); }
62 : :
63 : : bool read_line_num (size_t line_num,
64 : : char ** line, ssize_t *line_len);
65 : :
66 : : /* Accessors. */
67 : 26872197 : const char *get_file_path () const { return m_file_path; }
68 : 321877 : unsigned get_use_count () const { return m_use_count; }
69 : 3455 : bool missing_trailing_newline_p () const
70 : : {
71 : 3455 : return m_missing_trailing_newline;
72 : : }
73 : : char_span get_full_file_content ();
74 : :
75 : 3070216 : void inc_use_count () { m_use_count++; }
76 : :
77 : : bool create (const file_cache::input_context &in_context,
78 : : const char *file_path, FILE *fp, unsigned highest_use_count);
79 : : void evict ();
80 : : void set_content (const char *buf, size_t sz);
81 : :
82 : 282866 : static size_t tune (size_t line_record_size_)
83 : : {
84 : 282866 : size_t ret = line_record_size;
85 : 282866 : line_record_size = line_record_size_;
86 : 282866 : return ret;
87 : : }
88 : :
89 : : private:
90 : : /* These are information used to store a line boundary. */
91 : : class line_info
92 : : {
93 : : public:
94 : : /* The line number. It starts from 1. */
95 : : size_t line_num;
96 : :
97 : : /* The position (byte count) of the beginning of the line,
98 : : relative to the file data pointer. This starts at zero. */
99 : : size_t start_pos;
100 : :
101 : : /* The position (byte count) of the last byte of the line. This
102 : : normally points to the '\n' character, or to one byte after the
103 : : last byte of the file, if the file doesn't contain a '\n'
104 : : character. */
105 : : size_t end_pos;
106 : :
107 : 2922149025 : line_info (size_t l, size_t s, size_t e)
108 : 2906402263 : : line_num (l), start_pos (s), end_pos (e)
109 : : {}
110 : :
111 : : line_info ()
112 : : :line_num (0), start_pos (0), end_pos (0)
113 : : {}
114 : :
115 : 48844 : static bool less_than(const line_info &a, const line_info &b)
116 : : {
117 : 48844 : return a.line_num < b.line_num;
118 : : }
119 : : };
120 : :
121 : : bool needs_read_p () const;
122 : : bool needs_grow_p () const;
123 : : void maybe_grow ();
124 : : bool read_data ();
125 : : bool maybe_read_data ();
126 : : bool get_next_line (char **line, ssize_t *line_len);
127 : : bool read_next_line (char ** line, ssize_t *line_len);
128 : : bool goto_next_line ();
129 : :
130 : : static const size_t buffer_size = 4 * 1024;
131 : : static size_t line_record_size;
132 : : static size_t recent_cached_lines_shift;
133 : :
134 : : /* The number of time this file has been accessed. This is used
135 : : to designate which file cache to evict from the cache
136 : : array. */
137 : : unsigned m_use_count;
138 : :
139 : : /* The file_path is the key for identifying a particular file in
140 : : the cache. This copy is owned by the slot. */
141 : : char *m_file_path;
142 : :
143 : : FILE *m_fp;
144 : :
145 : : /* True when an read error happened. */
146 : : bool m_error;
147 : :
148 : : /* This points to the content of the file that we've read so
149 : : far. */
150 : : char *m_data;
151 : :
152 : : /* The allocated buffer to be freed may start a little earlier than DATA,
153 : : e.g. if a UTF8 BOM was skipped at the beginning. */
154 : : int m_alloc_offset;
155 : :
156 : : /* The size of the DATA array above.*/
157 : : size_t m_size;
158 : :
159 : : /* The number of bytes read from the underlying file so far. This
160 : : must be less (or equal) than SIZE above. */
161 : : size_t m_nb_read;
162 : :
163 : : /* The index of the beginning of the current line. */
164 : : size_t m_line_start_idx;
165 : :
166 : : /* The number of the previous line read. This starts at 1. Zero
167 : : means we've read no line so far. */
168 : : size_t m_line_num;
169 : :
170 : : /* Could this file be missing a trailing newline on its final line?
171 : : Initially true (to cope with empty files), set to true/false
172 : : as each line is read. */
173 : : bool m_missing_trailing_newline;
174 : :
175 : : /* This is a record of the beginning and end of the lines we've seen
176 : : while reading the file. This is useful to avoid walking the data
177 : : from the beginning when we are asked to read a line that is
178 : : before LINE_START_IDX above. When the lines exceed line_record_size
179 : : this is scaled down dynamically, with the line_info becoming anchors. */
180 : : vec<line_info, va_heap> m_line_record;
181 : :
182 : : /* A cache of the recently seen lines. This is maintained as a ring
183 : : buffer. */
184 : : vec<line_info, va_heap> m_line_recent;
185 : :
186 : : /* First and last valid entry in m_line_recent. */
187 : : size_t m_line_recent_last, m_line_recent_first;
188 : :
189 : 245213 : void offset_buffer (int offset)
190 : : {
191 : 245213 : gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
192 : : : (size_t) offset <= m_size);
193 : 245213 : gcc_assert (m_data);
194 : 245213 : m_alloc_offset += offset;
195 : 245213 : m_data += offset;
196 : 245213 : m_size -= offset;
197 : 245213 : }
198 : :
199 : : };
200 : :
201 : : size_t file_cache_slot::line_record_size = 0;
202 : : size_t file_cache_slot::recent_cached_lines_shift = 8;
203 : :
204 : : /* Tune file_cache. */
205 : : void
206 : 282866 : file_cache::tune (size_t num_file_slots, size_t lines)
207 : : {
208 : 282866 : if (file_cache_slot::tune (lines) != lines
209 : 282866 : || m_num_file_slots != num_file_slots)
210 : : {
211 : 17 : delete[] m_file_slots;
212 : 181 : m_file_slots = new file_cache_slot[num_file_slots];
213 : : }
214 : 282866 : m_num_file_slots = num_file_slots;
215 : 282866 : }
216 : :
/* Forward declaration; the definition appears further down.  */
static const char *find_end_of_line (const char *s, size_t len);
219 : :
220 : : /* Current position in real source file. */
221 : :
222 : : location_t input_location = UNKNOWN_LOCATION;
223 : :
224 : : class line_maps *line_table;
225 : :
226 : : /* A stashed copy of "line_table" for use by selftest::line_table_test.
227 : : This needs to be a global so that it can be a GC root, and thus
228 : : prevent the stashed copy from being garbage-collected if the GC runs
229 : : during a line_table_test. */
230 : :
231 : : class line_maps *saved_line_table;
232 : :
233 : : /* Expand the source location LOC into a human readable location. If
234 : : LOC resolves to a builtin location, the file name of the readable
235 : : location is set to the string "<built-in>". If EXPANSION_POINT_P is
236 : : TRUE and LOC is virtual, then it is resolved to the expansion
237 : : point of the involved macro. Otherwise, it is resolved to the
238 : : spelling location of the token.
239 : :
240 : : When resolving to the spelling location of the token, if the
241 : : resulting location is for a built-in location (that is, it has no
242 : : associated line/column) in the context of a macro expansion, the
243 : : returned location is the first one (while unwinding the macro
244 : : location towards its expansion point) that is in real source
245 : : code.
246 : :
247 : : ASPECT controls which part of the location to use. */
248 : :
249 : : static expanded_location
250 : 808386860 : expand_location_1 (const line_maps *set,
251 : : location_t loc,
252 : : bool expansion_point_p,
253 : : enum location_aspect aspect)
254 : : {
255 : 808386860 : expanded_location xloc;
256 : 808386860 : const line_map_ordinary *map;
257 : 808386860 : enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
258 : 808386860 : tree block = NULL;
259 : :
260 : 808386860 : if (IS_ADHOC_LOC (loc))
261 : : {
262 : 281854270 : block = LOCATION_BLOCK (loc);
263 : 281854270 : loc = LOCATION_LOCUS (loc);
264 : : }
265 : :
266 : 808386860 : memset (&xloc, 0, sizeof (xloc));
267 : :
268 : 808386860 : if (loc >= RESERVED_LOCATION_COUNT)
269 : : {
270 : 764950102 : if (!expansion_point_p)
271 : : {
272 : : /* We want to resolve LOC to its spelling location.
273 : :
274 : : But if that spelling location is a reserved location that
275 : : appears in the context of a macro expansion (like for a
276 : : location for a built-in token), let's consider the first
277 : : location (toward the expansion point) that is not reserved;
278 : : that is, the first location that is in real source code. */
279 : 2003921 : loc = linemap_unwind_to_first_non_reserved_loc (set,
280 : : loc, NULL);
281 : 2003921 : lrk = LRK_SPELLING_LOCATION;
282 : : }
283 : 764950102 : loc = linemap_resolve_location (set, loc, lrk, &map);
284 : :
285 : : /* loc is now either in an ordinary map, or is a reserved location.
286 : : If it is a compound location, the caret is in a spelling location,
287 : : but the start/finish might still be a virtual location.
288 : : Depending of what the caller asked for, we may need to recurse
289 : : one level in order to resolve any virtual locations in the
290 : : end-points. */
291 : 764950102 : switch (aspect)
292 : : {
293 : 0 : default:
294 : 0 : gcc_unreachable ();
295 : : /* Fall through. */
296 : : case LOCATION_ASPECT_CARET:
297 : : break;
298 : 297749 : case LOCATION_ASPECT_START:
299 : 297749 : {
300 : 297749 : location_t start = get_start (loc);
301 : 297749 : if (start != loc)
302 : 1157 : return expand_location_1 (set, start, expansion_point_p, aspect);
303 : : }
304 : : break;
305 : 98777 : case LOCATION_ASPECT_FINISH:
306 : 98777 : {
307 : 98777 : location_t finish = get_finish (loc);
308 : 98777 : if (finish != loc)
309 : 1115 : return expand_location_1 (set, finish, expansion_point_p, aspect);
310 : : }
311 : : break;
312 : : }
313 : 764947830 : xloc = linemap_expand_location (set, map, loc);
314 : : }
315 : :
316 : 808384588 : xloc.data = block;
317 : 808384588 : if (loc <= BUILTINS_LOCATION)
318 : 43436758 : xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
319 : :
320 : 808384588 : return xloc;
321 : : }
322 : :
323 : : /* Lookup the cache used for the content of a given file accessed by
324 : : caret diagnostic. Return the found cached file, or NULL if no
325 : : cached file was found. */
326 : :
327 : : file_cache_slot *
328 : 1651169 : file_cache::lookup_file (const char *file_path)
329 : : {
330 : 1651169 : gcc_assert (file_path);
331 : :
332 : : /* This will contain the found cached file. */
333 : : file_cache_slot *r = NULL;
334 : 28070037 : for (unsigned i = 0; i < m_num_file_slots; ++i)
335 : : {
336 : 26418868 : file_cache_slot *c = &m_file_slots[i];
337 : 26418868 : if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
338 : : {
339 : 1535108 : c->inc_use_count ();
340 : 1535108 : r = c;
341 : : }
342 : : }
343 : :
344 : 1651169 : if (r)
345 : 1535108 : r->inc_use_count ();
346 : :
347 : 1651169 : return r;
348 : : }
349 : :
350 : : /* Purge any mention of FILENAME from the cache of files used for
351 : : printing source code. For use in selftests when working
352 : : with tempfiles. */
353 : :
354 : : void
355 : 576 : file_cache::forcibly_evict_file (const char *file_path)
356 : : {
357 : 576 : gcc_assert (file_path);
358 : :
359 : 576 : file_cache_slot *r = lookup_file (file_path);
360 : 576 : if (!r)
361 : : /* Not found. */
362 : : return;
363 : :
364 : 0 : r->evict ();
365 : : }
366 : :
367 : : /* Determine if FILE_PATH missing a trailing newline on its final line.
368 : : Only valid to call once all of the file has been loaded, by
369 : : requesting a line number beyond the end of the file. */
370 : :
371 : : bool
372 : 3455 : file_cache::missing_trailing_newline_p (const char *file_path)
373 : : {
374 : 3455 : gcc_assert (file_path);
375 : :
376 : 3455 : file_cache_slot *r = lookup_or_add_file (file_path);
377 : 3455 : return r->missing_trailing_newline_p ();
378 : : }
379 : :
380 : : void
381 : 4 : file_cache::add_buffered_content (const char *file_path,
382 : : const char *buffer,
383 : : size_t sz)
384 : : {
385 : 4 : gcc_assert (file_path);
386 : :
387 : 4 : file_cache_slot *r = lookup_file (file_path);
388 : 4 : if (!r)
389 : : {
390 : 4 : unsigned highest_use_count = 0;
391 : 4 : r = evicted_cache_tab_entry (&highest_use_count);
392 : 4 : if (!r->create (m_input_context, file_path, nullptr, highest_use_count))
393 : 0 : return;
394 : : }
395 : :
396 : 4 : r->set_content (buffer, sz);
397 : : }
398 : :
399 : : void
400 : 0 : file_cache_slot::evict ()
401 : : {
402 : 0 : free (m_file_path);
403 : 0 : m_file_path = NULL;
404 : 0 : if (m_fp)
405 : 0 : fclose (m_fp);
406 : 0 : m_error = false;
407 : 0 : m_fp = NULL;
408 : 0 : m_nb_read = 0;
409 : 0 : m_line_start_idx = 0;
410 : 0 : m_line_num = 0;
411 : 0 : m_line_record.truncate (0);
412 : 0 : m_line_recent_first = 0;
413 : 0 : m_line_recent_last = 0;
414 : 0 : m_use_count = 0;
415 : 0 : m_missing_trailing_newline = true;
416 : 0 : }
417 : :
418 : : /* Return the file cache that has been less used, recently, or the
419 : : first empty one. If HIGHEST_USE_COUNT is non-null,
420 : : *HIGHEST_USE_COUNT is set to the highest use count of the entries
421 : : in the cache table. */
422 : :
423 : : file_cache_slot*
424 : 85244 : file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
425 : : {
426 : 85244 : file_cache_slot *to_evict = &m_file_slots[0];
427 : 85244 : unsigned huc = to_evict->get_use_count ();
428 : 242540 : for (unsigned i = 1; i < m_num_file_slots; ++i)
429 : : {
430 : 236633 : file_cache_slot *c = &m_file_slots[i];
431 : 236633 : bool c_is_empty = (c->get_file_path () == NULL);
432 : :
433 : 236633 : if (c->get_use_count () < to_evict->get_use_count ()
434 : 236633 : || (to_evict->get_file_path () && c_is_empty))
435 : : /* We evict C because it's either an entry with a lower use
436 : : count or one that is empty. */
437 : : to_evict = c;
438 : :
439 : 236633 : if (huc < c->get_use_count ())
440 : : huc = c->get_use_count ();
441 : :
442 : 236633 : if (c_is_empty)
443 : : /* We've reached the end of the cache; subsequent elements are
444 : : all empty. */
445 : : break;
446 : : }
447 : :
448 : 85244 : if (highest_use_count)
449 : 85244 : *highest_use_count = huc;
450 : :
451 : 85244 : return to_evict;
452 : : }
453 : :
454 : : /* Create the cache used for the content of a given file to be
455 : : accessed by caret diagnostic. This cache is added to an array of
456 : : cache and can be retrieved by lookup_file_in_cache_tab. This
457 : : function returns the created cache. Note that only the last
458 : : m_num_file_slots files are cached.
459 : :
460 : : This can return nullptr if the FILE_PATH can't be opened for
461 : : reading, or if the content can't be converted to the input_charset. */
462 : :
463 : : file_cache_slot*
464 : 115481 : file_cache::add_file (const char *file_path)
465 : : {
466 : :
467 : 115481 : FILE *fp = fopen (file_path, "r");
468 : 115481 : if (fp == NULL)
469 : : return NULL;
470 : :
471 : 85240 : unsigned highest_use_count = 0;
472 : 85240 : file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
473 : 85240 : if (!r->create (m_input_context, file_path, fp, highest_use_count))
474 : : return NULL;
475 : : return r;
476 : : }
477 : :
478 : : /* Get a borrowed char_span to the full content of this file
479 : : as decoded according to the input charset, encoded as UTF-8. */
480 : :
481 : : char_span
482 : 216 : file_cache_slot::get_full_file_content ()
483 : : {
484 : 216 : char *line;
485 : 216 : ssize_t line_len;
486 : 1692 : while (get_next_line (&line, &line_len))
487 : : {
488 : : }
489 : 216 : return char_span (m_data, m_nb_read);
490 : : }
491 : :
492 : : /* Populate this slot for use on FILE_PATH and FP, dropping any
493 : : existing cached content within it. */
494 : :
495 : : bool
496 : 85244 : file_cache_slot::create (const file_cache::input_context &in_context,
497 : : const char *file_path, FILE *fp,
498 : : unsigned highest_use_count)
499 : : {
500 : 85244 : m_file_path = file_path ? xstrdup (file_path) : nullptr;
501 : 85244 : if (m_fp)
502 : 5907 : fclose (m_fp);
503 : 85244 : m_error = false;
504 : 85244 : m_fp = fp;
505 : 85244 : if (m_alloc_offset)
506 : 0 : offset_buffer (-m_alloc_offset);
507 : 85244 : m_nb_read = 0;
508 : 85244 : m_line_start_idx = 0;
509 : 85244 : m_line_num = 0;
510 : 85244 : m_line_recent_first = 0;
511 : 85244 : m_line_recent_last = 0;
512 : 85244 : m_line_record.truncate (0);
513 : : /* Ensure that this cache entry doesn't get evicted next time
514 : : add_file_to_cache_tab is called. */
515 : 85244 : m_use_count = ++highest_use_count;
516 : 85244 : m_missing_trailing_newline = true;
517 : :
518 : :
519 : : /* Check the input configuration to determine if we need to do any
520 : : transformations, such as charset conversion or BOM skipping. */
521 : 85244 : if (const char *input_charset = in_context.ccb (file_path))
522 : : {
523 : : /* Need a full-blown conversion of the input charset. */
524 : 5 : fclose (m_fp);
525 : 5 : m_fp = NULL;
526 : 5 : const cpp_converted_source cs
527 : 5 : = cpp_get_converted_source (file_path, input_charset);
528 : 5 : if (!cs.data)
529 : 0 : return false;
530 : 5 : if (m_data)
531 : 0 : XDELETEVEC (m_data);
532 : 5 : m_data = cs.data;
533 : 5 : m_nb_read = m_size = cs.len;
534 : 5 : m_alloc_offset = cs.data - cs.to_free;
535 : : }
536 : 85239 : else if (in_context.should_skip_bom)
537 : : {
538 : 60198 : if (read_data ())
539 : : {
540 : 60198 : const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
541 : 60198 : offset_buffer (offset);
542 : 60198 : m_nb_read -= offset;
543 : : }
544 : : }
545 : :
546 : : return true;
547 : : }
548 : :
549 : : void
550 : 4 : file_cache_slot::set_content (const char *buf, size_t sz)
551 : : {
552 : 4 : m_data = (char *)xmalloc (sz);
553 : 4 : memcpy (m_data, buf, sz);
554 : 4 : m_nb_read = m_size = sz;
555 : 4 : m_alloc_offset = 0;
556 : :
557 : 4 : if (m_fp)
558 : : {
559 : 0 : fclose (m_fp);
560 : 0 : m_fp = nullptr;
561 : : }
562 : 4 : }
563 : :
564 : : /* file_cache's ctor. */
565 : :
566 : 709285 : file_cache::file_cache ()
567 : 12057845 : : m_num_file_slots (16), m_file_slots (new file_cache_slot[m_num_file_slots])
568 : : {
569 : 709285 : initialize_input_context (nullptr, false);
570 : 709285 : }
571 : :
572 : : /* file_cache's dtor. */
573 : :
574 : 306402 : file_cache::~file_cache ()
575 : : {
576 : 5208998 : delete[] m_file_slots;
577 : 306402 : }
578 : :
579 : : void
580 : 0 : file_cache::dump (FILE *out, int indent) const
581 : : {
582 : 0 : for (size_t i = 0; i < m_num_file_slots; ++i)
583 : : {
584 : 0 : fprintf (out, "%*sslot[%i]:\n", indent, "", (int)i);
585 : 0 : m_file_slots[i].dump (out, indent + 2);
586 : : }
587 : 0 : }
588 : :
589 : : void
590 : 0 : file_cache::dump () const
591 : : {
592 : 0 : dump (stderr, 0);
593 : 0 : }
594 : :
595 : : /* Lookup the cache used for the content of a given file accessed by
596 : : caret diagnostic. If no cached file was found, create a new cache
597 : : for this file, add it to the array of cached file and return
598 : : it.
599 : :
600 : : This can return nullptr on a cache miss if FILE_PATH can't be opened for
601 : : reading, or if the content can't be converted to the input_charset. */
602 : :
603 : : file_cache_slot*
604 : 1650589 : file_cache::lookup_or_add_file (const char *file_path)
605 : : {
606 : 1650589 : file_cache_slot *r = lookup_file (file_path);
607 : 1650589 : if (r == NULL)
608 : 115481 : r = add_file (file_path);
609 : 1650589 : return r;
610 : : }
611 : :
612 : : /* Default constructor for a cache of file used by caret
613 : : diagnostic. */
614 : :
615 : 11348740 : file_cache_slot::file_cache_slot ()
616 : 11348740 : : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_error (false), m_data (0),
617 : 11348740 : m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
618 : 11348740 : m_line_num (0), m_missing_trailing_newline (true),
619 : 11348740 : m_line_recent_last (0), m_line_recent_first (0)
620 : : {
621 : 11348740 : m_line_record.create (0);
622 : 11348740 : m_line_recent.create (1U << recent_cached_lines_shift);
623 : 2916626180 : for (int i = 0; i < 1 << recent_cached_lines_shift; i++)
624 : 2905277440 : m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0));
625 : 11348740 : }
626 : :
627 : : /* Destructor for a cache of file used by caret diagnostic. */
628 : :
629 : 4902612 : file_cache_slot::~file_cache_slot ()
630 : : {
631 : 4902612 : free (m_file_path);
632 : 4902612 : if (m_fp)
633 : : {
634 : 77900 : fclose (m_fp);
635 : 77900 : m_fp = NULL;
636 : : }
637 : 4902612 : if (m_data)
638 : : {
639 : 77909 : offset_buffer (-m_alloc_offset);
640 : 77909 : XDELETEVEC (m_data);
641 : 77909 : m_data = 0;
642 : : }
643 : 4902612 : m_line_record.release ();
644 : 4902612 : m_line_recent.release ();
645 : 4902612 : }
646 : :
647 : : void
648 : 0 : file_cache_slot::dump (FILE *out, int indent) const
649 : : {
650 : 0 : if (!m_file_path)
651 : : {
652 : 0 : fprintf (out, "%*s(unused)\n", indent, "");
653 : 0 : return;
654 : : }
655 : 0 : fprintf (out, "%*sfile_path: %s\n", indent, "", m_file_path);
656 : 0 : fprintf (out, "%*sfp: %p\n", indent, "", (void *)m_fp);
657 : 0 : fprintf (out, "%*sneeds_read_p: %i\n", indent, "", (int)needs_read_p ());
658 : 0 : fprintf (out, "%*sneeds_grow_p: %i\n", indent, "", (int)needs_grow_p ());
659 : 0 : fprintf (out, "%*suse_count: %i\n", indent, "", m_use_count);
660 : 0 : fprintf (out, "%*ssize: %zi\n", indent, "", m_size);
661 : 0 : fprintf (out, "%*snb_read: %zi\n", indent, "", m_nb_read);
662 : 0 : fprintf (out, "%*sstart_line_idx: %zi\n", indent, "", m_line_start_idx);
663 : 0 : fprintf (out, "%*sline_num: %zi\n", indent, "", m_line_num);
664 : 0 : fprintf (out, "%*smissing_trailing_newline: %i\n",
665 : 0 : indent, "", (int)m_missing_trailing_newline);
666 : 0 : fprintf (out, "%*sline records (%i):\n",
667 : : indent, "", m_line_record.length ());
668 : 0 : int idx = 0;
669 : 0 : for (auto &line : m_line_record)
670 : 0 : fprintf (out, "%*s[%i]: line %zi: byte offsets: %zi-%zi\n",
671 : : indent + 2, "",
672 : 0 : idx++, line.line_num, line.start_pos, line.end_pos);
673 : : }
674 : :
675 : : /* Returns TRUE iff the cache would need to be filled with data coming
676 : : from the file. That is, either the cache is empty or full or the
677 : : current line is empty. Note that if the cache is full, it would
678 : : need to be extended and filled again. */
679 : :
680 : : bool
681 : 26737923 : file_cache_slot::needs_read_p () const
682 : : {
683 : 26737923 : return m_fp && (m_nb_read == 0
684 : 26712762 : || m_nb_read == m_size
685 : 26659209 : || (m_line_start_idx >= m_nb_read - 1));
686 : : }
687 : :
688 : : /* Return TRUE iff the cache is full and thus needs to be
689 : : extended. */
690 : :
691 : : bool
692 : 138788 : file_cache_slot::needs_grow_p () const
693 : : {
694 : 138788 : return m_nb_read == m_size;
695 : : }
696 : :
697 : : /* Grow the cache if it needs to be extended. */
698 : :
699 : : void
700 : 138788 : file_cache_slot::maybe_grow ()
701 : : {
702 : 138788 : if (!needs_grow_p ())
703 : : return;
704 : :
705 : 132881 : if (!m_data)
706 : : {
707 : 79328 : gcc_assert (m_size == 0 && m_alloc_offset == 0);
708 : 79328 : m_size = buffer_size;
709 : 79328 : m_data = XNEWVEC (char, m_size);
710 : : }
711 : : else
712 : : {
713 : 53553 : const int offset = m_alloc_offset;
714 : 53553 : offset_buffer (-offset);
715 : 53553 : m_size *= 2;
716 : 53553 : m_data = XRESIZEVEC (char, m_data, m_size);
717 : 53553 : offset_buffer (offset);
718 : : }
719 : : }
720 : :
721 : : /* Read more data into the cache. Extends the cache if need be.
722 : : Returns TRUE iff new data could be read. */
723 : :
724 : : bool
725 : 143522 : file_cache_slot::read_data ()
726 : : {
727 : 143522 : if (feof (m_fp) || ferror (m_fp))
728 : 4734 : return false;
729 : :
730 : 138788 : maybe_grow ();
731 : :
732 : 138788 : char * from = m_data + m_nb_read;
733 : 138788 : size_t to_read = m_size - m_nb_read;
734 : 138788 : size_t nb_read = fread (from, 1, to_read, m_fp);
735 : :
736 : 138788 : if (ferror (m_fp))
737 : : {
738 : 0 : m_error = true;
739 : 0 : return false;
740 : : }
741 : :
742 : 138788 : m_nb_read += nb_read;
743 : 138788 : return !!nb_read;
744 : : }
745 : :
746 : : /* Read new data iff the cache needs to be filled with more data
747 : : coming from the file FP. Return TRUE iff the cache was filled with
748 : : mode data. */
749 : :
750 : : bool
751 : 26737923 : file_cache_slot::maybe_read_data ()
752 : : {
753 : 26737923 : if (!needs_read_p ())
754 : : return false;
755 : 83324 : return read_data ();
756 : : }
757 : :
/* Helper function for file_cache_slot::get_next_line (), to find the
   end of the next line within the LEN bytes at S.  Returns with the
   memchr convention, i.e. nullptr if a line terminator was not found.
   Line endings are determined in the same manner that libcpp does:
   any of \n, \r\n, or \r is a line ending.  For \r\n the result
   points at the \n.  */

static const char *
find_end_of_line (const char *s, size_t len)
{
  const char *const limit = s + len;
  for (const char *p = s; p < limit; ++p)
    switch (*p)
      {
      case '\n':
        return p;

      case '\r':
        /* Don't find the line ending if \r is the very last character
           in the buffer; we do not know if it's the end of the file
           or just the end of what has been read so far, and we
           wouldn't want to break in the middle of what's actually a
           \r\n sequence.  The caller handles a file ending in \r.  */
        if (p + 1 == limit)
          return nullptr;
        return p[1] == '\n' ? p + 1 : p;

      default:
        break;
      }

  return nullptr;
}
788 : :
789 : : /* Read a new line from file FP, using C as a cache for the data
790 : : coming from the file. Upon successful completion, *LINE is set to
791 : : the beginning of the line found. *LINE points directly in the
792 : : line cache and is only valid until the next call of get_next_line.
793 : : *LINE_LEN is set to the length of the line. Note that the line
794 : : does not contain any terminal delimiter. This function returns
795 : : true if some data was read or process from the cache, false
796 : : otherwise. Note that subsequent calls to get_next_line might
797 : : make the content of *LINE invalid. */
798 : :
799 : : bool
800 : 26730631 : file_cache_slot::get_next_line (char **line, ssize_t *line_len)
801 : : {
802 : : /* Fill the cache with data to process. */
803 : 26730631 : maybe_read_data ();
804 : :
805 : 26730631 : size_t remaining_size = m_nb_read - m_line_start_idx;
806 : 26730631 : if (remaining_size == 0)
807 : : /* There is no more data to process. */
808 : : return false;
809 : :
810 : 26725887 : const char *line_start = m_data + m_line_start_idx;
811 : :
812 : 26725887 : const char *next_line_start = NULL;
813 : 26725887 : size_t len = 0;
814 : 26725887 : const char *line_end = find_end_of_line (line_start, remaining_size);
815 : 26725887 : if (line_end == NULL)
816 : : {
817 : : /* We haven't found an end-of-line delimiter in the cache.
818 : : Fill the cache with more data from the file and look again. */
819 : 7292 : while (maybe_read_data ())
820 : : {
821 : 7222 : line_start = m_data + m_line_start_idx;
822 : 7222 : remaining_size = m_nb_read - m_line_start_idx;
823 : 7222 : line_end = find_end_of_line (line_start, remaining_size);
824 : 7222 : if (line_end != NULL)
825 : : {
826 : 3622 : next_line_start = line_end + 1;
827 : 3622 : break;
828 : : }
829 : : }
830 : 3692 : if (line_end == NULL)
831 : : {
832 : : /* We've loaded all the file into the cache and still no
833 : : terminator. Let's say the line ends up at one byte past the
834 : : end of the file. This is to stay consistent with the case
835 : : of when the line ends up with a terminator and line_end points to
836 : : that. That consistency is useful below in the len calculation.
837 : :
838 : : If the file ends in a \r, we didn't identify it as a line
839 : : terminator above, so do that now instead. */
840 : 70 : line_end = m_data + m_nb_read;
841 : 70 : if (m_nb_read && line_end[-1] == '\r')
842 : : {
843 : 0 : --line_end;
844 : 0 : m_missing_trailing_newline = false;
845 : : }
846 : : else
847 : 70 : m_missing_trailing_newline = true;
848 : : }
849 : : else
850 : 3622 : m_missing_trailing_newline = false;
851 : : }
852 : : else
853 : : {
854 : 26722195 : next_line_start = line_end + 1;
855 : 26722195 : m_missing_trailing_newline = false;
856 : : }
857 : :
858 : 26725887 : if (m_error)
859 : : return false;
860 : :
861 : : /* At this point, we've found the end of the of line. It either points to
862 : : the line terminator or to one byte after the last byte of the file. */
863 : 26725887 : gcc_assert (line_end != NULL);
864 : :
865 : 26725887 : len = line_end - line_start;
866 : :
867 : 26725887 : if (m_line_start_idx < m_nb_read)
868 : 26725887 : *line = const_cast<char *> (line_start);
869 : :
870 : 26725887 : ++m_line_num;
871 : :
872 : : /* Now update our line record so that re-reading lines from the
873 : : before m_line_start_idx is faster. */
874 : 26725887 : size_t rlen = m_line_record.length ();
875 : : /* Only update when beyond the previously cached region. */
876 : 23857083 : if (rlen == 0 || m_line_record[rlen - 1].line_num < m_line_num)
877 : : {
878 : 16424777 : size_t spacing
879 : : = (rlen >= 2
880 : 16424777 : ? (m_line_record[rlen - 1].line_num
881 : 10770062 : - m_line_record[rlen - 2].line_num) : 1);
882 : 16424777 : size_t delta
883 : 16424777 : = rlen >= 1 ? m_line_num - m_line_record[rlen - 1].line_num : 1;
884 : :
885 : 16424777 : size_t max_size = line_record_size;
886 : : /* One anchor per hundred input lines. */
887 : 16424777 : if (max_size == 0)
888 : 16424777 : max_size = m_line_num / 100;
889 : :
890 : : /* If we're too far beyond drop half of the lines to rebalance. */
891 : 16424777 : if (rlen == max_size && delta >= spacing * 2)
892 : : {
893 : : size_t j = 0;
894 : 1047988 : for (size_t i = 1; i < rlen; i += 2)
895 : 69454 : m_line_record[j++] = m_line_record[i];
896 : 978534 : m_line_record.truncate (j);
897 : 978534 : rlen = j;
898 : 978534 : spacing *= 2;
899 : : }
900 : :
901 : 16424777 : if (rlen < max_size && delta >= spacing)
902 : : {
903 : 1093591 : file_cache_slot::line_info li (m_line_num, m_line_start_idx,
904 : 1093591 : line_end - m_data);
905 : 1093591 : m_line_record.safe_push (li);
906 : : }
907 : : }
908 : :
909 : : /* Cache recent tail lines separately for fast access. This assumes
910 : : most accesses do not skip backwards. */
911 : 26725887 : if (m_line_recent_last == m_line_recent_first
912 : 26725887 : || m_line_recent[m_line_recent_last].line_num == m_line_num - 1)
913 : : {
914 : 15746762 : size_t mask = ((size_t) 1 << recent_cached_lines_shift) - 1;
915 : 15746762 : m_line_recent_last = (m_line_recent_last + 1) & mask;
916 : 15746762 : if (m_line_recent_last == m_line_recent_first)
917 : 9708948 : m_line_recent_first = (m_line_recent_first + 1) & mask;
918 : 31493524 : m_line_recent[m_line_recent_last]
919 : 15746762 : = file_cache_slot::line_info (m_line_num, m_line_start_idx,
920 : 15746762 : line_end - m_data);
921 : : }
922 : :
923 : : /* Update m_line_start_idx so that it points to the next line to be
924 : : read. */
925 : 26725887 : if (next_line_start)
926 : 26725817 : m_line_start_idx = next_line_start - m_data;
927 : : else
928 : : /* We didn't find any terminal '\n'. Let's consider that the end
929 : : of line is the end of the data in the cache. The next
930 : : invocation of get_next_line will either read more data from the
931 : : underlying file or return false early because we've reached the
932 : : end of the file. */
933 : 70 : m_line_start_idx = m_nb_read;
934 : :
935 : 26725887 : *line_len = len;
936 : :
937 : 26725887 : return true;
938 : : }
939 : :
940 : : /* Consume the next bytes coming from the cache (or from its
941 : : underlying file if there are remaining unread bytes in the file)
942 : : until we reach the next end-of-line (or end-of-file). There is no
943 : : copying from the cache involved. Return TRUE upon successful
944 : : completion. */
945 : :
946 : : bool
947 : 26225817 : file_cache_slot::goto_next_line ()
948 : : {
949 : 26225817 : char *l;
950 : 26225817 : ssize_t len;
951 : :
952 : 26225817 : return get_next_line (&l, &len);
953 : : }
954 : :
955 : : /* Read an arbitrary line number LINE_NUM from the file cached in C.
956 : : If the line was read successfully, *LINE points to the beginning
957 : : of the line in the file cache and *LINE_LEN is the length of the
958 : : line. *LINE is not nul-terminated, but may contain zero bytes.
959 : : *LINE is only valid until the next call of read_line_num.
960 : : This function returns bool if a line was read. */
961 : :
962 : : bool
963 : 1616677 : file_cache_slot::read_line_num (size_t line_num,
964 : : char ** line, ssize_t *line_len)
965 : : {
966 : 1616677 : gcc_assert (line_num > 0);
967 : :
968 : : /* Is the line in the recent line cache?
969 : : This assumes the main file processing is only using
970 : : a single contiguous cursor with only temporary excursions. */
971 : 1616677 : if (m_line_recent_first != m_line_recent_last
972 : 1531433 : && m_line_recent[m_line_recent_first].line_num <= line_num
973 : 3101016 : && m_line_recent[m_line_recent_last].line_num >= line_num)
974 : : {
975 : 1112105 : line_info &last = m_line_recent[m_line_recent_last];
976 : 1112105 : size_t mask = (1U << recent_cached_lines_shift) - 1;
977 : 1112105 : size_t idx = (m_line_recent_last - (last.line_num - line_num)) & mask;
978 : 1112105 : line_info &recent = m_line_recent[idx];
979 : 1112105 : gcc_assert (recent.line_num == line_num);
980 : 1112105 : *line = m_data + recent.start_pos;
981 : 1112105 : *line_len = recent.end_pos - recent.start_pos;
982 : 1112105 : return true;
983 : : }
984 : :
985 : 504572 : if (line_num <= m_line_num)
986 : : {
987 : 31232 : line_info l (line_num, 0, 0);
988 : 31232 : int i = m_line_record.lower_bound (l, line_info::less_than);
989 : 31232 : if (i == 0)
990 : : {
991 : 23394 : m_line_start_idx = 0;
992 : 23394 : m_line_num = 0;
993 : : }
994 : 7838 : else if (m_line_record[i - 1].line_num == line_num)
995 : : {
996 : : /* We have the start/end of the line. */
997 : 0 : *line = m_data + m_line_record[i - 1].start_pos;
998 : 0 : *line_len = m_line_record[i - 1].end_pos - m_line_record[i - 1].start_pos;
999 : 0 : return true;
1000 : : }
1001 : : else
1002 : : {
1003 : 7838 : gcc_assert (m_line_record[i - 1].line_num < m_line_num);
1004 : 7838 : m_line_start_idx = m_line_record[i - 1].start_pos;
1005 : 7838 : m_line_num = m_line_record[i - 1].line_num - 1;
1006 : : }
1007 : : }
1008 : :
1009 : : /* Let's walk from line m_line_num up to line_num - 1, without
1010 : : copying any line. */
1011 : 26728939 : while (m_line_num < line_num - 1)
1012 : 26225817 : if (!goto_next_line ())
1013 : : return false;
1014 : :
1015 : : /* The line we want is the next one. Let's read it. */
1016 : 503122 : return get_next_line (line, line_len);
1017 : : }
1018 : :
1019 : : /* Return the physical source line that corresponds to FILE_PATH/LINE.
1020 : : The line is not nul-terminated. The returned pointer is only
1021 : : valid until the next call of location_get_source_line.
1022 : : Note that the line can contain several null characters,
1023 : : so the returned value's length has the actual length of the line.
1024 : : If the function fails, a NULL char_span is returned. */
1025 : :
1026 : : char_span
1027 : 1646981 : file_cache::get_source_line (const char *file_path, int line)
1028 : : {
1029 : 1646981 : char *buffer = NULL;
1030 : 1646981 : ssize_t len;
1031 : :
1032 : 1646981 : if (line == 0)
1033 : 67 : return char_span (NULL, 0);
1034 : :
1035 : 1646914 : if (file_path == NULL)
1036 : 0 : return char_span (NULL, 0);
1037 : :
1038 : 1646914 : file_cache_slot *c = lookup_or_add_file (file_path);
1039 : 1646914 : if (c == NULL)
1040 : 30237 : return char_span (NULL, 0);
1041 : :
1042 : 1616677 : bool read = c->read_line_num (line, &buffer, &len);
1043 : 1616677 : if (!read)
1044 : 4528 : return char_span (NULL, 0);
1045 : :
1046 : 1612149 : return char_span (buffer, len);
1047 : : }
1048 : :
1049 : : /* Return a NUL-terminated copy of the source text between two locations, or
1050 : : NULL if the arguments are invalid. The caller is responsible for freeing
1051 : : the return value. */
1052 : :
1053 : : char *
1054 : 828 : get_source_text_between (file_cache &fc, location_t start, location_t end)
1055 : : {
1056 : 828 : expanded_location expstart
1057 : 828 : = expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
1058 : 828 : expanded_location expend
1059 : 828 : = expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
1060 : :
1061 : : /* If the locations are in different files or the end comes before the
1062 : : start, give up and return nothing. */
1063 : 828 : if (!expstart.file || !expend.file)
1064 : : return NULL;
1065 : 807 : if (strcmp (expstart.file, expend.file) != 0)
1066 : : return NULL;
1067 : 807 : if (expstart.line > expend.line)
1068 : : return NULL;
1069 : 807 : if (expstart.line == expend.line
1070 : 806 : && expstart.column > expend.column)
1071 : : return NULL;
1072 : : /* These aren't real column numbers, give up. */
1073 : 807 : if (expstart.column == 0 || expend.column == 0)
1074 : : return NULL;
1075 : :
1076 : : /* For a single line we need to trim both edges. */
1077 : 807 : if (expstart.line == expend.line)
1078 : : {
1079 : 806 : char_span line = fc.get_source_line (expstart.file, expstart.line);
1080 : 806 : if (line.length () < 1)
1081 : : return NULL;
1082 : 804 : int s = expstart.column - 1;
1083 : 804 : int len = expend.column - s;
1084 : 804 : if (line.length () < (size_t)expend.column)
1085 : : return NULL;
1086 : 804 : return line.subspan (s, len).xstrdup ();
1087 : : }
1088 : :
1089 : 1 : struct obstack buf_obstack;
1090 : 1 : obstack_init (&buf_obstack);
1091 : :
1092 : : /* Loop through all lines in the range and append each to buf; may trim
1093 : : parts of the start and end lines off depending on column values. */
1094 : 8 : for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1095 : : {
1096 : 7 : char_span line = fc.get_source_line (expstart.file, lnum);
1097 : 7 : if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
1098 : 0 : continue;
1099 : :
1100 : : /* For the first line in the range, only start at expstart.column */
1101 : 7 : if (lnum == expstart.line)
1102 : : {
1103 : 1 : unsigned off = expstart.column - 1;
1104 : 1 : if (line.length () < off)
1105 : 0 : return NULL;
1106 : 1 : line = line.subspan (off, line.length() - off);
1107 : : }
1108 : : /* For the last line, don't go past expend.column */
1109 : 6 : else if (lnum == expend.line)
1110 : : {
1111 : 1 : if (line.length () < (size_t)expend.column)
1112 : : return NULL;
1113 : 1 : line = line.subspan (0, expend.column);
1114 : : }
1115 : :
1116 : : /* Combine spaces at the beginning of later lines. */
1117 : 7 : if (lnum > expstart.line)
1118 : : {
1119 : : unsigned off;
1120 : 30 : for (off = 0; off < line.length(); ++off)
1121 : 30 : if (line[off] != ' ' && line[off] != '\t')
1122 : : break;
1123 : 6 : if (off > 0)
1124 : : {
1125 : 6 : obstack_1grow (&buf_obstack, ' ');
1126 : 6 : line = line.subspan (off, line.length() - off);
1127 : : }
1128 : : }
1129 : :
1130 : : /* This does not include any trailing newlines. */
1131 : 7 : obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1132 : : }
1133 : :
1134 : : /* NUL-terminate and finish the buf obstack. */
1135 : 1 : obstack_1grow (&buf_obstack, 0);
1136 : 1 : const char *buf = (const char *) obstack_finish (&buf_obstack);
1137 : :
1138 : 1 : return xstrdup (buf);
1139 : : }
1140 : :
1141 : :
1142 : : char_span
1143 : 220 : file_cache::get_source_file_content (const char *file_path)
1144 : : {
1145 : 220 : file_cache_slot *c = lookup_or_add_file (file_path);
1146 : 220 : if (c == nullptr)
1147 : 4 : return char_span (nullptr, 0);
1148 : 216 : return c->get_full_file_content ();
1149 : : }
1150 : :
1151 : : /* Test if the location originates from the spelling location of a
1152 : : builtin-tokens. That is, return TRUE if LOC is a (possibly
1153 : : virtual) location of a built-in token that appears in the expansion
1154 : : list of a macro. Please note that this function also works on
1155 : : tokens that result from built-in tokens. For instance, the
1156 : : function would return true if passed a token "4" that is the result
1157 : : of the expansion of the built-in __LINE__ macro. */
1158 : : bool
1159 : 11833 : is_location_from_builtin_token (location_t loc)
1160 : : {
1161 : 11833 : const line_map_ordinary *map = NULL;
1162 : 11833 : loc = linemap_resolve_location (line_table, loc,
1163 : : LRK_SPELLING_LOCATION, &map);
1164 : 11833 : return loc == BUILTINS_LOCATION;
1165 : : }
1166 : :
1167 : : /* Expand the source location LOC into a human readable location. If
1168 : : LOC is virtual, it resolves to the expansion point of the involved
1169 : : macro. If LOC resolves to a builtin location, the file name of the
1170 : : readable location is set to the string "<built-in>". */
1171 : :
1172 : : expanded_location
1173 : 806378991 : expand_location (location_t loc)
1174 : : {
1175 : 806378991 : return expand_location_1 (line_table, loc, /*expansion_point_p=*/true,
1176 : 806378991 : LOCATION_ASPECT_CARET);
1177 : : }
1178 : :
1179 : : /* Expand the source location LOC into a human readable location. If
1180 : : LOC is virtual, it resolves to the expansion location of the
1181 : : relevant macro. If LOC resolves to a builtin location, the file
1182 : : name of the readable location is set to the string
1183 : : "<built-in>". */
1184 : :
1185 : : expanded_location
1186 : 84305 : expand_location_to_spelling_point (location_t loc,
1187 : : enum location_aspect aspect)
1188 : : {
1189 : 84305 : return expand_location_1 (line_table, loc, /*expansion_point_p=*/false,
1190 : 84305 : aspect);
1191 : : }
1192 : :
1193 : : /* The rich_location class within libcpp requires a way to expand
1194 : : location_t instances, and relies on the client code
1195 : : providing a symbol named
1196 : : linemap_client_expand_location_to_spelling_point
1197 : : to do this.
1198 : :
1199 : : This is the implementation for libcommon.a (all host binaries),
1200 : : which simply calls into expand_location_1. */
1201 : :
1202 : : expanded_location
1203 : 1921292 : linemap_client_expand_location_to_spelling_point (const line_maps *set,
1204 : : location_t loc,
1205 : : enum location_aspect aspect)
1206 : : {
1207 : 1921292 : return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect);
1208 : : }
1209 : :
1210 : :
1211 : : /* If LOCATION is in a system header and if it is a virtual location
1212 : : for a token coming from the expansion of a macro, unwind it to
1213 : : the location of the expansion point of the macro. If the expansion
1214 : : point is also in a system header return the original LOCATION.
1215 : : Otherwise, return the location of the expansion point.
1216 : :
1217 : : This is used for instance when we want to emit diagnostics about a
1218 : : token that may be located in a macro that is itself defined in a
1219 : : system header, for example, for the NULL macro. In such a case, if
1220 : : LOCATION were passed directly to diagnostic functions such as
1221 : : warning_at, the diagnostic would be suppressed (unless
1222 : : -Wsystem-headers). */
1223 : :
1224 : : location_t
1225 : 451739060 : expansion_point_location_if_in_system_header (location_t location)
1226 : : {
1227 : 451739060 : if (!in_system_header_at (location))
1228 : : return location;
1229 : :
1230 : 382602983 : location_t xloc = linemap_resolve_location (line_table, location,
1231 : : LRK_MACRO_EXPANSION_POINT,
1232 : : NULL);
1233 : 382602983 : return in_system_header_at (xloc) ? location : xloc;
1234 : : }
1235 : :
1236 : : /* If LOCATION is a virtual location for a token coming from the expansion
1237 : : of a macro, unwind to the location of the expansion point of the macro. */
1238 : :
1239 : : location_t
1240 : 197 : expansion_point_location (location_t location)
1241 : : {
1242 : 197 : return linemap_resolve_location (line_table, location,
1243 : 197 : LRK_MACRO_EXPANSION_POINT, NULL);
1244 : : }
1245 : :
1246 : : /* Construct a location with caret at CARET, ranging from START to
1247 : : FINISH.
1248 : :
1249 : : For example, consider:
1250 : :
1251 : : 11111111112
1252 : : 12345678901234567890
1253 : : 522
1254 : : 523 return foo + bar;
1255 : : ~~~~^~~~~
1256 : : 524
1257 : :
1258 : : The location's caret is at the "+", line 523 column 15, but starts
1259 : : earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1260 : : of "bar" at column 19. */
1261 : :
1262 : : location_t
1263 : 2225605323 : make_location (location_t caret, location_t start, location_t finish)
1264 : : {
1265 : 2225605323 : return line_table->make_location (caret, start, finish);
1266 : : }
1267 : :
1268 : : /* Same as above, but taking a source range rather than two locations. */
1269 : :
1270 : : location_t
1271 : 1340997490 : make_location (location_t caret, source_range src_range)
1272 : : {
1273 : 1340997490 : location_t pure_loc = get_pure_location (caret);
1274 : 1340997490 : return line_table->get_or_create_combined_loc (pure_loc, src_range,
1275 : 1340997490 : nullptr, 0);
1276 : : }
1277 : :
1278 : : /* An expanded_location stores the column in byte units. This function
1279 : : converts that column to display units. That requires reading the associated
1280 : : source line in order to calculate the display width. If that cannot be done
1281 : : for any reason, then returns the byte column as a fallback. */
1282 : : int
1283 : 721222 : location_compute_display_column (file_cache &fc,
1284 : : expanded_location exploc,
1285 : : const cpp_char_column_policy &policy)
1286 : : {
1287 : 721222 : if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1288 : : return exploc.column;
1289 : 685867 : char_span line = fc.get_source_line (exploc.file, exploc.line);
1290 : : /* If line is NULL, this function returns exploc.column which is the
1291 : : desired fallback. */
1292 : 685867 : return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
1293 : 685867 : exploc.column, policy);
1294 : : }
1295 : :
1296 : : /* Dump statistics to stderr about the memory usage of the line_table
1297 : : set of line maps. This also displays some statistics about macro
1298 : : expansion. */
1299 : :
1300 : : void
1301 : 0 : dump_line_table_statistics (void)
1302 : : {
1303 : 0 : struct linemap_stats s;
1304 : 0 : long total_used_map_size,
1305 : : macro_maps_size,
1306 : : total_allocated_map_size;
1307 : :
1308 : 0 : memset (&s, 0, sizeof (s));
1309 : :
1310 : 0 : linemap_get_statistics (line_table, &s);
1311 : :
1312 : 0 : macro_maps_size = s.macro_maps_used_size
1313 : 0 : + s.macro_maps_locations_size;
1314 : :
1315 : 0 : total_allocated_map_size = s.ordinary_maps_allocated_size
1316 : 0 : + s.macro_maps_allocated_size
1317 : : + s.macro_maps_locations_size;
1318 : :
1319 : 0 : total_used_map_size = s.ordinary_maps_used_size
1320 : 0 : + s.macro_maps_used_size
1321 : : + s.macro_maps_locations_size;
1322 : :
1323 : 0 : fprintf (stderr, "Number of expanded macros: %5ld\n",
1324 : : s.num_expanded_macros);
1325 : 0 : if (s.num_expanded_macros != 0)
1326 : 0 : fprintf (stderr, "Average number of tokens per macro expansion: %5ld\n",
1327 : 0 : s.num_macro_tokens / s.num_expanded_macros);
1328 : 0 : fprintf (stderr,
1329 : : "\nLine Table allocations during the "
1330 : : "compilation process\n");
1331 : 0 : fprintf (stderr, "Number of ordinary maps used: " PRsa (5) "\n",
1332 : 0 : SIZE_AMOUNT (s.num_ordinary_maps_used));
1333 : 0 : fprintf (stderr, "Ordinary map used size: " PRsa (5) "\n",
1334 : 0 : SIZE_AMOUNT (s.ordinary_maps_used_size));
1335 : 0 : fprintf (stderr, "Number of ordinary maps allocated: " PRsa (5) "\n",
1336 : 0 : SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1337 : 0 : fprintf (stderr, "Ordinary maps allocated size: " PRsa (5) "\n",
1338 : 0 : SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1339 : 0 : fprintf (stderr, "Number of macro maps used: " PRsa (5) "\n",
1340 : 0 : SIZE_AMOUNT (s.num_macro_maps_used));
1341 : 0 : fprintf (stderr, "Macro maps used size: " PRsa (5) "\n",
1342 : 0 : SIZE_AMOUNT (s.macro_maps_used_size));
1343 : 0 : fprintf (stderr, "Macro maps locations size: " PRsa (5) "\n",
1344 : 0 : SIZE_AMOUNT (s.macro_maps_locations_size));
1345 : 0 : fprintf (stderr, "Macro maps size: " PRsa (5) "\n",
1346 : 0 : SIZE_AMOUNT (macro_maps_size));
1347 : 0 : fprintf (stderr, "Duplicated maps locations size: " PRsa (5) "\n",
1348 : 0 : SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1349 : 0 : fprintf (stderr, "Total allocated maps size: " PRsa (5) "\n",
1350 : 0 : SIZE_AMOUNT (total_allocated_map_size));
1351 : 0 : fprintf (stderr, "Total used maps size: " PRsa (5) "\n",
1352 : 0 : SIZE_AMOUNT (total_used_map_size));
1353 : 0 : fprintf (stderr, "Ad-hoc table size: " PRsa (5) "\n",
1354 : 0 : SIZE_AMOUNT (s.adhoc_table_size));
1355 : 0 : fprintf (stderr, "Ad-hoc table entries used: " PRsa (5) "\n",
1356 : 0 : SIZE_AMOUNT (s.adhoc_table_entries_used));
1357 : 0 : fprintf (stderr, "optimized_ranges: " PRsa (5) "\n",
1358 : 0 : SIZE_AMOUNT (line_table->m_num_optimized_ranges));
1359 : 0 : fprintf (stderr, "unoptimized_ranges: " PRsa (5) "\n",
1360 : 0 : SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
1361 : :
1362 : 0 : fprintf (stderr, "\n");
1363 : 0 : }
1364 : :
1365 : : /* Get location one beyond the final location in ordinary map IDX. */
1366 : :
1367 : : static location_t
1368 : 0 : get_end_location (class line_maps *set, line_map_uint_t idx)
1369 : : {
1370 : 0 : if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1371 : 0 : return set->highest_location;
1372 : :
1373 : 0 : struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1374 : 0 : return MAP_START_LOCATION (next_map);
1375 : : }
1376 : :
/* Helper function for write_digit_row.
   Write the units digit of DIGIT to STREAM as one decimal character.
   The sign of DIGIT is ignored, so the character written is always in
   the range '0'..'9'.  */

static void
write_digit (FILE *stream, int digit)
{
  /* For a negative DIGIT the remainder is negative as well; fold it
     back so that we never emit a character below '0'.  */
  int units = digit % 10;
  if (units < 0)
    units = -units;
  fputc ('0' + units, stream);
}
1384 : :
1385 : : /* Helper function for dump_location_info.
1386 : : Write a row of numbers to STREAM, numbering a source line,
1387 : : giving the units, tens, hundreds etc of the column number. */
1388 : :
1389 : : static void
1390 : 0 : write_digit_row (FILE *stream, int indent,
1391 : : const line_map_ordinary *map,
1392 : : location_t loc, int max_col, int divisor)
1393 : : {
1394 : 0 : fprintf (stream, "%*c", indent, ' ');
1395 : 0 : fprintf (stream, "|");
1396 : 0 : for (int column = 1; column < max_col; column++)
1397 : : {
1398 : 0 : location_t column_loc = loc + (location_t (column) << map->m_range_bits);
1399 : 0 : write_digit (stream, column_loc / divisor);
1400 : : }
1401 : 0 : fprintf (stream, "\n");
1402 : 0 : }
1403 : :
1404 : : /* Write a half-closed (START) / half-open (END) interval of
1405 : : location_t to STREAM. */
1406 : :
1407 : : static void
1408 : 0 : dump_location_range (FILE *stream,
1409 : : location_t start, location_t end)
1410 : : {
1411 : 0 : fprintf (stream,
1412 : : " location_t interval: %llu <= loc < %llu\n",
1413 : : (unsigned long long) start, (unsigned long long) end);
1414 : 0 : }
1415 : :
1416 : : /* Write a labelled description of a half-closed (START) / half-open (END)
1417 : : interval of location_t to STREAM. */
1418 : :
1419 : : static void
1420 : 0 : dump_labelled_location_range (FILE *stream,
1421 : : const char *name,
1422 : : location_t start, location_t end)
1423 : : {
1424 : 0 : fprintf (stream, "%s\n", name);
1425 : 0 : dump_location_range (stream, start, end);
1426 : 0 : fprintf (stream, "\n");
1427 : 0 : }
1428 : :
1429 : : /* Write a visualization of the locations in the line_table to STREAM. */
1430 : :
1431 : : void
1432 : 0 : dump_location_info (FILE *stream)
1433 : : {
1434 : 0 : file_cache fc;
1435 : :
1436 : : /* Visualize the reserved locations. */
1437 : 0 : dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1438 : : 0, RESERVED_LOCATION_COUNT);
1439 : :
1440 : 0 : using ULL = unsigned long long;
1441 : :
1442 : : /* Visualize the ordinary line_map instances, rendering the sources. */
1443 : 0 : for (line_map_uint_t idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table);
1444 : : idx++)
1445 : : {
1446 : 0 : location_t end_location = get_end_location (line_table, idx);
1447 : : /* half-closed: doesn't include this one. */
1448 : :
1449 : 0 : const line_map_ordinary *map
1450 : 0 : = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1451 : 0 : fprintf (stream, "ORDINARY MAP: %llu\n", (ULL) idx);
1452 : 0 : dump_location_range (stream,
1453 : : MAP_START_LOCATION (map), end_location);
1454 : 0 : fprintf (stream, " file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1455 : 0 : fprintf (stream, " starting at line: %i\n",
1456 : : ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1457 : 0 : fprintf (stream, " column and range bits: %i\n",
1458 : 0 : map->m_column_and_range_bits);
1459 : 0 : fprintf (stream, " column bits: %i\n",
1460 : 0 : map->m_column_and_range_bits - map->m_range_bits);
1461 : 0 : fprintf (stream, " range bits: %i\n",
1462 : 0 : map->m_range_bits);
1463 : 0 : const char * reason;
1464 : 0 : switch (map->reason) {
1465 : : case LC_ENTER:
1466 : : reason = "LC_ENTER";
1467 : : break;
1468 : 0 : case LC_LEAVE:
1469 : 0 : reason = "LC_LEAVE";
1470 : 0 : break;
1471 : 0 : case LC_RENAME:
1472 : 0 : reason = "LC_RENAME";
1473 : 0 : break;
1474 : 0 : case LC_RENAME_VERBATIM:
1475 : 0 : reason = "LC_RENAME_VERBATIM";
1476 : 0 : break;
1477 : 0 : case LC_ENTER_MACRO:
1478 : 0 : reason = "LC_RENAME_MACRO";
1479 : 0 : break;
1480 : 0 : default:
1481 : 0 : reason = "Unknown";
1482 : : }
1483 : 0 : fprintf (stream, " reason: %d (%s)\n", map->reason, reason);
1484 : :
1485 : 0 : const line_map_ordinary *includer_map
1486 : 0 : = linemap_included_from_linemap (line_table, map);
1487 : 0 : fprintf (stream, " included from location: %llu",
1488 : 0 : (ULL) linemap_included_from (map));
1489 : 0 : if (includer_map) {
1490 : 0 : fprintf (stream, " (in ordinary map %llu)",
1491 : 0 : ULL (includer_map - line_table->info_ordinary.maps));
1492 : : }
1493 : 0 : fprintf (stream, "\n");
1494 : :
1495 : : /* Render the span of source lines that this "map" covers. */
1496 : 0 : for (location_t loc = MAP_START_LOCATION (map);
1497 : 0 : loc < end_location;
1498 : 0 : loc += (location_t (1) << map->m_range_bits))
1499 : : {
1500 : 0 : gcc_assert (pure_location_p (line_table, loc) );
1501 : :
1502 : 0 : expanded_location exploc
1503 : 0 : = linemap_expand_location (line_table, map, loc);
1504 : :
1505 : 0 : if (exploc.column == 0)
1506 : : {
1507 : : /* Beginning of a new source line: draw the line. */
1508 : :
1509 : 0 : char_span line_text = fc.get_source_line (exploc.file,
1510 : : exploc.line);
1511 : 0 : if (!line_text)
1512 : : break;
1513 : 0 : fprintf (stream,
1514 : : "%s:%3i|loc:%5llu|%.*s\n",
1515 : : exploc.file, exploc.line,
1516 : : (ULL) loc,
1517 : 0 : (int)line_text.length (), line_text.get_buffer ());
1518 : :
1519 : : /* "loc" is at column 0, which means "the whole line".
1520 : : Render the locations *within* the line, by underlining
1521 : : it, showing the location_t numeric values
1522 : : at each column. */
1523 : 0 : auto max_col = (ULL (1) << map->m_column_and_range_bits) - 1;
1524 : 0 : if (max_col > line_text.length ())
1525 : 0 : max_col = line_text.length () + 1;
1526 : :
1527 : 0 : int len_lnum = num_digits (exploc.line);
1528 : 0 : if (len_lnum < 3)
1529 : : len_lnum = 3;
1530 : 0 : int len_loc = num_digits (loc);
1531 : 0 : if (len_loc < 5)
1532 : : len_loc = 5;
1533 : :
1534 : 0 : int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1535 : :
1536 : : /* Thousands. */
1537 : 0 : if (end_location > 999)
1538 : 0 : write_digit_row (stream, indent, map, loc, max_col, 1000);
1539 : :
1540 : : /* Hundreds. */
1541 : 0 : if (end_location > 99)
1542 : 0 : write_digit_row (stream, indent, map, loc, max_col, 100);
1543 : :
1544 : : /* Tens. */
1545 : 0 : write_digit_row (stream, indent, map, loc, max_col, 10);
1546 : :
1547 : : /* Units. */
1548 : 0 : write_digit_row (stream, indent, map, loc, max_col, 1);
1549 : : }
1550 : : }
1551 : 0 : fprintf (stream, "\n");
1552 : : }
1553 : :
1554 : : /* Visualize unallocated values. */
1555 : 0 : dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1556 : : line_table->highest_location,
1557 : : LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1558 : :
1559 : : /* Visualize the macro line_map instances, rendering the sources. */
1560 : 0 : for (line_map_uint_t i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1561 : : {
1562 : : /* Each macro map that is allocated owns location_t values
1563 : : that are *lower* that the one before them.
1564 : : Hence it's meaningful to view them either in order of ascending
1565 : : source locations, or in order of ascending macro map index. */
1566 : 0 : const bool ascending_location_ts = true;
1567 : 0 : auto idx = (ascending_location_ts
1568 : 0 : ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1569 : 0 : : i);
1570 : 0 : const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1571 : 0 : fprintf (stream, "MACRO %llu: %s (%u tokens)\n",
1572 : : (ULL) idx,
1573 : : linemap_map_get_macro_name (map),
1574 : : MACRO_MAP_NUM_MACRO_TOKENS (map));
1575 : 0 : dump_location_range (stream,
1576 : 0 : map->start_location,
1577 : 0 : (map->start_location
1578 : 0 : + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1579 : 0 : inform (map->get_expansion_point_location (),
1580 : : "expansion point is location %llu",
1581 : 0 : (ULL) map->get_expansion_point_location ());
1582 : 0 : fprintf (stream, " map->start_location: %llu\n",
1583 : 0 : (ULL) map->start_location);
1584 : :
1585 : 0 : fprintf (stream, " macro_locations:\n");
1586 : 0 : for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1587 : : {
1588 : 0 : location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1589 : 0 : location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1590 : :
1591 : : /* linemap_add_macro_token encodes token numbers in an expansion
1592 : : by putting them after MAP_START_LOCATION. */
1593 : :
1594 : : /* I'm typically seeing 4 uninitialized entries at the end of
1595 : : 0xafafafaf.
1596 : : This appears to be due to macro.cc:replace_args
1597 : : adding 2 extra args for padding tokens; presumably there may
1598 : : be a leading and/or trailing padding token injected,
1599 : : each for 2 more location slots.
1600 : : This would explain there being up to 4 location_ts slots
1601 : : that may be uninitialized. */
1602 : :
1603 : 0 : fprintf (stream, " %u: %llu, %llu\n",
1604 : : i,
1605 : : (ULL) x,
1606 : : (ULL) y);
1607 : 0 : if (x == y)
1608 : : {
1609 : 0 : if (x < MAP_START_LOCATION (map))
1610 : 0 : inform (x, "token %u has %<x-location == y-location == %llu%>",
1611 : : i, (ULL) x);
1612 : : else
1613 : 0 : fprintf (stream,
1614 : : "x-location == y-location == %llu"
1615 : : " encodes token # %u\n",
1616 : : (ULL) x,
1617 : 0 : (unsigned int)(x - MAP_START_LOCATION (map)));
1618 : : }
1619 : : else
1620 : : {
1621 : 0 : inform (x, "token %u has %<x-location == %llu%>", i, (ULL) x);
1622 : 0 : inform (x, "token %u has %<y-location == %llu%>", i, (ULL) y);
1623 : : }
1624 : : }
1625 : 0 : fprintf (stream, "\n");
1626 : : }
1627 : :
1628 : : /* It appears that MAX_LOCATION_T itself is never assigned to a
1629 : : macro map, presumably due to an off-by-one error somewhere
1630 : : between the logic in linemap_enter_macro and
1631 : : LINEMAPS_MACRO_LOWEST_LOCATION. */
1632 : 0 : dump_labelled_location_range (stream, "MAX_LOCATION_T",
1633 : : MAX_LOCATION_T,
1634 : : MAX_LOCATION_T + 1);
1635 : :
1636 : : /* Visualize ad-hoc values. */
1637 : 0 : dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1638 : : MAX_LOCATION_T + 1, location_t (-1));
1639 : 0 : }
1640 : :
1641 : : /* string_concat's constructor. */
1642 : :
1643 : 2820317 : string_concat::string_concat (int num, location_t *locs)
1644 : 2820317 : : m_num (num)
1645 : : {
1646 : 2820317 : m_locs = ggc_vec_alloc <location_t> (num);
1647 : 35795767 : for (int i = 0; i < num; i++)
1648 : 32975450 : m_locs[i] = locs[i];
1649 : 2820317 : }
1650 : :
1651 : : /* string_concat_db's constructor. */
1652 : :
1653 : 206774 : string_concat_db::string_concat_db ()
1654 : : {
1655 : 206774 : m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1656 : 206774 : }
1657 : :
1658 : : /* Record that a string concatenation occurred, covering NUM
1659 : : string literal tokens. LOCS is an array of size NUM, containing the
1660 : : locations of the tokens. A copy of LOCS is taken. */
1661 : :
1662 : : void
1663 : 2820323 : string_concat_db::record_string_concatenation (int num, location_t *locs)
1664 : : {
1665 : 2820323 : gcc_assert (num > 1);
1666 : 2820323 : gcc_assert (locs);
1667 : :
1668 : 2820323 : location_t key_loc = get_key_loc (locs[0]);
1669 : : /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
1670 : : any data now recorded under key 'key_loc' would be overwritten by a
1671 : : subsequent call with the same key 'key_loc'. */
1672 : 2820323 : if (RESERVED_LOCATION_P (key_loc))
1673 : 6 : return;
1674 : :
1675 : 2820317 : string_concat *concat
1676 : 2820317 : = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1677 : 2820317 : m_table->put (key_loc, concat);
1678 : : }
1679 : :
1680 : : /* Determine if LOC was the location of the initial token of a
1681 : : concatenation of string literal tokens.
1682 : : If so, *OUT_NUM is written to with the number of tokens, and
1683 : : *OUT_LOCS with the location of an array of locations of the
1684 : : tokens, and return true. *OUT_LOCS is a borrowed pointer to
1685 : : storage owned by the string_concat_db.
1686 : : Otherwise, return false. */
1687 : :
1688 : : bool
1689 : 34628 : string_concat_db::get_string_concatenation (location_t loc,
1690 : : int *out_num,
1691 : : location_t **out_locs)
1692 : : {
1693 : 34628 : gcc_assert (out_num);
1694 : 34628 : gcc_assert (out_locs);
1695 : :
1696 : 34628 : location_t key_loc = get_key_loc (loc);
1697 : : /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
1698 : : discussion in 'string_concat_db::record_string_concatenation'. */
1699 : 34628 : if (RESERVED_LOCATION_P (key_loc))
1700 : : return false;
1701 : :
1702 : 34626 : string_concat **concat = m_table->get (key_loc);
1703 : 34626 : if (!concat)
1704 : : return false;
1705 : :
1706 : 4365 : *out_num = (*concat)->m_num;
1707 : 4365 : *out_locs =(*concat)->m_locs;
1708 : 4365 : return true;
1709 : : }
1710 : :
1711 : : /* Internal function. Canonicalize LOC into a form suitable for
1712 : : use as a key within the database, stripping away macro expansion,
1713 : : ad-hoc information, and range information, using the location of
1714 : : the start of LOC within an ordinary linemap. */
1715 : :
1716 : : location_t
1717 : 2854951 : string_concat_db::get_key_loc (location_t loc)
1718 : : {
1719 : 2854951 : loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1720 : : NULL);
1721 : :
1722 : 2854951 : loc = get_range_from_loc (line_table, loc).m_start;
1723 : :
1724 : 2854951 : return loc;
1725 : : }
1726 : :
1727 : : /* Helper class for use within get_substring_ranges_for_loc.
1728 : : An vec of cpp_string with responsibility for releasing all of the
1729 : : str->text for each str in the vector. */
1730 : :
1731 : : class auto_cpp_string_vec : public auto_vec <cpp_string>
1732 : : {
1733 : : public:
1734 : 34628 : auto_cpp_string_vec (int alloc)
1735 : 69256 : : auto_vec <cpp_string> (alloc) {}
1736 : :
1737 : 34628 : ~auto_cpp_string_vec ()
1738 : : {
1739 : : /* Clean up the copies within this vec. */
1740 : 34628 : int i;
1741 : 34628 : cpp_string *str;
1742 : 70183 : FOR_EACH_VEC_ELT (*this, i, str)
1743 : 35555 : free (const_cast <unsigned char *> (str->text));
1744 : 34628 : }
1745 : : };
1746 : :
1747 : : /* Attempt to populate RANGES with source location information on the
1748 : : individual characters within the string literal found at STRLOC.
1749 : : If CONCATS is non-NULL, then any string literals that the token at
1750 : : STRLOC was concatenated with are also added to RANGES.
1751 : :
1752 : : Return NULL if successful, or an error message if any errors occurred (in
1753 : : which case RANGES may be only partially populated and should not
1754 : : be used).
1755 : :
1756 : : This is implemented by re-parsing the relevant source line(s). */
1757 : :
1758 : : static const char *
1759 : 36874 : get_substring_ranges_for_loc (cpp_reader *pfile,
1760 : : file_cache &fc,
1761 : : string_concat_db *concats,
1762 : : location_t strloc,
1763 : : enum cpp_ttype type,
1764 : : cpp_substring_ranges &ranges)
1765 : : {
1766 : 36874 : gcc_assert (pfile);
1767 : :
1768 : 36874 : if (strloc == UNKNOWN_LOCATION)
1769 : : return "unknown location";
1770 : :
1771 : : /* Reparsing the strings requires accurate location information.
1772 : : If -ftrack-macro-expansion has been overridden from its default
1773 : : of 2, then we might have a location of a macro expansion point,
1774 : : rather than the location of the literal itself.
1775 : : Avoid this by requiring that we have full macro expansion tracking
1776 : : for substring locations to be available. */
1777 : 36874 : if (cpp_get_options (pfile)->track_macro_expansion != 2)
1778 : : return "track_macro_expansion != 2";
1779 : :
1780 : : /* If #line or # 44 "file"-style directives are present, then there's
1781 : : no guarantee that the line numbers we have can be used to locate
1782 : : the strings. For example, we might have a .i file with # directives
1783 : : pointing back to lines within a .c file, but the .c file might
1784 : : have been edited since the .i file was created.
1785 : : In such a case, the safest course is to disable on-demand substring
1786 : : locations. */
1787 : 34631 : if (line_table->seen_line_directive)
1788 : : return "seen line directive";
1789 : :
1790 : : /* If string concatenation has occurred at STRLOC, get the locations
1791 : : of all of the literal tokens making up the compound string.
1792 : : Otherwise, just use STRLOC. */
1793 : 34628 : int num_locs = 1;
1794 : 34628 : location_t *strlocs = &strloc;
1795 : 34628 : if (concats)
1796 : 34628 : concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1797 : :
1798 : 34628 : auto_cpp_string_vec strs (num_locs);
1799 : 34628 : auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1800 : 70176 : for (int i = 0; i < num_locs; i++)
1801 : : {
1802 : : /* Get range of strloc. We will use it to locate the start and finish
1803 : : of the literal token within the line. */
1804 : 41650 : source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1805 : :
1806 : 41650 : if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1807 : : {
1808 : : /* If the string token was within a macro expansion, then we can
1809 : : cope with it for the simple case where we have a single token.
1810 : : Otherwise, bail out. */
1811 : 1147 : if (src_range.m_start != src_range.m_finish)
1812 : 6102 : return "macro expansion";
1813 : : }
1814 : : else
1815 : : {
1816 : 40503 : if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1817 : : /* If so, we can't reliably determine where the token started within
1818 : : its line. */
1819 : : return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1820 : :
1821 : 34831 : if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1822 : : /* If so, we can't reliably determine where the token finished
1823 : : within its line. */
1824 : : return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1825 : : }
1826 : :
1827 : 35807 : expanded_location start
1828 : 35807 : = expand_location_to_spelling_point (src_range.m_start,
1829 : : LOCATION_ASPECT_START);
1830 : 35807 : expanded_location finish
1831 : 35807 : = expand_location_to_spelling_point (src_range.m_finish,
1832 : : LOCATION_ASPECT_FINISH);
1833 : 35807 : if (start.file != finish.file)
1834 : : return "range endpoints are in different files";
1835 : 35807 : if (start.line != finish.line)
1836 : : return "range endpoints are on different lines";
1837 : 35556 : if (start.column > finish.column)
1838 : : return "range endpoints are reversed";
1839 : :
1840 : 35556 : char_span line = fc.get_source_line (start.file, start.line);
1841 : 35556 : if (!line)
1842 : : return "unable to read source line";
1843 : :
1844 : : /* Determine the location of the literal (including quotes
1845 : : and leading prefix chars, such as the 'u' in a u""
1846 : : token). */
1847 : 35556 : size_t literal_length = finish.column - start.column + 1;
1848 : :
1849 : : /* Ensure that we don't crash if we got the wrong location. */
1850 : 35556 : if (start.column < 1)
1851 : : return "zero start column";
1852 : 35556 : if (line.length () < (start.column - 1 + literal_length))
1853 : : return "line is not wide enough";
1854 : :
1855 : 35555 : char_span literal = line.subspan (start.column - 1, literal_length);
1856 : :
1857 : 35555 : cpp_string from;
1858 : 35555 : from.len = literal_length;
1859 : : /* Make a copy of the literal, to avoid having to rely on
1860 : : the lifetime of the copy of the line within the cache.
1861 : : This will be released by the auto_cpp_string_vec dtor. */
1862 : 35555 : from.text = (unsigned char *)literal.xstrdup ();
1863 : 35555 : strs.safe_push (from);
1864 : :
1865 : : /* For very long lines, a new linemap could have started
1866 : : halfway through the token.
1867 : : Ensure that the loc_reader uses the linemap of the
1868 : : *end* of the token for its start location. */
1869 : 35555 : const line_map_ordinary *start_ord_map;
1870 : 35555 : linemap_resolve_location (line_table, src_range.m_start,
1871 : : LRK_SPELLING_LOCATION, &start_ord_map);
1872 : 35555 : const line_map_ordinary *final_ord_map;
1873 : 35555 : linemap_resolve_location (line_table, src_range.m_finish,
1874 : : LRK_SPELLING_LOCATION, &final_ord_map);
1875 : 35555 : if (start_ord_map == NULL || final_ord_map == NULL)
1876 : : return "failed to get ordinary maps";
1877 : : /* Bulletproofing. We ought to only have different ordinary maps
1878 : : for start vs finish due to line-length jumps. */
1879 : 35554 : if (start_ord_map != final_ord_map
1880 : 6865 : && start_ord_map->to_file != final_ord_map->to_file)
1881 : : return "start and finish are spelled in different ordinary maps";
1882 : : /* The file from linemap_resolve_location ought to match that from
1883 : : expand_location_to_spelling_point. */
1884 : 35554 : if (start_ord_map->to_file != start.file)
1885 : : return "mismatching file after resolving linemap";
1886 : :
1887 : 35548 : location_t start_loc
1888 : 35548 : = linemap_position_for_line_and_column (line_table, final_ord_map,
1889 : : start.line, start.column);
1890 : :
1891 : 35548 : cpp_string_location_reader loc_reader (start_loc, line_table);
1892 : 35548 : loc_readers.safe_push (loc_reader);
1893 : : }
1894 : :
1895 : : /* Rerun cpp_interpret_string, or rather, a modified version of it. */
1896 : 57052 : const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1897 : : loc_readers.address (),
1898 : : num_locs, &ranges, type);
1899 : 28526 : if (err)
1900 : : return err;
1901 : :
1902 : : /* Success: "ranges" should now contain information on the string. */
1903 : : return NULL;
1904 : 34628 : }
1905 : :
1906 : : /* Attempt to populate *OUT_LOC with source location information on the
1907 : : given characters within the string literal found at STRLOC.
1908 : : CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1909 : : character set.
1910 : :
1911 : : For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1912 : : and string literal "012345\n789"
1913 : : *OUT_LOC is written to with:
1914 : : "012345\n789"
1915 : : ~^~~~~
1916 : :
1917 : : If CONCATS is non-NULL, then any string literals that the token at
1918 : : STRLOC was concatenated with are also considered.
1919 : :
1920 : : This is implemented by re-parsing the relevant source line(s).
1921 : :
1922 : : Return NULL if successful, or an error message if any errors occurred.
1923 : : Error messages are intended for GCC developers (to help debugging) rather
1924 : : than for end-users. */
1925 : :
1926 : : const char *
1927 : 11222 : get_location_within_string (cpp_reader *pfile,
1928 : : file_cache &fc,
1929 : : string_concat_db *concats,
1930 : : location_t strloc,
1931 : : enum cpp_ttype type,
1932 : : int caret_idx, int start_idx, int end_idx,
1933 : : location_t *out_loc)
1934 : : {
1935 : 11222 : gcc_checking_assert (caret_idx >= 0);
1936 : 11222 : gcc_checking_assert (start_idx >= 0);
1937 : 11222 : gcc_checking_assert (end_idx >= 0);
1938 : 11222 : gcc_assert (out_loc);
1939 : :
1940 : 11222 : cpp_substring_ranges ranges;
1941 : 11222 : const char *err
1942 : 11222 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
1943 : 11222 : if (err)
1944 : : return err;
1945 : :
1946 : 8509 : if (caret_idx >= ranges.get_num_ranges ())
1947 : : return "caret_idx out of range";
1948 : 8509 : if (start_idx >= ranges.get_num_ranges ())
1949 : : return "start_idx out of range";
1950 : 8509 : if (end_idx >= ranges.get_num_ranges ())
1951 : : return "end_idx out of range";
1952 : :
1953 : 8509 : *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1954 : 8509 : ranges.get_range (start_idx).m_start,
1955 : 8509 : ranges.get_range (end_idx).m_finish);
1956 : 8509 : return NULL;
1957 : 11222 : }
1958 : :
1959 : : /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
1960 : :
1961 : : location_t
1962 : 40334939 : location_with_discriminator (location_t locus, int discriminator)
1963 : : {
1964 : 40334939 : tree block = LOCATION_BLOCK (locus);
1965 : 40334939 : source_range src_range = get_range_from_loc (line_table, locus);
1966 : 40334939 : locus = get_pure_location (locus);
1967 : :
1968 : 40334939 : if (locus == UNKNOWN_LOCATION)
1969 : : return locus;
1970 : :
1971 : 39775442 : return line_table->get_or_create_combined_loc (locus, src_range, block,
1972 : 39775442 : discriminator);
1973 : : }
1974 : :
1975 : : /* Return TRUE if LOCUS represents a location with a discriminator. */
1976 : :
1977 : : bool
1978 : 14795135 : has_discriminator (location_t locus)
1979 : : {
1980 : 14795135 : return get_discriminator_from_loc (locus) != 0;
1981 : : }
1982 : :
1983 : : /* Return the discriminator for LOCUS. */
1984 : :
1985 : : int
1986 : 317468315 : get_discriminator_from_loc (location_t locus)
1987 : : {
1988 : 317468315 : return get_discriminator_from_loc (line_table, locus);
1989 : : }
1990 : :
1991 : : #if CHECKING_P
1992 : :
1993 : : namespace selftest {
1994 : :
1995 : : /* Selftests of location handling. */
1996 : :
1997 : : /* Attempt to populate *OUT_RANGE with source location information on the
1998 : : given character within the string literal found at STRLOC.
1999 : : CHAR_IDX refers to an offset within the execution character set.
2000 : : If CONCATS is non-NULL, then any string literals that the token at
2001 : : STRLOC was concatenated with are also considered.
2002 : :
2003 : : This is implemented by re-parsing the relevant source line(s).
2004 : :
2005 : : Return NULL if successful, or an error message if any errors occurred.
2006 : : Error messages are intended for GCC developers (to help debugging) rather
2007 : : than for end-users. */
2008 : :
2009 : : static const char *
2010 : 23748 : get_source_range_for_char (cpp_reader *pfile,
2011 : : file_cache &fc,
2012 : : string_concat_db *concats,
2013 : : location_t strloc,
2014 : : enum cpp_ttype type,
2015 : : int char_idx,
2016 : : source_range *out_range)
2017 : : {
2018 : 23748 : gcc_checking_assert (char_idx >= 0);
2019 : 23748 : gcc_assert (out_range);
2020 : :
2021 : 23748 : cpp_substring_ranges ranges;
2022 : 23748 : const char *err
2023 : 23748 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
2024 : 23748 : if (err)
2025 : : return err;
2026 : :
2027 : 18652 : if (char_idx >= ranges.get_num_ranges ())
2028 : : return "char_idx out of range";
2029 : :
2030 : 18652 : *out_range = ranges.get_range (char_idx);
2031 : 18652 : return NULL;
2032 : 23748 : }
2033 : :
2034 : : /* As get_source_range_for_char, but write to *OUT the number
2035 : : of ranges that are available. */
2036 : :
2037 : : static const char *
2038 : 1268 : get_num_source_ranges_for_substring (cpp_reader *pfile,
2039 : : file_cache &fc,
2040 : : string_concat_db *concats,
2041 : : location_t strloc,
2042 : : enum cpp_ttype type,
2043 : : int *out)
2044 : : {
2045 : 1268 : gcc_assert (out);
2046 : :
2047 : 1268 : cpp_substring_ranges ranges;
2048 : 1268 : const char *err
2049 : 1268 : = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
2050 : :
2051 : 1268 : if (err)
2052 : : return err;
2053 : :
2054 : 884 : *out = ranges.get_num_ranges ();
2055 : 884 : return NULL;
2056 : 1268 : }
2057 : :
2058 : : /* Selftests of location handling. */
2059 : :
2060 : : /* Verify that compare() on linenum_type handles comparisons over the full
2061 : : range of the type. */
2062 : :
2063 : : static void
2064 : 4 : test_linenum_comparisons ()
2065 : : {
2066 : 4 : linenum_type min_line (0);
2067 : 4 : linenum_type max_line (0xffffffff);
2068 : 4 : ASSERT_EQ (0, compare (min_line, min_line));
2069 : 4 : ASSERT_EQ (0, compare (max_line, max_line));
2070 : :
2071 : 4 : ASSERT_GT (compare (max_line, min_line), 0);
2072 : 4 : ASSERT_LT (compare (min_line, max_line), 0);
2073 : 4 : }
2074 : :
2075 : : /* Helper function for verifying location data: when location_t
2076 : : values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
2077 : : as having column 0. */
2078 : :
2079 : : static bool
2080 : 65136 : should_have_column_data_p (location_t loc)
2081 : : {
2082 : 65136 : if (IS_ADHOC_LOC (loc))
2083 : 20240 : loc = get_location_from_adhoc_loc (line_table, loc);
2084 : 65136 : if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
2085 : 6452 : return false;
2086 : : return true;
2087 : : }
2088 : :
2089 : : /* Selftest for should_have_column_data_p. */
2090 : :
2091 : : static void
2092 : 4 : test_should_have_column_data_p ()
2093 : : {
2094 : 4 : ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
2095 : 4 : ASSERT_TRUE
2096 : : (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
2097 : 4 : ASSERT_FALSE
2098 : : (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
2099 : 4 : }
2100 : :
2101 : : /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
2102 : : on LOC. */
2103 : :
2104 : : static void
2105 : 1068 : assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
2106 : : location_t loc)
2107 : : {
2108 : 1068 : ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2109 : 1068 : ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2110 : : /* If location_t values are sufficiently high, then column numbers
2111 : : will be unavailable and LOCATION_COLUMN (loc) will be 0.
2112 : : When close to the threshold, column numbers *may* be present: if
2113 : : the final linemap before the threshold contains a line that straddles
2114 : : the threshold, locations in that line have column information. */
2115 : 1068 : if (should_have_column_data_p (loc))
2116 : 660 : ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2117 : 1068 : }
2118 : :
2119 : : /* Various selftests involve constructing a line table and one or more
2120 : : line maps within it.
2121 : :
2122 : : For maximum test coverage we want to run these tests with a variety
2123 : : of situations:
2124 : : - line_table->default_range_bits: some frontends use a non-zero value
2125 : : and others use zero
2126 : : - the fallback modes within line-map.cc: there are various threshold
2127 : : values for location_t beyond line-map.cc changes
2128 : : behavior (disabling of the range-packing optimization, disabling
2129 : : of column-tracking). We can exercise these by starting the line_table
2130 : : at interesting values at or near these thresholds.
2131 : :
2132 : : The following struct describes a particular case within our test
2133 : : matrix. */
2134 : :
2135 : : class line_table_case
2136 : : {
2137 : : public:
2138 : 5860 : line_table_case (int default_range_bits, location_t base_location)
2139 : 5860 : : m_default_range_bits (default_range_bits),
2140 : 5860 : m_base_location (base_location)
2141 : : {}
2142 : :
2143 : : int m_default_range_bits;
2144 : : location_t m_base_location;
2145 : : };
2146 : :
2147 : : /* Constructor. Store the old value of line_table, and create a new
2148 : : one, using sane defaults. */
2149 : :
2150 : 21 : line_table_test::line_table_test ()
2151 : : {
2152 : 21 : gcc_assert (saved_line_table == NULL);
2153 : 21 : saved_line_table = line_table;
2154 : 21 : line_table = ggc_alloc<line_maps> ();
2155 : 21 : linemap_init (line_table, BUILTINS_LOCATION);
2156 : 21 : gcc_assert (saved_line_table->m_reallocator);
2157 : 21 : line_table->m_reallocator = saved_line_table->m_reallocator;
2158 : 21 : gcc_assert (saved_line_table->m_round_alloc_size);
2159 : 21 : line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2160 : 21 : line_table->default_range_bits = 0;
2161 : 21 : }
2162 : :
2163 : : /* Constructor. Store the old value of line_table, and create a new
2164 : : one, using the sitation described in CASE_. */
2165 : :
2166 : 6724 : line_table_test::line_table_test (const line_table_case &case_)
2167 : : {
2168 : 6724 : gcc_assert (saved_line_table == NULL);
2169 : 6724 : saved_line_table = line_table;
2170 : 6724 : line_table = ggc_alloc<line_maps> ();
2171 : 6724 : linemap_init (line_table, BUILTINS_LOCATION);
2172 : 6724 : gcc_assert (saved_line_table->m_reallocator);
2173 : 6724 : line_table->m_reallocator = saved_line_table->m_reallocator;
2174 : 6724 : gcc_assert (saved_line_table->m_round_alloc_size);
2175 : 6724 : line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2176 : 6724 : line_table->default_range_bits = case_.m_default_range_bits;
2177 : 6724 : if (case_.m_base_location)
2178 : : {
2179 : 6160 : line_table->highest_location = case_.m_base_location;
2180 : 6160 : line_table->highest_line = case_.m_base_location;
2181 : : }
2182 : 6724 : }
2183 : :
2184 : : /* Destructor. Restore the old value of line_table. */
2185 : :
2186 : 6745 : line_table_test::~line_table_test ()
2187 : : {
2188 : 6745 : gcc_assert (saved_line_table != NULL);
2189 : 6745 : line_table = saved_line_table;
2190 : 6745 : saved_line_table = NULL;
2191 : 6745 : }
2192 : :
2193 : : /* Verify basic operation of ordinary linemaps. */
2194 : :
2195 : : static void
2196 : 96 : test_accessing_ordinary_linemaps (const line_table_case &case_)
2197 : : {
2198 : 96 : line_table_test ltt (case_);
2199 : :
2200 : : /* Build a simple linemap describing some locations. */
2201 : 96 : linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
2202 : :
2203 : 96 : linemap_line_start (line_table, 1, 100);
2204 : 96 : location_t loc_a = linemap_position_for_column (line_table, 1);
2205 : 96 : location_t loc_b = linemap_position_for_column (line_table, 23);
2206 : :
2207 : 96 : linemap_line_start (line_table, 2, 100);
2208 : 96 : location_t loc_c = linemap_position_for_column (line_table, 1);
2209 : 96 : location_t loc_d = linemap_position_for_column (line_table, 17);
2210 : :
2211 : : /* Example of a very long line. */
2212 : 96 : linemap_line_start (line_table, 3, 2000);
2213 : 96 : location_t loc_e = linemap_position_for_column (line_table, 700);
2214 : :
2215 : : /* Transitioning back to a short line. */
2216 : 96 : linemap_line_start (line_table, 4, 0);
2217 : 96 : location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
2218 : :
2219 : 96 : if (should_have_column_data_p (loc_back_to_short))
2220 : : {
2221 : : /* Verify that we switched to short lines in the linemap. */
2222 : 56 : line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
2223 : 56 : ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
2224 : : }
2225 : :
2226 : : /* Example of a line that will eventually be seen to be longer
2227 : : than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2228 : : below that. */
2229 : 96 : linemap_line_start (line_table, 5, 2000);
2230 : :
2231 : 96 : location_t loc_start_of_very_long_line
2232 : 96 : = linemap_position_for_column (line_table, 2000);
2233 : 96 : location_t loc_too_wide
2234 : 96 : = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 1);
2235 : 96 : location_t loc_too_wide_2
2236 : 96 : = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 2);
2237 : :
2238 : : /* ...and back to a sane line length. */
2239 : 96 : linemap_line_start (line_table, 6, 100);
2240 : 96 : location_t loc_sane_again = linemap_position_for_column (line_table, 10);
2241 : :
2242 : 96 : linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2243 : :
2244 : : /* Multiple files. */
2245 : 96 : linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
2246 : 96 : linemap_line_start (line_table, 1, 200);
2247 : 96 : location_t loc_f = linemap_position_for_column (line_table, 150);
2248 : 96 : linemap_add (line_table, LC_LEAVE, false, NULL, 0);
2249 : :
2250 : : /* Verify that we can recover the location info. */
2251 : 96 : assert_loceq ("foo.c", 1, 1, loc_a);
2252 : 96 : assert_loceq ("foo.c", 1, 23, loc_b);
2253 : 96 : assert_loceq ("foo.c", 2, 1, loc_c);
2254 : 96 : assert_loceq ("foo.c", 2, 17, loc_d);
2255 : 96 : assert_loceq ("foo.c", 3, 700, loc_e);
2256 : 96 : assert_loceq ("foo.c", 4, 100, loc_back_to_short);
2257 : :
2258 : : /* In the very wide line, the initial location should be fully tracked. */
2259 : 96 : assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
2260 : : /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
2261 : : be disabled. */
2262 : 96 : assert_loceq ("foo.c", 5, 0, loc_too_wide);
2263 : 96 : assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
2264 : : /*...and column-tracking should be re-enabled for subsequent lines. */
2265 : 96 : assert_loceq ("foo.c", 6, 10, loc_sane_again);
2266 : :
2267 : 96 : assert_loceq ("bar.c", 1, 150, loc_f);
2268 : :
2269 : 96 : ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2270 : 96 : ASSERT_TRUE (pure_location_p (line_table, loc_a));
2271 : :
2272 : : /* Verify using make_location to build a range, and extracting data
2273 : : back from it. */
2274 : 96 : location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
2275 : 96 : ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2276 : 96 : ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2277 : 96 : source_range src_range = get_range_from_loc (line_table, range_c_b_d);
2278 : 96 : ASSERT_EQ (loc_b, src_range.m_start);
2279 : 96 : ASSERT_EQ (loc_d, src_range.m_finish);
2280 : 96 : }
2281 : :
2282 : : /* Verify various properties of UNKNOWN_LOCATION. */
2283 : :
2284 : : static void
2285 : 4 : test_unknown_location ()
2286 : : {
2287 : 4 : ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2288 : 4 : ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
2289 : 4 : ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
2290 : 4 : }
2291 : :
2292 : : /* Verify various properties of BUILTINS_LOCATION. */
2293 : :
2294 : : static void
2295 : 4 : test_builtins ()
2296 : : {
2297 : 4 : assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
2298 : 4 : ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2299 : 4 : }
2300 : :
2301 : : /* Regression test for make_location.
2302 : : Ensure that we use pure locations for the start/finish of the range,
2303 : : rather than storing a packed or ad-hoc range as the start/finish. */
2304 : :
2305 : : static void
2306 : 96 : test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2307 : : {
2308 : : /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
2309 : : with C++ frontend.
2310 : : ....................0000000001111111111222.
2311 : : ....................1234567890123456789012. */
2312 : 96 : const char *content = " r += !aaa == bbb;\n";
2313 : 96 : temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2314 : 96 : line_table_test ltt (case_);
2315 : 96 : linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
2316 : :
2317 : 96 : const location_t c11 = linemap_position_for_column (line_table, 11);
2318 : 96 : const location_t c12 = linemap_position_for_column (line_table, 12);
2319 : 96 : const location_t c13 = linemap_position_for_column (line_table, 13);
2320 : 96 : const location_t c14 = linemap_position_for_column (line_table, 14);
2321 : 96 : const location_t c21 = linemap_position_for_column (line_table, 21);
2322 : :
2323 : 96 : if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2324 : 32 : return;
2325 : :
2326 : : /* Use column 13 for the caret location, arbitrarily, to verify that we
2327 : : handle start != caret. */
2328 : 64 : const location_t aaa = make_location (c13, c12, c14);
2329 : 64 : ASSERT_EQ (c13, get_pure_location (aaa));
2330 : 64 : ASSERT_EQ (c12, get_start (aaa));
2331 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2332 : 64 : ASSERT_EQ (c14, get_finish (aaa));
2333 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2334 : :
2335 : : /* Make a location using a location with a range as the start-point. */
2336 : 64 : const location_t not_aaa = make_location (c11, aaa, c14);
2337 : 64 : ASSERT_EQ (c11, get_pure_location (not_aaa));
2338 : : /* It should use the start location of the range, not store the range
2339 : : itself. */
2340 : 64 : ASSERT_EQ (c12, get_start (not_aaa));
2341 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2342 : 64 : ASSERT_EQ (c14, get_finish (not_aaa));
2343 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2344 : :
2345 : : /* Similarly, make a location with a range as the end-point. */
2346 : 64 : const location_t aaa_eq_bbb = make_location (c12, c12, c21);
2347 : 64 : ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2348 : 64 : ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2349 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2350 : 64 : ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2351 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2352 : 64 : const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
2353 : : /* It should use the finish location of the range, not store the range
2354 : : itself. */
2355 : 64 : ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2356 : 64 : ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2357 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2358 : 64 : ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2359 : 64 : ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2360 : 96 : }
2361 : :
2362 : : /* Verify reading of a specific line LINENUM in TMP, FC. */
2363 : :
2364 : : static void
2365 : 18600 : check_line (temp_source_file &tmp, file_cache &fc, int linenum)
2366 : : {
2367 : 18600 : char_span line = fc.get_source_line (tmp.get_filename (), linenum);
2368 : 18600 : int n;
2369 : 18600 : const char *b = line.get_buffer ();
2370 : 18600 : size_t l = line.length ();
2371 : 18600 : char buf[5];
2372 : 18600 : ASSERT_LT (l, 5);
2373 : 18600 : memcpy (buf, b, l);
2374 : 18600 : buf[l] = '\0';
2375 : 18600 : ASSERT_TRUE (sscanf (buf, "%d", &n) == 1);
2376 : 18600 : ASSERT_EQ (n, linenum);
2377 : 18600 : }
2378 : :
2379 : : /* Test file cache replacement. */
2380 : :
2381 : : static void
2382 : 4 : test_replacement ()
2383 : : {
2384 : 4 : const int maxline = 1000;
2385 : :
2386 : 4 : char *vec = XNEWVEC (char, maxline * 5);
2387 : 4 : char *p = vec;
2388 : 4 : int i;
2389 : 4008 : for (i = 1; i <= maxline; i++)
2390 : 4000 : p += sprintf (p, "%d\n", i);
2391 : :
2392 : 4 : temp_source_file tmp (SELFTEST_LOCATION, ".txt", vec);
2393 : 4 : free (vec);
2394 : 4 : file_cache fc;
2395 : :
2396 : 4004 : for (i = 2; i <= maxline; i++)
2397 : : {
2398 : 3996 : check_line (tmp, fc, i);
2399 : 3996 : check_line (tmp, fc, i - 1);
2400 : 3996 : if (i >= 10)
2401 : 3964 : check_line (tmp, fc, i - 9);
2402 : 3964 : if (i >= 350) /* Exceed the look behind cache. */
2403 : 2604 : check_line (tmp, fc, i - 300);
2404 : : }
2405 : 44 : for (i = 5; i <= maxline; i += 100)
2406 : 40 : check_line (tmp, fc, i);
2407 : 4004 : for (i = 1; i <= maxline; i++)
2408 : 4000 : check_line (tmp, fc, i);
2409 : 4 : }
2410 : :
2411 : : /* Verify that file_cache::get_source_line can read back individual lines of a tempfile (e.g. for caret-based diagnostics), including a final line with no trailing newline, and that asking for a line past the end of the file gives a char_span that tests false and has a NULL buffer. */
2412 : :
2413 : : static void
2414 : 4 : test_reading_source_line ()
2415 : : {
2416 : : /* Create a three-line tempfile; the last line has no trailing newline. */
2417 : 4 : temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2418 : : "01234567890123456789\n"
2419 : : "This is the test text\n"
2420 : 4 : "This is the 3rd line");
2421 : 4 : file_cache fc;
2422 : :
2423 : : /* Read back the last line of the tempfile first. */
2424 : 4 : char_span source_line = fc.get_source_line (tmp.get_filename (), 3);
2425 : 4 : ASSERT_TRUE (source_line);
2426 : 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
2427 : 4 : ASSERT_EQ (20, source_line.length ());
2428 : 4 : ASSERT_TRUE (!strncmp ("This is the 3rd line",
2429 : : source_line.get_buffer (), source_line.length ()));
2430 : :
2431 : 4 : source_line = fc.get_source_line (tmp.get_filename (), 2);
2432 : 4 : ASSERT_TRUE (source_line);
2433 : 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
2434 : 4 : ASSERT_EQ (21, source_line.length ());
2435 : 4 : ASSERT_TRUE (!strncmp ("This is the test text",
2436 : : source_line.get_buffer (), source_line.length ()));
2437 : :
2438 : 4 : source_line = fc.get_source_line (tmp.get_filename (), 4);
2439 : 4 : ASSERT_FALSE (source_line);
2440 : 4 : ASSERT_TRUE (source_line.get_buffer () == NULL);
2441 : 4 : }
2442 : :
2443 : : /* Verify reading of lines from in-memory content registered with file_cache::add_buffered_content (e.g. for sarif-replay); this test creates no tempfile for the filename it uses. The same three lines and the same out-of-range line number are checked as in test_reading_source_line. */
2444 : :
2445 : : static void
2446 : 4 : test_reading_source_buffer ()
2447 : : {
2448 : 4 : const char *text = ("01234567890123456789\n"
2449 : : "This is the test text\n"
2450 : : "This is the 3rd line");
2451 : 4 : const char *filename = "foo.txt";
2452 : 4 : file_cache fc;
2453 : 4 : fc.add_buffered_content (filename, text, strlen (text));
2454 : :
2455 : : /* Read back a specific line from the buffered content. */
2456 : 4 : char_span source_line = fc.get_source_line (filename, 3);
2457 : 4 : ASSERT_TRUE (source_line);
2458 : 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
2459 : 4 : ASSERT_EQ (20, source_line.length ());
2460 : 4 : ASSERT_TRUE (!strncmp ("This is the 3rd line",
2461 : : source_line.get_buffer (), source_line.length ()));
2462 : :
2463 : 4 : source_line = fc.get_source_line (filename, 2);
2464 : 4 : ASSERT_TRUE (source_line);
2465 : 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
2466 : 4 : ASSERT_EQ (21, source_line.length ());
2467 : 4 : ASSERT_TRUE (!strncmp ("This is the test text",
2468 : : source_line.get_buffer (), source_line.length ()));
2469 : :
2470 : 4 : source_line = fc.get_source_line (filename, 4);
2471 : 4 : ASSERT_FALSE (source_line);
2472 : 4 : ASSERT_TRUE (source_line.get_buffer () == NULL);
2473 : 4 : }
2474 : :
2475 : : /* Tests of lexing. */
2476 : :
2477 : : /* Verify that token TOK from PARSER has cpp_token_as_text
2478 : : equal to EXPECTED_TEXT (compared as strings via ASSERT_STREQ). */
2479 : :
2480 : : #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
2481 : : SELFTEST_BEGIN_STMT \
2482 : : unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
2483 : : ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
2484 : : SELFTEST_END_STMT
2485 : :
2486 : : /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
2487 : : and ranges from EXP_START_COL to EXP_FINISH_COL.
2488 : : Use LOC as the effective location of the selftest. Only the filename and line number are checked when should_have_column_data_p is false for the token's location. */
2489 : :
2490 : : static void
2491 : 576 : assert_token_loc_eq (const location &loc,
2492 : : const cpp_token *tok,
2493 : : const char *exp_filename, int exp_linenum,
2494 : : int exp_start_col, int exp_finish_col)
2495 : : {
2496 : 576 : location_t tok_loc = tok->src_loc;
2497 : 576 : ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2498 : 576 : ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2499 : :
2500 : : /* If location_t values are sufficiently high, then column numbers
2501 : : will be unavailable, so skip the column checks below. */
2502 : 576 : if (!should_have_column_data_p (tok_loc))
2503 : 196 : return;
2504 : :
2505 : 380 : ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2506 : 380 : source_range tok_range = get_range_from_loc (line_table, tok_loc);
2507 : 380 : ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2508 : 380 : ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2509 : : }
2510 : :
2511 : : /* Use assert_token_loc_eq to verify TOK->src_loc, supplying
2512 : : SELFTEST_LOCATION as the effective location of the selftest. */
2513 : :
2514 : : #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
2515 : : EXP_START_COL, EXP_FINISH_COL) \
2516 : : assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
2517 : : (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2518 : :
2519 : : /* Test of lexing a file using libcpp, verifying tokens and their
2520 : : location information. The input mixes a name, a string literal and a number with C-style and C++-style comments; only the name, string, number and EOF tokens are expected back. */
2521 : :
2522 : : static void
2523 : 96 : test_lexer (const line_table_case &case_)
2524 : : {
2525 : : /* Create a tempfile and write some text to it. */
2526 : 96 : const char *content =
2527 : : /*00000000011111111112222222222333333.3333444444444.455555555556
2528 : : 12345678901234567890123456789012345.6789012345678.901234567890. */
2529 : : ("test_name /* c-style comment */\n"
2530 : : " \"test literal\"\n"
2531 : : " // test c++-style comment\n"
2532 : : " 42\n");
2533 : 96 : temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2534 : :
2535 : 96 : line_table_test ltt (case_);
2536 : :
2537 : 96 : cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2538 : :
2539 : 96 : const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2540 : 96 : ASSERT_NE (fname, NULL);
2541 : :
2542 : : /* Verify that we get the expected tokens back, with the correct
2543 : : location information. */
2544 : :
2545 : 96 : location_t loc;
2546 : 96 : const cpp_token *tok;
2547 : 96 : tok = cpp_get_token_with_location (parser, &loc);
2548 : 96 : ASSERT_NE (tok, NULL);
2549 : 96 : ASSERT_EQ (tok->type, CPP_NAME);
2550 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2551 : 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2552 : :
2553 : 96 : tok = cpp_get_token_with_location (parser, &loc);
2554 : 96 : ASSERT_NE (tok, NULL);
2555 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2556 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2557 : 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2558 : :
2559 : 96 : tok = cpp_get_token_with_location (parser, &loc);
2560 : 96 : ASSERT_NE (tok, NULL);
2561 : 96 : ASSERT_EQ (tok->type, CPP_NUMBER);
2562 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2563 : 96 : ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2564 : :
2565 : 96 : tok = cpp_get_token_with_location (parser, &loc);
2566 : 96 : ASSERT_NE (tok, NULL);
2567 : 96 : ASSERT_EQ (tok->type, CPP_EOF);
2568 : :
2569 : 96 : cpp_finish (parser, NULL);
2570 : 96 : cpp_destroy (parser);
2571 : 96 : }
2572 : :
2573 : : /* Forward decls. */
2574 : :
2575 : : class lexer_test;
2576 : : class lexer_test_options;
2577 : :
2578 : : /* An abstract base class for specifying options of a lexer_test.
2579 : : The "apply" vfunc is called during the lexer_test constructor (when options are supplied), after the cpp_reader has been created but before cpp_init_iconv is called and before the input file is read. */
2580 : :
2581 : 192 : class lexer_test_options
2582 : : {
2583 : : public:
2584 : : virtual void apply (lexer_test &) = 0;
2585 : : };
2586 : :
2587 : : /* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
2588 : : in its dtor.
2589 : :
2590 : : This is needed by class lexer_test to ensure that the cleanup of the
2591 : : cpp_reader happens *after* the cleanup of the temp_source_file. NOTE(review): copying is not disabled, and a copy's dtor would call cpp_finish and cpp_destroy on the same cpp_reader a second time, so instances must not be copied. */
2592 : :
2593 : : class cpp_reader_ptr
2594 : : {
2595 : : public:
2596 : 2304 : cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2597 : :
2598 : 2304 : ~cpp_reader_ptr ()
2599 : : {
2600 : 2304 : cpp_finish (m_ptr, NULL);
2601 : 2304 : cpp_destroy (m_ptr);
2602 : 2304 : }
2603 : :
2604 : 2304 : operator cpp_reader * () const { return m_ptr; }
2605 : :
2606 : : private:
2607 : : cpp_reader *m_ptr;
2608 : : };
2609 : :
2610 : : /* A class for writing lexer tests. It holds the line_table override, the cpp_reader, the tempfile containing the text being lexed, a file_cache and a string concatenation database. */
2611 : :
2612 : : class lexer_test
2613 : : {
2614 : : public:
2615 : : lexer_test (const line_table_case &case_, const char *content,
2616 : : lexer_test_options *options);
2617 : : ~lexer_test ();
2618 : :
2619 : : const cpp_token *get_token ();
2620 : :
2621 : : /* The ordering of these fields matters.
2622 : : The line_table_test must be first, since the cpp_reader_ptr
2623 : : uses it.
2624 : : The cpp_reader must be cleaned up *after* the temp_source_file
2625 : : since the filenames in input.cc's input cache are owned by the
2626 : : cpp_reader; in particular, when ~temp_source_file evicts the
2627 : : filename the filenames must still be alive. */
2628 : : line_table_test m_ltt;
2629 : : cpp_reader_ptr m_parser;
2630 : : temp_source_file m_tempfile;
2631 : : file_cache m_file_cache;
2632 : : string_concat_db m_concats;
2633 : : bool m_implicitly_expect_EOF;
2634 : : };
2635 : :
2636 : : /* Use an EBCDIC encoding for the execution charset, specifically
2637 : : IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2638 : :
2639 : : This exercises iconv integration within libcpp.
2640 : : Not every build of iconv supports the given charset,
2641 : : so we need to flag this error and handle it gracefully. Only one instance may be live at a time: the ctor asserts that s_singleton is NULL, and the static on_diagnostic callback reaches the instance through s_singleton. */
2642 : :
2643 : : class ebcdic_execution_charset : public lexer_test_options
2644 : : {
2645 : : public:
2646 : 96 : ebcdic_execution_charset () : m_num_iconv_errors (0)
2647 : : {
2648 : 96 : gcc_assert (s_singleton == NULL);
2649 : 96 : s_singleton = this;
2650 : 96 : }
2651 : 96 : ~ebcdic_execution_charset ()
2652 : 96 : {
2653 : 96 : gcc_assert (s_singleton == this);
2654 : 96 : s_singleton = NULL;
2655 : 96 : }
2656 : :
2657 : 96 : void apply (lexer_test &test) final override
2658 : : {
2659 : 96 : cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2660 : 96 : cpp_opts->narrow_charset = "IBM1047";
2661 : :
2662 : 96 : cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2663 : 96 : callbacks->diagnostic = on_diagnostic;
2664 : 96 : }
2665 : :
2666 : 0 : static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2667 : : enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2668 : : enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2669 : : rich_location *richloc ATTRIBUTE_UNUSED,
2670 : : const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2671 : : ATTRIBUTE_FPTR_PRINTF(5,0)
2672 : : {
2673 : 0 : gcc_assert (s_singleton);
2674 : : /* Prevent exgettext from picking this up; it is translated in libcpp. */
2675 : 0 : const char *msg = "conversion from %s to %s not supported by iconv";
2676 : : #ifdef ENABLE_NLS
2677 : 0 : msg = dgettext ("cpplib", msg);
2678 : : #endif
2679 : : /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
2680 : : when the local iconv build doesn't support the conversion. */
2681 : 0 : if (strcmp (msgid, msg) == 0)
2682 : : {
2683 : 0 : s_singleton->m_num_iconv_errors++;
2684 : 0 : return true;
2685 : : }
2686 : :
2687 : : /* Otherwise, we have an unexpected error. */
2688 : 0 : abort ();
2689 : : }
2690 : :
2691 : 96 : bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2692 : :
2693 : : private:
2694 : : static ebcdic_execution_charset *s_singleton;
2695 : : int m_num_iconv_errors;
2696 : : };
2697 : :
2698 : : ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2699 : :
2700 : : /* A lexer_test_options subclass that records a list of diagnostic
2701 : : messages emitted by the lexer. Each message is formatted with xvasprintf, stored in m_diagnostics and freed by the dtor. Only one instance may be live at a time, since the static on_diagnostic callback reaches it through s_singleton. */
2702 : :
2703 : : class lexer_diagnostic_sink : public lexer_test_options
2704 : : {
2705 : : public:
2706 : 96 : lexer_diagnostic_sink ()
2707 : 96 : {
2708 : 96 : gcc_assert (s_singleton == NULL);
2709 : 96 : s_singleton = this;
2710 : 96 : }
2711 : 96 : ~lexer_diagnostic_sink ()
2712 : 96 : {
2713 : 96 : gcc_assert (s_singleton == this);
2714 : 96 : s_singleton = NULL;
2715 : :
2716 : 96 : int i;
2717 : 96 : char *str;
2718 : 192 : FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2719 : 96 : free (str);
2720 : 96 : }
2721 : :
2722 : 96 : void apply (lexer_test &test) final override
2723 : : {
2724 : 96 : cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2725 : 96 : callbacks->diagnostic = on_diagnostic;
2726 : 96 : }
2727 : :
2728 : 96 : static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2729 : : enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2730 : : enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2731 : : rich_location *richloc ATTRIBUTE_UNUSED,
2732 : : const char *msgid, va_list *ap)
2733 : : ATTRIBUTE_FPTR_PRINTF(5,0)
2734 : : {
2735 : 96 : char *msg = xvasprintf (msgid, *ap);
2736 : 96 : s_singleton->m_diagnostics.safe_push (msg);
2737 : 96 : return true;
2738 : : }
2739 : :
2740 : : auto_vec<char *> m_diagnostics;
2741 : :
2742 : : private:
2743 : : static lexer_diagnostic_sink *s_singleton;
2744 : : };
2745 : :
2746 : : lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2747 : :
2748 : : /* Constructor. Override line_table with a new instance based on CASE_,
2749 : : and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2750 : : start parsing the tempfile. If OPTIONS is non-NULL, its apply vfunc is run on this object before cpp_init_iconv is called and before the tempfile is read. */
2751 : :
2752 : 2304 : lexer_test::lexer_test (const line_table_case &case_, const char *content,
2753 : 2304 : lexer_test_options *options)
2754 : 2304 : : m_ltt (case_),
2755 : 2304 : m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2756 : : /* Create a tempfile and write the text to it. */
2757 : 2304 : m_tempfile (SELFTEST_LOCATION, ".c", content),
2758 : 2304 : m_concats (),
2759 : 2304 : m_implicitly_expect_EOF (true)
2760 : : {
2761 : 2304 : if (options)
2762 : 192 : options->apply (*this);
2763 : :
2764 : 2304 : cpp_init_iconv (m_parser);
2765 : :
2766 : : /* Parse the file. */
2767 : 2304 : const char *fname = cpp_read_main_file (m_parser,
2768 : : m_tempfile.get_filename ());
2769 : 2304 : ASSERT_NE (fname, NULL);
2770 : 2304 : }
2771 : :
2772 : : /* Destructor. By default, verify that the next token in m_parser is EOF; the check is skipped if m_implicitly_expect_EOF has been set to false. */
2773 : :
2774 : 2304 : lexer_test::~lexer_test ()
2775 : : {
2776 : 2304 : location_t loc;
2777 : 2304 : const cpp_token *tok;
2778 : :
2779 : 2304 : if (m_implicitly_expect_EOF)
2780 : : {
2781 : 2208 : tok = cpp_get_token_with_location (m_parser, &loc);
2782 : 2208 : ASSERT_NE (tok, NULL);
2783 : 2208 : ASSERT_EQ (tok->type, CPP_EOF);
2784 : : }
2785 : 2304 : }
2786 : :
2787 : : /* Get the next token from m_parser, asserting that it is non-NULL. The location written by cpp_get_token_with_location is not used. */
2788 : :
2789 : : const cpp_token *
2790 : 3936 : lexer_test::get_token ()
2791 : : {
2792 : 3936 : location_t loc;
2793 : 3936 : const cpp_token *tok;
2794 : :
2795 : 3936 : tok = cpp_get_token_with_location (m_parser, &loc);
2796 : 3936 : ASSERT_NE (tok, NULL);
2797 : 3936 : return tok;
2798 : : }
2799 : :
2800 : : /* Verify that locations within string literals are correctly handled. */
2801 : :
2802 : : /* Verify get_source_range_for_char for token(s) at STRLOC of kind TYPE,
2803 : : using the file cache and string concatenation database for TEST.
2804 : :
2805 : : Assert that the character at index IDX is on EXPECTED_LINE,
2806 : : and that it begins at column EXPECTED_START_COL and ends at
2807 : : EXPECTED_FINISH_COL (unless the locations are beyond
2808 : : LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2809 : : columns). If STRLOC itself has no column data, instead assert that the lookup fails with the LINE_MAP_MAX_LOCATION_WITH_COLS error message and check nothing else. Failures are reported at LOC. */
2810 : :
2811 : : static void
2812 : 23740 : assert_char_at_range (const location &loc,
2813 : : lexer_test& test,
2814 : : location_t strloc, enum cpp_ttype type, int idx,
2815 : : int expected_line, int expected_start_col,
2816 : : int expected_finish_col)
2817 : : {
2818 : 23740 : cpp_reader *pfile = test.m_parser;
2819 : 23740 : string_concat_db *concats = &test.m_concats;
2820 : :
2821 : 23740 : source_range actual_range = source_range();
2822 : 23740 : const char *err
2823 : 23740 : = get_source_range_for_char (pfile, test.m_file_cache,
2824 : : concats, strloc, type, idx,
2825 : : &actual_range);
2826 : 23740 : if (should_have_column_data_p (strloc))
2827 : 18652 : ASSERT_EQ_AT (loc, NULL, err);
2828 : : else
2829 : : {
2830 : 5088 : ASSERT_STREQ_AT (loc,
2831 : : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2832 : : err);
2833 : 5088 : return;
2834 : : }
2835 : :
2836 : 18652 : int actual_start_line = LOCATION_LINE (actual_range.m_start);
2837 : 18652 : ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2838 : 18652 : int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2839 : 18652 : ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2840 : :
2841 : 18652 : if (should_have_column_data_p (actual_range.m_start))
2842 : : {
2843 : 18652 : int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2844 : 18652 : ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2845 : : }
2846 : 18652 : if (should_have_column_data_p (actual_range.m_finish))
2847 : : {
2848 : 18652 : int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2849 : 18652 : ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2850 : : }
2851 : : }
2852 : :
2853 : : /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION as
2854 : : the effective location of any failures. */
2855 : :
2856 : : #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
2857 : : EXPECTED_START_COL, EXPECTED_FINISH_COL) \
2858 : : assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
2859 : : (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
2860 : : (EXPECTED_FINISH_COL))
2861 : :
2862 : : /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC of kind TYPE,
2863 : : using the file cache and string concatenation database for TEST.
2864 : :
2865 : : Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. If STRLOC has no column data, instead assert that the call fails with the LINE_MAP_MAX_LOCATION_WITH_COLS error message. Failures are reported at LOC. */
2866 : :
2867 : : static void
2868 : 1268 : assert_num_substring_ranges (const location &loc,
2869 : : lexer_test& test,
2870 : : location_t strloc,
2871 : : enum cpp_ttype type,
2872 : : int expected_num_ranges)
2873 : : {
2874 : 1268 : cpp_reader *pfile = test.m_parser;
2875 : 1268 : string_concat_db *concats = &test.m_concats;
2876 : :
2877 : 1268 : int actual_num_ranges = -1;
2878 : 1268 : const char *err
2879 : 1268 : = get_num_source_ranges_for_substring (pfile, test.m_file_cache,
2880 : : concats, strloc, type,
2881 : : &actual_num_ranges);
2882 : 1268 : if (should_have_column_data_p (strloc))
2883 : 884 : ASSERT_EQ_AT (loc, NULL, err);
2884 : : else
2885 : : {
2886 : 384 : ASSERT_STREQ_AT (loc,
2887 : : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2888 : : err);
2889 : 384 : return;
2890 : : }
2891 : 884 : ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2892 : : }
2893 : :
2894 : : /* Macro for calling assert_num_substring_ranges, supplying
2895 : : SELFTEST_LOCATION as the effective location of any failures. */
2896 : :
2897 : : #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
2898 : : EXPECTED_NUM_RANGES) \
2899 : : assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
2900 : : (TYPE), (EXPECTED_NUM_RANGES))
2901 : :
2902 : :
2903 : : /* Verify that get_substring_ranges_for_loc for token(s) at STRLOC of kind TYPE
2904 : : returns the error EXPECTED_ERR (using the file cache and string concatenation database for TEST), or the LINE_MAP_MAX_LOCATION_WITH_COLS error message if STRLOC has no column data. Failures are reported at LOC; the ASSERT_HAS_NO_SUBSTRING_RANGES macro that follows supplies SELFTEST_LOCATION for LOC. */
2905 : :
2906 : : static void
2907 : 636 : assert_has_no_substring_ranges (const location &loc,
2908 : : lexer_test& test,
2909 : : location_t strloc,
2910 : : enum cpp_ttype type,
2911 : : const char *expected_err)
2912 : : {
2913 : 636 : cpp_reader *pfile = test.m_parser;
2914 : 636 : string_concat_db *concats = &test.m_concats;
2915 : 636 : cpp_substring_ranges ranges;
2916 : 636 : const char *actual_err
2917 : 636 : = get_substring_ranges_for_loc (pfile, test.m_file_cache, concats, strloc,
2918 : : type, ranges);
2919 : 636 : if (should_have_column_data_p (strloc))
2920 : 444 : ASSERT_STREQ_AT (loc, expected_err, actual_err);
2921 : : else
2922 : 192 : ASSERT_STREQ_AT (loc,
2923 : : "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2924 : : actual_err);
2925 : 636 : }
2926 : :
2927 : : #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
2928 : : assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
2929 : : (STRLOC), (TYPE), (ERR))
2930 : :
2931 : : /* Lex a simple string literal. Verify the token's text and location, that
2932 : : cpp_interpret_string decodes it, and then the substring location data for each character. */
2933 : :
2934 : : static void
2935 : 96 : test_lexer_string_locations_simple (const line_table_case &case_)
2936 : : {
2937 : : /* Digits 0-9 (with 0 at column 10), the simple way.
2938 : : ....................000000000.11111111112.2222222223333333333
2939 : : ....................123456789.01234567890.1234567890123456789
2940 : : We add a trailing comment to ensure that we correctly locate
2941 : : the end of the string literal token. */
2942 : 96 : const char *content = " \"0123456789\" /* not a string */\n";
2943 : 96 : lexer_test test (case_, content, NULL);
2944 : :
2945 : : /* Verify that we get the expected token back, with the correct
2946 : : location information. */
2947 : 96 : const cpp_token *tok = test.get_token ();
2948 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2949 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2950 : 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2951 : :
2952 : : /* At this point in lexing, the quote characters are treated as part of
2953 : : the string (they are stripped off by cpp_interpret_string). */
2954 : :
2955 : 96 : ASSERT_EQ (tok->val.str.len, 12);
2956 : :
2957 : : /* Verify that cpp_interpret_string works. */
2958 : 96 : cpp_string dst_string;
2959 : 96 : const enum cpp_ttype type = CPP_STRING;
2960 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2961 : : &dst_string, type);
2962 : 96 : ASSERT_TRUE (result);
2963 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2964 : 96 : free (const_cast <unsigned char *> (dst_string.text));
2965 : :
2966 : : /* Verify ranges of individual characters. This no longer includes the
2967 : : opening quote, but does include the closing quote. */
2968 : 1152 : for (int i = 0; i <= 10; i++)
2969 : 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2970 : : 10 + i, 10 + i);
2971 : :
2972 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2973 : 96 : }
2974 : :
2975 : : /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2976 : : encoding, for which no substring location data is expected. The test does nothing without HAVE_ICONV, and stops after the token checks if iconv reported that it cannot perform the conversion. */
2977 : :
2978 : : static void
2979 : 96 : test_lexer_string_locations_ebcdic (const line_table_case &case_)
2980 : : {
2981 : : /* EBCDIC support requires iconv. */
2982 : 96 : if (!HAVE_ICONV)
2983 : 0 : return;
2984 : :
2985 : : /* Digits 0-9 (with 0 at column 10), the simple way.
2986 : : ....................000000000.11111111112.2222222223333333333
2987 : : ....................123456789.01234567890.1234567890123456789
2988 : : We add a trailing comment to ensure that we correctly locate
2989 : : the end of the string literal token. */
2990 : 96 : const char *content = " \"0123456789\" /* not a string */\n";
2991 : 96 : ebcdic_execution_charset use_ebcdic;
2992 : 96 : lexer_test test (case_, content, &use_ebcdic);
2993 : :
2994 : : /* Verify that we get the expected token back, with the correct
2995 : : location information. */
2996 : 96 : const cpp_token *tok = test.get_token ();
2997 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
2998 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2999 : 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
3000 : :
3001 : : /* At this point in lexing, the quote characters are treated as part of
3002 : : the string (they are stripped off by cpp_interpret_string). */
3003 : :
3004 : 96 : ASSERT_EQ (tok->val.str.len, 12);
3005 : :
3006 : : /* The remainder of the test requires an iconv implementation that
3007 : : can convert from UTF-8 to the EBCDIC encoding requested above. */
3008 : 96 : if (use_ebcdic.iconv_errors_occurred_p ())
3009 : 0 : return;
3010 : :
3011 : : /* Verify that cpp_interpret_string works. */
3012 : 96 : cpp_string dst_string;
3013 : 96 : const enum cpp_ttype type = CPP_STRING;
3014 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3015 : : &dst_string, type);
3016 : 96 : ASSERT_TRUE (result);
3017 : : /* We should now have EBCDIC-encoded text, specifically
3018 : : IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
3019 : : The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */
3020 : 96 : ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
3021 : : (const char *)dst_string.text);
3022 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3023 : :
3024 : : /* Verify that we don't attempt to record substring location information
3025 : : for such cases. */
3026 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
3027 : : (test, tok->src_loc, type,
3028 : : "execution character set != source character set");
3029 : 96 : }
3030 : :
3031 : : /* Lex a string literal containing a hex-escaped character.
3032 : : Verify the token's text and location, that cpp_interpret_string decodes it,
3033 : : and then the substring location data; the escape is expected to be a single character spanning columns 15-18. */
3034 : :
3035 : : static void
3036 : 96 : test_lexer_string_locations_hex (const line_table_case &case_)
3037 : : {
3038 : : /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
3039 : : and with a space in place of digit 6, to terminate the escaped
3040 : : hex code.
3041 : : ....................000000000.111111.11112222.
3042 : : ....................123456789.012345.67890123. */
3043 : 96 : const char *content = " \"01234\\x35 789\"\n";
3044 : 96 : lexer_test test (case_, content, NULL);
3045 : :
3046 : : /* Verify that we get the expected token back, with the correct
3047 : : location information. */
3048 : 96 : const cpp_token *tok = test.get_token ();
3049 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3050 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
3051 : 96 : ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
3052 : :
3053 : : /* At this point in lexing, the quote characters are treated as part of
3054 : : the string (they are stripped off by cpp_interpret_string). */
3055 : 96 : ASSERT_EQ (tok->val.str.len, 15);
3056 : :
3057 : : /* Verify that cpp_interpret_string works. */
3058 : 96 : cpp_string dst_string;
3059 : 96 : const enum cpp_ttype type = CPP_STRING;
3060 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3061 : : &dst_string, type);
3062 : 96 : ASSERT_TRUE (result);
3063 : 96 : ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
3064 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3065 : :
3066 : : /* Verify ranges of individual characters. This no longer includes the
3067 : : opening quote, but does include the closing quote. */
3068 : 576 : for (int i = 0; i <= 4; i++)
3069 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3070 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
3071 : 576 : for (int i = 6; i <= 10; i++)
3072 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
3073 : :
3074 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
3075 : 96 : }
3076 : :
3077 : : /* Lex a string literal containing an octal-escaped character.
3078 : : Verify the substring location data after running cpp_interpret_string
3079 : : on it; the escape is expected to be a single character spanning columns 15-18. */
3080 : :
3081 : : static void
3082 : 96 : test_lexer_string_locations_oct (const line_table_case &case_)
3083 : : {
3084 : : /* Digits 0-9, expressing digit 5 in ASCII as "\065"
3085 : : and with a space in place of digit 6, to terminate the escaped
3086 : : octal code.
3087 : : ....................000000000.111111.11112222.2222223333333333444
3088 : : ....................123456789.012345.67890123.4567890123456789012 */
3089 : 96 : const char *content = " \"01234\\065 789\" /* not a string */\n";
3090 : 96 : lexer_test test (case_, content, NULL);
3091 : :
3092 : : /* Verify that we get the expected token back (only its type and text
3093 : : are checked here). */
3094 : 96 : const cpp_token *tok = test.get_token ();
3095 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3096 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
3097 : :
3098 : : /* Verify that cpp_interpret_string works. */
3099 : 96 : cpp_string dst_string;
3100 : 96 : const enum cpp_ttype type = CPP_STRING;
3101 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3102 : : &dst_string, type);
3103 : 96 : ASSERT_TRUE (result);
3104 : 96 : ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
3105 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3106 : :
3107 : : /* Verify ranges of individual characters. This no longer includes the
3108 : : opening quote, but does include the closing quote. */
3109 : 576 : for (int i = 0; i < 5; i++)
3110 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3111 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
3112 : 576 : for (int i = 6; i <= 10; i++)
3113 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
3114 : :
3115 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
3116 : 96 : }
3117 : :
3118 : : /* Test of string literal containing letter escapes (tab, backslash and newline), each of which is expected to be a single character spanning two source columns. */
3119 : :
3120 : : static void
3121 : 96 : test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
3122 : : {
3123 : : /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
3124 : : .....................000000000.1.11111.1.1.11222.22222223333333
3125 : : .....................123456789.0.12345.6.7.89012.34567890123456. */
3126 : 96 : const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n");
3127 : 96 : lexer_test test (case_, content, NULL);
3128 : :
3129 : : /* Verify that we get the expected tokens back. */
3130 : 96 : const cpp_token *tok = test.get_token ();
3131 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3132 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
3133 : :
3134 : : /* Verify ranges of individual characters. */
3135 : : /* "\t". */
3136 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3137 : : 0, 1, 10, 11);
3138 : : /* "foo". */
3139 : 384 : for (int i = 1; i <= 3; i++)
3140 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3141 : : i, 1, 11 + i, 11 + i);
3142 : : /* "\\" and "\n". */
3143 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3144 : : 4, 1, 15, 16);
3145 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3146 : : 5, 1, 17, 18);
3147 : :
3148 : : /* "bar" and closing quote for nul-terminator. */
3149 : 480 : for (int i = 6; i <= 9; i++)
3150 : 384 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3151 : : i, 1, 13 + i, 13 + i);
3152 : :
3153 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
3154 : 96 : }
3155 : :
3156 : : /* Another test of a string literal containing a letter escape.
3157 : : Based on string seen in
3158 : : printf ("%-%\n");
3159 : : in gcc.dg/format/c90-printf-1.c. Five ranges are expected: three for "%-%", one two-column range for the newline escape, and one for the closing quote. */
3160 : :
3161 : : static void
3162 : 96 : test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
3163 : : {
3164 : : /* .....................000000000.1111.11.1111.22222222223.
3165 : : .....................123456789.0123.45.6789.01234567890. */
3166 : 96 : const char *content = (" \"%-%\\n\" /* non-str */\n");
3167 : 96 : lexer_test test (case_, content, NULL);
3168 : :
3169 : : /* Verify that we get the expected tokens back. */
3170 : 96 : const cpp_token *tok = test.get_token ();
3171 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3172 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
3173 : :
3174 : : /* Verify ranges of individual characters. */
3175 : : /* "%-%". */
3176 : 384 : for (int i = 0; i < 3; i++)
3177 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3178 : : i, 1, 10 + i, 10 + i);
3179 : : /* "\n". */
3180 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3181 : : 3, 1, 13, 14);
3182 : :
3183 : : /* Closing quote for nul-terminator. */
3184 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3185 : : 4, 1, 15, 15);
3186 : :
3187 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
3188 : 96 : }
3189 : :
3190 : : /* Lex a string literal containing UCN 4 characters.
3191 : : Verify the substring location data after running cpp_interpret_string
3192 : : on it; every byte of a multi-byte encoded UCN is expected to map to the full source range of that UCN. */
3193 : :
3194 : : static void
3195 : 96 : test_lexer_string_locations_ucn4 (const line_table_case &case_)
3196 : : {
3197 : : /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
3198 : : as UCN 4.
3199 : : ....................000000000.111111.111122.222222223.33333333344444
3200 : : ....................123456789.012345.678901.234567890.12345678901234 */
3201 : 96 : const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n";
3202 : 96 : lexer_test test (case_, content, NULL);
3203 : :
3204 : : /* Verify that we get the expected token back (only its type and text
3205 : : are checked here). */
3206 : 96 : const cpp_token *tok = test.get_token ();
3207 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3208 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3209 : :
3210 : : /* Verify that cpp_interpret_string works.
3211 : : The string should be encoded in the execution character
3212 : : set. Assuming that is UTF-8, we should have the following:
3213 : : ----------- ---- ----- ------- ----------------
3214 : : Byte offset Byte Octal Unicode Source Column(s)
3215 : : ----------- ---- ----- ------- ----------------
3216 : : 0 0x30 '0' 10
3217 : : 1 0x31 '1' 11
3218 : : 2 0x32 '2' 12
3219 : : 3 0x33 '3' 13
3220 : : 4 0x34 '4' 14
3221 : : 5 0xE2 \342 U+2174 15-20
3222 : : 6 0x85 \205 (cont) 15-20
3223 : : 7 0xB4 \264 (cont) 15-20
3224 : : 8 0xE2 \342 U+2175 21-26
3225 : : 9 0x85 \205 (cont) 21-26
3226 : : 10 0xB5 \265 (cont) 21-26
3227 : : 11 0x37 '7' 27
3228 : : 12 0x38 '8' 28
3229 : : 13 0x39 '9' 29
3230 : : 14 0x00 30 (closing quote)
3231 : : ----------- ---- ----- ------- ---------------. */
3232 : :
3233 : 96 : cpp_string dst_string;
3234 : 96 : const enum cpp_ttype type = CPP_STRING;
3235 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3236 : : &dst_string, type);
3237 : 96 : ASSERT_TRUE (result);
3238 : 96 : ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3239 : : (const char *)dst_string.text);
3240 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3241 : :
3242 : : /* Verify ranges of individual characters. This no longer includes the
3243 : : opening quote, but does include the closing quote.
3244 : : '01234'. */
3245 : 576 : for (int i = 0; i <= 4; i++)
3246 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3247 : : /* U+2174. */
3248 : 384 : for (int i = 5; i <= 7; i++)
3249 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
3250 : : /* U+2175. */
3251 : 384 : for (int i = 8; i <= 10; i++)
3252 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
3253 : : /* '789' and nul terminator. */
3254 : 480 : for (int i = 11; i <= 14; i++)
3255 : 384 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
3256 : :
3257 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3258 : 96 : }
3259 : :
3260 : : /* Lex a string literal containing UCN 8 characters (universal character names with eight hex digits).
3261 : : Verify the substring location data after running cpp_interpret_string
3262 : : on it. */
3263 : :
3264 : : static void
3265 : 96 : test_lexer_string_locations_ucn8 (const line_table_case &case_)
3266 : : {
3267 : : /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
3268 : : ....................000000000.111111.1111222222.2222333333333.344444
3269 : : ....................123456789.012345.6789012345.6789012345678.901234 */
3270 : 96 : const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n";
3271 : 96 : lexer_test test (case_, content, NULL);
3272 : :
3273 : : /* Verify that we get the expected token back, with the correct
3274 : : location information. */
3275 : 96 : const cpp_token *tok = test.get_token ();
3276 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3277 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3278 : : "\"01234\\U00002174\\U00002175789\"");
3279 : :
3280 : : /* Verify that cpp_interpret_string works.
3281 : : The UTF-8 encoding of the string is identical to that from
3282 : : the ucn4 testcase above; the only difference is the column
3283 : : locations. */
3284 : 96 : cpp_string dst_string;
3285 : 96 : const enum cpp_ttype type = CPP_STRING;
3286 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3287 : : &dst_string, type);
3288 : 96 : ASSERT_TRUE (result);
3289 : 96 : ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3290 : : (const char *)dst_string.text);
3291 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3292 : :
3293 : : /* Verify ranges of individual characters. This no longer includes the
3294 : : opening quote, but does include the closing quote.
3295 : : '01234'. */
3296 : 576 : for (int i = 0; i <= 4; i++)
3297 : 480 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3298 : : /* U+2174: each of its three UTF-8 bytes maps to the escape at columns 15-24. */
3299 : 384 : for (int i = 5; i <= 7; i++)
3300 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
3301 : : /* U+2175: each of its three UTF-8 bytes maps to the escape at columns 25-34. */
3302 : 384 : for (int i = 8; i <= 10; i++)
3303 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
3304 : : /* '789' at columns 35-37 */
3305 : 384 : for (int i = 11; i <= 13; i++)
3306 : 288 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
3307 : : /* Closing quote/nul-terminator at column 38. */
3308 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
3309 : :
3310 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
3311 : 96 : }
3312 : :
3313 : : /* Fetch the big-endian 32-bit value stored at PTR_BE_VALUE and return it in host endianness. The four bytes are read one at a time and combined with shifts, most significant byte first. */
3314 : :
3315 : : static uint32_t
3316 : 768 : uint32_from_big_endian (const uint32_t *ptr_be_value)
3317 : : {
3318 : 768 : const unsigned char *buf = (const unsigned char *)ptr_be_value;
3319 : 768 : return (((uint32_t) buf[0] << 24)
3320 : 768 : | ((uint32_t) buf[1] << 16)
3321 : 768 : | ((uint32_t) buf[2] << 8)
3322 : 768 : | (uint32_t) buf[3]);
3323 : : }
3324 : :
3325 : : /* Lex a wide string literal (L"") and verify that attempts to read substring
3326 : : location data from it fail gracefully. */
3327 : :
3328 : : static void
3329 : 96 : test_lexer_string_locations_wide_string (const line_table_case &case_)
3330 : : {
3331 : : /* Digits 0-9.
3332 : : ....................000000000.11111111112.22222222233333
3333 : : ....................123456789.01234567890.12345678901234 */
3334 : 96 : const char *content = " L\"0123456789\" /* non-str */\n";
3335 : 96 : lexer_test test (case_, content, NULL);
3336 : :
3337 : : /* Verify that we get the expected token back, with the correct
3338 : : location information. */
3339 : 96 : const cpp_token *tok = test.get_token ();
3340 : 96 : ASSERT_EQ (tok->type, CPP_WSTRING);
3341 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3342 : :
3343 : : /* Verify that cpp_interpret_string works, using CPP_WSTRING. */
3344 : 96 : cpp_string dst_string;
3345 : 96 : const enum cpp_ttype type = CPP_WSTRING;
3346 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3347 : : &dst_string, type);
3348 : 96 : ASSERT_TRUE (result);
3349 : : /* The cpp_reader defaults to big-endian with
3350 : : CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
3351 : : now be encoded as UTF-32BE. */
3352 : 96 : const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3353 : 96 : ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3354 : 96 : ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3355 : 96 : ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3356 : 96 : ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3357 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3358 : :
3359 : : /* We don't yet support generating substring location information
3360 : : for L"" strings. */
3361 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
3362 : : (test, tok->src_loc, type,
3363 : : "execution character set != source character set");
3364 : 96 : }
3365 : :
3366 : : /* Fetch the big-endian 16-bit value stored at PTR_BE_VALUE and return it in host endianness. The two bytes are read one at a time, most significant byte first. */
3367 : :
3368 : : static uint16_t
3369 : 384 : uint16_from_big_endian (const uint16_t *ptr_be_value)
3370 : : {
3371 : 384 : const unsigned char *buf = (const unsigned char *)ptr_be_value;
3372 : 384 : return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
3373 : : }
3374 : :
3375 : : /* Lex a u"" string literal and verify that attempts to read substring
3376 : : location data from it fail gracefully. */
3377 : :
3378 : : static void
3379 : 96 : test_lexer_string_locations_string16 (const line_table_case &case_)
3380 : : {
3381 : : /* Digits 0-9.
3382 : : ....................000000000.11111111112.22222222233333
3383 : : ....................123456789.01234567890.12345678901234 */
3384 : 96 : const char *content = " u\"0123456789\" /* non-str */\n";
3385 : 96 : lexer_test test (case_, content, NULL);
3386 : :
3387 : : /* Verify that we get the expected token back, with the correct
3388 : : location information. */
3389 : 96 : const cpp_token *tok = test.get_token ();
3390 : 96 : ASSERT_EQ (tok->type, CPP_STRING16);
3391 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3392 : :
3393 : : /* Verify that cpp_interpret_string works, using CPP_STRING16. */
3394 : 96 : cpp_string dst_string;
3395 : 96 : const enum cpp_ttype type = CPP_STRING16;
3396 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3397 : : &dst_string, type);
3398 : 96 : ASSERT_TRUE (result);
3399 : :
3400 : : /* The cpp_reader defaults to big-endian, so dst_string should
3401 : : now be encoded as UTF-16BE. */
3402 : 96 : const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
3403 : 96 : ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
3404 : 96 : ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
3405 : 96 : ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
3406 : 96 : ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
3407 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3408 : :
3409 : : /* We don't yet support generating substring location information
3410 : : for u"" strings. */
3411 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
3412 : : (test, tok->src_loc, type,
3413 : : "execution character set != source character set");
3414 : 96 : }
3415 : :
3416 : : /* Lex a U"" string literal and verify that attempts to read substring
3417 : : location data from it fail gracefully. */
3418 : :
3419 : : static void
3420 : 96 : test_lexer_string_locations_string32 (const line_table_case &case_)
3421 : : {
3422 : : /* Digits 0-9.
3423 : : ....................000000000.11111111112.22222222233333
3424 : : ....................123456789.01234567890.12345678901234 */
3425 : 96 : const char *content = " U\"0123456789\" /* non-str */\n";
3426 : 96 : lexer_test test (case_, content, NULL);
3427 : :
3428 : : /* Verify that we get the expected token back, with the correct
3429 : : location information. */
3430 : 96 : const cpp_token *tok = test.get_token ();
3431 : 96 : ASSERT_EQ (tok->type, CPP_STRING32);
3432 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3433 : :
3434 : : /* Verify that cpp_interpret_string works, using CPP_STRING32. */
3435 : 96 : cpp_string dst_string;
3436 : 96 : const enum cpp_ttype type = CPP_STRING32;
3437 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3438 : : &dst_string, type);
3439 : 96 : ASSERT_TRUE (result);
3440 : :
3441 : : /* The cpp_reader defaults to big-endian, so dst_string should
3442 : : now be encoded as UTF-32BE. */
3443 : 96 : const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
3444 : 96 : ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
3445 : 96 : ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
3446 : 96 : ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
3447 : 96 : ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
3448 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3449 : :
3450 : : /* We don't yet support generating substring location information
3451 : : for U"" strings. */
3452 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES
3453 : : (test, tok->src_loc, type,
3454 : : "execution character set != source character set");
3455 : 96 : }
3456 : :
3457 : : /* Lex a u8-string literal.
3458 : : Verify the substring location data after running cpp_interpret_string
3459 : : on it. */
3460 : :
3461 : : static void
3462 : 96 : test_lexer_string_locations_u8 (const line_table_case &case_)
3463 : : {
3464 : : /* Digits 0-9.
3465 : : ....................000000000.11111111112.22222222233333
3466 : : ....................123456789.01234567890.12345678901234 */
3467 : 96 : const char *content = " u8\"0123456789\" /* non-str */\n";
3468 : 96 : lexer_test test (case_, content, NULL);
3469 : :
3470 : : /* Verify that we get the expected token back, with the correct
3471 : : location information. */
3472 : 96 : const cpp_token *tok = test.get_token ();
3473 : 96 : ASSERT_EQ (tok->type, CPP_UTF8STRING);
3474 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3475 : :
3476 : : /* Verify that cpp_interpret_string works. NOTE(review): the token is lexed as CPP_UTF8STRING but is interpreted below as CPP_STRING; confirm that this is intentional. */
3477 : 96 : cpp_string dst_string;
3478 : 96 : const enum cpp_ttype type = CPP_STRING;
3479 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3480 : : &dst_string, type);
3481 : 96 : ASSERT_TRUE (result);
3482 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3483 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3484 : :
3485 : : /* Verify ranges of individual characters. This no longer includes the
3486 : : opening quote, but does include the closing quote (index 10, for the nul terminator). */
3487 : 1152 : for (int i = 0; i <= 10; i++)
3488 : 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3489 : 96 : }
3490 : :
3491 : : /* Lex a string literal containing UTF-8 source characters.
3492 : : Verify the substring location data after running cpp_interpret_string
3493 : : on it. */
3494 : :
3495 : : static void
3496 : 96 : test_lexer_string_locations_utf8_source (const line_table_case &case_)
3497 : : {
3498 : : /* This string literal is written out to the source file as UTF-8,
3499 : : and is of the form "before mojibake after", where "mojibake"
3500 : : is written as the following four Unicode code points:
3501 : : U+6587 CJK UNIFIED IDEOGRAPH-6587
3502 : : U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3503 : : U+5316 CJK UNIFIED IDEOGRAPH-5316
3504 : : U+3051 HIRAGANA LETTER KE.
3505 : : Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3506 : : "before" and "after" are 1 byte per Unicode character.
3507 : :
3508 : : The numbering shown are "columns", which are *byte* numbers within
3509 : : the line, rather than Unicode character numbers.
3510 : :
3511 : : .................... 000000000.1111111.
3512 : : .................... 123456789.0123456. */
3513 : 96 : const char *content = (" \"before "
3514 : : /* U+6587 CJK UNIFIED IDEOGRAPH-6587
3515 : : UTF-8: 0xE6 0x96 0x87
3516 : : C octal escaped UTF-8: \346\226\207
3517 : : "column" numbers: 17-19. */
3518 : : "\346\226\207"
3519 : :
3520 : : /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3521 : : UTF-8: 0xE5 0xAD 0x97
3522 : : C octal escaped UTF-8: \345\255\227
3523 : : "column" numbers: 20-22. */
3524 : : "\345\255\227"
3525 : :
3526 : : /* U+5316 CJK UNIFIED IDEOGRAPH-5316
3527 : : UTF-8: 0xE5 0x8C 0x96
3528 : : C octal escaped UTF-8: \345\214\226
3529 : : "column" numbers: 23-25. */
3530 : : "\345\214\226"
3531 : :
3532 : : /* U+3051 HIRAGANA LETTER KE
3533 : : UTF-8: 0xE3 0x81 0x91
3534 : : C octal escaped UTF-8: \343\201\221
3535 : : "column" numbers: 26-28. */
3536 : : "\343\201\221"
3537 : :
3538 : : /* column numbers 29 onwards
3539 : : 2333333.33334444444444
3540 : : 9012345.67890123456789. */
3541 : : " after\" /* non-str */\n");
3542 : 96 : lexer_test test (case_, content, NULL);
3543 : :
3544 : : /* Verify that we get the expected token back, with the correct
3545 : : location information. */
3546 : 96 : const cpp_token *tok = test.get_token ();
3547 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3548 : 96 : ASSERT_TOKEN_AS_TEXT_EQ
3549 : : (test.m_parser, tok,
3550 : : "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3551 : :
3552 : : /* Verify that cpp_interpret_string works. */
3553 : 96 : cpp_string dst_string;
3554 : 96 : const enum cpp_ttype type = CPP_STRING;
3555 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3556 : : &dst_string, type);
3557 : 96 : ASSERT_TRUE (result);
3558 : 96 : ASSERT_STREQ
3559 : : ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3560 : : (const char *)dst_string.text);
3561 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3562 : :
3563 : : /* Verify ranges of individual characters. This no longer includes the
3564 : : opening quote, but does include the closing quote.
3565 : : Assuming that both source and execution encodings are UTF-8, we have
3566 : : a run of 25 octets in each, plus the NUL terminator. */
3567 : 2496 : for (int i = 0; i < 25; i++)
3568 : 2400 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3569 : : /* NUL-terminator should use the closing quote at column 35. */
3570 : 96 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3571 : :
3572 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3573 : 96 : }
3574 : :
3575 : : /* Test of string literal concatenation: two literals on consecutive lines. */
3576 : :
3577 : : static void
3578 : 96 : test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3579 : : {
3580 : : /* Digits 0-9.
3581 : : .....................000000000.111111.11112222222222
3582 : : .....................123456789.012345.67890123456789. */
3583 : 96 : const char *content = (" \"01234\" /* non-str */\n"
3584 : : " \"56789\" /* non-str */\n");
3585 : 96 : lexer_test test (case_, content, NULL);
3586 : :
3587 : 96 : location_t input_locs[2];
3588 : :
3589 : : /* Verify that we get the expected tokens back. */
3590 : 96 : auto_vec <cpp_string> input_strings;
3591 : 96 : const cpp_token *tok_a = test.get_token ();
3592 : 96 : ASSERT_EQ (tok_a->type, CPP_STRING);
3593 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3594 : 96 : input_strings.safe_push (tok_a->val.str);
3595 : 96 : input_locs[0] = tok_a->src_loc;
3596 : :
3597 : 96 : const cpp_token *tok_b = test.get_token ();
3598 : 96 : ASSERT_EQ (tok_b->type, CPP_STRING);
3599 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3600 : 96 : input_strings.safe_push (tok_b->val.str);
3601 : 96 : input_locs[1] = tok_b->src_loc;
3602 : :
3603 : : /* Verify that cpp_interpret_string works. */
3604 : 96 : cpp_string dst_string;
3605 : 96 : const enum cpp_ttype type = CPP_STRING;
3606 : 96 : bool result = cpp_interpret_string (test.m_parser,
3607 : 96 : input_strings.address (), 2,
3608 : : &dst_string, type);
3609 : 96 : ASSERT_TRUE (result);
3610 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3611 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3612 : :
3613 : : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3614 : 96 : test.m_concats.record_string_concatenation (2, input_locs);
3615 : :
3616 : 96 : location_t initial_loc = input_locs[0];
3617 : :
3618 : : /* "01234" on line 1. */
3619 : 576 : for (int i = 0; i <= 4; i++)
3620 : 480 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3621 : : /* "56789" on line 2, plus its closing quote for the nul terminator. */
3622 : 672 : for (int i = 5; i <= 10; i++)
3623 : 576 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3624 : :
3625 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3626 : 96 : }
3627 : :
3628 : : /* Another test of string literal concatenation, this time with five two-character literals, each on its own line. */
3629 : :
3630 : : static void
3631 : 96 : test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3632 : : {
3633 : : /* Digits 0-9.
3634 : : .....................000000000.111.11111112222222
3635 : : .....................123456789.012.34567890123456. */
3636 : 96 : const char *content = (" \"01\" /* non-str */\n"
3637 : : " \"23\" /* non-str */\n"
3638 : : " \"45\" /* non-str */\n"
3639 : : " \"67\" /* non-str */\n"
3640 : : " \"89\" /* non-str */\n");
3641 : 96 : lexer_test test (case_, content, NULL);
3642 : :
3643 : 96 : auto_vec <cpp_string> input_strings;
3644 : 96 : location_t input_locs[5];
3645 : :
3646 : : /* Verify that we get the expected tokens back. */
3647 : 576 : for (int i = 0; i < 5; i++)
3648 : : {
3649 : 480 : const cpp_token *tok = test.get_token ();
3650 : 480 : ASSERT_EQ (tok->type, CPP_STRING);
3651 : 480 : input_strings.safe_push (tok->val.str);
3652 : 480 : input_locs[i] = tok->src_loc;
3653 : : }
3654 : :
3655 : : /* Verify that cpp_interpret_string works. */
3656 : 96 : cpp_string dst_string;
3657 : 96 : const enum cpp_ttype type = CPP_STRING;
3658 : 96 : bool result = cpp_interpret_string (test.m_parser,
3659 : 96 : input_strings.address (), 5,
3660 : : &dst_string, type);
3661 : 96 : ASSERT_TRUE (result);
3662 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3663 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3664 : :
3665 : : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3666 : 96 : test.m_concats.record_string_concatenation (5, input_locs);
3667 : :
3668 : 96 : location_t initial_loc = input_locs[0];
3669 : :
3670 : : /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3671 : : detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3672 : : and expect get_source_range_for_substring to fail.
3673 : : However, for a string concatenation test, we can have a case
3674 : : where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3675 : : but subsequent strings can be after it.
3676 : : Attempting to detect this within assert_char_at_range
3677 : : would overcomplicate the logic for the common test cases, so
3678 : : we detect it here. */
3679 : 96 : if (should_have_column_data_p (input_locs[0])
3680 : 96 : && !should_have_column_data_p (input_locs[4]))
3681 : : {
3682 : : /* Verify that the lookup (done here via get_source_range_for_char) gracefully rejects
3683 : : this case. */
3684 : 8 : source_range actual_range;
3685 : 8 : const char *err
3686 : 8 : = get_source_range_for_char (test.m_parser, test.m_file_cache,
3687 : : &test.m_concats,
3688 : : initial_loc, type, 0, &actual_range);
3689 : 8 : ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3690 : 8 : return;
3691 : : }
3692 : :
3693 : 528 : for (int i = 0; i < 5; i++)
3694 : 1320 : for (int j = 0; j < 2; j++)
3695 : 880 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3696 : : i + 1, 10 + j, 10 + j);
3697 : :
3698 : : /* NUL-terminator should use the final closing quote at line 5 column 12. */
3699 : 88 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3700 : :
3701 : 88 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3702 : 96 : }
3703 : :
3704 : : /* Another test of string literal concatenation, this time combined with
3705 : : various kinds of escaped characters. */
3706 : :
3707 : : static void
3708 : 96 : test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3709 : : {
3710 : : /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and
3711 : : digit 6 in ASCII as octal "\066", concatenating multiple strings. */
3712 : 96 : const char *content
3713 : : /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3714 : : .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3715 : : = (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3716 : 96 : lexer_test test (case_, content, NULL);
3717 : :
3718 : 96 : auto_vec <cpp_string> input_strings;
3719 : 96 : location_t input_locs[4];
3720 : :
3721 : : /* Verify that we get the expected tokens back. */
3722 : 480 : for (int i = 0; i < 4; i++)
3723 : : {
3724 : 384 : const cpp_token *tok = test.get_token ();
3725 : 384 : ASSERT_EQ (tok->type, CPP_STRING);
3726 : 384 : input_strings.safe_push (tok->val.str);
3727 : 384 : input_locs[i] = tok->src_loc;
3728 : : }
3729 : :
3730 : : /* Verify that cpp_interpret_string works. */
3731 : 96 : cpp_string dst_string;
3732 : 96 : const enum cpp_ttype type = CPP_STRING;
3733 : 96 : bool result = cpp_interpret_string (test.m_parser,
3734 : 96 : input_strings.address (), 4,
3735 : : &dst_string, type);
3736 : 96 : ASSERT_TRUE (result);
3737 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3738 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3739 : :
3740 : : /* Simulate c-lex.cc's lex_string in order to record concatenation. */
3741 : 96 : test.m_concats.record_string_concatenation (4, input_locs);
3742 : :
3743 : 96 : location_t initial_loc = input_locs[0];
3744 : :
3745 : 576 : for (int i = 0; i <= 4; i++)
3746 : 480 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3747 : 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3748 : 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3749 : 384 : for (int i = 7; i <= 9; i++)
3750 : 288 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3751 : :
3752 : : /* NUL-terminator should use the location of the final closing quote (column 38). */
3753 : 96 : ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3754 : :
3755 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3756 : 96 : }
3757 : :
3758 : : /* Test of a string literal that comes from the expansion of an object-like macro. */
3759 : :
3760 : : static void
3761 : 96 : test_lexer_string_locations_macro (const line_table_case &case_)
3762 : : {
3763 : : /* Digits 0-9.
3764 : : .....................0000000001111111111.22222222223.
3765 : : .....................1234567890123456789.01234567890. */
3766 : 96 : const char *content = ("#define MACRO \"0123456789\" /* non-str */\n"
3767 : : " MACRO");
3768 : 96 : lexer_test test (case_, content, NULL);
3769 : :
3770 : : /* Verify that we get the expected tokens back: padding, then the string literal. */
3771 : 96 : const cpp_token *tok = test.get_token ();
3772 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
3773 : :
3774 : 96 : tok = test.get_token ();
3775 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3776 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3777 : :
3778 : : /* Verify ranges of individual characters. We ought to
3779 : : see columns within the macro definition (line 1, from column 20). */
3780 : 1152 : for (int i = 0; i <= 10; i++)
3781 : 1056 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3782 : : i, 1, 20 + i, 20 + i);
3783 : :
3784 : 96 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3785 : :
3786 : 96 : tok = test.get_token ();
3787 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
3788 : 96 : }
3789 : :
3790 : : /* Test of stringification of a macro argument. */
3791 : :
3792 : : static void
3793 : 96 : test_lexer_string_locations_stringified_macro_argument
3794 : : (const line_table_case &case_)
3795 : : {
3796 : : /* .....................000000000111111111122222222223.
3797 : : .....................123456789012345678901234567890. */
3798 : 96 : const char *content = ("#define MACRO(X) #X /* non-str */\n"
3799 : : "MACRO(foo)\n");
3800 : 96 : lexer_test test (case_, content, NULL);
3801 : :
3802 : : /* Verify that we get the expected tokens back: padding, then the stringified argument. */
3803 : 96 : const cpp_token *tok = test.get_token ();
3804 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
3805 : :
3806 : 96 : tok = test.get_token ();
3807 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3808 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3809 : :
3810 : : /* We don't support getting the location of a stringified macro
3811 : : argument. Verify that it fails gracefully. */
3812 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3813 : : "cpp_interpret_string_1 failed");
3814 : :
3815 : 96 : tok = test.get_token ();
3816 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
3817 : :
3818 : 96 : tok = test.get_token ();
3819 : 96 : ASSERT_EQ (tok->type, CPP_PADDING);
3820 : 96 : }
3821 : :
3822 : : /* Ensure that we fail gracefully if something attempts to pass
3823 : : in a location that isn't a string literal token. Seen on this code:
3824 : :
3825 : : const char a[] = " %d ";
3826 : : __builtin_printf (a, 0.5);
3827 : : ^
3828 : :
3829 : : when c-format.cc erroneously used the indicated one-character
3830 : : location as the format string location, leading to a read past the
3831 : : end of a string buffer in cpp_interpret_string_1. */
3832 : :
3833 : : static void
3834 : 96 : test_lexer_string_locations_non_string (const line_table_case &case_)
3835 : : {
3836 : : /* .....................000000000111111111122222222223.
3837 : : .....................123456789012345678901234567890. */
3838 : 96 : const char *content = (" a\n");
3839 : 96 : lexer_test test (case_, content, NULL);
3840 : :
3841 : : /* Verify that we get the expected token back. */
3842 : 96 : const cpp_token *tok = test.get_token ();
3843 : 96 : ASSERT_EQ (tok->type, CPP_NAME);
3844 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3845 : :
3846 : : /* At this point, libcpp is attempting to interpret the name as a
3847 : : string literal, despite it not starting with a quote. We don't detect
3848 : : that, but we should at least fail gracefully. */
3849 : 96 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3850 : : "cpp_interpret_string_1 failed");
3851 : 96 : }
3852 : :
3853 : : /* Ensure that we can read substring information for a token which
3854 : : starts in one linemap and ends in another. Adapted from
3855 : : gcc.dg/cpp/pr69985.c. */
3856 : :
3857 : : static void
3858 : 96 : test_lexer_string_locations_long_line (const line_table_case &case_)
3859 : : {
3860 : : /* .....................000000.000111111111
3861 : : .....................123456.789012345678. */
3862 : 96 : const char *content = ("/* A very long line, so that we start a new line map. */\n"
3863 : : " \"0123456789012345678901234567890123456789"
3864 : : "0123456789012345678901234567890123456789"
3865 : : "0123456789012345678901234567890123456789"
3866 : : "0123456789\"\n");
3867 : :
3868 : 96 : lexer_test test (case_, content, NULL);
3869 : :
3870 : : /* Verify that we get the expected token back. */
3871 : 96 : const cpp_token *tok = test.get_token ();
3872 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3873 : :
3874 : 96 : if (!should_have_column_data_p (line_table->highest_location))
3875 : 36 : return;
3876 : :
3877 : : /* Verify ranges of individual characters: 130 digits plus one more range at the closing quote, hence 131. */
3878 : 60 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3879 : 7920 : for (int i = 0; i < 131; i++)
3880 : 7860 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3881 : : i, 2, 7 + i, 7 + i);
3882 : 96 : }
3883 : :
3884 : : /* Test of locations within a raw string that doesn't contain a newline. */
3885 : :
3886 : : static void
3887 : 96 : test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3888 : : {
3889 : : /* .....................00.0000000111111111122.
3890 : : .....................12.3456789012345678901. */
3891 : 96 : const char *content = ("R\"foo(0123456789)foo\"\n");
3892 : 96 : lexer_test test (case_, content, NULL);
3893 : :
3894 : : /* Verify that we get the expected token back. */
3895 : 96 : const cpp_token *tok = test.get_token ();
3896 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3897 : :
3898 : : /* Verify that cpp_interpret_string works. */
3899 : 96 : cpp_string dst_string;
3900 : 96 : const enum cpp_ttype type = CPP_STRING;
3901 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3902 : : &dst_string, type);
3903 : 96 : ASSERT_TRUE (result);
3904 : 96 : ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3905 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3906 : :
3907 : 96 : if (!should_have_column_data_p (line_table->highest_location))
3908 : 32 : return;
3909 : :
3910 : : /* 0-9, plus the nul terminator. */
3911 : 64 : ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3912 : 768 : for (int i = 0; i < 11; i++)
3913 : 704 : ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3914 : : i, 1, 7 + i, 7 + i);
3915 : 96 : }
3916 : :
3917 : : /* Test of locations within a raw string that contains newlines. */
3918 : :
3919 : : static void
3920 : 96 : test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3921 : : {
3922 : : /* .....................00.0000.
3923 : : .....................12.3456. */
3924 : 96 : const char *content = ("R\"foo(\n"
3925 : : /* .....................00000.
3926 : : .....................12345. */
3927 : : "hello\n"
3928 : : "world\n"
3929 : : /* .....................00000.
3930 : : .....................12345. */
3931 : : ")foo\"\n");
3932 : 96 : lexer_test test (case_, content, NULL);
3933 : :
3934 : : /* Verify that we get the expected token back. */
3935 : 96 : const cpp_token *tok = test.get_token ();
3936 : 96 : ASSERT_EQ (tok->type, CPP_STRING);
3937 : :
3938 : : /* Verify that cpp_interpret_string works; the embedded newlines are kept in the result. */
3939 : 96 : cpp_string dst_string;
3940 : 96 : const enum cpp_ttype type = CPP_STRING;
3941 : 96 : bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3942 : : &dst_string, type);
3943 : 96 : ASSERT_TRUE (result);
3944 : 96 : ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3945 : 96 : free (const_cast <unsigned char *> (dst_string.text));
3946 : :
3947 : 96 : if (!should_have_column_data_p (line_table->highest_location))
3948 : 36 : return;
3949 : :
3950 : : /* Currently we don't support locations within raw strings that
3951 : : contain newlines. */
3952 : 60 : ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3953 : : "range endpoints are on different lines");
3954 : 96 : }
3955 : :
3956 : : /* Test of parsing an unterminated raw string. CASE_ is the line_table configuration handed to the lexer_test fixture. */
3957 : :
3958 : : static void
3959 : 96 : test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3960 : : {
3961 : 96 : const char *content = "R\"ouch()ouCh\" /* etc */";
3962 : : /* The closing delimiter above is spelled ouCh, which differs from the opening ouch, so the raw string is never closed. */
3963 : 96 : lexer_diagnostic_sink diagnostics;
3964 : 96 : lexer_test test (case_, content, &diagnostics);
3965 : 96 : test.m_implicitly_expect_EOF = false;
3966 : :
3967 : : /* Attempt to parse the raw string; the checks below expect a CPP_EOF token and exactly one diagnostic recorded in the sink. */
3968 : 96 : const cpp_token *tok = test.get_token ();
3969 : 96 : ASSERT_EQ (tok->type, CPP_EOF);
3970 : :
3971 : 96 : ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3972 : : /* We expect the message "unterminated raw string"
3973 : : in the "cpplib" translation domain.
3974 : : It's not clear that dgettext is available on all supported hosts,
3975 : : so this assertion is commented-out for now.
3976 : : ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3977 : : diagnostics.m_diagnostics[0]);
3978 : : */
3979 : 96 : }
3980 : :
3981 : : /* Test of lexing char constants. CASE_ is the line_table configuration handed to the lexer_test fixture. */
3982 : :
3983 : : static void
3984 : 96 : test_lexer_char_constants (const line_table_case &case_)
3985 : : {
3986 : : /* Various char constants.
3987 : : .....................0000000001111111111.22222222223.
3988 : : .....................1234567890123456789.01234567890. */
3989 : 96 : const char *content = (" 'a'\n"
3990 : : " u'a'\n"
3991 : : " U'a'\n"
3992 : : " L'a'\n"
3993 : : " 'abc'\n");
3994 : 96 : lexer_test test (case_, content, NULL);
3995 : :
3996 : : /* Verify that we get the expected tokens back. */
3997 : : /* 'a'. */
3998 : 96 : const cpp_token *tok = test.get_token ();
3999 : 96 : ASSERT_EQ (tok->type, CPP_CHAR);
4000 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
4001 : : /* Interpret the value of this first constant only; unsignedp is passed as an out-argument but is not checked. */
4002 : 96 : unsigned int chars_seen;
4003 : 96 : int unsignedp;
4004 : 96 : cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
4005 : : &chars_seen, &unsignedp);
4006 : 96 : ASSERT_EQ (cc, 'a');
4007 : 96 : ASSERT_EQ (chars_seen, 1);
4008 : :
4009 : : /* u'a'. For this and the remaining tokens only the token type and spelling are checked. */
4010 : 96 : tok = test.get_token ();
4011 : 96 : ASSERT_EQ (tok->type, CPP_CHAR16);
4012 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
4013 : :
4014 : : /* U'a'. */
4015 : 96 : tok = test.get_token ();
4016 : 96 : ASSERT_EQ (tok->type, CPP_CHAR32);
4017 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
4018 : :
4019 : : /* L'a'. */
4020 : 96 : tok = test.get_token ();
4021 : 96 : ASSERT_EQ (tok->type, CPP_WCHAR);
4022 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
4023 : :
4024 : : /* 'abc' (c-char-sequence). */
4025 : 96 : tok = test.get_token ();
4026 : 96 : ASSERT_EQ (tok->type, CPP_CHAR);
4027 : 96 : ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
4028 : 96 : }
4029 : : /* A table of interesting location_t values, giving one axis of our test
4030 : : matrix. for_each_line_table_case pairs every entry with each default_range_bits setting. */
4031 : :
4032 : : static const location_t boundary_locations[] = {
4033 : : /* Zero means "don't override the default values for a new line_table". */
4034 : : 0,
4035 : :
4036 : : /* An arbitrary non-zero value that isn't close to one of
4037 : : the boundary values below. */
4038 : : 0x10000,
4039 : :
4040 : : /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. */
4041 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
4042 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
4043 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
4044 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
4045 : : LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
4046 : :
4047 : : /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS. */
4048 : : LINE_MAP_MAX_LOCATION_WITH_COLS - 0x200,
4049 : : LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
4050 : : LINE_MAP_MAX_LOCATION_WITH_COLS,
4051 : : LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
4052 : : LINE_MAP_MAX_LOCATION_WITH_COLS + 0x200,
4053 : : };
4054 : :
4055 : : /* Run TESTCASE multiple times, once for each case in our test matrix: 2 default_range_bits settings times the 12 entries of boundary_locations, i.e. 24 calls. */
4056 : :
4057 : : void
4058 : 244 : for_each_line_table_case (void (*testcase) (const line_table_case &))
4059 : : {
4060 : : /* As noted above in the description of struct line_table_case,
4061 : : we want to explore a test matrix of interesting line_table
4062 : : situations, running various selftests for each case within the
4063 : : matrix. */
4064 : :
4065 : : /* Run all tests with:
4066 : : (a) line_table->default_range_bits == 0, and
4067 : : (b) line_table->default_range_bits == line_map_suggested_range_bits. */
4068 : :
4069 : 732 : for (int default_range_bits: {0, line_map_suggested_range_bits})
4070 : : {
4071 : : /* ...and use each of the "interesting" location values as
4072 : : the starting location within line_table. */
4073 : 488 : const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
4074 : 6344 : for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
4075 : : {
4076 : 5856 : line_table_case c (default_range_bits, boundary_locations[loc_idx]);
4077 : 5856 : testcase (c);
4078 : : }
4079 : : }
4080 : 244 : }
4081 : :
4082 : : /* Verify that when presented with a consecutive pair of locations with
4083 : : a very large line offset, we don't attempt to consolidate them into
4084 : : a single ordinary linemap where the line offsets within the line map
4085 : : would lead to overflow (PR lto/88147). */
4086 : :
4087 : : static void
4088 : 4 : test_line_offset_overflow ()
4089 : : {
4090 : 4 : line_table_test ltt (line_table_case (5, 0));
4091 : : /* The fixture above asks for 5 default range bits (checked via m_range_bits below) and a starting location of 0. */
4092 : 4 : linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
4093 : 4 : linemap_line_start (line_table, 1, 100);
4094 : 4 : location_t loc_a = linemap_line_start (line_table, 2578, 255);
4095 : 4 : assert_loceq ("foo.c", 2578, 0, loc_a);
4096 : :
4097 : 4 : const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
4098 : 4 : ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
4099 : 4 : ASSERT_EQ (ordmap_a->m_range_bits, 5);
4100 : : /* Jump from line 2578 to line 404198: this is the very large line offset under test. */
4101 : 4 : location_t loc_b = linemap_line_start (line_table, 404198, 512);
4102 : 4 : assert_loceq ("foo.c", 404198, 0, loc_b);
4103 : :
4104 : : /* We should have started a new linemap, rather than attempting to store
4105 : : a very large line offset. */
4106 : 4 : const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
4107 : 4 : ASSERT_NE (ordmap_a, ordmap_b);
4108 : 4 : }
4109 : : /* Selftests for cpp_display_width, cpp_byte_column_to_display_column and cpp_display_column_to_byte_column on valid and invalid UTF-8, control bytes and tabs, using cpp_wcwidth with a default tab stop of 8. */
4110 : 4 : void test_cpp_utf8 ()
4111 : : {
4112 : 4 : const int def_tabstop = 8;
4113 : 4 : cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
4114 : :
4115 : : /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */
4116 : 4 : {
4117 : 4 : int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
4118 : 4 : ASSERT_EQ (8, w_bad);
4119 : 4 : int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
4120 : 4 : ASSERT_EQ (5, w_ctrl);
4121 : : }
4122 : :
4123 : : /* Verify that wcwidth of valid UTF-8 is as expected. */
4124 : 4 : {
4125 : 4 : const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
4126 : 4 : ASSERT_EQ (1, w_pi);
4127 : 4 : const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
4128 : 4 : ASSERT_EQ (2, w_emoji);
4129 : 4 : const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
4130 : : policy);
4131 : 4 : ASSERT_EQ (1, w_umlaut_precomposed);
4132 : 4 : const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
4133 : : policy);
4134 : 4 : ASSERT_EQ (1, w_umlaut_combining);
4135 : 4 : const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
4136 : 4 : ASSERT_EQ (2, w_han);
4137 : 4 : const int w_ascii = cpp_display_width ("GCC", 3, policy);
4138 : 4 : ASSERT_EQ (3, w_ascii);
4139 : 4 : const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
4140 : : "\x9f! \xe4\xb8\xba y\xcc\x88",
4141 : : 24, policy);
4142 : 4 : ASSERT_EQ (18, w_mixed);
4143 : : }
4144 : :
4145 : : /* Verify that display width properly expands tabs. The expected widths are consistent with a tab advancing to the next multiple of the tab stop (with tab stop 8: tab -> 8, abc -> 11, tab -> 16, d -> 17); the last check asks for display column 7, inside the first tab, and expects byte column 1. */
4146 : 4 : {
4147 : 4 : const char *tstr = "\tabc\td";
4148 : 4 : ASSERT_EQ (6, cpp_display_width (tstr, 6,
4149 : : cpp_char_column_policy (1, cpp_wcwidth)));
4150 : 4 : ASSERT_EQ (10, cpp_display_width (tstr, 6,
4151 : : cpp_char_column_policy (3, cpp_wcwidth)));
4152 : 4 : ASSERT_EQ (17, cpp_display_width (tstr, 6,
4153 : : cpp_char_column_policy (8, cpp_wcwidth)));
4154 : 4 : ASSERT_EQ (1,
4155 : : cpp_display_column_to_byte_column
4156 : : (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
4157 : : }
4158 : :
4159 : : /* Verify that cpp_byte_column_to_display_column can go past the end,
4160 : : and similar edge cases. */
4161 : 4 : {
4162 : 4 : const char *str
4163 : : /* Display columns.
4164 : : 111111112345 */
4165 : : = "\xcf\x80 abc";
4166 : : /* 111122223456
4167 : : Byte columns. */
4168 : :
4169 : 4 : ASSERT_EQ (5, cpp_display_width (str, 6, policy));
4170 : 4 : ASSERT_EQ (105,
4171 : : cpp_byte_column_to_display_column (str, 6, 106, policy));
4172 : 4 : ASSERT_EQ (10000,
4173 : : cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
4174 : 4 : ASSERT_EQ (0,
4175 : : cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
4176 : : }
4177 : :
4178 : : /* Verify that cpp_display_column_to_byte_column can go past the end,
4179 : : and similar edge cases, and check invertibility. */
4180 : 4 : {
4181 : 4 : const char *str
4182 : : /* Display columns.
4183 : : 000000000000000000000000000000000000011
4184 : : 111111112222222234444444455555555678901 */
4185 : : = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4186 : : /* 000000000000000000000000000000000111111
4187 : : 111122223333444456666777788889999012345
4188 : : Byte columns. */
4189 : 4 : ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
4190 : 4 : ASSERT_EQ (15,
4191 : : cpp_display_column_to_byte_column (str, 15, 11, policy));
4192 : 4 : ASSERT_EQ (115,
4193 : : cpp_display_column_to_byte_column (str, 15, 111, policy));
4194 : 4 : ASSERT_EQ (10000,
4195 : : cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
4196 : 4 : ASSERT_EQ (0,
4197 : : cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
4198 : :
4199 : : /* Verify that we do not interrupt a UTF-8 sequence. */
4200 : 4 : ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
4201 : :
4202 : 64 : for (int byte_col = 1; byte_col <= 15; ++byte_col)
4203 : : {
4204 : 60 : const int disp_col
4205 : 60 : = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
4206 : 60 : const int byte_col2
4207 : 60 : = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
4208 : :
4209 : : /* If we ask for the display column in the middle of a UTF-8
4210 : : sequence, it will return the length of the partial sequence,
4211 : : matching the behavior of GCC before display column support.
4212 : : Otherwise check the round trip was successful. */
4213 : 60 : if (byte_col < 4)
4214 : 12 : ASSERT_EQ (byte_col, disp_col);
4215 : 48 : else if (byte_col >= 6 && byte_col < 9)
4216 : 12 : ASSERT_EQ (3 + (byte_col - 5), disp_col);
4217 : : else
4218 : 60 : ASSERT_EQ (byte_col2, byte_col);
4219 : : }
4220 : : }
4221 : 4 : }
4222 : : /* Return the result of cpp_valid_utf8_p on the NUL-terminated string STR. The length comes from strlen, so STR cannot be used to test embedded NUL bytes. */
4223 : : static bool
4224 : 36 : check_cpp_valid_utf8_p (const char *str)
4225 : : {
4226 : 36 : return cpp_valid_utf8_p (str, strlen (str));
4227 : : }
4228 : :
4229 : : /* Check that cpp_valid_utf8_p works as expected. */
4230 : :
4231 : : static void
4232 : 4 : test_cpp_valid_utf8_p ()
4233 : : {
4234 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4235 : :
4236 : : /* 2-byte char (pi). */
4237 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4238 : :
4239 : : /* 3-byte chars (the Japanese word "mojibake"). */
4240 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p
4241 : : (
4242 : : /* U+6587 CJK UNIFIED IDEOGRAPH-6587
4243 : : UTF-8: 0xE6 0x96 0x87
4244 : : C octal escaped UTF-8: \346\226\207. */
4245 : : "\346\226\207"
4246 : : /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
4247 : : UTF-8: 0xE5 0xAD 0x97
4248 : : C octal escaped UTF-8: \345\255\227. */
4249 : : "\345\255\227"
4250 : : /* U+5316 CJK UNIFIED IDEOGRAPH-5316
4251 : : UTF-8: 0xE5 0x8C 0x96
4252 : : C octal escaped UTF-8: \345\214\226. */
4253 : : "\345\214\226"
4254 : : /* U+3051 HIRAGANA LETTER KE
4255 : : UTF-8: 0xE3 0x81 0x91
4256 : : C octal escaped UTF-8: \343\201\221. */
4257 : : "\343\201\221"));
4258 : :
4259 : : /* 4-byte char: an emoji. */
4260 : 4 : ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4261 : :
4262 : : /* Control codes, including the NUL byte. cpp_valid_utf8_p is called directly with an explicit length of 5 because the strlen-based helper would stop at the NUL. */
4263 : 4 : ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
4264 : : /* The four bytes of the emoji above, each followed by an ASCII character, must be rejected. */
4265 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4266 : :
4267 : : /* Unexpected continuation bytes: each byte in 0x80-0xbf on its own must be rejected. */
4268 : 4 : for (unsigned char continuation_byte = 0x80;
4269 : 260 : continuation_byte <= 0xbf;
4270 : : continuation_byte++)
4271 : 256 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
4272 : :
4273 : : /* "Lonely start characters" for 2-byte sequences: the start byte is followed by a space instead of a continuation byte. */
4274 : 4 : {
4275 : 4 : unsigned char buf[2];
4276 : 4 : buf[1] = ' ';
4277 : 4 : for (buf[0] = 0xc0;
4278 : 132 : buf[0] <= 0xdf;
4279 : 128 : buf[0]++)
4280 : 128 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4281 : : }
4282 : :
4283 : : /* "Lonely start characters" for 3-byte sequences. */
4284 : 4 : {
4285 : 4 : unsigned char buf[2];
4286 : 4 : buf[1] = ' ';
4287 : 4 : for (buf[0] = 0xe0;
4288 : 68 : buf[0] <= 0xef;
4289 : 64 : buf[0]++)
4290 : 64 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4291 : : }
4292 : :
4293 : : /* "Lonely start characters" for 4-byte sequences. */
4294 : 4 : {
4295 : 4 : unsigned char buf[2];
4296 : 4 : buf[1] = ' ';
4297 : 4 : for (buf[0] = 0xf0;
4298 : 24 : buf[0] <= 0xf4;
4299 : 20 : buf[0]++)
4300 : 20 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4301 : : }
4302 : :
4303 : : /* Invalid start characters (formerly valid for 5-byte and 6-byte
4304 : : sequences). */
4305 : 4 : {
4306 : 4 : unsigned char buf[2];
4307 : 4 : buf[1] = ' ';
4308 : 4 : for (buf[0] = 0xf5;
4309 : 40 : buf[0] <= 0xfd;
4310 : 36 : buf[0]++)
4311 : 36 : ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
4312 : : }
4313 : :
4314 : : /* Impossible bytes. */
4315 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4316 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4317 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4318 : 4 : ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4319 : 4 : }
4320 : :
4321 : : /* Run all of the selftests within this file. Tests passed to for_each_line_table_case take a line_table_case and are run once per case in the test matrix; the others are called once directly. */
4322 : :
4323 : : void
4324 : 4 : input_cc_tests ()
4325 : : {
4326 : 4 : test_linenum_comparisons ();
4327 : 4 : test_should_have_column_data_p ();
4328 : 4 : test_unknown_location ();
4329 : 4 : test_builtins ();
4330 : 4 : for_each_line_table_case (test_make_location_nonpure_range_endpoints);
4331 : :
4332 : 4 : for_each_line_table_case (test_accessing_ordinary_linemaps);
4333 : 4 : for_each_line_table_case (test_lexer);
4334 : 4 : for_each_line_table_case (test_lexer_string_locations_simple);
4335 : 4 : for_each_line_table_case (test_lexer_string_locations_ebcdic);
4336 : 4 : for_each_line_table_case (test_lexer_string_locations_hex);
4337 : 4 : for_each_line_table_case (test_lexer_string_locations_oct);
4338 : 4 : for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
4339 : 4 : for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
4340 : 4 : for_each_line_table_case (test_lexer_string_locations_ucn4);
4341 : 4 : for_each_line_table_case (test_lexer_string_locations_ucn8);
4342 : 4 : for_each_line_table_case (test_lexer_string_locations_wide_string);
4343 : 4 : for_each_line_table_case (test_lexer_string_locations_string16);
4344 : 4 : for_each_line_table_case (test_lexer_string_locations_string32);
4345 : 4 : for_each_line_table_case (test_lexer_string_locations_u8);
4346 : 4 : for_each_line_table_case (test_lexer_string_locations_utf8_source);
4347 : 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_1);
4348 : 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_2);
4349 : 4 : for_each_line_table_case (test_lexer_string_locations_concatenation_3);
4350 : 4 : for_each_line_table_case (test_lexer_string_locations_macro);
4351 : 4 : for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
4352 : 4 : for_each_line_table_case (test_lexer_string_locations_non_string);
4353 : 4 : for_each_line_table_case (test_lexer_string_locations_long_line);
4354 : 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
4355 : 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
4356 : 4 : for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
4357 : 4 : for_each_line_table_case (test_lexer_char_constants);
4358 : :
4359 : 4 : test_reading_source_line ();
4360 : 4 : test_reading_source_buffer ();
4361 : 4 : test_replacement ();
4362 : :
4363 : 4 : test_line_offset_overflow ();
4364 : :
4365 : 4 : test_cpp_utf8 ();
4366 : 4 : test_cpp_valid_utf8_p ();
4367 : 4 : }
4368 : :
4369 : : } // namespace selftest
4370 : :
4371 : : #endif /* CHECKING_P */
|