Line data Source code
1 : /* Caching input files for use by diagnostics.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "cpplib.h"
24 : #include "diagnostics/file-cache.h"
25 : #include "diagnostics/dumping.h"
26 : #include "selftest.h"
27 :
28 : #ifndef HAVE_ICONV
29 : #define HAVE_ICONV 0
30 : #endif
31 :
32 : namespace diagnostics {
33 :
/* Input charset configuration.  */

/* Charset callback used when the client does not provide one.  The
   file name argument is ignored and nullptr is always returned, so no
   input charset is ever named by this callback.  */
static const char *
default_charset_callback (const char * /* file_path */)
{
  return nullptr;
}
39 :
40 : void
41 919508 : file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
42 : bool should_skip_bom)
43 : {
44 919508 : m_input_context.ccb = (ccb ? ccb : default_charset_callback);
45 919508 : m_input_context.should_skip_bom = should_skip_bom;
46 919508 : }
47 :
48 : /* This is a cache used by get_next_line to store the content of a
49 : file to be searched for file lines. */
50 : class file_cache_slot
51 : {
52 : public:
53 : file_cache_slot ();
54 : ~file_cache_slot ();
55 :
56 : void dump (FILE *out, int indent) const;
57 0 : void DEBUG_FUNCTION dump () const { dump (stderr, 0); }
58 :
59 : bool read_line_num (size_t line_num,
60 : char ** line, ssize_t *line_len);
61 :
62 : /* Accessors. */
63 28452141 : const char *get_file_path () const { return m_file_path; }
64 344650 : unsigned get_use_count () const { return m_use_count; }
65 3489 : bool missing_trailing_newline_p () const
66 : {
67 3489 : return m_missing_trailing_newline;
68 : }
69 : char_span get_full_file_content ();
70 :
71 3255584 : void inc_use_count () { m_use_count++; }
72 :
73 : bool create (const file_cache::input_context &in_context,
74 : const char *file_path, FILE *fp, unsigned highest_use_count);
75 : void evict ();
76 : void set_content (const char *buf, size_t sz);
77 :
78 285722 : static size_t tune (size_t line_record_size_)
79 : {
80 285722 : size_t ret = line_record_size;
81 285722 : line_record_size = line_record_size_;
82 285722 : return ret;
83 : }
84 :
85 : private:
86 : /* These are information used to store a line boundary. */
87 : class line_info
88 : {
89 : public:
90 : /* The line number. It starts from 1. */
91 : size_t line_num;
92 :
93 : /* The position (byte count) of the beginning of the line,
94 : relative to the file data pointer. This starts at zero. */
95 : size_t start_pos;
96 :
97 : /* The position (byte count) of the last byte of the line. This
98 : normally points to the '\n' character, or to one byte after the
99 : last byte of the file, if the file doesn't contain a '\n'
100 : character. */
101 : size_t end_pos;
102 :
103 2926314443 : line_info (size_t l, size_t s, size_t e)
104 2909287276 : : line_num (l), start_pos (s), end_pos (e)
105 : {}
106 :
107 : line_info ()
108 : :line_num (0), start_pos (0), end_pos (0)
109 : {}
110 :
111 52650 : static bool less_than(const line_info &a, const line_info &b)
112 : {
113 52650 : return a.line_num < b.line_num;
114 : }
115 : };
116 :
117 : bool needs_read_p () const;
118 : bool needs_grow_p () const;
119 : void maybe_grow ();
120 : bool read_data ();
121 : bool maybe_read_data ();
122 : bool get_next_line (char **line, ssize_t *line_len);
123 : bool read_next_line (char ** line, ssize_t *line_len);
124 : bool goto_next_line ();
125 :
126 : static const size_t buffer_size = 4 * 1024;
127 : static size_t line_record_size;
128 : static size_t recent_cached_lines_shift;
129 :
130 : /* The number of time this file has been accessed. This is used
131 : to designate which file cache to evict from the cache
132 : array. */
133 : unsigned m_use_count;
134 :
135 : /* The file_path is the key for identifying a particular file in
136 : the cache. This copy is owned by the slot. */
137 : char *m_file_path;
138 :
139 : FILE *m_fp;
140 :
141 : /* True when an read error happened. */
142 : bool m_error;
143 :
144 : /* This points to the content of the file that we've read so
145 : far. */
146 : char *m_data;
147 :
148 : /* The allocated buffer to be freed may start a little earlier than DATA,
149 : e.g. if a UTF8 BOM was skipped at the beginning. */
150 : int m_alloc_offset;
151 :
152 : /* The size of the DATA array above.*/
153 : size_t m_size;
154 :
155 : /* The number of bytes read from the underlying file so far. This
156 : must be less (or equal) than SIZE above. */
157 : size_t m_nb_read;
158 :
159 : /* The index of the beginning of the current line. */
160 : size_t m_line_start_idx;
161 :
162 : /* The number of the previous line read. This starts at 1. Zero
163 : means we've read no line so far. */
164 : size_t m_line_num;
165 :
166 : /* Could this file be missing a trailing newline on its final line?
167 : Initially true (to cope with empty files), set to true/false
168 : as each line is read. */
169 : bool m_missing_trailing_newline;
170 :
171 : /* This is a record of the beginning and end of the lines we've seen
172 : while reading the file. This is useful to avoid walking the data
173 : from the beginning when we are asked to read a line that is
174 : before LINE_START_IDX above. When the lines exceed line_record_size
175 : this is scaled down dynamically, with the line_info becoming anchors. */
176 : vec<line_info, va_heap> m_line_record;
177 :
178 : /* A cache of the recently seen lines. This is maintained as a ring
179 : buffer. */
180 : vec<line_info, va_heap> m_line_recent;
181 :
182 : /* First and last valid entry in m_line_recent. */
183 : size_t m_line_recent_last, m_line_recent_first;
184 :
185 254866 : void offset_buffer (int offset)
186 : {
187 254866 : gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
188 : : (size_t) offset <= m_size);
189 254866 : gcc_assert (m_data);
190 254866 : m_alloc_offset += offset;
191 254866 : m_data += offset;
192 254866 : m_size -= offset;
193 254866 : }
194 :
195 : };
196 :
197 : size_t file_cache_slot::line_record_size = 0;
198 : size_t file_cache_slot::recent_cached_lines_shift = 8;
199 :
200 : /* Tune file_cache. */
201 : void
202 285722 : file_cache::tune (size_t num_file_slots, size_t lines)
203 : {
204 285722 : if (file_cache_slot::tune (lines) != lines
205 285722 : || m_num_file_slots != num_file_slots)
206 : {
207 17 : delete[] m_file_slots;
208 181 : m_file_slots = new file_cache_slot[num_file_slots];
209 : }
210 285722 : m_num_file_slots = num_file_slots;
211 285722 : }
212 :
213 : static const char *
214 : find_end_of_line (const char *s, size_t len);
215 :
216 : /* Lookup the cache used for the content of a given file accessed by
217 : caret diagnostic. Return the found cached file, or NULL if no
218 : cached file was found. */
219 :
220 : file_cache_slot *
221 1747602 : file_cache::lookup_file (const char *file_path)
222 : {
223 1747602 : gcc_assert (file_path);
224 :
225 : /* This will contain the found cached file. */
226 : file_cache_slot *r = NULL;
227 29709398 : for (unsigned i = 0; i < m_num_file_slots; ++i)
228 : {
229 27961796 : file_cache_slot *c = &m_file_slots[i];
230 27961796 : if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
231 : {
232 1627792 : c->inc_use_count ();
233 1627792 : r = c;
234 : }
235 : }
236 :
237 1747602 : if (r)
238 1627792 : r->inc_use_count ();
239 :
240 1747602 : return r;
241 : }
242 :
243 : /* Purge any mention of FILENAME from the cache of files used for
244 : printing source code. For use in selftests when working
245 : with tempfiles. */
246 :
247 : void
248 576 : file_cache::forcibly_evict_file (const char *file_path)
249 : {
250 576 : gcc_assert (file_path);
251 :
252 576 : file_cache_slot *r = lookup_file (file_path);
253 576 : if (!r)
254 : /* Not found. */
255 : return;
256 :
257 0 : r->evict ();
258 : }
259 :
260 : /* Determine if FILE_PATH missing a trailing newline on its final line.
261 : Only valid to call once all of the file has been loaded, by
262 : requesting a line number beyond the end of the file. */
263 :
264 : bool
265 3489 : file_cache::missing_trailing_newline_p (const char *file_path)
266 : {
267 3489 : gcc_assert (file_path);
268 :
269 3489 : file_cache_slot *r = lookup_or_add_file (file_path);
270 3489 : return r->missing_trailing_newline_p ();
271 : }
272 :
273 : void
274 4 : file_cache::add_buffered_content (const char *file_path,
275 : const char *buffer,
276 : size_t sz)
277 : {
278 4 : gcc_assert (file_path);
279 :
280 4 : file_cache_slot *r = lookup_file (file_path);
281 4 : if (!r)
282 : {
283 4 : unsigned highest_use_count = 0;
284 4 : r = evicted_cache_tab_entry (&highest_use_count);
285 4 : if (!r->create (m_input_context, file_path, nullptr, highest_use_count))
286 0 : return;
287 : }
288 :
289 4 : r->set_content (buffer, sz);
290 : }
291 :
292 : void
293 0 : file_cache_slot::evict ()
294 : {
295 0 : free (m_file_path);
296 0 : m_file_path = NULL;
297 0 : if (m_fp)
298 0 : fclose (m_fp);
299 0 : m_error = false;
300 0 : m_fp = NULL;
301 0 : m_nb_read = 0;
302 0 : m_line_start_idx = 0;
303 0 : m_line_num = 0;
304 0 : m_line_record.truncate (0);
305 0 : m_line_recent_first = 0;
306 0 : m_line_recent_last = 0;
307 0 : m_use_count = 0;
308 0 : m_missing_trailing_newline = true;
309 0 : }
310 :
311 : /* Return the file cache that has been less used, recently, or the
312 : first empty one. If HIGHEST_USE_COUNT is non-null,
313 : *HIGHEST_USE_COUNT is set to the highest use count of the entries
314 : in the cache table. */
315 :
316 : file_cache_slot*
317 88891 : file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
318 : {
319 88891 : file_cache_slot *to_evict = &m_file_slots[0];
320 88891 : unsigned huc = to_evict->get_use_count ();
321 262630 : for (unsigned i = 1; i < m_num_file_slots; ++i)
322 : {
323 255759 : file_cache_slot *c = &m_file_slots[i];
324 255759 : bool c_is_empty = (c->get_file_path () == NULL);
325 :
326 255759 : if (c->get_use_count () < to_evict->get_use_count ()
327 255759 : || (to_evict->get_file_path () && c_is_empty))
328 : /* We evict C because it's either an entry with a lower use
329 : count or one that is empty. */
330 : to_evict = c;
331 :
332 255759 : if (huc < c->get_use_count ())
333 : huc = c->get_use_count ();
334 :
335 255759 : if (c_is_empty)
336 : /* We've reached the end of the cache; subsequent elements are
337 : all empty. */
338 : break;
339 : }
340 :
341 88891 : if (highest_use_count)
342 88891 : *highest_use_count = huc;
343 :
344 88891 : return to_evict;
345 : }
346 :
347 : /* Create the cache used for the content of a given file to be
348 : accessed by caret diagnostic. This cache is added to an array of
349 : cache and can be retrieved by lookup_file_in_cache_tab. This
350 : function returns the created cache. Note that only the last
351 : m_num_file_slots files are cached.
352 :
353 : This can return nullptr if the FILE_PATH can't be opened for
354 : reading, or if the content can't be converted to the input_charset. */
355 :
356 : file_cache_slot*
357 119230 : file_cache::add_file (const char *file_path)
358 : {
359 :
360 119230 : FILE *fp = fopen (file_path, "r");
361 119230 : if (fp == NULL)
362 : return NULL;
363 :
364 88887 : unsigned highest_use_count = 0;
365 88887 : file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
366 88887 : if (!r->create (m_input_context, file_path, fp, highest_use_count))
367 : return NULL;
368 : return r;
369 : }
370 :
371 : /* Get a borrowed char_span to the full content of this file
372 : as decoded according to the input charset, encoded as UTF-8. */
373 :
374 : char_span
375 237 : file_cache_slot::get_full_file_content ()
376 : {
377 237 : char *line;
378 237 : ssize_t line_len;
379 2018 : while (get_next_line (&line, &line_len))
380 : {
381 : }
382 237 : return char_span (m_data, m_nb_read);
383 : }
384 :
385 : /* Populate this slot for use on FILE_PATH and FP, dropping any
386 : existing cached content within it. */
387 :
388 : bool
389 88891 : file_cache_slot::create (const file_cache::input_context &in_context,
390 : const char *file_path, FILE *fp,
391 : unsigned highest_use_count)
392 : {
393 88891 : m_file_path = file_path ? xstrdup (file_path) : nullptr;
394 88891 : if (m_fp)
395 6871 : fclose (m_fp);
396 88891 : m_error = false;
397 88891 : m_fp = fp;
398 88891 : if (m_alloc_offset)
399 0 : offset_buffer (-m_alloc_offset);
400 88891 : m_nb_read = 0;
401 88891 : m_line_start_idx = 0;
402 88891 : m_line_num = 0;
403 88891 : m_line_recent_first = 0;
404 88891 : m_line_recent_last = 0;
405 88891 : m_line_record.truncate (0);
406 : /* Ensure that this cache entry doesn't get evicted next time
407 : add_file_to_cache_tab is called. */
408 88891 : m_use_count = ++highest_use_count;
409 88891 : m_missing_trailing_newline = true;
410 :
411 :
412 : /* Check the input configuration to determine if we need to do any
413 : transformations, such as charset conversion or BOM skipping. */
414 88891 : if (const char *input_charset = in_context.ccb (file_path))
415 : {
416 : /* Need a full-blown conversion of the input charset. */
417 5 : fclose (m_fp);
418 5 : m_fp = NULL;
419 5 : const cpp_converted_source cs
420 5 : = cpp_get_converted_source (file_path, input_charset);
421 5 : if (!cs.data)
422 0 : return false;
423 5 : if (m_data)
424 0 : XDELETEVEC (m_data);
425 5 : m_data = cs.data;
426 5 : m_nb_read = m_size = cs.len;
427 5 : m_alloc_offset = cs.data - cs.to_free;
428 : }
429 88886 : else if (in_context.should_skip_bom)
430 : {
431 62956 : if (read_data ())
432 : {
433 62956 : const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
434 62956 : offset_buffer (offset);
435 62956 : m_nb_read -= offset;
436 : }
437 : }
438 :
439 : return true;
440 : }
441 :
442 : void
443 4 : file_cache_slot::set_content (const char *buf, size_t sz)
444 : {
445 4 : m_data = (char *)xmalloc (sz);
446 4 : memcpy (m_data, buf, sz);
447 4 : m_nb_read = m_size = sz;
448 4 : m_alloc_offset = 0;
449 :
450 4 : if (m_fp)
451 : {
452 0 : fclose (m_fp);
453 0 : m_fp = nullptr;
454 : }
455 4 : }
456 :
457 : /* file_cache's ctor. */
458 :
459 709973 : file_cache::file_cache ()
460 12069541 : : m_num_file_slots (16), m_file_slots (new file_cache_slot[m_num_file_slots])
461 : {
462 709973 : initialize_input_context (nullptr, false);
463 709973 : }
464 :
465 : /* file_cache's dtor. */
466 :
467 309632 : file_cache::~file_cache ()
468 : {
469 5263908 : delete[] m_file_slots;
470 309632 : }
471 :
472 : void
473 0 : file_cache::dump (FILE *out, int indent) const
474 : {
475 0 : for (size_t i = 0; i < m_num_file_slots; ++i)
476 : {
477 0 : dumping::emit_indent (out, indent);
478 0 : fprintf (out, "slot[%i]:\n", (int)i);
479 0 : m_file_slots[i].dump (out, indent + 2);
480 : }
481 0 : }
482 :
483 : void
484 0 : file_cache::dump () const
485 : {
486 0 : dump (stderr, 0);
487 0 : }
488 :
489 : /* Lookup the cache used for the content of a given file accessed by
490 : caret diagnostic. If no cached file was found, create a new cache
491 : for this file, add it to the array of cached file and return
492 : it.
493 :
494 : This can return nullptr on a cache miss if FILE_PATH can't be opened for
495 : reading, or if the content can't be converted to the input_charset. */
496 :
497 : file_cache_slot*
498 1747022 : file_cache::lookup_or_add_file (const char *file_path)
499 : {
500 1747022 : file_cache_slot *r = lookup_file (file_path);
501 1747022 : if (r == NULL)
502 119230 : r = add_file (file_path);
503 1747022 : return r;
504 : }
505 :
506 : /* Default constructor for a cache of file used by caret
507 : diagnostic. */
508 :
509 11359748 : file_cache_slot::file_cache_slot ()
510 11359748 : : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_error (false), m_data (0),
511 11359748 : m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
512 11359748 : m_line_num (0), m_missing_trailing_newline (true),
513 11359748 : m_line_recent_last (0), m_line_recent_first (0)
514 : {
515 11359748 : m_line_record.create (0);
516 11359748 : m_line_recent.create (1U << recent_cached_lines_shift);
517 2919455236 : for (int i = 0; i < 1 << recent_cached_lines_shift; i++)
518 2908095488 : m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0));
519 11359748 : }
520 :
521 : /* Destructor for a cache of file used by caret diagnostic. */
522 :
523 4954292 : file_cache_slot::~file_cache_slot ()
524 : {
525 4954292 : free (m_file_path);
526 4954292 : if (m_fp)
527 : {
528 80417 : fclose (m_fp);
529 80417 : m_fp = NULL;
530 : }
531 4954292 : if (m_data)
532 : {
533 80426 : offset_buffer (-m_alloc_offset);
534 80426 : XDELETEVEC (m_data);
535 80426 : m_data = 0;
536 : }
537 4954292 : m_line_record.release ();
538 4954292 : m_line_recent.release ();
539 4954292 : }
540 :
541 : void
542 0 : file_cache_slot::dump (FILE *out, int indent) const
543 : {
544 0 : if (!m_file_path)
545 : {
546 0 : dumping::emit_indent (out, indent);
547 0 : fprintf (out, "(unused)\n");
548 0 : return;
549 : }
550 0 : dumping::emit_string_field (out, indent, "file_path", m_file_path);
551 0 : {
552 0 : dumping::emit_indent (out, indent);
553 0 : fprintf (out, "fp: %p\n", (void *)m_fp);
554 : }
555 0 : dumping::emit_bool_field (out, indent, "needs_read_p", needs_read_p ());
556 0 : dumping::emit_bool_field (out, indent, "needs_grow_p", needs_grow_p ());
557 0 : dumping::emit_unsigned_field (out, indent, "use_count", m_use_count);
558 0 : dumping::emit_size_t_field (out, indent, "size", m_size);
559 0 : dumping::emit_size_t_field (out, indent, "nb_read", m_nb_read);
560 0 : dumping::emit_size_t_field (out, indent, "start_line_idx", m_line_start_idx);
561 0 : dumping::emit_size_t_field (out, indent, "line_num", m_line_num);
562 0 : dumping::emit_bool_field (out, indent, "missing_trailing_newline",
563 0 : m_missing_trailing_newline);
564 0 : {
565 0 : dumping::emit_indent (out, indent);
566 0 : fprintf (out, "line records (%i):\n", m_line_record.length ());
567 : }
568 0 : int idx = 0;
569 0 : for (auto &line : m_line_record)
570 : {
571 0 : dumping::emit_indent (out, indent);
572 0 : fprintf (out, ("[%i]:"
573 : " line " HOST_SIZE_T_PRINT_DEC ":"
574 : " byte offsets: " HOST_SIZE_T_PRINT_DEC
575 : "-" HOST_SIZE_T_PRINT_DEC "\n"),
576 0 : idx++, line.line_num, line.start_pos, line.end_pos);
577 : }
578 : }
579 :
580 : /* Returns TRUE iff the cache would need to be filled with data coming
581 : from the file. That is, either the cache is empty or full or the
582 : current line is empty. Note that if the cache is full, it would
583 : need to be extended and filled again. */
584 :
585 : bool
586 28530732 : file_cache_slot::needs_read_p () const
587 : {
588 28530732 : return m_fp && (m_nb_read == 0
589 28504682 : || m_nb_read == m_size
590 28448940 : || (m_line_start_idx >= m_nb_read - 1));
591 : }
592 :
593 : /* Return TRUE iff the cache is full and thus needs to be
594 : extended. */
595 :
596 : bool
597 144624 : file_cache_slot::needs_grow_p () const
598 : {
599 144624 : return m_nb_read == m_size;
600 : }
601 :
602 : /* Grow the cache if it needs to be extended. */
603 :
604 : void
605 144624 : file_cache_slot::maybe_grow ()
606 : {
607 144624 : if (!needs_grow_p ())
608 : return;
609 :
610 137753 : if (!m_data)
611 : {
612 82011 : gcc_assert (m_size == 0 && m_alloc_offset == 0);
613 82011 : m_size = buffer_size;
614 82011 : m_data = XNEWVEC (char, m_size);
615 : }
616 : else
617 : {
618 55742 : const int offset = m_alloc_offset;
619 55742 : offset_buffer (-offset);
620 55742 : m_size *= 2;
621 55742 : m_data = XRESIZEVEC (char, m_data, m_size);
622 55742 : offset_buffer (offset);
623 : }
624 : }
625 :
626 : /* Read more data into the cache. Extends the cache if need be.
627 : Returns TRUE iff new data could be read. */
628 :
629 : bool
630 149425 : file_cache_slot::read_data ()
631 : {
632 149425 : if (feof (m_fp) || ferror (m_fp))
633 4801 : return false;
634 :
635 144624 : maybe_grow ();
636 :
637 144624 : char * from = m_data + m_nb_read;
638 144624 : size_t to_read = m_size - m_nb_read;
639 144624 : size_t nb_read = fread (from, 1, to_read, m_fp);
640 :
641 144624 : if (ferror (m_fp))
642 : {
643 0 : m_error = true;
644 0 : return false;
645 : }
646 :
647 144624 : m_nb_read += nb_read;
648 144624 : return !!nb_read;
649 : }
650 :
651 : /* Read new data iff the cache needs to be filled with more data
652 : coming from the file FP. Return TRUE iff the cache was filled with
653 : mode data. */
654 :
655 : bool
656 28530732 : file_cache_slot::maybe_read_data ()
657 : {
658 28530732 : if (!needs_read_p ())
659 : return false;
660 86469 : return read_data ();
661 : }
662 :
/* Helper function for file_cache_slot::get_next_line (), to find the end of
   the next line within the LEN bytes starting at S.  Returns with the
   memchr convention, i.e. nullptr if a line terminator was not found.
   We need to determine line endings in the same manner that libcpp
   does: any of \n, \r\n, or \r is a line ending.  For \r\n the pointer
   returned is that of the \n.  */

static const char *
find_end_of_line (const char *s, size_t len)
{
  const char *const limit = s + len;
  while (s != limit)
    {
      const char ch = *s;
      if (ch == '\n')
        return s;

      if (ch == '\r')
        {
          /* Don't find the line ending if \r is the very last character
             in the buffer; we do not know if it's the end of the file or
             just the end of what has been read so far, and we wouldn't
             want to break in the middle of what's actually a \r\n
             sequence.  The case of a file ending in a \r is left to
             the caller.  */
          if (s + 1 == limit)
            return nullptr;

          return s[1] == '\n' ? s + 1 : s;
        }

      ++s;
    }

  return nullptr;
}
693 :
694 : /* Read the next line from this slot, using the slot's buffer as a cache
695 : for the data coming from the file. Upon successful completion, *LINE is
696 : set to the beginning of the line found. *LINE points directly into the
697 : line cache and is only valid until the next call of get_next_line.
698 : *LINE_LEN is set to the length of the line. Note that the line
699 : does not contain any terminal delimiter. This function returns
700 : true if a line was read or processed from the cache, false if no data
701 : is left or a read error has been recorded. Note that subsequent calls
702 : to get_next_line might make the content of *LINE invalid. */
703 :
704 : bool
705 28523440 : file_cache_slot::get_next_line (char **line, ssize_t *line_len)
706 : {
707 : /* Fill the cache with data to process. */
708 28523440 : maybe_read_data ();
709 :
710 28523440 : size_t remaining_size = m_nb_read - m_line_start_idx;
711 28523440 : if (remaining_size == 0)
712 : /* There is no more data to process. */
713 : return false;
714 :
715 28518636 : const char *line_start = m_data + m_line_start_idx;
716 :
/* NEXT_LINE_START stays null when the line runs to the end of the data
   without a terminator. */
717 28518636 : const char *next_line_start = NULL;
718 28518636 : size_t len = 0;
719 28518636 : const char *line_end = find_end_of_line (line_start, remaining_size);
720 28518636 : if (line_end == NULL)
721 : {
722 : /* We haven't found an end-of-line delimiter in the cache.
723 : Fill the cache with more data from the file and look again.
         LINE_START is recomputed each time, since reading may have
         moved the buffer. */
724 7292 : while (maybe_read_data ())
725 : {
726 7222 : line_start = m_data + m_line_start_idx;
727 7222 : remaining_size = m_nb_read - m_line_start_idx;
728 7222 : line_end = find_end_of_line (line_start, remaining_size);
729 7222 : if (line_end != NULL)
730 : {
731 3622 : next_line_start = line_end + 1;
732 3622 : break;
733 : }
734 : }
735 3692 : if (line_end == NULL)
736 : {
737 : /* We've loaded all the file into the cache and still no
738 : terminator. Let's say the line ends up at one byte past the
739 : end of the file. This is to stay consistent with the case
740 : of when the line ends up with a terminator and line_end points to
741 : that. That consistency is useful below in the len calculation.
742 :
743 : If the file ends in a \r, we didn't identify it as a line
744 : terminator above, so do that now instead. */
745 70 : line_end = m_data + m_nb_read;
746 70 : if (m_nb_read && line_end[-1] == '\r')
747 : {
748 0 : --line_end;
749 0 : m_missing_trailing_newline = false;
750 : }
751 : else
752 70 : m_missing_trailing_newline = true;
753 : }
754 : else
755 3622 : m_missing_trailing_newline = false;
756 : }
757 : else
758 : {
759 28514944 : next_line_start = line_end + 1;
760 28514944 : m_missing_trailing_newline = false;
761 : }
762 :
/* Do not hand out a line once a read error has been recorded. */
763 28518636 : if (m_error)
764 : return false;
765 :
766 : /* At this point, we've found the end of the line. It either points to
767 : the line terminator or to one byte after the last byte of the file. */
768 28518636 : gcc_assert (line_end != NULL);
769 :
770 28518636 : len = line_end - line_start;
771 :
/* Hand back a pointer into the buffer; nothing is copied. */
772 28518636 : if (m_line_start_idx < m_nb_read)
773 28518636 : *line = const_cast<char *> (line_start);
774 :
775 28518636 : ++m_line_num;
776 :
777 : /* Now update our line record so that re-reading lines from
778 : before m_line_start_idx is faster. */
779 28518636 : size_t rlen = m_line_record.length ();
780 : /* Only update when beyond the previously cached region. */
781 25536629 : if (rlen == 0 || m_line_record[rlen - 1].line_num < m_line_num)
782 : {
/* SPACING is the line-number gap between the last two records (1 if
   there are fewer than two). DELTA is the gap between the last record
   and the line just read (1 if there is no record yet). */
783 17848614 : size_t spacing
784 : = (rlen >= 2
785 17848614 : ? (m_line_record[rlen - 1].line_num
786 11754890 : - m_line_record[rlen - 2].line_num) : 1);
787 17848614 : size_t delta
788 17848614 : = rlen >= 1 ? m_line_num - m_line_record[rlen - 1].line_num : 1;
789 :
790 17848614 : size_t max_size = line_record_size;
791 : /* One anchor per hundred input lines. */
792 17848614 : if (max_size == 0)
793 17848614 : max_size = m_line_num / 100;
794 :
795 : /* If we're too far beyond drop half of the lines to rebalance.
         Every second record is kept, which doubles the spacing. */
796 17848614 : if (rlen == max_size && delta >= spacing * 2)
797 : {
798 : size_t j = 0;
799 1103404 : for (size_t i = 1; i < rlen; i += 2)
800 72823 : m_line_record[j++] = m_line_record[i];
801 1030581 : m_line_record.truncate (j);
802 1030581 : rlen = j;
803 1030581 : spacing *= 2;
804 : }
805 :
/* Record this line if there is room and it is at least SPACING lines
   past the last record. */
806 17848614 : if (rlen < max_size && delta >= spacing)
807 : {
808 1155225 : file_cache_slot::line_info li (m_line_num, m_line_start_idx,
809 1155225 : line_end - m_data);
810 1155225 : m_line_record.safe_push (li);
811 : }
812 : }
813 :
814 : /* Cache recent tail lines separately for fast access. This assumes
815 : most accesses do not skip backwards. The ring is only extended when
      it is empty or when this line directly follows its newest entry. */
816 28518636 : if (m_line_recent_last == m_line_recent_first
817 28518636 : || m_line_recent[m_line_recent_last].line_num == m_line_num - 1)
818 : {
/* Advance the newest index; if it catches up with the oldest index,
   advance that one too, dropping the oldest entry. */
819 17027167 : size_t mask = ((size_t) 1 << recent_cached_lines_shift) - 1;
820 17027167 : m_line_recent_last = (m_line_recent_last + 1) & mask;
821 17027167 : if (m_line_recent_last == m_line_recent_first)
822 10658583 : m_line_recent_first = (m_line_recent_first + 1) & mask;
823 34054334 : m_line_recent[m_line_recent_last]
824 17027167 : = file_cache_slot::line_info (m_line_num, m_line_start_idx,
825 17027167 : line_end - m_data);
826 : }
827 :
828 : /* Update m_line_start_idx so that it points to the next line to be
829 : read. */
830 28518636 : if (next_line_start)
831 28518566 : m_line_start_idx = next_line_start - m_data;
832 : else
833 : /* We didn't find any terminal '\n'. Let's consider that the end
834 : of line is the end of the data in the cache. The next
835 : invocation of get_next_line will either read more data from the
836 : underlying file or return false early because we've reached the
837 : end of the file. */
838 70 : m_line_start_idx = m_nb_read;
839 :
840 28518636 : *line_len = len;
841 :
842 28518636 : return true;
843 : }
844 :
845 : /* Consume the next bytes coming from the cache (or from its
846 : underlying file if there are remaining unread bytes in the file)
847 : until we reach the next end-of-line (or end-of-file). There is no
848 : copying from the cache involved. Return TRUE upon successful
849 : completion. */
850 :
851 : bool
852 27983245 : file_cache_slot::goto_next_line ()
853 : {
854 27983245 : char *l;
855 27983245 : ssize_t len;
856 :
857 27983245 : return get_next_line (&l, &len);
858 : }
859 :
860 : /* Read an arbitrary line number LINE_NUM from the file cached in C.
861 : If the line was read successfully, *LINE points to the beginning
862 : of the line in the file cache and *LINE_LEN is the length of the
863 : line. *LINE is not nul-terminated, but may contain zero bytes.
864 : *LINE is only valid until the next call of read_line_num.
865 : This function returns bool if a line was read. */
866 :
867 : bool
868 1712953 : file_cache_slot::read_line_num (size_t line_num,
869 : char ** line, ssize_t *line_len)
870 : {
871 1712953 : gcc_assert (line_num > 0);
872 :
873 : /* Is the line in the recent line cache?
874 : This assumes the main file processing is only using
875 : a single contiguous cursor with only temporary excursions. */
876 1712953 : if (m_line_recent_first != m_line_recent_last
877 1624062 : && m_line_recent[m_line_recent_first].line_num <= line_num
878 3278659 : && m_line_recent[m_line_recent_last].line_num >= line_num)
879 : {
880 1173328 : line_info &last = m_line_recent[m_line_recent_last];
881 1173328 : size_t mask = (1U << recent_cached_lines_shift) - 1;
882 1173328 : size_t idx = (m_line_recent_last - (last.line_num - line_num)) & mask;
883 1173328 : line_info &recent = m_line_recent[idx];
884 1173328 : gcc_assert (recent.line_num == line_num);
885 1173328 : *line = m_data + recent.start_pos;
886 1173328 : *line_len = recent.end_pos - recent.start_pos;
887 1173328 : return true;
888 : }
889 :
890 539625 : if (line_num <= m_line_num)
891 : {
892 36563 : line_info l (line_num, 0, 0);
893 36563 : int i = m_line_record.lower_bound (l, line_info::less_than);
894 36563 : if (i == 0)
895 : {
896 28143 : m_line_start_idx = 0;
897 28143 : m_line_num = 0;
898 : }
899 8420 : else if (m_line_record[i - 1].line_num == line_num)
900 : {
901 : /* We have the start/end of the line. */
902 0 : *line = m_data + m_line_record[i - 1].start_pos;
903 0 : *line_len = m_line_record[i - 1].end_pos - m_line_record[i - 1].start_pos;
904 0 : return true;
905 : }
906 : else
907 : {
908 8420 : gcc_assert (m_line_record[i - 1].line_num < m_line_num);
909 8420 : m_line_start_idx = m_line_record[i - 1].start_pos;
910 8420 : m_line_num = m_line_record[i - 1].line_num - 1;
911 : }
912 : }
913 :
914 : /* Let's walk from line m_line_num up to line_num - 1, without
915 : copying any line. */
916 28521422 : while (m_line_num < line_num - 1)
917 27983245 : if (!goto_next_line ())
918 : return false;
919 :
920 : /* The line we want is the next one. Let's read it. */
921 538177 : return get_next_line (line, line_len);
922 : }
923 :
924 : /* Return the physical source line that corresponds to FILE_PATH/LINE.
925 : The line is not nul-terminated. The returned pointer is only
926 : valid until the next call of location_get_source_line.
927 : Note that the line can contain several null characters,
928 : so the returned value's length has the actual length of the line.
929 : If the function fails, a NULL char_span is returned. */
930 :
931 : char_span
932 1743315 : file_cache::get_source_line (const char *file_path, int line)
933 : {
934 1743315 : char *buffer = NULL;
935 1743315 : ssize_t len;
936 :
937 1743315 : if (line == 0)
938 23 : return char_span (NULL, 0);
939 :
940 1743292 : if (file_path == NULL)
941 0 : return char_span (NULL, 0);
942 :
943 1743292 : file_cache_slot *c = lookup_or_add_file (file_path);
944 1743292 : if (c == NULL)
945 30339 : return char_span (NULL, 0);
946 :
947 1712953 : bool read = c->read_line_num (line, &buffer, &len);
948 1712953 : if (!read)
949 4567 : return char_span (NULL, 0);
950 :
951 1708386 : return char_span (buffer, len);
952 : }
953 :
954 : char_span
955 241 : file_cache::get_source_file_content (const char *file_path)
956 : {
957 241 : file_cache_slot *c = lookup_or_add_file (file_path);
958 241 : if (c == nullptr)
959 4 : return char_span (nullptr, 0);
960 237 : return c->get_full_file_content ();
961 : }
962 :
963 : #if CHECKING_P
964 :
965 : namespace selftest {
966 :
967 : using temp_source_file = ::selftest::temp_source_file;
968 :
969 : /* Verify reading of a specific line LINENUM in TMP, FC. */
970 :
971 : static void
972 18600 : check_line (temp_source_file &tmp, file_cache &fc, int linenum)
973 : {
974 18600 : char_span line = fc.get_source_line (tmp.get_filename (), linenum);
975 18600 : int n;
976 18600 : const char *b = line.get_buffer ();
977 18600 : size_t l = line.length ();
978 18600 : char buf[5];
979 18600 : ASSERT_LT (l, 5);
980 18600 : memcpy (buf, b, l);
981 18600 : buf[l] = '\0';
982 18600 : ASSERT_TRUE (sscanf (buf, "%d", &n) == 1);
983 18600 : ASSERT_EQ (n, linenum);
984 18600 : }
985 :
986 : /* Test file cache replacement. */
987 :
988 : static void
989 4 : test_replacement ()
990 : {
991 4 : const int maxline = 1000;
992 :
993 4 : char *vec = XNEWVEC (char, maxline * 5);
994 4 : char *p = vec;
995 4 : int i;
996 4008 : for (i = 1; i <= maxline; i++)
997 4000 : p += sprintf (p, "%d\n", i);
998 :
999 4 : temp_source_file tmp (SELFTEST_LOCATION, ".txt", vec);
1000 4 : free (vec);
1001 4 : file_cache fc;
1002 :
1003 4004 : for (i = 2; i <= maxline; i++)
1004 : {
1005 3996 : check_line (tmp, fc, i);
1006 3996 : check_line (tmp, fc, i - 1);
1007 3996 : if (i >= 10)
1008 3964 : check_line (tmp, fc, i - 9);
1009 3964 : if (i >= 350) /* Exceed the look behind cache. */
1010 2604 : check_line (tmp, fc, i - 300);
1011 : }
1012 44 : for (i = 5; i <= maxline; i += 100)
1013 40 : check_line (tmp, fc, i);
1014 4004 : for (i = 1; i <= maxline; i++)
1015 4000 : check_line (tmp, fc, i);
1016 4 : }
1017 :
1018 : /* Verify reading of input files (e.g. for caret-based diagnostics). */
1019 :
1020 : static void
1021 4 : test_reading_source_line ()
1022 : {
1023 : /* Create a tempfile and write some text to it. */
1024 4 : temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1025 : "01234567890123456789\n"
1026 : "This is the test text\n"
1027 4 : "This is the 3rd line");
1028 4 : file_cache fc;
1029 :
1030 : /* Read back a specific line from the tempfile. */
1031 4 : char_span source_line = fc.get_source_line (tmp.get_filename (), 3);
1032 4 : ASSERT_TRUE (source_line);
1033 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
1034 4 : ASSERT_EQ (20, source_line.length ());
1035 4 : ASSERT_TRUE (!strncmp ("This is the 3rd line",
1036 : source_line.get_buffer (), source_line.length ()));
1037 :
1038 4 : source_line = fc.get_source_line (tmp.get_filename (), 2);
1039 4 : ASSERT_TRUE (source_line);
1040 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
1041 4 : ASSERT_EQ (21, source_line.length ());
1042 4 : ASSERT_TRUE (!strncmp ("This is the test text",
1043 : source_line.get_buffer (), source_line.length ()));
1044 :
1045 4 : source_line = fc.get_source_line (tmp.get_filename (), 4);
1046 4 : ASSERT_FALSE (source_line);
1047 4 : ASSERT_TRUE (source_line.get_buffer () == NULL);
1048 4 : }
1049 :
1050 : /* Verify reading from buffers (e.g. for sarif-replay). */
1051 :
1052 : static void
1053 4 : test_reading_source_buffer ()
1054 : {
1055 4 : const char *text = ("01234567890123456789\n"
1056 : "This is the test text\n"
1057 : "This is the 3rd line");
1058 4 : const char *filename = "foo.txt";
1059 4 : file_cache fc;
1060 4 : fc.add_buffered_content (filename, text, strlen (text));
1061 :
1062 : /* Read back a specific line from the tempfile. */
1063 4 : char_span source_line = fc.get_source_line (filename, 3);
1064 4 : ASSERT_TRUE (source_line);
1065 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
1066 4 : ASSERT_EQ (20, source_line.length ());
1067 4 : ASSERT_TRUE (!strncmp ("This is the 3rd line",
1068 : source_line.get_buffer (), source_line.length ()));
1069 :
1070 4 : source_line = fc.get_source_line (filename, 2);
1071 4 : ASSERT_TRUE (source_line);
1072 4 : ASSERT_TRUE (source_line.get_buffer () != NULL);
1073 4 : ASSERT_EQ (21, source_line.length ());
1074 4 : ASSERT_TRUE (!strncmp ("This is the test text",
1075 : source_line.get_buffer (), source_line.length ()));
1076 :
1077 4 : source_line = fc.get_source_line (filename, 4);
1078 4 : ASSERT_FALSE (source_line);
1079 4 : ASSERT_TRUE (source_line.get_buffer () == NULL);
1080 4 : }
1081 :
1082 : /* Run all of the selftests within this file. */
1083 :
1084 : void
1085 4 : file_cache_cc_tests ()
1086 : {
1087 4 : test_reading_source_line ();
1088 4 : test_reading_source_buffer ();
1089 4 : test_replacement ();
1090 4 : }
1091 :
1092 : } // namespace selftest
1093 :
1094 : #endif /* CHECKING_P */
1095 :
1096 : } // namespace diagnostics
|