LCOV - code coverage report
Current view: top level - gcc/diagnostics - file-cache.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 84.9 % 445 378
Test Date: 2026-02-28 14:20:25 Functions: 87.5 % 40 35
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Caching input files for use by diagnostics.
       2              :    Copyright (C) 2004-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "cpplib.h"
      24              : #include "diagnostics/file-cache.h"
      25              : #include "diagnostics/dumping.h"
      26              : #include "selftest.h"
      27              : 
      28              : #ifndef HAVE_ICONV
      29              : #define HAVE_ICONV 0
      30              : #endif
      31              : 
      32              : namespace diagnostics {
      33              : 
      34              : /* Input charset configuration.  */
      35        26495 : static const char *default_charset_callback (const char *)
      36              : {
      37        26495 :   return nullptr;
      38              : }
      39              : 
      40              : void
      41       919508 : file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
      42              :                                       bool should_skip_bom)
      43              : {
      44       919508 :   m_input_context.ccb = (ccb ? ccb : default_charset_callback);
      45       919508 :   m_input_context.should_skip_bom = should_skip_bom;
      46       919508 : }
      47              : 
      48              : /* This is a cache used by get_next_line to store the content of a
      49              :    file to be searched for file lines.  */
      50              : class file_cache_slot
      51              : {
      52              : public:
      53              :   file_cache_slot ();
      54              :   ~file_cache_slot ();
      55              : 
      56              :   void dump (FILE *out, int indent) const;
      57            0 :   void DEBUG_FUNCTION dump () const { dump (stderr, 0); }
      58              : 
      59              :   bool read_line_num (size_t line_num,
      60              :                       char ** line, ssize_t *line_len);
      61              : 
      62              :   /* Accessors.  */
      63     28452141 :   const char *get_file_path () const { return m_file_path; }
      64       344650 :   unsigned get_use_count () const { return m_use_count; }
      65         3489 :   bool missing_trailing_newline_p () const
      66              :   {
      67         3489 :     return m_missing_trailing_newline;
      68              :   }
      69              :   char_span get_full_file_content ();
      70              : 
      71      3255584 :   void inc_use_count () { m_use_count++; }
      72              : 
      73              :   bool create (const file_cache::input_context &in_context,
      74              :                const char *file_path, FILE *fp, unsigned highest_use_count);
      75              :   void evict ();
      76              :   void set_content (const char *buf, size_t sz);
      77              : 
      78       285722 :   static size_t tune (size_t line_record_size_)
      79              :   {
      80       285722 :     size_t ret = line_record_size;
      81       285722 :     line_record_size = line_record_size_;
      82       285722 :     return ret;
      83              :   }
      84              : 
      85              :  private:
      86              :   /* This is the information used to store a line boundary.  */
      87              :   class line_info
      88              :   {
      89              :   public:
      90              :     /* The line number.  It starts from 1.  */
      91              :     size_t line_num;
      92              : 
      93              :     /* The position (byte count) of the beginning of the line,
      94              :        relative to the file data pointer.  This starts at zero.  */
      95              :     size_t start_pos;
      96              : 
      97              :     /* The position (byte count) of the last byte of the line.  This
      98              :        normally points to the '\n' character, or to one byte after the
      99              :        last byte of the file, if the file doesn't contain a '\n'
     100              :        character.  */
     101              :     size_t end_pos;
     102              : 
     103   2926314443 :     line_info (size_t l, size_t s, size_t e)
     104   2909287276 :       : line_num (l), start_pos (s), end_pos (e)
     105              :     {}
     106              : 
     107              :     line_info ()
     108              :       :line_num (0), start_pos (0), end_pos (0)
     109              :     {}
     110              : 
     111        52650 :     static bool less_than(const line_info &a, const line_info &b)
     112              :     {
     113        52650 :       return a.line_num < b.line_num;
     114              :     }
     115              :   };
     116              : 
     117              :   bool needs_read_p () const;
     118              :   bool needs_grow_p () const;
     119              :   void maybe_grow ();
     120              :   bool read_data ();
     121              :   bool maybe_read_data ();
     122              :   bool get_next_line (char **line, ssize_t *line_len);
     123              :   bool read_next_line (char ** line, ssize_t *line_len);
     124              :   bool goto_next_line ();
     125              : 
     126              :   static const size_t buffer_size = 4 * 1024;
     127              :   static size_t line_record_size;
     128              :   static size_t recent_cached_lines_shift;
     129              : 
     130              :   /* The number of times this file has been accessed.  This is used
     131              :      to designate which file cache to evict from the cache
     132              :      array.  */
     133              :   unsigned m_use_count;
     134              : 
     135              :   /* The file_path is the key for identifying a particular file in
     136              :      the cache.  This copy is owned by the slot.  */
     137              :   char *m_file_path;
     138              : 
     139              :   FILE *m_fp;
     140              : 
     141              :   /* True when a read error happened.  */
     142              :   bool m_error;
     143              : 
     144              :   /* This points to the content of the file that we've read so
     145              :      far.  */
     146              :   char *m_data;
     147              : 
     148              :   /* The allocated buffer to be freed may start a little earlier than DATA,
     149              :      e.g. if a UTF8 BOM was skipped at the beginning.  */
     150              :   int m_alloc_offset;
     151              : 
     152              :   /*  The size of the DATA array above.*/
     153              :   size_t m_size;
     154              : 
     155              :   /* The number of bytes read from the underlying file so far.  This
     156              :      must be less than or equal to SIZE above.  */
     157              :   size_t m_nb_read;
     158              : 
     159              :   /* The index of the beginning of the current line.  */
     160              :   size_t m_line_start_idx;
     161              : 
     162              :   /* The number of the previous line read.  This starts at 1.  Zero
     163              :      means we've read no line so far.  */
     164              :   size_t m_line_num;
     165              : 
     166              :   /* Could this file be missing a trailing newline on its final line?
     167              :      Initially true (to cope with empty files), set to true/false
     168              :      as each line is read.  */
     169              :   bool m_missing_trailing_newline;
     170              : 
     171              :   /* This is a record of the beginning and end of the lines we've seen
     172              :      while reading the file.  This is useful to avoid walking the data
     173              :      from the beginning when we are asked to read a line that is
     174              :      before LINE_START_IDX above.  When the lines exceed line_record_size
     175              :      this is scaled down dynamically, with the line_info becoming anchors.  */
     176              :   vec<line_info, va_heap> m_line_record;
     177              : 
     178              :   /* A cache of the recently seen lines. This is maintained as a ring
     179              :      buffer. */
     180              :   vec<line_info, va_heap> m_line_recent;
     181              : 
     182              :   /* First and last valid entry in m_line_recent.  */
     183              :   size_t m_line_recent_last, m_line_recent_first;
     184              : 
     185       254866 :   void offset_buffer (int offset)
     186              :   {
     187       254866 :     gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
     188              :                 : (size_t) offset <= m_size);
     189       254866 :     gcc_assert (m_data);
     190       254866 :     m_alloc_offset += offset;
     191       254866 :     m_data += offset;
     192       254866 :     m_size -= offset;
     193       254866 :   }
     194              : 
     195              : };
     196              : 
     197              : size_t file_cache_slot::line_record_size = 0;
     198              : size_t file_cache_slot::recent_cached_lines_shift = 8;
     199              : 
     200              : /* Tune file_cache.  */
     201              : void
     202       285722 : file_cache::tune (size_t num_file_slots, size_t lines)
     203              : {
     204       285722 :   if (file_cache_slot::tune (lines) != lines
     205       285722 :       || m_num_file_slots != num_file_slots)
     206              :     {
     207           17 :       delete[] m_file_slots;
     208          181 :       m_file_slots = new file_cache_slot[num_file_slots];
     209              :     }
     210       285722 :   m_num_file_slots = num_file_slots;
     211       285722 : }
     212              : 
     213              : static const char *
     214              : find_end_of_line (const char *s, size_t len);
     215              : 
     216              : /* Lookup the cache used for the content of a given file accessed by
     217              :    caret diagnostic.  Return the found cached file, or NULL if no
     218              :    cached file was found.  */
     219              : 
     220              : file_cache_slot *
     221      1747602 : file_cache::lookup_file (const char *file_path)
     222              : {
     223      1747602 :   gcc_assert (file_path);
     224              : 
     225              :   /* This will contain the found cached file.  */
     226              :   file_cache_slot *r = NULL;
     227     29709398 :   for (unsigned i = 0; i < m_num_file_slots; ++i)
     228              :     {
     229     27961796 :       file_cache_slot *c = &m_file_slots[i];
     230     27961796 :       if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
     231              :         {
     232      1627792 :           c->inc_use_count ();
     233      1627792 :           r = c;
     234              :         }
     235              :     }
     236              : 
     237      1747602 :   if (r)
     238      1627792 :     r->inc_use_count ();
     239              : 
     240      1747602 :   return r;
     241              : }
     242              : 
     243              : /* Purge any mention of FILENAME from the cache of files used for
     244              :    printing source code.  For use in selftests when working
     245              :    with tempfiles.  */
     246              : 
     247              : void
     248          576 : file_cache::forcibly_evict_file (const char *file_path)
     249              : {
     250          576 :   gcc_assert (file_path);
     251              : 
     252          576 :   file_cache_slot *r = lookup_file (file_path);
     253          576 :   if (!r)
     254              :     /* Not found.  */
     255              :     return;
     256              : 
     257            0 :   r->evict ();
     258              : }
     259              : 
     260              : /* Determine if FILE_PATH is missing a trailing newline on its final line.
     261              :    Only valid to call once all of the file has been loaded, by
     262              :    requesting a line number beyond the end of the file.  */
     263              : 
     264              : bool
     265         3489 : file_cache::missing_trailing_newline_p (const char *file_path)
     266              : {
     267         3489 :   gcc_assert (file_path);
     268              : 
     269         3489 :   file_cache_slot *r = lookup_or_add_file (file_path);
     270         3489 :   return r->missing_trailing_newline_p ();
     271              : }
     272              : 
     273              : void
     274            4 : file_cache::add_buffered_content (const char *file_path,
     275              :                                   const char *buffer,
     276              :                                   size_t sz)
     277              : {
     278            4 :   gcc_assert (file_path);
     279              : 
     280            4 :   file_cache_slot *r = lookup_file (file_path);
     281            4 :   if (!r)
     282              :     {
     283            4 :       unsigned highest_use_count = 0;
     284            4 :       r = evicted_cache_tab_entry (&highest_use_count);
     285            4 :       if (!r->create (m_input_context, file_path, nullptr, highest_use_count))
     286            0 :         return;
     287              :     }
     288              : 
     289            4 :   r->set_content (buffer, sz);
     290              : }
     291              : 
     292              : void
     293            0 : file_cache_slot::evict ()
     294              : {
     295            0 :   free (m_file_path);
     296            0 :   m_file_path = NULL;
     297            0 :   if (m_fp)
     298            0 :     fclose (m_fp);
     299            0 :   m_error = false;
     300            0 :   m_fp = NULL;
     301            0 :   m_nb_read = 0;
     302            0 :   m_line_start_idx = 0;
     303            0 :   m_line_num = 0;
     304            0 :   m_line_record.truncate (0);
     305            0 :   m_line_recent_first = 0;
     306            0 :   m_line_recent_last = 0;
     307            0 :   m_use_count = 0;
     308            0 :   m_missing_trailing_newline = true;
     309            0 : }
     310              : 
     311              : /* Return the file cache slot with the lowest use count, or the
     312              :    first empty one.  If HIGHEST_USE_COUNT is non-null,
     313              :    *HIGHEST_USE_COUNT is set to the highest use count of the entries
     314              :    in the cache table.  */
     315              : 
     316              : file_cache_slot*
     317        88891 : file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
     318              : {
     319        88891 :   file_cache_slot *to_evict = &m_file_slots[0];
     320        88891 :   unsigned huc = to_evict->get_use_count ();
     321       262630 :   for (unsigned i = 1; i < m_num_file_slots; ++i)
     322              :     {
     323       255759 :       file_cache_slot *c = &m_file_slots[i];
     324       255759 :       bool c_is_empty = (c->get_file_path () == NULL);
     325              : 
     326       255759 :       if (c->get_use_count () < to_evict->get_use_count ()
     327       255759 :           || (to_evict->get_file_path () && c_is_empty))
     328              :         /* We evict C because it's either an entry with a lower use
     329              :            count or one that is empty.  */
     330              :         to_evict = c;
     331              : 
     332       255759 :       if (huc < c->get_use_count ())
     333              :         huc = c->get_use_count ();
     334              : 
     335       255759 :       if (c_is_empty)
     336              :         /* We've reached the end of the cache; subsequent elements are
     337              :            all empty.  */
     338              :         break;
     339              :     }
     340              : 
     341        88891 :   if (highest_use_count)
     342        88891 :     *highest_use_count = huc;
     343              : 
     344        88891 :   return to_evict;
     345              : }
     346              : 
     347              : /* Create the cache used for the content of a given file to be
     348              :    accessed by caret diagnostic.  This cache is added to an array of
     349              :    caches and can be retrieved by lookup_file.  This
     350              :    function returns the created cache.  Note that only the last
     351              :    m_num_file_slots files are cached.
     352              : 
     353              :    This can return nullptr if the FILE_PATH can't be opened for
     354              :    reading, or if the content can't be converted to the input_charset.  */
     355              : 
     356              : file_cache_slot*
     357       119230 : file_cache::add_file (const char *file_path)
     358              : {
     359              : 
     360       119230 :   FILE *fp = fopen (file_path, "r");
     361       119230 :   if (fp == NULL)
     362              :     return NULL;
     363              : 
     364        88887 :   unsigned highest_use_count = 0;
     365        88887 :   file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
     366        88887 :   if (!r->create (m_input_context, file_path, fp, highest_use_count))
     367              :     return NULL;
     368              :   return r;
     369              : }
     370              : 
     371              : /* Get a borrowed char_span to the full content of this file
     372              :    as decoded according to the input charset, encoded as UTF-8.  */
     373              : 
     374              : char_span
     375          237 : file_cache_slot::get_full_file_content ()
     376              : {
     377          237 :   char *line;
     378          237 :   ssize_t line_len;
     379         2018 :   while (get_next_line (&line, &line_len))
     380              :     {
     381              :     }
     382          237 :   return char_span (m_data, m_nb_read);
     383              : }
     384              : 
     385              : /* Populate this slot for use on FILE_PATH and FP, dropping any
     386              :    existing cached content within it.  */
     387              : 
     388              : bool
     389        88891 : file_cache_slot::create (const file_cache::input_context &in_context,
     390              :                          const char *file_path, FILE *fp,
     391              :                          unsigned highest_use_count)
     392              : {
     393        88891 :   m_file_path = file_path ? xstrdup (file_path) : nullptr;
     394        88891 :   if (m_fp)
     395         6871 :     fclose (m_fp);
     396        88891 :   m_error = false;
     397        88891 :   m_fp = fp;
     398        88891 :   if (m_alloc_offset)
     399            0 :     offset_buffer (-m_alloc_offset);
     400        88891 :   m_nb_read = 0;
     401        88891 :   m_line_start_idx = 0;
     402        88891 :   m_line_num = 0;
     403        88891 :   m_line_recent_first = 0;
     404        88891 :   m_line_recent_last = 0;
     405        88891 :   m_line_record.truncate (0);
     406              :   /* Ensure that this cache entry doesn't get evicted next time
     407              :      add_file is called.  */
     408        88891 :   m_use_count = ++highest_use_count;
     409        88891 :   m_missing_trailing_newline = true;
     410              : 
     411              : 
     412              :   /* Check the input configuration to determine if we need to do any
     413              :      transformations, such as charset conversion or BOM skipping.  */
     414        88891 :   if (const char *input_charset = in_context.ccb (file_path))
     415              :     {
     416              :       /* Need a full-blown conversion of the input charset.  */
     417            5 :       fclose (m_fp);
     418            5 :       m_fp = NULL;
     419            5 :       const cpp_converted_source cs
     420            5 :         = cpp_get_converted_source (file_path, input_charset);
     421            5 :       if (!cs.data)
     422            0 :         return false;
     423            5 :       if (m_data)
     424            0 :         XDELETEVEC (m_data);
     425            5 :       m_data = cs.data;
     426            5 :       m_nb_read = m_size = cs.len;
     427            5 :       m_alloc_offset = cs.data - cs.to_free;
     428              :     }
     429        88886 :   else if (in_context.should_skip_bom)
     430              :     {
     431        62956 :       if (read_data ())
     432              :         {
     433        62956 :           const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
     434        62956 :           offset_buffer (offset);
     435        62956 :           m_nb_read -= offset;
     436              :         }
     437              :     }
     438              : 
     439              :   return true;
     440              : }
     441              : 
     442              : void
     443            4 : file_cache_slot::set_content (const char *buf, size_t sz)
     444              : {
     445            4 :   m_data = (char *)xmalloc (sz);
     446            4 :   memcpy (m_data, buf, sz);
     447            4 :   m_nb_read = m_size = sz;
     448            4 :   m_alloc_offset = 0;
     449              : 
     450            4 :   if (m_fp)
     451              :     {
     452            0 :       fclose (m_fp);
     453            0 :       m_fp = nullptr;
     454              :     }
     455            4 : }
     456              : 
     457              : /* file_cache's ctor.  */
     458              : 
     459       709973 : file_cache::file_cache ()
     460     12069541 : : m_num_file_slots (16), m_file_slots (new file_cache_slot[m_num_file_slots])
     461              : {
     462       709973 :   initialize_input_context (nullptr, false);
     463       709973 : }
     464              : 
     465              : /* file_cache's dtor.  */
     466              : 
     467       309632 : file_cache::~file_cache ()
     468              : {
     469      5263908 :   delete[] m_file_slots;
     470       309632 : }
     471              : 
     472              : void
     473            0 : file_cache::dump (FILE *out, int indent) const
     474              : {
     475            0 :   for (size_t i = 0; i < m_num_file_slots; ++i)
     476              :     {
     477            0 :       dumping::emit_indent (out, indent);
     478            0 :       fprintf (out, "slot[%i]:\n", (int)i);
     479            0 :       m_file_slots[i].dump (out, indent + 2);
     480              :     }
     481            0 : }
     482              : 
     483              : void
     484            0 : file_cache::dump () const
     485              : {
     486            0 :   dump (stderr, 0);
     487            0 : }
     488              : 
     489              : /* Lookup the cache used for the content of a given file accessed by
     490              :    caret diagnostic.  If no cached file was found, create a new cache
     491              :    for this file, add it to the array of cached files and return
     492              :    it.
     493              : 
     494              :    This can return nullptr on a cache miss if FILE_PATH can't be opened for
     495              :    reading, or if the content can't be converted to the input_charset.  */
     496              : 
     497              : file_cache_slot*
     498      1747022 : file_cache::lookup_or_add_file (const char *file_path)
     499              : {
     500      1747022 :   file_cache_slot *r = lookup_file (file_path);
     501      1747022 :   if (r == NULL)
     502       119230 :     r = add_file (file_path);
     503      1747022 :   return r;
     504              : }
     505              : 
     506              : /* Default constructor for a cache of file used by caret
     507              :    diagnostic.  */
     508              : 
     509     11359748 : file_cache_slot::file_cache_slot ()
     510     11359748 : : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_error (false), m_data (0),
     511     11359748 :   m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
     512     11359748 :   m_line_num (0), m_missing_trailing_newline (true),
     513     11359748 :   m_line_recent_last (0), m_line_recent_first (0)
     514              : {
     515     11359748 :   m_line_record.create (0);
     516     11359748 :   m_line_recent.create (1U << recent_cached_lines_shift);
     517   2919455236 :   for (int i = 0; i < 1 << recent_cached_lines_shift; i++)
     518   2908095488 :     m_line_recent.quick_push (file_cache_slot::line_info (0, 0, 0));
     519     11359748 : }
     520              : 
     521              : /* Destructor for a cache of file used by caret diagnostic.  */
     522              : 
     523      4954292 : file_cache_slot::~file_cache_slot ()
     524              : {
     525      4954292 :   free (m_file_path);
     526      4954292 :   if (m_fp)
     527              :     {
     528        80417 :       fclose (m_fp);
     529        80417 :       m_fp = NULL;
     530              :     }
     531      4954292 :   if (m_data)
     532              :     {
     533        80426 :       offset_buffer (-m_alloc_offset);
     534        80426 :       XDELETEVEC (m_data);
     535        80426 :       m_data = 0;
     536              :     }
     537      4954292 :   m_line_record.release ();
     538      4954292 :   m_line_recent.release ();
     539      4954292 : }
     540              : 
     541              : void
     542            0 : file_cache_slot::dump (FILE *out, int indent) const
     543              : {
     544            0 :   if (!m_file_path)
     545              :     {
     546            0 :       dumping::emit_indent (out, indent);
     547            0 :       fprintf (out, "(unused)\n");
     548            0 :       return;
     549              :     }
     550            0 :   dumping::emit_string_field (out, indent, "file_path", m_file_path);
     551            0 :   {
     552            0 :     dumping::emit_indent (out, indent);
     553            0 :     fprintf (out, "fp: %p\n", (void *)m_fp);
     554              :   }
     555            0 :   dumping::emit_bool_field (out, indent, "needs_read_p", needs_read_p ());
     556            0 :   dumping::emit_bool_field (out, indent, "needs_grow_p", needs_grow_p ());
     557            0 :   dumping::emit_unsigned_field (out, indent, "use_count", m_use_count);
     558            0 :   dumping::emit_size_t_field (out, indent, "size", m_size);
     559            0 :   dumping::emit_size_t_field (out, indent, "nb_read", m_nb_read);
     560            0 :   dumping::emit_size_t_field (out, indent, "start_line_idx", m_line_start_idx);
     561            0 :   dumping::emit_size_t_field (out, indent, "line_num", m_line_num);
     562            0 :   dumping::emit_bool_field (out, indent, "missing_trailing_newline",
     563            0 :                             m_missing_trailing_newline);
     564            0 :   {
     565            0 :     dumping::emit_indent (out, indent);
     566            0 :     fprintf (out, "line records (%i):\n", m_line_record.length ());
     567              :   }
     568            0 :   int idx = 0;
     569            0 :   for (auto &line : m_line_record)
     570              :     {
     571            0 :       dumping::emit_indent (out, indent);
     572            0 :       fprintf (out, ("[%i]:"
     573              :                      " line " HOST_SIZE_T_PRINT_DEC ":"
     574              :                      " byte offsets: " HOST_SIZE_T_PRINT_DEC
     575              :                      "-" HOST_SIZE_T_PRINT_DEC "\n"),
     576            0 :                idx++, line.line_num, line.start_pos, line.end_pos);
     577              :     }
     578              : }
     579              : 
     580              : /* Returns TRUE iff the cache would need to be filled with data coming
     581              :    from the file.  That is, either the cache is empty or full or the
     582              :    current line is empty.  Note that if the cache is full, it would
     583              :    need to be extended and filled again.  */
     584              : 
     585              : bool
     586     28530732 : file_cache_slot::needs_read_p () const
     587              : {
     588     28530732 :   return m_fp && (m_nb_read == 0
     589     28504682 :           || m_nb_read == m_size
     590     28448940 :           || (m_line_start_idx >= m_nb_read - 1));
     591              : }
     592              : 
     593              : /*  Return TRUE iff the cache is full and thus needs to be
     594              :     extended.  */
     595              : 
     596              : bool
     597       144624 : file_cache_slot::needs_grow_p () const
     598              : {
     599       144624 :   return m_nb_read == m_size;
     600              : }
     601              : 
     602              : /* Grow the cache if it is full: allocate it on first use, else double it.  */
     603              : 
     604              : void
     605       144624 : file_cache_slot::maybe_grow ()
     606              : {
     607       144624 :   if (!needs_grow_p ())
     608              :     return;
     609              : 
     610       137753 :   if (!m_data)
     611              :     {
     612        82011 :       gcc_assert (m_size == 0 && m_alloc_offset == 0);
     613        82011 :       m_size = buffer_size;
     614        82011 :       m_data = XNEWVEC (char, m_size);
     615              :     }
     616              :   else
     617              :     {
     618        55742 :       const int offset = m_alloc_offset;  /* Re-applied after the resize.  */
     619        55742 :       offset_buffer (-offset);
     620        55742 :       m_size *= 2;
     621        55742 :       m_data = XRESIZEVEC (char, m_data, m_size);
     622        55742 :       offset_buffer (offset);
     623              :     }
     624              : }
     625              : 
     626              : /* Read more data from m_fp into the cache, growing it first if it is
     627              :    full.  Return TRUE iff new data could be read; set m_error if fread fails.  */
     628              : 
     629              : bool
     630       149425 : file_cache_slot::read_data ()
     631              : {
     632       149425 :   if (feof (m_fp) || ferror (m_fp))
     633         4801 :     return false;
     634              : 
     635       144624 :   maybe_grow ();
     636              :   /* Append after the bytes already held, filling the rest of the buffer.  */
     637       144624 :   char * from = m_data + m_nb_read;
     638       144624 :   size_t to_read = m_size - m_nb_read;
     639       144624 :   size_t nb_read = fread (from, 1, to_read, m_fp);
     640              : 
     641       144624 :   if (ferror (m_fp))
     642              :     {
     643            0 :       m_error = true;
     644            0 :       return false;
     645              :     }
     646              : 
     647       144624 :   m_nb_read += nb_read;
     648       144624 :   return !!nb_read;
     649              : }
     650              : 
     651              : /* Read new data iff the cache needs to be filled with more data
     652              :    coming from the underlying file.  Return TRUE iff the cache was
     653              :    filled with more data.  */
     654              : 
     655              : bool
     656     28530732 : file_cache_slot::maybe_read_data ()
     657              : {
     658     28530732 :   if (!needs_read_p ())
     659              :     return false;
     660        86469 :   return read_data ();
     661              : }
     662              : 
     663              : /* Helper function for file_cache_slot::get_next_line (), to find the end of
     664              :    the next line in the LEN bytes starting at S.  Returns with the memchr
     665              :    convention, i.e. nullptr if a line terminator was not found; for \r\n the
     666              :    result points to the \n.  Line endings match libcpp: \n, \r\n, or \r.  */
     667              : 
     668              : static const char *
     669     28525858 : find_end_of_line (const char *s, size_t len)
     670              : {
     671   1040588014 :   for (const auto end = s + len; s != end; ++s)
     672              :     {
     673   1040580722 :       if (*s == '\n')
     674              :         return s;
     675   1012062809 :       if (*s == '\r')
     676              :         {
     677          653 :           const auto next = s + 1;
     678          653 :           if (next == end)
     679              :             {
     680              :               /* Don't find the line ending if \r is the very last character
     681              :                  in the buffer; we do not know if it's the end of the file or
     682              :                  just the end of what has been read so far, and we wouldn't
     683              :                  want to break in the middle of what's actually a \r\n
     684              :                  sequence.  Instead, we will handle the case of a file ending
     685              :                  in a \r later.  */
     686              :               break;
     687              :             }
     688          653 :           return (*next == '\n' ? next : s);
     689              :         }
     690              :     }
     691              :   return nullptr;
     692              : }
     693              : 
     694              : /* Read the next line from the underlying file, using this slot as a
     695              :    cache for the data coming from the file.  Upon successful
     696              :    completion, *LINE is set to the beginning of the line found.  *LINE
     697              :    points directly into the line cache and is only valid until the next
     698              :    call of get_next_line.  *LINE_LEN is set to the length of the line.
     699              :    Note that the line does not contain any terminal delimiter.  This
     700              :    function returns true if some data was read or processed from the
     701              :    cache, false otherwise (no data left, or a read error).  Note that
     702              :    subsequent calls to get_next_line might invalidate *LINE.  */
     703              : 
     704              : bool
     705     28523440 : file_cache_slot::get_next_line (char **line, ssize_t *line_len)
     706              : {
     707              :   /* Fill the cache with data to process.  */
     708     28523440 :   maybe_read_data ();
     709              : 
     710     28523440 :   size_t remaining_size = m_nb_read - m_line_start_idx;
     711     28523440 :   if (remaining_size == 0)
     712              :     /* There is no more data to process.  */
     713              :     return false;
     714              : 
     715     28518636 :   const char *line_start = m_data + m_line_start_idx;
     716              : 
     717     28518636 :   const char *next_line_start = NULL;
     718     28518636 :   size_t len = 0;
     719     28518636 :   const char *line_end = find_end_of_line (line_start, remaining_size);
     720     28518636 :   if (line_end == NULL)
     721              :     {
     722              :       /* We haven't found an end-of-line delimiter in the cache.
     723              :          Fill the cache with more data from the file and look again.  */
     724         7292 :       while (maybe_read_data ())
     725              :         {
     726         7222 :           line_start = m_data + m_line_start_idx;
     727         7222 :           remaining_size = m_nb_read - m_line_start_idx;
     728         7222 :           line_end = find_end_of_line (line_start, remaining_size);
     729         7222 :           if (line_end != NULL)
     730              :             {
     731         3622 :               next_line_start = line_end + 1;
     732         3622 :               break;
     733              :             }
     734              :         }
     735         3692 :       if (line_end == NULL)
     736              :         {
     737              :           /* We've loaded all the file into the cache and still no
     738              :              terminator.  Let's say the line ends up at one byte past the
     739              :              end of the file.  This is to stay consistent with the case
     740              :              of when the line ends up with a terminator and line_end points to
     741              :              that.  That consistency is useful below in the len calculation.
     742              : 
     743              :              If the file ends in a \r, we didn't identify it as a line
     744              :              terminator above, so do that now instead.  */
     745           70 :           line_end = m_data + m_nb_read;
     746           70 :           if (m_nb_read && line_end[-1] == '\r')
     747              :             {
     748            0 :               --line_end;
     749            0 :               m_missing_trailing_newline = false;
     750              :             }
     751              :           else
     752           70 :             m_missing_trailing_newline = true;
     753              :         }
     754              :       else
     755         3622 :         m_missing_trailing_newline = false;
     756              :     }
     757              :   else
     758              :     {
     759     28514944 :       next_line_start = line_end + 1;
     760     28514944 :       m_missing_trailing_newline = false;
     761              :     }
     762              : 
     763     28518636 :   if (m_error)
     764              :     return false;
     765              : 
     766              :   /* At this point, we've found the end of the line.  It either points to
     767              :      the line terminator or to one byte after the last byte of the file.  */
     768     28518636 :   gcc_assert (line_end != NULL);
     769              : 
     770     28518636 :   len = line_end - line_start;
     771              : 
     772     28518636 :   if (m_line_start_idx < m_nb_read)
     773     28518636 :     *line = const_cast<char *> (line_start);
     774              : 
     775     28518636 :   ++m_line_num;
     776              : 
     777              :   /* Now update our line record so that re-reading lines from
     778              :      before m_line_start_idx is faster.  */
     779     28518636 :   size_t rlen = m_line_record.length ();
     780              :   /* Only update when beyond the previously cached region.  */
     781     25536629 :   if (rlen == 0 || m_line_record[rlen - 1].line_num < m_line_num)
     782              :     {
     783     17848614 :       size_t spacing
     784              :         = (rlen >= 2
     785     17848614 :            ? (m_line_record[rlen - 1].line_num
     786     11754890 :               - m_line_record[rlen - 2].line_num) : 1);
     787     17848614 :       size_t delta
     788     17848614 :         = rlen >= 1 ? m_line_num - m_line_record[rlen - 1].line_num : 1;
     789              : 
     790     17848614 :       size_t max_size = line_record_size;
     791              :       /* One anchor per hundred input lines.  */
     792     17848614 :       if (max_size == 0)
     793     17848614 :         max_size = m_line_num / 100;
     794              : 
     795              :       /* If we're too far beyond drop half of the lines to rebalance.  */
     796     17848614 :       if (rlen == max_size && delta >= spacing * 2)
     797              :         {
     798              :           size_t j = 0;
     799      1103404 :           for (size_t i = 1; i < rlen; i += 2)
     800        72823 :             m_line_record[j++] = m_line_record[i];
     801      1030581 :           m_line_record.truncate (j);
     802      1030581 :           rlen = j;
     803      1030581 :           spacing *= 2;
     804              :         }
     805              : 
     806     17848614 :       if (rlen < max_size && delta >= spacing)
     807              :         {
     808      1155225 :           file_cache_slot::line_info li (m_line_num, m_line_start_idx,
     809      1155225 :                                          line_end - m_data);
     810      1155225 :           m_line_record.safe_push (li);
     811              :         }
     812              :     }
     813              : 
     814              :   /* Cache recent tail lines separately for fast access. This assumes
     815              :      most accesses do not skip backwards.  */
     816     28518636 :   if (m_line_recent_last == m_line_recent_first
     817     28518636 :       || m_line_recent[m_line_recent_last].line_num == m_line_num - 1)
     818              :     {
     819     17027167 :       size_t mask = ((size_t) 1 << recent_cached_lines_shift) - 1;
     820     17027167 :       m_line_recent_last = (m_line_recent_last + 1) & mask;
     821     17027167 :       if (m_line_recent_last == m_line_recent_first)
     822     10658583 :         m_line_recent_first = (m_line_recent_first + 1) & mask;
     823     34054334 :       m_line_recent[m_line_recent_last]
     824     17027167 :         = file_cache_slot::line_info (m_line_num, m_line_start_idx,
     825     17027167 :                                       line_end - m_data);
     826              :     }
     827              : 
     828              :   /* Update m_line_start_idx so that it points to the next line to be
     829              :      read.  */
     830     28518636 :   if (next_line_start)
     831     28518566 :     m_line_start_idx = next_line_start - m_data;
     832              :   else
     833              :     /* We didn't find any terminal '\n'.  Let's consider that the end
     834              :        of line is the end of the data in the cache.  The next
     835              :        invocation of get_next_line will either read more data from the
     836              :        underlying file or return false early because we've reached the
     837              :        end of the file.  */
     838           70 :     m_line_start_idx = m_nb_read;
     839              : 
     840     28518636 :   *line_len = len;
     841              : 
     842     28518636 :   return true;
     843              : }
     844              : 
     845              : /* Consume the next bytes coming from the cache (or from its
     846              :    underlying file if there are remaining unread bytes in the file)
     847              :    until we reach the next end-of-line (or end-of-file).  There is no
     848              :    copying from the cache involved.  Return TRUE upon successful
     849              :    completion, i.e. iff get_next_line found a line to skip.  */
     850              : 
     851              : bool
     852     27983245 : file_cache_slot::goto_next_line ()
     853              : {
     854     27983245 :   char *l;
     855     27983245 :   ssize_t len;
     856              : 
     857     27983245 :   return get_next_line (&l, &len);
     858              : }
     859              : 
     860              : /* Read an arbitrary line number LINE_NUM from the file cached in this
     861              :    slot.  If the line was read successfully, *LINE points to the
     862              :    beginning of the line in the file cache and *LINE_LEN is the length
     863              :    of the line.  *LINE is not nul-terminated, but may contain zero
     864              :    bytes.  *LINE is only valid until the next call of read_line_num.
     865              :    This function returns true iff a line was read.  */
     866              : 
     867              : bool
     868      1712953 : file_cache_slot::read_line_num (size_t line_num,
     869              :                                 char ** line, ssize_t *line_len)
     870              : {
     871      1712953 :   gcc_assert (line_num > 0);
     872              : 
     873              :   /* Is the line in the recent line cache?
     874              :      This assumes the main file processing is only using
     875              :      a single contiguous cursor with only temporary excursions.  */
     876      1712953 :   if (m_line_recent_first != m_line_recent_last
     877      1624062 :         && m_line_recent[m_line_recent_first].line_num <= line_num
     878      3278659 :         && m_line_recent[m_line_recent_last].line_num >= line_num)
     879              :     {
     880      1173328 :       line_info &last = m_line_recent[m_line_recent_last];
     881      1173328 :       size_t mask = (1U << recent_cached_lines_shift) - 1;
     882      1173328 :       size_t idx = (m_line_recent_last - (last.line_num - line_num)) & mask;
     883      1173328 :       line_info &recent = m_line_recent[idx];
     884      1173328 :       gcc_assert (recent.line_num == line_num);
     885      1173328 :       *line = m_data + recent.start_pos;
     886      1173328 :       *line_len = recent.end_pos - recent.start_pos;
     887      1173328 :       return true;
     888              :     }
     889              :   /* A line at or before the cursor: rewind to the file start or an anchor.  */
     890       539625 :   if (line_num <= m_line_num)
     891              :     {
     892        36563 :       line_info l (line_num, 0, 0);
     893        36563 :       int i = m_line_record.lower_bound (l, line_info::less_than);
     894        36563 :       if (i == 0)
     895              :         {
     896        28143 :           m_line_start_idx = 0;
     897        28143 :           m_line_num = 0;
     898              :         }
     899         8420 :       else if (m_line_record[i - 1].line_num == line_num)
     900              :         {
     901              :           /* We have the start/end of the line.  */
     902            0 :           *line = m_data + m_line_record[i - 1].start_pos;
     903            0 :           *line_len = m_line_record[i - 1].end_pos - m_line_record[i - 1].start_pos;
     904            0 :           return true;
     905              :         }
     906              :       else
     907              :        {
     908         8420 :          gcc_assert (m_line_record[i - 1].line_num < m_line_num);
     909         8420 :          m_line_start_idx = m_line_record[i - 1].start_pos;
     910         8420 :          m_line_num = m_line_record[i - 1].line_num - 1;
     911              :        }
     912              :     }
     913              : 
     914              :   /*  Let's walk from line m_line_num up to line_num - 1, without
     915              :       copying any line.  */
     916     28521422 :   while (m_line_num < line_num - 1)
     917     27983245 :     if (!goto_next_line ())
     918              :       return false;
     919              : 
     920              :   /* The line we want is the next one.  Let's read it.  */
     921       538177 :   return get_next_line (line, line_len);
     922              : }
     923              : 
     924              : /* Return the physical source line that corresponds to FILE_PATH/LINE.
     925              :    The line is not nul-terminated and can contain several null
     926              :    characters, so the returned value's length has the actual length of
     927              :    the line.  The returned pointer is only valid until the next call of
     928              :    get_source_line.  If LINE is 0, FILE_PATH is NULL, no cache slot is
     929              :    available or the line cannot be read, a NULL char_span is returned.  */
     930              : 
     931              : char_span
     932      1743315 : file_cache::get_source_line (const char *file_path, int line)
     933              : {
     934      1743315 :   char *buffer = NULL;
     935      1743315 :   ssize_t len;
     936              : 
     937      1743315 :   if (line == 0)
     938           23 :     return char_span (NULL, 0);
     939              : 
     940      1743292 :   if (file_path == NULL)
     941            0 :     return char_span (NULL, 0);
     942              : 
     943      1743292 :   file_cache_slot *c = lookup_or_add_file (file_path);
     944      1743292 :   if (c == NULL)
     945        30339 :     return char_span (NULL, 0);
     946              : 
     947      1712953 :   bool read = c->read_line_num (line, &buffer, &len);
     948      1712953 :   if (!read)
     949         4567 :     return char_span (NULL, 0);
     950              : 
     951      1708386 :   return char_span (buffer, len);
     952              : }
     953              : /* Return the full file content for FILE_PATH, or a null span if it has no slot.  */
     954              : char_span
     955          241 : file_cache::get_source_file_content (const char *file_path)
     956              : {
     957          241 :   file_cache_slot *c = lookup_or_add_file (file_path);
     958          241 :   if (c == nullptr)
     959            4 :     return char_span (nullptr, 0);
     960          237 :   return c->get_full_file_content ();
     961              : }
     962              : 
     963              : #if CHECKING_P
     964              : 
     965              : namespace selftest {
     966              : 
     967              :  using temp_source_file = ::selftest::temp_source_file;
     968              : 
     969              : /* Verify that line LINENUM of TMP, read via FC, holds the number LINENUM.  */
     970              : 
     971              : static void
     972        18600 : check_line (temp_source_file &tmp, file_cache &fc, int linenum)
     973              : {
     974        18600 :   char_span line = fc.get_source_line (tmp.get_filename (), linenum);
     975        18600 :   int n;
     976        18600 :   const char *b = line.get_buffer ();
     977        18600 :   size_t l = line.length ();
     978        18600 :   char buf[5];
     979        18600 :   ASSERT_LT (l, 5);  /* Leave room for the terminating NUL.  */
     980        18600 :   memcpy (buf, b, l);
     981        18600 :   buf[l] = '\0';
     982        18600 :   ASSERT_TRUE (sscanf (buf, "%d", &n) == 1);
     983        18600 :   ASSERT_EQ (n, linenum);
     984        18600 : }
     985              : 
     986              : /* Test file cache replacement: re-read lines of a 1000-line file out of order.  */
     987              : 
     988              : static void
     989            4 : test_replacement ()
     990              : {
     991            4 :   const int maxline = 1000;
     992              :   /* Build a file whose Nth line is the decimal number N.  */
     993            4 :   char *vec = XNEWVEC (char, maxline * 5);
     994            4 :   char *p = vec;
     995            4 :   int i;
     996         4008 :   for (i = 1; i <= maxline; i++)
     997         4000 :     p += sprintf (p, "%d\n", i);
     998              : 
     999            4 :   temp_source_file tmp (SELFTEST_LOCATION, ".txt", vec);
    1000            4 :   free (vec);
    1001            4 :   file_cache fc;
    1002              :   /* Interleave forward reads with look-behinds of 1, 9 and 300 lines.  */
    1003         4004 :   for (i = 2; i <= maxline; i++)
    1004              :     {
    1005         3996 :       check_line (tmp, fc, i);
    1006         3996 :       check_line (tmp, fc, i - 1);
    1007         3996 :       if (i >= 10)
    1008         3964 :         check_line (tmp, fc, i - 9);
    1009         3964 :       if (i >= 350) /* Exceed the look behind cache.  */
    1010         2604 :         check_line (tmp, fc, i - 300);
    1011              :     }
    1012           44 :   for (i = 5; i <= maxline; i += 100)
    1013           40 :     check_line (tmp, fc, i);
    1014         4004 :   for (i = 1; i <= maxline; i++)
    1015         4000 :     check_line (tmp, fc, i);
    1016            4 : }
    1017              : 
    1018              : /* Verify reading of input files (e.g. for caret-based diagnostics).  */
    1019              : 
    1020              : static void
    1021            4 : test_reading_source_line ()
    1022              : {
    1023              :   /* Create a tempfile and write some text to it.  */
    1024            4 :   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
    1025              :                         "01234567890123456789\n"
    1026              :                         "This is the test text\n"
    1027            4 :                         "This is the 3rd line");
    1028            4 :   file_cache fc;
    1029              : 
    1030              :   /* Read back a specific line from the tempfile.  */
    1031            4 :   char_span source_line = fc.get_source_line (tmp.get_filename (), 3);
    1032            4 :   ASSERT_TRUE (source_line);
    1033            4 :   ASSERT_TRUE (source_line.get_buffer () != NULL);
    1034            4 :   ASSERT_EQ (20, source_line.length ());
    1035            4 :   ASSERT_TRUE (!strncmp ("This is the 3rd line",
    1036              :                          source_line.get_buffer (), source_line.length ()));
    1037              :   /* Going back to an earlier line must also work.  */
    1038            4 :   source_line = fc.get_source_line (tmp.get_filename (), 2);
    1039            4 :   ASSERT_TRUE (source_line);
    1040            4 :   ASSERT_TRUE (source_line.get_buffer () != NULL);
    1041            4 :   ASSERT_EQ (21, source_line.length ());
    1042            4 :   ASSERT_TRUE (!strncmp ("This is the test text",
    1043              :                          source_line.get_buffer (), source_line.length ()));
    1044              :   /* There is no line 4: the lookup must fail.  */
    1045            4 :   source_line = fc.get_source_line (tmp.get_filename (), 4);
    1046            4 :   ASSERT_FALSE (source_line);
    1047            4 :   ASSERT_TRUE (source_line.get_buffer () == NULL);
    1048            4 : }
    1049              : 
    1050              : /* Verify reading from buffers (e.g. for sarif-replay).  */
    1051              : 
    1052              : static void
    1053            4 : test_reading_source_buffer ()
    1054              : {
    1055            4 :   const char *text = ("01234567890123456789\n"
    1056              :                       "This is the test text\n"
    1057              :                       "This is the 3rd line");
    1058            4 :   const char *filename = "foo.txt";
    1059            4 :   file_cache fc;
    1060            4 :   fc.add_buffered_content (filename, text, strlen (text));
    1061              : 
    1062              :   /* Read back a specific line from the buffered content.  */
    1063            4 :   char_span source_line = fc.get_source_line (filename, 3);
    1064            4 :   ASSERT_TRUE (source_line);
    1065            4 :   ASSERT_TRUE (source_line.get_buffer () != NULL);
    1066            4 :   ASSERT_EQ (20, source_line.length ());
    1067            4 :   ASSERT_TRUE (!strncmp ("This is the 3rd line",
    1068              :                          source_line.get_buffer (), source_line.length ()));
    1069              :   /* Going back to an earlier line must also work.  */
    1070            4 :   source_line = fc.get_source_line (filename, 2);
    1071            4 :   ASSERT_TRUE (source_line);
    1072            4 :   ASSERT_TRUE (source_line.get_buffer () != NULL);
    1073            4 :   ASSERT_EQ (21, source_line.length ());
    1074            4 :   ASSERT_TRUE (!strncmp ("This is the test text",
    1075              :                          source_line.get_buffer (), source_line.length ()));
    1076              :   /* There is no line 4: the lookup must fail.  */
    1077            4 :   source_line = fc.get_source_line (filename, 4);
    1078            4 :   ASSERT_FALSE (source_line);
    1079            4 :   ASSERT_TRUE (source_line.get_buffer () == NULL);
    1080            4 : }
    1081              : 
    1082              : /* Entry point: run all of the selftests defined in this file.  */
    1083              : 
    1084              : void
    1085            4 : file_cache_cc_tests ()
    1086              : {
    1087            4 :   test_reading_source_line ();
    1088            4 :   test_reading_source_buffer ();
    1089            4 :   test_replacement ();
    1090            4 : }
    1091              : 
    1092              : } // namespace selftest
    1093              : 
    1094              : #endif /* CHECKING_P */
    1095              : 
    1096              : } // namespace diagnostics
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.