LCOV - code coverage report
Current view: top level - gcc - input.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.4 % 1640 1450
Test Date: 2024-04-13 14:00:49 Functions: 91.7 % 121 111
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* Data and functions related to line maps and input files.
       2                 :             :    Copyright (C) 2004-2024 Free Software Foundation, Inc.
       3                 :             : 
       4                 :             : This file is part of GCC.
       5                 :             : 
       6                 :             : GCC is free software; you can redistribute it and/or modify it under
       7                 :             : the terms of the GNU General Public License as published by the Free
       8                 :             : Software Foundation; either version 3, or (at your option) any later
       9                 :             : version.
      10                 :             : 
      11                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14                 :             : for more details.
      15                 :             : 
      16                 :             : You should have received a copy of the GNU General Public License
      17                 :             : along with GCC; see the file COPYING3.  If not see
      18                 :             : <http://www.gnu.org/licenses/>.  */
      19                 :             : 
      20                 :             : #include "config.h"
      21                 :             : #include "system.h"
      22                 :             : #include "coretypes.h"
      23                 :             : #include "intl.h"
      24                 :             : #include "diagnostic.h"
      25                 :             : #include "selftest.h"
      26                 :             : #include "cpplib.h"
      27                 :             : 
      28                 :             : #ifndef HAVE_ICONV
      29                 :             : #define HAVE_ICONV 0
      30                 :             : #endif
      31                 :             : 
      32                 :             : const char *
      33                 :     7473728 : special_fname_builtin ()
      34                 :             : {
      35                 :     7473728 :   return _("<built-in>");
      36                 :             : }
      37                 :             : 
      38                 :             : /* Input charset configuration.  */
      39                 :       86304 : static const char *default_charset_callback (const char *)
      40                 :             : {
      41                 :       86304 :   return nullptr;
      42                 :             : }
      43                 :             : 
      44                 :             : void
      45                 :      975570 : file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
      46                 :             :                                       bool should_skip_bom)
      47                 :             : {
      48                 :      975570 :   in_context.ccb = (ccb ? ccb : default_charset_callback);
      49                 :      975570 :   in_context.should_skip_bom = should_skip_bom;
      50                 :      975570 : }
      51                 :             : 
      52                 :             : /* This is a cache used by get_next_line to store the content of a
      53                 :             :    file to be searched for file lines.  */
      54                 :             : class file_cache_slot
      55                 :             : {
      56                 :             : public:
      57                 :             :   file_cache_slot ();
      58                 :             :   ~file_cache_slot ();
      59                 :             : 
      60                 :             :   bool read_line_num (size_t line_num,
      61                 :             :                       char ** line, ssize_t *line_len);
      62                 :             : 
      63                 :             :   /* Accessors.  */
      64                 :    29403882 :   const char *get_file_path () const { return m_file_path; }
      65                 :      446137 :   unsigned get_use_count () const { return m_use_count; }
      66                 :        9610 :   bool missing_trailing_newline_p () const
      67                 :             :   {
      68                 :        9610 :     return m_missing_trailing_newline;
      69                 :             :   }
      70                 :             :   char_span get_full_file_content ();
      71                 :             : 
      72                 :     3251030 :   void inc_use_count () { m_use_count++; }
      73                 :             : 
      74                 :             :   bool create (const file_cache::input_context &in_context,
      75                 :             :                const char *file_path, FILE *fp, unsigned highest_use_count);
      76                 :             :   void evict ();
      77                 :             : 
      78                 :             :  private:
      79                 :             :   /* These are information used to store a line boundary.  */
      80                 :             :   class line_info
      81                 :             :   {
      82                 :             :   public:
      83                 :             :     /* The line number.  It starts from 1.  */
      84                 :             :     size_t line_num;
      85                 :             : 
      86                 :             :     /* The position (byte count) of the beginning of the line,
      87                 :             :        relative to the file data pointer.  This starts at zero.  */
      88                 :             :     size_t start_pos;
      89                 :             : 
      90                 :             :     /* The position (byte count) of the last byte of the line.  This
      91                 :             :        normally points to the '\n' character, or to one byte after the
      92                 :             :        last byte of the file, if the file doesn't contain a '\n'
      93                 :             :        character.  */
      94                 :             :     size_t end_pos;
      95                 :             : 
      96                 :     2846416 :     line_info (size_t l, size_t s, size_t e)
      97                 :     2846416 :       : line_num (l), start_pos (s), end_pos (e)
      98                 :             :     {}
      99                 :             : 
     100                 :             :     line_info ()
     101                 :             :       :line_num (0), start_pos (0), end_pos (0)
     102                 :             :     {}
     103                 :             :   };
     104                 :             : 
     105                 :             :   bool needs_read_p () const;
     106                 :             :   bool needs_grow_p () const;
     107                 :             :   void maybe_grow ();
     108                 :             :   bool read_data ();
     109                 :             :   bool maybe_read_data ();
     110                 :             :   bool get_next_line (char **line, ssize_t *line_len);
     111                 :             :   bool read_next_line (char ** line, ssize_t *line_len);
     112                 :             :   bool goto_next_line ();
     113                 :             : 
     114                 :             :   static const size_t buffer_size = 4 * 1024;
     115                 :             :   static const size_t line_record_size = 100;
     116                 :             : 
     117                 :             :   /* The number of time this file has been accessed.  This is used
     118                 :             :      to designate which file cache to evict from the cache
     119                 :             :      array.  */
     120                 :             :   unsigned m_use_count;
     121                 :             : 
     122                 :             :   /* The file_path is the key for identifying a particular file in
     123                 :             :      the cache.
     124                 :             :      For libcpp-using code, the underlying buffer for this field is
     125                 :             :      owned by the corresponding _cpp_file within the cpp_reader.  */
     126                 :             :   const char *m_file_path;
     127                 :             : 
     128                 :             :   FILE *m_fp;
     129                 :             : 
     130                 :             :   /* This points to the content of the file that we've read so
     131                 :             :      far.  */
     132                 :             :   char *m_data;
     133                 :             : 
     134                 :             :   /* The allocated buffer to be freed may start a little earlier than DATA,
     135                 :             :      e.g. if a UTF8 BOM was skipped at the beginning.  */
     136                 :             :   int m_alloc_offset;
     137                 :             : 
     138                 :             :   /*  The size of the DATA array above.*/
     139                 :             :   size_t m_size;
     140                 :             : 
     141                 :             :   /* The number of bytes read from the underlying file so far.  This
     142                 :             :      must be less (or equal) than SIZE above.  */
     143                 :             :   size_t m_nb_read;
     144                 :             : 
     145                 :             :   /* The index of the beginning of the current line.  */
     146                 :             :   size_t m_line_start_idx;
     147                 :             : 
     148                 :             :   /* The number of the previous line read.  This starts at 1.  Zero
     149                 :             :      means we've read no line so far.  */
     150                 :             :   size_t m_line_num;
     151                 :             : 
     152                 :             :   /* This is the total number of lines of the current file.  At the
     153                 :             :      moment, we try to get this information from the line map
     154                 :             :      subsystem.  Note that this is just a hint.  When using the C++
     155                 :             :      front-end, this hint is correct because the input file is then
     156                 :             :      completely tokenized before parsing starts; so the line map knows
     157                 :             :      the number of lines before compilation really starts.  For e.g,
     158                 :             :      the C front-end, it can happen that we start emitting diagnostics
     159                 :             :      before the line map has seen the end of the file.  */
     160                 :             :   size_t m_total_lines;
     161                 :             : 
     162                 :             :   /* Could this file be missing a trailing newline on its final line?
     163                 :             :      Initially true (to cope with empty files), set to true/false
     164                 :             :      as each line is read.  */
     165                 :             :   bool m_missing_trailing_newline;
     166                 :             : 
     167                 :             :   /* This is a record of the beginning and end of the lines we've seen
     168                 :             :      while reading the file.  This is useful to avoid walking the data
     169                 :             :      from the beginning when we are asked to read a line that is
     170                 :             :      before LINE_START_IDX above.  Note that the maximum size of this
     171                 :             :      record is line_record_size, so that the memory consumption
     172                 :             :      doesn't explode.  We thus scale total_lines down to
     173                 :             :      line_record_size.  */
     174                 :             :   vec<line_info, va_heap> m_line_record;
     175                 :             : 
     176                 :      394105 :   void offset_buffer (int offset)
     177                 :             :   {
     178                 :      394105 :     gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
     179                 :             :                 : (size_t) offset <= m_size);
     180                 :      394105 :     gcc_assert (m_data);
     181                 :      394105 :     m_alloc_offset += offset;
     182                 :      394105 :     m_data += offset;
     183                 :      394105 :     m_size -= offset;
     184                 :      394105 :   }
     185                 :             : 
     186                 :             : };
     187                 :             : 
     188                 :             : /* Current position in real source file.  */
     189                 :             : 
     190                 :             : location_t input_location = UNKNOWN_LOCATION;
     191                 :             : 
     192                 :             : class line_maps *line_table;
     193                 :             : 
     194                 :             : /* A stashed copy of "line_table" for use by selftest::line_table_test.
     195                 :             :    This needs to be a global so that it can be a GC root, and thus
     196                 :             :    prevent the stashed copy from being garbage-collected if the GC runs
     197                 :             :    during a line_table_test.  */
     198                 :             : 
     199                 :             : class line_maps *saved_line_table;
     200                 :             : 
     201                 :             : /* Expand the source location LOC into a human readable location.  If
     202                 :             :    LOC resolves to a builtin location, the file name of the readable
     203                 :             :    location is set to the string "<built-in>". If EXPANSION_POINT_P is
     204                 :             :    TRUE and LOC is virtual, then it is resolved to the expansion
     205                 :             :    point of the involved macro.  Otherwise, it is resolved to the
     206                 :             :    spelling location of the token.
     207                 :             : 
     208                 :             :    When resolving to the spelling location of the token, if the
     209                 :             :    resulting location is for a built-in location (that is, it has no
     210                 :             :    associated line/column) in the context of a macro expansion, the
     211                 :             :    returned location is the first one (while unwinding the macro
     212                 :             :    location towards its expansion point) that is in real source
     213                 :             :    code.
     214                 :             : 
     215                 :             :    ASPECT controls which part of the location to use.  */
     216                 :             : 
     217                 :             : static expanded_location
     218                 :   773834697 : expand_location_1 (const line_maps *set,
     219                 :             :                    location_t loc,
     220                 :             :                    bool expansion_point_p,
     221                 :             :                    enum location_aspect aspect)
     222                 :             : {
     223                 :   773834697 :   expanded_location xloc;
     224                 :   773834697 :   const line_map_ordinary *map;
     225                 :   773834697 :   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
     226                 :   773834697 :   tree block = NULL;
     227                 :             : 
     228                 :   773834697 :   if (IS_ADHOC_LOC (loc))
     229                 :             :     {
     230                 :   342416028 :       block = LOCATION_BLOCK (loc);
     231                 :   342416028 :       loc = LOCATION_LOCUS (loc);
     232                 :             :     }
     233                 :             : 
     234                 :   773834697 :   memset (&xloc, 0, sizeof (xloc));
     235                 :             : 
     236                 :   773834697 :   if (loc >= RESERVED_LOCATION_COUNT)
     237                 :             :     {
     238                 :   732029800 :       if (!expansion_point_p)
     239                 :             :         {
     240                 :             :           /* We want to resolve LOC to its spelling location.
     241                 :             : 
     242                 :             :              But if that spelling location is a reserved location that
     243                 :             :              appears in the context of a macro expansion (like for a
     244                 :             :              location for a built-in token), let's consider the first
     245                 :             :              location (toward the expansion point) that is not reserved;
     246                 :             :              that is, the first location that is in real source code.  */
     247                 :     2083818 :           loc = linemap_unwind_to_first_non_reserved_loc (set,
     248                 :             :                                                           loc, NULL);
     249                 :     2083818 :           lrk = LRK_SPELLING_LOCATION;
     250                 :             :         }
     251                 :   732029800 :       loc = linemap_resolve_location (set, loc, lrk, &map);
     252                 :             : 
     253                 :             :       /* loc is now either in an ordinary map, or is a reserved location.
     254                 :             :          If it is a compound location, the caret is in a spelling location,
     255                 :             :          but the start/finish might still be a virtual location.
     256                 :             :          Depending of what the caller asked for, we may need to recurse
     257                 :             :          one level in order to resolve any virtual locations in the
     258                 :             :          end-points.  */
     259                 :   732029800 :       switch (aspect)
     260                 :             :         {
     261                 :           0 :         default:
     262                 :           0 :           gcc_unreachable ();
     263                 :             :           /* Fall through.  */
     264                 :             :         case LOCATION_ASPECT_CARET:
     265                 :             :           break;
     266                 :      333795 :         case LOCATION_ASPECT_START:
     267                 :      333795 :           {
     268                 :      333795 :             location_t start = get_start (loc);
     269                 :      333795 :             if (start != loc)
     270                 :        1178 :               return expand_location_1 (set, start, expansion_point_p, aspect);
     271                 :             :           }
     272                 :             :           break;
     273                 :      154654 :         case LOCATION_ASPECT_FINISH:
     274                 :      154654 :           {
     275                 :      154654 :             location_t finish = get_finish (loc);
     276                 :      154654 :             if (finish != loc)
     277                 :        1131 :               return expand_location_1 (set, finish, expansion_point_p, aspect);
     278                 :             :           }
     279                 :             :           break;
     280                 :             :         }
     281                 :   732027491 :       xloc = linemap_expand_location (set, map, loc);
     282                 :             :     }
     283                 :             : 
     284                 :   773832388 :   xloc.data = block;
     285                 :   773832388 :   if (loc <= BUILTINS_LOCATION)
     286                 :    41804897 :     xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
     287                 :             : 
     288                 :   773832388 :   return xloc;
     289                 :             : }
     290                 :             : 
     291                 :             : /* Return the total lines number that have been read so far by the
     292                 :             :    line map (in the preprocessor) so far.  For languages like C++ that
     293                 :             :    entirely preprocess the input file before starting to parse, this
     294                 :             :    equals the actual number of lines of the file.  */
     295                 :             : 
     296                 :             : static size_t
     297                 :      147161 : total_lines_num (const char *file_path)
     298                 :             : {
     299                 :      147161 :   size_t r = 0;
     300                 :      147161 :   location_t l = 0;
     301                 :      147161 :   if (linemap_get_file_highest_location (line_table, file_path, &l))
     302                 :             :     {
     303                 :      147136 :       gcc_assert (l >= RESERVED_LOCATION_COUNT);
     304                 :      147136 :       expanded_location xloc = expand_location (l);
     305                 :      147136 :       r = xloc.line;
     306                 :             :     }
     307                 :      147161 :   return r;
     308                 :             : }
     309                 :             : 
     310                 :             : /* Lookup the cache used for the content of a given file accessed by
     311                 :             :    caret diagnostic.  Return the found cached file, or NULL if no
     312                 :             :    cached file was found.  */
     313                 :             : 
     314                 :             : file_cache_slot *
     315                 :     1801601 : file_cache::lookup_file (const char *file_path)
     316                 :             : {
     317                 :     1801601 :   gcc_assert (file_path);
     318                 :             : 
     319                 :             :   /* This will contain the found cached file.  */
     320                 :             :   file_cache_slot *r = NULL;
     321                 :    30627217 :   for (unsigned i = 0; i < num_file_slots; ++i)
     322                 :             :     {
     323                 :    28825616 :       file_cache_slot *c = &m_file_slots[i];
     324                 :    28825616 :       if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
     325                 :             :         {
     326                 :     1625515 :           c->inc_use_count ();
     327                 :     1625515 :           r = c;
     328                 :             :         }
     329                 :             :     }
     330                 :             : 
     331                 :     1801601 :   if (r)
     332                 :     1625515 :     r->inc_use_count ();
     333                 :             : 
     334                 :     1801601 :   return r;
     335                 :             : }
     336                 :             : 
     337                 :             : /* Purge any mention of FILENAME from the cache of files used for
     338                 :             :    printing source code.  For use in selftests when working
     339                 :             :    with tempfiles.  */
     340                 :             : 
     341                 :             : void
     342                 :          96 : file_cache::forcibly_evict_file (const char *file_path)
     343                 :             : {
     344                 :          96 :   gcc_assert (file_path);
     345                 :             : 
     346                 :          96 :   file_cache_slot *r = lookup_file (file_path);
     347                 :          96 :   if (!r)
     348                 :             :     /* Not found.  */
     349                 :             :     return;
     350                 :             : 
     351                 :          56 :   r->evict ();
     352                 :             : }
     353                 :             : 
     354                 :             : /* Determine if FILE_PATH missing a trailing newline on its final line.
     355                 :             :    Only valid to call once all of the file has been loaded, by
     356                 :             :    requesting a line number beyond the end of the file.  */
     357                 :             : 
     358                 :             : bool
     359                 :        9610 : file_cache::missing_trailing_newline_p (const char *file_path)
     360                 :             : {
     361                 :        9610 :   gcc_assert (file_path);
     362                 :             : 
     363                 :        9610 :   file_cache_slot *r = lookup_or_add_file (file_path);
     364                 :        9610 :   return r->missing_trailing_newline_p ();
     365                 :             : }
     366                 :             : 
     367                 :             : void
     368                 :          56 : file_cache_slot::evict ()
     369                 :             : {
     370                 :          56 :   m_file_path = NULL;
     371                 :          56 :   if (m_fp)
     372                 :          56 :     fclose (m_fp);
     373                 :          56 :   m_fp = NULL;
     374                 :          56 :   m_nb_read = 0;
     375                 :          56 :   m_line_start_idx = 0;
     376                 :          56 :   m_line_num = 0;
     377                 :          56 :   m_line_record.truncate (0);
     378                 :          56 :   m_use_count = 0;
     379                 :          56 :   m_total_lines = 0;
     380                 :          56 :   m_missing_trailing_newline = true;
     381                 :          56 : }
     382                 :             : 
     383                 :             : /* Return the file cache that has been less used, recently, or the
     384                 :             :    first empty one.  If HIGHEST_USE_COUNT is non-null,
     385                 :             :    *HIGHEST_USE_COUNT is set to the highest use count of the entries
     386                 :             :    in the cache table.  */
     387                 :             : 
     388                 :             : file_cache_slot*
     389                 :      147161 : file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
     390                 :             : {
     391                 :      147161 :   file_cache_slot *to_evict = &m_file_slots[0];
     392                 :      147161 :   unsigned huc = to_evict->get_use_count ();
     393                 :      305027 :   for (unsigned i = 1; i < num_file_slots; ++i)
     394                 :             :     {
     395                 :      298976 :       file_cache_slot *c = &m_file_slots[i];
     396                 :      298976 :       bool c_is_empty = (c->get_file_path () == NULL);
     397                 :             : 
     398                 :      298976 :       if (c->get_use_count () < to_evict->get_use_count ()
     399                 :      298976 :           || (to_evict->get_file_path () && c_is_empty))
     400                 :             :         /* We evict C because it's either an entry with a lower use
     401                 :             :            count or one that is empty.  */
     402                 :             :         to_evict = c;
     403                 :             : 
     404                 :      298976 :       if (huc < c->get_use_count ())
     405                 :             :         huc = c->get_use_count ();
     406                 :             : 
     407                 :      298976 :       if (c_is_empty)
     408                 :             :         /* We've reached the end of the cache; subsequent elements are
     409                 :             :            all empty.  */
     410                 :             :         break;
     411                 :             :     }
     412                 :             : 
     413                 :      147161 :   if (highest_use_count)
     414                 :      147161 :     *highest_use_count = huc;
     415                 :             : 
     416                 :      147161 :   return to_evict;
     417                 :             : }
     418                 :             : 
     419                 :             : /* Create the cache used for the content of a given file to be
     420                 :             :    accessed by caret diagnostic.  This cache is added to an array of
     421                 :             :    cache and can be retrieved by lookup_file_in_cache_tab.  This
     422                 :             :    function returns the created cache.  Note that only the last
     423                 :             :    num_file_slots files are cached.
     424                 :             : 
     425                 :             :    This can return nullptr if the FILE_PATH can't be opened for
     426                 :             :    reading, or if the content can't be converted to the input_charset.  */
     427                 :             : 
     428                 :             : file_cache_slot*
     429                 :      176046 : file_cache::add_file (const char *file_path)
     430                 :             : {
     431                 :             : 
     432                 :      176046 :   FILE *fp = fopen (file_path, "r");
     433                 :      176046 :   if (fp == NULL)
     434                 :             :     return NULL;
     435                 :             : 
     436                 :      147161 :   unsigned highest_use_count = 0;
     437                 :      147161 :   file_cache_slot *r = evicted_cache_tab_entry (&highest_use_count);
     438                 :      147161 :   if (!r->create (in_context, file_path, fp, highest_use_count))
     439                 :             :     return NULL;
     440                 :             :   return r;
     441                 :             : }
     442                 :             : 
     443                 :             : /* Get a borrowed char_span to the full content of this file
     444                 :             :    as decoded according to the input charset, encoded as UTF-8.  */
     445                 :             : 
     446                 :             : char_span
     447                 :          74 : file_cache_slot::get_full_file_content ()
     448                 :             : {
     449                 :          74 :   char *line;
     450                 :          74 :   ssize_t line_len;
     451                 :        1543 :   while (get_next_line (&line, &line_len))  /* Walk every remaining line so all of it gets read.  */
     452                 :             :     {  /* Nothing to do per line; LINE and LINE_LEN are ignored.  */
     453                 :             :     }
     454                 :          74 :   return char_span (m_data, m_nb_read);  /* Points into this slot's buffer; nothing is copied.  */
     455                 :             : }
     456                 :             : 
     457                 :             : /* Populate this slot for use on FILE_PATH and FP, dropping any existing
     458                 :             :    cached content within it.  Return false iff charset conversion fails.  */
     459                 :             : 
     460                 :             : bool
     461                 :      147161 : file_cache_slot::create (const file_cache::input_context &in_context,
     462                 :             :                          const char *file_path, FILE *fp,
     463                 :             :                          unsigned highest_use_count)
     464                 :             : {
     465                 :      147161 :   m_file_path = file_path;  /* The pointer itself is stored; no copy is made.  */
     466                 :      147161 :   if (m_fp)
     467                 :        6051 :     fclose (m_fp);  /* Close the stream left over from the slot's previous use.  */
     468                 :      147161 :   m_fp = fp;
     469                 :      147161 :   if (m_alloc_offset)
     470                 :           0 :     offset_buffer (-m_alloc_offset);  /* Presumably rewinds m_data to the start of its allocation; offset_buffer is defined elsewhere.  */
     471                 :      147161 :   m_nb_read = 0;
     472                 :      147161 :   m_line_start_idx = 0;
     473                 :      147161 :   m_line_num = 0;
     474                 :      147161 :   m_line_record.truncate (0);
     475                 :             :   /* Ensure that this cache entry doesn't get evicted next time
     476                 :             :      add_file is called.  */
     477                 :      147161 :   m_use_count = ++highest_use_count;
     478                 :      147161 :   m_total_lines = total_lines_num (file_path);  /* Used as a hint by get_next_line's line record.  */
     479                 :      147161 :   m_missing_trailing_newline = true;
     480                 :             : 
     481                 :             : 
     482                 :             :   /* Check the input configuration to determine if we need to do any
     483                 :             :      transformations, such as charset conversion or BOM skipping.  */
     484                 :      147161 :   if (const char *input_charset = in_context.ccb (file_path))
     485                 :             :     {
     486                 :             :       /* Need a full-blown conversion of the input charset.  */
     487                 :           6 :       fclose (m_fp);  /* The converted text is fetched by FILE_PATH below, so the stream is not used.  */
     488                 :           6 :       m_fp = NULL;
     489                 :           6 :       const cpp_converted_source cs
     490                 :           6 :         = cpp_get_converted_source (file_path, input_charset);
     491                 :           6 :       if (!cs.data)
     492                 :           0 :         return false;
     493                 :           6 :       if (m_data)
     494                 :           0 :         XDELETEVEC (m_data);  /* Release the old buffer before adopting the converted one.  */
     495                 :           6 :       m_data = cs.data;
     496                 :           6 :       m_nb_read = m_size = cs.len;  /* The whole content is already in memory.  */
     497                 :           6 :       m_alloc_offset = cs.data - cs.to_free;  /* Distance from the pointer to free to the start of the data.  */
     498                 :             :     }
     499                 :      147155 :   else if (in_context.should_skip_bom)
     500                 :             :     {
     501                 :       61418 :       if (read_data ())
     502                 :             :         {
     503                 :       61418 :           const int offset = cpp_check_utf8_bom (m_data, m_nb_read);  /* Presumably the byte length of a leading BOM, if any.  */
     504                 :       61418 :           offset_buffer (offset);
     505                 :       61418 :           m_nb_read -= offset;  /* The skipped bytes no longer count as read data.  */
     506                 :             :         }
     507                 :             :     }
     508                 :             : 
     509                 :             :   return true;
     510                 :             : }
     511                 :             : 
     512                 :             : /* file_cache's ctor.  Allocates the array of num_file_slots slots.  */
     513                 :             : 
     514                 :      767901 : file_cache::file_cache ()
     515                 :    13054317 : : m_file_slots (new file_cache_slot[num_file_slots])
     516                 :             : {
     517                 :      767901 :   initialize_input_context (nullptr, false);  /* Presumably: no charset callback, no BOM skipping -- confirm against its declaration.  */
     518                 :      767901 : }
     519                 :             : 
     520                 :             : /* file_cache's dtor.  Destroys every slot and frees the slot array.  */
     521                 :             : 
     522                 :      365247 : file_cache::~file_cache ()
     523                 :             : {
     524                 :     6209199 :   delete[] m_file_slots;
     525                 :      365247 : }
     526                 :             : 
     527                 :             : /* Lookup the cache used for the content of a given file accessed by
     528                 :             :    caret diagnostics.  If no cached file was found, create a new cache
     529                 :             :    for this file, add it to the array of cached files and return
     530                 :             :    it.
     531                 :             : 
     532                 :             :    This can return nullptr on a cache miss if FILE_PATH can't be opened for
     533                 :             :    reading, or if the content can't be converted to the input_charset.  */
     534                 :             : 
     535                 :             : file_cache_slot*
     536                 :     1801505 : file_cache::lookup_or_add_file (const char *file_path)
     537                 :             : {
     538                 :     1801505 :   file_cache_slot *r = lookup_file (file_path);
     539                 :     1801505 :   if (r == NULL)
     540                 :      176046 :     r = add_file (file_path);  /* Cache miss: may itself yield NULL.  */
     541                 :     1801505 :   return r;
     542                 :             : }
     543                 :             : 
     544                 :             : /* Default constructor for the cache of a file used by caret
     545                 :             :    diagnostics.  The slot starts with no path, no stream and no buffer.  */
     546                 :             : 
     547                 :    12286416 : file_cache_slot::file_cache_slot ()
     548                 :    12286416 : : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0),
     549                 :    12286416 :   m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0),
     550                 :    12286416 :   m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true)
     551                 :             : {
     552                 :    12286416 :   m_line_record.create (0);  /* Start with an empty line record.  */
     553                 :    12286416 : }
     554                 :             : 
     555                 :             : /* Destructor for the cache of a file used by caret diagnostics.  Closes the stream, if any, and frees the buffer and the line record.  */
     556                 :             : 
     557                 :     5843952 : file_cache_slot::~file_cache_slot ()
     558                 :             : {
     559                 :     5843952 :   if (m_fp)
     560                 :             :     {
     561                 :      140223 :       fclose (m_fp);
     562                 :      140223 :       m_fp = NULL;
     563                 :             :     }
     564                 :     5843952 :   if (m_data)
     565                 :             :     {
     566                 :      140233 :       offset_buffer (-m_alloc_offset);  /* Presumably restores m_data to the pointer that was allocated, so it can be freed.  */
     567                 :      140233 :       XDELETEVEC (m_data);
     568                 :      140233 :       m_data = 0;
     569                 :             :     }
     570                 :     5843952 :   m_line_record.release ();
     571                 :     5843952 : }
     572                 :             : 
     573                 :             : /* Returns TRUE iff the cache would need to be filled with data coming
     574                 :             :    from the file.  That is, a stream is attached and either the cache
     575                 :             :    is empty or full, or the current line starts at or after the last byte
     576                 :             :    read.  Note that if the cache is full, it would need to be extended and filled again.  */
     577                 :             : 
     578                 :             : bool
     579                 :   169915227 : file_cache_slot::needs_read_p () const
     580                 :             : {
     581                 :   169915227 :   return m_fp && (m_nb_read == 0
     582                 :   169829363 :           || m_nb_read == m_size
     583                 :   169733136 :           || (m_line_start_idx >= m_nb_read - 1));  /* m_nb_read is nonzero here, so the subtraction is only reached with a positive value.  */
     584                 :             : }
     585                 :             : 
     586                 :             : /*  Return TRUE iff the cache is full and thus needs to be
     587                 :             :     extended.  An unallocated cache (both counts zero) also counts as full.  */
     588                 :             : 
     589                 :             : bool
     590                 :      243382 : file_cache_slot::needs_grow_p () const
     591                 :             : {
     592                 :      243382 :   return m_nb_read == m_size;
     593                 :             : }
     594                 :             : 
     595                 :             : /* Grow the cache if it needs to be extended: allocate buffer_size bytes the first time, then double the size on each later call.  */
     596                 :             : 
     597                 :             : void
     598                 :      243382 : file_cache_slot::maybe_grow ()
     599                 :             : {
     600                 :      243382 :   if (!needs_grow_p ())
     601                 :             :     return;
     602                 :             : 
     603                 :      237279 :   if (!m_data)
     604                 :             :     {
     605                 :      141052 :       gcc_assert (m_size == 0 && m_alloc_offset == 0);
     606                 :      141052 :       m_size = buffer_size;
     607                 :      141052 :       m_data = XNEWVEC (char, m_size);
     608                 :             :     }
     609                 :             :   else
     610                 :             :     {
     611                 :       96227 :       const int offset = m_alloc_offset;
     612                 :       96227 :       offset_buffer (-offset);  /* Presumably undoes the offset so that the allocated pointer is what gets resized.  */
     613                 :       96227 :       m_size *= 2;
     614                 :       96227 :       m_data = XRESIZEVEC (char, m_data, m_size);
     615                 :       96227 :       offset_buffer (offset);  /* Re-apply the same offset to the resized buffer.  */
     616                 :             :     }
     617                 :             : }
     618                 :             : 
     619                 :             : /*  Read more data into the cache.  Extends the cache if need be.
     620                 :             :     Returns TRUE iff new data could be read.  The stream m_fp must be non-null.  */
     621                 :             : 
     622                 :             : bool
     623                 :      265036 : file_cache_slot::read_data ()
     624                 :             : {
     625                 :      265036 :   if (feof (m_fp) || ferror (m_fp))
     626                 :       21654 :     return false;
     627                 :             : 
     628                 :      243382 :   maybe_grow ();
     629                 :             : 
     630                 :      243382 :   char * from = m_data + m_nb_read;  /* Append after the bytes already cached.  */
     631                 :      243382 :   size_t to_read = m_size - m_nb_read;  /* Fill all the free room left in the buffer.  */
     632                 :      243382 :   size_t nb_read = fread (from, 1, to_read, m_fp);
     633                 :             : 
     634                 :      243382 :   if (ferror (m_fp))
     635                 :             :     return false;
     636                 :             : 
     637                 :      243382 :   m_nb_read += nb_read;
     638                 :      243382 :   return !!nb_read;
     639                 :             : }
     640                 :             : 
     641                 :             : /* Read new data iff the cache needs to be filled with more data
     642                 :             :    coming from the file.  Return TRUE iff the cache was filled with
     643                 :             :    more data.  */
     644                 :             : 
     645                 :             : bool
     646                 :   169915227 : file_cache_slot::maybe_read_data ()
     647                 :             : {
     648                 :   169915227 :   if (!needs_read_p ())
     649                 :             :     return false;
     650                 :      203618 :   return read_data ();
     651                 :             : }
     652                 :             : 
     653                 :             : /* Helper function for file_cache_slot::get_next_line (), to find the end of
     654                 :             :    the next line in the LEN bytes at S.  Returns with the memchr convention, i.e. nullptr if a line
     655                 :             :    terminator was not found.  We need to determine line endings in the same
     656                 :             :    manner that libcpp does: any of \n, \r\n, or \r is a line ending.  For \r\n the result points at the \n.  */
     657                 :             : 
     658                 :             : static char *
     659                 :   169892918 : find_end_of_line (char *s, size_t len)
     660                 :             : {
     661                 :  6060734688 :   for (const auto end = s + len; s != end; ++s)
     662                 :             :     {
     663                 :  6060684234 :       if (*s == '\n')
     664                 :   169842277 :         return s;
     665                 :  5890841957 :       if (*s == '\r')
     666                 :             :         {
     667                 :         187 :           const auto next = s + 1;
     668                 :         187 :           if (next == end)
     669                 :             :             {
     670                 :             :               /* Don't find the line ending if \r is the very last character
     671                 :             :                  in the buffer; we do not know if it's the end of the file or
     672                 :             :                  just the end of what has been read so far, and we wouldn't
     673                 :             :                  want to break in the middle of what's actually a \r\n
     674                 :             :                  sequence.  Instead, we will handle the case of a file ending
     675                 :             :                  in a \r later.  */
     676                 :             :               break;
     677                 :             :             }
     678                 :         187 :           return (*next == '\n' ? next : s);
     679                 :             :         }
     680                 :             :     }
     681                 :             :   return nullptr;
     682                 :             : }
     683                 :             : 
     684                 :             : /* Read a new line from this slot's file, using the slot's buffer as a
     685                 :             :    cache for the data coming from the file.  Upon successful completion,
     686                 :             :    *LINE is set to the beginning of the line found.  *LINE points
     687                 :             :    directly in the line cache and is only valid until the next call of
     688                 :             :    get_next_line.  *LINE_LEN is set to the length of the line.  Note
     689                 :             :    that the line does not contain any terminal delimiter.  This
     690                 :             :    function returns true if some data was read or processed from the
     691                 :             :    cache, false otherwise.  Note that subsequent calls to get_next_line
     692                 :             :    might make the content of *LINE invalid.  */
     693                 :             : 
     694                 :             : bool
     695                 :   169864773 : file_cache_slot::get_next_line (char **line, ssize_t *line_len)
     696                 :             : {
     697                 :             :   /* Fill the cache with data to process.  */
     698                 :   169864773 :   maybe_read_data ();
     699                 :             : 
     700                 :   169864773 :   size_t remaining_size = m_nb_read - m_line_start_idx;
     701                 :   169864773 :   if (remaining_size == 0)
     702                 :             :     /* There is no more data to process.  */
     703                 :             :     return false;
     704                 :             : 
     705                 :   169842512 :   char *line_start = m_data + m_line_start_idx;
     706                 :             : 
     707                 :   169842512 :   char *next_line_start = NULL;
     708                 :   169842512 :   size_t len = 0;
     709                 :   169842512 :   char *line_end = find_end_of_line (line_start, remaining_size);
     710                 :   169842512 :   if (line_end == NULL)
     711                 :             :     {
     712                 :             :       /* We haven't found an end-of-line delimiter in the cache.
     713                 :             :          Fill the cache with more data from the file and look again.  */
     714                 :       50454 :       while (maybe_read_data ())
     715                 :             :         {
     716                 :       50406 :           line_start = m_data + m_line_start_idx;  /* Recompute: the read may have reallocated m_data.  */
     717                 :       50406 :           remaining_size = m_nb_read - m_line_start_idx;
     718                 :       50406 :           line_end = find_end_of_line (line_start, remaining_size);
     719                 :       50406 :           if (line_end != NULL)
     720                 :             :             {
     721                 :       36006 :               next_line_start = line_end + 1;
     722                 :       36006 :               break;
     723                 :             :             }
     724                 :             :         }
     725                 :       36054 :       if (line_end == NULL)
     726                 :             :         {
     727                 :             :           /* We've loaded all the file into the cache and still no
     728                 :             :              terminator.  Let's say the line ends up at one byte past the
     729                 :             :              end of the file.  This is to stay consistent with the case
     730                 :             :              of when the line ends up with a terminator and line_end points to
     731                 :             :              that.  That consistency is useful below in the len calculation.
     732                 :             : 
     733                 :             :              If the file ends in a \r, we didn't identify it as a line
     734                 :             :              terminator above, so do that now instead.  */
     735                 :          48 :           line_end = m_data + m_nb_read;
     736                 :          48 :           if (m_nb_read && line_end[-1] == '\r')
     737                 :             :             {
     738                 :           0 :               --line_end;
     739                 :           0 :               m_missing_trailing_newline = false;
     740                 :             :             }
     741                 :             :           else
     742                 :          48 :             m_missing_trailing_newline = true;
     743                 :             :         }
     744                 :             :       else
     745                 :       36006 :         m_missing_trailing_newline = false;
     746                 :             :     }
     747                 :             :   else
     748                 :             :     {
     749                 :   169806458 :       next_line_start = line_end + 1;
     750                 :   169806458 :       m_missing_trailing_newline = false;
     751                 :             :     }
     752                 :             : 
     753                 :   169842512 :   if (m_fp && ferror (m_fp))
     754                 :             :     return false;
     755                 :             : 
     756                 :             :   /* At this point, we've found the end of the line.  It either points to
     757                 :             :      the line terminator or to one byte after the last byte of the file.  */
     758                 :   169842512 :   gcc_assert (line_end != NULL);
     759                 :             : 
     760                 :   169842512 :   len = line_end - line_start;
     761                 :             : 
     762                 :   169842512 :   if (m_line_start_idx < m_nb_read)
     763                 :   169842512 :     *line = line_start;
     764                 :             : 
     765                 :   169842512 :   ++m_line_num;
     766                 :             : 
     767                 :             :   /* Before we update our line record, make sure the hint about the
     768                 :             :      total number of lines of the file is correct.  If it's not, then
     769                 :             :      we give up recording line boundaries from now on.  */
     770                 :   169842512 :   bool update_line_record = true;
     771                 :   169842512 :   if (m_line_num > m_total_lines)
     772                 :             :     update_line_record = false;
     773                 :             : 
     774                 :             :   /* Now update our line record so that re-reading lines from
     775                 :             :      before m_line_start_idx is faster.  */
     776                 :    27508817 :   if (update_line_record
     777                 :    27508817 :       && m_line_record.length () < line_record_size)
     778                 :             :     {
     779                 :             :       /* If the file lines fits in the line record, we just record all
     780                 :             :          its lines ...*/
     781                 :    18156865 :       if (m_total_lines <= line_record_size
     782                 :    19197308 :           && m_line_num > m_line_record.length ())
     783                 :     1091984 :         m_line_record.safe_push
     784                 :     1091984 :           (file_cache_slot::line_info (m_line_num,
     785                 :             :                                        m_line_start_idx,
     786                 :     1091984 :                                        line_end - m_data));
     787                 :    17064881 :       else if (m_total_lines > line_record_size)
     788                 :             :         {
     789                 :             :           /* ... otherwise, we just scale total_lines down to
     790                 :             :              line_record_size lines.  */
     791                 :    16991732 :           size_t n = (m_line_num * line_record_size) / m_total_lines;
     792                 :    16991732 :           if (m_line_record.length () == 0
     793                 :    16975390 :               || n >= m_line_record.length ())
     794                 :     1754432 :             m_line_record.safe_push
     795                 :     1754432 :               (file_cache_slot::line_info (m_line_num,
     796                 :             :                                            m_line_start_idx,
     797                 :     1754432 :                                            line_end - m_data));
     798                 :             :         }
     799                 :             :     }
     800                 :             : 
     801                 :             :   /* Update m_line_start_idx so that it points to the next line to be
     802                 :             :      read.  */
     803                 :   169842512 :   if (next_line_start)
     804                 :   169842464 :     m_line_start_idx = next_line_start - m_data;
     805                 :             :   else
     806                 :             :     /* We didn't find any line terminator.  Let's consider that the end
     807                 :             :        of line is the end of the data in the cache.  The next
     808                 :             :        invocation of get_next_line will either read more data from the
     809                 :             :        underlying file or return false early because we've reached the
     810                 :             :        end of the file.  */
     811                 :          48 :     m_line_start_idx = m_nb_read;
     812                 :             : 
     813                 :   169842512 :   *line_len = len;
     814                 :             : 
     815                 :   169842512 :   return true;
     816                 :             : }
     817                 :             : 
     818                 :             : /* Consume the next bytes coming from the cache (or from its
     819                 :             :    underlying file if there are remaining unread bytes in the file)
     820                 :             :    until we reach the next end-of-line (or end-of-file).  There is no
     821                 :             :    copying from the cache involved.  Return TRUE upon successful
     822                 :             :    completion, i.e. whatever get_next_line returns.  */
     823                 :             : 
     824                 :             : bool
     825                 :   168717068 : file_cache_slot::goto_next_line ()
     826                 :             : {
     827                 :   168717068 :   char *l;
     828                 :   168717068 :   ssize_t len;
     829                 :             : 
     830                 :   168717068 :   return get_next_line (&l, &len);  /* The line itself is discarded; only the position advances.  */
     831                 :             : }
     832                 :             : 
     833                 :             : /* Read an arbitrary line number LINE_NUM from the file cached in C.
     834                 :             :    If the line was read successfully, *LINE points to the beginning
     835                 :             :    of the line in the file cache and *LINE_LEN is the length of the
     836                 :             :    line.  *LINE is not nul-terminated, but may contain zero bytes.
     837                 :             :    *LINE is only valid until the next call of read_line_num.
     838                 :             :    This function returns true if a line was read.  */
     839                 :             : 
     840                 :             : bool
     841                 :     1762936 : file_cache_slot::read_line_num (size_t line_num,
     842                 :             :                        char ** line, ssize_t *line_len)
     843                 :             : {
     844                 :     1762936 :   gcc_assert (line_num > 0);
     845                 :             : 
     846                 :     1762936 :   if (line_num <= m_line_num)
     847                 :             :     {
     848                 :             :       /* We've been asked to read lines that are before m_line_num.
     849                 :             :          So lets use our line record (if it's not empty) to try to
     850                 :             :          avoid re-reading the file from the beginning again.  */
     851                 :             : 
     852                 :     1185215 :       if (m_line_record.is_empty ())
     853                 :             :         {
     854                 :          42 :           m_line_start_idx = 0;
     855                 :          42 :           m_line_num = 0;
     856                 :             :         }
     857                 :             :       else
     858                 :             :         {
     859                 :     1185173 :           file_cache_slot::line_info *i = NULL;
     860                 :     1185173 :           if (m_total_lines <= line_record_size)
     861                 :             :             {
     862                 :             :               /* In languages where the input file is not totally
     863                 :             :                  preprocessed up front, the m_total_lines hint
     864                 :             :                  can be smaller than the number of lines of the
     865                 :             :                  file.  In that case, only the first
     866                 :             :                  m_total_lines have been recorded.
     867                 :             : 
     868                 :             :                  Otherwise, the first m_total_lines we've read have
     869                 :             :                  their start/end recorded here.  */
     870                 :     1127052 :               i = (line_num <= m_total_lines)
     871                 :      563526 :                 ? &m_line_record[line_num - 1]
     872                 :       73501 :                 : &m_line_record[m_total_lines - 1];
     873                 :      563526 :               gcc_assert (i->line_num <= line_num);
     874                 :             :             }
     875                 :             :           else
     876                 :             :             {
     877                 :             :               /*  So the file had more lines than our line record
     878                 :             :                   size.  Thus the number of lines we've recorded has
     879                 :             :                   been scaled down to line_record_size.  Let's
     880                 :             :                   pick the start/end of the recorded line that is
     881                 :             :                   closest to line_num.  */
     882                 :      621647 :               size_t n = (line_num <= m_total_lines)
     883                 :      621647 :                 ? line_num * line_record_size / m_total_lines
     884                 :       71449 :                 : m_line_record.length () - 1;
     885                 :      621647 :               if (n < m_line_record.length ())
     886                 :             :                 {
     887                 :      619165 :                   i = &m_line_record[n];
     888                 :      619165 :                   gcc_assert (i->line_num <= line_num);
     889                 :             :                 }
     890                 :             :             }
     891                 :             : 
     892                 :     1182691 :           if (i && i->line_num == line_num)
     893                 :             :             {
     894                 :             :               /* We have the start/end of the line.  */
     895                 :      609492 :               *line = m_data + i->start_pos;
     896                 :      609492 :               *line_len = i->end_pos - i->start_pos;
     897                 :      609492 :               return true;
     898                 :             :             }
     899                 :             : 
     900                 :      575681 :           if (i)
     901                 :             :             {
     902                 :      573199 :               m_line_start_idx = i->start_pos;
     903                 :      573199 :               m_line_num = i->line_num - 1;
     904                 :             :             }
     905                 :             :           else
     906                 :             :             {
     907                 :        2482 :               m_line_start_idx = 0;
     908                 :        2482 :               m_line_num = 0;
     909                 :             :             }
     910                 :             :         }
     911                 :             :     }
     912                 :             : 
     913                 :             :   /*  Let's walk from line m_line_num up to line_num - 1, without
     914                 :             :       copying any line.  */
     915                 :   169863230 :   while (m_line_num < line_num - 1)
     916                 :   168717068 :     if (!goto_next_line ())
     917                 :             :       return false;
     918                 :             : 
     919                 :             :   /* The line we want is the next one.  Let's read and copy it back to
     920                 :             :      the caller.  */
     921                 :     1146162 :   return get_next_line (line, line_len);
     922                 :             : }
     923                 :             : 
     924                 :             : /* Return the physical source line that corresponds to FILE_PATH/LINE.
     925                 :             :    The line is not NUL-terminated.  The returned pointer is only
     926                 :             :    valid until the next call of location_get_source_line.
     927                 :             :    Note that the line can contain several NUL characters,
     928                 :             :    so the returned char_span's length is the actual length of the line.
     929                 :             :    A NULL char_span is returned if LINE is 0, FILE_PATH is NULL, or the lookup or read fails.  */
     930                 :             : 
     931                 :             : char_span
     932                 :     1791835 : file_cache::get_source_line (const char *file_path, int line)
     933                 :             : {
     934                 :     1791835 :   char *buffer = NULL;
     935                 :     1791835 :   ssize_t len;
     936                 :             :   /* Line 0 and a NULL path are rejected before the cache is consulted.  */
     937                 :     1791835 :   if (line == 0)
     938                 :          19 :     return char_span (NULL, 0);
     939                 :             : 
     940                 :     1791816 :   if (file_path == NULL)
     941                 :           0 :     return char_span (NULL, 0);
     942                 :             : 
     943                 :     1791816 :   file_cache_slot *c = lookup_or_add_file (file_path);
     944                 :     1791816 :   if (c == NULL)  /* No cache slot could be obtained for FILE_PATH.  */
     945                 :       28880 :     return char_span (NULL, 0);
     946                 :             :   /* BUFFER and LEN are handed to read_line_num as out-pointers.  */
     947                 :     1762936 :   bool read = c->read_line_num (line, &buffer, &len);
     948                 :     1762936 :   if (!read)
     949                 :       22187 :     return char_span (NULL, 0);
     950                 :             :   /* BUFFER is returned as-is, not copied.  */
     951                 :     1740749 :   return char_span (buffer, len);
     952                 :             : }
     953                 :             : 
     954                 :             : /* Return a NUL-terminated copy of the source text between START and END,
     955                 :             :    read through FC, or NULL if the arguments are invalid or a needed source
     956                 :             :    line is too short.  The caller is responsible for freeing the return value.  */
     957                 :             : 
     958                 :             : char *
     959                 :         809 : get_source_text_between (file_cache &fc, location_t start, location_t end)
     960                 :             : {
     961                 :         809 :   expanded_location expstart =
     962                 :         809 :     expand_location_to_spelling_point (start, LOCATION_ASPECT_START);
     963                 :         809 :   expanded_location expend =
     964                 :         809 :     expand_location_to_spelling_point (end, LOCATION_ASPECT_FINISH);
     965                 :             : 
     966                 :             :   /* If the locations are in different files or the end comes before the
     967                 :             :      start, give up and return nothing.  */
     968                 :         809 :   if (!expstart.file || !expend.file)
     969                 :             :     return NULL;
     970                 :         787 :   if (strcmp (expstart.file, expend.file) != 0)
     971                 :             :     return NULL;
     972                 :         787 :   if (expstart.line > expend.line)
     973                 :             :     return NULL;
     974                 :         787 :   if (expstart.line == expend.line
     975                 :         786 :       && expstart.column > expend.column)
     976                 :             :     return NULL;
     977                 :             :   /* A column of 0 isn't a real column number, give up.  */
     978                 :         787 :   if (expstart.column == 0 || expend.column == 0)
     979                 :             :     return NULL;
     980                 :             : 
     981                 :             :   /* For a single line we need to trim both edges.  */
     982                 :         787 :   if (expstart.line == expend.line)
     983                 :             :     {
     984                 :         786 :       char_span line = fc.get_source_line (expstart.file, expstart.line);
     985                 :         786 :       if (line.length () < 1)
     986                 :             :         return NULL;
     987                 :         784 :       int s = expstart.column - 1;  /* Offset of the first wanted byte.  */
     988                 :         784 :       int len = expend.column - s;  /* Byte count, END's column included.  */
     989                 :         784 :       if (line.length () < (size_t)expend.column)
     990                 :             :         return NULL;
     991                 :         784 :       return line.subspan (s, len).xstrdup ();
     992                 :             :     }
     993                 :             :   /* Several lines: accumulate the pieces in an obstack.  */
     994                 :           1 :   struct obstack buf_obstack;
     995                 :           1 :   obstack_init (&buf_obstack);
     996                 :             : 
     997                 :             :   /* Loop through all lines in the range and append each to buf; may trim
     998                 :             :      parts of the start and end lines off depending on column values.  */
     999                 :           8 :   for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
    1000                 :             :     {
    1001                 :           7 :       char_span line = fc.get_source_line (expstart.file, lnum);
    1002                 :           7 :       if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
    1003                 :           0 :         continue;
    1004                 :             : 
    1005                 :             :       /* For the first line in the range, only start at expstart.column.  */
    1006                 :           7 :       if (lnum == expstart.line)
    1007                 :             :         {
    1008                 :           1 :           unsigned off = expstart.column - 1;
    1009                 :           1 :           if (line.length () < off)
    1010                 :           0 :             return NULL;
    1011                 :           1 :           line = line.subspan (off, line.length() - off);
    1012                 :             :         }
    1013                 :             :       /* For the last line, don't go past expend.column.  */
    1014                 :           6 :       else if (lnum == expend.line)
    1015                 :             :         {
    1016                 :           1 :           if (line.length () < (size_t)expend.column)
    1017                 :             :             return NULL;
    1018                 :           1 :           line = line.subspan (0, expend.column);
    1019                 :             :         }
    1020                 :             : 
    1021                 :             :       /* Collapse leading spaces and tabs of later lines into a single space.  */
    1022                 :           7 :       if (lnum > expstart.line)
    1023                 :             :         {
    1024                 :             :           unsigned off;
    1025                 :          30 :           for (off = 0; off < line.length(); ++off)
    1026                 :          30 :             if (line[off] != ' ' && line[off] != '\t')
    1027                 :             :               break;
    1028                 :           6 :           if (off > 0)
    1029                 :             :             {
    1030                 :           6 :               obstack_1grow (&buf_obstack, ' ');
    1031                 :           6 :               line = line.subspan (off, line.length() - off);
    1032                 :             :             }
    1033                 :             :         }
    1034                 :             : 
    1035                 :             :       /* This does not include any trailing newlines.  */
    1036                 :           7 :       obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
    1037                 :             :     }
    1038                 :             : 
    1039                 :             :   /* NUL-terminate and finish the buf obstack.  */
    1040                 :           1 :   obstack_1grow (&buf_obstack, 0);
    1041                 :           1 :   const char *buf = (const char *) obstack_finish (&buf_obstack);
    1042                 :             :   /* NOTE(review): no obstack_free of buf_obstack here or on the early returns in the loop; looks like a leak -- confirm.  */
    1043                 :           1 :   return xstrdup (buf);
    1044                 :             : }
    1045                 :             : 
    1046                 :             : /* Return the full content of FILE_PATH from its cache slot, or a NULL char_span if no slot is obtained.  */
    1047                 :             : char_span
    1048                 :          79 : file_cache::get_source_file_content (const char *file_path)
    1049                 :             : {
    1050                 :          79 :   file_cache_slot *c = lookup_or_add_file (file_path);
    1051                 :          79 :   if (c == nullptr)
    1052                 :           5 :     return char_span (nullptr, 0);
    1053                 :          74 :   return c->get_full_file_content ();
    1054                 :             : }
    1055                 :             : 
    1056                 :             : /* Test if the location originates from the spelling location of a
    1057                 :             :    built-in token.  That is, return TRUE if LOC is a (possibly
    1058                 :             :    virtual) location of a built-in token that appears in the expansion
    1059                 :             :    list of a macro.  Please note that this function also works on
    1060                 :             :    tokens that result from built-in tokens.  For instance, the
    1061                 :             :    function would return true if passed a token "4" that is the result
    1062                 :             :    of the expansion of the built-in __LINE__ macro.  */
    1063                 :             : bool
    1064                 :        2002 : is_location_from_builtin_token (location_t loc)
    1065                 :             : {
    1066                 :        2002 :   const line_map_ordinary *map = NULL;  /* Passed as an out-argument only; never read here.  */
    1067                 :        2002 :   loc = linemap_resolve_location (line_table, loc,
    1068                 :             :                                   LRK_SPELLING_LOCATION, &map);
    1069                 :        2002 :   return loc == BUILTINS_LOCATION;
    1070                 :             : }
    1071                 :             : 
    1072                 :             : /* Expand the source location LOC into a human-readable location.  If
    1073                 :             :    LOC is virtual, it resolves to the expansion point of the involved
    1074                 :             :    macro.  If LOC resolves to a builtin location, the file name of the
    1075                 :             :    readable location is set to the string "<built-in>".  */
    1076                 :             : 
    1077                 :             : expanded_location
    1078                 :   771746985 : expand_location (location_t loc)
    1079                 :             : {
    1080                 :   771746985 :   return expand_location_1 (line_table, loc, /*expansion_point_p=*/true,
    1081                 :   771746985 :                             LOCATION_ASPECT_CARET);  /* Always the caret aspect.  */
    1082                 :             : }
    1083                 :             : 
    1084                 :             : /* Expand the source location LOC into a human-readable location,
    1085                 :             :    passing ASPECT through to expand_location_1.  Unlike expand_location,
    1086                 :             :    this passes expansion_point_p as false, so a virtual LOC is presumably
    1087                 :             :    resolved to its spelling point (as the name says) rather than to the macro
    1088                 :             :    expansion point.  For a builtin location the file name is "<built-in>".  */
    1089                 :             : 
    1090                 :             : expanded_location
    1091                 :       80443 : expand_location_to_spelling_point (location_t loc,
    1092                 :             :                                    enum location_aspect aspect)
    1093                 :             : {
    1094                 :       80443 :   return expand_location_1 (line_table, loc, /*expansion_point_p=*/false,
    1095                 :       80443 :                             aspect);
    1096                 :             : }
    1097                 :             : 
    1098                 :             : /* The rich_location class within libcpp requires a way to expand
    1099                 :             :    location_t instances, and relies on the client code
    1100                 :             :    providing a symbol named
    1101                 :             :      linemap_client_expand_location_to_spelling_point
    1102                 :             :    to do this.
    1103                 :             : 
    1104                 :             :    This is the implementation for libcommon.a (all host binaries),
    1105                 :             :    which simply calls into expand_location_1 on SET, LOC and ASPECT.  */
    1106                 :             : 
    1107                 :             : expanded_location
    1108                 :     2004960 : linemap_client_expand_location_to_spelling_point (const line_maps *set,
    1109                 :             :                                                   location_t loc,
    1110                 :             :                                                   enum location_aspect aspect)
    1111                 :             : {
    1112                 :     2004960 :   return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect);
    1113                 :             : }
    1114                 :             : 
    1115                 :             : 
    1116                 :             : /* If LOCATION is in a system header and if it is a virtual location
    1117                 :             :    for a token coming from the expansion of a macro, unwind it to
    1118                 :             :    the location of the expansion point of the macro.  If the expansion
    1119                 :             :    point is also in a system header return the original LOCATION.
    1120                 :             :    Otherwise, return the location of the expansion point.
    1121                 :             : 
    1122                 :             :    This is used for instance when we want to emit diagnostics about a
    1123                 :             :    token that may be located in a macro that is itself defined in a
    1124                 :             :    system header, for example, for the NULL macro.  In such a case, if
    1125                 :             :    LOCATION were passed directly to diagnostic functions such as
    1126                 :             :    warning_at, the diagnostic would be suppressed (unless
    1127                 :             :    -Wsystem-headers).  */
    1128                 :             : 
    1129                 :             : location_t
    1130                 :   396657075 : expansion_point_location_if_in_system_header (location_t location)
    1131                 :             : {
    1132                 :   396657075 :   if (!in_system_header_at (location))
    1133                 :             :     return location;  /* Not in a system header: return LOCATION unchanged.  */
    1134                 :             : 
    1135                 :   370960829 :   location_t xloc = linemap_resolve_location (line_table, location,
    1136                 :             :                                               LRK_MACRO_EXPANSION_POINT,
    1137                 :             :                                               NULL);
    1138                 :   370960829 :   return in_system_header_at (xloc) ? location : xloc;
    1139                 :             : }
    1140                 :             : 
    1141                 :             : /* If LOCATION is a virtual location for a token coming from the expansion
    1142                 :             :    of a macro, unwind to the location of the expansion point of the macro.  */
    1143                 :             : 
    1144                 :             : location_t
    1145                 :         197 : expansion_point_location (location_t location)
    1146                 :             : {
    1147                 :         197 :   return linemap_resolve_location (line_table, location,
    1148                 :         197 :                                    LRK_MACRO_EXPANSION_POINT, NULL);  /* NULL: no map is requested back.  */
    1149                 :             : }
    1150                 :             : 
    1151                 :             : /* Construct a location with caret at CARET, ranging from START to
    1152                 :             :    FINISH, by forwarding to line_table->make_location.
    1153                 :             : 
    1154                 :             :    For example, consider:
    1155                 :             : 
    1156                 :             :                  11111111112
    1157                 :             :         12345678901234567890
    1158                 :             :      522
    1159                 :             :      523   return foo + bar;
    1160                 :             :                   ~~~~^~~~~
    1161                 :             :      524
    1162                 :             : 
    1163                 :             :    The location's caret is at the "+", line 523 column 15, but starts
    1164                 :             :    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
    1165                 :             :    of "bar" at column 19.  */
    1166                 :             : 
    1167                 :             : location_t
    1168                 :  2097898475 : make_location (location_t caret, location_t start, location_t finish)
    1169                 :             : {
    1170                 :  2097898475 :   return line_table->make_location (caret, start, finish);
    1171                 :             : }
    1172                 :             : 
    1173                 :             : /* Construct a location with caret at CARET and range SRC_RANGE: the pure location
    1174                 :             :    of CARET is combined with SRC_RANGE via line_table->get_or_create_combined_loc.  */
    1175                 :             : location_t
    1176                 :  1177474952 : make_location (location_t caret, source_range src_range)
    1177                 :             : {
    1178                 :  1177474952 :   location_t pure_loc = get_pure_location (caret);
    1179                 :  1177474952 :   return line_table->get_or_create_combined_loc (pure_loc, src_range,
    1180                 :  1177474952 :                                                  nullptr, 0);
    1181                 :             : }
    1182                 :             : 
    1183                 :             : /* An expanded_location stores the column in byte units.  This function
    1184                 :             :    converts EXPLOC's column to display units under POLICY.  That requires reading
    1185                 :             :    the associated source line through FC in order to calculate the display width.
    1186                 :             :    If that cannot be done for any reason, then returns the byte column as a fallback.  */
    1187                 :             : int
    1188                 :      975128 : location_compute_display_column (file_cache &fc,
    1189                 :             :                                  expanded_location exploc,
    1190                 :             :                                  const cpp_char_column_policy &policy)
    1191                 :             : {
    1192                 :      975128 :   if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
    1193                 :             :     return exploc.column;  /* No file name, or line/column of 0: nothing to look up.  */
    1194                 :      732198 :   char_span line = fc.get_source_line (exploc.file, exploc.line);
    1195                 :             :   /* If line is NULL, this function returns exploc.column which is the
    1196                 :             :      desired fallback.  */
    1197                 :      732198 :   return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
    1198                 :      732198 :                                             exploc.column, policy);
    1199                 :             : }
    1200                 :             : 
    1201                 :             : /* Dump statistics to stderr about the memory usage of the line_table
    1202                 :             :    set of line maps.  This also displays some statistics about macro
    1203                 :             :    expansion.  */
    1204                 :             : 
    1205                 :             : void
    1206                 :           0 : dump_line_table_statistics (void)
    1207                 :             : {
    1208                 :           0 :   struct linemap_stats s;
    1209                 :           0 :   long total_used_map_size,
    1210                 :             :     macro_maps_size,
    1211                 :             :     total_allocated_map_size;
    1212                 :             :   /* Zero S before handing it to linemap_get_statistics.  */
    1213                 :           0 :   memset (&s, 0, sizeof (s));
    1214                 :             : 
    1215                 :           0 :   linemap_get_statistics (line_table, &s);
    1216                 :             :   /* Derived totals, printed in the report below.  */
    1217                 :           0 :   macro_maps_size = s.macro_maps_used_size
    1218                 :           0 :     + s.macro_maps_locations_size;
    1219                 :             : 
    1220                 :           0 :   total_allocated_map_size = s.ordinary_maps_allocated_size
    1221                 :           0 :     + s.macro_maps_allocated_size
    1222                 :             :     + s.macro_maps_locations_size;
    1223                 :             : 
    1224                 :           0 :   total_used_map_size = s.ordinary_maps_used_size
    1225                 :           0 :     + s.macro_maps_used_size
    1226                 :             :     + s.macro_maps_locations_size;
    1227                 :             : 
    1228                 :           0 :   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
    1229                 :             :            s.num_expanded_macros);
    1230                 :           0 :   if (s.num_expanded_macros != 0)  /* Guard the division below.  */
    1231                 :           0 :     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
    1232                 :           0 :              s.num_macro_tokens / s.num_expanded_macros);
    1233                 :           0 :   fprintf (stderr,
    1234                 :             :            "\nLine Table allocations during the "
    1235                 :             :            "compilation process\n");
    1236                 :           0 :   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
    1237                 :           0 :            SIZE_AMOUNT (s.num_ordinary_maps_used));
    1238                 :           0 :   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
    1239                 :           0 :            SIZE_AMOUNT (s.ordinary_maps_used_size));
    1240                 :           0 :   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
    1241                 :           0 :            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
    1242                 :           0 :   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
    1243                 :           0 :            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
    1244                 :           0 :   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
    1245                 :           0 :            SIZE_AMOUNT (s.num_macro_maps_used));
    1246                 :           0 :   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
    1247                 :           0 :            SIZE_AMOUNT (s.macro_maps_used_size));
    1248                 :           0 :   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
    1249                 :           0 :            SIZE_AMOUNT (s.macro_maps_locations_size));
    1250                 :           0 :   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
    1251                 :           0 :            SIZE_AMOUNT (macro_maps_size));
    1252                 :           0 :   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
    1253                 :           0 :            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
    1254                 :           0 :   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
    1255                 :           0 :            SIZE_AMOUNT (total_allocated_map_size));
    1256                 :           0 :   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
    1257                 :           0 :            SIZE_AMOUNT (total_used_map_size));
    1258                 :           0 :   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
    1259                 :           0 :            SIZE_AMOUNT (s.adhoc_table_size));
    1260                 :           0 :   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
    1261                 :           0 :            SIZE_AMOUNT (s.adhoc_table_entries_used));
    1262                 :           0 :   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
    1263                 :           0 :            SIZE_AMOUNT (line_table->m_num_optimized_ranges));
    1264                 :           0 :   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
    1265                 :           0 :            SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
    1266                 :             : 
    1267                 :           0 :   fprintf (stderr, "\n");
    1268                 :           0 : }
    1269                 :             : 
    1270                 :             : /* Get location one beyond the final location in ordinary map IDX of SET.  */
    1271                 :             : 
    1272                 :             : static location_t
    1273                 :           0 : get_end_location (class line_maps *set, unsigned int idx)
    1274                 :             : {
    1275                 :           0 :   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)  /* IDX is the last ordinary map in use.  */
    1276                 :           0 :     return set->highest_location;
    1277                 :             :   /* Otherwise the answer is where the following ordinary map starts.  */
    1278                 :           0 :   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
    1279                 :           0 :   return MAP_START_LOCATION (next_map);
    1280                 :             : }
    1281                 :             : 
    1282                 :             : /* Helper function for write_digit_row: write DIGIT modulo 10 to STREAM as one character.  */
    1283                 :             : 
    1284                 :             : static void
    1285                 :           0 : write_digit (FILE *stream, int digit)
    1286                 :             : {
    1287                 :           0 :   fputc ('0' + (digit % 10), stream);
    1288                 :           0 : }
    1289                 :             : 
    1290                 :             : /* Helper function for dump_location_info.
    1291                 :             :    Write to STREAM a row padded to width INDENT and then a "|", followed, for each column from 1
    1292                 :             :    up to (not including) MAX_COL, by one digit of (LOC + (column << MAP's m_range_bits)) / DIVISOR.  */
    1293                 :             : 
    1294                 :             : static void
    1295                 :           0 : write_digit_row (FILE *stream, int indent,
    1296                 :             :                  const line_map_ordinary *map,
    1297                 :             :                  location_t loc, int max_col, int divisor)
    1298                 :             : {
    1299                 :           0 :   fprintf (stream, "%*c", indent, ' ');
    1300                 :           0 :   fprintf (stream, "|");
    1301                 :           0 :   for (int column = 1; column < max_col; column++)
    1302                 :             :     {
    1303                 :           0 :       location_t column_loc = loc + (column << map->m_range_bits);
    1304                 :           0 :       write_digit (stream, column_loc / divisor);  /* write_digit keeps only the last digit.  */
    1305                 :             :     }
    1306                 :           0 :   fprintf (stream, "\n");
    1307                 :           0 : }
    1308                 :             : 
    1309                 :             : /* Write a half-closed (START) / half-open (END) interval of
    1310                 :             :    location_t to STREAM.  NOTE(review): the %u conversions assume location_t is unsigned int -- confirm.  */
    1311                 :             : 
    1312                 :             : static void
    1313                 :           0 : dump_location_range (FILE *stream,
    1314                 :             :                      location_t start, location_t end)
    1315                 :             : {
    1316                 :           0 :   fprintf (stream,
    1317                 :             :            "  location_t interval: %u <= loc < %u\n",
    1318                 :             :            start, end);
    1319                 :           0 : }
    1320                 :             : 
    1321                 :             : /* Write NAME on a line of its own to STREAM, then the interval of location_t values that
    1322                 :             :    includes START but excludes END (via dump_location_range), then an empty line.  */
    1323                 :             : 
    1324                 :             : static void
    1325                 :           0 : dump_labelled_location_range (FILE *stream,
    1326                 :             :                               const char *name,
    1327                 :             :                               location_t start, location_t end)
    1328                 :             : {
    1329                 :           0 :   fprintf (stream, "%s\n", name);
    1330                 :           0 :   dump_location_range (stream, start, end);
    1331                 :           0 :   fprintf (stream, "\n");
    1332                 :           0 : }
    1333                 :             : 
    1334                 :             : /* Write a visualization of the locations in the line_table to STREAM: the reserved range, each ordinary map (with the source lines it covers), the unallocated range, each macro map, MAX_LOCATION_T and the ad-hoc range.  Note that the expansion point and some per-token details of macro maps are reported via inform rather than written to STREAM.  */
    1335                 :             : 
    1336                 :             : void
    1337                 :           0 : dump_location_info (FILE *stream)
    1338                 :             : {
    1339                 :           0 :   file_cache fc;
    1340                 :             : 
    1341                 :             :   /* Visualize the reserved locations.  */
    1342                 :           0 :   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
    1343                 :             :                                 0, RESERVED_LOCATION_COUNT);
    1344                 :             : 
    1345                 :             :   /* Visualize the ordinary line_map instances, rendering the sources. */
    1346                 :           0 :   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
    1347                 :             :     {
    1348                 :           0 :       location_t end_location = get_end_location (line_table, idx);
    1349                 :             :       /* half-closed: doesn't include this one. */
    1350                 :             : 
    1351                 :           0 :       const line_map_ordinary *map
    1352                 :           0 :         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
    1353                 :           0 :       fprintf (stream, "ORDINARY MAP: %i\n", idx);
    1354                 :           0 :       dump_location_range (stream,
    1355                 :             :                            MAP_START_LOCATION (map), end_location);
    1356                 :           0 :       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
    1357                 :           0 :       fprintf (stream, "  starting at line: %i\n",
    1358                 :             :                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
    1359                 :           0 :       fprintf (stream, "  column and range bits: %i\n",
    1360                 :           0 :                map->m_column_and_range_bits);
    1361                 :           0 :       fprintf (stream, "  column bits: %i\n",
    1362                 :           0 :                map->m_column_and_range_bits - map->m_range_bits);
    1363                 :           0 :       fprintf (stream, "  range bits: %i\n",
    1364                 :           0 :                map->m_range_bits);
    1365                 :           0 :       const char * reason;
    1366                 :           0 :       switch (map->reason) {
    1367                 :             :       case LC_ENTER:
    1368                 :             :         reason = "LC_ENTER";
    1369                 :             :         break;
    1370                 :           0 :       case LC_LEAVE:
    1371                 :           0 :         reason = "LC_LEAVE";
    1372                 :           0 :         break;
    1373                 :           0 :       case LC_RENAME:
    1374                 :           0 :         reason = "LC_RENAME";
    1375                 :           0 :         break;
    1376                 :           0 :       case LC_RENAME_VERBATIM:
    1377                 :           0 :         reason = "LC_RENAME_VERBATIM";
    1378                 :           0 :         break;
    1379                 :           0 :       case LC_ENTER_MACRO:
    1380                 :           0 :         reason = "LC_ENTER_MACRO";
    1381                 :           0 :         break;
    1382                 :           0 :       default:
    1383                 :           0 :         reason = "Unknown";
    1384                 :             :       }
    1385                 :           0 :       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
    1386                 :             : 
    1387                 :           0 :       const line_map_ordinary *includer_map
    1388                 :           0 :         = linemap_included_from_linemap (line_table, map);
    1389                 :           0 :       fprintf (stream, "  included from location: %d",
    1390                 :             :                linemap_included_from (map));
    1391                 :           0 :       if (includer_map) {
    1392                 :           0 :         fprintf (stream, " (in ordinary map %d)",
    1393                 :           0 :                  int (includer_map - line_table->info_ordinary.maps));
    1394                 :             :       }
    1395                 :           0 :       fprintf (stream, "\n");
    1396                 :             : 
    1397                 :             :       /* Render the span of source lines that this "map" covers.  */
    1398                 :           0 :       for (location_t loc = MAP_START_LOCATION (map);
    1399                 :           0 :            loc < end_location;
    1400                 :           0 :            loc += (1 << map->m_range_bits) )
    1401                 :             :         {
    1402                 :           0 :           gcc_assert (pure_location_p (line_table, loc) );
    1403                 :             : 
    1404                 :           0 :           expanded_location exploc
    1405                 :           0 :             = linemap_expand_location (line_table, map, loc);
    1406                 :             : 
    1407                 :           0 :           if (exploc.column == 0)
    1408                 :             :             {
    1409                 :             :               /* Beginning of a new source line: draw the line.  */
    1410                 :             : 
    1411                 :           0 :               char_span line_text = fc.get_source_line (exploc.file,
    1412                 :             :                                                         exploc.line);
    1413                 :           0 :               if (!line_text)
    1414                 :             :                 break;
    1415                 :           0 :               fprintf (stream,
    1416                 :             :                        "%s:%3i|loc:%5i|%.*s\n",
    1417                 :             :                        exploc.file, exploc.line,
    1418                 :             :                        loc,
    1419                 :           0 :                        (int)line_text.length (), line_text.get_buffer ());
    1420                 :             : 
    1421                 :             :               /* "loc" is at column 0, which means "the whole line".
    1422                 :             :                  Render the locations *within* the line, by underlining
    1423                 :             :                  it, showing the location_t numeric values
    1424                 :             :                  at each column.  */
    1425                 :           0 :               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
    1426                 :           0 :               if (max_col > line_text.length ())
    1427                 :           0 :                 max_col = line_text.length () + 1;
    1428                 :             : 
    1429                 :           0 :               int len_lnum = num_digits (exploc.line);
    1430                 :           0 :               if (len_lnum < 3)
    1431                 :             :                 len_lnum = 3;
    1432                 :           0 :               int len_loc = num_digits (loc);
    1433                 :           0 :               if (len_loc < 5)
    1434                 :             :                 len_loc = 5;
    1435                 :             : 
    1436                 :           0 :               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
    1437                 :             : 
    1438                 :             :               /* Thousands.  */
    1439                 :           0 :               if (end_location > 999)
    1440                 :           0 :                 write_digit_row (stream, indent, map, loc, max_col, 1000);
    1441                 :             : 
    1442                 :             :               /* Hundreds.  */
    1443                 :           0 :               if (end_location > 99)
    1444                 :           0 :                 write_digit_row (stream, indent, map, loc, max_col, 100);
    1445                 :             : 
    1446                 :             :               /* Tens.  */
    1447                 :           0 :               write_digit_row (stream, indent, map, loc, max_col, 10);
    1448                 :             : 
    1449                 :             :               /* Units.  */
    1450                 :           0 :               write_digit_row (stream, indent, map, loc, max_col, 1);
    1451                 :             :             }
    1452                 :             :         }
    1453                 :           0 :       fprintf (stream, "\n");
    1454                 :             :     }
    1455                 :             : 
    1456                 :             :   /* Visualize unallocated values.  */
    1457                 :           0 :   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
    1458                 :             :                                 line_table->highest_location,
    1459                 :             :                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
    1460                 :             : 
    1461                 :             :   /* Visualize the macro line_map instances, rendering the sources. */
    1462                 :           0 :   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
    1463                 :             :     {
    1464                 :             :       /* Each macro map that is allocated owns location_t values
    1465                 :             :          that are *lower* than the one before them.
    1466                 :             :          Hence it's meaningful to view them either in order of ascending
    1467                 :             :          source locations, or in order of ascending macro map index.  */
    1468                 :           0 :       const bool ascending_location_ts = true;
    1469                 :           0 :       unsigned int idx = (ascending_location_ts
    1470                 :           0 :                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
    1471                 :           0 :                           : i);
    1472                 :           0 :       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
    1473                 :           0 :       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
    1474                 :             :                idx,
    1475                 :             :                linemap_map_get_macro_name (map),
    1476                 :             :                MACRO_MAP_NUM_MACRO_TOKENS (map));
    1477                 :           0 :       dump_location_range (stream,
    1478                 :           0 :                            map->start_location,
    1479                 :           0 :                            (map->start_location
    1480                 :           0 :                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
    1481                 :           0 :       inform (map->get_expansion_point_location (),
    1482                 :             :               "expansion point is location %i",
    1483                 :             :               map->get_expansion_point_location ());
    1484                 :           0 :       fprintf (stream, "  map->start_location: %u\n",
    1485                 :           0 :                map->start_location);
    1486                 :             : 
    1487                 :           0 :       fprintf (stream, "  macro_locations:\n");
    1488                 :           0 :       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
    1489                 :             :         {
    1490                 :           0 :           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
    1491                 :           0 :           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
    1492                 :             : 
    1493                 :             :           /* linemap_add_macro_token encodes token numbers in an expansion
    1494                 :             :              by putting them after MAP_START_LOCATION. */
    1495                 :             : 
    1496                 :             :           /* I'm typically seeing 4 uninitialized entries at the end of
    1497                 :             :              0xafafafaf.
    1498                 :             :              This appears to be due to macro.cc:replace_args
    1499                 :             :              adding 2 extra args for padding tokens; presumably there may
    1500                 :             :              be a leading and/or trailing padding token injected,
    1501                 :             :              each for 2 more location slots.
    1502                 :             :              This would explain there being up to 4 location_ts slots
    1503                 :             :              that may be uninitialized.  */
    1504                 :             : 
    1505                 :           0 :           fprintf (stream, "    %u: %u, %u\n",
    1506                 :             :                    i,
    1507                 :             :                    x,
    1508                 :             :                    y);
    1509                 :           0 :           if (x == y)
    1510                 :             :             {
    1511                 :           0 :               if (x < MAP_START_LOCATION (map))
    1512                 :           0 :                 inform (x, "token %u has %<x-location == y-location == %u%>",
    1513                 :             :                         i, x);
    1514                 :             :               else
    1515                 :           0 :                 fprintf (stream,
    1516                 :             :                          "x-location == y-location == %u encodes token # %u\n",
    1517                 :           0 :                          x, x - MAP_START_LOCATION (map));
    1518                 :             :                 }
    1519                 :             :           else
    1520                 :             :             {
    1521                 :           0 :               inform (x, "token %u has %<x-location == %u%>", i, x);
    1522                 :           0 :               inform (x, "token %u has %<y-location == %u%>", i, y);
    1523                 :             :             }
    1524                 :             :         }
    1525                 :           0 :       fprintf (stream, "\n");
    1526                 :             :     }
    1527                 :             : 
    1528                 :             :   /* It appears that MAX_LOCATION_T itself is never assigned to a
    1529                 :             :      macro map, presumably due to an off-by-one error somewhere
    1530                 :             :      between the logic in linemap_enter_macro and
    1531                 :             :      LINEMAPS_MACRO_LOWEST_LOCATION.  */
    1532                 :           0 :   dump_labelled_location_range (stream, "MAX_LOCATION_T",
    1533                 :             :                                 MAX_LOCATION_T,
    1534                 :             :                                 MAX_LOCATION_T + 1);
    1535                 :             : 
    1536                 :             :   /* Visualize ad-hoc values.  */
    1537                 :           0 :   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
    1538                 :             :                                 MAX_LOCATION_T + 1, UINT_MAX);
    1539                 :           0 : }
    1540                 :             : 
    1541                 :             : /* string_concat's constructor: store NUM in m_num and copy the NUM locations in LOCS into a new array obtained from ggc_vec_alloc.  */
    1542                 :             : 
    1543                 :     2593701 : string_concat::string_concat (int num, location_t *locs)
    1544                 :     2593701 :   : m_num (num)
    1545                 :             : {
    1546                 :     2593701 :   m_locs = ggc_vec_alloc <location_t> (num);
    1547                 :    35197273 :   for (int i = 0; i < num; i++)
    1548                 :    32603572 :     m_locs[i] = locs[i];
    1549                 :     2593701 : }
    1550                 :             : 
    1551                 :             : /* string_concat_db's constructor: create m_table, the map from key locations to string_concat instances, via create_ggc (64) (presumably the initial table size; verify against hash_map).  */
    1552                 :             : 
    1553                 :      208959 : string_concat_db::string_concat_db ()
    1554                 :             : {
    1555                 :      208959 :   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
    1556                 :      208959 : }
    1557                 :             : 
    1558                 :             : /* Record that a string concatenation occurred, covering NUM (which must be > 1)
    1559                 :             :    string literal tokens.  LOCS is a non-NULL array of size NUM, containing the
    1560                 :             :    locations of the tokens.  A copy of LOCS is taken and stored in m_table under the key get_key_loc (LOCS[0]); nothing is recorded if that key is a reserved location.  */
    1561                 :             : 
    1562                 :             : void
    1563                 :     2593708 : string_concat_db::record_string_concatenation (int num, location_t *locs)
    1564                 :             : {
    1565                 :     2593708 :   gcc_assert (num > 1);
    1566                 :     2593708 :   gcc_assert (locs);
    1567                 :             : 
    1568                 :     2593708 :   location_t key_loc = get_key_loc (locs[0]);
    1569                 :             :   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
    1570                 :             :      any data now recorded under key 'key_loc' would be overwritten by a
    1571                 :             :      subsequent call with the same key 'key_loc'.  */
    1572                 :     2593708 :   if (RESERVED_LOCATION_P (key_loc))
    1573                 :           7 :     return;
    1574                 :             : 
    1575                 :     2593701 :   string_concat *concat
    1576                 :     2593701 :     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
    1577                 :     2593701 :   m_table->put (key_loc, concat);
    1578                 :             : }
    1579                 :             : 
    1580                 :             : /* Determine if LOC was the location of the initial token of a
    1581                 :             :    concatenation of string literal tokens.  OUT_NUM and OUT_LOCS must be non-NULL.
    1582                 :             :    If so, *OUT_NUM is written to with the number of tokens, and
    1583                 :             :    *OUT_LOCS with the location of an array of locations of the
    1584                 :             :    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
    1585                 :             :    storage owned by the string_concat_db.
    1586                 :             :    Otherwise, return false, leaving *OUT_NUM and *OUT_LOCS untouched.  */
    1587                 :             : 
    1588                 :             : bool
    1589                 :       34015 : string_concat_db::get_string_concatenation (location_t loc,
    1590                 :             :                                             int *out_num,
    1591                 :             :                                             location_t **out_locs)
    1592                 :             : {
    1593                 :       34015 :   gcc_assert (out_num);
    1594                 :       34015 :   gcc_assert (out_locs);
    1595                 :             : 
    1596                 :       34015 :   location_t key_loc = get_key_loc (loc);
    1597                 :             :   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
    1598                 :             :      discussion in 'string_concat_db::record_string_concatenation'.  */
    1599                 :       34015 :   if (RESERVED_LOCATION_P (key_loc))
    1600                 :             :     return false;
    1601                 :             : 
    1602                 :       34013 :   string_concat **concat = m_table->get (key_loc);
    1603                 :       34013 :   if (!concat)
    1604                 :             :     return false;
    1605                 :             : 
    1606                 :        4364 :   *out_num = (*concat)->m_num;
    1607                 :        4364 :   *out_locs =(*concat)->m_locs;
    1608                 :        4364 :   return true;
    1609                 :             : }
    1610                 :             : 
    1611                 :             : /* Internal function.  Canonicalize LOC into a form suitable for
    1612                 :             :    use as a key within the database, stripping away macro expansion,
    1613                 :             :    ad-hoc information, and range information: resolve LOC to its spelling location (LRK_SPELLING_LOCATION), then use
    1614                 :             :    the start (m_start) of the range that get_range_from_loc reports for it.  */
    1615                 :             : 
    1616                 :             : location_t
    1617                 :     2627723 : string_concat_db::get_key_loc (location_t loc)
    1618                 :             : {
    1619                 :     2627723 :   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
    1620                 :             :                                   NULL);
    1621                 :             : 
    1622                 :     2627723 :   loc = get_range_from_loc (line_table, loc).m_start;
    1623                 :             : 
    1624                 :     2627723 :   return loc;
    1625                 :             : }
    1626                 :             : 
    1627                 :             : /* Helper class for use within get_substring_ranges_for_loc.
    1628                 :             :    A vec of cpp_string with responsibility for releasing all of the
    1629                 :             :    str->text for each str in the vector: the destructor calls free on each element's text.  ALLOC is passed through to the auto_vec constructor.  */
    1630                 :             : 
    1631                 :             : class auto_cpp_string_vec :  public auto_vec <cpp_string>
    1632                 :             : {
    1633                 :             :  public:
    1634                 :       34015 :   auto_cpp_string_vec (int alloc)
    1635                 :       68030 :     : auto_vec <cpp_string> (alloc) {}
    1636                 :             : 
    1637                 :       34015 :   ~auto_cpp_string_vec ()
    1638                 :             :   {
    1639                 :             :     /* Clean up the copies within this vec.  */
    1640                 :       34015 :     int i;
    1641                 :       34015 :     cpp_string *str;
    1642                 :       68942 :     FOR_EACH_VEC_ELT (*this, i, str)
    1643                 :       34927 :       free (const_cast <unsigned char *> (str->text));
    1644                 :       34015 :   }
    1645                 :             : };
    1646                 :             : 
    1647                 :             : /* Attempt to populate RANGES with source location information on the
    1648                 :             :    individual characters within the string literal found at STRLOC.
    1649                 :             :    If CONCATS is non-NULL, then any string literals that the token at
    1650                 :             :    STRLOC  was concatenated with are also added to RANGES.
    1651                 :             : 
    1652                 :             :    Return NULL if successful, or an error message if any errors occurred (in
    1653                 :             :    which case RANGES may be only partially populated and should not
    1654                 :             :    be used).
    1655                 :             : 
    1656                 :             :    This is implemented by re-parsing the relevant source line(s).  */
    1657                 :             : 
    1658                 :             : static const char *
    1659                 :       36256 : get_substring_ranges_for_loc (cpp_reader *pfile,
    1660                 :             :                               file_cache &fc,
    1661                 :             :                               string_concat_db *concats,
    1662                 :             :                               location_t strloc,
    1663                 :             :                               enum cpp_ttype type,
    1664                 :             :                               cpp_substring_ranges &ranges)
    1665                 :             : {
    1666                 :       36256 :   gcc_assert (pfile);
    1667                 :             : 
    1668                 :       36256 :   if (strloc == UNKNOWN_LOCATION)
    1669                 :             :     return "unknown location";
    1670                 :             : 
    1671                 :             :   /* Reparsing the strings requires accurate location information.
    1672                 :             :      If -ftrack-macro-expansion has been overridden from its default
    1673                 :             :      of 2, then we might have a location of a macro expansion point,
    1674                 :             :      rather than the location of the literal itself.
    1675                 :             :      Avoid this by requiring that we have full macro expansion tracking
    1676                 :             :      for substring locations to be available.  */
    1677                 :       36256 :   if (cpp_get_options (pfile)->track_macro_expansion != 2)
    1678                 :             :     return "track_macro_expansion != 2";
    1679                 :             : 
    1680                 :             :   /* If #line or # 44 "file"-style directives are present, then there's
    1681                 :             :      no guarantee that the line numbers we have can be used to locate
    1682                 :             :      the strings.  For example, we might have a .i file with # directives
    1683                 :             :      pointing back to lines within a .c file, but the .c file might
    1684                 :             :      have been edited since the .i file was created.
    1685                 :             :      In such a case, the safest course is to disable on-demand substring
    1686                 :             :      locations.  */
    1687                 :       34018 :   if (line_table->seen_line_directive)
    1688                 :             :     return "seen line directive";
    1689                 :             : 
    1690                 :             :   /* If string concatenation has occurred at STRLOC, get the locations
    1691                 :             :      of all of the literal tokens making up the compound string.
    1692                 :             :      Otherwise, just use STRLOC.  */
    1693                 :       34015 :   int num_locs = 1;
    1694                 :       34015 :   location_t *strlocs = &strloc;
    1695                 :       34015 :   if (concats)
    1696                 :       34015 :     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
    1697                 :             : 
    1698                 :       34015 :   auto_cpp_string_vec strs (num_locs);
    1699                 :       34015 :   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
    1700                 :       68933 :   for (int i = 0; i < num_locs; i++)
    1701                 :             :     {
    1702                 :             :       /* Get range of strloc.  We will use it to locate the start and finish
    1703                 :             :          of the literal token within the line.  */
    1704                 :       41020 :       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
    1705                 :             : 
    1706                 :       52392 :       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
    1707                 :             :         {
    1708                 :             :           /* If the string token was within a macro expansion, then we can
    1709                 :             :              cope with it for the simple case where we have a single token.
    1710                 :             :              Otherwise, bail out.  */
    1711                 :        1157 :           if (src_range.m_start != src_range.m_finish)
    1712                 :        6102 :             return "macro expansion";
    1713                 :             :         }
    1714                 :             :       else
    1715                 :             :         {
    1716                 :       39863 :           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
    1717                 :             :             /* If so, we can't reliably determine where the token started within
    1718                 :             :                its line.  */
    1719                 :             :             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
    1720                 :             : 
    1721                 :       34191 :           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
    1722                 :             :             /* If so, we can't reliably determine where the token finished
    1723                 :             :                within its line.  */
    1724                 :             :             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
    1725                 :             :         }
    1726                 :             : 
    1727                 :       35177 :       expanded_location start
    1728                 :       35177 :         = expand_location_to_spelling_point (src_range.m_start,
    1729                 :             :                                              LOCATION_ASPECT_START);
    1730                 :       35177 :       expanded_location finish
    1731                 :       35177 :         = expand_location_to_spelling_point (src_range.m_finish,
    1732                 :             :                                              LOCATION_ASPECT_FINISH);
    1733                 :       35177 :       if (start.file != finish.file)
    1734                 :             :         return "range endpoints are in different files";
    1735                 :       35177 :       if (start.line != finish.line)
    1736                 :             :         return "range endpoints are on different lines";
    1737                 :       34930 :       if (start.column > finish.column)
    1738                 :             :         return "range endpoints are reversed";
    1739                 :             : 
    1740                 :       34930 :       char_span line = fc.get_source_line (start.file, start.line);
    1741                 :       34930 :       if (!line)
    1742                 :             :         return "unable to read source line";
    1743                 :             : 
    1744                 :             :       /* Determine the location of the literal (including quotes
    1745                 :             :          and leading prefix chars, such as the 'u' in a u""
    1746                 :             :          token).  */
    1747                 :       34930 :       size_t literal_length = finish.column - start.column + 1;
    1748                 :             : 
    1749                 :             :       /* Ensure that we don't crash if we got the wrong location.  */
    1750                 :       34930 :       if (start.column < 1)
    1751                 :             :         return "zero start column";
    1752                 :       34928 :       if (line.length () < (start.column - 1 + literal_length))
    1753                 :             :         return "line is not wide enough";
    1754                 :             : 
    1755                 :       34927 :       char_span literal = line.subspan (start.column - 1, literal_length);
    1756                 :             : 
    1757                 :       34927 :       cpp_string from;
    1758                 :       34927 :       from.len = literal_length;
    1759                 :             :       /* Make a copy of the literal, to avoid having to rely on
    1760                 :             :          the lifetime of the copy of the line within the cache.
    1761                 :             :          This will be released by the auto_cpp_string_vec dtor.  */
    1762                 :       34927 :       from.text = (unsigned char *)literal.xstrdup ();
    1763                 :       34927 :       strs.safe_push (from);
    1764                 :             : 
    1765                 :             :       /* For very long lines, a new linemap could have started
    1766                 :             :          halfway through the token.
    1767                 :             :          Ensure that the loc_reader uses the linemap of the
    1768                 :             :          *end* of the token for its start location.  */
    1769                 :       34927 :       const line_map_ordinary *start_ord_map;
    1770                 :       34927 :       linemap_resolve_location (line_table, src_range.m_start,
    1771                 :             :                                 LRK_SPELLING_LOCATION, &start_ord_map);
    1772                 :       34927 :       const line_map_ordinary *final_ord_map;
    1773                 :       34927 :       linemap_resolve_location (line_table, src_range.m_finish,
    1774                 :             :                                 LRK_SPELLING_LOCATION, &final_ord_map);
    1775                 :       34927 :       if (start_ord_map == NULL || final_ord_map == NULL)
    1776                 :             :         return "failed to get ordinary maps";
    1777                 :             :       /* Bulletproofing.  We ought to only have different ordinary maps
    1778                 :             :          for start vs finish due to line-length jumps.  */
    1779                 :       34926 :       if (start_ord_map != final_ord_map
    1780                 :        6337 :           && start_ord_map->to_file != final_ord_map->to_file)
    1781                 :             :         return "start and finish are spelled in different ordinary maps";
    1782                 :             :       /* The file from linemap_resolve_location ought to match that from
    1783                 :             :          expand_location_to_spelling_point.  */
    1784                 :       34926 :       if (start_ord_map->to_file != start.file)
    1785                 :             :         return "mismatching file after resolving linemap";
    1786                 :             : 
    1787                 :       34918 :       location_t start_loc
    1788                 :       34918 :         = linemap_position_for_line_and_column (line_table, final_ord_map,
    1789                 :             :                                                 start.line, start.column);
    1790                 :             : 
    1791                 :       34918 :       cpp_string_location_reader loc_reader (start_loc, line_table);
    1792                 :       34918 :       loc_readers.safe_push (loc_reader);
    1793                 :             :     }
    1794                 :             : 
    1795                 :             :   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
    1796                 :       55826 :   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
    1797                 :             :                                                  loc_readers.address (),
    1798                 :             :                                                  num_locs, &ranges, type);
    1799                 :       27913 :   if (err)
    1800                 :             :     return err;
    1801                 :             : 
    1802                 :             :   /* Success: "ranges" should now contain information on the string.  */
    1803                 :             :   return NULL;
    1804                 :       34015 : }
    1805                 :             : 
    1806                 :             : /* Attempt to populate *OUT_LOC with source location information on the
    1807                 :             :    given characters within the string literal found at STRLOC.
    1808                 :             :    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
    1809                 :             :    character set; all three must be non-negative, and OUT_LOC non-NULL.
    1810                 :             : 
    1811                 :             :    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
    1812                 :             :    and string literal "012345\n789"
    1813                 :             :    *OUT_LOC is written to with:
    1814                 :             :      "012345\n789"
    1815                 :             :          ~^~~~~
    1816                 :             : 
    1817                 :             :    If CONCATS is non-NULL, then any string literals that the token at
    1818                 :             :    STRLOC was concatenated with are also considered.
    1819                 :             :    This is implemented by re-parsing the relevant source line(s), via
    1820                 :             :    get_substring_ranges_for_loc.
    1821                 :             : 
    1822                 :             :    Return NULL if successful, or an error message if any errors occurred
    1823                 :             :    (in which case *OUT_LOC is not written to).  Error messages are intended
    1824                 :             :    for GCC developers (to help debugging) rather than for end-users.  */
    1825                 :             : 
    1826                 :             : const char *
    1827                 :       11136 : get_location_within_string (cpp_reader *pfile,
    1828                 :             :                             file_cache &fc,
    1829                 :             :                             string_concat_db *concats,
    1830                 :             :                             location_t strloc,
    1831                 :             :                             enum cpp_ttype type,
    1832                 :             :                             int caret_idx, int start_idx, int end_idx,
    1833                 :             :                             location_t *out_loc)
    1834                 :             : {
    1835                 :       11136 :   gcc_checking_assert (caret_idx >= 0);
    1836                 :       11136 :   gcc_checking_assert (start_idx >= 0);
    1837                 :       11136 :   gcc_checking_assert (end_idx >= 0);
    1838                 :       11136 :   gcc_assert (out_loc);
    1839                 :             : 
    1840                 :       11136 :   cpp_substring_ranges ranges;
    1841                 :       11136 :   const char *err
    1842                 :       11136 :     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
    1843                 :       11136 :   if (err)
    1844                 :             :     return err;
    1845                 :             :   /* Reject any index beyond the number of ranges that were obtained.  */
    1846                 :        8422 :   if (caret_idx >= ranges.get_num_ranges ())
    1847                 :             :     return "caret_idx out of range";
    1848                 :        8422 :   if (start_idx >= ranges.get_num_ranges ())
    1849                 :             :     return "start_idx out of range";
    1850                 :        8422 :   if (end_idx >= ranges.get_num_ranges ())
    1851                 :             :     return "end_idx out of range";
    1852                 :             :   /* Use the starts of the caret/start ranges and the finish of the end range.  */
    1853                 :        8422 :   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
    1854                 :        8422 :                             ranges.get_range (start_idx).m_start,
    1855                 :        8422 :                             ranges.get_range (end_idx).m_finish);
    1856                 :        8422 :   return NULL;
    1857                 :       11136 : }
    1858                 :             : 
    1859                 :             : /* Return a locus combining LOCUS's block and range with DISCRIMINATOR.  */
    1860                 :             : 
    1861                 :             : location_t
    1862                 :    38522525 : location_with_discriminator (location_t locus, int discriminator)
    1863                 :             : {
    1864                 :    38522525 :   tree block = LOCATION_BLOCK (locus);
    1865                 :    38522525 :   source_range src_range = get_range_from_loc (line_table, locus);
    1866                 :    38522525 :   locus = get_pure_location (locus);
    1867                 :             :   /* Nothing to combine with for an unknown location: return it as-is.  */
    1868                 :    38522525 :   if (locus == UNKNOWN_LOCATION)
    1869                 :             :     return locus;
    1870                 :             : 
    1871                 :    37992278 :   return line_table->get_or_create_combined_loc (locus, src_range, block,
    1872                 :    37992278 :                                                  discriminator);
    1873                 :             : }
    1874                 :             : 
    1875                 :             : /* Return TRUE if LOCUS represents a location with a non-zero discriminator.  */
    1876                 :             : 
    1877                 :             : bool
    1878                 :    14297183 : has_discriminator (location_t locus)
    1879                 :             : {
    1880                 :    14297183 :   return get_discriminator_from_loc (locus) != 0;
    1881                 :             : }
    1882                 :             : 
    1883                 :             : /* Return the discriminator for LOCUS, looked up in the global line_table.  */
    1884                 :             : 
    1885                 :             : int
    1886                 :   286169718 : get_discriminator_from_loc (location_t locus)
    1887                 :             : {
    1888                 :   286169718 :   return get_discriminator_from_loc (line_table, locus);
    1889                 :             : }
    1890                 :             : 
    1891                 :             : #if CHECKING_P
    1892                 :             : 
    1893                 :             : namespace selftest {
    1894                 :             : 
    1895                 :             : /* Selftests of location handling.  */
    1896                 :             : 
    1897                 :             : /* Attempt to populate *OUT_RANGE with source location information on the
    1898                 :             :    given character within the string literal found at STRLOC.
    1899                 :             :    CHAR_IDX refers to an offset within the execution character set, and must
    1900                 :             :    be non-negative; OUT_RANGE must be non-NULL.  If CONCATS is non-NULL, then
    1901                 :             :    any string literals that the token at STRLOC was concatenated with are
    1902                 :             :    also considered.
    1903                 :             :    This is implemented by re-parsing the relevant source line(s).
    1904                 :             : 
    1905                 :             :    Return NULL if successful, or an error message if any errors occurred
    1906                 :             :    (leaving *OUT_RANGE untouched).  Error messages are intended for GCC
    1907                 :             :    developers (to help debugging) rather than for end-users.  */
    1908                 :             : 
    1909                 :             : static const char *
    1910                 :       23224 : get_source_range_for_char (cpp_reader *pfile,
    1911                 :             :                            file_cache &fc,
    1912                 :             :                            string_concat_db *concats,
    1913                 :             :                            location_t strloc,
    1914                 :             :                            enum cpp_ttype type,
    1915                 :             :                            int char_idx,
    1916                 :             :                            source_range *out_range)
    1917                 :             : {
    1918                 :       23224 :   gcc_checking_assert (char_idx >= 0);
    1919                 :       23224 :   gcc_assert (out_range);
    1920                 :             : 
    1921                 :       23224 :   cpp_substring_ranges ranges;
    1922                 :       23224 :   const char *err
    1923                 :       23224 :     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
    1924                 :       23224 :   if (err)
    1925                 :             :     return err;
    1926                 :             :   /* Reject an index beyond the number of ranges that were obtained.  */
    1927                 :       18128 :   if (char_idx >= ranges.get_num_ranges ())
    1928                 :             :     return "char_idx out of range";
    1929                 :             : 
    1930                 :       18128 :   *out_range = ranges.get_range (char_idx);
    1931                 :       18128 :   return NULL;
    1932                 :       23224 : }
    1933                 :             : 
    1934                 :             : /* As get_source_range_for_char, but write to *OUT (which must be non-NULL)
    1935                 :             :    the number of ranges that are available; *OUT is untouched on failure.  */
    1936                 :             : 
    1937                 :             : static const char *
    1938                 :        1264 : get_num_source_ranges_for_substring (cpp_reader *pfile,
    1939                 :             :                                      file_cache &fc,
    1940                 :             :                                      string_concat_db *concats,
    1941                 :             :                                      location_t strloc,
    1942                 :             :                                      enum cpp_ttype type,
    1943                 :             :                                      int *out)
    1944                 :             : {
    1945                 :        1264 :   gcc_assert (out);
    1946                 :             : 
    1947                 :        1264 :   cpp_substring_ranges ranges;
    1948                 :        1264 :   const char *err
    1949                 :        1264 :     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
    1950                 :             :   /* Propagate any error message from the lookup unchanged.  */
    1951                 :        1264 :   if (err)
    1952                 :             :     return err;
    1953                 :             : 
    1954                 :         880 :   *out = ranges.get_num_ranges ();
    1955                 :         880 :   return NULL;
    1956                 :        1264 : }
    1957                 :             : 
    1958                 :             : /* Selftests of location handling.  */
    1959                 :             : 
    1960                 :             : /* Verify that compare() on linenum_type handles comparisons over the full
    1961                 :             :    range of the type, using the extreme values 0 and 0xffffffff.  */
    1962                 :             : 
    1963                 :             : static void
    1964                 :           4 : test_linenum_comparisons ()
    1965                 :             : {
    1966                 :           4 :   linenum_type min_line (0);
    1967                 :           4 :   linenum_type max_line (0xffffffff);
    1968                 :           4 :   ASSERT_EQ (0, compare (min_line, min_line));
    1969                 :           4 :   ASSERT_EQ (0, compare (max_line, max_line));
    1970                 :             :   /* The two extremes must compare correctly in both directions.  */
    1971                 :           4 :   ASSERT_GT (compare (max_line, min_line), 0);
    1972                 :           4 :   ASSERT_LT (compare (min_line, max_line), 0);
    1973                 :           4 : }
    1974                 :             : 
    1975                 :             : /* Helper function for verifying location data: return false if LOC (after
    1976                 :             :    resolving any ad-hoc location) is > LINE_MAP_MAX_LOCATION_WITH_COLS, since
    1977                 :             :    such location_t values are treated as having column 0; else true.  */
    1978                 :             : 
    1979                 :             : static bool
    1980                 :       63556 : should_have_column_data_p (location_t loc)
    1981                 :             : {
    1982                 :       63556 :   if (IS_ADHOC_LOC (loc))
    1983                 :       19644 :     loc = get_location_from_adhoc_loc (line_table, loc);
    1984                 :       63556 :   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
    1985                 :        6480 :     return false;
    1986                 :             :   return true;
    1987                 :             : }
    1988                 :             : 
    1989                 :             : /* Selftest for should_have_column_data_p, at and around its threshold.  */
    1990                 :             : 
    1991                 :             : static void
    1992                 :           4 : test_should_have_column_data_p ()
    1993                 :             : {
    1994                 :           4 :   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
    1995                 :           4 :   ASSERT_TRUE
    1996                 :             :     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
    1997                 :           4 :   ASSERT_FALSE
    1998                 :             :     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
    1999                 :           4 : }
    2000                 :             : 
    2001                 :             : /* Verify that LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN on LOC give
    2002                 :             :    EXP_FILENAME/EXP_LINENUM/EXP_COLNUM (the column only if it is tracked).  */
    2003                 :             : 
    2004                 :             : static void
    2005                 :        1068 : assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
    2006                 :             :               location_t loc)
    2007                 :             : {
    2008                 :        1068 :   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
    2009                 :        1068 :   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
    2010                 :             :   /* If location_t values are sufficiently high, then column numbers
    2011                 :             :      will be unavailable and LOCATION_COLUMN (loc) will be 0.
    2012                 :             :      When close to the threshold, column numbers *may* be present: if
    2013                 :             :      the final linemap before the threshold contains a line that straddles
    2014                 :             :      the threshold, locations in that line have column information.
    2015                 :        1068 :   if (should_have_column_data_p (loc))
    2016                 :         644 :     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
    2017                 :        1068 : }
    2018                 :             : 
    2019                 :             : /* Various selftests involve constructing a line table and one or more
    2020                 :             :    line maps within it.
    2021                 :             : 
    2022                 :             :    For maximum test coverage we want to run these tests with a variety
    2023                 :             :    of situations:
    2024                 :             :    - line_table->default_range_bits: some frontends use a non-zero value
    2025                 :             :    and others use zero
    2026                 :             :    - the fallback modes within line-map.cc: there are various threshold
    2027                 :             :    values for location_t beyond which line-map.cc changes
    2028                 :             :    behavior (disabling of the range-packing optimization, disabling
    2029                 :             :    of column-tracking).  We can exercise these by starting the line_table
    2030                 :             :    at interesting values at or near these thresholds.
    2031                 :             : 
    2032                 :             :    The following class describes a particular case within our test
    2033                 :             :    matrix.  */
    2034                 :             : 
    2035                 :             : class line_table_case
    2036                 :             : {
    2037                 :             : public:
    2038                 :        5668 :   line_table_case (int default_range_bits, int base_location)
    2039                 :        5668 :   : m_default_range_bits (default_range_bits),
    2040                 :        5668 :     m_base_location (base_location)
    2041                 :             :   {}
    2042                 :             :   /* The two parameters of the case, stored as passed to the ctor.  */
    2043                 :             :   int m_default_range_bits;
    2044                 :             :   int m_base_location;
    2045                 :             : };
    2046                 :             : 
    2047                 :             : /* Constructor.  Store the old value of line_table, and create a new one with
    2048                 :             :    the old one's m_reallocator/m_round_alloc_size and zero default_range_bits.  */
    2049                 :             : 
    2050                 :          21 : line_table_test::line_table_test ()
    2051                 :             : {
    2052                 :          21 :   gcc_assert (saved_line_table == NULL);
    2053                 :          21 :   saved_line_table = line_table;
    2054                 :          21 :   line_table = ggc_alloc<line_maps> ();
    2055                 :          21 :   linemap_init (line_table, BUILTINS_LOCATION);
    2056                 :          21 :   gcc_assert (saved_line_table->m_reallocator);
    2057                 :          21 :   line_table->m_reallocator = saved_line_table->m_reallocator;
    2058                 :          21 :   gcc_assert (saved_line_table->m_round_alloc_size);
    2059                 :          21 :   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
    2060                 :          21 :   line_table->default_range_bits = 0;
    2061                 :          21 : }
    2062                 :             : 
    2063                 :             : /* Constructor.  Store the old value of line_table, and create a new one,
    2064                 :             :    using the situation described in CASE_ (a zero base location is ignored).  */
    2065                 :             : 
    2066                 :        5884 : line_table_test::line_table_test (const line_table_case &case_)
    2067                 :             : {
    2068                 :        5884 :   gcc_assert (saved_line_table == NULL);
    2069                 :        5884 :   saved_line_table = line_table;
    2070                 :        5884 :   line_table = ggc_alloc<line_maps> ();
    2071                 :        5884 :   linemap_init (line_table, BUILTINS_LOCATION);
    2072                 :        5884 :   gcc_assert (saved_line_table->m_reallocator);
    2073                 :        5884 :   line_table->m_reallocator = saved_line_table->m_reallocator;
    2074                 :        5884 :   gcc_assert (saved_line_table->m_round_alloc_size);
    2075                 :        5884 :   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
    2076                 :        5884 :   line_table->default_range_bits = case_.m_default_range_bits;
    2077                 :        5884 :   if (case_.m_base_location)
    2078                 :             :     {
    2079                 :        5390 :       line_table->highest_location = case_.m_base_location;
    2080                 :        5390 :       line_table->highest_line = case_.m_base_location;
    2081                 :             :     }
    2082                 :        5884 : }
    2083                 :             : 
    2084                 :             : /* Destructor.  Restore the old value of line_table and clear the saved one.  */
    2085                 :             : 
    2086                 :        5905 : line_table_test::~line_table_test ()
    2087                 :             : {
    2088                 :        5905 :   gcc_assert (saved_line_table != NULL);
    2089                 :        5905 :   line_table = saved_line_table;
    2090                 :        5905 :   saved_line_table = NULL;
    2091                 :        5905 : }
    2092                 :             : 
    2093                 :             : /* Verify basic operation of ordinary linemaps.  */
    2094                 :             : 
    2095                 :             : static void
    2096                 :          96 : test_accessing_ordinary_linemaps (const line_table_case &case_)
    2097                 :             : {
    2098                 :          96 :   line_table_test ltt (case_);
    2099                 :             : 
    2100                 :             :   /* Build a simple linemap describing some locations. */
    2101                 :          96 :   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
    2102                 :             : 
    2103                 :          96 :   linemap_line_start (line_table, 1, 100);
    2104                 :          96 :   location_t loc_a = linemap_position_for_column (line_table, 1);
    2105                 :          96 :   location_t loc_b = linemap_position_for_column (line_table, 23);
    2106                 :             : 
    2107                 :          96 :   linemap_line_start (line_table, 2, 100);
    2108                 :          96 :   location_t loc_c = linemap_position_for_column (line_table, 1);
    2109                 :          96 :   location_t loc_d = linemap_position_for_column (line_table, 17);
    2110                 :             : 
    2111                 :             :   /* Example of a very long line.  */
    2112                 :          96 :   linemap_line_start (line_table, 3, 2000);
    2113                 :          96 :   location_t loc_e = linemap_position_for_column (line_table, 700);
    2114                 :             : 
    2115                 :             :   /* Transitioning back to a short line.  */
    2116                 :          96 :   linemap_line_start (line_table, 4, 0);
    2117                 :          96 :   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
    2118                 :             : 
    2119                 :          96 :   if (should_have_column_data_p (loc_back_to_short))
    2120                 :             :     {
    2121                 :             :       /* Verify that we switched to short lines in the linemap.  */
    2122                 :          56 :       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
    2123                 :          56 :       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    2124                 :             :     }
    2125                 :             : 
    2126                 :             :   /* Example of a line that will eventually be seen to be longer
    2127                 :             :      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
    2128                 :             :      below that.  */
    2129                 :          96 :   linemap_line_start (line_table, 5, 2000);
    2130                 :             : 
    2131                 :          96 :   location_t loc_start_of_very_long_line
    2132                 :          96 :     = linemap_position_for_column (line_table, 2000);
    2133                 :          96 :   location_t loc_too_wide
    2134                 :          96 :     = linemap_position_for_column (line_table, 4097);
    2135                 :          96 :   location_t loc_too_wide_2
    2136                 :          96 :     = linemap_position_for_column (line_table, 4098);
    2137                 :             : 
    2138                 :             :   /* ...and back to a sane line length.  */
    2139                 :          96 :   linemap_line_start (line_table, 6, 100);
    2140                 :          96 :   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
    2141                 :             : 
    2142                 :          96 :   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
    2143                 :             : 
    2144                 :             :   /* Multiple files.  */
    2145                 :          96 :   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
    2146                 :          96 :   linemap_line_start (line_table, 1, 200);
    2147                 :          96 :   location_t loc_f = linemap_position_for_column (line_table, 150);
    2148                 :          96 :   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
    2149                 :             : 
    2150                 :             :   /* Verify that we can recover the location info.  */
    2151                 :          96 :   assert_loceq ("foo.c", 1, 1, loc_a);
    2152                 :          96 :   assert_loceq ("foo.c", 1, 23, loc_b);
    2153                 :          96 :   assert_loceq ("foo.c", 2, 1, loc_c);
    2154                 :          96 :   assert_loceq ("foo.c", 2, 17, loc_d);
    2155                 :          96 :   assert_loceq ("foo.c", 3, 700, loc_e);
    2156                 :          96 :   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
    2157                 :             : 
    2158                 :             :   /* In the very wide line, the initial location should be fully tracked.  */
    2159                 :          96 :   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
    2160                 :             :   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
    2161                 :             :      be disabled.  */
    2162                 :          96 :   assert_loceq ("foo.c", 5, 0, loc_too_wide);
    2163                 :          96 :   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
    2164                 :             :   /*...and column-tracking should be re-enabled for subsequent lines.  */
    2165                 :          96 :   assert_loceq ("foo.c", 6, 10, loc_sane_again);
    2166                 :             : 
    2167                 :          96 :   assert_loceq ("bar.c", 1, 150, loc_f);
    2168                 :             : 
    2169                 :          96 :   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
    2170                 :          96 :   ASSERT_TRUE (pure_location_p (line_table, loc_a));
    2171                 :             : 
    2172                 :             :   /* Verify using make_location to build a range, and extracting data
    2173                 :             :      back from it.  */
    2174                 :          96 :   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
    2175                 :          96 :   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
    2176                 :          96 :   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
    2177                 :          96 :   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
    2178                 :          96 :   ASSERT_EQ (loc_b, src_range.m_start);
    2179                 :          96 :   ASSERT_EQ (loc_d, src_range.m_finish);
    2180                 :          96 : }
    2181                 :             : 
    2182                 :             : /* Verify that UNKNOWN_LOCATION has a NULL filename and zero line/column.  */
    2183                 :             : 
    2184                 :             : static void
    2185                 :           4 : test_unknown_location ()
    2186                 :             : {
    2187                 :           4 :   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
    2188                 :           4 :   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
    2189                 :           4 :   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
    2190                 :           4 : }
    2191                 :             : 
    2192                 :             : /* Verify the file/line/column and builtin-ness of BUILTINS_LOCATION.  */
    2193                 :             : 
    2194                 :             : static void
    2195                 :           4 : test_builtins ()
    2196                 :             : {
    2197                 :           4 :   assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
    2198                 :           4 :   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
    2199                 :           4 : }
    2200                 :             : 
    2201                 :             : /* Regression test for make_location.
    2202                 :             :    Ensure that we use pure locations for the start/finish of the range,
    2203                 :             :    rather than storing a packed or ad-hoc range as the start/finish.  */
    2204                 :             : 
    2205                 :             : static void
    2206                 :          96 : test_make_location_nonpure_range_endpoints (const line_table_case &case_)
    2207                 :             : {
    2208                 :             :   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
    2209                 :             :      with C++ frontend.
    2210                 :             :      ....................0000000001111111111222.
    2211                 :             :      ....................1234567890123456789012.  */
    2212                 :          96 :   const char *content = "     r += !aaa == bbb;\n";
    2213                 :          96 :   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
    2214                 :          96 :   line_table_test ltt (case_);
    2215                 :          96 :   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
    2216                 :             : 
    2217                 :          96 :   const location_t c11 = linemap_position_for_column (line_table, 11);
    2218                 :          96 :   const location_t c12 = linemap_position_for_column (line_table, 12);
    2219                 :          96 :   const location_t c13 = linemap_position_for_column (line_table, 13);
    2220                 :          96 :   const location_t c14 = linemap_position_for_column (line_table, 14);
    2221                 :          96 :   const location_t c21 = linemap_position_for_column (line_table, 21);
    2222                 :             :   /* Skip the rest if C21 is beyond the range of locations with columns.  */
    2223                 :          96 :   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
    2224                 :          32 :     return;
    2225                 :             : 
    2226                 :             :   /* Use column 13 for the caret location, arbitrarily, to verify that we
    2227                 :             :      handle start != caret.  */
    2228                 :          64 :   const location_t aaa = make_location (c13, c12, c14);
    2229                 :          64 :   ASSERT_EQ (c13, get_pure_location (aaa));
    2230                 :          64 :   ASSERT_EQ (c12, get_start (aaa));
    2231                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
    2232                 :          64 :   ASSERT_EQ (c14, get_finish (aaa));
    2233                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
    2234                 :             : 
    2235                 :             :   /* Make a location using a location with a range as the start-point.  */
    2236                 :          64 :   const location_t not_aaa = make_location (c11, aaa, c14);
    2237                 :          64 :   ASSERT_EQ (c11, get_pure_location (not_aaa));
    2238                 :             :   /* It should use the start location of the range, not store the range
    2239                 :             :      itself.  */
    2240                 :          64 :   ASSERT_EQ (c12, get_start (not_aaa));
    2241                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
    2242                 :          64 :   ASSERT_EQ (c14, get_finish (not_aaa));
    2243                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
    2244                 :             : 
    2245                 :             :   /* Similarly, build a range, then use it as the end-point of another.  */
    2246                 :          64 :   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
    2247                 :          64 :   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
    2248                 :          64 :   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
    2249                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
    2250                 :          64 :   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
    2251                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
    2252                 :          64 :   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
    2253                 :             :   /* It should use the finish location of the range, not store the range
    2254                 :             :      itself.  */
    2255                 :          64 :   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
    2256                 :          64 :   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
    2257                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
    2258                 :          64 :   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
    2259                 :          64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
    2260                 :          96 : }
    2261                 :             : 
    2262                 :             : /* Verify reading of input files (e.g. for caret-based diagnostics).  */
    2263                 :             : 
    2264                 :             : static void
    2265                 :           4 : test_reading_source_line ()
    2266                 :             : {
    2267                 :             :   /* Create a tempfile of three lines; the last lacks a trailing newline.  */
    2268                 :           4 :   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
    2269                 :             :                         "01234567890123456789\n"
    2270                 :             :                         "This is the test text\n"
    2271                 :           4 :                         "This is the 3rd line");
    2272                 :           4 :   file_cache fc;
    2273                 :             : 
    2274                 :             :   /* Read back a specific line from the tempfile.  */
    2275                 :           4 :   char_span source_line = fc.get_source_line (tmp.get_filename (), 3);
    2276                 :           4 :   ASSERT_TRUE (source_line);
    2277                 :           4 :   ASSERT_TRUE (source_line.get_buffer () != NULL);
    2278                 :           4 :   ASSERT_EQ (20, source_line.length ());
    2279                 :           4 :   ASSERT_TRUE (!strncmp ("This is the 3rd line",
    2280                 :             :                          source_line.get_buffer (), source_line.length ()));
    2281                 :             :   /* Lines can be read out of order: go back and fetch line 2.  */
    2282                 :           4 :   source_line = fc.get_source_line (tmp.get_filename (), 2);
    2283                 :           4 :   ASSERT_TRUE (source_line);
    2284                 :           4 :   ASSERT_TRUE (source_line.get_buffer () != NULL);
    2285                 :           4 :   ASSERT_EQ (21, source_line.length ());
    2286                 :           4 :   ASSERT_TRUE (!strncmp ("This is the test text",
    2287                 :             :                          source_line.get_buffer (), source_line.length ()));
    2288                 :             :   /* The file only has three lines, so line 4 yields a NULL span.  */
    2289                 :           4 :   source_line = fc.get_source_line (tmp.get_filename (), 4);
    2290                 :           4 :   ASSERT_FALSE (source_line);
    2291                 :           4 :   ASSERT_TRUE (source_line.get_buffer () == NULL);
    2292                 :           4 : }
    2293                 :             : 
    2294                 :             : /* Tests of lexing.  */
    2295                 :             : 
    2296                 :             : /* Verify that token TOK from PARSER has cpp_token_as_text
    2297                 :             :    equal to EXPECTED_TEXT (compared as strings via ASSERT_STREQ).  */
    2298                 :             : 
    2299                 :             : #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
    2300                 :             :   SELFTEST_BEGIN_STMT                                                   \
    2301                 :             :     unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    2302                 :             :     ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
    2303                 :             :   SELFTEST_END_STMT
    2304                 :             : 
    2305                 :             : /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
    2306                 :             :    and ranges from EXP_START_COL to EXP_FINISH_COL.
    2307                 :             :    Use LOC as the effective location of the selftest.  */
    2308                 :             : 
    2309                 :             : static void
    2310                 :         576 : assert_token_loc_eq (const location &loc,
    2311                 :             :                      const cpp_token *tok,
    2312                 :             :                      const char *exp_filename, int exp_linenum,
    2313                 :             :                      int exp_start_col, int exp_finish_col)
    2314                 :             : {
    2315                 :         576 :   location_t tok_loc = tok->src_loc;
    2316                 :         576 :   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
    2317                 :         576 :   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
    2318                 :             : 
    2319                 :             :   /* If location_t values are sufficiently high, then column numbers
    2320                 :             :      will be unavailable.  */
    2321                 :         576 :   if (!should_have_column_data_p (tok_loc))
    2322                 :         200 :     return;
    2323                 :             :   /* Both TOK_LOC itself and the start of its range are at EXP_START_COL.  */
    2324                 :         376 :   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
    2325                 :         376 :   source_range tok_range = get_range_from_loc (line_table, tok_loc);
    2326                 :         376 :   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
    2327                 :         376 :   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
    2328                 :             : }
    2329                 :             : 
    2330                 :             : /* Use assert_token_loc_eq to verify TOK->src_loc (file, line, column range),
    2331                 :             :    using SELFTEST_LOCATION as the effective location of the selftest.  */
    2332                 :             : 
    2333                 :             : #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
    2334                 :             :                             EXP_START_COL, EXP_FINISH_COL) \
    2335                 :             :   assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
    2336                 :             :                        (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
    2337                 :             : 
    2338                 :             : /* Test of lexing a file using libcpp, verifying tokens and their
    2339                 :             :    location information.  */
    2340                 :             : 
    2341                 :             : static void
    2342                 :          96 : test_lexer (const line_table_case &case_)
    2343                 :             : {
    2344                 :             :   /* Create a tempfile and write some text to it.  */
    2345                 :          96 :   const char *content =
    2346                 :             :     /*00000000011111111112222222222333333.3333444444444.455555555556
    2347                 :             :       12345678901234567890123456789012345.6789012345678.901234567890.  */
    2348                 :             :     ("test_name /* c-style comment */\n"
    2349                 :             :      "                                  \"test literal\"\n"
    2350                 :             :      " // test c++-style comment\n"
    2351                 :             :      "   42\n");
    2352                 :          96 :   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
    2353                 :             : 
    2354                 :          96 :   line_table_test ltt (case_);
    2355                 :             : 
    2356                 :          96 :   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
    2357                 :             : 
    2358                 :          96 :   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
    2359                 :          96 :   ASSERT_NE (fname, NULL);
    2360                 :             : 
    2361                 :             :   /* Verify that we get the expected tokens back, with the correct
    2362                 :             :      location information.  */
    2363                 :             : 
    2364                 :          96 :   location_t loc;
    2365                 :          96 :   const cpp_token *tok;
    2366                 :          96 :   tok = cpp_get_token_with_location (parser, &loc);
    2367                 :          96 :   ASSERT_NE (tok, NULL);
    2368                 :          96 :   ASSERT_EQ (tok->type, CPP_NAME);
    2369                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
    2370                 :          96 :   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
    2371                 :             :   /* The C-style comment is skipped; the next token is on line 2.  */
    2372                 :          96 :   tok = cpp_get_token_with_location (parser, &loc);
    2373                 :          96 :   ASSERT_NE (tok, NULL);
    2374                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2375                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
    2376                 :          96 :   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
    2377                 :             :   /* The C++-style comment on line 3 is skipped too.  */
    2378                 :          96 :   tok = cpp_get_token_with_location (parser, &loc);
    2379                 :          96 :   ASSERT_NE (tok, NULL);
    2380                 :          96 :   ASSERT_EQ (tok->type, CPP_NUMBER);
    2381                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
    2382                 :          96 :   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
    2383                 :             :   /* Nothing else should follow.  */
    2384                 :          96 :   tok = cpp_get_token_with_location (parser, &loc);
    2385                 :          96 :   ASSERT_NE (tok, NULL);
    2386                 :          96 :   ASSERT_EQ (tok->type, CPP_EOF);
    2387                 :             : 
    2388                 :          96 :   cpp_finish (parser, NULL);
    2389                 :          96 :   cpp_destroy (parser);
    2390                 :          96 : }
    2391                 :             : 
    2392                 :             : /* Forward decls.  */
    2393                 :             : 
    2394                 :             : class lexer_test;
    2395                 :             : class lexer_test_options;
    2396                 :             : 
    2397                 :             : /* An abstract base class for specifying options of a lexer_test.  The "apply"
    2398                 :             :    vfunc is called by the lexer_test ctor before the main file is read.  */
    2399                 :             : 
    2400                 :         192 : class lexer_test_options
    2401                 :             : {
    2402                 :             :  public:
    2403                 :             :   virtual void apply (lexer_test &) = 0;
    2404                 :             : };
    2405                 :             : 
    2406                 :             : /* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
    2407                 :             :    in its dtor.
    2408                 :             : 
    2409                 :             :    This is needed by class lexer_test to ensure that the cleanup of the
    2410                 :             :    cpp_reader happens *after* the cleanup of the temp_source_file.  */
    2411                 :             : 
    2412                 :             : class cpp_reader_ptr
    2413                 :             : {
    2414                 :             :  public:
    2415                 :        2304 :   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
    2416                 :             :   /* NOTE(review): copies would each finish/destroy m_ptr; don't copy.  */
    2417                 :        2304 :   ~cpp_reader_ptr ()
    2418                 :             :   {
    2419                 :        2304 :     cpp_finish (m_ptr, NULL);
    2420                 :        2304 :     cpp_destroy (m_ptr);
    2421                 :        2304 :   }
    2422                 :             :   /* Allow implicit use wherever a cpp_reader * is expected.  */
    2423                 :        2304 :   operator cpp_reader * () const { return m_ptr; }
    2424                 :             : 
    2425                 :             :  private:
    2426                 :             :   cpp_reader *m_ptr;
    2427                 :             : };
    2428                 :             : 
    2429                 :             : /* A class for writing lexer tests.  */
    2430                 :             : 
    2431                 :             : class lexer_test
    2432                 :             : {
    2433                 :             : public:
    2434                 :             :   lexer_test (const line_table_case &case_, const char *content,
    2435                 :             :               lexer_test_options *options);
    2436                 :             :   ~lexer_test ();
    2437                 :             : 
    2438                 :             :   const cpp_token *get_token ();
    2439                 :             : 
    2440                 :             :   /* The ordering of these fields matters.
    2441                 :             :      The line_table_test must be first, since the cpp_reader_ptr
    2442                 :             :      uses it.
    2443                 :             :      The cpp_reader must be cleaned up *after* the temp_source_file
    2444                 :             :      since the filenames in input.cc's input cache are owned by the
    2445                 :             :      cpp_reader; in particular, when ~temp_source_file evicts the
    2446                 :             :      filename, the filenames must still be alive.  */
    2447                 :             :   line_table_test m_ltt;
    2448                 :             :   cpp_reader_ptr m_parser;
    2449                 :             :   temp_source_file m_tempfile;
    2450                 :             :   file_cache m_file_cache;
    2451                 :             :   string_concat_db m_concats;
    2452                 :             :   bool m_implicitly_expect_EOF;
    2453                 :             : };
    2454                 :             : 
    2455                 :             : /* Use an EBCDIC encoding for the execution charset, specifically
    2456                 :             :    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
    2457                 :             : 
    2458                 :             :    This exercises iconv integration within libcpp.
    2459                 :             :    Not every build of iconv supports the given charset,
    2460                 :             :    so we need to flag this error and handle it gracefully.  */
    2461                 :             : 
    2462                 :             : class ebcdic_execution_charset : public lexer_test_options
    2463                 :             : {
    2464                 :             :  public:
    2465                 :          96 :   ebcdic_execution_charset () : m_num_iconv_errors (0)
    2466                 :             :     {
    2467                 :          96 :       gcc_assert (s_singleton == NULL);
    2468                 :          96 :       s_singleton = this;
    2469                 :          96 :     }
    2470                 :          96 :   ~ebcdic_execution_charset ()
    2471                 :          96 :     {
    2472                 :          96 :       gcc_assert (s_singleton == this);
    2473                 :          96 :       s_singleton = NULL;
    2474                 :          96 :     }
    2475                 :             :   /* Select the IBM1047 narrow charset and install on_diagnostic.  */
    2476                 :          96 :   void apply (lexer_test &test) final override
    2477                 :             :   {
    2478                 :          96 :     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
    2479                 :          96 :     cpp_opts->narrow_charset = "IBM1047";
    2480                 :             : 
    2481                 :          96 :     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
    2482                 :          96 :     callbacks->diagnostic = on_diagnostic;
    2483                 :          96 :   }
    2484                 :             :   /* Diagnostic callback: count iconv-unsupported errors; abort on others.  */
    2485                 :           0 :   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
    2486                 :             :                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
    2487                 :             :                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
    2488                 :             :                              rich_location *richloc ATTRIBUTE_UNUSED,
    2489                 :             :                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
    2490                 :             :     ATTRIBUTE_FPTR_PRINTF(5,0)
    2491                 :             :   {
    2492                 :           0 :     gcc_assert (s_singleton);
    2493                 :             :     /* Keep exgettext from picking this up; it is translated in libcpp.  */
    2494                 :           0 :     const char *msg = "conversion from %s to %s not supported by iconv";
    2495                 :             : #ifdef ENABLE_NLS
    2496                 :           0 :     msg = dgettext ("cpplib", msg);
    2497                 :             : #endif
    2498                 :             :     /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
    2499                 :             :        when the local iconv build doesn't support the conversion.  */
    2500                 :           0 :     if (strcmp (msgid, msg) == 0)
    2501                 :             :       {
    2502                 :           0 :         s_singleton->m_num_iconv_errors++;
    2503                 :           0 :         return true;
    2504                 :             :       }
    2505                 :             : 
    2506                 :             :     /* Otherwise, we have an unexpected error.  */
    2507                 :           0 :     abort ();
    2508                 :             :   }
    2509                 :             : 
    2510                 :          96 :   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
    2511                 :             : 
    2512                 :             :  private:
    2513                 :             :   static ebcdic_execution_charset *s_singleton;
    2514                 :             :   int m_num_iconv_errors;
    2515                 :             : };
    2516                 :             : 
    2517                 :             : ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
    2518                 :             : 
    2519                 :             : /* A lexer_test_options subclass that records a list of diagnostic
    2520                 :             :    messages emitted by the lexer.  */
    2521                 :             : 
    2522                 :             : class lexer_diagnostic_sink : public lexer_test_options
    2523                 :             : {
    2524                 :             :  public:
    2525                 :          96 :   lexer_diagnostic_sink ()
    2526                 :          96 :   {
    2527                 :          96 :     gcc_assert (s_singleton == NULL);
    2528                 :          96 :     s_singleton = this;
    2529                 :          96 :   }
    2530                 :          96 :   ~lexer_diagnostic_sink ()
    2531                 :          96 :   {
    2532                 :          96 :     gcc_assert (s_singleton == this);
    2533                 :          96 :     s_singleton = NULL;
    2534                 :             :     /* Release the message strings recorded by on_diagnostic.  */
    2535                 :          96 :     int i;
    2536                 :          96 :     char *str;
    2537                 :         192 :     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
    2538                 :          96 :       free (str);
    2539                 :          96 :   }
    2540                 :             :   /* Install on_diagnostic as TEST's diagnostic callback.  */
    2541                 :          96 :   void apply (lexer_test &test) final override
    2542                 :             :   {
    2543                 :          96 :     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
    2544                 :          96 :     callbacks->diagnostic = on_diagnostic;
    2545                 :          96 :   }
    2546                 :             :   /* Diagnostic callback: format the message and record it.  */
    2547                 :          96 :   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
    2548                 :             :                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
    2549                 :             :                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
    2550                 :             :                              rich_location *richloc ATTRIBUTE_UNUSED,
    2551                 :             :                              const char *msgid, va_list *ap)
    2552                 :             :     ATTRIBUTE_FPTR_PRINTF(5,0)
    2553                 :             :   {
    2554                 :          96 :     char *msg = xvasprintf (msgid, *ap);
    2555                 :          96 :     s_singleton->m_diagnostics.safe_push (msg);
    2556                 :          96 :     return true;
    2557                 :             :   }
    2558                 :             : 
    2559                 :             :   auto_vec<char *> m_diagnostics;
    2560                 :             : 
    2561                 :             :  private:
    2562                 :             :   static lexer_diagnostic_sink *s_singleton;
    2563                 :             : };
    2564                 :             : 
    2565                 :             : lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
    2566                 :             : 
    2567                 :             : /* Constructor.  Override line_table with a new instance based on CASE_,
    2568                 :             :    and write CONTENT to a tempfile.  Create a cpp_reader, apply OPTIONS
    2569                 :             :    (if non-NULL), and use the reader to start parsing the tempfile.  */
    2570                 :             : 
    2571                 :        2304 : lexer_test::lexer_test (const line_table_case &case_, const char *content,
    2572                 :        2304 :                         lexer_test_options *options)
    2573                 :        2304 : : m_ltt (case_),
    2574                 :        2304 :   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
    2575                 :             :   /* Create a tempfile and write the text to it.  */
    2576                 :        2304 :   m_tempfile (SELFTEST_LOCATION, ".c", content),
    2577                 :        2304 :   m_concats (),
    2578                 :        2304 :   m_implicitly_expect_EOF (true)
    2579                 :             : {
    2580                 :        2304 :   if (options)
    2581                 :         192 :     options->apply (*this);
    2582                 :             :   /* Done after OPTIONS is applied, presumably so charset changes are seen.  */
    2583                 :        2304 :   cpp_init_iconv (m_parser);
    2584                 :             : 
    2585                 :             :   /* Parse the file.  */
    2586                 :        2304 :   const char *fname = cpp_read_main_file (m_parser,
    2587                 :             :                                           m_tempfile.get_filename ());
    2588                 :        2304 :   ASSERT_NE (fname, NULL);
    2589                 :        2304 : }
    2590                 :             : 
    2591                 :             : /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
    2592                 :             : 
    2593                 :        2304 : lexer_test::~lexer_test ()
    2594                 :             : {
    2595                 :        2304 :   location_t loc;
    2596                 :        2304 :   const cpp_token *tok;
    2597                 :             : 
    2598                 :        2304 :   if (m_implicitly_expect_EOF)
    2599                 :             :     {
    2600                 :        2208 :       tok = cpp_get_token_with_location (m_parser, &loc);
    2601                 :        2208 :       ASSERT_NE (tok, NULL);
    2602                 :        2208 :       ASSERT_EQ (tok->type, CPP_EOF);
    2603                 :             :     }
    2604                 :        2304 : }
    2605                 :             : 
    2606                 :             : /* Get the next token from m_parser.  */
    2607                 :             : 
    2608                 :             : const cpp_token *
    2609                 :        3936 : lexer_test::get_token ()
    2610                 :             : {
    2611                 :        3936 :   location_t loc;
    2612                 :        3936 :   const cpp_token *tok;
    2613                 :             : 
    2614                 :        3936 :   tok = cpp_get_token_with_location (m_parser, &loc);
    2615                 :        3936 :   ASSERT_NE (tok, NULL);
    2616                 :        3936 :   return tok;
    2617                 :             : }
    2618                 :             : 
    2619                 :             : /* Verify that locations within string literals are correctly handled.  */
    2620                 :             : 
    2621                 :             : /* Verify get_source_range_for_substring for token(s) at STRLOC,
    2622                 :             :    using the string concatenation database for TEST.
    2623                 :             : 
    2624                 :             :    Assert that the character at index IDX is on EXPECTED_LINE,
    2625                 :             :    and that it begins at column EXPECTED_START_COL and ends at
    2626                 :             :    EXPECTED_FINISH_COL (unless the locations are beyond
    2627                 :             :    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
    2628                 :             :    columns).  */
    2629                 :             : 
    2630                 :             : static void
    2631                 :       23216 : assert_char_at_range (const location &loc,
    2632                 :             :                       lexer_test& test,
    2633                 :             :                       location_t strloc, enum cpp_ttype type, int idx,
    2634                 :             :                       int expected_line, int expected_start_col,
    2635                 :             :                       int expected_finish_col)
    2636                 :             : {
    2637                 :       23216 :   cpp_reader *pfile = test.m_parser;
    2638                 :       23216 :   string_concat_db *concats = &test.m_concats;
    2639                 :             : 
    2640                 :       23216 :   source_range actual_range = source_range();
    2641                 :       23216 :   const char *err
    2642                 :       23216 :     = get_source_range_for_char (pfile, test.m_file_cache,
    2643                 :             :                                  concats, strloc, type, idx,
    2644                 :             :                                  &actual_range);
    2645                 :       23216 :   if (should_have_column_data_p (strloc))
    2646                 :       18128 :     ASSERT_EQ_AT (loc, NULL, err);
    2647                 :             :   else
    2648                 :             :     {
    2649                 :        5088 :       ASSERT_STREQ_AT (loc,
    2650                 :             :                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
    2651                 :             :                        err);
    2652                 :        5088 :       return;
    2653                 :             :     }
    2654                 :             : 
    2655                 :       18128 :   int actual_start_line = LOCATION_LINE (actual_range.m_start);
    2656                 :       18128 :   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
    2657                 :       18128 :   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
    2658                 :       18128 :   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
    2659                 :             : 
    2660                 :       18128 :   if (should_have_column_data_p (actual_range.m_start))
    2661                 :             :     {
    2662                 :       18128 :       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
    2663                 :       18128 :       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
    2664                 :             :     }
    2665                 :       18128 :   if (should_have_column_data_p (actual_range.m_finish))
    2666                 :             :     {
    2667                 :       18128 :       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
    2668                 :       18128 :       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
    2669                 :             :     }
    2670                 :             : }
    2671                 :             : 
    2672                 :             : /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
    2673                 :             :    the effective location of any errors.  */
    2674                 :             : 
    2675                 :             : #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
    2676                 :             :                              EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
    2677                 :             :   assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
    2678                 :             :                         (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
    2679                 :             :                         (EXPECTED_FINISH_COL))
    2680                 :             : 
    2681                 :             : /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
    2682                 :             :    using the string concatenation database for TEST.
    2683                 :             : 
    2684                 :             :    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
    2685                 :             : 
    2686                 :             : static void
    2687                 :        1264 : assert_num_substring_ranges (const location &loc,
    2688                 :             :                              lexer_test& test,
    2689                 :             :                              location_t strloc,
    2690                 :             :                              enum cpp_ttype type,
    2691                 :             :                              int expected_num_ranges)
    2692                 :             : {
    2693                 :        1264 :   cpp_reader *pfile = test.m_parser;
    2694                 :        1264 :   string_concat_db *concats = &test.m_concats;
    2695                 :             : 
    2696                 :        1264 :   int actual_num_ranges = -1;
    2697                 :        1264 :   const char *err
    2698                 :        1264 :     = get_num_source_ranges_for_substring (pfile, test.m_file_cache,
    2699                 :             :                                            concats, strloc, type,
    2700                 :             :                                            &actual_num_ranges);
    2701                 :        1264 :   if (should_have_column_data_p (strloc))
    2702                 :         880 :     ASSERT_EQ_AT (loc, NULL, err);
    2703                 :             :   else
    2704                 :             :     {
    2705                 :         384 :       ASSERT_STREQ_AT (loc,
    2706                 :             :                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
    2707                 :             :                        err);
    2708                 :         384 :       return;
    2709                 :             :     }
    2710                 :         880 :   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
    2711                 :             : }
    2712                 :             : 
    2713                 :             : /* Macro for calling assert_num_substring_ranges, supplying
    2714                 :             :    SELFTEST_LOCATION for the effective location of any errors.  */
    2715                 :             : 
    2716                 :             : #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
    2717                 :             :                                     EXPECTED_NUM_RANGES)                \
    2718                 :             :   assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
    2719                 :             :                                (TYPE), (EXPECTED_NUM_RANGES))
    2720                 :             : 
    2721                 :             : 
    2722                 :             : /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
    2723                 :             :    returns an error (using the string concatenation database for TEST).  */
    2724                 :             : 
    2725                 :             : static void
    2726                 :         632 : assert_has_no_substring_ranges (const location &loc,
    2727                 :             :                                 lexer_test& test,
    2728                 :             :                                 location_t strloc,
    2729                 :             :                                 enum cpp_ttype type,
    2730                 :             :                                 const char *expected_err)
    2731                 :             : {
    2732                 :         632 :   cpp_reader *pfile = test.m_parser;
    2733                 :         632 :   string_concat_db *concats = &test.m_concats;
    2734                 :         632 :   cpp_substring_ranges ranges;
    2735                 :         632 :   const char *actual_err
    2736                 :         632 :     = get_substring_ranges_for_loc (pfile, test.m_file_cache, concats, strloc,
    2737                 :             :                                     type, ranges);
    2738                 :         632 :   if (should_have_column_data_p (strloc))
    2739                 :         440 :     ASSERT_STREQ_AT (loc, expected_err, actual_err);
    2740                 :             :   else
    2741                 :         192 :     ASSERT_STREQ_AT (loc,
    2742                 :             :                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
    2743                 :             :                      actual_err);
    2744                 :         632 : }
    2745                 :             : 
    2746                 :             : #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
    2747                 :             :     assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
    2748                 :             :                                     (STRLOC), (TYPE), (ERR))
    2749                 :             : 
    2750                 :             : /* Lex a simple string literal.  Verify the substring location data, before
    2751                 :             :    and after running cpp_interpret_string on it.  */
    2752                 :             : 
    2753                 :             : static void
    2754                 :          96 : test_lexer_string_locations_simple (const line_table_case &case_)
    2755                 :             : {
    2756                 :             :   /* Digits 0-9 (with 0 at column 10), the simple way.
    2757                 :             :      ....................000000000.11111111112.2222222223333333333
    2758                 :             :      ....................123456789.01234567890.1234567890123456789
    2759                 :             :      We add a trailing comment to ensure that we correctly locate
    2760                 :             :      the end of the string literal token.  */
    2761                 :          96 :   const char *content = "        \"0123456789\" /* not a string */\n";
    2762                 :          96 :   lexer_test test (case_, content, NULL);
    2763                 :             : 
    2764                 :             :   /* Verify that we get the expected token back, with the correct
    2765                 :             :      location information.  */
    2766                 :          96 :   const cpp_token *tok = test.get_token ();
    2767                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2768                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
    2769                 :          96 :   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
    2770                 :             : 
    2771                 :             :   /* At this point in lexing, the quote characters are treated as part of
    2772                 :             :      the string (they are stripped off by cpp_interpret_string).  */
    2773                 :             : 
    2774                 :          96 :   ASSERT_EQ (tok->val.str.len, 12);
    2775                 :             : 
    2776                 :             :   /* Verify that cpp_interpret_string works.  */
    2777                 :          96 :   cpp_string dst_string;
    2778                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    2779                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2780                 :             :                                       &dst_string, type);
    2781                 :          96 :   ASSERT_TRUE (result);
    2782                 :          96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    2783                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    2784                 :             : 
    2785                 :             :   /* Verify ranges of individual characters.  This no longer includes the
    2786                 :             :      opening quote, but does include the closing quote.  */
    2787                 :        1152 :   for (int i = 0; i <= 10; i++)
    2788                 :        1056 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
    2789                 :             :                           10 + i, 10 + i);
    2790                 :             : 
    2791                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
    2792                 :          96 : }
    2793                 :             : 
    2794                 :             : /* As test_lexer_string_locations_simple, but use an EBCDIC execution
    2795                 :             :    encoding.  */
    2796                 :             : 
    2797                 :             : static void
    2798                 :          96 : test_lexer_string_locations_ebcdic (const line_table_case &case_)
    2799                 :             : {
    2800                 :             :   /* EBCDIC support requires iconv.  */
    2801                 :          96 :   if (!HAVE_ICONV)
    2802                 :           0 :     return;
    2803                 :             : 
    2804                 :             :   /* Digits 0-9 (with 0 at column 10), the simple way.
    2805                 :             :      ....................000000000.11111111112.2222222223333333333
    2806                 :             :      ....................123456789.01234567890.1234567890123456789
    2807                 :             :      We add a trailing comment to ensure that we correctly locate
    2808                 :             :      the end of the string literal token.  */
    2809                 :          96 :   const char *content = "        \"0123456789\" /* not a string */\n";
    2810                 :          96 :   ebcdic_execution_charset use_ebcdic;
    2811                 :          96 :   lexer_test test (case_, content, &use_ebcdic);
    2812                 :             : 
    2813                 :             :   /* Verify that we get the expected token back, with the correct
    2814                 :             :      location information.  */
    2815                 :          96 :   const cpp_token *tok = test.get_token ();
    2816                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2817                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
    2818                 :          96 :   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
    2819                 :             : 
    2820                 :             :   /* At this point in lexing, the quote characters are treated as part of
    2821                 :             :      the string (they are stripped off by cpp_interpret_string).  */
    2822                 :             : 
    2823                 :          96 :   ASSERT_EQ (tok->val.str.len, 12);
    2824                 :             : 
    2825                 :             :   /* The remainder of the test requires an iconv implementation that
    2826                 :             :      can convert from UTF-8 to the EBCDIC encoding requested above.  */
    2827                 :          96 :   if (use_ebcdic.iconv_errors_occurred_p ())
    2828                 :           0 :     return;
    2829                 :             : 
    2830                 :             :   /* Verify that cpp_interpret_string works.  */
    2831                 :          96 :   cpp_string dst_string;
    2832                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    2833                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2834                 :             :                                       &dst_string, type);
    2835                 :          96 :   ASSERT_TRUE (result);
    2836                 :             :   /* We should now have EBCDIC-encoded text, specifically
    2837                 :             :      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
    2838                 :             :      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
    2839                 :          96 :   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
    2840                 :             :                 (const char *)dst_string.text);
    2841                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    2842                 :             : 
    2843                 :             :   /* Verify that we don't attempt to record substring location information
    2844                 :             :      for such cases.  */
    2845                 :          96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    2846                 :             :     (test, tok->src_loc, type,
    2847                 :             :      "execution character set != source character set");
    2848                 :          96 : }
    2849                 :             : 
    2850                 :             : /* Lex a string literal containing a hex-escaped character.
    2851                 :             :    Verify the substring location data, before and after running
    2852                 :             :    cpp_interpret_string on it.  */
    2853                 :             : 
    2854                 :             : static void
    2855                 :          96 : test_lexer_string_locations_hex (const line_table_case &case_)
    2856                 :             : {
    2857                 :             :   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
    2858                 :             :      and with a space in place of digit 6, to terminate the escaped
    2859                 :             :      hex code.
    2860                 :             :      ....................000000000.111111.11112222.
    2861                 :             :      ....................123456789.012345.67890123.  */
    2862                 :          96 :   const char *content = "        \"01234\\x35 789\"\n";
    2863                 :          96 :   lexer_test test (case_, content, NULL);
    2864                 :             : 
    2865                 :             :   /* Verify that we get the expected token back, with the correct
    2866                 :             :      location information.  */
    2867                 :          96 :   const cpp_token *tok = test.get_token ();
    2868                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2869                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
    2870                 :          96 :   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
    2871                 :             : 
    2872                 :             :   /* At this point in lexing, the quote characters are treated as part of
    2873                 :             :      the string (they are stripped off by cpp_interpret_string).  */
    2874                 :          96 :   ASSERT_EQ (tok->val.str.len, 15);
    2875                 :             : 
    2876                 :             :   /* Verify that cpp_interpret_string works.  */
    2877                 :          96 :   cpp_string dst_string;
    2878                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    2879                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2880                 :             :                                       &dst_string, type);
    2881                 :          96 :   ASSERT_TRUE (result);
    2882                 :          96 :   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
    2883                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    2884                 :             : 
    2885                 :             :   /* Verify ranges of individual characters.  This no longer includes the
    2886                 :             :      opening quote, but does include the closing quote.  */
    2887                 :         576 :   for (int i = 0; i <= 4; i++)
    2888                 :         480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2889                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
    2890                 :         576 :   for (int i = 6; i <= 10; i++)
    2891                 :         480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
    2892                 :             : 
    2893                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
    2894                 :          96 : }
    2895                 :             : 
    2896                 :             : /* Lex a string literal containing an octal-escaped character.
    2897                 :             :    Verify the substring location data after running cpp_interpret_string
    2898                 :             :    on it.  */
    2899                 :             : 
    2900                 :             : static void
    2901                 :          96 : test_lexer_string_locations_oct (const line_table_case &case_)
    2902                 :             : {
    2903                 :             :   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
    2904                 :             :      and with a space in place of digit 6, to terminate the escaped
    2905                 :             :      octal code.
    2906                 :             :      ....................000000000.111111.11112222.2222223333333333444
    2907                 :             :      ....................123456789.012345.67890123.4567890123456789012  */
    2908                 :          96 :   const char *content = "        \"01234\\065 789\" /* not a string */\n";
    2909                 :          96 :   lexer_test test (case_, content, NULL);
    2910                 :             : 
    2911                 :             :   /* Verify that we get the expected token back, with the correct
    2912                 :             :      location information.  */
    2913                 :          96 :   const cpp_token *tok = test.get_token ();
    2914                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2915                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
    2916                 :             : 
    2917                 :             :   /* Verify that cpp_interpret_string works.  */
    2918                 :          96 :   cpp_string dst_string;
    2919                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    2920                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2921                 :             :                                       &dst_string, type);
    2922                 :          96 :   ASSERT_TRUE (result);
    2923                 :          96 :   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
    2924                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    2925                 :             : 
    2926                 :             :   /* Verify ranges of individual characters.  This no longer includes the
    2927                 :             :      opening quote, but does include the closing quote.  */
    2928                 :         576 :   for (int i = 0; i < 5; i++)
    2929                 :         480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2930                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
    2931                 :         576 :   for (int i = 6; i <= 10; i++)
    2932                 :         480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
    2933                 :             : 
    2934                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
    2935                 :          96 : }
    2936                 :             : 
    2937                 :             : /* Test of string literal containing letter escapes.  */
    2938                 :             : 
    2939                 :             : static void
    2940                 :          96 : test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
    2941                 :             : {
    2942                 :             :   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
    2943                 :             :      .....................000000000.1.11111.1.1.11222.22222223333333
    2944                 :             :      .....................123456789.0.12345.6.7.89012.34567890123456.  */
    2945                 :          96 :   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
    2946                 :          96 :   lexer_test test (case_, content, NULL);
    2947                 :             : 
    2948                 :             :   /* Verify that we get the expected tokens back.  */
    2949                 :          96 :   const cpp_token *tok = test.get_token ();
    2950                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2951                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
    2952                 :             : 
    2953                 :             :   /* Verify ranges of individual characters. */
    2954                 :             :   /* "\t".  */
    2955                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2956                 :             :                         0, 1, 10, 11);
    2957                 :             :   /* "foo". */
    2958                 :         384 :   for (int i = 1; i <= 3; i++)
    2959                 :         288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2960                 :             :                           i, 1, 11 + i, 11 + i);
    2961                 :             :   /* "\\" and "\n".  */
    2962                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2963                 :             :                         4, 1, 15, 16);
    2964                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2965                 :             :                         5, 1, 17, 18);
    2966                 :             : 
    2967                 :             :   /* "bar" and closing quote for nul-terminator.  */
    2968                 :         480 :   for (int i = 6; i <= 9; i++)
    2969                 :         384 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2970                 :             :                           i, 1, 13 + i, 13 + i);
    2971                 :             : 
    2972                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
    2973                 :          96 : }
    2974                 :             : 
    2975                 :             : /* Another test of a string literal containing a letter escape, this
    2976                 :             :    time with ordinary characters ahead of it.  Based on string seen in
    2977                 :             :      printf ("%-%\n");
    2978                 :             :    in gcc.dg/format/c90-printf-1.c.  */
    2979                 :             : 
    2980                 :             : static void
    2981                 :          96 : test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
    2982                 :             : {
    2983                 :             :   /* .....................000000000.1111.11.111122222222223.
    2984                 :             :      .....................123456789.0123.45.678901234567890.  */
    2985                 :          96 :   const char *content = ("        \"%-%\\n\" /* non-str */\n");
    2986                 :          96 :   lexer_test test (case_, content, NULL);
    2987                 :             : 
    2988                 :             :   /* Verify that we get the expected string token back.  */
    2989                 :          96 :   const cpp_token *tok = test.get_token ();
    2990                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2991                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
    2992                 :             : 
    2993                 :             :   /* Verify ranges of individual characters.  */
    2994                 :             :   /* "%-%": one source column each, 10-12.  */
    2995                 :         384 :   for (int i = 0; i < 3; i++)
    2996                 :         288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2997                 :             :                           i, 1, 10 + i, 10 + i);
    2998                 :             :   /* "\n": a single character covering columns 13-14.  */
    2999                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    3000                 :             :                         3, 1, 13, 14);
    3001                 :             : 
    3002                 :             :   /* Closing quote (column 15) for nul-terminator.  */
    3003                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    3004                 :             :                         4, 1, 15, 15);
    3005                 :             : 
    3006                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
    3007                 :          96 : }
    3008                 :             : 
    3009                 :             : /* Lex a string literal containing four-hex-digit UCNs (\uXXXX).
    3010                 :             :    Verify the substring location data after running cpp_interpret_string
    3011                 :             :    on it.  */
    3012                 :             : 
    3013                 :             : static void
    3014                 :          96 : test_lexer_string_locations_ucn4 (const line_table_case &case_)
    3015                 :             : {
    3016                 :             :   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
    3017                 :             :      as UCN 4.
    3018                 :             :      ....................000000000.111111.111122.222222223.33333333344444
    3019                 :             :      ....................123456789.012345.678901.234567890.12345678901234  */
    3020                 :          96 :   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
    3021                 :          96 :   lexer_test test (case_, content, NULL);
    3022                 :             : 
    3023                 :             :   /* Verify that we get the expected token back, with the correct
    3024                 :             :      location information.  */
    3025                 :          96 :   const cpp_token *tok = test.get_token ();
    3026                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3027                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
    3028                 :             : 
    3029                 :             :   /* Verify that cpp_interpret_string works.
    3030                 :             :      The string should be encoded in the execution character
    3031                 :             :      set.  Assuming that is UTF-8, we should have the following:
    3032                 :             :      -----------  ----  -----  -------  ----------------
    3033                 :             :      Byte offset  Byte  Octal  Unicode  Source Column(s)
    3034                 :             :      -----------  ----  -----  -------  ----------------
    3035                 :             :      0            0x30         '0'      10
    3036                 :             :      1            0x31         '1'      11
    3037                 :             :      2            0x32         '2'      12
    3038                 :             :      3            0x33         '3'      13
    3039                 :             :      4            0x34         '4'      14
    3040                 :             :      5            0xE2  \342   U+2174   15-20
    3041                 :             :      6            0x85  \205    (cont)  15-20
    3042                 :             :      7            0xB4  \264    (cont)  15-20
    3043                 :             :      8            0xE2  \342   U+2175   21-26
    3044                 :             :      9            0x85  \205    (cont)  21-26
    3045                 :             :      10           0xB5  \265    (cont)  21-26
    3046                 :             :      11           0x37         '7'      27
    3047                 :             :      12           0x38         '8'      28
    3048                 :             :      13           0x39         '9'      29
    3049                 :             :      14           0x00                  30 (closing quote)
    3050                 :             :      -----------  ----  -----  -------  ----------------.  */
    3051                 :             : 
    3052                 :          96 :   cpp_string dst_string;
    3053                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3054                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3055                 :             :                                       &dst_string, type);
    3056                 :          96 :   ASSERT_TRUE (result);
    3057                 :          96 :   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
    3058                 :             :                 (const char *)dst_string.text);
    3059                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3060                 :             : 
    3061                 :             :   /* Verify ranges of individual characters.  This no longer includes the
    3062                 :             :      opening quote, but does include the closing quote.
    3063                 :             :      '01234' at columns 10-14.  */
    3064                 :         576 :   for (int i = 0; i <= 4; i++)
    3065                 :         480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    3066                 :             :   /* U+2174: each of its three bytes maps to the whole UCN, 15-20.  */
    3067                 :         384 :   for (int i = 5; i <= 7; i++)
    3068                 :         288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
    3069                 :             :   /* U+2175: likewise, columns 21-26.  */
    3070                 :         384 :   for (int i = 8; i <= 10; i++)
    3071                 :         288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
    3072                 :             :   /* '789' and nul terminator at columns 27-30.  */
    3073                 :         480 :   for (int i = 11; i <= 14; i++)
    3074                 :         384 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
    3075                 :             : 
    3076                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
    3077                 :          96 : }
    3078                 :             : 
    3079                 :             : /* Lex a string literal containing eight-hex-digit UCNs (\UXXXXXXXX).
    3080                 :             :    Verify the substring location data after running cpp_interpret_string
    3081                 :             :    on it.  */
    3082                 :             : 
    3083                 :             : static void
    3084                 :          96 : test_lexer_string_locations_ucn8 (const line_table_case &case_)
    3085                 :             : {
    3086                 :             :   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
    3087                 :             :      ....................000000000.111111.1111222222.2222333333333.344444
    3088                 :             :      ....................123456789.012345.6789012345.6789012345678.901234  */
    3089                 :          96 :   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
    3090                 :          96 :   lexer_test test (case_, content, NULL);
    3091                 :             : 
    3092                 :             :   /* Verify that we get the expected token back, with the correct
    3093                 :             :      location information.  */
    3094                 :          96 :   const cpp_token *tok = test.get_token ();
    3095                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3096                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
    3097                 :             :                            "\"01234\\U00002174\\U00002175789\"");
    3098                 :             : 
    3099                 :             :   /* Verify that cpp_interpret_string works.
    3100                 :             :      The UTF-8 encoding of the string is identical to that from
    3101                 :             :      the ucn4 testcase above; the only difference is the column
    3102                 :             :      locations, since each UCN is ten source characters wide here.  */
    3103                 :          96 :   cpp_string dst_string;
    3104                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3105                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3106                 :             :                                       &dst_string, type);
    3107                 :          96 :   ASSERT_TRUE (result);
    3108                 :          96 :   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
    3109                 :             :                 (const char *)dst_string.text);
    3110                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3111                 :             : 
    3112                 :             :   /* Verify ranges of individual characters.  This no longer includes the
    3113                 :             :      opening quote, but does include the closing quote.
    3114                 :             :      '01234' at columns 10-14.  */
    3115                 :         576 :   for (int i = 0; i <= 4; i++)
    3116                 :         480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    3117                 :             :   /* U+2174: each of its three bytes maps to the whole UCN, 15-24.  */
    3118                 :         384 :   for (int i = 5; i <= 7; i++)
    3119                 :         288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
    3120                 :             :   /* U+2175: likewise, columns 25-34.  */
    3121                 :         384 :   for (int i = 8; i <= 10; i++)
    3122                 :         288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
    3123                 :             :   /* '789' at columns 35-37  */
    3124                 :         384 :   for (int i = 11; i <= 13; i++)
    3125                 :         288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
    3126                 :             :   /* Closing quote/nul-terminator at column 38.  */
    3127                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
    3128                 :             : 
    3129                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
    3130                 :          96 : }
    3131                 :             : 
    3132                 :             : /* Fetch a big-endian 32-bit value and convert to host endianness.
                         :             :    The four bytes at PTR_BE_VALUE are read individually, most significant
                         :             :    first, so the result is the same whatever the host byte order.  */
    3133                 :             : 
    3134                 :             : static uint32_t
    3135                 :         768 : uint32_from_big_endian (const uint32_t *ptr_be_value)
    3136                 :             : {
    3137                 :         768 :   const unsigned char *buf = (const unsigned char *)ptr_be_value;
    3138                 :         768 :   return (((uint32_t) buf[0] << 24)
    3139                 :         768 :           | ((uint32_t) buf[1] << 16)
    3140                 :         768 :           | ((uint32_t) buf[2] << 8)
    3141                 :         768 :           | (uint32_t) buf[3]);
    3142                 :             : }
    3143                 :             : 
    3144                 :             : /* Lex a wide string literal and verify that attempts to read substring
    3145                 :             :    location data from it fail gracefully.  */
    3146                 :             : 
    3147                 :             : static void
    3148                 :          96 : test_lexer_string_locations_wide_string (const line_table_case &case_)
    3149                 :             : {
    3150                 :             :   /* Digits 0-9.
    3151                 :             :      ....................000000000.11111111112.22222222233333
    3152                 :             :      ....................123456789.01234567890.12345678901234  */
    3153                 :          96 :   const char *content = "       L\"0123456789\" /* non-str */\n";
    3154                 :          96 :   lexer_test test (case_, content, NULL);
    3155                 :             : 
    3156                 :             :   /* Verify that we get the expected token back, with the correct
    3157                 :             :      location information.  */
    3158                 :          96 :   const cpp_token *tok = test.get_token ();
    3159                 :          96 :   ASSERT_EQ (tok->type, CPP_WSTRING);
    3160                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
    3161                 :             : 
    3162                 :             :   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
    3163                 :          96 :   cpp_string dst_string;
    3164                 :          96 :   const enum cpp_ttype type = CPP_WSTRING;
    3165                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3166                 :             :                                       &dst_string, type);
    3167                 :          96 :   ASSERT_TRUE (result);
    3168                 :             :   /* The cpp_reader defaults to big-endian with CHAR_BIT * sizeof (int) for
    3169                 :             :      the wchar_precision, so dst_string should now be encoded as UTF-32BE.
    3170                 :             :      Spot-check digits 0, 5 and 9 and the terminating zero code unit.  */
    3171                 :          96 :   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
    3172                 :          96 :   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
    3173                 :          96 :   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
    3174                 :          96 :   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
    3175                 :          96 :   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
    3176                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3177                 :             : 
    3178                 :             :   /* We don't yet support generating substring location information
    3179                 :             :      for L"" strings; check that the expected error is reported.  */
    3180                 :          96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    3181                 :             :     (test, tok->src_loc, type,
    3182                 :             :      "execution character set != source character set");
    3183                 :          96 : }
    3184                 :             : 
    3185                 :             : /* Fetch a big-endian 16-bit value and convert to host endianness.
                         :             :    The two bytes at PTR_BE_VALUE are read individually, most significant
                         :             :    first, so the result is the same whatever the host byte order.  */
    3186                 :             : 
    3187                 :             : static uint16_t
    3188                 :         384 : uint16_from_big_endian (const uint16_t *ptr_be_value)
    3189                 :             : {
    3190                 :         384 :   const unsigned char *buf = (const unsigned char *)ptr_be_value;
    3191                 :         384 :   return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
    3192                 :             : }
    3193                 :             : 
    3194                 :             : /* Lex a u"" string literal and verify that attempts to read substring
    3195                 :             :    location data from it fail gracefully.  */
    3196                 :             : 
    3197                 :             : static void
    3198                 :          96 : test_lexer_string_locations_string16 (const line_table_case &case_)
    3199                 :             : {
    3200                 :             :   /* Digits 0-9.
    3201                 :             :      ....................000000000.11111111112.22222222233333
    3202                 :             :      ....................123456789.01234567890.12345678901234  */
    3203                 :          96 :   const char *content = "       u\"0123456789\" /* non-str */\n";
    3204                 :          96 :   lexer_test test (case_, content, NULL);
    3205                 :             : 
    3206                 :             :   /* Verify that we get the expected token back, with the correct
    3207                 :             :      location information.  */
    3208                 :          96 :   const cpp_token *tok = test.get_token ();
    3209                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING16);
    3210                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
    3211                 :             : 
    3212                 :             :   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
    3213                 :          96 :   cpp_string dst_string;
    3214                 :          96 :   const enum cpp_ttype type = CPP_STRING16;
    3215                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3216                 :             :                                       &dst_string, type);
    3217                 :          96 :   ASSERT_TRUE (result);
    3218                 :             : 
    3219                 :             :   /* The cpp_reader defaults to big-endian, so dst_string should now be
    3220                 :             :      encoded as UTF-16BE.  Spot-check digits 0, 5, 9 and the terminator.  */
    3221                 :          96 :   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
    3222                 :          96 :   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
    3223                 :          96 :   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
    3224                 :          96 :   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
    3225                 :          96 :   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
    3226                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3227                 :             : 
    3228                 :             :   /* We don't yet support generating substring location information
    3229                 :             :      for u"" strings.  */
    3230                 :          96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    3231                 :             :     (test, tok->src_loc, type,
    3232                 :             :      "execution character set != source character set");
    3233                 :          96 : }
    3234                 :             : 
    3235                 :             : /* Lex a U"" string literal and verify that attempts to read substring
    3236                 :             :    location data from it fail gracefully.  */
    3237                 :             : 
    3238                 :             : static void
    3239                 :          96 : test_lexer_string_locations_string32 (const line_table_case &case_)
    3240                 :             : {
    3241                 :             :   /* Digits 0-9.
    3242                 :             :      ....................000000000.11111111112.22222222233333
    3243                 :             :      ....................123456789.01234567890.12345678901234  */
    3244                 :          96 :   const char *content = "       U\"0123456789\" /* non-str */\n";
    3245                 :          96 :   lexer_test test (case_, content, NULL);
    3246                 :             : 
    3247                 :             :   /* Verify that we get the expected token back, with the correct
    3248                 :             :      location information.  */
    3249                 :          96 :   const cpp_token *tok = test.get_token ();
    3250                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING32);
    3251                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
    3252                 :             : 
    3253                 :             :   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
    3254                 :          96 :   cpp_string dst_string;
    3255                 :          96 :   const enum cpp_ttype type = CPP_STRING32;
    3256                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3257                 :             :                                       &dst_string, type);
    3258                 :          96 :   ASSERT_TRUE (result);
    3259                 :             : 
    3260                 :             :   /* The cpp_reader defaults to big-endian, so dst_string should now be
    3261                 :             :      encoded as UTF-32BE.  Spot-check digits 0, 5, 9 and the terminator.  */
    3262                 :          96 :   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
    3263                 :          96 :   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
    3264                 :          96 :   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
    3265                 :          96 :   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
    3266                 :          96 :   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
    3267                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3268                 :             : 
    3269                 :             :   /* We don't yet support generating substring location information
    3270                 :             :      for U"" strings.  */
    3271                 :          96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    3272                 :             :     (test, tok->src_loc, type,
    3273                 :             :      "execution character set != source character set");
    3274                 :          96 : }
    3275                 :             : 
    3276                 :             : /* Lex a u8-string literal.
    3277                 :             :    Verify the substring location data after running cpp_interpret_string
    3278                 :             :    on it.  */
    3279                 :             : 
    3280                 :             : static void
    3281                 :          96 : test_lexer_string_locations_u8 (const line_table_case &case_)
    3282                 :             : {
    3283                 :             :   /* Digits 0-9.
    3284                 :             :      ....................000000000.11111111112.22222222233333
    3285                 :             :      ....................123456789.01234567890.12345678901234  */
    3286                 :          96 :   const char *content = "      u8\"0123456789\" /* non-str */\n";
    3287                 :          96 :   lexer_test test (case_, content, NULL);
    3288                 :             : 
    3289                 :             :   /* Verify that we get the expected token back, with the correct
    3290                 :             :      location information.  */
    3291                 :          96 :   const cpp_token *tok = test.get_token ();
    3292                 :          96 :   ASSERT_EQ (tok->type, CPP_UTF8STRING);
    3293                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
    3294                 :             : 
    3295                 :             :   /* Verify that cpp_interpret_string works.
                         :             :      NOTE(review): the token is a CPP_UTF8STRING but it is interpreted
                         :             :      (and its ranges queried) as CPP_STRING -- presumably deliberate;
                         :             :      confirm.  */
    3296                 :          96 :   cpp_string dst_string;
    3297                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3298                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3299                 :             :                                       &dst_string, type);
    3300                 :          96 :   ASSERT_TRUE (result);
    3301                 :          96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    3302                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3303                 :             : 
    3304                 :             :   /* Verify ranges of individual characters.  This no longer includes the
    3305                 :             :      opening quote, but does include the closing quote (index 10, col 20).  */
    3306                 :        1152 :   for (int i = 0; i <= 10; i++)
    3307                 :        1056 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    3308                 :          96 : }
    3309                 :             : 
    3310                 :             : /* Lex a string literal containing UTF-8 source characters.
    3311                 :             :    Verify the substring location data after running cpp_interpret_string
    3312                 :             :    on it.  */
    3313                 :             : 
    3314                 :             : static void
    3315                 :          96 : test_lexer_string_locations_utf8_source (const line_table_case &case_)
    3316                 :             : {
    3317                 :             :  /* This string literal is written out to the source file as UTF-8,
    3318                 :             :     and is of the form "before mojibake after", where "mojibake"
    3319                 :             :     is written as the following four unicode code points:
    3320                 :             :        U+6587 CJK UNIFIED IDEOGRAPH-6587
    3321                 :             :        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
    3322                 :             :        U+5316 CJK UNIFIED IDEOGRAPH-5316
    3323                 :             :        U+3051 HIRAGANA LETTER KE.
    3324                 :             :      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
    3325                 :             :      "before" and "after" are 1 byte per unicode character.
    3326                 :             : 
    3327                 :             :      The numbering shown are "columns", which are *byte* numbers within
    3328                 :             :      the line, rather than unicode character numbers.
    3329                 :             : 
    3330                 :             :      .................... 000000000.1111111.
    3331                 :             :      .................... 123456789.0123456.  */
    3332                 :          96 :   const char *content = ("        \"before "
    3333                 :             :                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
    3334                 :             :                               UTF-8: 0xE6 0x96 0x87
    3335                 :             :                               C octal escaped UTF-8: \346\226\207
    3336                 :             :                             "column" numbers: 17-19.  */
    3337                 :             :                          "\346\226\207"
    3338                 :             : 
    3339                 :             :                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
    3340                 :             :                               UTF-8: 0xE5 0xAD 0x97
    3341                 :             :                               C octal escaped UTF-8: \345\255\227
    3342                 :             :                             "column" numbers: 20-22.  */
    3343                 :             :                          "\345\255\227"
    3344                 :             : 
    3345                 :             :                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
    3346                 :             :                               UTF-8: 0xE5 0x8C 0x96
    3347                 :             :                               C octal escaped UTF-8: \345\214\226
    3348                 :             :                             "column" numbers: 23-25.  */
    3349                 :             :                          "\345\214\226"
    3350                 :             : 
    3351                 :             :                          /* U+3051 HIRAGANA LETTER KE
    3352                 :             :                               UTF-8: 0xE3 0x81 0x91
    3353                 :             :                               C octal escaped UTF-8: \343\201\221
    3354                 :             :                             "column" numbers: 26-28.  */
    3355                 :             :                          "\343\201\221"
    3356                 :             : 
    3357                 :             :                          /* column numbers 29 onwards
    3358                 :             :                           2333333.33334444444444
    3359                 :             :                           9012345.67890123456789. */
    3360                 :             :                          " after\" /* non-str */\n");
    3361                 :          96 :   lexer_test test (case_, content, NULL);
    3362                 :             : 
    3363                 :             :   /* Verify that we get the expected token back, with the correct
    3364                 :             :      location information.  */
    3365                 :          96 :   const cpp_token *tok = test.get_token ();
    3366                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3367                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ
    3368                 :             :     (test.m_parser, tok,
    3369                 :             :      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
    3370                 :             : 
    3371                 :             :   /* Verify that cpp_interpret_string works.  */
    3372                 :          96 :   cpp_string dst_string;
    3373                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3374                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3375                 :             :                                       &dst_string, type);
    3376                 :          96 :   ASSERT_TRUE (result);
    3377                 :          96 :   ASSERT_STREQ
    3378                 :             :     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
    3379                 :             :      (const char *)dst_string.text);
    3380                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3381                 :             : 
    3382                 :             :   /* Verify ranges of individual characters.  This no longer includes the
    3383                 :             :      opening quote, but does include the closing quote.
    3384                 :             :      Assuming that both source and execution encodings are UTF-8, we have
    3385                 :             :      a run of 25 octets in each, plus the NUL terminator.  */
    3386                 :        2496 :   for (int i = 0; i < 25; i++)
    3387                 :        2400 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    3388                 :             :   /* NUL-terminator should use the closing quote at column 35.  */
    3389                 :          96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
    3390                 :             : 
    3391                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
    3392                 :          96 : }
    3393                 :             : 
    3394                 :             : /* Test of string literal concatenation.  */
    3395                 :             : 
    3396                 :             : static void
    3397                 :          96 : test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
    3398                 :             : {
    3399                 :             :   /* Digits 0-9.
    3400                 :             :      .....................000000000.111111.11112222222222
    3401                 :             :      .....................123456789.012345.67890123456789.  */
    3402                 :          96 :   const char *content = ("        \"01234\" /* non-str */\n"
    3403                 :             :                          "        \"56789\" /* non-str */\n");
    3404                 :          96 :   lexer_test test (case_, content, NULL);
    3405                 :             : 
    3406                 :          96 :   location_t input_locs[2];
    3407                 :             : 
    3408                 :             :   /* Verify that we get the expected tokens back.  */
    3409                 :          96 :   auto_vec <cpp_string> input_strings;
    3410                 :          96 :   const cpp_token *tok_a = test.get_token ();
    3411                 :          96 :   ASSERT_EQ (tok_a->type, CPP_STRING);
    3412                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
    3413                 :          96 :   input_strings.safe_push (tok_a->val.str);
    3414                 :          96 :   input_locs[0] = tok_a->src_loc;
    3415                 :             : 
    3416                 :          96 :   const cpp_token *tok_b = test.get_token ();
    3417                 :          96 :   ASSERT_EQ (tok_b->type, CPP_STRING);
    3418                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
    3419                 :          96 :   input_strings.safe_push (tok_b->val.str);
    3420                 :          96 :   input_locs[1] = tok_b->src_loc;
    3421                 :             : 
    3422                 :             :   /* Verify that cpp_interpret_string works.  */
    3423                 :          96 :   cpp_string dst_string;
    3424                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3425                 :          96 :   bool result = cpp_interpret_string (test.m_parser,
    3426                 :          96 :                                       input_strings.address (), 2,
    3427                 :             :                                       &dst_string, type);
    3428                 :          96 :   ASSERT_TRUE (result);
    3429                 :          96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    3430                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3431                 :             : 
    3432                 :             :   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
    3433                 :          96 :   test.m_concats.record_string_concatenation (2, input_locs);
    3434                 :             : 
    3435                 :          96 :   location_t initial_loc = input_locs[0];
    3436                 :             : 
    3437                 :             :   /* "01234" on line 1.  */
    3438                 :         576 :   for (int i = 0; i <= 4; i++)
    3439                 :         480 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
    3440                 :             :   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
    3441                 :         672 :   for (int i = 5; i <= 10; i++)
    3442                 :         576 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
    3443                 :             : 
    3444                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
    3445                 :          96 : }
    3446                 :             : 
    3447                 :             : /* Another test of string literal concatenation.  */
    3448                 :             : 
    3449                 :             : static void
    3450                 :          96 : test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
    3451                 :             : {
    3452                 :             :   /* Digits 0-9.
    3453                 :             :      .....................000000000.111.11111112222222
    3454                 :             :      .....................123456789.012.34567890123456.  */
    3455                 :          96 :   const char *content = ("        \"01\" /* non-str */\n"
    3456                 :             :                          "        \"23\" /* non-str */\n"
    3457                 :             :                          "        \"45\" /* non-str */\n"
    3458                 :             :                          "        \"67\" /* non-str */\n"
    3459                 :             :                          "        \"89\" /* non-str */\n");
    3460                 :          96 :   lexer_test test (case_, content, NULL);
    3461                 :             : 
    3462                 :          96 :   auto_vec <cpp_string> input_strings;
    3463                 :          96 :   location_t input_locs[5];
    3464                 :             : 
    3465                 :             :   /* Verify that we get the expected tokens back.  */
    3466                 :         576 :   for (int i = 0; i < 5; i++)
    3467                 :             :     {
    3468                 :         480 :       const cpp_token *tok = test.get_token ();
    3469                 :         480 :       ASSERT_EQ (tok->type, CPP_STRING);
    3470                 :         480 :       input_strings.safe_push (tok->val.str);
    3471                 :         480 :       input_locs[i] = tok->src_loc;
    3472                 :             :     }
    3473                 :             : 
    3474                 :             :   /* Verify that cpp_interpret_string works.  */
    3475                 :          96 :   cpp_string dst_string;
    3476                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3477                 :          96 :   bool result = cpp_interpret_string (test.m_parser,
    3478                 :          96 :                                       input_strings.address (), 5,
    3479                 :             :                                       &dst_string, type);
    3480                 :          96 :   ASSERT_TRUE (result);
    3481                 :          96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    3482                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3483                 :             : 
    3484                 :             :   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
    3485                 :          96 :   test.m_concats.record_string_concatenation (5, input_locs);
    3486                 :             : 
    3487                 :          96 :   location_t initial_loc = input_locs[0];
    3488                 :             : 
    3489                 :             :   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
    3490                 :             :      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
    3491                 :             :      and expect get_source_range_for_substring to fail.
    3492                 :             :      However, for a string concatenation test, we can have a case
    3493                 :             :      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
    3494                 :             :      but subsequent strings can be after it.
    3495                 :             :      Attempting to detect this within assert_char_at_range
    3496                 :             :      would overcomplicate the logic for the common test cases, so
    3497                 :             :      we detect it here.  */
    3498                 :          96 :   if (should_have_column_data_p (input_locs[0])
    3499                 :          96 :       && !should_have_column_data_p (input_locs[4]))
    3500                 :             :     {
    3501                 :             :       /* Verify that get_source_range_for_substring gracefully rejects
    3502                 :             :          this case.  */
    3503                 :           8 :       source_range actual_range;
    3504                 :           8 :       const char *err
    3505                 :           8 :         = get_source_range_for_char (test.m_parser, test.m_file_cache,
    3506                 :             :                                      &test.m_concats,
    3507                 :             :                                      initial_loc, type, 0, &actual_range);
    3508                 :           8 :       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
    3509                 :           8 :       return;
    3510                 :             :     }
    3511                 :             : 
    3512                 :         528 :   for (int i = 0; i < 5; i++)
    3513                 :        1320 :     for (int j = 0; j < 2; j++)
    3514                 :         880 :       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
    3515                 :             :                             i + 1, 10 + j, 10 + j);
    3516                 :             : 
    3517                 :             :   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
    3518                 :          88 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
    3519                 :             : 
    3520                 :          88 :   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
    3521                 :          96 : }
    3522                 :             : 
    3523                 :             : /* Another test of string literal concatenation, this time combined with
    3524                 :             :    various kinds of escaped characters.  */
    3525                 :             : 
    3526                 :             : static void
    3527                 :          96 : test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
    3528                 :             : {
    3529                 :             :   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and
    3530                 :             :      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
    3531                 :          96 :   const char *content
    3532                 :             :     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
    3533                 :             :        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
    3534                 :             :     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
    3535                 :          96 :   lexer_test test (case_, content, NULL);
    3536                 :             : 
    3537                 :          96 :   auto_vec <cpp_string> input_strings;
    3538                 :          96 :   location_t input_locs[4];
    3539                 :             : 
    3540                 :             :   /* Verify that we get the expected tokens back.  */
    3541                 :         480 :   for (int i = 0; i < 4; i++)
    3542                 :             :     {
    3543                 :         384 :       const cpp_token *tok = test.get_token ();
    3544                 :         384 :       ASSERT_EQ (tok->type, CPP_STRING);
    3545                 :         384 :       input_strings.safe_push (tok->val.str);
    3546                 :         384 :       input_locs[i] = tok->src_loc;
    3547                 :             :     }
    3548                 :             : 
    3549                 :             :   /* Verify that cpp_interpret_string works.  */
    3550                 :          96 :   cpp_string dst_string;
    3551                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3552                 :          96 :   bool result = cpp_interpret_string (test.m_parser,
    3553                 :          96 :                                       input_strings.address (), 4,
    3554                 :             :                                       &dst_string, type);
    3555                 :          96 :   ASSERT_TRUE (result);
    3556                 :          96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    3557                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3558                 :             : 
    3559                 :             :   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
    3560                 :          96 :   test.m_concats.record_string_concatenation (4, input_locs);
    3561                 :             : 
    3562                 :          96 :   location_t initial_loc = input_locs[0];
    3563                 :             : 
    3564                 :         576 :   for (int i = 0; i <= 4; i++)
    3565                 :         480 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
    3566                 :          96 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
    3567                 :          96 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
    3568                 :         384 :   for (int i = 7; i <= 9; i++)
    3569                 :         288 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
    3570                 :             : 
    3571                 :             :   /* NUL-terminator should use the location of the final closing quote.  */
    3572                 :          96 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
    3573                 :             : 
    3574                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
    3575                 :          96 : }
    3576                 :             : 
    3577                 :             : /* Test of string literal in a macro.  */
    3578                 :             : 
    3579                 :             : static void
    3580                 :          96 : test_lexer_string_locations_macro (const line_table_case &case_)
    3581                 :             : {
    3582                 :             :   /* Digits 0-9.
    3583                 :             :      .....................0000000001111111111.22222222223.
    3584                 :             :      .....................1234567890123456789.01234567890.  */
    3585                 :          96 :   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
    3586                 :             :                          "  MACRO");
    3587                 :          96 :   lexer_test test (case_, content, NULL);
    3588                 :             : 
    3589                 :             :   /* Verify that we get the expected tokens back.  */
    3590                 :          96 :   const cpp_token *tok = test.get_token ();
    3591                 :          96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    3592                 :             : 
    3593                 :          96 :   tok = test.get_token ();
    3594                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3595                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
    3596                 :             : 
    3597                 :             :   /* Verify ranges of individual characters.  We ought to
    3598                 :             :      see columns within the macro definition.  */
    3599                 :        1152 :   for (int i = 0; i <= 10; i++)
    3600                 :        1056 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    3601                 :             :                           i, 1, 20 + i, 20 + i);
    3602                 :             : 
    3603                 :          96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
    3604                 :             : 
    3605                 :          96 :   tok = test.get_token ();
    3606                 :          96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    3607                 :          96 : }
    3608                 :             : 
    3609                 :             : /* Test of stringification of a macro argument.  */
    3610                 :             : 
    3611                 :             : static void
    3612                 :          96 : test_lexer_string_locations_stringified_macro_argument
    3613                 :             :   (const line_table_case &case_)
    3614                 :             : {
    3615                 :             :   /* .....................000000000111111111122222222223.
    3616                 :             :      .....................123456789012345678901234567890.  */
    3617                 :          96 :   const char *content = ("#define MACRO(X) #X /* non-str */\n"
    3618                 :             :                          "MACRO(foo)\n");
    3619                 :          96 :   lexer_test test (case_, content, NULL);
    3620                 :             : 
    3621                 :             :   /* Verify that we get the expected token back.  */
    3622                 :          96 :   const cpp_token *tok = test.get_token ();
    3623                 :          96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    3624                 :             : 
    3625                 :          96 :   tok = test.get_token ();
    3626                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3627                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
    3628                 :             : 
    3629                 :             :   /* We don't support getting the location of a stringified macro
    3630                 :             :      argument.  Verify that it fails gracefully.  */
    3631                 :          96 :   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
    3632                 :             :                                   "cpp_interpret_string_1 failed");
    3633                 :             : 
    3634                 :          96 :   tok = test.get_token ();
    3635                 :          96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    3636                 :             : 
    3637                 :          96 :   tok = test.get_token ();
    3638                 :          96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    3639                 :          96 : }
    3640                 :             : 
    3641                 :             : /* Ensure that we fail gracefully if something attempts to pass
    3642                 :             :    in a location that isn't a string literal token.  Seen on this code:
    3643                 :             : 
    3644                 :             :      const char a[] = " %d ";
    3645                 :             :      __builtin_printf (a, 0.5);
    3646                 :             :                        ^
    3647                 :             : 
    3648                 :             :    when c-format.cc erroneously used the indicated one-character
    3649                 :             :    location as the format string location, leading to a read past the
    3650                 :             :    end of a string buffer in cpp_interpret_string_1.  */
    3651                 :             : 
    3652                 :             : static void
    3653                 :          96 : test_lexer_string_locations_non_string (const line_table_case &case_)
    3654                 :             : {
    3655                 :             :   /* .....................000000000111111111122222222223.
    3656                 :             :      .....................123456789012345678901234567890.  */
    3657                 :          96 :   const char *content = ("         a\n");
    3658                 :          96 :   lexer_test test (case_, content, NULL);
    3659                 :             : 
    3660                 :             :   /* Verify that we get the expected token back.  */
    3661                 :          96 :   const cpp_token *tok = test.get_token ();
    3662                 :          96 :   ASSERT_EQ (tok->type, CPP_NAME);
    3663                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
    3664                 :             : 
    3665                 :             :   /* At this point, libcpp is attempting to interpret the name as a
    3666                 :             :      string literal, despite it not starting with a quote.  We don't detect
    3667                 :             :      that, but we should at least fail gracefully.  */
    3668                 :          96 :   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
    3669                 :             :                                   "cpp_interpret_string_1 failed");
    3670                 :          96 : }
    3671                 :             : 
    3672                 :             : /* Ensure that we can read substring information for a token which
    3673                 :             :    starts in one linemap and ends in another.  Adapted from
    3674                 :             :    gcc.dg/cpp/pr69985.c.  */
    3675                 :             : 
    3676                 :             : static void
    3677                 :          96 : test_lexer_string_locations_long_line (const line_table_case &case_)
    3678                 :             : {
    3679                 :             :   /* .....................000000.0001111111111
    3680                 :             :      .....................123456.7890123456789.  */
    3681                 :          96 :   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
    3682                 :             :                          "     \"0123456789012345678901234567890123456789"
    3683                 :             :                          "0123456789012345678901234567890123456789"
    3684                 :             :                          "0123456789012345678901234567890123456789"
    3685                 :             :                          "0123456789\"\n");
    3686                 :             : 
    3687                 :          96 :   lexer_test test (case_, content, NULL);
    3688                 :             : 
    3689                 :             :   /* Verify that we get the expected token back.  */
    3690                 :          96 :   const cpp_token *tok = test.get_token ();
    3691                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3692                 :             : 
    3693                 :          96 :   if (!should_have_column_data_p (line_table->highest_location))
    3694                 :          40 :     return;
    3695                 :             : 
    3696                 :             :   /* Verify ranges of individual characters.  */
    3697                 :          56 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
    3698                 :        7392 :   for (int i = 0; i < 131; i++)
    3699                 :        7336 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    3700                 :             :                           i, 2, 7 + i, 7 + i);
    3701                 :          96 : }
    3702                 :             : 
    3703                 :             : /* Test of locations within a raw string that doesn't contain a newline.  */
    3704                 :             : 
    3705                 :             : static void
    3706                 :          96 : test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
    3707                 :             : {
    3708                 :             :   /* .....................00.0000000111111111122.
    3709                 :             :      .....................12.3456789012345678901.  */
    3710                 :          96 :   const char *content = ("R\"foo(0123456789)foo\"\n");
    3711                 :          96 :   lexer_test test (case_, content, NULL);
    3712                 :             : 
    3713                 :             :   /* Verify that we get the expected token back.  */
    3714                 :          96 :   const cpp_token *tok = test.get_token ();
    3715                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3716                 :             : 
    3717                 :             :   /* Verify that cpp_interpret_string works.  */
    3718                 :          96 :   cpp_string dst_string;
    3719                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3720                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3721                 :             :                                       &dst_string, type);
    3722                 :          96 :   ASSERT_TRUE (result);
    3723                 :          96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    3724                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3725                 :             : 
    3726                 :          96 :   if (!should_have_column_data_p (line_table->highest_location))
    3727                 :          32 :     return;
    3728                 :             : 
    3729                 :             :   /* 0-9, plus the NUL terminator.  */
    3730                 :          64 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
    3731                 :         768 :   for (int i = 0; i < 11; i++)
    3732                 :         704 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    3733                 :             :                           i, 1, 7 + i, 7 + i);
    3734                 :          96 : }
    3735                 :             : 
    3736                 :             : /* Test of locations within a raw string that contains a newline.  */
    3737                 :             : 
    3738                 :             : static void
    3739                 :          96 : test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
    3740                 :             : {
    3741                 :             :   /* .....................00.0000.
    3742                 :             :      .....................12.3456.  */
    3743                 :          96 :   const char *content = ("R\"foo(\n"
    3744                 :             :   /* .....................00000.
    3745                 :             :      .....................12345.  */
    3746                 :             :                          "hello\n"
    3747                 :             :                          "world\n"
    3748                 :             :   /* .....................00000.
    3749                 :             :      .....................12345.  */
    3750                 :             :                          ")foo\"\n");
    3751                 :          96 :   lexer_test test (case_, content, NULL);
    3752                 :             : 
    3753                 :             :   /* Verify that we get the expected token back.  */
    3754                 :          96 :   const cpp_token *tok = test.get_token ();
    3755                 :          96 :   ASSERT_EQ (tok->type, CPP_STRING);
    3756                 :             : 
    3757                 :             :   /* Verify that cpp_interpret_string works.  */
    3758                 :          96 :   cpp_string dst_string;
    3759                 :          96 :   const enum cpp_ttype type = CPP_STRING;
    3760                 :          96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    3761                 :             :                                       &dst_string, type);
    3762                 :          96 :   ASSERT_TRUE (result);
    3763                 :          96 :   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
    3764                 :          96 :   free (const_cast <unsigned char *> (dst_string.text));
    3765                 :             : 
    3766                 :          96 :   if (!should_have_column_data_p (line_table->highest_location))
    3767                 :          40 :     return;
    3768                 :             : 
    3769                 :             :   /* Currently we don't support locations within raw strings that
    3770                 :             :      contain newlines.  */
    3771                 :          56 :   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
    3772                 :             :                                   "range endpoints are on different lines");
    3773                 :          96 : }
    3774                 :             : 
    3775                 :             : /* Test of parsing an unterminated raw string.
                         :             :    CONTENT opens a raw string whose delimiter is "ouch", but the only
                         :             :    candidate terminator is spelled ")ouCh" (the case differs), so the
                         :             :    literal is never closed.  The lexer is expected to hand back CPP_EOF
                         :             :    and to have reported exactly one diagnostic to the sink.
                         :             :    NOTE(review): m_implicitly_expect_EOF is cleared below, presumably
                         :             :    because this test reads the EOF token itself; confirm against the
                         :             :    definition of lexer_test.  */
    3776                 :             : 
    3777                 :             : static void
    3778                 :          96 : test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
    3779                 :             : {
    3780                 :          96 :   const char *content = "R\"ouch()ouCh\" /* etc */";
    3781                 :             : 
    3782                 :          96 :   lexer_diagnostic_sink diagnostics;
    3783                 :          96 :   lexer_test test (case_, content, &diagnostics);
    3784                 :          96 :   test.m_implicitly_expect_EOF = false;
    3785                 :             : 
    3786                 :             :   /* Attempt to parse the raw string.  Because the literal never ends,
                         :             :      no CPP_STRING token is expected: the first token returned must be
                         :             :      CPP_EOF.  */
    3787                 :          96 :   const cpp_token *tok = test.get_token ();
    3788                 :          96 :   ASSERT_EQ (tok->type, CPP_EOF);
    3789                 :             : 
    3790                 :          96 :   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
    3791                 :             :   /* We expect the message "unterminated raw string"
    3792                 :             :      in the "cpplib" translation domain.
    3793                 :             :      It's not clear that dgettext is available on all supported hosts,
    3794                 :             :      so this assertion is commented-out for now.
    3795                 :             :        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
    3796                 :             :                      diagnostics.m_diagnostics[0]);
    3797                 :             :   */
    3798                 :          96 : }
    3799                 :             : 
    3800                 :             : /* Test of lexing char constants.
                         :             :    CONTENT holds five constants, one per line: a plain 'a', the
                         :             :    u-, U- and L-prefixed forms of 'a', and the multi-character 'abc'.
                         :             :    For each one the token type and its spelling are checked.  The
                         :             :    first token is additionally run through cpp_interpret_charconst,
                         :             :    checking the resulting value and the number of chars seen;
                         :             :    UNSIGNEDP is written by that call but is not checked here.  */
    3801                 :             : 
    3802                 :             : static void
    3803                 :          96 : test_lexer_char_constants (const line_table_case &case_)
    3804                 :             : {
    3805                 :             :   /* Various char constants.
    3806                 :             :      .....................0000000001111111111.22222222223.
    3807                 :             :      .....................1234567890123456789.01234567890.  */
    3808                 :          96 :   const char *content = ("         'a'\n"
    3809                 :             :                          "        u'a'\n"
    3810                 :             :                          "        U'a'\n"
    3811                 :             :                          "        L'a'\n"
    3812                 :             :                          "         'abc'\n");
    3813                 :          96 :   lexer_test test (case_, content, NULL);
    3814                 :             : 
    3815                 :             :   /* Verify that we get the expected tokens back.  */
    3816                 :             :   /* 'a'.  */
    3817                 :          96 :   const cpp_token *tok = test.get_token ();
    3818                 :          96 :   ASSERT_EQ (tok->type, CPP_CHAR);
    3819                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
    3820                 :             : 
    3821                 :          96 :   unsigned int chars_seen;
    3822                 :          96 :   int unsignedp;
    3823                 :          96 :   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
    3824                 :             :                                           &chars_seen, &unsignedp);
    3825                 :          96 :   ASSERT_EQ (cc, 'a');
    3826                 :          96 :   ASSERT_EQ (chars_seen, 1);
    3827                 :             : 
    3828                 :             :   /* u'a'.  */
    3829                 :          96 :   tok = test.get_token ();
    3830                 :          96 :   ASSERT_EQ (tok->type, CPP_CHAR16);
    3831                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
    3832                 :             : 
    3833                 :             :   /* U'a'.  */
    3834                 :          96 :   tok = test.get_token ();
    3835                 :          96 :   ASSERT_EQ (tok->type, CPP_CHAR32);
    3836                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
    3837                 :             : 
    3838                 :             :   /* L'a'.  */
    3839                 :          96 :   tok = test.get_token ();
    3840                 :          96 :   ASSERT_EQ (tok->type, CPP_WCHAR);
    3841                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
    3842                 :             : 
    3843                 :             :   /* 'abc' (c-char-sequence).  */
    3844                 :          96 :   tok = test.get_token ();
    3845                 :          96 :   ASSERT_EQ (tok->type, CPP_CHAR);
    3846                 :          96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
    3847                 :          96 : }
    3848                 :             : /* A table of interesting location_t values, giving one axis of our test
    3849                 :             :    matrix.
                         :             :    The table has 12 entries.  for_each_line_table_case checks its
                         :             :    iteration count against the literal "2 * 12", so that assertion
                         :             :    has to be updated whenever an entry is added or removed here.  */
    3850                 :             : 
    3851                 :             : static const location_t boundary_locations[] = {
    3852                 :             :   /* Zero means "don't override the default values for a new line_table".  */
    3853                 :             :   0,
    3854                 :             : 
    3855                 :             :   /* An arbitrary non-zero value that isn't close to one of
    3856                 :             :      the boundary values below.  */
    3857                 :             :   0x10000,
    3858                 :             : 
    3859                 :             :   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
    3860                 :             :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
    3861                 :             :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
    3862                 :             :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
    3863                 :             :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
    3864                 :             :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
    3865                 :             : 
    3866                 :             :   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
    3867                 :             :   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
    3868                 :             :   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
    3869                 :             :   LINE_MAP_MAX_LOCATION_WITH_COLS,
    3870                 :             :   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
    3871                 :             :   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
    3872                 :             : };
    3873                 :             : 
    3874                 :             : /* Run TESTCASE multiple times, once for each case in our test matrix.
                         :             :    The matrix is the cross product of two default_range_bits values
                         :             :    (0 and 5) with every entry of boundary_locations; a fresh
                         :             :    line_table_case is built for each pair and passed to TESTCASE.  */
    3875                 :             : 
    3876                 :             : void
    3877                 :         236 : for_each_line_table_case (void (*testcase) (const line_table_case &))
    3878                 :             : {
    3879                 :             :   /* As noted above in the description of struct line_table_case,
    3880                 :             :      we want to explore a test matrix of interesting line_table
    3881                 :             :      situations, running various selftests for each case within the
    3882                 :             :      matrix.  */
    3883                 :             : 
    3884                 :             :   /* Run all tests with:
    3885                 :             :      (a) line_table->default_range_bits == 0, and
    3886                 :             :      (b) line_table->default_range_bits == 5.
                         :             :      The loop steps by 5, so 0 and 5 are the only values visited.  */
    3887                 :         236 :   int num_cases_tested = 0;
    3888                 :         708 :   for (int default_range_bits = 0; default_range_bits <= 5;
    3889                 :         472 :        default_range_bits += 5)
    3890                 :             :     {
    3891                 :             :       /* ...and use each of the "interesting" location values as
    3892                 :             :          the starting location within line_table.  */
    3893                 :             :       const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
    3894                 :        6136 :       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
    3895                 :             :         {
    3896                 :        5664 :           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
    3897                 :             : 
    3898                 :        5664 :           testcase (c);
    3899                 :             : 
    3900                 :        5664 :           num_cases_tested++;
    3901                 :             :         }
    3902                 :             :     }
    3903                 :             : 
    3904                 :             :   /* Verify that we fully covered the test matrix.
                         :             :      NOTE(review): the literal 12 duplicates the number of entries in
                         :             :      boundary_locations; writing ARRAY_SIZE (boundary_locations) here
                         :             :      would keep the check in sync with the table automatically.  */
    3905                 :         236 :   ASSERT_EQ (num_cases_tested, 2 * 12);
    3906                 :         236 : }
    3907                 :             : 
    3908                 :             : /* Verify that when presented with a consecutive pair of locations with
    3909                 :             :    a very large line offset, we don't attempt to consolidate them into
    3910                 :             :    a single ordinary linemap where the line offsets within the line map
    3911                 :             :    would lead to overflow (PR lto/88147).
                         :             :    The test starts line 2578 and then line 404198 of "foo.c" and checks
                         :             :    that the last ordinary map after the second call is a different
                         :             :    object from the one in use after the first.  */
    3912                 :             : 
    3913                 :             : static void
    3914                 :           4 : test_line_offset_overflow ()
    3915                 :             : {
    3916                 :           4 :   line_table_test ltt (line_table_case (5, 0));
    3917                 :             : 
    3918                 :           4 :   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
    3919                 :           4 :   linemap_line_start (line_table, 1, 100);
    3920                 :           4 :   location_t loc_a = linemap_line_start (line_table, 2578, 255);
    3921                 :           4 :   assert_loceq ("foo.c", 2578, 0, loc_a);
    3922                 :             : 
    3923                 :           4 :   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
    3924                 :           4 :   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
    3925                 :           4 :   ASSERT_EQ (ordmap_a->m_range_bits, 5);
    3926                 :             : 
    3927                 :           4 :   location_t loc_b = linemap_line_start (line_table, 404198, 512);
    3928                 :           4 :   assert_loceq ("foo.c", 404198, 0, loc_b);
    3929                 :             : 
    3930                 :             :   /* We should have started a new linemap, rather than attempting to store
    3931                 :             :      a very large line offset.  ORDMAP_A was captured before the second
                         :             :      linemap_line_start call, so comparing the two pointers detects
                         :             :      whether a new map was created.  */
    3932                 :           4 :   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
    3933                 :           4 :   ASSERT_NE (ordmap_a, ordmap_b);
    3934                 :           4 : }
    3935                 :             : /* Selftests for cpp_display_width, cpp_byte_column_to_display_column
                         :             :    and cpp_display_column_to_byte_column.  Unless a block builds its
                         :             :    own policy, the calls use POLICY, which is constructed from a tab
                         :             :    stop of 8 and cpp_wcwidth.  */
    3936                 :           4 : void test_cpp_utf8 ()
    3937                 :             : {
    3938                 :           4 :   const int def_tabstop = 8;
    3939                 :           4 :   cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
    3940                 :             : 
    3941                 :             :   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1:
                         :             :      every one of the 8 bytes of the malformed string, and every one
                         :             :      of the 5 control bytes (embedded NUL included), is expected to
                         :             :      count as one display column.  */
    3942                 :           4 :   {
    3943                 :           4 :     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
    3944                 :           4 :     ASSERT_EQ (8, w_bad);
    3945                 :           4 :     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
    3946                 :           4 :     ASSERT_EQ (5, w_ctrl);
    3947                 :             :   }
    3948                 :             : 
    3949                 :             :   /* Verify that wcwidth of valid UTF-8 is as expected.
                         :             :      Expected widths: U+03C0 (pi) 1; U+1F602 (emoji) 2; U+00FF
                         :             :      (precomposed y with diaeresis) 1; "y" followed by the combining
                         :             :      diaeresis U+0308, 1 in total; U+4E3A (han) 2; plain ASCII one
                         :             :      column per byte.  The mixed string is 24 bytes long and also
                         :             :      contains a stray 0x9f byte, which counts as one column, giving
                         :             :      18 columns overall.  */
    3950                 :           4 :   {
    3951                 :           4 :     const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
    3952                 :           4 :     ASSERT_EQ (1, w_pi);
    3953                 :           4 :     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
    3954                 :           4 :     ASSERT_EQ (2, w_emoji);
    3955                 :           4 :     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
    3956                 :             :                                                         policy);
    3957                 :           4 :     ASSERT_EQ (1, w_umlaut_precomposed);
    3958                 :           4 :     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
    3959                 :             :                                                       policy);
    3960                 :           4 :     ASSERT_EQ (1, w_umlaut_combining);
    3961                 :           4 :     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
    3962                 :           4 :     ASSERT_EQ (2, w_han);
    3963                 :           4 :     const int w_ascii = cpp_display_width ("GCC", 3, policy);
    3964                 :           4 :     ASSERT_EQ (3, w_ascii);
    3965                 :           4 :     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
    3966                 :             :                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
    3967                 :             :                                            24, policy);
    3968                 :           4 :     ASSERT_EQ (18, w_mixed);
    3969                 :             :   }
    3970                 :             : 
    3971                 :             :   /* Verify that display width properly expands tabs.
                         :             :      TSTR is 6 bytes: a tab, "abc", a tab and "d".  With tab stops of
                         :             :      1, 3 and 8 the expected widths are 6, 10 and 17.  With a tab stop
                         :             :      of 8, display column 7 still lies inside the expansion of the
                         :             :      leading tab, so it is expected to map back to byte column 1.  */
    3972                 :           4 :   {
    3973                 :           4 :     const char *tstr = "\tabc\td";
    3974                 :           4 :     ASSERT_EQ (6, cpp_display_width (tstr, 6,
    3975                 :             :                                      cpp_char_column_policy (1, cpp_wcwidth)));
    3976                 :           4 :     ASSERT_EQ (10, cpp_display_width (tstr, 6,
    3977                 :             :                                       cpp_char_column_policy (3, cpp_wcwidth)));
    3978                 :           4 :     ASSERT_EQ (17, cpp_display_width (tstr, 6,
    3979                 :             :                                       cpp_char_column_policy (8, cpp_wcwidth)));
    3980                 :           4 :     ASSERT_EQ (1,
    3981                 :             :                cpp_display_column_to_byte_column
    3982                 :             :                  (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
    3983                 :             :   }
    3984                 :             : 
    3985                 :             :   /* Verify that cpp_byte_column_to_display_column can go past the end,
    3986                 :             :      and similar edge cases.  STR is 6 bytes and 5 display columns
                         :             :      wide; byte column 106 is 100 bytes past the end and is expected
                         :             :      to map to display column 105.  With a NULL string the requested
                         :             :      column is expected back unchanged.  */
    3987                 :           4 :   {
    3988                 :           4 :     const char *str
    3989                 :             :       /* Display columns.
    3990                 :             :          111111112345  */
    3991                 :             :       = "\xcf\x80 abc";
    3992                 :             :       /* 111122223456
    3993                 :             :          Byte columns.  */
    3994                 :             : 
    3995                 :           4 :     ASSERT_EQ (5, cpp_display_width (str, 6, policy));
    3996                 :           4 :     ASSERT_EQ (105,
    3997                 :             :                cpp_byte_column_to_display_column (str, 6, 106, policy));
    3998                 :           4 :     ASSERT_EQ (10000,
    3999                 :             :                cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
    4000                 :           4 :     ASSERT_EQ (0,
    4001                 :             :                cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
    4002                 :             :   }
    4003                 :             : 
    4004                 :             :   /* Verify that cpp_display_column_to_byte_column can go past the end,
    4005                 :             :      and similar edge cases, and check invertibility.  STR is 15 bytes
                         :             :      and 11 display columns wide; display column 111 is 100 columns
                         :             :      past the end and is expected to map to byte column 115.  */
    4006                 :           4 :   {
    4007                 :           4 :     const char *str
    4008                 :             :       /* Display columns.
    4009                 :             :          000000000000000000000000000000000000011
    4010                 :             :          111111112222222234444444455555555678901  */
    4011                 :             :       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
    4012                 :             :       /* 000000000000000000000000000000000111111
    4013                 :             :          111122223333444456666777788889999012345
    4014                 :             :          Byte columns.  */
    4015                 :           4 :     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
    4016                 :           4 :     ASSERT_EQ (15,
    4017                 :             :                cpp_display_column_to_byte_column (str, 15, 11, policy));
    4018                 :           4 :     ASSERT_EQ (115,
    4019                 :             :                cpp_display_column_to_byte_column (str, 15, 111, policy));
    4020                 :           4 :     ASSERT_EQ (10000,
    4021                 :             :                cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
    4022                 :           4 :     ASSERT_EQ (0,
    4023                 :             :                cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
    4024                 :             : 
    4025                 :             :     /* Verify that we do not interrupt a UTF-8 sequence.  Display
                         :             :        column 1 is the first half of the two-column emoji, yet the
                         :             :        byte column expected back is 4, the last byte of the emoji.  */
    4026                 :           4 :     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
    4027                 :             : 
    4028                 :          64 :     for (int byte_col = 1; byte_col <= 15; ++byte_col)
    4029                 :             :       {
    4030                 :          60 :         const int disp_col
    4031                 :          60 :           = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
    4032                 :          60 :         const int byte_col2
    4033                 :          60 :           = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
    4034                 :             : 
    4035                 :             :         /* If we ask for the display column in the middle of a UTF-8
    4036                 :             :            sequence, it will return the length of the partial sequence,
    4037                 :             :            matching the behavior of GCC before display column support.
    4038                 :             :            Otherwise check the round trip was successful.
                         :             :            Byte columns 1-3 and 6-8 are the ones that fall inside the
                         :             :            first and second emoji respectively.  */
    4039                 :          60 :         if (byte_col < 4)
    4040                 :          12 :           ASSERT_EQ (byte_col, disp_col);
    4041                 :          48 :         else if (byte_col >= 6 && byte_col < 9)
    4042                 :          12 :           ASSERT_EQ (3 + (byte_col - 5), disp_col);
    4043                 :             :         else
    4044                 :          60 :           ASSERT_EQ (byte_col2, byte_col);
    4045                 :             :       }
    4046                 :             :   }
    4047                 :           4 : }
    4048                 :             : /* Return the result of cpp_valid_utf8_p for the NUL-terminated string
                         :             :    STR, passing strlen (STR) as the length.  Because the length comes
                         :             :    from strlen, this helper cannot be used for inputs containing
                         :             :    embedded NUL bytes.  */
    4049                 :             : static bool
    4050                 :          36 : check_cpp_valid_utf8_p (const char *str)
    4051                 :             : {
    4052                 :          36 :   return cpp_valid_utf8_p (str, strlen (str));
    4053                 :             : }
    4054                 :             : 
    4055                 :             : /* Check that cpp_valid_utf8_p works as expected.
                         :             :    The first half feeds it inputs that must be accepted (ASCII, 2-, 3-
                         :             :    and 4-byte sequences, control bytes); the second half feeds it
                         :             :    inputs that must be rejected (interleaved garbage, bare
                         :             :    continuation bytes, lead bytes without a continuation, and bytes
                         :             :    that cannot start a sequence).  */
    4056                 :             : 
    4057                 :             : static void
    4058                 :           4 : test_cpp_valid_utf8_p ()
    4059                 :             : {
    4060                 :           4 :   ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
    4061                 :             : 
    4062                 :             :   /* 2-byte char (pi).  */
    4063                 :           4 :   ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
    4064                 :             : 
    4065                 :             :   /* 3-byte chars (the Japanese word "mojibake").  */
    4066                 :           4 :   ASSERT_TRUE (check_cpp_valid_utf8_p
    4067                 :             :                (
    4068                 :             :                 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
    4069                 :             :                    UTF-8: 0xE6 0x96 0x87
    4070                 :             :                    C octal escaped UTF-8: \346\226\207.  */
    4071                 :             :                 "\346\226\207"
    4072                 :             :                 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
    4073                 :             :                    UTF-8: 0xE5 0xAD 0x97
    4074                 :             :                    C octal escaped UTF-8: \345\255\227.  */
    4075                 :             :                 "\345\255\227"
    4076                 :             :                 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
    4077                 :             :                    UTF-8: 0xE5 0x8C 0x96
    4078                 :             :                    C octal escaped UTF-8: \345\214\226.  */
    4079                 :             :                 "\345\214\226"
    4080                 :             :                 /* U+3051 HIRAGANA LETTER KE
    4081                 :             :                    UTF-8: 0xE3 0x81 0x91
    4082                 :             :                    C octal escaped UTF-8: \343\201\221.  */
    4083                 :             :                 "\343\201\221"));
    4084                 :             : 
    4085                 :             :   /* 4-byte char: an emoji.  */
    4086                 :           4 :   ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
    4087                 :             : 
    4088                 :             :   /* Control codes, including the NUL byte.  cpp_valid_utf8_p is
                         :             :      called directly with an explicit length of 5 here, since the
                         :             :      strlen-based helper would stop at the embedded NUL.  */
    4089                 :           4 :   ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
    4090                 :             : 
    4091                 :           4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
    4092                 :             : 
    4093                 :             :   /* Unexpected continuation bytes: every byte in 0x80..0xbf, passed
                         :             :      on its own as a one-byte buffer, must be rejected.  */
    4094                 :           4 :   for (unsigned char continuation_byte = 0x80;
    4095                 :         260 :        continuation_byte <= 0xbf;
    4096                 :             :        continuation_byte++)
    4097                 :         256 :     ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
    4098                 :             : 
    4099                 :             :   /* "Lonely start characters" for 2-byte sequences: each lead byte
                         :             :      in 0xc0..0xdf is followed by a space rather than by a
                         :             :      continuation byte.  */
    4100                 :           4 :   {
    4101                 :           4 :     unsigned char buf[2];
    4102                 :           4 :     buf[1] = ' ';
    4103                 :           4 :     for (buf[0] = 0xc0;
    4104                 :         132 :          buf[0] <= 0xdf;
    4105                 :         128 :          buf[0]++)
    4106                 :         128 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    4107                 :             :   }
    4108                 :             : 
    4109                 :             :   /* "Lonely start characters" for 3-byte sequences (0xe0..0xef),
                         :             :      again followed by a space.  */
    4110                 :           4 :   {
    4111                 :           4 :     unsigned char buf[2];
    4112                 :           4 :     buf[1] = ' ';
    4113                 :           4 :     for (buf[0] = 0xe0;
    4114                 :          68 :          buf[0] <= 0xef;
    4115                 :          64 :          buf[0]++)
    4116                 :          64 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    4117                 :             :   }
    4118                 :             : 
    4119                 :             :   /* "Lonely start characters" for 4-byte sequences (0xf0..0xf4),
                         :             :      again followed by a space.  */
    4120                 :           4 :   {
    4121                 :           4 :     unsigned char buf[2];
    4122                 :           4 :     buf[1] = ' ';
    4123                 :           4 :     for (buf[0] = 0xf0;
    4124                 :          24 :          buf[0] <= 0xf4;
    4125                 :          20 :          buf[0]++)
    4126                 :          20 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    4127                 :             :   }
    4128                 :             : 
    4129                 :             :   /* Invalid start characters (formerly valid for 5-byte and 6-byte
    4130                 :             :      sequences).  The loop covers 0xf5..0xfd.  */
    4131                 :           4 :   {
    4132                 :           4 :     unsigned char buf[2];
    4133                 :           4 :     buf[1] = ' ';
    4134                 :           4 :     for (buf[0] = 0xf5;
    4135                 :          40 :          buf[0] <= 0xfd;
    4136                 :          36 :          buf[0]++)
    4137                 :          36 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    4138                 :             :   }
    4139                 :             : 
    4140                 :             :   /* Impossible bytes: 0xc0, 0xc1, 0xfe and 0xff, each passed as a
                         :             :      one-byte string.  */
    4141                 :           4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
    4142                 :           4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
    4143                 :           4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
    4144                 :           4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
    4145                 :           4 : }
    4146                 :             : 
    4147                 :             : /* Run all of the selftests within this file.
                         :             :    Tests that take a line_table_case are driven through
                         :             :    for_each_line_table_case, so they run once per entry of the test
                         :             :    matrix; the remaining tests are called directly and run once.  */
    4148                 :             : 
    4149                 :             : void
    4150                 :           4 : input_cc_tests ()
    4151                 :             : {
    4152                 :           4 :   test_linenum_comparisons ();
    4153                 :           4 :   test_should_have_column_data_p ();
    4154                 :           4 :   test_unknown_location ();
    4155                 :           4 :   test_builtins ();
    4156                 :           4 :   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
    4157                 :             : 
    4158                 :           4 :   for_each_line_table_case (test_accessing_ordinary_linemaps);
    4159                 :           4 :   for_each_line_table_case (test_lexer);
    4160                 :           4 :   for_each_line_table_case (test_lexer_string_locations_simple);
    4161                 :           4 :   for_each_line_table_case (test_lexer_string_locations_ebcdic);
    4162                 :           4 :   for_each_line_table_case (test_lexer_string_locations_hex);
    4163                 :           4 :   for_each_line_table_case (test_lexer_string_locations_oct);
    4164                 :           4 :   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
    4165                 :           4 :   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
    4166                 :           4 :   for_each_line_table_case (test_lexer_string_locations_ucn4);
    4167                 :           4 :   for_each_line_table_case (test_lexer_string_locations_ucn8);
    4168                 :           4 :   for_each_line_table_case (test_lexer_string_locations_wide_string);
    4169                 :           4 :   for_each_line_table_case (test_lexer_string_locations_string16);
    4170                 :           4 :   for_each_line_table_case (test_lexer_string_locations_string32);
    4171                 :           4 :   for_each_line_table_case (test_lexer_string_locations_u8);
    4172                 :           4 :   for_each_line_table_case (test_lexer_string_locations_utf8_source);
    4173                 :           4 :   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
    4174                 :           4 :   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
    4175                 :           4 :   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
    4176                 :           4 :   for_each_line_table_case (test_lexer_string_locations_macro);
    4177                 :           4 :   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
    4178                 :           4 :   for_each_line_table_case (test_lexer_string_locations_non_string);
    4179                 :           4 :   for_each_line_table_case (test_lexer_string_locations_long_line);
    4180                 :           4 :   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
    4181                 :           4 :   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
    4182                 :           4 :   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
    4183                 :           4 :   for_each_line_table_case (test_lexer_char_constants);
    4184                 :             : 
    4185                 :           4 :   test_reading_source_line ();
    4186                 :             : 
    4187                 :           4 :   test_line_offset_overflow ();
    4188                 :             : 
    4189                 :           4 :   test_cpp_utf8 ();
    4190                 :           4 :   test_cpp_valid_utf8_p ();
    4191                 :           4 : }
    4192                 :             : 
    4193                 :             : } // namespace selftest
    4194                 :             : 
    4195                 :             : #endif /* CHECKING_P */
        

Generated by: LCOV version 2.1-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.