LCOV - code coverage report
Current view: top level - gcc - input.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 94.3 % 1354 1277
Test Date: 2026-02-28 14:20:25 Functions: 97.9 % 94 92
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Data and functions related to line maps and input files.
       2              :    Copyright (C) 2004-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "intl.h"
      24              : #include "diagnostic.h"
      25              : #include "diagnostics/file-cache.h"
      26              : #include "selftest.h"
      27              : #include "cpplib.h"
      28              : 
      29              : #ifndef HAVE_ICONV
      30              : #define HAVE_ICONV 0
      31              : #endif
      32              : /* Return the file name shown for built-in locations.  */
      33              : const char *
      34      6865121 : special_fname_builtin ()
      35              : {
      36      6865121 :   return _("<built-in>");
      37              : }
      38              : 
      39              : /* Current position in real source file.  */
      40              : 
      41              : location_t input_location = UNKNOWN_LOCATION;
      42              : 
      43              : class line_maps *line_table;
      44              : 
      45              : /* A stashed copy of "line_table" for use by selftest::line_table_test.
      46              :    This needs to be a global so that it can be a GC root, and thus
      47              :    prevent the stashed copy from being garbage-collected if the GC runs
      48              :    during a line_table_test.  */
      49              : 
      50              : class line_maps *saved_line_table;
      51              : 
      52              : /* Expand the source location LOC into a human readable location.  If
      53              :    LOC resolves to a builtin location, the file name of the readable
      54              :    location is set to the string "<built-in>". If EXPANSION_POINT_P is
      55              :    TRUE and LOC is virtual, then it is resolved to the expansion
      56              :    point of the involved macro.  Otherwise, it is resolved to the
      57              :    spelling location of the token.
      58              : 
      59              :    When resolving to the spelling location of the token, if the
      60              :    resulting location is for a built-in location (that is, it has no
      61              :    associated line/column) in the context of a macro expansion, the
      62              :    returned location is the first one (while unwinding the macro
      63              :    location towards its expansion point) that is in real source
      64              :    code.
      65              : 
      66              :    ASPECT selects the caret, start or finish of LOC as the point to expand.  */
      67              : 
      68              : static expanded_location
      69    964438857 : expand_location_1 (const line_maps *set,
      70              :                    location_t loc,
      71              :                    bool expansion_point_p,
      72              :                    enum location_aspect aspect)
      73              : {
      74    964438857 :   expanded_location xloc;
      75    964438857 :   const line_map_ordinary *map;
      76    964438857 :   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
      77    964438857 :   tree block = NULL;
      78              : 
      79    964438857 :   if (IS_ADHOC_LOC (loc))
      80              :     {
      81    265118978 :       block = LOCATION_BLOCK (loc);
      82    265118978 :       loc = LOCATION_LOCUS (loc);
      83              :     }
      84              : 
      85    964438857 :   memset (&xloc, 0, sizeof (xloc));
      86              : 
      87    964438857 :   if (loc >= RESERVED_LOCATION_COUNT)
      88              :     {
      89    907979855 :       if (!expansion_point_p)
      90              :         {
      91              :           /* We want to resolve LOC to its spelling location.
      92              : 
      93              :              But if that spelling location is a reserved location that
      94              :              appears in the context of a macro expansion (like for a
      95              :              location for a built-in token), let's consider the first
      96              :              location (toward the expansion point) that is not reserved;
      97              :              that is, the first location that is in real source code.  */
      98      2198310 :           loc = linemap_unwind_to_first_non_reserved_loc (set,
      99              :                                                           loc, NULL);
     100      2198310 :           lrk = LRK_SPELLING_LOCATION;
     101              :         }
     102    907979855 :       loc = linemap_resolve_location (set, loc, lrk, &map);
     103              : 
     104              :       /* loc is now either in an ordinary map, or is a reserved location.
     105              :          If it is a compound location, the caret is in a spelling location,
     106              :          but the start/finish might still be a virtual location.
     107              :          Depending on what the caller asked for, we may need to recurse
     108              :          one level in order to resolve any virtual locations in the
     109              :          end-points.  */
     110    907979855 :       switch (aspect)
     111              :         {
     112            0 :         default:
     113            0 :           gcc_unreachable ();
     114              :           /* Fall through.  */
     115              :         case location_aspect::caret:
     116              :           break;
     117       428211 :         case location_aspect::start:
     118       428211 :           {
     119       428211 :             location_t start = get_start (loc);
     120       428211 :             if (start != loc)
     121         1179 :               return expand_location_1 (set, start, expansion_point_p, aspect);
     122              :           }
     123              :           break;
     124       100378 :         case location_aspect::finish:
     125       100378 :           {
     126       100378 :             location_t finish = get_finish (loc);
     127       100378 :             if (finish != loc)
     128         1133 :               return expand_location_1 (set, finish, expansion_point_p, aspect);
     129              :           }
     130              :           break;
     131              :         }
     132    907977543 :       xloc = linemap_expand_location (set, map, loc);
     133              :     }
     134              : 
     135    964436545 :   xloc.data = block;
     136    964436545 :   if (loc <= BUILTINS_LOCATION)
     137     56459002 :     xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
     138              : 
     139    964436545 :   return xloc;
     140              : }
     141              : 
     142              : /* Return a NUL-terminated copy of the source text from START to END (both
     143              :    inclusive), read via FC, or NULL if the range is invalid or unreadable.  The
     144              :    caller is responsible for freeing the return value.  */
     145              : 
     146              : char *
     147          996 : get_source_text_between (diagnostics::file_cache &fc,
     148              :                          location_t start, location_t end)
     149              : {
     150          996 :   expanded_location expstart
     151          996 :     = expand_location_to_spelling_point (start, location_aspect::start);
     152          996 :   expanded_location expend
     153          996 :     = expand_location_to_spelling_point (end, location_aspect::finish);
     154              : 
     155              :   /* If a file name is missing, the locations are in different files, or the
     156              :      end comes before the start, give up and return nothing.  */
     157          996 :   if (!expstart.file || !expend.file)
     158              :     return NULL;
     159          995 :   if (strcmp (expstart.file, expend.file) != 0)
     160              :     return NULL;
     161          995 :   if (expstart.line > expend.line)
     162              :     return NULL;
     163          995 :   if (expstart.line == expend.line
     164          993 :       && expstart.column > expend.column)
     165              :     return NULL;
     166              :   /* These aren't real column numbers, give up.  */
     167          995 :   if (expstart.column == 0 || expend.column == 0)
     168              :     return NULL;
     169              : 
     170              :   /* For a single line we need to trim both edges.  */
     171          995 :   if (expstart.line == expend.line)
     172              :     {
     173          993 :       diagnostics::char_span line
     174          993 :         = fc.get_source_line (expstart.file, expstart.line);
     175          993 :       if (line.length () < 1)
     176              :         return NULL;
     177          993 :       int s = expstart.column - 1;
     178          993 :       int len = expend.column - s;
     179          993 :       if (line.length () < (size_t)expend.column)
     180              :         return NULL;
     181          993 :       return line.subspan (s, len).xstrdup ();
     182              :     }
     183              : 
     184            2 :   struct obstack buf_obstack;
     185            2 :   obstack_init (&buf_obstack);
     186              : 
     187              :   /* Loop through all lines in the range and append each to buf; may trim
     188              :      parts of the start and end lines off depending on column values.  */
     189           22 :   for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
     190              :     {
     191           20 :       diagnostics::char_span line = fc.get_source_line (expstart.file, lnum);
     192           20 :       if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line))
     193            0 :         continue;
     194              : 
     195              :       /* For the first line in the range, only start at expstart.column */
     196           20 :       if (lnum == expstart.line)
     197              :         {
     198            2 :           unsigned off = expstart.column - 1;
     199            2 :           if (line.length () < off)
     200            0 :             return NULL;
     201            2 :           line = line.subspan (off, line.length() - off);
     202              :         }
     203              :       /* For the last line, don't go past expend.column */
     204           18 :       else if (lnum == expend.line)
     205              :         {
     206            2 :           if (line.length () < (size_t)expend.column)
     207              :             return NULL;
     208            2 :           line = line.subspan (0, expend.column);
     209              :         }
     210              : 
     211              :       /* Collapse leading spaces and tabs on later lines into a single space.  */
     212           20 :       if (lnum > expstart.line)
     213              :         {
     214              :           unsigned off;
     215          230 :           for (off = 0; off < line.length(); ++off)
     216          230 :             if (line[off] != ' ' && line[off] != '\t')
     217              :               break;
     218           18 :           if (off > 0)
     219              :             {
     220           18 :               obstack_1grow (&buf_obstack, ' ');
     221           18 :               line = line.subspan (off, line.length() - off);
     222              :             }
     223              :         }
     224              : 
     225              :       /* This does not include any trailing newlines.  */
     226           20 :       obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
     227              :     }
     228              : 
     229              :   /* NUL-terminate and finish buf.  NOTE(review): no obstack_free on any path.  */
     230            2 :   obstack_1grow (&buf_obstack, 0);
     231            2 :   const char *buf = (const char *) obstack_finish (&buf_obstack);
     232              : 
     233            2 :   return xstrdup (buf);
     234              : }
     235              : 
     236              : /* Test if the location originates from the spelling location of a
     237              :    built-in token.  That is, return TRUE if LOC is a (possibly
     238              :    virtual) location of a built-in token that appears in the expansion
     239              :    list of a macro.  Please note that this function also works on
     240              :    tokens that result from built-in tokens.  For instance, the
     241              :    function would return true if passed a token "4" that is the result
     242              :    of the expansion of the built-in __LINE__ macro.  */
     243              : bool
     244        13259 : is_location_from_builtin_token (location_t loc)
     245              : {
     246        13259 :   const line_map_ordinary *map = NULL;
     247        13259 :   loc = linemap_resolve_location (line_table, loc,
     248              :                                   LRK_SPELLING_LOCATION, &map);
     249        13259 :   return loc == BUILTINS_LOCATION;
     250              : }
     251              : 
     252              : /* Expand the source location LOC into a human readable location,
     253              :    using the global line_table and the caret of LOC.  If LOC is virtual,
     254              :    it resolves to the expansion point of the involved macro.  If LOC resolves
     255              :    to a builtin location, the file name is set to the string "<built-in>".  */
     256              : 
     257              : expanded_location
     258    962236236 : expand_location (location_t loc)
     259              : {
     260    962236236 :   return expand_location_1 (line_table, loc, /*expansion_point_p=*/true,
     261    962236236 :                             location_aspect::caret);
     262              : }
     263              : 
     264              : /* Expand the source location LOC into a human readable location.  If
     265              :    LOC is virtual, it resolves to the spelling location of the
     266              :    token rather than to the macro expansion point.  ASPECT selects the
     267              :    caret, start or finish of LOC.  If LOC resolves to a builtin location,
     268              :    the file name of the readable location is set to "<built-in>".  */
     269              : 
     270              : expanded_location
     271        84301 : expand_location_to_spelling_point (location_t loc,
     272              :                                    enum location_aspect aspect)
     273              : {
     274        84301 :   return expand_location_1 (line_table, loc, /*expansion_point_p=*/false,
     275        84301 :                             aspect);
     276              : }
     277              : 
     278              : /* The rich_location class within libcpp requires a way to expand
     279              :    location_t instances, and relies on the client code
     280              :    providing a symbol named
     281              :      linemap_client_expand_location_to_spelling_point
     282              :    to do this.
     283              : 
     284              :    This is the implementation for libcommon.a (all host binaries),
     285              :    which simply calls into expand_location_1 with expansion_point_p false.  */
     286              : 
     287              : expanded_location
     288      2116008 : linemap_client_expand_location_to_spelling_point (const line_maps *set,
     289              :                                                   location_t loc,
     290              :                                                   enum location_aspect aspect)
     291              : {
     292      2116008 :   return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect);
     293              : }
     294              : 
     295              : 
     296              : /* If LOCATION is in a system header and if it is a virtual location
     297              :    for a token coming from the expansion of a macro, unwind it to
     298              :    the location of the expansion point of the macro.  If the expansion
     299              :    point is also in a system header, return the original LOCATION.
     300              :    Otherwise, return the location of the expansion point.
     301              : 
     302              :    This is used for instance when we want to emit diagnostics about a
     303              :    token that may be located in a macro that is itself defined in a
     304              :    system header, for example, for the NULL macro.  In such a case, if
     305              :    LOCATION were passed directly to diagnostic functions such as
     306              :    warning_at, the diagnostic would be suppressed (unless
     307              :    -Wsystem-headers).  */
     308              : 
     309              : location_t
     310    501676296 : expansion_point_location_if_in_system_header (location_t location)
     311              : {
     312    501676296 :   if (!in_system_header_at (location))
     313              :     return location;
     314              :   /* LOCATION is in a system header; see where its expansion point lies.  */
     315    371966894 :   location_t xloc = linemap_resolve_location (line_table, location,
     316              :                                               LRK_MACRO_EXPANSION_POINT,
     317              :                                               NULL);
     318    371966894 :   return in_system_header_at (xloc) ? location : xloc;
     319              : }
     320              : 
     321              : /* If LOCATION is a virtual location for a token coming from the expansion of
     322              :    a macro, unwind it to the macro's expansion point (using line_table).  */
     323              : 
     324              : location_t
     325          197 : expansion_point_location (location_t location)
     326              : {
     327          197 :   return linemap_resolve_location (line_table, location,
     328          197 :                                    LRK_MACRO_EXPANSION_POINT, NULL);
     329              : }
     330              : 
     331              : /* Construct a location in line_table with caret at CARET, ranging from
     332              :    START to FINISH.
     333              : 
     334              :    For example, consider:
     335              : 
     336              :                  11111111112
     337              :         12345678901234567890
     338              :      522
     339              :      523   return foo + bar;
     340              :                   ~~~~^~~~~
     341              :      524
     342              : 
     343              :    The location's caret is at the "+", line 523 column 15, but starts
     344              :    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
     345              :    of "bar" at column 19.  */
     346              : 
     347              : location_t
     348   2669691412 : make_location (location_t caret, location_t start, location_t finish)
     349              : {
     350   2669691412 :   return line_table->make_location (caret, start, finish);
     351              : }
     352              : 
     353              : /* Same as above, but taking source range SRC_RANGE instead of START/FINISH.  */
     354              : 
     355              : location_t
     356   1879040930 : make_location (location_t caret, source_range src_range)
     357              : {
     358   1879040930 :   location_t pure_loc = get_pure_location (caret);
     359   1879040930 :   return line_table->get_or_create_combined_loc (pure_loc, src_range,
     360   1879040930 :                                                  nullptr, 0);
     361              : }
     362              : 
     363              : /* An expanded_location stores the column in byte units.  This function
     364              :    converts EXPLOC's column to display units according to POLICY.  That requires
     365              :    reading the associated source line via FC to calculate the display width.
     366              :    If that cannot be done for any reason, return the byte column as a fallback.  */
     367              : int
     368       748683 : location_compute_display_column (diagnostics::file_cache &fc,
     369              :                                  expanded_location exploc,
     370              :                                  const cpp_char_column_policy &policy)
     371              : {
     372       748683 :   if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
     373              :     return exploc.column;
     374       713162 :   diagnostics::char_span line = fc.get_source_line (exploc.file, exploc.line);
     375              :   /* If line is NULL, the call below is relied on to return exploc.column,
     376              :      which is the desired fallback.  */
     377       713162 :   return cpp_byte_column_to_display_column (line.get_buffer (), line.length (),
     378       713162 :                                             exploc.column, policy);
     379              : }
     380              : 
     381              : /* Dump statistics to stderr about the memory usage of the line_table
     382              :    set of line maps.  This also displays some statistics about macro
     383              :    expansion.  */
     384              : 
     385              : void
     386            0 : dump_line_table_statistics (void)
     387              : {
     388            0 :   struct linemap_stats s;
     389            0 :   long total_used_map_size,
     390              :     macro_maps_size,
     391              :     total_allocated_map_size;
     392              : 
     393            0 :   memset (&s, 0, sizeof (s));
     394              : 
     395            0 :   linemap_get_statistics (line_table, &s);
     396              :   /* Derive the aggregate sizes reported below.  */
     397            0 :   macro_maps_size = s.macro_maps_used_size
     398            0 :     + s.macro_maps_locations_size;
     399              : 
     400            0 :   total_allocated_map_size = s.ordinary_maps_allocated_size
     401            0 :     + s.macro_maps_allocated_size
     402              :     + s.macro_maps_locations_size;
     403              : 
     404            0 :   total_used_map_size = s.ordinary_maps_used_size
     405            0 :     + s.macro_maps_used_size
     406              :     + s.macro_maps_locations_size;
     407              : 
     408            0 :   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
     409              :            s.num_expanded_macros);
     410            0 :   if (s.num_expanded_macros != 0)
     411            0 :     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
     412            0 :              s.num_macro_tokens / s.num_expanded_macros);
     413            0 :   fprintf (stderr,
     414              :            "\nLine Table allocations during the "
     415              :            "compilation process\n");
     416            0 :   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
     417            0 :            SIZE_AMOUNT (s.num_ordinary_maps_used));
     418            0 :   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
     419            0 :            SIZE_AMOUNT (s.ordinary_maps_used_size));
     420            0 :   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
     421            0 :            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
     422            0 :   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
     423            0 :            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
     424            0 :   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
     425            0 :            SIZE_AMOUNT (s.num_macro_maps_used));
     426            0 :   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
     427            0 :            SIZE_AMOUNT (s.macro_maps_used_size));
     428            0 :   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
     429            0 :            SIZE_AMOUNT (s.macro_maps_locations_size));
     430            0 :   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
     431            0 :            SIZE_AMOUNT (macro_maps_size));
     432            0 :   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
     433            0 :            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
     434            0 :   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
     435            0 :            SIZE_AMOUNT (total_allocated_map_size));
     436            0 :   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
     437            0 :            SIZE_AMOUNT (total_used_map_size));
     438            0 :   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
     439            0 :            SIZE_AMOUNT (s.adhoc_table_size));
     440            0 :   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
     441            0 :            SIZE_AMOUNT (s.adhoc_table_entries_used));
     442            0 :   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
     443            0 :            SIZE_AMOUNT (line_table->m_num_optimized_ranges));
     444            0 :   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
     445            0 :            SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
     446              : 
     447            0 :   fprintf (stderr, "\n");
     448            0 : }
     449              : 
     450              : /* Get location one beyond the final location in ordinary map IDX of SET.  */
     451              : 
     452              : static location_t
     453            6 : get_end_location (class line_maps *set, line_map_uint_t idx)
     454              : {
     455            6 :   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
     456            1 :     return set->highest_location;
     457              :   /* Otherwise the start of the next ordinary map bounds this one.  */
     458            5 :   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
     459            5 :   return MAP_START_LOCATION (next_map);
     460              : }
     461              : 
     462              : /* Helper for write_digit_row: write DIGIT to STREAM as a digit character.  */
     463              : 
     464              : static void
     465        11500 : write_digit (FILE *stream, int digit)
     466              : {
     467            0 :   fputc ('0' + digit, stream);
     468            0 : }
     469              : 
     470              : /* Helper function for dump_location_info.
     471              :    Write a row of digits to STREAM after an INDENT-wide margin: for each column
     472              :    below MAX_COL, the digit (location_t of that column / DIVISOR) % 10.  */
     473              : 
     474              : static void
     475          296 : write_digit_row (FILE *stream, int indent,
     476              :                  const line_map_ordinary *map,
     477              :                  location_t loc, int max_col, int divisor)
     478              : {
     479          296 :   fprintf (stream, "%*c", indent, ' ');
     480          296 :   fprintf (stream, "|");
     481        11796 :   for (int column = 1; column < max_col; column++)
     482              :     {
     483        11500 :       location_t column_loc = loc + (location_t (column) << map->m_range_bits);
     484        11500 :       write_digit (stream, (column_loc / divisor) % 10);
     485              :     }
     486          296 :   fprintf (stream, "\n");
     487          296 : }
     488              : 
     489              : /* Write a half-closed (START) / half-open (END) interval of
     490              :    location_t to STREAM, in the form "START <= loc < END".  */
     491              : 
     492              : static void
     493           12 : dump_location_range (FILE *stream,
     494              :                      location_t start, location_t end)
     495              : {
     496            6 :   fprintf (stream,
     497              :            "  location_t interval: %llu <= loc < %llu\n",
     498              :            (unsigned long long) start, (unsigned long long) end);
     499            0 : }
     500              : 
     501              : /* Write NAME on its own line, then the half-closed (START) / half-open (END)
     502              :    interval of location_t, then a blank line, to STREAM.  */
     503              : 
     504              : static void
     505            4 : dump_labelled_location_range (FILE *stream,
     506              :                               const char *name,
     507              :                               location_t start, location_t end)
     508              : {
     509            4 :   fprintf (stream, "%s\n", name);
     510            4 :   dump_location_range (stream, start, end);
     511            4 :   fprintf (stream, "\n");
     512            4 : }
     513              : 
     514              : /* Write a visualization of each class of location_t in the line_table to STREAM.  */
     515              : 
     516              : void
     517            1 : dump_location_info (FILE *stream)
     518              : {
     519            1 :   diagnostics::file_cache fc;
     520              : 
     521              :   /* Visualize the reserved locations.  */
     522            1 :   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
     523              :                                 0, RESERVED_LOCATION_COUNT);
     524              : 
     525            1 :   using ULL = unsigned long long;
     526              : 
     527              :   /* Visualize the ordinary line_map instances, rendering the sources. */
     528            7 :   for (line_map_uint_t idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table);
     529              :        idx++)
     530              :     {
     531            6 :       location_t end_location = get_end_location (line_table, idx);
     532              :       /* half-closed: doesn't include this one. */
     533              : 
     534            6 :       const line_map_ordinary *map
     535            6 :         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
     536            6 :       fprintf (stream, "ORDINARY MAP: %llu\n", (ULL) idx);
     537            6 :       dump_location_range (stream,
     538              :                            MAP_START_LOCATION (map), end_location);
     539            6 :       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
     540            6 :       fprintf (stream, "  starting at line: %i\n",
     541              :                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
     542            6 :       fprintf (stream, "  column and range bits: %i\n",
     543            6 :                map->m_column_and_range_bits);
     544            6 :       fprintf (stream, "  column bits: %i\n",
     545            6 :                map->m_column_and_range_bits - map->m_range_bits);
     546            6 :       fprintf (stream, "  range bits: %i\n",
     547            6 :                map->m_range_bits);
     548            6 :       const char * reason;
     549            6 :       switch (map->reason) {
     550              :       case LC_ENTER:
     551              :         reason = "LC_ENTER";
     552              :         break;
     553            1 :       case LC_LEAVE:
     554            1 :         reason = "LC_LEAVE";
     555            1 :         break;
     556            3 :       case LC_RENAME:
     557            3 :         reason = "LC_RENAME";
     558            3 :         break;
     559            0 :       case LC_RENAME_VERBATIM:
     560            0 :         reason = "LC_RENAME_VERBATIM";
     561            0 :         break;
     562            0 :       case LC_ENTER_MACRO:
     563            0 :         reason = "LC_ENTER_MACRO";
     564            0 :         break;
     565            0 :       default:
     566            0 :         reason = "Unknown";
     567              :       }
     568            6 :       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
     569              : 
     570            6 :       const line_map_ordinary *includer_map
     571            6 :         = linemap_included_from_linemap (line_table, map);
     572            6 :       fprintf (stream, "  included from location: %llu",
     573            6 :                (ULL) linemap_included_from (map));
     574            6 :       if (includer_map) {
     575            1 :         fprintf (stream, " (in ordinary map %llu)",
     576            1 :                  ULL (includer_map - line_table->info_ordinary.maps));
     577              :       }
     578            6 :       fprintf (stream, "\n");
     579              : 
     580              :       /* Render the span of source lines that this "map" covers.  */
     581            6 :       for (location_t loc = MAP_START_LOCATION (map);
     582         9358 :            loc < end_location;
     583         9352 :            loc += (location_t (1) << map->m_range_bits))
     584              :         {
     585         9356 :           gcc_assert (pure_location_p (line_table, loc) );
     586              : 
     587         9356 :           expanded_location exploc
     588         9356 :             = linemap_expand_location (line_table, map, loc);
     589              : 
     590         9356 :           if (exploc.column == 0)
     591              :             {
     592              :               /* Beginning of a new source line: draw the line.  */
     593              : 
     594           78 :               diagnostics::char_span line_text
     595           78 :                 = fc.get_source_line (exploc.file, exploc.line);
     596           78 :               if (!line_text)
     597              :                 break;
     598           74 :               fprintf (stream,
     599              :                        "%s:%3i|loc:%5llu|%.*s\n",
     600              :                        exploc.file, exploc.line,
     601              :                        (ULL) loc,
     602           74 :                        (int)line_text.length (), line_text.get_buffer ());
     603              : 
     604              :               /* "loc" is at column 0, which means "the whole line".
     605              :                  Render the locations *within* the line, by underlining
     606              :                  it, showing the location_t numeric values
     607              :                  at each column.  */
     608           74 :               auto max_col = (ULL (1) << map->m_column_and_range_bits) - 1;
     609           74 :               if (max_col > line_text.length ())
     610           74 :                 max_col = line_text.length () + 1;
     611              : 
     612           74 :               int len_lnum = diagnostics::num_digits (exploc.line);
     613           74 :               if (len_lnum < 3)
     614              :                 len_lnum = 3;
     615           74 :               int len_loc = diagnostics::num_digits (loc);
     616           74 :               if (len_loc < 5)
     617              :                 len_loc = 5;
     618              : 
     619           74 :               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
     620              : 
     621              :               /* Thousands.  */
     622           74 :               if (end_location > 999)
     623           74 :                 write_digit_row (stream, indent, map, loc, max_col, 1000);
     624              : 
     625              :               /* Hundreds.  */
     626           74 :               if (end_location > 99)
     627           74 :                 write_digit_row (stream, indent, map, loc, max_col, 100);
     628              : 
     629              :               /* Tens.  */
     630           74 :               write_digit_row (stream, indent, map, loc, max_col, 10);
     631              : 
     632              :               /* Units.  */
     633           74 :               write_digit_row (stream, indent, map, loc, max_col, 1);
     634              :             }
     635              :         }
     636            6 :       fprintf (stream, "\n");
     637              :     }
     638              : 
     639              :   /* Visualize unallocated values.  */
     640            1 :   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
     641              :                                 line_table->highest_location,
     642              :                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
     643              : 
     644              :   /* Visualize the macro line_map instances; locations are also reported via inform ().  */
     645            3 :   for (line_map_uint_t i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
     646              :     {
     647              :       /* Each macro map that is allocated owns location_t values
     648              :          that are *lower* than the one before them.
     649              :          Hence it's meaningful to view them either in order of ascending
     650              :          source locations, or in order of ascending macro map index.  */
     651            2 :       const bool ascending_location_ts = true;
     652            2 :       auto idx = (ascending_location_ts
     653            2 :                   ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
     654            2 :                   : i);
     655            2 :       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
     656            2 :       fprintf (stream, "MACRO %llu: %s (%u tokens)\n",
     657              :                (ULL) idx,
     658              :                linemap_map_get_macro_name (map),
     659              :                MACRO_MAP_NUM_MACRO_TOKENS (map));
     660            4 :       dump_location_range (stream,
     661            2 :                            map->start_location,
     662            2 :                            (map->start_location
     663            2 :                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
     664            2 :       inform (map->get_expansion_point_location (),
     665              :               "expansion point is location %llu",
     666            2 :               (ULL) map->get_expansion_point_location ());
     667            2 :       fprintf (stream, "  map->start_location: %llu\n",
     668            2 :                (ULL) map->start_location);
     669              : 
     670            2 :       fprintf (stream, "  macro_locations:\n");
     671            4 :       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
     672              :         {
     673            2 :           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
     674            2 :           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
     675              : 
     676              :           /* linemap_add_macro_token encodes token numbers in an expansion
     677              :              by putting them after MAP_START_LOCATION. */
     678              : 
     679              :           /* I'm typically seeing 4 uninitialized entries at the end of
     680              :              0xafafafaf.
     681              :              This appears to be due to macro.cc:replace_args
     682              :              adding 2 extra args for padding tokens; presumably there may
     683              :              be a leading and/or trailing padding token injected,
     684              :              each for 2 more location slots.
     685              :              This would explain there being up to 4 location_ts slots
     686              :              that may be uninitialized.  */
     687              : 
     688            2 :           fprintf (stream, "    %u: %llu, %llu\n",
     689              :                    i,
     690              :                    (ULL) x,
     691              :                    (ULL) y);
     692            2 :           if (x == y)
     693              :             {
     694            2 :               if (x < MAP_START_LOCATION (map))
     695            2 :                 inform (x, "token %u has %<x-location == y-location == %llu%>",
     696              :                         i, (ULL) x);
     697              :               else
     698            0 :                 fprintf (stream,
     699              :                          "x-location == y-location == %llu"
     700              :                          " encodes token # %u\n",
     701              :                          (ULL) x,
     702            0 :                          (unsigned int)(x - MAP_START_LOCATION (map)));
     703              :             }
     704              :           else
     705              :             {
     706            0 :               inform (x, "token %u has %<x-location == %llu%>", i, (ULL) x);
     707            0 :               inform (x, "token %u has %<y-location == %llu%>", i, (ULL) y);
     708              :             }
     709              :         }
     710            2 :       fprintf (stream, "\n");
     711              :     }
     712              : 
     713              :   /* It appears that MAX_LOCATION_T itself is never assigned to a
     714              :      macro map, presumably due to an off-by-one error somewhere
     715              :      between the logic in linemap_enter_macro and
     716              :      LINEMAPS_MACRO_LOWEST_LOCATION.  */
     717            1 :   dump_labelled_location_range (stream, "MAX_LOCATION_T",
     718              :                                 MAX_LOCATION_T,
     719              :                                 MAX_LOCATION_T + 1);
     720              : 
     721              :   /* Visualize ad-hoc values.  */
     722            1 :   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
     723              :                                 MAX_LOCATION_T + 1, location_t (-1));
     724            1 : }
     725              : 
     726              : /* string_concat's constructor: store NUM and take a copy of LOCS[0..NUM-1].  */
     727              : 
     728      3168362 : string_concat::string_concat (int num, location_t *locs)
     729      3168362 :   : m_num (num)
     730              : {
     731      3168362 :   m_locs = ggc_vec_alloc <location_t> (num);
     732     37162920 :   for (int i = 0; i < num; i++)
     733     33994558 :     m_locs[i] = locs[i];
     734      3168362 : }
     735              : 
     736              : /* string_concat_db's constructor: create the table mapping a key location_t to its string_concat.  */
     737              : 
     738       210762 : string_concat_db::string_concat_db ()
     739              : {
     740       210762 :   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
     741       210762 : }
     742              : 
     743              : /* Record that a string concatenation occurred, covering NUM (> 1)
     744              :    string literal tokens.  LOCS is a non-NULL array of size NUM, containing
     745              :    the locations of the tokens.  A copy of LOCS is taken.  */
     746              : 
     747              : void
     748      3168368 : string_concat_db::record_string_concatenation (int num, location_t *locs)
     749              : {
     750      3168368 :   gcc_assert (num > 1);
     751      3168368 :   gcc_assert (locs);
     752              : 
     753      3168368 :   location_t key_loc = get_key_loc (locs[0]);
     754              :   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:
     755              :      any data now recorded under key 'key_loc' would be overwritten by a
     756              :      subsequent call with the same key 'key_loc'.  */
     757      3168368 :   if (RESERVED_LOCATION_P (key_loc))
     758            6 :     return;
     759              : 
     760      3168362 :   string_concat *concat
     761      3168362 :     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
     762      3168362 :   m_table->put (key_loc, concat);
     763              : }
     764              : 
     765              : /* Determine if LOC was the location of the initial token of a
     766              :    concatenation of string literal tokens.
     767              :    If so, write the number of tokens to *OUT_NUM and a pointer to the
     768              :    array of the tokens' locations to *OUT_LOCS, and return true.
     769              :    *OUT_LOCS is a borrowed pointer to storage owned by the
     770              :    string_concat_db.
     771              :    Otherwise, return false, leaving *OUT_NUM and *OUT_LOCS untouched.  */
     772              : 
     773              : bool
     774        34529 : string_concat_db::get_string_concatenation (location_t loc,
     775              :                                             int *out_num,
     776              :                                             location_t **out_locs)
     777              : {
     778        34529 :   gcc_assert (out_num);
     779        34529 :   gcc_assert (out_locs);
     780              : 
     781        34529 :   location_t key_loc = get_key_loc (loc);
     782              :   /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see
     783              :      discussion in 'string_concat_db::record_string_concatenation'.  */
     784        34529 :   if (RESERVED_LOCATION_P (key_loc))
     785              :     return false;
     786              : 
     787        34527 :   string_concat **concat = m_table->get (key_loc);
     788        34527 :   if (!concat)
     789              :     return false;
     790              : 
     791         4352 :   *out_num = (*concat)->m_num;
     792         4352 :   *out_locs =(*concat)->m_locs;
     793         4352 :   return true;
     794              : }
     795              : 
     796              : /* Internal function.  Canonicalize LOC into a form suitable for
     797              :    use as a key within the database, stripping away macro expansion,
     798              :    ad-hoc information, and range information: resolve LOC to its spelling
     799              :    location, then use the start of the range of the result.  */
     800              : 
     801              : location_t
     802      3202897 : string_concat_db::get_key_loc (location_t loc)
     803              : {
     804      3202897 :   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
     805              :                                   NULL);
     806              : 
     807      3202897 :   loc = get_range_from_loc (line_table, loc).m_start;
     808              : 
     809      3202897 :   return loc;
     810              : }
     811              : 
     812              : /* Helper class for use within get_substring_ranges_for_loc.
     813              :    A vec of cpp_string with responsibility for releasing all of the
     814              :    str->text for each str in the vector.  */
     815              : 
     816              : class auto_cpp_string_vec :  public auto_vec <cpp_string>
     817              : {
     818              :  public:
     819        34529 :   auto_cpp_string_vec (int alloc)
     820        69058 :     : auto_vec <cpp_string> (alloc) {}
     821              : 
     822        34529 :   ~auto_cpp_string_vec ()
     823              :   {
     824              :     /* Clean up the copies within this vec: free each element's text.  */
     825        34529 :     int i;
     826        34529 :     cpp_string *str;
     827        70016 :     FOR_EACH_VEC_ELT (*this, i, str)
     828        35487 :       free (const_cast <unsigned char *> (str->text));
     829        34529 :   }
     830              : };
     831              : 
     832              : /* Attempt to populate RANGES with source location information on the
     833              :    individual characters within the string literal found at STRLOC.
     834              :    If CONCATS is non-NULL, then any string literals that the token at
     835              :    STRLOC was concatenated with are also added to RANGES.
     836              : 
     837              :    Return NULL if successful, or an error message if any errors occurred (in
     838              :    which case RANGES may be only partially populated and should not
     839              :    be used).
     840              : 
     841              :    This is implemented by re-parsing the relevant source line(s), read via FC.  */
     842              : 
     843              : static const char *
     844        36775 : get_substring_ranges_for_loc (cpp_reader *pfile,
     845              :                               diagnostics::file_cache &fc,
     846              :                               string_concat_db *concats,
     847              :                               location_t strloc,
     848              :                               enum cpp_ttype type,
     849              :                               cpp_substring_ranges &ranges)
     850              : {
     851        36775 :   gcc_assert (pfile);
     852              : 
     853        36775 :   if (strloc == UNKNOWN_LOCATION)
     854              :     return "unknown location";
     855              : 
     856              :   /* Reparsing the strings requires accurate location information.
     857              :      If -ftrack-macro-expansion has been overridden from its default
     858              :      of 2, then we might have a location of a macro expansion point,
     859              :      rather than the location of the literal itself.
     860              :      Avoid this by requiring that we have full macro expansion tracking
     861              :      for substring locations to be available.  */
     862        36775 :   if (cpp_get_options (pfile)->track_macro_expansion != 2)
     863              :     return "track_macro_expansion != 2";
     864              : 
     865              :   /* If #line or # 44 "file"-style directives are present, then there's
     866              :      no guarantee that the line numbers we have can be used to locate
     867              :      the strings.  For example, we might have a .i file with # directives
     868              :      pointing back to lines within a .c file, but the .c file might
     869              :      have been edited since the .i file was created.
     870              :      In such a case, the safest course is to disable on-demand substring
     871              :      locations.  */
     872        34532 :   if (line_table->seen_line_directive)
     873              :     return "seen line directive";
     874              : 
     875              :   /* If string concatenation has occurred at STRLOC, get the locations
     876              :      of all of the literal tokens making up the compound string.
     877              :      Otherwise, just use STRLOC.  */
     878        34529 :   int num_locs = 1;
     879        34529 :   location_t *strlocs = &strloc;
     880        34529 :   if (concats)
     881        34529 :     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
     882              : 
     883        34529 :   auto_cpp_string_vec strs (num_locs);
     884        34529 :   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
     885        70009 :   for (int i = 0; i < num_locs; i++)
     886              :     {
     887              :       /* Get range of strlocs[i].  We will use it to locate the start and
     888              :          finish of the literal token within the line.  */
     889        41574 :       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
     890              : 
     891        41574 :       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
     892              :         {
     893              :           /* If the string token was within a macro expansion, then we can
     894              :              cope with it for the simple case where we have a single token.
     895              :              Otherwise, bail out.  */
     896         1171 :           if (src_range.m_start != src_range.m_finish)
     897         6094 :             return "macro expansion";
     898              :         }
     899              :       else
     900              :         {
     901        40403 :           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
     902              :             /* If so, we can't reliably determine where the token started within
     903              :                its line.  */
     904              :             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
     905              : 
     906        34731 :           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
     907              :             /* If so, we can't reliably determine where the token finished
     908              :                within its line.  */
     909              :             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
     910              :         }
     911              : 
     912        35731 :       expanded_location start
     913        35731 :         = expand_location_to_spelling_point (src_range.m_start,
     914              :                                              location_aspect::start);
     915        35731 :       expanded_location finish
     916        35731 :         = expand_location_to_spelling_point (src_range.m_finish,
     917              :                                              location_aspect::finish);
     918        35731 :       if (start.file != finish.file)
     919              :         return "range endpoints are in different files";
     920        35731 :       if (start.line != finish.line)
     921              :         return "range endpoints are on different lines";
     922        35488 :       if (start.column > finish.column)
     923              :         return "range endpoints are reversed";
     924              : 
     925        35488 :       diagnostics::char_span line = fc.get_source_line (start.file, start.line);
     926        35488 :       if (!line)
     927              :         return "unable to read source line";
     928              : 
     929              :       /* Determine the location of the literal (including quotes
     930              :          and leading prefix chars, such as the 'u' in a u""
     931              :          token).  */
     932        35488 :       size_t literal_length = finish.column - start.column + 1;
     933              : 
     934              :       /* Ensure that we don't crash if we got the wrong location.  */
     935        35488 :       if (start.column < 1)
     936              :         return "zero start column";
     937        35488 :       if (line.length () < (start.column - 1 + literal_length))
     938              :         return "line is not wide enough";
     939              : 
     940        35487 :       diagnostics::char_span literal
     941        35487 :         = line.subspan (start.column - 1, literal_length);
     942              : 
     943        35487 :       cpp_string from;
     944        35487 :       from.len = literal_length;
     945              :       /* Make a copy of the literal, to avoid having to rely on
     946              :          the lifetime of the copy of the line within the cache.
     947              :          This will be released by the auto_cpp_string_vec dtor.  */
     948        35487 :       from.text = (unsigned char *)literal.xstrdup ();
     949        35487 :       strs.safe_push (from);
     950              : 
     951              :       /* For very long lines, a new linemap could have started
     952              :          halfway through the token.
     953              :          Ensure that the loc_reader uses the linemap of the
     954              :          *end* of the token for its start location.  */
     955        35487 :       const line_map_ordinary *start_ord_map;
     956        35487 :       linemap_resolve_location (line_table, src_range.m_start,
     957              :                                 LRK_SPELLING_LOCATION, &start_ord_map);
     958        35487 :       const line_map_ordinary *final_ord_map;
     959        35487 :       linemap_resolve_location (line_table, src_range.m_finish,
     960              :                                 LRK_SPELLING_LOCATION, &final_ord_map);
     961        35487 :       if (start_ord_map == NULL || final_ord_map == NULL)
     962              :         return "failed to get ordinary maps";
     963              :       /* Bulletproofing.  We ought to only have different ordinary maps
     964              :          for start vs finish due to line-length jumps.  */
     965        35486 :       if (start_ord_map != final_ord_map
     966         6865 :           && start_ord_map->to_file != final_ord_map->to_file)
     967              :         return "start and finish are spelled in different ordinary maps";
     968              :       /* The file from linemap_resolve_location ought to match that from
     969              :          expand_location_to_spelling_point.  */
     970        35486 :       if (start_ord_map->to_file != start.file)
     971              :         return "mismatching file after resolving linemap";
     972              : 
     973        35480 :       location_t start_loc
     974        35480 :         = linemap_position_for_line_and_column (line_table, final_ord_map,
     975              :                                                 start.line, start.column);
     976              : 
     977        35480 :       cpp_string_location_reader loc_reader (start_loc, line_table);
     978        35480 :       loc_readers.safe_push (loc_reader);
     979              :     }
     980              : 
     981              :   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
     982        56870 :   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
     983              :                                                  loc_readers.address (),
     984              :                                                  num_locs, &ranges, type);
     985        28435 :   if (err)
     986              :     return err;
     987              : 
     988              :   /* Success: "ranges" should now contain information on the string.  */
     989              :   return NULL;
     990        34529 : }
     991              : 
     992              : /* Attempt to populate *OUT_LOC with source location information on the
     993              :    given characters within the string literal found at STRLOC.
     994              :    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
     995              :    character set.
     996              : 
     997              :    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
     998              :    and string literal "012345\n789"
     999              :    *OUT_LOC is written to with:
    1000              :      "012345\n789"
    1001              :          ~^~~~~
    1002              : 
    1003              :    If CONCATS is non-NULL, then any string literals that the token at
    1004              :    STRLOC was concatenated with are also considered.
    1005              : 
    1006              :    This is implemented by re-parsing the relevant source line(s).
    1007              : 
    1008              :    Return NULL if successful, or an error message if any errors occurred
    1009              :    (in which case *OUT_LOC is not written to).  Error messages are intended
    1010              :    for GCC developers (to help debugging) rather than for end-users.  */
    1011              : 
    1012              : const char *
    1013        11123 : get_location_within_string (cpp_reader *pfile,
    1014              :                             diagnostics::file_cache &fc,
    1015              :                             string_concat_db *concats,
    1016              :                             location_t strloc,
    1017              :                             enum cpp_ttype type,
    1018              :                             int caret_idx, int start_idx, int end_idx,
    1019              :                             location_t *out_loc)
    1020              : {
    1021        11123 :   gcc_checking_assert (caret_idx >= 0);
    1022        11123 :   gcc_checking_assert (start_idx >= 0);
    1023        11123 :   gcc_checking_assert (end_idx >= 0);
    1024        11123 :   gcc_assert (out_loc);
    1025              : 
    1026        11123 :   cpp_substring_ranges ranges;
    1027        11123 :   const char *err
    1028        11123 :     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
    1029        11123 :   if (err)
    1030              :     return err;
    1031              : 
    1032         8414 :   if (caret_idx >= ranges.get_num_ranges ())
    1033              :     return "caret_idx out of range";
    1034         8414 :   if (start_idx >= ranges.get_num_ranges ())
    1035              :     return "start_idx out of range";
    1036         8414 :   if (end_idx >= ranges.get_num_ranges ())
    1037              :     return "end_idx out of range";
    1038              : 
    1039         8414 :   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
    1040         8414 :                             ranges.get_range (start_idx).m_start,
    1041         8414 :                             ranges.get_range (end_idx).m_finish);
    1042         8414 :   return NULL;
    1043        11123 : }
    1044              : 
    1045              : /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */
    1046              : 
    1047              : location_t
    1048     56196149 : location_with_discriminator (location_t locus, int discriminator)
    1049              : {
    1050     56196149 :   tree block = LOCATION_BLOCK (locus);
    1051     56196149 :   source_range src_range = get_range_from_loc (line_table, locus);
    1052     56196149 :   locus = get_pure_location (locus);
    1053              : 
    1054     56196149 :   if (locus == UNKNOWN_LOCATION)
    1055              :     return locus;
    1056              : 
    1057     31932914 :   return line_table->get_or_create_combined_loc (locus, src_range, block,
    1058     31932914 :                                                  discriminator);
    1059              : }
    1060              : 
    1061              : /* Return TRUE if LOCUS represents a location with a discriminator.  */
    1062              : 
    1063              : bool
    1064     77877468 : has_discriminator (location_t locus)
    1065              : {
    1066     77877468 :   return get_discriminator_from_loc (locus) != 0;
    1067              : }
    1068              : 
    1069              : /* Return the discriminator for LOCUS.  */
    1070              : 
    1071              : int
    1072    404179215 : get_discriminator_from_loc (location_t locus)
    1073              : {
    1074    404179215 :   return get_discriminator_from_loc (line_table, locus);
    1075              : }
    1076              : 
    1077              : /* Create a location with hierarchical discriminator components.  */
    1078              : 
    1079              : location_t
    1080      3475088 : location_with_discriminator_components (location_t locus,
    1081              :                                         const discriminator_components &comp)
    1082              : {
    1083      3475088 :   gcc_assert (comp.base <= DISCR_BASE_MAX);
    1084      3475088 :   gcc_assert (comp.multiplicity <= DISCR_MULTIPLICITY_MAX);
    1085      3475088 :   gcc_assert (comp.copyid <= DISCR_COPYID_MAX);
    1086      3475088 :   unsigned int discriminator = (comp.base << DISCR_BASE_SHIFT)
    1087      3475088 :     | (comp.multiplicity << DISCR_MULTIPLICITY_SHIFT)
    1088      3475088 :     | (comp.copyid << DISCR_COPYID_SHIFT);
    1089      3475088 :   return location_with_discriminator (locus, discriminator);
    1090              : }
    1091              : 
    1092              : /* Get hierarchical discriminator components from a location.  */
    1093              : 
    1094              : discriminator_components
    1095      3475088 : get_discriminator_components_from_loc (location_t locus)
    1096              : {
    1097      3475088 :   unsigned int discriminator = get_discriminator_from_loc (locus);
    1098      3475088 :   discriminator_components comp;
    1099      3475088 :   comp.base = discriminator & DISCR_BASE_MASK;
    1100      3475088 :   comp.multiplicity = (discriminator >> DISCR_MULTIPLICITY_SHIFT)
    1101      3475088 :     & DISCR_MULTIPLICITY_MASK;
    1102      3475088 :   comp.copyid = (discriminator >> DISCR_COPYID_SHIFT) & DISCR_COPYID_MASK;
    1103      3475088 :   return comp;
    1104              : }
    1105              : 
    1106              : #if CHECKING_P
    1107              : 
    1108              : namespace selftest {
    1109              : 
    1110              : /* Selftests of location handling.  */
    1111              : 
    1112              : /* Attempt to populate *OUT_RANGE with source location information on the
    1113              :    given character within the string literal found at STRLOC.
    1114              :    CHAR_IDX refers to an offset within the execution character set.
    1115              :    If CONCATS is non-NULL, then any string literals that the token at
    1116              :    STRLOC was concatenated with are also considered.
    1117              : 
    1118              :    This is implemented by re-parsing the relevant source line(s).
    1119              : 
    1120              :    Return NULL if successful, or an error message if any errors occurred.
    1121              :    Error messages are intended for GCC developers (to help debugging) rather
    1122              :    than for end-users.  */
    1123              : 
    1124              : static const char *
    1125        23748 : get_source_range_for_char (cpp_reader *pfile,
    1126              :                            diagnostics::file_cache &fc,
    1127              :                            string_concat_db *concats,
    1128              :                            location_t strloc,
    1129              :                            enum cpp_ttype type,
    1130              :                            int char_idx,
    1131              :                            source_range *out_range)
    1132              : {
    1133        23748 :   gcc_checking_assert (char_idx >= 0);
    1134        23748 :   gcc_assert (out_range);
    1135              : 
    1136        23748 :   cpp_substring_ranges ranges;
    1137        23748 :   const char *err
    1138        23748 :     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
    1139        23748 :   if (err)
    1140              :     return err;
    1141              : 
    1142        18652 :   if (char_idx >= ranges.get_num_ranges ())
    1143              :     return "char_idx out of range";
    1144              : 
    1145        18652 :   *out_range = ranges.get_range (char_idx);
    1146        18652 :   return NULL;
    1147        23748 : }
    1148              : 
    1149              : /* As get_source_range_for_char, but write to *OUT the number
    1150              :    of ranges that are available.  */
    1151              : 
    1152              : static const char *
    1153         1268 : get_num_source_ranges_for_substring (cpp_reader *pfile,
    1154              :                                      diagnostics::file_cache &fc,
    1155              :                                      string_concat_db *concats,
    1156              :                                      location_t strloc,
    1157              :                                      enum cpp_ttype type,
    1158              :                                      int *out)
    1159              : {
    1160         1268 :   gcc_assert (out);
    1161              : 
    1162         1268 :   cpp_substring_ranges ranges;
    1163         1268 :   const char *err
    1164         1268 :     = get_substring_ranges_for_loc (pfile, fc, concats, strloc, type, ranges);
    1165              : 
    1166         1268 :   if (err)
    1167              :     return err;
    1168              : 
    1169          884 :   *out = ranges.get_num_ranges ();
    1170          884 :   return NULL;
    1171         1268 : }
    1172              : 
    1173              : /* Selftests of location handling.  */
    1174              : 
    1175              : /* Verify that compare() on linenum_type handles comparisons over the full
    1176              :    range of the type.  */
    1177              : 
    1178              : static void
    1179            4 : test_linenum_comparisons ()
    1180              : {
    1181            4 :   linenum_type min_line (0);
    1182            4 :   linenum_type max_line (0xffffffff);
    1183            4 :   ASSERT_EQ (0, compare (min_line, min_line));
    1184            4 :   ASSERT_EQ (0, compare (max_line, max_line));
    1185              : 
    1186            4 :   ASSERT_GT (compare (max_line, min_line), 0);
    1187            4 :   ASSERT_LT (compare (min_line, max_line), 0);
    1188            4 : }
    1189              : 
    1190              : /* Helper function for verifying location data: when location_t
    1191              :    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
    1192              :    as having column 0.  */
    1193              : 
    1194              : static bool
    1195        65136 : should_have_column_data_p (location_t loc)
    1196              : {
    1197        65136 :   if (IS_ADHOC_LOC (loc))
    1198        20240 :     loc = get_location_from_adhoc_loc (line_table, loc);
    1199        65136 :   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
    1200         6452 :     return false;
    1201              :   return true;
    1202              : }
    1203              : 
    1204              : /* Selftest for should_have_column_data_p.  */
    1205              : 
    1206              : static void
    1207            4 : test_should_have_column_data_p ()
    1208              : {
    1209            4 :   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
    1210            4 :   ASSERT_TRUE
    1211              :     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
    1212            4 :   ASSERT_FALSE
    1213              :     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
    1214            4 : }
    1215              : 
    1216              : /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
    1217              :    on LOC.  */
    1218              : 
    1219              : static void
    1220         1068 : assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
    1221              :               location_t loc)
    1222              : {
    1223         1068 :   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
    1224         1068 :   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
    1225              :   /* If location_t values are sufficiently high, then column numbers
    1226              :      will be unavailable and LOCATION_COLUMN (loc) will be 0.
    1227              :      When close to the threshold, column numbers *may* be present: if
    1228              :      the final linemap before the threshold contains a line that straddles
    1229              :      the threshold, locations in that line have column information.  */
    1230         1068 :   if (should_have_column_data_p (loc))
    1231          660 :     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
    1232         1068 : }
    1233              : 
    1234              : /* Various selftests involve constructing a line table and one or more
    1235              :    line maps within it.
    1236              : 
    1237              :    For maximum test coverage we want to run these tests with a variety
    1238              :    of situations:
    1239              :    - line_table->default_range_bits: some frontends use a non-zero value
    1240              :    and others use zero
    1241              :    - the fallback modes within line-map.cc: there are various threshold
    1242              :    values for location_t beyond which line-map.cc changes
    1243              :    behavior (disabling of the range-packing optimization, disabling
    1244              :    of column-tracking).  We can exercise these by starting the line_table
    1245              :    at interesting values at or near these thresholds.
    1246              : 
    1247              :    The following struct describes a particular case within our test
    1248              :    matrix.  */
    1249              : 
    1250              : class line_table_case
    1251              : {
    1252              : public:
    1253         5860 :   line_table_case (int default_range_bits, location_t base_location)
    1254         5860 :   : m_default_range_bits (default_range_bits),
    1255         5860 :     m_base_location (base_location)
    1256              :   {}
    1257              : 
    1258              :   int m_default_range_bits;
    1259              :   location_t m_base_location;
    1260              : };
    1261              : 
    1262              : /* Constructor.  Store the old value of line_table, and create a new
    1263              :    one, using sane defaults.  */
    1264              : 
    1265           21 : line_table_test::line_table_test ()
    1266              : {
    1267           21 :   gcc_assert (saved_line_table == NULL);
    1268           21 :   saved_line_table = line_table;
    1269           21 :   line_table = ggc_alloc<line_maps> ();
    1270           21 :   linemap_init (line_table, BUILTINS_LOCATION);
    1271           21 :   gcc_assert (saved_line_table->m_reallocator);
    1272           21 :   line_table->m_reallocator = saved_line_table->m_reallocator;
    1273           21 :   gcc_assert (saved_line_table->m_round_alloc_size);
    1274           21 :   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
    1275           21 :   line_table->default_range_bits = 0;
    1276           21 : }
    1277              : 
    1278              : /* Constructor.  Store the old value of line_table, and create a new
    1279              :    one, using the situation described in CASE_.  */
    1280              : 
    1281         6724 : line_table_test::line_table_test (const line_table_case &case_)
    1282              : {
    1283         6724 :   gcc_assert (saved_line_table == NULL);
    1284         6724 :   saved_line_table = line_table;
    1285         6724 :   line_table = ggc_alloc<line_maps> ();
    1286         6724 :   linemap_init (line_table, BUILTINS_LOCATION);
    1287         6724 :   gcc_assert (saved_line_table->m_reallocator);
    1288         6724 :   line_table->m_reallocator = saved_line_table->m_reallocator;
    1289         6724 :   gcc_assert (saved_line_table->m_round_alloc_size);
    1290         6724 :   line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
    1291         6724 :   line_table->default_range_bits = case_.m_default_range_bits;
    1292         6724 :   if (case_.m_base_location)
    1293              :     {
    1294         6160 :       line_table->highest_location = case_.m_base_location;
    1295         6160 :       line_table->highest_line = case_.m_base_location;
    1296              :     }
    1297         6724 : }
    1298              : 
    1299              : /* Destructor.  Restore the old value of line_table.  */
    1300              : 
    1301         6745 : line_table_test::~line_table_test ()
    1302              : {
    1303         6745 :   gcc_assert (saved_line_table != NULL);
    1304         6745 :   line_table = saved_line_table;
    1305         6745 :   saved_line_table = NULL;
    1306         6745 : }
    1307              : 
    1308              : /* Verify basic operation of ordinary linemaps.  */
    1309              : 
    1310              : static void
    1311           96 : test_accessing_ordinary_linemaps (const line_table_case &case_)
    1312              : {
    1313           96 :   line_table_test ltt (case_);
    1314              : 
    1315              :   /* Build a simple linemap describing some locations. */
    1316           96 :   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
    1317              : 
    1318           96 :   linemap_line_start (line_table, 1, 100);
    1319           96 :   location_t loc_a = linemap_position_for_column (line_table, 1);
    1320           96 :   location_t loc_b = linemap_position_for_column (line_table, 23);
    1321              : 
    1322           96 :   linemap_line_start (line_table, 2, 100);
    1323           96 :   location_t loc_c = linemap_position_for_column (line_table, 1);
    1324           96 :   location_t loc_d = linemap_position_for_column (line_table, 17);
    1325              : 
    1326              :   /* Example of a very long line.  */
    1327           96 :   linemap_line_start (line_table, 3, 2000);
    1328           96 :   location_t loc_e = linemap_position_for_column (line_table, 700);
    1329              : 
    1330              :   /* Transitioning back to a short line.  */
    1331           96 :   linemap_line_start (line_table, 4, 0);
    1332           96 :   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
    1333              : 
    1334           96 :   if (should_have_column_data_p (loc_back_to_short))
    1335              :     {
    1336              :       /* Verify that we switched to short lines in the linemap.  */
    1337           56 :       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
    1338           56 :       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    1339              :     }
    1340              : 
    1341              :   /* Example of a line that will eventually be seen to be longer
    1342              :      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
    1343              :      below that.  */
    1344           96 :   linemap_line_start (line_table, 5, 2000);
    1345              : 
    1346           96 :   location_t loc_start_of_very_long_line
    1347           96 :     = linemap_position_for_column (line_table, 2000);
    1348           96 :   location_t loc_too_wide
    1349           96 :     = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 1);
    1350           96 :   location_t loc_too_wide_2
    1351           96 :     = linemap_position_for_column (line_table, LINE_MAP_MAX_COLUMN_NUMBER + 2);
    1352              : 
    1353              :   /* ...and back to a sane line length.  */
    1354           96 :   linemap_line_start (line_table, 6, 100);
    1355           96 :   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
    1356              : 
    1357           96 :   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
    1358              : 
    1359              :   /* Multiple files.  */
    1360           96 :   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
    1361           96 :   linemap_line_start (line_table, 1, 200);
    1362           96 :   location_t loc_f = linemap_position_for_column (line_table, 150);
    1363           96 :   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
    1364              : 
    1365              :   /* Verify that we can recover the location info.  */
    1366           96 :   assert_loceq ("foo.c", 1, 1, loc_a);
    1367           96 :   assert_loceq ("foo.c", 1, 23, loc_b);
    1368           96 :   assert_loceq ("foo.c", 2, 1, loc_c);
    1369           96 :   assert_loceq ("foo.c", 2, 17, loc_d);
    1370           96 :   assert_loceq ("foo.c", 3, 700, loc_e);
    1371           96 :   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
    1372              : 
    1373              :   /* In the very wide line, the initial location should be fully tracked.  */
    1374           96 :   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
    1375              :   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
    1376              :      be disabled.  */
    1377           96 :   assert_loceq ("foo.c", 5, 0, loc_too_wide);
    1378           96 :   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
    1379              :   /* ...and column-tracking should be re-enabled for subsequent lines.  */
    1380           96 :   assert_loceq ("foo.c", 6, 10, loc_sane_again);
    1381              : 
    1382           96 :   assert_loceq ("bar.c", 1, 150, loc_f);
    1383              : 
    1384           96 :   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
    1385           96 :   ASSERT_TRUE (pure_location_p (line_table, loc_a));
    1386              : 
    1387              :   /* Verify using make_location to build a range, and extracting data
    1388              :      back from it.  */
    1389           96 :   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
    1390           96 :   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
    1391           96 :   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
    1392           96 :   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
    1393           96 :   ASSERT_EQ (loc_b, src_range.m_start);
    1394           96 :   ASSERT_EQ (loc_d, src_range.m_finish);
    1395           96 : }
    1396              : 
    1397              : /* Verify various properties of UNKNOWN_LOCATION.  */
    1398              : 
    1399              : static void
    1400            4 : test_unknown_location ()
    1401              : {
    1402            4 :   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
    1403            4 :   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
    1404            4 :   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
    1405            4 : }
    1406              : 
    1407              : /* Verify various properties of BUILTINS_LOCATION.  */
    1408              : 
    1409              : static void
    1410            4 : test_builtins ()
    1411              : {
    1412            4 :   assert_loceq (special_fname_builtin (), 0, 0, BUILTINS_LOCATION);
    1413            4 :   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
    1414            4 : }
    1415              : 
    1416              : /* Regression test for make_location.
    1417              :    Ensure that we use pure locations for the start/finish of the range,
    1418              :    rather than storing a packed or ad-hoc range as the start/finish.  */
    1419              : 
    1420              : static void
    1421           96 : test_make_location_nonpure_range_endpoints (const line_table_case &case_)
    1422              : {
    1423              :   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
    1424              :      with C++ frontend.
    1425              :      ....................0000000001111111111222.
    1426              :      ....................1234567890123456789012.  */
    1427           96 :   const char *content = "     r += !aaa == bbb;\n";
    1428           96 :   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
    1429           96 :   line_table_test ltt (case_);
    1430           96 :   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
    1431              : 
    1432           96 :   const location_t c11 = linemap_position_for_column (line_table, 11);
    1433           96 :   const location_t c12 = linemap_position_for_column (line_table, 12);
    1434           96 :   const location_t c13 = linemap_position_for_column (line_table, 13);
    1435           96 :   const location_t c14 = linemap_position_for_column (line_table, 14);
    1436           96 :   const location_t c21 = linemap_position_for_column (line_table, 21);
    1437              : 
    1438           96 :   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
    1439           32 :     return;
    1440              : 
    1441              :   /* Use column 13 for the caret location, arbitrarily, to verify that we
    1442              :      handle start != caret.  */
    1443           64 :   const location_t aaa = make_location (c13, c12, c14);
    1444           64 :   ASSERT_EQ (c13, get_pure_location (aaa));
    1445           64 :   ASSERT_EQ (c12, get_start (aaa));
    1446           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
    1447           64 :   ASSERT_EQ (c14, get_finish (aaa));
    1448           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
    1449              : 
    1450              :   /* Make a location using a location with a range as the start-point.  */
    1451           64 :   const location_t not_aaa = make_location (c11, aaa, c14);
    1452           64 :   ASSERT_EQ (c11, get_pure_location (not_aaa));
    1453              :   /* It should use the start location of the range, not store the range
    1454              :      itself.  */
    1455           64 :   ASSERT_EQ (c12, get_start (not_aaa));
    1456           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
    1457           64 :   ASSERT_EQ (c14, get_finish (not_aaa));
    1458           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
    1459              : 
    1460              :   /* Similarly, make a location with a range as the end-point.  */
    1461           64 :   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
    1462           64 :   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
    1463           64 :   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
    1464           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
    1465           64 :   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
    1466           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
    1467           64 :   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
    1468              :   /* It should use the finish location of the range, not store the range
    1469              :      itself.  */
    1470           64 :   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
    1471           64 :   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
    1472           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
    1473           64 :   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
    1474           64 :   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
    1475           96 : }
    1476              : 
    1477              : /* Tests of lexing.  */
    1478              : 
    1479              : /* Verify that token TOK from PARSER has cpp_token_as_text
    1480              :    equal to EXPECTED_TEXT.  */
    1481              : 
    1482              : #define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
    1483              :   SELFTEST_BEGIN_STMT                                                   \
    1484              :     unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK));    \
    1485              :     ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);           \
    1486              :   SELFTEST_END_STMT
    1487              : 
    1488              : /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
    1489              :    and ranges from EXP_START_COL to EXP_FINISH_COL.
    1490              :    Use LOC as the effective location of the selftest.  */
    1491              : 
    1492              : static void
    1493          576 : assert_token_loc_eq (const location &loc,
    1494              :                      const cpp_token *tok,
    1495              :                      const char *exp_filename, int exp_linenum,
    1496              :                      int exp_start_col, int exp_finish_col)
    1497              : {
    1498          576 :   location_t tok_loc = tok->src_loc;
    1499          576 :   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
    1500          576 :   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
    1501              : 
    1502              :   /* If location_t values are sufficiently high, then column numbers
    1503              :      will be unavailable.  */
    1504          576 :   if (!should_have_column_data_p (tok_loc))
    1505          196 :     return;
    1506              : 
    1507          380 :   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
    1508          380 :   source_range tok_range = get_range_from_loc (line_table, tok_loc);
    1509          380 :   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
    1510          380 :   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
    1511              : }
    1512              : 
    1513              : /* Use assert_token_loc_eq to verify the TOK->src_loc, using
    1514              :    SELFTEST_LOCATION as the effective location of the selftest.  */
    1515              : 
    1516              : #define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
    1517              :                             EXP_START_COL, EXP_FINISH_COL) \
    1518              :   assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
    1519              :                        (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
    1520              : 
    1521              : /* Test of lexing a file using libcpp, verifying tokens and their
    1522              :    location information.  */
    1523              : 
    1524              : static void
    1525           96 : test_lexer (const line_table_case &case_)
    1526              : {
    1527              :   /* Create a tempfile and write some text to it.  */
    1528           96 :   const char *content =
    1529              :     /*00000000011111111112222222222333333.3333444444444.455555555556
    1530              :       12345678901234567890123456789012345.6789012345678.901234567890.  */
    1531              :     ("test_name /* c-style comment */\n"
    1532              :      "                                  \"test literal\"\n"
    1533              :      " // test c++-style comment\n"
    1534              :      "   42\n");
    1535           96 :   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
    1536              : 
    1537           96 :   line_table_test ltt (case_);
    1538              : 
    1539           96 :   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
    1540              : 
    1541           96 :   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
    1542           96 :   ASSERT_NE (fname, NULL);
    1543              : 
    1544              :   /* Verify that we get the expected tokens back, with the correct
    1545              :      location information.  */
    1546              : 
    1547           96 :   location_t loc;
    1548           96 :   const cpp_token *tok;
    1549           96 :   tok = cpp_get_token_with_location (parser, &loc);
    1550           96 :   ASSERT_NE (tok, NULL);
    1551           96 :   ASSERT_EQ (tok->type, CPP_NAME);
    1552           96 :   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
    1553           96 :   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
    1554              : 
    1555           96 :   tok = cpp_get_token_with_location (parser, &loc);
    1556           96 :   ASSERT_NE (tok, NULL);
    1557           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    1558           96 :   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
    1559           96 :   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
    1560              : 
    1561           96 :   tok = cpp_get_token_with_location (parser, &loc);
    1562           96 :   ASSERT_NE (tok, NULL);
    1563           96 :   ASSERT_EQ (tok->type, CPP_NUMBER);
    1564           96 :   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
    1565           96 :   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
    1566              : 
    1567           96 :   tok = cpp_get_token_with_location (parser, &loc);
    1568           96 :   ASSERT_NE (tok, NULL);
    1569           96 :   ASSERT_EQ (tok->type, CPP_EOF);
    1570              : 
    1571           96 :   cpp_finish (parser, NULL);
    1572           96 :   cpp_destroy (parser);
    1573           96 : }
    1574              : 
    1575              : /* Forward decls.  */
    1576              : 
    1577              : class lexer_test;
    1578              : class lexer_test_options;
    1579              : 
    1580              : /* A class for specifying options of a lexer_test.
    1581              :    The "apply" vfunc is called during the lexer_test constructor.  */
    1582              : 
    1583          192 : class lexer_test_options
    1584              : {
    1585              :  public:
    1586              :   virtual void apply (lexer_test &) = 0;
    1587              : };
    1588              : 
    1589              : /* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
    1590              :    in its dtor.
    1591              : 
    1592              :    This is needed by class lexer_test to ensure that the cleanup of the
    1593              :    cpp_reader happens *after* the cleanup of the temp_source_file.  */
    1594              : 
    1595              : class cpp_reader_ptr
    1596              : {
    1597              :  public:
    1598         2304 :   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
    1599              : 
    1600         2304 :   ~cpp_reader_ptr ()
    1601              :   {
    1602         2304 :     cpp_finish (m_ptr, NULL);
    1603         2304 :     cpp_destroy (m_ptr);
    1604         2304 :   }
    1605              : 
    1606         2304 :   operator cpp_reader * () const { return m_ptr; }
    1607              : 
    1608              :  private:
    1609              :   cpp_reader *m_ptr;
    1610              : };
    1611              : 
    1612              : /* A class for writing lexer tests.  */
    1613              : 
    1614              : class lexer_test
    1615              : {
    1616              : public:
    1617              :   lexer_test (const line_table_case &case_, const char *content,
    1618              :               lexer_test_options *options);
    1619              :   ~lexer_test ();
    1620              : 
    1621              :   const cpp_token *get_token ();
    1622              : 
    1623              :   /* The ordering of these fields matters.
    1624              :      The line_table_test must be first, since the cpp_reader_ptr
    1625              :      uses it.
    1626              :      The cpp_reader must be cleaned up *after* the temp_source_file
    1627              :      since the filenames in input.cc's input cache are owned by the
    1628              :      cpp_reader; in particular, when ~temp_source_file evicts the
    1629              :      filename the filenames must still be alive.  */
    1630              :   line_table_test m_ltt;
    1631              :   cpp_reader_ptr m_parser;
    1632              :   temp_source_file m_tempfile;
    1633              :   diagnostics::file_cache m_file_cache;
    1634              :   string_concat_db m_concats;
    1635              :   bool m_implicitly_expect_EOF;
    1636              : };
    1637              : 
    1638              : /* Use an EBCDIC encoding for the execution charset, specifically
    1639              :    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
    1640              : 
    1641              :    This exercises iconv integration within libcpp.
    1642              :    Not every build of iconv supports the given charset,
    1643              :    so we need to flag this error and handle it gracefully.  */
    1644              : 
    1645              : class ebcdic_execution_charset : public lexer_test_options
    1646              : {
    1647              :  public:
    1648           96 :   ebcdic_execution_charset () : m_num_iconv_errors (0)
    1649              :     {
    1650           96 :       gcc_assert (s_singleton == NULL);
    1651           96 :       s_singleton = this;
    1652           96 :     }
    1653           96 :   ~ebcdic_execution_charset ()
    1654           96 :     {
    1655           96 :       gcc_assert (s_singleton == this);
    1656           96 :       s_singleton = NULL;
    1657           96 :     }
    1658              : 
    1659           96 :   void apply (lexer_test &test) final override
    1660              :   {
    1661           96 :     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
    1662           96 :     cpp_opts->narrow_charset = "IBM1047";
    1663              : 
    1664           96 :     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
    1665           96 :     callbacks->diagnostic = on_diagnostic;
    1666           96 :   }
    1667              : 
    1668            0 :   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
    1669              :                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
    1670              :                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
    1671              :                              rich_location *richloc ATTRIBUTE_UNUSED,
    1672              :                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
    1673              :     ATTRIBUTE_FPTR_PRINTF(5,0)
    1674              :   {
    1675            0 :     gcc_assert (s_singleton);
    1676              :     /* Prevent exgettext from picking this up; it is translated in libcpp.  */
    1677            0 :     const char *msg = "conversion from %s to %s not supported by iconv";
    1678              : #ifdef ENABLE_NLS
    1679            0 :     msg = dgettext ("cpplib", msg);
    1680              : #endif
    1681              :     /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc
    1682              :        when the local iconv build doesn't support the conversion.  */
    1683            0 :     if (strcmp (msgid, msg) == 0)
    1684              :       {
    1685            0 :         s_singleton->m_num_iconv_errors++;
    1686            0 :         return true;
    1687              :       }
    1688              : 
    1689              :     /* Otherwise, we have an unexpected error.  */
    1690            0 :     abort ();
    1691              :   }
    1692              : 
    1693           96 :   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
    1694              : 
    1695              :  private:
    1696              :   static ebcdic_execution_charset *s_singleton;
    1697              :   int m_num_iconv_errors;
    1698              : };
    1699              : 
    1700              : ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
    1701              : 
    1702              : /* A lexer_test_options subclass that records a list of diagnostic
    1703              :    messages emitted by the lexer.  */
    1704              : 
    1705              : class lexer_diagnostic_sink : public lexer_test_options
    1706              : {
    1707              :  public:
    1708           96 :   lexer_diagnostic_sink ()
    1709           96 :   {
    1710           96 :     gcc_assert (s_singleton == NULL);
    1711           96 :     s_singleton = this;
    1712           96 :   }
    1713           96 :   ~lexer_diagnostic_sink ()
    1714           96 :   {
    1715           96 :     gcc_assert (s_singleton == this);
    1716           96 :     s_singleton = NULL;
    1717              : 
    1718           96 :     int i;
    1719           96 :     char *str;
    1720          192 :     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
    1721           96 :       free (str);
    1722           96 :   }
    1723              : 
    1724           96 :   void apply (lexer_test &test) final override
    1725              :   {
    1726           96 :     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
    1727           96 :     callbacks->diagnostic = on_diagnostic;
    1728           96 :   }
    1729              : 
    1730           96 :   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
    1731              :                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
    1732              :                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
    1733              :                              rich_location *richloc ATTRIBUTE_UNUSED,
    1734              :                              const char *msgid, va_list *ap)
    1735              :     ATTRIBUTE_FPTR_PRINTF(5,0)
    1736              :   {
    1737           96 :     char *msg = xvasprintf (msgid, *ap);
    1738           96 :     s_singleton->m_diagnostics.safe_push (msg);
    1739           96 :     return true;
    1740              :   }
    1741              : 
    1742              :   auto_vec<char *> m_diagnostics;
    1743              : 
    1744              :  private:
    1745              :   static lexer_diagnostic_sink *s_singleton;
    1746              : };
    1747              : 
    1748              : lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
    1749              : 
    1750              : /* Constructor.  Override line_table with a new instance based on CASE_,
    1751              :    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
    1752              :    start parsing the tempfile.  */
    1753              : 
    1754         2304 : lexer_test::lexer_test (const line_table_case &case_, const char *content,
    1755         2304 :                         lexer_test_options *options)
    1756         2304 : : m_ltt (case_),
    1757         2304 :   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
    1758              :   /* Create a tempfile and write the text to it.  */
    1759         2304 :   m_tempfile (SELFTEST_LOCATION, ".c", content),
    1760         2304 :   m_concats (),
    1761         2304 :   m_implicitly_expect_EOF (true)
    1762              : {
    1763         2304 :   if (options)
    1764          192 :     options->apply (*this);
    1765              : 
    1766         2304 :   cpp_init_iconv (m_parser);
    1767              : 
    1768              :   /* Parse the file.  */
    1769         2304 :   const char *fname = cpp_read_main_file (m_parser,
    1770              :                                           m_tempfile.get_filename ());
    1771         2304 :   ASSERT_NE (fname, NULL);
    1772         2304 : }
    1773              : 
    1774              : /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
    1775              : 
    1776         2304 : lexer_test::~lexer_test ()
    1777              : {
    1778         2304 :   location_t loc;
    1779         2304 :   const cpp_token *tok;
    1780              : 
    1781         2304 :   if (m_implicitly_expect_EOF)
    1782              :     {
    1783         2208 :       tok = cpp_get_token_with_location (m_parser, &loc);
    1784         2208 :       ASSERT_NE (tok, NULL);
    1785         2208 :       ASSERT_EQ (tok->type, CPP_EOF);
    1786              :     }
    1787         2304 : }
    1788              : 
    1789              : /* Get the next token from m_parser.  */
    1790              : 
    1791              : const cpp_token *
    1792         3936 : lexer_test::get_token ()
    1793              : {
    1794         3936 :   location_t loc;
    1795         3936 :   const cpp_token *tok;
    1796              : 
    1797         3936 :   tok = cpp_get_token_with_location (m_parser, &loc);
    1798         3936 :   ASSERT_NE (tok, NULL);
    1799         3936 :   return tok;
    1800              : }
    1801              : 
    1802              : /* Verify that locations within string literals are correctly handled.  */
    1803              : 
    1804              : /* Verify get_source_range_for_char for token(s) at STRLOC,
    1805              :    using the string concatenation database for TEST.
    1806              : 
    1807              :    Assert that the character at index IDX is on EXPECTED_LINE,
    1808              :    and that it begins at column EXPECTED_START_COL and ends at
    1809              :    EXPECTED_FINISH_COL (unless the locations are beyond
    1810              :    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
    1811              :    columns).  */
    1812              : 
    1813              : static void
    1814        23740 : assert_char_at_range (const location &loc,
    1815              :                       lexer_test& test,
    1816              :                       location_t strloc, enum cpp_ttype type, int idx,
    1817              :                       int expected_line, int expected_start_col,
    1818              :                       int expected_finish_col)
    1819              : {
    1820        23740 :   cpp_reader *pfile = test.m_parser;
    1821        23740 :   string_concat_db *concats = &test.m_concats;
    1822              : 
    1823        23740 :   source_range actual_range = source_range();
    1824        23740 :   const char *err
    1825        23740 :     = get_source_range_for_char (pfile, test.m_file_cache,
    1826              :                                  concats, strloc, type, idx,
    1827              :                                  &actual_range);
    1828        23740 :   if (should_have_column_data_p (strloc))
    1829        18652 :     ASSERT_EQ_AT (loc, NULL, err);
    1830              :   else
    1831              :     {
    1832         5088 :       ASSERT_STREQ_AT (loc,
    1833              :                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
    1834              :                        err);
    1835         5088 :       return;
    1836              :     }
    1837              : 
    1838        18652 :   int actual_start_line = LOCATION_LINE (actual_range.m_start);
    1839        18652 :   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
    1840        18652 :   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
    1841        18652 :   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
    1842              : 
    1843        18652 :   if (should_have_column_data_p (actual_range.m_start))
    1844              :     {
    1845        18652 :       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
    1846        18652 :       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
    1847              :     }
    1848        18652 :   if (should_have_column_data_p (actual_range.m_finish))
    1849              :     {
    1850        18652 :       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
    1851        18652 :       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
    1852              :     }
    1853              : }
    1854              : 
    1855              : /* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
    1856              :    the effective location of any errors.  */
    1857              : 
    1858              : #define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
    1859              :                              EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
    1860              :   assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
    1861              :                         (IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
    1862              :                         (EXPECTED_FINISH_COL))
    1863              : 
    1864              : /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
    1865              :    using the string concatenation database for TEST.
    1866              : 
    1867              :    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES ranges.  */
    1868              : 
    1869              : static void
    1870         1268 : assert_num_substring_ranges (const location &loc,
    1871              :                              lexer_test& test,
    1872              :                              location_t strloc,
    1873              :                              enum cpp_ttype type,
    1874              :                              int expected_num_ranges)
    1875              : {
    1876         1268 :   cpp_reader *pfile = test.m_parser;
    1877         1268 :   string_concat_db *concats = &test.m_concats;
    1878              : 
    1879         1268 :   int actual_num_ranges = -1;
    1880         1268 :   const char *err
    1881         1268 :     = get_num_source_ranges_for_substring (pfile, test.m_file_cache,
    1882              :                                            concats, strloc, type,
    1883              :                                            &actual_num_ranges);
    1884         1268 :   if (should_have_column_data_p (strloc))
    1885          884 :     ASSERT_EQ_AT (loc, NULL, err);
    1886              :   else
    1887              :     {
    1888          384 :       ASSERT_STREQ_AT (loc,
    1889              :                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
    1890              :                        err);
    1891          384 :       return;
    1892              :     }
    1893          884 :   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
    1894              : }
    1895              : 
    1896              : /* Macro for calling assert_num_substring_ranges, supplying
    1897              :    SELFTEST_LOCATION for the effective location of any errors.  */
    1898              : 
    1899              : #define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
    1900              :                                     EXPECTED_NUM_RANGES)                \
    1901              :   assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
    1902              :                                (TYPE), (EXPECTED_NUM_RANGES))
    1903              : 
    1904              : 
    1905              : /* Verify that get_substring_ranges_for_loc for token(s) at STRLOC
    1906              :    returns an error (using the string concatenation database for TEST).  */
    1907              : 
    1908              : static void
    1909          636 : assert_has_no_substring_ranges (const location &loc,
    1910              :                                 lexer_test& test,
    1911              :                                 location_t strloc,
    1912              :                                 enum cpp_ttype type,
    1913              :                                 const char *expected_err)
    1914              : {
    1915          636 :   cpp_reader *pfile = test.m_parser;
    1916          636 :   string_concat_db *concats = &test.m_concats;
    1917          636 :   cpp_substring_ranges ranges;
    1918          636 :   const char *actual_err
    1919          636 :     = get_substring_ranges_for_loc (pfile, test.m_file_cache, concats, strloc,
    1920              :                                     type, ranges);
    1921          636 :   if (should_have_column_data_p (strloc))
    1922          444 :     ASSERT_STREQ_AT (loc, expected_err, actual_err);
    1923              :   else
    1924          192 :     ASSERT_STREQ_AT (loc,
    1925              :                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
    1926              :                      actual_err);
    1927          636 : }
    1928              : 
    1929              : #define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
    1930              :     assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
    1931              :                                     (STRLOC), (TYPE), (ERR))
    1932              : 
    1933              : /* Lex a simple string literal.  Verify the substring location data, before
    1934              :    and after running cpp_interpret_string on it.  */
    1935              : 
    1936              : static void
    1937           96 : test_lexer_string_locations_simple (const line_table_case &case_)
    1938              : {
    1939              :   /* Digits 0-9 (with 0 at column 10), the simple way.
    1940              :      ....................000000000.11111111112.2222222223333333333
    1941              :      ....................123456789.01234567890.1234567890123456789
    1942              :      We add a trailing comment to ensure that we correctly locate
    1943              :      the end of the string literal token.  */
    1944           96 :   const char *content = "        \"0123456789\" /* not a string */\n";
    1945           96 :   lexer_test test (case_, content, NULL);
    1946              : 
    1947              :   /* Verify that we get the expected token back, with the correct
    1948              :      location information.  */
    1949           96 :   const cpp_token *tok = test.get_token ();
    1950           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    1951           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
    1952           96 :   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
    1953              : 
    1954              :   /* At this point in lexing, the quote characters are treated as part of
    1955              :      the string (they are stripped off by cpp_interpret_string).  */
    1956              : 
    1957           96 :   ASSERT_EQ (tok->val.str.len, 12);
    1958              : 
    1959              :   /* Verify that cpp_interpret_string works.  */
    1960           96 :   cpp_string dst_string;
    1961           96 :   const enum cpp_ttype type = CPP_STRING;
    1962           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    1963              :                                       &dst_string, type);
    1964           96 :   ASSERT_TRUE (result);
    1965           96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    1966           96 :   free (const_cast <unsigned char *> (dst_string.text));
    1967              : 
    1968              :   /* Verify ranges of individual characters.  This no longer includes the
    1969              :      opening quote, but does include the closing quote.  */
    1970         1152 :   for (int i = 0; i <= 10; i++)
    1971         1056 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
    1972              :                           10 + i, 10 + i);
    1973              : 
    1974           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
    1975           96 : }
    1976              : 
    1977              : /* As test_lexer_string_locations_simple, but use an EBCDIC execution
    1978              :    encoding.  */
    1979              : 
    1980              : static void
    1981           96 : test_lexer_string_locations_ebcdic (const line_table_case &case_)
    1982              : {
    1983              :   /* EBCDIC support requires iconv.  */
    1984           96 :   if (!HAVE_ICONV)
    1985            0 :     return;
    1986              : 
    1987              :   /* Digits 0-9 (with 0 at column 10), the simple way.
    1988              :      ....................000000000.11111111112.2222222223333333333
    1989              :      ....................123456789.01234567890.1234567890123456789
    1990              :      We add a trailing comment to ensure that we correctly locate
    1991              :      the end of the string literal token.  */
    1992           96 :   const char *content = "        \"0123456789\" /* not a string */\n";
    1993           96 :   ebcdic_execution_charset use_ebcdic;
    1994           96 :   lexer_test test (case_, content, &use_ebcdic);
    1995              : 
    1996              :   /* Verify that we get the expected token back, with the correct
    1997              :      location information.  */
    1998           96 :   const cpp_token *tok = test.get_token ();
    1999           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2000           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
    2001           96 :   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
    2002              : 
    2003              :   /* At this point in lexing, the quote characters are treated as part of
    2004              :      the string (they are stripped off by cpp_interpret_string).  */
    2005              : 
    2006           96 :   ASSERT_EQ (tok->val.str.len, 12);
    2007              : 
    2008              :   /* The remainder of the test requires an iconv implementation that
    2009              :      can convert from UTF-8 to the EBCDIC encoding requested above.  */
    2010           96 :   if (use_ebcdic.iconv_errors_occurred_p ())
    2011            0 :     return;
    2012              : 
    2013              :   /* Verify that cpp_interpret_string works.  */
    2014           96 :   cpp_string dst_string;
    2015           96 :   const enum cpp_ttype type = CPP_STRING;
    2016           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2017              :                                       &dst_string, type);
    2018           96 :   ASSERT_TRUE (result);
    2019              :   /* We should now have EBCDIC-encoded text, specifically
    2020              :      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
    2021              :      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
    2022           96 :   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
    2023              :                 (const char *)dst_string.text);
    2024           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2025              : 
    2026              :   /* Verify that we don't attempt to record substring location information
    2027              :      for such cases.  */
    2028           96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    2029              :     (test, tok->src_loc, type,
    2030              :      "execution character set != source character set");
    2031           96 : }
    2032              : 
    2033              : /* Lex a string literal containing a hex-escaped character.
    2034              :    Verify the substring location data, before and after running
    2035              :    cpp_interpret_string on it.  */
    2036              : 
    2037              : static void
    2038           96 : test_lexer_string_locations_hex (const line_table_case &case_)
    2039              : {
    2040              :   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
    2041              :      and with a space in place of digit 6, to terminate the escaped
    2042              :      hex code.
    2043              :      ....................000000000.111111.11112222.
    2044              :      ....................123456789.012345.67890123.  */
    2045           96 :   const char *content = "        \"01234\\x35 789\"\n";
    2046           96 :   lexer_test test (case_, content, NULL);
    2047              : 
    2048              :   /* Verify that we get the expected token back, with the correct
    2049              :      location information.  */
    2050           96 :   const cpp_token *tok = test.get_token ();
    2051           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2052           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
    2053           96 :   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
    2054              : 
    2055              :   /* At this point in lexing, the quote characters are treated as part of
    2056              :      the string (they are stripped off by cpp_interpret_string).  */
    2057           96 :   ASSERT_EQ (tok->val.str.len, 15);
    2058              : 
    2059              :   /* Verify that cpp_interpret_string works.  */
    2060           96 :   cpp_string dst_string;
    2061           96 :   const enum cpp_ttype type = CPP_STRING;
    2062           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2063              :                                       &dst_string, type);
    2064           96 :   ASSERT_TRUE (result);
    2065           96 :   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
    2066           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2067              : 
    2068              :   /* Verify ranges of individual characters.  This no longer includes the
    2069              :      opening quote, but does include the closing quote.  */
    2070          576 :   for (int i = 0; i <= 4; i++)
    2071          480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2072           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
    2073          576 :   for (int i = 6; i <= 10; i++)
    2074          480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
    2075              : 
    2076           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
    2077           96 : }
    2078              : 
    2079              : /* Lex a string literal containing an octal-escaped character.
    2080              :    Verify the substring location data after running cpp_interpret_string
    2081              :    on it.  */
    2082              : 
    2083              : static void
    2084           96 : test_lexer_string_locations_oct (const line_table_case &case_)
    2085              : {
    2086              :   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
    2087              :      and with a space in place of digit 6, to terminate the escaped
    2088              :      octal code.
    2089              :      ....................000000000.111111.11112222.2222223333333333444
    2090              :      ....................123456789.012345.67890123.4567890123456789012  */
    2091           96 :   const char *content = "        \"01234\\065 789\" /* not a string */\n";
    2092           96 :   lexer_test test (case_, content, NULL);
    2093              : 
    2094              :   /* Verify that we get the expected token back, with the correct
    2095              :      location information.  */
    2096           96 :   const cpp_token *tok = test.get_token ();
    2097           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2098           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
    2099              : 
    2100              :   /* Verify that cpp_interpret_string works.  */
    2101           96 :   cpp_string dst_string;
    2102           96 :   const enum cpp_ttype type = CPP_STRING;
    2103           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2104              :                                       &dst_string, type);
    2105           96 :   ASSERT_TRUE (result);
    2106           96 :   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
    2107           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2108              : 
    2109              :   /* Verify ranges of individual characters.  This no longer includes the
    2110              :      opening quote, but does include the closing quote.  */
    2111          576 :   for (int i = 0; i < 5; i++)
    2112          480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2113           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
    2114          576 :   for (int i = 6; i <= 10; i++)
    2115          480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
    2116              : 
    2117           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
    2118           96 : }
    2119              : 
    2120              : /* Test of substring ranges in a string literal containing letter escapes.  */
    2121              : 
    2122              : static void
    2123           96 : test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
    2124              : {
    2125              :   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
    2126              :      .....................000000000.1.11111.1.1.11222.22222223333333
    2127              :      .....................123456789.0.12345.6.7.89012.34567890123456.  */
    2128           96 :   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
    2129           96 :   lexer_test test (case_, content, NULL);
    2130              : 
    2131              :   /* Verify that we get the expected token back.  */
    2132           96 :   const cpp_token *tok = test.get_token ();
    2133           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2134           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
    2135              : 
    2136              :   /* Verify ranges of individual characters.  */
    2137              :   /* "\t".  */
    2138           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2139              :                         0, 1, 10, 11);
    2140              :   /* "foo".  */
    2141          384 :   for (int i = 1; i <= 3; i++)
    2142          288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2143              :                           i, 1, 11 + i, 11 + i);
    2144              :   /* "\\" and "\n": each escape spans two columns.  */
    2145           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2146              :                         4, 1, 15, 16);
    2147           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2148              :                         5, 1, 17, 18);
    2149              : 
    2150              :   /* "bar" and closing quote for nul-terminator.  */
    2151          480 :   for (int i = 6; i <= 9; i++)
    2152          384 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2153              :                           i, 1, 13 + i, 13 + i);
    2154              : 
    2155           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
    2156           96 : }
    2157              : 
    2158              : /* Another test of a string literal containing a letter escape.
    2159              :    Based on string seen in
    2160              :      printf ("%-%\n");
    2161              :    in gcc.dg/format/c90-printf-1.c.  */
    2162              : 
    2163              : static void
    2164           96 : test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
    2165              : {
    2166              :   /* .....................000000000.1111.11.1111.22222222223.
    2167              :      .....................123456789.0123.45.6789.01234567890.  */
    2168           96 :   const char *content = ("        \"%-%\\n\" /* non-str */\n");
    2169           96 :   lexer_test test (case_, content, NULL);
    2170              : 
    2171              :   /* Verify that we get the expected token back.  */
    2172           96 :   const cpp_token *tok = test.get_token ();
    2173           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2174           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
    2175              : 
    2176              :   /* Verify ranges of individual characters.  */
    2177              :   /* "%-%".  */
    2178          384 :   for (int i = 0; i < 3; i++)
    2179          288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2180              :                           i, 1, 10 + i, 10 + i);
    2181              :   /* "\n", spanning columns 13-14.  */
    2182           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2183              :                         3, 1, 13, 14);
    2184              : 
    2185              :   /* Closing quote for nul-terminator.  */
    2186           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2187              :                         4, 1, 15, 15);
    2188              : 
    2189           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
    2190           96 : }
    2191              : 
    2192              : /* Lex a string literal containing UCN 4 characters.
    2193              :    Verify the substring location data after running cpp_interpret_string
    2194              :    on it.  */
    2195              : 
    2196              : static void
    2197           96 : test_lexer_string_locations_ucn4 (const line_table_case &case_)
    2198              : {
    2199              :   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
    2200              :      as UCN 4.
    2201              :      ....................000000000.111111.111122.222222223.33333333344444
    2202              :      ....................123456789.012345.678901.234567890.12345678901234  */
    2203           96 :   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
    2204           96 :   lexer_test test (case_, content, NULL);
    2205              : 
    2206              :   /* Verify that we get the expected token back, with the correct
    2207              :      location information.  */
    2208           96 :   const cpp_token *tok = test.get_token ();
    2209           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2210           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
    2211              : 
    2212              :   /* Verify that cpp_interpret_string works.
    2213              :      The string should be encoded in the execution character
    2214              :      set.  Assuming that is UTF-8, we should have the following:
    2215              :      -----------  ----  -----  -------  ----------------
    2216              :      Byte offset  Byte  Octal  Unicode  Source Column(s)
    2217              :      -----------  ----  -----  -------  ----------------
    2218              :      0            0x30         '0'      10
    2219              :      1            0x31         '1'      11
    2220              :      2            0x32         '2'      12
    2221              :      3            0x33         '3'      13
    2222              :      4            0x34         '4'      14
    2223              :      5            0xE2  \342   U+2174   15-20
    2224              :      6            0x85  \205    (cont)  15-20
    2225              :      7            0xB4  \264    (cont)  15-20
    2226              :      8            0xE2  \342   U+2175   21-26
    2227              :      9            0x85  \205    (cont)  21-26
    2228              :      10           0xB5  \265    (cont)  21-26
    2229              :      11           0x37         '7'      27
    2230              :      12           0x38         '8'      28
    2231              :      13           0x39         '9'      29
    2232              :      14           0x00                  30 (closing quote)
    2233              :      -----------  ----  -----  -------  ---------------.  */
    2234              : 
    2235           96 :   cpp_string dst_string;
    2236           96 :   const enum cpp_ttype type = CPP_STRING;
    2237           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2238              :                                       &dst_string, type);
    2239           96 :   ASSERT_TRUE (result);
    2240           96 :   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
    2241              :                 (const char *)dst_string.text);
    2242           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2243              : 
    2244              :   /* Verify ranges of individual characters.  This no longer includes the
    2245              :      opening quote, but does include the closing quote.
    2246              :      '01234'.  */
    2247          576 :   for (int i = 0; i <= 4; i++)
    2248          480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2249              :   /* U+2174: each of its three UTF-8 bytes maps to the whole UCN (columns 15-20).  */
    2250          384 :   for (int i = 5; i <= 7; i++)
    2251          288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
    2252              :   /* U+2175: each of its three UTF-8 bytes maps to the whole UCN (columns 21-26).  */
    2253          384 :   for (int i = 8; i <= 10; i++)
    2254          288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
    2255              :   /* '789' (columns 27-29) and nul terminator (closing quote, column 30).  */
    2256          480 :   for (int i = 11; i <= 14; i++)
    2257          384 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
    2258              : 
    2259           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
    2260           96 : }
    2261              : 
    2262              : /* Lex a string literal containing UCN 8 characters.
    2263              :    Verify the substring location data after running cpp_interpret_string
    2264              :    on it.  */
    2265              : 
    2266              : static void
    2267           96 : test_lexer_string_locations_ucn8 (const line_table_case &case_)
    2268              : {
    2269              :   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
    2270              :      ....................000000000.111111.1111222222.2222333333333.344444
    2271              :      ....................123456789.012345.6789012345.6789012345678.901234  */
    2272           96 :   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
    2273           96 :   lexer_test test (case_, content, NULL);
    2274              : 
    2275              :   /* Verify that we get the expected token back, with the correct
    2276              :      location information.  */
    2277           96 :   const cpp_token *tok = test.get_token ();
    2278           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2279           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
    2280              :                            "\"01234\\U00002174\\U00002175789\"");
    2281              : 
    2282              :   /* Verify that cpp_interpret_string works.
    2283              :      The UTF-8 encoding of the string is identical to that from
    2284              :      the ucn4 testcase above; the only difference is the column
    2285              :      locations.  */
    2286           96 :   cpp_string dst_string;
    2287           96 :   const enum cpp_ttype type = CPP_STRING;
    2288           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2289              :                                       &dst_string, type);
    2290           96 :   ASSERT_TRUE (result);
    2291           96 :   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
    2292              :                 (const char *)dst_string.text);
    2293           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2294              : 
    2295              :   /* Verify ranges of individual characters.  This no longer includes the
    2296              :      opening quote, but does include the closing quote.
    2297              :      '01234'.  */
    2298          576 :   for (int i = 0; i <= 4; i++)
    2299          480 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2300              :   /* U+2174: each of its three UTF-8 bytes maps to the whole UCN (columns 15-24).  */
    2301          384 :   for (int i = 5; i <= 7; i++)
    2302          288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
    2303              :   /* U+2175: each of its three UTF-8 bytes maps to the whole UCN (columns 25-34).  */
    2304          384 :   for (int i = 8; i <= 10; i++)
    2305          288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
    2306              :   /* '789' at columns 35-37.  */
    2307          384 :   for (int i = 11; i <= 13; i++)
    2308          288 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
    2309              :   /* Closing quote/nul-terminator at column 38.  */
    2310           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
    2311              : 
    2312           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
    2313           96 : }
    2314              : 
    2315              : /* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit value (most significant byte first) and return it in host byte order.  */
    2316              : 
    2317              : static uint32_t
    2318          768 : uint32_from_big_endian (const uint32_t *ptr_be_value)
    2319              : {
    2320          768 :   const unsigned char *buf = (const unsigned char *)ptr_be_value;
    2321          768 :   return (((uint32_t) buf[0] << 24)
    2322          768 :           | ((uint32_t) buf[1] << 16)
    2323          768 :           | ((uint32_t) buf[2] << 8)
    2324          768 :           | (uint32_t) buf[3]);
    2325              : }
    2326              : 
    2327              : /* Lex a wide string literal and verify that attempts to read substring
    2328              :    location data from it fail gracefully.  */
    2329              : 
    2330              : static void
    2331           96 : test_lexer_string_locations_wide_string (const line_table_case &case_)
    2332              : {
    2333              :   /* Digits 0-9.
    2334              :      ....................000000000.11111111112.22222222233333
    2335              :      ....................123456789.01234567890.12345678901234  */
    2336           96 :   const char *content = "       L\"0123456789\" /* non-str */\n";
    2337           96 :   lexer_test test (case_, content, NULL);
    2338              : 
    2339              :   /* Verify that we get the expected token back, with the correct
    2340              :      location information.  */
    2341           96 :   const cpp_token *tok = test.get_token ();
    2342           96 :   ASSERT_EQ (tok->type, CPP_WSTRING);
    2343           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
    2344              : 
    2345              :   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
    2346           96 :   cpp_string dst_string;
    2347           96 :   const enum cpp_ttype type = CPP_WSTRING;
    2348           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2349              :                                       &dst_string, type);
    2350           96 :   ASSERT_TRUE (result);
    2351              :   /* The cpp_reader defaults to big-endian with
    2352              :      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
    2353              :      now be encoded as UTF-32BE.  */
    2354           96 :   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
    2355           96 :   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
    2356           96 :   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
    2357           96 :   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
    2358           96 :   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
    2359           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2360              : 
    2361              :   /* We don't yet support generating substring location information
    2362              :      for L"" strings, so assert that no ranges are available.  */
    2363           96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    2364              :     (test, tok->src_loc, type,
    2365              :      "execution character set != source character set");
    2366           96 : }
    2367              : 
    2368              : /* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit value (most significant byte first) and return it in host byte order.  */
    2369              : 
    2370              : static uint16_t
    2371          384 : uint16_from_big_endian (const uint16_t *ptr_be_value)
    2372              : {
    2373          384 :   const unsigned char *buf = (const unsigned char *)ptr_be_value;
    2374          384 :   return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
    2375              : }
    2376              : 
    2377              : /* Lex a u"" string literal and verify that attempts to read substring
    2378              :    location data from it fail gracefully.  */
    2379              : 
    2380              : static void
    2381           96 : test_lexer_string_locations_string16 (const line_table_case &case_)
    2382              : {
    2383              :   /* Digits 0-9.
    2384              :      ....................000000000.11111111112.22222222233333
    2385              :      ....................123456789.01234567890.12345678901234  */
    2386           96 :   const char *content = "       u\"0123456789\" /* non-str */\n";
    2387           96 :   lexer_test test (case_, content, NULL);
    2388              : 
    2389              :   /* Verify that we get the expected token back, with the correct
    2390              :      location information.  */
    2391           96 :   const cpp_token *tok = test.get_token ();
    2392           96 :   ASSERT_EQ (tok->type, CPP_STRING16);
    2393           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
    2394              : 
    2395              :   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
    2396           96 :   cpp_string dst_string;
    2397           96 :   const enum cpp_ttype type = CPP_STRING16;
    2398           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2399              :                                       &dst_string, type);
    2400           96 :   ASSERT_TRUE (result);
    2401              : 
    2402              :   /* The cpp_reader defaults to big-endian, so dst_string should
    2403              :      now be encoded as UTF-16BE.  */
    2404           96 :   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
    2405           96 :   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
    2406           96 :   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
    2407           96 :   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
    2408           96 :   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
    2409           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2410              : 
    2411              :   /* We don't yet support generating substring location information
    2412              :      for u"" strings.  */
    2413           96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    2414              :     (test, tok->src_loc, type,
    2415              :      "execution character set != source character set");
    2416           96 : }
    2417              : 
    2418              : /* Lex a U"" string literal and verify that attempts to read substring
    2419              :    location data from it fail gracefully.  */
    2420              : 
    2421              : static void
    2422           96 : test_lexer_string_locations_string32 (const line_table_case &case_)
    2423              : {
    2424              :   /* Digits 0-9.
    2425              :      ....................000000000.11111111112.22222222233333
    2426              :      ....................123456789.01234567890.12345678901234  */
    2427           96 :   const char *content = "       U\"0123456789\" /* non-str */\n";
    2428           96 :   lexer_test test (case_, content, NULL);
    2429              : 
    2430              :   /* Verify that we get the expected token back, with the correct
    2431              :      location information.  */
    2432           96 :   const cpp_token *tok = test.get_token ();
    2433           96 :   ASSERT_EQ (tok->type, CPP_STRING32);
    2434           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
    2435              : 
    2436              :   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
    2437           96 :   cpp_string dst_string;
    2438           96 :   const enum cpp_ttype type = CPP_STRING32;
    2439           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2440              :                                       &dst_string, type);
    2441           96 :   ASSERT_TRUE (result);
    2442              : 
    2443              :   /* The cpp_reader defaults to big-endian, so dst_string should
    2444              :      now be encoded as UTF-32BE.  */
    2445           96 :   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
    2446           96 :   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
    2447           96 :   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
    2448           96 :   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
    2449           96 :   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
    2450           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2451              : 
    2452              :   /* We don't yet support generating substring location information
    2453              :      for U"" strings.  */
    2454           96 :   ASSERT_HAS_NO_SUBSTRING_RANGES
    2455              :     (test, tok->src_loc, type,
    2456              :      "execution character set != source character set");
    2457           96 : }
    2458              : 
    2459              : /* Lex a u8-string literal.
    2460              :    Verify the substring location data after running cpp_interpret_string
    2461              :    on it.  */
    2462              : 
    2463              : static void
    2464           96 : test_lexer_string_locations_u8 (const line_table_case &case_)
    2465              : {
    2466              :   /* Digits 0-9.
    2467              :      ....................000000000.11111111112.22222222233333
    2468              :      ....................123456789.01234567890.12345678901234  */
    2469           96 :   const char *content = "      u8\"0123456789\" /* non-str */\n";
    2470           96 :   lexer_test test (case_, content, NULL);
    2471              : 
    2472              :   /* Verify that we get the expected token back, with the correct
    2473              :      location information.  */
    2474           96 :   const cpp_token *tok = test.get_token ();
    2475           96 :   ASSERT_EQ (tok->type, CPP_UTF8STRING);
    2476           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
    2477              : 
    2478              :   /* Verify that cpp_interpret_string works, using CPP_STRING.  */
    2479           96 :   cpp_string dst_string;
    2480           96 :   const enum cpp_ttype type = CPP_STRING;
    2481           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2482              :                                       &dst_string, type);
    2483           96 :   ASSERT_TRUE (result);
    2484           96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    2485           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2486              : 
    2487              :   /* Verify ranges of individual characters.  This no longer includes the
    2488              :      opening quote (or the u8 prefix), but does include the closing quote.  */
    2489         1152 :   for (int i = 0; i <= 10; i++)
    2490         1056 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2491           96 : }
    2492              : 
    2493              : /* Lex a string literal containing UTF-8 source characters.
    2494              :    Verify the substring location data after running cpp_interpret_string
    2495              :    on it.  */
    2496              : 
    2497              : static void
    2498           96 : test_lexer_string_locations_utf8_source (const line_table_case &case_)
    2499              : {
    2500              :   /* This string literal is written out to the source file as UTF-8,
    2501              :      and is of the form "before mojibake after", where "mojibake"
    2502              :      is written as the following four Unicode code points:
    2503              :         U+6587 CJK UNIFIED IDEOGRAPH-6587
    2504              :         U+5B57 CJK UNIFIED IDEOGRAPH-5B57
    2505              :         U+5316 CJK UNIFIED IDEOGRAPH-5316
    2506              :         U+3051 HIRAGANA LETTER KE.
    2507              :      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
    2508              :      "before" and "after" are 1 byte per Unicode character.
    2509              : 
    2510              :      The numbering shown are "columns", which are *byte* numbers within
    2511              :      the line, rather than Unicode character numbers.
    2512              : 
    2513              :      .................... 000000000.1111111.
    2514              :      .................... 123456789.0123456.  */
    2515           96 :   const char *content = ("        \"before "
    2516              :                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
    2517              :                               UTF-8: 0xE6 0x96 0x87
    2518              :                               C octal escaped UTF-8: \346\226\207
    2519              :                             "column" numbers: 17-19.  */
    2520              :                          "\346\226\207"
    2521              : 
    2522              :                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
    2523              :                               UTF-8: 0xE5 0xAD 0x97
    2524              :                               C octal escaped UTF-8: \345\255\227
    2525              :                             "column" numbers: 20-22.  */
    2526              :                          "\345\255\227"
    2527              : 
    2528              :                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
    2529              :                               UTF-8: 0xE5 0x8C 0x96
    2530              :                               C octal escaped UTF-8: \345\214\226
    2531              :                             "column" numbers: 23-25.  */
    2532              :                          "\345\214\226"
    2533              : 
    2534              :                          /* U+3051 HIRAGANA LETTER KE
    2535              :                               UTF-8: 0xE3 0x81 0x91
    2536              :                               C octal escaped UTF-8: \343\201\221
    2537              :                             "column" numbers: 26-28.  */
    2538              :                          "\343\201\221"
    2539              : 
    2540              :                          /* column numbers 29 onwards
    2541              :                           2333333.33334444444444
    2542              :                           9012345.67890123456789. */
    2543              :                          " after\" /* non-str */\n");
    2544           96 :   lexer_test test (case_, content, NULL);
    2545              : 
    2546              :   /* Verify that we get the expected token back, with the correct
    2547              :      location information.  */
    2548           96 :   const cpp_token *tok = test.get_token ();
    2549           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2550           96 :   ASSERT_TOKEN_AS_TEXT_EQ
    2551              :     (test.m_parser, tok,
    2552              :      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
    2553              : 
    2554              :   /* Verify that cpp_interpret_string works.  */
    2555           96 :   cpp_string dst_string;
    2556           96 :   const enum cpp_ttype type = CPP_STRING;
    2557           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2558              :                                       &dst_string, type);
    2559           96 :   ASSERT_TRUE (result);
    2560           96 :   ASSERT_STREQ
    2561              :     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
    2562              :      (const char *)dst_string.text);
    2563           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2564              : 
    2565              :   /* Verify ranges of individual characters.  This no longer includes the
    2566              :      opening quote, but does include the closing quote.
    2567              :      Assuming that both source and execution encodings are UTF-8, we have
    2568              :      a run of 25 octets in each, plus the NUL terminator.  */
    2569         2496 :   for (int i = 0; i < 25; i++)
    2570         2400 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
    2571              :   /* NUL-terminator should use the closing quote at column 35.  */
    2572           96 :   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
    2573              : 
    2574           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
    2575           96 : }
    2576              : 
    2577              : /* Test of string literal concatenation.  */
    2578              : 
    2579              : static void
    2580           96 : test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
    2581              : {
    2582              :   /* Digits 0-9.
    2583              :      .....................000000000.111111.11112222222222
    2584              :      .....................123456789.012345.67890123456789.  */
    2585           96 :   const char *content = ("        \"01234\" /* non-str */\n"
    2586              :                          "        \"56789\" /* non-str */\n");
    2587           96 :   lexer_test test (case_, content, NULL);
    2588              : 
    2589           96 :   location_t input_locs[2];
    2590              : 
    2591              :   /* Verify that we get the expected tokens back.  */
    2592           96 :   auto_vec <cpp_string> input_strings;
    2593           96 :   const cpp_token *tok_a = test.get_token ();
    2594           96 :   ASSERT_EQ (tok_a->type, CPP_STRING);
    2595           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
    2596           96 :   input_strings.safe_push (tok_a->val.str);
    2597           96 :   input_locs[0] = tok_a->src_loc;
    2598              : 
    2599           96 :   const cpp_token *tok_b = test.get_token ();
    2600           96 :   ASSERT_EQ (tok_b->type, CPP_STRING);
    2601           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
    2602           96 :   input_strings.safe_push (tok_b->val.str);
    2603           96 :   input_locs[1] = tok_b->src_loc;
    2604              : 
    2605              :   /* Verify that cpp_interpret_string works.  */
    2606           96 :   cpp_string dst_string;
    2607           96 :   const enum cpp_ttype type = CPP_STRING;
    2608           96 :   bool result = cpp_interpret_string (test.m_parser,
    2609           96 :                                       input_strings.address (), 2,
    2610              :                                       &dst_string, type);
    2611           96 :   ASSERT_TRUE (result);
    2612           96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    2613           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2614              : 
    2615              :   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
    2616           96 :   test.m_concats.record_string_concatenation (2, input_locs);
    2617              : 
    2618           96 :   location_t initial_loc = input_locs[0];
    2619              : 
    2620              :   /* "01234" on line 1.  */
    2621          576 :   for (int i = 0; i <= 4; i++)
    2622          480 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
    2623              :   /* "56789" on line 2, plus its closing quote for the NUL terminator.  */
    2624          672 :   for (int i = 5; i <= 10; i++)
    2625          576 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
    2626              : 
    2627           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
    2628           96 : }
    2629              : 
    2630              : /* Another test of string literal concatenation.  */
    2631              : 
    2632              : static void
    2633           96 : test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
    2634              : {
    2635              :   /* Digits 0-9.
    2636              :      .....................000000000.111.11111112222222
    2637              :      .....................123456789.012.34567890123456.  */
    2638           96 :   const char *content = ("        \"01\" /* non-str */\n"
    2639              :                          "        \"23\" /* non-str */\n"
    2640              :                          "        \"45\" /* non-str */\n"
    2641              :                          "        \"67\" /* non-str */\n"
    2642              :                          "        \"89\" /* non-str */\n");
    2643           96 :   lexer_test test (case_, content, NULL);
    2644              : 
    2645           96 :   auto_vec <cpp_string> input_strings;
    2646           96 :   location_t input_locs[5];
    2647              : 
    2648              :   /* Verify that we get the expected tokens back.  */
    2649          576 :   for (int i = 0; i < 5; i++)
    2650              :     {
    2651          480 :       const cpp_token *tok = test.get_token ();
    2652          480 :       ASSERT_EQ (tok->type, CPP_STRING);
    2653          480 :       input_strings.safe_push (tok->val.str);
    2654          480 :       input_locs[i] = tok->src_loc;
    2655              :     }
    2656              : 
    2657              :   /* Verify that cpp_interpret_string works.  */
    2658           96 :   cpp_string dst_string;
    2659           96 :   const enum cpp_ttype type = CPP_STRING;
    2660           96 :   bool result = cpp_interpret_string (test.m_parser,
    2661           96 :                                       input_strings.address (), 5,
    2662              :                                       &dst_string, type);
    2663           96 :   ASSERT_TRUE (result);
    2664           96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    2665           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2666              : 
    2667              :   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
    2668           96 :   test.m_concats.record_string_concatenation (5, input_locs);
    2669              : 
    2670           96 :   location_t initial_loc = input_locs[0];
    2671              : 
    2672              :   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
    2673              :      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
    2674              :      and expect get_source_range_for_substring to fail.
    2675              :      However, for a string concatenation test, we can have a case
    2676              :      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
    2677              :      but subsequent strings can be after it.
    2678              :      Attempting to detect this within assert_char_at_range
    2679              :      would overcomplicate the logic for the common test cases, so
    2680              :      we detect it here.  */
    2681           96 :   if (should_have_column_data_p (input_locs[0])
    2682           96 :       && !should_have_column_data_p (input_locs[4]))
    2683              :     {
    2684              :       /* Verify that get_source_range_for_substring gracefully rejects
    2685              :          this case.  */
    2686            8 :       source_range actual_range;
    2687            8 :       const char *err
    2688            8 :         = get_source_range_for_char (test.m_parser, test.m_file_cache,
    2689              :                                      &test.m_concats,
    2690              :                                      initial_loc, type, 0, &actual_range);
    2691            8 :       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
    2692            8 :       return;
    2693              :     }
    2694              : 
    2695          528 :   for (int i = 0; i < 5; i++)
    2696         1320 :     for (int j = 0; j < 2; j++)
    2697          880 :       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
    2698              :                             i + 1, 10 + j, 10 + j);
    2699              : 
    2700              :   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
    2701           88 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
    2702              : 
    2703           88 :   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
    2704           96 : }
    2705              : 
    2706              : /* Another test of string literal concatenation, this time combined with
    2707              :    various kinds of escaped characters.  */
    2708              : 
    2709              : static void
    2710           96 : test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
    2711              : {
    2712              :   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and
    2713              :      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
    2714           96 :   const char *content
    2715              :     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
    2716              :        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
    2717              :     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
    2718           96 :   lexer_test test (case_, content, NULL);
    2719              : 
    2720           96 :   auto_vec <cpp_string> input_strings;
    2721           96 :   location_t input_locs[4];
    2722              : 
    2723              :   /* Verify that we get the expected tokens back.  */
    2724          480 :   for (int i = 0; i < 4; i++)
    2725              :     {
    2726          384 :       const cpp_token *tok = test.get_token ();
    2727          384 :       ASSERT_EQ (tok->type, CPP_STRING);
    2728          384 :       input_strings.safe_push (tok->val.str);
    2729          384 :       input_locs[i] = tok->src_loc;
    2730              :     }
    2731              : 
    2732              :   /* Verify that cpp_interpret_string works.  */
    2733           96 :   cpp_string dst_string;
    2734           96 :   const enum cpp_ttype type = CPP_STRING;
    2735           96 :   bool result = cpp_interpret_string (test.m_parser,
    2736           96 :                                       input_strings.address (), 4,
    2737              :                                       &dst_string, type);
    2738           96 :   ASSERT_TRUE (result);
    2739           96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    2740           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2741              : 
    2742              :   /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
    2743           96 :   test.m_concats.record_string_concatenation (4, input_locs);
    2744              : 
    2745           96 :   location_t initial_loc = input_locs[0];
    2746              : 
    2747          576 :   for (int i = 0; i <= 4; i++)
    2748          480 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
    2749           96 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
    2750           96 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
    2751          384 :   for (int i = 7; i <= 9; i++)
    2752          288 :     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
    2753              : 
    2754              :   /* NUL-terminator should use the location of the final closing quote.  */
    2755           96 :   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
    2756              : 
    2757           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
    2758           96 : }
    2759              : 
    2760              : /* Test of string literal in a macro.  */
    2761              : 
    2762              : static void
    2763           96 : test_lexer_string_locations_macro (const line_table_case &case_)
    2764              : {
    2765              :   /* Digits 0-9.
    2766              :      .....................0000000001111111111.22222222223.
    2767              :      .....................1234567890123456789.01234567890.  */
    2768           96 :   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
    2769              :                          "  MACRO");
    2770           96 :   lexer_test test (case_, content, NULL);
    2771              : 
    2772              :   /* Verify that we get the expected tokens back.  */
    2773           96 :   const cpp_token *tok = test.get_token ();
    2774           96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    2775              : 
    2776           96 :   tok = test.get_token ();
    2777           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2778           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
    2779              : 
    2780              :   /* Verify ranges of individual characters.  We ought to
    2781              :      see columns within the macro definition.  */
    2782         1152 :   for (int i = 0; i <= 10; i++)
    2783         1056 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2784              :                           i, 1, 20 + i, 20 + i);
    2785              : 
    2786           96 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
    2787              : 
    2788           96 :   tok = test.get_token ();
    2789           96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    2790           96 : }
    2791              : 
    2792              : /* Test of stringification of a macro argument.  */
    2793              : 
    2794              : static void
    2795           96 : test_lexer_string_locations_stringified_macro_argument
    2796              :   (const line_table_case &case_)
    2797              : {
    2798              :   /* .....................000000000111111111122222222223.
    2799              :      .....................123456789012345678901234567890.  */
    2800           96 :   const char *content = ("#define MACRO(X) #X /* non-str */\n"
    2801              :                          "MACRO(foo)\n");
    2802           96 :   lexer_test test (case_, content, NULL);
    2803              : 
    2804              :   /* Verify that we get the expected token back.  */
    2805           96 :   const cpp_token *tok = test.get_token ();
    2806           96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    2807              : 
    2808           96 :   tok = test.get_token ();
    2809           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2810           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
    2811              : 
    2812              :   /* We don't support getting the location of a stringified macro
    2813              :      argument.  Verify that it fails gracefully.  */
    2814           96 :   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
    2815              :                                   "cpp_interpret_string_1 failed");
    2816              : 
    2817           96 :   tok = test.get_token ();
    2818           96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    2819              : 
    2820           96 :   tok = test.get_token ();
    2821           96 :   ASSERT_EQ (tok->type, CPP_PADDING);
    2822           96 : }
    2823              : 
    2824              : /* Ensure that we fail gracefully if something attempts to pass
    2825              :    in a location that isn't a string literal token.  Seen on this code:
    2826              : 
    2827              :      const char a[] = " %d ";
    2828              :      __builtin_printf (a, 0.5);
    2829              :                        ^
    2830              : 
    2831              :    when c-format.cc erroneously used the indicated one-character
    2832              :    location as the format string location, leading to a read past the
    2833              :    end of a string buffer in cpp_interpret_string_1.  */
    2834              : 
    2835              : static void
    2836           96 : test_lexer_string_locations_non_string (const line_table_case &case_)
    2837              : {
    2838              :   /* .....................000000000111111111122222222223.
    2839              :      .....................123456789012345678901234567890.  */
    2840           96 :   const char *content = ("         a\n");
    2841           96 :   lexer_test test (case_, content, NULL);
    2842              : 
    2843              :   /* Verify that we get the expected token back.  */
    2844           96 :   const cpp_token *tok = test.get_token ();
    2845           96 :   ASSERT_EQ (tok->type, CPP_NAME);
    2846           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
    2847              : 
    2848              :   /* At this point, libcpp is attempting to interpret the name as a
    2849              :      string literal, despite it not starting with a quote.  We don't detect
    2850              :      that, but we should at least fail gracefully.  */
    2851           96 :   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
    2852              :                                   "cpp_interpret_string_1 failed");
    2853           96 : }
    2854              : 
    2855              : /* Ensure that we can read substring information for a token which
    2856              :    starts in one linemap and ends in another.  Adapted from
    2857              :    gcc.dg/cpp/pr69985.c.  */
    2858              : 
    2859              : static void
    2860           96 : test_lexer_string_locations_long_line (const line_table_case &case_)
    2861              : {
    2862              :   /* .....................000000.0001111111111
    2863              :      .....................123456.7890123456789.  */
    2864           96 :   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
    2865              :                          "     \"0123456789012345678901234567890123456789"
    2866              :                          "0123456789012345678901234567890123456789"
    2867              :                          "0123456789012345678901234567890123456789"
    2868              :                          "0123456789\"\n");
    2869              : 
    2870           96 :   lexer_test test (case_, content, NULL);
    2871              : 
    2872              :   /* Verify that we get the expected token back.  */
    2873           96 :   const cpp_token *tok = test.get_token ();
    2874           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2875              : 
    2876           96 :   if (!should_have_column_data_p (line_table->highest_location))
    2877           36 :     return;
    2878              : 
    2879              :   /* Verify ranges of individual characters.  */
    2880           60 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
    2881         7920 :   for (int i = 0; i < 131; i++)
    2882         7860 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2883              :                           i, 2, 7 + i, 7 + i);
    2884           96 : }
    2885              : 
    2886              : /* Test of locations within a raw string that doesn't contain a newline.  */
    2887              : 
    2888              : static void
    2889           96 : test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
    2890              : {
    2891              :   /* .....................00.0000000111111111122.
    2892              :      .....................12.3456789012345678901.  */
    2893           96 :   const char *content = ("R\"foo(0123456789)foo\"\n");
    2894           96 :   lexer_test test (case_, content, NULL);
    2895              : 
    2896              :   /* Verify that we get the expected token back.  */
    2897           96 :   const cpp_token *tok = test.get_token ();
    2898           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2899              : 
    2900              :   /* Verify that cpp_interpret_string works.  */
    2901           96 :   cpp_string dst_string;
    2902           96 :   const enum cpp_ttype type = CPP_STRING;
    2903           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2904              :                                       &dst_string, type);
    2905           96 :   ASSERT_TRUE (result);
    2906           96 :   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
    2907           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2908              : 
    2909           96 :   if (!should_have_column_data_p (line_table->highest_location))
    2910           32 :     return;
    2911              : 
    2912              :   /* 0-9, plus the NUL terminator.  */
    2913           64 :   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
    2914          768 :   for (int i = 0; i < 11; i++)
    2915          704 :     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
    2916              :                           i, 1, 7 + i, 7 + i);
    2917           96 : }
    2918              : 
    2919              : /* Test of locations within a raw string that contains a newline.  */
    2920              : 
    2921              : static void
    2922           96 : test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
    2923              : {
    2924              :   /* .....................00.0000.
    2925              :      .....................12.3456.  */
    2926           96 :   const char *content = ("R\"foo(\n"
    2927              :   /* .....................00000.
    2928              :      .....................12345.  */
    2929              :                          "hello\n"
    2930              :                          "world\n"
    2931              :   /* .....................00000.
    2932              :      .....................12345.  */
    2933              :                          ")foo\"\n");
    2934           96 :   lexer_test test (case_, content, NULL);
    2935              : 
    2936              :   /* Verify that we get the expected token back.  */
    2937           96 :   const cpp_token *tok = test.get_token ();
    2938           96 :   ASSERT_EQ (tok->type, CPP_STRING);
    2939              : 
    2940              :   /* Verify that cpp_interpret_string works.  */
    2941           96 :   cpp_string dst_string;
    2942           96 :   const enum cpp_ttype type = CPP_STRING;
    2943           96 :   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
    2944              :                                       &dst_string, type);
    2945           96 :   ASSERT_TRUE (result);
    2946           96 :   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
    2947           96 :   free (const_cast <unsigned char *> (dst_string.text));
    2948              : 
    2949           96 :   if (!should_have_column_data_p (line_table->highest_location))
    2950           36 :     return;
    2951              : 
    2952              :   /* Currently we don't support locations within raw strings that
    2953              :      contain newlines.  */
    2954           60 :   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
    2955              :                                   "range endpoints are on different lines");
    2956           96 : }
    2957              : 
    2958              : /* Test of parsing an unterminated raw string.  */
    2959              : 
    2960              : static void
    2961           96 : test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
    2962              : {
    2963           96 :   const char *content = "R\"ouch()ouCh\" /* etc */";
    2964              : 
    2965           96 :   lexer_diagnostic_sink diagnostics;
    2966           96 :   lexer_test test (case_, content, &diagnostics);
    2967           96 :   test.m_implicitly_expect_EOF = false;
    2968              : 
    2969              :   /* Attempt to parse the raw string.  */
    2970           96 :   const cpp_token *tok = test.get_token ();
    2971           96 :   ASSERT_EQ (tok->type, CPP_EOF);
    2972              : 
    2973           96 :   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
    2974              :   /* We expect the message "unterminated raw string"
    2975              :      in the "cpplib" translation domain.
    2976              :      It's not clear that dgettext is available on all supported hosts,
    2977              :      so this assertion is commented-out for now.
    2978              :        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
    2979              :                      diagnostics.m_diagnostics[0]);
    2980              :   */
    2981           96 : }
    2982              : 
    2983              : /* Test of lexing char constants.  */
    2984              : 
    2985              : static void
    2986           96 : test_lexer_char_constants (const line_table_case &case_)
    2987              : {
    2988              :   /* Various char constants.
    2989              :      .....................0000000001111111111.22222222223.
    2990              :      .....................1234567890123456789.01234567890.  */
    2991           96 :   const char *content = ("         'a'\n"
    2992              :                          "        u'a'\n"
    2993              :                          "        U'a'\n"
    2994              :                          "        L'a'\n"
    2995              :                          "         'abc'\n");
    2996           96 :   lexer_test test (case_, content, NULL);
    2997              : 
    2998              :   /* Verify that we get the expected tokens back.  */
    2999              :   /* 'a'.  */
    3000           96 :   const cpp_token *tok = test.get_token ();
    3001           96 :   ASSERT_EQ (tok->type, CPP_CHAR);
    3002           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
    3003              : 
    3004           96 :   unsigned int chars_seen;
    3005           96 :   int unsignedp;
    3006           96 :   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
    3007              :                                           &chars_seen, &unsignedp);
    3008           96 :   ASSERT_EQ (cc, 'a');
    3009           96 :   ASSERT_EQ (chars_seen, 1);
    3010              : 
    3011              :   /* u'a'.  */
    3012           96 :   tok = test.get_token ();
    3013           96 :   ASSERT_EQ (tok->type, CPP_CHAR16);
    3014           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
    3015              : 
    3016              :   /* U'a'.  */
    3017           96 :   tok = test.get_token ();
    3018           96 :   ASSERT_EQ (tok->type, CPP_CHAR32);
    3019           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
    3020              : 
    3021              :   /* L'a'.  */
    3022           96 :   tok = test.get_token ();
    3023           96 :   ASSERT_EQ (tok->type, CPP_WCHAR);
    3024           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
    3025              : 
    3026              :   /* 'abc' (c-char-sequence).  */
    3027           96 :   tok = test.get_token ();
    3028           96 :   ASSERT_EQ (tok->type, CPP_CHAR);
    3029           96 :   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
    3030           96 : }
    3031              : /* A table of interesting location_t values, giving one axis of our test
    3032              :    matrix.  */
    3033              : 
    3034              : static const location_t boundary_locations[] = {
    3035              :   /* Zero means "don't override the default values for a new line_table".  */
    3036              :   0,
    3037              : 
    3038              :   /* An arbitrary non-zero value that isn't close to one of
    3039              :      the boundary values below.  */
    3040              :   0x10000,
    3041              : 
    3042              :   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
    3043              :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
    3044              :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
    3045              :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
    3046              :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
    3047              :   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
    3048              : 
    3049              :   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
    3050              :   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x200,
    3051              :   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
    3052              :   LINE_MAP_MAX_LOCATION_WITH_COLS,
    3053              :   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
    3054              :   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x200,
    3055              : };
    3056              : 
    3057              : /* Run TESTCASE multiple times, once for each case in our test matrix.  */
    3058              : 
    3059              : void
    3060          244 : for_each_line_table_case (void (*testcase) (const line_table_case &))
    3061              : {
    3062              :   /* As noted above in the description of struct line_table_case,
    3063              :      we want to explore a test matrix of interesting line_table
    3064              :      situations, running various selftests for each case within the
    3065              :      matrix.  */
    3066              : 
    3067              :   /* Run all tests with:
    3068              :      (a) line_table->default_range_bits == 0, and
    3069              :      (b) line_table->default_range_bits == line_map_suggested_range_bits.  */
    3070              : 
    3071          732 :   for (int default_range_bits: {0, line_map_suggested_range_bits})
    3072              :     {
    3073              :       /* ...and use each of the "interesting" location values as
    3074              :          the starting location within line_table.  */
    3075          488 :       const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
    3076         6344 :       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
    3077              :         {
    3078         5856 :           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
    3079         5856 :           testcase (c);
    3080              :         }
    3081              :     }
    3082          244 : }
    3083              : 
    3084              : /* Verify that when presented with a consecutive pair of locations with
    3085              :    a very large line offset, we don't attempt to consolidate them into
    3086              :    a single ordinary linemap where the line offsets within the line map
    3087              :    would lead to overflow (PR lto/88147).  */
    3088              : 
    3089              : static void
    3090            4 : test_line_offset_overflow ()
    3091              : {
    3092            4 :   line_table_test ltt (line_table_case (5, 0));
    3093              : 
    3094            4 :   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
    3095            4 :   linemap_line_start (line_table, 1, 100);
    3096            4 :   location_t loc_a = linemap_line_start (line_table, 2578, 255);
    3097            4 :   assert_loceq ("foo.c", 2578, 0, loc_a);
    3098              : 
    3099            4 :   const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (line_table);
    3100            4 :   ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
    3101            4 :   ASSERT_EQ (ordmap_a->m_range_bits, 5);
    3102              : 
    3103            4 :   location_t loc_b = linemap_line_start (line_table, 404198, 512);
    3104            4 :   assert_loceq ("foo.c", 404198, 0, loc_b);
    3105              : 
    3106              :   /* We should have started a new linemap, rather than attempting to store
    3107              :      a very large line offset.  */
    3108            4 :   const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (line_table);
    3109            4 :   ASSERT_NE (ordmap_a, ordmap_b);
    3110            4 : }
    3111              : 
    3112            4 : void test_cpp_utf8 ()
    3113              : {
    3114            4 :   const int def_tabstop = 8;
    3115            4 :   cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
    3116              : 
    3117              :   /* Verify that wcwidth of invalid UTF-8 or control bytes is 1.  */
    3118            4 :   {
    3119            4 :     int w_bad = cpp_display_width ("\xf0!\x9f!\x98!\x82!", 8, policy);
    3120            4 :     ASSERT_EQ (8, w_bad);
    3121            4 :     int w_ctrl = cpp_display_width ("\r\n\v\0\1", 5, policy);
    3122            4 :     ASSERT_EQ (5, w_ctrl);
    3123              :   }
    3124              : 
    3125              :   /* Verify that wcwidth of valid UTF-8 is as expected.  */
    3126            4 :   {
    3127            4 :     const int w_pi = cpp_display_width ("\xcf\x80", 2, policy);
    3128            4 :     ASSERT_EQ (1, w_pi);
    3129            4 :     const int w_emoji = cpp_display_width ("\xf0\x9f\x98\x82", 4, policy);
    3130            4 :     ASSERT_EQ (2, w_emoji);
    3131            4 :     const int w_umlaut_precomposed = cpp_display_width ("\xc3\xbf", 2,
    3132              :                                                         policy);
    3133            4 :     ASSERT_EQ (1, w_umlaut_precomposed);
    3134            4 :     const int w_umlaut_combining = cpp_display_width ("y\xcc\x88", 3,
    3135              :                                                       policy);
    3136            4 :     ASSERT_EQ (1, w_umlaut_combining);
    3137            4 :     const int w_han = cpp_display_width ("\xe4\xb8\xba", 3, policy);
    3138            4 :     ASSERT_EQ (2, w_han);
    3139            4 :     const int w_ascii = cpp_display_width ("GCC", 3, policy);
    3140            4 :     ASSERT_EQ (3, w_ascii);
    3141            4 :     const int w_mixed = cpp_display_width ("\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
    3142              :                                            "\x9f! \xe4\xb8\xba y\xcc\x88",
    3143              :                                            24, policy);
    3144            4 :     ASSERT_EQ (18, w_mixed);
    3145              :   }
    3146              : 
    3147              :   /* Verify that display width properly expands tabs.  */
    3148            4 :   {
    3149            4 :     const char *tstr = "\tabc\td";
    3150            4 :     ASSERT_EQ (6, cpp_display_width (tstr, 6,
    3151              :                                      cpp_char_column_policy (1, cpp_wcwidth)));
    3152            4 :     ASSERT_EQ (10, cpp_display_width (tstr, 6,
    3153              :                                       cpp_char_column_policy (3, cpp_wcwidth)));
    3154            4 :     ASSERT_EQ (17, cpp_display_width (tstr, 6,
    3155              :                                       cpp_char_column_policy (8, cpp_wcwidth)));
    3156            4 :     ASSERT_EQ (1,
    3157              :                cpp_display_column_to_byte_column
    3158              :                  (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth)));
    3159              :   }
    3160              : 
    3161              :   /* Verify that cpp_byte_column_to_display_column can go past the end,
    3162              :      and similar edge cases.  */
    3163            4 :   {
    3164            4 :     const char *str
    3165              :       /* Display columns.
    3166              :          111111112345  */
    3167              :       = "\xcf\x80 abc";
    3168              :       /* 111122223456
    3169              :          Byte columns.  */
    3170              : 
    3171            4 :     ASSERT_EQ (5, cpp_display_width (str, 6, policy));
    3172            4 :     ASSERT_EQ (105,
    3173              :                cpp_byte_column_to_display_column (str, 6, 106, policy));
    3174            4 :     ASSERT_EQ (10000,
    3175              :                cpp_byte_column_to_display_column (NULL, 0, 10000, policy));
    3176            4 :     ASSERT_EQ (0,
    3177              :                cpp_byte_column_to_display_column (NULL, 10000, 0, policy));
    3178              :   }
    3179              : 
    3180              :   /* Verify that cpp_display_column_to_byte_column can go past the end,
    3181              :      and similar edge cases, and check invertibility.  */
    3182            4 :   {
    3183            4 :     const char *str
    3184              :       /* Display columns.
    3185              :          000000000000000000000000000000000000011
    3186              :          111111112222222234444444455555555678901  */
    3187              :       = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
    3188              :       /* 000000000000000000000000000000000111111
    3189              :          111122223333444456666777788889999012345
    3190              :          Byte columns.  */
    3191            4 :     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy));
    3192            4 :     ASSERT_EQ (15,
    3193              :                cpp_display_column_to_byte_column (str, 15, 11, policy));
    3194            4 :     ASSERT_EQ (115,
    3195              :                cpp_display_column_to_byte_column (str, 15, 111, policy));
    3196            4 :     ASSERT_EQ (10000,
    3197              :                cpp_display_column_to_byte_column (NULL, 0, 10000, policy));
    3198            4 :     ASSERT_EQ (0,
    3199              :                cpp_display_column_to_byte_column (NULL, 10000, 0, policy));
    3200              : 
    3201              :     /* Verify that we do not interrupt a UTF-8 sequence.  */
    3202            4 :     ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy));
    3203              : 
    3204           64 :     for (int byte_col = 1; byte_col <= 15; ++byte_col)
    3205              :       {
    3206           60 :         const int disp_col
    3207           60 :           = cpp_byte_column_to_display_column (str, 15, byte_col, policy);
    3208           60 :         const int byte_col2
    3209           60 :           = cpp_display_column_to_byte_column (str, 15, disp_col, policy);
    3210              : 
    3211              :         /* If we ask for the display column in the middle of a UTF-8
    3212              :            sequence, it will return the length of the partial sequence,
    3213              :            matching the behavior of GCC before display column support.
    3214              :            Otherwise check the round trip was successful.  */
    3215           60 :         if (byte_col < 4)
    3216           12 :           ASSERT_EQ (byte_col, disp_col);
    3217           48 :         else if (byte_col >= 6 && byte_col < 9)
    3218           12 :           ASSERT_EQ (3 + (byte_col - 5), disp_col);
    3219              :         else
    3220           60 :           ASSERT_EQ (byte_col2, byte_col);
    3221              :       }
    3222              :   }
    3223            4 : }
    3224              : 
    3225              : static bool
    3226           36 : check_cpp_valid_utf8_p (const char *str)
    3227              : {
                          /* Selftest helper: return the result of cpp_valid_utf8_p for the
                             NUL-terminated string STR, using strlen (STR) as the byte count.
                             Because the length comes from strlen, input containing an
                             embedded NUL byte cannot be checked through this wrapper; such
                             input has to be passed to cpp_valid_utf8_p directly with an
                             explicit length.  */
    3228           36 :   return cpp_valid_utf8_p (str, strlen (str));
    3229              : }
    3230              : 
    3231              : /* Check that cpp_valid_utf8_p works as expected.
                           Well-formed input (ASCII, 2-, 3- and 4-byte sequences, and
                           control codes) must be accepted; each of the malformed inputs
                           constructed below must be rejected.  */
    3232              : 
    3233              : static void
    3234            4 : test_cpp_valid_utf8_p ()
    3235              : {
    3236            4 :   ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
    3237              : 
    3238              :   /* 2-byte char (pi).  */
    3239            4 :   ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
    3240              : 
    3241              :   /* 3-byte chars (the Japanese word "mojibake").  */
    3242            4 :   ASSERT_TRUE (check_cpp_valid_utf8_p
    3243              :                (
    3244              :                 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
    3245              :                    UTF-8: 0xE6 0x96 0x87
    3246              :                    C octal escaped UTF-8: \346\226\207.  */
    3247              :                 "\346\226\207"
    3248              :                 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
    3249              :                    UTF-8: 0xE5 0xAD 0x97
    3250              :                    C octal escaped UTF-8: \345\255\227.  */
    3251              :                 "\345\255\227"
    3252              :                 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
    3253              :                    UTF-8: 0xE5 0x8C 0x96
    3254              :                    C octal escaped UTF-8: \345\214\226.  */
    3255              :                 "\345\214\226"
    3256              :                 /* U+3051 HIRAGANA LETTER KE
    3257              :                    UTF-8: 0xE3 0x81 0x91
    3258              :                    C octal escaped UTF-8: \343\201\221.  */
    3259              :                 "\343\201\221"));
    3260              : 
    3261              :   /* 4-byte char: an emoji.  */
    3262            4 :   ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
    3263              : 
    3264              :   /* Control codes, including the NUL byte.
                             The length is passed explicitly here, because
                             check_cpp_valid_utf8_p measures its argument with strlen and
                             would stop at the NUL.  */
    3265            4 :   ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", 5));
    3266              : 
                          /* The four bytes of the emoji above, each followed by '!', so
                             that no multi-byte sequence is ever completed.  */
    3267            4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
    3268              : 
    3269              :   /* Unexpected continuation bytes.
                             Every byte in the range 0x80-0xbf is checked on its own, as a
                             one-byte buffer.  */
    3270            4 :   for (unsigned char continuation_byte = 0x80;
    3271          260 :        continuation_byte <= 0xbf;
    3272              :        continuation_byte++)
    3273          256 :     ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1));
    3274              : 
    3275              :   /* "Lonely start characters" for 2-byte sequences.
                             buf[1] is a space rather than a continuation byte, so each
                             start byte placed in buf[0] is left without the byte that
                             should follow it.  The same two-byte buffer layout is used by
                             the loops for the other start-byte ranges below.  */
    3276            4 :   {
    3277            4 :     unsigned char buf[2];
    3278            4 :     buf[1] = ' ';
    3279            4 :     for (buf[0] = 0xc0;
    3280          132 :          buf[0] <= 0xdf;
    3281          128 :          buf[0]++)
    3282          128 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    3283              :   }
    3284              : 
    3285              :   /* "Lonely start characters" for 3-byte sequences.  */
    3286            4 :   {
    3287            4 :     unsigned char buf[2];
    3288            4 :     buf[1] = ' ';
    3289            4 :     for (buf[0] = 0xe0;
    3290           68 :          buf[0] <= 0xef;
    3291           64 :          buf[0]++)
    3292           64 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    3293              :   }
    3294              : 
    3295              :   /* "Lonely start characters" for 4-byte sequences.  */
    3296            4 :   {
    3297            4 :     unsigned char buf[2];
    3298            4 :     buf[1] = ' ';
    3299            4 :     for (buf[0] = 0xf0;
    3300           24 :          buf[0] <= 0xf4;
    3301           20 :          buf[0]++)
    3302           20 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    3303              :   }
    3304              : 
    3305              :   /* Invalid start characters: 0xf5-0xf7 would start 4-byte sequences
    3306              :      encoding values above U+10FFFF, and 0xf8-0xfd were formerly valid
                             as the start of 5-byte and 6-byte sequences.  */
    3307            4 :   {
    3308            4 :     unsigned char buf[2];
    3309            4 :     buf[1] = ' ';
    3310            4 :     for (buf[0] = 0xf5;
    3311           40 :          buf[0] <= 0xfd;
    3312           36 :          buf[0]++)
    3313           36 :       ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2));
    3314              :   }
    3315              : 
    3316              :   /* Impossible bytes.
                             Each is checked as a one-byte string.  0xc0 and 0xc1 are also
                             exercised by the 2-byte start-character loop above, where they
                             are followed by a space.  */
    3317            4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
    3318            4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
    3319            4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
    3320            4 :   ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
    3321            4 : }
    3322              : 
    3323              : /* Run all of the selftests within this file.
                           Tests handed to for_each_line_table_case are invoked by that
                           function rather than called here directly (it is defined
                           elsewhere; presumably it runs the test once per line_table
                           configuration -- TODO confirm).  The remaining tests are
                           called directly, in the order listed.  */
    3324              : 
    3325              : void
    3326            4 : input_cc_tests ()
    3327              : {
    3328            4 :   test_linenum_comparisons ();
    3329            4 :   test_should_have_column_data_p ();
    3330            4 :   test_unknown_location ();
    3331            4 :   test_builtins ();
    3332            4 :   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
    3333              : 
    3334            4 :   for_each_line_table_case (test_accessing_ordinary_linemaps);
    3335            4 :   for_each_line_table_case (test_lexer);
    3336            4 :   for_each_line_table_case (test_lexer_string_locations_simple);
    3337            4 :   for_each_line_table_case (test_lexer_string_locations_ebcdic);
    3338            4 :   for_each_line_table_case (test_lexer_string_locations_hex);
    3339            4 :   for_each_line_table_case (test_lexer_string_locations_oct);
    3340            4 :   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
    3341            4 :   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
    3342            4 :   for_each_line_table_case (test_lexer_string_locations_ucn4);
    3343            4 :   for_each_line_table_case (test_lexer_string_locations_ucn8);
    3344            4 :   for_each_line_table_case (test_lexer_string_locations_wide_string);
    3345            4 :   for_each_line_table_case (test_lexer_string_locations_string16);
    3346            4 :   for_each_line_table_case (test_lexer_string_locations_string32);
    3347            4 :   for_each_line_table_case (test_lexer_string_locations_u8);
    3348            4 :   for_each_line_table_case (test_lexer_string_locations_utf8_source);
    3349            4 :   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
    3350            4 :   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
    3351            4 :   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
    3352            4 :   for_each_line_table_case (test_lexer_string_locations_macro);
    3353            4 :   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
    3354            4 :   for_each_line_table_case (test_lexer_string_locations_non_string);
    3355            4 :   for_each_line_table_case (test_lexer_string_locations_long_line);
    3356            4 :   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
    3357            4 :   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
    3358            4 :   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
    3359            4 :   for_each_line_table_case (test_lexer_char_constants);
    3360              : 
    3361            4 :   test_line_offset_overflow ();
    3362              : 
                          /* UTF-8 helper tests; these take no line_table argument.  */
    3363            4 :   test_cpp_utf8 ();
    3364            4 :   test_cpp_valid_utf8_p ();
    3365            4 : }
    3366              : 
    3367              : } // namespace selftest
    3368              : 
    3369              : #endif /* CHECKING_P */
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.