LCOV - gcc.info - gcc/json-parsing.cc

LCOV - code coverage report

Current view:	top level - gcc - json-parsing.cc (source / functions)		Coverage	Total	Hit
Test:	gcc.info	Lines:	85.4 %	1191	1017
Test Date:	2024-12-28 13:16:48	Functions:	83.6 %	55	46
Legend:	Lines: hit not hit | Branches: + taken - not taken # not executed	Branches:	-	0	0

             Branch data     Line data    Source code

       1                 :             : /* JSON parsing
       2                 :             :    Copyright (C) 2017-2024 Free Software Foundation, Inc.
       3                 :             :    Contributed by David Malcolm <dmalcolm@redhat.com>.
       4                 :             : 
       5                 :             : This file is part of GCC.
       6                 :             : 
       7                 :             : GCC is free software; you can redistribute it and/or modify it under
       8                 :             : the terms of the GNU General Public License as published by the Free
       9                 :             : Software Foundation; either version 3, or (at your option) any later
      10                 :             : version.
      11                 :             : 
      12                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :             : for more details.
      16                 :             : 
      17                 :             : You should have received a copy of the GNU General Public License
      18                 :             : along with GCC; see the file COPYING3.  If not see
      19                 :             : <http://www.gnu.org/licenses/>.  */
      20                 :             : 
      21                 :             : #include "config.h"
      22                 :             : #include "system.h"
      23                 :             : #include "coretypes.h"
      24                 :             : #include "json-parsing.h"
      25                 :             : #include "pretty-print.h"
      26                 :             : #include "math.h"
      27                 :             : #include "make-unique.h"
      28                 :             : #include "selftest.h"
      29                 :             : 
      30                 :             : using namespace json;
      31                 :             : 
      32                 :             : /* Declarations relating to parsing JSON, all within an
      33                 :             :    anonymous namespace.  */
      34                 :             : 
      35                 :             : namespace {
      36                 :             : 
      37                 :             : /* A typedef representing a single Unicode character (code point).
                         :             :    lexer::add_utf8 decodes UTF-8 input into values of this type and
                         :             :    stores them in the lexer's buffer.  */
      38                 :             : 
      39                 :             : typedef unsigned unichar;
      40                 :             : 
      41                 :             : /* An enum for discriminating different kinds of JSON token.
                         :             :    token_id_name lists one description per enumerator, in this
                         :             :    same order, so the two need to be kept in sync.  */
      42                 :             : 
      43                 :             : enum token_id
      44                 :             : {
      45                 :             :   TOK_ERROR, /* Carries a string (see struct token).  */
      46                 :             : 
      47                 :             :   TOK_EOF,
      48                 :             : 
      49                 :             :   /* Punctuation.  */
      50                 :             :   TOK_OPEN_SQUARE,
      51                 :             :   TOK_OPEN_CURLY,
      52                 :             :   TOK_CLOSE_SQUARE,
      53                 :             :   TOK_CLOSE_CURLY,
      54                 :             :   TOK_COLON,
      55                 :             :   TOK_COMMA,
      56                 :             : 
      57                 :             :   /* Literal names.  */
      58                 :             :   TOK_TRUE,
      59                 :             :   TOK_FALSE,
      60                 :             :   TOK_NULL,
      61                 :             : 
      62                 :             :   TOK_STRING, /* Carries a string (see struct token).  */
      63                 :             :   TOK_FLOAT_NUMBER, /* Carries a double (see struct token).  */
      64                 :             :   TOK_INTEGER_NUMBER /* Carries a long (see struct token).  */
      65                 :             : };
      66                 :             : 
      67                 :             : /* Human-readable descriptions of enum token_id, one per enumerator
                         :             :    and in the same order as the enum.  TOK_FLOAT_NUMBER and
                         :             :    TOK_INTEGER_NUMBER share the same description.  */
      68                 :             : 
      69                 :             : static const char *token_id_name[] = {
      70                 :             :   "error",
      71                 :             :   "EOF",
      72                 :             :   "'['",
      73                 :             :   "'{'",
      74                 :             :   "']'",
      75                 :             :   "'}'",
      76                 :             :   "':'",
      77                 :             :   "','",
      78                 :             :   "'true'",
      79                 :             :   "'false'",
      80                 :             :   "'null'",
      81                 :             :   "string",
      82                 :             :   "number",
      83                 :             :   "number"
      84                 :             : };
      85                 :             : 
      86                 :             : /* Tokens within the JSON lexer.
                         :             :    ID selects which member of U (if any) is meaningful; token kinds
                         :             :    not mentioned below carry no value.  */
      87                 :             : 
      88                 :             : struct token
      89                 :             : {
      90                 :             :   /* The kind of token.  */
      91                 :             :   enum token_id id;
      92                 :             : 
      93                 :             :   /* The location of this token within the unicode
      94                 :             :      character stream.  */
      95                 :             :   location_map::range range;
      96                 :             : 
      97                 :             :   union
      98                 :             :   {
      99                 :             :     /* Value for TOK_ERROR and TOK_STRING.
                         :             :        lexer::consume releases it with free.  */
     100                 :             :     char *string;
     101                 :             : 
     102                 :             :     /* Value for TOK_FLOAT_NUMBER.  */
     103                 :             :     double float_number;
     104                 :             : 
     105                 :             :     /* Value for TOK_INTEGER_NUMBER.  */
     106                 :             :     long integer_number;
     107                 :             :   } u;
     108                 :             : };
     109                 :             : 
     110                 :             : /* A class for lexing JSON.
                         :             :    Input is supplied as UTF-8 via add_utf8, which decodes it into
                         :             :    m_buffer.  Tokens are then obtained with peek and discarded with
                         :             :    consume; at most MAX_TOKENS (one) token is held as lookahead.  */
     111                 :             : 
     112                 :             : class lexer
     113                 :             : {
     114                 :             :  public:
     115                 :             :   lexer (bool support_comments);
     116                 :             :   ~lexer ();
     117                 :             : 
     118                 :             :   std::unique_ptr<error> add_utf8 (size_t length, const char *utf8_buf);
     119                 :             : 
     120                 :             :   const token *peek ();
     121                 :             : 
     122                 :             :   void consume ();
     123                 :             : 
     124                 :             :  private:
     125                 :             :   bool get_char (unichar &out_char, location_map::point *out_point);
     126                 :             :   void unget_char ();
     127                 :             :   location_map::point get_next_point () const;
     128                 :             :   static void dump_token (FILE *outf, const token *tok);
     129                 :             :   void lex_token (token *out);
     130                 :             :   void lex_string (token *out);
     131                 :             :   void lex_number (token *out, unichar first_char);
     132                 :             :   bool rest_of_literal (token *out, const char *suffix);
     133                 :             :   std::unique_ptr<error> make_error (const char *msg);
     134                 :             :   bool consume_single_line_comment (token *out);
     135                 :             :   bool consume_multiline_comment (token *out);
     136                 :             : 
     137                 :             :  private:
     138                 :             :   auto_vec<unichar> m_buffer; /* Decoded input characters.  */
     139                 :             :   int m_next_char_idx; /* Index in m_buffer of the next char to read.  */
     140                 :             :   int m_next_char_line; /* Starts at 1.  */
     141                 :             :   int m_next_char_column; /* Starts at 0; reset to 0 after a '\n'.  */
     142                 :             :   int m_prev_line_final_column; /* for handling unget_char after a '\n'.  */
     143                 :             : 
     144                 :             :   static const int MAX_TOKENS = 1;
     145                 :             :   token m_next_tokens[MAX_TOKENS]; /* Lookahead storage used by peek.  */
     146                 :             :   int m_num_next_tokens; /* How many entries of m_next_tokens are live.  */
     147                 :             : 
     148                 :             :   bool m_support_comments; /* As passed to the ctor.  */
     149                 :             : };
     150                 :             : 
     151                 :             : /* A class for parsing JSON.
                         :             :    Holds its own lexer (m_lexer) plus a location_map pointer
                         :             :    (m_loc_map), presumably the OUT_LOC_MAP passed to the ctor.
                         :             :    Input is added with add_utf8; parse_value, parse_object and
                         :             :    parse_array each take a nesting DEPTH and return a
                         :             :    parser_result_t.  */
     152                 :             : 
     153                 :             : class parser
     154                 :             : {
     155                 :             :  public:
     156                 :             :   parser (location_map *out_loc_map,
     157                 :             :           bool support_comments);
     158                 :             :   ~parser ();
     159                 :             : 
     160                 :             :   std::unique_ptr<error>
     161                 :             :   add_utf8 (size_t length, const char *utf8_buf);
     162                 :             : 
     163                 :             :   parser_result_t parse_value (int depth);
     164                 :             :   parser_result_t parse_object (int depth);
     165                 :             :   parser_result_t parse_array (int depth);
     166                 :             : 
     167                 :             :   std::unique_ptr<error>
     168                 :             :   require_eof ();
     169                 :             : 
     170                 :             :  private:
     171                 :             :   location_map::point get_next_token_start ();
     172                 :             :   location_map::point get_next_token_end ();
     173                 :             : 
     174                 :             :   std::unique_ptr<error>
     175                 :             :   require (enum token_id tok_id);
     176                 :             : 
     177                 :             :   result<enum token_id, std::unique_ptr<error>>
     178                 :             :   require_one_of (enum token_id tok_id_a, enum token_id tok_id_b);
     179                 :             : 
     180                 :             :   std::unique_ptr<error>
     181                 :             :   error_at (const location_map::range &r,
     182                 :             :             const char *fmt, ...) ATTRIBUTE_PRINTF_3;
     183                 :             : 
     184                 :             :   void maybe_record_range (json::value *jv, const location_map::range &r);
     185                 :             :   void maybe_record_range (json::value *jv,
     186                 :             :                            const location_map::point &start,
     187                 :             :                            const location_map::point &end);
     188                 :             : 
     189                 :             :  private:
     190                 :             :   lexer m_lexer;
     191                 :             :   location_map *m_loc_map;
     192                 :             : };
     193                 :             : 
     194                 :             : } // anonymous namespace for parsing implementation
     195                 :             : 
     196                 :             : /* Parser implementation.  */
     197                 :             : 
     198                 :             : /* lexer's ctor.
                         :             :    The buffer starts out empty, the next char to read is index 0 at
                         :             :    line 1, column 0, and no lookahead token has been lexed.
                         :             :    SUPPORT_COMMENTS is stored in m_support_comments.  */
     199                 :             : 
     200                 :         112 : lexer::lexer (bool support_comments)
     201                 :         112 : : m_buffer (), m_next_char_idx (0),
     202                 :         112 :   m_next_char_line (1), m_next_char_column (0),
     203                 :         112 :   m_prev_line_final_column (-1), /* -1: no column saved for unget_char.  */
     204                 :         112 :   m_num_next_tokens (0),
     205                 :         112 :   m_support_comments (support_comments)
     206                 :             : {
     207                 :           0 : }
     208                 :             : 
     209                 :             : /* lexer's dtor.
                         :             :    Consume any token still held as lookahead, so that consume gets
                         :             :    the chance to free a string belonging to it.  */
     210                 :             : 
     211                 :         112 : lexer::~lexer ()
     212                 :             : {
     213                 :         132 :   while (m_num_next_tokens > 0)
     214                 :          20 :     consume ();
     215                 :         112 : }
     216                 :             : 
     217                 :             : /* Peek the next token.
                         :             :    The token is lexed into m_next_tokens[0] the first time it is
                         :             :    asked for; further calls return the same token until consume is
                         :             :    called.  The result points into this lexer's own storage.  */
     218                 :             : 
     219                 :             : const token *
     220                 :         688 : lexer::peek ()
     221                 :             : {
     222                 :         688 :   if (m_num_next_tokens == 0)
     223                 :             :     {
     224                 :         496 :       lex_token (&m_next_tokens[0]);
     225                 :         496 :       m_num_next_tokens++;
     226                 :             :     }
     227                 :         688 :   return &m_next_tokens[0];
     228                 :             : }
     229                 :             : 
     230                 :             : /* Consume the next token.
                         :             :    If no token has been lexed yet, one is lexed first via peek.
                         :             :    For TOK_ERROR and TOK_STRING the token's string is released with
                         :             :    free, so a string seen through an earlier peek is no longer
                         :             :    valid afterwards.  */
     231                 :             : 
     232                 :             : void
     233                 :         496 : lexer::consume ()
     234                 :             : {
     235                 :         496 :   if (m_num_next_tokens == 0)
     236                 :           0 :     peek ();
     237                 :             : 
     238                 :         496 :   gcc_assert (m_num_next_tokens > 0);
     239                 :         496 :   gcc_assert (m_num_next_tokens <= MAX_TOKENS);
     240                 :             : 
     241                 :         496 :   if (0) /* Debug tracing, compiled out.  */
     242                 :             :     {
     243                 :             :       fprintf (stderr, "consuming token: ");
     244                 :             :       dump_token (stderr, &m_next_tokens[0]);
     245                 :             :       fprintf (stderr, "\n");
     246                 :             :     }
     247                 :             : 
     248                 :         496 :   if (m_next_tokens[0].id == TOK_ERROR
     249                 :         496 :       || m_next_tokens[0].id == TOK_STRING)
     250                 :          80 :     free (m_next_tokens[0].u.string);
     251                 :             : 
     252                 :         496 :   m_num_next_tokens--;
     253                 :         496 :   memmove (&m_next_tokens[0], &m_next_tokens[1],
     254                 :         496 :            sizeof (token) * m_num_next_tokens); /* Moves nothing while MAX_TOKENS is 1.  */
     255                 :         496 : }
     256                 :             : 
     257                 :             : /* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
     258                 :             :    buffer.
     259                 :             :    Return null if successful, or the error if there was a problem.
                         :             :    Each character is appended to m_buffer as soon as it has been
                         :             :    decoded, so on error the characters that preceded the bad
                         :             :    sequence have already been added.  */
     260                 :             : 
     261                 :             : std::unique_ptr<error>
     262                 :         112 : lexer::add_utf8 (size_t length, const char *utf8_buf)
     263                 :             : {
     264                 :             :   /* Adapted from charset.c:one_utf8_to_cppchar.  */
     265                 :         112 :   static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
     266                 :         112 :   static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
     267                 :             : 
     268                 :         112 :   const uchar *inbuf = (const unsigned char *) (utf8_buf);
     269                 :         112 :   const uchar **inbufp = &inbuf;
     270                 :         112 :   size_t *inbytesleftp = &length;
     271                 :             : 
     272                 :        2684 :   while (length > 0)
     273                 :             :     {
     274                 :        2572 :       unichar c;
     275                 :        2572 :       const uchar *inbuf = *inbufp; /* Shadows the outer inbuf.  */
     276                 :        2572 :       size_t nbytes, i;
     277                 :             : 
     278                 :        2572 :       c = *inbuf;
     279                 :        2572 :       if (c < 0x80) /* Single-byte character: store it directly.  */
     280                 :             :         {
     281                 :        2556 :           m_buffer.safe_push (c);
     282                 :        2556 :           *inbytesleftp -= 1;
     283                 :        2556 :           *inbufp += 1;
     284                 :        2556 :           continue;
     285                 :             :         }
     286                 :             : 
     287                 :             :       /* The number of leading 1-bits in the first byte indicates how many
     288                 :             :          bytes follow.  */
     289                 :          32 :       for (nbytes = 2; nbytes < 7; nbytes++)
     290                 :          32 :         if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
     291                 :          16 :           goto found;
     292                 :           0 :       return make_error ("ill-formed UTF-8 sequence");
     293                 :          16 :     found:
     294                 :             : 
     295                 :          16 :       if (*inbytesleftp < nbytes) /* Sequence would run past the input.  */
     296                 :           0 :         return make_error ("ill-formed UTF-8 sequence");
     297                 :             : 
     298                 :          16 :       c = (c & masks[nbytes-1]);
     299                 :          16 :       inbuf++;
     300                 :          48 :       for (i = 1; i < nbytes; i++)
     301                 :             :         {
     302                 :          32 :           unichar n = *inbuf++;
     303                 :          32 :           if ((n & 0xC0) != 0x80) /* Top two bits must be 10.  */
     304                 :           0 :             return make_error ("ill-formed UTF-8 sequence");
     305                 :          32 :           c = ((c << 6) + (n & 0x3F));
     306                 :             :         }
     307                 :             : 
     308                 :             :       /* Make sure the shortest possible encoding was used.  */
     309                 :          16 :       if ((   c <=      0x7F && nbytes > 1)
     310                 :          16 :           || (c <=     0x7FF && nbytes > 2)
     311                 :          16 :           || (c <=    0xFFFF && nbytes > 3)
     312                 :          16 :           || (c <=  0x1FFFFF && nbytes > 4)
     313                 :          16 :           || (c <= 0x3FFFFFF && nbytes > 5))
     314                 :           0 :         return make_error ("ill-formed UTF-8:"
     315                 :           0 :                            " shortest possible encoding not used");
     316                 :             : 
     317                 :             :       /* Make sure the character is valid.  */
     318                 :          16 :       if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
     319                 :           0 :         return make_error ("ill-formed UTF-8: invalid character");
     320                 :             : 
     321                 :          16 :       m_buffer.safe_push (c);
     322                 :          16 :       *inbufp = inbuf;
     323                 :          16 :       *inbytesleftp -= nbytes;
     324                 :             :     }
     325                 :         112 :   return nullptr;
     326                 :             : }
     327                 :             : 
     328                 :             : /* Attempt to get the next unicode character from this lexer's buffer.
     329                 :             :    If successful, write it to OUT_CHAR, and its location to *OUT_POINT
                         :             :    (unless OUT_POINT is null), update the line/column bookkeeping,
     330                 :             :    and return true.
     331                 :             :    Otherwise (the buffer is exhausted), return false without writing
                         :             :    to OUT_CHAR or *OUT_POINT.  */
     332                 :             : 
     333                 :             : bool
     334                 :        2472 : lexer::get_char (unichar &out_char, location_map::point *out_point)
     335                 :             : {
     336                 :        4940 :   if (m_next_char_idx >= (int)m_buffer.length ())
     337                 :             :     return false;
     338                 :             : 
     339                 :        2344 :   if (out_point)
     340                 :        1944 :     *out_point = get_next_point ();
     341                 :        2344 :   out_char = m_buffer[m_next_char_idx++];
     342                 :             : 
     343                 :        2344 :   if (out_char == '\n')
     344                 :             :     {
     345                 :          64 :       m_next_char_line++;
     346                 :          64 :       m_prev_line_final_column = m_next_char_column; /* Saved for unget_char.  */
     347                 :          64 :       m_next_char_column = 0;
     348                 :             :     }
     349                 :             :   else
     350                 :        2280 :     m_next_char_column++;
     351                 :             : 
     352                 :             :   return true;
     353                 :             : }
     354                 :             : 
     355                 :             : /* Undo the last successful get_char.
                         :             :    If the column is nonzero the char is taken to be on the current
                         :             :    line; otherwise it is taken to be a '\n', and the line number and
                         :             :    the column that get_char saved are restored.  */
     356                 :             : 
     357                 :             : void
     358                 :         368 : lexer::unget_char ()
     359                 :             : {
     360                 :         368 :   --m_next_char_idx;
     361                 :         368 :   if (m_next_char_column > 0)
     362                 :         352 :     --m_next_char_column;
     363                 :             :   else
     364                 :             :     {
     365                 :          16 :       m_next_char_line--;
     366                 :          16 :       m_next_char_column = m_prev_line_final_column;
     367                 :             :       /* We don't support more than one unget_char in a row.  */
     368                 :          16 :       gcc_assert (m_prev_line_final_column != -1);
     369                 :          16 :       m_prev_line_final_column = -1;
     370                 :             :     }
     371                 :         368 : }
     372                 :             : 
     373                 :             : /* Get the location of the next char: its index within m_buffer,
                         :             :    together with the current line and column counters.  */
     374                 :             : 
     375                 :             : location_map::point
     376                 :        2028 : lexer::get_next_point () const
     377                 :             : {
     378                 :        2028 :   location_map::point result;
     379                 :        2028 :   result.m_unichar_idx = m_next_char_idx;
     380                 :        2028 :   result.m_line = m_next_char_line;
     381                 :        2028 :   result.m_column = m_next_char_column;
     382                 :        1944 :   return result;
     383                 :             : }
     384                 :             : 
     385                 :             : /* Print a textual representation of TOK to OUTF.
     386                 :             :    This is intended for debugging the lexer and parser,
     387                 :             :    rather than for user-facing output.
                         :             :    The enumerator name is printed, followed by the value in
                         :             :    parentheses for tokens that have one.  Strings are printed as-is,
                         :             :    without escaping, and no trailing newline is written.  */
     388                 :             : 
     389                 :             : void
     390                 :           0 : lexer::dump_token (FILE *outf, const token *tok)
     391                 :             : {
     392                 :           0 :   switch (tok->id)
     393                 :             :     {
     394                 :           0 :     case TOK_ERROR:
     395                 :           0 :       fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
     396                 :           0 :       break;
     397                 :             : 
     398                 :           0 :     case TOK_EOF:
     399                 :           0 :       fprintf (outf, "TOK_EOF");
     400                 :           0 :       break;
     401                 :             : 
     402                 :           0 :     case TOK_OPEN_SQUARE:
     403                 :           0 :       fprintf (outf, "TOK_OPEN_SQUARE");
     404                 :           0 :       break;
     405                 :             : 
     406                 :           0 :     case TOK_OPEN_CURLY:
     407                 :           0 :       fprintf (outf, "TOK_OPEN_CURLY");
     408                 :           0 :       break;
     409                 :             : 
     410                 :           0 :     case TOK_CLOSE_SQUARE:
     411                 :           0 :       fprintf (outf, "TOK_CLOSE_SQUARE");
     412                 :           0 :       break;
     413                 :             : 
     414                 :           0 :     case TOK_CLOSE_CURLY:
     415                 :           0 :       fprintf (outf, "TOK_CLOSE_CURLY");
     416                 :           0 :       break;
     417                 :             : 
     418                 :           0 :     case TOK_COLON:
     419                 :           0 :       fprintf (outf, "TOK_COLON");
     420                 :           0 :       break;
     421                 :             : 
     422                 :           0 :     case TOK_COMMA:
     423                 :           0 :       fprintf (outf, "TOK_COMMA");
     424                 :           0 :       break;
     425                 :             : 
     426                 :           0 :     case TOK_TRUE:
     427                 :           0 :       fprintf (outf, "TOK_TRUE");
     428                 :           0 :       break;
     429                 :             : 
     430                 :           0 :     case TOK_FALSE:
     431                 :           0 :       fprintf (outf, "TOK_FALSE");
     432                 :           0 :       break;
     433                 :             : 
     434                 :           0 :     case TOK_NULL:
     435                 :           0 :       fprintf (outf, "TOK_NULL");
     436                 :           0 :       break;
     437                 :             : 
     438                 :           0 :     case TOK_STRING:
     439                 :           0 :       fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
     440                 :           0 :       break;
     441                 :             : 
     442                 :           0 :     case TOK_FLOAT_NUMBER:
     443                 :           0 :       fprintf (outf, "TOK_FLOAT_NUMBER (%f)", tok->u.float_number);
     444                 :           0 :       break;
     445                 :             : 
     446                 :           0 :     case TOK_INTEGER_NUMBER:
     447                 :           0 :       fprintf (outf, "TOK_INTEGER_NUMBER (%ld)", tok->u.integer_number);
     448                 :           0 :       break;
     449                 :             : 
     450                 :           0 :     default:
     451                 :           0 :       gcc_unreachable ();
     452                 :           0 :       break;
     453                 :             :     }
     454                 :           0 : }
     455                 :             : 
     456                 :             : /* Treat "//" as a comment to the end of the line.
     457                 :             : 
     458                 :             :    This isn't compliant with the JSON spec,
     459                 :             :    but is very handy for writing DejaGnu tests.
     460                 :             : 
     461                 :             :    Return true if EOF and populate *OUT, false otherwise.
                         :             : 
                         :             :    Chars are read up to and including the next '\n'.  If the buffer
                         :             :    runs out first, *OUT becomes a TOK_EOF whose range starts and
                         :             :    ends at the point just past the last char.  Presumably the
                         :             :    caller has already consumed the leading "//".  */
     462                 :             : 
     463                 :             : bool
     464                 :          16 : lexer::consume_single_line_comment (token *out)
     465                 :             : {
     466                 :         208 :   while (1)
     467                 :             :     {
     468                 :         112 :       unichar next_char;
     469                 :         112 :       if (!get_char (next_char, nullptr))
     470                 :             :         {
     471                 :           0 :           out->id = TOK_EOF;
     472                 :           0 :           location_map::point p = get_next_point ();
     473                 :           0 :           out->range.m_start = p;
     474                 :           0 :           out->range.m_end = p;
     475                 :           0 :           return true;
     476                 :             :         }
     477                 :         112 :       if (next_char == '\n')
     478                 :             :         return false;
     479                 :          96 :     }
     480                 :             : }
     481                 :             : 
     482                 :             : /* Treat '/' '*' as a multiline comment until the next closing '*' '/'.
     483                 :             : 
     484                 :             :    This isn't compliant with the JSON spec,
     485                 :             :    but is very handy for writing DejaGnu tests.
     486                 :             : 
     487                 :             :    Return true if EOF and populate *OUT, false otherwise.
                         :             : 
                         :             :    NOTE(review): hitting EOF inside the comment is not handled yet:
                         :             :    OUT->id is set to TOK_ERROR and then gcc_unreachable is called, so
                         :             :    the statements after it (range setup, return true) never run, and
                         :             :    OUT->u.string is never set.
                         :             : 
                         :             :    NOTE(review): the char following a '*' is always consumed, so in
                         :             :    a close written as '*' '*' '/' the second '*' is eaten as that
                         :             :    following char and the '/' is then not treated as a terminator.  */
     488                 :             : 
     489                 :             : bool
     490                 :          12 : lexer::consume_multiline_comment (token *out)
     491                 :             : {
     492                 :         276 :   while (1)
     493                 :             :     {
     494                 :         276 :       unichar next_char;
     495                 :         276 :       if (!get_char (next_char, nullptr))
     496                 :             :         {
     497                 :           0 :           out->id = TOK_ERROR;
     498                 :           0 :           gcc_unreachable (); // TODO: handle EOF inside a comment
     499                 :             :           location_map::point p = get_next_point ();
     500                 :             :           out->range.m_start = p;
     501                 :             :           out->range.m_end = p;
     502                 :             :           return true;
     503                 :             :         }
     504                 :         276 :       if (next_char != '*')
     505                 :         264 :         continue;
     506                 :          12 :       if (!get_char (next_char, nullptr))
     507                 :             :         {
     508                 :           0 :           out->id = TOK_ERROR;
     509                 :           0 :           gcc_unreachable (); // TODO: handle EOF inside a comment
     510                 :             :           location_map::point p = get_next_point ();
     511                 :             :           out->range.m_start = p;
     512                 :             :           out->range.m_end = p;
     513                 :             :           return true;
     514                 :             :         }
     515                 :          12 :       if (next_char == '/')
     516                 :          12 :         return false;
     517                 :             :     }
     518                 :             : }
     519                 :             : 
     520                 :             : /* Attempt to lex the input buffer, writing the next token to OUT.
     521                 :             :    On errors, TOK_ERROR (or TOK_EOF) is written to OUT.  */
     522                 :             : 
     523                 :             : void
     524                 :         496 : lexer::lex_token (token *out)
     525                 :             : {
     526                 :             :   /* Skip to next non-whitespace char.  */
     527                 :         940 :   unichar next_char;
     528                 :         940 :   location_map::point start_point;
     529                 :         940 :   while (1)
     530                 :             :     {
     531                 :         940 :       if (!get_char (next_char, &start_point))
     532                 :             :         {
     533                 :          84 :           out->id = TOK_EOF;
     534                 :          84 :           location_map::point p = get_next_point ();
     535                 :          84 :           out->range.m_start = p;
     536                 :          84 :           out->range.m_end = p;
     537                 :          84 :           return;
     538                 :             :         }
     539                 :         856 :       if (m_support_comments)
     540                 :         160 :         if (next_char == '/')
     541                 :             :           {
     542                 :          28 :             location_map::point point;
     543                 :          28 :             unichar next_next_char;
     544                 :          28 :             if (get_char (next_next_char, &point))
     545                 :             :               {
     546                 :          28 :                 switch (next_next_char)
     547                 :             :                   {
     548                 :          16 :                   case '/':
     549                 :          16 :                     if (consume_single_line_comment (out))
     550                 :           0 :                       return;
     551                 :          28 :                     continue;
     552                 :          12 :                   case '*':
     553                 :          12 :                     if (consume_multiline_comment (out))
     554                 :             :                       return;
     555                 :          12 :                     continue;
     556                 :           0 :                   default:
     557                 :             :                     /* A stray single '/'.  Break out of loop, so that we
     558                 :             :                        handle it below as an unexpected character.  */
     559                 :           0 :                     goto non_whitespace;
     560                 :             :                   }
     561                 :             :               }
     562                 :             :           }
     563                 :         828 :       if (next_char != ' '
     564                 :         828 :           && next_char != '\t'
     565                 :         432 :           && next_char != '\n'
     566                 :         412 :           && next_char != '\r')
     567                 :             :         break;
     568                 :             :     }
     569                 :             : 
     570                 :         412 :  non_whitespace:
     571                 :             : 
     572                 :         412 :   out->range.m_start = start_point;
     573                 :         412 :   out->range.m_end = start_point;
     574                 :             : 
     575                 :         412 :   switch (next_char)
     576                 :             :     {
     577                 :          20 :     case '[':
     578                 :          20 :       out->id = TOK_OPEN_SQUARE;
     579                 :          20 :       break;
     580                 :             : 
     581                 :          20 :     case '{':
     582                 :          20 :       out->id = TOK_OPEN_CURLY;
     583                 :          20 :       break;
     584                 :             : 
     585                 :          16 :     case ']':
     586                 :          16 :       out->id = TOK_CLOSE_SQUARE;
     587                 :          16 :       break;
     588                 :             : 
     589                 :          16 :     case '}':
     590                 :          16 :       out->id = TOK_CLOSE_CURLY;
     591                 :          16 :       break;
     592                 :             : 
     593                 :          28 :     case ':':
     594                 :          28 :       out->id = TOK_COLON;
     595                 :          28 :       break;
     596                 :             : 
     597                 :          64 :     case ',':
     598                 :          64 :       out->id = TOK_COMMA;
     599                 :          64 :       break;
     600                 :             : 
     601                 :          64 :     case '"':
     602                 :          64 :       lex_string (out);
     603                 :          64 :       break;
     604                 :             : 
     605                 :         140 :     case '-':
     606                 :         140 :     case '0':
     607                 :         140 :     case '1':
     608                 :         140 :     case '2':
     609                 :         140 :     case '3':
     610                 :         140 :     case '4':
     611                 :         140 :     case '5':
     612                 :         140 :     case '6':
     613                 :         140 :     case '7':
     614                 :         140 :     case '8':
     615                 :         140 :     case '9':
     616                 :         140 :       lex_number (out, next_char);
     617                 :         140 :       break;
     618                 :             : 
     619                 :           8 :     case 't':
     620                 :             :       /* Handle literal "true".  */
     621                 :           8 :       if (rest_of_literal (out, "rue"))
     622                 :             :         {
     623                 :           8 :           out->id = TOK_TRUE;
     624                 :           8 :           break;
     625                 :             :         }
     626                 :             :       else
     627                 :           0 :         goto err;
     628                 :             : 
     629                 :           8 :     case 'f':
     630                 :             :       /* Handle literal "false".  */
     631                 :           8 :       if (rest_of_literal (out, "alse"))
     632                 :             :         {
     633                 :           8 :           out->id = TOK_FALSE;
     634                 :           8 :           break;
     635                 :             :         }
     636                 :             :       else
     637                 :           0 :         goto err;
     638                 :             : 
     639                 :          16 :     case 'n':
     640                 :             :       /* Handle literal "null".  */
     641                 :          16 :       if (rest_of_literal (out, "ull"))
     642                 :             :         {
     643                 :          12 :           out->id = TOK_NULL;
     644                 :          12 :           break;
     645                 :             :         }
     646                 :             :       else
     647                 :           4 :         goto err;
     648                 :             : 
     649                 :          16 :     err:
     650                 :          16 :     default:
     651                 :          16 :       out->id = TOK_ERROR;
     652                 :          16 :       out->u.string = xasprintf ("unexpected character: '%c'", next_char);
     653                 :          16 :       break;
     654                 :             :     }
     655                 :             : }
     656                 :             : 
     657                 :             : /* Having consumed an open-quote character from the lexer's buffer, attempt
     658                 :             :    to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR)
     659                 :             :    if an error occurred.
     660                 :             :    (ECMA-404 section 9; RFC 7159 section 7).  */
     661                 :             : 
     662                 :             : void
     663                 :          64 : lexer::lex_string (token *out)
     664                 :             : {
     665                 :          64 :   auto_vec<unichar> content;
     666                 :          64 :   bool still_going = true;
     667                 :         456 :   while (still_going)
     668                 :             :     {
     669                 :         392 :       unichar uc;
     670                 :         392 :       if (!get_char (uc, &out->range.m_end))
     671                 :             :         {
     672                 :           0 :           out->id = TOK_ERROR;
     673                 :           0 :           out->range.m_end = get_next_point ();
     674                 :           0 :           out->u.string = xstrdup ("EOF within string");
     675                 :           0 :           return;
     676                 :             :         }
     677                 :         392 :       switch (uc)
     678                 :             :         {
     679                 :             :         case '"':
     680                 :             :           still_going = false;
     681                 :             :           break;
     682                 :          24 :         case '\\':
     683                 :          24 :           {
     684                 :          24 :             unichar next_char;
     685                 :          24 :             if (!get_char (next_char, &out->range.m_end))
     686                 :             :               {
     687                 :           0 :                 out->id = TOK_ERROR;
     688                 :           0 :                 out->range.m_end = get_next_point ();
     689                 :           0 :                 out->u.string = xstrdup ("EOF within string");;
     690                 :           0 :                 return;
     691                 :             :               }
     692                 :          24 :             switch (next_char)
     693                 :             :               {
     694                 :           8 :               case '"':
     695                 :           8 :               case '\\':
     696                 :           8 :               case '/':
     697                 :           8 :                 content.safe_push (next_char);
     698                 :           8 :                 break;
     699                 :             : 
     700                 :           0 :               case 'b':
     701                 :           0 :                 content.safe_push ('\b');
     702                 :           0 :                 break;
     703                 :             : 
     704                 :           0 :               case 'f':
     705                 :           0 :                 content.safe_push ('\f');
     706                 :           0 :                 break;
     707                 :             : 
     708                 :           0 :               case 'n':
     709                 :           0 :                 content.safe_push ('\n');
     710                 :           0 :                 break;
     711                 :             : 
     712                 :           0 :               case 'r':
     713                 :           0 :                 content.safe_push ('\r');
     714                 :           0 :                 break;
     715                 :             : 
     716                 :           0 :               case 't':
     717                 :           0 :                 content.safe_push ('\t');
     718                 :           0 :                 break;
     719                 :             : 
     720                 :          16 :               case 'u':
     721                 :          16 :                 {
     722                 :          16 :                   unichar result = 0;
     723                 :          80 :                   for (int i = 0; i < 4; i++)
     724                 :             :                     {
     725                 :          64 :                       unichar hexdigit;
     726                 :          64 :                       if (!get_char (hexdigit, &out->range.m_end))
     727                 :             :                         {
     728                 :           0 :                           out->id = TOK_ERROR;
     729                 :           0 :                           out->range.m_end = get_next_point ();
     730                 :           0 :                           out->u.string = xstrdup ("EOF within string");
     731                 :           0 :                           return;
     732                 :             :                         }
     733                 :          64 :                       result <<= 4;
     734                 :          64 :                       if (hexdigit >= '0' && hexdigit <= '9')
     735                 :          60 :                         result += hexdigit - '0';
     736                 :           4 :                       else if (hexdigit >= 'a' && hexdigit <= 'f')
     737                 :           4 :                         result += (hexdigit - 'a') + 10;
     738                 :           0 :                       else if (hexdigit >= 'A' && hexdigit <= 'F')
     739                 :           0 :                         result += (hexdigit - 'A') + 10;
     740                 :             :                       else
     741                 :             :                         {
     742                 :           0 :                           out->id = TOK_ERROR;
     743                 :           0 :                           out->range.m_start = out->range.m_end;
     744                 :           0 :                           out->u.string = xstrdup ("bogus hex char");
     745                 :           0 :                           return;
     746                 :             :                         }
     747                 :             :                     }
     748                 :          16 :                   content.safe_push (result);
     749                 :             :                 }
     750                 :          16 :                 break;
     751                 :             : 
     752                 :           0 :               default:
     753                 :           0 :                 out->id = TOK_ERROR;
     754                 :           0 :                 out->u.string = xstrdup ("unrecognized escape char");
     755                 :           0 :                 return;
     756                 :             :               }
     757                 :             :           }
     758                 :          24 :           break;
     759                 :             : 
     760                 :         304 :         default:
     761                 :             :           /* Reject unescaped control characters U+0000 through U+001F
     762                 :             :              (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1).  */
     763                 :         304 :           if (uc <= 0x1f)
     764                 :             :             {
     765                 :           0 :                 out->id = TOK_ERROR;
     766                 :           0 :                 out->range.m_start = out->range.m_end;
     767                 :           0 :                 out->u.string = xstrdup ("unescaped control char");
     768                 :           0 :                 return;
     769                 :             :             }
     770                 :             : 
     771                 :             :           /* Otherwise, add regular unicode code point.  */
     772                 :         304 :           content.safe_push (uc);
     773                 :         304 :           break;
     774                 :             :         }
     775                 :             :     }
     776                 :             : 
     777                 :          64 :   out->id = TOK_STRING;
     778                 :             : 
     779                 :         128 :   auto_vec<char> utf8_buf;
     780                 :             :   // Adapted from libcpp/charset.c:one_cppchar_to_utf8
     781                 :         392 :   for (unsigned i = 0; i < content.length (); i++)
     782                 :             :     {
     783                 :         328 :       static const uchar masks[6] =  { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
     784                 :         328 :       static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
     785                 :         328 :       size_t nbytes;
     786                 :         328 :       uchar buf[6], *p = &buf[6];
     787                 :         328 :       unichar c = content[i];
     788                 :             : 
     789                 :         328 :       nbytes = 1;
     790                 :         328 :       if (c < 0x80)
     791                 :         296 :         *--p = c;
     792                 :             :       else
     793                 :             :         {
     794                 :          64 :           do
     795                 :             :             {
     796                 :          64 :               *--p = ((c & 0x3F) | 0x80);
     797                 :          64 :               c >>= 6;
     798                 :          64 :               nbytes++;
     799                 :             :             }
     800                 :          64 :           while (c >= 0x3F || (c & limits[nbytes-1]));
     801                 :          32 :           *--p = (c | masks[nbytes-1]);
     802                 :             :         }
     803                 :             : 
     804                 :         720 :       while (p < &buf[6])
     805                 :         392 :         utf8_buf.safe_push (*p++);
     806                 :             :     }
     807                 :             : 
     808                 :         128 :   out->u.string = XNEWVEC (char, utf8_buf.length () + 1);
     809                 :         456 :   for (unsigned i = 0; i < utf8_buf.length (); i++)
     810                 :         392 :     out->u.string[i] = utf8_buf[i];
     811                 :         128 :   out->u.string[utf8_buf.length ()] = '\0';
     812                 :          64 : }
     813                 :             : 
     814                 :             : /* Having consumed FIRST_CHAR, an initial digit or '-' character from
     815                 :             :    the lexer's buffer attempt to lex the rest of a JSON number, writing
     816                 :             :    the result to OUT (or TOK_ERROR) if an error occurred.
     817                 :             :    (ECMA-404 section 8; RFC 7159 section 6).  */
     818                 :             : 
     819                 :             : void
     820                 :         140 : lexer::lex_number (token *out, unichar first_char)
     821                 :             : {
     822                 :         140 :   bool negate = false;
     823                 :         140 :   double value = 0.0;
     824                 :         140 :   if (first_char == '-')
     825                 :             :     {
     826                 :          12 :       negate = true;
     827                 :          12 :       if (!get_char (first_char, &out->range.m_end))
     828                 :             :         {
     829                 :           0 :           out->id = TOK_ERROR;
     830                 :           0 :           out->range.m_start = out->range.m_end;
     831                 :           0 :           out->u.string = xstrdup ("expected digit");
     832                 :           0 :           return;
     833                 :             :         }
     834                 :             :     }
     835                 :             : 
     836                 :         140 :   if (first_char == '0')
     837                 :             :     value = 0.0;
     838                 :         132 :   else if (!ISDIGIT (first_char))
     839                 :             :     {
     840                 :           0 :       out->id = TOK_ERROR;
     841                 :           0 :       out->range.m_start = out->range.m_end;
     842                 :           0 :       out->u.string = xstrdup ("expected digit");
     843                 :           0 :       return;
     844                 :             :     }
     845                 :             :   else
     846                 :             :     {
     847                 :             :       /* Got a nonzero digit; expect zero or more digits.  */
     848                 :         132 :       value = first_char - '0';
     849                 :         364 :       while (1)
     850                 :             :         {
     851                 :         248 :           unichar uc;
     852                 :         248 :           location_map::point point;
     853                 :         248 :           if (!get_char (uc, &point))
     854                 :             :             break;
     855                 :         240 :           if (ISDIGIT (uc))
     856                 :             :             {
     857                 :         116 :               value *= 10;
     858                 :         116 :               value += uc -'0';
     859                 :         116 :               out->range.m_end = point;
     860                 :         116 :               continue;
     861                 :             :             }
     862                 :             :           else
     863                 :             :             {
     864                 :         124 :               unget_char ();
     865                 :         124 :               break;
     866                 :             :             }
     867                 :             :         }
     868                 :             :     }
     869                 :             : 
     870                 :             :   /* Optional '.', followed by one or more decimals.  */
     871                 :         140 :   unichar next_char;
     872                 :         140 :   location_map::point point;
     873                 :         140 :   if (get_char (next_char, &point))
     874                 :             :     {
     875                 :         132 :       if (next_char == '.')
     876                 :             :         {
     877                 :             :           /* Parse decimal digits.  */
     878                 :             :           bool had_digit = false;
     879                 :             :           double digit_factor = 0.1;
     880                 :          44 :           while (get_char (next_char, &point))
     881                 :             :             {
     882                 :          40 :               if (!ISDIGIT (next_char))
     883                 :             :                 {
     884                 :           8 :                   unget_char ();
     885                 :           8 :                   break;
     886                 :             :                 }
     887                 :          32 :               value += (next_char - '0') * digit_factor;
     888                 :          32 :               digit_factor *= 0.1;
     889                 :          32 :               had_digit = true;
     890                 :          32 :               out->range.m_end = point;
     891                 :             :             }
     892                 :          12 :           if (!had_digit)
     893                 :             :             {
     894                 :           0 :               out->id = TOK_ERROR;
     895                 :           0 :               out->range.m_start = point;
     896                 :           0 :               out->range.m_start = point;
     897                 :           0 :               out->u.string = xstrdup ("expected digit");
     898                 :           0 :               return;
     899                 :             :             }
     900                 :             :         }
     901                 :             :       else
     902                 :         120 :         unget_char ();
     903                 :             :     }
     904                 :             : 
     905                 :             :   /* Parse 'e' and 'E'.  */
     906                 :         140 :   unichar exponent_char;
     907                 :         140 :   if (get_char (exponent_char, &point))
     908                 :             :     {
     909                 :         128 :       if (exponent_char == 'e' || exponent_char == 'E')
     910                 :             :         {
     911                 :             :           /* Optional +/-.  */
     912                 :          16 :           unichar sign_char;
     913                 :          16 :           int exponent = 0;
     914                 :          16 :           bool negate_exponent = false;
     915                 :          16 :           bool had_exponent_digit = false;
     916                 :          16 :           if (!get_char (sign_char, &point))
     917                 :             :             {
     918                 :           0 :               out->id = TOK_ERROR;
     919                 :           0 :               out->range.m_start = point;
     920                 :           0 :               out->range.m_start = point;
     921                 :           0 :               out->u.string = xstrdup ("EOF within exponent");
     922                 :           0 :               return;
     923                 :             :             }
     924                 :          16 :           if (sign_char == '-')
     925                 :             :             negate_exponent = true;
     926                 :          12 :           else if (sign_char == '+')
     927                 :             :             ;
     928                 :           8 :           else if (ISDIGIT (sign_char))
     929                 :             :             {
     930                 :           8 :               exponent = sign_char - '0';
     931                 :           8 :               had_exponent_digit = true;
     932                 :             :             }
     933                 :             :           else
     934                 :             :             {
     935                 :           0 :               out->id = TOK_ERROR;
     936                 :           0 :               out->range.m_start = point;
     937                 :           0 :               out->range.m_start = point;
     938                 :           0 :               out->u.string
     939                 :           0 :                 = xstrdup ("expected '-','+' or digit within exponent");
     940                 :           0 :               return;
     941                 :             :             }
     942                 :          16 :           out->range.m_end = point;
     943                 :             : 
     944                 :             :           /* One or more digits (we might have seen the digit above,
     945                 :             :              though).  */
     946                 :          32 :           while (1)
     947                 :             :             {
     948                 :          24 :               unichar uc;
     949                 :          24 :               location_map::point point;
     950                 :          24 :               if (!get_char (uc, &point))
     951                 :             :                 break;
     952                 :          12 :               if (ISDIGIT (uc))
     953                 :             :                 {
     954                 :           8 :                   exponent *= 10;
     955                 :           8 :                   exponent += uc -'0';
     956                 :           8 :                   had_exponent_digit = true;
     957                 :           8 :                   out->range.m_end = point;
     958                 :           8 :                   continue;
     959                 :             :                 }
     960                 :             :               else
     961                 :             :                 {
     962                 :           4 :                   unget_char ();
     963                 :           4 :                   break;
     964                 :             :                 }
     965                 :             :             }
     966                 :          16 :           if (!had_exponent_digit)
     967                 :             :             {
     968                 :           0 :               out->id = TOK_ERROR;
     969                 :           0 :               out->range.m_start = point;
     970                 :           0 :               out->range.m_start = point;
     971                 :           0 :               out->u.string = xstrdup ("expected digit within exponent");
     972                 :           0 :               return;
     973                 :             :             }
     974                 :          16 :           if (negate_exponent)
     975                 :           4 :             exponent = -exponent;
     976                 :          16 :           value = value * pow (10, exponent);
     977                 :             :         }
     978                 :             :       else
     979                 :         112 :         unget_char ();
     980                 :             :     }
     981                 :             : 
     982                 :         140 :   if (negate)
     983                 :          12 :     value = -value;
     984                 :             : 
     985                 :         140 :   if (value == (long)value)
     986                 :             :     {
     987                 :         124 :       out->id = TOK_INTEGER_NUMBER;
     988                 :         124 :       out->u.integer_number = value;
     989                 :             :     }
     990                 :             :   else
     991                 :             :     {
     992                 :          16 :       out->id = TOK_FLOAT_NUMBER;
     993                 :          16 :       out->u.float_number = value;
     994                 :             :     }
     995                 :             : }
     996                 :             : 
     997                 :             : /* Determine if the next characters to be lexed match SUFFIX.
     998                 :             :    SUFFIX must be pure ASCII and not contain newlines.
     999                 :             :    If so, consume the characters and return true.
    1000                 :             :    Otherwise, return false.  */
    1001                 :             : 
    1002                 :             : bool
    1003                 :          32 : lexer::rest_of_literal (token *out, const char *suffix)
    1004                 :             : {
    1005                 :          32 :   int suffix_idx = 0;
    1006                 :          32 :   int buf_idx = m_next_char_idx;
    1007                 :         216 :   while (1)
    1008                 :             :     {
    1009                 :         124 :       if (suffix[suffix_idx] == '\0')
    1010                 :             :         {
    1011                 :          28 :           m_next_char_idx += suffix_idx;
    1012                 :          28 :           m_next_char_column += suffix_idx;
    1013                 :          28 :           out->range.m_end.m_unichar_idx += suffix_idx;
    1014                 :          28 :           out->range.m_end.m_column += suffix_idx;
    1015                 :          28 :           return true;
    1016                 :             :         }
    1017                 :         192 :       if (buf_idx >= (int)m_buffer.length ())
    1018                 :             :         return false;
    1019                 :             :       /* This assumes that suffix is ASCII.  */
    1020                 :          96 :       if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx])
    1021                 :             :         return false;
    1022                 :          92 :       buf_idx++;
    1023                 :          92 :       suffix_idx++;
    1024                 :             :     }
    1025                 :             : }
    1026                 :             : 
    1027                 :             : /* Create a new error instance for MSG, using the location of the next
    1028                 :             :    character for the location of the error.  */
    1029                 :             : 
    1030                 :             : std::unique_ptr<error>
    1031                 :           0 : lexer::make_error (const char *msg)
    1032                 :             : {
    1033                 :           0 :   location_map::point p;
    1034                 :           0 :   p.m_unichar_idx = m_next_char_idx;
    1035                 :           0 :   p.m_line = m_next_char_line;
    1036                 :           0 :   p.m_column = m_next_char_column;
    1037                 :           0 :   location_map::range r;
    1038                 :           0 :   r.m_start = p;
    1039                 :           0 :   r.m_end = p;
    1040                 :           0 :   return ::make_unique<error> (r, xstrdup (msg));
    1041                 :             : }
    1042                 :             : 
    1043                 :             : /* parser's ctor.  */
    1044                 :             : 
    1045                 :          92 : parser::parser (location_map *out_loc_map,
    1046                 :          92 :                 bool support_comments)
    1047                 :          92 : : m_lexer (support_comments), m_loc_map (out_loc_map)
    1048                 :             : {
    1049                 :           0 : }
    1050                 :             : 
    1051                 :             : /* parser's dtor.  */
    1052                 :             : 
    1053                 :          92 : parser::~parser ()
    1054                 :             : {
    1055                 :          92 :   if (m_loc_map)
    1056                 :          92 :     m_loc_map->on_finished_parsing ();
    1057                 :          92 : }
    1058                 :             : 
    1059                 :             : /* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
    1060                 :             :    lexer's buffer.  */
    1061                 :             : 
    1062                 :             : std::unique_ptr<error>
    1063                 :          92 : parser::add_utf8 (size_t length, const char *utf8_buf)
    1064                 :             : {
    1065                 :           0 :   return m_lexer.add_utf8 (length, utf8_buf);
    1066                 :             : }
    1067                 :             : 
    1068                 :             : /* Parse a JSON value (object, array, number, string, or literal).
    1069                 :             :    (ECMA-404 section 5; RFC 7159 section 3).  */
    1070                 :             : 
    1071                 :             : parser_result_t
    1072                 :         184 : parser::parse_value (int depth)
    1073                 :             : {
    1074                 :         184 :   const token *tok = m_lexer.peek ();
    1075                 :             : 
    1076                 :             :   /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
    1077                 :             :      states: "An implementation may set limits on the maximum depth
    1078                 :             :      of nesting.".
    1079                 :             : 
    1080                 :             :      Ideally we'd avoid this limit (e.g. by rewriting parse_value,
    1081                 :             :      parse_object, and parse_array into a single function with a vec of
    1082                 :             :      state).  */
    1083                 :         184 :   const int MAX_DEPTH = 100;
    1084                 :         184 :   if (depth >= MAX_DEPTH)
    1085                 :           0 :     return error_at (tok->range, "maximum nesting depth exceeded: %i",
    1086                 :           0 :                      MAX_DEPTH);
    1087                 :             : 
    1088                 :         184 :   switch (tok->id)
    1089                 :             :     {
    1090                 :          16 :     case TOK_OPEN_CURLY:
    1091                 :          16 :       return parse_object (depth);
    1092                 :             : 
    1093                 :          28 :     case TOK_STRING:
    1094                 :          28 :       {
    1095                 :          28 :         auto val = ::make_unique<string> (tok->u.string);
    1096                 :          28 :         m_lexer.consume ();
    1097                 :          28 :         maybe_record_range (val.get (), tok->range);
    1098                 :          28 :         return parser_result_t (std::move (val));
    1099                 :          28 :       }
    1100                 :             : 
    1101                 :          16 :     case TOK_OPEN_SQUARE:
    1102                 :          16 :       return parse_array (depth);
    1103                 :             : 
    1104                 :          12 :     case TOK_FLOAT_NUMBER:
    1105                 :          12 :       {
    1106                 :          12 :         auto val = ::make_unique<float_number> (tok->u.float_number);
    1107                 :          12 :         m_lexer.consume ();
    1108                 :          12 :         maybe_record_range (val.get (), tok->range);
    1109                 :          12 :         return parser_result_t (std::move (val));
    1110                 :          12 :       }
    1111                 :             : 
    1112                 :          84 :     case TOK_INTEGER_NUMBER:
    1113                 :          84 :       {
    1114                 :          84 :         auto val = ::make_unique<integer_number> (tok->u.integer_number);
    1115                 :          84 :         m_lexer.consume ();
    1116                 :          84 :         maybe_record_range (val.get (), tok->range);
    1117                 :          84 :         return parser_result_t (std::move (val));
    1118                 :          84 :       }
    1119                 :             : 
    1120                 :           4 :     case TOK_TRUE:
    1121                 :           4 :       {
    1122                 :           4 :         auto val = ::make_unique<literal> (JSON_TRUE);
    1123                 :           4 :         m_lexer.consume ();
    1124                 :           4 :         maybe_record_range (val.get (), tok->range);
    1125                 :           4 :         return parser_result_t (std::move (val));
    1126                 :           4 :       }
    1127                 :             : 
    1128                 :           4 :     case TOK_FALSE:
    1129                 :           4 :       {
    1130                 :           4 :         auto val = ::make_unique<literal> (JSON_FALSE);
    1131                 :           4 :         m_lexer.consume ();
    1132                 :           4 :         maybe_record_range (val.get (), tok->range);
    1133                 :           4 :         return parser_result_t (std::move (val));
    1134                 :           4 :       }
    1135                 :             : 
    1136                 :           8 :     case TOK_NULL:
    1137                 :           8 :       {
    1138                 :           8 :         auto val = ::make_unique<literal> (JSON_NULL);
    1139                 :           8 :         m_lexer.consume ();
    1140                 :           8 :         maybe_record_range (val.get (), tok->range);
    1141                 :           8 :         return parser_result_t (std::move (val));
    1142                 :           8 :       }
    1143                 :             : 
    1144                 :           8 :     case TOK_ERROR:
    1145                 :           8 :       return error_at (tok->range, "invalid JSON token: %s", tok->u.string);
    1146                 :             : 
    1147                 :           4 :     default:
    1148                 :           4 :       return error_at (tok->range, "expected a JSON value but got %s",
    1149                 :           4 :                        token_id_name[tok->id]);
    1150                 :             :     }
    1151                 :             : }
    1152                 :             : 
    1153                 :             : /* Parse a JSON object.
    1154                 :             :    (ECMA-404 section 6; RFC 7159 section 4).  */
    1155                 :             : 
    1156                 :             : parser_result_t
    1157                 :          16 : parser::parse_object (int depth)
    1158                 :             : {
    1159                 :          32 :   location_map::point start = get_next_token_start ();
    1160                 :             : 
    1161                 :          16 :   require (TOK_OPEN_CURLY);
    1162                 :             : 
    1163                 :          16 :   auto obj = ::make_unique<object> ();
    1164                 :             : 
    1165                 :          16 :   const token *tok = m_lexer.peek ();
    1166                 :          16 :   if (tok->id == TOK_CLOSE_CURLY)
    1167                 :             :     {
    1168                 :           8 :       location_map::point end = get_next_token_end ();
    1169                 :           4 :       maybe_record_range (obj.get (), start, end);
    1170                 :           4 :       if (auto err = require (TOK_CLOSE_CURLY))
    1171                 :           4 :         return parser_result_t (std::move (err));
    1172                 :           4 :       return parser_result_t (std::move (obj));
    1173                 :             :     }
    1174                 :          12 :   if (tok->id != TOK_STRING)
    1175                 :           0 :     return error_at (tok->range,
    1176                 :             :                      "expected string for object key after '{'; got %s",
    1177                 :           0 :                      token_id_name[tok->id]);
    1178                 :          16 :   while (true)
    1179                 :             :     {
    1180                 :          28 :       tok = m_lexer.peek ();
    1181                 :          28 :       if (tok->id != TOK_STRING)
    1182                 :           0 :         return error_at (tok->range,
    1183                 :             :                          "expected string for object key after ','; got %s",
    1184                 :           0 :                          token_id_name[tok->id]);
    1185                 :          28 :       label_text key = label_text::take (xstrdup (tok->u.string));
    1186                 :          28 :       m_lexer.consume ();
    1187                 :             : 
    1188                 :          28 :       if (auto err = require (TOK_COLON))
    1189                 :          28 :         return parser_result_t (std::move (err));
    1190                 :             : 
    1191                 :          28 :       parser_result_t r = parse_value (depth + 1);
    1192                 :          28 :       if (r.m_err)
    1193                 :           0 :         return r;
    1194                 :          28 :       if (!r.m_val)
    1195                 :           0 :         return parser_result_t (std::move (obj));
    1196                 :             : 
    1197                 :             :       /* We don't enforce uniqueness for keys.  */
    1198                 :          28 :       obj->set (key.get (), std::move (r.m_val));
    1199                 :             : 
    1200                 :          56 :       location_map::point end = get_next_token_end ();
    1201                 :          28 :       result<enum token_id, std::unique_ptr<error>> result
    1202                 :          28 :         (require_one_of (TOK_COMMA, TOK_CLOSE_CURLY));
    1203                 :          28 :       if (result.m_err)
    1204                 :           4 :         return parser_result_t (std::move (result.m_err));
    1205                 :          24 :       if (result.m_val == TOK_COMMA)
    1206                 :          16 :         continue;
    1207                 :             :       else
    1208                 :             :         {
    1209                 :             :           /* TOK_CLOSE_CURLY.  */
    1210                 :           8 :           maybe_record_range (obj.get (), start, end);
    1211                 :           8 :           return parser_result_t (std::move (obj));
    1212                 :             :         }
    1213                 :          56 :     }
    1214                 :          16 : }
    1215                 :             : 
    1216                 :             : /* Parse a JSON array.
    1217                 :             :    (ECMA-404 section 7; RFC 7159 section 5).  */
    1218                 :             : 
    1219                 :             : parser_result_t
    1220                 :          16 : parser::parse_array (int depth)
    1221                 :             : {
    1222                 :          32 :   location_map::point start = get_next_token_start ();
    1223                 :          16 :   if (auto err = require (TOK_OPEN_SQUARE))
    1224                 :          16 :     return parser_result_t (std::move (err));
    1225                 :             : 
    1226                 :          16 :   auto arr = ::make_unique<array> ();
    1227                 :             : 
    1228                 :          16 :   const token *tok = m_lexer.peek ();
    1229                 :          16 :   if (tok->id == TOK_CLOSE_SQUARE)
    1230                 :             :     {
    1231                 :           0 :       location_map::point end = get_next_token_end ();
    1232                 :           0 :       maybe_record_range (arr.get (), start, end);
    1233                 :           0 :       m_lexer.consume ();
    1234                 :           0 :       return parser_result_t (std::move (arr));
    1235                 :             :     }
    1236                 :             : 
    1237                 :         112 :   while (true)
    1238                 :             :     {
    1239                 :          64 :       parser_result_t r = parse_value (depth + 1);
    1240                 :          64 :       if (r.m_err)
    1241                 :           0 :         return r;
    1242                 :             : 
    1243                 :          64 :       arr->append (std::move (r.m_val));
    1244                 :             : 
    1245                 :         128 :       location_map::point end = get_next_token_end ();
    1246                 :          64 :       result<enum token_id, std::unique_ptr<error>> result
    1247                 :          64 :         (require_one_of (TOK_COMMA, TOK_CLOSE_SQUARE));
    1248                 :          64 :       if (result.m_err)
    1249                 :           4 :         return parser_result_t (std::move (result.m_err));
    1250                 :          60 :       if (result.m_val == TOK_COMMA)
    1251                 :          48 :         continue;
    1252                 :             :       else
    1253                 :             :         {
    1254                 :             :           /* TOK_CLOSE_SQUARE.  */
    1255                 :          12 :           maybe_record_range (arr.get (), start, end);
    1256                 :          12 :           return parser_result_t (std::move (arr));
    1257                 :             :         }
    1258                 :         128 :     }
    1259                 :          16 : }
    1260                 :             : 
    1261                 :             : /* Get the start point of the next token.  */
    1262                 :             : 
    1263                 :             : location_map::point
    1264                 :          32 : parser::get_next_token_start ()
    1265                 :             : {
    1266                 :          32 :   const token *tok = m_lexer.peek ();
    1267                 :          32 :   return tok->range.m_start;
    1268                 :             : }
    1269                 :             : 
    1270                 :             : /* Get the end point of the next token.  */
    1271                 :             : 
    1272                 :             : location_map::point
    1273                 :          96 : parser::get_next_token_end ()
    1274                 :             : {
    1275                 :          96 :   const token *tok = m_lexer.peek ();
    1276                 :          96 :   return tok->range.m_end;
    1277                 :             : }
    1278                 :             : 
    1279                 :             : /* Require an EOF, or fail if there is surplus input.  */
    1280                 :             : 
    1281                 :             : std::unique_ptr<error>
    1282                 :          72 : parser::require_eof ()
    1283                 :             : {
    1284                 :           0 :   return require (TOK_EOF);
    1285                 :             : }
    1286                 :             : 
    1287                 :             : /* Consume the next token, issuing an error if it is not of kind TOK_ID.  */
    1288                 :             : 
    1289                 :             : std::unique_ptr<error>
    1290                 :         136 : parser::require (enum token_id tok_id)
    1291                 :             : {
    1292                 :         136 :   const token *tok = m_lexer.peek ();
    1293                 :         136 :   if (tok->id != tok_id)
    1294                 :             :     {
    1295                 :           0 :       if (tok->id == TOK_ERROR)
    1296                 :           0 :         return error_at (tok->range,
    1297                 :             :                          "expected %s; got bad token: %s",
    1298                 :           0 :                          token_id_name[tok_id], tok->u.string);
    1299                 :             :       else
    1300                 :           0 :         return error_at (tok->range,
    1301                 :           0 :                          "expected %s; got %s", token_id_name[tok_id],
    1302                 :           0 :                          token_id_name[tok->id]);
    1303                 :             :     }
    1304                 :         136 :   m_lexer.consume ();
    1305                 :         136 :   return nullptr;
    1306                 :             : }
    1307                 :             : 
    1308                 :             : /* Consume the next token, issuing an error if it is not of
    1309                 :             :    kind TOK_ID_A or TOK_ID_B.
    1310                 :             :    Return which kind it was.  */
    1311                 :             : 
    1312                 :             : result<enum token_id, std::unique_ptr<error>>
    1313                 :          92 : parser::require_one_of (enum token_id tok_id_a, enum token_id tok_id_b)
    1314                 :             : {
    1315                 :          92 :   const token *tok = m_lexer.peek ();
    1316                 :          92 :   if ((tok->id != tok_id_a)
    1317                 :          28 :       && (tok->id != tok_id_b))
    1318                 :             :     {
    1319                 :           8 :       if (tok->id == TOK_ERROR)
    1320                 :           0 :         return error_at (tok->range, "expected %s or %s; got bad token: %s",
    1321                 :           0 :                          token_id_name[tok_id_a], token_id_name[tok_id_b],
    1322                 :           0 :                          tok->u.string);
    1323                 :             :       else
    1324                 :           8 :         return error_at (tok->range, "expected %s or %s; got %s",
    1325                 :           8 :                          token_id_name[tok_id_a], token_id_name[tok_id_b],
    1326                 :           8 :                          token_id_name[tok->id]);
    1327                 :             :     }
    1328                 :          84 :   enum token_id id = tok->id;
    1329                 :          84 :   m_lexer.consume ();
    1330                 :          84 :   return result<enum token_id, std::unique_ptr<error>> (id);
    1331                 :             : }
    1332                 :             : 
    1333                 :             : /* Genarate a parsing error.  */
    1334                 :             : 
    1335                 :             : std::unique_ptr<error>
    1336                 :          20 : parser::error_at (const location_map::range &r, const char *fmt, ...)
    1337                 :             : {
    1338                 :          20 :   va_list ap;
    1339                 :          20 :   va_start (ap, fmt);
    1340                 :          20 :   char *formatted_msg = xvasprintf (fmt, ap);
    1341                 :          20 :   va_end (ap);
    1342                 :             : 
    1343                 :          20 :   return ::make_unique<error> (r, formatted_msg);
    1344                 :             : }
    1345                 :             : 
    1346                 :             : /* Record that JV has range R within the input file.  */
    1347                 :             : 
    1348                 :             : void
    1349                 :         140 : parser::maybe_record_range (json::value *jv, const location_map::range &r)
    1350                 :             : {
    1351                 :         140 :   if (m_loc_map)
    1352                 :         140 :     m_loc_map->record_range_for_value (jv, r);
    1353                 :         140 : }
    1354                 :             : 
    1355                 :             : /* Record that JV has range START to END within the input file.  */
    1356                 :             : 
    1357                 :             : void
    1358                 :          24 : parser::maybe_record_range (json::value *jv,
    1359                 :             :                             const location_map::point &start,
    1360                 :             :                             const location_map::point &end)
    1361                 :             : {
    1362                 :          24 :   if (m_loc_map)
    1363                 :             :     {
    1364                 :          24 :       location_map::range r;
    1365                 :          24 :       r.m_start = start;
    1366                 :          24 :       r.m_end = end;
    1367                 :          24 :       m_loc_map->record_range_for_value (jv, r);
    1368                 :             :     }
    1369                 :          24 : }
    1370                 :             : 
    1371                 :             : /* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF
    1372                 :             :    of the given LENGTH.
    1373                 :             :    If ALLOW_COMMENTS is true, then allow C and C++ style-comments in the
    1374                 :             :    buffer, as an extension to JSON, otherwise forbid them.
    1375                 :             :    If successful, return an json::value in the result.
    1376                 :             :    if there was a problem, return a json::error in the result.
    1377                 :             :    If OUT_LOC_MAP is non-NULL, notify *OUT_LOC_MAP about
    1378                 :             :    source locations of nodes seen during parsing.  */
    1379                 :             : 
    1380                 :             : parser_result_t
    1381                 :          92 : json::parse_utf8_string (size_t length,
    1382                 :             :                          const char *utf8_buf,
    1383                 :             :                          bool allow_comments,
    1384                 :             :                          location_map *out_loc_map)
    1385                 :             : {
    1386                 :          92 :   parser p (out_loc_map, allow_comments);
    1387                 :          92 :   if (auto err = p.add_utf8 (length, utf8_buf))
    1388                 :          92 :     return parser_result_t (std::move (err));
    1389                 :          92 :   parser_result_t r = p.parse_value (0);
    1390                 :          92 :   if (r.m_err)
    1391                 :          20 :     return r;
    1392                 :          72 :   if (auto err = p.require_eof ())
    1393                 :          72 :     return parser_result_t (std::move (err));
    1394                 :          72 :   return r;
    1395                 :          92 : }
    1396                 :             : 
    1397                 :             : /* Attempt to parse the nil-terminated UTF-8 encoded buffer at
    1398                 :             :    UTF8_BUF.
    1399                 :             :    If ALLOW_COMMENTS is true, then allow C and C++ style-comments in the
    1400                 :             :    buffer, as an extension to JSON, otherwise forbid them.
    1401                 :             :    If successful, return a non-NULL json::value *.
    1402                 :             :    if there was a problem, return NULL and write an error
    1403                 :             :    message to err_out, which must be deleted by the caller.
    1404                 :             :    If OUT_LOC_MAP is non-NULL, notify *OUT_LOC_MAP about
    1405                 :             :    source locations of nodes seen during parsing.  */
    1406                 :             : 
    1407                 :             : json::parser_result_t
    1408                 :          92 : json::parse_utf8_string (const char *utf8,
    1409                 :             :                          bool allow_comments,
    1410                 :             :                          location_map *out_loc_map)
    1411                 :             : {
    1412                 :          92 :   return parse_utf8_string (strlen (utf8), utf8, allow_comments,
    1413                 :          92 :                             out_loc_map);
    1414                 :             : }
    1415                 :             : 
    1416                 :             : 
    1417                 :             : #if CHECKING_P
    1418                 :             : 
    1419                 :             : namespace selftest {
    1420                 :             : 
    1421                 :             : /* Selftests.  */
    1422                 :             : 
    1423                 :             : #define ASSERT_PRINT_EQ(JV, FORMATTED, EXPECTED_JSON)   \
    1424                 :             :   assert_print_eq (SELFTEST_LOCATION, JV, FORMATTED, EXPECTED_JSON)
    1425                 :             : 
    1426                 :             : /* Implementation detail of ASSERT_RANGE_EQ.  */
    1427                 :             : 
    1428                 :             : static void
    1429                 :         424 : assert_point_eq (const location &loc,
    1430                 :             :                  const location_map::point &actual_point,
    1431                 :             :                  size_t exp_unichar_idx, int exp_line, int exp_column)
    1432                 :             : {
    1433                 :         424 :   ASSERT_EQ_AT (loc, actual_point.m_unichar_idx, exp_unichar_idx);
    1434                 :         424 :   ASSERT_EQ_AT (loc, actual_point.m_line, exp_line);
    1435                 :         424 :   ASSERT_EQ_AT (loc, actual_point.m_column, exp_column);
    1436                 :         424 : }
    1437                 :             : 
    1438                 :             : /* Implementation detail of ASSERT_RANGE_EQ.  */
    1439                 :             : 
    1440                 :             : static void
    1441                 :         212 : assert_range_eq (const location &loc,
    1442                 :             :                  const location_map::range &actual_range,
    1443                 :             :                  /* Expected location.  */
    1444                 :             :                  size_t start_unichar_idx, int start_line, int start_column,
    1445                 :             :                  size_t end_unichar_idx, int end_line, int end_column)
    1446                 :             : {
    1447                 :         212 :   assert_point_eq (loc, actual_range.m_start,
    1448                 :             :                    start_unichar_idx, start_line, start_column);
    1449                 :         212 :   assert_point_eq (loc, actual_range.m_end,
    1450                 :             :                    end_unichar_idx, end_line, end_column);
    1451                 :         212 : }
    1452                 :             : 
    1453                 :             : /* Assert that ACTUAL_RANGE starts at
    1454                 :             :    (START_UNICHAR_IDX, START_LINE, START_COLUMN)
    1455                 :             :    and ends at (END_UNICHAR_IDX, END_LINE, END_COLUMN).  */
    1456                 :             : 
    1457                 :             : #define ASSERT_RANGE_EQ(ACTUAL_RANGE, \
    1458                 :             :                         START_UNICHAR_IDX, START_LINE, START_COLUMN,    \
    1459                 :             :                         END_UNICHAR_IDX, END_LINE, END_COLUMN)          \
    1460                 :             :   assert_range_eq ((SELFTEST_LOCATION), (ACTUAL_RANGE), \
    1461                 :             :                    (START_UNICHAR_IDX), (START_LINE), (START_COLUMN),   \
    1462                 :             :                    (END_UNICHAR_IDX), (END_LINE), (END_COLUMN))
    1463                 :             : 
    1464                 :             : /* Implementation detail of ASSERT_ERR_EQ.  */
    1465                 :             : 
    1466                 :             : static void
    1467                 :          16 : assert_err_eq (const location &loc,
    1468                 :             :                const json::error *actual_err,
    1469                 :             :                /* Expected location.  */
    1470                 :             :                size_t start_unichar_idx, int start_line, int start_column,
    1471                 :             :                size_t end_unichar_idx, int end_line, int end_column,
    1472                 :             :                const char *expected_msg)
    1473                 :             : {
    1474                 :          16 :   ASSERT_TRUE_AT (loc, actual_err);
    1475                 :          16 :   const location_map::range &actual_range = actual_err->get_range ();
    1476                 :          16 :   ASSERT_EQ_AT (loc, actual_range.m_start.m_unichar_idx, start_unichar_idx);
    1477                 :          16 :   ASSERT_EQ_AT (loc, actual_range.m_start.m_line, start_line);
    1478                 :          16 :   ASSERT_EQ_AT (loc, actual_range.m_start.m_column, start_column);
    1479                 :          16 :   ASSERT_EQ_AT (loc, actual_range.m_end.m_unichar_idx, end_unichar_idx);
    1480                 :          16 :   ASSERT_EQ_AT (loc, actual_range.m_end.m_line, end_line);
    1481                 :          16 :   ASSERT_EQ_AT (loc, actual_range.m_end.m_column, end_column);
    1482                 :          16 :   ASSERT_STREQ_AT (loc, actual_err->get_msg (), expected_msg);
    1483                 :          16 : }
    1484                 :             : 
    1485                 :             : /* Assert that ACTUAL_ERR is a non-NULL json::error *,
    1486                 :             :    with message EXPECTED_MSG, and that its location starts
    1487                 :             :    at (START_UNICHAR_IDX, START_LINE, START_COLUMN)
    1488                 :             :    and ends at (END_UNICHAR_IDX, END_LINE, END_COLUMN).  */
    1489                 :             : 
    1490                 :             : #define ASSERT_ERR_EQ(ACTUAL_ERR, \
    1491                 :             :                       START_UNICHAR_IDX, START_LINE, START_COLUMN,      \
    1492                 :             :                       END_UNICHAR_IDX, END_LINE, END_COLUMN,    \
    1493                 :             :                       EXPECTED_MSG)                 \
    1494                 :             :   assert_err_eq ((SELFTEST_LOCATION), (ACTUAL_ERR), \
    1495                 :             :                  (START_UNICHAR_IDX), (START_LINE), (START_COLUMN),     \
    1496                 :             :                  (END_UNICHAR_IDX), (END_LINE), (END_COLUMN),   \
    1497                 :             :                  (EXPECTED_MSG))
    1498                 :             : 
    1499                 :             : /* Verify that the JSON lexer works as expected.  */
    1500                 :             : 
    1501                 :             : static void
    1502                 :           4 : test_lexer ()
    1503                 :             : {
    1504                 :           4 :   lexer l (false);
    1505                 :           4 :   const char *str
    1506                 :             :     /*  0         1         2         3         4         .  */
    1507                 :             :     /*  01234567890123456789012345678901234567890123456789.  */
    1508                 :             :     = ("    1066   -1  \n"
    1509                 :             :        "    -273.15 1e6\n"
    1510                 :             :        "  [   ] null   true  false  {  }  \"foo\" \n");
    1511                 :           4 :   auto err = l.add_utf8 (strlen (str), str);
    1512                 :           4 :   ASSERT_EQ (err, nullptr);
    1513                 :             : 
    1514                 :             :   /* Line 1.  */
    1515                 :           4 :   {
    1516                 :           4 :     const size_t line_offset = 0;
    1517                 :             : 
    1518                 :             :     /* Expect token: "1066" in columns 4-7.  */
    1519                 :           4 :     {
    1520                 :           4 :       const token *tok = l.peek ();
    1521                 :           4 :       ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1522                 :           4 :       ASSERT_EQ (tok->u.integer_number, 1066);
    1523                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1524                 :             :                        line_offset + 4, 1, 4,
    1525                 :             :                        line_offset + 7, 1, 7);
    1526                 :           4 :       l.consume ();
    1527                 :             :     }
    1528                 :             :     /* Expect token: "-1" in columns 11-12.  */
    1529                 :           4 :     {
    1530                 :           4 :       const token *tok = l.peek ();
    1531                 :           4 :       ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1532                 :           4 :       ASSERT_EQ (tok->u.integer_number, -1);
    1533                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1534                 :             :                        line_offset + 11, 1, 11,
    1535                 :             :                        line_offset + 12, 1, 12);
    1536                 :           4 :       l.consume ();
    1537                 :             :     }
    1538                 :             :   }
    1539                 :             : 
    1540                 :             :   /* Line 2.  */
    1541                 :           4 :   {
    1542                 :           4 :     const size_t line_offset = 16;
    1543                 :             : 
    1544                 :             :     /* Expect token: "-273.15" in columns 4-10.  */
    1545                 :           4 :     {
    1546                 :           4 :       const token *tok = l.peek ();
    1547                 :           4 :       ASSERT_EQ (tok->id, TOK_FLOAT_NUMBER);
    1548                 :           4 :       ASSERT_EQ (int(tok->u.float_number), int(-273.15));
    1549                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1550                 :             :                        line_offset + 4, 2, 4,
    1551                 :             :                        line_offset + 10, 2, 10);
    1552                 :           4 :       l.consume ();
    1553                 :             :     }
    1554                 :             :     /* Expect token: "1e6" in columns 12-14.  */
    1555                 :           4 :     {
    1556                 :           4 :       const token *tok = l.peek ();
    1557                 :           4 :       ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1558                 :           4 :       ASSERT_EQ (tok->u.integer_number, 1000000);
    1559                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1560                 :             :                        line_offset + 12, 2, 12,
    1561                 :             :                        line_offset + 14, 2, 14);
    1562                 :           4 :       l.consume ();
    1563                 :             :     }
    1564                 :             :   }
    1565                 :             : 
    1566                 :             :   /* Line 3.  */
    1567                 :           4 :   {
    1568                 :           4 :     const size_t line_offset = 32;
    1569                 :             : 
    1570                 :             :     /* Expect token: "[".  */
    1571                 :           4 :     {
    1572                 :           4 :       const token *tok = l.peek ();
    1573                 :           4 :       ASSERT_EQ (tok->id, TOK_OPEN_SQUARE);
    1574                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1575                 :             :                        line_offset + 2, 3, 2,
    1576                 :             :                        line_offset + 2, 3, 2);
    1577                 :           4 :       l.consume ();
    1578                 :             :     }
    1579                 :             :     /* Expect token: "]".  */
    1580                 :           4 :     {
    1581                 :           4 :       const token *tok = l.peek ();
    1582                 :           4 :       ASSERT_EQ (tok->id, TOK_CLOSE_SQUARE);
    1583                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1584                 :             :                        line_offset + 6, 3, 6,
    1585                 :             :                        line_offset + 6, 3, 6);
    1586                 :           4 :       l.consume ();
    1587                 :             :     }
    1588                 :             :     /* Expect token: "null".  */
    1589                 :           4 :     {
    1590                 :           4 :       const token *tok = l.peek ();
    1591                 :           4 :       ASSERT_EQ (tok->id, TOK_NULL);
    1592                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1593                 :             :                        line_offset + 8, 3, 8,
    1594                 :             :                        line_offset + 11, 3, 11);
    1595                 :           4 :       l.consume ();
    1596                 :             :     }
    1597                 :             :     /* Expect token: "true".  */
    1598                 :           4 :     {
    1599                 :           4 :       const token *tok = l.peek ();
    1600                 :           4 :       ASSERT_EQ (tok->id, TOK_TRUE);
    1601                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1602                 :             :                        line_offset + 15, 3, 15,
    1603                 :             :                        line_offset + 18, 3, 18);
    1604                 :           4 :       l.consume ();
    1605                 :             :     }
    1606                 :             :     /* Expect token: "false".  */
    1607                 :           4 :     {
    1608                 :           4 :       const token *tok = l.peek ();
    1609                 :           4 :       ASSERT_EQ (tok->id, TOK_FALSE);
    1610                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1611                 :             :                        line_offset + 21, 3, 21,
    1612                 :             :                        line_offset + 25, 3, 25);
    1613                 :           4 :       l.consume ();
    1614                 :             :     }
    1615                 :             :     /* Expect token: "{".  */
    1616                 :           4 :     {
    1617                 :           4 :       const token *tok = l.peek ();
    1618                 :           4 :       ASSERT_EQ (tok->id, TOK_OPEN_CURLY);
    1619                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1620                 :             :                        line_offset + 28, 3, 28,
    1621                 :             :                        line_offset + 28, 3, 28);
    1622                 :           4 :       l.consume ();
    1623                 :             :     }
    1624                 :             :     /* Expect token: "}".  */
    1625                 :           4 :     {
    1626                 :           4 :       const token *tok = l.peek ();
    1627                 :           4 :       ASSERT_EQ (tok->id, TOK_CLOSE_CURLY);
    1628                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1629                 :             :                        line_offset + 31, 3, 31,
    1630                 :             :                        line_offset + 31, 3, 31);
    1631                 :           4 :       l.consume ();
    1632                 :             :     }
    1633                 :             :     /* Expect token: "\"foo\"".  */
    1634                 :           4 :     {
    1635                 :           4 :       const token *tok = l.peek ();
    1636                 :           4 :       ASSERT_EQ (tok->id, TOK_STRING);
    1637                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1638                 :             :                        line_offset + 34, 3, 34,
    1639                 :             :                        line_offset + 38, 3, 38);
    1640                 :           4 :       l.consume ();
    1641                 :             :     }
    1642                 :             :   }
    1643                 :           4 : }
    1644                 :             : 
    1645                 :             : /* Verify that the JSON lexer complains about single-line comments
    1646                 :             :    when comments are disabled.  */
    1647                 :             : 
    1648                 :             : static void
    1649                 :           4 : test_lexing_unsupported_single_line_comment ()
    1650                 :             : {
    1651                 :           4 :   lexer l (false);
    1652                 :           4 :   const char *str
    1653                 :             :     /*  0         1         2         3         4         .  */
    1654                 :             :     /*  01234567890123456789012345678901234567890123456789.  */
    1655                 :             :     = ("    1066   // Hello world\n");
    1656                 :           4 :   auto err = l.add_utf8 (strlen (str), str);
    1657                 :           4 :   ASSERT_EQ (err, nullptr);
    1658                 :             : 
    1659                 :             :   /* Line 1.  */
    1660                 :           4 :   {
    1661                 :           4 :     const size_t line_offset = 0;
    1662                 :           4 :     const int line_1 = 1;
    1663                 :             : 
    1664                 :             :     /* Expect token: "1066" in columns 4-7.  */
    1665                 :           4 :     {
    1666                 :           4 :       const token *tok = l.peek ();
    1667                 :           4 :       ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1668                 :           4 :       ASSERT_EQ (tok->u.integer_number, 1066);
    1669                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1670                 :             :                        line_offset + 4, line_1, 4,
    1671                 :             :                        line_offset + 7, line_1, 7);
    1672                 :           4 :       l.consume ();
    1673                 :             :     }
    1674                 :             : 
    1675                 :             :     /* Expect error.  */
    1676                 :           4 :     {
    1677                 :           4 :       const token *tok = l.peek ();
    1678                 :           4 :       ASSERT_EQ (tok->id, TOK_ERROR);
    1679                 :           4 :       ASSERT_STREQ (tok->u.string, "unexpected character: '/'");
    1680                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1681                 :             :                        line_offset + 11, line_1, 11,
    1682                 :             :                        line_offset + 11, line_1, 11);
    1683                 :           4 :       l.consume ();
    1684                 :             :     }
    1685                 :             :   }
    1686                 :           4 : }
    1687                 :             : 
    1688                 :             : /* Verify that the JSON lexer complains about multiline comments
    1689                 :             :    when comments are disabled.  */
    1690                 :             : 
    1691                 :             : static void
    1692                 :           4 : test_lexing_unsupported_multiline_comment ()
    1693                 :             : {
    1694                 :           4 :   lexer l (false);
    1695                 :           4 :   const char *str
    1696                 :             :     /*  0         1         2         3         4         .  */
    1697                 :             :     /*  01234567890123456789012345678901234567890123456789.  */
    1698                 :             :     = ("    1066   /* Hello world\n"
    1699                 :             :        " continuation of comment\n"
    1700                 :             :        " end of comment */  42\n");
    1701                 :           4 :   auto err = l.add_utf8 (strlen (str), str);
    1702                 :           4 :   ASSERT_EQ (err, nullptr);
    1703                 :             : 
    1704                 :             :   /* Line 1.  */
    1705                 :           4 :   {
    1706                 :           4 :     const size_t line_offset = 0;
    1707                 :           4 :     const int line_1 = 1;
    1708                 :             : 
    1709                 :             :     /* Expect token: "1066" in line 1, columns 4-7.  */
    1710                 :           4 :     {
    1711                 :           4 :       const token *tok = l.peek ();
    1712                 :           4 :       ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1713                 :           4 :       ASSERT_EQ (tok->u.integer_number, 1066);
    1714                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1715                 :             :                        line_offset + 4, line_1, 4,
    1716                 :             :                        line_offset + 7, line_1, 7);
    1717                 :           4 :       l.consume ();
    1718                 :             :     }
    1719                 :             : 
    1720                 :             :     /* Expect error.  */
    1721                 :           4 :     {
    1722                 :           4 :       const token *tok = l.peek ();
    1723                 :           4 :       ASSERT_EQ (tok->id, TOK_ERROR);
    1724                 :           4 :       ASSERT_STREQ (tok->u.string, "unexpected character: '/'");
    1725                 :           4 :       ASSERT_RANGE_EQ (tok->range,
    1726                 :             :                        line_offset + 11, line_1, 11,
    1727                 :             :                        line_offset + 11, line_1, 11);
    1728                 :           4 :       l.consume ();
    1729                 :             :     }
    1730                 :             :   }
    1731                 :           4 : }
    1732                 :             : 
    1733                 :             : /* Verify that the JSON lexer handles single-line comments
    1734                 :             :    when comments are enabled.  */
    1735                 :             : 
    1736                 :             : static void
    1737                 :           4 : test_lexing_supported_single_line_comment ()
    1738                 :             : {
    1739                 :           4 :   lexer l (true);
    1740                 :           4 :   const char *str
    1741                 :             :     /*  0         1         2         3         4         .  */
    1742                 :             :     /*  01234567890123456789012345678901234567890123456789.  */
    1743                 :             :     = ("    1066   // Hello world\n"
    1744                 :             :        "     42   // etc\n");
    1745                 :           4 :   auto err = l.add_utf8 (strlen (str), str);
    1746                 :           4 :   ASSERT_EQ (err, nullptr);
    1747                 :             : 
    1748                 :           4 :   const size_t line_1_offset = 0;
    1749                 :           4 :   const size_t line_2_offset = 26;
    1750                 :           4 :   const size_t line_3_offset = line_2_offset + 17;
    1751                 :             : 
    1752                 :             :   /* Expect token: "1066" in line 1, columns 4-7.  */
    1753                 :           4 :   {
    1754                 :           4 :     const int line_1 = 1;
    1755                 :           4 :     const token *tok = l.peek ();
    1756                 :           4 :     ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1757                 :           4 :     ASSERT_EQ (tok->u.integer_number, 1066);
    1758                 :           4 :     ASSERT_RANGE_EQ (tok->range,
    1759                 :             :                      line_1_offset + 4, line_1, 4,
    1760                 :             :                      line_1_offset + 7, line_1, 7);
    1761                 :           4 :     l.consume ();
    1762                 :             :   }
    1763                 :             : 
    1764                 :             :   /* Expect token: "42" in line 2, columns 5-6.  */
    1765                 :           4 :   {
    1766                 :           4 :     const int line_2 = 2;
    1767                 :           4 :     const token *tok = l.peek ();
    1768                 :           4 :     ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1769                 :           4 :     ASSERT_EQ (tok->u.integer_number, 42);
    1770                 :           4 :     ASSERT_RANGE_EQ (tok->range,
    1771                 :             :                      line_2_offset + 5, line_2, 5,
    1772                 :             :                      line_2_offset + 6, line_2, 6);
    1773                 :           4 :     l.consume ();
    1774                 :             :   }
    1775                 :             : 
    1776                 :             :   /* Expect EOF.  */
    1777                 :           4 :   {
    1778                 :           4 :     const int line_3 = 3;
    1779                 :           4 :     const token *tok = l.peek ();
    1780                 :           4 :     ASSERT_EQ (tok->id, TOK_EOF);
    1781                 :           4 :     ASSERT_RANGE_EQ (tok->range,
    1782                 :             :                      line_3_offset + 0, line_3, 0,
    1783                 :             :                      line_3_offset + 0, line_3, 0);
    1784                 :           4 :     l.consume ();
    1785                 :             :   }
    1786                 :           4 : }
    1787                 :             : 
    1788                 :             : /* Verify that the JSON lexer handles multiline comments
    1789                 :             :    when comments are enabled.  */
    1790                 :             : 
    1791                 :             : static void
    1792                 :           4 : test_lexing_supported_multiline_comment ()
    1793                 :             : {
    1794                 :           4 :   lexer l (true);
    1795                 :           4 :   const char *str
    1796                 :             :     /*  0         1         2         3         4         .  */
    1797                 :             :     /*  01234567890123456789012345678901234567890123456789.  */
    1798                 :             :     = ("    1066   /* Hello world\n"
    1799                 :             :        " continuation of comment\n"
    1800                 :             :        " end of comment */  42\n");
    1801                 :           4 :   auto err = l.add_utf8 (strlen (str), str);
    1802                 :           4 :   ASSERT_EQ (err, nullptr);
    1803                 :             : 
    1804                 :           4 :   const size_t line_1_offset = 0;
    1805                 :           4 :   const size_t line_2_offset = 26;
    1806                 :           4 :   const size_t line_3_offset = line_2_offset + 25;
    1807                 :           4 :   const size_t line_4_offset = line_3_offset + 23;
    1808                 :             : 
    1809                 :             :   /* Expect token: "1066" in line 1, columns 4-7.  */
    1810                 :           4 :   {
    1811                 :           4 :     const int line_1 = 1;
    1812                 :           4 :     const token *tok = l.peek ();
    1813                 :           4 :     ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1814                 :           4 :     ASSERT_EQ (tok->u.integer_number, 1066);
    1815                 :           4 :     ASSERT_RANGE_EQ (tok->range,
    1816                 :             :                      line_1_offset + 4, line_1, 4,
    1817                 :             :                      line_1_offset + 7, line_1, 7);
    1818                 :           4 :     l.consume ();
    1819                 :             :   }
    1820                 :             : 
    1821                 :             :   /* Expect token: "42" in line 3, columns 20-21.  */
    1822                 :           4 :   {
    1823                 :           4 :     const int line_3 = 3;
    1824                 :           4 :     const token *tok = l.peek ();
    1825                 :           4 :     ASSERT_EQ (tok->id, TOK_INTEGER_NUMBER);
    1826                 :           4 :     ASSERT_EQ (tok->u.integer_number, 42);
    1827                 :           4 :     ASSERT_RANGE_EQ (tok->range,
    1828                 :             :                      line_3_offset + 20, line_3, 20,
    1829                 :             :                      line_3_offset + 21, line_3, 21);
    1830                 :           4 :     l.consume ();
    1831                 :             :   }
    1832                 :             : 
    1833                 :             :   /* Expect EOF.  */
    1834                 :           4 :   {
    1835                 :           4 :     const int line_4 = 4;
    1836                 :           4 :     const token *tok = l.peek ();
    1837                 :           4 :     ASSERT_EQ (tok->id, TOK_EOF);
    1838                 :           4 :     ASSERT_RANGE_EQ (tok->range,
    1839                 :             :                      line_4_offset + 0, line_4, 0,
    1840                 :             :                      line_4_offset + 0, line_4, 0);
    1841                 :           4 :     l.consume ();
    1842                 :             :   }
    1843                 :           4 : }
    1844                 :             : 
    1845                 :             : /* Helper class for writing JSON parsing testcases.
    1846                 :             :    Attempts to parse a string in ctor, and captures the result (either
    1847                 :             :    a json::value or a json::error), and a location map.  */
    1848                 :             : 
    1849                 :             : struct parser_testcase
    1850                 :             : {
    1851                 :             : public:
    1852                 :          92 :   parser_testcase (const char *utf8_string, bool allow_comments = false)
    1853                 :          92 :   : m_loc_map (),
    1854                 :          92 :     m_result (parse_utf8_string (utf8_string, allow_comments, &m_loc_map))
    1855                 :             :   {
    1856                 :          92 :   }
    1857                 :             : 
    1858                 :          92 :   const json::value *get_value () const { return m_result.m_val.get (); }
    1859                 :          96 :   const json::error *get_error () const { return m_result.m_err.get (); }
    1860                 :             : 
    1861                 :             :   const location_map::range *
    1862                 :         124 :   get_range_for_value (const json::value *jv) const
    1863                 :             :   {
    1864                 :         248 :     return m_loc_map.get_range_for_value (jv);
    1865                 :             :   }
    1866                 :             : 
    1867                 :             : private:
    1868                 :             :   /* Concrete implementation of location_map for use in JSON parsing
    1869                 :             :      selftests: keeps each recorded range in a hash_map keyed by value pointer.  */
    1870                 :             :   class test_location_map : public location_map
    1871                 :             :   {
    1872                 :             :   public:
    1873                 :         164 :     void record_range_for_value (json::value *jv, const range &r) final override  /* Store R as the range for JV.  */
    1874                 :             :     {
    1875                 :         164 :       m_map.put (jv, r);
    1876                 :         164 :     }
    1877                 :             : 
    1878                 :         124 :     range *get_range_for_value (const json::value *jv) const  /* Return whatever m_map.get yields for JV; callers in this file check the result for null.  */
    1879                 :             :     {
    1880                 :         124 :       return const_cast<hash_map<const json::value *, range> &> (m_map)
    1881                 :         124 :         .get (jv);  /* NOTE(review): the const_cast is presumably needed because hash_map::get is not callable on a const map - confirm.  */
    1882                 :             :     }
    1883                 :             : 
    1884                 :             :   private:
    1885                 :             :     hash_map<const json::value *, range> m_map;
    1886                 :             :   };
    1887                 :             : 
    1888                 :             :   test_location_map m_loc_map;
    1889                 :             :   json::parser_result_t m_result;
    1890                 :             : };
    1891                 :             : 
    1892                 :             : /* Verify that parse_utf8_string handles JSON string values: plain ASCII, escaped quotes, raw UTF-8 and escaped unicode.  */
    1893                 :             : 
    1894                 :             : static void
    1895                 :           4 : test_parse_string ()
    1896                 :             : {
    1897                 :           4 :   const int line_1 = 1;  /* Every input in this test sits on a single line.  */
    1898                 :             : 
    1899                 :           4 :   {
    1900                 :           4 :     parser_testcase tc ("\"foo\"");  /* Plain ASCII string.  */
    1901                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    1902                 :           4 :     const json::value *jv = tc.get_value ();
    1903                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_STRING);
    1904                 :           4 :     ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (), "foo");
    1905                 :           4 :     ASSERT_PRINT_EQ (*jv, true, "\"foo\"");
    1906                 :           4 :     auto range = tc.get_range_for_value (jv);
    1907                 :           4 :     ASSERT_TRUE (range);
    1908                 :           4 :     ASSERT_RANGE_EQ (*range,
    1909                 :             :                      0, line_1, 0,
    1910                 :             :                      4, line_1, 4);  /* The 5-character input spans indices 0-4, so the expected end is inclusive.  NOTE(review): arguments are presumably (offset, line, column) for the start, then for the end; the macro is defined outside this view.  */
    1911                 :           4 :   }
    1912                 :             : 
    1913                 :           4 :   {
    1914                 :           4 :     const char *contains_quotes = "\"before \\\"quoted\\\" after\"";  /* Backslash-escaped quotes inside the string.  */
    1915                 :           4 :     parser_testcase tc (contains_quotes);
    1916                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    1917                 :           4 :     const json::value *jv = tc.get_value ();
    1918                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_STRING);
    1919                 :           4 :     ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (),
    1920                 :             :                   "before \"quoted\" after");
    1921                 :           4 :     ASSERT_PRINT_EQ (*jv, true, contains_quotes);
    1922                 :           4 :     auto range = tc.get_range_for_value (jv);
    1923                 :           4 :     ASSERT_TRUE (range);
    1924                 :           4 :     ASSERT_RANGE_EQ (*range,
    1925                 :             :                      0, line_1, 0,
    1926                 :             :                      24, line_1, 24);  /* The input is 25 characters long, counting both escape backslashes.  */
    1927                 :           4 :   }
    1928                 :             : 
    1929                 :             :   /* Test of non-ASCII input.  This string is the Japanese word "mojibake",
    1930                 :             :      written as C octal-escaped UTF-8.  */
    1931                 :           4 :   const char *mojibake = (/* Opening quote.  */
    1932                 :             :                           "\""
    1933                 :             :                           /* U+6587 CJK UNIFIED IDEOGRAPH-6587
    1934                 :             :                              UTF-8: 0xE6 0x96 0x87
    1935                 :             :                              C octal escaped UTF-8: \346\226\207.  */
    1936                 :             :                           "\346\226\207"
    1937                 :             :                           /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
    1938                 :             :                              UTF-8: 0xE5 0xAD 0x97
    1939                 :             :                              C octal escaped UTF-8: \345\255\227.  */
    1940                 :             :                           "\345\255\227"
    1941                 :             :                           /* U+5316 CJK UNIFIED IDEOGRAPH-5316
    1942                 :             :                              UTF-8: 0xE5 0x8C 0x96
    1943                 :             :                              C octal escaped UTF-8: \345\214\226.  */
    1944                 :             :                           "\345\214\226"
    1945                 :             :                           /* U+3051 HIRAGANA LETTER KE
    1946                 :             :                              UTF-8: 0xE3 0x81 0x91
    1947                 :             :                              C octal escaped UTF-8: \343\201\221.  */
    1948                 :             :                           "\343\201\221"
    1949                 :             :                           /* Closing quote.  */
    1950                 :             :                           "\"");
    1951                 :           4 :   {
    1952                 :           4 :     parser_testcase tc (mojibake);
    1953                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    1954                 :           4 :     const json::value *jv = tc.get_value ();
    1955                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_STRING);
    1956                 :             :     /* Result of get_string should be UTF-8 encoded, without quotes.  */
    1957                 :           4 :     ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (),
    1958                 :             :                   "\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221");
    1959                 :             :     /* Result of dump should be UTF-8 encoded, with quotes.  */
    1960                 :           4 :     ASSERT_PRINT_EQ (*jv, false, mojibake);
    1961                 :           4 :     auto range = tc.get_range_for_value (jv);
    1962                 :           4 :     ASSERT_TRUE (range);
    1963                 :           4 :     ASSERT_RANGE_EQ (*range,
    1964                 :             :                      0, line_1, 0,
    1965                 :             :                      5, line_1, 5);  /* An end of 5 matches the 6 characters of the input, not its 14 UTF-8 bytes.  */
    1966                 :           4 :   }
    1967                 :             : 
    1968                 :             :   /* Test of \u-escaped unicode.  This is "mojibake" again, as above.  */
    1969                 :           4 :   {
    1970                 :           4 :     const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\"";
    1971                 :           4 :     parser_testcase tc (escaped_unicode);
    1972                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    1973                 :           4 :     const json::value *jv = tc.get_value ();
    1974                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_STRING);
    1975                 :             :     /* Result of get_string should be UTF-8 encoded, without quotes.  */
    1976                 :           4 :     ASSERT_STREQ (as_a <const json::string *> (jv)->get_string (),
    1977                 :             :                   "\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221");
    1978                 :             :     /* Result of dump should be UTF-8 encoded, with quotes.  */
    1979                 :           4 :     ASSERT_PRINT_EQ (*jv, false, mojibake);  /* The escapes are not preserved: the expected output is the raw UTF-8 form.  */
    1980                 :           4 :     auto range = tc.get_range_for_value (jv);
    1981                 :           4 :     ASSERT_TRUE (range);
    1982                 :           4 :     ASSERT_RANGE_EQ (*range,
    1983                 :             :                      0, line_1, 0,
    1984                 :             :                      25, line_1, 25);  /* The escaped input is 26 ASCII characters long.  */
    1985                 :           4 :   }
    1986                 :           4 : }
    1987                 :             : 
    1988                 :             : /* Verify that we can parse various kinds of JSON numbers: integers, negatives, decimals and exponent forms.  */
    1989                 :             : 
    1990                 :             : static void
    1991                 :           4 : test_parse_number ()
    1992                 :             : {
    1993                 :           4 :   const int line_1 = 1;  /* Every input in this test sits on a single line.  */
    1994                 :             : 
    1995                 :           4 :   {
    1996                 :           4 :     parser_testcase tc ("42");
    1997                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    1998                 :           4 :     const json::value *jv = tc.get_value ();
    1999                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
    2000                 :           4 :     ASSERT_EQ (as_a <const json::integer_number *> (jv)->get (), 42.0);  /* NOTE(review): compares against the floating-point literal 42.0 although the kind is JSON_INTEGER.  */
    2001                 :           4 :     ASSERT_PRINT_EQ (*jv, true, "42");
    2002                 :           4 :     auto range = tc.get_range_for_value (jv);
    2003                 :           4 :     ASSERT_TRUE (range);
    2004                 :           4 :     ASSERT_RANGE_EQ (*range,
    2005                 :             :                      0, line_1, 0,
    2006                 :             :                      1, line_1, 1);
    2007                 :           4 :   }
    2008                 :             : 
    2009                 :             :   /* Negative number.  */
    2010                 :           4 :   {
    2011                 :           4 :     parser_testcase tc ("-17");
    2012                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    2013                 :           4 :     const json::value *jv = tc.get_value ();
    2014                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
    2015                 :           4 :     ASSERT_EQ (as_a<const json::integer_number *> (jv)->get (), -17.0);
    2016                 :           4 :     ASSERT_PRINT_EQ (*jv, true, "-17");
    2017                 :           4 :     auto range = tc.get_range_for_value (jv);
    2018                 :           4 :     ASSERT_TRUE (range);
    2019                 :           4 :     ASSERT_RANGE_EQ (*range,
    2020                 :             :                      0, line_1, 0,
    2021                 :             :                      2, line_1, 2);  /* The range starts at the minus sign.  */
    2022                 :           4 :   }
    2023                 :             : 
    2024                 :             :   /* Decimal.  */
    2025                 :           4 :   {
    2026                 :           4 :     parser_testcase tc ("3.141");
    2027                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    2028                 :           4 :     const json::value *jv = tc.get_value ();
    2029                 :           4 :     ASSERT_EQ (JSON_FLOAT, jv->get_kind ());  /* NOTE(review): operands are swapped relative to the other kind checks in this function.  */
    2030                 :           4 :     ASSERT_NEAR (3.141, ((const json::float_number *)jv)->get (), 0.001);  /* NOTE(review): C-style cast here; the other cases in this function use as_a.  */
    2031                 :           4 :     auto range = tc.get_range_for_value (jv);
    2032                 :           4 :     ASSERT_TRUE (range);
    2033                 :           4 :     ASSERT_RANGE_EQ (*range,
    2034                 :             :                      0, line_1, 0,
    2035                 :             :                      4, line_1, 4);
    2036                 :           4 :   }
    2037                 :             : 
    2038                 :             :   /* Exponents.  */
    2039                 :           4 :   {
    2040                 :           4 :     {
    2041                 :           4 :       parser_testcase tc ("3.141e+0");  /* Explicitly signed exponent on a decimal mantissa.  */
    2042                 :           4 :       ASSERT_EQ (tc.get_error (), nullptr);
    2043                 :           4 :       const json::value *jv = tc.get_value ();
    2044                 :           4 :       ASSERT_EQ (jv->get_kind (), JSON_FLOAT);
    2045                 :           4 :       ASSERT_NEAR (as_a <const json::float_number *> (jv)->get (), 3.141, 0.1);
    2046                 :           4 :       auto range = tc.get_range_for_value (jv);
    2047                 :           4 :       ASSERT_TRUE (range);
    2048                 :           4 :       ASSERT_RANGE_EQ (*range,
    2049                 :             :                        0, line_1, 0,
    2050                 :             :                        7, line_1, 7);
    2051                 :           4 :     }
    2052                 :           4 :     {
    2053                 :           4 :       parser_testcase tc ("42e2");  /* Integer mantissa with a positive exponent is expected to stay JSON_INTEGER.  */
    2054                 :           4 :       ASSERT_EQ (tc.get_error (), nullptr);
    2055                 :           4 :       const json::value *jv = tc.get_value ();
    2056                 :           4 :       ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
    2057                 :           4 :       ASSERT_EQ (as_a <const json::integer_number *> (jv)->get (), 4200);
    2058                 :           4 :       ASSERT_PRINT_EQ (*jv, true, "4200");  /* Printing gives the expanded value, not the exponent form.  */
    2059                 :           4 :       auto range = tc.get_range_for_value (jv);
    2060                 :           4 :       ASSERT_TRUE (range);
    2061                 :           4 :       ASSERT_RANGE_EQ (*range,
    2062                 :             :                        0, line_1, 0,
    2063                 :             :                        3, line_1, 3);
    2064                 :           4 :     }
    2065                 :           4 :     {
    2066                 :           4 :       parser_testcase tc ("42e-1");  /* Integer mantissa with a negative exponent is expected to become JSON_FLOAT.  */
    2067                 :           4 :       ASSERT_EQ (tc.get_error (), nullptr);
    2068                 :           4 :       const json::value *jv = tc.get_value ();
    2069                 :           4 :       ASSERT_EQ (jv->get_kind (), JSON_FLOAT);
    2070                 :           4 :       ASSERT_NEAR (as_a <const json::float_number *> (jv)->get (), 4.2, 0.1);
    2071                 :           4 :       auto range = tc.get_range_for_value (jv);
    2072                 :           4 :       ASSERT_TRUE (range);
    2073                 :           4 :       ASSERT_RANGE_EQ (*range,
    2074                 :             :                        0, line_1, 0,
    2075                 :             :                        4, line_1, 4);
    2076                 :           4 :     }
    2077                 :             :   }
    2078                 :           4 : }
    2079                 :             : 
    2080                 :             : /* Verify that JSON array parsing works: element count, each element value and range, and printing.  */
    2081                 :             : 
    2082                 :             : static void
    2083                 :           4 : test_parse_array ()
    2084                 :             : {
    2085                 :           4 :   const int line_1 = 1;
    2086                 :             : 
    2087                 :           4 :   parser_testcase tc ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]");
    2088                 :           4 :   ASSERT_EQ (tc.get_error (), nullptr);
    2089                 :           4 :   const json::value *jv = tc.get_value ();
    2090                 :           4 :   ASSERT_EQ (jv->get_kind (), JSON_ARRAY);
    2091                 :           4 :   const json::array *arr = as_a <const json::array *> (jv);
    2092                 :           4 :   ASSERT_EQ (arr->length (), 10);
    2093                 :           4 :   auto range = tc.get_range_for_value (jv);
    2094                 :           4 :   ASSERT_TRUE (range);
    2095                 :           4 :   ASSERT_RANGE_EQ (*range,
    2096                 :             :                    0, line_1, 0,
    2097                 :             :                    29, line_1, 29);  /* Whole array, from the opening bracket through the closing bracket.  */
    2098                 :          44 :   for (int i = 0; i < 10; i++)
    2099                 :             :     {
    2100                 :          40 :       json::value *element = arr->get (i);
    2101                 :          40 :       ASSERT_EQ (element->get_kind (), JSON_INTEGER);
    2102                 :          40 :       ASSERT_EQ (as_a <json::integer_number *> (element)->get (), i);
    2103                 :          40 :       range = tc.get_range_for_value (element);
    2104                 :          40 :       ASSERT_TRUE (range);
    2105                 :          40 :       const int offset = 1 + (i * 3);  /* Skip the opening bracket; each one-digit element is followed by a comma and a space.  */
    2106                 :          40 :       ASSERT_RANGE_EQ (*range,
    2107                 :             :                        offset, line_1, offset,
    2108                 :             :                        offset, line_1, offset);  /* Start equals end because each element is a single character.  */
    2109                 :             :     }
    2110                 :           4 :   ASSERT_PRINT_EQ (*jv, false, "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]");
    2111                 :           4 : }
    2112                 :             : 
    2113                 :             : /* Verify that JSON object parsing works, including the ranges recorded for nested values.  */
    2114                 :             : 
    2115                 :             : static void
    2116                 :           4 : test_parse_object ()
    2117                 :             : {
    2118                 :           4 :   const int line_1 = 1;
    2119                 :           4 :   std::unique_ptr<error> err;  /* NOTE(review): err is never used in this function.  */
    2120                 :             :   /*                   0            1            2         3  .  */
    2121                 :             :   /*                   01 2345 678 9012 345 6789 0123456789012.  */
    2122                 :           4 :   parser_testcase tc ("{\"foo\": \"bar\", \"baz\": [42, null]}");  /* The two ruler comments above give the character index under each input character.  */
    2123                 :             : 
    2124                 :           4 :   ASSERT_EQ (tc.get_error (), nullptr);
    2125                 :           4 :   const json::value *jv = tc.get_value ();
    2126                 :           4 :   ASSERT_NE (jv, nullptr);
    2127                 :           4 :   ASSERT_EQ (jv->get_kind (), JSON_OBJECT);
    2128                 :           4 :   auto range = tc.get_range_for_value (jv);
    2129                 :           4 :   ASSERT_TRUE (range);
    2130                 :           4 :   ASSERT_RANGE_EQ (*range,
    2131                 :             :                    0, line_1, 0,
    2132                 :             :                    32, line_1, 32);
    2133                 :           4 :   const json::object *jo = static_cast <const json::object *> (jv);  /* NOTE(review): static_cast here; the other downcasts in this function use as_a.  */
    2134                 :             : 
    2135                 :           4 :   json::value *foo_value = jo->get ("foo");
    2136                 :           4 :   ASSERT_NE (foo_value, nullptr);
    2137                 :           4 :   ASSERT_EQ (foo_value->get_kind (), JSON_STRING);
    2138                 :           4 :   ASSERT_STREQ (as_a <json::string *> (foo_value)->get_string (), "bar");
    2139                 :           4 :   range = tc.get_range_for_value (foo_value);
    2140                 :           4 :   ASSERT_TRUE (range);
    2141                 :           4 :   ASSERT_RANGE_EQ (*range,
    2142                 :             :                    8, line_1, 8,
    2143                 :             :                    12, line_1, 12);  /* Range of the value only, quotes included; the key is not part of it.  */
    2144                 :             : 
    2145                 :           4 :   json::value *baz_value = jo->get ("baz");
    2146                 :           4 :   ASSERT_NE (baz_value, nullptr);
    2147                 :           4 :   ASSERT_EQ (baz_value->get_kind (), JSON_ARRAY);
    2148                 :           4 :   range = tc.get_range_for_value (baz_value);
    2149                 :           4 :   ASSERT_TRUE (range);
    2150                 :           4 :   ASSERT_RANGE_EQ (*range,
    2151                 :             :                    22, line_1, 22,
    2152                 :             :                    31, line_1, 31);
    2153                 :             : 
    2154                 :           4 :   json::array *baz_array = as_a <json::array *> (baz_value);
    2155                 :           4 :   ASSERT_EQ (baz_array->length (), 2);
    2156                 :             : 
    2157                 :           4 :   json::value *element0 = baz_array->get (0);
    2158                 :           4 :   ASSERT_EQ (as_a <json::integer_number *> (element0)->get (), 42);
    2159                 :           4 :   range = tc.get_range_for_value (element0);
    2160                 :           4 :   ASSERT_TRUE (range);
    2161                 :           4 :   ASSERT_RANGE_EQ (*range,
    2162                 :             :                    23, line_1, 23,
    2163                 :             :                    24, line_1, 24);
    2164                 :             : 
    2165                 :           4 :   json::value *element1 = baz_array->get (1);
    2166                 :           4 :   ASSERT_EQ (element1->get_kind (), JSON_NULL);
    2167                 :           4 :   range = tc.get_range_for_value (element1);
    2168                 :           4 :   ASSERT_TRUE (range);
    2169                 :           4 :   ASSERT_RANGE_EQ (*range,
    2170                 :             :                    27, line_1, 27,
    2171                 :             :                    30, line_1, 30);
    2172                 :           4 : }
    2173                 :             : 
    2174                 :             : /* Verify that the JSON literals "true", "false" and "null" are parsed
    2175                 :             :    correctly: each yields a value of the matching kind, prints back unchanged and has a range covering the whole input.  */
    2176                 :             : 
    2177                 :             : static void
    2178                 :           4 : test_parse_literals ()
    2179                 :             : {
    2180                 :           4 :   const int line_1 = 1;
    2181                 :           4 :   {
    2182                 :           4 :     parser_testcase tc ("true");
    2183                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    2184                 :           4 :     const json::value *jv = tc.get_value ();
    2185                 :           4 :     ASSERT_NE (jv, nullptr);
    2186                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_TRUE);
    2187                 :           4 :     ASSERT_PRINT_EQ (*jv, false, "true");
    2188                 :           4 :     auto range = tc.get_range_for_value (jv);
    2189                 :           4 :     ASSERT_TRUE (range);
    2190                 :           4 :     ASSERT_RANGE_EQ (*range,
    2191                 :             :                      0, line_1, 0,
    2192                 :             :                      3, line_1, 3);
    2193                 :           4 :   }
    2194                 :             : 
    2195                 :           4 :   {
    2196                 :           4 :     parser_testcase tc ("false");
    2197                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    2198                 :           4 :     const json::value *jv = tc.get_value ();
    2199                 :           4 :     ASSERT_NE (jv, nullptr);
    2200                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_FALSE);
    2201                 :           4 :     ASSERT_PRINT_EQ (*jv, false, "false");
    2202                 :           4 :     auto range = tc.get_range_for_value (jv);
    2203                 :           4 :     ASSERT_TRUE (range);
    2204                 :           4 :     ASSERT_RANGE_EQ (*range,
    2205                 :             :                      0, line_1, 0,
    2206                 :             :                      4, line_1, 4);
    2207                 :           4 :   }
    2208                 :             : 
    2209                 :           4 :   {
    2210                 :           4 :     parser_testcase tc ("null");
    2211                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    2212                 :           4 :     const json::value *jv = tc.get_value ();
    2213                 :           4 :     ASSERT_NE (jv, nullptr);  /* A JSON null still produces a value object; only its kind is JSON_NULL.  */
    2214                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_NULL);
    2215                 :           4 :     ASSERT_PRINT_EQ (*jv, false, "null");
    2216                 :           4 :     auto range = tc.get_range_for_value (jv);
    2217                 :           4 :     ASSERT_TRUE (range);
    2218                 :           4 :     ASSERT_RANGE_EQ (*range,
    2219                 :             :                      0, line_1, 0,
    2220                 :             :                      3, line_1, 3);
    2221                 :           4 :   }
    2222                 :           4 : }
    2223                 :             : 
    2224                 :             : /* Verify that we can parse a simple JSON-RPC request spanning two lines, and that the top-level range ends on line 2.  */
    2225                 :             : 
    2226                 :             : static void
    2227                 :           4 : test_parse_jsonrpc ()
    2228                 :             : {
    2229                 :           4 :   std::unique_ptr<error> err;  /* NOTE(review): err is never used in this function.  */
    2230                 :           4 :   const char *request
    2231                 :             :     /*  0           1            2           3          4.  */
    2232                 :             :     /*  01 23456789 012 3456 789 0123456 789 012345678 90.  */
    2233                 :             :     = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\",\n"
    2234                 :             :     /*  0           1         2           3          4.  */
    2235                 :             :     /*  0 1234567 8901234567890 1234 56789012345678 90.  */
    2236                 :             :        " \"params\": [42, 23], \"id\": 1}");
    2237                 :           4 :   const int line_1 = 1;
    2238                 :           4 :   const int line_2 = 2;
    2239                 :           4 :   const size_t line_2_offset = 41;  /* Line 1 holds 40 characters followed by the newline at index 40, so line 2 begins at 41.  */
    2240                 :           4 :   parser_testcase tc (request);
    2241                 :           4 :   ASSERT_EQ (tc.get_error (), nullptr);
    2242                 :           4 :   const json::value *jv = tc.get_value ();
    2243                 :           4 :   ASSERT_NE (jv, nullptr);
    2244                 :           4 :   auto range = tc.get_range_for_value (jv);
    2245                 :           4 :   ASSERT_TRUE (range);
    2246                 :           4 :   ASSERT_RANGE_EQ (*range,
    2247                 :             :                    0, line_1, 0,
    2248                 :             :                    line_2_offset + 28, line_2, 28);  /* The closing brace is at column 28 of line 2; the first end argument counts from the start of the input instead.  */
    2249                 :           4 : }
    2250                 :             : 
    2251                 :             : /* Verify that we can parse an empty JSON object, and that it prints back unchanged.  */
    2252                 :             : 
    2253                 :             : static void
    2254                 :           4 : test_parse_empty_object ()
    2255                 :             : {
    2256                 :           4 :   const int line_1 = 1;
    2257                 :           4 :   std::unique_ptr<error> err;  /* NOTE(review): err is never used in this function.  */
    2258                 :           4 :   parser_testcase tc ("{}");
    2259                 :           4 :   ASSERT_EQ (tc.get_error (), nullptr);
    2260                 :           4 :   const json::value *jv = tc.get_value ();
    2261                 :           4 :   ASSERT_NE (jv, nullptr);
    2262                 :           4 :   ASSERT_EQ (jv->get_kind (), JSON_OBJECT);
    2263                 :           4 :   ASSERT_PRINT_EQ (*jv, true, "{}");
    2264                 :           4 :   auto range = tc.get_range_for_value (jv);
    2265                 :           4 :   ASSERT_TRUE (range);
    2266                 :           4 :   ASSERT_RANGE_EQ (*range,
    2267                 :             :                    0, line_1, 0,
    2268                 :             :                    1, line_1, 1);  /* Covers both braces.  */
    2269                 :           4 : }
    2270                 :             : 
    2271                 :             : /* Verify that comment-parsing can be enabled or disabled: the same input is an error without it and parses to 42 with it.  */
    2272                 :             : 
    2273                 :             : static void
    2274                 :           4 : test_parsing_comments ()
    2275                 :             : {
    2276                 :           4 :   const char *str = ("// foo\n"
    2277                 :             :                      "/*...\n"
    2278                 :             :                      "...*/ 42 // bar\n"
    2279                 :             :                      "/* etc */\n");
    2280                 :             : 
    2281                 :             :   /* Parsing with comment support disabled.  */
    2282                 :           4 :   {
    2283                 :           4 :     parser_testcase tc (str);  /* The allow_comments parameter defaults to false.  */
    2284                 :           4 :     ASSERT_NE (tc.get_error (), nullptr);
    2285                 :           4 :     ASSERT_STREQ (tc.get_error ()->get_msg (),
    2286                 :             :                   "invalid JSON token: unexpected character: '/'");
    2287                 :           4 :     ASSERT_EQ (tc.get_value (), nullptr);  /* No value is produced when parsing fails.  */
    2288                 :           4 :   }
    2289                 :             : 
    2290                 :             :   /* Parsing with comment support enabled.  */
    2291                 :           4 :   {
    2292                 :           4 :     parser_testcase tc (str, true);
    2293                 :           4 :     ASSERT_EQ (tc.get_error (), nullptr);
    2294                 :           4 :     const json::value *jv = tc.get_value ();
    2295                 :           4 :     ASSERT_NE (jv, nullptr);
    2296                 :           4 :     ASSERT_EQ (jv->get_kind (), JSON_INTEGER);
    2297                 :           4 :     ASSERT_EQ (((const json::integer_number *)jv)->get (), 42);  /* NOTE(review): C-style cast here; most other tests in this file use as_a for this downcast.  */
    2298                 :           4 :   }
    2299                 :           4 : }
    2300                 :             : 
    2301                 :             : /* Verify that parsing an empty input string is reported as an error at the very start of the input and yields no value.  */
    2302                 :             : 
    2303                 :             : static void
    2304                 :           4 : test_error_empty_string ()
    2305                 :             : {
    2306                 :           4 :   const int line_1 = 1;
    2307                 :           4 :   parser_testcase tc ("");
    2308                 :           4 :   ASSERT_ERR_EQ (tc.get_error (),
    2309                 :             :                  0, line_1, 0,
    2310                 :             :                  0, line_1, 0,
    2311                 :             :                  "expected a JSON value but got EOF");  /* NOTE(review): the numeric arguments are presumably the start and end of the error range; the macro is defined outside this view.  */
    2312                 :           4 :   ASSERT_EQ (tc.get_value (), nullptr);
    2313                 :           4 : }
    2314                 :             : 
    2315                 :             : /* Verify that JSON parsing gracefully handles an invalid token.  */
    2316                 :             : 
    2317                 :             : static void
    2318                 :           4 : test_error_bad_token ()
    2319                 :             : {
    2320                 :           4 :   const int line_1 = 1;
    2321                 :           4 :   parser_testcase tc ("  not valid ");
    2322                 :           4 :   ASSERT_ERR_EQ (tc.get_error (),
    2323                 :             :                  2, line_1, 2,
    2324                 :             :                  2, line_1, 2,
    2325                 :             :                  "invalid JSON token: unexpected character: 'n'");
    2326                 :           4 :   ASSERT_EQ (tc.get_value (), nullptr);
    2327                 :           4 : }
    2328                 :             : 
    2329                 :             : /* Verify that JSON parsing gracefully handles a missing comma
    2330                 :             :    within an object.  */
    2331                 :             : 
    2332                 :             : static void
    2333                 :           4 : test_error_object_with_missing_comma ()
    2334                 :             : {
    2335                 :           4 :   const int line_1 = 1;
    2336                 :             :   /*                  0           1           2.  */
    2337                 :             :   /*                  01 2345 6789012 3456 7890.  */
    2338                 :           4 :   const char *json = "{\"foo\" : 42 \"bar\"";
    2339                 :           4 :   parser_testcase tc (json);
    2340                 :           4 :   ASSERT_ERR_EQ (tc.get_error (),
    2341                 :             :                  12, line_1, 12,
    2342                 :             :                  16, line_1, 16,
    2343                 :             :                  "expected ',' or '}'; got string");
    2344                 :           4 :   ASSERT_EQ (tc.get_value (), nullptr);
    2345                 :           4 : }
    2346                 :             : 
    2347                 :             : /* Verify that JSON parsing gracefully handles a missing comma
    2348                 :             :    within an array.  */
    2349                 :             : 
    2350                 :             : static void
    2351                 :           4 : test_error_array_with_missing_comma ()
    2352                 :             : {
    2353                 :           4 :   const int line_1 = 1;
    2354                 :             :   /*                  01234567.  */
    2355                 :           4 :   const char *json = "[0, 1 42]";
    2356                 :           4 :   parser_testcase tc (json);
    2357                 :           4 :   ASSERT_ERR_EQ (tc.get_error (),
    2358                 :             :                  6, line_1, 6,
    2359                 :             :                  7, line_1, 7,
    2360                 :             :                  "expected ',' or ']'; got number");
    2361                 :           4 :   ASSERT_EQ (tc.get_value (), nullptr);
    2362                 :           4 : }
    2363                 :             : 
    2364                 :             : /* Run all of the selftests within this file.  */
    2365                 :             : 
    2366                 :             : void
    2367                 :           4 : json_parser_cc_tests ()
    2368                 :             : {
    2369                 :           4 :   test_lexer ();
    2370                 :           4 :   test_lexing_unsupported_single_line_comment ();
    2371                 :           4 :   test_lexing_unsupported_multiline_comment ();
    2372                 :           4 :   test_lexing_supported_single_line_comment ();
    2373                 :           4 :   test_lexing_supported_multiline_comment ();
    2374                 :           4 :   test_parse_string ();
    2375                 :           4 :   test_parse_number ();
    2376                 :           4 :   test_parse_array ();
    2377                 :           4 :   test_parse_object ();
    2378                 :           4 :   test_parse_literals ();
    2379                 :           4 :   test_parse_jsonrpc ();
    2380                 :           4 :   test_parse_empty_object ();
    2381                 :           4 :   test_parsing_comments ();
    2382                 :           4 :   test_error_empty_string ();
    2383                 :           4 :   test_error_bad_token ();
    2384                 :           4 :   test_error_object_with_missing_comma ();
    2385                 :           4 :   test_error_array_with_missing_comma ();
    2386                 :           4 : }
    2387                 :             : 
    2388                 :             : } // namespace selftest
    2389                 :             : 
    2390                 :             : #endif /* #if CHECKING_P */

Generated by: LCOV version 2.1-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.