LCOV - code coverage report
Current view: top level - gcc/rust/lex - rust-lex.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 92.0 % 1321 1215
Test Date: 2026-06-20 15:32:29 Functions: 94.3 % 53 50
Legend: Lines:     hit not hit

            Line data    Source code
       1              : // Copyright (C) 2020-2026 Free Software Foundation, Inc.
       2              : 
       3              : // This file is part of GCC.
       4              : 
       5              : // GCC is free software; you can redistribute it and/or modify it under
       6              : // the terms of the GNU General Public License as published by the Free
       7              : // Software Foundation; either version 3, or (at your option) any later
       8              : // version.
       9              : 
      10              : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      11              : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
      12              : // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      13              : // for more details.
      14              : 
      15              : // You should have received a copy of the GNU General Public License
      16              : // along with GCC; see the file COPYING3.  If not see
      17              : // <http://www.gnu.org/licenses/>.
      18              : 
      19              : #include "rust-codepoint.h"
      20              : #include "rust-system.h"
      21              : #include "rust-lex.h"
      22              : #include "rust-diagnostics.h"
      23              : #include "rust-linemap.h"
      24              : #include "rust-edition.h"
      25              : #include "safe-ctype.h"
      26              : #include "cpplib.h"
      27              : #include "rust-keyword-values.h"
      28              : 
      29              : namespace Rust {
      30              : // TODO: move to separate compilation unit?
      31              : // overload += for uint32_t to allow 32-bit encoded utf-8 to be added
      32              : std::string &
      33      3275683 : operator+= (std::string &str, Codepoint char32)
      34              : {
      35      3275683 :   if (char32.value < 0x80)
      36              :     {
      37      3274670 :       str += static_cast<char> (char32.value);
      38              :     }
      39         1013 :   else if (char32.value < (0x1F + 1) << (1 * 6))
      40              :     {
      41          674 :       str += static_cast<char> (0xC0 | ((char32.value >> 6) & 0x1F));
      42          674 :       str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
      43              :     }
      44          339 :   else if (char32.value < (0x0F + 1) << (2 * 6))
      45              :     {
      46          329 :       str += static_cast<char> (0xE0 | ((char32.value >> 12) & 0x0F));
      47          329 :       str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F));
      48          329 :       str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
      49              :     }
      50           10 :   else if (char32.value < (0x07 + 1) << (3 * 6))
      51              :     {
      52            6 :       str += static_cast<char> (0xF0 | ((char32.value >> 18) & 0x07));
      53            6 :       str += static_cast<char> (0x80 | ((char32.value >> 12) & 0x3F));
      54            6 :       str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F));
      55            6 :       str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
      56              :     }
      57              :   else
      58              :     {
      59            4 :       rust_debug ("Invalid unicode codepoint found: '%u' ", char32.value);
      60              :     }
      61      3275683 :   return str;
      62              : }
      63              : 
      64              : std::string
      65      2986365 : Codepoint::as_string ()
      66              : {
      67      2986365 :   std::string str;
      68              : 
      69              :   // str += Codepoint (value);
      70      2986365 :   str += *this;
      71              : 
      72      2986365 :   return str;
      73              : }
      74              : 
      75              : /* Includes all allowable float digits EXCEPT _ and . as that needs lookahead
      76              :  * for handling. */
      77              : bool
      78            0 : is_float_digit (uint32_t number)
      79              : {
      80            0 :   return ISDIGIT (number) || number == 'E' || number == 'e';
      81              : }
      82              : 
      83              : /* Basically ISXDIGIT from safe-ctype but may change if Rust's encoding or
      84              :  * whatever is different */
      85              : bool
      86         2681 : is_x_digit (uint32_t number)
      87              : {
      88         2681 :   return ISXDIGIT (number);
      89              : }
      90              : 
      91              : bool
      92           87 : is_octal_digit (uint32_t number)
      93              : {
      94           87 :   return number >= '0' && number <= '7';
      95              : }
      96              : 
      97              : bool
      98          276 : is_bin_digit (uint32_t number)
      99              : {
     100          276 :   return number == '0' || number == '1';
     101              : }
     102              : 
     103              : bool
     104          142 : check_valid_float_dot_end (uint32_t character)
     105              : {
     106          142 :   return character != '.' && character != '_' && !ISALPHA (character);
     107              : }
     108              : 
     109              : bool
     110         3661 : is_whitespace (uint32_t character)
     111              : {
     112              :   // https://doc.rust-lang.org/reference/whitespace.html
     113         3661 :   switch (character)
     114              :     {
     115              :     case '\t':
     116              :     case '\n':
     117              :     case '\v':
     118              :     case '\f':
     119              :     case '\r':
     120              :     case ' ':
     121              :     case 0x0085: // next line
     122              :     case 0x200e: // left-to-right mark
     123              :     case 0x200f: // right-to-left mark
     124              :     case 0x2028: // line separator
     125              :     case 0x2029: // paragraph separator
     126              :       return true;
     127         3318 :     default:
     128         3318 :       return false;
     129              :     }
     130              : }
     131              : 
     132              : bool
     133         4384 : is_non_decimal_int_literal_separator (uint32_t character)
     134              : {
     135         4384 :   return character == 'x' || character == 'o' || character == 'b';
     136              : }
     137              : 
     138              : bool
     139       321359 : is_identifier_start (uint32_t codepoint)
     140              : {
     141       321359 :   return (cpp_check_xid_property (codepoint) & CPP_XID_START)
     142       321359 :          || codepoint == '_';
     143              : }
     144              : 
     145              : bool
     146      1182099 : is_identifier_continue (uint32_t codepoint)
     147              : {
     148      1182099 :   return cpp_check_xid_property (codepoint) & CPP_XID_CONTINUE;
     149              : }
     150              : 
// Builds a lexer that reads from the string INPUT instead of a file.
//
// The file member is initialised from RAIIFile::create_error (), no lexer
// dump stream is set, and the character source is a BufferInputSource built
// from INPUT.  Note that inside the initialiser list `input` names the string
// parameter, which shadows the member of the same name.  Unlike the
// file-based constructor, this one does not call start_file on LINEMAP.
Lexer::Lexer (const std::string &input, Linemap *linemap)
  : input (RAIIFile::create_error ()), current_line (1), current_column (1),
    line_map (linemap), dump_lex_out ({}),
    raw_input_source (new BufferInputSource (input, 0)),
    input_queue{*raw_input_source}, token_queue (TokenSource (this))
{}
     157              : 
// Builds a lexer that reads from FILE_INPUT.
//
// FILE_INPUT is moved into the `input` member and the character source is a
// FileInputSource built from input.get_raw ().  DUMP_LEX_OPT, when it holds a
// stream, is the destination used by dump_and_skip.  Line and column counters
// start at 1.  If LINEMAP is non-null, FILENAME is registered with it as
// starting at the current line.
Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap,
              tl::optional<std::ofstream &> dump_lex_opt)
  : input (std::move (file_input)), current_line (1), current_column (1),
    line_map (linemap), dump_lex_out (dump_lex_opt),
    raw_input_source (new FileInputSource (input.get_raw ())),
    input_queue{*raw_input_source}, token_queue (TokenSource (this))
{
  // inform line_table that file is being entered and is in line 1
  if (linemap)
    line_map->start_file (filename, current_line);
}
     169              : 
// Destructor.  The body is intentionally empty: the call to line_map->stop ()
// is commented out for the reasons given below.
Lexer::~Lexer ()
{
  /* ok apparently stop (which is equivalent of original code in destructor) is
   * meant to be called after all files have finished parsing, for cleanup. On
   * the other hand, actual code that it calls to leave a certain line map is
   * mentioned in GCC docs as being useful for "just leaving an included header"
   * and stuff like that, so this line mapping functionality may need fixing.
   * FIXME: find out whether this occurs. */

  // line_map->stop();
}
     181              : 
     182              : bool
     183         4833 : Lexer::input_source_is_valid_utf8 ()
     184              : {
     185         4833 :   return raw_input_source->is_valid ();
     186              : }
     187              : 
     188              : location_t
     189      1899038 : Lexer::get_current_location ()
     190              : {
     191      1899038 :   if (line_map)
     192      1898869 :     return linemap_position_for_column (line_table, current_column);
     193              :   else
     194              :     // If we have no linemap, we're lexing something without proper locations
     195              :     return UNDEF_LOCATION;
     196              : }
     197              : 
     198              : Codepoint
     199      4257539 : Lexer::peek_input (int n)
     200              : {
     201      4257539 :   return input_queue.peek (n);
     202              : }
     203              : 
     204              : Codepoint
     205      4159223 : Lexer::peek_input ()
     206              : {
     207      4159223 :   return peek_input (0);
     208              : }
     209              : 
     210              : void
     211      3584366 : Lexer::skip_input (int n)
     212              : {
     213      3584366 :   input_queue.skip (n);
     214      3584366 : }
     215              : 
     216              : void
     217      3574072 : Lexer::skip_input ()
     218              : {
     219      3574072 :   skip_input (0);
     220      3574072 : }
     221              : 
     222              : void
     223       750466 : Lexer::skip_token (int n)
     224              : {
     225              :   // dump tokens if dump-lex option is enabled
     226       750466 :   if (dump_lex_out.has_value ())
     227           55 :     dump_and_skip (n);
     228              :   else
     229       750411 :     token_queue.skip (n);
     230       750466 : }
     231              : 
     232              : void
     233           55 : Lexer::dump_and_skip (int n)
     234              : {
     235           55 :   std::ofstream &out = dump_lex_out.value ();
     236           55 :   bool found_eof = false;
     237           55 :   const_TokenPtr tok;
     238          110 :   for (int i = 0; i < n + 1; i++)
     239              :     {
     240           55 :       if (!found_eof)
     241              :         {
     242           55 :           tok = peek_token ();
     243           55 :           found_eof |= tok->get_id () == Rust::END_OF_FILE;
     244              : 
     245           55 :           location_t loc = tok->get_locus ();
     246              : 
     247           55 :           out << "<id=";
     248           55 :           out << tok->token_id_to_str ();
     249           55 :           out << (tok->should_have_str ()
     250          167 :                     ? (std::string (", text=") + tok->get_str ()
     251          167 :                        + std::string (", typehint=")
     252           93 :                        + std::string (tok->get_type_hint_str ()))
     253           91 :                     : "")
     254          110 :               << " ";
     255          110 :           out << Linemap::location_to_string (loc) << '\n';
     256              :         }
     257              : 
     258           55 :       token_queue.skip (0);
     259              :     }
     260           55 : }
     261              : 
// Hands REPLACEMENT to token_queue.replace_current_value and then emits a
// debug message stating that this entry point is deprecated.
// NOTE(review): presumably this swaps the token currently at the head of the
// queue - confirm against the queue implementation.
void
Lexer::replace_current_token (TokenPtr replacement)
{
  token_queue.replace_current_value (replacement);

  rust_debug ("called 'replace_current_token' - this is deprecated");
}
     269              : 
     270              : /* Determines whether the string passed in is a keyword or not. If it is, it
     271              :  * returns the keyword name.  */
     272              : TokenId
     273       288288 : Lexer::classify_keyword (const std::string &str)
     274              : {
     275       288288 :   auto &keywords = Rust::Values::Keywords::keywords_tokens;
     276       288288 :   auto keyword = keywords.find (str);
     277              : 
     278       288288 :   if (keyword == keywords.end ())
     279              :     return IDENTIFIER;
     280              : 
     281        96371 :   auto id = keyword->second;
     282              : 
     283              :   // We now have the expected token ID of the reserved keyword. However, some
     284              :   // keywords are reserved starting in certain editions. For example, `try` is
     285              :   // only a reserved keyword in editions >=2018. The language might gain new
     286              :   // reserved keywords in the future.
     287              :   //
     288              :   // https://doc.rust-lang.org/reference/keywords.html#reserved-keywords
     289              : 
     290              :   // `try` is not a reserved keyword before 2018
     291        96371 :   if (get_rust_edition () == Edition::E2015 && id == TRY)
     292              :     return IDENTIFIER;
     293              : 
     294              :   return id;
     295              : }
     296              : 
     297              : TokenPtr
     298       758739 : Lexer::build_token ()
     299              : {
     300              :   // loop to go through multiple characters to build a single token
     301      1886107 :   while (true)
     302              :     {
     303      1886107 :       location_t loc = get_current_location ();
     304              : 
     305      1886107 :       current_char = peek_input ();
     306      1886107 :       skip_input ();
     307              : 
     308              :       // detect shebang
     309              :       // Must be the first thing on the first line, starting with #!
     310              :       // But since an attribute can also start with an #! we don't count it as a
     311              :       // shebang line when after any whitespace or comments there is a [. If it
     312              :       // is a shebang line we simple drop the line. Otherwise we don't consume
     313              :       // any characters and fall through to the real tokenizer.
     314        32809 :       if (current_line == 1 && current_column == 1 && current_char == '#'
     315      1918916 :           && peek_input () == '!')
     316              :         {
     317              :           int n = 1;
     318         3289 :           while (true)
     319              :             {
     320         3289 :               Codepoint next_char = peek_input (n);
     321         3289 :               if (is_whitespace (next_char.value))
     322            7 :                 n++;
     323         3282 :               else if ((next_char == '/' && peek_input (n + 1) == '/'
     324            7 :                         && peek_input (n + 2) != '!'
     325            7 :                         && peek_input (n + 2) != '/')
     326         3303 :                        || (next_char == '/' && peek_input (n + 1) == '/'
     327            0 :                            && peek_input (n + 2) == '/'
     328            0 :                            && peek_input (n + 3) == '/'))
     329              :                 {
     330              :                   // two // or four ////
     331              :                   // A single line comment
     332              :                   // (but not an inner or outer doc comment)
     333            7 :                   n += 2;
     334            7 :                   next_char = peek_input (n);
     335          119 :                   while (next_char != '\n' && !next_char.is_eof ())
     336              :                     {
     337          112 :                       n++;
     338          112 :                       next_char = peek_input (n);
     339              :                     }
     340            7 :                   if (next_char == '\n')
     341            7 :                     n++;
     342              :                 }
     343         3275 :               else if (next_char == '/' && peek_input (n + 1) == '*'
     344            0 :                        && peek_input (n + 2) == '*'
     345         3275 :                        && peek_input (n + 3) == '/')
     346              :                 {
     347              :                   /**/
     348            0 :                   n += 4;
     349              :                 }
     350         3275 :               else if (next_char == '/' && peek_input (n + 1) == '*'
     351            0 :                        && peek_input (n + 2) == '*' && peek_input (n + 3) == '*'
     352         3275 :                        && peek_input (n + 4) == '/')
     353              :                 {
     354              :                   /***/
     355            0 :                   n += 5;
     356              :                 }
     357         3275 :               else if ((next_char == '/' && peek_input (n + 1) == '*'
     358            0 :                         && peek_input (n + 2) != '*'
     359            0 :                         && peek_input (n + 2) != '!')
     360         3296 :                        || (next_char == '/' && peek_input (n + 1) == '*'
     361            0 :                            && peek_input (n + 2) == '*'
     362            0 :                            && peek_input (n + 3) == '*'))
     363              :                 {
     364              :                   // one /* or three /***
     365              :                   // Start of a block comment
     366              :                   // (but not an inner or outer doc comment)
     367            0 :                   n += 2;
     368            0 :                   int level = 1;
     369            0 :                   while (level > 0)
     370              :                     {
     371            0 :                       if (peek_input (n).is_eof ())
     372              :                         break;
     373            0 :                       else if (peek_input (n) == '/'
     374            0 :                                && peek_input (n + 1) == '*')
     375              :                         {
     376            0 :                           n += 2;
     377            0 :                           level += 1;
     378              :                         }
     379            0 :                       else if (peek_input (n) == '*'
     380            0 :                                && peek_input (n + 1) == '/')
     381              :                         {
     382            0 :                           n += 2;
     383            0 :                           level -= 1;
     384              :                         }
     385              :                       else
     386            0 :                         n++;
     387              :                     }
     388              :                 }
     389         3275 :               else if (next_char != '[')
     390              :                 {
     391              :                   // definitely shebang, ignore the first line
     392          518 :                   while (current_char != '\n' && !current_char.is_eof ())
     393              :                     {
     394          490 :                       current_char = peek_input ();
     395          490 :                       skip_input ();
     396              :                     }
     397              : 
     398              :                   // newline
     399           28 :                   current_line++;
     400           28 :                   current_column = 1;
     401              :                   // tell line_table that new line starts
     402           28 :                   start_line (current_line, max_column_hint);
     403           28 :                   break;
     404              :                 }
     405              :               else
     406              :                 break; /* Definitely not a shebang line. */
     407              :             }
     408              :         }
     409              : 
     410              :       // return end of file token if end of file
     411      1886107 :       if (current_char.is_eof ())
     412         5288 :         return Token::make (END_OF_FILE, loc);
     413              : 
     414              :       // if not end of file, start tokenising
     415      1880819 :       switch (current_char.value)
     416              :         {
     417              :         /* ignore whitespace characters for tokens but continue updating
     418              :          * location */
     419       166382 :         case '\n':   // newline
     420       166382 :         case 0x0085: // next line
     421       166382 :         case 0x2028: // line separator
     422       166382 :         case 0x2029: // paragraph separator
     423       166382 :           current_line++;
     424       166382 :           current_column = 1;
     425              :           // tell line_table that new line starts
     426       166382 :           start_line (current_line, max_column_hint);
     427       166382 :           continue;
     428          252 :         case '\r': // cr
     429              :           // Ignore, we expect a newline (lf) soon.
     430          252 :           continue;
     431       950746 :         case ' ': // space
     432       950746 :           current_column++;
     433       950746 :           continue;
     434          113 :         case '\t': // horizontal tab
     435              :           // width of a tab is not well-defined, assume 8 spaces
     436          113 :           current_column += 8;
     437          113 :           continue;
     438           28 :         case '\v':   // vertical tab
     439           28 :         case 0x000c: // form feed
     440           28 :         case 0x200e: // left-to-right mark
     441           28 :         case 0x200f: // right-to-left mark
     442              :           // Ignored.
     443           28 :           continue;
     444              : 
     445              :         // punctuation - actual tokens
     446        28197 :         case '=':
     447        28197 :           if (peek_input () == '>')
     448              :             {
     449              :               // match arm arrow
     450         3304 :               skip_input ();
     451         3304 :               current_column += 2;
     452         3304 :               loc += 1;
     453              : 
     454         3304 :               return Token::make (MATCH_ARROW, loc);
     455              :             }
     456        24893 :           else if (peek_input () == '=')
     457              :             {
     458              :               // equality operator
     459          667 :               skip_input ();
     460          667 :               current_column += 2;
     461          667 :               loc += 1;
     462              : 
     463          667 :               return Token::make (EQUAL_EQUAL, loc);
     464              :             }
     465              :           else
     466              :             {
     467              :               // assignment operator
     468        24226 :               current_column++;
     469        24226 :               return Token::make (EQUAL, loc);
     470              :             }
     471        47725 :         case '(':
     472        47725 :           current_column++;
     473        47725 :           return Token::make (LEFT_PAREN, loc);
     474        12102 :         case '-':
     475        12102 :           if (peek_input () == '>')
     476              :             {
     477              :               // return type specifier
     478        10658 :               skip_input ();
     479        10658 :               current_column += 2;
     480        10658 :               loc += 1;
     481              : 
     482        10658 :               return Token::make (RETURN_TYPE, loc);
     483              :             }
     484         1444 :           else if (peek_input () == '=')
     485              :             {
     486              :               // minus-assign
     487          105 :               skip_input ();
     488          105 :               current_column += 2;
     489          105 :               loc += 1;
     490              : 
     491          105 :               return Token::make (MINUS_EQ, loc);
     492              :             }
     493              :           else
     494              :             {
     495              :               // minus
     496         1339 :               current_column++;
     497         1339 :               return Token::make (MINUS, loc);
     498              :             }
     499         1750 :         case '+':
     500         1750 :           if (peek_input () == '=')
     501              :             {
     502              :               // add-assign
     503          155 :               skip_input ();
     504          155 :               current_column += 2;
     505          155 :               loc += 1;
     506              : 
     507          155 :               return Token::make (PLUS_EQ, loc);
     508              :             }
     509              :           else
     510              :             {
     511              :               // add
     512         1595 :               current_column++;
     513         1595 :               return Token::make (PLUS, loc);
     514              :             }
     515        47706 :         case ')':
     516        47706 :           current_column++;
     517        47706 :           return Token::make (RIGHT_PAREN, loc);
     518        30907 :         case ';':
     519        30907 :           current_column++;
     520        30907 :           return Token::make (SEMICOLON, loc);
     521        10845 :         case '*':
     522        10845 :           if (peek_input () == '=')
     523              :             {
     524              :               // multiplication-assign
     525            7 :               skip_input ();
     526            7 :               current_column += 2;
     527            7 :               loc += 1;
     528              : 
     529            7 :               return Token::make (ASTERISK_EQ, loc);
     530              :             }
     531              :           else
     532              :             {
     533              :               // multiplication
     534        10838 :               current_column++;
     535        10838 :               return Token::make (ASTERISK, loc);
     536              :             }
     537        22893 :         case ',':
     538        22893 :           current_column++;
     539        22893 :           return Token::make (COMMA, loc);
     540        17990 :         case '/':
     541        17990 :           if (peek_input () == '=')
     542              :             {
     543              :               // division-assign
     544            7 :               skip_input ();
     545            7 :               current_column += 2;
     546            7 :               loc += 1;
     547              : 
     548            7 :               return Token::make (DIV_EQ, loc);
     549              :             }
     550        17983 :           else if ((peek_input () == '/' && peek_input (1) != '!'
     551        16868 :                     && peek_input (1) != '/')
     552        25983 :                    || (peek_input () == '/' && peek_input (1) == '/'
     553         7900 :                        && peek_input (2) == '/'))
     554              :             {
     555              :               // two // or four ////
     556              :               // single line comment
     557              :               // (but not an inner or outer doc comment)
     558         8983 :               skip_input ();
     559         8983 :               current_column += 2;
     560         8983 :               current_char = peek_input ();
     561              : 
     562              :               // basically ignore until line finishes
     563       440128 :               while (current_char != '\n' && !current_char.is_eof ())
     564              :                 {
     565       422162 :                   skip_input ();
     566       422162 :                   current_column++; // not used
     567       422162 :                   current_char = peek_input ();
     568              :                 }
     569         8983 :               continue;
     570              :             }
     571         9000 :           else if (peek_input () == '/'
     572         9000 :                    && (peek_input (1) == '!' || peek_input (1) == '/'))
     573              :             {
     574              :               /* single line doc comment, inner or outer.  */
     575         7985 :               bool is_inner = peek_input (1) == '!';
     576         7985 :               skip_input (1);
     577         7985 :               current_column += 3;
     578              : 
     579         7985 :               std::string str;
     580         7985 :               str.reserve (32);
     581         7985 :               current_char = peek_input ();
     582       192537 :               while (current_char != '\n')
     583              :                 {
     584       176616 :                   skip_input ();
     585       176616 :                   if (current_char == '\r')
     586              :                     {
     587           51 :                       Codepoint next_char = peek_input ();
     588           51 :                       if (next_char == '\n')
     589              :                         {
     590           49 :                           current_char = '\n';
     591           49 :                           break;
     592              :                         }
     593            2 :                       rust_error_at (
     594              :                         loc, "Isolated CR %<\\r%> not allowed in doc comment");
     595            2 :                       current_char = next_char;
     596            2 :                       continue;
     597            2 :                     }
     598       176565 :                   if (current_char.is_eof ())
     599              :                     {
     600            0 :                       rust_error_at (
     601              :                         loc, ErrorCode::E0758,
     602              :                         "unexpected EOF while looking for end of comment");
     603            0 :                       break;
     604              :                     }
     605       176565 :                   str += current_char;
     606       176565 :                   current_char = peek_input ();
     607              :                 }
     608         7985 :               skip_input ();
     609         7985 :               current_line++;
     610         7985 :               current_column = 1;
     611              :               // tell line_table that new line starts
     612         7985 :               start_line (current_line, max_column_hint);
     613              : 
     614         7985 :               str.shrink_to_fit ();
     615              : 
     616         7985 :               loc += str.size () - 1;
     617         7985 :               if (is_inner)
     618          100 :                 return Token::make_inner_doc_comment (loc, std::move (str));
     619              :               else
     620         7885 :                 return Token::make_outer_doc_comment (loc, std::move (str));
     621         7985 :             }
     622         1015 :           else if (peek_input () == '*' && peek_input (1) == '*'
     623         1100 :                    && peek_input (2) == '/')
     624              :             {
     625              :               /**/
     626           14 :               skip_input (2);
     627           14 :               current_column += 4;
     628           14 :               continue;
     629              :             }
     630         1001 :           else if (peek_input () == '*' && peek_input (1) == '*'
     631         1072 :                    && peek_input (2) == '*' && peek_input (3) == '/')
     632              :             {
     633              :               /***/
     634           14 :               skip_input (3);
     635           14 :               current_column += 5;
     636           14 :               continue;
     637              :             }
     638          987 :           else if ((peek_input () == '*' && peek_input (1) != '!'
     639          878 :                     && peek_input (1) != '*')
     640         1117 :                    || (peek_input () == '*' && peek_input (1) == '*'
     641           57 :                        && peek_input (2) == '*'))
     642              :             {
     643              :               // one /* or three /***
     644              :               // block comment
     645              :               // (but not an inner or outer doc comment)
     646          835 :               skip_input ();
     647          835 :               current_column += 2;
     648              : 
     649          835 :               int level = 1;
     650        37830 :               while (level > 0)
     651              :                 {
     652        36996 :                   current_char = peek_input ();
     653              : 
     654        36996 :                   if (current_char.is_eof ())
     655              :                     {
     656            1 :                       rust_error_at (
     657              :                         loc, ErrorCode::E0758,
     658              :                         "unexpected EOF while looking for end of comment");
     659            1 :                       break;
     660              :                     }
     661              : 
     662              :                   // if /* found
     663        36995 :                   if (current_char == '/' && peek_input (1) == '*')
     664              :                     {
     665              :                       // skip /* characters
     666           49 :                       skip_input (1);
     667              : 
     668           49 :                       current_column += 2;
     669              : 
     670           49 :                       level += 1;
     671           49 :                       continue;
     672              :                     }
     673              : 
     674              :                   // ignore until */ is found
     675        36946 :                   if (current_char == '*' && peek_input (1) == '/')
     676              :                     {
     677              :                       // skip */ characters
     678          883 :                       skip_input (1);
     679              : 
     680          883 :                       current_column += 2;
     681              : 
     682          883 :                       level -= 1;
     683          883 :                       continue;
     684              :                     }
     685              : 
     686        36063 :                   if (current_char == '\n')
     687              :                     {
     688          414 :                       skip_input ();
     689          414 :                       current_line++;
     690          414 :                       current_column = 1;
     691              :                       // tell line_table that new line starts
     692          414 :                       start_line (current_line, max_column_hint);
     693          414 :                       continue;
     694              :                     }
     695              : 
     696        35649 :                   skip_input ();
     697        35649 :                   current_column++;
     698              :                 }
     699              : 
     700              :               // refresh new token
     701          835 :               continue;
     702          835 :             }
     703          152 :           else if (peek_input () == '*'
     704          152 :                    && (peek_input (1) == '!' || peek_input (1) == '*'))
     705              :             {
     706              :               // block doc comment, inner /*! or outer /**
     707          116 :               bool is_inner = peek_input (1) == '!';
     708          116 :               skip_input (1);
     709          116 :               current_column += 3;
     710              : 
     711          116 :               std::string str;
     712          116 :               str.reserve (96);
     713              : 
     714          116 :               int level = 1;
     715          116 :               while (level > 0)
     716              :                 {
     717         2685 :                   current_char = peek_input ();
     718              : 
     719         2685 :                   if (current_char.is_eof ())
     720              :                     {
     721            0 :                       rust_error_at (
     722              :                         loc, ErrorCode::E0758,
     723              :                         "unexpected EOF while looking for end of comment");
     724            0 :                       break;
     725              :                     }
     726              : 
     727              :                   // if /* found
     728         2685 :                   if (current_char == '/' && peek_input (1) == '*')
     729              :                     {
     730              :                       // skip /* characters
     731           84 :                       skip_input (1);
     732           84 :                       current_column += 2;
     733              : 
     734           84 :                       level += 1;
     735           84 :                       str += "/*";
     736           84 :                       continue;
     737              :                     }
     738              : 
     739              :                   // ignore until */ is found
     740         2601 :                   if (current_char == '*' && peek_input (1) == '/')
     741              :                     {
     742              :                       // skip */ characters
     743          200 :                       skip_input (1);
     744          200 :                       current_column += 2;
     745              : 
     746          200 :                       level -= 1;
     747          200 :                       if (level > 0)
     748           84 :                         str += "*/";
     749          200 :                       continue;
     750              :                     }
     751              : 
     752         2401 :                   if (current_char == '\r' && peek_input (1) != '\n')
     753            2 :                     rust_error_at (
     754              :                       loc, "Isolated CR %<\\r%> not allowed in doc comment");
     755              : 
     756         2401 :                   if (current_char == '\n')
     757              :                     {
     758            0 :                       skip_input ();
     759            0 :                       current_line++;
     760            0 :                       current_column = 1;
     761              :                       // tell line_table that new line starts
     762            0 :                       start_line (current_line, max_column_hint);
     763            0 :                       str += '\n';
     764            0 :                       continue;
     765              :                     }
     766              : 
     767         2401 :                   str += current_char;
     768         2401 :                   skip_input ();
     769         2401 :                   current_column++;
     770              :                 }
     771              : 
     772          116 :               str.shrink_to_fit ();
     773              : 
     774          116 :               loc += str.size () - 1;
     775          116 :               if (is_inner)
     776           73 :                 return Token::make_inner_doc_comment (loc, std::move (str));
     777              :               else
     778           43 :                 return Token::make_outer_doc_comment (loc, std::move (str));
     779          116 :             }
     780              :           else
     781              :             {
     782              :               // division
     783           36 :               current_column++;
     784           36 :               return Token::make (DIV, loc);
     785              :             }
     786           43 :         case '%':
     787           43 :           if (peek_input () == '=')
     788              :             {
     789              :               // modulo-assign
     790            7 :               skip_input ();
     791            7 :               current_column += 2;
     792            7 :               loc += 1;
     793              : 
     794            7 :               return Token::make (PERCENT_EQ, loc);
     795              :             }
     796              :           else
     797              :             {
     798              :               // modulo
     799           36 :               current_column++;
     800           36 :               return Token::make (PERCENT, loc);
     801              :             }
     802          147 :         case '^':
     803          147 :           if (peek_input () == '=')
     804              :             {
     805              :               // xor-assign?
     806           84 :               skip_input ();
     807           84 :               current_column += 2;
     808           84 :               loc += 1;
     809              : 
     810           84 :               return Token::make (CARET_EQ, loc);
     811              :             }
     812              :           else
     813              :             {
     814              :               // xor?
     815           63 :               current_column++;
     816           63 :               return Token::make (CARET, loc);
     817              :             }
     818         8717 :         case '<':
     819         8717 :           if (peek_input () == '<')
     820              :             {
     821           66 :               if (peek_input (1) == '=')
     822              :                 {
     823              :                   // left-shift assign
     824            7 :                   skip_input (1);
     825            7 :                   current_column += 3;
     826            7 :                   loc += 2;
     827              : 
     828            7 :                   return Token::make (LEFT_SHIFT_EQ, loc);
     829              :                 }
     830              :               else
     831              :                 {
     832              :                   // left-shift
     833           59 :                   skip_input ();
     834           59 :                   current_column += 2;
     835           59 :                   loc += 1;
     836              : 
     837           59 :                   return Token::make (LEFT_SHIFT, loc);
     838              :                 }
     839              :             }
     840         8651 :           else if (peek_input () == '=')
     841              :             {
     842              :               // smaller than or equal to
     843          224 :               skip_input ();
     844          224 :               current_column += 2;
     845          224 :               loc += 1;
     846              : 
     847          224 :               return Token::make (LESS_OR_EQUAL, loc);
     848              :             }
     849              :           else
     850              :             {
     851              :               // smaller than
     852         8427 :               current_column++;
     853         8427 :               return Token::make (LEFT_ANGLE, loc);
     854              :             }
     855         8570 :           break;
     856         8570 :         case '>':
     857         8570 :           if (peek_input () == '>')
     858              :             {
     859          129 :               if (peek_input (1) == '=')
     860              :                 {
     861              :                   // right-shift-assign
     862            7 :                   skip_input (1);
     863            7 :                   current_column += 3;
     864            7 :                   loc += 2;
     865              : 
     866            7 :                   return Token::make (RIGHT_SHIFT_EQ, loc);
     867              :                 }
     868              :               else
     869              :                 {
     870              :                   // right-shift
     871          122 :                   skip_input ();
     872          122 :                   current_column += 2;
     873          122 :                   loc += 1;
     874              : 
     875          122 :                   return Token::make (RIGHT_SHIFT, loc);
     876              :                 }
     877              :             }
     878         8441 :           else if (peek_input () == '=')
     879              :             {
     880              :               // larger than or equal to
     881          209 :               skip_input ();
     882          209 :               current_column += 2;
     883          209 :               loc += 1;
     884              : 
     885          209 :               return Token::make (GREATER_OR_EQUAL, loc);
     886              :             }
     887              :           else
     888              :             {
     889              :               // larger than
     890         8232 :               current_column++;
     891         8232 :               return Token::make (RIGHT_ANGLE, loc);
     892              :             }
     893        29008 :         case ':':
     894        29008 :           if (peek_input () == ':')
     895              :             {
     896              :               // scope resolution ::
     897         9846 :               skip_input ();
     898         9846 :               current_column += 2;
     899         9846 :               loc += 1;
     900              : 
     901         9846 :               return Token::make (SCOPE_RESOLUTION, loc);
     902              :             }
     903              :           else
     904              :             {
     905              :               // single colon :
     906        19162 :               current_column++;
     907        19162 :               return Token::make (COLON, loc);
     908              :             }
     909        16292 :         case '!':
     910              :           // no special handling for macros in lexer?
     911        16292 :           if (peek_input () == '=')
     912              :             {
     913              :               // not equal boolean operator
     914          941 :               skip_input ();
     915          941 :               current_column += 2;
     916          941 :               loc += 1;
     917              : 
     918          941 :               return Token::make (NOT_EQUAL, loc);
     919              :             }
     920              :           else
     921              :             {
     922              :               // unary not operator (a lone '!' not followed by '=')
     923        15351 :               current_column++;
     924              : 
     925        15351 :               return Token::make (EXCLAM, loc);
     926              :             }
     927          371 :         case '?':
     928          371 :           current_column++;
     929          371 :           return Token::make (QUESTION_MARK, loc);
     930        20749 :         case '#':
     931        20749 :           current_column++;
     932        20749 :           return Token::make (HASH, loc);
     933        22826 :         case '[':
     934        22826 :           current_column++;
     935        22826 :           return Token::make (LEFT_SQUARE, loc);
     936        22819 :         case ']':
     937        22819 :           current_column++;
     938        22819 :           return Token::make (RIGHT_SQUARE, loc);
     939        35986 :         case '{':
     940        35986 :           current_column++;
     941        35986 :           return Token::make (LEFT_CURLY, loc);
     942        35936 :         case '}':
     943        35936 :           current_column++;
     944        35936 :           return Token::make (RIGHT_CURLY, loc);
     945           19 :         case '@':
     946           19 :           current_column++;
     947           19 :           return Token::make (PATTERN_BIND, loc);
     948         3619 :         case '$':
     949         3619 :           current_column++;
     950         3619 :           return Token::make (DOLLAR_SIGN, loc);
     951            0 :         case '~':
     952            0 :           current_column++;
     953            0 :           return Token::make (TILDE, loc);
     954            0 :         case '\\':
     955            0 :           current_column++;
     956            0 :           return Token::make (BACKSLASH, loc);
     957            0 :         case '`':
     958            0 :           current_column++;
     959            0 :           return Token::make (BACKTICK, loc);
     960          475 :         case '|':
     961          475 :           if (peek_input () == '=')
     962              :             {
     963              :               // bitwise or-assign?
     964           28 :               skip_input ();
     965           28 :               current_column += 2;
     966           28 :               loc += 1;
     967              : 
     968           28 :               return Token::make (PIPE_EQ, loc);
     969              :             }
     970          447 :           else if (peek_input () == '|')
     971              :             {
     972              :               // logical or
     973           69 :               skip_input ();
     974           69 :               current_column += 2;
     975           69 :               loc += 1;
     976              : 
     977           69 :               return Token::make (OR, loc);
     978              :             }
     979              :           else
     980              :             {
     981              :               // bitwise or
     982          378 :               current_column++;
     983              : 
     984          378 :               return Token::make (PIPE, loc);
     985              :             }
     986         9962 :         case '&':
     987         9962 :           if (peek_input () == '=')
     988              :             {
     989              :               // bitwise and-assign?
     990           21 :               skip_input ();
     991           21 :               current_column += 2;
     992           21 :               loc += 1;
     993              : 
     994           21 :               return Token::make (AMP_EQ, loc);
     995              :             }
     996         9941 :           else if (peek_input () == '&')
     997              :             {
     998              :               // logical and
     999          306 :               skip_input ();
    1000          306 :               current_column += 2;
    1001          306 :               loc += 1;
    1002              : 
    1003          306 :               return Token::make (LOGICAL_AND, loc);
    1004              :             }
    1005              :           else
    1006              :             {
    1007              :               // bitwise and/reference
    1008         9635 :               current_column++;
    1009              : 
    1010         9635 :               return Token::make (AMP, loc);
    1011              :             }
    1012         6715 :         case '.':
    1013         6715 :           if (peek_input () == '.')
    1014              :             {
    1015         1170 :               if (peek_input (1) == '.')
    1016              :                 {
    1017              :                   // ellipsis
    1018          840 :                   skip_input (1);
    1019          840 :                   current_column += 3;
    1020          840 :                   loc += 2;
    1021              : 
    1022          840 :                   return Token::make (ELLIPSIS, loc);
    1023              :                 }
    1024          330 :               else if (peek_input (1) == '=')
    1025              :                 {
    1026              :                   // ..=
    1027           38 :                   skip_input (1);
    1028           38 :                   current_column += 3;
    1029           38 :                   loc += 2;
    1030              : 
    1031           38 :                   return Token::make (DOT_DOT_EQ, loc);
    1032              :                 }
    1033              :               else
    1034              :                 {
    1035              :                   // ..
    1036          292 :                   skip_input ();
    1037          292 :                   current_column += 2;
    1038          292 :                   loc += 1;
    1039              : 
    1040          292 :                   return Token::make (DOT_DOT, loc);
    1041              :                 }
    1042              :             }
    1043              :           else /*if (!ISDIGIT (peek_input ()))*/
    1044              :             {
    1045              :               // single dot .
    1046              :               // Only if followed by a non-number - otherwise is float
    1047              :               // nope, float cannot start with '.'.
    1048         5545 :               current_column++;
    1049         5545 :               return Token::make (DOT, loc);
    1050              :             }
    1051      1117521 :         }
    1052              :       // TODO: special handling of _ in the lexer? instead of being identifier
    1053              : 
    1054              :       // byte character, byte string and raw byte string literals
    1055       320929 :       if (current_char == 'b')
    1056              :         {
    1057        10743 :           if (peek_input () == '\'')
    1058           78 :             return parse_byte_char (loc);
    1059        10665 :           else if (peek_input () == '"')
    1060           64 :             return parse_byte_string (loc);
    1061        10601 :           else if (peek_input () == 'r'
    1062        10601 :                    && (peek_input (1) == '#' || peek_input (1) == '"'))
    1063           32 :             return parse_raw_byte_string (loc);
    1064              :         }
    1065              : 
    1066              :       // raw identifiers and raw strings
    1067       320755 :       if (current_char == 'r')
    1068              :         {
    1069         4042 :           Codepoint peek = peek_input ();
    1070         4042 :           Codepoint peek1 = peek_input (1);
    1071              : 
    1072              :           // TODO (tamaron) parse Unicode ident
    1073         4042 :           if (peek == '#' && is_identifier_start (peek1.value))
    1074              :             {
    1075           81 :               TokenPtr raw_ident_ptr = parse_raw_identifier (loc);
    1076           81 :               if (raw_ident_ptr != nullptr)
    1077           80 :                 return raw_ident_ptr;
    1078              :               else
    1079            1 :                 continue; /* input got parsed, it just wasn't valid. An error
    1080              :                              was produced. */
    1081           81 :             }
    1082              :           else
    1083              :             {
    1084         3961 :               TokenPtr maybe_raw_string_ptr = maybe_parse_raw_string (loc);
    1085         3961 :               if (maybe_raw_string_ptr != nullptr)
    1086           25 :                 return maybe_raw_string_ptr;
    1087         3961 :             }
    1088              :         }
    1089              : 
    1090              :       // find identifiers and keywords.
    1091       320649 :       if (is_identifier_start (current_char.value))
    1092       289604 :         return parse_identifier_or_keyword (loc);
    1093              : 
    1094              :       // int and float literals
    1095        31045 :       if (ISDIGIT (current_char.value))
    1096              :         { //  _ not allowed as first char
    1097        17452 :           if (current_char == '0'
    1098        17452 :               && is_non_decimal_int_literal_separator (peek_input ().value))
    1099              :             {
    1100              :               // handle binary, octal, hex literals
    1101          335 :               TokenPtr non_dec_int_lit_ptr
    1102          335 :                 = parse_non_decimal_int_literals (loc);
    1103          335 :               if (non_dec_int_lit_ptr != nullptr)
    1104          335 :                 return non_dec_int_lit_ptr;
    1105          335 :             }
    1106              :           else
    1107              :             {
    1108              :               // handle decimals (integer or float)
    1109        17117 :               TokenPtr decimal_or_float_ptr = parse_decimal_int_or_float (loc);
    1110        17117 :               if (decimal_or_float_ptr != nullptr)
    1111        17117 :                 return decimal_or_float_ptr;
    1112        17117 :             }
    1113              :         }
    1114              : 
    1115              :       // string literals
    1116        13593 :       if (current_char == '"')
    1117        12745 :         return parse_string (loc);
    1118              : 
    1119              :       // char literals and lifetime names
    1120          848 :       if (current_char == '\'')
    1121              :         {
    1122          848 :           TokenPtr char_or_lifetime_ptr = parse_char_or_lifetime (loc);
    1123          848 :           if (char_or_lifetime_ptr != nullptr)
    1124          848 :             return char_or_lifetime_ptr;
    1125          848 :         }
    1126              : 
    1127              :       // DEBUG: check for specific character problems:
    1128            0 :       if (current_char == '0')
    1129            0 :         rust_debug ("'0' uncaught before unexpected character");
    1130            0 :       else if (current_char == ']')
    1131            0 :         rust_debug ("']' uncaught before unexpected character");
    1132              :       else if (current_char == 0x5d)
    1133              :         rust_debug ("whatever 0x5d is (not '0' or ']') uncaught before "
    1134              :                     "unexpected character");
    1135              : 
    1136              :       // didn't match anything so error
    1137            0 :       rust_error_at (loc, "unexpected character %<%x%>", current_char.value);
    1138            0 :       current_column++;
    1139              :     }
    1140              : }
    1141              : 
    1142              : // Parses in a literal suffix: collects the run of characters accepted by ISALPHA, ISDIGIT or equal to '_' starting at current_char, and returns {suffix text, number of characters consumed}.
    1143              : std::pair<std::string, int>
    1144        17444 : Lexer::parse_in_suffix ()
    1145              : {
    1146        17444 :   std::string suffix;
    1147              :   // number of characters consumed; returned as the pair's second element
    1148        17444 :   int additional_length_offset = 0;
    1149              :   // NOTE(review): current_char.value is handed to the safe-ctype macros; confirm they behave as intended for non-ASCII codepoint values
    1150              :   // get suffix: loop while current_char matches ISALPHA, ISDIGIT or '_'
    1151        38430 :   while (ISALPHA (current_char.value) || ISDIGIT (current_char.value)
    1152        40525 :          || current_char == '_')
    1153              :     {
    1154         5636 :       additional_length_offset++;
    1155              :       // append the character, advance the input, then reload current_char from peek_input ()
    1156         5636 :       suffix += current_char;
    1157         5636 :       skip_input ();
    1158         5636 :       current_char = peek_input ();
    1159              :     }
    1160              :   // the suffix string is moved into the returned pair rather than copied
    1161        17444 :   return std::make_pair (std::move (suffix), additional_length_offset);
    1162        17444 : }
    1163              : 
    1164              : // Parses in the exponent part (if any) of a float literal. Returns {exponent text, number of characters consumed}; the text is empty and the count is 0 when current_char is neither 'E' nor 'e'.
    1165              : std::pair<std::string, int>
    1166          349 : Lexer::parse_in_exponent_part ()
    1167              : {
    1168          349 :   int additional_length_offset = 0;
    1169          349 :   std::string str;
    1170          349 :   if (current_char == 'E' || current_char == 'e')
    1171              :     {
    1172              :       // keep the exponent marker ('e'/'E') in the string, as strtod works with it
    1173            8 :       str += current_char;
    1174            8 :       skip_input ();
    1175            8 :       current_char = peek_input ();
    1176              :       // one character accounted for: the exponent marker itself
    1177            8 :       additional_length_offset++;
    1178              : 
    1179              :       // optional sign directly after the marker: '-' or '+' is kept in the string and counted
    1180            8 :       if (current_char == '-' || current_char == '+')
    1181              :         {
    1182            8 :           str += current_char;
    1183              : 
    1184            8 :           skip_input ();
    1185            8 :           current_char = peek_input ();
    1186              : 
    1187            8 :           additional_length_offset++;
    1188              :         }
    1189              : 
    1190              :       // parse another decimal number for the exponent: tuple element 0 is its text, element 1 its consumed length (the third element, the pure-decimal flag, is not used here)
    1191            8 :       auto str_length = parse_in_decimal ();
    1192            8 :       str += std::get<0> (str_length);
    1193            8 :       additional_length_offset += std::get<1> (str_length);
    1194            8 :     }
    1195          698 :   return std::make_pair (str, additional_length_offset); // NOTE(review): str is copied into the pair here; parse_in_suffix moves its string instead
    1196          349 : }
    1197              : 
    1198              : // Parses a decimal integer: consumes the run of digits and underscores that
                        // starts at current_char.  Returns (text, characters consumed, pure flag).
                        // Underscores are kept in the returned text, and the pure flag is cleared as
                        // soon as one is seen.
    1199              : std::tuple<std::string, int, bool>
    1200        17474 : Lexer::parse_in_decimal ()
    1201              : {
    1202              :   /* A pure decimal contains only digits.  */
    1203        17474 :   bool pure_decimal = true;
    1204        17474 :   int additional_length_offset = 0;
    1205        17474 :   std::string str;
    1206        24932 :   while (ISDIGIT (current_char.value) || current_char.value == '_')
    1207              :     {
    1208         7458 :       if (current_char == '_')
    1209              :         {
    1210           14 :           pure_decimal = false;
    1211              :         }
    1212         7458 :       additional_length_offset++;
    1213              : 
                              // appended for digits and underscores alike
    1214         7458 :       str += current_char;
    1215         7458 :       skip_input ();
    1216         7458 :       current_char = peek_input ();
    1217              :     }
    1218        34948 :   return std::make_tuple (str, additional_length_offset, pure_decimal);
    1219        17474 : }
    1220              : 
    1221              : /* Parses escapes (and string continues) in byte strings and byte characters.
    1222              :  * Does not support unicode: a \u escape is diagnosed and then skipped.
                         *
                         * opening_char is only used to word the \u diagnostic: a single quote
                         * selects the word character, anything else the word string.
                         *
                         * Returns (byte, length offset, flag).  The flag is true only for a string
                         * continue (a backslash followed by a line break); in that case the byte is
                         * 0 and the length is the value of parse_partial_string_continue.
                         *
                         * NOTE(review): in the \u case the length reported by
                         * parse_partial_unicode_escape is discarded, while parse_utf8_escape adds
                         * it - confirm whether the offset is meant to leave out the skipped body. */
    1223              : std::tuple<char, int, bool>
    1224           61 : Lexer::parse_escape (char opening_char)
    1225              : {
    1226           61 :   int additional_length_offset = 0;
    1227           61 :   char output_char = 0;
    1228              : 
    1229              :   // skip to actual letter (the callers visible in this file enter with
                          // current_char on the backslash)
    1230           61 :   skip_input ();
    1231           61 :   current_char = peek_input ();
    1232           61 :   additional_length_offset++;
    1233              : 
    1234           61 :   switch (current_char.value)
    1235              :     {
    1236           17 :     case 'x':
    1237           17 :       {
    1238           17 :         auto hex_escape_pair = parse_partial_hex_escape ();
    1239           17 :         long hexLong = hex_escape_pair.first;
    1240           17 :         additional_length_offset += hex_escape_pair.second;
    1241              : 
                                // parse_partial_hex_escape converts at most two hex digits, so this
                                // range check is purely defensive
    1242           17 :         if (hexLong > 255 || hexLong < 0)
    1243            0 :           rust_error_at (
    1244              :             get_current_location (),
    1245              :             "byte \\x escape %<\\x%x%> out of range - allows up to %<\\xFF%>",
    1246              :             static_cast<unsigned int> (hexLong));
    1247              :         /* TODO: restore capital for escape output - gcc pretty-printer doesn't
    1248              :          * support %X directly */
    1249           17 :         char hexChar = static_cast<char> (hexLong);
    1250              : 
    1251           17 :         output_char = hexChar;
    1252              :       }
    1253           17 :       break;
    1254              :     case 'n':
    1255              :       output_char = '\n';
    1256              :       break;
    1257            0 :     case 'r':
    1258            0 :       output_char = '\r';
    1259            0 :       break;
    1260            1 :     case 't':
    1261            1 :       output_char = '\t';
    1262            1 :       break;
    1263            8 :     case '\\':
    1264            8 :       output_char = '\\';
    1265            8 :       break;
    1266            9 :     case '0':
    1267            9 :       output_char = '\0';
    1268            9 :       break;
    1269           15 :     case '\'':
    1270           15 :       output_char = '\'';
    1271           15 :       break;
    1272            1 :     case '"':
    1273            1 :       output_char = '"';
    1274            1 :       break;
    1275            2 :     case 'u':
    1276            3 :       rust_error_at (get_current_location (),
    1277              :                      "cannot have a unicode escape \\u in a byte %s",
    1278              :                      opening_char == '\'' ? "character" : "string");
    1279              :       // Try to parse it anyway, just to skip it
    1280            2 :       parse_partial_unicode_escape ();
    1281            2 :       return std::make_tuple (output_char, additional_length_offset, false);
    1282            0 :     case '\r':
    1283            0 :     case '\n':
    1284              :       // string continue
    1285            0 :       return std::make_tuple (0, parse_partial_string_continue (), true);
    1286            1 :     default:
    1287            1 :       rust_error_at (get_current_location (),
    1288              :                      "unknown escape sequence %<\\%s%>",
    1289            1 :                      current_char.as_string ().c_str ());
    1290              :       // nothing could be parsed: return early, leaving the unknown character
    1291              :       // unconsumed (the shared tail below is not reached)
    1292            1 :       return std::make_tuple (output_char, additional_length_offset, false);
    1293           58 :       break;
    1294              :     }
    1295              :   // all non-special cases (string continue) should skip their used char
    1296           58 :   skip_input ();
    1297           58 :   current_char = peek_input ();
    1298           58 :   additional_length_offset++;
    1299              : 
    1300              :   // ordinary escape parsed successfully: report the byte, with the
    1301              :   // string-continue flag cleared
    1302           58 :   return std::make_tuple (output_char, additional_length_offset, false);
    1303              : }
    1304              : 
    1305              : /* Parses an escape (or string continue) in a string or character. Supports
    1306              :  * unicode escapes.
                         *
                         * Returns (code point, length offset, flag).  The flag is true only for a
                         * string continue (a backslash followed by a line break); in that case the
                         * first element is 0 and the length is the value of
                         * parse_partial_string_continue.
                         *
                         * NOTE(review): for a \x value above 127 the static_cast to char may yield
                         * a negative value where char is signed, before it is converted to
                         * Codepoint.  This is only reachable after the out-of-range diagnostic -
                         * confirm the resulting code point is acceptable. */
    1307              : std::tuple<Codepoint, int, bool>
    1308         2806 : Lexer::parse_utf8_escape ()
    1309              : {
    1310         2806 :   Codepoint output_char;
    1311         2806 :   int additional_length_offset = 0;
    1312              : 
    1313              :   // skip to actual letter
    1314         2806 :   skip_input ();
    1315         2806 :   current_char = peek_input ();
    1316         2806 :   additional_length_offset++;
    1317              : 
    1318         2806 :   switch (current_char.value)
    1319              :     {
    1320           17 :     case 'x':
    1321           17 :       {
    1322           17 :         auto hex_escape_pair = parse_partial_hex_escape ();
    1323           17 :         long hexLong = hex_escape_pair.first;
    1324           17 :         additional_length_offset += hex_escape_pair.second;
    1325              : 
                                // only \x00 to \x7F are accepted here; larger values are diagnosed
                                // but still converted below
    1326           17 :         if (hexLong > 127 || hexLong < 0)
    1327            4 :           rust_error_at (
    1328              :             get_current_location (),
    1329              :             "ascii \\x escape %<\\x%x%> out of range - allows up to %<\\x7F%>",
    1330              :             static_cast<unsigned int> (hexLong));
    1331              :         /* TODO: restore capital for escape output - gcc pretty-printer doesn't
    1332              :          * support %X directly */
    1333           17 :         char hexChar = static_cast<char> (hexLong);
    1334              : 
    1335           17 :         output_char = hexChar;
    1336              :       }
    1337           17 :       break;
    1338              :     case 'n':
    1339              :       output_char = '\n';
    1340              :       break;
    1341            0 :     case 'r':
    1342            0 :       output_char = '\r';
    1343            0 :       break;
    1344            2 :     case 't':
    1345            2 :       output_char = '\t';
    1346            2 :       break;
    1347            1 :     case '\\':
    1348            1 :       output_char = '\\';
    1349            1 :       break;
    1350         1406 :     case '0':
    1351         1406 :       output_char = '\0';
    1352         1406 :       break;
    1353            1 :     case '\'':
    1354            1 :       output_char = '\'';
    1355            1 :       break;
    1356            1 :     case '"':
    1357            1 :       output_char = '"';
    1358            1 :       break;
    1359           46 :     case 'u':
    1360           46 :       {
                                // the helper consumes the whole escape body itself, so return
                                // directly instead of going through the shared tail below
    1361           46 :         auto unicode_escape_pair = parse_partial_unicode_escape ();
    1362           46 :         output_char = unicode_escape_pair.first;
    1363           46 :         additional_length_offset += unicode_escape_pair.second;
    1364              : 
    1365           46 :         return std::make_tuple (output_char, additional_length_offset, false);
    1366              :       }
    1367           28 :       break;
    1368           28 :     case '\r':
    1369           28 :     case '\n':
    1370              :       // string continue
    1371           28 :       return std::make_tuple (0, parse_partial_string_continue (), true);
    1372            1 :     default:
    1373            1 :       rust_error_at (get_current_location (),
    1374              :                      "unknown escape sequence %<\\%s%>",
    1375            1 :                      current_char.as_string ().c_str ());
    1376              :       // nothing could be parsed: return early, leaving the unknown character
    1377              :       // unconsumed (the shared tail below is not reached)
    1378            1 :       return std::make_tuple (output_char, additional_length_offset, false);
    1379         2731 :       break;
    1380              :     }
    1381              :   /* all non-special cases (unicode, string continue) should skip their used
    1382              :    * char */
    1383         2731 :   skip_input ();
    1384         2731 :   current_char = peek_input ();
    1385         2731 :   additional_length_offset++;
    1386              : 
    1387              :   // ordinary escape parsed successfully: report the code point, with the
    1388              :   // string-continue flag cleared
    1389         2731 :   return std::make_tuple (output_char, additional_length_offset, false);
    1390              : }
    1391              : 
    1392              : // Parses the body of a string continue that has been found in an escape:
                        // consumes every character for which is_whitespace holds, starting at
                        // current_char.  Each line feed advances current_line, resets current_column
                        // to 1 and calls start_line.  Returns a length that starts at 1, is reset to 1
                        // at every line feed and grows by one per other whitespace character.
    1393              : int
    1394           28 : Lexer::parse_partial_string_continue ()
    1395              : {
    1396           28 :   int additional_length_offset = 1;
    1397              : 
    1398              :   // string continue
    1399              :   // TODO use utf-8 codepoint to skip whitespaces
    1400          364 :   while (is_whitespace (current_char.value))
    1401              :     {
    1402          336 :       if (current_char == '\n')
    1403              :         {
    1404           28 :           current_line++;
    1405           28 :           current_column = 1;
    1406              :           // tell line_table that new line starts
    1407           28 :           start_line (current_line, max_column_hint);
    1408              : 
    1409              :           // reset "length"
    1410           28 :           additional_length_offset = 1;
    1411              : 
    1412              :           // get next char
    1413           28 :           skip_input ();
    1414           28 :           current_char = peek_input ();
    1415              : 
    1416           28 :           continue;
    1417              :         }
    1418              : 
                              // any other whitespace: consume it and count it
    1419          308 :       skip_input ();
    1420          308 :       current_char = peek_input ();
    1421          308 :       additional_length_offset++;
    1422              :     }
    1423              : 
    1424           28 :   return additional_length_offset;
    1425              : }
    1426              : 
    1427              : /* Parses the body of a '\x' escape. Note that it does not check that the number
    1428              :  * is valid and smaller than 255.
                         *
                         * As called from the escape parsers in this file, the x has not been
                         * skipped yet on entry, so the digits are read with peek_input (1).  The
                         * last character looked at is left for the caller to skip.
                         *
                         * Returns (value, length offset): (0, 0) when the first character is not a
                         * hex digit, (0, 1) when the second is not, otherwise the parsed value
                         * with an offset of 2.  Both failures are reported with rust_error_at. */
    1429              : std::pair<long, int>
    1430           34 : Lexer::parse_partial_hex_escape ()
    1431              : {
    1432              :   // hex char string (null-terminated)
    1433           34 :   char hexNum[3] = {0, 0, 0};
    1434              : 
    1435              :   // first hex char
    1436           34 :   current_char = peek_input (1);
    1437           34 :   int additional_length_offset = 1;
    1438              : 
    1439           34 :   if (!is_x_digit (current_char.value))
    1440              :     {
    1441            4 :       rust_error_at (get_current_location (),
    1442              :                      "invalid character %<\\x%s%> in \\x sequence",
    1443            4 :                      current_char.as_string ().c_str ());
    1444            4 :       return std::make_pair (0, 0);
    1445              :     }
    1446           30 :   hexNum[0] = current_char.value;
    1447              : 
    1448              :   // second hex char
    1449           30 :   skip_input ();
    1450           30 :   current_char = peek_input (1);
    1451           30 :   additional_length_offset++;
    1452              : 
    1453           30 :   if (!is_x_digit (current_char.value))
    1454              :     {
    1455            2 :       rust_error_at (get_current_location (),
    1456            2 :                      "invalid character %<\\x%c%s%> in \\x sequence", hexNum[0],
    1457            2 :                      current_char.as_string ().c_str ());
    1458            2 :       return std::make_pair (0, 1);
    1459              :     }
    1460           28 :   skip_input ();
    1461           28 :   hexNum[1] = current_char.value;
    1462              : 
                          // hexNum now holds exactly two hex digits plus the terminator
    1463           28 :   long hexLong = std::strtol (hexNum, nullptr, 16);
    1464              : 
    1465           28 :   return std::make_pair (hexLong, additional_length_offset);
    1466              : }
    1467              : 
    1468              : // Parses the body of a unicode escape, i.e. the braces and hex digits that
                        // follow the u.  The first skip_input call consumes the character under the
                        // input position on entry (the u, as called from the escape parsers in this
                        // file).  Returns the decoded code point paired with the number of characters
                        // counted as consumed.  Errors are reported with rust_error_at, and every
                        // early return yields Codepoint (0).
    1469              : std::pair<Codepoint, int>
    1470           48 : Lexer::parse_partial_unicode_escape ()
    1471              : {
    1472           48 :   skip_input ();
    1473           48 :   current_char = peek_input ();
    1474           48 :   int additional_length_offset = 0;
    1475              : 
    1476           48 :   if (current_char != '{')
    1477              :     {
    1478            2 :       rust_error_at (get_current_location (),
    1479              :                      "unicode escape should start with %<{%>");
    1480              :       /* Skip what should probably have been between brackets.  */
    1481           10 :       while (is_x_digit (current_char.value) || current_char == '_')
    1482              :         {
    1483            6 :           skip_input ();
    1484            6 :           current_char = peek_input ();
    1485            6 :           additional_length_offset++;
    1486              :         }
    1487            2 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1488              :     }
    1489              : 
                          // consume the opening bracket
    1490           46 :   skip_input ();
    1491           46 :   current_char = peek_input ();
    1492           46 :   additional_length_offset++;
    1493              : 
    1494           46 :   if (current_char == '_')
    1495              :     {
    1496            2 :       rust_error_at (get_current_location (),
    1497              :                      "unicode escape cannot start with %<_%>");
    1498            2 :       skip_input ();
    1499            2 :       current_char = peek_input ();
    1500            2 :       additional_length_offset++;
    1501              :       // fallthrough and try to parse the rest anyway
    1502              :     }
    1503              : 
    1504              :   // parse unicode escape - 1-6 hex digits
    1505           46 :   std::string num_str;
    1506           46 :   num_str.reserve (6);
    1507              : 
    1508              :   // loop through to add entire hex number to string
    1509          304 :   while (is_x_digit (current_char.value) || current_char.value == '_')
    1510              :     {
    1511          212 :       if (current_char == '_')
    1512              :         {
    1513              :           // don't add _ to number
    1514           24 :           skip_input ();
    1515           24 :           current_char = peek_input ();
    1516              : 
    1517           24 :           additional_length_offset++;
    1518              : 
    1519           24 :           continue;
    1520              :         }
    1521              : 
    1522          188 :       additional_length_offset++;
    1523              : 
    1524              :       // add raw hex numbers
    1525          188 :       num_str += current_char;
    1526              : 
    1527          188 :       skip_input ();
    1528          188 :       current_char = peek_input ();
    1529              :     }
    1530              : 
    1531           46 :   if (current_char == '}')
    1532              :     {
    1533           44 :       skip_input ();
    1534           44 :       current_char = peek_input ();
    1535           44 :       additional_length_offset++;
    1536              :     }
    1537              :   else
    1538              :     {
    1539              :       // Actually an error, but allow propagation anyway.  Assume that a
    1540              :       // wrong bracket, whitespace or single/double quotes are wrong
    1541              :       // termination; otherwise it is a wrong character, so skip to the actual
    1542              :       // terminator.
    1543              :       // TODO use utf-8 codepoint to skip whitespaces
    1544            2 :       if (current_char == '{' || is_whitespace (current_char.value)
    1545            4 :           || current_char == '\'' || current_char == '"')
    1546              :         {
    1547            0 :           rust_error_at (get_current_location (),
    1548              :                          "expected terminating %<}%> in unicode escape");
    1549            0 :           return std::make_pair (Codepoint (0), additional_length_offset);
    1550              :         }
    1551              :       else
    1552              :         {
    1553            2 :           rust_error_at (get_current_location (),
    1554              :                          "invalid character %qs in unicode escape",
    1555            2 :                          current_char.as_string ().c_str ());
    1556              :           // TODO use utf-8 codepoint to skip whitespaces
    1557            8 :           while (current_char != '}' && current_char != '{'
    1558            6 :                  && !is_whitespace (current_char.value) && current_char != '\''
    1559           14 :                  && current_char != '"')
    1560              :             {
    1561            6 :               skip_input ();
    1562            6 :               current_char = peek_input ();
    1563            6 :               additional_length_offset++;
    1564              :             }
    1565              :           // Consume the actual closing bracket if found
    1566            2 :           if (current_char == '}')
    1567              :             {
    1568            2 :               skip_input ();
    1569            2 :               current_char = peek_input ();
    1570            2 :               additional_length_offset++;
    1571              :             }
    1572            2 :           return std::make_pair (Codepoint (0), additional_length_offset);
    1573              :         }
    1574              :     }
    1575              : 
    1576              :   // ensure 1-6 hex characters
    1577           44 :   if (num_str.length () > 6 || num_str.length () < 1)
    1578              :     {
    1579            4 :       rust_error_at (get_current_location (),
    1580              :                      "unicode escape should be between 1 and 6 hex "
    1581              :                      "characters; it is %lu",
    1582            4 :                      (unsigned long) num_str.length ());
    1583              :       // the escape was consumed, but no valid code point can be produced
    1584            4 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1585              :     }
    1586              : 
    1587           40 :   unsigned long hex_num = std::strtoul (num_str.c_str (), nullptr, 16);
    1588              : 
    1589           40 :   if (hex_num > 0xd7ff && hex_num < 0xe000)
    1590              :     {
    1591            4 :       rust_error_at (
    1592              :         get_current_location (),
    1593              :         "unicode escape cannot be a surrogate value (D800 to DFFF)");
    1594            4 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1595              :     }
    1596              : 
    1597           36 :   if (hex_num > 0x10ffff)
    1598              :     {
    1599            4 :       rust_error_at (get_current_location (),
    1600              :                      "unicode escape cannot be larger than 10FFFF");
    1601            4 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1602              :     }
    1603              : 
    1604              :   // success: all checks passed, so hand back the decoded value
    1605           32 :   return std::make_pair (Codepoint (static_cast<uint32_t> (hex_num)),
    1606              :                          additional_length_offset);
    1607           46 : }
    1608              : 
    1609              : // Parses a byte character.  The first skip_input call consumes the character
                        // under the input position on entry (presumably the opening quote - confirm
                        // against the caller).  loc is advanced by the computed length minus one and
                        // used as the location of the returned byte-char token.  Problems (unclosed
                        // literal, non-ASCII character, empty literal) are reported with
                        // rust_error_at, but a token is still returned.
    1610              : TokenPtr
    1611           78 : Lexer::parse_byte_char (location_t loc)
    1612              : {
    1613           78 :   skip_input ();
    1614           78 :   current_column++;
    1615              :   // make current char the next character
    1616           78 :   current_char = peek_input ();
    1617              : 
    1618           78 :   int length = 1;
    1619              : 
    1620              :   // char to save
    1621           78 :   Codepoint byte_char = 0;
    1622              : 
    1623              :   // detect escapes
    1624           78 :   if (current_char == '\\')
    1625              :     {
    1626           30 :       auto escape_length_pair = parse_escape ('\'');
    1627           30 :       byte_char = std::get<0> (escape_length_pair);
    1628           30 :       length += std::get<1> (escape_length_pair);
    1629              : 
    1630           30 :       current_char = peek_input ();
    1631              : 
    1632           30 :       if (current_char != '\'')
    1633              :         {
    1634            0 :           rust_error_at (get_current_location (), "unclosed %<byte char%>");
    1635              :         }
    1636              : 
                              // one character is consumed here even when it was not the closing
                              // quote
    1637           30 :       skip_input ();
    1638           30 :       current_char = peek_input ();
    1639           30 :       length++; // go to next char
    1640              :     }
    1641           48 :   else if (current_char != '\'')
    1642              :     {
    1643              :       // otherwise, get character from direct input character
    1644           48 :       byte_char = current_char;
    1645              : 
    1646           48 :       if (!byte_char.is_ascii ())
    1647              :         {
    1648            2 :           rust_error_at (get_current_location (),
    1649              :                          "non-ASCII character in %<byte char%>");
    1650              :         }
    1651              : 
    1652           48 :       skip_input ();
    1653           48 :       current_char = peek_input ();
    1654           48 :       length++;
    1655              : 
    1656           48 :       if (current_char != '\'')
    1657              :         {
    1658            0 :           rust_error_at (get_current_location (), "unclosed %<byte char%>");
    1659              :         }
    1660              : 
    1661           48 :       skip_input ();
    1662           48 :       current_char = peek_input ();
    1663           48 :       length++; // go to next char
    1664              :     }
    1665              :   else
    1666              :     {
                              // empty literal: the quote under current_char is not consumed here
    1667            0 :       rust_error_at (get_current_location (),
    1668              :                      "no character inside %<%> for %<byte char%>");
    1669              :     }
    1670              : 
    1671           78 :   current_column += length;
    1672              : 
    1673           78 :   loc += length - 1;
    1674           78 :   return Token::make_byte_char (loc, byte_char.value);
    1675              : }
    1676              : 
    1677              : // Parses a byte string.  Characters are collected until a closing double quote
                        // or end of file; escapes are decoded through parse_escape, and a string
                        // continue contributes no byte.  On end of file an error is reported at the
                        // location where the string contents began and an END_OF_FILE token is
                        // returned; otherwise a byte-string token holding the collected bytes is
                        // returned, located at loc advanced by the string size minus one.
                        // NOTE(review): unlike parse_byte_char, characters copied directly are not
                        // checked with is_ascii here - confirm whether that is handled elsewhere.
    1678              : TokenPtr
    1679           64 : Lexer::parse_byte_string (location_t loc)
    1680              : {
    1681              :   // byte string
    1682              : 
    1683              :   // skip quote character
    1684           64 :   skip_input ();
    1685           64 :   current_column++;
    1686              : 
    1687           64 :   std::string str;
    1688           64 :   str.reserve (16); // some sensible default
    1689              : 
    1690           64 :   current_char = peek_input ();
    1691              : 
    1692           64 :   const location_t string_begin_locus = get_current_location ();
    1693              : 
    1694          438 :   while (current_char != '"' && !current_char.is_eof ())
    1695              :     {
    1696          310 :       if (current_char == '\\')
    1697              :         {
    1698           31 :           int length = 1;
    1699           31 :           auto escape_length_pair = parse_escape ('"');
    1700           31 :           char output_char = std::get<0> (escape_length_pair);
    1701              : 
                                  // a zero byte together with the flag marks a string continue; its
                                  // length replaces the running length instead of being added to it
    1702           31 :           if (output_char == 0 && std::get<2> (escape_length_pair))
    1703            0 :             length = std::get<1> (escape_length_pair) - 1;
    1704              :           else
    1705           31 :             length += std::get<1> (escape_length_pair);
    1706              : 
                                  // append everything except a string continue, so an explicit zero
                                  // byte escape is kept
    1707           31 :           if (output_char != 0 || !std::get<2> (escape_length_pair))
    1708           31 :             str += output_char;
    1709              : 
    1710           31 :           current_column += length;
    1711              : 
    1712           31 :           continue;
    1713           31 :         }
    1714              : 
    1715          279 :       current_column++;
    1716          279 :       if (current_char.value == '\n')
    1717              :         {
    1718           23 :           current_line++;
    1719           23 :           current_column = 1;
    1720              :           // tell line_table that new line starts
    1721           23 :           start_line (current_line, max_column_hint);
    1722              :         }
    1723              : 
    1724          279 :       str += current_char;
    1725          279 :       skip_input ();
    1726          279 :       current_char = peek_input ();
    1727              :     }
    1728              : 
    1729           64 :   if (current_char == '"')
    1730              :     {
    1731           57 :       current_column++;
    1732              : 
    1733           57 :       skip_input ();
    1734           57 :       current_char = peek_input ();
    1735              :     }
    1736            7 :   else if (current_char.is_eof ())
    1737              :     {
    1738            7 :       rust_error_at (string_begin_locus, "unended byte string literal");
    1739            7 :       return Token::make (END_OF_FILE, get_current_location ());
    1740              :     }
    1741              :   else
    1742              :     {
                              // the loop above only exits on a closing quote or end of file
    1743              :       rust_unreachable ();
    1744              :     }
    1745              : 
    1746           57 :   str.shrink_to_fit ();
                          // NOTE(review): for an empty literal str.size () - 1 wraps around as an
                          // unsigned value - confirm that the resulting loc is as intended.
    1747           57 :   loc += str.size () - 1;
    1748              : 
    1749           57 :   return Token::make_byte_string (loc, std::move (str));
    1750           64 : }
    1751              : 
    1752              : // Parses a raw byte string.
    1753              : TokenPtr
    1754           32 : Lexer::parse_raw_byte_string (location_t loc)
    1755              : {
    1756              :   // raw byte string literals
    1757           32 :   std::string str;
    1758           32 :   str.reserve (16); // some sensible default
    1759              : 
    1760           32 :   int length = 1;
    1761           32 :   int hash_count = 0;
    1762              : 
    1763           32 :   const location_t string_begin_locus = get_current_location ();
    1764              : 
    1765              :   // get hash count at beginning
    1766           32 :   skip_input ();
    1767           32 :   current_char = peek_input ();
    1768           32 :   length++;
    1769           32 :   current_column++;
    1770           54 :   while (current_char == '#')
    1771              :     {
    1772           22 :       hash_count++;
    1773           22 :       length++;
    1774           22 :       current_column++;
    1775              : 
    1776           22 :       skip_input ();
    1777           22 :       current_char = peek_input ();
    1778              :     }
    1779              : 
    1780           32 :   if (current_char != '"')
    1781              :     {
    1782            0 :       rust_error_at (get_current_location (),
    1783              :                      "raw byte string has no opening %<\"%>");
    1784              :     }
    1785              : 
    1786           32 :   skip_input ();
    1787           32 :   current_char = peek_input ();
    1788           32 :   length++;
    1789           32 :   current_column++;
    1790              : 
    1791          330 :   while (true)
    1792              :     {
    1793          181 :       if (current_char == '"')
    1794              :         {
    1795           51 :           bool enough_hashes = true;
    1796              : 
    1797           51 :           for (int i = 0; i < hash_count; i++)
    1798              :             {
    1799           26 :               if (peek_input (i + 1) != '#')
    1800              :                 {
    1801              :                   enough_hashes = false;
    1802              :                   break;
    1803              :                 }
    1804              :             }
    1805              : 
    1806           35 :           if (enough_hashes)
    1807              :             {
    1808              :               // skip enough input and peek enough input
    1809           25 :               skip_input (hash_count);
    1810           25 :               current_char = peek_input ();
    1811           25 :               length += hash_count + 1;
    1812           25 :               current_column += hash_count + 1;
    1813           25 :               break;
    1814              :             }
    1815              :         }
    1816          146 :       else if (current_char.is_eof ())
    1817              :         {
    1818            7 :           rust_error_at (string_begin_locus, "unended raw byte string literal");
    1819            7 :           return Token::make (END_OF_FILE, get_current_location ());
    1820              :         }
    1821          139 :       else if (current_char.value > 127)
    1822              :         {
    1823            1 :           rust_error_at (get_current_location (),
    1824              :                          "character %qs in raw byte string out of range",
    1825            1 :                          current_char.as_string ().c_str ());
    1826            1 :           current_char = 0;
    1827              :         }
    1828              : 
    1829          149 :       length++;
    1830          149 :       current_column++;
    1831          149 :       if (current_char == '\n')
    1832              :         {
    1833           22 :           current_line++;
    1834           22 :           current_column = 1;
    1835           22 :           start_line (current_line, max_column_hint);
    1836              :         }
    1837              : 
    1838          149 :       str += current_char;
    1839          149 :       skip_input ();
    1840          149 :       current_char = peek_input ();
    1841          149 :     }
    1842              : 
    1843           25 :   loc += length - 1;
    1844              : 
    1845           25 :   str.shrink_to_fit ();
    1846              : 
    1847           25 :   return Token::make_byte_string (loc, std::move (str));
    1848           32 : }
    1849              : 
    1850              : // Parses a raw identifier.
    1851              : TokenPtr
    1852           81 : Lexer::parse_raw_identifier (location_t loc)
    1853              : {
    1854              :   // raw identifier
    1855           81 :   std::string str;
    1856           81 :   str.reserve (16); // default
    1857              : 
    1858           81 :   skip_input ();
    1859           81 :   current_char = peek_input ();
    1860              : 
    1861           81 :   current_column += 2;
    1862              : 
    1863           81 :   bool first_is_underscore = current_char == '_';
    1864              : 
    1865           81 :   int length = 0;
    1866           81 :   current_char = peek_input ();
    1867              :   // loop through entire name
    1868          475 :   while (is_identifier_continue (current_char.value))
    1869              :     {
    1870          313 :       length++;
    1871              : 
    1872          313 :       str += current_char;
    1873          313 :       skip_input ();
    1874          313 :       current_char = peek_input ();
    1875              :     }
    1876              : 
    1877           81 :   current_column += length;
    1878              : 
    1879           81 :   rust_debug ("raw ident: %s", str.c_str ());
    1880              : 
    1881              :   // if just a single underscore, not an identifier
    1882           81 :   if (first_is_underscore && length == 1)
    1883            1 :     rust_error_at (get_current_location (),
    1884              :                    "%<_%> is not a valid raw identifier");
    1885              : 
    1886           81 :   using namespace Rust::Values;
    1887           81 :   std::set<std::string> invalid{
    1888           81 :     Keywords::CRATE, Keywords::EXTERN_KW,  Keywords::SELF,
    1889           81 :     Keywords::SUPER, Keywords::SELF_ALIAS,
    1890          486 :   };
    1891              : 
    1892           81 :   if (invalid.find (str) != invalid.end ())
    1893              :     {
    1894            1 :       rust_error_at (get_current_location (),
    1895              :                      "%qs is a forbidden raw identifier", str.c_str ());
    1896              : 
    1897            1 :       return nullptr;
    1898              :     }
    1899              :   else
    1900              :     {
    1901           80 :       str.shrink_to_fit ();
    1902           80 :       loc += length - 1;
    1903              : 
    1904           80 :       return Token::make_identifier (loc, std::move (str));
    1905              :     }
    1906           81 : }
    1907              : 
    1908              : // skip broken string input (unterminated strings)
    1909              : void
    1910            0 : Lexer::skip_broken_string_input (Codepoint current_char)
    1911              : {
    1912            0 :   while (current_char != '"' && !current_char.is_eof ())
    1913              :     {
    1914            0 :       if (current_char == '\n')
    1915              :         {
    1916            0 :           current_line++;
    1917            0 :           current_column = 1;
    1918              :         }
    1919              :       else
    1920              :         {
    1921            0 :           current_column++;
    1922              :         }
    1923            0 :       skip_input ();
    1924            0 :       current_char = peek_input ();
    1925              :     }
    1926            0 :   if (current_char == '"')
    1927              :     {
    1928            0 :       current_column++;
    1929              : 
    1930            0 :       skip_input ();
    1931            0 :       current_char = peek_input ();
    1932              :     }
    1933            0 :   rust_debug ("skipped to %d:%d due to bad quotes", current_line,
    1934              :               current_column);
    1935            0 : }
    1936              : 
    1937              : // Parses a string.
    1938              : TokenPtr
    1939        12745 : Lexer::parse_string (location_t loc)
    1940              : {
    1941        12745 :   std::string str;
    1942        12745 :   str.reserve (16); // some sensible default
    1943              : 
    1944        12745 :   current_char = peek_input ();
    1945              : 
    1946        12745 :   const location_t string_begin_locus = get_current_location ();
    1947              : 
    1948              :   // FIXME: This fails if the input ends. How do we check for EOF?
    1949       104398 :   while (current_char.value != '"' && !current_char.is_eof ())
    1950              :     {
    1951        78908 :       if (current_char.value == '\\')
    1952              :         {
    1953         2783 :           int length = 1;
    1954              : 
    1955              :           // parse escape
    1956         2783 :           auto utf8_escape_pair = parse_utf8_escape ();
    1957         2783 :           current_char = std::get<0> (utf8_escape_pair);
    1958              : 
    1959         2783 :           if (current_char == Codepoint (0) && std::get<2> (utf8_escape_pair))
    1960           28 :             length = std::get<1> (utf8_escape_pair) - 1;
    1961              :           else
    1962         2755 :             length += std::get<1> (utf8_escape_pair);
    1963              : 
    1964         2783 :           if (current_char != Codepoint (0) || !std::get<2> (utf8_escape_pair))
    1965         5510 :             str += current_char.as_string ();
    1966              : 
    1967         2783 :           current_column += length;
    1968              : 
    1969              :           // FIXME: should remove this but can't.
    1970              :           // `parse_utf8_escape` does not update `current_char` correctly.
    1971         2783 :           current_char = peek_input ();
    1972         2783 :           continue;
    1973         2783 :         }
    1974              : 
    1975        76125 :       current_column++;
    1976        76125 :       if (current_char.value == '\n')
    1977              :         {
    1978           67 :           current_line++;
    1979           67 :           current_column = 1;
    1980              :           // tell line_table that new line starts
    1981           67 :           start_line (current_line, max_column_hint);
    1982              :         }
    1983              : 
    1984        76125 :       str += current_char;
    1985        76125 :       skip_input ();
    1986        76125 :       current_char = peek_input ();
    1987              :     }
    1988              : 
    1989        12745 :   if (current_char.value == '"')
    1990              :     {
    1991        12731 :       current_column++;
    1992              : 
    1993        12731 :       skip_input ();
    1994        12731 :       current_char = peek_input ();
    1995              :     }
    1996           14 :   else if (current_char.is_eof ())
    1997              :     {
    1998           14 :       rust_error_at (string_begin_locus, "unended string literal");
    1999           14 :       return Token::make (END_OF_FILE, get_current_location ());
    2000              :     }
    2001              :   else
    2002              :     {
    2003              :       rust_unreachable ();
    2004              :     }
    2005              : 
    2006        12731 :   str.shrink_to_fit ();
    2007              : 
    2008        12731 :   return Token::make_string (loc, std::move (str));
    2009        12745 : }
    2010              : 
    2011              : // Parses an identifier or keyword.
    2012              : TokenPtr
    2013       289604 : Lexer::parse_identifier_or_keyword (location_t loc)
    2014              : {
    2015       289604 :   std::string str;
    2016       289604 :   str.reserve (16); // default
    2017       579208 :   str += current_char.as_string ();
    2018              : 
    2019       289604 :   bool first_is_underscore = current_char == '_';
    2020              : 
    2021       289604 :   int length = 1;
    2022       289604 :   current_char = peek_input ();
    2023              : 
    2024              :   // loop through entire name
    2025      1469952 :   while (is_identifier_continue (current_char.value))
    2026              :     {
    2027       890744 :       auto s = current_char.as_string ();
    2028       890744 :       length++;
    2029              : 
    2030      1781488 :       str += current_char.as_string ();
    2031       890744 :       skip_input ();
    2032       890744 :       current_char = peek_input ();
    2033       890744 :     }
    2034              : 
    2035       289604 :   current_column += length;
    2036              : 
    2037              :   // if just a single underscore, not an identifier
    2038       289604 :   if (first_is_underscore && length == 1)
    2039         1316 :     return Token::make (UNDERSCORE, loc);
    2040              : 
    2041       288288 :   str.shrink_to_fit ();
    2042              : 
    2043       288288 :   loc += length - 1;
    2044              : 
    2045       288288 :   TokenId keyword = classify_keyword (str);
    2046       288288 :   if (keyword == IDENTIFIER)
    2047       191918 :     return Token::make_identifier (loc, std::move (str));
    2048              :   else
    2049        96370 :     return Token::make (keyword, loc);
    2050       289604 : }
    2051              : 
    2052              : // Possibly returns a raw string token if it exists - otherwise returns null.
    2053              : TokenPtr
    2054         3961 : Lexer::maybe_parse_raw_string (location_t loc)
    2055              : {
    2056         3961 :   int peek_index = 0;
    2057         3970 :   while (peek_input (peek_index) == '#')
    2058            9 :     peek_index++;
    2059              : 
    2060         3961 :   if (peek_input (peek_index) == '"')
    2061           25 :     return parse_raw_string (loc, peek_index);
    2062              :   else
    2063         3936 :     return nullptr;
    2064              : }
    2065              : 
    2066              : // Returns a raw string token.
    2067              : TokenPtr
    2068           25 : Lexer::parse_raw_string (location_t loc, int initial_hash_count)
    2069              : {
    2070              :   // raw string literals
    2071           25 :   std::string str;
    2072           25 :   str.reserve (16); // some sensible default
    2073              : 
    2074           25 :   int length = 1 + initial_hash_count;
    2075           25 :   current_column += length;
    2076              : 
    2077           25 :   const location_t string_begin_locus = get_current_location ();
    2078              : 
    2079           25 :   if (initial_hash_count > 0)
    2080            7 :     skip_input (initial_hash_count - 1);
    2081              : 
    2082           25 :   current_char = peek_input ();
    2083              : 
    2084           25 :   if (current_char != '"')
    2085            0 :     rust_error_at (get_current_location (), "raw string has no opening %<\"%>");
    2086              : 
    2087           25 :   length++;
    2088           25 :   current_column++;
    2089           25 :   skip_input ();
    2090           25 :   current_char = peek_input ();
    2091              : 
    2092          181 :   while (true)
    2093              :     {
    2094          103 :       if (current_char.value == '"')
    2095              :         {
    2096           38 :           bool enough_hashes = true;
    2097              : 
    2098           38 :           for (int i = 0; i < initial_hash_count; i++)
    2099              :             {
    2100           13 :               if (peek_input (i + 1) != '#')
    2101              :                 {
    2102              :                   enough_hashes = false;
    2103              :                   break;
    2104              :                 }
    2105              :             }
    2106              : 
    2107           28 :           if (enough_hashes)
    2108              :             {
    2109              :               // skip enough input and peek enough input
    2110           25 :               skip_input (initial_hash_count);
    2111           25 :               current_char = peek_input ();
    2112           25 :               length += initial_hash_count + 1;
    2113           25 :               current_column += initial_hash_count + 1;
    2114           25 :               break;
    2115              :             }
    2116              :         }
    2117           75 :       else if (current_char.is_eof ())
    2118              :         {
    2119            0 :           rust_error_at (string_begin_locus, "unended raw string literal");
    2120            0 :           return Token::make (END_OF_FILE, get_current_location ());
    2121              :         }
    2122              : 
    2123           78 :       length++;
    2124           78 :       current_column++;
    2125           78 :       if (current_char == '\n')
    2126              :         {
    2127            1 :           current_line++;
    2128            1 :           current_column = 1;
    2129            1 :           start_line (current_line, max_column_hint);
    2130              :         }
    2131              : 
    2132          156 :       str += current_char.as_string ();
    2133           78 :       skip_input ();
    2134           78 :       current_char = peek_input ();
    2135           78 :     }
    2136              : 
    2137           25 :   loc += length - 1;
    2138              : 
    2139           25 :   str.shrink_to_fit ();
    2140              : 
    2141           25 :   return Token::make_raw_string (loc, std::move (str));
    2142           25 : }
    2143              : 
    2144              : template <typename IsDigitFunc>
    2145              : TokenPtr
    2146          335 : Lexer::parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func,
    2147              :                                       IntegerLiteralBase base)
    2148              : {
    2149          335 :   std::string raw_str = "0";
    2150          335 :   raw_str += current_char; // x, o, b
    2151          335 :   skip_input ();
    2152              : 
    2153          335 :   int length = 2;
    2154          335 :   bool has_valid_digit = false;
    2155              : 
    2156          335 :   current_char = peek_input ();
    2157              : 
    2158              :   // loop through to add entire number to string
    2159         2379 :   while (true)
    2160              :     {
    2161         2714 :       if (is_digit_func (current_char.value))
    2162              :         {
    2163              :           has_valid_digit = true;
    2164              :         }
    2165          389 :       else if (current_char != '_')
    2166              :         {
    2167              :           break;
    2168              :         }
    2169         2379 :       length++;
    2170              : 
    2171         2379 :       raw_str += current_char;
    2172         2379 :       skip_input ();
    2173         2379 :       current_char = peek_input ();
    2174              :     }
    2175              : 
    2176          335 :   int suffix_start = raw_str.length ();
    2177              : 
    2178              :   // parse in suffix if it exists
    2179          335 :   auto suffix_pair = parse_in_suffix ();
    2180          335 :   PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
    2181          335 :   raw_str += suffix_pair.first;
    2182          335 :   length += suffix_pair.second;
    2183              : 
    2184          335 :   current_column += length;
    2185              : 
    2186          335 :   if (!has_valid_digit)
    2187              :     {
    2188            5 :       rust_error_at (loc, ErrorCode::E0768, "no valid digits found for number");
    2189              :     }
    2190              : 
    2191          335 :   loc += length - 1;
    2192              : 
    2193          335 :   return Token::make_int (loc, std::move (raw_str), suffix_start, base,
    2194          335 :                           type_hint);
    2195          335 : }
    2196              : 
    2197              : // Parses a hex, binary or octal int literal.
    2198              : TokenPtr
    2199          335 : Lexer::parse_non_decimal_int_literals (location_t loc)
    2200              : {
    2201          335 :   current_char = peek_input ();
    2202              : 
    2203          335 :   if (current_char == 'x')
    2204              :     {
    2205              :       // hex (integer only)
    2206          296 :       return parse_non_decimal_int_literal (loc, is_x_digit,
    2207          296 :                                             IntegerLiteralBase::Hex);
    2208              :     }
    2209           39 :   else if (current_char == 'o')
    2210              :     {
    2211              :       // octal (integer only)
    2212           19 :       return parse_non_decimal_int_literal (loc, is_octal_digit,
    2213           19 :                                             IntegerLiteralBase::Octal);
    2214              :     }
    2215           20 :   else if (current_char == 'b')
    2216              :     {
    2217              :       // binary (integer only)
    2218           20 :       return parse_non_decimal_int_literal (loc, is_bin_digit,
    2219           20 :                                             IntegerLiteralBase::Binary);
    2220              :     }
    2221              :   else
    2222              :     {
    2223            0 :       return nullptr;
    2224              :     }
    2225              : }
    2226              : 
    2227              : // Parses a decimal-based int literal or float literal.
    2228              : TokenPtr
    2229        17117 : Lexer::parse_decimal_int_or_float (location_t loc)
    2230              : {
    2231        17117 :   std::string str;
    2232        17117 :   str.reserve (16); // some sensible default
    2233        17117 :   str += current_char;
    2234              : 
    2235        17117 :   int length = 1;
    2236        17117 :   bool first_zero = current_char == '0';
    2237              : 
    2238        17117 :   current_char = peek_input ();
    2239              : 
    2240              :   // parse initial decimal integer (or first integer part of float) literal
    2241        17117 :   auto initial_decimal = parse_in_decimal ();
    2242        17117 :   str += std::get<0> (initial_decimal);
    2243        17117 :   length += std::get<1> (initial_decimal);
    2244              : 
    2245              :   // detect float literal
    2246              :   //
    2247              :   // Note:
    2248              :   //
    2249              :   // We should not use is_float_digit () for this verification but instead
    2250              :   // directly ISDIGIT because rust does not support non digit values right after
    2251              :   // a dot.
    2252              :   // The following value is not legal in rust:
    2253              :   // let a = 3.e1;
    2254              :   // A `0` should be put between the dot and the exponent to be valid
    2255              :   // (eg. 3.0e1).
    2256        17117 :   if (current_char == '.' && ISDIGIT (peek_input (1).value))
    2257              :     {
    2258              :       // float with a '.', parse another decimal into it
    2259              : 
    2260              :       // add . to str
    2261          349 :       str += current_char;
    2262          349 :       skip_input ();
    2263          349 :       current_char = peek_input ();
    2264          349 :       length++;
    2265              : 
    2266              :       // parse another decimal number for float
    2267          349 :       auto second_decimal = parse_in_decimal ();
    2268          349 :       str += std::get<0> (second_decimal);
    2269          349 :       length += std::get<1> (second_decimal);
    2270              : 
    2271              :       // parse in exponent part if it exists
    2272          349 :       auto exponent_pair = parse_in_exponent_part ();
    2273          349 :       str += exponent_pair.first;
    2274          349 :       length += exponent_pair.second;
    2275              : 
    2276          349 :       int suffix_start = str.length ();
    2277              : 
    2278              :       // parse in type suffix if it exists
    2279          349 :       auto suffix_pair = parse_in_suffix ();
    2280          349 :       PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
    2281          349 :       str += suffix_pair.first;
    2282          349 :       length += suffix_pair.second;
    2283              : 
    2284          349 :       current_column += length;
    2285              : 
    2286          349 :       loc += length - 1;
    2287              : 
    2288          349 :       str.shrink_to_fit ();
    2289          349 :       return Token::make_float (loc, std::move (str), suffix_start, type_hint);
    2290          349 :     }
    2291        16768 :   else if (current_char == '.'
    2292        16768 :            && check_valid_float_dot_end (peek_input (1).value))
    2293              :     {
    2294              :       // float that is just an integer with a terminating '.' character
    2295              : 
    2296              :       // add . to str
    2297            8 :       str += current_char;
    2298            8 :       skip_input ();
    2299            8 :       current_char = peek_input ();
    2300            8 :       length++;
    2301              : 
    2302              :       // type hint not allowed
    2303              : 
    2304            8 :       current_column += length;
    2305              : 
    2306            8 :       loc += length - 1;
    2307              : 
    2308            8 :       str.shrink_to_fit ();
    2309           16 :       return Token::make_float (loc, std::move (str), str.length (),
    2310            8 :                                 CORETYPE_UNKNOWN);
    2311              :     }
    2312        16760 :   else if (current_char == 'E' || current_char == 'e')
    2313              :     {
    2314              :       // exponent float with no '.' character
    2315              : 
    2316              :       // parse exponent part
    2317            0 :       auto exponent_pair = parse_in_exponent_part ();
    2318            0 :       str += exponent_pair.first;
    2319            0 :       length += exponent_pair.second;
    2320              : 
    2321            0 :       int suffix_start = str.length ();
    2322              : 
    2323              :       // parse in type suffix if it exists
    2324            0 :       auto suffix_pair = parse_in_suffix ();
    2325            0 :       PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
    2326            0 :       str += suffix_pair.first;
    2327            0 :       length += suffix_pair.second;
    2328              : 
    2329            0 :       current_column += length;
    2330              : 
    2331            0 :       loc += length - 1;
    2332              : 
    2333            0 :       str.shrink_to_fit ();
    2334            0 :       return Token::make_float (loc, std::move (str), suffix_start, type_hint);
    2335            0 :     }
    2336              :   else
    2337              :     {
    2338              :       // is an integer
    2339              : 
    2340        16760 :       int suffix_start = str.length ();
    2341              : 
    2342              :       // parse in type suffix if it exists
    2343        16760 :       auto suffix_pair = parse_in_suffix ();
    2344        16760 :       str += suffix_pair.first;
    2345              : 
    2346        16760 :       PrimitiveCoreType type_hint = CORETYPE_UNKNOWN;
    2347              : 
    2348              :       /* A "real" pure decimal doesn't have a suffix and no zero prefix.  */
    2349        16760 :       bool pure_decimal = std::get<2> (initial_decimal);
    2350        16750 :       if (pure_decimal && (!first_zero || suffix_start == 1)
    2351        33508 :           && suffix_pair.first.empty ())
    2352              :         type_hint = CORETYPE_PURE_DECIMAL;
    2353              : 
    2354        16760 :       length += suffix_pair.second;
    2355              : 
    2356        16760 :       current_column += length;
    2357              : 
    2358        16760 :       loc += length - 1;
    2359              : 
    2360        16760 :       str.shrink_to_fit ();
    2361        16760 :       return Token::make_int (loc, std::move (str), suffix_start,
    2362        16760 :                               IntegerLiteralBase::Decimal, type_hint);
    2363        16760 :     }
    2364        17117 : }
    2365              : 
    2366              : TokenPtr
    2367          848 : Lexer::parse_char_or_lifetime (location_t loc)
    2368              : {
    2369          848 :   int length = 1;
    2370              : 
    2371          848 :   current_char = peek_input ();
    2372          848 :   if (current_char.is_eof ())
    2373            0 :     return nullptr;
    2374              : 
    2375              :   // parse escaped char literal
    2376          848 :   if (current_char.value == '\\')
    2377              :     {
    2378              :       // parse escape
    2379           23 :       auto utf8_escape_pair = parse_utf8_escape ();
    2380           23 :       Codepoint escaped_char = std::get<0> (utf8_escape_pair);
    2381           23 :       length += std::get<1> (utf8_escape_pair);
    2382              : 
    2383           23 :       if (peek_input ().value != '\'')
    2384              :         {
    2385            0 :           rust_error_at (get_current_location (), "unended character literal");
    2386              :         }
    2387              :       else
    2388              :         {
    2389           23 :           skip_input ();
    2390           23 :           current_char = peek_input ();
    2391           23 :           length++;
    2392              :         }
    2393              : 
    2394           23 :       current_column += length;
    2395              : 
    2396           23 :       loc += length - 1;
    2397              : 
    2398           23 :       return Token::make_char (loc, escaped_char);
    2399              :     }
    2400              :   else
    2401              :     {
    2402          825 :       skip_input ();
    2403              : 
    2404          825 :       if (peek_input ().value == '\'')
    2405              :         {
    2406              :           // parse non-escaped char literal
    2407          203 :           Codepoint non_escaped_char = current_char;
    2408              : 
    2409              :           // skip the ' character
    2410          203 :           skip_input ();
    2411          203 :           current_char = peek_input ();
    2412              : 
    2413              :           // TODO fix due to different widths of utf-8 chars?
    2414          203 :           current_column += 3;
    2415              : 
    2416          203 :           loc += 2;
    2417              : 
    2418          203 :           return Token::make_char (loc, non_escaped_char);
    2419              :         }
    2420          622 :       else if (is_identifier_start (current_char.value))
    2421              :         {
    2422              :           // parse lifetime name
    2423          622 :           std::string str;
    2424         1244 :           str += current_char.as_string ();
    2425          622 :           length++;
    2426              : 
    2427          622 :           current_char = peek_input ();
    2428         1979 :           while (is_identifier_continue (current_char.value))
    2429              :             {
    2430         1470 :               str += current_char.as_string ();
    2431          735 :               skip_input ();
    2432          735 :               current_char = peek_input ();
    2433          735 :               length++;
    2434              :             }
    2435              : 
    2436          622 :           current_column += length;
    2437              : 
    2438          622 :           loc += length - 1;
    2439              : 
    2440              :           // TODO some keywords cannot be used for a lifetime label #2306
    2441              :           // https://doc.rust-lang.org/reference/tokens.html
    2442              : 
    2443          622 :           str.shrink_to_fit ();
    2444          622 :           return Token::make_lifetime (loc, std::move (str));
    2445          622 :         }
    2446              :       else
    2447              :         {
    2448            0 :           rust_error_at (
    2449              :             get_current_location (),
    2450              :             "expected %' after character constant in character literal");
    2451            0 :           return nullptr;
    2452              :         }
    2453              :     }
    2454              : }
    2455              : 
    2456              : void
    2457          100 : Lexer::split_current_token (TokenId new_left, TokenId new_right)
    2458              : {
    2459              :   /* TODO: assert that this TokenId is a "simple token" like punctuation and not
    2460              :    * like "IDENTIFIER"? */
    2461          100 :   location_t current_loc = peek_token ()->get_locus ();
    2462          100 :   TokenPtr new_left_tok = Token::make (new_left, current_loc);
    2463          100 :   TokenPtr new_right_tok = Token::make (new_right, current_loc + 1);
    2464              : 
    2465          100 :   token_queue.replace_current_value (std::move (new_left_tok));
    2466          100 :   token_queue.insert (1, std::move (new_right_tok));
    2467          100 : }
    2468              : 
    2469              : void
    2470            2 : Lexer::split_current_token (std::vector<TokenPtr> new_tokens)
    2471              : {
    2472            2 :   rust_assert (new_tokens.size () > 0);
    2473            4 :   token_queue.replace_current_value (new_tokens[0]);
    2474              : 
    2475            5 :   for (size_t i = 1; i < new_tokens.size (); i++)
    2476              :     {
    2477            6 :       token_queue.insert (i, new_tokens[i]);
    2478              :     }
    2479            2 : }
    2480              : 
    2481              : void
    2482       174950 : Lexer::start_line (int current_line, int current_column)
    2483              : {
    2484       174950 :   if (line_map)
    2485       174950 :     linemap_line_start (line_table, current_line, current_column);
    2486       174950 : }
    2487              : 
    2488              : } // namespace Rust
    2489              : 
    2490              : #if CHECKING_P
    2491              : 
    2492              : namespace selftest {
    2493              : 
    2494              : // Checks if `src` has the same contents as the given characters
    2495              : static void
    2496            6 : assert_source_content (Rust::InputSource &src,
    2497              :                        const std::vector<uint32_t> &expected)
    2498              : {
    2499            6 :   Rust::Codepoint src_char = src.next ();
    2500           41 :   for (auto expected_char : expected)
    2501              :     {
    2502              :       // Make sure that `src` is not shorter than `expected`
    2503           35 :       ASSERT_FALSE (src_char.is_eof ());
    2504              :       // Checks skipped character is expeceted one.
    2505           35 :       ASSERT_EQ (src_char.value, expected_char);
    2506           35 :       src_char = src.next ();
    2507              :     }
    2508              :   // Checks if `src` and `chars` has the same length.
    2509            6 :   ASSERT_TRUE (src_char.is_eof ());
    2510            6 : }
    2511              : 
    2512              : static void
    2513            4 : test_buffer_input_source (std::string str,
    2514              :                           const std::vector<uint32_t> &expected)
    2515              : {
    2516            4 :   Rust::BufferInputSource source (str, 0);
    2517            4 :   assert_source_content (source, expected);
    2518            4 : }
    2519              : 
    2520              : static void
    2521            2 : test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
    2522              : {
    2523            2 :   FILE *tmpf = tmpfile ();
    2524              :   // Moves to the first character
    2525            2 :   fputs (str.c_str (), tmpf);
    2526            2 :   std::rewind (tmpf);
    2527            2 :   Rust::FileInputSource source (tmpf);
    2528            2 :   assert_source_content (source, expected);
    2529            2 : }
    2530              : 
    2531              : void
    2532            1 : rust_input_source_test ()
    2533              : {
    2534              :   // ASCII
    2535            1 :   std::string src = (const char *) u8"_abcde\tXYZ\v\f";
    2536            1 :   std::vector<uint32_t> expected = {u'_',  u'a', u'b', u'c', u'd',  u'e',
    2537            1 :                                     u'\t', u'X', u'Y', u'Z', u'\v', u'\f'};
    2538            2 :   test_buffer_input_source (src, expected);
    2539              : 
    2540              :   // BOM
    2541            1 :   src = (const char *) u8"\xef\xbb\xbfOK";
    2542            1 :   expected = {u'O', u'K'};
    2543            2 :   test_buffer_input_source (src, expected);
    2544              : 
    2545              :   // Russian
    2546            1 :   src = (const char *) u8"приве́т";
    2547            1 :   expected = {u'п',
    2548              :               u'р',
    2549              :               u'и',
    2550              :               u'в',
    2551              :               0x0435 /* CYRILLIC SMALL LETTER IE е */,
    2552              :               0x301 /* COMBINING ACUTE ACCENT ́ */,
    2553            1 :               u'т'};
    2554            2 :   test_buffer_input_source (src, expected);
    2555              : 
    2556            1 :   src = (const char *) u8"❤️🦀";
    2557            1 :   expected = {0x2764 /* HEAVY BLACK HEART */,
    2558            1 :               0xfe0f /* VARIATION SELECTOR-16 */, U'🦀'};
    2559            2 :   test_buffer_input_source (src, expected);
    2560              : 
    2561            1 :   src = (const char *) u8"こんにちは";
    2562            1 :   expected = {u'こ', u'ん', u'に', u'ち', u'は'};
    2563            2 :   test_file_input_source (src, expected);
    2564              : 
    2565            1 :   src = (const char *) u8"👮‍♂👩‍⚕";
    2566            1 :   expected
    2567              :     = {0x1f46e /* POLICE OFFICER */,   0x200d /* ZERO WIDTH JOINER */,
    2568              :        0x2642 /* MALE SIGN */,         0x1f469 /* WOMAN */,
    2569            1 :        0x200d /* ZERO WIDTH JOINER */, 0x2695 /* STAFF OF AESCULAPIUS */};
    2570            2 :   test_file_input_source (src, expected);
    2571            1 : }
    2572              : 
    2573              : } // namespace selftest
    2574              : 
    2575              : #endif // CHECKING_P
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.