LCOV - code coverage report
Current view: top level - gcc/rust/lex - rust-lex.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.5 % 1368 1225
Test Date: 2026-02-28 14:20:25 Functions: 92.5 % 53 49
Legend: Lines:     hit not hit

            Line data    Source code
       1              : // Copyright (C) 2020-2026 Free Software Foundation, Inc.
       2              : 
       3              : // This file is part of GCC.
       4              : 
       5              : // GCC is free software; you can redistribute it and/or modify it under
       6              : // the terms of the GNU General Public License as published by the Free
       7              : // Software Foundation; either version 3, or (at your option) any later
       8              : // version.
       9              : 
      10              : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      11              : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
      12              : // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      13              : // for more details.
      14              : 
      15              : // You should have received a copy of the GNU General Public License
      16              : // along with GCC; see the file COPYING3.  If not see
      17              : // <http://www.gnu.org/licenses/>.
      18              : 
      19              : #include "rust-codepoint.h"
      20              : #include "rust-system.h"
      21              : #include "rust-lex.h"
      22              : #include "rust-diagnostics.h"
      23              : #include "rust-linemap.h"
      24              : #include "rust-edition.h"
      25              : #include "safe-ctype.h"
      26              : #include "cpplib.h"
      27              : #include "rust-keyword-values.h"
      28              : 
      29              : namespace Rust {
      30              : // TODO: move to separate compilation unit?
      31              : // overload += for uint32_t to allow 32-bit encoded utf-8 to be added
      32              : std::string &
      33      3176200 : operator+= (std::string &str, Codepoint char32)
      34              : {
      35      3176200 :   if (char32.value < 0x80)
      36              :     {
      37      3175187 :       str += static_cast<char> (char32.value);
      38              :     }
      39         1013 :   else if (char32.value < (0x1F + 1) << (1 * 6))
      40              :     {
      41          674 :       str += static_cast<char> (0xC0 | ((char32.value >> 6) & 0x1F));
      42          674 :       str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
      43              :     }
      44          339 :   else if (char32.value < (0x0F + 1) << (2 * 6))
      45              :     {
      46          329 :       str += static_cast<char> (0xE0 | ((char32.value >> 12) & 0x0F));
      47          329 :       str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F));
      48          329 :       str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
      49              :     }
      50           10 :   else if (char32.value < (0x07 + 1) << (3 * 6))
      51              :     {
      52            6 :       str += static_cast<char> (0xF0 | ((char32.value >> 18) & 0x07));
      53            6 :       str += static_cast<char> (0x80 | ((char32.value >> 12) & 0x3F));
      54            6 :       str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F));
      55            6 :       str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F));
      56              :     }
      57              :   else
      58              :     {
      59            4 :       rust_debug ("Invalid unicode codepoint found: '%u' ", char32.value);
      60              :     }
      61      3176200 :   return str;
      62              : }
      63              : 
      64              : std::string
      65      2894707 : Codepoint::as_string ()
      66              : {
      67      2894707 :   std::string str;
      68              : 
      69              :   // str += Codepoint (value);
      70      2894707 :   str += *this;
      71              : 
      72      2894707 :   return str;
      73              : }
      74              : 
      /* Returns true if NUMBER may appear inside a float literal: an ASCII
       * decimal digit or an exponent marker ('E' / 'e').  '_' and '.' are
       * deliberately excluded as they need lookahead for handling.
       *
       * The digit test is an explicit ASCII range check, so that a code point
       * above 0xFF is never classified by its low byte alone.  */
      bool
      is_float_digit (uint32_t number)
      {
        const bool is_ascii_digit = number >= '0' && number <= '9';
        return is_ascii_digit || number == 'E' || number == 'e';
      }
      82              : 
      /* Returns true if NUMBER is an ASCII hexadecimal digit (0-9, a-f, A-F).
       *
       * Spelled out as explicit ASCII ranges, like is_octal_digit and
       * is_bin_digit, so that a code point above 0xFF is never classified by
       * its low byte alone.  */
      bool
      is_x_digit (uint32_t number)
      {
        const bool is_decimal = number >= '0' && number <= '9';
        const bool is_lower_hex = number >= 'a' && number <= 'f';
        const bool is_upper_hex = number >= 'A' && number <= 'F';
        return is_decimal || is_lower_hex || is_upper_hex;
      }
      90              : 
      /* Returns true if NUMBER is one of the ASCII octal digits '0'..'7'.  */
      bool
      is_octal_digit (uint32_t number)
      {
        if (number < '0')
          return false;

        return number <= '7';
      }
      96              : 
      /* Returns true if NUMBER is a binary digit, i.e. ASCII '0' or '1'.  */
      bool
      is_bin_digit (uint32_t number)
      {
        switch (number)
          {
          case '0':
          case '1':
            return true;
          default:
            return false;
          }
      }
     102              : 
     103              : bool
     104          142 : check_valid_float_dot_end (uint32_t character)
     105              : {
     106          142 :   return character != '.' && character != '_' && !ISALPHA (character);
     107              : }
     108              : 
     /* Returns true if CHARACTER is one of the code points Rust treats as
      * whitespace; see https://doc.rust-lang.org/reference/whitespace.html.  */
     bool
     is_whitespace (uint32_t character)
     {
       // '\t', '\n', '\v', '\f' and '\r' are the contiguous range 0x09..0x0D.
       if (character >= '\t' && character <= '\r')
         return true;

       return character == ' '        // space
              || character == 0x0085  // next line
              || character == 0x200e  // left-to-right mark
              || character == 0x200f  // right-to-left mark
              || character == 0x2028  // line separator
              || character == 0x2029; // paragraph separator
     }
     131              : 
     /* Returns true if CHARACTER is one of the lower-case letters 'x', 'o' or
      * 'b'.  Judging by the name, these are the radix markers of a non-decimal
      * integer literal.  */
     bool
     is_non_decimal_int_literal_separator (uint32_t character)
     {
       switch (character)
         {
         case 'x':
         case 'o':
         case 'b':
           return true;
         default:
           return false;
         }
     }
     137              : 
     138              : bool
     139       312001 : is_identifier_start (uint32_t codepoint)
     140              : {
     141       312001 :   return (cpp_check_xid_property (codepoint) & CPP_XID_START)
     142       312001 :          || codepoint == '_';
     143              : }
     144              : 
     145              : bool
     146      1147267 : is_identifier_continue (uint32_t codepoint)
     147              : {
     148      1147267 :   return cpp_check_xid_property (codepoint) & CPP_XID_CONTINUE;
     149              : }
     150              : 
     151          103 : Lexer::Lexer (const std::string &input, Linemap *linemap)
     152          103 :   : input (RAIIFile::create_error ()), current_line (1), current_column (1),
     153          103 :     line_map (linemap), dump_lex_out ({}),
     154          103 :     raw_input_source (new BufferInputSource (input, 0)),
     155          103 :     input_queue{*raw_input_source}, token_queue (TokenSource (this))
     156          103 : {}
     157              : 
     158         4690 : Lexer::Lexer (const char *filename, RAIIFile file_input, Linemap *linemap,
     159         4690 :               tl::optional<std::ofstream &> dump_lex_opt)
     160         4690 :   : input (std::move (file_input)), current_line (1), current_column (1),
     161         4690 :     line_map (linemap), dump_lex_out (dump_lex_opt),
     162         9380 :     raw_input_source (new FileInputSource (input.get_raw ())),
     163         9380 :     input_queue{*raw_input_source}, token_queue (TokenSource (this))
     164              : {
     165              :   // inform line_table that file is being entered and is in line 1
     166         4690 :   if (linemap)
     167         4690 :     line_map->start_file (filename, current_line);
     168         4690 : }
     169              : 
     170         4791 : Lexer::~Lexer ()
     171              : {
     172              :   /* ok apparently stop (which is equivalent of original code in destructor) is
     173              :    * meant to be called after all files have finished parsing, for cleanup. On
     174              :    * the other hand, actual code that it calls to leave a certain line map is
     175              :    * mentioned in GCC docs as being useful for "just leaving an included header"
     176              :    * and stuff like that, so this line mapping functionality may need fixing.
     177              :    * FIXME: find out whether this occurs. */
     178              : 
     179              :   // line_map->stop();
     180         4791 : }
     181              : 
     182              : bool
     183         4635 : Lexer::input_source_is_valid_utf8 ()
     184              : {
     185         4635 :   return raw_input_source->is_valid ();
     186              : }
     187              : 
     188              : location_t
     189      1839756 : Lexer::get_current_location ()
     190              : {
     191      1839756 :   if (line_map)
     192      1839587 :     return linemap_position_for_column (line_table, current_column);
     193              :   else
     194              :     // If we have no linemap, we're lexing something without proper locations
     195              :     return UNDEF_LOCATION;
     196              : }
     197              : 
     198              : Codepoint
     199      4137209 : Lexer::peek_input (int n)
     200              : {
     201      4137209 :   return input_queue.peek (n);
     202              : }
     203              : 
     204              : Codepoint
     205      4039740 : Lexer::peek_input ()
     206              : {
     207      4039740 :   return peek_input (0);
     208              : }
     209              : 
     210              : void
     211      3478657 : Lexer::skip_input (int n)
     212              : {
     213      3478657 :   input_queue.skip (n);
     214      3478657 : }
     215              : 
     216              : void
     217      3468373 : Lexer::skip_input ()
     218              : {
     219      3468373 :   skip_input (0);
     220      3468373 : }
     221              : 
     222              : void
     223       725633 : Lexer::skip_token (int n)
     224              : {
     225              :   // dump tokens if dump-lex option is enabled
     226       725633 :   if (dump_lex_out.has_value ())
     227            0 :     dump_and_skip (n);
     228              :   else
     229       725633 :     token_queue.skip (n);
     230       725633 : }
     231              : 
     232              : void
     233            0 : Lexer::dump_and_skip (int n)
     234              : {
     235            0 :   std::ofstream &out = dump_lex_out.value ();
     236            0 :   bool found_eof = false;
     237            0 :   const_TokenPtr tok;
     238            0 :   for (int i = 0; i < n + 1; i++)
     239              :     {
     240            0 :       if (!found_eof)
     241              :         {
     242            0 :           tok = peek_token ();
     243            0 :           found_eof |= tok->get_id () == Rust::END_OF_FILE;
     244              : 
     245            0 :           location_t loc = tok->get_locus ();
     246              : 
     247            0 :           out << "<id=";
     248            0 :           out << tok->token_id_to_str ();
     249            0 :           out << (tok->should_have_str ()
     250            0 :                     ? (std::string (", text=") + tok->get_str ()
     251            0 :                        + std::string (", typehint=")
     252            0 :                        + std::string (tok->get_type_hint_str ()))
     253            0 :                     : "")
     254            0 :               << " ";
     255            0 :           out << Linemap::location_to_string (loc) << '\n';
     256              :         }
     257              : 
     258            0 :       token_queue.skip (0);
     259              :     }
     260            0 : }
     261              : 
     262              : void
     263            0 : Lexer::replace_current_token (TokenPtr replacement)
     264              : {
     265            0 :   token_queue.replace_current_value (replacement);
     266              : 
     267            0 :   rust_debug ("called 'replace_current_token' - this is deprecated");
     268            0 : }
     269              : 
     270              : /* Determines whether the string passed in is a keyword or not. If it is, it
     271              :  * returns the keyword name.  */
     272              : TokenId
     273       281070 : Lexer::classify_keyword (const std::string &str)
     274              : {
     275       281070 :   auto &keywords = Rust::Values::Keywords::keywords_tokens;
     276       281070 :   auto keyword = keywords.find (str);
     277              : 
     278       281070 :   if (keyword == keywords.end ())
     279              :     return IDENTIFIER;
     280              : 
     281        94047 :   auto id = keyword->second;
     282              : 
     283              :   // We now have the expected token ID of the reserved keyword. However, some
     284              :   // keywords are reserved starting in certain editions. For example, `try` is
     285              :   // only a reserved keyword in editions >=2018. The language might gain new
     286              :   // reserved keywords in the future.
     287              :   //
     288              :   // https://doc.rust-lang.org/reference/keywords.html#reserved-keywords
     289              : 
     290              :   // `try` is not a reserved keyword before 2018
     291        94047 :   if (get_rust_edition () == Edition::E2015 && id == TRY)
     292              :     return IDENTIFIER;
     293              : 
     294              :   return id;
     295              : }
     296              : 
     297              : TokenPtr
     298       733690 : Lexer::build_token ()
     299              : {
     300              :   // loop to go through multiple characters to build a single token
     301      1827146 :   while (true)
     302              :     {
     303      1827146 :       location_t loc = get_current_location ();
     304              : 
     305      1827146 :       current_char = peek_input ();
     306      1827146 :       skip_input ();
     307              : 
     308              :       // detect shebang
     309              :       // Must be the first thing on the first line, starting with #!
     310              :       // But since an attribute can also start with an #! we don't count it as a
     311              :       // shebang line when after any whitespace or comments there is a [. If it
     312              :       // is a shebang line we simple drop the line. Otherwise we don't consume
     313              :       // any characters and fall through to the real tokenizer.
     314        31175 :       if (current_line == 1 && current_column == 1 && current_char == '#'
     315      1858321 :           && peek_input () == '!')
     316              :         {
     317              :           int n = 1;
     318         3112 :           while (true)
     319              :             {
     320         3112 :               Codepoint next_char = peek_input (n);
     321         3112 :               if (is_whitespace (next_char.value))
     322            7 :                 n++;
     323         3105 :               else if ((next_char == '/' && peek_input (n + 1) == '/'
     324            7 :                         && peek_input (n + 2) != '!'
     325            7 :                         && peek_input (n + 2) != '/')
     326         3126 :                        || (next_char == '/' && peek_input (n + 1) == '/'
     327            0 :                            && peek_input (n + 2) == '/'
     328            0 :                            && peek_input (n + 3) == '/'))
     329              :                 {
     330              :                   // two // or four ////
     331              :                   // A single line comment
     332              :                   // (but not an inner or outer doc comment)
     333            7 :                   n += 2;
     334            7 :                   next_char = peek_input (n);
     335          119 :                   while (next_char != '\n' && !next_char.is_eof ())
     336              :                     {
     337          112 :                       n++;
     338          112 :                       next_char = peek_input (n);
     339              :                     }
     340            7 :                   if (next_char == '\n')
     341            7 :                     n++;
     342              :                 }
     343         3098 :               else if (next_char == '/' && peek_input (n + 1) == '*'
     344            0 :                        && peek_input (n + 2) == '*'
     345         3098 :                        && peek_input (n + 3) == '/')
     346              :                 {
     347              :                   /**/
     348            0 :                   n += 4;
     349              :                 }
     350         3098 :               else if (next_char == '/' && peek_input (n + 1) == '*'
     351            0 :                        && peek_input (n + 2) == '*' && peek_input (n + 3) == '*'
     352         3098 :                        && peek_input (n + 4) == '/')
     353              :                 {
     354              :                   /***/
     355            0 :                   n += 5;
     356              :                 }
     357         3098 :               else if ((next_char == '/' && peek_input (n + 1) == '*'
     358            0 :                         && peek_input (n + 2) != '*'
     359            0 :                         && peek_input (n + 2) != '!')
     360         3119 :                        || (next_char == '/' && peek_input (n + 1) == '*'
     361            0 :                            && peek_input (n + 2) == '*'
     362            0 :                            && peek_input (n + 3) == '*'))
     363              :                 {
     364              :                   // one /* or three /***
     365              :                   // Start of a block comment
     366              :                   // (but not an inner or outer doc comment)
     367            0 :                   n += 2;
     368            0 :                   int level = 1;
     369            0 :                   while (level > 0)
     370              :                     {
     371            0 :                       if (peek_input (n).is_eof ())
     372              :                         break;
     373            0 :                       else if (peek_input (n) == '/'
     374            0 :                                && peek_input (n + 1) == '*')
     375              :                         {
     376            0 :                           n += 2;
     377            0 :                           level += 1;
     378              :                         }
     379            0 :                       else if (peek_input (n) == '*'
     380            0 :                                && peek_input (n + 1) == '/')
     381              :                         {
     382            0 :                           n += 2;
     383            0 :                           level -= 1;
     384              :                         }
     385              :                       else
     386            0 :                         n++;
     387              :                     }
     388              :                 }
     389         3098 :               else if (next_char != '[')
     390              :                 {
     391              :                   // definitely shebang, ignore the first line
     392          518 :                   while (current_char != '\n' && !current_char.is_eof ())
     393              :                     {
     394          490 :                       current_char = peek_input ();
     395          490 :                       skip_input ();
     396              :                     }
     397              : 
     398              :                   // newline
     399           28 :                   current_line++;
     400           28 :                   current_column = 1;
     401              :                   // tell line_table that new line starts
     402           28 :                   start_line (current_line, max_column_hint);
     403           28 :                   break;
     404              :                 }
     405              :               else
     406              :                 break; /* Definitely not a shebang line. */
     407              :             }
     408              :         }
     409              : 
     410              :       // return end of file token if end of file
     411      1827146 :       if (current_char.is_eof ())
     412         5089 :         return Token::make (END_OF_FILE, loc);
     413              : 
     414              :       // if not end of file, start tokenising
     415      1822057 :       switch (current_char.value)
     416              :         {
     417              :         /* ignore whitespace characters for tokens but continue updating
     418              :          * location */
     419       160571 :         case '\n':   // newline
     420       160571 :         case 0x0085: // next line
     421       160571 :         case 0x2028: // line separator
     422       160571 :         case 0x2029: // paragraph separator
     423       160571 :           current_line++;
     424       160571 :           current_column = 1;
     425              :           // tell line_table that new line starts
     426       160571 :           start_line (current_line, max_column_hint);
     427       160571 :           continue;
     428          252 :         case '\r': // cr
     429              :           // Ignore, we expect a newline (lf) soon.
     430          252 :           continue;
     431       922883 :         case ' ': // space
     432       922883 :           current_column++;
     433       922883 :           continue;
     434          113 :         case '\t': // horizontal tab
     435              :           // width of a tab is not well-defined, assume 8 spaces
     436          113 :           current_column += 8;
     437          113 :           continue;
     438           28 :         case '\v':   // vertical tab
     439           28 :         case 0x000c: // form feed
     440           28 :         case 0x200e: // left-to-right mark
     441           28 :         case 0x200f: // right-to-left mark
     442              :           // Ignored.
     443           28 :           continue;
     444              : 
     445              :         // punctuation - actual tokens
     446        27915 :         case '=':
     447        27915 :           if (peek_input () == '>')
     448              :             {
     449              :               // match arm arrow
     450         3270 :               skip_input ();
     451         3270 :               current_column += 2;
     452         3270 :               loc += 1;
     453              : 
     454         3270 :               return Token::make (MATCH_ARROW, loc);
     455              :             }
     456        24645 :           else if (peek_input () == '=')
     457              :             {
     458              :               // equality operator
     459          667 :               skip_input ();
     460          667 :               current_column += 2;
     461          667 :               loc += 1;
     462              : 
     463          667 :               return Token::make (EQUAL_EQUAL, loc);
     464              :             }
     465              :           else
     466              :             {
     467              :               // assignment operator
     468        23978 :               current_column++;
     469        23978 :               return Token::make (EQUAL, loc);
     470              :             }
     471        45137 :         case '(':
     472        45137 :           current_column++;
     473        45137 :           return Token::make (LEFT_PAREN, loc);
     474        11551 :         case '-':
     475        11551 :           if (peek_input () == '>')
     476              :             {
     477              :               // return type specifier
     478        10220 :               skip_input ();
     479        10220 :               current_column += 2;
     480        10220 :               loc += 1;
     481              : 
     482        10220 :               return Token::make (RETURN_TYPE, loc);
     483              :             }
     484         1331 :           else if (peek_input () == '=')
     485              :             {
     486              :               // minus-assign
     487          105 :               skip_input ();
     488          105 :               current_column += 2;
     489          105 :               loc += 1;
     490              : 
     491          105 :               return Token::make (MINUS_EQ, loc);
     492              :             }
     493              :           else
     494              :             {
     495              :               // minus
     496         1226 :               current_column++;
     497         1226 :               return Token::make (MINUS, loc);
     498              :             }
     499         1743 :         case '+':
     500         1743 :           if (peek_input () == '=')
     501              :             {
     502              :               // add-assign
     503          152 :               skip_input ();
     504          152 :               current_column += 2;
     505          152 :               loc += 1;
     506              : 
     507          152 :               return Token::make (PLUS_EQ, loc);
     508              :             }
     509              :           else
     510              :             {
     511              :               // add
     512         1591 :               current_column++;
     513         1591 :               return Token::make (PLUS, loc);
     514              :             }
     515        45118 :         case ')':
     516        45118 :           current_column++;
     517        45118 :           return Token::make (RIGHT_PAREN, loc);
     518        29713 :         case ';':
     519        29713 :           current_column++;
     520        29713 :           return Token::make (SEMICOLON, loc);
     521        10832 :         case '*':
     522        10832 :           if (peek_input () == '=')
     523              :             {
     524              :               // multiplication-assign
     525            7 :               skip_input ();
     526            7 :               current_column += 2;
     527            7 :               loc += 1;
     528              : 
     529            7 :               return Token::make (ASTERISK_EQ, loc);
     530              :             }
     531              :           else
     532              :             {
     533              :               // multiplication
     534        10825 :               current_column++;
     535        10825 :               return Token::make (ASTERISK, loc);
     536              :             }
     537        22688 :         case ',':
     538        22688 :           current_column++;
     539        22688 :           return Token::make (COMMA, loc);
     540        17752 :         case '/':
     541        17752 :           if (peek_input () == '=')
     542              :             {
     543              :               // division-assign
     544            7 :               skip_input ();
     545            7 :               current_column += 2;
     546            7 :               loc += 1;
     547              : 
     548            7 :               return Token::make (DIV_EQ, loc);
     549              :             }
     550        17745 :           else if ((peek_input () == '/' && peek_input (1) != '!'
     551        16638 :                     && peek_input (1) != '/')
     552        25745 :                    || (peek_input () == '/' && peek_input (1) == '/'
     553         7900 :                        && peek_input (2) == '/'))
     554              :             {
     555              :               // two // or four ////
     556              :               // single line comment
     557              :               // (but not an inner or outer doc comment)
     558         8753 :               skip_input ();
     559         8753 :               current_column += 2;
     560         8753 :               current_char = peek_input ();
     561              : 
     562              :               // basically ignore until line finishes
     563       429376 :               while (current_char != '\n' && !current_char.is_eof ())
     564              :                 {
     565       411870 :                   skip_input ();
     566       411870 :                   current_column++; // not used
     567       411870 :                   current_char = peek_input ();
     568              :                 }
     569         8753 :               continue;
     570              :             }
     571         8992 :           else if (peek_input () == '/'
     572         8992 :                    && (peek_input (1) == '!' || peek_input (1) == '/'))
     573              :             {
     574              :               /* single line doc comment, inner or outer.  */
     575         7985 :               bool is_inner = peek_input (1) == '!';
     576         7985 :               skip_input (1);
     577         7985 :               current_column += 3;
     578              : 
     579         7985 :               std::string str;
     580         7985 :               str.reserve (32);
     581         7985 :               current_char = peek_input ();
     582       192537 :               while (current_char != '\n')
     583              :                 {
     584       176616 :                   skip_input ();
     585       176616 :                   if (current_char == '\r')
     586              :                     {
     587           51 :                       Codepoint next_char = peek_input ();
     588           51 :                       if (next_char == '\n')
     589              :                         {
     590           49 :                           current_char = '\n';
     591           49 :                           break;
     592              :                         }
     593            2 :                       rust_error_at (
     594              :                         loc, "Isolated CR %<\\r%> not allowed in doc comment");
     595            2 :                       current_char = next_char;
     596            2 :                       continue;
     597            2 :                     }
     598       176565 :                   if (current_char.is_eof ())
     599              :                     {
     600            0 :                       rust_error_at (
     601              :                         loc, ErrorCode::E0758,
     602              :                         "unexpected EOF while looking for end of comment");
     603            0 :                       break;
     604              :                     }
     605       176565 :                   str += current_char;
     606       176565 :                   current_char = peek_input ();
     607              :                 }
     608         7985 :               skip_input ();
     609         7985 :               current_line++;
     610         7985 :               current_column = 1;
     611              :               // tell line_table that new line starts
     612         7985 :               start_line (current_line, max_column_hint);
     613              : 
     614         7985 :               str.shrink_to_fit ();
     615              : 
     616         7985 :               loc += str.size () - 1;
     617         7985 :               if (is_inner)
     618          100 :                 return Token::make_inner_doc_comment (loc, std::move (str));
     619              :               else
     620         7885 :                 return Token::make_outer_doc_comment (loc, std::move (str));
     621         7985 :             }
     622         1007 :           else if (peek_input () == '*' && peek_input (1) == '*'
     623         1092 :                    && peek_input (2) == '/')
     624              :             {
     625              :               /**/
     626           14 :               skip_input (2);
     627           14 :               current_column += 4;
     628           14 :               continue;
     629              :             }
     630          993 :           else if (peek_input () == '*' && peek_input (1) == '*'
     631         1064 :                    && peek_input (2) == '*' && peek_input (3) == '/')
     632              :             {
     633              :               /***/
     634           14 :               skip_input (3);
     635           14 :               current_column += 5;
     636           14 :               continue;
     637              :             }
     638          979 :           else if ((peek_input () == '*' && peek_input (1) != '!'
     639          870 :                     && peek_input (1) != '*')
     640         1109 :                    || (peek_input () == '*' && peek_input (1) == '*'
     641           57 :                        && peek_input (2) == '*'))
     642              :             {
     643              :               // one /* or three /***
     644              :               // block comment
     645              :               // (but not an inner or outer doc comment)
     646          827 :               skip_input ();
     647          827 :               current_column += 2;
     648              : 
     649          827 :               int level = 1;
     650        36902 :               while (level > 0)
     651              :                 {
     652        36076 :                   current_char = peek_input ();
     653              : 
     654        36076 :                   if (current_char.is_eof ())
     655              :                     {
     656            1 :                       rust_error_at (
     657              :                         loc, ErrorCode::E0758,
     658              :                         "unexpected EOF while looking for end of comment");
     659            1 :                       break;
     660              :                     }
     661              : 
     662              :                   // if /* found
     663        36075 :                   if (current_char == '/' && peek_input (1) == '*')
     664              :                     {
     665              :                       // skip /* characters
     666           49 :                       skip_input (1);
     667              : 
     668           49 :                       current_column += 2;
     669              : 
     670           49 :                       level += 1;
     671           49 :                       continue;
     672              :                     }
     673              : 
     674              :                   // ignore until */ is found
     675        36026 :                   if (current_char == '*' && peek_input (1) == '/')
     676              :                     {
     677              :                       // skip */ characters
     678          875 :                       skip_input (1);
     679              : 
     680          875 :                       current_column += 2;
     681              : 
     682          875 :                       level -= 1;
     683          875 :                       continue;
     684              :                     }
     685              : 
     686        35151 :                   if (current_char == '\n')
     687              :                     {
     688          398 :                       skip_input ();
     689          398 :                       current_line++;
     690          398 :                       current_column = 1;
     691              :                       // tell line_table that new line starts
     692          398 :                       start_line (current_line, max_column_hint);
     693          398 :                       continue;
     694              :                     }
     695              : 
     696        34753 :                   skip_input ();
     697        34753 :                   current_column++;
     698              :                 }
     699              : 
     700              :               // refresh new token
     701          827 :               continue;
     702          827 :             }
     703          152 :           else if (peek_input () == '*'
     704          152 :                    && (peek_input (1) == '!' || peek_input (1) == '*'))
     705              :             {
     706              :               // block doc comment, inner /*! or outer /**
     707          116 :               bool is_inner = peek_input (1) == '!';
     708          116 :               skip_input (1);
     709          116 :               current_column += 3;
     710              : 
     711          116 :               std::string str;
     712          116 :               str.reserve (96);
     713              : 
     714          116 :               int level = 1;
     715          116 :               while (level > 0)
     716              :                 {
     717         2685 :                   current_char = peek_input ();
     718              : 
     719         2685 :                   if (current_char.is_eof ())
     720              :                     {
     721            0 :                       rust_error_at (
     722              :                         loc, ErrorCode::E0758,
     723              :                         "unexpected EOF while looking for end of comment");
     724            0 :                       break;
     725              :                     }
     726              : 
     727              :                   // if /* found
     728         2685 :                   if (current_char == '/' && peek_input (1) == '*')
     729              :                     {
     730              :                       // skip /* characters
     731           84 :                       skip_input (1);
     732           84 :                       current_column += 2;
     733              : 
     734           84 :                       level += 1;
     735           84 :                       str += "/*";
     736           84 :                       continue;
     737              :                     }
     738              : 
     739              :                   // ignore until */ is found
     740         2601 :                   if (current_char == '*' && peek_input (1) == '/')
     741              :                     {
     742              :                       // skip */ characters
     743          200 :                       skip_input (1);
     744          200 :                       current_column += 2;
     745              : 
     746          200 :                       level -= 1;
     747          200 :                       if (level > 0)
     748           84 :                         str += "*/";
     749          200 :                       continue;
     750              :                     }
     751              : 
     752         2401 :                   if (current_char == '\r' && peek_input (1) != '\n')
     753            2 :                     rust_error_at (
     754              :                       loc, "Isolated CR %<\\r%> not allowed in doc comment");
     755              : 
     756         2401 :                   if (current_char == '\n')
     757              :                     {
     758            0 :                       skip_input ();
     759            0 :                       current_line++;
     760            0 :                       current_column = 1;
     761              :                       // tell line_table that new line starts
     762            0 :                       start_line (current_line, max_column_hint);
     763            0 :                       str += '\n';
     764            0 :                       continue;
     765              :                     }
     766              : 
     767         2401 :                   str += current_char;
     768         2401 :                   skip_input ();
     769         2401 :                   current_column++;
     770              :                 }
     771              : 
     772          116 :               str.shrink_to_fit ();
     773              : 
     774          116 :               loc += str.size () - 1;
     775          116 :               if (is_inner)
     776           73 :                 return Token::make_inner_doc_comment (loc, std::move (str));
     777              :               else
     778           43 :                 return Token::make_outer_doc_comment (loc, std::move (str));
     779          116 :             }
     780              :           else
     781              :             {
     782              :               // division
     783           36 :               current_column++;
     784           36 :               return Token::make (DIV, loc);
     785              :             }
     786           43 :         case '%':
     787           43 :           if (peek_input () == '=')
     788              :             {
     789              :               // modulo-assign
     790            7 :               skip_input ();
     791            7 :               current_column += 2;
     792            7 :               loc += 1;
     793              : 
     794            7 :               return Token::make (PERCENT_EQ, loc);
     795              :             }
     796              :           else
     797              :             {
     798              :               // modulo
     799           36 :               current_column++;
     800           36 :               return Token::make (PERCENT, loc);
     801              :             }
     802          147 :         case '^':
     803          147 :           if (peek_input () == '=')
     804              :             {
     805              :               // xor-assign?
     806           84 :               skip_input ();
     807           84 :               current_column += 2;
     808           84 :               loc += 1;
     809              : 
     810           84 :               return Token::make (CARET_EQ, loc);
     811              :             }
     812              :           else
     813              :             {
     814              :               // xor?
     815           63 :               current_column++;
     816           63 :               return Token::make (CARET, loc);
     817              :             }
     818         8558 :         case '<':
     819         8558 :           if (peek_input () == '<')
     820              :             {
     821           66 :               if (peek_input (1) == '=')
     822              :                 {
     823              :                   // left-shift assign
     824            7 :                   skip_input (1);
     825            7 :                   current_column += 3;
     826            7 :                   loc += 2;
     827              : 
     828            7 :                   return Token::make (LEFT_SHIFT_EQ, loc);
     829              :                 }
     830              :               else
     831              :                 {
     832              :                   // left-shift
     833           59 :                   skip_input ();
     834           59 :                   current_column += 2;
     835           59 :                   loc += 1;
     836              : 
     837           59 :                   return Token::make (LEFT_SHIFT, loc);
     838              :                 }
     839              :             }
     840         8492 :           else if (peek_input () == '=')
     841              :             {
     842              :               // smaller than or equal to
     843          224 :               skip_input ();
     844          224 :               current_column += 2;
     845          224 :               loc += 1;
     846              : 
     847          224 :               return Token::make (LESS_OR_EQUAL, loc);
     848              :             }
     849              :           else
     850              :             {
     851              :               // smaller than
     852         8268 :               current_column++;
     853         8268 :               return Token::make (LEFT_ANGLE, loc);
     854              :             }
     855         8409 :           break;
     856         8409 :         case '>':
     857         8409 :           if (peek_input () == '>')
     858              :             {
     859          126 :               if (peek_input (1) == '=')
     860              :                 {
     861              :                   // right-shift-assign
     862            7 :                   skip_input (1);
     863            7 :                   current_column += 3;
     864            7 :                   loc += 2;
     865              : 
     866            7 :                   return Token::make (RIGHT_SHIFT_EQ, loc);
     867              :                 }
     868              :               else
     869              :                 {
     870              :                   // right-shift
     871          119 :                   skip_input ();
     872          119 :                   current_column += 2;
     873          119 :                   loc += 1;
     874              : 
     875          119 :                   return Token::make (RIGHT_SHIFT, loc);
     876              :                 }
     877              :             }
     878         8283 :           else if (peek_input () == '=')
     879              :             {
     880              :               // larger than or equal to
     881          209 :               skip_input ();
     882          209 :               current_column += 2;
     883          209 :               loc += 1;
     884              : 
     885          209 :               return Token::make (GREATER_OR_EQUAL, loc);
     886              :             }
     887              :           else
     888              :             {
     889              :               // larger than
     890         8074 :               current_column++;
     891         8074 :               return Token::make (RIGHT_ANGLE, loc);
     892              :             }
     893        28773 :         case ':':
     894        28773 :           if (peek_input () == ':')
     895              :             {
     896              :               // scope resolution ::
     897         9826 :               skip_input ();
     898         9826 :               current_column += 2;
     899         9826 :               loc += 1;
     900              : 
     901         9826 :               return Token::make (SCOPE_RESOLUTION, loc);
     902              :             }
     903              :           else
     904              :             {
     905              :               // single colon :
     906        18947 :               current_column++;
     907        18947 :               return Token::make (COLON, loc);
     908              :             }
     909        14608 :         case '!':
     910              :           // no special handling for macros in lexer?
     911        14608 :           if (peek_input () == '=')
     912              :             {
     913              :               // not equal boolean operator
     914          186 :               skip_input ();
     915          186 :               current_column += 2;
     916          186 :               loc += 1;
     917              : 
     918          186 :               return Token::make (NOT_EQUAL, loc);
     919              :             }
     920              :           else
     921              :             {
     922              :               // not equal unary operator
     923        14422 :               current_column++;
     924              : 
     925        14422 :               return Token::make (EXCLAM, loc);
     926              :             }
     927          368 :         case '?':
     928          368 :           current_column++;
     929          368 :           return Token::make (QUESTION_MARK, loc);
     930        19850 :         case '#':
     931        19850 :           current_column++;
     932        19850 :           return Token::make (HASH, loc);
     933        21864 :         case '[':
     934        21864 :           current_column++;
     935        21864 :           return Token::make (LEFT_SQUARE, loc);
     936        21857 :         case ']':
     937        21857 :           current_column++;
     938        21857 :           return Token::make (RIGHT_SQUARE, loc);
     939        34529 :         case '{':
     940        34529 :           current_column++;
     941        34529 :           return Token::make (LEFT_CURLY, loc);
     942        34482 :         case '}':
     943        34482 :           current_column++;
     944        34482 :           return Token::make (RIGHT_CURLY, loc);
     945           19 :         case '@':
     946           19 :           current_column++;
     947           19 :           return Token::make (PATTERN_BIND, loc);
     948         3589 :         case '$':
     949         3589 :           current_column++;
     950         3589 :           return Token::make (DOLLAR_SIGN, loc);
     951            0 :         case '~':
     952            0 :           current_column++;
     953            0 :           return Token::make (TILDE, loc);
     954            0 :         case '\\':
     955            0 :           current_column++;
     956            0 :           return Token::make (BACKSLASH, loc);
     957            0 :         case '`':
     958            0 :           current_column++;
     959            0 :           return Token::make (BACKTICK, loc);
     960          475 :         case '|':
     961          475 :           if (peek_input () == '=')
     962              :             {
     963              :               // bitwise or-assign?
     964           28 :               skip_input ();
     965           28 :               current_column += 2;
     966           28 :               loc += 1;
     967              : 
     968           28 :               return Token::make (PIPE_EQ, loc);
     969              :             }
     970          447 :           else if (peek_input () == '|')
     971              :             {
     972              :               // logical or
     973           69 :               skip_input ();
     974           69 :               current_column += 2;
     975           69 :               loc += 1;
     976              : 
     977           69 :               return Token::make (OR, loc);
     978              :             }
     979              :           else
     980              :             {
     981              :               // bitwise or
     982          378 :               current_column++;
     983              : 
     984          378 :               return Token::make (PIPE, loc);
     985              :             }
     986         9958 :         case '&':
     987         9958 :           if (peek_input () == '=')
     988              :             {
     989              :               // bitwise and-assign?
     990           21 :               skip_input ();
     991           21 :               current_column += 2;
     992           21 :               loc += 1;
     993              : 
     994           21 :               return Token::make (AMP_EQ, loc);
     995              :             }
     996         9937 :           else if (peek_input () == '&')
     997              :             {
     998              :               // logical and
     999          306 :               skip_input ();
    1000          306 :               current_column += 2;
    1001          306 :               loc += 1;
    1002              : 
    1003          306 :               return Token::make (LOGICAL_AND, loc);
    1004              :             }
    1005              :           else
    1006              :             {
    1007              :               // bitwise and/reference
    1008         9631 :               current_column++;
    1009              : 
    1010         9631 :               return Token::make (AMP, loc);
    1011              :             }
    1012         6656 :         case '.':
    1013         6656 :           if (peek_input () == '.')
    1014              :             {
    1015         1166 :               if (peek_input (1) == '.')
    1016              :                 {
    1017              :                   // ellipsis
    1018          838 :                   skip_input (1);
    1019          838 :                   current_column += 3;
    1020          838 :                   loc += 2;
    1021              : 
    1022          838 :                   return Token::make (ELLIPSIS, loc);
    1023              :                 }
    1024          328 :               else if (peek_input (1) == '=')
    1025              :                 {
    1026              :                   // ..=
    1027           38 :                   skip_input (1);
    1028           38 :                   current_column += 3;
    1029           38 :                   loc += 2;
    1030              : 
    1031           38 :                   return Token::make (DOT_DOT_EQ, loc);
    1032              :                 }
    1033              :               else
    1034              :                 {
    1035              :                   // ..
    1036          290 :                   skip_input ();
    1037          290 :                   current_column += 2;
    1038          290 :                   loc += 1;
    1039              : 
    1040          290 :                   return Token::make (DOT_DOT, loc);
    1041              :                 }
    1042              :             }
    1043              :           else /*if (!ISDIGIT (peek_input ()))*/
    1044              :             {
    1045              :               // single dot .
    1046              :               // Only if followed by a non-number - otherwise is float
    1047              :               // nope, float cannot start with '.'.
    1048         5490 :               current_column++;
    1049         5490 :               return Token::make (DOT, loc);
    1050              :             }
    1051      1083847 :         }
    1052              :       // TODO: special handling of _ in the lexer? instead of being identifier
    1053              : 
    1054              :       // byte character, byte string and raw byte string literals
    1055       311576 :       if (current_char == 'b')
    1056              :         {
    1057        10662 :           if (peek_input () == '\'')
    1058           78 :             return parse_byte_char (loc);
    1059        10584 :           else if (peek_input () == '"')
    1060           64 :             return parse_byte_string (loc);
    1061        10520 :           else if (peek_input () == 'r'
    1062        10520 :                    && (peek_input (1) == '#' || peek_input (1) == '"'))
    1063           32 :             return parse_raw_byte_string (loc);
    1064              :         }
    1065              : 
    1066              :       // raw identifiers and raw strings
    1067       311402 :       if (current_char == 'r')
    1068              :         {
    1069         3995 :           Codepoint peek = peek_input ();
    1070         3995 :           Codepoint peek1 = peek_input (1);
    1071              : 
    1072              :           // TODO (tamaron) parse Unicode ident
    1073         3995 :           if (peek == '#' && is_identifier_start (peek1.value))
    1074              :             {
    1075           81 :               TokenPtr raw_ident_ptr = parse_raw_identifier (loc);
    1076           81 :               if (raw_ident_ptr != nullptr)
    1077           80 :                 return raw_ident_ptr;
    1078              :               else
    1079            1 :                 continue; /* input got parsed, it just wasn't valid. An error
    1080              :                              was produced. */
    1081           81 :             }
    1082              :           else
    1083              :             {
    1084         3914 :               TokenPtr maybe_raw_string_ptr = maybe_parse_raw_string (loc);
    1085         3914 :               if (maybe_raw_string_ptr != nullptr)
    1086           25 :                 return maybe_raw_string_ptr;
    1087         3914 :             }
    1088              :         }
    1089              : 
    1090              :       // find identifiers and keywords.
    1091       311296 :       if (is_identifier_start (current_char.value))
    1092       282362 :         return parse_identifier_or_keyword (loc);
    1093              : 
    1094              :       // int and float literals
    1095        28934 :       if (ISDIGIT (current_char.value))
    1096              :         { //  _ not allowed as first char
    1097        15667 :           if (current_char == '0'
    1098        15667 :               && is_non_decimal_int_literal_separator (peek_input ().value))
    1099              :             {
    1100              :               // handle binary, octal, hex literals
    1101          216 :               TokenPtr non_dec_int_lit_ptr
    1102          216 :                 = parse_non_decimal_int_literals (loc);
    1103          216 :               if (non_dec_int_lit_ptr != nullptr)
    1104          216 :                 return non_dec_int_lit_ptr;
    1105          216 :             }
    1106              :           else
    1107              :             {
    1108              :               // handle decimals (integer or float)
    1109        15451 :               TokenPtr decimal_or_float_ptr = parse_decimal_int_or_float (loc);
    1110        15451 :               if (decimal_or_float_ptr != nullptr)
    1111        15451 :                 return decimal_or_float_ptr;
    1112        15451 :             }
    1113              :         }
    1114              : 
    1115              :       // string literals
    1116        13267 :       if (current_char == '"')
    1117        12424 :         return parse_string (loc);
    1118              : 
    1119              :       // char literals and lifetime names
    1120          843 :       if (current_char == '\'')
    1121              :         {
    1122          843 :           TokenPtr char_or_lifetime_ptr = parse_char_or_lifetime (loc);
    1123          843 :           if (char_or_lifetime_ptr != nullptr)
    1124          843 :             return char_or_lifetime_ptr;
    1125          843 :         }
    1126              : 
    1127              :       // DEBUG: check for specific character problems:
    1128            0 :       if (current_char == '0')
    1129            0 :         rust_debug ("'0' uncaught before unexpected character");
    1130            0 :       else if (current_char == ']')
    1131            0 :         rust_debug ("']' uncaught before unexpected character");
    1132              :       else if (current_char == 0x5d)
    1133              :         rust_debug ("whatever 0x5d is (not '0' or ']') uncaught before "
    1134              :                     "unexpected character");
    1135              : 
    1136              :       // didn't match anything so error
    1137            0 :       rust_error_at (loc, "unexpected character %<%x%>", current_char.value);
    1138            0 :       current_column++;
    1139              :     }
    1140              : }
    1141              : 
    1142              : // Parses a type suffix (e.g. f32 or u8) from the input, ignoring underscores.
    1143              : std::pair<PrimitiveCoreType, int>
    1144        15659 : Lexer::parse_in_type_suffix ()
    1145              : {
    1146        15659 :   std::string suffix;
    1147        15659 :   suffix.reserve (5);
    1148              : 
    1149        15659 :   int additional_length_offset = 0;
    1150              : 
    1151              :   // get suffix
    1152        33589 :   while (ISALPHA (current_char.value) || ISDIGIT (current_char.value)
    1153        34941 :          || current_char == '_')
    1154              :     {
    1155         3623 :       if (current_char == '_')
    1156              :         {
    1157              :           // don't add _ to suffix
    1158            0 :           skip_input ();
    1159            0 :           current_char = peek_input ();
    1160              : 
    1161            0 :           additional_length_offset++;
    1162              : 
    1163            0 :           continue;
    1164              :         }
    1165              : 
    1166         3623 :       additional_length_offset++;
    1167              : 
    1168         3623 :       suffix += current_char;
    1169         3623 :       skip_input ();
    1170         3623 :       current_char = peek_input ();
    1171              :     }
    1172              : 
    1173        15659 :   if (suffix.empty ())
    1174              :     {
    1175              :       // no type suffix: do nothing but also no error
    1176        14471 :       return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset);
    1177              :     }
    1178         1188 :   else if (suffix == "f32")
    1179              :     {
    1180          501 :       return std::make_pair (CORETYPE_F32, additional_length_offset);
    1181              :     }
    1182          687 :   else if (suffix == "f64")
    1183              :     {
    1184          221 :       return std::make_pair (CORETYPE_F64, additional_length_offset);
    1185              :     }
    1186          466 :   else if (suffix == "i8")
    1187              :     {
    1188           23 :       return std::make_pair (CORETYPE_I8, additional_length_offset);
    1189              :     }
    1190          443 :   else if (suffix == "i16")
    1191              :     {
    1192           15 :       return std::make_pair (CORETYPE_I16, additional_length_offset);
    1193              :     }
    1194          428 :   else if (suffix == "i32")
    1195              :     {
    1196          177 :       return std::make_pair (CORETYPE_I32, additional_length_offset);
    1197              :     }
    1198          251 :   else if (suffix == "i64")
    1199              :     {
    1200           15 :       return std::make_pair (CORETYPE_I64, additional_length_offset);
    1201              :     }
    1202          236 :   else if (suffix == "i128")
    1203              :     {
    1204           15 :       return std::make_pair (CORETYPE_I128, additional_length_offset);
    1205              :     }
    1206          221 :   else if (suffix == "isize")
    1207              :     {
    1208            4 :       return std::make_pair (CORETYPE_ISIZE, additional_length_offset);
    1209              :     }
    1210          217 :   else if (suffix == "u8")
    1211              :     {
    1212           30 :       return std::make_pair (CORETYPE_U8, additional_length_offset);
    1213              :     }
    1214          187 :   else if (suffix == "u16")
    1215              :     {
    1216           25 :       return std::make_pair (CORETYPE_U16, additional_length_offset);
    1217              :     }
    1218          162 :   else if (suffix == "u32")
    1219              :     {
    1220           85 :       return std::make_pair (CORETYPE_U32, additional_length_offset);
    1221              :     }
    1222           77 :   else if (suffix == "u64")
    1223              :     {
    1224           25 :       return std::make_pair (CORETYPE_U64, additional_length_offset);
    1225              :     }
    1226           52 :   else if (suffix == "u128")
    1227              :     {
    1228           15 :       return std::make_pair (CORETYPE_U128, additional_length_offset);
    1229              :     }
    1230           37 :   else if (suffix == "usize")
    1231              :     {
    1232           37 :       return std::make_pair (CORETYPE_USIZE, additional_length_offset);
    1233              :     }
    1234              :   else
    1235              :     {
    1236            0 :       rust_error_at (get_current_location (), "unknown number suffix %qs",
    1237              :                      suffix.c_str ());
    1238              : 
    1239            0 :       return std::make_pair (CORETYPE_UNKNOWN, additional_length_offset);
    1240              :     }
    1241        15659 : }
    1242              : 
    1243              : // Parses the exponent part (if any) of a float literal; returns its text (empty if none) and the number of characters consumed.
    1244              : std::pair<std::string, int>
    1245          345 : Lexer::parse_in_exponent_part ()
    1246              : {
    1247          345 :   int additional_length_offset = 0;
    1248          345 :   std::string str;
    1249          345 :   if (current_char == 'E' || current_char == 'e')
    1250              :     {
    1251              :       // keep the exponent marker in the string as strtod works with it
    1252            7 :       str += current_char;
    1253            7 :       skip_input ();
    1254            7 :       current_char = peek_input ();
    1255              : 
    1256            7 :       additional_length_offset++;
    1257              : 
    1258              :       // optional sign: '-' is kept in the string, '+' is dropped
    1259            7 :       if (current_char == '-')
    1260              :         {
    1261            0 :           str += '-';
    1262              : 
    1263            0 :           skip_input ();
    1264            0 :           current_char = peek_input ();
    1265              : 
    1266            0 :           additional_length_offset++;
    1267              :         }
    1268            7 :       else if (current_char == '+')
    1269              :         {
    1270              :           // '+' is consumed and counted, but not added to the string
    1271            7 :           skip_input ();
    1272            7 :           current_char = peek_input ();
    1273              : 
    1274            7 :           additional_length_offset++;
    1275              :         }
    1276              : 
    1277              :       // the exponent digits are parsed as an ordinary decimal number
    1278            7 :       auto str_length = parse_in_decimal ();
    1279            7 :       str += std::get<0> (str_length);
    1280            7 :       additional_length_offset += std::get<1> (str_length);
    1281            7 :     }
    1282          690 :   return std::make_pair (str, additional_length_offset);
    1283          345 : }
    1284              : 
    1285              : // Parses a run of decimal digits and '_'; returns the digits (without '_'), the number of characters consumed, and the pure-decimal flag.
    1286              : std::tuple<std::string, int, bool>
    1287        15803 : Lexer::parse_in_decimal ()
    1288              : {
    1289              :   /* A pure decimal contains only digits, i.e. no '_' was seen.  */
    1290        15803 :   bool pure_decimal = true;
    1291        15803 :   int additional_length_offset = 0;
    1292        15803 :   std::string str;
    1293        22889 :   while (ISDIGIT (current_char.value) || current_char.value == '_')
    1294              :     {
    1295         7086 :       if (current_char == '_')
    1296              :         {
    1297            9 :           pure_decimal = false;
    1298              :           // underscores are consumed and counted, but not added to number
    1299            9 :           skip_input ();
    1300            9 :           current_char = peek_input ();
    1301              : 
    1302            9 :           additional_length_offset++;
    1303              : 
    1304            9 :           continue;
    1305              :         }
    1306              : 
    1307         7077 :       additional_length_offset++;
    1308              : 
    1309         7077 :       str += current_char;
    1310         7077 :       skip_input ();
    1311         7077 :       current_char = peek_input ();
    1312              :     }
    1313        31606 :   return std::make_tuple (str, additional_length_offset, pure_decimal);
    1314        15803 : }
    1315              : 
    1316              : /* Parses an escape (or string continue) in a byte string or byte character.
    1317              :  * Unicode escapes are rejected. Returns (char, length, is_string_continue). */
    1318              : std::tuple<char, int, bool>
    1319           61 : Lexer::parse_escape (char opening_char)
    1320              : {
    1321           61 :   int additional_length_offset = 0;
    1322           61 :   char output_char = 0;
    1323              : 
    1324              :   // skip ahead so that current_char is the character selecting the escape
    1325           61 :   skip_input ();
    1326           61 :   current_char = peek_input ();
    1327           61 :   additional_length_offset++;
    1328              : 
    1329           61 :   switch (current_char.value)
    1330              :     {
    1331           17 :     case 'x':
    1332           17 :       {
    1333           17 :         auto hex_escape_pair = parse_partial_hex_escape ();
    1334           17 :         long hexLong = hex_escape_pair.first;
    1335           17 :         additional_length_offset += hex_escape_pair.second;
    1336              : 
    1337           17 :         if (hexLong > 255 || hexLong < 0)
    1338            0 :           rust_error_at (
    1339              :             get_current_location (),
    1340              :             "byte \\x escape %<\\x%x%> out of range - allows up to %<\\xFF%>",
    1341              :             static_cast<unsigned int> (hexLong));
    1342              :         /* TODO: restore capital for escape output - gcc pretty-printer doesn't
    1343              :          * support %X directly */
    1344           17 :         char hexChar = static_cast<char> (hexLong);
    1345              : 
    1346           17 :         output_char = hexChar;
    1347              :       }
    1348           17 :       break;
    1349              :     case 'n':
    1350              :       output_char = '\n';
    1351              :       break;
    1352            0 :     case 'r':
    1353            0 :       output_char = '\r';
    1354            0 :       break;
    1355            1 :     case 't':
    1356            1 :       output_char = '\t';
    1357            1 :       break;
    1358            8 :     case '\\':
    1359            8 :       output_char = '\\';
    1360            8 :       break;
    1361            9 :     case '0':
    1362            9 :       output_char = '\0';
    1363            9 :       break;
    1364           15 :     case '\'':
    1365           15 :       output_char = '\'';
    1366           15 :       break;
    1367            1 :     case '"':
    1368            1 :       output_char = '"';
    1369            1 :       break;
    1370            2 :     case 'u':
    1371            3 :       rust_error_at (get_current_location (),
    1372              :                      "cannot have a unicode escape \\u in a byte %s",
    1373              :                      opening_char == '\'' ? "character" : "string");
    1374              :       // Try to parse it anyway, just to skip it (its length is discarded)
    1375            2 :       parse_partial_unicode_escape ();
    1376            2 :       return std::make_tuple (output_char, additional_length_offset, false);
    1377            0 :     case '\r':
    1378            0 :     case '\n':
    1379              :       // string continue: the flag in the third element is set
    1380            0 :       return std::make_tuple (0, parse_partial_string_continue (), true);
    1381            1 :     default:
    1382            1 :       rust_error_at (get_current_location (),
    1383              :                      "unknown escape sequence %<\\%s%>",
    1384            1 :                      current_char.as_string ().c_str ());
    1385              :       // unknown escape: the offending character is not consumed here and
    1386              :       // the string-continue flag stays false
    1387            1 :       return std::make_tuple (output_char, additional_length_offset, false);
    1388           58 :       break;
    1389              :     }
    1390              :   // every case that breaks out of the switch skips one more character here
    1391           58 :   skip_input ();
    1392           58 :   current_char = peek_input ();
    1393           58 :   additional_length_offset++;
    1394              : 
    1395              :   // ordinary escape: return the decoded char with the string-continue
    1396              :   // flag cleared
    1397           58 :   return std::make_tuple (output_char, additional_length_offset, false);
    1398              : }
    1399              : 
    1400              : /* Parses an escape (or string continue) in a string or character. Supports
    1401              :  * unicode escapes. Returns (codepoint, length, is_string_continue). */
    1402              : std::tuple<Codepoint, int, bool>
    1403         2802 : Lexer::parse_utf8_escape ()
    1404              : {
    1405         2802 :   Codepoint output_char;
    1406         2802 :   int additional_length_offset = 0;
    1407              : 
    1408              :   // skip ahead so that current_char is the character selecting the escape
    1409         2802 :   skip_input ();
    1410         2802 :   current_char = peek_input ();
    1411         2802 :   additional_length_offset++;
    1412              : 
    1413         2802 :   switch (current_char.value)
    1414              :     {
    1415           17 :     case 'x':
    1416           17 :       {
    1417           17 :         auto hex_escape_pair = parse_partial_hex_escape ();
    1418           17 :         long hexLong = hex_escape_pair.first;
    1419           17 :         additional_length_offset += hex_escape_pair.second;
    1420              : 
    1421           17 :         if (hexLong > 127 || hexLong < 0)
    1422            4 :           rust_error_at (
    1423              :             get_current_location (),
    1424              :             "ascii \\x escape %<\\x%x%> out of range - allows up to %<\\x7F%>",
    1425              :             static_cast<unsigned int> (hexLong));
    1426              :         /* TODO: restore capital for escape output - gcc pretty-printer doesn't
    1427              :          * support %X directly */
    1428           17 :         char hexChar = static_cast<char> (hexLong);
    1429              : 
    1430           17 :         output_char = hexChar;
    1431              :       }
    1432           17 :       break;
    1433              :     case 'n':
    1434              :       output_char = '\n';
    1435              :       break;
    1436            0 :     case 'r':
    1437            0 :       output_char = '\r';
    1438            0 :       break;
    1439            2 :     case 't':
    1440            2 :       output_char = '\t';
    1441            2 :       break;
    1442            1 :     case '\\':
    1443            1 :       output_char = '\\';
    1444            1 :       break;
    1445         1404 :     case '0':
    1446         1404 :       output_char = '\0';
    1447         1404 :       break;
    1448            1 :     case '\'':
    1449            1 :       output_char = '\'';
    1450            1 :       break;
    1451            1 :     case '"':
    1452            1 :       output_char = '"';
    1453            1 :       break;
    1454           46 :     case 'u':
    1455           46 :       {
    1456           46 :         auto unicode_escape_pair = parse_partial_unicode_escape ();
    1457           46 :         output_char = unicode_escape_pair.first;
    1458           46 :         additional_length_offset += unicode_escape_pair.second;
    1459              : 
    1460           46 :         return std::make_tuple (output_char, additional_length_offset, false);
    1461              :       }
    1462           28 :       break;
    1463           28 :     case '\r':
    1464           28 :     case '\n':
    1465              :       // string continue: the flag in the third element is set
    1466           28 :       return std::make_tuple (0, parse_partial_string_continue (), true);
    1467            1 :     default:
    1468            1 :       rust_error_at (get_current_location (),
    1469              :                      "unknown escape sequence %<\\%s%>",
    1470            1 :                      current_char.as_string ().c_str ());
    1471              :       // unknown escape: the offending character is not consumed here and
    1472              :       // the string-continue flag stays false
    1473            1 :       return std::make_tuple (output_char, additional_length_offset, false);
    1474         2727 :       break;
    1475              :     }
    1476              :   /* all cases except unicode, string continue and unknown escapes should skip
    1477              :    * their used char */
    1478         2727 :   skip_input ();
    1479         2727 :   current_char = peek_input ();
    1480         2727 :   additional_length_offset++;
    1481              : 
    1482              :   // ordinary escape: return the decoded codepoint with the
    1483              :   // string-continue flag cleared
    1484         2727 :   return std::make_tuple (output_char, additional_length_offset, false);
    1485              : }
    1486              : 
    1487              : // Parses the body of a string continue found in an escape: skips whitespace and returns 1 plus the count skipped since the last '\n'.
    1488              : int
    1489           28 : Lexer::parse_partial_string_continue ()
    1490              : {
    1491           28 :   int additional_length_offset = 1;
    1492              : 
    1493              :   // string continue: skip every whitespace character that follows
    1494              :   // TODO use utf-8 codepoint to skip whitespaces
    1495          364 :   while (is_whitespace (current_char.value))
    1496              :     {
    1497          336 :       if (current_char == '\n')
    1498              :         {
    1499           28 :           current_line++;
    1500           28 :           current_column = 1;
    1501              :           // tell line_table that new line starts
    1502           28 :           start_line (current_line, max_column_hint);
    1503              : 
    1504              :           // reset "length": only characters after the newline are counted
    1505           28 :           additional_length_offset = 1;
    1506              : 
    1507              :           // get next char
    1508           28 :           skip_input ();
    1509           28 :           current_char = peek_input ();
    1510              : 
    1511           28 :           continue;
    1512              :         }
    1513              : 
    1514          308 :       skip_input ();
    1515          308 :       current_char = peek_input ();
    1516          308 :       additional_length_offset++;
    1517              :     }
    1518              : 
    1519           28 :   return additional_length_offset;
    1520              : }
    1521              : 
    1522              : /* Parses the two hex digits of a '\x' escape. The value is not range-checked
    1523              :  * here. Returns (value, length); the value is 0 if a digit is invalid. */
    1524              : std::pair<long, int>
    1525           34 : Lexer::parse_partial_hex_escape ()
    1526              : {
    1527              :   // hex char string (null-terminated)
    1528           34 :   char hexNum[3] = {0, 0, 0};
    1529              : 
    1530              :   // first hex char, read via peek_input (1)
    1531           34 :   current_char = peek_input (1);
    1532           34 :   int additional_length_offset = 1;
    1533              : 
    1534           34 :   if (!is_x_digit (current_char.value))
    1535              :     {
    1536            4 :       rust_error_at (get_current_location (),
    1537              :                      "invalid character %<\\x%s%> in \\x sequence",
    1538            4 :                      current_char.as_string ().c_str ());
    1539            4 :       return std::make_pair (0, 0);
    1540              :     }
    1541           30 :   hexNum[0] = current_char.value;
    1542              : 
    1543              :   // second hex char, read the same way after one skip
    1544           30 :   skip_input ();
    1545           30 :   current_char = peek_input (1);
    1546           30 :   additional_length_offset++;
    1547              : 
    1548           30 :   if (!is_x_digit (current_char.value))
    1549              :     {
    1550            2 :       rust_error_at (get_current_location (),
    1551            2 :                      "invalid character %<\\x%c%s%> in \\x sequence", hexNum[0],
    1552            2 :                      current_char.as_string ().c_str ());
    1553            2 :       return std::make_pair (0, 1);
    1554              :     }
    1555           28 :   skip_input ();
    1556           28 :   hexNum[1] = current_char.value;
    1557              : 
    1558           28 :   long hexLong = std::strtol (hexNum, nullptr, 16);
    1559              : 
    1560           28 :   return std::make_pair (hexLong, additional_length_offset);
    1561              : }
    1562              : 
    1563              : // Parses the body of a unicode escape; returns the codepoint (0 on any error) and the number of characters counted.
    1564              : std::pair<Codepoint, int>
    1565           48 : Lexer::parse_partial_unicode_escape ()
    1566              : {
    1567           48 :   skip_input ();
    1568           48 :   current_char = peek_input ();
    1569           48 :   int additional_length_offset = 0;
    1570              : 
    1571           48 :   if (current_char != '{')
    1572              :     {
    1573            2 :       rust_error_at (get_current_location (),
    1574              :                      "unicode escape should start with %<{%>");
    1575              :       /* Skip what should probably have been between braces.  */
    1576           10 :       while (is_x_digit (current_char.value) || current_char == '_')
    1577              :         {
    1578            6 :           skip_input ();
    1579            6 :           current_char = peek_input ();
    1580            6 :           additional_length_offset++;
    1581              :         }
    1582            2 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1583              :     }
    1584              : 
    1585           46 :   skip_input ();
    1586           46 :   current_char = peek_input ();
    1587           46 :   additional_length_offset++;
    1588              : 
    1589           46 :   if (current_char == '_')
    1590              :     {
    1591            2 :       rust_error_at (get_current_location (),
    1592              :                      "unicode escape cannot start with %<_%>");
    1593            2 :       skip_input ();
    1594            2 :       current_char = peek_input ();
    1595            2 :       additional_length_offset++;
    1596              :       // fallthrough and try to parse the rest anyway
    1597              :     }
    1598              : 
    1599              :   // parse unicode escape - 1-6 hex digits
    1600           46 :   std::string num_str;
    1601           46 :   num_str.reserve (6);
    1602              : 
    1603              :   // loop through to add entire hex number to string
    1604          304 :   while (is_x_digit (current_char.value) || current_char.value == '_')
    1605              :     {
    1606          212 :       if (current_char == '_')
    1607              :         {
    1608              :           // underscores are consumed and counted, but not added to number
    1609           24 :           skip_input ();
    1610           24 :           current_char = peek_input ();
    1611              : 
    1612           24 :           additional_length_offset++;
    1613              : 
    1614           24 :           continue;
    1615              :         }
    1616              : 
    1617          188 :       additional_length_offset++;
    1618              : 
    1619              :       // add raw hex numbers
    1620          188 :       num_str += current_char;
    1621              : 
    1622          188 :       skip_input ();
    1623          188 :       current_char = peek_input ();
    1624              :     }
    1625              : 
    1626           46 :   if (current_char == '}')
    1627              :     {
    1628           44 :       skip_input ();
    1629           44 :       current_char = peek_input ();
    1630           44 :       additional_length_offset++;
    1631              :     }
    1632              :   else
    1633              :     {
    1634              :       // actually an error, but allow propagation anyway. Assume that a
    1635              :       // wrong bracket, whitespace or single/double quotes mean a missing
    1636              :       // terminator; otherwise it is a wrong character, so skip ahead to the
    1637              :       // actual terminator.
    1638              :       // TODO use utf-8 codepoint to skip whitespaces
    1639            2 :       if (current_char == '{' || is_whitespace (current_char.value)
    1640            4 :           || current_char == '\'' || current_char == '"')
    1641              :         {
    1642            0 :           rust_error_at (get_current_location (),
    1643              :                          "expected terminating %<}%> in unicode escape");
    1644            0 :           return std::make_pair (Codepoint (0), additional_length_offset);
    1645              :         }
    1646              :       else
    1647              :         {
    1648            2 :           rust_error_at (get_current_location (),
    1649              :                          "invalid character %qs in unicode escape",
    1650            2 :                          current_char.as_string ().c_str ());
    1651              :           // TODO use utf-8 codepoint to skip whitespaces
    1652            8 :           while (current_char != '}' && current_char != '{'
    1653            6 :                  && !is_whitespace (current_char.value) && current_char != '\''
    1654           14 :                  && current_char != '"')
    1655              :             {
    1656            6 :               skip_input ();
    1657            6 :               current_char = peek_input ();
    1658            6 :               additional_length_offset++;
    1659              :             }
    1660              :           // Consume the actual closing bracket if found
    1661            2 :           if (current_char == '}')
    1662              :             {
    1663            2 :               skip_input ();
    1664            2 :               current_char = peek_input ();
    1665            2 :               additional_length_offset++;
    1666              :             }
    1667            2 :           return std::make_pair (Codepoint (0), additional_length_offset);
    1668              :         }
    1669              :     }
    1670              : 
    1671              :   // ensure 1-6 hex characters
    1672           44 :   if (num_str.length () > 6 || num_str.length () < 1)
    1673              :     {
    1674            4 :       rust_error_at (get_current_location (),
    1675              :                      "unicode escape should be between 1 and 6 hex "
    1676              :                      "characters; it is %lu",
    1677            4 :                      (unsigned long) num_str.length ());
    1678              :       // error: yield codepoint 0
    1679            4 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1680              :     }
    1681              : 
    1682           40 :   unsigned long hex_num = std::strtoul (num_str.c_str (), nullptr, 16);
    1683              : 
    1684           40 :   if (hex_num > 0xd7ff && hex_num < 0xe000)
    1685              :     {
    1686            4 :       rust_error_at (
    1687              :         get_current_location (),
    1688              :         "unicode escape cannot be a surrogate value (D800 to DFFF)");
    1689            4 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1690              :     }
    1691              : 
    1692           36 :   if (hex_num > 0x10ffff)
    1693              :     {
    1694            4 :       rust_error_at (get_current_location (),
    1695              :                      "unicode escape cannot be larger than 10FFFF");
    1696            4 :       return std::make_pair (Codepoint (0), additional_length_offset);
    1697              :     }
    1698              : 
    1699              :   // all checks passed: yield the parsed value as a codepoint
    1700           32 :   return std::make_pair (Codepoint (static_cast<uint32_t> (hex_num)),
    1701              :                          additional_length_offset);
    1702           46 : }
    1703              : 
    1704              : // Parses a byte character, reporting unclosed, empty and non-ASCII literals, and returns its token.
    1705              : TokenPtr
    1706           78 : Lexer::parse_byte_char (location_t loc)
    1707              : {
    1708           78 :   skip_input ();
    1709           78 :   current_column++;
    1710              :   // make current char the next character
    1711           78 :   current_char = peek_input ();
    1712              : 
    1713           78 :   int length = 1;
    1714              : 
    1715              :   // char to save
    1716           78 :   Codepoint byte_char = 0;
    1717              : 
    1718              :   // detect escapes
    1719           78 :   if (current_char == '\\')
    1720              :     {
    1721           30 :       auto escape_length_pair = parse_escape ('\'');
    1722           30 :       byte_char = std::get<0> (escape_length_pair);
    1723           30 :       length += std::get<1> (escape_length_pair);
    1724              : 
    1725           30 :       current_char = peek_input ();
    1726              : 
    1727           30 :       if (current_char != '\'')
    1728              :         {
    1729            0 :           rust_error_at (get_current_location (), "unclosed %<byte char%>");
    1730              :         }
    1731              : 
    1732           30 :       skip_input ();
    1733           30 :       current_char = peek_input ();
    1734           30 :       length++; // go to next char
    1735              :     }
    1736           48 :   else if (current_char != '\'')
    1737              :     {
    1738              :       // otherwise, take the character directly from the input
    1739           48 :       byte_char = current_char;
    1740              : 
    1741           48 :       if (!byte_char.is_ascii ())
    1742              :         {
    1743            2 :           rust_error_at (get_current_location (),
    1744              :                          "non-ASCII character in %<byte char%>");
    1745              :         }
    1746              : 
    1747           48 :       skip_input ();
    1748           48 :       current_char = peek_input ();
    1749           48 :       length++;
    1750              : 
    1751           48 :       if (current_char != '\'')
    1752              :         {
    1753            0 :           rust_error_at (get_current_location (), "unclosed %<byte char%>");
    1754              :         }
    1755              : 
    1756           48 :       skip_input ();
    1757           48 :       current_char = peek_input ();
    1758           48 :       length++; // go to next char
    1759              :     }
    1760              :   else
    1761              :     {
    1762            0 :       rust_error_at (get_current_location (),
    1763              :                      "no character inside %<%> for %<byte char%>");
    1764              :     }
    1765              : 
    1766           78 :   current_column += length;
    1767              : 
    1768           78 :   loc += length - 1;
    1769           78 :   return Token::make_byte_char (loc, byte_char.value);
    1770              : }
    1771              : 
    1772              : // Parses a byte string.
    1773              : TokenPtr
    1774           64 : Lexer::parse_byte_string (location_t loc)
    1775              : {
    1776              :   // byte string
    1777              : 
    1778              :   // skip quote character
    1779           64 :   skip_input ();
    1780           64 :   current_column++;
    1781              : 
    1782           64 :   std::string str;
    1783           64 :   str.reserve (16); // some sensible default
    1784              : 
    1785           64 :   current_char = peek_input ();
    1786              : 
    1787           64 :   const location_t string_begin_locus = get_current_location ();
    1788              : 
    1789          438 :   while (current_char != '"' && !current_char.is_eof ())
    1790              :     {
    1791          310 :       if (current_char == '\\')
    1792              :         {
    1793           31 :           int length = 1;
    1794           31 :           auto escape_length_pair = parse_escape ('"');
    1795           31 :           char output_char = std::get<0> (escape_length_pair);
    1796              : 
    1797           31 :           if (output_char == 0 && std::get<2> (escape_length_pair))
    1798            0 :             length = std::get<1> (escape_length_pair) - 1;
    1799              :           else
    1800           31 :             length += std::get<1> (escape_length_pair);
    1801              : 
    1802           31 :           if (output_char != 0 || !std::get<2> (escape_length_pair))
    1803           31 :             str += output_char;
    1804              : 
    1805           31 :           current_column += length;
    1806              : 
    1807           31 :           continue;
    1808           31 :         }
    1809              : 
    1810          279 :       current_column++;
    1811          279 :       if (current_char.value == '\n')
    1812              :         {
    1813           23 :           current_line++;
    1814           23 :           current_column = 1;
    1815              :           // tell line_table that new line starts
    1816           23 :           start_line (current_line, max_column_hint);
    1817              :         }
    1818              : 
    1819          279 :       str += current_char;
    1820          279 :       skip_input ();
    1821          279 :       current_char = peek_input ();
    1822              :     }
    1823              : 
    1824           64 :   if (current_char == '"')
    1825              :     {
    1826           57 :       current_column++;
    1827              : 
    1828           57 :       skip_input ();
    1829           57 :       current_char = peek_input ();
    1830              :     }
    1831            7 :   else if (current_char.is_eof ())
    1832              :     {
    1833            7 :       rust_error_at (string_begin_locus, "unended byte string literal");
    1834            7 :       return Token::make (END_OF_FILE, get_current_location ());
    1835              :     }
    1836              :   else
    1837              :     {
    1838              :       rust_unreachable ();
    1839              :     }
    1840              : 
    1841           57 :   str.shrink_to_fit ();
    1842           57 :   loc += str.size () - 1;
    1843              : 
    1844           57 :   return Token::make_byte_string (loc, std::move (str));
    1845           64 : }
    1846              : 
    1847              : // Parses a raw byte string.
    1848              : TokenPtr
    1849           32 : Lexer::parse_raw_byte_string (location_t loc)
    1850              : {
    1851              :   // raw byte string literals
    1852           32 :   std::string str;
    1853           32 :   str.reserve (16); // some sensible default
    1854              : 
    1855           32 :   int length = 1;
    1856           32 :   int hash_count = 0;
    1857              : 
    1858           32 :   const location_t string_begin_locus = get_current_location ();
    1859              : 
    1860              :   // get hash count at beginnning
    1861           32 :   skip_input ();
    1862           32 :   current_char = peek_input ();
    1863           32 :   length++;
    1864           32 :   current_column++;
    1865           54 :   while (current_char == '#')
    1866              :     {
    1867           22 :       hash_count++;
    1868           22 :       length++;
    1869           22 :       current_column++;
    1870              : 
    1871           22 :       skip_input ();
    1872           22 :       current_char = peek_input ();
    1873              :     }
    1874              : 
    1875           32 :   if (current_char != '"')
    1876              :     {
    1877            0 :       rust_error_at (get_current_location (),
    1878              :                      "raw byte string has no opening %<\"%>");
    1879              :     }
    1880              : 
    1881           32 :   skip_input ();
    1882           32 :   current_char = peek_input ();
    1883           32 :   length++;
    1884           32 :   current_column++;
    1885              : 
    1886          330 :   while (true)
    1887              :     {
    1888          181 :       if (current_char == '"')
    1889              :         {
    1890           51 :           bool enough_hashes = true;
    1891              : 
    1892           51 :           for (int i = 0; i < hash_count; i++)
    1893              :             {
    1894           26 :               if (peek_input (i + 1) != '#')
    1895              :                 {
    1896              :                   enough_hashes = false;
    1897              :                   break;
    1898              :                 }
    1899              :             }
    1900              : 
    1901           35 :           if (enough_hashes)
    1902              :             {
    1903              :               // skip enough input and peek enough input
    1904           25 :               skip_input (hash_count);
    1905           25 :               current_char = peek_input ();
    1906           25 :               length += hash_count + 1;
    1907           25 :               current_column += hash_count + 1;
    1908           25 :               break;
    1909              :             }
    1910              :         }
    1911          146 :       else if (current_char.is_eof ())
    1912              :         {
    1913            7 :           rust_error_at (string_begin_locus, "unended raw byte string literal");
    1914            7 :           return Token::make (END_OF_FILE, get_current_location ());
    1915              :         }
    1916          139 :       else if (current_char.value > 127)
    1917              :         {
    1918            1 :           rust_error_at (get_current_location (),
    1919              :                          "character %qs in raw byte string out of range",
    1920            1 :                          current_char.as_string ().c_str ());
    1921            1 :           current_char = 0;
    1922              :         }
    1923              : 
    1924          149 :       length++;
    1925          149 :       current_column++;
    1926          149 :       if (current_char == '\n')
    1927              :         {
    1928           22 :           current_line++;
    1929           22 :           current_column = 1;
    1930           22 :           start_line (current_line, max_column_hint);
    1931              :         }
    1932              : 
    1933          149 :       str += current_char;
    1934          149 :       skip_input ();
    1935          149 :       current_char = peek_input ();
    1936          149 :     }
    1937              : 
    1938           25 :   loc += length - 1;
    1939              : 
    1940           25 :   str.shrink_to_fit ();
    1941              : 
    1942           25 :   return Token::make_byte_string (loc, std::move (str));
    1943           32 : }
    1944              : 
    1945              : // Parses a raw identifier.
    1946              : TokenPtr
    1947           81 : Lexer::parse_raw_identifier (location_t loc)
    1948              : {
    1949              :   // raw identifier
    1950           81 :   std::string str;
    1951           81 :   str.reserve (16); // default
    1952              : 
    1953           81 :   skip_input ();
    1954           81 :   current_char = peek_input ();
    1955              : 
    1956           81 :   current_column += 2;
    1957              : 
    1958           81 :   bool first_is_underscore = current_char == '_';
    1959              : 
    1960           81 :   int length = 0;
    1961           81 :   current_char = peek_input ();
    1962              :   // loop through entire name
    1963          475 :   while (is_identifier_continue (current_char.value))
    1964              :     {
    1965          313 :       length++;
    1966              : 
    1967          313 :       str += current_char;
    1968          313 :       skip_input ();
    1969          313 :       current_char = peek_input ();
    1970              :     }
    1971              : 
    1972           81 :   current_column += length;
    1973              : 
    1974           81 :   rust_debug ("raw ident: %s", str.c_str ());
    1975              : 
    1976              :   // if just a single underscore, not an identifier
    1977           81 :   if (first_is_underscore && length == 1)
    1978            1 :     rust_error_at (get_current_location (),
    1979              :                    "%<_%> is not a valid raw identifier");
    1980              : 
    1981           81 :   using namespace Rust::Values;
    1982           81 :   std::set<std::string> invalid{
    1983           81 :     Keywords::CRATE, Keywords::EXTERN_KW,  Keywords::SELF,
    1984           81 :     Keywords::SUPER, Keywords::SELF_ALIAS,
    1985          486 :   };
    1986              : 
    1987           81 :   if (invalid.find (str) != invalid.end ())
    1988              :     {
    1989            1 :       rust_error_at (get_current_location (),
    1990              :                      "%qs is a forbidden raw identifier", str.c_str ());
    1991              : 
    1992            1 :       return nullptr;
    1993              :     }
    1994              :   else
    1995              :     {
    1996           80 :       str.shrink_to_fit ();
    1997           80 :       loc += length - 1;
    1998              : 
    1999           80 :       return Token::make_identifier (loc, std::move (str));
    2000              :     }
    2001           81 : }
    2002              : 
    2003              : // skip broken string input (unterminated strings)
    2004              : void
    2005            0 : Lexer::skip_broken_string_input (Codepoint current_char)
    2006              : {
    2007            0 :   while (current_char != '"' && !current_char.is_eof ())
    2008              :     {
    2009            0 :       if (current_char == '\n')
    2010              :         {
    2011            0 :           current_line++;
    2012            0 :           current_column = 1;
    2013              :         }
    2014              :       else
    2015              :         {
    2016            0 :           current_column++;
    2017              :         }
    2018            0 :       skip_input ();
    2019            0 :       current_char = peek_input ();
    2020              :     }
    2021            0 :   if (current_char == '"')
    2022              :     {
    2023            0 :       current_column++;
    2024              : 
    2025            0 :       skip_input ();
    2026            0 :       current_char = peek_input ();
    2027              :     }
    2028            0 :   rust_debug ("skipped to %d:%d due to bad quotes", current_line,
    2029              :               current_column);
    2030            0 : }
    2031              : 
    2032              : // Parses a string.
    2033              : TokenPtr
    2034        12424 : Lexer::parse_string (location_t loc)
    2035              : {
    2036        12424 :   std::string str;
    2037        12424 :   str.reserve (16); // some sensible default
    2038              : 
    2039        12424 :   current_char = peek_input ();
    2040              : 
    2041        12424 :   const location_t string_begin_locus = get_current_location ();
    2042              : 
    2043              :   // FIXME: This fails if the input ends. How do we check for EOF?
    2044       100866 :   while (current_char.value != '"' && !current_char.is_eof ())
    2045              :     {
    2046        76018 :       if (current_char.value == '\\')
    2047              :         {
    2048         2779 :           int length = 1;
    2049              : 
    2050              :           // parse escape
    2051         2779 :           auto utf8_escape_pair = parse_utf8_escape ();
    2052         2779 :           current_char = std::get<0> (utf8_escape_pair);
    2053              : 
    2054         2779 :           if (current_char == Codepoint (0) && std::get<2> (utf8_escape_pair))
    2055           28 :             length = std::get<1> (utf8_escape_pair) - 1;
    2056              :           else
    2057         2751 :             length += std::get<1> (utf8_escape_pair);
    2058              : 
    2059         2779 :           if (current_char != Codepoint (0) || !std::get<2> (utf8_escape_pair))
    2060         5502 :             str += current_char.as_string ();
    2061              : 
    2062         2779 :           current_column += length;
    2063              : 
    2064              :           // FIXME: should remove this but can't.
    2065              :           // `parse_utf8_escape` does not update `current_char` correctly.
    2066         2779 :           current_char = peek_input ();
    2067         2779 :           continue;
    2068         2779 :         }
    2069              : 
    2070        73239 :       current_column++;
    2071        73239 :       if (current_char.value == '\n')
    2072              :         {
    2073           67 :           current_line++;
    2074           67 :           current_column = 1;
    2075              :           // tell line_table that new line starts
    2076           67 :           start_line (current_line, max_column_hint);
    2077              :         }
    2078              : 
    2079        73239 :       str += current_char;
    2080        73239 :       skip_input ();
    2081        73239 :       current_char = peek_input ();
    2082              :     }
    2083              : 
    2084        12424 :   if (current_char.value == '"')
    2085              :     {
    2086        12410 :       current_column++;
    2087              : 
    2088        12410 :       skip_input ();
    2089        12410 :       current_char = peek_input ();
    2090              :     }
    2091           14 :   else if (current_char.is_eof ())
    2092              :     {
    2093           14 :       rust_error_at (string_begin_locus, "unended string literal");
    2094           14 :       return Token::make (END_OF_FILE, get_current_location ());
    2095              :     }
    2096              :   else
    2097              :     {
    2098              :       rust_unreachable ();
    2099              :     }
    2100              : 
    2101        12410 :   str.shrink_to_fit ();
    2102              : 
    2103        12410 :   return Token::make_string (loc, std::move (str));
    2104        12424 : }
    2105              : 
    2106              : // Parses an identifier or keyword.
    2107              : TokenPtr
    2108       282362 : Lexer::parse_identifier_or_keyword (location_t loc)
    2109              : {
    2110       282362 :   std::string str;
    2111       282362 :   str.reserve (16); // default
    2112       564724 :   str += current_char.as_string ();
    2113              : 
    2114       282362 :   bool first_is_underscore = current_char == '_';
    2115              : 
    2116       282362 :   int length = 1;
    2117       282362 :   current_char = peek_input ();
    2118              : 
    2119              :   // loop through entire name
    2120      1427911 :   while (is_identifier_continue (current_char.value))
    2121              :     {
    2122       863187 :       auto s = current_char.as_string ();
    2123       863187 :       length++;
    2124              : 
    2125      1726374 :       str += current_char.as_string ();
    2126       863187 :       skip_input ();
    2127       863187 :       current_char = peek_input ();
    2128       863187 :     }
    2129              : 
    2130       282362 :   current_column += length;
    2131              : 
    2132              :   // if just a single underscore, not an identifier
    2133       282362 :   if (first_is_underscore && length == 1)
    2134         1292 :     return Token::make (UNDERSCORE, loc);
    2135              : 
    2136       281070 :   str.shrink_to_fit ();
    2137              : 
    2138       281070 :   loc += length - 1;
    2139              : 
    2140       281070 :   TokenId keyword = classify_keyword (str);
    2141       281070 :   if (keyword == IDENTIFIER)
    2142       187024 :     return Token::make_identifier (loc, std::move (str));
    2143              :   else
    2144        94046 :     return Token::make (keyword, loc);
    2145       282362 : }
    2146              : 
    2147              : // Possibly returns a raw string token if it exists - otherwise returns null.
    2148              : TokenPtr
    2149         3914 : Lexer::maybe_parse_raw_string (location_t loc)
    2150              : {
    2151         3914 :   int peek_index = 0;
    2152         3923 :   while (peek_input (peek_index) == '#')
    2153            9 :     peek_index++;
    2154              : 
    2155         3914 :   if (peek_input (peek_index) == '"')
    2156           25 :     return parse_raw_string (loc, peek_index);
    2157              :   else
    2158         3889 :     return nullptr;
    2159              : }
    2160              : 
    2161              : // Returns a raw string token.
    2162              : TokenPtr
    2163           25 : Lexer::parse_raw_string (location_t loc, int initial_hash_count)
    2164              : {
    2165              :   // raw string literals
    2166           25 :   std::string str;
    2167           25 :   str.reserve (16); // some sensible default
    2168              : 
    2169           25 :   int length = 1 + initial_hash_count;
    2170           25 :   current_column += length;
    2171              : 
    2172           25 :   const location_t string_begin_locus = get_current_location ();
    2173              : 
    2174           25 :   if (initial_hash_count > 0)
    2175            7 :     skip_input (initial_hash_count - 1);
    2176              : 
    2177           25 :   current_char = peek_input ();
    2178              : 
    2179           25 :   if (current_char != '"')
    2180            0 :     rust_error_at (get_current_location (), "raw string has no opening %<\"%>");
    2181              : 
    2182           25 :   length++;
    2183           25 :   current_column++;
    2184           25 :   skip_input ();
    2185           25 :   current_char = peek_input ();
    2186              : 
    2187          181 :   while (true)
    2188              :     {
    2189          103 :       if (current_char.value == '"')
    2190              :         {
    2191           38 :           bool enough_hashes = true;
    2192              : 
    2193           38 :           for (int i = 0; i < initial_hash_count; i++)
    2194              :             {
    2195           13 :               if (peek_input (i + 1) != '#')
    2196              :                 {
    2197              :                   enough_hashes = false;
    2198              :                   break;
    2199              :                 }
    2200              :             }
    2201              : 
    2202           28 :           if (enough_hashes)
    2203              :             {
    2204              :               // skip enough input and peek enough input
    2205           25 :               skip_input (initial_hash_count);
    2206           25 :               current_char = peek_input ();
    2207           25 :               length += initial_hash_count + 1;
    2208           25 :               current_column += initial_hash_count + 1;
    2209           25 :               break;
    2210              :             }
    2211              :         }
    2212           75 :       else if (current_char.is_eof ())
    2213              :         {
    2214            0 :           rust_error_at (string_begin_locus, "unended raw string literal");
    2215            0 :           return Token::make (END_OF_FILE, get_current_location ());
    2216              :         }
    2217              : 
    2218           78 :       length++;
    2219           78 :       current_column++;
    2220           78 :       if (current_char == '\n')
    2221              :         {
    2222            1 :           current_line++;
    2223            1 :           current_column = 1;
    2224            1 :           start_line (current_line, max_column_hint);
    2225              :         }
    2226              : 
    2227          156 :       str += current_char.as_string ();
    2228           78 :       skip_input ();
    2229           78 :       current_char = peek_input ();
    2230           78 :     }
    2231              : 
    2232           25 :   loc += length - 1;
    2233              : 
    2234           25 :   str.shrink_to_fit ();
    2235              : 
    2236           25 :   return Token::make_raw_string (loc, std::move (str));
    2237           25 : }
    2238              : 
    2239              : template <typename IsDigitFunc>
    2240              : TokenPtr
    2241          216 : Lexer::parse_non_decimal_int_literal (location_t loc, IsDigitFunc is_digit_func,
    2242              :                                       std::string existent_str, int base)
    2243              : {
    2244          216 :   int length = 1;
    2245              : 
    2246          216 :   skip_input ();
    2247          216 :   current_char = peek_input ();
    2248              : 
    2249          216 :   length++;
    2250              : 
    2251              :   // loop through to add entire number to string
    2252         1869 :   while (is_digit_func (current_char.value) || current_char == '_')
    2253              :     {
    2254         1653 :       if (current_char == '_')
    2255              :         {
    2256              :           // don't add _ to number
    2257           21 :           skip_input ();
    2258           21 :           current_char = peek_input ();
    2259              : 
    2260           21 :           length++;
    2261              : 
    2262           21 :           continue;
    2263              :         }
    2264              : 
    2265         1632 :       length++;
    2266              : 
    2267              :       // add raw numbers
    2268         1632 :       existent_str += current_char;
    2269         1632 :       skip_input ();
    2270         1632 :       current_char = peek_input ();
    2271              :     }
    2272              : 
    2273              :   // convert value to decimal representation
    2274          216 :   long dec_num = std::strtol (existent_str.c_str (), nullptr, base);
    2275              : 
    2276          216 :   existent_str = std::to_string (dec_num);
    2277              : 
    2278              :   // parse in type suffix if it exists
    2279          216 :   auto type_suffix_pair = parse_in_type_suffix ();
    2280          216 :   PrimitiveCoreType type_hint = type_suffix_pair.first;
    2281          216 :   length += type_suffix_pair.second;
    2282              : 
    2283          216 :   current_column += length;
    2284              : 
    2285          216 :   if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64)
    2286              :     {
    2287            0 :       rust_error_at (get_current_location (),
    2288              :                      "invalid type suffix %qs for integer (%s) literal",
    2289              :                      get_type_hint_string (type_hint),
    2290              :                      base == 16
    2291              :                        ? "hex"
    2292              :                        : (base == 8 ? "octal"
    2293              :                                     : (base == 2 ? "binary"
    2294              :                                                  : "<insert unknown base>")));
    2295            0 :       return nullptr;
    2296              :     }
    2297              : 
    2298          216 :   loc += length - 1;
    2299              : 
    2300          216 :   return Token::make_int (loc, std::move (existent_str), type_hint);
    2301              : }
    2302              : 
    2303              : // Parses a hex, binary or octal int literal.
    2304              : TokenPtr
    2305          216 : Lexer::parse_non_decimal_int_literals (location_t loc)
    2306              : {
    2307          216 :   std::string str;
    2308          216 :   str.reserve (16); // some sensible default
    2309          216 :   str += current_char;
    2310              : 
    2311          216 :   current_char = peek_input ();
    2312              : 
    2313          216 :   if (current_char == 'x')
    2314              :     {
    2315              :       // hex (integer only)
    2316          184 :       return parse_non_decimal_int_literal (loc, is_x_digit, str + "x", 16);
    2317              :     }
    2318           32 :   else if (current_char == 'o')
    2319              :     {
    2320              :       // octal (integer only)
    2321           32 :       return parse_non_decimal_int_literal (loc, is_octal_digit,
    2322           16 :                                             std::move (str), 8);
    2323              :     }
    2324           16 :   else if (current_char == 'b')
    2325              :     {
    2326              :       // binary (integer only)
    2327           32 :       return parse_non_decimal_int_literal (loc, is_bin_digit, std::move (str),
    2328           16 :                                             2);
    2329              :     }
    2330              :   else
    2331              :     {
    2332            0 :       return nullptr;
    2333              :     }
    2334          216 : }
    2335              : 
    2336              : // Parses a decimal-based int literal or float literal.
    2337              : TokenPtr
    2338        15451 : Lexer::parse_decimal_int_or_float (location_t loc)
    2339              : {
    2340        15451 :   std::string str;
    2341        15451 :   str.reserve (16); // some sensible default
    2342        15451 :   str += current_char;
    2343              : 
    2344        15451 :   int length = 1;
    2345        15451 :   bool first_zero = current_char == '0';
    2346              : 
    2347        15451 :   current_char = peek_input ();
    2348              : 
    2349              :   // parse initial decimal integer (or first integer part of float) literal
    2350        15451 :   auto initial_decimal = parse_in_decimal ();
    2351        15451 :   str += std::get<0> (initial_decimal);
    2352        15451 :   length += std::get<1> (initial_decimal);
    2353              : 
    2354              :   // detect float literal
    2355              :   //
    2356              :   // Note:
    2357              :   //
    2358              :   // We should not use is_float_digit () for this verification but instead
    2359              :   // directly ISDIGIT because rust does not support non digit values right after
    2360              :   // a dot.
    2361              :   // The following value is not legal in rust:
    2362              :   // let a = 3.e1;
    2363              :   // A `0` should be put between the dot and the exponent to be valid
    2364              :   // (eg. 3.0e1).
    2365        15451 :   if (current_char == '.' && ISDIGIT (peek_input (1).value))
    2366              :     {
    2367              :       // float with a '.', parse another decimal into it
    2368              : 
    2369              :       // add . to str
    2370          345 :       str += current_char;
    2371          345 :       skip_input ();
    2372          345 :       current_char = peek_input ();
    2373          345 :       length++;
    2374              : 
    2375              :       // parse another decimal number for float
    2376          345 :       auto second_decimal = parse_in_decimal ();
    2377          345 :       str += std::get<0> (second_decimal);
    2378          345 :       length += std::get<1> (second_decimal);
    2379              : 
    2380              :       // parse in exponent part if it exists
    2381          345 :       auto exponent_pair = parse_in_exponent_part ();
    2382          345 :       str += exponent_pair.first;
    2383          345 :       length += exponent_pair.second;
    2384              : 
    2385              :       // parse in type suffix if it exists
    2386          345 :       auto type_suffix_pair = parse_in_type_suffix ();
    2387          345 :       PrimitiveCoreType type_hint = type_suffix_pair.first;
    2388          345 :       length += type_suffix_pair.second;
    2389              : 
    2390          345 :       if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
    2391          345 :           && type_hint != CORETYPE_UNKNOWN)
    2392              :         {
    2393            0 :           rust_error_at (get_current_location (),
    2394              :                          "invalid type suffix %qs for floating-point literal",
    2395              :                          get_type_hint_string (type_hint));
    2396              :           // ignore invalid type suffix as everything else seems fine
    2397            0 :           type_hint = CORETYPE_UNKNOWN;
    2398              :         }
    2399              : 
    2400          345 :       current_column += length;
    2401              : 
    2402          345 :       loc += length - 1;
    2403              : 
    2404          345 :       str.shrink_to_fit ();
    2405          345 :       return Token::make_float (loc, std::move (str), type_hint);
    2406          345 :     }
    2407        15106 :   else if (current_char == '.'
    2408        15106 :            && check_valid_float_dot_end (peek_input (1).value))
    2409              :     {
    2410              :       // float that is just an integer with a terminating '.' character
    2411              : 
    2412              :       // add . to str
    2413            8 :       str += current_char;
    2414            8 :       skip_input ();
    2415            8 :       current_char = peek_input ();
    2416            8 :       length++;
    2417              : 
    2418              :       // type hint not allowed
    2419              : 
    2420            8 :       current_column += length;
    2421              : 
    2422            8 :       loc += length - 1;
    2423              : 
    2424            8 :       str.shrink_to_fit ();
    2425            8 :       return Token::make_float (loc, std::move (str), CORETYPE_UNKNOWN);
    2426              :     }
    2427        15098 :   else if (current_char == 'E' || current_char == 'e')
    2428              :     {
    2429              :       // exponent float with no '.' character
    2430              : 
    2431              :       // parse exponent part
    2432            0 :       auto exponent_pair = parse_in_exponent_part ();
    2433            0 :       str += exponent_pair.first;
    2434            0 :       length += exponent_pair.second;
    2435              : 
    2436              :       // parse in type suffix if it exists
    2437            0 :       auto type_suffix_pair = parse_in_type_suffix ();
    2438            0 :       PrimitiveCoreType type_hint = type_suffix_pair.first;
    2439            0 :       length += type_suffix_pair.second;
    2440              : 
    2441            0 :       if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64
    2442            0 :           && type_hint != CORETYPE_UNKNOWN)
    2443              :         {
    2444            0 :           rust_error_at (get_current_location (),
    2445              :                          "invalid type suffix %qs for floating-point literal",
    2446              :                          get_type_hint_string (type_hint));
    2447              :           // ignore invalid type suffix as everything else seems fine
    2448            0 :           type_hint = CORETYPE_UNKNOWN;
    2449              :         }
    2450              : 
    2451            0 :       current_column += length;
    2452              : 
    2453            0 :       loc += length - 1;
    2454              : 
    2455            0 :       str.shrink_to_fit ();
    2456            0 :       return Token::make_float (loc, std::move (str), type_hint);
    2457            0 :     }
    2458              :   else
    2459              :     {
    2460              :       // is an integer
    2461              : 
    2462              :       // parse in type suffix if it exists
    2463        15098 :       auto type_suffix_pair = parse_in_type_suffix ();
    2464        15098 :       PrimitiveCoreType type_hint = type_suffix_pair.first;
    2465              :       /* A "real" pure decimal doesn't have a suffix and no zero prefix.  */
    2466        15098 :       if (type_hint == CORETYPE_UNKNOWN)
    2467              :         {
    2468        13979 :           bool pure_decimal = std::get<2> (initial_decimal);
    2469        17151 :           if (pure_decimal && (!first_zero || str.size () == 1))
    2470              :             type_hint = CORETYPE_PURE_DECIMAL;
    2471              :         }
    2472        15098 :       length += type_suffix_pair.second;
    2473              : 
    2474        15098 :       current_column += length;
    2475              : 
    2476        15098 :       loc += length - 1;
    2477              : 
    2478        15098 :       str.shrink_to_fit ();
    2479        15098 :       return Token::make_int (loc, std::move (str), type_hint);
    2480              :     }
    2481        15451 : }
    2482              : 
    2483              : TokenPtr
    2484          843 : Lexer::parse_char_or_lifetime (location_t loc)
    2485              : {
    2486          843 :   int length = 1;
    2487              : 
    2488          843 :   current_char = peek_input ();
    2489          843 :   if (current_char.is_eof ())
    2490            0 :     return nullptr;
    2491              : 
    2492              :   // parse escaped char literal
    2493          843 :   if (current_char.value == '\\')
    2494              :     {
    2495              :       // parse escape
    2496           23 :       auto utf8_escape_pair = parse_utf8_escape ();
    2497           23 :       Codepoint escaped_char = std::get<0> (utf8_escape_pair);
    2498           23 :       length += std::get<1> (utf8_escape_pair);
    2499              : 
    2500           23 :       if (peek_input ().value != '\'')
    2501              :         {
    2502            0 :           rust_error_at (get_current_location (), "unended character literal");
    2503              :         }
    2504              :       else
    2505              :         {
    2506           23 :           skip_input ();
    2507           23 :           current_char = peek_input ();
    2508           23 :           length++;
    2509              :         }
    2510              : 
    2511           23 :       current_column += length;
    2512              : 
    2513           23 :       loc += length - 1;
    2514              : 
    2515           23 :       return Token::make_char (loc, escaped_char);
    2516              :     }
    2517              :   else
    2518              :     {
    2519          820 :       skip_input ();
    2520              : 
    2521          820 :       if (peek_input ().value == '\'')
    2522              :         {
    2523              :           // parse non-escaped char literal
    2524          203 :           Codepoint non_escaped_char = current_char;
    2525              : 
    2526              :           // skip the ' character
    2527          203 :           skip_input ();
    2528          203 :           current_char = peek_input ();
    2529              : 
    2530              :           // TODO fix due to different widths of utf-8 chars?
    2531          203 :           current_column += 3;
    2532              : 
    2533          203 :           loc += 2;
    2534              : 
    2535          203 :           return Token::make_char (loc, non_escaped_char);
    2536              :         }
    2537          617 :       else if (is_identifier_start (current_char.value))
    2538              :         {
    2539              :           // parse lifetime name
    2540          617 :           std::string str;
    2541         1234 :           str += current_char.as_string ();
    2542          617 :           length++;
    2543              : 
    2544          617 :           current_char = peek_input ();
    2545         1941 :           while (is_identifier_continue (current_char.value))
    2546              :             {
    2547         1414 :               str += current_char.as_string ();
    2548          707 :               skip_input ();
    2549          707 :               current_char = peek_input ();
    2550          707 :               length++;
    2551              :             }
    2552              : 
    2553          617 :           current_column += length;
    2554              : 
    2555          617 :           loc += length - 1;
    2556              : 
    2557              :           // TODO some keywords cannot be used for a lifetime label #2306
    2558              :           // https://doc.rust-lang.org/reference/tokens.html
    2559              : 
    2560          617 :           str.shrink_to_fit ();
    2561          617 :           return Token::make_lifetime (loc, std::move (str));
    2562          617 :         }
    2563              :       else
    2564              :         {
    2565            0 :           rust_error_at (
    2566              :             get_current_location (),
    2567              :             "expected %' after character constant in character literal");
    2568            0 :           return nullptr;
    2569              :         }
    2570              :     }
    2571              : }
    2572              : 
    2573              : void
    2574          100 : Lexer::split_current_token (TokenId new_left, TokenId new_right)
    2575              : {
    2576              :   /* TODO: assert that this TokenId is a "simple token" like punctuation and not
    2577              :    * like "IDENTIFIER"? */
    2578          100 :   location_t current_loc = peek_token ()->get_locus ();
    2579          100 :   TokenPtr new_left_tok = Token::make (new_left, current_loc);
    2580          100 :   TokenPtr new_right_tok = Token::make (new_right, current_loc + 1);
    2581              : 
    2582          100 :   token_queue.replace_current_value (std::move (new_left_tok));
    2583          100 :   token_queue.insert (1, std::move (new_right_tok));
    2584          100 : }
    2585              : 
    2586              : void
    2587            2 : Lexer::split_current_token (std::vector<TokenPtr> new_tokens)
    2588              : {
    2589            2 :   rust_assert (new_tokens.size () > 0);
    2590            4 :   token_queue.replace_current_value (new_tokens[0]);
    2591              : 
    2592            5 :   for (size_t i = 1; i < new_tokens.size (); i++)
    2593              :     {
    2594            6 :       token_queue.insert (i, new_tokens[i]);
    2595              :     }
    2596            2 : }
    2597              : 
    2598              : void
    2599       169123 : Lexer::start_line (int current_line, int current_column)
    2600              : {
    2601       169123 :   if (line_map)
    2602       169123 :     linemap_line_start (line_table, current_line, current_column);
    2603       169123 : }
    2604              : 
    2605              : } // namespace Rust
    2606              : 
    2607              : #if CHECKING_P
    2608              : 
    2609              : namespace selftest {
    2610              : 
    2611              : // Checks if `src` has the same contents as the given characters
    2612              : static void
    2613            6 : assert_source_content (Rust::InputSource &src,
    2614              :                        const std::vector<uint32_t> &expected)
    2615              : {
    2616            6 :   Rust::Codepoint src_char = src.next ();
    2617           41 :   for (auto expected_char : expected)
    2618              :     {
    2619              :       // Make sure that `src` is not shorter than `expected`
    2620           35 :       ASSERT_FALSE (src_char.is_eof ());
    2621              :       // Checks skipped character is expeceted one.
    2622           35 :       ASSERT_EQ (src_char.value, expected_char);
    2623           35 :       src_char = src.next ();
    2624              :     }
    2625              :   // Checks if `src` and `chars` has the same length.
    2626            6 :   ASSERT_TRUE (src_char.is_eof ());
    2627            6 : }
    2628              : 
    2629              : static void
    2630            4 : test_buffer_input_source (std::string str,
    2631              :                           const std::vector<uint32_t> &expected)
    2632              : {
    2633            4 :   Rust::BufferInputSource source (str, 0);
    2634            4 :   assert_source_content (source, expected);
    2635            4 : }
    2636              : 
    2637              : static void
    2638            2 : test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
    2639              : {
    2640            2 :   FILE *tmpf = tmpfile ();
    2641              :   // Moves to the first character
    2642            2 :   fputs (str.c_str (), tmpf);
    2643            2 :   std::rewind (tmpf);
    2644            2 :   Rust::FileInputSource source (tmpf);
    2645            2 :   assert_source_content (source, expected);
    2646            2 : }
    2647              : 
    2648              : void
    2649            1 : rust_input_source_test ()
    2650              : {
    2651              :   // ASCII
    2652            1 :   std::string src = (const char *) u8"_abcde\tXYZ\v\f";
    2653            1 :   std::vector<uint32_t> expected = {u'_',  u'a', u'b', u'c', u'd',  u'e',
    2654            1 :                                     u'\t', u'X', u'Y', u'Z', u'\v', u'\f'};
    2655            2 :   test_buffer_input_source (src, expected);
    2656              : 
    2657              :   // BOM
    2658            1 :   src = (const char *) u8"\xef\xbb\xbfOK";
    2659            1 :   expected = {u'O', u'K'};
    2660            2 :   test_buffer_input_source (src, expected);
    2661              : 
    2662              :   // Russian
    2663            1 :   src = (const char *) u8"приве́т";
    2664            1 :   expected = {u'п',
    2665              :               u'р',
    2666              :               u'и',
    2667              :               u'в',
    2668              :               0x0435 /* CYRILLIC SMALL LETTER IE е */,
    2669              :               0x301 /* COMBINING ACUTE ACCENT ́ */,
    2670            1 :               u'т'};
    2671            2 :   test_buffer_input_source (src, expected);
    2672              : 
    2673            1 :   src = (const char *) u8"❤️🦀";
    2674            1 :   expected = {0x2764 /* HEAVY BLACK HEART */,
    2675            1 :               0xfe0f /* VARIATION SELECTOR-16 */, U'🦀'};
    2676            2 :   test_buffer_input_source (src, expected);
    2677              : 
    2678            1 :   src = (const char *) u8"こんにちは";
    2679            1 :   expected = {u'こ', u'ん', u'に', u'ち', u'は'};
    2680            2 :   test_file_input_source (src, expected);
    2681              : 
    2682            1 :   src = (const char *) u8"👮‍♂👩‍⚕";
    2683            1 :   expected
    2684              :     = {0x1f46e /* POLICE OFFICER */,   0x200d /* ZERO WIDTH JOINER */,
    2685              :        0x2642 /* MALE SIGN */,         0x1f469 /* WOMAN */,
    2686            1 :        0x200d /* ZERO WIDTH JOINER */, 0x2695 /* STAFF OF AESCULAPIUS */};
    2687            2 :   test_file_input_source (src, expected);
    2688            1 : }
    2689              : 
    2690              : } // namespace selftest
    2691              : 
    2692              : #endif // CHECKING_P
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.