LCOV - code coverage report
Current view: top level - gcc/go/gofrontend - lex.h (source / functions) Coverage Total Hit
Test: gcc.info Lines: 96.5 % 115 111
Test Date: 2026-02-28 14:20:25 Functions: 94.7 % 19 18
Legend: Lines:     hit not hit

            Line data    Source code
       1              : // lex.h -- Go frontend lexer.     -*- C++ -*-
       2              : 
       3              : // Copyright 2009 The Go Authors. All rights reserved.
       4              : // Use of this source code is governed by a BSD-style
       5              : // license that can be found in the LICENSE file.
       6              : 
       7              : #ifndef GO_LEX_H
       8              : #define GO_LEX_H
       9              : 
      10              : #include <mpfr.h>
      11              : 
      12              : #include "operator.h"
      13              : #include "go-linemap.h"
      14              : 
      15              : #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
      16              : # define GO_ATTRIBUTE_UNUSED __attribute__ ((__unused__))
      17              : #else
      18              : # define GO_ATTRIBUTE_UNUSED
      19              : #endif
      20              : 
      21              : struct Unicode_range;
      22              : 
      23              : // The keywords.  These must be in sorted order, other than
      24              : // KEYWORD_INVALID.  They must match the Keywords::mapping_ array in
      25              : // lex.cc.
      26              : 
      27              : enum Keyword
      28              : {
      29              :   KEYWORD_INVALID,      // Not a keyword.
      30              :   KEYWORD_ASM,
      31              :   KEYWORD_BREAK,
      32              :   KEYWORD_CASE,
      33              :   KEYWORD_CHAN,
      34              :   KEYWORD_CONST,
      35              :   KEYWORD_CONTINUE,
      36              :   KEYWORD_DEFAULT,
      37              :   KEYWORD_DEFER,
      38              :   KEYWORD_ELSE,
      39              :   KEYWORD_FALLTHROUGH,
      40              :   KEYWORD_FOR,
      41              :   KEYWORD_FUNC,
      42              :   KEYWORD_GO,
      43              :   KEYWORD_GOTO,
      44              :   KEYWORD_IF,
      45              :   KEYWORD_IMPORT,
      46              :   KEYWORD_INTERFACE,
      47              :   KEYWORD_MAP,
      48              :   KEYWORD_PACKAGE,
      49              :   KEYWORD_RANGE,
      50              :   KEYWORD_RETURN,
      51              :   KEYWORD_SELECT,
      52              :   KEYWORD_STRUCT,
      53              :   KEYWORD_SWITCH,
      54              :   KEYWORD_TYPE,
      55              :   KEYWORD_VAR
      56              : };
      57              : 
      58              : // Pragmas built from magic comments and recorded for functions.
      59              : // These are used as bits in a bitmask.
      60              : // The set of values is intended to be the same as the gc compiler.
      61              : 
      62              : enum GoPragma
      63              : {
      64              :   GOPRAGMA_NOINTERFACE = 1 << 0,  // Method not in type descriptor.
      65              :   GOPRAGMA_NOESCAPE = 1 << 1,             // Args do not escape.
      66              :   GOPRAGMA_NORACE = 1 << 2,               // No race detector.
      67              :   GOPRAGMA_NOSPLIT = 1 << 3,              // Do not split stack.
      68              :   GOPRAGMA_NOINLINE = 1 << 4,             // Do not inline.
      69              :   GOPRAGMA_SYSTEMSTACK = 1 << 5,  // Must run on system stack.
      70              :   GOPRAGMA_NOWRITEBARRIER = 1 << 6,       // No write barriers.
      71              :   GOPRAGMA_NOWRITEBARRIERREC = 1 << 7,    // No write barriers here or callees.
      72              :   GOPRAGMA_YESWRITEBARRIERREC = 1 << 8,   // Stops nowritebarrierrec.
      73              :   GOPRAGMA_MARK = 1 << 9,         // Marker for nowritebarrierrec.
      74              :   GOPRAGMA_CGOUNSAFEARGS = 1 << 10,       // Pointer to arg is pointer to all.
      75              :   GOPRAGMA_UINTPTRESCAPES = 1 << 11,      // uintptr(p) escapes.
      76              :   GOPRAGMA_NOTINHEAP = 1 << 12            // type is not in heap.
      77              : };
      78              : 
      79              : // A token returned from the lexer.
      80              : 
      81              : class Token
      82              : {
      83              :  public:
      84              :   // Token classification.
      85              :   enum Classification
      86              :   {
      87              :     // Token is invalid.
      88              :     TOKEN_INVALID,
      89              :     // Token indicates end of input.
      90              :     TOKEN_EOF,
      91              :     // Token is a keyword.
      92              :     TOKEN_KEYWORD,
      93              :     // Token is an identifier.
      94              :     TOKEN_IDENTIFIER,
      95              :     // Token is a string of characters.
      96              :     TOKEN_STRING,
      97              :     // Token is an operator.
      98              :     TOKEN_OPERATOR,
      99              :     // Token is a character constant.
     100              :     TOKEN_CHARACTER,
     101              :     // Token is an integer.
     102              :     TOKEN_INTEGER,
     103              :     // Token is a floating point number.
     104              :     TOKEN_FLOAT,
     105              :     // Token is an imaginary number.
     106              :     TOKEN_IMAGINARY
     107              :   };
     108              : 
     109              :   ~Token();
     110              :   Token(const Token&);
     111              :   Token& operator=(const Token&);
     112              : 
     113              :   // Get token classification.
     114              :   Classification
     115     15074471 :   classification() const
     116     15074471 :   { return this->classification_; }
     117              : 
     118              :   // Make a token for an invalid value.
     119              :   static Token
     120        25414 :   make_invalid_token(Location location)
     121        25414 :   { return Token(TOKEN_INVALID, location); }
     122              : 
     123              :   // Make a token representing end of file.
     124              :   static Token
     125        12709 :   make_eof_token(Location location)
     126        12709 :   { return Token(TOKEN_EOF, location); }
     127              : 
     128              :   // Make a keyword token.
     129              :   static Token
     130      1109846 :   make_keyword_token(Keyword keyword, Location location)
     131              :   {
     132      1109846 :     Token tok(TOKEN_KEYWORD, location);
     133      1109846 :     tok.u_.keyword = keyword;
     134      1109846 :     return tok;
     135              :   }
     136              : 
     137              :   // Make an identifier token.
     138              :   static Token
     139      7643127 :   make_identifier_token(const std::string& value, bool is_exported,
     140              :                         Location location)
     141              :   {
     142      7643127 :     Token tok(TOKEN_IDENTIFIER, location);
     143      7643127 :     tok.u_.identifier_value.name = new std::string(value);
     144      7643127 :     tok.u_.identifier_value.is_exported = is_exported;
     145      7643127 :     return tok;
     146              :   }
     147              : 
     148              :   // Make a quoted string token.
     149              :   static Token
     150       460495 :   make_string_token(const std::string& value, Location location)
     151              :   {
     152       460495 :     Token tok(TOKEN_STRING, location);
     153       460495 :     tok.u_.string_value = new std::string(value);
     154       460495 :     return tok;
     155              :   }
     156              : 
     157              :   // Make an operator token.
     158              :   static Token
     159     11184438 :   make_operator_token(Operator op, Location location)
     160              :   {
     161     11184438 :     Token tok(TOKEN_OPERATOR, location);
     162     11184438 :     tok.u_.op = op;
     163     11184438 :     return tok;
     164              :   }
     165              : 
     166              :   // Make a character constant token.
     167              :   static Token
     168        49036 :   make_character_token(mpz_t val, Location location)
     169              :   {
     170        49036 :     Token tok(TOKEN_CHARACTER, location);
     171        49036 :     mpz_init(tok.u_.integer_value);
     172        49036 :     mpz_swap(tok.u_.integer_value, val);
     173        49036 :     return tok;
     174              :   }
     175              : 
     176              :   // Make an integer token.
     177              :   static Token
     178      1261667 :   make_integer_token(mpz_t val, Location location)
     179              :   {
     180      1261667 :     Token tok(TOKEN_INTEGER, location);
     181      1261667 :     mpz_init(tok.u_.integer_value);
     182      1261667 :     mpz_swap(tok.u_.integer_value, val);
     183      1261667 :     return tok;
     184              :   }
     185              : 
     186              :   // Make a float token.
     187              :   static Token
     188        18714 :   make_float_token(mpfr_t val, Location location)
     189              :   {
     190        18714 :     Token tok(TOKEN_FLOAT, location);
     191        18714 :     mpfr_init(tok.u_.float_value);
     192        18714 :     mpfr_swap(tok.u_.float_value, val);
     193        18714 :     return tok;
     194              :   }
     195              : 
     196              :   // Make a token for an imaginary number.
     197              :   static Token
     198          993 :   make_imaginary_token(mpfr_t val, Location location)
     199              :   {
     200          993 :     Token tok(TOKEN_IMAGINARY, location);
     201          993 :     mpfr_init(tok.u_.float_value);
     202          993 :     mpfr_swap(tok.u_.float_value, val);
     203          993 :     return tok;
     204              :   }
     205              : 
     206              :   // Get the location of the token.
     207              :   Location
     208     19573459 :   location() const
     209     10245594 :   { return this->location_; }
     210              : 
     211              :   // Return whether this is an invalid token.
     212              :   bool
     213     57866163 :   is_invalid() const
     214     57866163 :   { return this->classification_ == TOKEN_INVALID; }
     215              : 
     216              :   // Return whether this is the EOF token.
     217              :   bool
     218       480763 :   is_eof() const
     219       480763 :   { return this->classification_ == TOKEN_EOF; }
     220              : 
     221              :   // Return the keyword value for a keyword token.
     222              :   Keyword
     223      1322537 :   keyword() const
     224              :   {
     225      1322537 :     go_assert(this->classification_ == TOKEN_KEYWORD);
     226      1322537 :     return this->u_.keyword;
     227              :   }
     228              : 
     229              :   // Return whether this is an identifier.
     230              :   bool
     231      6099770 :   is_identifier() const
     232      6099770 :   { return this->classification_ == TOKEN_IDENTIFIER; }
     233              : 
     234              :   // Return the identifier.
     235              :   const std::string&
     236      8343249 :   identifier() const
     237              :   {
     238      8343249 :     go_assert(this->classification_ == TOKEN_IDENTIFIER);
     239      8343249 :     return *this->u_.identifier_value.name;
     240              :   }
     241              : 
     242              :   // Return whether the identifier is exported.
     243              :   bool
     244      7544917 :   is_identifier_exported() const
     245              :   {
     246      7544917 :     go_assert(this->classification_ == TOKEN_IDENTIFIER);
     247      7544917 :     return this->u_.identifier_value.is_exported;
     248              :   }
     249              : 
     250              :   // Return whether this is a string.
     251              :   bool
     252       128106 :   is_string() const
     253              :   {
     254       128106 :     return this->classification_ == TOKEN_STRING;
     255              :   }
     256              : 
     257              :   // Return the value of a string.  The returned value is a string of
     258              :   // UTF-8 characters.
     259              :   std::string
     260       460489 :   string_value() const
     261              :   {
     262       460489 :     go_assert(this->classification_ == TOKEN_STRING);
     263       460489 :     return *this->u_.string_value;
     264              :   }
     265              : 
     266              :   // Return the value of a character constant.
     267              :   const mpz_t*
     268        49035 :   character_value() const
     269              :   {
     270        49035 :     go_assert(this->classification_ == TOKEN_CHARACTER);
     271        49035 :     return &this->u_.integer_value;
     272              :   }
     273              : 
     274              :   // Return the value of an integer.
     275              :   const mpz_t*
     276      1261659 :   integer_value() const
     277              :   {
     278      1261659 :     go_assert(this->classification_ == TOKEN_INTEGER);
     279      1261659 :     return &this->u_.integer_value;
     280              :   }
     281              : 
     282              :   // Return the value of a float.
     283              :   const mpfr_t*
     284        18712 :   float_value() const
     285              :   {
     286        18712 :     go_assert(this->classification_ == TOKEN_FLOAT);
     287        18712 :     return &this->u_.float_value;
     288              :   }
     289              : 
     290              :   // Return the value of an imaginary number.
     291              :   const mpfr_t*
     292          992 :   imaginary_value() const
     293              :   {
     294          992 :     go_assert(this->classification_ == TOKEN_IMAGINARY);
     295          992 :     return &this->u_.float_value;
     296              :   }
     297              : 
     298              :   // Return the operator value for an operator token.
     299              :   Operator
     300      7244881 :   op() const
     301              :   {
     302      7244881 :     go_assert(this->classification_ == TOKEN_OPERATOR);
     303      7244881 :     return this->u_.op;
     304              :   }
     305              : 
     306              :   // Return whether this token is KEYWORD.
     307              :   bool
     308      5581106 :   is_keyword(Keyword keyword) const
     309              :   {
     310      5581106 :     return (this->classification_ == TOKEN_KEYWORD
     311      3167120 :             && this->u_.keyword == keyword);
     312              :   }
     313              : 
     314              :   // Return whether this token is OP.
     315              :   bool
     316     97505805 :   is_op(Operator op) const
     317     38831364 :   { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
     318              : 
     319              :   // Print the token for debugging.
     320              :   void
     321              :   print(FILE*) const;
     322              : 
     323              :  private:
     324              :   // Private constructor used by make_..._token functions above.
     325              :   Token(Classification, Location);
     326              : 
     327              :   // Clear the token.
     328              :   void
     329              :   clear();
     330              : 
     331              :   // The token classification.
     332              :   Classification classification_;
     333              :   union
     334              :   {
     335              :     // The keyword value for TOKEN_KEYWORD.
     336              :     Keyword keyword;
     337              :     // The token value for TOKEN_IDENTIFIER.
     338              :     struct
     339              :     {
     340              :       // The name of the identifier.  This has been mangled to only
     341              :       // include ASCII characters.
     342              :       std::string* name;
     343              :       // Whether this name should be exported.  This is true if the
     344              :       // first letter in the name is upper case.
     345              :       bool is_exported;
     346              :     } identifier_value;
     347              :     // The string value for TOKEN_STRING.
     348              :     std::string* string_value;
     349              :     // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
     350              :     mpz_t integer_value;
     351              :     // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
     352              :     mpfr_t float_value;
     353              :     // The operator value for TOKEN_OPERATOR.
     354              :     Operator op;
     355              :   } u_;
     356              :   // The source location.
     357              :   Location location_;
     358              : };
     359              : 
     360              : // The lexer itself.
     361              : 
     362              : class Lex
     363              : {
     364              :  public:
     365              :   Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
     366              : 
     367              :   ~Lex();
     368              : 
     369              :   // Return the next token.
     370              :   Token
     371              :   next_token();
     372              : 
     373              :   // Return the contents of any current //extern comment.
     374              :   const std::string&
     375              :   extern_name() const
     376       128964 :   { return this->extern_; }
     377              : 
     378              :   // Return the current set of pragmas, and clear them.
     379              :   unsigned int
     380       619615 :   get_and_clear_pragmas()
     381              :   {
     382       619615 :     unsigned int ret = this->pragmas_;
     383       619615 :     this->pragmas_ = 0;
     384       619615 :     return ret;
     385              :   }
     386              : 
     387         7833 :   struct Linkname
     388              :   {
     389              :     std::string ext_name;       // External name; empty to just export.
     390              :     bool is_exported;           // Whether the internal name is exported.
     391              :     Location loc;               // Location of go:linkname directive.
     392              : 
     393         3913 :     Linkname()
     394         3913 :       : ext_name(), is_exported(false), loc()
     395              :     { }
     396              : 
     397         3920 :     Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a)
     398         7840 :       : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a)
     399              :     { }
     400              :   };
     401              : 
     402              :   typedef std::map<std::string, Linkname> Linknames;
     403              : 
     404              :   // Return the linknames seen so far, or NULL if none, and clear the
     405              :   // set.  These are from go:linkname compiler directives.
     406              :   Linknames*
     407        12707 :   get_and_clear_linknames()
     408              :   {
     409        12707 :     Linknames* ret = this->linknames_;
     410        12707 :     this->linknames_ = NULL;
     411        12707 :     return ret;
     412              :   }
     413              : 
     414              :   // Return whether there are any current go:embed patterns.
     415              :   bool
     416       539145 :   has_embeds() const
     417       539145 :   { return !this->embeds_.empty(); }
     418              : 
     419              :   // If there are any go:embed patterns seen so far, store them in
     420              :   // *EMBEDS and clear the saved set.  *EMBEDS must be an empty
     421              :   // vector.
     422              :   void
     423           22 :   get_and_clear_embeds(std::vector<std::string>* embeds)
     424              :   {
     425           22 :     go_assert(embeds->empty());
     426           22 :     std::swap(*embeds, this->embeds_);
     427           22 :   }
     428              : 
     429              :   // Clear any go:embed patterns seen so far.  This is used for
     430              :   // erroneous cases.
     431              :   void
     432            0 :   clear_embeds()
     433            0 :   { this->embeds_.clear(); }
     434              : 
     435              :   // Return whether the identifier NAME should be exported.  NAME is a
     436              :   // mangled name which includes only ASCII characters.
     437              :   static bool
     438              :   is_exported_mangled_name(const std::string& name);
     439              : 
     440              :   // Return whether the identifier NAME should be exported.  NAME is
     441              :   // an unmangled utf-8 string and may contain non-ASCII characters.
     442              :   static bool
     443              :   is_exported_name(const std::string& name);
     444              : 
     445              :   // Return whether the identifier NAME is invalid.  When we see an
     446              :   // invalid character we still build an identifier, but we use a
     447              :   // magic string to indicate that the identifier is invalid.  We then
     448              :   // use this to avoid knock-on errors.
     449              :   static bool
     450              :   is_invalid_identifier(const std::string& name);
     451              : 
     452              :   // A helper function.  Append V to STR.  IS_CHARACTER is true if V
     453              :   // is a Unicode character which should be converted into UTF-8,
     454              :   // false if it is a byte value to be appended directly.  The
     455              :   // location is used to warn about an out of range character.
     456              :   static void
     457              :   append_char(unsigned int v, bool is_charater, std::string* str,
     458              :               Location);
     459              : 
     460              :   // A helper function.  Fetch a UTF-8 character from STR and store it
     461              :   // in *VALUE.  Return the number of bytes read from STR.  Return 0
     462              :   // if STR does not point to a valid UTF-8 character.
     463              :   static int
     464              :   fetch_char(const char* str, unsigned int *value);
     465              : 
     466              :   // Return whether C is a Unicode or "C" locale space character.
     467              :   static bool
     468              :   is_unicode_space(unsigned int c);
     469              : 
     470              :   // Convert the specified hex char into an unsigned integer value.
     471              :   static unsigned
     472              :   hex_val(char c);
     473              : 
     474              :  private:
     475              :   ssize_t
     476              :   get_line();
     477              : 
     478              :   bool
     479              :   require_line();
     480              : 
     481              :   // The current location.
     482              :   Location
     483              :   location() const;
     484              : 
     485              :   // A position CHARS column positions before the current location.
     486              :   Location
     487              :   earlier_location(int chars) const;
     488              : 
     489              :   static bool
     490              :   is_hex_digit(char);
     491              : 
     492              :   static bool
     493              :   is_base_digit(int base, char);
     494              : 
     495              :   static unsigned char
     496          122 :   octal_value(char c)
     497          122 :   { return c - '0'; }
     498              : 
     499              :   Token
     500            0 :   make_invalid_token()
     501            0 :   { return Token::make_invalid_token(this->location()); }
     502              : 
     503              :   Token
     504        12709 :   make_eof_token()
     505        12709 :   { return Token::make_eof_token(this->location()); }
     506              : 
     507              :   Token
     508     10941373 :   make_operator(Operator op, int chars)
     509     10941373 :   { return Token::make_operator_token(op, this->earlier_location(chars)); }
     510              : 
     511              :   Token
     512              :   gather_identifier();
     513              : 
     514              :   static bool
     515              :   could_be_exponent(int base, const char*, const char*);
     516              : 
     517              :   Token
     518              :   gather_number();
     519              : 
     520              :   void
     521              :   skip_exponent();
     522              : 
     523              :   Token
     524              :   gather_character();
     525              : 
     526              :   Token
     527              :   gather_string();
     528              : 
     529              :   Token
     530              :   gather_raw_string();
     531              : 
     532              :   const char*
     533              :   advance_one_utf8_char(const char*, unsigned int*, bool*);
     534              : 
     535              :   const char*
     536              :   advance_one_char(const char*, bool, unsigned int*, bool*);
     537              : 
     538              :   static bool
     539              :   is_unicode_digit(unsigned int c);
     540              : 
     541              :   static bool
     542              :   is_unicode_letter(unsigned int c);
     543              : 
     544              :   static bool
     545              :   is_unicode_uppercase(unsigned int c);
     546              : 
     547              :   static bool
     548              :   is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
     549              :                       size_t range_size);
     550              : 
     551              :   Operator
     552              :   three_character_operator(char, char, char);
     553              : 
     554              :   Operator
     555              :   two_character_operator(char, char);
     556              : 
     557              :   Operator
     558              :   one_character_operator(char);
     559              : 
     560              :   bool
     561              :   skip_c_comment(bool* found_newline);
     562              : 
     563              :   void
     564              :   skip_cpp_comment();
     565              : 
     566              :   void
     567              :   gather_embed(const char*, const char*);
     568              : 
     569              :   // The input file name.
     570              :   const char* input_file_name_ GO_ATTRIBUTE_UNUSED;
     571              :   // The input file.
     572              :   FILE* input_file_;
     573              :   // The object used to keep track of file names and line numbers.
     574              :   Linemap* linemap_;
     575              :   // The line buffer.  This holds the current line.
     576              :   char* linebuf_;
     577              :   // The size of the line buffer.
     578              :   size_t linebufsize_;
     579              :   // The number of characters in the current line.
     580              :   size_t linesize_;
     581              :   // The current offset in linebuf_.
     582              :   size_t lineoff_;
     583              :   // The current line number.
     584              :   size_t lineno_;
     585              :   // Whether to add a semicolon if we see a newline now.
     586              :   bool add_semi_at_eol_;
     587              :   // Pragmas for the next function, from magic comments.
     588              :   unsigned int pragmas_;
     589              :   // The external name to use for a function declaration, from a magic
     590              :   // //extern comment.
     591              :   std::string extern_;
     592              :   // The list of //go:linkname comments, if any.
     593              :   Linknames* linknames_;
     594              :   // The list of //go:embed patterns, if any.
     595              :   std::vector<std::string> embeds_;
     596              : };
     597              : 
     598              : #endif // !defined(GO_LEX_H)
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.