LCOV - code coverage report
Current view: top level - gcc/go/gofrontend - go-encode-id.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 43.7 % 197 86
Test Date: 2026-02-28 14:20:25 Functions: 75.0 % 8 6
Legend: Lines:     hit not hit

            Line data    Source code
       1              : // go-encode-id.cc -- Go identifier and packagepath encoding/decoding hooks
       2              : 
       3              : // Copyright 2016 The Go Authors. All rights reserved.
       4              : // Use of this source code is governed by a BSD-style
       5              : // license that can be found in the LICENSE file.
       6              : 
       7              : #include "go-system.h"
       8              : 
       9              : #include "gogo.h"
      10              : #include "go-location.h"
      11              : #include "go-linemap.h"
      12              : #include "go-encode-id.h"
      13              : #include "lex.h"
      14              : 
      // Return true if the character C has to be escaped when it appears
      // in a name that we are encoding, and false if it may be emitted
      // unchanged.  Only the ASCII letters and digits are emitted
      // unchanged; every other value, including any byte with the high
      // bit set, needs encoding.  The range tests rely on letters and
      // digits being contiguous, as they are in ASCII.

      static bool
      char_needs_encoding(char c)
      {
        const bool is_upper = c >= 'A' && c <= 'Z';
        const bool is_lower = c >= 'a' && c <= 'z';
        const bool is_digit = c >= '0' && c <= '9';
        return !(is_upper || is_lower || is_digit);
      }
      40              : 
      41              : // Return whether the identifier needs to be translated because it
      42              : // contains non-ASCII characters.
      43              : 
      44              : bool
      45       890572 : go_id_needs_encoding(const std::string& str)
      46              : {
      47       890572 :   for (std::string::const_iterator p = str.begin();
      48      5997645 :        p != str.end();
      49      5107073 :        ++p)
      50      5402751 :     if (char_needs_encoding(*p))
      51       890572 :       return true;
      52              :   return false;
      53              : }
      54              : 
      // Lookup table from ASCII characters to the one-character code that
      // follows the underscore in their short ("_" CODE) encoding.

      class Special_char_code
      {
       public:
        Special_char_code();

        // Return the simple underscore encoding for C, or 0 if C has
        // none.  Values above 127 never have one.
        char
        code_for(unsigned int c) const
        {
          if (c > 127)
            return 0;
          return this->codes_[c];
        }

       private:
        // Code for each ASCII character; 0 means "no simple code".
        char codes_[128];
      };

      // Build the table: every entry starts out as 0, then the characters
      // that have a simple underscore encoding are filled in.

      Special_char_code::Special_char_code()
        : codes_()
      {
        static const struct
        {
          char plain;
          char code;
        } simple_codes[] =
          {
            { '_', '_' },
            { '.', '0' },
            { '/', '1' },
            { '*', '2' },
            { ',', '3' },
            { '{', '4' },
            { '}', '5' },
            { '[', '6' },
            { ']', '7' },
            { '(', '8' },
            { ')', '9' },
            { '"', 'a' },
            { ' ', 'b' },
            { ';', 'c' }
          };

        const size_t count = sizeof simple_codes / sizeof simple_codes[0];
        for (size_t i = 0; i < count; ++i)
          {
            const unsigned char slot =
              static_cast<unsigned char>(simple_codes[i].plain);
            this->codes_[slot] = simple_codes[i].code;
          }
      }

      // The single shared instance of the table.

      static const Special_char_code special_char_code;
     100              : 
     // Pull the next UTF-8 character out of P and store it in *PC.  Return
     // the number of bytes read, which is always at least 1.
     //
     // P must point at a NUL-terminated buffer holding at least one byte.
     // A well-formed sequence of two to four bytes is decoded into its
     // code point.  Anything else (a continuation byte in lead position,
     // a lead byte announcing more than four bytes, or a sequence whose
     // continuation bytes are missing or malformed) is consumed as a
     // single byte whose raw value, 0x80..0xff, is stored in *PC.  Because
     // a NUL is never a continuation byte, this never reads past the
     // terminator.  Overlong encodings and surrogate values are not
     // rejected.

     static size_t
     fetch_utf8_char(const char* p, unsigned int* pc)
     {
       const unsigned char lead = static_cast<unsigned char>(*p);
       if ((lead & 0x80) == 0)
         {
           *pc = lead;
           return 1;
         }

       // The number of leading one bits in the first byte is the length
       // of the sequence.
       size_t len = 0;
       for (unsigned char bits = lead; (bits & 0x80) != 0; bits <<= 1)
         ++len;

       // One leading bit is a stray continuation byte; more than four is
       // not UTF-8 and would also make the shift below undefined.
       if (len < 2 || len > 4)
         {
           *pc = lead;
           return 1;
         }

       unsigned int rc = lead & ((1u << (7 - len)) - 1);
       for (size_t i = 1; i < len; i++)
         {
           const unsigned char u = static_cast<unsigned char>(p[i]);
           if ((u & 0xc0) != 0x80)
             {
               // Truncated or malformed sequence: consume only the lead.
               *pc = lead;
               return 1;
             }
           rc = (rc << 6) | (u & 0x3f);
         }
       *pc = rc;
       return len;
     }
     129              : 
     130              : // Encode an identifier using assembler-friendly characters.  The
     131              : // encoding is described in detail near the end of the long comment at
     132              : // the start of names.cc.
     133              : 
     134              : std::string
     135      3881843 : go_encode_id(const std::string &id)
     136              : {
     137      3881843 :   if (Lex::is_invalid_identifier(id))
     138              :     {
     139            1 :       go_assert(saw_errors());
     140            1 :       return id;
     141              :     }
     142              : 
     143      3881842 :   std::string ret;
     144      3881842 :   const char* p = id.c_str();
     145      3881842 :   const char* pend = p + id.length();
     146              : 
     147              :   // We encode a leading digit, to ensure that no identifier starts
     148              :   // with a digit.
     149      3881842 :   if (pend > p && p[0] >= '0' && p[0] <= '9')
     150              :     {
     151            0 :       char buf[8];
     152            0 :       snprintf(buf, sizeof buf, "_x%02x", p[0]);
     153            0 :       ret.append(buf);
     154            0 :       ++p;
     155              :     }
     156              : 
     157    161790847 :   while (p < pend)
     158              :     {
     159    157909005 :       unsigned int c;
     160    157909005 :       size_t len = fetch_utf8_char(p, &c);
     161    157909005 :       if (len == 1)
     162              :         {
     163    157908061 :           if (!char_needs_encoding(c))
     164    128937786 :             ret.push_back(c);
     165              :           else
     166              :             {
     167     28970275 :               char code = special_char_code.code_for(c);
     168     28970275 :               if (code != 0)
     169              :                 {
     170     28896350 :                   ret.push_back('_');
     171     28896350 :                   ret.push_back(code);
     172              :                 }
     173              :               else
     174              :                 {
     175        73925 :                   char buf[16];
     176        73925 :                   snprintf(buf, sizeof buf, "_x%02x", c);
     177        73925 :                   ret.append(buf);
     178              :                 }
     179              :             }
     180              :         }
     181              :       else
     182              :         {
     183          944 :           char buf[16];
     184          944 :           if (c < 0x10000)
     185          944 :             snprintf(buf, sizeof buf, "_u%04x", c);
     186              :           else
     187            0 :             snprintf(buf, sizeof buf, "_U%08x", c);
     188          944 :           ret.append(buf);
     189              :         }
     190              : 
     191    157909005 :       p += len;
     192              :     }
     193              : 
     194      3881842 :   return ret;
     195      3881842 : }
     196              : 
     197              : // Convert a hex digit string to a unicode codepoint. No checking
     198              : // to insure that the hex digit is meaningful.
     199              : 
     200              : static unsigned
     201            0 : hex_digits_to_unicode_codepoint(const char *digits, unsigned ndig)
     202              : {
     203            0 :   unsigned result = 0;
     204            0 :   for (unsigned i = 0; i < ndig; ++i) {
     205            0 :     result <<= 4;
     206            0 :     result |= Lex::hex_val(digits[i]);
     207              :   }
     208            0 :   return result;
     209              : }
     210              : 
     211              : // Decode/demangle a mangled string produced by go_encode_id(). Returns
     212              : // empty string if demangling process fails in some way.  At the moment
     213              : // this routine is unused; there is an equivalent routine in the runtime
     214              : // used for demangling symbols appearing in stack traces.
     215              : 
     216              : std::string
     217            0 : go_decode_id(const std::string &encoded)
     218              : {
     219            0 :   std::string ret;
     220            0 :   const char* p = encoded.c_str();
     221            0 :   const char* pend = p + encoded.length();
     222            0 :   const Location loc = Linemap::predeclared_location();
     223              : 
     224            0 :   while (p < pend)
     225              :     {
     226            0 :       if (*p != '_' || p + 1 == pend)
     227              :         {
     228            0 :           ret.push_back(*p);
     229            0 :           p++;
     230            0 :           continue;
     231              :         }
     232              : 
     233            0 :       switch (p[1])
     234              :         {
     235            0 :         case '_':
     236            0 :           ret.push_back('_');
     237            0 :           p += 2;
     238            0 :           break;
     239            0 :         case '0':
     240            0 :           ret.push_back('.');
     241            0 :           p += 2;
     242            0 :           break;
     243            0 :         case '1':
     244            0 :           ret.push_back('/');
     245            0 :           p += 2;
     246            0 :           break;
     247            0 :         case '2':
     248            0 :           ret.push_back('*');
     249            0 :           p += 2;
     250            0 :           break;
     251            0 :         case '3':
     252            0 :           ret.push_back(',');
     253            0 :           p += 2;
     254            0 :           break;
     255            0 :         case '4':
     256            0 :           ret.push_back('{');
     257            0 :           p += 2;
     258            0 :           break;
     259            0 :         case '5':
     260            0 :           ret.push_back('}');
     261            0 :           p += 2;
     262            0 :           break;
     263            0 :         case '6':
     264            0 :           ret.push_back('[');
     265            0 :           p += 2;
     266            0 :           break;
     267            0 :         case '7':
     268            0 :           ret.push_back(']');
     269            0 :           p += 2;
     270            0 :           break;
     271            0 :         case '8':
     272            0 :           ret.push_back('(');
     273            0 :           p += 2;
     274            0 :           break;
     275            0 :         case '9':
     276            0 :           ret.push_back(')');
     277            0 :           p += 2;
     278            0 :           break;
     279            0 :         case 'a':
     280            0 :           ret.push_back('"');
     281            0 :           p += 2;
     282            0 :           break;
     283            0 :         case 'b':
     284            0 :           ret.push_back(' ');
     285            0 :           p += 2;
     286            0 :           break;
     287            0 :         case 'c':
     288            0 :           ret.push_back(';');
     289            0 :           p += 2;
     290            0 :           break;
     291            0 :         case 'x':
     292            0 :           {
     293            0 :             const char* digits = p + 2;
     294            0 :             if (strlen(digits) < 2)
     295            0 :               return "";
     296            0 :             unsigned int rune = hex_digits_to_unicode_codepoint(digits, 2);
     297            0 :             Lex::append_char(rune, true, &ret, loc);
     298            0 :             p += 4;
     299              :           }
     300            0 :           break;
     301            0 :         case 'u':
     302            0 :           {
     303            0 :             const char* digits = p + 2;
     304            0 :             if (strlen(digits) < 4)
     305            0 :               return "";
     306            0 :             unsigned int rune = hex_digits_to_unicode_codepoint(digits, 4);
     307            0 :             Lex::append_char(rune, true, &ret, loc);
     308            0 :             p += 6;
     309              :           }
     310            0 :           break;
     311            0 :         case 'U':
     312            0 :           {
     313            0 :             const char* digits = p + 2;
     314            0 :             if (strlen(digits) < 8)
     315            0 :               return "";
     316            0 :             unsigned int rune = hex_digits_to_unicode_codepoint(digits, 8);
     317            0 :             Lex::append_char(rune, true, &ret, loc);
     318            0 :             p += 10;
     319              :           }
     320            0 :           break;
     321            0 :         default:
     322            0 :           return "";
     323              :         }
     324              :     }
     325              : 
     326            0 :   return ret;
     327            0 : }
     328              : 
     329              : // Encode a struct field tag.  This is only used when we need to
     330              : // create a type descriptor for an anonymous struct type with field
     331              : // tags.  Underscore encoding will be applied to the returned string.
     332              : // The tag will appear between curly braces, so that is all we have to
     333              : // avoid.
     334              : 
     335              : std::string
     336        47283 : go_mangle_struct_tag(const std::string& tag)
     337              : {
     338        47283 :   std::string ret;
     339        47283 :   const char* p = tag.c_str();
     340        47283 :   const char* pend = p + tag.length();
     341       848787 :   while (p < pend)
     342              :     {
     343       801504 :       unsigned int c;
     344       801504 :       size_t len = fetch_utf8_char(p, &c);
     345       801504 :       if (len > 1)
     346           90 :         ret.append(p, len);
     347       801414 :       else if (c != '{' && c != '}' && c != '\\')
     348       801270 :         ret.push_back(c);
     349              :       else
     350              :         {
     351          144 :           ret.push_back('\\');
     352          144 :           ret.push_back(c);
     353              :         }
     354       801504 :       p += len;
     355              :     }
     356        47283 :   return ret;
     357              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.