Branch data Line data Source code
1 : : /* Definitions for CPP library.
2 : : Copyright (C) 1995-2024 Free Software Foundation, Inc.
3 : : Written by Per Bothner, 1994-95.
4 : :
5 : : This program is free software; you can redistribute it and/or modify it
6 : : under the terms of the GNU General Public License as published by the
7 : : Free Software Foundation; either version 3, or (at your option) any
8 : : later version.
9 : :
10 : : This program is distributed in the hope that it will be useful,
11 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : : GNU General Public License for more details.
14 : :
15 : : You should have received a copy of the GNU General Public License
16 : : along with this program; see the file COPYING3. If not see
17 : : <http://www.gnu.org/licenses/>.
18 : :
19 : : In other words, you are welcome to use, share and improve this program.
20 : : You are forbidden to forbid anyone else to use, share and improve
21 : : what you give them. Help stamp out software-hoarding! */
22 : : #ifndef LIBCPP_CPPLIB_H
23 : : #define LIBCPP_CPPLIB_H
24 : :
25 : : #include <sys/types.h>
26 : : #include "symtab.h"
27 : : #include "line-map.h"
28 : :
29 : : typedef struct cpp_reader cpp_reader; /* Forward typedefs: each struct tag below becomes usable as a plain type name. */
30 : : typedef struct cpp_buffer cpp_buffer;
31 : : typedef struct cpp_options cpp_options;
32 : : typedef struct cpp_token cpp_token;
33 : : typedef struct cpp_string cpp_string;
34 : : typedef struct cpp_hashnode cpp_hashnode;
35 : : typedef struct cpp_macro cpp_macro;
36 : : typedef struct cpp_callbacks cpp_callbacks;
37 : : typedef struct cpp_dir cpp_dir;
38 : :
39 : : struct _cpp_file; /* Forward declaration only; no definition is visible in this part of the header. */
40 : :
41 : : class rich_location; /* Forward declaration; the `class' keyword means this header is C++, not plain C. */
42 : :
43 : : /* The first three groups, apart from '=', can appear in preprocessor
44 : : expressions (+= and -= are used to indicate unary + and - resp.).
45 : : This allows a lookup table to be implemented in _cpp_parse_expr.
46 : :
47 : : The first group, to CPP_LAST_EQ, can be immediately followed by an
48 : : '='. The lexer needs operators ending in '=', like ">>=", to be in
49 : : the same order as their counterparts without the '=', like ">>".
50 : :
51 : : See the cpp_operator table optab in expr.cc if you change the order or
52 : : add or remove anything in the first group. Each entry is OP (name, spelling) or TK (name, kind) with kind one of NONE, IDENT or LITERAL; whoever expands TTYPE_TABLE must define OP and TK first. */
53 : :
54 : : #define TTYPE_TABLE \
55 : : OP(EQ, "=") \
56 : : OP(NOT, "!") \
57 : : OP(GREATER, ">") /* compare */ \
58 : : OP(LESS, "<") \
59 : : OP(PLUS, "+") /* math */ \
60 : : OP(MINUS, "-") \
61 : : OP(MULT, "*") \
62 : : OP(DIV, "/") \
63 : : OP(MOD, "%") \
64 : : OP(AND, "&") /* bit ops */ \
65 : : OP(OR, "|") \
66 : : OP(XOR, "^") \
67 : : OP(RSHIFT, ">>") \
68 : : OP(LSHIFT, "<<") \
69 : : \
70 : : OP(COMPL, "~") \
71 : : OP(AND_AND, "&&") /* logical */ \
72 : : OP(OR_OR, "||") \
73 : : OP(QUERY, "?") \
74 : : OP(COLON, ":") \
75 : : OP(COMMA, ",") /* grouping */ \
76 : : OP(OPEN_PAREN, "(") \
77 : : OP(CLOSE_PAREN, ")") \
78 : : TK(EOF, NONE) \
79 : : OP(EQ_EQ, "==") /* compare */ \
80 : : OP(NOT_EQ, "!=") \
81 : : OP(GREATER_EQ, ">=") \
82 : : OP(LESS_EQ, "<=") \
83 : : OP(SPACESHIP, "<=>") \
84 : : \
85 : : /* These two are unary + / - in preprocessor expressions. */ \
86 : : OP(PLUS_EQ, "+=") /* math */ \
87 : : OP(MINUS_EQ, "-=") \
88 : : \
89 : : OP(MULT_EQ, "*=") \
90 : : OP(DIV_EQ, "/=") \
91 : : OP(MOD_EQ, "%=") \
92 : : OP(AND_EQ, "&=") /* bit ops */ \
93 : : OP(OR_EQ, "|=") \
94 : : OP(XOR_EQ, "^=") \
95 : : OP(RSHIFT_EQ, ">>=") \
96 : : OP(LSHIFT_EQ, "<<=") \
97 : : /* Digraphs together, beginning with CPP_FIRST_DIGRAPH. */ \
98 : : OP(HASH, "#") /* digraphs */ \
99 : : OP(PASTE, "##") \
100 : : OP(OPEN_SQUARE, "[") \
101 : : OP(CLOSE_SQUARE, "]") \
102 : : OP(OPEN_BRACE, "{") \
103 : : OP(CLOSE_BRACE, "}") \
104 : : /* The remainder of the punctuation. Order is not significant. */ \
105 : : OP(SEMICOLON, ";") /* structure */ \
106 : : OP(ELLIPSIS, "...") \
107 : : OP(PLUS_PLUS, "++") /* increment */ \
108 : : OP(MINUS_MINUS, "--") \
109 : : OP(DEREF, "->") /* accessors */ \
110 : : OP(DOT, ".") \
111 : : OP(SCOPE, "::") \
112 : : OP(DEREF_STAR, "->*") \
113 : : OP(DOT_STAR, ".*") \
114 : : OP(ATSIGN, "@") /* used in Objective-C */ \
115 : : \
116 : : TK(NAME, IDENT) /* word */ \
117 : : TK(AT_NAME, IDENT) /* @word - Objective-C */ \
118 : : TK(NUMBER, LITERAL) /* 34_be+ta */ \
119 : : \
120 : : TK(CHAR, LITERAL) /* 'char' */ \
121 : : TK(WCHAR, LITERAL) /* L'char' */ \
122 : : TK(CHAR16, LITERAL) /* u'char' */ \
123 : : TK(CHAR32, LITERAL) /* U'char' */ \
124 : : TK(UTF8CHAR, LITERAL) /* u8'char' */ \
125 : : TK(OTHER, LITERAL) /* stray punctuation */ \
126 : : \
127 : : TK(STRING, LITERAL) /* "string" */ \
128 : : TK(WSTRING, LITERAL) /* L"string" */ \
129 : : TK(STRING16, LITERAL) /* u"string" */ \
130 : : TK(STRING32, LITERAL) /* U"string" */ \
131 : : TK(UTF8STRING, LITERAL) /* u8"string" */ \
132 : : TK(OBJC_STRING, LITERAL) /* @"string" - Objective-C */ \
133 : : TK(HEADER_NAME, LITERAL) /* <stdio.h> in #include */ \
134 : : TK(UNEVAL_STRING, LITERAL) /* unevaluated "string" - C++26 */ \
135 : : \
136 : : TK(CHAR_USERDEF, LITERAL) /* 'char'_suffix - C++11 */ \
137 : : TK(WCHAR_USERDEF, LITERAL) /* L'char'_suffix - C++11 */ \
138 : : TK(CHAR16_USERDEF, LITERAL) /* u'char'_suffix - C++11 */ \
139 : : TK(CHAR32_USERDEF, LITERAL) /* U'char'_suffix - C++11 */ \
140 : : TK(UTF8CHAR_USERDEF, LITERAL) /* u8'char'_suffix - C++11 */ \
141 : : TK(STRING_USERDEF, LITERAL) /* "string"_suffix - C++11 */ \
142 : : TK(WSTRING_USERDEF, LITERAL) /* L"string"_suffix - C++11 */ \
143 : : TK(STRING16_USERDEF, LITERAL) /* u"string"_suffix - C++11 */ \
144 : : TK(STRING32_USERDEF, LITERAL) /* U"string"_suffix - C++11 */ \
145 : : TK(UTF8STRING_USERDEF,LITERAL) /* u8"string"_suffix - C++11 */ \
146 : : \
147 : : TK(COMMENT, LITERAL) /* Only if output comments. */ \
148 : : /* SPELL_LITERAL happens to DTRT. */ \
149 : : TK(MACRO_ARG, NONE) /* Macro argument. */ \
150 : : TK(PRAGMA, NONE) /* Only for deferred pragmas. */ \
151 : : TK(PRAGMA_EOL, NONE) /* End-of-line for deferred pragmas. */ \
152 : : TK(PADDING, NONE) /* Whitespace for -E. */
153 : :
154 : : #define OP(e, s) CPP_ ## e, /* Here both entry kinds expand to an enumerator CPP_<e>; S is unused. */
155 : : #define TK(e, s) CPP_ ## e,
156 : : enum cpp_ttype
157 : : {
158 : : TTYPE_TABLE
159 : : N_TTYPES, /* Count of TTYPE_TABLE entries: it directly follows the last one. */
160 : :
161 : : /* A token type for keywords, as opposed to ordinary identifiers. It follows N_TTYPES, so it lies outside the table range. */
162 : : CPP_KEYWORD,
163 : :
164 : : /* Aliases for significant positions in the table. */
165 : : CPP_LAST_EQ = CPP_LSHIFT,
166 : : CPP_FIRST_DIGRAPH = CPP_HASH,
167 : : CPP_LAST_PUNCTUATOR= CPP_ATSIGN,
168 : : CPP_LAST_CPP_OP = CPP_LESS_EQ
169 : : };
170 : : #undef OP
171 : : #undef TK
172 : :
173 : : /* C language kind, used when calling cpp_create_reader. Also the type of cpp_options::lang. CLK_GNUC89 is explicitly 0; the rest number consecutively. */
174 : : enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC11, CLK_GNUC17, CLK_GNUC23,
175 : : CLK_STDC89, CLK_STDC94, CLK_STDC99, CLK_STDC11, CLK_STDC17,
176 : : CLK_STDC23,
177 : : CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX11, CLK_CXX11,
178 : : CLK_GNUCXX14, CLK_CXX14, CLK_GNUCXX17, CLK_CXX17,
179 : : CLK_GNUCXX20, CLK_CXX20, CLK_GNUCXX23, CLK_CXX23,
180 : : CLK_GNUCXX26, CLK_CXX26, CLK_ASM};
181 : :
182 : : /* Payload of a NUMBER, STRING, CHAR or COMMENT token. */
183 : : struct GTY(()) cpp_string {
184 : : unsigned int len; /* Length of TEXT, not counting its null terminator. */
185 : :
186 : : /* TEXT is always null terminated (terminator not included in len); but this
187 : : GTY markup arranges that PCH streaming works properly even if there is a
188 : : null byte in the middle of the string. The string_length expression is 1 + len, i.e. it counts the terminator. */
189 : : const unsigned char * GTY((string_length ("1 + %h.len"))) text;
190 : : };
191 : :
192 : : /* Flags for the cpp_token structure. Each is a distinct bit (bits 0-12), so they fit the unsigned short flags member. */
193 : : #define PREV_WHITE (1 << 0) /* If whitespace before this token. */
194 : : #define DIGRAPH (1 << 1) /* If it was a digraph. */
195 : : #define STRINGIFY_ARG (1 << 2) /* If macro argument to be stringified. */
196 : : #define PASTE_LEFT (1 << 3) /* If on LHS of a ## operator. */
197 : : #define NAMED_OP (1 << 4) /* C++ named operators. */
198 : : #define PREV_FALLTHROUGH (1 << 5) /* On a token preceded by FALLTHROUGH
199 : : comment. */
200 : : #define DECIMAL_INT (1 << 6) /* Decimal integer, set in c-lex.cc. */
201 : : #define PURE_ZERO (1 << 7) /* Single 0 digit, used by the C++ frontend,
202 : : set in c-lex.cc. */
203 : : #define COLON_SCOPE PURE_ZERO /* Adjacent colons in C < 23. Aliases PURE_ZERO, so both share bit 7. */
204 : : #define SP_DIGRAPH (1 << 8) /* # or ## token was a digraph. */
205 : : #define SP_PREV_WHITE (1 << 9) /* If whitespace before a ##
206 : : operator, or before this token
207 : : after a # operator. */
208 : : #define NO_EXPAND (1 << 10) /* Do not macro-expand this token. */
209 : : #define PRAGMA_OP (1 << 11) /* _Pragma token. */
210 : : #define BOL (1 << 12) /* Token at beginning of line. */
211 : :
212 : : /* Specify which field, if any, of the cpp_token union is used. The enumerator names double as the GTY tag strings on the members of union cpp_token_u; CPP_TOKEN_FLD_NONE has no matching member. */
213 : :
214 : : enum cpp_token_fld_kind {
215 : : CPP_TOKEN_FLD_NODE,
216 : : CPP_TOKEN_FLD_SOURCE,
217 : : CPP_TOKEN_FLD_STR,
218 : : CPP_TOKEN_FLD_ARG_NO,
219 : : CPP_TOKEN_FLD_TOKEN_NO,
220 : : CPP_TOKEN_FLD_PRAGMA,
221 : : CPP_TOKEN_FLD_NONE
222 : : };
223 : :
224 : : /* A macro argument in the cpp_token union (member macro_arg, tag CPP_TOKEN_FLD_ARG_NO). */
225 : : struct GTY(()) cpp_macro_arg {
226 : : /* Argument number. */
227 : : unsigned int arg_no;
228 : : /* The original spelling of the macro argument token. The GTY nested_ptr markup gives two conversion expressions between this cpp_hashnode pointer and a union tree_node pointer, each mapping null to NULL. NOTE(review): exact gengtype semantics are not visible here -- confirm against the GCC internals manual. */
229 : : cpp_hashnode *
230 : : GTY ((nested_ptr (union tree_node,
231 : : "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
232 : : "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
233 : : spelling;
234 : : };
235 : :
236 : : /* An identifier in the cpp_token union (member node, tag CPP_TOKEN_FLD_NODE). Both pointers carry the same GTY nested_ptr markup. */
237 : : struct GTY(()) cpp_identifier {
238 : : /* The canonical (UTF-8) spelling of the identifier. */
239 : : cpp_hashnode *
240 : : GTY ((nested_ptr (union tree_node,
241 : : "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
242 : : "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
243 : : node;
244 : : /* The original spelling of the identifier, as opposed to the canonical form held in NODE. */
245 : : cpp_hashnode *
246 : : GTY ((nested_ptr (union tree_node,
247 : : "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
248 : : "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
249 : : spelling;
250 : : };
251 : :
252 : : /* A preprocessing token. This has been carefully packed and should
253 : : occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. Adding or widening members can break that size. */
254 : : struct GTY(()) cpp_token {
255 : :
256 : : /* Location of first char of token, together with range of full token. */
257 : : location_t src_loc;
258 : :
259 : : ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type, stored in only CHAR_BIT bits */
260 : : unsigned short flags; /* flags - see above (PREV_WHITE ... BOL) */
261 : :
262 : : union cpp_token_u
263 : : {
264 : : /* An identifier. */
265 : : struct cpp_identifier GTY ((tag ("CPP_TOKEN_FLD_NODE"))) node;
266 : :
267 : : /* Inherit padding from this token. */
268 : : cpp_token * GTY ((tag ("CPP_TOKEN_FLD_SOURCE"))) source;
269 : :
270 : : /* A string, or number. */
271 : : struct cpp_string GTY ((tag ("CPP_TOKEN_FLD_STR"))) str;
272 : :
273 : : /* Argument no. (and original spelling) for a CPP_MACRO_ARG. */
274 : : struct cpp_macro_arg GTY ((tag ("CPP_TOKEN_FLD_ARG_NO"))) macro_arg;
275 : :
276 : : /* Original token no. for a CPP_PASTE (from a sequence of
277 : : consecutive paste tokens in a macro expansion). */
278 : : unsigned int GTY ((tag ("CPP_TOKEN_FLD_TOKEN_NO"))) token_no;
279 : :
280 : : /* Caller-supplied identifier for a CPP_PRAGMA. */
281 : : unsigned int GTY ((tag ("CPP_TOKEN_FLD_PRAGMA"))) pragma;
282 : : } GTY ((desc ("cpp_token_val_index (&%1)"))) val; /* The GTY desc names cpp_token_val_index as the selector of the active member. */
283 : : };
284 : :
285 : : /* Say which field of TOK's val union is in use, as a cpp_token_fld_kind. Named by the GTY desc markup on cpp_token::val. */
286 : : extern enum cpp_token_fld_kind cpp_token_val_index (const cpp_token *tok);
287 : :
288 : : /* A type wide enough to hold any multibyte source character.
289 : : cpplib's character constant interpreter requires an unsigned type.
290 : : Also, a typedef for the signed equivalent.
291 : : The width of this type is capped at 32 bits; there do exist targets
292 : : where wchar_t is 64 bits, but only in a non-default mode, and there
293 : : would be no meaningful interpretation for a wchar_t value greater
294 : : than 2^32 anyway -- the widest wide-character encoding around is
295 : : ISO 10646, which stops at 2^31. The selection below prefers int, falls back to long, and otherwise stops the build with #error. */
296 : : #if CHAR_BIT * SIZEOF_INT >= 32
297 : : # define CPPCHAR_SIGNED_T int
298 : : #elif CHAR_BIT * SIZEOF_LONG >= 32
299 : : # define CPPCHAR_SIGNED_T long
300 : : #else
301 : : # error "Cannot find a least-32-bit signed integer type"
302 : : #endif
303 : : typedef unsigned CPPCHAR_SIGNED_T cppchar_t; /* The unsigned character type described above. */
304 : : typedef CPPCHAR_SIGNED_T cppchar_signed_t; /* Its signed equivalent, same underlying width. */
305 : :
306 : : /* Style of header dependencies to generate. Type of cpp_options::deps.style; DEPS_NONE is explicitly 0. */
307 : : enum cpp_deps_style { DEPS_NONE = 0, DEPS_USER, DEPS_SYSTEM };
308 : :
309 : : /* Structured format of module dependencies to generate. Type of cpp_options::deps.fdeps_format; FDEPS_FMT_NONE is explicitly 0. */
310 : : enum cpp_fdeps_format { FDEPS_FMT_NONE = 0, FDEPS_FMT_P1689R5 };
311 : :
312 : : /* The possible normalization levels, from most restrictive to least. cpp_options::warn_normalize (an int) refers to this enum. */
313 : : enum cpp_normalize_level {
314 : : /* In NFKC. */
315 : : normalized_KC = 0,
316 : : /* In NFC. */
317 : : normalized_C,
318 : : /* In NFC, except for subsequences where being in NFC would make
319 : : the identifier invalid. */
320 : : normalized_identifier_C,
321 : : /* Not normalized at all. */
322 : : normalized_none
323 : : };
324 : :
325 : : enum cpp_main_search /* Type of the 8-bit cpp_options::main_search field; presumably says how the main file is looked up -- confirm. */
326 : : {
327 : : CMS_none, /* A regular source file. */
328 : : CMS_header, /* Is a directly-specified header file (e.g. PCH or
329 : : header-unit). */
330 : : CMS_user, /* Search the user INCLUDE path. */
331 : : CMS_system, /* Search the system INCLUDE path. */
332 : : };
333 : :
334 : : /* The possible bidirectional control characters checking levels. cpp_options::cpp_warn_bidirectional refers to this enum. */
335 : : enum cpp_bidirectional_level {
336 : : /* No checking. */
337 : : bidirectional_none = 0,
338 : : /* Only detect unpaired uses of bidirectional control characters. */
339 : : bidirectional_unpaired = 1,
340 : : /* Detect any use of bidirectional control characters. */
341 : : bidirectional_any = 2,
342 : : /* Also warn about UCNs. NOTE(review): the value 4 looks like a flag bit meant to be OR-ed with a level above -- confirm. */
343 : : bidirectional_ucn = 4
344 : : };
345 : :
346 : : /* This structure is nested inside struct cpp_reader, and
347 : : carries all the options visible to the command line. */
348 : : struct cpp_options
349 : : {
350 : : /* The language we're preprocessing. */
351 : : enum c_lang lang;
352 : :
353 : : /* Nonzero means use extra default include directories for C++. */
354 : : unsigned char cplusplus;
355 : :
356 : : /* Nonzero means handle cplusplus style comments. */
357 : : unsigned char cplusplus_comments;
358 : :
359 : : /* Nonzero means define __OBJC__, treat @ as a special token, use
360 : : the OBJC[PLUS]_INCLUDE_PATH environment variable, and allow
361 : : "#import". */
362 : : unsigned char objc;
363 : :
364 : : /* Nonzero means don't copy comments into the output file. */
365 : : unsigned char discard_comments;
366 : :
367 : : /* Nonzero means don't copy comments into the output file during
368 : : macro expansion. */
369 : : unsigned char discard_comments_in_macro_exp;
370 : :
371 : : /* Nonzero means process the ISO trigraph sequences. */
372 : : unsigned char trigraphs;
373 : :
374 : : /* Nonzero means process the ISO digraph sequences. */
375 : : unsigned char digraphs;
376 : :
377 : : /* Nonzero means to allow hexadecimal floats and LL suffixes. */
378 : : unsigned char extended_numbers;
379 : :
380 : : /* Nonzero means process u/U prefix literals (UTF-16/32). */
381 : : unsigned char uliterals;
382 : :
383 : : /* Nonzero means process u8 prefixed character literals (UTF-8). */
384 : : unsigned char utf8_char_literals;
385 : :
386 : : /* Nonzero means process r/R raw strings. If this is set, uliterals
387 : : must be set as well. */
388 : : unsigned char rliterals;
389 : :
390 : : /* Nonzero means print names of header files (-H). */
391 : : unsigned char print_include_names;
392 : :
393 : : /* Nonzero means complain about deprecated features. */
394 : : unsigned char cpp_warn_deprecated;
395 : :
396 : : /* Nonzero means warn if slash-star appears in a comment. */
397 : : unsigned char warn_comments;
398 : :
399 : : /* Nonzero means to warn about __DATE__, __TIME__ and __TIMESTAMP__ usage. */
400 : : unsigned char warn_date_time;
401 : :
402 : : /* Nonzero means warn if a user-supplied include directory does not
403 : : exist. */
404 : : unsigned char warn_missing_include_dirs;
405 : :
406 : : /* Nonzero means warn if there are any trigraphs. */
407 : : unsigned char warn_trigraphs;
408 : :
409 : : /* Nonzero means warn about multicharacter charconsts. */
410 : : unsigned char warn_multichar;
411 : :
412 : : /* Nonzero means warn about various incompatibilities with
413 : : traditional C. */
414 : : unsigned char cpp_warn_traditional;
415 : :
416 : : /* Nonzero means warn about long long numeric constants. */
417 : : unsigned char cpp_warn_long_long;
418 : :
419 : : /* Nonzero means warn about text after an #endif (or #else). */
420 : : unsigned char warn_endif_labels;
421 : :
422 : : /* Nonzero means warn about implicit sign changes owing to integer
423 : : promotions. */
424 : : unsigned char warn_num_sign_change;
425 : :
426 : : /* Zero means don't warn about __VA_ARGS__ usage in c89 pedantic mode.
427 : : Presumably the usage is protected by the appropriate #ifdef. */
428 : : unsigned char warn_variadic_macros;
429 : :
430 : : /* Nonzero means warn about builtin macros that are redefined or
431 : : explicitly undefined. */
432 : : unsigned char warn_builtin_macro_redefined;
433 : :
434 : : /* Different -Wimplicit-fallthrough= levels. */
435 : : unsigned char cpp_warn_implicit_fallthrough;
436 : :
437 : : /* Nonzero means we should look for header.gcc files that remap file
438 : : names. */
439 : : unsigned char remap;
440 : :
441 : : /* Zero means dollar signs are punctuation. */
442 : : unsigned char dollars_in_ident;
443 : :
444 : : /* Nonzero means UCNs are accepted in identifiers. */
445 : : unsigned char extended_identifiers;
446 : :
447 : : /* True if we should warn about dollars in identifiers or numbers
448 : : for this translation unit. */
449 : : unsigned char warn_dollars;
450 : :
451 : : /* Nonzero means warn if undefined identifiers are evaluated in an #if. */
452 : : unsigned char warn_undef;
453 : :
454 : : /* Nonzero means warn if "defined" is encountered in a place other than
455 : : an #if. */
456 : : unsigned char warn_expansion_to_defined;
457 : :
458 : : /* Nonzero means warn of unused macros from the main file. */
459 : : unsigned char warn_unused_macros;
460 : :
461 : : /* Nonzero for the 1999 C Standard, including corrigenda and amendments. */
462 : : unsigned char c99;
463 : :
464 : : /* Nonzero if we are conforming to a specific C or C++ standard. */
465 : : unsigned char std;
466 : :
467 : : /* Nonzero means give all the error messages the ANSI standard requires. */
468 : : unsigned char cpp_pedantic;
469 : :
470 : : /* Nonzero means we're looking at already preprocessed code, so don't
471 : : bother trying to do macro expansion and whatnot. */
472 : : unsigned char preprocessed;
473 : :
474 : : /* Nonzero means we are going to emit debugging logs during
475 : : preprocessing. */
476 : : unsigned char debug;
477 : :
478 : : /* Nonzero means we are tracking locations of tokens involved in
479 : : macro expansion. 1 Means we track the location in degraded mode
480 : : where we do not track locations of tokens resulting from the
481 : : expansion of arguments of function-like macro. 2 Means we do
482 : : track all macro expansions. This last option is the one that
483 : : consumes the highest amount of memory. */
484 : : unsigned char track_macro_expansion;
485 : :
486 : : /* Nonzero means handle C++ alternate operator names. */
487 : : unsigned char operator_names;
488 : :
489 : : /* Nonzero means warn about use of C++ alternate operator names. */
490 : : unsigned char warn_cxx_operator_names;
491 : :
492 : : /* True for traditional preprocessing. */
493 : : unsigned char traditional;
494 : :
495 : : /* Nonzero for C++ 2011 Standard user-defined literals. */
496 : : unsigned char user_literals;
497 : :
498 : : /* Nonzero means warn when a string or character literal is followed by a
499 : : ud-suffix which does not begin with an underscore. */
500 : : unsigned char warn_literal_suffix;
501 : :
502 : : /* Nonzero means interpret imaginary, fixed-point, or other gnu extension
503 : : literal number suffixes as user-defined literal number suffixes. */
504 : : unsigned char ext_numeric_literals;
505 : :
506 : : /* Nonzero means extended identifiers allow the characters specified
507 : : in C11. */
508 : : unsigned char c11_identifiers;
509 : :
510 : : /* Nonzero means extended identifiers allow the characters specified
511 : : by Unicode XID_Start and XID_Continue properties. */
512 : : unsigned char xid_identifiers;
513 : :
514 : : /* Nonzero for C++ 2014 Standard binary constants. */
515 : : unsigned char binary_constants;
516 : :
517 : : /* Nonzero for C++ 2014 Standard digit separators. */
518 : : unsigned char digit_separators;
519 : :
520 : : /* Nonzero for C23 decimal floating-point constants. */
521 : : unsigned char dfp_constants;
522 : :
523 : : /* Nonzero for C++20 __VA_OPT__ feature. */
524 : : unsigned char va_opt;
525 : :
526 : : /* Nonzero for the '::' token. */
527 : : unsigned char scope;
528 : :
529 : : /* Nonzero for the '#elifdef' and '#elifndef' directives. */
530 : : unsigned char elifdef;
531 : :
532 : : /* Nonzero for the '#warning' directive. */
533 : : unsigned char warning_directive;
534 : :
535 : : /* Nonzero means tokenize C++20 module directives. */
536 : : unsigned char module_directives;
537 : :
538 : : /* Nonzero for C++23 size_t literals. */
539 : : unsigned char size_t_literals;
540 : :
541 : : /* Nonzero for C++23 delimited escape sequences. */
542 : : unsigned char delimited_escape_seqs;
543 : :
544 : : /* Nonzero for 'true' and 'false' in #if expressions. */
545 : : unsigned char true_false;
546 : :
547 : : /* Holds the name of the target (execution) character set. */
548 : : const char *narrow_charset;
549 : :
550 : : /* Holds the name of the target wide character set. */
551 : : const char *wide_charset;
552 : :
553 : : /* Holds the name of the input character set. */
554 : : const char *input_charset;
555 : :
556 : : /* The minimum permitted level of normalization before a warning
557 : : is generated. See enum cpp_normalize_level. */
558 : : int warn_normalize;
559 : :
560 : : /* True to warn about precompiled header files we couldn't use. */
561 : : bool warn_invalid_pch;
562 : :
563 : : /* True if dependencies should be restored from a precompiled header. */
564 : : bool restore_pch_deps;
565 : :
566 : : /* True to warn about differences between C90 and C99. NOTE(review): declared signed char rather than bool -- presumably so a negative "unset" state is representable; confirm. */
567 : : signed char cpp_warn_c90_c99_compat;
568 : :
569 : : /* True to warn about differences between C11 and C23. NOTE(review): signed char, as for cpp_warn_c90_c99_compat -- confirm whether negative values are meaningful. */
570 : : signed char cpp_warn_c11_c23_compat;
571 : :
572 : : /* True to warn about differences between C++98 and C++11. */
573 : : bool cpp_warn_cxx11_compat;
574 : :
575 : : /* True to warn about differences between C++17 and C++20. */
576 : : bool cpp_warn_cxx20_compat;
577 : :
578 : : /* Nonzero if bidirectional control characters checking is on. See enum
579 : : cpp_bidirectional_level. */
580 : : unsigned char cpp_warn_bidirectional;
581 : :
582 : : /* True if libcpp should warn about invalid UTF-8 characters in comments.
583 : : 2 if it should be a pedwarn. */
584 : : unsigned char cpp_warn_invalid_utf8;
585 : :
586 : : /* True if libcpp should warn about invalid forms of delimited or named
587 : : escape sequences. */
588 : : bool cpp_warn_unicode;
589 : :
590 : : /* True if -finput-charset= option has been used explicitly. */
591 : : bool cpp_input_charset_explicit;
592 : :
593 : : /* Dependency generation. */
594 : : struct
595 : : {
596 : : /* Style of header dependencies to generate. */
597 : : enum cpp_deps_style style;
598 : :
599 : : /* Structured format of module dependencies to generate. */
600 : : enum cpp_fdeps_format fdeps_format;
601 : :
602 : : /* Assume missing files are generated files. */
603 : : bool missing_files;
604 : :
605 : : /* Generate phony targets for each dependency apart from the first
606 : : one. */
607 : : bool phony_targets;
608 : :
609 : : /* Generate dependency info for modules. */
610 : : bool modules;
611 : :
612 : : /* If true, no dependency is generated on the main file. */
613 : : bool ignore_main_file;
614 : :
615 : : /* If true, intend to use the preprocessor output (e.g., for compilation)
616 : : in addition to the dependency info. */
617 : : bool need_preprocessor_output;
618 : : } deps;
619 : :
620 : : /* Target-specific features set by the front end or client. */
621 : :
622 : : /* Precision for target CPP arithmetic, target characters, target
623 : : ints and target wide characters, respectively. */
624 : : size_t precision, char_precision, int_precision, wchar_precision;
625 : :
626 : : /* True means chars (wide chars, UTF-8 chars) are unsigned. */
627 : : bool unsigned_char, unsigned_wchar, unsigned_utf8char;
628 : :
629 : : /* True if the most significant byte in a word has the lowest
630 : : address in memory. */
631 : : bool bytes_big_endian;
632 : :
633 : : /* Nonzero means __STDC__ should have the value 0 in system headers. */
634 : : unsigned char stdc_0_in_system_headers;
635 : :
636 : : /* True disables tokenization outside of preprocessing directives. */
637 : : bool directives_only;
638 : :
639 : : /* True enables canonicalization of system header file paths. */
640 : : bool canonical_system_headers;
641 : :
642 : : /* The maximum depth of the nested #include. */
643 : : unsigned int max_include_depth;
644 : :
645 : : cpp_main_search main_search : 8; /* See enum cpp_main_search; packed into an 8-bit field. */
646 : : };
647 : :
648 : : /* Diagnostic levels. To get a diagnostic without associating a
649 : : position in the translation unit with it, use cpp_error_with_line
650 : : with a line number of zero. Values run consecutively from CPP_DL_WARNING == 0; the diagnostic callback in struct cpp_callbacks takes one of these. */
651 : :
652 : : enum cpp_diagnostic_level {
653 : : /* Warning, an error with -Werror. */
654 : : CPP_DL_WARNING = 0,
655 : : /* Same as CPP_DL_WARNING, except it is not suppressed in system headers. */
656 : : CPP_DL_WARNING_SYSHDR,
657 : : /* Warning, an error with -pedantic-errors or -Werror. */
658 : : CPP_DL_PEDWARN,
659 : : /* An error. */
660 : : CPP_DL_ERROR,
661 : : /* An internal consistency check failed. Prints "internal error: ",
662 : : otherwise the same as CPP_DL_ERROR. */
663 : : CPP_DL_ICE,
664 : : /* An informative note following a warning. */
665 : : CPP_DL_NOTE,
666 : : /* A fatal error. */
667 : : CPP_DL_FATAL
668 : : };
669 : :
670 : : /* Warning reason codes. Use a reason code of CPP_W_NONE for unclassified
671 : : warnings and diagnostics that are not warnings. Values run consecutively from CPP_W_NONE == 0; the diagnostic callback in struct cpp_callbacks receives one of these next to the diagnostic level. */
672 : :
673 : : enum cpp_warning_reason {
674 : : CPP_W_NONE = 0,
675 : : CPP_W_DEPRECATED,
676 : : CPP_W_COMMENTS,
677 : : CPP_W_MISSING_INCLUDE_DIRS,
678 : : CPP_W_TRIGRAPHS,
679 : : CPP_W_MULTICHAR,
680 : : CPP_W_TRADITIONAL,
681 : : CPP_W_LONG_LONG,
682 : : CPP_W_ENDIF_LABELS,
683 : : CPP_W_NUM_SIGN_CHANGE,
684 : : CPP_W_VARIADIC_MACROS,
685 : : CPP_W_BUILTIN_MACRO_REDEFINED,
686 : : CPP_W_DOLLARS,
687 : : CPP_W_UNDEF,
688 : : CPP_W_UNUSED_MACROS,
689 : : CPP_W_CXX_OPERATOR_NAMES,
690 : : CPP_W_NORMALIZE,
691 : : CPP_W_INVALID_PCH,
692 : : CPP_W_WARNING_DIRECTIVE,
693 : : CPP_W_LITERAL_SUFFIX,
694 : : CPP_W_SIZE_T_LITERALS,
695 : : CPP_W_DATE_TIME,
696 : : CPP_W_PEDANTIC,
697 : : CPP_W_C90_C99_COMPAT,
698 : : CPP_W_C11_C23_COMPAT,
699 : : CPP_W_CXX11_COMPAT,
700 : : CPP_W_CXX20_COMPAT,
701 : : CPP_W_EXPANSION_TO_DEFINED,
702 : : CPP_W_BIDIRECTIONAL,
703 : : CPP_W_INVALID_UTF8,
704 : : CPP_W_UNICODE
705 : : };
706 : :
707 : : /* Callback for header lookup for HEADER, which is the name of a
708 : : source file. It is used as a method of last resort to find headers
709 : : that are not otherwise found during the normal include processing.
710 : : The return value is the malloced name of a header to try and open,
711 : : if any, or NULL otherwise. This callback is called only if the
712 : : header is otherwise unfound. NOTE(review): the typedef returns const char * although the name is described as malloced, and the cpp_dir ** argument is not described here -- confirm ownership and that argument's role. */
713 : : typedef const char *(*missing_header_cb)(cpp_reader *, const char *header, cpp_dir **);
714 : :
715 : : /* Callbacks to the cpplib client. Every member is a function pointer. */
716 : : struct cpp_callbacks
717 : : {
718 : : /* Called when a new line of preprocessed output is started. */
719 : : void (*line_change) (cpp_reader *, const cpp_token *, int);
720 : :
721 : : /* Called when switching to/from a new file.
722 : : The line_map is for the new file. It is NULL if there is no new file.
723 : : (In C this happens when done with <built-in>+<command line> and also
724 : : when done with a main file.) This can be used for resource cleanup. */
725 : : void (*file_change) (cpp_reader *, const line_map_ordinary *);
726 : :
727 : : void (*dir_change) (cpp_reader *, const char *);
728 : : void (*include) (cpp_reader *, location_t, const unsigned char *,
729 : : const char *, int, const cpp_token **);
730 : : void (*define) (cpp_reader *, location_t, cpp_hashnode *);
731 : : void (*undef) (cpp_reader *, location_t, cpp_hashnode *);
732 : : void (*ident) (cpp_reader *, location_t, const cpp_string *);
733 : : void (*def_pragma) (cpp_reader *, location_t);
734 : : int (*valid_pch) (cpp_reader *, const char *, int);
735 : : void (*read_pch) (cpp_reader *, const char *, int, const char *);
736 : : missing_header_cb missing_header;
737 : :
738 : : /* Context-sensitive macro support. Returns macro (if any) that should
739 : : be expanded. */
740 : : cpp_hashnode * (*macro_to_expand) (cpp_reader *, const cpp_token *);
741 : :
742 : : /* Called to emit a diagnostic. This callback receives the
743 : : translated message. The format string is the fifth parameter, followed by a va_list pointer. */
744 : : bool (*diagnostic) (cpp_reader *,
745 : : enum cpp_diagnostic_level,
746 : : enum cpp_warning_reason,
747 : : rich_location *,
748 : : const char *, va_list *)
749 : : ATTRIBUTE_FPTR_PRINTF(5,0);
750 : :
751 : : /* Callbacks for when a macro is expanded, or tested (whether
752 : : defined or not at the time) in #ifdef, #ifndef or "defined". */
753 : : void (*used_define) (cpp_reader *, location_t, cpp_hashnode *);
754 : : void (*used_undef) (cpp_reader *, location_t, cpp_hashnode *);
755 : : /* Called before #define and #undef or other macro definition
756 : : changes are processed. */
757 : : void (*before_define) (cpp_reader *);
758 : : /* Called whenever a macro is expanded or tested.
759 : : Second argument is the location of the start of the current expansion. */
760 : : void (*used) (cpp_reader *, location_t, cpp_hashnode *);
761 : :
762 : : /* Callback to identify whether an attribute exists. */
763 : : int (*has_attribute) (cpp_reader *, bool);
764 : :
765 : : /* Callback to determine whether a built-in function is recognized. */
766 : : int (*has_builtin) (cpp_reader *);
767 : :
768 : : /* Callback to determine whether a feature is available. */
769 : : int (*has_feature) (cpp_reader *, bool);
770 : :
771 : : /* Callback that can change a user lazy into normal macro. */
772 : : void (*user_lazy_macro) (cpp_reader *, cpp_macro *, unsigned);
773 : :
774 : : /* Callback to handle deferred cpp_macros. */
775 : : cpp_macro *(*user_deferred_macro) (cpp_reader *, location_t, cpp_hashnode *);
776 : :
777 : : /* Callback to parse SOURCE_DATE_EPOCH from environment. */
778 : : time_t (*get_source_date_epoch) (cpp_reader *);
779 : :
780 : : /* Callback for providing suggestions for misspelled directives. */
781 : : const char *(*get_suggestion) (cpp_reader *, const char *, const char *const *);
782 : :
783 : : /* Callback for when a comment is encountered, giving the location
784 : : of the opening slash, a pointer to the content (which is not
785 : : necessarily 0-terminated), and the length of the content.
786 : : The content contains the opening slash-star (or slash-slash),
787 : : and for C-style comments contains the closing star-slash. For
788 : : C++-style comments it does not include the terminating newline. */
789 : : void (*comment) (cpp_reader *, location_t, const unsigned char *,
790 : : size_t);
791 : :
792 : : /* Callback for filename remapping in __FILE__ and __BASE_FILE__ macro
793 : : expansions. */
794 : : const char *(*remap_filename) (const char*);
795 : :
796 : : /* Maybe translate a #include into something else. Return a
797 : : cpp_buffer containing the translation if translating. NOTE(review): the declared return type is char *, not cpp_buffer -- confirm what the returned buffer holds and who owns it. */
798 : : char *(*translate_include) (cpp_reader *, line_maps *, location_t,
799 : : const char *path);
800 : : };
801 : :
802 : : #ifdef VMS
803 : : #define INO_T_CPP ino_t ino[3]
804 : : #elif defined (_AIX) && SIZEOF_INO_T == 4
805 : : #define INO_T_CPP ino64_t ino
806 : : #else
807 : : #define INO_T_CPP ino_t ino
808 : : #endif
809 : :
810 : : #if defined (_AIX) && SIZEOF_DEV_T == 4
811 : : #define DEV_T_CPP dev64_t dev
812 : : #else
813 : : #define DEV_T_CPP dev_t dev
814 : : #endif
815 : :
816 : : /* Chain of directories to look for include files in. Each cpp_dir describes one directory; entries are linked through NEXT. */
817 : : struct cpp_dir
818 : : {
819 : : /* NULL-terminated singly-linked list: the following entry in the chain, or NULL for the last one. */
820 : : struct cpp_dir *next;
821 : :
822 : : /* NAME of the directory, NUL-terminated. */
823 : : char *name;
824 : : unsigned int len; /* Presumably the length of NAME -- TODO confirm against the code that fills it in. */
825 : :
826 : : /* One if a system header, two if a system header that has extern
827 : : "C" guards for C++. */
828 : : unsigned char sysp;
829 : :
830 : : /* Is this a user-supplied directory? */
831 : : bool user_supplied_p;
832 : :
833 : : /* The canonicalized NAME as determined by lrealpath. This field
834 : : is only used by hosts that lack reliable inode numbers. */
835 : : char *canonical_name;
836 : :
837 : : /* Mapping of file names for this directory for MS-DOS and related
838 : : platforms. A NULL-terminated array of (from, to) pairs. */
839 : : const char **name_map;
840 : :
841 : : /* Routine to construct pathname, given the search path name and the
842 : : HEADER we are trying to find, return a constructed pathname to
843 : : try and open. If this is NULL, the constructed pathname is as
844 : : constructed by append_file_to_dir. */
845 : : char *(*construct) (const char *header, cpp_dir *dir);
846 : :
847 : : /* The C front end uses these to recognize duplicated
848 : : directories in the search path. */
849 : : INO_T_CPP; /* Expands to the declaration of member `ino' (an array of three on VMS). */
850 : : DEV_T_CPP; /* Expands to the declaration of member `dev'. */
851 : : };
852 : :
853 : : /* The kind of the cpp_macro. */
854 : : enum cpp_macro_kind {
855 : : cmk_macro, /* An ISO macro (token expansion). */
856 : : cmk_assert, /* An assertion. */
857 : : cmk_traditional /* A traditional macro (text expansion). */
858 : : };
859 : :
860 : : /* Each macro definition is recorded in a cpp_macro structure.
861 : : Variadic macros cannot occur with traditional cpp. KIND selects the live member of both unions below: PARM via "kind == cmk_assert" and EXP via "kind == cmk_traditional". */
862 : : struct GTY(()) cpp_macro {
863 : : union cpp_parm_u
864 : : {
865 : : /* Parameters, if any (PARAMC entries, per the GTY length). If parameter names use extended identifiers,
866 : : the original spelling of those identifiers, not the canonical
867 : : UTF-8 spelling, goes here. */
868 : : cpp_hashnode ** GTY ((tag ("false"),
869 : : nested_ptr (union tree_node,
870 : : "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
871 : : "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"),
872 : : length ("%1.paramc"))) params;
873 : :
874 : : /* If this is an assertion, the next one in the chain. */
875 : : cpp_macro *GTY ((tag ("true"))) next;
876 : : } GTY ((desc ("%1.kind == cmk_assert"))) parm;
877 : :
878 : : /* Definition line number, held as a location_t. */
879 : : location_t line;
880 : :
881 : : /* Number of tokens in body, or bytes for traditional macros. */
882 : : /* Do we really need 2^32-1 range here? */
883 : : unsigned int count;
884 : :
885 : : /* Number of parameters. */
886 : : unsigned short paramc;
887 : :
888 : : /* Non-zero if this is a user-lazy macro, value provided by user. */
889 : : unsigned char lazy;
890 : :
891 : : /* The kind of this macro (ISO, trad or assert): one of the cpp_macro_kind enumerators, which the union discriminators compare against. */
892 : : unsigned kind : 2;
893 : :
894 : : /* If a function-like macro. */
895 : : unsigned int fun_like : 1;
896 : :
897 : : /* If a variadic macro. */
898 : : unsigned int variadic : 1;
899 : :
900 : : /* If macro defined in system header. */
901 : : unsigned int syshdr : 1;
902 : :
903 : : /* Nonzero if it has been expanded or had its existence tested. */
904 : : unsigned int used : 1;
905 : :
906 : : /* Indicate whether the tokens include extra CPP_PASTE tokens at the
907 : : end to track invalid redefinitions with consecutive CPP_PASTE
908 : : tokens. */
909 : : unsigned int extra_tokens : 1;
910 : :
911 : : /* Imported C++20 macro (from a header unit). */
912 : : unsigned int imported_p : 1;
913 : :
914 : : /* 0 bits spare (32-bit). 32 on 64-bit target. */
915 : :
916 : : union cpp_exp_u
917 : : {
918 : : /* Trailing array of replacement tokens (ISO), or assertion body value. Declared with one element; the GTY length says COUNT entries are live. */
919 : : cpp_token GTY ((tag ("false"), length ("%1.count"))) tokens[1];
920 : :
921 : : /* Pointer to replacement text (traditional). See comment at top
922 : : of cpptrad.c for how traditional function-like macros are
923 : : encoded. */
924 : : const unsigned char *GTY ((tag ("true"))) text;
925 : : } GTY ((desc ("%1.kind == cmk_traditional"))) exp;
926 : : };
927 : :
928 : : /* Poisoned identifiers are flagged NODE_POISONED. NODE_OPERATOR (C++
929 : : only) indicates an identifier that behaves like an operator such as
930 : : "xor". NODE_DIAGNOSTIC is for speed in lex_token: it indicates a
931 : : diagnostic may be required for this node. Currently this only
932 : : applies to __VA_ARGS__, poisoned identifiers, and -Wc++-compat
933 : : warnings about NODE_OPERATOR. */
934 : :
935 : : /* Hash node flags. */
936 : : #define NODE_OPERATOR (1 << 0) /* C++ named operator. */
937 : : #define NODE_POISONED (1 << 1) /* Poisoned identifier. */
938 : : #define NODE_DIAGNOSTIC (1 << 2) /* Possible diagnostic when lexed. */
939 : : #define NODE_WARN (1 << 3) /* Warn if redefined or undefined. */
940 : : #define NODE_DISABLED (1 << 4) /* A disabled macro. */
941 : : #define NODE_USED (1 << 5) /* Dumped with -dU. */
942 : : #define NODE_CONDITIONAL (1 << 6) /* Conditional macro */
943 : : #define NODE_WARN_OPERATOR (1 << 7) /* Warn about C++ named operator. */
944 : : #define NODE_MODULE (1 << 8) /* C++-20 module-related name. */
945 : :
946 : : /* Different flavors of hash node. The enumerators are numbered 0..3, so NT_USER_MACRO (2) and NT_BUILTIN_MACRO (3) share a set bit that NT_VOID (0) and NT_MACRO_ARG (1) lack; NT_MACRO_MASK names that bit. */
947 : : enum node_type
948 : : {
949 : : NT_VOID = 0, /* Maybe an assert? */
950 : : NT_MACRO_ARG, /* A macro arg. */
951 : : NT_USER_MACRO, /* A user macro. */
952 : : NT_BUILTIN_MACRO, /* A builtin macro. */
953 : : NT_MACRO_MASK = NT_USER_MACRO /* Mask for either macro kind: a type ANDed with it is nonzero only for the two macro types. */
954 : : };
955 : :
956 : : /* Different flavors of builtin macro. _Pragma is an operator, but we
957 : : handle it with the builtin code for efficiency reasons. */
958 : : enum cpp_builtin_type
959 : : {
960 : : BT_SPECLINE = 0, /* `__LINE__' */
961 : : BT_DATE, /* `__DATE__' */
962 : : BT_FILE, /* `__FILE__' */
963 : : BT_FILE_NAME, /* `__FILE_NAME__' */
964 : : BT_BASE_FILE, /* `__BASE_FILE__' */
965 : : BT_INCLUDE_LEVEL, /* `__INCLUDE_LEVEL__' */
966 : : BT_TIME, /* `__TIME__' */
967 : : BT_STDC, /* `__STDC__' */
968 : : BT_PRAGMA, /* `_Pragma' operator */
969 : : BT_TIMESTAMP, /* `__TIMESTAMP__' */
970 : : BT_COUNTER, /* `__COUNTER__' */
971 : : BT_HAS_ATTRIBUTE, /* `__has_attribute(x)' */
972 : : BT_HAS_STD_ATTRIBUTE, /* `__has_c_attribute(x)' */
973 : : BT_HAS_BUILTIN, /* `__has_builtin(x)' */
974 : : BT_HAS_INCLUDE, /* `__has_include(x)' */
975 : : BT_HAS_INCLUDE_NEXT, /* `__has_include_next(x)' */
976 : : BT_HAS_FEATURE, /* `__has_feature(x)' */
977 : : BT_HAS_EXTENSION /* `__has_extension(x)' */
978 : : };
979 : :
980 : : #define CPP_HASHNODE(HNODE) ((cpp_hashnode *) (HNODE))
981 : : #define HT_NODE(NODE) (&(NODE)->ident)
982 : : #define NODE_LEN(NODE) HT_LEN (HT_NODE (NODE))
983 : : #define NODE_NAME(NODE) HT_STR (HT_NODE (NODE))
984 : :
985 : : /* The common part of an identifier node shared amongst all 3 C front
986 : : ends. Also used to store CPP identifiers, which are a superset of
987 : : identifiers in the grammatical sense. */
988 : :
989 : : union GTY(()) _cpp_hashnode_value { /* Value stored in a cpp_hashnode; each member's GTY tag is the node_type enumerator it belongs to. */
990 : : /* Assert (maybe NULL) */
991 : : cpp_macro * GTY((tag ("NT_VOID"))) answers;
992 : : /* Macro (maybe NULL) */
993 : : cpp_macro * GTY((tag ("NT_USER_MACRO"))) macro;
994 : : /* Code for a builtin macro. */
995 : : enum cpp_builtin_type GTY ((tag ("NT_BUILTIN_MACRO"))) builtin;
996 : : /* Macro argument index. */
997 : : unsigned short GTY ((tag ("NT_MACRO_ARG"))) arg_index;
998 : : };
999 : :
1000 : : struct GTY(()) cpp_hashnode { /* Per-identifier node: the ht_identifier, packed bit-fields, a deferred cookie and a type-discriminated value. */
1001 : : struct ht_identifier ident; /* First member; the CPP_HASHNODE cast presumably relies on that -- TODO confirm. */
1002 : : unsigned int is_directive : 1; /* Set when DIRECTIVE_INDEX indexes the directive table. */
1003 : : unsigned int directive_index : 7; /* If is_directive,
1004 : : then index into directive table.
1005 : : Otherwise, a NODE_OPERATOR. */
1006 : : unsigned int rid_code : 8; /* Rid code - for front ends. */
1007 : : unsigned int flags : 9; /* CPP flags: the NODE_* bits; the highest one defined is NODE_MODULE (1 << 8), hence 9 bits. */
1008 : : ENUM_BITFIELD(node_type) type : 2; /* CPP node type; also the discriminator for VALUE below. */
1009 : :
1010 : : /* 5 bits spare (1 + 7 + 8 + 9 + 2 = 27 of 32 used). */
1011 : :
1012 : : /* The deferred cookie is applicable to NT_USER_MACRO or NT_VOID.
1013 : : The latter for when a macro had a prevailing undef.
1014 : : On a 64-bit system there would be 32-bits of padding to the value
1015 : : field. So placing the deferred index here is not costly. */
1016 : : unsigned deferred; /* Deferred cookie */
1017 : :
1018 : : union _cpp_hashnode_value GTY ((desc ("%1.type"))) value;
1019 : : };
1020 : :
1021 : : /* Extra information we may need to store per identifier, which is needed rarely
1022 : : enough that it's not worth adding directly into the main identifier hash. */
1023 : : struct GTY(()) cpp_hashnode_extra
1024 : : {
1025 : : struct ht_identifier ident;
1026 : : location_t poisoned_loc;
1027 : : };
1028 : :
1029 : : /* A class for iterating through the source locations within a
1030 : : string token (before escapes are interpreted, and before
1031 : : concatenation). */
1032 : :
1033 : : class cpp_string_location_reader {
1034 : : public:
1035 : : cpp_string_location_reader (location_t src_loc,
1036 : : line_maps *line_table);
1037 : :
1038 : : source_range get_next ();
1039 : :
1040 : : private:
1041 : : location_t m_loc;
1042 : : int m_offset_per_column;
1043 : : };
1044 : :
1045 : : /* A class for storing the source ranges of all of the characters within
1046 : : a string literal, after escapes are interpreted, and after
1047 : : concatenation.
1048 : :
1049 : : This is not GTY-marked, as instances are intended to be temporary. */
1050 : :
1051 : : class cpp_substring_ranges
1052 : : {
1053 : : public:
1054 : : cpp_substring_ranges ();
1055 : : ~cpp_substring_ranges ();
1056 : :
1057 : 27430 : int get_num_ranges () const { return m_num_ranges; } /* Current value of m_num_ranges. */
1058 : 43394 : source_range get_range (int idx) const /* Range stored at index IDX. */
1059 : : {
1060 : 43394 : linemap_assert (idx < m_num_ranges); /* NOTE(review): only the upper bound is asserted; a negative IDX is not caught here. */
1061 : 43394 : return m_ranges[idx];
1062 : : }
1063 : :
1064 : : void add_range (source_range range);
1065 : : void add_n_ranges (int num, cpp_string_location_reader &loc_reader);
1066 : :
1067 : : private:
1068 : : source_range *m_ranges; /* Array indexed by get_range. */
1069 : : int m_num_ranges; /* Upper bound asserted by get_range. */
1070 : : int m_alloc_ranges; /* Presumably the allocated capacity of m_ranges -- TODO confirm. */
1071 : : };
1072 : :
1073 : : /* Call this first to get a handle to pass to other functions.
1074 : :
1075 : : The first hash table argument is for associating a struct cpp_hashnode
1076 : : with each identifier. The second hash table argument is for associating
1077 : : a struct cpp_hashnode_extra with each identifier that needs one. For
1078 : : either, pass in a NULL pointer if you want cpplib to create and manage
1079 : : the hash table itself, or else pass a suitably initialized hash table to
1080 : : be managed external to libcpp, as is done by the C-family frontends. */
1081 : : extern cpp_reader *cpp_create_reader (enum c_lang, struct ht *,
1082 : : class line_maps *,
1083 : : struct ht * = nullptr);
1084 : :
1085 : : /* Reset the cpp_reader's line_map. This is only used after reading a
1086 : : PCH file. */
1087 : : extern void cpp_set_line_map (cpp_reader *, class line_maps *);
1088 : :
1089 : : /* Call this to change the selected language standard (e.g. because of
1090 : : command line options). */
1091 : : extern void cpp_set_lang (cpp_reader *, enum c_lang);
1092 : :
1093 : : /* Set the include paths. */
1094 : : extern void cpp_set_include_chains (cpp_reader *, cpp_dir *, cpp_dir *, int);
1095 : :
1096 : : /* Call these to get pointers to the options, callback, and deps
1097 : : structures for a given reader. These pointers are good until you
1098 : : call cpp_finish on that reader. You can either edit the callbacks
1099 : : through the pointer returned from cpp_get_callbacks, or set them
1100 : : with cpp_set_callbacks. */
1101 : : extern cpp_options *cpp_get_options (cpp_reader *) ATTRIBUTE_PURE;
1102 : : extern cpp_callbacks *cpp_get_callbacks (cpp_reader *) ATTRIBUTE_PURE;
1103 : : extern void cpp_set_callbacks (cpp_reader *, cpp_callbacks *);
1104 : : extern class mkdeps *cpp_get_deps (cpp_reader *) ATTRIBUTE_PURE;
1105 : :
1106 : : extern const char *cpp_probe_header_unit (cpp_reader *, const char *file,
1107 : : bool angle_p, location_t);
1108 : :
1109 : : /* Call these to get name data about the various compile-time
1110 : : charsets. */
1111 : : extern const char *cpp_get_narrow_charset_name (cpp_reader *) ATTRIBUTE_PURE;
1112 : : extern const char *cpp_get_wide_charset_name (cpp_reader *) ATTRIBUTE_PURE;
1113 : :
1114 : : /* This function reads the file, but does not start preprocessing. It
1115 : : returns the name of the original file; this is the same as the
1116 : : input file, except for preprocessed input. This will generate at
1117 : : least one file change callback, and possibly a line change callback
1118 : : too. If there was an error opening the file, it returns NULL. */
1119 : : extern const char *cpp_read_main_file (cpp_reader *, const char *,
1120 : : bool injecting = false);
1121 : : extern location_t cpp_main_loc (const cpp_reader *);
1122 : :
1123 : : /* Adjust for the main file to be an include. */
1124 : : extern void cpp_retrofit_as_include (cpp_reader *);
1125 : :
1126 : : /* Set up built-ins with special behavior. Use cpp_init_builtins()
1127 : : instead unless you know what you are doing. */
1128 : : extern void cpp_init_special_builtins (cpp_reader *);
1129 : :
1130 : : /* Set up built-ins like __FILE__. */
1131 : : extern void cpp_init_builtins (cpp_reader *, int);
1132 : :
1133 : : /* This is called after options have been parsed, and partially
1134 : : processed. */
1135 : : extern void cpp_post_options (cpp_reader *);
1136 : :
1137 : : /* Set up translation to the target character set. */
1138 : : extern void cpp_init_iconv (cpp_reader *);
1139 : :
1140 : : /* Call this to finish preprocessing. If you requested dependency
1141 : : generation, pass open stream(s) to write the information to,
1142 : : otherwise NULL. It is your responsibility to close the stream(s). */
1143 : : extern void cpp_finish (cpp_reader *, FILE *deps_stream, FILE *fdeps_stream = NULL);
1144 : :
1145 : : /* Call this to release the handle at the end of preprocessing. Any
1146 : : use of the handle after this function returns is invalid. */
1147 : : extern void cpp_destroy (cpp_reader *);
1148 : :
1149 : : extern unsigned int cpp_token_len (const cpp_token *);
1150 : : extern unsigned char *cpp_token_as_text (cpp_reader *, const cpp_token *);
1151 : : extern unsigned char *cpp_spell_token (cpp_reader *, const cpp_token *,
1152 : : unsigned char *, bool);
1153 : : extern void cpp_register_pragma (cpp_reader *, const char *, const char *,
1154 : : void (*) (cpp_reader *), bool);
1155 : : extern void cpp_register_deferred_pragma (cpp_reader *, const char *,
1156 : : const char *, unsigned, bool, bool);
1157 : : extern int cpp_avoid_paste (cpp_reader *, const cpp_token *,
1158 : : const cpp_token *);
1159 : : extern const cpp_token *cpp_get_token (cpp_reader *);
1160 : : extern const cpp_token *cpp_get_token_with_location (cpp_reader *,
1161 : : location_t *);
1162 : 6827475 : inline bool cpp_user_macro_p (const cpp_hashnode *node) /* Returns true if NODE's type is exactly NT_USER_MACRO. */
1163 : : {
1164 : 6827475 : return node->type == NT_USER_MACRO;
1165 : : }
1166 : : inline bool cpp_builtin_macro_p (const cpp_hashnode *node) /* Returns true if NODE's type is exactly NT_BUILTIN_MACRO. */
1167 : : {
1168 : : return node->type == NT_BUILTIN_MACRO;
1169 : : }
1170 : 16763854 : inline bool cpp_macro_p (const cpp_hashnode *node) /* Returns true if NODE is a macro of either kind (user or builtin). */
1171 : : {
1172 : 16763854 : return node->type & NT_MACRO_MASK; /* Nonzero for NT_USER_MACRO and NT_BUILTIN_MACRO only; the int result converts to bool. */
1173 : : }
1174 : 145035 : inline cpp_macro *cpp_set_deferred_macro (cpp_hashnode *node,
1175 : : cpp_macro *forced = NULL) /* Turns NODE into an NT_USER_MACRO whose macro pointer is FORCED (NULL by default) and returns the pointer it held before. */
1176 : : {
1177 : 145035 : cpp_macro *old = node->value.macro; /* Read through the `macro' member whatever NODE's type was on entry. */
1178 : :
1179 : 145035 : node->value.macro = forced;
1180 : 145035 : node->type = NT_USER_MACRO;
1181 : 145035 : node->flags &= ~NODE_USED; /* Clear the NODE_USED flag. */
1182 : :
1183 : 145035 : return old;
1184 : : }
1185 : : cpp_macro *cpp_get_deferred_macro (cpp_reader *, cpp_hashnode *, location_t);
1186 : :
1187 : : /* Returns true if NODE is a function-like user macro. NOTE(review): value.macro is dereferenced without a NULL check once the type is NT_USER_MACRO, yet cpp_set_deferred_macro can leave it NULL; presumably callers resolve deferred macros first -- confirm. */
1188 : 64 : inline bool cpp_fun_like_macro_p (cpp_hashnode *node)
1189 : : {
1190 : 64 : return cpp_user_macro_p (node) && node->value.macro->fun_like;
1191 : : }
1192 : :
1193 : : extern const unsigned char *cpp_macro_definition (cpp_reader *, cpp_hashnode *);
1194 : : extern const unsigned char *cpp_macro_definition (cpp_reader *, cpp_hashnode *,
1195 : : const cpp_macro *);
1196 : 86 : inline location_t cpp_macro_definition_location (cpp_hashnode *node) /* Returns the `line' location recorded in NODE's macro, or 0 when the macro pointer is NULL. */
1197 : : {
1198 : 86 : const cpp_macro *macro = node->value.macro; /* NODE's type is not checked before the `macro' member is read. */
1199 : 86 : return macro ? macro->line : 0;
1200 : : }
1201 : : /* Return an idempotent time stamp (possibly from SOURCE_DATE_EPOCH). */
1202 : : enum class CPP_time_kind
1203 : : {
1204 : : FIXED = -1, /* Fixed time via source epoch. */
1205 : : DYNAMIC = -2, /* Dynamic via time(2). */
1206 : : UNKNOWN = -3 /* Wibbly wobbly, timey wimey. */
1207 : : };
1208 : : extern CPP_time_kind cpp_get_date (cpp_reader *, time_t *);
1209 : :
1210 : : extern void _cpp_backup_tokens (cpp_reader *, unsigned int);
1211 : : extern const cpp_token *cpp_peek_token (cpp_reader *, int);
1212 : :
1213 : : /* Evaluate a CPP_*CHAR* token. */
1214 : : extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
1215 : : unsigned int *, int *);
1216 : : /* Evaluate a vector of CPP_*STRING* tokens. */
1217 : : extern bool cpp_interpret_string (cpp_reader *,
1218 : : const cpp_string *, size_t,
1219 : : cpp_string *, enum cpp_ttype);
1220 : : extern const char *cpp_interpret_string_ranges (cpp_reader *pfile,
1221 : : const cpp_string *from,
1222 : : cpp_string_location_reader *,
1223 : : size_t count,
1224 : : cpp_substring_ranges *out,
1225 : : enum cpp_ttype type);
1226 : : extern bool cpp_interpret_string_notranslate (cpp_reader *,
1227 : : const cpp_string *, size_t,
1228 : : cpp_string *, enum cpp_ttype);
1229 : :
1230 : : /* Convert a host character constant to the execution character set. */
1231 : : extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
1232 : :
1233 : : /* Used to register macros and assertions, perhaps from the command line.
1234 : : The text is the same as the command line argument. */
1235 : : extern void cpp_define (cpp_reader *, const char *);
1236 : : extern void cpp_define_unused (cpp_reader *, const char *);
1237 : : extern void cpp_define_formatted (cpp_reader *pfile,
1238 : : const char *fmt, ...) ATTRIBUTE_PRINTF_2;
1239 : : extern void cpp_define_formatted_unused (cpp_reader *pfile,
1240 : : const char *fmt,
1241 : : ...) ATTRIBUTE_PRINTF_2;
1242 : : extern void cpp_assert (cpp_reader *, const char *);
1243 : : extern void cpp_undef (cpp_reader *, const char *);
1244 : : extern void cpp_unassert (cpp_reader *, const char *);
1245 : :
1246 : : /* Mark a node as a lazily defined macro. */
1247 : : extern void cpp_define_lazily (cpp_reader *, cpp_hashnode *node, unsigned N);
1248 : :
1249 : : /* Undefine all macros and assertions. */
1250 : : extern void cpp_undef_all (cpp_reader *);
1251 : :
1252 : : extern cpp_buffer *cpp_push_buffer (cpp_reader *, const unsigned char *,
1253 : : size_t, int);
1254 : : extern int cpp_defined (cpp_reader *, const unsigned char *, int);
1255 : :
1256 : : /* A preprocessing number. Code assumes that any unused high bits of
1257 : : the double integer are set to zero. */
1258 : :
1259 : : /* This type has to be equal to unsigned HOST_WIDE_INT, see
1260 : : gcc/c-family/c-lex.cc. */
1261 : : typedef uint64_t cpp_num_part;
1262 : : typedef struct cpp_num cpp_num;
1263 : : struct cpp_num
1264 : : {
1265 : : cpp_num_part high;
1266 : : cpp_num_part low;
1267 : : bool unsignedp; /* True if value should be treated as unsigned. */
1268 : : bool overflow; /* True if the most recent calculation overflowed. */
1269 : : };
1270 : :
1271 : : /* cpplib provides two interfaces for interpretation of preprocessing
1272 : : numbers.
1273 : :
1274 : : cpp_classify_number categorizes numeric constants according to
1275 : : their field (integer, floating point, or invalid), radix (decimal,
1276 : : octal, hexadecimal), and type suffixes. */
1277 : :
1278 : : #define CPP_N_CATEGORY 0x000F
1279 : : #define CPP_N_INVALID 0x0000
1280 : : #define CPP_N_INTEGER 0x0001
1281 : : #define CPP_N_FLOATING 0x0002
1282 : :
1283 : : #define CPP_N_WIDTH 0x00F0
1284 : : #define CPP_N_SMALL 0x0010 /* int, float, short _Fract/Accum */
1285 : : #define CPP_N_MEDIUM 0x0020 /* long, double, long _Fract/_Accum. */
1286 : : #define CPP_N_LARGE 0x0040 /* long long, long double,
1287 : : long long _Fract/Accum. */
1288 : :
1289 : : #define CPP_N_WIDTH_MD 0xF0000 /* machine defined. */
1290 : : #define CPP_N_MD_W 0x10000
1291 : : #define CPP_N_MD_Q 0x20000
1292 : :
1293 : : #define CPP_N_RADIX 0x0F00
1294 : : #define CPP_N_DECIMAL 0x0100
1295 : : #define CPP_N_HEX 0x0200
1296 : : #define CPP_N_OCTAL 0x0400
1297 : : #define CPP_N_BINARY 0x0800
1298 : :
1299 : : #define CPP_N_UNSIGNED 0x1000 /* Properties. */
1300 : : #define CPP_N_IMAGINARY 0x2000
1301 : : #define CPP_N_DFLOAT 0x4000
1302 : : #define CPP_N_DEFAULT 0x8000
1303 : :
1304 : : #define CPP_N_FRACT 0x100000 /* Fract types. */
1305 : : #define CPP_N_ACCUM 0x200000 /* Accum types. */
1306 : : #define CPP_N_FLOATN 0x400000 /* _FloatN types. */
1307 : : #define CPP_N_FLOATNX 0x800000 /* _FloatNx types. */
1308 : :
1309 : : #define CPP_N_USERDEF 0x1000000 /* C++11 user-defined literal. */
1310 : :
1311 : : #define CPP_N_SIZE_T 0x2000000 /* C++23 size_t literal. */
1312 : : #define CPP_N_BFLOAT16 0x4000000 /* std::bfloat16_t type. */
1313 : : #define CPP_N_BITINT 0x8000000 /* C23 _BitInt literal. */
1314 : :
1315 : : #define CPP_N_WIDTH_FLOATN_NX 0xF0000000 /* _FloatN / _FloatNx value
1316 : : of N, divided by 16. */
1317 : : #define CPP_FLOATN_SHIFT 24
1318 : : #define CPP_FLOATN_MAX 0xF0
1319 : :
1320 : : /* Classify a CPP_NUMBER token. The return value is a combination of
1321 : : the flags from the above sets. */
1322 : : extern unsigned cpp_classify_number (cpp_reader *, const cpp_token *,
1323 : : const char **, location_t);
1324 : :
1325 : : /* Return the classification flags for a float suffix. */
1326 : : extern unsigned int cpp_interpret_float_suffix (cpp_reader *, const char *,
1327 : : size_t);
1328 : :
1329 : : /* Return the classification flags for an int suffix. */
1330 : : extern unsigned int cpp_interpret_int_suffix (cpp_reader *, const char *,
1331 : : size_t);
1332 : :
1333 : : /* Evaluate a token classified as category CPP_N_INTEGER. */
1334 : : extern cpp_num cpp_interpret_integer (cpp_reader *, const cpp_token *,
1335 : : unsigned int);
1336 : :
1337 : : /* Sign extend a number, with PRECISION significant bits and all
1338 : : others assumed clear, to fill out a cpp_num structure. */
1339 : : cpp_num cpp_num_sign_extend (cpp_num, size_t);
1340 : :
1341 : : /* Output a diagnostic of some kind. */
1342 : : extern bool cpp_error (cpp_reader *, enum cpp_diagnostic_level,
1343 : : const char *msgid, ...)
1344 : : ATTRIBUTE_PRINTF_3;
1345 : : extern bool cpp_warning (cpp_reader *, enum cpp_warning_reason,
1346 : : const char *msgid, ...)
1347 : : ATTRIBUTE_PRINTF_3;
1348 : : extern bool cpp_pedwarning (cpp_reader *, enum cpp_warning_reason,
1349 : : const char *msgid, ...)
1350 : : ATTRIBUTE_PRINTF_3;
1351 : : extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason,
1352 : : const char *msgid, ...)
1353 : : ATTRIBUTE_PRINTF_3;
1354 : :
1355 : : /* As their counterparts above, but use RICHLOC. */
1356 : : extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason,
1357 : : rich_location *richloc, const char *msgid, ...)
1358 : : ATTRIBUTE_PRINTF_4;
1359 : : extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason,
1360 : : rich_location *richloc, const char *msgid, ...)
1361 : : ATTRIBUTE_PRINTF_4;
1362 : :
1363 : : /* Output a diagnostic with "MSGID: " preceding the
1364 : : error string of errno. No location is printed. */
1365 : : extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level,
1366 : : const char *msgid);
1367 : : /* Similarly, but with "FILENAME: " instead of "MSGID: ", where
1368 : : the filename is not localized. */
1369 : : extern bool cpp_errno_filename (cpp_reader *, enum cpp_diagnostic_level,
1370 : : const char *filename, location_t loc);
1371 : :
1372 : : /* Same as cpp_error, except additionally specifies a position as a
1373 : : (translation unit) physical line and physical column. If the line is
1374 : : zero, then no location is printed. */
1375 : : extern bool cpp_error_with_line (cpp_reader *, enum cpp_diagnostic_level,
1376 : : location_t, unsigned,
1377 : : const char *msgid, ...)
1378 : : ATTRIBUTE_PRINTF_5;
1379 : : extern bool cpp_warning_with_line (cpp_reader *, enum cpp_warning_reason,
1380 : : location_t, unsigned,
1381 : : const char *msgid, ...)
1382 : : ATTRIBUTE_PRINTF_5;
1383 : : extern bool cpp_pedwarning_with_line (cpp_reader *, enum cpp_warning_reason,
1384 : : location_t, unsigned,
1385 : : const char *msgid, ...)
1386 : : ATTRIBUTE_PRINTF_5;
1387 : : extern bool cpp_warning_with_line_syshdr (cpp_reader *, enum cpp_warning_reason,
1388 : : location_t, unsigned,
1389 : : const char *msgid, ...)
1390 : : ATTRIBUTE_PRINTF_5;
1391 : :
1392 : : extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level,
1393 : : location_t src_loc, const char *msgid, ...)
1394 : : ATTRIBUTE_PRINTF_4;
1395 : :
1396 : : extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level,
1397 : : rich_location *richloc, const char *msgid, ...)
1398 : : ATTRIBUTE_PRINTF_4;
1399 : :
1400 : : /* In lex.cc */
1401 : : extern int cpp_ideq (const cpp_token *, const char *);
1402 : : extern void cpp_output_line (cpp_reader *, FILE *);
1403 : : extern unsigned char *cpp_output_line_to_string (cpp_reader *,
1404 : : const unsigned char *);
1405 : : extern const unsigned char *cpp_alloc_token_string
1406 : : (cpp_reader *, const unsigned char *, unsigned);
1407 : : extern void cpp_output_token (const cpp_token *, FILE *);
1408 : : extern const char *cpp_type2name (enum cpp_ttype, unsigned char flags);
1409 : : /* Returns the value of an escape sequence, truncated to the correct
1410 : : target precision. PSTR points to the input pointer, which is just
1411 : : after the backslash. LIMIT is how much text we have. WIDE is true
1412 : : if the escape sequence is part of a wide character constant or
1413 : : string literal. Handles all relevant diagnostics. */
1414 : : extern cppchar_t cpp_parse_escape (cpp_reader *, const unsigned char ** pstr,
1415 : : const unsigned char *limit, int wide);
1416 : :
1417 : : /* Structure used to hold a comment block at a given location in the
1418 : : source code. */
1419 : :
1420 : : typedef struct
1421 : : {
1422 : : /* Text of the comment including the terminators. */
1423 : : char *comment;
1424 : :
1425 : : /* source location for the given comment. */
1426 : : location_t sloc;
1427 : : } cpp_comment;
1428 : :
1429 : : /* Structure holding all comments for a given cpp_reader. */
1430 : :
1431 : : typedef struct
1432 : : {
1433 : : /* table of comment entries. */
1434 : : cpp_comment *entries;
1435 : :
1436 : : /* number of actual entries entered in the table. */
1437 : : int count;
1438 : :
1439 : : /* number of entries allocated currently. */
1440 : : int allocated;
1441 : : } cpp_comment_table;
1442 : :
1443 : : /* Returns the table of comments encountered by the preprocessor. This
1444 : : table is only populated when pfile->state.save_comments is true. */
1445 : : extern cpp_comment_table *cpp_get_comments (cpp_reader *);
1446 : :
1447 : : /* In identifiers.cc */
1448 : :
1449 : : /* Lookup an identifier in the hashtable. Puts the identifier in the
1450 : : table if it is not already there. */
1451 : : extern cpp_hashnode *cpp_lookup (cpp_reader *, const unsigned char *,
1452 : : unsigned int);
1453 : :
1454 : : typedef int (*cpp_cb) (cpp_reader *, cpp_hashnode *, void *);
1455 : : extern void cpp_forall_identifiers (cpp_reader *, cpp_cb, void *);
1456 : :
1457 : : /* In macro.cc */
1458 : : extern void cpp_scan_nooutput (cpp_reader *);
1459 : : extern int cpp_sys_macro_p (cpp_reader *);
1460 : : extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *,
1461 : : unsigned int);
1462 : : extern bool cpp_compare_macros (const cpp_macro *macro1,
1463 : : const cpp_macro *macro2);
1464 : :
1465 : : /* In files.cc */
1466 : : extern bool cpp_included (cpp_reader *, const char *);
1467 : : extern bool cpp_included_before (cpp_reader *, const char *, location_t);
1468 : : extern void cpp_make_system_header (cpp_reader *, int, int);
1469 : : extern bool cpp_push_include (cpp_reader *, const char *);
1470 : : extern bool cpp_push_default_include (cpp_reader *, const char *);
1471 : : extern void cpp_change_file (cpp_reader *, enum lc_reason, const char *);
1472 : : extern const char *cpp_get_path (struct _cpp_file *);
1473 : : extern cpp_dir *cpp_get_dir (struct _cpp_file *);
1474 : : extern cpp_buffer *cpp_get_buffer (cpp_reader *);
1475 : : extern struct _cpp_file *cpp_get_file (cpp_buffer *);
1476 : : extern cpp_buffer *cpp_get_prev (cpp_buffer *);
1477 : : extern void cpp_clear_file_cache (cpp_reader *);
1478 : :
1479 : : /* cpp_get_converted_source returns the contents of the given file, as it exists
1480 : : after cpplib has read it and converted it from the input charset to the
1481 : : source charset. Return struct will be zero-filled if the data could not be
1482 : : read for any reason. The data starts at the DATA pointer, but the TO_FREE
1483 : : pointer is what should be passed to free(), as there may be an offset. */
1484 : : struct cpp_converted_source
1485 : : {
1486 : : char *to_free;
1487 : : char *data;
1488 : : size_t len;
1489 : : };
1490 : : cpp_converted_source cpp_get_converted_source (const char *fname,
1491 : : const char *input_charset);
1492 : :
1493 : : /* In pch.cc */
1494 : : struct save_macro_data;
1495 : : extern int cpp_save_state (cpp_reader *, FILE *);
1496 : : extern int cpp_write_pch_deps (cpp_reader *, FILE *);
1497 : : extern int cpp_write_pch_state (cpp_reader *, FILE *);
1498 : : extern int cpp_valid_state (cpp_reader *, const char *, int);
1499 : : extern void cpp_prepare_state (cpp_reader *, struct save_macro_data **);
1500 : : extern int cpp_read_state (cpp_reader *, const char *, FILE *,
1501 : : struct save_macro_data *);
1502 : :
1503 : : /* In lex.cc */
1504 : : extern void cpp_force_token_locations (cpp_reader *, location_t);
1505 : : extern void cpp_stop_forcing_token_locations (cpp_reader *);
1506 : : enum CPP_DO_task
1507 : : {
1508 : : CPP_DO_print,
1509 : : CPP_DO_location,
1510 : : CPP_DO_token
1511 : : };
1512 : :
1513 : : extern void cpp_directive_only_process (cpp_reader *pfile,
1514 : : void *data,
1515 : : void (*cb) (cpp_reader *,
1516 : : CPP_DO_task,
1517 : : void *data, ...));
1518 : :
1519 : : /* In expr.cc. Only prototypes are visible here; the notes below are inferred from names and signatures ("userdef" presumably means user-defined literal) -- confirm in expr.cc. */
1520 : : extern enum cpp_ttype cpp_userdef_string_remove_type
1521 : : (enum cpp_ttype type); /* Maps one token type to another. */
1522 : : extern enum cpp_ttype cpp_userdef_string_add_type
1523 : : (enum cpp_ttype type); /* Presumably the inverse mapping of the remove_type variant. */
1524 : : extern enum cpp_ttype cpp_userdef_char_remove_type
1525 : : (enum cpp_ttype type); /* Character counterpart of the string remove_type mapping. */
1526 : : extern enum cpp_ttype cpp_userdef_char_add_type
1527 : : (enum cpp_ttype type); /* Character counterpart of the string add_type mapping. */
1528 : : extern bool cpp_userdef_string_p
1529 : : (enum cpp_ttype type); /* Predicate on a token type. */
1530 : : extern bool cpp_userdef_char_p
1531 : : (enum cpp_ttype type); /* Predicate on a token type. */
1532 : : extern const char * cpp_get_userdef_suffix
1533 : : (const cpp_token *); /* Returns a string for the given token; ownership of the result is not visible here. */
1534 : :
1535 : : /* In charset.cc */
1536 : :
1537 : : /* The result of attempting to decode a run of UTF-8 bytes. */
1538 : :
1539 : : struct cpp_decoded_char
1540 : : {
1541 : : const char *m_start_byte; /* Presumably the first byte of the run that was examined -- confirm in charset.cc. */
1542 : : const char *m_next_byte; /* Presumably the byte just past that run, where decoding would resume -- confirm. */
1543 : : /* Outcome of the decoding attempt. */
1544 : : bool m_valid_ch; /* Presumably true when the bytes decoded to a character. */
1545 : : cppchar_t m_ch; /* The decoded character; presumably meaningful only when m_valid_ch is true. */
1546 : : };
1547 : :
1548 : : /* Information for mapping between code points and display columns.
1549 : :
1550 : : This is a tabstop value, along with a callback for getting the
1551 : : widths of characters. Normally this callback is cpp_wcwidth, but we
1552 : : support other schemes for escaping non-ASCII Unicode as a series of
1553 : : ASCII chars when printing the user's source code in diagnostic-show-locus.cc.
1554 : :
1555 : : For example, consider:
1556 : : - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80)
1557 : : - the Unicode character U+1F642 "SLIGHTLY SMILING FACE"
1558 : : (UTF-8: 0xF0 0x9F 0x99 0x82)
1559 : : - the byte 0xBF (a stray trailing byte of a UTF-8 character)
1560 : : Normally U+03C0 would occupy one display column, U+1F642
1561 : : would occupy two display columns, and the stray byte would be
1562 : : printed verbatim as one display column.
1563 : :
1564 : : However when escaping them as unicode code points as "<U+03C0>"
1565 : : and "<U+1F642>" they occupy 8 and 9 display columns respectively,
1566 : : and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>"
1567 : : they occupy 8 and 16 display columns respectively. In both cases
1568 : : the stray byte is escaped to <BF> as 4 display columns. */
1569 : :
1570 : : struct cpp_char_column_policy
1571 : : { /* The constructor records TABSTOP and WIDTH_CB and sets the undecodable-byte width to one column. */
1572 : 288867 : cpp_char_column_policy (int tabstop, /* Stored in m_tabstop. */
1573 : : int (*width_cb) (cppchar_t c)) /* Stored in m_width_cb. */
1574 : 398306 : : m_tabstop (tabstop),
1575 : 398306 : m_undecoded_byte_width (1), /* Fixed default, not a constructor parameter; the member is public and can be changed afterwards. */
1576 : 398294 : m_width_cb (width_cb)
1577 : : {}
1578 : : /* Tab stop value supplied to the constructor. */
1579 : : int m_tabstop;
1580 : : /* Width in display columns of a stray byte that isn't decodable
1581 : : as UTF-8. */
1582 : : int m_undecoded_byte_width;
1583 : : int (*m_width_cb) (cppchar_t c); /* Callback giving the width of character C. */
1584 : : };
1585 : :
1586 : : /* A class to manage the state while converting a UTF-8 sequence to cppchar_t
1587 : : and computing the display width one character at a time. */
1588 : : class cpp_display_width_computation {
1589 : : public:
1590 : : cpp_display_width_computation (const char *data, int data_length, /* Defined elsewhere; DATA_LENGTH presumably counts the bytes at DATA. */
1591 : : const cpp_char_column_policy &policy);
1592 : 7388124 : const char *next_byte () const { return m_next; } /* Current read position. */
1593 : 2891120 : int bytes_processed () const { return m_next - m_begin; } /* Distance of the read position from m_begin; the pointer difference is narrowed to int. */
1594 : 3825992 : int bytes_left () const { return m_bytes_left; } /* m_bytes_left (a size_t) converted to int. */
1595 : 3825992 : bool done () const { return !bytes_left (); } /* True once bytes_left () reports zero. */
1596 : 6820651 : int display_cols_processed () const { return m_display_cols; } /* Current value of m_display_cols. */
1597 : : /* The two functions below are only declared here; what their int results mean must be checked in the implementation. */
1598 : : int process_next_codepoint (cpp_decoded_char *out); /* OUT presumably receives the decode result -- TODO confirm whether it may be null. */
1599 : : int advance_display_cols (int n);
1600 : :
1601 : : private:
1602 : : const char *const m_begin; /* Const pointer, never reassigned; the base used by bytes_processed (). */
1603 : : const char *m_next; /* Read position returned by next_byte (). */
1604 : : size_t m_bytes_left; /* Remaining byte count returned (as int) by bytes_left (). */
1605 : : const cpp_char_column_policy &m_policy; /* Held by reference, not copied: the policy object must outlive this object. */
1606 : : int m_display_cols; /* Column count returned by display_cols_processed (). */
1607 : : };
1608 : :
1609 : : /* Convenience functions that are simple use cases for class
1610 : : cpp_display_width_computation. Tab characters will be expanded to spaces
1611 : : as determined by POLICY.m_tabstop, and non-printable-ASCII characters
1612 : : will be escaped as per POLICY. */
1613 : :
1614 : : int cpp_byte_column_to_display_column (const char *data, int data_length, /* Buffer and its length. */
1615 : : int column, /* Byte column to translate; cpp_display_width passes DATA_LENGTH here. */
1616 : : const cpp_char_column_policy &policy); /* Tab stop and width rules to apply. */
1617 : 147933 : inline int cpp_display_width (const char *data, int data_length,
1618 : : const cpp_char_column_policy &policy)
1619 : : { /* Display width of the whole buffer: translate the byte column equal to DATA_LENGTH, i.e. the end of DATA. */
1620 : 147933 : return cpp_byte_column_to_display_column (data, data_length, data_length, /* DATA_LENGTH is passed twice: as the length and as the column. */
1621 : : policy);
1622 : : }
1623 : : int cpp_display_column_to_byte_column (const char *data, int data_length, /* Going by the name, the reverse of cpp_byte_column_to_display_column -- confirm in charset.cc. */
1624 : : int display_col, /* Display column to translate. */
1625 : : const cpp_char_column_policy &policy);
1626 : : int cpp_wcwidth (cppchar_t c); /* Same signature as the width callback of cpp_char_column_policy, whose comment names this function as the usual callback. */
1627 : :
1628 : : bool cpp_input_conversion_is_trivial (const char *input_charset); /* Presumably true when converting from INPUT_CHARSET needs no real work -- confirm in charset.cc. */
1629 : : int cpp_check_utf8_bom (const char *data, size_t data_length); /* Meaning of the int result is not visible here -- TODO confirm. */
1630 : : bool cpp_valid_utf8_p (const char *data, size_t num_bytes); /* Presumably true when the NUM_BYTES bytes at DATA are well-formed UTF-8. */
1631 : : /* Per-character predicates; the exact character sets are defined by the implementation. */
1632 : : bool cpp_is_combining_char (cppchar_t c);
1633 : : bool cpp_is_printable_char (cppchar_t c);
1634 : :
1635 : : enum cpp_xid_property { /* The two values occupy distinct bits, so they can be OR-ed together. */
1636 : : CPP_XID_START = 1,
1637 : : CPP_XID_CONTINUE = 2
1638 : : };
1639 : : /* NOTE(review): the unsigned result is presumably a mask of cpp_xid_property bits for C -- confirm. */
1640 : : unsigned int cpp_check_xid_property (cppchar_t c);
1641 : :
1642 : : #endif /* ! LIBCPP_CPPLIB_H */
|