Line data Source code
1 : // Copyright (C) 2020-2026 Free Software Foundation, Inc.
2 :
3 : // This file is part of GCC.
4 :
5 : // GCC is free software; you can redistribute it and/or modify it under
6 : // the terms of the GNU General Public License as published by the Free
7 : // Software Foundation; either version 3, or (at your option) any later
8 : // version.
9 :
10 : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 : // for more details.
14 :
15 : // You should have received a copy of the GNU General Public License
16 : // along with GCC; see the file COPYING3. If not see
17 : // <http://www.gnu.org/licenses/>.
18 :
19 : #ifndef RUST_UNICODE_H
20 : #define RUST_UNICODE_H
21 :
22 : #include "optional.h"
23 : #include "rust-system.h"
24 : #include "rust-input-source.h"
25 :
26 : namespace Rust {
27 :
28 631021 : class Utf8String
29 : {
30 : private:
31 : std::vector<Codepoint> chars;
32 :
33 : public:
34 : static tl::optional<Utf8String>
35 315507 : make_utf8_string (const std::string &maybe_utf8)
36 : {
37 315507 : BufferInputSource input_source = {maybe_utf8, 0};
38 315507 : tl::optional<std::vector<Codepoint>> chars_opt = input_source.get_chars ();
39 315507 : if (chars_opt.has_value ())
40 631014 : return {Utf8String (chars_opt.value ())};
41 : else
42 0 : return tl::nullopt;
43 315507 : }
44 :
45 521591 : Utf8String (const std::vector<Codepoint> codepoints) : chars ({codepoints}) {}
46 :
47 206084 : std::string as_string () const
48 : {
49 206084 : std::stringstream ss;
50 1087196 : for (Codepoint c : chars)
51 1762224 : ss << c.as_string ();
52 :
53 206084 : return ss.str ();
54 206084 : };
55 :
56 : // Returns characters
57 109423 : std::vector<Codepoint> get_chars () const { return chars; }
58 :
59 : Utf8String nfc_normalize () const;
60 : };
61 :
// Predicates over a single codepoint value (or, for is_ascii_only, over a
// whole byte string).  Only the declarations are visible here; the
// definitions live elsewhere.
// NOTE(review): judging by the names, is_alphabetic and is_numeric
// presumably test Unicode character properties, is_ascii_only presumably
// reports whether STR contains nothing but ASCII, and is_nfc_qc_no /
// is_nfc_qc_maybe presumably test for the NFC quick-check values "No" and
// "Maybe" -- confirm against the definitions.
62 : bool is_alphabetic (uint32_t codepoint);
63 :
64 : bool is_ascii_only (const std::string &str);
65 :
66 : bool is_numeric (uint32_t codepoint);
67 :
68 : bool is_nfc_qc_no (uint32_t codepoint);
69 :
70 : bool is_nfc_qc_maybe (uint32_t codepoint);
71 :
// Three-way result type returned by nfc_quick_check.
// NOTE(review): the enumerators look like the Yes / No / Maybe outcomes of
// the Unicode normalization quick-check algorithm (UAX #15) -- confirm
// against the definition of nfc_quick_check.
72 : enum class QuickCheckResult
73 : {
74 : YES,
75 : NO,
76 : MAYBE
77 : };
78 :
// Classifies the codepoint sequence S as one of the QuickCheckResult values.
// Declared here only; the definition lives elsewhere.
79 : QuickCheckResult nfc_quick_check (const std::vector<Codepoint> &s);
80 :
81 : } // namespace Rust
82 :
83 : #if CHECKING_P
84 :
85 : namespace selftest {
86 :
// Self-test entry points in namespace selftest.  They are declared only when
// CHECKING_P is enabled (see the surrounding #if) and are defined elsewhere.
// NOTE(review): the names suggest they cover the NFC quick check, UTF-8
// normalization and the codepoint property predicates respectively --
// confirm against the definitions.
87 : void rust_nfc_qc_test ();
88 :
89 : void rust_utf8_normalize_test ();
90 :
91 : void rust_utf8_property_test ();
92 :
93 : } // namespace selftest
94 :
95 : #endif // CHECKING_P
96 :
97 : #endif
|