//
2// Copyright 2017 The Abseil Authors.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// https://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// -----------------------------------------------------------------------------
17// File: ascii.h
18// -----------------------------------------------------------------------------
19//
20// This package contains functions operating on characters and strings
21// restricted to standard ASCII. These include character classification
22// functions analogous to those found in the ANSI C Standard Library <ctype.h>
23// header file.
24//
25// C++ implementations provide <ctype.h> functionality based on their
26// C environment locale. In general, reliance on such a locale is not ideal, as
27// the locale standard is problematic (and may not return invariant information
28// for the same character set, for example). These `ascii_*()` functions are
29// hard-wired for standard ASCII, much faster, and guaranteed to behave
30// consistently. They will never be overloaded, nor will their function
31// signature change.
32//
33// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
34// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
35// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
36// `ascii_isxdigit()`
37// Analogous to the <ctype.h> functions with similar names, these
38// functions take an unsigned char and return a bool, based on whether the
39// character matches the condition specified.
40//
41// If the input character has a numerical value greater than 127, these
42// functions return `false`.
43//
44// `ascii_tolower()`, `ascii_toupper()`
45// Analogous to the <ctype.h> functions with similar names, these functions
46// take an unsigned char and return a char.
47//
48// If the input character is not an ASCII {lower,upper}-case letter (including
49// numerical values greater than 127) then the functions return the same value
50// as the input character.
51
52#ifndef ABSL_STRINGS_ASCII_H_
53#define ABSL_STRINGS_ASCII_H_
54
55#include <algorithm>
56#include <string>
57
58#include "absl/base/attributes.h"
Austin Schuhb4691e92020-12-31 12:37:18 -080059#include "absl/base/config.h"
Austin Schuh36244a12019-09-21 17:52:38 -070060#include "absl/strings/string_view.h"
61
62namespace absl {
Austin Schuhb4691e92020-12-31 12:37:18 -080063ABSL_NAMESPACE_BEGIN
Austin Schuh36244a12019-09-21 17:52:38 -070064namespace ascii_internal {
65
66// Declaration for an array of bitfields holding character information.
Austin Schuhb4691e92020-12-31 12:37:18 -080067ABSL_DLL extern const unsigned char kPropertyBits[256];
Austin Schuh36244a12019-09-21 17:52:38 -070068
69// Declaration for the array of characters to upper-case characters.
Austin Schuhb4691e92020-12-31 12:37:18 -080070ABSL_DLL extern const char kToUpper[256];
Austin Schuh36244a12019-09-21 17:52:38 -070071
72// Declaration for the array of characters to lower-case characters.
Austin Schuhb4691e92020-12-31 12:37:18 -080073ABSL_DLL extern const char kToLower[256];
Austin Schuh36244a12019-09-21 17:52:38 -070074
75} // namespace ascii_internal
76
77// ascii_isalpha()
78//
79// Determines whether the given character is an alphabetic character.
80inline bool ascii_isalpha(unsigned char c) {
81 return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
82}
83
84// ascii_isalnum()
85//
86// Determines whether the given character is an alphanumeric character.
87inline bool ascii_isalnum(unsigned char c) {
88 return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
89}
90
91// ascii_isspace()
92//
93// Determines whether the given character is a whitespace character (space,
94// tab, vertical tab, formfeed, linefeed, or carriage return).
95inline bool ascii_isspace(unsigned char c) {
96 return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
97}
98
99// ascii_ispunct()
100//
101// Determines whether the given character is a punctuation character.
102inline bool ascii_ispunct(unsigned char c) {
103 return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
104}
105
106// ascii_isblank()
107//
108// Determines whether the given character is a blank character (tab or space).
109inline bool ascii_isblank(unsigned char c) {
110 return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
111}
112
113// ascii_iscntrl()
114//
115// Determines whether the given character is a control character.
116inline bool ascii_iscntrl(unsigned char c) {
117 return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
118}
119
120// ascii_isxdigit()
121//
122// Determines whether the given character can be represented as a hexadecimal
123// digit character (i.e. {0-9} or {A-F}).
124inline bool ascii_isxdigit(unsigned char c) {
125 return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
126}
127
// ascii_isdigit()
//
// Reports whether `c` is a decimal digit character, i.e. lies in the
// inclusive range '0'..'9'.
inline bool ascii_isdigit(unsigned char c) {
  if (c < '0') return false;
  return c <= '9';
}
133
// ascii_isprint()
//
// Reports whether `c` is printable: any value from 32 (the space character)
// up to and including 126. Other whitespace such as tab or newline lies
// below 32 and is therefore not considered printable.
inline bool ascii_isprint(unsigned char c) {
  constexpr unsigned char kFirstPrintable = 32;  // space
  constexpr unsigned char kDelete = 127;         // first value past the range
  return !(c < kFirstPrintable || c >= kDelete);
}
138
// ascii_isgraph()
//
// Reports whether `c` has a graphical representation: any value strictly
// between 32 (space) and 127, so the space character itself is excluded.
inline bool ascii_isgraph(unsigned char c) {
  constexpr unsigned char kSpace = 32;
  constexpr unsigned char kDelete = 127;
  return !(c <= kSpace || c >= kDelete);
}
143
// ascii_isupper()
//
// Reports whether `c` is an uppercase letter, i.e. lies in the inclusive
// range 'A'..'Z'.
inline bool ascii_isupper(unsigned char c) {
  return !(c < 'A' || c > 'Z');
}
148
// ascii_islower()
//
// Reports whether `c` is a lowercase letter, i.e. lies in the inclusive
// range 'a'..'z'.
inline bool ascii_islower(unsigned char c) {
  if (c < 'a') return false;
  return c <= 'z';
}
153
// ascii_isascii()
//
// Reports whether `c` is an ASCII character, i.e. its value is below 128
// (the high bit is clear).
inline bool ascii_isascii(unsigned char c) {
  return (c & 0x80) == 0;
}
158
159// ascii_tolower()
160//
161// Returns an ASCII character, converting to lowercase if uppercase is
162// passed. Note that character values > 127 are simply returned.
163inline char ascii_tolower(unsigned char c) {
164 return ascii_internal::kToLower[c];
165}
166
167// Converts the characters in `s` to lowercase, changing the contents of `s`.
168void AsciiStrToLower(std::string* s);
169
170// Creates a lowercase string from a given absl::string_view.
171ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
172 std::string result(s);
173 absl::AsciiStrToLower(&result);
174 return result;
175}
176
177// ascii_toupper()
178//
179// Returns the ASCII character, converting to upper-case if lower-case is
180// passed. Note that characters values > 127 are simply returned.
181inline char ascii_toupper(unsigned char c) {
182 return ascii_internal::kToUpper[c];
183}
184
185// Converts the characters in `s` to uppercase, changing the contents of `s`.
186void AsciiStrToUpper(std::string* s);
187
188// Creates an uppercase string from a given absl::string_view.
189ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
190 std::string result(s);
191 absl::AsciiStrToUpper(&result);
192 return result;
193}
194
195// Returns absl::string_view with whitespace stripped from the beginning of the
196// given string_view.
197ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
198 absl::string_view str) {
199 auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
200 return str.substr(it - str.begin());
201}
202
203// Strips in place whitespace from the beginning of the given string.
204inline void StripLeadingAsciiWhitespace(std::string* str) {
205 auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
206 str->erase(str->begin(), it);
207}
208
209// Returns absl::string_view with whitespace stripped from the end of the given
210// string_view.
211ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
212 absl::string_view str) {
213 auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
214 return str.substr(0, str.rend() - it);
215}
216
217// Strips in place whitespace from the end of the given string
218inline void StripTrailingAsciiWhitespace(std::string* str) {
219 auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
220 str->erase(str->rend() - it);
221}
222
223// Returns absl::string_view with whitespace stripped from both ends of the
224// given string_view.
225ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
226 absl::string_view str) {
227 return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
228}
229
230// Strips in place whitespace from both ends of the given string
231inline void StripAsciiWhitespace(std::string* str) {
232 StripTrailingAsciiWhitespace(str);
233 StripLeadingAsciiWhitespace(str);
234}
235
// Removes leading, trailing, and consecutive internal whitespace.
//
// Only declared in this header; the definition lives elsewhere.
// NOTE(review): presumably rewrites the pointed-to string in place, like the
// other std::string* overloads here -- confirm against the definition.
void RemoveExtraAsciiWhitespace(std::string* str);
238
Austin Schuhb4691e92020-12-31 12:37:18 -0800239ABSL_NAMESPACE_END
Austin Schuh36244a12019-09-21 17:52:38 -0700240} // namespace absl
241
242#endif // ABSL_STRINGS_ASCII_H_