Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // |
| 2 | // Copyright 2017 The Abseil Authors. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | // |
| 16 | // ----------------------------------------------------------------------------- |
| 17 | // File: escaping.h |
| 18 | // ----------------------------------------------------------------------------- |
| 19 | // |
| 20 | // This header file contains string utilities involved in escaping and |
| 21 | // unescaping strings in various ways. |
| 22 | |
| 23 | #ifndef ABSL_STRINGS_ESCAPING_H_ |
| 24 | #define ABSL_STRINGS_ESCAPING_H_ |
| 25 | |
| 26 | #include <cstddef> |
| 27 | #include <string> |
| 28 | #include <vector> |
| 29 | |
| 30 | #include "absl/base/macros.h" |
| 31 | #include "absl/strings/ascii.h" |
| 32 | #include "absl/strings/str_join.h" |
| 33 | #include "absl/strings/string_view.h" |
| 34 | |
| 35 | namespace absl { |
| 36 | |
| 37 | // CUnescape() |
| 38 | // |
| 39 | // Unescapes a `source` string and copies it into `dest`, rewriting C-style |
| 40 | // escape sequences (https://en.cppreference.com/w/cpp/language/escape) into |
| 41 | // their proper code point equivalents, returning `true` if successful. |
| 42 | // |
| 43 | // The following unescape sequences can be handled: |
| 44 | // |
| 45 | // * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents |
| 46 | // * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must |
| 47 | // resolve to a single byte or an error will occur. E.g. values greater than |
| 48 | // 0xff will produce an error. |
| 49 | // * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary |
| 50 | // number of following digits are allowed, the unescaped value must resolve |
| 51 | // to a single byte or an error will occur. E.g. '\x0045' is equivalent to |
| 52 | // '\x45', but '\x1234' will produce an error. |
| 53 | // * Unicode escape sequences ('\unnnn' for exactly four hex digits or |
| 54 | // '\Unnnnnnnn' for exactly eight hex digits), which will be encoded in |
| 55 | // UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and |
| 56 | // 0x99.) |
| 57 | // |
| 58 | // If any errors are encountered, this function returns `false`, leaving the |
| 59 | // `dest` output parameter in an unspecified state, and stores the first |
| 60 | // encountered error in `error`. To disable error reporting, set `error` to |
| 61 | // `nullptr` or use the overload with no error reporting below. |
| 62 | // |
| 63 | // Example: |
| 64 | // |
| 65 | // std::string s = "foo\\rbar\\nbaz\\t"; |
| 66 | // std::string unescaped_s; |
| 67 | // if (!absl::CUnescape(s, &unescaped_s)) { |
| 68 | // ... |
| 69 | // } |
| 70 | // EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t"); |
| 71 | bool CUnescape(absl::string_view source, std::string* dest, std::string* error); |
| 72 | |
| 73 | // Overload of `CUnescape()` with no error reporting. |
| 74 | inline bool CUnescape(absl::string_view source, std::string* dest) { |
| 75 | return CUnescape(source, dest, nullptr); |
| 76 | } |
| 77 | |
| 78 | // CEscape() |
| 79 | // |
| 80 | // Escapes a 'src' string using C-style escape sequences |
| 81 | // (https://en.cppreference.com/w/cpp/language/escape), escaping other |
| 82 | // non-printable/non-whitespace bytes as octal sequences (e.g. "\377"). |
| 83 | // |
| 84 | // Example: |
| 85 | // |
| 86 | // std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; |
| 87 | // std::string escaped_s = absl::CEscape(s); |
| 88 | // EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n"); |
| 89 | std::string CEscape(absl::string_view src); |
| 90 | |
| 91 | // CHexEscape() |
| 92 | // |
| 93 | // Escapes a 'src' string using C-style escape sequences, escaping |
| 94 | // other non-printable/non-whitespace bytes as hexadecimal sequences (e.g. |
| 95 | // "\xFF"). |
| 96 | // |
| 97 | // Example: |
| 98 | // |
| 99 | // std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; |
| 100 | // std::string escaped_s = absl::CHexEscape(s); |
| 101 | // EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n"); |
| 102 | std::string CHexEscape(absl::string_view src); |
| 103 | |
| 104 | // Utf8SafeCEscape() |
| 105 | // |
| 106 | // Escapes a 'src' string using C-style escape sequences, escaping bytes as |
| 107 | // octal sequences, and passing through UTF-8 characters without conversion. |
| 108 | // I.e., when encountering any bytes with their high bit set, this function |
| 109 | // will not escape those values, whether or not they are valid UTF-8. |
| 110 | std::string Utf8SafeCEscape(absl::string_view src); |
| 111 | |
| 112 | // Utf8SafeCHexEscape() |
| 113 | // |
| 114 | // Escapes a 'src' string using C-style escape sequences, escaping bytes as |
| 115 | // hexadecimal sequences, and passing through UTF-8 characters without |
| 116 | // conversion. |
| 117 | std::string Utf8SafeCHexEscape(absl::string_view src); |
| 118 | |
| 119 | // Base64Unescape() |
| 120 | // |
| 121 | // Converts a `src` string encoded in Base64 to its binary equivalent, writing |
| 122 | // it to a `dest` buffer, returning `true` on success. If `src` contains invalid |
| 123 | // characters, `dest` is cleared and the function returns `false`. |
| 124 | bool Base64Unescape(absl::string_view src, std::string* dest); |
| 125 | |
| 126 | // WebSafeBase64Unescape() |
| 127 | // |
| 128 | // Converts a `src` string encoded in Base64 (using '-' instead of '+' and '_' |
| 129 | // instead of '/') to its binary equivalent, writing it to a `dest` buffer. If |
| 130 | // `src` contains invalid characters, `dest` is cleared and `false` is returned. |
| 131 | bool WebSafeBase64Unescape(absl::string_view src, std::string* dest); |
| 132 | |
| 133 | // Base64Escape() |
| 134 | // |
| 135 | // Encodes a `src` string into a base64-encoded string, with padding characters. |
| 136 | // This function conforms with RFC 4648 section 4 (base64). |
| 137 | void Base64Escape(absl::string_view src, std::string* dest); |
| 138 | std::string Base64Escape(absl::string_view src); |
| 139 | |
| 140 | // WebSafeBase64Escape() |
| 141 | // |
| 142 | // Encodes a `src` string into a base64-like string, using '-' instead of '+' |
| 143 | // and '_' instead of '/', and without padding. This function conforms with RFC |
| 144 | // 4648 section 5 (base64url). |
| 145 | void WebSafeBase64Escape(absl::string_view src, std::string* dest); |
| 146 | std::string WebSafeBase64Escape(absl::string_view src); |
| 147 | |
| 148 | // HexStringToBytes() |
| 149 | // |
| 150 | // Converts an ASCII hex string into bytes, returning binary data of length |
| 151 | // `from.size()/2`. |
| 152 | std::string HexStringToBytes(absl::string_view from); |
| 153 | |
| 154 | // BytesToHexString() |
| 155 | // |
| 156 | // Converts binary data into an ASCII text string, returning a string of size |
| 157 | // `2*from.size()`. |
| 158 | std::string BytesToHexString(absl::string_view from); |
| 159 | |
| 160 | } // namespace absl |
| 161 | |
| 162 | #endif // ABSL_STRINGS_ESCAPING_H_ |