Austin Schuh | b4691e9 | 2020-12-31 12:37:18 -0800 | [diff] [blame^] | 1 | // Copyright 2020 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "absl/strings/internal/escaping.h" |
| 16 | |
| 17 | #include "absl/base/internal/endian.h" |
| 18 | #include "absl/base/internal/raw_logging.h" |
| 19 | |
| 20 | namespace absl { |
| 21 | ABSL_NAMESPACE_BEGIN |
| 22 | namespace strings_internal { |
| 23 | |
// The standard Base64 alphabet, indexed by 6-bit value: 'A'-'Z' map to 0-25,
// 'a'-'z' to 26-51, '0'-'9' to 52-61, then '+' (62) and '/' (63).
const char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";
| 26 | |
// Returns the number of characters produced by Base64-encoding `input_len`
// bytes of input.
//
// Every complete 3-byte group of input becomes 4 output characters. Per
// RFC 3548 (https://tools.ietf.org/html/rfc3548), a trailing partial group is
// encoded as follows:
//   * 1 leftover byte  -> 2 characters, plus "==" when `do_padding` is true;
//   * 2 leftover bytes -> 3 characters, plus "="  when `do_padding` is true.
// With `do_padding` set, the result is therefore always a multiple of 4.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  const size_t whole_groups = input_len / 3;
  const size_t leftover_bytes = input_len % 3;

  size_t len = whole_groups * 4;
  switch (leftover_bytes) {
    case 1:
      // 8 trailing bits: two characters, optionally followed by "==".
      len += do_padding ? 4 : 2;
      break;
    case 2:
      // 16 trailing bits: three characters, optionally followed by "=".
      len += do_padding ? 4 : 3;
      break;
    default:
      // The input is a whole number of 3-byte groups: nothing extra to emit.
      break;
  }

  assert(len >= input_len);  // The multiplication above must not have wrapped.
  return len;
}
| 72 | |
| 73 | size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, |
| 74 | size_t szdest, const char* base64, |
| 75 | bool do_padding) { |
| 76 | static const char kPad64 = '='; |
| 77 | |
| 78 | if (szsrc * 4 > szdest * 3) return 0; |
| 79 | |
| 80 | char* cur_dest = dest; |
| 81 | const unsigned char* cur_src = src; |
| 82 | |
| 83 | char* const limit_dest = dest + szdest; |
| 84 | const unsigned char* const limit_src = src + szsrc; |
| 85 | |
| 86 | // Three bytes of data encodes to four characters of cyphertext. |
| 87 | // So we can pump through three-byte chunks atomically. |
| 88 | if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. |
| 89 | while (cur_src < limit_src - 3) { // While we have >= 32 bits. |
| 90 | uint32_t in = absl::big_endian::Load32(cur_src) >> 8; |
| 91 | |
| 92 | cur_dest[0] = base64[in >> 18]; |
| 93 | in &= 0x3FFFF; |
| 94 | cur_dest[1] = base64[in >> 12]; |
| 95 | in &= 0xFFF; |
| 96 | cur_dest[2] = base64[in >> 6]; |
| 97 | in &= 0x3F; |
| 98 | cur_dest[3] = base64[in]; |
| 99 | |
| 100 | cur_dest += 4; |
| 101 | cur_src += 3; |
| 102 | } |
| 103 | } |
| 104 | // To save time, we didn't update szdest or szsrc in the loop. So do it now. |
| 105 | szdest = limit_dest - cur_dest; |
| 106 | szsrc = limit_src - cur_src; |
| 107 | |
| 108 | /* now deal with the tail (<=3 bytes) */ |
| 109 | switch (szsrc) { |
| 110 | case 0: |
| 111 | // Nothing left; nothing more to do. |
| 112 | break; |
| 113 | case 1: { |
| 114 | // One byte left: this encodes to two characters, and (optionally) |
| 115 | // two pad characters to round out the four-character cypherblock. |
| 116 | if (szdest < 2) return 0; |
| 117 | uint32_t in = cur_src[0]; |
| 118 | cur_dest[0] = base64[in >> 2]; |
| 119 | in &= 0x3; |
| 120 | cur_dest[1] = base64[in << 4]; |
| 121 | cur_dest += 2; |
| 122 | szdest -= 2; |
| 123 | if (do_padding) { |
| 124 | if (szdest < 2) return 0; |
| 125 | cur_dest[0] = kPad64; |
| 126 | cur_dest[1] = kPad64; |
| 127 | cur_dest += 2; |
| 128 | szdest -= 2; |
| 129 | } |
| 130 | break; |
| 131 | } |
| 132 | case 2: { |
| 133 | // Two bytes left: this encodes to three characters, and (optionally) |
| 134 | // one pad character to round out the four-character cypherblock. |
| 135 | if (szdest < 3) return 0; |
| 136 | uint32_t in = absl::big_endian::Load16(cur_src); |
| 137 | cur_dest[0] = base64[in >> 10]; |
| 138 | in &= 0x3FF; |
| 139 | cur_dest[1] = base64[in >> 4]; |
| 140 | in &= 0x00F; |
| 141 | cur_dest[2] = base64[in << 2]; |
| 142 | cur_dest += 3; |
| 143 | szdest -= 3; |
| 144 | if (do_padding) { |
| 145 | if (szdest < 1) return 0; |
| 146 | cur_dest[0] = kPad64; |
| 147 | cur_dest += 1; |
| 148 | szdest -= 1; |
| 149 | } |
| 150 | break; |
| 151 | } |
| 152 | case 3: { |
| 153 | // Three bytes left: same as in the big loop above. We can't do this in |
| 154 | // the loop because the loop above always reads 4 bytes, and the fourth |
| 155 | // byte is past the end of the input. |
| 156 | if (szdest < 4) return 0; |
| 157 | uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); |
| 158 | cur_dest[0] = base64[in >> 18]; |
| 159 | in &= 0x3FFFF; |
| 160 | cur_dest[1] = base64[in >> 12]; |
| 161 | in &= 0xFFF; |
| 162 | cur_dest[2] = base64[in >> 6]; |
| 163 | in &= 0x3F; |
| 164 | cur_dest[3] = base64[in]; |
| 165 | cur_dest += 4; |
| 166 | szdest -= 4; |
| 167 | break; |
| 168 | } |
| 169 | default: |
| 170 | // Should not be reached: blocks of 4 bytes are handled |
| 171 | // in the while loop before this switch statement. |
| 172 | ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); |
| 173 | break; |
| 174 | } |
| 175 | return (cur_dest - dest); |
| 176 | } |
| 177 | |
| 178 | } // namespace strings_internal |
| 179 | ABSL_NAMESPACE_END |
| 180 | } // namespace absl |