// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
4
5#include "wpinet/HttpUtil.h"
6
7#include <cctype>
8
9#include <fmt/format.h>
10#include <wpi/Base64.h>
11#include <wpi/StringExtras.h>
12#include <wpi/raw_ostream.h>
13
14#include "wpinet/TCPConnector.h"
15
16namespace wpi {
17
18std::string_view UnescapeURI(std::string_view str, SmallVectorImpl<char>& buf,
19 bool* error) {
20 buf.clear();
21 for (auto i = str.begin(), end = str.end(); i != end; ++i) {
22 // pass non-escaped characters to output
23 if (*i != '%') {
24 // decode + to space
25 if (*i == '+') {
26 buf.push_back(' ');
27 } else {
28 buf.push_back(*i);
29 }
30 continue;
31 }
32
33 // are there enough characters left?
34 if (i + 2 >= end) {
35 *error = true;
36 return {};
37 }
38
39 // replace %xx with the corresponding character
40 unsigned val1 = hexDigitValue(*++i);
41 if (val1 == -1U) {
42 *error = true;
43 return {};
44 }
45 unsigned val2 = hexDigitValue(*++i);
46 if (val2 == -1U) {
47 *error = true;
48 return {};
49 }
50 buf.push_back((val1 << 4) | val2);
51 }
52
53 *error = false;
54 return {buf.data(), buf.size()};
55}
56
57std::string_view EscapeURI(std::string_view str, SmallVectorImpl<char>& buf,
58 bool spacePlus) {
59 static const char* const hexLut = "0123456789ABCDEF";
60
61 buf.clear();
62 for (auto i = str.begin(), end = str.end(); i != end; ++i) {
63 // pass unreserved characters to output
64 if (std::isalnum(*i) || *i == '-' || *i == '_' || *i == '.' || *i == '~') {
65 buf.push_back(*i);
66 continue;
67 }
68
69 // encode space to +
70 if (spacePlus && *i == ' ') {
71 buf.push_back('+');
72 continue;
73 }
74
75 // convert others to %xx
76 buf.push_back('%');
77 buf.push_back(hexLut[((*i) >> 4) & 0x0f]);
78 buf.push_back(hexLut[(*i) & 0x0f]);
79 }
80
81 return {buf.data(), buf.size()};
82}
83
84HttpQueryMap::HttpQueryMap(std::string_view query) {
85 SmallVector<std::string_view, 16> queryElems;
86 split(query, queryElems, '&', 100, false);
87 for (auto elem : queryElems) {
88 auto [nameEsc, valueEsc] = split(elem, '=');
89 SmallString<64> nameBuf;
90 bool err = false;
91 auto name = wpi::UnescapeURI(nameEsc, nameBuf, &err);
92 // note: ignores duplicates
93 if (!err) {
94 m_elems.try_emplace(name, valueEsc);
95 }
96 }
97}
98
99std::optional<std::string_view> HttpQueryMap::Get(
100 std::string_view name, wpi::SmallVectorImpl<char>& buf) const {
101 auto it = m_elems.find(name);
102 if (it == m_elems.end()) {
103 return {};
104 }
105 bool err = false;
106 auto val = wpi::UnescapeURI(it->second, buf, &err);
107 if (err) {
108 return {};
109 }
110 return val;
111}
112
113HttpPath::HttpPath(std::string_view path) {
114 // special-case root path to be a single empty element
115 if (path == "/") {
116 m_pathEnds.emplace_back(0);
117 return;
118 }
119 wpi::SmallVector<std::string_view, 16> pathElems;
120 split(path, pathElems, '/', 100, false);
121 for (auto elem : pathElems) {
122 SmallString<64> buf;
123 bool err = false;
124 auto val = wpi::UnescapeURI(elem, buf, &err);
125 if (err) {
126 m_pathEnds.clear();
127 return;
128 }
129 m_pathBuf += val;
130 m_pathEnds.emplace_back(m_pathBuf.size());
131 }
132}
133
134bool HttpPath::startswith(size_t start,
135 std::span<const std::string_view> match) const {
136 if (m_pathEnds.size() < (start + match.size())) {
137 return false;
138 }
139 bool first = start == 0;
140 auto p = m_pathEnds.begin() + start;
141 for (auto m : match) {
142 auto val = slice(m_pathBuf, first ? 0 : *(p - 1), *p);
143 if (val != m) {
144 return false;
145 }
146 first = false;
147 ++p;
148 }
149 return true;
150}
151
152std::string_view HttpPath::operator[](size_t n) const {
153 return slice(m_pathBuf, n == 0 ? 0 : m_pathEnds[n - 1], m_pathEnds[n]);
154}
155
156bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
157 SmallVectorImpl<char>* contentLength) {
158 if (contentType) {
159 contentType->clear();
160 }
161 if (contentLength) {
162 contentLength->clear();
163 }
164
165 bool inContentType = false;
166 bool inContentLength = false;
167 SmallString<64> lineBuf;
168 for (;;) {
169 std::string_view line = rtrim(is.getline(lineBuf, 1024));
170 if (is.has_error()) {
171 return false;
172 }
173 if (line.empty()) {
174 return true; // empty line signals end of headers
175 }
176
177 // header fields start at the beginning of the line
178 if (!std::isspace(line[0])) {
179 inContentType = false;
180 inContentLength = false;
181 std::string_view field;
182 std::tie(field, line) = split(line, ':');
183 field = rtrim(field);
184 if (equals_lower(field, "content-type")) {
185 inContentType = true;
186 } else if (equals_lower(field, "content-length")) {
187 inContentLength = true;
188 } else {
189 continue; // ignore other fields
190 }
191 }
192
193 // collapse whitespace
194 line = ltrim(line);
195
196 // save field data
197 if (inContentType && contentType) {
198 contentType->append(line.begin(), line.end());
199 } else if (inContentLength && contentLength) {
200 contentLength->append(line.begin(), line.end());
201 }
202 }
203}
204
205bool FindMultipartBoundary(raw_istream& is, std::string_view boundary,
206 std::string* saveBuf) {
207 SmallString<64> searchBuf;
208 searchBuf.resize(boundary.size() + 2);
209 size_t searchPos = 0;
210
211 // Per the spec, the --boundary should be preceded by \r\n, so do a first
212 // pass of 1-byte reads to throw those away (common case) and keep the
213 // last non-\r\n character in searchBuf.
214 if (!saveBuf) {
215 do {
216 is.read(searchBuf.data(), 1);
217 if (is.has_error()) {
218 return false;
219 }
220 } while (searchBuf[0] == '\r' || searchBuf[0] == '\n');
221 searchPos = 1;
222 }
223
224 // Look for --boundary. Read boundarysize+2 bytes at a time
225 // during the search to speed up the reads, then fast-scan for -,
226 // and only then match the entire boundary. This will be slow if
227 // there's a bunch of continuous -'s in the output, but that's unlikely.
228 for (;;) {
229 is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
230 if (is.has_error()) {
231 return false;
232 }
233
234 // Did we find the boundary?
235 if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
236 wpi::substr(searchBuf, 2) == boundary) {
237 return true;
238 }
239
240 // Fast-scan for '-'
241 size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
242 if (pos == std::string_view::npos) {
243 if (saveBuf) {
244 saveBuf->append(searchBuf.data(), searchBuf.size());
245 }
246 } else {
247 if (saveBuf) {
248 saveBuf->append(searchBuf.data(), pos);
249 }
250
251 // move '-' and following to start of buffer (next read will fill)
252 std::memmove(searchBuf.data(), searchBuf.data() + pos,
253 searchBuf.size() - pos);
254 searchPos = searchBuf.size() - pos;
255 }
256 }
257}
258
259HttpLocation::HttpLocation(std::string_view url_, bool* error,
260 std::string* errorMsg)
261 : url{url_} {
262 // Split apart into components
263 std::string_view query{url};
264
265 // scheme:
266 std::string_view scheme;
267 std::tie(scheme, query) = split(query, ':');
268 if (!equals_lower(scheme, "http")) {
269 *errorMsg = "only supports http URLs";
270 *error = true;
271 return;
272 }
273
274 // "//"
275 if (!starts_with(query, "//")) {
276 *errorMsg = "expected http://...";
277 *error = true;
278 return;
279 }
280 query.remove_prefix(2);
281
282 // user:password@host:port/
283 std::string_view authority;
284 std::tie(authority, query) = split(query, '/');
285
286 auto [userpass, hostport] = split(authority, '@');
287 // split leaves the RHS empty if the split char isn't present...
288 if (hostport.empty()) {
289 hostport = userpass;
290 userpass = {};
291 }
292
293 if (!userpass.empty()) {
294 auto [rawUser, rawPassword] = split(userpass, ':');
295 SmallString<64> userBuf, passBuf;
296 user = UnescapeURI(rawUser, userBuf, error);
297 if (*error) {
298 *errorMsg = fmt::format("could not unescape user \"{}\"", rawUser);
299 return;
300 }
301 password = UnescapeURI(rawPassword, passBuf, error);
302 if (*error) {
303 *errorMsg =
304 fmt::format("could not unescape password \"{}\"", rawPassword);
305 return;
306 }
307 }
308
309 std::string_view portStr;
310 std::tie(host, portStr) = rsplit(hostport, ':');
311 if (host.empty()) {
312 *errorMsg = "host is empty";
313 *error = true;
314 return;
315 }
316 if (portStr.empty()) {
317 port = 80;
318 } else if (auto p = parse_integer<int>(portStr, 10)) {
319 port = p.value();
320 } else {
321 *errorMsg = fmt::format("port \"{}\" is not an integer", portStr);
322 *error = true;
323 return;
324 }
325
326 // path?query#fragment
327 std::tie(query, fragment) = split(query, '#');
328 std::tie(path, query) = split(query, '?');
329
330 // Split query string into parameters
331 while (!query.empty()) {
332 // split out next param and value
333 std::string_view rawParam, rawValue;
334 std::tie(rawParam, query) = split(query, '&');
335 if (rawParam.empty()) {
336 continue; // ignore "&&"
337 }
338 std::tie(rawParam, rawValue) = split(rawParam, '=');
339
340 // unescape param
341 *error = false;
342 SmallString<64> paramBuf;
343 std::string_view param = UnescapeURI(rawParam, paramBuf, error);
344 if (*error) {
345 *errorMsg = fmt::format("could not unescape parameter \"{}\"", rawParam);
346 return;
347 }
348
349 // unescape value
350 SmallString<64> valueBuf;
351 std::string_view value = UnescapeURI(rawValue, valueBuf, error);
352 if (*error) {
353 *errorMsg = fmt::format("could not unescape value \"{}\"", rawValue);
354 return;
355 }
356
357 params.emplace_back(std::make_pair(param, value));
358 }
359
360 *error = false;
361}
362
363void HttpRequest::SetAuth(const HttpLocation& loc) {
364 if (!loc.user.empty()) {
365 SmallString<64> userpass;
366 userpass += loc.user;
367 userpass += ':';
368 userpass += loc.password;
369 Base64Encode(userpass.str(), &auth);
370 }
371}
372
373bool HttpConnection::Handshake(const HttpRequest& request,
374 std::string* warnMsg) {
375 // send GET request
376 os << "GET /" << request.path << " HTTP/1.1\r\n";
377 os << "Host: " << request.host << "\r\n";
378 if (!request.auth.empty()) {
379 os << "Authorization: Basic " << request.auth << "\r\n";
380 }
381 os << "\r\n";
382 os.flush();
383
384 // read first line of response
385 SmallString<64> lineBuf;
386 std::string_view line = rtrim(is.getline(lineBuf, 1024));
387 if (is.has_error()) {
388 *warnMsg = "disconnected before response";
389 return false;
390 }
391
392 // see if we got a HTTP 200 response
393 std::string_view httpver, code, codeText;
394 std::tie(httpver, line) = split(line, ' ');
395 std::tie(code, codeText) = split(line, ' ');
396 if (!starts_with(httpver, "HTTP")) {
397 *warnMsg = "did not receive HTTP response";
398 return false;
399 }
400 if (code != "200") {
401 *warnMsg = fmt::format("received {} {} response", code, codeText);
402 return false;
403 }
404
405 // Parse headers
406 if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
407 *warnMsg = "disconnected during headers";
408 return false;
409 }
410
411 return true;
412}
413
414void HttpMultipartScanner::SetBoundary(std::string_view boundary) {
415 m_boundaryWith = "\n--";
416 m_boundaryWith += boundary;
417 m_boundaryWithout = "\n";
418 m_boundaryWithout += boundary;
419 m_dashes = kUnknown;
420}
421
422void HttpMultipartScanner::Reset(bool saveSkipped) {
423 m_saveSkipped = saveSkipped;
424 m_state = kBoundary;
425 m_posWith = 0;
426 m_posWithout = 0;
427 m_buf.resize(0);
428}
429
430std::string_view HttpMultipartScanner::Execute(std::string_view in) {
431 if (m_state == kDone) {
432 Reset(m_saveSkipped);
433 }
434 if (m_saveSkipped) {
435 m_buf += in;
436 }
437
438 size_t pos = 0;
439 if (m_state == kBoundary) {
440 for (char ch : in) {
441 ++pos;
442 if (m_dashes != kWithout) {
443 if (ch == m_boundaryWith[m_posWith]) {
444 ++m_posWith;
445 if (m_posWith == m_boundaryWith.size()) {
446 // Found the boundary; transition to padding
447 m_state = kPadding;
448 m_dashes = kWith; // no longer accept plain 'boundary'
449 break;
450 }
451 } else if (ch == m_boundaryWith[0]) {
452 m_posWith = 1;
453 } else {
454 m_posWith = 0;
455 }
456 }
457
458 if (m_dashes != kWith) {
459 if (ch == m_boundaryWithout[m_posWithout]) {
460 ++m_posWithout;
461 if (m_posWithout == m_boundaryWithout.size()) {
462 // Found the boundary; transition to padding
463 m_state = kPadding;
464 m_dashes = kWithout; // no longer accept '--boundary'
465 break;
466 }
467 } else if (ch == m_boundaryWithout[0]) {
468 m_posWithout = 1;
469 } else {
470 m_posWithout = 0;
471 }
472 }
473 }
474 }
475
476 if (m_state == kPadding) {
477 for (char ch : drop_front(in, pos)) {
478 ++pos;
479 if (ch == '\n') {
480 // Found the LF; return remaining input buffer (following it)
481 m_state = kDone;
482 if (m_saveSkipped) {
483 m_buf.resize(m_buf.size() - in.size() + pos);
484 }
485 return drop_front(in, pos);
486 }
487 }
488 }
489
490 // We consumed the entire input
491 return {};
492}
493
494} // namespace wpi