blob: afce3a6e3421769a59f0b49879f9789c14470fa5 [file] [log] [blame]
Austin Schuh812d0d12021-11-04 20:16:48 -07001// Copyright (c) FIRST and other WPILib contributors.
2// Open Source Software; you can modify and/or share it under the terms of
3// the WPILib BSD license file in the root directory of this project.
Brian Silverman8fce7482020-01-05 13:18:21 -08004
5#include "wpi/HttpUtil.h"
6
7#include <cctype>
8
Austin Schuh812d0d12021-11-04 20:16:48 -07009#include "fmt/format.h"
Brian Silverman8fce7482020-01-05 13:18:21 -080010#include "wpi/Base64.h"
Brian Silverman8fce7482020-01-05 13:18:21 -080011#include "wpi/StringExtras.h"
12#include "wpi/TCPConnector.h"
13#include "wpi/raw_ostream.h"
14
15namespace wpi {
16
Austin Schuh812d0d12021-11-04 20:16:48 -070017std::string_view UnescapeURI(std::string_view str, SmallVectorImpl<char>& buf,
18 bool* error) {
Brian Silverman8fce7482020-01-05 13:18:21 -080019 buf.clear();
Austin Schuh812d0d12021-11-04 20:16:48 -070020 for (auto i = str.begin(), end = str.end(); i != end; ++i) {
Brian Silverman8fce7482020-01-05 13:18:21 -080021 // pass non-escaped characters to output
22 if (*i != '%') {
23 // decode + to space
Austin Schuh812d0d12021-11-04 20:16:48 -070024 if (*i == '+') {
Brian Silverman8fce7482020-01-05 13:18:21 -080025 buf.push_back(' ');
Austin Schuh812d0d12021-11-04 20:16:48 -070026 } else {
Brian Silverman8fce7482020-01-05 13:18:21 -080027 buf.push_back(*i);
Austin Schuh812d0d12021-11-04 20:16:48 -070028 }
Brian Silverman8fce7482020-01-05 13:18:21 -080029 continue;
30 }
31
32 // are there enough characters left?
33 if (i + 2 >= end) {
34 *error = true;
Austin Schuh812d0d12021-11-04 20:16:48 -070035 return {};
Brian Silverman8fce7482020-01-05 13:18:21 -080036 }
37
38 // replace %xx with the corresponding character
39 unsigned val1 = hexDigitValue(*++i);
40 if (val1 == -1U) {
41 *error = true;
Austin Schuh812d0d12021-11-04 20:16:48 -070042 return {};
Brian Silverman8fce7482020-01-05 13:18:21 -080043 }
44 unsigned val2 = hexDigitValue(*++i);
45 if (val2 == -1U) {
46 *error = true;
Austin Schuh812d0d12021-11-04 20:16:48 -070047 return {};
Brian Silverman8fce7482020-01-05 13:18:21 -080048 }
49 buf.push_back((val1 << 4) | val2);
50 }
51
52 *error = false;
Austin Schuh812d0d12021-11-04 20:16:48 -070053 return {buf.data(), buf.size()};
Brian Silverman8fce7482020-01-05 13:18:21 -080054}
55
Austin Schuh812d0d12021-11-04 20:16:48 -070056std::string_view EscapeURI(std::string_view str, SmallVectorImpl<char>& buf,
57 bool spacePlus) {
Brian Silverman8fce7482020-01-05 13:18:21 -080058 static const char* const hexLut = "0123456789ABCDEF";
59
Brian Silverman8fce7482020-01-05 13:18:21 -080060 buf.clear();
Austin Schuh812d0d12021-11-04 20:16:48 -070061 for (auto i = str.begin(), end = str.end(); i != end; ++i) {
Brian Silverman8fce7482020-01-05 13:18:21 -080062 // pass unreserved characters to output
63 if (std::isalnum(*i) || *i == '-' || *i == '_' || *i == '.' || *i == '~') {
64 buf.push_back(*i);
65 continue;
66 }
67
68 // encode space to +
69 if (spacePlus && *i == ' ') {
70 buf.push_back('+');
71 continue;
72 }
73
74 // convert others to %xx
75 buf.push_back('%');
76 buf.push_back(hexLut[((*i) >> 4) & 0x0f]);
77 buf.push_back(hexLut[(*i) & 0x0f]);
78 }
79
Austin Schuh812d0d12021-11-04 20:16:48 -070080 return {buf.data(), buf.size()};
Brian Silverman8fce7482020-01-05 13:18:21 -080081}
82
Austin Schuh812d0d12021-11-04 20:16:48 -070083HttpQueryMap::HttpQueryMap(std::string_view query) {
84 SmallVector<std::string_view, 16> queryElems;
85 split(query, queryElems, '&', 100, false);
Austin Schuh1e69f942020-11-14 15:06:14 -080086 for (auto elem : queryElems) {
Austin Schuh812d0d12021-11-04 20:16:48 -070087 auto [nameEsc, valueEsc] = split(elem, '=');
88 SmallString<64> nameBuf;
Austin Schuh1e69f942020-11-14 15:06:14 -080089 bool err = false;
90 auto name = wpi::UnescapeURI(nameEsc, nameBuf, &err);
91 // note: ignores duplicates
Austin Schuh812d0d12021-11-04 20:16:48 -070092 if (!err) {
93 m_elems.try_emplace(name, valueEsc);
94 }
Austin Schuh1e69f942020-11-14 15:06:14 -080095 }
96}
97
Austin Schuh812d0d12021-11-04 20:16:48 -070098std::optional<std::string_view> HttpQueryMap::Get(
99 std::string_view name, wpi::SmallVectorImpl<char>& buf) const {
Austin Schuh1e69f942020-11-14 15:06:14 -0800100 auto it = m_elems.find(name);
Austin Schuh812d0d12021-11-04 20:16:48 -0700101 if (it == m_elems.end()) {
102 return {};
103 }
Austin Schuh1e69f942020-11-14 15:06:14 -0800104 bool err = false;
105 auto val = wpi::UnescapeURI(it->second, buf, &err);
Austin Schuh812d0d12021-11-04 20:16:48 -0700106 if (err) {
107 return {};
108 }
Austin Schuh1e69f942020-11-14 15:06:14 -0800109 return val;
110}
111
Austin Schuh812d0d12021-11-04 20:16:48 -0700112HttpPath::HttpPath(std::string_view path) {
Austin Schuh1e69f942020-11-14 15:06:14 -0800113 // special-case root path to be a single empty element
114 if (path == "/") {
115 m_pathEnds.emplace_back(0);
116 return;
117 }
Austin Schuh812d0d12021-11-04 20:16:48 -0700118 wpi::SmallVector<std::string_view, 16> pathElems;
119 split(path, pathElems, '/', 100, false);
Austin Schuh1e69f942020-11-14 15:06:14 -0800120 for (auto elem : pathElems) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700121 SmallString<64> buf;
Austin Schuh1e69f942020-11-14 15:06:14 -0800122 bool err = false;
123 auto val = wpi::UnescapeURI(elem, buf, &err);
124 if (err) {
125 m_pathEnds.clear();
126 return;
127 }
128 m_pathBuf += val;
129 m_pathEnds.emplace_back(m_pathBuf.size());
130 }
131}
132
Austin Schuh812d0d12021-11-04 20:16:48 -0700133bool HttpPath::startswith(size_t start,
134 span<const std::string_view> match) const {
135 if (m_pathEnds.size() < (start + match.size())) {
136 return false;
137 }
Austin Schuh1e69f942020-11-14 15:06:14 -0800138 bool first = start == 0;
139 auto p = m_pathEnds.begin() + start;
140 for (auto m : match) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700141 auto val = slice(m_pathBuf, first ? 0 : *(p - 1), *p);
142 if (val != m) {
143 return false;
144 }
Austin Schuh1e69f942020-11-14 15:06:14 -0800145 first = false;
146 ++p;
147 }
148 return true;
149}
150
Austin Schuh812d0d12021-11-04 20:16:48 -0700151std::string_view HttpPath::operator[](size_t n) const {
152 return slice(m_pathBuf, n == 0 ? 0 : m_pathEnds[n - 1], m_pathEnds[n]);
153}
154
Brian Silverman8fce7482020-01-05 13:18:21 -0800155bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
156 SmallVectorImpl<char>* contentLength) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700157 if (contentType) {
158 contentType->clear();
159 }
160 if (contentLength) {
161 contentLength->clear();
162 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800163
164 bool inContentType = false;
165 bool inContentLength = false;
166 SmallString<64> lineBuf;
167 for (;;) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700168 std::string_view line = rtrim(is.getline(lineBuf, 1024));
169 if (is.has_error()) {
170 return false;
171 }
172 if (line.empty()) {
173 return true; // empty line signals end of headers
174 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800175
176 // header fields start at the beginning of the line
177 if (!std::isspace(line[0])) {
178 inContentType = false;
179 inContentLength = false;
Austin Schuh812d0d12021-11-04 20:16:48 -0700180 std::string_view field;
181 std::tie(field, line) = split(line, ':');
182 field = rtrim(field);
183 if (equals_lower(field, "content-type")) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800184 inContentType = true;
Austin Schuh812d0d12021-11-04 20:16:48 -0700185 } else if (equals_lower(field, "content-length")) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800186 inContentLength = true;
Austin Schuh812d0d12021-11-04 20:16:48 -0700187 } else {
Brian Silverman8fce7482020-01-05 13:18:21 -0800188 continue; // ignore other fields
Austin Schuh812d0d12021-11-04 20:16:48 -0700189 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800190 }
191
192 // collapse whitespace
Austin Schuh812d0d12021-11-04 20:16:48 -0700193 line = ltrim(line);
Brian Silverman8fce7482020-01-05 13:18:21 -0800194
195 // save field data
Austin Schuh812d0d12021-11-04 20:16:48 -0700196 if (inContentType && contentType) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800197 contentType->append(line.begin(), line.end());
Austin Schuh812d0d12021-11-04 20:16:48 -0700198 } else if (inContentLength && contentLength) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800199 contentLength->append(line.begin(), line.end());
Austin Schuh812d0d12021-11-04 20:16:48 -0700200 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800201 }
202}
203
Austin Schuh812d0d12021-11-04 20:16:48 -0700204bool FindMultipartBoundary(raw_istream& is, std::string_view boundary,
Brian Silverman8fce7482020-01-05 13:18:21 -0800205 std::string* saveBuf) {
206 SmallString<64> searchBuf;
207 searchBuf.resize(boundary.size() + 2);
208 size_t searchPos = 0;
209
210 // Per the spec, the --boundary should be preceded by \r\n, so do a first
211 // pass of 1-byte reads to throw those away (common case) and keep the
212 // last non-\r\n character in searchBuf.
213 if (!saveBuf) {
214 do {
215 is.read(searchBuf.data(), 1);
Austin Schuh812d0d12021-11-04 20:16:48 -0700216 if (is.has_error()) {
217 return false;
218 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800219 } while (searchBuf[0] == '\r' || searchBuf[0] == '\n');
220 searchPos = 1;
221 }
222
223 // Look for --boundary. Read boundarysize+2 bytes at a time
224 // during the search to speed up the reads, then fast-scan for -,
225 // and only then match the entire boundary. This will be slow if
226 // there's a bunch of continuous -'s in the output, but that's unlikely.
227 for (;;) {
228 is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
Austin Schuh812d0d12021-11-04 20:16:48 -0700229 if (is.has_error()) {
230 return false;
231 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800232
233 // Did we find the boundary?
234 if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
Austin Schuh812d0d12021-11-04 20:16:48 -0700235 searchBuf.substr(2) == boundary) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800236 return true;
Austin Schuh812d0d12021-11-04 20:16:48 -0700237 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800238
239 // Fast-scan for '-'
240 size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
Austin Schuh812d0d12021-11-04 20:16:48 -0700241 if (pos == std::string_view::npos) {
242 if (saveBuf) {
243 saveBuf->append(searchBuf.data(), searchBuf.size());
244 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800245 } else {
Austin Schuh812d0d12021-11-04 20:16:48 -0700246 if (saveBuf) {
247 saveBuf->append(searchBuf.data(), pos);
248 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800249
250 // move '-' and following to start of buffer (next read will fill)
251 std::memmove(searchBuf.data(), searchBuf.data() + pos,
252 searchBuf.size() - pos);
253 searchPos = searchBuf.size() - pos;
254 }
255 }
256}
257
Austin Schuh812d0d12021-11-04 20:16:48 -0700258HttpLocation::HttpLocation(std::string_view url_, bool* error,
Brian Silverman8fce7482020-01-05 13:18:21 -0800259 std::string* errorMsg)
Austin Schuh812d0d12021-11-04 20:16:48 -0700260 : url{url_} {
Brian Silverman8fce7482020-01-05 13:18:21 -0800261 // Split apart into components
Austin Schuh812d0d12021-11-04 20:16:48 -0700262 std::string_view query{url};
Brian Silverman8fce7482020-01-05 13:18:21 -0800263
264 // scheme:
Austin Schuh812d0d12021-11-04 20:16:48 -0700265 std::string_view scheme;
266 std::tie(scheme, query) = split(query, ':');
267 if (!equals_lower(scheme, "http")) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800268 *errorMsg = "only supports http URLs";
269 *error = true;
270 return;
271 }
272
273 // "//"
Austin Schuh812d0d12021-11-04 20:16:48 -0700274 if (!starts_with(query, "//")) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800275 *errorMsg = "expected http://...";
276 *error = true;
277 return;
278 }
Austin Schuh812d0d12021-11-04 20:16:48 -0700279 query.remove_prefix(2);
Brian Silverman8fce7482020-01-05 13:18:21 -0800280
281 // user:password@host:port/
Austin Schuh812d0d12021-11-04 20:16:48 -0700282 std::string_view authority;
283 std::tie(authority, query) = split(query, '/');
Brian Silverman8fce7482020-01-05 13:18:21 -0800284
Austin Schuh812d0d12021-11-04 20:16:48 -0700285 auto [userpass, hostport] = split(authority, '@');
Brian Silverman8fce7482020-01-05 13:18:21 -0800286 // split leaves the RHS empty if the split char isn't present...
287 if (hostport.empty()) {
288 hostport = userpass;
Austin Schuh812d0d12021-11-04 20:16:48 -0700289 userpass = {};
Brian Silverman8fce7482020-01-05 13:18:21 -0800290 }
291
292 if (!userpass.empty()) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700293 auto [rawUser, rawPassword] = split(userpass, ':');
Brian Silverman8fce7482020-01-05 13:18:21 -0800294 SmallString<64> userBuf, passBuf;
295 user = UnescapeURI(rawUser, userBuf, error);
296 if (*error) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700297 *errorMsg = fmt::format("could not unescape user \"{}\"", rawUser);
Brian Silverman8fce7482020-01-05 13:18:21 -0800298 return;
299 }
300 password = UnescapeURI(rawPassword, passBuf, error);
301 if (*error) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700302 *errorMsg =
303 fmt::format("could not unescape password \"{}\"", rawPassword);
Brian Silverman8fce7482020-01-05 13:18:21 -0800304 return;
305 }
306 }
307
Austin Schuh812d0d12021-11-04 20:16:48 -0700308 std::string_view portStr;
309 std::tie(host, portStr) = rsplit(hostport, ':');
Brian Silverman8fce7482020-01-05 13:18:21 -0800310 if (host.empty()) {
311 *errorMsg = "host is empty";
312 *error = true;
313 return;
314 }
315 if (portStr.empty()) {
316 port = 80;
Austin Schuh812d0d12021-11-04 20:16:48 -0700317 } else if (auto p = parse_integer<int>(portStr, 10)) {
318 port = p.value();
319 } else {
320 *errorMsg = fmt::format("port \"{}\" is not an integer", portStr);
Brian Silverman8fce7482020-01-05 13:18:21 -0800321 *error = true;
322 return;
323 }
324
325 // path?query#fragment
Austin Schuh812d0d12021-11-04 20:16:48 -0700326 std::tie(query, fragment) = split(query, '#');
327 std::tie(path, query) = split(query, '?');
Brian Silverman8fce7482020-01-05 13:18:21 -0800328
329 // Split query string into parameters
330 while (!query.empty()) {
331 // split out next param and value
Austin Schuh812d0d12021-11-04 20:16:48 -0700332 std::string_view rawParam, rawValue;
333 std::tie(rawParam, query) = split(query, '&');
334 if (rawParam.empty()) {
335 continue; // ignore "&&"
336 }
337 std::tie(rawParam, rawValue) = split(rawParam, '=');
Brian Silverman8fce7482020-01-05 13:18:21 -0800338
339 // unescape param
340 *error = false;
341 SmallString<64> paramBuf;
Austin Schuh812d0d12021-11-04 20:16:48 -0700342 std::string_view param = UnescapeURI(rawParam, paramBuf, error);
Brian Silverman8fce7482020-01-05 13:18:21 -0800343 if (*error) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700344 *errorMsg = fmt::format("could not unescape parameter \"{}\"", rawParam);
Brian Silverman8fce7482020-01-05 13:18:21 -0800345 return;
346 }
347
348 // unescape value
349 SmallString<64> valueBuf;
Austin Schuh812d0d12021-11-04 20:16:48 -0700350 std::string_view value = UnescapeURI(rawValue, valueBuf, error);
Brian Silverman8fce7482020-01-05 13:18:21 -0800351 if (*error) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700352 *errorMsg = fmt::format("could not unescape value \"{}\"", rawValue);
Brian Silverman8fce7482020-01-05 13:18:21 -0800353 return;
354 }
355
356 params.emplace_back(std::make_pair(param, value));
357 }
358
359 *error = false;
360}
361
362void HttpRequest::SetAuth(const HttpLocation& loc) {
363 if (!loc.user.empty()) {
364 SmallString<64> userpass;
365 userpass += loc.user;
366 userpass += ':';
367 userpass += loc.password;
Austin Schuh812d0d12021-11-04 20:16:48 -0700368 Base64Encode(userpass.str(), &auth);
Brian Silverman8fce7482020-01-05 13:18:21 -0800369 }
370}
371
372bool HttpConnection::Handshake(const HttpRequest& request,
373 std::string* warnMsg) {
374 // send GET request
375 os << "GET /" << request.path << " HTTP/1.1\r\n";
376 os << "Host: " << request.host << "\r\n";
Austin Schuh812d0d12021-11-04 20:16:48 -0700377 if (!request.auth.empty()) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800378 os << "Authorization: Basic " << request.auth << "\r\n";
Austin Schuh812d0d12021-11-04 20:16:48 -0700379 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800380 os << "\r\n";
381 os.flush();
382
383 // read first line of response
384 SmallString<64> lineBuf;
Austin Schuh812d0d12021-11-04 20:16:48 -0700385 std::string_view line = rtrim(is.getline(lineBuf, 1024));
Brian Silverman8fce7482020-01-05 13:18:21 -0800386 if (is.has_error()) {
387 *warnMsg = "disconnected before response";
388 return false;
389 }
390
391 // see if we got a HTTP 200 response
Austin Schuh812d0d12021-11-04 20:16:48 -0700392 std::string_view httpver, code, codeText;
393 std::tie(httpver, line) = split(line, ' ');
394 std::tie(code, codeText) = split(line, ' ');
395 if (!starts_with(httpver, "HTTP")) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800396 *warnMsg = "did not receive HTTP response";
397 return false;
398 }
399 if (code != "200") {
Austin Schuh812d0d12021-11-04 20:16:48 -0700400 *warnMsg = fmt::format("received {} {} response", code, codeText);
Brian Silverman8fce7482020-01-05 13:18:21 -0800401 return false;
402 }
403
404 // Parse headers
405 if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
406 *warnMsg = "disconnected during headers";
407 return false;
408 }
409
410 return true;
411}
412
Austin Schuh812d0d12021-11-04 20:16:48 -0700413void HttpMultipartScanner::SetBoundary(std::string_view boundary) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800414 m_boundaryWith = "\n--";
415 m_boundaryWith += boundary;
416 m_boundaryWithout = "\n";
417 m_boundaryWithout += boundary;
418 m_dashes = kUnknown;
419}
420
421void HttpMultipartScanner::Reset(bool saveSkipped) {
422 m_saveSkipped = saveSkipped;
423 m_state = kBoundary;
424 m_posWith = 0;
425 m_posWithout = 0;
426 m_buf.resize(0);
427}
428
Austin Schuh812d0d12021-11-04 20:16:48 -0700429std::string_view HttpMultipartScanner::Execute(std::string_view in) {
430 if (m_state == kDone) {
431 Reset(m_saveSkipped);
432 }
433 if (m_saveSkipped) {
434 m_buf += in;
435 }
Brian Silverman8fce7482020-01-05 13:18:21 -0800436
437 size_t pos = 0;
438 if (m_state == kBoundary) {
439 for (char ch : in) {
440 ++pos;
441 if (m_dashes != kWithout) {
442 if (ch == m_boundaryWith[m_posWith]) {
443 ++m_posWith;
444 if (m_posWith == m_boundaryWith.size()) {
445 // Found the boundary; transition to padding
446 m_state = kPadding;
447 m_dashes = kWith; // no longer accept plain 'boundary'
448 break;
449 }
450 } else if (ch == m_boundaryWith[0]) {
451 m_posWith = 1;
452 } else {
453 m_posWith = 0;
454 }
455 }
456
457 if (m_dashes != kWith) {
458 if (ch == m_boundaryWithout[m_posWithout]) {
459 ++m_posWithout;
460 if (m_posWithout == m_boundaryWithout.size()) {
461 // Found the boundary; transition to padding
462 m_state = kPadding;
463 m_dashes = kWithout; // no longer accept '--boundary'
464 break;
465 }
466 } else if (ch == m_boundaryWithout[0]) {
467 m_posWithout = 1;
468 } else {
469 m_posWithout = 0;
470 }
471 }
472 }
473 }
474
475 if (m_state == kPadding) {
Austin Schuh812d0d12021-11-04 20:16:48 -0700476 for (char ch : drop_front(in, pos)) {
Brian Silverman8fce7482020-01-05 13:18:21 -0800477 ++pos;
478 if (ch == '\n') {
479 // Found the LF; return remaining input buffer (following it)
480 m_state = kDone;
Austin Schuh812d0d12021-11-04 20:16:48 -0700481 if (m_saveSkipped) {
482 m_buf.resize(m_buf.size() - in.size() + pos);
483 }
484 return drop_front(in, pos);
Brian Silverman8fce7482020-01-05 13:18:21 -0800485 }
486 }
487 }
488
489 // We consumed the entire input
Austin Schuh812d0d12021-11-04 20:16:48 -0700490 return {};
Brian Silverman8fce7482020-01-05 13:18:21 -0800491}
492
493} // namespace wpi