blob: 866ee1009151b45b5213629a0a7bba8fbb4d9e19 [file] [log] [blame]
/*----------------------------------------------------------------------------*/
/* Copyright (c) 2016-2020 FIRST. All Rights Reserved.                        */
/* Open Source Software - may be modified and shared by FRC teams. The code   */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project.                                                               */
/*----------------------------------------------------------------------------*/
7
8#include "wpi/HttpUtil.h"
9
10#include <cctype>
11
12#include "wpi/Base64.h"
13#include "wpi/STLExtras.h"
14#include "wpi/StringExtras.h"
15#include "wpi/TCPConnector.h"
16#include "wpi/raw_ostream.h"
17
18namespace wpi {
19
20StringRef UnescapeURI(const Twine& str, SmallVectorImpl<char>& buf,
21 bool* error) {
22 SmallString<128> strBuf;
23 StringRef strStr = str.toStringRef(strBuf);
24 buf.clear();
25 for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
26 // pass non-escaped characters to output
27 if (*i != '%') {
28 // decode + to space
29 if (*i == '+')
30 buf.push_back(' ');
31 else
32 buf.push_back(*i);
33 continue;
34 }
35
36 // are there enough characters left?
37 if (i + 2 >= end) {
38 *error = true;
39 return StringRef{};
40 }
41
42 // replace %xx with the corresponding character
43 unsigned val1 = hexDigitValue(*++i);
44 if (val1 == -1U) {
45 *error = true;
46 return StringRef{};
47 }
48 unsigned val2 = hexDigitValue(*++i);
49 if (val2 == -1U) {
50 *error = true;
51 return StringRef{};
52 }
53 buf.push_back((val1 << 4) | val2);
54 }
55
56 *error = false;
57 return StringRef{buf.data(), buf.size()};
58}
59
60StringRef EscapeURI(const Twine& str, SmallVectorImpl<char>& buf,
61 bool spacePlus) {
62 static const char* const hexLut = "0123456789ABCDEF";
63
64 SmallString<128> strBuf;
65 StringRef strStr = str.toStringRef(strBuf);
66 buf.clear();
67 for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
68 // pass unreserved characters to output
69 if (std::isalnum(*i) || *i == '-' || *i == '_' || *i == '.' || *i == '~') {
70 buf.push_back(*i);
71 continue;
72 }
73
74 // encode space to +
75 if (spacePlus && *i == ' ') {
76 buf.push_back('+');
77 continue;
78 }
79
80 // convert others to %xx
81 buf.push_back('%');
82 buf.push_back(hexLut[((*i) >> 4) & 0x0f]);
83 buf.push_back(hexLut[(*i) & 0x0f]);
84 }
85
86 return StringRef{buf.data(), buf.size()};
87}
88
Austin Schuh1e69f942020-11-14 15:06:14 -080089HttpQueryMap::HttpQueryMap(wpi::StringRef query) {
90 wpi::SmallVector<wpi::StringRef, 16> queryElems;
91 query.split(queryElems, '&', 100, false);
92 for (auto elem : queryElems) {
93 auto [nameEsc, valueEsc] = elem.split('=');
94 wpi::SmallString<64> nameBuf;
95 bool err = false;
96 auto name = wpi::UnescapeURI(nameEsc, nameBuf, &err);
97 // note: ignores duplicates
98 if (!err) m_elems.try_emplace(name, valueEsc);
99 }
100}
101
102std::optional<wpi::StringRef> HttpQueryMap::Get(
103 wpi::StringRef name, wpi::SmallVectorImpl<char>& buf) const {
104 auto it = m_elems.find(name);
105 if (it == m_elems.end()) return {};
106 bool err = false;
107 auto val = wpi::UnescapeURI(it->second, buf, &err);
108 if (err) return {};
109 return val;
110}
111
112HttpPath::HttpPath(wpi::StringRef path) {
113 // special-case root path to be a single empty element
114 if (path == "/") {
115 m_pathEnds.emplace_back(0);
116 return;
117 }
118 wpi::SmallVector<wpi::StringRef, 16> pathElems;
119 path.split(pathElems, '/', 100, false);
120 for (auto elem : pathElems) {
121 wpi::SmallString<64> buf;
122 bool err = false;
123 auto val = wpi::UnescapeURI(elem, buf, &err);
124 if (err) {
125 m_pathEnds.clear();
126 return;
127 }
128 m_pathBuf += val;
129 m_pathEnds.emplace_back(m_pathBuf.size());
130 }
131}
132
133bool HttpPath::startswith(size_t start, ArrayRef<StringRef> match) const {
134 if (m_pathEnds.size() < (start + match.size())) return false;
135 bool first = start == 0;
136 auto p = m_pathEnds.begin() + start;
137 for (auto m : match) {
138 auto val = m_pathBuf.slice(first ? 0 : *(p - 1), *p);
139 if (val != m) return false;
140 first = false;
141 ++p;
142 }
143 return true;
144}
145
Brian Silverman8fce7482020-01-05 13:18:21 -0800146bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
147 SmallVectorImpl<char>* contentLength) {
148 if (contentType) contentType->clear();
149 if (contentLength) contentLength->clear();
150
151 bool inContentType = false;
152 bool inContentLength = false;
153 SmallString<64> lineBuf;
154 for (;;) {
155 StringRef line = is.getline(lineBuf, 1024).rtrim();
156 if (is.has_error()) return false;
157 if (line.empty()) return true; // empty line signals end of headers
158
159 // header fields start at the beginning of the line
160 if (!std::isspace(line[0])) {
161 inContentType = false;
162 inContentLength = false;
163 StringRef field;
164 std::tie(field, line) = line.split(':');
165 field = field.rtrim();
166 if (field.equals_lower("content-type"))
167 inContentType = true;
168 else if (field.equals_lower("content-length"))
169 inContentLength = true;
170 else
171 continue; // ignore other fields
172 }
173
174 // collapse whitespace
175 line = line.ltrim();
176
177 // save field data
178 if (inContentType && contentType)
179 contentType->append(line.begin(), line.end());
180 else if (inContentLength && contentLength)
181 contentLength->append(line.begin(), line.end());
182 }
183}
184
185bool FindMultipartBoundary(raw_istream& is, StringRef boundary,
186 std::string* saveBuf) {
187 SmallString<64> searchBuf;
188 searchBuf.resize(boundary.size() + 2);
189 size_t searchPos = 0;
190
191 // Per the spec, the --boundary should be preceded by \r\n, so do a first
192 // pass of 1-byte reads to throw those away (common case) and keep the
193 // last non-\r\n character in searchBuf.
194 if (!saveBuf) {
195 do {
196 is.read(searchBuf.data(), 1);
197 if (is.has_error()) return false;
198 } while (searchBuf[0] == '\r' || searchBuf[0] == '\n');
199 searchPos = 1;
200 }
201
202 // Look for --boundary. Read boundarysize+2 bytes at a time
203 // during the search to speed up the reads, then fast-scan for -,
204 // and only then match the entire boundary. This will be slow if
205 // there's a bunch of continuous -'s in the output, but that's unlikely.
206 for (;;) {
207 is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
208 if (is.has_error()) return false;
209
210 // Did we find the boundary?
211 if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
212 searchBuf.substr(2) == boundary)
213 return true;
214
215 // Fast-scan for '-'
216 size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
217 if (pos == StringRef::npos) {
218 if (saveBuf) saveBuf->append(searchBuf.data(), searchBuf.size());
219 } else {
220 if (saveBuf) saveBuf->append(searchBuf.data(), pos);
221
222 // move '-' and following to start of buffer (next read will fill)
223 std::memmove(searchBuf.data(), searchBuf.data() + pos,
224 searchBuf.size() - pos);
225 searchPos = searchBuf.size() - pos;
226 }
227 }
228}
229
230HttpLocation::HttpLocation(const Twine& url_, bool* error,
231 std::string* errorMsg)
232 : url{url_.str()} {
233 // Split apart into components
234 StringRef query{url};
235
236 // scheme:
237 StringRef scheme;
238 std::tie(scheme, query) = query.split(':');
239 if (!scheme.equals_lower("http")) {
240 *errorMsg = "only supports http URLs";
241 *error = true;
242 return;
243 }
244
245 // "//"
246 if (!query.startswith("//")) {
247 *errorMsg = "expected http://...";
248 *error = true;
249 return;
250 }
251 query = query.drop_front(2);
252
253 // user:password@host:port/
254 StringRef authority;
255 std::tie(authority, query) = query.split('/');
256
257 StringRef userpass, hostport;
258 std::tie(userpass, hostport) = authority.split('@');
259 // split leaves the RHS empty if the split char isn't present...
260 if (hostport.empty()) {
261 hostport = userpass;
262 userpass = StringRef{};
263 }
264
265 if (!userpass.empty()) {
266 StringRef rawUser, rawPassword;
267 std::tie(rawUser, rawPassword) = userpass.split(':');
268 SmallString<64> userBuf, passBuf;
269 user = UnescapeURI(rawUser, userBuf, error);
270 if (*error) {
271 raw_string_ostream oss(*errorMsg);
272 oss << "could not unescape user \"" << rawUser << "\"";
273 oss.flush();
274 return;
275 }
276 password = UnescapeURI(rawPassword, passBuf, error);
277 if (*error) {
278 raw_string_ostream oss(*errorMsg);
279 oss << "could not unescape password \"" << rawPassword << "\"";
280 oss.flush();
281 return;
282 }
283 }
284
285 StringRef portStr;
286 std::tie(host, portStr) = hostport.rsplit(':');
287 if (host.empty()) {
288 *errorMsg = "host is empty";
289 *error = true;
290 return;
291 }
292 if (portStr.empty()) {
293 port = 80;
294 } else if (portStr.getAsInteger(10, port)) {
295 raw_string_ostream oss(*errorMsg);
296 oss << "port \"" << portStr << "\" is not an integer";
297 oss.flush();
298 *error = true;
299 return;
300 }
301
302 // path?query#fragment
303 std::tie(query, fragment) = query.split('#');
304 std::tie(path, query) = query.split('?');
305
306 // Split query string into parameters
307 while (!query.empty()) {
308 // split out next param and value
309 StringRef rawParam, rawValue;
310 std::tie(rawParam, query) = query.split('&');
311 if (rawParam.empty()) continue; // ignore "&&"
312 std::tie(rawParam, rawValue) = rawParam.split('=');
313
314 // unescape param
315 *error = false;
316 SmallString<64> paramBuf;
317 StringRef param = UnescapeURI(rawParam, paramBuf, error);
318 if (*error) {
319 raw_string_ostream oss(*errorMsg);
320 oss << "could not unescape parameter \"" << rawParam << "\"";
321 oss.flush();
322 return;
323 }
324
325 // unescape value
326 SmallString<64> valueBuf;
327 StringRef value = UnescapeURI(rawValue, valueBuf, error);
328 if (*error) {
329 raw_string_ostream oss(*errorMsg);
330 oss << "could not unescape value \"" << rawValue << "\"";
331 oss.flush();
332 return;
333 }
334
335 params.emplace_back(std::make_pair(param, value));
336 }
337
338 *error = false;
339}
340
341void HttpRequest::SetAuth(const HttpLocation& loc) {
342 if (!loc.user.empty()) {
343 SmallString<64> userpass;
344 userpass += loc.user;
345 userpass += ':';
346 userpass += loc.password;
347 Base64Encode(userpass, &auth);
348 }
349}
350
351bool HttpConnection::Handshake(const HttpRequest& request,
352 std::string* warnMsg) {
353 // send GET request
354 os << "GET /" << request.path << " HTTP/1.1\r\n";
355 os << "Host: " << request.host << "\r\n";
356 if (!request.auth.empty())
357 os << "Authorization: Basic " << request.auth << "\r\n";
358 os << "\r\n";
359 os.flush();
360
361 // read first line of response
362 SmallString<64> lineBuf;
363 StringRef line = is.getline(lineBuf, 1024).rtrim();
364 if (is.has_error()) {
365 *warnMsg = "disconnected before response";
366 return false;
367 }
368
369 // see if we got a HTTP 200 response
370 StringRef httpver, code, codeText;
371 std::tie(httpver, line) = line.split(' ');
372 std::tie(code, codeText) = line.split(' ');
373 if (!httpver.startswith("HTTP")) {
374 *warnMsg = "did not receive HTTP response";
375 return false;
376 }
377 if (code != "200") {
378 raw_string_ostream oss(*warnMsg);
379 oss << "received " << code << " " << codeText << " response";
380 oss.flush();
381 return false;
382 }
383
384 // Parse headers
385 if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
386 *warnMsg = "disconnected during headers";
387 return false;
388 }
389
390 return true;
391}
392
393void HttpMultipartScanner::SetBoundary(StringRef boundary) {
394 m_boundaryWith = "\n--";
395 m_boundaryWith += boundary;
396 m_boundaryWithout = "\n";
397 m_boundaryWithout += boundary;
398 m_dashes = kUnknown;
399}
400
401void HttpMultipartScanner::Reset(bool saveSkipped) {
402 m_saveSkipped = saveSkipped;
403 m_state = kBoundary;
404 m_posWith = 0;
405 m_posWithout = 0;
406 m_buf.resize(0);
407}
408
409StringRef HttpMultipartScanner::Execute(StringRef in) {
410 if (m_state == kDone) Reset(m_saveSkipped);
411 if (m_saveSkipped) m_buf += in;
412
413 size_t pos = 0;
414 if (m_state == kBoundary) {
415 for (char ch : in) {
416 ++pos;
417 if (m_dashes != kWithout) {
418 if (ch == m_boundaryWith[m_posWith]) {
419 ++m_posWith;
420 if (m_posWith == m_boundaryWith.size()) {
421 // Found the boundary; transition to padding
422 m_state = kPadding;
423 m_dashes = kWith; // no longer accept plain 'boundary'
424 break;
425 }
426 } else if (ch == m_boundaryWith[0]) {
427 m_posWith = 1;
428 } else {
429 m_posWith = 0;
430 }
431 }
432
433 if (m_dashes != kWith) {
434 if (ch == m_boundaryWithout[m_posWithout]) {
435 ++m_posWithout;
436 if (m_posWithout == m_boundaryWithout.size()) {
437 // Found the boundary; transition to padding
438 m_state = kPadding;
439 m_dashes = kWithout; // no longer accept '--boundary'
440 break;
441 }
442 } else if (ch == m_boundaryWithout[0]) {
443 m_posWithout = 1;
444 } else {
445 m_posWithout = 0;
446 }
447 }
448 }
449 }
450
451 if (m_state == kPadding) {
452 for (char ch : in.drop_front(pos)) {
453 ++pos;
454 if (ch == '\n') {
455 // Found the LF; return remaining input buffer (following it)
456 m_state = kDone;
457 if (m_saveSkipped) m_buf.resize(m_buf.size() - in.size() + pos);
458 return in.drop_front(pos);
459 }
460 }
461 }
462
463 // We consumed the entire input
464 return StringRef{};
465}
466
467} // namespace wpi