/*----------------------------------------------------------------------------*/
/* Copyright (c) 2016-2018 FIRST. All Rights Reserved.                        */
/* Open Source Software - may be modified and shared by FRC teams. The code   */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project.                                                               */
/*----------------------------------------------------------------------------*/
7
8#include "wpi/HttpUtil.h"
9
10#include <cctype>
11
12#include "wpi/Base64.h"
13#include "wpi/STLExtras.h"
14#include "wpi/StringExtras.h"
15#include "wpi/TCPConnector.h"
16#include "wpi/raw_ostream.h"
17
18namespace wpi {
19
20StringRef UnescapeURI(const Twine& str, SmallVectorImpl<char>& buf,
21 bool* error) {
22 SmallString<128> strBuf;
23 StringRef strStr = str.toStringRef(strBuf);
24 buf.clear();
25 for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
26 // pass non-escaped characters to output
27 if (*i != '%') {
28 // decode + to space
29 if (*i == '+')
30 buf.push_back(' ');
31 else
32 buf.push_back(*i);
33 continue;
34 }
35
36 // are there enough characters left?
37 if (i + 2 >= end) {
38 *error = true;
39 return StringRef{};
40 }
41
42 // replace %xx with the corresponding character
43 unsigned val1 = hexDigitValue(*++i);
44 if (val1 == -1U) {
45 *error = true;
46 return StringRef{};
47 }
48 unsigned val2 = hexDigitValue(*++i);
49 if (val2 == -1U) {
50 *error = true;
51 return StringRef{};
52 }
53 buf.push_back((val1 << 4) | val2);
54 }
55
56 *error = false;
57 return StringRef{buf.data(), buf.size()};
58}
59
60StringRef EscapeURI(const Twine& str, SmallVectorImpl<char>& buf,
61 bool spacePlus) {
62 static const char* const hexLut = "0123456789ABCDEF";
63
64 SmallString<128> strBuf;
65 StringRef strStr = str.toStringRef(strBuf);
66 buf.clear();
67 for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
68 // pass unreserved characters to output
69 if (std::isalnum(*i) || *i == '-' || *i == '_' || *i == '.' || *i == '~') {
70 buf.push_back(*i);
71 continue;
72 }
73
74 // encode space to +
75 if (spacePlus && *i == ' ') {
76 buf.push_back('+');
77 continue;
78 }
79
80 // convert others to %xx
81 buf.push_back('%');
82 buf.push_back(hexLut[((*i) >> 4) & 0x0f]);
83 buf.push_back(hexLut[(*i) & 0x0f]);
84 }
85
86 return StringRef{buf.data(), buf.size()};
87}
88
89bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
90 SmallVectorImpl<char>* contentLength) {
91 if (contentType) contentType->clear();
92 if (contentLength) contentLength->clear();
93
94 bool inContentType = false;
95 bool inContentLength = false;
96 SmallString<64> lineBuf;
97 for (;;) {
98 StringRef line = is.getline(lineBuf, 1024).rtrim();
99 if (is.has_error()) return false;
100 if (line.empty()) return true; // empty line signals end of headers
101
102 // header fields start at the beginning of the line
103 if (!std::isspace(line[0])) {
104 inContentType = false;
105 inContentLength = false;
106 StringRef field;
107 std::tie(field, line) = line.split(':');
108 field = field.rtrim();
109 if (field.equals_lower("content-type"))
110 inContentType = true;
111 else if (field.equals_lower("content-length"))
112 inContentLength = true;
113 else
114 continue; // ignore other fields
115 }
116
117 // collapse whitespace
118 line = line.ltrim();
119
120 // save field data
121 if (inContentType && contentType)
122 contentType->append(line.begin(), line.end());
123 else if (inContentLength && contentLength)
124 contentLength->append(line.begin(), line.end());
125 }
126}
127
128bool FindMultipartBoundary(raw_istream& is, StringRef boundary,
129 std::string* saveBuf) {
130 SmallString<64> searchBuf;
131 searchBuf.resize(boundary.size() + 2);
132 size_t searchPos = 0;
133
134 // Per the spec, the --boundary should be preceded by \r\n, so do a first
135 // pass of 1-byte reads to throw those away (common case) and keep the
136 // last non-\r\n character in searchBuf.
137 if (!saveBuf) {
138 do {
139 is.read(searchBuf.data(), 1);
140 if (is.has_error()) return false;
141 } while (searchBuf[0] == '\r' || searchBuf[0] == '\n');
142 searchPos = 1;
143 }
144
145 // Look for --boundary. Read boundarysize+2 bytes at a time
146 // during the search to speed up the reads, then fast-scan for -,
147 // and only then match the entire boundary. This will be slow if
148 // there's a bunch of continuous -'s in the output, but that's unlikely.
149 for (;;) {
150 is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
151 if (is.has_error()) return false;
152
153 // Did we find the boundary?
154 if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
155 searchBuf.substr(2) == boundary)
156 return true;
157
158 // Fast-scan for '-'
159 size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
160 if (pos == StringRef::npos) {
161 if (saveBuf) saveBuf->append(searchBuf.data(), searchBuf.size());
162 } else {
163 if (saveBuf) saveBuf->append(searchBuf.data(), pos);
164
165 // move '-' and following to start of buffer (next read will fill)
166 std::memmove(searchBuf.data(), searchBuf.data() + pos,
167 searchBuf.size() - pos);
168 searchPos = searchBuf.size() - pos;
169 }
170 }
171}
172
173HttpLocation::HttpLocation(const Twine& url_, bool* error,
174 std::string* errorMsg)
175 : url{url_.str()} {
176 // Split apart into components
177 StringRef query{url};
178
179 // scheme:
180 StringRef scheme;
181 std::tie(scheme, query) = query.split(':');
182 if (!scheme.equals_lower("http")) {
183 *errorMsg = "only supports http URLs";
184 *error = true;
185 return;
186 }
187
188 // "//"
189 if (!query.startswith("//")) {
190 *errorMsg = "expected http://...";
191 *error = true;
192 return;
193 }
194 query = query.drop_front(2);
195
196 // user:password@host:port/
197 StringRef authority;
198 std::tie(authority, query) = query.split('/');
199
200 StringRef userpass, hostport;
201 std::tie(userpass, hostport) = authority.split('@');
202 // split leaves the RHS empty if the split char isn't present...
203 if (hostport.empty()) {
204 hostport = userpass;
205 userpass = StringRef{};
206 }
207
208 if (!userpass.empty()) {
209 StringRef rawUser, rawPassword;
210 std::tie(rawUser, rawPassword) = userpass.split(':');
211 SmallString<64> userBuf, passBuf;
212 user = UnescapeURI(rawUser, userBuf, error);
213 if (*error) {
214 raw_string_ostream oss(*errorMsg);
215 oss << "could not unescape user \"" << rawUser << "\"";
216 oss.flush();
217 return;
218 }
219 password = UnescapeURI(rawPassword, passBuf, error);
220 if (*error) {
221 raw_string_ostream oss(*errorMsg);
222 oss << "could not unescape password \"" << rawPassword << "\"";
223 oss.flush();
224 return;
225 }
226 }
227
228 StringRef portStr;
229 std::tie(host, portStr) = hostport.rsplit(':');
230 if (host.empty()) {
231 *errorMsg = "host is empty";
232 *error = true;
233 return;
234 }
235 if (portStr.empty()) {
236 port = 80;
237 } else if (portStr.getAsInteger(10, port)) {
238 raw_string_ostream oss(*errorMsg);
239 oss << "port \"" << portStr << "\" is not an integer";
240 oss.flush();
241 *error = true;
242 return;
243 }
244
245 // path?query#fragment
246 std::tie(query, fragment) = query.split('#');
247 std::tie(path, query) = query.split('?');
248
249 // Split query string into parameters
250 while (!query.empty()) {
251 // split out next param and value
252 StringRef rawParam, rawValue;
253 std::tie(rawParam, query) = query.split('&');
254 if (rawParam.empty()) continue; // ignore "&&"
255 std::tie(rawParam, rawValue) = rawParam.split('=');
256
257 // unescape param
258 *error = false;
259 SmallString<64> paramBuf;
260 StringRef param = UnescapeURI(rawParam, paramBuf, error);
261 if (*error) {
262 raw_string_ostream oss(*errorMsg);
263 oss << "could not unescape parameter \"" << rawParam << "\"";
264 oss.flush();
265 return;
266 }
267
268 // unescape value
269 SmallString<64> valueBuf;
270 StringRef value = UnescapeURI(rawValue, valueBuf, error);
271 if (*error) {
272 raw_string_ostream oss(*errorMsg);
273 oss << "could not unescape value \"" << rawValue << "\"";
274 oss.flush();
275 return;
276 }
277
278 params.emplace_back(std::make_pair(param, value));
279 }
280
281 *error = false;
282}
283
284void HttpRequest::SetAuth(const HttpLocation& loc) {
285 if (!loc.user.empty()) {
286 SmallString<64> userpass;
287 userpass += loc.user;
288 userpass += ':';
289 userpass += loc.password;
290 Base64Encode(userpass, &auth);
291 }
292}
293
294bool HttpConnection::Handshake(const HttpRequest& request,
295 std::string* warnMsg) {
296 // send GET request
297 os << "GET /" << request.path << " HTTP/1.1\r\n";
298 os << "Host: " << request.host << "\r\n";
299 if (!request.auth.empty())
300 os << "Authorization: Basic " << request.auth << "\r\n";
301 os << "\r\n";
302 os.flush();
303
304 // read first line of response
305 SmallString<64> lineBuf;
306 StringRef line = is.getline(lineBuf, 1024).rtrim();
307 if (is.has_error()) {
308 *warnMsg = "disconnected before response";
309 return false;
310 }
311
312 // see if we got a HTTP 200 response
313 StringRef httpver, code, codeText;
314 std::tie(httpver, line) = line.split(' ');
315 std::tie(code, codeText) = line.split(' ');
316 if (!httpver.startswith("HTTP")) {
317 *warnMsg = "did not receive HTTP response";
318 return false;
319 }
320 if (code != "200") {
321 raw_string_ostream oss(*warnMsg);
322 oss << "received " << code << " " << codeText << " response";
323 oss.flush();
324 return false;
325 }
326
327 // Parse headers
328 if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
329 *warnMsg = "disconnected during headers";
330 return false;
331 }
332
333 return true;
334}
335
336void HttpMultipartScanner::SetBoundary(StringRef boundary) {
337 m_boundaryWith = "\n--";
338 m_boundaryWith += boundary;
339 m_boundaryWithout = "\n";
340 m_boundaryWithout += boundary;
341 m_dashes = kUnknown;
342}
343
344void HttpMultipartScanner::Reset(bool saveSkipped) {
345 m_saveSkipped = saveSkipped;
346 m_state = kBoundary;
347 m_posWith = 0;
348 m_posWithout = 0;
349 m_buf.resize(0);
350}
351
352StringRef HttpMultipartScanner::Execute(StringRef in) {
353 if (m_state == kDone) Reset(m_saveSkipped);
354 if (m_saveSkipped) m_buf += in;
355
356 size_t pos = 0;
357 if (m_state == kBoundary) {
358 for (char ch : in) {
359 ++pos;
360 if (m_dashes != kWithout) {
361 if (ch == m_boundaryWith[m_posWith]) {
362 ++m_posWith;
363 if (m_posWith == m_boundaryWith.size()) {
364 // Found the boundary; transition to padding
365 m_state = kPadding;
366 m_dashes = kWith; // no longer accept plain 'boundary'
367 break;
368 }
369 } else if (ch == m_boundaryWith[0]) {
370 m_posWith = 1;
371 } else {
372 m_posWith = 0;
373 }
374 }
375
376 if (m_dashes != kWith) {
377 if (ch == m_boundaryWithout[m_posWithout]) {
378 ++m_posWithout;
379 if (m_posWithout == m_boundaryWithout.size()) {
380 // Found the boundary; transition to padding
381 m_state = kPadding;
382 m_dashes = kWithout; // no longer accept '--boundary'
383 break;
384 }
385 } else if (ch == m_boundaryWithout[0]) {
386 m_posWithout = 1;
387 } else {
388 m_posWithout = 0;
389 }
390 }
391 }
392 }
393
394 if (m_state == kPadding) {
395 for (char ch : in.drop_front(pos)) {
396 ++pos;
397 if (ch == '\n') {
398 // Found the LF; return remaining input buffer (following it)
399 m_state = kDone;
400 if (m_saveSkipped) m_buf.resize(m_buf.size() - in.size() + pos);
401 return in.drop_front(pos);
402 }
403 }
404 }
405
406 // We consumed the entire input
407 return StringRef{};
408}
409
410} // namespace wpi