// blob: afce3a6e3421769a59f0b49879f9789c14470fa5 [file] [log] [blame]
// Copyright (c) FIRST and other WPILib contributors.
// Open Source Software; you can modify and/or share it under the terms of
// the WPILib BSD license file in the root directory of this project.
#include "wpi/HttpUtil.h"
#include <cctype>
#include "fmt/format.h"
#include "wpi/Base64.h"
#include "wpi/StringExtras.h"
#include "wpi/TCPConnector.h"
#include "wpi/raw_ostream.h"
namespace wpi {
// Decodes a URI-escaped (percent-encoded) string.
//
// '+' is decoded to a space and every "%xx" sequence (two hex digits) is
// replaced by the byte it encodes; all other characters are copied as-is.
//
// str:   escaped input
// buf:   storage for the decoded result; cleared on entry
// error: set to true if a '%' is not followed by two hex digits, false
//        otherwise (always written; must not be null)
//
// Returns a view of the decoded contents of buf, or an empty view on error.
std::string_view UnescapeURI(std::string_view str, SmallVectorImpl<char>& buf,
                             bool* error) {
  buf.clear();
  for (auto i = str.begin(), end = str.end(); i != end; ++i) {
    // pass non-escaped characters to output
    if (*i != '%') {
      // decode + to space
      buf.push_back(*i == '+' ? ' ' : *i);
      continue;
    }

    // are there enough characters left?  Compare the remaining distance
    // instead of forming i + 2, which may point beyond end() (undefined
    // behavior, and an assertion failure with checked iterators).
    if (end - i < 3) {
      *error = true;
      return {};
    }

    // replace %xx with the corresponding character
    unsigned val1 = hexDigitValue(*++i);
    if (val1 == -1U) {
      *error = true;
      return {};
    }
    unsigned val2 = hexDigitValue(*++i);
    if (val2 == -1U) {
      *error = true;
      return {};
    }
    buf.push_back((val1 << 4) | val2);
  }

  *error = false;
  return {buf.data(), buf.size()};
}
// URI-escapes (percent-encodes) a string.
//
// Alphanumeric characters and '-', '_', '.', '~' are copied unchanged.  When
// spacePlus is true a space is written as '+'.  Every other byte is written
// as "%XX" using uppercase hex digits.
//
// str:       input to escape
// buf:       storage for the escaped result; cleared on entry
// spacePlus: encode ' ' as '+' instead of "%20"
//
// Returns a view of the escaped contents of buf.
std::string_view EscapeURI(std::string_view str, SmallVectorImpl<char>& buf,
                           bool spacePlus) {
  static const char* const hexLut = "0123456789ABCDEF";

  buf.clear();
  for (char ch : str) {
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char (any byte >= 0x80) is undefined behavior.
    const auto uch = static_cast<unsigned char>(ch);

    // pass unreserved characters to output
    if (std::isalnum(uch) || ch == '-' || ch == '_' || ch == '.' ||
        ch == '~') {
      buf.push_back(ch);
      continue;
    }

    // encode space to +
    if (spacePlus && ch == ' ') {
      buf.push_back('+');
      continue;
    }

    // convert others to %xx
    buf.push_back('%');
    buf.push_back(hexLut[(uch >> 4) & 0x0f]);
    buf.push_back(hexLut[uch & 0x0f]);
  }

  return {buf.data(), buf.size()};
}
// Builds the map from a raw query string of '&'-separated "name=value"
// elements.  Names are unescaped here; values are stored exactly as they
// appear in the query (still escaped).
HttpQueryMap::HttpQueryMap(std::string_view query) {
  SmallVector<std::string_view, 16> pairs;
  split(query, pairs, '&', 100, false);

  for (std::string_view pair : pairs) {
    std::string_view rawName;
    std::string_view rawValue;
    std::tie(rawName, rawValue) = split(pair, '=');

    SmallString<64> keyBuf;
    bool failed = false;
    auto key = wpi::UnescapeURI(rawName, keyBuf, &failed);
    if (failed) {
      continue;  // skip elements whose name cannot be unescaped
    }

    // try_emplace leaves an already-present name untouched, so duplicates
    // are ignored
    m_elems.try_emplace(key, rawValue);
  }
}
// Looks up a query parameter by (unescaped) name and unescapes its value.
//
// name: parameter name to look up
// buf:  storage for the unescaped value
//
// Returns the unescaped value (a view into buf), or an empty optional if the
// name is not present or the stored value cannot be unescaped.
std::optional<std::string_view> HttpQueryMap::Get(
    std::string_view name, wpi::SmallVectorImpl<char>& buf) const {
  auto found = m_elems.find(name);
  if (found != m_elems.end()) {
    bool failed = false;
    std::string_view decoded = wpi::UnescapeURI(found->second, buf, &failed);
    if (!failed) {
      return decoded;
    }
  }
  return std::nullopt;
}
// Splits a URL path on '/' and stores the unescaped elements.
//
// The unescaped elements are concatenated into m_pathBuf; m_pathEnds records
// the end offset of each element within that buffer.  If any element fails
// to unescape, m_pathEnds is cleared and construction stops.
HttpPath::HttpPath(std::string_view path) {
  // the root path is represented as one empty element
  if (path == "/") {
    m_pathEnds.emplace_back(0);
    return;
  }

  wpi::SmallVector<std::string_view, 16> segments;
  split(path, segments, '/', 100, false);

  for (std::string_view segment : segments) {
    SmallString<64> decodeBuf;
    bool failed = false;
    std::string_view decoded = wpi::UnescapeURI(segment, decodeBuf, &failed);
    if (failed) {
      m_pathEnds.clear();
      return;
    }

    // append the element and remember where it ends
    m_pathBuf += decoded;
    m_pathEnds.emplace_back(m_pathBuf.size());
  }
}
// Checks whether the path elements beginning at index `start` equal the
// elements of `match`, in order.
//
// Returns false if the path has fewer than start + match.size() elements or
// if any compared element differs; true otherwise.
bool HttpPath::startswith(size_t start,
                          span<const std::string_view> match) const {
  // not enough path elements to cover the whole match
  if ((start + match.size()) > m_pathEnds.size()) {
    return false;
  }

  size_t index = start;
  for (auto expected : match) {
    // element `index` spans from the end of the previous element (or 0 for
    // the very first element) to its own recorded end
    const auto from = index == 0 ? 0 : m_pathEnds[index - 1];
    const auto to = m_pathEnds[index];
    if (slice(m_pathBuf, from, to) != expected) {
      return false;
    }
    ++index;
  }
  return true;
}
// Returns path element n as a view into m_pathBuf.  No bounds checking is
// performed here on n.
std::string_view HttpPath::operator[](size_t n) const {
  // the first element starts at the beginning of the buffer; every other
  // element starts where the previous one ends
  if (n == 0) {
    return slice(m_pathBuf, 0, m_pathEnds[n]);
  }
  return slice(m_pathBuf, m_pathEnds[n - 1], m_pathEnds[n]);
}
// Reads HTTP header lines from a stream until the empty line that ends the
// header section, capturing the Content-Type and Content-Length values.
//
// is:            stream to read lines from
// contentType:   if non-null, cleared and then filled with the Content-Type
//                value
// contentLength: if non-null, cleared and then filled with the
//                Content-Length value
//
// Field names are matched case-insensitively.  A line that starts with
// whitespace is treated as a continuation of the preceding header field and
// its (left-trimmed) text is appended to that field's value.
//
// Returns true when the terminating empty line is reached, false if the
// stream reports an error first.
bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
                      SmallVectorImpl<char>* contentLength) {
  if (contentType) {
    contentType->clear();
  }
  if (contentLength) {
    contentLength->clear();
  }

  bool inContentType = false;
  bool inContentLength = false;
  SmallString<64> lineBuf;
  for (;;) {
    std::string_view line = rtrim(is.getline(lineBuf, 1024));
    if (is.has_error()) {
      return false;
    }
    if (line.empty()) {
      return true;  // empty line signals end of headers
    }

    // header fields start at the beginning of the line.  The cast is
    // required: passing a negative plain char (byte >= 0x80) to std::isspace
    // is undefined behavior.
    if (!std::isspace(static_cast<unsigned char>(line[0]))) {
      inContentType = false;
      inContentLength = false;
      std::string_view field;
      std::tie(field, line) = split(line, ':');
      field = rtrim(field);
      if (equals_lower(field, "content-type")) {
        inContentType = true;
      } else if (equals_lower(field, "content-length")) {
        inContentLength = true;
      } else {
        continue;  // ignore other fields
      }
    }

    // collapse whitespace
    line = ltrim(line);

    // save field data
    if (inContentType && contentType) {
      contentType->append(line.begin(), line.end());
    } else if (inContentLength && contentLength) {
      contentLength->append(line.begin(), line.end());
    }
  }
}
// Reads from a stream until "--" followed by `boundary` has been consumed.
//
// is:       stream to read from
// boundary: boundary string, without the leading "--"
// saveBuf:  if non-null, every byte read before the boundary is appended to
//           it; if null, skipped bytes are discarded
//
// Returns true once the boundary has been read, false if the stream reports
// an error first.
bool FindMultipartBoundary(raw_istream& is, std::string_view boundary,
                           std::string* saveBuf) {
  SmallString<64> searchBuf;
  searchBuf.resize(boundary.size() + 2);
  size_t searchPos = 0;

  // Per the spec, the --boundary should be preceded by \r\n, so do a first
  // pass of 1-byte reads to throw those away (common case) and keep the
  // last non-\r\n character in searchBuf.
  if (!saveBuf) {
    do {
      is.read(searchBuf.data(), 1);
      if (is.has_error()) {
        return false;
      }
    } while (searchBuf[0] == '\r' || searchBuf[0] == '\n');
    searchPos = 1;
  }

  // Look for --boundary.  Read boundarysize+2 bytes at a time
  // during the search to speed up the reads, then fast-scan for -,
  // and only then match the entire boundary.  This will be slow if
  // there's a bunch of continuous -'s in the output, but that's unlikely.
  for (;;) {
    // fill the part of the buffer not holding carried-over data
    is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
    if (is.has_error()) {
      return false;
    }

    // Did we find the boundary?
    if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
        searchBuf.substr(2) == boundary) {
      return true;
    }

    // Fast-scan for '-'
    size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
    if (pos == std::string_view::npos) {
      if (saveBuf) {
        saveBuf->append(searchBuf.data(), searchBuf.size());
      }
      // The whole buffer has been consumed, so the next read must refill it
      // from the start.  Without this reset a stale prefix left over from an
      // earlier partial match would be rescanned (and saved again) on every
      // following iteration.
      searchPos = 0;
    } else {
      if (saveBuf) {
        saveBuf->append(searchBuf.data(), pos);
      }

      // move '-' and following to start of buffer (next read will fill)
      std::memmove(searchBuf.data(), searchBuf.data() + pos,
                   searchBuf.size() - pos);
      searchPos = searchBuf.size() - pos;
    }
  }
}
// Parses an "http://[user[:password]@]host[:port]/path[?query][#fragment]"
// URL into its components.
//
// url_:     URL to parse (a copy is kept in `url`)
// error:    set to true if parsing fails, false on success
// errorMsg: receives a description of the failure when *error is true
//
// On success user, password, host, port (80 if not given), path, fragment
// and params are filled in; user, password and the query parameter names and
// values are stored unescaped.
HttpLocation::HttpLocation(std::string_view url_, bool* error,
                           std::string* errorMsg)
    : url{url_} {
  // records a failure for the caller
  const auto fail = [error, errorMsg](const std::string& message) {
    *errorMsg = message;
    *error = true;
  };

  // everything not yet parsed; shrinks as components are split off
  std::string_view rest{url};

  // scheme:
  std::string_view scheme;
  std::tie(scheme, rest) = split(rest, ':');
  if (!equals_lower(scheme, "http")) {
    fail("only supports http URLs");
    return;
  }

  // "//"
  if (!starts_with(rest, "//")) {
    fail("expected http://...");
    return;
  }
  rest.remove_prefix(2);

  // user:password@host:port/
  std::string_view authority;
  std::tie(authority, rest) = split(rest, '/');

  std::string_view userpass;
  std::string_view hostport;
  std::tie(userpass, hostport) = split(authority, '@');
  // split leaves the RHS empty if the split char isn't present...
  if (hostport.empty()) {
    hostport = userpass;
    userpass = {};
  }

  if (!userpass.empty()) {
    auto [rawUser, rawPassword] = split(userpass, ':');

    SmallString<64> userBuf;
    user = UnescapeURI(rawUser, userBuf, error);
    if (*error) {
      fail(fmt::format("could not unescape user \"{}\"", rawUser));
      return;
    }

    SmallString<64> passBuf;
    password = UnescapeURI(rawPassword, passBuf, error);
    if (*error) {
      fail(fmt::format("could not unescape password \"{}\"", rawPassword));
      return;
    }
  }

  // host[:port]
  std::string_view portStr;
  std::tie(host, portStr) = rsplit(hostport, ':');
  if (host.empty()) {
    fail("host is empty");
    return;
  }

  if (portStr.empty()) {
    port = 80;
  } else {
    auto parsedPort = parse_integer<int>(portStr, 10);
    if (!parsedPort) {
      fail(fmt::format("port \"{}\" is not an integer", portStr));
      return;
    }
    port = parsedPort.value();
  }

  // path?query#fragment
  std::tie(rest, fragment) = split(rest, '#');
  std::tie(path, rest) = split(rest, '?');

  // Split query string into parameters
  while (!rest.empty()) {
    // split out next param and value
    std::string_view element;
    std::tie(element, rest) = split(rest, '&');
    if (element.empty()) {
      continue;  // ignore "&&"
    }
    auto [rawParam, rawValue] = split(element, '=');

    // unescape param
    *error = false;
    SmallString<64> paramBuf;
    std::string_view param = UnescapeURI(rawParam, paramBuf, error);
    if (*error) {
      fail(fmt::format("could not unescape parameter \"{}\"", rawParam));
      return;
    }

    // unescape value
    SmallString<64> valueBuf;
    std::string_view value = UnescapeURI(rawValue, valueBuf, error);
    if (*error) {
      fail(fmt::format("could not unescape value \"{}\"", rawValue));
      return;
    }

    params.emplace_back(std::make_pair(param, value));
  }

  *error = false;
}
void HttpRequest::SetAuth(const HttpLocation& loc) {
if (!loc.user.empty()) {
SmallString<64> userpass;
userpass += loc.user;
userpass += ':';
userpass += loc.password;
Base64Encode(userpass.str(), &auth);
}
}
bool HttpConnection::Handshake(const HttpRequest& request,
std::string* warnMsg) {
// send GET request
os << "GET /" << request.path << " HTTP/1.1\r\n";
os << "Host: " << request.host << "\r\n";
if (!request.auth.empty()) {
os << "Authorization: Basic " << request.auth << "\r\n";
}
os << "\r\n";
os.flush();
// read first line of response
SmallString<64> lineBuf;
std::string_view line = rtrim(is.getline(lineBuf, 1024));
if (is.has_error()) {
*warnMsg = "disconnected before response";
return false;
}
// see if we got a HTTP 200 response
std::string_view httpver, code, codeText;
std::tie(httpver, line) = split(line, ' ');
std::tie(code, codeText) = split(line, ' ');
if (!starts_with(httpver, "HTTP")) {
*warnMsg = "did not receive HTTP response";
return false;
}
if (code != "200") {
*warnMsg = fmt::format("received {} {} response", code, codeText);
return false;
}
// Parse headers
if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
*warnMsg = "disconnected during headers";
return false;
}
return true;
}
// Sets the boundary string to scan for.  Two search patterns are prepared,
// "\n--<boundary>" and "\n<boundary>", and the dash mode is reset to
// kUnknown so that either form is accepted by the next scan.
void HttpMultipartScanner::SetBoundary(std::string_view boundary) {
  m_dashes = kUnknown;

  // pattern without the leading dashes
  m_boundaryWithout = "\n";
  m_boundaryWithout += boundary;

  // pattern with the leading dashes
  m_boundaryWith = "\n--";
  m_boundaryWith += boundary;
}
// Restarts the scanner in the boundary-search state with both match
// positions at zero and an empty save buffer.
//
// saveSkipped: whether subsequently scanned input should be kept in m_buf
void HttpMultipartScanner::Reset(bool saveSkipped) {
  m_state = kBoundary;
  m_posWithout = 0;
  m_posWith = 0;
  m_buf.clear();
  m_saveSkipped = saveSkipped;
}
// Feeds the next chunk of input to the scanner.
//
// The scanner first searches for the boundary (kBoundary), then skips
// everything up to and including the next LF (kPadding), at which point it is
// done (kDone).  Calling Execute() again after completion restarts the scan.
//
// While the dash mode is still unknown both "\n--boundary" and "\nboundary"
// are accepted; whichever form is found first is the only one accepted
// afterwards.
//
// When saving is enabled, the input is appended to m_buf; once the scan
// completes m_buf is trimmed so it ends just after the terminating LF.
//
// Returns the part of `in` that follows the terminating LF, or an empty view
// if the whole input was consumed without completing the scan.
std::string_view HttpMultipartScanner::Execute(std::string_view in) {
  if (m_state == kDone) {
    Reset(m_saveSkipped);
  }
  if (m_saveSkipped) {
    m_buf += in;
  }

  // Advances the match position of one pattern by one input character and
  // reports whether the complete pattern has now been seen.  On a mismatch
  // the position restarts at 1 if the character equals the pattern's first
  // character, otherwise at 0.
  const auto advance = [](const auto& pattern, auto& matched, char ch) {
    if (ch == pattern[matched]) {
      ++matched;
      return matched == pattern.size();
    }
    if (ch == pattern[0]) {
      matched = 1;
    } else {
      matched = 0;
    }
    return false;
  };

  // number of characters of `in` consumed so far
  size_t pos = 0;

  if (m_state == kBoundary) {
    while (pos < in.size()) {
      const char ch = in[pos];
      ++pos;

      if (m_dashes != kWithout && advance(m_boundaryWith, m_posWith, ch)) {
        // Found the boundary; transition to padding
        m_state = kPadding;
        m_dashes = kWith;  // no longer accept plain 'boundary'
        break;
      }

      if (m_dashes != kWith &&
          advance(m_boundaryWithout, m_posWithout, ch)) {
        // Found the boundary; transition to padding
        m_state = kPadding;
        m_dashes = kWithout;  // no longer accept '--boundary'
        break;
      }
    }
  }

  if (m_state == kPadding) {
    while (pos < in.size()) {
      const char ch = in[pos];
      ++pos;
      if (ch != '\n') {
        continue;
      }

      // Found the LF; return remaining input buffer (following it)
      m_state = kDone;
      if (m_saveSkipped) {
        // drop the unconsumed tail of `in` that was appended above
        m_buf.resize(m_buf.size() - in.size() + pos);
      }
      return drop_front(in, pos);
    }
  }

  // We consumed the entire input
  return {};
}
} // namespace wpi