Blame - wpiutil/src/main/native/cpp/HttpUtil.cpp - RealtimeRoboticsGroup/test

blob: 866ee1009151b45b5213629a0a7bba8fbb4d9e19 [file] [log] [blame]

/*----------------------------------------------------------------------------*/
/* Copyright (c) 2016-2020 FIRST. All Rights Reserved.                        */
/* Open Source Software - may be modified and shared by FRC teams. The code   */
/* must be accompanied by the FIRST BSD license file in the root directory of */
/* the project.                                                               */
/*----------------------------------------------------------------------------*/
				7
				8	#include "wpi/HttpUtil.h"
				9
				10	#include <cctype>
				11
				12	#include "wpi/Base64.h"
				13	#include "wpi/STLExtras.h"
				14	#include "wpi/StringExtras.h"
				15	#include "wpi/TCPConnector.h"
				16	#include "wpi/raw_ostream.h"
				17
				18	namespace wpi {
				19
				20	StringRef UnescapeURI(const Twine& str, SmallVectorImpl<char>& buf,
				21	bool* error) {
				22	SmallString<128> strBuf;
				23	StringRef strStr = str.toStringRef(strBuf);
				24	buf.clear();
				25	for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
				26	// pass non-escaped characters to output
				27	if (*i != '%') {
				28	// decode + to space
				29	if (*i == '+')
				30	buf.push_back(' ');
				31	else
				32	buf.push_back(*i);
				33	continue;
				34	}
				35
				36	// are there enough characters left?
				37	if (i + 2 >= end) {
				38	*error = true;
				39	return StringRef{};
				40	}
				41
				42	// replace %xx with the corresponding character
				43	unsigned val1 = hexDigitValue(*++i);
				44	if (val1 == -1U) {
				45	*error = true;
				46	return StringRef{};
				47	}
				48	unsigned val2 = hexDigitValue(*++i);
				49	if (val2 == -1U) {
				50	*error = true;
				51	return StringRef{};
				52	}
				53	buf.push_back((val1 << 4) \| val2);
				54	}
				55
				56	*error = false;
				57	return StringRef{buf.data(), buf.size()};
				58	}
				59
				60	StringRef EscapeURI(const Twine& str, SmallVectorImpl<char>& buf,
				61	bool spacePlus) {
				62	static const char* const hexLut = "0123456789ABCDEF";
				63
				64	SmallString<128> strBuf;
				65	StringRef strStr = str.toStringRef(strBuf);
				66	buf.clear();
				67	for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
				68	// pass unreserved characters to output
				69	if (std::isalnum(i) \|\| i == '-' \|\| i == '_' \|\| i == '.' \|\| *i == '~') {
				70	buf.push_back(*i);
				71	continue;
				72	}
				73
				74	// encode space to +
				75	if (spacePlus && *i == ' ') {
				76	buf.push_back('+');
				77	continue;
				78	}
				79
				80	// convert others to %xx
				81	buf.push_back('%');
				82	buf.push_back(hexLut[((*i) >> 4) & 0x0f]);
				83	buf.push_back(hexLut[(*i) & 0x0f]);
				84	}
				85
				86	return StringRef{buf.data(), buf.size()};
				87	}
				88
Austin Schuh	1e69f94	2020-11-14 15:06:14 -0800	[diff] [blame^]	89	HttpQueryMap::HttpQueryMap(wpi::StringRef query) {
				90	wpi::SmallVector<wpi::StringRef, 16> queryElems;
				91	query.split(queryElems, '&', 100, false);
				92	for (auto elem : queryElems) {
				93	auto [nameEsc, valueEsc] = elem.split('=');
				94	wpi::SmallString<64> nameBuf;
				95	bool err = false;
				96	auto name = wpi::UnescapeURI(nameEsc, nameBuf, &err);
				97	// note: ignores duplicates
				98	if (!err) m_elems.try_emplace(name, valueEsc);
				99	}
				100	}
				101
				102	std::optional<wpi::StringRef> HttpQueryMap::Get(
				103	wpi::StringRef name, wpi::SmallVectorImpl<char>& buf) const {
				104	auto it = m_elems.find(name);
				105	if (it == m_elems.end()) return {};
				106	bool err = false;
				107	auto val = wpi::UnescapeURI(it->second, buf, &err);
				108	if (err) return {};
				109	return val;
				110	}
				111
				112	HttpPath::HttpPath(wpi::StringRef path) {
				113	// special-case root path to be a single empty element
				114	if (path == "/") {
				115	m_pathEnds.emplace_back(0);
				116	return;
				117	}
				118	wpi::SmallVector<wpi::StringRef, 16> pathElems;
				119	path.split(pathElems, '/', 100, false);
				120	for (auto elem : pathElems) {
				121	wpi::SmallString<64> buf;
				122	bool err = false;
				123	auto val = wpi::UnescapeURI(elem, buf, &err);
				124	if (err) {
				125	m_pathEnds.clear();
				126	return;
				127	}
				128	m_pathBuf += val;
				129	m_pathEnds.emplace_back(m_pathBuf.size());
				130	}
				131	}
				132
				133	bool HttpPath::startswith(size_t start, ArrayRef<StringRef> match) const {
				134	if (m_pathEnds.size() < (start + match.size())) return false;
				135	bool first = start == 0;
				136	auto p = m_pathEnds.begin() + start;
				137	for (auto m : match) {
				138	auto val = m_pathBuf.slice(first ? 0 : (p - 1), p);
				139	if (val != m) return false;
				140	first = false;
				141	++p;
				142	}
				143	return true;
				144	}
				145
Brian Silverman	8fce748	2020-01-05 13:18:21 -0800	[diff] [blame]	146	bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
				147	SmallVectorImpl<char>* contentLength) {
				148	if (contentType) contentType->clear();
				149	if (contentLength) contentLength->clear();
				150
				151	bool inContentType = false;
				152	bool inContentLength = false;
				153	SmallString<64> lineBuf;
				154	for (;;) {
				155	StringRef line = is.getline(lineBuf, 1024).rtrim();
				156	if (is.has_error()) return false;
				157	if (line.empty()) return true; // empty line signals end of headers
				158
				159	// header fields start at the beginning of the line
				160	if (!std::isspace(line[0])) {
				161	inContentType = false;
				162	inContentLength = false;
				163	StringRef field;
				164	std::tie(field, line) = line.split(':');
				165	field = field.rtrim();
				166	if (field.equals_lower("content-type"))
				167	inContentType = true;
				168	else if (field.equals_lower("content-length"))
				169	inContentLength = true;
				170	else
				171	continue; // ignore other fields
				172	}
				173
				174	// collapse whitespace
				175	line = line.ltrim();
				176
				177	// save field data
				178	if (inContentType && contentType)
				179	contentType->append(line.begin(), line.end());
				180	else if (inContentLength && contentLength)
				181	contentLength->append(line.begin(), line.end());
				182	}
				183	}
				184
				185	bool FindMultipartBoundary(raw_istream& is, StringRef boundary,
				186	std::string* saveBuf) {
				187	SmallString<64> searchBuf;
				188	searchBuf.resize(boundary.size() + 2);
				189	size_t searchPos = 0;
				190
				191	// Per the spec, the --boundary should be preceded by \r\n, so do a first
				192	// pass of 1-byte reads to throw those away (common case) and keep the
				193	// last non-\r\n character in searchBuf.
				194	if (!saveBuf) {
				195	do {
				196	is.read(searchBuf.data(), 1);
				197	if (is.has_error()) return false;
				198	} while (searchBuf[0] == '\r' \|\| searchBuf[0] == '\n');
				199	searchPos = 1;
				200	}
				201
				202	// Look for --boundary. Read boundarysize+2 bytes at a time
				203	// during the search to speed up the reads, then fast-scan for -,
				204	// and only then match the entire boundary. This will be slow if
				205	// there's a bunch of continuous -'s in the output, but that's unlikely.
				206	for (;;) {
				207	is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
				208	if (is.has_error()) return false;
				209
				210	// Did we find the boundary?
				211	if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
				212	searchBuf.substr(2) == boundary)
				213	return true;
				214
				215	// Fast-scan for '-'
				216	size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
				217	if (pos == StringRef::npos) {
				218	if (saveBuf) saveBuf->append(searchBuf.data(), searchBuf.size());
				219	} else {
				220	if (saveBuf) saveBuf->append(searchBuf.data(), pos);
				221
				222	// move '-' and following to start of buffer (next read will fill)
				223	std::memmove(searchBuf.data(), searchBuf.data() + pos,
				224	searchBuf.size() - pos);
				225	searchPos = searchBuf.size() - pos;
				226	}
				227	}
				228	}
				229
				230	HttpLocation::HttpLocation(const Twine& url_, bool* error,
				231	std::string* errorMsg)
				232	: url{url_.str()} {
				233	// Split apart into components
				234	StringRef query{url};
				235
				236	// scheme:
				237	StringRef scheme;
				238	std::tie(scheme, query) = query.split(':');
				239	if (!scheme.equals_lower("http")) {
				240	*errorMsg = "only supports http URLs";
				241	*error = true;
				242	return;
				243	}
				244
				245	// "//"
				246	if (!query.startswith("//")) {
				247	*errorMsg = "expected http://...";
				248	*error = true;
				249	return;
				250	}
				251	query = query.drop_front(2);
				252
				253	// user:password@host:port/
				254	StringRef authority;
				255	std::tie(authority, query) = query.split('/');
				256
				257	StringRef userpass, hostport;
				258	std::tie(userpass, hostport) = authority.split('@');
				259	// split leaves the RHS empty if the split char isn't present...
				260	if (hostport.empty()) {
				261	hostport = userpass;
				262	userpass = StringRef{};
				263	}
				264
				265	if (!userpass.empty()) {
				266	StringRef rawUser, rawPassword;
				267	std::tie(rawUser, rawPassword) = userpass.split(':');
				268	SmallString<64> userBuf, passBuf;
				269	user = UnescapeURI(rawUser, userBuf, error);
				270	if (*error) {
				271	raw_string_ostream oss(*errorMsg);
				272	oss << "could not unescape user \"" << rawUser << "\"";
				273	oss.flush();
				274	return;
				275	}
				276	password = UnescapeURI(rawPassword, passBuf, error);
				277	if (*error) {
				278	raw_string_ostream oss(*errorMsg);
				279	oss << "could not unescape password \"" << rawPassword << "\"";
				280	oss.flush();
				281	return;
				282	}
				283	}
				284
				285	StringRef portStr;
				286	std::tie(host, portStr) = hostport.rsplit(':');
				287	if (host.empty()) {
				288	*errorMsg = "host is empty";
				289	*error = true;
				290	return;
				291	}
				292	if (portStr.empty()) {
				293	port = 80;
				294	} else if (portStr.getAsInteger(10, port)) {
				295	raw_string_ostream oss(*errorMsg);
				296	oss << "port \"" << portStr << "\" is not an integer";
				297	oss.flush();
				298	*error = true;
				299	return;
				300	}
				301
				302	// path?query#fragment
				303	std::tie(query, fragment) = query.split('#');
				304	std::tie(path, query) = query.split('?');
				305
				306	// Split query string into parameters
				307	while (!query.empty()) {
				308	// split out next param and value
				309	StringRef rawParam, rawValue;
				310	std::tie(rawParam, query) = query.split('&');
				311	if (rawParam.empty()) continue; // ignore "&&"
				312	std::tie(rawParam, rawValue) = rawParam.split('=');
				313
				314	// unescape param
				315	*error = false;
				316	SmallString<64> paramBuf;
				317	StringRef param = UnescapeURI(rawParam, paramBuf, error);
				318	if (*error) {
				319	raw_string_ostream oss(*errorMsg);
				320	oss << "could not unescape parameter \"" << rawParam << "\"";
				321	oss.flush();
				322	return;
				323	}
				324
				325	// unescape value
				326	SmallString<64> valueBuf;
				327	StringRef value = UnescapeURI(rawValue, valueBuf, error);
				328	if (*error) {
				329	raw_string_ostream oss(*errorMsg);
				330	oss << "could not unescape value \"" << rawValue << "\"";
				331	oss.flush();
				332	return;
				333	}
				334
				335	params.emplace_back(std::make_pair(param, value));
				336	}
				337
				338	*error = false;
				339	}
				340
				341	void HttpRequest::SetAuth(const HttpLocation& loc) {
				342	if (!loc.user.empty()) {
				343	SmallString<64> userpass;
				344	userpass += loc.user;
				345	userpass += ':';
				346	userpass += loc.password;
				347	Base64Encode(userpass, &auth);
				348	}
				349	}
				350
				351	bool HttpConnection::Handshake(const HttpRequest& request,
				352	std::string* warnMsg) {
				353	// send GET request
				354	os << "GET /" << request.path << " HTTP/1.1\r\n";
				355	os << "Host: " << request.host << "\r\n";
				356	if (!request.auth.empty())
				357	os << "Authorization: Basic " << request.auth << "\r\n";
				358	os << "\r\n";
				359	os.flush();
				360
				361	// read first line of response
				362	SmallString<64> lineBuf;
				363	StringRef line = is.getline(lineBuf, 1024).rtrim();
				364	if (is.has_error()) {
				365	*warnMsg = "disconnected before response";
				366	return false;
				367	}
				368
				369	// see if we got a HTTP 200 response
				370	StringRef httpver, code, codeText;
				371	std::tie(httpver, line) = line.split(' ');
				372	std::tie(code, codeText) = line.split(' ');
				373	if (!httpver.startswith("HTTP")) {
				374	*warnMsg = "did not receive HTTP response";
				375	return false;
				376	}
				377	if (code != "200") {
				378	raw_string_ostream oss(*warnMsg);
				379	oss << "received " << code << " " << codeText << " response";
				380	oss.flush();
				381	return false;
				382	}
				383
				384	// Parse headers
				385	if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
				386	*warnMsg = "disconnected during headers";
				387	return false;
				388	}
				389
				390	return true;
				391	}
				392
				393	void HttpMultipartScanner::SetBoundary(StringRef boundary) {
				394	m_boundaryWith = "\n--";
				395	m_boundaryWith += boundary;
				396	m_boundaryWithout = "\n";
				397	m_boundaryWithout += boundary;
				398	m_dashes = kUnknown;
				399	}
				400
				401	void HttpMultipartScanner::Reset(bool saveSkipped) {
				402	m_saveSkipped = saveSkipped;
				403	m_state = kBoundary;
				404	m_posWith = 0;
				405	m_posWithout = 0;
				406	m_buf.resize(0);
				407	}
				408
				409	StringRef HttpMultipartScanner::Execute(StringRef in) {
				410	if (m_state == kDone) Reset(m_saveSkipped);
				411	if (m_saveSkipped) m_buf += in;
				412
				413	size_t pos = 0;
				414	if (m_state == kBoundary) {
				415	for (char ch : in) {
				416	++pos;
				417	if (m_dashes != kWithout) {
				418	if (ch == m_boundaryWith[m_posWith]) {
				419	++m_posWith;
				420	if (m_posWith == m_boundaryWith.size()) {
				421	// Found the boundary; transition to padding
				422	m_state = kPadding;
				423	m_dashes = kWith; // no longer accept plain 'boundary'
				424	break;
				425	}
				426	} else if (ch == m_boundaryWith[0]) {
				427	m_posWith = 1;
				428	} else {
				429	m_posWith = 0;
				430	}
				431	}
				432
				433	if (m_dashes != kWith) {
				434	if (ch == m_boundaryWithout[m_posWithout]) {
				435	++m_posWithout;
				436	if (m_posWithout == m_boundaryWithout.size()) {
				437	// Found the boundary; transition to padding
				438	m_state = kPadding;
				439	m_dashes = kWithout; // no longer accept '--boundary'
				440	break;
				441	}
				442	} else if (ch == m_boundaryWithout[0]) {
				443	m_posWithout = 1;
				444	} else {
				445	m_posWithout = 0;
				446	}
				447	}
				448	}
				449	}
				450
				451	if (m_state == kPadding) {
				452	for (char ch : in.drop_front(pos)) {
				453	++pos;
				454	if (ch == '\n') {
				455	// Found the LF; return remaining input buffer (following it)
				456	m_state = kDone;
				457	if (m_saveSkipped) m_buf.resize(m_buf.size() - in.size() + pos);
				458	return in.drop_front(pos);
				459	}
				460	}
				461	}
				462
				463	// We consumed the entire input
				464	return StringRef{};
				465	}
				466
				467	} // namespace wpi