Blame - wpinet/src/main/native/cpp/HttpUtil.cpp - RealtimeRoboticsGroup/test

blob: 12ff0841bfe7f486411456eb7ac1333c539f3564 [file] [log] [blame]

James Kuszmaul	cf32412	2023-01-14 14:07:17 -0800	[diff] [blame]	1	// Copyright (c) FIRST and other WPILib contributors.
				2	// Open Source Software; you can modify and/or share it under the terms of
				3	// the WPILib BSD license file in the root directory of this project.
				4
				5	#include "wpinet/HttpUtil.h"
				6
				7	#include <cctype>
				8
				9	#include <fmt/format.h>
				10	#include <wpi/Base64.h>
				11	#include <wpi/StringExtras.h>
				12	#include <wpi/raw_ostream.h>
				13
				14	#include "wpinet/TCPConnector.h"
				15
				16	namespace wpi {
				17
				18	std::string_view UnescapeURI(std::string_view str, SmallVectorImpl<char>& buf,
				19	bool* error) {
				20	buf.clear();
				21	for (auto i = str.begin(), end = str.end(); i != end; ++i) {
				22	// pass non-escaped characters to output
				23	if (*i != '%') {
				24	// decode + to space
				25	if (*i == '+') {
				26	buf.push_back(' ');
				27	} else {
				28	buf.push_back(*i);
				29	}
				30	continue;
				31	}
				32
				33	// are there enough characters left?
				34	if (i + 2 >= end) {
				35	*error = true;
				36	return {};
				37	}
				38
				39	// replace %xx with the corresponding character
				40	unsigned val1 = hexDigitValue(*++i);
				41	if (val1 == -1U) {
				42	*error = true;
				43	return {};
				44	}
				45	unsigned val2 = hexDigitValue(*++i);
				46	if (val2 == -1U) {
				47	*error = true;
				48	return {};
				49	}
				50	buf.push_back((val1 << 4) \| val2);
				51	}
				52
				53	*error = false;
				54	return {buf.data(), buf.size()};
				55	}
				56
				57	std::string_view EscapeURI(std::string_view str, SmallVectorImpl<char>& buf,
				58	bool spacePlus) {
				59	static const char* const hexLut = "0123456789ABCDEF";
				60
				61	buf.clear();
				62	for (auto i = str.begin(), end = str.end(); i != end; ++i) {
				63	// pass unreserved characters to output
				64	if (std::isalnum(i) \|\| i == '-' \|\| i == '_' \|\| i == '.' \|\| *i == '~') {
				65	buf.push_back(*i);
				66	continue;
				67	}
				68
				69	// encode space to +
				70	if (spacePlus && *i == ' ') {
				71	buf.push_back('+');
				72	continue;
				73	}
				74
				75	// convert others to %xx
				76	buf.push_back('%');
				77	buf.push_back(hexLut[((*i) >> 4) & 0x0f]);
				78	buf.push_back(hexLut[(*i) & 0x0f]);
				79	}
				80
				81	return {buf.data(), buf.size()};
				82	}
				83
				84	HttpQueryMap::HttpQueryMap(std::string_view query) {
				85	SmallVector<std::string_view, 16> queryElems;
				86	split(query, queryElems, '&', 100, false);
				87	for (auto elem : queryElems) {
				88	auto [nameEsc, valueEsc] = split(elem, '=');
				89	SmallString<64> nameBuf;
				90	bool err = false;
				91	auto name = wpi::UnescapeURI(nameEsc, nameBuf, &err);
				92	// note: ignores duplicates
				93	if (!err) {
				94	m_elems.try_emplace(name, valueEsc);
				95	}
				96	}
				97	}
				98
				99	std::optional<std::string_view> HttpQueryMap::Get(
				100	std::string_view name, wpi::SmallVectorImpl<char>& buf) const {
				101	auto it = m_elems.find(name);
				102	if (it == m_elems.end()) {
				103	return {};
				104	}
				105	bool err = false;
				106	auto val = wpi::UnescapeURI(it->second, buf, &err);
				107	if (err) {
				108	return {};
				109	}
				110	return val;
				111	}
				112
				113	HttpPath::HttpPath(std::string_view path) {
				114	// special-case root path to be a single empty element
				115	if (path == "/") {
				116	m_pathEnds.emplace_back(0);
				117	return;
				118	}
				119	wpi::SmallVector<std::string_view, 16> pathElems;
				120	split(path, pathElems, '/', 100, false);
				121	for (auto elem : pathElems) {
				122	SmallString<64> buf;
				123	bool err = false;
				124	auto val = wpi::UnescapeURI(elem, buf, &err);
				125	if (err) {
				126	m_pathEnds.clear();
				127	return;
				128	}
				129	m_pathBuf += val;
				130	m_pathEnds.emplace_back(m_pathBuf.size());
				131	}
				132	}
				133
				134	bool HttpPath::startswith(size_t start,
				135	std::span<const std::string_view> match) const {
				136	if (m_pathEnds.size() < (start + match.size())) {
				137	return false;
				138	}
				139	bool first = start == 0;
				140	auto p = m_pathEnds.begin() + start;
				141	for (auto m : match) {
				142	auto val = slice(m_pathBuf, first ? 0 : (p - 1), p);
				143	if (val != m) {
				144	return false;
				145	}
				146	first = false;
				147	++p;
				148	}
				149	return true;
				150	}
				151
				152	std::string_view HttpPath::operator[](size_t n) const {
				153	return slice(m_pathBuf, n == 0 ? 0 : m_pathEnds[n - 1], m_pathEnds[n]);
				154	}
				155
				156	bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
				157	SmallVectorImpl<char>* contentLength) {
				158	if (contentType) {
				159	contentType->clear();
				160	}
				161	if (contentLength) {
				162	contentLength->clear();
				163	}
				164
				165	bool inContentType = false;
				166	bool inContentLength = false;
				167	SmallString<64> lineBuf;
				168	for (;;) {
				169	std::string_view line = rtrim(is.getline(lineBuf, 1024));
				170	if (is.has_error()) {
				171	return false;
				172	}
				173	if (line.empty()) {
				174	return true; // empty line signals end of headers
				175	}
				176
				177	// header fields start at the beginning of the line
				178	if (!std::isspace(line[0])) {
				179	inContentType = false;
				180	inContentLength = false;
				181	std::string_view field;
				182	std::tie(field, line) = split(line, ':');
				183	field = rtrim(field);
				184	if (equals_lower(field, "content-type")) {
				185	inContentType = true;
				186	} else if (equals_lower(field, "content-length")) {
				187	inContentLength = true;
				188	} else {
				189	continue; // ignore other fields
				190	}
				191	}
				192
				193	// collapse whitespace
				194	line = ltrim(line);
				195
				196	// save field data
				197	if (inContentType && contentType) {
				198	contentType->append(line.begin(), line.end());
				199	} else if (inContentLength && contentLength) {
				200	contentLength->append(line.begin(), line.end());
				201	}
				202	}
				203	}
				204
				205	bool FindMultipartBoundary(raw_istream& is, std::string_view boundary,
				206	std::string* saveBuf) {
				207	SmallString<64> searchBuf;
				208	searchBuf.resize(boundary.size() + 2);
				209	size_t searchPos = 0;
				210
				211	// Per the spec, the --boundary should be preceded by \r\n, so do a first
				212	// pass of 1-byte reads to throw those away (common case) and keep the
				213	// last non-\r\n character in searchBuf.
				214	if (!saveBuf) {
				215	do {
				216	is.read(searchBuf.data(), 1);
				217	if (is.has_error()) {
				218	return false;
				219	}
				220	} while (searchBuf[0] == '\r' \|\| searchBuf[0] == '\n');
				221	searchPos = 1;
				222	}
				223
				224	// Look for --boundary. Read boundarysize+2 bytes at a time
				225	// during the search to speed up the reads, then fast-scan for -,
				226	// and only then match the entire boundary. This will be slow if
				227	// there's a bunch of continuous -'s in the output, but that's unlikely.
				228	for (;;) {
				229	is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
				230	if (is.has_error()) {
				231	return false;
				232	}
				233
				234	// Did we find the boundary?
				235	if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
				236	wpi::substr(searchBuf, 2) == boundary) {
				237	return true;
				238	}
				239
				240	// Fast-scan for '-'
				241	size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
				242	if (pos == std::string_view::npos) {
				243	if (saveBuf) {
				244	saveBuf->append(searchBuf.data(), searchBuf.size());
				245	}
				246	} else {
				247	if (saveBuf) {
				248	saveBuf->append(searchBuf.data(), pos);
				249	}
				250
				251	// move '-' and following to start of buffer (next read will fill)
				252	std::memmove(searchBuf.data(), searchBuf.data() + pos,
				253	searchBuf.size() - pos);
				254	searchPos = searchBuf.size() - pos;
				255	}
				256	}
				257	}
				258
				259	HttpLocation::HttpLocation(std::string_view url_, bool* error,
				260	std::string* errorMsg)
				261	: url{url_} {
				262	// Split apart into components
				263	std::string_view query{url};
				264
				265	// scheme:
				266	std::string_view scheme;
				267	std::tie(scheme, query) = split(query, ':');
				268	if (!equals_lower(scheme, "http")) {
				269	*errorMsg = "only supports http URLs";
				270	*error = true;
				271	return;
				272	}
				273
				274	// "//"
				275	if (!starts_with(query, "//")) {
				276	*errorMsg = "expected http://...";
				277	*error = true;
				278	return;
				279	}
				280	query.remove_prefix(2);
				281
				282	// user:password@host:port/
				283	std::string_view authority;
				284	std::tie(authority, query) = split(query, '/');
				285
				286	auto [userpass, hostport] = split(authority, '@');
				287	// split leaves the RHS empty if the split char isn't present...
				288	if (hostport.empty()) {
				289	hostport = userpass;
				290	userpass = {};
				291	}
				292
				293	if (!userpass.empty()) {
				294	auto [rawUser, rawPassword] = split(userpass, ':');
				295	SmallString<64> userBuf, passBuf;
				296	user = UnescapeURI(rawUser, userBuf, error);
				297	if (*error) {
				298	*errorMsg = fmt::format("could not unescape user \"{}\"", rawUser);
				299	return;
				300	}
				301	password = UnescapeURI(rawPassword, passBuf, error);
				302	if (*error) {
				303	*errorMsg =
				304	fmt::format("could not unescape password \"{}\"", rawPassword);
				305	return;
				306	}
				307	}
				308
				309	std::string_view portStr;
				310	std::tie(host, portStr) = rsplit(hostport, ':');
				311	if (host.empty()) {
				312	*errorMsg = "host is empty";
				313	*error = true;
				314	return;
				315	}
				316	if (portStr.empty()) {
				317	port = 80;
				318	} else if (auto p = parse_integer<int>(portStr, 10)) {
				319	port = p.value();
				320	} else {
				321	*errorMsg = fmt::format("port \"{}\" is not an integer", portStr);
				322	*error = true;
				323	return;
				324	}
				325
				326	// path?query#fragment
				327	std::tie(query, fragment) = split(query, '#');
				328	std::tie(path, query) = split(query, '?');
				329
				330	// Split query string into parameters
				331	while (!query.empty()) {
				332	// split out next param and value
				333	std::string_view rawParam, rawValue;
				334	std::tie(rawParam, query) = split(query, '&');
				335	if (rawParam.empty()) {
				336	continue; // ignore "&&"
				337	}
				338	std::tie(rawParam, rawValue) = split(rawParam, '=');
				339
				340	// unescape param
				341	*error = false;
				342	SmallString<64> paramBuf;
				343	std::string_view param = UnescapeURI(rawParam, paramBuf, error);
				344	if (*error) {
				345	*errorMsg = fmt::format("could not unescape parameter \"{}\"", rawParam);
				346	return;
				347	}
				348
				349	// unescape value
				350	SmallString<64> valueBuf;
				351	std::string_view value = UnescapeURI(rawValue, valueBuf, error);
				352	if (*error) {
				353	*errorMsg = fmt::format("could not unescape value \"{}\"", rawValue);
				354	return;
				355	}
				356
				357	params.emplace_back(std::make_pair(param, value));
				358	}
				359
				360	*error = false;
				361	}
				362
				363	void HttpRequest::SetAuth(const HttpLocation& loc) {
				364	if (!loc.user.empty()) {
				365	SmallString<64> userpass;
				366	userpass += loc.user;
				367	userpass += ':';
				368	userpass += loc.password;
				369	Base64Encode(userpass.str(), &auth);
				370	}
				371	}
				372
				373	bool HttpConnection::Handshake(const HttpRequest& request,
				374	std::string* warnMsg) {
				375	// send GET request
				376	os << "GET /" << request.path << " HTTP/1.1\r\n";
				377	os << "Host: " << request.host << "\r\n";
				378	if (!request.auth.empty()) {
				379	os << "Authorization: Basic " << request.auth << "\r\n";
				380	}
				381	os << "\r\n";
				382	os.flush();
				383
				384	// read first line of response
				385	SmallString<64> lineBuf;
				386	std::string_view line = rtrim(is.getline(lineBuf, 1024));
				387	if (is.has_error()) {
				388	*warnMsg = "disconnected before response";
				389	return false;
				390	}
				391
				392	// see if we got a HTTP 200 response
				393	std::string_view httpver, code, codeText;
				394	std::tie(httpver, line) = split(line, ' ');
				395	std::tie(code, codeText) = split(line, ' ');
				396	if (!starts_with(httpver, "HTTP")) {
				397	*warnMsg = "did not receive HTTP response";
				398	return false;
				399	}
				400	if (code != "200") {
				401	*warnMsg = fmt::format("received {} {} response", code, codeText);
				402	return false;
				403	}
				404
				405	// Parse headers
				406	if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
				407	*warnMsg = "disconnected during headers";
				408	return false;
				409	}
				410
				411	return true;
				412	}
				413
				414	void HttpMultipartScanner::SetBoundary(std::string_view boundary) {
				415	m_boundaryWith = "\n--";
				416	m_boundaryWith += boundary;
				417	m_boundaryWithout = "\n";
				418	m_boundaryWithout += boundary;
				419	m_dashes = kUnknown;
				420	}
				421
				422	void HttpMultipartScanner::Reset(bool saveSkipped) {
				423	m_saveSkipped = saveSkipped;
				424	m_state = kBoundary;
				425	m_posWith = 0;
				426	m_posWithout = 0;
				427	m_buf.resize(0);
				428	}
				429
				430	std::string_view HttpMultipartScanner::Execute(std::string_view in) {
				431	if (m_state == kDone) {
				432	Reset(m_saveSkipped);
				433	}
				434	if (m_saveSkipped) {
				435	m_buf += in;
				436	}
				437
				438	size_t pos = 0;
				439	if (m_state == kBoundary) {
				440	for (char ch : in) {
				441	++pos;
				442	if (m_dashes != kWithout) {
				443	if (ch == m_boundaryWith[m_posWith]) {
				444	++m_posWith;
				445	if (m_posWith == m_boundaryWith.size()) {
				446	// Found the boundary; transition to padding
				447	m_state = kPadding;
				448	m_dashes = kWith; // no longer accept plain 'boundary'
				449	break;
				450	}
				451	} else if (ch == m_boundaryWith[0]) {
				452	m_posWith = 1;
				453	} else {
				454	m_posWith = 0;
				455	}
				456	}
				457
				458	if (m_dashes != kWith) {
				459	if (ch == m_boundaryWithout[m_posWithout]) {
				460	++m_posWithout;
				461	if (m_posWithout == m_boundaryWithout.size()) {
				462	// Found the boundary; transition to padding
				463	m_state = kPadding;
				464	m_dashes = kWithout; // no longer accept '--boundary'
				465	break;
				466	}
				467	} else if (ch == m_boundaryWithout[0]) {
				468	m_posWithout = 1;
				469	} else {
				470	m_posWithout = 0;
				471	}
				472	}
				473	}
				474	}
				475
				476	if (m_state == kPadding) {
				477	for (char ch : drop_front(in, pos)) {
				478	++pos;
				479	if (ch == '\n') {
				480	// Found the LF; return remaining input buffer (following it)
				481	m_state = kDone;
				482	if (m_saveSkipped) {
				483	m_buf.resize(m_buf.size() - in.size() + pos);
				484	}
				485	return drop_front(in, pos);
				486	}
				487	}
				488	}
				489
				490	// We consumed the entire input
				491	return {};
				492	}
				493
				494	} // namespace wpi