Blame - wpiutil/src/main/native/cpp/HttpUtil.cpp - RealtimeRoboticsGroup/test

blob: 37fea6014d1e5c421de3944d968940923fb56dfb [file] [log] [blame]

Brian Silverman	41cdd3e	2019-01-19 19:48:58 -0800	[diff] [blame]	1	/----------------------------------------------------------------------------/
				2	/* Copyright (c) 2016-2018 FIRST. All Rights Reserved. */
				3	/* Open Source Software - may be modified and shared by FRC teams. The code */
				4	/* must be accompanied by the FIRST BSD license file in the root directory of */
				5	/* the project. */
				6	/----------------------------------------------------------------------------/
				7
				8	#include "wpi/HttpUtil.h"
				9
				10	#include <cctype>
				11
				12	#include "wpi/Base64.h"
				13	#include "wpi/STLExtras.h"
				14	#include "wpi/StringExtras.h"
				15	#include "wpi/TCPConnector.h"
				16	#include "wpi/raw_ostream.h"
				17
				18	namespace wpi {
				19
				20	StringRef UnescapeURI(const Twine& str, SmallVectorImpl<char>& buf,
				21	bool* error) {
				22	SmallString<128> strBuf;
				23	StringRef strStr = str.toStringRef(strBuf);
				24	buf.clear();
				25	for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
				26	// pass non-escaped characters to output
				27	if (*i != '%') {
				28	// decode + to space
				29	if (*i == '+')
				30	buf.push_back(' ');
				31	else
				32	buf.push_back(*i);
				33	continue;
				34	}
				35
				36	// are there enough characters left?
				37	if (i + 2 >= end) {
				38	*error = true;
				39	return StringRef{};
				40	}
				41
				42	// replace %xx with the corresponding character
				43	unsigned val1 = hexDigitValue(*++i);
				44	if (val1 == -1U) {
				45	*error = true;
				46	return StringRef{};
				47	}
				48	unsigned val2 = hexDigitValue(*++i);
				49	if (val2 == -1U) {
				50	*error = true;
				51	return StringRef{};
				52	}
				53	buf.push_back((val1 << 4) \| val2);
				54	}
				55
				56	*error = false;
				57	return StringRef{buf.data(), buf.size()};
				58	}
				59
				60	StringRef EscapeURI(const Twine& str, SmallVectorImpl<char>& buf,
				61	bool spacePlus) {
				62	static const char* const hexLut = "0123456789ABCDEF";
				63
				64	SmallString<128> strBuf;
				65	StringRef strStr = str.toStringRef(strBuf);
				66	buf.clear();
				67	for (auto i = strStr.begin(), end = strStr.end(); i != end; ++i) {
				68	// pass unreserved characters to output
				69	if (std::isalnum(i) \|\| i == '-' \|\| i == '_' \|\| i == '.' \|\| *i == '~') {
				70	buf.push_back(*i);
				71	continue;
				72	}
				73
				74	// encode space to +
				75	if (spacePlus && *i == ' ') {
				76	buf.push_back('+');
				77	continue;
				78	}
				79
				80	// convert others to %xx
				81	buf.push_back('%');
				82	buf.push_back(hexLut[((*i) >> 4) & 0x0f]);
				83	buf.push_back(hexLut[(*i) & 0x0f]);
				84	}
				85
				86	return StringRef{buf.data(), buf.size()};
				87	}
				88
				89	bool ParseHttpHeaders(raw_istream& is, SmallVectorImpl<char>* contentType,
				90	SmallVectorImpl<char>* contentLength) {
				91	if (contentType) contentType->clear();
				92	if (contentLength) contentLength->clear();
				93
				94	bool inContentType = false;
				95	bool inContentLength = false;
				96	SmallString<64> lineBuf;
				97	for (;;) {
				98	StringRef line = is.getline(lineBuf, 1024).rtrim();
				99	if (is.has_error()) return false;
				100	if (line.empty()) return true; // empty line signals end of headers
				101
				102	// header fields start at the beginning of the line
				103	if (!std::isspace(line[0])) {
				104	inContentType = false;
				105	inContentLength = false;
				106	StringRef field;
				107	std::tie(field, line) = line.split(':');
				108	field = field.rtrim();
				109	if (field.equals_lower("content-type"))
				110	inContentType = true;
				111	else if (field.equals_lower("content-length"))
				112	inContentLength = true;
				113	else
				114	continue; // ignore other fields
				115	}
				116
				117	// collapse whitespace
				118	line = line.ltrim();
				119
				120	// save field data
				121	if (inContentType && contentType)
				122	contentType->append(line.begin(), line.end());
				123	else if (inContentLength && contentLength)
				124	contentLength->append(line.begin(), line.end());
				125	}
				126	}
				127
				128	bool FindMultipartBoundary(raw_istream& is, StringRef boundary,
				129	std::string* saveBuf) {
				130	SmallString<64> searchBuf;
				131	searchBuf.resize(boundary.size() + 2);
				132	size_t searchPos = 0;
				133
				134	// Per the spec, the --boundary should be preceded by \r\n, so do a first
				135	// pass of 1-byte reads to throw those away (common case) and keep the
				136	// last non-\r\n character in searchBuf.
				137	if (!saveBuf) {
				138	do {
				139	is.read(searchBuf.data(), 1);
				140	if (is.has_error()) return false;
				141	} while (searchBuf[0] == '\r' \|\| searchBuf[0] == '\n');
				142	searchPos = 1;
				143	}
				144
				145	// Look for --boundary. Read boundarysize+2 bytes at a time
				146	// during the search to speed up the reads, then fast-scan for -,
				147	// and only then match the entire boundary. This will be slow if
				148	// there's a bunch of continuous -'s in the output, but that's unlikely.
				149	for (;;) {
				150	is.read(searchBuf.data() + searchPos, searchBuf.size() - searchPos);
				151	if (is.has_error()) return false;
				152
				153	// Did we find the boundary?
				154	if (searchBuf[0] == '-' && searchBuf[1] == '-' &&
				155	searchBuf.substr(2) == boundary)
				156	return true;
				157
				158	// Fast-scan for '-'
				159	size_t pos = searchBuf.find('-', searchBuf[0] == '-' ? 1 : 0);
				160	if (pos == StringRef::npos) {
				161	if (saveBuf) saveBuf->append(searchBuf.data(), searchBuf.size());
				162	} else {
				163	if (saveBuf) saveBuf->append(searchBuf.data(), pos);
				164
				165	// move '-' and following to start of buffer (next read will fill)
				166	std::memmove(searchBuf.data(), searchBuf.data() + pos,
				167	searchBuf.size() - pos);
				168	searchPos = searchBuf.size() - pos;
				169	}
				170	}
				171	}
				172
				173	HttpLocation::HttpLocation(const Twine& url_, bool* error,
				174	std::string* errorMsg)
				175	: url{url_.str()} {
				176	// Split apart into components
				177	StringRef query{url};
				178
				179	// scheme:
				180	StringRef scheme;
				181	std::tie(scheme, query) = query.split(':');
				182	if (!scheme.equals_lower("http")) {
				183	*errorMsg = "only supports http URLs";
				184	*error = true;
				185	return;
				186	}
				187
				188	// "//"
				189	if (!query.startswith("//")) {
				190	*errorMsg = "expected http://...";
				191	*error = true;
				192	return;
				193	}
				194	query = query.drop_front(2);
				195
				196	// user:password@host:port/
				197	StringRef authority;
				198	std::tie(authority, query) = query.split('/');
				199
				200	StringRef userpass, hostport;
				201	std::tie(userpass, hostport) = authority.split('@');
				202	// split leaves the RHS empty if the split char isn't present...
				203	if (hostport.empty()) {
				204	hostport = userpass;
				205	userpass = StringRef{};
				206	}
				207
				208	if (!userpass.empty()) {
				209	StringRef rawUser, rawPassword;
				210	std::tie(rawUser, rawPassword) = userpass.split(':');
				211	SmallString<64> userBuf, passBuf;
				212	user = UnescapeURI(rawUser, userBuf, error);
				213	if (*error) {
				214	raw_string_ostream oss(*errorMsg);
				215	oss << "could not unescape user \"" << rawUser << "\"";
				216	oss.flush();
				217	return;
				218	}
				219	password = UnescapeURI(rawPassword, passBuf, error);
				220	if (*error) {
				221	raw_string_ostream oss(*errorMsg);
				222	oss << "could not unescape password \"" << rawPassword << "\"";
				223	oss.flush();
				224	return;
				225	}
				226	}
				227
				228	StringRef portStr;
				229	std::tie(host, portStr) = hostport.rsplit(':');
				230	if (host.empty()) {
				231	*errorMsg = "host is empty";
				232	*error = true;
				233	return;
				234	}
				235	if (portStr.empty()) {
				236	port = 80;
				237	} else if (portStr.getAsInteger(10, port)) {
				238	raw_string_ostream oss(*errorMsg);
				239	oss << "port \"" << portStr << "\" is not an integer";
				240	oss.flush();
				241	*error = true;
				242	return;
				243	}
				244
				245	// path?query#fragment
				246	std::tie(query, fragment) = query.split('#');
				247	std::tie(path, query) = query.split('?');
				248
				249	// Split query string into parameters
				250	while (!query.empty()) {
				251	// split out next param and value
				252	StringRef rawParam, rawValue;
				253	std::tie(rawParam, query) = query.split('&');
				254	if (rawParam.empty()) continue; // ignore "&&"
				255	std::tie(rawParam, rawValue) = rawParam.split('=');
				256
				257	// unescape param
				258	*error = false;
				259	SmallString<64> paramBuf;
				260	StringRef param = UnescapeURI(rawParam, paramBuf, error);
				261	if (*error) {
				262	raw_string_ostream oss(*errorMsg);
				263	oss << "could not unescape parameter \"" << rawParam << "\"";
				264	oss.flush();
				265	return;
				266	}
				267
				268	// unescape value
				269	SmallString<64> valueBuf;
				270	StringRef value = UnescapeURI(rawValue, valueBuf, error);
				271	if (*error) {
				272	raw_string_ostream oss(*errorMsg);
				273	oss << "could not unescape value \"" << rawValue << "\"";
				274	oss.flush();
				275	return;
				276	}
				277
				278	params.emplace_back(std::make_pair(param, value));
				279	}
				280
				281	*error = false;
				282	}
				283
				284	void HttpRequest::SetAuth(const HttpLocation& loc) {
				285	if (!loc.user.empty()) {
				286	SmallString<64> userpass;
				287	userpass += loc.user;
				288	userpass += ':';
				289	userpass += loc.password;
				290	Base64Encode(userpass, &auth);
				291	}
				292	}
				293
				294	bool HttpConnection::Handshake(const HttpRequest& request,
				295	std::string* warnMsg) {
				296	// send GET request
				297	os << "GET /" << request.path << " HTTP/1.1\r\n";
				298	os << "Host: " << request.host << "\r\n";
				299	if (!request.auth.empty())
				300	os << "Authorization: Basic " << request.auth << "\r\n";
				301	os << "\r\n";
				302	os.flush();
				303
				304	// read first line of response
				305	SmallString<64> lineBuf;
				306	StringRef line = is.getline(lineBuf, 1024).rtrim();
				307	if (is.has_error()) {
				308	*warnMsg = "disconnected before response";
				309	return false;
				310	}
				311
				312	// see if we got a HTTP 200 response
				313	StringRef httpver, code, codeText;
				314	std::tie(httpver, line) = line.split(' ');
				315	std::tie(code, codeText) = line.split(' ');
				316	if (!httpver.startswith("HTTP")) {
				317	*warnMsg = "did not receive HTTP response";
				318	return false;
				319	}
				320	if (code != "200") {
				321	raw_string_ostream oss(*warnMsg);
				322	oss << "received " << code << " " << codeText << " response";
				323	oss.flush();
				324	return false;
				325	}
				326
				327	// Parse headers
				328	if (!ParseHttpHeaders(is, &contentType, &contentLength)) {
				329	*warnMsg = "disconnected during headers";
				330	return false;
				331	}
				332
				333	return true;
				334	}
				335
				336	void HttpMultipartScanner::SetBoundary(StringRef boundary) {
				337	m_boundaryWith = "\n--";
				338	m_boundaryWith += boundary;
				339	m_boundaryWithout = "\n";
				340	m_boundaryWithout += boundary;
				341	m_dashes = kUnknown;
				342	}
				343
				344	void HttpMultipartScanner::Reset(bool saveSkipped) {
				345	m_saveSkipped = saveSkipped;
				346	m_state = kBoundary;
				347	m_posWith = 0;
				348	m_posWithout = 0;
				349	m_buf.resize(0);
				350	}
				351
				352	StringRef HttpMultipartScanner::Execute(StringRef in) {
				353	if (m_state == kDone) Reset(m_saveSkipped);
				354	if (m_saveSkipped) m_buf += in;
				355
				356	size_t pos = 0;
				357	if (m_state == kBoundary) {
				358	for (char ch : in) {
				359	++pos;
				360	if (m_dashes != kWithout) {
				361	if (ch == m_boundaryWith[m_posWith]) {
				362	++m_posWith;
				363	if (m_posWith == m_boundaryWith.size()) {
				364	// Found the boundary; transition to padding
				365	m_state = kPadding;
				366	m_dashes = kWith; // no longer accept plain 'boundary'
				367	break;
				368	}
				369	} else if (ch == m_boundaryWith[0]) {
				370	m_posWith = 1;
				371	} else {
				372	m_posWith = 0;
				373	}
				374	}
				375
				376	if (m_dashes != kWith) {
				377	if (ch == m_boundaryWithout[m_posWithout]) {
				378	++m_posWithout;
				379	if (m_posWithout == m_boundaryWithout.size()) {
				380	// Found the boundary; transition to padding
				381	m_state = kPadding;
				382	m_dashes = kWithout; // no longer accept '--boundary'
				383	break;
				384	}
				385	} else if (ch == m_boundaryWithout[0]) {
				386	m_posWithout = 1;
				387	} else {
				388	m_posWithout = 0;
				389	}
				390	}
				391	}
				392	}
				393
				394	if (m_state == kPadding) {
				395	for (char ch : in.drop_front(pos)) {
				396	++pos;
				397	if (ch == '\n') {
				398	// Found the LF; return remaining input buffer (following it)
				399	m_state = kDone;
				400	if (m_saveSkipped) m_buf.resize(m_buf.size() - in.size() + pos);
				401	return in.drop_front(pos);
				402	}
				403	}
				404	}
				405
				406	// We consumed the entire input
				407	return StringRef{};
				408	}
				409
				410	} // namespace wpi