Refactor URL parser.

This commit is contained in:
anonimal 2015-11-08 01:43:25 +00:00
parent 1e7d4eb711
commit 66db088761
4 changed files with 140 additions and 112 deletions

View file

@ -466,7 +466,7 @@ namespace client
bool success = false; bool success = false;
i2p::util::http::url u (m_Link); i2p::util::http::url u (m_Link);
i2p::data::IdentHash ident; i2p::data::IdentHash ident;
if (m_Book.GetIdentHash (u.host_, ident) && m_Book.getSharedLocalDestination()) if (m_Book.GetIdentHash (u.m_host, ident) && m_Book.getSharedLocalDestination())
{ {
std::condition_variable newDataReceived; std::condition_variable newDataReceived;
std::mutex newDataReceivedMutex; std::mutex newDataReceivedMutex;
@ -487,13 +487,13 @@ namespace client
{ {
std::stringstream request, response; std::stringstream request, response;
// standard header // standard header
request << i2p::util::http::httpHeader(u.path_, u.host_, "1.1"); request << i2p::util::http::httpHeader(u.m_path, u.m_host, "1.1");
if (m_Etag.length () > 0) // etag if (m_Etag.length () > 0) // etag
request << i2p::util::http::IF_NONE_MATCH << ": \"" << m_Etag << "\"\r\n"; request << i2p::util::http::IF_NONE_MATCH << ": \"" << m_Etag << "\"\r\n";
if (m_LastModified.length () > 0) // if modified since if (m_LastModified.length () > 0) // if modified since
request << i2p::util::http::IF_MODIFIED_SINCE << ": " << m_LastModified << "\r\n"; request << i2p::util::http::IF_MODIFIED_SINCE << ": " << m_LastModified << "\r\n";
request << "\r\n"; // end of header request << "\r\n"; // end of header
auto stream = m_Book.getSharedLocalDestination()->CreateStream (leaseSet, u.port_); auto stream = m_Book.getSharedLocalDestination()->CreateStream (leaseSet, u.m_port);
stream->Send ((uint8_t *)request.str ().c_str (), request.str ().length ()); stream->Send ((uint8_t *)request.str ().c_str (), request.str ().length ());
uint8_t buf[4096]; uint8_t buf[4096];
@ -569,10 +569,10 @@ namespace client
LogPrint (eLogWarning, "Addressbook HTTP response ", status); LogPrint (eLogWarning, "Addressbook HTTP response ", status);
} }
else else
LogPrint (eLogError, "Address ", u.host_, " not found"); LogPrint (eLogError, "Address ", u.m_host, " not found");
} }
else else
LogPrint (eLogError, "Can't resolve ", u.host_); LogPrint (eLogError, "Can't resolve ", u.m_host);
LogPrint (eLogInfo, "Download complete ", success ? "Success" : "Failed"); LogPrint (eLogInfo, "Download complete ", success ? "Success" : "Failed");
m_Book.DownloadComplete (success); m_Book.DownloadComplete (success);
} }

View file

@ -122,7 +122,7 @@ namespace config {
int GetArg(const std::string& strArg, int nDefault) int GetArg(const std::string& strArg, int nDefault)
{ {
if(mapArgs.count(strArg)) if(mapArgs.count(strArg))
return stoi(mapArgs[strArg]); return std::stoi(mapArgs[strArg]);
return nDefault; return nDefault;
} }
@ -322,14 +322,14 @@ namespace http // also provides https
std::string httpsRequest (const std::string& address) std::string httpsRequest (const std::string& address)
{ {
url u(address); url u(address);
if (u.port_ == 80) u.port_ = 443; if (u.m_port == 80) u.m_port = 443;
i2p::data::TlsSession session (u.host_, u.port_); i2p::data::TlsSession session (u.m_host, u.m_port);
if (session.IsEstablished ()) if (session.IsEstablished ())
{ {
// send request // send request
std::stringstream ss; std::stringstream ss;
ss << httpHeader(u.path_, u.host_, "1.1"); ss << httpHeader(u.m_path, u.m_host, "1.1");
session.Send ((uint8_t *)ss.str ().c_str (), ss.str ().length ()); session.Send ((uint8_t *)ss.str ().c_str (), ss.str ().length ());
// read response // read response
@ -350,15 +350,15 @@ namespace http // also provides https
// please don't uncomment following line because it's not compatible with boost 1.46 // please don't uncomment following line because it's not compatible with boost 1.46
// 1.46 is default boost for Ubuntu 12.04 LTS // 1.46 is default boost for Ubuntu 12.04 LTS
//site.expires_from_now (boost::posix_time::seconds(30)); //site.expires_from_now (boost::posix_time::seconds(30));
if(u.port_ == 80) if(u.m_port == 80)
site.connect(u.host_, "http"); site.connect(u.m_host, "http");
else { else {
std::stringstream ss; ss << u.port_; std::stringstream ss; ss << u.m_port;
site.connect(u.host_, ss.str()); site.connect(u.m_host, ss.str());
} }
if(site) { if(site) {
// User-Agent is needed to get the server list routerInfo files. // User-Agent is needed to get the server list routerInfo files.
site << httpHeader(u.path_, u.host_, "1.1"); site << httpHeader(u.m_path, u.m_host, "1.1");
// read response and extract content // read response and extract content
return GetHttpContent(site); return GetHttpContent(site);
} else { } else {
@ -439,7 +439,7 @@ namespace http // also provides https
std::stringstream ss; std::stringstream ss;
// set header // set header
ss << httpHeader(u.path_, u.host_, "1.0"); ss << httpHeader(u.m_path, u.m_host, "1.0");
site << ss.str(); site << ss.str();
// read response // read response
@ -469,67 +469,85 @@ namespace http // also provides https
} }
} }
url::url(const std::string& url_s) url::url(const std::string& url)
{ {
portstr_ = "80"; m_portstr = "80";
port_ = 80; m_port = 80;
user_ = ""; m_user = "";
pass_ = ""; m_pass = "";
parse(url_s); parse(url);
} }
void url::parse(const std::string& url)
void url::parse(const std::string& url_s)
{ {
const std::string prot_end("://"); using namespace std;
std::string::const_iterator prot_i = search(
url_s.begin(), url_s.end(), prot_end.begin(), prot_end.end()
);
protocol_.reserve(distance(url_s.begin(), prot_i));
// Make protocol lowercase
transform(
url_s.begin(), prot_i, back_inserter(protocol_), std::ptr_fun<int, int>(std::tolower)
);
if(prot_i == url_s.end())
return;
advance(prot_i, prot_end.length());
std::string::const_iterator path_i = find(prot_i, url_s.end(), '/');
host_.reserve(distance(prot_i, path_i));
// Make host lowercase
transform(prot_i, path_i, back_inserter(host_), std::ptr_fun<int, int>(std::tolower));
// parse user/password /**
auto user_pass_i = find(host_.begin(), host_.end(), '@'); * This is a hack since colons are a part of the URI scheme
if(user_pass_i != host_.end()) { * and slashes aren't always needed. See RFC 7595.
std::string user_pass = std::string(host_.begin(), user_pass_i); * */
const string prot_end("://");
// Separate scheme from authority
string::const_iterator prot_i = search(
url.begin(), url.end(), prot_end.begin(), prot_end.end()
);
// Prepare for lowercase result and transform to lowercase
m_protocol.reserve(distance(url.begin(), prot_i));
transform(
url.begin(), prot_i,
back_inserter(m_protocol), ptr_fun<int, int>(tolower)
);
// TODO: better error checking and handling
if(prot_i == url.end())
return;
// Move onto authority. We assume it's valid and don't bother checking.
advance(prot_i, prot_end.length());
string::const_iterator path_i = find(prot_i, url.end(), '/');
// Prepare for lowercase result and transform to lowercase
m_host.reserve(distance(prot_i, path_i));
transform(
prot_i, path_i,
back_inserter(m_host), ptr_fun<int, int>(tolower)
);
// Parse user/password, assuming it's valid input
auto user_pass_i = find(m_host.begin(), m_host.end(), '@');
if(user_pass_i != m_host.end()) {
string user_pass = string(m_host.begin(), user_pass_i);
auto pass_i = find(user_pass.begin(), user_pass.end(), ':'); auto pass_i = find(user_pass.begin(), user_pass.end(), ':');
if (pass_i != user_pass.end()) { if (pass_i != user_pass.end()) {
user_ = std::string(user_pass.begin(), pass_i); m_user = string(user_pass.begin(), pass_i);
pass_ = std::string(pass_i + 1, user_pass.end()); m_pass = string(pass_i + 1, user_pass.end());
} else } else
user_ = user_pass; m_user = user_pass;
host_.assign(user_pass_i + 1, host_.end()); m_host.assign(user_pass_i + 1, m_host.end());
} }
// parse port // Parse port, assuming it's valid input
auto port_i = find(host_.begin(), host_.end(), ':'); auto port_i = find(m_host.begin(), m_host.end(), ':');
if(port_i != host_.end()) { if(port_i != m_host.end()) {
portstr_ = std::string(port_i + 1, host_.end()); m_portstr = string(port_i + 1, m_host.end());
host_.assign(host_.begin(), port_i); m_host.assign(m_host.begin(), port_i);
try { try {
port_ = boost::lexical_cast<decltype(port_)>(portstr_); m_port = boost::lexical_cast<decltype(m_port)>(m_portstr);
} catch(const std::exception& e) { } catch(const exception& e) {
port_ = 80; m_port = 80;
} }
} }
std::string::const_iterator query_i = find(path_i, url_s.end(), '?'); // Parse query, assuming it's valid input
path_.assign(path_i, query_i); string::const_iterator query_i = find(path_i, url.end(), '?');
if( query_i != url_s.end() ) m_path.assign(path_i, query_i);
if( query_i != url.end() )
++query_i; ++query_i;
query_.assign(query_i, url_s.end()); m_query.assign(query_i, url.end());
} }
std::string urlDecode(const std::string& data) std::string urlDecode(const std::string& data)

View file

@ -167,19 +167,29 @@ namespace util
/** /**
* Provides functionality for parsing URLs. * Provides functionality for parsing URLs.
*/ */
struct url { class url {
/** /**
* Parse a url given as a string. * The code for parse() was originally copied/pasted from
* https://stackoverflow.com/questions/2616011/easy-way-to-parse-a-url-in-c-cross-platform
*
* This function is a URI parser (not a URL parser) and is a hack at best.
* See cpp-netlib for a better URI parsing implementation with Boost.
*
* Note: fragments are not parsed by this function (if they should
* ever be needed in the future).
*
* @param string url
*/ */
url(const std::string& url_s); void parse(const std::string& url);
private:
void parse(const std::string& url_s);
public: public:
std::string protocol_, host_, path_, query_; /**
std::string portstr_; * Parse a URI given as a string.
unsigned int port_; */
std::string user_; url(const std::string& url);
std::string pass_; public:
std::string m_protocol, m_host, m_path, m_query, m_portstr;
unsigned int m_port;
std::string m_user, m_pass;
}; };
} }

View file

@ -17,76 +17,76 @@ BOOST_AUTO_TEST_CASE(DecodeUrl)
} }
BOOST_AUTO_TEST_CASE(ParseUrlProtocol) BOOST_AUTO_TEST_CASE(ParseUrlProtocol)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").protocol_, "http"); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_protocol, "http");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").protocol_, "http"); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_protocol, "http");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").protocol_, "ftp"); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").m_protocol, "ftp");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").protocol_, "ssh"); BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_protocol, "ssh");
BOOST_CHECK_EQUAL(url("").protocol_, ""); BOOST_CHECK_EQUAL(url("").m_protocol, "");
} }
BOOST_AUTO_TEST_CASE(ParseUrlHost) BOOST_AUTO_TEST_CASE(ParseUrlHost)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").host_, "127.0.0.1"); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_host, "127.0.0.1");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").host_, "site.com"); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_host, "site.com");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").host_, "localhost"); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").m_host, "localhost");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").host_, "localhost"); BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_host, "localhost");
BOOST_CHECK_EQUAL(url("").host_, ""); BOOST_CHECK_EQUAL(url("").m_host, "");
} }
BOOST_AUTO_TEST_CASE(ParseUrlPath) BOOST_AUTO_TEST_CASE(ParseUrlPath)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").path_, "/asdasd"); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_path, "/asdasd");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").path_, "/A/B"); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_path, "/A/B");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").path_, "/A/B/C/D"); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_path, "/A/B/C/D");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").path_, ""); BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_path, "");
BOOST_CHECK_EQUAL(url("").path_, ""); BOOST_CHECK_EQUAL(url("").m_path, "");
} }
BOOST_AUTO_TEST_CASE(ParseUrlQuery) BOOST_AUTO_TEST_CASE(ParseUrlQuery)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").query_, "qqqqqqqqqqqq"); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_query, "qqqqqqqqqqqq");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").query_, "q"); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_query, "q");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").query_, "x=A"); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_query, "x=A");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").query_, ""); BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_query, "");
BOOST_CHECK_EQUAL(url("").query_, ""); BOOST_CHECK_EQUAL(url("").m_query, "");
} }
BOOST_AUTO_TEST_CASE(ParseUrlPortStr) BOOST_AUTO_TEST_CASE(ParseUrlPortStr)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").portstr_, "7070"); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_portstr, "7070");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").portstr_, "err_port"); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_portstr, "err_port");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").portstr_, "123"); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_portstr, "123");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").portstr_, "123"); BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_portstr, "123");
BOOST_CHECK_EQUAL(url("").portstr_, "80"); BOOST_CHECK_EQUAL(url("").m_portstr, "80");
} }
BOOST_AUTO_TEST_CASE(ParseUrlPort) BOOST_AUTO_TEST_CASE(ParseUrlPort)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").port_, 7070); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_port, 7070);
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").port_, 80); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_port, 80);
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").port_, 123); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_port, 123);
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").port_, 123); BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_port, 123);
BOOST_CHECK_EQUAL(url("").port_, 80); BOOST_CHECK_EQUAL(url("").m_port, 80);
} }
BOOST_AUTO_TEST_CASE(ParseUrlUser) BOOST_AUTO_TEST_CASE(ParseUrlUser)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").user_, ""); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_user, "");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").user_, "user"); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_user, "user");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").user_, "user"); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_user, "user");
BOOST_CHECK_EQUAL(url("SSH://@localhost:123").user_, ""); BOOST_CHECK_EQUAL(url("SSH://@localhost:123").m_user, "");
BOOST_CHECK_EQUAL(url("SSH://user:@localhost:123").user_, "user"); BOOST_CHECK_EQUAL(url("SSH://user:@localhost:123").m_user, "user");
BOOST_CHECK_EQUAL(url("").user_, ""); BOOST_CHECK_EQUAL(url("").m_user, "");
} }
BOOST_AUTO_TEST_CASE(ParseUrlPassword) BOOST_AUTO_TEST_CASE(ParseUrlPassword)
{ {
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").pass_, ""); BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_pass, "");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").pass_, "password"); BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_pass, "password");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").pass_, ""); BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_pass, "");
BOOST_CHECK_EQUAL(url("SSH://@localhost:123").pass_, ""); BOOST_CHECK_EQUAL(url("SSH://@localhost:123").m_pass, "");
BOOST_CHECK_EQUAL(url("SSH://:password@localhost:123").pass_, "password"); BOOST_CHECK_EQUAL(url("SSH://:password@localhost:123").m_pass, "password");
BOOST_CHECK_EQUAL(url("").pass_, ""); BOOST_CHECK_EQUAL(url("").m_pass, "");
} }
BOOST_AUTO_TEST_CASE(ParseHTTPRequestNoHeaders) BOOST_AUTO_TEST_CASE(ParseHTTPRequestNoHeaders)