Refactor URL parser.

This commit is contained in:
anonimal 2015-11-08 01:43:25 +00:00
parent 1e7d4eb711
commit 66db088761
4 changed files with 140 additions and 112 deletions

View file

@ -466,7 +466,7 @@ namespace client
bool success = false;
i2p::util::http::url u (m_Link);
i2p::data::IdentHash ident;
if (m_Book.GetIdentHash (u.host_, ident) && m_Book.getSharedLocalDestination())
if (m_Book.GetIdentHash (u.m_host, ident) && m_Book.getSharedLocalDestination())
{
std::condition_variable newDataReceived;
std::mutex newDataReceivedMutex;
@ -487,13 +487,13 @@ namespace client
{
std::stringstream request, response;
// standard header
request << i2p::util::http::httpHeader(u.path_, u.host_, "1.1");
request << i2p::util::http::httpHeader(u.m_path, u.m_host, "1.1");
if (m_Etag.length () > 0) // etag
request << i2p::util::http::IF_NONE_MATCH << ": \"" << m_Etag << "\"\r\n";
if (m_LastModified.length () > 0) // if modified since
request << i2p::util::http::IF_MODIFIED_SINCE << ": " << m_LastModified << "\r\n";
request << "\r\n"; // end of header
auto stream = m_Book.getSharedLocalDestination()->CreateStream (leaseSet, u.port_);
auto stream = m_Book.getSharedLocalDestination()->CreateStream (leaseSet, u.m_port);
stream->Send ((uint8_t *)request.str ().c_str (), request.str ().length ());
uint8_t buf[4096];
@ -569,10 +569,10 @@ namespace client
LogPrint (eLogWarning, "Addressbook HTTP response ", status);
}
else
LogPrint (eLogError, "Address ", u.host_, " not found");
LogPrint (eLogError, "Address ", u.m_host, " not found");
}
else
LogPrint (eLogError, "Can't resolve ", u.host_);
LogPrint (eLogError, "Can't resolve ", u.m_host);
LogPrint (eLogInfo, "Download complete ", success ? "Success" : "Failed");
m_Book.DownloadComplete (success);
}

View file

@ -122,7 +122,7 @@ namespace config {
int GetArg(const std::string& strArg, int nDefault)
{
if(mapArgs.count(strArg))
return stoi(mapArgs[strArg]);
return std::stoi(mapArgs[strArg]);
return nDefault;
}
@ -322,14 +322,14 @@ namespace http // also provides https
std::string httpsRequest (const std::string& address)
{
url u(address);
if (u.port_ == 80) u.port_ = 443;
i2p::data::TlsSession session (u.host_, u.port_);
if (u.m_port == 80) u.m_port = 443;
i2p::data::TlsSession session (u.m_host, u.m_port);
if (session.IsEstablished ())
{
// send request
std::stringstream ss;
ss << httpHeader(u.path_, u.host_, "1.1");
ss << httpHeader(u.m_path, u.m_host, "1.1");
session.Send ((uint8_t *)ss.str ().c_str (), ss.str ().length ());
// read response
@ -350,15 +350,15 @@ namespace http // also provides https
// please don't uncomment following line because it's not compatible with boost 1.46
// 1.46 is default boost for Ubuntu 12.04 LTS
//site.expires_from_now (boost::posix_time::seconds(30));
if(u.port_ == 80)
site.connect(u.host_, "http");
if(u.m_port == 80)
site.connect(u.m_host, "http");
else {
std::stringstream ss; ss << u.port_;
site.connect(u.host_, ss.str());
std::stringstream ss; ss << u.m_port;
site.connect(u.m_host, ss.str());
}
if(site) {
// User-Agent is needed to get the server list routerInfo files.
site << httpHeader(u.path_, u.host_, "1.1");
site << httpHeader(u.m_path, u.m_host, "1.1");
// read response and extract content
return GetHttpContent(site);
} else {
@ -439,7 +439,7 @@ namespace http // also provides https
std::stringstream ss;
// set header
ss << httpHeader(u.path_, u.host_, "1.0");
ss << httpHeader(u.m_path, u.m_host, "1.0");
site << ss.str();
// read response
@ -469,67 +469,85 @@ namespace http // also provides https
}
}
url::url(const std::string& url_s)
url::url(const std::string& url)
{
portstr_ = "80";
port_ = 80;
user_ = "";
pass_ = "";
m_portstr = "80";
m_port = 80;
m_user = "";
m_pass = "";
parse(url_s);
parse(url);
}
void url::parse(const std::string& url_s)
void url::parse(const std::string& url)
{
const std::string prot_end("://");
std::string::const_iterator prot_i = search(
url_s.begin(), url_s.end(), prot_end.begin(), prot_end.end()
);
protocol_.reserve(distance(url_s.begin(), prot_i));
// Make protocol lowercase
transform(
url_s.begin(), prot_i, back_inserter(protocol_), std::ptr_fun<int, int>(std::tolower)
);
if(prot_i == url_s.end())
return;
advance(prot_i, prot_end.length());
std::string::const_iterator path_i = find(prot_i, url_s.end(), '/');
host_.reserve(distance(prot_i, path_i));
// Make host lowercase
transform(prot_i, path_i, back_inserter(host_), std::ptr_fun<int, int>(std::tolower));
using namespace std;
// parse user/password
auto user_pass_i = find(host_.begin(), host_.end(), '@');
if(user_pass_i != host_.end()) {
std::string user_pass = std::string(host_.begin(), user_pass_i);
/**
* This is a hack since colons are a part of the URI scheme
* and slashes aren't always needed. See RFC 7595.
* */
const string prot_end("://");
// Separate scheme from authority
string::const_iterator prot_i = search(
url.begin(), url.end(), prot_end.begin(), prot_end.end()
);
// Prepare for lowercase result and transform to lowercase
m_protocol.reserve(distance(url.begin(), prot_i));
transform(
url.begin(), prot_i,
back_inserter(m_protocol), ptr_fun<int, int>(tolower)
);
// TODO: better error checking and handling
if(prot_i == url.end())
return;
// Move onto authority. We assume it's valid and don't bother checking.
advance(prot_i, prot_end.length());
string::const_iterator path_i = find(prot_i, url.end(), '/');
// Prepare for lowercase result and transform to lowercase
m_host.reserve(distance(prot_i, path_i));
transform(
prot_i, path_i,
back_inserter(m_host), ptr_fun<int, int>(tolower)
);
// Parse user/password, assuming it's valid input
auto user_pass_i = find(m_host.begin(), m_host.end(), '@');
if(user_pass_i != m_host.end()) {
string user_pass = string(m_host.begin(), user_pass_i);
auto pass_i = find(user_pass.begin(), user_pass.end(), ':');
if (pass_i != user_pass.end()) {
user_ = std::string(user_pass.begin(), pass_i);
pass_ = std::string(pass_i + 1, user_pass.end());
m_user = string(user_pass.begin(), pass_i);
m_pass = string(pass_i + 1, user_pass.end());
} else
user_ = user_pass;
m_user = user_pass;
host_.assign(user_pass_i + 1, host_.end());
m_host.assign(user_pass_i + 1, m_host.end());
}
// parse port
auto port_i = find(host_.begin(), host_.end(), ':');
if(port_i != host_.end()) {
portstr_ = std::string(port_i + 1, host_.end());
host_.assign(host_.begin(), port_i);
// Parse port, assuming it's valid input
auto port_i = find(m_host.begin(), m_host.end(), ':');
if(port_i != m_host.end()) {
m_portstr = string(port_i + 1, m_host.end());
m_host.assign(m_host.begin(), port_i);
try {
port_ = boost::lexical_cast<decltype(port_)>(portstr_);
} catch(const std::exception& e) {
port_ = 80;
m_port = boost::lexical_cast<decltype(m_port)>(m_portstr);
} catch(const exception& e) {
m_port = 80;
}
}
std::string::const_iterator query_i = find(path_i, url_s.end(), '?');
path_.assign(path_i, query_i);
if( query_i != url_s.end() )
// Parse query, assuming it's valid input
string::const_iterator query_i = find(path_i, url.end(), '?');
m_path.assign(path_i, query_i);
if( query_i != url.end() )
++query_i;
query_.assign(query_i, url_s.end());
m_query.assign(query_i, url.end());
}
std::string urlDecode(const std::string& data)

View file

@ -167,19 +167,29 @@ namespace util
/**
* Provides functionality for parsing URLs.
*/
struct url {
/**
* Parse a url given as a string.
class url {
/**
* The code for parse() was originally copied/pasted from
* https://stackoverflow.com/questions/2616011/easy-way-to-parse-a-url-in-c-cross-platform
*
* This function is a URI parser (not a URL parser) and is a hack at best.
* See cpp-netlib for a better URI parsing implementation with Boost.
*
* Note: fragments are not parsed by this function (if they should
* ever be needed in the future).
*
* @param string url
*/
url(const std::string& url_s);
private:
void parse(const std::string& url_s);
void parse(const std::string& url);
public:
/**
* Parse a URI given as a string.
*/
url(const std::string& url);
public:
std::string protocol_, host_, path_, query_;
std::string portstr_;
unsigned int port_;
std::string user_;
std::string pass_;
std::string m_protocol, m_host, m_path, m_query, m_portstr;
unsigned int m_port;
std::string m_user, m_pass;
};
}

View file

@ -17,76 +17,76 @@ BOOST_AUTO_TEST_CASE(DecodeUrl)
}
BOOST_AUTO_TEST_CASE(ParseUrlProtocol)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").protocol_, "http");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").protocol_, "http");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").protocol_, "ftp");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").protocol_, "ssh");
BOOST_CHECK_EQUAL(url("").protocol_, "");
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_protocol, "http");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_protocol, "http");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").m_protocol, "ftp");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_protocol, "ssh");
BOOST_CHECK_EQUAL(url("").m_protocol, "");
}
BOOST_AUTO_TEST_CASE(ParseUrlHost)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").host_, "127.0.0.1");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").host_, "site.com");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").host_, "localhost");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").host_, "localhost");
BOOST_CHECK_EQUAL(url("").host_, "");
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_host, "127.0.0.1");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_host, "site.com");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123").m_host, "localhost");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_host, "localhost");
BOOST_CHECK_EQUAL(url("").m_host, "");
}
BOOST_AUTO_TEST_CASE(ParseUrlPath)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").path_, "/asdasd");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").path_, "/A/B");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").path_, "/A/B/C/D");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").path_, "");
BOOST_CHECK_EQUAL(url("").path_, "");
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_path, "/asdasd");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_path, "/A/B");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_path, "/A/B/C/D");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_path, "");
BOOST_CHECK_EQUAL(url("").m_path, "");
}
BOOST_AUTO_TEST_CASE(ParseUrlQuery)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").query_, "qqqqqqqqqqqq");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").query_, "q");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").query_, "x=A");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").query_, "");
BOOST_CHECK_EQUAL(url("").query_, "");
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_query, "qqqqqqqqqqqq");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_query, "q");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_query, "x=A");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_query, "");
BOOST_CHECK_EQUAL(url("").m_query, "");
}
BOOST_AUTO_TEST_CASE(ParseUrlPortStr)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").portstr_, "7070");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").portstr_, "err_port");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").portstr_, "123");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").portstr_, "123");
BOOST_CHECK_EQUAL(url("").portstr_, "80");
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_portstr, "7070");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_portstr, "err_port");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_portstr, "123");
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_portstr, "123");
BOOST_CHECK_EQUAL(url("").m_portstr, "80");
}
BOOST_AUTO_TEST_CASE(ParseUrlPort)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").port_, 7070);
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").port_, 80);
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").port_, 123);
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").port_, 123);
BOOST_CHECK_EQUAL(url("").port_, 80);
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_port, 7070);
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_port, 80);
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_port, 123);
BOOST_CHECK_EQUAL(url("SSH://user:pass@localhost:123").m_port, 123);
BOOST_CHECK_EQUAL(url("").m_port, 80);
}
BOOST_AUTO_TEST_CASE(ParseUrlUser)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").user_, "");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").user_, "user");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").user_, "user");
BOOST_CHECK_EQUAL(url("SSH://@localhost:123").user_, "");
BOOST_CHECK_EQUAL(url("SSH://user:@localhost:123").user_, "user");
BOOST_CHECK_EQUAL(url("").user_, "");
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_user, "");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_user, "user");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_user, "user");
BOOST_CHECK_EQUAL(url("SSH://@localhost:123").m_user, "");
BOOST_CHECK_EQUAL(url("SSH://user:@localhost:123").m_user, "user");
BOOST_CHECK_EQUAL(url("").m_user, "");
}
BOOST_AUTO_TEST_CASE(ParseUrlPassword)
{
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").pass_, "");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").pass_, "password");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").pass_, "");
BOOST_CHECK_EQUAL(url("SSH://@localhost:123").pass_, "");
BOOST_CHECK_EQUAL(url("SSH://:password@localhost:123").pass_, "password");
BOOST_CHECK_EQUAL(url("").pass_, "");
BOOST_CHECK_EQUAL(url("http://127.0.0.1:7070/asdasd?qqqqqqqqqqqq").m_pass, "");
BOOST_CHECK_EQUAL(url("http://user:password@site.com:err_port/A/B?q").m_pass, "password");
BOOST_CHECK_EQUAL(url("ftp://user@localhost:123/A/B/C/D?x=A").m_pass, "");
BOOST_CHECK_EQUAL(url("SSH://@localhost:123").m_pass, "");
BOOST_CHECK_EQUAL(url("SSH://:password@localhost:123").m_pass, "password");
BOOST_CHECK_EQUAL(url("").m_pass, "");
}
BOOST_AUTO_TEST_CASE(ParseHTTPRequestNoHeaders)