HTTP Request, Response and URL Parser
https://cinc.rud.is/web/packages/construe/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
387 lines
8.8 KiB
387 lines
8.8 KiB
/*
 * Copyright (C) Alex Nekipelov (alex@nekipelov.net)
 * License: MIT
 */
|
|
|
|
#ifndef HTTPPARSER_URLPARSER_H
|
|
#define HTTPPARSER_URLPARSER_H
|
|
|
|
#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>

#include <string>
|
|
|
|
namespace httpparser
{

/**
 * Single-pass state-machine parser for URLs of the form
 *
 *   scheme://[username[:password]@]hostname[:port][/path][?query][#fragment]
 *
 * The hostname may be a bracketed IPv6 literal ("[::1]"); it is stored
 * without the brackets. Components are stored verbatim, i.e. percent-encoded
 * sequences are NOT decoded.
 *
 * Usage: construct with a string (or call parse()) and check isValid()
 * before using any accessor. The accessors assert on an invalid parser.
 */
class UrlParser
{
public:
    /// Creates a parser that holds no URL; isValid() is false.
    UrlParser()
        : valid(false)
    {
    }

    /// Parses `url` immediately; check isValid() for the outcome.
    explicit UrlParser(const std::string &url)
        : valid(false)
    {
        parse(url);
    }

    /**
     * Discards any previously parsed URL and parses `str`.
     * @return true when `str` was accepted, false otherwise. On failure all
     *         components are reset to their empty defaults.
     */
    bool parse(const std::string &str)
    {
        url = Url();
        parse_(str);

        return isValid();
    }

    /// @return true when the last parse succeeded.
    bool isValid() const
    {
        return valid;
    }

    /// @return the scheme without the trailing ':' (e.g. "http").
    std::string scheme() const
    {
        assert( isValid() );
        return url.scheme;
    }

    /// @return the user name from the user-info part, or "" when absent.
    std::string username() const
    {
        assert( isValid() );
        return url.username;
    }

    /// @return the password from the user-info part, or "" when absent.
    std::string password() const
    {
        assert( isValid() );
        return url.password;
    }

    /// @return the host name; IPv6 literals are returned without brackets.
    std::string hostname() const
    {
        assert( isValid() );
        return url.hostname;
    }

    /// @return the port exactly as written in the URL, or "" when absent.
    std::string port() const
    {
        assert( isValid() );
        return url.port;
    }

    /// @return the path, always starting with '/'; "/" when the URL has none.
    std::string path() const
    {
        assert( isValid() );
        return url.path;
    }

    /// @return the query string without the leading '?'.
    std::string query() const
    {
        assert( isValid() );
        return url.query;
    }

    /// @return the fragment without the leading '#'.
    std::string fragment() const
    {
        assert( isValid() );
        return url.fragment;
    }

    /**
     * @return the explicit port when the URL has one; otherwise 443 for the
     *         "https" scheme and 80 for every other scheme.
     */
    uint16_t httpPort() const
    {
        const uint16_t defaultHttpPort = 80;
        const uint16_t defaultHttpsPort = 443;

        assert( isValid() );

        if( url.port.empty() )
        {
            if( scheme() == "https" )
                return defaultHttpsPort;
            else
                return defaultHttpPort;
        }
        else
        {
            return url.integerPort;
        }
    }

    /// Parsed URL components. Public for direct read access by callers.
    struct Url
    {
        Url() : integerPort(0)
        {}

        std::string scheme;
        std::string username;
        std::string password;
        std::string hostname;
        std::string port;
        std::string path;
        std::string query;
        std::string fragment;
        uint16_t integerPort;   // numeric value of `port`; 0 when `port` is empty
    } url;

private:
    // The <ctype.h> classifiers have undefined behaviour for negative values
    // other than EOF, so a plain (possibly signed) char must be converted to
    // unsigned char before it is passed to them.
    static bool isAlnum(char ch)
    {
        return isalnum(static_cast<unsigned char>(ch)) != 0;
    }

    static bool isDigit(char ch)
    {
        return isdigit(static_cast<unsigned char>(ch)) != 0;
    }

    static bool isHexDigit(char ch)
    {
        return isxdigit(static_cast<unsigned char>(ch)) != 0;
    }

    /// @return true for alphanumerics and the characters '-', '.', '_', '~'.
    bool isUnreserved(char ch) const
    {
        if( isAlnum(ch) )
            return true;

        switch(ch)
        {
        case '-':
        case '.':
        case '_':
        case '~':
            return true;
        }

        return false;
    }

    /// Marks the parse as failed and clears every component.
    void fail()
    {
        valid = false;
        url = Url();
    }

    /**
     * Stores `digits` (decimal digits only, possibly empty) as the port.
     * @return false, leaving the URL untouched, when the value exceeds 65535.
     */
    bool assignPort(const std::string &digits)
    {
        unsigned long value = 0;

        for(size_t i = 0; i < digits.size(); ++i)
        {
            value = value * 10 + static_cast<unsigned long>(digits[i] - '0');

            // Checked on every digit so that `value` itself can never overflow.
            if( value > 65535UL )
                return false;
        }

        url.port = digits;
        url.integerPort = static_cast<uint16_t>(value);
        return true;
    }

    /**
     * Runs the state machine over `str`, filling `url` and setting `valid`.
     *
     * The text after "//" is ambiguous until '@', ':' or '/' is seen:
     * "a:1" may be host:port or user:password. Such text is buffered in
     * usernameOrHostname / portOrPassword and committed once the next
     * delimiter (or the end of input) disambiguates it.
     */
    void parse_(const std::string &str)
    {
        enum State {
            Scheme,
            SlashAfterScheme1,
            SlashAfterScheme2,
            UsernameOrHostname,
            Password,
            Hostname,
            IPV6Hostname,
            AfterIPV6Hostname,
            PortOrPassword,
            Port,
            Path,
            Query,
            Fragment
        };

        State state = Scheme;

        std::string usernameOrHostname;
        std::string portOrPassword;

        valid = true;
        url.path = "/";
        url.integerPort = 0;

        for(size_t i = 0; i < str.size() && valid; ++i)
        {
            const char ch = str[i];

            switch(state)
            {
            case Scheme:
                if( isAlnum(ch) || ch == '+' || ch == '-' || ch == '.' )
                    url.scheme += ch;
                else if( ch == ':' )
                    state = SlashAfterScheme1;
                else
                    fail();
                break;

            case SlashAfterScheme1:
                if( ch == '/' )
                {
                    state = SlashAfterScheme2;
                }
                else if( isAlnum(ch) )
                {
                    // "scheme:host..." without the double slash is accepted.
                    usernameOrHostname = ch;
                    state = UsernameOrHostname;
                }
                else
                {
                    fail();
                }
                break;

            case SlashAfterScheme2:
                if( ch == '/' )
                    state = UsernameOrHostname;
                else
                    fail();
                break;

            case UsernameOrHostname:
                if( ch == '[' && usernameOrHostname.empty() )
                {
                    // A leading '[' can only start an IPv6 literal host.
                    state = IPV6Hostname;
                }
                else if( isUnreserved(ch) || ch == '%' )
                {
                    usernameOrHostname += ch;
                }
                else if( ch == ':' )
                {
                    state = PortOrPassword;
                }
                else if( ch == '@' )
                {
                    url.username = usernameOrHostname;
                    usernameOrHostname.clear();
                    state = Hostname;
                }
                else if( ch == '/' )
                {
                    url.hostname = usernameOrHostname;
                    usernameOrHostname.clear();
                    state = Path;
                }
                else
                {
                    fail();
                }
                break;

            case Password:
                if( isUnreserved(ch) || ch == '%' )
                    url.password += ch;
                else if( ch == '@' )
                    state = Hostname;
                else
                    fail();
                break;

            case Hostname:
                if( ch == '[' && url.hostname.empty() )
                    state = IPV6Hostname;
                else if( isUnreserved(ch) || ch == '%' )
                    url.hostname += ch;
                else if( ch == ':' )
                    state = Port;
                else if( ch == '/' )
                    state = Path;
                else
                    fail();
                break;

            case IPV6Hostname:
                // Collect the literal between '[' and ']'. '.' is allowed for
                // an embedded IPv4 tail; zone identifiers are not supported.
                if( isHexDigit(ch) || ch == ':' || ch == '.' )
                {
                    url.hostname += ch;
                }
                else if( ch == ']' && !url.hostname.empty() )
                {
                    state = AfterIPV6Hostname;
                }
                else
                {
                    fail();
                }
                break;

            case AfterIPV6Hostname:
                if( ch == ':' )
                    state = Port;
                else if( ch == '/' )
                    state = Path;
                else
                    fail();
                break;

            case PortOrPassword:
                if( isDigit(ch) )
                {
                    portOrPassword += ch;
                }
                else if( ch == '/' )
                {
                    // Only digits so far and the authority ends: host:port.
                    url.hostname = usernameOrHostname;
                    usernameOrHostname.clear();

                    if( assignPort(portOrPassword) )
                        state = Path;
                    else
                        fail();

                    portOrPassword.clear();
                }
                else if( ch == '@' )
                {
                    // Only digits so far but '@' follows: user:numeric-password.
                    url.username = usernameOrHostname;
                    url.password = portOrPassword;
                    usernameOrHostname.clear();
                    portOrPassword.clear();
                    state = Hostname;
                }
                else if( isUnreserved(ch) || ch == '%' )
                {
                    // A non-digit rules out a port: this is a password.
                    url.username = usernameOrHostname;
                    url.password = portOrPassword;
                    url.password += ch;
                    usernameOrHostname.clear();
                    portOrPassword.clear();
                    state = Password;
                }
                else
                {
                    fail();
                }
                break;

            case Port:
                if( isDigit(ch) )
                {
                    portOrPassword += ch;
                }
                else if( ch == '/' )
                {
                    if( assignPort(portOrPassword) )
                        state = Path;
                    else
                        fail();

                    portOrPassword.clear();
                }
                else
                {
                    fail();
                }
                break;

            case Path:
                // url.path already holds the leading '/'.
                if( ch == '#' )
                    state = Fragment;
                else if( ch == '?' )
                    state = Query;
                else
                    url.path += ch;
                break;

            case Query:
                // Only '#' ends the query; a later '?' is ordinary query data.
                if( ch == '#' )
                    state = Fragment;
                else
                    url.query += ch;
                break;

            case Fragment:
                url.fragment += ch;
                break;
            }
        }

        if( !valid )
            return;     // fail() has already cleared every component

        // Commit whatever was still buffered when the input ended, and reject
        // inputs that stopped in the middle of a mandatory part.
        switch(state)
        {
        case Scheme:
        case SlashAfterScheme1:
        case SlashAfterScheme2:
            fail();     // the input ended before any authority was seen
            break;

        case Password:
            fail();     // user-info was never terminated by '@'
            break;

        case IPV6Hostname:
            fail();     // missing closing ']'
            break;

        case UsernameOrHostname:
            url.hostname = usernameOrHostname;
            break;

        case PortOrPassword:
            // No '@' ever followed, so the digits are a port: "host:8080".
            url.hostname = usernameOrHostname;
            if( !assignPort(portOrPassword) )
                fail();
            break;

        case Port:
            if( !assignPort(portOrPassword) )
                fail();
            break;

        default:
            break;
        }
    }

    bool valid;
};

} // namespace httpparser
|
|
|
|
#endif // HTTPPARSER_URLPARSER_H
|
|
|