HTTP Request, Response and URL Parser https://cinc.rud.is/web/packages/construe/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

625 lines
18 KiB

/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_REQUESTPARSER_H
#define HTTPPARSER_REQUESTPARSER_H
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <string>
#include <vector>
#include "request.h"
namespace httpparser
{
class HttpRequestParser
{
public:
// Builds a parser that waits for the first byte of a request method,
// with no body length known yet and chunked decoding switched off.
HttpRequestParser()
    : state(RequestMethodStart),
      contentSize(0),
      chunkSizeStr(),
      chunkSize(0),
      chunked(false)
{
}
// Outcome reported by parse() for the bytes it was given.
enum ParseResult
{
    ParsingCompleted   = 0, // a whole request has been recognised
    ParsingIncompleted = 1, // input ran out before the request was finished
    ParsingError       = 2  // the input does not form a request this parser accepts
};
// Feeds the bytes in [begin, end) to the parser, filling in `req` as fields
// are recognised. Simply forwards to consume() and hands back its verdict.
ParseResult parse(Request &req, const unsigned char *begin, const unsigned char *end)
{
    const ParseResult outcome = consume(req, begin, end);
    return outcome;
}
private:
// Predicate: true when the header's name is "Connection", compared
// without regard to letter case.
static bool checkIfConnection(const Request::HeaderItem &item)
{
    const char *headerName = item.name.c_str();
    return 0 == strcasecmp(headerName, "Connection");
}
// Numeric value of the hexadecimal digit `c`, or -1 when `c` is not a hex digit.
static int hexDigitValue(int c)
{
    if( c >= '0' && c <= '9' )
        return c - '0';
    if( c >= 'a' && c <= 'f' )
        return c - 'a' + 10;
    if( c >= 'A' && c <= 'F' )
        return c - 'A' + 10;
    return -1;
}
// Converts `digits` (digits of `base`, which must be 10 or 16) to a number.
// An empty string yields 0. Returns false, leaving `out` untouched, when a
// character is not a digit of `base` or the value does not fit in size_t.
static bool parseUnsigned(const std::string &digits, size_t base, size_t &out)
{
    const size_t maxValue = static_cast<size_t>(-1);
    size_t value = 0;
    for( std::string::size_type i = 0; i < digits.size(); ++i )
    {
        const int raw = hexDigitValue(static_cast<unsigned char>(digits[i]));
        if( raw < 0 )
            return false;
        const size_t digit = static_cast<size_t>(raw);
        if( digit >= base )
            return false;
        // value * base + digit must stay within size_t.
        if( value > (maxValue - digit) / base )
            return false;
        value = value * base + digit;
    }
    out = value;
    return true;
}
// Parses a Content-Length field value: decimal digits only, optionally
// surrounded by spaces/tabs. Returns false for empty, signed, non-numeric or
// overflowing values instead of silently turning them into a bogus length.
static bool parseContentLength(const std::string &value, size_t &out)
{
    std::string::size_type first = 0;
    std::string::size_type last = value.size();
    while( first < last && (value[first] == ' ' || value[first] == '\t') )
        ++first;
    while( last > first && (value[last - 1] == ' ' || value[last - 1] == '\t') )
        --last;
    if( first == last )
        return false;
    return parseUnsigned(value.substr(first, last - first), 10, out);
}
// Limits how much memory is reserved up front on the strength of a length
// announced by the peer. Larger bodies are still accepted; the container
// simply grows as the bytes actually arrive.
static size_t boundedReserve(size_t wanted)
{
    const size_t limit = 64 * 1024;
    return wanted < limit ? wanted : limit;
}
// Runs the state machine over the bytes in [begin, end).
//
// Returns ParsingCompleted as soon as the request (and its body, if any) is
// finished; bytes after that point in the buffer are not looked at and the
// state is not reset. Returns ParsingIncompleted when the buffer ends first;
// the position is kept in the members so a later call can continue. Returns
// ParsingError on the first byte that is not acceptable in the current state.
//
// Content-Length and Transfer-Encoding are only honoured for POST and PUT.
ParseResult consume(Request &req, const unsigned char *begin, const unsigned char *end)
{
    // A version component above this value is rejected before another digit
    // is appended, so the multiply-by-ten below can never overflow.
    const int maxVersionPrefix = 999;
    while( begin != end )
    {
        const unsigned char byte = *begin++;
        const char input = static_cast<char>(byte);
        // Non-negative code used for every character classification; passing
        // a negative char to the <ctype.h> functions is undefined behaviour.
        const int code = byte;
        switch (state)
        {
        case RequestMethodStart:
            if( !isChar(code) || isControl(code) || isSpecial(code) )
            {
                return ParsingError;
            }
            else
            {
                state = RequestMethod;
                req.method.push_back(input);
            }
            break;
        case RequestMethod:
            if( input == ' ' )
            {
                state = RequestUriStart;
            }
            else if( !isChar(code) || isControl(code) || isSpecial(code) )
            {
                return ParsingError;
            }
            else
            {
                req.method.push_back(input);
            }
            break;
        case RequestUriStart:
            if( isControl(code) )
            {
                return ParsingError;
            }
            else
            {
                state = RequestUri;
                req.uri.push_back(input);
            }
            break;
        case RequestUri:
            if( input == ' ' )
            {
                state = RequestHttpVersion_h;
            }
            else if( input == '\r' )
            {
                // Request line without a version: reported as HTTP/0.9 and
                // finished right away (the following '\n' is not consumed).
                req.versionMajor = 0;
                req.versionMinor = 9;
                return ParsingCompleted;
            }
            else if( isControl(code) )
            {
                return ParsingError;
            }
            else
            {
                req.uri.push_back(input);
            }
            break;
        case RequestHttpVersion_h:
            if( input == 'H' )
            {
                state = RequestHttpVersion_ht;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_ht:
            if( input == 'T' )
            {
                state = RequestHttpVersion_htt;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_htt:
            if( input == 'T' )
            {
                state = RequestHttpVersion_http;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_http:
            if( input == 'P' )
            {
                state = RequestHttpVersion_slash;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_slash:
            if( input == '/' )
            {
                req.versionMajor = 0;
                req.versionMinor = 0;
                state = RequestHttpVersion_majorStart;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_majorStart:
            if( isDigit(code) )
            {
                req.versionMajor = input - '0';
                state = RequestHttpVersion_major;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_major:
            if( input == '.' )
            {
                state = RequestHttpVersion_minorStart;
            }
            else if( isDigit(code) )
            {
                if( req.versionMajor > maxVersionPrefix )
                    return ParsingError;
                req.versionMajor = req.versionMajor * 10 + input - '0';
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_minorStart:
            if( isDigit(code) )
            {
                req.versionMinor = input - '0';
                state = RequestHttpVersion_minor;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_minor:
            if( input == '\r' )
            {
                state = ResponseHttpVersion_newLine;
            }
            else if( isDigit(code) )
            {
                if( req.versionMinor > maxVersionPrefix )
                    return ParsingError;
                req.versionMinor = req.versionMinor * 10 + input - '0';
            }
            else
            {
                return ParsingError;
            }
            break;
        case ResponseHttpVersion_newLine:
            // '\n' that ends the request line.
            if( input == '\n' )
            {
                state = HeaderLineStart;
            }
            else
            {
                return ParsingError;
            }
            break;
        case HeaderLineStart:
            if( input == '\r' )
            {
                // Empty line: the header section is over.
                state = ExpectingNewline_3;
            }
            else if( !req.headers.empty() && (input == ' ' || input == '\t') )
            {
                // Leading whitespace continues the previous header's value.
                state = HeaderLws;
            }
            else if( !isChar(code) || isControl(code) || isSpecial(code) )
            {
                return ParsingError;
            }
            else
            {
                req.headers.push_back(Request::HeaderItem());
                req.headers.back().name.reserve(16);
                req.headers.back().value.reserve(16);
                req.headers.back().name.push_back(input);
                state = HeaderName;
            }
            break;
        case HeaderLws:
            if( input == '\r' )
            {
                state = ExpectingNewline_2;
            }
            else if( input == ' ' || input == '\t' )
            {
                // skip further leading whitespace
            }
            else if( isControl(code) )
            {
                return ParsingError;
            }
            else
            {
                state = HeaderValue;
                req.headers.back().value.push_back(input);
            }
            break;
        case HeaderName:
            if( input == ':' )
            {
                state = SpaceBeforeHeaderValue;
            }
            else if( !isChar(code) || isControl(code) || isSpecial(code) )
            {
                return ParsingError;
            }
            else
            {
                req.headers.back().name.push_back(input);
            }
            break;
        case SpaceBeforeHeaderValue:
            // Exactly one space is required after the colon.
            if( input == ' ' )
            {
                state = HeaderValue;
            }
            else
            {
                return ParsingError;
            }
            break;
        case HeaderValue:
            if( input == '\r' )
            {
                if( req.method == "POST" || req.method == "PUT" )
                {
                    Request::HeaderItem &h = req.headers.back();
                    if( strcasecmp(h.name.c_str(), "Content-Length") == 0 )
                    {
                        // A malformed length is a framing error, not "no body".
                        if( !parseContentLength(h.value, contentSize) )
                            return ParsingError;
                        req.content.reserve( boundedReserve(contentSize) );
                    }
                    else if( strcasecmp(h.name.c_str(), "Transfer-Encoding") == 0 )
                    {
                        if( strcasecmp(h.value.c_str(), "chunked") == 0 )
                            chunked = true;
                    }
                }
                state = ExpectingNewline_2;
            }
            else if( isControl(code) )
            {
                return ParsingError;
            }
            else
            {
                req.headers.back().value.push_back(input);
            }
            break;
        case ExpectingNewline_2:
            // '\n' that ends a header line.
            if( input == '\n' )
            {
                state = HeaderLineStart;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ExpectingNewline_3:
        {
            // '\n' of the empty line that ends the header section. It is
            // required whether or not a body follows.
            if( input != '\n' )
                return ParsingError;
            std::vector<Request::HeaderItem>::iterator it = std::find_if(req.headers.begin(),
                                                                         req.headers.end(),
                                                                         checkIfConnection);
            if( it != req.headers.end() )
            {
                if( strcasecmp(it->value.c_str(), "Keep-Alive") == 0 )
                {
                    req.keepAlive = true;
                }
                else // == Close
                {
                    req.keepAlive = false;
                }
            }
            else
            {
                // No Connection header: keep-alive is switched on for
                // HTTP/1.1 and for any major version above 1; otherwise
                // req.keepAlive is left as the caller had it.
                if( req.versionMajor > 1 || (req.versionMajor == 1 && req.versionMinor == 1) )
                    req.keepAlive = true;
            }
            if( chunked )
            {
                state = ChunkSize;
            }
            else if( contentSize == 0 )
            {
                return ParsingCompleted;
            }
            else
            {
                state = Post;
            }
            break;
        }
        case Post:
            // Fixed-length body: contentSize counts the bytes still missing.
            --contentSize;
            req.content.push_back( input );
            if( contentSize == 0 )
            {
                return ParsingCompleted;
            }
            break;
        case ChunkSize:
            if( hexDigitValue(code) >= 0 )
            {
                chunkSizeStr.push_back(input);
            }
            else if( input == ';' )
            {
                state = ChunkExtensionName;
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkExtensionName:
            if( isalnum(code) || input == ' ' )
            {
                // skip
            }
            else if( input == '=' )
            {
                state = ChunkExtensionValue;
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkExtensionValue:
            if( isalnum(code) || input == ' ' )
            {
                // skip
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkSizeNewLine:
            if( input == '\n' )
            {
                // Also reached at the end of a trailer line, where
                // chunkSizeStr is empty and therefore parses as 0.
                if( !parseUnsigned(chunkSizeStr, 16, chunkSize) )
                    return ParsingError;
                chunkSizeStr.clear();
                req.content.reserve(req.content.size() + boundedReserve(chunkSize));
                if( chunkSize == 0 )
                    state = ChunkSizeNewLine_2;
                else
                    state = ChunkData;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkSizeNewLine_2:
            // After the zero-size chunk: either the closing CRLF or a trailer.
            if( input == '\r' )
            {
                state = ChunkSizeNewLine_3;
            }
            else if( isalpha(code) )
            {
                state = ChunkTrailerName;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkSizeNewLine_3:
            if( input == '\n' )
            {
                return ParsingCompleted;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkTrailerName:
            if( isalnum(code) )
            {
                // skip
            }
            else if( input == ':' )
            {
                state = ChunkTrailerValue;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkTrailerValue:
            if( isalnum(code) || input == ' ' )
            {
                // skip
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkData:
            // chunkSize counts the bytes still missing from this chunk.
            req.content.push_back(input);
            if( --chunkSize == 0 )
            {
                state = ChunkDataNewLine_1;
            }
            break;
        case ChunkDataNewLine_1:
            if( input == '\r' )
            {
                state = ChunkDataNewLine_2;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkDataNewLine_2:
            if( input == '\n' )
            {
                state = ChunkSize;
            }
            else
            {
                return ParsingError;
            }
            break;
        default:
            return ParsingError;
        }
    }
    return ParsingIncompleted;
}
// True when `c` lies in the 7-bit range 0..127; negative values and
// anything above 127 are rejected.
inline bool isChar(int c)
{
    // Viewed as unsigned, a negative value becomes larger than 127.
    return static_cast<unsigned int>(c) <= 127u;
}
// True when `c` is a control code: one of 0..31, or 127.
inline bool isControl(int c)
{
    if( c == 127 )
        return true;
    return 0 <= c && c < 32;
}
// True when `c` is one of the characters this parser treats as special:
// ( ) < > @ , ; : \ " / [ ] ? = { } space and horizontal tab.
inline bool isSpecial(int c)
{
    static const char specials[] = "()<>@,;:\\\"/[]?={} \t";
    // The scan stops at the terminating NUL, so c == 0 never matches.
    for( const char *p = specials; *p != '\0'; ++p )
    {
        if( c == *p )
            return true;
    }
    return false;
}
// True when `c` is one of the decimal digits '0'..'9'.
inline bool isDigit(int c)
{
    switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return true;
    default:
        return false;
    }
}
// Positions of the parsing state machine. The order of the enumerators
// (and therefore their numeric values) is kept as is.
enum State
{
    // Request line: method, URI, then "HTTP/<major>.<minor>".
    RequestMethodStart,
    RequestMethod,
    RequestUriStart,
    RequestUri,
    RequestHttpVersion_h,
    RequestHttpVersion_ht,
    RequestHttpVersion_htt,
    RequestHttpVersion_http,
    RequestHttpVersion_slash,
    RequestHttpVersion_majorStart,
    RequestHttpVersion_major,
    RequestHttpVersion_minorStart,
    RequestHttpVersion_minor,

    // Response-named states. Of these, consume() only uses
    // ResponseHttpVersion_newLine (the '\n' ending the request line).
    ResponseStatusStart,
    ResponseHttpVersion_ht,
    ResponseHttpVersion_htt,
    ResponseHttpVersion_http,
    ResponseHttpVersion_slash,
    ResponseHttpVersion_majorStart,
    ResponseHttpVersion_major,
    ResponseHttpVersion_minorStart,
    ResponseHttpVersion_minor,
    ResponseHttpVersion_spaceAfterVersion,
    ResponseHttpVersion_statusCodeStart,
    ResponseHttpVersion_spaceAfterStatusCode,
    ResponseHttpVersion_statusTextStart,
    ResponseHttpVersion_newLine,

    // Header section.
    HeaderLineStart,
    HeaderLws,
    HeaderName,
    SpaceBeforeHeaderValue,
    HeaderValue,
    ExpectingNewline_2,
    ExpectingNewline_3,

    // Body announced by Content-Length.
    Post,

    // Chunked body.
    ChunkSize,
    ChunkExtensionName,
    ChunkExtensionValue,
    ChunkSizeNewLine,
    ChunkSizeNewLine_2,
    ChunkSizeNewLine_3,
    ChunkTrailerName,
    ChunkTrailerValue,
    ChunkDataNewLine_1,
    ChunkDataNewLine_2,
    ChunkData
};
// Where the state machine currently stands.
State state;
// Body bytes still to be read in the Post state; taken from Content-Length.
size_t contentSize;
// Characters of the chunk-size field collected so far.
std::string chunkSizeStr;
// Bytes still to be read from the chunk being received.
size_t chunkSize;
// Set once a "Transfer-Encoding: chunked" header has been seen.
bool chunked;
};
} // namespace httpparser
#endif // HTTPPARSER_REQUESTPARSER_H