HTTP Request, Response and URL Parser https://cinc.rud.is/web/packages/construe/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

618 lines
18 KiB

/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_RESPONSEPARSER_H
#define HTTPPARSER_RESPONSEPARSER_H
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <string>
#include <vector>
#include "response.h"
namespace httpparser
{
class HttpResponseParser
{
public:
HttpResponseParser()
: state(ResponseStatusStart),
contentSize(0),
chunkSize(0),
chunked(false)
{
}
enum ParseResult {
ParsingCompleted,
ParsingIncompleted,
ParsingError
};
ParseResult parse(Response &resp, const unsigned char *begin, const unsigned char *end)
{
return consume(resp, begin, end);
}
private:
static bool checkIfConnection(const Response::HeaderItem &item)
{
return strcasecmp(item.name.c_str(), "Connection") == 0;
}
ParseResult consume(Response &resp, const unsigned char *begin, const unsigned char *end)
{
while( begin != end )
{
char input = *begin++;
switch (state)
{
case ResponseStatusStart:
if( input != 'H' )
{
return ParsingError;
}
else
{
state = ResponseHttpVersion_ht;
}
break;
case ResponseHttpVersion_ht:
if( input == 'T' )
{
state = ResponseHttpVersion_htt;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_htt:
if( input == 'T' )
{
state = ResponseHttpVersion_http;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_http:
if( input == 'P' )
{
state = ResponseHttpVersion_slash;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_slash:
if( input == '/' )
{
resp.versionMajor = 0;
resp.versionMinor = 0;
state = ResponseHttpVersion_majorStart;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_majorStart:
if( isDigit(input) )
{
resp.versionMajor = input - '0';
state = ResponseHttpVersion_major;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_major:
if( input == ' ' ) {
resp.versionMinor = 0;
state = ResponseHttpVersion_statusCodeStart;
} else if( input == '.' ) {
state = ResponseHttpVersion_minorStart;
} else if( isDigit(input) ) {
resp.versionMajor = resp.versionMajor * 10 + input - '0';
} else {
return ParsingError;
}
break;
case ResponseHttpVersion_minorStart:
if( input == ' ' ) {
resp.versionMinor = 0;
state = ResponseHttpVersion_statusCodeStart;
} else if( isDigit(input) ) {
resp.versionMinor = input - '0';
state = ResponseHttpVersion_minor;
} else{
return ParsingError;
}
break;
case ResponseHttpVersion_minor:
if( input == ' ') {
state = ResponseHttpVersion_statusCodeStart;
resp.versionMinor = 0;
}
else if( isDigit(input) )
{
resp.versionMinor = resp.versionMinor * 10 + input - '0';
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_statusCodeStart:
// printf("ResponseHttpVersion_statusCodeStart\n\n");
if( isDigit(input) )
{
// printf(" - digit - ResponseHttpVersion_statusCodeStart\n\n");
resp.statusCode = input - '0';
state = ResponseHttpVersion_statusCode;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_statusCode:
// printf("ResponseHttpVersion_statusCode\n\n");
if( isDigit(input) )
{
// printf(" - digit - ResponseHttpVersion_statusCode\n\n");
resp.statusCode = resp.statusCode * 10 + input - '0';
}
else
{
if( resp.statusCode < 100 || resp.statusCode > 999 ) {
return ParsingError;
} else if( input == ' ' ) {
// printf(" - SPACE - ResponseHttpVersion_statusCode\n\n");
state = ResponseHttpVersion_statusTextStart;
} else if( input == '\r' ) {
// printf(" - CR - ResponseHttpVersion_statusCode\n\n");
resp.status = "";
state = ResponseHttpVersion_newLine;
} else {
return ParsingError;
}
}
break;
case ResponseHttpVersion_statusTextStart:
// printf("ResponseHttpVersion_statusTextStart\n\n");
if( input == '\r' ) {
// printf(" - CR - ResponseHttpVersion_statusTextStart\n\n");
resp.status = "";
state = ResponseHttpVersion_newLine;
} else if( isChar(input) ) {
// printf(" - char - ResponseHttpVersion_statusTextStart\n\n");
resp.status += input;
state = ResponseHttpVersion_statusText;
} else {
return ParsingError;
}
break;
case ResponseHttpVersion_statusText:
if( input == '\r' )
{
state = ResponseHttpVersion_newLine;
}
else if( isChar(input) )
{
resp.status += input;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_newLine:
// printf("ResponseHttpVersion_newLine\n\n");
if( input == '\n' )
{
// printf(" - NL - ResponseHttpVersion_newLine\n\n");
state = HeaderLineStart;
}
else
{
return ParsingError;
}
break;
case HeaderLineStart:
// printf("HeaderLineStart\n\n");
if( input == '\r' )
{
state = ExpectingNewline_3;
}
else if( !resp.headers.empty() && (input == ' ' || input == '\t') )
{
state = HeaderLws;
}
else if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
resp.headers.push_back(Response::HeaderItem());
resp.headers.back().name.reserve(16);
resp.headers.back().value.reserve(16);
resp.headers.back().name.push_back(input);
state = HeaderName;
}
break;
case HeaderLws:
if( input == '\r' )
{
state = ExpectingNewline_2;
}
else if( input == ' ' || input == '\t' )
{
}
else if( isControl(input) )
{
return ParsingError;
}
else
{
state = HeaderValue;
resp.headers.back().value.push_back(input);
}
break;
case HeaderName:
if( input == ':' )
{
state = SpaceBeforeHeaderValue;
}
else if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
resp.headers.back().name.push_back(input);
}
break;
case SpaceBeforeHeaderValue:
if( input == ' ' )
{
state = HeaderValue;
}
else
{
return ParsingError;
}
break;
case HeaderValue:
if( input == '\r' )
{
Response::HeaderItem &h = resp.headers.back();
if( strcasecmp(h.name.c_str(), "Content-Length") == 0 )
{
contentSize = atoi(h.value.c_str());
resp.content.reserve( contentSize );
}
else if( strcasecmp(h.name.c_str(), "Transfer-Encoding") == 0 )
{
if(strcasecmp(h.value.c_str(), "chunked") == 0)
chunked = true;
}
state = ExpectingNewline_2;
}
else if( isControl(input) )
{
return ParsingError;
}
else
{
resp.headers.back().value.push_back(input);
}
break;
case ExpectingNewline_2:
if( input == '\n' )
{
state = HeaderLineStart;
}
else
{
return ParsingError;
}
break;
case ExpectingNewline_3: {
std::vector<Response::HeaderItem>::iterator it = std::find_if(resp.headers.begin(),
resp.headers.end(),
checkIfConnection);
if( it != resp.headers.end() )
{
if( strcasecmp(it->value.c_str(), "Keep-Alive") == 0 )
{
resp.keepAlive = true;
}
else // == Close
{
resp.keepAlive = false;
}
}
else
{
if( resp.versionMajor > 1 || (resp.versionMajor == 1 && resp.versionMinor == 1) )
resp.keepAlive = true;
}
if( chunked )
{
state = ChunkSize;
}
else if( contentSize == 0 )
{
if( input == '\n')
return ParsingCompleted;
else
return ParsingError;
}
else
{
state = Post;
}
break;
}
case Post:
--contentSize;
resp.content.push_back(input);
if( contentSize == 0 )
{
return ParsingCompleted;
}
break;
case ChunkSize:
if( isalnum(input) )
{
chunkSizeStr.push_back(input);
}
else if( input == ';' )
{
state = ChunkExtensionName;
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkExtensionName:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '=' )
{
state = ChunkExtensionValue;
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkExtensionValue:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine:
if( input == '\n' )
{
chunkSize = strtol(chunkSizeStr.c_str(), NULL, 16);
chunkSizeStr.clear();
resp.content.reserve(resp.content.size() + chunkSize);
if( chunkSize == 0 )
state = ChunkSizeNewLine_2;
else
state = ChunkData;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine_2:
if( input == '\r' )
{
state = ChunkSizeNewLine_3;
}
else if( isalpha(input) )
{
state = ChunkTrailerName;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine_3:
if( input == '\n' )
{
return ParsingCompleted;
}
else
{
return ParsingError;
}
break;
case ChunkTrailerName:
if( isalnum(input) )
{
// skip
}
else if( input == ':' )
{
state = ChunkTrailerValue;
}
else
{
return ParsingError;
}
break;
case ChunkTrailerValue:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkData:
resp.content.push_back(input);
if( --chunkSize == 0 )
{
state = ChunkDataNewLine_1;
}
break;
case ChunkDataNewLine_1:
if( input == '\r' )
{
state = ChunkDataNewLine_2;
}
else
{
return ParsingError;
}
break;
case ChunkDataNewLine_2:
if( input == '\n' )
{
state = ChunkSize;
}
else
{
return ParsingError;
}
break;
default:
return ParsingError;
}
}
return ParsingIncompleted;
}
// Check if a byte is an HTTP character.
inline bool isChar(int c)
{
return c >= 0 && c <= 127;
}
// Check if a byte is an HTTP control character.
inline bool isControl(int c)
{
return (c >= 0 && c <= 31) || (c == 127);
}
// Check if a byte is defined as an HTTP special character.
inline bool isSpecial(int c)
{
switch (c)
{
case '(': case ')': case '<': case '>': case '@':
case ',': case ';': case ':': case '\\': case '"':
case '/': case '[': case ']': case '?': case '=':
case '{': case '}': case ' ': case '\t':
return true;
default:
return false;
}
}
// Check if a byte is a digit.
inline bool isDigit(int c)
{
return c >= '0' && c <= '9';
}
// The current state of the parser.
enum State
{
ResponseStatusStart,
ResponseHttpVersion_ht,
ResponseHttpVersion_htt,
ResponseHttpVersion_http,
ResponseHttpVersion_slash,
ResponseHttpVersion_majorStart,
ResponseHttpVersion_major,
ResponseHttpVersion_minorStart,
ResponseHttpVersion_minor,
ResponseHttpVersion_statusCodeStart,
ResponseHttpVersion_statusCode,
ResponseHttpVersion_statusTextStart,
ResponseHttpVersion_statusText,
ResponseHttpVersion_newLine,
HeaderLineStart,
HeaderLws,
HeaderName,
SpaceBeforeHeaderValue,
HeaderValue,
ExpectingNewline_2,
ExpectingNewline_3,
Post,
ChunkSize,
ChunkExtensionName,
ChunkExtensionValue,
ChunkSizeNewLine,
ChunkSizeNewLine_2,
ChunkSizeNewLine_3,
ChunkTrailerName,
ChunkTrailerValue,
ChunkDataNewLine_1,
ChunkDataNewLine_2,
ChunkData,
} state;
size_t contentSize;
std::string chunkSizeStr;
size_t chunkSize;
bool chunked;
};
} // namespace httpparser
#endif // HTTPPARSER_RESPONSEPARSER_H