Browse Source

initial commit

batman
boB Rudis 4 years ago
parent
commit
9e6f5989db
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 19
      DESCRIPTION
  2. 2
      LICENSE
  3. 21
      LICENSE.md
  4. 7
      NAMESPACE
  5. 27
      R/RcppExports.R
  6. 11
      R/construe-package.R
  7. 63
      README.Rmd
  8. 209
      README.md
  9. 51
      inst/tinytest/test_construe.R
  10. 5
      man/construe.Rd
  11. 14
      man/parse_request.Rd
  12. 14
      man/parse_response.Rd
  13. 14
      man/parse_url.Rd
  14. 3
      src/.gitignore
  15. 2
      src/Makevars
  16. 52
      src/RcppExports.cpp
  17. 185
      src/code.cpp
  18. 625
      src/httprequestparser.h
  19. 609
      src/httpresponseparser.h
  20. 57
      src/request.h
  21. 57
      src/response.h
  22. 387
      src/urlparser.h

19
DESCRIPTION

@ -1,24 +1,31 @@
Package: construe
Type: Package
Title: construe Title Goes Here Otherwise CRAN Checks Fail
Title: HTTP Request, Response and URL Parser
Version: 0.1.0
Date: 2020-08-28
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640"))
comment = c(ORCID = "0000-0001-5670-2640")),
person("Alex", "Nekipelov", email = "alex@nekipelov.net", role = "aut",
comment = "httpparser C++ library"),
person("Ícaro", "Dantas de Araújo Lima", role = "ctb",
comment = "httpparser C++ library")
)
Maintainer: Bob Rudis <bob@rud.is>
Description: A good description goes here otherwise CRAN checks fail.
Description: A simple and fast HTTP request, response and URL parser based on the C++ 'httpparser' library
by Alex Nekipelov (<https://github.com/nekipelov/httpparser>).
URL: https://git.rud.is/hrbrmstr/construe
BugReports: https://git.rud.is/hrbrmstr/construe/issues
SystemRequirements: C++11
Encoding: UTF-8
License: AGPL
License: MIT + file LICENSE
Suggests:
covr, tinytest
Depends:
R (>= 3.6.0)
Imports:
httr,
jsonlite
Rcpp
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
LinkingTo:
Rcpp

2
LICENSE

@ -0,0 +1,2 @@
YEAR: 2020
COPYRIGHT HOLDER: Bob Rudis

21
LICENSE.md

@ -0,0 +1,21 @@
# MIT License
Copyright (c) 2020 Bob Rudis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

7
NAMESPACE

@ -1,4 +1,7 @@
# Generated by roxygen2: do not edit by hand
import(httr)
importFrom(jsonlite,fromJSON)
export(parse_request)
export(parse_response)
export(parse_url)
importFrom(Rcpp,sourceCpp)
useDynLib(construe, .registration = TRUE)

27
R/RcppExports.R

@ -0,0 +1,27 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' Parse an HTTP request
#'
#' Hands a raw HTTP request to the compiled parser and returns its components.
#'
#' @param req HTTP request character string
#' @return A list of class `http_request` with elements `method`, `uri`,
#'   `vers_maj`, `vers_min`, `keepalive`, `headers` (a data frame with
#'   `name` and `value` columns) and `content`. An error is raised when the
#'   request cannot be parsed.
#' @export
parse_request <- function(req) {
  parsed <- .Call(`_construe_parse_request`, req)
  parsed
}
#' Parse an HTTP response
#'
#' Hands a raw HTTP response to the compiled parser and returns its components.
#'
#' @param resp HTTP response character string
#' @return A list of class `http_response` with elements `status_msg`,
#'   `status_code`, `vers_maj`, `vers_min`, `keepalive`, `headers` (a data
#'   frame with `name` and `value` columns) and `content`. An error is raised
#'   when the response cannot be parsed.
#' @export
parse_response <- function(resp) {
  parsed <- .Call(`_construe_parse_response`, resp)
  parsed
}
#' Parse URLs
#'
#' Splits each URL into its components using the compiled parser.
#'
#' @param urls character vector of URLs
#' @return A data frame with one row per input URL and the columns `scheme`,
#'   `username`, `password`, `hostname`, `port`, `path`, `query` and
#'   `fragment`. Every column is `NA` for a URL that could not be parsed.
#' @export
parse_url <- function(urls) {
  parsed <- .Call(`_construe_parse_url`, urls)
  parsed
}

11
R/construe-package.R

@ -1,9 +1,12 @@
#' ...
#'
#' HTTP Request, Response and URL Parser
#'
#' A simple and fast HTTP request, response and URL parser based on the C++ 'httpparser' library
#' by Alex Nekipelov (<https://github.com/nekipelov/httpparser>)
#'
#' @md
#' @name construe
#' @keywords internal
#' @author Bob Rudis (bob@@rud.is)
#' @import httr
#' @importFrom jsonlite fromJSON
#' @importFrom Rcpp sourceCpp
#' @useDynLib construe, .registration = TRUE
"_PACKAGE"

63
README.Rmd

@ -39,6 +39,69 @@ packageVersion("construe")
```
### Requests
```{r ex01}
paste0(c(
"GET /uri.cgi HTTP/1.1\r\n",
"User-Agent: Mozilla/5.0\r\n",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
"Host: 127.0.0.1\r\n", "\r\n"
), collapse = "") -> req
parse_request(req)
microbenchmark::microbenchmark(
parse_request = parse_request(req)
)
```
### Responses
```{r ex02}
paste0(c(
"HTTP/1.1 200 OK\r\n",
"Server: nginx/1.2.1\r\n",
"Content-Type: text/html\r\n",
"Content-Length: 8\r\n",
"Connection: keep-alive\r\n",
"\r\n",
"<html />"
), collapse = "") -> resp
parse_response(resp)
microbenchmark::microbenchmark(
parse_response = parse_response(resp)
)
```
### URLs
```{r ex03}
c(
"git+ssh://example.com/path/file",
"https://example.com/path/file",
"http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com",
"http://username@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"ftp://username:passwd@ftp.example.com/dir/filename.ext",
"mailto:username@example.com",
"svn+ssh://hostname-01.org/path/to/file",
"xddp::://///blah.wat/?"
) -> turls
parse_url(turls)
microbenchmark::microbenchmark(
parse_url = parse_url(turls[1])
)
```
## construe Metrics
```{r cloc, echo=FALSE}

209
README.md

@ -0,0 +1,209 @@
[![Project Status: Active – The project has reached a stable, usable
state and is being actively
developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![Signed
by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr)
![Signed commit
%](https://img.shields.io/badge/Signed_Commits-100%25-lightgrey.svg)
[![Linux build
Status](https://travis-ci.org/hrbrmstr/construe.svg?branch=master)](https://travis-ci.org/hrbrmstr/construe)
![Minimal R
Version](https://img.shields.io/badge/R%3E%3D-3.6.0-blue.svg)
![License](https://img.shields.io/badge/License-MIT-blue.svg)
# construe
HTTP Request, Response and URL Parser
## Description
A simple and fast HTTP request, response and URL parser based on the C++
‘httpparser’ library by Alex Nekipelov
(<https://github.com/nekipelov/httpparser>).
## What’s Inside The Tin
The following functions are implemented:
- `parse_request`: Parse an HTTP request
- `parse_response`: Parse an HTTP response
- `parse_url`: Parse URLs
## Installation
``` r
remotes::install_git("https://git.rud.is/hrbrmstr/construe.git")
# or
remotes::install_git("https://git.sr.ht/~hrbrmstr/construe")
# or
remotes::install_bitbucket("hrbrmstr/construe")
```
NOTE: To use the ‘remotes’ install options you will need to have the
[{remotes} package](https://github.com/r-lib/remotes) installed.
## Usage
``` r
library(construe)
# current version
packageVersion("construe")
## [1] '0.1.0'
```
### Requests
``` r
paste0(c(
"GET /uri.cgi HTTP/1.1\r\n",
"User-Agent: Mozilla/5.0\r\n",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
"Host: 127.0.0.1\r\n", "\r\n"
), collapse = "") -> req
parse_request(req)
## $method
## [1] "GET"
##
## $uri
## [1] "/uri.cgi"
##
## $vers_maj
## [1] 1
##
## $vers_min
## [1] 1
##
## $keepalive
## [1] TRUE
##
## $headers
## name value
## 1 User-Agent Mozilla/5.0
## 2 Accept text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
## 3 Host 127.0.0.1
##
## $content
## character(0)
##
## attr(,"class")
## [1] "http_request" "list"
microbenchmark::microbenchmark(
parse_request = parse_request(req)
)
## Unit: microseconds
## expr min lq mean median uq max neval
## parse_request 281.369 289.5035 310.6147 305.0675 317.057 499.382 100
```
### Responses
``` r
paste0(c(
"HTTP/1.1 200 OK\r\n",
"Server: nginx/1.2.1\r\n",
"Content-Type: text/html\r\n",
"Content-Length: 8\r\n",
"Connection: keep-alive\r\n",
"\r\n",
"<html />"
), collapse = "") -> resp
parse_response(resp)
## $status_msg
## [1] "OK"
##
## $status_code
## [1] 200
##
## $vers_maj
## [1] 1
##
## $vers_min
## [1] 1
##
## $keepalive
## [1] TRUE
##
## $headers
## name value
## 1 Server nginx/1.2.1
## 2 Content-Type text/html
## 3 Content-Length 8
## 4 Connection keep-alive
##
## $content
## [1] "<" "h" "t" "m" "l" " " "/" ">"
##
## attr(,"class")
## [1] "http_response" "list"
microbenchmark::microbenchmark(
parse_response = parse_response(resp)
)
## Unit: microseconds
## expr min lq mean median uq max neval
## parse_response 279.881 295.951 317.3297 306.9435 322.6595 674.712 100
```
### URLs
``` r
c(
"git+ssh://example.com/path/file",
"https://example.com/path/file",
"http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com",
"http://username@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"ftp://username:passwd@ftp.example.com/dir/filename.ext",
"mailto:username@example.com",
"svn+ssh://hostname-01.org/path/to/file",
"xddp::://///blah.wat/?"
) -> turls
parse_url(turls)
## scheme username password hostname port path query fragment
## 1 git+ssh example.com /path/file
## 2 https example.com /path/file
## 3 http www.example.com /dir/subdir param=1&param=2;param%20=%20 fragment
## 4 http www.example.com /
## 5 http username www.example.com /dir/subdir param=1&param=2;param%20=%20 fragment
## 6 http username passwd www.example.com /dir/subdir param=1&param=2;param%20=%20 fragment
## 7 http www.example.com 8080 /dir/subdir param=1&param=2;param%20=%20 fragment
## 8 http username passwd www.example.com 8080 /dir/subdir param=1&param=2;param%20=%20 fragment
## 9 ftp username passwd ftp.example.com /dir/filename.ext
## 10 mailto username example.com /
## 11 svn+ssh hostname-01.org /path/to/file
## 12 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
microbenchmark::microbenchmark(
parse_url = parse_url(turls[1])
)
## Unit: microseconds
## expr min lq mean median uq max neval
## parse_url 697.727 747.634 775.9437 761.5855 775.0465 1025.68 100
```
## construe Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :----------- | -------: | ---: | ---: | ---: | ----------: | ---: | -------: | ---: |
| C/C++ Header | 5 | 0.23 | 1561 | 0.44 | 120 | 0.29 | 54 | 0.20 |
| C++ | 2 | 0.09 | 162 | 0.05 | 55 | 0.13 | 20 | 0.07 |
| Rmd | 1 | 0.05 | 49 | 0.01 | 28 | 0.07 | 37 | 0.14 |
| R | 3 | 0.14 | 13 | 0.00 | 6 | 0.01 | 25 | 0.09 |
| SUM | 11 | 0.50 | 1785 | 0.50 | 209 | 0.50 | 136 | 0.50 |
clock Package Metrics for construe
## Code of Conduct
Please note that this project is released with a Contributor Code of
Conduct. By participating in this project you agree to abide by its
terms.

51
inst/tinytest/test_construe.R

@ -1,4 +1,51 @@
library(construe)

# Placeholder with simple test
expect_equal(1 + 1, 2)

# ---- parse_request() --------------------------------------------------------

req <- paste0(c(
  "GET /uri.cgi HTTP/1.1\r\n",
  "User-Agent: Mozilla/5.0\r\n",
  "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
  "Host: 127.0.0.1\r\n", "\r\n"
), collapse = "")

res <- parse_request(req)

# expect_equal() reports the actual value on failure, unlike expect_true(a == b)
expect_true(inherits(res, "http_request"))
expect_equal(res$method, "GET")
expect_true(res$keepalive)
expect_true("Host" %in% res$headers$name)

# Input that is not a well-formed request must raise an R error
expect_error(parse_request("not an http request"))

# ---- parse_response() -------------------------------------------------------

resp <- paste0(c(
  "HTTP/1.1 200 OK\r\n",
  "Server: nginx/1.2.1\r\n",
  "Content-Type: text/html\r\n",
  "Content-Length: 8\r\n",
  "Connection: keep-alive\r\n",
  "\r\n",
  "<html />"
), collapse = "")

res <- parse_response(resp)

expect_true(inherits(res, "http_response"))
expect_equal(res$status_msg, "OK")
expect_true(res$keepalive)
expect_true("Server" %in% res$headers$name)

# A response must start with "HTTP/"; anything else must raise an R error
expect_error(parse_response("garbage"))

# ---- parse_url() ------------------------------------------------------------

turls <- c(
  "git+ssh://example.com/path/file",
  "https://example.com/path/file",
  "http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://www.example.com",
  "http://username@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://username:passwd@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://username:passwd@www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "ftp://username:passwd@ftp.example.com/dir/filename.ext",
  "mailto:username@example.com",
  "svn+ssh://hostname-01.org/path/to/file",
  "xddp::://///blah.wat/?"
)

res <- parse_url(turls)

# One row per input URL; the malformed last URL yields NA components
expect_equal(nrow(res), length(turls))
expect_true(is.na(res$scheme[12]))
expect_equal(res$scheme[1], "git+ssh")

5
man/construe.Rd

@ -4,9 +4,10 @@
\name{construe}
\alias{construe}
\alias{construe-package}
\title{...}
\title{HTTP Request, Response and URL Parser}
\description{
A good description goes here otherwise CRAN checks fail.
A simple and fast HTTP request, response and URL parser based on the C++ 'httpparser' library
by Alex Nekipelov (\url{https://github.com/nekipelov/httpparser})
}
\seealso{
Useful links:

14
man/parse_request.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{parse_request}
\alias{parse_request}
\title{Parse an HTTP request}
\usage{
parse_request(req)
}
\arguments{
\item{req}{HTTP request character string}
}
\description{
Parse an HTTP request
}

14
man/parse_response.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{parse_response}
\alias{parse_response}
\title{Parse an HTTP response}
\usage{
parse_response(resp)
}
\arguments{
\item{resp}{HTTP response character string}
}
\description{
Parse an HTTP response
}

14
man/parse_url.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{parse_url}
\alias{parse_url}
\title{Parse URLs}
\usage{
parse_url(urls)
}
\arguments{
\item{urls}{character vector of URLs}
}
\description{
Parse URLs
}

3
src/.gitignore

@ -0,0 +1,3 @@
*.o
*.so
*.dll

2
src/Makevars

@ -0,0 +1,2 @@
# Request the C++11 standard for compiling the package sources.
CXX_STD = CXX11
# Extra flags passed at link time.
# NOTE(review): -std=c++11 is a compiler option and the standard is already
# requested through CXX_STD above; it presumably has no effect here -- confirm.
# NOTE(review): no zlib or pthread usage is apparent in the sources visible in
# this commit view; confirm -lz / -lpthread / -pthread are actually required.
PKG_LIBS = -L. -lz -lpthread -pthread -std=c++11

52
src/RcppExports.cpp

@ -0,0 +1,52 @@
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#include <Rcpp.h>
using namespace Rcpp;
// parse_request
// .Call entry point for parse_request(): converts the incoming SEXP to an
// Rcpp String, invokes the C++ implementation and wraps its List result
// back into a SEXP.
List parse_request(String req);
RcppExport SEXP _construe_parse_request(SEXP reqSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< String >::type req(reqSEXP);
rcpp_result_gen = Rcpp::wrap(parse_request(req));
return rcpp_result_gen;
END_RCPP
}
// parse_response
// .Call entry point for parse_response(): converts the incoming SEXP to an
// Rcpp String, invokes the C++ implementation and wraps its List result
// back into a SEXP.
List parse_response(String resp);
RcppExport SEXP _construe_parse_response(SEXP respSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< String >::type resp(respSEXP);
rcpp_result_gen = Rcpp::wrap(parse_response(resp));
return rcpp_result_gen;
END_RCPP
}
// parse_url
// .Call entry point for parse_url(): converts the incoming SEXP to a
// std::vector<std::string>, invokes the C++ implementation and wraps its
// DataFrame result back into a SEXP.
DataFrame parse_url(std::vector < std::string > urls);
RcppExport SEXP _construe_parse_url(SEXP urlsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::vector < std::string > >::type urls(urlsSEXP);
rcpp_result_gen = Rcpp::wrap(parse_url(urls));
return rcpp_result_gen;
END_RCPP
}
// Table of the native routines exposed through .Call: symbol name, function
// pointer and number of arguments (each routine here takes one). The
// all-NULL entry terminates the table.
static const R_CallMethodDef CallEntries[] = {
{"_construe_parse_request", (DL_FUNC) &_construe_parse_request, 1},
{"_construe_parse_response", (DL_FUNC) &_construe_parse_response, 1},
{"_construe_parse_url", (DL_FUNC) &_construe_parse_url, 1},
{NULL, NULL, 0}
};
// Package initialisation hook: registers the .Call routines listed in
// CallEntries for this DLL and turns off dynamic symbol lookup, so only the
// registered routines can be resolved.
RcppExport void R_init_construe(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE);
}

185
src/code.cpp

@ -0,0 +1,185 @@
#include "request.h"
#include "httprequestparser.h"
#include "response.h"
#include "httpresponseparser.h"
#include "urlparser.h"
#include <Rcpp.h>
using namespace Rcpp;
using namespace httpparser;
//' Parse an HTTP request
//'
//' @param req HTTP request character string
//' @return a list of class `http_request` with elements `method`, `uri`,
//'   `vers_maj`, `vers_min`, `keepalive`, `headers` (data frame with
//'   `name` and `value` columns) and `content`; an error is raised when the
//'   request cannot be parsed
//' @export
// [[Rcpp::export]]
List parse_request(String req) {

  Request request;
  HttpRequestParser parser;

  const char *text = req.get_cstring();

  HttpRequestParser::ParseResult res = parser.parse(request, text, text + strlen(text));

  // Rcpp::stop() throws a C++ exception, so `request`, `parser` and any other
  // locals are destroyed before the error reaches R. Rf_error() would longjmp
  // straight out of this frame and skip those destructors.
  if (res != HttpRequestParser::ParsingCompleted) {
    Rcpp::stop("Parse error.");
  }

  // Copy the parsed headers into two parallel character vectors
  StringVector names(request.headers.size());
  StringVector vals(request.headers.size());

  R_xlen_t idx = 0;
  for (std::vector<Request::HeaderItem>::const_iterator it = request.headers.begin();
       it != request.headers.end(); ++it, ++idx) {
    names[idx] = it->name;
    vals[idx] = it->value;
  }

  DataFrame headers = DataFrame::create(
    _["name"] = names,
    _["value"] = vals
  );

  List l = List::create(
    _["method"] = request.method,
    _["uri"] = request.uri,
    _["vers_maj"] = request.versionMajor,
    _["vers_min"] = request.versionMinor,
    _["keepalive"] = request.keepAlive,
    _["headers"] = headers,
    _["content"] = Rcpp::wrap(request.content)
  );

  l.attr("class") = CharacterVector::create("http_request", "list");

  return(l);

}
//' Parse an HTTP response
//'
//' @param resp HTTP response character string
//' @return a list of class `http_response` with elements `status_msg`,
//'   `status_code`, `vers_maj`, `vers_min`, `keepalive`, `headers` (data
//'   frame with `name` and `value` columns) and `content`; an error is raised
//'   when the response cannot be parsed
//' @export
// [[Rcpp::export]]
List parse_response(String resp) {

  Response response;
  HttpResponseParser parser;

  const char *text = resp.get_cstring();

  HttpResponseParser::ParseResult res = parser.parse(response, text, text + strlen(text));

  // Rcpp::stop() throws a C++ exception, so `response`, `parser` and any
  // other locals are destroyed before the error reaches R. Rf_error() would
  // longjmp straight out of this frame and skip those destructors.
  if (res != HttpResponseParser::ParsingCompleted) {
    Rcpp::stop("Parse error.");
  }

  // Copy the parsed headers into two parallel character vectors
  StringVector names(response.headers.size());
  StringVector vals(response.headers.size());

  R_xlen_t idx = 0;
  for (std::vector<Response::HeaderItem>::const_iterator it = response.headers.begin();
       it != response.headers.end(); ++it, ++idx) {
    names[idx] = it->name;
    vals[idx] = it->value;
  }

  DataFrame headers = DataFrame::create(
    _["name"] = names,
    _["value"] = vals
  );

  List l = List::create(
    _["status_msg"] = response.status,
    _["status_code"] = response.statusCode,
    _["vers_maj"] = response.versionMajor,
    _["vers_min"] = response.versionMinor,
    _["keepalive"] = response.keepAlive,
    _["headers"] = headers,
    _["content"] = Rcpp::wrap(response.content)
  );

  l.attr("class") = CharacterVector::create("http_response", "list");

  return(l);

}
//' Parse URLs
//'
//' @param urls character vector of URLs
//' @return a data frame with one row per input URL and the columns `scheme`,
//'   `username`, `password`, `hostname`, `port`, `path`, `query` and
//'   `fragment`; every column is `NA` for a URL that could not be parsed
//' @export
// [[Rcpp::export]]
DataFrame parse_url(std::vector < std::string > urls) {

  // Signed length so the loop index and Rcpp vector indexing share one type
  const R_xlen_t n = static_cast<R_xlen_t>(urls.size());

  StringVector scheme(n);
  StringVector username(n);
  StringVector password(n);
  StringVector hostname(n);
  StringVector port(n);
  StringVector path(n);
  StringVector query(n);
  StringVector fragment(n);

  UrlParser u;

  for (R_xlen_t idx = 0; idx < n; idx++) {

    int res = u.parse(urls[idx].c_str());

    if (res) {

      scheme[idx] = u.url.scheme;
      username[idx] = u.url.username;
      password[idx] = u.url.password;
      hostname[idx] = u.url.hostname;
      port[idx] = u.url.port;
      path[idx] = u.url.path;
      query[idx] = u.url.query;
      fragment[idx] = u.url.fragment;

    } else {

      // Unparseable URL: mark every component of this row as missing
      scheme[idx] = NA_STRING;
      username[idx] = NA_STRING;
      password[idx] = NA_STRING;
      hostname[idx] = NA_STRING;
      port[idx] = NA_STRING;
      path[idx] = NA_STRING;
      query[idx] = NA_STRING;
      fragment[idx] = NA_STRING;

    }

  }

  return(DataFrame::create(
    _["scheme"] = scheme,
    _["username"] = username,
    _["password"] = password,
    _["hostname"] = hostname,
    _["port"] = port,
    _["path"] = path,
    _["query"] = query,
    _["fragment"] = fragment
  ));

}

625
src/httprequestparser.h

@ -0,0 +1,625 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_REQUESTPARSER_H
#define HTTPPARSER_REQUESTPARSER_H
#include <algorithm>
#include <string.h>
#include <stdlib.h>
#include "request.h"
namespace httpparser
{
class HttpRequestParser
{
public:
// Builds a parser that expects the first byte of the request method, with
// no body length recorded and chunked decoding switched off.
HttpRequestParser()
    : state(RequestMethodStart),
      contentSize(0),
      chunkSize(0),
      chunked(false)
{
}
// Outcome of a parse() call.
enum ParseResult {
// a complete request was recognised
ParsingCompleted,
// the input ended before the request was complete
ParsingIncompleted,
// the input is not a valid request
ParsingError
};
// Feeds the bytes in [begin, end) to the state machine and fills `req` as
// they are recognised. The parser state is kept in the object, so parsing
// continues from wherever a previous call stopped.
ParseResult parse(Request &req, const char *begin, const char *end)
{
    const ParseResult outcome = consume(req, begin, end);
    return outcome;
}
private:
// Predicate for std::find_if: true when the header is named "Connection",
// compared without regard to case.
static bool checkIfConnection(const Request::HeaderItem &item)
{
    const int cmp = strcasecmp(item.name.c_str(), "Connection");
    return cmp == 0;
}
// Runs the request state machine over the bytes in [begin, end), appending
// what it recognises to `req`.
//
// Returns ParsingCompleted as soon as a full request has been seen,
// ParsingError on the first byte that is not valid in the current state, and
// ParsingIncompleted when the input runs out first. The current state and
// the body counters are members, so a later call continues where this one
// stopped.
ParseResult consume(Request &req, const char *begin, const char *end)
{
    while( begin != end )
    {
        char input = *begin++;

        switch (state)
        {
        // ---- request line: METHOD SP URI SP HTTP/major.minor CRLF ----
        case RequestMethodStart:
            if( !isChar(input) || isControl(input) || isSpecial(input) )
            {
                return ParsingError;
            }
            else
            {
                state = RequestMethod;
                req.method.push_back(input);
            }
            break;
        case RequestMethod:
            if( input == ' ' )
            {
                state = RequestUriStart;
            }
            else if( !isChar(input) || isControl(input) || isSpecial(input) )
            {
                return ParsingError;
            }
            else
            {
                req.method.push_back(input);
            }
            break;
        case RequestUriStart:
            if( isControl(input) )
            {
                return ParsingError;
            }
            else
            {
                state = RequestUri;
                req.uri.push_back(input);
            }
            break;
        case RequestUri:
            if( input == ' ' )
            {
                state = RequestHttpVersion_h;
            }
            else if (input == '\r')
            {
                // No version token after the URI: report it as HTTP/0.9
                req.versionMajor = 0;
                req.versionMinor = 9;

                return ParsingCompleted;
            }
            else if( isControl(input) )
            {
                return ParsingError;
            }
            else
            {
                req.uri.push_back(input);
            }
            break;
        case RequestHttpVersion_h:
            if( input == 'H' )
            {
                state = RequestHttpVersion_ht;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_ht:
            if( input == 'T' )
            {
                state = RequestHttpVersion_htt;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_htt:
            if( input == 'T' )
            {
                state = RequestHttpVersion_http;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_http:
            if( input == 'P' )
            {
                state = RequestHttpVersion_slash;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_slash:
            if( input == '/' )
            {
                req.versionMajor = 0;
                req.versionMinor = 0;
                state = RequestHttpVersion_majorStart;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_majorStart:
            if( isDigit(input) )
            {
                req.versionMajor = input - '0';
                state = RequestHttpVersion_major;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_major:
            if( input == '.' )
            {
                state = RequestHttpVersion_minorStart;
            }
            else if (isDigit(input))
            {
                req.versionMajor = req.versionMajor * 10 + input - '0';
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_minorStart:
            if( isDigit(input) )
            {
                req.versionMinor = input - '0';
                state = RequestHttpVersion_minor;
            }
            else
            {
                return ParsingError;
            }
            break;
        case RequestHttpVersion_minor:
            if( input == '\r' )
            {
                // This state name is shared with the response parser; here it
                // waits for the LF that ends the request line
                state = ResponseHttpVersion_newLine;
            }
            else if( isDigit(input) )
            {
                req.versionMinor = req.versionMinor * 10 + input - '0';
            }
            else
            {
                return ParsingError;
            }
            break;
        case ResponseHttpVersion_newLine:
            if( input == '\n' )
            {
                state = HeaderLineStart;
            }
            else
            {
                return ParsingError;
            }
            break;

        // ---- header section ----
        case HeaderLineStart:
            if( input == '\r' )
            {
                // Empty line: end of the header section
                state = ExpectingNewline_3;
            }
            else if( !req.headers.empty() && (input == ' ' || input == '\t') )
            {
                // Leading whitespace continues the previous header's value
                state = HeaderLws;
            }
            else if( !isChar(input) || isControl(input) || isSpecial(input) )
            {
                return ParsingError;
            }
            else
            {
                req.headers.push_back(Request::HeaderItem());
                req.headers.back().name.reserve(16);
                req.headers.back().value.reserve(16);
                req.headers.back().name.push_back(input);
                state = HeaderName;
            }
            break;
        case HeaderLws:
            if( input == '\r' )
            {
                state = ExpectingNewline_2;
            }
            else if( input == ' ' || input == '\t' )
            {
                // skip additional leading whitespace
            }
            else if( isControl(input) )
            {
                return ParsingError;
            }
            else
            {
                state = HeaderValue;
                req.headers.back().value.push_back(input);
            }
            break;
        case HeaderName:
            if( input == ':' )
            {
                state = SpaceBeforeHeaderValue;
            }
            else if( !isChar(input) || isControl(input) || isSpecial(input) )
            {
                return ParsingError;
            }
            else
            {
                req.headers.back().name.push_back(input);
            }
            break;
        case SpaceBeforeHeaderValue:
            if( input == ' ' )
            {
                state = HeaderValue;
            }
            else
            {
                return ParsingError;
            }
            break;
        case HeaderValue:
            if( input == '\r' )
            {
                // Body framing headers are only honoured for POST and PUT
                if( req.method == "POST" || req.method == "PUT" )
                {
                    Request::HeaderItem &h = req.headers.back();

                    if( strcasecmp(h.name.c_str(), "Content-Length") == 0 )
                    {
                        // A negative length would wrap to a huge size_t and
                        // make the reserve() below fail, so reject it here
                        const long declared = strtol(h.value.c_str(), NULL, 10);
                        if( declared < 0 )
                        {
                            return ParsingError;
                        }
                        contentSize = static_cast<size_t>(declared);
                        req.content.reserve( contentSize );
                    }
                    else if( strcasecmp(h.name.c_str(), "Transfer-Encoding") == 0 )
                    {
                        if(strcasecmp(h.value.c_str(), "chunked") == 0)
                            chunked = true;
                    }
                }
                state = ExpectingNewline_2;
            }
            else if( isControl(input) )
            {
                return ParsingError;
            }
            else
            {
                req.headers.back().value.push_back(input);
            }
            break;
        case ExpectingNewline_2:
            if( input == '\n' )
            {
                state = HeaderLineStart;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ExpectingNewline_3: {
            // The blank line that ends the headers must be a complete CRLF,
            // whether or not a body follows
            if( input != '\n' )
            {
                return ParsingError;
            }

            std::vector<Request::HeaderItem>::iterator it = std::find_if(req.headers.begin(),
                                                                req.headers.end(),
                                                                checkIfConnection);

            if( it != req.headers.end() )
            {
                if( strcasecmp(it->value.c_str(), "Keep-Alive") == 0 )
                {
                    req.keepAlive = true;
                }
                else  // == Close
                {
                    req.keepAlive = false;
                }
            }
            else
            {
                // Without a Connection header, HTTP/1.1 and later default to keep-alive
                if( req.versionMajor > 1 || (req.versionMajor == 1 && req.versionMinor == 1) )
                    req.keepAlive = true;
            }

            if( chunked )
            {
                state = ChunkSize;
            }
            else if( contentSize == 0 )
            {
                return ParsingCompleted;
            }
            else
            {
                state = Post;
            }
            break;
        }

        // ---- body with a declared Content-Length ----
        case Post:
            --contentSize;
            req.content.push_back( input );

            if( contentSize == 0 )
            {
                return ParsingCompleted;
            }
            break;

        // ---- chunked body ----
        // The <ctype.h> classifiers require a value representable as
        // unsigned char, hence the casts on the (possibly negative) char.
        case ChunkSize:
            if( isalnum(static_cast<unsigned char>(input)) )
            {
                chunkSizeStr.push_back(input);
            }
            else if( input == ';' )
            {
                state = ChunkExtensionName;
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkExtensionName:
            if( isalnum(static_cast<unsigned char>(input)) || input == ' ' )
            {
                // skip
            }
            else if( input == '=' )
            {
                state = ChunkExtensionValue;
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkExtensionValue:
            if( isalnum(static_cast<unsigned char>(input)) || input == ' ' )
            {
                // skip
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkSizeNewLine:
            if( input == '\n' )
            {
                // Chunk sizes are hexadecimal. This state is also reached
                // after a trailer line, where chunkSizeStr is empty and the
                // size therefore reads as 0.
                chunkSize = strtol(chunkSizeStr.c_str(), NULL, 16);
                chunkSizeStr.clear();
                req.content.reserve(req.content.size() + chunkSize);

                if( chunkSize == 0 )
                    state = ChunkSizeNewLine_2;
                else
                    state = ChunkData;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkSizeNewLine_2:
            if( input == '\r' )
            {
                state = ChunkSizeNewLine_3;
            }
            else if( isalpha(static_cast<unsigned char>(input)) )
            {
                state = ChunkTrailerName;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkSizeNewLine_3:
            if( input == '\n' )
            {
                return ParsingCompleted;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkTrailerName:
            if( isalnum(static_cast<unsigned char>(input)) )
            {
                // skip
            }
            else if( input == ':' )
            {
                state = ChunkTrailerValue;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkTrailerValue:
            if( isalnum(static_cast<unsigned char>(input)) || input == ' ' )
            {
                // skip
            }
            else if( input == '\r' )
            {
                state = ChunkSizeNewLine;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkData:
            req.content.push_back(input);

            if( --chunkSize == 0 )
            {
                state = ChunkDataNewLine_1;
            }
            break;
        case ChunkDataNewLine_1:
            if( input == '\r' )
            {
                state = ChunkDataNewLine_2;
            }
            else
            {
                return ParsingError;
            }
            break;
        case ChunkDataNewLine_2:
            if( input == '\n' )
            {
                state = ChunkSize;
            }
            else
            {
                return ParsingError;
            }
            break;
        default:
            return ParsingError;
        }
    }

    // Input exhausted before the request was complete
    return ParsingIncompleted;
}
// True when `c` lies in the 7-bit range 0..127 that this parser accepts as
// an HTTP character.
inline bool isChar(int c)
{
    const bool outOfRange = (c < 0) || (c > 127);
    return !outOfRange;
}
// True when `c` is a control character: any code from 0 to 31, or 127.
inline bool isControl(int c)
{
    if( c == 127 )
    {
        return true;
    }
    return c >= 0 && c <= 31;
}
// True when `c` is one of the separator characters that may not appear in a
// method or header name: ( ) < > @ , ; : \ " / [ ] ? = { } space and tab.
inline bool isSpecial(int c)
{
    static const char specials[] = "()<>@,;:\\\"/[]?={} \t";

    // Walk the table up to, but not including, its terminating NUL so that
    // a zero byte is never reported as special
    for( const char *p = specials; *p != '\0'; ++p )
    {
        if( c == *p )
        {
            return true;
        }
    }
    return false;
}
// True when `c` is one of the decimal digits '0' through '9'.
inline bool isDigit(int c)
{
    const bool belowZero = c < '0';
    const bool aboveNine = c > '9';
    return !(belowZero || aboveNine);
}
// The current state of the parser.
// The list also carries Response* states; of those, consume() in this class
// only handles ResponseHttpVersion_newLine (the LF ending the request line).
enum State
{
// request line
RequestMethodStart,
RequestMethod,
RequestUriStart,
RequestUri,
RequestHttpVersion_h,
RequestHttpVersion_ht,
RequestHttpVersion_htt,
RequestHttpVersion_http,
RequestHttpVersion_slash,
RequestHttpVersion_majorStart,
RequestHttpVersion_major,
RequestHttpVersion_minorStart,
RequestHttpVersion_minor,
// status-line states (no case for these in this class's consume())
ResponseStatusStart,
ResponseHttpVersion_ht,
ResponseHttpVersion_htt,
ResponseHttpVersion_http,
ResponseHttpVersion_slash,
ResponseHttpVersion_majorStart,
ResponseHttpVersion_major,
ResponseHttpVersion_minorStart,
ResponseHttpVersion_minor,
ResponseHttpVersion_spaceAfterVersion,
ResponseHttpVersion_statusCodeStart,
ResponseHttpVersion_spaceAfterStatusCode,
ResponseHttpVersion_statusTextStart,
ResponseHttpVersion_newLine,
// header section
HeaderLineStart,
HeaderLws,
HeaderName,
SpaceBeforeHeaderValue,
HeaderValue,
ExpectingNewline_2,
ExpectingNewline_3,
// body with a declared Content-Length
Post,
// chunked body
ChunkSize,
ChunkExtensionName,
ChunkExtensionValue,
ChunkSizeNewLine,
ChunkSizeNewLine_2,
ChunkSizeNewLine_3,
ChunkTrailerName,
ChunkTrailerValue,
ChunkDataNewLine_1,
ChunkDataNewLine_2,
ChunkData,
} state;
// body bytes still expected, taken from the Content-Length header
size_t contentSize;
// characters of the chunk-size token collected so far
std::string chunkSizeStr;
// bytes still expected in the chunk being read
size_t chunkSize;
// set when a "Transfer-Encoding: chunked" header was seen
bool chunked;
};
} // namespace httpparser
#endif // HTTPPARSER_REQUESTPARSER_H

609
src/httpresponseparser.h

@ -0,0 +1,609 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_RESPONSEPARSER_H
#define HTTPPARSER_RESPONSEPARSER_H
#include <algorithm>
#include <string.h>
#include <stdlib.h>
#include "response.h"
namespace httpparser
{
/**
 * Incremental, byte-at-a-time parser for HTTP/1.x response messages.
 *
 * Feed raw bytes through parse(); the parser keeps its position between
 * calls, so a response may be delivered in several pieces. The status line,
 * headers and body (either Content-Length delimited or chunked) are written
 * into the caller-supplied Response.
 *
 * Hardening relative to the straightforward state machine:
 *  - Content-Length and chunk sizes are validated (digits only, no overflow)
 *    instead of being run through atoi()/strtol();
 *  - memory is never reserved beyond the number of bytes actually available
 *    in the current buffer, so a hostile size cannot force a huge allocation;
 *  - the LF that terminates the header section is always verified;
 *  - an empty reason phrase ("HTTP/1.1 200 \r\n") is accepted;
 *  - character classification is ASCII-only and safe for bytes >= 0x80.
 */
class HttpResponseParser
{
public:
    HttpResponseParser()
        : state(ResponseStatusStart),
          contentSize(0),
          chunkSize(0),
          chunked(false)
    {
    }

    // Outcome of a parse() call.
    enum ParseResult {
        ParsingCompleted,   // a complete response was consumed
        ParsingIncompleted, // input ran out before the message ended
        ParsingError        // input is not a well-formed response
    };

    /**
     * Consume the bytes in [begin, end) and update `resp`.
     *
     * Returns ParsingCompleted as soon as the message ends, ParsingError on
     * the first malformed byte, and ParsingIncompleted when the buffer is
     * exhausted first (call again with the following bytes).
     */
    ParseResult parse(Response &resp, const char *begin, const char *end)
    {
        return consume(resp, begin, end);
    }

private:
    // Upper bound for each HTTP version component; keeps the running
    // multiplication well inside the range of int.
    enum { MaxVersionNumber = 9999 };

    // Predicate for std::find_if: is this the "Connection" header?
    static bool checkIfConnection(const Response::HeaderItem &item)
    {
        return strcasecmp(item.name.c_str(), "Connection") == 0;
    }

    // Numeric value of an ASCII (hex) digit, or -1 when `c` is not one.
    static int digitValue(char c)
    {
        if( c >= '0' && c <= '9' )
            return c - '0';
        if( c >= 'a' && c <= 'f' )
            return c - 'a' + 10;
        if( c >= 'A' && c <= 'F' )
            return c - 'A' + 10;
        return -1;
    }

    /**
     * Parse `text` as an unsigned number in `base` (10 or 16), tolerating
     * surrounding spaces/tabs. Returns false, leaving `out` untouched, when
     * there are no digits, when a foreign character is present, or when the
     * value does not fit in size_t.
     */
    static bool parseSize(const std::string &text, unsigned int base, size_t &out)
    {
        const size_t maxValue = static_cast<size_t>(-1);
        size_t pos = 0;
        size_t value = 0;
        size_t digits = 0;

        while( pos < text.size() && (text[pos] == ' ' || text[pos] == '\t') )
            ++pos;

        for( ; pos < text.size(); ++pos, ++digits )
        {
            const int digit = digitValue(text[pos]);
            if( digit < 0 || static_cast<unsigned int>(digit) >= base )
                break;
            // value * base + digit must not exceed maxValue.
            if( value > (maxValue - static_cast<size_t>(digit)) / base )
                return false;
            value = value * base + static_cast<size_t>(digit);
        }

        while( pos < text.size() && (text[pos] == ' ' || text[pos] == '\t') )
            ++pos;

        if( digits == 0 || pos != text.size() )
            return false;

        out = value;
        return true;
    }

    // The state machine proper: one input byte per loop iteration.
    ParseResult consume(Response &resp, const char *begin, const char *end)
    {
        while( begin != end )
        {
            char input = *begin++;

            switch (state)
            {
            // ---- status line: "HTTP/<major>.<minor> <code> <reason>\r\n"
            case ResponseStatusStart:
                if( input != 'H' )
                {
                    return ParsingError;
                }
                else
                {
                    state = ResponseHttpVersion_ht;
                }
                break;
            case ResponseHttpVersion_ht:
                if( input == 'T' )
                {
                    state = ResponseHttpVersion_htt;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_htt:
                if( input == 'T' )
                {
                    state = ResponseHttpVersion_http;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_http:
                if( input == 'P' )
                {
                    state = ResponseHttpVersion_slash;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_slash:
                if( input == '/' )
                {
                    resp.versionMajor = 0;
                    resp.versionMinor = 0;
                    state = ResponseHttpVersion_majorStart;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_majorStart:
                if( isDigit(input) )
                {
                    resp.versionMajor = input - '0';
                    state = ResponseHttpVersion_major;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_major:
                if( input == '.' )
                {
                    state = ResponseHttpVersion_minorStart;
                }
                else if( isDigit(input) )
                {
                    resp.versionMajor = resp.versionMajor * 10 + input - '0';
                    // Bail out long before the int could overflow.
                    if( resp.versionMajor > MaxVersionNumber )
                        return ParsingError;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_minorStart:
                if( isDigit(input) )
                {
                    resp.versionMinor = input - '0';
                    state = ResponseHttpVersion_minor;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_minor:
                if( input == ' ')
                {
                    state = ResponseHttpVersion_statusCodeStart;
                    resp.statusCode = 0;
                }
                else if( isDigit(input) )
                {
                    resp.versionMinor = resp.versionMinor * 10 + input - '0';
                    if( resp.versionMinor > MaxVersionNumber )
                        return ParsingError;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_statusCodeStart:
                if( isDigit(input) )
                {
                    resp.statusCode = input - '0';
                    state = ResponseHttpVersion_statusCode;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_statusCode:
                if( isDigit(input) )
                {
                    resp.statusCode = resp.statusCode * 10
                                      + static_cast<unsigned int>(input - '0');
                    // More than three digits can never be valid; rejecting
                    // here also prevents unsigned wrap-around on long runs.
                    if( resp.statusCode > 999 )
                        return ParsingError;
                }
                else
                {
                    if( resp.statusCode < 100 || resp.statusCode > 999 )
                    {
                        return ParsingError;
                    }
                    else if( input == ' ' )
                    {
                        state = ResponseHttpVersion_statusTextStart;
                    }
                    else
                    {
                        return ParsingError;
                    }
                }
                break;
            case ResponseHttpVersion_statusTextStart:
                if( input == '\r' )
                {
                    // Empty reason phrase: go straight to the line ending
                    // instead of storing the CR as status text.
                    state = ResponseHttpVersion_newLine;
                }
                else if( isChar(input) )
                {
                    resp.status += input;
                    state = ResponseHttpVersion_statusText;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_statusText:
                if( input == '\r' )
                {
                    state = ResponseHttpVersion_newLine;
                }
                else if( isChar(input) )
                {
                    resp.status += input;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ResponseHttpVersion_newLine:
                if( input == '\n' )
                {
                    state = HeaderLineStart;
                }
                else
                {
                    return ParsingError;
                }
                break;

            // ---- header section
            case HeaderLineStart:
                if( input == '\r' )
                {
                    // Blank line: end of headers.
                    state = ExpectingNewline_3;
                }
                else if( !resp.headers.empty() && (input == ' ' || input == '\t') )
                {
                    // Leading whitespace continues the previous header value.
                    state = HeaderLws;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParsingError;
                }
                else
                {
                    resp.headers.push_back(Response::HeaderItem());
                    resp.headers.back().name.reserve(16);
                    resp.headers.back().value.reserve(16);
                    resp.headers.back().name.push_back(input);
                    state = HeaderName;
                }
                break;
            case HeaderLws:
                if( input == '\r' )
                {
                    state = ExpectingNewline_2;
                }
                else if( input == ' ' || input == '\t' )
                {
                    // skip further folding whitespace
                }
                else if( isControl(input) )
                {
                    return ParsingError;
                }
                else
                {
                    state = HeaderValue;
                    resp.headers.back().value.push_back(input);
                }
                break;
            case HeaderName:
                if( input == ':' )
                {
                    state = SpaceBeforeHeaderValue;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParsingError;
                }
                else
                {
                    resp.headers.back().name.push_back(input);
                }
                break;
            case SpaceBeforeHeaderValue:
                if( input == ' ' )
                {
                    state = HeaderValue;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case HeaderValue:
                if( input == '\r' )
                {
                    Response::HeaderItem &h = resp.headers.back();

                    if( strcasecmp(h.name.c_str(), "Content-Length") == 0 )
                    {
                        if( !parseSize(h.value, 10, contentSize) )
                            return ParsingError;
                        // Reserve only what this buffer can still deliver;
                        // the declared length comes from the peer.
                        resp.content.reserve(
                            std::min(contentSize, static_cast<size_t>(end - begin)));
                    }
                    else if( strcasecmp(h.name.c_str(), "Transfer-Encoding") == 0 )
                    {
                        if(strcasecmp(h.value.c_str(), "chunked") == 0)
                            chunked = true;
                    }
                    state = ExpectingNewline_2;
                }
                else if( isControl(input) )
                {
                    return ParsingError;
                }
                else
                {
                    resp.headers.back().value.push_back(input);
                }
                break;
            case ExpectingNewline_2:
                if( input == '\n' )
                {
                    state = HeaderLineStart;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ExpectingNewline_3: {
                // The blank line must be CR LF regardless of what follows.
                if( input != '\n' )
                    return ParsingError;

                std::vector<Response::HeaderItem>::iterator it =
                    std::find_if(resp.headers.begin(),
                                 resp.headers.end(),
                                 checkIfConnection);

                if( it != resp.headers.end() )
                {
                    if( strcasecmp(it->value.c_str(), "Keep-Alive") == 0 )
                    {
                        resp.keepAlive = true;
                    }
                    else // == Close
                    {
                        resp.keepAlive = false;
                    }
                }
                else
                {
                    // No Connection header: keep-alive for HTTP/1.1 and any
                    // major version above 1.
                    if( resp.versionMajor > 1 || (resp.versionMajor == 1 && resp.versionMinor == 1) )
                        resp.keepAlive = true;
                }

                if( chunked )
                {
                    state = ChunkSize;
                }
                else if( contentSize == 0 )
                {
                    return ParsingCompleted;
                }
                else
                {
                    state = Post;
                }
                break;
            }

            // ---- Content-Length delimited body
            case Post:
                --contentSize;
                resp.content.push_back(input);

                if( contentSize == 0 )
                {
                    return ParsingCompleted;
                }
                break;

            // ---- chunked body
            case ChunkSize:
                if( digitValue(input) >= 0 )
                {
                    // Chunk sizes are hexadecimal digits only.
                    chunkSizeStr.push_back(input);
                }
                else if( input == ';' )
                {
                    state = ChunkExtensionName;
                }
                else if( input == '\r' )
                {
                    state = ChunkSizeNewLine;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkExtensionName:
                if( isAlnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '=' )
                {
                    state = ChunkExtensionValue;
                }
                else if( input == '\r' )
                {
                    state = ChunkSizeNewLine;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkExtensionValue:
                if( isAlnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '\r' )
                {
                    state = ChunkSizeNewLine;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkSizeNewLine:
                if( input == '\n' )
                {
                    if( chunkSizeStr.empty() )
                    {
                        // Reached after a trailer line (and after a size line
                        // with no digits): counts as a zero-sized chunk.
                        chunkSize = 0;
                    }
                    else if( !parseSize(chunkSizeStr, 16, chunkSize) )
                    {
                        return ParsingError;
                    }
                    chunkSizeStr.clear();
                    // Same allocation cap as for Content-Length.
                    resp.content.reserve(
                        resp.content.size()
                        + std::min(chunkSize, static_cast<size_t>(end - begin)));

                    if( chunkSize == 0 )
                        state = ChunkSizeNewLine_2;
                    else
                        state = ChunkData;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkSizeNewLine_2:
                if( input == '\r' )
                {
                    state = ChunkSizeNewLine_3;
                }
                else if( isAlpha(input) )
                {
                    state = ChunkTrailerName;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkSizeNewLine_3:
                if( input == '\n' )
                {
                    return ParsingCompleted;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkTrailerName:
                if( isAlnum(input) )
                {
                    // skip
                }
                else if( input == ':' )
                {
                    state = ChunkTrailerValue;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkTrailerValue:
                if( isAlnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '\r' )
                {
                    // Re-uses the size-line ending; chunkSizeStr is empty
                    // here, so the next state sees a zero-sized chunk.
                    state = ChunkSizeNewLine;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkData:
                resp.content.push_back(input);

                if( --chunkSize == 0 )
                {
                    state = ChunkDataNewLine_1;
                }
                break;
            case ChunkDataNewLine_1:
                if( input == '\r' )
                {
                    state = ChunkDataNewLine_2;
                }
                else
                {
                    return ParsingError;
                }
                break;
            case ChunkDataNewLine_2:
                if( input == '\n' )
                {
                    state = ChunkSize;
                }
                else
                {
                    return ParsingError;
                }
                break;
            default:
                return ParsingError;
            }
        }

        return ParsingIncompleted;
    }

    // Check if a byte is an HTTP character (7-bit ASCII).
    inline bool isChar(int c)
    {
        return c >= 0 && c <= 127;
    }

    // Check if a byte is an HTTP control character.
    inline bool isControl(int c)
    {
        return (c >= 0 && c <= 31) || (c == 127);
    }

    // Check if a byte is defined as an HTTP special character (separator).
    inline bool isSpecial(int c)
    {
        switch (c)
        {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '"':
        case '/': case '[': case ']': case '?': case '=':
        case '{': case '}': case ' ': case '\t':
            return true;
        default:
            return false;
        }
    }

    // Check if a byte is a decimal digit.
    inline bool isDigit(int c)
    {
        return c >= '0' && c <= '9';
    }

    // Check if a byte is an ASCII letter. Unlike isalpha() this is well
    // defined for negative char values.
    inline bool isAlpha(int c)
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    // Check if a byte is an ASCII letter or decimal digit.
    inline bool isAlnum(int c)
    {
        return isAlpha(c) || isDigit(c);
    }

    // The current state of the parser.
    enum State
    {
        ResponseStatusStart,
        ResponseHttpVersion_ht,
        ResponseHttpVersion_htt,
        ResponseHttpVersion_http,
        ResponseHttpVersion_slash,
        ResponseHttpVersion_majorStart,
        ResponseHttpVersion_major,
        ResponseHttpVersion_minorStart,
        ResponseHttpVersion_minor,
        ResponseHttpVersion_statusCodeStart,
        ResponseHttpVersion_statusCode,
        ResponseHttpVersion_statusTextStart,
        ResponseHttpVersion_statusText,
        ResponseHttpVersion_newLine,
        HeaderLineStart,
        HeaderLws,
        HeaderName,
        SpaceBeforeHeaderValue,
        HeaderValue,
        ExpectingNewline_2,
        ExpectingNewline_3,
        Post,
        ChunkSize,
        ChunkExtensionName,
        ChunkExtensionValue,
        ChunkSizeNewLine,
        ChunkSizeNewLine_2,
        ChunkSizeNewLine_3,
        ChunkTrailerName,
        ChunkTrailerValue,
        ChunkDataNewLine_1,
        ChunkDataNewLine_2,
        ChunkData,
    } state;

    size_t contentSize;       // body bytes still expected (Content-Length mode)
    std::string chunkSizeStr; // hex digits of the chunk-size line being read
    size_t chunkSize;         // bytes still expected in the current chunk
    bool chunked;             // "Transfer-Encoding: chunked" was seen
};
} // namespace httpparser
#endif // HTTPPARSER_RESPONSEPARSER_H

57
src/request.h

@ -0,0 +1,57 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_REQUEST_H
#define HTTPPARSER_REQUEST_H
#include <string>
#include <vector>
#include <sstream>
namespace httpparser
{
/**
 * Plain data holder for one parsed HTTP request: request line, headers,
 * raw body bytes and the keep-alive decision.
 */
struct Request {
    // A single "name: value" header line.
    struct HeaderItem
    {
        std::string name;
        std::string value;
    };

    // Starts out as version 0.0 with keep-alive off; strings and
    // containers are empty.
    Request()
        : versionMajor(0), versionMinor(0), keepAlive(false)
    {}

    std::string method;
    std::string uri;
    int versionMajor;
    int versionMinor;
    std::vector<HeaderItem> headers;
    std::vector<char> content;
    bool keepAlive;

    /**
     * Render the request as readable text: the request line, one line per
     * header, the body, and a final "+ keep-alive: <0|1>" marker line.
     */
    std::string inspect() const
    {
        std::ostringstream out;

        out << method << " " << uri << " HTTP/"
            << versionMajor << "." << versionMinor << "\n";

        typedef std::vector<HeaderItem>::size_type Index;
        for( Index i = 0; i < headers.size(); ++i )
        {
            const HeaderItem &header = headers[i];
            out << header.name << ": " << header.value << "\n";
        }

        const std::string body(content.begin(), content.end());
        out << body << "\n";
        out << "+ keep-alive: " << keepAlive << "\n";

        return out.str();
    }
};
} // namespace httpparser
#endif // HTTPPARSER_REQUEST_H

57
src/response.h

@ -0,0 +1,57 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_RESPONSE_H
#define HTTPPARSER_RESPONSE_H
#include <string>
#include <vector>
#include <sstream>
namespace httpparser
{
/**
 * Plain data holder for one parsed HTTP response: version, status code and
 * text, headers, raw body bytes and the keep-alive decision.
 */
struct Response {
    // A single "name: value" header line.
    struct HeaderItem
    {
        std::string name;
        std::string value;
    };

    // Starts out as version 0.0, status code 0, keep-alive off; strings and
    // containers are empty.
    Response()
        : versionMajor(0), versionMinor(0), keepAlive(false), statusCode(0)
    {}

    int versionMajor;
    int versionMinor;
    std::vector<HeaderItem> headers;
    std::vector<char> content;
    bool keepAlive;
    unsigned int statusCode;
    std::string status;

    /**
     * Render the response as readable text: the status line, one line per
     * header, then the body followed by a newline.
     */
    std::string inspect() const
    {
        std::ostringstream out;

        out << "HTTP/" << versionMajor << "." << versionMinor
            << " " << statusCode << " " << status << "\n";

        typedef std::vector<HeaderItem>::size_type Index;
        for( Index i = 0; i < headers.size(); ++i )
        {
            const HeaderItem &header = headers[i];
            out << header.name << ": " << header.value << "\n";
        }

        const std::string body(content.begin(), content.end());
        out << body << "\n";

        return out.str();
    }
};
} // namespace httpparser
#endif // HTTPPARSER_RESPONSE_H

387
src/urlparser.h

@ -0,0 +1,387 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_URLPARSER_H
#define HTTPPARSER_URLPARSER_H
#include <string>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
namespace httpparser
{
/**
 * Splits a URL of the form
 *     scheme://[user[:password]@]host[:port][/path][?query][#fragment]
 * into its components.
 *
 * Use parse() (or the converting constructor) and check isValid() before
 * reading any component; the accessors assert on an invalid parser.
 *
 * Behaviour notes:
 *  - a URL that ends directly after the port ("http://host:8080") keeps its
 *    port instead of tripping an assertion / dropping it;
 *  - bracketed IPv6 literals ("http://[::1]:8080/") are parsed; hostname()
 *    returns the address without the brackets;
 *  - passwords made only of digits, or empty ("user:@host"), are accepted;
 *  - a '?' inside the query string is preserved;
 *  - ports above 65535 make the URL invalid rather than being truncated;
 *  - when parsing fails every component is left empty.
 */
class UrlParser
{
public:
    UrlParser()
        : valid(false)
    {
    }

    // Parse `url` immediately; query the outcome with isValid().
    explicit UrlParser(const std::string &url)
        : valid(true)
    {
        parse(url);
    }

    // Discard any previous result, parse `str`, and return isValid().
    bool parse(const std::string &str)
    {
        url = Url();
        parse_(str);

        return isValid();
    }

    bool isValid() const
    {
        return valid;
    }

    std::string scheme() const
    {
        assert( isValid() );
        return url.scheme;
    }

    std::string username() const
    {
        assert( isValid() );
        return url.username;
    }

    std::string password() const
    {
        assert( isValid() );
        return url.password;
    }

    std::string hostname() const
    {
        assert( isValid() );
        return url.hostname;
    }

    std::string port() const
    {
        assert( isValid() );
        return url.port;
    }

    std::string path() const
    {
        assert( isValid() );
        return url.path;
    }

    std::string query() const
    {
        assert( isValid() );
        return url.query;
    }

    std::string fragment() const
    {
        assert( isValid() );
        return url.fragment;
    }

    // Explicit port if the URL had one, otherwise 443 for "https" and 80
    // for every other scheme.
    uint16_t httpPort() const
    {
        const uint16_t defaultHttpPort = 80;
        const uint16_t defaultHttpsPort = 443;

        assert( isValid() );

        if( url.port.empty() )
        {
            if( scheme() == "https" )
                return defaultHttpsPort;
            else
                return defaultHttpPort;
        }
        else
        {
            return url.integerPort;
        }
    }

    // Parsed components; all empty (and integerPort 0) until a successful parse.
    struct Url
    {
        Url() : integerPort(0)
        {}

        std::string scheme;
        std::string username;
        std::string password;
        std::string hostname;
        std::string port;
        std::string path;
        std::string query;
        std::string fragment;
        uint16_t integerPort;
    } url;

private:
    // ctype wrappers: the <ctype.h> functions require a value representable
    // as unsigned char, which a plain (possibly signed) char >= 0x80 is not.
    static bool isAlnumChar(char ch)
    {
        return isalnum(static_cast<unsigned char>(ch)) != 0;
    }

    static bool isDigitChar(char ch)
    {
        return isdigit(static_cast<unsigned char>(ch)) != 0;
    }

    static bool isHexDigitChar(char ch)
    {
        return (ch >= '0' && ch <= '9')
            || (ch >= 'a' && ch <= 'f')
            || (ch >= 'A' && ch <= 'F');
    }

    // Alphanumerics plus '-', '.', '_' and '~'.
    bool isUnreserved(char ch) const
    {
        if( isAlnumChar(ch) )
            return true;

        switch(ch)
        {
        case '-':
        case '.':
        case '_':
        case '~':
            return true;
        }

        return false;
    }

    // Mark the URL as malformed and clear every component.
    void invalidate()
    {
        valid = false;
        url = Url();
    }

    // Store `digits` as the port. An empty string leaves the port unset;
    // a value above 65535 is rejected (returns false, nothing stored).
    bool assignPort(const std::string &digits)
    {
        unsigned long value = 0;

        for(size_t i = 0; i < digits.size(); ++i)
        {
            value = value * 10 + static_cast<unsigned long>(digits[i] - '0');
            if( value > 65535UL )
                return false;
        }

        url.port = digits;
        url.integerPort = static_cast<uint16_t>(value);
        return true;
    }

    // Character-driven state machine behind parse().
    void parse_(const std::string &str)
    {
        enum {
            Scheme,
            SlashAfterScheme1,
            SlashAfterScheme2,
            UsernameOrHostname,
            Password,
            Hostname,
            IPV6Hostname,
            AfterIPV6Hostname,
            PortOrPassword,
            Port,
            Path,
            Query,
            Fragment
        } state = Scheme;

        // Until '@', ':' or '/' shows up we cannot tell "user" from "host",
        // nor "password" from "port"; buffer them here meanwhile.
        std::string usernameOrHostname;
        std::string portOrPassword;

        valid = true;
        url.path = "/";
        url.integerPort = 0;

        for(size_t i = 0; i < str.size() && valid; ++i)
        {
            const char ch = str[i];

            switch(state)
            {
            case Scheme:
                if( isAlnumChar(ch) || ch == '+' || ch == '-' || ch == '.')
                {
                    url.scheme += ch;
                }
                else if( ch == ':' )
                {
                    state = SlashAfterScheme1;
                }
                else
                {
                    invalidate();
                }
                break;
            case SlashAfterScheme1:
                if( ch == '/' )
                {
                    state = SlashAfterScheme2;
                }
                else if( isAlnumChar(ch) )
                {
                    // "scheme:host..." without the double slash.
                    usernameOrHostname = ch;
                    state = UsernameOrHostname;
                }
                else
                {
                    invalidate();
                }
                break;
            case SlashAfterScheme2:
                if( ch == '/' )
                {
                    state = UsernameOrHostname;
                }
                else
                {
                    invalidate();
                }
                break;
            case UsernameOrHostname:
                if( ch == '[' && usernameOrHostname.empty() )
                {
                    state = IPV6Hostname;
                }
                else if( isUnreserved(ch) || ch == '%' )
                {
                    usernameOrHostname += ch;
                }
                else if( ch == ':' )
                {
                    state = PortOrPassword;
                }
                else if( ch == '@' )
                {
                    state = Hostname;
                    url.username.swap(usernameOrHostname);
                }
                else if( ch == '/' )
                {
                    state = Path;
                    url.hostname.swap(usernameOrHostname);
                }
                else
                {
                    invalidate();
                }
                break;
            case Password:
                if( isUnreserved(ch) || ch == '%' )
                {
                    url.password += ch;
                }
                else if( ch == '@' )
                {
                    state = Hostname;
                }
                else
                {
                    invalidate();
                }
                break;
            case Hostname:
                if( ch == '[' && url.hostname.empty() )
                {
                    state = IPV6Hostname;
                }
                else if(isUnreserved(ch) || ch == '%')
                {
                    url.hostname += ch;
                }
                else if(ch == ':')
                {
                    state = Port;
                }
                else if(ch == '/')
                {
                    state = Path;
                }
                else
                {
                    invalidate();
                }
                break;
            case IPV6Hostname:
                // Inside "[...]": collect the address text up to ']'.
                if( ch == ']' )
                {
                    if( url.hostname.empty() )
                        invalidate();
                    else
                        state = AfterIPV6Hostname;
                }
                else if( isHexDigitChar(ch) || ch == ':' || ch == '.' )
                {
                    url.hostname += ch;
                }
                else
                {
                    invalidate();
                }
                break;
            case AfterIPV6Hostname:
                if( ch == ':' )
                {
                    state = Port;
                }
                else if( ch == '/' )
                {
                    state = Path;
                }
                else
                {
                    invalidate();
                }
                break;
            case PortOrPassword:
                if( isDigitChar(ch) )
                {
                    portOrPassword += ch;
                }
                else if( ch == '/' )
                {
                    // It was "host:port".
                    url.hostname.swap(usernameOrHostname);
                    if( !assignPort(portOrPassword) )
                        invalidate();
                    portOrPassword.clear();
                    state = Path;
                }
                else if( ch == '@' )
                {
                    // It was "user:password" with a numeric or empty password.
                    url.username.swap(usernameOrHostname);
                    url.password.swap(portOrPassword);
                    state = Hostname;
                }
                else if( isUnreserved(ch) || ch == '%' )
                {
                    // A non-digit settles it: this is a password.
                    url.username.swap(usernameOrHostname);
                    url.password.swap(portOrPassword);
                    url.password += ch;
                    state = Password;
                }
                else
                {
                    invalidate();
                }
                break;
            case Port:
                if( isDigitChar(ch) )
                {
                    portOrPassword += ch;
                }
                else if(ch == '/')
                {
                    if( !assignPort(portOrPassword) )
                        invalidate();
                    portOrPassword.clear();
                    state = Path;
                }
                else
                {
                    invalidate();
                }
                break;
            case Path:
                if( ch == '#')
                {
                    state = Fragment;
                }
                else if( ch == '?' )
                {
                    state = Query;
                }
                else
                {
                    url.path += ch;
                }
                break;
            case Query:
                if( ch == '#')
                {
                    state = Fragment;
                }
                else
                {
                    // Includes any further '?', which is ordinary query data.
                    url.query += ch;
                }
                break;
            case Fragment:
                url.fragment += ch;
                break;
            }
        }

        if( !valid )
            return;

        // Input ended; flush whatever is still buffered.
        switch(state)
        {
        case PortOrPassword:
            // "scheme://host:port" with nothing after the port.
            url.hostname.swap(usernameOrHostname);
            if( !assignPort(portOrPassword) )
                invalidate();
            break;
        case Port:
            if( !assignPort(portOrPassword) )
                invalidate();
            break;
        case IPV6Hostname:
            // '[' was never closed.
            invalidate();
            break;
        default:
            if( !usernameOrHostname.empty() )
                url.hostname = usernameOrHostname;
            break;
        }
    }

    bool valid;
};
} // namespace httpparser
#endif // HTTPPARSER_URLPARSER_H
Loading…
Cancel
Save