Compare commits

...

No commits in common. 'batman' and 'master' have entirely different histories.

  1. 26
      .Rbuildignore
  2. 1
      .codecov.yml
  3. 1
      .github/.gitignore
  4. 26
      .github/workflows/R-CMD-check.yaml
  5. 9
      .gitignore
  6. 12
      .travis.yml
  7. 25
      CONDUCT.md
  8. 32
      DESCRIPTION
  9. 2
      LICENSE
  10. 21
      LICENSE.md
  11. 12
      NAMESPACE
  12. 2
      NEWS.md
  13. 94
      R/RcppExports.R
  14. 12
      R/construe-package.R
  15. 11
      R/utils-pipe.R
  16. 185
      README.Rmd
  17. 498
      README.md
  18. 52
      appveyor.yml
  19. 21
      construe.Rproj
  20. 20
      inst/extdat/example.hdr
  21. 64
      inst/tinytest/test_construe.R
  22. 23
      man/construe.Rd
  23. 31
      man/parse_request.Rd
  24. 34
      man/parse_response.Rd
  25. 18
      man/parse_url.Rd
  26. 12
      man/pipe.Rd
  27. 23
      man/read_file_raw.Rd
  28. 3
      src/.gitignore
  29. 2
      src/Makevars
  30. 93
      src/RcppExports.cpp
  31. 385
      src/code.cpp
  32. 625
      src/httprequestparser.h
  33. 618
      src/httpresponseparser.h
  34. 57
      src/request.h
  35. 57
      src/response.h
  36. 387
      src/urlparser.h
  37. 5
      tests/tinytest.R

26
.Rbuildignore

@ -1,26 +0,0 @@
^.vscode$
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^README\.*Rmd$
^README\.*html$
^NOTES\.*Rmd$
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$
^docs$
^tmp$
^notes$
^CONDUCT.*$
^CODE.*$
^\.gitlab-ci\.yml$
^\.vscode$
^CRAN-RELEASE$
^appveyor\.yml$
^tools$
^LICENSE\.md$
^bld$
^node_modules$
^package-lock\.json$
^\.github$

1
.codecov.yml

@ -1 +0,0 @@
comment: false

1
.github/.gitignore

@ -1 +0,0 @@
*.html

26
.github/workflows/R-CMD-check.yaml

@ -1,26 +0,0 @@
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
push:
branches:
- batman
pull_request:
branches:
- batman
name: R-CMD-check
jobs:
R-CMD-check:
runs-on: macOS-latest
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@master
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
shell: Rscript {0}
- name: Check
run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
shell: Rscript {0}

9
.gitignore

@ -1,9 +0,0 @@
.DS_Store
.Rproj.user
.Rhistory
.RData
.Rproj
README_cache
src/*.o
src/*.so
src/*.dll

12
.travis.yml

@ -1,12 +0,0 @@
language: R
sudo: false
cache: packages
before_install:
- echo "options(repos = c(CRAN = 'https://packagemanager.rstudio.com/all/__linux__/xenial/latest'))" >> ~/.Rprofile.site
- echo "options(HTTPUserAgent = paste0('R/', getRversion(), ' R (',
paste(getRversion(), R.version['platform'], R.version['arch'], R.version['os']),
')'))" >> ~/.Rprofile.site
after_success:
- Rscript -e 'covr::codecov()'

25
CONDUCT.md

@ -1,25 +0,0 @@
# Contributor Code of Conduct
As contributors and maintainers of this project, we pledge to respect all people who
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.
We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.
Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed
from the project team.
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
opening an issue or contacting one or more of the project maintainers.
This Code of Conduct is adapted from the Contributor Covenant
(http://contributor-covenant.org), version 1.0.0, available at
http://contributor-covenant.org/version/1/0/0/

32
DESCRIPTION

@ -1,32 +0,0 @@
Package: construe
Type: Package
Title: HTTP Request, Response and URL Parser
Version: 0.1.0
Date: 2020-08-28
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640")),
person("Alex", "Nekipelov", email = "alex@nekipelov.net", role = "aut",
comment = "httpparser C++ library"),
person("Ícaro", "Dantas de Araújo Lima", role = "ctb",
comment = "httpparser C++ library")
)
Maintainer: Bob Rudis <bob@rud.is>
Description: A simple and fast HTTP request, response and URL parser based on the C++ 'httpparser' library
by Alex Nekipelov (<https://github.com/nekipelov/httpparser>).
URL: https://git.rud.is/hrbrmstr/construe
BugReports: https://git.rud.is/hrbrmstr/construe/issues
SystemRequirements: C++11
Encoding: UTF-8
License: MIT + file LICENSE
Suggests:
covr, tinytest
Depends:
R (>= 3.6.0)
Imports:
Rcpp,
magrittr
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
LinkingTo:
Rcpp

2
LICENSE

@ -1,2 +0,0 @@
YEAR: 2020
COPYRIGHT HOLDER: Bob Rudis

21
LICENSE.md

@ -1,21 +0,0 @@
# MIT License
Copyright (c) 2020 Bob Rudis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

12
NAMESPACE

@ -1,12 +0,0 @@
# Generated by roxygen2: do not edit by hand
export("%>%")
export(parse_request)
export(parse_request_raw)
export(parse_response)
export(parse_response_raw)
export(parse_url)
export(read_file_raw)
importFrom(Rcpp,sourceCpp)
importFrom(magrittr,"%>%")
useDynLib(construe, .registration = TRUE)

2
NEWS.md

@ -1,2 +0,0 @@
0.1.0
* Initial release

94
R/RcppExports.R

@ -1,94 +0,0 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' Parse an HTTP request
#'
#' Use `parse_request()` when the input is known to be plain text;
#' `parse_request_raw()` takes the same request as a raw vector (e.g. the
#' result of `charToRaw()`).
#'
#' @param req HTTP request; a character string for `parse_request()`, a raw
#' vector for `parse_request_raw()`
#' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
#' element are converted to lower case
#' @return A list of class `http_request` with elements `method`, `uri`,
#' `vers_maj`, `vers_min`, `keepalive`, `headers` (a data frame with `name`
#' and `value` columns) and `content` (a raw vector).
#' @export
#' @examples
#' paste0(c(
#' "GET /uri.cgi HTTP/1.1\r\n",
#' "User-Agent: Mozilla/5.0\r\n",
#' "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
#' "Host: 127.0.0.1\r\n", "\r\n"
#' ), collapse = "") -> req
#'
#' res <- parse_request(req)
#' res <- parse_request_raw(charToRaw(req))
parse_request <- function(req, headers_lowercase = TRUE) {
# Thin wrapper: both arguments are forwarded unchanged to the compiled routine.
.Call(`_construe_parse_request`, req, headers_lowercase)
}
#' @rdname parse_request
#' @param req HTTP request; a character string for `parse_request()`, a raw
#' vector for `parse_request_raw()`
#' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
#' element are converted to lower case
#' @export
parse_request_raw <- function(req, headers_lowercase = TRUE) {
# Thin wrapper: both arguments are forwarded unchanged to the compiled routine.
.Call(`_construe_parse_request_raw`, req, headers_lowercase)
}
#' Parse an HTTP response
#'
#' Use `parse_response()` when the input is known to be plain text;
#' `parse_response_raw()` takes the same response as a raw vector (e.g. the
#' result of `charToRaw()`).
#'
#' @param resp HTTP response; a character string for `parse_response()`, a raw
#' vector for `parse_response_raw()`
#' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
#' element are converted to lower case
#' @return A list of class `http_response` with elements `status_msg`,
#' `status_code`, `vers_maj`, `vers_min`, `keepalive`, `headers` (a data
#' frame with `name` and `value` columns) and `content` (a raw vector).
#' @export
#' @examples
#' paste0(c(
#' "HTTP/1.1 200 OK\r\n",
#' "Server: nginx/1.2.1\r\n",
#' "Content-Type: text/html\r\n",
#' "Content-Length: 8\r\n",
#' "Connection: keep-alive\r\n",
#' "\r\n",
#' "<html />"
#' ), collapse = "") -> resp
#'
#' res <- parse_response(resp)
#' res <- parse_response_raw(charToRaw(resp))
parse_response <- function(resp, headers_lowercase = TRUE) {
# Thin wrapper: both arguments are forwarded unchanged to the compiled routine.
.Call(`_construe_parse_response`, resp, headers_lowercase)
}
#' @rdname parse_response
#' @param resp HTTP response; a character string for `parse_response()`, a raw
#' vector for `parse_response_raw()`
#' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
#' element are converted to lower case
#' @export
parse_response_raw <- function(resp, headers_lowercase = TRUE) {
# Thin wrapper: both arguments are forwarded unchanged to the compiled routine.
.Call(`_construe_parse_response_raw`, resp, headers_lowercase)
}
#' Parse URLs
#'
#' Splits each URL into its components.
#'
#' @param urls character vector of URLs
#' @return A table with columns `scheme`, `username`, `password`, `hostname`,
#' `port`, `path`, `query` and `fragment`. In the package README example
#' there is one row per input URL and an unparseable URL yields a row of
#' `NA` values. NOTE(review): presumably a data frame -- confirm the exact
#' class against the C++ source.
#' @export
#' @examples
#' URL <- "http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment"
#' parse_url(URL)
parse_url <- function(urls) {
# Thin wrapper: `urls` is forwarded unchanged to the compiled routine.
.Call(`_construe_parse_url`, urls)
}
#' Read in a file, fast and raw
#'
#' @param fil file to read in (no path expansion is performed)
#' @param buffer_size larger buffer sizes may speed up reading of
#' very large files. It can also hurt performance, and this
#' function reads in the entire file into memory, so a
#' large buffer size also means more (temporary) memory will
#' be allocated.
#' @return The contents of `fil`. NOTE(review): callers in this package pass
#' the result to `rawToChar()` and `parse_response_raw()`, so this is
#' presumably a raw vector -- confirm against the C++ source.
#' @export
#' @examples
#' read_file_raw(system.file("extdat", "example.hdr", package = "construe"))
read_file_raw <- function(fil, buffer_size = 16384L) {
# Thin wrapper: both arguments are forwarded unchanged to the compiled routine.
.Call(`_construe_read_file_raw`, fil, buffer_size)
}

12
R/construe-package.R

@ -1,12 +0,0 @@
#' HTTP Request, Response and URL Parser
#'
#' A simple and fast HTTP request, response and URL parser based on the C++ 'httpparser' library
#' by Alex Nekipelov (<https://github.com/nekipelov/httpparser>)
#'
#' @md
#' @name construe
#' @keywords internal
#' @author Bob Rudis (bob@@rud.is)
#' @importFrom Rcpp sourceCpp
#' @useDynLib construe, .registration = TRUE
"_PACKAGE"

11
R/utils-pipe.R

@ -1,11 +0,0 @@
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL

185
README.Rmd

@ -1,185 +0,0 @@
---
output: rmarkdown::github_document
editor_options:
chunk_output_type: console
---
```{r pkg-knitr-opts, include=FALSE}
hrbrpkghelpr::global_opts()
```
```{r badges, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::stinking_badges(branch = "batman")
```
```{r description, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::yank_title_and_description()
```
## What's Inside The Tin
The following functions are implemented:
```{r ingredients, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::describe_ingredients()
```
## Installation
```{r install-ex, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::install_block()
```
## Usage
```{r lib-ex}
library(construe)
# current version
packageVersion("construe")
```
### Requests
```{r ex01}
paste0(c(
"GET /uri.cgi HTTP/1.1\r\n",
"User-Agent: Mozilla/5.0\r\n",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
"Host: 127.0.0.1\r\n", "\r\n"
), collapse = "") -> req
req_raw <- charToRaw(req)
parse_request(req)
parse_request_raw(req_raw)
microbenchmark::microbenchmark(
parse_request = parse_request(req),
parse_request_raw = parse_request_raw(req_raw)
)
```
### Responses
```{r ex02}
paste0(c(
"HTTP/1.1 200 OK\r\n",
"Server: nginx/1.2.1\r\n",
"Content-Type: text/html\r\n",
"Content-Length: 8\r\n",
"Connection: keep-alive\r\n",
"\r\n",
"<html />"
), collapse = "") -> resp
resp_raw <- charToRaw(resp)
parse_response(resp)
parse_response_raw(resp_raw)
microbenchmark::microbenchmark(
parse_response = parse_response(resp),
parse_response_raw = parse_response_raw(resp_raw)
)
```
### curl output example
`HEAD` request:
```{r curl-01}
sys::exec_internal(
cmd = "curl",
args = c("--include", "--head", "--silent", "https://httpbin.org/")
) -> res
str(parse_response(rawToChar(res$stdout)), 2)
curl::curl_fetch_memory(
"https://httpbin.org/",
handle = curl::new_handle(
nobody = TRUE
)
) -> res
str(construe::parse_response_raw(res$headers), 2)
curl::curl_fetch_memory(
"http://rud.is/b",
handle = curl::new_handle(
nobody = TRUE,
followlocation = TRUE
)
) -> res
rawToChar(res$headers) %>%
strsplit("(?m)\r\n\r\n", perl = TRUE) %>%
unlist() %>%
lapply(construe::parse_response) %>%
str(2)
```
`GET` request:
```{r curl-02}
sys::exec_internal(
cmd = "curl",
args = c("--include", "--silent", "https://httpbin.org/")
) -> res
str(parse_response_raw(res$stdout), 2)
res <- curl::curl_fetch_memory("https://httpbin.org/")
str(construe::parse_response_raw(res$headers), 2)
```
### URLs
```{r ex03}
c(
"git+ssh://example.com/path/file",
"https://example.com/path/file",
"http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com",
"http://username@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"ftp://username:passwd@ftp.example.com/dir/filename.ext",
"mailto:username@example.com",
"svn+ssh://hostname-01.org/path/to/file",
"xddp::://///blah.wat/?"
) -> turls
parse_url(turls)
microbenchmark::microbenchmark(
parse_url = parse_url(turls[1])
)
```
### Parse headers from Palo Alto `HEAD` requests
```{r why}
hdr <- read_file_raw(system.file("extdat", "example.hdr", package = "construe"))
cat(rawToChar(hdr))
parse_response_raw(hdr)
```
## construe Metrics
```{r cloc, echo=FALSE}
cloc::cloc_pkg_md()
```
## Code of Conduct
Please note that this project is released with a Contributor Code of Conduct.
By participating in this project you agree to abide by its terms.

498
README.md

@ -1,500 +1,2 @@
[![Project Status: Active – The project has reached a stable, usable
state and is being actively
developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![Signed
by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr)
![Signed commit
%](https://img.shields.io/badge/Signed_Commits-100%25-lightgrey.svg)
[![R-CMD-check](https://github.com/hrbrmstr/construe/workflows/R-CMD-check/badge.svg)](https://github.com/hrbrmstr/construe/actions?query=workflow%3AR-CMD-check)
[![Linux build
Status](https://travis-ci.org/hrbrmstr/construe.svg?branch=batman)](https://travis-ci.org/hrbrmstr/construe)
[![Windows build
status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/construe?svg=true)](https://ci.appveyor.com/project/hrbrmstr/construe)
[![Coverage
Status](https://codecov.io/gh/hrbrmstr/construe/branch/batman/graph/badge.svg)](https://codecov.io/gh/hrbrmstr/construe)
![Minimal R
Version](https://img.shields.io/badge/R%3E%3D-3.6.0-blue.svg)
![License](https://img.shields.io/badge/License-MIT-blue.svg)
# construe
HTTP Request, Response and URL Parser
## Description
A simple and fast HTTP request, response and URL parser based on the C++
‘httpparser’ library by Alex Nekipelov
(<https://github.com/nekipelov/httpparser>).
## What’s Inside The Tin
The following functions are implemented:
- `parse_request`: Parse an HTTP request
- `parse_response`: Parse an HTTP response
- `parse_url`: Parse URLs
- `read_file_raw`: Read in a file, fast and raw
## Installation
``` r
install.packages("construe", repos = c("https://cinc.rud.is", "https://cloud.r-project.org/"))
# or
remotes::install_git("https://git.rud.is/hrbrmstr/construe.git")
# or
remotes::install_git("https://git.sr.ht/~hrbrmstr/construe")
# or
remotes::install_gitlab("hrbrmstr/construe")
# or
remotes::install_bitbucket("hrbrmstr/construe")
# or
remotes::install_github("hrbrmstr/construe")
```
NOTE: To use the ‘remotes’ install options you will need to have the
[{remotes} package](https://github.com/r-lib/remotes) installed.
## Usage
``` r
library(construe)
# current version
packageVersion("construe")
## [1] '0.1.0'
```
### Requests
``` r
paste0(c(
"GET /uri.cgi HTTP/1.1\r\n",
"User-Agent: Mozilla/5.0\r\n",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
"Host: 127.0.0.1\r\n", "\r\n"
), collapse = "") -> req
req_raw <- charToRaw(req)
parse_request(req)
## $method
## [1] "GET"
##
## $uri
## [1] "/uri.cgi"
##
## $vers_maj
## [1] 1
##
## $vers_min
## [1] 1
##
## $keepalive
## [1] TRUE
##
## $headers
## name value
## 1 user-agent Mozilla/5.0
## 2 accept text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
## 3 host 127.0.0.1
##
## $content
## raw(0)
##
## attr(,"class")
## [1] "http_request" "list"
parse_request_raw(req_raw)
## $method
## [1] "GET"
##
## $uri
## [1] "/uri.cgi"
##
## $vers_maj
## [1] 1
##
## $vers_min
## [1] 1
##
## $keepalive
## [1] TRUE
##
## $headers
## name value
## 1 user-agent Mozilla/5.0
## 2 accept text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
## 3 host 127.0.0.1
##
## $content
## raw(0)
##
## attr(,"class")
## [1] "http_request" "list"
microbenchmark::microbenchmark(
parse_request = parse_request(req),
parse_request_raw = parse_request_raw(req_raw)
)
## Unit: microseconds
## expr min lq mean median uq max neval
## parse_request 143.630 147.311 189.2766 155.0385 191.2150 1014.576 100
## parse_request_raw 144.949 149.028 195.7029 159.6640 204.2565 829.436 100
```
### Responses
``` r
paste0(c(
"HTTP/1.1 200 OK\r\n",
"Server: nginx/1.2.1\r\n",
"Content-Type: text/html\r\n",
"Content-Length: 8\r\n",
"Connection: keep-alive\r\n",
"\r\n",
"<html />"
), collapse = "") -> resp
resp_raw <- charToRaw(resp)
parse_response(resp)
## $status_msg
## [1] "OK"
##
## $status_code
## [1] 200
##
## $vers_maj
## [1] 1
##
## $vers_min
## [1] 0
##
## $keepalive
## [1] TRUE
##
## $headers
## name value
## 1 server nginx/1.2.1
## 2 content-type text/html
## 3 content-length 8
## 4 connection keep-alive
##
## $content
## [1] 3c 68 74 6d 6c 20 2f 3e
##
## attr(,"class")
## [1] "http_response" "list"
parse_response_raw(resp_raw)
## $status_msg
## [1] "OK"
##
## $status_code
## [1] 200
##
## $vers_maj
## [1] 1
##
## $vers_min
## [1] 0
##
## $keepalive
## [1] TRUE
##
## $headers
## name value
## 1 server nginx/1.2.1
## 2 content-type text/html
## 3 content-length 8
## 4 connection keep-alive
##
## $content
## [1] 3c 68 74 6d 6c 20 2f 3e
##
## attr(,"class")
## [1] "http_response" "list"
microbenchmark::microbenchmark(
parse_response = parse_response(resp),
parse_response_raw = parse_response_raw(resp_raw)
)
## Unit: microseconds
## expr min lq mean median uq max neval
## parse_response 142.521 146.0330 159.1186 147.1825 152.0095 275.389 100
## parse_response_raw 143.475 145.6985 160.2940 147.5150 154.3220 432.524 100
```
### curl output example
`HEAD` request:
``` r
sys::exec_internal(
cmd = "curl",
args = c("--include", "--head", "--silent", "https://httpbin.org/")
) -> res
str(parse_response(rawToChar(res$stdout)), 2)
## List of 7
## $ status_msg : chr ""
## $ status_code: num 200
## $ vers_maj : int 2
## $ vers_min : int 0
## $ keepalive : logi TRUE
## $ headers :'data.frame': 6 obs. of 2 variables:
## ..$ name : chr [1:6] "date" "content-type" "content-length" "server" ...
## ..$ value: chr [1:6] "Tue, 01 Sep 2020 03:01:20 GMT" "text/html; charset=utf-8" "9593" "gunicorn/19.9.0" ...
## $ content : raw(0)
## - attr(*, "class")= chr [1:2] "http_response" "list"
curl::curl_fetch_memory(
"https://httpbin.org/",
handle = curl::new_handle(
nobody = TRUE
)
) -> res
str(construe::parse_response_raw(res$headers), 2)
## List of 7
## $ status_msg : chr ""
## $ status_code: num 200
## $ vers_maj : int 2
## $ vers_min : int 0
## $ keepalive : logi TRUE
## $ headers :'data.frame': 6 obs. of 2 variables:
## ..$ name : chr [1:6] "date" "content-type" "content-length" "server" ...
## ..$ value: chr [1:6] "Tue, 01 Sep 2020 03:01:20 GMT" "text/html; charset=utf-8" "9593" "gunicorn/19.9.0" ...
## $ content : raw(0)
## - attr(*, "class")= chr [1:2] "http_response" "list"
curl::curl_fetch_memory(
"http://rud.is/b",
handle = curl::new_handle(
nobody = TRUE,
followlocation = TRUE
)
) -> res
rawToChar(res$headers) %>%
strsplit("(?m)\r\n\r\n", perl = TRUE) %>%
unlist() %>%
lapply(construe::parse_response) %>%
str(2)
## List of 3
## $ :List of 7
## ..$ status_msg : chr "Moved Permanently"
## ..$ status_code: num 301
## ..$ vers_maj : int 1
## ..$ vers_min : int 0
## ..$ keepalive : logi FALSE
## ..$ headers :'data.frame': 6 obs. of 2 variables:
## ..$ content : raw(0)
## ..- attr(*, "class")= chr [1:2] "http_response" "list"
## $ :List of 7
## ..$ status_msg : chr ""
## ..$ status_code: num 301
## ..$ vers_maj : int 2
## ..$ vers_min : int 0
## ..$ keepalive : logi FALSE
## ..$ headers :'data.frame': 14 obs. of 2 variables:
## ..$ content : raw(0)
## ..- attr(*, "class")= chr [1:2] "http_response" "list"
## $ :List of 7
## ..$ status_msg : chr ""
## ..$ status_code: num 200
## ..$ vers_maj : int 2
## ..$ vers_min : int 0
## ..$ keepalive : logi FALSE
## ..$ headers :'data.frame': 19 obs. of 2 variables:
## ..$ content : raw(0)
## ..- attr(*, "class")= chr [1:2] "http_response" "list"
```
`GET` request:
``` r
sys::exec_internal(
cmd = "curl",
args = c("--include", "--silent", "https://httpbin.org/")
) -> res
str(parse_response_raw(res$stdout), 2)
## List of 7
## $ status_msg : chr ""
## $ status_code: num 200
## $ vers_maj : int 2
## $ vers_min : int 0
## $ keepalive : logi TRUE
## $ headers :'data.frame': 6 obs. of 2 variables:
## ..$ name : chr [1:6] "date" "content-type" "content-length" "server" ...
## ..$ value: chr [1:6] "Tue, 01 Sep 2020 03:01:22 GMT" "text/html; charset=utf-8" "9593" "gunicorn/19.9.0" ...
## $ content : raw [1:9593] 3c 21 44 4f ...
## - attr(*, "class")= chr [1:2] "http_response" "list"
res <- curl::curl_fetch_memory("https://httpbin.org/")
str(construe::parse_response_raw(res$headers), 2)
## List of 7
## $ status_msg : chr ""
## $ status_code: num 200
## $ vers_maj : int 2
## $ vers_min : int 0
## $ keepalive : logi TRUE
## $ headers :'data.frame': 6 obs. of 2 variables:
## ..$ name : chr [1:6] "date" "content-type" "content-length" "server" ...
## ..$ value: chr [1:6] "Tue, 01 Sep 2020 03:01:22 GMT" "text/html; charset=utf-8" "9593" "gunicorn/19.9.0" ...
## $ content : raw(0)
## - attr(*, "class")= chr [1:2] "http_response" "list"
```
### URLs
``` r
c(
"git+ssh://example.com/path/file",
"https://example.com/path/file",
"http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com",
"http://username@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"http://username:passwd@www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
"ftp://username:passwd@ftp.example.com/dir/filename.ext",
"mailto:username@example.com",
"svn+ssh://hostname-01.org/path/to/file",
"xddp::://///blah.wat/?"
) -> turls
parse_url(turls)
## scheme username password hostname port path query fragment
## 1 git+ssh example.com /path/file
## 2 https example.com /path/file
## 3 http www.example.com /dir/subdir param=1&param=2;param%20=%20 fragment
## 4 http www.example.com /
## 5 http username www.example.com /dir/subdir param=1&param=2;param%20=%20 fragment
## 6 http username passwd www.example.com /dir/subdir param=1&param=2;param%20=%20 fragment
## 7 http www.example.com 8080 /dir/subdir param=1&param=2;param%20=%20 fragment
## 8 http username passwd www.example.com 8080 /dir/subdir param=1&param=2;param%20=%20 fragment
## 9 ftp username passwd ftp.example.com /dir/filename.ext
## 10 mailto username example.com /
## 11 svn+ssh hostname-01.org /path/to/file
## 12 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
microbenchmark::microbenchmark(
parse_url = parse_url(turls[1])
)
## Unit: microseconds
## expr min lq mean median uq max neval
## parse_url 352.05 359.3035 397.343 374.0825 388.4345 755.995 100
```
### Parse headers from Palo Alto `HEAD` requests
``` r
hdr <- read_file_raw(system.file("extdat", "example.hdr", package = "construe"))
cat(rawToChar(hdr))
## HTTP/1.1 200 OK
## Date: Mon, 13 Jul 2020 11:23:49 GMT
## Content-Type: text/html; charset=UTF-8
## Content-Length: 11757
## Connection: keep-alive
## ETag: "6e185d1cea69"
## Pragma: no-cache
## Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
## Expires: Thu, 19 Nov 1981 08:52:00 GMT
## X-FRAME-OPTIONS: DENY
## Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## Strict-Transport-Security: max-age=31536000;
## X-XSS-Protection: 1; mode=block;
## X-Content-Type-Options: nosniff
## Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline'; img-src * data:; style-src 'self' 'unsafe-inline';
##
parse_response_raw(hdr)
## $status_msg
## [1] "OK"
##
## $status_code
## [1] 200
##
## $vers_maj
## [1] 1
##
## $vers_min
## [1] 0
##
## $keepalive
## [1] TRUE
##
## $headers
## name
## 1 date
## 2 content-type
## 3 content-length
## 4 connection
## 5 etag
## 6 pragma
## 7 cache-control
## 8 expires
## 9 x-frame-options
## 10 set-cookie
## 11 set-cookie
## 12 set-cookie
## 13 set-cookie
## 14 set-cookie
## 15 strict-transport-security
## 16 x-xss-protection
## 17 x-content-type-options
## 18 content-security-policy
## value
## 1 Mon, 13 Jul 2020 11:23:49 GMT
## 2 text/html; charset=UTF-8
## 3 11757
## 4 keep-alive
## 5 "6e185d1cea69"
## 6 no-cache
## 7 no-store, no-cache, must-revalidate, post-check=0, pre-check=0
## 8 Thu, 19 Nov 1981 08:52:00 GMT
## 9 DENY
## 10 PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## 11 PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## 12 PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## 13 PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## 14 PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
## 15 max-age=31536000;
## 16 1; mode=block;
## 17 nosniff
## 18 default-src 'self'; script-src 'self' 'unsafe-inline'; img-src * data:; style-src 'self' 'unsafe-inline';
##
## $content
## raw(0)
##
## attr(,"class")
## [1] "http_response" "list"
```
## construe Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :----------- | -------: | ---: | ---: | ---: | ----------: | ---: | -------: | ---: |
| C/C++ Header | 5 | 0.19 | 1556 | 0.39 | 122 | 0.22 | 66 | 0.12 |
| C++ | 2 | 0.08 | 305 | 0.08 | 92 | 0.17 | 81 | 0.14 |
| Rmd | 1 | 0.04 | 89 | 0.02 | 49 | 0.09 | 47 | 0.08 |
| R | 4 | 0.15 | 23 | 0.01 | 9 | 0.02 | 90 | 0.16 |
| YAML | 1 | 0.04 | 22 | 0.01 | 2 | 0.00 | 2 | 0.00 |
| SUM | 13 | 0.50 | 1995 | 0.50 | 274 | 0.50 | 286 | 0.50 |
cloc Package Metrics for construe
## Code of Conduct
Please note that this project is released with a Contributor Code of
Conduct. By participating in this project you agree to abide by its
terms.

52
appveyor.yml

@ -1,52 +0,0 @@
# DO NOT CHANGE the "init" and "install" sections below
# Download script file from GitHub
init:
ps: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
Import-Module '..\appveyor-tool.ps1'
install:
ps: Bootstrap
cache:
- C:\RLibrary
environment:
NOT_CRAN: true
# env vars that may need to be set, at least temporarily, from time to time
# see https://github.com/krlmlr/r-appveyor#readme for details
# USE_RTOOLS: true
# R_REMOTES_STANDALONE: true
# Adapt as necessary starting from here
build_script:
- travis-tool.sh install_deps
test_script:
- travis-tool.sh run_tests
on_failure:
- 7z a failure.zip *.Rcheck\*
- appveyor PushArtifact failure.zip
artifacts:
- path: '*.Rcheck\**\*.log'
name: Logs
- path: '*.Rcheck\**\*.out'
name: Logs
- path: '*.Rcheck\**\*.fail'
name: Logs
- path: '*.Rcheck\**\*.Rout'
name: Logs
- path: '\*_*.tar.gz'
name: Bits
- path: '\*_*.zip'
name: Bits

21
construe.Rproj

@ -1,21 +0,0 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageRoxygenize: rd,collate,namespace

20
inst/extdat/example.hdr

@ -1,20 +0,0 @@
HTTP/1.1 200 OK
Date: Mon, 13 Jul 2020 11:23:49 GMT
Content-Type: text/html; charset=UTF-8
Content-Length: 11757
Connection: keep-alive
ETag: "6e185d1cea69"
Pragma: no-cache
Cache-Control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
Expires: Thu, 19 Nov 1981 08:52:00 GMT
X-FRAME-OPTIONS: DENY
Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
Set-Cookie: PHPSESSID=d242c76e2e6ad2e71dd0c524c63e66d0; path=/; secure; HttpOnly
Strict-Transport-Security: max-age=31536000;
X-XSS-Protection: 1; mode=block;
X-Content-Type-Options: nosniff
Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline'; img-src * data:; style-src 'self' 'unsafe-inline';

64
inst/tinytest/test_construe.R

@ -1,64 +0,0 @@
# tinytest suite for the construe parsers: HTTP requests, HTTP responses,
# URLs, and the bundled example header file.
library(construe)

# ---- Requests ----

req <- paste0(c(
  "GET /uri.cgi HTTP/1.1\r\n",
  "User-Agent: Mozilla/5.0\r\n",
  "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
  "Host: 127.0.0.1\r\n", "\r\n"
), collapse = "")

# Keep the two results in separate variables. Previously the character-input
# result was overwritten by the raw-input result before any expectation ran,
# so parse_request() itself was never checked.
req_chr <- parse_request(req)
req_raw <- parse_request_raw(charToRaw(req))

expect_equal(req_chr$method[1], "GET")
expect_true(req_chr$keepalive[1])
expect_true("host" %in% req_chr$headers$name)

expect_equal(req_raw$method[1], "GET")
expect_true(req_raw$keepalive[1])
expect_true("host" %in% req_raw$headers$name)

# ---- Responses ----

resp <- paste0(c(
  "HTTP/1.1 200 OK\r\n",
  "Server: nginx/1.2.1\r\n",
  "Content-Type: text/html\r\n",
  "Content-Length: 8\r\n",
  "Connection: keep-alive\r\n",
  "\r\n",
  "<html />"
), collapse = "")

# Same as above: check the character and raw entry points independently.
resp_chr <- parse_response(resp)
resp_raw <- parse_response_raw(charToRaw(resp))

expect_equal(resp_chr$status_msg[1], "OK")
expect_true(resp_chr$keepalive[1])
expect_true("server" %in% resp_chr$headers$name)

expect_equal(resp_raw$status_msg[1], "OK")
expect_true(resp_raw$keepalive[1])
expect_true("server" %in% resp_raw$headers$name)

# ---- URLs ----

turls <- c(
  "git+ssh://example.com/path/file",
  "https://example.com/path/file",
  "http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://www.example.com",
  "http://username@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://username:passwd@www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "http://username:passwd@www.example.com:8080/dir/subdir?param=1&param=2;param%20=%20#fragment",
  "ftp://username:passwd@ftp.example.com/dir/filename.ext",
  "mailto:username@example.com",
  "svn+ssh://hostname-01.org/path/to/file",
  "xddp::://///blah.wat/?"
)

parsed_urls <- parse_url(turls)

# The last URL is deliberately malformed and must come back as NA.
expect_true(is.na(parsed_urls$scheme[12]))
expect_equal(parsed_urls$scheme[1], "git+ssh")

# ---- Bundled example header file ----

hdr <- parse_response_raw(
  read_file_raw(
    system.file("extdat", "example.hdr", package = "construe")
  )
)

# ETag is the fifth header in the file; names are lower-cased by default.
expect_equal(hdr$headers$name[[5]], "etag")

23
man/construe.Rd

@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/construe-package.R
\docType{package}
\name{construe}
\alias{construe}
\alias{construe-package}
\title{HTTP Request, Response and URL Parser}
\description{
A simple and fast HTTP request, response and URL parser based on the C++ 'httpparser' library
by Alex Nekipelov (\url{https://github.com/nekipelov/httpparser})
}
\seealso{
Useful links:
\itemize{
\item \url{https://git.rud.is/hrbrmstr/construe}
\item Report bugs at \url{https://git.rud.is/hrbrmstr/construe/issues}
}
}
\author{
Bob Rudis (bob@rud.is)
}
\keyword{internal}

31
man/parse_request.Rd

@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{parse_request}
\alias{parse_request}
\alias{parse_request_raw}
\title{Parse an HTTP request}
\usage{
parse_request(req, headers_lowercase = TRUE)
parse_request_raw(req, headers_lowercase = TRUE)
}
\arguments{
\item{req}{HTTP request character string}
\item{headers_lowercase}{if \code{TRUE} (the default) names in the \code{headers} data frame
element are converted to lower case}
}
\description{
You can use the non- \verb{_raw} version on input you know for sure is plain text
}
\examples{
paste0(c(
"GET /uri.cgi HTTP/1.1\r\n",
"User-Agent: Mozilla/5.0\r\n",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
"Host: 127.0.0.1\r\n", "\r\n"
), collapse = "") -> req
res <- parse_request(req)
res <- parse_request_raw(charToRaw(req))
}

34
man/parse_response.Rd

@ -1,34 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{parse_response}
\alias{parse_response}
\alias{parse_response_raw}
\title{Parse an HTTP response}
\usage{
parse_response(resp, headers_lowercase = TRUE)
parse_response_raw(resp, headers_lowercase = TRUE)
}
\arguments{
\item{resp}{HTTP response character string}
\item{headers_lowercase}{if \code{TRUE} (the default) names in the \code{headers} data frame
element are converted to lower case}
}
\description{
You can use the non- \verb{_raw} version on input you know for sure is plain text
}
\examples{
paste0(c(
"HTTP/1.1 200 OK\r\n",
"Server: nginx/1.2.1\r\n",
"Content-Type: text/html\r\n",
"Content-Length: 8\r\n",
"Connection: keep-alive\r\n",
"\r\n",
"<html />"
), collapse = "") -> resp
res <- parse_response(resp)
res <- parse_response_raw(charToRaw(resp))
}

18
man/parse_url.Rd

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{parse_url}
\alias{parse_url}
\title{Parse URLs}
\usage{
parse_url(urls)
}
\arguments{
\item{urls}{character vector of URLs}
}
\description{
Parse URLs
}
\examples{
URL <- "http://www.example.com/dir/subdir?param=1&param=2;param\%20=\%20#fragment"
parse_url(URL)
}

12
man/pipe.Rd

@ -1,12 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils-pipe.R
\name{\%>\%}
\alias{\%>\%}
\title{Pipe operator}
\usage{
lhs \%>\% rhs
}
\description{
See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
}
\keyword{internal}

23
man/read_file_raw.Rd

@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{read_file_raw}
\alias{read_file_raw}
\title{Read in a file, fast and raw}
\usage{
read_file_raw(fil, buffer_size = 16384L)
}
\arguments{
\item{fil}{file to read in (no path expansion is performed)}
\item{buffer_size}{larger buffer sizes may speed up reading of
very large files. It can also hurt performance, and this
function reads in the entire file into memory, so a
large buffer size also means more (temporary) memory will
be allocated.}
}
\description{
Read in a file, fast and raw
}
\examples{
read_file_raw(system.file("extdat", "example.hdr", package = "construe"))
}

3
src/.gitignore

@ -1,3 +0,0 @@
*.o
*.so
*.dll

2
src/Makevars

@ -1,2 +0,0 @@
CXX_STD = CXX11
PKG_LIBS = -L. -lz -lpthread -pthread -std=c++11

93
src/RcppExports.cpp

@ -1,93 +0,0 @@
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#include <Rcpp.h>
using namespace Rcpp;
// parse_request
// Forward declaration of the C++ implementation that this wrapper calls.
List parse_request(String req, bool headers_lowercase);
// Entry point registered in CallEntries below. Converts the two SEXP
// arguments to `String` / `bool`, calls parse_request(), and wraps the
// returned List back into a SEXP.
RcppExport SEXP _construe_parse_request(SEXP reqSEXP, SEXP headers_lowercaseSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< String >::type req(reqSEXP);
Rcpp::traits::input_parameter< bool >::type headers_lowercase(headers_lowercaseSEXP);
rcpp_result_gen = Rcpp::wrap(parse_request(req, headers_lowercase));
return rcpp_result_gen;
END_RCPP
}
// parse_request_raw
// Forward declaration of the C++ implementation that this wrapper calls.
List parse_request_raw(RawVector req, bool headers_lowercase);
// Entry point registered in CallEntries below. Converts the two SEXP
// arguments to `RawVector` / `bool`, calls parse_request_raw(), and wraps
// the returned List back into a SEXP.
RcppExport SEXP _construe_parse_request_raw(SEXP reqSEXP, SEXP headers_lowercaseSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< RawVector >::type req(reqSEXP);
Rcpp::traits::input_parameter< bool >::type headers_lowercase(headers_lowercaseSEXP);
rcpp_result_gen = Rcpp::wrap(parse_request_raw(req, headers_lowercase));
return rcpp_result_gen;
END_RCPP
}
// parse_response
// Forward declaration of the C++ implementation that this wrapper calls.
List parse_response(String resp, bool headers_lowercase);
// Entry point registered in CallEntries below. Converts the two SEXP
// arguments to `String` / `bool`, calls parse_response(), and wraps the
// returned List back into a SEXP.
RcppExport SEXP _construe_parse_response(SEXP respSEXP, SEXP headers_lowercaseSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< String >::type resp(respSEXP);
Rcpp::traits::input_parameter< bool >::type headers_lowercase(headers_lowercaseSEXP);
rcpp_result_gen = Rcpp::wrap(parse_response(resp, headers_lowercase));
return rcpp_result_gen;
END_RCPP
}
// parse_response_raw
// Forward declaration of the C++ implementation that this wrapper calls.
List parse_response_raw(RawVector resp, bool headers_lowercase);
// Entry point registered in CallEntries below. Converts the two SEXP
// arguments to `RawVector` / `bool`, calls parse_response_raw(), and wraps
// the returned List back into a SEXP.
RcppExport SEXP _construe_parse_response_raw(SEXP respSEXP, SEXP headers_lowercaseSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< RawVector >::type resp(respSEXP);
Rcpp::traits::input_parameter< bool >::type headers_lowercase(headers_lowercaseSEXP);
rcpp_result_gen = Rcpp::wrap(parse_response_raw(resp, headers_lowercase));
return rcpp_result_gen;
END_RCPP
}
// parse_url
// Forward declaration of the C++ implementation that this wrapper calls.
DataFrame parse_url(std::vector < std::string > urls);
// Entry point registered in CallEntries below. Converts the single SEXP
// argument to a std::vector of std::string, calls parse_url(), and wraps the
// returned DataFrame back into a SEXP.
RcppExport SEXP _construe_parse_url(SEXP urlsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::vector < std::string > >::type urls(urlsSEXP);
rcpp_result_gen = Rcpp::wrap(parse_url(urls));
return rcpp_result_gen;
END_RCPP
}
// read_file_raw
// Forward declaration of the C++ implementation that this wrapper calls.
RawVector read_file_raw(CharacterVector fil, int buffer_size);
// Entry point registered in CallEntries below. Converts the two SEXP
// arguments to `CharacterVector` / `int`, calls read_file_raw(), and wraps
// the returned RawVector back into a SEXP.
RcppExport SEXP _construe_read_file_raw(SEXP filSEXP, SEXP buffer_sizeSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< CharacterVector >::type fil(filSEXP);
Rcpp::traits::input_parameter< int >::type buffer_size(buffer_sizeSEXP);
rcpp_result_gen = Rcpp::wrap(read_file_raw(fil, buffer_size));
return rcpp_result_gen;
END_RCPP
}
// Registration table passed to R_registerRoutines() in R_init_construe().
// Each row is {routine name, function pointer, number of arguments}; the
// argument counts match the SEXP parameter lists of the wrappers above.
// The all-NULL row terminates the table.
static const R_CallMethodDef CallEntries[] = {
{"_construe_parse_request", (DL_FUNC) &_construe_parse_request, 2},
{"_construe_parse_request_raw", (DL_FUNC) &_construe_parse_request_raw, 2},
{"_construe_parse_response", (DL_FUNC) &_construe_parse_response, 2},
{"_construe_parse_response_raw", (DL_FUNC) &_construe_parse_response_raw, 2},
{"_construe_parse_url", (DL_FUNC) &_construe_parse_url, 1},
{"_construe_read_file_raw", (DL_FUNC) &_construe_read_file_raw, 2},
{NULL, NULL, 0}
};
// Library initialisation hook: registers the CallEntries table for this DLL
// and then turns dynamic symbol lookup off (R_useDynamicSymbols(dll, FALSE)).
RcppExport void R_init_construe(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE);
}

385
src/code.cpp

@ -1,385 +0,0 @@
#include <locale>
#include "request.h"
#include "httprequestparser.h"
#include "response.h"
#include "httpresponseparser.h"
#include "urlparser.h"
#include <fstream>
#include <string>
#include <sys/stat.h>
#include <stdlib.h>
#include <Rcpp.h>
using namespace Rcpp;
using namespace httpparser;
// Return a lower-cased copy of `str`.
//
// The argument is taken by value, modified in place and returned. Each byte
// is widened to `unsigned char` before it reaches std::tolower() and the
// result is narrowed back to `char`.
std::string str_tolower(std::string str) {
  for (std::string::iterator pos = str.begin(); pos != str.end(); ++pos) {
    const unsigned char byte = *pos;
    *pos = std::tolower(byte);
  }
  return str;
}
//' Parse an HTTP request
//'
//' You can use the non- `_raw` version on input you know for sure is plain text
//'
//' @param req HTTP request as a character string (`parse_request()`) or as a
//'        raw vector (`parse_request_raw()`)
//' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
//'        element are converted to lower case
//' @return a list with classes `http_request` and `list` holding the elements
//'         `method`, `uri`, `vers_maj`, `vers_min`, `keepalive`, `headers`
//'         (a data frame with `name` and `value` columns) and `content`
//'         (a raw vector). An error is raised if the input cannot be parsed.
//' @export
//' @examples
//' paste0(c(
//'   "GET /uri.cgi HTTP/1.1\r\n",
//'   "User-Agent: Mozilla/5.0\r\n",
//'   "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n",
//'   "Host: 127.0.0.1\r\n", "\r\n"
//' ), collapse = "") -> req
//'
//' res <- parse_request(req)
//' res <- parse_request_raw(charToRaw(req))
// [[Rcpp::export]]
List parse_request(String req, bool headers_lowercase = true) {

  Request request;
  HttpRequestParser parser;

  // The parser works on a [begin, end) range of unsigned bytes.
  const char *text = req.get_cstring();
  const unsigned char *first = reinterpret_cast<const unsigned char *>(text);

  HttpRequestParser::ParseResult res = parser.parse(request, first, first + strlen(text));

  // Both "completed" and "incompleted" are accepted as success, so input that
  // ends before the parser reports completion still yields a result.
  //
  // Rcpp::stop() is used instead of Rf_error(): it throws a C++ exception, so
  // the locals above are destroyed normally, whereas Rf_error() long-jumps out
  // of the function without running their destructors.
  if ((res != HttpRequestParser::ParsingCompleted) && (res != HttpRequestParser::ParsingIncompleted)) {
    Rcpp::stop("Parse error.");
  }

  // One row per header, in the order the parser stored them.
  StringVector names(request.headers.size());
  StringVector vals(request.headers.size());

  R_xlen_t idx = 0;
  for (std::vector<Request::HeaderItem>::const_iterator it = request.headers.begin(); it != request.headers.end(); ++it) {
    names[idx] = headers_lowercase ? str_tolower(it->name) : it->name;
    vals[idx++] = it->value;
  }

  DataFrame headers = DataFrame::create(
    _["name"] = names,
    _["value"] = vals
  );

  RawVector content(request.content.begin(), request.content.end());

  List l = List::create(
    _["method"] = request.method,
    _["uri"] = request.uri,
    _["vers_maj"] = request.versionMajor,
    _["vers_min"] = request.versionMinor,
    _["keepalive"] = request.keepAlive,
    _["headers"] = headers,
    _["content"] = content
  );
  l.attr("class") = CharacterVector::create("http_request", "list");

  return(l);

}
//' @rdname parse_request
//' @param req HTTP request as a character string (`parse_request()`) or as a
//'        raw vector (`parse_request_raw()`)
//' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
//'        element are converted to lower case
//' @export
// [[Rcpp::export]]
List parse_request_raw(RawVector req, bool headers_lowercase = true) {

  Request request;
  HttpRequestParser parser;

  // The raw vector's bytes are handed to the parser directly.
  HttpRequestParser::ParseResult res = parser.parse(request, req.begin(), req.end());

  // Both "completed" and "incompleted" are accepted as success, so input that
  // ends before the parser reports completion still yields a result.
  //
  // Rcpp::stop() is used instead of Rf_error(): it throws a C++ exception, so
  // the locals above are destroyed normally, whereas Rf_error() long-jumps out
  // of the function without running their destructors.
  if ((res != HttpRequestParser::ParsingCompleted) && (res != HttpRequestParser::ParsingIncompleted)) {
    Rcpp::stop("Parse error.");
  }

  // One row per header, in the order the parser stored them.
  StringVector names(request.headers.size());
  StringVector vals(request.headers.size());

  R_xlen_t idx = 0;
  for (std::vector<Request::HeaderItem>::const_iterator it = request.headers.begin(); it != request.headers.end(); ++it) {
    names[idx] = headers_lowercase ? str_tolower(it->name) : it->name;
    vals[idx++] = it->value;
  }

  DataFrame headers = DataFrame::create(
    _["name"] = names,
    _["value"] = vals
  );

  RawVector content(request.content.begin(), request.content.end());

  List l = List::create(
    _["method"] = request.method,
    _["uri"] = request.uri,
    _["vers_maj"] = request.versionMajor,
    _["vers_min"] = request.versionMinor,
    _["keepalive"] = request.keepAlive,
    _["headers"] = headers,
    _["content"] = content
  );
  l.attr("class") = CharacterVector::create("http_request", "list");

  return(l);

}
//' Parse an HTTP response
//'
//' You can use the non- `_raw` version on input you know for sure is plain text
//'
//' @param resp HTTP response as a character string (`parse_response()`) or as
//'        a raw vector (`parse_response_raw()`)
//' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
//'        element are converted to lower case
//' @return a list with classes `http_response` and `list` holding the elements
//'         `status_msg`, `status_code`, `vers_maj`, `vers_min`, `keepalive`,
//'         `headers` (a data frame with `name` and `value` columns) and
//'         `content` (a raw vector). An error is raised if the input cannot
//'         be parsed.
//' @export
//' @examples
//' paste0(c(
//'   "HTTP/1.1 200 OK\r\n",
//'   "Server: nginx/1.2.1\r\n",
//'   "Content-Type: text/html\r\n",
//'   "Content-Length: 8\r\n",
//'   "Connection: keep-alive\r\n",
//'   "\r\n",
//'   "<html />"
//' ), collapse = "") -> resp
//'
//' res <- parse_response(resp)
//' res <- parse_response_raw(charToRaw(resp))
// [[Rcpp::export]]
List parse_response(String resp, bool headers_lowercase = true) {

  Response response;
  HttpResponseParser parser;

  // The parser works on a [begin, end) range of unsigned bytes.
  const char *text = resp.get_cstring();
  const unsigned char *first = reinterpret_cast<const unsigned char *>(text);

  HttpResponseParser::ParseResult res = parser.parse(response, first, first + strlen(text));

  // Both "completed" and "incompleted" are accepted as success, so input that
  // ends before the parser reports completion still yields a result.
  //
  // Rcpp::stop() is used instead of Rf_error(): it throws a C++ exception, so
  // the locals above are destroyed normally, whereas Rf_error() long-jumps out
  // of the function without running their destructors.
  if ((res != HttpResponseParser::ParsingCompleted) && (res != HttpResponseParser::ParsingIncompleted)) {
    Rcpp::stop("Parse error.");
  }

  // One row per header, in the order the parser stored them.
  StringVector names(response.headers.size());
  StringVector vals(response.headers.size());

  R_xlen_t idx = 0;
  for (std::vector<Response::HeaderItem>::const_iterator it = response.headers.begin(); it != response.headers.end(); ++it) {
    names[idx] = headers_lowercase ? str_tolower(it->name) : it->name;
    vals[idx++] = it->value;
  }

  DataFrame headers = DataFrame::create(
    _["name"] = names,
    _["value"] = vals
  );

  RawVector content(response.content.begin(), response.content.end());

  List l = List::create(
    _["status_msg"] = response.status,
    _["status_code"] = response.statusCode,
    _["vers_maj"] = response.versionMajor,
    _["vers_min"] = response.versionMinor,
    _["keepalive"] = response.keepAlive,
    _["headers"] = headers,
    _["content"] = content
  );
  l.attr("class") = CharacterVector::create("http_response", "list");

  return(l);

}
//' @rdname parse_response
//' @param resp HTTP response as a character string (`parse_response()`) or as
//'        a raw vector (`parse_response_raw()`)
//' @param headers_lowercase if `TRUE` (the default) names in the `headers` data frame
//'        element are converted to lower case
//' @export
// [[Rcpp::export]]
List parse_response_raw(RawVector resp, bool headers_lowercase = true) {

  Response response;
  HttpResponseParser parser;

  // The raw vector's bytes are handed to the parser directly.
  HttpResponseParser::ParseResult res = parser.parse(response, resp.begin(), resp.end());

  // Both "completed" and "incompleted" are accepted as success, so input that
  // ends before the parser reports completion still yields a result.
  //
  // Rcpp::stop() is used instead of Rf_error(): it throws a C++ exception, so
  // the locals above are destroyed normally, whereas Rf_error() long-jumps out
  // of the function without running their destructors.
  if ((res != HttpResponseParser::ParsingCompleted) && (res != HttpResponseParser::ParsingIncompleted)) {
    Rcpp::stop("Parse error.");
  }

  // One row per header, in the order the parser stored them.
  StringVector names(response.headers.size());
  StringVector vals(response.headers.size());

  R_xlen_t idx = 0;
  for (std::vector<Response::HeaderItem>::const_iterator it = response.headers.begin(); it != response.headers.end(); ++it) {
    names[idx] = headers_lowercase ? str_tolower(it->name) : it->name;
    vals[idx++] = it->value;
  }

  DataFrame headers = DataFrame::create(
    _["name"] = names,
    _["value"] = vals
  );

  RawVector content(response.content.begin(), response.content.end());

  List l = List::create(
    _["status_msg"] = response.status,
    _["status_code"] = response.statusCode,
    _["vers_maj"] = response.versionMajor,
    _["vers_min"] = response.versionMinor,
    _["keepalive"] = response.keepAlive,
    _["headers"] = headers,
    _["content"] = content
  );
  l.attr("class") = CharacterVector::create("http_response", "list");

  return(l);

}
//' Parse URLs
//'
//' @param urls character vector of URLs
//' @return a data frame with one row per element of `urls` and the columns
//'         `scheme`, `username`, `password`, `hostname`, `port`, `path`,
//'         `query` and `fragment`. Every column is `NA` in rows whose URL
//'         could not be parsed.
//' @export
//' @examples
//' URL <- "http://www.example.com/dir/subdir?param=1&param=2;param%20=%20#fragment"
//' parse_url(URL)
// [[Rcpp::export]]
DataFrame parse_url(std::vector < std::string > urls) {

  // One output column per URL component, each as long as the input.
  StringVector scheme(urls.size());
  StringVector username(urls.size());
  StringVector password(urls.size());
  StringVector hostname(urls.size());
  StringVector port(urls.size());
  StringVector path(urls.size());
  StringVector query(urls.size());
  StringVector fragment(urls.size());

  // A single parser instance is reused for every URL.
  UrlParser u;

  for (R_xlen_t idx=0; idx<(R_xlen_t)urls.size(); idx++) {

    int res = u.parse(urls[idx].c_str());

    if (res) {
      scheme[idx] = u.url.scheme;
      username[idx] = u.url.username;
      password[idx] = u.url.password;
      hostname[idx] = u.url.hostname;
      port[idx] = u.url.port;
      path[idx] = u.url.path;
      query[idx] = u.url.query;
      fragment[idx] = u.url.fragment;
    } else {
      // A zero result from parse() is treated as failure: the whole row is NA.
      scheme[idx] = NA_STRING;
      username[idx] = NA_STRING;
      password[idx] = NA_STRING;
      hostname[idx] = NA_STRING;
      port[idx] = NA_STRING;
      path[idx] = NA_STRING;
      query[idx] = NA_STRING;
      fragment[idx] = NA_STRING;
    }

  }

  return(DataFrame::create(
    _["scheme"] = scheme,
    _["username"] = username,
    _["password"] = password,
    _["hostname"] = hostname,
    _["port"] = port,
    _["path"] = path,
    _["query"] = query,
    _["fragment"] = fragment
  ));

}
//' Read in a file, fast and raw
//'
//' @param fil file to read in (no path expansion is performed)
//' @param buffer_size larger buffer sizes may speed up reading of
//'        very large files. It can also hurt performance, and this
//'        function reads in the entire file into memory, so a
//'        large buffer size also means more (temporary) memory will
//'        be allocated. Values below 1 leave the stream's own
//'        buffering in place.
//' @return a raw vector holding the bytes of the file
//' @export
//' @examples
//' read_file_raw(system.file("extdat", "example.hdr", package = "construe"))
// [[Rcpp::export]]
RawVector read_file_raw(CharacterVector fil, int buffer_size = 16384) {

  if (fil.size() < 1) {
    Rcpp::stop("No file name supplied.");
  }

  // The stream buffer lives on the heap: a variable-length stack array is not
  // standard C++, can overflow the stack for large sizes and is undefined for
  // sizes <= 0. It is declared before the stream so that it outlives it.
  std::vector<char> buf(buffer_size > 0 ? buffer_size : 0);

  std::ifstream in;
  if (!buf.empty()) {
    in.rdbuf()->pubsetbuf(&buf[0], static_cast<std::streamsize>(buf.size()));
  }
  in.open(fil[0], std::ios::in | std::ios::binary);

  if (in) {

    // Ask the open stream for the file size. This replaces the separate
    // stat()/_wstati64() call, whose result was never checked and which on
    // Windows went through a wide-character path that open() does not use.
    in.seekg(0, std::ios::end);
    const std::streamoff size = in.tellg();
    if (size < 0) {
      Rcpp::stop("Unable to determine the size of the file.");
    }
    in.seekg(0, std::ios::beg);

    RawVector out(static_cast<R_xlen_t>(size));

    if (size > 0) {
      in.read(reinterpret_cast<char *>(out.begin()), static_cast<std::streamsize>(size));
      // A short read would otherwise leave the tail of `out` zero-filled.
      if (in.gcount() != static_cast<std::streamsize>(size)) {
        Rcpp::stop("Unable to read the whole file.");
      }
    }

    in.close();

    return(out);

  } else {
    // NOTE(review): the declared return type is RawVector, so this
    // R_NilValue has to be converted to a raw vector; that conversion most
    // likely raises a type-compatibility error instead of returning NULL to
    // R. Confirm the behaviour wanted for files that cannot be opened.
    return(R_NilValue);
  }

}

625
src/httprequestparser.h

@ -1,625 +0,0 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_REQUESTPARSER_H
#define HTTPPARSER_REQUESTPARSER_H
#include <algorithm>
#include <string.h>
#include <stdlib.h>
#include "request.h"
namespace httpparser
{
class HttpRequestParser
{
public:
HttpRequestParser()
: state(RequestMethodStart), contentSize(0),
chunkSize(0), chunked(false)
{
}
enum ParseResult {
ParsingCompleted,
ParsingIncompleted,
ParsingError
};
ParseResult parse(Request &req, const unsigned char *begin, const unsigned char *end)
{
return consume(req, begin, end);
}
private:
static bool checkIfConnection(const Request::HeaderItem &item)
{
return strcasecmp(item.name.c_str(), "Connection") == 0;
}
ParseResult consume(Request &req, const unsigned char *begin, const unsigned char *end)
{
while( begin != end )
{
char input = *begin++;
switch (state)
{
case RequestMethodStart:
if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
state = RequestMethod;
req.method.push_back(input);
}
break;
case RequestMethod:
if( input == ' ' )
{
state = RequestUriStart;
}
else if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
req.method.push_back(input);
}
break;
case RequestUriStart:
if( isControl(input) )
{
return ParsingError;
}
else
{
state = RequestUri;
req.uri.push_back(input);
}
break;
case RequestUri:
if( input == ' ' )
{
state = RequestHttpVersion_h;
}
else if (input == '\r')
{
req.versionMajor = 0;
req.versionMinor = 9;
return ParsingCompleted;
}
else if( isControl(input) )
{
return ParsingError;
}
else
{
req.uri.push_back(input);
}
break;
case RequestHttpVersion_h:
if( input == 'H' )
{
state = RequestHttpVersion_ht;
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_ht:
if( input == 'T' )
{
state = RequestHttpVersion_htt;
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_htt:
if( input == 'T' )
{
state = RequestHttpVersion_http;
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_http:
if( input == 'P' )
{
state = RequestHttpVersion_slash;
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_slash:
if( input == '/' )
{
req.versionMajor = 0;
req.versionMinor = 0;
state = RequestHttpVersion_majorStart;
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_majorStart:
if( isDigit(input) )
{
req.versionMajor = input - '0';
state = RequestHttpVersion_major;
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_major:
if( input == '.' )
{
state = RequestHttpVersion_minorStart;
}
else if (isDigit(input))
{
req.versionMajor = req.versionMajor * 10 + input - '0';
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_minorStart:
if( isDigit(input) )
{
req.versionMinor = input - '0';
state = RequestHttpVersion_minor;
}
else
{
return ParsingError;
}
break;
case RequestHttpVersion_minor:
if( input == '\r' )
{
state = ResponseHttpVersion_newLine;
}
else if( isDigit(input) )
{
req.versionMinor = req.versionMinor * 10 + input - '0';
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_newLine:
if( input == '\n' )
{
state = HeaderLineStart;
}
else
{
return ParsingError;
}
break;
case HeaderLineStart:
if( input == '\r' )
{
state = ExpectingNewline_3;
}
else if( !req.headers.empty() && (input == ' ' || input == '\t') )
{
state = HeaderLws;
}
else if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
req.headers.push_back(Request::HeaderItem());
req.headers.back().name.reserve(16);
req.headers.back().value.reserve(16);
req.headers.back().name.push_back(input);
state = HeaderName;
}
break;
case HeaderLws:
if( input == '\r' )
{
state = ExpectingNewline_2;
}
else if( input == ' ' || input == '\t' )
{
}
else if( isControl(input) )
{
return ParsingError;
}
else
{
state = HeaderValue;
req.headers.back().value.push_back(input);
}
break;
case HeaderName:
if( input == ':' )
{
state = SpaceBeforeHeaderValue;
}
else if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
req.headers.back().name.push_back(input);
}
break;
case SpaceBeforeHeaderValue:
if( input == ' ' )
{
state = HeaderValue;
}
else
{
return ParsingError;
}
break;
case HeaderValue:
if( input == '\r' )
{
if( req.method == "POST" || req.method == "PUT" )
{
Request::HeaderItem &h = req.headers.back();
if( strcasecmp(h.name.c_str(), "Content-Length") == 0 )
{
contentSize = atoi(h.value.c_str());
req.content.reserve( contentSize );
}
else if( strcasecmp(h.name.c_str(), "Transfer-Encoding") == 0 )
{
if(strcasecmp(h.value.c_str(), "chunked") == 0)
chunked = true;
}
}
state = ExpectingNewline_2;
}
else if( isControl(input) )
{
return ParsingError;
}
else
{
req.headers.back().value.push_back(input);
}
break;
case ExpectingNewline_2:
if( input == '\n' )
{
state = HeaderLineStart;
}
else
{
return ParsingError;
}
break;
case ExpectingNewline_3: {
std::vector<Request::HeaderItem>::iterator it = std::find_if(req.headers.begin(),
req.headers.end(),
checkIfConnection);
if( it != req.headers.end() )
{
if( strcasecmp(it->value.c_str(), "Keep-Alive") == 0 )
{
req.keepAlive = true;
}
else // == Close
{
req.keepAlive = false;
}
}
else
{
if( req.versionMajor > 1 || (req.versionMajor == 1 && req.versionMinor == 1) )
req.keepAlive = true;
}
if( chunked )
{
state = ChunkSize;
}
else if( contentSize == 0 )
{
if( input == '\n')
return ParsingCompleted;
else
return ParsingError;
}
else
{
state = Post;
}
break;
}
case Post:
--contentSize;
req.content.push_back( input );
if( contentSize == 0 )
{
return ParsingCompleted;
}
break;
case ChunkSize:
if( isalnum(input) )
{
chunkSizeStr.push_back(input);
}
else if( input == ';' )
{
state = ChunkExtensionName;
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkExtensionName:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '=' )
{
state = ChunkExtensionValue;
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkExtensionValue:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine:
if( input == '\n' )
{
chunkSize = strtol(chunkSizeStr.c_str(), NULL, 16);
chunkSizeStr.clear();
req.content.reserve(req.content.size() + chunkSize);
if( chunkSize == 0 )
state = ChunkSizeNewLine_2;
else
state = ChunkData;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine_2:
if( input == '\r' )
{
state = ChunkSizeNewLine_3;
}
else if( isalpha(input) )
{
state = ChunkTrailerName;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine_3:
if( input == '\n' )
{
return ParsingCompleted;
}
else
{
return ParsingError;
}
break;
case ChunkTrailerName:
if( isalnum(input) )
{
// skip
}
else if( input == ':' )
{
state = ChunkTrailerValue;
}
else
{
return ParsingError;
}
break;
case ChunkTrailerValue:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkData:
req.content.push_back(input);
if( --chunkSize == 0 )
{
state = ChunkDataNewLine_1;
}
break;
case ChunkDataNewLine_1:
if( input == '\r' )
{
state = ChunkDataNewLine_2;
}
else
{
return ParsingError;
}
break;
case ChunkDataNewLine_2:
if( input == '\n' )
{
state = ChunkSize;
}
else
{
return ParsingError;
}
break;
default:
return ParsingError;
}
}
return ParsingIncompleted;
}
// Check if a byte is an HTTP character.
inline bool isChar(int c)
{
return c >= 0 && c <= 127;
}
// Check if a byte is an HTTP control character.
inline bool isControl(int c)
{
return (c >= 0 && c <= 31) || (c == 127);
}
// Check if a byte is defined as an HTTP special character.
inline bool isSpecial(int c)
{
switch (c)
{
case '(': case ')': case '<': case '>': case '@':
case ',': case ';': case ':': case '\\': case '"':
case '/': case '[': case ']': case '?': case '=':
case '{': case '}': case ' ': case '\t':
return true;
default:
return false;
}
}
// Check if a byte is a digit.
inline bool isDigit(int c)
{
return c >= '0' && c <= '9';
}
// The current state of the parser.
enum State
{
RequestMethodStart,
RequestMethod,
RequestUriStart,
RequestUri,
RequestHttpVersion_h,
RequestHttpVersion_ht,
RequestHttpVersion_htt,
RequestHttpVersion_http,
RequestHttpVersion_slash,
RequestHttpVersion_majorStart,
RequestHttpVersion_major,
RequestHttpVersion_minorStart,
RequestHttpVersion_minor,
ResponseStatusStart,
ResponseHttpVersion_ht,
ResponseHttpVersion_htt,
ResponseHttpVersion_http,
ResponseHttpVersion_slash,
ResponseHttpVersion_majorStart,
ResponseHttpVersion_major,
ResponseHttpVersion_minorStart,
ResponseHttpVersion_minor,
ResponseHttpVersion_spaceAfterVersion,
ResponseHttpVersion_statusCodeStart,
ResponseHttpVersion_spaceAfterStatusCode,
ResponseHttpVersion_statusTextStart,
ResponseHttpVersion_newLine,
HeaderLineStart,
HeaderLws,
HeaderName,
SpaceBeforeHeaderValue,
HeaderValue,
ExpectingNewline_2,
ExpectingNewline_3,
Post,
ChunkSize,
ChunkExtensionName,
ChunkExtensionValue,
ChunkSizeNewLine,
ChunkSizeNewLine_2,
ChunkSizeNewLine_3,
ChunkTrailerName,
ChunkTrailerValue,
ChunkDataNewLine_1,
ChunkDataNewLine_2,
ChunkData,
} state;
size_t contentSize;
std::string chunkSizeStr;
size_t chunkSize;
bool chunked;
};
} // namespace httpparser
#endif // LIBAHTTP_REQUESTPARSER_H

618
src/httpresponseparser.h

@ -1,618 +0,0 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_RESPONSEPARSER_H
#define HTTPPARSER_RESPONSEPARSER_H
#include <algorithm>
#include <string.h>
#include <stdlib.h>
#include "response.h"
namespace httpparser
{
class HttpResponseParser
{
public:
HttpResponseParser()
: state(ResponseStatusStart),
contentSize(0),
chunkSize(0),
chunked(false)
{
}
enum ParseResult {
ParsingCompleted,
ParsingIncompleted,
ParsingError
};
ParseResult parse(Response &resp, const unsigned char *begin, const unsigned char *end)
{
return consume(resp, begin, end);
}
private:
static bool checkIfConnection(const Response::HeaderItem &item)
{
return strcasecmp(item.name.c_str(), "Connection") == 0;
}
ParseResult consume(Response &resp, const unsigned char *begin, const unsigned char *end)
{
while( begin != end )
{
char input = *begin++;
switch (state)
{
case ResponseStatusStart:
if( input != 'H' )
{
return ParsingError;
}
else
{
state = ResponseHttpVersion_ht;
}
break;
case ResponseHttpVersion_ht:
if( input == 'T' )
{
state = ResponseHttpVersion_htt;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_htt:
if( input == 'T' )
{
state = ResponseHttpVersion_http;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_http:
if( input == 'P' )
{
state = ResponseHttpVersion_slash;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_slash:
if( input == '/' )
{
resp.versionMajor = 0;
resp.versionMinor = 0;
state = ResponseHttpVersion_majorStart;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_majorStart:
if( isDigit(input) )
{
resp.versionMajor = input - '0';
state = ResponseHttpVersion_major;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_major:
if( input == ' ' ) {
resp.versionMinor = 0;
state = ResponseHttpVersion_statusCodeStart;
} else if( input == '.' ) {
state = ResponseHttpVersion_minorStart;
} else if( isDigit(input) ) {
resp.versionMajor = resp.versionMajor * 10 + input - '0';
} else {
return ParsingError;
}
break;
case ResponseHttpVersion_minorStart:
if( input == ' ' ) {
resp.versionMinor = 0;
state = ResponseHttpVersion_statusCodeStart;
} else if( isDigit(input) ) {
resp.versionMinor = input - '0';
state = ResponseHttpVersion_minor;
} else{
return ParsingError;
}
break;
case ResponseHttpVersion_minor:
if( input == ' ') {
state = ResponseHttpVersion_statusCodeStart;
resp.versionMinor = 0;
}
else if( isDigit(input) )
{
resp.versionMinor = resp.versionMinor * 10 + input - '0';
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_statusCodeStart:
// printf("ResponseHttpVersion_statusCodeStart\n\n");
if( isDigit(input) )
{
// printf(" - digit - ResponseHttpVersion_statusCodeStart\n\n");
resp.statusCode = input - '0';
state = ResponseHttpVersion_statusCode;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_statusCode:
// printf("ResponseHttpVersion_statusCode\n\n");
if( isDigit(input) )
{
// printf(" - digit - ResponseHttpVersion_statusCode\n\n");
resp.statusCode = resp.statusCode * 10 + input - '0';
}
else
{
if( resp.statusCode < 100 || resp.statusCode > 999 ) {
return ParsingError;
} else if( input == ' ' ) {
// printf(" - SPACE - ResponseHttpVersion_statusCode\n\n");
state = ResponseHttpVersion_statusTextStart;
} else if( input == '\r' ) {
// printf(" - CR - ResponseHttpVersion_statusCode\n\n");
resp.status = "";
state = ResponseHttpVersion_newLine;
} else {
return ParsingError;
}
}
break;
case ResponseHttpVersion_statusTextStart:
// printf("ResponseHttpVersion_statusTextStart\n\n");
if( input == '\r' ) {
// printf(" - CR - ResponseHttpVersion_statusTextStart\n\n");
resp.status = "";
state = ResponseHttpVersion_newLine;
} else if( isChar(input) ) {
// printf(" - char - ResponseHttpVersion_statusTextStart\n\n");
resp.status += input;
state = ResponseHttpVersion_statusText;
} else {
return ParsingError;
}
break;
case ResponseHttpVersion_statusText:
if( input == '\r' )
{
state = ResponseHttpVersion_newLine;
}
else if( isChar(input) )
{
resp.status += input;
}
else
{
return ParsingError;
}
break;
case ResponseHttpVersion_newLine:
// printf("ResponseHttpVersion_newLine\n\n");
if( input == '\n' )
{
// printf(" - NL - ResponseHttpVersion_newLine\n\n");
state = HeaderLineStart;
}
else
{
return ParsingError;
}
break;
case HeaderLineStart:
// printf("HeaderLineStart\n\n");
if( input == '\r' )
{
state = ExpectingNewline_3;
}
else if( !resp.headers.empty() && (input == ' ' || input == '\t') )
{
state = HeaderLws;
}
else if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
resp.headers.push_back(Response::HeaderItem());
resp.headers.back().name.reserve(16);
resp.headers.back().value.reserve(16);
resp.headers.back().name.push_back(input);
state = HeaderName;
}
break;
case HeaderLws:
if( input == '\r' )
{
state = ExpectingNewline_2;
}
else if( input == ' ' || input == '\t' )
{
}
else if( isControl(input) )
{
return ParsingError;
}
else
{
state = HeaderValue;
resp.headers.back().value.push_back(input);
}
break;
case HeaderName:
if( input == ':' )
{
state = SpaceBeforeHeaderValue;
}
else if( !isChar(input) || isControl(input) || isSpecial(input) )
{
return ParsingError;
}
else
{
resp.headers.back().name.push_back(input);
}
break;
case SpaceBeforeHeaderValue:
if( input == ' ' )
{
state = HeaderValue;
}
else
{
return ParsingError;
}
break;
case HeaderValue:
if( input == '\r' )
{
Response::HeaderItem &h = resp.headers.back();
if( strcasecmp(h.name.c_str(), "Content-Length") == 0 )
{
contentSize = atoi(h.value.c_str());
resp.content.reserve( contentSize );
}
else if( strcasecmp(h.name.c_str(), "Transfer-Encoding") == 0 )
{
if(strcasecmp(h.value.c_str(), "chunked") == 0)
chunked = true;
}
state = ExpectingNewline_2;
}
else if( isControl(input) )
{
return ParsingError;
}
else
{
resp.headers.back().value.push_back(input);
}
break;
case ExpectingNewline_2:
if( input == '\n' )
{
state = HeaderLineStart;
}
else
{
return ParsingError;
}
break;
case ExpectingNewline_3: {
std::vector<Response::HeaderItem>::iterator it = std::find_if(resp.headers.begin(),
resp.headers.end(),
checkIfConnection);
if( it != resp.headers.end() )
{
if( strcasecmp(it->value.c_str(), "Keep-Alive") == 0 )
{
resp.keepAlive = true;
}
else // == Close
{
resp.keepAlive = false;
}
}
else
{
if( resp.versionMajor > 1 || (resp.versionMajor == 1 && resp.versionMinor == 1) )
resp.keepAlive = true;
}
if( chunked )
{
state = ChunkSize;
}
else if( contentSize == 0 )
{
if( input == '\n')
return ParsingCompleted;
else
return ParsingError;
}
else
{
state = Post;
}
break;
}
case Post:
--contentSize;
resp.content.push_back(input);
if( contentSize == 0 )
{
return ParsingCompleted;
}
break;
case ChunkSize:
if( isalnum(input) )
{
chunkSizeStr.push_back(input);
}
else if( input == ';' )
{
state = ChunkExtensionName;
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkExtensionName:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '=' )
{
state = ChunkExtensionValue;
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkExtensionValue:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine:
if( input == '\n' )
{
chunkSize = strtol(chunkSizeStr.c_str(), NULL, 16);
chunkSizeStr.clear();
resp.content.reserve(resp.content.size() + chunkSize);
if( chunkSize == 0 )
state = ChunkSizeNewLine_2;
else
state = ChunkData;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine_2:
if( input == '\r' )
{
state = ChunkSizeNewLine_3;
}
else if( isalpha(input) )
{
state = ChunkTrailerName;
}
else
{
return ParsingError;
}
break;
case ChunkSizeNewLine_3:
if( input == '\n' )
{
return ParsingCompleted;
}
else
{
return ParsingError;
}
break;
case ChunkTrailerName:
if( isalnum(input) )
{
// skip
}
else if( input == ':' )
{
state = ChunkTrailerValue;
}
else
{
return ParsingError;
}
break;
case ChunkTrailerValue:
if( isalnum(input) || input == ' ' )
{
// skip
}
else if( input == '\r' )
{
state = ChunkSizeNewLine;
}
else
{
return ParsingError;
}
break;
case ChunkData:
resp.content.push_back(input);
if( --chunkSize == 0 )
{
state = ChunkDataNewLine_1;
}
break;
case ChunkDataNewLine_1:
if( input == '\r' )
{
state = ChunkDataNewLine_2;
}
else
{
return ParsingError;
}
break;
case ChunkDataNewLine_2:
if( input == '\n' )
{
state = ChunkSize;
}
else
{
return ParsingError;
}
break;
default:
return ParsingError;
}
}
return ParsingIncompleted;
}
// True when `c` lies in the 7-bit range 0..127; negative values and
// anything from 128 upwards are rejected.
inline bool isChar(int c)
{
    // Converting to unsigned folds the "negative" case into "too large".
    return static_cast<unsigned int>(c) < 128u;
}
// True for the control characters: codes 0..31 and DEL (127).
inline bool isControl(int c)
{
    if( c == 127 )
        return true;
    return c >= 0 && c < 32;
}
// True when `c` is one of the separator characters that may not appear in a
// header name: ( ) < > @ , ; : \ " / [ ] ? = { } space and horizontal tab.
inline bool isSpecial(int c)
{
    static const char separators[] = {
        '(', ')', '<', '>', '@',
        ',', ';', ':', '\\', '"',
        '/', '[', ']', '?', '=',
        '{', '}', ' ', '\t'
    };
    for( size_t i = 0; i < sizeof(separators); ++i )
    {
        if( c == separators[i] )
            return true;
    }
    return false;
}
// True when `c` is one of the ASCII decimal digits '0'..'9'.
inline bool isDigit(int c)
{
    if( c < '0' )
        return false;
    return c <= '9';
}
// The current state of the parser.
// The parser consumes one input byte per step and switches on this value.
// Enumerators are grouped below by the part of the message they cover.
enum State
{
// --- Status line ---------------------------------------------------------
// NOTE(review): the states up to ResponseHttpVersion_minorStart are handled
// in a part of the switch not covered by these notes; presumably they match
// the literal "HTTP/" prefix and the major version digits — confirm.
ResponseStatusStart,
ResponseHttpVersion_ht,
ResponseHttpVersion_htt,
ResponseHttpVersion_http,
ResponseHttpVersion_slash,
ResponseHttpVersion_majorStart,
ResponseHttpVersion_major,
ResponseHttpVersion_minorStart,
// Accumulates minor-version digits; a space moves on to the status code.
ResponseHttpVersion_minor,
// Expects the first digit of the status code.
ResponseHttpVersion_statusCodeStart,
// Accumulates further status-code digits. On the first non-digit the code
// must be within 100..999; a space leads to the status text, CR ends the line.
ResponseHttpVersion_statusCode,
// First character of the status text (or CR for an empty text).
ResponseHttpVersion_statusTextStart,
// Remaining status text characters, up to CR.
ResponseHttpVersion_statusText,
// Expects the LF that terminates the status line.
ResponseHttpVersion_newLine,
// --- Headers -------------------------------------------------------------
// Start of a header line: CR begins the blank line that ends the headers,
// a leading space/tab (once a header exists) continues the previous value,
// anything else starts a new header name.
HeaderLineStart,
// Skips leading spaces/tabs of a continuation line.
HeaderLws,
// Accumulates the header name up to ':'.
HeaderName,
// Requires exactly one space after the ':'.
SpaceBeforeHeaderValue,
// Accumulates the header value up to CR; Content-Length and
// Transfer-Encoding are inspected when the value is complete.
HeaderValue,
// Expects the LF that terminates a header line.
ExpectingNewline_2,
// Handles the byte after the CR of the blank line: sets keepAlive and
// selects the body mode (chunked, fixed length, or no body).
ExpectingNewline_3,
// --- Body ----------------------------------------------------------------
// Copies contentSize bytes into the response content.
Post,
// Collects the characters of a chunk-size line.
ChunkSize,
// Skips a chunk extension name / value after ';'.
ChunkExtensionName,
ChunkExtensionValue,
// LF after the chunk-size line; the collected size is parsed here and a
// size of zero switches to the end-of-body states below.
ChunkSizeNewLine,
// After the zero-size chunk: CR ends the body, a letter starts a trailer.
ChunkSizeNewLine_2,
// Final LF; parsing is complete.
ChunkSizeNewLine_3,
// Skips a trailer name / value.
ChunkTrailerName,
ChunkTrailerValue,
// CR and LF that follow each chunk's data.
ChunkDataNewLine_1,
ChunkDataNewLine_2,
// Copies chunkSize bytes of chunk data into the response content.
ChunkData,
} state;
// Number of body bytes still to read in the Post state; assigned from atoi()
// of the Content-Length header value.
// NOTE(review): atoi() returns int, so a negative Content-Length would convert
// to a very large size_t here — confirm whether that needs guarding.
size_t contentSize;
// Characters of the chunk-size line collected so far; cleared once parsed.
std::string chunkSizeStr;
// Bytes remaining in the current chunk, parsed from chunkSizeStr as base 16.
size_t chunkSize;
// True once a "Transfer-Encoding: chunked" header has been seen.
bool chunked;
};
} // namespace httpparser
#endif // HTTPPARSER_RESPONSEPARSER_H

57
src/request.h

@ -1,57 +0,0 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_REQUEST_H
#define HTTPPARSER_REQUEST_H
#include <string>
#include <vector>
#include <sstream>
namespace httpparser
{
// Value object holding the pieces of one HTTP request: request line
// (method, URI, version), header list, raw body bytes and the keep-alive flag.
struct Request {
    // A single "name: value" header entry.
    struct HeaderItem
    {
        std::string name;
        std::string value;
    };

    // Starts out as version 0.0 with keep-alive off and everything else empty.
    Request()
        : versionMajor(0), versionMinor(0), keepAlive(false)
    {}

    std::string method;
    std::string uri;
    int versionMajor;
    int versionMinor;
    std::vector<HeaderItem> headers;
    std::vector<unsigned char> content;
    bool keepAlive;

    // Renders the request as text for debugging: the request line, one line
    // per header, the body followed by a newline, and a trailing
    // "+ keep-alive: <0|1>" line.
    std::string inspect() const
    {
        std::ostringstream out;

        out << method << " " << uri << " HTTP/"
            << versionMajor << "." << versionMinor << "\n";

        for( std::vector<HeaderItem>::size_type i = 0; i < headers.size(); ++i )
        {
            const HeaderItem &header = headers[i];
            out << header.name << ": " << header.value << "\n";
        }

        // Emit the body byte by byte, exactly as stored.
        for( std::vector<unsigned char>::size_type i = 0; i < content.size(); ++i )
        {
            out.put(static_cast<char>(content[i]));
        }
        out << "\n";

        out << "+ keep-alive: " << keepAlive << "\n";
        return out.str();
    }
};
} // namespace httpparser
#endif // HTTPPARSER_REQUEST_H

57
src/response.h

@ -1,57 +0,0 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_RESPONSE_H
#define HTTPPARSER_RESPONSE_H
#include <string>
#include <vector>
#include <sstream>
namespace httpparser
{
// Value object holding the pieces of one HTTP response: version, status code
// and status text, header list, raw body bytes and the keep-alive flag.
struct Response {
    // A single "name: value" header entry.
    struct HeaderItem
    {
        std::string name;
        std::string value;
    };

    // Starts out as version 0.0, status code 0, keep-alive off, rest empty.
    Response()
        : versionMajor(0), versionMinor(0), keepAlive(false), statusCode(0)
    {}

    int versionMajor;
    int versionMinor;
    std::vector<HeaderItem> headers;
    std::vector<unsigned char> content;
    bool keepAlive;
    unsigned int statusCode;
    std::string status;

    // Renders the response as text for debugging: the status line, one line
    // per header, then the body followed by a newline.
    std::string inspect() const
    {
        typedef std::vector<HeaderItem>::const_iterator HeaderIter;

        std::ostringstream text;
        text << "HTTP/" << versionMajor << "." << versionMinor
             << " " << statusCode << " " << status << "\n";

        HeaderIter cur = headers.begin();
        const HeaderIter last = headers.end();
        while( cur != last )
        {
            text << cur->name << ": " << cur->value << "\n";
            ++cur;
        }

        text << std::string(content.begin(), content.end()) << "\n";
        return text.str();
    }
};
} // namespace httpparser
#endif // HTTPPARSER_RESPONSE_H

387
src/urlparser.h

@ -1,387 +0,0 @@
/*
* Copyright (C) Alex Nekipelov (alex@nekipelov.net)
* License: MIT
*/
#ifndef HTTPPARSER_URLPARSER_H
#define HTTPPARSER_URLPARSER_H
#include <string>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
namespace httpparser
{
// Splits a URL of the shape
//     scheme:[//][username[:password]@]hostname[:port][/path][?query][#fragment]
// into its components in a single left-to-right pass over the input.
//
// Usage: construct from a string (or call parse()), check isValid(), then read
// the components through the accessors. The accessors assert isValid(). After
// a failed parse every field of `url` is reset to its empty default.
//
// Limitations: bracketed IPv6 host literals ("[::1]") are not supported and
// are reported as invalid. Ports above 65535 are reported as invalid.
class UrlParser
{
public:
    // Creates a parser holding no URL; isValid() is false until parse() succeeds.
    UrlParser()
        : valid(false)
    {
    }

    // Parses `url` immediately; check isValid() for the outcome.
    explicit UrlParser(const std::string &url)
        : valid(true)
    {
        parse(url);
    }

    // Discards any previous result, parses `str` and returns whether it is valid.
    bool parse(const std::string &str)
    {
        url = Url();
        parse_(str);
        return isValid();
    }

    // True when the most recent parse succeeded.
    bool isValid() const
    {
        return valid;
    }

    std::string scheme() const
    {
        assert( isValid() );
        return url.scheme;
    }

    std::string username() const
    {
        assert( isValid() );
        return url.username;
    }

    std::string password() const
    {
        assert( isValid() );
        return url.password;
    }

    std::string hostname() const
    {
        assert( isValid() );
        return url.hostname;
    }

    // The port exactly as written in the URL; empty when none was given.
    std::string port() const
    {
        assert( isValid() );
        return url.port;
    }

    // The path component; "/" when the URL has no explicit path.
    std::string path() const
    {
        assert( isValid() );
        return url.path;
    }

    std::string query() const
    {
        assert( isValid() );
        return url.query;
    }

    std::string fragment() const
    {
        assert( isValid() );
        return url.fragment;
    }

    // The numeric port to connect to: the explicit port when one was given,
    // otherwise 443 for the "https" scheme and 80 for everything else.
    uint16_t httpPort() const
    {
        const uint16_t defaultHttpPort = 80;
        const uint16_t defaultHttpsPort = 443;

        assert( isValid() );

        if( url.port.empty() )
        {
            if( scheme() == "https" )
                return defaultHttpsPort;
            else
                return defaultHttpPort;
        }
        else
        {
            return url.integerPort;
        }
    }

    // The parsed components. Kept public so callers can read the fields directly.
    struct Url
    {
        Url() : integerPort(0)
        {}

        std::string scheme;
        std::string username;
        std::string password;
        std::string hostname;
        std::string port;
        std::string path;
        std::string query;
        std::string fragment;
        uint16_t integerPort;
    } url;

private:
    // <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF); a plain `char` holding a byte >= 0x80 may be
    // negative, so convert before classifying.
    static bool isAlnumChar(char ch)
    {
        return isalnum(static_cast<unsigned char>(ch)) != 0;
    }

    static bool isDigitChar(char ch)
    {
        return isdigit(static_cast<unsigned char>(ch)) != 0;
    }

    // True for letters, digits and the four marks '-', '.', '_', '~'.
    bool isUnreserved(char ch) const
    {
        if( isAlnumChar(ch) )
            return true;

        switch(ch)
        {
        case '-':
        case '.':
        case '_':
        case '~':
            return true;
        }
        return false;
    }

    // Marks the parse as failed and clears every component collected so far.
    void fail()
    {
        valid = false;
        url = Url();
    }

    // Moves the collected digits into url.port and stores the numeric value.
    // Fails the parse when the value does not fit a 16-bit port number rather
    // than silently truncating it. An empty digit string yields port 0.
    void assignPort(std::string &digits)
    {
        std::swap(url.port, digits);
        const unsigned long value = strtoul(url.port.c_str(), NULL, 10);
        if( value > 65535UL )
            fail();
        else
            url.integerPort = static_cast<uint16_t>(value);
    }

    // The state machine proper. Sets `valid` and fills `url`.
    void parse_(const std::string &str)
    {
        enum {
            Scheme,
            SlashAfterScheme1,
            SlashAfterScheme2,
            UsernameOrHostname,
            Password,
            Hostname,
            PortOrPassword,
            Port,
            Path,
            Query,
            Fragment
        } state = Scheme;

        // Until '@', ':' or '/' is seen it is unknown whether the text after
        // the scheme is a username or a hostname, and whether the text after
        // ':' is a port or a password; both are buffered here meanwhile.
        std::string usernameOrHostname;
        std::string portOrPassword;

        valid = true;
        url.path = "/";
        url.integerPort = 0;

        for(size_t i = 0; i < str.size() && valid; ++i)
        {
            char ch = str[i];

            switch(state)
            {
            case Scheme:
                if( isAlnumChar(ch) || ch == '+' || ch == '-' || ch == '.')
                {
                    url.scheme += ch;
                }
                else if( ch == ':' )
                {
                    state = SlashAfterScheme1;
                }
                else
                {
                    fail();
                }
                break;

            case SlashAfterScheme1:
                if( ch == '/' )
                {
                    state = SlashAfterScheme2;
                }
                else if( isAlnumChar(ch) )
                {
                    // "scheme:host..." form without the "//".
                    usernameOrHostname = ch;
                    state = UsernameOrHostname;
                }
                else
                {
                    fail();
                }
                break;

            case SlashAfterScheme2:
                if( ch == '/' )
                {
                    state = UsernameOrHostname;
                }
                else
                {
                    fail();
                }
                break;

            case UsernameOrHostname:
                if( isUnreserved(ch) || ch == '%' )
                {
                    usernameOrHostname += ch;
                }
                else if( ch == ':' )
                {
                    state = PortOrPassword;
                }
                else if( ch == '@' )
                {
                    state = Hostname;
                    std::swap(url.username, usernameOrHostname);
                }
                else if( ch == '/' )
                {
                    state = Path;
                    std::swap(url.hostname, usernameOrHostname);
                }
                else
                {
                    fail();
                }
                break;

            case Password:
                if( isAlnumChar(ch) || ch == '%' )
                {
                    url.password += ch;
                }
                else if( ch == '@' )
                {
                    state = Hostname;
                }
                else
                {
                    fail();
                }
                break;

            case Hostname:
                if( isUnreserved(ch) || ch == '%' )
                {
                    url.hostname += ch;
                }
                else if( ch == ':' )
                {
                    state = Port;
                }
                else if( ch == '/' )
                {
                    state = Path;
                }
                else
                {
                    // Includes '[': bracketed IPv6 literals are not supported,
                    // so reject them instead of swallowing the rest of the URL.
                    fail();
                }
                break;

            case PortOrPassword:
                if( isDigitChar(ch) )
                {
                    portOrPassword += ch;
                }
                else if( ch == '/' )
                {
                    // Only digits followed by '/': it was "host:port".
                    std::swap(url.hostname, usernameOrHostname);
                    assignPort(portOrPassword);
                    state = Path;
                }
                else if( isAlnumChar(ch) || ch == '%' )
                {
                    // A non-digit appeared: it was "username:password".
                    std::swap(url.username, usernameOrHostname);
                    std::swap(url.password, portOrPassword);
                    url.password += ch;
                    state = Password;
                }
                else
                {
                    fail();
                }
                break;

            case Port:
                if( isDigitChar(ch) )
                {
                    portOrPassword += ch;
                }
                else if( ch == '/' )
                {
                    assignPort(portOrPassword);
                    state = Path;
                }
                else
                {
                    fail();
                }
                break;

            case Path:
                if( ch == '#' )
                {
                    state = Fragment;
                }
                else if( ch == '?' )
                {
                    state = Query;
                }
                else
                {
                    url.path += ch;
                }
                break;

            case Query:
                if( ch == '#' )
                {
                    state = Fragment;
                }
                else if( ch == '?' )
                {
                    // NOTE(review): a '?' inside the query is dropped rather
                    // than kept as data; confirm whether that is intended.
                    state = Query;
                }
                else
                {
                    url.query += ch;
                }
                break;

            case Fragment:
                url.fragment += ch;
                break;
            }
        }

        // A failed parse has already reset `url`; do not write buffered
        // fragments back into it.
        if( !valid )
            return;

        // Input ended while a component was still buffered (no trailing '/').
        switch(state)
        {
        case PortOrPassword:
            // "host:1234" — only digits were seen, so treat them as the port.
            std::swap(url.hostname, usernameOrHostname);
            assignPort(portOrPassword);
            break;

        case Port:
            // "user@host:1234"
            assignPort(portOrPassword);
            break;

        default:
            if( !usernameOrHostname.empty() )
                url.hostname = usernameOrHostname;
            break;
        }
    }

    bool valid;
};
} // namespace httpparser
#endif // HTTPPARSER_URLPARSER_H

5
tests/tinytest.R

@ -1,5 +0,0 @@
# Run the package's tinytest suite, but only when tinytest is installed.
if (requireNamespace("tinytest", quietly = TRUE)) {
  tinytest::test_package("construe")
}
Loading…
Cancel
Save