Browse Source

make httr functions and response objects from HAR entries

master
boB Rudis 7 years ago
parent
commit
4ef5b7a1aa
  1. 8
      DESCRIPTION
  2. 13
      NAMESPACE
  3. 5
      NEWS.md
  4. 93
      R/as_req.r
  5. 36
      R/as_request.r
  6. 26
      R/content.r
  7. 45
      R/helpers.r
  8. 5
      R/splashr-package.R
  9. 10
      README.Rmd
  10. 14
      README.md
  11. 18
      man/as_req.Rd
  12. 29
      man/as_request.Rd
  13. 2
      man/get_content_size.Rd
  14. 20
      man/get_request_type.Rd
  15. 14
      man/get_request_url.Rd
  16. 17
      man/get_response_body.Rd

8
DESCRIPTION

@ -1,7 +1,7 @@
Package: splashr
Type: Package
Title: Tools to Work with the 'Splash' JavaScript Rendering Service
Version: 0.2.0
Version: 0.3.0
Date: 2017-02-14
Encoding: UTF-8
Author: Bob Rudis (bob@rud.is)
@ -29,6 +29,10 @@ Imports:
xml2,
jsonlite,
magick,
HARtools
stringi,
clipr,
HARtools,
openssl,
lubridate
RoxygenNote: 6.0.0
Remotes: wch/harbor

13
NAMESPACE

@ -6,15 +6,21 @@ export("%>%")
export(HARviewer)
export(HARviewerOutput)
export(as_har)
export(as_req)
export(as_request)
export(execute_lua)
export(get_body_size)
export(get_content_size)
export(get_content_type)
export(get_headers_size)
export(get_request_type)
export(get_request_url)
export(get_response_body)
export(install_splash)
export(is_binary)
export(is_content_type)
export(is_css)
export(is_get)
export(is_gif)
export(is_html)
export(is_javascript)
@ -22,6 +28,7 @@ export(is_jpeg)
export(is_json)
export(is_plain)
export(is_png)
export(is_post)
export(is_svg)
export(is_xhr)
export(is_xml)
@ -55,6 +62,12 @@ importFrom(HARtools,HARviewer)
importFrom(HARtools,HARviewerOutput)
importFrom(HARtools,renderHARviewer)
importFrom(HARtools,writeHAR)
importFrom(clipr,read_clip)
importFrom(jsonlite,fromJSON)
importFrom(lubridate,ymd_hms)
importFrom(openssl,base64_decode)
importFrom(stringi,stri_detect_regex)
importFrom(stringi,stri_split_fixed)
importFrom(stringi,stri_split_regex)
importFrom(xml2,read_html)
importFrom(xml2,url_parse)

5
NEWS.md

@ -1,3 +1,8 @@
0.3.0
* added `as_req()`
* added `as_request()`
0.2.0
* added `execute`()

93
R/as_req.r

@ -0,0 +1,93 @@
#' Create an httr function from an HAR request
#'
#' @md
#' @param entry HAR entry
#' @param quiet quiet
#' @param add_clip add clip
#' @export
as_req <- function(entry, quiet=TRUE, add_clip=TRUE) {
req <- entry$request
req$headers <- purrr::map(req$headers, "value") %>%
setNames(map_chr(req$headers, "name"))
ml <- getOption("deparse.max.lines")
options(deparse.max.lines=10000)
template <- "httr::VERB(verb = '%s', url = '%s' %s%s%s%s%s%s)"
hdrs <- enc <- bdy <- ckies <- auth <- verbos <- cfg <- ""
if (length(req$headers) > 0) {
# try to determine encoding
ct_idx <- which(grepl("content-type", names(req$headers), ignore.case=TRUE))
if (length(ct_idx) > 0) {
# retrieve & delete the content type
ct <- req$headers[[ct_idx]]
req$headers[[ct_idx]] <- NULL
if (stringi::stri_detect_regex(ct, "multipart")) {
enc <- ", encode = 'multipart'"
} else if (stringi::stri_detect_regex(ct, "form")) {
enc <- ", encode = 'form'"
} else if (stringi::stri_detect_regex(ct, "json")) {
enc <- ", encode = 'json'"
} else {
enc <- ""
}
}
hdrs <- paste0(capture.output(dput(req$headers, control=NULL)),
collapse="")
hdrs <- sub("^list", ", httr::add_headers", hdrs)
}
if (length(req$data) > 0) {
bdy_bits <- paste0(capture.output(dput(parse_query(req$data), control=NULL)),
collapse="")
bdy <- sprintf(", body = %s", bdy_bits)
}
if (length(req$url_parts$username) > 0) {
auth <- sprintf(", httr::authenticate(user='%s', password='%s')",
req$url_parts$username, req$url_parts$password)
}
if (length(req$verbose) > 0) {
verbos <- ", httr::verbose()"
}
if (length(req$cookies) > 0) {
ckies <- paste0(capture.output(dput(req$cookies, control=NULL)),
collapse="")
ckies <- sub("^list", ", httr::set_cookies", ckies)
}
REQ_URL <- req$url
out <- sprintf(template, toupper(req$method), REQ_URL, auth, verbos, hdrs, ckies, bdy, enc)
# this does a half-decent job formatting the R function text
fil <- tempfile(fileext=".R")
on.exit(unlink(fil))
formatR::tidy_source(text=out, width.cutoff=30, indent=4, file=fil)
tmp <- paste0(readLines(fil), collapse="\n")
if (add_clip) clipr::write_clip(tmp)
if (!quiet) cat(tmp, "\n")
# make a bona fide R function
f <- function() {}
formals(f) <- NULL
environment(f) <- parent.frame()
body(f) <- as.expression(parse(text=tmp))
options(deparse.max.lines=ml)
return(f)
}

36
R/as_request.r

@ -0,0 +1,36 @@
#' Return a HAR entry response as an httr::response object
#'
#' @param har_entry a HAR object (should contain a response body to be most useful)
#' @export
#' @examples \dontrun{
#' URL <- "http://www.svs.cl/portal/principal/605/w3-propertyvalue-18554.html"
#'
#' splash_local %>%
#' splash_response_body(TRUE) %>%
#' splash_go(URL) %>%
#' splash_wait(2) %>%
#' splash_har() -> har
#'
#' keep(har$log$entries, is_xhr) %>%
#' map(as_request) %>%
#' map(httr::content, as="parsed")
#' }
as_request <- function(har_entry) {
if (length(har_entry$response$content$text) > 0) {
content_body <- openssl::base64_decode(har_entry$response$content$text)
} else {
content_body <- NULL
}
structure(list(
url = har_entry$request$url,
status_code = har_entry$response$status,
date = lubridate::ymd_hms(har_entry$startedDateTime),
headers = setNames(map(har_entry$response$headers, "value"),
map(har_entry$response$headers, "name")) %>%
insensitive(),
content = content_body
), class="response")
}

26
R/content.r

@ -0,0 +1,26 @@
#' Retrieve size of content | body | headers
#'
#' @param har_resp_obj HAR response object
#' @export
get_content_size <- function(har_resp_obj) {
csize <- har_resp_obj$response$content$size
if (is.null(csize)) return(NA_real_)
return(as.numeric(csize))
}
#' @rdname get_content_size
#' @export
get_body_size <- function(har_resp_obj) {
bsize <- har_resp_obj$response$bodySize
if (is.null(bsize)) return(NA_real_)
return(as.numeric(bsize))
}
#' @rdname get_content_size
#' @export
get_headers_size <- function(har_resp_obj) {
hsize <- har_resp_obj$response$headersSize
if (is.null(hsize)) return(NA_real_)
return(as.numeric(hsize))
}

45
R/helpers.r

@ -1,3 +1,15 @@
#' Retrieve the body content of a HAR entry
#'
#' @md
#' @param har_resp_obj HAR response object
#' @return A `raw` vector of the content or `NULL`
#' @export
get_response_body <- function(har_resp_obj) {
resp <- har_resp_obj$response$content$text
if (resp == "") return(NULL)
openssl::base64_decode(resp)
}
#' Retrieve or test content type of a HAR request object
#'
#' @param har_resp_obj HAR response object
@ -82,29 +94,30 @@ is_xhr <- function(x) {
}
#' Retrieve size of content | body | headers
#' Retrieve request URL
#'
#' @param har_resp_obj HAR response object
#' @export
get_content_size <- function(har_resp_obj) {
csize <- har_resp_obj$response$content$size
if (is.null(csize)) return(NA_real_)
return(as.numeric(csize))
get_request_url <- function(har_resp_obj) {
utype <- har_resp_obj$request$url
if (utype == "") return(NA_character_)
utype
}
#' @rdname get_content_size
#' Retrieve or test request type
#'
#' @param har_resp_obj HAR response object
#' @export
get_body_size <- function(har_resp_obj) {
bsize <- har_resp_obj$response$bodySize
if (is.null(bsize)) return(NA_real_)
return(as.numeric(bsize))
get_request_type <- function(har_resp_obj) {
rtype <- har_resp_obj$request$method
if (rtype == "") return(NA_character_)
rtype
}
#' @rdname get_content_size
#' @rdname get_request_type
#' @export
get_headers_size <- function(har_resp_obj) {
hsize <- har_resp_obj$response$headersSize
if (is.null(hsize)) return(NA_real_)
return(as.numeric(hsize))
}
is_get <- function(har_resp_obj) { get_requet_type(har_resp_obj) == "GET" }
#' @rdname get_request_type
#' @export
is_post <- function(har_resp_obj) { get_requet_type(har_resp_obj) == "POST" }

5
R/splashr-package.R

@ -15,9 +15,14 @@
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import purrr httr magick harbor
#' @importFrom stringi stri_split_regex stri_split_fixed stri_detect_regex
#' @importFrom HARtools writeHAR HARviewer renderHARviewer HARviewerOutput
#' @importFrom xml2 read_html url_parse
#' @importFrom jsonlite fromJSON
#' @importFrom openssl base64_decode
#' @importFrom clipr read_clip
#' @importFrom lubridate ymd_hms
NULL
#' splashr exported operators

10
README.Rmd

@ -49,10 +49,7 @@ The following functions are implemented:
- `start_splash`: Start a Splash server Docker container
- `stop_splash`: Stop a running a Splash server Docker container
Mini-DSL (domain-specific language). These can be used to create a "script" without actually
scripting in Lua. They are a less-powerful/configurable set of calls than what you
can make with a full Lua function but the idea is to have it take care of very common but
simple use-cases, like waiting a period of time before capturing a HAR/HTML/PNG image of a site:
Mini-DSL (domain-specific language). These can be used to create a "script" without actually scripting in Lua. They are a less-powerful/configurable set of calls than what you can make with a full Lua function but the idea is to have it take care of very common but simple use-cases, like waiting a period of time before capturing a HAR/HTML/PNG image of a site:
- `splash_plugins`: Enable or disable browser plugins (e.g. Flash).
- `splash_images`: Enable/disable images
@ -63,6 +60,11 @@ simple use-cases, like waiting a period of time before capturing a HAR/HTML/PNG
- `splash_html`: Return a HTML snapshot of a current page.
- `splash_png`: Return a screenshot of a current page in PNG format.
`httr` helpers. These help turn various bits of `splashr` objects into `httr`-ish things:
- `as_req`: Turn a HAR response entry into a working `httr` function you can use to make a request with
- `as_request`: Turn a HAR response entry into an `httr` `response`-like object (i.e. you can use `httr::content()` on it)
Helpers:
- `get_body_size`: Retrieve size of content | body | headers

14
README.md

@ -46,10 +46,7 @@ The following functions are implemented:
- `start_splash`: Start a Splash server Docker container
- `stop_splash`: Stop a running a Splash server Docker container
Mini-DSL (domain-specific language). These can be used to create a "script" without actually
scripting in Lua. They are a less-powerful/configurable set of calls than what you
can make with a full Lua function but the idea is to have it take care of very common but
simple use-cases, like waiting a period of time before capturing a HAR/HTML/PNG image of a site:
Mini-DSL (domain-specific language). These can be used to create a "script" without actually scripting in Lua. They are a less-powerful/configurable set of calls than what you can make with a full Lua function but the idea is to have it take care of very common but simple use-cases, like waiting a period of time before capturing a HAR/HTML/PNG image of a site:
- `splash_plugins`: Enable or disable browser plugins (e.g. Flash).
- `splash_images`: Enable/disable images
@ -60,6 +57,11 @@ simple use-cases, like waiting a period of time before capturing a HAR/HTML/PNG
- `splash_html`: Return a HTML snapshot of a current page.
- `splash_png`: Return a screenshot of a current page in PNG format.
`httr` helpers. These help turn various bits of `splashr` objects into `httr`-ish things:
- `as_req`: Turn a HAR response entry into a working `httr` function you can use to make a request with
- `as_request`: Turn a HAR response entry into an `httr` `response`-like object (i.e. you can use `httr::content()` on it)
Helpers:
- `get_body_size`: Retrieve size of content | body | headers
@ -119,7 +121,7 @@ library(tidyverse)
packageVersion("splashr")
```
## [1] '0.2.0'
## [1] '0.3.0'
``` r
splash("splash", 8050L) %>%
@ -292,7 +294,7 @@ library(testthat)
date()
```
## [1] "Tue Feb 14 09:02:35 2017"
## [1] "Tue Feb 15 09:02:35 2017"
``` r
test_dir("tests/")

18
man/as_req.Rd

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_req.r
\name{as_req}
\alias{as_req}
\title{Create an httr function from an HAR request}
\usage{
as_req(entry, quiet = TRUE, add_clip = TRUE)
}
\arguments{
\item{entry}{HAR entry}
\item{quiet}{quiet}
\item{add_clip}{add clip}
}
\description{
Create an httr function from an HAR request
}

29
man/as_request.Rd

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_request.r
\name{as_request}
\alias{as_request}
\title{Return a HAR entry response as an httr::response object}
\usage{
as_request(har_entry)
}
\arguments{
\item{har_entry}{a HAR object (should contain a response body to be most useful)}
}
\description{
Return a HAR entry response as an httr::response object
}
\examples{
\dontrun{
URL <- "http://www.svs.cl/portal/principal/605/w3-propertyvalue-18554.html"
splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_go(URL) \%>\%
splash_wait(2) \%>\%
splash_har() -> har
keep(har$log$entries, is_xhr) \%>\%
map(as_request) \%>\%
map(httr::content, as="parsed")
}
}

2
man/get_content_size.Rd

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/content.r
\name{get_content_size}
\alias{get_content_size}
\alias{get_body_size}

20
man/get_request_type.Rd

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{get_request_type}
\alias{get_request_type}
\alias{is_get}
\alias{is_post}
\title{Retrieve or test request type}
\usage{
get_request_type(har_resp_obj)
is_get(har_resp_obj)
is_post(har_resp_obj)
}
\arguments{
\item{har_resp_obj}{HAR response object}
}
\description{
Retrieve or test request type
}

14
man/get_request_url.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{get_request_url}
\alias{get_request_url}
\title{Retrieve request URL}
\usage{
get_request_url(har_resp_obj)
}
\arguments{
\item{har_resp_obj}{HAR response object}
}
\description{
Retrieve request URL
}

17
man/get_response_body.Rd

@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{get_response_body}
\alias{get_response_body}
\title{Retrieve the body content of a HAR entry}
\usage{
get_response_body(har_resp_obj)
}
\arguments{
\item{har_resp_obj}{HAR response object}
}
\value{
A \code{raw} vector of the content or \code{NULL}
}
\description{
Retrieve the body content of a HAR entry
}
Loading…
Cancel
Save