From 4a1320a5957e7cad5020c44f92def1ed4aaf30e1 Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Fri, 24 Feb 2017 22:07:53 -0500 Subject: [PATCH] a few new DSL & helper functions --- DESCRIPTION | 44 +++++++++++++++++++++++--------------------- NAMESPACE | 6 +++++- R/aaa.r | 1 - R/as_req.r | 4 ++-- R/content.r | 2 +- R/dsl.r | 10 ++++++++++ R/helpers.r | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ man/as_httr_req.Rd | 18 ++++++++++++++++++ man/as_req.Rd | 18 ------------------ man/get_content_size.Rd | 4 ++-- man/get_har_entry.Rd | 16 ++++++++++++++++ man/har_entries.Rd | 14 ++++++++++++++ man/har_entry_count.Rd | 14 ++++++++++++++ man/splash_har_reset.Rd | 14 ++++++++++++++ 14 files changed, 168 insertions(+), 46 deletions(-) create mode 100644 man/as_httr_req.Rd delete mode 100644 man/as_req.Rd create mode 100644 man/get_har_entry.Rd create mode 100644 man/har_entries.Rd create mode 100644 man/har_entry_count.Rd create mode 100644 man/splash_har_reset.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 107d0dd..025399c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,44 +1,46 @@ Package: splashr Type: Package -Title: Tools to Work with the 'Splash' JavaScript Rendering Service +Title: Tools to Work with the 'Splash' 'JavaScript' Rendering Service Version: 0.3.0 Date: 2017-02-14 Encoding: UTF-8 Author: Bob Rudis (bob@rud.is) Maintainer: Bob Rudis -Description: 'Splash' is a javascript rendering service. - It’s a lightweight web browser with an 'HTTP' API, implemented in Python using 'Twisted' +Description: 'Splash' is a 'JavaScript' rendering service. + It’s a lightweight web browser with an 'HTTP' API, implemented in 'Python' using 'Twisted' and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes' R pacakges but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the - sever fully asynchronous allowing to take advantage of 'webkit' concurrency via QT main loop. 
- Some of Splash features include the ability to process multiple webpages in parallel; - retrieving HTML results and/or take screenshots; disabling images or use Adblock Plus rules - to make rendering faster; executing custom JavaScript in page context; getting detailed - rendering info in HAR format. + sever fully asynchronous allowing to take advantage of 'webkit' concurrency via 'QT' main loop. + Some of 'Splash' features include the ability to process multiple webpages in parallel; + retrieving 'HTML' results and/or take screenshots; disabling images or use 'Adblock Plus' rules + to make rendering faster; executing custom 'JavaScript' in page context; getting detailed + rendering info in 'HAR' format. URL: http://github.com/hrbrmstr/splashr BugReports: https://github.com/hrbrmstr/splashr/issues License: AGPL Suggests: testthat, - tibble + tibble, + jpeg, + png Depends: R (>= 3.2.0) Imports: - purrr, - httr, xml2, - jsonlite, - magick, - stringi, + curl, + httr, clipr, - HARtools, - openssl, - lubridate, - formatR, - scales, - harbor, + purrr, stats, utils, - curl + harbor, + magick, + scales, + formatR, + openssl, + stringi, + jsonlite, + HARtools, + lubridate RoxygenNote: 6.0.0 Remotes: wch/harbor diff --git a/NAMESPACE b/NAMESPACE index 67a8e18..ff480db 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,16 +6,19 @@ export("%>%") export(HARviewer) export(HARviewerOutput) export(as_har) -export(as_req) +export(as_httr_req) export(as_response) export(execute_lua) export(get_body_size) export(get_content_size) export(get_content_type) +export(get_har_entry) export(get_headers_size) export(get_request_type) export(get_request_url) export(get_response_body) +export(har_entries) +export(har_entry_count) export(install_splash) export(is_binary) export(is_content_type) @@ -47,6 +50,7 @@ export(splash_debug) export(splash_focus) export(splash_go) export(splash_har) +export(splash_har_reset) export(splash_history) export(splash_html) export(splash_images) diff --git 
a/R/aaa.r b/R/aaa.r index d8e67f2..a414d96 100644 --- a/R/aaa.r +++ b/R/aaa.r @@ -7,7 +7,6 @@ trunc_string <- function (x, maxlen = 20, justify = "left") { return(formatC(chopx, width = maxlen, flag = ifelse(justify == "left", "-", " "))) } - parse_query <- function(query) { params <- vapply(stri_split_regex(query, "&", omit_empty=TRUE)[[1]], stri_split_fixed, "=", 2, simplify=TRUE, diff --git a/R/as_req.r b/R/as_req.r index 25c9b69..528e5c1 100644 --- a/R/as_req.r +++ b/R/as_req.r @@ -1,11 +1,11 @@ -#' Create an httr function from an HAR request +#' Create an httr verb request function from an HAR request #' #' @md #' @param entry HAR entry #' @param quiet quiet #' @param add_clip add clip #' @export -as_req <- function(entry, quiet=TRUE, add_clip=TRUE) { +as_httr_req <- function(entry, quiet=TRUE, add_clip=TRUE) { req <- entry$request diff --git a/R/content.r b/R/content.r index c72d65b..60af224 100644 --- a/R/content.r +++ b/R/content.r @@ -1,4 +1,4 @@ -#' Retrieve size of content | body | headers +#' Retrieve size of content | body | headers #' #' @param har_resp_obj HAR response object #' @export diff --git a/R/dsl.r b/R/dsl.r index 1476f19..c52bb05 100644 --- a/R/dsl.r +++ b/R/dsl.r @@ -229,6 +229,16 @@ splash_wait <- function(splash_obj, time=2) { splash_obj } +#' Drops all internally stored HAR records. +#' +#' @md +#' @param splash_obj splashr object +#' @export +splash_har_reset <- function(splash_obj, keys) { + splash_obj$calls <- c(splash_obj$calls, 'splash:har_reset()') + splash_obj +} + #' Return information about Splash interaction with a website in HAR format. #' #' Similar to [render_har()] but used in a script context. 
Should be the LAST element in
diff --git a/R/helpers.r b/R/helpers.r
index 63f03de..0ce7078 100644
--- a/R/helpers.r
+++ b/R/helpers.r
@@ -121,3 +121,52 @@ is_get <- function(har_resp_obj) { get_request_type(har_resp_obj) == "GET" }
 #' @rdname get_request_type
 #' @export
 is_post <- function(har_resp_obj) { get_request_type(har_resp_obj) == "POST" }
+
+#' Retrieve just the HAR entries from a splashr request
+#'
+#' @param x can be a `har` object, `harlog` object or `harentries` object
+#' @export
+har_entries <- function(x) {
+  if (inherits(x, "har")) {
+    x$log$entries
+  } else if (inherits(x, "harlog")) {
+    x$entries
+  } else if (inherits(x, "harentries")) {
+    x
+  } else {
+    NULL
+  }
+}
+
+#' Retrieve an entry by index from a HAR object
+#'
+#' @param x can be a `har` object, `harlog` object or `harentries` object
+#' @param i index of the HAR entry to retrieve
+#' @export
+get_har_entry <- function(x, i=1) {
+  if (inherits(x, "har")) {
+    x$log$entries[[i]]
+  } else if (inherits(x, "harlog")) {
+    x$entries[[i]]
+  } else if (inherits(x, "harentries")) {
+    x[[i]]
+  } else {
+    NULL
+  }
+}
+
+#' Retrieves number of HAR entries in a response
+#'
+#' @param x can be a `har` object, `harlog` object or `harentries` object
+#' @export
+har_entry_count <- function(x) {
+  if (inherits(x, "har")) {
+    length(x$log$entries)
+  } else if (inherits(x, "harlog")) {
+    length(x$entries)
+  } else if (inherits(x, "harentries")) {
+    length(x) # `x` is already the entries list (cf. har_entries()); no index here
+  } else {
+    NULL
+  }
+}
diff --git a/man/as_httr_req.Rd b/man/as_httr_req.Rd
new file mode 100644
index 0000000..7e88a3b
--- /dev/null
+++ b/man/as_httr_req.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/as_req.r
+\name{as_httr_req}
+\alias{as_httr_req}
+\title{Create an httr verb request function from an HAR request}
+\usage{
+as_httr_req(entry, quiet = TRUE, add_clip = TRUE)
+}
+\arguments{
+\item{entry}{HAR entry}
+
+\item{quiet}{quiet}
+
+\item{add_clip}{add clip}
+}
+\description{
+Create an httr verb request function from an HAR request +} diff --git a/man/as_req.Rd b/man/as_req.Rd deleted file mode 100644 index 0d3e90a..0000000 --- a/man/as_req.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/as_req.r -\name{as_req} -\alias{as_req} -\title{Create an httr function from an HAR request} -\usage{ -as_req(entry, quiet = TRUE, add_clip = TRUE) -} -\arguments{ -\item{entry}{HAR entry} - -\item{quiet}{quiet} - -\item{add_clip}{add clip} -} -\description{ -Create an httr function from an HAR request -} diff --git a/man/get_content_size.Rd b/man/get_content_size.Rd index e63943f..aa1c5b2 100644 --- a/man/get_content_size.Rd +++ b/man/get_content_size.Rd @@ -4,7 +4,7 @@ \alias{get_content_size} \alias{get_body_size} \alias{get_headers_size} -\title{Retrieve size of content | body | headers} +\title{Retrieve size of content | body | headers} \usage{ get_content_size(har_resp_obj) @@ -16,5 +16,5 @@ get_headers_size(har_resp_obj) \item{har_resp_obj}{HAR response object} } \description{ -Retrieve size of content | body | headers +Retrieve size of content | body | headers } diff --git a/man/get_har_entry.Rd b/man/get_har_entry.Rd new file mode 100644 index 0000000..d4d3ead --- /dev/null +++ b/man/get_har_entry.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.r +\name{get_har_entry} +\alias{get_har_entry} +\title{Retrieve an entry by index from a HAR object} +\usage{ +get_har_entry(x, i = 1) +} +\arguments{ +\item{x}{can be a `har` object, `harlog` object or `harentries` object} + +\item{i}{index of the HAR entry to retrieve} +} +\description{ +Retrieve an entry by index from a HAR object +} diff --git a/man/har_entries.Rd b/man/har_entries.Rd new file mode 100644 index 0000000..b5969f9 --- /dev/null +++ b/man/har_entries.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.r 
+\name{har_entries} +\alias{har_entries} +\title{Retrieve just the HAR entries from a splashr request} +\usage{ +har_entries(x) +} +\arguments{ +\item{x}{can be a `har` object, `harlog` object or `harentries` object} +} +\description{ +Retrieve just the HAR entries from a splashr request +} diff --git a/man/har_entry_count.Rd b/man/har_entry_count.Rd new file mode 100644 index 0000000..05b9102 --- /dev/null +++ b/man/har_entry_count.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.r +\name{har_entry_count} +\alias{har_entry_count} +\title{Retrieves number of HAR entries in a response} +\usage{ +har_entry_count(x) +} +\arguments{ +\item{x}{can be a `har` object, `harlog` object or `harentries` object} +} +\description{ +Retrieves number of HAR entries in a response +} diff --git a/man/splash_har_reset.Rd b/man/splash_har_reset.Rd new file mode 100644 index 0000000..6c2c49a --- /dev/null +++ b/man/splash_har_reset.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dsl.r +\name{splash_har_reset} +\alias{splash_har_reset} +\title{Drops all internally stored HAR records.} +\usage{ +splash_har_reset(splash_obj, keys) +} +\arguments{ +\item{splash_obj}{splashr object} +} +\description{ +Drops all internally stored HAR records. +}