Browse Source

a few new DSL & helper functions

master
boB Rudis 7 years ago
parent
commit
4a1320a595
  1. 44
      DESCRIPTION
  2. 6
      NAMESPACE
  3. 1
      R/aaa.r
  4. 4
      R/as_req.r
  5. 2
      R/content.r
  6. 10
      R/dsl.r
  7. 49
      R/helpers.r
  8. 18
      man/as_httr_req.Rd
  9. 18
      man/as_req.Rd
  10. 4
      man/get_content_size.Rd
  11. 16
      man/get_har_entry.Rd
  12. 14
      man/har_entries.Rd
  13. 14
      man/har_entry_count.Rd
  14. 14
      man/splash_har_reset.Rd

44
DESCRIPTION

@ -1,44 +1,46 @@
Package: splashr Package: splashr
Type: Package Type: Package
Title: Tools to Work with the 'Splash' JavaScript Rendering Service Title: Tools to Work with the 'Splash' 'JavaScript' Rendering Service
Version: 0.3.0 Version: 0.3.0
Date: 2017-02-14 Date: 2017-02-14
Encoding: UTF-8 Encoding: UTF-8
Author: Bob Rudis (bob@rud.is) Author: Bob Rudis (bob@rud.is)
Maintainer: Bob Rudis <bob@rud.is> Maintainer: Bob Rudis <bob@rud.is>
Description: 'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service. Description: 'Splash' <https://github.com/scrapinghub/splash> is a 'JavaScript' rendering service.
It’s a lightweight web browser with an 'HTTP' API, implemented in Python using 'Twisted' It’s a lightweight web browser with an 'HTTP' API, implemented in 'Python' using 'Twisted'
and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes' and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes'
R packages but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the R packages but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the
server fully asynchronous allowing to take advantage of 'webkit' concurrency via QT main loop. server fully asynchronous allowing to take advantage of 'webkit' concurrency via 'QT' main loop.
Some of Splash features include the ability to process multiple webpages in parallel; Some of 'Splash' features include the ability to process multiple webpages in parallel;
retrieving HTML results and/or take screenshots; disabling images or use Adblock Plus rules retrieving 'HTML' results and/or take screenshots; disabling images or use 'Adblock Plus' rules
to make rendering faster; executing custom JavaScript in page context; getting detailed to make rendering faster; executing custom 'JavaScript' in page context; getting detailed
rendering info in HAR format. rendering info in 'HAR' format.
URL: http://github.com/hrbrmstr/splashr URL: http://github.com/hrbrmstr/splashr
BugReports: https://github.com/hrbrmstr/splashr/issues BugReports: https://github.com/hrbrmstr/splashr/issues
License: AGPL License: AGPL
Suggests: Suggests:
testthat, testthat,
tibble tibble,
jpeg,
png
Depends: Depends:
R (>= 3.2.0) R (>= 3.2.0)
Imports: Imports:
purrr,
httr,
xml2, xml2,
jsonlite, curl,
magick, httr,
stringi,
clipr, clipr,
HARtools, purrr,
openssl,
lubridate,
formatR,
scales,
harbor,
stats, stats,
utils, utils,
curl harbor,
magick,
scales,
formatR,
openssl,
stringi,
jsonlite,
HARtools,
lubridate
RoxygenNote: 6.0.0 RoxygenNote: 6.0.0
Remotes: wch/harbor Remotes: wch/harbor

6
NAMESPACE

@ -6,16 +6,19 @@ export("%>%")
export(HARviewer) export(HARviewer)
export(HARviewerOutput) export(HARviewerOutput)
export(as_har) export(as_har)
export(as_req) export(as_httr_req)
export(as_response) export(as_response)
export(execute_lua) export(execute_lua)
export(get_body_size) export(get_body_size)
export(get_content_size) export(get_content_size)
export(get_content_type) export(get_content_type)
export(get_har_entry)
export(get_headers_size) export(get_headers_size)
export(get_request_type) export(get_request_type)
export(get_request_url) export(get_request_url)
export(get_response_body) export(get_response_body)
export(har_entries)
export(har_entry_count)
export(install_splash) export(install_splash)
export(is_binary) export(is_binary)
export(is_content_type) export(is_content_type)
@ -47,6 +50,7 @@ export(splash_debug)
export(splash_focus) export(splash_focus)
export(splash_go) export(splash_go)
export(splash_har) export(splash_har)
export(splash_har_reset)
export(splash_history) export(splash_history)
export(splash_html) export(splash_html)
export(splash_images) export(splash_images)

1
R/aaa.r

@ -7,7 +7,6 @@ trunc_string <- function (x, maxlen = 20, justify = "left") {
return(formatC(chopx, width = maxlen, flag = ifelse(justify == "left", "-", " "))) return(formatC(chopx, width = maxlen, flag = ifelse(justify == "left", "-", " ")))
} }
parse_query <- function(query) { parse_query <- function(query) {
params <- vapply(stri_split_regex(query, "&", omit_empty=TRUE)[[1]], params <- vapply(stri_split_regex(query, "&", omit_empty=TRUE)[[1]],
stri_split_fixed, "=", 2, simplify=TRUE, stri_split_fixed, "=", 2, simplify=TRUE,

4
R/as_req.r

@ -1,11 +1,11 @@
#' Create an httr function from an HAR request #' Create an httr verb request function from an HAR request
#' #'
#' @md #' @md
#' @param entry HAR entry #' @param entry HAR entry
#' @param quiet quiet #' @param quiet quiet
#' @param add_clip add clip #' @param add_clip add clip
#' @export #' @export
as_req <- function(entry, quiet=TRUE, add_clip=TRUE) { as_httr_req <- function(entry, quiet=TRUE, add_clip=TRUE) {
req <- entry$request req <- entry$request

2
R/content.r

@ -1,4 +1,4 @@
#' Retrieve size of content | body | headers #' Retrieve size of content | body | headers
#' #'
#' @param har_resp_obj HAR response object #' @param har_resp_obj HAR response object
#' @export #' @export

10
R/dsl.r

@ -229,6 +229,16 @@ splash_wait <- function(splash_obj, time=2) {
splash_obj splash_obj
} }
#' Drops all internally stored HAR records.
#'
#' Queues the Lua call `splash:har_reset()` by appending it to the `calls`
#' element of `splash_obj`, then hands the updated object back so it can be
#' used in a pipe chain.
#'
#' @md
#' @param splash_obj splashr object
#' @param keys not used by this function; it may be omitted
#' @return `splash_obj` with `splash:har_reset()` added to the end of its
#'   `calls` element
#' @export
splash_har_reset <- function(splash_obj, keys) {
  # `keys` is never evaluated, so callers that leave it out do not error.
  splash_obj$calls <- append(splash_obj$calls, "splash:har_reset()")
  splash_obj
}
#' Return information about Splash interaction with a website in HAR format. #' Return information about Splash interaction with a website in HAR format.
#' #'
#' Similar to [render_har()] but used in a script context. Should be the LAST element in #' Similar to [render_har()] but used in a script context. Should be the LAST element in

49
R/helpers.r

@ -121,3 +121,52 @@ is_get <- function(har_resp_obj) { get_request_type(har_resp_obj) == "GET" }
#' @rdname get_request_type #' @rdname get_request_type
#' @export #' @export
is_post <- function(har_resp_obj) { get_request_type(har_resp_obj) == "POST" } is_post <- function(har_resp_obj) { get_request_type(har_resp_obj) == "POST" }
#' Retrieve just the HAR entries from a splashr request
#'
#' Looks at the class of `x` and pulls out the entries it holds:
#' `x$log$entries` for a `har` object, `x$entries` for a `harlog` object, and
#' `x` itself for a `harentries` object.
#'
#' @param x can be a `har` object, `harlog` object or `harentries` object
#' @return the entries found in `x`, or `NULL` when `x` is none of the
#'   supported classes
#' @export
har_entries <- function(x) {
  # Classes are tested in this order: har, harlog, harentries.
  if (inherits(x, "har")) return(x$log$entries)
  if (inherits(x, "harlog")) return(x$entries)
  if (inherits(x, "harentries")) return(x)
  NULL
}
#' Retrieve an entry by index from a HAR object
#'
#' Picks the entry container that matches the class of `x`
#' (`x$log$entries` for `har`, `x$entries` for `harlog`, `x` itself for
#' `harentries`) and extracts element `i` from it with `[[`.
#'
#' @param x can be a `har` object, `harlog` object or `harentries` object
#' @param i index of the HAR entry to retrieve
#' @return the `i`-th entry, or `NULL` when `x` is none of the supported
#'   classes
#' @export
get_har_entry <- function(x, i = 1) {
  if (inherits(x, "har")) {
    container <- x$log$entries
  } else if (inherits(x, "harlog")) {
    container <- x$entries
  } else if (inherits(x, "harentries")) {
    container <- x
  } else {
    # Unsupported input: bail out before `i` is ever evaluated.
    return(NULL)
  }
  container[[i]]
}
#' Retrieves number of HAR entries in a response
#'
#' Counts the entries held by `x`: `length(x$log$entries)` for a `har`
#' object, `length(x$entries)` for a `harlog` object, and `length(x)` for a
#' `harentries` object.
#'
#' @param x can be a `har` object, `harlog` object or `harentries` object
#' @return the number of entries, or `NULL` when `x` is none of the supported
#'   classes
#' @export
har_entry_count <- function(x) {
  if (inherits(x, "har")) {
    length(x$log$entries)
  } else if (inherits(x, "harlog")) {
    length(x$entries)
  } else if (inherits(x, "harentries")) {
    # A `harentries` object is itself the container, so count it directly.
    # (This branch previously indexed with `i`, which this function never
    # defines, so it failed with "object 'i' not found".)
    length(x)
  } else {
    NULL
  }
}

18
man/as_httr_req.Rd

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_req.r
\name{as_httr_req}
\alias{as_httr_req}
\title{Create an httr verb request function from an HAR request}
\usage{
as_httr_req(entry, quiet = TRUE, add_clip = TRUE)
}
\arguments{
\item{entry}{HAR entry}
\item{quiet}{quiet}
\item{add_clip}{add clip}
}
\description{
Create an httr verb request function from an HAR request
}

18
man/as_req.Rd

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_req.r
\name{as_req}
\alias{as_req}
\title{Create an httr function from an HAR request}
\usage{
as_req(entry, quiet = TRUE, add_clip = TRUE)
}
\arguments{
\item{entry}{HAR entry}
\item{quiet}{quiet}
\item{add_clip}{add clip}
}
\description{
Create an httr function from an HAR request
}

4
man/get_content_size.Rd

@ -4,7 +4,7 @@
\alias{get_content_size} \alias{get_content_size}
\alias{get_body_size} \alias{get_body_size}
\alias{get_headers_size} \alias{get_headers_size}
\title{Retrieve size of content | body | headers} \title{Retrieve size of content | body | headers}
\usage{ \usage{
get_content_size(har_resp_obj) get_content_size(har_resp_obj)
@ -16,5 +16,5 @@ get_headers_size(har_resp_obj)
\item{har_resp_obj}{HAR response object} \item{har_resp_obj}{HAR response object}
} }
\description{ \description{
Retrieve size of content | body | headers Retrieve size of content | body | headers
} }

16
man/get_har_entry.Rd

@ -0,0 +1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{get_har_entry}
\alias{get_har_entry}
\title{Retrieve an entry by index from a HAR object}
\usage{
get_har_entry(x, i = 1)
}
\arguments{
\item{x}{can be a `har` object, `harlog` object or `harentries` object}
\item{i}{index of the HAR entry to retrieve}
}
\description{
Retrieve an entry by index from a HAR object
}

14
man/har_entries.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{har_entries}
\alias{har_entries}
\title{Retrieve just the HAR entries from a splashr request}
\usage{
har_entries(x)
}
\arguments{
\item{x}{can be a `har` object, `harlog` object or `harentries` object}
}
\description{
Retrieve just the HAR entries from a splashr request
}

14
man/har_entry_count.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{har_entry_count}
\alias{har_entry_count}
\title{Retrieves number of HAR entries in a response}
\usage{
har_entry_count(x)
}
\arguments{
\item{x}{can be a `har` object, `harlog` object or `harentries` object}
}
\description{
Retrieves number of HAR entries in a response
}

14
man/splash_har_reset.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
\name{splash_har_reset}
\alias{splash_har_reset}
\title{Drops all internally stored HAR records.}
\usage{
splash_har_reset(splash_obj, keys)
}
\arguments{
\item{splash_obj}{splashr object}
}
\description{
Drops all internally stored HAR records.
}
Loading…
Cancel
Save