Browse Source

a few new DSL & helper functions

master
boB Rudis 3 years ago
parent
commit
4a1320a595
14 changed files with 168 additions and 46 deletions
  1. +23
    -21
      DESCRIPTION
  2. +5
    -1
      NAMESPACE
  3. +0
    -1
      R/aaa.r
  4. +2
    -2
      R/as_req.r
  5. +1
    -1
      R/content.r
  6. +10
    -0
      R/dsl.r
  7. +49
    -0
      R/helpers.r
  8. +18
    -0
      man/as_httr_req.Rd
  9. +0
    -18
      man/as_req.Rd
  10. +2
    -2
      man/get_content_size.Rd
  11. +16
    -0
      man/get_har_entry.Rd
  12. +14
    -0
      man/har_entries.Rd
  13. +14
    -0
      man/har_entry_count.Rd
  14. +14
    -0
      man/splash_har_reset.Rd

+ 23
- 21
DESCRIPTION View File

@@ -1,44 +1,46 @@
Package: splashr
Type: Package
Title: Tools to Work with the 'Splash' JavaScript Rendering Service
Title: Tools to Work with the 'Splash' 'JavaScript' Rendering Service
Version: 0.3.0
Date: 2017-02-14
Encoding: UTF-8
Author: Bob Rudis (bob@rud.is)
Maintainer: Bob Rudis <bob@rud.is>
Description: 'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service.
It’s a lightweight web browser with an 'HTTP' API, implemented in Python using 'Twisted'
Description: 'Splash' <https://github.com/scrapinghub/splash> is a 'JavaScript' rendering service.
It’s a lightweight web browser with an 'HTTP' API, implemented in 'Python' using 'Twisted'
and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes'
R packages but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the
sever fully asynchronous allowing to take advantage of 'webkit' concurrency via QT main loop.
Some of Splash features include the ability to process multiple webpages in parallel;
retrieving HTML results and/or take screenshots; disabling images or use Adblock Plus rules
to make rendering faster; executing custom JavaScript in page context; getting detailed
rendering info in HAR format.
server fully asynchronous allowing to take advantage of 'webkit' concurrency via the 'QT' main loop.
Some of 'Splash' features include the ability to process multiple webpages in parallel;
retrieving 'HTML' results and/or take screenshots; disabling images or use 'Adblock Plus' rules
to make rendering faster; executing custom 'JavaScript' in page context; getting detailed
rendering info in 'HAR' format.
URL: http://github.com/hrbrmstr/splashr
BugReports: https://github.com/hrbrmstr/splashr/issues
License: AGPL
Suggests:
testthat,
tibble
tibble,
jpeg,
png
Depends:
R (>= 3.2.0)
Imports:
purrr,
httr,
xml2,
jsonlite,
magick,
stringi,
curl,
httr,
clipr,
HARtools,
openssl,
lubridate,
formatR,
scales,
harbor,
purrr,
stats,
utils,
curl
harbor,
magick,
scales,
formatR,
openssl,
stringi,
jsonlite,
HARtools,
lubridate
RoxygenNote: 6.0.0
Remotes: wch/harbor

+ 5
- 1
NAMESPACE View File

@@ -6,16 +6,19 @@ export("%>%")
export(HARviewer)
export(HARviewerOutput)
export(as_har)
export(as_req)
export(as_httr_req)
export(as_response)
export(execute_lua)
export(get_body_size)
export(get_content_size)
export(get_content_type)
export(get_har_entry)
export(get_headers_size)
export(get_request_type)
export(get_request_url)
export(get_response_body)
export(har_entries)
export(har_entry_count)
export(install_splash)
export(is_binary)
export(is_content_type)
@@ -47,6 +50,7 @@ export(splash_debug)
export(splash_focus)
export(splash_go)
export(splash_har)
export(splash_har_reset)
export(splash_history)
export(splash_html)
export(splash_images)


+ 0
- 1
R/aaa.r View File

@@ -7,7 +7,6 @@ trunc_string <- function (x, maxlen = 20, justify = "left") {
return(formatC(chopx, width = maxlen, flag = ifelse(justify == "left", "-", " ")))
}


parse_query <- function(query) {
params <- vapply(stri_split_regex(query, "&", omit_empty=TRUE)[[1]],
stri_split_fixed, "=", 2, simplify=TRUE,


+ 2
- 2
R/as_req.r View File

@@ -1,11 +1,11 @@
#' Create an httr function from an HAR request
#' Create an httr verb request function from an HAR request
#'
#' @md
#' @param entry HAR entry
#' @param quiet quiet
#' @param add_clip add clip
#' @export
as_req <- function(entry, quiet=TRUE, add_clip=TRUE) {
as_httr_req <- function(entry, quiet=TRUE, add_clip=TRUE) {

req <- entry$request



+ 1
- 1
R/content.r View File

@@ -1,4 +1,4 @@
#' Retrieve size of content | body | headers
#' Retrieve size of content | body | headers
#'
#' @param har_resp_obj HAR response object
#' @export


+ 10
- 0
R/dsl.r View File

@@ -229,6 +229,16 @@ splash_wait <- function(splash_obj, time=2) {
splash_obj
}

#' Queue a HAR reset in a splashr script
#'
#' Appends the Lua call `splash:har_reset()` to the calls already recorded on
#' `splash_obj` and hands the updated object back so it can be piped onward.
#'
#' @md
#' @param splash_obj splashr object
#' @param keys not used by this function; kept only so the existing signature
#'   stays unchanged
#' @return `splash_obj` with `splash:har_reset()` added to the end of its `calls`
#' @export
splash_har_reset <- function(splash_obj, keys) {
  queued_calls <- splash_obj$calls
  reset_call <- 'splash:har_reset()'
  splash_obj$calls <- c(queued_calls, reset_call)
  splash_obj
}

#' Return information about Splash interaction with a website in HAR format.
#'
#' Similar to [render_har()] but used in a script context. Should be the LAST element in


+ 49
- 0
R/helpers.r View File

@@ -121,3 +121,52 @@ is_get <- function(har_resp_obj) { get_request_type(har_resp_obj) == "GET" }
#' @rdname get_request_type
#' @export
is_post <- function(har_resp_obj) { get_request_type(har_resp_obj) == "POST" }

#' Extract the HAR entries from a splashr result
#'
#' Picks the entries out of whichever HAR container is supplied. The class
#' checks are tried in the order \code{har}, \code{harlog}, \code{harentries}.
#'
#' @param x a \code{har} object, \code{harlog} object or \code{harentries} object
#' @return the HAR entries, or \code{NULL} when \code{x} inherits from none of
#'   the supported classes
#' @export
har_entries <- function(x) {
  # A full HAR object keeps its entries under the `log` element
  if (inherits(x, "har")) return(x$log$entries)
  # A HAR log holds the entries directly
  if (inherits(x, "harlog")) return(x$entries)
  # Already the entries: hand them back untouched
  if (inherits(x, "harentries")) return(x)
  NULL
}

#' Fetch a single HAR entry by position
#'
#' Locates the entries inside the supplied HAR container and extracts element
#' \code{i} with \code{[[}.
#'
#' @param x a \code{har} object, \code{harlog} object or \code{harentries} object
#' @param i index of the HAR entry to retrieve
#' @return the selected HAR entry, or \code{NULL} when \code{x} inherits from
#'   none of the supported classes
#' @export
get_har_entry <- function(x, i=1) {
  # Resolve where the entries live for this kind of object; anything
  # unrecognised short-circuits to NULL without touching `i`
  entries <- if (inherits(x, "har")) {
    x$log$entries
  } else if (inherits(x, "harlog")) {
    x$entries
  } else if (inherits(x, "harentries")) {
    x
  } else {
    return(NULL)
  }
  entries[[i]]
}

#' Retrieves number of HAR entries in a response
#'
#' Counts the entries held by whichever HAR container is supplied.
#'
#' @param x can be a \code{har} object, \code{harlog} object or
#'   \code{harentries} object
#' @return the number of HAR entries as an integer, or \code{NULL} when
#'   \code{x} inherits from none of the supported classes
#' @export
har_entry_count <- function(x) {
  if (inherits(x, "har")) {
    length(x$log$entries)
  } else if (inherits(x, "harlog")) {
    length(x$entries)
  } else if (inherits(x, "harentries")) {
    # `x` already is the entries list, so its own length is the count.
    # (This branch previously indexed with an undefined `i`.)
    length(x)
  } else {
    NULL
  }
}

+ 18
- 0
man/as_httr_req.Rd View File

@@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_req.r
\name{as_httr_req}
\alias{as_httr_req}
\title{Create an httr verb request function from an HAR request}
\usage{
as_httr_req(entry, quiet = TRUE, add_clip = TRUE)
}
\arguments{
\item{entry}{HAR entry}

\item{quiet}{quiet}

\item{add_clip}{add clip}
}
\description{
Create an httr verb request function from an HAR request
}

+ 0
- 18
man/as_req.Rd View File

@@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_req.r
\name{as_req}
\alias{as_req}
\title{Create an httr function from an HAR request}
\usage{
as_req(entry, quiet = TRUE, add_clip = TRUE)
}
\arguments{
\item{entry}{HAR entry}

\item{quiet}{quiet}

\item{add_clip}{add clip}
}
\description{
Create an httr function from an HAR request
}

+ 2
- 2
man/get_content_size.Rd View File

@@ -4,7 +4,7 @@
\alias{get_content_size}
\alias{get_body_size}
\alias{get_headers_size}
\title{Retrieve size of content | body | headers}
\title{Retrieve size of content | body | headers}
\usage{
get_content_size(har_resp_obj)

@@ -16,5 +16,5 @@ get_headers_size(har_resp_obj)
\item{har_resp_obj}{HAR response object}
}
\description{
Retrieve size of content | body | headers
Retrieve size of content | body | headers
}

+ 16
- 0
man/get_har_entry.Rd View File

@@ -0,0 +1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{get_har_entry}
\alias{get_har_entry}
\title{Retrieve an entry by index from a HAR object}
\usage{
get_har_entry(x, i = 1)
}
\arguments{
\item{x}{can be a `har` object, `harlog` object or `harentries` object}

\item{i}{index of the HAR entry to retrieve}
}
\description{
Retrieve an entry by index from a HAR object
}

+ 14
- 0
man/har_entries.Rd View File

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{har_entries}
\alias{har_entries}
\title{Retrieve just the HAR entries from a splashr request}
\usage{
har_entries(x)
}
\arguments{
\item{x}{can be a `har` object, `harlog` object or `harentries` object}
}
\description{
Retrieve just the HAR entries from a splashr request
}

+ 14
- 0
man/har_entry_count.Rd View File

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
\name{har_entry_count}
\alias{har_entry_count}
\title{Retrieves number of HAR entries in a response}
\usage{
har_entry_count(x)
}
\arguments{
\item{x}{can be a `har` object, `harlog` object or `harentries` object}
}
\description{
Retrieves number of HAR entries in a response
}

+ 14
- 0
man/splash_har_reset.Rd View File

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
\name{splash_har_reset}
\alias{splash_har_reset}
\title{Drops all internally stored HAR records.}
\usage{
splash_har_reset(splash_obj, keys)
}
\arguments{
\item{splash_obj}{splashr object}
}
\description{
Drops all internally stored HAR records.
}

Loading…
Cancel
Save