Browse Source

new functions

master
boB Rudis 3 years ago
parent
commit
372ed67fda
21 changed files with 271 additions and 2 deletions
  1. +2
    -1
      DESCRIPTION
  2. +17
    -0
      NAMESPACE
  3. +3
    -0
      R/as_request.r
  4. +81
    -0
      R/dsl.r
  5. +1
    -0
      R/splashr-package.R
  6. +42
    -1
      R/splashr.r
  7. +1
    -0
      README.Rmd
  8. +1
    -0
      README.md
  9. +3
    -0
      man/as_request.Rd
  10. +1
    -0
      man/splash_go.Rd
  11. +1
    -0
      man/splash_har.Rd
  12. +14
    -0
      man/splash_history.Rd
  13. +1
    -0
      man/splash_html.Rd
  14. +1
    -0
      man/splash_images.Rd
  15. +14
    -0
      man/splash_perf_stats.Rd
  16. +1
    -0
      man/splash_plugins.Rd
  17. +1
    -0
      man/splash_png.Rd
  18. +1
    -0
      man/splash_response_body.Rd
  19. +70
    -0
      man/splash_user_agent.Rd
  20. +14
    -0
      man/splash_version.Rd
  21. +1
    -0
      man/splash_wait.Rd

+ 2
- 1
DESCRIPTION View File

@@ -33,6 +33,7 @@ Imports:
clipr,
HARtools,
openssl,
lubridate
lubridate,
scales
RoxygenNote: 6.0.0
Remotes: wch/harbor

+ 17
- 0
NAMESPACE View File

@@ -44,15 +44,31 @@ export(splash_active)
export(splash_debug)
export(splash_go)
export(splash_har)
export(splash_history)
export(splash_html)
export(splash_images)
export(splash_local)
export(splash_perf_stats)
export(splash_plugins)
export(splash_png)
export(splash_response_body)
export(splash_user_agent)
export(splash_version)
export(splash_wait)
export(start_splash)
export(stop_splash)
export(ua_ios_safari)
export(ua_linux_chrome)
export(ua_linux_firefox)
export(ua_macos_chrome)
export(ua_macos_safari)
export(ua_splashr)
export(ua_win10_chrome)
export(ua_win10_firefox)
export(ua_win10_ie11)
export(ua_win7_chrome)
export(ua_win7_firefox)
export(ua_win7_ie11)
export(writeHAR)
import(harbor)
import(httr)
@@ -66,6 +82,7 @@ importFrom(clipr,read_clip)
importFrom(jsonlite,fromJSON)
importFrom(lubridate,ymd_hms)
importFrom(openssl,base64_decode)
importFrom(scales,comma)
importFrom(stringi,stri_detect_regex)
importFrom(stringi,stri_split_fixed)
importFrom(stringi,stri_split_regex)


+ 3
- 0
R/as_request.r View File

@@ -3,10 +3,13 @@
#' @param har_entry a HAR object (should contain a response body to be most useful)
#' @export
#' @examples \dontrun{
#' library(purrr)
#'
#' URL <- "http://www.svs.cl/portal/principal/605/w3-propertyvalue-18554.html"
#'
#' splash_local %>%
#' splash_response_body(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go(URL) %>%
#' splash_wait(2) %>%
#' splash_har() -> har


+ 81
- 0
R/dsl.r View File

@@ -20,6 +20,7 @@ end
#' @examples \dontrun{
#' splash_local %>%
#' splash_response_body(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_har() -> rud_har
@@ -40,6 +41,7 @@ splash_response_body <- function(splash_obj, enable=FALSE) {
#' @examples \dontrun{
#' splash_local %>%
#' splash_plugins(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_har() -> rud_har
@@ -63,6 +65,7 @@ splash_plugins <- function(splash_obj, enable=FALSE) {
#' @examples \dontrun{
#' splash_local %>%
#' splash_images(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_har() -> rud_har
@@ -84,6 +87,7 @@ splash_images <- function(splash_obj, enable=TRUE) {
#' @examples \dontrun{
#' splash_local %>%
#' splash_response_body(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_har() -> rud_har
@@ -106,6 +110,7 @@ splash_go <- function(splash_obj, url) {
#' @examples \dontrun{
#' splash_local %>%
#' splash_response_body(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_har() -> rud_har
@@ -126,6 +131,7 @@ splash_wait <- function(splash_obj, time=2) {
#' @examples \dontrun{
#' splash_local %>%
#' splash_response_body(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_har() -> rud_har
@@ -153,6 +159,7 @@ splash_har <- function(splash_obj) {
#' @examples \dontrun{
#' splash_local %>%
#' splash_response_body(TRUE) %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_html() -> rud_pg
@@ -182,6 +189,7 @@ splash_html <- function(splash_obj, raw_html=FALSE) {
#' @export
#' @examples \dontrun{
#' splash_local %>%
#' splash_user_agent(ua_macos_chrome) %>%
#' splash_go("https://rud.is/b") %>%
#' splash_wait(2) %>%
#' splash_png()
@@ -198,3 +206,76 @@ splash_png <- function(splash_obj) {

}

#' Overwrite the User-Agent header for all further requests.
#'
#' Appends a `splash:set_user_agent()` call to the script stored in
#' `splash_obj$calls`. There are a few built-in user agents, all beginning
#' with `ua_`.
#'
#' @md
#' @param splash_obj splashr object
#' @param user_agent 1 element character vector, defaults to `splashr/#.#.#`.
#'   Backslashes, double quotes and line breaks are escaped before the value
#'   is placed inside the generated Lua string literal.
#' @return `splash_obj`, with the new call appended to its `calls` element.
#' @export
#' @examples \dontrun{
#' library(rvest)
#'
#' URL <- "https://httpbin.org/user-agent"
#'
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go(URL) %>%
#'   splash_html() %>%
#'   html_text("body") %>%
#'   jsonlite::fromJSON()
#' }
splash_user_agent <- function(splash_obj, user_agent=ua_splashr) {

  # sprintf() is vectorised: a longer vector would silently append several
  # set_user_agent() calls, and NA would be sent as the literal text "NA".
  stopifnot(
    is.character(user_agent),
    length(user_agent) == 1L,
    !is.na(user_agent)
  )

  # The value ends up inside a double-quoted Lua string literal, so escape
  # the characters that would otherwise terminate or corrupt that literal.
  # Backslashes must be handled first so later escapes are not doubled.
  lua_ua <- gsub("\\", "\\\\", user_agent, fixed = TRUE)
  lua_ua <- gsub("\"", "\\\"", lua_ua, fixed = TRUE)
  lua_ua <- gsub("\n", "\\n", lua_ua, fixed = TRUE)
  lua_ua <- gsub("\r", "\\r", lua_ua, fixed = TRUE)

  splash_obj$calls <- c(
    splash_obj$calls,
    sprintf('splash:set_user_agent("%s")', lua_ua)
  )

  splash_obj

}

# Built-in User-Agent strings. Each is an exported length-1 character vector
# documented on the `splash_user_agent` help page (via `@rdname`).

# Default user agent: "splashr/" followed by the version reported by
# packageVersion("splashr"). As a top-level assignment it is computed once,
# when this file is evaluated, not on each call.
#' @rdname splash_user_agent
#' @export
ua_splashr <- sprintf("splashr/%s", packageVersion("splashr"))

# Windows NT 10.0 (Win64), Chrome 55
#' @rdname splash_user_agent
#' @export
ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"

# Windows NT 10.0 (WOW64), Firefox 51
#' @rdname splash_user_agent
#' @export
ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"

# Windows NT 10.0 (WOW64), Trident/7.0 rv:11.0 (Internet Explorer 11)
#' @rdname splash_user_agent
#' @export
ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"

# Windows NT 6.1 (WOW64), Chrome 55
#' @rdname splash_user_agent
#' @export
ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"

# Windows NT 6.1 (WOW64), Firefox 51
#' @rdname splash_user_agent
#' @export
ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"

# Windows NT 6.1 (WOW64), Trident/7.0 rv:11.0 (Internet Explorer 11)
#' @rdname splash_user_agent
#' @export
ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"

# Mac OS X 10_12_2, Chrome 55
#' @rdname splash_user_agent
#' @export
ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"

# Mac OS X 10_12_2, Safari (Version/10.0.2)
#' @rdname splash_user_agent
#' @export
ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12"

# Linux x86_64 (X11), Chrome 55
#' @rdname splash_user_agent
#' @export
ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"

# Ubuntu Linux x86_64 (X11), Firefox 51
#' @rdname splash_user_agent
#' @export
ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0"

# iPad, OS 10_2, mobile Safari (Version/10.0)
#' @rdname splash_user_agent
#' @export
ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1"

+ 1
- 0
R/splashr-package.R View File

@@ -22,6 +22,7 @@
#' @importFrom openssl base64_decode
#' @importFrom clipr read_clip
#' @importFrom lubridate ymd_hms
#' @importFrom scales comma

NULL



+ 42
- 1
R/splashr.r View File

@@ -33,7 +33,8 @@ splash_active <- function(splash_obj) {

out$url <- splash_url(splash_obj)

message(sprintf("Status of splash instance on [%s]: %s. Max RSS: %s\n", out$url, out$status, out$maxrss))
message(sprintf("Status of splash instance on [%s]: %s. Max RSS: %s Mb\n",
out$url, out$status, scales::comma(out$maxrss/1024/1024)))

if ("status" %in% names(out)) return(out$status == "ok")

@@ -41,6 +42,46 @@ splash_active <- function(splash_obj) {

}

#' Get Splash version information
#'
#' Runs a small Lua script that returns `splash:get_version()` and parses the
#' raw response as JSON.
#'
#' @param splash_obj A splash connection object
#' @return The server response as parsed by `jsonlite::fromJSON()`.
#' @export
splash_version <- function(splash_obj) {

  # Script text is sent verbatim to the Splash server.
  lua_script <- '
function main(splash)
return splash:get_version()
end
'

  raw_reply <- execute_lua(splash_obj, lua_script)

  # execute_lua() hands back raw bytes; decode to text before parsing.
  jsonlite::fromJSON(rawToChar(raw_reply))

}

#' Get information about requests/responses for the pages loaded
#'
#' Runs a small Lua script that returns `splash:history()` and parses the raw
#' response as JSON.
#'
#' @param splash_obj A splash connection object
#' @return The server response as parsed by `jsonlite::fromJSON()`.
#' @export
splash_history <- function(splash_obj) {

  # Script text is sent verbatim to the Splash server.
  lua_script <- '
function main(splash)
return splash:history()
end
'

  raw_reply <- execute_lua(splash_obj, lua_script)

  # execute_lua() hands back raw bytes; decode to text before parsing.
  jsonlite::fromJSON(rawToChar(raw_reply))

}


#' Get Splash performance-related statistics
#'
#' Runs a small Lua script that returns `splash:get_perf_stats()` and parses
#' the raw response as JSON.
#'
#' @param splash_obj A splash connection object
#' @return The server response as parsed by `jsonlite::fromJSON()`.
#' @export
splash_perf_stats <- function(splash_obj) {

  # Script text is sent verbatim to the Splash server.
  lua_script <- '
function main(splash)
return splash:get_perf_stats()
end
'

  raw_reply <- execute_lua(splash_obj, lua_script)

  # execute_lua() hands back raw bytes; decode to text before parsing.
  jsonlite::fromJSON(rawToChar(raw_reply))

}

#' Retrieve debug-level info for a Splash server
#'
#' @param splash_obj A splash connection object


+ 1
- 0
README.Rmd View File

@@ -59,6 +59,7 @@ Mini-DSL (domain-specific language). These can be used to create a "script" with
- `splash_har`: Return information about Splash interaction with a website in HAR format.
- `splash_html`: Return a HTML snapshot of a current page.
- `splash_png`: Return a screenshot of a current page in PNG format.
- `splash_user_agent`: Overwrite the User-Agent header for all further requests. NOTE: There are many "helper" user agent strings to go with `splash_user_agent`. Look for objects in `splashr` starting with `ua_`.

`httr` helpers. These help turn various bits of `splashr` objects into `httr`-ish things:



+ 1
- 0
README.md View File

@@ -56,6 +56,7 @@ Mini-DSL (domain-specific language). These can be used to create a "script" with
- `splash_har`: Return information about Splash interaction with a website in HAR format.
- `splash_html`: Return a HTML snapshot of a current page.
- `splash_png`: Return a screenshot of a current page in PNG format.
- `splash_user_agent`: Overwrite the User-Agent header for all further requests. NOTE: There are many "helper" user agent strings to go with `splash_user_agent`. Look for objects in `splashr` starting with `ua_`.

`httr` helpers. These help turn various bits of `splashr` objects into `httr`-ish things:



+ 3
- 0
man/as_request.Rd View File

@@ -14,10 +14,13 @@ Return a HAR entry response as an httr::response object
}
\examples{
\dontrun{
library(purrr)

URL <- "http://www.svs.cl/portal/principal/605/w3-propertyvalue-18554.html"

splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go(URL) \%>\%
splash_wait(2) \%>\%
splash_har() -> har


+ 1
- 0
man/splash_go.Rd View File

@@ -19,6 +19,7 @@ until page loads.
\dontrun{
splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_har() -> rud_har


+ 1
- 0
man/splash_har.Rd View File

@@ -17,6 +17,7 @@ a DSL script chain as this will execute the script and return the HAR content
\dontrun{
splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_har() -> rud_har


+ 14
- 0
man/splash_history.Rd View File

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/splashr.r
\name{splash_history}
\alias{splash_history}
\title{Get information about requests/responses for the pages loaded}
\usage{
splash_history(splash_obj)
}
\arguments{
\item{splash_obj}{A splash connection object}
}
\description{
Get information about requests/responses for the pages loaded
}

+ 1
- 0
man/splash_html.Rd View File

@@ -19,6 +19,7 @@ a DSL script chain as this will execute the script and return the HTML content
\dontrun{
splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_html() -> rud_pg


+ 1
- 0
man/splash_images.Rd View File

@@ -21,6 +21,7 @@ positions of DOM elements, and scripts may read and use them.
\dontrun{
splash_local \%>\%
splash_images(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_har() -> rud_har


+ 14
- 0
man/splash_perf_stats.Rd View File

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/splashr.r
\name{splash_perf_stats}
\alias{splash_perf_stats}
\title{Get Splash performance-related statistics}
\usage{
splash_perf_stats(splash_obj)
}
\arguments{
\item{splash_obj}{A splash connection object}
}
\description{
Get Splash performance-related statistics
}

+ 1
- 0
man/splash_plugins.Rd View File

@@ -18,6 +18,7 @@ Plugins are disabled by default.
\dontrun{
splash_local \%>\%
splash_plugins(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_har() -> rud_har


+ 1
- 0
man/splash_png.Rd View File

@@ -19,6 +19,7 @@ a DSL script chain as this will execute the script and return the PNG content
\examples{
\dontrun{
splash_local \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_png()


+ 1
- 0
man/splash_response_body.Rd View File

@@ -18,6 +18,7 @@ By default Splash doesn’t keep bodies of each response in memory, for efficien
\dontrun{
splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_har() -> rud_har


+ 70
- 0
man/splash_user_agent.Rd View File

@@ -0,0 +1,70 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
\docType{data}
\name{splash_user_agent}
\alias{splash_user_agent}
\alias{ua_splashr}
\alias{ua_win10_chrome}
\alias{ua_win10_firefox}
\alias{ua_win10_ie11}
\alias{ua_win7_chrome}
\alias{ua_win7_firefox}
\alias{ua_win7_ie11}
\alias{ua_macos_chrome}
\alias{ua_macos_safari}
\alias{ua_linux_chrome}
\alias{ua_linux_firefox}
\alias{ua_ios_safari}
\title{Overwrite the User-Agent header for all further requests.}
\format{An object of class \code{character} of length 1.}
\usage{
splash_user_agent(splash_obj, user_agent = ua_splashr)

ua_splashr

ua_win10_chrome

ua_win10_firefox

ua_win10_ie11

ua_win7_chrome

ua_win7_firefox

ua_win7_ie11

ua_macos_chrome

ua_macos_safari

ua_linux_chrome

ua_linux_firefox

ua_ios_safari
}
\arguments{
\item{splash_obj}{splashr object}

\item{user_agent}{1 element character vector, defaults to \code{splashr/#.#.#}.}
}
\description{
There are a few built-in user agents, all beginning with \code{ua_}.
}
\examples{
\dontrun{
library(rvest)

URL <- "https://httpbin.org/user-agent"

splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go(URL) \%>\%
splash_html() \%>\%
html_text("body") \%>\%
jsonlite::fromJSON()
}
}
\keyword{datasets}

+ 14
- 0
man/splash_version.Rd View File

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/splashr.r
\name{splash_version}
\alias{splash_version}
\title{Get Splash version information}
\usage{
splash_version(splash_obj)
}
\arguments{
\item{splash_obj}{A splash connection object}
}
\description{
Get Splash version information
}

+ 1
- 0
man/splash_wait.Rd View File

@@ -18,6 +18,7 @@ When script is waiting WebKit continues processing the webpage
\dontrun{
splash_local \%>\%
splash_response_body(TRUE) \%>\%
splash_user_agent(ua_macos_chrome) \%>\%
splash_go("https://rud.is/b") \%>\%
splash_wait(2) \%>\%
splash_har() -> rud_har


Loading…
Cancel
Save