Browse Source

gepetto sneak peek

master
boB Rudis 6 years ago
parent
commit
cf0133c34b
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 6
      DESCRIPTION
  2. 13
      NAMESPACE
  3. 4
      R/decapitated-package.R
  4. 279
      R/gepetto.R
  5. 5
      R/util.r
  6. 11
      R/utils-pipe.R
  7. 90
      R/utils-safely.R
  8. 2
      R/zzz.R
  9. 14
      README.Rmd
  10. 31
      README.md
  11. 23
      man/gep_active.Rd
  12. 24
      man/gep_debug.Rd
  13. 30
      man/gep_render_har.Rd
  14. 27
      man/gep_render_html.Rd
  15. 27
      man/gep_render_magick.Rd
  16. 33
      man/gep_render_pdf.Rd
  17. 27
      man/gepetto.Rd
  18. 12
      man/pipe.Rd
  19. 17
      man/print.gepetto.Rd

6
DESCRIPTION

@ -23,9 +23,13 @@ Suggests:
Depends:
R (>= 3.2.0)
Imports:
httr,
jsonlite,
HARtools,
xml2,
magick,
processx,
tools,
utils
utils,
magrittr
RoxygenNote: 6.0.1.9000

13
NAMESPACE

@ -1,14 +1,27 @@
# Generated by roxygen2: do not edit by hand
S3method(print,gepetto)
export("%>%")
export(chrome_dump_pdf)
export(chrome_read_html)
export(chrome_shot)
export(chrome_version)
export(download_chromium)
export(gep_active)
export(gep_debug)
export(gep_render_har)
export(gep_render_html)
export(gep_render_magick)
export(gep_render_pdf)
export(gepetto)
export(get_chrome_env)
export(set_chrome_env)
import(httr)
import(magick)
import(processx)
import(tools)
import(utils)
import(xml2)
importFrom(HARtools,readHAR)
importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")

4
R/decapitated-package.R

@ -44,5 +44,7 @@
#' @name decapitated
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import xml2 magick processx tools utils
#' @import xml2 magick processx tools utils httr
#' @importFrom jsonlite fromJSON
#' @importFrom HARtools readHAR
NULL

279
R/gepetto.R

@ -0,0 +1,279 @@
#' Create a connection to a Gepetto API server
#'
#' @md
#' @param host where is it running? Defaults to "`localhost`"
#' @param port same, but what port? Defaults to `8080` since the most common
#' use case is that you have `gepetto` running in a Docker container.
#' Use `3000` if you're running it locally via `npm` as that's the default
#' port for development.`
#' @return A `gepetto` connection object
#' @export
#' @examples \dontrun{
#' gepetto()
#' }
gepetto <- function(host = "localhost", port = 8080) {
list(
host = host,
port = port
) -> out
class(out) <- c("gepetto")
out
}
#' Print
#' @md
#' @param x `gepetto` object
#' @param ... unused
#' @keywords internal
#' @export
print.gepetto <- function(x, ...) {
cat("<gepetto@", x$host, ":", x$port, ">\n", sep="")
}
#' Render a page in a javascript context and serialize to HTML
#'
#' @md
#' @param gep a gepetto connection object
#' @param url the URL to fetch and render
#' @param width,height viewport width/height
#' @return HTML
#' @export
#' @examples \dontrun{
#' gepetto(port=3000) %>%
#' gep_render_html("https://r-project.org/")
#' }
gep_render_html <- function(gep, url, width=1440, height=5000) {
httr::GET(
url = sprintf("http://%s:%s/render_html", gep$host, gep$port),
query = list(
url = url,
width = width,
height = height
)
) -> res
httr::stop_for_status(res)
out <- httr::content(res, as="text")
out <- xml2::read_html(out)
out
}
#' Render a page in a javascript context and serialize to HAR
#'
#' TODO: Modify the `puppeteer-har` node module to allow for saving content
#'
#' @md
#' @param gep a gepetto connection object
#' @param url the URL to fetch and render
#' @param width,height viewport width/height
#' @return HAR
#' @note content is not returned, just HAR information
#' @export
#' @examples \dontrun{
#' gepetto(port=3000) %>%
#' gep_render_har("https://r-project.org/")
#' }
gep_render_har <- function(gep, url, width=1440, height=5000) {
httr::GET(
url = sprintf("http://%s:%s/render_har", gep$host, gep$port),
query = list(
url = url,
width = width,
height = height
)
) -> res
httr::stop_for_status(res)
out <- httr::content(res, as="text")
out <- HARtools::readHAR(out)
out
}
#' Render a page in a javascript context and take a screenshot
#'
#' @md
#' @param gep a gepetto connection object
#' @param url the URL to fetch and render
#' @param width,height viewport width/height
#' @return `magick` image
#' @export
#' @examples \dontrun{
#' gepetto(port=3000) %>%
#' gep_render_magick("https://r-project.org/")
#' }
gep_render_magick <- function(gep, url, width=1440, height=5000) {
httr::GET(
url = sprintf("http://%s:%s/render_png", gep$host, gep$por),
query = list(
url = url,
width = width,
height = height
)
) -> res
httr::stop_for_status(res)
out <- httr::content(res)
out <- magick::image_read(out)
out
}
# #' Take a screenshot of the current browser page
# #'
# #' @md
# #' @param gep a gepetto connection object
# #' @return `magick` image
# #' @export
# #' @examples \dontrun{
# #' gepetto(port=3000) %>%
# #' gep_screenshot()
# #' }
# gep_screenshot <- function(gep) {
#
# httr::GET(
# url = sprintf("http://%s:%s/screenshot", gep$host, gep$por),
# ) -> res
#
# httr::stop_for_status(res)
#
# out <- httr::content(res)
# out <- magick::image_read(out)
# out
#
# }
#' Render a page in a javascript context and rendero to PDF
#'
#' @md
#' @param gep a gepetto connection object
#' @param url the URL to fetch and render
#' @param path directory & filename to save the PDF to. If `NULL` will be saved
#' to a tempfile and it location will be returned.
#' @param overwrite if `TRUE` any existing `path` (file) will be overwritten
#' @param width,height viewport width/height
#' @return object
#' @export
#' @examples \dontrun{
#' gepetto(port=3000) %>%
#' gep_render_pdf("https://r-project.org/")
#' }
gep_render_pdf <- function(gep, url, path=NULL, overwrite=TRUE, width=1440, height=5000) {
if (is.null(path)) {
path <- tempfile(fileext = ".pdf")
} else {
path <- path.expand(path)
}
httr::GET(
url = sprintf("http://%s:%s/render_pdf", gep$host, gep$por),
query = list(
url = url,
width = width,
height = height
),
httr::write_disk(path = path)
) -> res
httr::stop_for_status(res)
path
}
#' Get "debug-level" information of a running gepetto server
#'
#' @md
#' @param gep a gepetto connection object
#' @return debug info
#' @export
#' @examples \dontrun{
#' gepetto() %>%
#' gep_debug() %>%
#' str()
#' }
gep_debug <- function(gep) {
httr::GET(
url = sprintf("http://%s:%s/_debug", gep$host, gep$port)
) -> res
httr::stop_for_status(res)
out <- httr::content(res, as="text")
out <- jsonlite::fromJSON(out)
out
}
#' Get test whether the gepetto server is active
#'
#' @md
#' @param gep a gepetto connection object
#' @return logical (`TRUE` if alive)
#' @export
#' @examples \dontrun{
#' gepetto() %>%
#' gep_active()
#' }
gep_active <- function(gep) {
s_GET(
url = sprintf("http://%s:%s/_ping", gep$host, gep$port)
) -> res
res <- stop_for_problem(res)
httr::stop_for_status(res)
out <- httr::content(res, as="text")
out <- jsonlite::fromJSON(out)
out$status == "ok"
}
#' #' Execute Puppeteer commands
#' #'
#' #' This is a **low-level** call that makes **you** responsible for the return
#' #' type. Eventually there will likely be more boilerplate code for handling return
#' #' values.
#' #'
#' #' @md
#' #' @param gep a gepetto connection object
#' #' @param js Puppeteer js to execute in-browser
#' #' @references [Puppeteer API](https://github.com/GoogleChrome/puppeteer/blob/v1.7.0/docs/api.md)
#' #' @export
#' #' @examples \dontrun{
#' #' gepetto() %>%
#' #' gep_exec()
#' #' }
#' gep_exec <- function(gep, js) {
#'
#' httr::POST(
#' url = sprintf("http://%s:%s/exec", gep$host, gep$port),
#' encode = "form",
#' body = js
#' ) -> res
#'
#' httr::stop_for_status(res)
#'
#' out <- httr::content(res, as="text")
#' # out <- jsonlite::fromJSON(out)
#' #
#' out
#'
#' }

5
R/util.r

@ -1,3 +1,8 @@
stop_for_problem <- function(res) {
if (is.null(res$result)) stop(res$error$message, call.=FALSE) else res$result
}
.get_app_dir <- function() {
ddir <- file.path(Sys.getenv("HOME"), ".rdecapdata")
if (!dir.exists(ddir)) {

11
R/utils-pipe.R

@ -0,0 +1,11 @@
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL

90
R/utils-safely.R

@ -0,0 +1,90 @@
# Less cool counterparts to purrr's side-effect capture-rs
#
# Most of the helper functions are 100% from output.R in purrr repo
#
# @param quiet Hide errors (`TRUE`, the default), or display them
# as they occur?
# @param otherwise Default value to use when an error occurs.
#
# @return `safely`: wrapped function instead returns a list with
# components `result` and `error`. One value is always `NULL`.
#
# `quietly`: wrapped function instead returns a list with components
# `result`, `output`, `messages` and `warnings`.
#
# `possibly`: wrapped function uses a default value (`otherwise`)
# whenever an error occurs.
safely <- function(.f, otherwise = NULL, quiet = TRUE) {
function(...) capture_error(.f(...), otherwise, quiet)
}
quietly <- function(.f) {
function(...) capture_output(.f(...))
}
possibly <- function(.f, otherwise, quiet = TRUE) {
force(otherwise)
function(...) {
tryCatch(.f(...),
error = function(e) {
if (!quiet)
message("Error: ", e$message)
otherwise
},
interrupt = function(e) {
stop("Terminated by user", call. = FALSE)
}
)
}
}
capture_error <- function(code, otherwise = NULL, quiet = TRUE) {
tryCatch(
list(result = code, error = NULL),
error = function(e) {
if (!quiet)
message("Error: ", e$message)
list(result = otherwise, error = e)
},
interrupt = function(e) {
stop("Terminated by user", call. = FALSE)
}
)
}
capture_output <- function(code) {
warnings <- character()
wHandler <- function(w) {
warnings <<- c(warnings, w$message)
invokeRestart("muffleWarning")
}
messages <- character()
mHandler <- function(m) {
messages <<- c(messages, m$message)
invokeRestart("muffleMessage")
}
temp <- file()
sink(temp)
on.exit({
sink()
close(temp)
})
result <- withCallingHandlers(
code,
warning = wHandler,
message = mHandler
)
output <- paste0(readLines(temp, warn = FALSE), collapse = "\n")
list(
result = result,
output = output,
warnings = warnings,
messages = messages
)
}

2
R/zzz.R

@ -1,3 +1,5 @@
s_GET <- safely(httr::GET)
.onAttach <- function(libname, pkgname) {
HEADLESS_CHROME <- Sys.getenv("HEADLESS_CHROME")

14
README.Rmd

@ -62,6 +62,8 @@ Chrome.
The following functions are implemented:
### CLI-based ops
- `downlaod_chromium`: Download a standalone version of Chromium (recommended)
- `chrome_dump_pdf`: "Print" to PDF
- `chrome_read_html`: Read a URL via headless Chrome and return the raw or rendered '<body>' 'innerHTML' DOM elements
@ -70,6 +72,18 @@ The following functions are implemented:
- `get_chrome_env`: get an envrionment variable 'HEADLESS_CHROME'
- `set_chrome_env`: set an envrionment variable 'HEADLESS_CHROME'
### `gepetto`-based ops
- `gepetto`: Create a connection to a Gepetto API server
- `gep_active`: Get test whether the gepetto server is active
- `gep_debug`: Get "debug-level" information of a running gepetto server
- `gep_render_har`: Render a page in a javascript context and serialize to HAR
- `gep_render_html`: Render a page in a javascript context and serialize to HTML
- `gep_render_magick`: Render a page in a javascript context and take a screenshot
- `gep_render_pdf`: Render a page in a javascript context and rendero to PDF
More information on `gepetto` is forthcoming but you can take a sneak peek [here](https://gitlab.com/hrbrmstr/gepetto).
## Installation
```{r eval=FALSE}

31
README.md

@ -63,16 +63,27 @@ control over the command-line execution of headless Chrome.
The following functions are implemented:
- `downlaod_chromium`: Download a standalone version of Chromium (recommended)
- `chrome_dump_pdf`: “Print” to PDF
- `chrome_read_html`: Read a URL via headless Chrome and return the
raw or rendered ’
<body>
‘’innerHTML’ DOM elements
- `chrome_shot`: Capture a screenshot
- `chrome_version`: Get Chrome version
- `get_chrome_env`: get an envrionment variable ‘HEADLESS\_CHROME’
- `set_chrome_env`: set an envrionment variable ‘HEADLESS\_CHROME’
### CLI-based ops
- `downlaod_chromium`: Download a standalone version of Chromium (recommended)
- `chrome_dump_pdf`: "Print" to PDF
- `chrome_read_html`: Read a URL via headless Chrome and return the raw or rendered '<body>' 'innerHTML' DOM elements
- `chrome_shot`: Capture a screenshot
- `chrome_version`: Get Chrome version
- `get_chrome_env`: get an envrionment variable 'HEADLESS_CHROME'
- `set_chrome_env`: set an envrionment variable 'HEADLESS_CHROME'
### `gepetto`-based ops
- `gepetto`: Create a connection to a Gepetto API server
- `gep_active`: Get test whether the gepetto server is active
- `gep_debug`: Get "debug-level" information of a running gepetto server
- `gep_render_har`: Render a page in a javascript context and serialize to HAR
- `gep_render_html`: Render a page in a javascript context and serialize to HTML
- `gep_render_magick`: Render a page in a javascript context and take a screenshot
- `gep_render_pdf`: Render a page in a javascript context and rendero to PDF
More information on `gepetto` is forthcoming but you can take a sneak peek [here](https://gitlab.com/hrbrmstr/gepetto).
## Installation

23
man/gep_active.Rd

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{gep_active}
\alias{gep_active}
\title{Get test whether the gepetto server is active}
\usage{
gep_active(gep)
}
\arguments{
\item{gep}{a gepetto connection object}
}
\value{
logical (\code{TRUE} if alive)
}
\description{
Get test whether the gepetto server is active
}
\examples{
\dontrun{
gepetto() \%>\%
gep_active()
}
}

24
man/gep_debug.Rd

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{gep_debug}
\alias{gep_debug}
\title{Get "debug-level" information of a running gepetto server}
\usage{
gep_debug(gep)
}
\arguments{
\item{gep}{a gepetto connection object}
}
\value{
debug info
}
\description{
Get "debug-level" information of a running gepetto server
}
\examples{
\dontrun{
gepetto() \%>\%
gep_debug() \%>\%
str()
}
}

30
man/gep_render_har.Rd

@ -0,0 +1,30 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{gep_render_har}
\alias{gep_render_har}
\title{Render a page in a javascript context and serialize to HAR}
\usage{
gep_render_har(gep, url, width = 1440, height = 5000)
}
\arguments{
\item{gep}{a gepetto connection object}
\item{url}{the URL to fetch and render}
\item{width, height}{viewport width/height}
}
\value{
HAR
}
\description{
TODO: Modify the \code{puppeteer-har} node module to allow for saving content
}
\note{
content is not returned, just HAR information
}
\examples{
\dontrun{
gepetto(port=3000) \%>\%
gep_render_har("https://r-project.org/")
}
}

27
man/gep_render_html.Rd

@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{gep_render_html}
\alias{gep_render_html}
\title{Render a page in a javascript context and serialize to HTML}
\usage{
gep_render_html(gep, url, width = 1440, height = 5000)
}
\arguments{
\item{gep}{a gepetto connection object}
\item{url}{the URL to fetch and render}
\item{width, height}{viewport width/height}
}
\value{
HTML
}
\description{
Render a page in a javascript context and serialize to HTML
}
\examples{
\dontrun{
gepetto(port=3000) \%>\%
gep_render_html("https://r-project.org/")
}
}

27
man/gep_render_magick.Rd

@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{gep_render_magick}
\alias{gep_render_magick}
\title{Render a page in a javascript context and take a screenshot}
\usage{
gep_render_magick(gep, url, width = 1440, height = 5000)
}
\arguments{
\item{gep}{a gepetto connection object}
\item{url}{the URL to fetch and render}
\item{width, height}{viewport width/height}
}
\value{
\code{magick} image
}
\description{
Render a page in a javascript context and take a screenshot
}
\examples{
\dontrun{
gepetto(port=3000) \%>\%
gep_render_magick("https://r-project.org/")
}
}

33
man/gep_render_pdf.Rd

@ -0,0 +1,33 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{gep_render_pdf}
\alias{gep_render_pdf}
\title{Render a page in a javascript context and rendero to PDF}
\usage{
gep_render_pdf(gep, url, path = NULL, overwrite = TRUE, width = 1440,
height = 5000)
}
\arguments{
\item{gep}{a gepetto connection object}
\item{url}{the URL to fetch and render}
\item{path}{directory & filename to save the PDF to. If \code{NULL} will be saved
to a tempfile and it location will be returned.}
\item{overwrite}{if \code{TRUE} any existing \code{path} (file) will be overwritten}
\item{width, height}{viewport width/height}
}
\value{
object
}
\description{
Render a page in a javascript context and rendero to PDF
}
\examples{
\dontrun{
gepetto(port=3000) \%>\%
gep_render_pdf("https://r-project.org/")
}
}

27
man/gepetto.Rd

@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{gepetto}
\alias{gepetto}
\title{Create a connection to a Gepetto API server}
\usage{
gepetto(host = "localhost", port = 8080)
}
\arguments{
\item{host}{where is it running? Defaults to "\code{localhost}"}
\item{port}{same, but what port? Defaults to \code{8080} since the most common
use case is that you have \code{gepetto} running in a Docker container.
Use \code{3000} if you're running it locally via \code{npm} as that's the default
port for development.`}
}
\value{
A \code{gepetto} connection object
}
\description{
Create a connection to a Gepetto API server
}
\examples{
\dontrun{
gepetto()
}
}

12
man/pipe.Rd

@ -0,0 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils-pipe.R
\name{\%>\%}
\alias{\%>\%}
\title{Pipe operator}
\usage{
lhs \%>\% rhs
}
\description{
See \code{magrittr::\link[magrittr]{\%>\%}} for details.
}
\keyword{internal}

17
man/print.gepetto.Rd

@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gepetto.R
\name{print.gepetto}
\alias{print.gepetto}
\title{Print}
\usage{
\method{print}{gepetto}(x, ...)
}
\arguments{
\item{x}{\code{gepetto} object}
\item{...}{unused}
}
\description{
Print
}
\keyword{internal}
Loading…
Cancel
Save