boB Rudis
7 years ago
commit
b322eadedf
23 changed files with 477 additions and 0 deletions
@ -0,0 +1,10 @@ |
|||||
|
^.*\.Rproj$ |
||||
|
^\.Rproj\.user$ |
||||
|
^\.travis\.yml$ |
||||
|
^README\.*Rmd$ |
||||
|
^README\.*html$ |
||||
|
^NOTES\.*Rmd$ |
||||
|
^NOTES\.*html$ |
||||
|
^\.codecov\.yml$ |
||||
|
^README_files$ |
||||
|
^doc$ |
@ -0,0 +1 @@ |
|||||
|
comment: false |
@ -0,0 +1,8 @@ |
|||||
|
.DS_Store |
||||
|
.Rproj.user |
||||
|
.Rhistory |
||||
|
.RData |
||||
|
.Rproj |
||||
|
src/*.o |
||||
|
src/*.so |
||||
|
src/*.dll |
@ -0,0 +1,31 @@ |
|||||
|
language: r |
||||
|
|
||||
|
warnings_are_errors: true |
||||
|
|
||||
|
sudo: required |
||||
|
|
||||
|
cache: packages |
||||
|
|
||||
|
r: |
||||
|
- oldrel |
||||
|
- release |
||||
|
- devel |
||||
|
|
||||
|
apt_packages: |
||||
|
- libv8-dev |
||||
|
- xclip |
||||
|
|
||||
|
env: |
||||
|
global: |
||||
|
- CRAN: http://cran.rstudio.com |
||||
|
|
||||
|
after_success: |
||||
|
- Rscript -e 'covr::codecov()' |
||||
|
|
||||
|
notifications: |
||||
|
email: |
||||
|
- bob@rud.is |
||||
|
irc: |
||||
|
channels: |
||||
|
- "104.236.112.222#builds" |
||||
|
nick: travisci |
@ -0,0 +1,24 @@ |
|||||
|
Package: decapitated |
||||
|
Type: Package |
||||
|
Title: Headless 'Chrome' Orchestration |
||||
|
Version: 0.1.0 |
||||
|
Date: 2017-05-02 |
||||
|
Author: Bob Rudis (bob@rud.is) |
||||
|
Maintainer: Bob Rudis <bob@rud.is> |
||||
|
Description: The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode |
||||
|
which can be instrumented programmatically. Tools are provided to perform headless |
||||
|
'Chrome' instrumentation on the command-line and will eventually provide support |
||||
|
for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level |
||||
|
'API' being promised by the development team. |
||||
|
URL: https://github.com/hrbrmstr/decapitated |
||||
|
BugReports: https://github.com/hrbrmstr/decapitated/issues |
||||
|
License: AGPL |
||||
|
Suggests: |
||||
|
testthat, |
||||
|
covr |
||||
|
Depends: |
||||
|
R (>= 3.2.0) |
||||
|
Imports: |
||||
|
xml2, |
||||
|
magick |
||||
|
RoxygenNote: 6.0.1 |
@ -0,0 +1,8 @@ |
|||||
|
# Generated by roxygen2: do not edit by hand |
||||
|
|
||||
|
export(chrome_dump_pdf) |
||||
|
export(chrome_read_html) |
||||
|
export(chrome_shot) |
||||
|
export(chrome_version) |
||||
|
import(magick) |
||||
|
import(xml2) |
@ -0,0 +1,2 @@ |
|||||
|
0.1.0 |
||||
|
* Initial release |
@ -0,0 +1 @@ |
|||||
|
# Path to the Chrome binary, taken from the `HEADLESS_CHROME` environment
# variable. `Sys.getenv()` yields "" when the variable is not set.
# NOTE(review): as a top-level assignment this is evaluated once, when this
# file is evaluated (presumably at package build/install time), so later
# changes to `HEADLESS_CHROME` would not be picked up -- confirm this is intended.
chrome_bin <- Sys.getenv("HEADLESS_CHROME")
@ -0,0 +1,26 @@ |
|||||
|
#' Headless 'Chrome' Orchestration |
||||
|
#' |
||||
|
#' The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode |
||||
|
#' which can be instrumented programmatically. Tools are provided to perform headless |
||||
|
#' 'Chrome' instrumentation on the command-line and will eventually provide support |
||||
|
#' for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level |
||||
|
#' 'API' being promised by the development team. |
||||
|
#' |
||||
|
#' @section Important: |
||||
|
#' |
||||
|
#' This pkg will eventually do much under the covers to find the location of the Chrome binary |
||||
|
#' on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values: |
||||
|
#' |
||||
|
#' - Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe` |
||||
|
#' - macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome` |
||||
|
#' |
||||
|
#' Linux folks will know where their binary is (many of you use non-default locations for things). |
||||
|
#' |
||||
|
#' Use `~/.Renviron` to store this value for the time being. |
||||
|
#' |
||||
|
#' @md |
||||
|
#' @name decapitated |
||||
|
#' @docType package |
||||
|
#' @author Bob Rudis (bob@@rud.is) |
||||
|
#' @import xml2 magick |
||||
|
NULL |
@ -0,0 +1,55 @@ |
|||||
|
#' Read a URL via headless Chrome and return the rendered `<body>` `innerHTML` DOM elements
#'
#' Runs the Chrome binary in headless mode with `--dump-dom`, captures what it
#' writes to standard output and parses that text with [xml2::read_html()].
#'
#' @md
#' @note This only grabs the `<body>` `innerHTML` contents
#' @param url URL to read from
#' @return the document returned by [xml2::read_html()] for the dumped DOM
#' @export
#' @examples
#' \dontrun{
#' chrome_read_html("https://www.r-project.org/")
#' }
chrome_read_html <- function(url) {

  # `--version` is deliberately not passed: it is Chrome's version-query flag
  # (see chrome_version()) and does not belong in a page-dumping invocation.
  # system2() pastes the arguments into a shell command line, so the URL is
  # quoted to keep characters such as `&`, `?` or `;` from being interpreted.
  args <- c("--headless", "--disable-gpu", "--dump-dom", shQuote(url))

  dom_lines <- system2(chrome_bin, args, stdout = TRUE)

  # With stdout = TRUE, system2() returns one element per output line;
  # read_html() expects the markup as a single string.
  xml2::read_html(paste(dom_lines, collapse = "\n"))

}
||||
|
|
||||
|
#' "Print" to PDF
#'
#' Runs the Chrome binary in headless mode with `--print-to-pdf` for the
#' given URL.
#'
#' @md
#' @note this is a quick version of the function and will overwrite `output.pdf` if it exists in CWD
#' @param url URL to read from
#' @return (invisibly) the value returned by [system2()] for the Chrome call
#' @export
#' @examples
#' \dontrun{
#' chrome_dump_pdf("https://www.r-project.org/")
#' }
chrome_dump_pdf <- function(url) {

  # `--version` is deliberately not passed: it is Chrome's version-query flag
  # and does not belong in a printing invocation.
  # system2() pastes the arguments into a shell command line, so the URL is
  # quoted to keep characters such as `&`, `?` or `;` from being interpreted.
  args <- c("--headless", "--disable-gpu", "--print-to-pdf", shQuote(url))

  # Returned invisibly, as the original trailing assignment did.
  invisible(system2(chrome_bin, args))

}
||||
|
|
||||
|
#' Capture a screenshot
#'
#' For the moment, the capture file is in the current working directory and named
#' `screenshot.png`. This will change, soon.
#'
#' A `magick` image object is returned.
#'
#' @md
#' @note this is a quick version of the function and will overwrite `screenshot.png` if it exists in CWD
#' @param url URL to read from
#' @param width,height screen size to emulate; the window size is only set
#'        when both are supplied
#' @return `magick` image read from `screenshot.png`
#' @export
#' @examples
#' \dontrun{
#' chrome_shot("https://www.r-project.org/logo/Rlogo.svg")
#' chrome_shot("https://www.r-project.org/", width = 1024, height = 768)
#' }
chrome_shot <- function(url, width=NULL, height=NULL) {

  # `--version` is deliberately not passed: it is Chrome's version-query flag
  # and does not belong in a screenshot invocation.
  args <- c("--headless", "--disable-gpu", "--screenshot")

  # Scalar condition, so use the short-circuiting `&&` rather than `&`.
  if (!is.null(width) && !is.null(height)) {
    # Chrome expects `--window-size=<width>,<height>`; integer formatting
    # avoids values such as 1e+05 being rendered in scientific notation.
    args <- c(
      args,
      sprintf("--window-size=%d,%d", as.integer(width), as.integer(height))
    )
  }

  # system2() pastes the arguments into a shell command line, so the URL is
  # quoted to keep characters such as `&`, `?` or `;` from being interpreted.
  args <- c(args, shQuote(url))

  system2(chrome_bin, args)

  magick::image_read("screenshot.png")

}
@ -0,0 +1,4 @@ |
|||||
|
#' Get Chrome version
#'
#' Invokes the Chrome binary with the `--version` flag.
#'
#' @md
#' @param x not used by the function body
#' @return the value returned by [system2()] for the Chrome call
#' @export
chrome_version <- function(x) {
  version_flag <- "--version"
  system2(chrome_bin, version_flag)
}
@ -0,0 +1,80 @@ |
|||||
|
--- |
||||
|
output: rmarkdown::github_document |
||||
|
--- |
||||
|
|
||||
|
`decapitated` : Headless 'Chrome' Orchestration |
||||
|
|
||||
|
The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode |
||||
|
which can be instrumented programmatically. Tools are provided to perform headless |
||||
|
'Chrome' instrumentation on the command-line and will eventually provide support |
||||
|
for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level |
||||
|
'API' being promised by the development team. |
||||
|
|
||||
|
### IMPORTANT |
||||
|
|
||||
|
This pkg will eventually do much under the covers to find the location of the Chrome binary |
||||
|
on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values: |
||||
|
|
||||
|
- Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe` |
||||
|
- macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome` |
||||
|
|
||||
|
Linux folks will know where their binary is (many of you use non-default locations for things). |
||||
|
|
||||
|
Use `~/.Renviron` to store this value for the time being. |
||||
|
|
||||
|
The following functions are implemented: |
||||
|
|
||||
|
- `chrome_dump_pdf`: "Print" to PDF |
||||
|
- `chrome_read_html`: Read a URL via headless Chrome and return the rendered '<body>' 'innerHTML' DOM elements |
||||
|
- `chrome_shot`: Capture a screenshot |
||||
|
- `chrome_version`: Get Chrome version |
||||
|
|
||||
|
### Installation |
||||
|
|
||||
|
```{r eval=FALSE} |
||||
|
devtools::install_github("hrbrmstr/decapitated") |
||||
|
``` |
||||
|
|
||||
|
```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} |
||||
|
options(width=120) |
||||
|
``` |
||||
|
|
||||
|
### Usage |
||||
|
|
||||
|
```{r message=FALSE, warning=FALSE, error=FALSE} |
||||
|
library(decapitated) |
||||
|
|
||||
|
# current version |
||||
|
packageVersion("decapitated") |
||||
|
|
||||
|
chrome_version() |
||||
|
|
||||
|
chrome_read_html("http://httpbin.org/") |
||||
|
``` |
||||
|
|
||||
|
```{r eval=FALSE, message=FALSE, warning=FALSE, error=FALSE} |
||||
|
chrome_dump_pdf("http://httpbin.org/") |
||||
|
## [0502/094321.911089:INFO:headless_shell.cc(436)] Written to file output.pdf. |
||||
|
``` |
||||
|
|
||||
|
```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE} |
||||
|
chrome_shot("http://httpbin.org/") |
||||
|
|
||||
|
## [0502/094257.370837:INFO:headless_shell.cc(436)] Written to file screenshot.png. |
||||
|
## format width height colorspace filesize |
||||
|
## 1 PNG 1600 1200 sRGB 238967 |
||||
|
``` |
||||
|
|
||||
|
![screenshot.png](screenshot.png) |
||||
|
|
||||
|
### Test Results |
||||
|
|
||||
|
```{r message=FALSE, warning=FALSE, error=FALSE} |
||||
|
library(decapitated) |
||||
|
library(testthat) |
||||
|
|
||||
|
date() |
||||
|
|
||||
|
test_dir("tests/") |
||||
|
``` |
||||
|
|
@ -0,0 +1,86 @@ |
|||||
|
|
||||
|
`decapitated` : Headless 'Chrome' Orchestration |
||||
|
|
||||
|
The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode which can be instrumented programmatically. Tools are provided to perform headless 'Chrome' instrumentation on the command-line and will eventually provide support for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level 'API' being promised by the development team. |
||||
|
|
||||
|
### IMPORTANT |
||||
|
|
||||
|
This pkg will eventually do much under the covers to find the location of the Chrome binary on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values: |
||||
|
|
||||
|
- Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe` |
||||
|
- macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome` |
||||
|
|
||||
|
Linux folks will know where their binary is (many of you use non-default locations for things). |
||||
|
|
||||
|
Use `~/.Renviron` to store this value for the time being. |
||||
|
|
||||
|
The following functions are implemented: |
||||
|
|
||||
|
- `chrome_dump_pdf`: "Print" to PDF |
||||
|
- `chrome_read_html`: Read a URL via headless Chrome and return the rendered ' |
||||
|
<body> |
||||
|
' 'innerHTML' DOM elements |
||||
|
- `chrome_shot`: Capture a screenshot |
||||
|
- `chrome_version`: Get Chrome version |
||||
|
|
||||
|
### Installation |
||||
|
|
||||
|
``` r |
||||
|
devtools::install_github("hrbrmstr/decapitated") |
||||
|
``` |
||||
|
|
||||
|
### Usage |
||||
|
|
||||
|
``` r |
||||
|
library(decapitated) |
||||
|
|
||||
|
# current version |
||||
|
packageVersion("decapitated") |
||||
|
``` |
||||
|
|
||||
|
## [1] '0.1.0' |
||||
|
|
||||
|
``` r |
||||
|
chrome_version() |
||||
|
|
||||
|
chrome_read_html("http://httpbin.org/") |
||||
|
``` |
||||
|
|
||||
|
## {xml_document} |
||||
|
## <html> |
||||
|
## [1] <body id="manpage"></body> |
||||
|
|
||||
|
``` r |
||||
|
chrome_dump_pdf("http://httpbin.org/") |
||||
|
## [0502/094321.911089:INFO:headless_shell.cc(436)] Written to file output.pdf. |
||||
|
``` |
||||
|
|
||||
|
``` r |
||||
|
chrome_shot("http://httpbin.org/") |
||||
|
|
||||
|
## [0502/094257.370837:INFO:headless_shell.cc(436)] Written to file screenshot.png. |
||||
|
## format width height colorspace filesize |
||||
|
## 1 PNG 1600 1200 sRGB 238967 |
||||
|
``` |
||||
|
|
||||
|
![](screenshot.png) |
||||
|
|
||||
|
### Test Results |
||||
|
|
||||
|
``` r |
||||
|
library(decapitated) |
||||
|
library(testthat) |
||||
|
|
||||
|
date() |
||||
|
``` |
||||
|
|
||||
|
## [1] "Tue May 2 09:45:23 2017" |
||||
|
|
||||
|
``` r |
||||
|
test_dir("tests/") |
||||
|
``` |
||||
|
|
||||
|
## testthat results ======================================================================================================== |
||||
|
## OK: 0 SKIPPED: 0 FAILED: 0 |
||||
|
## |
||||
|
## DONE =================================================================================================================== |
@ -0,0 +1,21 @@ |
|||||
|
Version: 1.0 |
||||
|
|
||||
|
RestoreWorkspace: Default |
||||
|
SaveWorkspace: Default |
||||
|
AlwaysSaveHistory: Default |
||||
|
|
||||
|
EnableCodeIndexing: Yes |
||||
|
UseSpacesForTab: Yes |
||||
|
NumSpacesForTab: 2 |
||||
|
Encoding: UTF-8 |
||||
|
|
||||
|
RnwWeave: Sweave |
||||
|
LaTeX: pdfLaTeX |
||||
|
|
||||
|
StripTrailingWhitespace: Yes |
||||
|
|
||||
|
BuildType: Package |
||||
|
PackageUseDevtools: Yes |
||||
|
PackageInstallArgs: --no-multiarch --with-keep.source |
||||
|
PackageBuildArgs: --resave-data |
||||
|
PackageRoxygenize: rd,collate,namespace |
@ -0,0 +1,20 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/read-html.r |
||||
|
\name{chrome_dump_pdf} |
||||
|
\alias{chrome_dump_pdf} |
||||
|
\title{"Print" to PDF} |
||||
|
\usage{ |
||||
|
chrome_dump_pdf(url) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{url}{URL to read from} |
||||
|
} |
||||
|
\description{ |
||||
|
"Print" to PDF |
||||
|
} |
||||
|
\note{ |
||||
|
this is a quick version of the function and will overwrite \code{output.pdf} if it exists in CWD |
||||
|
} |
||||
|
\examples{ |
||||
|
chrome_dump_pdf("https://www.r-project.org/") |
||||
|
} |
@ -0,0 +1,20 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/read-html.r |
||||
|
\name{chrome_read_html} |
||||
|
\alias{chrome_read_html} |
||||
|
\title{Read a URL via headless Chrome and return the renderd \code{<body>} \code{innerHTML} DOM elements} |
||||
|
\usage{ |
||||
|
chrome_read_html(url) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{url}{URL to read from} |
||||
|
} |
||||
|
\description{ |
||||
|
Read a URL via headless Chrome and return the renderd \code{<body>} \code{innerHTML} DOM elements |
||||
|
} |
||||
|
\note{ |
||||
|
This only grabs the \code{<body>} \code{innerHTML} contents |
||||
|
} |
||||
|
\examples{ |
||||
|
chrome_read_html("https://www.r-project.org/") |
||||
|
} |
@ -0,0 +1,29 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/read-html.r |
||||
|
\name{chrome_shot} |
||||
|
\alias{chrome_shot} |
||||
|
\title{Capture a screenshot} |
||||
|
\usage{ |
||||
|
chrome_shot(url, width = NULL, height = NULL) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{url}{URL to read from} |
||||
|
|
||||
|
\item{width, height}{screen size to emulate} |
||||
|
} |
||||
|
\value{ |
||||
|
\code{magick} |
||||
|
} |
||||
|
\description{ |
||||
|
For the moment, the capture file is in the current working directory and named |
||||
|
\code{screenshot.png}. This will change, soon. |
||||
|
} |
||||
|
\details{ |
||||
|
A \code{magick} image object is returned. |
||||
|
} |
||||
|
\note{ |
||||
|
this is a quick version of the function and will overwrite \code{screenshot.png} if it exists in CWD |
||||
|
} |
||||
|
\examples{ |
||||
|
chrome_shot("https://www.r-project.org/logo/Rlogo.svg") |
||||
|
} |
@ -0,0 +1,11 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/version.r |
||||
|
\name{chrome_version} |
||||
|
\alias{chrome_version} |
||||
|
\title{Get Chrome version} |
||||
|
\usage{ |
||||
|
chrome_version(x) |
||||
|
} |
||||
|
\description{ |
||||
|
Get Chrome version |
||||
|
} |
@ -0,0 +1,32 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/decapitated-package.R |
||||
|
\docType{package} |
||||
|
\name{decapitated} |
||||
|
\alias{decapitated} |
||||
|
\alias{decapitated-package} |
||||
|
\title{Headless 'Chrome' Orchestration} |
||||
|
\description{ |
||||
|
The 'Chrome' browser \url{https://www.google.com/chrome/} has a headless mode |
||||
|
which can be instrumented programmatically. Tools are provided to perform headless |
||||
|
'Chrome' instrumentation on the command-line and will eventually provide support |
||||
|
for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level |
||||
|
'API' being promised by the development team. |
||||
|
} |
||||
|
\section{Important}{ |
||||
|
|
||||
|
|
||||
|
This pkg will eventually do much under the covers to find the location of the Chrome binary |
||||
|
on all operating systems. For now, you'll need to set an envrionment variable \code{HEADLESS_CHROME} to one of these two values: |
||||
|
\itemize{ |
||||
|
\item Windows: \code{C:\Program Files\Google\Chrome\Application\chrome.exe} |
||||
|
\item macOS: \code{/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome} |
||||
|
} |
||||
|
|
||||
|
Linux folks will know where their binary is (many of you use non-default locations for things). |
||||
|
|
||||
|
Use \code{~/.Renviron} to store this value for the time being. |
||||
|
} |
||||
|
|
||||
|
\author{ |
||||
|
Bob Rudis (bob@rud.is) |
||||
|
} |
Binary file not shown.
After Width: | Height: | Size: 233 KiB |
@ -0,0 +1,2 @@ |
|||||
|
library(testthat) |
||||
|
test_check("decapitated") |
@ -0,0 +1,6 @@ |
|||||
|
context("basic functionality") |
||||
|
test_that("we can do something", { |
||||
|
|
||||
|
#expect_that(some_function(), is_a("data.frame")) |
||||
|
|
||||
|
}) |
Loading…
Reference in new issue