commit b322eadedfa5afe5bbd3650a405e0c41420561bf Author: boB Rudis Date: Tue May 2 09:46:01 2017 -0400 initial commit diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..1c60b19 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,10 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^\.travis\.yml$ +^README\.*Rmd$ +^README\.*html$ +^NOTES\.*Rmd$ +^NOTES\.*html$ +^\.codecov\.yml$ +^README_files$ +^doc$ diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..69cb760 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1 @@ +comment: false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cce1f17 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +.Rproj.user +.Rhistory +.RData +.Rproj +src/*.o +src/*.so +src/*.dll diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..76d9586 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,31 @@ +language: r + +warnings_are_errors: true + +sudo: required + +cache: packages + +r: + - oldrel + - release + - devel + +apt_packages: + - libv8-dev + - xclip + +env: + global: + - CRAN: http://cran.rstudio.com + +after_success: + - Rscript -e 'covr::codecov()' + +notifications: + email: + - bob@rud.is + irc: + channels: + - "104.236.112.222#builds" + nick: travisci diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..b84d3c3 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,24 @@ +Package: decapitated +Type: Package +Title: Headless 'Chrome' Orchestration +Version: 0.1.0 +Date: 2017-05-02 +Author: Bob Rudis (bob@rud.is) +Maintainer: Bob Rudis +Description: The 'Chrome' browser has a headless mode + which can be instrumented programmatically. Tools are provided to perform headless + 'Chrome' instrumentation on the command-line and will eventually provide support + for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level + 'API' being promised by the development team. 
+URL: https://github.com/hrbrmstr/decapitated +BugReports: https://github.com/hrbrmstr/decapitated/issues +License: AGPL +Suggests: + testthat, + covr +Depends: + R (>= 3.2.0) +Imports: + xml2, + magick +RoxygenNote: 6.0.1 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..badecfb --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,8 @@ +# Generated by roxygen2: do not edit by hand + +export(chrome_dump_pdf) +export(chrome_read_html) +export(chrome_shot) +export(chrome_version) +import(magick) +import(xml2) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..9b4679b --- /dev/null +++ b/NEWS.md @@ -0,0 +1,2 @@ +0.1.0 +* Initial release diff --git a/R/aaa.r b/R/aaa.r new file mode 100644 index 0000000..0542fb7 --- /dev/null +++ b/R/aaa.r @@ -0,0 +1 @@ +chrome_bin <- Sys.getenv("HEADLESS_CHROME") # NOTE(review): top-level package code is evaluated when the package is built, so this presumably stores the HEADLESS_CHROME value from install time - confirm that is intended \ No newline at end of file diff --git a/R/decapitated-package.R b/R/decapitated-package.R new file mode 100644 index 0000000..716c84b --- /dev/null +++ b/R/decapitated-package.R @@ -0,0 +1,26 @@ +#' Headless 'Chrome' Orchestration +#' +#' The 'Chrome' browser has a headless mode +#' which can be instrumented programmatically. Tools are provided to perform headless +#' 'Chrome' instrumentation on the command-line and will eventually provide support +#' for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level +#' 'API' being promised by the development team. +#' +#' @section Important: +#' +#' This pkg will eventually do much under the covers to find the location of the Chrome binary +#' on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values: +#' +#' - Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe` +#' - macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome` +#' +#' Linux folks will know where their binary is (many of you use non-default locations for things). 
+#'
+#' Use `~/.Renviron` to store this value for the time being.
+#'
+#' @md
+#' @name decapitated
+#' @docType package
+#' @author Bob Rudis (bob@@rud.is)
+#' @import xml2 magick
+NULL
diff --git a/R/read-html.r b/R/read-html.r
new file mode 100644
index 0000000..ba7607f
--- /dev/null
+++ b/R/read-html.r
@@ -0,0 +1,76 @@
+# Resolve the headless Chrome binary at call time.
+#
+# `chrome_bin` (R/aaa.r) is assigned by top-level package code, which R evaluates
+# when the package is built, so it cannot see a `HEADLESS_CHROME` value set
+# afterwards. Prefer the live environment variable; fall back to the stored value.
+chrome_binary <- function() {
+  bin <- Sys.getenv("HEADLESS_CHROME")
+  if (!nzchar(bin)) bin <- chrome_bin
+  if (!nzchar(bin)) {
+    stop("HEADLESS_CHROME is not set; point it at the Chrome binary", call. = FALSE)
+  }
+  bin
+}
+
+#' Read a URL via headless Chrome and return the rendered `<body>` `innerHTML` DOM elements
+#'
+#' @md
+#' @note This only grabs the `<body>` `innerHTML` contents
+#' @param url URL to read from
+#' @return an `xml_document`, as returned by [xml2::read_html()]
+#' @export
+#' @examples
+#' chrome_read_html("https://www.r-project.org/")
+chrome_read_html <- function(url) {
+  # Quote the URL: on Unix-alikes system2() runs the command line through a shell.
+  args <- c("--headless", "--disable-gpu", "--dump-dom", shQuote(url))
+  dom <- system2(chrome_binary(), args, stdout = TRUE)
+  # stdout = TRUE yields one element per output line; read_html() expects one string.
+  xml2::read_html(paste(dom, collapse = "\n"))
+}
+
+#' "Print" to PDF
+#'
+#' @md
+#' @note this is a quick version of the function and will overwrite `output.pdf` if it exists in CWD
+#' @param url URL to read from
+#' @return the exit status reported by [system2()], invisibly
+#' @export
+#' @examples
+#' chrome_dump_pdf("https://www.r-project.org/")
+chrome_dump_pdf <- function(url) {
+  args <- c("--headless", "--disable-gpu", "--print-to-pdf", shQuote(url))
+  invisible(system2(chrome_binary(), args))
+}
+
+#' Capture a screenshot
+#'
+#' For the moment, the capture file is in the current working directory and named
+#' `screenshot.png`. This will change, soon.
+#'
+#' A `magick` image object is returned.
+#'
+#' @md
+#' @note this is a quick version of the function and will overwrite `screenshot.png` if it exists in CWD
+#' @param url URL to read from
+#' @param width,height screen size to emulate; applied only when both are supplied
+#' @return `magick`
+#' @export
+#' @examples
+#' chrome_shot("https://www.r-project.org/logo/Rlogo.svg")
+chrome_shot <- function(url, width=NULL, height=NULL) {
+
+  args <- c("--headless", "--disable-gpu", "--screenshot")
+
+  # Scalar test, so `&&`; Chrome expects --window-size=<width>,<height>.
+  if (!is.null(width) && !is.null(height)) {
+    args <- c(args, sprintf("--window-size=%d,%d", as.integer(width), as.integer(height)))
+  }
+
+  args <- c(args, shQuote(url))
+
+  system2(chrome_binary(), args)
+
+  magick::image_read("screenshot.png")
+
+}
diff --git a/R/version.r b/R/version.r
new file mode 100644
index 0000000..eba422c
--- /dev/null
+++ b/R/version.r
@@ -0,0 +1,6 @@
+#' Get Chrome version
+#'
+#' @param x unused
+#' @return the exit status returned by \code{system2()}; Chrome's own output goes to the console
+#' @export
+chrome_version <- function(x) { system2(chrome_binary(), "--version") }
\ No newline at end of file
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..766ea79
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,80 @@
+---
+output: rmarkdown::github_document
+---
+
+`decapitated` : Headless 'Chrome' Orchestration
+
+The 'Chrome' browser has a headless mode
+which can be instrumented programmatically. Tools are provided to perform headless
+'Chrome' instrumentation on the command-line and will eventually provide support
+for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level
+'API' being promised by the development team.
+
+### IMPORTANT
+
+This pkg will eventually do much under the covers to find the location of the Chrome binary
+on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values:
+
+- Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe`
+- macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome`
+
+Linux folks will know where their binary is (many of you use non-default locations for things).
+ +Use `~/.Renviron` to store this value for the time being. + +The following functions are implemented: + +- `chrome_dump_pdf`: "Print" to PDF +- `chrome_read_html`: Read a URL via headless Chrome and return the rendered `<body>` `innerHTML` DOM elements +- `chrome_shot`: Capture a screenshot +- `chrome_version`: Get Chrome version + +### Installation + +```{r eval=FALSE} +devtools::install_github("hrbrmstr/decapitated") +``` + +```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} +options(width=120) +``` + +### Usage + +```{r message=FALSE, warning=FALSE, error=FALSE} +library(decapitated) + +# current version +packageVersion("decapitated") + +chrome_version() + +chrome_read_html("http://httpbin.org/") +``` + +```{r eval=FALSE, message=FALSE, warning=FALSE, error=FALSE} +chrome_dump_pdf("http://httpbin.org/") +## [0502/094321.911089:INFO:headless_shell.cc(436)] Written to file output.pdf. +``` + +```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE} +chrome_shot("http://httpbin.org/") + +## [0502/094257.370837:INFO:headless_shell.cc(436)] Written to file screenshot.png. +## format width height colorspace filesize +## 1 PNG 1600 1200 sRGB 238967 +``` + +![screenshot.png](screenshot.png) + +### Test Results + +```{r message=FALSE, warning=FALSE, error=FALSE} +library(decapitated) +library(testthat) + +date() + +test_dir("tests/") +``` + diff --git a/README.md b/README.md new file mode 100644 index 0000000..376b545 --- /dev/null +++ b/README.md @@ -0,0 +1,86 @@ + +`decapitated` : Headless 'Chrome' Orchestration + +The 'Chrome' browser has a headless mode which can be instrumented programmatically. Tools are provided to perform headless 'Chrome' instrumentation on the command-line and will eventually provide support for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level 'API' being promised by the development team. 
+ +### IMPORTANT + +This pkg will eventually do much under the covers to find the location of the Chrome binary on all operating systems. For now, you'll need to set an envrionment variable `HEADLESS_CHROME` to one of these two values: + +- Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe` +- macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome` + +Linux folks will know where their binary is (many of you use non-default locations for things). + +Use `~/.Renviron` to store this value for the time being. + +The following functions are implemented: + +- `chrome_dump_pdf`: "Print" to PDF +- `chrome_read_html`: Read a URL via headless Chrome and return the renderd ' + + ' 'innerHTML' DOM elements +- `chrome_shot`: Capture a screenshot +- `chrome_version`: Get Chrome version + +### Installation + +``` r +devtools::install_github("hrbrmstr/decapitated") +``` + +### Usage + +``` r +library(decapitated) + +# current verison +packageVersion("decapitated") +``` + + ## [1] '0.1.0' + +``` r +chrome_version() + +chrome_read_html("http://httpbin.org/") +``` + + ## {xml_document} + ## + ## [1] + +``` r +chrome_dump_pdf("http://httpbin.org/") +## [0502/094321.911089:INFO:headless_shell.cc(436)] Written to file output.pdf. +``` + +``` r +chrome_shot("http://httpbin.org/") + +## [0502/094257.370837:INFO:headless_shell.cc(436)] Written to file screenshot.png. 
+## format width height colorspace filesize +## 1 PNG 1600 1200 sRGB 238967 +``` + +![](screenshot.png) + +### Test Results + +``` r +library(decapitated) +library(testthat) + +date() +``` + + ## [1] "Tue May 2 09:45:23 2017" + +``` r +test_dir("tests/") +``` + + ## testthat results ======================================================================================================== + ## OK: 0 SKIPPED: 0 FAILED: 0 + ## + ## DONE =================================================================================================================== diff --git a/decapitated.Rproj b/decapitated.Rproj new file mode 100644 index 0000000..446d9e1 --- /dev/null +++ b/decapitated.Rproj @@ -0,0 +1,21 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageBuildArgs: --resave-data +PackageRoxygenize: rd,collate,namespace diff --git a/man/chrome_dump_pdf.Rd b/man/chrome_dump_pdf.Rd new file mode 100644 index 0000000..3c2bd01 --- /dev/null +++ b/man/chrome_dump_pdf.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read-html.r +\name{chrome_dump_pdf} +\alias{chrome_dump_pdf} +\title{"Print" to PDF} +\usage{ +chrome_dump_pdf(url) +} +\arguments{ +\item{url}{URL to read from} +} +\description{ +"Print" to PDF +} +\note{ +this is a quick version of the function and will overwrite \code{output.pdf} if it exists in CWD +} +\examples{ +chrome_dump_pdf("https://www.r-project.org/") +} diff --git a/man/chrome_read_html.Rd b/man/chrome_read_html.Rd new file mode 100644 index 0000000..3b9c5af --- /dev/null +++ b/man/chrome_read_html.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in 
R/read-html.r +\name{chrome_read_html} +\alias{chrome_read_html} +\title{Read a URL via headless Chrome and return the renderd \code{} \code{innerHTML} DOM elements} +\usage{ +chrome_read_html(url) +} +\arguments{ +\item{url}{URL to read from} +} +\description{ +Read a URL via headless Chrome and return the renderd \code{} \code{innerHTML} DOM elements +} +\note{ +This only grabs the \code{} \code{innerHTML} contents +} +\examples{ +chrome_read_html("https://www.r-project.org/") +} diff --git a/man/chrome_shot.Rd b/man/chrome_shot.Rd new file mode 100644 index 0000000..a6c0fd0 --- /dev/null +++ b/man/chrome_shot.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read-html.r +\name{chrome_shot} +\alias{chrome_shot} +\title{Capture a screenshot} +\usage{ +chrome_shot(url, width = NULL, height = NULL) +} +\arguments{ +\item{url}{URL to read from} + +\item{width, height}{screen size to emulate} +} +\value{ +\code{magick} +} +\description{ +For the moment, the capture file is in the current working directory and named +\code{screenshot.png}. This will change, soon. +} +\details{ +A \code{magick} image object is returned. 
+} +\note{ +this is a quick version of the function and will overwrite \code{screenshot.png} if it exists in CWD +} +\examples{ +chrome_shot("https://www.r-project.org/logo/Rlogo.svg") +} diff --git a/man/chrome_version.Rd b/man/chrome_version.Rd new file mode 100644 index 0000000..99e05ec --- /dev/null +++ b/man/chrome_version.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/version.r +\name{chrome_version} +\alias{chrome_version} +\title{Get Chrome version} +\usage{ +chrome_version(x) +} +\description{ +Get Chrome version +} diff --git a/man/decapitated.Rd b/man/decapitated.Rd new file mode 100644 index 0000000..6d2efab --- /dev/null +++ b/man/decapitated.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/decapitated-package.R +\docType{package} +\name{decapitated} +\alias{decapitated} +\alias{decapitated-package} +\title{Headless 'Chrome' Orchestration} +\description{ +The 'Chrome' browser \url{https://www.google.com/chrome/} has a headless mode +which can be instrumented programmatically. Tools are provided to perform headless +'Chrome' instrumentation on the command-line and will eventually provide support +for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level +'API' being promised by the development team. +} +\section{Important}{ + + +This pkg will eventually do much under the covers to find the location of the Chrome binary +on all operating systems. For now, you'll need to set an envrionment variable \code{HEADLESS_CHROME} to one of these two values: +\itemize{ +\item Windows: \code{C:\Program Files\Google\Chrome\Application\chrome.exe} +\item macOS: \code{/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome} +} + +Linux folks will know where their binary is (many of you use non-default locations for things). + +Use \code{~/.Renviron} to store this value for the time being. 
+} + +\author{ +Bob Rudis (bob@rud.is) +} diff --git a/output.pdf b/output.pdf new file mode 100644 index 0000000..1b6ed37 Binary files /dev/null and b/output.pdf differ diff --git a/screenshot.png b/screenshot.png new file mode 100644 index 0000000..28240d1 Binary files /dev/null and b/screenshot.png differ diff --git a/tests/test-all.R b/tests/test-all.R new file mode 100644 index 0000000..b65e621 --- /dev/null +++ b/tests/test-all.R @@ -0,0 +1,2 @@ +library(testthat) +test_check("decapitated") diff --git a/tests/testthat/test-decapitated.R b/tests/testthat/test-decapitated.R new file mode 100644 index 0000000..ab6f62f --- /dev/null +++ b/tests/testthat/test-decapitated.R @@ -0,0 +1,6 @@ +context("basic functionality") +test_that("we can do something", { + + # TODO: placeholder, nothing is asserted yet; template: expect_that(some_function(), is_a("data.frame")) + +})