Browse Source

initial commit

pull/2/head
boB Rudis 7 years ago
commit
b322eadedf
No known key found for this signature in database GPG Key ID: 2A514A4997464560
  1. 10
      .Rbuildignore
  2. 1
      .codecov.yml
  3. 8
      .gitignore
  4. 31
      .travis.yml
  5. 24
      DESCRIPTION
  6. 8
      NAMESPACE
  7. 2
      NEWS.md
  8. 1
      R/aaa.r
  9. 26
      R/decapitated-package.R
  10. 55
      R/read-html.r
  11. 4
      R/version.r
  12. 80
      README.Rmd
  13. 86
      README.md
  14. 21
      decapitated.Rproj
  15. 20
      man/chrome_dump_pdf.Rd
  16. 20
      man/chrome_read_html.Rd
  17. 29
      man/chrome_shot.Rd
  18. 11
      man/chrome_version.Rd
  19. 32
      man/decapitated.Rd
  20. BIN
      output.pdf
  21. BIN
      screenshot.png
  22. 2
      tests/test-all.R
  23. 6
      tests/testthat/test-decapitated.R

10
.Rbuildignore

@ -0,0 +1,10 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^README\.*Rmd$
^README\.*html$
^NOTES\.*Rmd$
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$

1
.codecov.yml

@ -0,0 +1 @@
comment: false

8
.gitignore

@ -0,0 +1,8 @@
.DS_Store
.Rproj.user
.Rhistory
.RData
.Rproj
src/*.o
src/*.so
src/*.dll

31
.travis.yml

@ -0,0 +1,31 @@
language: r
warnings_are_errors: true
sudo: required
cache: packages
r:
- oldrel
- release
- devel
apt_packages:
- libv8-dev
- xclip
env:
global:
- CRAN: http://cran.rstudio.com
after_success:
- Rscript -e 'covr::codecov()'
notifications:
email:
- bob@rud.is
irc:
channels:
- "104.236.112.222#builds"
nick: travisci

24
DESCRIPTION

@ -0,0 +1,24 @@
Package: decapitated
Type: Package
Title: Headless 'Chrome' Orchestration
Version: 0.1.0
Date: 2017-05-02
Author: Bob Rudis (bob@rud.is)
Maintainer: Bob Rudis <bob@rud.is>
Description: The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode
which can be instrumented programmatically. Tools are provided to perform headless
'Chrome' instrumentation on the command-line and will eventually provide support
for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level
'API' being promised by the development team.
URL: https://github.com/hrbrmstr/decapitated
BugReports: https://github.com/hrbrmstr/decapitated/issues
License: AGPL
Suggests:
testthat,
covr
Depends:
R (>= 3.2.0)
Imports:
xml2,
magick
RoxygenNote: 6.0.1

8
NAMESPACE

@ -0,0 +1,8 @@
# Generated by roxygen2: do not edit by hand
export(chrome_dump_pdf)
export(chrome_read_html)
export(chrome_shot)
export(chrome_version)
import(magick)
import(xml2)

2
NEWS.md

@ -0,0 +1,2 @@
0.1.0
* Initial release

1
R/aaa.r

@ -0,0 +1 @@
# Location of the Chrome binary used by every chrome_*() function, read from
# the HEADLESS_CHROME environment variable.
# NOTE(review): as a top-level assignment in package code this looks like it is
# evaluated when the package is built/installed rather than at call time, so a
# later change to HEADLESS_CHROME may not be picked up -- confirm this is intended.
chrome_bin <- Sys.getenv(x = "HEADLESS_CHROME")

26
R/decapitated-package.R

@ -0,0 +1,26 @@
#' Headless 'Chrome' Orchestration
#'
#' The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode
#' which can be instrumented programmatically. Tools are provided to perform headless
#' 'Chrome' instrumentation on the command-line and will eventually provide support
#' for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level
#' 'API' being promised by the development team.
#'
#' @section Important:
#'
#' This pkg will eventually do much under the covers to find the location of the Chrome binary
#' on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values:
#'
#' - Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe`
#' - macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome`
#'
#' Linux folks will know where their binary is (many of you use non-default locations for things).
#'
#' Use `~/.Renviron` to store this value for the time being.
#'
#' @md
#' @name decapitated
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import xml2 magick
NULL

55
R/read-html.r

@ -0,0 +1,55 @@
#' Read a URL via headless Chrome and return the rendered `<body>` `innerHTML` DOM elements
#'
#' Runs the Chrome binary named by `chrome_bin` with
#' `--headless --disable-gpu --dump-dom`, captures everything it writes to
#' standard output and parses that text with `xml2::read_html()`.
#'
#' @md
#' @note This only grabs the `<body>` `innerHTML` contents
#' @param url URL to read from
#' @return the parsed document, as returned by `xml2::read_html()`
#' @export
#' @examples
#' chrome_read_html("https://www.r-project.org/")
chrome_read_html <- function(url) {
  # The arguments end up on a command line, so the URL is quoted to keep
  # characters such as `&`, `?` or `;` from being interpreted by the shell.
  args <- c("--headless", "--disable-gpu", "--dump-dom", shQuote(url))
  dom_lines <- system2(chrome_bin, args, stdout = TRUE)
  # `stdout = TRUE` yields one vector element per output line; the parser
  # needs the whole document as a single string.
  xml2::read_html(paste0(dom_lines, collapse = "\n"))
}
#' "Print" to PDF
#'
#' Runs the Chrome binary named by `chrome_bin` with
#' `--headless --disable-gpu --print-to-pdf` for the given URL.
#'
#' @md
#' @note this is a quick version of the function and will overwrite `output.pdf` if it exists in CWD
#' @param url URL to read from
#' @return (invisibly) the value returned by `system2()` for the Chrome call
#' @export
#' @examples
#' chrome_dump_pdf("https://www.r-project.org/")
chrome_dump_pdf <- function(url) {
  # The arguments end up on a command line, so the URL is quoted to keep
  # characters such as `&`, `?` or `;` from being interpreted by the shell.
  args <- c("--headless", "--disable-gpu", "--print-to-pdf", shQuote(url))
  invisible(system2(chrome_bin, args))
}
#' Capture a screenshot
#'
#' For the moment, the capture file is in the current working directory and named
#' `screenshot.png`. This will change, soon.
#'
#' A `magick` image object is returned.
#'
#' @md
#' @note this is a quick version of the function and will overwrite `screenshot.png` if it exists in CWD
#' @param url URL to read from
#' @param width,height screen size to emulate; the window size is only passed
#'   to Chrome when both are supplied
#' @return `magick`
#' @export
#' @examples
#' chrome_shot("https://www.r-project.org/logo/Rlogo.svg")
chrome_shot <- function(url, width = NULL, height = NULL) {
  args <- c("--headless", "--disable-gpu", "--screenshot")

  # Scalar condition, so use the short-circuiting `&&`.
  if (!is.null(width) && !is.null(height)) {
    # Chrome expects the window size as "<width>,<height>".
    args <- c(args, sprintf("--window-size=%s,%s", width, height))
  }

  # The arguments end up on a command line, so the URL is quoted to keep
  # characters such as `&`, `?` or `;` from being interpreted by the shell.
  args <- c(args, shQuote(url))

  system2(chrome_bin, args)

  magick::image_read("screenshot.png")
}

4
R/version.r

@ -0,0 +1,4 @@
#' Get Chrome version
#'
#' Invokes the Chrome binary named by \code{chrome_bin} with the
#' \code{--version} switch and hands back whatever \code{system2()} returns.
#'
#' @param x not used by the function body
#' @export
chrome_version <- function(x) {
  version_switch <- "--version"
  system2(chrome_bin, version_switch)
}

80
README.Rmd

@ -0,0 +1,80 @@
---
output: rmarkdown::github_document
---
`decapitated` : Headless 'Chrome' Orchestration
The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode
which can be instrumented programmatically. Tools are provided to perform headless
'Chrome' instrumentation on the command-line and will eventually provide support
for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level
'API' being promised by the development team.
### IMPORTANT
This pkg will eventually do much under the covers to find the location of the Chrome binary
on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values:
- Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe`
- macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome`
Linux folks will know where their binary is (many of you use non-default locations for things).
Use `~/.Renviron` to store this value for the time being.
The following functions are implemented:
- `chrome_dump_pdf`: "Print" to PDF
- `chrome_read_html`: Read a URL via headless Chrome and return the rendered `<body>` `innerHTML` DOM elements
- `chrome_shot`: Capture a screenshot
- `chrome_version`: Get Chrome version
### Installation
```{r eval=FALSE}
devtools::install_github("hrbrmstr/decapitated")
```
```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
options(width=120)
```
### Usage
```{r message=FALSE, warning=FALSE, error=FALSE}
library(decapitated)
# current version
packageVersion("decapitated")
chrome_version()
chrome_read_html("http://httpbin.org/")
```
```{r eval=FALSE, message=FALSE, warning=FALSE, error=FALSE}
chrome_dump_pdf("http://httpbin.org/")
## [0502/094321.911089:INFO:headless_shell.cc(436)] Written to file output.pdf.
```
```{r message=FALSE, warning=FALSE, error=FALSE, eval=FALSE}
chrome_shot("http://httpbin.org/")
## [0502/094257.370837:INFO:headless_shell.cc(436)] Written to file screenshot.png.
## format width height colorspace filesize
## 1 PNG 1600 1200 sRGB 238967
```
![screenshot.png](screenshot.png)
### Test Results
```{r message=FALSE, warning=FALSE, error=FALSE}
library(decapitated)
library(testthat)
date()
test_dir("tests/")
```

86
README.md

@ -0,0 +1,86 @@
`decapitated` : Headless 'Chrome' Orchestration
The 'Chrome' browser <https://www.google.com/chrome/> has a headless mode which can be instrumented programmatically. Tools are provided to perform headless 'Chrome' instrumentation on the command-line and will eventually provide support for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level 'API' being promised by the development team.
### IMPORTANT
This pkg will eventually do much under the covers to find the location of the Chrome binary on all operating systems. For now, you'll need to set an environment variable `HEADLESS_CHROME` to one of these two values:
- Windows: `C:\Program Files\Google\Chrome\Application\chrome.exe`
- macOS: `/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome`
Linux folks will know where their binary is (many of you use non-default locations for things).
Use `~/.Renviron` to store this value for the time being.
The following functions are implemented:
- `chrome_dump_pdf`: "Print" to PDF
- `chrome_read_html`: Read a URL via headless Chrome and return the rendered `<body>` `innerHTML` DOM elements
- `chrome_shot`: Capture a screenshot
- `chrome_version`: Get Chrome version
### Installation
``` r
devtools::install_github("hrbrmstr/decapitated")
```
### Usage
``` r
library(decapitated)
# current version
packageVersion("decapitated")
```
## [1] '0.1.0'
``` r
chrome_version()
chrome_read_html("http://httpbin.org/")
```
## {xml_document}
## <html>
## [1] <body id="manpage"></body>
``` r
chrome_dump_pdf("http://httpbin.org/")
## [0502/094321.911089:INFO:headless_shell.cc(436)] Written to file output.pdf.
```
``` r
chrome_shot("http://httpbin.org/")
## [0502/094257.370837:INFO:headless_shell.cc(436)] Written to file screenshot.png.
## format width height colorspace filesize
## 1 PNG 1600 1200 sRGB 238967
```
![](screenshot.png)
### Test Results
``` r
library(decapitated)
library(testthat)
date()
```
## [1] "Tue May 2 09:45:23 2017"
``` r
test_dir("tests/")
```
## testthat results ========================================================================================================
## OK: 0 SKIPPED: 0 FAILED: 0
##
## DONE ===================================================================================================================

21
decapitated.Rproj

@ -0,0 +1,21 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageRoxygenize: rd,collate,namespace

20
man/chrome_dump_pdf.Rd

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-html.r
\name{chrome_dump_pdf}
\alias{chrome_dump_pdf}
\title{"Print" to PDF}
\usage{
chrome_dump_pdf(url)
}
\arguments{
\item{url}{URL to read from}
}
\description{
"Print" to PDF
}
\note{
this is a quick version of the function and will overwrite \code{output.pdf} if it exists in CWD
}
\examples{
chrome_dump_pdf("https://www.r-project.org/")
}

20
man/chrome_read_html.Rd

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-html.r
\name{chrome_read_html}
\alias{chrome_read_html}
\title{Read a URL via headless Chrome and return the rendered \code{<body>} \code{innerHTML} DOM elements}
\usage{
chrome_read_html(url)
}
\arguments{
\item{url}{URL to read from}
}
\description{
Read a URL via headless Chrome and return the rendered \code{<body>} \code{innerHTML} DOM elements
}
\note{
This only grabs the \code{<body>} \code{innerHTML} contents
}
\examples{
chrome_read_html("https://www.r-project.org/")
}

29
man/chrome_shot.Rd

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-html.r
\name{chrome_shot}
\alias{chrome_shot}
\title{Capture a screenshot}
\usage{
chrome_shot(url, width = NULL, height = NULL)
}
\arguments{
\item{url}{URL to read from}
\item{width, height}{screen size to emulate}
}
\value{
\code{magick}
}
\description{
For the moment, the capture file is in the current working directory and named
\code{screenshot.png}. This will change, soon.
}
\details{
A \code{magick} image object is returned.
}
\note{
this is a quick version of the function and will overwrite \code{screenshot.png} if it exists in CWD
}
\examples{
chrome_shot("https://www.r-project.org/logo/Rlogo.svg")
}

11
man/chrome_version.Rd

@ -0,0 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/version.r
\name{chrome_version}
\alias{chrome_version}
\title{Get Chrome version}
\usage{
chrome_version(x)
}
\description{
Get Chrome version
}

32
man/decapitated.Rd

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/decapitated-package.R
\docType{package}
\name{decapitated}
\alias{decapitated}
\alias{decapitated-package}
\title{Headless 'Chrome' Orchestration}
\description{
The 'Chrome' browser \url{https://www.google.com/chrome/} has a headless mode
which can be instrumented programmatically. Tools are provided to perform headless
'Chrome' instrumentation on the command-line and will eventually provide support
for the 'DevTools' instrumentation 'API' or the forthcoming 'phantomjs'-like higher-level
'API' being promised by the development team.
}
\section{Important}{
This pkg will eventually do much under the covers to find the location of the Chrome binary
on all operating systems. For now, you'll need to set an environment variable \code{HEADLESS_CHROME} to one of these two values:
\itemize{
\item Windows: \code{C:\Program Files\Google\Chrome\Application\chrome.exe}
\item macOS: \code{/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome}
}
Linux folks will know where their binary is (many of you use non-default locations for things).
Use \code{~/.Renviron} to store this value for the time being.
}
\author{
Bob Rudis (bob@rud.is)
}

BIN
output.pdf

Binary file not shown.

BIN
screenshot.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 233 KiB

2
tests/test-all.R

@ -0,0 +1,2 @@
# Test entry point: attach testthat, then call test_check() for the
# "decapitated" package.
library(testthat)
test_check("decapitated")

6
tests/testthat/test-decapitated.R

@ -0,0 +1,6 @@
context("basic functionality")

# These checks deliberately avoid launching Chrome so they can run on machines
# where no browser binary is configured; they only confirm that the functions
# listed in NAMESPACE are present and callable objects.
test_that("the exported functions are available", {
  expect_true(is.function(chrome_version))
  expect_true(is.function(chrome_read_html))
  expect_true(is.function(chrome_dump_pdf))
  expect_true(is.function(chrome_shot))
})
Loading…
Cancel
Save