19 changed files with 275 additions and 119 deletions
@ -1,6 +1,8 @@ |
|||
0.2.0 |
|||
* Re-design of how the Chrome binary is set |
|||
* env var functions to help with ^^ |
|||
* switch to using processx |
|||
* options for naming & placing PDF & screenshot files |
|||
|
|||
0.1.0 |
|||
* Initial release |
|||
|
@ -0,0 +1,74 @@ |
|||
#' "Print" to PDF |
|||
#' |
|||
#' @md |
|||
#' @note The default Chrome filename is `output.pdf` |
|||
#' @param url URL to read from |
|||
#' @param path path (with optional output filename) for the generated PDF. If `NULL` then |
|||
#' and `overwrite` is `FALSE`, the fuction will will ensure a uniquely-named file is |
|||
#' placed in the current working directory by incrementing trailing numbers before |
|||
#' the end of it. |
|||
#' @param overwrite overwrite existing file? Default: `TRUE` |
|||
#' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable) |
|||
#' @return output fileame (invisibly) |
|||
#' @export |
|||
#' @examples |
|||
#' chrome_dump_pdf("https://www.r-project.org/") |
|||
chrome_dump_pdf <- function(url, path=NULL, overwrite=TRUE, chrome_bin=Sys.getenv("HEADLESS_CHROME")) { |
|||
|
|||
curwd <- getwd() |
|||
on.exit(setwd(curwd), add = TRUE) |
|||
|
|||
if (is.null(path)) path <- "." |
|||
|
|||
path <- normalizePath(path.expand(path[1])) |
|||
|
|||
if (!grepl("\\.pdf$", path)) { |
|||
fil_nam <- "output.pdf" |
|||
dir_nam <- path |
|||
} else { |
|||
fil_nam <- basename(path) |
|||
dir_nam <- dirname(path) |
|||
} |
|||
|
|||
fil_ext <- tools::file_ext(fil_nam) |
|||
fil_pre <- tools::file_path_sans_ext(fil_nam) |
|||
|
|||
td <- tempdir() |
|||
|
|||
setwd(td) |
|||
|
|||
args <- c("--headless") |
|||
args <- c(args, "--disable-gpu") |
|||
args <- c(args, "--no-sandbox") |
|||
args <- c(args, "--allow-no-sandbox-job") |
|||
args <- c(args, sprintf("--user-data-dir=%s", .get_app_dir())) |
|||
args <- c(args, sprintf("--crash-dumps-dir=%s", .get_app_dir())) |
|||
args <- c(args, sprintf("--utility-allowed-dir=%s", .get_app_dir())) |
|||
args <- c(args, "--print-to-pdf", url) |
|||
|
|||
processx::run( |
|||
command = chrome_bin, |
|||
args = args, |
|||
error_on_status = FALSE, |
|||
echo_cmd = FALSE, |
|||
echo = FALSE |
|||
) -> res |
|||
|
|||
first_fil <- file.path(dir_nam, sprintf("%s.%s", fil_pre, fil_ext)) |
|||
out_fil <- first_fil |
|||
|
|||
if (!overwrite) { |
|||
|
|||
moar_fils <- sprintf(file.path(dir_nam, sprintf("%s%%04d.%s", fil_pre, fil_ext)), 0:9999) |
|||
fils <- c(first_fil, moar_fils) |
|||
|
|||
out_fil <- fils[which(!file.exists(fils))[1]] |
|||
if (is.na(out_fil)) stop("Cannot create unique filename") |
|||
|
|||
} |
|||
|
|||
file.copy("output.pdf", out_fil, overwrite = overwrite) |
|||
|
|||
return(invisible(out_fil)) |
|||
|
|||
} |
@ -0,0 +1,86 @@ |
|||
#' Capture a screenshot |
|||
#' |
|||
#' For the moment, the capture file is in the current working directory and named |
|||
#' `screenshot.png`. This will change, soon. |
|||
#' |
|||
#' A `magick` image object is returned. |
|||
#' |
|||
#' @md |
|||
#' @note The default Chrome filename is `screenshot.png` |
|||
#' @param url URL to read from |
|||
#' @param width,height screen size to emulate |
|||
#' @param path path (with optional output filename) for the generated PDF. If `NULL` then |
|||
#' and `overwrite` is `FALSE`, the fuction will will ensure a uniquely-named file is |
|||
#' placed in the current working directory by incrementing trailing numbers before |
|||
#' the end of it. |
|||
#' @param overwrite overwrite existing file? Default: `TRUE` |
|||
#' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable) |
|||
#' @return `magick` |
|||
#' @export |
|||
#' @examples |
|||
#' chrome_shot("https://www.r-project.org/logo/Rlogo.svg") |
|||
chrome_shot <- function(url, width=NULL, height=NULL, path=NULL, overwrite=TRUE, |
|||
chrome_bin=Sys.getenv("HEADLESS_CHROME")) { |
|||
|
|||
curwd <- getwd() |
|||
on.exit(setwd(curwd), add = TRUE) |
|||
|
|||
if (is.null(path)) path <- "." |
|||
|
|||
path <- normalizePath(path.expand(path[1])) |
|||
|
|||
if (!grepl("\\.pdf$", path)) { |
|||
fil_nam <- "screenshot.png" |
|||
dir_nam <- path |
|||
} else { |
|||
fil_nam <- basename(path) |
|||
dir_nam <- dirname(path) |
|||
} |
|||
|
|||
fil_ext <- tools::file_ext(fil_nam) |
|||
fil_pre <- tools::file_path_sans_ext(fil_nam) |
|||
|
|||
td <- tempdir() |
|||
|
|||
setwd(td) |
|||
|
|||
args <- c("--headless") |
|||
args <- c(args, "--disable-gpu") |
|||
args <- c(args, "--no-sandbox") |
|||
args <- c(args, "--allow-no-sandbox-job") |
|||
args <- c(args, sprintf("--user-data-dir=%s", .get_app_dir())) |
|||
args <- c(args, sprintf("--crash-dumps-dir=%s", .get_app_dir())) |
|||
args <- c(args, sprintf("--utility-allowed-dir=%s", .get_app_dir())) |
|||
args <- c(args, "--screenshot", url) |
|||
|
|||
if (!is.null(width) & !is.null(height)) { |
|||
args <- c(args, sprintf("--window-size=%s,%s", height, width)) |
|||
} |
|||
|
|||
processx::run( |
|||
command = chrome_bin, |
|||
args = args, |
|||
error_on_status = FALSE, |
|||
echo_cmd = FALSE, |
|||
echo = FALSE |
|||
) -> res |
|||
|
|||
|
|||
first_fil <- file.path(dir_nam, sprintf("%s.%s", fil_pre, fil_ext)) |
|||
out_fil <- first_fil |
|||
|
|||
if (!overwrite) { |
|||
|
|||
moar_fils <- sprintf(file.path(dir_nam, sprintf("%s%%04d.%s", fil_pre, fil_ext)), 0:9999) |
|||
fils <- c(first_fil, moar_fils) |
|||
|
|||
out_fil <- fils[which(!file.exists(fils))[1]] |
|||
if (is.na(out_fil)) stop("Cannot create unique filename") |
|||
|
|||
} |
|||
|
|||
file.copy("screenshot.png", out_fil, overwrite = overwrite) |
|||
|
|||
if (file.exists(out_fil)) magick::image_read(out_fil) |
|||
|
|||
} |
@ -1,62 +1,32 @@ |
|||
#' Read a URL via headless Chrome and return the renderd `<body>` `innerHTML` DOM elements |
|||
#' Read a URL via headless Chrome and return the raw or rendered `<body>` `innerHTML` DOM elements |
|||
#' |
|||
#' @md |
|||
#' @note This only grabs the `<body>` `innerHTML` contents |
|||
#' @param url URL to read from |
|||
#' @param render if `TRUE` then return an `xml_document`, else the raw HTML (invisibly) |
|||
#' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable) |
|||
#' @export |
|||
#' @examples |
|||
#' chrome_read_html("https://www.r-project.org/") |
|||
chrome_read_html <- function(url, chrome_bin=Sys.getenv("HEADLESS_CHROME")) { |
|||
url <- shQuote(url) |
|||
tmp <- system2(chrome_bin, c("--headless", "--no-sandbox", "--disable-gpu", "--dump-dom", url), stdout=TRUE) |
|||
xml2::read_html(paste0(tmp, collapse="\n")) |
|||
} |
|||
|
|||
#' "Print" to PDF |
|||
#' |
|||
#' @md |
|||
#' @note this is a quick version of the function and will overwrite `output.pdf` if it exists in CWD |
|||
#' @param url URL to read from |
|||
#' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable) |
|||
#' @export |
|||
#' @examples |
|||
#' chrome_dump_pdf("https://www.r-project.org/") |
|||
chrome_dump_pdf <- function(url, chrome_bin=Sys.getenv("HEADLESS_CHROME")) { |
|||
url <- shQuote(url) |
|||
tmp <- system2(chrome_bin, c("--headless", "--no-sandbox", "--disable-gpu", "--print-to-pdf", url)) |
|||
} |
|||
|
|||
#' Capture a screenshot |
|||
#' |
|||
#' For the moment, the capture file is in the current working directory and named |
|||
#' `screenshot.png`. This will change, soon. |
|||
#' |
|||
#' A `magick` image object is returned. |
|||
#' |
|||
#' @md |
|||
#' @note this is a quick version of the function and will overwrite `screenshot.png` if it exists in CWD |
|||
#' @param url URL to read from |
|||
#' @param width,height screen size to emulate |
|||
#' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable) |
|||
#' @return `magick` |
|||
#' @export |
|||
#' @examples |
|||
#' chrome_shot("https://www.r-project.org/logo/Rlogo.svg") |
|||
chrome_shot <- function(url, width=NULL, height=NULL, chrome_bin=Sys.getenv("HEADLESS_CHROME")) { |
|||
|
|||
args <- c("--headless", "--no-sandbox", "--disable-gpu", "--screenshot") |
|||
|
|||
url <- shQuote(url) |
|||
|
|||
if (!is.null(width) & !is.null(height)) { |
|||
args <- c(args, sprintf("--window-size=%s,%s", height, width)) |
|||
} |
|||
|
|||
args <- c(args, url) |
|||
|
|||
tmp <- system2(chrome_bin, args) |
|||
|
|||
magick::image_read("screenshot.png") |
|||
chrome_read_html <- function(url, render=TRUE, chrome_bin=Sys.getenv("HEADLESS_CHROME")) { |
|||
|
|||
args <- c("--headless") |
|||
args <- c(args, "--disable-gpu") |
|||
args <- c(args, "--no-sandbox") |
|||
args <- c(args, "--allow-no-sandbox-job") |
|||
args <- c(args, sprintf("--user-data-dir=%s", .get_app_dir())) |
|||
args <- c(args, sprintf("--crash-dumps-dir=%s", .get_app_dir())) |
|||
args <- c(args, sprintf("--utility-allowed-dir=%s", .get_app_dir())) |
|||
args <- c(args, "--dump-dom", url) |
|||
|
|||
processx::run( |
|||
command = chrome_bin, |
|||
args = args, |
|||
error_on_status = FALSE, |
|||
echo_cmd = FALSE, |
|||
echo = FALSE |
|||
) -> res |
|||
|
|||
if (render) xml2::read_html(res$stdout) else return(invisible(res$stdout)) |
|||
|
|||
} |
|||
|
@ -0,0 +1,8 @@ |
|||
.get_app_dir <- function() { |
|||
ddir <- file.path(Sys.getenv("HOME"), ".rdecapdata") |
|||
if (!dir.exists(ddir)) { |
|||
message(sprintf("Creating application data directory [%s]...", ddir)) |
|||
dir.create(ddir, recursive=TRUE) |
|||
} |
|||
return(ddir) |
|||
} |
Binary file not shown.
Before Width: | Height: | Size: 233 KiB After Width: | Height: | Size: 211 KiB |
@ -1,2 +0,0 @@ |
|||
library(testthat) |
|||
test_check("decapitated") |
@ -1,6 +0,0 @@ |
|||
context("basic functionality") |
|||
test_that("we can do something", { |
|||
|
|||
#expect_that(some_function(), is_a("data.frame")) |
|||
|
|||
}) |
Loading…
Reference in new issue