added Sections to communicate working directory changes

6 years ago · ddb077d61d
9 changed files with 172 additions and 34 deletions
--- a/R/chrome-pdf.r
+++ b/R/chrome-pdf.r
@ -1,5 +1,20 @@
 #' "Print" to PDF
 #'
+#' @section Working around headless Chrome & OS security restrictions:
+#' Security restrictions on various operating systems and OS configurations can cause
+#' headless Chrome execution to fail. As a result, headless Chrome operations should
+#' use a special directory for `decapitated` package operations. You can pass this
+#' in as `work_dir`. If `work_dir` is `NULL` a `.rdecapdata` directory will be
+#' created in your home directory and used for the data, crash dumps and utility
+#' directories for Chrome operations.\cr
+#' \cr
+#' `tempdir()` does not always meet these requirements (after testing on various
+#' macOS 10.13 systems) as Chrome does some interesting attribute setting for
+#' some of its file operations.
+#' \cr
+#' If you pass in a `work_dir`, it must be one that does not violate OS security
+#' restrictions or headless Chrome will not function.
+#'
 #' @md
 #' @note The default Chrome filename is `output.pdf`
 #' @param url URL to read from
@ -12,12 +27,14 @@
 #'        headless Chrome cache. This seems to be necessary primarily on recent versions of macOS.
 #'        If numeric, that number of "prime" requests will be sent ahead of the capture request.
 #'        If `FALSE` no priming requests will be sent.
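+#' @param work_dir directory used for headless Chrome data, crash dumps and utility files. If `NULL`, a `.rdecapdata` directory in your home directory is used. See the section on OS security restrictions.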
 #' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable)
 #' @return output filename (invisibly)
 #' @export
 #' @examples
 #' chrome_dump_pdf("https://www.r-project.org/")
-chrome_dump_pdf <- function(url, path=NULL, overwrite=TRUE, chrome_bin=Sys.getenv("HEADLESS_CHROME")) {
+chrome_dump_pdf <- function(url, path=NULL, overwrite=TRUE, work_dir = NULL,
+                             chrome_bin=Sys.getenv("HEADLESS_CHROME")) {

  curwd <- getwd()
  on.exit(setwd(curwd), add = TRUE)
@ -41,19 +58,20 @@ chrome_dump_pdf <- function(url, path=NULL, overwrite=TRUE, chrome_bin=Sys.geten

  setwd(td)

+  work_dir <- if (is.null(work_dir)) .get_app_dir() else work_dir
+
  args <- c("--headless")
  args <- c(args, "--disable-gpu")
  args <- c(args, "--no-sandbox")
  args <- c(args, "--allow-no-sandbox-job")
-  args <- c(args, sprintf("--user-data-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--crash-dumps-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--utility-allowed-dir=%s", .get_app_dir()))
+  args <- c(args, sprintf("--user-data-dir=%s", work_dir))
+  args <- c(args, sprintf("--crash-dumps-dir=%s", work_dir))
+  args <- c(args, sprintf("--utility-allowed-dir=%s", work_dir))
  args <- c(args, "--print-to-pdf", url)

  vers <- chrome_version(quiet=TRUE)

-  if (is.logical(prime) & prime) .prime_url(url, 1, chrome_bin)
-  if (is.numeric(prime) & (prime>0)) .prime_url(url, prime, chrome_bin)
+  .prime_url(url, as.numeric(prime), work_dir, chrome_bin)

  processx::run(
    command = chrome_bin,
--- a/R/chrome-shot.r
+++ b/R/chrome-shot.r
@ -1,10 +1,22 @@
 #' Capture a screenshot
 #'
-#' For the moment, the capture file is in the current working directory and named
-#' `screenshot.png`. This will change, soon.
-#'
 #' A `magick` image object is returned.
 #'
+#' @section Working around headless Chrome & OS security restrictions:
+#' Security restrictions on various operating systems and OS configurations can cause
+#' headless Chrome execution to fail. As a result, headless Chrome operations should
+#' use a special directory for `decapitated` package operations. You can pass this
+#' in as `work_dir`. If `work_dir` is `NULL` a `.rdecapdata` directory will be
+#' created in your home directory and used for the data, crash dumps and utility
+#' directories for Chrome operations.\cr
+#' \cr
+#' `tempdir()` does not always meet these requirements (after testing on various
+#' macOS 10.13 systems) as Chrome does some interesting attribute setting for
+#' some of its file operations.
+#' \cr
+#' If you pass in a `work_dir`, it must be one that does not violate OS security
+#' restrictions or headless Chrome will not function.
+#'
 #' @md
 #' @note The default Chrome filename is `screenshot.png`
 #' @param url URL to read from
@ -18,13 +30,14 @@
 #'        headless Chrome cache. This seems to be necessary primarily on recent versions of macOS.
 #'        If numeric, that number of "prime" requests will be sent ahead of the capture request.
 #'        If `FALSE` no priming requests will be sent.
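+#' @param work_dir directory used for headless Chrome data, crash dumps and utility files. If `NULL`, a `.rdecapdata` directory in your home directory is used. See the section on OS security restrictions.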
 #' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable)
 #' @return `magick`
 #' @export
 #' @examples
 #' chrome_shot("https://www.r-project.org/logo/Rlogo.svg")
-chrome_shot <- function(url, width=NULL, height=NULL, path=NULL, overwrite=TRUE,
-                        chrome_bin=Sys.getenv("HEADLESS_CHROME")) {
+chrome_shot <- function(url, width=NULL, height=NULL, path=NULL, work_dir = NULL,
+                             chrome_bin=Sys.getenv("HEADLESS_CHROME")) {

  curwd <- getwd()
  on.exit(setwd(curwd), add = TRUE)
@ -48,13 +61,15 @@ chrome_shot <- function(url, width=NULL, height=NULL, path=NULL, overwrite=TRUE,

  setwd(td)

+  work_dir <- if (is.null(work_dir)) .get_app_dir() else work_dir
+
  args <- c("--headless")
  args <- c(args, "--disable-gpu")
  args <- c(args, "--no-sandbox")
  args <- c(args, "--allow-no-sandbox-job")
-  args <- c(args, sprintf("--user-data-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--crash-dumps-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--utility-allowed-dir=%s", .get_app_dir()))
+  args <- c(args, sprintf("--user-data-dir=%s", work_dir))
+  args <- c(args, sprintf("--crash-dumps-dir=%s", work_dir))
+  args <- c(args, sprintf("--utility-allowed-dir=%s", work_dir))
  args <- c(args, "--screenshot", url)

  if (!is.null(width) & !is.null(height)) {
@ -63,8 +78,7 @@ chrome_shot <- function(url, width=NULL, height=NULL, path=NULL, overwrite=TRUE,

  vers <- chrome_version(quiet=TRUE)

-  if (is.logical(prime) & prime) .prime_url(url, 1, chrome_bin)
-  if (is.numeric(prime) & (prime>0)) .prime_url(url, prime, chrome_bin)
+  .prime_url(url, as.numeric(prime), work_dir, chrome_bin)

  processx::run(
    command = chrome_bin,
--- a/R/decapitated-package.R
+++ b/R/decapitated-package.R
@ -5,6 +5,21 @@
 #' Chrome' instrumentation on the command-line, including retrieving the javascript-executed
 #' web page, PDF output or screen shot of a URL.
 #'
+#' @section Working around headless Chrome & OS security restrictions:
+#' Security restrictions on various operating systems and OS configurations can cause
+#' headless Chrome execution to fail. As a result, headless Chrome operations should
+#' use a special directory for `decapitated` package operations. You can pass this
+#' in as `work_dir`. If `work_dir` is `NULL` a `.rdecapdata` directory will be
+#' created in your home directory and used for the data, crash dumps and utility
+#' directories for Chrome operations.\cr
+#' \cr
+#' `tempdir()` does not always meet these requirements (after testing on various
+#' macOS 10.13 systems) as Chrome does some interesting attribute setting for
+#' some of its file operations.
+#' \cr
+#' If you pass in a `work_dir`, it must be one that does not violate OS security
+#' restrictions or headless Chrome will not function.
+#'
 #' @section Important:
 #'
 #' You'll need to set an environment variable `HEADLESS_CHROME` to one of these two values:
--- a/R/prime-url.r
+++ b/R/prime-url.r
@ -1,12 +1,15 @@
-.prime_url <- function(url, prime_ct = 1, chrome_bin=Sys.getenv("HEADLESS_CHROME")) {
+.prime_url <- function(url, prime_ct = 1, work_dir=NULL,
+                       chrome_bin=Sys.getenv("HEADLESS_CHROME")) {
+
+  work_dir <- if (is.null(work_dir)) .get_app_dir() else work_dir

  args <- c("--headless")
  args <- c(args, "--disable-gpu")
  args <- c(args, "--no-sandbox")
  args <- c(args, "--allow-no-sandbox-job")
-  args <- c(args, sprintf("--user-data-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--crash-dumps-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--utility-allowed-dir=%s", .get_app_dir()))
+  args <- c(args, sprintf("--user-data-dir=%s", work_dir))
+  args <- c(args, sprintf("--crash-dumps-dir=%s", work_dir))
+  args <- c(args, sprintf("--utility-allowed-dir=%s", work_dir))
  args <- c(args, "--dump-dom", url)

  for (i in 1:prime_ct) {
--- a/R/read-html.r
+++ b/R/read-html.r
@ -1,5 +1,20 @@
 #' Read a URL via headless Chrome and return the raw or rendered `<body>` `innerHTML` DOM elements
 #'
+#' @section Working around headless Chrome & OS security restrictions:
+#' Security restrictions on various operating systems and OS configurations can cause
+#' headless Chrome execution to fail. As a result, headless Chrome operations should
+#' use a special directory for `decapitated` package operations. You can pass this
+#' in as `work_dir`. If `work_dir` is `NULL` a `.rdecapdata` directory will be
+#' created in your home directory and used for the data, crash dumps and utility
+#' directories for Chrome operations.\cr
+#' \cr
+#' `tempdir()` does not always meet these requirements (after testing on various
+#' macOS 10.13 systems) as Chrome does some interesting attribute setting for
+#' some of its file operations.
+#' \cr
+#' If you pass in a `work_dir`, it must be one that does not violate OS security
+#' restrictions or headless Chrome will not function.
+#'
 #' @md
 #' @note This only grabs the `<body>` `innerHTML` contents
 #' @param url URL to read from
@ -8,25 +23,28 @@
 #'        headless Chrome cache. This seems to be necessary primarily on recent versions of macOS.
 #'        If numeric, that number of "prime" requests will be sent ahead of the capture request.
 #'        If `FALSE` no priming requests will be sent.
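+#' @param work_dir directory used for headless Chrome data, crash dumps and utility files. If `NULL`, a `.rdecapdata` directory in your home directory is used. See the section on OS security restrictions.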
 #' @param chrome_bin the path to Chrome (auto-set from `HEADLESS_CHROME` environment variable)
 #' @export
 #' @examples
 #' chrome_read_html("https://www.r-project.org/")
-chrome_read_html <- function(url, render=TRUE, prime=TRUE, chrome_bin=Sys.getenv("HEADLESS_CHROME")) {
+chrome_read_html <- function(url, render=TRUE, prime=TRUE, work_dir = NULL,
+                             chrome_bin=Sys.getenv("HEADLESS_CHROME")) {
+
+  work_dir <- if (is.null(work_dir)) .get_app_dir() else work_dir

  args <- c("--headless")
  args <- c(args, "--disable-gpu")
  args <- c(args, "--no-sandbox")
  args <- c(args, "--allow-no-sandbox-job")
-  args <- c(args, sprintf("--user-data-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--crash-dumps-dir=%s", .get_app_dir()))
-  args <- c(args, sprintf("--utility-allowed-dir=%s", .get_app_dir()))
+  args <- c(args, sprintf("--user-data-dir=%s", work_dir))
+  args <- c(args, sprintf("--crash-dumps-dir=%s", work_dir))
+  args <- c(args, sprintf("--utility-allowed-dir=%s", work_dir))
  args <- c(args, "--dump-dom", url)

  vers <- chrome_version(quiet=TRUE)

-  if (is.logical(prime) & prime) .prime_url(url, 1, chrome_bin)
-  if (is.numeric(prime) & (prime>0)) .prime_url(url, prime, chrome_bin)
+  .prime_url(url, as.numeric(prime), work_dir, chrome_bin)

  processx::run(
    command = chrome_bin,
--- a/man/chrome_dump_pdf.Rd
+++ b/man/chrome_dump_pdf.Rd
@ -4,7 +4,7 @@
 \alias{chrome_dump_pdf}
 \title{"Print" to PDF}
 \usage{
-chrome_dump_pdf(url, path = NULL, overwrite = TRUE,
+chrome_dump_pdf(url, path = NULL, overwrite = TRUE, work_dir = NULL,
  chrome_bin = Sys.getenv("HEADLESS_CHROME"))
 }
 \arguments{
@ -17,6 +17,8 @@ the end of it.}

 \item{overwrite}{overwrite existing file? Default: \code{TRUE}}

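+\item{work_dir}{directory used for headless Chrome data, crash dumps and utility files. If \code{NULL}, a \code{.rdecapdata} directory in your home directory is used. See the section on OS security restrictions.}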
+
 \item{chrome_bin}{the path to Chrome (auto-set from \code{HEADLESS_CHROME} environment variable)}

 \item{prime}{if \code{TRUE} preliminary URL retrieval requests will be sent to "prime" the
@ -33,6 +35,23 @@ output fileame (invisibly)
 \note{
 The default Chrome filename is \code{output.pdf}
 }
+\section{Working around headless Chrome & OS security restrictions}{
+
+Security restrictions on various operating systems and OS configurations can cause
+headless Chrome execution to fail. As a result, headless Chrome operations should
+use a special directory for \code{decapitated} package operations. You can pass this
+in as \code{work_dir}. If \code{work_dir} is \code{NULL} a \code{.rdecapdata} directory will be
+created in your home directory and used for the data, crash dumps and utility
+directories for Chrome operations.\cr
+\cr
+\code{tempdir()} does not always meet these requirements (after testing on various
+macOS 10.13 systems) as Chrome does some interesting attribute setting for
+some of its file operations.
+\cr
+If you pass in a \code{work_dir}, it must be one that does not violate OS security
+restrictions or headless Chrome will not function.
+}
+
 \examples{
 chrome_dump_pdf("https://www.r-project.org/")
 }
--- a/man/chrome_read_html.Rd
+++ b/man/chrome_read_html.Rd
@ -4,7 +4,7 @@
 \alias{chrome_read_html}
 \title{Read a URL via headless Chrome and return the raw or rendered \code{<body>} \code{innerHTML} DOM elements}
 \usage{
-chrome_read_html(url, render = TRUE, prime = TRUE,
+chrome_read_html(url, render = TRUE, prime = TRUE, work_dir = NULL,
  chrome_bin = Sys.getenv("HEADLESS_CHROME"))
 }
 \arguments{
@ -17,6 +17,8 @@ headless Chrome cache. This seems to be necessary primarily on recent versions o
 If numeric, that number of "prime" requests will be sent ahead of the capture request.
 If \code{FALSE} no priming requests will be sent.}

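+\item{work_dir}{directory used for headless Chrome data, crash dumps and utility files. If \code{NULL}, a \code{.rdecapdata} directory in your home directory is used. See the section on OS security restrictions.}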
+
 \item{chrome_bin}{the path to Chrome (auto-set from \code{HEADLESS_CHROME} environment variable)}
 }
 \description{
@ -25,6 +27,23 @@ Read a URL via headless Chrome and return the raw or rendered \code{<body>} \cod
 \note{
 This only grabs the \code{<body>} \code{innerHTML} contents
 }
+\section{Working around headless Chrome & OS security restrictions}{
+
+Security restrictions on various operating systems and OS configurations can cause
+headless Chrome execution to fail. As a result, headless Chrome operations should
+use a special directory for \code{decapitated} package operations. You can pass this
+in as \code{work_dir}. If \code{work_dir} is \code{NULL} a \code{.rdecapdata} directory will be
+created in your home directory and used for the data, crash dumps and utility
+directories for Chrome operations.\cr
+\cr
+\code{tempdir()} does not always meet these requirements (after testing on various
+macOS 10.13 systems) as Chrome does some interesting attribute setting for
+some of its file operations.
+\cr
+If you pass in a \code{work_dir}, it must be one that does not violate OS security
+restrictions or headless Chrome will not function.
+}
+
 \examples{
 chrome_read_html("https://www.r-project.org/")
 }
--- a/man/chrome_shot.Rd
+++ b/man/chrome_shot.Rd
@ -5,7 +5,7 @@
 \title{Capture a screenshot}
 \usage{
 chrome_shot(url, width = NULL, height = NULL, path = NULL,
-  overwrite = TRUE, chrome_bin = Sys.getenv("HEADLESS_CHROME"))
+  work_dir = NULL, chrome_bin = Sys.getenv("HEADLESS_CHROME"))
 }
 \arguments{
 \item{url}{URL to read from}
@ -17,10 +17,12 @@ and \code{overwrite} is \code{FALSE}, the fuction will will ensure a uniquely-na
 placed in the current working directory by incrementing trailing numbers before
 the end of it.}

-\item{overwrite}{overwrite existing file? Default: \code{TRUE}}
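+\item{work_dir}{directory used for headless Chrome data, crash dumps and utility files. If \code{NULL}, a \code{.rdecapdata} directory in your home directory is used. See the section on OS security restrictions.}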

 \item{chrome_bin}{the path to Chrome (auto-set from \code{HEADLESS_CHROME} environment variable)}

+\item{overwrite}{overwrite existing file? Default: \code{TRUE}}
+
 \item{prime}{if \code{TRUE} preliminary URL retrieval requests will be sent to "prime" the
 headless Chrome cache. This seems to be necessary primarily on recent versions of macOS.
 If numeric, that number of "prime" requests will be sent ahead of the capture request.
@ -30,15 +32,28 @@ If \code{FALSE} no priming requests will be sent.}
 \code{magick}
 }
 \description{
-For the moment, the capture file is in the current working directory and named
-\code{screenshot.png}. This will change, soon.
-}
-\details{
 A \code{magick} image object is returned.
 }
 \note{
 The default Chrome filename is \code{screenshot.png}
 }
+\section{Working around headless Chrome & OS security restrictions}{
+
+Security restrictions on various operating systems and OS configurations can cause
+headless Chrome execution to fail. As a result, headless Chrome operations should
+use a special directory for \code{decapitated} package operations. You can pass this
+in as \code{work_dir}. If \code{work_dir} is \code{NULL} a \code{.rdecapdata} directory will be
+created in your home directory and used for the data, crash dumps and utility
+directories for Chrome operations.\cr
+\cr
+\code{tempdir()} does not always meet these requirements (after testing on various
+macOS 10.13 systems) as Chrome does some interesting attribute setting for
+some of its file operations.
+\cr
+If you pass in a \code{work_dir}, it must be one that does not violate OS security
+restrictions or headless Chrome will not function.
+}
+
 \examples{
 chrome_shot("https://www.r-project.org/logo/Rlogo.svg")
 }
--- a/man/decapitated.Rd
+++ b/man/decapitated.Rd
@ -11,6 +11,23 @@ which can be instrumented programmatically. Tools are provided to perform headle
 Chrome' instrumentation on the command-line, including retrieving the javascript-executed
 web page, PDF output or screen shot of a URL.
 }
+\section{Working around headless Chrome & OS security restrictions}{
+
+Security restrictions on various operating systems and OS configurations can cause
+headless Chrome execution to fail. As a result, headless Chrome operations should
+use a special directory for \code{decapitated} package operations. You can pass this
+in as \code{work_dir}. If \code{work_dir} is \code{NULL} a \code{.rdecapdata} directory will be
+created in your home directory and used for the data, crash dumps and utility
+directories for Chrome operations.\cr
+\cr
+\code{tempdir()} does not always meet these requirements (after testing on various
+macOS 10.13 systems) as Chrome does some interesting attribute setting for
+some of its file operations.
+\cr
+If you pass in a \code{work_dir}, it must be one that does not violate OS security
+restrictions or headless Chrome will not function.
+}
+
 \section{Important}{