From f5ab84dc23a54ed3df62e3bafbef457c445baec8 Mon Sep 17 00:00:00 2001 From: hrbrmstr Date: Mon, 13 Aug 2018 16:36:28 -0400 Subject: [PATCH] basic auth to api --- DESCRIPTION | 4 ++-- NEWS.md | 4 ++++ R/dsl.r | 48 ------------------------------------------------ R/execute.r | 9 ++++++++- R/render-har.r | 9 ++++++++- R/render-html.r | 9 ++++++++- R/render-jpg.r | 9 ++++++++- R/render-png.r | 44 ++++++++++++++++++++++++++------------------ R/splashr.r | 29 ++++++++++++++++++++++------- R/user-agents.R | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ man/render_jpeg.Rd | 9 ++++++--- man/render_json.Rd | 9 ++++++--- man/render_png.Rd | 10 ++++++---- man/splash.Rd | 7 +++++-- man/splash_user_agent.Rd | 2 +- 15 files changed, 158 insertions(+), 92 deletions(-) create mode 100644 R/user-agents.R diff --git a/DESCRIPTION b/DESCRIPTION index 0ecf6a2..5294c07 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: splashr Type: Package Title: Tools to Work with the 'Splash' 'JavaScript' Rendering and Scraping Service -Version: 0.4.1 -Date: 2018-01-16 +Version: 0.5.0 +Date: 2018-08-13 Encoding: UTF-8 Authors@R: c( person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index df44147..7996235 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +0.5.0 + +* support Splash API basic auth + 0.4.1 * removed clipr usage due to CRAN diff --git a/R/dsl.r b/R/dsl.r index 9280627..d8cbd33 100644 --- a/R/dsl.r +++ b/R/dsl.r @@ -418,51 +418,3 @@ splash_user_agent <- function(splash_obj, user_agent=ua_splashr) { splash_obj$calls <- c(splash_obj$calls, sprintf('splash:set_user_agent("%s")', user_agent)) splash_obj } - -#' @rdname splash_user_agent -#' @export -ua_splashr <- sprintf("splashr/%s", packageVersion("splashr")) - -#' @rdname splash_user_agent -#' @export -ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" - -#' @rdname splash_user_agent 
-#' @export -ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0" - -#' @rdname splash_user_agent -#' @export -ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" - -#' @rdname splash_user_agent -#' @export -ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" - -#' @rdname splash_user_agent -#' @export -ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0" - -#' @rdname splash_user_agent -#' @export -ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko" - -#' @rdname splash_user_agent -#' @export -ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36" - -#' @rdname splash_user_agent -#' @export -ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12" - -#' @rdname splash_user_agent -#' @export -ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" - -#' @rdname splash_user_agent -#' @export -ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0" - -#' @rdname splash_user_agent -#' @export -ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1" diff --git a/R/execute.r b/R/execute.r index 60d6f8f..ca6bcb2 100644 --- a/R/execute.r +++ b/R/execute.r @@ -44,7 +44,14 @@ execute_lua <- function(splash_obj, lua_source, timeout=30, allowed_domains, if (!missing(save_args)) params$save_args <- save_args if (!missing(load_args)) params$load_args <- load_args - res <- httr::GET(splash_url(splash_obj), path="execute", encode="json", query=params) + if (is.null(splash_obj$user)) { 
+ res <- httr::GET(splash_url(splash_obj), path="execute", encode="json", query=params) + } else { + res <- httr::GET( + splash_url(splash_obj), path="execute", encode="json", query=params, + httr::authenticate(splash_obj$user, splash_obj$pass) + ) + } httr::stop_for_status(res) diff --git a/R/render-har.r b/R/render-har.r index d47a3f8..eac1693 100644 --- a/R/render-har.r +++ b/R/render-har.r @@ -36,7 +36,14 @@ render_har <- function(splash_obj = splash_local, url, base_url, response_body=F if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) - res <- httr::GET(splash_url(splash_obj), path="render.har", encode="json", query=params) + if (is.null(splash_obj$user)) { + res <- httr::GET(splash_url(splash_obj), path="render.har", encode="json", query=params) + } else { + res <- httr::GET( + splash_url(splash_obj), path="render.har", encode="json", query=params, + httr::authenticate(splash_obj$user, splash_obj$pass) + ) + } httr::stop_for_status(res) diff --git a/R/render-html.r b/R/render-html.r index 21ee7c7..222d82b 100644 --- a/R/render-html.r +++ b/R/render-html.r @@ -65,7 +65,14 @@ render_html <- function(splash_obj = splash_local, url, base_url, timeout=30, re if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) - res <- httr::GET(splash_url(splash_obj), path="render.html", encode="json", query=params) + if (is.null(splash_obj$user)) { + res <- httr::GET(splash_url(splash_obj), path="render.html", encode="json", query=params) + } else { + res <- httr::GET( + splash_url(splash_obj), path="render.html", encode="json", query=params, + httr::authenticate(splash_obj$user, splash_obj$pass) + ) + } httr::stop_for_status(res) diff --git a/R/render-jpg.r b/R/render-jpg.r index 9bbd7c1..cd73a12 100644 --- a/R/render-jpg.r +++ b/R/render-jpg.r @@ -42,7 +42,14 @@ render_jpeg <- render_jpg <- 
function( if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) - res <- httr::GET(splash_url(splash_obj), path="render.jpeg", encode="json", query=params) + if (is.null(splash_obj$user)) { + res <- httr::GET(splash_url(splash_obj), path="render.jpeg", encode="json", query=params) + } else { + res <- httr::GET( + splash_url(splash_obj), path="render.jpeg", encode="json", query=params, + httr::authenticate(splash_obj$user, splash_obj$pass) + ) + } httr::stop_for_status(res) diff --git a/R/render-png.r b/R/render-png.r index d264c0d..8d9af81 100644 --- a/R/render-png.r +++ b/R/render-png.r @@ -1,8 +1,10 @@ -#' Return a image (in PNG format) of the javascript-rendered page. +#' Return an image (in PNG format) of the javascript-rendered page. #' #' @md -#' @param width,height Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional -#' @param render_all If `TRUE` extend the viewport to include the whole webpage (possibly very tall) before rendering. +#' @param width,height Resize the rendered image to the given width/height (in +#' pixels) keeping the aspect ratio. These are optional +#' @param render_all If `TRUE` extend the viewport to include the whole webpage +#' (possibly very tall) before rendering. 
#' @family splash_renderers #' @return a [magick] image object #' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html) @@ -12,18 +14,19 @@ #' render_png(url = "https://httpbin.org/") #' } render_png <- function( - splash_obj = splash_local, url, base_url=NULL, width, height, - timeout=30, resource_timeout, wait=0, render_all=TRUE, - proxy, js, js_src, filters, allowed_domains, allowed_content_types, - forbidden_content_types, viewport="full", images, headers, body, - http_method, save_args, load_args) { - + splash_obj = splash_local, url, base_url=NULL, width, height, + timeout=30, resource_timeout, wait=0, render_all=TRUE, + proxy, js, js_src, filters, allowed_domains, allowed_content_types, + forbidden_content_types, viewport="full", images, headers, body, + http_method, save_args, load_args) { wait <- check_wait(wait) - params <- list(url=url, timeout=timeout, - wait=if (render_all & wait == 0) 0.5 else wait, - viewport=jsonlite::unbox(viewport), - render_all=as.numeric(render_all)) + params <- list( + url = url, timeout = timeout, + wait = if (render_all & wait == 0) 0.5 else wait, + viewport = jsonlite::unbox(viewport), + render_all = as.numeric(render_all) + ) if (!missing(width)) params$width <- width if (!missing(height)) params$height <- height @@ -44,11 +47,16 @@ render_png <- function( if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) - res <- httr::GET(splash_url(splash_obj), path="render.png", encode="json", query=params) + if (is.null(splash_obj$user)) { + res <- httr::GET(splash_url(splash_obj), path="render.png", encode="json", query=params) + } else { + res <- httr::GET( + splash_url(splash_obj), path="render.png", encode="json", query=params, + httr::authenticate(splash_obj$user, splash_obj$pass) + ) + } httr::stop_for_status(res) - magick::image_read(httr::content(res, as="raw")) - -} - + magick::image_read(httr::content(res, as 
= "raw")) +} \ No newline at end of file diff --git a/R/splashr.r b/R/splashr.r index c451694..b17c37a 100644 --- a/R/splashr.r +++ b/R/splashr.r @@ -4,12 +4,14 @@ splash_url <- function(splash_obj) { sprintf("http://%s:%s", splash_obj$host, sp #' #' @param host host or IP address #' @param port port the server is running on (default is 8050) +#' @param user,pass leave `NULL` if basic auth is not configured. Otherwise, +#' fill in what you need for basic authentication. #' @export #' @examples \dontrun{ #' sp <- splash() #' } -splash <- function(host, port=8050L) { - list(host=host, port=port) +splash <- function(host, port=8050L, user=NULL, pass=NULL) { + list(host=host, port=port, user=user, pass=pass) } #' @rdname splash @@ -31,7 +33,12 @@ s_GET <- purrr::safely(GET) #' } splash_active <- function(splash_obj = splash_local) { - res <- s_GET(splash_url(splash_obj), path="_ping") + if (is.null(splash_obj$user)) { + res <- s_GET(splash_url(splash_obj), path="_ping") + } else { + res <- s_GET(splash_url(splash_obj), path="_ping", + httr::authenticate(splash_obj$user, splash_obj$pass)) + } if (is.null(res$result)) return(FALSE) if (httr::status_code(res$result) >=300) return(FALSE) @@ -116,10 +123,18 @@ end #' } splash_debug <- function(splash_obj = splash_local) { - httr::GET(splash_url(splash_obj), path="_debug") %>% - httr::stop_for_status() %>% - httr::content(as="text", encoding="UTF-8") %>% - jsonlite::fromJSON() -> out + if (is.null(splash_obj$user)) { + httr::GET(splash_url(splash_obj), path="_debug") %>% + httr::stop_for_status() %>% + httr::content(as="text", encoding="UTF-8") %>% + jsonlite::fromJSON() -> out + } else { + httr::GET(splash_url(splash_obj), path="_debug", + httr::authenticate(splash_obj$user, splash_obj$pass)) %>% + httr::stop_for_status() %>% + httr::content(as="text", encoding="UTF-8") %>% + jsonlite::fromJSON() -> out + } out$url <- splash_url(splash_obj) diff --git a/R/user-agents.R b/R/user-agents.R new file mode 100644 index 
0000000..16a8d34 --- /dev/null +++ b/R/user-agents.R @@ -0,0 +1,48 @@ + +#' @rdname splash_user_agent +#' @export +ua_splashr <- sprintf("splashr/%s", packageVersion("splashr")) + +#' @rdname splash_user_agent +#' @export +ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" + +#' @rdname splash_user_agent +#' @export +ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0" + +#' @rdname splash_user_agent +#' @export +ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" + +#' @rdname splash_user_agent +#' @export +ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" + +#' @rdname splash_user_agent +#' @export +ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0" + +#' @rdname splash_user_agent +#' @export +ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko" + +#' @rdname splash_user_agent +#' @export +ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36" + +#' @rdname splash_user_agent +#' @export +ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12" + +#' @rdname splash_user_agent +#' @export +ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36" + +#' @rdname splash_user_agent +#' @export +ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0" + +#' @rdname splash_user_agent +#' @export +ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1" diff --git a/man/render_jpeg.Rd 
b/man/render_jpeg.Rd index c5b7500..b5d0b3f 100644 --- a/man/render_jpeg.Rd +++ b/man/render_jpeg.Rd @@ -19,9 +19,11 @@ render_jpeg(splash_obj = splash_local, url, base_url = NULL, quality = 75, \item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.} -\item{width}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} +\item{width}{Resize the rendered image to the given width/height (in +pixels) keeping the aspect ratio. These are optional} -\item{height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} +\item{height}{Resize the rendered image to the given width/height (in +pixels) keeping the aspect ratio. These are optional} \item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without reconfiguring the startup parameters of the Splash server (not this package) @@ -31,7 +33,8 @@ the maximum allowed value for the timeout is 60 seconds.} \item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).} -\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.} +\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage +(possibly very tall) before rendering.} \item{proxy}{Proxy profile name or proxy URL.} diff --git a/man/render_json.Rd b/man/render_json.Rd index 5a4b7d8..ea55df2 100644 --- a/man/render_json.Rd +++ b/man/render_json.Rd @@ -21,9 +21,11 @@ render_json(splash_obj = splash_local, url, base_url = NULL, quality = 75, \item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.} -\item{width}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} +\item{width}{Resize the rendered image to the given width/height (in +pixels) keeping the aspect ratio. 
These are optional} -\item{height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} +\item{height}{Resize the rendered image to the given width/height (in +pixels) keeping the aspect ratio. These are optional} \item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without reconfiguring the startup parameters of the Splash server (not this package) @@ -33,7 +35,8 @@ the maximum allowed value for the timeout is 60 seconds.} \item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).} -\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.} +\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage +(possibly very tall) before rendering.} \item{proxy}{Proxy profile name or proxy URL.} diff --git a/man/render_png.Rd b/man/render_png.Rd index e3e4408..8c0b209 100644 --- a/man/render_png.Rd +++ b/man/render_png.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/render-png.r \name{render_png} \alias{render_png} -\title{Return a image (in PNG format) of the javascript-rendered page.} +\title{Return an image (in PNG format) of the javascript-rendered page.} \usage{ render_png(splash_obj = splash_local, url, base_url = NULL, width, height, timeout = 30, resource_timeout, wait = 0, render_all = TRUE, proxy, js, @@ -17,7 +17,8 @@ render_png(splash_obj = splash_local, url, base_url = NULL, width, height, \item{base_url}{The base url to render the page with.} -\item{width, height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} +\item{width, height}{Resize the rendered image to the given width/height (in +pixels) keeping the aspect ratio. These are optional} \item{timeout}{A timeout (in seconds) for the render (defaults to 30). 
Without reconfiguring the startup parameters of the Splash server (not this package) @@ -27,7 +28,8 @@ the maximum allowed value for the timeout is 60 seconds.} \item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).} -\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.} +\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage +(possibly very tall) before rendering.} \item{proxy}{Proxy profile name or proxy URL.} @@ -68,7 +70,7 @@ web page. Format is “x”, e.g. 800x600. Default value is "full a \link{magick} image object } \description{ -Return a image (in PNG format) of the javascript-rendered page. +Return an image (in PNG format) of the javascript-rendered page. } \examples{ \dontrun{ diff --git a/man/splash.Rd b/man/splash.Rd index 92a3a31..9c271be 100644 --- a/man/splash.Rd +++ b/man/splash.Rd @@ -5,9 +5,9 @@ \alias{splash} \alias{splash_local} \title{Configure parameters for connecting to a Splash server} -\format{An object of class \code{list} of length 2.} +\format{An object of class \code{list} of length 4.} \usage{ -splash(host, port = 8050L) +splash(host, port = 8050L, user = NULL, pass = NULL) splash_local } @@ -15,6 +15,9 @@ splash_local \item{host}{host or IP address} \item{port}{port the server is running on (default is 8050)} + +\item{user, pass}{leave `NULL` if basic auth is not configured. Otherwise, +fill in what you need for basic authentication.} } \description{ Configure parameters for connecting to a Splash server diff --git a/man/splash_user_agent.Rd b/man/splash_user_agent.Rd index ffba323..215129b 100644 --- a/man/splash_user_agent.Rd +++ b/man/splash_user_agent.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dsl.r +% Please edit documentation in R/dsl.r, R/user-agents.R \docType{data} \name{splash_user_agent} \alias{splash_user_agent}