Browse Source

basic auth to api

master
boB Rudis 6 years ago
parent
commit
f5ab84dc23
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 4
      DESCRIPTION
  2. 4
      NEWS.md
  3. 48
      R/dsl.r
  4. 9
      R/execute.r
  5. 9
      R/render-har.r
  6. 9
      R/render-html.r
  7. 9
      R/render-jpg.r
  8. 44
      R/render-png.r
  9. 29
      R/splashr.r
  10. 48
      R/user-agents.R
  11. 9
      man/render_jpeg.Rd
  12. 9
      man/render_json.Rd
  13. 10
      man/render_png.Rd
  14. 7
      man/splash.Rd
  15. 2
      man/splash_user_agent.Rd

4
DESCRIPTION

@ -1,8 +1,8 @@
Package: splashr Package: splashr
Type: Package Type: Package
Title: Tools to Work with the 'Splash' 'JavaScript' Rendering and Scraping Service Title: Tools to Work with the 'Splash' 'JavaScript' Rendering and Scraping Service
Version: 0.4.1 Version: 0.5.0
Date: 2018-01-16 Date: 2018-08-13
Encoding: UTF-8 Encoding: UTF-8
Authors@R: c( Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),

4
NEWS.md

@ -1,3 +1,7 @@
0.5.0
* support Splash API basic auth
0.4.1 0.4.1
* removed clipr usage due to CRAN * removed clipr usage due to CRAN

48
R/dsl.r

@ -418,51 +418,3 @@ splash_user_agent <- function(splash_obj, user_agent=ua_splashr) {
splash_obj$calls <- c(splash_obj$calls, sprintf('splash:set_user_agent("%s")', user_agent)) splash_obj$calls <- c(splash_obj$calls, sprintf('splash:set_user_agent("%s")', user_agent))
splash_obj splash_obj
} }
#' @rdname splash_user_agent
#' @export
ua_splashr <- sprintf("splashr/%s", packageVersion("splashr"))
#' @rdname splash_user_agent
#' @export
ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12"
#' @rdname splash_user_agent
#' @export
ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1"

9
R/execute.r

@ -44,7 +44,14 @@ execute_lua <- function(splash_obj, lua_source, timeout=30, allowed_domains,
if (!missing(save_args)) params$save_args <- save_args if (!missing(save_args)) params$save_args <- save_args
if (!missing(load_args)) params$load_args <- load_args if (!missing(load_args)) params$load_args <- load_args
res <- httr::GET(splash_url(splash_obj), path="execute", encode="json", query=params) if (is.null(splash_obj$user)) {
res <- httr::GET(splash_url(splash_obj), path="execute", encode="json", query=params)
} else {
res <- httr::GET(
splash_url(splash_obj), path="execute", encode="json", query=params,
httr::authenticate(splash_obj$user, splash_obj$pass)
)
}
httr::stop_for_status(res) httr::stop_for_status(res)

9
R/render-har.r

@ -36,7 +36,14 @@ render_har <- function(splash_obj = splash_local, url, base_url, response_body=F
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.har", encode="json", query=params) if (is.null(splash_obj$user)) {
res <- httr::GET(splash_url(splash_obj), path="render.har", encode="json", query=params)
} else {
# Authenticated request: must target the same "render.har" endpoint as the
# unauthenticated branch; only the basic-auth credentials differ.
res <- httr::GET(
  splash_url(splash_obj), path="render.har", encode="json", query=params,
  httr::authenticate(splash_obj$user, splash_obj$pass)
)
}
httr::stop_for_status(res) httr::stop_for_status(res)

9
R/render-html.r

@ -65,7 +65,14 @@ render_html <- function(splash_obj = splash_local, url, base_url, timeout=30, re
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.html", encode="json", query=params) if (is.null(splash_obj$user)) {
res <- httr::GET(splash_url(splash_obj), path="render.html", encode="json", query=params)
} else {
res <- httr::GET(
splash_url(splash_obj), path="render.html", encode="json", query=params,
httr::authenticate(splash_obj$user, splash_obj$pass)
)
}
httr::stop_for_status(res) httr::stop_for_status(res)

9
R/render-jpg.r

@ -42,7 +42,14 @@ render_jpeg <- render_jpg <- function(
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.jpeg", encode="json", query=params) if (is.null(splash_obj$user)) {
res <- httr::GET(splash_url(splash_obj), path="render.jpeg", encode="json", query=params)
} else {
# Authenticated request: must target the same "render.jpeg" endpoint as the
# unauthenticated branch; only the basic-auth credentials differ.
res <- httr::GET(
  splash_url(splash_obj), path="render.jpeg", encode="json", query=params,
  httr::authenticate(splash_obj$user, splash_obj$pass)
)
}
httr::stop_for_status(res) httr::stop_for_status(res)

44
R/render-png.r

@ -1,8 +1,10 @@
#' Return a image (in PNG format) of the javascript-rendered page. #' Return an image (in PNG format) of the javascript-rendered page.
#' #'
#' @md #' @md
#' @param width,height Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional #' @param width,height Resize the rendered image to the given width/height (in
#' @param render_all If `TRUE` extend the viewport to include the whole webpage (possibly very tall) before rendering. #' pixels) keeping the aspect ratio. These are optional
#' @param render_all If `TRUE` extend the viewport to include the whole webpage
#' (possibly very tall) before rendering.
#' @family splash_renderers #' @family splash_renderers
#' @return a [magick] image object #' @return a [magick] image object
#' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html) #' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html)
@ -12,18 +14,19 @@
#' render_png(url = "https://httpbin.org/") #' render_png(url = "https://httpbin.org/")
#' } #' }
render_png <- function( render_png <- function(
splash_obj = splash_local, url, base_url=NULL, width, height, splash_obj = splash_local, url, base_url=NULL, width, height,
timeout=30, resource_timeout, wait=0, render_all=TRUE, timeout=30, resource_timeout, wait=0, render_all=TRUE,
proxy, js, js_src, filters, allowed_domains, allowed_content_types, proxy, js, js_src, filters, allowed_domains, allowed_content_types,
forbidden_content_types, viewport="full", images, headers, body, forbidden_content_types, viewport="full", images, headers, body,
http_method, save_args, load_args) { http_method, save_args, load_args) {
wait <- check_wait(wait) wait <- check_wait(wait)
params <- list(url=url, timeout=timeout, params <- list(
wait=if (render_all & wait == 0) 0.5 else wait, url = url, timeout = timeout,
viewport=jsonlite::unbox(viewport), wait = if (render_all & wait == 0) 0.5 else wait,
render_all=as.numeric(render_all)) viewport = jsonlite::unbox(viewport),
render_all = as.numeric(render_all)
)
if (!missing(width)) params$width <- width if (!missing(width)) params$width <- width
if (!missing(height)) params$height <- height if (!missing(height)) params$height <- height
@ -44,11 +47,16 @@ render_png <- function(
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args) if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args) if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.png", encode="json", query=params) if (is.null(splash_obj$user)) {
res <- httr::GET(splash_url(splash_obj), path="render.png", encode="json", query=params)
} else {
# Authenticated request: must target the same "render.png" endpoint as the
# unauthenticated branch so the response body is PNG data that
# magick::image_read() can decode; only the basic-auth credentials differ.
res <- httr::GET(
  splash_url(splash_obj), path="render.png", encode="json", query=params,
  httr::authenticate(splash_obj$user, splash_obj$pass)
)
}
httr::stop_for_status(res) httr::stop_for_status(res)
magick::image_read(httr::content(res, as="raw")) magick::image_read(httr::content(res, as = "raw"))
}
}

29
R/splashr.r

@ -4,12 +4,14 @@ splash_url <- function(splash_obj) { sprintf("http://%s:%s", splash_obj$host, sp
#' #'
#' @param host host or IP address #' @param host host or IP address
#' @param port port the server is running on (default is 8050) #' @param port port the server is running on (default is 8050)
#' @param user,pass leave \code{NULL} if basic auth is not configured. Otherwise,
#' fill in what you need for basic authentication.
#' @export #' @export
#' @examples \dontrun{ #' @examples \dontrun{
#' sp <- splash() #' sp <- splash()
#' } #' }
splash <- function(host, port=8050L) { splash <- function(host, port=8050L, user=NULL, pass=NULL) {
list(host=host, port=port) list(host=host, port=port, user=user, pass=pass)
} }
#' @rdname splash #' @rdname splash
@ -31,7 +33,12 @@ s_GET <- purrr::safely(GET)
#' } #' }
splash_active <- function(splash_obj = splash_local) { splash_active <- function(splash_obj = splash_local) {
res <- s_GET(splash_url(splash_obj), path="_ping") if (is.null(splash_obj$user)) {
res <- s_GET(splash_url(splash_obj), path="_ping")
} else {
res <- s_GET(splash_url(splash_obj), path="_ping",
httr::authenticate(splash_obj$user, splash_obj$pass))
}
if (is.null(res$result)) return(FALSE) if (is.null(res$result)) return(FALSE)
if (httr::status_code(res$result) >=300) return(FALSE) if (httr::status_code(res$result) >=300) return(FALSE)
@ -116,10 +123,18 @@ end
#' } #' }
splash_debug <- function(splash_obj = splash_local) { splash_debug <- function(splash_obj = splash_local) {
httr::GET(splash_url(splash_obj), path="_debug") %>% if (is.null(splash_obj$user)) {
httr::stop_for_status() %>% httr::GET(splash_url(splash_obj), path="_debug") %>%
httr::content(as="text", encoding="UTF-8") %>% httr::stop_for_status() %>%
jsonlite::fromJSON() -> out httr::content(as="text", encoding="UTF-8") %>%
jsonlite::fromJSON() -> out
} else {
httr::GET(splash_url(splash_obj), path="_debug",
httr::authenticate(splash_obj$user, splash_obj$pass)) %>%
httr::stop_for_status() %>%
httr::content(as="text", encoding="UTF-8") %>%
jsonlite::fromJSON() -> out
}
out$url <- splash_url(splash_obj) out$url <- splash_url(splash_obj)

48
R/user-agents.R

@ -0,0 +1,48 @@
# Exported user-agent string constants, documented on the `splash_user_agent`
# help page. `ua_splashr` is built from the package's own version via
# packageVersion("splashr"); the remaining constants are fixed browser
# user-agent strings for specific OS/browser combinations.

#' @rdname splash_user_agent
#' @export
ua_splashr <- sprintf("splashr/%s", packageVersion("splashr"))
#' @rdname splash_user_agent
#' @export
ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12"
#' @rdname splash_user_agent
#' @export
ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1"

9
man/render_jpeg.Rd

@ -19,9 +19,11 @@ render_jpeg(splash_obj = splash_local, url, base_url = NULL, quality = 75,
\item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.} \item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.}
\item{width}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} \item{width}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} \item{height}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without \item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without
reconfiguring the startup parameters of the Splash server (not this package) reconfiguring the startup parameters of the Splash server (not this package)
@ -31,7 +33,8 @@ the maximum allowed value for the timeout is 60 seconds.}
\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).} \item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.} \item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage
(possibly very tall) before rendering.}
\item{proxy}{Proxy profile name or proxy URL.} \item{proxy}{Proxy profile name or proxy URL.}

9
man/render_json.Rd

@ -21,9 +21,11 @@ render_json(splash_obj = splash_local, url, base_url = NULL, quality = 75,
\item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.} \item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.}
\item{width}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} \item{width}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} \item{height}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without \item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without
reconfiguring the startup parameters of the Splash server (not this package) reconfiguring the startup parameters of the Splash server (not this package)
@ -33,7 +35,8 @@ the maximum allowed value for the timeout is 60 seconds.}
\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).} \item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.} \item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage
(possibly very tall) before rendering.}
\item{proxy}{Proxy profile name or proxy URL.} \item{proxy}{Proxy profile name or proxy URL.}

10
man/render_png.Rd

@ -2,7 +2,7 @@
% Please edit documentation in R/render-png.r % Please edit documentation in R/render-png.r
\name{render_png} \name{render_png}
\alias{render_png} \alias{render_png}
\title{Return a image (in PNG format) of the javascript-rendered page.} \title{Return an image (in PNG format) of the javascript-rendered page.}
\usage{ \usage{
render_png(splash_obj = splash_local, url, base_url = NULL, width, height, render_png(splash_obj = splash_local, url, base_url = NULL, width, height,
timeout = 30, resource_timeout, wait = 0, render_all = TRUE, proxy, js, timeout = 30, resource_timeout, wait = 0, render_all = TRUE, proxy, js,
@ -17,7 +17,8 @@ render_png(splash_obj = splash_local, url, base_url = NULL, width, height,
\item{base_url}{The base url to render the page with.} \item{base_url}{The base url to render the page with.}
\item{width, height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional} \item{width, height}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without \item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without
reconfiguring the startup parameters of the Splash server (not this package) reconfiguring the startup parameters of the Splash server (not this package)
@ -27,7 +28,8 @@ the maximum allowed value for the timeout is 60 seconds.}
\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).} \item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.} \item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage
(possibly very tall) before rendering.}
\item{proxy}{Proxy profile name or proxy URL.} \item{proxy}{Proxy profile name or proxy URL.}
@ -68,7 +70,7 @@ web page. Format is “<width>x<height>”, e.g. 800x600. Default value is "full
a \link{magick} image object a \link{magick} image object
} }
\description{ \description{
Return a image (in PNG format) of the javascript-rendered page. Return an image (in PNG format) of the javascript-rendered page.
} }
\examples{ \examples{
\dontrun{ \dontrun{

7
man/splash.Rd

@ -5,9 +5,9 @@
\alias{splash} \alias{splash}
\alias{splash_local} \alias{splash_local}
\title{Configure parameters for connecting to a Splash server} \title{Configure parameters for connecting to a Splash server}
\format{An object of class \code{list} of length 2.} \format{An object of class \code{list} of length 4.}
\usage{ \usage{
splash(host, port = 8050L) splash(host, port = 8050L, user = NULL, pass = NULL)
splash_local splash_local
} }
@ -15,6 +15,9 @@ splash_local
\item{host}{host or IP address} \item{host}{host or IP address}
\item{port}{port the server is running on (default is 8050)} \item{port}{port the server is running on (default is 8050)}
\item{user, pass}{leave \code{NULL} if basic auth is not configured. Otherwise,
fill in what you need for basic authentication.}
} }
\description{ \description{
Configure parameters for connecting to a Splash server Configure parameters for connecting to a Splash server

2
man/splash_user_agent.Rd

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r % Please edit documentation in R/dsl.r, R/user-agents.R
\docType{data} \docType{data}
\name{splash_user_agent} \name{splash_user_agent}
\alias{splash_user_agent} \alias{splash_user_agent}

Loading…
Cancel
Save