Browse Source

basic auth to api

master
boB Rudis 6 years ago
parent
commit
f5ab84dc23
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 4
      DESCRIPTION
  2. 4
      NEWS.md
  3. 48
      R/dsl.r
  4. 9
      R/execute.r
  5. 9
      R/render-har.r
  6. 9
      R/render-html.r
  7. 9
      R/render-jpg.r
  8. 44
      R/render-png.r
  9. 29
      R/splashr.r
  10. 48
      R/user-agents.R
  11. 9
      man/render_jpeg.Rd
  12. 9
      man/render_json.Rd
  13. 10
      man/render_png.Rd
  14. 7
      man/splash.Rd
  15. 2
      man/splash_user_agent.Rd

4
DESCRIPTION

@ -1,8 +1,8 @@
Package: splashr
Type: Package
Title: Tools to Work with the 'Splash' 'JavaScript' Rendering and Scraping Service
Version: 0.4.1
Date: 2018-01-16
Version: 0.5.0
Date: 2018-08-13
Encoding: UTF-8
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),

4
NEWS.md

@ -1,3 +1,7 @@
0.5.0
* support Splash API basic auth
0.4.1
* removed clipr usage due to CRAN

48
R/dsl.r

@ -418,51 +418,3 @@ splash_user_agent <- function(splash_obj, user_agent=ua_splashr) {
splash_obj$calls <- c(splash_obj$calls, sprintf('splash:set_user_agent("%s")', user_agent))
splash_obj
}
#' @rdname splash_user_agent
#' @export
ua_splashr <- sprintf("splashr/%s", packageVersion("splashr"))
#' @rdname splash_user_agent
#' @export
ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12"
#' @rdname splash_user_agent
#' @export
ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1"

9
R/execute.r

@ -44,7 +44,14 @@ execute_lua <- function(splash_obj, lua_source, timeout=30, allowed_domains,
if (!missing(save_args)) params$save_args <- save_args
if (!missing(load_args)) params$load_args <- load_args
res <- httr::GET(splash_url(splash_obj), path="execute", encode="json", query=params)
if (is.null(splash_obj$user)) {
res <- httr::GET(splash_url(splash_obj), path="execute", encode="json", query=params)
} else {
res <- httr::GET(
splash_url(splash_obj), path="execute", encode="json", query=params,
httr::authenticate(splash_obj$user, splash_obj$pass)
)
}
httr::stop_for_status(res)

9
R/render-har.r

@ -36,7 +36,14 @@ render_har <- function(splash_obj = splash_local, url, base_url, response_body=F
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.har", encode="json", query=params)
# Issue the render.har request. Both branches must hit the same endpoint;
# the only difference is whether basic-auth credentials are attached.
if (is.null(splash_obj$user)) {
  res <- httr::GET(splash_url(splash_obj), path="render.har", encode="json", query=params)
} else {
  # Fixed: the authenticated branch previously requested "render.html",
  # which returned HTML instead of HAR data.
  res <- httr::GET(
    splash_url(splash_obj), path="render.har", encode="json", query=params,
    httr::authenticate(splash_obj$user, splash_obj$pass)
  )
}
httr::stop_for_status(res)

9
R/render-html.r

@ -65,7 +65,14 @@ render_html <- function(splash_obj = splash_local, url, base_url, timeout=30, re
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.html", encode="json", query=params)
if (is.null(splash_obj$user)) {
res <- httr::GET(splash_url(splash_obj), path="render.html", encode="json", query=params)
} else {
res <- httr::GET(
splash_url(splash_obj), path="render.html", encode="json", query=params,
httr::authenticate(splash_obj$user, splash_obj$pass)
)
}
httr::stop_for_status(res)

9
R/render-jpg.r

@ -42,7 +42,14 @@ render_jpeg <- render_jpg <- function(
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.jpeg", encode="json", query=params)
# Issue the render.jpeg request. Both branches must hit the same endpoint;
# the only difference is whether basic-auth credentials are attached.
if (is.null(splash_obj$user)) {
  res <- httr::GET(splash_url(splash_obj), path="render.jpeg", encode="json", query=params)
} else {
  # Fixed: the authenticated branch previously requested "render.html",
  # which returned HTML instead of JPEG bytes.
  res <- httr::GET(
    splash_url(splash_obj), path="render.jpeg", encode="json", query=params,
    httr::authenticate(splash_obj$user, splash_obj$pass)
  )
}
httr::stop_for_status(res)

44
R/render-png.r

@ -1,8 +1,10 @@
#' Return a image (in PNG format) of the javascript-rendered page.
#' Return an image (in PNG format) of the javascript-rendered page.
#'
#' @md
#' @param width,height Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional
#' @param render_all If `TRUE` extend the viewport to include the whole webpage (possibly very tall) before rendering.
#' @param width,height Resize the rendered image to the given width/height (in
#' pixels) keeping the aspect ratio. These are optional
#' @param render_all If `TRUE` extend the viewport to include the whole webpage
#' (possibly very tall) before rendering.
#' @family splash_renderers
#' @return a [magick] image object
#' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html)
@ -12,18 +14,19 @@
#' render_png(url = "https://httpbin.org/")
#' }
render_png <- function(
splash_obj = splash_local, url, base_url=NULL, width, height,
timeout=30, resource_timeout, wait=0, render_all=TRUE,
proxy, js, js_src, filters, allowed_domains, allowed_content_types,
forbidden_content_types, viewport="full", images, headers, body,
http_method, save_args, load_args) {
splash_obj = splash_local, url, base_url=NULL, width, height,
timeout=30, resource_timeout, wait=0, render_all=TRUE,
proxy, js, js_src, filters, allowed_domains, allowed_content_types,
forbidden_content_types, viewport="full", images, headers, body,
http_method, save_args, load_args) {
wait <- check_wait(wait)
params <- list(url=url, timeout=timeout,
wait=if (render_all & wait == 0) 0.5 else wait,
viewport=jsonlite::unbox(viewport),
render_all=as.numeric(render_all))
# Base query parameters for the render request.
# When the whole page is rendered and no wait was requested, a 0.5 second
# wait is substituted for the zero wait.
params <- list(
  url = url, timeout = timeout,
  # `&&` (scalar, short-circuit) is the correct operator inside `if`;
  # elementwise `&` is meant for vector masks.
  wait = if (render_all && wait == 0) 0.5 else wait,
  viewport = jsonlite::unbox(viewport),
  render_all = as.numeric(render_all)
)
if (!missing(width)) params$width <- width
if (!missing(height)) params$height <- height
@ -44,11 +47,16 @@ render_png <- function(
if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)
res <- httr::GET(splash_url(splash_obj), path="render.png", encode="json", query=params)
# Issue the render.png request. Both branches must hit the same endpoint;
# the only difference is whether basic-auth credentials are attached.
if (is.null(splash_obj$user)) {
  res <- httr::GET(splash_url(splash_obj), path="render.png", encode="json", query=params)
} else {
  # Fixed: the authenticated branch previously requested "render.html",
  # so the response body handed to the image reader was HTML, not a PNG.
  res <- httr::GET(
    splash_url(splash_obj), path="render.png", encode="json", query=params,
    httr::authenticate(splash_obj$user, splash_obj$pass)
  )
}
httr::stop_for_status(res)
magick::image_read(httr::content(res, as="raw"))
}
magick::image_read(httr::content(res, as = "raw"))
}

29
R/splashr.r

@ -4,12 +4,14 @@ splash_url <- function(splash_obj) { sprintf("http://%s:%s", splash_obj$host, sp
#'
#' @param host host or IP address
#' @param port port the server is running on (default is 8050)
#' @param user,pass leave `NULL` if basic auth is not configured. Otherwise,
#' fill in what you need for basic authentication.
#' @export
#' @examples \dontrun{
#' sp <- splash()
#' }
splash <- function(host, port=8050L) {
list(host=host, port=port)
splash <- function(host, port=8050L, user=NULL, pass=NULL) {
  # Bundle the connection settings into a plain named list; NULL `user` /
  # `pass` entries are kept, so the result always has four elements.
  conn <- list(
    host = host,
    port = port,
    user = user,
    pass = pass
  )
  conn
}
#' @rdname splash
@ -31,7 +33,12 @@ s_GET <- purrr::safely(GET)
#' }
splash_active <- function(splash_obj = splash_local) {
res <- s_GET(splash_url(splash_obj), path="_ping")
if (is.null(splash_obj$user)) {
res <- s_GET(splash_url(splash_obj), path="_ping")
} else {
res <- s_GET(splash_url(splash_obj), path="_ping",
httr::authenticate(splash_obj$user, splash_obj$pass))
}
if (is.null(res$result)) return(FALSE)
if (httr::status_code(res$result) >=300) return(FALSE)
@ -116,10 +123,18 @@ end
#' }
splash_debug <- function(splash_obj = splash_local) {
httr::GET(splash_url(splash_obj), path="_debug") %>%
httr::stop_for_status() %>%
httr::content(as="text", encoding="UTF-8") %>%
jsonlite::fromJSON() -> out
if (is.null(splash_obj$user)) {
httr::GET(splash_url(splash_obj), path="_debug") %>%
httr::stop_for_status() %>%
httr::content(as="text", encoding="UTF-8") %>%
jsonlite::fromJSON() -> out
} else {
httr::GET(splash_url(splash_obj), path="_debug",
httr::authenticate(splash_obj$user, splash_obj$pass)) %>%
httr::stop_for_status() %>%
httr::content(as="text", encoding="UTF-8") %>%
jsonlite::fromJSON() -> out
}
out$url <- splash_url(splash_obj)

48
R/user-agents.R

@ -0,0 +1,48 @@
#' @rdname splash_user_agent
#' @export
ua_splashr <- sprintf("splashr/%s", packageVersion("splashr"))
#' @rdname splash_user_agent
#' @export
ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
#' @rdname splash_user_agent
#' @export
ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12"
#' @rdname splash_user_agent
#' @export
ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
#' @rdname splash_user_agent
#' @export
ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0"
#' @rdname splash_user_agent
#' @export
ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1"

9
man/render_jpeg.Rd

@ -19,9 +19,11 @@ render_jpeg(splash_obj = splash_local, url, base_url = NULL, quality = 75,
\item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.}
\item{width}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional}
\item{width}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional}
\item{height}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without
reconfiguring the startup parameters of the Splash server (not this package)
@ -31,7 +33,8 @@ the maximum allowed value for the timeout is 60 seconds.}
\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage
(possibly very tall) before rendering.}
\item{proxy}{Proxy profile name or proxy URL.}

9
man/render_json.Rd

@ -21,9 +21,11 @@ render_json(splash_obj = splash_local, url, base_url = NULL, quality = 75,
\item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.}
\item{width}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional}
\item{width}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional}
\item{height}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without
reconfiguring the startup parameters of the Splash server (not this package)
@ -33,7 +35,8 @@ the maximum allowed value for the timeout is 60 seconds.}
\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage
(possibly very tall) before rendering.}
\item{proxy}{Proxy profile name or proxy URL.}

10
man/render_png.Rd

@ -2,7 +2,7 @@
% Please edit documentation in R/render-png.r
\name{render_png}
\alias{render_png}
\title{Return a image (in PNG format) of the javascript-rendered page.}
\title{Return an image (in PNG format) of the javascript-rendered page.}
\usage{
render_png(splash_obj = splash_local, url, base_url = NULL, width, height,
timeout = 30, resource_timeout, wait = 0, render_all = TRUE, proxy, js,
@ -17,7 +17,8 @@ render_png(splash_obj = splash_local, url, base_url = NULL, width, height,
\item{base_url}{The base url to render the page with.}
\item{width, height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio. These are optional}
\item{width, height}{Resize the rendered image to the given width/height (in
pixels) keeping the aspect ratio. These are optional}
\item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without
reconfiguring the startup parameters of the Splash server (not this package)
@ -27,7 +28,8 @@ the maximum allowed value for the timeout is 60 seconds.}
\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering.}
\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage
(possibly very tall) before rendering.}
\item{proxy}{Proxy profile name or proxy URL.}
@ -68,7 +70,7 @@ web page. Format is “<width>x<height>”, e.g. 800x600. Default value is "full
a \link{magick} image object
}
\description{
Return a image (in PNG format) of the javascript-rendered page.
Return an image (in PNG format) of the javascript-rendered page.
}
\examples{
\dontrun{

7
man/splash.Rd

@ -5,9 +5,9 @@
\alias{splash}
\alias{splash_local}
\title{Configure parameters for connecting to a Splash server}
\format{An object of class \code{list} of length 2.}
\format{An object of class \code{list} of length 4.}
\usage{
splash(host, port = 8050L)
splash(host, port = 8050L, user = NULL, pass = NULL)
splash_local
}
@ -15,6 +15,9 @@ splash_local
\item{host}{host or IP address}
\item{port}{port the server is running on (default is 8050)}
\item{user, pass}{leave `NULL` if basic auth is not configured. Otherwise,
fill in what you need for basic authentication.}
}
\description{
Configure parameters for connecting to a Splash server

2
man/splash_user_agent.Rd

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.r, R/user-agents.R
\docType{data}
\name{splash_user_agent}
\alias{splash_user_agent}

Loading…
Cancel
Save