initial commit

7 years ago · 5c2b43b154
26 changed files with 808 additions and 0 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@ -0,0 +1,8 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+^\.travis\.yml$
+^README\.*Rmd$
+^README\.*html$
+^NOTES\.*Rmd$
+^NOTES\.*html$
+^img$
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,7 @@
+.Rproj.user
+.Rhistory
+.RData
+.Rproj
+src/*.o
+src/*.so
+src/*.dll
--- a/.travis.yml
+++ b/.travis.yml
@ -0,0 +1,24 @@
+language: r
+warnings_are_errors: true
+sudo: required
+
+r:
+ - oldrel
+ - release
+ - devel
+
+apt_packages:
+  - libv8-dev
+  - xclip
+
+env:
+ global:
+   - CRAN: http://cran.rstudio.com
+
+notifications:
+  email:
+    - bob@rud.is
+  irc:
+    channels:
+      - "104.236.112.222#builds"
+    nick: travisci
--- a/DESCRIPTION
+++ b/DESCRIPTION
@ -0,0 +1,31 @@
+Package: splashr
+Type: Package
+Title: Tools to Work with the 'Splash' JavaScript Rendering Service
+Version: 0.1.0
+Date: 2017-02-03
+Encoding: UTF-8
+Author: Bob Rudis (bob@rud.is)
+Maintainer: Bob Rudis <bob@rud.is>
+Description: 'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service.
+    It’s a lightweight web browser with an 'HTTP' API, implemented in Python using 'Twisted' 
+    and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes'
+    R packages but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the
+    server fully asynchronous allowing to take advantage of 'webkit' concurrency via QT main loop.
+    Some of Splash features include the ability to process multiple webpages in parallel; 
+    retrieving HTML results and/or take screenshots; disabling images or use Adblock Plus rules 
+    to make rendering faster; executing custom JavaScript in page context; getting detailed
+    rendering info in HAR format.
+URL: http://github.com/hrbrmstr/splashr
+BugReports: https://github.com/hrbrmstr/splashr/issues
+License: AGPL
+Suggests:
+    testthat
+Depends:
+    R (>= 3.2.0)
+Imports:
+    purrr,
+    httr,
+    xml2,
+    jsonlite,
+    magick
+RoxygenNote: 6.0.0
--- a/NAMESPACE
+++ b/NAMESPACE
@ -0,0 +1,16 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(print,splash_debug)
+S3method(print,splash_status)
+export("%>%")
+export(render_html)
+export(render_jpeg)
+export(render_png)
+export(splash)
+export(splash_active)
+export(splash_debug)
+import(httr)
+import(magick)
+import(purrr)
+importFrom(jsonlite,fromJSON)
+importFrom(xml2,read_html)
--- a/NEWS.md
+++ b/NEWS.md
@ -0,0 +1,2 @@
+0.1.0 
+* Initial release
--- a/R/render-html.r
+++ b/R/render-html.r
@ -0,0 +1,41 @@
+#' Return the HTML of the javascript-rendered page.
+#'
+#' Similar to `rvest::read_html`.
+#'
+#' @md
+#' @param splash_obj Object created by a call to [splash]
+#' @param url The URL to render (required)
+#' @param base_url TBD The base url to render the page with.
+#' @param timeout TBD A timeout (in seconds) for the render (defaults to 30).
+#' @param resource_timeout A timeout (in seconds) for individual network requests. Only sent to Splash when not `NULL` (the default).
+#' @param wait Time (in seconds) to wait for updates after page is loaded (defaults to 0).
+#' @param proxy TBD Proxy profile name or proxy URL.
+#' @param js TBD Javascript profile name.
+#' @param js_src TBD JavaScript code to be executed in page context.
+#' @param filters TBD Comma-separated list of request filter names.
+#' @param allowed_domains TBD Comma-separated list of allowed domain names. If present, Splash won’t load anything neither from domains not in this list nor from subdomains of domains not in this list.
+#' @param allowed_content_types TBD Comma-separated list of allowed content types. If present, Splash will abort any request if the response’s content type doesn’t match any of the content types in this list. Wildcards are supported.
+#' @param forbidden_content_types TBD Comma-separated list of forbidden content types. If present, Splash will abort any request if the response’s content type matches any of the content types in this list. Wildcards are supported.
+#' @param viewport View width and height (in pixels) of the browser viewport to render the web page. Format is “<width>x<height>”, e.g. 800x600. Default value is 1024x768.
+#' @param images TBD Whether to download images.
+#' @param headers TBD HTTP headers to set for the first outgoing request.
+#' @param body TBD Body of HTTP POST request to be sent if method is POST.
+#' @param http_method TBD HTTP method of outgoing Splash request.
+#' @param save_args TBD A list of argument names to put in cache.
+#' @param load_args TBD Parameter values to load from cache
+#' @return The rendered page parsed with `xml2::read_html()`.
+#' @export
+render_html <- function(splash_obj, url, base_url, timeout=30, resource_timeout=NULL, wait=0,
+                        proxy, js, js_src, filters, allowed_domains="", allowed_content_types="",
+                        forbidden_content_types="", viewport="1024x768", images, headers, body,
+                        http_method, save_args, load_args) {
+
+  # Arguments marked TBD in the documentation are accepted but not yet forwarded.
+  params <- list(url=url, timeout=timeout, wait=wait, viewport=viewport)
+
+  # Only send `resource_timeout` when the caller supplied one (default is NULL).
+  if (!is.null(resource_timeout)) params$resource_timeout <- resource_timeout
+
+  res <- httr::GET(splash_url(splash_obj), path="render.html",
+                   encode="json",
+                   query=params)
+
+  # Turn HTTP error statuses into R errors before touching the body.
+  httr::stop_for_status(res)
+
+  httr::content(res, as="text", encoding="UTF-8") %>%
+    xml2::read_html()
+
+}
--- a/R/render-jpg.r
+++ b/R/render-jpg.r
@ -0,0 +1,23 @@
+#' Return an image (in JPEG format) of the javascript-rendered page.
+#'
+#' @md
+#' @param quality JPEG quality parameter in range from 0 to 100. Default is quality=75.
+#' @inheritParams render_html
+#' @inheritParams render_png
+#' @return The rendered image as read by `magick::image_read()`.
+#' @export
+render_jpeg <- function(splash_obj, url, base_url=NULL, quality=75, width=1024, height=768,
+                        timeout=30, resource_timeout=NULL, wait=0, render_all=FALSE,
+                        proxy, js, js_src, filters, allowed_domains="", allowed_content_types="",
+                        forbidden_content_types="", viewport="1024x768", images, headers, body,
+                        http_method, save_args, load_args) {
+
+  # `render_all` is sent as 0/1 rather than as an R logical.
+  params <- list(url=url, timeout=timeout, wait=wait, viewport=viewport,
+                 quality=quality, width=width, height=height, render_all=as.numeric(render_all))
+
+  # Only send `resource_timeout` when the caller supplied one (default is NULL).
+  if (!is.null(resource_timeout)) params$resource_timeout <- resource_timeout
+
+  res <- httr::GET(splash_url(splash_obj), path="render.jpeg",
+                   encode="json",
+                   query=params)
+
+  # Turn HTTP error statuses into R errors before decoding the image bytes.
+  httr::stop_for_status(res)
+
+  magick::image_read(httr::content(res, as="raw"))
+
+}
--- a/R/render-png.r
+++ b/R/render-png.r
@ -0,0 +1,23 @@
+#' Return an image (in PNG format) of the javascript-rendered page.
+#'
+#' @md
+#' @param width,height Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio.
+#' @param render_all If `TRUE` extend the viewport to include the whole webpage (possibly very tall) before rendering. Default is `FALSE`
+#' @inheritParams render_html
+#' @return The rendered image as read by `magick::image_read()`.
+#' @export
+render_png <- function(splash_obj, url, base_url=NULL, width=1024, height=768, render_all=FALSE,
+                       timeout=30, resource_timeout=NULL, wait=0,
+                       proxy, js, js_src, filters, allowed_domains="", allowed_content_types="",
+                       forbidden_content_types="", viewport="1024x768", images, headers, body,
+                       http_method, save_args, load_args) {
+
+  # `render_all` is sent as 0/1 rather than as an R logical.
+  params <- list(url=url, timeout=timeout, wait=wait, viewport=viewport,
+                 width=width, height=height, render_all=as.numeric(render_all))
+
+  # Only send `resource_timeout` when the caller supplied one (default is NULL).
+  if (!is.null(resource_timeout)) params$resource_timeout <- resource_timeout
+
+  res <- httr::GET(splash_url(splash_obj), path="render.png",
+                   encode="json",
+                   query=params)
+
+  # Turn HTTP error statuses into R errors before decoding the image bytes.
+  httr::stop_for_status(res)
+
+  magick::image_read(httr::content(res, as="raw"))
+
+}
--- a/R/splashr-package.R
+++ b/R/splashr-package.R
@ -0,0 +1,33 @@
+#' Tools to Work with the 'Splash' JavaScript Rendering Service
+#'
+#' 'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service.
+#' It’s a lightweight web browser with an 'HTTP' API, implemented in Python using
+#' 'Twisted' and 'QT' and provides some of the core functionality of the 'RSelenium' or
+#' 'seleniumPipes' R packages but with a Java-free footprint. The (twisted) 'QT' reactor is
+#' used to make the server fully asynchronous allowing to take advantage of 'webkit'
+#' concurrency via QT main loop. Some of Splash features include the ability to process
+#' multiple webpages in parallel; retrieving HTML results and/or take screenshots;
+#' disabling images or use Adblock Plus rules to make rendering faster; executing custom
+#' JavaScript in page context; getting detailed rendering info in HAR format.
+#'
+#' @name splashr
+#' @docType package
+#' @author Bob Rudis (bob@@rud.is)
+#' @import purrr httr magick
+#' @importFrom xml2 read_html
+#' @importFrom jsonlite fromJSON
+NULL
+
+#' splashr exported operators
+#'
+#' The following functions are imported and then re-exported
+#' from the splashr package to enable use of the magrittr
+#' pipe operator with no additional library calls
+#'
+#' @name splashr-exports
+NULL
+
+#' @name %>%
+#' @export
+#' @rdname splashr-exports
+NULL
--- a/R/splashr.r
+++ b/R/splashr.r
@ -0,0 +1,64 @@
+# Build the base URL ("http://<host>:<port>") from a splash connection object.
+splash_url <- function(splash_obj) {
+  sprintf("http://%s:%s", splash_obj$host, splash_obj$port)
+}
+
+#' Configure parameters for connecting to a Splash server
+#'
+#' @param host host name or IP address of the Splash server
+#' @param port port the Splash server is running on (default is 8050)
+#' @return a list with elements \code{host} and \code{port}
+#' @export
+splash <- function(host, port=8050L) {
+  conn <- list(host=host, port=port)
+  conn
+}
+
+#' Test if a Splash server is up
+#'
+#' Queries the server's \code{_ping} endpoint and errors on an HTTP error status.
+#'
+#' @param splash_obj A splash connection object
+#' @return the parsed JSON response with the server \code{url} added and class
+#'   \code{splash_status} prepended
+#' @export
+splash_active <- function(splash_obj) {
+
+  base <- splash_url(splash_obj)
+
+  res <- httr::GET(base, path="_ping")
+  httr::stop_for_status(res)
+
+  status <- jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"))
+
+  # Remember which server answered so the print method can report it.
+  status$url <- base
+  class(status) <- c("splash_status", class(status))
+
+  status
+
+}
+
+#' @rdname splash_active
+#' @keywords internal
+#' @export
+print.splash_status <- function(x, ...) {
+  # One-line summary: server URL, reported status and max RSS.
+  msg <- sprintf("Status of splash instance on [%s]: %s. Max RSS: %s\n", x$url, x$status, x$maxrss)
+  cat(msg)
+  invisible(x)
+}
+
+#' Retrieve debug-level info for a Splash server
+#'
+#' Queries the server's \code{_debug} endpoint and errors on an HTTP error status.
+#'
+#' @param splash_obj A splash connection object
+#' @return the parsed JSON response with the server \code{url} added and class
+#'   \code{splash_debug} prepended
+#' @export
+splash_debug <- function(splash_obj) {
+
+  base <- splash_url(splash_obj)
+
+  res <- httr::GET(base, path="_debug")
+  httr::stop_for_status(res)
+
+  info <- jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"))
+
+  # Record which server the debug info came from.
+  info$url <- base
+  class(info) <- c("splash_debug", class(info))
+
+  info
+
+}
+
+#' @rdname splash_debug
+#' @param x A \code{splash_debug} object
+#' @param ... ignored
+#' @keywords internal
+#' @export
+print.splash_debug <- function(x, ...) {
+  # str() writes the structure itself and returns NULL invisibly; wrapping it in
+  # print() emitted a stray "NULL" line after the listing.
+  str(x)
+  invisible(x)
+}
--- a/README.Rmd
+++ b/README.Rmd
@ -0,0 +1,107 @@
+---
+output: rmarkdown::github_document
+---
+
+`splashr` : Tools to Work with the 'Splash' JavaScript Rendering Service
+
+**Ridiculously basic functionality working at the moment. More coming soon**
+
+TL;DR: This package works with Splash rendering servers which are really just a REST API & `lua` scripting interface to a QT browser. It's an alternative to the Selenium ecosystem and does not do everything Selenium can, but if you're just trying to get a page back that needs javascript rendering, this is a nice alternative. 
+
+You can also get it running with two commands:
+
+    sudo docker pull scrapinghub/splash
+    sudo docker run -p 5023:5023 -p 8050:8050 -p 8051:8051 scrapinghub/splash
+    
+(Do whatever you Windows ppl do with Docker on your systems to make ^^ work.)
+
+All you need for this package to work is a running Splash instance. You provide the host/port for it and it's scrape-tastic from there.
+
+### About Splash
+
+>'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service. It’s a lightweight web browser with an 'HTTP' API, implemented in Python using 'Twisted' and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes' R packages but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the server fully asynchronous allowing to take advantage of 'webkit' concurrency via QT main loop. Some of Splash features include the ability to process multiple webpages in parallel; retrieving HTML results and/or take screenshots; disabling images or use Adblock Plus rules to make rendering faster; executing custom JavaScript in page context; getting detailed rendering info in HAR format.
+    
+The following functions are implemented:
+
+- `render_html`:	Return the HTML of the javascript-rendered page.
+- `render_jpeg`:	Return an image (in JPEG format) of the javascript-rendered page.
+- `render_png`:	Return an image (in PNG format) of the javascript-rendered page.
+- `splash`:	Configure parameters for connecting to a Splash server
+- `splashr`:	Tools to Work with the 'Splash' JavaScript Rendering Service
+
+### Installation
+
+```{r eval=FALSE}
+devtools::install_github("hrbrmstr/splashr")
+```
+
+```{r message=FALSE, warning=FALSE, error=FALSE}
+options(width=120)
+```
+
+### Usage
+
+```{r message=FALSE, warning=FALSE, error=FALSE}
+library(splashr)
+library(magick)
+library(rvest)
+
+# current version
+packageVersion("splashr")
+
+splash("splash", 8050L) %>%
+  splash_active()
+
+splash("splash", 8050L) %>%
+  splash_debug()
+```
+
+Notice the difference between a rendered HTML scrape and a non-rendered one:
+
+```{r}
+splash("splash", 8050L) %>%
+  render_html("http://marvel.com/universe/Captain_America_(Steve_Rogers)")
+
+read_html("http://marvel.com/universe/Captain_America_(Steve_Rogers)")
+```
+
+Web page snapshots are easy-peasy too:
+
+```{r eval=FALSE}
+splash("splash", 8050L) %>%
+  render_png("http://marvel.com/universe/Captain_America_(Steve_Rogers)")
+```
+
+```{r eval=TRUE, include=FALSE}
+splash("splash", 8050L) %>%
+  render_png("http://marvel.com/universe/Captain_America_(Steve_Rogers)") %>% 
+  image_write("img/cap.png")
+```
+
+![](img/cap.png)
+
+```{r eval=FALSE}
+splash("splash", 8050L) %>%
+  render_jpeg("http://marvel.com/universe/Captain_America_(Steve_Rogers)") 
+```
+
+```{r eval=TRUE, include=FALSE}
+splash("splash", 8050L) %>%
+  render_jpeg("http://marvel.com/universe/Captain_America_(Steve_Rogers)") %>% 
+  image_write("img/cap.jpg")
+```
+
+![](img/cap.jpg)
+
+### Test Results
+
+```{r message=FALSE, warning=FALSE, error=FALSE}
+library(splashr)
+library(testthat)
+
+date()
+
+test_dir("tests/")
+```
+
+```{r eval = FALSE, include = FALSE}
--- a/README.md
+++ b/README.md
@ -0,0 +1,134 @@
+
+`splashr` : Tools to Work with the 'Splash' JavaScript Rendering Service
+
+**Ridiculously basic functionality working at the moment. More coming soon**
+
+TL;DR: This package works with Splash rendering servers which are really just a REST API & `lua` scripting interface to a QT browser. It's an alternative to the Selenium ecosystem and does not do everything Selenium can, but if you're just trying to get a page back that needs javascript rendering, this is a nice alternative.
+
+You can also get it running with two commands:
+
+    sudo docker pull scrapinghub/splash
+    sudo docker run -p 5023:5023 -p 8050:8050 -p 8051:8051 scrapinghub/splash
+
+(Do whatever you Windows ppl do with Docker on your systems to make ^^ work.)
+
+All you need for this package to work is a running Splash instance. You provide the host/port for it and it's scrape-tastic from there.
+
+### About Splash
+
+> 'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service. It’s a lightweight web browser with an 'HTTP' API, implemented in Python using 'Twisted' and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes' R packages but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the server fully asynchronous allowing to take advantage of 'webkit' concurrency via QT main loop. Some of Splash features include the ability to process multiple webpages in parallel; retrieving HTML results and/or take screenshots; disabling images or use Adblock Plus rules to make rendering faster; executing custom JavaScript in page context; getting detailed rendering info in HAR format.
+
+The following functions are implemented:
+
+-   `render_html`: Return the HTML of the javascript-rendered page.
+-   `render_jpeg`: Return an image (in JPEG format) of the javascript-rendered page.
+-   `render_png`: Return an image (in PNG format) of the javascript-rendered page.
+-   `splash`: Configure parameters for connecting to a Splash server
+-   `splashr`: Tools to Work with the 'Splash' JavaScript Rendering Service
+
+### Installation
+
+``` r
+devtools::install_github("hrbrmstr/splashr")
+```
+
+``` r
+options(width=120)
+```
+
+### Usage
+
+``` r
+library(splashr)
+library(magick)
+library(rvest)
+
+# current version
+packageVersion("splashr")
+```
+
+    ## [1] '0.1.0'
+
+``` r
+splash("splash", 8050L) %>%
+  splash_active()
+```
+
+    ## Status of splash instance on [http://splash:8050]: ok. Max RSS: 349298688
+
+``` r
+splash("splash", 8050L) %>%
+  splash_debug()
+```
+
+    ## List of 7
+    ##  $ active  : list()
+    ##  $ argcache: int 0
+    ##  $ fds     : int 18
+    ##  $ leaks   :List of 4
+    ##   ..$ Deferred  : int 50
+    ##   ..$ LuaRuntime: int 1
+    ##   ..$ QTimer    : int 1
+    ##   ..$ Request   : int 1
+    ##  $ maxrss  : int 341112
+    ##  $ qsize   : int 0
+    ##  $ url     : chr "http://splash:8050"
+    ##  - attr(*, "class")= chr [1:2] "splash_debug" "list"
+    ## NULL
+
+Notice the difference between a rendered HTML scrape and a non-rendered one:
+
+``` r
+splash("splash", 8050L) %>%
+  render_html("http://marvel.com/universe/Captain_America_(Steve_Rogers)")
+```
+
+    ## {xml_document}
+    ## <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
+    ## [1] <head>\n<script src="http://widget-cdn.rpxnow.com/manifest/login?version=1.114.1_widgets_244" type="text/javascri ...
+    ## [2] <body id="index-index" class="index-index" onload="findLinks('myLink');">\n\n\t<div id="page_frame" style="overfl ...
+
+``` r
+read_html("http://marvel.com/universe/Captain_America_(Steve_Rogers)")
+```
+
+    ## {xml_document}
+    ## <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
+    ## [1] <head>\n<meta http-equiv="X-UA-Compatible" content="IE=Edge">\n<link href="https://plus.google.com/10852333737344 ...
+    ## [2] <body id="index-index" class="index-index" onload="findLinks('myLink');">\n\n\t<div id="page_frame" style="overfl ...
+
+Web page snapshots are easy-peasy too:
+
+``` r
+splash("splash", 8050L) %>%
+  render_png("http://marvel.com/universe/Captain_America_(Steve_Rogers)")
+```
+
+![](img/cap.png)
+
+``` r
+splash("splash", 8050L) %>%
+  render_jpeg("http://marvel.com/universe/Captain_America_(Steve_Rogers)") 
+```
+
+![](img/cap.jpg)
+
+### Test Results
+
+``` r
+library(splashr)
+library(testthat)
+
+date()
+```
+
+    ## [1] "Fri Feb  3 14:58:40 2017"
+
+``` r
+test_dir("tests/")
+```
+
+    ## testthat results ========================================================================================================
+    ## OK: 0 SKIPPED: 0 FAILED: 0
+    ## 
+    ## DONE ===================================================================================================================
--- a/img/cap.jpg
+++ b/img/cap.jpg
--- a/img/cap.png
+++ b/img/cap.png
--- a/man/render_html.Rd
+++ b/man/render_html.Rd
@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/render-html.r
+\name{render_html}
+\alias{render_html}
+\title{Return the HTML of the javascript-rendered page.}
+\usage{
+render_html(splash_obj, url, base_url, timeout = 30,
+  resource_timeout = NULL, wait = 0, proxy, js, js_src, filters,
+  allowed_domains = "", allowed_content_types = "",
+  forbidden_content_types = "", viewport = "1024x768", images, headers,
+  body, http_method, save_args, load_args)
+}
+\arguments{
+\item{splash_obj}{Object created by a call to \link{splash}}
+
+\item{url}{The URL to render (required)}
+
+\item{base_url}{TBD The base url to render the page with.}
+
+\item{timeout}{TBD A timeout (in seconds) for the render (defaults to 30).}
+
+\item{resource_timeout}{A timeout (in seconds) for individual network requests.}
+
+\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
+
+\item{proxy}{TBD Proxy profile name or proxy URL.}
+
+\item{js}{TBD Javascript profile name.}
+
+\item{js_src}{TBD JavaScript code to be executed in page context.}
+
+\item{filters}{TBD Comma-separated list of request filter names.}
+
+\item{allowed_domains}{TBD Comma-separated list of allowed domain names. If present, Splash won’t load anything neither from domains not in this list nor from subdomains of domains not in this list.}
+
+\item{allowed_content_types}{TBD Comma-separated list of allowed content types. If present, Splash will abort any request if the response’s content type doesn’t match any of the content types in this list. Wildcards are supported.}
+
+\item{forbidden_content_types}{TBD Comma-separated list of forbidden content types. If present, Splash will abort any request if the response’s content type matches any of the content types in this list. Wildcards are supported.}
+
+\item{viewport}{View width and height (in pixels) of the browser viewport to render the web page. Format is “<width>x<height>”, e.g. 800x600. Default value is 1024x768.}
+
+\item{images}{TBD Whether to download images.}
+
+\item{headers}{TBD HTTP headers to set for the first outgoing request.}
+
+\item{body}{TBD Body of HTTP POST request to be sent if method is POST.}
+
+\item{http_method}{TBD HTTP method of outgoing Splash request.}
+
+\item{save_args}{TBD A list of argument names to put in cache.}
+
+\item{load_args}{TBD Parameter values to load from cache}
+}
+\description{
+Similar to \code{rvest::read_html}.
+}
--- a/man/render_jpeg.Rd
+++ b/man/render_jpeg.Rd
@ -0,0 +1,65 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/render-jpg.r
+\name{render_jpeg}
+\alias{render_jpeg}
+\title{Return an image (in JPEG format) of the javascript-rendered page.}
+\usage{
+render_jpeg(splash_obj, url, base_url = NULL, quality = 75, width = 1024,
+  height = 768, timeout = 30, resource_timeout = NULL, wait = 0,
+  render_all = FALSE, proxy, js, js_src, filters, allowed_domains = "",
+  allowed_content_types = "", forbidden_content_types = "",
+  viewport = "1024x768", images, headers, body, http_method, save_args,
+  load_args)
+}
+\arguments{
+\item{splash_obj}{Object created by a call to \link{splash}}
+
+\item{url}{The URL to render (required)}
+
+\item{base_url}{TBD The base url to render the page with.}
+
+\item{quality}{JPEG quality parameter in range from 0 to 100. Default is quality=75.}
+
+\item{width}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio.}
+
+\item{height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio.}
+
+\item{timeout}{TBD A timeout (in seconds) for the render (defaults to 30).}
+
+\item{resource_timeout}{A timeout (in seconds) for individual network requests.}
+
+\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
+
+\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering. Default is \code{FALSE}}
+
+\item{proxy}{TBD Proxy profile name or proxy URL.}
+
+\item{js}{TBD Javascript profile name.}
+
+\item{js_src}{TBD JavaScript code to be executed in page context.}
+
+\item{filters}{TBD Comma-separated list of request filter names.}
+
+\item{allowed_domains}{TBD Comma-separated list of allowed domain names. If present, Splash won’t load anything neither from domains not in this list nor from subdomains of domains not in this list.}
+
+\item{allowed_content_types}{TBD Comma-separated list of allowed content types. If present, Splash will abort any request if the response’s content type doesn’t match any of the content types in this list. Wildcards are supported.}
+
+\item{forbidden_content_types}{TBD Comma-separated list of forbidden content types. If present, Splash will abort any request if the response’s content type matches any of the content types in this list. Wildcards are supported.}
+
+\item{viewport}{View width and height (in pixels) of the browser viewport to render the web page. Format is “<width>x<height>”, e.g. 800x600. Default value is 1024x768.}
+
+\item{images}{TBD Whether to download images.}
+
+\item{headers}{TBD HTTP headers to set for the first outgoing request.}
+
+\item{body}{TBD Body of HTTP POST request to be sent if method is POST.}
+
+\item{http_method}{TBD HTTP method of outgoing Splash request.}
+
+\item{save_args}{TBD A list of argument names to put in cache.}
+
+\item{load_args}{TBD Parameter values to load from cache}
+}
+\description{
+Return an image (in JPEG format) of the javascript-rendered page.
+}
--- a/man/render_png.Rd
+++ b/man/render_png.Rd
@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/render-png.r
+\name{render_png}
+\alias{render_png}
+\title{Return an image (in PNG format) of the javascript-rendered page.}
+\usage{
+render_png(splash_obj, url, base_url = NULL, width = 1024, height = 768,
+  render_all = FALSE, timeout = 30, resource_timeout = NULL, wait = 0,
+  proxy, js, js_src, filters, allowed_domains = "",
+  allowed_content_types = "", forbidden_content_types = "",
+  viewport = "1024x768", images, headers, body, http_method, save_args,
+  load_args)
+}
+\arguments{
+\item{splash_obj}{Object created by a call to \link{splash}}
+
+\item{url}{The URL to render (required)}
+
+\item{base_url}{TBD The base url to render the page with.}
+
+\item{width, height}{Resize the rendered image to the given width/height (in pixels) keeping the aspect ratio.}
+
+\item{render_all}{If \code{TRUE} extend the viewport to include the whole webpage (possibly very tall) before rendering. Default is \code{FALSE}}
+
+\item{timeout}{TBD A timeout (in seconds) for the render (defaults to 30).}
+
+\item{resource_timeout}{A timeout (in seconds) for individual network requests.}
+
+\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
+
+\item{proxy}{TBD Proxy profile name or proxy URL.}
+
+\item{js}{TBD Javascript profile name.}
+
+\item{js_src}{TBD JavaScript code to be executed in page context.}
+
+\item{filters}{TBD Comma-separated list of request filter names.}
+
+\item{allowed_domains}{TBD Comma-separated list of allowed domain names. If present, Splash won’t load anything neither from domains not in this list nor from subdomains of domains not in this list.}
+
+\item{allowed_content_types}{TBD Comma-separated list of allowed content types. If present, Splash will abort any request if the response’s content type doesn’t match any of the content types in this list. Wildcards are supported.}
+
+\item{forbidden_content_types}{TBD Comma-separated list of forbidden content types. If present, Splash will abort any request if the response’s content type matches any of the content types in this list. Wildcards are supported.}
+
+\item{viewport}{View width and height (in pixels) of the browser viewport to render the web page. Format is “<width>x<height>”, e.g. 800x600. Default value is 1024x768.}
+
+\item{images}{TBD Whether to download images.}
+
+\item{headers}{TBD HTTP headers to set for the first outgoing request.}
+
+\item{body}{TBD Body of HTTP POST request to be sent if method is POST.}
+
+\item{http_method}{TBD HTTP method of outgoing Splash request.}
+
+\item{save_args}{TBD A list of argument names to put in cache.}
+
+\item{load_args}{TBD Parameter values to load from cache}
+}
+\description{
+Return an image (in PNG format) of the javascript-rendered page.
+}
--- a/man/splash.Rd
+++ b/man/splash.Rd
@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splashr.r
+\name{splash}
+\alias{splash}
+\title{Configure parameters for connecting to a Splash server}
+\usage{
+splash(host, port = 8050L)
+}
+\arguments{
+\item{host}{host or IP address}
+
+\item{port}{port the server is running on (default is 8050)}
+}
+\description{
+Configure parameters for connecting to a Splash server
+}
--- a/man/splash_active.Rd
+++ b/man/splash_active.Rd
@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splashr.r
+\name{splash_active}
+\alias{splash_active}
+\alias{print.splash_status}
+\title{Test if a Splash server is up}
+\usage{
+splash_active(splash_obj)
+
+\method{print}{splash_status}(x, ...)
+}
+\arguments{
+\item{splash_obj}{A splash connection object}
+}
+\description{
+Test if a Splash server is up
+}
+\keyword{internal}
--- a/man/splash_debug.Rd
+++ b/man/splash_debug.Rd
@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splashr.r
+\name{splash_debug}
+\alias{splash_debug}
+\alias{print.splash_debug}
+\title{Retrieve debug-level info for a Splash server}
+\usage{
+splash_debug(splash_obj)
+
+\method{print}{splash_debug}(x, ...)
+}
+\arguments{
+\item{splash_obj}{A splash connection object}
+}
+\description{
+Retrieve debug-level info for a Splash server
+}
+\keyword{internal}
--- a/man/splashr-exports.Rd
+++ b/man/splashr-exports.Rd
@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splashr-package.R
+\name{splashr-exports}
+\alias{splashr-exports}
+\alias{\%>\%}
+\title{splashr exported operators}
+\description{
+The following functions are imported and then re-exported
+from the splashr package to enable use of the magrittr
+pipe operator with no additional library calls
+}
--- a/man/splashr.Rd
+++ b/man/splashr.Rd
@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/splashr-package.R
+\docType{package}
+\name{splashr}
+\alias{splashr}
+\alias{splashr-package}
+\title{Tools to Work with the 'Splash' JavaScript Rendering Service}
+\description{
+'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service.
+It’s a lightweight web browser with an 'HTTP' API, implemented in Python using
+'Twisted' and 'QT' and provides some of the core functionality of the 'RSelenium' or
+'seleniumPipes' R packages but with a Java-free footprint. The (twisted) 'QT' reactor is
+used to make the server fully asynchronous allowing to take advantage of 'webkit'
+concurrency via QT main loop. Some of Splash features include the ability to process
+multiple webpages in parallel; retrieving HTML results and/or take screenshots;
+disabling images or use Adblock Plus rules to make rendering faster; executing custom
+JavaScript in page context; getting detailed rendering info in HAR format.
+}
+\author{
+Bob Rudis (bob@rud.is)
+}
--- a/splashr.Rproj
+++ b/splashr.Rproj
@ -0,0 +1,21 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageBuildArgs: --resave-data
+PackageRoxygenize: rd,collate,namespace
--- a/tests/test-all.R
+++ b/tests/test-all.R
@ -0,0 +1,2 @@
+library(testthat)  # test framework used by this package's test suite
+test_check("splashr")  # entry point that runs the testthat tests for the "splashr" package
--- a/tests/testthat/test-splash.R
+++ b/tests/testthat/test-splash.R
@ -0,0 +1,6 @@
+context("basic functionality")
+test_that("we can do something", {
+  # Placeholder test: no expectations are defined yet; the line below is a commented-out example.
+  #expect_that(some_function(), is_a("data.frame"))
+
+})