diff --git a/DESCRIPTION b/DESCRIPTION index fb20ec8..f516e54 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,5 +27,6 @@ Imports: httr, xml2, jsonlite, - magick + magick, + HARtools RoxygenNote: 6.0.0 diff --git a/NAMESPACE b/NAMESPACE index 4110b3f..85e2d73 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,12 @@ # Generated by roxygen2: do not edit by hand S3method(print,splash_debug) -S3method(print,splash_har) S3method(print,splash_json) S3method(print,splash_status) export("%>%") +export(HARviewer) +export(HARviewerOutput) +export(renderHARviewer) export(render_har) export(render_html) export(render_jpeg) @@ -13,9 +15,14 @@ export(render_png) export(splash) export(splash_active) export(splash_debug) +export(writeHAR) import(httr) import(magick) import(purrr) +importFrom(HARtools,HARviewer) +importFrom(HARtools,HARviewerOutput) +importFrom(HARtools,renderHARviewer) +importFrom(HARtools,writeHAR) importFrom(jsonlite,fromJSON) importFrom(xml2,read_html) importFrom(xml2,url_parse) diff --git a/R/render-har.r b/R/render-har.r index 6a6efd8..a573235 100644 --- a/R/render-har.r +++ b/R/render-har.r @@ -6,9 +6,7 @@ #' @md #' @param response_body When `TRUE`, response content is included in the HAR records #' @inheritParams render_html -#' @return a huge `list` -#' @note a custom `print` method is defined to stop your console from being -#' overwhelmed with data. Use [str] to inspect various portions of the result. 
+#' @return a [HARtools::har] object #' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html) #' @export render_har <- function(splash_obj, url, base_url, response_body=FALSE, timeout=30, resource_timeout, wait=0, @@ -40,16 +38,33 @@ render_har <- function(splash_obj, url, base_url, response_body=FALSE, timeout=3 httr::stop_for_status(res) out <- httr::content(res, as="text", encoding="UTF-8") - out <- jsonlite::fromJSON(out) + spl <- jsonlite::fromJSON(out, flatten=FALSE, simplifyVector=FALSE) - class(out) <- c("splash_har", class(out)) + sphar <- list( + log=list( + version=spl$log$version, + creator=spl$log$creator, + browser=spl$log$browser, + pages=spl$log$pages, + entries=spl$log$entries + ) + ) - out + class(sphar$log$creator) <- c("harcreator", "list") + class(sphar$log$version) <- c("harversion", "character") + class(sphar$log$browser) <- c("harbrowser", "list") + class(sphar$log$pages) <- c("harpages", "list") + class(sphar$log$entries) <- c("harentries", "list") + class(sphar$log) <- c("harlog", "list") + class(sphar) <- c("har", "list") -} + for (i in 1:length(sphar$log$pages)) class(sphar$log$pages[[i]]) <- c("harpage", "list") + for (i in 1:length(sphar$log$entries)) { + class(sphar$log$entries[[i]]) <- c("harentry", "list") + class(sphar$log$entries[[i]]$request) <- c("harrequest", "list") + class(sphar$log$entries[[i]]$response) <- c("harresponse", "list") + } -#' @export -print.splash_har <- function(x, ...) 
{ - cat("") - invisible(x) -} \ No newline at end of file + sphar + +} diff --git a/R/splashr-package.R b/R/splashr-package.R index b2b9cbd..b372784 100644 --- a/R/splashr-package.R +++ b/R/splashr-package.R @@ -15,6 +15,7 @@ #' @docType package #' @author Bob Rudis (bob@@rud.is) #' @import purrr httr magick +#' @importFrom HARtools writeHAR HARviewer renderHARviewer HARviewerOutput #' @importFrom xml2 read_html url_parse #' @importFrom jsonlite fromJSON NULL @@ -32,3 +33,23 @@ NULL #' @export #' @rdname splashr-exports NULL + +#' @name writeHAR +#' @export +#' @rdname splashr-exports +NULL + +#' @name HARviewer +#' @export +#' @rdname splashr-exports +NULL + +#' @name renderHARviewer +#' @export +#' @rdname splashr-exports +NULL + +#' @name HARviewerOutput +#' @export +#' @rdname splashr-exports +NULL diff --git a/README.Rmd b/README.Rmd index b122402..4924135 100644 --- a/README.Rmd +++ b/README.Rmd @@ -33,13 +33,15 @@ The following functions are implemented: - `render_png`: Return a image (in PNG format) of the javascript-rendered page. - `splash`: Configure parameters for connecting to a Splash server +Some functions from `HARtools` are imported/exported and `%>%` is imported/exported. + ### TODO Suggest more in a feature req! - Implement `render.json` - Implement `execute` (you can script Splash!) 
-- Add interation with [`HARtools`](https://github.com/johndharrison/HARtools) +- (more than ¾ done) Add integration with [`HARtools`](https://github.com/johndharrison/HARtools) - _Possibly_ writing R function wrappers to start Splash which would also support enabling javascript profiles, request filters and proxy profiles from with R directly, possibly using [`harbor`](https://github.com/wch/harbor) - Testing results with all combinations of parameters @@ -84,34 +86,16 @@ read_html("http://marvel.com/universe/Captain_America_(Steve_Rogers)") You can also profile pages: -```{r fig.width=11, fig.height=6.5, fig.retina=2} +```{r} splash("splash", 8050L) %>% render_har("http://www.poynter.org/") -> har -data_frame( - start=anytime::anytime(har$log$entries$startedDateTime), - end=(start + lubridate::milliseconds(har$log$entries$time)), - rsrc=sprintf("%02d: %s...", 1:length(start), substr(har$log$entries$request$url, 1, 30))) %>% - mutate(rsrc=factor(rsrc, levels=rev(rsrc))) %>% - bind_cols(xml2::url_parse(har$log$entries$request$url) %>% select(server)) -> df - -total_time <- diff(range(c(df$start, df$end))) -total_time <- sprintf("Total time: %s %s", - format(unclass(total_time), digits = getOption("digits")), - attr(total_time, "units")) - -ggplot(df) + - geom_segment(data=df, aes(x=start, xend=end, y=rsrc, yend=rsrc, color=server), - size=0.25) + - scale_x_datetime(expand=c(0,0)) + - labs(x=total_time, y=NULL, - title=sprintf("HAR Waterfalll Profile for [%s]", "http://www.poynter.org/")) + - theme_hrbrmstr_msc(grid="") + - theme(legend.position="none") + - theme(panel.background=element_rect(color="#2b2b2b", fill="#2b2b2b")) +print(har) ``` -And, web page snapshots are easy-peasy too: +You can use [`HARtools::HARviewer`](https://github.com/johndharrison/HARtools/blob/master/R/HARviewer.R) — which this pkg imports and re-exports — to view the HAR in an interactive HTML widget. 
+ +Full web page snapshots are easy-peasy too: ```{r eval=FALSE} splash("splash", 8050L) %>% diff --git a/README.md b/README.md index cd1b96c..71d1a2d 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,15 @@ The following functions are implemented: - `render_png`: Return a image (in PNG format) of the javascript-rendered page. - `splash`: Configure parameters for connecting to a Splash server +Some functions from `HARtools` are imported/exported and `%>%` is imported/exported. + ### TODO Suggest more in a feature req! - Implement `render.json` - Implement `execute` (you can script Splash!) -- Add interation with [`HARtools`](https://github.com/johndharrison/HARtools) +- (more than ¾ done) Add integration with [`HARtools`](https://github.com/johndharrison/HARtools) - *Possibly* writing R function wrappers to start Splash which would also support enabling javascript profiles, request filters and proxy profiles from with R directly, possibly using [`harbor`](https://github.com/wch/harbor) - Testing results with all combinations of parameters @@ -71,7 +73,7 @@ splash("splash", 8050L) %>% splash_active() ``` - ## Status of splash instance on [http://splash:8050]: ok. Max RSS: 356077568 + ## Status of splash instance on [http://splash:8050]: ok. 
Max RSS: 378863616 ``` r splash("splash", 8050L) %>% @@ -87,7 +89,7 @@ splash("splash", 8050L) %>% ## ..$ LuaRuntime: int 1 ## ..$ QTimer : int 1 ## ..$ Request : int 1 - ## $ maxrss : int 347732 + ## $ maxrss : int 369984 ## $ qsize : int 0 ## $ url : chr "http://splash:8050" ## - attr(*, "class")= chr [1:2] "splash_debug" "list" @@ -120,32 +122,39 @@ You can also profile pages: splash("splash", 8050L) %>% render_har("http://www.poynter.org/") -> har -data_frame( - start=anytime::anytime(har$log$entries$startedDateTime), - end=(start + lubridate::milliseconds(har$log$entries$time)), - rsrc=sprintf("%02d: %s...", 1:length(start), substr(har$log$entries$request$url, 1, 30))) %>% - mutate(rsrc=factor(rsrc, levels=rev(rsrc))) %>% - bind_cols(xml2::url_parse(har$log$entries$request$url) %>% select(server)) -> df - -total_time <- diff(range(c(df$start, df$end))) -total_time <- sprintf("Total time: %s %s", - format(unclass(total_time), digits = getOption("digits")), - attr(total_time, "units")) - -ggplot(df) + - geom_segment(data=df, aes(x=start, xend=end, y=rsrc, yend=rsrc, color=server), - size=0.25) + - scale_x_datetime(expand=c(0,0)) + - labs(x=total_time, y=NULL, - title=sprintf("HAR Waterfalll Profile for [%s]", "http://www.poynter.org/")) + - theme_hrbrmstr_msc(grid="") + - theme(legend.position="none") + - theme(panel.background=element_rect(color="#2b2b2b", fill="#2b2b2b")) +print(har) ``` - - -And, web page snapshots are easy-peasy too: + ## --------HAR VERSION-------- + ## HAR specification version: 1.2 + ## --------HAR CREATOR-------- + ## Created by: Splash + ## version: 2.3.1 + ## --------HAR BROWSER-------- + ## Browser: QWebKit + ## version: 538.1 + ## --------HAR PAGES-------- + ## Page id: 1 , Page title: Poynter – A global leader in journalism. Strengthening democracy. 
+ ## --------HAR ENTRIES-------- + ## Number of entries: 53 + ## REQUESTS: + ## Page: 1 + ## Number of entries: 53 + ## - http://www.poynter.org/ + ## - http://www.poynter.org/wp-content/plugins/easy-author-image/css/easy-author-image.css?ver=2016_06_24.1 + ## - http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css?ver=2016_06_24.1 + ## - http://cloud.webtype.com/css/162ac332-3b31-4b73-ad44-da375b7f2fe3.css?ver=2016_06_24.1 + ## - http://maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css?ver=2016_06_24.1 + ## ........ + ## - https://securepubads.g.doubleclick.net/pcs/view?xai=AKAOjsuSInOKwkGjVqYf-u6Jqs7AwKtu3Vg5I9uRWRiJqvMeKGnaWdBEAwcEZ... + ## - https://tpc.googlesyndication.com/simgad/541736962153447056 + ## - https://securepubads.g.doubleclick.net/pcs/view?xai=AKAOjstnQKWZmAgTQWYXtIE1Ek_1hT6hsl9Tds_vXV_ZPQiykcV-y-ZQsPZEm... + ## - https://tpc.googlesyndication.com/simgad/7689404493724466164 + ## - https://securepubads.g.doubleclick.net/pcs/view?xai=AKAOjsvTViMv7cHz-iL9UpuT02AFVcCFa25_XM_MAi6707YHGZI7ggxbXoXNd... + +You can use [`HARtools::HARviewer`](https://github.com/johndharrison/HARtools/blob/master/R/HARviewer.R) — which this pkg imports and re-exports — to view the HAR in an interactive HTML widget. 
+ +Full web page snapshots are easy-peasy too: ``` r splash("splash", 8050L) %>% @@ -170,7 +179,7 @@ library(testthat) date() ``` - ## [1] "Sat Feb 4 13:25:07 2017" + ## [1] "Sat Feb 4 14:20:57 2017" ``` r test_dir("tests/") diff --git a/img/cap.jpg b/img/cap.jpg index aa27037..616464a 100644 Binary files a/img/cap.jpg and b/img/cap.jpg differ diff --git a/img/cap.png b/img/cap.png index 8c6934a..64ebc10 100644 Binary files a/img/cap.png and b/img/cap.png differ diff --git a/man/render_har.Rd b/man/render_har.Rd index ac01f83..0b88f35 100644 --- a/man/render_har.Rd +++ b/man/render_har.Rd @@ -53,16 +53,12 @@ render_har(splash_obj, url, base_url, response_body = FALSE, timeout = 30, \item{load_args}{Parameter values to load from cache} } \value{ -a huge \code{list} +a \link[HARtools:har]{HARtools::har} object } \description{ It includes information about requests made, responses received, timings, headers, etc and is incredibly detailed, full of information on every componenent loaded. } -\note{ -a custom \code{print} method is defined to stop your console from being -overwhelmed with data. Use \link{str} to inspect various portions of the result. -} \references{ \href{http://splash.readthedocs.io/en/stable/index.html}{Splash docs} } diff --git a/man/splashr-exports.Rd b/man/splashr-exports.Rd index fa73853..c9496af 100644 --- a/man/splashr-exports.Rd +++ b/man/splashr-exports.Rd @@ -3,6 +3,10 @@ \name{splashr-exports} \alias{splashr-exports} \alias{\%>\%} +\alias{writeHAR} +\alias{HARviewer} +\alias{renderHARviewer} +\alias{HARviewerOutput} \title{splashr exported operators} \description{ The following functions are imported and then re-exported