Browse Source

integration with HARtools

master
boB Rudis 7 years ago
parent
commit
90e4dccecf
  1. 3
      DESCRIPTION
  2. 9
      NAMESPACE
  3. 39
      R/render-har.r
  4. 21
      R/splashr-package.R
  5. 32
      README.Rmd
  6. 65
      README.md
  7. BIN
      img/cap.jpg
  8. BIN
      img/cap.png
  9. 6
      man/render_har.Rd
  10. 4
      man/splashr-exports.Rd

3
DESCRIPTION

@ -27,5 +27,6 @@ Imports:
httr,
xml2,
jsonlite,
magick
magick,
HARtools
RoxygenNote: 6.0.0

9
NAMESPACE

@ -1,10 +1,12 @@
# Generated by roxygen2: do not edit by hand
S3method(print,splash_debug)
S3method(print,splash_har)
S3method(print,splash_json)
S3method(print,splash_status)
export("%>%")
export(HARviewer)
export(HARviewerOutput)
export(renderHARviewer)
export(render_har)
export(render_html)
export(render_jpeg)
@ -13,9 +15,14 @@ export(render_png)
export(splash)
export(splash_active)
export(splash_debug)
export(writeHAR)
import(httr)
import(magick)
import(purrr)
importFrom(HARtools,HARviewer)
importFrom(HARtools,HARviewerOutput)
importFrom(HARtools,renderHARviewer)
importFrom(HARtools,writeHAR)
importFrom(jsonlite,fromJSON)
importFrom(xml2,read_html)
importFrom(xml2,url_parse)

39
R/render-har.r

@ -6,9 +6,7 @@
#' @md
#' @param response_body When `TRUE`, response content is included in the HAR records
#' @inheritParams render_html
#' @return a huge `list`
#' @note a custom `print` method is defined to stop your console from being
#' overwhelmed with data. Use [str] to inspect various portions of the result.
#' @return a [HARtools::har] object
#' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html)
#' @export
render_har <- function(splash_obj, url, base_url, response_body=FALSE, timeout=30, resource_timeout, wait=0,
@ -40,16 +38,33 @@ render_har <- function(splash_obj, url, base_url, response_body=FALSE, timeout=3
httr::stop_for_status(res)
out <- httr::content(res, as="text", encoding="UTF-8")
out <- jsonlite::fromJSON(out)
spl <- jsonlite::fromJSON(out, flatten=FALSE, simplifyVector=FALSE)
class(out) <- c("splash_har", class(out))
sphar <- list(
log=list(
version=spl$log$version,
creator=spl$log$creator,
browser=spl$log$browser,
pages=spl$log$pages,
entries=spl$log$entries
)
)
out
class(sphar$log$creator) <- c("harcreator", "list")
class(sphar$log$version) <- c("harversion", "character")
class(sphar$log$browser) <- c("harbrowser", "list")
class(sphar$log$pages) <- c("harpages", "list")
class(sphar$log$entries) <- c("harentries", "list")
class(sphar$log) <- c("harlog", "list")
class(sphar) <- c("har", "list")
}
for (i in 1:length(sphar$log$pages)) class(sphar$log$pages[[i]]) <- c("harpage", "list")
for (i in 1:length(sphar$log$entries)) {
class(sphar$log$entries[[i]]) <- c("harentry", "list")
class(sphar$log$entries[[i]]$request) <- c("harrequest", "list")
class(sphar$log$entries[[i]]$response) <- c("harresponse", "list")
}
#' @export
print.splash_har <- function(x, ...) {
cat("<splashr render_har() object>")
invisible(x)
}
sphar
}

21
R/splashr-package.R

@ -15,6 +15,7 @@
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import purrr httr magick
#' @importFrom HARtools writeHAR HARviewer renderHARviewer HARviewerOutput
#' @importFrom xml2 read_html url_parse
#' @importFrom jsonlite fromJSON
NULL
@ -32,3 +33,23 @@ NULL
#' @export
#' @rdname splashr-exports
NULL
#' @name writeHAR
#' @export
#' @rdname splashr-exports
NULL
#' @name HARviewer
#' @export
#' @rdname splashr-exports
NULL
#' @name renderHARviewer
#' @export
#' @rdname splashr-exports
NULL
#' @name HARviewerOutput
#' @export
#' @rdname splashr-exports
NULL

32
README.Rmd

@ -33,13 +33,15 @@ The following functions are implemented:
- `render_png`: Return an image (in PNG format) of the javascript-rendered page.
- `splash`: Configure parameters for connecting to a Splash server
Some functions from `HARtools` are imported/exported and `%>%` is imported/exported.
### TODO
Suggest more in a feature req!
- <strike>Implement `render.json`</strike>
- Implement `execute` (you can script Splash!)
- Add interation with [`HARtools`](https://github.com/johndharrison/HARtools)
- (more than ¾ done) Add integration with [`HARtools`](https://github.com/johndharrison/HARtools)
- _Possibly_ writing R function wrappers to start Splash which would also support enabling javascript profiles, request filters and proxy profiles from within R directly, possibly using [`harbor`](https://github.com/wch/harbor)
- Testing results with all combinations of parameters
@ -84,34 +86,16 @@ read_html("http://marvel.com/universe/Captain_America_(Steve_Rogers)")
You can also profile pages:
```{r fig.width=11, fig.height=6.5, fig.retina=2}
```{r}
splash("splash", 8050L) %>%
render_har("http://www.poynter.org/") -> har
data_frame(
start=anytime::anytime(har$log$entries$startedDateTime),
end=(start + lubridate::milliseconds(har$log$entries$time)),
rsrc=sprintf("%02d: %s...", 1:length(start), substr(har$log$entries$request$url, 1, 30))) %>%
mutate(rsrc=factor(rsrc, levels=rev(rsrc))) %>%
bind_cols(xml2::url_parse(har$log$entries$request$url) %>% select(server)) -> df
total_time <- diff(range(c(df$start, df$end)))
total_time <- sprintf("Total time: %s %s",
format(unclass(total_time), digits = getOption("digits")),
attr(total_time, "units"))
ggplot(df) +
geom_segment(data=df, aes(x=start, xend=end, y=rsrc, yend=rsrc, color=server),
size=0.25) +
scale_x_datetime(expand=c(0,0)) +
labs(x=total_time, y=NULL,
title=sprintf("HAR Waterfalll Profile for [%s]", "http://www.poynter.org/")) +
theme_hrbrmstr_msc(grid="") +
theme(legend.position="none") +
theme(panel.background=element_rect(color="#2b2b2b", fill="#2b2b2b"))
print(har)
```
And, web page snapshots are easy-peasy too:
You can use [`HARtools::HARviewer`](https://github.com/johndharrison/HARtools/blob/master/R/HARviewer.R) — which this package imports and re-exports — to view the HAR in an interactive HTML widget.
Full web page snapshots are easy-peasy too:
```{r eval=FALSE}
splash("splash", 8050L) %>%

65
README.md

@ -30,13 +30,15 @@ The following functions are implemented:
- `render_png`: Return an image (in PNG format) of the javascript-rendered page.
- `splash`: Configure parameters for connecting to a Splash server
Some functions from `HARtools` are imported/exported and `%>%` is imported/exported.
### TODO
Suggest more in a feature req!
- <strike>Implement `render.json`</strike>
- Implement `execute` (you can script Splash!)
- Add interation with [`HARtools`](https://github.com/johndharrison/HARtools)
- (more than ¾ done) Add integration with [`HARtools`](https://github.com/johndharrison/HARtools)
- *Possibly* writing R function wrappers to start Splash which would also support enabling javascript profiles, request filters and proxy profiles from within R directly, possibly using [`harbor`](https://github.com/wch/harbor)
- Testing results with all combinations of parameters
@ -71,7 +73,7 @@ splash("splash", 8050L) %>%
splash_active()
```
## Status of splash instance on [http://splash:8050]: ok. Max RSS: 356077568
## Status of splash instance on [http://splash:8050]: ok. Max RSS: 378863616
``` r
splash("splash", 8050L) %>%
@ -87,7 +89,7 @@ splash("splash", 8050L) %>%
## ..$ LuaRuntime: int 1
## ..$ QTimer : int 1
## ..$ Request : int 1
## $ maxrss : int 347732
## $ maxrss : int 369984
## $ qsize : int 0
## $ url : chr "http://splash:8050"
## - attr(*, "class")= chr [1:2] "splash_debug" "list"
@ -120,32 +122,39 @@ You can also profile pages:
splash("splash", 8050L) %>%
render_har("http://www.poynter.org/") -> har
data_frame(
start=anytime::anytime(har$log$entries$startedDateTime),
end=(start + lubridate::milliseconds(har$log$entries$time)),
rsrc=sprintf("%02d: %s...", 1:length(start), substr(har$log$entries$request$url, 1, 30))) %>%
mutate(rsrc=factor(rsrc, levels=rev(rsrc))) %>%
bind_cols(xml2::url_parse(har$log$entries$request$url) %>% select(server)) -> df
total_time <- diff(range(c(df$start, df$end)))
total_time <- sprintf("Total time: %s %s",
format(unclass(total_time), digits = getOption("digits")),
attr(total_time, "units"))
ggplot(df) +
geom_segment(data=df, aes(x=start, xend=end, y=rsrc, yend=rsrc, color=server),
size=0.25) +
scale_x_datetime(expand=c(0,0)) +
labs(x=total_time, y=NULL,
title=sprintf("HAR Waterfalll Profile for [%s]", "http://www.poynter.org/")) +
theme_hrbrmstr_msc(grid="") +
theme(legend.position="none") +
theme(panel.background=element_rect(color="#2b2b2b", fill="#2b2b2b"))
print(har)
```
<img src="README_files/figure-markdown_github/unnamed-chunk-5-1.png" width="1056" />
And, web page snapshots are easy-peasy too:
## --------HAR VERSION--------
## HAR specification version: 1.2
## --------HAR CREATOR--------
## Created by: Splash
## version: 2.3.1
## --------HAR BROWSER--------
## Browser: QWebKit
## version: 538.1
## --------HAR PAGES--------
## Page id: 1 , Page title: Poynter – A global leader in journalism. Strengthening democracy.
## --------HAR ENTRIES--------
## Number of entries: 53
## REQUESTS:
## Page: 1
## Number of entries: 53
## - http://www.poynter.org/
## - http://www.poynter.org/wp-content/plugins/easy-author-image/css/easy-author-image.css?ver=2016_06_24.1
## - http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css?ver=2016_06_24.1
## - http://cloud.webtype.com/css/162ac332-3b31-4b73-ad44-da375b7f2fe3.css?ver=2016_06_24.1
## - http://maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css?ver=2016_06_24.1
## ........
## - https://securepubads.g.doubleclick.net/pcs/view?xai=AKAOjsuSInOKwkGjVqYf-u6Jqs7AwKtu3Vg5I9uRWRiJqvMeKGnaWdBEAwcEZ...
## - https://tpc.googlesyndication.com/simgad/541736962153447056
## - https://securepubads.g.doubleclick.net/pcs/view?xai=AKAOjstnQKWZmAgTQWYXtIE1Ek_1hT6hsl9Tds_vXV_ZPQiykcV-y-ZQsPZEm...
## - https://tpc.googlesyndication.com/simgad/7689404493724466164
## - https://securepubads.g.doubleclick.net/pcs/view?xai=AKAOjsvTViMv7cHz-iL9UpuT02AFVcCFa25_XM_MAi6707YHGZI7ggxbXoXNd...
You can use [`HARtools::HARviewer`](https://github.com/johndharrison/HARtools/blob/master/R/HARviewer.R) — which this package imports and re-exports — to view the HAR in an interactive HTML widget.
Full web page snapshots are easy-peasy too:
``` r
splash("splash", 8050L) %>%
@ -170,7 +179,7 @@ library(testthat)
date()
```
## [1] "Sat Feb 4 13:25:07 2017"
## [1] "Sat Feb 4 14:20:57 2017"
``` r
test_dir("tests/")

BIN
img/cap.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 125 KiB

After

Width:  |  Height:  |  Size: 123 KiB

BIN
img/cap.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 447 KiB

After

Width:  |  Height:  |  Size: 433 KiB

6
man/render_har.Rd

@ -53,16 +53,12 @@ render_har(splash_obj, url, base_url, response_body = FALSE, timeout = 30,
\item{load_args}{Parameter values to load from cache}
}
\value{
a huge \code{list}
a \link[HARtools:har]{HARtools::har} object
}
\description{
It includes information about requests made, responses received, timings, headers, etc and
is incredibly detailed, full of information on every component loaded.
}
\note{
a custom \code{print} method is defined to stop your console from being
overwhelmed with data. Use \link{str} to inspect various portions of the result.
}
\references{
\href{http://splash.readthedocs.io/en/stable/index.html}{Splash docs}
}

4
man/splashr-exports.Rd

@ -3,6 +3,10 @@
\name{splashr-exports}
\alias{splashr-exports}
\alias{\%>\%}
\alias{writeHAR}
\alias{HARviewer}
\alias{renderHARviewer}
\alias{HARviewerOutput}
\title{splashr exported operators}
\description{
The following functions are imported and then re-exported

Loading…
Cancel
Save