boB Rudis преди 9 месеца
родител
ревизия
55adc4feb3
No known key found for this signature in database
променени са 48 файла, в които са добавени 454 реда и са изтрити 194 реда
  1. 2
    0
      .Rbuildignore
  2. 2
    0
      .gitignore
  3. 3
    0
      NAMESPACE
  4. 8
    6
      R/docker-splash.r
  5. 60
    2
      R/helpers.r
  6. 22
    12
      cran-comments.md
  7. 1
    1
      man/as_har.Rd
  8. 1
    1
      man/as_httr_req.Rd
  9. 1
    1
      man/as_response.Rd
  10. 1
    1
      man/execute_lua.Rd
  11. 3
    1
      man/get_content_size.Rd
  12. 3
    1
      man/get_content_type.Rd
  13. 3
    1
      man/get_har_entry.Rd
  14. 32
    0
      man/get_header_val.Rd
  15. 29
    0
      man/get_headers.Rd
  16. 3
    1
      man/get_request_type.Rd
  17. 3
    1
      man/get_request_url.Rd
  18. 3
    1
      man/get_response_body.Rd
  19. 25
    0
      man/get_response_url.Rd
  20. 1
    1
      man/har_entries.Rd
  21. 4
    2
      man/har_entry_count.Rd
  22. 1
    1
      man/json_fromb64.Rd
  23. 1
    1
      man/render_har.Rd
  24. 1
    1
      man/render_html.Rd
  25. 1
    1
      man/render_jpeg.Rd
  26. 1
    1
      man/render_json.Rd
  27. 1
    1
      man/render_png.Rd
  28. 1
    1
      man/splash_add_lua.Rd
  29. 1
    1
      man/splash_click.Rd
  30. 1
    1
      man/splash_enable_javascript.Rd
  31. 1
    1
      man/splash_focus.Rd
  32. 1
    1
      man/splash_go.Rd
  33. 1
    1
      man/splash_har.Rd
  34. 1
    1
      man/splash_har_reset.Rd
  35. 1
    1
      man/splash_html.Rd
  36. 1
    1
      man/splash_images.Rd
  37. 1
    1
      man/splash_plugins.Rd
  38. 1
    1
      man/splash_png.Rd
  39. 1
    1
      man/splash_press.Rd
  40. 1
    1
      man/splash_private_mode.Rd
  41. 1
    1
      man/splash_release.Rd
  42. 1
    1
      man/splash_response_body.Rd
  43. 1
    1
      man/splash_send_keys.Rd
  44. 1
    1
      man/splash_send_text.Rd
  45. 1
    1
      man/splash_user_agent.Rd
  46. 1
    1
      man/splash_wait.Rd
  47. Двоични данни
      vignettes/figures/splashr04.png
  48. 219
    136
      vignettes/splashr_helpers.Rmd

+ 2
- 0
.Rbuildignore Целия файл

@@ -1,3 +1,5 @@
1
+^Meta$
2
+^doc$
1 3
 ^LICENSE\.md$
2 4
 ^.*\.Rproj$
3 5
 ^\.Rproj\.user$

+ 2
- 0
.gitignore Целия файл

@@ -1,3 +1,5 @@
1
+Meta
2
+doc
1 3
 .Rproj.user
2 4
 .Rhistory
3 5
 .RData

+ 3
- 0
NAMESPACE Целия файл

@@ -20,10 +20,13 @@ export(get_body_size)
20 20
 export(get_content_size)
21 21
 export(get_content_type)
22 22
 export(get_har_entry)
23
+export(get_header_val)
24
+export(get_headers)
23 25
 export(get_headers_size)
24 26
 export(get_request_type)
25 27
 export(get_request_url)
26 28
 export(get_response_body)
29
+export(get_response_url)
27 30
 export(har_entries)
28 31
 export(har_entry_count)
29 32
 export(install_splash)

+ 8
- 6
R/docker-splash.r Целия файл

@@ -120,12 +120,14 @@ killall_splash <- function() {
120 120
   x <- docker$container$list(all=TRUE)
121 121
 
122 122
   for (i in 1:nrow(x)) {
123
-    if (grepl("bin/splash", x$command[i])) {
124
-      message(sprintf("Pruning: %s...", x$id[i]))
125
-      if (x$state[i] == "running") {
126
-        cntnr <- docker$container$get(x$id[i])
127
-        cntnr$stop()
128
-        cntnr$remove()
123
+    if (length(x$command[i])) {
124
+      if (grepl("bin/splash", x$command[i])) {
125
+        message(sprintf("Pruning: %s...", x$id[i]))
126
+        if (x$state[i] == "running") {
127
+          cntnr <- docker$container$get(x$id[i])
128
+          cntnr$stop()
129
+          cntnr$remove()
130
+        }
129 131
       }
130 132
     }
131 133
   }

+ 60
- 2
R/helpers.r Целия файл

@@ -33,7 +33,9 @@ get_content_type <- function(har_resp_obj) {
33 33
 #' @param type content type to compare to (default: "`application/json`")
34 34
 #' @export
35 35
 is_content_type <- function(har_resp_obj, type="application/json") {
36
-  get_content_type(har_resp_obj) == type
36
+  res <- get_content_type(har_resp_obj) == type
37
+  if (is.na(res)) res <- FALSE
38
+  res
37 39
 }
38 40
 
39 41
 #' @rdname get_content_type
@@ -101,6 +103,51 @@ is_xhr <- function(har_resp_obj) {
101 103
 
102 104
 }
103 105
 
106
+#' Retrieve response headers as a data frame
107
+#'
108
+#' @md
109
+#' @param har_resp_obj HAR response object
110
+#' @note the `name` column that contains the header key is normalized to lower case
111
+#' @family splash_har_helpers
112
+#' @export
113
+get_headers <- function(har_resp_obj) {
114
+  if (length(har_resp_obj$response$headers)) {
115
+    do.call(
116
+      rbind.data.frame,
117
+      lapply(har_resp_obj$response$headers, as.data.frame, stringsAsFactors=FALSE)
118
+    ) -> ret
119
+    ret[["name"]] <- tolower(ret[["name"]])
120
+    class(ret) <- c("tbl_df", "tbl", "data.frame")
121
+    ret
122
+  }
123
+}
124
+
125
+#' Retrieve the value of a specific response header
126
+#'
127
+#' @md
128
+#' @param har_resp_obj HAR response object
129
+#' @param header the header you want the value for
130
+#' @note the `name` column that contains the header key is normalized to lower case
131
+#'        as is the passed-in requested header. Also, if there is more than one only
132
+#'        the first is returned.
133
+#' @family splash_har_helpers
134
+#' @export
135
+get_header_val <- function(har_resp_obj, header) {
136
+  if (length(har_resp_obj$response$headers)) {
137
+    header <- tolower(header)
138
+    do.call(
139
+      rbind.data.frame,
140
+      lapply(har_resp_obj$response$headers, as.data.frame, stringsAsFactors=FALSE)
141
+    ) -> ret
142
+    ret[["name"]] <- tolower(ret[["name"]])
143
+    ret <- unlist(ret[ret$name == header, "value"], use.names = FALSE)
144
+    if (length(ret)) ret <- ret[1] else ret <- NA_character_
145
+    ret
146
+  } else {
147
+    NA_character_
148
+  }
149
+}
150
+
104 151
 #' Retrieve request URL
105 152
 #'
106 153
 #' @param har_resp_obj HAR response object
@@ -108,7 +155,18 @@ is_xhr <- function(har_resp_obj) {
108 155
 #' @export
109 156
 get_request_url <- function(har_resp_obj) {
110 157
   utype <- har_resp_obj$request$url
111
-  if (utype == "") return(NA_character_)
158
+  if (utype == "") utype <- NA_character_
159
+  utype
160
+}
161
+
162
+#' Retrieve response URL
163
+#'
164
+#' @param har_resp_obj HAR response object
165
+#' @family splash_har_helpers
166
+#' @export
167
+get_response_url <- function(har_resp_obj) {
168
+  utype <- har_resp_obj$response$url
169
+  if (utype == "") utype <- NA_character_
112 170
   utype
113 171
 }
114 172
 

+ 22
- 12
cran-comments.md Целия файл

@@ -1,23 +1,33 @@
1 1
 ## Test environments
2 2
 
3
-* local OS X install, R 3.4.3 on both 10.12 and 10.13.2
4
-* local ubuntu 3.4.2 and r-devel
3
+* local macOS install, R 3.5.2 on macOS 10.14
4
+* local ubuntu 3.5.1
5 5
 * ubuntu on travis-ci, R oldrel, current and r-devel
6 6
 * win-builder (devel and release)
7 7
 
8
-## R CMD check results
9
-
10
-0 errors | 0 warnings | 1 note
8
+---
11 9
 
12
-* This is a new release.
10
+Per a note from Kurt, splashr now uses the
11
+stevedore package since the docker package is
12
+likely being retired from CRAN.
13 13
 
14
-## Reverse dependencies
14
+The invalid URL in the vignette (as noted in
15
+an email thread) has been fixed.
15 16
 
16
-This is a new release, so there are no reverse dependencies.
17
+Tests require installation of ~1.2GB docker image
18
+which also means docker needs to be available.
19
+Examples also require a Splash instance (dockerized
20
+or full install) to work. Therefore, as has been the
21
+case since the previous CRAN version, examples
22
+are marked as dontrun and tests do not run on CRAN.
23
+They do run monthly and on every repo push in Travis
24
+https://travis-ci.org/hrbrmstr/splashr/settings.
17 25
 
18
----
26
+I can modify any of the above behavior to conform
27
+to any CRAN policy I may be violating.
19 28
 
20
-Submitting patch due to CRAN note.
29
+License has been changed to MIT.
21 30
 
22
-Removed clipboard functionality since that was the path of 
23
-least resistance. 
31
+As always, thanks to the CRAN team for their
32
+herculean efforts to keep the R package universe
33
+healthy!

+ 1
- 1
man/as_har.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/render-har.r
2
+% Please edit documentation in R/render-har.R
3 3
 \name{as_har}
4 4
 \alias{as_har}
5 5
 \title{Turn a generic Splash HAR response into a HAR object}

+ 1
- 1
man/as_httr_req.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/as_req.r
2
+% Please edit documentation in R/as_req.R
3 3
 \name{as_httr_req}
4 4
 \alias{as_httr_req}
5 5
 \title{Create an httr verb request function from an HAR request}

+ 1
- 1
man/as_response.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/as_request.r
2
+% Please edit documentation in R/as_request.R
3 3
 \name{as_response}
4 4
 \alias{as_response}
5 5
 \title{Return a HAR entry response as an httr::response object}

+ 1
- 1
man/execute_lua.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/execute.r
2
+% Please edit documentation in R/execute.R
3 3
 \name{execute_lua}
4 4
 \alias{execute_lua}
5 5
 \title{Execute a custom rendering script and return a result.}

+ 3
- 1
man/get_content_size.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/content.r
2
+% Please edit documentation in R/content.R
3 3
 \name{get_content_size}
4 4
 \alias{get_content_size}
5 5
 \alias{get_body_size}
@@ -21,9 +21,11 @@ Retrieve size of content | body | headers
21 21
 \seealso{
22 22
 Other splash_har_helpers: \code{\link{get_content_type}},
23 23
   \code{\link{get_har_entry}},
24
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
24 25
   \code{\link{get_request_type}},
25 26
   \code{\link{get_request_url}},
26 27
   \code{\link{get_response_body}},
28
+  \code{\link{get_response_url}},
27 29
   \code{\link{har_entry_count}}
28 30
 }
29 31
 \concept{splash_har_helpers}

+ 3
- 1
man/get_content_type.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.r
2
+% Please edit documentation in R/helpers.R
3 3
 \name{get_content_type}
4 4
 \alias{get_content_type}
5 5
 \alias{is_content_type}
@@ -56,9 +56,11 @@ Retrieve or test content type of a HAR request object
56 56
 \seealso{
57 57
 Other splash_har_helpers: \code{\link{get_content_size}},
58 58
   \code{\link{get_har_entry}},
59
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
59 60
   \code{\link{get_request_type}},
60 61
   \code{\link{get_request_url}},
61 62
   \code{\link{get_response_body}},
63
+  \code{\link{get_response_url}},
62 64
   \code{\link{har_entry_count}}
63 65
 }
64 66
 \concept{splash_har_helpers}

+ 3
- 1
man/get_har_entry.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.r
2
+% Please edit documentation in R/helpers.R
3 3
 \name{get_har_entry}
4 4
 \alias{get_har_entry}
5 5
 \title{Retrieve an entry by index from a HAR object}
@@ -17,9 +17,11 @@ Retrieve an entry by index from a HAR object
17 17
 \seealso{
18 18
 Other splash_har_helpers: \code{\link{get_content_size}},
19 19
   \code{\link{get_content_type}},
20
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
20 21
   \code{\link{get_request_type}},
21 22
   \code{\link{get_request_url}},
22 23
   \code{\link{get_response_body}},
24
+  \code{\link{get_response_url}},
23 25
   \code{\link{har_entry_count}}
24 26
 }
25 27
 \concept{splash_har_helpers}

+ 32
- 0
man/get_header_val.Rd Целия файл

@@ -0,0 +1,32 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/helpers.R
3
+\name{get_header_val}
4
+\alias{get_header_val}
5
+\title{Retrieve the value of a specific response header}
6
+\usage{
7
+get_header_val(har_resp_obj, header)
8
+}
9
+\arguments{
10
+\item{har_resp_obj}{HAR response object}
11
+
12
+\item{header}{the header you want the value for}
13
+}
14
+\description{
15
+Retrieve the value of a specific response header
16
+}
17
+\note{
18
+the \code{name} column that contains the header key is normalized to lower case
19
+as is the passed-in requested header. Also, if there is more than one only
20
+the first is returned.
21
+}
22
+\seealso{
23
+Other splash_har_helpers: \code{\link{get_content_size}},
24
+  \code{\link{get_content_type}},
25
+  \code{\link{get_har_entry}}, \code{\link{get_headers}},
26
+  \code{\link{get_request_type}},
27
+  \code{\link{get_request_url}},
28
+  \code{\link{get_response_body}},
29
+  \code{\link{get_response_url}},
30
+  \code{\link{har_entry_count}}
31
+}
32
+\concept{splash_har_helpers}

+ 29
- 0
man/get_headers.Rd Целия файл

@@ -0,0 +1,29 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/helpers.R
3
+\name{get_headers}
4
+\alias{get_headers}
5
+\title{Retrieve response headers as a data frame}
6
+\usage{
7
+get_headers(har_resp_obj)
8
+}
9
+\arguments{
10
+\item{har_resp_obj}{HAR response object}
11
+}
12
+\description{
13
+Retrieve response headers as a data frame
14
+}
15
+\note{
16
+the \code{name} column that contains the header key is normalized to lower case
17
+}
18
+\seealso{
19
+Other splash_har_helpers: \code{\link{get_content_size}},
20
+  \code{\link{get_content_type}},
21
+  \code{\link{get_har_entry}},
22
+  \code{\link{get_header_val}},
23
+  \code{\link{get_request_type}},
24
+  \code{\link{get_request_url}},
25
+  \code{\link{get_response_body}},
26
+  \code{\link{get_response_url}},
27
+  \code{\link{har_entry_count}}
28
+}
29
+\concept{splash_har_helpers}

+ 3
- 1
man/get_request_type.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.r
2
+% Please edit documentation in R/helpers.R
3 3
 \name{get_request_type}
4 4
 \alias{get_request_type}
5 5
 \alias{is_get}
@@ -22,8 +22,10 @@ Retrieve or test request type
22 22
 Other splash_har_helpers: \code{\link{get_content_size}},
23 23
   \code{\link{get_content_type}},
24 24
   \code{\link{get_har_entry}},
25
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
25 26
   \code{\link{get_request_url}},
26 27
   \code{\link{get_response_body}},
28
+  \code{\link{get_response_url}},
27 29
   \code{\link{har_entry_count}}
28 30
 }
29 31
 \concept{splash_har_helpers}

+ 3
- 1
man/get_request_url.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.r
2
+% Please edit documentation in R/helpers.R
3 3
 \name{get_request_url}
4 4
 \alias{get_request_url}
5 5
 \title{Retrieve request URL}
@@ -16,8 +16,10 @@ Retrieve request URL
16 16
 Other splash_har_helpers: \code{\link{get_content_size}},
17 17
   \code{\link{get_content_type}},
18 18
   \code{\link{get_har_entry}},
19
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
19 20
   \code{\link{get_request_type}},
20 21
   \code{\link{get_response_body}},
22
+  \code{\link{get_response_url}},
21 23
   \code{\link{har_entry_count}}
22 24
 }
23 25
 \concept{splash_har_helpers}

+ 3
- 1
man/get_response_body.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.r
2
+% Please edit documentation in R/helpers.R
3 3
 \name{get_response_body}
4 4
 \alias{get_response_body}
5 5
 \title{Retrieve the body content of a HAR entry}
@@ -22,8 +22,10 @@ Retrieve the body content of a HAR entry
22 22
 Other splash_har_helpers: \code{\link{get_content_size}},
23 23
   \code{\link{get_content_type}},
24 24
   \code{\link{get_har_entry}},
25
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
25 26
   \code{\link{get_request_type}},
26 27
   \code{\link{get_request_url}},
28
+  \code{\link{get_response_url}},
27 29
   \code{\link{har_entry_count}}
28 30
 }
29 31
 \concept{splash_har_helpers}

+ 25
- 0
man/get_response_url.Rd Целия файл

@@ -0,0 +1,25 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/helpers.R
3
+\name{get_response_url}
4
+\alias{get_response_url}
5
+\title{Retrieve response URL}
6
+\usage{
7
+get_response_url(har_resp_obj)
8
+}
9
+\arguments{
10
+\item{har_resp_obj}{HAR response object}
11
+}
12
+\description{
13
+Retrieve response URL
14
+}
15
+\seealso{
16
+Other splash_har_helpers: \code{\link{get_content_size}},
17
+  \code{\link{get_content_type}},
18
+  \code{\link{get_har_entry}},
19
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
20
+  \code{\link{get_request_type}},
21
+  \code{\link{get_request_url}},
22
+  \code{\link{get_response_body}},
23
+  \code{\link{har_entry_count}}
24
+}
25
+\concept{splash_har_helpers}

+ 1
- 1
man/har_entries.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.r
2
+% Please edit documentation in R/helpers.R
3 3
 \name{har_entries}
4 4
 \alias{har_entries}
5 5
 \title{Retrieve just the HAR entries from a splashr request}

+ 4
- 2
man/har_entry_count.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/helpers.r
2
+% Please edit documentation in R/helpers.R
3 3
 \name{har_entry_count}
4 4
 \alias{har_entry_count}
5 5
 \title{Retrieves number of HAR entries in a response}
@@ -16,8 +16,10 @@ Retrieves number of HAR entries in a response
16 16
 Other splash_har_helpers: \code{\link{get_content_size}},
17 17
   \code{\link{get_content_type}},
18 18
   \code{\link{get_har_entry}},
19
+  \code{\link{get_header_val}}, \code{\link{get_headers}},
19 20
   \code{\link{get_request_type}},
20 21
   \code{\link{get_request_url}},
21
-  \code{\link{get_response_body}}
22
+  \code{\link{get_response_body}},
23
+  \code{\link{get_response_url}}
22 24
 }
23 25
 \concept{splash_har_helpers}

+ 1
- 1
man/json_fromb64.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/utils.r
2
+% Please edit documentation in R/utils.R
3 3
 \name{json_fromb64}
4 4
 \alias{json_fromb64}
5 5
 \title{Convert a Base64 encoded string into an R object}

+ 1
- 1
man/render_har.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/render-har.r
2
+% Please edit documentation in R/render-har.R
3 3
 \name{render_har}
4 4
 \alias{render_har}
5 5
 \title{Return information about Splash interaction with a website in HAR format.}

+ 1
- 1
man/render_html.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/render-html.r
2
+% Please edit documentation in R/render-html.R
3 3
 \name{render_html}
4 4
 \alias{render_html}
5 5
 \title{Return the HTML of the javascript-rendered page.}

+ 1
- 1
man/render_jpeg.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/render-jpg.r
2
+% Please edit documentation in R/render-jpg.R
3 3
 \name{render_jpeg}
4 4
 \alias{render_jpeg}
5 5
 \title{Return a image (in JPEG format) of the javascript-rendered page.}

+ 1
- 1
man/render_json.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/render-json.r
2
+% Please edit documentation in R/render-json.R
3 3
 \name{render_json}
4 4
 \alias{render_json}
5 5
 \title{Return a json-encoded dictionary with information about javascript-rendered webpage.}

+ 1
- 1
man/render_png.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/render-png.r
2
+% Please edit documentation in R/render-png.R
3 3
 \name{render_png}
4 4
 \alias{render_png}
5 5
 \title{Return an image (in PNG format) of the javascript-rendered page.}

+ 1
- 1
man/splash_add_lua.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_add_lua}
4 4
 \alias{splash_add_lua}
5 5
 \title{Add raw lua code into DSL call chain}

+ 1
- 1
man/splash_click.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_click}
4 4
 \alias{splash_click}
5 5
 \title{Trigger mouse click event in web page.}

+ 1
- 1
man/splash_enable_javascript.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_enable_javascript}
4 4
 \alias{splash_enable_javascript}
5 5
 \title{Enable or disable execution of JavaSript code embedded in the page.}

+ 1
- 1
man/splash_focus.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_focus}
4 4
 \alias{splash_focus}
5 5
 \title{Focus on a document element provided by a CSS selector}

+ 1
- 1
man/splash_go.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_go}
4 4
 \alias{splash_go}
5 5
 \title{Go to an URL.}

+ 1
- 1
man/splash_har.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_har}
4 4
 \alias{splash_har}
5 5
 \title{Return information about Splash interaction with a website in HAR format.}

+ 1
- 1
man/splash_har_reset.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_har_reset}
4 4
 \alias{splash_har_reset}
5 5
 \title{Drops all internally stored HAR records.}

+ 1
- 1
man/splash_html.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_html}
4 4
 \alias{splash_html}
5 5
 \title{Return a HTML snapshot of a current page.}

+ 1
- 1
man/splash_images.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_images}
4 4
 \alias{splash_images}
5 5
 \title{Enable/disable images}

+ 1
- 1
man/splash_plugins.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_plugins}
4 4
 \alias{splash_plugins}
5 5
 \title{Enable or disable browser plugins (e.g. Flash).}

+ 1
- 1
man/splash_png.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_png}
4 4
 \alias{splash_png}
5 5
 \title{Return a screenshot of a current page in PNG format.}

+ 1
- 1
man/splash_press.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_press}
4 4
 \alias{splash_press}
5 5
 \title{Trigger mouse press event in web page.}

+ 1
- 1
man/splash_private_mode.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_private_mode}
4 4
 \alias{splash_private_mode}
5 5
 \title{Enable or disable execution of JavaSript code embedded in the page.}

+ 1
- 1
man/splash_release.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_release}
4 4
 \alias{splash_release}
5 5
 \title{Trigger mouse release event in web page.}

+ 1
- 1
man/splash_response_body.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_response_body}
4 4
 \alias{splash_response_body}
5 5
 \title{Enable or disable response content tracking.}

+ 1
- 1
man/splash_send_keys.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_send_keys}
4 4
 \alias{splash_send_keys}
5 5
 \title{Send keyboard events to page context.}

+ 1
- 1
man/splash_send_text.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_send_text}
4 4
 \alias{splash_send_text}
5 5
 \title{Send text as input to page context, literally, character by character.}

+ 1
- 1
man/splash_user_agent.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r, R/user-agents.R
2
+% Please edit documentation in R/dsl.R, R/user-agents.R
3 3
 \docType{data}
4 4
 \name{splash_user_agent}
5 5
 \alias{splash_user_agent}

+ 1
- 1
man/splash_wait.Rd Целия файл

@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/dsl.r
2
+% Please edit documentation in R/dsl.R
3 3
 \name{splash_wait}
4 4
 \alias{splash_wait}
5 5
 \title{Wait for a period time}

Двоични данни
vignettes/figures/splashr04.png Целия файл


+ 219
- 136
vignettes/splashr_helpers.Rmd Целия файл

@@ -22,109 +22,106 @@ Let's see what extra goodies `splashr` provides to make our lives easier.
22 22
 ## Handling `splashr` Objects
23 23
 
24 24
 One of the most powerful functions in `splashr` is `render_har()`. You get every component loaded by a dynamic web page, and some sites have upwards of 100 elements for any given page. How can you get to the bits that you want?
25
-
26
-Let's use a different example that's a bit gnarly (i.e. you may need to work through it a couple times).
27
-
28
-The U.K. government has an open data portal and one of the sections contains map tiles for various grid quadrants. It's a really nice site, but it's designed for interactive use and we want to be able to get to all the tile files programmatically. For our example, we'll be grabbing data from <http://environment.data.gov.uk/ds/survey/index.jsp#/survey?grid=TQ38>.
29
-
30
-<img width="100%" style="max-width:100%" src="figures/splashr04.png"/>
31
-
32
-Since we don't know what we need, let's use `render_har()` to get everything back into R:
33
-
25
+We'll use `render_har()` to demonstrate how to find resources a site loads and use the data we gather to assess how "safe" these sites are &mdash; i.e. how many third-party javascript components they load and how safely they are loaded. Note that code in this vignette assumes a Splash instance is running locally on your system.
26
+
27
+We'll check <https://apple.com/> first since Apple claims to care about our privacy. If that's true, then they'll load little or no third-party content.
28
+
29
+```{r eval=FALSE}
30
+(apple <- render_har(url = "https://apple.com/", response_body = TRUE))
31
+## --------HAR VERSION-------- 
32
+## HAR specification version: 1.2 
33
+## --------HAR CREATOR-------- 
34
+## Created by: Splash 
35
+## version: 3.3.1 
36
+## --------HAR BROWSER-------- 
37
+## Browser: QWebKit 
38
+## version: 602.1 
39
+## --------HAR PAGES-------- 
40
+## Page id: 1 , Page title: Apple 
41
+## --------HAR ENTRIES-------- 
42
+## Number of entries: 84 
43
+## REQUESTS: 
44
+## Page: 1 
45
+## Number of entries: 84 
46
+##   -  https://apple.com/ 
47
+##   -  https://www.apple.com/ 
48
+##   -  https://www.apple.com/ac/globalnav/4/en_US/styles/ac-globalnav.built.css 
49
+##   -  https://www.apple.com/ac/localnav/4/styles/ac-localnav.built.css 
50
+##   -  https://www.apple.com/ac/globalfooter/4/en_US/styles/ac-globalfooter.built.css 
51
+##      ........ 
52
+##   -  https://www.apple.com/v/home/ea/images/heroes/iphone-xs/iphone_xs_0afef_mediumtall.jpg 
53
+##   -  https://www.apple.com/v/home/ea/images/heroes/iphone-xr/iphone_xr_5e40f_mediumtall.jpg 
54
+##   -  https://www.apple.com/v/home/ea/images/heroes/iphone-xs/iphone_xs_0afef_mediumtall.jpg 
55
+##   -  https://www.apple.com/v/home/ea/images/heroes/macbook-air/macbook_air_mediumtall.jpg 
56
+##   -  https://www.apple.com/v/home/ea/images/heroes/macbook-air/macbook_air_mediumtall.jpg 
34 57
 ```
35
-library(splashr)
36
-library(httr)
37
-library(tidyverse)
38
-
39
-pg_har <- render_har(url = "http://environment.data.gov.uk/ds/survey/index.jsp#/survey?grid=TQ38", response_body = TRUE, wait = 10)
40
-
41
-entries <- har_entries(pg_har)
42
-
43
-map_chr(entries, get_content_type) %>%
44
-  table()
45
-## .
46
-## application/json        image/gif        image/png         text/css        text/html
47
-##               33                1               24                1                1
48
-##  text/javascript
49
-##                1
50
-
51
-map_chr(entries, get_request_url)
52
-##  [1] "http://environment.data.gov.uk/ds/survey/index.jsp#/survey?grid=TQ38"
53
-##  [2] "http://www.geostore.com/environment-agency/survey.full.min.170718.css"
54
-##  [3] "http://www.geostore.com/environment-agency/survey.full.min.170718.js"
55
-##  [4] "http://environment.data.gov.uk/ds/survey/images/busy.gif"
56
-##  [5] "http://environment.data.gov.uk/ds/survey/rest/config/download?_=1503933543160"
57
-##  [6] "http://www.geostore.com/environment-agency/rest/grid/EA_SUPPLIED_OS_10KM/TQ38"
58
-##  [7] "http://www.geostore.com/environment-agency/rest/gazetteer/search/postcode/TQ38"
59
-##  [8] "http://environment.data.gov.uk/ds/survey/images/download.png"
60
-##  [9] "http://www.geostore.com/environment-agency/images/dgu-header-white.png"
61
-## [10] "http://www.geostore.com/environment-agency/images/airbus-footer-logo.png"
62
-## [11] "http://www.geostore.com/environment-agency/images/ogl-symbol-41px-retina-black.png"
63
-## [12] "http://environment.data.gov.uk/ds/survey/fonts/glyphicons-halflings-regular.woff2"
64
-## [13] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=390919.47990708053%2C234551.68794424832%2C469103.375888497%2C312735.5839256648"
65
-## [14] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=469103.37588850036%2C234551.68794424832%2C547287.2718699168%2C312735.5839256648"
66
-## [15] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=390919.47990708053%2C312735.5839256644%2C469103.375888497%2C390919.4799070809"
67
-## [16] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=390919.47990708053%2C156367.7919628322%2C469103.375888497%2C234551.68794424867"
68
-## [17] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=312735.5839256644%2C234551.68794424832%2C390919.4799070809%2C312735.5839256648"
69
-## [18] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=469103.37588850036%2C312735.5839256644%2C547287.2718699168%2C390919.4799070809"
70
-## [19] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=469103.37588850036%2C156367.7919628322%2C547287.2718699168%2C234551.68794424867"
71
-## [20] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=312735.5839256644%2C312735.5839256644%2C390919.4799070809%2C390919.4799070809"
72
-## [21] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=312735.5839256644%2C156367.7919628322%2C390919.4799070809%2C234551.68794424867"
73
-## [22] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C234551.68794424832%2C625471.1678513329%2C312735.5839256648"
74
-## [23] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C312735.5839256644%2C625471.1678513329%2C390919.4799070809"
75
-## [24] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C156367.7919628322%2C625471.1678513329%2C234551.68794424867"
76
-## [25] "http://www.geostore.com/environment-agency/rest/grid/EA_SUPPLIED_OS_10KM/535000/185000"
77
-## [26] "http://www.geostore.com/environment-agency/rest/gazetteer/search/postcode/TQ38 - OS"
78
-## [27] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=527741.2978745624%2C175913.76595818624%2C537514.2848722395%2C185686.7529558633"
79
-## [28] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=527741.2978745624%2C185686.75295586511%2C537514.2848722395%2C195459.7399535422"
80
-## [29] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=537514.2848722376%2C175913.76595818624%2C547287.2718699146%2C185686.7529558633"
81
-## [30] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=537514.2848722376%2C185686.75295586511%2C547287.2718699146%2C195459.7399535422"
82
-## [31] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=517968.31087688357%2C175913.76595818624%2C527741.2978745606%2C185686.7529558633"
83
-## [32] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=517968.31087688357%2C185686.75295586511%2C527741.2978745606%2C195459.7399535422"
84
-## [33] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C175913.76595818624%2C557060.2588675935%2C185686.7529558633"
85
-## [34] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C185686.75295586511%2C557060.2588675935%2C195459.7399535422"
86
-## [35] "http://www.geostore.com/environment-agency/rest/product/EA_SUPPLIED_OS_10KM/TQ38?catalogName=Survey"
87
-## [36] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2003-EA"
88
-## [37] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2005-EA"
89
-## [38] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2009-EA"
90
-## [39] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2015-EA"
91
-## [40] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-GROUP-ENGLAND-EA"
92
-## [41] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-1999-EA"
93
-## [42] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2002-EA"
94
-## [43] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2007-EA"
95
-## [44] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2011-EA"
96
-## [45] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2012-EA"
97
-## [46] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2003-EA"
98
-## [47] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2005-EA"
99
-## [48] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2009-EA"
100
-## [49] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2015-EA"
101
-## [50] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-GROUP-ENGLAND-EA"
102
-## [51] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2002-EA"
103
-## [52] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2007-EA"
104
-## [53] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2011-EA"
105
-## [54] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2012-EA"
106
-## [55] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-LAZ-ENGLAND-EA"
107
-## [56] "http://www.geostore.com/environment-agency/rest/product/group/OAP-INCIDENTRESPONSE-ENGLAND-EA"
108
-## [57] "http://www.geostore.com/environment-agency/rest/product/group/VAP-NIGHTTIME-ENGLAND-2012-EA"
109
-## [58] "http://www.geostore.com/environment-agency/rest/product/group/VAP-RGB-ENGLAND-2008-EA"
110
-## [59] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-EA"
111
-## [60] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-EA"
112
-## [61] "http://www.geostore.com/environment-agency/rest/product/group/VAP-NIGHTTIME-ENGLAND-EA"
113
-## [62] "http://www.geostore.com/environment-agency/rest/product/group/VAP-RGB-ENGLAND-EA"
58
+
59
+The HAR output shows that when you visit `apple.com` your browser makes at least 84 requests for resources. We can see what types of content are loaded:
60
+
61
+```{r eval=FALSE}
62
+har_entries(apple) %>% 
63
+  purrr::map_chr(get_content_type) %>% 
64
+  table(dnn = "content_type") %>% 
65
+  broom::tidy() %>% 
66
+  dplyr::arrange(desc(n))
67
+## # A tibble: 9 x 2
68
+##   content_type                 n
69
+##   <chr>                    <int>
70
+## 1 font/woff2                  27
71
+## 2 application/x-javascript    15
72
+## 3 image/svg+xml               10
73
+## 4 text/css                     9
74
+## 5 image/jpeg                   7
75
+## 6 image/png                    6
76
+## 7 application/font-woff        4
77
+## 8 text/html                    3
78
+## 9 application/json             2
114 79
 ```
115 80
 
116
-Many of those resources are just image tiles for the map you see in the screenshot. Let's try to find data files:
81
+Lots of calls to fonts, 15 javascript files and even 2 JSON files. Let's see what the domains are for these resources:
117 82
 
83
+```{r eval=FALSE}
84
+har_entries(apple) %>% 
85
+  purrr::map_chr(get_response_url) %>% 
86
+  purrr::map_chr(urltools::domain) %>% 
87
+  unique()
88
+## [1] "apple.com"               "www.apple.com"           "securemetrics.apple.com"
118 89
 ```
119
-map_lgl(entries, is_json)
120
-##  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE    NA FALSE
121
-## [14] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
122
-## [27] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
123
-## [40]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
124
-## [53]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
90
+
91
+Wow! Only calls to Apple-controlled resources. 
92
+
93
+I wonder what's in those JSON files, though:
94
+
95
+```{r eval=FALSE}
96
+har_entries(apple) %>% 
97
+  purrr::keep(is_json) %>% 
98
+  purrr::map(get_response_body, "text") %>% 
99
+  purrr::map(jsonlite::fromJSON) %>% 
100
+  str(3)
101
+## List of 2
102
+##  $ :List of 2
103
+##   ..$ locale        :List of 3
104
+##   .. ..$ country      : chr "us"
105
+##   .. ..$ attr         : chr "en-US"
106
+##   .. ..$ textDirection: chr "ltr"
107
+##   ..$ localeswitcher:List of 7
108
+##   .. ..$ name        : chr "localeswitcher"
109
+##   .. ..$ metadata    : Named list()
110
+##   .. ..$ displayIndex: int 1
111
+##   .. ..$ copy        :List of 5
112
+##   .. ..$ continue    :List of 5
113
+##   .. ..$ exit        :List of 5
114
+##   .. ..$ select      :List of 5
115
+##  $ :List of 2
116
+##   ..$ id     : chr "ad6ca319-1ef1-20da-c4e0-5185088996cb"
117
+##   ..$ results:'data.frame': 2 obs. of  2 variables:
118
+##   .. ..$ sectionName   : chr [1:2] "quickLinks" "suggestions"
119
+##   .. ..$ sectionResults:List of 2
125 120
 ```
126 121
 
127
-Now, we're getting somewhere. The `har_entries()` function makes it easy to get to the individual elements and we can use the `is_json()` helper with `purrr` functions to slice and dice at will. Here are all the `is_` functions you can use with HAR objects:
122
+So, locale metadata and something to do with on-page links/suggestions.
123
+
124
+As demonstrated, the `har_entries()` function makes it easy to get to the individual elements and we used the `is_json()` helper with `purrr` functions to slice and dice the structure at will. Here are all the `is_` functions you can use with HAR objects:
128 125
 
129 126
 - `is_binary()`
130 127
 - `is_content_type()`
@@ -145,60 +142,141 @@ You can also use various `get_` helpers to avoid gnarly `$` or `[[]]` constructs
145 142
 - `get_body_size()` ---	Retrieve size of content | body | headers
146 143
 - `get_content_size()` ---	Retrieve size of content | body | headers
147 144
 - `get_content_type()` ---	Retrieve or test content type of a HAR request object
145
+- `get_headers()` --- Retrieve response headers as a data frame
148 146
 - `get_headers_size()` ---	Retrieve size of content | body | headers
149 147
 - `get_request_type()` ---	Retrieve or test request type
150 148
 - `get_request_url()` ---	Retrieve request URL
149
+- `get_response_url()` ---	Retrieve response URL
151 150
 - `get_response_body()` ---	Retrieve the body content of a HAR entry
152 151
 
153 152
 We've seen one example of them already, here's another:
154 153
 
155
-```
156
-map_dbl(entries, get_body_size)
157
-##  [1]    1180  132571 1211097     701      -1     466   20342     579    4489
158
-## [10]   13332    1774   18028   59782   48008   55270   48323   42879   36116
159
-## [19]   69560   59602   58135   37443   17266   49840     464   20342   14579
160
-## [28]   14626   16265   14473   14565   13639   15106   12383   41887     186
161
-## [37]     186     186     186     185     186     186     186     186     186
162
-## [46]     186     186     186     186     185     186     186     186     186
163
-## [55]     223     286     170     158     272     272     280     267
154
+```{r eval=FALSE}
155
+har_entries(apple) %>% 
156
+  purrr::map_dbl(get_body_size)
157
+##  [1]      0  54521  95644  98069  43183   8689  19035 794210  66487 133730 311054  13850 199928 161859  90322 343189  19035
158
+## [18] 794210  66487 133730    554    802   1002   1160   1694    264   1082   1661    390    416 108468 108828 100064 109728
159
+## [35] 109412  99196 108856 109360 108048   8868  10648  10380  10476    137 311054  13850   3192   3253   4130   2027   1247
160
+## [52]   1748    582 199928 109628 107832 109068 100632 108928  97812 108312 108716 107028  65220  73628  72188  72600  70400
161
+## [69]  73928  72164  73012  71080   1185 161859  90322 343189      0    491  60166  58509  60166  58509  53281  53281
164 162
 ```
165 163
 
166
-You can bop around the data and you'll find that the one we want is a "catalog" file). We can look for it with these tools:
164
+So, a visit to Apple's page transfers nearly 8MB of content down to your browser.
167 165
 
168
-```
169
-idx <- which(map_lgl(entries, is_json))
166
+California also claims to care about your privacy, but is it _really_ true?
170 167
 
171
-map_chr(entries[idx], get_request_url) %>%
172
-  grepl("catalog", .) %>%
173
-  which()
174
-## [1] 6
175
-```
168
+```{r eval=FALSE}
169
+ca <- render_har(url = "https://www.ca.gov/", response_body = TRUE)
176 170
 
177
-and, then use another helper `as_response()` which makes the HAR entry behave like an `httr` `response` object so we can use familiar idioms to get the data.
171
+har_entries(ca) %>% 
172
+  purrr::map_chr(~.x$response$url %>% urltools::domain()) %>% 
173
+  unique()
174
+##  [1] "www.ca.gov"                      "fonts.googleapis.com"            "california.azureedge.net"       
175
+##  [4] "portal-california.azureedge.net" "az416426.vo.msecnd.net"          "fonts.gstatic.com"              
176
+##  [7] "ssl.google-analytics.com"        "cse.google.com"                  "translate.google.com"           
177
+## [10] "api.stateentityprofile.ca.gov"   "translate.googleapis.com"        "www.google.com"                 
178
+## [13] "clients1.google.com"             "www.gstatic.com"                 "platform.twitter.com"           
179
+## [16] "dc.services.visualstudio.com"   
180
+```
178 181
 
182
+Yikes! It _sure_ doesn't look that way given all the folks they let track you when you visit their main page. Are they executing javascript from those sites?
183
+
184
+```{r eval=FALSE}
185
+## # A tibble: 8 x 2
186
+##   dom                      type                    
187
+##   <chr>                    <chr>                   
188
+## 1 california.azureedge.net application/javascript  
189
+## 2 california.azureedge.net application/x-javascript
190
+## 3 az416426.vo.msecnd.net   application/x-javascript
191
+## 4 cse.google.com           text/javascript         
192
+## 5 translate.google.com     text/javascript         
193
+## 6 translate.googleapis.com text/javascript         
194
+## 7 www.google.com           text/javascript         
195
+## 8 platform.twitter.com     application/javascript  
179 196
 ```
180
-as_response(entries[idx][[6]]) %>%
181
-  content(as = "text", encoding = "UTF-8") %>%
182
-  jsonlite::fromJSON(flatten=TRUE) %>%
183
-  tbl_df() %>%
184
-  glimpse()
185
-## Observations: 99
186
-## Variables: 12
187
-## $ id              <int> 170653, 170659, 170560, 170565, 178189, 178307, 201556, 238312, 238307, 2383...
188
-## $ guid            <chr> "54595a8c-b267-11e6-93d3-9457a5578ca0", "63176082-b267-11e6-93d3-9457a5578ca...
189
-## $ pyramid         <chr> "LIDAR-DSM-1M-ENGLAND-2003-EA", "LIDAR-DSM-1M-ENGLAND-2003-EA", "LIDAR-DSM-1...
190
-## $ tileReference   <chr> "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ3...
191
-## $ fileName        <chr> "LIDAR-DSM-1M-2003-TQ38se.zip", "LIDAR-DSM-1M-2003-TQ38ne.zip", "LIDAR-DSM-1...
192
-## $ coverageLayer   <chr> "LIDAR-DSM-1M-ENGLAND-2003-EA-MD-YY", "LIDAR-DSM-1M-ENGLAND-2003-EA-MD-YY", ...
193
-## $ fileSize        <int> 76177943, 52109669, 59326278, 18048623, 11919071, 13204420, 511124, 11736980...
194
-## $ descriptiveName <chr> "LIDAR Tiles DSM at 1m spatial resolution 2003", "LIDAR Tiles DSM at 1m spat...
195
-## $ description     <chr> "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "DSM at 1m...
196
-## $ groupName       <chr> "LIDAR-DSM-TIMESTAMPED-ENGLAND-2003-EA", "LIDAR-DSM-TIMESTAMPED-ENGLAND-2003...
197
-## $ displayOrder    <int> -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100...
198
-## $ metaDataUrl     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "https://data.gov.uk/dataset/lid...
197
+
198
+We can also examine the response headers to check for signs of safety (i.e. are there content security policy headers or other types of security-oriented headers):
199
+
200
+```{r eval=FALSE}
201
+har_entries(ca) %>% 
202
+  purrr::map_df(get_headers) %>% 
203
+  dplyr::count(name, sort=TRUE) %>% 
204
+  print(n=50)
205
+## # A tibble: 42 x 2
206
+##    name                              n
207
+##    <chr>                         <int>
208
+##  1 date                            149
209
+##  2 server                          148
210
+##  3 content-type                    142
211
+##  4 last-modified                   126
212
+##  5 etag                            104
213
+##  6 content-encoding                 83
214
+##  7 access-control-allow-origin      78
215
+##  8 accept-ranges                    74
216
+##  9 vary                             69
217
+## 10 content-length                   66
218
+## 11 x-ms-ref                         57
219
+## 12 x-ms-ref-originshield            57
220
+## 13 access-control-expose-headers    56
221
+## 14 content-md5                      51
222
+## 15 x-ms-blob-type                   51
223
+## 16 x-ms-lease-status                51
224
+## 17 x-ms-request-id                  51
225
+## 18 x-ms-version                     51
226
+## 19 cache-control                    37
227
+## 20 expires                          34
228
+## 21 alt-svc                          30
229
+## 22 x-xss-protection                 29
230
+## 23 x-content-type-options           27
231
+## 24 age                              22
232
+## 25 transfer-encoding                20
233
+## 26 timing-allow-origin              14
234
+## 27 x-powered-by                     14
235
+## 28 access-control-allow-headers      7
236
+## 29 pragma                            6
237
+## 30 request-context                   5
238
+## 31 x-aspnet-version                  5
239
+## 32 x-frame-options                   4
240
+## 33 content-disposition               3
241
+## 34 access-control-max-age            2
242
+## 35 content-language                  2
243
+## 36 p3p                               2
244
+## 37 x-cache                           2
245
+## 38 access-control-allow-methods      1
246
+## 39 location                          1
247
+## 40 set-cookie                        1
248
+## 41 strict-transport-security         1
249
+## 42 x-ms-session-id                   1
199 250
 ```
200 251
 
201
-Nowm, we have the data file download and metadata info.
252
+Unfortunately, they do let Google and Twitter execute javascript.
253
+
254
+They seem to use quite a bit of Microsoft tech. Let's look at the HTTP servers they directly and indirectly rely on:
255
+
256
+```{r eval=FALSE}
257
+har_entries(ca) %>% 
258
+  purrr::map_chr(get_header_val, "server") %>% 
259
+  table(dnn = "server") %>% 
260
+  broom::tidy() %>% 
261
+  dplyr::arrange(desc(n))
262
+## # A tibble: 14 x 2
263
+##    server                                           n
264
+##    <chr>                                        <int>
265
+##  1 Apache                                          55
266
+##  2 Windows-Azure-Blob/1.0 Microsoft-HTTPAPI/2.0    50
267
+##  3 sffe                                            23
268
+##  4 Microsoft-IIS/10.0                               7
269
+##  5 ESF                                              3
270
+##  6 HTTP server (unknown)                            2
271
+##  7 ECAcc (bsa/EAD2)                                 1
272
+##  8 ECD (sjc/16E0)                                   1
273
+##  9 ECD (sjc/16EA)                                   1
274
+## 10 ECD (sjc/16F4)                                   1
275
+## 11 ECD (sjc/4E95)                                   1
276
+## 12 ECD (sjc/4E9F)                                   1
277
+## 13 ECS (bsa/EB1F)                                   1
278
+## 14 gws                                              1
279
+```
202 280
 
203 281
 ## Impersonating Other Browsers
204 282
 
@@ -216,12 +294,17 @@ The various `render_` functions present themselves as modern WebKit Linux browse
216 294
 - `ua_linux_chrome`
217 295
 - `ua_linux_firefox`
218 296
 - `ua_ios_safari`
297
+- `ua_android_samsung`
298
+- `ua_kindle`
299
+- `ua_ps4`
300
+- `ua_apple_tv`
301
+- `ua_chromecast`
219 302
 
220 303
 NOTE: These can be used with `curl`, `httr`, `rvest` and `RCurl` calls as well.
221 304
 
222 305
 We can see it in action:
223 306
 
224
-```
307
+```{r eval=FALSE}
225 308
 URL <- "https://httpbin.org/user-agent"
226 309
 
227 310
 splash_local %>%
@@ -251,7 +334,7 @@ The `install_splash()` will pull the image locally for you. It takes a bit (the
251 334
 
252 335
 The best way to use start/stop is to:
253 336
 
254
-```
337
+```{r eval=FALSE}
255 338
 spi <- start_splash()
256 339
 
257 340
 # ... scraping tasks ...

Loading…
Отказ
Запис