Browse Source

a few new DSL & helper functions

boB Rudis 3 years ago
parent
commit
4a1320a595
14 changed files with 168 additions and 46 deletions
  1. 23
    21
      DESCRIPTION
  2. 5
    1
      NAMESPACE
  3. 0
    1
      R/aaa.r
  4. 2
    2
      R/as_req.r
  5. 1
    1
      R/content.r
  6. 10
    0
      R/dsl.r
  7. 49
    0
      R/helpers.r
  8. 18
    0
      man/as_httr_req.Rd
  9. 0
    18
      man/as_req.Rd
  10. 2
    2
      man/get_content_size.Rd
  11. 16
    0
      man/get_har_entry.Rd
  12. 14
    0
      man/har_entries.Rd
  13. 14
    0
      man/har_entry_count.Rd
  14. 14
    0
      man/splash_har_reset.Rd

+ 23
- 21
DESCRIPTION View File

@@ -1,44 +1,46 @@
1 1
 Package: splashr
2 2
 Type: Package
3
-Title: Tools to Work with the 'Splash' JavaScript Rendering Service
3
+Title: Tools to Work with the 'Splash' 'JavaScript' Rendering Service
4 4
 Version: 0.3.0
5 5
 Date: 2017-02-14
6 6
 Encoding: UTF-8
7 7
 Author: Bob Rudis (bob@rud.is)
8 8
 Maintainer: Bob Rudis <bob@rud.is>
9
-Description: 'Splash' <https://github.com/scrapinghub/splash> is a javascript rendering service.
10
-    It’s a lightweight web browser with an 'HTTP' API, implemented in Python using 'Twisted' 
9
+Description: 'Splash' <https://github.com/scrapinghub/splash> is a 'JavaScript' rendering service.
10
+    It’s a lightweight web browser with an 'HTTP' API, implemented in 'Python' using 'Twisted' 
11 11
     and 'QT' and provides some of the core functionality of the 'RSelenium' or 'seleniumPipes'
12 12
     R packages but with a Java-free footprint. The (twisted) 'QT' reactor is used to make the 
13
-    sever fully asynchronous allowing to take advantage of 'webkit' concurrency via QT main loop. 
14
-    Some of Splash features include the ability to process multiple webpages in parallel; 
15
-    retrieving HTML results and/or take screenshots; disabling images or use Adblock Plus rules 
16
-    to make rendering faster; executing custom JavaScript in page context; getting detailed
17
-    rendering info in HAR format.
13
+    server fully asynchronous, allowing it to take advantage of 'webkit' concurrency via the 'QT' main loop. 
14
+    Some of 'Splash' features include the ability to process multiple webpages in parallel; 
15
+    retrieving 'HTML' results and/or take screenshots; disabling images or use 'Adblock Plus' rules 
16
+    to make rendering faster; executing custom 'JavaScript' in page context; getting detailed
17
+    rendering info in 'HAR' format.
18 18
 URL: http://github.com/hrbrmstr/splashr
19 19
 BugReports: https://github.com/hrbrmstr/splashr/issues
20 20
 License: AGPL
21 21
 Suggests:
22 22
     testthat,
23
-    tibble
23
+    tibble,
24
+    jpeg,
25
+    png
24 26
 Depends:
25 27
     R (>= 3.2.0)
26 28
 Imports:
27
-    purrr,
28
-    httr,
29 29
     xml2,
30
-    jsonlite,
31
-    magick,
32
-    stringi,
30
+    curl,
31
+    httr,
33 32
     clipr,
34
-    HARtools,
35
-    openssl,
36
-    lubridate,
37
-    formatR,
38
-    scales,
39
-    harbor,
33
+    purrr,
40 34
     stats,
41 35
     utils,
42
-    curl
36
+    harbor,
37
+    magick,
38
+    scales,
39
+    formatR,
40
+    openssl,
41
+    stringi,
42
+    jsonlite,
43
+    HARtools,
44
+    lubridate
43 45
 RoxygenNote: 6.0.0
44 46
 Remotes: wch/harbor

+ 5
- 1
NAMESPACE View File

@@ -6,16 +6,19 @@ export("%>%")
6 6
 export(HARviewer)
7 7
 export(HARviewerOutput)
8 8
 export(as_har)
9
-export(as_req)
9
+export(as_httr_req)
10 10
 export(as_response)
11 11
 export(execute_lua)
12 12
 export(get_body_size)
13 13
 export(get_content_size)
14 14
 export(get_content_type)
15
+export(get_har_entry)
15 16
 export(get_headers_size)
16 17
 export(get_request_type)
17 18
 export(get_request_url)
18 19
 export(get_response_body)
20
+export(har_entries)
21
+export(har_entry_count)
19 22
 export(install_splash)
20 23
 export(is_binary)
21 24
 export(is_content_type)
@@ -47,6 +50,7 @@ export(splash_debug)
47 50
 export(splash_focus)
48 51
 export(splash_go)
49 52
 export(splash_har)
53
+export(splash_har_reset)
50 54
 export(splash_history)
51 55
 export(splash_html)
52 56
 export(splash_images)

+ 0
- 1
R/aaa.r View File

@@ -7,7 +7,6 @@ trunc_string <- function (x, maxlen = 20, justify = "left")  {
7 7
   return(formatC(chopx, width = maxlen, flag = ifelse(justify == "left", "-", " ")))
8 8
 }
9 9
 
10
-
11 10
 parse_query <- function(query) {
12 11
   params <- vapply(stri_split_regex(query, "&", omit_empty=TRUE)[[1]],
13 12
                    stri_split_fixed, "=", 2, simplify=TRUE,

+ 2
- 2
R/as_req.r View File

@@ -1,11 +1,11 @@
1
-#' Create an httr function from an HAR request
1
+#' Create an httr verb request function from an HAR request
2 2
 #'
3 3
 #' @md
4 4
 #' @param entry HAR entry
5 5
 #' @param quiet quiet
6 6
 #' @param add_clip add clip
7 7
 #' @export
8
-as_req <- function(entry, quiet=TRUE, add_clip=TRUE) {
8
+as_httr_req <- function(entry, quiet=TRUE, add_clip=TRUE) {
9 9
 
10 10
   req <- entry$request
11 11
 

+ 1
- 1
R/content.r View File

@@ -1,4 +1,4 @@
1
-#' Retrieve size of content |  body | headers
1
+#' Retrieve size of content | body | headers
2 2
 #'
3 3
 #' @param har_resp_obj HAR response object
4 4
 #' @export

+ 10
- 0
R/dsl.r View File

@@ -229,6 +229,16 @@ splash_wait <- function(splash_obj, time=2) {
229 229
    splash_obj
230 230
 }
231 231
 
232
#' Drops all internally stored HAR records.
#'
#' Appends a `splash:har_reset()` call to the `calls` element of `splash_obj`
#' and returns the updated object.
#'
#' @md
#' @param splash_obj splashr object
#' @param keys unused. Retained only so that existing calls which supply it
#'   keep working; its value is never evaluated.
#' @return `splash_obj` with `'splash:har_reset()'` appended to its `calls`
#' @export
splash_har_reset <- function(splash_obj, keys = NULL) {
  # `keys` is intentionally ignored: the emitted call carries no arguments.
  splash_obj$calls <- c(splash_obj$calls, 'splash:har_reset()')
  splash_obj
}
241
+
232 242
 #' Return information about Splash interaction with a website in HAR format.
233 243
 #'
234 244
 #' Similar to [render_har()] but used in a script context. Should be the LAST element in

+ 49
- 0
R/helpers.r View File

@@ -121,3 +121,52 @@ is_get <- function(har_resp_obj) { get_request_type(har_resp_obj) == "GET" }
121 121
 #' @rdname get_request_type
122 122
 #' @export
123 123
 is_post <- function(har_resp_obj) { get_request_type(har_resp_obj) == "POST" }
124
+
125
#' Retrieve just the HAR entries from a splashr request
#'
#' @md
#' @param x can be a `har` object, `harlog` object or `harentries` object
#' @return the entries held by `x` (`x` itself when it already is a
#'   `harentries` object), or `NULL` when `x` is none of the supported classes
#' @export
har_entries <- function(x) {
  # Classes are checked outermost container first: har -> harlog -> harentries.
  if (inherits(x, "har")) {
    return(x$log$entries)
  }
  if (inherits(x, "harlog")) {
    return(x$entries)
  }
  if (inherits(x, "harentries")) {
    return(x)
  }
  NULL
}
140
+
141
#' Retrieve an entry by index from a HAR object
#'
#' @md
#' @param x can be a `har` object, `harlog` object or `harentries` object
#' @param i index of the HAR entry to retrieve
#' @return the `i`-th entry of `x`, or `NULL` when `x` is none of the
#'   supported classes
#' @export
get_har_entry <- function(x, i=1) {
  # Each supported class nests the entries one level shallower than the last.
  if (inherits(x, "har")) {
    return(x$log$entries[[i]])
  }
  if (inherits(x, "harlog")) {
    return(x$entries[[i]])
  }
  if (inherits(x, "harentries")) {
    return(x[[i]])
  }
  NULL
}
157
+
158
#' Retrieves number of HAR entries in a response
#'
#' @md
#' @param x can be a `har` object, `harlog` object or `harentries` object
#' @return the number of entries in `x`, or `NULL` when `x` is none of the
#'   supported classes
#' @export
har_entry_count <- function(x) {
  if (inherits(x, "har")) {
    length(x$log$entries)
  } else if (inherits(x, "harlog")) {
    length(x$entries)
  } else if (inherits(x, "harentries")) {
    # A `harentries` object is the entry list itself, so count it directly.
    # (This function has no index argument; there is nothing to subset by.)
    length(x)
  } else {
    NULL
  }
}

+ 18
- 0
man/as_httr_req.Rd View File

@@ -0,0 +1,18 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/as_req.r
3
+\name{as_httr_req}
4
+\alias{as_httr_req}
5
+\title{Create an httr verb request function from an HAR request}
6
+\usage{
7
+as_httr_req(entry, quiet = TRUE, add_clip = TRUE)
8
+}
9
+\arguments{
10
+\item{entry}{HAR entry}
11
+
12
+\item{quiet}{quiet}
13
+
14
+\item{add_clip}{add clip}
15
+}
16
+\description{
17
+Create an httr verb request function from an HAR request
18
+}

+ 0
- 18
man/as_req.Rd View File

@@ -1,18 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/as_req.r
3
-\name{as_req}
4
-\alias{as_req}
5
-\title{Create an httr function from an HAR request}
6
-\usage{
7
-as_req(entry, quiet = TRUE, add_clip = TRUE)
8
-}
9
-\arguments{
10
-\item{entry}{HAR entry}
11
-
12
-\item{quiet}{quiet}
13
-
14
-\item{add_clip}{add clip}
15
-}
16
-\description{
17
-Create an httr function from an HAR request
18
-}

+ 2
- 2
man/get_content_size.Rd View File

@@ -4,7 +4,7 @@
4 4
 \alias{get_content_size}
5 5
 \alias{get_body_size}
6 6
 \alias{get_headers_size}
7
-\title{Retrieve size of content |  body | headers}
7
+\title{Retrieve size of content | body | headers}
8 8
 \usage{
9 9
 get_content_size(har_resp_obj)
10 10
 
@@ -16,5 +16,5 @@ get_headers_size(har_resp_obj)
16 16
 \item{har_resp_obj}{HAR response object}
17 17
 }
18 18
 \description{
19
-Retrieve size of content |  body | headers
19
+Retrieve size of content | body | headers
20 20
 }

+ 16
- 0
man/get_har_entry.Rd View File

@@ -0,0 +1,16 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/helpers.r
3
+\name{get_har_entry}
4
+\alias{get_har_entry}
5
+\title{Retrieve an entry by index from a HAR object}
6
+\usage{
7
+get_har_entry(x, i = 1)
8
+}
9
+\arguments{
10
+\item{x}{can be a `har` object, `harlog` object or `harentries` object}
11
+
12
+\item{i}{index of the HAR entry to retrieve}
13
+}
14
+\description{
15
+Retrieve an entry by index from a HAR object
16
+}

+ 14
- 0
man/har_entries.Rd View File

@@ -0,0 +1,14 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/helpers.r
3
+\name{har_entries}
4
+\alias{har_entries}
5
+\title{Retrieve just the HAR entries from a splashr request}
6
+\usage{
7
+har_entries(x)
8
+}
9
+\arguments{
10
+\item{x}{can be a `har` object, `harlog` object or `harentries` object}
11
+}
12
+\description{
13
+Retrieve just the HAR entries from a splashr request
14
+}

+ 14
- 0
man/har_entry_count.Rd View File

@@ -0,0 +1,14 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/helpers.r
3
+\name{har_entry_count}
4
+\alias{har_entry_count}
5
+\title{Retrieves number of HAR entries in a response}
6
+\usage{
7
+har_entry_count(x)
8
+}
9
+\arguments{
10
+\item{x}{can be a `har` object, `harlog` object or `harentries` object}
11
+}
12
+\description{
13
+Retrieves number of HAR entries in a response
14
+}

+ 14
- 0
man/splash_har_reset.Rd View File

@@ -0,0 +1,14 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/dsl.r
3
+\name{splash_har_reset}
4
+\alias{splash_har_reset}
5
+\title{Drops all internally stored HAR records.}
6
+\usage{
7
+splash_har_reset(splash_obj, keys)
8
+}
9
+\arguments{
10
+\item{splash_obj}{splashr object}
11
+}
12
+\description{
13
+Drops all internally stored HAR records.
14
+}

Loading…
Cancel
Save