Browse Source

2.43.0

master
boB Rudis 4 years ago
parent
commit
cef05d7ddf
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 10
      DESCRIPTION
  2. 4
      NEWS.md
  3. 8
      R/wc-inspect.R
  4. BIN
      inst/java/htmlunit-1.0-SNAPSHOT.jar
  5. BIN
      java/htmlunit/deps/commons-io-2.7.jar
  6. BIN
      java/htmlunit/deps/commons-lang3-3.11.jar
  7. BIN
      java/htmlunit/deps/commons-net-3.7.jar
  8. BIN
      java/htmlunit/deps/commons-text-1.9.jar
  9. BIN
      java/htmlunit/deps/htmlunit-2.43.0.jar
  10. BIN
      java/htmlunit/deps/htmlunit-core-js-2.43.0.jar
  11. BIN
      java/htmlunit/deps/jetty-client-9.4.31.v20200723.jar
  12. BIN
      java/htmlunit/deps/jetty-http-9.4.31.v20200723.jar
  13. BIN
      java/htmlunit/deps/jetty-io-9.4.31.v20200723.jar
  14. BIN
      java/htmlunit/deps/jetty-util-9.4.31.v20200723.jar
  15. BIN
      java/htmlunit/deps/jetty-xml-9.4.31.v20200723.jar
  16. BIN
      java/htmlunit/deps/neko-htmlunit-2.43.0.jar
  17. BIN
      java/htmlunit/deps/salvation-2.7.2.jar
  18. BIN
      java/htmlunit/deps/websocket-api-9.4.31.v20200723.jar
  19. BIN
      java/htmlunit/deps/websocket-client-9.4.31.v20200723.jar
  20. BIN
      java/htmlunit/deps/websocket-common-9.4.31.v20200723.jar
  21. 2
      java/htmlunit/pom.xml
  22. 4
      java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java
  23. BIN
      java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class
  24. BIN
      java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class
  25. BIN
      java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar
  26. 4
      man/hu_read_html.Rd
  27. 7
      man/wc_inspect.Rd

10
DESCRIPTION

@ -1,8 +1,8 @@
Package: htmlunit Package: htmlunit
Type: Package Type: Package
Title: Tools to Scrape Dynamic Web Content via the 'HtmlUnit' Java Library Title: Tools to Scrape Dynamic Web Content via the 'HtmlUnit' Java Library
Version: 0.4.0 Version: 0.5.0
Date: 2020-05-09 Date: 2020-07-18
Authors@R: c( Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640")), comment = c(ORCID = "0000-0001-5670-2640")),
@ -28,11 +28,11 @@ Imports:
Suggests: Suggests:
covr, tinytest covr, tinytest
Depends: Depends:
R (>= 3.2.0), R (>= 3.6.0),
rJava, rJava,
htmlunitjars (>= 2.40.0), htmlunitjars (>= 2.43.0),
rvest, rvest,
xml2 xml2
Roxygen: list(markdown = TRUE) Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.0 RoxygenNote: 7.1.1
Remotes: gitlab::hrbrmstr/htmlunitjars Remotes: gitlab::hrbrmstr/htmlunitjars

4
NEWS.md

@ -1,3 +1,7 @@
0.5.0
* Updated for 2.43.0 jars
* Added `timeout` to `wc_inspect()`
0.4.0 0.4.0
* Switched to {tinytest} * Switched to {tinytest}
* Updated for 2.40.0 jars * Updated for 2.40.0 jars

8
R/wc-inspect.R

@ -5,12 +5,16 @@
#' @md #' @md
#' @param url URL to fetch #' @param url URL to fetch
#' @param js_delay (ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000) #' @param js_delay (ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)
#' @param timeout Sets the timeout (milliseconds) of the web connection. Set to zero for an infinite wait.
#' Defaults to `30000`. Note: The timeout is used twice. The first is for making the socket
#' connection, the second is for data retrieval. If the time is critical you must allow for twice
#' the time specified here.
#' @export #' @export
wc_inspect <- function(url, js_delay = 5000L) { wc_inspect <- function(url, js_delay = 5000L, timeout = 30000L) {
app <- J("is.rud.htmlunit.Zapp") app <- J("is.rud.htmlunit.Zapp")
res <- app$getRequestsFor(url, .jlong(js_delay)) res <- app$getRequestsFor(url, .jlong(js_delay), .jint(timeout))
res <- as.list(res) res <- as.list(res)
lapply(res, function(.x) { lapply(res, function(.x) {

BIN
inst/java/htmlunit-1.0-SNAPSHOT.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-io-2.7.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-lang3-3.11.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-net-3.7.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-text-1.9.jar

Binary file not shown.

BIN
java/htmlunit/deps/htmlunit-2.43.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/htmlunit-core-js-2.43.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-client-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-http-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-io-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-util-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-xml-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/neko-htmlunit-2.43.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/salvation-2.7.2.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-api-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-client-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-common-9.4.31.v20200723.jar

Binary file not shown.

2
java/htmlunit/pom.xml

@ -25,7 +25,7 @@
<dependency> <dependency>
<groupId>net.sourceforge.htmlunit</groupId> <groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId> <artifactId>htmlunit</artifactId>
<version>2.40.0</version> <version>2.43.0</version>
</dependency> </dependency>
</dependencies> </dependencies>
</project> </project>

4
java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java

@ -8,7 +8,7 @@ import java.io.*;
public class Zapp { public class Zapp {
public static List<WebResponse> getRequestsFor(String url, long jsDelay) throws IOException { public static List<WebResponse> getRequestsFor(String url, long jsDelay, int timeout) throws IOException {
final WebClient webClient = new WebClient(BrowserVersion.CHROME); final WebClient webClient = new WebClient(BrowserVersion.CHROME);
@ -16,7 +16,7 @@ public class Zapp {
wco.setThrowExceptionOnScriptError(false); wco.setThrowExceptionOnScriptError(false);
wco.setCssEnabled(true); wco.setCssEnabled(true);
wco.setDownloadImages(true); wco.setDownloadImages(true);
wco.setTimeout(30000); wco.setTimeout(timeout);
final List<WebResponse> list = new ArrayList<>(); final List<WebResponse> list = new ArrayList<>();

BIN
java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class

Binary file not shown.

BIN
java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class

Binary file not shown.

BIN
java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar

Binary file not shown.

4
man/hu_read_html.Rd

@ -22,7 +22,7 @@ hu_read_html(
\item{emulate}{browser to emulate; one of "\code{best}", "\code{chrome}", "\code{firefox}", "\code{ie}"} \item{emulate}{browser to emulate; one of "\code{best}", "\code{chrome}", "\code{firefox}", "\code{ie}"}
\item{ret}{what to return; if \code{html_document} (the default) then the HTML created \item{ret}{what to return; if \code{html_document} (the default) then the HTML created
by the \code{HtmlUnit} emulated browser context is passed to \code{\link[xml2:read_html]{xml2::read_html()}} by the \code{HtmlUnit} emulated browser context is passed to \code{\link[xml2:read_xml]{xml2::read_html()}}
and an \code{xml2} \code{html_document}/\code{xml_document} is returned. Note that this causes and an \code{xml2} \code{html_document}/\code{xml_document} is returned. Note that this causes
further HTML processing by \code{xml2}/\code{libxml2} so is not \emph{exactly} what further HTML processing by \code{xml2}/\code{libxml2} so is not \emph{exactly} what
\code{HtmlUnit} generated. If you want the HTML code (text) without any further \code{HtmlUnit} generated. If you want the HTML code (text) without any further
@ -47,7 +47,7 @@ function is a high-level wrapper designed to do a read of HTML,
it is recommended that you leave this the default \code{FALSE} to save it is recommended that you leave this the default \code{FALSE} to save
time/bandwidth.} time/bandwidth.}
\item{options}{options to pass to \code{\link[xml2:read_html]{xml2::read_html()}} if \code{ret} == \code{html_document}.} \item{options}{options to pass to \code{\link[xml2:read_xml]{xml2::read_html()}} if \code{ret} == \code{html_document}.}
} }
\value{ \value{
an \code{xml2} \code{html_document}/\code{xml_document} if \code{ret} == \code{html_document} else an \code{xml2} \code{html_document}/\code{xml_document} if \code{ret} == \code{html_document} else

7
man/wc_inspect.Rd

@ -4,12 +4,17 @@
\alias{wc_inspect} \alias{wc_inspect}
\title{Perform a "Developer Tools"-like Network Inspection of a URL} \title{Perform a "Developer Tools"-like Network Inspection of a URL}
\usage{ \usage{
wc_inspect(url, js_delay = 5000L) wc_inspect(url, js_delay = 5000L, timeout = 30000L)
} }
\arguments{ \arguments{
\item{url}{URL to fetch} \item{url}{URL to fetch}
\item{js_delay}{(ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)} \item{js_delay}{(ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)}
\item{timeout}{Sets the timeout (milliseconds) of the web connection. Set to zero for an infinite wait.
Defaults to \code{30000}. Note: The timeout is used twice. The first is for making the socket
connection, the second is for data retrieval. If the time is critical you must allow for twice
the time specified here.}
} }
\description{ \description{
Retrieves \emph{all} content loaded Retrieves \emph{all} content loaded

Loading…
Cancel
Save