diff --git a/DESCRIPTION b/DESCRIPTION
index 72dc0b6..98acc37 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
Package: htmlunit
Type: Package
Title: Tools to Scrape Dynamic Web Content via the 'HtmlUnit' Java Library
-Version: 0.4.0
-Date: 2020-05-09
+Version: 0.5.0
+Date: 2020-07-18
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640")),
@@ -28,11 +28,11 @@ Imports:
Suggests:
covr, tinytest
Depends:
- R (>= 3.2.0),
+ R (>= 3.6.0),
rJava,
- htmlunitjars (>= 2.40.0),
+ htmlunitjars (>= 2.43.0),
rvest,
xml2
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
Remotes: gitlab::hrbrmstr/htmlunitjars
diff --git a/NEWS.md b/NEWS.md
index 43641de..caa6949 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+0.5.0
+* Updated for 2.43.0 jars
+* Added `timeout` to `wc_inspect()`
+
0.4.0
* Switched to {tinytest}
* Updated for 2.40.0 jars
diff --git a/R/wc-inspect.R b/R/wc-inspect.R
index 40e0e63..4429c1a 100644
--- a/R/wc-inspect.R
+++ b/R/wc-inspect.R
@@ -5,12 +5,16 @@
#' @md
#' @param url URL to fetch
#' @param js_delay (ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)
+#' @param timeout Sets the timeout (milliseconds) of the web connection. Set to zero for an infinite wait.
+#' Defaults to `30000`. Note: The timeout is used twice. The first is for making the socket
+#' connection, the second is for data retrieval. If the time is critical you must allow for twice
+#' the time specified here.
#' @export
-wc_inspect <- function(url, js_delay = 5000L) {
+wc_inspect <- function(url, js_delay = 5000L, timeout = 30000L) {
app <- J("is.rud.htmlunit.Zapp")
- res <- app$getRequestsFor(url, .jlong(js_delay))
+ res <- app$getRequestsFor(url, .jlong(js_delay), .jint(timeout))
res <- as.list(res)
lapply(res, function(.x) {
diff --git a/inst/java/htmlunit-1.0-SNAPSHOT.jar b/inst/java/htmlunit-1.0-SNAPSHOT.jar
index a85d537..f3a0cf0 100644
Binary files a/inst/java/htmlunit-1.0-SNAPSHOT.jar and b/inst/java/htmlunit-1.0-SNAPSHOT.jar differ
diff --git a/java/htmlunit/deps/commons-io-2.7.jar b/java/htmlunit/deps/commons-io-2.7.jar
new file mode 100644
index 0000000..5889458
Binary files /dev/null and b/java/htmlunit/deps/commons-io-2.7.jar differ
diff --git a/java/htmlunit/deps/commons-lang3-3.11.jar b/java/htmlunit/deps/commons-lang3-3.11.jar
new file mode 100644
index 0000000..bbaa8a6
Binary files /dev/null and b/java/htmlunit/deps/commons-lang3-3.11.jar differ
diff --git a/java/htmlunit/deps/commons-net-3.7.jar b/java/htmlunit/deps/commons-net-3.7.jar
new file mode 100644
index 0000000..7d7bb5f
Binary files /dev/null and b/java/htmlunit/deps/commons-net-3.7.jar differ
diff --git a/java/htmlunit/deps/commons-text-1.9.jar b/java/htmlunit/deps/commons-text-1.9.jar
new file mode 100644
index 0000000..cc0c690
Binary files /dev/null and b/java/htmlunit/deps/commons-text-1.9.jar differ
diff --git a/java/htmlunit/deps/htmlunit-2.43.0.jar b/java/htmlunit/deps/htmlunit-2.43.0.jar
new file mode 100644
index 0000000..3c8c449
Binary files /dev/null and b/java/htmlunit/deps/htmlunit-2.43.0.jar differ
diff --git a/java/htmlunit/deps/htmlunit-core-js-2.43.0.jar b/java/htmlunit/deps/htmlunit-core-js-2.43.0.jar
new file mode 100644
index 0000000..491cb35
Binary files /dev/null and b/java/htmlunit/deps/htmlunit-core-js-2.43.0.jar differ
diff --git a/java/htmlunit/deps/jetty-client-9.4.31.v20200723.jar b/java/htmlunit/deps/jetty-client-9.4.31.v20200723.jar
new file mode 100644
index 0000000..8393c73
Binary files /dev/null and b/java/htmlunit/deps/jetty-client-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/deps/jetty-http-9.4.31.v20200723.jar b/java/htmlunit/deps/jetty-http-9.4.31.v20200723.jar
new file mode 100644
index 0000000..edd09e2
Binary files /dev/null and b/java/htmlunit/deps/jetty-http-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/deps/jetty-io-9.4.31.v20200723.jar b/java/htmlunit/deps/jetty-io-9.4.31.v20200723.jar
new file mode 100644
index 0000000..aaa26bd
Binary files /dev/null and b/java/htmlunit/deps/jetty-io-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/deps/jetty-util-9.4.31.v20200723.jar b/java/htmlunit/deps/jetty-util-9.4.31.v20200723.jar
new file mode 100644
index 0000000..97e9836
Binary files /dev/null and b/java/htmlunit/deps/jetty-util-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/deps/jetty-xml-9.4.31.v20200723.jar b/java/htmlunit/deps/jetty-xml-9.4.31.v20200723.jar
new file mode 100644
index 0000000..568668f
Binary files /dev/null and b/java/htmlunit/deps/jetty-xml-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/deps/neko-htmlunit-2.43.0.jar b/java/htmlunit/deps/neko-htmlunit-2.43.0.jar
new file mode 100644
index 0000000..95fa4b5
Binary files /dev/null and b/java/htmlunit/deps/neko-htmlunit-2.43.0.jar differ
diff --git a/java/htmlunit/deps/salvation-2.7.2.jar b/java/htmlunit/deps/salvation-2.7.2.jar
new file mode 100644
index 0000000..1759a3d
Binary files /dev/null and b/java/htmlunit/deps/salvation-2.7.2.jar differ
diff --git a/java/htmlunit/deps/websocket-api-9.4.31.v20200723.jar b/java/htmlunit/deps/websocket-api-9.4.31.v20200723.jar
new file mode 100644
index 0000000..230e9a2
Binary files /dev/null and b/java/htmlunit/deps/websocket-api-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/deps/websocket-client-9.4.31.v20200723.jar b/java/htmlunit/deps/websocket-client-9.4.31.v20200723.jar
new file mode 100644
index 0000000..dc07417
Binary files /dev/null and b/java/htmlunit/deps/websocket-client-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/deps/websocket-common-9.4.31.v20200723.jar b/java/htmlunit/deps/websocket-common-9.4.31.v20200723.jar
new file mode 100644
index 0000000..9fbebb0
Binary files /dev/null and b/java/htmlunit/deps/websocket-common-9.4.31.v20200723.jar differ
diff --git a/java/htmlunit/pom.xml b/java/htmlunit/pom.xml
index 0a5e444..3e2dad3 100644
--- a/java/htmlunit/pom.xml
+++ b/java/htmlunit/pom.xml
@@ -25,7 +25,7 @@
net.sourceforge.htmlunit
htmlunit
- 2.40.0
+ 2.43.0
diff --git a/java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java b/java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java
index 6547afa..d0f3ff6 100644
--- a/java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java
+++ b/java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java
@@ -8,7 +8,7 @@ import java.io.*;
public class Zapp {
- public static List getRequestsFor(String url, long jsDelay) throws IOException {
+ public static List getRequestsFor(String url, long jsDelay, int timeout) throws IOException {
final WebClient webClient = new WebClient(BrowserVersion.CHROME);
@@ -16,7 +16,7 @@ public class Zapp {
wco.setThrowExceptionOnScriptError(false);
wco.setCssEnabled(true);
wco.setDownloadImages(true);
- wco.setTimeout(30000);
+ wco.setTimeout(timeout);
final List list = new ArrayList<>();
diff --git a/java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class b/java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class
index 1f61b17..f2f0e05 100644
Binary files a/java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class and b/java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class differ
diff --git a/java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class b/java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class
index 6eea53d..2c47824 100644
Binary files a/java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class and b/java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class differ
diff --git a/java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar b/java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar
index a85d537..f3a0cf0 100644
Binary files a/java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar and b/java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar differ
diff --git a/man/hu_read_html.Rd b/man/hu_read_html.Rd
index da4ff76..01a02a3 100644
--- a/man/hu_read_html.Rd
+++ b/man/hu_read_html.Rd
@@ -22,7 +22,7 @@ hu_read_html(
\item{emulate}{browser to emulate; one of "\code{best}", "\code{chrome}", "\code{firefox}", "\code{ie}"}
\item{ret}{what to return; if \code{html_document} (the default) then the HTML created
-by the \code{HtmlUnit} emulated browser context is passed to \code{\link[xml2:read_html]{xml2::read_html()}}
+by the \code{HtmlUnit} emulated browser context is passed to \code{\link[xml2:read_xml]{xml2::read_html()}}
and an \code{xml2} \code{html_document}/\code{xml_document} is returned. Note that this causes
further HTML processing by \code{xml2}/\code{libxml2} so is not \emph{exactly} what
\code{HtmlUnit} generated. If you want the HTML code (text) without any further
@@ -47,7 +47,7 @@ function is a high-level wrapper designed to do a read of HTML,
it is recommended that you leave this the default \code{FALSE} to save
time/bandwidth.}
-\item{options}{options to pass to \code{\link[xml2:read_html]{xml2::read_html()}} if \code{ret} == \code{html_document}.}
+\item{options}{options to pass to \code{\link[xml2:read_xml]{xml2::read_html()}} if \code{ret} == \code{html_document}.}
}
\value{
an \code{xml2} \code{html_document}/\code{xml_document} if \code{ret} == \code{html_document} else
diff --git a/man/wc_inspect.Rd b/man/wc_inspect.Rd
index 90019af..c3c142b 100644
--- a/man/wc_inspect.Rd
+++ b/man/wc_inspect.Rd
@@ -4,12 +4,17 @@
\alias{wc_inspect}
\title{Perform a "Developer Tools"-like Network Inspection of a URL}
\usage{
-wc_inspect(url, js_delay = 5000L)
+wc_inspect(url, js_delay = 5000L, timeout = 30000L)
}
\arguments{
\item{url}{URL to fetch}
\item{js_delay}{(ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)}
+
\item{timeout}{Sets the timeout (milliseconds) of the web connection. Set to zero for an infinite wait.
+Defaults to \code{30000}. Note: The timeout is used twice. The first is for making the socket
+connection, the second is for data retrieval. If the time is critical you must allow for twice
+the time specified here.}
}
\description{
Retrieves \emph{all} content loaded