Browse Source

2.43.0

master
boB Rudis 4 years ago
parent
commit
cef05d7ddf
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 10
      DESCRIPTION
  2. 4
      NEWS.md
  3. 8
      R/wc-inspect.R
  4. BIN
      inst/java/htmlunit-1.0-SNAPSHOT.jar
  5. BIN
      java/htmlunit/deps/commons-io-2.7.jar
  6. BIN
      java/htmlunit/deps/commons-lang3-3.11.jar
  7. BIN
      java/htmlunit/deps/commons-net-3.7.jar
  8. BIN
      java/htmlunit/deps/commons-text-1.9.jar
  9. BIN
      java/htmlunit/deps/htmlunit-2.43.0.jar
  10. BIN
      java/htmlunit/deps/htmlunit-core-js-2.43.0.jar
  11. BIN
      java/htmlunit/deps/jetty-client-9.4.31.v20200723.jar
  12. BIN
      java/htmlunit/deps/jetty-http-9.4.31.v20200723.jar
  13. BIN
      java/htmlunit/deps/jetty-io-9.4.31.v20200723.jar
  14. BIN
      java/htmlunit/deps/jetty-util-9.4.31.v20200723.jar
  15. BIN
      java/htmlunit/deps/jetty-xml-9.4.31.v20200723.jar
  16. BIN
      java/htmlunit/deps/neko-htmlunit-2.43.0.jar
  17. BIN
      java/htmlunit/deps/salvation-2.7.2.jar
  18. BIN
      java/htmlunit/deps/websocket-api-9.4.31.v20200723.jar
  19. BIN
      java/htmlunit/deps/websocket-client-9.4.31.v20200723.jar
  20. BIN
      java/htmlunit/deps/websocket-common-9.4.31.v20200723.jar
  21. 2
      java/htmlunit/pom.xml
  22. 4
      java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java
  23. BIN
      java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class
  24. BIN
      java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class
  25. BIN
      java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar
  26. 4
      man/hu_read_html.Rd
  27. 7
      man/wc_inspect.Rd

10
DESCRIPTION

@ -1,8 +1,8 @@
Package: htmlunit
Type: Package
Title: Tools to Scrape Dynamic Web Content via the 'HtmlUnit' Java Library
Version: 0.4.0
Date: 2020-05-09
Version: 0.5.0
Date: 2020-07-18
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640")),
@ -28,11 +28,11 @@ Imports:
Suggests:
covr, tinytest
Depends:
R (>= 3.2.0),
R (>= 3.6.0),
rJava,
htmlunitjars (>= 2.40.0),
htmlunitjars (>= 2.43.0),
rvest,
xml2
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
Remotes: gitlab::hrbrmstr/htmlunitjars

4
NEWS.md

@ -1,3 +1,7 @@
0.5.0
* Updated for 2.43.0 jars
* Added `timeout` to `wc_inspect()`
0.4.0
* Switched to {tinytest}
* Updated for 2.40.0 jars

8
R/wc-inspect.R

@ -5,12 +5,16 @@
#' @md
#' @param url URL to fetch
#' @param js_delay (ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)
#' @param timeout Sets the timeout (milliseconds) of the web connection. Set to zero for an infinite wait.
#' Defaults to `30000`. Note: The timeout is used twice. The first is for making the socket
#' connection, the second is for data retrieval. If the time is critical you must allow for twice
#' the time specified here.
#' @export
wc_inspect <- function(url, js_delay = 5000L) {
wc_inspect <- function(url, js_delay = 5000L, timeout = 30000L) {
app <- J("is.rud.htmlunit.Zapp")
res <- app$getRequestsFor(url, .jlong(js_delay))
res <- app$getRequestsFor(url, .jlong(js_delay), .jint(timeout))
res <- as.list(res)
lapply(res, function(.x) {

BIN
inst/java/htmlunit-1.0-SNAPSHOT.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-io-2.7.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-lang3-3.11.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-net-3.7.jar

Binary file not shown.

BIN
java/htmlunit/deps/commons-text-1.9.jar

Binary file not shown.

BIN
java/htmlunit/deps/htmlunit-2.43.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/htmlunit-core-js-2.43.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-client-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-http-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-io-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-util-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-xml-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/neko-htmlunit-2.43.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/salvation-2.7.2.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-api-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-client-9.4.31.v20200723.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-common-9.4.31.v20200723.jar

Binary file not shown.

2
java/htmlunit/pom.xml

@ -25,7 +25,7 @@
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.40.0</version>
<version>2.43.0</version>
</dependency>
</dependencies>
</project>

4
java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java

@ -8,7 +8,7 @@ import java.io.*;
public class Zapp {
public static List<WebResponse> getRequestsFor(String url, long jsDelay) throws IOException {
public static List<WebResponse> getRequestsFor(String url, long jsDelay, int timeout) throws IOException {
final WebClient webClient = new WebClient(BrowserVersion.CHROME);
@ -16,7 +16,7 @@ public class Zapp {
wco.setThrowExceptionOnScriptError(false);
wco.setCssEnabled(true);
wco.setDownloadImages(true);
wco.setTimeout(30000);
wco.setTimeout(timeout);
final List<WebResponse> list = new ArrayList<>();

BIN
java/htmlunit/target/classes/is/rud/htmlunit/Zapp$1.class

Binary file not shown.

BIN
java/htmlunit/target/classes/is/rud/htmlunit/Zapp.class

Binary file not shown.

BIN
java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar

Binary file not shown.

4
man/hu_read_html.Rd

@ -22,7 +22,7 @@ hu_read_html(
\item{emulate}{browser to emulate; one of "\code{best}", "\code{chrome}", "\code{firefox}", "\code{ie}"}
\item{ret}{what to return; if \code{html_document} (the default) then the HTML created
by the \code{HtmlUnit} emulated browser context is passed to \code{\link[xml2:read_html]{xml2::read_html()}}
by the \code{HtmlUnit} emulated browser context is passed to \code{\link[xml2:read_xml]{xml2::read_html()}}
and an \code{xml2} \code{html_document}/\code{xml_document} is returned. Note that this causes
further HTML processing by \code{xml2}/\code{libxml2} so is not \emph{exactly} what
\code{HtmlUnit} generated. If you want the HTML code (text) without any further
@ -47,7 +47,7 @@ function is a high-level wrapper designed to do a read of HTML,
it is recommended that you leave this the default \code{FALSE} to save
time/bandwidth.}
\item{options}{options to pass to \code{\link[xml2:read_html]{xml2::read_html()}} if \code{ret} == \code{html_document}.}
\item{options}{options to pass to \code{\link[xml2:read_xml]{xml2::read_html()}} if \code{ret} == \code{html_document}.}
}
\value{
an \code{xml2} \code{html_document}/\code{xml_document} if \code{ret} == \code{html_document} else

7
man/wc_inspect.Rd

@ -4,12 +4,17 @@
\alias{wc_inspect}
\title{Perform a "Developer Tools"-like Network Inspection of a URL}
\usage{
wc_inspect(url, js_delay = 5000L)
wc_inspect(url, js_delay = 5000L, timeout = 30000L)
}
\arguments{
\item{url}{URL to fetch}
\item{js_delay}{(ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)}
\item{timeout}{Sets the timeout (milliseconds) of the web connection. Set to zero for an infinite wait.
Defaults to \code{30000}. Note: The timeout is used twice. The first is for making the socket
connection, the second is for data retrieval. If the time is critical you must allow for twice
the time specified here.}
}
\description{
Retrieves \emph{all} content loaded

Loading…
Cancel
Save