mirror of https://git.sr.ht/~hrbrmstr/htmlunit
boB Rudis
5 years ago
45 changed files with 245 additions and 14 deletions
@ -0,0 +1,4 @@ |
|||
# Length-zero fallback: yields `x` when it has at least one element,
# otherwise yields `y`.
`%l0%` <- function(x, y) {
  if (length(x) > 0) {
    x
  } else {
    y
  }
}
|||
# NULL fallback: yields `x` unless it is NULL, in which case `y` is used.
`%||%` <- function(x, y) {
  if (!is.null(x)) {
    x
  } else {
    y
  }
}
|||
# Attribute accessor: fetches the attribute called `name` from `x`,
# requiring an exact (non-partial) name match.
`%@%` <- function(x, name) {
  attr(x, which = name, exact = TRUE)
}
|||
# Negated membership test: TRUE for each element of `x` that has no match
# in `table`.
`%nin%` <- function(x, table) {
  positions <- match(x, table, nomatch = 0)
  positions == 0
}
@ -0,0 +1,52 @@ |
|||
#' Perform a "Developer Tools"-like Network Inspection of a URL
#'
#' Retrieves _all_ content loaded
#'
#' @md
#' @param url URL to fetch
#' @param js_delay (ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)
#' @return A data frame (classed as `tbl_df`) with one row per captured
#'   response and the columns `method`, `url`, `status_code`, `message`,
#'   `content`, `content_length`, `content_type`, `load_time` and `headers`.
#'   `headers` is a list column; each element is a `name`/`value` data frame
#'   of that response's headers.
#' @export
wc_inspect <- function(url, js_delay = 5000L) {

  # Fail early with an R-level error instead of an opaque Java exception.
  stopifnot(
    is.character(url), length(url) == 1L, !is.na(url),
    is.numeric(js_delay), length(js_delay) == 1L, !is.na(js_delay)
  )

  app <- J("is.rud.htmlunit.App")

  # The Java side takes the delay as a `long`, hence the explicit .jlong().
  responses <- as.list(app$getRequestsFor(url, .jlong(js_delay)))

  rows <- lapply(responses, function(resp) {

    req <- resp$getWebRequest()
    raw_hdrs <- as.list(resp$getResponseHeaders())

    # Build the header table column-wise so a response without headers
    # still yields a zero-row frame carrying the `name`/`value` columns.
    hdrs <- data.frame(
      name = vapply(
        raw_hdrs,
        function(h) h$getName() %||% NA_character_,
        character(1)
      ),
      value = vapply(
        raw_hdrs,
        function(h) h$getValue() %||% NA_character_,
        character(1)
      ),
      stringsAsFactors = FALSE
    )
    class(hdrs) <- c("tbl_df", "tbl", "data.frame")

    data.frame(
      method = req$getHttpMethod()$toString() %||% NA_character_,
      url = req$getUrl()$toString() %||% NA_character_,
      status_code = resp$getStatusCode() %||% NA_integer_,
      message = resp$getStatusMessage() %||% NA_character_,
      content = resp$getContentAsString() %||% NA_character_,
      content_length = as.double(resp$getContentLength() %||% NA_real_),
      content_type = resp$getContentType() %||% NA_character_,
      load_time = as.double(resp$getLoadTime() %||% NA_real_),
      # I(list(.)) keeps the per-response header table as one list-column cell.
      headers = I(list(hdrs)),
      stringsAsFactors = FALSE
    )

  })

  out <- do.call(rbind.data.frame, rows)
  class(out) <- c("tbl_df", "tbl", "data.frame")

  out

}
Binary file not shown.
@ -0,0 +1,14 @@ |
|||
# Maven helper targets for the HtmlUnit wrapper jar.
# None of the targets below produce a file named after themselves.
.PHONY: pkg clean deps new

# Build the jar, then copy it into ../../inst/java (created if missing so
# that cp never writes a plain file called "java").
pkg:
	mvn --quiet package
	mkdir -p ../../inst/java
	cp target/htmlunit-1.0-SNAPSHOT.jar ../../inst/java

# Remove Maven build output.
clean:
	mvn clean

# Copy the project's dependency jars into ./deps.
deps:
	mvn dependency:copy-dependencies -DoutputDirectory=deps

# Generate a fresh quickstart project skeleton with this project's coordinates.
new:
	mvn archetype:generate -DgroupId=is.rud.htmlunit -DartifactId=htmlunit -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,31 @@ |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>is.rud.htmlunit</groupId>
  <artifactId>htmlunit</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>htmlunit</name>
  <url>http://maven.apache.org</url>
  <properties>
    <!-- Pin the source encoding so the build does not depend on the platform default. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <!-- Java 8 is the baseline: the HtmlUnit dependency below needs it,
               and recent JDKs no longer accept 1.7 as a source/target level. -->
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <!-- https://mvnrepository.com/artifact/net.sourceforge.htmlunit/htmlunit -->
    <dependency>
      <groupId>net.sourceforge.htmlunit</groupId>
      <artifactId>htmlunit</artifactId>
      <version>2.35.0</version>
    </dependency>
  </dependencies>
</project>
@ -0,0 +1,40 @@ |
|||
package is.rud.htmlunit; |
|||
|
|||
import com.gargoylesoftware.htmlunit.*; |
|||
import com.gargoylesoftware.htmlunit.util.*; |
|||
import java.util.*; |
|||
import java.lang.*; |
|||
import java.io.*; |
|||
|
|||
public class App { |
|||
|
|||
public static List<WebResponse> getRequestsFor(String url, long jsDelay) throws IOException { |
|||
|
|||
final WebClient webClient = new WebClient(BrowserVersion.CHROME); |
|||
|
|||
WebClientOptions wco = webClient.getOptions(); |
|||
wco.setThrowExceptionOnScriptError(false); |
|||
wco.setCssEnabled(true); |
|||
wco.setDownloadImages(true); |
|||
wco.setTimeout(30000); |
|||
|
|||
final List<WebResponse> list = new ArrayList<>(); |
|||
|
|||
new WebConnectionWrapper(webClient) { |
|||
@Override |
|||
public WebResponse getResponse(final WebRequest request) throws IOException { |
|||
final WebResponse response = super.getResponse(request); |
|||
// list.add(request.getHttpMethod() + " " + request.getUrl());
|
|||
list.add(response); |
|||
return response; |
|||
} |
|||
}; |
|||
|
|||
webClient.getPage(url); |
|||
webClient.waitForBackgroundJavaScript(jsDelay); |
|||
|
|||
return(list); |
|||
|
|||
} |
|||
|
|||
} |
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,5 @@ |
|||
#Generated by Maven |
|||
#Mon Apr 29 10:10:01 EDT 2019 |
|||
version=1.0-SNAPSHOT |
|||
groupId=is.rud.htmlunit |
|||
artifactId=htmlunit |
@ -0,0 +1,2 @@ |
|||
is/rud/htmlunit/App$1.class |
|||
is/rud/htmlunit/App.class |
@ -0,0 +1 @@ |
|||
/Users/hrbrmstr/packages/htmlunit/java/htmlunit/src/main/java/is/rud/htmlunit/App.java |
@ -0,0 +1 @@ |
|||
/Users/hrbrmstr/packages/htmlunit/java/htmlunit/src/test/java/is/rud/htmlunit/AppTest.java |
@ -0,0 +1,16 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/wc-inspect.R |
|||
\name{wc_inspect} |
|||
\alias{wc_inspect} |
|||
\title{Perform a "Developer Tools"-like Network Inspection of a URL} |
|||
\usage{ |
|||
wc_inspect(url, js_delay = 5000L) |
|||
} |
|||
\arguments{ |
|||
\item{url}{URL to fetch} |
|||
|
|||
\item{js_delay}{(ms) How long to wait for JavaScript to execute/XHRs to load? (Default: 5000)} |
|||
} |
|||
\description{ |
|||
Retrieves \emph{all} content loaded |
|||
} |
Loading…
Reference in new issue