Browse Source

tinytest / 2.40.0 jars

master
boB Rudis 4 years ago
parent
commit
a5e22b9d5f
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 13
      DESCRIPTION
  2. 4
      NEWS.md
  3. 62
      README.md
  4. BIN
      inst/java/htmlunit-1.0-SNAPSHOT.jar
  5. 41
      inst/tinytest/test_htmlunit.R
  6. BIN
      java/htmlunit/deps/commons-lang3-3.10.jar
  7. BIN
      java/htmlunit/deps/htmlunit-2.40.0.jar
  8. BIN
      java/htmlunit/deps/htmlunit-core-js-2.40.0.jar
  9. BIN
      java/htmlunit/deps/jetty-client-9.4.28.v20200408.jar
  10. BIN
      java/htmlunit/deps/jetty-http-9.4.28.v20200408.jar
  11. BIN
      java/htmlunit/deps/jetty-io-9.4.28.v20200408.jar
  12. BIN
      java/htmlunit/deps/jetty-util-9.4.28.v20200408.jar
  13. BIN
      java/htmlunit/deps/jetty-xml-9.4.28.v20200408.jar
  14. BIN
      java/htmlunit/deps/neko-htmlunit-2.40.0.jar
  15. BIN
      java/htmlunit/deps/salvation-2.7.1.jar
  16. BIN
      java/htmlunit/deps/websocket-api-9.4.28.v20200408.jar
  17. BIN
      java/htmlunit/deps/websocket-client-9.4.28.v20200408.jar
  18. BIN
      java/htmlunit/deps/websocket-common-9.4.28.v20200408.jar
  19. 2
      java/htmlunit/pom.xml
  20. BIN
      java/htmlunit/target/classes/is/rud/htmlunit/App$1.class
  21. BIN
      java/htmlunit/target/classes/is/rud/htmlunit/App.class
  22. BIN
      java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar
  23. 2
      java/htmlunit/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
  24. 1
      java/htmlunit/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
  25. 2
      tests/test-all.R
  26. 45
      tests/testthat/test-htmlunit.R
  27. 5
      tests/tinytest.R

13
DESCRIPTION

@ -1,8 +1,8 @@
Package: htmlunit
Type: Package
Title: Tools to Scrape Dynamic Web Content via the 'HtmlUnit' Java Library
Version: 0.3.2
Date: 2020-04-09
Version: 0.4.0
Date: 2020-05-09
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640")),
@ -25,13 +25,12 @@ Encoding: UTF-8
License: Apache License 2.0 | file LICENSE
Imports:
magrittr
Suggests:
testthat,
covr
Depends:
Suggests:
covr, tinytest
Depends:
R (>= 3.2.0),
rJava,
htmlunitjars (>= 2.39.0),
htmlunitjars (>= 2.40.0),
rvest,
xml2
Roxygen: list(markdown = TRUE)

4
NEWS.md

@ -1,3 +1,7 @@
0.4.0
* Switched to {tinytest}
* Updated for 2.40.0 jars
0.3.0
* java 11 compile
* tested against new htmlunit jar release

62
README.md

@ -132,7 +132,7 @@ library(tidyverse) # for some data ops; not req'd for pkg
# current version
packageVersion("htmlunit")
## [1] '0.3.2'
## [1] '0.4.0'
```
Something `xml2::read_html()` cannot do, read the table from
@ -178,20 +178,20 @@ colnames(xdf)
## [7] "content_type" "load_time" "headers"
select(xdf, method, url, status_code, content_length, load_time)
## # A tibble: 45 x 5
## # A tibble: 59 x 5
## method url status_code content_length load_time
## <chr> <chr> <int> <dbl> <dbl>
## 1 GET https://rstudio.com/ 200 12292 701
## 2 GET https://dev.visualwebsiteoptimizer.com/j.php?a=450622&u=https%3A%2F%2Frs… 200 2498 349
## 3 GET https://dev.visualwebsiteoptimizer.com/6.0/va-268e5d055e3477f16578a91cda… 200 55711 91
## 4 GET https://use.fontawesome.com/releases/v5.0.6/css/all.css 200 8699 427
## 5 GET https://d33wubrfki0l68.cloudfront.net/bundles/c5ddb3e999592179708beea702… 200 53046 599
## 6 GET https://cdn.rawgit.com/noelboss/featherlight/1.7.13/release/featherlight… 200 763 402
## 7 GET https://d33wubrfki0l68.cloudfront.net/css/4a0f49009a213e6e2207c6f66893f0… 200 505 80
## 8 GET https://gitcdn.github.io/bootstrap-toggle/2.2.2/css/bootstrap-toggle.min… 200 548 346
## 9 GET https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-aweso… 200 6663 292
## 10 GET https://snap.licdn.com/li.lms-analytics/insight.min.js 200 1576 483
## # … with 35 more rows
## 1 GET https://rstudio.com/ 200 13531 625
## 2 GET https://use.fontawesome.com/releases/v5.0.6/css/all.css 200 8699 376
## 3 GET https://d33wubrfki0l68.cloudfront.net/bundles/c5ddb3e999592179708beea702… 200 53046 563
## 4 GET https://cdn.rawgit.com/noelboss/featherlight/1.7.13/release/featherlight… 200 763 376
## 5 GET https://d33wubrfki0l68.cloudfront.net/css/4a0f49009a213e6e2207c6f66893f0… 200 505 73
## 6 GET https://gitcdn.github.io/bootstrap-toggle/2.2.2/css/bootstrap-toggle.min… 200 548 258
## 7 GET https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-aweso… 200 6663 247
## 8 GET https://metadata-static-files.sfo2.cdn.digitaloceanspaces.com/pixel/lp.js 200 3876 364
## 9 GET https://snap.licdn.com/li.lms-analytics/insight.min.js 200 1576 455
## 10 GET https://connect.facebook.net/en_US/fbevents.js 200 31766 412
## # … with 49 more rows
group_by(xdf, content_type) %>%
summarise(
@ -201,18 +201,18 @@ group_by(xdf, content_type) %>%
## # A tibble: 12 x 3
## content_type total_size total_load_time
## <chr> <dbl> <dbl>
## 1 "" 45565 0.521
## 2 "application/javascript" 265147 1.78
## 3 "application/json" 4100 0.687
## 4 "application/x-javascript" 152398 1.97
## 5 "image/gif" 35 0.557
## 6 "image/jpeg" 59772 0.114
## 7 "image/png" 40634 0.269
## 8 "image/svg+xml" 10869 0.314
## 9 "text/css" 118095 2.81
## 10 "text/html" 12709 0.798
## 11 "text/javascript" 249573 2.02
## 12 "text/plain" 28 0.344
## 1 "" 0 1.02
## 2 "application/javascript" 443531 3.61
## 3 "application/json" 4176 3.10
## 4 "application/x-javascript" 161004 1.69
## 5 "image/gif" 131 0.561
## 6 "image/jpeg" 59772 0.105
## 7 "image/png" 40634 0.234
## 8 "image/svg+xml" 10869 0.303
## 9 "text/css" 121175 2.81
## 10 "text/html" 14425 1.3
## 11 "text/javascript" 174172 1.42
## 12 "text/plain" 28 0.354
```
### DSL
@ -221,7 +221,7 @@ group_by(xdf, content_type) %>%
wc <- web_client(emulate = "chrome")
wc %>% wc_browser_info()
## < Netscape / 5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36 / en-US >
## < Netscape / 5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36 / en-US >
wc <- web_client()
@ -263,7 +263,7 @@ wc %>%
wc_render("text") %>%
substr(1, 300) %>%
cat()
## USA.gov: The U.S. Government's Official Web Portal | USAGov
## Official Guide to Government Information and Services | USAGov
## Skip to main content
## An official website of the United States government Here's how you know
##
@ -275,17 +275,17 @@ wc %>%
## All Topics and Services
## Benefits, Grants, Loans
## Government Agencies and Elected Officials
## Jobs and Unemployme
## Jobs and Unemplo
```
### htmlunit Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :---- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
| R | 14 | 0.78 | 351 | 0.76 | 193 | 0.74 | 372 | 0.83 |
| Rmd | 1 | 0.06 | 41 | 0.09 | 52 | 0.20 | 75 | 0.17 |
| R | 13 | 0.76 | 320 | 0.75 | 182 | 0.73 | 372 | 0.83 |
| Rmd | 1 | 0.06 | 41 | 0.10 | 52 | 0.21 | 75 | 0.17 |
| Maven | 1 | 0.06 | 30 | 0.07 | 0 | 0.00 | 1 | 0.00 |
| Java | 1 | 0.06 | 28 | 0.06 | 12 | 0.05 | 0 | 0.00 |
| Java | 1 | 0.06 | 28 | 0.07 | 12 | 0.05 | 0 | 0.00 |
| make | 1 | 0.06 | 10 | 0.02 | 4 | 0.02 | 0 | 0.00 |
## Code of Conduct

BIN
inst/java/htmlunit-1.0-SNAPSHOT.jar

Binary file not shown.

41
inst/tinytest/test_htmlunit.R

@ -0,0 +1,41 @@
# Smoke tests for the web client DSL (`web_client()` / `wc_*()`) and for
# `hu_read_html()`, all run against a single test page.
# NOTE(review): the target is a live URL, so these presumably need network
# access to pass -- confirm before running them in an offline check.
test_url <- "https://hrbrmstr.github.io/htmlunitjars/index.html"

w <- web_client()

# Class checks use expect_inherits() throughout: it matches the semantics of
# the testthat `expect_is()` calls this file replaces and does not break if a
# class vector ever gains an additional entry (unlike `class(x) == "..."`).
expect_inherits(w, "webclient")
expect_inherits(wc_browser_info(w), "browserinfo")

# Navigate first; the URL/title/render checks below inspect the loaded page.
expect_inherits(wc_go(w, url = test_url), "webclient")
expect_equal(wc_url(w), test_url)
expect_equal(wc_title(w), "")

# Each render target should come back as the expected kind of object.
expect_inherits(wc_render(w, "parsed"), "xml_document")
expect_inherits(wc_render(w, "html"), "character")
expect_inherits(wc_render(w, "text"), "character")

expect_inherits(wc_click_on(w, "table"), "webclient")

# vapply() pins the per-node result to a single string, so an unexpected
# return shape fails loudly instead of silently changing the result type.
expect_equal(
  wc_html_nodes(w, "title") %>% vapply(wc_html_text, character(1)),
  ""
)

expect_equal(
  wc_html_nodes(w, "title") %>% vapply(wc_html_name, character(1)),
  "title"
)

# At least one response header value is expected to be "GitHub.com".
h <- wc_headers(w)
expect_true(any(h$value == "GitHub.com"))

# The one-shot reader should honour the requested return type.
expect_inherits(
  hu_read_html(url = test_url, ret = "html_document"),
  "xml_document"
)

expect_inherits(
  hu_read_html(url = test_url, ret = "text"),
  "character"
)

BIN
java/htmlunit/deps/commons-lang3-3.10.jar

Binary file not shown.

BIN
java/htmlunit/deps/htmlunit-2.40.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/htmlunit-core-js-2.40.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-client-9.4.28.v20200408.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-http-9.4.28.v20200408.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-io-9.4.28.v20200408.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-util-9.4.28.v20200408.jar

Binary file not shown.

BIN
java/htmlunit/deps/jetty-xml-9.4.28.v20200408.jar

Binary file not shown.

BIN
java/htmlunit/deps/neko-htmlunit-2.40.0.jar

Binary file not shown.

BIN
java/htmlunit/deps/salvation-2.7.1.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-api-9.4.28.v20200408.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-client-9.4.28.v20200408.jar

Binary file not shown.

BIN
java/htmlunit/deps/websocket-common-9.4.28.v20200408.jar

Binary file not shown.

2
java/htmlunit/pom.xml

@ -25,7 +25,7 @@
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.38.0</version>
<version>2.40.0</version>
</dependency>
</dependencies>
</project>

BIN
java/htmlunit/target/classes/is/rud/htmlunit/App$1.class

Binary file not shown.

BIN
java/htmlunit/target/classes/is/rud/htmlunit/App.class

Binary file not shown.

BIN
java/htmlunit/target/htmlunit-1.0-SNAPSHOT.jar

Binary file not shown.

2
java/htmlunit/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst

@ -1,4 +1,2 @@
is/rud/htmlunit/App$1.class
is/rud/htmlunit/Zapp.class
is/rud/htmlunit/App.class
is/rud/htmlunit/Zapp$1.class

1
java/htmlunit/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst

@ -1,2 +1 @@
/Users/hrbrmstr/packages/htmlunit/java/htmlunit/src/main/java/is/rud/htmlunit/App.java
/Users/hrbrmstr/packages/htmlunit/java/htmlunit/src/main/java/is/rud/htmlunit/Zapp.java

2
tests/test-all.R

@ -1,2 +0,0 @@
# testthat entry point: attach testthat and run the checks for "htmlunit".
library(testthat)
test_check("htmlunit")

45
tests/testthat/test-htmlunit.R

@ -1,45 +0,0 @@
# testthat suite for the web client DSL (`web_client()` / `wc_*()`) and for
# `hu_read_html()`, run against a single test page.
context("Core htmlunit ops work")
test_that("we can do something", {
# NOTE(review): this is a live URL, so the test presumably needs network
# access -- confirm.
test_url <- "https://hrbrmstr.github.io/htmlunitjars/index.html"
w <- web_client()
# The constructor and the browser-info helper return classed objects.
expect_is(w, "webclient")
expect_is(wc_browser_info(w), "browserinfo")
# Navigate, then check the reported URL and the (empty) page title.
expect_is(wc_go(w, url = test_url), "webclient")
expect_equal(wc_url(w), test_url)
expect_equal(wc_title(w), "")
# Each render target should come back as the expected kind of object.
expect_is(wc_render(w, "parsed"), "xml_document")
expect_is(wc_render(w, "html"), "character")
expect_is(wc_render(w, "text"), "character")
expect_is(wc_click_on(w, "table"), "webclient")
# The <title> node is expected to have empty text ...
expect_equal(
wc_html_nodes(w, "title") %>% sapply(wc_html_text),
""
)
# ... and to report "title" as its node name.
expect_equal(
wc_html_nodes(w, "title") %>% sapply(wc_html_name),
"title"
)
# At least one response header value is expected to be "GitHub.com".
h <- wc_headers(w)
expect_true(any(h$value == "GitHub.com"))
# The one-shot reader is checked for both requested return types.
expect_is(
hu_read_html(url = test_url, ret = "html_document"),
"xml_document"
)
expect_is(
hu_read_html(url = test_url, ret = "text"),
"character"
)
})

5
tests/tinytest.R

@ -0,0 +1,5 @@
# Run the tinytest suite for "htmlunit", but only when tinytest is installed;
# otherwise this script does nothing.
if (requireNamespace("tinytest", quietly = TRUE)) {
  tinytest::test_package("htmlunit")
}
Loading…
Cancel
Save