Browse Source

pre-CRAN flight check

master
boB Rudis 8 years ago
parent
commit
51652bb0c9
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 11
      R/tidy.r
  2. 2
      README.Rmd
  3. 4
      README.md
  4. 5
      man/tidy_html.Rd

11
R/tidy.r

@ -69,10 +69,11 @@
#' res <- GET("http://rud.is/test/untidy.html") #' res <- GET("http://rud.is/test/untidy.html")
#' #'
#' # look at the original, un-tidy source #' # look at the original, un-tidy source
#' cat(content(res, as="text")) #' cat(content(res, as="text", encoding="UTF-8"))
#' #'
#' # see the tidied version #' # see the tidied version
#' cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) #' cat(tidy_html(content(res, as="text", encoding="UTF-8"),
#' list(TidyDocType="html5", TidyWrapLen=200)))
#' #'
#' # but, you could also just do: #' # but, you could also just do:
#' cat(tidy_html(url("http://rud.is/test/untidy.html"))) #' cat(tidy_html(url("http://rud.is/test/untidy.html")))
@ -128,8 +129,10 @@ tidy_html.HTMLInternalDocument <- function(content, options=list(TidyXhtmlOut=TR
#' @rdname tidy_html #' @rdname tidy_html
tidy_html.connection <- function(content, options=list(TidyXhtmlOut=TRUE)) { tidy_html.connection <- function(content, options=list(TidyXhtmlOut=TRUE)) {
content <- paste0(readLines(content), collapse="") html <- paste0(readLines(content, warn=FALSE), collapse="")
close(content)
.Call('htmltidy_tidy_html_int', PACKAGE='htmltidy', .Call('htmltidy_tidy_html_int', PACKAGE='htmltidy',
source=content, options=options) source=html, options=options)
} }

2
README.Rmd

@ -1,7 +1,7 @@
--- ---
output: rmarkdown::github_document output: rmarkdown::github_document
--- ---
[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) [![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) <!-- [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) -->
<!-- README.md is generated from README.Rmd. Please edit that file --> <!-- README.md is generated from README.Rmd. Please edit that file -->

4
README.md

@ -1,5 +1,5 @@
[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) [![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) <!-- [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) -->
<!-- README.md is generated from README.Rmd. Please edit that file --> <!-- README.md is generated from README.Rmd. Please edit that file -->
`htmltidy` — Clean up gnarly HTML/XHTML `htmltidy` — Clean up gnarly HTML/XHTML
@ -197,7 +197,7 @@ sum(map_int(book, nchar))
## [1] 207501 ## [1] 207501
system.time(tidy_book <- tidy_html(book)) system.time(tidy_book <- tidy_html(book))
## user system elapsed ## user system elapsed
## 0.022 0.001 0.022 ## 0.021 0.001 0.023
``` ```
(It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby. (It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby.

5
man/tidy_html.Rd

@ -96,10 +96,11 @@ library(httr)
res <- GET("http://rud.is/test/untidy.html") res <- GET("http://rud.is/test/untidy.html")
# look at the original, un-tidy source # look at the original, un-tidy source
cat(content(res, as="text")) cat(content(res, as="text", encoding="UTF-8"))
# see the tidied version # see the tidied version
cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) cat(tidy_html(content(res, as="text", encoding="UTF-8"),
list(TidyDocType="html5", TidyWrapLen=200)))
# but, you could also just do: # but, you could also just do:
cat(tidy_html(url("http://rud.is/test/untidy.html"))) cat(tidy_html(url("http://rud.is/test/untidy.html")))

Loading…
Cancel
Save