From 51652bb0c99e7b603340cbc883da4fa049cc5d57 Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Sat, 10 Sep 2016 15:06:28 -0400 Subject: [PATCH] pre-CRAN flight check --- R/tidy.r | 11 +++++++---- README.Rmd | 2 +- README.md | 4 ++-- man/tidy_html.Rd | 5 +++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/R/tidy.r b/R/tidy.r index 6944e4b..5d8fe7a 100644 --- a/R/tidy.r +++ b/R/tidy.r @@ -69,10 +69,11 @@ #' res <- GET("http://rud.is/test/untidy.html") #' #' # look at the original, un-tidy source -#' cat(content(res, as="text")) +#' cat(content(res, as="text", encoding="UTF-8")) #' #' # see the tidied version -#' cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) +#' cat(tidy_html(content(res, as="text", encoding="UTF-8"), +#' list(TidyDocType="html5", TidyWrapLen=200))) #' #' # but, you could also just do: #' cat(tidy_html(url("http://rud.is/test/untidy.html"))) @@ -128,8 +129,10 @@ tidy_html.HTMLInternalDocument <- function(content, options=list(TidyXhtmlOut=TR #' @rdname tidy_html tidy_html.connection <- function(content, options=list(TidyXhtmlOut=TRUE)) { - content <- paste0(readLines(content), collapse="") + html <- paste0(readLines(content, warn=FALSE), collapse="") + close(content) + .Call('htmltidy_tidy_html_int', PACKAGE='htmltidy', - source=content, options=options) + source=html, options=options) } diff --git a/README.Rmd b/README.Rmd index d78c011..5cec1e2 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,7 +1,7 @@ --- output: rmarkdown::github_document --- -[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) +[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) diff --git a/README.md b/README.md index baa3610..c5d5d21 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) +[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) `htmltidy` — Clean up gnarly HTML/XHTML @@ -197,7 +197,7 @@ sum(map_int(book, nchar)) ## [1] 207501 system.time(tidy_book <- tidy_html(book)) ## user system elapsed -## 0.022 0.001 0.022 +## 0.021 0.001 0.023 ``` (It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby. diff --git a/man/tidy_html.Rd b/man/tidy_html.Rd index dca275e..13783c2 100644 --- a/man/tidy_html.Rd +++ b/man/tidy_html.Rd @@ -96,10 +96,11 @@ library(httr) res <- GET("http://rud.is/test/untidy.html") # look at the original, un-tidy source -cat(content(res, as="text")) +cat(content(res, as="text", encoding="UTF-8")) # see the tidied version -cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) +cat(tidy_html(content(res, as="text", encoding="UTF-8"), + list(TidyDocType="html5", TidyWrapLen=200))) # but, you could also just do: cat(tidy_html(url("http://rud.is/test/untidy.html")))