From a4388921c4bf2b59a0b594ce4d067030e2a9d150 Mon Sep 17 00:00:00 2001 From: Bob Rudis Date: Sun, 11 Sep 2016 09:15:52 -0400 Subject: [PATCH] Updated documentation & README --- R/tidy.r | 3 ++- README.md | 2 +- man/tidy_html.Rd | 3 ++- src/htmltidy.cpp | 15 +++++++++++---- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/R/tidy.r b/R/tidy.r index 3e4b651..de5350f 100644 --- a/R/tidy.r +++ b/R/tidy.r @@ -45,7 +45,8 @@ #' @param verbose output document errors? (default: \code{FALSE}) #' @note If document parsing errors are severe enough, \code{tidy_html()} will not be able #' to clean the document and will display the errors (this output can be captured with -#' \code{sink()} or \code{capture.output()}) along with a warning and return \code{NA}. +#' \code{sink()} or \code{capture.output()}) along with a warning and return a "best effort" +#' cleaned version of the document. #' @return Tidied HTML/XHTML content. The object type will be the same as that of the input type #' except when it is a \code{connection}, then a character vector will be returned. #' @references \url{http://api.html-tidy.org/tidy/quickref_5.1.25.html} & diff --git a/README.md b/README.md index 8e62636..74b771d 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ sum(map_int(book, nchar)) ## [1] 207501 system.time(tidy_book <- tidy_html(book)) ## user system elapsed -## 0.021 0.001 0.023 +## 0.022 0.002 0.024 ``` (It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby. diff --git a/man/tidy_html.Rd b/man/tidy_html.Rd index 842953f..f5e3b58 100644 --- a/man/tidy_html.Rd +++ b/man/tidy_html.Rd @@ -86,7 +86,8 @@ It may also be advantageous to remove all comments with \code{TidyHideComments}. 
\note{ If document parsing errors are severe enough, \code{tidy_html()} will not be able to clean the document and will display the errors (this output can be captured with - \code{sink()} or \code{capture.output()}) along with a warning and return \code{NULL}. + \code{sink()} or \code{capture.output()}) along with a warning and return a "best effort" + cleaned version of the document. } \examples{ opts <- list( diff --git a/src/htmltidy.cpp b/src/htmltidy.cpp index ef0ac22..5ebc3a5 100644 --- a/src/htmltidy.cpp +++ b/src/htmltidy.cpp @@ -176,6 +176,9 @@ Rcpp::CharacterVector tidy_html_int(std::string source, Rcpp::List options, if (ok == no) Rcpp::stop("Error setting TidyHTML options"); } + ok = tidyOptSetBool(tdoc, TidyForceOutput, yes); + if (ok == no) Rcpp::stop("Error setting TidyHTML options"); + rc = tidySetErrorBuffer(tdoc, &errbuf); max_rc = (rc > max_rc) ? rc : max_rc; @@ -210,10 +213,14 @@ Rcpp::CharacterVector tidy_html_int(std::string source, Rcpp::List options, show_errors = true; } - if (show_errors & (errbuf.allocated > 0)) { - Rcpp::Rcout << std::string(reinterpret_cast<const char*>(errbuf.bp)) << std::endl; + if (max_rc > 1) show_errors = true; + + if (show_errors) { + if (errbuf.allocated > 0) { + Rcpp::Rcout << std::string(reinterpret_cast<const char*>(errbuf.bp)) << std::endl; + } if (max_rc > 1) { - Rcpp::warning("\nSevere errors were generated during document evaluation.\nReturing original document"); + Rcpp::warning("\nSevere errors were generated during document evaluation.\n"); } } @@ -222,6 +229,6 @@ Rcpp::CharacterVector tidy_html_int(std::string source, Rcpp::List options, tidyRelease(tdoc); - return((max_rc > 1) ? NA_STRING : Rcpp::wrap(ret)); + return(Rcpp::wrap(ret)); }