diff --git a/.Rbuildignore b/.Rbuildignore index e7ccc30..d94455e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,4 +4,4 @@ ^README-.*\.png$ ^\.travis\.yml$ ^CONDUCT\.md$ -^README\.md$ +^README\.html$ diff --git a/DESCRIPTION b/DESCRIPTION index 24bb684..c2fc60f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: htmltidy -Title: Clean Up Gnarly HTML and XHTML -Version: 0.2.0.9000 +Title: Clean Up or Pretty Print Gnarly HTML and XHTML +Version: 0.2.0 Authors@R: c( person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")), person("Dave", "Dave", email = "dsr@w3.org", role = c("ctb", "aut"), @@ -9,14 +9,21 @@ Authors@R: c( Maintainer: Bob Rudis Description: HTML documents can be beautiful and pristine. They can also be wretched, evil, malformed demon-spawn. Now, you can tidy up that HTML and XHTML - before processing it with your favorite angle-bracket crunching tools. + before processing it with your favorite angle-bracket crunching tools, going beyond + the limited tidying that 'libxml2' affords in the 'XML' and 'xml2' packages and + taming even the ugliest HTML code generated by the likes of Google Docs and Microsoft + Word. It's also possible to use the functions provided to format or "pretty print" + HTML content as it is being tidied. 
+Copyright: file inst/COPYRIGHTS Depends: R (>= 3.2.0) -License: AGPL + file LICENSE +License: AGPL LazyData: true NeedsCompilation: yes Suggests: - testthat + testthat, + httr, + rvest LinkingTo: Rcpp Imports: Rcpp, diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 5b085be..0000000 --- a/LICENSE +++ /dev/null @@ -1,2 +0,0 @@ -YEAR: 2016 -COPYRIGHT HOLDER: Bob Rudis diff --git a/NAMESPACE b/NAMESPACE index 8dba643..76b2f65 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(tidy_html,HTMLInternalDocument) S3method(tidy_html,character) +S3method(tidy_html,connection) S3method(tidy_html,default) S3method(tidy_html,raw) S3method(tidy_html,xml_document) diff --git a/NEWS.md b/NEWS.md index 856f365..a7f07cb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * Bundled tidy-html5 library with the package * Windows compatibility * Options handling +* Enabled generics * Modified tests diff --git a/R/htmltidy-package.r b/R/htmltidy-package.r index ab68c8a..4304c8b 100644 --- a/R/htmltidy-package.r +++ b/R/htmltidy-package.r @@ -1,8 +1,12 @@ -#' Clean Up Gnarly HTML/XML +#' Clean Up or Pretty Print Gnarly HTML and XHTML #' #' HTML documents can be beautiful and pristine. They can also be -#' wretched, evil, malformed hellspawn. Now, you can tidy up that HTML and XML before -#' processing it with your favorite angle-bracket parsing tools. +#' wretched, evil, malformed demon-spawn. Now, you can tidy up that HTML and XHTML +#' before processing it with your favorite angle-bracket crunching tools, going beyond +#' the limited tidying that 'libxml2' affords in the 'XML' and 'xml2' packages and +#' taming even the ugliest HTML code generated by the likes of Google Docs and Microsoft +#' Word. It's also possible to use the functions provided to format or "pretty print" +#' HTML content as it is being tidied. 
#' #' @name htmltidy #' @docType package diff --git a/R/tidy.r b/R/tidy.r index f857395..6944e4b 100644 --- a/R/tidy.r +++ b/R/tidy.r @@ -1,4 +1,4 @@ -#' Tidy HTML/XHTML Documents +#' Tidy or "Pretty Print" HTML/XHTML Documents #' #' Pass in HTML content as either plain or raw text or parsed objects (either with the #' \code{XML} or \code{xml2} packages) along with an options list that specifies how @@ -42,7 +42,8 @@ #' @param content accepts a character vector, raw vector or parsed content from the \code{xml2} #' or \code{XML} packages. #' @param options named list of options -#' @return Tidied HTML/XHTML content. The object type will be the same as that of the input type. +#' @return Tidied HTML/XHTML content. The object type will be the same as that of the input type +#' except when it is a \code{connection}, then a character vector will be returned. #' @references \url{http://api.html-tidy.org/tidy/quickref_5.1.25.html} & #' \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h} #' for definitions of the options supported above and \url{https://www.w3.org/People/Raggett/tidy/} @@ -63,6 +64,18 @@ #' collapse="") #' #' cat(tidy_html(txt, option=opts)) +#' +#' library(httr) +#' res <- GET("http://rud.is/test/untidy.html") +#' +#' # look at the original, un-tidy source +#' cat(content(res, as="text")) +#' +#' # see the tidied version +#' cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) +#' +#' # but, you could also just do: +#' cat(tidy_html(url("http://rud.is/test/untidy.html"))) tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE)) { UseMethod("tidy_html") } @@ -70,7 +83,7 @@ tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE)) { #' @export #' @rdname tidy_html tidy_html.default <- function(content, options=list(TidyXhtmlOut=TRUE)) { - content <- content[1] + content <- paste0(content, collapse="") .Call('htmltidy_tidy_html_int', PACKAGE='htmltidy', source=content, options=options) } @@ 
-78,7 +91,7 @@ tidy_html.default <- function(content, options=list(TidyXhtmlOut=TRUE)) { #' @export #' @rdname tidy_html tidy_html.character <- function(content, options=list(TidyXhtmlOut=TRUE)) { - content <- content[1] + content <- paste0(content, collapse="") .Call('htmltidy_tidy_html_int', PACKAGE='htmltidy', source=content, options=options) } @@ -110,3 +123,13 @@ tidy_html.HTMLInternalDocument <- function(content, options=list(TidyXhtmlOut=TR source=content, options=options) XML::htmlParse(out) } + +#' @export +#' @rdname tidy_html +tidy_html.connection <- function(content, options=list(TidyXhtmlOut=TRUE)) { + + content <- paste0(readLines(content), collapse="") + .Call('htmltidy_tidy_html_int', PACKAGE='htmltidy', + source=content, options=options) + +} diff --git a/README.Rmd b/README.Rmd index f16b51c..d78c011 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,14 +1,14 @@ --- output: rmarkdown::github_document --- -[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) +[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, - comment = "#>", + comment = "##", message = FALSE, warning = FALSE, error = FALSE, @@ -25,7 +25,7 @@ It relies on a locally included version of [`libtidy`](http://www.html-tidy.org/ The following functions are implemented: -- `tidy_html` : Clean up gnarly HTML/XHTML +- `tidy_html` : Tidy or "Pretty Print" HTML/XHTML Documents ### Installation @@ -48,18 +48,46 @@ packageVersion("htmltidy") library(XML) library(xml2) library(httr) +library(purrr) +``` + +This is really "un-tidy" content: + +```{r message=FALSE, warning=FALSE} +res <- GET("http://rud.is/test/untidy.html") +cat(content(res, as="text")) +``` -res <- 
GET("http://rud.is") +Let's see what `tidy_html()` does to it: -head(tidy_html(res$content), 256) +```{r message=FALSE, warning=FALSE} +cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) +``` -head(tidy_html(content(res, as="raw")), 256) +NOTE: you could also just have done: -(class(tidy_html(content(res, as="text", encoding="UTF-8")))) # output is too long to show +```{r message=FALSE, warning=FALSE} +cat(tidy_html(url("http://rud.is/test/untidy.html"), + list(TidyDocType="html5", TidyWrapLen=200))) +``` -tidy_html(content(res, as="parsed", encoding="UTF-8")) # same as tidy_html(read_html("http://rud.is")) +You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`): -(class(tidy_html(htmlParse("http://rud.is")))) # output is too long to show +```{r message=FALSE, warning=FALSE} +pg <- read_html("http://rud.is/test/untidy.html") +cat(toString(pg)) +``` + +It can also deal with "raw" and parsed objects: + +```{r message=FALSE, warning=FALSE} +tidy_html(content(res, as="raw")) + +tidy_html(content(res, as="text", encoding="UTF-8")) + +tidy_html(content(res, as="parsed", encoding="UTF-8")) + +tidy_html(htmlParse("http://rud.is/test/untidy.html")) ``` ### Testing Options @@ -91,6 +119,18 @@ cat(tidy_html(txt, option=opts)) ``` +But, you're probably better off running it on plain HTML source. + +Since it's C/C++-backed, it's pretty fast: + +```{r message=FALSE, warning=FALSE} +book <- readLines("http://singlepageappbook.com/single-page.html") +sum(map_int(book, nchar)) +system.time(tidy_book <- tidy_html(book)) +``` + +(It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby. + ### Code of Conduct Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). 
diff --git a/README.md b/README.md index ae6e4ed..baa3610 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) +[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/htmltidy)](https://cran.r-project.org/package=htmltidy) `htmltidy` — Clean up gnarly HTML/XHTML @@ -10,7 +10,7 @@ It relies on a locally included version of [`libtidy`](http://www.html-tidy.org/ The following functions are implemented: -- `tidy_html` : Clean up gnarly HTML/XHTML +- `tidy_html` : Tidy or "Pretty Print" HTML/XHTML Documents ### Installation @@ -25,43 +25,125 @@ library(htmltidy) # current verison packageVersion("htmltidy") -#> [1] '0.2.0.9000' +## [1] '0.2.0' library(XML) library(xml2) library(httr) +library(purrr) +``` + +This is really "un-tidy" content: -res <- GET("http://rud.is") - -head(tidy_html(res$content), 256) -#> [1] 3c 21 44 4f 43 54 59 50 45 20 68 74 6d 6c 20 50 55 42 4c 49 43 20 22 2d 2f 2f 57 33 43 2f 2f 44 54 44 20 58 48 54 -#> [39] 4d 4c 20 31 2e 30 20 54 72 61 6e 73 69 74 69 6f 6e 61 6c 2f 2f 45 4e 22 0a 20 20 20 20 22 68 74 74 70 3a 2f 2f 77 -#> [77] 77 77 2e 77 33 2e 6f 72 67 2f 54 52 2f 78 68 74 6d 6c 31 2f 44 54 44 2f 78 68 74 6d 6c 31 2d 74 72 61 6e 73 69 74 -#> [115] 69 6f 6e 61 6c 2e 64 74 64 22 3e 0a 3c 68 74 6d 6c 20 78 6d 6c 6e 73 3d 22 68 74 74 70 3a 2f 2f 77 77 77 2e 77 33 -#> [153] 2e 6f 72 67 2f 31 39 39 39 2f 78 68 74 6d 6c 22 3e 0a 3c 68 65 61 64 3e 0a 3c 6d 65 74 61 20 6e 61 6d 65 3d 22 67 -#> [191] 65 6e 65 72 61 74 6f 72 22 20 63 6f 6e 74 65 6e 74 3d 0a 22 48 54 4d 4c 20 54 69 64 79 20 66 6f 72 20 48 54 4d 4c -#> [229] 35 20 66 6f 72 20 52 20 76 65 72 73 69 6f 6e 20 35 2e 30 2e 30 22 20 2f 3e 0a 3c 6d - -head(tidy_html(content(res, as="raw")), 256) -#> [1] 3c 21 44 4f 43 54 59 50 45 20 68 
74 6d 6c 20 50 55 42 4c 49 43 20 22 2d 2f 2f 57 33 43 2f 2f 44 54 44 20 58 48 54 -#> [39] 4d 4c 20 31 2e 30 20 54 72 61 6e 73 69 74 69 6f 6e 61 6c 2f 2f 45 4e 22 0a 20 20 20 20 22 68 74 74 70 3a 2f 2f 77 -#> [77] 77 77 2e 77 33 2e 6f 72 67 2f 54 52 2f 78 68 74 6d 6c 31 2f 44 54 44 2f 78 68 74 6d 6c 31 2d 74 72 61 6e 73 69 74 -#> [115] 69 6f 6e 61 6c 2e 64 74 64 22 3e 0a 3c 68 74 6d 6c 20 78 6d 6c 6e 73 3d 22 68 74 74 70 3a 2f 2f 77 77 77 2e 77 33 -#> [153] 2e 6f 72 67 2f 31 39 39 39 2f 78 68 74 6d 6c 22 3e 0a 3c 68 65 61 64 3e 0a 3c 6d 65 74 61 20 6e 61 6d 65 3d 22 67 -#> [191] 65 6e 65 72 61 74 6f 72 22 20 63 6f 6e 74 65 6e 74 3d 0a 22 48 54 4d 4c 20 54 69 64 79 20 66 6f 72 20 48 54 4d 4c -#> [229] 35 20 66 6f 72 20 52 20 76 65 72 73 69 6f 6e 20 35 2e 30 2e 30 22 20 2f 3e 0a 3c 6d - -(class(tidy_html(content(res, as="text", encoding="UTF-8")))) # output is too long to show -#> [1] "character" - -tidy_html(content(res, as="parsed", encoding="UTF-8")) # same as tidy_html(read_html("http://rud.is")) -#> {xml_document} -#> -#> [1] \n \n Welcome to rud.is.

\n> You are in a maze of twisty little passages ... - -(class(tidy_html(htmlParse("http://rud.is")))) # output is too long to show -#> [1] "HTMLInternalDocument" "HTMLInternalDocument" "XMLInternalDocument" "XMLAbstractDocument" +``` r +res <- GET("http://rud.is/test/untidy.html") +cat(content(res, as="text")) +## +## +## +## +## This is some really poorly formatted HTML +## +## as is this portion
+``` + +Let's see what `tidy_html()` does to it: + +``` r +cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) +## +## +## +## +## +## +## +## +## This is some really poorly formatted HTML as is this portion +##
+## +## +``` + +NOTE: you could also just have done: + +``` r +cat(tidy_html(url("http://rud.is/test/untidy.html"), + list(TidyDocType="html5", TidyWrapLen=200))) +## +## +## +## +## +## +## +## +## This is some really poorly formatted HTMLas is this portion +##
+## +## +``` + +You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`): + +``` r +pg <- read_html("http://rud.is/test/untidy.html") +cat(toString(pg)) +## +## +## +## This is some really poorly formatted HTML +## +## as is this portion
+``` + +It can also deal with "raw" and parsed objects: + +``` r +tidy_html(content(res, as="raw")) +## [1] 3c 21 44 4f 43 54 59 50 45 20 68 74 6d 6c 3e 0a 3c 68 74 6d 6c 20 78 6d 6c 6e 73 3d 22 68 74 74 70 3a 2f 2f 77 77 +## [39] 77 2e 77 33 2e 6f 72 67 2f 31 39 39 39 2f 78 68 74 6d 6c 22 3e 0a 3c 68 65 61 64 3e 0a 3c 6d 65 74 61 20 6e 61 6d +## [77] 65 3d 22 67 65 6e 65 72 61 74 6f 72 22 20 63 6f 6e 74 65 6e 74 3d 0a 22 48 54 4d 4c 20 54 69 64 79 20 66 6f 72 20 +## [115] 48 54 4d 4c 35 20 66 6f 72 20 52 20 76 65 72 73 69 6f 6e 20 35 2e 30 2e 30 22 20 2f 3e 0a 3c 74 69 74 6c 65 3e 3c +## [153] 2f 74 69 74 6c 65 3e 0a 3c 2f 68 65 61 64 3e 0a 3c 62 6f 64 79 3e 0a 3c 2f 62 6f 64 79 3e 0a 3c 2f 68 74 6d 6c 3e +## [191] 0a + +tidy_html(content(res, as="text", encoding="UTF-8")) +## [1] "\n\n\n\n\n\n\n\nThis is some really poorly formatted HTML as is this\nportion\n
\n\n\n" + +tidy_html(content(res, as="parsed", encoding="UTF-8")) +## {xml_document} +## +## [1] \n \n \nThis is some really poorly formatted HTML as is this\nportion\n + +tidy_html(htmlParse("http://rud.is/test/untidy.html")) +## +## +## +## +## +## +## +## +## This is some really poorly formatted HTML as is this +## portion +##
+## +## +## ``` ### Testing Options @@ -75,7 +157,7 @@ opts <- list(TidyDocType="html5", TidyWrapLen=200) txt <- " - + @@ -90,21 +172,36 @@ txt <- " " cat(tidy_html(txt, option=opts)) -#> -#> -#> -#> -#> -#> -#> -#> -#>

Test

-#> -#> +## +## +## +## +## +## +## +## +##

Test

+## +## ``` +But, you're probably better off running it on plain HTML source. + +Since it's C/C++-backed, it's pretty fast: + +``` r +book <- readLines("http://singlepageappbook.com/single-page.html") +sum(map_int(book, nchar)) +## [1] 207501 +system.time(tidy_book <- tidy_html(book)) +## user system elapsed +## 0.022 0.001 0.022 +``` + +(It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby. + ### Code of Conduct Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. diff --git a/inst/COPYRIGHTS b/inst/COPYRIGHTS new file mode 100644 index 0000000..57bf1c8 --- /dev/null +++ b/inst/COPYRIGHTS @@ -0,0 +1,22 @@ +All R source code and source file src/htmltidy.cpp are released under the GNU AGPL license. + +As per https://github.com/htacg/tidy-html5/blob/master/README/LICENSE.md, libtidy source +code is: + +"Copyright (c) 1998-2016 World Wide Web Consortium (Massachusetts Institute of +Technology, European Research Consortium for Informatics and Mathematics, Keio University). +All Rights Reserved. + +Additional contributions (c) 2001-2016 University of Toronto, Terry Teague, @geoffmcl, +HTACG, and others." + +The authors of the libtidy sources also used other source code that is licensed GPL-2. + +All licenses in the included source files have been left intact. As required by the +libtidy copyright notice, the following files are noted as being modified for use in +this package: + +- src/alloc.cpp +- src/sprtf.cpp + +Both of those files have been marked as modified in the license copyright header. 
diff --git a/man/htmltidy.Rd b/man/htmltidy.Rd index 932302f..374f75b 100644 --- a/man/htmltidy.Rd +++ b/man/htmltidy.Rd @@ -4,11 +4,15 @@ \name{htmltidy} \alias{htmltidy} \alias{htmltidy-package} -\title{Clean Up Gnarly HTML/XML} +\title{Clean Up or Pretty Print Gnarly HTML and XHTML} \description{ HTML documents can be beautiful and pristine. They can also be -wretched, evil, malformed hellspawn. Now, you can tidy up that HTML and XML before -processing it with your favorite angle-bracket parsing tools. +wretched, evil, malformed demon-spawn. Now, you can tidy up that HTML and XHTML +before processing it with your favorite angle-bracket crunching tools, going beyond +the limited tidying that 'libxml2' affords in the 'XML' and 'xml2' packages and +taming even the ugliest HTML code generated by the likes of Google Docs and Microsoft +Word. It's also possible to use the functions provided to format or "pretty print" +HTML content as it is being tidied. } \author{ Bob Rudis (bob@rud.is) diff --git a/man/tidy_html.Rd b/man/tidy_html.Rd index 95b2ad3..dca275e 100644 --- a/man/tidy_html.Rd +++ b/man/tidy_html.Rd @@ -4,10 +4,11 @@ \alias{tidy_html} \alias{tidy_html.HTMLInternalDocument} \alias{tidy_html.character} +\alias{tidy_html.connection} \alias{tidy_html.default} \alias{tidy_html.raw} \alias{tidy_html.xml_document} -\title{Tidy HTML/XHTML Documents} +\title{Tidy or "Pretty Print" HTML/XHTML Documents} \usage{ tidy_html(content, options = list(TidyXhtmlOut = TRUE)) @@ -21,6 +22,8 @@ tidy_html(content, options = list(TidyXhtmlOut = TRUE)) \method{tidy_html}{HTMLInternalDocument}(content, options = list(TidyXhtmlOut = TRUE)) + +\method{tidy_html}{connection}(content, options = list(TidyXhtmlOut = TRUE)) } \arguments{ \item{content}{accepts a character vector, raw vector or parsed content from the \code{xml2} @@ -29,7 +32,8 @@ or \code{XML} packages.} \item{options}{named list of options} } \value{ -Tidied HTML/XHTML content. 
The object type will be the same as that of the input type. +Tidied HTML/XHTML content. The object type will be the same as that of the input type + except when it is a \code{connection}, then a character vector will be returned. } \description{ Pass in HTML content as either plain or raw text or parsed objects (either with the @@ -87,6 +91,18 @@ txt <- paste0( collapse="") cat(tidy_html(txt, option=opts)) + +library(httr) +res <- GET("http://rud.is/test/untidy.html") + +# look at the original, un-tidy source +cat(content(res, as="text")) + +# see the tidied version +cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200))) + +# but, you could also just do: +cat(tidy_html(url("http://rud.is/test/untidy.html"))) } \references{ \url{http://api.html-tidy.org/tidy/quickref_5.1.25.html} & diff --git a/src/alloc.cpp b/src/alloc.cpp index 493204a..70fb9c0 100644 --- a/src/alloc.cpp +++ b/src/alloc.cpp @@ -5,6 +5,8 @@ (c) 1998-2006 (W3C) MIT, ERCIM, Keio University See tidy.h for the copyright notice. 
+ Modified 2016-09-09 by Bob Rudis for the R package htmltidy + */ /* #define DEBUG_MEMORY very NOISY extra DEBUG of memory allocation, reallocation and free */ diff --git a/src/sprtf.cpp b/src/sprtf.cpp index 096809b..7dae76a 100644 --- a/src/sprtf.cpp +++ b/src/sprtf.cpp @@ -23,6 +23,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US * + * + * Modified 2016-09-09 by Bob Rudis for the R package htmltidy + * */ #ifdef _MSC_VER diff --git a/src/tidylib.c b/src/tidylib.c index cca3685..b4c36dd 100755 --- a/src/tidylib.c +++ b/src/tidylib.c @@ -16,6 +16,8 @@ Created 2001-05-20 by Charles Reitzel + Modified 2016-09-09 by Bob Rudis for the R package htmltidy + */ #include @@ -1073,10 +1075,6 @@ int tidyDocSaveStdout( TidyDocImpl* doc ) { #if !defined(NO_SETMODE_SUPPORT) -#if defined(_WIN32) || defined(OS2_OS) - int oldstdoutmode = -1, oldstderrmode = -1; -#endif - #endif int status = 0; // uint outenc = cfg( doc, TidyOutCharEncoding );