Browse Source

README

master
Bob Rudis 8 years ago
parent
commit
e6fce82ec1
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 17
      DESCRIPTION
  2. 11
      NAMESPACE
  3. 2
      NEWS.md
  4. 53
      R/tidy.r
  5. 64
      R/xml.r
  6. 51
      README.Rmd
  7. 78
      README.md
  8. 23
      man/tidy_html.Rd
  9. 50
      man/tidy_xml.Rd
  10. 19
      src/htmltidy.cpp

17
DESCRIPTION

@ -1,5 +1,5 @@
Package: htmltidy Package: htmltidy
Title: Clean Up Gnarly HTML/XML Title: Clean Up Gnarly HTML and XHTML
Version: 0.2.0.9000 Version: 0.2.0.9000
Authors@R: c( Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")), person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")),
@ -7,18 +7,19 @@ Authors@R: c(
comment="HTML Tidy library") comment="HTML Tidy library")
) )
Maintainer: Bob Rudis <bob@rud.is> Maintainer: Bob Rudis <bob@rud.is>
Description: HTML and XML documents can be beautiful and pristine. They can also be Description: HTML documents can be beautiful and pristine. They can also be
wretched, evil, malformed hellspawn. Now, you can tidy up that HTML and XML before wretched, evil, malformed demon-spawn. Now, you can tidy up that HTML and XHTML
processing it with your favorite angle-bracket parsing tools. before processing it with your favorite angle-bracket crunching tools.
Depends: Depends:
R (>= 3.3.0) R (>= 3.2.0)
License: AGPL + file LICENSE License: AGPL + file LICENSE
LazyData: true LazyData: true
NeedsCompilation: yes NeedsCompilation: yes
Suggests: Suggests:
testthat, testthat
xml2
LinkingTo: Rcpp LinkingTo: Rcpp
Imports: Imports:
Rcpp Rcpp,
xml2,
XML
RoxygenNote: 5.0.1 RoxygenNote: 5.0.1

11
NAMESPACE

@ -1,5 +1,16 @@
# Generated by roxygen2: do not edit by hand # Generated by roxygen2: do not edit by hand
S3method(tidy_html,HTMLInternalDocument)
S3method(tidy_html,character)
S3method(tidy_html,default)
S3method(tidy_html,raw)
S3method(tidy_html,xml_document)
S3method(tidy_xml,XMLInternalDocument)
S3method(tidy_xml,character)
S3method(tidy_xml,default)
S3method(tidy_xml,raw)
S3method(tidy_xml,xml_document)
export(tidy_html) export(tidy_html)
export(tidy_xml)
importFrom(Rcpp,sourceCpp) importFrom(Rcpp,sourceCpp)
useDynLib(htmltidy) useDynLib(htmltidy)

2
NEWS.md

@ -1,6 +1,8 @@
# htmltidy 0.2.0.9000 # htmltidy 0.2.0.9000
* Bundled tidy-html5 library with the package * Bundled tidy-html5 library with the package
* Windows compatibility
* Options handling
* Modified tests * Modified tests

53
R/tidy.r

@ -1,4 +1,4 @@
#' Tidy HTML/XML/XHTML Documents #' Tidy HTML/XHTML Documents
#' #'
#' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly}, #' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
#' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, \code{TidyCoerceEndTags}, #' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, \code{TidyCoerceEndTags},
@ -9,15 +9,58 @@
#' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark}, #' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark},
#' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize}, #' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize},
#' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000}, #' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000},
#' \code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut}, #' \code{TidyWrapLen}, \code{TidyXhtmlOut}
#' \code{TidyXmlTags}.
#' #'
#' @param content atomic character or raw vector of content to tidy #' @param content atomic character or raw vector of content to tidy
#' @param options named list of options #' @param options named list of options
#' @return Atomic character vector of tidy HTML/XML/XHTML content #' @return tidied HTML/XHTML content
#' @references \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h} #' @references \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h}
#' (for definitions of the options supported above). #' (for definitions of the options supported above).
#' @export #' @export
tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE)) { tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE)) {
.Call('htmltidy_tidy_html_int', PACKAGE='htmltidy', source=content, options=options) UseMethod("tidy_html")
}
#' @export
#' @rdname tidy_html
tidy_html.default <- function(content, options=list(TidyXhtmlOut=TRUE)) {
  # Fallback method: only the first element of `content` is handed to the
  # native tidy routine, and whatever it produces is returned as-is.
  first_doc <- content[1]
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = first_doc, options = options)
  tidied
}
#' @export
#' @rdname tidy_html
tidy_html.character <- function(content, options=list(TidyXhtmlOut=TRUE)) {
  # Character input: tidy the first element only and return the result of
  # the native routine directly.
  .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = content[1],
    options = options
  )
}
#' @export
#' @rdname tidy_html
tidy_html.raw <- function(content, options=list(TidyXhtmlOut=TRUE)) {
  # Decode the whole raw vector into one string. Do NOT subset with `[1]`
  # here: on a raw vector that keeps only the first byte of the document,
  # so everything after it would be silently discarded before tidying.
  html_text <- iconv(readBin(content, character()), to = "UTF-8")
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = html_text, options = options)
  # Hand back the same type that was supplied: a raw vector.
  charToRaw(tidied)
}
#' @export
#' @rdname tidy_html
tidy_html.xml_document <- function(content, options=list(TidyXhtmlOut=TRUE)) {
  # Serialise the xml2 document to text, tidy that text, then re-parse the
  # tidied markup so the caller gets an xml2 document back.
  html_text <- toString(content)
  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = html_text,
    options = options
  )
  xml2::read_html(tidied)
}
#' @export
#' @rdname tidy_html
tidy_html.HTMLInternalDocument <- function(content, options=list(TidyXhtmlOut=TRUE)) {
  # Serialise the XML-package document to text. `saveXML` is namespace-
  # qualified because the package NAMESPACE does not import it, so a bare
  # call would only resolve if the user had attached XML themselves.
  html_text <- XML::saveXML(content)
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = html_text, options = options)
  # Re-parse so the caller gets the same kind of object that was passed in.
  XML::htmlParse(tidied)
}

64
R/xml.r

@ -0,0 +1,64 @@
#' Tidy XML Documents
#'
#' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
#' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags},
#' \code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas},
#' \code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments},
#' \code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces},
#' \code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis},
#' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark},
#' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize},
#' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000},
#' \code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut},
#' \code{TidyXmlTags}.
#'
#' \code{tidy_xml} is an S3 generic that dispatches on the class of
#' \code{content}. Methods are provided for character vectors, raw vectors,
#' \code{xml_document} objects and \code{XMLInternalDocument} objects; any
#' other input goes to the default method.
#'
#' @param content content to tidy: a character vector, a raw vector, an
#'        \code{xml_document} or an \code{XMLInternalDocument}
#' @param options named list of options
#' @return tidied XML content. The default and character methods return the
#'         output of the native tidy routine unchanged, the raw method
#'         returns a raw vector, the \code{xml_document} method returns the
#'         result of \code{xml2::read_xml()} and the
#'         \code{XMLInternalDocument} method returns the result of
#'         \code{XML::xmlParse()}.
#' @references \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h}
#' (for definitions of the options supported above).
#' @export
tidy_xml <- function(content, options=list(TidyXmlOut=TRUE)) {
UseMethod("tidy_xml")
}
#' @export
#' @rdname tidy_xml
tidy_xml.default <- function(content, options=list(TidyXmlOut=TRUE)) {
  # Fallback method: pass `content` straight to the native tidy routine and
  # return its result unchanged.
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = content, options = options)
  tidied
}
#' @export
#' @rdname tidy_xml
tidy_xml.character <- function(content, options=list(TidyXmlOut=TRUE)) {
  # Character input is forwarded as-is to the native tidy routine; the
  # routine's return value is the method's return value.
  .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = content,
    options = options
  )
}
#' @export
#' @rdname tidy_xml
tidy_xml.raw <- function(content, options=list(TidyXmlOut=TRUE)) {
  # Decode the raw bytes to a string and convert it to UTF-8 before tidying.
  xml_text <- iconv(readBin(content, character()), to = "UTF-8")
  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = xml_text,
    options = options
  )
  # Raw in, raw out.
  charToRaw(tidied)
}
#' @export
#' @rdname tidy_xml
tidy_xml.xml_document <- function(content, options=list(TidyXmlOut=TRUE)) {
  # Serialise the xml2 document to text, tidy it, and parse the tidied text
  # back into an xml2 document.
  xml_text <- toString(content)
  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = xml_text,
    options = options
  )
  xml2::read_xml(tidied)
}
#' @export
#' @rdname tidy_xml
tidy_xml.XMLInternalDocument <- function(content, options=list(TidyXmlOut=TRUE)) {
  # Serialise the XML-package document to text. `saveXML` is namespace-
  # qualified because the package NAMESPACE does not import it, so a bare
  # call would only resolve if the user had attached XML themselves.
  xml_text <- XML::saveXML(content)
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = xml_text, options = options)
  # Re-parse so the caller gets the same kind of object that was passed in.
  XML::xmlParse(tidied)
}

51
README.Rmd

@ -17,7 +17,7 @@ knitr::opts_chunk$set(
) )
``` ```
`htmltidy` — Clean up gnarly HTML/XML `htmltidy` — Clean up gnarly HTML/XHTML
Inspired by [this SO question](http://stackoverflow.com/questions/37061873/identify-a-weblink-in-bold-in-r) and because there's a great deal of cruddy HTML out there that needs fixing to use properly when scraping data. Inspired by [this SO question](http://stackoverflow.com/questions/37061873/identify-a-weblink-in-bold-in-r) and because there's a great deal of cruddy HTML out there that needs fixing to use properly when scraping data.
@ -25,7 +25,7 @@ It relies on a locally included version of [`libtidy`](http://www.html-tidy.org/
The following functions are implemented: The following functions are implemented:
- `tidy_html` : Clean up gnarly HTML/XML - `tidy_html` : Clean up gnarly HTML/XHTML
### Installation ### Installation
@ -39,13 +39,56 @@ options(width=120)
### Usage ### Usage
```{r} ```{r message=FALSE, warning=FALSE}
library(htmltidy) library(htmltidy)
# current version # current version
packageVersion("htmltidy") packageVersion("htmltidy")
cat(tidy_html("<b><p><a href='http://google.com'>google &gt</a></p></b>")) library(XML)
library(xml2)
library(httr)
res <- GET("http://rud.is")
head(tidy_html(res$content), 256)
head(tidy_html(content(res, as="raw")), 256)
(class(tidy_html(content(res, as="text", encoding="UTF-8")))) # output is too long to show
tidy_html(content(res, as="parsed", encoding="UTF-8")) # same as tidy_html(read_html("http://rud.is"))
(class(tidy_html(htmlParse("http://rud.is")))) # output is too long to show
```
### Testing Options
```{r message=FALSE, warning=FALSE}
opts <- list(TidyDocType="html5",
TidyMakeClean=TRUE,
TidyHideComments=TRUE,
TidyIndentContent=FALSE,
TidyWrapLen=200)
txt <- "<html>
<head>
<style>
p { color: red; }
</style>
<body>
<!-- ===== body ====== -->
<p>Test</p>
</body>
<!--Default Zone
-->
<!--Default Zone End-->
</html>"
cat(tidy_html(txt, option=opts))
``` ```
### Code of Conduct ### Code of Conduct

78
README.md

@ -2,7 +2,7 @@
[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy) [![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/htmltidy.svg?branch=master)](https://travis-ci.org/hrbrmstr/htmltidy)
<!-- README.md is generated from README.Rmd. Please edit that file --> <!-- README.md is generated from README.Rmd. Please edit that file -->
`htmltidy` — Clean up gnarly HTML/XML `htmltidy` — Clean up gnarly HTML/XHTML
Inspired by [this SO question](http://stackoverflow.com/questions/37061873/identify-a-weblink-in-bold-in-r) and because there's a great deal of cruddy HTML out there that needs fixing to use properly when scraping data. Inspired by [this SO question](http://stackoverflow.com/questions/37061873/identify-a-weblink-in-bold-in-r) and because there's a great deal of cruddy HTML out there that needs fixing to use properly when scraping data.
@ -10,7 +10,7 @@ It relies on a locally included version of [`libtidy`](http://www.html-tidy.org/
The following functions are implemented: The following functions are implemented:
- `tidy_html` : Clean up gnarly HTML/XML - `tidy_html` : Clean up gnarly HTML/XHTML
### Installation ### Installation
@ -27,16 +27,80 @@ library(htmltidy)
packageVersion("htmltidy") packageVersion("htmltidy")
#> [1] '0.2.0.9000' #> [1] '0.2.0.9000'
cat(tidy_html("<b><p><a href='http://google.com'>google &gt</a></p></b>")) library(XML)
#> <!DOCTYPE html> library(xml2)
library(httr)
res <- GET("http://rud.is")
head(tidy_html(res$content), 256)
#> [1] 3c 21 44 4f 43 54 59 50 45 20 68 74 6d 6c 20 50 55 42 4c 49 43 20 22 2d 2f 2f 57 33 43 2f 2f 44 54 44 20 58 48 54
#> [39] 4d 4c 20 31 2e 30 20 54 72 61 6e 73 69 74 69 6f 6e 61 6c 2f 2f 45 4e 22 0a 20 20 20 20 22 68 74 74 70 3a 2f 2f 77
#> [77] 77 77 2e 77 33 2e 6f 72 67 2f 54 52 2f 78 68 74 6d 6c 31 2f 44 54 44 2f 78 68 74 6d 6c 31 2d 74 72 61 6e 73 69 74
#> [115] 69 6f 6e 61 6c 2e 64 74 64 22 3e 0a 3c 68 74 6d 6c 20 78 6d 6c 6e 73 3d 22 68 74 74 70 3a 2f 2f 77 77 77 2e 77 33
#> [153] 2e 6f 72 67 2f 31 39 39 39 2f 78 68 74 6d 6c 22 3e 0a 3c 68 65 61 64 3e 0a 3c 6d 65 74 61 20 6e 61 6d 65 3d 22 67
#> [191] 65 6e 65 72 61 74 6f 72 22 20 63 6f 6e 74 65 6e 74 3d 0a 22 48 54 4d 4c 20 54 69 64 79 20 66 6f 72 20 48 54 4d 4c
#> [229] 35 20 66 6f 72 20 52 20 76 65 72 73 69 6f 6e 20 35 2e 30 2e 30 22 20 2f 3e 0a 3c 6d
head(tidy_html(content(res, as="raw")), 256)
#> [1] 3c 21 44 4f 43 54 59 50 45 20 68 74 6d 6c 20 50 55 42 4c 49 43 20 22 2d 2f 2f 57 33 43 2f 2f 44 54 44 20 58 48 54
#> [39] 4d 4c 20 31 2e 30 20 54 72 61 6e 73 69 74 69 6f 6e 61 6c 2f 2f 45 4e 22 0a 20 20 20 20 22 68 74 74 70 3a 2f 2f 77
#> [77] 77 77 2e 77 33 2e 6f 72 67 2f 54 52 2f 78 68 74 6d 6c 31 2f 44 54 44 2f 78 68 74 6d 6c 31 2d 74 72 61 6e 73 69 74
#> [115] 69 6f 6e 61 6c 2e 64 74 64 22 3e 0a 3c 68 74 6d 6c 20 78 6d 6c 6e 73 3d 22 68 74 74 70 3a 2f 2f 77 77 77 2e 77 33
#> [153] 2e 6f 72 67 2f 31 39 39 39 2f 78 68 74 6d 6c 22 3e 0a 3c 68 65 61 64 3e 0a 3c 6d 65 74 61 20 6e 61 6d 65 3d 22 67
#> [191] 65 6e 65 72 61 74 6f 72 22 20 63 6f 6e 74 65 6e 74 3d 0a 22 48 54 4d 4c 20 54 69 64 79 20 66 6f 72 20 48 54 4d 4c
#> [229] 35 20 66 6f 72 20 52 20 76 65 72 73 69 6f 6e 20 35 2e 30 2e 30 22 20 2f 3e 0a 3c 6d
(class(tidy_html(content(res, as="text", encoding="UTF-8")))) # output is too long to show
#> [1] "character"
tidy_html(content(res, as="parsed", encoding="UTF-8")) # same as tidy_html(read_html("http://rud.is"))
#> {xml_document}
#> <html xmlns="http://www.w3.org/1999/xhtml"> #> <html xmlns="http://www.w3.org/1999/xhtml">
#> [1] <head>\n <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0" />\n <meta http-equiv="Conten ...
#> [2] <body>\n<div id="main"><b>Welcome to rud.is.</b><br /><br />\n&gt; <i>You are in a maze of twisty little passages ...
(class(tidy_html(htmlParse("http://rud.is")))) # output is too long to show
#> [1] "HTMLInternalDocument" "HTMLInternalDocument" "XMLInternalDocument" "XMLAbstractDocument"
```
### Testing Options
``` r
opts <- list(TidyDocType="html5",
TidyMakeClean=TRUE,
TidyHideComments=TRUE,
TidyIndentContent=FALSE,
TidyWrapLen=200)
txt <- "<html>
<head>
<style>
p { color: red; }
</style>
<body>
<!-- ===== body ====== -->
<p>Test</p>
</body>
<!--Default Zone
-->
<!--Default Zone End-->
</html>"
cat(tidy_html(txt, option=opts))
#> <!DOCTYPE html>
#> <html>
#> <head> #> <head>
#> <meta name="generator" content= #> <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
#> "HTML Tidy for HTML5 for R version 5.0.0" /> #> <style>
#> p { color: red; }
#> </style>
#> <title></title> #> <title></title>
#> </head> #> </head>
#> <body> #> <body>
#> <p><b><a href='http://google.com'>google &gt;</a></b></p> #> <p>Test</p>
#> </body> #> </body>
#> </html> #> </html>
``` ```

23
man/tidy_html.Rd

@ -2,9 +2,25 @@
% Please edit documentation in R/tidy.r % Please edit documentation in R/tidy.r
\name{tidy_html} \name{tidy_html}
\alias{tidy_html} \alias{tidy_html}
\title{Tidy HTML/XML/XHTML Documents} \alias{tidy_html.HTMLInternalDocument}
\alias{tidy_html.character}
\alias{tidy_html.default}
\alias{tidy_html.raw}
\alias{tidy_html.xml_document}
\title{Tidy HTML/XHTML Documents}
\usage{ \usage{
tidy_html(content, options = list(TidyXhtmlOut = TRUE)) tidy_html(content, options = list(TidyXhtmlOut = TRUE))
\method{tidy_html}{default}(content, options = list(TidyXhtmlOut = TRUE))
\method{tidy_html}{character}(content, options = list(TidyXhtmlOut = TRUE))
\method{tidy_html}{raw}(content, options = list(TidyXhtmlOut = TRUE))
\method{tidy_html}{xml_document}(content, options = list(TidyXhtmlOut = TRUE))
\method{tidy_html}{HTMLInternalDocument}(content, options = list(TidyXhtmlOut
= TRUE))
} }
\arguments{ \arguments{
\item{content}{atomic character or raw vector of content to tidy} \item{content}{atomic character or raw vector of content to tidy}
@ -12,7 +28,7 @@ tidy_html(content, options = list(TidyXhtmlOut = TRUE))
\item{options}{named list of options} \item{options}{named list of options}
} }
\value{ \value{
Atomic character vector of tidy HTML/XML/XHTML content tidied HTML/XHTML content
} }
\description{ \description{
Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly}, Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
@ -24,8 +40,7 @@ Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
\code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark}, \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark},
\code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize}, \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize},
\code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000}, \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000},
\code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut}, \code{TidyWrapLen}, \code{TidyXhtmlOut}
\code{TidyXmlTags}.
} }
\references{ \references{
\url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h} \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h}

50
man/tidy_xml.Rd

@ -0,0 +1,50 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xml.r
\name{tidy_xml}
\alias{tidy_xml}
\alias{tidy_xml.XMLInternalDocument}
\alias{tidy_xml.character}
\alias{tidy_xml.default}
\alias{tidy_xml.raw}
\alias{tidy_xml.xml_document}
\title{Tidy XML Documents}
\usage{
tidy_xml(content, options = list(TidyXmlOut = TRUE))
\method{tidy_xml}{default}(content, options = list(TidyXmlOut = TRUE))
\method{tidy_xml}{character}(content, options = list(TidyXmlOut = TRUE))
\method{tidy_xml}{raw}(content, options = list(TidyXmlOut = TRUE))
\method{tidy_xml}{xml_document}(content, options = list(TidyXmlOut = TRUE))
\method{tidy_xml}{XMLInternalDocument}(content, options = list(TidyXmlOut =
TRUE))
}
\arguments{
\item{content}{atomic character or raw vector of content to tidy}
\item{options}{named list of options}
}
\value{
tidied XML content
}
\description{
Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
\code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, \code{TidyCoerceEndTags},
\code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas},
\code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments},
\code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces},
\code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis},
\code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark},
\code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize},
\code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000},
\code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut},
\code{TidyXmlTags}.
}
\references{
\url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h}
(for definitions of the options supported above).
}

19
src/htmltidy.cpp

@ -21,31 +21,16 @@ std::string tidy_html_int(std::string source, Rcpp::List options) {
if (ok == no) Rcpp::stop("Error setting TidyHTML options"); if (ok == no) Rcpp::stop("Error setting TidyHTML options");
} }
if (options.containsElementNamed("TidyXmlOut")) {
ok = tidyOptSetBool(tdoc, TidyXmlOut, options["TidyXmlOut"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyHtmlOut")) { if (options.containsElementNamed("TidyHtmlOut")) {
ok = tidyOptSetBool(tdoc, TidyHtmlOut, options["TidyHtmlOut"] ? yes : no); ok = tidyOptSetBool(tdoc, TidyHtmlOut, options["TidyHtmlOut"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options"); if (ok == no) Rcpp::stop("Error setting TidyHTML options");
} }
if (options.containsElementNamed("TidyXmlTags")) {
ok = tidyOptSetBool(tdoc, TidyXmlTags, options["TidyXmlTags"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyOmitOptionalTags")) { if (options.containsElementNamed("TidyOmitOptionalTags")) {
ok = tidyOptSetBool(tdoc, TidyOmitOptionalTags, options["TidyOmitOptionalTags"] ? yes : no); ok = tidyOptSetBool(tdoc, TidyOmitOptionalTags, options["TidyOmitOptionalTags"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options"); if (ok == no) Rcpp::stop("Error setting TidyHTML options");
} }
if (options.containsElementNamed("TidyXmlDecl")) {
ok = tidyOptSetBool(tdoc, TidyXmlDecl, options["TidyXmlDecl"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyBreakBeforeBR")) { if (options.containsElementNamed("TidyBreakBeforeBR")) {
ok = tidyOptSetBool(tdoc, TidyBreakBeforeBR, options["TidyBreakBeforeBR"] ? yes : no); ok = tidyOptSetBool(tdoc, TidyBreakBeforeBR, options["TidyBreakBeforeBR"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options"); if (ok == no) Rcpp::stop("Error setting TidyHTML options");
@ -87,7 +72,7 @@ std::string tidy_html_int(std::string source, Rcpp::List options) {
} }
if (options.containsElementNamed("TidyHideComments")) { if (options.containsElementNamed("TidyHideComments")) {
ok = tidyOptSetBool(tdoc, TidyBodyOnly, options["TidyHideComments"] ? yes : no); ok = tidyOptSetBool(tdoc, TidyHideComments, options["TidyHideComments"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options"); if (ok == no) Rcpp::stop("Error setting TidyHTML options");
} }
@ -137,7 +122,7 @@ std::string tidy_html_int(std::string source, Rcpp::List options) {
} }
if (options.containsElementNamed("TidyMakeClean")) { if (options.containsElementNamed("TidyMakeClean")) {
ok = tidyOptSetValue(tdoc, TidyMakeClean, Rcpp::as<std::string>(options["TidyMakeClean"]).c_str()); ok = tidyOptSetBool(tdoc, TidyMakeClean, options["TidyMakeClean"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options"); if (ok == no) Rcpp::stop("Error setting TidyHTML options");
} }

Loading…
Cancel
Save