10 changed files with 323 additions and 45 deletions
@ -1,5 +1,16 @@ |
|||
# Generated by roxygen2: do not edit by hand |
|||
|
|||
S3method(tidy_html,HTMLInternalDocument) |
|||
S3method(tidy_html,character) |
|||
S3method(tidy_html,default) |
|||
S3method(tidy_html,raw) |
|||
S3method(tidy_html,xml_document) |
|||
S3method(tidy_xml,XMLInternalDocument) |
|||
S3method(tidy_xml,character) |
|||
S3method(tidy_xml,default) |
|||
S3method(tidy_xml,raw) |
|||
S3method(tidy_xml,xml_document) |
|||
export(tidy_html) |
|||
export(tidy_xml) |
|||
importFrom(Rcpp,sourceCpp) |
|||
useDynLib(htmltidy) |
|||
|
@ -0,0 +1,64 @@ |
|||
#' Tidy XML Documents |
|||
#' |
|||
#' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly}, |
|||
#' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, |
|||
#' \code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas}, |
|||
#' \code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments}, |
|||
#' \code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces}, |
|||
#' \code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis}, |
|||
#' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark}, |
|||
#' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize}, |
|||
#' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000}, |
|||
#' \code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut}, |
|||
#' \code{TidyXmlTags}. |
|||
#' |
|||
#' @param content atomic character or raw vector of content to tidy, or an \code{xml_document} or \code{XMLInternalDocument} object |
|||
#' @param options named list of options |
|||
#' @return tidied XML content |
|||
#' @references \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h} |
|||
#' (for definitions of the options supported above). |
|||
#' @export |
|||
tidy_xml <- function(content, options = list(TidyXmlOut = TRUE)) {
  # S3 generic: the method that runs is chosen from the class of `content`.
  UseMethod("tidy_xml")
}
|||
|
|||
#' @export |
|||
#' @rdname tidy_xml |
|||
tidy_xml.default <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Fallback method: hand `content` and `options` unchanged to the package's
  # compiled routine and return whatever that routine produces.
  .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = content,
    options = options
  )
}
|||
|
|||
#' @export |
|||
#' @rdname tidy_xml |
|||
tidy_xml.character <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Character input needs no conversion: pass it straight to the compiled
  # routine together with the caller's options.
  .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = content,
    options = options
  )
}
|||
|
|||
#' @export |
|||
#' @rdname tidy_xml |
|||
tidy_xml.raw <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Decode the raw vector into a character string, then convert that string
  # to UTF-8 before handing it to the compiled routine.
  as_text <- readBin(content, character())
  as_utf8 <- iconv(as_text, to = "UTF-8")

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = as_utf8,
    options = options
  )

  # Raw in, raw out: convert the tidied text back to a raw vector.
  charToRaw(tidied)
}
|||
|
|||
#' @export |
|||
#' @rdname tidy_xml |
|||
tidy_xml.xml_document <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Turn the document object into a string the compiled routine can accept.
  doc_text <- toString(content)

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = doc_text,
    options = options
  )

  # Re-parse the tidied text with xml2 so the result is a parsed document
  # rather than a plain string.
  xml2::read_xml(tidied)
}
|||
|
|||
#' @export |
|||
#' @rdname tidy_xml |
|||
tidy_xml.XMLInternalDocument <- function(content,
                                         options = list(TidyXmlOut = TRUE)) {
  # Serialize the document to text. The call is namespace-qualified because
  # this package's NAMESPACE does not import `saveXML`; an unqualified call
  # only resolves when the user happens to have attached the XML package.
  doc_text <- XML::saveXML(content)

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = doc_text,
    options = options
  )

  # Re-parse the tidied text with the XML package so the result is a parsed
  # document rather than a plain string.
  XML::xmlParse(tidied)
}
@ -0,0 +1,50 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/xml.r |
|||
\name{tidy_xml} |
|||
\alias{tidy_xml} |
|||
\alias{tidy_xml.XMLInternalDocument} |
|||
\alias{tidy_xml.character} |
|||
\alias{tidy_xml.default} |
|||
\alias{tidy_xml.raw} |
|||
\alias{tidy_xml.xml_document} |
|||
\title{Tidy XML Documents} |
|||
\usage{ |
|||
tidy_xml(content, options = list(TidyXmlOut = TRUE)) |
|||
|
|||
\method{tidy_xml}{default}(content, options = list(TidyXmlOut = TRUE)) |
|||
|
|||
\method{tidy_xml}{character}(content, options = list(TidyXmlOut = TRUE)) |
|||
|
|||
\method{tidy_xml}{raw}(content, options = list(TidyXmlOut = TRUE)) |
|||
|
|||
\method{tidy_xml}{xml_document}(content, options = list(TidyXmlOut = TRUE)) |
|||
|
|||
\method{tidy_xml}{XMLInternalDocument}(content, options = list(TidyXmlOut = |
|||
TRUE)) |
|||
} |
|||
\arguments{ |
|||
\item{content}{atomic character or raw vector of content to tidy, or an \code{xml_document} or \code{XMLInternalDocument} object} |
|||
|
|||
\item{options}{named list of options} |
|||
} |
|||
\value{ |
|||
tidied XML content |
|||
} |
|||
\description{ |
|||
Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly}, |
|||
\code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, |
|||
\code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas}, |
|||
\code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments}, |
|||
\code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces}, |
|||
\code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis}, |
|||
\code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark}, |
|||
\code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize}, |
|||
\code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000}, |
|||
\code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut}, |
|||
\code{TidyXmlTags}. |
|||
} |
|||
\references{ |
|||
\url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h} |
|||
(for definitions of the options supported above). |
|||
} |
|||
|
Loading…
Reference in new issue