Bob Rudis
8 years ago
10 changed files with 323 additions and 45 deletions
@ -1,5 +1,16 @@ |
|||||
# Generated by roxygen2: do not edit by hand |
# Generated by roxygen2: do not edit by hand |
||||
|
|
||||
|
S3method(tidy_html,HTMLInternalDocument) |
||||
|
S3method(tidy_html,character) |
||||
|
S3method(tidy_html,default) |
||||
|
S3method(tidy_html,raw) |
||||
|
S3method(tidy_html,xml_document) |
||||
|
S3method(tidy_xml,XMLInternalDocument) |
||||
|
S3method(tidy_xml,character) |
||||
|
S3method(tidy_xml,default) |
||||
|
S3method(tidy_xml,raw) |
||||
|
S3method(tidy_xml,xml_document) |
||||
export(tidy_html) |
export(tidy_html) |
||||
|
export(tidy_xml) |
||||
importFrom(Rcpp,sourceCpp) |
importFrom(Rcpp,sourceCpp) |
||||
useDynLib(htmltidy) |
useDynLib(htmltidy) |
||||
|
@ -0,0 +1,64 @@ |
|||||
|
#' Tidy XML Documents
#'
#' S3 generic that tidies XML content; the method is chosen from the class
#' of \code{content}.
#'
#' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
#' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, \code{TidyDoctype},
#' \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas},
#' \code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments},
#' \code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces},
#' \code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis},
#' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark},
#' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize},
#' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000},
#' \code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl},
#' \code{TidyXmlOut}, \code{TidyXmlTags}.
#'
#' @param content content to tidy: an atomic character or raw vector, an
#'        \code{xml_document} (xml2) or an \code{XMLInternalDocument} (XML)
#' @param options named list of options
#' @return tidied XML content. The \code{raw}, \code{xml_document} and
#'         \code{XMLInternalDocument} methods convert the tidied text back
#'         into a raw vector, an xml2 document and an XML document
#'         respectively.
#' @references \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h}
#'   (for definitions of the options supported above).
#' @export
tidy_xml <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Dispatch explicitly on `content`, the first argument.
  UseMethod("tidy_xml", content)
}
||||
|
|
||||
|
#' @export
#' @rdname tidy_xml
tidy_xml.default <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Fallback method: `content` and `options` are handed to the package's
  # native routine unchanged, and whatever it returns is the result.
  tidied <- .Call(
    "htmltidy_tidy_html_int",
    source = content,
    options = options,
    PACKAGE = "htmltidy"
  )
  tidied
}
||||
|
|
||||
|
#' @export
#' @rdname tidy_xml
tidy_xml.character <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Character input needs no conversion: call the package's native routine
  # with it directly and return the routine's result.
  tidied <- .Call(
    "htmltidy_tidy_html_int",
    source = content,
    options = options,
    PACKAGE = "htmltidy"
  )
  tidied
}
||||
|
|
||||
|
#' @export
#' @rdname tidy_xml
tidy_xml.raw <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Read the raw vector as a character string, then convert it to UTF-8
  # before handing it to the native routine.
  decoded <- readBin(content, character())
  decoded <- iconv(decoded, to = "UTF-8")

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    source = decoded,
    options = options,
    PACKAGE = "htmltidy"
  )

  # Raw in, raw out: convert the tidied text back to a raw vector.
  charToRaw(tidied)
}
||||
|
|
||||
|
#' @export
#' @rdname tidy_xml
tidy_xml.xml_document <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Turn the document into a single string for the native routine.
  serialized <- toString(content)

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    source = serialized,
    options = options,
    PACKAGE = "htmltidy"
  )

  # Re-parse the tidied text so the caller gets an xml2 document back.
  xml2::read_xml(tidied)
}
||||
|
|
||||
|
#' @export
#' @rdname tidy_xml
tidy_xml.XMLInternalDocument <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Serialize the XML-package document to text. The call is namespace-
  # qualified (like XML::xmlParse below) so it resolves even when the XML
  # package has not been attached with library(XML); an unqualified
  # saveXML() is not imported into this package's namespace.
  serialized <- XML::saveXML(content)

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    source = serialized,
    options = options,
    PACKAGE = "htmltidy"
  )

  # Re-parse the tidied text so the caller gets an XML-package document back.
  XML::xmlParse(tidied)
}
@ -0,0 +1,50 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/xml.r |
||||
|
\name{tidy_xml} |
||||
|
\alias{tidy_xml} |
||||
|
\alias{tidy_xml.XMLInternalDocument} |
||||
|
\alias{tidy_xml.character} |
||||
|
\alias{tidy_xml.default} |
||||
|
\alias{tidy_xml.raw} |
||||
|
\alias{tidy_xml.xml_document} |
||||
|
\title{Tidy XML Documents} |
||||
|
\usage{ |
||||
|
tidy_xml(content, options = list(TidyXmlOut = TRUE)) |
||||
|
|
||||
|
\method{tidy_xml}{default}(content, options = list(TidyXmlOut = TRUE)) |
||||
|
|
||||
|
\method{tidy_xml}{character}(content, options = list(TidyXmlOut = TRUE)) |
||||
|
|
||||
|
\method{tidy_xml}{raw}(content, options = list(TidyXmlOut = TRUE)) |
||||
|
|
||||
|
\method{tidy_xml}{xml_document}(content, options = list(TidyXmlOut = TRUE)) |
||||
|
|
||||
|
\method{tidy_xml}{XMLInternalDocument}(content, options = list(TidyXmlOut = |
||||
|
TRUE)) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{content}{atomic character or raw vector of content to tidy} |
||||
|
|
||||
|
\item{options}{named list of options} |
||||
|
} |
||||
|
\value{ |
||||
|
tidied XML content |
||||
|
} |
||||
|
\description{ |
||||
|
Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly}, |
||||
|
\code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, |
||||
|
\code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas}, |
||||
|
\code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments}, |
||||
|
\code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces}, |
||||
|
\code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis}, |
||||
|
\code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark}, |
||||
|
\code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize}, |
||||
|
\code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000}, |
||||
|
\code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut}, |
||||
|
\code{TidyXmlTags}. |
||||
|
} |
||||
|
\references{ |
||||
|
\url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h} |
||||
|
(for definitions of the options supported above). |
||||
|
} |
||||
|
|
Loading…
Reference in new issue