You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
2.4 KiB
64 lines
2.4 KiB
#' Tidy XML Documents
#'
#' S3 generic that dispatches on the class of \code{content}. Methods are
#' provided for character vectors, raw vectors, \code{xml_document} objects
#' and \code{XMLInternalDocument} objects, plus a default fallback.
#'
#' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
#' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags},
#' \code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas},
#' \code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments},
#' \code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces},
#' \code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis},
#' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark},
#' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize},
#' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000},
#' \code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut},
#' \code{TidyXmlTags}.
#'
#' @param content atomic character or raw vector of content to tidy
#' @param options named list of options
#' @return tidied XML content
#' @references \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h}
#'     (for definitions of the options supported above).
#' @export
tidy_xml <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Pure dispatcher: the class-specific methods do the actual work.
  UseMethod("tidy_xml")
}
|
|
|
|
#' @export
#' @rdname tidy_xml
tidy_xml.default <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Fallback for classes without a dedicated method: hand `content` to the
  # compiled tidy routine unchanged and return whatever it produces.
  .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = content,
    options = options
  )
}
|
|
|
|
#' @export
#' @rdname tidy_xml
tidy_xml.character <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Character input needs no conversion: pass it straight to the compiled
  # tidy routine and return its result.
  .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = content,
    options = options
  )
}
|
|
|
|
#' @export
#' @rdname tidy_xml
tidy_xml.raw <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Read a character string out of the raw vector, then convert it to UTF-8
  # before handing it to the compiled tidy routine.
  as_text <- readBin(content, character())
  as_utf8 <- iconv(as_text, to = "UTF-8")

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = as_utf8,
    options = options
  )

  # Raw in, raw out: convert the tidied string back to bytes.
  charToRaw(tidied)
}
|
|
|
|
#' @export
#' @rdname tidy_xml
tidy_xml.xml_document <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Turn the document into a string so the compiled tidy routine can
  # process it.
  serialized <- toString(content)

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = serialized,
    options = options
  )

  # Parse the tidied text back with xml2 and return the parsed result.
  xml2::read_xml(tidied)
}
|
|
|
|
#' @export
#' @rdname tidy_xml
tidy_xml.XMLInternalDocument <- function(content, options = list(TidyXmlOut = TRUE)) {
  # Serialize the document to text. The call is namespace-qualified (like
  # XML::xmlParse() below) so it resolves even when the XML package is not
  # attached by the caller.
  serialized <- XML::saveXML(content)

  tidied <- .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    source = serialized,
    options = options
  )

  # Parse the tidied text back with the XML package and return the result.
  XML::xmlParse(tidied)
}
|
|
|