You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

66 lines
2.4 KiB

#' Tidy HTML/XHTML Documents
#'
#' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly},
#' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags},
#' \code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas},
#' \code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments},
#' \code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces},
#' \code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis},
#' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark},
#' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize},
#' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000},
#' \code{TidyWrapLen}, \code{TidyXhtmlOut}
#'
#' @param content atomic character or raw vector of content to tidy, or a
#'   parsed document of class \code{xml_document} or
#'   \code{HTMLInternalDocument}
#' @param options named list of options
#' @return tidied HTML/XHTML content
#' @references \url{https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h}
#' (for definitions of the options supported above).
#' @export
tidy_html <- function(content, options = list(TidyXhtmlOut = TRUE)) {
  # S3 generic: the method is chosen from the class of `content`.
  UseMethod("tidy_html")
}
#' @export
#' @rdname tidy_html
tidy_html.default <- function(content, options = list(TidyXhtmlOut = TRUE)) {
  # Only the first element is tidied; any further elements are dropped.
  first_doc <- content[1]
  # Hand the document and the option list to the package's compiled routine.
  .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
        source = first_doc, options = options)
}
#' @export
#' @rdname tidy_html
tidy_html.character <- function(content, options = list(TidyXhtmlOut = TRUE)) {
  .Call(
    "htmltidy_tidy_html_int",
    PACKAGE = "htmltidy",
    # A single document is tidied per call: just the first element.
    source = content[1],
    options = options
  )
}
#' @export
#' @rdname tidy_html
tidy_html.raw <- function(content, options = list(TidyXhtmlOut = TRUE)) {
  # Decode the *whole* raw vector. Subsetting it with `content[1]` first (as
  # this method previously did) keeps only the first byte, so just one
  # character of the document would ever reach the tidy routine.
  text <- readBin(content, character())
  # An empty raw vector can decode to a zero-length result; fall back to an
  # empty string so that exactly one string is always passed on.
  if (length(text) == 0L) {
    text <- ""
  }
  # Convert to UTF-8 before tidying (iconv's default source encoding is used).
  text <- iconv(text, to = "UTF-8")
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = text, options = options)
  # Raw in, raw out.
  charToRaw(tidied)
}
#' @export
#' @rdname tidy_html
tidy_html.xml_document <- function(content, options = list(TidyXhtmlOut = TRUE)) {
  # Flatten the document object to a single string for the tidy routine.
  markup <- toString(content)
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = markup, options = options)
  # Re-parse the tidied text with xml2 and return the resulting document.
  xml2::read_html(tidied)
}
#' @export
#' @rdname tidy_html
tidy_html.HTMLInternalDocument <- function(content, options = list(TidyXhtmlOut = TRUE)) {
  # Serialise with an explicitly namespaced call: a bare `saveXML` only
  # resolves when the XML package happens to be attached or imported, whereas
  # the parse step below was already namespace-qualified.
  markup <- XML::saveXML(content)
  tidied <- .Call("htmltidy_tidy_html_int", PACKAGE = "htmltidy",
                  source = markup, options = options)
  # `asText = TRUE` makes the parser treat the string as markup rather than
  # first considering it as a possible file name.
  XML::htmlParse(tidied, asText = TRUE)
}