diff --git a/R/tidy.r b/R/tidy.r index f6c4531..3e34576 100644 --- a/R/tidy.r +++ b/R/tidy.r @@ -1,68 +1,21 @@ #' Tidy HTML/XML/XHTML Documents #' +#' Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly}, +#' \code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, +#' \code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas}, +#' \code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments}, +#' \code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces}, +#' \code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis}, +#' \code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark}, +#' \code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize}, +#' \code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000}, +#' \code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut}, +#' \code{TidyXmlTags}. +#' #' @param content atomic character or raw vector of content to tidy #' @param options named list of options -#' @return atomic character vector of tidy content +#' @return Atomic character vector of tidy HTML/XML/XHTML content #' @export tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE)) { .Call('htmltidy_tidy_html_int', PACKAGE='htmltidy', source=content, options=options) } - -# -# TidyXmlDecl, /**< Add <?xml?> for XML docs */ -# TidyUpperCaseTags, /**< Output tags in upper not lower case */ -# TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */ -# TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */ -# TidyMakeClean, /**< Replace presentational clutter by style rules */ -# TidyGDocClean, /**< Clean up HTML exported from Google Docs */ -# TidyLogicalEmphasis, /**< Replace i by em and b by strong */ -# TidyDropPropAttrs, /**< Discard proprietary attributes */ -# TidyDropFontTags, /**< Discard presentation tags */ -# TidyDropEmptyElems, /**< Discard empty elements */ -# TidyDropEmptyParas, /**< Discard empty 
p elements */ -# TidyFixComments, /**< Fix comments with adjacent hyphens */ -# TidyBreakBeforeBR, /**< Output newline before <br>
or not? */ - # TidyNumEntities, /**< Use numeric entities */ - # TidyQuoteMarks, /**< Output " marks as &quot; */ - # TidyQuoteNbsp, /**< Output non-breaking space as entity */ - # TidyQuoteAmpersand, /**< Output naked ampersand as &amp; */ - # TidyWrapAttVals, /**< Wrap within attribute values */ - # TidyWrapScriptlets, /**< Wrap within JavaScript string literals */ - # TidyWrapSection, /**< Wrap within <![ ... ]> section tags */ - # TidyWrapAsp, /**< Wrap within ASP pseudo elements */ - # TidyWrapJste, /**< Wrap within JSTE pseudo elements */ - # TidyWrapPhp, /**< Wrap within PHP pseudo elements */ - # TidyFixBackslash, /**< Fix URLs by replacing \ with / */ - # TidyIndentAttributes,/**< Newline+indent before each attribute */ - # TidyXmlPIs, /**< If set to yes PIs must end with ?> */ - # TidyXmlSpace, /**< If set to yes adds xml:space attr as needed */ - # TidyEncloseBodyText, /**< If yes text at body is wrapped in P's */ - # TidyEncloseBlockText,/**< If yes text in blocks is wrapped in P's */ - # TidyKeepFileTimes, /**< If yes last modied time is preserved */ - # TidyWord2000, /**< Draconian cleaning for Word2000 */ - # TidyMark, /**< Add meta element indicating tidied doc */ - # TidyEmacs, /**< If true format error output for GNU Emacs */ - # TidyEmacsFile, /**< Name of current Emacs file */ - # TidyLiteralAttribs, /**< If true attributes may use newlines */ - # TidyBodyOnly, /**< Output BODY content only */ - # TidyFixUri, /**< Applies URI encoding if necessary */ - # TidyLowerLiterals, /**< Folds known attribute values to lower case */ - # TidyHideComments, /**< Hides all (real) comments in output */ - # TidyIndentCdata, /**< Indent <!CDATA[ ... ]]> section */ - # TidyForceOutput, /**< Output document even if errors were found */ - # TidyShowErrors, /**< Number of errors to put out */ - # TidyAsciiChars, /**< Convert quotes and dashes to nearest ASCII char */ - # TidyJoinClasses, /**< Join multiple class attributes */ - # TidyJoinStyles, /**< Join multiple style attributes */ - # 
TidyEscapeCdata, /**< Replace <![CDATA[]]> sections with escaped text */ - # TidyIndentSpaces, /**< Indentation n spaces/tabs */ - # TidyWrapLen, /**< Wrap margin */ - # TidyTabSize, /**< Expand tabs to n spaces */ - - - - - - - - diff --git a/man/tidy_html.Rd b/man/tidy_html.Rd index 6a7e8ca..7bd2c48 100644 --- a/man/tidy_html.Rd +++ b/man/tidy_html.Rd @@ -12,9 +12,19 @@ tidy_html(content, options = list(TidyXhtmlOut = TRUE)) \item{options}{named list of options} } \value{ -atomic character vector of tidy content +Atomic character vector of tidy HTML/XML/XHTML content } \description{ -Tidy HTML/XML/XHTML Documents +Currently supported options: \code{TidyAltText}, \code{TidyBodyOnly}, +\code{TidyBreakBeforeBR}, \code{TidyCoerceEndTags}, +\code{TidyDoctype}, \code{TidyDropEmptyElems}, \code{TidyDropEmptyParas}, +\code{TidyFixBackslash}, \code{TidyFixComments}, \code{TidyHideComments}, +\code{TidyHtmlOut}, \code{TidyIndentContent}, \code{TidyIndentSpaces}, +\code{TidyJoinClasses}, \code{TidyJoinStyles}, \code{TidyLogicalEmphasis}, +\code{TidyMakeBare}, \code{TidyMakeClean}, \code{TidyMark}, +\code{TidyOmitOptionalTags}, \code{TidyReplaceColor}, \code{TidyTabSize}, +\code{TidyUpperCaseAttrs}, \code{TidyUpperCaseTags}, \code{TidyWord2000}, +\code{TidyWrapLen}, \code{TidyXhtmlOut}, \code{TidyXmlDecl}, \code{TidyXmlOut}, +\code{TidyXmlTags}. } diff --git a/src/htmltidy.cpp b/src/htmltidy.cpp index 18a00cc..a405560 100644 --- a/src/htmltidy.cpp +++ b/src/htmltidy.cpp @@ -68,6 +68,11 @@ std::string tidy_html_int(std::string source, Rcpp::List options) { if (ok == no) Rcpp::stop("Error setting TidyHTML options"); } + if (options.containsElementNamed("TidyUpperCaseAttrs")) { + ok = tidyOptSetBool(tdoc, TidyUpperCaseAttrs, options["TidyUpperCaseAttrs"] ? 
yes : no); + if (ok == no) Rcpp::stop("Error setting TidyHTML options"); + } + if (options.containsElementNamed("TidyDropEmptyElems")) { ok = tidyOptSetBool(tdoc, TidyDropEmptyElems, options["TidyDropEmptyElems"] ? yes : no); if (ok == no) Rcpp::stop("Error setting TidyHTML options"); @@ -128,8 +133,23 @@ std::string tidy_html_int(std::string source, Rcpp::List options) { if (ok == no) Rcpp::stop("Error setting TidyHTML options"); } - if (options.containsElementNamed("TidyCSSPrefix")) { - ok = tidyOptSetValue(tdoc, TidyFixBackslash, Rcpp::as(options["TidyFixBackslash"]).c_str()); + if (options.containsElementNamed("TidyCoerceEndTags")) { + ok = tidyOptSetBool(tdoc, TidyCoerceEndTags, options["TidyCoerceEndTags"] ? yes : no); + if (ok == no) Rcpp::stop("Error setting TidyHTML options"); + } + + if (options.containsElementNamed("TidyCoerceEndTags")) { + ok = tidyOptSetBool(tdoc, TidyCoerceEndTags, options["TidyCoerceEndTags"] ? yes : no); + if (ok == no) Rcpp::stop("Error setting TidyHTML options"); + } + + if (options.containsElementNamed("TidyMakeBare")) { + ok = tidyOptSetBool(tdoc, TidyMakeBare, options["TidyMakeBare"] ? yes : no); + if (ok == no) Rcpp::stop("Error setting TidyHTML options"); + } + + if (options.containsElementNamed("TidyMakeClean")) { + ok = tidyOptSetValue(tdoc, TidyMakeClean, Rcpp::as(options["TidyMakeClean"]).c_str()); if (ok == no) Rcpp::stop("Error setting TidyHTML options"); }