diff --git a/NAMESPACE b/NAMESPACE
index 6cc908c..557df98 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand
export(tidy_html)
+export(tidy_options)
importFrom(Rcpp,sourceCpp)
useDynLib(htmltidy)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 23e0ec4..d17e027 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,11 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
-#' Tidy HTML/XML
-#'
-#' @param source length 1 character vetor containing the HTML/XML source to process
-#' @export
-tidy_html <- function(source) {
- .Call('htmltidy_tidy_html', PACKAGE = 'htmltidy', source)
+tidy_html_int <- function(source, options) {
+ .Call('htmltidy_tidy_html_int', PACKAGE = 'htmltidy', source, options)
}
diff --git a/R/aaa.r b/R/aaa.r
new file mode 100644
index 0000000..2daa840
--- /dev/null
+++ b/R/aaa.r
@@ -0,0 +1,53 @@
+#' @title HTML, XHTML & XML Options for tidy_html
+#' @description This dataset contains the options (and their default settings) for
+#' tidy_html. They are passed in a named-list to tidy_html
+#'
+#' \itemize{
+#' \item \code{Option}: Option name
+#' \item \code{Type}: Option value type
+#' \item \code{Default}: Default value of the option
+#' }
+#'
+#' @docType data
+#' @keywords datasets
+#' @name tidy_options
+#'
+#' @references The \href{http://api.html-tidy.org/tidy/quickref_5.1.25.html}{
+#' HTML Tidy Options Quick Reference}
+#'
+#' @export
+#' @usage tidy_options
+#' @note Last updated 2016-09-09.
+#' @format A data frame with 55 rows and 3 variables
+NULL
+# Option lookup table for tidy_html(): one row per option (name, type, default)
+tidy_options <- structure(list(Option = c("add-xml-decl", "add-xml-space", "alt-text",
+"anchor-as-name", "assume-xml-procins", "bare", "clean", "coerce-endtags",
+"css-prefix", "decorate-inferred-ul", "doctype", "drop-empty-elements",
+"drop-empty-paras", "drop-font-tags", "drop-proprietary-attributes",
+"enclose-block-text", "enclose-text", "escape-cdata", "fix-backslash",
+"fix-bad-comments", "fix-uri", "gdoc", "hide-comments", "hide-endtags",
+"indent-cdata", "input-xml", "join-classes", "join-styles", "literal-attributes",
+"logical-emphasis", "lower-literals", "merge-divs", "merge-emphasis",
+"merge-spans", "ncr", "new-blocklevel-tags", "new-empty-tags",
+"new-inline-tags", "new-pre-tags", "numeric-entities", "omit-optional-tags",
+"output-html", "output-xhtml", "output-xml", "preserve-entities",
+"quote-ampersand", "quote-marks", "quote-nbsp", "repeated-attributes",
+"replace-color", "show-body-only", "skip-nested", "uppercase-attributes",
+"uppercase-tags", "word-2000"), Type = c("Boolean", "Boolean",
+"String", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
+"String", "Boolean", "DocType", "Boolean", "Boolean", "Boolean",
+"Boolean", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
+"Boolean", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
+"Boolean", "Boolean", "Boolean", "Boolean", "Boolean", "AutoBool",
+"Boolean", "AutoBool", "Boolean", "Tag names", "Tag names", "Tag names",
+"Tag names", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
+"Boolean", "Boolean", "Boolean", "Boolean", "enum", "Boolean",
+"AutoBool", "Boolean", "Boolean", "Boolean", "Boolean"), Default = c("no",
+"no", "-", "yes", "no", "no", "no", "yes", "-", "no", "auto",
+"yes", "yes", "no", "no", "no", "no", "no", "yes", "yes", "yes",
+"no", "no", "no", "no", "no", "no", "yes", "no", "no", "yes",
+"auto", "yes", "auto", "yes", "-", "-", "-", "-", "no", "no",
+"no", "no", "no", "no", "yes", "no", "yes", "keep-last", "no",
+"no", "yes", "no", "no", "no")), .Names = c("Option", "Type",
+"Default"), row.names = 3:57, class = "data.frame")
diff --git a/R/tidy.r b/R/tidy.r
new file mode 100644
index 0000000..f6c4531
--- /dev/null
+++ b/R/tidy.r
@@ -0,0 +1,68 @@
+#' Tidy HTML/XML/XHTML Documents
+#'
+#' @param content atomic character or raw vector of content to tidy
+#' @param options named list of options
+#' @return atomic character vector of tidy content
+#' @export
+tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE)) {
+ tidy_html_int(if (is.raw(content)) rawToChar(content) else content, options)
+}
+
+#
+# TidyXmlDecl, /**< Add <?xml?> for XML docs */
+# TidyUpperCaseTags, /**< Output tags in upper not lower case */
+# TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
+# TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
+# TidyMakeClean, /**< Replace presentational clutter by style rules */
+# TidyGDocClean, /**< Clean up HTML exported from Google Docs */
+# TidyLogicalEmphasis, /**< Replace i by em and b by strong */
+# TidyDropPropAttrs, /**< Discard proprietary attributes */
+# TidyDropFontTags, /**< Discard presentation tags */
+# TidyDropEmptyElems, /**< Discard empty elements */
+# TidyDropEmptyParas, /**< Discard empty p elements */
+# TidyFixComments, /**< Fix comments with adjacent hyphens */
+# TidyBreakBeforeBR, /**< Output newline before <br> or not? */
+ # TidyNumEntities, /**< Use numeric entities */
+ # TidyQuoteMarks, /**< Output " marks as &quot; */
+ # TidyQuoteNbsp, /**< Output non-breaking space as entity */
+ # TidyQuoteAmpersand, /**< Output naked ampersand as &amp; */
+ # TidyWrapAttVals, /**< Wrap within attribute values */
+ # TidyWrapScriptlets, /**< Wrap within JavaScript string literals */
+ # TidyWrapSection, /**< Wrap within <![ ... ]> section tags */
+ # TidyWrapAsp, /**< Wrap within ASP pseudo elements */
+ # TidyWrapJste, /**< Wrap within JSTE pseudo elements */
+ # TidyWrapPhp, /**< Wrap within PHP pseudo elements */
+ # TidyFixBackslash, /**< Fix URLs by replacing \ with / */
+ # TidyIndentAttributes,/**< Newline+indent before each attribute */
+ # TidyXmlPIs, /**< If set to yes PIs must end with ?> */
+ # TidyXmlSpace, /**< If set to yes adds xml:space attr as needed */
+ # TidyEncloseBodyText, /**< If yes text at body is wrapped in P's */
+ # TidyEncloseBlockText,/**< If yes text in blocks is wrapped in P's */
+ # TidyKeepFileTimes, /**< If yes last modified time is preserved */
+ # TidyWord2000, /**< Draconian cleaning for Word2000 */
+ # TidyMark, /**< Add meta element indicating tidied doc */
+ # TidyEmacs, /**< If true format error output for GNU Emacs */
+ # TidyEmacsFile, /**< Name of current Emacs file */
+ # TidyLiteralAttribs, /**< If true attributes may use newlines */
+ # TidyBodyOnly, /**< Output BODY content only */
+ # TidyFixUri, /**< Applies URI encoding if necessary */
+ # TidyLowerLiterals, /**< Folds known attribute values to lower case */
+ # TidyHideComments, /**< Hides all (real) comments in output */
+ # TidyIndentCdata, /**< Indent <![CDATA[ ... ]]> section */
+ # TidyForceOutput, /**< Output document even if errors were found */
+ # TidyShowErrors, /**< Number of errors to put out */
+ # TidyAsciiChars, /**< Convert quotes and dashes to nearest ASCII char */
+ # TidyJoinClasses, /**< Join multiple class attributes */
+ # TidyJoinStyles, /**< Join multiple style attributes */
+ # TidyEscapeCdata, /**< Replace <![CDATA[]]> sections with escaped text */
+ # TidyIndentSpaces, /**< Indentation n spaces/tabs */
+ # TidyWrapLen, /**< Wrap margin */
+ # TidyTabSize, /**< Expand tabs to n spaces */
+
+
+
+
+
+
+
+
diff --git a/README.Rmd b/README.Rmd
index deb95d1..a161f23 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -25,8 +25,8 @@ It relies on a locally included version of [`libtidy`](http://www.html-tidy.org/
This works enough for me to use in a pinch. It should be straightforward (but tedious) to:
-- enable passing options in a `list`
-- Getting it to work on Windows.
+- enable passing options in a `list` (IN PROGRESS)
+- Getting it to work on Windows (UNTESTED)
The following functions are implemented:
diff --git a/man/tidy_html.Rd b/man/tidy_html.Rd
index 7753e5c..6a7e8ca 100644
--- a/man/tidy_html.Rd
+++ b/man/tidy_html.Rd
@@ -1,15 +1,20 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/RcppExports.R
+% Please edit documentation in R/tidy.r
\name{tidy_html}
\alias{tidy_html}
-\title{Tidy HTML/XML}
+\title{Tidy HTML/XML/XHTML Documents}
\usage{
-tidy_html(source)
+tidy_html(content, options = list(TidyXhtmlOut = TRUE))
}
\arguments{
-\item{source}{length 1 character vetor containing the HTML/XML source to process}
+\item{content}{atomic character or raw vector of content to tidy}
+
+\item{options}{named list of options}
+}
+\value{
+atomic character vector of tidy content
}
\description{
-Tidy HTML/XML
+Tidy HTML/XML/XHTML Documents
}
diff --git a/man/tidy_options.Rd b/man/tidy_options.Rd
new file mode 100644
index 0000000..d79e832
--- /dev/null
+++ b/man/tidy_options.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aaa.r
+\docType{data}
+\name{tidy_options}
+\alias{tidy_options}
+\title{HTML, XHTML & XML Options for tidy_html}
+\format{A data frame with 55 rows and 3 variables}
+\usage{
+tidy_options
+}
+\description{
+This dataset contains the options (and their default settings) for
+tidy_html. They are passed in a named-list to tidy_html
+
+\itemize{
+ \item \code{Option}: Option name
+ \item \code{Type}: Option value type
+ \item \code{Default}: Default value of the option
+}
+}
+\note{
+Last updated 2016-09-09.
+}
+\references{
+The \href{http://api.html-tidy.org/tidy/quickref_5.1.25.html}{
+ HTML Tidy Options Quick Reference}
+}
+\keyword{datasets}
+
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index d02a8bf..37745f7 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -5,14 +5,15 @@
using namespace Rcpp;
-// tidy_html
-std::string tidy_html(std::string source);
-RcppExport SEXP htmltidy_tidy_html(SEXP sourceSEXP) {
+// tidy_html_int
+std::string tidy_html_int(std::string source, Rcpp::List options);
+RcppExport SEXP htmltidy_tidy_html_int(SEXP sourceSEXP, SEXP optionsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP);
- rcpp_result_gen = Rcpp::wrap(tidy_html(source));
+ Rcpp::traits::input_parameter< Rcpp::List >::type options(optionsSEXP);
+ rcpp_result_gen = Rcpp::wrap(tidy_html_int(source, options));
return rcpp_result_gen;
END_RCPP
}
diff --git a/src/htmltidy.cpp b/src/htmltidy.cpp
index 9e9f14f..18a00cc 100644
--- a/src/htmltidy.cpp
+++ b/src/htmltidy.cpp
@@ -18,12 +18,8 @@
// NOTE: cannot do "using namespace Rcpp;" b/c of annoying warnings about the
// ambiguity of 'yes'.
-//' Tidy HTML/XML
-//'
-//' @param source length 1 character vetor containing the HTML/XML source to process
-//' @export
//[[Rcpp::export]]
-std::string tidy_html(std::string source) {
+std::string tidy_html_int(std::string source, Rcpp::List options) {
TidyBuffer output = {0};
TidyBuffer errbuf = {0};
@@ -32,9 +28,140 @@ std::string tidy_html(std::string source) {
TidyDoc tdoc = tidyCreate();
- ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
-
- if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ if (options.containsElementNamed("TidyXhtmlOut")) {
+ ok = tidyOptSetBool(tdoc, TidyXhtmlOut, options["TidyXhtmlOut"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyXmlOut")) {
+ ok = tidyOptSetBool(tdoc, TidyXmlOut, options["TidyXmlOut"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyHtmlOut")) {
+ ok = tidyOptSetBool(tdoc, TidyHtmlOut, options["TidyHtmlOut"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyXmlTags")) {
+ ok = tidyOptSetBool(tdoc, TidyXmlTags, options["TidyXmlTags"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyOmitOptionalTags")) {
+ ok = tidyOptSetBool(tdoc, TidyOmitOptionalTags, options["TidyOmitOptionalTags"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyXmlDecl")) {
+ ok = tidyOptSetBool(tdoc, TidyXmlDecl, options["TidyXmlDecl"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyBreakBeforeBR")) {
+ ok = tidyOptSetBool(tdoc, TidyBreakBeforeBR, options["TidyBreakBeforeBR"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyUpperCaseTags")) {
+ ok = tidyOptSetBool(tdoc, TidyUpperCaseTags, options["TidyUpperCaseTags"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyDropEmptyElems")) {
+ ok = tidyOptSetBool(tdoc, TidyDropEmptyElems, options["TidyDropEmptyElems"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyDropEmptyParas")) {
+ ok = tidyOptSetBool(tdoc, TidyDropEmptyParas, options["TidyDropEmptyParas"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyFixComments")) {
+ ok = tidyOptSetBool(tdoc, TidyFixComments, options["TidyFixComments"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyLogicalEmphasis")) {
+ ok = tidyOptSetBool(tdoc, TidyLogicalEmphasis, options["TidyLogicalEmphasis"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyBodyOnly")) {
+ ok = tidyOptSetBool(tdoc, TidyBodyOnly, options["TidyBodyOnly"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyHideComments")) {
+ ok = tidyOptSetBool(tdoc, TidyHideComments, options["TidyHideComments"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyJoinClasses")) {
+ ok = tidyOptSetBool(tdoc, TidyJoinClasses, options["TidyJoinClasses"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyJoinStyles")) {
+ ok = tidyOptSetBool(tdoc, TidyJoinStyles, options["TidyJoinStyles"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyFixBackslash")) {
+ ok = tidyOptSetBool(tdoc, TidyFixBackslash, options["TidyFixBackslash"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyMark")) {
+ ok = tidyOptSetBool(tdoc, TidyMark, options["TidyMark"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyReplaceColor")) {
+ ok = tidyOptSetBool(tdoc, TidyReplaceColor, options["TidyReplaceColor"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyIndentContent")) {
+ ok = tidyOptSetBool(tdoc, TidyIndentContent, options["TidyIndentContent"] ? yes : no);
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyCSSPrefix")) {
+ ok = tidyOptSetValue(tdoc, TidyCSSPrefix, Rcpp::as<std::string>(options["TidyCSSPrefix"]).c_str());
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyDoctype")) {
+ ok = tidyOptSetValue(tdoc, TidyDoctype, Rcpp::as<std::string>(options["TidyDoctype"]).c_str());
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyAltText")) {
+ ok = tidyOptSetValue(tdoc, TidyAltText, Rcpp::as<std::string>(options["TidyAltText"]).c_str());
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyWord2000")) {
+ ok = tidyOptSetValue(tdoc, TidyWord2000, Rcpp::as<std::string>(options["TidyWord2000"]).c_str());
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyIndentSpaces")) {
+ ok = tidyOptSetInt(tdoc, TidyIndentSpaces, Rcpp::as<int>(options["TidyIndentSpaces"]));
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyWrapLen")) {
+ ok = tidyOptSetInt(tdoc, TidyWrapLen, Rcpp::as<int>(options["TidyWrapLen"]));
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
+
+ if (options.containsElementNamed("TidyTabSize")) {
+ ok = tidyOptSetInt(tdoc, TidyTabSize, Rcpp::as<int>(options["TidyTabSize"]));
+ if (ok == no) Rcpp::stop("Error setting TidyHTML options");
+ }
rc = tidySetErrorBuffer(tdoc, &errbuf);