Browse Source

begin options

tags/v0.2.0
hrbrmstr 6 years ago
parent
commit
4b1d517c39
  1. 1
      NAMESPACE
  2. 8
      R/RcppExports.R
  3. 53
      R/aaa.r
  4. 68
      R/tidy.r
  5. 4
      README.Rmd
  6. 15
      man/tidy_html.Rd
  7. 29
      man/tidy_options.Rd
  8. 9
      src/RcppExports.cpp
  9. 143
      src/htmltidy.cpp

1
NAMESPACE

@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand
export(tidy_html)
export(tidy_options)
importFrom(Rcpp,sourceCpp)
useDynLib(htmltidy)

8
R/RcppExports.R

@ -1,11 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' Tidy HTML/XML
#'
#' @param source length 1 character vector containing the HTML/XML source to process
#' @export
tidy_html <- function(source) {
.Call('htmltidy_tidy_html', PACKAGE = 'htmltidy', source)
# Thin bridge to the compiled `htmltidy_tidy_html_int` routine: hands `source`
# and `options` to the native code unchanged and returns whatever it returns.
tidy_html_int <- function(source, options) {
  .Call("htmltidy_tidy_html_int", source, options, PACKAGE = "htmltidy")
}

53
R/aaa.r

@ -0,0 +1,53 @@
#' @title HTML, XHTML & XML Options for tidy_html
#' @description This dataset contains the options (and their default settings) for
#' tidy_html. They are passed in a named-list to tidy_html
#'
#' \itemize{
#' \item \code{Option}: Option name
#' \item \code{Type}: Option value type
#' \item \code{Default}: Default value of the option (e.g. \code{"yes"}, \code{"no"}, \code{"auto"})
#' }
#'
#' @docType data
#' @keywords datasets
#' @name tidy_options
#'
#' @references The \href{http://api.html-tidy.org/tidy/quickref_5.1.25.html}{
#' HTML Tidy Options Quick Reference}
#'
#' @export
#' @usage tidy_options
#' @note Last updated 2016-09-09.
#' @format A data frame with 55 rows and 3 variables
NULL
# Literal definition of the `tidy_options` data frame documented above.
# It has three parallel character columns of 55 entries each:
#   Option  - hyphenated option name (e.g. "add-xml-decl")
#   Type    - value type ("Boolean", "String", "AutoBool", "DocType",
#             "Tag names", "enum")
#   Default - default value as text ("yes", "no", "auto", "keep-last", or "-")
# Row names are the integers 3:57 rather than 1:55; they are kept exactly as
# written so that row labels do not change for existing users.
# NOTE(review): the names here are the hyphenated spellings, while tidy_html()'s
# default `options` uses "TidyXhtmlOut"-style names -- confirm which spelling
# callers are expected to pass.
tidy_options <- structure(list(Option = c("add-xml-decl", "add-xml-space", "alt-text",
"anchor-as-name", "assume-xml-procins", "bare", "clean", "coerce-endtags",
"css-prefix", "decorate-inferred-ul", "doctype", "drop-empty-elements",
"drop-empty-paras", "drop-font-tags", "drop-proprietary-attributes",
"enclose-block-text", "enclose-text", "escape-cdata", "fix-backslash",
"fix-bad-comments", "fix-uri", "gdoc", "hide-comments", "hide-endtags",
"indent-cdata", "input-xml", "join-classes", "join-styles", "literal-attributes",
"logical-emphasis", "lower-literals", "merge-divs", "merge-emphasis",
"merge-spans", "ncr", "new-blocklevel-tags", "new-empty-tags",
"new-inline-tags", "new-pre-tags", "numeric-entities", "omit-optional-tags",
"output-html", "output-xhtml", "output-xml", "preserve-entities",
"quote-ampersand", "quote-marks", "quote-nbsp", "repeated-attributes",
"replace-color", "show-body-only", "skip-nested", "uppercase-attributes",
"uppercase-tags", "word-2000"), Type = c("Boolean", "Boolean",
"String", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
"String", "Boolean", "DocType", "Boolean", "Boolean", "Boolean",
"Boolean", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
"Boolean", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
"Boolean", "Boolean", "Boolean", "Boolean", "Boolean", "AutoBool",
"Boolean", "AutoBool", "Boolean", "Tag names", "Tag names", "Tag names",
"Tag names", "Boolean", "Boolean", "Boolean", "Boolean", "Boolean",
"Boolean", "Boolean", "Boolean", "Boolean", "enum", "Boolean",
"AutoBool", "Boolean", "Boolean", "Boolean", "Boolean"), Default = c("no",
"no", "-", "yes", "no", "no", "no", "yes", "-", "no", "auto",
"yes", "yes", "no", "no", "no", "no", "no", "yes", "yes", "yes",
"no", "no", "no", "no", "no", "no", "yes", "no", "no", "yes",
"auto", "yes", "auto", "yes", "-", "-", "-", "-", "no", "no",
"no", "no", "no", "no", "yes", "no", "yes", "keep-last", "no",
"no", "yes", "no", "no", "no")), .Names = c("Option", "Type",
"Default"), row.names = 3:57, class = "data.frame")

68
R/tidy.r

@ -0,0 +1,68 @@
#' Tidy HTML/XML/XHTML Documents
#'
#' Hands \code{content} and \code{options} to the compiled
#' \code{htmltidy_tidy_html_int} routine and returns its result.
#'
#' @param content length-one character vector, or a raw vector, holding the
#'   content to tidy. A raw vector is converted to a single string with
#'   \code{rawToChar} before it is passed to the compiled code.
#' @param options named list of options; the default turns on
#'   \code{TidyXhtmlOut}.
#' @return atomic character vector of tidy content
#' @export
tidy_html <- function(content, options = list(TidyXhtmlOut = TRUE)) {
  # The native entry point takes a single string, so raw input has to be
  # decoded here; otherwise the documented raw-vector case fails during
  # argument conversion.
  if (is.raw(content)) {
    content <- rawToChar(content)
  }
  .Call("htmltidy_tidy_html_int", content, options, PACKAGE = "htmltidy")
}
#
# TidyXmlDecl, /**< Add <?xml?> for XML docs */
# TidyUpperCaseTags, /**< Output tags in upper not lower case */
# TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
# TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
# TidyMakeClean, /**< Replace presentational clutter by style rules */
# TidyGDocClean, /**< Clean up HTML exported from Google Docs */
# TidyLogicalEmphasis, /**< Replace i by em and b by strong */
# TidyDropPropAttrs, /**< Discard proprietary attributes */
# TidyDropFontTags, /**< Discard presentation tags */
# TidyDropEmptyElems, /**< Discard empty elements */
# TidyDropEmptyParas, /**< Discard empty p elements */
# TidyFixComments, /**< Fix comments with adjacent hyphens */
# TidyBreakBeforeBR, /**< Output newline before <br> or not? */
# TidyNumEntities, /**< Use numeric entities */
# TidyQuoteMarks, /**< Output " marks as &quot; */
# TidyQuoteNbsp, /**< Output non-breaking space as entity */
# TidyQuoteAmpersand, /**< Output naked ampersand as &amp; */
# TidyWrapAttVals, /**< Wrap within attribute values */
# TidyWrapScriptlets, /**< Wrap within JavaScript string literals */
# TidyWrapSection, /**< Wrap within <![ ... ]> section tags */
# TidyWrapAsp, /**< Wrap within ASP pseudo elements */
# TidyWrapJste, /**< Wrap within JSTE pseudo elements */
# TidyWrapPhp, /**< Wrap within PHP pseudo elements */
# TidyFixBackslash, /**< Fix URLs by replacing \ with / */
# TidyIndentAttributes,/**< Newline+indent before each attribute */
# TidyXmlPIs, /**< If set to yes PIs must end with ?> */
# TidyXmlSpace, /**< If set to yes adds xml:space attr as needed */
# TidyEncloseBodyText, /**< If yes text at body is wrapped in P's */
# TidyEncloseBlockText,/**< If yes text in blocks is wrapped in P's */
# TidyKeepFileTimes, /**< If yes last modified time is preserved */
# TidyWord2000, /**< Draconian cleaning for Word2000 */
# TidyMark, /**< Add meta element indicating tidied doc */
# TidyEmacs, /**< If true format error output for GNU Emacs */
# TidyEmacsFile, /**< Name of current Emacs file */
# TidyLiteralAttribs, /**< If true attributes may use newlines */
# TidyBodyOnly, /**< Output BODY content only */
# TidyFixUri, /**< Applies URI encoding if necessary */
# TidyLowerLiterals, /**< Folds known attribute values to lower case */
# TidyHideComments, /**< Hides all (real) comments in output */
# TidyIndentCdata, /**< Indent <!CDATA[ ... ]]> section */
# TidyForceOutput, /**< Output document even if errors were found */
# TidyShowErrors, /**< Number of errors to put out */
# TidyAsciiChars, /**< Convert quotes and dashes to nearest ASCII char */
# TidyJoinClasses, /**< Join multiple class attributes */
# TidyJoinStyles, /**< Join multiple style attributes */
# TidyEscapeCdata, /**< Replace <![CDATA[]]> sections with escaped text */
# TidyIndentSpaces, /**< Indentation n spaces/tabs */
# TidyWrapLen, /**< Wrap margin */
# TidyTabSize, /**< Expand tabs to n spaces */

4
README.Rmd

@ -25,8 +25,8 @@ It relies on a locally included version of [`libtidy`](http://www.html-tidy.org/
This works enough for me to use in a pinch. It should be straightforward (but tedious) to:
- enable passing options in a `list`
- Getting it to work on Windows.
- enable passing options in a `list` (IN PROGRESS)
- Getting it to work on Windows (UNTESTED)
The following functions are implemented:

15
man/tidy_html.Rd

@ -1,15 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
% Please edit documentation in R/tidy.r
\name{tidy_html}
\alias{tidy_html}
\title{Tidy HTML/XML}
\title{Tidy HTML/XML/XHTML Documents}
\usage{
tidy_html(source)
tidy_html(content, options = list(TidyXhtmlOut = TRUE))
}
\arguments{
\item{source}{length 1 character vector containing the HTML/XML source to process}
\item{content}{atomic character or raw vector of content to tidy}
\item{options}{named list of options}
}
\value{
atomic character vector of tidy content
}
\description{
Tidy HTML/XML
Tidy HTML/XML/XHTML Documents
}

29
man/tidy_options.Rd

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/aaa.r
\docType{data}
\name{tidy_options}
\alias{tidy_options}
\title{HTML, XHTML & XML Options for tidy_html}
\format{A data frame with 55 rows and 3 variables}
\usage{
tidy_options
}
\description{
This dataset contains the options (and their default settings) for
tidy_html. They are passed in a named-list to tidy_html
\itemize{
\item \code{Option}: Option name
\item \code{Type}: Option value type
\item \code{Default}: Default value of the option (e.g. \code{"yes"}, \code{"no"}, \code{"auto"})
}
}
\note{
Last updated 2016-09-09.
}
\references{
The \href{http://api.html-tidy.org/tidy/quickref_5.1.25.html}{
HTML Tidy Options Quick Reference}
}
\keyword{datasets}

9
src/RcppExports.cpp

@ -5,14 +5,15 @@
using namespace Rcpp;
// tidy_html
std::string tidy_html(std::string source);
RcppExport SEXP htmltidy_tidy_html(SEXP sourceSEXP) {
// tidy_html_int
std::string tidy_html_int(std::string source, Rcpp::List options);
RcppExport SEXP htmltidy_tidy_html_int(SEXP sourceSEXP, SEXP optionsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP);
rcpp_result_gen = Rcpp::wrap(tidy_html(source));
Rcpp::traits::input_parameter< Rcpp::List >::type options(optionsSEXP);
rcpp_result_gen = Rcpp::wrap(tidy_html_int(source, options));
return rcpp_result_gen;
END_RCPP
}

143
src/htmltidy.cpp

@ -18,12 +18,8 @@
// NOTE: cannot do "using namespace Rcpp;" b/c of annoying warnings about the
// ambiguity of 'yes'.
//' Tidy HTML/XML
//'
//' @param source length 1 character vector containing the HTML/XML source to process
//' @export
//[[Rcpp::export]]
std::string tidy_html(std::string source) {
std::string tidy_html_int(std::string source, Rcpp::List options) {
TidyBuffer output = {0};
TidyBuffer errbuf = {0};
@ -32,9 +28,140 @@ std::string tidy_html(std::string source) {
TidyDoc tdoc = tidyCreate();
ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
if (options.containsElementNamed("TidyXhtmlOut")) {
ok = tidyOptSetBool(tdoc, TidyXhtmlOut, options["TidyXhtmlOut"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyXmlOut")) {
ok = tidyOptSetBool(tdoc, TidyXmlOut, options["TidyXmlOut"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyHtmlOut")) {
ok = tidyOptSetBool(tdoc, TidyHtmlOut, options["TidyHtmlOut"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyXmlTags")) {
ok = tidyOptSetBool(tdoc, TidyXmlTags, options["TidyXmlTags"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyOmitOptionalTags")) {
ok = tidyOptSetBool(tdoc, TidyOmitOptionalTags, options["TidyOmitOptionalTags"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyXmlDecl")) {
ok = tidyOptSetBool(tdoc, TidyXmlDecl, options["TidyXmlDecl"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyBreakBeforeBR")) {
ok = tidyOptSetBool(tdoc, TidyBreakBeforeBR, options["TidyBreakBeforeBR"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyUpperCaseTags")) {
ok = tidyOptSetBool(tdoc, TidyUpperCaseTags, options["TidyUpperCaseTags"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyDropEmptyElems")) {
ok = tidyOptSetBool(tdoc, TidyDropEmptyElems, options["TidyDropEmptyElems"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyDropEmptyParas")) {
ok = tidyOptSetBool(tdoc, TidyDropEmptyParas, options["TidyDropEmptyParas"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyFixComments")) {
ok = tidyOptSetBool(tdoc, TidyFixComments, options["TidyFixComments"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyLogicalEmphasis")) {
ok = tidyOptSetBool(tdoc, TidyLogicalEmphasis, options["TidyLogicalEmphasis"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyBodyOnly")) {
ok = tidyOptSetBool(tdoc, TidyBodyOnly, options["TidyBodyOnly"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
// Apply the caller's "TidyHideComments" setting. The value must be written to
// TidyHideComments itself; writing it to TidyBodyOnly would leave comment
// hiding untouched and clobber any body-only setting applied just above.
if (options.containsElementNamed("TidyHideComments")) {
  ok = tidyOptSetBool(tdoc, TidyHideComments, options["TidyHideComments"] ? yes : no);
  if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyJoinClasses")) {
ok = tidyOptSetBool(tdoc, TidyJoinClasses, options["TidyJoinClasses"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyJoinStyles")) {
ok = tidyOptSetBool(tdoc, TidyJoinStyles, options["TidyJoinStyles"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyFixBackslash")) {
ok = tidyOptSetBool(tdoc, TidyFixBackslash, options["TidyFixBackslash"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyMark")) {
ok = tidyOptSetBool(tdoc, TidyMark, options["TidyMark"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyReplaceColor")) {
ok = tidyOptSetBool(tdoc, TidyReplaceColor, options["TidyReplaceColor"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyIndentContent")) {
ok = tidyOptSetBool(tdoc, TidyIndentContent, options["TidyIndentContent"] ? yes : no);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
// Apply the caller's "TidyCSSPrefix" string. Both the option id and the list
// lookup must use TidyCSSPrefix: reading "TidyFixBackslash" here would look up
// an element that may not be present, and would push a string into an
// unrelated option.
if (options.containsElementNamed("TidyCSSPrefix")) {
  ok = tidyOptSetValue(tdoc, TidyCSSPrefix, Rcpp::as<std::string>(options["TidyCSSPrefix"]).c_str());
  if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyDoctype")) {
ok = tidyOptSetValue(tdoc, TidyDoctype, Rcpp::as<std::string>(options["TidyDoctype"]).c_str());
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyAltText")) {
ok = tidyOptSetValue(tdoc, TidyAltText, Rcpp::as<std::string>(options["TidyAltText"]).c_str());
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
// Apply the caller's "TidyWord2000" setting. The other on/off options in this
// function take an R logical, so accept one here as well; a character value
// (e.g. "yes") is still passed through as a string so existing callers keep
// working.
if (options.containsElementNamed("TidyWord2000")) {
  SEXP word2000 = options["TidyWord2000"];
  if (TYPEOF(word2000) == STRSXP) {
    ok = tidyOptSetValue(tdoc, TidyWord2000, Rcpp::as<std::string>(word2000).c_str());
  } else {
    ok = tidyOptSetBool(tdoc, TidyWord2000, Rcpp::as<bool>(word2000) ? yes : no);
  }
  if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyIndentSpaces")) {
ok = tidyOptSetInt(tdoc, TidyIndentSpaces, Rcpp::as<int>(options["TidyIndentSpaces"]));
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyWrapLen")) {
ok = tidyOptSetInt(tdoc, TidyWrapLen, Rcpp::as<int>(options["TidyWrapLen"]));
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
if (options.containsElementNamed("TidyTabSize")) {
ok = tidyOptSetInt(tdoc, TidyTabSize, Rcpp::as<int>(options["TidyTabSize"]));
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
}
rc = tidySetErrorBuffer(tdoc, &errbuf);

Loading…
Cancel
Save