commit fa0a3ca1294fd4125c48490f76af4c911a44b1b3 Author: Bob Rudis Date: Fri May 6 06:11:51 2016 -0400 initial commit diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..e7ccc30 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,7 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^README\.Rmd$ +^README-.*\.png$ +^\.travis\.yml$ +^CONDUCT\.md$ +^README\.md$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..807ea25 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.Rproj.user +.Rhistory +.RData diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8275323 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +# Sample .travis.yml for R projects + +language: r +warnings_are_errors: true +sudo: required + +env: + global: + - CRAN: http://cran.rstudio.com + +notifications: + email: + on_success: change + on_failure: change diff --git a/CONDUCT.md b/CONDUCT.md new file mode 100644 index 0000000..52a673e --- /dev/null +++ b/CONDUCT.md @@ -0,0 +1,25 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating documentation, +submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free experience for +everyone, regardless of level of experience, gender, gender identity and expression, +sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. + +Examples of unacceptable behavior by participants include the use of sexual language or +imagery, derogatory comments or personal attacks, trolling, public or private harassment, +insults, or other unprofessional conduct. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, +commits, code, wiki edits, issues, and other contributions that are not aligned to this +Code of Conduct. 
Project maintainers who do not follow the Code of Conduct may be removed +from the project team. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by +opening an issue or contacting one or more of the project maintainers. + +This Code of Conduct is adapted from the Contributor Covenant +(http://contributor-covenant.org), version 1.0.0, available at +http://contributor-covenant.org/version/1/0/0/ diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..c1fb5fb --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,11 @@ +Package: htmltidy +Title: Clean up gnarly HTML/XML +Version: 0.0.0.9000 +Authors@R: c(person("Bob", "Rudis", email = "bob@rudis.net", role = c("aut", "cre"))) +Description: Clean up gnarly HTML/XML +Depends: R (>= 3.3.0) +License: AGPL + file LICENSE +LazyData: true +Suggests: testthat +LinkingTo: Rcpp +Imports: Rcpp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5b085be --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2016 +COPYRIGHT HOLDER: Bob Rudis diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..819437d --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,5 @@ +# Generated by roxygen2 (4.1.1): do not edit by hand + +export(tidy) +importFrom(Rcpp,sourceCpp) +useDynLib(htmltidy) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..eee8ac8 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,11 @@ +# This file was generated by Rcpp::compileAttributes +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' Tidy HTML/XML +#' +#' @param source length 1 character vector containing the HTML/XML source to process +#' @export +tidy <- function(source) { + .Call('htmltidy_tidy', PACKAGE = 'htmltidy', source) +} + diff --git a/R/htmltidy-package.r b/R/htmltidy-package.r new file mode 100644 index 0000000..0fe48d0 --- /dev/null +++ b/R/htmltidy-package.r @@ -0,0 +1,8 @@ +#' Clean up gnarly HTML/XML +#' +#' @name htmltidy +#' @docType package +#' @author 
Bob Rudis (@@hrbrmstr) +#' @useDynLib htmltidy +#' @importFrom Rcpp sourceCpp +NULL diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..8bc5a5d --- /dev/null +++ b/README.Rmd @@ -0,0 +1,48 @@ +--- +output: rmarkdown::github_document +--- + + + +```{r, echo = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.retina=2, + fig.path = "README-" +) +``` + +`htmltidy` — Clean up gnarly HTML/XML + +NOTE: Requires [`libtidy`](http://www.html-tidy.org/) and presently is super-basic (no way to set options and pretty much only does HTML) + +The following functions are implemented: + +- `tidy` : Clean up gnarly HTML/XML + +### Installation + +```{r eval=FALSE} +devtools::install_github("hrbrmstr/htmltidy") +``` + +```{r echo=FALSE, message=FALSE, warning=FALSE, error=FALSE} +options(width=120) +``` + +### Usage + +```{r} +library(htmltidy) + +# current version +packageVersion("htmltidy") + +cat(tidy("

google >

")) +``` + +### Code of Conduct + +Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). +By participating in this project you agree to abide by its terms. diff --git a/README.md b/README.md new file mode 100644 index 0000000..02915af --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ + + +`htmltidy` — Clean up gnarly HTML/XML + +NOTE: Requires [`libtidy`](http://www.html-tidy.org/) and presently is super-basic (no way to set options and pretty much only does HTML) + +The following functions are implemented: + +- `tidy` : Clean up gnarly HTML/XML + +### Installation + +``` r +devtools::install_github("hrbrmstr/htmltidy") +``` + +### Usage + +``` r +library(htmltidy) + +# current version +packageVersion("htmltidy") +#> [1] '0.0.0.9000' + +cat(tidy("

google >

")) +#> +#> +#> +#> "HTML Tidy for HTML5 for Mac OS X version 5.2.0" /> +#> +#> +#> +#>

google >

+#> +#> +``` + +### Code of Conduct + +Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. diff --git a/htmltidy.Rproj b/htmltidy.Rproj new file mode 100644 index 0000000..773de7e --- /dev/null +++ b/htmltidy.Rproj @@ -0,0 +1,25 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +Encoding: UTF-8 + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace + +UseSpacesForTab: Yes +NumSpacesForTab: 2 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +PackageBuildArgs: --resave-data +PackageCheckArgs: --as-cran diff --git a/man/htmltidy.Rd b/man/htmltidy.Rd new file mode 100644 index 0000000..8b81129 --- /dev/null +++ b/man/htmltidy.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/htmltidy-package.r +\docType{package} +\name{htmltidy} +\alias{htmltidy} +\alias{htmltidy-package} +\title{Clean up gnarly HTML/XML} +\description{ +Clean up gnarly HTML/XML +} +\author{ +Bob Rudis (@hrbrmstr) +} + diff --git a/man/tidy.Rd b/man/tidy.Rd new file mode 100644 index 0000000..7cc3249 --- /dev/null +++ b/man/tidy.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{tidy} +\alias{tidy} +\title{Tidy HTML/XML} +\usage{ +tidy(source) +} +\arguments{ +\item{source}{length 1 character vector containing the HTML/XML source to process} +} +\description{ +Tidy HTML/XML +} + diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..22034c4 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..c6616f4 --- /dev/null +++ b/src/Makevars @@ -0,0 +1 @@ 
+PKG_LIBS=-ltidy diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..a8a2e95 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,18 @@ +// This file was generated by Rcpp::compileAttributes +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// tidy +std::string tidy(std::string source); +RcppExport SEXP htmltidy_tidy(SEXP sourceSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP); + __result = Rcpp::wrap(tidy(source)); + return __result; +END_RCPP +} diff --git a/src/htmltidy.cpp b/src/htmltidy.cpp new file mode 100644 index 0000000..7136d2c --- /dev/null +++ b/src/htmltidy.cpp @@ -0,0 +1,41 @@ +#include +#include +#include + +// libtidy enum reference (tidyenum.h): http://api.html-tidy.org/tidy/tidylib_api_5.2.0/tidyenum_8h.html#a3a1401652599150188a168dade7dc150 -- NOTE(review): tidy() below assigns every libtidy return value to rc/ok but never inspects them, and builds its result from output.bp without a null check; confirm the failure paths before relying on it. + +//' Tidy HTML/XML +//' +//' @param source length 1 character vector containing the HTML/XML source to process +//' @export +//[[Rcpp::export]] +std::string tidy(std::string source) { + + TidyBuffer output = {0}; + TidyBuffer errbuf = {0}; + int rc = -1; + Bool ok; + + TidyDoc tdoc = tidyCreate(); + + ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes); + + rc = tidySetErrorBuffer(tdoc, &errbuf); + + rc = tidyParseString(tdoc, source.c_str()); + + rc = tidyCleanAndRepair(tdoc); + + rc = tidyRunDiagnostics(tdoc); + + rc = tidySaveBuffer(tdoc, &output); + + std::string ret = std::string(reinterpret_cast(output.bp)); + + tidyBufFree(&output); + tidyBufFree(&errbuf); + tidyRelease(tdoc); + + return(ret); + +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..52b18f3 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(htmltidy) + +test_check("htmltidy") diff --git a/tests/testthat/test-htmltidy.R b/tests/testthat/test-htmltidy.R new file mode 100644 index 0000000..ab6f62f --- /dev/null +++ 
b/tests/testthat/test-htmltidy.R @@ -0,0 +1,6 @@ +context("basic functionality") +test_that("we can do something", { + + # TODO: placeholder test with no expectations yet -- expect_that(some_function(), is_a("data.frame")) + +})