From fa0a3ca1294fd4125c48490f76af4c911a44b1b3 Mon Sep 17 00:00:00 2001 From: Bob Rudis Date: Fri, 6 May 2016 06:11:51 -0400 Subject: [PATCH] initial commit --- .Rbuildignore | 7 ++++++ .gitignore | 3 +++ .travis.yml | 14 ++++++++++++ CONDUCT.md | 25 ++++++++++++++++++++++ DESCRIPTION | 11 ++++++++++ LICENSE | 2 ++ NAMESPACE | 5 +++++ R/RcppExports.R | 11 ++++++++++ R/htmltidy-package.r | 8 +++++++ README.Rmd | 48 ++++++++++++++++++++++++++++++++++++++++++ README.md | 42 ++++++++++++++++++++++++++++++++++++ htmltidy.Rproj | 25 ++++++++++++++++++++++ man/htmltidy.Rd | 14 ++++++++++++ man/tidy.Rd | 15 +++++++++++++ src/.gitignore | 3 +++ src/Makevars | 1 + src/RcppExports.cpp | 18 ++++++++++++++++ src/htmltidy.cpp | 41 ++++++++++++++++++++++++++++++++++++ tests/testthat.R | 4 ++++ tests/testthat/test-htmltidy.R | 6 ++++++ 20 files changed, 303 insertions(+) create mode 100644 .Rbuildignore create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 CONDUCT.md create mode 100644 DESCRIPTION create mode 100644 LICENSE create mode 100644 NAMESPACE create mode 100644 R/RcppExports.R create mode 100644 R/htmltidy-package.r create mode 100644 README.Rmd create mode 100644 README.md create mode 100644 htmltidy.Rproj create mode 100644 man/htmltidy.Rd create mode 100644 man/tidy.Rd create mode 100644 src/.gitignore create mode 100644 src/Makevars create mode 100644 src/RcppExports.cpp create mode 100644 src/htmltidy.cpp create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-htmltidy.R diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..e7ccc30 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,7 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^README\.Rmd$ +^README-.*\.png$ +^\.travis\.yml$ +^CONDUCT\.md$ +^README\.md$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..807ea25 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.Rproj.user +.Rhistory +.RData diff --git a/.travis.yml b/.travis.yml new file mode 
100644 index 0000000..8275323 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +# Sample .travis.yml for R projects + +language: r +warnings_are_errors: true +sudo: required + +env: + global: + - CRAN: http://cran.rstudio.com + +notifications: + email: + on_success: change + on_failure: change diff --git a/CONDUCT.md b/CONDUCT.md new file mode 100644 index 0000000..52a673e --- /dev/null +++ b/CONDUCT.md @@ -0,0 +1,25 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating documentation, +submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free experience for +everyone, regardless of level of experience, gender, gender identity and expression, +sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. + +Examples of unacceptable behavior by participants include the use of sexual language or +imagery, derogatory comments or personal attacks, trolling, public or private harassment, +insults, or other unprofessional conduct. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, +commits, code, wiki edits, issues, and other contributions that are not aligned to this +Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed +from the project team. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by +opening an issue or contacting one or more of the project maintainers. 
+ +This Code of Conduct is adapted from the Contributor Covenant +(http://contributor-covenant.org), version 1.0.0, available at +http://contributor-covenant.org/version/1/0/0/ diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..c1fb5fb --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,11 @@ +Package: htmltidy +Title: Clean up gnarly HTML/XML +Version: 0.0.0.9000 +Authors@R: c(person("Bob", "Rudis", email = "bob@rudis.net", role = c("aut", "cre"))) +Description: Clean up gnarly HTML/XML +Depends: R (>= 3.3.0) +License: AGPL + file LICENSE +LazyData: true +Suggests: testthat +LinkingTo: Rcpp +Imports: Rcpp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5b085be --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2016 +COPYRIGHT HOLDER: Bob Rudis diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..819437d --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,5 @@ +# Generated by roxygen2 (4.1.1): do not edit by hand + +export(tidy) +importFrom(Rcpp,sourceCpp) +useDynLib(htmltidy) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..eee8ac8 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,11 @@ +# This file was generated by Rcpp::compileAttributes +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' Tidy HTML/XML +#' +#' @param source length 1 character vector containing the HTML/XML source to process +#' @export +tidy <- function(source) { + .Call('htmltidy_tidy', PACKAGE = 'htmltidy', source) +} + diff --git a/R/htmltidy-package.r b/R/htmltidy-package.r new file mode 100644 index 0000000..0fe48d0 --- /dev/null +++ b/R/htmltidy-package.r @@ -0,0 +1,8 @@ +#' Clean up gnarly HTML/XML +#' +#' @name htmltidy +#' @docType package +#' @author Bob Rudis (@@hrbrmstr) +#' @useDynLib htmltidy +#' @importFrom Rcpp sourceCpp +NULL diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..8bc5a5d --- /dev/null +++ b/README.Rmd @@ -0,0 +1,48 @@ +--- +output: rmarkdown::github_document +--- + 
+ + +```{r, echo = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.retina=2, + fig.path = "README-" +) +``` + +`htmltidy` — Clean up gnarly HTML/XML + +NOTE: Requires [`libtidy`](http://www.html-tidy.org/) and presently is super-basic (no way to set options and pretty much only does HTML) + +The following functions are implemented: + +- `tidy` : Clean up gnarly HTML/XML + +### Installation + +```{r eval=FALSE} +devtools::install_github("hrbrmstr/htmltidy") +``` + +```{r echo=FALSE, message=FALSE, warning=FALSE, error=FALSE} +options(width=120) +``` + +### Usage + +```{r} +library(htmltidy) + +# current version +packageVersion("htmltidy") + +cat(tidy("

google >

")) +``` + +### Code of Conduct + +Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). +By participating in this project you agree to abide by its terms. diff --git a/README.md b/README.md new file mode 100644 index 0000000..02915af --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ + + +`htmltidy` — Clean up gnarly HTML/XML + +NOTE: Requires [`libtidy`](http://www.html-tidy.org/) and presently is super-basic (no way to set options and pretty much only does HTML) + +The following functions are implemented: + +- `tidy` : Clean up gnarly HTML/XML + +### Installation + +``` r +devtools::install_github("hrbrmstr/htmltidy") +``` + +### Usage + +``` r +library(htmltidy) + +# current verison +packageVersion("htmltidy") +#> [1] '0.0.0.9000' + +cat(tidy("

google >

")) +#> +#> +#> +#> "HTML Tidy for HTML5 for Mac OS X version 5.2.0" /> +#> +#> +#> +#>

google >

+#> +#> +``` + +### Code of Conduct + +Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. diff --git a/htmltidy.Rproj b/htmltidy.Rproj new file mode 100644 index 0000000..773de7e --- /dev/null +++ b/htmltidy.Rproj @@ -0,0 +1,25 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +Encoding: UTF-8 + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace + +UseSpacesForTab: Yes +NumSpacesForTab: 2 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +PackageBuildArgs: --resave-data +PackageCheckArgs: --as-cran diff --git a/man/htmltidy.Rd b/man/htmltidy.Rd new file mode 100644 index 0000000..8b81129 --- /dev/null +++ b/man/htmltidy.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/htmltidy-package.r +\docType{package} +\name{htmltidy} +\alias{htmltidy} +\alias{htmltidy-package} +\title{Clean up gnarly HTML/XML} +\description{ +Clean up gnarly HTML/XML +} +\author{ +Bob Rudis (@hrbrmstr) +} + diff --git a/man/tidy.Rd b/man/tidy.Rd new file mode 100644 index 0000000..7cc3249 --- /dev/null +++ b/man/tidy.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{tidy} +\alias{tidy} +\title{Tidy HTML/XML} +\usage{ +tidy(source) +} +\arguments{ +\item{source}{length 1 character vector containing the HTML/XML source to process} +} +\description{ +Tidy HTML/XML +} + diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..22034c4 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +*.dll diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..c6616f4 --- /dev/null +++ b/src/Makevars @@ -0,0 +1 @@ 
+PKG_LIBS=-ltidy
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
new file mode 100644
index 0000000..a8a2e95
--- /dev/null
+++ b/src/RcppExports.cpp
@@ -0,0 +1,18 @@
+// This file was generated by Rcpp::compileAttributes
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// tidy
+std::string tidy(std::string source);
+RcppExport SEXP htmltidy_tidy(SEXP sourceSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject __result;
+    Rcpp::RNGScope __rngScope;
+    Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP);
+    __result = Rcpp::wrap(tidy(source));
+    return __result;
+END_RCPP
+}
diff --git a/src/htmltidy.cpp b/src/htmltidy.cpp
new file mode 100644
index 0000000..7136d2c
--- /dev/null
+++ b/src/htmltidy.cpp
@@ -0,0 +1,53 @@
+#include <Rcpp.h>
+#include <tidy.h>
+#include <tidybuffio.h>
+
+// http://api.html-tidy.org/tidy/tidylib_api_5.2.0/tidyenum_8h.html#a3a1401652599150188a168dade7dc150
+
+// Runs `source` through libtidy and returns the repaired document as XHTML.
+// libtidy status codes: 0 = ok, 1 = warnings, 2 = errors, < 0 = hard failure.
+// Markup errors (2) still yield best-effort output; a hard failure raises an
+// R error carrying whatever diagnostics libtidy collected.
+
+//' Tidy HTML/XML
+//'
+//' @param source length 1 character vector containing the HTML/XML source to process
+//' @export
+//[[Rcpp::export]]
+std::string tidy(std::string source) {
+
+  TidyBuffer output = {0};
+  TidyBuffer errbuf = {0};
+
+  TidyDoc tdoc = tidyCreate();
+  if (tdoc == NULL) Rcpp::stop("could not create a libtidy document");
+
+  // Each step runs only if every earlier one succeeded (rc >= 0).
+  int rc = tidyOptSetBool(tdoc, TidyXhtmlOut, yes) ? 0 : -1;
+  if (rc >= 0) rc = tidySetErrorBuffer(tdoc, &errbuf); // collect diagnostics in errbuf
+  if (rc >= 0) rc = tidyParseString(tdoc, source.c_str());
+  if (rc >= 0) rc = tidyCleanAndRepair(tdoc);
+  if (rc >= 0) rc = tidyRunDiagnostics(tdoc);
+  // Once markup errors were reported libtidy may write nothing unless
+  // TidyForceOutput is set, which would leave output.bp NULL.
+  if (rc > 1) rc = tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1;
+  if (rc >= 0) rc = tidySaveBuffer(tdoc, &output);
+
+  // Copy out of the libtidy buffers before freeing them; never build a
+  // std::string from a NULL pointer.
+  std::string ret;
+  std::string diag;
+  if (rc >= 0 && output.bp != NULL)
+    ret.assign(reinterpret_cast<const char*>(output.bp), output.size);
+  if (rc < 0 && errbuf.bp != NULL)
+    diag.assign(reinterpret_cast<const char*>(errbuf.bp), errbuf.size);
+
+  tidyBufFree(&output);
+  tidyBufFree(&errbuf);
+  tidyRelease(tdoc);
+
+  if (rc < 0) Rcpp::stop("libtidy failed (code %d): %s", rc, diag);
+
+  return(ret);
+
+}
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..52b18f3
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(htmltidy)
+
+test_check("htmltidy")
diff --git a/tests/testthat/test-htmltidy.R b/tests/testthat/test-htmltidy.R
new file mode 100644
index 0000000..ab6f62f
--- /dev/null
+++ 
b/tests/testthat/test-htmltidy.R
@@ -0,0 +1,11 @@
+context("basic functionality")
+test_that("tidy returns the cleaned-up document as a single string", {
+
+  # Smoke test: the text content must survive and the result must be one string.
+  res <- tidy("<p>hello")
+
+  expect_is(res, "character")
+  expect_equal(length(res), 1)
+  expect_true(grepl("hello", res, fixed = TRUE))
+
+})