Bob Rudis
8 years ago
commit
fa0a3ca129
20 changed files with 303 additions and 0 deletions
@ -0,0 +1,7 @@ |
|||||
|
^.*\.Rproj$ |
||||
|
^\.Rproj\.user$ |
||||
|
^README\.Rmd$ |
||||
|
^README-.*\.png$ |
||||
|
^\.travis\.yml$ |
||||
|
^CONDUCT\.md$ |
||||
|
^README\.md$ |
@ -0,0 +1,3 @@ |
|||||
|
.Rproj.user |
||||
|
.Rhistory |
||||
|
.RData |
@ -0,0 +1,14 @@ |
|||||
|
# Sample .travis.yml for R projects |
||||
|
|
||||
|
language: r |
||||
|
warnings_are_errors: true |
||||
|
sudo: required |
||||
|
|
||||
|
env: |
||||
|
global: |
||||
|
- CRAN: http://cran.rstudio.com |
||||
|
|
||||
|
notifications: |
||||
|
email: |
||||
|
on_success: change |
||||
|
on_failure: change |
@ -0,0 +1,25 @@ |
|||||
|
# Contributor Code of Conduct |
||||
|
|
||||
|
As contributors and maintainers of this project, we pledge to respect all people who |
||||
|
contribute through reporting issues, posting feature requests, updating documentation, |
||||
|
submitting pull requests or patches, and other activities. |
||||
|
|
||||
|
We are committed to making participation in this project a harassment-free experience for |
||||
|
everyone, regardless of level of experience, gender, gender identity and expression, |
||||
|
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. |
||||
|
|
||||
|
Examples of unacceptable behavior by participants include the use of sexual language or |
||||
|
imagery, derogatory comments or personal attacks, trolling, public or private harassment, |
||||
|
insults, or other unprofessional conduct. |
||||
|
|
||||
|
Project maintainers have the right and responsibility to remove, edit, or reject comments, |
||||
|
commits, code, wiki edits, issues, and other contributions that are not aligned to this |
||||
|
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed |
||||
|
from the project team. |
||||
|
|
||||
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by |
||||
|
opening an issue or contacting one or more of the project maintainers. |
||||
|
|
||||
|
This Code of Conduct is adapted from the Contributor Covenant |
||||
|
(http://contributor-covenant.org), version 1.0.0, available at |
||||
|
http://contributor-covenant.org/version/1/0/0/ |
@ -0,0 +1,11 @@ |
|||||
|
Package: htmltidy |
||||
|
Title: Clean up gnarly HTML/XML |
||||
|
Version: 0.0.0.9000 |
||||
|
Authors@R: c(person("Bob", "Rudis", email = "bob@rudis.net", role = c("aut", "cre"))) |
||||
|
Description: Clean up gnarly HTML/XML |
||||
|
Depends: R (>= 3.3.0) |
||||
|
License: AGPL + file LICENSE |
||||
|
LazyData: true |
||||
|
Suggests: testthat |
||||
|
LinkingTo: Rcpp |
||||
|
Imports: Rcpp |
@ -0,0 +1,2 @@ |
|||||
|
YEAR: 2016 |
||||
|
COPYRIGHT HOLDER: Bob Rudis |
@ -0,0 +1,5 @@ |
|||||
|
# Generated by roxygen2 (4.1.1): do not edit by hand |
||||
|
|
||||
|
export(tidy) |
||||
|
importFrom(Rcpp,sourceCpp) |
||||
|
useDynLib(htmltidy) |
@ -0,0 +1,11 @@ |
|||||
|
# This file was generated by Rcpp::compileAttributes |
||||
|
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 |
||||
|
|
||||
|
#' Tidy HTML/XML |
||||
|
#' |
||||
|
#' @param source length 1 character vector containing the HTML/XML source to process |
||||
|
#' @export |
||||
|
tidy <- function(source) { |
||||
|
# Pass `source` unchanged to the compiled routine 'htmltidy_tidy' looked up in the 'htmltidy' package library.
.Call('htmltidy_tidy', PACKAGE = 'htmltidy', source) |
||||
|
} |
||||
|
|
@ -0,0 +1,8 @@ |
|||||
|
#' Clean up gnarly HTML/XML |
||||
|
#' |
||||
|
#' @name htmltidy |
||||
|
#' @docType package |
||||
|
#' @author Bob Rudis (@@hrbrmstr) |
||||
|
#' @useDynLib htmltidy |
||||
|
#' @importFrom Rcpp sourceCpp |
||||
|
NULL |
@ -0,0 +1,48 @@ |
|||||
|
--- |
||||
|
output: rmarkdown::github_document |
||||
|
--- |
||||
|
|
||||
|
<!-- README.md is generated from README.Rmd. Please edit that file --> |
||||
|
|
||||
|
```{r, echo = FALSE} |
||||
|
knitr::opts_chunk$set( |
||||
|
collapse = TRUE, |
||||
|
comment = "#>", |
||||
|
fig.retina=2, |
||||
|
fig.path = "README-" |
||||
|
) |
||||
|
``` |
||||
|
|
||||
|
`htmltidy` — Clean up gnarly HTML/XML |
||||
|
|
||||
|
NOTE: Requires [`libtidy`](http://www.html-tidy.org/) and presently is super-basic (no way to set options and pretty much only does HTML) |
||||
|
|
||||
|
The following functions are implemented: |
||||
|
|
||||
|
- `tidy` : Clean up gnarly HTML/XML |
||||
|
|
||||
|
### Installation |
||||
|
|
||||
|
```{r eval=FALSE} |
||||
|
devtools::install_github("hrbrmstr/htmltidy") |
||||
|
``` |
||||
|
|
||||
|
```{r echo=FALSE, message=FALSE, warning=FALSE, error=FALSE} |
||||
|
options(width=120) |
||||
|
``` |
||||
|
|
||||
|
### Usage |
||||
|
|
||||
|
```{r} |
||||
|
library(htmltidy) |
||||
|
|
||||
|
# current version |
||||
|
packageVersion("htmltidy") |
||||
|
|
||||
|
cat(tidy("<b><p><a href='http://google.com'>google ></a></p></b>")) |
||||
|
``` |
||||
|
|
||||
|
### Code of Conduct |
||||
|
|
||||
|
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). |
||||
|
By participating in this project you agree to abide by its terms. |
@ -0,0 +1,42 @@ |
|||||
|
|
||||
|
<!-- README.md is generated from README.Rmd. Please edit that file --> |
||||
|
`htmltidy` — Clean up gnarly HTML/XML |
||||
|
|
||||
|
NOTE: Requires [`libtidy`](http://www.html-tidy.org/) and presently is super-basic (no way to set options and pretty much only does HTML) |
||||
|
|
||||
|
The following functions are implemented: |
||||
|
|
||||
|
- `tidy` : Clean up gnarly HTML/XML |
||||
|
|
||||
|
### Installation |
||||
|
|
||||
|
``` r |
||||
|
devtools::install_github("hrbrmstr/htmltidy") |
||||
|
``` |
||||
|
|
||||
|
### Usage |
||||
|
|
||||
|
``` r |
||||
|
library(htmltidy) |
||||
|
|
||||
|
# current version |
||||
|
packageVersion("htmltidy") |
||||
|
#> [1] '0.0.0.9000' |
||||
|
|
||||
|
cat(tidy("<b><p><a href='http://google.com'>google ></a></p></b>")) |
||||
|
#> <!DOCTYPE html> |
||||
|
#> <html xmlns="http://www.w3.org/1999/xhtml"> |
||||
|
#> <head> |
||||
|
#> <meta name="generator" content= |
||||
|
#> "HTML Tidy for HTML5 for Mac OS X version 5.2.0" /> |
||||
|
#> <title></title> |
||||
|
#> </head> |
||||
|
#> <body> |
||||
|
#> <p><b><a href='http://google.com'>google ></a></b></p> |
||||
|
#> </body> |
||||
|
#> </html> |
||||
|
``` |
||||
|
|
||||
|
### Code of Conduct |
||||
|
|
||||
|
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. |
@ -0,0 +1,25 @@ |
|||||
|
Version: 1.0 |
||||
|
|
||||
|
RestoreWorkspace: No |
||||
|
SaveWorkspace: No |
||||
|
AlwaysSaveHistory: Default |
||||
|
|
||||
|
EnableCodeIndexing: Yes |
||||
|
Encoding: UTF-8 |
||||
|
|
||||
|
AutoAppendNewline: Yes |
||||
|
StripTrailingWhitespace: Yes |
||||
|
|
||||
|
BuildType: Package |
||||
|
PackageUseDevtools: Yes |
||||
|
PackageInstallArgs: --no-multiarch --with-keep.source |
||||
|
PackageRoxygenize: rd,collate,namespace |
||||
|
|
||||
|
UseSpacesForTab: Yes |
||||
|
NumSpacesForTab: 2 |
||||
|
|
||||
|
RnwWeave: Sweave |
||||
|
LaTeX: pdfLaTeX |
||||
|
|
||||
|
PackageBuildArgs: --resave-data |
||||
|
PackageCheckArgs: --as-cran |
@ -0,0 +1,14 @@ |
|||||
|
% Generated by roxygen2 (4.1.1): do not edit by hand |
||||
|
% Please edit documentation in R/htmltidy-package.r |
||||
|
\docType{package} |
||||
|
\name{htmltidy} |
||||
|
\alias{htmltidy} |
||||
|
\alias{htmltidy-package} |
||||
|
\title{Clean up gnarly HTML/XML} |
||||
|
\description{ |
||||
|
Clean up gnarly HTML/XML |
||||
|
} |
||||
|
\author{ |
||||
|
Bob Rudis (@hrbrmstr) |
||||
|
} |
||||
|
|
@ -0,0 +1,15 @@ |
|||||
|
% Generated by roxygen2 (4.1.1): do not edit by hand |
||||
|
% Please edit documentation in R/RcppExports.R |
||||
|
\name{tidy} |
||||
|
\alias{tidy} |
||||
|
\title{Tidy HTML/XML} |
||||
|
\usage{ |
||||
|
tidy(source) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{source}{length 1 character vector containing the HTML/XML source to process} |
||||
|
} |
||||
|
\description{ |
||||
|
Tidy HTML/XML |
||||
|
} |
||||
|
|
@ -0,0 +1,3 @@ |
|||||
|
*.o |
||||
|
*.so |
||||
|
*.dll |
@ -0,0 +1 @@ |
|||||
|
PKG_LIBS=-ltidy |
@ -0,0 +1,18 @@ |
|||||
|
// This file was generated by Rcpp::compileAttributes
|
||||
|
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
|
||||
|
|
||||
|
#include <Rcpp.h> |
||||
|
|
||||
|
using namespace Rcpp; |
||||
|
|
||||
|
// tidy
|
||||
|
std::string tidy(std::string source); |
||||
|
// Entry point reached from R through .Call('htmltidy_tidy', ...): converts the
// incoming SEXP to a std::string, calls tidy(), and wraps the returned string
// back into an R object.
RcppExport SEXP htmltidy_tidy(SEXP sourceSEXP) { |
||||
|
// BEGIN_RCPP / END_RCPP are Rcpp-provided macros that bracket the body
// (presumably the exception-to-R-error guard -- confirm against Rcpp docs).
BEGIN_RCPP |
||||
|
Rcpp::RObject __result; |
||||
|
Rcpp::RNGScope __rngScope; |
||||
|
// Convert the R argument into the std::string that tidy() expects.
Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP); |
||||
|
__result = Rcpp::wrap(tidy(source)); |
||||
|
return __result; |
||||
|
END_RCPP |
||||
|
} |
@ -0,0 +1,41 @@ |
|||||
|
#include <stdexcept>
#include <string>

#include <Rcpp.h>
#include <tidy.h>
#include <tidybuffio.h>
||||
|
|
||||
|
// http://api.html-tidy.org/tidy/tidylib_api_5.2.0/tidyenum_8h.html#a3a1401652599150188a168dade7dc150
|
||||
|
|
||||
|
//' Tidy HTML/XML
|
||||
|
//'
|
||||
|
//' @param source length 1 character vetor containing the HTML/XML source to process
|
||||
|
//' @export
|
||||
|
//[[Rcpp::export]]
|
||||
|
std::string tidy(std::string source) { |
||||
|
|
||||
|
TidyBuffer output = {0}; |
||||
|
TidyBuffer errbuf = {0}; |
||||
|
int rc = -1; |
||||
|
Bool ok; |
||||
|
|
||||
|
TidyDoc tdoc = tidyCreate(); |
||||
|
|
||||
|
ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes); |
||||
|
|
||||
|
rc = tidySetErrorBuffer(tdoc, &errbuf); |
||||
|
|
||||
|
rc = tidyParseString(tdoc, source.c_str()); |
||||
|
|
||||
|
rc = tidyCleanAndRepair(tdoc); |
||||
|
|
||||
|
rc = tidyRunDiagnostics(tdoc); |
||||
|
|
||||
|
rc = tidySaveBuffer(tdoc, &output); |
||||
|
|
||||
|
std::string ret = std::string(reinterpret_cast<const char*>(output.bp)); |
||||
|
|
||||
|
tidyBufFree(&output); |
||||
|
tidyBufFree(&errbuf); |
||||
|
tidyRelease(tdoc); |
||||
|
|
||||
|
return(ret); |
||||
|
|
||||
|
} |
@ -0,0 +1,4 @@ |
|||||
|
# Test entry script: attaches testthat and htmltidy, then calls test_check() for the "htmltidy" package.
library(testthat) |
||||
|
library(htmltidy) |
||||
|
|
||||
|
test_check("htmltidy") |
@ -0,0 +1,6 @@ |
|||||
|
context("basic functionality")

# Smoke test for tidy(): feeds it the mis-nested snippet used in the README and
# checks that a single character string containing an <html> document comes back.
test_that("we can do something", {

  res <- tidy("<b><p><a href='http://google.com'>google ></a></p></b>")

  expect_is(res, "character")
  expect_equal(length(res), 1)
  expect_true(grepl("<html", res, fixed = TRUE))
  expect_true(grepl("google", res, fixed = TRUE))

})
Loading…
Reference in new issue