You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

66 lines
1.4 KiB

8 years ago
#include <Rcpp.h>
#ifdef __linux__
#include <tidy/tidy.h>
#include <tidy/buffio.h>
#endif
#ifdef __APPLE__
8 years ago
#include <tidy.h>
#include <tidybuffio.h>
#endif
8 years ago
8 years ago
// libtidy docs:
// http://api.html-tidy.org/tidy/tidylib_api_5.2.0/tidyenum_8h.html#a3a1401652599150188a168dade7dc150
// NOTE: cannot do "using namespace Rcpp;" because of annoying warnings about
// the ambiguity of 'yes'.
8 years ago
//' Tidy HTML/XML
//'
//' @param source length 1 character vetor containing the HTML/XML source to process
//' @export
//[[Rcpp::export]]
std::string tidy(std::string source) {
TidyBuffer output = {0};
TidyBuffer errbuf = {0};
int rc = -1;
Bool ok;
TidyDoc tdoc = tidyCreate();
ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
if (ok == no) Rcpp::stop("Error setting TidyHTML options");
8 years ago
rc = tidySetErrorBuffer(tdoc, &errbuf);
if (rc<0) Rcpp::stop("Error setting TidyHTML error buffer");
8 years ago
rc = tidyParseString(tdoc, source.c_str());
if (rc<0) Rcpp::stop("Error parsing source document");
8 years ago
rc = tidyCleanAndRepair(tdoc);
if (rc<0) Rcpp::stop("Error tidying source document");
8 years ago
rc = tidyRunDiagnostics(tdoc);
if (rc<0) Rcpp::stop("Error generating tidy diagnostics");
8 years ago
rc = tidySaveBuffer(tdoc, &output);
if (rc<0) Rcpp::stop("Error converting parsed document to character vector");
8 years ago
std::string ret = std::string(reinterpret_cast<const char*>(output.bp));
tidyBufFree(&output);
tidyBufFree(&errbuf);
tidyRelease(tdoc);
return(ret);
}