Browse Source

Fix warning coming from URL redirection in examples

tags/v0.4.0
boB Rudis 7 years ago
parent
commit
30acf559c5
  1. 5
      DESCRIPTION
  2. 4
      NEWS.md
  3. 2
      R/RcppExports.R
  4. 4
      R/tidy.r
  5. 10
      README.Rmd
  6. 53
      README.md
  7. 1
      man/highlight_styles.Rd
  8. 1
      man/htmltidy.Rd
  9. 1
      man/renderXmlview.Rd
  10. 13
      man/tidy_html.Rd
  11. 3
      man/xml_tree_view.Rd
  12. 3
      man/xml_view.Rd
  13. 3
      man/xmltreeview-shiny.Rd
  14. 1
      man/xmlviewOutput.Rd
  15. 10
      src/RcppExports.cpp

5
DESCRIPTION

@ -1,6 +1,7 @@
Package: htmltidy
Title: Tidy Up and Test XPath Queries on HTML and XML Content
Version: 0.3.0
Version: 0.3.1
Encoding: UTF-8
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")),
person("Dave", "Raggett", email = "dsr@w3.org", role = c("ctb", "aut"),
@ -53,4 +54,4 @@ Imports:
XML,
htmlwidgets,
htmltools
RoxygenNote: 5.0.1
RoxygenNote: 6.0.0

4
NEWS.md

@ -1,3 +1,7 @@
# htmltidy 0.3.1
* Fix warnings coming from URL redirection in examples
# htmltidy 0.3.0
* Better error handling (fixed crashing bug in #1)

2
R/RcppExports.R

@ -1,4 +1,4 @@
# This file was generated by Rcpp::compileAttributes
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
tidy_html_int <- function(source, options, show_errors) {

4
R/tidy.r

@ -71,7 +71,7 @@
#' cat(tidy_html(txt, option=opts))
#'
#' library(httr)
#' res <- GET("http://rud.is/test/untidy.html")
#' res <- GET("https://rud.is/test/untidy.html")
#'
#' # look at the original, un-tidy source
#' cat(content(res, as="text", encoding="UTF-8"))
@ -81,7 +81,7 @@
#' list(TidyDocType="html5", TidyWrapLen=200)))
#'
#' # but, you could also just do:
#' cat(tidy_html(url("http://rud.is/test/untidy.html")))
#' cat(tidy_html(url("https://rud.is/test/untidy.html")))
tidy_html <- function(content,
                      options = list(TidyXhtmlOut = TRUE),
                      verbose = FALSE) {
  # S3 generic: does no work itself, it only hands the call off to the
  # `tidy_html` method selected for `content`.
  UseMethod("tidy_html")
}

10
README.Rmd

@ -64,7 +64,7 @@ library(purrr)
This is really "un-tidy" content:
```{r message=FALSE, warning=FALSE}
res <- GET("http://rud.is/test/untidy.html")
res <- GET("https://rud.is/test/untidy.html")
cat(content(res, as="text"))
```
@ -85,14 +85,14 @@ cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200
NOTE: you could also just have done:
```{r message=FALSE, warning=FALSE}
cat(tidy_html(url("http://rud.is/test/untidy.html"),
cat(tidy_html(url("https://rud.is/test/untidy.html"),
list(TidyDocType="html5", TidyWrapLen=200)))
```
You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`):
```{r message=FALSE, warning=FALSE}
pg <- read_html("http://rud.is/test/untidy.html")
pg <- read_html("https://rud.is/test/untidy.html")
cat(toString(pg))
```
@ -105,13 +105,13 @@ tidy_html(content(res, as="text", encoding="UTF-8"))
tidy_html(content(res, as="parsed", encoding="UTF-8"))
tidy_html(htmlParse("http://rud.is/test/untidy.html"))
tidy_html(htmlParse("https://rud.is/test/untidy.html"))
```
And, show the markup errors:
```{r message=FALSE, warning=FALSE}
invisible(tidy_html(url("http://rud.is/test/untidy.html"), verbose=TRUE))
invisible(tidy_html(url("https://rud.is/test/untidy.html"), verbose=TRUE))
```
### Testing Options

53
README.md

@ -31,7 +31,7 @@ library(htmltidy)
# current version
packageVersion("htmltidy")
## [1] '0.3.0'
## [1] '0.3.1'
library(XML)
library(xml2)
@ -42,7 +42,7 @@ library(purrr)
This is really "un-tidy" content:
``` r
res <- GET("http://rud.is/test/untidy.html")
res <- GET("https://rud.is/test/untidy.html")
cat(content(res, as="text"))
## <head>
## <style>
@ -62,16 +62,20 @@ It can handle the `response` object directly:
``` r
cat(tidy_html(res, list(TidyDocType="html5", TidyWrapLen=200)))
## <!DOCTYPE html>
## <html>
## <html xmlns="http://www.w3.org/1999/xhtml">
## <head>
## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
## <meta name="generator" content=
## "HTML Tidy for HTML5 for R version 5.0.0" />
## <style>
## <![CDATA[
## body { font-family: sans-serif; }
## ]]>
## </style>
## <title></title>
## </head>
## <body>
## <b>This is some <i>really</i> poorly formatted HTML as is this <span id="sp">portion</span></b>
## <b>This is some <i>really</i> poorly formatted HTML as is this
## <span id="sp">portion</span></b>
## <div><span id="sp"></span></div>
## </body>
## </html>
@ -100,7 +104,7 @@ cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200
NOTE: you could also just have done:
``` r
cat(tidy_html(url("http://rud.is/test/untidy.html"),
cat(tidy_html(url("https://rud.is/test/untidy.html"),
list(TidyDocType="html5", TidyWrapLen=200)))
## <!DOCTYPE html>
## <html>
@ -121,16 +125,22 @@ cat(tidy_html(url("http://rud.is/test/untidy.html"),
You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`):
``` r
pg <- read_html("http://rud.is/test/untidy.html")
pg <- read_html("https://rud.is/test/untidy.html")
cat(toString(pg))
## <?xml version="1.0" standalone="yes"?>
## <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
## <html><head><style><![CDATA[
## <html>
## <head>
## <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
## <style>
## body { font-family: sans-serif; }
## ]]></style></head><body>
## </style>
## </head>
## <body>
## <b>This is <b>some <i>really </i> poorly formatted HTML</b>
##
## as is this <span id="sp">portion<div/></span></b></body></html>
## as is this <span id="sp">portion<div></div></span></b>
## </body>
## </html>
```
It can also deal with "raw" and parsed objects:
@ -150,25 +160,18 @@ tidy_html(content(res, as="text", encoding="UTF-8"))
tidy_html(content(res, as="parsed", encoding="UTF-8"))
## {xml_document}
## <html xmlns="http://www.w3.org/1999/xhtml">
## [1] <head>\n <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n <meta name="generator" content ...
## [2] <body>\n<b>This is some <i>really</i> poorly formatted HTML as is this\n<span id="sp">portion</span></b>\n</body>
## [1] <head>\n<meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">\n<meta http-equiv="Content-Type ...
## [2] <body>\n<b>This is some <i>really</i> poorly formatted HTML as is this\n<span id="sp">portion</span></b>\n<div><s ...
tidy_html(htmlParse("http://rud.is/test/untidy.html"))
## <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
tidy_html(htmlParse("https://rud.is/test/untidy.html"))
## <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
## <html xmlns="http://www.w3.org/1999/xhtml">
## <head>
## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
## <style>
## <![CDATA[
## body { font-family: sans-serif; }
## ]]>
## </style>
## <title></title>
## </head>
## <body>
## <b>This is some <i>really</i> poorly formatted HTML as is this
## <span id="sp">portion</span></b>
## <div><span id="sp"></span></div>
## <p>https://rud.is/test/untidy.html</p>
## </body>
## </html>
##
@ -177,7 +180,7 @@ tidy_html(htmlParse("http://rud.is/test/untidy.html"))
And, show the markup errors:
``` r
invisible(tidy_html(url("http://rud.is/test/untidy.html"), verbose=TRUE))
invisible(tidy_html(url("https://rud.is/test/untidy.html"), verbose=TRUE))
## line 1 column 1 - Warning: missing <!DOCTYPE> declaration
## line 1 column 68 - Warning: nested emphasis <b>
## line 1 column 138 - Warning: missing </span> before <div>
@ -242,7 +245,7 @@ sum(map_int(book, nchar))
## [1] 207501
system.time(tidy_book <- tidy_html(book))
## user system elapsed
## 0.021 0.001 0.022
## 0.023 0.001 0.024
```
(It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby.

1
man/highlight_styles.Rd

@ -17,4 +17,3 @@ highlight_styles()
See \url{https://highlightjs.org/static/demo/} for a demo of all
highlight.js styles
}

1
man/htmltidy.Rd

@ -25,4 +25,3 @@ and 'xml-viewer', respectively.
\author{
Bob Rudis (bob@rud.is)
}

1
man/renderXmlview.Rd

@ -16,4 +16,3 @@ renderXmlview(expr, env = parent.frame(), quoted = FALSE)
\description{
Widget render function for use in Shiny
}

13
man/tidy_html.Rd

@ -1,14 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/response.r, R/tidy.r
\name{tidy_html.response}
\alias{tidy_html.response}
\alias{tidy_html}
\alias{tidy_html.HTMLInternalDocument}
\alias{tidy_html.character}
\alias{tidy_html.connection}
\alias{tidy_html.default}
\alias{tidy_html.character}
\alias{tidy_html.raw}
\alias{tidy_html.response}
\alias{tidy_html.xml_document}
\alias{tidy_html.HTMLInternalDocument}
\alias{tidy_html.connection}
\title{Tidy or "Pretty Print" HTML/XHTML Documents}
\usage{
\method{tidy_html}{response}(content, options = list(TidyXhtmlOut = TRUE),
@ -110,7 +110,7 @@ txt <- paste0(
cat(tidy_html(txt, option=opts))
library(httr)
res <- GET("http://rud.is/test/untidy.html")
res <- GET("https://rud.is/test/untidy.html")
# look at the original, un-tidy source
cat(content(res, as="text", encoding="UTF-8"))
@ -120,7 +120,7 @@ cat(tidy_html(content(res, as="text", encoding="UTF-8"),
list(TidyDocType="html5", TidyWrapLen=200)))
# but, you could also just do:
cat(tidy_html(url("http://rud.is/test/untidy.html")))
cat(tidy_html(url("https://rud.is/test/untidy.html")))
}
\references{
\url{http://api.html-tidy.org/tidy/quickref_5.1.25.html} &
@ -128,4 +128,3 @@ cat(tidy_html(url("http://rud.is/test/untidy.html")))
for definitions of the options supported above and \url{https://www.w3.org/People/Raggett/tidy/}
for an explanation of what "tidy" HTML is and some canonical examples of what it can do.
}

3
man/xml_tree_view.Rd

@ -1,8 +1,8 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xmltreeview.R
\name{xml_tree_view}
\alias{html_tree_view}
\alias{xml_tree_view}
\alias{html_tree_view}
\title{HTML/XML tree viewer}
\usage{
xml_tree_view(doc = NULL, scroll = FALSE, elementId = NULL,
@ -57,4 +57,3 @@ htmltools::browsable(
\references{
\href{https://github.com/juliangruber/xml-viewer}{xml-viewer}
}

3
man/xml_view.Rd

@ -1,8 +1,8 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xmlview.R
\name{xml_view}
\alias{html_view}
\alias{xml_view}
\alias{html_view}
\title{HTML/XML pretty printer and viewer}
\usage{
xml_view(doc, style = "default", scroll = FALSE, add_filter = FALSE,
@ -88,4 +88,3 @@ xml_text(xml2::xml_find_all(doc, './/link[contains(., "soccer")]', ns=xml2::xml_
\href{https://highlightjs.org/}{highlight.js},
\href{http://www.eslinstructor.net/vkbeautify/}{vkbeautify}
}

3
man/xmltreeview-shiny.Rd

@ -1,9 +1,9 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shiny-tree-view.R
\name{xmltreeview-shiny}
\alias{renderXmltreeview}
\alias{xmltreeview-shiny}
\alias{xmltreeviewOutput}
\alias{renderXmltreeview}
\title{Shiny bindings for xmltreeview}
\usage{
xmltreeviewOutput(outputId, width = "100\%", height = "400px")
@ -28,4 +28,3 @@ is useful if you want to save an expression in a variable.}
Output and render functions for using xmltreeview within Shiny
applications and interactive Rmd documents.
}

1
man/xmlviewOutput.Rd

@ -16,4 +16,3 @@ xmlviewOutput(outputId, width = "100\%", height = "400px")
\description{
Widget output function for use in Shiny
}

10
src/RcppExports.cpp

@ -1,4 +1,4 @@
// This file was generated by Rcpp::compileAttributes
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#include <Rcpp.h>
@ -9,12 +9,12 @@ using namespace Rcpp;
Rcpp::CharacterVector tidy_html_int(std::string source, Rcpp::List options, bool show_errors);
// Wrapper around tidy_html_int(): takes the three arguments as SEXPs,
// converts them to std::string / Rcpp::List / bool, calls tidy_html_int(),
// and returns the result wrapped back into a SEXP.
// NOTE(review): this listing appears to interleave the removed
// (`__result`, `__rngScope`) and added (`rcpp_result_gen`,
// `rcpp_rngScope_gen`) lines of a diff; presumably only one set exists in
// the real file -- confirm against the repository before editing.
RcppExport SEXP htmltidy_tidy_html_int(SEXP sourceSEXP, SEXP optionsSEXP, SEXP show_errorsSEXP) {
BEGIN_RCPP
Rcpp::RObject __result;
Rcpp::RNGScope __rngScope;
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert each incoming SEXP to the C++ type tidy_html_int() expects.
Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP);
Rcpp::traits::input_parameter< Rcpp::List >::type options(optionsSEXP);
Rcpp::traits::input_parameter< bool >::type show_errors(show_errorsSEXP);
// Call the implementation and wrap its CharacterVector result for R.
__result = Rcpp::wrap(tidy_html_int(source, options, show_errors));
return __result;
rcpp_result_gen = Rcpp::wrap(tidy_html_int(source, options, show_errors));
return rcpp_result_gen;
END_RCPP
}

Loading…
Cancel
Save