Browse Source

Fix warning coming from URL redirection in examples

master
boB Rudis 7 years ago
parent
commit
9a1593b15d
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 5
      DESCRIPTION
  2. 4
      NEWS.md
  3. 2
      R/RcppExports.R
  4. 4
      R/tidy.r
  5. 10
      README.Rmd
  6. 53
      README.md
  7. 1
      man/highlight_styles.Rd
  8. 1
      man/htmltidy.Rd
  9. 1
      man/renderXmlview.Rd
  10. 13
      man/tidy_html.Rd
  11. 3
      man/xml_tree_view.Rd
  12. 3
      man/xml_view.Rd
  13. 3
      man/xmltreeview-shiny.Rd
  14. 1
      man/xmlviewOutput.Rd
  15. 10
      src/RcppExports.cpp

5
DESCRIPTION

@ -1,6 +1,7 @@
Package: htmltidy Package: htmltidy
Title: Tidy Up and Test XPath Queries on HTML and XML Content Title: Tidy Up and Test XPath Queries on HTML and XML Content
Version: 0.3.0 Version: 0.3.1
Encoding: UTF-8
Authors@R: c( Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")), person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")),
person("Dave", "Raggett", email = "dsr@w3.org", role = c("ctb", "aut"), person("Dave", "Raggett", email = "dsr@w3.org", role = c("ctb", "aut"),
@ -53,4 +54,4 @@ Imports:
XML, XML,
htmlwidgets, htmlwidgets,
htmltools htmltools
RoxygenNote: 5.0.1 RoxygenNote: 6.0.0

4
NEWS.md

@ -1,3 +1,7 @@
# htmltidy 0.3.1
* Fix warnings coming from URL redirection in examples
# htmltidy 0.3.0 # htmltidy 0.3.0
* Better error handling (fixed crashing bug in #1) * Better error handling (fixed crashing bug in #1)

2
R/RcppExports.R

@ -1,4 +1,4 @@
# This file was generated by Rcpp::compileAttributes # Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
tidy_html_int <- function(source, options, show_errors) { tidy_html_int <- function(source, options, show_errors) {

4
R/tidy.r

@ -71,7 +71,7 @@
#' cat(tidy_html(txt, option=opts)) #' cat(tidy_html(txt, option=opts))
#' #'
#' library(httr) #' library(httr)
#' res <- GET("http://rud.is/test/untidy.html") #' res <- GET("https://rud.is/test/untidy.html")
#' #'
#' # look at the original, un-tidy source #' # look at the original, un-tidy source
#' cat(content(res, as="text", encoding="UTF-8")) #' cat(content(res, as="text", encoding="UTF-8"))
@ -81,7 +81,7 @@
#' list(TidyDocType="html5", TidyWrapLen=200))) #' list(TidyDocType="html5", TidyWrapLen=200)))
#' #'
#' # but, you could also just do: #' # but, you could also just do:
#' cat(tidy_html(url("http://rud.is/test/untidy.html"))) #' cat(tidy_html(url("https://rud.is/test/untidy.html")))
tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE), verbose=FALSE) { tidy_html <- function(content, options=list(TidyXhtmlOut=TRUE), verbose=FALSE) {
UseMethod("tidy_html") UseMethod("tidy_html")
} }

10
README.Rmd

@ -64,7 +64,7 @@ library(purrr)
This is really "un-tidy" content: This is really "un-tidy" content:
```{r message=FALSE, warning=FALSE} ```{r message=FALSE, warning=FALSE}
res <- GET("http://rud.is/test/untidy.html") res <- GET("https://rud.is/test/untidy.html")
cat(content(res, as="text")) cat(content(res, as="text"))
``` ```
@ -85,14 +85,14 @@ cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200
NOTE: you could also just have done: NOTE: you could also just have done:
```{r message=FALSE, warning=FALSE} ```{r message=FALSE, warning=FALSE}
cat(tidy_html(url("http://rud.is/test/untidy.html"), cat(tidy_html(url("https://rud.is/test/untidy.html"),
list(TidyDocType="html5", TidyWrapLen=200))) list(TidyDocType="html5", TidyWrapLen=200)))
``` ```
You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`): You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`):
```{r message=FALSE, warning=FALSE} ```{r message=FALSE, warning=FALSE}
pg <- read_html("http://rud.is/test/untidy.html") pg <- read_html("https://rud.is/test/untidy.html")
cat(toString(pg)) cat(toString(pg))
``` ```
@ -105,13 +105,13 @@ tidy_html(content(res, as="text", encoding="UTF-8"))
tidy_html(content(res, as="parsed", encoding="UTF-8")) tidy_html(content(res, as="parsed", encoding="UTF-8"))
tidy_html(htmlParse("http://rud.is/test/untidy.html")) tidy_html(htmlParse("https://rud.is/test/untidy.html"))
``` ```
And, show the markup errors: And, show the markup errors:
```{r message=FALSE, warning=FALSE} ```{r message=FALSE, warning=FALSE}
invisible(tidy_html(url("http://rud.is/test/untidy.html"), verbose=TRUE)) invisible(tidy_html(url("https://rud.is/test/untidy.html"), verbose=TRUE))
``` ```
### Testing Options ### Testing Options

53
README.md

@ -31,7 +31,7 @@ library(htmltidy)
# current version # current version
packageVersion("htmltidy") packageVersion("htmltidy")
## [1] '0.3.0' ## [1] '0.3.1'
library(XML) library(XML)
library(xml2) library(xml2)
@ -42,7 +42,7 @@ library(purrr)
This is really "un-tidy" content: This is really "un-tidy" content:
``` r ``` r
res <- GET("http://rud.is/test/untidy.html") res <- GET("https://rud.is/test/untidy.html")
cat(content(res, as="text")) cat(content(res, as="text"))
## <head> ## <head>
## <style> ## <style>
@ -62,16 +62,20 @@ It can handle the `response` object directly:
``` r ``` r
cat(tidy_html(res, list(TidyDocType="html5", TidyWrapLen=200))) cat(tidy_html(res, list(TidyDocType="html5", TidyWrapLen=200)))
## <!DOCTYPE html> ## <!DOCTYPE html>
## <html> ## <html xmlns="http://www.w3.org/1999/xhtml">
## <head> ## <head>
## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0"> ## <meta name="generator" content=
## "HTML Tidy for HTML5 for R version 5.0.0" />
## <style> ## <style>
## <![CDATA[
## body { font-family: sans-serif; } ## body { font-family: sans-serif; }
## ]]>
## </style> ## </style>
## <title></title> ## <title></title>
## </head> ## </head>
## <body> ## <body>
## <b>This is some <i>really</i> poorly formatted HTML as is this <span id="sp">portion</span></b> ## <b>This is some <i>really</i> poorly formatted HTML as is this
## <span id="sp">portion</span></b>
## <div><span id="sp"></span></div> ## <div><span id="sp"></span></div>
## </body> ## </body>
## </html> ## </html>
@ -100,7 +104,7 @@ cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200
NOTE: you could also just have done: NOTE: you could also just have done:
``` r ``` r
cat(tidy_html(url("http://rud.is/test/untidy.html"), cat(tidy_html(url("https://rud.is/test/untidy.html"),
list(TidyDocType="html5", TidyWrapLen=200))) list(TidyDocType="html5", TidyWrapLen=200)))
## <!DOCTYPE html> ## <!DOCTYPE html>
## <html> ## <html>
@ -121,16 +125,22 @@ cat(tidy_html(url("http://rud.is/test/untidy.html"),
You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`): You'll see that this differs substantially from the mangling `libxml2` does (via `read_html()`):
``` r ``` r
pg <- read_html("http://rud.is/test/untidy.html") pg <- read_html("https://rud.is/test/untidy.html")
cat(toString(pg)) cat(toString(pg))
## <?xml version="1.0" standalone="yes"?>
## <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> ## <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
## <html><head><style><![CDATA[ ## <html>
## <head>
## <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
## <style>
## body { font-family: sans-serif; } ## body { font-family: sans-serif; }
## ]]></style></head><body> ## </style>
## </head>
## <body>
## <b>This is <b>some <i>really </i> poorly formatted HTML</b> ## <b>This is <b>some <i>really </i> poorly formatted HTML</b>
## ##
## as is this <span id="sp">portion<div/></span></b></body></html> ## as is this <span id="sp">portion<div></div></span></b>
## </body>
## </html>
``` ```
It can also deal with "raw" and parsed objects: It can also deal with "raw" and parsed objects:
@ -150,25 +160,18 @@ tidy_html(content(res, as="text", encoding="UTF-8"))
tidy_html(content(res, as="parsed", encoding="UTF-8")) tidy_html(content(res, as="parsed", encoding="UTF-8"))
## {xml_document} ## {xml_document}
## <html xmlns="http://www.w3.org/1999/xhtml"> ## <html xmlns="http://www.w3.org/1999/xhtml">
## [1] <head>\n <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n <meta name="generator" content ... ## [1] <head>\n<meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">\n<meta http-equiv="Content-Type ...
## [2] <body>\n<b>This is some <i>really</i> poorly formatted HTML as is this\n<span id="sp">portion</span></b>\n</body> ## [2] <body>\n<b>This is some <i>really</i> poorly formatted HTML as is this\n<span id="sp">portion</span></b>\n<div><s ...
tidy_html(htmlParse("http://rud.is/test/untidy.html")) tidy_html(htmlParse("https://rud.is/test/untidy.html"))
## <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> ## <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
## <html xmlns="http://www.w3.org/1999/xhtml"> ## <html xmlns="http://www.w3.org/1999/xhtml">
## <head> ## <head>
## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0"> ## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
## <style>
## <![CDATA[
## body { font-family: sans-serif; }
## ]]>
## </style>
## <title></title> ## <title></title>
## </head> ## </head>
## <body> ## <body>
## <b>This is some <i>really</i> poorly formatted HTML as is this ## <p>https://rud.is/test/untidy.html</p>
## <span id="sp">portion</span></b>
## <div><span id="sp"></span></div>
## </body> ## </body>
## </html> ## </html>
## ##
@ -177,7 +180,7 @@ tidy_html(htmlParse("http://rud.is/test/untidy.html"))
And, show the markup errors: And, show the markup errors:
``` r ``` r
invisible(tidy_html(url("http://rud.is/test/untidy.html"), verbose=TRUE)) invisible(tidy_html(url("https://rud.is/test/untidy.html"), verbose=TRUE))
## line 1 column 1 - Warning: missing <!DOCTYPE> declaration ## line 1 column 1 - Warning: missing <!DOCTYPE> declaration
## line 1 column 68 - Warning: nested emphasis <b> ## line 1 column 68 - Warning: nested emphasis <b>
## line 1 column 138 - Warning: missing </span> before <div> ## line 1 column 138 - Warning: missing </span> before <div>
@ -242,7 +245,7 @@ sum(map_int(book, nchar))
## [1] 207501 ## [1] 207501
system.time(tidy_book <- tidy_html(book)) system.time(tidy_book <- tidy_html(book))
## user system elapsed ## user system elapsed
## 0.021 0.001 0.022 ## 0.023 0.001 0.024
``` ```
(It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby. (It's usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby.

1
man/highlight_styles.Rd

@ -17,4 +17,3 @@ highlight_styles()
See \url{https://highlightjs.org/static/demo/} for a demo of all See \url{https://highlightjs.org/static/demo/} for a demo of all
highlight.js styles highlight.js styles
} }

1
man/htmltidy.Rd

@ -25,4 +25,3 @@ and 'xml-viewer', respectively.
\author{ \author{
Bob Rudis (bob@rud.is) Bob Rudis (bob@rud.is)
} }

1
man/renderXmlview.Rd

@ -16,4 +16,3 @@ renderXmlview(expr, env = parent.frame(), quoted = FALSE)
\description{ \description{
Widget render function for use in Shiny Widget render function for use in Shiny
} }

13
man/tidy_html.Rd

@ -1,14 +1,14 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/response.r, R/tidy.r % Please edit documentation in R/response.r, R/tidy.r
\name{tidy_html.response} \name{tidy_html.response}
\alias{tidy_html.response}
\alias{tidy_html} \alias{tidy_html}
\alias{tidy_html.HTMLInternalDocument}
\alias{tidy_html.character}
\alias{tidy_html.connection}
\alias{tidy_html.default} \alias{tidy_html.default}
\alias{tidy_html.character}
\alias{tidy_html.raw} \alias{tidy_html.raw}
\alias{tidy_html.response}
\alias{tidy_html.xml_document} \alias{tidy_html.xml_document}
\alias{tidy_html.HTMLInternalDocument}
\alias{tidy_html.connection}
\title{Tidy or "Pretty Print" HTML/XHTML Documents} \title{Tidy or "Pretty Print" HTML/XHTML Documents}
\usage{ \usage{
\method{tidy_html}{response}(content, options = list(TidyXhtmlOut = TRUE), \method{tidy_html}{response}(content, options = list(TidyXhtmlOut = TRUE),
@ -110,7 +110,7 @@ txt <- paste0(
cat(tidy_html(txt, option=opts)) cat(tidy_html(txt, option=opts))
library(httr) library(httr)
res <- GET("http://rud.is/test/untidy.html") res <- GET("https://rud.is/test/untidy.html")
# look at the original, un-tidy source # look at the original, un-tidy source
cat(content(res, as="text", encoding="UTF-8")) cat(content(res, as="text", encoding="UTF-8"))
@ -120,7 +120,7 @@ cat(tidy_html(content(res, as="text", encoding="UTF-8"),
list(TidyDocType="html5", TidyWrapLen=200))) list(TidyDocType="html5", TidyWrapLen=200)))
# but, you could also just do: # but, you could also just do:
cat(tidy_html(url("http://rud.is/test/untidy.html"))) cat(tidy_html(url("https://rud.is/test/untidy.html")))
} }
\references{ \references{
\url{http://api.html-tidy.org/tidy/quickref_5.1.25.html} & \url{http://api.html-tidy.org/tidy/quickref_5.1.25.html} &
@ -128,4 +128,3 @@ cat(tidy_html(url("http://rud.is/test/untidy.html")))
for definitions of the options supported above and \url{https://www.w3.org/People/Raggett/tidy/} for definitions of the options supported above and \url{https://www.w3.org/People/Raggett/tidy/}
for an explanation of what "tidy" HTML is and some canonical examples of what it can do. for an explanation of what "tidy" HTML is and some canonical examples of what it can do.
} }

3
man/xml_tree_view.Rd

@ -1,8 +1,8 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xmltreeview.R % Please edit documentation in R/xmltreeview.R
\name{xml_tree_view} \name{xml_tree_view}
\alias{html_tree_view}
\alias{xml_tree_view} \alias{xml_tree_view}
\alias{html_tree_view}
\title{HTML/XML tree viewer} \title{HTML/XML tree viewer}
\usage{ \usage{
xml_tree_view(doc = NULL, scroll = FALSE, elementId = NULL, xml_tree_view(doc = NULL, scroll = FALSE, elementId = NULL,
@ -57,4 +57,3 @@ htmltools::browsable(
\references{ \references{
\href{https://github.com/juliangruber/xml-viewer}{xml-viewer} \href{https://github.com/juliangruber/xml-viewer}{xml-viewer}
} }

3
man/xml_view.Rd

@ -1,8 +1,8 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xmlview.R % Please edit documentation in R/xmlview.R
\name{xml_view} \name{xml_view}
\alias{html_view}
\alias{xml_view} \alias{xml_view}
\alias{html_view}
\title{HTML/XML pretty printer and viewer} \title{HTML/XML pretty printer and viewer}
\usage{ \usage{
xml_view(doc, style = "default", scroll = FALSE, add_filter = FALSE, xml_view(doc, style = "default", scroll = FALSE, add_filter = FALSE,
@ -88,4 +88,3 @@ xml_text(xml2::xml_find_all(doc, './/link[contains(., "soccer")]', ns=xml2::xml_
\href{https://highlightjs.org/}{highlight.js}, \href{https://highlightjs.org/}{highlight.js},
\href{http://www.eslinstructor.net/vkbeautify/}{vkbeautify} \href{http://www.eslinstructor.net/vkbeautify/}{vkbeautify}
} }

3
man/xmltreeview-shiny.Rd

@ -1,9 +1,9 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/shiny-tree-view.R % Please edit documentation in R/shiny-tree-view.R
\name{xmltreeview-shiny} \name{xmltreeview-shiny}
\alias{renderXmltreeview}
\alias{xmltreeview-shiny} \alias{xmltreeview-shiny}
\alias{xmltreeviewOutput} \alias{xmltreeviewOutput}
\alias{renderXmltreeview}
\title{Shiny bindings for xmltreeview} \title{Shiny bindings for xmltreeview}
\usage{ \usage{
xmltreeviewOutput(outputId, width = "100\%", height = "400px") xmltreeviewOutput(outputId, width = "100\%", height = "400px")
@ -28,4 +28,3 @@ is useful if you want to save an expression in a variable.}
Output and render functions for using xmltreeview within Shiny Output and render functions for using xmltreeview within Shiny
applications and interactive Rmd documents. applications and interactive Rmd documents.
} }

1
man/xmlviewOutput.Rd

@ -16,4 +16,3 @@ xmlviewOutput(outputId, width = "100\%", height = "400px")
\description{ \description{
Widget output function for use in Shiny Widget output function for use in Shiny
} }

10
src/RcppExports.cpp

@ -1,4 +1,4 @@
// This file was generated by Rcpp::compileAttributes // Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#include <Rcpp.h> #include <Rcpp.h>
@ -9,12 +9,12 @@ using namespace Rcpp;
Rcpp::CharacterVector tidy_html_int(std::string source, Rcpp::List options, bool show_errors); Rcpp::CharacterVector tidy_html_int(std::string source, Rcpp::List options, bool show_errors);
RcppExport SEXP htmltidy_tidy_html_int(SEXP sourceSEXP, SEXP optionsSEXP, SEXP show_errorsSEXP) { RcppExport SEXP htmltidy_tidy_html_int(SEXP sourceSEXP, SEXP optionsSEXP, SEXP show_errorsSEXP) {
BEGIN_RCPP BEGIN_RCPP
Rcpp::RObject __result; Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope __rngScope; Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP); Rcpp::traits::input_parameter< std::string >::type source(sourceSEXP);
Rcpp::traits::input_parameter< Rcpp::List >::type options(optionsSEXP); Rcpp::traits::input_parameter< Rcpp::List >::type options(optionsSEXP);
Rcpp::traits::input_parameter< bool >::type show_errors(show_errorsSEXP); Rcpp::traits::input_parameter< bool >::type show_errors(show_errorsSEXP);
__result = Rcpp::wrap(tidy_html_int(source, options, show_errors)); rcpp_result_gen = Rcpp::wrap(tidy_html_int(source, options, show_errors));
return __result; return rcpp_result_gen;
END_RCPP END_RCPP
} }

Loading…
Cancel
Save