diff --git a/.Rbuildignore b/.Rbuildignore index 4e10fcd..349bb7a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,3 +7,4 @@ ^README\.html$ ^cran-comments\.md$ ^appveyor\.yml$ +^docs$ diff --git a/DESCRIPTION b/DESCRIPTION index cf30eb1..6d22294 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,6 +42,7 @@ Depends: R (>= 3.2.0) License: MIT + file LICENSE LazyData: true +Encoding: UTF-8 NeedsCompilation: yes Suggests: testthat, diff --git a/NEWS.md b/NEWS.md index aaa9cbb..3f6ee39 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,17 +1,18 @@ -# htmltidy 0.3.1 - +htmltidy 0.3.1 +==================== * Fix warnings coming from URL redirection in examples -# htmltidy 0.3.0 +htmltidy 0.3.0 +==================== * Better error handling (fixed crashing bug in #1) * New option to display document errors * Support for directly tidying httr::response objects * Added XML/HTML viewer & XPath query widgets -# htmltidy 0.2.0 - +htmltidy 0.2.0 +==================== * Bundled tidy-html5 library with the package * Windows compatibility * Options handling @@ -19,8 +20,8 @@ * Modified tests -# htmltidy 0.1.0 - +htmltidy 0.1.0 +==================== * Added a `NEWS.md` file to track changes to the package. * Added Debian & Ubuntu compatibility * Added basic error checking diff --git a/R/xmltreeview.R b/R/xmltreeview.R index 4a4df4f..8a34fe3 100644 --- a/R/xmltreeview.R +++ b/R/xmltreeview.R @@ -19,8 +19,8 @@ #' or used in a browser context vs an IDE viewer context. 
#' @export #' @references \href{https://github.com/juliangruber/xml-viewer}{xml-viewer} -#' @examples \dontrun{ -#' library(htmltidy) +#' @examples +#' if (interactive()) { #' #' # from ?xml2::read_xml #' cd <- xml2::read_xml("http://www.xmlfiles.com/examples/cd_catalog.xml") diff --git a/R/xmlview.R b/R/xmlview.R index be6551e..116e207 100644 --- a/R/xmlview.R +++ b/R/xmlview.R @@ -27,7 +27,8 @@ #' @export #' @references \href{https://highlightjs.org/}{highlight.js}, #' \href{http://www.eslinstructor.net/vkbeautify/}{vkbeautify} -#' @examples \dontrun{ +#' @examples +#' if (interactive()) { #' library(xml2) #' #' # plain text diff --git a/cran-comments.md b/cran-comments.md index efe9d58..74bc984 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -10,17 +10,13 @@ 0 errors | 0 warnings | 2 notes -* This is a new release. -* XHTML is a valid and widely used acronym - -This is a new release, so there are no reverse dependencies. - --- This fixes a fairly nasty bug that was user-identfied fairly early after release but I didn't want to bug the CRAN team -so quickly after the CRAN acceptange. This -also addes new functionality and (optionally) +so quickly after the CRAN acceptance. This +also adds new functionality (widgets for +viewing & querying XML/HTML) and (optionally) provides more informaiton on the tidying -process. +process. diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..f223441 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,292 @@ + + + +
+ + + + +htmltidy
— Tidy Up and Test XPath Queries on HTML and XML Content
Partly inspired by this SO question and because there’s a great deal of cruddy HTML out there that needs fixing to use properly when scraping data.
+It relies on a locally included version of libtidy
and works on macOS, Linux & Windows.
It also incorporates an htmlwidget
to view and test XPath queries on HTML/XML content.
The following functions are implemented:
+tidy_html
: Tidy or “Pretty Print” HTML/XHTML Documentshtml_view
: HTML/XML pretty printer and viewerxml_view
: HTML/XML pretty printer and viewerhtml_tree_view
: HTML/XML tree viewerxml_tree_view
: HTML/XML tree viewerdevtools::install_github("hrbrmstr/htmltidy")
library(htmltidy)
+
+# current version
+packageVersion("htmltidy")
+## [1] '0.3.0'
+
+library(XML)
+library(xml2)
+library(httr)
+library(purrr)
This is really “un-tidy” content:
+res <- GET("http://rud.is/test/untidy.html")
+cat(content(res, as="text"))
+## <head>
+## <style>
+## body { font-family: sans-serif; }
+## </style>
+## </head>
+## <body>
+## <b>This is <b>some <i>really </i> poorly formatted HTML</b>
+##
+## as is this <span id="sp">portion<div>
Let’s see what tidy_html()
does to it.
It can handle the response
object directly:
cat(tidy_html(res, list(TidyDocType="html5", TidyWrapLen=200)))
+## <!DOCTYPE html>
+## <html>
+## <head>
+## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
+## <style>
+## body { font-family: sans-serif; }
+## </style>
+## <title></title>
+## </head>
+## <body>
+## <b>This is some <i>really</i> poorly formatted HTML as is this <span id="sp">portion</span></b>
+## <div><span id="sp"></span></div>
+## </body>
+## </html>
But, you’ll probably mostly use it on HTML you’ve identified as gnarly and already have that HTML text content handy:
+cat(tidy_html(content(res, as="text"), list(TidyDocType="html5", TidyWrapLen=200)))
+## <!DOCTYPE html>
+## <html>
+## <head>
+## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
+## <style>
+## body { font-family: sans-serif; }
+## </style>
+## <title></title>
+## </head>
+## <body>
+## <b>This is some <i>really</i> poorly formatted HTML as is this <span id="sp">portion</span></b>
+## <div><span id="sp"></span></div>
+## </body>
+## </html>
NOTE: you could also just have done:
+cat(tidy_html(url("http://rud.is/test/untidy.html"),
+ list(TidyDocType="html5", TidyWrapLen=200)))
+## <!DOCTYPE html>
+## <html>
+## <head>
+## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
+## <style>
+## body { font-family: sans-serif; }
+## </style>
+## <title></title>
+## </head>
+## <body>
+## <b>This is some <i>really</i> poorly formatted HTMLas is this <span id="sp">portion</span></b>
+## <div><span id="sp"></span></div>
+## </body>
+## </html>
You’ll see that this differs substantially from the mangling libxml2
does (via read_html()
):
pg <- read_html("http://rud.is/test/untidy.html")
+cat(toString(pg))
+## <?xml version="1.0" standalone="yes"?>
+## <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+## <html><head><style><![CDATA[
+## body { font-family: sans-serif; }
+## ]]></style></head><body>
+## <b>This is <b>some <i>really </i> poorly formatted HTML</b>
+##
+## as is this <span id="sp">portion<div/></span></b></body></html>
It can also deal with “raw” and parsed objects:
+tidy_html(content(res, as="raw"))
+## [1] 3c 21 44 4f 43 54 59 50 45 20 68 74 6d 6c 3e 0a 3c 68 74 6d 6c 20 78 6d 6c 6e 73 3d 22 68 74 74 70 3a 2f 2f 77 77
+## [39] 77 2e 77 33 2e 6f 72 67 2f 31 39 39 39 2f 78 68 74 6d 6c 22 3e 0a 3c 68 65 61 64 3e 0a 3c 6d 65 74 61 20 6e 61 6d
+## [77] 65 3d 22 67 65 6e 65 72 61 74 6f 72 22 20 63 6f 6e 74 65 6e 74 3d 0a 22 48 54 4d 4c 20 54 69 64 79 20 66 6f 72 20
+## [115] 48 54 4d 4c 35 20 66 6f 72 20 52 20 76 65 72 73 69 6f 6e 20 35 2e 30 2e 30 22 20 2f 3e 0a 3c 74 69 74 6c 65 3e 3c
+## [153] 2f 74 69 74 6c 65 3e 0a 3c 2f 68 65 61 64 3e 0a 3c 62 6f 64 79 3e 0a 3c 2f 62 6f 64 79 3e 0a 3c 2f 68 74 6d 6c 3e
+## [191] 0a
+
+tidy_html(content(res, as="text", encoding="UTF-8"))
+## [1] "<!DOCTYPE html>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta name=\"generator\" content=\n\"HTML Tidy for HTML5 for R version 5.0.0\" />\n<style>\n<![CDATA[\nbody { font-family: sans-serif; }\n]]>\n</style>\n<title></title>\n</head>\n<body>\n<b>This is some <i>really</i> poorly formatted HTML as is this\n<span id=\"sp\">portion</span></b>\n<div><span id=\"sp\"></span></div>\n</body>\n</html>\n"
+
+tidy_html(content(res, as="parsed", encoding="UTF-8"))
+## {xml_document}
+## <html xmlns="http://www.w3.org/1999/xhtml">
+## [1] <head>\n <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n <meta name="generator" content ...
+## [2] <body>\n<b>This is some <i>really</i> poorly formatted HTML as is this\n<span id="sp">portion</span></b>\n</body>
+
+tidy_html(htmlParse("http://rud.is/test/untidy.html"))
+## <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+## <html xmlns="http://www.w3.org/1999/xhtml">
+## <head>
+## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
+## <style>
+## <![CDATA[
+## body { font-family: sans-serif; }
+## ]]>
+## </style>
+## <title></title>
+## </head>
+## <body>
+## <b>This is some <i>really</i> poorly formatted HTML as is this
+## <span id="sp">portion</span></b>
+## <div><span id="sp"></span></div>
+## </body>
+## </html>
+##
And, show the markup errors:
+invisible(tidy_html(url("http://rud.is/test/untidy.html"), verbose=TRUE))
+## line 1 column 1 - Warning: missing <!DOCTYPE> declaration
+## line 1 column 68 - Warning: nested emphasis <b>
+## line 1 column 138 - Warning: missing </span> before <div>
+## line 1 column 68 - Warning: missing </b> before <div>
+## line 1 column 164 - Warning: inserting implicit <span>
+## line 1 column 164 - Warning: missing </span>
+## line 1 column 159 - Warning: missing </div>
+## line 1 column 1 - Warning: inserting missing 'title' element
+## line 1 column 164 - Warning: <span> anchor "sp" already defined
+## Info: Document content looks like XHTML5
+## Tidy found 9 warnings and 0 errors!
+opts <- list(TidyDocType="html5",
+ TidyMakeClean=TRUE,
+ TidyHideComments=TRUE,
+ TidyIndentContent=FALSE,
+ TidyWrapLen=200)
+
+txt <- "<html>
+<head>
+ <style>
+ p { color: red; }
+ </style>
+ <body>
+ <!-- ===== body ====== -->
+ <p>Test</p>
+
+ </body>
+ <!--Default Zone
+ -->
+ <!--Default Zone End-->
+</html>"
+
+cat(tidy_html(txt, option=opts))
+## <!DOCTYPE html>
+## <html>
+## <head>
+## <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0">
+## <style>
+## p { color: red; }
+## </style>
+## <title></title>
+## </head>
+## <body>
+## <p>Test</p>
+## </body>
+## </html>
But, you’re probably better off running it on plain HTML source.
+Since it’s C/C++-backed, it’s pretty fast:
+book <- readLines("http://singlepageappbook.com/single-page.html")
+sum(map_int(book, nchar))
+## [1] 207501
+system.time(tidy_book <- tidy_html(book))
+## user system elapsed
+## 0.021 0.001 0.022
(It’s usually between 20 & 25 milliseconds to process those 202 kilobytes of HTML.) Not too shabby.
+Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.
+ +NEWS.md
file to track changes to the package.Returns a character vector of available style sheets to use when displaying +an XML document.
+ + +highlight_styles()
+
+ See https://highlightjs.org/static/demo/ for a demo of all + highlight.js styles
++highlight_styles()#> [1] "agate" "androidstudio" +#> [3] "arta" "ascetic" +#> [5] "atelier-cave-dark" "atelier-cave-light" +#> [7] "atelier-cave.dark" "atelier-cave.light" +#> [9] "atelier-dune-dark" "atelier-dune-light" +#> [11] "atelier-dune.dark" "atelier-dune.light" +#> [13] "atelier-estuary-dark" "atelier-estuary-light" +#> [15] "atelier-estuary.dark" "atelier-estuary.light" +#> [17] "atelier-forest-dark" "atelier-forest-light" +#> [19] "atelier-forest.dark" "atelier-forest.light" +#> [21] "atelier-heath-dark" "atelier-heath-light" +#> [23] "atelier-heath.dark" "atelier-heath.light" +#> [25] "atelier-lakeside-dark" "atelier-lakeside-light" +#> [27] "atelier-lakeside.dark" "atelier-lakeside.light" +#> [29] "atelier-plateau-dark" "atelier-plateau-light" +#> [31] "atelier-plateau.dark" "atelier-plateau.light" +#> [33] "atelier-savanna-dark" "atelier-savanna-light" +#> [35] "atelier-savanna.dark" "atelier-savanna.light" +#> [37] "atelier-seaside-dark" "atelier-seaside-light" +#> [39] "atelier-seaside.dark" "atelier-seaside.light" +#> [41] "atelier-sulphurpool-dark" "atelier-sulphurpool-light" +#> [43] "atelier-sulphurpool.dark" "atelier-sulphurpool.light" +#> [45] "brown_paper" "brown-paper" +#> [47] "codepen-embed" "color-brewer" +#> [49] "dark" "darkula" +#> [51] "default" "docco" +#> [53] "far" "foundation" +#> [55] "github-gist" "github" +#> [57] "googlecode" "grayscale" +#> [59] "hopscotch" "hybrid" +#> [61] "idea" "ir_black" +#> [63] "ir-black" "kimbie.dark" +#> [65] "kimbie.light" "magula" +#> [67] "mono-blue" "monokai_sublime" +#> [69] "monokai-sublime" "monokai" +#> [71] "obsidian" "paraiso-dark" +#> [73] "paraiso-light" "paraiso.dark" +#> [75] "paraiso.light" "pojoaque" +#> [77] "railscasts" "rainbow" +#> [79] "school_book" "school-book" +#> [81] "solarized_dark" "solarized_light" +#> [83] "solarized-dark" "solarized-light" +#> [85] "sunburst" "tomorrow-night-blue" +#> [87] "tomorrow-night-bright" "tomorrow-night-eighties" +#> [89] "tomorrow-night" 
"tomorrow" +#> [91] "vs" "xcode" +#> [93] "zenburn" +#>
HTML documents can be beautiful and pristine. They can also be +wretched, evil, malformed demon-spawn. Now, you can tidy up that HTML and XHTML +before processing it with your favorite angle-bracket crunching tools, going beyond +the limited tidying that 'libxml2' affords in the 'XML' and 'xml2' packages and +taming even the ugliest HTML code generated by the likes of Google Docs and Microsoft +Word. It's also possible to use the functions provided to format or "pretty print" +HTML content as it is being tidied. Utilities are also included that make it +possible to view formatted and "pretty printed" HTML/XML +content from HTML/XML document objects, nodes, node sets and plain character HTML/XML +using 'vkbeautify' (by Vadim Kiryukhin) and 'highlight.js' (by Ivan Sagalaev). +Also (optionally) enables filtering of nodes via XPath or viewing an XML document +in "tree" view using 'xml-viewer' (by Julian Gruber). See +https://github.com/vkiryukhin/vkBeautify and +https://github.com/juliangruber/xml-viewer for more information about 'vkbeautify' +and 'xml-viewer', respectively.
+ + +Widget render function for use in Shiny
+ + +renderXmlview(expr, env = parent.frame(), quoted = FALSE)+ +
Pass in HTML content as either plain or raw text or parsed objects (either with the
+XML
or xml2
packages) or as an httr
response
object
+along with an options list that specifies how the content will be tidied and get back
+tidied content of the same object type as passed in to the function.
# S3 method for response +tidy_html(content, options = list(TidyXhtmlOut = TRUE), + verbose = FALSE) + +tidy_html(content, options = list(TidyXhtmlOut = TRUE), verbose = FALSE) + +# S3 method for default +tidy_html(content, options = list(TidyXhtmlOut = TRUE), + verbose = FALSE) + +# S3 method for character +tidy_html(content, options = list(TidyXhtmlOut = TRUE), + verbose = FALSE) + +# S3 method for raw +tidy_html(content, options = list(TidyXhtmlOut = TRUE), + verbose = FALSE) + +# S3 method for xml_document +tidy_html(content, options = list(TidyXhtmlOut = TRUE), + verbose = FALSE) + +# S3 method for HTMLInternalDocument +tidy_html(content, options = list(TidyXhtmlOut + = TRUE), verbose = FALSE) + +# S3 method for connection +tidy_html(content, options = list(TidyXhtmlOut = TRUE), + verbose = FALSE)+ +
xml2
+or XML
packages.FALSE
)Tidied HTML/XHTML content. The object type will be the same as that of the input type
+ except when it is a connection
, then a character vector will be returned.
The default option TidyXhtmlOut
will convert the input content to XHTML.
Currently supported options:
+TidyAltText
, TidyBodyOnly
, TidyBreakBeforeBR
,
+ TidyCoerceEndTags
, TidyDropEmptyElems
, TidyDropEmptyParas
,
+ TidyFixBackslash
, TidyFixComments
, TidyGDocClean
, TidyHideComments
,
+ TidyHtmlOut
, TidyIndentContent
, TidyJoinClasses
, TidyJoinStyles
,
+ TidyLogicalEmphasis
, TidyMakeBare
, TidyMakeClean
, TidyMark
,
+ TidyOmitOptionalTags
, TidyReplaceColor
, TidyUpperCaseAttrs
,
+ TidyUpperCaseTags
, TidyWord2000
, TidyXhtmlOut
+ TidyDoctype
, TidyInlineTags
, TidyBlockTags
,
+ TidyEmptyTags
, TidyPreTags
+ TidyIndentSpaces
, TidyTabSize
, TidyWrapLen
+File https://github.com/hrbrmstr/htmltidy/issues if there are other libtidy
+options you'd like supported.
It is likely that the most used options will be:
+TidyXhtmlOut
(logical),
+ TidyHtmlOut
(logical) and
+ TidyDocType
which should be one of "omit
",
+ "html5
", "auto
", "strict
" or "loose
".
+You can clean up Microsoft Word (2000) and Google Docs HTML via logical settings for
+TidyWord2000
and TidyGDocClean
, respectively.
It may also be advantageous to remove all comments with TidyHideComments
.
If document parsing errors are severe enough, tidy_html()
will not be able
+ to clean the document and will display the errors (this output can be captured with
+ sink()
or capture.output()
) along with a warning and return a "best effort"
+ cleaned version of the document.
http://api.html-tidy.org/tidy/quickref_5.1.25.html & + https://github.com/htacg/tidy-html5/blob/master/include/tidyenum.h + for definitions of the options supported above and https://www.w3.org/People/Raggett/tidy/ + for an explanation of what "tidy" HTML is and some canonical examples of what it can do.
++opts <- list( + TidyDocType="html5", + TidyMakeClean=TRUE, + TidyHideComments=TRUE, + TidyIndentContent=TRUE, + TidyWrapLen=200 +) + +txt <- paste0( + c("<html><head><style>p { color: red; }</style><body><!-- ===== body ====== -->", +"<p>Test</p></body><!--Default Zone --> <!--Default Zone End--></html>"), + collapse="") + +cat(tidy_html(txt, option=opts))#> <!DOCTYPE html> +#> <html> +#> <head> +#> <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0"> +#> <style> +#> p { color: red; } +#> </style> +#> <title></title> +#> </head> +#> <body> +#> <p> +#> Test +#> </p> +#> </body> +#> </html> +#>+library(httr) +res <- GET("http://rud.is/test/untidy.html") + +# look at the original, un-tidy source +cat(content(res, as="text", encoding="UTF-8"))#> <head> +#> <style> +#> body { font-family: sans-serif; } +#> </style> +#> </head> +#> <body> +#> <b>This is <b>some <i>really </i> poorly formatted HTML</b> +#> +#> as is this <span id="sp">portion<div> +#>+# see the tidied version +cat(tidy_html(content(res, as="text", encoding="UTF-8"), + list(TidyDocType="html5", TidyWrapLen=200)))#> <!DOCTYPE html> +#> <html> +#> <head> +#> <meta name="generator" content="HTML Tidy for HTML5 for R version 5.0.0"> +#> <style> +#> body { font-family: sans-serif; } +#> </style> +#> <title></title> +#> </head> +#> <body> +#> <b>This is some <i>really</i> poorly formatted HTML as is this <span id="sp">portion</span></b> +#> <div><span id="sp"></span></div> +#> </body> +#> </html> +#>+# but, you could also just do: +cat(tidy_html(url("http://rud.is/test/untidy.html")))#> <!DOCTYPE html> +#> <html xmlns="http://www.w3.org/1999/xhtml"> +#> <head> +#> <meta name="generator" content= +#> "HTML Tidy for HTML5 for R version 5.0.0" /> +#> <style> +#> <![CDATA[ +#> body { font-family: sans-serif; } +#> ]]> +#> </style> +#> <title></title> +#> </head> +#> <body> +#> <b>This is some <i>really</i> poorly formatted HTMLas is this +#> <span id="sp">portion</span></b> +#> <div><span 
id="sp"></span></div> +#> </body> +#> </html> +#>
This uses the xml-viewer
JavaScript module to provide a simple collapsible
+tree viewer for HTML/XML documents, nodes, node sets and plain character
+HTML/XML in an htmlwidget
pane.
xml_tree_view(doc = NULL, scroll = FALSE, elementId = NULL, + width = "100%", height = NULL) + +html_tree_view(doc = NULL, scroll = FALSE, elementId = NULL, + width = "100%", height = NULL)+ +
xml2
document/node/nodeset, an HTMLInternalDocument
/
+XMLInternalDocument
or atomic character vector of HTML/XML content<div>
holding the HTML/XML content scroll
+(TRUE
) or take up the full viewer/browser window (FALSE
).
+Default is FALSE
(take up the full viewer/browser window). If
+this is set to TRUE
, height
should be set to a value
+other than NULL
.div
widthdiv
heightLarge HTML or XML content may take some time to render properly. It is suggested + that this function be used on as minimal of a subset of HTML/XML as possible + or used in a browser context vs an IDE viewer context.
++## Not run: ------------------------------------ +# library(htmltidy) +# +# # from ?xml2::read_xml +# cd <- xml2::read_xml("http://www.xmlfiles.com/examples/cd_catalog.xml") +# +# xml_tree_view(cd) +# +# htmltools::browsable( +# htmltools::tagList( +# xml_tree_view(cd, width = "100%", height = "300px"), +# xml_view(cd) +# ) +# ) +## ---------------------------------------------
This uses the vkbeautify
and highlight.js
javascript modules to format and
+"pretty print" HTML/XML documents, nodes, node sets and plain character
+HTML/XML in an htmlwidget
pane.
xml_view(doc, style = "default", scroll = FALSE, add_filter = FALSE, + apply_xpath = NULL, elementId = NULL, width = "100%", height = NULL) + +html_view(doc, style = "default", scroll = FALSE, add_filter = FALSE, + apply_xpath = NULL, elementId = NULL, width = "100%", height = NULL)+ +
xml2
document/node/nodeset, an HTMLInternalDocument
/
+XMLInternalDocument
or atomic character vector of HTML/XML contenthighlight_styles()
)<div>
holding the HTML/XML content scroll
+(TRUE
) or take up the full viewer/browser window (FALSE
).
+Default is FALSE
(take up the full viewer/browser window). If
+this is set to TRUE
, height
should be set to a value
+other than NULL
.FALSE
)add_filter
is TRUE
then this query string will
+appear in the filter box and be applied to the passed in document.Large HTML or XML content may take some time to render properly. It is suggested + that this function be used on as minimal of a subset of HTML/XML as possible + or used in a browser context vs an IDE viewer context.
+https://highlightjs.org/, + http://www.eslinstructor.net/vkbeautify/
++## Not run: ------------------------------------ +# library(xml2) +# +# # plain text +# txt <- paste0("<note><to>Tove</to><from>Jani</from><heading>Reminder</heading>", +# "<body>Don't forget me this weekend!</body></note>") +# xml_view(txt) +# +# # xml object +# doc <- read_xml(txt) +# xml_view(doc, style="obsidian") +# +# # different style +# xml_view(xml_find_all(doc, ".//to"), style="github-gist") +# +# # some more complex data +# xml_view(read_xml(system.file("extdata/dwml.xml", package="htmltidy"))) +# xml_view(read_xml(system.file("extdata/getHistory.xml", package="htmltidy")), +# "androidstudio") +# xml_view(read_xml(system.file("extdata/input.xml", package="htmltidy")), +# "sunburst") +# +# # filter + apply an initial XPath query string +# xml_view(read_xml(system.file("extdata/dwml.xml", package="htmltidy")), +# add_filter=TRUE, apply_xpath=".//temperature") +# +# doc <- read_xml("http://www.npr.org/rss/rss.php?id=1001") +# +# str(doc) +# +# xml_view(doc, add_filter=TRUE) +# xml2::xml_find_all(doc, './/dc:creator', ns=xml2::xml_ns(doc)) +# +# xml_text(xml2::xml_find_all(doc, './/link[contains(., "soccer")]', ns=xml2::xml_ns(doc))) +## ---------------------------------------------
Output and render functions for using xmltreeview within Shiny +applications and interactive Rmd documents.
+ + +xmltreeviewOutput(outputId, width = "100%", height = "400px") + +renderXmltreeview(expr, env = parent.frame(), quoted = FALSE)+ +
Widget output function for use in Shiny
+ + +xmlviewOutput(outputId, width = "100%", height = "400px")+ +