Tools to Work with the 'Splash' JavaScript Rendering Service in R
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

110 lines
5.2KB

  #' Return a json-encoded dictionary with information about a javascript-rendered webpage.
  #'
  #' It can include HTML, PNG and other information, based on arguments passed.
  #' The request is sent as a `GET` to the `render.json` endpoint of the Splash
  #' instance identified by `splash_obj`, and the JSON response is parsed into
  #' nested lists.
  #'
  #' @md
  #' @rdname render_json
  #' @param html Whether to include HTML in output.
  #' @param png Whether to include PNG in output.
  #' @param jpeg Whether to include JPEG in output.
  #' @param iframes Whether to include information about child frames in output.
  #' @param script Whether to include the result of the custom executed javascript final
  #'        statement in output
  #' @param console Whether to include the executed javascript console messages in output.
  #' @param history Whether to include the history of requests/responses for webpage main frame.
  #'        Use it to get HTTP status codes and headers. Only information about "main"
  #'        requests/responses is returned (i.e. information about related resources
  #'        like images and AJAX queries is not returned). To get information about all
  #'        requests and responses use `har` parameter.
  #' @param har Whether to include HAR in output. If `TRUE` the result will contain the same
  #'        data as [render_har()] provides under `har` list entry. By default, response
  #'        content is not included. To enable it use `response_body` parameter.
  #' @param response_body Used with `har` parameter. Whether to include response
  #'        content in the HAR data (default `FALSE`).
  #' @return a (potentially huge) `list` with class `splash_json`. When the response
  #'         contains a `har` element it is replaced by an object of class `har`
  #'         whose `log`, `creator`, `version`, `browser`, `pages`, `entries` and
  #'         individual page/entry/request/response elements carry `har*` classes.
  #' @inheritParams render_jpeg
  #' @note All "whether to include..." parameters are default `TRUE` except for `png` and
  #'       `jpeg` and a custom `print` method is defined to stop your console from being
  #'       overwhelmed with data. Use [str()] to inspect various portions of the result.
  #' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html)
  #' @export
  render_json <- function(splash_obj = splash_local, url, base_url=NULL, quality=75, width=1024, height=768,
                          timeout=30, resource_timeout, wait=0, render_all=FALSE,
                          proxy, js, js_src, filters, allowed_domains, allowed_content_types,
                          forbidden_content_types, viewport="1024x768", images, headers, body,
                          http_method, save_args, load_args, html=TRUE, png=FALSE, jpeg=FALSE,
                          iframes=TRUE, script=TRUE, console=TRUE, history=TRUE, har=TRUE,
                          response_body=FALSE) {

    # Always-sent query parameters; logical flags are sent as 0/1.
    params <- list(
      url = url, timeout = timeout, wait = wait, viewport = viewport,
      quality = quality, width = width, height = height,
      render_all = as.numeric(render_all),
      html = as.numeric(html), png = as.numeric(png), jpeg = as.numeric(jpeg),
      iframes = as.numeric(iframes), script = as.numeric(script),
      console = as.numeric(console), history = as.numeric(history),
      har = as.numeric(har),
      response_body = as.numeric(response_body)
    )

    # Optional parameters are only added to the query when the caller supplied them.
    if (!missing(base_url)) params$base_url <- base_url
    if (!missing(resource_timeout)) params$resource_timeout <- resource_timeout
    if (!missing(proxy)) params$proxy <- proxy
    if (!missing(js)) params$js <- js
    if (!missing(js_src)) params$js_src <- js_src
    if (!missing(filters)) params$filters <- filters
    if (!missing(allowed_domains)) params$allowed_domains <- allowed_domains
    if (!missing(allowed_content_types)) params$allowed_content_types <- allowed_content_types
    if (!missing(forbidden_content_types)) params$forbidden_content_types <- forbidden_content_types
    if (!missing(images)) params$images <- images
    if (!missing(headers)) params$headers <- headers
    if (!missing(body)) params$body <- body
    if (!missing(http_method)) params$http_method <- http_method
    if (!missing(save_args)) params$save_args <- save_args
    if (!missing(load_args)) params$load_args <- load_args

    res <- httr::GET(splash_url(splash_obj), path="render.json", encode="json", query=params)
    httr::stop_for_status(res)

    # Parse without simplification so the result stays a plain nested list.
    out <- httr::content(res, as="text", encoding="UTF-8")
    out <- jsonlite::fromJSON(out, flatten=FALSE, simplifyVector=FALSE)
    class(out) <- c("splash_json", class(out))

    if ("har" %in% names(out)) {

      # Rebuild the HAR portion with only the known log fields and tag each
      # level with its har* class.
      sphar <- list(
        log = list(
          version = out$har$log$version,
          creator = out$har$log$creator,
          browser = out$har$log$browser,
          pages = out$har$log$pages,
          entries = out$har$log$entries
        )
      )

      class(sphar$log$creator) <- c("harcreator", "list")
      class(sphar$log$version) <- c("harversion", "character")
      class(sphar$log$browser) <- c("harbrowser", "list")
      class(sphar$log$pages) <- c("harpages", "list")
      class(sphar$log$entries) <- c("harentries", "list")
      class(sphar$log) <- c("harlog", "list")
      class(sphar) <- c("har", "list")

      # seq_along() (not 1:length()) so empty page/entry lists are skipped
      # instead of indexing element 1 of an empty list.
      for (i in seq_along(sphar$log$pages)) {
        class(sphar$log$pages[[i]]) <- c("harpage", "list")
      }

      for (i in seq_along(sphar$log$entries)) {
        class(sphar$log$entries[[i]]) <- c("harentry", "list")
        if (length(sphar$log$entries[[i]]$request) > 0) {
          class(sphar$log$entries[[i]]$request) <- c("harrequest", "list")
        }
        if (length(sphar$log$entries[[i]]$response) > 0) {
          class(sphar$log$entries[[i]]$response) <- c("harresponse", "list")
        }
      }

      out$har <- sphar

    }

    out

  }
  # Print method for objects returned by render_json().
  #
  # Writes a single placeholder line instead of the object's contents and
  # returns `x` invisibly. `...` is accepted for S3 compatibility and ignored.
  #' @export
  print.splash_json <- function(x, ...) {
    # Terminate the line so following console output starts on a fresh line.
    cat("<splashr render_json() object>\n")
    invisible(x)
  }