|
|
|
% Generated by roxygen2: do not edit by hand
|
|
|
|
% Please edit documentation in R/render-html.r
|
|
|
|
\name{render_html}
|
|
|
|
\alias{render_html}
|
|
|
|
\title{Return the HTML of the JavaScript-rendered page.}
|
|
|
|
\usage{
|
|
|
|
render_html(splash_obj = splash_local, url, base_url, timeout = 30,
|
|
|
|
resource_timeout, wait = 0, proxy, js, js_src, filters, allowed_domains,
|
|
|
|
allowed_content_types, forbidden_content_types, viewport = "1024x768",
|
|
|
|
images, headers, body, http_method, save_args, load_args, raw_html = FALSE)
|
|
|
|
}
|
|
|
|
\arguments{
|
|
|
|
\item{splash_obj}{Object created by a call to \code{\link[=splash]{splash()}}}
|
|
|
|
|
|
|
|
\item{url}{The URL to render (required)}
|
|
|
|
|
|
|
|
\item{base_url}{The base url to render the page with.}
|
|
|
|
|
|
|
|
\item{timeout}{A timeout (in seconds) for the render (defaults to 30). Without
|
|
|
|
reconfiguring the startup parameters of the Splash server (not this package)
|
|
|
|
the maximum allowed value for the timeout is 60 seconds.}
|
|
|
|
|
|
|
|
\item{resource_timeout}{A timeout (in seconds) for individual network requests.}
|
|
|
|
|
|
|
|
\item{wait}{Time (in seconds) to wait for updates after page is loaded (defaults to 0).}
|
|
|
|
|
|
|
|
\item{proxy}{Proxy profile name or proxy URL.}
|
|
|
|
|
|
|
|
\item{js}{JavaScript profile name.}
|
|
|
|
|
|
|
|
\item{js_src}{JavaScript code to be executed in page context.}
|
|
|
|
|
|
|
|
\item{filters}{Comma-separated list of request filter names.}
|
|
|
|
|
|
|
|
\item{allowed_domains}{Comma-separated list of allowed domain names. If present, Splash
|
|
|
|
won’t load anything from domains not in this list or from subdomains of
|
|
|
|
domains not in this list.}
|
|
|
|
|
|
|
|
\item{allowed_content_types}{Comma-separated list of allowed content types. If present,
|
|
|
|
Splash will abort any request if the response’s content type doesn’t match any of
|
|
|
|
the content types in this list. Wildcards are supported.}
|
|
|
|
|
|
|
|
\item{forbidden_content_types}{Comma-separated list of forbidden content types. If
|
|
|
|
present, Splash will abort any request if the response’s content type matches
|
|
|
|
any of the content types in this list. Wildcards are supported.}
|
|
|
|
|
|
|
|
\item{viewport}{View width and height (in pixels) of the browser viewport to render the
|
|
|
|
web page. Format is “<width>x<height>”, e.g. 800x600. Default value is "1024x768".}
|
|
|
|
|
|
|
|
\item{images}{Whether to download images.}
|
|
|
|
|
|
|
|
\item{headers}{HTTP headers to set for the first outgoing request.}
|
|
|
|
|
|
|
|
\item{body}{Body of HTTP POST request to be sent if method is POST.}
|
|
|
|
|
|
|
|
\item{http_method}{HTTP method of outgoing Splash request.}
|
|
|
|
|
|
|
|
\item{save_args}{A list of argument names to put in cache.}
|
|
|
|
|
|
|
|
\item{load_args}{Parameter values to load from cache.}
|
|
|
|
|
|
|
|
\item{raw_html}{If \code{TRUE}, return a character vector instead of an XML document. Only valid for \code{render_html}.}
|
|
|
|
}
|
|
|
|
\value{
|
|
|
|
An XML document. Note that this is processed by \code{\link[xml2:read_html]{xml2::read_html()}} so it will not be
|
|
|
|
the pristine, raw, rendered HTML from the site. Use \code{raw_html=TRUE} if you do not want it
|
|
|
|
to be processed first by \code{xml2}. If you choose \code{raw_html=TRUE} you'll get back a
|
|
|
|
character vector.
|
|
|
|
}
|
|
|
|
\description{
|
|
|
|
Similar (i.e. a dynamic equivalent) to \code{rvest::read_html}.
|
|
|
|
}
|
|
|
|
\references{
|
|
|
|
\href{http://splash.readthedocs.io/en/stable/index.html}{Splash docs}
|
|
|
|
}
|
|
|
|
\seealso{
|
|
|
|
Other splash_renderers: \code{\link{execute_lua}},
|
|
|
|
\code{\link{render_har}}, \code{\link{render_jpeg}},
|
|
|
|
\code{\link{render_json}}, \code{\link{render_png}}
|
|
|
|
}
|
|
|
|
\concept{splash_renderers}
|