mirror of https://git.sr.ht/~hrbrmstr/hgr
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
46 lines
1.5 KiB
46 lines
1.5 KiB
#' Retrieve parsed content of a URL processed by the Postlight Mercury API
#'
#' [Mercury](https://mercury.postlight.com) takes any web article and returns only the
#' relevant content --- headline, author, body text, relevant images and more --- free
#' from any clutter.
#'
#' @md
#' @param url URL to retrieve
#' @param mercury_api_key your Mercury API key. The function looks for it in the
#'        `MERCURY_API_KEY` environment variable but you can specify it manually
#'        as well. Get your key [here](https://mercury.postlight.com). An error
#'        is raised before any request is made if the key is missing or empty.
#' @return `data.frame` that additionally carries the class `hgr` (so that it is
#'         displayed with `print.hgr()`).
#' @export
#' @examples \dontrun{
#' URL <- "https://www.techworld.com/careers/what-is-r-programming-language-3664613/"
#' just_the_facts(URL)
#' }
just_the_facts <- function(url, mercury_api_key=Sys.getenv("MERCURY_API_KEY")) {

  # Fail fast with an actionable message rather than sending a request with an
  # empty `x-api-key` header (Sys.getenv() yields "" when the variable is unset).
  if (!is.character(mercury_api_key) || length(mercury_api_key) != 1L ||
      is.na(mercury_api_key) || !nzchar(mercury_api_key)) {
    stop(
      "A Mercury API key is required: set the MERCURY_API_KEY environment ",
      "variable or pass `mercury_api_key`.",
      call. = FALSE
    )
  }

  # The target URL is passed as the `url` query parameter; the key travels in
  # the `x-api-key` request header.
  res <- httr::GET(
    "https://mercury.postlight.com/parser",
    httr::content_type_json(),
    httr::user_agent(.hgr_ua),
    httr::add_headers(`x-api-key` = mercury_api_key),
    query = list(url = url)
  )

  # Turn HTTP error statuses into R errors.
  httr::stop_for_status(res)

  # Decode the body as UTF-8 text, parse the JSON and flatten it into one
  # data frame.
  res <- httr::content(res, as = "text", encoding = "UTF-8")
  res <- jsonlite::fromJSON(res, flatten = TRUE)
  res <- purrr::flatten_df(res)

  # Coerce columns according to the package-level column specification;
  # conversion warnings are deliberately silenced for this single call.
  res <- suppressWarnings(readr::type_convert(res, col_types = .hgr_cols))

  # Prepend (instead of overwrite) the class so the result keeps behaving as
  # the data frame the documentation promises while still dispatching to
  # print.hgr().
  class(res) <- unique(c("hgr", class(res)))

  res

}
|
|
|
|
#' @md
#' @rdname just_the_facts
#' @param x `hgr` object
#' @param ... unused
#' @export
print.hgr <- function(x, ...) {
  # Wrap the `content` element with htmltools::HTML() and hand it to
  # htmltools::html_print() for display. `[[` is used because `$` performs
  # partial name matching on lists.
  tmp <- htmltools::HTML(x[["content"]])
  htmltools::html_print(tmp)

  # Print methods return their argument invisibly so the object can still be
  # assigned or piped after printing.
  invisible(x)
}
|