mirror of https://git.sr.ht/~hrbrmstr/hgr
boB Rudis
7 years ago
commit
63434ba7f3
16 changed files with 289 additions and 0 deletions
@ -0,0 +1,10 @@ |
|||
^.*\.Rproj$ |
|||
^\.Rproj\.user$ |
|||
^\.travis\.yml$ |
|||
^README\.*Rmd$ |
|||
^README\.*html$ |
|||
^NOTES\.*Rmd$ |
|||
^NOTES\.*html$ |
|||
^\.codecov\.yml$ |
|||
^README_files$ |
|||
^doc$ |
@ -0,0 +1 @@ |
|||
comment: false |
@ -0,0 +1,8 @@ |
|||
.DS_Store |
|||
.Rproj.user |
|||
.Rhistory |
|||
.RData |
|||
.Rproj |
|||
src/*.o |
|||
src/*.so |
|||
src/*.dll |
@ -0,0 +1,31 @@ |
|||
language: r |
|||
|
|||
warnings_are_errors: true |
|||
|
|||
sudo: required |
|||
|
|||
cache: packages |
|||
|
|||
r: |
|||
- oldrel |
|||
- release |
|||
- devel |
|||
|
|||
apt_packages: |
|||
- libv8-dev |
|||
- xclip |
|||
|
|||
env: |
|||
global: |
|||
- CRAN: http://cran.rstudio.com |
|||
|
|||
after_success: |
|||
- Rscript -e 'covr::codecov()' |
|||
|
|||
notifications: |
|||
email: |
|||
- bob@rud.is |
|||
irc: |
|||
channels: |
|||
- "104.236.112.222#builds" |
|||
nick: travisci |
@ -0,0 +1,20 @@ |
|||
Package: hgr |
|||
Type: Package |
|||
Title: Tools to Work with the 'Postlight' 'Mercury' 'API' |
|||
Version: 0.1.0 |
|||
Date: 2017-04-19 |
|||
Author: Bob Rudis (bob@rud.is) |
|||
Maintainer: Bob Rudis <bob@rud.is> |
|||
Description: Tools to Work with the 'Postlight' 'Mercury' 'API'. |
|||
URL: https://github.com/hrbrmstr/hgr |
|||
BugReports: https://github.com/hrbrmstr/hgr/issues |
|||
License: AGPL |
|||
Suggests: |
|||
testthat, |
|||
covr |
|||
Depends: |
|||
R (>= 3.2.0) |
|||
Imports: |
|||
purrr, |
|||
httr, |
|||
jsonlite |
|||
RoxygenNote: 6.0.1 |
@ -0,0 +1,5 @@ |
|||
# Generated by roxygen2: do not edit by hand |
|||
|
|||
export(just_the_facts) |
|||
import(httr) |
|||
import(purrr) |
@ -0,0 +1,2 @@ |
|||
0.1.0 |
|||
* Initial release |
@ -0,0 +1,10 @@ |
|||
#' Tools to Work with the 'Postlight' 'Mercury' 'API' |
|||
#' |
|||
#' Mercury takes any web article and returns only the relevant content — headline, author, |
|||
#' body text, relevant images and more — free from any clutter. |
|||
#' |
|||
#' @name hgr |
|||
#' @docType package |
|||
#' @author Bob Rudis (bob@@rud.is) |
|||
#' @import purrr httr |
|||
NULL |
@ -0,0 +1,25 @@ |
|||
#' Retrieve parsed content of a URL processed by the Postlight Mercury API
#'
#' Mercury takes any web article and returns only the relevant content — headline, author,
#' body text, relevant images and more — free from any clutter.
#'
#' Both arguments are validated before any request is made: an error is raised
#' if `url` or `mercury_api_key` is not a single, non-missing, non-empty
#' character string. An error is also raised if the API responds with an HTTP
#' error status.
#'
#' @md
#' @param url URL to retrieve (a single, non-empty character string)
#' @param mercury_api_key your Mercury API key. The function looks for it in `MERCURY_API_KEY`
#'        but you can specify it manually as well. Get your key [here](https://mercury.postlight.com).
#' @return `data.frame`
#' @export
just_the_facts <- function(url, mercury_api_key=Sys.getenv("MERCURY_API_KEY")) {

  # Fail fast, before any network traffic, on input the request cannot use.
  if (!is.character(url) || length(url) != 1L || is.na(url) || !nzchar(url)) {
    stop("`url` must be a single, non-empty character string.", call. = FALSE)
  }

  # Sys.getenv() yields "" when the variable is unset, so an empty string here
  # means "no key configured"; report that directly instead of letting it
  # surface later as an HTTP status error.
  if (!is.character(mercury_api_key) || length(mercury_api_key) != 1L ||
      is.na(mercury_api_key) || !nzchar(mercury_api_key)) {
    stop(
      "No Mercury API key found. Set the MERCURY_API_KEY environment variable ",
      "or pass `mercury_api_key` explicitly.",
      call. = FALSE
    )
  }

  # The key travels in the `x-api-key` header; the target article URL is sent
  # as the `url` query parameter.
  res <- httr::GET("https://mercury.postlight.com/parser",
                   httr::add_headers(`x-api-key`=mercury_api_key),
                   query = list(url = url))

  # Convert HTTP error statuses into R errors.
  httr::stop_for_status(res)

  # Read the body as UTF-8 text and parse the JSON ourselves.
  res <- httr::content(res, as="text", encoding="UTF-8")
  res <- jsonlite::fromJSON(res, flatten=TRUE)

  purrr::flatten_df(res)

}
@ -0,0 +1,47 @@ |
|||
--- |
|||
output: rmarkdown::github_document |
|||
--- |
|||
|
|||
`hgr` : Tools to Work with the 'Postlight' 'Mercury' 'API' |
|||
|
|||
Mercury takes any web article and returns only the relevant content — headline, author, body text, relevant images and more — free from any clutter. |
|||
|
|||
The following functions are implemented: |
|||
|
|||
- `just_the_facts`: Retrieve parsed content of a URL processed by the Postlight Mercury API |
|||
|
|||
### Installation |
|||
|
|||
```{r eval=FALSE} |
|||
devtools::install_github("hrbrmstr/hgr") |
|||
``` |
|||
|
|||
```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} |
|||
options(width=120) |
|||
``` |
|||
|
|||
### Usage |
|||
|
|||
```{r message=FALSE, warning=FALSE, error=FALSE} |
|||
library(hgr) |
|||
|
|||
# current version |
|||
packageVersion("hgr") |
|||
|
|||
story <- "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=first-column-region&region=top-news&WT.nav=top-news&_r=0" |
|||
|
|||
dplyr::glimpse(just_the_facts(story)) |
|||
|
|||
``` |
|||
|
|||
### Test Results |
|||
|
|||
```{r message=FALSE, warning=FALSE, error=FALSE} |
|||
library(hgr) |
|||
library(testthat) |
|||
|
|||
date() |
|||
|
|||
test_dir("tests/") |
|||
``` |
|||
|
@ -0,0 +1,66 @@ |
|||
|
|||
`hgr` : Tools to Work with the 'Postlight' 'Mercury' 'API' |
|||
|
|||
Mercury takes any web article and returns only the relevant content — headline, author, body text, relevant images and more — free from any clutter. |
|||
|
|||
The following functions are implemented: |
|||
|
|||
- `just_the_facts`: Retrieve parsed content of a URL processed by the Postlight Mercury API |
|||
|
|||
### Installation |
|||
|
|||
``` r |
|||
devtools::install_github("hrbrmstr/hgr") |
|||
``` |
|||
|
|||
### Usage |
|||
|
|||
``` r |
|||
library(hgr) |
|||
|
|||
# current version |
|||
packageVersion("hgr") |
|||
``` |
|||
|
|||
## [1] '0.1.0' |
|||
|
|||
``` r |
|||
story <- "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=first-column-region&region=top-news&WT.nav=top-news&_r=0" |
|||
|
|||
dplyr::glimpse(just_the_facts(story)) |
|||
``` |
|||
|
|||
## Observations: 1 |
|||
## Variables: 12 |
|||
## $ title <chr> "Aircraft Carrier Wasn’t Sailing to Deter North Korea, as U.S. Suggested" |
|||
## $ content <chr> "<div><article id=\"story\" class=\"story theme-main \">\n\n \n\n ... |
|||
## $ author <chr> "Mark Landler and Eric Schmitt" |
|||
## $ date_published <chr> "2017-04-18T17:57:41.000Z" |
|||
## $ lead_image_url <chr> "https://static01.nyt.com/images/2017/04/19/world/19carrier-sub/19carrier-sub-facebookJumbo.... |
|||
## $ url <chr> "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html" |
|||
## $ domain <chr> "www.nytimes.com" |
|||
## $ excerpt <chr> "The saga might never have come to light had the Navy not posted a photograph of the Carl Vi... |
|||
## $ word_count <int> 1505 |
|||
## $ direction <chr> "ltr" |
|||
## $ total_pages <int> 1 |
|||
## $ rendered_pages <int> 1 |
|||
|
|||
### Test Results |
|||
|
|||
``` r |
|||
library(hgr) |
|||
library(testthat) |
|||
|
|||
date() |
|||
``` |
|||
|
|||
## [1] "Wed Apr 19 10:16:35 2017" |
|||
|
|||
``` r |
|||
test_dir("tests/") |
|||
``` |
|||
|
|||
## testthat results ======================================================================================================== |
|||
## OK: 0 SKIPPED: 0 FAILED: 0 |
|||
## |
|||
## DONE =================================================================================================================== |
@ -0,0 +1,21 @@ |
|||
Version: 1.0 |
|||
|
|||
RestoreWorkspace: Default |
|||
SaveWorkspace: Default |
|||
AlwaysSaveHistory: Default |
|||
|
|||
EnableCodeIndexing: Yes |
|||
UseSpacesForTab: Yes |
|||
NumSpacesForTab: 2 |
|||
Encoding: UTF-8 |
|||
|
|||
RnwWeave: Sweave |
|||
LaTeX: pdfLaTeX |
|||
|
|||
StripTrailingWhitespace: Yes |
|||
|
|||
BuildType: Package |
|||
PackageUseDevtools: Yes |
|||
PackageInstallArgs: --no-multiarch --with-keep.source |
|||
PackageBuildArgs: --resave-data |
|||
PackageRoxygenize: rd,collate,namespace |
@ -0,0 +1,14 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/hgr-package.R |
|||
\docType{package} |
|||
\name{hgr} |
|||
\alias{hgr} |
|||
\alias{hgr-package} |
|||
\title{Tools to Work with the 'Postlight' 'Mercury' 'API'} |
|||
\description{ |
|||
Mercury takes any web article and returns only the relevant content — headline, author, |
|||
body text, relevant images and more — free from any clutter. |
|||
} |
|||
\author{ |
|||
Bob Rudis (bob@rud.is) |
|||
} |
@ -0,0 +1,21 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/mercury.r |
|||
\name{just_the_facts} |
|||
\alias{just_the_facts} |
|||
\title{Retrieve parsed content of a URL processed by the Postlight Mercury API} |
|||
\usage{ |
|||
just_the_facts(url, mercury_api_key = Sys.getenv("MERCURY_API_KEY")) |
|||
} |
|||
\arguments{ |
|||
\item{url}{URL to retrieve} |
|||
|
|||
\item{mercury_api_key}{your Mercury API key. The function looks for it in \code{MERCURY_API_KEY} |
|||
but you can specify it manually as well. Get your key \href{https://mercury.postlight.com}{here}.} |
|||
} |
|||
\value{ |
|||
\code{data.frame} |
|||
} |
|||
\description{ |
|||
Mercury takes any web article and returns only the relevant content — headline, author, |
|||
body text, relevant images and more — free from any clutter. |
|||
} |
@ -0,0 +1,2 @@ |
|||
library(testthat) |
|||
test_check("hgr") |
@ -0,0 +1,6 @@ |
|||
context("basic functionality") |
|||
test_that("we can do something", { |
|||
|
|||
#expect_that(some_function(), is_a("data.frame")) |
|||
|
|||
}) |
Loading…
Reference in new issue