Browse Source

initial commit

master
boB Rudis 7 years ago
commit
63434ba7f3
No known key found for this signature in database GPG Key ID: 2A514A4997464560
  1. 10
      .Rbuildignore
  2. 1
      .codecov.yml
  3. 8
      .gitignore
  4. 31
      .travis.yml
  5. 20
      DESCRIPTION
  6. 5
      NAMESPACE
  7. 2
      NEWS.md
  8. 10
      R/hgr-package.R
  9. 25
      R/mercury.r
  10. 47
      README.Rmd
  11. 66
      README.md
  12. 21
      hgr.Rproj
  13. 14
      man/hgr.Rd
  14. 21
      man/just_the_facts.Rd
  15. 2
      tests/test-all.R
  16. 6
      tests/testthat/test-hgr.R

10
.Rbuildignore

@ -0,0 +1,10 @@
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^README\.*Rmd$
^README\.*html$
^NOTES\.*Rmd$
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$

1
.codecov.yml

@ -0,0 +1 @@
comment: false

8
.gitignore

@ -0,0 +1,8 @@
.DS_Store
.Rproj.user
.Rhistory
.RData
.Rproj
src/*.o
src/*.so
src/*.dll

31
.travis.yml

@ -0,0 +1,31 @@
language: r
warnings_are_errors: true
sudo: required
cache: packages
r:
- oldrel
- release
- devel
apt_packages:
- libv8-dev
- xclip
env:
global:
- CRAN: http://cran.rstudio.com
after_success:
- Rscript -e 'covr::codecov()'
notifications:
email:
- bob@rud.is
irc:
channels:
- "104.236.112.222#builds"
nick: travisci

20
DESCRIPTION

@ -0,0 +1,20 @@
Package: hgr
Type: Package
Title: Tools to Work with the 'Postlight' 'Mercury' 'API'
Version: 0.1.0
Date: 2017-04-19
Author: Bob Rudis (bob@rud.is)
Maintainer: Bob Rudis <bob@rud.is>
Description: Tools to Work with the 'Postlight' 'Mercury' 'API'.
URL: https://github.com/hrbrmstr/hgr
BugReports: https://github.com/hrbrmstr/hgr/issues
License: AGPL
Suggests:
testthat,
covr
Depends:
R (>= 3.2.0)
Imports:
purrr,
httr,
jsonlite
RoxygenNote: 6.0.1

5
NAMESPACE

@ -0,0 +1,5 @@
# Generated by roxygen2: do not edit by hand
export(just_the_facts)
import(httr)
import(purrr)

2
NEWS.md

@ -0,0 +1,2 @@
0.1.0
* Initial release

10
R/hgr-package.R

@ -0,0 +1,10 @@
#' Tools to Work with the 'Postlight' 'Mercury' 'API'
#'
#' Mercury takes any web article and returns only the relevant content — headline, author,
#' body text, relevant images and more — free from any clutter.
#'
#' @name hgr
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import purrr httr
NULL

25
R/mercury.r

@ -0,0 +1,25 @@
#' Retrieve parsed content of a URL processed by the Postlight Mercury API
#'
#' Mercury takes any web article and returns only the relevant content — headline, author,
#' body text, relevant images and more — free from any clutter.
#'
#' The function stops with an error before any request is made if `url` is not a
#' single, non-empty string or if no API key is available. It also stops if the
#' API responds with an HTTP error status.
#'
#' @md
#' @param url URL to retrieve; a single, non-empty character string
#' @param mercury_api_key your Mercury API key. The function looks for it in the
#'        `MERCURY_API_KEY` environment variable but you can specify it manually as well.
#'        Get your key [here](https://mercury.postlight.com).
#' @return `data.frame`
#' @examples
#' \dontrun{
#' just_the_facts("https://example.com/some-article")
#' }
#' @export
just_the_facts <- function(url, mercury_api_key=Sys.getenv("MERCURY_API_KEY")) {

  # Fail fast on unusable input rather than spending a request on it.
  if (!is.character(url) || length(url) != 1L || is.na(url) || !nzchar(url)) {
    stop("'url' must be a single, non-empty character string", call. = FALSE)
  }

  # Sys.getenv() returns "" when the variable is unset; without this check the
  # request would go out with an empty `x-api-key` header and fail with an
  # HTTP status error that does not mention the missing key.
  if (!is.character(mercury_api_key) || length(mercury_api_key) != 1L ||
      is.na(mercury_api_key) || !nzchar(mercury_api_key)) {
    stop(
      "No Mercury API key found; set the MERCURY_API_KEY environment variable ",
      "or pass 'mercury_api_key'",
      call. = FALSE
    )
  }

  res <- httr::GET(
    "https://mercury.postlight.com/parser",
    httr::add_headers(`x-api-key`=mercury_api_key),
    query = list(url = url)
  )

  # Turn HTTP error statuses into R errors before trying to parse the body.
  httr::stop_for_status(res)

  res <- httr::content(res, as="text", encoding="UTF-8")
  res <- jsonlite::fromJSON(res, flatten=TRUE)

  # Collapse the parsed JSON fields into a data frame.
  purrr::flatten_df(res)

}

47
README.Rmd

@ -0,0 +1,47 @@
---
output: rmarkdown::github_document
---
`hgr` : Tools to Work with the 'Postlight' 'Mercury' 'API'
Mercury takes any web article and returns only the relevant content — headline, author, body text, relevant images and more — free from any clutter.
The following functions are implemented:
- `just_the_facts`: Retrieve parsed content of a URL processed by the Postlight Mercury API
### Installation
```{r eval=FALSE}
devtools::install_github("hrbrmstr/hgr")
```
```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE}
options(width=120)
```
### Usage
```{r message=FALSE, warning=FALSE, error=FALSE}
library(hgr)
# current version
packageVersion("hgr")
story <- "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=first-column-region&region=top-news&WT.nav=top-news&_r=0"
dplyr::glimpse(just_the_facts(story))
```
### Test Results
```{r message=FALSE, warning=FALSE, error=FALSE}
library(hgr)
library(testthat)
date()
test_dir("tests/")
```

66
README.md

@ -0,0 +1,66 @@
`hgr` : Tools to Work with the 'Postlight' 'Mercury' 'API'
Mercury takes any web article and returns only the relevant content — headline, author, body text, relevant images and more — free from any clutter.
The following functions are implemented:
- `just_the_facts`: Retrieve parsed content of a URL processed by the Postlight Mercury API
### Installation
``` r
devtools::install_github("hrbrmstr/hgr")
```
### Usage
``` r
library(hgr)
# current version
packageVersion("hgr")
```
## [1] '0.1.0'
``` r
story <- "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=first-column-region&region=top-news&WT.nav=top-news&_r=0"
dplyr::glimpse(just_the_facts(story))
```
## Observations: 1
## Variables: 12
## $ title <chr> "Aircraft Carrier Wasn’t Sailing to Deter North Korea, as U.S. Suggested"
## $ content <chr> "<div><article id=\"story\" class=\"story theme-main \">\n\n \n\n ...
## $ author <chr> "Mark Landler and Eric Schmitt"
## $ date_published <chr> "2017-04-18T17:57:41.000Z"
## $ lead_image_url <chr> "https://static01.nyt.com/images/2017/04/19/world/19carrier-sub/19carrier-sub-facebookJumbo....
## $ url <chr> "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html"
## $ domain <chr> "www.nytimes.com"
## $ excerpt <chr> "The saga might never have come to light had the Navy not posted a photograph of the Carl Vi...
## $ word_count <int> 1505
## $ direction <chr> "ltr"
## $ total_pages <int> 1
## $ rendered_pages <int> 1
### Test Results
``` r
library(hgr)
library(testthat)
date()
```
## [1] "Wed Apr 19 10:16:35 2017"
``` r
test_dir("tests/")
```
## testthat results ========================================================================================================
## OK: 0 SKIPPED: 0 FAILED: 0
##
## DONE ===================================================================================================================

21
hgr.Rproj

@ -0,0 +1,21 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageRoxygenize: rd,collate,namespace

14
man/hgr.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hgr-package.R
\docType{package}
\name{hgr}
\alias{hgr}
\alias{hgr-package}
\title{Tools to Work with the 'Postlight' 'Mercury' 'API'}
\description{
Mercury takes any web article and returns only the relevant content — headline, author,
body text, relevant images and more — free from any clutter.
}
\author{
Bob Rudis (bob@rud.is)
}

21
man/just_the_facts.Rd

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mercury.r
\name{just_the_facts}
\alias{just_the_facts}
\title{Retrieve parsed content of a URL processed by the Postlight Mercury API}
\usage{
just_the_facts(url, mercury_api_key = Sys.getenv("MERCURY_API_KEY"))
}
\arguments{
\item{url}{URL to retrieve}
\item{mercury_api_key}{your Mercury API key. The function looks for it in \code{MERCURY_API_KEY}
but you can specify it manually as well. Get your key \href{https://mercury.postlight.com}{here}.}
}
\value{
\code{data.frame}
}
\description{
Mercury takes any web article and returns only the relevant content — headline, author,
body text, relevant images and more — free from any clutter.
}

2
tests/test-all.R

@ -0,0 +1,2 @@
# Test runner entry point: attaches testthat and then runs the test suite
# for the "hgr" package via test_check().
library(testthat)
test_check("hgr")

6
tests/testthat/test-hgr.R

@ -0,0 +1,6 @@
context("basic functionality")

# Network-free checks only: calling the Mercury API needs a key and
# connectivity, so these tests verify the exported interface instead.
test_that("just_the_facts is exported with the documented arguments", {
  expect_true(is.function(just_the_facts))
  expect_identical(
    names(formals(just_the_facts)),
    c("url", "mercury_api_key")
  )
})
Loading…
Cancel
Save