From 63434ba7f3e9733e26dcb6ad894c23af89fb4b6f Mon Sep 17 00:00:00 2001 From: Bob Rudis Date: Wed, 19 Apr 2017 10:17:05 -0400 Subject: [PATCH] initial commit --- .Rbuildignore | 10 +++++++ .codecov.yml | 1 + .gitignore | 8 ++++++ .travis.yml | 31 ++++++++++++++++++++++ DESCRIPTION | 20 ++++++++++++++ NAMESPACE | 5 ++++ NEWS.md | 2 ++ R/hgr-package.R | 10 +++++++ R/mercury.r | 25 ++++++++++++++++++ README.Rmd | 47 +++++++++++++++++++++++++++++++++ README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++ hgr.Rproj | 21 +++++++++++++++ man/hgr.Rd | 14 ++++++++++ man/just_the_facts.Rd | 21 +++++++++++++++ tests/test-all.R | 2 ++ tests/testthat/test-hgr.R | 6 +++++ 16 files changed, 289 insertions(+) create mode 100644 .Rbuildignore create mode 100644 .codecov.yml create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 DESCRIPTION create mode 100644 NAMESPACE create mode 100644 NEWS.md create mode 100644 R/hgr-package.R create mode 100644 R/mercury.r create mode 100644 README.Rmd create mode 100644 README.md create mode 100644 hgr.Rproj create mode 100644 man/hgr.Rd create mode 100644 man/just_the_facts.Rd create mode 100644 tests/test-all.R create mode 100644 tests/testthat/test-hgr.R diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..1c60b19 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,10 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^\.travis\.yml$ +^README\.*Rmd$ +^README\.*html$ +^NOTES\.*Rmd$ +^NOTES\.*html$ +^\.codecov\.yml$ +^README_files$ +^doc$ diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..69cb760 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1 @@ +comment: false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cce1f17 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +.Rproj.user +.Rhistory +.RData +.Rproj +src/*.o +src/*.so +src/*.dll diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..76d9586 --- /dev/null +++ b/.travis.yml @@ -0,0 
+1,31 @@ +language: r + +warnings_are_errors: true + +sudo: required + +cache: packages + +r: + - oldrel + - release + - devel + +apt_packages: + - libv8-dev + - xclip + +env: + global: + - CRAN: http://cran.rstudio.com + +after_success: + - Rscript -e 'covr::codecov()' + +notifications: + email: + - bob@rud.is + irc: + channels: + - "104.236.112.222#builds" + nick: travisci diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..23da876 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,20 @@ +Package: hgr +Type: Package +Title: Tools to Work with the 'Postlight' 'Mercury' 'API' +Version: 0.1.0 +Date: 2017-04-19 +Author: Bob Rudis (bob@rud.is) +Maintainer: Bob Rudis <bob@rud.is> +Description: Tools to Work with the 'Postlight' 'Mercury' 'API'. +URL: https://github.com/hrbrmstr/hgr +BugReports: https://github.com/hrbrmstr/hgr/issues +License: AGPL +Suggests: + testthat, + covr +Depends: + R (>= 3.2.0) +Imports: + purrr, + httr +RoxygenNote: 6.0.1 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..f4424b8 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,5 @@ +# Generated by roxygen2: do not edit by hand + +export(just_the_facts) +import(httr) +import(purrr) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..9b4679b --- /dev/null +++ b/NEWS.md @@ -0,0 +1,2 @@ +0.1.0 +* Initial release diff --git a/R/hgr-package.R b/R/hgr-package.R new file mode 100644 index 0000000..97b2834 --- /dev/null +++ b/R/hgr-package.R @@ -0,0 +1,10 @@ +#' Tools to Work with the 'Postlight' 'Mercury' 'API' +#' +#' Mercury takes any web article and returns only the relevant content — headline, author, +#' body text, relevant images and more — free from any clutter. 
+#' +#' @name hgr +#' @docType package +#' @author Bob Rudis (bob@@rud.is) +#' @import purrr httr +NULL diff --git a/R/mercury.r b/R/mercury.r new file mode 100644 index 0000000..045e1fb --- /dev/null +++ b/R/mercury.r @@ -0,0 +1,25 @@ +#' Retrieve parsed content of a URL processed by the Postlight Mercury API +#' +#' Mercury takes any web article and returns only the relevant content — headline, author, +#' body text, relevant images and more — free from any clutter. +#' +#' @md +#' @param url URL to retrieve +#' @param mercury_api_key your Mercury API key. The function looks for it in `MERCURY_API_KEY` +#' but you can specify it manually as well. Get your key [here](https://mercury.postlight.com). +#' @return `data.frame` +#' @export +just_the_facts <- function(url, mercury_api_key=Sys.getenv("MERCURY_API_KEY")) { + + res <- httr::GET("https://mercury.postlight.com/parser", # request the Mercury parser endpoint + httr::add_headers(`x-api-key`=mercury_api_key), # the API key is sent in the `x-api-key` request header + query = list(url = url)) # the article to parse is passed as the `url` query parameter + + httr::stop_for_status(res) # stop with an R error when the response carries an HTTP error status + + res <- httr::content(res, as="text", encoding="UTF-8") # take the response body as UTF-8 text rather than letting httr parse it + res <- jsonlite::fromJSON(res, flatten=TRUE) # parse the JSON text; NOTE(review): jsonlite is called here but is not listed under Imports in this package's DESCRIPTION, confirm it is declared + + purrr::flatten_df(res) # last expression, so this is the return value: the parsed result flattened into a data frame + +} \ No newline at end of file diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..8817c10 --- /dev/null +++ b/README.Rmd @@ -0,0 +1,47 @@ +--- +output: rmarkdown::github_document +--- + +`hgr` : Tools to Work with the 'Postlight' 'Mercury' 'API' + +Mercury takes any web article and returns only the relevant content — headline, author, body text, relevant images and more — free from any clutter. 
+ +The following functions are implemented: + +- `just_the_facts`: Retrieve parsed content of a URL processed by the Postlight Mercury API + +### Installation + +```{r eval=FALSE} +devtools::install_github("hrbrmstr/hgr") +``` + +```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} +options(width=120) +``` + +### Usage + +```{r message=FALSE, warning=FALSE, error=FALSE} +library(hgr) + +# current version +packageVersion("hgr") + +story <- "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=first-column-region&region=top-news&WT.nav=top-news&_r=0" + +dplyr::glimpse(just_the_facts(story)) + +``` + +### Test Results + +```{r message=FALSE, warning=FALSE, error=FALSE} +library(hgr) +library(testthat) + +date() + +test_dir("tests/") +``` + diff --git a/README.md b/README.md new file mode 100644 index 0000000..ab8cdd2 --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ + +`hgr` : Tools to Work with the 'Postlight' 'Mercury' 'API' + +Mercury takes any web article and returns only the relevant content — headline, author, body text, relevant images and more — free from any clutter. + +The following functions are implemented: + +- `just_the_facts`: Retrieve parsed content of a URL processed by the Postlight Mercury API + +### Installation + +``` r +devtools::install_github("hrbrmstr/hgr") +``` + +### Usage + +``` r +library(hgr) + +# current version +packageVersion("hgr") +``` + + ## [1] '0.1.0' + +``` r +story <- "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=first-column-region&region=top-news&WT.nav=top-news&_r=0" + +dplyr::glimpse(just_the_facts(story)) +``` + + ## Observations: 1 + ## Variables: 12 + ## $ title "Aircraft Carrier Wasn’t Sailing to Deter North Korea, as U.S. Suggested" + ## $ content "
\n\n \n\n ... + ## $ author "Mark Landler and Eric Schmitt" + ## $ date_published "2017-04-18T17:57:41.000Z" + ## $ lead_image_url "https://static01.nyt.com/images/2017/04/19/world/19carrier-sub/19carrier-sub-facebookJumbo.... + ## $ url "https://www.nytimes.com/2017/04/18/world/asia/aircraft-carrier-north-korea-carl-vinson.html" + ## $ domain "www.nytimes.com" + ## $ excerpt "The saga might never have come to light had the Navy not posted a photograph of the Carl Vi... + ## $ word_count 1505 + ## $ direction "ltr" + ## $ total_pages 1 + ## $ rendered_pages 1 + +### Test Results + +``` r +library(hgr) +library(testthat) + +date() +``` + + ## [1] "Wed Apr 19 10:16:35 2017" + +``` r +test_dir("tests/") +``` + + ## testthat results ======================================================================================================== + ## OK: 0 SKIPPED: 0 FAILED: 0 + ## + ## DONE =================================================================================================================== diff --git a/hgr.Rproj b/hgr.Rproj new file mode 100644 index 0000000..446d9e1 --- /dev/null +++ b/hgr.Rproj @@ -0,0 +1,21 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageBuildArgs: --resave-data +PackageRoxygenize: rd,collate,namespace diff --git a/man/hgr.Rd b/man/hgr.Rd new file mode 100644 index 0000000..f9eed04 --- /dev/null +++ b/man/hgr.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hgr-package.R +\docType{package} +\name{hgr} +\alias{hgr} +\alias{hgr-package} +\title{Tools to Work with the 'Postlight' 'Mercury' 'API'} +\description{ +Mercury takes any web article and returns only the relevant content — 
headline, author, +body text, relevant images and more — free from any clutter. +} +\author{ +Bob Rudis (bob@rud.is) +} diff --git a/man/just_the_facts.Rd b/man/just_the_facts.Rd new file mode 100644 index 0000000..382199d --- /dev/null +++ b/man/just_the_facts.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mercury.r +\name{just_the_facts} +\alias{just_the_facts} +\title{Retrieve parsed content of a URL processed by the Postlight Mercury API} +\usage{ +just_the_facts(url, mercury_api_key = Sys.getenv("MERCURY_API_KEY")) +} +\arguments{ +\item{url}{URL to retrieve} + +\item{mercury_api_key}{your Mercury API key. The function looks for it in \code{MERCURY_API_KEY} +but you can specify it manually as well. Get your key \href{https://mercury.postlight.com}{here}.} +} +\value{ +\code{data.frame} +} +\description{ +Mercury takes any web article and returns only the relevant content — headline, author, +body text, relevant images and more — free from any clutter. +} diff --git a/tests/test-all.R b/tests/test-all.R new file mode 100644 index 0000000..6da9aee --- /dev/null +++ b/tests/test-all.R @@ -0,0 +1,2 @@ +library(testthat) +test_check("hgr") diff --git a/tests/testthat/test-hgr.R b/tests/testthat/test-hgr.R new file mode 100644 index 0000000..ab6f62f --- /dev/null +++ b/tests/testthat/test-hgr.R @@ -0,0 +1,6 @@ +context("basic functionality") +test_that("we can do something", { + + #expect_that(some_function(), is_a("data.frame")) + +})