Browse Source

initial commit

master
Bob Rudis 9 years ago
parent
commit
79fa745b54
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 5
      .Rbuildignore
  2. 14
      .travis.yml
  3. 25
      CONDUCT.md
  4. 7
      DESCRIPTION
  5. 8
      NAMESPACE
  6. 6
      R/longurl-package.r
  7. 92
      R/longurl.r
  8. 90
      README.Rmd
  9. 90
      README.md
  10. 9
      longurl.Rproj
  11. 38
      man/expand_urls.Rd
  12. 16
      man/known_services.Rd
  13. 14
      man/longurl.Rd
  14. 4
      tests/testthat.R
  15. 14
      tests/testthat/test-longurl.R

5
.Rbuildignore

@ -1,2 +1,7 @@
^.*\.Rproj$
^\.Rproj\.user$
^README\.Rmd$
^README-.*\.png$
^\.travis\.yml$
^CONDUCT\.md$
^README\.md$

14
.travis.yml

@ -0,0 +1,14 @@
# Sample .travis.yml for R projects
language: r
warnings_are_errors: true
sudo: required
env:
global:
- CRAN: http://cran.rstudio.com
notifications:
email:
on_success: change
on_failure: change

25
CONDUCT.md

@ -0,0 +1,25 @@
# Contributor Code of Conduct
As contributors and maintainers of this project, we pledge to respect all people who
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.
We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.
Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed
from the project team.
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
opening an issue or contacting one or more of the project maintainers.
This Code of Conduct is adapted from the Contributor Covenant
(http://contributor-covenant.org), version 1.0.0, available at
http://contributor-covenant.org/version/1/0/0/

7
DESCRIPTION

@ -1,8 +1,11 @@
Package: longurl
Title: What the Package Does (one line, title case)
Title: Expand Short URLs using the 'LongURL' API
Version: 0.0.0.9000
Authors@R: c(person("Bob", "Rudis", email = "bob@rudis.net", role = c("aut", "cre")))
Description: What the package does (one paragraph).
Description: Interface to the 'LongURL' API to identify known URL shortener services
and expand vectors of short URLs.
Depends: R (>= 3.2.1)
License: MIT + file LICENSE
LazyData: true
Suggests: testthat
Imports: httr, dplyr, pbapply

8
NAMESPACE

@ -1 +1,7 @@
exportPattern("^[^\\.]")
# Generated by roxygen2 (4.1.1): do not edit by hand
export(expand_urls)
export(known_services)
import(dplyr)
import(httr)
import(pbapply)

6
R/longurl-package.r

@ -0,0 +1,6 @@
#' A package to work with the longurl API
#' @name longurl
#' @docType package
#' @author Bob Rudis (@@hrbrmstr)
#' @import httr dplyr pbapply
NULL

92
R/longurl.r

@ -0,0 +1,92 @@
# the longurl api requires the use of a custom user agent; this string is
# passed to user_agent() on every API request made in this file
LONGURL_USER_AGENT <- "longurl-rstats-pkg"
# this is the base endpoint for the longurl API; it is a sprintf() template
# whose "%s" placeholder is filled with the API method name
# ("services" or "expand" in this file)
LONGURL_ENDPOINT <- "http://api.longurl.org/v2/%s"
#' Retrieve all the URL shortener services known to the longurl API
#'
#' Queries the \code{services} method of the longurl API and flattens the
#' JSON response into a two-column table. A non-success HTTP status is
#' reported as a warning.
#'
#' @return \code{data_frame} with one row per known service and two character
#'         columns: \code{domain} and \code{regex}. A field that is missing
#'         from a service entry is returned as \code{NA}.
#' @export
#' @examples
#' short_svcs <- known_services()
#' head(short_svcs)
known_services <- function() {

  url <- sprintf(LONGURL_ENDPOINT, "services")

  resp <- GET(url, query=list(format="json"),
              user_agent(LONGURL_USER_AGENT))

  warn_for_status(resp)

  tmp <- content(resp)

  # Extract one named field from every service entry. vapply() guarantees a
  # character vector even when the response is empty or an entry lacks the
  # field (sapply() would silently hand back a list in those cases).
  pluck_field <- function(field) {
    vapply(tmp, function(svc) {
      val <- svc[[field]]
      if (is.null(val)) NA_character_ else as.character(val)[1]
    }, character(1), USE.NAMES=FALSE)
  }

  data_frame(domain=pluck_field("domain"),
             regex=pluck_field("regex"))

}
#' Expand a vector of (short) URLs using the longurl service
#'
#' Pass in a vector of URLs (ostensibly "short" URLs) and receive
#' a \code{data_frame} of the original URLs and expanded URLs.
#'
#' @param urls_to_expand character vector of URLs
#' @param check run an extra \code{HEAD} request on the expanded URL to determine
#'        validity. This is an expensive operation, so recommended usage is to run
#'        this only on URLs that did not seem to expand.
#' @param warn show any warnings (API or otherwise) as messages
#' @param .progress display a progress bar (generally only useful in
#'        interactive sessions)
#' @return \code{data_frame} (compatible with \code{data.frame}) of results
#'         with the original URLs in \code{orig_url} and expanded URLs in
#'         \code{expanded_url}. \code{expanded_url} is always a character
#'         column; URLs that could not be expanded are \code{NA}.
#' @export
#' @examples
#' test_urls <- c("http://t.co/D4C7aWYIiA",
#'                "1.usa.gov/1J6GNoW",
#'                "ift.tt/1L2Llfr")
#' big_urls <- expand_urls(test_urls)
#' head(big_urls)
expand_urls <- function(urls_to_expand, check=FALSE, warn=TRUE,
                        .progress=interactive()) {

  # Plain if/else picks the iterator: ifelse() is meant for vectors of values,
  # not for choosing between two functions.
  doapply <- if (.progress) pblapply else lapply

  expanded <- doapply(urls_to_expand, expand_url, check=check, warn=warn)

  # vapply() pins the column type to character, so empty input gives a
  # zero-row character column and an all-failure run gives NA_character_
  # rather than a list or logical column.
  data_frame(orig_url=urls_to_expand,
             expanded_url=vapply(expanded, as.character, character(1),
                                 USE.NAMES=FALSE))

}
#' the thing that does all the work
#'
#' Expands a single URL through the \code{expand} method of the longurl API.
#'
#' @param url_to_expand a single URL (character string) to expand
#' @param check if \code{TRUE}, send a \code{HEAD} request to the expanded URL
#'        and return \code{NA} unless it answers with HTTP status 200
#' @param warn if \code{TRUE}, report non-success HTTP statuses (from the API
#'        call and from the optional \code{HEAD} check) as warnings
#' @return the expanded URL as a length-one character vector, or
#'         \code{NA_character_} when the API call fails, the response carries
#'         no \code{long-url} field, or the validity check fails
#' @noRd
expand_url <- function(url_to_expand, check=FALSE, warn=TRUE) {

  # make the API URL
  url <- sprintf(LONGURL_ENDPOINT, "expand")

  # use the API
  resp <- GET(url, query=list(url=url_to_expand,
                              format="json"),
              user_agent(LONGURL_USER_AGENT))

  # warn for API errors
  if (warn) warn_for_status(resp)

  # bad response: kick back NA before trying to parse the body
  # (full field name, no reliance on `$` partial matching)
  if (resp$status_code != 200) return(NA_character_)

  # response object
  tmp <- content(resp)

  # The API's long-url field must be present and be a single string. Testing
  # the extracted value keeps the condition scalar, whereas
  # `names(tmp) %in% "long-url"` has one element per field in the response
  # (and none for an empty body), which if() cannot handle.
  long_url <- if (is.list(tmp)) tmp[["long-url"]] else NULL
  if (!is.character(long_url) || length(long_url) != 1) return(NA_character_)

  # _expensive_ validity check of the *expanded* URL, as documented
  if (check) {
    chk <- HEAD(long_url)
    if (warn) warn_for_status(chk)
    if (chk$status_code != 200) return(NA_character_)
  }

  long_url

}

90
README.Rmd

@ -0,0 +1,90 @@
---
output:
md_document:
variant: markdown_github
---
<!-- README.md is generated from README.Rmd. Please edit that file -->
```{r options, echo = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "README-"
)
```
[![Build Status](https://travis-ci.org/hrbrmstr/longurl.svg)](https://travis-ci.org/hrbrmstr/longurl)
<!-- [![Project Status: Concept - Minimal or no implementation has been done yet.](http://www.repostatus.org/badges/0.1.0/concept.svg)](http://www.repostatus.org/#concept) -->
<!-- [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/longurl)](http://cran.r-project.org/web/packages/longurl) -->
<!-- ![downloads](http://cranlogs.r-pkg.org/badges/grand-total/longurl) -->
longurl : Expand Short URLs using the 'LongURL' API
This does a bit more than [decode_short_url](https://github.com/geoffjentry/twitteR/blob/master/R/utils.R#L22-L31) from the [twitteR](https://github.com/geoffjentry/twitteR) package since it:
- enables you to get a list of what shortening services [longurl](http://longurl.org/) knows about
- has an option to do a post-expand check with a `HEAD` request to ensure the resource exists (useful when you think longurl just failed to expand)
- has an option to warn on expansion issues
- has an option for progress bars
- works with a vector of URLs
- returns `data_frame`s
The following functions are implemented:
- `expand_urls`: Expand a vector of (short) URLs using the longurl service
- `known_services`: Retrieve all the URL shortener services known to the longurl API
The following data sets are included:
### News
- Version `0.0.0.9000` released
### Installation
```{r install, eval=FALSE}
devtools::install_github("hrbrmstr/longurl")
```
```{r options_2, echo=FALSE, message=FALSE, warning=FALSE, error=FALSE}
options(width=200)
```
### Usage
```{r usage}
library(longurl)
library(dplyr)
# current version
packageVersion("longurl")
test_urls <- c("http://t.co/D4C7aWYIiA",
"1.usa.gov/1J6GNoW",
"ift.tt/1L2Llfr",
"bit.ly/1GPr5w5",
"http://l.dds.ec/1da152x",
"http://l.rud.is/seven")
head(expand_urls(test_urls), 2)
svcs <- known_services()
filter(svcs, regex != "")
```
### Test Results
```{r test}
library(longurl)
library(testthat)
date()
test_dir("tests/")
```
### Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md).
By participating in this project you agree to abide by its terms.

90
README.md

@ -0,0 +1,90 @@
<!-- README.md is generated from README.Rmd. Please edit that file -->
[![Build Status](https://travis-ci.org/hrbrmstr/longurl.svg)](https://travis-ci.org/hrbrmstr/longurl) <!-- [![Project Status: Concept - Minimal or no implementation has been done yet.](http://www.repostatus.org/badges/0.1.0/concept.svg)](http://www.repostatus.org/#concept) --> <!-- [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/longurl)](http://cran.r-project.org/web/packages/longurl) -->
<!-- ![downloads](http://cranlogs.r-pkg.org/badges/grand-total/longurl) -->
longurl : Expand Short URLs using the 'LongURL' API
This does a bit more than [decode\_short\_url](https://github.com/geoffjentry/twitteR/blob/master/R/utils.R#L22-L31) from the [twitteR](https://github.com/geoffjentry/twitteR) package since it:
- enables you to get a list of what shortening services [longurl](http://longurl.org/) knows about
- has an option to do a post-expand check with a `HEAD` request to ensure the resource exists (useful when you think longurl just failed to expand)
- has an option to warn on expansion issues
- has an option for progress bars
- works with a vector of URLs
- returns `data_frame`s
The following functions are implemented:
- `expand_urls`: Expand a vector of (short) URLs using the longurl service
- `known_services`: Retrieve all the URL shortener services known to the longurl API
The following data sets are included:
### News
- Version `0.0.0.9000` released
### Installation
``` r
devtools::install_github("hrbrmstr/longurl")
```
### Usage
``` r
library(longurl)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#>
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#>
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# current version
packageVersion("longurl")
#> [1] '0.0.0.9000'
test_urls <- c("http://t.co/D4C7aWYIiA",
"1.usa.gov/1J6GNoW",
"ift.tt/1L2Llfr",
"bit.ly/1GPr5w5",
"http://l.dds.ec/1da152x",
"http://l.rud.is/seven")
head(expand_urls(test_urls), 2)
#> Source: local data frame [2 x 2]
#>
#> orig_url expanded_url
#> 1 http://t.co/D4C7aWYIiA http://www.wired.com/2015/06/airlines-security-hole-grounded-polish-planes/
#> 2 1.usa.gov/1J6GNoW http://democrats.senate.gov/2015/06/22/schedule-for-monday-june-22-2015/
svcs <- known_services()
filter(svcs, regex != "")
#> Source: local data frame [1 x 2]
#>
#> domain regex
#> 1 digg.com http:\\/\\/digg\\.com\\/[^\\/]+$
```
### Test Results
``` r
library(longurl)
library(testthat)
date()
#> [1] "Tue Jun 23 11:29:31 2015"
test_dir("tests/")
#> basic functionality : ..
```
### Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.

9
longurl.Rproj

@ -14,3 +14,12 @@ BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
UseSpacesForTab: Yes
NumSpacesForTab: 2
RnwWeave: Sweave
LaTeX: pdfLaTeX
PackageBuildArgs: --resave-data
PackageCheckArgs: --as-cran

38
man/expand_urls.Rd

@ -0,0 +1,38 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/longurl.r
\name{expand_urls}
\alias{expand_urls}
\title{Expand a vector of (short) URLs using the longurl service}
\usage{
expand_urls(urls_to_expand, check = FALSE, warn = TRUE,
.progress = interactive())
}
\arguments{
\item{urls_to_expand}{character vector of URLs}
\item{check}{run an extra \code{HEAD} request on the expanded URL to determine
validity. This is an expensive operation, so recommended usage is to run
this only on URLs that did not seem to expand.}
\item{warn}{show any warnings (API or otherwise) as messages}
\item{.progress}{display a progress bar (generally only useful in
interactive sessions)}
}
\value{
\code{data_frame} (compatible with \code{data.frame}) of results
with the original URLs in \code{orig_url} and expanded URLs in
\code{expanded_url}
}
\description{
Pass in a vector of URLs (ostensibly "short" URLs) and receive
a \code{data_frame} of the original URLs and expanded URLs.
}
\examples{
test_urls <- c("http://t.co/D4C7aWYIiA",
"1.usa.gov/1J6GNoW",
"ift.tt/1L2Llfr")
big_urls <- expand_urls(test_urls)
head(big_urls)
}

16
man/known_services.Rd

@ -0,0 +1,16 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/longurl.r
\name{known_services}
\alias{known_services}
\title{Retrieve all the URL shortener services known to the longurl API}
\usage{
known_services()
}
\description{
Retrieve all the URL shortener services known to the longurl API
}
\examples{
short_svcs <- known_services()
head(short_svcs)
}

14
man/longurl.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/longurl-package.r
\docType{package}
\name{longurl}
\alias{longurl}
\alias{longurl-package}
\title{A package to work with the longurl API}
\description{
A package to work with the longurl API
}
\author{
Bob Rudis (@hrbrmstr)
}

4
tests/testthat.R

@ -0,0 +1,4 @@
# Test runner script: attaches testthat and the longurl package, then hands
# control to test_check() for the "longurl" package.
library(testthat)
library(longurl)
test_check("longurl")

14
tests/testthat/test-longurl.R

@ -0,0 +1,14 @@
context("basic functionality")

# Both expectations call the live longurl API, so they need network access
# and a reachable service. They are skipped on CRAN, where neither is
# guaranteed and a transient outage would fail the package check.
test_that("the API works", {

  skip_on_cran()

  test_urls <- c("http://t.co/D4C7aWYIiA",
                 "1.usa.gov/1J6GNoW",
                 "ift.tt/1L2Llfr",
                 "bit.ly/1GPr5w5",
                 "http://l.dds.ec/1da152x",
                 "http://l.rud.is/seven")

  # both exported functions must hand back something data.frame-compatible
  expect_true(is.data.frame(expand_urls(test_urls)))
  expect_true(is.data.frame(known_services()))

})
Loading…
Cancel
Save