commit f1d982ea950969bcf7acca3233059f6735db51c6 Author: Bob Rudis Date: Sat Jan 10 20:26:53 2015 -0500 initial commit diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..23a3633 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,4 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^\.travis\.yml$ +^.*md$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60332de --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.Rproj.user +.Rhistory +.RData +.Rproj +src/*.o +src/*.so +src/*.dll diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..cf8c99a --- /dev/null +++ b/.travis.yml @@ -0,0 +1,23 @@ +language: c + +before_install: + - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh + - chmod 755 ./travis-tool.sh + - ./travis-tool.sh bootstrap + +install: + - ./travis-tool.sh install_github plyr + - ./travis-tool.sh install_deps + +script: ./travis-tool.sh run_tests + +on_failure: + - ./travis-tool.sh dump_logs + +branches: + except: + - /-expt$/ + notifications: + email: + on_success: change + on_failure: change diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..df397e9 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,18 @@ +Package: cdcfluview +Type: Package +Title: cdcfluview is package that retrieves the data behind the CDC's FluView portal +Version: 0.1 +Date: 2015-01-10 +Author: Bob Rudis (@hrbrmstr) +Maintainer: Bob Rudis +Description: The CDC's FluView is a Flash portal and the only way to get flu season + data is to use GUI controls, making it tedious to retrieve updates. This package + uses the same API the portal does to programmatically retrieve data. 
+URL: http://github.com/hrbrmstr/cdcfluview +BugReports: https://github.com/hrbrmstr/cdcfluview/issues +License: MIT + file LICENSE +Suggests: + testthat +Depends: + R (>= 3.0.0), + httr (>= 0.3.0) \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1b30c7e --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2015 +COPYRIGHT HOLDER: Bob Rudis diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..eac7d32 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,4 @@ +# Generated by roxygen2 (4.1.0): do not edit by hand + +export(get_flu_data) +import(httr) diff --git a/R/cdcfluview-package.R b/R/cdcfluview-package.R new file mode 100644 index 0000000..6149a32 --- /dev/null +++ b/R/cdcfluview-package.R @@ -0,0 +1,6 @@ +#' A package to retrieve data behind the CDC FluView portal +#' @name cdcfluview +#' @docType package +#' @author Bob Rudis (@@hrbrmstr) +#' @import httr +NULL diff --git a/R/cdcfluview.R b/R/cdcfluview.R new file mode 100644 index 0000000..e161692 --- /dev/null +++ b/R/cdcfluview.R @@ -0,0 +1,87 @@ +# http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html + +#' Retrieve CDC flu data +#' +#' Uses the data source from the CDC FluView \url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html} +#' and provides flu reporting data as either a single data frame or a list +#' of data frames (depending on whether either WHO NREVSS or ILINet - or both - is chosen) +#' +#' @param region one of "\code{hhs}", "\code{census}", "\code{national}" +#' @param sub_region depends on the \code{region}.\cr +#' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr +#' For "\code{hhs}", should be a vector between \code{1:10}.\cr +#' For "\code{census}", should be a vector between \code{1:9} +#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" or "\code{all}" (for both) +#' @param years a vector of years to retrieve data for (e.g. 
#' Retrieve CDC flu data
#'
#' Uses the data source behind the CDC FluView portal
#' (\url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}) and
#' returns flu reporting data as either a single data frame or a named list
#' of data frames, depending on how many files the downloaded archive holds.
#'
#' @param region one of "\code{hhs}", "\code{census}" or "\code{national}"
#'   (case-insensitive); exactly one value must be supplied.
#' @param sub_region depends on \code{region}.\cr
#'   For "\code{national}" it is ignored (an empty value is sent).\cr
#'   For "\code{hhs}" it must be a non-empty subset of \code{1:10}.\cr
#'   For "\code{census}" it must be a non-empty subset of \code{1:9}.
#' @param data_source "\code{who}" (for WHO NREVSS), "\code{ilinet}", both of
#'   them as a vector, or "\code{all}" as a shorthand for both.
#' @param years numeric vector of years to retrieve data for, none earlier
#'   than 1997 (e.g. \code{2014} for CDC flu season 2014-2015).
#' @return If the downloaded archive contains a single file, one
#'   \code{data.frame}; otherwise a list of \code{data.frame}s named by the
#'   first three characters of each extracted file name.
#' @export
#' @examples \dontrun{
#' flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
#' }
get_flu_data <- function(region = "hhs", sub_region = 1:10,
                         data_source = "ilinet", years = 2014) {

  # Check the length before anything else: the membership test below sits in
  # an `if`, and a condition of length > 1 is an error in R >= 4.2.0.
  if (length(region) != 1) {
    stop("Error: can only select one region", call. = FALSE)
  }

  region <- tolower(region)
  data_source <- tolower(data_source)

  if (!(region %in% c("hhs", "census", "national"))) {
    stop("Error: region must be one of hhs, census or national", call. = FALSE)
  }

  if (region == "national") {
    # National data has no sub-regions; an empty value is sent instead.
    sub_region <- ""
  } else {
    # Upper bounds follow the documented ranges: hhs is 1:10, census is 1:9.
    max_sub <- if (region == "hhs") 10 else 9
    if (length(sub_region) == 0 ||
        !all(sub_region %in% seq_len(max_sub))) {
      stop(
        sprintf(
          "Error: sub_region values must fall between 1:%d when region is '%s'",
          max_sub, region
        ),
        call. = FALSE
      )
    }
  }

  if (length(data_source) == 0 ||
      !all(data_source %in% c("who", "ilinet", "all"))) {
    stop("Error: data_source must be either 'who', 'ilinet' or both",
         call. = FALSE)
  }
  # "all" is the documented shorthand for requesting both sources.
  if ("all" %in% data_source) {
    data_source <- c("who", "ilinet")
  }

  if (!is.numeric(years) || length(years) == 0 || anyNA(years) ||
      any(years < 1997)) {
    stop("Error: years should be >= 1997", call. = FALSE)
  }

  # SeasonsList is sent as the year minus 1960 (e.g. 2014 -> 54).
  seasons <- years - 1960

  region_id <- switch(region, hhs = 1, census = 2, national = 3)
  source_labels <- unname(
    c(who = "WHO_NREVSS", ilinet = "ILINet")[data_source]
  )

  params <- list(SubRegionsList = sub_region,
                 DataSources = source_labels,
                 RegionID = region_id,
                 SeasonsList = seasons)

  out_file <- tempfile(fileext = ".zip")
  # The archive is only an intermediate artifact; remove it on exit.
  on.exit(unlink(out_file), add = TRUE)

  resp <- httr::POST(
    "http://gis.cdc.gov/grasp/fluview/FluViewPhase2CustomDownload.ashx",
    body = params,
    httr::write_disk(out_file)
  )

  httr::stop_for_status(resp)

  if (!file.exists(out_file)) {
    stop("Error: cannot process downloaded data", call. = FALSE)
  }

  files <- utils::unzip(out_file, exdir = tempdir(), overwrite = TRUE)

  # An unreadable or empty archive yields no paths; fail loudly instead of
  # returning an empty list.
  if (length(files) == 0) {
    stop("Error: cannot process downloaded data", call. = FALSE)
  }

  file_list <- lapply(files, function(path) {
    # Files whose path contains "who" are read from the first line; every
    # other file has one leading line skipped before the header row.
    skip_lines <- if (grepl("who", path, ignore.case = TRUE)) 0 else 1
    utils::read.csv(path, header = TRUE, skip = skip_lines,
                    stringsAsFactors = FALSE)
  })

  names(file_list) <- substr(basename(files), 1, 3)

  if (length(file_list) == 1) file_list[[1]] else file_list
}
+ +The following functions are implemented: + +- `get_flu_data` : retrieve flu data + +The following data sets are included: + +### News + +- Version 0.1 released + +### Installation + +``` r +devtools::install_github("hrbrmstr/cdcfluview") +``` + +### Usage + +``` r +library(cdcfluview) +``` + + ## Loading required package: httr + +``` r +library(ggplot2) + +# current verison +packageVersion("cdcfluview") +``` + + ## [1] '0.1' + +``` r +flu <- get_flu_data("hhs", sub_region=1:10, "ilinet", years=2014) + +dplyr::glimpse(flu) +``` + + ## Variables: + ## $ REGION.TYPE (chr) "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions",... + ## $ REGION (chr) "Region 1", "Region 2", "Region 3", "Region 4", "Region 5", "Region 6", "Region 7", "Regi... + ## $ YEAR (int) 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,... + ## $ WEEK (int) 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 4... + ## $ ILITOTAL (int) 352, 2239, 1689, 1173, 1083, 1843, 217, 348, 1201, 61, 386, 2108, 1735, 1501, 1117, 2163,... + ## $ TOTAL.PATIENTS (int) 50896, 134096, 128589, 127408, 106896, 103717, 50032, 36991, 85312, 10868, 50711, 129275,... + ## $ NUM..OF.PROVIDERS (int) 142, 253, 242, 299, 266, 236, 83, 117, 227, 53, 148, 242, 238, 305, 277, 246, 83, 114, 24... + ## $ X..WEIGHTED.ILI (dbl) 0.8825503, 1.8170454, 1.2055377, 0.8357537, 0.7384711, 1.8292926, 0.6895413, 0.6733888, 1... + ## $ X.UNWEIGHTED.ILI (dbl) 0.6916064, 1.6696993, 1.3134872, 0.9206643, 1.0131343, 1.7769507, 0.4337224, 0.9407694, 1... + ## $ AGE.0.4 (int) 101, 872, 395, 330, 358, 465, 50, 82, 261, 22, 109, 837, 404, 353, 339, 560, 57, 58, 281,... + ## $ AGE.5.24 (int) 185, 758, 627, 530, 400, 710, 97, 152, 532, 30, 199, 675, 664, 763, 443, 807, 124, 146, 5... + ## $ AGE.25.64 (lgl) NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N... 
+ ## $ AGE.25.49 (int) 44, 351, 451, 187, 181, 469, 42, 87, 202, 7, 37, 338, 461, 248, 182, 509, 54, 87, 216, 20... + ## $ AGE.50.64 (int) 13, 150, 126, 80, 80, 121, 15, 19, 101, 1, 24, 148, 131, 73, 105, 187, 17, 23, 117, 10, 2... + ## $ AGE.65 (int) 9, 108, 90, 46, 64, 78, 13, 8, 105, 1, 17, 110, 75, 64, 48, 100, 11, 12, 97, 3, 8, 108, 8... + +``` r +gg <- ggplot(flu, aes(x=WEEK, y=X..WEIGHTED.ILI, group=REGION)) +gg <- gg + geom_line() +gg <- gg + facet_wrap(~REGION, ncol=2) +gg <- gg + theme_bw() +``` + +![](README_files/figure-markdown_github/unnamed-chunk-4-1.png) + +### Test Results + +``` r +library(cdcfluview) +library(testthat) + +date() +``` + + ## [1] "Sat Jan 10 20:25:05 2015" + +``` r +test_dir("tests/") +``` + + ## basic functionality : diff --git a/README_files/figure-markdown_github/unnamed-chunk-4-1.png b/README_files/figure-markdown_github/unnamed-chunk-4-1.png new file mode 100644 index 0000000..49fb31e Binary files /dev/null and b/README_files/figure-markdown_github/unnamed-chunk-4-1.png differ diff --git a/cdcfluview.Rproj b/cdcfluview.Rproj new file mode 100644 index 0000000..446d9e1 --- /dev/null +++ b/cdcfluview.Rproj @@ -0,0 +1,21 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageBuildArgs: --resave-data +PackageRoxygenize: rd,collate,namespace diff --git a/man/cdcfluview.Rd b/man/cdcfluview.Rd new file mode 100644 index 0000000..5556ab6 --- /dev/null +++ b/man/cdcfluview.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2 (4.1.0): do not edit by hand +% Please edit documentation in R/cdcfluview-package.R +\docType{package} +\name{cdcfluview} +\alias{cdcfluview} +\alias{cdcfluview-package} +\title{A package to retrive data behind the CDC 
FluView portal} +\description{ +A package to retrieve data behind the CDC FluView portal +} +\author{ +Bob Rudis (@hrbrmstr) +} + diff --git a/man/get_flu_data.Rd b/man/get_flu_data.Rd new file mode 100644 index 0000000..e36e7bb --- /dev/null +++ b/man/get_flu_data.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2 (4.1.0): do not edit by hand +% Please edit documentation in R/cdcfluview.R +\name{get_flu_data} +\alias{get_flu_data} +\title{Retrieve CDC flu data} +\usage{ +get_flu_data(region = "hhs", sub_region = 1:10, data_source = "ilinet", + years = 2014) +} +\arguments{ +\item{region}{one of "\code{hhs}", "\code{census}", "\code{national}"} + +\item{sub_region}{depends on the \code{region}.\cr +For "\code{national}", the \code{sub_region} should be \code{NA}.\cr +For "\code{hhs}", should be a vector between \code{1:10}.\cr +For "\code{census}", should be a vector between \code{1:9}} + +\item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" or "\code{all}" (for both)} + +\item{years}{a vector of years to retrieve data for (e.g. \code{2014} for CDC flu season 2014-2015)} +} +\value{ +If only a single \code{data_source} is specified, then a single \code{data.frame} is + returned, otherwise a named list with each \code{data.frame} is returned. 
+} +\description{ +Uses the data source from the CDC FluView \url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html} +and provides flu reporting data as either a single data frame or a list +of data frames (depending on whether either WHO NREVSS or ILINet - or both - is chosen) +} +\examples{ +\dontrun{ +flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014) +} +} + diff --git a/tests/test-all.R b/tests/test-all.R new file mode 100644 index 0000000..86f401f --- /dev/null +++ b/tests/test-all.R @@ -0,0 +1,2 @@ +library(testthat) +test_check("cdcfluview") diff --git a/tests/testthat/test-cdcfluview.R b/tests/testthat/test-cdcfluview.R new file mode 100644 index 0000000..ab6f62f --- /dev/null +++ b/tests/testthat/test-cdcfluview.R @@ -0,0 +1,6 @@ +context("basic functionality") +test_that("we can do something", { + + #expect_that(some_function(), is_a("data.frame")) + +})