diff --git a/DESCRIPTION b/DESCRIPTION index a413b06..5cbe88c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: cdcfluview Type: Package Title: cdcfluview is package that retrieves the data behind the CDC's FluView portal -Version: 0.2.1 -Date: 2015-01-11 +Version: 0.3 +Date: 2015-08-07 Author: Bob Rudis (@hrbrmstr) Maintainer: Bob Rudis Description: The CDC's FluView is a Flash portal and the only way to get flu season @@ -13,6 +13,8 @@ BugReports: https://github.com/hrbrmstr/cdcfluview/issues License: MIT + file LICENSE Suggests: testthat +Imports: + httr (>= 0.3.0), + pbapply Depends: - R (>= 3.0.0), - httr (>= 0.5.0) \ No newline at end of file + R (>= 3.0.0) diff --git a/NAMESPACE b/NAMESPACE index 8a301ea..c2e755d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ -# Generated by roxygen2 (4.1.0.9000): do not edit by hand +# Generated by roxygen2 (4.1.1): do not edit by hand export(get_flu_data) export(get_state_data) import(httr) +import(pbapply) diff --git a/R/cdcfluview-package.R b/R/cdcfluview-package.R index 6149a32..f8558ff 100644 --- a/R/cdcfluview-package.R +++ b/R/cdcfluview-package.R @@ -2,5 +2,5 @@ #' @name cdcfluview #' @docType package #' @author Bob Rudis (@@hrbrmstr) -#' @import httr +#' @import httr pbapply NULL diff --git a/README.Rmd b/README.Rmd index c2c9185..4f0fbdb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,7 +1,7 @@ --- title: "README" author: "Bob Rudis" -date: January 11, 2015 +date: August 7, 2015 output: md_document: variant: markdown_github @@ -24,9 +24,10 @@ The following data sets are included: ### News -- Version 0.1 released -- Version 0.2 released : added state-level data retrieval +- Version 0.3 released : fix for the CDC API (it changed how year & region params are encoded in the request) - Version 0.2.1 released : bumped up `httr` version # requirement in `DESCRIPTION` (via Issue [1](https://github.com/hrbrmstr/cdcfluview/issues/1)) +- Version 0.2 released : added state-level data retrieval +- 
Version 0.1 released ### Installation diff --git a/README.md b/README.md index 78b2ca3..6830441 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,10 @@ The following data sets are included: ### News -- Version 0.1 released -- Version 0.2 released : added state-level data retrieval +- Version 0.3 released : fix for the CDC API (it changed how year & region params are encoded in the request) - Version 0.2.1 released : bumped up `httr` version \# requirement in `DESCRIPTION` (via Issue [1](https://github.com/hrbrmstr/cdcfluview/issues/1)) +- Version 0.2 released : added state-level data retrieval +- Version 0.1 released ### Installation @@ -36,30 +37,30 @@ suppressPackageStartupMessages(library(magrittr)) packageVersion("cdcfluview") ``` - ## [1] '0.2.1' + ## [1] '0.3' ``` r flu <- get_flu_data("hhs", sub_region=1:10, "ilinet", years=2014) glimpse(flu) ``` - ## Observations: 140 + ## Observations: 440 ## Variables: ## $ REGION.TYPE (chr) "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions",... ## $ REGION (chr) "Region 1", "Region 2", "Region 3", "Region 4", "Region 5", "Region 6", "Region 7", "Regi... ## $ YEAR (int) 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,... ## $ WEEK (int) 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 4... - ## $ ILITOTAL (int) 352, 2239, 1689, 1173, 1083, 1843, 217, 348, 1201, 61, 386, 2108, 1735, 1501, 1117, 2163,... - ## $ TOTAL.PATIENTS (int) 50896, 134096, 128589, 127408, 106896, 103717, 50032, 36991, 85312, 10868, 50711, 129275,... - ## $ NUM..OF.PROVIDERS (int) 142, 253, 242, 299, 266, 236, 83, 117, 227, 53, 148, 242, 238, 305, 277, 246, 83, 114, 24... - ## $ X..WEIGHTED.ILI (dbl) 0.8825503, 1.8170454, 1.2055377, 0.8357537, 0.7384711, 1.8292926, 0.6895413, 0.6733888, 1... - ## $ X.UNWEIGHTED.ILI (dbl) 0.6916064, 1.6696993, 1.3134872, 0.9206643, 1.0131343, 1.7769507, 0.4337224, 0.9407694, 1... 
- ## $ AGE.0.4 (int) 101, 872, 395, 330, 358, 465, 50, 82, 261, 22, 109, 837, 404, 353, 339, 560, 57, 58, 281,... - ## $ AGE.5.24 (int) 185, 758, 627, 530, 400, 710, 97, 152, 532, 30, 199, 675, 664, 763, 443, 807, 124, 146, 5... + ## $ ILITOTAL (int) 352, 2254, 1696, 1182, 1083, 1844, 220, 348, 1329, 61, 386, 2129, 1747, 1517, 1117, 2165,... + ## $ TOTAL.PATIENTS (int) 51688, 137157, 129302, 130419, 107261, 103975, 50272, 37014, 88421, 11172, 51169, 132513,... + ## $ NUM..OF.PROVIDERS (int) 147, 285, 245, 305, 267, 241, 84, 117, 237, 55, 151, 274, 241, 310, 277, 250, 84, 114, 24... + ## $ X..WEIGHTED.ILI (dbl) 0.8306102, 1.7759176, 1.1477759, 0.8167958, 0.7370374, 1.8252298, 0.6970221, 0.6731439, 1... + ## $ X.UNWEIGHTED.ILI (dbl) 0.6810091, 1.6433722, 1.3116580, 0.9063097, 1.0096867, 1.7735032, 0.4376194, 0.9401848, 1... + ## $ AGE.0.4 (int) 101, 869, 395, 333, 358, 465, 50, 82, 310, 22, 109, 837, 404, 356, 339, 560, 57, 58, 335,... + ## $ AGE.5.24 (int) 185, 757, 629, 536, 400, 711, 98, 152, 577, 30, 199, 677, 670, 774, 443, 809, 124, 146, 5... ## $ AGE.25.64 (lgl) NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N... - ## $ AGE.25.49 (int) 44, 351, 451, 187, 181, 469, 42, 87, 202, 7, 37, 338, 461, 248, 182, 509, 54, 87, 216, 20... - ## $ AGE.50.64 (int) 13, 150, 126, 80, 80, 121, 15, 19, 101, 1, 24, 148, 131, 73, 105, 187, 17, 23, 117, 10, 2... - ## $ AGE.65 (int) 9, 108, 90, 46, 64, 78, 13, 8, 105, 1, 17, 110, 75, 64, 48, 100, 11, 12, 97, 3, 8, 108, 8... + ## $ AGE.25.49 (int) 44, 363, 455, 187, 181, 469, 43, 87, 220, 7, 37, 349, 466, 249, 182, 509, 56, 87, 225, 20... + ## $ AGE.50.64 (int) 13, 157, 127, 80, 80, 121, 15, 19, 110, 1, 24, 151, 132, 74, 105, 187, 18, 23, 118, 10, 2... + ## $ AGE.65 (int) 9, 108, 90, 46, 64, 78, 14, 8, 112, 1, 17, 115, 75, 64, 48, 100, 14, 12, 103, 3, 9, 110, ... 
``` r state_flu <- get_state_data() @@ -157,10 +158,13 @@ suppressPackageStartupMessages(library(testthat)) date() ``` - ## [1] "Mon Jan 12 14:39:51 2015" + ## [1] "Sat Aug 8 14:09:26 2015" ``` r test_dir("tests/") ``` - ## basic functionality : + ## testthat results ======================================================================================================== + ## OK: 0 SKIPPED: 0 FAILED: 0 + ## + ## DONE diff --git a/README_files/figure-markdown_github/unnamed-chunk-4-1.png b/README_files/figure-markdown_github/unnamed-chunk-4-1.png index 49fb31e..146d111 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-4-1.png and b/README_files/figure-markdown_github/unnamed-chunk-4-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-6-1.png b/README_files/figure-markdown_github/unnamed-chunk-6-1.png index d48ec19..e001fb9 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-6-1.png and b/README_files/figure-markdown_github/unnamed-chunk-6-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-8-1.png b/README_files/figure-markdown_github/unnamed-chunk-8-1.png index c7cec2c..2e217a1 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-8-1.png and b/README_files/figure-markdown_github/unnamed-chunk-8-1.png differ diff --git a/R/cdcfluview.R b/REget_flu_data.r similarity index 68% rename from R/cdcfluview.R rename to REget_flu_data.r index d510d76..5f2182c 100644 --- a/R/cdcfluview.R +++ b/REget_flu_data.r @@ -12,7 +12,7 @@ #' For "\code{hhs}", should be a vector between \code{1:10}.\cr #' For "\code{census}", should be a vector between \code{1:9} #' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" or "\code{all}" (for both) -#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC flu seasn 2014-2015) +#' @param years a vector of years to retrieve data for (i.e. 
\code{2014} for CDC flu season 2014-2015) #' @return If only a single \code{data_source} is specified, then a single \code{data.frame} is #' returned, otherwise a named list with each \code{data.frame} is returned. #' @export @@ -51,10 +51,10 @@ get_flu_data <- function(region="hhs", sub_region=1:10, data_source <- gsub("who", "WHO_NREVSS", data_source) data_source <- gsub("ilinet", "ILINet", data_source) - params <- list(SubRegionsList=sub_region, + params <- list(SubRegionsList=paste0(sub_region, collapse=","), DataSources=data_source, RegionID=reg, - SeasonsList=years) + SeasonsList=paste0(years, collapse=",")) out_file <- tempfile(fileext=".zip") @@ -71,7 +71,7 @@ get_flu_data <- function(region="hhs", sub_region=1:10, files <- unzip(out_file, exdir=out_dir, overwrite=TRUE) - file_list <- lapply(files, function(x) { + file_list <- pblapply(files, function(x) { ct <- ifelse(grepl("who", x, ignore.case=TRUE), 0, 1) read.csv(x, header=TRUE, skip=ct, stringsAsFactors=FALSE) }) @@ -85,48 +85,3 @@ get_flu_data <- function(region="hhs", sub_region=1:10, } } - -#' Retrieves the state-level data from the CDC's FluView Portal -#' -#' Uses the data source from the CDC' State-levelFluView \url{http://gis.cdc.gov/grasp/fluview/main.html} -#' and provides state flu reporting data as a single data frame -#' -#' @param years a vector of years to retrieve data for (i.e. 
\code{2014} for CDC flu seasn 2014-2015) -#' @return A \code{data.frame} of state-level data for the specified seasons -#' @export -#' @examples \dontrun{ -#' get_state_dat(2014) -#' get_state_data(c(2013,2014)) -#' } -get_state_data <- function(years=2014) { - - if (any(years < 1997)) - stop("Error: years should be > 1997") - - years <- years - 1960 - - out_file <- tempfile(fileext=".zip") - - params <- list(EndMMWRID=0, - StartMMWRID=0, - QueryType=1, - DataMode="STATE", - SeasonsList=years) - - tmp <- POST("http://gis.cdc.gov/grasp/fluview/FluViewPhase1CustomDownload.ashx", - body=params, - write_disk(out_file)) - - stop_for_status(tmp) - - if (!(file.exists(out_file))) - stop("Error: cannot process downloaded data") - - out_dir <- tempdir() - - files <- unzip(out_file, exdir=out_dir, overwrite=TRUE) - - read.csv(files, header=TRUE, stringsAsFactors=FALSE) - -} - diff --git a/REget_state_data.r b/REget_state_data.r new file mode 100644 index 0000000..606a914 --- /dev/null +++ b/REget_state_data.r @@ -0,0 +1,44 @@ + +#' Retrieves the state-level data from the CDC's FluView Portal +#' +#' Uses the data source from the CDC's State-level FluView \url{http://gis.cdc.gov/grasp/fluview/main.html} +#' and provides state flu reporting data as a single data frame +#' +#' @param years a vector of years to retrieve data for (i.e. 
\code{2014} for CDC flu season 2014-2015) +#' @return A \code{data.frame} of state-level data for the specified seasons +#' @export +#' @examples \dontrun{ +#' get_state_data(2014) +#' get_state_data(c(2013, 2014)) +#' } +get_state_data <- function(years=2014) { + + if (any(years < 1997)) + stop("Error: years should be > 1997") + + years <- years - 1960 + + out_file <- tempfile(fileext=".zip") + + params <- list(EndMMWRID=0, + StartMMWRID=0, + QueryType=1, + DataMode="STATE", + SeasonsList=paste0(years, collapse=",")) + + tmp <- POST("http://gis.cdc.gov/grasp/fluview/FluViewPhase1CustomDownload.ashx", + body=params, + write_disk(out_file)) + + stop_for_status(tmp) + + if (!(file.exists(out_file))) + stop("Error: cannot process downloaded data") + + out_dir <- tempdir() + + files <- unzip(out_file, exdir=out_dir, overwrite=TRUE) + + read.csv(files, header=TRUE, stringsAsFactors=FALSE) + +} diff --git a/man/cdcfluview.Rd b/man/cdcfluview.Rd index 3aef126..534124c 100644 --- a/man/cdcfluview.Rd +++ b/man/cdcfluview.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.0.9000): do not edit by hand +% Generated by roxygen2 (4.1.1): do not edit by hand % Please edit documentation in R/cdcfluview-package.R \docType{package} \name{cdcfluview} diff --git a/man/get_flu_data.Rd b/man/get_flu_data.Rd index c94a617..ac77fe3 100644 --- a/man/get_flu_data.Rd +++ b/man/get_flu_data.Rd @@ -1,5 +1,5 @@ -% Generated by roxygen2 (4.1.0.9000): do not edit by hand -% Please edit documentation in R/cdcfluview.R +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/get_flu_data.r \name{get_flu_data} \alias{get_flu_data} \title{Retrieve CDC flu data} @@ -17,7 +17,7 @@ For "\code{census}", should be a vector between \code{1:9}} \item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" or "\code{all}" (for both)} -\item{years}{a vector of years to retrieve data for (i.e. 
\code{2014} for CDC flu seasn 2014-2015)} +\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)} } \value{ If only a single \code{data_source} is specified, then a single \code{data.frame} is diff --git a/man/get_state_data.Rd b/man/get_state_data.Rd index e356cca..fe52ac4 100644 --- a/man/get_state_data.Rd +++ b/man/get_state_data.Rd @@ -1,5 +1,5 @@ -% Generated by roxygen2 (4.1.0.9000): do not edit by hand -% Please edit documentation in R/cdcfluview.R +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/get_state_data.r \name{get_state_data} \alias{get_state_data} \title{Retrieves the state-level data from the CDC's FluView Portal} @@ -7,7 +7,7 @@ get_state_data(years = 2014) } \arguments{ -\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC flu seasn 2014-2015)} +\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)} } \value{ A \code{data.frame} of state-level data for the specified seasons @@ -19,7 +19,7 @@ and provides state flu reporting data as a single data frame \examples{ \dontrun{ get_state_dat(2014) -get_state_data(c(2013,2014)) +get_state_data(c(2013, 2014)) } }