From 230a8856650b54ea06d92c4229a43029fb367666 Mon Sep 17 00:00:00 2001
From: Bob Rudis
Date: Sat, 8 Aug 2015 14:09:49 -0400
Subject: [PATCH] v0.3.0

---
 R/get_flu_data.r   | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 R/get_state_data.r | 44 +++++++++++++++++++++++++++
 REget_flu_data.r   | 87 ------------------------------------------------------
 REget_state_data.r | 44 ---------------------------
 4 files changed, 131 insertions(+), 131 deletions(-)
 create mode 100644 R/get_flu_data.r
 create mode 100644 R/get_state_data.r
 delete mode 100644 REget_flu_data.r
 delete mode 100644 REget_state_data.r

diff --git a/R/get_flu_data.r b/R/get_flu_data.r
new file mode 100644
index 0000000..5f2182c
--- /dev/null
+++ b/R/get_flu_data.r
@@ -0,0 +1,87 @@
+# http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html
+
+#' Retrieve CDC flu data
+#'
+#' Uses the data source from the CDC FluView \url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}
+#' and provides flu reporting data as either a single data frame or a list
+#' of data frames (depending on whether either WHO NREVSS or ILINet - or both - is chosen)
+#'
+#' @param region one of "\code{hhs}", "\code{census}", "\code{national}" (case-insensitive, length 1)
+#' @param sub_region depends on the \code{region}.\cr
+#' For "\code{national}", the \code{sub_region} is ignored.\cr
+#' For "\code{hhs}", should be a vector between \code{1:10}.\cr
+#' For "\code{census}", should be a vector between \code{1:9}
+#' @param data_source "\code{who}" (for WHO NREVSS), "\code{ilinet}", or \code{c("who", "ilinet")} (for both)
+#' @param years a vector of years to retrieve data for (e.g. \code{2014} for CDC flu season 2014-2015); each must be >= 1997
+#' @return If only a single \code{data_source} is specified, then a single \code{data.frame} is
+#' returned, otherwise a list of \code{data.frame}s named by the first three characters of each downloaded file name.
+#' @export
+#' @examples \dontrun{
+#' flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
+#' }
+get_flu_data <- function(region="hhs", sub_region=1:10,
+                         data_source="ilinet", years=2014) {
+
+  region <- tolower(region)
+  data_source <- tolower(data_source)
+
+  # check the length first: a vector-valued `region` would break the scalar tests below
+  if (length(region) != 1)
+    stop("Error: can only select one region")
+
+  if (!(region %in% c("hhs", "census", "national")))
+    stop("Error: region must be one of hhs, census or national")
+
+  if (region=="national") sub_region <- ""  # any supplied value is discarded; an empty SubRegionsList is sent
+
+  if ((region=="hhs") && !all(sub_region %in% 1:10))
+    stop("Error: sub_region values must fall between 1:10 when region is 'hhs'")
+
+  # the documented census range is 1:9 (see @param sub_region)
+  if ((region=="census") && !all(sub_region %in% 1:9))
+    stop("Error: sub_region values must fall between 1:9 when region is 'census'")
+
+  if (!all(data_source %in% c("who", "ilinet")))
+    stop("Error: data_source must be either 'who', 'ilinet' or both")
+
+  if (any(years < 1997))
+    stop("Error: years should be >= 1997")
+
+  # SeasonsList is sent as year - 1960 (e.g. 2014 -> 54)
+  years <- years - 1960
+
+  reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3)[[region]])
+  data_source <- gsub("who", "WHO_NREVSS", data_source)
+  data_source <- gsub("ilinet", "ILINet", data_source)
+
+  # every multi-valued field goes out as a single comma-separated string
+  params <- list(SubRegionsList=paste0(sub_region, collapse=","),
+                 DataSources=paste0(data_source, collapse=","),
+                 RegionID=reg,
+                 SeasonsList=paste0(years, collapse=","))
+
+  out_file <- tempfile(fileext=".zip")
+
+  tmp <- POST("http://gis.cdc.gov/grasp/fluview/FluViewPhase2CustomDownload.ashx",
+              body=params,
+              write_disk(out_file))
+
+  stop_for_status(tmp)
+
+  if (!(file.exists(out_file)))
+    stop("Error: cannot process downloaded data")
+
+  files <- unzip(out_file, exdir=tempdir(), overwrite=TRUE)
+
+  # non-WHO files are read with one leading line skipped; test the file
+  # name only so that a "who" elsewhere in the temp path cannot match
+  file_list <- pblapply(files, function(x) {
+    ct <- if (grepl("who", basename(x), ignore.case=TRUE)) 0 else 1
+    read.csv(x, header=TRUE, skip=ct, stringsAsFactors=FALSE)
+  })
+
+  names(file_list) <- substr(basename(files), 1, 3)
+
+  if (length(file_list) == 1) file_list[[1]] else file_list
+
+}
diff --git a/R/get_state_data.r b/R/get_state_data.r
new file mode 100644
index 0000000..606a914
--- /dev/null
+++ b/R/get_state_data.r
@@ -0,0 +1,44 @@
+
+#' Retrieves the state-level data from the CDC's FluView Portal
+#'
+#' Uses the data source from the CDC's state-level FluView \url{http://gis.cdc.gov/grasp/fluview/main.html}
+#' and provides state flu reporting data as a single data frame
+#'
+#' @param years a vector of years to retrieve data for (e.g. \code{2014} for CDC flu season 2014-2015); each must be >= 1997
+#' @return A \code{data.frame} of state-level data for the specified seasons
+#' @export
+#' @examples \dontrun{
+#' get_state_data(2014)
+#' get_state_data(c(2013, 2014))
+#' }
+get_state_data <- function(years=2014) {
+
+  if (any(years < 1997))
+    stop("Error: years should be >= 1997")
+
+  # SeasonsList is sent as year - 1960 (e.g. 2014 -> 54)
+  years <- years - 1960
+
+  out_file <- tempfile(fileext=".zip")
+
+  params <- list(EndMMWRID=0,
+                 StartMMWRID=0,
+                 QueryType=1,
+                 DataMode="STATE",
+                 SeasonsList=paste0(years, collapse=","))
+
+  tmp <- POST("http://gis.cdc.gov/grasp/fluview/FluViewPhase1CustomDownload.ashx",
+              body=params,
+              write_disk(out_file))
+
+  stop_for_status(tmp)
+
+  if (!(file.exists(out_file)))
+    stop("Error: cannot process downloaded data")
+
+  # NOTE(review): read.csv() takes one path, so the archive is assumed to hold a single CSV - confirm
+  files <- unzip(out_file, exdir=tempdir(), overwrite=TRUE)
+
+  read.csv(files, header=TRUE, stringsAsFactors=FALSE)
+
+}
diff --git a/REget_flu_data.r b/REget_flu_data.r
deleted file mode 100644
index 5f2182c..0000000
--- a/REget_flu_data.r
+++ /dev/null
@@ -1,87 +0,0 @@
-# http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html
-
-#' Retrieve CDC flu data
-#'
-#' Uses the data source from the CDC FluView \url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}
-#' and provides flu reporting data as either a single data frame or a list
-#' of data frames (depending on whether either WHO NREVSS or ILINet - or both - is chosen)
-#'
-#' @param region one of "\code{hhs}", "\code{census}", "\code{national}"
-#' @param sub_region depends on the \code{region_type}.\cr
-#' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
-#' For "\code{hhs}", should be a vector between \code{1:10}.\cr
-#' For "\code{census}", should be a vector between \code{1:9}
-#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" or "\code{all}" (for both)
-#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)
-#' @return If only a single \code{data_source} is specified, then a single \code{data.frame} is
-#' returned, otherwise a named list with each \code{data.frame} is returned.
-#' @export
-#' @examples \dontrun{
-#' flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
-#' }
-get_flu_data <- function(region="hhs", sub_region=1:10,
-                         data_source="ilinet", years=2014) {
-
-  region <- tolower(region)
-  data_source <- tolower(data_source)
-
-  if (!(region %in% c("hhs", "census", "national")))
-    stop("Error: region must be one of hhs, census or national")
-
-  if (length(region) != 1)
-    stop("Error: can only select one region")
-
-  if (region=="national") sub_region = ""
-
-  if ((region=="hhs") && !all(sub_region %in% 1:10))
-    stop("Error: sub_region values must fall between 1:10 when region is 'hhs'")
-
-  if ((region=="census") && !all(sub_region %in% 1:19))
-    stop("Error: sub_region values must fall between 1:10 when region is 'census'")
-
-  if (!all(data_source %in% c("who", "ilinet")))
-    stop("Error: data_source must be either 'who', 'ilinet' or both")
-
-  if (any(years < 1997))
-    stop("Error: years should be > 1997")
-
-  years <- years - 1960
-
-  reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3)[[region]])
-  data_source <- gsub("who", "WHO_NREVSS", data_source)
-  data_source <- gsub("ilinet", "ILINet", data_source)
-
-  params <- list(SubRegionsList=paste0(sub_region, collapse=","),
-                 DataSources=data_source,
-                 RegionID=reg,
-                 SeasonsList=paste0(years, collapse=","))
-
-  out_file <- tempfile(fileext=".zip")
-
-  tmp <- POST("http://gis.cdc.gov/grasp/fluview/FluViewPhase2CustomDownload.ashx",
-              body=params,
-              write_disk(out_file))
-
-  stop_for_status(tmp)
-
-  if (!(file.exists(out_file)))
-    stop("Error: cannot process downloaded data")
-
-  out_dir <- tempdir()
-
-  files <- unzip(out_file, exdir=out_dir, overwrite=TRUE)
-
-  file_list <- pblapply(files, function(x) {
-    ct <- ifelse(grepl("who", x, ignore.case=TRUE), 0, 1)
-    read.csv(x, header=TRUE, skip=ct, stringsAsFactors=FALSE)
-  })
-
-  names(file_list) <- substr(basename(files), 1, 3)
-
-  if (length(file_list) == 1) {
-    return(file_list[[1]])
-  } else {
-    return(file_list)
-  }
-
-}
diff --git a/REget_state_data.r b/REget_state_data.r
deleted file mode 100644
index 606a914..0000000
--- a/REget_state_data.r
+++ /dev/null
@@ -1,44 +0,0 @@
-
-#' Retrieves the state-level data from the CDC's FluView Portal
-#'
-#' Uses the data source from the CDC' State-levelFluView \url{http://gis.cdc.gov/grasp/fluview/main.html}
-#' and provides state flu reporting data as a single data frame
-#'
-#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)
-#' @return A \code{data.frame} of state-level data for the specified seasons
-#' @export
-#' @examples \dontrun{
-#' get_state_dat(2014)
-#' get_state_data(c(2013, 2014))
-#' }
-get_state_data <- function(years=2014) {
-
-  if (any(years < 1997))
-    stop("Error: years should be > 1997")
-
-  years <- years - 1960
-
-  out_file <- tempfile(fileext=".zip")
-
-  params <- list(EndMMWRID=0,
-                 StartMMWRID=0,
-                 QueryType=1,
-                 DataMode="STATE",
-                 SeasonsList=paste0(years, collapse=","))
-
-  tmp <- POST("http://gis.cdc.gov/grasp/fluview/FluViewPhase1CustomDownload.ashx",
-              body=params,
-              write_disk(out_file))
-
-  stop_for_status(tmp)
-
-  if (!(file.exists(out_file)))
-    stop("Error: cannot process downloaded data")
-
-  out_dir <- tempdir()
-
-  files <- unzip(out_file, exdir=out_dir, overwrite=TRUE)
-
-  read.csv(files, header=TRUE, stringsAsFactors=FALSE)
-
-}