|
|
@ -9,11 +9,14 @@ |
|
|
|
#' A lookup table between HHS regions and their member states/territories |
|
|
|
#' is provided in \code{\link{hhs_regions}}. |
|
|
|
#' |
|
|
|
#' @param region one of "\code{hhs}", "\code{census}", "\code{national}" |
|
|
|
#' @param region one of "\code{hhs}", "\code{census}", "\code{national}", |
|
|
|
#' "\code{state}" |
|
|
|
#' @param sub_region depends on the \code{region_type}.\cr |
|
|
|
#' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr |
|
|
|
#' For "\code{hhs}", should be a vector between \code{1:10}.\cr |
|
|
|
#' For "\code{census}", should be a vector between \code{1:9} |
|
|
|
#' For "\code{census}", should be a vector between \code{1:9}.\cr |
|
|
|
#' For "\code{state}", should be a vector of state/territory names |
|
|
|
#' or "\code{all}". |
|
|
|
#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" |
|
|
|
#' or "\code{all}" (for both) |
|
|
|
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC |
|
|
@ -37,13 +40,13 @@ get_flu_data <- function(region="hhs", sub_region=1:10, |
|
|
|
region <- tolower(region) |
|
|
|
data_source <- tolower(data_source) |
|
|
|
|
|
|
|
if (!(region %in% c("hhs", "census", "national"))) |
|
|
|
if (!(region %in% c("hhs", "census", "national", "state"))) |
|
|
|
stop("Error: region must be one of hhs, census or national") |
|
|
|
|
|
|
|
if (length(region) != 1) |
|
|
|
stop("Error: can only select one region") |
|
|
|
|
|
|
|
if (region=="national") sub_region = "" |
|
|
|
if (region=="national") sub_region = 0 |
|
|
|
|
|
|
|
if ((region=="hhs") && !all(sub_region %in% 1:10)) |
|
|
|
stop("Error: sub_region values must fall between 1:10 when region is 'hhs'") |
|
|
@ -57,31 +60,75 @@ get_flu_data <- function(region="hhs", sub_region=1:10, |
|
|
|
if (any(years < 1997)) |
|
|
|
stop("Error: years should be > 1997") |
|
|
|
|
|
|
|
# format the input parameters to fit the CDC API |
|
|
|
|
|
|
|
years <- years - 1960 |
|
|
|
# Match names of states to numbers for API |
|
|
|
if (region == "state") { |
|
|
|
sub_region <- tolower(sub_region) |
|
|
|
|
|
|
|
reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3)[[region]]) |
|
|
|
if (any(sub_region == "all")) { |
|
|
|
sub_region_inpt <- 1:57 |
|
|
|
} else { |
|
|
|
state_match <- data.frame(state = tolower(c(sort(c(datasets::state.name, |
|
|
|
"District of Columbia")), |
|
|
|
"American Samoa", |
|
|
|
"Commonwealth of the Northern Mariana Islands", |
|
|
|
"Puerto Rico", |
|
|
|
"Virgin Islands", |
|
|
|
"New York City", |
|
|
|
"Los Angeles")), |
|
|
|
num = 1:57, |
|
|
|
stringsAsFactors = F) |
|
|
|
|
|
|
|
sub_region_inpt <- state_match$num[state_match$state %in% sub_region] |
|
|
|
|
|
|
|
if (length(sub_region_inpt) == 0) |
|
|
|
stop("Error: no eligible state/territory names provided") |
|
|
|
} |
|
|
|
} else sub_region_inpt <- sub_region |
|
|
|
|
|
|
|
if ("all" %in% data_source) data_source <- c("who", "ilinet") |
|
|
|
# format the input parameters to fit the CDC API |
|
|
|
|
|
|
|
data_source <- gsub("who", "WHO_NREVSS", data_source) |
|
|
|
data_source <- gsub("ilinet", "ILINet", data_source) |
|
|
|
years <- years - 1960 |
|
|
|
|
|
|
|
params <- list(SubRegionsList = paste0(sub_region, collapse=","), |
|
|
|
DataSources = paste0(data_source, collapse=","), |
|
|
|
RegionID = reg, |
|
|
|
SeasonsList = paste0(years, collapse=",")) |
|
|
|
reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3, "state" = 5)[[region]]) |
|
|
|
|
|
|
|
# Format data source |
|
|
|
if (data_source == "who") { |
|
|
|
data_list <- list(list(ID = 0, |
|
|
|
Name = "WHO_NREVSS")) |
|
|
|
} else if (data_source == "ilinet") { |
|
|
|
data_list <- list(list(ID = 1, |
|
|
|
Name = "ILINet")) |
|
|
|
} else data_list <- list(list(ID = 0, |
|
|
|
Name = "WHO_NREVSS"), |
|
|
|
list(ID = 1, |
|
|
|
Name = "ILINet")) |
|
|
|
|
|
|
|
# Format years |
|
|
|
year_list <- lapply(seq_along(years), |
|
|
|
function(x) list(ID = years[x], |
|
|
|
Name = paste(years[x]))) |
|
|
|
|
|
|
|
# Format sub regions |
|
|
|
sub_reg_list <- lapply(seq_along(sub_region_inpt), |
|
|
|
function(x) list(ID = sub_region_inpt[x], |
|
|
|
Name = paste(sub_region_inpt[x]))) |
|
|
|
|
|
|
|
params <- list(AppVersion = "Public", |
|
|
|
DatasourceDT = data_list, |
|
|
|
RegionTypeId = reg, |
|
|
|
SeasonsDT = year_list, |
|
|
|
SubRegionsDT = sub_reg_list) |
|
|
|
|
|
|
|
out_file <- tempfile(fileext=".zip") |
|
|
|
|
|
|
|
# CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs |
|
|
|
|
|
|
|
tmp <- httr::POST("https://gis.cdc.gov/grasp/fluview/FluViewPhase2CustomDownload.ashx", |
|
|
|
tmp <- httr::POST("https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload", |
|
|
|
body = params, |
|
|
|
write_disk(out_file)) |
|
|
|
encode = "json", |
|
|
|
httr::write_disk(out_file)) |
|
|
|
|
|
|
|
stop_for_status(tmp) |
|
|
|
httr::stop_for_status(tmp) |
|
|
|
|
|
|
|
if (!(file.exists(out_file))) |
|
|
|
stop("Error: cannot process downloaded data") |
|
|
@ -97,7 +144,17 @@ get_flu_data <- function(region="hhs", sub_region=1:10, |
|
|
|
suppressMessages(readr::read_csv(x, skip=ct)) |
|
|
|
}) -> file_list |
|
|
|
|
|
|
|
names(file_list) <- substr(basename(files), 1, 3) |
|
|
|
names(file_list) <- substr(basename(files), 1, nchar(basename(files)) - 4) |
|
|
|
|
|
|
|
# If data are missing, X causes numeric columns to be read as character |
|
|
|
purrr::map(file_list, function(x) { |
|
|
|
# Create list of columns that should be numeric - exclude character columns |
|
|
|
cols <- which(!colnames(x) %in% c("REGION", "REGION TYPE", |
|
|
|
"SEASON_DESCRIPTION")) |
|
|
|
suppressWarnings(x[cols] <- purrr::map(x[cols], as.numeric)) |
|
|
|
return(x) |
|
|
|
}) -> file_list |
|
|
|
|
|
|
|
|
|
|
|
# Depending on the parameters, there could be more than one |
|
|
|
# file returned. When there's only one, return a more usable |
|
|
@ -128,3 +185,4 @@ get_flu_data <- function(region="hhs", sub_region=1:10, |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|