Browse Source

Update get_flu_data to new CDC API and availability of state level data

pull/10/head
Craig McGowan 7 years ago
parent
commit
da7430bd4e
  1. 4
      DESCRIPTION
  2. 96
      R/get_flu_data.r
  3. 8
      man/get_flu_data.Rd

4
DESCRIPTION

@ -3,8 +3,8 @@ Type: Package
Title: Retrieve U.S. Flu Season Data from the CDC FluView Portal Title: Retrieve U.S. Flu Season Data from the CDC FluView Portal
Version: 0.5.2 Version: 0.5.2
Date: 2017-03-14 Date: 2017-03-14
Author: Bob Rudis (bob@rud.is) Authors@R: c(person("Bob","Rudis", email = "bob@rud.is", role = c("aut", "cre")),
Maintainer: Bob Rudis <bob@rud.is> person("Craig", "McGowan", email = "mcgowan.cj@gmail.com", role = "ctb"))
Encoding: UTF-8 Encoding: UTF-8
Description: The U.S. Centers for Disease Control (CDC) maintains a portal Description: The U.S. Centers for Disease Control (CDC) maintains a portal
<http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html> for <http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html> for

96
R/get_flu_data.r

@ -9,11 +9,14 @@
#' A lookup table between HHS regions and their member states/territories #' A lookup table between HHS regions and their member states/territories
#' is provided in \code{\link{hhs_regions}}. #' is provided in \code{\link{hhs_regions}}.
#' #'
#' @param region one of "\code{hhs}", "\code{census}", "\code{national}" #' @param region one of "\code{hhs}", "\code{census}", "\code{national}",
#' "\code{state}"
#' @param sub_region depends on the \code{region_type}.\cr #' @param sub_region depends on the \code{region_type}.\cr
#' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr #' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
#' For "\code{hhs}", should be a vector between \code{1:10}.\cr #' For "\code{hhs}", should be a vector between \code{1:10}.\cr
#' For "\code{census}", should be a vector between \code{1:9} #' For "\code{census}", should be a vector between \code{1:9}.\cr
#' For "\code{state}", should be a vector of state/territory names
#' or "\code{all}".
#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" #' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}"
#' or "\code{all}" (for both) #' or "\code{all}" (for both)
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC #' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
@ -37,13 +40,13 @@ get_flu_data <- function(region="hhs", sub_region=1:10,
region <- tolower(region) region <- tolower(region)
data_source <- tolower(data_source) data_source <- tolower(data_source)
if (!(region %in% c("hhs", "census", "national"))) if (!(region %in% c("hhs", "census", "national", "state")))
stop("Error: region must be one of hhs, census or national") stop("Error: region must be one of hhs, census or national")
if (length(region) != 1) if (length(region) != 1)
stop("Error: can only select one region") stop("Error: can only select one region")
if (region=="national") sub_region = "" if (region=="national") sub_region = 0
if ((region=="hhs") && !all(sub_region %in% 1:10)) if ((region=="hhs") && !all(sub_region %in% 1:10))
stop("Error: sub_region values must fall between 1:10 when region is 'hhs'") stop("Error: sub_region values must fall between 1:10 when region is 'hhs'")
@ -57,31 +60,75 @@ get_flu_data <- function(region="hhs", sub_region=1:10,
if (any(years < 1997)) if (any(years < 1997))
stop("Error: years should be > 1997") stop("Error: years should be > 1997")
# format the input parameters to fit the CDC API # Match names of states to numbers for API
if (region == "state") {
years <- years - 1960 sub_region <- tolower(sub_region)
reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3)[[region]]) if (any(sub_region == "all")) {
sub_region_inpt <- 1:57
} else {
state_match <- data.frame(state = tolower(c(sort(c(datasets::state.name,
"District of Columbia")),
"American Samoa",
"Commonwealth of the Northern Mariana Islands",
"Puerto Rico",
"Virgin Islands",
"New York City",
"Los Angeles")),
num = 1:57,
stringsAsFactors = F)
sub_region_inpt <- state_match$num[state_match$state %in% sub_region]
if (length(sub_region_inpt) == 0)
stop("Error: no eligible state/territory names provided")
}
} else sub_region_inpt <- sub_region
if ("all" %in% data_source) data_source <- c("who", "ilinet") # format the input parameters to fit the CDC API
data_source <- gsub("who", "WHO_NREVSS", data_source) years <- years - 1960
data_source <- gsub("ilinet", "ILINet", data_source)
params <- list(SubRegionsList = paste0(sub_region, collapse=","), reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3, "state" = 5)[[region]])
DataSources = paste0(data_source, collapse=","),
RegionID = reg, # Format data source
SeasonsList = paste0(years, collapse=",")) if (data_source == "who") {
data_list <- list(list(ID = 0,
Name = "WHO_NREVSS"))
} else if (data_source == "ilinet") {
data_list <- list(list(ID = 1,
Name = "ILINet"))
} else data_list <- list(list(ID = 0,
Name = "WHO_NREVSS"),
list(ID = 1,
Name = "ILINet"))
# Format years
year_list <- lapply(seq_along(years),
function(x) list(ID = years[x],
Name = paste(years[x])))
# Format sub regions
sub_reg_list <- lapply(seq_along(sub_region_inpt),
function(x) list(ID = sub_region_inpt[x],
Name = paste(sub_region_inpt[x])))
params <- list(AppVersion = "Public",
DatasourceDT = data_list,
RegionTypeId = reg,
SeasonsDT = year_list,
SubRegionsDT = sub_reg_list)
out_file <- tempfile(fileext=".zip") out_file <- tempfile(fileext=".zip")
# CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs # CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs
tmp <- httr::POST("https://gis.cdc.gov/grasp/fluview/FluViewPhase2CustomDownload.ashx", tmp <- httr::POST("https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload",
body = params, body = params,
write_disk(out_file)) encode = "json",
httr::write_disk(out_file))
stop_for_status(tmp) httr::stop_for_status(tmp)
if (!(file.exists(out_file))) if (!(file.exists(out_file)))
stop("Error: cannot process downloaded data") stop("Error: cannot process downloaded data")
@ -97,7 +144,17 @@ get_flu_data <- function(region="hhs", sub_region=1:10,
suppressMessages(readr::read_csv(x, skip=ct)) suppressMessages(readr::read_csv(x, skip=ct))
}) -> file_list }) -> file_list
names(file_list) <- substr(basename(files), 1, 3) names(file_list) <- substr(basename(files), 1, nchar(basename(files)) - 4)
# If data are missing, X causes numeric columns to be read as character
purrr::map(file_list, function(x) {
# Create list of columns that should be numeric - exclude character columns
cols <- which(!colnames(x) %in% c("REGION", "REGION TYPE",
"SEASON_DESCRIPTION"))
suppressWarnings(x[cols] <- purrr::map(x[cols], as.numeric))
return(x)
}) -> file_list
# Depending on the parameters, there could be more than one # Depending on the parameters, there could be more than one
# file returned. When there's only one, return a more usable # file returned. When there's only one, return a more usable
@ -128,3 +185,4 @@ get_flu_data <- function(region="hhs", sub_region=1:10,
} }
} }

8
man/get_flu_data.Rd

@ -8,12 +8,15 @@ get_flu_data(region = "hhs", sub_region = 1:10, data_source = "ilinet",
years = as.numeric(format(Sys.Date(), "\%Y"))) years = as.numeric(format(Sys.Date(), "\%Y")))
} }
\arguments{ \arguments{
\item{region}{one of "\code{hhs}", "\code{census}", "\code{national}"} \item{region}{one of "\code{hhs}", "\code{census}", "\code{national}",
"\code{state}"}
\item{sub_region}{depends on the \code{region_type}.\cr \item{sub_region}{depends on the \code{region_type}.\cr
For "\code{national}", the \code{sub_region} should be \code{NA}.\cr For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
For "\code{hhs}", should be a vector between \code{1:10}.\cr For "\code{hhs}", should be a vector between \code{1:10}.\cr
For "\code{census}", should be a vector between \code{1:9}} For "\code{census}", should be a vector between \code{1:9}.\cr
For "\code{state}", should be a vector of state/territory names
or "\code{all}".}
\item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" \item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}"
or "\code{all}" (for both)} or "\code{all}" (for both)}
@ -49,4 +52,3 @@ There is often a noticeable delay when making the API request to the CDC.
flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014) flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
} }
} }

Loading…
Cancel
Save