boB Rudis
7 years ago
12 changed files with 18 additions and 718 deletions
@ -1,188 +0,0 @@ |
|||||
#' Retrieves state, regional or national influenza statistics from the CDC |
|
||||
#' |
|
||||
#' Uses the data source from the |
|
||||
#' \href{https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView} |
|
||||
#' and provides flu reporting data as either a single data frame or a list of |
|
||||
#' data frames (depending on whether either \code{WHO NREVSS} or \code{ILINet} |
|
||||
#' (or both) is chosen). |
|
||||
#' |
|
||||
#' A lookup table between HHS regions and their member states/territories |
|
||||
#' is provided in \code{\link{hhs_regions}}. |
|
||||
#' |
|
||||
#' @param region one of "\code{hhs}", "\code{census}", "\code{national}", |
|
||||
#' "\code{state}" |
|
||||
#' @param sub_region depends on the \code{region}.\cr |
|
||||
#' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr |
|
||||
#' For "\code{hhs}", should be a vector between \code{1:10}.\cr |
|
||||
#' For "\code{census}", should be a vector between \code{1:9}.\cr |
|
||||
#' For "\code{state}", should be a vector of state/territory names |
|
||||
#' or "\code{all}". |
|
||||
#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" |
|
||||
#' or "\code{all}" (for both) |
|
||||
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC |
|
||||
#' flu season 2014-2015). Default value is the current year and all |
|
||||
#' \code{years} values should be >= \code{1997} |
|
||||
#' @return If only a single \code{data_source} is specified, then a single |
|
||||
#' \code{data.frame} is returned, otherwise a named list with each |
|
||||
#' \code{data.frame} is returned. |
|
||||
#' @note There is often a noticeable delay when making the API request to the CDC. |
|
||||
#' This is not due to a large download size, but the time it takes for their |
|
||||
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose} |
|
||||
#' if you would like to see what's going on. |
|
||||
#' @export |
|
||||
#' @examples \dontrun{ |
|
||||
#' flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014) |
|
||||
#' } |
|
||||
get_flu_data <- function(region="hhs", sub_region=1:10,
                         data_source="ilinet",
                         years=as.numeric(format(Sys.Date(), "%Y"))) {

  region <- tolower(region)
  data_source <- tolower(data_source)

  # Check the length before anything else: the `if` conditions below need a
  # scalar `region`, and a vector would otherwise fail with an obscure error.
  if (length(region) != 1)
    stop("Error: can only select one region")

  if (!(region %in% c("hhs", "census", "national", "state")))
    stop("Error: region must be one of hhs, census, national or state")

  # Whatever the caller passed, a national request is sent with sub-region 0.
  if (region == "national") sub_region <- 0

  if ((region == "hhs") && !all(sub_region %in% 1:10))
    stop("Error: sub_region values must fall between 1:10 when region is 'hhs'")

  if ((region == "census") && !all(sub_region %in% 1:9))
    stop("Error: sub_region values must fall between 1:9 when region is 'census'")

  if (length(data_source) == 0 ||
      !all(data_source %in% c("who", "ilinet", "all")))
    stop("Error: data_source must be either 'who', 'ilinet', 'all' or c('who', 'ilinet')")

  if (any(years < 1997))
    stop("Error: years should be >= 1997")

  # Match names of states to numbers for API
  if (region == "state") {

    sub_region <- tolower(sub_region)

    if (any(sub_region == "all")) {
      sub_region_inpt <- 1:57
    } else {

      # IDs 1..51 are the alphabetically sorted states plus DC; 52..57 are the
      # additional territories/cities in the order listed here.
      state_match <- data.frame(
        state = tolower(c(sort(c(datasets::state.name,
                                 "District of Columbia")),
                          "American Samoa",
                          "Commonwealth of the Northern Mariana Islands",
                          "Puerto Rico",
                          "Virgin Islands",
                          "New York City",
                          "Los Angeles")),
        num = 1:57,
        stringsAsFactors = FALSE)

      sub_region_inpt <- state_match$num[state_match$state %in% sub_region]

      if (length(sub_region_inpt) == 0)
        stop("Error: no eligible state/territory names provided")

    }

  } else {
    sub_region_inpt <- sub_region
  }

  # format the input parameters to fit the CDC API

  # The request identifies a season by (starting year - 1960). `years` itself
  # is left untouched so it can be reused for the retry at the end.
  season_ids <- years - 1960

  reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3, "state"=5)[[region]])

  # Format data source. `data_source` may be a vector (e.g. c("who", "ilinet")),
  # so membership is tested instead of comparing with `==` inside `if`.
  want_who <- any(data_source %in% c("who", "all"))
  want_ilinet <- any(data_source %in% c("ilinet", "all"))

  data_list <- c(
    if (want_who) list(list(ID = 0, Name = "WHO_NREVSS")),
    if (want_ilinet) list(list(ID = 1, Name = "ILINet"))
  )

  # Format years
  year_list <- lapply(seq_along(season_ids),
                      function(i) list(ID = season_ids[i],
                                       Name = paste(season_ids[i])))

  # Format sub regions
  sub_reg_list <- lapply(seq_along(sub_region_inpt),
                         function(i) list(ID = sub_region_inpt[i],
                                          Name = paste(sub_region_inpt[i])))

  params <- list(AppVersion = "Public",
                 DatasourceDT = data_list,
                 RegionTypeId = reg,
                 SeasonsDT = year_list,
                 SubRegionsDT = sub_reg_list)

  out_file <- tempfile(fileext=".zip")
  # The downloaded archive is only needed until it has been expanded.
  on.exit(unlink(out_file), add = TRUE)

  # CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs

  tmp <- httr::POST("https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload",
                    body = params,
                    encode = "json",
                    httr::write_disk(out_file))

  httr::stop_for_status(tmp)

  if (!(file.exists(out_file)))
    stop("Error: cannot process downloaded data")

  out_dir <- tempdir()

  files <- unzip(out_file, exdir=out_dir, overwrite=TRUE)

  pb <- dplyr::progress_estimated(length(files))
  file_list <- purrr::map(files, function(x) {
    pb$tick()$print()
    # Every CSV in the archive is read with its first line skipped.
    suppressMessages(readr::read_csv(x, skip=1))
  })

  # Name each data frame after its file, minus the 4-character extension
  names(file_list) <- substr(basename(files), 1, nchar(basename(files)) - 4)

  # If data are missing, X causes numeric columns to be read as character
  file_list <- purrr::map(file_list, function(x) {
    # Create list of columns that should be numeric - exclude character columns
    cols <- which(!colnames(x) %in% c("REGION", "REGION TYPE",
                                      "SEASON_DESCRIPTION"))
    suppressWarnings(x[cols] <- purrr::map(x[cols], as.numeric))
    x
  })

  # Depending on the parameters, there could be more than one
  # file returned. When there's only one, return a more usable
  # structure.

  if (length(file_list) != 1)
    return(file_list)

  file_list <- file_list[[1]]

  # when no rows, then it's likely the caller specified the
  # current year and the flu season has technically not started yet.
  # so help them out and move the year back and get current flu
  # season data.

  current_year <- as.numeric(format(Sys.Date(), "%Y"))

  if ((nrow(file_list) == 0) &&
      (length(years) == 1) &&
      (years == current_year)) {

    message("Adjusting [years] to get current season...")
    return(get_flu_data(region=region, sub_region=sub_region,
                        data_source=data_source, years=years-1))
  }

  file_list

}
|
||||
|
|
@ -1,119 +0,0 @@ |
|||||
#' Retrieves influenza hospitalization statistics from the CDC |
|
||||
#' |
|
||||
#' Uses the data source from the |
|
||||
#' \href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView} |
|
||||
#' and provides influenza hospitalization reporting data as a data frame. |
|
||||
#' |
|
||||
#' @param area one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or two |
|
||||
#' letter state abbreviation for an individual site. Exceptions are |
|
||||
#' New York - Albany ("\code{nya}") and New York - Rochester |
|
||||
#' ("\code{nyr}") |
|
||||
#' @param age_group a vector of age groups to pull data for. Possible values are: |
|
||||
#' "\code{overall}", "\code{0-4y}", "\code{5-17y}", "\code{18-49y}", |
|
||||
#' "\code{50-64y}", "\code{65+y}". |
|
||||
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC |
|
||||
#' flu season 2014-2015). Default value is the previous year and all |
|
||||
#' \code{years} values should be >= \code{2009} |
|
||||
#' @return A single \code{data.frame}. |
|
||||
#' @note There is often a noticeable delay when making the API request to the CDC. |
|
||||
#' This is not due to a large download size, but the time it takes for their |
|
||||
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose} |
|
||||
#' if you would like to see what's going on. |
|
||||
#' @export |
|
||||
#' @examples \dontrun{ |
|
||||
#' # All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons |
|
||||
#' flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014) |
|
||||
#' } |
|
||||
get_hosp_data <- function(area="flusurvnet", age_group="overall",
                          years=as.numeric(format(Sys.Date(), "%Y")) - 1) {

  area <- tolower(area)
  age_group <- tolower(age_group)

  # Check the length first: the membership test below is used as a scalar
  # `if` condition and must not see a vector.
  if (length(area) != 1)
    stop("Error: can only select one area")

  if (!(area %in% c("flusurvnet", "eip", "ihsp", "ca", "co", "ct", "ga", "md",
                    "mn", "nm", "nya", "nyr", "or", "tn", "id", "ia", "mi",
                    "oh", "ok", "ri", "sd", "ut")))
    stop("Error: area must be one of flusurvnet, eip, ihsp, or a valid state abbreviation")

  if (!all(age_group %in% c("overall", "0-4y", "5-17y", "18-49y",
                            "50-64y", "65+y")))
    stop("Error: invalid age group specified")

  if (any(years < 2009))
    stop("Error: years should be >= 2009")

  # Match names of age groups to numbers for API
  age_match <- data.frame(age_group = c("overall", "0-4y", "5-17y",
                                        "18-49y", "50-64y", "65+y"),
                          code = c(6, 1, 2, 3, 4, 5),
                          stringsAsFactors = FALSE)

  # Codes come back in lookup-table order, not in the caller's order
  age_group_num <- age_match$code[age_match$age_group %in% age_group]

  # format the input parameters to fit the CDC API

  # The request identifies a season by (starting year - 1960)
  years <- years - 1960

  # Each area maps to a catchment ID and a network ID for the request
  area_match <- data.frame(area = c("flusurvnet", "eip", "ca", "co", "ct",
                                    "ga", "md", "mn", "nm", "nya", "nyr", "or",
                                    "tn", "ihsp", "id", "ia", "mi", "oh", "ok",
                                    "ri", "sd", "ut"),
                           catch = c(22, 22, 1, 2, 3, 4, 7, 9, 11, 13, 14, 17,
                                     20, 22, 6, 5, 8, 15, 16, 18, 19, 21),
                           network = c(1, rep(2, 12), rep(3, 9)),
                           stringsAsFactors = FALSE)

  area_row <- area_match$area == area

  # Format years
  year_list <- lapply(seq_along(years),
                      function(i) list(ID = years[i]))

  # Format age group
  age_list <- lapply(seq_along(age_group_num),
                     function(i) list(ID = age_group_num[i]))

  params <- list(AppVersion = "Public",
                 agegroups = age_list,
                 catchmentid = area_match$catch[area_row],
                 networkid = area_match$network[area_row],
                 seasons = year_list)

  out_file <- tempfile(fileext=".json")
  # The file is fully parsed below, so it can be removed on exit.
  on.exit(unlink(out_file), add = TRUE)

  # The response is saved to disk and then parsed as JSON

  tmp <- httr::POST("https://gis.cdc.gov/GRASP/Flu3/PostPhase03DownloadData",
                    body = params,
                    encode = "json",
                    httr::write_disk(out_file, overwrite = TRUE))

  httr::stop_for_status(tmp)

  if (!(file.exists(out_file)))
    stop("Error: cannot process downloaded data")

  # Only the first element of the parsed response is returned
  jsonlite::fromJSON(out_file)[[1]]

}
|
@ -1,53 +0,0 @@ |
|||||
#' Retrieves state/territory-level influenza statistics from the CDC |
|
||||
#' |
|
||||
#' Uses the data source from the CDC's State-level FluView |
|
||||
#' \url{https://gis.cdc.gov/grasp/fluview/main.html} and provides state flu |
|
||||
#' reporting data as a single data frame.\cr |
|
||||
#' \cr |
|
||||
#' This function provides similar data to \code{\link{get_weekly_flu_report}} but |
|
||||
#' provides more metadata about the reporting sources and has access to more |
|
||||
#' historical information. |
|
||||
#' |
|
||||
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC |
|
||||
#' flu season 2014-2015). Default value is the current year and all |
|
||||
#' \code{years} values should be >= \code{2008} |
|
||||
#' @return A \code{data.frame} of state-level data for the specified seasons |
|
||||
#' (also classed as \code{cdcstatedata}) |
|
||||
#' @export |
|
||||
#' @note There is often a noticeable delay when making the API request to the CDC. This |
|
||||
#' is not due to a large download size, but the time it takes for their |
|
||||
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose} |
|
||||
#' if you would like to see what's going on. |
|
||||
#' @examples \dontrun{ |
|
||||
#' get_state_data(2014) |
|
||||
#' get_state_data(c(2013, 2014)) |
|
||||
#' get_state_data(2010:2014) |
|
||||
#' httr::with_verbose(get_state_data(2009:2015)) |
|
||||
#' } |
|
||||
get_state_data <- function(years=as.numeric(format(Sys.Date(), "%Y"))) {

  if (any(years < 2008))
    stop("Error: years should be >= 2008")

  # Seasons are sent as (starting year - 1960). A trailing `1` is appended to
  # the list before it is collapsed into the URL path.
  # NOTE(review): the meaning of the trailing 1 is not visible here - confirm
  # against the API before changing it.
  years <- c((years - 1960), 1)
  years <- paste0(years, collapse=",")

  tmp <- httr::GET(sprintf("https://gis.cdc.gov/grasp/fluView1/Phase1DownloadDataP/%s", years))

  # Namespace-qualified like the other httr calls in this function, so it does
  # not depend on httr being attached or imported.
  httr::stop_for_status(tmp)

  # the API doesn't return actual JSON. It returns a JavaScript data structure
  # which is why we need the assistance of the super handy V8 pkg.

  res <- httr::content(tmp, as="parsed")

  # Evaluate the payload as a JavaScript assignment, then pull the resulting
  # object back into R without flattening nested structures.
  ctx <- V8::v8()
  ctx$eval(V8::JS(sprintf("var dat=%s;", res)))
  res <- ctx$get("dat", flatten=FALSE)

  # Let readr guess proper column types for the `datadownload` element
  out <- suppressMessages(readr::type_convert(res$datadownload))

  class(out) <- c("cdcstatedata", class(out))

  out

}
|
@ -1,58 +0,0 @@ |
|||||
#' Retrieves (high-level) weekly influenza surveillance report from the CDC |
|
||||
#' |
|
||||
#' The CDC publishes a \href{https://www.cdc.gov/flu/weekly/usmap.htm}{weekly |
|
||||
#' influenza report} detailing high-level flu activity per-state. They also |
|
||||
#' publish a data file (see \code{References}) of historical report readings. |
|
||||
#' This function reads that XML file and produces a long \code{data_frame} |
|
||||
#' with the historical surveillance readings.\cr |
|
||||
#' \cr |
|
||||
#' This function provides similar data to \code{\link{get_state_data}} but without |
|
||||
#' the reporting source metadata and a limit on the historical flu information. |
|
||||
#' |
|
||||
#' @references \url{https://www.cdc.gov/flu/weekly/flureport.xml} |
|
||||
#' @return \code{tbl_df} (also classed with \code{cdcweeklyreport}) with six |
|
||||
#' columns: \code{year}, \code{week_number}, \code{state}, \code{color}, |
|
||||
#' \code{label}, \code{subtitle} |
|
||||
#' @export |
|
||||
#' @examples \dontrun{ |
|
||||
#' get_weekly_flu_report() |
|
||||
#' } |
|
||||
get_weekly_flu_report <- function() {

  # Download and parse the historical weekly report
  report <- read_xml("https://www.cdc.gov/flu/weekly/flureport.xml")

  # One named attribute vector per <timeperiod> node; the code below reads
  # its "number", "year" and "subtitle" entries
  period_attrs <- xml_attrs(xml_find_all(report, "timeperiod"))

  progress <- progress_estimated(length(period_attrs))

  # Build the rows (one per state entry) for a single reporting period
  rows_for_period <- function(period) {

    progress$tick()$print()

    # Locate this period's node again by its week number and year
    period_xpath <- sprintf("//timeperiod[@number='%s' and @year='%s']",
                            period["number"], period["year"])

    entries <- xml_children(xml_find_first(report, period_xpath))

    state_abbrev <- xml_text(xml_find_all(entries, "abbrev"), TRUE)
    state_color <- xml_text(xml_find_all(entries, "color"), TRUE)
    state_label <- xml_text(xml_find_all(entries, "label"), TRUE)

    data_frame(year=period["year"],
               week_number=period["number"],
               state=state_abbrev,
               color=state_color,
               label=state_label,
               subtitle=period["subtitle"])

  }

  # Row-bind every period into one long data frame
  weekly <- purrr::map_df(period_attrs, rows_for_period)

  class(weekly) <- c("cdcweeklyreport", class(weekly))

  weekly

}
|
@ -1,89 +0,0 @@ |
|||||
#' Mortality Surveillance Data from the National Center for Health Statistics |
|
||||
#' |
|
||||
#' The National Center for Health Statistics (NCHS) collects and disseminates the Nation's |
|
||||
#' official vital statistics. These statistics are based on data provided to NCHS through |
|
||||
#' contracts with the vital registration systems operated in the various jurisdictions |
|
||||
#' legally responsible for the registration of deaths (i.e., death certificates) and other |
|
||||
#' vital events. These data have previously only been released as annual final data files |
|
||||
#' 12 months or more after the end of the data year. Recent NCHS efforts to improve the |
|
||||
#' timeliness of jurisdiction reporting and modernize the national vital statistics |
|
||||
#' infrastructure have created a system capable of supporting near real-time surveillance. |
|
||||
#' Capitalizing on these new capabilities, NCHS and CDC’s Influenza Division have |
|
||||
#' partnered to pilot the use of NCHS mortality surveillance data for Pneumonia and |
|
||||
#' Influenza (P&I) mortality surveillance. |
|
||||
#' |
|
||||
#' NCHS mortality surveillance data are presented by the week the death occurred. |
|
||||
#' Nationally P&I percentages are released two weeks after the week of death to allow for |
|
||||
#' collection of enough data to produce a stable P&I percentage at the national level. |
|
||||
#' Collection of complete data is not expected, and reliable P&I ratios are not expected |
|
||||
#' at the region and state level within this two week period. State and Region level |
|
||||
#' counts will be released only after 20% of the expected number of deaths are reported |
|
||||
#' through the system. |
|
||||
#' |
|
||||
#' @references \url{https://www.cdc.gov/flu/weekly/nchs.htm} |
|
||||
#' @return a list of \code{tbl_df}s |
|
||||
#' @export |
|
||||
#' @examples \dontrun{ |
|
||||
#' get_mortality_surveillance_data() |
|
||||
#' } |
|
||||
get_mortality_surveillance_data <- function() {

  # The data file links are only available by scraping the NCHS web page
  page <- xml2::read_html("https://www.cdc.gov/flu/weekly/nchs.htm")

  site_root <- "https://www.cdc.gov"

  # Select the <option> nodes matching `xpath`, turn their `value` attributes
  # into absolute URLs, then download every CSV and row-bind the results.
  download_options <- function(xpath) {

    option_nodes <- xml2::xml_find_all(page, xpath)
    csv_urls <- sprintf("%s%s", site_root,
                        xml2::xml_attr(option_nodes, "value"))

    progress <- dplyr::progress_estimated(length(csv_urls))

    purrr::map_df(csv_urls, function(csv_url) {
      progress$tick()$print()
      suppressMessages(read_csv(URLencode(csv_url), col_types="ciidii"))
    })

  }

  # NOTE(review): the XPath literals below span lines, as in the original;
  # the original indentation inside them is not recoverable from this view -
  # confirm the whitespace is insignificant to the XPath engine.

  # state-level files
  by_state <- download_options(
    ".//select[@id='State']/option[contains(@value, 'csv') and
                                  contains(@value, 'State_')]"
  )

  # regional files: every CSV in the regional selector that is not weekly
  by_region <- download_options(
    ".//select[@id='Regional Data']/
                           option[contains(@value, 'csv') and
                                  not(contains(@value, 'Week_'))]"
  )

  # weekly files: the 'Week_' CSVs from the same selector
  by_week <- download_options(
    ".//select[@id='Regional Data']/
                           option[contains(@value, 'csv') and
                                  contains(@value, 'Week_')]"
  )

  # Bundle all three tables into one classed list
  mortality <- list(
    by_state = by_state,
    by_region = by_region,
    by_week = by_week
  )

  class(mortality) <- c("cfv_mortality", class(mortality))

  mortality

}
|
||||
|
|
@ -0,0 +1,18 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/cdcfluview-package.R |
||||
|
\docType{package} |
||||
|
\name{cdcfluview} |
||||
|
\alias{cdcfluview} |
||||
|
\alias{cdcfluview-package} |
||||
|
\title{Retrieve 'U.S.' Flu Season Data from the 'CDC' 'FluView' Portal} |
||||
|
\description{ |
||||
|
The U.S. Centers for Disease Control (CDC) maintains a portal |
||||
|
\url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html} for |
||||
|
accessing state, regional and national influenza statistics as well as |
||||
|
Mortality Surveillance Data. The Flash interface makes it difficult and |
||||
|
time-consuming to select and retrieve influenza data. This package |
||||
|
provides functions to access the data provided by the portal's underlying API. |
||||
|
} |
||||
|
\author{ |
||||
|
Bob Rudis (bob@rud.is) |
||||
|
} |
@ -1,54 +0,0 @@ |
|||||
% Generated by roxygen2: do not edit by hand |
|
||||
% Please edit documentation in R/get_flu_data.r |
|
||||
\name{get_flu_data} |
|
||||
\alias{get_flu_data} |
|
||||
\title{Retrieves state, regional or national influenza statistics from the CDC} |
|
||||
\usage{ |
|
||||
get_flu_data(region = "hhs", sub_region = 1:10, data_source = "ilinet", |
|
||||
years = as.numeric(format(Sys.Date(), "\%Y"))) |
|
||||
} |
|
||||
\arguments{ |
|
||||
\item{region}{one of "\code{hhs}", "\code{census}", "\code{national}", |
|
||||
"\code{state}"} |
|
||||
|
|
||||
\item{sub_region}{depends on the \code{region_type}.\cr |
|
||||
For "\code{national}", the \code{sub_region} should be \code{NA}.\cr |
|
||||
For "\code{hhs}", should be a vector between \code{1:10}.\cr |
|
||||
For "\code{census}", should be a vector between \code{1:9}.\cr |
|
||||
For "\code{state}", should be a vector of state/territory names |
|
||||
or "\code{all}".} |
|
||||
|
|
||||
\item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" |
|
||||
or "\code{all}" (for both)} |
|
||||
|
|
||||
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC |
|
||||
flu season 2014-2015). Default value is the current year and all |
|
||||
\code{years} values should be > \code{1997}} |
|
||||
} |
|
||||
\value{ |
|
||||
If only a single \code{data_source} is specified, then a single |
|
||||
\code{data.frame} is returned, otherwise a named list with each |
|
||||
\code{data.frame} is returned. |
|
||||
} |
|
||||
\description{ |
|
||||
Uses the data source from the |
|
||||
\href{https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView} |
|
||||
and provides flu reporting data as either a single data frame or a list of |
|
||||
data frames (depending on whether either \code{WHO NREVSS} or \code{ILINet} |
|
||||
(or both) is chosen. |
|
||||
} |
|
||||
\details{ |
|
||||
A lookup table between HHS regions and their member states/territories |
|
||||
is provided in \code{\link{hhs_regions}}. |
|
||||
} |
|
||||
\note{ |
|
||||
There is often a noticeable delay when making the API request to the CDC. |
|
||||
This is not due to a large download size, but the time it takes for their |
|
||||
servers to crunch the data. Wrap the function call in \code{httr::with_verbose} |
|
||||
if you would like to see what's going on. |
|
||||
} |
|
||||
\examples{ |
|
||||
\dontrun{ |
|
||||
flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014) |
|
||||
} |
|
||||
} |
|
@ -1,44 +0,0 @@ |
|||||
% Generated by roxygen2: do not edit by hand |
|
||||
% Please edit documentation in R/get_hosp_data.R |
|
||||
\name{get_hosp_data} |
|
||||
\alias{get_hosp_data} |
|
||||
\title{Retrieves influenza hospitalization statistics from the CDC} |
|
||||
\usage{ |
|
||||
get_hosp_data(area = "flusurvnet", age_group = "overall", |
|
||||
years = as.numeric(format(Sys.Date(), "\%Y")) - 1) |
|
||||
} |
|
||||
\arguments{ |
|
||||
\item{area}{one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or two |
|
||||
digit state abbreviation for an individual site. Exceptions are |
|
||||
New York - Albany ("\code{nya}") and New York - Rochester |
|
||||
("\code{nyr}")} |
|
||||
|
|
||||
|
|
||||
\item{age_group}{a vector of age groups to pull data for. Possible values are: |
|
||||
"\code{overall}", "\code{0-4y}", "\code{5-17y}, "\code{18-49y}, |
|
||||
"\code{50-64y}, "\code{65+y}".} |
|
||||
|
|
||||
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC |
|
||||
flu season 2014-2015). Default value is the current year and all |
|
||||
\code{years} values should be >= \code{2009}} |
|
||||
} |
|
||||
\value{ |
|
||||
A single \code{data.frame}. |
|
||||
} |
|
||||
\description{ |
|
||||
Uses the data source from the |
|
||||
\href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView} |
|
||||
and provides influenza hospitalization reporting data as a data frame. |
|
||||
} |
|
||||
\note{ |
|
||||
There is often a noticeable delay when making the API request to the CDC. |
|
||||
This is not due to a large download size, but the time it takes for their |
|
||||
servers to crunch the data. Wrap the function call in \code{httr::with_verbose} |
|
||||
if you would like to see what's going on. |
|
||||
} |
|
||||
\examples{ |
|
||||
\dontrun{ |
|
||||
# All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons |
|
||||
flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014) |
|
||||
} |
|
||||
} |
|
@ -1,41 +0,0 @@ |
|||||
% Generated by roxygen2: do not edit by hand |
|
||||
% Please edit documentation in R/mortalty.r |
|
||||
\name{get_mortality_surveillance_data} |
|
||||
\alias{get_mortality_surveillance_data} |
|
||||
\title{Mortality Surveillance Data from the National Center for Health Statistics} |
|
||||
\usage{ |
|
||||
get_mortality_surveillance_data() |
|
||||
} |
|
||||
\value{ |
|
||||
a list of \code{tbl_df}s |
|
||||
} |
|
||||
\description{ |
|
||||
The National Center for Health Statistics (NCHS) collects and disseminates the Nation's |
|
||||
official vital statistics. These statistics are based on data provided to NCHS through |
|
||||
contracts with the vital registration systems operated in the various jurisdictions |
|
||||
legally responsible for the registration of deaths (i.e., death certificates) and other |
|
||||
vital events. These data have previously only been released as annual final data files |
|
||||
12 months or more after the end of the data year. Recent NCHS efforts to improve the |
|
||||
timeliness of jurisdiction reporting and modernize the national vital statistics |
|
||||
infrastructure have created a system capable of supporting near real-time surveillance. |
|
||||
Capitalizing on these new capabilities, NCHS and CDC’s Influenza Division have |
|
||||
partnered to pilot the use of NCHS mortality surveillance data for Pneumonia and |
|
||||
Influenza (P&I) mortality surveillance. |
|
||||
} |
|
||||
\details{ |
|
||||
NCHS mortality surveillance data are presented by the week the death occurred. |
|
||||
Nationally P&I percentages are released two weeks after the week of death to allow for |
|
||||
collection of enough data to produce a stable P&I percentage at the national level. |
|
||||
Collection of complete data is not expected, and reliable P&I ratios are not expected |
|
||||
at the region and state level within this two week period. State and Region level |
|
||||
counts will be released only after 20% of the expected number of deaths are reported |
|
||||
through the system. |
|
||||
} |
|
||||
\examples{ |
|
||||
\dontrun{ |
|
||||
get_mortality_surveillance_data() |
|
||||
} |
|
||||
} |
|
||||
\references{ |
|
||||
\url{https://www.cdc.gov/flu/weekly/nchs.htm} |
|
||||
} |
|
@ -1,40 +0,0 @@ |
|||||
% Generated by roxygen2: do not edit by hand |
|
||||
% Please edit documentation in R/get_state_data.r |
|
||||
\name{get_state_data} |
|
||||
\alias{get_state_data} |
|
||||
\title{Retrieves state/territory-level influenza statistics from the CDC} |
|
||||
\usage{ |
|
||||
get_state_data(years = as.numeric(format(Sys.Date(), "\%Y"))) |
|
||||
} |
|
||||
\arguments{ |
|
||||
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC |
|
||||
flu season 2014-2015). Default value is the current year and all |
|
||||
\code{years} values should be >= \code{2008}} |
|
||||
} |
|
||||
\value{ |
|
||||
A \code{data.frame} of state-level data for the specified seasons |
|
||||
(also classed as \code{cdcstatedata}) |
|
||||
} |
|
||||
\description{ |
|
||||
Uses the data source from the CDC' State-levelFluView |
|
||||
\url{https://gis.cdc.gov/grasp/fluview/main.html} and provides state flu |
|
||||
reporting data as a single data frame.\cr |
|
||||
\cr |
|
||||
This function provides similar data to \code{\link{get_weekly_flu_report}} but |
|
||||
provides more metadata about the reporting sources and has access to more |
|
||||
historical infomation. |
|
||||
} |
|
||||
\note{ |
|
||||
There is often a noticeable delay when making the API request to the CDC. This |
|
||||
is not due to a large download size, but the time it takes for their |
|
||||
servers to crunch the data. Wrap the function call in \code{httr::with_verbose} |
|
||||
if you would like to see what's going on. |
|
||||
} |
|
||||
\examples{ |
|
||||
\dontrun{ |
|
||||
get_state_data(2014) |
|
||||
get_state_data(c(2013, 2014)) |
|
||||
get_state_data(2010:2014) |
|
||||
httr::with_verbose(get_state_data(2009:2015)) |
|
||||
} |
|
||||
} |
|
@ -1,31 +0,0 @@ |
|||||
% Generated by roxygen2: do not edit by hand |
|
||||
% Please edit documentation in R/get_weekly_flu_report.r |
|
||||
\name{get_weekly_flu_report} |
|
||||
\alias{get_weekly_flu_report} |
|
||||
\title{Retrieves (high-level) weekly influenza surveillance report from the CDC} |
|
||||
\usage{ |
|
||||
get_weekly_flu_report() |
|
||||
} |
|
||||
\value{ |
|
||||
\code{tbl_df} (also classed with \code{cdcweeklyreport}) with six |
|
||||
columns: \code{year}, \code{week_number}, \code{state}, \code{color}, |
|
||||
\code{label}, \code{subtitle} |
|
||||
} |
|
||||
\description{ |
|
||||
The CDC publishes a \href{https://www.cdc.gov/flu/weekly/usmap.htm}{weekly |
|
||||
influenza report} detailing high-level flu activity per-state. They also |
|
||||
publish a data file (see \code{References}) of historical report readings. |
|
||||
This function reads that XML file and produces a long \code{data_frame} |
|
||||
with the historical surveillance readings.\cr |
|
||||
\cr |
|
||||
This function provides similar data to \code{\link{get_state_data}} but without |
|
||||
the reporting source metadata and a limit on the historical flu information. |
|
||||
} |
|
||||
\examples{ |
|
||||
\dontrun{ |
|
||||
get_weekly_flu_report() |
|
||||
} |
|
||||
} |
|
||||
\references{ |
|
||||
\url{https://www.cdc.gov/flu/weekly/flureport.xml} |
|
||||
} |
|
Loading…
Reference in new issue