diff --git a/.Rbuildignore b/.Rbuildignore index 1f58f13..9b4c381 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,3 +22,4 @@ ^README-.* ^cran-comments\.md$ ^codecov\.yml$ +^Makefile$ diff --git a/DESCRIPTION b/DESCRIPTION index 2010ad2..e6b56f4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,6 +34,9 @@ Imports: stats, utils, sf, + xml2, + purrr, + readr, MMWRweek, units (>= 0.4-6) RoxygenNote: 6.0.1 diff --git a/NAMESPACE b/NAMESPACE index 8e14f99..a001f7b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,11 @@ export(age_group_distribution) export(cdc_basemap) export(geographic_spread) +export(get_flu_data) +export(get_hosp_data) +export(get_mortality_surveillance_data) +export(get_state_data) +export(get_weekly_flu_report) export(hospitalizations) export(ili_weekly_activity_indicators) export(ilinet) @@ -16,6 +21,7 @@ export(surveillance_areas) export(who_nrevss) import(MMWRweek) import(httr) +import(xml2) importFrom(dplyr,"%>%") importFrom(dplyr,bind_rows) importFrom(dplyr,data_frame) @@ -23,8 +29,18 @@ importFrom(dplyr,filter) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(jsonlite,fromJSON) +importFrom(purrr,discard) +importFrom(purrr,keep) +importFrom(purrr,map) +importFrom(purrr,map_chr) +importFrom(purrr,map_df) +importFrom(purrr,map_lgl) +importFrom(readr,read_csv) +importFrom(readr,type_convert) importFrom(sf,st_read) importFrom(stats,setNames) importFrom(tools,file_path_sans_ext) +importFrom(utils,URLencode) +importFrom(utils,globalVariables) importFrom(utils,read.csv) importFrom(utils,unzip) diff --git a/R/aaa.R b/R/aaa.R index bec598c..48010c1 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -1,3 +1,5 @@ +utils::globalVariables(c(".", "mmwrid", "season", "seasonid")) + # CDC U.S. region names to ID map .region_map <- c(national=3, hhs=1, census=2, state=5) diff --git a/R/agd-ipt.r b/R/agd-ipt.r index 5220664..9a5096b 100644 --- a/R/agd-ipt.r +++ b/R/agd-ipt.r @@ -14,9 +14,8 @@ #' the function is smart enough to retrieve by season ID vs convert that #' to a year. #' @export -#' @examples \dontrun{ -#' agd_ipt() -#' } +#' @examples +#' age_group_distribution(years=2015) age_group_distribution <- function(years = NULL) { httr::GET( diff --git a/R/cdcfluview-package.R b/R/cdcfluview-package.R index b62db65..07429e9 100644 --- a/R/cdcfluview-package.R +++ b/R/cdcfluview-package.R @@ -13,10 +13,13 @@ #' @author Bob Rudis (bob@@rud.is) #' @import httr #' @import MMWRweek +#' @import xml2 +#' @importFrom purrr map map_df map_chr map_lgl discard keep +#' @importFrom readr read_csv type_convert #' @importFrom tools file_path_sans_ext #' @importFrom dplyr left_join bind_rows mutate filter data_frame %>% #' @importFrom jsonlite fromJSON #' @importFrom stats setNames #' @importFrom sf st_read -#' @importFrom utils read.csv unzip +#' @importFrom utils read.csv unzip URLencode globalVariables NULL diff --git a/R/get-flu-data.r b/R/get-flu-data.r new file mode 100644 index 0000000..9b91f86 --- /dev/null +++ b/R/get-flu-data.r @@ -0,0 +1,196 @@ +#' Retrieves state, regional or national influenza statistics from the CDC (deprecated) +#' +#' Uses the data source from the +#' \href{https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView} +#' and provides flu reporting data as either a single data frame or a list of +#' data frames (depending on whether either \code{WHO NREVSS} or \code{ILINet} +#' (or both) is chosen. +#' +#' A lookup table between HHS regions and their member states/territories +#' is provided in \code{\link{hhs_regions}}. 
+#'
+#' @param region one of "\code{hhs}", "\code{census}", "\code{national}",
+#'        "\code{state}"
+#' @param sub_region depends on the \code{region_type}.\cr
+#'        For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
+#'        For "\code{hhs}", should be a vector between \code{1:10}.\cr
+#'        For "\code{census}", should be a vector between \code{1:9}.\cr
+#'        For "\code{state}", should be a vector of state/territory names
+#'        or "\code{all}".
+#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}"
+#'        or "\code{all}" (for both)
+#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
+#'        flu season 2014-2015). Default value is the current year and all
+#'        \code{years} values should be > \code{1997}
+#' @return If only a single \code{data_source} is specified, then a single
+#'         \code{data.frame} is returned, otherwise a named list with each
+#'         \code{data.frame} is returned.
+#' @note There is often a noticeable delay when making the API request to the CDC.
+#'       This is not due to a large download size, but the time it takes for their
+#'       servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
+#'       if you would like to see what's going on.
+#' @export
+#' @examples \dontrun{
+#' flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
+#' }
+get_flu_data <- function(region="hhs", sub_region=1:10,
+                         data_source="ilinet",
+                         years=as.numeric(format(Sys.Date(), "%Y"))) {
+
+  message(
+    paste0(
+      c("This function has been deprecated and will be removed in future releases.",
+        "Use either ilinet() or who_nrevss() instead."),
+      collapse="\n"
+    )
+  )
+
+  region <- tolower(region)
+  data_source <- tolower(data_source)
+
+  if (!(region %in% c("hhs", "census", "national", "state")))
+    stop("Error: region must be one of hhs, census, national or state")
+
+  if (length(region) != 1)
+    stop("Error: can only select one region")
+
+  if (region=="national") sub_region <- 0
+
+  if ((region=="hhs") && !all(sub_region %in% 1:10))
+    stop("Error: sub_region values must fall between 1:10 when region is 'hhs'")
+
+  if ((region=="census") && !all(sub_region %in% 1:9))
+    stop("Error: sub_region values must fall between 1:9 when region is 'census'")
+
+  if (!all(data_source %in% c("who", "ilinet", "all")))
+    stop("Error: data_source must be either 'who', 'ilinet', 'all' or c('who', 'ilinet')")
+
+  if (any(years < 1997))
+    stop("Error: years should be > 1997")
+
+  # Match names of states to numbers for API
+  if (region == "state") {
+    sub_region <- tolower(sub_region)
+
+    if (any(sub_region == "all")) {
+      sub_region_inpt <- 1:57
+    } else {
+      state_match <- data.frame(state = tolower(c(sort(c(datasets::state.name,
+                                                          "District of Columbia")),
+                                                  "American Samoa",
+                                                  "Commonwealth of the Northern Mariana Islands",
+                                                  "Puerto Rico",
+                                                  "Virgin Islands",
+                                                  "New York City",
+                                                  "Los Angeles")),
+                                num = 1:57,
+                                stringsAsFactors = FALSE)
+
+      sub_region_inpt <- state_match$num[state_match$state %in% sub_region]
+
+      if (length(sub_region_inpt) == 0)
+        stop("Error: no eligible state/territory names provided")
+    }
+  } else sub_region_inpt <- sub_region
+
+  # format the input parameters to fit the CDC API
+
+  years <- years - 1960
+
+  reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3, "state" = 5)[[region]])
+
+  # Format data source
+  if (data_source == "who") {
+    data_list <- list(list(ID = 0,
+                           Name = "WHO_NREVSS"))
+  } else if (data_source == "ilinet") {
+    data_list <- list(list(ID = 1,
+                           Name = "ILINet"))
+  } else data_list <- list(list(ID = 0,
+
Name = "WHO_NREVSS"), + list(ID = 1, + Name = "ILINet")) + + # Format years + year_list <- lapply(seq_along(years), + function(x) list(ID = years[x], + Name = paste(years[x]))) + + # Format sub regions + sub_reg_list <- lapply(seq_along(sub_region_inpt), + function(x) list(ID = sub_region_inpt[x], + Name = paste(sub_region_inpt[x]))) + + params <- list(AppVersion = "Public", + DatasourceDT = data_list, + RegionTypeId = reg, + SeasonsDT = year_list, + SubRegionsDT = sub_reg_list) + + out_file <- tempfile(fileext=".zip") + + # CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs + + tmp <- httr::POST("https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload", + body = params, + encode = "json", + httr::write_disk(out_file)) + + httr::stop_for_status(tmp) + + if (!(file.exists(out_file))) + stop("Error: cannot process downloaded data") + + out_dir <- tempdir() + + files <- unzip(out_file, exdir=out_dir, overwrite=TRUE) + + pb <- dplyr::progress_estimated(length(files)) + lapply(files, function(x) { + pb$tick()$print() + ct <- ifelse(grepl("who", x, ignore.case=TRUE), 1, 1) + suppressMessages(readr::read_csv(x, skip=ct)) + }) -> file_list + + names(file_list) <- substr(basename(files), 1, nchar(basename(files)) - 4) + + # If data are missing, X causes numeric columns to be read as character + lapply(file_list, function(x) { + # Create list of columns that should be numeric - exclude character columns + cols <- which(!colnames(x) %in% c("REGION", "REGION TYPE", + "SEASON_DESCRIPTION")) + suppressWarnings(x[cols] <- purrr::map(x[cols], as.numeric)) + return(x) + }) -> file_list + + + # Depending on the parameters, there could be more than one + # file returned. When there's only one, return a more usable + # structure. + + if (length(file_list) == 1) { + + file_list <- file_list[[1]] + + # when no rows, then it's likely the caller specified the + # current year and the flu season has technically not started yet. + # so help them out and move the year back and get current flu + # season data. + + if ((nrow(file_list) == 0) && + (length(years)==1) && + (years == (as.numeric(format(Sys.Date(), "%Y"))-1960))) { + + message("Adjusting [years] to get current season...") + return(get_flu_data(region=region, sub_region=sub_region, + data_source=data_source, years=years+1960-1)) + } else { + return(file_list) + } + + } else { + return(file_list) + } + +} + diff --git a/R/get-hosp-data.r b/R/get-hosp-data.r new file mode 100644 index 0000000..f27af0a --- /dev/null +++ b/R/get-hosp-data.r @@ -0,0 +1,111 @@ +#' Retrieves influenza hospitalization statistics from the CDC (deprecated) +#' +#' Uses the data source from the +#' \href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView} +#' and provides influenza hospitalization reporting data as a data frame. +#' +#' @param area one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or two +#' digit state abbreviation for an individual site. Exceptions are +#' New York - Albany ("\code{nya}") and New York - Rochester +#' ("\code{nyr}") +#' @param age_group a vector of age groups to pull data for. Possible values are: +#' "\code{overall}", "\code{0-4y}", "\code{5-17y}, "\code{18-49y}, +#' "\code{50-64y}, "\code{65+y}". +#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC +#' flu season 2014-2015). Default value is the current year and all +#' \code{years} values should be >= \code{2009} +#' @return A single \code{data.frame}. 
+#' @note There is often a noticeable delay when making the API request to the CDC. +#' This is not due to a large download size, but the time it takes for their +#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose} +#' if you would like to see what's going on. +#' @export +#' @examples \dontrun{ +#' # All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons +#' flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014) +#' } +get_hosp_data <- function(area="flusurvnet", age_group="overall", + years=as.numeric(format(Sys.Date(), "%Y")) - 1) { + + message( + paste0( + c("This function has been deprecated and will be removed in future releases.", + "Use hospitalizations() instead."), + collapse="\n" + ) + ) + + area <- tolower(area) + age_group <- tolower(age_group) + + if (!(area %in% c("flusurvnet", "eip", "ihsp", "ca", "co", "ct", "ga", "md", + "mn", "nm", "nya", "nyr", "or", "tn", "id", "ia", "mi", + "oh", "ok", "ri", "sd", "ut"))) + stop("Error: area must be one of flusurvnet, eip, ihsp, or a valid state abbreviation") + + if (length(area) != 1) + stop("Error: can only select one area") + + if (!all(age_group %in% c("overall", "0-4y", "5-17y", "18-49y", + "50-64y", "65+y"))) + stop("Error: invalid age group specified") + + if (any(years < 2009)) + stop("Error: years should be >= 2009") + + # Match names of age groups to numbers for API + age_match <- data.frame(age_group = c("overall", "0-4y", "5-17y", + "18-49y", "50-64y", "65+y"), + code = c(6, 1, 2, 3, 4, 5)) + + age_group_num <- age_match$code[age_match$age_group %in% age_group] + + + # format the input parameters to fit the CDC API + + years <- years - 1960 + + area_match <- data.frame( + area = c("flusurvnet", "eip", "ca", "co", "ct", + "ga", "md", "mn", "nm", "nya", "nyr", "or", + "tn", "ihsp", "id", "ia", "mi", "oh", "ok", + "ri", "sd", "ut"), + catch = c(22, 22, 1, 2, 3, 4, 7, 9, 11, 13, 14, 17, + 20, 22, 6, 5, 8, 15, 16, 18, 19, 21), + network = c(1, rep(2, 12), rep(3, 9)), + stringsAsFactors=FALSE + ) + + # Format years + year_list <- lapply(seq_along(years), + function(x) list(ID = years[x])) + + # Format age group + age_list <- lapply(seq_along(age_group_num), + function(x) list(ID = age_group_num[x])) + + params <- list(AppVersion = "Public", + agegroups = age_list, + catchmentid = area_match$catch[area_match$area == area], + networkid = area_match$network[area_match$area == area], + seasons = year_list) + + out_file <- tempfile(fileext=".json") + + # CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs + + tmp <- httr::POST("https://gis.cdc.gov/GRASP/Flu3/PostPhase03DownloadData", + body = params, + encode = "json", + httr::write_disk(out_file, overwrite = T)) + + httr::stop_for_status(tmp) + + if (!(file.exists(out_file))) + stop("Error: cannot process downloaded data") + + file <- jsonlite::fromJSON(out_file)[[1]] + + return(file) + +} diff --git a/R/get-mortality-surveillance-data.r b/R/get-mortality-surveillance-data.r new file mode 100644 index 0000000..7c0d84b --- /dev/null +++ b/R/get-mortality-surveillance-data.r @@ -0,0 +1,96 @@ +#' Mortality Surveillance Data from the National Center for Health Statistics (deprecated) +#' +#' The National Center for Health Statistics (NCHS) collects and disseminates the Nation's +#' official vital statistics. 
These statistics are based on data provided to NCHS through +#' contracts with the vital registration systems operated in the various jurisdictions +#' legally responsible for the registration of deaths (i.e., death certificates) and other +#' vital events. These data have previously only been released as annual final data files +#' 12 months or more after the end of the data year. Recent NCHS efforts to improve the +#' timeliness of jurisdiction reporting and modernize the national vital statistics +#' infrastructure have created a system capable of supporting near real-time surveillance. +#' Capitalizing on these new capabilities, NCHS and CDC’s Influenza Division have +#' partnered to pilot the use of NCHS mortality surveillance data for Pneumonia and +#' Influenza (P&I) mortality surveillance. +#' +#' NCHS mortality surveillance data are presented by the week the death occurred. +#' Nationally P&I percentages are released two weeks after the week of death to allow for +#' collection of enough data to produce a stable P&I percentage at the national level. +#' Collection of complete data is not expected, and reliable P&I ratios are not expected +#' at the region and state level within this two week period. State and Region level +#' counts will be released only after 20% of the expected number of deaths are reported +#' through the system. +#' +#' @references \url{https://www.cdc.gov/flu/weekly/nchs.htm} +#' @return a list of \code{tbl_df}s +#' @export +#' @examples \dontrun{ +#' get_mortality_surveillance_data() +#' } +get_mortality_surveillance_data <- function() { + + message( + paste0( + c("This function has been deprecated and will be removed in future releases.", + "Use pi_mortality() instead."), + collapse="\n" + ) + ) + + # scrape (ugh) web page to get data file links for state mortality data + + pg <- xml2::read_html("https://www.cdc.gov/flu/weekly/nchs.htm") + + PREFIX <- "https://www.cdc.gov" + + xml2::xml_find_all(pg, ".//select[@id='State']/option[contains(@value, 'csv') and + contains(@value, 'State_')]") %>% + xml2::xml_attr("value") %>% + sprintf("%s%s", PREFIX, .) -> targets + + pb <- dplyr::progress_estimated(length(targets)) + purrr::map_df(targets, function(x) { + pb$tick()$print() + suppressMessages(readr::read_csv(URLencode(x), col_types="ciidii")) + }) -> influenza_mortality_by_state + + # scrape (ugh) web page to get data file links for regional mortality data + + xml2::xml_find_all(pg, ".//select[@id='Regional Data']/ + option[contains(@value, 'csv') and + not(contains(@value, 'Week_'))]") %>% + xml2::xml_attr("value") %>% + sprintf("%s%s", PREFIX, .) -> targets + + pb <- dplyr::progress_estimated(length(targets)) + purrr::map_df(targets, function(x) { + pb$tick()$print() + suppressMessages(read_csv(URLencode(x), col_types="ciidii")) + }) -> influenza_mortality_by_region + + # scrape (ugh) web page to get data file links for weekly mortality data + + xml2::xml_find_all(pg, ".//select[@id='Regional Data']/ + option[contains(@value, 'csv') and + contains(@value, 'Week_')]") %>% + xml2::xml_attr("value") %>% + sprintf("%s%s", PREFIX, .) 
-> targets
+
+  pb <- dplyr::progress_estimated(length(targets))
+  purrr::map_df(targets, function(x) {
+    pb$tick()$print()
+    suppressMessages(read_csv(URLencode(x), col_types="ciidii"))
+  }) -> influenza_mortality_by_week
+
+  # return it all
+
+  list(
+    by_state = influenza_mortality_by_state,
+    by_region = influenza_mortality_by_region,
+    by_week = influenza_mortality_by_week
+  ) -> out
+
+  class(out) <- c("cfv_mortality", class(out))
+
+  out
+
+}
\ No newline at end of file
diff --git a/R/get-state-data.r b/R/get-state-data.r
new file mode 100644
index 0000000..424535b
--- /dev/null
+++ b/R/get-state-data.r
@@ -0,0 +1,58 @@
+#' Retrieves state/territory-level influenza statistics from the CDC (deprecated)
+#'
+#' Uses the data source from the CDC's State-level FluView
+#' \url{https://gis.cdc.gov/grasp/fluview/main.html} and provides state flu
+#' reporting data as a single data frame.\cr
+#' \cr
+#' This function provides similar data to \code{\link{get_weekly_flu_report}} but
+#' provides more metadata about the reporting sources and has access to more
+#' historical information.
+#'
+#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
+#'        flu season 2014-2015). Default value is the current year and all
+#'        \code{years} values should be >= \code{2008}
+#' @return A \code{data.frame} of state-level data for the specified seasons
+#'         (also classed as \code{cdcstatedata})
+#' @export
+#' @note There is often a noticeable delay when making the API request to the CDC. This
+#'       is not due to a large download size, but the time it takes for their
+#'       servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
+#'       if you would like to see what's going on.
+#' @examples
+#' get_state_data(2014)
+#' \dontrun{
+#' get_state_data(c(2013, 2014))
+#' get_state_data(2010:2014)
+#' httr::with_verbose(get_state_data(2009:2015))
+#' }
+get_state_data <- function(years=as.numeric(format(Sys.Date(), "%Y"))) {
+
+  message(
+    paste0(
+      c("This function has been deprecated and will be removed in future releases.",
+        "Use ili_weekly_activity_indicators() instead."),
+      collapse="\n"
+    )
+  )
+
+  if (any(years < 2008))
+    stop("Error: years should be >= 2008")
+
+  years <- c((years - 1960), 1)
+  years <- paste0(years, collapse=",")
+
+  tmp <- httr::GET(sprintf("https://gis.cdc.gov/grasp/fluView1/Phase1DownloadDataP/%s", years))
+
+  stop_for_status(tmp)
+
+  # the API doesn't return a proper JSON content type, so grab the response
+  # body and parse it with jsonlite ourselves
+
+  res <- httr::content(tmp, as="parsed")
+  res <- jsonlite::fromJSON(res)
+
+  out <- suppressMessages(readr::type_convert(res$datadownload))
+
+  out
+
+}
diff --git a/R/get-weekly-flu-report.r b/R/get-weekly-flu-report.r
new file mode 100644
index 0000000..006fb03
--- /dev/null
+++ b/R/get-weekly-flu-report.r
@@ -0,0 +1,60 @@
+#' Retrieves (high-level) weekly (XML) influenza surveillance report from the CDC
+#'
+#' The CDC publishes a \href{https://www.cdc.gov/flu/weekly/usmap.htm}{weekly
+#' influenza report} detailing high-level flu activity per-state. They also
+#' publish a data file (see \code{References}) of historical report readings.
+#' This function reads that XML file and produces a long \code{data_frame}
+#' with the historical surveillance readings.\cr
+#' \cr
+#' This function provides similar data to \code{\link{get_state_data}} but without
+#' the reporting source metadata and with a limit on the historical flu information.
+#' +#' @references \url{https://www.cdc.gov/flu/weekly/flureport.xml} +#' @return \code{tbl_df} (also classed with \code{cdcweeklyreport}) with six +#' columns: \code{year}, \code{week_number}, \code{state}, \code{color}, +#' \code{label}, \code{subtitle} +#' @export +#' @examples \dontrun{ +#' get_weekly_flu_report() +#' } +get_weekly_flu_report <- function() { + + # grab the report + doc <- xml2::read_xml("https://www.cdc.gov/flu/weekly/flureport.xml") + + # extract the time periods + periods <- xml2::xml_attrs(xml2::xml_find_all(doc, "timeperiod")) + + # for each period extract the state information and + # shove it all into a data frame + pb <- dplyr::progress_estimated(length(periods)) + purrr::map_df(periods, function(period) { + + pb$tick()$print() + + tp <- sprintf("//timeperiod[@number='%s' and @year='%s']", + period["number"], period["year"]) + + weeks <- xml2::xml_find_first(doc, tp) + kids <- xml2::xml_children(weeks) + + abbrev <- xml2::xml_text(xml2::xml_find_all(kids, "abbrev"), TRUE) + color <- xml2::xml_text(xml2::xml_find_all(kids, "color"), TRUE) + label <- xml2::xml_text(xml2::xml_find_all(kids, "label"), TRUE) + + dplyr::data_frame( + year = period["year"], + week_number = period["number"], + state = abbrev, + color = color, + label = label, + subtitle = period["subtitle"] + ) + + }) -> out + + class(out) <- c("cdcweeklyreport", class(out)) + + out + +} \ No newline at end of file diff --git a/R/hospital.r b/R/hospital.r index 5d82e73..a935527 100644 --- a/R/hospital.r +++ b/R/hospital.r @@ -16,8 +16,9 @@ #' @references #' - [Hospital Portal](https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html) #' @export -#' @examples \dontrun{ -#' hosp_fs <- hospitalizations("flusurv") +#' @examples +#' hosp_fs <- hospitalizations("flusurv", years=2015) +#' \dontrun{ #' hosp_eip <- hospitalizations("eip") #' hosp_ihsp <- hospitalizations("ihsp") #' } diff --git a/R/ilinet.r b/R/ilinet.r index 75d76d4..39797b9 100644 --- a/R/ilinet.r +++ b/R/ilinet.r @@ -21,16 +21,17 @@ #' - [ILINet Portal](https://wwwn.cdc.gov/ilinet/) (Login required) #' - [WHO/NREVSS](https://www.cdc.gov/surveillance/nrevss/index.html) #' @export -#' @examples -#' national_ili <- ilinet("national") +#' @examples +#' national_ili <- ilinet("national", years=2017) +#' \dontrun{ #' hhs_ili <- ilinet("hhs") #' census_ili <- ilinet("census") #' state_ili <- ilinet("state") -#' \dontrun{ +#' #' library(purrr) #' map_df( #' c("national", "hhs", "census", "state"), -#' ~ilinet(.x) %>% readr::type_convert()) +#' ~ilinet(.x)) #' } ilinet <- function(region=c("national", "hhs", "census", "state"), years=NULL) { @@ -117,6 +118,6 @@ ilinet <- function(region=c("national", "hhs", "census", "state"), years=NULL) { class(xdf) <- c("tbl_df", "tbl", "data.frame") - xdf + suppressMessages(readr::type_convert(xdf)) } \ No newline at end of file diff --git a/R/who-nrvess.r b/R/who-nrvess.r index e407f86..24a5a6f 100644 --- a/R/who-nrvess.r +++ b/R/who-nrvess.r @@ -16,6 +16,12 @@ #' the reason why a list of data frames is returned.\cr\cr #' **ALSO** The new CDC API seems to be missing some public health lab data fields. #' @param region one of "`national`", "`hhs`", "`census`", or "`state`" +#' @param years a vector of years to retrieve data for (i.e. `2014` for CDC +#' flu season 2014-2015). CDC has data for this API going back to 1997. +#' Default value (`NULL`) means retrieve **all** years. NOTE: if you +#' happen to specify a 2-digit season value (i.e. 
`57` == 2017-2018)
+#'        the function is smart enough to retrieve by season ID vs convert that
+#'        to a year.
 #' @return list of data frames identified by
 #'   - `combined_prior_to_2015_16`
 #'   - `public_health_labs`
diff --git a/R/zzz.r b/R/zzz.r
index caac07b..8d2e947 100644
--- a/R/zzz.r
+++ b/R/zzz.r
@@ -5,7 +5,7 @@
   if (!interactive()) return()
 
   packageStartupMessage(paste0("cdcfluview is under *active* development. ",
-                               "There are *MASSIVE* breaking changes*. ",
+                               "There are numerous changes & deprecations.\n",
                                "See https://github.com/hrbrmstr/cdcfluview for info/news."))
 
 } # nocov end
diff --git a/man/age_group_distribution.Rd b/man/age_group_distribution.Rd
index 20d59aa..985b5a5 100644
--- a/man/age_group_distribution.Rd
+++ b/man/age_group_distribution.Rd
@@ -20,9 +20,7 @@ public health laboratories by influenza virus type and subtype/lineage. Laborato
 from multiple seasons and different age groups is provided.
 }
 \examples{
- \dontrun{
-agd_ipt()
-}
+age_group_distribution(years=2015)
 }
 \references{
 - [CDC FluView Portal](https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html)
diff --git a/man/get_flu_data.Rd b/man/get_flu_data.Rd
new file mode 100644
index 0000000..9c99f83
--- /dev/null
+++ b/man/get_flu_data.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get-flu-data.r
+\name{get_flu_data}
+\alias{get_flu_data}
+\title{Retrieves state, regional or national influenza statistics from the CDC (deprecated)}
+\usage{
+get_flu_data(region = "hhs", sub_region = 1:10, data_source = "ilinet",
+  years = as.numeric(format(Sys.Date(), "\%Y")))
+}
+\arguments{
+\item{region}{one of "\code{hhs}", "\code{census}", "\code{national}",
+"\code{state}"}
+
+\item{sub_region}{depends on the \code{region_type}.\cr
+For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
+For "\code{hhs}", should be a vector between \code{1:10}.\cr
+For "\code{census}", should be a vector between \code{1:9}.\cr
+For "\code{state}", should be a vector of state/territory names
+or "\code{all}".}
+
+\item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}"
+or "\code{all}" (for both)}
+
+\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
+flu season 2014-2015). Default value is the current year and all
+\code{years} values should be > \code{1997}}
+}
+\value{
+If only a single \code{data_source} is specified, then a single
+  \code{data.frame} is returned, otherwise a named list with each
+  \code{data.frame} is returned.
+}
+\description{
+Uses the data source from the
+\href{https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView}
+and provides flu reporting data as either a single data frame or a list of
+data frames (depending on whether either \code{WHO NREVSS} or \code{ILINet}
+(or both) is chosen.
+}
+\details{
+A lookup table between HHS regions and their member states/territories
+is provided in \code{\link{hhs_regions}}.
+}
+\note{
+There is often a noticeable delay when making the API request to the CDC.
+  This is not due to a large download size, but the time it takes for their
+  servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
+  if you would like to see what's going on.
+} +\examples{ +\dontrun{ +flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014) +} +} diff --git a/man/get_hosp_data.Rd b/man/get_hosp_data.Rd new file mode 100644 index 0000000..c841c1e --- /dev/null +++ b/man/get_hosp_data.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-hosp-data.r +\name{get_hosp_data} +\alias{get_hosp_data} +\title{Retrieves influenza hospitalization statistics from the CDC (deprecated)} +\usage{ +get_hosp_data(area = "flusurvnet", age_group = "overall", + years = as.numeric(format(Sys.Date(), "\%Y")) - 1) +} +\arguments{ +\item{area}{one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or two +digit state abbreviation for an individual site. Exceptions are +New York - Albany ("\code{nya}") and New York - Rochester +("\code{nyr}")} + +\item{age_group}{a vector of age groups to pull data for. Possible values are: +"\code{overall}", "\code{0-4y}", "\code{5-17y}, "\code{18-49y}, +"\code{50-64y}, "\code{65+y}".} + +\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC +flu season 2014-2015). Default value is the current year and all +\code{years} values should be >= \code{2009}} +} +\value{ +A single \code{data.frame}. +} +\description{ +Uses the data source from the +\href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView} +and provides influenza hospitalization reporting data as a data frame. +} +\note{ +There is often a noticeable delay when making the API request to the CDC. + This is not due to a large download size, but the time it takes for their + servers to crunch the data. Wrap the function call in \code{httr::with_verbose} + if you would like to see what's going on. +} +\examples{ +\dontrun{ +# All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons +flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014) +} +} diff --git a/man/get_mortality_surveillance_data.Rd b/man/get_mortality_surveillance_data.Rd new file mode 100644 index 0000000..7f7aca7 --- /dev/null +++ b/man/get_mortality_surveillance_data.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-mortality-surveillance-data.r +\name{get_mortality_surveillance_data} +\alias{get_mortality_surveillance_data} +\title{Mortality Surveillance Data from the National Center for Health Statistics (deprecated)} +\usage{ +get_mortality_surveillance_data() +} +\value{ +a list of \code{tbl_df}s +} +\description{ +The National Center for Health Statistics (NCHS) collects and disseminates the Nation's +official vital statistics. These statistics are based on data provided to NCHS through +contracts with the vital registration systems operated in the various jurisdictions +legally responsible for the registration of deaths (i.e., death certificates) and other +vital events. These data have previously only been released as annual final data files +12 months or more after the end of the data year. Recent NCHS efforts to improve the +timeliness of jurisdiction reporting and modernize the national vital statistics +infrastructure have created a system capable of supporting near real-time surveillance. +Capitalizing on these new capabilities, NCHS and CDC’s Influenza Division have +partnered to pilot the use of NCHS mortality surveillance data for Pneumonia and +Influenza (P&I) mortality surveillance. +} +\details{ +NCHS mortality surveillance data are presented by the week the death occurred. 
+Nationally P&I percentages are released two weeks after the week of death to allow for +collection of enough data to produce a stable P&I percentage at the national level. +Collection of complete data is not expected, and reliable P&I ratios are not expected +at the region and state level within this two week period. State and Region level +counts will be released only after 20% of the expected number of deaths are reported +through the system. +} +\examples{ +\dontrun{ +get_mortality_surveillance_data() +} +} +\references{ +\url{https://www.cdc.gov/flu/weekly/nchs.htm} +} diff --git a/man/get_state_data.Rd b/man/get_state_data.Rd new file mode 100644 index 0000000..075f29c --- /dev/null +++ b/man/get_state_data.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-state-data.r +\name{get_state_data} +\alias{get_state_data} +\title{Retrieves state/territory-level influenza statistics from the CDC (deprecated)} +\usage{ +get_state_data(years = as.numeric(format(Sys.Date(), "\%Y"))) +} +\arguments{ +\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC +flu season 2014-2015). Default value is the current year and all +\code{years} values should be >= \code{2008}} +} +\value{ +A \code{data.frame} of state-level data for the specified seasons + (also classed as \code{cdcstatedata}) +} +\description{ +Uses the data source from the CDC' State-levelFluView +\url{https://gis.cdc.gov/grasp/fluview/main.html} and provides state flu +reporting data as a single data frame.\cr +\cr +This function provides similar data to \code{\link{get_weekly_flu_report}} but +provides more metadata about the reporting sources and has access to more +historical infomation. +} +\note{ +There is often a noticeable delay when making the API request to the CDC. This + is not due to a large download size, but the time it takes for their + servers to crunch the data. Wrap the function call in \code{httr::with_verbose} + if you would like to see what's going on. +} +\examples{ +get_state_data(2014) +\dontrun{ +get_state_data(c(2013, 2014)) +get_state_data(2010:2014) +httr::with_verbose(get_state_data(2009:2015)) +} +} diff --git a/man/get_weekly_flu_report.Rd b/man/get_weekly_flu_report.Rd new file mode 100644 index 0000000..5f4963a --- /dev/null +++ b/man/get_weekly_flu_report.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get-weekly-flu-report.r +\name{get_weekly_flu_report} +\alias{get_weekly_flu_report} +\title{Retrieves (high-level) weekly (XML) influenza surveillance report from the CDC} +\usage{ +get_weekly_flu_report() +} +\value{ +\code{tbl_df} (also classed with \code{cdcweeklyreport}) with six + columns: \code{year}, \code{week_number}, \code{state}, \code{color}, + \code{label}, \code{subtitle} +} +\description{ +The CDC publishes a \href{https://www.cdc.gov/flu/weekly/usmap.htm}{weekly +influenza report} detailing high-level flu activity per-state. They also +publish a data file (see \code{References}) of historical report readings. +This function reads that XML file and produces a long \code{data_frame} +with the historical surveillance readings.\cr +\cr +This function provides similar data to \code{\link{get_state_data}} but without +the reporting source metadata and a limit on the historical flu information. 
+} +\examples{ +\dontrun{ +get_weekly_flu_report() +} +} +\references{ +\url{https://www.cdc.gov/flu/weekly/flureport.xml} +} diff --git a/man/hospitalizations.Rd b/man/hospitalizations.Rd index 659fe43..d624122 100644 --- a/man/hospitalizations.Rd +++ b/man/hospitalizations.Rd @@ -27,8 +27,8 @@ to a year.} Laboratory-Confirmed Influenza Hospitalizations } \examples{ +hosp_fs <- hospitalizations("flusurv", years=2015) \dontrun{ -hosp_fs <- hospitalizations("flusurv") hosp_eip <- hospitalizations("eip") hosp_ihsp <- hospitalizations("ihsp") } diff --git a/man/ilinet.Rd b/man/ilinet.Rd index 1f110a3..469179d 100644 --- a/man/ilinet.Rd +++ b/man/ilinet.Rd @@ -27,15 +27,16 @@ This function retrieves current and historical ILINet surveillance data for the identified region. } \examples{ -national_ili <- ilinet("national") +national_ili <- ilinet("national", years=2017) +\dontrun{ hhs_ili <- ilinet("hhs") census_ili <- ilinet("census") state_ili <- ilinet("state") -\dontrun{ + library(purrr) map_df( c("national", "hhs", "census", "state"), - ~ilinet(.x) \%>\% readr::type_convert()) + ~ilinet(.x)) } } \references{ diff --git a/man/who_nrevss.Rd b/man/who_nrevss.Rd index 9ef038f..a021ecd 100644 --- a/man/who_nrevss.Rd +++ b/man/who_nrevss.Rd @@ -8,6 +8,13 @@ who_nrevss(region = c("national", "hhs", "census", "state"), years = NULL) } \arguments{ \item{region}{one of "\code{national}", "\code{hhs}", "\code{census}", or "\code{state}"} + +\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC +flu season 2014-2015). CDC has data for this API going back to 1997. +Default value (\code{NULL}) means retrieve \strong{all} years. NOTE: if you +happen to specify a 2-digit season value (i.e. \code{57} == 2017-2018) +the function is smart enough to retrieve by season ID vs convert that +to a year.} } \value{ list of data frames identified by diff --git a/tests/testthat/test-cdcfluview.R b/tests/testthat/test-cdcfluview.R index 4f81f76..cb9d431 100644 --- a/tests/testthat/test-cdcfluview.R +++ b/tests/testthat/test-cdcfluview.R @@ -1,37 +1,38 @@ -context("basic functionality") -test_that("we can do something", { +context("new API functionality") + +test_that("New API works", { skip_on_cran() - expect_that(age_group_distribution(), is_a("data.frame")) + expect_that(age_group_distribution(years=2017), is_a("data.frame")) - expect_that(geographic_spread(), is_a("data.frame")) + expect_that(geographic_spread(years=2017), is_a("data.frame")) expect_that(state_data_providers(), is_a("data.frame")) - expect_that(hospitalizations("flusurv"), is_a("data.frame")) - expect_that(hospitalizations("eip"), is_a("data.frame")) - expect_that(hospitalizations("eip", "Colorado"), is_a("data.frame")) - expect_that(hospitalizations("ihsp"), is_a("data.frame")) - expect_that(hospitalizations("ihsp", "Oklahoma"), is_a("data.frame")) + expect_that(hospitalizations("flusurv", years=2017), is_a("data.frame")) + expect_that(hospitalizations("eip", years=2017), is_a("data.frame")) + expect_that(hospitalizations("eip", "Colorado", years=2017), is_a("data.frame")) + expect_that(hospitalizations("ihsp", years=2017), is_a("data.frame")) + expect_that(hospitalizations("ihsp", "Oklahoma", years=2017), is_a("data.frame")) - expect_that(ilinet("national"), is_a("data.frame")) - expect_that(ilinet("hhs"), is_a("data.frame")) - expect_that(ilinet("census"), is_a("data.frame")) - expect_that(ilinet("state"), is_a("data.frame")) + expect_that(ilinet("national", years=2017), is_a("data.frame")) + expect_that(ilinet("hhs", 
years=2017), is_a("data.frame")) + expect_that(ilinet("census", years=2017), is_a("data.frame")) + expect_that(ilinet("state", years=2017), is_a("data.frame")) expect_that(ili_weekly_activity_indicators(2017), is_a("data.frame")) - expect_that(pi_mortality("national"), is_a("data.frame")) - expect_that(pi_mortality("state"), is_a("data.frame")) - expect_that(pi_mortality("region"), is_a("data.frame")) + expect_that(pi_mortality("national", years=2017), is_a("data.frame")) + expect_that(pi_mortality("state", years=2017), is_a("data.frame")) + expect_that(pi_mortality("region", years=2017), is_a("data.frame")) expect_that(surveillance_areas(), is_a("data.frame")) - expect_that(who_nrevss("national"), is_a("list")) - expect_that(who_nrevss("hhs"), is_a("list")) - expect_that(who_nrevss("census"), is_a("list")) - expect_that(who_nrevss("state"), is_a("list")) + expect_that(who_nrevss("national", years=2017), is_a("list")) + expect_that(who_nrevss("hhs", years=2017), is_a("list")) + expect_that(who_nrevss("census", years=2017), is_a("list")) + expect_that(who_nrevss("state", years=2017), is_a("list")) expect_that(cdc_basemap("national"), is_a("sf")) expect_that(cdc_basemap("hhs"), is_a("sf")) @@ -40,18 +41,43 @@ test_that("we can do something", { expect_that(cdc_basemap("spread"), is_a("sf")) expect_that(cdc_basemap("surv"), is_a("sf")) - expect_equal(mmwr_week(Sys.Date()), - structure(list(mmwr_year = 2017, mmwr_week = 45, mmwr_day = 2), - .Names = c("mmwr_year", "mmwr_week", "mmwr_day"), - row.names = c(NA, -1L), - class = c("tbl_df", "tbl", "data.frame")) - ) + m1 <- mmwr_week(as.Date("2017-03-01")) + m2 <- mmwr_weekday(as.Date("2017-03-01")) + + expect_equal(m1$mmwr_year[1], 2017) + expect_equal(m1$mmwr_week[1], 9) + expect_equal(m1$mmwr_day[1], 4) - expect_equal(mmwr_weekday(Sys.Date()), - structure(2L, .Label = c("Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday"), - class = "factor")) + expect_that(m2, is_a("factor")) + expect_equal(as.character(m2), "Wednesday") expect_equal(mmwr_week_to_date(2016,10,3), structure(16868, class = "Date")) }) + + +context("old API functionality") + +test_that("Old API works", { + + skip_on_cran() + + expect_that(dim(get_flu_data("hhs", years=2015)), equals(c(520L, 15L))) + + expect_that(dim(get_state_data(2008)), equals(c(2494L, 8L))) + + invisible(get_flu_data()) + + invisible(get_flu_data(data_source="all")) + + invisible(get_weekly_flu_report()) + +}) + +test_that("these are potentially time-consuming calls", { + + skip_on_cran() + + invisible(get_mortality_surveillance_data()) + +}) \ No newline at end of file
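
For reference, a minimal sketch of the migration path implied by the deprecation messages above. The pairings come from those messages, the argument values are lifted from the examples and tests in this patch, and the old and new functions may return differently shaped data, so treat each pair as an approximate equivalent rather than a drop-in swap.

library(cdcfluview)

# ILINet / WHO NREVSS data: get_flu_data() is deprecated in favor of
# ilinet() and who_nrevss()
old_ili <- get_flu_data("hhs", 1:10, "ilinet", years = 2015)
new_ili <- ilinet("hhs", years = 2015)
new_who <- who_nrevss("hhs", years = 2015)

# FluSurv-NET hospitalizations: get_hosp_data() -> hospitalizations()
old_hosp <- get_hosp_data("flusurvnet", "50-64y", years = 2010:2014)
new_hosp <- hospitalizations("flusurv", years = 2015)

# State-level activity: get_state_data() -> ili_weekly_activity_indicators()
old_state <- get_state_data(2014)
new_state <- ili_weekly_activity_indicators(2017)

# NCHS P&I mortality: get_mortality_surveillance_data() -> pi_mortality()
old_mort <- get_mortality_surveillance_data()
new_mort <- pi_mortality("national", years = 2017)

# get_weekly_flu_report() is not deprecated by this patch; it still parses
# https://www.cdc.gov/flu/weekly/flureport.xml directly
wkly <- get_weekly_flu_report()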