
Old API functions restored & deprecated; new tests added; CRAN checks pass

tags/v0.7.0
boB Rudis 7 years ago
commit 26d6713e42
  1. .Rbuildignore (1 line changed)
  2. DESCRIPTION (3 lines changed)
  3. NAMESPACE (16 lines changed)
  4. R/aaa.R (2 lines changed)
  5. R/agd-ipt.r (5 lines changed)
  6. R/cdcfluview-package.R (5 lines changed)
  7. R/get-flu-data.r (196 lines changed)
  8. R/get-hosp-data.r (111 lines changed)
  9. R/get-mortality-surveillance-data.r (96 lines changed)
  10. R/get-state-data.r (58 lines changed)
  11. R/get-weekly-flu-report.r (60 lines changed)
  12. R/hospital.r (5 lines changed)
  13. R/ilinet.r (9 lines changed)
  14. R/who-nrvess.r (6 lines changed)
  15. R/zzz.r (2 lines changed)
  16. man/age_group_distribution.Rd (4 lines changed)
  17. man/get_flu_data.Rd (54 lines changed)
  18. man/get_hosp_data.Rd (43 lines changed)
  19. man/get_mortality_surveillance_data.Rd (41 lines changed)
  20. man/get_state_data.Rd (40 lines changed)
  21. man/get_weekly_flu_report.Rd (31 lines changed)
  22. man/hospitalizations.Rd (2 lines changed)
  23. man/ilinet.Rd (7 lines changed)
  24. man/who_nrevss.Rd (7 lines changed)
  25. tests/testthat/test-cdcfluview.R (86 lines changed)

1
.Rbuildignore

@@ -22,3 +22,4 @@
^README-.*
^cran-comments\.md$
^codecov\.yml$
^Makefile$

3
DESCRIPTION

@@ -34,6 +34,9 @@ Imports:
stats,
utils,
sf,
xml2,
purrr,
readr,
MMWRweek,
units (>= 0.4-6)
RoxygenNote: 6.0.1

16
NAMESPACE

@@ -3,6 +3,11 @@
export(age_group_distribution)
export(cdc_basemap)
export(geographic_spread)
export(get_flu_data)
export(get_hosp_data)
export(get_mortality_surveillance_data)
export(get_state_data)
export(get_weekly_flu_report)
export(hospitalizations)
export(ili_weekly_activity_indicators)
export(ilinet)
@@ -16,6 +21,7 @@ export(surveillance_areas)
export(who_nrevss)
import(MMWRweek)
import(httr)
import(xml2)
importFrom(dplyr,"%>%")
importFrom(dplyr,bind_rows)
importFrom(dplyr,data_frame)
@@ -23,8 +29,18 @@ importFrom(dplyr,filter)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(jsonlite,fromJSON)
importFrom(purrr,discard)
importFrom(purrr,keep)
importFrom(purrr,map)
importFrom(purrr,map_chr)
importFrom(purrr,map_df)
importFrom(purrr,map_lgl)
importFrom(readr,read_csv)
importFrom(readr,type_convert)
importFrom(sf,st_read)
importFrom(stats,setNames)
importFrom(tools,file_path_sans_ext)
importFrom(utils,URLencode)
importFrom(utils,globalVariables)
importFrom(utils,read.csv)
importFrom(utils,unzip)

2
R/aaa.R

@@ -1,3 +1,5 @@
utils::globalVariables(c(".", "mmwrid", "season", "seasonid"))
# CDC U.S. region names to ID map
.region_map <- c(national=3, hhs=1, census=2, state=5)
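A brief illustrative sketch of how such a named vector maps a user-facing region name onto the numeric ID the GRASP API expects (the exact lookup site inside the package may differ):
region <- "hhs"
region_id <- unname(.region_map[region])  # 1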

5
R/agd-ipt.r

@@ -14,9 +14,8 @@
#' the function is smart enough to retrieve by season ID vs convert that
#' to a year.
#' @export
#' @examples \dontrun{
#' agd_ipt()
#' }
#' @examples
#' age_group_distribution(years=2015)
age_group_distribution <- function(years = NULL) {
httr::GET(

5
R/cdcfluview-package.R

@@ -13,10 +13,13 @@
#' @author Bob Rudis (bob@@rud.is)
#' @import httr
#' @import MMWRweek
#' @import xml2
#' @importFrom purrr map map_df map_chr map_lgl discard keep
#' @importFrom readr read_csv type_convert
#' @importFrom tools file_path_sans_ext
#' @importFrom dplyr left_join bind_rows mutate filter data_frame %>%
#' @importFrom jsonlite fromJSON
#' @importFrom stats setNames
#' @importFrom sf st_read
#' @importFrom utils read.csv unzip
#' @importFrom utils read.csv unzip URLencode globalVariables
NULL

196
R/get-flu-data.r

@@ -0,0 +1,196 @@
#' Retrieves state, regional or national influenza statistics from the CDC (deprecated)
#'
#' Uses the data source from the
#' \href{https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView}
#' and provides flu reporting data as either a single data frame or a list of
#' data frames (depending on whether \code{WHO NREVSS}, \code{ILINet}, or both
#' are chosen).
#'
#' A lookup table between HHS regions and their member states/territories
#' is provided in \code{\link{hhs_regions}}.
#'
#' @param region one of "\code{hhs}", "\code{census}", "\code{national}",
#' "\code{state}"
#' @param sub_region depends on the \code{region} selected.\cr
#' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
#' For "\code{hhs}", should be a vector between \code{1:10}.\cr
#' For "\code{census}", should be a vector between \code{1:9}.\cr
#' For "\code{state}", should be a vector of state/territory names
#' or "\code{all}".
#' @param data_source one of "\code{who}" (for WHO NREVSS), "\code{ilinet}",
#' or "\code{all}" (for both)
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
#' flu season 2014-2015). Default value is the current year and all
#' \code{years} values should be >= \code{1997}
#' @return If only a single \code{data_source} is specified, then a single
#' \code{data.frame} is returned, otherwise a named list with each
#' \code{data.frame} is returned.
#' @note There is often a noticeable delay when making the API request to the CDC.
#' This is not due to a large download size, but the time it takes for their
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
#' if you would like to see what's going on.
#' @export
#' @examples \dontrun{
#' flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
#' }
get_flu_data <- function(region="hhs", sub_region=1:10,
data_source="ilinet",
years=as.numeric(format(Sys.Date(), "%Y"))) {
message(
paste0(
c("This function has been deprecated and will be removed in future releases.",
"Use either ilinet() or who_nrevss() instead."),
collapse="\n"
)
)
region <- tolower(region)
data_source <- tolower(data_source)
if (!(region %in% c("hhs", "census", "national", "state")))
stop("Error: region must be one of hhs, census or national")
if (length(region) != 1)
stop("Error: can only select one region")
if (region=="national") sub_region = 0
if ((region=="hhs") && !all(sub_region %in% 1:10))
stop("Error: sub_region values must fall between 1:10 when region is 'hhs'")
if ((region=="census") && !all(sub_region %in% 1:19))
stop("Error: sub_region values must fall between 1:10 when region is 'census'")
if (!all(data_source %in% c("who", "ilinet", "all")))
stop("Error: data_source must be either 'who', 'ilinet', 'all' or c('who', 'ilinet')")
if (any(years < 1997))
stop("Error: years should be > 1997")
# Match names of states to numbers for API
if (region == "state") {
sub_region <- tolower(sub_region)
if (any(sub_region == "all")) {
sub_region_inpt <- 1:57
} else {
state_match <- data.frame(state = tolower(c(sort(c(datasets::state.name,
"District of Columbia")),
"American Samoa",
"Commonwealth of the Northern Mariana Islands",
"Puerto Rico",
"Virgin Islands",
"New York City",
"Los Angeles")),
num = 1:57,
stringsAsFactors = FALSE)
sub_region_inpt <- state_match$num[state_match$state %in% sub_region]
if (length(sub_region_inpt) == 0)
stop("Error: no eligible state/territory names provided")
}
} else sub_region_inpt <- sub_region
# format the input parameters to fit the CDC API
years <- years - 1960
reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3, "state" = 5)[[region]])
# Format data source
if (data_source == "who") {
data_list <- list(list(ID = 0,
Name = "WHO_NREVSS"))
} else if (data_source == "ilinet") {
data_list <- list(list(ID = 1,
Name = "ILINet"))
} else data_list <- list(list(ID = 0,
Name = "WHO_NREVSS"),
list(ID = 1,
Name = "ILINet"))
# Format years
year_list <- lapply(seq_along(years),
function(x) list(ID = years[x],
Name = paste(years[x])))
# Format sub regions
sub_reg_list <- lapply(seq_along(sub_region_inpt),
function(x) list(ID = sub_region_inpt[x],
Name = paste(sub_region_inpt[x])))
params <- list(AppVersion = "Public",
DatasourceDT = data_list,
RegionTypeId = reg,
SeasonsDT = year_list,
SubRegionsDT = sub_reg_list)
out_file <- tempfile(fileext=".zip")
# CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs
tmp <- httr::POST("https://gis.cdc.gov/grasp/flu2/PostPhase02DataDownload",
body = params,
encode = "json",
httr::write_disk(out_file))
httr::stop_for_status(tmp)
if (!(file.exists(out_file)))
stop("Error: cannot process downloaded data")
out_dir <- tempdir()
files <- unzip(out_file, exdir=out_dir, overwrite=TRUE)
pb <- dplyr::progress_estimated(length(files))
lapply(files, function(x) {
pb$tick()$print()
ct <- ifelse(grepl("who", x, ignore.case=TRUE), 1, 1)
suppressMessages(readr::read_csv(x, skip=ct))
}) -> file_list
names(file_list) <- substr(basename(files), 1, nchar(basename(files)) - 4)
# If data are missing, the CDC fills cells with "X", which causes numeric columns to be read as character
lapply(file_list, function(x) {
# Create list of columns that should be numeric - exclude character columns
cols <- which(!colnames(x) %in% c("REGION", "REGION TYPE",
"SEASON_DESCRIPTION"))
suppressWarnings(x[cols] <- purrr::map(x[cols], as.numeric))
return(x)
}) -> file_list
# Depending on the parameters, there could be more than one
# file returned. When there's only one, return a more usable
# structure.
if (length(file_list) == 1) {
file_list <- file_list[[1]]
# when no rows, then it's likely the caller specified the
# current year and the flu season has technically not started yet.
# so help them out and move the year back and get current flu
# season data.
if ((nrow(file_list) == 0) &&
(length(years)==1) &&
(years == (as.numeric(format(Sys.Date(), "%Y"))-1960))) {
message("Adjusting [years] to get current season...")
return(get_flu_data(region=region, sub_region=sub_region,
data_source=data_source, years=years+1960-1))
} else {
return(file_list)
}
} else {
return(file_list)
}
}
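For reference, a minimal sketch of the migration this deprecation message points at (network access assumed; column layouts differ between the old and new functions):
# deprecated call: one data frame of ILINet data for all ten HHS regions
old_ili <- get_flu_data(region = "hhs", sub_region = 1:10,
                        data_source = "ilinet", years = 2015)
# replacement calls: ilinet() for ILINet data, who_nrevss() for WHO/NREVSS data
new_ili <- ilinet(region = "hhs", years = 2015)
new_who <- who_nrevss(region = "hhs", years = 2015)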

111
R/get-hosp-data.r

@@ -0,0 +1,111 @@
#' Retrieves influenza hospitalization statistics from the CDC (deprecated)
#'
#' Uses the data source from the
#' \href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView}
#' and provides influenza hospitalization reporting data as a data frame.
#'
#' @param area one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or a
#' two-digit state abbreviation for an individual site. Exceptions are
#' New York - Albany ("\code{nya}") and New York - Rochester
#' ("\code{nyr}")
#' @param age_group a vector of age groups to pull data for. Possible values are:
#' "\code{overall}", "\code{0-4y}", "\code{5-17y}, "\code{18-49y},
#' "\code{50-64y}, "\code{65+y}".
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
#' flu season 2014-2015). Default value is the current year and all
#' \code{years} values should be >= \code{2009}
#' @return A single \code{data.frame}.
#' @note There is often a noticeable delay when making the API request to the CDC.
#' This is not due to a large download size, but the time it takes for their
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
#' if you would like to see what's going on.
#' @export
#' @examples \dontrun{
#' # All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons
#' flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014)
#' }
get_hosp_data <- function(area="flusurvnet", age_group="overall",
years=as.numeric(format(Sys.Date(), "%Y")) - 1) {
message(
paste0(
c("This function has been deprecated and will be removed in future releases.",
"Use hospitalizations() instead."),
collapse="\n"
)
)
area <- tolower(area)
age_group <- tolower(age_group)
if (!(area %in% c("flusurvnet", "eip", "ihsp", "ca", "co", "ct", "ga", "md",
"mn", "nm", "nya", "nyr", "or", "tn", "id", "ia", "mi",
"oh", "ok", "ri", "sd", "ut")))
stop("Error: area must be one of flusurvnet, eip, ihsp, or a valid state abbreviation")
if (length(area) != 1)
stop("Error: can only select one area")
if (!all(age_group %in% c("overall", "0-4y", "5-17y", "18-49y",
"50-64y", "65+y")))
stop("Error: invalid age group specified")
if (any(years < 2009))
stop("Error: years should be >= 2009")
# Match names of age groups to numbers for API
age_match <- data.frame(age_group = c("overall", "0-4y", "5-17y",
"18-49y", "50-64y", "65+y"),
code = c(6, 1, 2, 3, 4, 5))
age_group_num <- age_match$code[age_match$age_group %in% age_group]
# format the input parameters to fit the CDC API
years <- years - 1960
area_match <- data.frame(
area = c("flusurvnet", "eip", "ca", "co", "ct",
"ga", "md", "mn", "nm", "nya", "nyr", "or",
"tn", "ihsp", "id", "ia", "mi", "oh", "ok",
"ri", "sd", "ut"),
catch = c(22, 22, 1, 2, 3, 4, 7, 9, 11, 13, 14, 17,
20, 22, 6, 5, 8, 15, 16, 18, 19, 21),
network = c(1, rep(2, 12), rep(3, 9)),
stringsAsFactors=FALSE
)
# Format years
year_list <- lapply(seq_along(years),
function(x) list(ID = years[x]))
# Format age group
age_list <- lapply(seq_along(age_group_num),
function(x) list(ID = age_group_num[x]))
params <- list(AppVersion = "Public",
agegroups = age_list,
catchmentid = area_match$catch[area_match$area == area],
networkid = area_match$network[area_match$area == area],
seasons = year_list)
out_file <- tempfile(fileext=".json")
# CDC API returns JSON here, so we grab & save it, then parse it with jsonlite
tmp <- httr::POST("https://gis.cdc.gov/GRASP/Flu3/PostPhase03DownloadData",
body = params,
encode = "json",
httr::write_disk(out_file, overwrite = TRUE))
httr::stop_for_status(tmp)
if (!(file.exists(out_file)))
stop("Error: cannot process downloaded data")
file <- jsonlite::fromJSON(out_file)[[1]]
return(file)
}
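For reference, a hedged sketch of the equivalent call against the replacement API (the same positional form is exercised in the test file below; surveillance_areas() lists the valid surveillance-area/region pairs):
# deprecated call
old_hosp <- get_hosp_data(area = "eip", age_group = "overall", years = 2015)
# replacement call: surveillance area, optional region, then years
new_hosp <- hospitalizations("eip", "Colorado", years = 2015)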

96
R/get-mortality-surveillance-data.r

@@ -0,0 +1,96 @@
#' Mortality Surveillance Data from the National Center for Health Statistics (deprecated)
#'
#' The National Center for Health Statistics (NCHS) collects and disseminates the Nation's
#' official vital statistics. These statistics are based on data provided to NCHS through
#' contracts with the vital registration systems operated in the various jurisdictions
#' legally responsible for the registration of deaths (i.e., death certificates) and other
#' vital events. These data have previously only been released as annual final data files
#' 12 months or more after the end of the data year. Recent NCHS efforts to improve the
#' timeliness of jurisdiction reporting and modernize the national vital statistics
#' infrastructure have created a system capable of supporting near real-time surveillance.
#' Capitalizing on these new capabilities, NCHS and CDC’s Influenza Division have
#' partnered to pilot the use of NCHS mortality surveillance data for Pneumonia and
#' Influenza (P&I) mortality surveillance.
#'
#' NCHS mortality surveillance data are presented by the week the death occurred.
#' Nationally P&I percentages are released two weeks after the week of death to allow for
#' collection of enough data to produce a stable P&I percentage at the national level.
#' Collection of complete data is not expected, and reliable P&I ratios are not expected
#' at the region and state level within this two week period. State and Region level
#' counts will be released only after 20% of the expected number of deaths are reported
#' through the system.
#'
#' @references \url{https://www.cdc.gov/flu/weekly/nchs.htm}
#' @return a list of \code{tbl_df}s
#' @export
#' @examples \dontrun{
#' get_mortality_surveillance_data()
#' }
get_mortality_surveillance_data <- function() {
message(
paste0(
c("This function has been deprecated and will be removed in future releases.",
"Use pi_mortality() instead."),
collapse="\n"
)
)
# scrape (ugh) web page to get data file links for state mortality data
pg <- xml2::read_html("https://www.cdc.gov/flu/weekly/nchs.htm")
PREFIX <- "https://www.cdc.gov"
xml2::xml_find_all(pg, ".//select[@id='State']/option[contains(@value, 'csv') and
contains(@value, 'State_')]") %>%
xml2::xml_attr("value") %>%
sprintf("%s%s", PREFIX, .) -> targets
pb <- dplyr::progress_estimated(length(targets))
purrr::map_df(targets, function(x) {
pb$tick()$print()
suppressMessages(readr::read_csv(URLencode(x), col_types="ciidii"))
}) -> influenza_mortality_by_state
# scrape (ugh) web page to get data file links for regional mortality data
xml2::xml_find_all(pg, ".//select[@id='Regional Data']/
option[contains(@value, 'csv') and
not(contains(@value, 'Week_'))]") %>%
xml2::xml_attr("value") %>%
sprintf("%s%s", PREFIX, .) -> targets
pb <- dplyr::progress_estimated(length(targets))
purrr::map_df(targets, function(x) {
pb$tick()$print()
suppressMessages(read_csv(URLencode(x), col_types="ciidii"))
}) -> influenza_mortality_by_region
# scrape (ugh) web page to get data file links for weekly mortality data
xml2::xml_find_all(pg, ".//select[@id='Regional Data']/
option[contains(@value, 'csv') and
contains(@value, 'Week_')]") %>%
xml2::xml_attr("value") %>%
sprintf("%s%s", PREFIX, .) -> targets
pb <- dplyr::progress_estimated(length(targets))
purrr::map_df(targets, function(x) {
pb$tick()$print()
suppressMessages(read_csv(URLencode(x), col_types="ciidii"))
}) -> influenza_mortality_by_week
# return everything as a named list
list(
by_state = influenza_mortality_by_state,
by_region = influenza_mortality_by_region,
by_week = influenza_mortality_by_week
) -> out
class(out) <- c("cfv_mortality", class(out))
out
}
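For reference, a minimal sketch of the replacement named in the deprecation message (pi_mortality() queries the FluView GRASP API instead of scraping the NCHS page, so the returned fields differ):
# deprecated call: scrapes three sets of CSVs from the NCHS page
old_mort <- get_mortality_surveillance_data()
# replacement calls: one per coverage area
nat_pi   <- pi_mortality("national", years = 2015)
state_pi <- pi_mortality("state", years = 2015)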

58
R/get-state-data.r

@@ -0,0 +1,58 @@
#' Retrieves state/territory-level influenza statistics from the CDC (deprecated)
#'
#' Uses the data source from the CDC's State-level FluView
#' \url{https://gis.cdc.gov/grasp/fluview/main.html} and provides state flu
#' reporting data as a single data frame.\cr
#' \cr
#' This function provides similar data to \code{\link{get_weekly_flu_report}} but
#' provides more metadata about the reporting sources and has access to more
#' historical information.
#'
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
#' flu season 2014-2015). Default value is the current year and all
#' \code{years} values should be >= \code{2008}
#' @return A \code{data.frame} of state-level data for the specified seasons
#' (also classed as \code{cdcstatedata})
#' @export
#' @note There is often a noticeable delay when making the API request to the CDC. This
#' is not due to a large download size, but the time it takes for their
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
#' if you would like to see what's going on.
#' @examples
#' get_state_data(2014)
#' \dontrun{
#' get_state_data(c(2013, 2014))
#' get_state_data(2010:2014)
#' httr::with_verbose(get_state_data(2009:2015))
#' }
get_state_data <- function(years=as.numeric(format(Sys.Date(), "%Y"))) {
message(
paste0(
c("This function has been deprecated and will be removed in future releases.",
"Use ili_weekly_activity_indicators() instead."),
collapse="\n"
)
)
if (any(years < 2008))
stop("Error: years should be >= 2008")
years <- c((years - 1960), 1)
years <- paste0(years, collapse=",")
tmp <- httr::GET(sprintf("https://gis.cdc.gov/grasp/fluView1/Phase1DownloadDataP/%s", years))
stop_for_status(tmp)
# the API doesn't return actual JSON; it returns a JavaScript-style data
# structure, so we parse the retrieved content ourselves with jsonlite.
res <- httr::content(tmp, as="parsed")
res <- jsonlite::fromJSON(res)
out <- suppressMessages(readr::type_convert(res$datadownload))
out
}
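For reference, a short sketch of the replacement named in the deprecation message:
# deprecated call
old_state <- get_state_data(2015)
# replacement call: same season argument
new_state <- ili_weekly_activity_indicators(2015)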

60
R/get-weekly-flu-report.r

@@ -0,0 +1,60 @@
#' Retrieves (high-level) weekly (XML) influenza surveillance report from the CDC
#'
#' The CDC publishes a \href{https://www.cdc.gov/flu/weekly/usmap.htm}{weekly
#' influenza report} detailing high-level flu activity per-state. They also
#' publish a data file (see \code{References}) of historical report readings.
#' This function reads that XML file and produces a long \code{data_frame}
#' with the historical surveillance readings.\cr
#' \cr
#' This function provides similar data to \code{\link{get_state_data}} but without
#' the reporting-source metadata and with a limit on the historical flu information.
#'
#' @references \url{https://www.cdc.gov/flu/weekly/flureport.xml}
#' @return \code{tbl_df} (also classed with \code{cdcweeklyreport}) with six
#' columns: \code{year}, \code{week_number}, \code{state}, \code{color},
#' \code{label}, \code{subtitle}
#' @export
#' @examples \dontrun{
#' get_weekly_flu_report()
#' }
get_weekly_flu_report <- function() {
# grab the report
doc <- xml2::read_xml("https://www.cdc.gov/flu/weekly/flureport.xml")
# extract the time periods
periods <- xml2::xml_attrs(xml2::xml_find_all(doc, "timeperiod"))
# for each period extract the state information and
# shove it all into a data frame
pb <- dplyr::progress_estimated(length(periods))
purrr::map_df(periods, function(period) {
pb$tick()$print()
tp <- sprintf("//timeperiod[@number='%s' and @year='%s']",
period["number"], period["year"])
weeks <- xml2::xml_find_first(doc, tp)
kids <- xml2::xml_children(weeks)
abbrev <- xml2::xml_text(xml2::xml_find_all(kids, "abbrev"), TRUE)
color <- xml2::xml_text(xml2::xml_find_all(kids, "color"), TRUE)
label <- xml2::xml_text(xml2::xml_find_all(kids, "label"), TRUE)
dplyr::data_frame(
year = period["year"],
week_number = period["number"],
state = abbrev,
color = color,
label = label,
subtitle = period["subtitle"]
)
}) -> out
class(out) <- c("cdcweeklyreport", class(out))
out
}
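This function is not deprecated; a short usage sketch of the long data frame it returns (columns as documented in the \value block above):
wfr <- get_weekly_flu_report()
# one row per state per reporting week, e.g. tally activity labels by year
table(wfr$year, wfr$label)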

5
R/hospital.r

@@ -16,8 +16,9 @@
#' @references
#' - [Hospital Portal](https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html)
#' @export
#' @examples \dontrun{
#' hosp_fs <- hospitalizations("flusurv")
#' @examples
#' hosp_fs <- hospitalizations("flusurv", years=2015)
#' \dontrun{
#' hosp_eip <- hospitalizations("eip")
#' hosp_ihsp <- hospitalizations("ihsp")
#' }

9
R/ilinet.r

@@ -22,15 +22,16 @@
#' - [WHO/NREVSS](https://www.cdc.gov/surveillance/nrevss/index.html)
#' @export
#' @examples
#' national_ili <- ilinet("national")
#' national_ili <- ilinet("national", years=2017)
#' \dontrun{
#' hhs_ili <- ilinet("hhs")
#' census_ili <- ilinet("census")
#' state_ili <- ilinet("state")
#' \dontrun{
#'
#' library(purrr)
#' map_df(
#' c("national", "hhs", "census", "state"),
#' ~ilinet(.x) %>% readr::type_convert())
#' ~ilinet(.x))
#' }
ilinet <- function(region=c("national", "hhs", "census", "state"), years=NULL) {
@@ -117,6 +118,6 @@ ilinet <- function(region=c("national", "hhs", "census", "state"), years=NULL) {
class(xdf) <- c("tbl_df", "tbl", "data.frame")
xdf
suppressMessages(readr::type_convert(xdf))
}
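Since readr::type_convert() is now applied inside ilinet(), callers no longer need the explicit conversion step shown in the old example; a quick check (illustrative only):
ili <- ilinet("national", years = 2017)
sapply(ili, class)  # numeric columns arrive typed; no manual type_convert() needed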

6
R/who-nrvess.r

@@ -16,6 +16,12 @@
#' the reason why a list of data frames is returned.\cr\cr
#' **ALSO** The new CDC API seems to be missing some public health lab data fields.
#' @param region one of "`national`", "`hhs`", "`census`", or "`state`"
#' @param years a vector of years to retrieve data for (i.e. `2014` for CDC
#' flu season 2014-2015). CDC has data for this API going back to 1997.
#' Default value (`NULL`) means retrieve **all** years. NOTE: if you
#' happen to specify a 2-digit season value (i.e. `57` == 2017-2018)
#' the function is smart enough to retrieve by season ID vs convert that
#' to a year.
#' @return list of data frames identified by
#' - `combined_prior_to_2015_16`
#' - `public_health_labs`
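A brief sketch of the two ways the new years argument can be supplied, per the note above:
# calendar year: 2015 selects the CDC 2015-2016 flu season
wn_year <- who_nrevss("national", years = 2015)
# 2-digit season ID: 57 selects the 2017-2018 season
wn_season <- who_nrevss("national", years = 57)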

2
R/zzz.r

@@ -5,7 +5,7 @@
if (!interactive()) return()
packageStartupMessage(paste0("cdcfluview is under *active* development. ",
"There are *MASSIVE* breaking changes*. ",
"There are numerous changes & dprecations.\n",
"See https://github.com/hrbrmstr/cdcfluview for info/news."))
} # nocov end

4
man/age_group_distribution.Rd

@@ -20,9 +20,7 @@ public health laboratories by influenza virus type and subtype/lineage. Laborato
from multiple seasons and different age groups is provided.
}
\examples{
\dontrun{
agd_ipt()
}
age_group_distribution(years=2015)
}
\references{
- [CDC FluView Portal](https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html)

54
man/get_flu_data.Rd

@@ -0,0 +1,54 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-flu-data.r
\name{get_flu_data}
\alias{get_flu_data}
\title{Retrieves state, regional or national influenza statistics from the CDC (deprecated)}
\usage{
get_flu_data(region = "hhs", sub_region = 1:10, data_source = "ilinet",
years = as.numeric(format(Sys.Date(), "\%Y")))
}
\arguments{
\item{region}{one of "\code{hhs}", "\code{census}", "\code{national}",
"\code{state}"}
\item{sub_region}{depends on the \code{region} selected.\cr
For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
For "\code{hhs}", should be a vector between \code{1:10}.\cr
For "\code{census}", should be a vector between \code{1:9}.\cr
For "\code{state}", should be a vector of state/territory names
or "\code{all}".}
\item{data_source}{one of "\code{who}" (for WHO NREVSS), "\code{ilinet}",
or "\code{all}" (for both)}
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
flu season 2014-2015). Default value is the current year and all
\code{years} values should be >= \code{1997}}
}
\value{
If only a single \code{data_source} is specified, then a single
\code{data.frame} is returned, otherwise a named list with each
\code{data.frame} is returned.
}
\description{
Uses the data source from the
\href{https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView}
and provides flu reporting data as either a single data frame or a list of
data frames (depending on whether \code{WHO NREVSS}, \code{ILINet}, or both
are chosen).
}
\details{
A lookup table between HHS regions and their member states/territories
is provided in \code{\link{hhs_regions}}.
}
\note{
There is often a noticeable delay when making the API request to the CDC.
This is not due to a large download size, but the time it takes for their
servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
if you would like to see what's going on.
}
\examples{
\dontrun{
flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
}
}

43
man/get_hosp_data.Rd

@@ -0,0 +1,43 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-hosp-data.r
\name{get_hosp_data}
\alias{get_hosp_data}
\title{Retrieves influenza hospitalization statistics from the CDC (deprecated)}
\usage{
get_hosp_data(area = "flusurvnet", age_group = "overall",
years = as.numeric(format(Sys.Date(), "\%Y")) - 1)
}
\arguments{
\item{area}{one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or a
two-digit state abbreviation for an individual site. Exceptions are
New York - Albany ("\code{nya}") and New York - Rochester
("\code{nyr}")}
\item{age_group}{a vector of age groups to pull data for. Possible values are:
"\code{overall}", "\code{0-4y}", "\code{5-17y}, "\code{18-49y},
"\code{50-64y}, "\code{65+y}".}
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
flu season 2014-2015). Default value is the current year and all
\code{years} values should be >= \code{2009}}
}
\value{
A single \code{data.frame}.
}
\description{
Uses the data source from the
\href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView}
and provides influenza hospitalization reporting data as a data frame.
}
\note{
There is often a noticeable delay when making the API request to the CDC.
This is not due to a large download size, but the time it takes for their
servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
if you would like to see what's going on.
}
\examples{
\dontrun{
# All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons
flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014)
}
}

41
man/get_mortality_surveillance_data.Rd

@@ -0,0 +1,41 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-mortality-surveillance-data.r
\name{get_mortality_surveillance_data}
\alias{get_mortality_surveillance_data}
\title{Mortality Surveillance Data from the National Center for Health Statistics (deprecated)}
\usage{
get_mortality_surveillance_data()
}
\value{
a list of \code{tbl_df}s
}
\description{
The National Center for Health Statistics (NCHS) collects and disseminates the Nation's
official vital statistics. These statistics are based on data provided to NCHS through
contracts with the vital registration systems operated in the various jurisdictions
legally responsible for the registration of deaths (i.e., death certificates) and other
vital events. These data have previously only been released as annual final data files
12 months or more after the end of the data year. Recent NCHS efforts to improve the
timeliness of jurisdiction reporting and modernize the national vital statistics
infrastructure have created a system capable of supporting near real-time surveillance.
Capitalizing on these new capabilities, NCHS and CDC’s Influenza Division have
partnered to pilot the use of NCHS mortality surveillance data for Pneumonia and
Influenza (P&I) mortality surveillance.
}
\details{
NCHS mortality surveillance data are presented by the week the death occurred.
Nationally P&I percentages are released two weeks after the week of death to allow for
collection of enough data to produce a stable P&I percentage at the national level.
Collection of complete data is not expected, and reliable P&I ratios are not expected
at the region and state level within this two week period. State and Region level
counts will be released only after 20% of the expected number of deaths are reported
through the system.
}
\examples{
\dontrun{
get_mortality_surveillance_data()
}
}
\references{
\url{https://www.cdc.gov/flu/weekly/nchs.htm}
}

40
man/get_state_data.Rd

@@ -0,0 +1,40 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-state-data.r
\name{get_state_data}
\alias{get_state_data}
\title{Retrieves state/territory-level influenza statistics from the CDC (deprecated)}
\usage{
get_state_data(years = as.numeric(format(Sys.Date(), "\%Y")))
}
\arguments{
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
flu season 2014-2015). Default value is the current year and all
\code{years} values should be >= \code{2008}}
}
\value{
A \code{data.frame} of state-level data for the specified seasons
(also classed as \code{cdcstatedata})
}
\description{
Uses the data source from the CDC's State-level FluView
\url{https://gis.cdc.gov/grasp/fluview/main.html} and provides state flu
reporting data as a single data frame.\cr
\cr
This function provides similar data to \code{\link{get_weekly_flu_report}} but
provides more metadata about the reporting sources and has access to more
historical information.
}
\note{
There is often a noticeable delay when making the API request to the CDC. This
is not due to a large download size, but the time it takes for their
servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
if you would like to see what's going on.
}
\examples{
get_state_data(2014)
\dontrun{
get_state_data(c(2013, 2014))
get_state_data(2010:2014)
httr::with_verbose(get_state_data(2009:2015))
}
}

31
man/get_weekly_flu_report.Rd

@@ -0,0 +1,31 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-weekly-flu-report.r
\name{get_weekly_flu_report}
\alias{get_weekly_flu_report}
\title{Retrieves (high-level) weekly (XML) influenza surveillance report from the CDC}
\usage{
get_weekly_flu_report()
}
\value{
\code{tbl_df} (also classed with \code{cdcweeklyreport}) with six
columns: \code{year}, \code{week_number}, \code{state}, \code{color},
\code{label}, \code{subtitle}
}
\description{
The CDC publishes a \href{https://www.cdc.gov/flu/weekly/usmap.htm}{weekly
influenza report} detailing high-level flu activity per-state. They also
publish a data file (see \code{References}) of historical report readings.
This function reads that XML file and produces a long \code{data_frame}
with the historical surveillance readings.\cr
\cr
This function provides similar data to \code{\link{get_state_data}} but without
the reporting-source metadata and with a limit on the historical flu information.
}
\examples{
\dontrun{
get_weekly_flu_report()
}
}
\references{
\url{https://www.cdc.gov/flu/weekly/flureport.xml}
}

2
man/hospitalizations.Rd

@@ -27,8 +27,8 @@ to a year.}
Laboratory-Confirmed Influenza Hospitalizations
}
\examples{
hosp_fs <- hospitalizations("flusurv", years=2015)
\dontrun{
hosp_fs <- hospitalizations("flusurv")
hosp_eip <- hospitalizations("eip")
hosp_ihsp <- hospitalizations("ihsp")
}

7
man/ilinet.Rd

@@ -27,15 +27,16 @@ This function retrieves current and historical ILINet surveillance data for
the identified region.
}
\examples{
national_ili <- ilinet("national")
national_ili <- ilinet("national", years=2017)
\dontrun{
hhs_ili <- ilinet("hhs")
census_ili <- ilinet("census")
state_ili <- ilinet("state")
\dontrun{
library(purrr)
map_df(
c("national", "hhs", "census", "state"),
~ilinet(.x) \%>\% readr::type_convert())
~ilinet(.x))
}
}
\references{

7
man/who_nrevss.Rd

@@ -8,6 +8,13 @@ who_nrevss(region = c("national", "hhs", "census", "state"), years = NULL)
}
\arguments{
\item{region}{one of "\code{national}", "\code{hhs}", "\code{census}", or "\code{state}"}
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
flu season 2014-2015). CDC has data for this API going back to 1997.
Default value (\code{NULL}) means retrieve \strong{all} years. NOTE: if you
happen to specify a 2-digit season value (i.e. \code{57} == 2017-2018)
the function is smart enough to retrieve by season ID vs convert that
to a year.}
}
\value{
list of data frames identified by

86
tests/testthat/test-cdcfluview.R

@@ -1,37 +1,38 @@
context("basic functionality")
test_that("we can do something", {
context("new API functionality")
test_that("New API works", {
skip_on_cran()
expect_that(age_group_distribution(), is_a("data.frame"))
expect_that(age_group_distribution(years=2017), is_a("data.frame"))
expect_that(geographic_spread(), is_a("data.frame"))
expect_that(geographic_spread(years=2017), is_a("data.frame"))
expect_that(state_data_providers(), is_a("data.frame"))
expect_that(hospitalizations("flusurv"), is_a("data.frame"))
expect_that(hospitalizations("eip"), is_a("data.frame"))
expect_that(hospitalizations("eip", "Colorado"), is_a("data.frame"))
expect_that(hospitalizations("ihsp"), is_a("data.frame"))
expect_that(hospitalizations("ihsp", "Oklahoma"), is_a("data.frame"))
expect_that(hospitalizations("flusurv", years=2017), is_a("data.frame"))
expect_that(hospitalizations("eip", years=2017), is_a("data.frame"))
expect_that(hospitalizations("eip", "Colorado", years=2017), is_a("data.frame"))
expect_that(hospitalizations("ihsp", years=2017), is_a("data.frame"))
expect_that(hospitalizations("ihsp", "Oklahoma", years=2017), is_a("data.frame"))
expect_that(ilinet("national"), is_a("data.frame"))
expect_that(ilinet("hhs"), is_a("data.frame"))
expect_that(ilinet("census"), is_a("data.frame"))
expect_that(ilinet("state"), is_a("data.frame"))
expect_that(ilinet("national", years=2017), is_a("data.frame"))
expect_that(ilinet("hhs", years=2017), is_a("data.frame"))
expect_that(ilinet("census", years=2017), is_a("data.frame"))
expect_that(ilinet("state", years=2017), is_a("data.frame"))
expect_that(ili_weekly_activity_indicators(2017), is_a("data.frame"))
expect_that(pi_mortality("national"), is_a("data.frame"))
expect_that(pi_mortality("state"), is_a("data.frame"))
expect_that(pi_mortality("region"), is_a("data.frame"))
expect_that(pi_mortality("national", years=2017), is_a("data.frame"))
expect_that(pi_mortality("state", years=2017), is_a("data.frame"))
expect_that(pi_mortality("region", years=2017), is_a("data.frame"))
expect_that(surveillance_areas(), is_a("data.frame"))
expect_that(who_nrevss("national"), is_a("list"))
expect_that(who_nrevss("hhs"), is_a("list"))
expect_that(who_nrevss("census"), is_a("list"))
expect_that(who_nrevss("state"), is_a("list"))
expect_that(who_nrevss("national", years=2017), is_a("list"))
expect_that(who_nrevss("hhs", years=2017), is_a("list"))
expect_that(who_nrevss("census", years=2017), is_a("list"))
expect_that(who_nrevss("state", years=2017), is_a("list"))
expect_that(cdc_basemap("national"), is_a("sf"))
expect_that(cdc_basemap("hhs"), is_a("sf"))
@@ -40,18 +41,43 @@ test_that("we can do something", {
expect_that(cdc_basemap("spread"), is_a("sf"))
expect_that(cdc_basemap("surv"), is_a("sf"))
expect_equal(mmwr_week(Sys.Date()),
structure(list(mmwr_year = 2017, mmwr_week = 45, mmwr_day = 2),
.Names = c("mmwr_year", "mmwr_week", "mmwr_day"),
row.names = c(NA, -1L),
class = c("tbl_df", "tbl", "data.frame"))
)
m1 <- mmwr_week(as.Date("2017-03-01"))
m2 <- mmwr_weekday(as.Date("2017-03-01"))
expect_equal(m1$mmwr_year[1], 2017)
expect_equal(m1$mmwr_week[1], 9)
expect_equal(m1$mmwr_day[1], 4)
expect_equal(mmwr_weekday(Sys.Date()),
structure(2L, .Label = c("Sunday", "Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday"),
class = "factor"))
expect_that(m2, is_a("factor"))
expect_equal(as.character(m2), "Wednesday")
expect_equal(mmwr_week_to_date(2016,10,3), structure(16868, class = "Date"))
})
context("old API functionality")
test_that("Old API works", {
skip_on_cran()
expect_that(dim(get_flu_data("hhs", years=2015)), equals(c(520L, 15L)))
expect_that(dim(get_state_data(2008)), equals(c(2494L, 8L)))
invisible(get_flu_data())
invisible(get_flu_data(data_source="all"))
invisible(get_weekly_flu_report())
})
test_that("these are potentially time-consuming calls", {
skip_on_cran()
invisible(get_mortality_surveillance_data())
})
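The old-API tests above hit the live CDC endpoints, which is why everything is wrapped in skip_on_cran(); a quick way to run them locally (assumes a package checkout and that devtools is installed):
devtools::test()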