Browse Source

Add function to pull hospital data using new HTML5 interface and update documentation and tests. Resolves #8

pull/10/head
Craig McGowan 3 years ago
parent
commit
bbeeb8f516
10 changed files with 170 additions and 7 deletions
  1. +1
    -1
      DESCRIPTION
  2. +1
    -0
      NAMESPACE
  3. +126
    -0
      R/get_hosp_data.R
  4. +0
    -1
      man/cdcfluview.Rd
  5. +0
    -1
      man/census_regions.Rd
  6. +42
    -0
      man/get_hosp_data.Rd
  7. +0
    -1
      man/get_mortality_surveillance_data.Rd
  8. +0
    -1
      man/get_state_data.Rd
  9. +0
    -1
      man/get_weekly_flu_report.Rd
  10. +0
    -1
      man/hhs_regions.Rd

+ 1
- 1
DESCRIPTION View File

@ -30,4 +30,4 @@ Imports:
V8
Depends:
R (>= 3.2.0)
RoxygenNote: 5.0.1
RoxygenNote: 6.0.1

+ 1
- 0
NAMESPACE View File

@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand
export(get_flu_data)
export(get_hosp_data)
export(get_mortality_surveillance_data)
export(get_state_data)
export(get_weekly_flu_report)


+ 126
- 0
R/get_hosp_data.R View File

@ -0,0 +1,126 @@
#' Retrieves influenza hospitalization statistics from the CDC
#'
#' Uses the data source from the
#' \href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView}
#' and provides influenza hospitalization reporting data as a data frame.
#'
#' @param area one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or two
#' digit state abbreviation for an individual site. Exceptions are
#' New York - Albany ("\code{nya}") and New York - Rochester ("\code{nyr}")
#' @param age_group a vector of age groups to pull data for. Possible values are:
#' "\code{overall}", "\code{0-4y}", "\code{5-17y}, "\code{18-49y},
#' "\code{50-64y}, "\code{65+y}".
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
#' flu season 2014-2015). Default value is the current year and all
#' \code{years} values should be >= \code{2009}
#' @return A single \code{data.frame}.
#' @note There is often a noticeable delay when making the API request to the CDC.
#' This is not due to a large download size, but the time it takes for their
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
#' if you would like to see what's going on.
#' @export
#' @examples \dontrun{
#' # All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons
#' flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014)
#' }
get_hosp_data <- function(area="flusurvnet", age_group="overall",
years=as.numeric(format(Sys.Date(), "%Y")) - 1) {
area <- tolower(area)
age_group <- tolower(age_group)
if (!(area %in% c("flusurvnet", "eip", "ihsp", "ca", "co", "ct", "ga", "md",
"mn", "nm", "nya", "nyr", "or", "tn", "id", "ia", "mi",
"oh", "ok", "ri", "sd", "ut")))
stop("Error: area must be one of flusurvnet, eip, ihsp, or a valid state abbreviation")
if (length(area) != 1)
stop("Error: can only select one area")
if (!all(age_group %in% c("overall", "0-4y", "5-17y", "18-49y",
"50-64y", "65+y")))
stop("Error: invalid age group specified")
if (any(years < 2009))
stop("Error: years should be >= 2009")
# Match names of age groups to numbers for API
age_match <- data.frame(age_group = c("overall", "0-4y", "5-17y",
"18-49y", "50-64y", "65+y"),
code = c(6, 1, 2, 3, 4, 5))
age_group_num <- age_match$code[age_match$age_group %in% age_group]
# format the input parameters to fit the CDC API
years <- years - 1960
area_match <- data.frame(area = c("flusurvnet", "eip", "ca", "co", "ct",
"ga", "md", "mn", "nm", "nya", "nyr", "or",
"tn", "ihsp", "id", "ia", "mi", "oh", "ok",
"ri", "sd", "ut"),
catch = c(22, 22, 1, 2, 3, 4, 7, 9, 11, 13, 14, 17,
20, 22, 6, 5, 8, 15, 16, 18, 19, 21),
network = c(1, rep(2, 12), rep(3, 9)))
# Format years
year_list <- lapply(seq_along(years),
function(x) list(ID = years[x]))
# Format age group
age_list <- lapply(seq_along(age_group_num),
function(x) list(ID = age_group_num[x]))
params <- list(AppVersion = "Public",
agegroups = age_list,
catchmentid = area_match$catch[area_match$area == area],
networkid = area_match$network[area_match$area == area],
seasons = year_list)
out_file <- tempfile(fileext=".json")
# CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs
tmp <- httr::POST("https://gis.cdc.gov/GRASP/Flu3/PostPhase03DownloadData",
body = params,
encode = "json",
httr::write_disk(out_file, overwrite = T))
httr::stop_for_status(tmp)
if (!(file.exists(out_file)))
stop("Error: cannot process downloaded data")
file <- jsonlite::fromJSON(out_file)[[1]]
# pb <- dplyr::progress_estimated(length(file))
# purrr::map(file, function(x) {
# pb$tick()$print()
# ct <- ifelse(grepl("who", x, ignore.case=TRUE), 1, 1)
# suppressMessages(readr::read_csv(x, skip=ct))
# }) -> file_list
# names(file_list) <- substr(basename(files), 1, nchar(basename(files)) - 4)
# Depending on the parameters, there could be more than one
# file returned. When there's only one, return a more usable
# structure.
# when no rows, then it's likely the caller specified the
# current year and the flu season has technically not started yet.
# so help them out and move the year back and get current flu
# season data.
if ((nrow(file) == 0) &&
(length(years)==1) &&
(years == (as.numeric(format(Sys.Date(), "%Y"))-1960))) {
message("Adjusting [years] to get current season...")
return(get_hosp_data(area=area, age_group=age_group,
years=years+1960-1))
} else {
return(file)
}
}

+ 0
- 1
man/cdcfluview.Rd View File

@ -16,4 +16,3 @@ provides functions to access the data provided by portal's underlying API.
\author{
Bob Rudis (bob@rud.is)
}

+ 0
- 1
man/census_regions.Rd View File

@ -25,4 +25,3 @@ Last updated 2015-08-09.
\url{https://www.cdc.gov/std/stats12/images/CensusMap.png}
}
\keyword{datasets}

+ 42
- 0
man/get_hosp_data.Rd View File

@ -0,0 +1,42 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_hosp_data.R
\name{get_hosp_data}
\alias{get_hosp_data}
\title{Retrieves influenza hospitalization statistics from the CDC}
\usage{
get_hosp_data(area = "flusurvnet", age_group = "overall",
years = as.numeric(format(Sys.Date(), "\%Y")) - 1)
}
\arguments{
\item{area}{one of "\code{flusurvnet}", "\code{eip}", "\code{ihsp}", or two
digit state abbreviation for an individual site. Exceptions are
New York - Albany ("\code{nya}") and New York - Rochester ("\code{nyr}")}
\item{age_group}{a vector of age groups to pull data for. Possible values are:
"\code{overall}", "\code{0-4y}", "\code{5-17y}, "\code{18-49y},
"\code{50-64y}, "\code{65+y}".}
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
flu season 2014-2015). Default value is the current year and all
\code{years} values should be >= \code{2009}}
}
\value{
A single \code{data.frame}.
}
\description{
Uses the data source from the
\href{https://gis.cdc.gov/GRASP/Fluview/FluHospRates.html}{CDC FluView}
and provides influenza hospitalization reporting data as a data frame.
}
\note{
There is often a noticeable delay when making the API request to the CDC.
This is not due to a large download size, but the time it takes for their
servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
if you would like to see what's going on.
}
\examples{
\dontrun{
# All of FluSurv-NET, 50-64 years old, 2010/11-2014/15 flu seasons
flu <- get_hosp_data("flusurvnet", "50-64y", years=2010:2014)
}
}

+ 0
- 1
man/get_mortality_surveillance_data.Rd View File

@ -39,4 +39,3 @@ get_mortality_surveillance_data()
\references{
\url{https://www.cdc.gov/flu/weekly/nchs.htm}
}

+ 0
- 1
man/get_state_data.Rd View File

@ -38,4 +38,3 @@ get_state_data(2010:2014)
httr::with_verbose(get_state_data(2009:2015))
}
}

+ 0
- 1
man/get_weekly_flu_report.Rd View File

@ -29,4 +29,3 @@ get_weekly_flu_report()
\references{
\url{https://www.cdc.gov/flu/weekly/flureport.xml}
}

+ 0
- 1
man/hhs_regions.Rd View File

@ -28,4 +28,3 @@ Last updated 2015-08-09.
\url{https://www.hhs.gov/about/agencies/iea/regional-offices/index.html}
}
\keyword{datasets}

Loading…
Cancel
Save