Browse Source

0.4.0.9000 (pre-CRAN)

pull/4/head
Bob Rudis 9 years ago
parent
commit
b26a613ba9
  1. 3
      .Rbuildignore
  2. 20
      DESCRIPTION
  3. 3
      NAMESPACE
  4. 12
      R/cdcfluview-package.R
  5. 22
      R/datasets.r
  6. 38
      R/get_flu_data.r
  7. 31
      R/get_state_data.r
  8. 55
      R/get_weekly_flu_report.r
  9. 34
      README.Rmd
  10. 106
      README.md
  11. BIN
      README_files/README-unnamed-chunk-5-1.png
  12. BIN
      README_files/README-unnamed-chunk-7-1.png
  13. BIN
      README_files/README-unnamed-chunk-9-1.png
  14. BIN
      README_files/figure-markdown_github/unnamed-chunk-6-1.png
  15. 1
      cdcfluview.Rproj
  16. 26
      crunch/mkdata.r
  17. BIN
      data/hhs_regions.rda
  18. 9
      man/cdcfluview.Rd
  19. 34
      man/get_flu_data.Rd
  20. 26
      man/get_state_data.Rd
  21. 32
      man/get_weekly_flu_report.Rd
  22. 31
      man/hhs_regions.Rd

3
.Rbuildignore

@ -2,3 +2,6 @@
^\.Rproj\.user$
^\.travis\.yml$
^.*md$
crunch/
^README_files/
^README-.*

20
DESCRIPTION

@ -1,20 +1,26 @@
Package: cdcfluview
Type: Package
Title: cdcfluview is package that retrieves the data behind the CDC's FluView portal
Version: 0.3
Date: 2015-08-07
Title: Retrieve Flu Season Data from the CDC FluView Portal
Version: 0.4.0.9000
Date: 2015-08-09
Author: Bob Rudis (@hrbrmstr)
Maintainer: Bob Rudis <bob@rudis.net>
Description: The CDC's FluView is a Flash portal and the only way to get flu season
data is to use GUI controls, making it tedious to retrieve updates. This package
uses the same API the portal does to programmatically retrieve data.
Description: The U.S. Centers for Disease Control (CDC) maintains a portal
<http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html> for
accessing state, regional and national influenza statistics. The Flash
interface makes it difficult and time-consuming to select and retrieve
influenza data. This package provides functions to access the data provided
by the portal's underlying API.
URL: http://github.com/hrbrmstr/cdcfluview
BugReports: https://github.com/hrbrmstr/cdcfluview/issues
License: MIT + file LICENSE
LazyData: TRUE
Suggests:
testthat
Imports:
httr (>= 0.3.0),
pbapply
pbapply,
xml2,
dplyr
Depends:
R (>= 3.0.0)

3
NAMESPACE

@ -2,5 +2,8 @@
export(get_flu_data)
export(get_state_data)
export(get_weekly_flu_report)
import(dplyr)
import(httr)
import(pbapply)
import(xml2)

12
R/cdcfluview-package.R

@ -1,6 +1,14 @@
#' A package to retrive data behind the CDC FluView portal
#' Retrieve Flu Season Data from the CDC FluView Portal
#'
#' The U.S. Centers for Disease Control (CDC) maintains a portal
#' \code{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html} for
#' accessing state, regional and national influenza statistics. The Flash
#' interface makes it difficult and time-consuming to select and retrieve
#' influenza data. This package provides functions to access the data provided
#' by the portal's underlying API.
#'
#' @name cdcfluview
#' @docType package
#' @author Bob Rudis (@@hrbrmstr)
#' @import httr pbapply
#' @import httr pbapply xml2 dplyr
NULL

22
R/datasets.r

@ -0,0 +1,22 @@
#' @title HHS Region Table
#' @description This dataset contains the names, numbers, regional offices for
#' and states/territories belonging to the (presently) 10 HHS U.S.
#' regions in "long" format. It consists of a \code{data.frame}
#' with the following columns:
#'
#' \itemize{
#' \item \code{region}: the official HHS region name (e.g. "\code{Region 1}")
#' \item \code{region_number}: the associated region number
#' \item \code{regional_office}: the HHS regional office for the entire region
#' \item \code{state_or_territory}: state or territory belonging to the region
#' }
#'
#' @docType data
#' @keywords datasets
#' @name hhs_regions
#'
#' @references \url{http://www.hhs.gov/iea/regional/}
#' @usage data(hhs_regions)
#' @note Last updated 2015-08-09.
#' @format A data frame with 59 rows and 4 variables
NULL

38
R/get_flu_data.r

@ -1,26 +1,38 @@
# http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html
#' Retrieve CDC flu data
#' Retrieves state, regional or national influenza statistics from the CDC
#'
#' Uses the data source from the
#' \href{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView}
#' and provides flu reporting data as either a single data frame or a list of
#' data frames (depending on whether either \code{WHO NREVSS} or \code{ILINet}
#' (or both) is chosen).
#'
#' Uses the data source from the CDC FluView \url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}
#' and provides flu reporting data as either a single data frame or a list
#' of data frames (depending on whether either WHO NREVSS or ILINet - or both - is chosen)
#' A lookup table between HHS regions and their member states/territories
#' is provided in \code{\link{hhs_regions}}.
#'
#' @param region one of "\code{hhs}", "\code{census}", "\code{national}"
#' @param sub_region depends on the \code{region}.\cr
#' For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
#' For "\code{hhs}", should be a vector between \code{1:10}.\cr
#' For "\code{census}", should be a vector between \code{1:9}
#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" or "\code{all}" (for both)
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)
#' @return If only a single \code{data_source} is specified, then a single \code{data.frame} is
#' returned, otherwise a named list with each \code{data.frame} is returned.
#' @param data_source either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}"
#' or "\code{all}" (for both)
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
#' flu season 2014-2015). Default value is the current year and all
#' \code{years} values should be > \code{1997}
#' @return If only a single \code{data_source} is specified, then a single
#' \code{data.frame} is returned, otherwise a named list with each
#' \code{data.frame} is returned.
#' @note There is often a noticeable delay when making the API request to the CDC.
#' This is not due to a large download size, but the time it takes for their
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
#' if you would like to see what's going on.
#' @export
#' @examples \dontrun{
#' flu <- get_flu_data("hhs", 1:10, c("who", "ilinet"), years=2000:2014)
#' }
get_flu_data <- function(region="hhs", sub_region=1:10,
data_source="ilinet", years=2014) {
data_source="ilinet",
years=as.numeric(format(Sys.Date(), "%Y"))) {
region <- tolower(region)
data_source <- tolower(data_source)
@ -45,6 +57,8 @@ get_flu_data <- function(region="hhs", sub_region=1:10,
if (any(years < 1997))
stop("Error: years should be > 1997")
# format the input parameters to fit the CDC API
years <- years - 1960
reg <- as.numeric(c("hhs"=1, "census"=2, "national"=3)[[region]])
@ -52,7 +66,7 @@ get_flu_data <- function(region="hhs", sub_region=1:10,
data_source <- gsub("ilinet", "ILINet", data_source)
params <- list(SubRegionsList=paste0(sub_region, collapse=","),
DataSources=data_source,
DataSources=paste0(data_source, collapse=","),
RegionID=reg,
SeasonsList=paste0(years, collapse=","))

31
R/get_state_data.r

@ -1,17 +1,30 @@
#' Retrieves the state-level data from the CDC's FluView Portal
#' Retrieves state/territory-level influenza statistics from the CDC
#'
#' Uses the data source from the CDC' State-levelFluView \url{http://gis.cdc.gov/grasp/fluview/main.html}
#' and provides state flu reporting data as a single data frame
#' Uses the data source from the CDC's State-level FluView
#' \url{http://gis.cdc.gov/grasp/fluview/main.html} and provides state flu
#' reporting data as a single data frame.\cr
#' \cr
#' This function provides similar data to \code{\link{get_weekly_flu_report}} but
#' provides more metadata about the reporting sources and has access to more
#' historical information.
#'
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)
#' @param years a vector of years to retrieve data for (i.e. \code{2014} for CDC
#' flu season 2014-2015). Default value is the current year and all
#' \code{years} values should be > \code{1997}
#' @return A \code{data.frame} of state-level data for the specified seasons
#' (also classed as \code{cdcstatedata})
#' @export
#' @note There is often a noticeable delay when making the API request to the CDC. This
#' is not due to a large download size, but the time it takes for their
#' servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
#' if you would like to see what's going on.
#' @examples \dontrun{
#' get_state_data(2014)
#' get_state_data(c(2013, 2014))
#' get_state_data(2010:2014)
#' httr::with_verbose(get_state_data(2009:2015))
#' }
get_state_data <- function(years=2014) {
get_state_data <- function(years=as.numeric(format(Sys.Date(), "%Y"))) {
if (any(years < 1997))
stop("Error: years should be > 1997")
@ -39,6 +52,10 @@ get_state_data <- function(years=2014) {
files <- unzip(out_file, exdir=out_dir, overwrite=TRUE)
read.csv(files, header=TRUE, stringsAsFactors=FALSE)
out <- read.csv(files, header=TRUE, stringsAsFactors=FALSE)
class(out) <- c("cdcstatedata", class(out))
out
}

55
R/get_weekly_flu_report.r

@ -0,0 +1,55 @@
#' Retrieves weekly influenza surveillance report from the CDC
#'
#' The CDC publishes a \href{http://www.cdc.gov/flu/weekly/usmap.htm}{weekly
#' influenza report} detailing high-level flu activity per-state. They also
#' publish a data file (see \code{References}) of historical report readings.
#' This function reads that XML file and produces a long \code{data_frame}
#' with the historical surveillance readings.\cr
#' \cr
#' This function provides similar data to \code{\link{get_state_data}} but without
#' the reporting source metadata and with more limited historical flu information.
#'
#' @references \url{http://www.cdc.gov/flu/weekly/flureport.xml}
#' @return \code{tbl_df} (also classed with \code{cdcweeklyreport}) with six
#' columns: \code{year}, \code{week_number}, \code{state}, \code{color},
#' \code{label}, \code{subtitle}
#' @export
#' @examples \dontrun{
#' get_weekly_flu_report()
#' }
get_weekly_flu_report <- function() {

  # download and parse the CDC's weekly report XML file
  report <- read_xml("http://www.cdc.gov/flu/weekly/flureport.xml")

  # one attribute vector per <timeperiod> node; the code below reads the
  # "number", "year" and "subtitle" attributes from each
  period_attrs <- xml_attrs(xml_find_all(report, "timeperiod"))

  # build one data frame per time period
  per_period <- pblapply(period_attrs, function(period) {

    # look the period node back up by its week number and year, then
    # take its child nodes
    xpath <- sprintf("//timeperiod[@number='%s' and @year='%s']",
                     period["number"], period["year"])
    state_nodes <- xml_children(xml_find_one(report, xpath))

    # the period-level attributes are length one and get recycled against
    # the per-child abbrev/color/label vectors
    data_frame(year=period["year"],
               week_number=period["number"],
               state=xml_text(xml_find_all(state_nodes, "abbrev"), TRUE),
               color=xml_text(xml_find_all(state_nodes, "color"), TRUE),
               label=xml_text(xml_find_all(state_nodes, "label"), TRUE),
               subtitle=period["subtitle"])

  })

  # stack everything into one long table and prepend the package class
  out <- bind_rows(per_period)
  class(out) <- c("cdcweeklyreport", class(out))
  out

}

34
README.Rmd

@ -1,29 +1,42 @@
---
title: "README"
author: "Bob Rudis"
date: August 7, 2015
date: August 9, 2015
output:
md_document:
variant: markdown_github
---
```{r, echo=FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
warning = FALSE,
message = FALSE,
comment = "#>",
fig.path = "README_files/README-",
fig.retina = 2
)
```
**NOTE** If there's a particular data set from http://www.cdc.gov/flu/weekly/fluviewinteractive.htm that you want and that isn't in the package, please file it as an issue and be as specific as you can (screen shot if possible).
-----
The CDC's FluView is a Flash portal and the only way to get flu season
data is to use GUI controls, making it tedious to retrieve updates. This package
uses the same API the portal does to programmatically retrieve data.
The U.S. Centers for Disease Control (CDC) maintains a [portal](http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html) for accessing state, regional and national influenza statistics. The portal's Flash interface makes it difficult and time-consuming to select and retrieve influenza data. This package provides functions to access the data provided by portal's underlying API.
The following functions are implemented:
- `get_flu_data` : retrieve flu data (national, by various region/sub-region types)
- `get_state_data` : retrieve state-level flu data
- `get_flu_data`: Retrieves state, regional or national influenza statistics from the CDC
- `get_state_data`: Retrieves state/territory-level influenza statistics from the CDC
- `get_weekly_flu_report`: Retrieves weekly influenza surveillance report from the CDC
The following data sets are included:
- `hhs_regions` HHS Region Table (a data frame with 59 rows and 4 variables)
### News
- Version 0.4.0.9000 released : another fix for the CDC API (for region parameter); added data file for region lookups; added weekly high-level flu report retrieval
- Version 0.3 released : fix for the CDC API (it changed how year & region params are encoded in the request)
- Version 0.2.1 released : bumped up `httr` version # requirement in `DESCRIPTION` (via Issue [1](https://github.com/hrbrmstr/cdcfluview/issues/1))
- Version 0.2 released : added state-level data retrieval
@ -46,7 +59,6 @@ suppressPackageStartupMessages(library(cdcfluview))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(statebins))
suppressPackageStartupMessages(library(magrittr))
# current version
packageVersion("cdcfluview")
@ -54,7 +66,7 @@ packageVersion("cdcfluview")
flu <- get_flu_data("hhs", sub_region=1:10, "ilinet", years=2014)
glimpse(flu)
state_flu <- get_state_data()
state_flu <- get_state_data(years=2014)
glimpse(state_flu)
gg <- ggplot(flu, aes(x=WEEK, y=X..WEIGHTED.ILI, group=REGION))
@ -73,7 +85,7 @@ dat <- get_flu_data(region="hhs",
data_source="ilinet",
years=2000:2014)
dat %<>%
dat %>%
mutate(REGION=factor(REGION,
levels=unique(REGION),
labels=c("Boston", "New York",
@ -85,7 +97,7 @@ dat %<>%
mutate(season_week=ifelse(WEEK>=40, WEEK-40, WEEK),
season=ifelse(WEEK<40,
sprintf("%d-%d", YEAR-1, YEAR),
sprintf("%d-%d", YEAR, YEAR+1)))
sprintf("%d-%d", YEAR, YEAR+1))) -> dat
prev_years <- dat %>% filter(season != "2014-2015")
curr_year <- dat %>% filter(season == "2014-2015")
@ -105,7 +117,7 @@ gg <- gg + geom_line(data=curr_year,
gg <- gg + geom_vline(xintercept=curr_week, color="#d7301f", size=0.5, linetype="dashed", alpha=0.5)
gg <- gg + facet_wrap(~REGION, ncol=3)
gg <- gg + labs(x=NULL, y="Weighted ILI Index",
title="ILINet - 1999-2015 year weighted flu index history by CDC region\nWeek Ending Jan 3, 2015 (Red == current season)\n")
title="ILINet - 1999-2015 year weighted flu index history by CDC region\nWeek Ending Jan 3, 2015 (Red == 2014-2015 season)\n")
gg <- gg + theme_bw()
gg <- gg + theme(panel.grid=element_blank())
gg <- gg + theme(strip.background=element_blank())

106
README.md

@ -2,17 +2,21 @@
------------------------------------------------------------------------
The CDC's FluView is a Flash portal and the only way to get flu season data is to use GUI controls, making it tedious to retrieve updates. This package uses the same API the portal does to programmatically retrieve data.
The U.S. Centers for Disease Control (CDC) maintains a [portal](http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html) for accessing state, regional and national influenza statistics. The portal's Flash interface makes it difficult and time-consuming to select and retrieve influenza data. This package provides functions to access the data provided by portal's underlying API.
The following functions are implemented:
- `get_flu_data` : retrieve flu data (national, by various region/sub-region types)
- `get_state_data` : retrieve state-level flu data
- `get_flu_data`: Retrieves state, regional or national influenza statistics from the CDC
- `get_state_data`: Retrieves state/territory-level influenza statistics from the CDC
- `get_weekly_flu_report`: Retrieves weekly influenza surveillance report from the CDC
The following data sets are included:
- `hhs_regions` HHS Region Table (a data frame with 59 rows and 4 variables)
### News
- Version 0.4.0.9000 released : another fix for the CDC API (for region parameter); added data file for region lookups; added weekly high-level flu report retrieval
- Version 0.3 released : fix for the CDC API (it changed how year & region params are encoded in the request)
- Version 0.2.1 released : bumped up `httr` version \# requirement in `DESCRIPTION` (via Issue [1](https://github.com/hrbrmstr/cdcfluview/issues/1))
- Version 0.2 released : added state-level data retrieval
@ -31,61 +35,51 @@ suppressPackageStartupMessages(library(cdcfluview))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(statebins))
suppressPackageStartupMessages(library(magrittr))
# current verison
packageVersion("cdcfluview")
```
#> [1] '0.4.0.9000'
## [1] '0.3'
``` r
flu <- get_flu_data("hhs", sub_region=1:10, "ilinet", years=2014)
glimpse(flu)
```
## Observations: 440
## Variables:
## $ REGION.TYPE (chr) "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions",...
## $ REGION (chr) "Region 1", "Region 2", "Region 3", "Region 4", "Region 5", "Region 6", "Region 7", "Regi...
## $ YEAR (int) 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,...
## $ WEEK (int) 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 4...
## $ ILITOTAL (int) 352, 2254, 1696, 1182, 1083, 1844, 220, 348, 1329, 61, 386, 2129, 1747, 1517, 1117, 2165,...
## $ TOTAL.PATIENTS (int) 51688, 137157, 129302, 130419, 107261, 103975, 50272, 37014, 88421, 11172, 51169, 132513,...
## $ NUM..OF.PROVIDERS (int) 147, 285, 245, 305, 267, 241, 84, 117, 237, 55, 151, 274, 241, 310, 277, 250, 84, 114, 24...
## $ X..WEIGHTED.ILI (dbl) 0.8306102, 1.7759176, 1.1477759, 0.8167958, 0.7370374, 1.8252298, 0.6970221, 0.6731439, 1...
## $ X.UNWEIGHTED.ILI (dbl) 0.6810091, 1.6433722, 1.3116580, 0.9063097, 1.0096867, 1.7735032, 0.4376194, 0.9401848, 1...
## $ AGE.0.4 (int) 101, 869, 395, 333, 358, 465, 50, 82, 310, 22, 109, 837, 404, 356, 339, 560, 57, 58, 335,...
## $ AGE.5.24 (int) 185, 757, 629, 536, 400, 711, 98, 152, 577, 30, 199, 677, 670, 774, 443, 809, 124, 146, 5...
## $ AGE.25.64 (lgl) NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ AGE.25.49 (int) 44, 363, 455, 187, 181, 469, 43, 87, 220, 7, 37, 349, 466, 249, 182, 509, 56, 87, 225, 20...
## $ AGE.50.64 (int) 13, 157, 127, 80, 80, 121, 15, 19, 110, 1, 24, 151, 132, 74, 105, 187, 18, 23, 118, 10, 2...
## $ AGE.65 (int) 9, 108, 90, 46, 64, 78, 14, 8, 112, 1, 17, 115, 75, 64, 48, 100, 14, 12, 103, 3, 9, 110, ...
``` r
state_flu <- get_state_data()
#> Observations: 440
#> Variables:
#> $ REGION.TYPE (chr) "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions", "HHS Regions",...
#> $ REGION (chr) "Region 1", "Region 2", "Region 3", "Region 4", "Region 5", "Region 6", "Region 7", "Regi...
#> $ YEAR (int) 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014,...
#> $ WEEK (int) 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 4...
#> $ ILITOTAL (int) 352, 2254, 1696, 1182, 1083, 1844, 220, 348, 1329, 61, 386, 2129, 1747, 1517, 1117, 2165,...
#> $ TOTAL.PATIENTS (int) 51688, 137157, 129302, 130419, 107261, 103975, 50272, 37014, 88421, 11172, 51169, 132513,...
#> $ NUM..OF.PROVIDERS (int) 147, 285, 245, 305, 267, 241, 84, 117, 237, 55, 151, 274, 241, 310, 277, 250, 84, 114, 24...
#> $ X..WEIGHTED.ILI (dbl) 0.8306102, 1.7759176, 1.1477759, 0.8167958, 0.7370374, 1.8252298, 0.6970221, 0.6731439, 1...
#> $ X.UNWEIGHTED.ILI (dbl) 0.6810091, 1.6433722, 1.3116580, 0.9063097, 1.0096867, 1.7735032, 0.4376194, 0.9401848, 1...
#> $ AGE.0.4 (int) 101, 869, 395, 333, 358, 465, 50, 82, 310, 22, 109, 837, 404, 356, 339, 560, 57, 58, 335,...
#> $ AGE.5.24 (int) 185, 757, 629, 536, 400, 711, 98, 152, 577, 30, 199, 677, 670, 774, 443, 809, 124, 146, 5...
#> $ AGE.25.64 (lgl) NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
#> $ AGE.25.49 (int) 44, 363, 455, 187, 181, 469, 43, 87, 220, 7, 37, 349, 466, 249, 182, 509, 56, 87, 225, 20...
#> $ AGE.50.64 (int) 13, 157, 127, 80, 80, 121, 15, 19, 110, 1, 24, 151, 132, 74, 105, 187, 18, 23, 118, 10, 2...
#> $ AGE.65 (int) 9, 108, 90, 46, 64, 78, 14, 8, 112, 1, 17, 115, 75, 64, 48, 100, 14, 12, 103, 3, 9, 110, ...
state_flu <- get_state_data(years=2014)
glimpse(state_flu)
```
## Observations: 2809
## Variables:
## $ STATENAME (chr) "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama"...
## $ URL (chr) "http://adph.org/influenza/", "http://adph.org/influenza/", "http://adph.org/influenza...
## $ WEBSITE (chr) "Influenza Surveillance", "Influenza Surveillance", "Influenza Surveillance", "Influen...
## $ ACTIVITY.LEVEL (chr) "Level 1", "Level 1", "Level 1", "Level 1", "Level 1", "Level 1", "Level 5", "Level 10...
## $ ACTIVITY.LEVEL.LABEL (chr) "Minimal", "Minimal", "Minimal", "Minimal", "Minimal", "Minimal", "Low", "High", "High...
## $ WEEKEND (chr) "Oct-04-2014", "Oct-11-2014", "Oct-18-2014", "Oct-25-2014", "Nov-01-2014", "Nov-08-201...
## $ WEEK (int) 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,...
## $ SEASON (chr) "2014-15", "2014-15", "2014-15", "2014-15", "2014-15", "2014-15", "2014-15", "2014-15"...
#> Observations: 2809
#> Variables:
#> $ STATENAME (chr) "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama"...
#> $ URL (chr) "http://adph.org/influenza/", "http://adph.org/influenza/", "http://adph.org/influenza...
#> $ WEBSITE (chr) "Influenza Surveillance", "Influenza Surveillance", "Influenza Surveillance", "Influen...
#> $ ACTIVITY.LEVEL (chr) "Level 1", "Level 1", "Level 1", "Level 1", "Level 1", "Level 1", "Level 5", "Level 10...
#> $ ACTIVITY.LEVEL.LABEL (chr) "Minimal", "Minimal", "Minimal", "Minimal", "Minimal", "Minimal", "Low", "High", "High...
#> $ WEEKEND (chr) "Oct-04-2014", "Oct-11-2014", "Oct-18-2014", "Oct-25-2014", "Nov-01-2014", "Nov-08-201...
#> $ WEEK (int) 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,...
#> $ SEASON (chr) "2014-15", "2014-15", "2014-15", "2014-15", "2014-15", "2014-15", "2014-15", "2014-15"...
``` r
gg <- ggplot(flu, aes(x=WEEK, y=X..WEIGHTED.ILI, group=REGION))
gg <- gg + geom_line()
gg <- gg + facet_wrap(~REGION, ncol=2)
gg <- gg + theme_bw()
```
![](README_files/figure-markdown_github/unnamed-chunk-4-1.png)
<img src="README_files/README-unnamed-chunk-5-1.png" title="" alt="" width="576" />
``` r
dat <- get_flu_data(region="hhs",
@ -93,7 +87,7 @@ dat <- get_flu_data(region="hhs",
data_source="ilinet",
years=2000:2014)
dat %<>%
dat %>%
mutate(REGION=factor(REGION,
levels=unique(REGION),
labels=c("Boston", "New York",
@ -105,7 +99,7 @@ dat %<>%
mutate(season_week=ifelse(WEEK>=40, WEEK-40, WEEK),
season=ifelse(WEEK<40,
sprintf("%d-%d", YEAR-1, YEAR),
sprintf("%d-%d", YEAR, YEAR+1)))
sprintf("%d-%d", YEAR, YEAR+1))) -> dat
prev_years <- dat %>% filter(season != "2014-2015")
curr_year <- dat %>% filter(season == "2014-2015")
@ -125,7 +119,7 @@ gg <- gg + geom_line(data=curr_year,
gg <- gg + geom_vline(xintercept=curr_week, color="#d7301f", size=0.5, linetype="dashed", alpha=0.5)
gg <- gg + facet_wrap(~REGION, ncol=3)
gg <- gg + labs(x=NULL, y="Weighted ILI Index",
title="ILINet - 1999-2015 year weighted flu index history by CDC region\nWeek Ending Jan 3, 2015 (Red == current season)\n")
title="ILINet - 1999-2015 year weighted flu index history by CDC region\nWeek Ending Jan 3, 2015 (Red == 2014-2015 season)\n")
gg <- gg + theme_bw()
gg <- gg + theme(panel.grid=element_blank())
gg <- gg + theme(strip.background=element_blank())
@ -133,7 +127,7 @@ gg <- gg + theme(axis.ticks.x=element_blank())
gg <- gg + theme(axis.text.x=element_blank())
```
![](README_files/figure-markdown_github/unnamed-chunk-6-1.png)
<img src="README_files/README-unnamed-chunk-7-1.png" title="" alt="" width="576" />
``` r
gg_s <- state_flu %>%
@ -147,7 +141,7 @@ gg_s <- state_flu %>%
ggtitle("CDC State FluView (2015-01-03)")
```
![](README_files/figure-markdown_github/unnamed-chunk-8-1.png)
<img src="README_files/README-unnamed-chunk-9-1.png" title="" alt="" width="672" />
### Test Results
@ -156,15 +150,11 @@ suppressPackageStartupMessages(library(cdcfluview))
suppressPackageStartupMessages(library(testthat))
date()
```
#> [1] "Sun Aug 9 09:40:34 2015"
## [1] "Sat Aug 8 14:09:26 2015"
``` r
test_dir("tests/")
#> testthat results ========================================================================================================
#> OK: 0 SKIPPED: 0 FAILED: 0
#>
#> DONE
```
## testthat results ========================================================================================================
## OK: 0 SKIPPED: 0 FAILED: 0
##
## DONE

BIN
README_files/README-unnamed-chunk-5-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 183 KiB

BIN
README_files/README-unnamed-chunk-7-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 343 KiB

BIN
README_files/README-unnamed-chunk-9-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

BIN
README_files/figure-markdown_github/unnamed-chunk-6-1.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 140 KiB

After

Width:  |  Height:  |  Size: 140 KiB

1
cdcfluview.Rproj

@ -18,4 +18,5 @@ BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageCheckArgs: --as-cran
PackageRoxygenize: rd,collate,namespace

26
crunch/mkdata.r

@ -0,0 +1,26 @@
# Builds the `hhs_regions` lookup table and saves it as package data.
#
# The table starts out "wide" (one row per HHS region, with the member
# states/territories packed into a single comma-separated string) and is
# expanded to "long" format: one row per region / state-or-territory pair.

library(stringr)

# one row per region; fields are separated by ";"
hhs_regions <- read.table(text="region;region_number;regional_office;state_or_territory
Region 1;1;Boston;Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont
Region 2;2;New York;New Jersey, New York, Puerto Rico, Virgin Islands
Region 3;3;Philadelphia;Delaware, District of Columbia, Maryland, Pennsylvania, Virginia, West Virginia
Region 4;4;Atlanta;Alabama, Florida, Georgia, Kentucky, Mississippi, North Carolina, South Carolina, Tennessee
Region 5;5;Chicago;Illinois, Indiana, Michigan, Minnesota, Ohio, Wisconsin
Region 6;6;Dallas;Arkansas, Louisiana, New Mexico, Oklahoma, Texas
Region 7;7;Kansas City;Iowa, Kansas, Missouri, Nebraska
Region 8;8;Denver;Colorado, Montana, North Dakota, South Dakota, Utah, Wyoming
Region 9;9;San Francisco;Arizona, California, Hawaii, Nevada, American Samoa, Commonwealth of the Northern Mariana Islands, Federated States of Micronesia, Guam, Marshall Islands, Republic of Palau
Region 10;10;Seattle;Alaska, Idaho, Oregon, Washington", sep=";", stringsAsFactors=FALSE, header=TRUE)

# expand each region row into one row per member state/territory, then
# stack the per-region pieces back into a single data frame
hhs_regions <- do.call(rbind.data.frame,
                       lapply(seq_len(nrow(hhs_regions)), function(i) {

  x <- hhs_regions[i, ]
  rownames(x) <- NULL

  # the three region-level columns are recycled against the vector of
  # member names; `[[1]]` pulls the character vector out of the
  # one-element list that str_split() returns
  out <- data.frame(x[, 1:3],
                    str_split(x$state_or_territory, ", ")[[1]],
                    stringsAsFactors=FALSE)

  colnames(out) <- c("region", "region_number", "regional_office", "state_or_territory")

  out

}))

# quick visual check of the resulting structure
str(hhs_regions)

# NOTE(review): newer devtools releases moved use_data() to the usethis
# package -- confirm against the installed devtools version
devtools::use_data(hhs_regions, overwrite=TRUE)

BIN
data/hhs_regions.rda

Binary file not shown.

9
man/cdcfluview.Rd

@ -4,9 +4,14 @@
\name{cdcfluview}
\alias{cdcfluview}
\alias{cdcfluview-package}
\title{A package to retrive data behind the CDC FluView portal}
\title{Retrieve Flu Season Data from the CDC FluView Portal}
\description{
A package to retrive data behind the CDC FluView portal
The U.S. Centers for Disease Control (CDC) maintains a portal
\code{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html} for
accessing state, regional and national influenza statistics. The Flash
interface makes it difficult and time-consuming to select and retrieve
influenza data. This package provides functions to access the data provided
by portal's underlying API.
}
\author{
Bob Rudis (@hrbrmstr)

34
man/get_flu_data.Rd

@ -2,10 +2,10 @@
% Please edit documentation in R/get_flu_data.r
\name{get_flu_data}
\alias{get_flu_data}
\title{Retrieve CDC flu data}
\title{Retrieves state, regional or national influenza statistics from the CDC}
\usage{
get_flu_data(region = "hhs", sub_region = 1:10, data_source = "ilinet",
years = 2014)
years = as.numeric(format(Sys.Date(), "\%Y")))
}
\arguments{
\item{region}{one of "\code{hhs}", "\code{census}", "\code{national}"}
@ -15,18 +15,34 @@ For "\code{national}", the \code{sub_region} should be \code{NA}.\cr
For "\code{hhs}", should be a vector between \code{1:10}.\cr
For "\code{census}", should be a vector between \code{1:9}}
\item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}" or "\code{all}" (for both)}
\item{data_source}{either of "\code{who}" (for WHO NREVSS) or "\code{ilinet}"
or "\code{all}" (for both)}
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)}
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
flu season 2014-2015). Default value is the current year and all
\code{years} values should be > \code{1997}}
}
\value{
If only a single \code{data_source} is specified, then a single \code{data.frame} is
returned, otherwise a named list with each \code{data.frame} is returned.
If only a single \code{data_source} is specified, then a single
\code{data.frame} is returned, otherwise a named list with each
\code{data.frame} is returned.
}
\description{
Uses the data source from the CDC FluView \url{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}
and provides flu reporting data as either a single data frame or a list
of data frames (depending on whether either WHO NREVSS or ILINet - or both - is chosen)
Uses the data source from the
\href{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html}{CDC FluView}
and provides flu reporting data as either a single data frame or a list of
data frames (depending on whether either \code{WHO NREVSS} or \code{ILINet}
(or both) is chosen.
}
\details{
A lookup table between HHS regions and their member states/territories
is provided in \code{\link{hhs_regions}}.
}
\note{
There is often a noticeable delay when making the API request to the CDC.
This is not due to a large download size, but the time it takes for their
servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
if you would like to see what's going on.
}
\examples{
\dontrun{

26
man/get_state_data.Rd

@ -2,24 +2,40 @@
% Please edit documentation in R/get_state_data.r
\name{get_state_data}
\alias{get_state_data}
\title{Retrieves the state-level data from the CDC's FluView Portal}
\title{Retrieves state/territory-level influenza statistics from the CDC}
\usage{
get_state_data(years = 2014)
get_state_data(years = as.numeric(format(Sys.Date(), "\%Y")))
}
\arguments{
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC flu season 2014-2015)}
\item{years}{a vector of years to retrieve data for (i.e. \code{2014} for CDC
flu season 2014-2015). Default value is the current year and all
\code{years} values should be > \code{1997}}
}
\value{
A \code{data.frame} of state-level data for the specified seasons
(also classed as \code{cdcstatedata})
}
\description{
Uses the data source from the CDC' State-levelFluView \url{http://gis.cdc.gov/grasp/fluview/main.html}
and provides state flu reporting data as a single data frame
Uses the data source from the CDC' State-levelFluView
\url{http://gis.cdc.gov/grasp/fluview/main.html} and provides state flu
reporting data as a single data frame.\cr
\cr
This function provides similar data to \code{\link{get_weekly_flu_report}} but
provides more metadata about the reporting sources and has access to more
historical infomation.
}
\note{
There is often a noticeable delay when making the API request to the CDC. This
is not due to a large download size, but the time it takes for their
servers to crunch the data. Wrap the function call in \code{httr::with_verbose}
if you would like to see what's going on.
}
\examples{
\dontrun{
get_state_dat(2014)
get_state_data(c(2013, 2014))
get_state_data(2010:2014)
httr::with_verbose(get_state_data(2009:2015))
}
}

32
man/get_weekly_flu_report.Rd

@ -0,0 +1,32 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/get_weekly_flu_report.r
\name{get_weekly_flu_report}
\alias{get_weekly_flu_report}
\title{Retrieves weekly influenza surveillance report from the CDC}
\usage{
get_weekly_flu_report()
}
\value{
\code{tbl_df} (also classed with \code{cdcweeklyreport}) with six
columns: \code{year}, \code{week_number}, \code{state}, \code{color},
\code{label}, \code{subtitle}
}
\description{
The CDC publishes a \href{http://www.cdc.gov/flu/weekly/usmap.htm}{weekly
influenza report} detailing high-level flu activity per-state. They also
publish a data file (see \code{References}) of historical report readings.
This function reads that XML file and produces a long \code{data_frame}
with the historical surveillance readings.\cr
\cr
This function provides similar data to \code{\link{get_state_data}} but without
the reporting source metadata and a limit on the historical flu information.
}
\examples{
\dontrun{
get_weekly_flu_report()
}
}
\references{
\url{http://www.cdc.gov/flu/weekly/flureport.xml}
}

31
man/hhs_regions.Rd

@ -0,0 +1,31 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/datasets.r
\docType{data}
\name{hhs_regions}
\alias{hhs_regions}
\title{HHS Region Table}
\format{A data frame with 59 rows and 4 variables}
\usage{
data(hhs_regions)
}
\description{
This dataset contains the names, numbers, regional offices for
and states/territories belonging to the (presently) 10 HHS U.S.
regions in "long" format. It consists of a \code{data.frame}
with the following columns:
\itemize{
\item \code{region}: the official HHS region name (e.g. "\code{Region 1}")
\item \code{region_number}: the associated region number
\item \code{regional_office}: the HHS regional office for the entire region
\item \code{state_or_territory}: state or territory belonging to the region
}
}
\note{
Last updated 2015-08-09.
}
\references{
\url{http://www.hhs.gov/iea/regional/}
}
\keyword{datasets}
Loading…
Cancel
Save