22 changed files with 370 additions and 113 deletions
@ -1,20 +1,26 @@ |
|||
Package: cdcfluview |
|||
Type: Package |
|||
Title: cdcfluview is package that retrieves the data behind the CDC's FluView portal |
|||
Version: 0.3 |
|||
Date: 2015-08-07 |
|||
Title: Retrieve Flu Season Data from the CDC FluView Portal |
|||
Version: 0.4.0.9000 |
|||
Date: 2015-08-09 |
|||
Author: Bob Rudis (@hrbrmstr) |
|||
Maintainer: Bob Rudis <bob@rudis.net> |
|||
Description: The CDC's FluView is a Flash portal and the only way to get flu season |
|||
data is to use GUI controls, making it tedious to retrieve updates. This package |
|||
uses the same API the portal does to programmatically retrieve data. |
|||
Description: The U.S. Centers for Disease Control (CDC) maintains a portal |
|||
<http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html> for |
|||
accessing state, regional and national influenza statistics. The Flash |
|||
interface makes it difficult and time-consuming to select and retrieve |
|||
influenza data. This package provides functions to access the data provided |
|||
by the portal's underlying API. |
|||
URL: http://github.com/hrbrmstr/cdcfluview |
|||
BugReports: https://github.com/hrbrmstr/cdcfluview/issues |
|||
License: MIT + file LICENSE |
|||
LazyData: TRUE |
|||
Suggests: |
|||
testthat |
|||
Imports: |
|||
httr (>= 0.3.0), |
|||
pbapply |
|||
pbapply, |
|||
xml2, |
|||
dplyr |
|||
Depends: |
|||
R (>= 3.0.0) |
|||
|
@ -1,6 +1,14 @@ |
|||
#' A package to retrive data behind the CDC FluView portal |
|||
#' Retrieve Flu Season Data from the CDC FluView Portal |
|||
#' |
|||
#' The U.S. Centers for Disease Control (CDC) maintains a portal |
|||
#' \code{http://gis.cdc.gov/grasp/fluview/fluportaldashboard.html} for |
|||
#' accessing state, regional and national influenza statistics. The Flash |
|||
#' interface makes it difficult and time-consuming to select and retrieve |
|||
#' influenza data. This package provides functions to access the data provided |
|||
#' by the portal's underlying API. |
|||
#' |
|||
#' @name cdcfluview |
|||
#' @docType package |
|||
#' @author Bob Rudis (@@hrbrmstr) |
|||
#' @import httr pbapply |
|||
#' @import httr pbapply xml2 dplyr |
|||
NULL |
|||
|
@ -0,0 +1,22 @@ |
|||
#' @title HHS Region Table |
|||
#' @description This dataset contains the names, numbers and regional offices of |
|||
#' the (presently) 10 HHS U.S. regions, together with the states/territories |
|||
#' belonging to each region, in "long" format. It consists of a \code{data.frame} |
|||
#' with the following columns: |
|||
#' |
|||
#' \itemize{ |
|||
#' \item \code{region}: the official HHS region name (e.g. "\code{Region 1}") |
|||
#' \item \code{region_number}: the associated region number |
|||
#' \item \code{regional_office}: the HHS regional office for the entire region |
|||
#' \item \code{state_or_territory}: state or territory belonging to the region |
|||
#' } |
|||
#' |
|||
#' @docType data |
|||
#' @keywords datasets |
|||
#' @name hhs_regions |
|||
#' |
|||
#' @references \url{http://www.hhs.gov/iea/regional/} |
|||
#' @usage data(hhs_regions) |
|||
#' @note Last updated 2015-08-09. |
|||
#' @format A data frame with 59 rows and 4 variables |
|||
NULL |
@ -0,0 +1,55 @@ |
|||
#' Retrieves weekly influenza surveillance report from the CDC |
|||
#' |
|||
#' The CDC publishes a \href{http://www.cdc.gov/flu/weekly/usmap.htm}{weekly |
|||
#' influenza report} detailing high-level flu activity per-state. They also |
|||
#' publish a data file (see \code{References}) of historical report readings. |
|||
#' This function reads that XML file and produces a long \code{data_frame} |
|||
#' with the historical surveillance readings.\cr |
|||
#' \cr |
|||
#' This function provides similar data to \code{\link{get_state_data}} but without |
|||
#' the reporting source metadata and a limit on the historical flu information. |
|||
#' |
|||
#' @references \url{http://www.cdc.gov/flu/weekly/flureport.xml} |
|||
#' @return \code{tbl_df} (also classed with \code{cdcweeklyreport}) with six |
|||
#' columns: \code{year}, \code{week_number}, \code{state}, \code{color}, |
|||
#' \code{label}, \code{subtitle} |
|||
#' @export |
|||
#' @examples \dontrun{ |
|||
#' get_weekly_flu_report() |
|||
#' } |
|||
get_weekly_flu_report <- function() {

  # Download and parse the CDC's historical weekly report (an XML document).
  doc <- read_xml("http://www.cdc.gov/flu/weekly/flureport.xml")

  # Every <timeperiod> element directly under the root describes one report
  # period. Keep the nodes themselves (not just their attributes) so the loop
  # below can read each one directly instead of re-searching the whole
  # document with a freshly built XPath expression per period.
  period_nodes <- xml_find_all(doc, "timeperiod")

  # Build one data frame per period (pblapply shows a progress bar). Indexing
  # by position keeps each result tied to its own node even if two periods
  # happened to share the same number/year attributes.
  per_period <- pblapply(seq_along(period_nodes), function(i) {

    node <- period_nodes[[i]]

    # Named character vector of the period's attributes; `[` (rather than
    # `[[`) yields NA instead of an error when an attribute is absent.
    meta <- xml_attrs(node)

    # The children of a period are the per-state entries.
    kids <- xml_children(node)

    abbrev <- xml_text(xml_find_all(kids, "abbrev"), trim = TRUE)
    color <- xml_text(xml_find_all(kids, "color"), trim = TRUE)
    label <- xml_text(xml_find_all(kids, "label"), trim = TRUE)

    # The period-level scalars are recycled across the state rows; unname()
    # stops the attribute names from leaking into the column values.
    data_frame(year = unname(meta["year"]),
               week_number = unname(meta["number"]),
               state = abbrev,
               color = color,
               label = label,
               subtitle = unname(meta["subtitle"]))

  })

  # Stack the per-period pieces into one long data frame and tag it with the
  # package-specific class ahead of the classes it already carries.
  out <- bind_rows(per_period)
  class(out) <- c("cdcweeklyreport", class(out))

  out

}
After Width: | Height: | Size: 183 KiB |
After Width: | Height: | Size: 343 KiB |
After Width: | Height: | Size: 83 KiB |
Before Width: | Height: | Size: 140 KiB After Width: | Height: | Size: 140 KiB |
@ -0,0 +1,26 @@ |
|||
# Build the `hhs_regions` dataset shipped with the package.
#
# The source table has one row per HHS region with a comma-separated list of
# member states/territories; the packaged dataset is the "long" form with one
# row per (region, state_or_territory) pair.

regions_wide <- read.table(text = "region;region_number;regional_office;state_or_territory
Region 1;1;Boston;Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont
Region 2;2;New York;New Jersey, New York, Puerto Rico, Virgin Islands
Region 3;3;Philadelphia;Delaware, District of Columbia, Maryland, Pennsylvania, Virginia, West Virginia
Region 4;4;Atlanta;Alabama, Florida, Georgia, Kentucky, Mississippi, North Carolina, South Carolina, Tennessee
Region 5;5;Chicago;Illinois, Indiana, Michigan, Minnesota, Ohio, Wisconsin
Region 6;6;Dallas;Arkansas, Louisiana, New Mexico, Oklahoma, Texas
Region 7;7;Kansas City;Iowa, Kansas, Missouri, Nebraska
Region 8;8;Denver;Colorado, Montana, North Dakota, South Dakota, Utah, Wyoming
Region 9;9;San Francisco;Arizona, California, Hawaii, Nevada, American Samoa, Commonwealth of the Northern Mariana Islands, Federated States of Micronesia, Guam, Marshall Islands, Republic of Palau
Region 10;10;Seattle;Alaska, Idaho, Oregon, Washington", sep = ";", stringsAsFactors = FALSE, header = TRUE)

# Split each region's member list into its individual names. The separator is
# a literal ", ", so base strsplit() with fixed = TRUE is sufficient and the
# script needs no package beyond devtools.
members <- strsplit(regions_wide$state_or_territory, ", ", fixed = TRUE)

# Repeat each region's row index once per member so the region-level columns
# line up with the flattened member vector.
row_index <- rep(seq_len(nrow(regions_wide)),
                 times = vapply(members, length, integer(1)))

hhs_regions <- data.frame(
  region = regions_wide$region[row_index],
  region_number = regions_wide$region_number[row_index],
  regional_office = regions_wide$regional_office[row_index],
  state_or_territory = unlist(members),
  stringsAsFactors = FALSE
)

# Sanity check: the dataset documentation states 59 rows and 4 variables.
# Update both this check and the documentation if the table above changes.
stopifnot(nrow(hhs_regions) == 59L, ncol(hhs_regions) == 4L)

str(hhs_regions)

# Write the dataset into the package's data/ directory.
devtools::use_data(hhs_regions, overwrite = TRUE)
Binary file not shown.
@ -0,0 +1,32 @@ |
|||
% Generated by roxygen2 (4.1.1): do not edit by hand |
|||
% Please edit documentation in R/get_weekly_flu_report.r |
|||
\name{get_weekly_flu_report} |
|||
\alias{get_weekly_flu_report} |
|||
\title{Retrieves weekly influenza surveillance report from the CDC} |
|||
\usage{ |
|||
get_weekly_flu_report() |
|||
} |
|||
\value{ |
|||
\code{tbl_df} (also classed with \code{cdcweeklyreport}) with six |
|||
columns: \code{year}, \code{week_number}, \code{state}, \code{color}, |
|||
\code{label}, \code{subtitle} |
|||
} |
|||
\description{ |
|||
The CDC publishes a \href{http://www.cdc.gov/flu/weekly/usmap.htm}{weekly |
|||
influenza report} detailing high-level flu activity per-state. They also |
|||
publish a data file (see \code{References}) of historical report readings. |
|||
This function reads that XML file and produces a long \code{data_frame} |
|||
with the historical surveillance readings.\cr |
|||
\cr |
|||
This function provides similar data to \code{\link{get_state_data}} but without |
|||
the reporting source metadata and a limit on the historical flu information. |
|||
} |
|||
\examples{ |
|||
\dontrun{ |
|||
get_weekly_flu_report() |
|||
} |
|||
} |
|||
\references{ |
|||
\url{http://www.cdc.gov/flu/weekly/flureport.xml} |
|||
} |
|||
|
@ -0,0 +1,31 @@ |
|||
% Generated by roxygen2 (4.1.1): do not edit by hand |
|||
% Please edit documentation in R/datasets.r |
|||
\docType{data} |
|||
\name{hhs_regions} |
|||
\alias{hhs_regions} |
|||
\title{HHS Region Table} |
|||
\format{A data frame with 59 rows and 4 variables} |
|||
\usage{ |
|||
data(hhs_regions) |
|||
} |
|||
\description{ |
|||
This dataset contains the names, numbers, regional offices for |
|||
and states/territories belonging to the (presently) 10 HHS U.S. |
|||
regions in "long" format. It consists of a \code{data.frame} |
|||
with the following columns: |
|||
|
|||
\itemize{ |
|||
\item \code{region}: the official HHS region name (e.g. "\code{Region 1}") |
|||
\item \code{region_number}: the associated region number |
|||
\item \code{regional_office}: the HHS regional office for the entire region |
|||
\item \code{state_or_territory}: state or territory belonging to the region |
|||
} |
|||
} |
|||
\note{ |
|||
Last updated 2015-08-09. |
|||
} |
|||
\references{ |
|||
\url{http://www.hhs.gov/iea/regional/} |
|||
} |
|||
\keyword{datasets} |
|||
|
Loading…
Reference in new issue