From 3b143cc66d55952cbbaf7d7756925e2aadac37a9 Mon Sep 17 00:00:00 2001 From: boB Rudis Date: Mon, 6 Nov 2017 07:41:17 -0500 Subject: [PATCH] enhancements to maps and incorporating MMWR week calcs --- DESCRIPTION | 1 + NAMESPACE | 6 ++++-- R/aaa.R | 44 ++++++++++++++++++++++++++++++++++++-- R/agd-ipt.r | 14 ++++++++---- R/cdcfluview-package.R | 3 ++- R/coverage-map.r | 46 +++++++++++++++++++++++++--------------- R/geographic-spread.R | 1 - R/ili-weekly-state.r | 2 +- R/ilinet.r | 19 +++++++++++++++-- R/zzz.r | 4 ++-- README.Rmd | 46 ++++++++++++++++++++++++---------------- README.md | 6 ++---- man/agd_ipt.Rd | 22 ------------------- man/age_group_distribution.Rd | 22 +++++++++++++++++++ man/cdc_basemap.Rd | 37 ++++++++++++++++++++++++++++++++ man/cdc_coverage_map.Rd | 32 ---------------------------- man/state_data_providers.Rd | 4 ++-- tests/testthat/test-cdcfluview.R | 24 ++++++++++++++++++--- 18 files changed, 220 insertions(+), 113 deletions(-) delete mode 100644 man/agd_ipt.Rd create mode 100644 man/age_group_distribution.Rd create mode 100644 man/cdc_basemap.Rd delete mode 100644 man/cdc_coverage_map.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 2b85040..2010ad2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,5 +34,6 @@ Imports: stats, utils, sf, + MMWRweek, units (>= 0.4-6) RoxygenNote: 6.0.1 diff --git a/NAMESPACE b/NAMESPACE index 7acd81f..7611c2f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,7 @@ # Generated by roxygen2: do not edit by hand -export(agd_ipt) -export(cdc_coverage_map) +export(age_group_distribution) +export(cdc_basemap) export(geographic_spread) export(hospitalizations) export(ili_weekly_activity_indicators) @@ -10,9 +10,11 @@ export(pi_mortality) export(state_data_providers) export(surveillance_areas) export(who_nrevss) +import(MMWRweek) import(httr) importFrom(dplyr,"%>%") importFrom(dplyr,bind_rows) +importFrom(dplyr,data_frame) importFrom(dplyr,filter) importFrom(dplyr,left_join) importFrom(dplyr,mutate) diff --git a/R/aaa.R 
b/R/aaa.R index e54c45b..30d21fe 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -1,3 +1,15 @@ +# THIS IS NOT EXPORTED FROM MMWRweek but I need it +# Find start date for a calendar year +# +# Finds the start date given a numeric calendar year +# @author Jarad Niemi \email{niemi@@iastate.edu} +.start_date = function(year) { + # Finds start date for this calendar year + jan1 = as.Date(paste(year, '-01-01', sep='')) + wday = as.numeric(MMWRweekday(jan1)) + jan1 - (wday-1) + 7*(wday>4) +} + # CDC U.S. region names to ID map .region_map <- c(national=3, hhs=1, census=2, state=5) @@ -11,5 +23,33 @@ # Our bot's user-agent string .cdcfluview_ua <- "Mozilla/5.0 (compatible; R-cdcvluview Bot/2.0; https://github.com/hrbrmstr/cdcfluview)" -# CDC Basemap -.cdc_basemap <- "https://gis.cdc.gov/grasp/fluview/FluView1References/data/US_States_w_PR_labels.json" \ No newline at end of file +# CDC Basemaps +.national_outline <- "https://gis.cdc.gov/grasp/fluview/FluView2References/Data/US_84.json" +.hhs_subregions_basemap <- "https://gis.cdc.gov/grasp/fluview/FluView2References/Data/HHSRegions_w_SubGroups.json" +.census_divisions_basemap <- "https://gis.cdc.gov/grasp/fluview/FluView2References/Data/CensusDivs_w_SubGroups.json" +.states_basemap <- "https://gis.cdc.gov/grasp/fluview/FluView2References/Data/StatesFluView.json" +.spread_basemap <- "https://gis.cdc.gov/grasp/fluview/FluView8References/Data/States_Territories_labels.json" +.surv_basemap <- "https://gis.cdc.gov/grasp/fluview/FluView1References/data/US_States_w_PR_labels.json" + +# CDC Age Groups +.age_grp <- c("0-4 yr", "5-24 yr", "25-64 yr", "65+ yr") + +# CDC Virus Groups +.vir_grp <- c("A (Subtyping not Performed)", "A (H1N1)pdm09", "A (Unable to Subtype)", + "B (Lineage Unspecified)", "A (H1)", "A (H3)", "B (Victoria Lineage)", + "B (Yamagata Lineage)", "H3N2v") + +# Week Starts + +.tmp <- lapply(1962:2030, .start_date) + +mapply(function(.x, .y) { + data_frame( + wk_start = seq(.tmp[[.x]], .tmp[[.y]], "1 week"), + wk_num = 
1:length(wk_start) + ) -> tmp + tmp[-nrow(tmp),] +}, 1:(length(.tmp)-1), 2:length(.tmp), SIMPLIFY=FALSE) -> .wk + +.wk <- Reduce(rbind.data.frame, .wk) +.wk$mmwrid <- 1:nrow(.wk) \ No newline at end of file diff --git a/R/agd-ipt.r b/R/agd-ipt.r index db34c32..902c6ed 100644 --- a/R/agd-ipt.r +++ b/R/agd-ipt.r @@ -11,7 +11,8 @@ #' @examples \dontrun{ #' agd_ipt() #' } -agd_ipt <- function() { +age_group_distribution <- function() { + httr::GET( url = "https://gis.cdc.gov/grasp/fluView6/GetFlu6AllDataP", httr::user_agent(.cdcfluview_ua), @@ -47,9 +48,14 @@ agd_ipt <- function() { class(vir_df) <- c("tbl_df", "tbl", "data.frame") vir_df_cols <- c("sea_label", "age_label", "vir_label", "count", "mmwrid", "seasonid", - "publishyearweekid", "sea_description", "sea_startweek", "sea_endweek", - "vir_description", "vir_startmmwrid", "vir_endmmwrid") + "publishyearweekid", "sea_description", "sea_startweek", + "sea_endweek", "vir_description", "vir_startmmwrid", "vir_endmmwrid") + + vir_df <- vir_df[,vir_df_cols] + + vir_df$age_label <- factor(vir_df$age_label, levels=.age_grp) + vir_df$vir_label <- factor(vir_df$vir_label, levels=.vir_grp) - vir_df[,vir_df_cols] + vir_df } diff --git a/R/cdcfluview-package.R b/R/cdcfluview-package.R index b341ad7..b62db65 100644 --- a/R/cdcfluview-package.R +++ b/R/cdcfluview-package.R @@ -12,8 +12,9 @@ #' @docType package #' @author Bob Rudis (bob@@rud.is) #' @import httr +#' @import MMWRweek #' @importFrom tools file_path_sans_ext -#' @importFrom dplyr left_join bind_rows mutate filter %>% +#' @importFrom dplyr left_join bind_rows mutate filter data_frame %>% #' @importFrom jsonlite fromJSON #' @importFrom stats setNames #' @importFrom sf st_read diff --git a/R/coverage-map.r b/R/coverage-map.r index 99a2f21..b626d02 100644 --- a/R/coverage-map.r +++ b/R/coverage-map.r @@ -1,28 +1,40 @@ -#' Retrieve CDC U.S. Coverage Map +#' Retrieve CDC U.S. 
Basemaps #' -#' The CDC FluView application uses a composite basemap of coverage areas -#' within the United States that elides and scales Alaska, Hawaii and -#' Puerto Rico and provides elided and scaled breakouts for New York City -#' and the District of Columbia.\cr +#' The CDC FluView application uses composite basemaps of coverage areas +#' within the United States that elide and scale Alaska, Hawaii, +#' Puerto Rico & the Virgin Islands and some further provide elided and scaled +#' breakouts for New York City and the District of Columbia.\cr #' \cr -#' The basemap provides polygon identifiers by: -#' \cr -#' - `STATE_FIPS` -#' - `STATE_ABBR` -#' - `STATE_NAME` -#' - `HHS_Region` -#' - `FIPSTXT`) -#' \cr -#' This function retrieves the shapefile, projects to EPSG:5069 and +#' This function retrieves the given shapefile, projects to EPSG:5069 and #' returns it as an `sf` (simple features) object. #' #' @md #' @export +#' @param basemap select the CDC basemap. One of: +#' - "`national`": outline of the U.S. + AK, HI, PR + VI +#' - "`hhs`": outline of the U.S. + HHS Region Outlines + AK, HI, PR + VI +#' - "`census`": outline of the U.S. + Census Region Outlines + AK, HI, PR + VI +#' - "`states`": outline of the U.S. + State Outlines + AK, HI, PR + VI +#' - "`spread`": outline of the U.S. + State Outlines + AK, HI, PR + VI & Guam +#' - "`surv`": outline of the U.S. + State Outlines + AK, HI, PR + VI +#' @note These are just the basemaps. You need to pair them with the data you wish to visualize. 
#' @examples \dontrun{ -#' plot(cdc_coverage_map()) +#' plot(cdc_basemap("national")) #' } -cdc_coverage_map <- function() { - xsf <- sf::st_read(.cdc_basemap, quiet=TRUE, stringsAsFactors=FALSE) +cdc_basemap <- function(basemap = c("national", "hhs", "census", "states", "spread", "surv")) { + + switch( + basemap, + national = .national_outline, + hhs = .hhs_subregions_basemap, + census = .census_divisions_basemap, + states = .states_basemap, + spread = .spread_basemap, + surv = .surv_basemap + ) -> selected_map + + xsf <- sf::st_read(selected_map, quiet=TRUE, stringsAsFactors=FALSE) sf::st_crs(xsf) <- 4326 sf::st_transform(xsf, 5069) + } diff --git a/R/geographic-spread.R b/R/geographic-spread.R index 3e6629a..82e4478 100644 --- a/R/geographic-spread.R +++ b/R/geographic-spread.R @@ -7,7 +7,6 @@ geographic_spread <- function() { meta <- jsonlite::fromJSON("https://gis.cdc.gov/grasp/Flu8/GetPhase08InitApp?appVersion=Public") - meta$seasons$seasonid httr::POST( url = "https://gis.cdc.gov/grasp/Flu8/PostPhase08DownloadData", diff --git a/R/ili-weekly-state.r b/R/ili-weekly-state.r index 488a854..c70d717 100644 --- a/R/ili-weekly-state.r +++ b/R/ili-weekly-state.r @@ -60,7 +60,7 @@ ili_weekly_activity_indicators <- function(season_start_year) { } -#' Retrieve metadat about U.S. State CDC Provider Data +#' Retrieve metadata about U.S. 
State CDC Provider Data #' #' @md #' @export diff --git a/R/ilinet.r b/R/ilinet.r index 3996426..9625f58 100644 --- a/R/ilinet.r +++ b/R/ilinet.r @@ -71,9 +71,24 @@ ilinet <- function(region=c("national", "hhs", "census", "state")) { xdf <- read.csv(nm, skip = 1, stringsAsFactors=FALSE) xdf <- .mcga(xdf) - class(xdf) <- c("tbl_df", "tbl", "data.frame") - xdf[xdf=="X"] <- NA + suppressWarnings(xdf$weighted_ili <- to_num(xdf$weighted_ili)) + suppressWarnings(xdf$unweighted_ili <- to_num(xdf$unweighted_ili)) + suppressWarnings(xdf$age_0_4 <- to_num(xdf$age_0_4)) + suppressWarnings(xdf$age_25_49 <- to_num(xdf$age_25_49)) + suppressWarnings(xdf$age_25_64 <- to_num(xdf$age_25_64)) + suppressWarnings(xdf$age_5_24 <- to_num(xdf$age_5_24)) + suppressWarnings(xdf$age_50_64 <- to_num(xdf$age_50_64)) + suppressWarnings(xdf$age_65 <- to_num(xdf$age_65)) + suppressWarnings(xdf$ilitotal <- to_num(xdf$ilitotal)) + suppressWarnings(xdf$num_of_providers <- to_num(xdf$num_of_providers)) + suppressWarnings(xdf$total_patients <- to_num(xdf$total_patients)) + suppressWarnings(xdf$week_start <- as.Date(sprintf("%s-%02d-1", xdf$year, xdf$week), "%Y-%U-%u")) + + if (region == "national") xdf$region <- "National" + if (region == "hhs") xdf$region <- factor(xdf$region, levels=sprintf("Region %s", 1:10)) + + class(xdf) <- c("tbl_df", "tbl", "data.frame") xdf diff --git a/R/zzz.r b/R/zzz.r index 7d2e72e..caac07b 100644 --- a/R/zzz.r +++ b/R/zzz.r @@ -1,6 +1,6 @@ # this is only used during active development phases before/after CRAN releases -.onAttach <- function(...) { +.onAttach <- function(...) { # nocov start if (!interactive()) return() @@ -8,4 +8,4 @@ "There are *MASSIVE* breaking changes*. ", "See https://github.com/hrbrmstr/cdcfluview for info/news.")) -} +} # nocov end diff --git a/README.Rmd b/README.Rmd index 288b444..739a969 100644 --- a/README.Rmd +++ b/README.Rmd @@ -28,14 +28,14 @@ The U.S. 
Centers for Disease Control (CDC) maintains a portal gg + + print(gg) + +}) ``` ### Retrieve weekly state-level ILI indicators per-state for a given season @@ -128,12 +143,7 @@ pi_mortality("state") pi_mortality("region") ``` -### Retrieve metadat about U.S. State CDC Provider Data - -```{r message=FALSE, warning=FALSE} -``` - -### Retrieve a list of valid sub-regions for each surveillance area. +### Retrieve metadata about U.S. State CDC Provider Data ```{r message=FALSE, warning=FALSE} state_data_providers() diff --git a/README.md b/README.md index 7bbce56..c91c03f 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ The following functions are implemented: - `ili_weekly_activity_indicators`: Retrieve weekly state-level ILI indicators per-state for a given season - `pi_mortality`: Pneumonia and Influenza Mortality Surveillance -- `state_data_providers`: Retrieve metadat about U.S. State CDC +- `state_data_providers`: Retrieve metadata about U.S. State CDC Provider Data - `surveillance_areas`: Retrieve a list of valid sub-regions for each surveillance area. @@ -484,9 +484,7 @@ pi_mortality("region") ## # ... with 4,180 more rows, and 10 more variables: weeknumber , geo_description , age_label , ## # weekend , weekstart , year , yearweek , coverage_area , region_name , callout -### Retrieve metadat about U.S. State CDC Provider Data - -### Retrieve a list of valid sub-regions for each surveillance area. +### Retrieve metadata about U.S. 
State CDC Provider Data ``` r state_data_providers() diff --git a/man/agd_ipt.Rd b/man/agd_ipt.Rd deleted file mode 100644 index bca8816..0000000 --- a/man/agd_ipt.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/agd-ipt.r -\name{agd_ipt} -\alias{agd_ipt} -\title{Age Group Distribution of Influenza Positive Tests Reported by Public Health Laboratories} -\usage{ -agd_ipt() -} -\description{ -Retrieves the age group distribution of influenza positive tests that are reported by -public health laboratories by influenza virus type and subtype/lineage. Laboratory data -from multiple seasons and different age groups is provided. -} -\examples{ - \dontrun{ -agd_ipt() -} -} -\references{ -- [CDC FluView Portal](https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html) -- [AGD IPT Portal](https://gis.cdc.gov/grasp/fluview/flu_by_age_virus.html) -} diff --git a/man/age_group_distribution.Rd b/man/age_group_distribution.Rd new file mode 100644 index 0000000..0db2388 --- /dev/null +++ b/man/age_group_distribution.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/agd-ipt.r +\name{age_group_distribution} +\alias{age_group_distribution} +\title{Age Group Distribution of Influenza Positive Tests Reported by Public Health Laboratories} +\usage{ +age_group_distribution() +} +\description{ +Retrieves the age group distribution of influenza positive tests that are reported by +public health laboratories by influenza virus type and subtype/lineage. Laboratory data +from multiple seasons and different age groups is provided. 
+} +\examples{ + \dontrun{ +agd_ipt() +} +} +\references{ +- [CDC FluView Portal](https://gis.cdc.gov/grasp/fluview/fluportaldashboard.html) +- [AGD IPT Portal](https://gis.cdc.gov/grasp/fluview/flu_by_age_virus.html) +} diff --git a/man/cdc_basemap.Rd b/man/cdc_basemap.Rd new file mode 100644 index 0000000..75c62ea --- /dev/null +++ b/man/cdc_basemap.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/coverage-map.r +\name{cdc_basemap} +\alias{cdc_basemap} +\title{Retrieve CDC U.S. Basemaps} +\usage{ +cdc_basemap(basemap = c("national", "hhs", "census", "states", "spread", + "surv")) +} +\arguments{ +\item{basemap}{select the CDC basemap. One of: +\itemize{ +\item "\code{national}": outline of the U.S. + AK, HI, PR + VI +\item "\code{hhs}": outline of the U.S. + HHS Region Outlines + AK, HI, PR + VI +\item "\code{census}": outline of the U.S. + Census Region Outlines + AK, HI, PR + VI +\item "\code{states}": outline of the U.S. + State Outlines + AK, HI, PR + VI +\item "\code{spread}": outline of the U.S. + State Outlines + AK, HI, PR + VI & Guam +\item "\code{surv}": outline of the U.S. + State Outlines + AK, HI, PR + VI +}} +} +\description{ +The CDC FluView application uses a composite basemaps of coverage areas +within the United States that elides and scales Alaska, Hawaii, +Puerto Rico & the Virgin Islands and some further provide elided and scaled +breakouts for New York City and the District of Columbia.\cr +\cr +This function retrieves the given shapefile, projects to EPSG:5069 and +returns it as an \code{sf} (simple features) object. +} +\note{ +These are just the basemaps. You need to pair it with the data you wish to visualize. 
+} +\examples{ +\dontrun{ +plot(cdc_basemap("national")) +} +} diff --git a/man/cdc_coverage_map.Rd b/man/cdc_coverage_map.Rd deleted file mode 100644 index 93a4f2b..0000000 --- a/man/cdc_coverage_map.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/coverage-map.r -\name{cdc_coverage_map} -\alias{cdc_coverage_map} -\title{Retrieve CDC U.S. Coverage Map} -\usage{ -cdc_coverage_map() -} -\description{ -The CDC FluView application uses a composite basemap of coverage areas -within the United States that elides and scales Alaska, Hawaii and -Puerto Rico and provides elided and scaled breakouts for New York City -and the District of Columbia.\cr -\cr -The basemap provides polygon identifiers by: -\cr -\itemize{ -\item \code{STATE_FIPS} -\item \code{STATE_ABBR} -\item \code{STATE_NAME} -\item \code{HHS_Region} -\item \code{FIPSTXT}) -\cr -This function retrieves the shapefile, projects to EPSG:5069 and -returns it as an \code{sf} (simple features) object. -} -} -\examples{ -\dontrun{ -plot(cdc_coverage_map()) -} -} diff --git a/man/state_data_providers.Rd b/man/state_data_providers.Rd index 8f5b267..1d860bb 100644 --- a/man/state_data_providers.Rd +++ b/man/state_data_providers.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/ili-weekly-state.r \name{state_data_providers} \alias{state_data_providers} -\title{Retrieve metadat about U.S. State CDC Provider Data} +\title{Retrieve metadata about U.S. State CDC Provider Data} \usage{ state_data_providers() } \description{ -Retrieve metadat about U.S. State CDC Provider Data +Retrieve metadata about U.S. 
State CDC Provider Data } \examples{ state_data_providers() diff --git a/tests/testthat/test-cdcfluview.R b/tests/testthat/test-cdcfluview.R index 4ca1576..a9b0b83 100644 --- a/tests/testthat/test-cdcfluview.R +++ b/tests/testthat/test-cdcfluview.R @@ -1,26 +1,44 @@ context("basic functionality") test_that("we can do something", { + skip_on_cran() + expect_that(agd_ipt(), is_a("data.frame")) + expect_that(geographic_spread(), is_a("data.frame")) - expect_that(surveillance_areas(), is_a("data.frame")) + + expect_that(state_data_providers(), is_a("data.frame")) + expect_that(hospitalizations("flusurv"), is_a("data.frame")) expect_that(hospitalizations("eip"), is_a("data.frame")) expect_that(hospitalizations("eip", "Colorado"), is_a("data.frame")) expect_that(hospitalizations("ihsp"), is_a("data.frame")) expect_that(hospitalizations("ihsp", "Oklahoma"), is_a("data.frame")) + expect_that(ilinet("national"), is_a("data.frame")) expect_that(ilinet("hhs"), is_a("data.frame")) expect_that(ilinet("census"), is_a("data.frame")) expect_that(ilinet("state"), is_a("data.frame")) + expect_that(ili_weekly_activity_indicators(2017), is_a("data.frame")) + expect_that(pi_mortality("national"), is_a("data.frame")) expect_that(pi_mortality("state"), is_a("data.frame")) expect_that(pi_mortality("region"), is_a("data.frame")) - expect_that(state_data_providers(), is_a("data.frame")) + + expect_that(surveillance_areas(), is_a("data.frame")) + expect_that(who_nrevss("national"), is_a("list")) expect_that(who_nrevss("hhs"), is_a("list")) expect_that(who_nrevss("census"), is_a("list")) expect_that(who_nrevss("state"), is_a("list")) - expect_that(cdc_coverage_map(), is_a("sf")) + + expect_that(cdc_basemap("national"), is_a("sf")) + expect_that(cdc_basemap("hhs"), is_a("sf")) + expect_that(cdc_basemap("census"), is_a("sf")) + expect_that(cdc_basemap("states"), is_a("sf")) + expect_that(cdc_basemap("spread"), is_a("sf")) + expect_that(cdc_basemap("surv"), is_a("sf")) + + })