diff --git a/.Rbuildignore b/.Rbuildignore index bc0d06d..86832c2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,4 @@ ^README_files/.* ^README-.* ^cran-comments\.md$ +^codecov\.yml$ diff --git a/.travis.yml b/.travis.yml index a91e09f..dcf629a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,3 +19,6 @@ notifications: channels: - "irc.rud.is#builds" nick: travisci + +after_success: + - Rscript -e 'covr::codecov()' \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 1f4dc47..67927b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: cdcfluview Type: Package Title: Retrieve U.S. Flu Season Data from the CDC FluView Portal Version: 0.5.2 -Date: 2017-01-02 +Date: 2017-03-14 Author: Bob Rudis (bob@rud.is) Maintainer: Bob Rudis Encoding: UTF-8 @@ -17,7 +17,8 @@ BugReports: https://github.com/hrbrmstr/cdcfluview/issues License: MIT + file LICENSE LazyData: true Suggests: - testthat + testthat, + covr Imports: httr (>= 0.3.0), xml2, diff --git a/NEWS.md b/NEWS.md index b542cda..2f107c9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,9 @@ # cdcfluview 0.5.2 -* Modified behavior of `get_flu_data()` to actually grab current flu season year - if a single year was specified and it is the current year and the return is a 0 length - data frame #7 +* Modified behavior of `get_flu_data()` to actually grab current flu season + year if a single year was specified and it is the current year and the + return is a 0 length data frame (fixes #7) +* Added code coverage tests for all API functions. # cdcfluview 0.5.1 diff --git a/R/get_flu_data.r b/R/get_flu_data.r index ee42343..22d4279 100644 --- a/R/get_flu_data.r +++ b/R/get_flu_data.r @@ -68,15 +68,17 @@ get_flu_data <- function(region="hhs", sub_region=1:10, data_source <- gsub("who", "WHO_NREVSS", data_source) data_source <- gsub("ilinet", "ILINet", data_source) - params <- list(SubRegionsList=paste0(sub_region, collapse=","), - DataSources=paste0(data_source, collapse=","), - RegionID=reg, - SeasonsList=paste0(years, collapse=",")) + params <- list(SubRegionsList = paste0(sub_region, collapse=","), + DataSources = paste0(data_source, collapse=","), + RegionID = reg, + SeasonsList = paste0(years, collapse=",")) out_file <- tempfile(fileext=".zip") + # CDC API returns a ZIP file so we grab, save & expand it to then read in CSVs + tmp <- httr::POST("https://gis.cdc.gov/grasp/fluview/FluViewPhase2CustomDownload.ashx", - body=params, + body = params, write_disk(out_file)) stop_for_status(tmp) @@ -97,10 +99,19 @@ get_flu_data <- function(region="hhs", sub_region=1:10, names(file_list) <- substr(basename(files), 1, 3) + # Depending on the parameters, there could be more than one + # file returned. When there's only one, return a more usable + # structure. + if (length(file_list) == 1) { file_list <- file_list[[1]] + # when no rows, then it's likely the caller specified the + # current year and the flu season has technically not started yet. + # so help them out and move the year back and get current flu + # season data. + if ((nrow(file_list) == 0) && (length(years)==1) && (years == (as.numeric(format(Sys.Date(), "%Y"))-1960))) { @@ -111,6 +122,7 @@ get_flu_data <- function(region="hhs", sub_region=1:10, } else { return(file_list) } + } else { return(file_list) } diff --git a/R/get_state_data.r b/R/get_state_data.r index 35db31a..568d8cd 100644 --- a/R/get_state_data.r +++ b/R/get_state_data.r @@ -36,6 +36,9 @@ get_state_data <- function(years=as.numeric(format(Sys.Date(), "%Y"))) { stop_for_status(tmp) + # the API doesn't return actual JSON. It returns a JavaScript data structre + # which is why we need the assistance of the super handy V8 pkg. + res <- httr::content(tmp, as="parsed") ctx <- V8::v8() diff --git a/R/mortalty.r b/R/mortalty.r index e58f96b..4939e05 100644 --- a/R/mortalty.r +++ b/R/mortalty.r @@ -28,52 +28,62 @@ #' } get_mortality_surveillance_data <- function() { - pg <- read_html("https://www.cdc.gov/flu/weekly/nchs.htm") + # scrape (ugh) web page to get data file links for state mortality data + + pg <- xml2::read_html("https://www.cdc.gov/flu/weekly/nchs.htm") PREFIX <- "https://www.cdc.gov" - xml_find_all(pg, ".//select[@id='State']/option[contains(@value, 'csv') and - contains(@value, 'State_')]") %>% - xml_attr("value") %>% + xml2::xml_find_all(pg, ".//select[@id='State']/option[contains(@value, 'csv') and + contains(@value, 'State_')]") %>% + xml2::xml_attr("value") %>% sprintf("%s%s", PREFIX, .) -> targets - pb <- progress_estimated(length(targets)) - map_df(targets, function(x) { + pb <- dplyr::progress_estimated(length(targets)) + purrr::map_df(targets, function(x) { pb$tick()$print() suppressMessages(read_csv(URLencode(x), col_types="ciidii")) }) -> influenza_mortality_by_state + # scrape (ugh) web page to get data file links for regional mortality data - xml_find_all(pg, ".//select[@id='Regional Data']/ - option[contains(@value, 'csv') and - not(contains(@value, 'Week_'))]") %>% - xml_attr("value") %>% + xml2::xml_find_all(pg, ".//select[@id='Regional Data']/ + option[contains(@value, 'csv') and + not(contains(@value, 'Week_'))]") %>% + xml2::xml_attr("value") %>% sprintf("%s%s", PREFIX, .) -> targets - pb <- progress_estimated(length(targets)) - map_df(targets, function(x) { + pb <- dplyr::progress_estimated(length(targets)) + purrr::map_df(targets, function(x) { pb$tick()$print() suppressMessages(read_csv(URLencode(x), col_types="ciidii")) }) -> influenza_mortality_by_region + # scrape (ugh) web page to get data file links for weekly mortality data - xml_find_all(pg, ".//select[@id='Regional Data']/ - option[contains(@value, 'csv') and - contains(@value, 'Week_')]") %>% - xml_attr("value") %>% + xml2::xml_find_all(pg, ".//select[@id='Regional Data']/ + option[contains(@value, 'csv') and + contains(@value, 'Week_')]") %>% + xml2::xml_attr("value") %>% sprintf("%s%s", PREFIX, .) -> targets - pb <- progress_estimated(length(targets)) - map_df(targets, function(x) { + pb <- dplyr::progress_estimated(length(targets)) + purrr::map_df(targets, function(x) { pb$tick()$print() suppressMessages(read_csv(URLencode(x), col_types="ciidii")) }) -> influenza_mortality_by_week + # if return it all + list( - by_state=influenza_mortality_by_state, - by_region=influenza_mortality_by_region, - by_week=influenza_mortality_by_week - ) + by_state = influenza_mortality_by_state, + by_region = influenza_mortality_by_region, + by_week = influenza_mortality_by_week + ) -> out + + class(out) <- c("cfv_mortality", class(out)) + + out } diff --git a/R/zzz.r b/R/zzz.r index 2dc758c..3e91d4f 100644 --- a/R/zzz.r +++ b/R/zzz.r @@ -1,10 +1,12 @@ -.onAttach <- function(...) { - - if (!interactive()) return() - - # packageStartupMessage(paste0("cdcfluview is under *active* development. ", - # "There are *breaking changes*. ", - # "See https://github.com/hrbrmstr/cdcfluview for info/news.")) - -} - +# this is only used during active development phases before/after CRAN releases + +# .onAttach <- function(...) { +# +# if (!interactive()) return() +# +# packageStartupMessage(paste0("cdcfluview is under *active* development. ", +# "There are *breaking changes*. ", +# "See https://github.com/hrbrmstr/cdcfluview for info/news.")) +# +# } +# diff --git a/README.Rmd b/README.Rmd index f77967e..48764b5 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,6 +16,7 @@ knitr::opts_chunk$set( [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/cdcfluview)](https://cran.r-project.org/package=cdcfluview) [![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/cdcfluview.svg?branch=master)](https://travis-ci.org/hrbrmstr/cdcfluview) +[![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/cdcfluview/master.svg)](https://codecov.io/github/hrbrmstr/cdcfluview?branch=master) **NOTE** If there's a particular data set from https://www.cdc.gov/flu/weekly/fluviewinteractive.htm that you want and that isn't in the package, please file it as an issue and be as specific as you can (screen shot if possible). diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..69cb760 --- /dev/null +++ b/codecov.yml @@ -0,0 +1 @@ +comment: false diff --git a/tests/testthat/test-cdcfluview.R b/tests/testthat/test-cdcfluview.R index 17aeb7f..45ab764 100644 --- a/tests/testthat/test-cdcfluview.R +++ b/tests/testthat/test-cdcfluview.R @@ -5,4 +5,18 @@ test_that("we can do something", { expect_that(dim(get_state_data(2008)), equals(c(2494L, 8L))) + invisible(get_flu_data()) + + invisible(get_flu_data(data_source="all")) + + invisible(get_weekly_flu_report()) + }) + +test_that("these are potentially time-consuming calls", { + + skip_on_cran() + + invisible(get_mortality_surveillance_data()) + +}) \ No newline at end of file