You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

37 lines
1.8 KiB

  1. hhs_regions <- read.table(text="region;region_number;regional_office;state_or_territory
  2. Region 1;1;Boston;Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont
  3. Region 2;2;New York;New Jersey, New York, Puerto Rico, Virgin Islands
  4. Region 3;3;Philadelphia;Delaware, District of Columbia, Maryland, Pennsylvania, Virginia, West Virginia
  5. Region 4;4;Atlanta;Alabama, Florida, Georgia, Kentucky, Mississippi, North Carolina, South Carolina, Tennessee
  6. Region 5;5;Chicago;Illinois, Indiana, Michigan, Minnesota, Ohio, Wisconsin
  7. Region 6;6;Dallas;Arkansas, Louisiana, New Mexico, Oklahoma, Texas
  8. Region 7;7;Kansas City;Iowa, Kansas, Missouri, Nebraska
  9. Region 8;8;Denver;Colorado, Montana, North Dakota, South Dakota, Utah, Wyoming
  10. Region 9;9;San Francisco;Arizona, California, Hawaii, Nevada, American Samoa, Commonwealth of the Northern Mariana Islands, Federated States of Micronesia, Guam, Marshall Islands, Republic of Palau
  11. Region 10;10;Seattle;Alaska, Idaho, Oregon, Washington", sep=";", stringsAsFactors=FALSE, header=TRUE)
  12. library(stringr)
  13. do.call(rbind.data.frame, lapply(1:nrow(hhs_regions), function(i) {
  14. x <- hhs_regions[i,]
  15. rownames(x) <- NULL
  16. out <- data.frame(x[, c(1:3)],
  17. str_split(x$state_or_territory, ", ")[1],
  18. stringsAsFactors=FALSE)
  19. colnames(out) <- c("region", "region_number", "regional_office", "state_or_territory")
  20. out
  21. })) -> hhs_regions
  22. str(hhs_regions)
  23. library(rvest)
  24. library(magrittr)
  25. pg <- html("http://www.cdc.gov/std/stats11/census.htm")
  26. pg %>% html_table() %>% extract2(1) %>% as.list -> cens
  27. do.call(rbind.data.frame, lapply(names(cens), function(x) {
  28. data.frame(region=x,
  29. state=cens[[x]][cens[[x]]!=""],
  30. stringsAsFactors=FALSE)
  31. })) -> census_regions
  32. devtools::use_data(hhs_regions, census_regions, overwrite=TRUE)