boB Rudis
5 years ago
14 changed files with 10255 additions and 29 deletions
@ -1,15 +1,19 @@ |
|||||
#' @md |
#' @md |
||||
#' @title June 2019 U.S. Democratic Debate Candidate/Topic Times |
#' @title 2019-2020 U.S. Democratic Debate Candidate/Topic Times |
||||
#' @description The New York Times and other media outlets kept track of the time each |
#' @description The New York Times and other media outlets kept track of the time each |
||||
#' candidate spent talking including the timestamp of the start of the blathering |
#' candidate spent talking including the timestamp of the start of the blathering |
||||
#' and the topic up for debate. This dataset only includes candidates and |
#' and the topic up for debate. This dataset only includes candidates and |
||||
#' topic times. The complete datasets (See References) also include moderator |
#' topic times. The complete datasets (See References) also include moderator |
||||
#' metadata and opening/closing statement records. |
#' metadata and opening/closing statement records. |
||||
#' @format data frame with columns: `elapsed` (dbl), `timestamp` (drtn), `speaker` (chr), `topic` (chr) |
#' @format data frame with columns: `elapsed` (dbl), `timestamp` (time), `speaker` (chr), `topic` (chr), |
||||
|
#' `debate_date` (date), `debate_group` (dbl), `night` (dbl) |
||||
#' @docType data |
#' @docType data |
||||
#' @keywords datasets |
#' @keywords datasets |
||||
#' @name debates2019 |
#' @name debates2019 |
||||
#' @references <https://www.nytimes.com/interactive/2019/admin/100000006581096.embedded.html> |
#' @references <https://www.nytimes.com/interactive/2019/06/26/us/elections/debate-speaking-time.html> |
||||
#' @references <https://www.nytimes.com/interactive/2019/admin/100000006584572.embedded.html> |
#' @references <https://www.nytimes.com/interactive/2019/06/27/us/elections/debate-speaking-time.html> |
||||
|
#' @references <https://www.nytimes.com/interactive/2019/07/30/us/elections/debate-speaking-time.html> |
||||
|
#' @references <https://www.nytimes.com/interactive/2019/07/31/us/elections/debate-speaking-time.html> |
||||
|
#' @references <https://www.nytimes.com/interactive/2019/09/12/us/elections/debate-speaking-time.html> |
||||
#' @usage data("debates2019") |
#' @usage data("debates2019") |
||||
NULL |
NULL |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,14 +1,264 @@ |
|||||
## code to prepare `debates2019` dataset goes here |
## code to prepare `debates2019` dataset goes here |
||||
|
|
||||
read_csv( |
# read_csv( |
||||
file = "https://rud.is/data/2019-dem-debates.csv.gz", |
# file = "https://rud.is/data/2019-dem-debates.csv.gz", |
||||
col_types = cols( |
# col_types = cols( |
||||
elapsed = col_double(), |
# elapsed = col_double(), |
||||
timestamp = col_time(format = ""), |
# timestamp = col_time(format = ""), |
||||
speaker = col_character(), |
# speaker = col_character(), |
||||
topic = col_character() |
# topic = col_character() |
||||
) |
# ) |
||||
) -> debates2019 |
# ) -> debates2019 |
||||
|
# |
||||
|
# |
||||
|
# usethis::use_data(debates2019, overwrite = TRUE) |
||||
|
library(rvest) |
||||
|
library(stringi) |
||||
|
library(tidyverse) |
||||
|
|
||||
|
# Cache each NYT speaking-time page under data-raw/ so the script can be
# re-run without hitting the network; a page is fetched only when its local
# copy is missing. The URL and the local file name are both derived from the
# same date string.
debate_days <- c(
  "2019-06-26", "2019-06-27", "2019-07-30", "2019-07-31", "2019-09-12"
)

for (debate_day in debate_days) {
  local_page <- here::here(
    sprintf("data-raw/%s-us-elections-debate-speaking-time.html", debate_day)
  )
  if (!file.exists(local_page)) {
    download.file(
      paste0(
        "https://www.nytimes.com/interactive/",
        gsub("-", "/", debate_day, fixed = TRUE), # 2019-06-26 -> 2019/06/26
        "/us/elections/debate-speaking-time.html"
      ),
      local_page
    )
  }
}
||||
|
|
||||
|
# Debate group 1, night 1 (page saved from the 2019/06/26 NYT URL).
# The records are embedded in a <script> element as a JSON literal assigned
# to `NYTG_DEMDEBATES`: pick the script, split its text into lines, keep the
# line that is parsed, strip everything up to the assignment and parse it.
jun_day_1 <- read_html(
  here::here("data-raw/2019-06-26-us-elections-debate-speaking-time.html")
) %>%
  html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>%
  html_text() %>%
  stri_split_lines() %>%
  unlist() %>%
  .[3] %>% # for this saved page the third line of the script text is used
  stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>%
  jsonlite::fromJSON() %>%
  mutate(
    elapsed = as.numeric(elapsed) / 60, # presumably seconds -> minutes; confirm
    # Previously hard-coded to "2019-09-13", which is not this page's date.
    debate_date = as.Date("2019-06-26"),
    speaker = stri_trans_totitle(speaker),
    timestamp = parse_time(timestamp),
    topic = stri_trans_totitle(topic),
    debate_group = 1,
    night = 1
  ) %>%
  mutate(
    # Title-casing flattens these two surnames; restore the usual spelling.
    speaker = case_when(
      speaker == "Orourke" ~ "O'Rourke",
      speaker == "Deblasio" ~ "de Blasio",
      TRUE ~ speaker
    )
  ) %>%
  mutate(
    # Map empty topics to "Other" and fold label variants onto one name each;
    # the first matching rule wins, anything unmatched is kept as-is.
    topic = case_when(
      topic == "" ~ "Other",
      grepl("Campaign", topic) ~ "Campaign Finance Reform",
      grepl("Civil", topic) ~ "Civil Rights",
      grepl("Climate", topic) ~ "Climate",
      grepl("Foreign", topic) ~ "Foreign Policy",
      grepl("Gun", topic) ~ "Gun Control",
      grepl("Election", topic) ~ "Elections Reform",
      grepl("Health", topic) ~ "Healthcare",
      grepl("Party", topic) ~ "Party Strategy",
      grepl("Women", topic) ~ "Womens Rights",
      TRUE ~ topic
    )
  ) %>%
  # Drop rows whose timestamp did not parse, blank speakers and moderators.
  filter(
    !is.na(timestamp),
    speaker != "",
    speaker != "Moderator"
  ) %>%
  as_tibble()
||||
|
|
||||
|
# Debate group 1, night 2 (page saved from the 2019/06/27 NYT URL).
# The records are embedded in a <script> element as a JSON literal assigned
# to `NYTG_DEMDEBATES`: pick the script, split its text into lines, keep the
# line that is parsed, strip everything up to the assignment and parse it.
jun_day_2 <- read_html(
  here::here("data-raw/2019-06-27-us-elections-debate-speaking-time.html")
) %>%
  html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>%
  html_text() %>%
  stri_split_lines() %>%
  unlist() %>%
  .[3] %>% # for this saved page the third line of the script text is used
  stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>%
  jsonlite::fromJSON() %>%
  mutate(
    elapsed = as.numeric(elapsed) / 60, # presumably seconds -> minutes; confirm
    # Previously hard-coded to "2019-09-13", which is not this page's date.
    debate_date = as.Date("2019-06-27"),
    speaker = stri_trans_totitle(speaker),
    timestamp = parse_time(timestamp),
    topic = stri_trans_totitle(topic),
    debate_group = 1,
    night = 2
  ) %>%
  mutate(
    # Title-casing flattens these two surnames; restore the usual spelling.
    speaker = case_when(
      speaker == "Orourke" ~ "O'Rourke",
      speaker == "Deblasio" ~ "de Blasio",
      TRUE ~ speaker
    )
  ) %>%
  mutate(
    # Map empty topics to "Other" and fold label variants onto one name each;
    # the first matching rule wins, anything unmatched is kept as-is.
    topic = case_when(
      topic == "" ~ "Other",
      grepl("Campaign", topic) ~ "Campaign Finance Reform",
      grepl("Civil", topic) ~ "Civil Rights",
      grepl("Climate", topic) ~ "Climate",
      grepl("Foreign", topic) ~ "Foreign Policy",
      grepl("Gun", topic) ~ "Gun Control",
      grepl("Election", topic) ~ "Elections Reform",
      grepl("Health", topic) ~ "Healthcare",
      grepl("Party", topic) ~ "Party Strategy",
      grepl("Women", topic) ~ "Womens Rights",
      TRUE ~ topic
    )
  ) %>%
  # Drop rows whose timestamp did not parse, blank speakers and moderators.
  filter(
    !is.na(timestamp),
    speaker != "",
    speaker != "Moderator"
  ) %>%
  as_tibble()
||||
|
|
||||
|
# Debate group 2, night 1 (page saved from the 2019/07/30 NYT URL).
# The records are embedded in a <script> element as a JSON literal assigned
# to `NYTG_DEMDEBATES`: pick the script, split its text into lines, keep the
# line that is parsed, strip everything up to the assignment and parse it.
jul_day_1 <- read_html(
  here::here("data-raw/2019-07-30-us-elections-debate-speaking-time.html")
) %>%
  html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>%
  html_text() %>%
  stri_split_lines() %>%
  unlist() %>%
  .[2] %>% # for this saved page the second line of the script text is used
  stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>%
  jsonlite::fromJSON() %>%
  mutate(
    elapsed = as.numeric(elapsed) / 60, # presumably seconds -> minutes; confirm
    # Previously hard-coded to "2019-09-13", which is not this page's date.
    debate_date = as.Date("2019-07-30"),
    speaker = stri_trans_totitle(speaker),
    timestamp = parse_time(timestamp),
    topic = stri_trans_totitle(topic),
    debate_group = 2,
    night = 1
  ) %>%
  mutate(
    # Title-casing flattens these two surnames; restore the usual spelling.
    speaker = case_when(
      speaker == "Orourke" ~ "O'Rourke",
      speaker == "Deblasio" ~ "de Blasio",
      TRUE ~ speaker
    )
  ) %>%
  mutate(
    # Map empty topics to "Other" and fold label variants onto one name each;
    # the first matching rule wins, anything unmatched is kept as-is.
    topic = case_when(
      topic == "" ~ "Other",
      grepl("Campaign", topic) ~ "Campaign Finance Reform",
      grepl("Civil", topic) ~ "Civil Rights",
      grepl("Climate", topic) ~ "Climate",
      grepl("Foreign", topic) ~ "Foreign Policy",
      grepl("Gun", topic) ~ "Gun Control",
      grepl("Election", topic) ~ "Elections Reform",
      grepl("Health", topic) ~ "Healthcare",
      grepl("Party", topic) ~ "Party Strategy",
      grepl("Women", topic) ~ "Womens Rights",
      TRUE ~ topic
    )
  ) %>%
  # Drop rows whose timestamp did not parse, blank speakers and moderators.
  filter(
    !is.na(timestamp),
    speaker != "",
    speaker != "Moderator"
  ) %>%
  as_tibble()
||||
|
|
||||
|
# Debate group 2, night 2 (page saved from the 2019/07/31 NYT URL).
# The records are embedded in a <script> element as a JSON literal assigned
# to `NYTG_DEMDEBATES`: pick the script, split its text into lines, keep the
# line that is parsed, strip everything up to the assignment and parse it.
jul_day_2 <- read_html(
  here::here("data-raw/2019-07-31-us-elections-debate-speaking-time.html")
) %>%
  html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>%
  html_text() %>%
  stri_split_lines() %>%
  unlist() %>%
  .[2] %>% # for this saved page the second line of the script text is used
  stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>%
  jsonlite::fromJSON() %>%
  mutate(
    elapsed = as.numeric(elapsed) / 60, # presumably seconds -> minutes; confirm
    # Previously hard-coded to "2019-09-13", which is not this page's date.
    debate_date = as.Date("2019-07-31"),
    speaker = stri_trans_totitle(speaker),
    timestamp = parse_time(timestamp),
    topic = stri_trans_totitle(topic),
    debate_group = 2,
    night = 2
  ) %>%
  mutate(
    # Title-casing flattens these two surnames; restore the usual spelling.
    speaker = case_when(
      speaker == "Orourke" ~ "O'Rourke",
      speaker == "Deblasio" ~ "de Blasio",
      TRUE ~ speaker
    )
  ) %>%
  mutate(
    # Map empty topics to "Other" and fold label variants onto one name each;
    # the first matching rule wins, anything unmatched is kept as-is.
    topic = case_when(
      topic == "" ~ "Other",
      grepl("Campaign", topic) ~ "Campaign Finance Reform",
      grepl("Civil", topic) ~ "Civil Rights",
      grepl("Climate", topic) ~ "Climate",
      grepl("Foreign", topic) ~ "Foreign Policy",
      grepl("Gun", topic) ~ "Gun Control",
      grepl("Election", topic) ~ "Elections Reform",
      grepl("Health", topic) ~ "Healthcare",
      grepl("Party", topic) ~ "Party Strategy",
      grepl("Women", topic) ~ "Womens Rights",
      TRUE ~ topic
    )
  ) %>%
  # Drop rows whose timestamp did not parse, blank speakers and moderators.
  filter(
    !is.na(timestamp),
    speaker != "",
    speaker != "Moderator"
  ) %>%
  as_tibble()
||||
|
|
||||
|
# Debate group 3, night 1 (page saved from the 2019/09/12 NYT URL).
# The records are embedded in a <script> element as a JSON literal assigned
# to `NYTG_DEMDEBATES`: pick the script, split its text into lines, keep the
# line that is parsed, strip everything up to the assignment and parse it.
sep_day_1 <- read_html(
  here::here("data-raw/2019-09-12-us-elections-debate-speaking-time.html")
) %>%
  html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>%
  html_text() %>%
  stri_split_lines() %>%
  unlist() %>%
  .[3] %>% # for this saved page the third line of the script text is used
  stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>%
  jsonlite::fromJSON() %>%
  mutate(
    elapsed = as.numeric(elapsed) / 60, # presumably seconds -> minutes; confirm
    # Previously "2019-09-13"; aligned with the date of the source page.
    # NOTE(review): confirm 2019-09-12 is the intended debate date.
    debate_date = as.Date("2019-09-12"),
    speaker = stri_trans_totitle(speaker),
    timestamp = parse_time(timestamp),
    topic = stri_trans_totitle(topic),
    debate_group = 3,
    night = 1
  ) %>%
  mutate(
    # Title-casing flattens these two surnames; restore the usual spelling.
    speaker = case_when(
      speaker == "Orourke" ~ "O'Rourke",
      speaker == "Deblasio" ~ "de Blasio",
      TRUE ~ speaker
    )
  ) %>%
  mutate(
    # Map empty topics to "Other" and fold label variants onto one name each;
    # the first matching rule wins, anything unmatched is kept as-is.
    topic = case_when(
      topic == "" ~ "Other",
      grepl("Campaign", topic) ~ "Campaign Finance Reform",
      grepl("Civil", topic) ~ "Civil Rights",
      grepl("Climate", topic) ~ "Climate",
      grepl("Foreign", topic) ~ "Foreign Policy",
      grepl("Gun", topic) ~ "Gun Control",
      grepl("Election", topic) ~ "Elections Reform",
      grepl("Health", topic) ~ "Healthcare",
      grepl("Party", topic) ~ "Party Strategy",
      grepl("Women", topic) ~ "Womens Rights",
      TRUE ~ topic
    )
  ) %>%
  # Drop rows whose timestamp did not parse, blank speakers and moderators.
  filter(
    !is.na(timestamp),
    speaker != "",
    speaker != "Moderator"
  ) %>%
  as_tibble()
||||
|
|
||||
|
# Stack the per-night tables, in chronological order, into the single
# dataset that ships with the package.
debates2019 <- bind_rows(
  jun_day_1,
  jun_day_2,
  jul_day_1,
  jul_day_2,
  sep_day_1
)

# Write the combined table to the package's data/ directory, replacing any
# previously saved copy.
usethis::use_data(debates2019, overwrite = TRUE)
||||
|
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 83 KiB After Width: | Height: | Size: 86 KiB |
Loading…
Reference in new issue