|
|
@ -1,14 +1,264 @@ |
|
|
|
## code to prepare `debates2019` dataset goes here |
|
|
|
|
|
|
|
read_csv( |
|
|
|
file = "https://rud.is/data/2019-dem-debates.csv.gz", |
|
|
|
col_types = cols( |
|
|
|
elapsed = col_double(), |
|
|
|
timestamp = col_time(format = ""), |
|
|
|
speaker = col_character(), |
|
|
|
topic = col_character() |
|
|
|
) |
|
|
|
) -> debates2019 |
|
|
|
# read_csv( |
|
|
|
# file = "https://rud.is/data/2019-dem-debates.csv.gz", |
|
|
|
# col_types = cols( |
|
|
|
# elapsed = col_double(), |
|
|
|
# timestamp = col_time(format = ""), |
|
|
|
# speaker = col_character(), |
|
|
|
# topic = col_character() |
|
|
|
# ) |
|
|
|
# ) -> debates2019 |
|
|
|
# |
|
|
|
# |
|
|
|
# usethis::use_data(debates2019, overwrite = TRUE) |
|
|
|
library(rvest) |
|
|
|
library(stringi) |
|
|
|
library(tidyverse) |
|
|
|
|
|
|
|
if (!file.exists(here::here("data-raw/2019-06-26-us-elections-debate-speaking-time.html"))) download.file("https://www.nytimes.com/interactive/2019/06/26/us/elections/debate-speaking-time.html", here::here("data-raw/2019-06-26-us-elections-debate-speaking-time.html")) |
|
|
|
if (!file.exists(here::here("data-raw/2019-06-27-us-elections-debate-speaking-time.html"))) download.file("https://www.nytimes.com/interactive/2019/06/27/us/elections/debate-speaking-time.html", here::here("data-raw/2019-06-27-us-elections-debate-speaking-time.html")) |
|
|
|
if (!file.exists(here::here("data-raw/2019-07-30-us-elections-debate-speaking-time.html"))) download.file("https://www.nytimes.com/interactive/2019/07/30/us/elections/debate-speaking-time.html", here::here("data-raw/2019-07-30-us-elections-debate-speaking-time.html")) |
|
|
|
if (!file.exists(here::here("data-raw/2019-07-31-us-elections-debate-speaking-time.html"))) download.file("https://www.nytimes.com/interactive/2019/07/31/us/elections/debate-speaking-time.html", here::here("data-raw/2019-07-31-us-elections-debate-speaking-time.html")) |
|
|
|
if (!file.exists(here::here("data-raw/2019-09-12-us-elections-debate-speaking-time.html"))) download.file("https://www.nytimes.com/interactive/2019/09/12/us/elections/debate-speaking-time.html", here::here("data-raw/2019-09-12-us-elections-debate-speaking-time.html")) |
|
|
|
|
|
|
|
read_html(here::here("data-raw/2019-06-26-us-elections-debate-speaking-time.html")) %>% |
|
|
|
html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>% |
|
|
|
html_text() %>% |
|
|
|
stri_split_lines() %>% |
|
|
|
unlist() %>% |
|
|
|
.[3] %>% |
|
|
|
stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>% |
|
|
|
jsonlite::fromJSON() %>% |
|
|
|
mutate( |
|
|
|
elapsed = as.numeric(elapsed)/60, |
|
|
|
debate_date = as.Date("2019-09-13"), |
|
|
|
speaker = stri_trans_totitle(speaker), |
|
|
|
timestamp = parse_time(timestamp), |
|
|
|
topic = stri_trans_totitle(topic), |
|
|
|
debate_group = 1, |
|
|
|
night = 1 |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
speaker = case_when( |
|
|
|
speaker == "Orourke" ~ "O'Rourke", |
|
|
|
speaker == "Deblasio" ~ "de Blasio", |
|
|
|
TRUE ~ speaker |
|
|
|
) |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
topic = case_when( |
|
|
|
topic == "" ~ "Other", |
|
|
|
grepl("Campaign", topic) ~ "Campaign Finance Reform", |
|
|
|
grepl("Civil", topic) ~ "Civil Rights", |
|
|
|
grepl("Climate", topic) ~ "Climate", |
|
|
|
grepl("Foreign", topic) ~ "Foreign Policy", |
|
|
|
grepl("Gun", topic) ~ "Gun Control", |
|
|
|
grepl("Election", topic) ~ "Elections Reform", |
|
|
|
grepl("Health", topic) ~ "Healthcare", |
|
|
|
grepl("Party", topic) ~ "Party Strategy", |
|
|
|
grepl("Women", topic) ~ "Womens Rights", |
|
|
|
TRUE ~ topic |
|
|
|
) |
|
|
|
) %>% |
|
|
|
filter( |
|
|
|
!is.na(timestamp), |
|
|
|
speaker != "", |
|
|
|
speaker != "Moderator" |
|
|
|
) %>% |
|
|
|
as_tibble() -> jun_day_1 |
|
|
|
|
|
|
|
read_html(here::here("data-raw/2019-06-27-us-elections-debate-speaking-time.html")) %>% |
|
|
|
html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>% |
|
|
|
html_text() %>% |
|
|
|
stri_split_lines() %>% |
|
|
|
unlist() %>% |
|
|
|
.[3] %>% |
|
|
|
stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>% |
|
|
|
jsonlite::fromJSON() %>% |
|
|
|
mutate( |
|
|
|
elapsed = as.numeric(elapsed)/60, |
|
|
|
debate_date = as.Date("2019-09-13"), |
|
|
|
speaker = stri_trans_totitle(speaker), |
|
|
|
timestamp = parse_time(timestamp), |
|
|
|
topic = stri_trans_totitle(topic), |
|
|
|
debate_group = 1, |
|
|
|
night = 2 |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
speaker = case_when( |
|
|
|
speaker == "Orourke" ~ "O'Rourke", |
|
|
|
speaker == "Deblasio" ~ "de Blasio", |
|
|
|
TRUE ~ speaker |
|
|
|
) |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
topic = case_when( |
|
|
|
topic == "" ~ "Other", |
|
|
|
grepl("Campaign", topic) ~ "Campaign Finance Reform", |
|
|
|
grepl("Civil", topic) ~ "Civil Rights", |
|
|
|
grepl("Climate", topic) ~ "Climate", |
|
|
|
grepl("Foreign", topic) ~ "Foreign Policy", |
|
|
|
grepl("Gun", topic) ~ "Gun Control", |
|
|
|
grepl("Election", topic) ~ "Elections Reform", |
|
|
|
grepl("Health", topic) ~ "Healthcare", |
|
|
|
grepl("Party", topic) ~ "Party Strategy", |
|
|
|
grepl("Women", topic) ~ "Womens Rights", |
|
|
|
TRUE ~ topic |
|
|
|
) |
|
|
|
) %>% |
|
|
|
filter( |
|
|
|
!is.na(timestamp), |
|
|
|
speaker != "", |
|
|
|
speaker != "Moderator" |
|
|
|
) %>% |
|
|
|
as_tibble() -> jun_day_2 |
|
|
|
|
|
|
|
read_html(here::here("data-raw/2019-07-30-us-elections-debate-speaking-time.html")) %>% |
|
|
|
html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>% |
|
|
|
html_text() %>% |
|
|
|
stri_split_lines() %>% |
|
|
|
unlist() %>% |
|
|
|
.[2] %>% |
|
|
|
stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>% |
|
|
|
jsonlite::fromJSON() %>% |
|
|
|
mutate( |
|
|
|
elapsed = as.numeric(elapsed)/60, |
|
|
|
debate_date = as.Date("2019-09-13"), |
|
|
|
speaker = stri_trans_totitle(speaker), |
|
|
|
timestamp = parse_time(timestamp), |
|
|
|
topic = stri_trans_totitle(topic), |
|
|
|
debate_group = 2, |
|
|
|
night = 1 |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
speaker = case_when( |
|
|
|
speaker == "Orourke" ~ "O'Rourke", |
|
|
|
speaker == "Deblasio" ~ "de Blasio", |
|
|
|
TRUE ~ speaker |
|
|
|
) |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
topic = case_when( |
|
|
|
topic == "" ~ "Other", |
|
|
|
grepl("Campaign", topic) ~ "Campaign Finance Reform", |
|
|
|
grepl("Civil", topic) ~ "Civil Rights", |
|
|
|
grepl("Climate", topic) ~ "Climate", |
|
|
|
grepl("Foreign", topic) ~ "Foreign Policy", |
|
|
|
grepl("Gun", topic) ~ "Gun Control", |
|
|
|
grepl("Election", topic) ~ "Elections Reform", |
|
|
|
grepl("Health", topic) ~ "Healthcare", |
|
|
|
grepl("Party", topic) ~ "Party Strategy", |
|
|
|
grepl("Women", topic) ~ "Womens Rights", |
|
|
|
TRUE ~ topic |
|
|
|
) |
|
|
|
) %>% |
|
|
|
filter( |
|
|
|
!is.na(timestamp), |
|
|
|
speaker != "", |
|
|
|
speaker != "Moderator" |
|
|
|
) %>% |
|
|
|
as_tibble() -> jul_day_1 |
|
|
|
|
|
|
|
read_html(here::here("data-raw/2019-07-31-us-elections-debate-speaking-time.html")) %>% |
|
|
|
html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>% |
|
|
|
html_text() %>% |
|
|
|
stri_split_lines() %>% |
|
|
|
unlist() %>% |
|
|
|
.[2] %>% |
|
|
|
stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>% |
|
|
|
jsonlite::fromJSON() %>% |
|
|
|
mutate( |
|
|
|
elapsed = as.numeric(elapsed)/60, |
|
|
|
debate_date = as.Date("2019-09-13"), |
|
|
|
speaker = stri_trans_totitle(speaker), |
|
|
|
timestamp = parse_time(timestamp), |
|
|
|
topic = stri_trans_totitle(topic), |
|
|
|
debate_group = 2, |
|
|
|
night = 2 |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
speaker = case_when( |
|
|
|
speaker == "Orourke" ~ "O'Rourke", |
|
|
|
speaker == "Deblasio" ~ "de Blasio", |
|
|
|
TRUE ~ speaker |
|
|
|
) |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
topic = case_when( |
|
|
|
topic == "" ~ "Other", |
|
|
|
grepl("Campaign", topic) ~ "Campaign Finance Reform", |
|
|
|
grepl("Civil", topic) ~ "Civil Rights", |
|
|
|
grepl("Climate", topic) ~ "Climate", |
|
|
|
grepl("Foreign", topic) ~ "Foreign Policy", |
|
|
|
grepl("Gun", topic) ~ "Gun Control", |
|
|
|
grepl("Election", topic) ~ "Elections Reform", |
|
|
|
grepl("Health", topic) ~ "Healthcare", |
|
|
|
grepl("Party", topic) ~ "Party Strategy", |
|
|
|
grepl("Women", topic) ~ "Womens Rights", |
|
|
|
TRUE ~ topic |
|
|
|
) |
|
|
|
) %>% |
|
|
|
filter( |
|
|
|
!is.na(timestamp), |
|
|
|
speaker != "", |
|
|
|
speaker != "Moderator" |
|
|
|
) %>% |
|
|
|
as_tibble() -> jul_day_2 |
|
|
|
|
|
|
|
read_html(here::here("data-raw/2019-09-12-us-elections-debate-speaking-time.html")) %>% |
|
|
|
html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>% |
|
|
|
html_text() %>% |
|
|
|
stri_split_lines() %>% |
|
|
|
unlist() %>% |
|
|
|
.[3] %>% |
|
|
|
stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>% |
|
|
|
jsonlite::fromJSON() %>% |
|
|
|
mutate( |
|
|
|
elapsed = as.numeric(elapsed)/60, |
|
|
|
debate_date = as.Date("2019-09-13"), |
|
|
|
speaker = stri_trans_totitle(speaker), |
|
|
|
timestamp = parse_time(timestamp), |
|
|
|
topic = stri_trans_totitle(topic), |
|
|
|
debate_group = 3, |
|
|
|
night = 1 |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
speaker = case_when( |
|
|
|
speaker == "Orourke" ~ "O'Rourke", |
|
|
|
speaker == "Deblasio" ~ "de Blasio", |
|
|
|
TRUE ~ speaker |
|
|
|
) |
|
|
|
) %>% |
|
|
|
mutate( |
|
|
|
topic = case_when( |
|
|
|
topic == "" ~ "Other", |
|
|
|
grepl("Campaign", topic) ~ "Campaign Finance Reform", |
|
|
|
grepl("Civil", topic) ~ "Civil Rights", |
|
|
|
grepl("Climate", topic) ~ "Climate", |
|
|
|
grepl("Foreign", topic) ~ "Foreign Policy", |
|
|
|
grepl("Gun", topic) ~ "Gun Control", |
|
|
|
grepl("Election", topic) ~ "Elections Reform", |
|
|
|
grepl("Health", topic) ~ "Healthcare", |
|
|
|
grepl("Party", topic) ~ "Party Strategy", |
|
|
|
grepl("Women", topic) ~ "Womens Rights", |
|
|
|
TRUE ~ topic |
|
|
|
) |
|
|
|
) %>% |
|
|
|
filter( |
|
|
|
!is.na(timestamp), |
|
|
|
speaker != "", |
|
|
|
speaker != "Moderator" |
|
|
|
) %>% |
|
|
|
as_tibble() -> sep_day_1 |
|
|
|
|
|
|
|
bind_rows( |
|
|
|
jun_day_1, |
|
|
|
jun_day_2, |
|
|
|
jul_day_1, |
|
|
|
jul_day_2, |
|
|
|
sep_day_1 |
|
|
|
) -> debates2019 |
|
|
|
|
|
|
|
usethis::use_data(debates2019, overwrite = TRUE) |
|
|
|
|