## code to prepare `debates2019` dataset goes here

# read_csv(
#   file = "https://rud.is/data/2019-dem-debates.csv.gz",
#   col_types = cols(
#     elapsed = col_double(),
#     timestamp = col_time(format = ""),
#     speaker = col_character(),
#     topic = col_character()
#   )
# ) -> debates2019
#
#
# usethis::use_data(debates2019, overwrite = TRUE)

library(rvest)
library(stringi)
library(tidyverse)

# Overview ----
#
# Every debate is described by one `debate_spec()` entry. The page date in that
# entry drives the download URL, the cached file name under data-raw/ AND the
# `debate_date` column, so the three can never disagree. (Earlier revisions of
# this script hard-coded `debate_date`, which left the June, July and September
# 2019 rows all stamped "2019-09-13".)

# Topic rules ----
#
# Each rule set is a named character vector: name = regex handed to `grepl()`
# (case-sensitive), value = label to assign. Rules are tried top to bottom and
# the first match wins, so the order below is significant.

# June, July and September 2019: matched against title-cased topics.
summer_topic_rules <- c(
  "Campaign" = "Campaign Finance Reform",
  "Civil" = "Civil Rights",
  "Climate" = "Climate",
  "Foreign" = "Foreign Policy",
  "Gun" = "Gun Control",
  "Election" = "Elections Reform",
  "Health" = "Healthcare",
  "Party" = "Party Strategy",
  "Women" = "Women's Rights"
)

# October 2019 onwards: matched against the raw topic slugs.
oct_topic_rules <- c(
  "impeachment" = "Impeachment",
  "economy" = "Economy",
  "opioids" = "Opioids",
  "candidate-age" = "Age",
  "tech-companies" = "Tech Companies",
  "middle-east policy" = "Foreign Policy",
  "gun-control" = "Gun Control",
  "income-inequality" = "Income Inequality",
  "health-care" = "Healthcare",
  "party-strategy" = "Party Strategy",
  "womens-rights" = "Women's Rights"
)

nov_topic_rules <- c(
  "climate" = "Climate",
  "closing" = "Closing",
  "criminal-justice" = "Criminal Justice",
  "electability" = "Electability",
  "election-reform" = "Election Reform",
  "executive-power" = "Executive Power",
  "candidate-age" = "Age",
  "foreign-policy" = "Foreign Policy",
  "gun-control" = "Gun Control",
  "health-care" = "Healthcare",
  "immigration" = "Immigration",
  "impeachment" = "Impeachment",
  "income-inequality" = "Income Inequality",
  "economic-inequality" = "Income Inequality",
  "economy" = "Economy",
  "middle-east policy" = "Foreign Policy",
  "opioids" = "Opioids",
  "party-strategy" = "Party Strategy",
  "public-service" = "Public Service",
  "tech-companies" = "Tech Companies",
  "white-supremacist violence" = "White-Supremacy",
  "womens-issues" = "Women's Rights"
)

dec_topic_rules <- c(
  "climate" = "Climate",
  "closing" = "Closing",
  "judges" = "Judges",
  "transparency" = "Transparency",
  "race" = "Race",
  "economic-inequality" = "Income Inequality",
  "education" = "Education",
  "human-rights" = "Human Rights",
  "criminal-justice" = "Criminal Justice",
  "electability" = "Electability",
  "election-reform" = "Election Reform",
  "executive-power" = "Executive Power",
  "candidate-age" = "Age",
  "foreign-policy" = "Foreign Policy",
  "gun-control" = "Gun Control",
  "health-care" = "Healthcare",
  "immigration" = "Immigration",
  "impeachment" = "Impeachment",
  "income-inequality" = "Income Inequality",
  "economy" = "Economy",
  "middle-east policy" = "Foreign Policy",
  "opioids" = "Opioids",
  "party-strategy" = "Party Strategy",
  "public-service" = "Public Service",
  "tech-companies" = "Tech Companies",
  "white-supremacist violence" = "White-Supremacy",
  "womens-issues" = "Women's Rights"
)

jan_topic_rules <- c(
  "military" = "Military",
  "climate" = "Climate",
  "closing" = "Closing",
  "judges" = "Judges",
  "race" = "Race",
  "education" = "Education",
  "human-rights" = "Human Rights",
  "criminal-justice" = "Criminal Justice",
  "electability" = "Electability",
  "election-reform" = "Election Reform",
  "executive-power" = "Executive Power",
  "candidate-age" = "Age",
  "foreign-policy" = "Foreign Policy",
  "gun-control" = "Gun Control",
  "child-care" = "Child Care",
  "health-care" = "Healthcare",
  "immigration" = "Immigration",
  "impeachment" = "Impeachment",
  "income-inequality" = "Income Inequality",
  "female-president" = "Female President",
  "trade" = "Trade",
  "economy" = "Economy",
  "middle-east policy" = "Foreign Policy",
  "opioids" = "Opioids",
  "party-strategy" = "Party Strategy",
  "public-service" = "Public Service",
  "tech-companies" = "Tech Companies",
  "white-supremacist violence" = "White-Supremacy",
  "womens-issues" = "Women's Rights"
)

feb_07_topic_rules <- c(
  "military" = "Military",
  "climate" = "Climate",
  "closing" = "Closing",
  "judges" = "Judges",
  "race" = "Race",
  "Climate-change" = "Climate",
  "poverty" = "Poverty",
  "education" = "Education",
  "bloomberg" = "Bloomberg",
  "human-rights" = "Human Rights",
  "criminal-justice" = "Criminal Justice",
  "electability" = "Electability",
  "election-reform" = "Election Reform",
  "executive-power" = "Executive Power",
  "candidate-age" = "Age",
  "supreme-court" = "Supreme Court",
  "foreign-policy" = "Foreign Policy",
  "gun-control" = "Gun Control",
  "child-care" = "Child Care",
  "health-care" = "Healthcare",
  "immigration" = "Immigration",
  "impeachment" = "Impeachment",
  "income-inequality" = "Income Inequality",
  "female-president" = "Female President",
  "trade" = "Trade",
  "economy" = "Economy",
  "middle-east policy" = "Foreign Policy",
  "opioids" = "Opioids",
  "party-strategy" = "Party Strategy",
  "public-service" = "Public Service",
  "tech-companies" = "Tech Companies",
  "white-supremacist violence" = "White-Supremacy",
  "womens-issues" = "Women's Rights"
)

feb_19_topic_rules <- c(
  "military" = "Military",
  "climate" = "Climate",
  "closing" = "Closing",
  "judges" = "Judges",
  "race" = "Race",
  "Climate-change" = "Climate",
  "poverty" = "Poverty",
  "education" = "Education",
  "bloomberg" = "Bloomberg",
  "human-rights" = "Human Rights",
  "criminal-justice" = "Criminal Justice",
  "electability" = "Electability",
  "election-reform" = "Election Reform",
  "executive-power" = "Executive Power",
  "candidate-age" = "Age",
  "supreme-court" = "Supreme Court",
  "foreign-policy" = "Foreign Policy",
  "gun-control" = "Gun Control",
  "child-care" = "Child Care",
  "party-loyalty" = "Party Loyalty",
  "health-care" = "Healthcare",
  "immigration" = "Immigration",
  "impeachment" = "Impeachment",
  "economic-inequality" = "Income Inequality",
  "income-inequality" = "Income Inequality",
  "female-president" = "Female President",
  "trade" = "Trade",
  "economy" = "Economy",
  "sexism" = "Sexism",
  "middle-east policy" = "Foreign Policy",
  "opioids" = "Opioids",
  "party-strategy" = "Party Strategy",
  "public-service" = "Public Service",
  "tech-companies" = "Tech Companies",
  "white-supremacist violence" = "White-Supremacy",
  "womens-issues" = "Women's Rights"
)

feb_25_topic_rules <- c(
  "coronavirus" = "Coronavirus",
  "military" = "Military",
  "climate" = "Climate",
  "closing" = "Closing",
  "judges" = "Judges",
  "race" = "Race",
  "Climate-change" = "Climate",
  "poverty" = "Poverty",
  "education" = "Education",
  "bloomberg" = "Bloomberg",
  "human-rights" = "Human Rights",
  "criminal-justice" = "Criminal Justice",
  "racial-justice" = "Racial Justice",
  "electability" = "Electability",
  "election-reform" = "Election Reform",
  "executive-power" = "Executive Power",
  "candidate-age" = "Age",
  "supreme-court" = "Supreme Court",
  "foreign-policy" = "Foreign Policy",
  "gun-control" = "Gun Control",
  "child-care" = "Child Care",
  "party-loyalty" = "Party Loyalty",
  "health-care" = "Healthcare",
  "immigration" = "Immigration",
  "impeachment" = "Impeachment",
  "economic-inequality" = "Income Inequality",
  "income-inequality" = "Income Inequality",
  "female-president" = "Female President",
  "trade" = "Trade",
  "economy" = "Economy",
  "sexism" = "Sexism",
  "middle-east policy" = "Foreign Policy",
  "opioids" = "Opioids",
  "party-strategy" = "Party Strategy",
  "public-service" = "Public Service",
  "tech-companies" = "Tech Companies",
  "white-supremacist violence" = "White-Supremacy",
  "womens-issues" = "Women's Rights"
)

# Helpers ----

# Describe one debate page.
#
# date                Page date ("YYYY-MM-DD"); used for the URL, the cached
#                     file name and the `debate_date` column.
# group, night        Values written to `debate_group` / `night`.
# topic_rules         Named character vector (regex = label), first match wins.
# speaker_fixes       Named character vector (title-cased name = replacement).
# title_case_topic    Title-case topics before applying `topic_rules`?
# json_line           Which line of the matching <script> text holds the JSON.
# url_section         URL path segment after "us/".
# blank_topic_closers Speakers whose blank topics are labelled "Closing"
#                     instead of "Other".
debate_spec <- function(date, group, night, topic_rules,
                        speaker_fixes = character(0),
                        title_case_topic = FALSE,
                        json_line = 3,
                        url_section = "elections",
                        blank_topic_closers = character(0)) {
  list(
    date = as.Date(date),
    group = group,
    night = night,
    topic_rules = topic_rules,
    speaker_fixes = speaker_fixes,
    title_case_topic = title_case_topic,
    json_line = json_line,
    url_section = url_section,
    blank_topic_closers = blank_topic_closers
  )
}

# Local cache path of a debate page.
debate_path <- function(spec) {
  here::here(sprintf(
    "data-raw/%s-us-elections-debate-speaking-time.html",
    format(spec$date, "%Y-%m-%d")
  ))
}

# Source URL of a debate page.
debate_url <- function(spec) {
  sprintf(
    "https://www.nytimes.com/interactive/%s/us/%s/debate-speaking-time.html",
    format(spec$date, "%Y/%m/%d"),
    spec$url_section
  )
}

# Download a debate page unless a cached copy already exists.
download_debate <- function(spec) {
  path <- debate_path(spec)
  if (!file.exists(path)) {
    download.file(debate_url(spec), path)
  }
  invisible(path)
}

# Replace speaker names that appear in `names(fixes)`; everything else
# (including NA) is returned untouched.
recode_speaker <- function(speaker, fixes) {
  hit <- speaker %in% names(fixes)
  speaker[hit] <- unname(fixes[speaker[hit]])
  speaker
}

# Map raw topics to display labels.
#
# Blank topics become "Other" (or "Closing" for speakers listed in
# `blank_closers`). Remaining topics get the label of the first rule whose
# regex matches; topics matching no rule, and NA topics, are left as they are.
recode_topic <- function(topic, speaker, rules,
                         title_case = FALSE,
                         blank_closers = character(0)) {
  if (title_case) {
    topic <- stri_trans_totitle(topic)
  }

  is_blank <- !is.na(topic) & topic == ""
  recoded <- topic
  recoded[is_blank] <- "Other"
  recoded[is_blank & speaker %in% blank_closers] <- "Closing"

  # A blank topic is already resolved; no rule pattern can match "" anyway.
  pending <- !is_blank
  for (i in seq_along(rules)) {
    hit <- pending & grepl(names(rules)[[i]], topic)
    recoded[hit] <- rules[[i]]
    pending <- pending & !hit
  }

  recoded
}

# Read one cached debate page and return its speaking-time rows as a tibble.
read_debate <- function(spec) {
  read_html(debate_path(spec)) %>%
    html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>%
    html_text() %>%
    stri_split_lines() %>%
    unlist() %>%
    .[spec$json_line] %>%
    stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>%
    jsonlite::fromJSON() %>%
    as_tibble() %>%
    mutate(
      elapsed = as.numeric(elapsed) / 60, # elapsed is divided by 60 as before
      debate_date = spec$date,
      speaker = recode_speaker(stri_trans_totitle(speaker), spec$speaker_fixes),
      timestamp = parse_time(timestamp),
      debate_group = spec$group,
      night = spec$night
    ) %>%
    filter(
      !is.na(timestamp),
      speaker != "",
      speaker != "Moderator"
    ) %>%
    mutate(
      topic = recode_topic(
        topic,
        speaker,
        spec$topic_rules,
        title_case = spec$title_case_topic,
        blank_closers = spec$blank_topic_closers
      )
    )
}

# Debates ----

summer_speaker_fixes <- c(Orourke = "O'Rourke", Deblasio = "de Blasio")

debates <- list(
  jun_day_1 = debate_spec(
    "2019-06-26", group = 1, night = 1,
    topic_rules = summer_topic_rules,
    speaker_fixes = summer_speaker_fixes,
    title_case_topic = TRUE
  ),
  jun_day_2 = debate_spec(
    "2019-06-27", group = 1, night = 2,
    topic_rules = summer_topic_rules,
    speaker_fixes = summer_speaker_fixes,
    title_case_topic = TRUE
  ),
  jul_day_1 = debate_spec(
    "2019-07-30", group = 2, night = 1,
    topic_rules = summer_topic_rules,
    speaker_fixes = summer_speaker_fixes,
    title_case_topic = TRUE,
    json_line = 2
  ),
  jul_day_2 = debate_spec(
    "2019-07-31", group = 2, night = 2,
    topic_rules = summer_topic_rules,
    speaker_fixes = summer_speaker_fixes,
    title_case_topic = TRUE,
    json_line = 2
  ),
  sep_day_1 = debate_spec(
    "2019-09-12", group = 3, night = 1,
    topic_rules = summer_topic_rules,
    speaker_fixes = summer_speaker_fixes,
    title_case_topic = TRUE
  ),
  oct_day_1 = debate_spec(
    "2019-10-15", group = 4, night = 1,
    topic_rules = oct_topic_rules,
    speaker_fixes = c(Orourke = "O'Rourke")
  ),
  nov_day_1 = debate_spec(
    "2019-11-20", group = 5, night = 1,
    topic_rules = nov_topic_rules,
    blank_topic_closers = "Biden"
  ),
  dec_day_1 = debate_spec(
    "2019-12-19", group = 6, night = 1,
    topic_rules = dec_topic_rules
  ),
  jan_20_day_1 = debate_spec(
    "2020-01-14", group = 7, night = 1,
    topic_rules = jan_topic_rules,
    url_section = "politics"
  ),
  feb_07_day_1 = debate_spec(
    "2020-02-07", group = 8, night = 1,
    topic_rules = feb_07_topic_rules
  ),
  feb_19_day_1 = debate_spec(
    "2020-02-19", group = 9, night = 1,
    topic_rules = feb_19_topic_rules
  ),
  feb_25_day_1 = debate_spec(
    "2020-02-25", group = 10, night = 1,
    topic_rules = feb_25_topic_rules
  )
)

# Build ----

# Fetch every page first so parsing never starts with a partial cache.
walk(debates, download_debate)

debates2019 <- debates %>%
  map(read_debate) %>%
  bind_rows() %>%
  # Harmonise labels that differ between the per-debate rule sets.
  mutate(
    topic = case_when(
      grepl("elect.*form", topic, ignore.case = TRUE) ~ "Election Reform",
      grepl("transparency", topic, ignore.case = TRUE) ~ "Transparency",
      TRUE ~ topic
    )
  )

usethis::use_data(debates2019, overwrite = TRUE)