Browse Source

updated debates data; regen'd vignette

master
boB Rudis 1 year ago
parent
commit
7494388716
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
14 changed files with 10255 additions and 29 deletions
  1. +2
    -2
      DESCRIPTION
  2. +8
    -4
      R/datasets.R
  3. +1
    -2
      README.Rmd
  4. +8
    -6
      README.md
  5. +3523
    -0
      data-raw/2019-06-26-us-elections-debate-speaking-time.html
  6. +3344
    -0
      data-raw/2019-06-27-us-elections-debate-speaking-time.html
  7. +1491
    -0
      data-raw/2019-07-30-us-elections-debate-speaking-time.html
  8. +418
    -0
      data-raw/2019-07-31-us-elections-debate-speaking-time.html
  9. +1181
    -0
      data-raw/2019-09-12-us-elections-debate-speaking-time.html
  10. +259
    -9
      data-raw/debates2019.R
  11. BIN
      data/debates2019.rda
  12. +11
    -4
      man/debates2019.Rd
  13. BIN
      man/figures/README-nyt-1.png
  14. +9
    -2
      vignettes/using-ggchicklet.Rmd

+ 2
- 2
DESCRIPTION View File

@ -1,8 +1,8 @@
Package: ggchicklet
Type: Package
Title: Create 'Chicklet' (Rounded Segmented Column) Charts
Version: 0.1.0
Date: 2019-06-28
Version: 0.2.0
Date: 2019-09-15
Authors@R: c( person("Bob", "Rudis", email = "bob@rud.is", role =
c("aut", "cre"), comment = c(ORCID = "0000-0001-5670-2640")),
person("Antoine", "Bichat", role = "ctb") )


+ 8
- 4
R/datasets.R View File

@ -1,15 +1,19 @@
#' @md
#' @title June 2019 U.S. Democratic Debate Candidate/Topic Times
#' @title 2019-2020 U.S. Democratic Debate Candidate/Topic Times
#' @description The New York Times and other media outlets kept track of the time each
#' candidate spent talking including the timestamp of the start of the blathering
#' and the topic up for debate. This dataset only includes candidates and
#' topic times. The complete datasets (See References) also include moderator
#' metadata and opening/closing statement records.
#' @format data frame with columns: `elapsed` (dbl), `timestamp` (drtn), `speaker` (chr), `topic` (chr)
#' @format data frame with columns: `elapsed` (dbl), `timestamp` (time), `speaker` (chr), `topic` (chr),
#' `debate_date` (date), `debate_group` (dbl), `night` (dbl)
#' @docType data
#' @keywords datasets
#' @name debates2019
#' @references <https://www.nytimes.com/interactive/2019/admin/100000006581096.embedded.html>
#' @references <https://www.nytimes.com/interactive/2019/admin/100000006584572.embedded.html>
#' @references <https://www.nytimes.com/interactive/2019/06/26/us/elections/debate-speaking-time.html>
#' @references <https://www.nytimes.com/interactive/2019/06/27/us/elections/debate-speaking-time.html>
#' @references <https://www.nytimes.com/interactive/2019/07/30/us/elections/debate-speaking-time.html>
#' @references <https://www.nytimes.com/interactive/2019/07/31/us/elections/debate-speaking-time.html>
#' @references <https://www.nytimes.com/interactive/2019/09/12/us/elections/debate-speaking-time.html>
#' @usage data("debates2019")
NULL

+ 1
- 2
README.Rmd View File

@ -53,6 +53,7 @@ library(tidyverse)
data("debates2019")
debates2019 %>%
filter(debate_group == 1) %>%
mutate(speaker = fct_reorder(speaker, elapsed, sum, .desc=FALSE)) %>%
mutate(topic = fct_other(
topic,
@ -92,8 +93,6 @@ debates2019 %>%
theme_ipsum_rc(grid="X") +
theme(axis.text.x = element_text(color = "gray60", size = 10)) +
theme(legend.position = "top")
```
## ggchicklet Metrics


+ 8
- 6
README.md View File

@ -26,7 +26,7 @@ segmented column charts (i.e. “chicklets”).
## What’s Inside The Tin
- `debates2019`: June 2019 U.S. Democratic Debate Candidate/Topic
- `debates2019`: 2019-2020 U.S. Democratic Debate Candidate/Topic
Times
- `geom_chicklet`: Chicklet (rounded segmented column) charts
@ -58,7 +58,7 @@ library(ggchicklet)
# current version
packageVersion("ggchicklet")
## [1] '0.1.0'
## [1] '0.2.0'
```
### From the NYTimes
@ -70,6 +70,7 @@ library(tidyverse)
data("debates2019")
debates2019 %>%
filter(debate_group == 1) %>%
mutate(speaker = fct_reorder(speaker, elapsed, sum, .desc=FALSE)) %>%
mutate(topic = fct_other(
topic,
@ -115,10 +116,11 @@ debates2019 %>%
## ggchicklet Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :--- | -------: | --: | --: | ---: | ----------: | ---: | -------: | ---: |
| R | 8 | 0.8 | 119 | 0.52 | 27 | 0.36 | 105 | 0.55 |
| Rmd | 2 | 0.2 | 109 | 0.48 | 47 | 0.64 | 85 | 0.45 |
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :--- | -------: | ---: | ---: | ---: | ----------: | ---: | -------: | ---: |
| HTML | 5 | 0.33 | 7860 | 0.94 | 1858 | 0.96 | 239 | 0.54 |
| R | 8 | 0.53 | 350 | 0.04 | 34 | 0.02 | 121 | 0.27 |
| Rmd | 2 | 0.13 | 117 | 0.01 | 45 | 0.02 | 85 | 0.19 |
## Code of Conduct


+ 3523
- 0
data-raw/2019-06-26-us-elections-debate-speaking-time.html
File diff suppressed because it is too large
View File


+ 3344
- 0
data-raw/2019-06-27-us-elections-debate-speaking-time.html
File diff suppressed because it is too large
View File


+ 1491
- 0
data-raw/2019-07-30-us-elections-debate-speaking-time.html
File diff suppressed because it is too large
View File


+ 418
- 0
data-raw/2019-07-31-us-elections-debate-speaking-time.html
File diff suppressed because it is too large
View File


+ 1181
- 0
data-raw/2019-09-12-us-elections-debate-speaking-time.html
File diff suppressed because it is too large
View File


+ 259
- 9
data-raw/debates2019.R View File

@ -1,14 +1,264 @@
## code to prepare `debates2019` dataset goes here
read_csv(
file = "https://rud.is/data/2019-dem-debates.csv.gz",
col_types = cols(
elapsed = col_double(),
timestamp = col_time(format = ""),
speaker = col_character(),
topic = col_character()
)
) -> debates2019
# read_csv(
# file = "https://rud.is/data/2019-dem-debates.csv.gz",
# col_types = cols(
# elapsed = col_double(),
# timestamp = col_time(format = ""),
# speaker = col_character(),
# topic = col_character()
# )
# ) -> debates2019
#
#
# usethis::use_data(debates2019, overwrite = TRUE)
library(rvest)
library(stringi)
library(tidyverse)

# Builds the `debates2019` dataset from the NYT "debate speaking time" pages.
#
# Each page embeds its data as JSON assigned to `NYTG_DEMDEBATES` inside a
# <script> element. The pages are cached under data-raw/ so the script can be
# re-run without hitting the network again.
#
# NOTE: earlier versions of this script stamped every row with
# `debate_date = 2019-09-13`, regardless of which debate it came from. The
# date now comes from the page itself, so data/debates2019.rda must be
# regenerated by re-running this script.

# One row per debate page.
#   page_date  - date used in the page URL and in the cached file name
#   json_line  - line of the <script> text that holds the JSON payload
#                (differs between pages)
#   page_group - value stored in `debate_group`
#   page_night - value stored in `night`
debate_pages <- tribble(
  ~page_date,   ~json_line, ~page_group, ~page_night,
  "2019-06-26", 3,          1,           1,
  "2019-06-27", 3,          1,           2,
  "2019-07-30", 2,          2,           1,
  "2019-07-31", 2,          2,           2,
  "2019-09-12", 3,          3,           1
)

# Download (if not already cached), parse and tidy a single debate page.
#
# Arguments mirror the columns of `debate_pages`; `page_date` is a
# "YYYY-MM-DD" string. Returns a tibble of candidate speaking segments with
# `debate_date`, `debate_group` and `night` columns added.
read_debate <- function(page_date, json_line, page_group, page_night) {

  html_file <- here::here(
    sprintf("data-raw/%s-us-elections-debate-speaking-time.html", page_date)
  )

  if (!file.exists(html_file)) {
    download.file(
      sprintf(
        "https://www.nytimes.com/interactive/%s/us/elections/debate-speaking-time.html",
        gsub("-", "/", page_date, fixed = TRUE)
      ),
      html_file
    )
  }

  read_html(html_file) %>%
    html_nodes(xpath = ".//script[contains(., 'NYTG_DEMDEBATES')]") %>%
    html_text() %>%
    stri_split_lines() %>%
    unlist() %>%
    .[json_line] %>%
    stri_replace_first_regex("^.*NYTG_DEMDEBATES = ", "") %>%
    jsonlite::fromJSON() %>%
    mutate(
      elapsed = as.numeric(elapsed) / 60,
      debate_date = as.Date(page_date), # per-page date, not a fixed constant
      speaker = stri_trans_totitle(speaker),
      timestamp = parse_time(timestamp),
      topic = stri_trans_totitle(topic),
      debate_group = page_group,
      night = page_night
    ) %>%
    mutate(
      # title-casing mangles these two surnames; restore them
      speaker = case_when(
        speaker == "Orourke" ~ "O'Rourke",
        speaker == "Deblasio" ~ "de Blasio",
        TRUE ~ speaker
      )
    ) %>%
    mutate(
      # collapse topic variants onto a single label per subject
      topic = case_when(
        topic == "" ~ "Other",
        grepl("Campaign", topic) ~ "Campaign Finance Reform",
        grepl("Civil", topic) ~ "Civil Rights",
        grepl("Climate", topic) ~ "Climate",
        grepl("Foreign", topic) ~ "Foreign Policy",
        grepl("Gun", topic) ~ "Gun Control",
        grepl("Election", topic) ~ "Elections Reform",
        grepl("Health", topic) ~ "Healthcare",
        grepl("Party", topic) ~ "Party Strategy",
        grepl("Women", topic) ~ "Womens Rights",
        TRUE ~ topic
      )
    ) %>%
    filter(
      !is.na(timestamp),
      speaker != "",
      speaker != "Moderator"
    ) %>%
    as_tibble()

}

# Parse every page (in table order) and stack the results.
debates2019 <- pmap_dfr(debate_pages, read_debate)

usethis::use_data(debates2019, overwrite = TRUE)

BIN
data/debates2019.rda View File


+ 11
- 4
man/debates2019.Rd View File

@ -3,8 +3,9 @@
\docType{data}
\name{debates2019}
\alias{debates2019}
\title{June 2019 U.S. Democratic Debate Candidate/Topic Times}
\format{data frame with columns: \code{elapsed} (dbl), \code{timestamp} (drtn), \code{speaker} (chr), \code{topic} (chr)}
\title{2019-2020 U.S. Democratic Debate Candidate/Topic Times}
\format{data frame with columns: \code{elapsed} (dbl), \code{timestamp} (time), \code{speaker} (chr), \code{topic} (chr),
\code{debate_date} (date), \code{debate_group} (dbl), \code{night} (dbl)}
\usage{
data("debates2019")
}
@ -16,8 +17,14 @@ topic times. The complete datasets (See References) also include moderator
metadata and opening/closing statement records.
}
\references{
\url{https://www.nytimes.com/interactive/2019/admin/100000006581096.embedded.html}
\url{https://www.nytimes.com/interactive/2019/06/26/us/elections/debate-speaking-time.html}
\url{https://www.nytimes.com/interactive/2019/admin/100000006584572.embedded.html}
\url{https://www.nytimes.com/interactive/2019/06/27/us/elections/debate-speaking-time.html}
\url{https://www.nytimes.com/interactive/2019/07/30/us/elections/debate-speaking-time.html}
\url{https://www.nytimes.com/interactive/2019/07/31/us/elections/debate-speaking-time.html}
\url{https://www.nytimes.com/interactive/2019/09/12/us/elections/debate-speaking-time.html}
}
\keyword{datasets}

BIN
man/figures/README-nyt-1.png View File

Before After
Width: 1920  |  Height: 1728  |  Size: 83 KiB Width: 1920  |  Height: 1728  |  Size: 86 KiB

+ 9
- 2
vignettes/using-ggchicklet.Rmd View File

@ -51,7 +51,10 @@ The `elapsed` column contains how long the candidate spoke and `timestamp` is th
There are also candidates:
```{r data-ex-01}
distinct(debates2019, speaker) %>%
debates2019 %>%
filter(debate_group == 1) %>%
filter(night == 1) %>%
distinct(speaker) %>%
arrange(speaker) %>%
print(n=nrow(.))
```
@ -59,7 +62,10 @@ distinct(debates2019, speaker) %>%
and the topics debated:
```{r data-ex-02}
distinct(debates2019, topic) %>%
debates2019 %>%
filter(debate_group == 1) %>%
filter(night == 1) %>%
distinct(topic) %>%
arrange(topic) %>%
print(n=nrow(.))
```
@ -76,6 +82,7 @@ the segments by topic.
```{r chicklet, fig.width=600/72, fig.height=600/72}
debates2019 %>%
filter(debate_group == 1) %>%
mutate(speaker = fct_reorder(speaker, elapsed, sum, .desc=FALSE)) %>%
mutate(topic = fct_other(
topic,


Loading…
Cancel
Save