Browse Source

Horizon charts (area version)

pull/38/head
boB Rudis 4 years ago
parent
commit
a9cf92daec
No known key found for this signature in database GPG Key ID: 2A514A4997464560
9 changed files with 314 additions and 10 deletions
  1. +5
    -2
      DESCRIPTION
  2. +1
    -1
      LICENSE
  3. +4
    -0
      NAMESPACE
  4. +148
    -0
      R/geom_horizon.r
  5. +47
    -0
      README.Rmd
  6. +69
    -7
      README.md
  7. BIN
      README_figs/README-dumbbell2-1.png
  8. BIN
      README_figs/README-horizon-1.png
  9. +40
    -0
      man/geom_horizon.Rd

+ 5
- 2
DESCRIPTION View File

@ -9,7 +9,9 @@ Authors@R: c(
person("Ben", "Marwick", comment="General codebase cleanup", role = c("ctb")),
person("Jan", "Schulz", comment="Annotations", role = c("aut", "ctb")),
person("Rosen", "Matev", role="ctb", comment="Original annotate_textp implementation on stackoverflow"),
person("ProPublica", role="dtc", comment="StateFace font")
person("ProPublica", role="dtc", comment="StateFace font"),
person("Aditya", "Kothari", role=c("aut", "ctb"), comment="Core functionality of horizon plots"),
person("Ather", role="dtc", comment="Core functionality of horizon plots")
)
Description: A compendium of new geometries, coordinate systems, statistical
transformations, scales and fonts for 'ggplot2', including splines, 1d and 2d densities,
@ -19,7 +21,7 @@ Description: A compendium of new geometries, coordinate systems, statistical
compatibility and the 'StateFace' open source font 'ProPublica'. Further new
functionality includes lollipop charts, dumbbell charts, the ability to encircle
points and coordinate-system-based text annotations.
License: AGPL + file LICENSE
License: MIT + file LICENSE
LazyData: true
URL: https://github.com/hrbrmstr/ggalt
BugReports: https://github.com/hrbrmstr/ggalt/issues
@ -65,6 +67,7 @@ Collate:
'geom_dumbbell.R'
'geom_cartogram.r'
'geom_encircle.r'
'geom_horizon.r'
'geom_lollipop.r'
'geom_table.r'
'geom_twoway_bar.r'


+ 1
- 1
LICENSE View File

@ -1,2 +1,2 @@
YEAR: 2015
YEAR: 2017
COPYRIGHT HOLDER: Bob Rudis

+ 4
- 0
NAMESPACE View File

@ -13,6 +13,7 @@ export(GeomBkde2d)
export(GeomCartogram)
export(GeomDumbbell)
export(GeomEncircle)
export(GeomHorizon)
export(GeomLollipop)
export(GeomStateface)
export(GeomXSpline2)
@ -22,6 +23,7 @@ export(Mb)
export(StatAsh)
export(StatBkde)
export(StatBkde2d)
export(StatHorizon)
export(StatStepribbon)
export(StatXspline)
export(annotate_textp)
@ -33,6 +35,7 @@ export(geom_bkde2d)
export(geom_cartogram)
export(geom_dumbbell)
export(geom_encircle)
export(geom_horizon)
export(geom_lollipop)
export(geom_stateface)
export(geom_xspline)
@ -41,6 +44,7 @@ export(show_stateface)
export(stat_ash)
export(stat_bkde)
export(stat_bkde2d)
export(stat_horizon)
export(stat_stepribbon)
export(stat_xspline)
export(to_basic.GeomBkde2d)


+ 148
- 0
R/geom_horizon.r View File

@ -0,0 +1,148 @@
#' Plot a time series as a horizon plot
#'
#' A horizon plot breaks the Y dimension down using colours. This is useful
#' when visualising y values spanning a vast range and / or trying to highlight
#' outliers without losing context of the rest of the data.\cr \cr Horizon
#' plots are best viewed in an apsect ratio of very low vertical length.
#'
#' @md
#' @section Aesthetics: `x`, `y`, `fill`. `fill` defaults to `..band..` which is
#' the band number the current data fill area belongs in.
#' @section Other parameters: `bandwidth`, to dictate the span of a band.
#' @export
geom_horizon <- function(mapping = NULL, data = NULL, show.legend = TRUE,
inherit.aes = TRUE, na.rm = TRUE, bandwidth = NULL, ...) {
list(
layer(
data = data,
mapping = mapping,
stat = "horizon",
geom = GeomHorizon,
position = 'identity',
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(bandwidth = bandwidth, na.rm = na.rm, ...)
)
)
}
#' @rdname geom_horizon
#' @keywords internal
#' @export
GeomHorizon <- ggproto("GeomHorizon", GeomArea,
required_aes = c("x", "y"),
default_aes = plyr::defaults(
aes(fill=NA, size = 0.15, linetype = 1, alpha = NA, colour = "gray20"),
ggplot2::GeomArea$default_aes
),
draw_key = ggplot2::draw_key_rect
)
#' Transforms data for a horizon plot
#' @rdname geom_horizon
#' @export
stat_horizon <- function(mapping = NULL, data = NULL, geom = "horizon", show.legend = TRUE,
inherit.aes = TRUE, na.rm = TRUE, bandwidth = NULL, ...) {
list(
layer(
stat = StatHorizon,
data = data,
mapping = mapping,
geom = geom,
position = 'identity',
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(bandwidth = bandwidth, na.rm = na.rm, ...)
)
)
}
#' @rdname geom_horizon
#' @keywords internal
#' @export
StatHorizon <- ggproto(
"StatHorizon",
Stat,
required_aes = c("x", "y"),
default_aes = aes(fill=..band..),
setup_params = function(data, params) {
# calculating a default bandwidth
if (is.null(params$bandwidth)) {
params$bandwidth <- diff(range(data$y)) / 4
message(sprintf("bandwidth not specified. Using computed bandwidth %s",
params$bandwidth))
}
params$n_min_y <- min(data$y, na.rm = TRUE)
params
},
compute_group = function(data, scales, bandwidth, n_min_y) {
# calculating the band in which the values fall
data$fillb <- ((data$y - n_min_y) %/% bandwidth) + 1
# calculating the banded y value
orig_y <- data$y
orig_fill_b <- data$fillb
data$y <- data$y - (bandwidth * (data$fillb - 1)) - n_min_y
fill_bands <- sort(unique(data$fillb))
# for each band, calculating value at a particular x
banded_data <- lapply(
fill_bands,
function(i_fill_band) {
df_banded_data <- data[data$fillb == i_fill_band,]
df_banded_data_high <- data[data$fillb > i_fill_band,]
if (nrow(df_banded_data_high) > 0) {
df_banded_data_high$y <- bandwidth
df_banded_data_high$fillb <- i_fill_band
}
df_banded_data_low <- data[data$fillb < i_fill_band,]
if (nrow(df_banded_data_low) > 0) {
df_banded_data_low$y <- 0
df_banded_data_low$fillb <- i_fill_band
}
data <- rbind(
rbind(df_banded_data, df_banded_data_low),
df_banded_data_high
)
data$fillb <- data$fillb * bandwidth
data$band <- i_fill_band
data$group <- i_fill_band
data
}
)
data <- do.call(rbind, banded_data)
data$band <- factor(data$band)
data
}
)

+ 47
- 0
README.Rmd View File

@ -19,6 +19,7 @@ A compendium of 'geoms', 'coords', 'stats', scales and fonts for 'ggplot2', incl
The following functions are implemented:
- `geom_horizon` : Horizon charts (modified from <https://github.com/AtherEnergy/ggTimeSeries>)
- `coord_proj` : Like `coord_map`, only better (prbly shld use this with `geom_cartogram` as `geom_map`'s new defaults are ugh)
- `geom_xspline` : Connect control points/observations with an X-spline
- `stat_xspline` : Connect control points/observations with an X-spline
@ -64,6 +65,52 @@ dat <- data.frame(x=c(1:10, 1:10, 1:10),
)
```
### Horzon Chart
Example carved from: <https://github.com/halhen/viz-pub/blob/master/sports-time-of-day/2_gen_chart.R>
```{r horizon, message=FALSE, warning=FALSE, fig.height=9.5, fig.width=9.5}
library(hrbrthemes)
library(ggalt)
library(tidyverse)
sports <- read_tsv("https://github.com/halhen/viz-pub/raw/master/sports-time-of-day/activity.tsv")
sports %>%
group_by(activity) %>%
filter(max(p) > 3e-04,
!grepl('n\\.e\\.c', activity)) %>%
arrange(time) %>%
mutate(p_peak = p / max(p),
p_smooth = (lag(p_peak) + p_peak + lead(p_peak)) / 3,
p_smooth = coalesce(p_smooth, p_peak)) %>%
ungroup() %>%
do({
rbind(.,
filter(., time == 0) %>%
mutate(time = 24*60))
}) %>%
mutate(time = ifelse(time < 3 * 60, time + 24 * 60, time)) %>%
mutate(activity = reorder(activity, p_peak, FUN=which.max)) %>%
arrange(activity) %>%
mutate(activity.f = reorder(as.character(activity), desc(activity))) -> sports
sports <- mutate(sports, time2 = time/60)
ggplot(sports, aes(time2, p_smooth)) +
geom_horizon(bandwidth=0.1) +
facet_grid(activity.f~.) +
scale_x_continuous(expand=c(0,0), breaks=seq(from = 3, to = 27, by = 3), labels = function(x) {sprintf("%02d:00", as.integer(x %% 24))}) +
viridis::scale_fill_viridis(name = "Activity relative to peak", discrete=TRUE,
labels=scales::percent(seq(0, 1, 0.1)+0.1)) +
labs(x=NULL, y=NULL, title="Peak time of day for sports and leisure",
subtitle="Number of participants throughout the day compared to peak popularity.\nNote the morning-and-evening everyday workouts, the midday hobbies,\nand the evenings/late nights out.") +
theme_ipsum_rc(grid="") +
theme(panel.spacing.y=unit(-0.05, "lines")) +
theme(strip.text.y = element_text(hjust=0, angle=360)) +
theme(axis.text.y=element_blank())
```
### Splines!
```{r splines}


+ 69
- 7
README.md View File

@ -8,6 +8,7 @@ A compendium of 'geoms', 'coords', 'stats', scales and fonts for 'ggplot2', incl
The following functions are implemented:
- `geom_horizon` : Horizon charts (modified from <https://github.com/AtherEnergy/ggTimeSeries>)
- `coord_proj` : Like `coord_map`, only better (prbly shld use this with `geom_cartogram` as `geom_map`'s new defaults are ugh)
- `geom_xspline` : Connect control points/observations with an X-spline
- `stat_xspline` : Connect control points/observations with an X-spline
@ -50,6 +51,54 @@ dat <- data.frame(x=c(1:10, 1:10, 1:10),
)
```
### Horzon Chart
Example carved from: <https://github.com/halhen/viz-pub/blob/master/sports-time-of-day/2_gen_chart.R>
``` r
library(hrbrthemes)
library(ggalt)
library(tidyverse)
sports <- read_tsv("https://github.com/halhen/viz-pub/raw/master/sports-time-of-day/activity.tsv")
sports %>%
group_by(activity) %>%
filter(max(p) > 3e-04,
!grepl('n\\.e\\.c', activity)) %>%
arrange(time) %>%
mutate(p_peak = p / max(p),
p_smooth = (lag(p_peak) + p_peak + lead(p_peak)) / 3,
p_smooth = coalesce(p_smooth, p_peak)) %>%
ungroup() %>%
do({
rbind(.,
filter(., time == 0) %>%
mutate(time = 24*60))
}) %>%
mutate(time = ifelse(time < 3 * 60, time + 24 * 60, time)) %>%
mutate(activity = reorder(activity, p_peak, FUN=which.max)) %>%
arrange(activity) %>%
mutate(activity.f = reorder(as.character(activity), desc(activity))) -> sports
sports <- mutate(sports, time2 = time/60)
ggplot(sports, aes(time2, p_smooth)) +
geom_horizon(bandwidth=0.1) +
facet_grid(activity.f~.) +
scale_x_continuous(expand=c(0,0), breaks=seq(from = 3, to = 27, by = 3), labels = function(x) {sprintf("%02d:00", as.integer(x %% 24))}) +
viridis::scale_fill_viridis(name = "Activity relative to peak", discrete=TRUE,
labels=scales::percent(seq(0, 1, 0.1)+0.1)) +
labs(x=NULL, y=NULL, title="Peak time of day for sports and leisure",
subtitle="Number of participants throughout the day compared to peak popularity.\nNote the morning-and-evening everyday workouts, the midday hobbies,\nand the evenings/late nights out.") +
theme_ipsum_rc(grid="") +
theme(panel.spacing.y=unit(-0.05, "lines")) +
theme(strip.text.y = element_text(hjust=0, angle=360)) +
theme(axis.text.y=element_blank())
```
<img src="README_figs/README-horizon-1.png" width="912" />
### Splines!
``` r
@ -66,7 +115,7 @@ ggplot(dat, aes(x, y, group=group, color=factor(group))) +
geom_point() +
geom_line() +
geom_smooth(se=FALSE, linetype="dashed", size=0.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess'
```
<img src="README_figs/README-splines-2.png" width="672" />
@ -77,7 +126,7 @@ ggplot(dat, aes(x, y, group=group, color=factor(group))) +
geom_point(color="black") +
geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
geom_xspline(size=0.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess'
```
<img src="README_figs/README-splines-3.png" width="672" />
@ -88,7 +137,7 @@ ggplot(dat, aes(x, y, group=group, color=factor(group))) +
geom_point(color="black") +
geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
geom_xspline(spline_shape=-0.4, size=0.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess'
```
<img src="README_figs/README-splines-4.png" width="672" />
@ -99,7 +148,7 @@ ggplot(dat, aes(x, y, group=group, color=factor(group))) +
geom_point(color="black") +
geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
geom_xspline(spline_shape=0.4, size=0.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess'
```
<img src="README_figs/README-splines-5.png" width="672" />
@ -110,7 +159,7 @@ ggplot(dat, aes(x, y, group=group, color=factor(group))) +
geom_point(color="black") +
geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
geom_xspline(spline_shape=1, size=0.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess'
```
<img src="README_figs/README-splines-6.png" width="672" />
@ -121,7 +170,7 @@ ggplot(dat, aes(x, y, group=group, color=factor(group))) +
geom_point(color="black") +
geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
geom_xspline(spline_shape=0, size=0.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess'
```
<img src="README_figs/README-splines-7.png" width="672" />
@ -132,7 +181,7 @@ ggplot(dat, aes(x, y, group=group, color=factor(group))) +
geom_point(color="black") +
geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
geom_xspline(spline_shape=-1, size=0.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess'
```
<img src="README_figs/README-splines-8.png" width="672" />
@ -257,6 +306,11 @@ m + stat_bkde2d(bandwidth=c(0.5, 4), aes(fill = ..level..), geom = "polygon")
``` r
world <- map_data("world")
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
world <- world[world$region != "Antarctica",]
gg <- ggplot()
@ -408,6 +462,14 @@ International Students,0.87", stringsAsFactors=FALSE, sep=",", header=TRUE)
library(ggplot2)
library(ggalt)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
gg <- ggplot(df, aes(y=reorder(category, pct), x=pct))
gg <- gg + geom_lollipop(point.colour="steelblue", point.size=2, horizontal=TRUE)


BIN
README_figs/README-dumbbell2-1.png View File

Before After
Width: 1344  |  Height: 480  |  Size: 35 KiB Width: 1344  |  Height: 480  |  Size: 36 KiB

BIN
README_figs/README-horizon-1.png View File

Before After
Width: 1824  |  Height: 1824  |  Size: 791 KiB

+ 40
- 0
man/geom_horizon.Rd View File

@ -0,0 +1,40 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/geom_horizon.r
\docType{data}
\name{geom_horizon}
\alias{geom_horizon}
\alias{GeomHorizon}
\alias{stat_horizon}
\alias{StatHorizon}
\title{Plot a time series as a horizon plot}
\format{An object of class \code{GeomHorizon} (inherits from \code{GeomArea}, \code{GeomRibbon}, \code{Geom}, \code{ggproto}) of length 4.}
\usage{
geom_horizon(mapping = NULL, data = NULL, show.legend = TRUE,
inherit.aes = TRUE, na.rm = TRUE, bandwidth = NULL, ...)
GeomHorizon
stat_horizon(mapping = NULL, data = NULL, geom = "horizon",
show.legend = TRUE, inherit.aes = TRUE, na.rm = TRUE,
bandwidth = NULL, ...)
StatHorizon
}
\description{
A horizon plot breaks the Y dimension down using colours. This is useful
when visualising y values spanning a vast range and / or trying to highlight
outliers without losing context of the rest of the data.\cr \cr Horizon
plots are best viewed in an apsect ratio of very low vertical length.
Transforms data for a horizon plot
}
\section{Aesthetics}{
\code{x}, \code{y}, \code{fill}. \code{fill} defaults to \code{..band..} which is
the band number the current data fill area belongs in.
}
\section{Other parameters}{
\code{bandwidth}, to dictate the span of a band.
}
\keyword{internal}

Loading…
Cancel
Save