Extra Coordinate Systems, 'Geoms', Statistical Transformations, Scales and Fonts for 'ggplot2'
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

192 lines
5.9 KiB

#' Display a smooth density estimate.
#'
#' A kernel density estimate, useful for displaying the distribution of
#' variables with underlying smoothness.
#'
#' \if{html}{
#' A sample of the output from \code{geom_bkde()}:
#'
#' \figure{geombkde01.png}{options: width="100\%" alt="Figure: geombkde01.png"}
#' }
#'
#' \if{latex}{
#' A sample of the output from \code{geom_bkde()}:
#'
#' \figure{geombkde01.png}{options: width=10cm}
#' }
#'
#' @section Aesthetics:
#' \code{geom_bkde} understands the following aesthetics (required aesthetics
#' are in bold):
#' \itemize{
#' \item \strong{\code{x}}
#' \item \strong{\code{y}}
#' \item \code{alpha}
#' \item \code{color}
#' \item \code{fill}
#' \item \code{linetype}
#' \item \code{size}
#' }
#' @inheritParams ggplot2::geom_point
#' @param geom,stat Use to override the default connection between
#' \code{geom_bkde} and \code{stat_bkde}.
#' @seealso See \code{\link{geom_histogram}}, \code{\link{geom_freqpoly}} for
#' other methods of displaying continuous distribution.
#' See \code{\link{geom_violin}} for a compact density display.
#' @export
geom_bkde <- function(mapping = NULL, data = NULL, stat = "bkde",
position = "identity", bandwidth=NULL, range.x=NULL,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomBkde,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(bandwidth=bandwidth,
range.x=range.x,
na.rm=na.rm,
...)
)
}
#' Geom Proto
#' @rdname ggalt-ggproto
#' @format NULL
#' @usage NULL
#' @keywords internal
#' @export
GeomBkde <- ggproto("GeomBkde", GeomArea,
default_aes = aes(colour = NA, fill = "gray20", size = 0.5,
linetype = 1, alpha = NA)
)
#' @param bandwidth the kernel bandwidth smoothing parameter. see
#' \code{\link[KernSmooth]{bkde}} for details. If \code{NULL},
#' it will be computed for you but will most likely not yield optimal
#' results.
#' @param kernel character string which determines the smoothing kernel. see
#' \code{\link[KernSmooth]{bkde}} for details
#' @param canonical logical flag: if TRUE, canonically scaled kernels are used.
#' see \code{\link[KernSmooth]{bkde}} for details
#' @param gridsize the number of equally spaced points at which to estimate the
#' density. see \code{\link[KernSmooth]{bkde}} for details.
#' @param range.x vector containing the minimum and maximum values of x at which
#' to compute the estimate. see \code{\link[KernSmooth]{bkde}} for details
#' @param truncate logical flag: if TRUE, data with x values outside the range
#' specified by range.x are ignored. see \code{\link[KernSmooth]{bkde}}
#' for details
#' @section Computed variables:
#' \describe{
#' \item{density}{density estimate}
#' \item{count}{density * number of points - useful for stacked density
#' plots}
#' \item{scaled}{density estimate, scaled to maximum of 1}
#' }
#' @export
#' @rdname geom_bkde
#' @examples
#' data(geyser, package="MASS")
#'
#' ggplot(geyser, aes(x=duration)) +
#' stat_bkde(alpha=1/2)
#'
#' ggplot(geyser, aes(x=duration)) +
#' geom_bkde(alpha=1/2)
#'
#' ggplot(geyser, aes(x=duration)) +
#' stat_bkde(bandwidth=0.25)
#'
#' ggplot(geyser, aes(x=duration)) +
#' geom_bkde(bandwidth=0.25)
stat_bkde <- function(mapping = NULL, data = NULL, geom = "area",
position = "stack", kernel="normal", canonical=FALSE,
bandwidth = NULL, gridsize=410, range.x=NULL,
truncate=TRUE, na.rm=FALSE, show.legend = NA, inherit.aes = TRUE,
...) {
layer(
data = data,
mapping = mapping,
stat = StatBkde,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
kernel=kernel,
canonical=canonical,
bandwidth=bandwidth,
gridsize=gridsize,
range.x=range.x,
truncate=truncate,
na.rm=na.rm,
...
)
)
}
#' @rdname ggalt-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatBkde <- ggproto("StatBkde", Stat,
required_aes = "x",
default_aes = aes(y = ..density.., colour = NA, fill = "gray20", size = 0.5,
linetype = 1, alpha = NA),
compute_group = function(data, scales, kernel="normal", canonical=FALSE,
bandwidth=NULL, gridsize=410, range.x=NULL,
truncate=TRUE, na.rm = TRUE) {
# KernSmooth::dpik uses a generated normal distribution as part of its
# operation but doesn't do this seed save/create/restore. When bandwidth
# is NULL the only way to produce consistency in calculated resuts is to use
# a dedicated random seed. This might be a candidate for parameterization.
if (is.null(bandwidth)) {
tmp <- tempfile()
on.exit(unlink(tmp))
save(".Random.seed", file=tmp)
set.seed(1492)
bandwidth <- KernSmooth::dpik(data$x)
message(
sprintf("Bandwidth not specified. Using '%3.2f', via KernSmooth::dpik.",
bandwidth))
load(tmp)
}
if (is.null(range.x)) range.x <- range(data$x)
compute_bkde(data$x, kernel=kernel, canonical=canonical,
bandwidth=bandwidth, gridsize=gridsize, range.x=range.x,
truncate=truncate, na.rm)
}
)
compute_bkde <- function(x, kernel="normal", canonical=FALSE,
bandwidth, gridsize=410, range.x, truncate=TRUE, na.rm=TRUE) {
n <- length(x)
if (missing(range.x) | is.null(range.x)) range.x <- range(x)
dens <- KernSmooth::bkde(x, kernel, canonical, bandwidth, gridsize, range.x, truncate)
data.frame(
x = dens$x,
density = dens$y,
scaled = dens$y / max(dens$y, na.rm = na.rm),
count = dens$y * n,
n = n
)
}