diff --git a/DESCRIPTION b/DESCRIPTION index faf05ae..2c305b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,4 +9,4 @@ License: AGPL + file LICENSE LazyData: true Suggests: testthat Encoding: UTF-8 -Imports: graphics, grDevices, dplyr, KernSmooth, proj4, scales, grid, gtable +Imports: graphics, grDevices, dplyr, KernSmooth, proj4, scales, grid, gtable, ash diff --git a/NAMESPACE b/NAMESPACE index a9decd3..5c92d5c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(CoordProj) export(GeomBkde) export(GeomBkde2d) export(GeomXspline) +export(StatAsh) export(StatBkde) export(StatBkde2d) export(StatXspline) @@ -16,10 +17,12 @@ export(coord_proj) export(geom_bkde) export(geom_bkde2d) export(geom_xspline) +export(stat_ash) export(stat_bkde) export(stat_bkde2d) export(stat_xspline) import(KernSmooth) +import(ash) import(dplyr) import(ggplot2) import(grDevices) diff --git a/R/geom_ash.r b/R/geom_ash.r new file mode 100644 index 0000000..d5288ff --- /dev/null +++ b/R/geom_ash.r @@ -0,0 +1,96 @@ +#' Compute and display a univariate averaged shifted histogram (polynomial kernel) +#' +#' See \code{\link[ash]{bin1}} & \code{\link[ash]{ash1}} for more information. +#' +#' @inheritParams ggplot2::geom_area +#' @param geom Use to override the default Geom +#' @param ab half-open interval for bins \emph{[a,b)}. If no value is specified, +#' the range of x is stretched by \code{5\%} at each end and used the +#' interval. +#' @param nbin number of bins desired. Default \code{50}. +#' @param m integer smoothing parameter; Default \code{5}. +#' @param kopt vector of length 2 specifying the kernel, which is proportional +#' to \emph{( 1 - abs(i/m)^kopt(1) )i^kopt(2)}; (2,2)=biweight (default); +#' (0,0)=uniform; (1,0)=triangle; (2,1)=Epanechnikov; (2,3)=triweight. +#' @references David Scott (1992), \emph{"Multivariate Density Estimation,"} +#' John Wiley, (chapter 5 in particular).\cr +#' \cr +#' B. W. Silverman (1986), \emph{"Density Estimation for Statistics +#' and Data Analysis,"} Chapman & Hall. +#' @section Aesthetics: +#' \code{geom_ash} understands the following aesthetics (required aesthetics +#' are in bold): +#' \itemize{ +#' \item \strong{\code{x}} +#' \item \code{alpha} +#' \item \code{color} +#' \item \code{fill} +#' \item \code{linetype} +#' \item \code{size} +#' } +#' @section Computed variables: +#' \describe{ +#' \item{\code{estimate}}{ash estimates} +#' } +#' @export +stat_ash <- function(mapping = NULL, data = NULL, geom = "area", + position = "stack", + ab = NULL, nbin = 50, m = 5, kopt = c(2, 2), + show.legend = NA, inherit.aes = TRUE, ...) { + + layer( + data = data, + mapping = mapping, + stat = StatAsh, + geom = geom, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list( + ab = ab, + nbin = nbin, + m = m, + kopt = kopt, + ... + ) + ) + +} + +#' @export +StatAsh <- ggproto("StatAsh", Stat, + + required_aes = c("x"), + + default_aes = aes(y = ..estimate.., colour = NA, fill = "gray20", size = 0.5, + linetype = 1, alpha = NA), + + + setup_params = function(data, params) { + if (!is.null(data$y) || !is.null(params$y)) { + stop("stat_ash() must not be used with a y aesthetic.", call. = FALSE) + } + params + }, + + compute_group = function(data, scales, ab = NULL, + nbin = 50, m = 5, kopt = c(2, 2)) { + + if (is.null(ab)) ab <- nicerange(data$x) + + bin_res <- ash::bin1(data$x, ab, nbin) + ash_msg <- capture.output(ash_res <- ash1(bin_res)) + + if (ash_res$ier == 1) message("Estimate nonzero outside interval ab.") + + data.frame(x=ash_res$x, estimate=ash_res$y) + + } + +) + +nicerange <- function (x, beta = 0.1) { + ab <- range(x) + del <- ((ab[2] - ab[1]) * beta)/2 + return(c(ab + c(-del, del))) +} diff --git a/R/ggalt-package.r b/R/ggalt-package.r index 10905db..b1b817b 100644 --- a/R/ggalt-package.r +++ b/R/ggalt-package.r @@ -6,7 +6,7 @@ #' @name ggalt #' @docType package #' @author Bob Rudis (@@hrbrmstr) -#' @import ggplot2 graphics grDevices dplyr KernSmooth proj4 +#' @import ggplot2 graphics grDevices dplyr KernSmooth proj4 ash #' @importFrom scales rescale expand_range #' @importFrom grid grobName grobTree unit.c grobHeight grobWidth viewport #' grid.draw grobX grobY gTree gList diff --git a/README.Rmd b/README.Rmd index a127f98..3e20159 100644 --- a/README.Rmd +++ b/README.Rmd @@ -34,9 +34,11 @@ The following functions are implemented: - `stat_bkde` : Display a smooth density estimate (uses `KernSmooth::bkde`) - `geom_bkde2d` : Contours from a 2d density estimate. (uses `KernSmooth::bkde2D`) - `stat_bkde2d` : Contours from a 2d density estimate. (uses `KernSmooth::bkde2D`) +- `stat_ash` : Compute and display a univariate averaged shifted histogram (polynomial kernel) (uses `ash::ash1`/`ash::bin1`) ### News +- Version 0.0.4.9000 released - `stat_ash` - Version 0.0.3.9000 released - `coord_proj`! (requires my github copy of ggplot2 for now) - Version 0.0.2.9005 released - cleanup before blog post - Version 0.0.2.9002 released - working 2D density plots @@ -57,6 +59,8 @@ options(width=120) ### Usage ```{r} +library(ggplot2) +library(gridExtra) library(ggalt) # current verison @@ -144,7 +148,7 @@ ggplot(geyser_dat, aes(x, y)) + geom_point() + stat_bkde2d(bandwidth=c(0.7, 7)) -### coord_proj LIVES! (still needs work) +# coord_proj LIVES! (still needs work) world <- map_data("world") world <- world[world$region != "Antarctica",] @@ -155,6 +159,15 @@ gg <- gg + geom_map(data=world, map=world, gg <- gg + coord_proj("+proj=wintri") gg +# stat_ash + compare density plots + +set.seed(1492) +dat <- data.frame(x=rnorm(100)) +grid.arrange(ggplot(dat, aes(x)) + stat_ash(), + ggplot(dat, aes(x)) + stat_bkde(), + ggplot(dat, aes(x)) + stat_density(), + nrow=3) + ``` ### Test Results diff --git a/README.md b/README.md index a34a6de..70daea8 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,11 @@ The following functions are implemented: - `stat_bkde` : Display a smooth density estimate (uses `KernSmooth::bkde`) - `geom_bkde2d` : Contours from a 2d density estimate. (uses `KernSmooth::bkde2D`) - `stat_bkde2d` : Contours from a 2d density estimate. (uses `KernSmooth::bkde2D`) +- `stat_ash` : Compute and display a univariate averaged shifted histogram (polynomial kernel) (uses `ash::ash1`/`ash::bin1`) ### News +- Version 0.0.4.9000 released - `stat_ash` - Version 0.0.3.9000 released - `coord_proj`! (requires my github copy of ggplot2 for now) - Version 0.0.2.9005 released - cleanup before blog post - Version 0.0.2.9002 released - working 2D density plots @@ -37,8 +39,9 @@ devtools::install_github("hrbrmstr/ggalt") ### Usage ``` r +library(ggplot2) +library(gridExtra) library(ggalt) -#> Loading required package: ggplot2 # current verison packageVersion("ggalt") @@ -212,7 +215,7 @@ ggplot(geyser_dat, aes(x, y)) + ``` r -### coord_proj LIVES! (still needs work) +# coord_proj LIVES! (still needs work) world <- map_data("world") world <- world[world$region != "Antarctica",] @@ -226,6 +229,22 @@ gg +``` r + +# stat_ash + compare density plots + +set.seed(1492) +dat <- data.frame(x=rnorm(100)) +grid.arrange(ggplot(dat, aes(x)) + stat_ash(), + ggplot(dat, aes(x)) + stat_bkde(), + ggplot(dat, aes(x)) + stat_density(), + nrow=3) +#> Estimate nonzero outside interval ab. +#> Bandwidth not specified. Using '0.43', via KernSmooth::dpik. +``` + + + ### Test Results ``` r @@ -233,7 +252,7 @@ library(ggalt) library(testthat) date() -#> [1] "Fri Sep 11 18:14:16 2015" +#> [1] "Sat Sep 12 12:55:07 2015" test_dir("tests/") #> testthat results ======================================================================================================== diff --git a/README_figs/README-unnamed-chunk-4-18.png b/README_figs/README-unnamed-chunk-4-18.png new file mode 100644 index 0000000..3f0351b Binary files /dev/null and b/README_figs/README-unnamed-chunk-4-18.png differ diff --git a/man/stat_ash.Rd b/man/stat_ash.Rd new file mode 100644 index 0000000..d653f61 --- /dev/null +++ b/man/stat_ash.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/geom_ash.r +\name{stat_ash} +\alias{stat_ash} +\title{Compute and display a univariate averaged shifted histogram (polynomial kernel)} +\usage{ +stat_ash(mapping = NULL, data = NULL, geom = "area", position = "stack", + ab = NULL, nbin = 50, m = 5, kopt = c(2, 2), show.legend = NA, + inherit.aes = TRUE, ...) +} +\arguments{ +\item{mapping}{Set of aesthetic mappings created by \code{\link{aes}} or +\code{\link{aes_}}. If specified and \code{inherit.aes = TRUE} (the +default), is combined with the default mapping at the top level of the +plot. You only need to supply \code{mapping} if there isn't a mapping +defined for the plot.} + +\item{data}{A data frame. If specified, overrides the default data frame +defined at the top level of the plot.} + +\item{geom}{Use to override the default Geom} + +\item{position}{Position adjustment, either as a string, or the result of +a call to a position adjustment function.} + +\item{ab}{half-open interval for bins \emph{[a,b)}. If no value is specified, +the range of x is stretched by \code{5\%} at each end and used the +interval.} + +\item{nbin}{number of bins desired. Default \code{50}.} + +\item{m}{integer smoothing parameter; Default \code{5}.} + +\item{kopt}{vector of length 2 specifying the kernel, which is proportional +to \emph{( 1 - abs(i/m)^kopt(1) )i^kopt(2)}; (2,2)=biweight (default); + (0,0)=uniform; (1,0)=triangle; (2,1)=Epanechnikov; (2,3)=triweight.} + +\item{show.legend}{logical. Should this layer be included in the legends? +\code{NA}, the default, includes if any aesthetics are mapped. +\code{FALSE} never includes, and \code{TRUE} always includes.} + +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, +rather than combining with them. This is most useful for helper functions +that define both data and aesthetics and shouldn't inherit behaviour from +the default plot specification, e.g. \code{\link{borders}}.} + +\item{...}{other arguments passed on to \code{\link{layer}}. There are + three types of arguments you can use here: + + \itemize{ + \item Aesthetics: to set an aesthetic to a fixed value, like + \code{color = "red"} or \code{size = 3}. + \item Other arguments to the layer, for example you override the + default \code{stat} associated with the layer. + \item Other arguments passed on to the stat. + }} +} +\description{ +See \code{\link[ash]{bin1}} & \code{\link[ash]{ash1}} for more information. +} +\section{Aesthetics}{ + +\code{geom_ash} understands the following aesthetics (required aesthetics +are in bold): +\itemize{ + \item \strong{\code{x}} + \item \code{alpha} + \item \code{color} + \item \code{fill} + \item \code{linetype} + \item \code{size} +} +} + +\section{Computed variables}{ + +\describe{ + \item{\code{estimate}}{ash estimates} +} +} +\references{ +David Scott (1992), \emph{"Multivariate Density Estimation,"} + John Wiley, (chapter 5 in particular).\cr + \cr + B. W. Silverman (1986), \emph{"Density Estimation for Statistics + and Data Analysis,"} Chapman & Hall. +} +