Browse Source

Avoid R CMD check WARNING and improve documentation

pull/11/head
Philipp Baumann 4 years ago
parent
commit
55f3405a8d
  1. 55
      R/average-spc.R

55
R/average-spc.R

@ -1,36 +1,47 @@
#' @title Average spectra in spectral tibble data frame
#' @description Average spectra in tibble by a group column after
#' resampling spectra by \code{simplerspec::resample_spc()}.
#' @param spc_tbl Tibble data.frame containing spectra in list-column
#' \code{spc_rs}. This list-column is created when resampling spectra
#' with \code{resample_spc()}
#' @param by Character vector of length 1; specifies column by which spectra
#' are averaged. Default is \code{"sample_id"}.
#' @return Tibble data frame (class `tbl_df`) with mean spectra appended
#' as list-column named \code{spc_mean}.
#' @title Average spectra in list-column by entries in grouping column
#' @description Average spectra in list-column of spectra tibble (`spc_tbl`) by
#' groups given in group column.
#' @param spc_tbl Tibble data frame containing at least the grouping column
#' given in argument `by` and input spectra given in list-column `column_in`.
#' @param by Character vector of length 1L or name/symbol that specifies the
#' column by which groups of spectra are averaged. Default is `"sample_id"`.
#' @param column_in Character vector of length 1L or or name/symbol that
#' specifies the list-column that contains the inputs spectra to be averaged.
#' Default is `"spc_rs"`, which are resampled spectra (i.e., resulting after
#' preceding `resample_spc()` step).
#' @return Spectra tibble data frame (class `"tbl_df"`, `"tbl"`, `"data.frame"`)
#' with a new list-column of column name `"spc_mean"` at the last position,
#' containing mean spectra with identical row replicates within the same
#' `by`-group.
#' @details For memory efficiency and subsequent modeling, consider slicing the
#' extra row copies of `spc_mean` resulting from `average_spc()` for example by
#' * `split(x = spc_tbl, f = spc_tbl$<by>) %>% lapply(., function(x) x x[1, ]) %>% do.call(., rbind)`
#' * `dplyr::group_by(spc_tbl, <by>) %>% dplyr::slice(1L)`
#' @import stats
#' @importFrom data.table data.table rbindlist setkey setDT := .SD
#' @importFrom rlang ensym quo_name
#' @export
average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") {
# Avoid R CMD check note: `no visible binding for global variable`
# Avoid R CMD check note: `"...no visible binding for global variable..."`
spc_rs <- sample_id <- id <- NULL
# Quote the symbol or the string supplied by the second and third argument
column_in <- rlang::enquo(column_in)
by <- rlang::enquo(by)
# Combine rows of all resampled spectra in one data.table
spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!column_in))
# Add sample_id column to resampled spectra
spc[, id := spc_tbl[, by][[by]]]
# Add `id` group column to input spectra
spc[, id := dplyr::pull(spc_tbl, !!by)] # spc_tbl[, by][[by]]
# Average spectra, use sample_id as index for grouping
# Average spectra, use `id` column as index for grouping
data.table::setkey(spc, id)
spc_mean <- spc[, lapply(.SD, mean), by = id]
# Create vector of sample_id from column sample_id in spc_mean
sample_id_mean <- spc_mean[, id]
# Create new vector of group ID values from column `id`
group_id_mean <- spc_mean[, id]
# Delete sample_id column in data.table
spc_mean_noid <- spc_mean[, id := NULL]
@ -39,16 +50,16 @@ average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") {
# https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
spc_mean_list <- stats::setNames(
map(purrr::transpose(spc_mean_noid), data.table::as.data.table),
sample_id_mean
group_id_mean
)
# Quote the symbol or the string supplied to by argument
by <- ensym(by)
# Convert averaged spectra and sample_id to tibble
spc_mean_tbl <- tibble::tibble(
!! by := sample_id_mean, spc_mean = spc_mean_list
!!by := group_id_mean, spc_mean = spc_mean_list
)
# Join mean spectra tibble spc_tbl_mean to spc_tbl
spc_tbl <- dplyr::left_join(spc_tbl, spc_mean_tbl, by = quo_name(by))
spc_tbl_out <- dplyr::left_join(spc_tbl, spc_mean_tbl,
by = rlang::quo_name(by))
return(spc_tbl_out)
}

Loading…
Cancel
Save