You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
65 lines
2.9 KiB
65 lines
2.9 KiB
#' @title Average spectra in list-column by entries in grouping column
|
|
#' @description Average spectra in list-column of spectra tibble (`spc_tbl`) by
|
|
#' groups given in group column.
|
|
#' @param spc_tbl Tibble data frame containing at least the grouping column
|
|
#' given in argument `by` and input spectra given in list-column `column_in`.
|
|
#' @param by Character vector of length 1L or name/symbol that specifies the
|
|
#' column by which groups of spectra are averaged. Default is `"sample_id"`.
|
|
#' @param column_in Character vector of length 1L or or name/symbol that
|
|
#' specifies the list-column that contains the inputs spectra to be averaged.
|
|
#' Default is `"spc_rs"`, which are resampled spectra (i.e., resulting after
|
|
#' preceding `resample_spc()` step).
|
|
#' @return Spectra tibble data frame (class `"tbl_df"`, `"tbl"`, `"data.frame"`)
|
|
#' with a new list-column of column name `"spc_mean"` at the last position,
|
|
#' containing mean spectra with identical row replicates within the same
|
|
#' `by`-group.
|
|
#' @details For memory efficiency and subsequent modeling, consider slicing the
|
|
#' extra row copies of `spc_mean` resulting from `average_spc()` for example by
|
|
#' * `split(x = spc_tbl, f = spc_tbl$<by>) %>% lapply(., function(x) x x[1, ]) %>% do.call(., rbind)`
|
|
#' * `dplyr::group_by(spc_tbl, <by>) %>% dplyr::slice(1L)`
|
|
#' @import stats
|
|
#' @importFrom data.table data.table rbindlist setkey setDT := .SD
|
|
#' @importFrom rlang ensym quo_name
|
|
#' @export
|
|
average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") {
|
|
|
|
# Avoid R CMD check note: `"...no visible binding for global variable..."`
|
|
spc_rs <- sample_id <- id <- NULL
|
|
|
|
# Quote the symbol or the string supplied by the second and third argument
|
|
column_in <- rlang::enquo(column_in)
|
|
by <- rlang::enquo(by)
|
|
|
|
# Combine rows of all resampled spectra in one data.table
|
|
spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!column_in))
|
|
|
|
# Add `id` group column to input spectra
|
|
spc[, id := dplyr::pull(spc_tbl, !!by)] # spc_tbl[, by][[by]]
|
|
|
|
# Average spectra, use `id` column as index for grouping
|
|
data.table::setkey(spc, id)
|
|
spc_mean <- spc[, lapply(.SD, mean), by = id]
|
|
|
|
# Create new vector of group ID values from column `id`
|
|
group_id_mean <- spc_mean[, id]
|
|
# Delete sample_id column in data.table
|
|
spc_mean_noid <- spc_mean[, id := NULL]
|
|
|
|
# Create list of averaged spectra, one spectrum is one data.table
|
|
# Use best performing alternative:
|
|
# https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
|
|
spc_mean_list <- stats::setNames(
|
|
map(purrr::transpose(spc_mean_noid), data.table::as.data.table),
|
|
group_id_mean
|
|
)
|
|
|
|
# Convert averaged spectra and sample_id to tibble
|
|
spc_mean_tbl <- tibble::tibble(
|
|
!!by := group_id_mean, spc_mean = spc_mean_list
|
|
)
|
|
# Join mean spectra tibble spc_tbl_mean to spc_tbl
|
|
spc_tbl_out <- dplyr::left_join(spc_tbl, spc_mean_tbl,
|
|
by = rlang::quo_name(by))
|
|
|
|
return(spc_tbl_out)
|
|
}
|
|
|