|
|
@ -1,36 +1,47 @@ |
|
|
|
#' @title Average spectra in spectral tibble data frame |
|
|
|
#' @description Average spectra in tibble by a group column after |
|
|
|
#' resampling spectra by \code{simplerspec::resample_spc()}. |
|
|
|
#' @param spc_tbl Tibble data.frame containing spectra in list-column |
|
|
|
#' \code{spc_rs}. This list-column is created when resampling spectra |
|
|
|
#' with \code{resample_spc()} |
|
|
|
#' @param by Character vector of length 1; specifies column by which spectra |
|
|
|
#' are averaged. Default is \code{"sample_id"}. |
|
|
|
#' @return Tibble data frame (class `tbl_df`) with mean spectra appended |
|
|
|
#' as list-column named \code{spc_mean}. |
|
|
|
#' @title Average spectra in list-column by entries in grouping column |
|
|
|
#' @description Average spectra in list-column of spectra tibble (`spc_tbl`) by |
|
|
|
#' groups given in group column. |
|
|
|
#' @param spc_tbl Tibble data frame containing at least the grouping column |
|
|
|
#' given in argument `by` and input spectra given in list-column `column_in`. |
|
|
|
#' @param by Character vector of length 1L or name/symbol that specifies the |
|
|
|
#' column by which groups of spectra are averaged. Default is `"sample_id"`. |
|
|
|
#' @param column_in Character vector of length 1L or or name/symbol that |
|
|
|
#' specifies the list-column that contains the inputs spectra to be averaged. |
|
|
|
#' Default is `"spc_rs"`, which are resampled spectra (i.e., resulting after |
|
|
|
#' preceding `resample_spc()` step). |
|
|
|
#' @return Spectra tibble data frame (class `"tbl_df"`, `"tbl"`, `"data.frame"`) |
|
|
|
#' with a new list-column of column name `"spc_mean"` at the last position, |
|
|
|
#' containing mean spectra with identical row replicates within the same |
|
|
|
#' `by`-group. |
|
|
|
#' @details For memory efficiency and subsequent modeling, consider slicing the |
|
|
|
#' extra row copies of `spc_mean` resulting from `average_spc()` for example by |
|
|
|
#' * `split(x = spc_tbl, f = spc_tbl$<by>) %>% lapply(., function(x) x x[1, ]) %>% do.call(., rbind)` |
|
|
|
#' * `dplyr::group_by(spc_tbl, <by>) %>% dplyr::slice(1L)` |
|
|
|
#' @import stats |
|
|
|
#' @importFrom data.table data.table rbindlist setkey setDT := .SD |
|
|
|
#' @importFrom rlang ensym quo_name |
|
|
|
#' @export |
|
|
|
average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") { |
|
|
|
|
|
|
|
# Avoid R CMD check note: `no visible binding for global variable` |
|
|
|
# Avoid R CMD check note: `"...no visible binding for global variable..."` |
|
|
|
spc_rs <- sample_id <- id <- NULL |
|
|
|
|
|
|
|
# Quote the symbol or the string supplied by the second and third argument |
|
|
|
column_in <- rlang::enquo(column_in) |
|
|
|
by <- rlang::enquo(by) |
|
|
|
|
|
|
|
# Combine rows of all resampled spectra in one data.table |
|
|
|
spc <- data.table::rbindlist(dplyr::pull(spc_tbl, !!column_in)) |
|
|
|
|
|
|
|
# Add sample_id column to resampled spectra |
|
|
|
spc[, id := spc_tbl[, by][[by]]] |
|
|
|
# Add `id` group column to input spectra |
|
|
|
spc[, id := dplyr::pull(spc_tbl, !!by)] # spc_tbl[, by][[by]] |
|
|
|
|
|
|
|
# Average spectra, use sample_id as index for grouping |
|
|
|
# Average spectra, use `id` column as index for grouping |
|
|
|
data.table::setkey(spc, id) |
|
|
|
spc_mean <- spc[, lapply(.SD, mean), by = id] |
|
|
|
|
|
|
|
# Create vector of sample_id from column sample_id in spc_mean |
|
|
|
sample_id_mean <- spc_mean[, id] |
|
|
|
# Create new vector of group ID values from column `id` |
|
|
|
group_id_mean <- spc_mean[, id] |
|
|
|
# Delete sample_id column in data.table |
|
|
|
spc_mean_noid <- spc_mean[, id := NULL] |
|
|
|
|
|
|
@ -39,16 +50,16 @@ average_spc <- function(spc_tbl, by = "sample_id", column_in = "spc_rs") { |
|
|
|
# https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md |
|
|
|
spc_mean_list <- stats::setNames( |
|
|
|
map(purrr::transpose(spc_mean_noid), data.table::as.data.table), |
|
|
|
sample_id_mean |
|
|
|
group_id_mean |
|
|
|
) |
|
|
|
|
|
|
|
# Quote the symbol or the string supplied to by argument |
|
|
|
by <- ensym(by) |
|
|
|
|
|
|
|
# Convert averaged spectra and sample_id to tibble |
|
|
|
spc_mean_tbl <- tibble::tibble( |
|
|
|
!! by := sample_id_mean, spc_mean = spc_mean_list |
|
|
|
!!by := group_id_mean, spc_mean = spc_mean_list |
|
|
|
) |
|
|
|
# Join mean spectra tibble spc_tbl_mean to spc_tbl |
|
|
|
spc_tbl <- dplyr::left_join(spc_tbl, spc_mean_tbl, by = quo_name(by)) |
|
|
|
spc_tbl_out <- dplyr::left_join(spc_tbl, spc_mean_tbl, |
|
|
|
by = rlang::quo_name(by)) |
|
|
|
|
|
|
|
return(spc_tbl_out) |
|
|
|
} |
|
|
|