Streamlining spectral data processing and modeling for spectroscopy applications
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

261 lines
12 KiB

#' @title Preprocess spectra
#' @description Preprocesses spectra in tibble column by sample_id after
#' averaging spectra by \code{simplerspec::average_spc()}.
#' @param spc_tbl Tibble that contains spectra to be preprocessed within
#' a list-column.
#' @param select Character vector of predefined preprocessing options to be
#' applied to the spectra list-column specified in \code{column_in}.
#' Common prefined values are stated as abbreviated preprocessing methods and
#' options such as \code{"sg_1_w21"}, where \code{"sg"} stands for
#' Savitzky-Golay and \code{1} for first derivative and \code{"w21"}
#' for a window size of 21 points.
#' @param column_in Character vector of single list-column in \code{spc_tbl} that
#' contain list of spectra (1 row matrix) to be processed by function supplied
#' in \code{select}.
#' @param custom_function A character string of a custom processing function
#' that is later parsed (produces expression in a list) and evaluated within
#' the function \code{preprocess_spc}.
#' The character vector argument of \code{custom_function}
#' needs to contain \code{"spc_raw"}, which is the single data table of spectra
#' that results from binding a list of data.tables (spectra to preprocess)
#' from the spectra list-column specified in \code{column_in}.
#' An example for a value is
#' \code{"prospectr::savitzkyGolay(X = spc_raw, m = 0, p = 3, w = 9)"}.
#' Optional argument. Default is \code{NULL}.
#' @export
preprocess_spc <- function(spc_tbl, select, column_in = "spc_mean",
custom_function = NULL) {
# Convert list of spectral data.tables to one data.table
spc_raw <- data.table::rbindlist(spc_tbl[column_in][[column_in]])
## Perform preprocessing =====================================================
# Use custom function when supplying option ----------------------------------
if (!is.null(custom_function) & select == "custom") {
# Create full character string for parsing
custom_fct <- paste0("custom <- ", custom_function)
# parse converts the character string into an expression
# eval evaluates the expression; as a result, custom object is computed
# and saved within the current workspace
eval(parse(text = custom_fct))
## x <- spc_raw
## custom <- eval(substitute(custom_function), envir = parent.frame())
# -> Error in is.data.frame(X) : object 'x' not found
}
# -> returns error:
# custom_function = prospectr::savitzkyGolay(X = x, m = 0, p = 3, w = 9)
# Error in is.data.frame(X) : object 'x' not found
# -> Maybe solution: http://stackoverflow.com/questions/30563745/non-standard-evaluation-from-another-function-in-r
# Savitzky-Golay preprocessing
# use different derivatives and window sizes ---------------------------------
# Zero order Savitzky-Golay (no derivative) -> only smoothing
if (select == "sg_0_w9") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)}
# First derivative Savitzky-Golay
if (select == "sg_1_w5") {
sg_1_w5 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 5)}
if (select == "sg_1_w9") {
sg_1_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 9)}
if (select == "sg_1_w11") {
sg_1_w11 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 11)}
if (select == "sg_1_w13") {
sg_1_w13 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 13)}
if (select == "sg_1_p2_w13") {
sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 2, w = 13)}
if (select == "sg_1_w15") {
sg_1_w15 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 15)}
if (select == "sg_1_w17") {
sg_1_w17 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 17)}
if (select == "sg_1_w19") {
sg_1_w19 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 19)}
# Implement window size of 21, corresponds to ICRAF standard;
# see e.g. Terhoeven-Urselmans et al. (2010)
if (select == "sg_1_w21") {
sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 21)}
if (select == "sg_1_w23") {
sg_1_w23 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 23)}
if (select == "sg_1_w25") {
sg_1_w25 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 25)}
if (select == "sg_1_w27") {
sg_1_w27 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 27)}
if (select == "sg_1_w35") {
sg_1_w35 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 35)}
if (select == "sg_1_w41") {
sg_1_w41 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 41)}
if (select == "sg_1_w51") {
sg_1_w51 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 51)}
# Second derivative Savitzky-Golay
if (select == "sg_2_w11") {
sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 11)}
if (select == "sg_2_w21") {
sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 21)}
# Savitzky-Golay (order 0) smoothing and derivative with a window size of
# 21 points
if (select == "sg_0_1_w21") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_1_w21 <- prospectr::savitzkyGolay(X = sg_0_w9,
m = 1, p = 3, w = 21)}
# Savitzky-Golay second derivative
if (select == "sg_2_w5") {
sg_2_w5 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 5)}
if (select == "sg_2_w11") {
sg_2_w11 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 3, w = 11)}
# Standard normal variate (SNV) ----------------------------------------------
# Calculate standard normal variate (SNV) after Savitzky-Golay smoothing
if (select == "sg_0_snv") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)}
if (select == "sg_1_snv") {
sg_1_snv <- prospectr::standardNormalVariate(sg_1_w5)}
if (select == "sg_1_p2_w13_snv") {
sg_1_p2_w13 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 2, w = 13)
sg_1_p2_w13_snv <- prospectr::standardNormalVariate(sg_1_p2_w13)}
# Standard normal variate (SNV) and first gap-segment derivative
if (select == "snv_gsd_m1_w11_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w11_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 11, s = 1)}
if (select == "snv_gsd_m1_w21_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w21_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 21, s = 5)}
if (select == "snv_gsd_m1_w31_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
if (select == "snv_gsd_m1_w31_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m1_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 1, w = 31, s = 5)}
# Standard normal variate (SNV) and second gap-segment derivative
if (select == "snv_gsd_m2_w5_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w5_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 5, s = 1)}
if (select == "snv_gsd_m2_w21_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w21_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 21, s = 1)}
if (select == "snv_gsd_m2_w31_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w31_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 5)}
if (select == "snv_gsd_m2_w31_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w31_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 31, s = 1)}
if (select == "snv_gsd_m2_w51_s1") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w51_s1 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 1)}
if (select == "snv_gsd_m2_w51_s5") {
sg_0_w9 <- prospectr::savitzkyGolay(X = spc_raw,
m = 0, p = 3, w = 9)
sg_0_snv <- prospectr::standardNormalVariate(sg_0_w9)
snv_gsd_m2_w51_s5 <- prospectr::gapDer(X = sg_0_snv, m = 2, w = 51, s = 5)}
# 1rst Gap-segement derivative
if (select == "gsd_m1_w5_s4") {
gsd_m1_w5_s4 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 4)}
if (select == "gsd_m1_w11_s5") {
gsd_m1_w11_s5 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 5)}
if (select == "gsd_m1_w11_s21") {
gsd_m1_w11_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 11, s = 21)}
if (select == "gsd_m1_w21_s1") {
gsd_m1_w21_s1 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 1)}
if (select == "gsd_m1_w21_s21") {
gsd_m1_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 21, s = 21)}
if (select == "gsd_m1_w35_s21") {
gsd_m1_w35_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 35, s = 21)}
if (select == "gsd_m1_w5_s21") {
gsd_m1_w5_s21 <- prospectr::gapDer(X = spc_raw, m = 1, w = 5, s = 21)}
# 2nd Gap-segment derivative
if (select == "gsd_m2_w21_s21") {
gsd_m2_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 2, w = 21, s = 21)}
# 4th Gap-segment derivative
if (select == "gsd_m4_w21_s21") {
gsd_m4_w21_s21 <- prospectr::gapDer(X = spc_raw, m = 4, w = 21, s = 21)}
# Savitzky-Golay combined with multiple scatter correction (MSC --------------
# Savitzky-Golay with 3rd order polynomial, a window size of 21
# and first derivative + MSC
if (select == "sg_1_w21_msc") {
sg_1_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 1, p = 3, w = 21)
# Use msc function from the pls package; use column means of X as reference
# spectrum
sg_1_w21_msc <- pls::msc(X = sg_1_w21, reference = NULL)
}
# Savitzky-Golay combined with multiple scatter correction (MSC --------------
# Savitzky-Golay with 4th order polynomial, a window size of 21
# and second derivative + MSC
if (select == "sg_2_w21_msc") {
sg_2_w21 <- prospectr::savitzkyGolay(X = spc_raw,
m = 2, p = 4, w = 21)
# Use msc function from the pls package; use column means of X as reference
# spectrum
sg_2_w21_msc <- pls::msc(X = sg_2_w21, reference = NULL)
}
# Continuum-removal ----------------------------------------------------------
if (select == "cr") {
cr <- prospectr::continuumRemoval(X = spc_raw,
wav = as.numeric(colnames(spc_raw)), type = "A")}
if (select == "cr_refl") {
cr_refl <- prospectr::continuumRemoval(X = spc_raw,
wav = as.numeric(colnames(spc_raw)), type = "R")}
# Select final preprocessing based on selection argument and
# save matrix in data.table
pre <- select
spc_pre <- data.table::as.data.table(get(pre))
# Convert preprocessed spectra in data.table to list of data.table spectra
# https://github.com/jennybc/row-oriented-workflows/blob/master/iterate-over-rows.md
spc_pre_list <- map(purrr::transpose(spc_pre), data.table::as.data.table)
# Convert x-values of preprocessed spectra in list of vectors
# prospectr only hands over new xunits in matrix colnames of type character
xvalues_pre_list <- lapply(spc_pre_list,
function(x) as.numeric(colnames(x)))
# Add list of preprocessed spectra and correspoding wavenumbers to tibble
spc_tbl_out <- tibble::add_column(spc_tbl,
spc_pre = spc_pre_list, xvalues_pre = xvalues_pre_list)
return(spc_tbl_out)
}