Philipp Baumann
4 years ago
2 changed files with 266 additions and 145 deletions
@ -1,111 +1,185 @@ |
|||
#' @title Resample spectra from spectral data object (tibble) |
|||
#' @description Resamples spectra stored in tibble column after |
|||
#' gathering spectra by \code{simplerspec::gather_spc()}. |
|||
#' @param spc_tbl Spectra data in a tibble object (classes "tibble_df", "tbl" |
|||
#' and "data.frame"). The spectra tibble is expected to contain at least |
|||
#' the columns 'spc' (list-column with spectral matrices stored in a list) and |
|||
#' `wavenumbers` or `wavelengths` (list-column that contains list of x-axis values |
|||
#' corresponding to each spectrum in `spc` as wavenumber or wavelength in |
|||
#' numeric values). |
|||
#' @param x_unit Character vector of the x-axis unit of the spectra in |
|||
#' within \code{spc_tbl}. Default is \code{"wavenumber"}. A further possible |
|||
#' argument is \code{"wavelength"}, which is usually the unit reported for |
|||
#' VIS/NIR spectrometers such as ASD field spectrometers. |
|||
#' @param wn_lower Numeric (integer or float) value of lowest wavenumber. |
|||
#' This argument will only be used if \code{x_unit = "wavenumber"}. The value |
|||
#' will be used as starting value when creating the new wavenumber sequence |
|||
#' that the spectra will be resampled upon. Default value is 510. |
|||
#' @param wn_upper Numeric (integer or float) value of highest wavenumber. |
|||
#' This argument will only be used if \code{x_unit = "wavenumber"}. The value |
|||
#' will be used as last value when creating the new wavenumber sequence |
|||
#' that the spectra will be resampled upon. Default value is 3988. |
|||
#' @param wn_interval Numeric value (integer or float) value of the wavenumber |
|||
#' increment when creating the new wavenumber sequence that the spectra will be |
|||
#' resampled upon. Default value is 2. |
|||
#' @param wl_lower Numeric (integer or float) value of lowest wavelength. |
|||
#' This argument will only be used if \code{x_unit = "wavelength"}. The value |
|||
#' will be used as starting value when creating the new wavenumber sequence |
|||
#' that the spectra will be resampled upon. Default value is 350. |
|||
#' @param wl_upper Numeric (integer or float) value of highest wavelength. |
|||
#' This argument will only be used if \code{x_unit = "wavelength"}. The value |
|||
#' will be used as last value when creating the new wavenumber sequence |
|||
#' that the spectra will be resampled upon. Default value is 2500. |
|||
#' @param wl_interval Numeric value (integer or float) value of the wavelength |
|||
#' increment when creating the new wavenumber sequence that the spectra will be |
|||
#' resampled upon. This argument will only be used if \code{x_unit = "wavelength"}. |
|||
#' Default value is 1. |
|||
#' @return A spectra tibble containing the following additional columns |
|||
#' added to the input spectra tibble (`spc_tbl`): \code{wavenumbers_rs} or |
|||
#' \code{wavelengths_rs} as list-columns containing the resampled wavenumbers |
|||
#' or wavelengths, \code{spc_rs} list-column of resampled spectra returned |
|||
#' as list of data.tables (class "data.table" and "data.frame"). |
|||
#' @title Resample spectra to new x-axis interval |
|||
#' @description Resamples (interpolates) different spectra types with |
|||
#' corresponding x-axis values that are both stored in list-columns of a spectra |
|||
#' tibble. A spectra tibble hosts spectra, x-unit vectors, metadata, and |
|||
#' further linked data with standardized naming conventions. Data input for |
|||
#' resampling can for example be generated with `simplerspec::gather_spc()`. |
|||
#' Resampling is a key harmonizing step to process and later model spectra |
|||
#' measured at different resolutions and spectral ranges (i.e., different |
|||
#' spectrometer devices and/or measurement settings). |
|||
#' @param spc_tbl Spectra data embedded in a tibble object (classes |
|||
#' `"tbl_df", "tbl", "data.frame"`). The spectra tibble needs to contain at |
|||
#' least of one of the the spectra columns `spc`, `spc_rs`, `spc_mean`, |
|||
#' `spc_nocomp`, `sc_sm`, `sc_rf`, or `spc_pre` (list-colums with spectral |
|||
#' `data.table`s), and `wavenumbers` or `wavelengths` (list-column with vectors |
|||
#' of x-axis values corresponding to each spectrum). The section *"Matching |
|||
#' spectrum type and corresponding x-axis type"* describes the spectra types |
|||
#' and corresponding x-axis types. |
|||
#' @param column_in Character vector of length 1L or symbol/name |
|||
#' specifying the name of list-column that contains the spectra to be resampled. |
|||
#' @param x_unit Character vector of length 1L specifying the measurement unit |
|||
#' of the x-axis values (list-column) of the input spectra in `spc_tbl`. |
|||
#' Possible values are `"wavenumber"` (default) or `"wavelength"`. Wavenumber |
|||
#' is a convenient unit of frequency in the mid-infrared spectral range, |
|||
#' where wavelength is often used as spatial period for the visible and |
|||
#' near-infrared range. |
|||
#' @param wn_lower Numeric value of lowest wavenumber. This argument will only |
|||
#' be used if `x_unit = "wavenumber"`. The value serves as starting value for |
|||
#' the new wavenumber sequence that the spectra will be resampled upon. Default |
|||
#' value is 500 (i.e., in reciprocal centimeters). |
|||
#' @param wn_upper Numeric value of highest wavenumber. This argument will only |
|||
#' be used if `x_unit = "wavenumber`. The value will be used as last value of |
|||
#' the new wavenumber sequence that the spectra will be resampled upon. Default |
|||
#' value is 4000 (i.e., in reciprocal centimeters). |
|||
#' @param wn_interval Numeric value of the wavenumber increment for the new |
|||
#' wavenumber sequence that the spectra will be resampled upon. Default value |
|||
#' is 2 (i.e., in reciprocal centimeters). |
|||
#' @param wl_lower Numeric value of lowest wavelength. This argument will only |
|||
#' be used if `x_unit = "wavelength"`. The value serves as starting value of |
|||
#' the new wavenumber sequence that the spectra will be resampled upon. |
|||
#' Default value is 350 (i.e. in nanometers). |
|||
#' @param wl_upper Numeric value of highest wavelength. This argument will only |
|||
#' be used if `x_unit = "wavelength"`. The value will be used as last value of |
|||
#' the new wavenumber sequence that the spectra will be resampled upon. Default |
|||
#' value is 2500 (i.e., in nanometers). |
|||
#' @param wl_interval Numeric value of the wavelength increment for the new |
|||
#' wavenumber sequence that the spectra will be resampled upon. This argument |
|||
#' will only be used if `x_unit = "wavelength"`. Default value is 1 (i.e., in |
|||
#' nanometers). |
|||
#' @param interpol_method Character of `"linear"` (default) or `"spline"` with |
|||
#' the interpolation method. `"spline"` uses a cubic spline to interpolate the |
|||
#' input spectra at given x-axis values to new equispaced x-axis intervals. |
|||
#' @return A spectra tibble (`spc_tbl`) containing two added list-columns: |
|||
#' * `spc_rs:` Resampled spectra as list of `data.table`s |
|||
#' * `wavenumbers_rs` or `wavelengths_rs`: Resampled x-axis values as list of |
|||
#' numeric vectors |
|||
#' @section Matching spectrum type and corresponding x-axis type: |
|||
#' The combinations of input spectrum types (`column_in`) and |
|||
#' corresponding x-axis types are generated from a simple lookup list. The |
|||
#' following key-value(s) pairs can be matched at given key, which is the column |
|||
#' name from `column_in` containing the spectra. |
|||
#' * `"spc"` : `"wavenumbers"` or `"wavelengths"` (raw spectra) |
|||
#' * `"spc_rs"` : `"wavenumbers_rs"` or `"wavelengths_rs"`) (resampled spectra) |
|||
#' * `"spc_mean"` : `"wavenumbers_rs"` or `"wavelengths_rs"` (mean spectra) |
|||
#' * `"spc_nocomp"` `"wavenumbers"` or `"wavelengths"` (spectra prior |
|||
#' atmospheric compensation) |
|||
#' * `"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")` (single channel |
|||
#' sample spectra) |
|||
#' * `"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")` (single channel |
|||
#' reference spectra) |
|||
#' * `"spc_pre" : "xvalues_pre"` (preprocessed spectra) |
|||
#' @export |
|||
resample_spc <- function(spc_tbl, x_unit = "wavenumber", |
|||
wn_lower = 510, wn_upper = 3988, wn_interval = 2, |
|||
wl_lower = 350, wl_upper = 2500, wl_interval = 1) { |
|||
resample_spc <- function(spc_tbl, |
|||
column_in = "spc", |
|||
x_unit = c("wavenumber", "wavelength"), |
|||
wn_lower = 500, wn_upper = 4000, wn_interval = 2, |
|||
wl_lower = 350, wl_upper = 2500, wl_interval = 1, |
|||
interpol_method = c("linear", "spline")) { |
|||
# Capture user input as expressions (can be both of type character or symbol), |
|||
# also called quoting; convert quosures to characters for later arg matching |
|||
column_in <- rlang::enquo(column_in) |
|||
column_in_chr <- rlang::quo_name(column_in) |
|||
|
|||
if (x_unit == "wavenumber" && "wavenumbers" %in% names(spc_tbl)) { |
|||
# Create sequence of new wavenumbers |
|||
wn_seq <- rev(seq(from = wn_lower, wn_upper, by = wn_interval)) |
|||
stopifnot( |
|||
is.character(x_unit) && length(x_unit) > 0, |
|||
is.numeric(wn_lower), is.numeric(wn_upper), is.numeric(wn_interval), |
|||
is.numeric(wl_lower), is.numeric(wl_upper), is.numeric(wl_interval) |
|||
) |
|||
|
|||
# Collect sequence of wavenumbers in list |
|||
wavenumbers_rs <- rep(list(wn_seq), nrow(spc_tbl)) |
|||
names(wavenumbers_rs) <- names(spc_tbl$spc) |
|||
# Lookup list to match spectrum types and corresponding x-axis types |
|||
spc_xaxis_types <- list( |
|||
"spc" = c("wavenumbers", "wavelengths"), # raw/unprocessed |
|||
"spc_rs" = c("wavenumbers_rs", "wavelengths_rs"), # resampled |
|||
"spc_mean" = c("wavenumbers_rs", "wavelengths_rs"), # mean |
|||
"spc_nocomp" = c("wavenumbers", "wavelengths"), # no atm. compensation |
|||
"sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm"), # single channel sample |
|||
"sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf"), # single channel reference |
|||
"spc_pre" = rep("xvalues_pre", 2) # preprocessed |
|||
) |
|||
spctypes <- names(spc_xaxis_types) |
|||
column_spc <- match.arg(column_in_chr, spctypes) |
|||
|
|||
# Resample all spectra in list column spc using prospectr |
|||
spc_rs <- lapply(seq_along(spc_tbl$spc), function(i) { |
|||
data.table::data.table( |
|||
prospectr::resample( |
|||
X = spc_tbl$spc[[i]], # spectral matrix to resample |
|||
wav = spc_tbl$wavenumbers[[i]], # old wavenumbers |
|||
new.wav = wn_seq # new wavenumbers |
|||
) |
|||
) |
|||
} |
|||
) |
|||
names(spc_rs) <- names(spc_tbl$spc) |
|||
x_unit <- match.arg(x_unit) |
|||
switch(x_unit, |
|||
wavenumber = {x_unit_int <- 1L}, |
|||
wavelength = {x_unit_int <- 2L}) |
|||
|
|||
# Add list of resampled spectra matrices to tibble spc_tbl |
|||
spc_tbl %>% tibble::add_column( |
|||
spc_rs = spc_rs, |
|||
wavenumbers_rs = wavenumbers_rs |
|||
) |
|||
interpol_method <- match.arg(interpol_method) |
|||
|
|||
# Final selection of `x_unit` column name string from user input and lookup |
|||
x_unit_sel <- spc_xaxis_types[[column_spc]][x_unit_int] |
|||
|
|||
# Both columns with X-values and input spectra need to be present in `spc_tbl` |
|||
colnm <- colnames(spc_tbl) |
|||
stopifnot(x_unit_sel %in% colnm, column_spc %in% colnm) |
|||
|
|||
} else if (x_unit == "wavelength" && "wavelengths" %in% names(spc_tbl)) { |
|||
# Create sequence of new wavelengths |
|||
wl_seq <- seq(from = wl_lower, wl_upper, by = wl_interval) |
|||
# Convert from wavelength (in nm) to wavenumbers (in cm^-1) |
|||
wn_seq <- 10000000 / wl_seq |
|||
# Extract list-column containing spectra |
|||
spc_in_list <- dplyr::pull(spc_tbl, !!column_in) |
|||
|
|||
# Collect sequence of wavelengths and wavenumbers in list |
|||
wavelengths_rs <- rep(list(wl_seq), nrow(spc_tbl)) |
|||
wavenumbers_rs <- rep(list(wn_seq), nrow(spc_tbl)) |
|||
names(wavelengths_rs) <- names(spc_tbl$spc) |
|||
names(wavenumbers_rs) <- names(spc_tbl$spc) |
|||
# Extract list-column containing x-axis values |
|||
xvalues_in_list <- dplyr::pull(spc_tbl, !!x_unit_sel) |
|||
|
|||
# Resample all spectra in list column spc using prospectr |
|||
spc_rs <- lapply(seq_along(spc_tbl$spc), function(i) { |
|||
# Automatically check the arrangement of the input x-Unit values; |
|||
# often, it is convenient to have have a descending ordner of spectral columns |
|||
# if the physical quantity of the x-axis is wavenumbers |
|||
xvalue_order_chr <- purrr::map_chr(xvalues_in_list, seq_order) |
|||
|
|||
if (length(unique(xvalue_order_chr)) > 1L) { |
|||
stop( |
|||
glue::glue( |
|||
"The column `{x_unit_sel}` which contains the list of X-values |
|||
has both elements of ascending and descending order. |
|||
* To resolve, you can split `spc_tbl` in a list of `spc_tbl`s |
|||
with identical X-value vectors based on `group_by_col_hash()`, |
|||
and apply `resample_spc()` separately to each list element. |
|||
* Alternatively, you could fix the order of x-axis values |
|||
for all input spectra and X-value vectors to all ascending or |
|||
descending"), |
|||
call. = FALSE) |
|||
} |
|||
xvalue_order <- xvalue_order_chr[1L] |
|||
|
|||
# Generate sequence of new x-axis values |
|||
switch(x_unit_int, |
|||
`1L` = { |
|||
xvalues_out <- seq(from = wn_lower, to = wn_upper, by = wn_interval) |
|||
x_unit_type_rs <- "wavenumbers_rs" |
|||
}, |
|||
`2L` = { |
|||
xvalues_out <- seq(from = wl_lower, to = wl_upper, by = wl_interval) |
|||
x_unit_type_rs <- "wavelengths_rs" |
|||
}) |
|||
|
|||
if (xvalue_order == "descending") xvalues_out <- rev(xvalues_out) |
|||
|
|||
# Repeat sequence of new (resampled) x-axis values in list (for every obs.) |
|||
xvalues_out_list <- rep(list(xvalues_out), nrow(spc_tbl)) |
|||
names(xvalues_out_list) <- names(spc_in_list) |
|||
|
|||
# Resample all spectra extracted from list-column `column_in` using prospectr |
|||
spc_rs <- lapply( |
|||
seq_along(spc_in_list), |
|||
function(i) { |
|||
data.table::data.table( |
|||
prospectr::resample( |
|||
X = spc_tbl$spc[[i]], # spectral matrix to resample |
|||
wav = spc_tbl$wavelengths[[i]], # old wavenumbers |
|||
new.wav = wl_seq # new wavenumbers |
|||
X = spc_in_list[[i]], # spectral data.table to resample |
|||
wav = xvalues_in_list[[i]], # old x-values vector |
|||
new.wav = xvalues_out_list[[i]], # new x-values vector |
|||
interpol = interpol_method |
|||
) |
|||
) |
|||
} |
|||
) |
|||
names(spc_rs) <- names(spc_tbl$spc) |
|||
) |
|||
names(spc_rs) <- names(spc_in_list) |
|||
|
|||
# Add list of resampled spectra matrices to tibble spc_tbl |
|||
spc_tbl %>% tibble::add_column( |
|||
spc_tbl_out <- |
|||
spc_tbl %>% |
|||
tibble::add_column( |
|||
spc_rs = spc_rs, |
|||
wavelengths_rs = wavelengths_rs, |
|||
wavenumbers_rs = wavenumbers_rs |
|||
!!x_unit_type_rs := xvalues_out_list |
|||
) |
|||
} else { |
|||
stop("Either columns 'wavenumbers' and 'wavelengths' are missing in the |
|||
spectra tibble <spc_tbl> or argument x_unit has not been set to |
|||
'wavenumber' or 'wavelength.") |
|||
} |
|||
|
|||
return(spc_tbl_out) |
|||
} |
|||
|
|||
# Helper |
|||
seq_order <- function(x) ifelse(x[1L] < x[length(x)], "ascending", "descending") |
|||
|
Loading…
Reference in new issue