# Quick fix implementation of select_ref_samples using the tibble framework ---- # Perform sampling for selection of reference samples based on spectral PCA ---- #' @title Select a set of reference spectra to be measured by reference analysis #' methods #' @description Select a set of calibration spectra to develop spectral models. #' Samples in this list will be analyzed using laboratory reference methods. #' @param spc_tbl Spectra as tibble objects that contain preprocessed spectra #' @param ratio_ref Ratio of desired reference samples to total sample number #' @param pc Number of principal components (numeric). If pc < 1, the number #' of principal components kept corresponds to the number of components #' explaining at least (pc * 100) percent of the total variance. #' @param print logical expression whether a plot (ggplot2) of sample selection #' for reference analysis is shown in PCA space #' (\code{TRUE} or \code{FALSE}). #' @usage select_ref_spc(spc_tbl, ratio_ref, pc, print = TRUE) #' @export select_ref_spc <- function(spc_tbl, ratio_ref = 0.15, pc = 2, print = TRUE) { # Avoid `R CMD check` NOTE: `no visible binding for global variable ...` PC1 <- PC2 <- type <- NULL if (tibble::is_tibble(spc_tbl)) { # Slice based on sample_id if spectral data is in tibble class spc_tbl <- dplyr::group_by(spc_tbl, !!rlang::sym("sample_id")) %>% dplyr::slice(1L) # Bind list of data.tables in list-column spc_pre to one data table # containing spectral data spc_pre <- as.matrix(data.table::rbindlist(spc_tbl$spc_pre)) } # Perform Kennard-Stone calibration sampling --------------------------------- sel <- prospectr::kenStone(X = spc_pre, k = round(ratio_ref * nrow(spc_pre)), pc = pc) # Select spectra tibble of reference samples based on row indices spc_ref <- spc_tbl[sel$model, ] # Select spectra tibble of prediction samples based on row indices spc_pred <- spc_tbl[-sel$model, ] # Prepare data for ggplot graphs of reference and prediction sample PC score # plots (PC1 and PC2) -------------------------------------------------------- sel_df_ref <- data.frame(sel$pc[sel$model, 1:2]) sel_df_ref$type <- as.factor( rep("reference analysis", nrow(sel_df_ref)) ) sel_df_pred <- data.frame(sel$pc[-sel$model, 1:2]) # Create type column for visually differentiate reference and prediction # samples sel_df_pred$type <- as.factor( rep("model prediction", nrow(sel_df_pred))) # Bind rows of reference and prediction PC scores data frames sel_df <- rbind(sel_df_ref, sel_df_pred) # Compute ratio needed to make the figure square ratio <- with(sel_df, diff(range(PC1)) / diff(range(PC2))) # Create spectra PC score plots ---------------------------------------------- p_pca <- ggplot2::ggplot(data = sel_df) + ggplot2::geom_point( ggplot2::aes(x = PC1, y = PC2, shape = type), size = 4) + ggplot2::coord_fixed(ratio = 1) + ggplot2::scale_shape_manual(values=c(19, 1)) + ggplot2::scale_colour_manual(values=c("black", "red")) + ggplot2::theme_bw() + ggplot2::theme(legend.title = ggplot2::element_blank()) # Print reference and prediction samples in PC1 and PC2 if (print == TRUE) { p_pca } # Return spectral tibbles for reference spectra (spc_ref), # prediction spectra (spc_pr) and ggplot object of score plots (p_pca) list( spc_ref = spc_ref, spc_pred = spc_pred, p_pca = p_pca, pc_scores = sel$pc ) }