diff --git a/R/plot_spc.R b/R/plot_spc.R index dfe3030..b5b5afa 100644 --- a/R/plot_spc.R +++ b/R/plot_spc.R @@ -4,33 +4,33 @@ #' list-column #' @param spc_tbl_2 Tibble that contains the second set of spectra (optional) #' to plot as list-column. -#' @param x_unit Character describing the x axis unit. Default is +#' @param x_unit Character string describing the x axis unit. Default is #' \code{"wavenumber"}, which will produce a graph with wavenumbers on the #' x axis with reversed number. If \code{x_unit = "wavelength"}, the axis #' will be in regular order (lower wavelengths in nm on the left and higher #' on the right side of the axis). -#' @param y Character vector of list-column name in tibble where spectra of +#' @param y Character string of list-column name in tibble where spectra of #' desired type are extracted to plot. -#' @param by Character vector of column that is used to group the spectra. +#' @param by Character string of column that is used to group the spectra. #' Default is \code{"unique_id"}. If replica spectra are present in the file #' and processed spectra resulting after averaging need to be plotted, #' it is recommend to use \code{"sample_id"} as argument to group according #' the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl} #' and \code{spc_tbl_2}). -#' @param graph_id_1 Character used for grouping the first spectra set +#' @param graph_id_1 Character string used for grouping the first spectra set #' (\code{spc_tbl}) and producing #' the label text accordingly. Default is \code{"Set 1"}. -#' @param graph_id_2 Character used for grouping the second spectra set +#' @param graph_id_2 Character string used for grouping the second spectra set #' (\code{spc_tbl_2}) and producing the label text accordingly. Default is #' \code{"Set 2"} -#' @param graph_id_1_col Character for the colour of the first spectra set. -#' Default is \code{"black"}. -#' @param graph_id_2_col Character for the colour of the first spectra set. 
-#' Default is \code{"red"}. -#' @param xlab Character vector or mathematical expression +#' @param graph_id_1_col Character string for the colour of the first spectra +#' set. Default is \code{"black"}. +#' @param graph_id_2_col Character string for the colour of the second spectra +#' set. Default is \code{"red"}. +#' @param xlab Character string or mathematical expression #' (use \code{expression}) for the x axis title. Default is #' \code{expression(paste("Wavenumber [", cm^-1, "]"))}. -#' @param ylab Character vector or mathematical expression +#' @param ylab Character string or mathematical expression #' (use \code{expression}) for the y axis title. Default is \code{"absorbance"}. #' @param slice Logical whether to slice the data sets (select rows by position). #' Default is \code{TRUE}. diff --git a/R/pls-modeling.R b/R/pls-modeling.R index 1574b33..a923bcc 100644 --- a/R/pls-modeling.R +++ b/R/pls-modeling.R @@ -621,7 +621,7 @@ evaluate_model_q <- function(x, model, response, #' reference as list-columns. The tibble to be supplied to \code{spec_chem} can #' be generated by the `join_chem_spc() function` #' @param response Response variable as symbol or name -#' (without quotes, no character vector). The provided response symbol needs to be +#' (without quotes, no character string). The provided response symbol needs to be #' a column name in the \code{spec_chem} tibble. #' @param variable Depreciated and replaced by `response` #' @param center Logical whether to perform mean centering of each spectrum column @@ -630,7 +630,7 @@ evaluate_model_q <- function(x, model, response, #' @param scale Logical whether to perform standard deviation scaling #' of each spectrum column (e.g. wavenumber or wavelength) after common #' spectrum preprocessing. Default is \code{scale = TRUE} -#' @param evaluation_method Character vector stating evaluation method. +#' @param evaluation_method Character string stating evaluation method. 
#' Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"} #' will split the data into a calibration (training) and validation (test) set, #' and evaluate the final model by predicting on the validation set. @@ -810,10 +810,10 @@ pls_ken_stone <- fit_pls #' reference as list-columns. The tibble to be supplied to \code{spec_chem} can #' be generated by the `join_chem_spc() function` #' @param response Response variable as symbol or name -#' (without quotes, no character vector). The provided response symbol needs to be +#' (without quotes, no character string). The provided response symbol needs to be #' a column name in the \code{spec_chem} tibble. #' @param variable Depreciated and replaced by `response` -#' @param evaluation_method Character vector stating evaluation method. +#' @param evaluation_method Character string stating evaluation method. #' Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"} #' will split the data into a calibration (training) and validation (test) set, #' and evaluate the final model by predicting on the validation set. diff --git a/R/pls-vip.R b/R/pls-vip.R index 0babdeb..15d5628 100644 --- a/R/pls-vip.R +++ b/R/pls-vip.R @@ -2,7 +2,7 @@ ### `pls' package. ### $Id: VIP.R,v 1.2 2007/07/30 09:17:36 bhm Exp $ -### Copyright ? 2006,2007 Björn-Helge Mevik +### Copyright: 2006,2007 Bjoern-Helge Mevik ### This program is free software; you can redistribute it and/or modify ### it under the terms of the GNU General Public License version 2 as ### published by the Free Software Foundation. 
@@ -16,9 +16,9 @@ ### http://www.gnu.org/licenses/gpl-2.0.txt ### Contact info: -### Bj?rn-Helge Mevik +### Bjoern-Helge Mevik ### bhx6@mevik.net -### R?dtvetvien 20 +### Roedtvetvien 20 ### N-0955 Oslo ### Norway @@ -60,7 +60,18 @@ VIPjh <- function(object, j, h) { sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS)) } - +#' @title Extract VIPs (variable importance in the projection) for a PLS +#' regression model output returned from model fitting with +#' \code{simplerspec::fit_pls()} +#' @description VIPs are extracted based on the \code{finalModel} sublist +#' in the \code{caret::train} output contained in the \code{model} element +#' of the \code{simplerspec::fit_pls()} model output list. The VIPs for +#' derived number of PLS components in the \code{finalModel} are computed. +#' @param mout Model output list returned from \code{simplerspec::fit_pls()}. +#' @usage extract_pls_vip(mout) +#' @return A tibble data frame with columns \code{wavenumber} and corresponding +#' VIP values in the column \code{vip} for the finally chosen PLS regression +#' model at the final number of PLS components. #' @export extract_pls_vip <- function(mout) { # Compute VIP for all wavenumbers and select only VIPs with ncomp in final @@ -95,10 +106,43 @@ create_vip_rects <- function(df_vip) { } #' @title Plot stacked ggplot2 graphs with the Variable Importance for the -#' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, and the preprocessed spectra. +#' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, +#' and the preprocessed spectra. #' @description Plot stacked ggplot2 graphs of VIP for the final -#' PLS regression model of the calibration (training) data set for the final -#' number of components, raw (replicate mean) spectra, and preprocessed spectra. +#' PLS regression model output of the calibration (training) data set for the +#' final number of components, raw (replicate mean) spectra, and preprocessed +#' spectra. 
Regions with VIP > 1 are highlighted across the stacked graphs +#' in beige colour rectangles. VIP calculation is implemented as described in +#' Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection +#' methods when multicollinearity is present. Chemometrics and Intelligent +#' Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011 +#' @param mout Model output list that is returned from +#' \code{simplerspec::fit_pls()}. This object contains a nested list with +#' the \code{caret::train()} object (class \code{train}), based on which +#' VIPs at finally selected number of PLS components are computed. +#' @param y1 Character string of list-column name in +#' \code{mout$data$calibration}, where spectra for bottom graph are extracted. +#' Default is \code{"spc_mean"}, which plots the mean calibration spectra after +#' resampling. +#' @param y2 Character string of list-column name in +#' \code{mout$data$calibration}, where spectra for bottom graph are extracted. +#' Default is \code{"spc_pre"}, which plots the preprocessed calibration +#' spectra after resampling. +#' @param by Character string that is used to assign spectra to the same group +#' and therefore ensures that all spectra are plotted with the same colour. +#' Default is \code{"sample_id"} +#' @param xlab Character string of X axis title for shared x axis of stacked +#' graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))} +#' @param ylab1 Y axis title of bottom spectrum. Default is \code{"Absorbance"}. +#' @param ylab2 Y axis title of bottom spectrum. Default is +#' \code{"Preprocessed Abs."}. +#' @param alpha Double between 0 and 1 that defines transparency of spectra +#' lines in returned graph (ggplot plot object). 
+#' @usage plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre", +#' by = "sample_id", +#' xlab = expression(paste("Wavenumber [", cm^-1, "]")), +#' ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", +#' alpha = 0.2) #' @export plot_pls_vip <- function(mout, y1 = "spc_mean", y2 = "spc_pre", by = "sample_id", diff --git a/man/average_spc.Rd b/man/average_spc.Rd index 1651c30..577ff2c 100644 --- a/man/average_spc.Rd +++ b/man/average_spc.Rd @@ -6,6 +6,9 @@ \usage{ average_spc(spc_tbl) } +\arguments{ +\item{spc_tbl}{Spectra after resampling spectra with \code{resample_spc()}} +} \description{ Averages spectra in tibble column by sample_id after resampling spectra by \code{simplerspec::resample_spc()}. diff --git a/man/extract_pls_vip.Rd b/man/extract_pls_vip.Rd new file mode 100644 index 0000000..bab31f3 --- /dev/null +++ b/man/extract_pls_vip.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pls-vip.R +\name{extract_pls_vip} +\alias{extract_pls_vip} +\title{Extract VIPs (variable importance in the projection) for a PLS +regression model output returned from model fitting with +\code{simplerspec::fit_pls()}} +\usage{ +extract_pls_vip(mout) +} +\arguments{ +\item{mout}{Model output list returned from \code{simplerspec::fit_pls()}.} +} +\value{ +A tibble data frame with columns \code{wavenumber} and corresponding +VIP values in the column \code{vip} for the finally chosen PLS regression +model at the final number of PLS components. +} +\description{ +VIPs are extracted based on the \code{finalModel} sublist +in the \code{caret::train} output contained in the \code{model} element +of the \code{simplerspec::fit_pls()} model output list. The VIPs for +derived number of PLS components in the \code{finalModel} are computed. +} diff --git a/man/fit_pls.Rd b/man/fit_pls.Rd index edfef47..0008a7f 100644 --- a/man/fit_pls.Rd +++ b/man/fit_pls.Rd @@ -25,7 +25,7 @@ reference as list-columns. 
The tibble to be supplied to \code{spec_chem} can be generated by the `join_chem_spc() function`} \item{response}{Response variable as symbol or name -(without quotes, no character vector). The provided response symbol needs to be +(without quotes, no character string). The provided response symbol needs to be a column name in the \code{spec_chem} tibble.} \item{variable}{Depreciated and replaced by `response`} @@ -38,7 +38,7 @@ a column name in the \code{spec_chem} tibble.} of each spectrum column (e.g. wavenumber or wavelength) after common spectrum preprocessing. Default is \code{scale = TRUE}} -\item{evaluation_method}{Character vector stating evaluation method. +\item{evaluation_method}{Character string stating evaluation method. Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"} will split the data into a calibration (training) and validation (test) set, and evaluate the final model by predicting on the validation set. diff --git a/man/fit_rf.Rd b/man/fit_rf.Rd index 575178c..65c88a3 100644 --- a/man/fit_rf.Rd +++ b/man/fit_rf.Rd @@ -16,12 +16,12 @@ reference as list-columns. The tibble to be supplied to \code{spec_chem} can be generated by the `join_chem_spc() function`} \item{response}{Response variable as symbol or name -(without quotes, no character vector). The provided response symbol needs to be +(without quotes, no character string). The provided response symbol needs to be a column name in the \code{spec_chem} tibble.} \item{variable}{Depreciated and replaced by `response`} -\item{evaluation_method}{Character vector stating evaluation method. +\item{evaluation_method}{Character string stating evaluation method. Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"} will split the data into a calibration (training) and validation (test) set, and evaluate the final model by predicting on the validation set. 
diff --git a/man/plot_pls_vip.Rd b/man/plot_pls_vip.Rd index f918c7a..8ac16ae 100644 --- a/man/plot_pls_vip.Rd +++ b/man/plot_pls_vip.Rd @@ -3,14 +3,53 @@ \name{plot_pls_vip} \alias{plot_pls_vip} \title{Plot stacked ggplot2 graphs with the Variable Importance for the -Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, and the preprocessed spectra.} +Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, +and the preprocessed spectra.} \usage{ -plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre", by = "sample_id", +plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre", + by = "sample_id", xlab = expression(paste("Wavenumber [", cm^-1, "]")), - ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", alpha = 0.2) + ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", + alpha = 0.2) +} +\arguments{ +\item{mout}{Model output list that is returned from +\code{simplerspec::fit_pls()}. This object contains a nested list with +the \code{caret::train()} object (class \code{train}), based on which +VIPs at finally selected number of PLS components are computed.} + +\item{y1}{Character string of list-column name in +\code{mout$data$calibration}, where spectra for bottom graph are extracted. +Default is \code{"spc_mean"}, which plots the mean calibration spectra after +resampling.} + +\item{y2}{Character string of list-column name in +\code{mout$data$calibration}, where spectra for bottom graph are extracted. +Default is \code{"spc_pre"}, which plots the preprocessed calibration +spectra after resampling.} + +\item{by}{Character string that is used to assign spectra to the same group +and therefore ensures that all spectra are plotted with the same colour. +Default is \code{"sample_id"}} + +\item{xlab}{Character string of X axis title for shared x axis of stacked +graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}} + +\item{ylab1}{Y axis title of bottom spectrum. 
Default is \code{"Absorbance"}.} + +\item{ylab2}{Y axis title of bottom spectrum. Default is +\code{"Preprocessed Abs."}.} + +\item{alpha}{Double between 0 and 1 that defines transparency of spectra +lines in returned graph (ggplot plot object).} } \description{ Plot stacked ggplot2 graphs of VIP for the final -PLS regression model of the calibration (training) data set for the final -number of components, raw (replicate mean) spectra, and preprocessed spectra. +PLS regression model output of the calibration (training) data set for the +final number of components, raw (replicate mean) spectra, and preprocessed +spectra. Regions with VIP > 1 are highlighted across the stacked graphs +in beige colour rectangles. VIP calculation is implemented as described in +Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection +methods when multicollinearity is present. Chemometrics and Intelligent +Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011 } diff --git a/man/plot_spc.Rd b/man/plot_spc.Rd index a5afec1..a31b673 100644 --- a/man/plot_spc.Rd +++ b/man/plot_spc.Rd @@ -20,41 +20,41 @@ list-column} \item{spc_tbl_2}{Tibble that contains the second set of spectra (optional) to plot as list-column.} -\item{x_unit}{Character describing the x axis unit. Default is +\item{x_unit}{Character string describing the x axis unit. Default is \code{"wavenumber"}, which will produce a graph with wavenumbers on the x axis with reversed number. If \code{x_unit = "wavelength"}, the axis will be in regular order (lower wavelengths in nm on the left and higher on the right side of the axis).} -\item{y}{Character vector of list-column name in tibble where spectra of +\item{y}{Character string of list-column name in tibble where spectra of desired type are extracted to plot.} -\item{by}{Character vector of column that is used to group the spectra. +\item{by}{Character string of column that is used to group the spectra. 
Default is \code{"unique_id"}. If replica spectra are present in the file and processed spectra resulting after averaging need to be plotted, it is recommend to use \code{"sample_id"} as argument to group according the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl} and \code{spc_tbl_2}).} -\item{graph_id_1}{Character used for grouping the first spectra set +\item{graph_id_1}{Character string used for grouping the first spectra set (\code{spc_tbl}) and producing the label text accordingly. Default is \code{"Set 1"}.} -\item{graph_id_2}{Character used for grouping the second spectra set +\item{graph_id_2}{Character string used for grouping the second spectra set (\code{spc_tbl_2}) and producing the label text accordingly. Default is \code{"Set 2"}} -\item{graph_id_1_col}{Character for the colour of the first spectra set. -Default is \code{"black"}.} +\item{graph_id_1_col}{Character string for the colour of the first spectra +set. Default is \code{"black"}.} -\item{graph_id_2_col}{Character for the colour of the first spectra set. -Default is \code{"red"}.} +\item{graph_id_2_col}{Character string for the colour of the second spectra +set. Default is \code{"red"}.} -\item{xlab}{Character vector or mathematical expression +\item{xlab}{Character string or mathematical expression (use \code{expression}) for the x axis title. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}.} -\item{ylab}{Character vector or mathematical expression +\item{ylab}{Character string or mathematical expression (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.} \item{slice}{Logical whether to slice the data sets (select rows by position). 
diff --git a/man/select_ref_spc.Rd b/man/select_ref_spc.Rd index 04cd64f..28702d8 100644 --- a/man/select_ref_spc.Rd +++ b/man/select_ref_spc.Rd @@ -17,10 +17,7 @@ of principal components kept corresponds to the number of components explaining at least (pc * 100) percent of the total variance.} \item{print}{logical expression whether a plot (ggplot2) of sample selection -for reference analysis is shown in PCA space} - -\item{validation}{Logical expression whether -calibration sampling is performed +for reference analysis is shown in PCA space (\code{TRUE} or \code{FALSE}).} } \description{