Update documentation, first final draft

7 years ago · 15a2d8b197
10 changed files with 153 additions and 46 deletions
--- a/R/plot_spc.R
+++ b/R/plot_spc.R
@ -4,33 +4,33 @@
 #' list-column
 #' @param spc_tbl_2 Tibble that contains the second set of spectra (optional)
 #' to plot as list-column.
-#' @param x_unit Character describing the x axis unit. Default is
+#' @param x_unit Character string describing the x axis unit. Default is
 #'  \code{"wavenumber"}, which will produce a graph with wavenumbers on the
 #'  x axis in reversed order. If \code{x_unit = "wavelength"}, the axis
 #'  will be in regular order (lower wavelengths in nm on the left and higher
 #'  on the right side of the axis).
-#' @param y Character vector of list-column name in tibble where spectra of
+#' @param y Character string of list-column name in tibble where spectra of
 #' desired type are extracted to plot.
-#' @param by Character vector of column that is used to group the spectra.
+#' @param by Character string of column that is used to group the spectra.
 #' Default is \code{"unique_id"}. If replica spectra are present in the file
 #' and processed spectra resulting after averaging need to be plotted,
 #' it is recommended to use \code{"sample_id"} as argument to group according
 #' to the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
 #' and \code{spc_tbl_2}).
-#' @param graph_id_1 Character used for grouping the first spectra set
+#' @param graph_id_1 Character string used for grouping the first spectra set
 #' (\code{spc_tbl}) and producing
 #' the label text accordingly. Default is \code{"Set 1"}.
-#' @param graph_id_2 Character used for grouping the second spectra set
+#' @param graph_id_2 Character string used for grouping the second spectra set
 #' (\code{spc_tbl_2}) and producing the label text accordingly. Default is
 #' \code{"Set 2"}
-#' @param graph_id_1_col Character for the colour of the first spectra set.
-#' Default is \code{"black"}.
-#' @param graph_id_2_col Character for the colour of the first spectra set.
-#' Default is \code{"red"}.
-#' @param xlab Character vector or mathematical expression
+#' @param graph_id_1_col Character string for the colour of the first spectra
+#' set. Default is \code{"black"}.
+#' @param graph_id_2_col Character string for the colour of the second spectra
+#' set. Default is \code{"red"}.
+#' @param xlab Character string or mathematical expression
 #' (use \code{expression}) for the x axis title. Default is
 #' \code{expression(paste("Wavenumber [", cm^-1, "]"))}.
-#' @param ylab Character vector or mathematical expression
+#' @param ylab Character string or mathematical expression
 #' (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.
 #' @param slice Logical whether to slice the data sets (select rows by position).
 #' Default is \code{TRUE}.
--- a/R/pls-modeling.R
+++ b/R/pls-modeling.R
@ -621,7 +621,7 @@ evaluate_model_q <- function(x, model, response,
 #' reference as list-columns. The tibble to be supplied to \code{spec_chem} can
 #' be generated by the \code{join_chem_spc()} function
 #' @param response Response variable as symbol or name
-#' (without quotes, no character vector). The provided response symbol needs to be
+#' (without quotes, no character string). The provided response symbol needs to be
 #' a column name in the \code{spec_chem} tibble.
 #' @param variable Deprecated and replaced by \code{response}
 #' @param center Logical whether to perform mean centering of each spectrum column
@ -630,7 +630,7 @@ evaluate_model_q <- function(x, model, response,
 #' @param scale Logical whether to perform standard deviation scaling
 #' of each spectrum column (e.g. wavenumber or wavelength) after common
 #' spectrum preprocessing. Default is \code{scale = TRUE}
-#' @param evaluation_method Character vector stating evaluation method.
+#' @param evaluation_method Character string stating evaluation method.
 #' Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
 #' will split the data into a calibration (training) and validation (test) set,
 #' and evaluate the final model by predicting on the validation set.
@ -810,10 +810,10 @@ pls_ken_stone <- fit_pls
 #' reference as list-columns. The tibble to be supplied to \code{spec_chem} can
 #' be generated by the \code{join_chem_spc()} function
 #' @param response Response variable as symbol or name
-#' (without quotes, no character vector). The provided response symbol needs to be
+#' (without quotes, no character string). The provided response symbol needs to be
 #' a column name in the \code{spec_chem} tibble.
 #' @param variable Deprecated and replaced by \code{response}
-#' @param evaluation_method Character vector stating evaluation method.
+#' @param evaluation_method Character string stating evaluation method.
 #' Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
 #' will split the data into a calibration (training) and validation (test) set,
 #' and evaluate the final model by predicting on the validation set.
--- a/R/pls-vip.R
+++ b/R/pls-vip.R
@ -2,7 +2,7 @@
 ### `pls' package.
 ### $Id: VIP.R,v 1.2 2007/07/30 09:17:36 bhm Exp $

-### Copyright ? 2006,2007 Björn-Helge Mevik
+### Copyright: 2006,2007 Bjoern-Helge Mevik
 ### This program is free software; you can redistribute it and/or modify
 ### it under the terms of the GNU General Public License version 2 as
 ### published by the Free Software Foundation.
@ -16,9 +16,9 @@
 ### http://www.gnu.org/licenses/gpl-2.0.txt

 ### Contact info:
-### Bj?rn-Helge Mevik
+### Bjoern-Helge Mevik
 ### bhx6@mevik.net
-### R?dtvetvien 20
+### Roedtvetvien 20
 ### N-0955 Oslo
 ### Norway

@ -60,7 +60,18 @@ VIPjh <- function(object, j, h) {
    sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS))
 }

-
+#' @title Extract VIPs (variable importance in the projection) for a PLS
+#' regression model output returned from model fitting with
+#' \code{simplerspec::fit_pls()}
+#' @description VIPs are extracted based on the \code{finalModel} sublist
+#' in the \code{caret::train} output contained in the \code{model} element
+#' of the \code{simplerspec::fit_pls()} model output list. The VIPs for
+#' derived number of PLS components in the \code{finalModel} are computed.
+#' @param mout Model output list returned from \code{simplerspec::fit_pls()}.
+#' @usage extract_pls_vip(mout)
+#' @return A tibble data frame with columns \code{wavenumber} and corresponding
+#' VIP values in the column \code{vip} for the finally chosen PLS regression
+#' model at the final number of PLS components.
 #' @export
 extract_pls_vip <- function(mout) {
  # Compute VIP for all wavenumbers and select only VIPs with ncomp in final
@ -95,10 +106,43 @@ create_vip_rects <- function(df_vip) {
 }

 #' @title Plot stacked ggplot2 graphs with the Variable Importance for the
-#' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, and the preprocessed spectra.
+#' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
+#' and the preprocessed spectra.
 #' @description Plot stacked ggplot2 graphs of VIP for the final
-#' PLS regression model of the calibration (training) data set for the final
-#' number of components, raw (replicate mean) spectra, and preprocessed spectra.
+#' PLS regression model output of the calibration (training) data set for the
+#' final number of components, raw (replicate mean) spectra, and preprocessed
+#' spectra. Regions with VIP > 1 are highlighted across the stacked graphs
+#' in beige colour rectangles. VIP calculation is implemented as described in
+#' Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
+#' methods when multicollinearity is present. Chemometrics and Intelligent
+#' Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
+#' @param mout Model output list that is returned from
+#' \code{simplerspec::fit_pls()}. This object contains a nested list with
+#' the \code{caret::train()} object (class \code{train}), based on which
+#' VIPs at finally selected number of PLS components are computed.
+#' @param y1 Character string of list-column name in
+#' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
+#' Default is \code{"spc_mean"}, which plots the mean calibration spectra after
+#' resampling.
+#' @param y2 Character string of list-column name in
+#' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
+#' Default is \code{"spc_pre"}, which plots the preprocessed calibration
+#' spectra after resampling.
+#' @param by Character string that is used to assign spectra to the same group
+#' and therefore ensures that all spectra are plotted with the same colour.
+#' Default is \code{"sample_id"}
+#' @param xlab Character string of X axis title for shared x axis of stacked
+#' graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}
+#' @param ylab1 Y axis title of bottom spectrum. Default is \code{"Absorbance"}.
+#' @param ylab2 Y axis title of bottom spectrum. Default is
+#' \code{"Preprocessed Abs."}.
+#' @param alpha Double between 0 and 1 that defines transparency of spectra
+#' lines in returned graph (ggplot plot object).
+#' @usage plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
+#'   by = "sample_id",
+#'   xlab = expression(paste("Wavenumber [", cm^-1, "]")),
+#'   ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
+#'   alpha = 0.2)
 #' @export
 plot_pls_vip <- function(mout, y1 = "spc_mean", y2 = "spc_pre",
                         by = "sample_id",
--- a/man/average_spc.Rd
+++ b/man/average_spc.Rd
@ -6,6 +6,9 @@
 \usage{
 average_spc(spc_tbl)
 }
+\arguments{
+\item{spc_tbl}{Spectra after resampling spectra with \code{resample_spc()}}
+}
 \description{
 Averages spectra in tibble column by sample_id after
 resampling spectra by \code{simplerspec::resample_spc()}.
--- a/man/extract_pls_vip.Rd
+++ b/man/extract_pls_vip.Rd
@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pls-vip.R
+\name{extract_pls_vip}
+\alias{extract_pls_vip}
+\title{Extract VIPs (variable importance in the projection) for a PLS
+regression model output returned from model fitting with
+\code{simplerspec::fit_pls()}}
+\usage{
+extract_pls_vip(mout)
+}
+\arguments{
+\item{mout}{Model output list returned from \code{simplerspec::fit_pls()}.}
+}
+\value{
+A tibble data frame with columns \code{wavenumber} and corresponding
+VIP values in the column \code{vip} for the finally chosen PLS regression
+model at the final number of PLS components.
+}
+\description{
+VIPs are extracted based on the \code{finalModel} sublist
+in the \code{caret::train} output contained in the \code{model} element
+of the \code{simplerspec::fit_pls()} model output list. The VIPs for
+derived number of PLS components in the \code{finalModel} are computed.
+}
--- a/man/fit_pls.Rd
+++ b/man/fit_pls.Rd
@ -25,7 +25,7 @@ reference as list-columns. The tibble to be supplied to \code{spec_chem} can
 be generated by the \code{join_chem_spc()} function}

 \item{response}{Response variable as symbol or name
-(without quotes, no character vector). The provided response symbol needs to be
+(without quotes, no character string). The provided response symbol needs to be
 a column name in the \code{spec_chem} tibble.}

 \item{variable}{Deprecated and replaced by \code{response}}
@ -38,7 +38,7 @@ a column name in the \code{spec_chem} tibble.}
 of each spectrum column (e.g. wavenumber or wavelength) after common
 spectrum preprocessing. Default is \code{scale = TRUE}}

-\item{evaluation_method}{Character vector stating evaluation method.
+\item{evaluation_method}{Character string stating evaluation method.
 Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
 will split the data into a calibration (training) and validation (test) set,
 and evaluate the final model by predicting on the validation set.
--- a/man/fit_rf.Rd
+++ b/man/fit_rf.Rd
@ -16,12 +16,12 @@ reference as list-columns. The tibble to be supplied to \code{spec_chem} can
 be generated by the \code{join_chem_spc()} function}

 \item{response}{Response variable as symbol or name
-(without quotes, no character vector). The provided response symbol needs to be
+(without quotes, no character string). The provided response symbol needs to be
 a column name in the \code{spec_chem} tibble.}

 \item{variable}{Deprecated and replaced by \code{response}}

-\item{evaluation_method}{Character vector stating evaluation method.
+\item{evaluation_method}{Character string stating evaluation method.
 Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
 will split the data into a calibration (training) and validation (test) set,
 and evaluate the final model by predicting on the validation set.
--- a/man/plot_pls_vip.Rd
+++ b/man/plot_pls_vip.Rd
@ -3,14 +3,53 @@
 \name{plot_pls_vip}
 \alias{plot_pls_vip}
 \title{Plot stacked ggplot2 graphs with the Variable Importance for the
-Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, and the preprocessed spectra.}
+Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
+and the preprocessed spectra.}
 \usage{
-plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre", by = "sample_id",
+plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
+  by = "sample_id",
  xlab = expression(paste("Wavenumber [", cm^-1, "]")),
-  ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", alpha = 0.2)
+  ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
+  alpha = 0.2)
+}
+\arguments{
+\item{mout}{Model output list that is returned from
+\code{simplerspec::fit_pls()}. This object contains a nested list with
+the \code{caret::train()} object (class \code{train}), based on which
+VIPs at finally selected number of PLS components are computed.}
+
+\item{y1}{Character string of list-column name in
+\code{mout$data$calibration}, where spectra for bottom graph are extracted.
+Default is \code{"spc_mean"}, which plots the mean calibration spectra after
+resampling.}
+
+\item{y2}{Character string of list-column name in
+\code{mout$data$calibration}, where spectra for bottom graph are extracted.
+Default is \code{"spc_pre"}, which plots the preprocessed calibration
+spectra after resampling.}
+
+\item{by}{Character string that is used to assign spectra to the same group
+and therefore ensures that all spectra are plotted with the same colour.
+Default is \code{"sample_id"}}
+
+\item{xlab}{Character string of X axis title for shared x axis of stacked
+graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}}
+
+\item{ylab1}{Y axis title of bottom spectrum. Default is \code{"Absorbance"}.}
+
+\item{ylab2}{Y axis title of bottom spectrum. Default is
+\code{"Preprocessed Abs."}.}
+
+\item{alpha}{Double between 0 and 1 that defines transparency of spectra
+lines in returned graph (ggplot plot object).}
 }
 \description{
 Plot stacked ggplot2 graphs of VIP for the final
-PLS regression model of the calibration (training) data set for the final
-number of components, raw (replicate mean) spectra, and preprocessed spectra.
+PLS regression model output of the calibration (training) data set for the
+final number of components, raw (replicate mean) spectra, and preprocessed
+spectra. Regions with VIP > 1 are highlighted across the stacked graphs
+in beige colour rectangles. VIP calculation is implemented as described in
+Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
+methods when multicollinearity is present. Chemometrics and Intelligent
+Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
 }
--- a/man/plot_spc.Rd
+++ b/man/plot_spc.Rd
@ -20,41 +20,41 @@ list-column}
 \item{spc_tbl_2}{Tibble that contains the second set of spectra (optional)
 to plot as list-column.}

-\item{x_unit}{Character describing the x axis unit. Default is
+\item{x_unit}{Character string describing the x axis unit. Default is
 \code{"wavenumber"}, which will produce a graph with wavenumbers on the
 x axis in reversed order. If \code{x_unit = "wavelength"}, the axis
 will be in regular order (lower wavelengths in nm on the left and higher
 on the right side of the axis).}

-\item{y}{Character vector of list-column name in tibble where spectra of
+\item{y}{Character string of list-column name in tibble where spectra of
 desired type are extracted to plot.}

-\item{by}{Character vector of column that is used to group the spectra.
+\item{by}{Character string of column that is used to group the spectra.
 Default is \code{"unique_id"}. If replica spectra are present in the file
 and processed spectra resulting after averaging need to be plotted,
 it is recommended to use \code{"sample_id"} as argument to group according
 to the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
 and \code{spc_tbl_2}).}

-\item{graph_id_1}{Character used for grouping the first spectra set
+\item{graph_id_1}{Character string used for grouping the first spectra set
 (\code{spc_tbl}) and producing
 the label text accordingly. Default is \code{"Set 1"}.}

-\item{graph_id_2}{Character used for grouping the second spectra set
+\item{graph_id_2}{Character string used for grouping the second spectra set
 (\code{spc_tbl_2}) and producing the label text accordingly. Default is
 \code{"Set 2"}}

-\item{graph_id_1_col}{Character for the colour of the first spectra set.
-Default is \code{"black"}.}
+\item{graph_id_1_col}{Character string for the colour of the first spectra
+set. Default is \code{"black"}.}

-\item{graph_id_2_col}{Character for the colour of the first spectra set.
-Default is \code{"red"}.}
+\item{graph_id_2_col}{Character string for the colour of the second spectra
+set. Default is \code{"red"}.}

-\item{xlab}{Character vector or mathematical expression
+\item{xlab}{Character string or mathematical expression
 (use \code{expression}) for the x axis title. Default is
 \code{expression(paste("Wavenumber [", cm^-1, "]"))}.}

-\item{ylab}{Character vector or mathematical expression
+\item{ylab}{Character string or mathematical expression
 (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.}

 \item{slice}{Logical whether to slice the data sets (select rows by position).
--- a/man/select_ref_spc.Rd
+++ b/man/select_ref_spc.Rd
@ -17,10 +17,7 @@ of principal components kept corresponds to the number of components
 explaining at least (pc * 100) percent of the total variance.}

 \item{print}{logical expression whether a plot (ggplot2) of sample selection
-for reference analysis is shown in PCA space}
-
-\item{validation}{Logical expression whether
-calibration sampling is performed
+for reference analysis is shown in PCA space
 (\code{TRUE} or \code{FALSE}).}
 }
 \description{