Browse Source

Update documentation, first final draft

pull/3/head
Philipp Baumann 7 years ago
parent
commit
15a2d8b197
  1. 22
      R/plot_spc.R
  2. 8
      R/pls-modeling.R
  3. 58
      R/pls-vip.R
  4. 3
      man/average_spc.Rd
  5. 24
      man/extract_pls_vip.Rd
  6. 4
      man/fit_pls.Rd
  7. 4
      man/fit_rf.Rd
  8. 49
      man/plot_pls_vip.Rd
  9. 22
      man/plot_spc.Rd
  10. 5
      man/select_ref_spc.Rd

22
R/plot_spc.R

@ -4,33 +4,33 @@
#' list-column
#' @param spc_tbl_2 Tibble that contains the second set of spectra (optional)
#' to plot as list-column.
#' @param x_unit Character describing the x axis unit. Default is
#' @param x_unit Character string describing the x axis unit. Default is
#' \code{"wavenumber"}, which will produce a graph with wavenumbers on the
#' x axis with reversed number. If \code{x_unit = "wavelength"}, the axis
#' will be in regular order (lower wavelengths in nm on the left and higher
#' on the right side of the axis).
#' @param y Character vector of list-column name in tibble where spectra of
#' @param y Character string of list-column name in tibble where spectra of
#' desired type are extracted to plot.
#' @param by Character vector of column that is used to group the spectra.
#' @param by Character string of column that is used to group the spectra.
#' Default is \code{"unique_id"}. If replica spectra are present in the file
#' and processed spectra resulting after averaging need to be plotted,
#' it is recommended to use \code{"sample_id"} as argument to group according
#' to the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
#' and \code{spc_tbl_2}).
#' @param graph_id_1 Character used for grouping the first spectra set
#' @param graph_id_1 Character string used for grouping the first spectra set
#' (\code{spc_tbl}) and producing
#' the label text accordingly. Default is \code{"Set 1"}.
#' @param graph_id_2 Character used for grouping the second spectra set
#' @param graph_id_2 Character string used for grouping the second spectra set
#' (\code{spc_tbl_2}) and producing the label text accordingly. Default is
#' \code{"Set 2"}
#' @param graph_id_1_col Character for the colour of the first spectra set.
#' Default is \code{"black"}.
#' @param graph_id_2_col Character for the colour of the first spectra set.
#' Default is \code{"red"}.
#' @param xlab Character vector or mathematical expression
#' @param graph_id_1_col Character string for the colour of the first spectra
#' set. Default is \code{"black"}.
#' @param graph_id_2_col Character string for the colour of the second spectra
#' set. Default is \code{"red"}.
#' @param xlab Character string or mathematical expression
#' (use \code{expression}) for the x axis title. Default is
#' \code{expression(paste("Wavenumber [", cm^-1, "]"))}.
#' @param ylab Character vector or mathematical expression
#' @param ylab Character string or mathematical expression
#' (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.
#' @param slice Logical whether to slice the data sets (select rows by position).
#' Default is \code{TRUE}.

8
R/pls-modeling.R

@ -621,7 +621,7 @@ evaluate_model_q <- function(x, model, response,
#' reference as list-columns. The tibble to be supplied to \code{spec_chem} can
#' be generated by the \code{join_chem_spc()} function.
#' @param response Response variable as symbol or name
#' (without quotes, no character vector). The provided response symbol needs to be
#' (without quotes, no character string). The provided response symbol needs to be
#' a column name in the \code{spec_chem} tibble.
#' @param variable Deprecated and replaced by `response`
#' @param center Logical whether to perform mean centering of each spectrum column
@ -630,7 +630,7 @@ evaluate_model_q <- function(x, model, response,
#' @param scale Logical whether to perform standard deviation scaling
#' of each spectrum column (e.g. wavenumber or wavelength) after common
#' spectrum preprocessing. Default is \code{scale = TRUE}
#' @param evaluation_method Character vector stating evaluation method.
#' @param evaluation_method Character string stating evaluation method.
#' Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
#' will split the data into a calibration (training) and validation (test) set,
#' and evaluate the final model by predicting on the validation set.
@ -810,10 +810,10 @@ pls_ken_stone <- fit_pls
#' reference as list-columns. The tibble to be supplied to \code{spec_chem} can
#' be generated by the \code{join_chem_spc()} function.
#' @param response Response variable as symbol or name
#' (without quotes, no character vector). The provided response symbol needs to be
#' (without quotes, no character string). The provided response symbol needs to be
#' a column name in the \code{spec_chem} tibble.
#' @param variable Deprecated and replaced by `response`
#' @param evaluation_method Character vector stating evaluation method.
#' @param evaluation_method Character string stating evaluation method.
#' Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
#' will split the data into a calibration (training) and validation (test) set,
#' and evaluate the final model by predicting on the validation set.

58
R/pls-vip.R

@ -2,7 +2,7 @@
### `pls' package.
### $Id: VIP.R,v 1.2 2007/07/30 09:17:36 bhm Exp $
### Copyright © 2006,2007 Björn-Helge Mevik
### Copyright: 2006,2007 Bjoern-Helge Mevik
### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License version 2 as
### published by the Free Software Foundation.
@ -16,9 +16,9 @@
### http://www.gnu.org/licenses/gpl-2.0.txt
### Contact info:
### Bjørn-Helge Mevik
### Bjoern-Helge Mevik
### bhx6@mevik.net
### Rødtvetvien 20
### Roedtvetvien 20
### N-0955 Oslo
### Norway
@ -60,7 +60,18 @@ VIPjh <- function(object, j, h) {
sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS))
}
#' @title Extract VIPs (variable importance in the projection) for a PLS
#' regression model output returned from model fitting with
#' \code{simplerspec::fit_pls()}
#' @description VIPs are extracted based on the \code{finalModel} sublist
#' in the \code{caret::train} output contained in the \code{model} element
#' of the \code{simplerspec::fit_pls()} model output list. The VIPs for
#' derived number of PLS components in the \code{finalModel} are computed.
#' @param mout Model output list returned from \code{simplerspec::fit_pls()}.
#' @usage extract_pls_vip(mout)
#' @return A tibble data frame with columns \code{wavenumber} and corresponding
#' VIP values in the column \code{vip} for the finally chosen PLS regression
#' model at the final number of PLS components.
#' @export
extract_pls_vip <- function(mout) {
# Compute VIP for all wavenumbers and select only VIPs with ncomp in final
@ -95,10 +106,43 @@ create_vip_rects <- function(df_vip) {
}
#' @title Plot stacked ggplot2 graphs with the Variable Importance for the
#' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, and the preprocessed spectra.
#' Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
#' and the preprocessed spectra.
#' @description Plot stacked ggplot2 graphs of VIP for the final
#' PLS regression model of the calibration (training) data set for the final
#' number of components, raw (replicate mean) spectra, and preprocessed spectra.
#' PLS regression model output of the calibration (training) data set for the
#' final number of components, raw (replicate mean) spectra, and preprocessed
#' spectra. Regions with VIP > 1 are highlighted across the stacked graphs
#' in beige colour rectangles. VIP calculation is implemented as described in
#' Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
#' methods when multicollinearity is present. Chemometrics and Intelligent
#' Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
#' @param mout Model output list that is returned from
#' \code{simplerspec::fit_pls()}. This object contains a nested list with
#' the \code{caret::train()} object (class \code{train}), based on which
#' VIPs at finally selected number of PLS components are computed.
#' @param y1 Character string of list-column name in
#' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
#' Default is \code{"spc_mean"}, which plots the mean calibration spectra after
#' resampling.
#' @param y2 Character string of list-column name in
#' \code{mout$data$calibration}, where spectra for bottom graph are extracted.
#' Default is \code{"spc_pre"}, which plots the preprocessed calibration
#' spectra after resampling.
#' @param by Character string that is used to assign spectra to the same group
#' and therefore ensures that all spectra are plotted with the same colour.
#' Default is \code{"sample_id"}
#' @param xlab Character string of X axis title for shared x axis of stacked
#' graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}
#' @param ylab1 Y axis title of bottom spectrum. Default is \code{"Absorbance"}.
#' @param ylab2 Y axis title of bottom spectrum. Default is
#' \code{"Preprocessed Abs."}.
#' @param alpha Double between 0 and 1 that defines transparency of spectra
#' lines in returned graph (ggplot plot object).
#' @usage plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
#' by = "sample_id",
#' xlab = expression(paste("Wavenumber [", cm^-1, "]")),
#' ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
#' alpha = 0.2)
#' @export
plot_pls_vip <- function(mout, y1 = "spc_mean", y2 = "spc_pre",
by = "sample_id",

3
man/average_spc.Rd

@ -6,6 +6,9 @@
\usage{
average_spc(spc_tbl)
}
\arguments{
\item{spc_tbl}{Spectra tibble after resampling with \code{resample_spc()}}
}
\description{
Averages spectra in tibble column by sample_id after
resampling spectra by \code{simplerspec::resample_spc()}.

24
man/extract_pls_vip.Rd

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pls-vip.R
\name{extract_pls_vip}
\alias{extract_pls_vip}
\title{Extract VIPs (variable importance in the projection) for a PLS
regression model output returned from model fitting with
\code{simplerspec::fit_pls()}}
\usage{
extract_pls_vip(mout)
}
\arguments{
\item{mout}{Model output list returned from \code{simplerspec::fit_pls()}.}
}
\value{
A tibble data frame with columns \code{wavenumber} and corresponding
VIP values in the column \code{vip} for the finally chosen PLS regression
model at the final number of PLS components.
}
\description{
VIPs are extracted based on the \code{finalModel} sublist
in the \code{caret::train} output contained in the \code{model} element
of the \code{simplerspec::fit_pls()} model output list. The VIPs for
derived number of PLS components in the \code{finalModel} are computed.
}

4
man/fit_pls.Rd

@ -25,7 +25,7 @@ reference as list-columns. The tibble to be supplied to \code{spec_chem} can
be generated by the `join_chem_spc() function`}
\item{response}{Response variable as symbol or name
(without quotes, no character vector). The provided response symbol needs to be
(without quotes, no character string). The provided response symbol needs to be
a column name in the \code{spec_chem} tibble.}
\item{variable}{Deprecated and replaced by `response`}
@ -38,7 +38,7 @@ a column name in the \code{spec_chem} tibble.}
of each spectrum column (e.g. wavenumber or wavelength) after common
spectrum preprocessing. Default is \code{scale = TRUE}}
\item{evaluation_method}{Character vector stating evaluation method.
\item{evaluation_method}{Character string stating evaluation method.
Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
will split the data into a calibration (training) and validation (test) set,
and evaluate the final model by predicting on the validation set.

4
man/fit_rf.Rd

@ -16,12 +16,12 @@ reference as list-columns. The tibble to be supplied to \code{spec_chem} can
be generated by the `join_chem_spc() function`}
\item{response}{Response variable as symbol or name
(without quotes, no character vector). The provided response symbol needs to be
(without quotes, no character string). The provided response symbol needs to be
a column name in the \code{spec_chem} tibble.}
\item{variable}{Deprecated and replaced by `response`}
\item{evaluation_method}{Character vector stating evaluation method.
\item{evaluation_method}{Character string stating evaluation method.
Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
will split the data into a calibration (training) and validation (test) set,
and evaluate the final model by predicting on the validation set.

49
man/plot_pls_vip.Rd

@ -3,14 +3,53 @@
\name{plot_pls_vip}
\alias{plot_pls_vip}
\title{Plot stacked ggplot2 graphs with the Variable Importance for the
Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id, and the preprocessed spectra.}
Projection (VIP) scores, mean replicate spectra (absorbance) per sample_id,
and the preprocessed spectra.}
\usage{
plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre", by = "sample_id",
plot_pls_vip(mout, y1 = "spc_mean", y2 = "spc_pre",
by = "sample_id",
xlab = expression(paste("Wavenumber [", cm^-1, "]")),
ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.", alpha = 0.2)
ylab1 = "Absorbance", ylab2 = "Preprocessed Abs.",
alpha = 0.2)
}
\arguments{
\item{mout}{Model output list that is returned from
\code{simplerspec::fit_pls()}. This object contains a nested list with
the \code{caret::train()} object (class \code{train}), based on which
VIPs at finally selected number of PLS components are computed.}
\item{y1}{Character string of list-column name in
\code{mout$data$calibration}, where spectra for bottom graph are extracted.
Default is \code{"spc_mean"}, which plots the mean calibration spectra after
resampling.}
\item{y2}{Character string of list-column name in
\code{mout$data$calibration}, where spectra for bottom graph are extracted.
Default is \code{"spc_pre"}, which plots the preprocessed calibration
spectra after resampling.}
\item{by}{Character string that is used to assign spectra to the same group
and therefore ensures that all spectra are plotted with the same colour.
Default is \code{"sample_id"}}
\item{xlab}{Character string of X axis title for shared x axis of stacked
graphs. Default is \code{expression(paste("Wavenumber [", cm^-1, "]"))}}
\item{ylab1}{Y axis title of bottom spectrum. Default is \code{"Absorbance"}.}
\item{ylab2}{Y axis title of bottom spectrum. Default is
\code{"Preprocessed Abs."}.}
\item{alpha}{Double between 0 and 1 that defines transparency of spectra
lines in returned graph (ggplot plot object).}
}
\description{
Plot stacked ggplot2 graphs of VIP for the final
PLS regression model of the calibration (training) data set for the final
number of components, raw (replicate mean) spectra, and preprocessed spectra.
PLS regression model output of the calibration (training) data set for the
final number of components, raw (replicate mean) spectra, and preprocessed
spectra. Regions with VIP > 1 are highlighted across the stacked graphs
in beige colour rectangles. VIP calculation is implemented as described in
Chong, I.-G., and Jun, C.-H. (2005). Performance of some variable selection
methods when multicollinearity is present. Chemometrics and Intelligent
Laboratory Systems, 78(1--2), 103--112. https://doi.org/10.1016/j.chemolab.2004.12.011
}

22
man/plot_spc.Rd

@ -20,41 +20,41 @@ list-column}
\item{spc_tbl_2}{Tibble that contains the second set of spectra (optional)
to plot as list-column.}
\item{x_unit}{Character describing the x axis unit. Default is
\item{x_unit}{Character string describing the x axis unit. Default is
\code{"wavenumber"}, which will produce a graph with wavenumbers on the
x axis with reversed number. If \code{x_unit = "wavelength"}, the axis
will be in regular order (lower wavelengths in nm on the left and higher
on the right side of the axis).}
\item{y}{Character vector of list-column name in tibble where spectra of
\item{y}{Character string of list-column name in tibble where spectra of
desired type are extracted to plot.}
\item{by}{Character vector of column that is used to group the spectra.
\item{by}{Character string of column that is used to group the spectra.
Default is \code{"unique_id"}. If replica spectra are present in the file
and processed spectra resulting after averaging need to be plotted,
it is recommend to use \code{"sample_id"} as argument to group according
the sample_id column in the tibble(s) containing the spectra (\code{spc_tbl}
and \code{spc_tbl_2}).}
\item{graph_id_1}{Character used for grouping the first spectra set
\item{graph_id_1}{Character string used for grouping the first spectra set
(\code{spc_tbl}) and producing
the label text accordingly. Default is \code{"Set 1"}.}
\item{graph_id_2}{Character used for grouping the second spectra set
\item{graph_id_2}{Character string used for grouping the second spectra set
(\code{spc_tbl_2}) and producing the label text accordingly. Default is
\code{"Set 2"}}
\item{graph_id_1_col}{Character for the colour of the first spectra set.
Default is \code{"black"}.}
\item{graph_id_1_col}{Character string for the colour of the first spectra
set. Default is \code{"black"}.}
\item{graph_id_2_col}{Character for the colour of the first spectra set.
Default is \code{"red"}.}
\item{graph_id_2_col}{Character string for the colour of the second spectra
set. Default is \code{"red"}.}
\item{xlab}{Character vector or mathematical expression
\item{xlab}{Character string or mathematical expression
(use \code{expression}) for the x axis title. Default is
\code{expression(paste("Wavenumber [", cm^-1, "]"))}.}
\item{ylab}{Character vector or mathematical expression
\item{ylab}{Character string or mathematical expression
(use \code{expression}) for the y axis title. Default is \code{"absorbance"}.}
\item{slice}{Logical whether to slice the data sets (select rows by position).

5
man/select_ref_spc.Rd

@ -17,10 +17,7 @@ of principal components kept corresponds to the number of components
explaining at least (pc * 100) percent of the total variance.}
\item{print}{logical expression whether a plot (ggplot2) of sample selection
for reference analysis is shown in PCA space}
\item{validation}{Logical expression whether
calibration sampling is performed
for reference analysis is shown in PCA space
(\code{TRUE} or \code{FALSE}).}
}
\description{

Loading…
Cancel
Save