Streamlining spectral data processing and modeling for spectroscopy applications
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

145 lines
5.2 KiB

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pls-modeling.R
\name{fit_pls}
\alias{fit_pls}
\alias{pls_ken_stone}
\title{Calibration sampling, model tuning, and PLS regression}
\usage{
fit_pls(
spec_chem,
response,
variable = NULL,
center = TRUE,
scale = TRUE,
evaluation_method = "test_set",
validation = TRUE,
split_method = "ken_stone",
ratio_val = 1/3,
ken_sto_pc = 2,
pc,
invert = TRUE,
tuning_method = "resampling",
resampling_method = "kfold_cv",
cv = NULL,
resampling_seed = 123,
pls_ncomp_max = 20,
ncomp_fixed = 5,
print = TRUE,
env = parent.frame()
)
pls_ken_stone(
spec_chem,
response,
variable = NULL,
center = TRUE,
scale = TRUE,
evaluation_method = "test_set",
validation = TRUE,
split_method = "ken_stone",
ratio_val = 1/3,
ken_sto_pc = 2,
pc,
invert = TRUE,
tuning_method = "resampling",
resampling_method = "kfold_cv",
cv = NULL,
resampling_seed = 123,
pls_ncomp_max = 20,
ncomp_fixed = 5,
print = TRUE,
env = parent.frame()
)
}
\arguments{
\item{spec_chem}{Tibble that contains spectra, metadata and chemical
reference as list-columns. The tibble to be supplied to \code{spec_chem} can
be generated by the \code{join_chem_spc()} function.}
\item{response}{Response variable as symbol or name
(without quotes, no character string). The provided response symbol needs to be
a column name in the \code{spec_chem} tibble.}
\item{variable}{Deprecated and replaced by \code{response}.}
\item{center}{Logical whether to perform mean centering of each spectrum column
(e.g. wavenumber or wavelength) after common spectrum preprocessing. Default is
\code{center = TRUE}}
\item{scale}{Logical whether to perform standard deviation scaling
of each spectrum column (e.g. wavenumber or wavelength) after common
spectrum preprocessing. Default is \code{scale = TRUE}}
\item{evaluation_method}{Character string stating evaluation method.
Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}
will split the data into a calibration (training) and validation (test) set,
and evaluate the final model by predicting on the validation set.
If \code{"resampling"}, the finally selected model will be evaluated based
on the cross-validation hold-out predictions.}
\item{validation}{Deprecated and replaced by \code{evaluation_method}.
Default is \code{TRUE}.}
\item{split_method}{Method used to split the data into an independent test
set. Default is \code{"ken_stone"}, which will select samples for calibration
based on the Kennard-Stone sampling algorithm applied to preprocessed spectra.
The proportion of validation to the total number of samples can be specified
in the argument \code{ratio_val}.
\code{split_method = "random"} will create a single random split.}
\item{ratio_val}{Ratio of validation (test) samples to
total number of samples (calibration (training) and validation (test)).}
\item{ken_sto_pc}{Number of components used
for calculating the Mahalanobis distance on PCA scores when computing the
Kennard-Stone algorithm.
Default is \code{ken_sto_pc = 2}, which will use the first two PCA
components.}
\item{pc}{Deprecated; renamed argument is \code{ken_sto_pc}.}
\item{invert}{Logical}
\item{tuning_method}{Character specifying tuning method. Tuning method
affects how caret selects a final tuning value set from a list of candidate
values. Possible values are \code{"resampling"}, which will use a
specified resampling method such as repeated k-fold cross-validation (see
argument \code{resampling_method}) and the generated performance profile
based on the hold-out predictions to decide on the final tuning values
that lead to optimal model performance. The value \code{"none"} will force
caret to compute a final model for a predefined value of the PLS tuning
parameter, the number of PLS components. In this case, the value
supplied by \code{ncomp_fixed} is used to set model complexity at
a fixed number of components.}
\item{resampling_method}{Character specifying resampling method. Currently,
\code{"kfold_cv"} (default, performs 10-fold cross-validation),
\code{"rep_kfold_cv"} (performs 5-times repeated 10-fold cross-validation),
\code{"loocv"} (performs leave-one-out cross-validation), and \code{"none"}
(if \code{resampling_method = "none"}) are supported.}
\item{cv}{Deprecated. Use \code{resampling_method} instead.}
\item{resampling_seed}{Random seed (integer) that will be used for generating
resampling indices, which will be supplied to \code{caret::trainControl}.
This makes sure that modeling results are reproducible when re-fitting.
Default is \code{resampling_seed = 123}.}
\item{pls_ncomp_max}{Maximum number of PLS components that are evaluated
by \code{caret::train}. Caret will aggregate a performance profile using
resampling for an integer sequence from 1 to \code{pls_ncomp_max}.}
\item{ncomp_fixed}{Integer of fixed number of PLS components. Will only be
used when \code{tuning_method = "none"} and \code{resampling_method = "none"}
are used.}
\item{print}{Logical expression whether model evaluation graphs shall be
printed}
\item{env}{Environment where function is evaluated. Default is
\code{parent.frame()}.}
}
\description{
Perform calibration sampling and use selected
calibration set for model tuning
}