simplerspec/man/fit_pls.Rd


								% Generated by roxygen2: do not edit by hand

								% Please edit documentation in R/pls-modeling.R

								\name{fit_pls}

								\alias{fit_pls}

								\alias{pls_ken_stone}

								\title{Calibration sampling, model tuning, and PLS regression}

								\usage{

								fit_pls(

								  spec_chem,

								  response,

								  variable = NULL,

								  center = TRUE,

								  scale = TRUE,

								  evaluation_method = "test_set",

								  validation = TRUE,

								  split_method = "ken_stone",

								  ratio_val = 1/3,

								  ken_sto_pc = 2,

								  pc,

								  invert = TRUE,

								  tuning_method = "resampling",

								  resampling_method = "kfold_cv",

								  cv = NULL,

								  resampling_seed = 123,

								  pls_ncomp_max = 20,

								  ncomp_fixed = 5,

								  print = TRUE,

								  env = parent.frame()

								)


								pls_ken_stone(

								  spec_chem,

								  response,

								  variable = NULL,

								  center = TRUE,

								  scale = TRUE,

								  evaluation_method = "test_set",

								  validation = TRUE,

								  split_method = "ken_stone",

								  ratio_val = 1/3,

								  ken_sto_pc = 2,

								  pc,

								  invert = TRUE,

								  tuning_method = "resampling",

								  resampling_method = "kfold_cv",

								  cv = NULL,

								  resampling_seed = 123,

								  pls_ncomp_max = 20,

								  ncomp_fixed = 5,

								  print = TRUE,

								  env = parent.frame()

								)

								}

								\arguments{

								\item{spec_chem}{Tibble that contains spectra, metadata and chemical

								reference as list-columns. The tibble to be supplied to \code{spec_chem} can

								be generated by the \code{join_chem_spc()} function.}


								\item{response}{Response variable as symbol or name

								(without quotes, no character string). The provided response symbol needs to be

								a column name in the \code{spec_chem} tibble.}


								\item{variable}{Deprecated and replaced by \code{response}}


								\item{center}{Logical whether to perform mean centering of each spectrum column

								(e.g. wavenumber or wavelength) after common spectrum preprocessing. Default is

								\code{center = TRUE}}


								\item{scale}{Logical whether to perform standard deviation scaling

								of each spectrum column (e.g. wavenumber or wavelength) after common

								spectrum preprocessing. Default is \code{scale = TRUE}}


								\item{evaluation_method}{Character string stating evaluation method.

								Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"}

								will split the data into a calibration (training) and validation (test) set,

								and evaluate the final model by predicting on the validation set.

								If \code{"resampling"}, the finally selected model will be evaluated based

								on the cross-validation hold-out predictions.}


								\item{validation}{Deprecated and replaced by \code{evaluation_method}.

								Default is \code{TRUE}.}


								\item{split_method}{Method for splitting the data into an independent test

								set. Default is \code{"ken_stone"}, which will select samples for calibration

								based on Kennard-Stone sampling algorithm of preprocessed spectra. The

								proportion of validation to the total number of samples can be specified

								in the argument \code{ratio_val}.

								\code{split_method = "random"} will create a single random split.}


								\item{ratio_val}{Ratio of validation (test) samples to

								total number of samples (calibration (training) and validation (test)).}


								\item{ken_sto_pc}{Number of components used

								for calculating Mahalanobis distance on PCA scores for computing

								Kennard-Stone algorithm.

								Default is \code{ken_sto_pc = 2}, which will use the first two PCA

								components.}


								\item{pc}{Deprecated; renamed argument is \code{ken_sto_pc}.}


								\item{invert}{Logical}


								\item{tuning_method}{Character specifying tuning method. Tuning method

								affects how caret selects a final tuning value set from a list of candidate

								values. Possible values are \code{"resampling"}, which will use a

								specified resampling method such as repeated k-fold cross-validation (see

								argument \code{resampling_method}) and the generated performance profile

								based on the hold-out predictions to decide on the final tuning values

								that lead to optimal model performance. The value \code{"none"} will force

								caret to compute a final model for a predefined candidate PLS tuning

								parameter number of PLS components. In this case, the value

								supplied by \code{ncomp_fixed} is used to set model complexity at

								a fixed number of components.}


								\item{resampling_method}{Character specifying resampling method. Currently,

								\code{"kfold_cv"} (default, performs 10-fold cross-validation),

								\code{"rep_kfold_cv"} (performs 5-times repeated 10-fold cross-validation),

								\code{"loocv"} (performs leave-one-out cross-validation), and \code{"none"}

								(if \code{resampling_method = "none"}) are supported.}


								\item{cv}{Deprecated. Use \code{resampling_method} instead.}


								\item{resampling_seed}{Random seed (integer) that will be used for generating

								resampling indices, which will be supplied to \code{caret::trainControl}.

								This makes sure that modeling results are reproducible when re-fitting.

								Default is \code{resampling_seed = 123}.}


								\item{pls_ncomp_max}{Maximum number of PLS components that are evaluated

								by caret::train. Caret will aggregate a performance profile using resampling

								for an integer sequence from 1 to \code{pls_ncomp_max}}


								\item{ncomp_fixed}{Integer of fixed number of PLS components. Will only be

								used when \code{tuning_method = "none"} and  \code{resampling_method = "none"}

								are used.}


								\item{print}{Logical expression whether model evaluation graphs shall be

								printed}


								\item{env}{Environment where function is evaluated. Default is

								\code{parent.frame()}.}

								}

								\description{

								Perform calibration sampling and use selected

								calibration set for model tuning

								}