diff --git a/DESCRIPTION b/DESCRIPTION index 676d09d..0c83260 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,6 @@ Imports: broom License: GPL-2 LazyData: TRUE -RoxygenNote: 7.1.0 +RoxygenNote: 7.2.3 Roxygen: list(markdown = TRUE) Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 5f23f29..68c2174 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,4 +51,5 @@ importFrom(purrr,modify_depth) importFrom(rlang,ensym) importFrom(rlang,quo_name) importFrom(rlang,set_names) +importFrom(stringr,str_replace) importFrom(tidyselect,one_of) diff --git a/R/plot_spc.R b/R/plot_spc.R index ea93b46..83e7678 100644 --- a/R/plot_spc.R +++ b/R/plot_spc.R @@ -32,10 +32,6 @@ #' \code{expression(paste("Wavenumber [", cm^-1, "]"))}. #' @param ylab Character string or mathematical expression #' (use \code{expression}) for the y axis title. Default is \code{"absorbance"}. -#' @param slice Logical whether to slice the data sets (select rows by position). -#' Default is \code{TRUE}. -#' This argument will soon be supported to group spectra based id variables present -#' in spc_tbl such as the `sample_id` and slicing the data set prior to plotting. #' @param alpha Double in between 0 and 1. Sets the transparency for the plotted #' spectra lines. 
#' @param legend Logical whether to plot a legend for the spectra describing @@ -56,17 +52,12 @@ plot_spc <- function(spc_tbl, spc_tbl_2 = NULL, graph_id_1 = "Set 1", graph_id_2 = "Set 2", graph_id_1_col = "black", graph_id_2_col = "red", xlab = expression(paste("Wavenumber [", cm^-1, "]")), - ylab = "Absorbance", slice = TRUE, alpha = 0.2, + ylab = "Absorbance", alpha = 0.2, legend = TRUE) { # Fix `R CMD check NOTE`: "no visible binding for global variable ‘...‘" graph_id <- id <- variable <- value <- NULL - # (0) Slice spectra tibble to remove triplicate spectra (reps) - # only sample_id level - if(slice == TRUE) { - spc_tbl <- dplyr::slice(spc_tbl) - } # (1) Gather spectra into one data.table if(!is.null(spc_tbl_2)) { if(y == "spc") { diff --git a/R/select-spc.R b/R/select-spc.R index 9f3b587..8680bdf 100644 --- a/R/select-spc.R +++ b/R/select-spc.R @@ -7,7 +7,7 @@ #' @param lcol_xvalues List-column containing x-values, specified with #' column name as symbols or 1L character vector. #' @param every Every n-th spectral positions to keep as 1L integer vector. -#' @return +#' @return a spectral tibble #' @export select_spc_vars <- function(spc_tbl, lcol_spc = "spc_pre", diff --git a/man/assess_multimodels.Rd b/man/assess_multimodels.Rd index b61b50a..914a4e0 100644 --- a/man/assess_multimodels.Rd +++ b/man/assess_multimodels.Rd @@ -4,15 +4,19 @@ \alias{assess_multimodels} \title{Assess multiple pairs of measured and predicted values} \usage{ -assess_multimodels(data, ..., .metrics = c("simplerspec", "yardstick"), - .model_name = "model") +assess_multimodels( + data, + ..., + .metrics = c("simplerspec", "yardstick"), + .model_name = "model" +) } \arguments{ \item{data}{Data frame with all measured (observed) and predicted variables.} \item{...}{Multiple arguments with observed (measured)-predicted pairs, specified with \code{dplyr::vars(o = , p = )}. -Column names can strings or symbols. 
The arguments in `...` need to be named.} +Column names can be strings or symbols. The arguments in \code{...} need to be named.} \item{.metrics}{Character vector with package used for metrics calculation. Default is \code{"simplerspec"}, which uses diff --git a/man/fit_pls.Rd b/man/fit_pls.Rd index 43222c1..2821663 100644 --- a/man/fit_pls.Rd +++ b/man/fit_pls.Rd @@ -5,32 +5,62 @@ \alias{pls_ken_stone} \title{Calibration sampling, model tuning, and PLS regression} \usage{ -fit_pls(spec_chem, response, variable = NULL, center = TRUE, scale = TRUE, - evaluation_method = "test_set", validation = TRUE, - split_method = "ken_stone", ratio_val = 1/3, ken_sto_pc = 2, pc, - invert = TRUE, tuning_method = "resampling", - resampling_method = "kfold_cv", cv = NULL, resampling_seed = 123, - pls_ncomp_max = 20, ncomp_fixed = 5, print = TRUE, - env = parent.frame()) - -pls_ken_stone(spec_chem, response, variable = NULL, center = TRUE, - scale = TRUE, evaluation_method = "test_set", validation = TRUE, - split_method = "ken_stone", ratio_val = 1/3, ken_sto_pc = 2, pc, - invert = TRUE, tuning_method = "resampling", - resampling_method = "kfold_cv", cv = NULL, resampling_seed = 123, - pls_ncomp_max = 20, ncomp_fixed = 5, print = TRUE, - env = parent.frame()) +fit_pls( + spec_chem, + response, + variable = NULL, + center = TRUE, + scale = TRUE, + evaluation_method = "test_set", + validation = TRUE, + split_method = "ken_stone", + ratio_val = 1/3, + ken_sto_pc = 2, + pc, + invert = TRUE, + tuning_method = "resampling", + resampling_method = "kfold_cv", + cv = NULL, + resampling_seed = 123, + pls_ncomp_max = 20, + ncomp_fixed = 5, + print = TRUE, + env = parent.frame() +) + +pls_ken_stone( + spec_chem, + response, + variable = NULL, + center = TRUE, + scale = TRUE, + evaluation_method = "test_set", + validation = TRUE, + split_method = "ken_stone", + ratio_val = 1/3, + ken_sto_pc = 2, + pc, + invert = TRUE, + tuning_method = "resampling", + resampling_method = "kfold_cv", + cv = NULL, 
resampling_seed = 123, + pls_ncomp_max = 20, + ncomp_fixed = 5, + print = TRUE, + env = parent.frame() +) } \arguments{ \item{spec_chem}{Tibble that contains spectra, metadata and chemical reference as list-columns. The tibble to be supplied to \code{spec_chem} can -be generated by the `join_chem_spc() function`} +be generated by the \verb{join_chem_spc() function}} \item{response}{Response variable as symbol or name (without quotes, no character string). The provided response symbol needs to be a column name in the \code{spec_chem} tibble.} -\item{variable}{Depreciated and replaced by `response`} +\item{variable}{Deprecated and replaced by \code{response}} \item{center}{Logical whether to perform mean centering of each spectrum column (e.g. wavenumber or wavelength) after common spectrum preprocessing. Default is @@ -66,7 +96,7 @@ Kennard-Stone algorithm. Default is \code{ken_sto_pc = 2}, which will use the first two PCA components.} -\item{pc}{Depreciated; renamed argument is `ken_sto_pc`.} +\item{pc}{Deprecated; renamed argument is \code{ken_sto_pc}.} \item{invert}{Logical} diff --git a/man/fit_rf.Rd b/man/fit_rf.Rd index 193cee3..ce0b143 100644 --- a/man/fit_rf.Rd +++ b/man/fit_rf.Rd @@ -4,22 +4,35 @@ \alias{fit_rf} \title{Calibration sampling, and random forest model tuning and evaluation} \usage{ -fit_rf(spec_chem, response, variable = NULL, evaluation_method = "test_set", - validation = NULL, split_method = "ken_stone", ratio_val, - ken_sto_pc = 2, pc = NULL, invert = TRUE, - tuning_method = "resampling", resampling_seed = 123, cv = NULL, - ntree_max = 500, print = TRUE, env = parent.frame()) +fit_rf( + spec_chem, + response, + variable = NULL, + evaluation_method = "test_set", + validation = NULL, + split_method = "ken_stone", + ratio_val, + ken_sto_pc = 2, + pc = NULL, + invert = TRUE, + tuning_method = "resampling", + resampling_seed = 123, + cv = NULL, + ntree_max = 500, + print = TRUE, + env = parent.frame() +) } \arguments{ \item{spec_chem}{Tibble 
that contains spectra, metadata and chemical reference as list-columns. The tibble to be supplied to \code{spec_chem} can -be generated by the `join_chem_spc() function`} +be generated by the \verb{join_chem_spc() function}} \item{response}{Response variable as symbol or name (without quotes, no character string). The provided response symbol needs to be a column name in the \code{spec_chem} tibble.} -\item{variable}{Depreciated and replaced by `response`} +\item{variable}{Deprecated and replaced by \code{response}} \item{evaluation_method}{Character string stating evaluation method. Either \code{"test_set"} (default) or \code{"resampling"}. \code{"test_set"} @@ -47,7 +60,7 @@ Kennard-Stone algorithm. Default is \code{ken_sto_pc = 2}, which will use the first two PCA components.} -\item{pc}{Depreciated; renamed argument is `ken_sto_pc`.} +\item{pc}{Deprecated; renamed argument is \code{ken_sto_pc}.} \item{invert}{Logical} diff --git a/man/merge_dts.Rd b/man/merge_dts.Rd index f0df224..ec47bfa 100644 --- a/man/merge_dts.Rd +++ b/man/merge_dts.Rd @@ -5,8 +5,13 @@ \title{Merge list-columns of spectra, x-axis values, metadata and additional measured variables into a single long form data.table} \usage{ -merge_dts(spc_tbl, lcols_spc = c("spc", "spc_pre"), lcol_measure = NULL, - spc_id = "unique_id", group_id = "sample_id") +merge_dts( + spc_tbl, + lcols_spc = c("spc", "spc_pre"), + lcol_measure = NULL, + spc_id = "unique_id", + group_id = "sample_id" +) } \arguments{ \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata diff --git a/man/merge_dts_l.Rd b/man/merge_dts_l.Rd index b84b335..ac2ce3d 100644 --- a/man/merge_dts_l.Rd +++ b/man/merge_dts_l.Rd @@ -5,8 +5,13 @@ \title{Wrapper function around \code{merge_dts()} for list of tibbles to aggregate data for plotting.} \usage{ -merge_dts_l(spc_tbl_l, lcols_spc = c("spc", "spc_pre"), lcol_measure = NULL, - spc_id = "unique_id", group_id = "sample_id") +merge_dts_l( + spc_tbl_l, + lcols_spc = 
c("spc", "spc_pre"), + lcol_measure = NULL, + spc_id = "unique_id", + group_id = "sample_id" +) } \arguments{ \item{spc_tbl_l}{List of spectral tibbles (data frames).} diff --git a/man/plot_spc.Rd b/man/plot_spc.Rd index ac4289b..cde0a66 100644 --- a/man/plot_spc.Rd +++ b/man/plot_spc.Rd @@ -57,11 +57,6 @@ set. Default is \code{"red"}.} \item{ylab}{Character string or mathematical expression (use \code{expression}) for the y axis title. Default is \code{"absorbance"}.} -\item{slice}{Logical whether to slice the data sets (select rows by position). -Default is \code{TRUE}. -This argument will soon be supported to group spectra based id variables present -in spc_tbl such as the `sample_id` and slicing the data set prior to plotting.} - \item{alpha}{Double in between 0 and 1. Sets the transparency for the plotted spectra lines.} diff --git a/man/plot_spc_ext.Rd b/man/plot_spc_ext.Rd index bcc3ebb..be063f6 100644 --- a/man/plot_spc_ext.Rd +++ b/man/plot_spc_ext.Rd @@ -4,13 +4,27 @@ \alias{plot_spc_ext} \title{ggplot2 wrapper for extended spectra plotting} \usage{ -plot_spc_ext(spc_tbl, spc_tbl_l = NULL, lcols_spc = "spc", - lcol_measure = NULL, lcol_measure_col_palette = "Spectral", - lcol_measure_col_direction = -1, spc_id = "unique_id", - group_id = "sample_id", group_id_order = TRUE, group_color = TRUE, - group_color_palette = NULL, group_panel = TRUE, group_legend = FALSE, - ncol = NULL, relabel_spc = TRUE, ylab = "Spectrum value", alpha = 0.5, - line_width = 0.2, ...) +plot_spc_ext( + spc_tbl, + spc_tbl_l = NULL, + lcols_spc = "spc", + lcol_measure = NULL, + lcol_measure_col_palette = "Spectral", + lcol_measure_col_direction = -1, + spc_id = "unique_id", + group_id = "sample_id", + group_id_order = TRUE, + group_color = TRUE, + group_color_palette = NULL, + group_panel = TRUE, + group_legend = FALSE, + ncol = NULL, + relabel_spc = TRUE, + ylab = "Spectrum value", + alpha = 0.5, + line_width = 0.2, + ... 
+) } \arguments{ \item{spc_tbl}{Tibble data frame containing spectra, x-axis values, metadata @@ -27,14 +41,14 @@ of the measure columns. This argument is optional. Default is \code{NULL}, which does not extract an additional measure column.} \item{lcol_measure_col_palette}{Palette value supplied to -`ggplot::scale_colour_brewer()`. Default is `"Spectral"`, but you can set -it to the default argument `1` (will use -`scale_colour_brewer(..., palette = 1)`).} +\code{ggplot2::scale_colour_brewer()}. Default is \code{"Spectral"}, but you can set +it to the default argument \code{1} (will use +\code{scale_colour_brewer(..., palette = 1)}).} \item{lcol_measure_col_direction}{Sets the the order of colours in the scale that is based on a measure column. Default is \code{-1} which reverses the -scale. Argument is passed on to the function `ggplot2::sclae_colour_brewer()` -as argument `direction`.} +scale. Argument is passed on to the function \code{ggplot2::scale_colour_brewer()} +as argument \code{direction}.} \item{spc_id}{Character vector denoting column name for a unique spectrum ID. Default is \code{"unique_id"}.} @@ -51,8 +65,8 @@ numbers. Default is \code{TRUE}.} specified by \code{group_id}.} \item{group_color_palette}{Character (1L) defining the diverging colour -scales from colorbrewer.org; see `?scale_colour_brewer` for supported -diverging colur types (`palette` argument).} +scales from colorbrewer.org; see \code{?scale_colour_brewer} for supported +diverging colour types (\code{palette} argument).} \item{group_panel}{Logical defining whether spectra are arranged into panels by groups specified in \code{group_id}. 
Default is \code{TRUE}.} diff --git a/man/preprocess_spc.Rd b/man/preprocess_spc.Rd index 9502585..99e9a57 100644 --- a/man/preprocess_spc.Rd +++ b/man/preprocess_spc.Rd @@ -4,8 +4,7 @@ \alias{preprocess_spc} \title{Preprocess spectra} \usage{ -preprocess_spc(spc_tbl, select, column_in = "spc_mean", - custom_function = NULL) +preprocess_spc(spc_tbl, select, column_in = "spc_mean", custom_function = NULL) } \arguments{ \item{spc_tbl}{Tibble that contains spectra to be preprocessed within @@ -24,7 +23,7 @@ in \code{select}.} \item{custom_function}{A character string of a custom processing function that is later parsed (produces expression in a list) and evaluated within - the function \code{preprocess_spc}. +the function \code{preprocess_spc}. The character vector argument of \code{custom_function} needs to contain \code{"spc_raw"}, which is the single data table of spectra that results from binding a list of data.tables (spectra to preprocess) diff --git a/man/read_asd.Rd b/man/read_asd.Rd index a6e99a9..e270644 100644 --- a/man/read_asd.Rd +++ b/man/read_asd.Rd @@ -14,7 +14,7 @@ remaining columns are sample spectra referred by an ID name provided in the first row of these columns.} } \value{ -Spectra data in tibble data frame (class `tbl_df`) that contains +Spectra data in tibble data frame (class \code{tbl_df}) that contains columns \code{sample_id} (derived from 2nd and following column names of tab delimited ASD exported text file), \code{spc} (list-column of spectral matrices) diff --git a/man/read_opus_univ.Rd b/man/read_opus_univ.Rd index 282c73f..7faad1e 100644 --- a/man/read_opus_univ.Rd +++ b/man/read_opus_univ.Rd @@ -16,7 +16,7 @@ Possible values are: "spc" (AB block in Bruker Opus software), "spc_nocomp" correction has been set in Opus), "ScSm" (Single channel spectrum of the sample), "ScRf" (Single channel spectrum of the sample), "IgSm" (Interferogram of the sample), "IgRf" (Interferogram of the reference). 
Default is - \code{extract = c("spc")}.} +\code{extract = c("spc")}.} \item{parallel}{Logical (\code{TRUE} or \code{FALSE} indicating whether files are read in parallel (multiple processors or multiple cores)). diff --git a/man/remove_outliers.Rd b/man/remove_outliers.Rd index ab17744..26a6582 100644 --- a/man/remove_outliers.Rd +++ b/man/remove_outliers.Rd @@ -19,20 +19,20 @@ If \code{rm = FALSE}, there will be no outlier removal} \value{ Returns list \code{spectra_out} that contains: \itemize{ - \item \code{MIR_mean}: Outlier removed MIR spectra as - data.frame object. If \code{remove = FALSE}, - the function will - return almost identical list identical to \code{list_spectra}, - except that the first \code{indices} column of the spectral - data frame \code{MIR_mean} is removed - (This is done for both options - \code{remove = TRUE} and \code{remove = FALSE}). - \item \code{data_meta}: metadata data.frame, identical - as in the \code{list_spectra} input list. - \item \code{plot_out}: (optional) ggplot2 graph - that shows all spectra (absorbance on x-axis and wavenumber - on y-axis) with outlier marked, if - \code{remove = TRUE}. +\item \code{MIR_mean}: Outlier removed MIR spectra as +data.frame object. If \code{remove = FALSE}, +the function will +return almost identical list identical to \code{list_spectra}, +except that the first \code{indices} column of the spectral +data frame \code{MIR_mean} is removed +(This is done for both options +\code{remove = TRUE} and \code{remove = FALSE}). +\item \code{data_meta}: metadata data.frame, identical +as in the \code{list_spectra} input list. +\item \code{plot_out}: (optional) ggplot2 graph +that shows all spectra (absorbance on x-axis and wavenumber +on y-axis) with outlier marked, if +\code{remove = TRUE}. 
} } \description{ diff --git a/man/select_spc_vars.Rd b/man/select_spc_vars.Rd index bdab462..4788486 100644 --- a/man/select_spc_vars.Rd +++ b/man/select_spc_vars.Rd @@ -24,7 +24,7 @@ column name as symbols or 1L character vector.} \item{every}{Every n-th spectral positions to keep as 1L integer vector.} } \value{ - +a spectral tibble } \description{ Select every n-th spectral variable for all spectra and x-values in spectral diff --git a/man/slice_xvalues.Rd b/man/slice_xvalues.Rd index 8546d3e..dc28dd4 100644 --- a/man/slice_xvalues.Rd +++ b/man/slice_xvalues.Rd @@ -4,8 +4,12 @@ \alias{slice_xvalues} \title{Slice spectra into defined x-axis ranges} \usage{ -slice_xvalues(spc_tbl, xunit_lcol = "wavenumbers", spc_lcol = "spc", - xvalues_cut = NULL) +slice_xvalues( + spc_tbl, + xunit_lcol = "wavenumbers", + spc_lcol = "spc", + xvalues_cut = NULL +) } \arguments{ \item{spc_tbl}{Spectral data in a tibble object (classes "tibble_df", "tbl"