Add first locally developed version of simplerspec package to github

8 years ago · 7ddbe3ec0e
320 changed files with 491036 additions and 0 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@ -0,0 +1,2 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
 .Rproj.user
 .Rhistory
 .RData
 .Ruserdata
--- a/27
+++ b/27
@ -0,0 +1,27 @@
 Package: simplerspec
 Type: Package
 Title: Soil and plant spectroscopic model building and prediction
 Depends: R (>= 3.2)
 Imports:
    ggplot2 (>= 2.0.0),
    plyr,
    data.table,
    reshape2,
    mvoutlier,
    hexView,
    Rcpp,
    hyperSpec,
    prospectr,
    dplyr,
    caret,
    tidyr
 Version: 0.1.0
 Authors@R: person("Philipp", "Baumann",
  email = "baumanph@student.ethz.ch", role = c("aut", "cre"))
 Description: Functions that cover
    reading of spectral data, outlier removal,
    spectral preprocessing, calibration sampling, PLS regression
    using caret, and model diagnostic statistics and plots.
 License: GPL-2
 LazyData: TRUE
 RoxygenNote: 5.0.1
--- a/23
+++ b/23
@ -0,0 +1,23 @@
 # Generated by roxygen2: do not edit by hand
 export(average_spectra)
 export(do_pretreatment)
 export(evaluate_pls_q)
 export(fit_pls)
 export(fit_pls_q)
 export(join_chem_spec)
 export(ken_stone)
 export(pls_ken_stone)
 export(predict_from_spectra)
 export(readOPUS)
 export(readOPUS_bin)
 export(readOPUS_text)
 export(read_spectra)
 export(remove_outliers)
 export(resample_spectra)
 export(summary_df)
 export(tune_model)
 export(tune_model_q)
 import(Rcpp)
 import(data.table)
 import(hyperSpec)
--- a/R/average-spectra.R
+++ b/R/average-spectra.R
@ -0,0 +1,85 @@
 # Helper function written by Antoine Stevens that is used for
 # averaging replicate scans of one sample.
 #
 # Summarises the rows of a spectral table by group.
 #   spc     = spectral matrix or data.frame (one row per scan)
 #   indices = vector (or table with several columns) used to group
 #             the rows of `spc`
 #   fun     = summary function applied to every spectral column
 #             within each group
 # Returns a data.frame: grouping column(s) first, followed by one
 # summarised column per column of `spc`.
 #' @import data.table
 #' @import Rcpp
 by_spc <- function(spc, indices, fun = mean) {
   # Put the grouping column(s) in front of the spectra; column names
   # are kept as they are (check.names = FALSE)
   spc <- data.table::data.table(indices, spc, check.names = FALSE)
   # Number of leading columns that hold the grouping variable(s);
   # a plain vector has no columns and occupies exactly one
   n_idx <- if (is.null(ncol(indices))) 1L else ncol(indices)
   # `.SD` is the data.table subset of the current group without the
   # grouping columns; `fun` is applied to each of its columns
   as.data.frame(
     spc[, lapply(.SD, fun),
       by = eval(names(spc)[seq_len(n_idx)])]
   )
 }
 #' @title Calculate mean of spectra
 #' @description Averages the replicate scans of every sample
 #' (mean absorbance per wavenumber) and reports the spread between
 #' the replicates.
 #' @param in_spectra List that contains the spectral data in the
 #' element \code{MIR} (one row per replicate scan) and the scan
 #' metadata in the element \code{data_rep} (data.frame). The column
 #' \code{ID} of \code{data_rep} gives the sample ID of every scan
 #' and is used to group the replicates.
 #' @return \code{out_spectra}: List that contains:
 #' \itemize{
 #'  \item \code{data_meta}: data.frame with one row per sample and
 #'  the columns \code{ID} (sample ID), \code{country}
 #'  (characters 1 to 2 of \code{ID}) and \code{site}
 #'  (characters 4 to 5 of \code{ID})
 #'  \item \code{MIR_mean}: mean spectrum of the replicates of each
 #'  sample ID (data.frame, first column is the sample ID)
 #'  \item \code{MIR_sd}: standard deviation of the replicates of
 #'  each sample ID (data.frame, first column is the sample ID)
 #'  \item \code{cvar}: per sample, the mean standard deviation over
 #'  all wavenumbers divided by the mean absorbance over all
 #'  wavenumbers (vector)
 #' }
 #' @export
 average_spectra <- function(in_spectra) {
   # Replicate scans and the sample ID that every scan belongs to
   scans <- in_spectra$MIR[, ]
   sample_id <- in_spectra$data_rep$ID[]
   # Summarise the replicates per sample with by_spc(); the first
   # column of both results holds the sample ID
   MIR_mean <- by_spc(spc = scans, indices = sample_id, fun = mean)
   MIR_sd <- by_spc(spc = scans, indices = sample_id, fun = sd)
   # Sample metadata is decoded from fixed positions of the sample ID
   ids <- MIR_mean[, 1]
   list(
     data_meta = data.frame(
       ID = ids,
       country = substring(ids, first = 1, last = 2),
       site = substring(ids, first = 4, last = 5)
     ),
     MIR_mean = MIR_mean,
     MIR_sd = MIR_sd,
     # Coefficient of variation; column 1 (sample ID) is excluded
     cvar = rowMeans(MIR_sd[, -1]) / rowMeans(MIR_mean[, -1])
   )
 }
--- a/R/join-chem-spectra.R
+++ b/R/join-chem-spectra.R
@ -0,0 +1,34 @@
 ## Join chemical and spectral data ==============================
 #' @title Join chemical and spectral data frames
 #' @description Combines chemical reference data (data.frame) with
 #' the spectra stored in a spectral list; samples are matched by
 #' their ID.
 #' @param dat_chem data.frame that contains chemical values of
 #' the samples and a sample identifier column (see \code{by})
 #' @param dat_spec List that contains spectral data. The elements
 #' \code{data_meta} (with a column \code{ID}), \code{MIR0} and
 #' \code{MIR_mean} are used.
 #' @param by character of the column name in \code{dat_chem} that
 #' defines the sample ID. The column is renamed to \code{ID}.
 #' @return data.frame with the rows of \code{dat_chem} whose ID
 #' also occurs in \code{dat_spec$data_meta$ID} (in the order of the
 #' spectra), joined with the columns \code{MIR} (taken from
 #' \code{dat_spec$MIR0}) and \code{ori} (taken from
 #' \code{dat_spec$MIR_mean}).
 #' @export
 join_chem_spec <- function(
   dat_chem, dat_spec, by = "sample_ID") {
   if (!is.data.frame(dat_chem)) {
     stop("`dat_chem` needs to be a data.frame", call. = FALSE)
   }
   # Replace the user-defined sample identifier column name by "ID"
   colnames(dat_chem)[colnames(dat_chem) == by] <- "ID"
   if (!"ID" %in% colnames(dat_chem)) {
     stop("`dat_chem` has neither a column `", by,
       "` nor a column `ID`", call. = FALSE)
   }
   dat_chem$ID <- as.factor(dat_chem$ID)
   # Keep only chemical data of samples that still have a spectrum.
   # Rows are looked up by ID value; indexing with the ID vector
   # itself would select by factor code or row name instead.
   spec_id <- dat_spec$data_meta$ID
   rows <- match(as.character(spec_id), as.character(dat_chem$ID))
   dat_chem <- dat_chem[rows[!is.na(rows)], , drop = FALSE]
   # Collect the spectra next to their sample ID
   MIRdata <- data.frame(ID = as.factor(spec_id))
   MIRdata$MIR <- dat_spec$MIR0
   MIRdata$ori <- dat_spec$MIR_mean
   # Inner join: only samples present in both tables are returned
   plyr::join(dat_chem, MIRdata, by = "ID", type = "inner")
 }
--- a/R/load-spectra-yamsys.R
+++ b/R/load-spectra-yamsys.R
@ -0,0 +1,71 @@
 ## Function 1: Read spectra in text form ========================
 #' @title Read OPUS text files and extract metadata
 #' @description
 #' Read all text files in a directory that were acquired with
 #' a Bruker Vertex FTIR Instrument
 #' (as exported from OPUS software) and extract sample metadata
 #' provided in the file names
 #' @usage
 #' read_spectra(path)
 #' @param path character of the directory
 #' where the spectral text files are stored
 #' @return
 #' List that contains the following elements:
 #' \itemize{
 #'  \item \code{MIR}: data.frame that contains all the spectra.
 #'  The columns of \code{MIR} contain absorbance values at
 #'  different wavenumbers. The wavenumbers
 #'  rounded to 0.1 are given as column names. The original file
 #'  names (without the extension \code{.txt}) are stored as row
 #'  names. One line in the data frame
 #'  \code{MIR} contains one replicate scan of a sample.
 #'  \item \code{data_rep}: data.frame that consists of sample
 #'  metadata that was extracted from the file name of
 #'  individual spectral files. The column \code{ID}
 #'  contains the spectral file name without the repetition number
 #'  supplied as \code{.<number>} in the file name, and the column
 #'  \code{rep} contains that repetition number.
 #'  Letters 1 to 2 of \code{ID} are used for the country
 #'  abbreviation, stored in the column \code{country}. Letters
 #'  4 to 5 of \code{ID} are used for the landscape (site)
 #'  abbreviation, stored in the column \code{site}.
 #' }
 #' @note This function is derived from a re-factored and
 #' simplified version of the \code{read.opus} function from the
 #' \sQuote{soil.spec} package for reading OPUS VERTEX files
 #' The function should also work for other OPUS files (eg alpha),
 #' see \code{read.opus}. The function readOPUS() was
 #' written by Antoine Stevens.
 #' @export
 read_spectra <- function(path) {
   # List files in the directory and read them with readOPUS()
   lf <- list.files(path, full.names = TRUE)
   MIR <- readOPUS(lf, in_format = "txt")
   # Column names are wavenumbers; round them to one decimal
   colnames(MIR) <- round(as.numeric(colnames(MIR)), 1)
   # Remove the txt extension (only at the end of the name)
   rownames(MIR) <- sub("\\.txt$", "", row.names(MIR))
   scan_names <- row.names(MIR)
   # ID = characters before the last dot; rep = number after it.
   # The quantifier sits inside the capture group so that
   # repetition numbers with several digits are kept completely.
   data_rep <- data.frame(
     ID = sub("(.+)\\.[[:digit:]]+$", "\\1", scan_names),
     rep = as.numeric(
       sub(".+\\.([[:digit:]]+)$", "\\1", scan_names)
     )
   )
   # Country and site are encoded at fixed positions of the ID
   data_rep <- cbind(data_rep,
     country = substring(data_rep$ID, first = 1, last = 2),
     site = substring(data_rep$ID, first = 4, last = 5)
   )
   list(
     MIR = MIR,
     data_rep = data_rep
   )
 }
--- a/R/load-spectra.R
+++ b/R/load-spectra.R
@ -0,0 +1,310 @@
 ## Soil spectroscopy related functions that were compiled by
 ## Antoine Stevens ==============================================
 #' @title Read an OPUS text file
 #' @description
 #' Read a single text file acquired with
 #' a Bruker Vertex FTIR Instrument
 #' (as exported from OPUS software)
 #' @param file.name Character string with the path to one file
 #' @return data.frame with the columns \code{wavenumber} and
 #' \code{absorbance} (one row per line of the file). If the file
 #' does not exist, a warning is raised and the warning message
 #' (character) is returned invisibly.
 #' @usage readOPUS_text(file.name)
 #' @export
 readOPUS_text <- function(file.name) {
   if (!file.exists(file.name)) {
     # warning() returns its message invisibly; readOPUS() uses this
     # character value to recognise and drop missing files
     return(warning(paste("File", file.name, "does not exist")))
   }
   # The file has no header line: two comma-separated columns
   read.csv(file.name, header = FALSE,
     col.names = c("wavenumber", "absorbance")
   )
 }
 #' @title Read an OPUS binary file
 #' @description
 #' Read a single binary file acquired with a
 #' Bruker Vertex FTIR Instrument
 #' @param file.name Character string with the path to one file
 #' @return List with the elements
 #' \itemize{
 #'  \item \code{metadata}: data.frame with one row and the columns
 #'  \code{unique_id}, \code{scan_id}, \code{sample_id},
 #'  \code{rep_no}, \code{date_time}, \code{sample_info},
 #'  \code{instrument_name}, \code{resolution}, \code{bms} and
 #'  \code{lwn}
 #'  \item \code{spc}: matrix with one row (row name is
 #'  \code{unique_id}) and the wavenumbers rounded to 0.1 as column
 #'  names
 #'  \item \code{wavenumbers}: numeric vector of the wavenumbers
 #' }
 #' If the file does not exist or cannot be read, a warning is raised
 #' and the warning message (character) is returned invisibly.
 #' @usage readOPUS_bin(file.name)
 #' @export
 readOPUS_bin <- function(file.name) {
   if (!file.exists(file.name)) {
     return(warning(paste("File", file.name, "does not exist")))
   }
   # Read the complete file as raw bytes. The result of try() is
   # inspected directly; `.Last.value` is not updated inside
   # functions and cannot be used to detect a failed read.
   pa <- try(
     hexView::readRaw(file.name, offset = 0,
       nbytes = file.info(file.name)$size, human = "char",
       size = 1, endian = "little"),
     silent = TRUE)
   if (inherits(pa, "try-error")) {
     return(warning(paste("File", file.name, "could not be read")))
   }
   pr <- pa$fileRaw
   # Get source of instrument
   ins <- grepRaw("INS", pr, all = TRUE)
   ins <- hexView::readRaw(
     file.name, offset = ins[length(ins)] + 7,
     nbytes = 3, human = "char", size = 1, endian = "little"
   )
   ins <- hexView::blockString(ins)
   # Get source of infrared to know if NIR or MIR
   src <- grepRaw("SRC", pr, all = TRUE)
   src <- hexView::readRaw(
     file.name, offset = src[length(src)] + 4,
     nbytes = 3, human = "char", size = 1, endian = "little"
   )
   src <- hexView::blockString(src)
   instr.range <- tolower(paste(ins, src, sep = "-"))
   # Get Beam Splitter
   bms <- grepRaw("BMS", pr, all = TRUE)
   bms <- hexView::readRaw(
     file.name, offset = bms[length(bms)] + 4,
     nbytes = 4, human = "char", size = 1, endian = "little"
   )
   bms <- hexView::blockString(bms)
   # Byte positions of the parameter tags inside the file; the added
   # constants are the distances from a tag to its value as used by
   # the soil.spec reader (NOTE(review): not verifiable from here)
   z <- grepRaw("ZFF", pr, all = TRUE)[1] + 5
   re <- grepRaw("RES", pr, all = TRUE)[1] + 5
   snm <- grepRaw("SNM", pr, all = TRUE)[1] + 7
   lwn <- grepRaw("LWN", pr, all = TRUE)[1] + 7
   fx <- grepRaw("FXV", pr, all = TRUE)[3] + 7
   lx <- grepRaw("LXV", pr, all = TRUE)[3] + 7
   npt0 <- grepRaw("NPT", pr, all = TRUE)[2] + 3
   npt1 <- grepRaw("NPT", pr, all = TRUE)[3] + 7
   end <- grepRaw("END", pr, all = TRUE) + 11
   dat <- grepRaw("DAT", pr, all = TRUE)[1] + 7
   tim <- grepRaw("TIM", pr, all = TRUE) + 11
   # Start offsets of the data blocks
   offs <- end[5:10]
   ZFF <- hexView::readRaw(file.name, offset = z, nbytes = 4,
     human = "int", size = 2)[[5]][1]
   RES <- hexView::readRaw(file.name, offset = re, nbytes = 4,
     human = "int", size = 2)[[5]][1]
   snm.lab.material <- hexView::blockString(
     hexView::readRaw(file.name, offset = snm, nbytes = 22,
       human = "char", size = 1, endian = "little")
   )
   # Derive sample name (SSN) and material from the sample name field
   if (!nzchar(snm.lab.material)) {
     SSN <- ""
     Material <- ""
     warning("Product name not found inside OPUS file...")
   } else if (grepl(";", snm.lab.material, fixed = TRUE)) {
     # Field of the form "<a>;<b>;<material>"
     snm.lab.material <- as.vector(
       strsplit(snm.lab.material, ";")
     )[[1]]
     SSN <- paste0(snm.lab.material[2], snm.lab.material[1])
     Material <- snm.lab.material[3]
   } else if (grepl("_", snm.lab.material, fixed = TRUE)) {
     # Don't remove "_" from unique id SSN (@baumann)
     SSN <- snm.lab.material
     Material <- ""
   } else if (length(snm.lab.material) != 0) {
     SSN <- snm.lab.material
     Material <- ""
   }
   # SSN is deliberately not converted to lowercase
   Scandate <- hexView::blockString(
     hexView::readRaw(file.name, offset = dat,
       nbytes = 10, human = "char", size = 1,
       endian = "little")
   )
   Scantime <- hexView::blockString(
     hexView::readRaw(file.name,
       offset = tim[2] - 4, nbytes = 8, human = "char",
       size = 1, endian = "little")
   )
   Scandate <- paste(Scandate, Scantime)
   LWN <- hexView::readRaw(
     file.name, offset = lwn, nbytes = 8,
     human = "real", size = 8)[[5]][1]
   # Combine the above parameters (coerced to character)
   spectrum.meta <- c(SSN, Material, Scandate, ZFF, RES, LWN)
   # Get number of data points for each spectra data block
   NPT0 <- hexView::readRaw(
     file.name, offset = npt0, nbytes = 12,
     human = "int", size = 4)[[5]][2]
   NPT1 <- hexView::readRaw(
     file.name, offset = npt1, nbytes = 4,
     human = "int", size = 4)[[5]][1]
   # fxv: Frequency of first point
   fxv <- hexView::readRaw(
     file.name, offset = fx, nbytes = 16,
     human = "real", size = 8)[[5]][1]
   # lxv: Frequency of last point
   lxv <- hexView::readRaw(
     file.name, offset = lx, nbytes = 16,
     human = "real", size = 8)[[5]][1]
   # Select offset and length of the spectrum block depending on
   # where the first data block starts
   if (offs[1] < 2000) {
     offs.f <- offs[3]
     nbytes.f <- NPT1 * 4
     wavenumbers <- rev(seq(lxv, fxv, (fxv - lxv) / (NPT1 - 1)))
   } else if (offs[1] > 20000) {
     offs.f <- offs[2]
     nbytes.f <- NPT1 * 4
     wavenumbers <- rev(seq(lxv, fxv, (fxv - lxv) / (NPT1 - 1)))
   } else { # for vert-MIR
     offs.f <- 7188
     nbytes.f <- NPT0 * 4
     lxv <- hexView::readRaw(
       file.name, offset = 8768, nbytes = 16,
       human = "real", size = 8)[[5]][1]
     fxv <- hexView::readRaw(
       file.name, offset = 8752, nbytes = 16,
       human = "real", size = 8)[[5]][1]
     wavenumbers <- rev(seq(lxv, fxv, (fxv - lxv) / (NPT0 - 1)))
   }
   # Offset needs to be -4 according to the soil.spec function
   spectra <- hexView::readRaw(file.name, width = NULL,
     offset = offs.f - 4, nbytes = nbytes.f, human = "real",
     size = 4, endian = "little")[[5]]
   # File name without directory
   file_name <- sub(".+/(.+)", "\\1", file.name)
   # Create date_time object
   date_time <- as.POSIXct(spectrum.meta[3],
     format = "%d/%m/%Y %H:%M:%S ")
   # Create unique_id using file_name and scan date
   ymd_id <- format(date_time, "%Y%m%d")
   unique_id <- paste0(file_name, "_", ymd_id)
   # Add sample_id: remove extension .0, .1 etc. from OPUS files
   sample_id <- sub("(.+)\\.[[:digit:]]+$", "\\1", file_name)
   # Extract repetition number (rep_no) from file name; the
   # quantifier is inside the group so that all digits are captured
   rep_no <- sub(".+\\.([[:digit:]]+)$", "\\1", file.name)
   # Convert spectra to a one-row matrix and add dimnames
   # (wavenumbers for columns and unique_id for the row)
   spc_m <- matrix(spectra, ncol = length(spectra), byrow = FALSE)
   rownames(spc_m) <- unique_id
   colnames(spc_m) <- round(wavenumbers, 1)
   list(
     metadata = data.frame(
       unique_id = unique_id,
       scan_id = file_name,
       sample_id = sample_id,
       rep_no = rep_no,
       date_time = date_time,
       sample_info = spectrum.meta[1],
       instrument_name = instr.range,
       resolution = spectrum.meta[5],
       bms = bms,
       lwn = spectrum.meta[6]
     ),
     spc = spc_m,
     wavenumbers = wavenumbers
   )
 }
 #' @title Read OPUS binary and ASCII files
 #' @description
 #' Read single or multiple binary and ASCII files acquired with
 #' a Bruker Vertex FTIR Instrument
 #' @usage
 #' readOPUS(fnames, in_format, out_format)
 #' @param fnames character \code{vector} of the name(s)
 #' (with absolute path) of the file(s) to read
 #' @param in_format format of the input file: \code{'binary'} or
 #' \code{'txt'}
 #' @param out_format format of the output:
 #' \code{'matrix'} (default) or \code{'list'} (see below)
 #' @return
 #' if \code{out_format} = \code{'matrix'}, absorbance values
 #' of the input file(s) in a single \code{data.frame} with one row
 #' per file that could be read (row names are the file names) and
 #' one column per wavenumber.
 #'
 #' if \code{out_format} = \code{'list'}, a \code{list} named by
 #' file name with one element per input file, holding the value
 #' returned by \code{\link{readOPUS_bin}} (for
 #' \code{in_format} = 'binary') or \code{\link{readOPUS_text}}
 #' (for \code{in_format} = 'txt').
 #' @author Antoine Stevens and Andrew Sila (soil.spec package)
 #' @note
 #' This is essentially a re-factored and simplified version of
 #' the \code{read.opus} function from the
 #' \sQuote{soil.spec} package for reading OPUS VERTEX files
 #' The function should also work for other OPUS files (eg alpha),
 #' see \code{read.opus}.
 #' @export
 readOPUS <- function(fnames, in_format = c("binary", "txt"),
   out_format = c("matrix", "list")) {
   # hexView and plyr are required
   in_format <- match.arg(in_format)
   out_format <- match.arg(out_format)
   # Read every file with the reader that matches the input format
   reader <- if (in_format == "binary") readOPUS_bin else readOPUS_text
   spc <- lapply(fnames, reader)
   names(spc) <- sub(".+/(.+)(\\.txt)?$", "\\1", fnames)
   if (out_format == "matrix") {
     # The readers return a character warning message for files that
     # do not exist; drop those entries
     readable <- vapply(spc, function(x) !is.character(x), logical(1))
     spc <- spc[readable]
     # Row labels must come from the files that were kept, not from
     # all of `fnames`
     row_labels <- names(spc)
     if (in_format == "binary") {
       one_row <- function(x) {
         # readOPUS_bin() stores the absorbance values in `spc`;
         # `absorbance` is still honoured if a reader provides it
         absorbance <- if (is.null(x$absorbance)) {
           as.vector(x$spc)
         } else {
           x$absorbance
         }
         m <- t(data.frame(
           wav = x$wavenumbers, absorbance = absorbance))
         colnames(m) <- m[1, ]
         data.frame(m[2, , drop = FALSE], check.names = FALSE)
       }
     } else {
       one_row <- function(x) {
         # Row 1 = wavenumbers (become column names),
         # row 2 = absorbance values
         m <- t(x)
         colnames(m) <- m[1, ]
         data.frame(m[2, , drop = FALSE], check.names = FALSE)
       }
     }
     # Stack the one-row data frames; columns are matched by name
     spc <- do.call(plyr::rbind.fill, lapply(spc, one_row))
     if (!is.null(spc)) {
       rownames(spc) <- row_labels
     }
   }
   spc
 }
--- a/R/pls-modeling.R
+++ b/R/pls-modeling.R
@ -0,0 +1,466 @@
 # Perform calibration sampling based on spectral PCA ------------
 #' @title Split data into calibration and validation set
 #' @description Perform calibration sampling based on
 #' the Kennard-Stone algorithm.
 #' @param spec_chem data.frame that contains chemical
 #' and IR spectroscopy data; the spectra are taken from the
 #' element \code{MIR}
 #' @param ratio_val Ratio of number of validation and all samples.
 #' @param pc Number of principal components (numeric). Currently
 #' not used: the selection is always done on 2 principal
 #' components.
 #' @param print logical expression whether the plot of the
 #' selected samples is printed. Currently not used.
 #' @param validation Logical expression whether
 #' calibration sampling is performed
 #' (\code{TRUE} or \code{FALSE}).
 #' @return List. If \code{validation = TRUE}: \code{calibration}
 #' (rows not selected), \code{validation} (rows selected by the
 #' Kennard-Stone algorithm) and \code{p_pc} (ggplot of both sets in
 #' the space of the first two principal components). Otherwise only
 #' \code{calibration}, which holds all rows of \code{spec_chem}.
 #' @usage ken_stone(spec_chem, ratio_val, pc, print = TRUE,
 #' validation = TRUE)
 #' @export
 ken_stone <- function(spec_chem, ratio_val, pc,
   print = TRUE, validation = TRUE) {
   # Names used inside ggplot2::aes(); bound here to avoid
   # "no visible binding" notes of R CMD check
   type <- PC1 <- PC2 <- NULL
   if (validation == TRUE) {
     # k = number of samples to select; sel$model holds the row
     # index of the selected samples (see ?kenStone)
     sel <- prospectr::kenStone(X = spec_chem$MIR,
       k = round(ratio_val * nrow(spec_chem)), pc = 2)
     # Scores of the first two components, labelled by set
     sel_df_cal <- data.frame(sel$pc[-sel$model, 1:2])
     sel_df_cal$type <- as.factor(
       rep("calibration", nrow(sel_df_cal))
     )
     sel_df_val <- data.frame(sel$pc[sel$model, 1:2])
     sel_df_val$type <- as.factor(
       rep("validation", nrow(sel_df_val))
     )
     sel_df <- rbind(sel_df_cal, sel_df_val)
     # Every layer has to be chained with `+`; a statement that is
     # not chained is evaluated and then silently discarded
     p_pc <- ggplot2::ggplot(data = sel_df) +
       ggplot2::geom_point(
         ggplot2::aes(x = PC1, y = PC2, shape = type), size = 4) +
       ggplot2::coord_fixed(ratio = 1) +
       ggplot2::scale_shape_manual(values = c(1, 19)) +
       ggplot2::scale_colour_manual(values = c("black", "red")) +
       ggplot2::theme_bw() +
       ggplot2::theme(legend.title = ggplot2::element_blank())
     # Split the data using the results of the Kennard-Stone
     # sampling: selected rows are used for validation
     list(
       calibration = spec_chem[-sel$model, ],
       validation = spec_chem[sel$model, ],
       p_pc = p_pc
     )
   } else {
     # No split: all samples are used for calibration
     list(calibration = spec_chem)
   }
 }
 #' @title Perform model tuning
 #' (quoted version of the function)
 #' @description Uses functions from caret to set up the model
 #' tuning (10-fold cross-validation) for PLS regression.
 #' @param x list from calibration sampling; the element
 #' \code{calibration} is used
 #' @param variable response variable for PLS regression, supplied
 #' as quoted expression (e.g. created with \code{quote()} or
 #' \code{substitute()}) that is evaluated within
 #' \code{x$calibration}
 #' @param validation Logical expression whether an independent
 #' validation is performed. Currently has no effect on the result.
 #' @param env Environment where function is evaluated
 #' @return Object returned by \code{caret::trainControl()}
 #' @export
 tune_model_q <- function(x, variable,
   env = parent.frame(), validation = TRUE) {
   # Evaluate the quoted response within the calibration data; see
   # chapter "Non-standard evaluation" in Advanced R (Hadley Wickham)
   r <- eval(variable, x$calibration, env)
   # Randomly break the data into 10 partitions; the training row
   # indices of every fold are returned so that the same folds can
   # be reused for different modeling approaches
   idx <- caret::createFolds(y = r, k = 10, returnTrain = TRUE)
   # Inject the fold index in the trainControl object and keep the
   # hold-out predictions
   caret::trainControl(method = "cv", index = idx,
     savePredictions = TRUE)
 }
 #' @title Perform model tuning
 #' @description Uses functions from caret to set up the model
 #' tuning for PLS regression. Wrapper around
 #' \code{tune_model_q()} that captures \code{variable} unevaluated.
 #' @param x list from calibration sampling
 #' @param variable response variable for PLS regression, supplied
 #' as unquoted expression (e.g. a column name of
 #' \code{x$calibration})
 #' @param validation Logical expression whether an independent
 #' validation is performed.
 #' @param env Environment where function is evaluated
 #' @return Value returned by \code{tune_model_q()}
 #' @export
 tune_model <- function(x, variable,
   env = parent.frame(), validation = TRUE) {
   # substitute() captures the expression typed by the caller; all
   # other arguments are forwarded unchanged
   tune_model_q(x, substitute(variable), env,
     validation = validation)
 }
 # Fit a PLS regression model using the caret package ------------
 #' @title Fit a PLS regression model
 #' (quoted version of the function)
 #' @description Uses the caret package to perform PLS modeling.
 #' Spectra are centered and scaled prior to modeling.
 #' @param x List that contains calibration
 #' set, validation set, and model tuning options; the model is
 #' fitted on \code{x$calibration} with the spectra in the element
 #' \code{MIR}
 #' @param validation Logical expression whether independent
 #' validation is performed. Currently has no effect on the fit.
 #' @param variable Response variable to be modeled, supplied as
 #' quoted expression that is evaluated within
 #' \code{x$calibration}
 #' @param tr_control Object that defines controlling parameters
 #' of the desired internal validation framework
 #' @param env Environment where function is evaluated
 #' @return Object returned by \code{caret::train()}
 #' @export
 fit_pls_q <- function(x, validation = TRUE,
   variable, tr_control, env = parent.frame()) {
   # Evaluate the quoted response within the calibration data
   v <- eval(variable, x$calibration, env)
   # Fit a partial least squares regression (pls) model; the
   # spectra are centered and scaled, and 20 candidate values of
   # the tuning parameter are evaluated (tuneLength)
   caret::train(x = x$calibration$MIR, y = v,
     method = "pls",
     tuneLength = 20,
     trControl = tr_control,
     preProcess = c("center", "scale")
   )
 }
 #' @title Fit a PLS regression model
 #' @description Uses the caret package to perform PLS modeling.
 #' Spectra are centered and scaled prior to modeling. Wrapper
 #' around \code{fit_pls_q()} that captures \code{variable}
 #' unevaluated.
 #' @param x List that contains calibration
 #' set, validation set, and model tuning options
 #' @param validation Logical expression whether independent
 #' validation is performed
 #' @param variable Response variable to be modeled, supplied as
 #' unquoted expression (e.g. a column name of
 #' \code{x$calibration})
 #' @param env Environment where function is evaluated
 #' @param tr_control Object that defines controlling parameters
 #' of the internal validation framework. If \code{NULL} (default),
 #' it is created with \code{tune_model_q()}.
 #' @return Value returned by \code{fit_pls_q()}
 #' @export
 fit_pls <- function(x, validation = TRUE,
   variable, env = parent.frame(), tr_control = NULL) {
   # Capture the expression typed by the caller
   variable_expr <- substitute(variable)
   # Resolve the default of `env` in this frame before it is
   # handed on to other functions
   force(env)
   if (is.null(tr_control)) {
     tr_control <- tune_model_q(x, variable_expr,
       env = env, validation = validation)
   }
   # All arguments are passed by name: a positional `env` would be
   # matched to `tr_control` of fit_pls_q()
   fit_pls_q(x = x, validation = validation,
     variable = variable_expr, tr_control = tr_control,
     env = env
   )
 }
 # Evaluate PLS performance (validation and cross-validation) ----
 #' @title Evaluate PLS performance
 #' @description Calculate model performance indices based
 #' on observed and predicted values of validation and calibration
 #' set, and internal cross-validation
 #' @param x List that contains calibration and validation data
 #' frame with combined spectral and chemical data
 #' @param pls_model List with PLS regression model output from
 #' the caret package
 #' @param variable Response variable (e.g. chemical property) to be
 #' modelled (needs to be non-quoted expression). \code{variable}
 #' needs to be a column name in the \code{validation} data.frame
 #' (element of \code{x})
 #' @param validation Logical expression if independent validation
 #' is performed (split data set into calibration set and
 #' validation set)
 #' @param print Print observed vs. predicted for calibration
 #' and validation. Default is \code{TRUE}.
 #' @param env Specifiy the environment in which the function is
 #' called. Default argument of \code{env} is
 #' \code{parent.frame()}
 #' @export
 evaluate_pls_q <- function(x, pls_model, variable,
  validation = TRUE, print = TRUE, env = parent.frame()) {
  # Set global variables to NULL to avoid R CMD check notes
  MIR <- object <- model <- dataType <- obs <- pred <- NULL
  ncomp <- finalModel <- rmsd <- r2 <- r2 <- rpd <- n <- NULL
  rmse <- calibration <- NULL
  # Collect fitted object into a list
  list_models <- list(pls = pls_model)
  # Extract best tuning parameters and associated cv predictions
  if(validation == TRUE) {
    predobs_cal <- plyr::ldply(list_models,
      function(x) plyr::match_df(x$pred, x$bestTune),
      .id = "model"
    )
    # Calculate training (calibration) and test (validation) data
    # predictions based on pls model with calibration data
    v <- eval(variable, x$validation, env)
    predobs_val <- caret::extractPrediction(list_models,
      testX = x$validation$MIR, testY = v) # update ***
    # Create new data frame column <object>
    predobs_val$object <- predobs_val$model
    # Replace levels "Training" and "Test" in dataType column
    # by "Calibration" and "Validation" (rename levels of factor)
    predobs_val$dataType <- plyr::revalue(predobs_val$dataType,
      c("Test" = "Validation", "Training" = "Calibration")
    )
    # Change the order of rows in the data frame
    # Calibration as first level (show Calibration in ggplot graph
    # on left panel)
    predobs_val$dataType <- factor(predobs_val$dataType,
      levels = c("Calibration", "Validation"))
    # Calculate model performance indexes by model and dataType
    # uses package plyr and function summary.df of SPECmisc.R
    stats <- plyr::ddply(predobs_val, c("model", "dataType"),
      function(x) summary_df(x, "obs", "pred")
    )
  } else {
    # Extract best tuning parameters and associated cv predictions
    predobs_cv <- plyr::ldply(list_models,
      function(x) plyr::match_df(x$pred, x$bestTune),
      .id = "model"
    )
    # Extract auto-prediction
    predobs <- caret::extractPrediction(list_models)
    predobs_cv$object <- predobs_cv$model
    predobs_cv$dataType <- "Cross-validation"
    predobs_cv <- dplyr::select(
      predobs_cv, obs, pred, model, dataType, object
    )
    predobs_val <- rbind(predobs, predobs_cv)
    stats <- plyr::ddply(predobs_val, c("model", "dataType"),
      function(x) summary_df(x, "obs", "pred")
    )
  }
  # Add number of components to stats; from finalModel list item
  # from train() function output (function from caret package)
  stats$ncomp <- rep(pls_model$finalModel$ncomp, nrow(stats))
  # Add range of observed values for validation and calibraton
  # get range from predicted vs. observed data frame
  # stored in object predobs
  obs_cal <- subset(predobs_val, dataType == "Calibration")$obs
  obs_val <- subset(predobs_val, dataType == "Validation")$obs
  # Get name of predicted variable; see p. 261 of book
  # "Advanced R" (Hadley Wickham)
  variable_name <- deparse(variable)
  # before: deparse(substitute(variable))
  df_range <- data.frame(
    variable = rep(variable_name, 2),
    dataType = c("Calibration", "Validation"),
    min_obs = c(range(obs_cal)[1], range(obs_val)[1]),
    median_obs = c(median(obs_cal), median(obs_val)),
    max_obs = c(range(obs_cal)[2], range(obs_val)[2]),
    mean_obs = c(mean(obs_cal), mean(obs_val)),
    CV = c(sd(obs_cal) / mean(obs_cal) * 100,
      sd(obs_val) / mean(obs_val) * 100)
  )
  # Join stats with range data frame (df_range)
  stats <- plyr::join(stats, df_range, type = "inner")
  annotation <- plyr::mutate(stats,
    rmse = as.character(as.expression(paste0("RMSE == ",
      round(rmsd, 2)))),
    r2 = as.character(as.expression(paste0("italic(R)^2 == ",
      round(r2, 2)))),
    rpd = as.character(as.expression(paste("RPD == ",
      round(rpd, 2)))),
    n = as.character(as.expression(paste0("italic(n) == ", n))),
    ncomp = as.character(as.expression(paste0("ncomp = ",
      ncomp)))
  )
  # Plot predicted vs. observed values and model indexes
  # update label, xlim, and ylim ***
  # Add label number of samples to facet_grid using a
  # labeling function
  # ! Update labeller API:
  # https://github.com/hadley/ggplot2/commit/ef33dc7
  # http://sahirbhatnagar.com/facet_wrap_labels
  # Prepare lookup character vector
  # Build the lookup vector that relabels the facet strips. Names are the
  # `dataType` values to match; values are plotmath strings of the form
  # "<label>~(<n>)" that are parsed by the labeller.
  #   x: data frame with the columns `dataType` and `n`
  #   validation: TRUE for calibration/validation panels, otherwise
  #     calibration/cross-validation panels
  make_label <- function(x, validation = TRUE) {
    # Number of samples reported for one data type
    n_of <- function(data_type) {
      x[x$dataType == data_type, ]$n
    }
    if (validation == TRUE) {
      c(
        `Calibration` = paste0("Calibration", "~(",
          n_of("Calibration"), ")"
        ),
        `Validation` = paste0("Validation", "~(",
          n_of("Validation"), ")"
        )
      )
    } else {
      # Cross-validated predictions are tagged "Cross-validation"
      # (lower-case "v") where `dataType` is assigned, so the row lookup
      # and the element name have to use exactly that spelling; with
      # "Cross-Validation" the sample count is empty and the facet value
      # finds no label. The displayed text is left unchanged.
      c(
        `Calibration` = paste0("Calibration", "~(",
          n_of("Calibration"), ")"
        ),
        `Cross-validation` = paste0("Cross-Validation", "~(",
          n_of("Cross-validation"), ")"
        )
      )
    }
  }
  if (validation == TRUE) {
    label_validation <- make_label(x = annotation,
      validation = TRUE
    )
  } else {
    label_validation <- make_label(x = annotation,
      validation = FALSE
    )
  }
  # Rename labels on the fly with a lookup character vector
  to_string <- ggplot2::as_labeller(
    x = label_validation, ggplot2::label_parsed
  )
  # -------------------------------------------------------------
  # http://docs.ggplot2.org/0.9.3.1/label_parsed.html
  # some other info: https://coderclub.b.uib.no/tag/plotmath/
  # !!! now depreciated in ggplot2 >= 2.0.0
  # dataType_labeller <- function(variable, value){
  #   new <- paste0(dataType_names[value], "~(", annotation$n, ")")
  #   plyr::llply(as.character(new), function(x) parse(text = x))
  # }
  p_pred_obs <- ggplot2::ggplot(data = predobs_val) +
    ggplot2::geom_point(ggplot2::aes(x = obs, y = pred),
      shape = 1, size = 4) +
    ggplot2::geom_text(data = annotation,
      ggplot2::aes(x = -Inf, y = Inf, label = r2), size = 7,
      hjust = -0.1, vjust = 1.5, parse = TRUE) +
    ggplot2::geom_text(data = annotation,
      ggplot2::aes(x = -Inf, y = Inf, label = rmse), size = 7,
      hjust = -0.075, vjust = 4.25, parse = TRUE) +
    ggplot2::geom_text(data = annotation,
      ggplot2::aes(x = -Inf, y = Inf, label = rpd), size = 7,
      hjust = -0.1, vjust = 6.5, parse = TRUE) +
    ggplot2::facet_grid(~ dataType,
      labeller =ggplot2::as_labeller(to_string)) +
    # ggplot2::facet_grid(~ dataType,
    #   labeller = dataType_labeller) +
    ggplot2::theme_bw() +
    ggplot2::geom_abline(col = "red") +
    ggplot2::labs(x = "Observed", y = "Predicted") +
    ggplot2::xlim(c(min(predobs_val$obs) -
        0.05 * diff(range(predobs_val$obs)),
      max(predobs_val$obs) +
        0.05 * diff(range(predobs_val$obs)))) +
    ggplot2::ylim(c(min(predobs_val$obs) -
        0.05 * diff(range(predobs_val$obs)),
      max(predobs_val$obs) +
        0.05 * diff(range(predobs_val$obs)))) # +
    # theme.user
  ## ggplot graph for model comparison
  ## (arranged later in panels)
  x_label <- paste0("Observed ",
    as.character(variable_name))
  y_label <- paste0("Predicted ",
    as.character(variable_name))
  p_model <- ggplot2::ggplot(data = predobs_val) +
    ggplot2::geom_point(ggplot2::aes(x = obs, y = pred),
      shape = 1, size = 2, alpha = 1/2) +
    ggplot2::geom_text(data = annotation,
      ggplot2::aes(x = Inf, y = -Inf, label = r2), size = 3,
      hjust = 1.15, vjust = -3, parse = TRUE) +
    ggplot2::geom_text(data = annotation,
      ggplot2::aes(x = Inf, y = -Inf, label = rmse), size = 3,
      hjust = 1.12, vjust = -2.5, parse = TRUE) +
    ggplot2::geom_text(data = annotation,
      ggplot2::aes(x = Inf, y = -Inf, label = rpd), size = 3,
      hjust = 1.15, vjust = -1.25, parse = TRUE) +
    ggplot2::facet_grid(~ dataType,
      labeller = ggplot2::as_labeller(to_string)) +
    # ggplot2::facet_grid(~ dataType,
    #   labeller = dataType_labeller) +
    ggplot2::theme_bw() +
    ggplot2::geom_abline(col = "red") +
    ggplot2::labs(x = x_label, y = y_label) +
    ggplot2::xlim(c(min(predobs_val$obs) -
        0.05 * diff(range(predobs_val$obs)),
      max(predobs_val$obs) +
        0.05 * diff(range(predobs_val$obs)))) +
    ggplot2::ylim(c(min(predobs_val$obs) -
        0.05 * diff(range(predobs_val$obs)),
      max(predobs_val$obs) +
        0.05 * diff(range(predobs_val$obs)))) +
    ggplot2::coord_fixed()
  if(print == TRUE) {
    print(p_model)
  }
  list(stats = stats, p_model = p_model)
 }
 ## PLS regression modeling in one function ======================
 #' @title Calibration sampling, model tuning, and PLS regression
 #' @description Perform calibration sampling and use the selected
 #' calibration set for model tuning, PLS model fitting, and
 #' model evaluation
 #' @param spec_chem data.frame that contains IR spectroscopy
 #' and chemical data
 #' @param ratio_val Ratio of number of validation and all samples.
 #' @param pc Number of Principal Components used for Calibration
 #' sampling (Kennard-Stones algorithm). If omitted, 2 components
 #' are used.
 #' @param print Logical expression whether graphs shall be printed
 #' @param validation Logical expression whether independent
 #' validation is performed
 #' @param variable Response variable (without quotes)
 #' @param env Environment where function is evaluated
 #' @return List with the elements \code{data} (output of
 #' \code{ken_stone()}), \code{p_pc} (element \code{p_pc} of that
 #' output), \code{pls_model} (output of \code{fit_pls_q()}), and
 #' \code{stats} and \code{p_model} (both taken from the output of
 #' \code{evaluate_pls_q()})
 #' @export
 # Note: check non standard evaluation, argument passing...
 pls_ken_stone <- function(spec_chem, ratio_val, pc,
  print = TRUE, validation = TRUE, variable,
  env = parent.frame()) {
  # Earlier versions ignored `pc` and always sampled with 2 components;
  # keep 2 as the fallback so that calls without `pc` still work
  if (missing(pc)) {
    pc <- 2
  }
  # Resolve the evaluation environment here, in this function's frame,
  # before it is handed on to the helper functions
  force(env)
  # Capture the unquoted response once and pass the quoted symbol on
  response <- substitute(variable)
  # Calibration sampling
  list_sampled <- ken_stone(
    spec_chem, ratio_val = ratio_val, pc = pc,
    print = print, validation = validation
  )
  tr_control <- tune_model_q(list_sampled, response, env)
  pls <- fit_pls_q(x = list_sampled, validation = validation,
    variable = response, tr_control = tr_control, env = env
  )
  # Evaluate in the caller-supplied environment rather than always in
  # the calling frame, and honour the `validation` and `print` settings
  stats <- evaluate_pls_q(x = list_sampled, pls_model = pls,
    variable = response, validation = validation, print = print,
    env = env
  )
  list(data = list_sampled, p_pc = list_sampled$p_pc,
    pls_model = pls, stats = stats$stats, p_model = stats$p_model)
 }
--- a/R/predict-spectra.R
+++ b/R/predict-spectra.R
@ -0,0 +1,42 @@
 #' @title Predict soil properties of new spectra based on calibration models
 #' @description
 #' Function that uses pre-processed spectra, additional metadata of new
 #' samples, and caret model output for the different soil property models
 #' to create predicted values.
 #' @param model_list List that contains caret output objects
 #' of the different calibration models to predict (one model per soil property)
 #' @param spectra_list List that contains the pre-processed spectra in the
 #' element \code{MIR0} (as stored by \code{do_pretreatment()}) and the
 #' sample metadata in the element \code{data_meta}, whose \code{ID} column
 #' is used as \code{sample_ID}
 #' @return data.frame in wide form: the \code{pred} values of each model
 #' are spread into one column per \code{object} (model name), with
 #' \code{sample_ID} identifying the sample
 #' @usage predict_from_spectra(model_list, spectra_list)
 #' @export
 predict_from_spectra <- function(model_list, spectra_list) {
  # Predict with every model in `model_list` for the pre-processed spectra
  # supplied in `spectra_list` (previously the function read the global
  # objects `models_prediction` and `soilspec_test` instead of its arguments)
  predictions_caret <- caret::extractPrediction(
    model_list,
    unkX = spectra_list$MIR0
  )
  # Number of caret model objects used to predict
  n_models <- length(unique(predictions_caret$object))
  # The long-form predictions hold one row per model and sample; repeat the
  # sample IDs once per model so that every row is labelled
  # NOTE(review): assumes rows are ordered model by model, samples in input
  # order -- confirm against caret::extractPrediction()
  id <- rep(spectra_list$data_meta$ID, n_models)
  if (length(id) != nrow(predictions_caret)) {
    stop(
      "Number of sample IDs in `spectra_list$data_meta$ID` does not match ",
      "the number of predicted spectra",
      call. = FALSE
    )
  }
  predictions_metadata <- cbind(predictions_caret, sample_ID = id)
  # Get data into wide form: one column of predicted values per model
  tidyr::spread(
    data = predictions_metadata, key = "object", value = "pred"
  )
 }
--- a/R/pretreat-spectra.R
+++ b/R/pretreat-spectra.R
@ -0,0 +1,51 @@
 #' @title Preprocess spectra
 #' @description Use commonly used preprocessing algorithms on
 #' the spectra.
 #' @param list_spectra List that contains averaged spectra
 #' in the list element called \code{MIR_mean}
 #' @param select Character string that specifies the predefined
 #' pretreatment options. Possible arguments are:
 #' \code{select = "MIR0"} for Savitzky Golay smoothing filter
 #' without derivative, \code{select = "MIR1"} for Savitzky Golay
 #' with first derivative, \code{select = "MIR2"} for Savitzky
 #' Golay with second derivative, \code{select = "MIR0_snv"}
 #' for Standard Normal Variate after Savitzky Golay without
 #' derivative, \code{select = "MIR1_snv"} for Standard Normal
 #' Variate after Savitzky Golay with first derivative,
 #' and \code{select = "MIRb"} for baseline correction.
 #' @usage do_pretreatment(list_spectra, select)
 #' @return list_spectra: List that contains preprocessed
 #' spectra in element \code{MIR0}
 #' @import hyperSpec
 #' @export
 do_pretreatment <- function(list_spectra, select) {
  # Validate the option up front instead of resolving an arbitrary
  # variable name with get()
  options_pre <- c("MIR0", "MIR1", "MIR2", "MIR0_snv", "MIR1_snv", "MIRb")
  if (!is.character(select) || length(select) != 1L ||
      !(select %in% options_pre)) {
    stop(
      "`select` must be one of: ", paste(options_pre, collapse = ", "),
      call. = FALSE
    )
  }
  spectra <- list_spectra$MIR_mean
  # Savitzky-Golay filter with polynomial order p = 3
  # m = m-th derivative of the polynomial coefficients (0 = smoothing)
  # w = window size (must be odd)
  sav_gol <- function(m, w) {
    prospectr::savitzkyGolay(X = spectra, m = m, p = 3, w = w)
  }
  # Only the selected pretreatment is computed; the other options
  # (in particular the baseline fit) are no longer evaluated needlessly
  list_spectra$MIR0 <- switch(select,
    MIR0 = sav_gol(m = 0, w = 9), # smoothing
    MIR1 = sav_gol(m = 1, w = 5), # first derivative
    MIR2 = sav_gol(m = 2, w = 5), # second derivative
    # Standard normal variate (SNV) after Savitzky-Golay
    MIR0_snv = prospectr::standardNormalVariate(sav_gol(m = 0, w = 9)),
    MIR1_snv = prospectr::standardNormalVariate(sav_gol(m = 1, w = 5)),
    MIRb = {
      # Baseline correction: create a hyperSpec object, fit a polynomial
      # of order 2 below the spectra (fitted to the range 4000 ~ 900),
      # and subtract it from the spectra
      spc <- new("hyperSpec", spc = as.matrix(spectra),
        wavelength = as.numeric(colnames(spectra)))
      below <- hyperSpec::spc.fit.poly.below(
        fit.to = spc[, , 4000 ~ 900],
        apply.to = spc, npts.min = 20, poly.order = 2)
      spc_corr <- spc - below
      spc_corr[[]]
    }
  )
  list_spectra
 }
--- a/R/remove-outl-spectra.R
+++ b/R/remove-outl-spectra.R
@ -0,0 +1,162 @@
 #' @title Remove outlier spectra
 #' @description Remove outlier spectra based on the
 #' \code{pcout()} function of the \code{mvoutlier} package.
 #' @usage remove_outliers(list_spectra, remove = TRUE)
 #' @param list_spectra List that contains averaged
 #' spectral information
 #' in list element \code{MIR_mean} (data.frame) and metadata in
 #' \code{data_meta} (data.frame).
 #' @param remove logical expression (\code{TRUE} or \code{FALSE})
 #' that specifies whether spectra shall be removed.
 #' If \code{remove = FALSE}, there will be no outlier removal
 #' @return Returns list \code{spectra_out} that contains:
 #' \itemize{
 #'  \item \code{MIR_mean}: Outlier removed MIR spectra as
 #'  data.frame object. If \code{remove = FALSE},
 #'  the function will
 #'  return a list almost identical to \code{list_spectra},
 #'  except that the first \code{indices} column of the spectral
 #'  data frame \code{MIR_mean} is removed
 #'  (This is done for both options
 #'  \code{remove = TRUE} and \code{remove = FALSE}).
 #'  \item \code{data_meta}: metadata data.frame; rows of samples
 #'  identified as outliers are removed if \code{remove = TRUE},
 #'  otherwise it is identical to the \code{list_spectra} input.
 #'  \item \code{plot_out}: (only if \code{remove = TRUE}) ggplot2
 #'  graph that shows all spectra (wavenumber on x-axis and
 #'  absorbance on y-axis) with outliers marked.
 #' }
 #' @details This is an optional function if one wants to remove
 #' outliers.
 #' @export
 remove_outliers <- function(list_spectra, remove = TRUE) {
  # Remove the 'indices' column (first column) in both modes;
  # drop = FALSE keeps the two-dimensional structure even when only
  # a single row or column remains
  spectra <- list_spectra$MIR_mean[, -1, drop = FALSE]
  meta <- list_spectra$data_meta
  if (remove == TRUE) {
    # Outlier detection: use the mvoutlier package and pcout function
    # to identify multivariate outliers
    out <- mvoutlier::pcout(spectra, makeplot = TRUE,
      outbound = 0.05) # parameters should be adapted
    # Plot all spectra coloured by the outlier flag; rows are sorted by
    # decreasing flag so spectra with flag 0 are passed last
    flag_order <- order(out$wfinal01, decreasing = TRUE)
    plot_out <- plotMIR(
      spectra[flag_order, , drop = FALSE],
      col = as.factor(out$wfinal01[flag_order])) +
      ggplot2::scale_colour_brewer("outlier", palette = "Set1")
    # IDs of the spectra whose final flag is 0, i.e. the ones to remove
    out_id <- as.character(meta$ID[!as.logical(out$wfinal01)])
    keep <- !meta$ID %in% out_id
    # Remove outliers from spectra and metadata
    spectra_out <- list(
      MIR_mean = spectra[keep, , drop = FALSE],
      data_meta = meta[keep, , drop = FALSE],
      plot_out = plot_out)
  } else {
    spectra_out <- list(MIR_mean = spectra, data_meta = meta)
  }
  spectra_out
 }
 ## plotMIR function of Antoine Stevens; don't export this
 ## function to the NAMESPACE
 # Plot spectra with ggplot2.
 #   spc      spectral matrix/data frame, with colnames = wavelengths
 #   group    grouping variable, usually the id's of the sample
 #            (defaults to the row number)
 #   col      optional vector mapped to the line colour
 #   linetype optional vector mapped to the line type
 #   wr       wavelength range to plot
 #   brk      breaks of the x-axis
 #   ylab, xlab axis labels
 #   by       factor variable for which the mean and sd of
 #            each level will be computed and plotted (optional)
 #   by.wrap  if TRUE, draw one panel per level of `by` with a
 #            mean +/- sd ribbon; otherwise draw the mean lines of all
 #            levels in one panel
 # Returns a ggplot object. Requires packages ggplot2; data.table; reshape2
 plotMIR <- function(spc, group = NULL, col = NULL,
  linetype = NULL, wr = NULL, brk = NULL,
  ylab = "Absorbance", xlab = "Wavenumber /cm-1",
  by = NULL, by.wrap = TRUE, ...){
  # Workaround to pass R CMD check:
  # http://stackoverflow.com/questions/9439256/how-can-i-handle-r-cmd-check-no-visible-binding-for-global-variable-notes-when
  # Setting the variables to NULL first. `.SD` is declared here as well
  # and used unqualified below: `data.table::.SD` evaluates to the
  # exported NULL placeholder instead of the per-group subset.
  variable <- value <- colour <- .SD <- NULL
  spc <- as.data.frame(spc)
  if (!is.null(wr)) {
    wavenumbers <- as.numeric(colnames(spc))
    in_range <- wavenumbers >= min(wr) & wavenumbers <= max(wr)
    # drop = FALSE: keep a data frame even if one column is selected
    spc <- spc[, in_range, drop = FALSE]
  }
  if (is.null(brk)) {
    brk <- pretty(as.numeric(colnames(spc)), n = 10)
  }
  if (!is.null(by)) {
    spc$by <- by
    spc <- data.table::data.table(spc, check.names = FALSE)
    # Mean and standard deviation per level of `by` and wavelength
    mean.spc <- reshape2::melt(
      spc[, lapply(.SD, mean), by = by],
      id.vars = "by"
    )
    sd.spc <- reshape2::melt(
      spc[, lapply(.SD, stats::sd), by = by],
      id.vars = "by"
    )
    mean.spc$min <- mean.spc$value - sd.spc$value
    mean.spc$max <- mean.spc$value + sd.spc$value
    mean.spc$variable <- as.numeric(
      as.character(mean.spc$variable)
    )
    if (by.wrap) {
      p <- ggplot2::ggplot(data = mean.spc) +
        ggplot2::geom_ribbon(
          ggplot2::aes(x = variable, ymin = min, ymax = max),
          fill = "grey", col = "black", size = 0.15) +
        ggplot2::theme_bw()
      p <- p + ggplot2::geom_line(
        ggplot2::aes(x = variable, y = value),
        size = 0.25) +
        ggplot2::facet_wrap(~ by) +
        ggplot2::labs(x = xlab, y = ylab) +
        ggplot2::scale_x_reverse(breaks = brk)
    } else {
      p <- ggplot2::ggplot(data = mean.spc,
        ggplot2::aes(x = variable, y = value, group = by, col = by)) +
        ggplot2::geom_line(size = 0.25) +
        ggplot2::labs(x = xlab, y = ylab) +
        ggplot2::scale_x_reverse(breaks = brk) +
        ggplot2::theme_bw()
    }
    return(p)
  }
  # No `by`: draw one line per spectrum
  if (is.null(group)) {
    group <- as.character(seq_len(nrow(spc)))
  }
  spc$group <- group
  # Assigning NULL leaves the data frame unchanged, so these columns
  # only exist when `col` / `linetype` were supplied
  spc$colour <- col
  spc$linetype <- linetype
  id_cols <- colnames(spc)[
    grep("group|colour|linetype", colnames(spc))]
  tmp <- reshape2::melt(spc, id.vars = id_cols)
  tmp$variable <- as.numeric(as.character(tmp$variable))
  p <- ggplot2::ggplot(tmp,
    ggplot2::aes(variable, value, group = group)) +
    ggplot2::labs(x = xlab, y = ylab) +
    ggplot2::theme_bw() +
    ggplot2::scale_x_reverse(breaks = brk)
  if (is.null(col) && is.null(linetype)) {
    p <- p + ggplot2::geom_line(
      ggplot2::aes(colour = group))
  } else if (!is.null(col) && is.null(linetype)) {
    p <- p + ggplot2::geom_line(
      ggplot2::aes(colour = colour))
  } else if (is.null(col) && !is.null(linetype)) {
    p <- p + ggplot2::geom_line(
      ggplot2::aes(colour = group,
        linetype = linetype))
  } else {
    p <- p + ggplot2::geom_line(
      ggplot2::aes(colour = colour,
        linetype = linetype))
  }
  p
 }
--- a/R/resample-spectra.R
+++ b/R/resample-spectra.R
@ -0,0 +1,19 @@
 #' @title Resample spectra to a new wavenumber sequence
 #' @description Resamples the spectra stored in the list element
 #' \code{MIR_mean} to a regular sequence of wavenumbers and stores
 #' the result in the list element \code{MIR0}
 #' @param list_spectra List of spectra and metadata; the spectra are
 #' read from the element \code{MIR_mean}, whose column names are the
 #' wavenumbers
 #' @param wn_lower Numerical value for lowest  wavenumber in sampling interval
 #' @param wn_upper Numerical value for highest wavenumber in sampling interval
 #' @param wn_interval Numerical value for the step between two
 #' consecutive wavenumbers of the new sequence
 #' @return \code{list_spectra} with the resampled spectra added as
 #' element \code{MIR0}
 #' @export
 resample_spectra <- function(
  list_spectra, wn_lower = 510, wn_upper = 3988, wn_interval = 2)
  {
  # Wavenumbers of the input spectra, taken from the column names
  wn_old <- as.numeric(colnames(list_spectra$MIR_mean))
  # New wavenumbers, ordered from highest to lowest
  wn_new <- rev(seq(from = wn_lower, to = wn_upper, by = wn_interval))
  list_spectra$MIR0 <- prospectr::resample(
    X = list_spectra$MIR_mean,
    wav = wn_old,
    new.wav = wn_new
  )
  list_spectra
 }
--- a/R/spectra-utils.R
+++ b/R/spectra-utils.R
@ -0,0 +1,25 @@
 #' @title Calculate model statistics
 #' @description Calculates model statistics for predicted (y)
 #' vs. observed (x) values. Only rows in which both the observed and
 #' the predicted value are available are used.
 #' @param df data.frame with predicted and observed data
 #' @param x column with observed values
 #' @param y column with predicted values
 #' @return data.frame with one row and the columns
 #' \code{rmse} (square root of the sum of squared differences divided by
 #' \code{n - 1}), \code{rmsd} (square root of the mean squared
 #' difference), \code{sdev} (standard deviation of the observed values),
 #' \code{rpd} (\code{sdev / rmse}), \code{rpiq} (interquartile range of
 #' the observed values divided by \code{rmse}), \code{r2} (squared
 #' correlation), \code{bias} (mean observed minus mean predicted),
 #' \code{SB} (squared bias), \code{NU} (variance of the observed values
 #' times \code{(1 - slope)^2}, with the slope of the regression of
 #' predicted on observed values), \code{LC} (variance of the predicted
 #' values times \code{1 - r2}), and \code{n} (number of value pairs used)
 #' @export
 summary_df <- function(df, x, y){
  # `[[` returns a plain vector for data frames and tibbles alike
  obs <- df[[x]]
  pred <- df[[y]]
  # Use complete pairs for every statistic. Before, `rmsd` became NA as
  # soon as one value was missing, and `n` as well as the denominators
  # still counted the rows that `na.rm = TRUE` had dropped from the sums.
  complete <- !is.na(obs) & !is.na(pred)
  obs <- obs[complete]
  pred <- pred[complete]
  n <- length(obs)
  sq_diff <- (obs - pred)^2
  rmse <- sqrt(sum(sq_diff) / (n - 1))
  sdev <- stats::sd(obs)
  r2 <- stats::cor(obs, pred)^2
  bias <- mean(obs) - mean(pred)
  # Slope of the linear regression of predicted on observed values
  slope <- unname(stats::coef(stats::lm(pred ~ obs))[2])
  # unname(): keep the quantile name ("75%") from becoming the row name
  iqr_obs <- unname(
    stats::quantile(obs, .75) - stats::quantile(obs, .25)
  )
  data.frame(rmse = rmse,
    rmsd = sqrt(mean(sq_diff)),
    sdev = sdev,
    rpd = sdev / rmse,
    rpiq = iqr_obs / rmse,
    r2 = r2,
    bias = bias,
    SB = bias^2,
    NU = stats::var(obs) * (1 - slope)^2,
    LC = stats::var(pred) * (1 - r2),
    n = n)
 }
--- a/man/average_spectra.Rd
+++ b/man/average_spectra.Rd
@ -0,0 +1,41 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/average-spectra.R
 \name{average_spectra}
 \alias{average_spectra}
 \title{Calculate mean of spectra}
 \usage{
 average_spectra(in_spectra)
 }
 \arguments{
 \item{in_spectra}{List that contains spectral data in the
 element \code{MIR} (data.frame) and sample metadata in the
 list element \code{data_rep} (data.frame).
 The data.frame \code{data_meta}
 contains the sample ID stored in the \code{ID}
 vector (originally from spectral file names),
 country abbreviation stored in \code{contry} (2 letters),
 and the vector \code{site} (2 letters) that is the landscape (site)
 abbreviation.}
 }
 \value{
 \code{out_spectra}: List that contains:
 \itemize{
 \item \code{data_meta}: metadata of sample (data.frame)
 that is
 taken from the element \code{rep} of the input list argument
 \code{in_spectra}
 \item \code{MIR_mean}: average spectra from replicates of
  sample ID
 (data.frame)
 \item \code{MIR_sd}: standard deviation of spectra calculated
 from replicates of sample ID (data.frame)
 \item \code{cvar} coefficient of variance over all
 wavenumbers of spectra
 calculated from replicates of sample ID (vector)
 }
 }
 \description{
 Calculate the mean of each spectral repetitions
 (absorbance average per wavenumber)
 }
--- a/man/do_pretreatment.Rd
+++ b/man/do_pretreatment.Rd
@ -0,0 +1,31 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pretreat-spectra.R
 \name{do_pretreatment}
 \alias{do_pretreatment}
 \title{Preprocess spectra}
 \usage{
 do_pretreatment(list_spectra, select)
 }
 \arguments{
 \item{list_spectra}{List that contains averaged spectra
 in the list element called \code{MIR_mean}}
 \item{select}{Character string that specifies the predefined
 pretreatment options. Possible arguments are:
 \code{select = "MIR0"} for Savitzky Golay smoothing filter
 without derivative, \code{select = "MIR1"} for Savitky Golay
 with first derivative, \code{select = "MIR2"} for Savitzky
 Golay with second derivative, \code{select = "MIR0_snv"}
 for Standard Normal Variate after Savitzky Golay without
 derivative, and \code{select = "MIRb"} for
 baseline correction.}
 }
 \value{
 list_spectra: List that contains preprocessed
 spectra in element \code{MIR0}
 }
 \description{
 Use commonly used preprocessing algorithms on
 the spectra.
 }
--- a/man/evaluate_pls_q.Rd
+++ b/man/evaluate_pls_q.Rd
@ -0,0 +1,38 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pls-modeling.R
 \name{evaluate_pls_q}
 \alias{evaluate_pls_q}
 \title{Evaluate PLS performance}
 \usage{
 evaluate_pls_q(x, pls_model, variable, validation = TRUE, print = TRUE,
  env = parent.frame())
 }
 \arguments{
 \item{x}{List that contains calibration and validation data
 frame with combined spectral and chemical data}
 \item{pls_model}{List with PLS regression model output from
 the caret package}
 \item{variable}{Response variable (e.g. chemical property) to be
 modelled (needs to be non-quoted expression). \code{variable}
 needs to be a column name in the \code{validation} data.frame
 (element of \code{x})}
 \item{validation}{Logical expression if independent validation
 is performed (split data set into calibration set and
 validation set)}
 \item{print}{Print observed vs. predicted for calibration
 and validation. Default is \code{TRUE}.}
 \item{env}{Specify the environment in which the function is
 called. Default argument of \code{env} is
 \code{parent.frame()}}
 }
 \description{
 Calculate model performance indices based
 on observed and predicted values of validation and calibration
 set, and internal cross-validation
 }
--- a/man/fit_pls.Rd
+++ b/man/fit_pls.Rd
@ -0,0 +1,24 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pls-modeling.R
 \name{fit_pls}
 \alias{fit_pls}
 \title{Fit a PLS regression model}
 \usage{
 fit_pls(x, validation = TRUE, variable, env = parent.frame())
 }
 \arguments{
 \item{x}{List that contains calibration
 set, validation set, and model tuning options}
 \item{validation}{Logical expression whether independent
 validation is performed}
 \item{variable}{Response variable to be modeled}
 \item{env}{Environment where function is evaluated}
 }
 \description{
 Uses the caret package to perform PLS modeling.
 Spectra are centered and scaled prior to modeling.
 }
--- a/man/fit_pls_q.Rd
+++ b/man/fit_pls_q.Rd
@ -0,0 +1,28 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pls-modeling.R
 \name{fit_pls_q}
 \alias{fit_pls_q}
 \title{Fit a PLS regression model
 (quoted version of the function)}
 \usage{
 fit_pls_q(x, validation = TRUE, variable, tr_control, env = parent.frame())
 }
 \arguments{
 \item{x}{List that contains calibration
 set, validation set, and model tuning options}
 \item{validation}{Logical expression whether independent
 validation is performed}
 \item{variable}{Response variable to be modeled}
 \item{tr_control}{Object that defines controlling parameters
 of the desired internal validation framework}
 \item{env}{Environment where function is evaluated}
 }
 \description{
 Uses the caret package to perform PLS modeling.
 Spectra are centered and scaled prior to modeling.
 }
--- a/man/join_chem_spec.Rd
+++ b/man/join_chem_spec.Rd
@ -0,0 +1,24 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/join-chem-spectra.R
 \name{join_chem_spec}
 \alias{join_chem_spec}
 \title{Join chemical and spectral data frames}
 \usage{
 join_chem_spec(dat_chem, dat_spec, by = "sample_ID")
 }
 \arguments{
 \item{dat_chem}{data.frame that contains chemical values of
 the sample}
 \item{dat_spec}{List that contains spectral data}
 \item{by}{character of column name that defines sample_ID}
 }
 \value{
 List: xxx
 }
 \description{
 Combines spectral data (data.frame) and chemical
 data (data.frame).
 }
--- a/man/ken_stone.Rd
+++ b/man/ken_stone.Rd
@ -0,0 +1,28 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pls-modeling.R
 \name{ken_stone}
 \alias{ken_stone}
 \title{Split}
 \usage{
 ken_stone(spec_chem, ratio_val, pc, print = TRUE,
 validation = TRUE)
 }
 \arguments{
 \item{spec_chem}{data.frame that contains chemical
 and IR spectroscopy data}
 \item{ratio_val}{Ratio of number of validation and all samples.}
 \item{pc}{Number of principal components (numeric)}
 \item{print}{logical expression whether calibration}
 \item{validation}{Logical expression whether
 calibration sampling is performed
 (\code{TRUE} or \code{FALSE}).}
 }
 \description{
 Perform calibration sampling based on
 the Kennard-Stones algorithm.
 }
--- a/man/pls_ken_stone.Rd
+++ b/man/pls_ken_stone.Rd
@ -0,0 +1,34 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pls-modeling.R
 \name{pls_ken_stone}
 \alias{pls_ken_stone}
 \title{Calibration sampling, model tuning, and PLS regression}
 \usage{
 pls_ken_stone(spec_chem, ratio_val, pc, print = TRUE, validation = TRUE,
  variable, env = parent.frame())
 }
 \arguments{
 \item{spec_chem}{data.frame that contains IR spectroscopy
 and chemical data}
 \item{ratio_val}{Ratio of number of validation and all samples.}
 \item{pc}{Number of Principal Components used for Calibration
 sampling (Kennard-Stones algorithm)}
 \item{print}{Logical expression whether graphs shall be printed}
 \item{validation}{Logical expression whether independent
 validation is performed}
 \item{variable}{Response variable (without quotes)}
 \item{env}{Environment where function is evaluated}
 \item{k}{Number of validation samples}
 }
 \description{
 Perform calibration sampling and use selected
 calibration set for model tuning
 }
--- a/man/predict_from_spectra.Rd
+++ b/man/predict_from_spectra.Rd
@ -0,0 +1,22 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/predict-spectra.R
 \name{predict_from_spectra}
 \alias{predict_from_spectra}
 \title{Predict soil properties of new spectra based on calibration models}
 \usage{
 predict_from_spectra(model_list, spectra_list)
 }
 \arguments{
 \item{model_list}{List that contains caret output objects
 of the different calibration models to predict (one model per soil property)}
 \item{spectra_list}{List that contains spectra and additional data
 after pre-processing (\code{do_pretreatment()}), including metadata
 (\code{sample_ID})}
 }
 \description{
 Function that uses pre-processed spectra, additional metadata of new
 samples, and caret model output for the different soil property models
 to create predicted values.
 }
--- a/man/readOPUS.Rd
+++ b/man/readOPUS.Rd
@ -0,0 +1,53 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/load-spectra.R
 \name{readOPUS}
 \alias{readOPUS}
 \title{Read OPUS binary and ASCII files}
 \usage{
 readOPUS(fnames, in_format, out_format)
 }
 \arguments{
 \item{fnames}{character \code{vector} of the name(s)
 (with absolute path) of the file(s) to read}
 \item{in_format}{format of the input file: \code{'binary'} or
 \code{'txt'}}
 \item{out_format}{format of the output:
 \code{'matrix'} (default) or \code{'list'} (see below)}
 }
 \value{
 if \code{out_format} = \code{'matrix'}, absorbance values
 of the input file(s) in a single \code{matrix}.
 if \code{out_format} = \code{'list'}, a \code{list} of the
 input file(s) data consisting of a \code{list} with components:
 \itemize{
 \item{\code{Name}}{ name of the file imported}
 \item{\code{datetime}}{ date and time of acquisition in
 \code{POSIXct} format (available only when
 \code{in_format} = 'binary')}
 \item{\code{metadata}}{ \code{list} with information
 on instrument configuration (available only when
 \code{in_format} = 'binary')}
 \item{\code{absorbance}}{  a numeric \code{vector}
 of absorbance values}
 \item{\code{wavenumbers}}{ numeric \code{vector}
 of the band positions}
 }
 }
 \description{
 Read single or multiple binary and ASCII files acquired with
 a Bruker Vertex FTIR Instrument
 }
 \note{
 This is essentially a re-factored and simplified version of
 the \code{read.opus} function from the
 \sQuote{soil.spec} package for reading OPUS VERTEX files
 The function should also work for other OPUS files (eg alpha),
 see \code{read.opus}.
 }
 \author{
 Antoine Stevens and Andrew Sila (soil.spec package)
 }
--- a/man/readOPUS_bin.Rd
+++ b/man/readOPUS_bin.Rd
@ -0,0 +1,16 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/load-spectra.R
 \name{readOPUS_bin}
 \alias{readOPUS_bin}
 \title{Read an OPUS binary file}
 \usage{
 readOPUS_bin(file.name)
 }
 \arguments{
 \item{file.name}{Character vector with path to files}
 }
 \description{
 Read single binary file acquired with a
 Bruker Vertex FTIR Instrument
 }
--- a/man/readOPUS_text.Rd
+++ b/man/readOPUS_text.Rd
@ -0,0 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/load-spectra.R
 \name{readOPUS_text}
 \alias{readOPUS_text}
 \title{Read an OPUS text file}
 \usage{
 readOPUS_text(file.name)
 }
 \arguments{
 \item{file.name}{Character vector with path to files}
 }
 \description{
 Read single text file acquired with
 a Bruker Vertex FTIR Instrument
 (as exported from OPUS software)
 }
--- a/man/read_spectra.Rd
+++ b/man/read_spectra.Rd
@ -0,0 +1,48 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/load-spectra-yamsys.R
 \name{read_spectra}
 \alias{read_spectra}
 \title{Read an OPUS text file and extract metadata}
 \usage{
 read_spectra(path)
 }
 \arguments{
 \item{path}{character of the directory
 where the spectral text files are stored}
 }
 \value{
 List that contains the following elements:
 \itemize{
 \item \code{MIR}: data.frame that contains all the spectra.
 The columns of \code{MIR} contain absorbance values at
 different wavenumber in the MIR range. The wavenumbers
 rounded to 0.1 are given as column names. The original file
 names are stored as row names. One line in the data frame
 \code{MIR} contains one replicate scan of a sample.
 \item \code{data_rep}: data.frame that consists of sample
 metadata that was extracted from the file name of
 individual spectral files. The first vector \code{ID}
 contains the spectral file name without the repetition number
 supplied as \code{.<number>} in the file name.
 Letters 1 to 2 of the spectral
 file name are used for the country abbreviation, stored
 as in the \code{} vector \code{data_rep} . Letters
 4 to 5 of the file name are used for the landscape (site)
 abbreviation.
 }
 }
 \description{
 Read single text file acquired with
 a Bruker Vertex FTIR Instrument
 (as exported from OPUS software) and extract sample metadata
 provided in the filename
 }
 \note{
 : This function is derived from  a re-factored and
 simplified version of the \code{read.opus} function from the
 \sQuote{soil.spec} package for reading OPUS VERTEX files
 The function should also work for other OPUS files (eg alpha),
 see \code{read.opus}. The function readOPUS() was
 written by Antoine Stevens.
 }
--- a/man/remove_outliers.Rd
+++ b/man/remove_outliers.Rd
@ -0,0 +1,46 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/remove-outl-spectra.R
 \name{remove_outliers}
 \alias{remove_outliers}
 \title{Remove outlier spectra}
 \usage{
 remove_outliers(list_spectra, remove = TRUE)
 }
 \arguments{
 \item{list_spectra}{List that contains averaged
 spectral information
 in list element \code{MIR_mean} (data.frame) and metadata in
 \code{data_meta} (data.frame).}
 \item{remove}{logical expression (\code{TRUE} or \code{FALSE})
 that specifies whether spectra shall be removed.
 If \code{remove = FALSE}, there will be no outlier removal.}
 }
 \value{
 Returns list \code{spectra_out} that contains:
 \itemize{
 \item \code{MIR_mean}: Outlier removed MIR spectra as
 data.frame object. If \code{remove = FALSE},
 the function will
 return a list almost identical to \code{list_spectra},
 except that the first \code{indices} column of the spectral
 data frame \code{MIR_mean} is removed
 (This is done for both options
 \code{remove = TRUE} and \code{remove = FALSE}).
 \item \code{data_meta}: metadata data.frame, identical
 as in the \code{list_spectra} input list.
 \item \code{plot_out}: (optional) ggplot2 graph
 that shows all spectra (absorbance on x-axis and wavenumber
 on y-axis) with outliers marked, if
 \code{remove = TRUE}.
 }
 }
 \description{
 Remove outlier spectra based on the
 \code{pcout()} function of the \code{mvoutlier} package.
 }
 \details{
 This is an optional function if one wants to remove
 outliers.
 }
--- a/man/resample_spectra.Rd
+++ b/man/resample_spectra.Rd
@ -0,0 +1,21 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/resample-spectra.R
 \name{resample_spectra}
 \alias{resample_spectra}
 \title{Resample spectra to new wavenumber intervals}
 \usage{
 resample_spectra(list_spectra, wn_lower = 510, wn_upper = 3988,
  wn_interval = 2)
 }
 \arguments{
 \item{list_spectra}{List of spectra and metadata}
 \item{wn_lower}{Numerical value for lowest  wavenumber in sampling interval}
 \item{wn_upper}{Numerical value for highest wavenumber in sampling interval}
 \item{wn_interval}{Numerical value for the wavenumber increment in sampling interval}
 }
 \description{
 Resamples spectra to new wavenumbers defined by
 \code{wn_lower}, \code{wn_upper} and \code{wn_interval}
 }
--- a/man/summary_df.Rd
+++ b/man/summary_df.Rd
@ -0,0 +1,20 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/spectra-utils.R
 \name{summary_df}
 \alias{summary_df}
 \title{Calculate model statistics}
 \usage{
 summary_df(df, x, y)
 }
 \arguments{
 \item{df}{data.frame with predicted and observed data}
 \item{x}{column with observed values}
 \item{y}{column with predicted values}
 }
 \description{
 Calculates model statistics for predicted (y)
 vs. observed (x) values
 }
--- a/man/tune_model.Rd
+++ b/man/tune_model.Rd
@ -0,0 +1,24 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pls-modeling.R
 \name{tune_model}
 \alias{tune_model}
 \title{Perform model tuning}
 \usage{
 tune_model(x, variable, env = parent.frame(), validation = TRUE)
 }
 \arguments{
 \item{x}{list from calibration sampling}
 \item{variable}{response variable for PLS regression, supplied
 as character expression}
 \item{env}{Environment where function is evaluated}
 \item{validation}{Logical expression whether an independent
 validation is performed.}
 }
 \description{
 Uses functions from caret to do model tuning
 for PLS regression.
 }
--- a/man/tune_model_q.Rd
+++ b/man/tune_model_q.Rd
@ -0,0 +1,24 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/pls-modeling.R
 \name{tune_model_q}
 \alias{tune_model_q}
 \title{Perform model tuning}
 \usage{
 tune_model_q(x, variable, env = parent.frame(), validation = TRUE)
 }
 \arguments{
 \item{x}{list from calibration sampling}
 \item{variable}{response variable for PLS regression, supplied
 as character expression}
 \item{env}{Environment where function is evaluated}
 \item{validation}{Logical expression whether an independent
 validation is performed.}
 }
 \description{
 Uses functions from caret to do model tuning
 for PLS regression.
 }
--- a/simplerspec.Rproj
+++ b/simplerspec.Rproj
@ -0,0 +1,21 @@
 Version: 1.0
 RestoreWorkspace: Default
 SaveWorkspace: Default
 AlwaysSaveHistory: Default
 EnableCodeIndexing: Yes
 UseSpacesForTab: Yes
 NumSpacesForTab: 2
 Encoding: UTF-8
 RnwWeave: Sweave
 LaTeX: pdfLaTeX
 AutoAppendNewline: Yes
 StripTrailingWhitespace: Yes
 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
 PackageRoxygenize: rd,collate,namespace,vignette
--- a/tests/.Rapp.history
+++ b/tests/.Rapp.history
--- a/tests/testdata/soilchem/soilchem_YAMSYS.csv
+++ b/tests/testdata/soilchem/soilchem_YAMSYS.csv
@ -0,0 +1,95 @@
 "","sample_ID","country","site","material","S","C","N","ex_Ca","ex_Mg","ex_K","ex_Al","ex_Na","ex_Fe","ex_Mn","pH_BaCl2","CEC_eff","BS_eff","pH","P_meas","Fe_tot","Si_tot","Al_tot","P_tot","S_tot","K_tot","Ca_tot","Mn_tot","Zn_tot","Cu_tot","Zn_DTPA","Cu_DTPA","Fe_DTPA","Mn_DTPA","site_comb"
 "1","BF_lo_01_soil_cal","BF","lo","soil",71.3333333333333,6.92366666666667,0.437666666666667,488,62.5,113,0,12.2,1.79,5.57,3.67,3.29195331572746,99.9902582362911,6.69066666666667,4.73401678211257,9.769,291.1,38.16,529,194.6,4.389,1.162,149.5,14.7,2.8,2.49,0.1202,8.69,5.58,"lo"
 "2","BF_lo_02_soil_cal","BF","lo","soil",87,5.734,0.447,656,47.5,140,0,9.36,1.43,2.67,3.63,4.0636220946398,99.9913467760016,7.429,4.26093310483561,8.926,257.6,53.01,432.8,262,20.48,1.599,148.7,18.7,2.5,1.92,0.1302,4.22,2.66,"lo"
 "3","BF_lo_03_soil_cal","BF","lo","soil",72,3.413,0.233,270,21.9,61.6,10.4,11.3,1.89,8.18,3.5,1.85040263524987,93.7248491137882,5.244,1.51171556966768,9.964,262.9,32.19,399,173.1,4.256,0.4823,169.4,14.8,2.9,2.16,0.094,6.82,5.24,"lo"
 "4","BF_lo_04_soil_cal","BF","lo","soil",73,5.372,0.423,532.333333333333,49.5333333333333,110.333333333333,2.21870428052087e-314,9.04666666666667,1.50666666666667,5.33333333333333,3.75,3.38589846285646,99.9921219754688,6.903,4.22860458213604,7.861,286.7,41.17,516.2,243.6,5.368,1.369,212.6,14,3.6,1.452,0.1128,4.92,4.56,"lo"
 "5","BF_lo_05_soil_cal","BF","lo","soil",70,9.032,0.624,604,92.6,115,0,11.1,1.67,8.98,3.56,4.11893328043747,99.9899698713872,6.179,2.87533495721635,8.53,283.2,51.45,454.6,253.5,12.09,1.152,234.7,16.7,2.6,1.498,0.1812,14.42,7.36,"lo"
 "6","BF_lo_06_soil_cal","BF","lo","soil",57,3.577,0.276,426,41.8,90.7,0,9.81,1.75,5.55,3.64,2.74481226332668,99.9874807413058,6.789,6.17013760061038,8.176,284.5,51.44,477.6,185.3,24.98,0.923,264.9,19.3,1.9,1.886,0.1228,3.66,4.6,"lo"
 "7","BF_lo_07_soil_cal","BF","lo","soil",56,4.659,0.355,321,46,72.9,4.76,10.2,1.86,10.1,3.45,2.2646812600132,97.6393889986691,5.537,5.89928208153678,6.664,278.9,45.77,472.9,197.3,12.6,0.7728,211.7,17.7,2.8,1.832,0.1208,9.08,7.02,"lo"
 "8","BF_lo_08_soil_cal","BF","lo","soil",53,4.259,0.315,250,34.1,75.5,5.85,12.6,1.78,10,3.47,1.84163533759903,96.4403131968809,5.427,1.85277627041077,8.582,266.3,45.64,419.1,190.6,15.11,0.7131,247.4,19.7,2.9,1.552,0.1206,9.36,7.16,"lo"
 "9","BF_lo_09_soil_cal","BF","lo","soil",57,4.305,0.336,375,47,88.5,0,10.7,1.66,4.34,3.56,2.53141196129241,99.9836796889713,6.663,9.16956204724951,9.257,268.2,36.71,713,190.7,9.979,0.974,213.7,16.4,3.4,1.488,0.1892,5.72,2.9,"lo"
 "10","BF_lo_10_soil_cal","BF","lo","soil",42,2.932,0.262,225,31.6,121,6.45,13.2,2.32,8.73,3.15,1.8225160449192,96.0065256315125,5.665,2.9388362959186,7.233,266,42.33,421.5,188.3,22.05,0.5456,195.4,13.8,1,1.24,0.1402,5.82,5.58,"lo"
 "11","BF_lo_11_soil_cal","BF","lo","soil",98,4.429,0.402,491,64.4,137,0,12.2,1.86,8.23,3.57,3.38402601566496,99.9880695296455,5.982,6.24444616416491,7.586,292.9,33.88,451.8,222,5.653,1.142,250.5,14.9,2.5,1.36,0.151,4.5,5.66,"lo"
 "12","BF_lo_12_soil_cal","BF","lo","soil",91,4.807,0.361,448,39.3,58.4,0,9.82,1.78,5.03,3.74,2.75138618277789,99.9900793596152,6.472,2.62713100996961,8.554,261.7,35.44,379.8,221.1,4.889,1.083,145.4,14.8,4.1,1.598,0.1368,14.22,3.6,"lo"
 "13","BF_lo_13_soil_cal","BF","lo","soil",81,4.391,0.342,306,34.8,67.1,0,14.4,2.02,7.1,3.15,2.04870101666342,99.9481662444672,5.854,1.98271482526627,10.22,281.3,44.2,407.4,240.3,6.701,0.8856,204.2,14.7,3.4,1.418,0.1138,9.92,5.02,"lo"
 "14","BF_lo_14_soil_cal","BF","lo","soil",62,3.257,0.307,276,34.9,120,9.96,13.1,1.87,16.1,3.44,2.13969363046569,94.7986329332249,4.892,1.9261550105179,6.576,287.7,38.45,394,218.4,17.73,0.6133,218.8,12.8,2.4,1.188,0.112,8.06,11.38,"lo"
 "15","BF_lo_15_soil_cal","BF","lo","soil",60,4.287,0.389,531,43.4,104,0,11.1,1.93,5.03,3.4,3.32183827213418,99.9820231839449,6.865,2.17138101151652,7.086,267,37.54,411.6,225.3,13.62,1.183,167.7,16.2,2.6,2.04,0.0952,2.78,3.78,"lo"
 "16","BF_lo_16_soil_cal","BF","lo","soil",60,4.666,0.351,NA,44.9,85.4,0,11,1.68,8.9,3.4,NA,NA,5.952,1.47188894474904,9.006,293.4,39.58,439.7,217.6,13.06,0.7457,235.1,14.6,2.8,0.914,0.1132,7.32,6.8,"lo"
 "17","BF_lo_17_soil_cal","BF","lo","soil",43,3.137,0.22,194,27.6,79,4.87,13.2,1.86,5.9,3.41,1.50943337380506,96.3738186927883,5.934,1.05754232840649,6.579,298.4,36.51,358.2,192.3,4.104,0.4555,164.2,13.3,2,1.288,0.0938,5.86,3.36,"lo"
 "18","BF_lo_18_soil_cal","BF","lo","soil",50,3.52,0.275,448,29.1,73.6,5.05,12.4,1.77,9.46,3.47,2.77393979197903,97.9573827771613,5.703,0.809488832871622,8.121,295.2,36.49,406.5,204,8.608,0.501,203.5,12.2,2,1.08,0.098,10.4,7.5,"lo"
 "19","BF_lo_19_soil_cal","BF","lo","soil",60,3.419,0.271,253,26.2,114,10.8,10.1,1.9,9.51,3.17,1.93474179036083,93.7406076693148,5.228,2.78721264862261,8.521,274.6,47.76,400,222.2,22.11,0.5483,153.4,13.1,0.5,1.054,0.0956,8.5,6.44,"lo"
 "20","BF_lo_20_soil_cal","BF","lo","soil",70,6.52633333333333,0.484,593,62.4,110,0,12.6,2.19,7.25,3.41,3.80943841576141,99.9846810040273,6.464,2.39837203400038,10.16,275.8,47.35,453.5,280.8,16.73,1.409,194.5,16.8,2.8,1.086,0.137,8.58666666666667,6.26666666666667,"lo"
 "21","BF_mo_01_soil_cal","BF","mo","soil",107.333333333333,14.5073333333333,1.00533333333333,768,139,104,11.8,17.5,2.28,14,3.59,5.45003632048912,97.5854498195003,5.63933333333333,4.39620829533527,10.41,353.9,32.83,421.4,271.9,2.886,0.991,221.4,18.3,9.9,1.894,0.97,173.733333333333,10.1933333333333,"mo"
 "22","BF_mo_02_soil_cal","BF","mo","soil",85,5.639,0.408,338,51.9,74.2,5.77,13.3,2.31,8.73,3.38,2.4261982406569,97.3298167059774,5.935,2.46200419893588,14.75,261.4,42.06,470.2,289,1.948,0.8159,216.8,15.4,9.7,1.576,0.404,30.6,5.68,"mo"
 "23","BF_mo_03_soil_cal","BF","mo","soil",62,5.283,0.388,418,47.6,71.2,0,10,1.84,6.43,3.4,2.70382221969937,99.9779142004426,6.681,2.23524037414389,4.506,340.9,15.79,310.6,192.4,0.9273,0.6999,111.7,11,1.9,1.386,0.1496,6.62,4.68,"mo"
 "24","BF_mo_04_soil_cal","BF","mo","soil",126,16.446,1.136,911.333333333333,104.666666666667,120.666666666667,15.4666666666667,34.3666666666667,2.22666666666667,24.9666666666667,3.68666666666667,6.03947462375707,97.1473037806956,5.074,5.86609322743792,29.62,262.6,69.86,610,460.3,2.504,1.398,362.5,35.9,29.2,1.448,1.446,137,6.7,"mo"
 "25","BF_mo_05_soil_cal","BF","mo","soil",55,4.577,0.318,343,40.2,55.5,0,11.2,1.83,8.49,3.18,2.23411725959241,99.9556406375826,6.295,1.76818207237125,7.527,293.2,30.57,375.5,211.5,1.397,0.7349,181.6,13.6,3.4,1.606,0.1968,9.78,5.78,"mo"
 "26","BF_mo_06_soil_cal","BF","mo","soil",54,3.376,0.246,199,28.1,58.7,0,13.5,1.77,6.97,3.35,1.4338184337989,99.9532698581332,6.339,1.20043536263818,5.595,292.7,22.46,318.3,196.2,1.21,0.5012,146.8,12,2.5,2,0.212,7.72,4.58,"mo"
 "27","BF_mo_07_soil_cal","BF","mo","soil",95,11.443,0.819,805,159,160,0,13.8,1.74,10.9,3.6,5.79517026285867,99.9934983279587,6.561,4.31575851299964,12.91,319.3,43.23,425.7,331.5,2.172,1.225,226.2,18.7,9.6,2.08,0.5,29.2,10.04,"mo"
 "28","BF_mo_08_soil_cal","BF","mo","soil",65,8.379,0.627,326,58.1,92,29.2,18.9,3.09,11.4,3.43,2.74767667860451,88.1630085085196,5.236,3.27661549116438,9.321,336.4,31.25,400.8,253.8,2.704,0.5309,137.9,15.8,6.8,1.958,0.734,216,7.48,"mo"
 "29","BF_mo_09_soil_cal","BF","mo","soil",51,5.399,0.372,569,55.6,68.6,0,11.1,1.77,5.52,3.6,3.52109726580015,99.989299274167,7.121,2.49317254770934,7.956,284.7,24.54,425.9,246.6,1.381,1.289,192.3,11.3,3.6,1.118,0.1528,3.58,4.76,"mo"
 "30","BF_mo_10_soil_cal","BF","mo","soil",41,2.404,0.181,97.7,17.5,38.9,9.22,8.91,1.36,6.72,3.42,0.872892801070666,88.18976746758,5.621,1.30611652169977,7.079,247.3,20.16,318.3,217.4,1.248,0.3259,111.3,9.5,2.8,0.686,0.095,8.74,4.3,"mo"
 "31","BF_mo_11_soil_cal","BF","mo","soil",51,5.003,0.318,504,53.1,68,0,10.9,1.65,7.77,3.24,3.17423971174348,99.9728073496525,6.396,2.36135023459846,7.252,279,21.74,571,271.2,2.548,1.631,184.6,11.3,2.6,1.924,0.168,10.5,6.56,"mo"
 "32","BF_mo_12_soil_cal","BF","mo","soil",59,5.909,0.477,458,43.4,531,0,95,1.47,5.95,3.64,4.41436385968293,99.9922156360726,6.697,12.7093524700174,8.09,280.4,36.61,415.5,297.6,8.788,3.168,247.8,15,1.5,1.436,0.1246,6.2,4.26,"mo"
 "33","BF_mo_13_soil_cal","BF","mo","soil",126,18.633,1.239,2050,126,121,0,15.1,2.07,5.89,3.58,11.642429115265,99.9966111865928,7.364,5.24153990444905,10.5,280.5,42.58,520.8,347.6,3.014,4.06,300.4,18.8,6.4,2.2,0.312,11.96,10.16,"mo"
 "34","BF_mo_14_soil_cal","BF","mo","soil",72,5.305,0.369,582,43.9,70.7,0,10.3,1.71,2.81,3.67,3.49152953242323,99.990815076589,8.389,4.16826724538431,11.36,253.2,27.15,412,226.3,1.187,2.148,115.8,12.1,3.9,1.06,0.1722,7.3,4.32,"mo"
 "35","BF_mo_15_soil_cal","BF","mo","soil",52,4.862,0.352,395,41.5,65,0,9.18,1.29,5.31,3.51,2.51929262699996,99.9816002194462,6.458,2.19119622884057,3.883,363,10.43,240.1,124.6,1.699,1.126,67.9,10.1,1.7,1.78,0.1344,7.44,3.66,"mo"
 "36","BF_mo_16_soil_cal","BF","mo","soil",79,9.191,0.723,814,80.5,113,0,13.8,1.87,7.58,3.7,5.07383681566402,99.99410132099,6.531,2.91459147065403,12.99,235.7,37.55,418.5,283.7,8.888,3.129,239.9,17.2,3.4,1.514,0.1668,9.48,6.5,"mo"
 "37","BF_mo_17_soil_cal","BF","mo","soil",55,4.734,0.342,413,34,41.8,0,11.1,1.78,7,3.68,2.49626500590144,99.9874454667719,6.545,1.44017552359073,6.309,266.1,22.04,366.5,206.8,1.858,1.054,163.6,11.4,2.3,1.224,0.1118,4.2,5.08,"mo"
 "38","BF_mo_18_soil_cal","BF","mo","soil",48,4.812,0.357,392,49.5,53.2,0,11.8,1.94,8.17,2.48,2.55587100604375,99.805664416925,6.224,1.59928607731342,6.03,275.7,21.61,375,229.4,3.903,1.468,170.1,11.5,1.7,1.468,0.1072,8.98,6.26,"mo"
 "39","BF_mo_19_soil_cal","BF","mo","soil",76,7.099,0.531,614,60.6,100,0,11.9,2.19,4.94,3.58,3.87061171166989,99.9898067740147,7.031,3.70217994019234,13.1,243.6,38.55,478.8,300.3,2.167,1.944,155.6,15,5.1,1.372,0.187,8.56,4.5,"mo"
 "40","BF_mo_20_soil_cal","BF","mo","soil",72.6666666666667,7.17366666666667,0.477333333333333,682,64.3,67.2,12.9,9.87,6.35,8.48,3.41,4.29130333937973,96.643836587511,6.60933333333333,1.79050194409809,14.86,272,39.81,453.3,265.5,4.68,2.486,235,16.5,4.6,1.69666666666667,0.2008,7.40666666666667,8.6,"mo"
 "41","CI_sb_01_soil_cal","CI","sb","soil",241.666666666667,19.5193333333333,1.972,1990,223,137,0,13.3,1.73,9.59,3.68,12.1742148221306,99.9974257525088,6.54866666666667,8.75037784048082,29.57,218.8,78.07,626,521.8,23.84,5.204,753.4,48.1,26.1,9.42,1.201,13.71,19.65,"sb"
 "42","CI_sb_02_soil_cal","CI","sb","soil",182,18.157,1.539,1600,336,380,0,14.1,1.94,7.48,2.57,11.7865785954425,99.9657466144802,7.253,10.8321908821859,20.94,216,72.66,582,460.7,32.28,3.359,1146,40.2,12.1,9.76,0.904,10.28,23.6,"sb"
 "43","CI_sb_03_soil_cal","CI","sb","soil",154,12.946,1.129,784,86.3666666666667,168.766666666667,8.04040419712669e-315,10.07,1.97666666666667,5.17333333333333,3.6,5.09889022551558,99.9926104907527,6.264,5.11724332035826,10.17,232,64.58,434,420.5,1.991,1.592,121.3,21.5,5.9,2.68,0.342,49.8,3.94,"sb"
 "44","CI_sb_04_soil_cal","CI","sb","soil",172,12.999,1.228,453,91.8,77.6,32.7,17.5,4.33,12.3,3.41,3.6547738832411,90.0353278206649,4.705,4.86549869229313,17.05,230.6,95.97,375.2,420.5,0.5404,0.7148,102.2,29.1,7.7,NA,0.72,176.6,8.88,"sb"
 "45","CI_sb_05_soil_cal","CI","sb","soil",147,12.578,1.064,520,111,234,0,14.9,2.65,5.06,3.42,4.17220933151836,99.9863313642925,6.058,7.12270396901361,11.75,243.9,82.83,409.3,410.2,1.762,0.9808,78.2,27.4,5.1,NA,0.436,86.8,4.26,"sb"
 "46","CI_sb_06_soil_cal","CI","sb","soil",152,15.181,1.239,1010,61.6,113,0,15.9,2.14,10.7,3.42,5.9058113810483,99.9903436791715,5.746,4.48774011971219,12.29,217.6,82.84,429,546.1,1.369,1.796,207.5,26.7,7.3,5.5,0.55,86.4,8.28,"sb"
 "47","CI_sb_07_soil_cal","CI","sb","soil",225,21.919,1.82,1650,432,868,0,15.1,1.87,3.55,3.7,14.0748103472207,99.9978735816692,7.723,7.14271635267919,14.92,200.7,79.05,541.4,585.6,3.099,3.772,462,31.3,4.7,4.68,0.414,20,13.16,"sb"
 "48","CI_sb_08_soil_cal","CI","sb","soil",222,18.674,1.561,1570,241,421,0,12.2,1.57,1.7,4.24,10.9477876222361,99.9992115668153,7.823,6.55642139269418,14.55,209.1,91.8,476.9,526.1,1.048,3.668,131,29.5,4.7,4.04,0.452,22.4,5.04,"sb"
 "49","CI_sb_09_soil_cal","CI","sb","soil",110,7.81,0.707,374,40.9,35.3,0,13,1.91,4.3,3.35,2.35041915898682,99.9714933659529,6.058,3.3537050453293,5.356,241.8,32.1,390,379.6,3.466,1.838,100.7,14.1,2.6,1.976,0.183,24.6,3.56,"sb"
 "50","CI_sb_10_soil_cal","CI","sb","soil",115,8.201,0.795,464,85.4,122,0,12.3,1.87,7.36,3.14,3.38484546481102,99.9678964977453,5.927,4.481069325157,7.285,245.5,76.38,309.5,351,1.99,0.9411,141.9,20.5,2.8,3.84,0.32,23,6.08,"sb"
 "51","CI_sb_11_soil_cal","CI","sb","soil",160,12.752,1.09,1020,77.5,104,0,12.7,1.96,2.31,3.3,6.04979466832117,99.9875734484944,7.125,8.56618867973226,5.419,257.3,36.06,431.4,471.9,9.37,3.473,59.2,16.3,1.1,3.36,0.1066,57.8,2.54,"sb"
 "52","CI_sb_12_soil_cal","CI","sb","soil",90,5.691,0.515,406,43.9,46.6,0,12.1,1.77,10.2,3.43,2.55966831076419,99.9782275366968,6.028,2.921295619182,4.943,282.6,27.95,448.9,318.1,9.247,1.338,234.8,13.9,1.1,2.66,0.1586,14.8,7.44,"sb"
 "53","CI_sb_13_soil_cal","CI","sb","soil",120,8.996,0.838,725,59.4,84.3,0,9.82,1.84,3.23,3.62,4.36541942267805,99.9917573799213,7.363,7.90388886910784,7.087,264.8,34.64,459,402.8,5.295,2.231,207.7,21.3,2.8,3.34,0.1624,7.34,4.24,"sb"
 "54","CI_sb_14_soil_cal","CI","sb","soil",151,9.778,0.866,622,146,70.6,0,15.9,2.34,15,3.34,4.55576422498939,99.9849502245823,6.152,2.33803469724865,28.35,238.6,72.87,498.7,421.7,2.501,1.362,496.6,21.5,16.1,3.2,1.174,23.2,16.02,"sb"
 "55","CI_sb_15_soil_cal","CI","sb","soil",105,7.752,0.762,615,81.4,166,0,13.9,1.84,13.2,3.24,4.22473482136334,99.9795688973982,6.541,2.20632838183602,11.99,270.5,50.35,504.6,330.4,27.04,1.466,597.9,22.5,7.5,3,0.504,12.04,14.98,"sb"
 "56","CI_sb_16_soil_cal","CI","sb","soil",85,7.065,0.636,603,60.1,146,0,12.6,1.92,5.33,3.27,3.93271325964953,99.979516744767,7.029,3.56717047109549,5.847,246.8,34.04,508,378.6,18.03,2.082,340.6,18.1,4.1,2.44,0.274,6.1,5.56,"sb"
 "57","CI_sb_17_soil_cal","CI","sb","soil",216,21.174,1.803,1850,330,261,0,16.3,2.46,4.79,3.46,12.6864620687431,99.9959003126887,7.405,3.71545548183189,35.2,200.4,74.48,615,580,4.774,3.904,754.7,38.4,21,3.74,0.768,14.94,13,"sb"
 "58","CI_sb_18_soil_cal","CI","sb","soil",116,6.364,0.662,NA,62.6,90.4,0,13.5,2.07,25.1,3.49,NA,NA,6.047,5.88193618174763,18.6,237.8,64.72,650,344.2,23.62,1.664,978.5,41.2,20.1,5.7,1.486,16.78,26.6,"sb"
 "59","CI_sb_19_soil_cal","CI","sb","soil",121,8.759,0.9,690,179,124,0,14.4,1.94,13.2,3.65,5.29635705549414,99.9936596387429,6.704,2.45200522222413,10.98,276.6,51.96,430,316.6,31.59,2.138,616.2,23.9,6.1,3.52,0.406,9.08,15.2,"sb"
 "60","CI_sb_20_soil_cal","CI","sb","soil",103.333333333333,6.693,0.680666666666667,351,39.7,55.2,0,12.7,1.75,26.1,3.44,2.27523588177652,99.9760632694607,6.569,1.69267720108613,12.56,270.3,54.21,464.4,286.4,34.12,1.369,756.6,23.9,5.3,3.14666666666667,0.512,16.2933333333333,25.0666666666667,"sb"
 "61","CI_sb_YAMS_0001","CI","sb_icraf","soil",128,8.592,0.845,664,241,19,0,50.4,1.7,9.33,3.55,5.56491398659586,99.992403163091,NA,0.974056133266166,10.5,262.9,53.83,385.3,450.4,11.73,2.38,83.2,17.3,9.7,0.616,1.07,81.2,7.04,"sb"
 "62","CI_sb_YAMS_0002","CI","sb_icraf","soil",83,5.175,0.518,200,26.6,33.6,23.5,13.3,2.05,8.14,3.39,1.6226436780343,83.8587026783345,NA,1.619298460979,21.49,244.5,51.18,360.3,381.4,2.395,0.4743,210.9,14,6.3,0.21,0.27,49.2,5.96,"sb"
 "63","CI_sb_YAMS_0003","CI","sb_icraf","soil",77,5.562,0.491,171,30.9,32.3,29.6,18,2.3,6.06,3.28,1.59843239808863,79.3597838440324,NA,1.37934145774902,7.681,339.1,34.91,307.5,243.8,2.229,0.2773,108.2,13.9,5.6,0.358,0.572,79.2,4.2,"sb"
 "64","CI_sb_YAMS_0004","CI","sb_icraf","soil",152,12.088,1.13,1060,118,52.6,0,15.8,1.69,6.01,3.45,6.4644702254042,99.9917669961297,NA,2.62052593752826,13.24,249.6,78.44,471.4,508.9,2.306,1.966,415.6,23.2,8,1.646,0.592,14.4,12.06,"sb"
 "65","CI_sb_YAMS_0005","CI","sb_icraf","soil",133,9.512,1.002,733,71.1,79.8,0,13.5,2.12,12.8,3.36,4.50641042902993,99.9854701705787,NA,2.92677419379692,23.95,248.1,69.5,462.1,374.1,9.427,1.627,832.6,26.4,11.8,0.988,0.76,17.5,21.4,"sb"
 "66","CI_sb_YAMS_0006","CI","sb_icraf","soil",153,10.011,1.011,667,85.1,276,0,15.2,1.35,10.8,3.52,4.80126399788101,99.9905651353839,NA,1.73937276297248,27.89,221.1,75.38,630,366.9,30.55,2.514,1009,31.1,18.3,1.576,1.084,25.4,31.4,"sb"
 "67","CI_sb_YAMS_0007","CI","sb_icraf","soil",194,14.711,1.283,418,108,404,46.8,17.5,2.02,8.74,3.28,4.60523212953971,88.6830312294916,NA,4.18247547422748,29.37,220.6,101.6,472.7,392.6,11.74,0.7073,204.5,46.1,12.9,1.148,1.176,810,8.1,"sb"
 "68","CI_sb_YAMS_0008","CI","sb_icraf","soil",176,11.439,1.271,1030,229,20.6,0,104,1.67,10.4,3.6,7.52979846949917,99.9949961082458,NA,2.41178033105632,26.81,236.3,91.35,585,431.8,12.47,3.681,552.5,56.7,17.4,0.618,1.496,43.4,13.96,"sb"
 "69","CI_sb_YAMS_0009","CI","sb_icraf","soil",128,10.064,1.048,859,138,129,0,31.9,1.85,4.35,3.56,5.8913191285121,99.9929874057662,NA,16.4254702436258,10.7,268.9,45.53,558,426,12.9,4.015,227.6,32.1,6.3,1.01,0.35,41,4.7,"sb"
 "70","CI_sb_YAMS_0010","CI","sb_icraf","soil",160,13.035,1.271,1250,298,32.3,0,56.2,1.43,2.42,3.55,9.01749634399373,99.9953118091368,NA,4.60114362715048,15.95,241.1,53.79,447.6,464.2,20.82,7.568,433.5,29.2,11.6,0.55,0.656,27,5.24,"sb"
 "71","CI_sb_YAMS_0011","CI","sb_icraf","soil",181,18.286,1.385,797,69.3,287,4.83,25.1,2.09,5.23,3.47,5.44494291945002,99.0043106739982,NA,2.30261875130942,14.59,227.4,102.4,412.4,404.1,8.324,1.36,190.8,21.6,7.6,0.396,0.1118,90.4,6.6,"sb"
 "72","CI_sb_YAMS_0012","CI","sb_icraf","soil",186,9.009,0.967,506,71.4,105,0,15.4,2.14,6.91,3.37,3.44879098251365,99.9814465625361,NA,1.53353687389232,21.87,231,80,493.5,409.9,18.51,1.476,797,29.8,26.7,1.224,1.348,12.2,16.5,"sb"
 "73","CI_sb_YAMS_0013","CI","sb_icraf","soil",157,9.584,0.934,809,76.3,42.4,0,19,1.81,7.56,3.43,4.8566293278569,99.9885249047019,NA,2.66676144461771,13.13,250.2,53.99,509,416.4,19.86,2.526,293.7,17.5,9.7,0.656,0.442,44.8,9.28,"sb"
 "74","CI_sb_YAMS_0014","CI","sb_icraf","soil",150,9.9,1.039,756,160,37.7,5.54,22.1,2.34,19.1,3.38,5.3440244790999,98.8355874793927,NA,3.36237707825105,17.74,241.5,63.54,555.7,372.4,29.73,2.724,725,23,14.4,0.474,0.704,38,21.8,"sb"
 "75","CI_tb_01_soil_cal","CI","tb","soil",161.333333333333,14.5586666666667,1.07533333333333,1340,133,339,0,31.3,3.12,25,2.96,8.78622833351177,99.9812807358086,5.82966666666667,7.73937161265099,18.84,267.5,61.05,702,346.5,3.448,1.813,581,49.4,12.1,11.4733333333333,0.546666666666667,33.7333333333333,24.3333333333333,"tb"
 "76","CI_tb_02_soil_cal","CI","tb","soil",195,24.558,1.355,2170,317,415,0,16.3,1.58,9.21,2.44,14.5751823727618,99.9626339439036,6.64,NA,18.66,229.1,98.21,1440,392.3,10.29,3.849,529,71.6,13.3,NA,0.752,52,13.42,"tb"
 "77","CI_tb_03_soil_cal","CI","tb","soil",101,13.036,0.812,749.333333333333,163.666666666667,162.666666666667,8.04040419712669e-315,10.1266666666667,1.59,7.73666666666667,3.37,5.54688509406309,99.9884643494979,6.306,20.1767366747937,9.42,261.7,48.8,856,324.9,19.38,1.925,367.4,29.7,2.8,8.34,0.252,28.6,6.52,"tb"
 "78","CI_tb_04_soil_cal","CI","tb","soil",139,16.054,1.35,2130,187,303,0,11,1.49,1.56,4.64,12.9909079020713,99.9997354841167,7.961,16.2304485190393,14,248.5,57.57,781,418.8,5.514,4.858,359.9,34.5,6.6,8.06,0.232,6.32,4.54,"tb"
 "79","CI_tb_05_soil_cal","CI","tb","soil",95,12.648,0.748,612,233,130,0,13.7,1.89,7.61,3.66,5.36376269668,99.9938818276991,5.887,6.19962424935905,10.37,244.6,60.74,601.1,282.7,1.624,1.315,157.9,36.6,7.7,8.76,0.412,63.6,7.3,"tb"
 "80","CI_tb_06_soil_cal","CI","tb","soil",117,12.21,0.976,1300,153,205,0,11.4,1.97,3.04,3.91,8.32044468584915,99.997782085903,7.61,24.240819961995,8.203,246.2,54.29,674,372.6,18.77,3.269,316.9,30.4,3.9,4.6,0.228,8.1,4.3,"tb"
 "81","CI_tb_07_soil_cal","CI","tb","soil",100,11.63,0.725,887,156,327,0,13.4,2.01,9.17,3.59,6.60508675240094,99.9941626903345,6.24,8.45686288005231,17.9,233.4,77.96,668,269.4,1.915,1.426,265.4,31.4,7.9,5.18,0.426,51.6,8.88,"tb"
 "82","CI_tb_08_soil_cal","CI","tb","soil",161,23.734,1.608,1900,366,446,0,13,1.83,5.05,3.59,13.6908922302655,99.9971838258535,6.606,NA,16.48,220.3,99.43,1631,419.2,6.75,3.341,323.3,36.1,10.2,6.14,0.496,122.2,7.96,"tb"
 "83","CI_tb_09_soil_cal","CI","tb","soil",93,7.45,0.459,388,NA,50.7,0,12.9,2.04,4.13,3.42,NA,NA,6.12,10.1379031079611,3.899,257.7,36.18,600,264.2,13.45,1.255,111.9,29.5,1.4,11.06,0.138,23.4,2.52,"tb"
 "84","CI_tb_10_soil_cal","CI","tb","soil",177,23.874,1.914,2120,318,519,0,14.4,2.36,4.06,3.65,14.5865211188213,99.9976978186365,7.217,33.1290569634135,19.22,230.6,80.02,1145,511.8,5.276,4.561,530.4,34.4,12.3,3.98,0.432,13.06,7.58,"tb"
 "85","CI_tb_11_soil_cal","CI","tb","soil",84,11.159,0.733,844,127,157,0,11.9,1.66,8.08,3.37,5.71079604518926,99.9887954450984,6.471,14.1975815626322,7.595,262.2,53.24,715,335.7,18.09,2.122,335.1,22.4,2.7,5.68,0.234,20.6,8.7,"tb"
 "86","CI_tb_12_soil_cal","CI","tb","soil",96,8.031,0.563,601,77,67.6,0,12.6,1.95,9.07,3.41,3.86105501169735,99.9848857963504,5.953,7.84884478723953,5.971,266.8,37.44,601,296.4,11.28,1.306,186.7,14.6,2.4,2.58,0.1856,24.6,6.1,"tb"
 "87","CI_tb_13_soil_cal","CI","tb","soil",182,16.947,1.649,2140,220,0,0,0.444,1.82,6.99,3.54,12.4918665799486,99.9965369088543,7.075,20.6566944476248,16,244.2,85.88,793,447.3,5.955,2.425,478.7,24.1,11.3,4.36,0.664,19.06,15.9,"tb"
 "88","CI_tb_14_soil_cal","CI","tb","soil",87,7.532,0.547,405,102,132,0,10.7,1.52,8.55,3.49,3.24503294795757,99.9850420475484,6.265,10.2846605881753,8.235,260.7,53.67,652,261.9,11.17,0.8891,221.6,23.8,3.6,4.92,0.208,21.6,6.3,"tb"
 "89","CI_tb_15_soil_cal","CI","tb","soil",109,15.834,1.071,1440,177,180,0,15.1,2.13,4.65,3.51,9.16900363904844,99.994944441805,7.35,13.3195313177701,5.979,285.6,35.02,654,319.5,11.58,2.965,240.5,22.2,2.7,3.98,0.1856,12.64,6.18,"tb"
 "90","CI_tb_16_soil_cal","CI","tb","soil",72,7.085,0.453,494,87.4,64,0,8.82,1.43,4.09,3.64,3.38678582932193,99.9898537975168,7.274,14.1975815626322,6.209,289.8,30.25,709,277.7,6.651,1.159,162,15,2.3,NA,NA,NA,NA,"tb"
 "91","CI_tb_17_soil_cal","CI","tb","soil",242,24.687,2.481,2040,337,327,0,11.5,1.85,7.05,3.84,13.8398391470632,99.9984333924455,6.839,12.7395129844865,20.23,228.7,74.88,909,639.4,4.258,3.771,687.6,26.2,14.6,NA,NA,NA,NA,"tb"
 "92","CI_tb_18_soil_cal","CI","tb","soil",137,18.226,1.264,1150,316,21,0,1.32,1.83,13,3.52,8.39900313523078,99.994606589011,6.207,10.0880303955813,14.96,251.4,55.86,918,392.9,4.178,2.205,415.7,28.6,7.4,NA,0.288,52.6,13.24,"tb"
 "93","CI_tb_19_soil_cal","CI","tb","soil",212,20.617,2.127,1540,201,370,0,13.8,1.98,5.87,3.27,10.3461664969186,99.9922140466733,6.413,11.445378840779,16.05,232.2,86.42,694,466.2,3.215,2.373,279,25.6,10.7,NA,0.448,47.2,6.56,"tb"
 "94","CI_tb_20_soil_cal","CI","tb","soil",112.333333333333,10.8713333333333,0.836,375,108,163,41.3,10.6,2.51,11.5,3.36,3.68294884899498,87.5131626540034,5.32366666666667,5.1805936731077,13.11,233.2,73.95,596.6,286.8,2.928,0.7313,255.4,20.7,9.4,2.067,0.785,298,12.7,"tb"
--- a/tests/testdata/soilspec/BF_lo_01_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_01_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_01_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_01_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_01_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_01_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_02_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_02_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_02_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_02_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_02_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_02_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_03_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_03_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_03_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_03_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_03_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_03_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_04_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_04_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_04_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_04_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_04_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_04_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_05_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_05_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_05_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_05_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_05_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_05_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_06_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_06_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_06_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_06_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_06_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_06_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_07_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_07_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_07_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_07_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_07_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_07_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_08_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_08_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_08_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_08_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_08_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_08_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_09_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_09_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_09_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_09_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_09_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_09_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_10_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_10_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_10_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_10_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_10_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_10_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_11_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_11_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_11_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_11_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_11_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_11_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_12_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_12_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_12_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_12_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_12_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_12_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_13_soil_cal.3.txt
+++ b/tests/testdata/soilspec/BF_lo_13_soil_cal.3.txt
--- a/tests/testdata/soilspec/BF_lo_13_soil_cal.4.txt
+++ b/tests/testdata/soilspec/BF_lo_13_soil_cal.4.txt
--- a/tests/testdata/soilspec/BF_lo_13_soil_cal.5.txt
+++ b/tests/testdata/soilspec/BF_lo_13_soil_cal.5.txt
--- a/tests/testdata/soilspec/BF_lo_14_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_14_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_14_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_14_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_14_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_14_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_15_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_15_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_15_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_15_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_15_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_15_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_16_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_16_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_16_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_16_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_16_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_16_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_17_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_17_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_17_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_17_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_17_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_17_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_18_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_18_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_18_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_18_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_18_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_18_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_19_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_19_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_19_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_19_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_19_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_19_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_lo_20_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_lo_20_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_lo_20_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_lo_20_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_lo_20_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_lo_20_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_mo_01_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_mo_01_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_mo_01_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_mo_01_soil_cal.1.txt
--- a/tests/testdata/soilspec/BF_mo_01_soil_cal.2.txt
+++ b/tests/testdata/soilspec/BF_mo_01_soil_cal.2.txt
--- a/tests/testdata/soilspec/BF_mo_02_soil_cal.0.txt
+++ b/tests/testdata/soilspec/BF_mo_02_soil_cal.0.txt
--- a/tests/testdata/soilspec/BF_mo_02_soil_cal.1.txt
+++ b/tests/testdata/soilspec/BF_mo_02_soil_cal.1.txt