mdsFuncs: Auxiliar functions to pre-process raster data and modelling with machine learning models using caret package

Documented in mpsplinet

#' @importFrom sp  coordinates
#'
# Converts a SoilProfileCollection to a data frame:
.as.data.frame.SoilProfileCollection <- function(x, row.names = NULL, optional = FALSE, ...) {

  ## derive layer sequence:
  s1 <- unlist(by(x@horizons[, x@depthcols[1]], x@horizons[, paste(x@idcol)], order))
  s2 <- unlist(by(x@horizons[, x@depthcols[2]], x@horizons[, paste(x@idcol)], order))
  HONU <- ifelse(s1 == s2 & !x@horizons[, x@depthcols[1]] == x@horizons[, x@depthcols[2]], s1, NA)

  ## Put all horizon in the same row:
  HOR.list <- as.list(rep(NA, summary(HONU)[[6]])) ## highest number of layers
  for (j in 1:length(HOR.list)) {
    HOR.list[[j]] <- subset(x@horizons, HONU == j)
    sel <- !{
      names(HOR.list[[j]]) %in% paste(x@idcol)
    }
    ## rename variables using a sufix e.g. "_A", "_B" etc:
    names(HOR.list[[j]])[sel] <- paste(names(HOR.list[[j]])[sel], "_", LETTERS[j], sep = "")
  }

  ## Merge all tables (per horizon):
  HOR.list.m <- as.list(rep(NA, length(HOR.list)))
  for (j in 1:length(HOR.list)) {
    sid <- data.frame(x@site[, paste(x@idcol)])
    names(sid) <- paste(x@idcol)
    HOR.list.m[[j]] <- merge(sid, HOR.list[[j]], all.x = TRUE, by = paste(x@idcol))
  }

  ## Merge all horizon tables to one single table:
  tmp <- do.call(cbind, HOR.list.m)

  sel <- which(names(tmp) %in% paste(x@idcol))[-1] ## delete copies of IDs:
  tmp2 <- cbind(sp::coordinates(x@sp), x@site)
  fdb <- merge(tmp2, tmp[, -sel], all.x = TRUE, by = paste(x@idcol), ...)
  if (optional == TRUE) {
    row.names(fdb) <- row.names
  }

  return(fdb)
}


#' Fits a mass preserving spline
#' @description Fits a mass preserving spline to a soil profile data
#' @param obj object of class SoilProfileCollection
#' @param var.name character target variable name (must be a numeric variable)
#' @param lam numeric lambda the smoothing parameter
#' @param d numeric standard depths
#' @param vlow numeric smallest value of the target variable (smaller values will be replaced)
#' @param vhigh numeric highest value of the target variable (larger values will be replaced)
#' @param show.progress logical specifies whether to display the progress bar
#' @author Brendan Malone and Tomislav Hengl
#' @return Returns a list with four elements:
#'    idcol  - site ID column
#'    var.fitted - matrix; are are spline-estimated values of the target variable at observed depths (upper and lower depths are indicated as attributes)
#'    var.std - matrix; are spline-estimated values of the target variable at standard depths
#'    var.1cm - matrix; are spline-estimated values of the target variable using the 1 cm increments
#' @export
#'
#' @examples
#' # mpsplinet(rdSoil,
#' #          var.name = "C", lam = 0.1,
#' #          d = t(c(0, 10)), vlow = 0,
#' #          vhigh = 1000, show.progress = TRUE
#' # )
mpsplinet <- function(obj, var.name, lam = 0.1, d = t(c(0, 5, 15, 30, 60, 100, 200)), vlow = 0, vhigh = 1000, show.progress = TRUE) {
  depthcols <- obj@depthcols
  idcol <- obj@idcol
  ## convert to a data frame:
  obj@horizons <- obj@horizons[, c(idcol, depthcols, var.name)]
  ## TH: remove all horizons with negative depth!
  obj@horizons <- obj@horizons[!obj@horizons[, depthcols[1]] < 0 & !obj@horizons[, depthcols[2]] < 0, ]
  objd <- .as.data.frame.SoilProfileCollection(x = obj)
  ## organize the data:
  ndata <- nrow(objd)
  mxd <- max(d)
  ## Matrix in which the averaged values of the spline are fitted. The depths are specified in the (d) object:
  m_fyfit <- matrix(NA, ncol = length(c(1:mxd)), nrow = ndata)
  ## Matrix in which the sum of square errors of each lamda iteration for the working profile are stored
  yave <- matrix(NA, ncol = length(d), nrow = ndata)
  ## Matrix in which the sum of square errors for eac h lambda iteration for each profile are stored
  sse <- matrix(NA, ncol = length(lam), nrow = 1)
  sset <- matrix(NA, ncol = length(lam), nrow = ndata)
  nl <- length(lam) # Length of the lam matrix
  svar.lst <- grep(names(objd), pattern = glob2rx(paste(var.name, "_*", sep = "")))
  s <- 0.05 * sd(unlist(unclass(objd[, svar.lst])), na.rm = TRUE) # 5% of the standard deviation of the target attribute
  s2 <- s * s # overall variance of soil attribute
  ## reformat table (profile no, upper boundary, lower boundary, vars):
  upperb.lst <- grep(names(objd), pattern = glob2rx(paste(depthcols[1], "_*", sep = "")))
  lowerb.lst <- grep(names(objd), pattern = glob2rx(paste(depthcols[2], "_*", sep = "")))
  objd_m <- objd[, c(grep(names(objd), pattern = glob2rx(paste0("^", idcol))), upperb.lst, lowerb.lst, svar.lst)]

  np <- length(svar.lst) # max number of horizons
  ## Matrix in which the averaged values of spline-fitted values at observed depths are entered:
  dave <- matrix(NA, ncol = np, nrow = ndata)

  if (np < 2 | is.na(np)) {
    print("Submitted soil profiles all have 1 horizon")
  }
  svar.lst <- grep(names(objd_m), pattern = glob2rx(paste(var.name, "_*", sep = "")))
  upperb.lst <- grep(names(objd_m), pattern = glob2rx(paste(depthcols[1], "_*", sep = "")))
  lowerb.lst <- grep(names(objd_m), pattern = glob2rx(paste(depthcols[2], "_*", sep = "")))

  ## if missing, fill in the depth of first horizon as "0"
  missing.A <- is.na(objd_m[, which(names(objd_m) == paste(depthcols[1], "_A", sep = ""))])
  objd_m[missing.A, which(names(objd_m) == paste(depthcols[1], "_A", sep = ""))] <- 0

  ## mask out all profiles with <2 horizons and with at least one of the first 3 horizons defined:
  sel <- !(is.na(objd_m[, which(names(objd_m) == paste(var.name, "_A", sep = ""))]) & is.na(objd_m[, which(names(objd_m) == paste(var.name, "_B", sep = ""))])) &
    rowSums(!is.na(objd_m[, svar.lst])) > 0 &
    rowSums(!is.na(objd_m[, upperb.lst])) > 0 &
    rowSums(!is.na(objd_m[, lowerb.lst])) > 0

  if (sum(sel) == 0) {
    stop("Submitted soil profiles do not contain enough horizons (>2) for spline fitting")
  }

  ## detect lowest horizon no:
  uw.hor <- rowSums(!is.na(objd_m[, upperb.lst]))
  lw.hor <- as.vector(which(rowSums(!is.na(objd_m[, lowerb.lst])) < rowSums(!is.na(objd_m[, upperb.lst])) & rowSums(!is.na(objd_m[, upperb.lst])) > 1)) # profiles with un-even number of lower/upper depths

  ## add missing lower depth where necessary:
  for (lw in lw.hor) {
    uwx <- objd_m[lw, upperb.lst[uw.hor[lw]]]
    if (!is.na(uwx) & sel[lw] == TRUE) {
      message("Adding missing lower depths...")
      if (uwx < 150) {
        objd_m[lw, lowerb.lst[uw.hor[lw]]] <- 150
      } else {
        objd_m[lw, lowerb.lst[uw.hor[lw]]] <- 200
      }
    }
  }

  ## Fit splines profile by profile:
  message("Fitting mass preserving splines per profile...")
  if (show.progress) pb <- txtProgressBar(min = 0, max = length(sel), style = 3)
  for (st in as.vector(which(sel))) {
    subs <- matrix(unlist(c(1:np, as.vector(objd_m[st, upperb.lst]), as.vector(objd_m[st, lowerb.lst]), as.vector(objd_m[st, svar.lst]))), ncol = 4)
    d.ho <- rowMeans(data.frame(x = subs[, 2], y = c(NA, subs[1:(nrow(subs) - 1), 3])), na.rm = TRUE)
    ## mask out missing values
    if (ncol(as.matrix(subs[!is.na(subs[, 2]) & !is.na(subs[, 3]) & !is.na(subs[, 4]), ])) == 1) {
      subs <- t(as.matrix(subs[!is.na(subs[, 2]) & !is.na(subs[, 3]) & !is.na(subs[, 4]), ]))
    } else {
      subs <- subs[!is.na(subs[, 2]) & !is.na(subs[, 3]) & !is.na(subs[, 4]), ]
    }

    ## manipulate the profile data to the required form
    ir <- c(1:length(subs[, 1]))
    ir <- as.matrix(t(ir))
    u <- subs[ir, 2]
    u <- as.matrix(t(u)) # upper
    v <- subs[ir, 3]
    v <- as.matrix(t(v)) # lower
    y <- subs[ir, 4]
    y <- as.matrix(t(y)) # concentration
    n <- length(y) # number of observations in the profile

    ############################################################################################################################################################
    ## routine for handling profiles with one observation
    if (n == 1) {
      message(paste("Spline not fitted to profile:", objd_m[st, 1], sep = " "))
      xfit <- as.matrix(t(c(1:mxd))) ## spline will be interpolated onto these depths (1cm res)
      nj <- max(v)
      if (nj > mxd) {
        nj <- mxd
      }
      yfit <- xfit
      yfit[, 1:nj] <- y ## values extrapolated onto yfit
      if (nj < mxd) {
        yfit[, (nj + 1):mxd] <- NA
      }
      m_fyfit[st, ] <- yfit


      ## Averages of the spline at specified depths
      nd <- length(d) - 1 ## number of depth intervals
      dl <- d + 1 ##  increase d by 1
      for (cj in 1:nd) {
        xd1 <- dl[cj]
        xd2 <- dl[cj + 1] - 1
        if (nj >= xd1 & nj <= xd2) {
          xd2 <- nj - 1
          yave[st, cj] <- mean(yfit[, xd1:xd2])
        } else {
          yave[st, cj] <- mean(yfit[, xd1:xd2])
        } # average of the spline at the specified depth intervals
        yave[st, cj + 1] <- max(v)
      } # maximum soil depth
    }
    ## End of single observation profile routine
    ###############################################################################################################################################################

    ## Start of routine for fitting spline to profiles with multiple observations

    else {
      ###############################################################################################################################################################
      ## ESTIMATION OF SPLINE PARAMETERS
      np1 <- n + 1 # number of interval boundaries
      nm1 <- n - 1
      delta <- v - u # depths of each layer
      del <- c(u[2:n], u[n]) - v # del is (u1-v0,u2-v1, ...)

      ## create the (n-1)x(n-1) matrix r; first create r with 1's on the diagonal and upper diagonal, and 0's elsewhere
      r <- matrix(0, ncol = nm1, nrow = nm1)
      for (dig in 1:nm1) {
        r[dig, dig] <- 1
      }
      for (udig in 1:nm1 - 1) {
        r[udig, udig + 1] <- 1
      }

      ## then create a diagonal matrix d2 of differences to premultiply the current r
      d2 <- matrix(0, ncol = nm1, nrow = nm1)
      diag(d2) <- delta[2:n] # delta = depth of each layer

      ## then premultiply and add the transpose; this gives half of r
      r <- d2 %*% r
      r <- r + t(r)

      ## then create a new diagonal matrix for differences to add to the diagonal
      d1 <- matrix(0, ncol = nm1, nrow = nm1)
      diag(d1) <- delta[1:nm1] # delta = depth of each layer

      d3 <- matrix(0, ncol = nm1, nrow = nm1)
      diag(d3) <- del[1:nm1] # del =  differences

      r <- r + 2 * d1 + 6 * d3

      ## create the (n-1)xn matrix q
      q <- matrix(0, ncol = n, nrow = n)
      for (dig in 1:n) {
        q[dig, dig] <- -1
      }
      for (udig in 1:n - 1) {
        q[udig, udig + 1] <- 1
      }
      q <- q[1:nm1, 1:n]
      dim.mat <- matrix(q[], ncol = length(1:n), nrow = length(1:nm1))

      ## inverse of r
      rinv <- try(solve(r), TRUE)

      ## Note: in same cases this will fail due to singular matrix problems, hence you need to check if the object is meaningfull:
      if (is.matrix(rinv)) {
        ## identity matrix i
        ind <- diag(n)

        ## create the matrix coefficent z

        pr.mat <- matrix(0, ncol = length(1:nm1), nrow = length(1:n))
        pr.mat[] <- 6 * n * lam
        fdub <- pr.mat * t(dim.mat) %*% rinv
        z <- fdub %*% dim.mat + ind

        ## solve for the fitted layer means
        sbar <- solve(z, t(y))

        ## calculate the fitted value at the knots
        b <- 6 * rinv %*% dim.mat %*% sbar
        b0 <- rbind(0, b) # add a row to top = 0
        b1 <- rbind(b, 0) # add a row to bottom = 0
        gamma <- (b1 - b0) / t(2 * delta)
        alfa <- sbar - b0 * t(delta) / 2 - gamma * t(delta)^2 / 3

        ## END ESTIMATION OF SPLINE PARAMETERS
        ###############################################################################################################################################################


        ## fit the spline
        xfit <- as.matrix(t(c(1:mxd))) ## spline will be interpolated onto these depths (1cm res)
        nj <- max(v)
        if (nj > mxd) {
          nj <- mxd
        }
        yfit <- xfit
        for (k in 1:nj) {
          xd <- xfit[k]
          if (xd < u[1]) {
            p <- alfa[1]
          } else {
            for (its in 1:n) {
              if (its < n) {
                tf2 <- as.numeric(xd > v[its] & xd < u[its + 1])
              } else {
                tf2 <- 0
              }
              if (xd >= u[its] & xd <= v[its]) {
                p <- alfa[its] + b0[its] * (xd - u[its]) + gamma[its] * (xd - u[its])^2
              } else if (tf2) {
                phi <- alfa[its + 1] - b1[its] * (u[its + 1] - v[its])
                p <- phi + b1[its] * (xd - v[its])
              }
            }
          }
          yfit[k] <- p
        }
        if (nj < mxd) {
          yfit[, (nj + 1):mxd] <- NA
        }

        yfit[which(yfit > vhigh)] <- vhigh
        yfit[which(yfit < vlow)] <- vlow
        m_fyfit[st, ] <- yfit

        ## Averages of the spline at specified depths
        nd <- length(d) - 1 # number of depth intervals
        dl <- d + 1 #  increase d by 1
        for (cj in 1:nd) {
          xd1 <- dl[cj]
          xd2 <- dl[cj + 1] - 1
          if (nj >= xd1 & nj <= xd2) {
            xd2 <- nj - 1
            yave[st, cj] <- mean(yfit[, xd1:xd2])
          } else {
            yave[st, cj] <- mean(yfit[, xd1:xd2])
          } # average of the spline at the specified depth intervals
          yave[st, cj + 1] <- max(v)
        } # maximum soil depth

        ## Spline estimates at observed depths
        dave[st, 1:n] <- sbar


        ## CALCULATION OF THE ERROR BETWEEN OBSERVED AND FITTED VALUES
        ## calculate Wahba's estimate of the residual variance sigma^2
        ssq <- sum((t(y) - sbar)^2)
        g <- solve(z)
        ei <- eigen(g)
        ei <- ei$values
        df <- n - sum(ei)
        sig2w <- ssq / df
        ## calculate the Carter and Eagleson estimate of residual variance
        dfc <- n - 2 * sum(ei) + sum(ei^2)
        sig2c <- ssq / dfc
        ## calculate the estimate of the true mean squared error
        tmse <- ssq / n - 2 * s2 * df / n + s2
        sset[st] <- tmse
      }
    }

    if (show.progress) {
      setTxtProgressBar(pb, st)
    }
  }
  if (show.progress) {
    close(pb)
    # cat(st, "\r")  ## TH: Suggested by D. Rossiter but not required
    # flush.console()
  }


  ## asthetics for output
  ## yave
  yave <- as.data.frame(yave)
  jmat <- matrix(NA, ncol = 1, nrow = length(d))
  for (i in 1:length(d) - 1) {
    a1 <- paste(d[i], d[i + 1], sep = "-")
    a1 <- paste(a1, "cm", sep = " ")
    jmat[i] <- a1
  }
  jmat[length(d)] <- "soil depth"
  for (jj in 1:length(jmat)) {
    names(yave)[jj] <- jmat[jj]
  }

  retval <- list(idcol = objd_m[, 1], var.fitted = dave, var.std = yave, var.1cm = t(m_fyfit))

  return(retval)
}

elpidiofilho/mdsFuncs documentation built on April 14, 2022, 5:40 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

elpidiofilho/mdsFuncs
Auxiliar functions to pre-process raster data and modelling with machine learning models using caret package

R/mass_splines.R
In elpidiofilho/mdsFuncs: Auxiliar functions to pre-process raster data and modelling with machine learning models using caret package

Defines functions mpsplinet .as.data.frame.SoilProfileCollection

Documented in mpsplinet

R Package Documentation

Browse R Packages

We want your feedback!

elpidiofilho/mdsFuncs Auxiliar functions to pre-process raster data and modelling with machine learning models using caret package

R/mass_splines.R In elpidiofilho/mdsFuncs: Auxiliar functions to pre-process raster data and modelling with machine learning models using caret package

Defines functions mpsplinet .as.data.frame.SoilProfileCollection

Documented in mpsplinet

R Package Documentation

Browse R Packages

We want your feedback!

elpidiofilho/mdsFuncs
Auxiliar functions to pre-process raster data and modelling with machine learning models using caret package

R/mass_splines.R
In elpidiofilho/mdsFuncs: Auxiliar functions to pre-process raster data and modelling with machine learning models using caret package