rioja: Analysis of Quaternary Science Data

Documented in .check.data crossval .crossval crossval.default performance .performance performance.default .predict .print.crossval .print.performance rand.t.test rand.t.test.default

# Utility functions for Quant Reconstructions II

rand.t.test <- function(object, ...) UseMethod("rand.t.test")

rand.t.test.default <- function(object, ...) { 
  stop(paste("No method to rand.t.test an object of class", class(object)[1]))
}

crossval <- function(object, ...) UseMethod("crossval")

crossval.default <- function(object, ...) {
  stop(paste("No method to cross-validate an object of class", class(object)[1]))
}

performance <- function(object, ...) UseMethod("performance")

performance.default <- function(object, ...) {
  stop(paste("No method for calculating performance for object of class", class(object)[1]))
}

.check.data <- function(y, x) {
  if (any(apply(y, 1, sum) < 1.0E-8))
     stop(paste("Species data have zero abundaces for the following rows:", which(apply(y, 1, sum) < 1.0E-8)))
  if (any(apply(y, 2, sum) < 1.0E-8))
     stop(paste("Species data have zero abundaces for the following columns:", which(apply(y, 2, sum) < 1.0E-8)))
}

.print.crossval <- function(object) {
  if (object$cv.summary$cv.method == "none" | object$cv.summary$cv.method == "loo")
    cat(paste("Cross val.         :", object$cv.summary$cv.method, "\n\n"))
  else if (object$cv.summary$cv.method == "lgo")
    cat(paste("Cross val.         :", object$cv.summary$cv.method, ": no. groups = ", object$cv.summary$ngroups , "\n\n"))
  else if (object$cv.summary$cv.method == "bootstrap")
    cat(paste("Cross val.         :", object$cv.summary$cv.method, ": no. boot cycles = ", object$cv.summary$nboot , "\n\n"))
  else if (object$cv.summary$cv.method == "h-block")
    cat(paste("Cross val.         :", object$cv.summary$cv.method, ": h-block cutoff = ", object$cv.summary$h.cutoff , "\n\n"))
}

.print.performance <- function(object, ...) {
   perf <- performance(object)
   results <- perf$object
   if (object$cv.summary$cv.method != "none") {
      results <- rbind(results, perf$crossval)
      n <- nrow(results)
      rownames(results)[(n/2+1):n] <- paste(rownames(results)[(n/2+1):n], "_XVal", sep="")
   }
   print.default(round(results, 4), print.gap = 2, ...)
}

.performance <- function(object, ...) {
  RMSE <- apply(residuals(object), 2, .rmse)
  R2 <- apply(object$fitted.values, 2, .r2, x=object$x)
  Avg.Bias <- apply(residuals(object), 2, mean, na.rm=TRUE)
  Max.Bias <- apply(residuals(object), 2, .max.bias, x=object$x)
  Skill <- 100 * (1.0 - apply((object$x - object$fitted)^2, 2, sum) / sum((object$x - mean(object$x))^2) )
  RMSE0 <- sqrt(mean((mean(object$x)-object$x)^2))
  res <- cbind(RMSE, R2, Avg.Bias, Max.Bias, Skill)
  result <- list(RMSE0=RMSE0, object=res)
  if (object$cv.summary$cv.method != "none") {
    if (object$cv.summary$cv.method == "bootstrap")
      RMSE <- object$cv.summary$RMSE.boot
    else
      RMSE <- apply(object$x - object$predicted, 2, .rmse)
    R2 <- apply(object$predicted, 2, .r2, x=object$x)
    Avg.Bias <- apply(object$x - object$predicted, 2, mean, na.rm=TRUE)
    Max.Bias <- apply(object$x - object$predicted, 2, .max.bias, x=object$x)
    Skill <- 100 * (1.0 - apply((object$x - object$predicted)^2, 2, sum) / sum((object$x - mean(object$x))^2))
    result.cv <- cbind(RMSE, R2, Avg.Bias, Max.Bias, Skill)
    result$crossval <- result.cv
  }
  result
}

.predict <- function(object, newdata=NULL, sse=FALSE, nboot=100, match.data=TRUE, keep.boot.data=FALSE, verbose=TRUE, ...) {
  if (is.null(newdata)) {
     return (object$fitted.values)
  }
  if (is.null(newdata))
     return(object$fitted.values)
  if (match.data) {
    nms <- rownames(coef(object))
    mt <- match(colnames(newdata), nms)
    mt1 <- na.omit(mt)
    if (length(mt1) == 0)
       stop("No species in common between model and newdata")
    d <- matrix(0, nrow=nrow(newdata), ncol=length(nms))
    d[, mt1] <- as.matrix(newdata)[, !is.na(mt)]
    rownames(d) <- rownames(newdata)
    colnames(d) <- nms
    newdata <- d
  } else {
    if (ncol(object$y) != ncol(newdata)) 
       stop("Number of taxa does not match between datasets")
    if (any(colnames(object$y) != colnames(newdata)))
       stop("Taxon names do not match between datasets")
  }
#  nm.mod <- rownames(coef(object))
#  nm.new <- colnames(newdata)
#  mt <- match(nm.new, nm.mod)
#  mt1 <- na.omit(mt)
#  if (length(mt1) == 0 )
#    stop("Cannot predict: no taxa in common between object and newdata")
#  newdata2 <- matrix(0, ncol=nrow(coef(object)), nrow=nrow(newdata))
#  mt3 <- which(!is.na(mt))
#  newdata2[, mt1] <- as.matrix(newdata)[, mt3]
  predict.func <- paste("predict.internal", class(object)[1], sep=".")
  xHat.new <- do.call(predict.func, args=list(object=quote(object), y=quote(newdata), lean=FALSE, ...))
  rownames(xHat.new) <- rownames(newdata)
  colnames(xHat.new) <- colnames(object$fitted.values)
  if (sse) {
    if (verbose) {
      writeLines("Bootstrapping for SSE:")
      pb <- txtProgressBar(min = 0, max = 1, style = 3)
      on.exit(close(pb))
    }
    nsam <- nrow(object$y)
    nsam.new <- nrow(newdata)
    nest <- ncol(object$fitted.values)
    res2 <- array(dim=c(nsam, nest, nboot))
    res2.new <- array(dim=c(nsam.new, nest, nboot))
    call <- as.call(object$call.fit)
#    .set.rand.seed(100)
    for (i in 1:nboot) {
      o <- sample(nsam, replace=TRUE)
#      o <- apply(data.frame(rep(nsam, nsam)), 1, .get.rand) + 1
      out <- (1:nsam)[-unique(o)]
      y <- object$y[o, ]
      x <- object$x[o]
      y.test <- object$y[out, , drop=FALSE]
      mod <- eval(call)
      res2[out, , i] <- do.call(predict.func, args=list(object=quote(mod), y=quote(y.test), lean=TRUE, ...))
      res2.new[, , i] <- do.call(predict.func, args=list(object=quote(mod), y=quote(newdata), lean=TRUE, ...))
      if (verbose) {
        setTxtProgressBar(pb, i/nboot)
      }        
    }
    xHat <- object$fitted.values
    xHat.boot <- apply(res2, c(1,2), mean, na.rm=TRUE)
    xHat.new.boot <- apply(res2.new, c(1,2), mean, na.rm=TRUE)
    colnames(xHat.new.boot) <- colnames(xHat)
    rownames(xHat.new.boot) <- rownames(newdata)
    v1.boot <- apply(res2.new, c(1,2), sd, na.rm=TRUE)
    v2.boot <- apply(object$x-xHat.boot, 2, .rmse)
    colnames(v1.boot) <- colnames(xHat)
    rownames(v1.boot) <- rownames(newdata)
    SEP.boot <- sqrt(sweep(v1.boot^2, 2, v2.boot^2, "+"))
    colnames(SEP.boot) <- colnames(xHat)
    rownames(SEP.boot) <- rownames(newdata)
    results <- list(fit=xHat.new, fit.boot=xHat.new.boot, v1.boot=v1.boot, v2.boot=v2.boot, SEP.boot=SEP.boot)
    if (keep.boot.data) {
       results$boot.array=res2
       results$boot.array.newdata=res2.new
    }
  } else {
    results <- list(fit=xHat.new)
  }
  results
}

.crossval <- function(object, cv.method="loo", ngroups=10, nboot=100, verbose=TRUE, h.cutoff=0, h.dist, ...) 
{
  if (!("y" %in% names(object)))
    stop("Object does not contain species data, refit object using option lean=FALSE")
  METHODS <- c("loo", "lgo", "bootstrap", "h-block")
  cv.method <- pmatch(cv.method, METHODS)
  if (is.na(cv.method))
     stop("Unknown cross-validation method")
  nsam <- length(object$x)
  nres <- ncol(object$fitted.values)
#  func <- object$call.fit[1]
  call <- as.call(object$call.fit)
  predict.func <- paste("predict.internal", class(object)[1], sep=".")
  result <- matrix(nrow=nsam, ncol=nres)
  object$cv.summary$cv.method=METHODS[cv.method]
  if (verbose) {
    writeLines("Cross-validating:")
    pb <- txtProgressBar(min = 0, max = 1, style = 3)
    on.exit(close(pb))
  }
  if (cv.method == 1) {
    for (i in 1:nsam) {
      y <- object$y[-i, ]
      x <- object$x[-i]
      y.test <- object$y[i, , drop=FALSE]
      mod <- eval(call)
      xHat <- do.call(predict.func, args=list(object=quote(mod), y=quote(y.test), lean=TRUE, ...))
#      xHat <- do.call(predict, args=list(object=quote(mod), y=quote(y.test), lean=TRUE))
      result[i, ] <- xHat
      if (verbose) {
        setTxtProgressBar(pb, i/nsam)
      }        
    }
  } else if (cv.method == 2) {
    if (length(ngroups) > 1) {
      if (length(ngroups) != nsam)
        stop("length of leave-out groups does not equal number of samples")
      ngroups <- factor(ngroups)
      grps <- as.integer(ngroups)
      ngroups <- length(unique(ngroups))
      o <- 1:nsam
    }
    else {
      o <- sample(nsam)
      grps <- rep(1:ngroups, length.out=nsam) 
    }
    for (i in 1:ngroups) {
      out <- o[grps==i]
      y <- object$y[-out, ]
      x <- object$x[-out]
      y.test <- object$y[out, , drop=FALSE]
      mod <- eval(call)
      xHat <- do.call(predict.func, args=list(object=quote(mod), y=quote(y.test), lean=TRUE))
      result[out, ] <- xHat
      if (verbose) {
        setTxtProgressBar(pb, i/ngroups)
      }
    }
    object$cv.summary$ngroups=ngroups
  } else if (cv.method == 3) {
    nest <- ncol(object$fitted.values)
    res2 <- array(dim=c(nsam, nest, nboot))
#    .set.rand.seed(100)
    for (i in 1:nboot) {
#      o <- apply(data.frame(rep(nsam, nsam)), 1, .get.rand) + 1
      o <- sample(nsam, replace=TRUE)
      out <- (1:nsam)[-unique(o)]
      y <- object$y[o, ]
      x <- object$x[o]
      y.test <- object$y[out, , drop=FALSE]
      mod <- eval(call)
      xHat <- do.call(predict.func, args=list(object=quote(mod), y=quote(y.test), lean=TRUE))
      res2[out, , i] <- xHat
      if (verbose) {
        setTxtProgressBar(pb, i/nboot)
      }
    }
    result <- apply(res2, c(1,2), mean, na.rm=TRUE)
    MS <- apply((object$x-res2)^2, c(1,2), mean, na.rm=TRUE)
    RMSE.boot <- sqrt(apply(MS, 2, mean, na.rm=TRUE))
    object$cv.summary$nboot=nboot
    object$cv.summary$RMSE.boot <- RMSE.boot
  } else if (cv.method == 4) {
    if (is.null(h.dist))
       stop("h-block cross-validation requested but h.dist is null") 
    h.dist <- as.matrix(h.dist)  
    if (nrow(h.dist) != ncol(h.dist))
       stop("h.dist doesn't look like a matrix of inter-site distances") 
    if (nrow(h.dist) != nsam) 
       stop(paste("Number of rows in h.dist (", nrow(h.dist), ") not equal to number of samples (", nsam, ")", sep="")) 
    nSamp <- vector("numeric", length=nsam)
    nmiss <- 0
    for (i in 1:nsam) {
       d <- h.dist[i, ]  
       sel <- d > h.cutoff
       y <- object$y[sel, , drop=FALSE]
       nSamp[i] <- nrow(y)
       if (nSamp[i] > 0) {
          keep <- abs(apply(y, 2, max)) > 0
          y <- y[, keep]
          x <- object$x[sel]
          y.test <- object$y[i, keep, drop=FALSE]
          mod <- eval(call)
          xHat <- do.call(predict.func, args=list(object=quote(mod), y=quote(y.test), lean=TRUE, ...))
          result[i, ] <- xHat
       } 
       if (verbose) {
         setTxtProgressBar(pb, i/nsam)
       }
    }
    if (sum(nSamp < 1) > 0) {
       warning(paste(sum(nSamp < 1), "samples had no training samples with distance greater than ", h.cutoff, " and have not been predicted"))
    }
    object$n.h.block <- nSamp
    object$cv.summary$h.cutoff=h.cutoff
  } 
  colnames(result) <- colnames(object$fitted.values)
  rownames(result) <- rownames(object$fitted.values)
  object$predicted <- result
  object$residuals.cv <- object$x - result
  object
}

nsj3/rioja documentation built on Oct. 24, 2023, 1:37 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

nsj3/rioja
Analysis of Quaternary Science Data

R/PTF.r
In nsj3/rioja: Analysis of Quaternary Science Data

Defines functions .crossval .predict .performance .print.performance .print.crossval .check.data performance.default performance crossval.default crossval rand.t.test.default rand.t.test

Documented in .check.data crossval .crossval crossval.default performance .performance performance.default .predict .print.crossval .print.performance rand.t.test rand.t.test.default

R Package Documentation

Browse R Packages

We want your feedback!

nsj3/rioja Analysis of Quaternary Science Data

R/PTF.r In nsj3/rioja: Analysis of Quaternary Science Data

Defines functions .crossval .predict .performance .print.performance .print.crossval .check.data performance.default performance crossval.default crossval rand.t.test.default rand.t.test

Documented in .check.data crossval .crossval crossval.default performance .performance performance.default .predict .print.crossval .print.performance rand.t.test rand.t.test.default

R Package Documentation

Browse R Packages

We want your feedback!

nsj3/rioja
Analysis of Quaternary Science Data

R/PTF.r
In nsj3/rioja: Analysis of Quaternary Science Data