R/shared.support.code.r

Defines functions getNodeDepth anc.BM ace.pics pic.prep reorder.phy phy.sim.mat Cov.proj Effect.size.matrix Pval.matrix effect.size box.cox box.cox.fast box.cox.iter box.cox.spline box.cox.true pval fastLM fastFit boot.index perm.index pcoa fast.solve fast.ginv apply.pPsup center.scale cs.scale csize fast.scale fast.center center get.names

Documented in effect.size pval

### Functions that both RRPP and geomorph use, but should remain internal
### any alterations to these functions must be saved for both RRPP and geomorph

# get.names
# a universal subject name search
# used in lm.rrpp and its descendants
get.names <- function(Y) {
  nms <- if(is.vector(Y)) names(Y) else if(inherits(Y, "dist")) attr(Y, "Labels") else
    if(inherits(Y, "matrix")) rownames(Y) else dimnames(Y)[[3]]
  nms
}

# center
# centers a matrix faster than scale()
# used in various functions where mean-centering is required
center <- function(x){
  if(is.vector(x)) x - mean(x) else {
    x <- as.matrix(x)
    dims <- dim(x)
    fast.center(x, dims[1], dims[2])
  }
}

fast.center <- function(x, n, p){
  m <- colMeans(x)
  x - rep.int(m, rep_len(n, p))
}

fast.scale <- function(x, n, p){
  if(p > 1) {
    x <- fast.center(x, n, p)
    scale <- apply(x, 2, sd)
    x / rep.int(scale, rep_len(n, p))
  } else {
    x <- x - mean(x)
    x/sd(x)
  }
}

# csize
# calculates centroid size
# digitsurface
csize <- function(x) sqrt(sum(center(as.matrix(x))^2))

# cs.scale
# divide matrices by centroid size
# used in other functions for gpagen
cs.scale <- function(x) x/csize(x)

# center.scale
# center and divide matrices by centroid size; faster than scale()
# used in other functions for gpagen
center.scale <- function(x) {
  x <- center(x)
  cs <- sqrt(sum(x^2))
  y <- x/cs
  list(coords=y, CS=cs)
}

# apply.pPsup
# applies a partial Procrustes superimposition to matrices in a list
# used in gpagen functions
apply.pPsup<-function(M, Ya) {	# M = mean (reference); Ya all Y targets
  dims <- dim(Ya[[1]])
  k <- dims[2]; p <- dims[1]; n <- length(Ya)
  M <- cs.scale(M)
  lapply(1:n, function(j){
    y <- Ya[[j]]
    MY <- crossprod(M,y)
    sv <- La.svd(MY,k,k)
    u <- sv$u; u[,k] <- u[,k]*determinant(MY)$sign
    tcrossprod(y,u%*%sv$vt)
  })
}

# fast.ginv
# same as ginv, but without traps (faster)
# used in any function requiring a generalized inverse
fast.ginv <- function(X, tol = sqrt(.Machine$double.eps)){
  k <- ncol(X)
  if(inherits(X, "matrix")) {
    Xsvd <- La.svd(X, k, k)
    Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0)
    rtu <-((1/Xsvd$d[Positive]) * t(Xsvd$u[, Positive, drop = FALSE]))
    v <-t(Xsvd$vt)[, Positive, drop = FALSE]
  } else {
    Xsvd <- svd(X, k, k)
    Positive <- Xsvd$d > max(tol * Xsvd$d[1L], 0)
    rtu <-((1/Xsvd$d[Positive]) * t(Xsvd$u[, Positive, drop = FALSE]))
    v <-Xsvd$v[, Positive, drop = FALSE]
  }

  v %*% rtu
}

# fast.solve
# same as solve, but without traps (faster)
# used in any function requiring a generalized inverse
fast.solve <- function(x) { 
  res <- try(solve(x), silent = TRUE)
  if(inherits(res, "try-error")) res <- fast.ginv(x)
  return(res)
}


# pcoa
# acquires principal coordinates from distance matrices
# used in all linear model functions with data input
pcoa <- function(D){
  options(warn=-1)
  if(!inherits(D, "dist")) stop("function only works with distance matrices")
  cmd <- cmdscale(D, k=attr(D, "Size") -1, eig=TRUE)
  options(warn=0)
  d <- cmd$eig
  min.d <- min(d)
  if(min.d < 0) {
    options(warn=-1)
    cmd.c <- cmdscale(D, k=attr(D, "Size") -1, eig=TRUE, add= TRUE)
    options(warn=0)
    d <- cmd.c$eig
  } else cmd.c <- cmd
  p <- length(cmd.c$eig[zapsmall(d) > 0])
  Yp <- cmd.c$points[,1:p]
  Yp
}


# perm.index
# creates a permutation index for resampling
# used in all functions with a resampling procedure
perm.index <-function(n, iter, block = NULL, seed = NULL){
  
  if(!is.null(block)){
    block <- as.factor(block)
    if(length(block) != n)
      stop("block factor not the same length as the number of observations.\n",
           call. = FALSE)
    n <- length(block)
    indx <- 1:n
    g <- nlevels(block)
    gp <- levels(block)
    loc.list <- lapply(1:g, function(j) {
      indx[block == gp[[j]]]
    })
    rindx <- unlist(loc.list)
    
  }
  
  if(is.null(seed)) seed = iter else
    if(seed == "random") seed = sample(1:iter,1) else
      if(!is.numeric(seed)) seed = iter
      set.seed(seed)
      get.samp <- function(n){
        if(!is.null(block)){
          out <- array(NA, n)
          r <- unlist(lapply(loc.list, sample, replace = FALSE))
          for(i in indx) out[rindx[i]] <- r[i]
        } else  out <- sample.int(n, n)
        out
      }
      ind <- c(list(1:n), (Map(function(x) get.samp(n), 1:iter)))
      rm(.Random.seed, envir=globalenv())
      attr(ind, "seed") <- seed
      names(ind) <- c("obs", paste("iter", 1:(length(ind) - 1), sep = "."))
      ind
      
}


# boot.index
# creates a bootstrap index for resampling
# used in lm.rrpp for intercept models
boot.index <-function(n, iter, block = NULL, seed = NULL){
  
  if(!is.null(block)){
    block <- as.factor(block)
    if(length(block) != n)
      stop("block factor not the same length as the number of observations.\n",
           call. = FALSE)
    n <- length(block)
    indx <- 1:n
    g <- nlevels(block)
    gp <- levels(block)
    loc.list <- lapply(1:g, function(j) {
      indx[block == gp[[j]]]
    })
    rindx <- unlist(loc.list)
  }
  
  if(is.null(seed)) seed = iter else
    if(seed == "random") seed = sample(1:iter,1) else
      if(!is.numeric(seed)) seed = iter
      set.seed(seed)
      get.samp <- function(n){
        if(!is.null(block)){
          out <- array(NA, n)
          r <- unlist(lapply(loc.list, sample, replace = TRUE))
          for(i in indx) out[rindx[i]] <- r[i]
        } else  out <- sample.int(n, n, replace = TRUE)
        out
      }
      ind <- c(list(1:n), (Map(function(x) get.samp(n), 
                              1:iter)))
      rm(.Random.seed, envir=globalenv())
      attr(ind, "seed") <- seed
      ind
}

# fastFit
# calculates fitted values for a linear model, after decomoposition of X to get U
# used in SS.iter
fastFit <- function(U,y,n,p){
  if(p > n) tcrossprod(U) %*% y else
    U %*% crossprod(U,y)
} 


# fastLM
# calculates fitted values and residuals, after fastFit
# placeholder in case needed later
fastLM<- function(U, y){
  p <- dim(y)[2]; n <- dim(y)[1]
  yh <- fastFit(U,y,n,p)
  list(fitted = yh, residuals = y-yh)
}

# pval
# P-values form random outcomes
# any analytical function

#' Obtain P-value from a vector of values
#'
#' A function to find the probability of values greater or lesser than target,
#' from a vector of values presumably obtained in random permutations.
#'
#' @param s The sampling distribution vector to use.
#' @param target The value to target in the distribution.  (If null, the first value
#' in the vector is used.).  If the target exists outside the range of s,
#' a probability of 0 or 1 is certain.
#' @param greater Logical value for whether the probability should be "greater than 
#' or equal to".  Change to greater = FALSE for "less than or equal to".
#' @export
#' @author Michael Collyer
#' @keywords utilities
pval <- function(s, target = NULL, greater = TRUE){
  if(is.null(target)) target <- s[1]
  p <- length(s)
  pv <- if(greater) 
    length(which(s >= target)) else 
      length(which(s <= target))
  pv / p
}

# box.cox
# Box-Cox transformation for normalizing distributions.  Similar to MASS::boxcox
# without unneeded arguments, plus faster
# Used in effect.size
box.cox.true <- function(y, eps = 0.001){
  
  if(any(y <= 0)) y = y - min(y) + 0.0001
  
  y.obs <- y[1]
  y <- y[-1]
  
  # Note the code below (to get yy) is short-cut for Jacobian adjustment 
  n <- length(y)
  yy <- y / exp(mean(log(y)))
  logy <- log(yy)
  
  lambda <- seq(-5, 5, 0.001)
  m <- length(lambda)
  
  loglik <- sapply(1:m, function(j){ # same as MASS::boxcox loglik 
    la <- lambda[j]
    yt <- if(abs(la) > eps) yt <- (yy^la - 1)/la else
      logy * (1 + (la * logy)/2 * (1 + (la * logy)/3 * (1 + (la * logy)/4)))
    
    -n/2 * log(sum(center(yt)^2))
  })
  
  lambda.opt <- lambda[which.max(loglik)][[1]]
  if(abs(lambda.opt) < eps) lambda.opt <- 0
  y <- c(y.obs, y)
  res <- if(lambda.opt == 0) log(y) else (y^lambda.opt - 1)/lambda.opt
  
  list(opt.lambda = lambda.opt, transformed = res, lambda = lambda, 
       loglik = loglik)
  
}

box.cox.spline <- function(y, eps = 0.001) {
  
  if(any(y <= 0)) y = y - min(y) + 0.0001
  
  y.obs <- y[1]
  y <- y[-1]
  
  n <- length(y)
  yy <- y / exp(mean(log(y)))
  logy <- log(yy)
  m <- 20
  lambda <- seq(-0.5, 1.5, length.out = m)
  
  loglik <- sapply(1:m, function(j){ # same as MASS::boxcox loglik 
    la <- lambda[j]
    yt <- if(abs(la) > eps) yt <- (yy^la - 1)/la else
      logy * (1 + (la * logy)/2 * (1 + (la * logy)/3 * (1 + (la * logy)/4)))
    
    -n/2 * log(sum(center(yt)^2))
  })
  
  sp <- spline(lambda, loglik, n = 300)
  lambda.opt <- sp$x[which.max(sp$y)]
  if(abs(lambda.opt) < eps) lambda.opt <- 0
  y <- c(y.obs, y)
  res <- if(lambda.opt == 0) log(y) else (y^lambda.opt - 1)/lambda.opt
  
  list(opt.lambda = lambda.opt, transformed = res, lambda = sp$x, loglik = sp$y)
  
}

box.cox.iter <- function(y, eps = 0.001) {
  bc <- box.cox.spline(y, eps = eps)
  if(bc$opt.lambda == -0.5 || bc$opt.lambda == 1.5) 
    bc <- box.cox.true(y, eps = eps)
  return(bc)
}

box.cox.fast <- function(y, eps = 0.001) {
  if(any(y <= 0)) y = y - min(y) + 0.0001
  y.obs <- y[1]
  y <- y[-1]
  
  n <- length(y)
  yy <- y / exp(mean(log(y)))
  logy <- log(yy)
  
  logLik <- function(lambda) {
    la <- lambda
    yt <- if(abs(la) > eps) yt <- (yy^la - 1)/la else
      logy * (1 + (la * logy)/2 * (1 + (la * logy)/3 * (1 + (la * logy)/4)))
    
    -n/2 * log(sum(center(yt)^2))
  }
  
  result <- optimise(logLik, lower = -5, upper = 5, maximum = TRUE)
  lambda.opt <- result$maximum
  if(abs(lambda.opt) < eps) lambda.opt <- 0
  y <- c(y.obs, y)
  res <- if(lambda.opt == 0) log(y) else (y^lambda.opt - 1)/lambda.opt
  list(opt.lambda = lambda.opt, transformed = res, lambda = NULL, loglik = NULL)
}

box.cox <- function(y, eps = 0.001, iterate = FALSE) {
  result <- if(iterate) box.cox.iter(y, eps = eps) else
    box.cox.fast(y, eps = eps)
  return(result)
}


# effect.size
# Effect sizes (standard deviates) form random outcomes
# any analytical function

#' Obtain Effect-size from a vector of values
#'
#' A function to find the effect size (Z-score) of a target,
#' from a vector of values presumably obtained in random permutations.
#'
#' @param x The vector of data to use.
#' @param center Logical value for whether to center x.
#' @param target The value to target in the distribution.  (If null, the first value
#' in the vector is used.).  If the target exists outside the range of x,
#' very small or very large z-scores are possible.  Additionally, if the target
#' is excessively outside of the range of x, it could affect the Box-Cox transformation 
#' used to transform x.
#' @export
#' @author Michael Collyer
#' @keywords utilities
effect.size <- function(x, center = TRUE, target = NULL) {
  if(is.null(target)) target <- x[1] else x <- c(target, x)
  if(length(unique(x)) == 1) {
    sdx <- 1
    x <- 0
  } else {
    x <- box.cox(x)$transformed
    n <- length(x)
    if(center) x <- center(x)
    sdx <- sqrt((sum(x^2)/n))
  }
  
  (x[1]- mean(x)) / sdx
}

# Pval.matrix
# P-values form random outcomes that comprise matrices
# any analytical function with results in matrices
Pval.matrix = function(M){
  P = matrix(0,dim(M)[1],dim(M)[2])
  for(i in 1:dim(M)[1]){
    for(j in 1:dim(M)[2]){
      y = M[i,j,]
      p = pval(y)
      P[i,j]=p
    }
  }
  if(dim(M)[1] > 1 && dim(M)[2] >1) diag(P)=1
  rownames(P) = dimnames(M)[[1]]
  colnames(P) = dimnames(M)[[2]]
  P
}

# Effect.size.matrix
# Effect sizes form random outcomes that comprise matrices
# any analytical function with results in matrices
Effect.size.matrix <- function(M, center=TRUE){
  Z = matrix(0,dim(M)[1],dim(M)[2])
  for(i in 1:dim(M)[1]){
    for(j in 1:dim(M)[2]){
      y = M[i,j,]
      z = effect.size(y, center=center)
      Z[i,j]=z
    }
  }
  if(dim(M)[1] > 1 && dim(M)[2] >1) diag(Z)=0
  rownames(Z) = dimnames(M)[[1]]
  colnames(Z) = dimnames(M)[[2]]
  Z
}


# Cov.proj
# generates projection matrix from covariance matrix
# used in lm.rrpp

Cov.proj <- function(Cov, id = NULL, symmetric = FALSE){
  Cov <- if(is.null(id)) Cov else Cov[id, id]
  if(inherits(Cov, "matrix")) {
    Cov.s <- Matrix(Cov, sparse = TRUE)
    if(object.size(Cov.s) < object.size(Cov)) Cov <- Cov.s
    Cov.s <- NULL
  }
  ow <- options()$warn
  options(warn = -1)
  if(!symmetric) {
    Chol <- try(chol(Cov), silent = TRUE)
    if(inherits(Chol, "try-error")) 
      symmetric <- TRUE
  }
  
 if(symmetric) {
   sym <- isSymmetric(Cov)
   eigC <- eigen(Cov, symmetric = sym)
   eigC.vect = t(eigC$vectors)
   L <- eigC.vect * sqrt(abs(eigC$values))
   P <- fast.solve(crossprod(L, eigC.vect))
   dimnames(P) <- dimnames(Cov)
 } else P <- solve(t(Chol))
  options(warn = ow)
  P
}



# ape replacement functions below --------------------------------------------

# sim.char has a similar function to this but it is called in every simulation
# and defers to C for help.  This is done once only here. 
# (Produces a projection matrix)
phy.sim.mat <- function(phy) {
  if(is.null(phy$edge.length))
    stop("The tree has no branch lengths.\n", call. = FALSE)
  N <- length(phy$tip.label)
  n <- nrow(phy$edge)
  m <- matrix(0, N, n)
  edg <- phy$edge.length
  idx <- phy$edge[, 2]
  anc <- phy$edge[, 1]
  tips <- which(idx <= N)
  non.tips <- which(idx > N)
  for(i in 1:length(tips)) m[idx[tips[i]], tips[i]] <- sqrt(edg[tips[i]])
  for(i in 1:length(non.tips)) {
    x <- idx[non.tips[i]]
    anc.i <- which(anc == x)
    edg.i <- idx[anc.i]
    if(any(edg.i > N)) {
      edg.i <- as.list(edg.i)
      while(any(edg.i > N)) {
        edg.i <- lapply(1:length(edg.i), function(j){
          edg.i.j <- edg.i[[j]]
          if(edg.i.j > N) {
            idx[which(anc == edg.i.j)]
          } else edg.i.j <- edg.i.j
        })
        edg.i <- unlist(edg.i)
      }
    }
    m[edg.i, which(idx == x)] <- sqrt(edg[which(idx == x)])
  }
  rownames(m) <- phy$tip.label
  m
}

# fast.phy.vcv
# same as vcv.phylo but without options, in order to not use ape

fast.phy.vcv <- function (phy) {
  x <- phy.sim.mat(phy)
  tcrossprod(x)
}

# reorder.phy
# same as reorder function, but without options

reorder.phy <- function(phy){
  edge <- phy$edge
  edge.length <- phy$edge.length
  edge <- cbind(edge, edge.length)
  n <- nrow(edge)
  ind <-rank(edge[,1], ties.method = "last")
  edge <- edge[order(ind, decreasing = TRUE), ]
  edge.length <- edge[,3]
  phy$edge <- edge[,-3]
  phy$edge.length <- edge.length
  attr(phy, "order") <- "postorder"
  return(phy)
}


# anc.BM 
# via PICs
# same as ace, but multivariate
pic.prep <- function(phy, nx, px){
  phy <- reorder.phy(phy)
  ntip <- length(phy$tip.label)
  nnode <- phy$Nnode
  edge <- phy$edge
  edge1 <- edge[, 1]
  edge2 <- edge[,2]
  edge_len <- phy$edge.length
  phe <- matrix(0, ntip + nnode, px)
  contr <- matrix(0, nnode, px)
  var_contr <- rep(0, nnode)
  i.seq <- seq(1, ntip * 2 -2, 2)
  list(ntip = ntip, nnode = nnode, edge1 = edge1,
       edge2 = edge2, edge_len = edge_len, phe = phe,
       contr = contr, var_contr = var_contr, 
       tip.label = phy$tip.label,
       i.seq = i.seq)
}

ace.pics <- function(ntip, nnode, edge1, edge2, edge_len, phe, contr,
                     var_contr, tip.label, i.seq, x) {
  phe[1:ntip,] <- if (is.null(rownames(x))) x else x[tip.label,]
  N <- ntip + nnode
  for(ii in 1:nnode) {
    anc <- edge1[i.seq[ii]]
    ij <- which(edge1 == anc)
    i <- ij[1]
    j <- ij[2]
    d1 <- edge2[i]
    d2 <- edge2[j]
    sumbl <- edge_len[i] + edge_len[j]
    ic <- anc - ntip 
    ya <- (phe[d1,] - phe[d2,])/sqrt(sumbl)
    contr[ic, ] <- ya
    var_contr[ic] <- sumbl
    phe[anc,] <- (phe[d1, ] * edge_len[j] + phe[d2, ] * edge_len[i])/sumbl
    k <- which(edge2 == anc)
    edge_len[k] <- edge_len[k] + edge_len[i] * edge_len[j] / sumbl
  }
  phe
}

# anc.BM
# multivariate as opposed to fastAnc
# uses Ho and Ane (2014) algorithm

anc.BM <- function(phy, Y){
  Y <- as.matrix(Y)
  N <- length(phy$tip.label)
  edge <- cbind(phy$edge, phy$edge.length)
  ind <-rank(edge[,1], ties.method = "last")
  edge <- edge[order(ind, decreasing = TRUE), ]
  ev <- edge[,3]
  anc <- edge[, 1]
  des <- edge[, 2]
  ne <- nrow(edge)
  if (is.null(phy$node.label)) {
    phy$node.label <- (length(phy$tip.label) + 1):(length(phy$tip.label) + 
                                                     phy$Nnode)
  }
  nl <- phy$node.label
  rm(phy, edge)
  
  Z <- matrix(0, ne + 1, NCOL(Y))
  p <- rep(0, ne + 1)
  
  for(i in 1:ne){
    a <- anc[i] 
    d <- des[i] 
    len <- ev[i] 
    
    if(d <= N){
      p[d] <- 1 / len
      Z[d, ] <- Y[d, ]
    } else {
      pA <- p[d]
      Z[d, ] <- Z[d, ] / pA
      p[d] <- pA / (1 + len * pA)
    }
    
    p[a] <- p[a] + p[d]
    Z[a, ] <- Z[a, ] + Z[d, ] * p[d]
  }
  
  Z[a, ] <- Z[a, ] / p[a]
  
  for(i in ne:1){
    a <- anc[i] 
    d <- des[i] 
    len <- ev[i]
    
    if(d > N) {
      Z[d, ] <-  Z[d, ] * p[d] * len + 
        Z[a, ] - Z[a, ] * p[d] * len
    }
  }
  Z <- Z[-(1:N), , drop = FALSE]
  rownames(Z) <- nl
  if(!is.null(colnames(Y)))
    colnames(Z) <- colnames(Y)
  
  Z
  
}

# getNode Depth
# replaces node.depth.edgelength

getNodeDepth <- function(phy){
  phy <- reorder.phy(phy)
  E <- phy$edge
  anc <- E[,1]
  des <- E[,2]
  ntip <- length(phy$tip.label)
  nnode <- phy$Nnode
  N <- ntip + nnode
  L <- phy$edge.length
  base.tax <- ntip + 1
  full.depth.seq <- (ntip + 1):N
  full.node.depth <- 0
  tips <- which(des <= ntip)
  non.tips <- which(des > ntip)
  
  get.edge.ind <- function(tax){
    root.t <- ntip +1
    des.i <- which(des == tax)
    edge <- numeric()
    tax.i <- tax
    while(tax.i != root.t) {
      anc.i <- anc[which(des == tax.i)]
      tax.i <- anc[des.i]
      edge <- c(edge, des.i)
      des.i <- which(des == anc.i)
    }
    edge
  }
  
  tips.taxa <- lapply(as.list(tips), function(j) des[j])
  tips.edges <- lapply(tips.taxa, get.edge.ind)
  tips.depths <- sapply(1:ntip, function(j) sum(L[tips.edges[[j]]]))
  
  nodes <- as.list((ntip + 1):(N))
  nodes.edges <- lapply(nodes, get.edge.ind)
  nodes.depths <- sapply(1:nnode, function(j) sum(L[nodes.edges[[j]]]))
  
  c(tips.depths, nodes.depths)
}

Try the RRPP package in your browser

Any scripts or data that you put into this service are public.

RRPP documentation built on June 22, 2024, 6:45 p.m.