Features for Association with a Response Variable, with Applications to Gene Set Testing

##############################################
# Linear model (normal errors)
##############################################
.lineartest <- function(response, Z, X, offset, dir, perms, fromGLM=FALSE) {

  # establish dimensions of the problem
  m <- ncol(Z)
  n <- nrow(Z)

  # fit the null model
  form <- formula(.makeFormula(m>0, !is.null(offset)))
  if (fromGLM)
    Y <- response
  else
    Y <- residuals(lm(form))
  sumYY <- sum(Y*Y)

  # adjust the alternative
  if (m > 0)
    X <- X - Z %*% solve(crossprod(Z), crossprod(Z, X))
  # set columns numerically zero to exactly zero
  csm <- colSums(X*X)
  X[,csm < max(csm)*1e-14] <- 0

  if (perms > 0) {
    # all permutations if possible
    allperms <- npermutations(Y) <= perms
    if (allperms) {
      random <- FALSE
      permY <- allpermutations(Y)
    } else {
      # otherwise random permutations
      random <- TRUE
      permY <- replicate(perms-1, Y[sample(n)])
    }
  }

  # Calculate the test statistic
  # 1: asymptotic version
  if (perms == 0) {
    test <- function(subset, weights, calculateP = TRUE) {
      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if (p == 0) {
        return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
      } else {
        if(!missing(weights)) {
          X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
        }
          
        if (p > n) {
          XX <- crossprod(t(X))
          if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
          norm.const <- sum(diag(XX)) / 100
          S <- sum(Y * (XX %*% Y)) / sum(Y*Y)
          if (sumYY == 0) S <- 0
          lams <- eigen(XX, symmetric = TRUE, only.values=TRUE)$values
          lams[1:(n-m)] <- lams[1:(n-m)] - S
  
          # term needed for mean and variance of S for z-score
          var.num <- 2*sum(XX*XX)
  
        } else {
          if (dir && (p > 1)) X <- cbind(X, sqrt(dir) * rowSums(X))
  
          norm.const <- sum(X * X) / 100
          xy <- crossprod(X, Y)
          S <- sum(xy * xy) / sumYY
          if (sumYY == 0) S <- 0
          lams <- eigen(crossprod(X), symmetric = TRUE, only.values=TRUE)$values
          if (length(lams) < n) lams <- c(lams, numeric(n-length(lams)))
          lams[1:(n-m)] <- lams[1:(n-m)] - S
  
          # term needed for mean and variance of S for z-score
          tr.term <- crossprod(X)
          var.num <- 2*sum(tr.term*tr.term)
        }
  
        # mean and variance of S for z-score
        # use series approximation as given in Paolella (2003) 319:
        mu.num <- sum(lams) + (n-m)*S
        mu.den <- n-m
        var.den <- 2*(n-m)
        cov.term <- 2*mu.num
        ES <- (mu.num/mu.den) * (1 - cov.term/(mu.num*mu.den) + var.den/(mu.den^2))
        VarS <- (mu.num^2/mu.den^2) * (var.num/(mu.num^2)  + var.den/(mu.den^2) - 2*cov.term/(mu.num*mu.den))
        
        # repair for case all(X == 0)
        if (norm.const == 0) {
          norm.const <- 1
          ES <- 0
          VarS <- 0
        }
                     
        # calculate the p-value
        if (calculateP)
          p.value <- .getP(lams)
        else
          p.value <- NA
                                        
        # give back
        return(c(p = p.value, S = S/norm.const, ES = ES/norm.const, sdS = sqrt(VarS)/norm.const, ncov=p))
      }
    }
  } else {
  # 2: permutation version
    test <- function(subset, weights, calculateP = TRUE) {          # calculateP not used in permutation testing
      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if (p == 0) {
        return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
      } else {
        if(!missing(weights)) {
          X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
        }
        
        if (p > n) {
  
          XX <- crossprod(t(X))
          if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
  
          norm.const <- sum(diag(XX)) * sumYY / 100
  
          S <- sum(Y * (XX %*% Y))
          permS <- colSums(permY * (XX %*% permY))
  
        } else {
          if (dir && (p > 1)) X <- cbind(X, sqrt(dir) * rowSums(X))
  
          norm.const <- sum(X * X) * sumYY / 100
          xy <- crossprod(X, Y)
          S <- sum(xy * xy)
  
          permxy <- crossprod(X, permY)
          permS <- colSums(permxy * permxy)
  
        }
  
        # mean and variance of S for z-score
        ES <- mean(permS)
        VarS <- var(permS)

        # repair for case all(X == 0)
        if (norm.const == 0) 
          norm.const <- 1
  
        # calculate the p-value
        if (allperms)
          p.value <- mean(S*(1-1e-14) <= permS)
        else
          p.value <- mean(S*(1-1e-14) <= c(Inf,permS))
  
        # give back
        return(c(p = p.value, S = S/norm.const, ES = ES/norm.const, sdS = sqrt(VarS)/norm.const, ncov = p))
      }
    }
  }

  # Get the adjusted X matrix
  getX <- function(subset, weights) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE]
    p <- ncol(X)
    if(!missing(weights)) 
      X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
    X
  }

  # Subjectsplot function
  subjectplot <- function(subset, weights, mirror) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE]
    p <- ncol(X)
    if(!missing(weights))
      X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
    
    XX <- crossprod(t(X))
    if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
    if (mirror) {
      R <- XX %*% Y * sign(Y)
      ER <- diag(XX) * Y * sign(Y)
    } else {
      R <- XX %*% Y
      ER <- diag(XX) * Y
    }

    norm.const <- mean(abs(ER))

    XXmd <- XX
    diag(XXmd) <- 0
    if (m > 0) XXmd <- XXmd - (XXmd %*% Z) %*% solve(crossprod(Z), t(Z))
    varR <- colSums(XXmd * XXmd) * sumYY / (n-m)

    cbind(R=R/norm.const, ER=ER/norm.const, sdR=sqrt(varR)/norm.const, Y=Y)
  }

  # permutations for permutations histogram
  permutations <- function(subset, weights) {
      if (!perms)
        stop("histogram only for permutation version of the test")

      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if(!missing(weights))
        X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
      
      if (p > n) {

        XX <- crossprod(t(X))
        if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))

        norm.const <- sum(diag(XX)) * sumYY / 100

        S <- sum(Y * (XX %*% Y))
        permS <- colSums(permY * (XX %*% permY))

      } else {
        if (dir && (p > 1)) X <- cbind(X, sqrt(dir) * rowSums(X))

        xy <- crossprod(X, Y)
        S <- sum(xy * xy)
        norm.const <- sum(X * X) * sumYY / 100

        permxy <- crossprod(X, permY)
        permS <- colSums(permxy * permxy)

      }

      return(list(S = S/norm.const, permS = permS/norm.const))
  }

  df <- function() {
    c(n,m,ncol(X))
  }

  nperms <- function() {
    if (perms)
      c(n = ncol(permY), random = random)
    else
      c(0, FALSE)
  }

  cov.names <- function(subset)
    if (missing(subset))
      colnames(X)
    else
      colnames(X[,subset,drop=FALSE])

  dist.cor <- function(subset, weights, transpose = FALSE) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE]
    p <- ncol(X)
    if(!missing(weights))
      X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
    if (transpose) 
      X <- t(X)
    sds <- sqrt(colSums(X*X))
    sds[sds < max(sds)*1e-14] <- Inf
    crossprod(X) / outer(sds,sds)
  }
  
  getY <- function() Y

  positive <- function(subset) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE] 
    xy <- crossprod(X, Y)
    drop(xy >= 0)+0
  }

  out <- sys.frame(sys.nframe())

  return(out)
}


##############################################
# Generalized linear model (canonical link functions only)
##############################################
.glmtest <- function(response, Z, X, offset, family, dir, perms) {

  # establish dimensions of the problem
  m <- ncol(Z)
  n <- nrow(Z)

  # fit the null model
  form <- formula(.makeFormula(m>0, !is.null(offset)))
  null.fit <- glm(form, family = family)

  Y <- null.fit$y - fitted.values(null.fit)
  W <- null.fit$weights          

  if (max(abs(W-W[1]))<1e-15)
    out <- .lineartest(Y, Z, X, offset, dir, perms, fromGLM=TRUE)
  else {
    if ((perms > 0) && (!(max(abs(W-W[1]))<1e-15)))
      stop("Generalized linear model with covariates:\n\tPermutation testing is not possible.", call.=FALSE)
  
    if (perms == 0) {
      sqrtW <- sqrt(W)
      # adjust the alternative
      if (m > 0) {
        ZWhf <- Z * matrix(sqrtW, n, m)
        ZW <- Z * matrix(W, n, m)
        X <- X - Z %*% solve(crossprod(ZWhf), t(ZW)) %*% X
        csm <- colSums(X*X)
        X[,csm < max(csm)*1e-14] <- 0
        ZWZinvZW <- solve(crossprod(ZWhf), t(ZWhf))
      }
    } else {
      equal.size.groups <- all(abs(Y) == abs(Y[1])) && (sum(Y) == 0)
      if (equal.size.groups)
        nperms <- npermutations(Y[-1])
      else
        nperms <- npermutations(Y)
      # all permutations if possible
      if (nperms <= perms) {
        random <- FALSE
        if (equal.size.groups)
          permY <- rbind(Y[1], allpermutations(Y[-1]))
        else
          permY <- allpermutations(Y)
      } else {
        # otherwise random permutations
        random <- TRUE
        permY <- replicate(perms-1, Y[sample(n)])
      }
      # adjust the alternative
      if (m > 0)
        X <- X - matrix(colMeans(X), n, ncol(X), byrow=TRUE)
    }
  
    norm.const <- (n-m) / 100
  
    # calculate the test statistic
    # 1: asymptotic version
    if (perms == 0) {
      test <- function(subset, weights, calculateP = TRUE) {
        if (!missing(subset))
          X <- X[,subset,drop=FALSE]
        p <- ncol(X)
        if (p == 0) {
          return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
        } else {
          if(!missing(weights))
            X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
          
          XX <- crossprod(t(X))
          if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
                         
          S <- sum(Y * (XX %*% Y)) / sum(Y*Y*diag(XX))
    
          # get lambdas
          XX <- XX * outer(sqrtW, sqrtW)
          dXX <- diag(diag(XX))
          if (m > 0) {
            dXX <- dXX - (dXX %*% ZWhf) %*% ZWZinvZW
            dXX <- dXX - crossprod(ZWZinvZW, crossprod(ZWhf, dXX))
          }
    
          if (is.nan(S))
            lams <- 0
          else
            lams <- eigen(XX - S*dXX, symmetric = TRUE, only.values=TRUE)$values
  
          # calculate the p-value
          if (calculateP)
            p.value <- .getP(lams)
          else
            p.value <- NA
    
          # mean and variance of S for z-score                                                           
          # use series approximation as given in Paolella (2003) 319:
          mu.num <- sum(diag(XX))
          mu.den <- sum(diag(dXX))
          var.num <- 2*sum(XX*XX)
          var.den <- 2*sum(dXX*dXX)
          cov.term <- 2*sum(XX*dXX)
    
          ES <- (mu.num/mu.den) * (1 - cov.term/(mu.num*mu.den) + var.den/(mu.den^2))
          VarS <- (mu.num^2/mu.den^2) * (var.num/(mu.num^2)  + var.den/(mu.den^2) - 2*cov.term/(mu.num*mu.den))
    
          # repair for case all(X == 0)
          if (is.na(S)) {
            S <- 0
            ES <- 0
            VarS <- 0
            p.value <- 1
          }
    
          # give back
          return(c(p = p.value, S = S/norm.const, ES = ES/norm.const, sdS = sqrt(VarS)/norm.const, ncov=p))
        }
      }
    } else {
    # 2: permutation version
      test <- function(subset, weights, calculateP=TRUE) {  # calculateP not used in permutation testing
        if (!missing(subset))
          X <- X[,subset,drop=FALSE]
        p <- ncol(X)
        if (p == 0) {
          return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
        } else {
          if(!missing(weights))
            X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
          
          if (p > n) {
            XX <- crossprod(t(X))
            if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
    
            S <- sum(Y * (XX %*% Y)) / sum(Y*Y*diag(XX))
            permS <- colSums(permY * (XX %*% permY)) / drop(diag(XX) %*% (permY * permY))
    
          } else {
            if (dir) X <- X + dir * matrix(rowSums(X), nrow(X), ncol(X))
    
            diagXX <- rowSums(X * X)
            xy <- crossprod(X, Y)
            S <- sum(xy * xy) / sum(Y*Y*diagXX)
    
            permxy <- crossprod(X, permY)
            permS <- colSums(permxy * permxy) / drop(diagXX %*% (permY * permY))
    
          }
    
          # mean and variance of S for z-score
          ES <- mean(permS)
          VarS <- var(permS)
    
          # calculate the p-value
          p.value <- mean(S <= c(Inf,permS))
    
          # give back
          return(c(p = p.value, S = S/norm.const, ES = ES/norm.const, sdS = sqrt(VarS)/norm.const, ncov=p))
        }
      }
    }
  
    # Get the adjusted X matrix
    getX <- function(subset, weights) {
      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if(!missing(weights))
        X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
      X
    }
  
    # Subjectsplot function
    subjectplot <- function(subset, weights, mirror) {
      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if(!missing(weights))
        X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
      
      XX <- crossprod(t(X))
      if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
      if (mirror) {
        R <- XX %*% Y * sign(Y)
        ER <- diag(XX) * Y * sign(Y)
      } else {
        R <- XX %*% Y
        ER <- diag(XX) * Y
      }
  
      norm.const <- mean(abs(ER))
  
      XXmd <- XX
      diag(XXmd) <- 0
      if (m > 0) XXmd <- XXmd - (XXmd %*% ZW %*% solve(crossprod(ZWhf), t(Z)))
      varR <- drop((XXmd * XXmd) %*% W)
  
      cbind(R=R/norm.const, ER=ER/norm.const, sdR=sqrt(varR)/norm.const, Y=Y)
    }
  
    # permutations for permutations histogram
    permutations <- function(subset, weights) {
        if (!perms)
          stop("histogram only for permutation version of the test")
  
        if (!missing(subset))
          X <- X[,subset,drop=FALSE]
        p <- ncol(X)
        if(!missing(weights))
          X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
        
        if (p > n) {
          XX <- crossprod(t(X))
          if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
  
          S <- sum(Y * (XX %*% Y)) / sum(Y*Y*diag(XX))
          permS <- colSums(permY * (XX %*% permY)) / drop(diag(XX) %*% (permY * permY))
  
        } else {
          if (dir) X <- X + dir * matrix(rowSums(X), nrow(X), ncol(X))
  
          diagXX <- rowSums(X * X)
          xy <- crossprod(X, Y)
          S <- sum(xy * xy) / sum(Y*Y*diagXX)
  
          permxy <- crossprod(X, permY)
          permS <- colSums(permxy * permxy) / drop(diagXX %*% (permY * permY))
        }
  
        return(list(S = S/norm.const, permS = permS/norm.const))
    }
  
    df <- function() {
      c(n,m,ncol(X))
    }
  
    nperms <- function() {
      if (perms)
        c(n = ncol(permY), random = random)
      else
        c(0, FALSE)
    }
  
    cov.names <- function(subset)
      if (missing(subset))
        colnames(X)
      else
        colnames(X[,subset,drop=FALSE])
  
    dist.cor <- function(subset, weights, transpose = FALSE) {
      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if(!missing(weights))
        X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
      if (transpose) 
        X <- t(X)
      sds <- sqrt(colSums(X*X))
      sds[sds < max(sds)*1e-14] <- Inf
      crossprod(X) / outer(sds,sds)
    }
    
    getY <- function() Y
  
    positive <- function(subset, weights) {
      if (!missing(subset))
        X <- X[,subset,drop=FALSE] 
      xy <- crossprod(X, Y)
      drop(xy >= 0)+0
    }
  
    out <- sys.frame(sys.nframe())
  }
  
  return(out)
}


##############################################
# Cox proportional hazards model
##############################################
.coxtest <- function (response, Z, X, offset, dir, perms, strat) {
                                 
  # fit the model
  m <- ncol(Z)
  form <- formula(.makeFormula(m>0, !is.null(offset)))
  fit <- coxph(form, method = "breslow")
  expci <- exp(fit$linear.predictors)
  n <- length(expci)

  # check possibility of permutations
  if ((perms > 0) && (m > 0) | (perms > 0) && !is.null(strat))
    stop("Cox model with covariates and/or strata:\n\tPermutation testing is not possible.", call.=FALSE)

  # extract survival times, strata and risksets
  if (ncol(response) == 2) {
    times <- as.vector(fit$y)[1:n]
    d <- as.vector(fit$y)[(n + 1):(2 * n)]
    if (!is.null(strat)) {
      dtimes <- times[d == 1]
      dstrat <- strat[d == 1]
      unq <- which(!duplicated(cbind(dtimes,dstrat)))
      dtimes <- dtimes[unq]
      dstrat <- dstrat[unq]
      atrisk <- outer(times, dtimes, ">=") * outer(strat, dstrat, "==")
    } else {
      dtimes <- unique(times[d == 1])
      atrisk <- outer(times, dtimes, ">=")
    }
  } else {
    times <- as.vector(fit$y)[(n + 1):(2 * n)]
    start <- as.vector(fit$y)[1:n]
    d <- as.vector(fit$y)[(2*n + 1):(3 * n)]
    if (!is.null(strat)) {
      dtimes <- times[d == 1]
      dstrat <- strat[d == 1]
      unq <- which(!duplicated(cbind(dtimes,dstrat)))
      dtimes <- dtimes[unq]
      dstrat <- dstrat[unq]
      atrisk <- outer(times, dtimes, ">=") * outer(start, dtimes, "<") * outer(strat, dstrat, "==")
    } else {
      dtimes <- unique(times[d == 1])
      atrisk <- outer(times, dtimes, ">=") * outer(start, dtimes, "<") 
    }
  }
  
  ## Construct matrixO
  nd <- length(dtimes)
  if (!is.null(strat)) {
    matrixO <- outer(times, dtimes, "==") * outer(strat, dstrat, "==") * matrix(d, n, nd)
  } else {
    matrixO <- outer(times, dtimes, "==") * matrix(d, n, nd)
  }

  # any ties present?
  ties <- any(colSums(matrixO) > 1)

  # construct relevant matrices
  hazinc <- as.vector(1 / (expci %*% atrisk))
  matrixP <- outer(expci, hazinc, "*") * atrisk
  matrixPO <- matrixP %*% t(matrixO)
  matrixM <- matrix(d, n, nd, byrow=FALSE) * (!atrisk) - matrixPO %*% (!atrisk)
  matrixW <- -crossprod(t(matrixPO))
  diag(matrixW) <- diag(matrixW) + rowSums(matrixPO)
  if (m > 0) {
    ZW <- crossprod(Z, matrixW)
    ZWZ <- ZW %*% Z
    ZWZinvZ <- solve(ZWZ, t(Z))
    X <- X - Z %*% solve(ZWZ, ZW) %*% X
    csm <- colSums(X*X)
    X[,csm < max(csm)*1e-14] <- 0
  }

  # martingale residuals
  Y <- rowSums(matrixPO) - d
                           
  if (perms > 0) {
    # unlike the glm and lm models, we make permutations of the index vector rather than Y itself
    # This is for also being able to permute tr(XXW)
    piy <- seq_along(Y)
    # all permutations if possible
    if (npermutations(piy) <= perms) {
      random <- FALSE
      permIY <- allpermutations(piy)
    } else {
      # otherwise random permutations
      random <- TRUE
      permIY <- replicate(perms-1, piy[sample(n)])
    }
    permY <- matrix(Y[permIY], nrow=length(Y))
  }

  # The test statistic
  if (perms == 0) {
    test <- function(subset, weights, calculateP = TRUE) {   # calculateP not used in survival models
      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if (p == 0) {
        return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
      } else {
        if(!missing(weights))
          X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
        
        XX <- crossprod(t(X))
        if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
        Q <- crossprod(Y, XX) %*% Y

        # adjust Q in case of tied survival times
        if (ties) {
          tiecorrect <- sum( sapply(1:length(dtimes), function(i) {
            if (sum(matrixO[,i]) == 1)
              0
            else {
              matrixtie <- outer(matrixO[,i], matrixO[,i])
              diag(matrixtie) <- 0
              sum(XX[as.logical(matrixtie)]) - 2 * sum(matrixP[,i] %*% XX %*% matrixtie) + sum(matrixtie) * (matrixP[,i] %*% XX %*% matrixP[,i])
            }
          }))
          Q <- Q - tiecorrect
        }
  
        # calculate mean and variance of Q
        EQ <- sum(XX * matrixW)
        tussen <- matrix(diag(XX), n, nd) + 2 * XX %*% (matrixM - matrixP)
        matrixT <- tussen - matrix(colSums(matrixP * tussen), n, nd, byrow = TRUE)
        varQ <- sum(matrixPO * ((matrixT * matrixT) %*% t(matrixO)))
  
        # calculate p-value
        Z <- (Q - EQ) / sqrt(varQ)
        p.value <- pnorm(Z, lower.tail = FALSE)
        norm.const <- (n-m) * EQ / 100
  
        # positive = negative association with occurence of the event
  
        # give back
        return(c(p = p.value, S = Q/norm.const, ES = EQ/norm.const, sdS=sqrt(varQ)/norm.const, ncov=p))
      }
    }
  } else {
    test <- function(subset, weights, calculateP = TRUE) {     # calculateP not used in permutation testing
      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if (p == 0) {
        return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
      } else {
        if(!missing(weights))
          X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
        
        XX <- crossprod(t(X))
        if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
        Q <- drop(crossprod(Y, XX) %*% Y)
        EQ <- sum(XX * matrixW)
  
        permQ <- colSums(permY * (XX %*% permY))
        permEQ <- apply(permIY, 2, function(prm) {
          pW <- matrixW[prm,prm]
          sum(XX * pW)
        })
  
        norm.const <- (n-m) * EQ / 100
  
        # calculate the p-value
        p.value <- mean(Q - EQ <= c(Inf, permQ - permEQ))
  
        # mean and variance of S for z-score
        ES <- EQ + mean(permQ - permEQ)
        VarS <- var(permQ - permEQ)
        
        # give back
        return(c(p = p.value, S = Q/norm.const, ES = ES/norm.const, sdS = sqrt(VarS)/norm.const, ncov = p))
      }
    }
  }

  # Get the adjusted X matrix
  getX <- function(subset, weights) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE]
    p <- ncol(X)
    if(!missing(weights))
      X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
    X
  }


  # Subjectsplot function
  subjectplot <- function(subset, weights, mirror) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE]
    p <- ncol(X)
    if(!missing(weights))
      X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
    
    XX <- crossprod(t(X))
    if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
    if (mirror) {
      R <- drop(XX %*% Y * sign(Y))
      ER <- diag(XX) * Y * sign(Y)
    } else {
      R <- XX %*% Y
      ER <- diag(XX) * Y
    }

    norm.const <- mean(abs(ER))

    XXmd <- XX
    diag(XXmd) <- 0
    if (m > 0) XXmd <- XXmd - XXmd %*% matrixW %*% Z %*% ZWZinvZ
    varR <- diag(XXmd %*% matrixW %*% t(XXmd))

    cbind(R=R/norm.const, ER=ER/norm.const, sdR=sqrt(varR)/norm.const, Y=Y)
  }

  # permutations for permutations histogram
  permutations <- function(subset, weights) {
      if (!perms)
        stop("histogram only for permutation version of the test")

      if (!missing(subset))
        X <- X[,subset,drop=FALSE]
      p <- ncol(X)
      if(!missing(weights))
        X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
      
      XX <- crossprod(t(X))
      if (dir) XX <- XX + dir * outer(rowSums(X), rowSums(X))
      Q <- drop(crossprod(Y, XX) %*% Y)
      EQ <- sum(XX * matrixW)

      permQ <- colSums(permY * (XX %*% permY))
      permEQ <- apply(permIY, 2, function(prm) {
        pW <- matrixW[prm,prm]
        sum(XX * pW)
      })

      norm.const <- (n-m) * EQ / 100

      # calculate the p-value
      permS <- EQ + permQ - permEQ

      return(list(S = Q/norm.const, permS = permS/norm.const))
  }

  df <- function() {
    c(n,m,ncol(X))
  }

  nperms <- function() {
    if (perms)
      c(n = ncol(permY), random = random)
    else
      c(0, FALSE)
  }

  cov.names <- function(subset)
    if (missing(subset))
      colnames(X)
    else
      colnames(X[,subset,drop=FALSE])

  dist.cor <- function(subset, weights, transpose = FALSE) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE]
    p <- ncol(X)
    if(!missing(weights))
      X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
    if (transpose) 
      X <- t(X)
    sds <- sqrt(colSums(X*X))
    sds[sds < max(sds)*1e-14] <- Inf
    crossprod(X) / outer(sds,sds)
  }
  
  positive <- function(subset, weights) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE] 
    xy <- crossprod(X, Y)
    drop(xy >= 0)+0
  }
  
  getY <- function() Y

  out <- sys.frame(sys.nframe())

  return(out)
}


##############################################
# Multinomial logistic model
##############################################
.multinomialtest <- function(response, Z, X, dir, perms) {

  # establish dimensions of the problem
  m <- ncol(Z)
  n <- nrow(Z)

  # fit the null model
  form <- formula(.makeFormula(m>0, FALSE))
  null.fit <- mlogit(form)
  fitted <- fitted.values(null.fit)
  G <- ncol(fitted)
  if ((perms > 0) && (!all(sapply(1:G, function(i) all(fitted[,G] == fitted[1,G])))))
    stop("Multinomial model with covariates:\n\tPermutation testing is not possible.", call.=FALSE)
  bY <- as.vector(null.fit@y - fitted)
  cY <- apply(null.fit@y==1, 1, which)
  bX <- diag(G) %x% X
  W <- matrix(0,n*G,n*G)
  for (i in 1:G) 
    for (j in 1:G)
      diag(W[(i-1)*n + 1:n, (j-1)*n + 1:n]) <- if (i==j) fitted[,i]*(1-fitted[,i]) else -fitted[,i]*fitted[,j]

  if (perms == 0) {
    eW <- eigen(W,symmetric=TRUE)
    eW$vectors <- eW$vectors[,1:(n*(G-1)),drop=FALSE]
    eW$values <- eW$values[1:(n*(G-1))]
    halfW <- eW$vectors %*% diag(eW$values^(1/2)) %*% t(eW$vectors)   
    # adjust the alternative
    if (m > 0) {
      bZ <- diag(G) %x% Z       
      bZW <- crossprod(bZ, W)
      eZWZ <- eigen(bZW %*% bZ, symmetric=TRUE)    
      eZWZ$vectors <- eZWZ$vectors[,1:(m*(G-1)),drop=FALSE]
      eZWZ$values <- eZWZ$values[1:(m*(G-1))]
      ZWZinvZW <- eZWZ$vectors %*% diag(1/eZWZ$values, nrow=m*(G-1)) %*% crossprod(eZWZ$vectors, bZW) 
      bX <- bX - bZ %*% (ZWZinvZW %*% bX) 
      csm <- colSums(bX*bX)
      bX[,csm < max(csm)*1e-14] <- 0
    }
  } else {
    equal.size.groups <- all(table(cY)==table(cY)[1])
    if (equal.size.groups)
      nperms <- npermutations(cY[-1])
    else
      nperms <- npermutations(cY)
    # all permutations if possible
    if (nperms <= perms) {
      random <- FALSE
      if (equal.size.groups)
        permY <- rbind(cY[1], allpermutations(cY[-1]))
      else
        permY <- allpermutations(cY)
    } else {         
      # otherwise random permutations
      random <- TRUE
      permY <- replicate(perms-1, cY[sample(n)])
    }
    permY <- apply(permY, 2, function(prm) {
      as.vector(sapply(1:G, function(i) { prm == i }) - fitted)
    }) 
    # adjust the alternative
    if (m > 0) {
      bZ <- diag(G) %x% Z       
      bZW <- crossprod(bZ, W)
      eZWZ <- eigen(bZW %*% bZ, symmetric=TRUE)    
      eZWZ$vectors <- eZWZ$vectors[,1:(m*(G-1)),drop=FALSE]
      eZWZ$values <- eZWZ$values[1:(m*(G-1))]
      ZWZinvZW <- eZWZ$vectors %*% diag(1/eZWZ$values,nrow=m*(G-1)) %*% crossprod(eZWZ$vectors, bZW) 
      bX <- bX - bZ %*% (ZWZinvZW %*% bX) 
      csm <- colSums(bX*bX)
      bX[,csm < max(csm)*1e-14] <- 0
    }
  }

  # utility to select columns of bX
  select.help <- 1:ncol(X)
  names(select.help) <- colnames(X)
  selector <- function(set) outer(ncol(X)*(1:G-1), select.help[set], "+")

  norm.const <- (n-m) / 100

  # calculate the test statistic
  # 1: asymptotic version
  if (perms == 0) {
    test <- function(subset, weights, calculateP = TRUE) {
      if (!missing(subset))
        bX <- bX[,selector(subset),drop=FALSE]
      p <- ncol(bX)/G
      if (p == 0) {
        return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
      } else {
        if(!missing(weights))
          bX <- bX * matrix(rep(sign(weights)*sqrt(abs(weights)),G), n*G, p*G, byrow = TRUE)
  
        XX <- crossprod(t(bX))
        if (dir) 
          for (i in 1:G) { 
            rs <- rowSums(bX[,p*(i-1)+1:p])
            XX <- XX + dir * outer(rs, rs)
          }
        dXX <- XX
        dXX[(row(XX) - col(XX)) %% n != 0] <- 0
                     
        S <- sum(bY * (XX %*% bY)) / sum(bY * (dXX %*% bY))
  
        # get lambdas
        if (m > 0) {
          dXX <- dXX - bZ %*% (ZWZinvZW %*% dXX)
          dXX <- dXX - dXX %*% t(ZWZinvZW) %*% t(bZ)
        }
        XX <- halfW %*% XX %*% halfW
        dXX <- halfW %*% dXX %*% halfW
        if (is.nan(S))
          lams <- 0
        else
          lams <- eigen(XX - S*dXX, symmetric=TRUE, only.values=TRUE)$values
  
        # calculate the p-value
        if (calculateP)
          p.value <- .getP(lams)
        else
          p.value <- NA
  
        # mean and variance of S for z-score
        # use series approximation as given in Paolella (2003) 319:
        mu.num <- sum(diag(XX))
        mu.den <- sum(diag(dXX))
        var.num <- 2*sum(XX*XX)
        var.den <- 2*sum(dXX*dXX)
        cov.term <- 2*sum(XX*dXX)
  
        ES <- (mu.num/mu.den) * (1 - cov.term/(mu.num*mu.den) + var.den/(mu.den^2))
        VarS <- (mu.num^2/mu.den^2) * (var.num/(mu.num^2)  + var.den/(mu.den^2) - 2*cov.term/(mu.num*mu.den))
  
        # give back
        return(c(p = p.value, S = S/norm.const, ES = ES/norm.const, sdS = sqrt(VarS)/norm.const, ncov=p))
      }
    }
  } else {
  # 2: permutation version
    test <- function(subset, weights, calculateP=TRUE) {  # calculateP not used in permutation testing
      if (!missing(subset))
        bX <- bX[,selector(subset),drop=FALSE]
      p <- ncol(bX)/G
      if (p == 0) {
        return(c(p = NA, S = NA, ES = NA, sdS = NA, ncov=0))
      } else {
        if(!missing(weights))
          bX <- bX * matrix(rep(sign(weights)*sqrt(abs(weights)),G), n*G, p*G, byrow = TRUE)
          
        XX <- crossprod(t(bX))
        if (dir) 
          for (i in 1:G) { 
            rs <- rowSums(bX[,p*(i-1)+1:p])
            XX <- XX + dir * outer(rs, rs)
          }
        dXX <- XX
        dXX[(row(XX) - col(XX)) %% n != 0] <- 0
      
        S <- sum(bY * (XX %*% bY)) / sum(bY * (dXX %*% bY))
        permS <- colSums(permY * (XX %*% permY)) / colSums(permY * (dXX %*% permY))
  
        # mean and variance of S for z-score
        ES <- mean(permS)
        VarS <- var(permS)
  
        # calculate the p-value
        p.value <- mean(S <= c(Inf,permS))
  
        # give back
        return(c(p = p.value, S = S/norm.const, ES = ES/norm.const, sdS = sqrt(VarS)/norm.const, ncov=p))
      }
    }
  }

  # Get the adjusted X matrix
  getX <- function(subset, weights) {
    if (!missing(subset))
      bX <- bX[,selector(subset),drop=FALSE]
    p <- ncol(bX)/G
    if(!missing(weights))
      bX <- bX * matrix(rep(sign(weights)*sqrt(abs(weights)),G), n*G, p*G, byrow = TRUE)
    bX
  }

  # Subjectsplot function
  subjectplot <- function(subset, weights, mirror) {
    if (!missing(subset))
      bX <- bX[,selector(subset),drop=FALSE]
    p <- ncol(bX)/G
    if(!missing(weights))
      bX <- bX * matrix(rep(sign(weights)*sqrt(abs(weights)),G), n*G, p*G, byrow = TRUE)

    XX <- crossprod(t(bX))
    if (dir) 
      for (i in 1:G) { 
        rs <- rowSums(bX[,p*(i-1)+1:p])
        XX <- XX + dir * outer(rs, rs)
      }    
    dXX <- XX
    dXX[(row(XX) - col(XX)) %% n != 0] <- 0

    mY <- matrix(bY, n*G, n)
    mY[(row(mY) - col(mY)) %% n != 0] <- 0
    if (mirror) {
      R <- drop(crossprod(mY, XX %*% bY))
      ER <- drop(crossprod(mY, dXX %*% bY))
    } else 
      stop("mirror=FALSE not appropriate for the multinomial model")

    norm.const <- mean(abs(ER))

    XXmd <- XX - dXX
    if (m > 0) 
      XXmd <- XXmd - XXmd %*% t(ZWZinvZW) %*% t(bZ)
    XXmdW <- crossprod(mY, XXmd) %*% halfW 
    varR <- drop(rowSums(XXmdW * XXmdW))

    cbind(R=R/norm.const, ER=ER/norm.const, sdR=sqrt(varR)/norm.const, Y=cY)
  }

  # permutations for permutations histogram
  permutations <- function(subset, weights) {
      if (!perms)
        stop("histogram only for permutation version of the test")

      if (!missing(subset))
        bX <- bX[,selector(subset),drop=FALSE]
      p <- ncol(bX)/G
      if(!missing(weights))
        bX <- bX * matrix(rep(sign(weights)*sqrt(abs(weights)),G), n*G, p*G, byrow = TRUE)
        
      XX <- crossprod(t(bX))
      if (dir) 
        for (i in 1:G) { 
          rs <- rowSums(bX[,p*(i-1)+1:p])
          XX <- XX + dir * outer(rs, rs)
        }
      dXX <- XX
      dXX[(row(XX) - col(XX)) %% n != 0] <- 0
    
      S <- sum(bY * (XX %*% bY)) / sum(bY * (dXX %*% bY))
      permS <- colSums(permY * (XX %*% permY)) / colSums(permY * (dXX %*% permY))

      return(list(S = S/norm.const, permS = permS/norm.const))
  }

  df <- function() {
    c(n,m,ncol(X))
  }

  nperms <- function() {
    if (perms)
      c(n = ncol(permY), random = random)
    else
      c(0, FALSE)
  }

  cov.names <- function(subset)
    if (missing(subset))
      colnames(X)
    else
      colnames(X[,subset,drop=FALSE])

  dist.cor <- function(subset, weights, transpose=FALSE) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE]
    p <- ncol(X)
    if(!missing(weights))
      X <- X * matrix(sign(weights)*sqrt(abs(weights)), n, p, byrow = TRUE)
    if (transpose)
      uit <- matrix(0,n,n)
    else
      uit <- matrix(0,p,p)
    for (i in 1:G) {
      rngN <- 1:n + (i-1)*n
      if (m > 0) {
        ZW <- Z * matrix(diag(W[rngN,rngN]), nrow(Z), ncol(Z))
        adjX <- X - Z %*% solve(crossprod(ZW, Z), crossprod(ZW, X))
      } else
        adjX <- X
      if (transpose)
        adjX <- t(adjX)
      sds <- sqrt(colSums(adjX*adjX))
      sds[sds < max(sds)*1e-14] <- Inf
      uit <- uit + crossprod(adjX) / outer(sds,sds)
    }
    uit/G
  }

  
  positive <- function(subset) {
    if (!missing(subset))
      X <- X[,subset,drop=FALSE] 
    drop(apply(cor(X, null.fit@y - fitted), 1, which.max))
  }  
          
  getY <- function() bY

  out <- sys.frame(sys.nframe())
  
  return(out)
}
Any scripts or data that you put into this service are public.
globaltest documentation built on Nov. 8, 2020, 8:18 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
globaltest
Testing Groups of Covariates/Features for Association with a Response Variable, with Applications to Gene Set Testing

R/models.R
In globaltest: Testing Groups of Covariates/Features for Association with a Response Variable, with Applications to Gene Set Testing

Defines functions .multinomialtest .glmtest .lineartest

Try the globaltest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

globaltest Testing Groups of Covariates/Features for Association with a Response Variable, with Applications to Gene Set Testing

R/models.R In globaltest: Testing Groups of Covariates/Features for Association with a Response Variable, with Applications to Gene Set Testing

Defines functions .multinomialtest .glmtest .lineartest

Try the globaltest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

globaltest
Testing Groups of Covariates/Features for Association with a Response Variable, with Applications to Gene Set Testing

R/models.R
In globaltest: Testing Groups of Covariates/Features for Association with a Response Variable, with Applications to Gene Set Testing