R/mvnpermute.r

#' Permuted Y for (QTLRel) Calculation of Permutation Threshold
#'  
#' R function "mvnpermute" for executing a permutation-based test with
#' multivariate normally distributed data. 
#' 
#' @details This function takes multivariate normal data with known covariates
#' and covariance matrix and generates "permutations" of this data that
#' maintain the mean and covariance of the original data. The
#' permutations are generated by finding the residuals of the data and
#' mapping the residuals to an orthonormal space, then simulating
#' random permutations within this orthonormal space. The permutations
#' are then mapped back to the original space, and the expected values
#' (given the covariates, and their estimated effect sizes) are added
#' back in. The resulting "permuted" data can now be used exactly like
#' the original data; for example, you could estimate the null
#' distribution of some test statistic while observing the (known)
#' covariance of the data.
#'  
#' @author Mark Abney
#'  
#' @param y vector of length n containing the observed trait data
#' @param X n x m matrix or data frame, in which each row corresponds to a
#'        sample and each column corresponds to a covariate. If the
#'        model includes an intercept term, this should be included as
#'        one of the covariates of this matrix, as in
#'
#'         \code{X <- cbind(rep(n,1),X)}
#' @param S known or estimated covariance matrix of the data.
#' @param nr number of replicates to generate.
#'
#' @references Abney M, Ober C, McPeek MS (2002). "Quantitative trait homozygosity
#' and association mapping and empirical genome-wide significance in
#' large complex pedigrees: Fasting serum insulin levels in the
#' Hutterites." American Journal of Human Genetics 70: 920-934.
#'
#' @return The function returns a matrix with n rows and nr columns, in
#' which each column is a "permuted" version of y.
#' 
#' @keywords manip


mvnpermute <- function (y, X, S, nr) {

  # Get the number of samples.
  n <- length(y)

  # Make sure X is a matrix.
  X <- as.matrix(X)

  # Compute the lower triangular ("Choleksy") factorization of the
  # covariance matrix, so that L*L' = S, then solve for z and W in
  # linear systems of equations A*z = y and A*W = X to obtain
  # exchangeable elements of the trait and covariate samples.
  L <- t(chol(S))    
  z <- solve(L,y)
  W <- solve(L,X)

  # Fit the linear model and estimate the regression coefficients.
  # Note that the -1 in the formula indicates that the implicit
  # intercept should not be included in the linear model. Then get the
  # components of the linear model y <- U + E.
  model <- lm(z ~ W - 1)
  beta  <- coef(model)
  u     <- drop(X %*% beta)  
  e     <- resid(model)

  # Get the covariance matrix of the residuals, then the spectral
  # decomposition of this covariance matrix. Take only the
  # eigenvectors associated with eigenvalue of 1.  
  sigma.mat <- diag(n) - W %*% solve(t(W) %*% W,t(W))
  sigma.eig <- eigen(sigma.mat)
  V1        <- sigma.eig$vectors[ ,sigma.eig$values > 0.9]

  # The vector g now has exchangeable elements, and has covariance
  # matrix V1'*S*V1.
  g <- t(V1) %*% e
  C <- L %*% V1

  # Initialize storage for the output.
  perms           <- matrix(nrow = n,ncol = nr)
  rownames(perms) <- rownames(y)
  colnames(perms) <- 1:nr

  # Repeat for each replicate to generate.
  for (i in 1:nr) {

    # Shuffle the entries of the "exchangeable" observations, g.
    gp <- sample(g)

    # Transform the exchangeable random sample back to the original
    # space, and store the result.
    perms[,i] <- u + drop(C %*% gp)
  }

  # Output the result.
  return(perms)
}
simecek/HPQTL documentation built on May 29, 2019, 10 p.m.