Nothing
## R function "mvnpermute" for executing a permutation-based test with
## multivariate normally distributed data.
##
## copyright Mark Abney, 2013
## With contributions from Peter Carbonetto
## Licensed under GPL3
##
## For more information, see:
## Abney M (2015) "Permutation testing in the presence of polygenic
## variation." Genetic Epidemiology, in press.
## and
## Abney M, Ober C, McPeek MS (2002). "Quantitative trait homozygosity
## and association mapping and empirical genome-wide significance in
## large complex pedigrees: Fasting serum insulin levels in the
## Hutterites." American Journal of Human Genetics 70: 920-934.
##
## INPUTS:
##
## y Vector of length n containing the observed trait data.
## X n x m matrix or data frame, in which each row corresponds to a
## sample and each column corresponds to a covariate. If the
## model includes an intercept term, this should be included as
## one of the covariates of this matrix, as in
##
## X <- cbind(rep(n,1),X)
##
## S Known or estimated covariance matrix of the data.
## nr Number of replicates to generate.
## seed Seed for sampling the nr permutations.
##
## OUTPUT: The function returns a matrix with n rows and nr columns, in
## which each column is a "permuted" version of y.
##
#' Generate Permutation-based Multivariate Normal Data
#'
#' Simulate new multivariate normal data sets with a specified covariance
#' matrix by permutation. Permutations are done on a linear transformation
#' of residuals that is guaranteed to be exchangeable when the original data
#' are multivariate normally distributed.
#'
#' This function takes multivariate normal data with known covariates
#' and covariance matrix and generates "permutations" of this data that
#' maintain the mean and covariance of the original data. The
#' permutations are generated by finding the residuals of the data and
#' mapping the residuals to an orthonormal space, then simulating
#' random permutations within this orthonormal space. The permutations
#' are then mapped back to the original space, and the expected values
#' (given the covariates, and their estimated effect sizes) are added
#' back in. The resulting "permuted" data can now be used exactly like
#' the original data; for example, you could estimate the null
#' distribution of some test statistic while maintaining the (known)
#' covariance of the data.
#'
#' @references Abney M (2015) "Permutation testing in the presence of polygenic
#' variation." Genetic Epidemiology, in press.
#' @references Abney M, Ober C, McPeek MS (2002). "Quantitative trait homozygosity
#' and association mapping and empirical genome-wide significance in
#' large complex pedigrees: Fasting serum insulin levels in the
#' Hutterites." American Journal of Human Genetics 70: 920--934.
#' @param y Vector of multivariate normal data.
#' @param X Matrix or data frame of covariate values (including the intercept
#' term if desired). Number of rows must equal the length of \code{y}.
#' @param S Known or estimated covariance matrix of \code{y}.
#' @param nr Number of data sets to generate.
#' @param seed Optional random number seed for sampling the \code{nr} permutations.
#' @return A matrix with number of rows equal to \code{length(y)} and \code{nr} columns.
#' @export
#' @examples
#' set.seed(98765)
#' N <- 100
#' r.mat <- matrix(rexp(N^2), ncol=N)
#' cv.mat <- crossprod(r.mat) # a positive definite covariance matrix
#' x.mat <- rep(1, N)
#' r.mean <- 3
#' r1 <- rnorm(N) # independent random noise
#' r.data <- x.mat * r.mean + drop( t(chol(cv.mat)) %*% r1 ) # correlated random noise
#' r.perm <- mvnpermute( r.data, x.mat, cv.mat, nr=1000, seed=1234)
#' est.cvm <- cov(t(r.perm))
#' plot(cv.mat, est.cvm); abline(0,1)
mvnpermute <- function (y, X, S, nr, seed) {
if(!missing(seed)) set.seed(seed)
## Get the number of samples.
n <- length(y)
## Make sure X is a matrix.
X <- as.matrix(X)
if (nrow(X) != n) stop("Number of elements in y does not match number of rows in X.")
## Compute the lower triangular ("Choleksy") factorization of the
## covariance matrix, so that R' * R = S, then solve for z and W in
## linear systems of equations A*z = y and A*W = X to obtain
## exchangeable elements of the trait and covariate samples.
R <- chol(S)
z <- backsolve(R, y, transpose=TRUE)
W <- backsolve(R, X, transpose=TRUE)
## Fit the linear model and estimate the regression coefficients.
## Note that the -1 in the formula indicates that the implicit
## intercept should not be included in the linear model. Then get the
## components of the linear model y <- U + E.
model <- stats::lm(z ~ W - 1)
beta <- stats::coef(model)
u <- drop(X %*% beta)
e <- stats::resid(model)
## Get the covariance matrix of the residuals, then the spectral
## decomposition of this covariance matrix. Take only the
## eigenvectors associated with eigenvalue of 1.
sigma.mat <- diag(n) - W %*% solve(t(W) %*% W, t(W))
sigma.eig <- eigen(sigma.mat)
V1 <- sigma.eig$vectors[ , sigma.eig$values > 0.9]
## The vector g now has exchangeable elements, and has covariance
## matrix equal to the identity.
g <- crossprod(V1, e) # t(V1) %*% e
C <- crossprod(R, V1) # t(R) %*% V1
## Initialize storage for the output.
perms <- matrix(nrow = n, ncol = nr)
rownames(perms) <- rownames(y)
colnames(perms) <- 1:nr
## Repeat for each replicate to generate.
for (i in 1:nr) {
## Shuffle the entries of the exchangeable observations, g.
gp <- sample(g)
## Transform the exchangeable random sample back to the original
## space, and store the result.
perms[,i] <- u + drop(C %*% gp)
}
## Output the result.
return(perms)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.