EM3.cov | R Documentation |
This function solves the following multi-kernel linear mixed effects model with covairance structure.
y = X \beta + \sum _{l=1} ^ {L} Z _ {l} u _ {l} + \epsilon
where Var[y] = \sum _{i=1} ^ {L} Z _ {i} K _ {i} Z _ {i}' \sigma _ {i} ^ 2 + \sum _{i=1} ^ {L-1} \sum _{j=1} ^ {L} (Z _ {i} K _ {i j} Z _ {j}' + Z _ {j} K _ {j i} Z _ {i}') \sigma _ {i} \sigma _ {j} \rho _{i j} + I \sigma _ {e} ^ {2}
.
Here, K _ {i j}
and K _ {j i}
are m_i \times m_j
and m_j \times m_i
matrices representing covariance structure between two random effects.
\rho _{i j}
is a correlation parameter to be estimated in addition to \sigma^2_i
and \sigma_{e}^2
.
EM3.cov(
y,
X0 = NULL,
ZETA,
covList,
eigen.G = NULL,
eigen.SGS = NULL,
tol = NULL,
n.core = NA,
optimizer = "optim",
traceInside = 0,
nIterOptimization = NULL,
n.thres = 450,
REML = TRUE,
pred = TRUE,
return.u.always = TRUE,
return.u.each = TRUE,
return.Hinv = TRUE
)
y |
A |
X0 |
A |
ZETA |
A list of variance matrices and its design matrices of random effects. You can use more than one kernel matrix. For example, ZETA = list(A = list(Z = Z.A, K = K.A), D = list(Z = Z.D, K = K.D)) (A for additive, D for dominance) Please set names of lists "Z" and "K"! |
covList |
A list of matrices representing covariance structure between paired random effects.
If there are |
eigen.G |
A list with
The result of the eigen decompsition of |
eigen.SGS |
A list with
The result of the eigen decompsition of |
tol |
The tolerance for detecting linear dependencies in the columns of G = ZKZ'. Eigen vectors whose eigen values are less than "tol" argument will be omitted from results. If tol is NULL, top 'n' eigen values will be effective. |
n.core |
Setting n.core > 1 will enable parallel execution on a machine with multiple cores. |
optimizer |
The function used in the optimization process. We offer "optim", "optimx", and "nlminb" functions. |
traceInside |
Perform trace for the optimzation if traceInside >= 1, and this argument shows the frequency of reports. |
nIterOptimization |
Maximum number of iterations allowed. Defaults are different depending on 'optimizer'. |
n.thres |
If |
REML |
You can choose which method you will use, "REML" or "ML". If REML = TRUE, you will perform "REML", and if REML = FALSE, you will perform "ML". |
pred |
If TRUE, the fitting values of y is returned. |
return.u.always |
If TRUE, BLUP ('u'; |
return.u.each |
If TRUE, the function also computes each BLUP corresponding to different kernels (when solving multi-kernel mixed-effects model). It takes additional time compared to the one with 'return.u.each = FALSE'. |
return.Hinv |
If TRUE, |
The fitting values of y y = X\beta + Zu
Estimator for \sigma^2_u
, all of the genetic variance
Estimator for \sigma^2_e
BLUE(\beta
)
BLUP(Sum of Zu
)
BLUP(Each u
)
The proportion of each genetic variance (corresponding to each kernel of ZETA) to Vu
The estimator for a matrix of correlation parameters \rho
. Diagonal elements are always 0.
Maximized log-likelihood (full or restricted, depending on method)
The inverse of V = Vu \times ZKZ' + Ve \times I
The inverse of H = ZKZ' + \lambda I
Kang, H.M. et al. (2008) Efficient Control of Population Structure in Model Organism Association Mapping. Genetics. 178(3): 1709-1723.
Zhou, X. and Stephens, M. (2012) Genome-wide efficient mixed-model analysis for association studies. Nat Genet. 44(7): 821-824.
### Import RAINBOWR
require(RAINBOWR)
### Load example datasets
data("Rice_Zhao_etal")
Rice_geno_score <- Rice_Zhao_etal$genoScore
Rice_geno_map <- Rice_Zhao_etal$genoMap
Rice_pheno <- Rice_Zhao_etal$pheno
### View each dataset
See(Rice_geno_score)
See(Rice_geno_map)
See(Rice_pheno)
### Select one trait for example
trait.name <- "Flowering.time.at.Arkansas"
y <- as.matrix(Rice_pheno[, trait.name, drop = FALSE])
### Remove SNPs whose MAF <= 0.05
x.0 <- t(Rice_geno_score)
MAF.cut.res <- MAF.cut(x.0 = x.0, map.0 = Rice_geno_map)
x <- MAF.cut.res$x
map <- MAF.cut.res$map
### Assume adjacent individuals are regarded as "neighbors"
xAdj <- array(data = NA, dim = dim(x), dimnames = dimnames(x))
for (i in 1:nrow(x)) {
adjs <- (i - 1):(i + 1)
adjs <- adjs[adjs %in% 1:nrow(x)]
adjs <- adjs[adjs != i]
nAdjs <- length(adjs)
xAdj[i, ] <- x[i, , drop = FALSE] *
apply(X = x[adjs, , drop = FALSE],
MARGIN = 2, FUN = mean)
}
### Estimate additive genomic relationship matrix (GRM)
K.A <- tcrossprod(x) / ncol(x)
K.Adj <- tcrossprod(xAdj) / ncol(xAdj) # for neighbor kernel
### Modify data
Z <- design.Z(pheno.labels = rownames(y),
geno.names = rownames(K.A)) ### design matrix for random effects
pheno.mat <- y[rownames(Z), , drop = FALSE]
ZETA <- list(A = list(Z = Z, K = K.A),
Adj = list(Z = Z, K = K.Adj))
### Prepare for covairance structure between two random effects
K12 <- tcrossprod(x, xAdj) / sqrt(ncol(x) * ncol(xAdj))
K21 <- tcrossprod(xAdj, x) / sqrt(ncol(x) * ncol(xAdj))
covList <- rep(list(rep(list(NULL), 2)), 2)
covList[[1]][[2]] <- K12
covList[[2]][[1]] <- K21
### Solve multi-kernel linear mixed effects model (2 random efects)
### conidering covariance structure
EM3cov.res <- EM3.cov(y = pheno.mat,
X0 = NULL,
ZETA = ZETA,
covList = covList)
(Vu <- EM3cov.res$Vu) ### estimated genetic variance
(Ve <- EM3cov.res$Ve) ### estimated residual variance
(weights <- EM3cov.res$weights) ### estimated proportion of two genetic variances
(herit <- Vu * weights / (Vu + Ve)) ### genomic heritability (additive, neighbor)
(rho <- EM3cov.res$rhosMat[2, 1]) ### correlation parameter
(beta <- EM3cov.res$beta) ### Here, this is an intercept.
u.each <- EM3cov.res$u.each ### estimated genotypic values (additive, neighbor)
See(u.each)
### Perform genomic prediction with 10-fold cross validation (multi-kernel)
noNA <- !is.na(c(pheno.mat)) ### NA (missing) in the phenotype data
phenoNoNA <- pheno.mat[noNA, , drop = FALSE] ### remove NA
ZETANoNA <- ZETA
ZETANoNA <- lapply(X = ZETANoNA, FUN = function (List) {
List$Z <- List$Z[noNA, ]
return(List)
}) ### remove NA
nFold <- 10 ### # of folds
nLine <- nrow(phenoNoNA)
idCV <- sample(1:nLine %% nFold) ### assign random ids for cross-validation
idCV[idCV == 0] <- nFold
yPred <- yPredCov <- rep(NA, nLine)
for (noCV in 1:nFold) {
print(paste0("Fold: ", noCV))
yTrain <- phenoNoNA
yTrain[idCV == noCV, ] <- NA ### prepare test data
EM3.resCV <- EM3.cpp(y = yTrain, X0 = NULL,
ZETA = ZETANoNA) ### prediction
EM3cov.resCV <- EM3.cov(y = yTrain, X0 = NULL,
ZETA = ZETANoNA,
covList = covList) ### prediction
yTest <- EM3.resCV$y.pred ### predicted values
yTestCov <- EM3cov.resCV$y.pred
yPred[idCV == noCV] <- yTest[idCV == noCV]
yPredCov[idCV == noCV] <- yTestCov[idCV == noCV]
}
### Plot the results
plotRange <- range(phenoNoNA, yPred)
plot(x = phenoNoNA, y = yPred,
xlim = plotRange, ylim = plotRange,
xlab = "Observed values", ylab = "Predicted values (EM3.cpp)",
main = "Results of Genomic Prediction (multi-kernel)",
cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.3)
abline(a = 0, b = 1, col = 2, lwd = 2, lty = 2)
R2 <- cor(x = phenoNoNA[, 1], y = yPred) ^ 2
text(x = plotRange[2] - 10,
y = plotRange[1] + 10,
paste0("R2 = ", round(R2, 3)),
cex = 1.5)
plotRange <- range(phenoNoNA, yPred)
plot(x = phenoNoNA, y = yPredCov,
xlim = plotRange, ylim = plotRange,
xlab = "Observed values", ylab = "Predicted values (EM3.cov)",
main = "Results of Genomic Prediction (multi-kernel with covariance)",
cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.3)
abline(a = 0, b = 1, col = 2, lwd = 2, lty = 2)
R2 <- cor(x = phenoNoNA[, 1], y = yPredCov) ^ 2
text(x = plotRange[2] - 10,
y = plotRange[1] + 10,
paste0("R2 = ", round(R2, 3)),
cex = 1.5)
plotRange <- range(yPred, yPredCov)
plot(x = yPred, y = yPredCov,
xlim = plotRange, ylim = plotRange,
xlab = "Predicted values (EM3.cpp)", ylab = "Predicted values (EM3.cov)",
main = "Comparison of Multi-Kernel Genomic Prediction",
cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.3)
abline(a = 0, b = 1, col = 2, lwd = 2, lty = 2)
R2 <- cor(x = yPred, y = yPredCov) ^ 2
text(x = plotRange[2] - 10,
y = plotRange[1] + 10,
paste0("R2 = ", round(R2, 3)),
cex = 1.5)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.