knitr::opts_chunk$set(echo = TRUE)
s_ex11_p01_data_path <- "https://charlotte-ngs.github.io/asmss2022/data/asm_geno_sim_data.csv" if (!params$isonline) s_ex11_p01_data_path <- file.path(here::here(), "docs", "data", "asm_geno_sim_data.csv") sigma_q2 <- 3 sigma_e2 <- 36
Predict genomic breeding values using a marker effects model. The dataset is available from
cat(s_ex11_p01_data_path, "\n")
r sigma_q2
$.r sigma_e2
$tbl_ex11_p01 <- readr::read_csv(s_ex11_p01_data_path) tbl_ex11_p01
$$y = Xb + Wq + e$$ where $y$ is the vector of observations, $b$ is the vector of fixed effects and $q$ is the vector of random marker effects for each SNP. The matrices $X$ and $W$ are design matrices. The matrix $W$ is special because it contains the genotype encodings.
From that model the mixed model equations can be specified as
$$ \left[ \begin{array}{cc} X^TX & X^TW \ W^TX & W^TW + \lambda_q * I \end{array} \right] \left[ \begin{array}{c} \hat{b} \ \hat{q} \end{array} \right] = \left[ \begin{array}{c} X^Ty \ W^Ty \end{array} \right] $$ with $\lambda_q = \sigma_e^2 / \sigma_q^2$.
The matrix $X$
mat_X <- model.matrix(lm(P ~ 0 + SEX, data = tbl_ex11_p01)) attr(mat_X, "assign") <- NULL attr(mat_X, "contrasts") <- NULL mat_X
The matrix $W$
library(dplyr) tbl_geno_ex11_p01 <- tbl_ex11_p01 %>% select(SNP1:SNP100) mat_W <- as.matrix(tbl_geno_ex11_p01) mat_W[,1:10]
The vector $y$
vec_y <- tbl_ex11_p01$P vec_y
The mixed model equations
# coefficient matrix mat_xtx <- crossprod(mat_X) mat_xtw <- crossprod(mat_X, mat_W) mat_wtx <- t(mat_xtw) lambda_q <- sigma_e2 / sigma_q2 mat_ztz_lambda_I <- crossprod(mat_W) + lambda_q * diag(1, nrow = ncol(mat_W)) mat_coef <- rbind(cbind(mat_xtx, mat_xtw), cbind(mat_wtx, mat_ztz_lambda_I)) # right hand side mat_xty <- crossprod(mat_X, vec_y) mat_wty <- crossprod(mat_W, vec_y) mat_rhs <- rbind(mat_xty, mat_wty) # solution mat_sol <- solve(mat_coef, mat_rhs) # partition solutions vec_sol_fix <- mat_sol[1:2,] vec_sol_marker <- mat_sol[3:nrow(mat_sol),]
The solution for the estimates of the fixed effects are:
vec_sol_fix
The solutions for the first few marker effects are
vec_sol_marker[1:10]
mat_mem_gbv <- crossprod(t(mat_W), vec_sol_marker) mat_mem_gbv
s_ex11_p02_data_path <- "https://charlotte-ngs.github.io/asmss2022/data/asm_geno_sim_data.csv" if (!params$isonline) s_ex11_p02_data_path <- file.path(here::here(), "docs", "data", "asm_geno_sim_data.csv") sigma_q2 <- 3 sigma_g2 <- 3*sigma_q2 sigma_e2 <- 36
Use the same dataset as in Problem 1 to predict genomic breeding values based on a breeding-value model. The dataset is available from
cat(s_ex11_p02_data_path, "\n")
r sigma_g2
$.r sigma_e2
$computeMatGrm <- function(pmatData) { matData <- pmatData # check the coding, if matData is -1, 0, 1 coded, then add 1 to get to 0, 1, 2 coding if (min(matData) < 0) matData <- matData + 1 # Allele frequencies, column vector of P and sum of frequency products freq <- apply(matData, 2, mean) / 2 P <- 2 * (freq - 0.5) sumpq <- sum(freq*(1-freq)) # Changing the coding from (0,1,2) to (-1,0,1) and subtract matrix P Z <- matData - 1 - matrix(P, nrow = nrow(matData), ncol = ncol(matData), byrow = TRUE) # Z%*%Zt is replaced by tcrossprod(Z) return(tcrossprod(Z)/(2*sumpq)) }
tbl_ex11_p02 <- readr::read_csv(s_ex11_p02_data_path) tbl_ex11_p02
The matrix $W$
library(dplyr) tbl_geno_ex11_p02 <- tbl_ex11_p02 %>% select(SNP1:SNP100) mat_W <- as.matrix(tbl_geno_ex11_p01)
The genomic relationship matrix $G$
mat_G <- computeMatGrm(pmatData = mat_W) mat_G
We have to check whether $G$ can be inverted. This is done by computing the rank of the matrix
Matrix::rankMatrix(mat_G)
This shows that matrix $G$ does not have full column rank. Hence we add $0.05 * I$ to get to matrix $G^*$.
mat_G_star <- mat_G + 0.05 * diag(1, nrow = nrow(mat_G)) Matrix::rankMatrix(mat_G_star)
Matrix $G^*$ can be used as genomic relationship matrix.
$$ y = Xb + Zg + e$$ where $y$ is the vector of observations, $b$ is the vector of fixed effects and $g$ is the vector of random genomic breeding values. The matrices $X$ and $Z$ are design matrices.
The mixed model equations are
$$ \left[ \begin{array}{cc} X^TX & X^TZ \ Z^TX & Z^TZ + \lambda_g * (G^*)^{-1} \end{array} \right] \left[ \begin{array}{c} \hat{b} \ \hat{g} \end{array} \right] = \left[ \begin{array}{c} X^Ty \ Z^Ty \end{array} \right] $$
with $\lambda_g = \sigma_e^2 / \sigma_g^2$.
The matrix $X$
mat_X <- model.matrix(lm(P ~ 0 + SEX, data = tbl_ex11_p02)) attr(mat_X, "assign") <- NULL attr(mat_X, "contrasts") <- NULL colnames(mat_X) <- NULL mat_X
The matrix $Z$
# model matrix from data mat_Z <- model.matrix(lm(P ~ 0 + as.factor(ID), data = tbl_ex11_p02)) attr(mat_Z, "assign") <- NULL attr(mat_Z, "contrasts") <- NULL colnames(mat_Z) <- NULL mat_Z
The vector $y$
vec_y <- tbl_ex11_p02$P vec_y
The mixed model equations are
# coefficient matrix mat_xtx <- crossprod(mat_X) mat_xtz <- crossprod(mat_X, mat_Z) mat_ztx <- t(mat_xtz) lambda_g <- sigma_e2 / sigma_g2 mat_ztz_g_inv_lambda <- crossprod(mat_Z) + lambda_g * mat_G_star mat_coef <- rbind(cbind(mat_xtx, mat_xtz), cbind(mat_ztx, mat_ztz_g_inv_lambda)) # right hand side mat_xty <- crossprod(mat_X, vec_y) mat_zty <- crossprod(mat_Z, vec_y) mat_rhs <- rbind(mat_xty, mat_zty) # solution mat_sol <- solve(mat_coef, mat_rhs) # partition the solution vec_sol_fix <- mat_sol[1:2,] vec_sol_gbv <- mat_sol[3:nrow(mat_sol),]
The solution for the estimated fixed effects are
vec_sol_fix
The predicted genomic breeding values are
vec_sol_gbv
Comparing order of animals according to predicted genomic breeding values from Problem 1 and Problem 2:
order(mat_mem_gbv[,1], decreasing = TRUE)
order(vec_sol_gbv, decreasing = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.