knitr::opts_chunk$set(echo = TRUE)

Problem 1: Marker Effects Model

s_ex11_p01_data_path <- "https://charlotte-ngs.github.io/asmss2022/data/asm_geno_sim_data.csv"
if (!params$isonline)
  s_ex11_p01_data_path <- file.path(here::here(), "docs", "data", "asm_geno_sim_data.csv")

sigma_q2 <- 3
sigma_e2 <- 36

Predict genomic breeding values using a marker effects model. The dataset is available from

cat(s_ex11_p01_data_path, "\n")

Hints

Solution

tbl_ex11_p01 <- readr::read_csv(s_ex11_p01_data_path)
tbl_ex11_p01

$$y = Xb + Wq + e$$ where $y$ is the vector of observations, $b$ is the vector of fixed effects and $q$ is the vector of random marker effects for each SNP. The matrices $X$ and $W$ are design matrices. The matrix $W$ is special because it contains the genotype encodings.

From that model the mixed model equations can be specified as

$$ \left[ \begin{array}{cc} X^TX & X^TW \ W^TX & W^TW + \lambda_q * I \end{array} \right] \left[ \begin{array}{c} \hat{b} \ \hat{q} \end{array} \right] = \left[ \begin{array}{c} X^Ty \ W^Ty \end{array} \right] $$ with $\lambda_q = \sigma_e^2 / \sigma_q^2$.

The matrix $X$

mat_X <- model.matrix(lm(P ~ 0 + SEX, data = tbl_ex11_p01))
attr(mat_X, "assign") <- NULL
attr(mat_X, "contrasts") <- NULL
mat_X

The matrix $W$

library(dplyr)
tbl_geno_ex11_p01 <- tbl_ex11_p01 %>%
  select(SNP1:SNP100)
mat_W <- as.matrix(tbl_geno_ex11_p01)
mat_W[,1:10]

The vector $y$

vec_y <- tbl_ex11_p01$P
vec_y

The mixed model equations

# coefficient matrix
mat_xtx <- crossprod(mat_X)
mat_xtw <- crossprod(mat_X, mat_W)
mat_wtx <- t(mat_xtw)
lambda_q <- sigma_e2 / sigma_q2
mat_ztz_lambda_I <- crossprod(mat_W) + lambda_q * diag(1, nrow = ncol(mat_W))
mat_coef <- rbind(cbind(mat_xtx, mat_xtw), 
                  cbind(mat_wtx, mat_ztz_lambda_I))
# right hand side
mat_xty <- crossprod(mat_X, vec_y)
mat_wty <- crossprod(mat_W, vec_y)
mat_rhs <- rbind(mat_xty, mat_wty)
# solution
mat_sol <- solve(mat_coef, mat_rhs)
# partition solutions
vec_sol_fix <- mat_sol[1:2,]
vec_sol_marker <- mat_sol[3:nrow(mat_sol),]

The solution for the estimates of the fixed effects are:

vec_sol_fix

The solutions for the first few marker effects are

vec_sol_marker[1:10]
mat_mem_gbv <- crossprod(t(mat_W), vec_sol_marker)
mat_mem_gbv

Problem 2: Breeding Value Based Model

s_ex11_p02_data_path <- "https://charlotte-ngs.github.io/asmss2022/data/asm_geno_sim_data.csv"
if (!params$isonline)
  s_ex11_p02_data_path <- file.path(here::here(), "docs", "data", "asm_geno_sim_data.csv")

sigma_q2 <- 3
sigma_g2 <- 3*sigma_q2
sigma_e2 <- 36

Use the same dataset as in Problem 1 to predict genomic breeding values based on a breeding-value model. The dataset is available from

cat(s_ex11_p02_data_path, "\n")

Hints

computeMatGrm <- function(pmatData) {
  matData <- pmatData
  # check the coding, if matData is -1, 0, 1 coded, then add 1 to get to 0, 1, 2 coding
  if (min(matData) < 0) matData <- matData + 1
  # Allele frequencies, column vector of P and sum of frequency products
  freq <- apply(matData, 2, mean) / 2
  P <- 2 * (freq - 0.5)
  sumpq <- sum(freq*(1-freq))
  # Changing the coding from (0,1,2) to (-1,0,1) and subtract matrix P
  Z <- matData - 1 - matrix(P, nrow = nrow(matData), 
                             ncol = ncol(matData), 
                             byrow = TRUE)
  # Z%*%Zt is replaced by tcrossprod(Z)
  return(tcrossprod(Z)/(2*sumpq))
}

Solution

tbl_ex11_p02 <- readr::read_csv(s_ex11_p02_data_path)
tbl_ex11_p02

The matrix $W$

library(dplyr)
tbl_geno_ex11_p02 <- tbl_ex11_p02 %>%
  select(SNP1:SNP100)
mat_W <- as.matrix(tbl_geno_ex11_p01)

The genomic relationship matrix $G$

mat_G <- computeMatGrm(pmatData = mat_W)
mat_G

We have to check whether $G$ can be inverted. This is done by computing the rank of the matrix

Matrix::rankMatrix(mat_G)

This shows that matrix $G$ does not have full column rank. Hence we add $0.05 * I$ to get to matrix $G^*$.

mat_G_star <- mat_G + 0.05 * diag(1, nrow = nrow(mat_G))
Matrix::rankMatrix(mat_G_star)

Matrix $G^*$ can be used as genomic relationship matrix.

$$ y = Xb + Zg + e$$ where $y$ is the vector of observations, $b$ is the vector of fixed effects and $g$ is the vector of random genomic breeding values. The matrices $X$ and $Z$ are design matrices.

The mixed model equations are

$$ \left[ \begin{array}{cc} X^TX & X^TZ \ Z^TX & Z^TZ + \lambda_g * (G^*)^{-1} \end{array} \right] \left[ \begin{array}{c} \hat{b} \ \hat{g} \end{array} \right] = \left[ \begin{array}{c} X^Ty \ Z^Ty \end{array} \right] $$

with $\lambda_g = \sigma_e^2 / \sigma_g^2$.

The matrix $X$

mat_X <- model.matrix(lm(P ~ 0 + SEX, data = tbl_ex11_p02))
attr(mat_X, "assign") <- NULL
attr(mat_X, "contrasts") <- NULL
colnames(mat_X) <- NULL
mat_X

The matrix $Z$

# model matrix from data
mat_Z <- model.matrix(lm(P ~ 0 + as.factor(ID), data = tbl_ex11_p02))
attr(mat_Z, "assign") <- NULL
attr(mat_Z, "contrasts") <- NULL
colnames(mat_Z) <- NULL
mat_Z

The vector $y$

vec_y <- tbl_ex11_p02$P
vec_y

The mixed model equations are

# coefficient matrix
mat_xtx <- crossprod(mat_X)
mat_xtz <- crossprod(mat_X, mat_Z)
mat_ztx <- t(mat_xtz)
lambda_g <- sigma_e2 / sigma_g2
mat_ztz_g_inv_lambda <- crossprod(mat_Z) + lambda_g * mat_G_star
mat_coef <- rbind(cbind(mat_xtx, mat_xtz), cbind(mat_ztx, mat_ztz_g_inv_lambda))
# right hand side
mat_xty <- crossprod(mat_X, vec_y)
mat_zty <- crossprod(mat_Z, vec_y)
mat_rhs <- rbind(mat_xty, mat_zty)
# solution
mat_sol <- solve(mat_coef, mat_rhs)
# partition the solution
vec_sol_fix <- mat_sol[1:2,]
vec_sol_gbv <- mat_sol[3:nrow(mat_sol),]

The solution for the estimated fixed effects are

vec_sol_fix

The predicted genomic breeding values are

vec_sol_gbv

Comparing order of animals according to predicted genomic breeding values from Problem 1 and Problem 2:

order(mat_mem_gbv[,1], decreasing = TRUE)
order(vec_sol_gbv, decreasing = TRUE)


charlotte-ngs/asmss2022 documentation built on June 7, 2022, 1:33 p.m.