knitr::opts_chunk$set(echo = FALSE, results = 'asis', fig.pos = 'H') knitr::knit_hooks$set(hook_convert_odg = rmdhelp::hook_convert_odg)
cnt <- rmdhelp::R6ClassCount$new() cnt$set_prefix(ps_prefix = "## Problem")
cat(cnt$out(ps_suffix = "Marker Effect Model"), "\n")
library(dplyr) ### # fix the number of animals n_nr_animal <- 10 ### # intercept n_inter_cept <- 150 ### # residual standard deviation n_res_sd <- 9.3 ### # vector of genotype value coefficients vec_geno_value_coeff <- c(-1,0,1) ### # sample genotypes of unlinked SNP randomly set.seed(5432) ### # fix allele frequency of positive allele n_prob_snps <- .45 ### # genotypic values vec_geno_val <- c(17.9, 3.3) n_nr_snp <- length(vec_geno_val) ### # put together the genotypes into a matrix mat_geno_snp <- matrix(c(sample(vec_geno_value_coeff, n_nr_animal, prob = c((1-n_prob_snps)^2, 2*(1-n_prob_snps)*n_prob_snps, n_prob_snps^2), replace = TRUE), sample(vec_geno_value_coeff, n_nr_animal, prob = c(n_prob_snps^2, 2*(1-n_prob_snps)*n_prob_snps, (1-n_prob_snps)^2), replace = TRUE)), nrow = n_nr_snp) mat_obs_y <- n_inter_cept + crossprod(mat_geno_snp, vec_geno_val) + rnorm(n = n_nr_animal, mean = 0, sd = n_res_sd) ### # combine SNP genotypes into a tibble geno_code <- tibble::tibble(`SNP A` = mat_geno_snp[1,], `SNP B` = mat_geno_snp[2,]) ### # add the data mat_obs_y_rounded <- round(mat_obs_y, digits = 0) tbl_obs <- tibble::tibble(Observation = mat_obs_y_rounded[,1]) geno_code %>% bind_cols(tbl_obs) -> tbl_all_data ### # add animal ids tbl_all_data <- bind_cols(Animal = c(1:n_nr_animal),tbl_all_data)
We are given the dataset that is shown in the table below. This dataset contains gentyping results of r n_nr_animal
for r n_nr_snp
SNP loci.
knitr::kable(tbl_all_data, booktabs = TRUE, longtable = TRUE, # caption = "Animals With Two SNP Loci A and B Affecting A Quantitative Trait", escape = FALSE)
marker effect model
. Because we have just r n_nr_snp
SNP loci, you can use a fixed effects linear model with the r n_nr_snp
loci as fixed effects. Furthermore you can also include a fixed intercept into the model.lm()
to get the solutions for estimates of the unknown SNP effects.The fixed effects model to estimate the marker effects can be written as
$$y = X\beta + e$$ where $y$ is the vector of observations, $\beta$ is the vector of fixed effects and $e$ is the vector of residuals. Inserting the data from the dataset into the model components leads to
vec_beta <- c("\\beta_0", "\\beta_{A}", "\\beta_{B}") vec_res <- rmdhelp::vecGetVecElem(psBaseElement = "e", pnVecLen = n_nr_animal, psResult = 'latex') cat("$$\n") cat(paste(rmdhelp::bcolumn_vector(pvec = tbl_all_data$Observation, ps_name = 'y'), collapse = '\n'), '\n') cat(paste(rmdhelp::bcolumn_vector(pvec = vec_beta, ps_name = '\\beta'), collapse = '\n'), '\n') cat(paste(rmdhelp::bcolumn_vector(pvec = vec_res, ps_name = 'e'), collapse = '\n'), '\n') cat("$$\n")
where $\beta_0$ is the intercept and $\beta_A$ and $\beta_B$ correspond to the marker effects (a-values) for both SNPs A and B.
The design matrix $X$ is taken from the dataset as
cat(paste(rmdhelp::bmatrix(pmat = t(mat_geno_snp), ps_name = 'X', ps_env = '$$'), collapse = '\n'), '\n')
The solution for the intercept and the marker effects are obtained with
lm_snp_eff <- lm(tbl_all_data$Observation ~ tbl_all_data$`SNP A` + tbl_all_data$`SNP B`, data = tbl_all_data) summary(lm_snp_eff)
\pagebreak
cat(cnt$out(ps_suffix = "Breeding Value Model"), "\n")
Use the same data as in Problem 1 to estimate genomic breeding values using a breeding value model
.
#' Compute genomic relationship matrix based on data matrix computeMatGrm <- function(pmatData) { matData <- pmatData # check the coding, if matData is -1, 0, 1 coded, then add 1 to get to 0, 1, 2 coding if (min(matData) < 0) matData <- matData + 1 # Allele frequencies, column vector of P and sum of frequency products freq <- apply(matData, 2, mean) / 2 P <- 2 * (freq - 0.5) sumpq <- sum(freq*(1-freq)) # Changing the coding from (0,1,2) to (-1,0,1) and subtract matrix P Z <- matData - 1 - matrix(P, nrow = nrow(matData), ncol = ncol(matData), byrow = TRUE) # Z%*%Zt is replaced by tcrossprod(Z) return(tcrossprod(Z)/(2*sumpq)) } matG <-computeMatGrm(pmatData = t(mat_geno_snp)) matG_star <- rvcetools::make_pd_rat_ev(matG, pn_max_ratio = 100)
cat(paste(rmdhelp::bmatrix(pmat = round(matG_star, digits = 3), ps_name = 'G', ps_env = '$$'), collapse = '\n'), '\n')
The breeding value model is a linear mixed effects model which can be written as
$$y = X \beta + W u + e$$ where
Inserting the information from the dataset into the model leads to
vec_beta <- c("\\mu") vec_u <- rmdhelp::vecGetVecElem(psBaseElement = "u", pnVecLen = n_nr_animal, psResult = 'latex') vec_res <- rmdhelp::vecGetVecElem(psBaseElement = "e", pnVecLen = n_nr_animal, psResult = 'latex') cat("$$\n") cat(paste(rmdhelp::bcolumn_vector(pvec = tbl_all_data$Observation, ps_name = 'y'), collapse = '\n'), '\n') cat(paste(rmdhelp::bcolumn_vector(pvec = vec_beta, ps_name = '\\beta'), collapse = '\n'), '\n') cat(paste(rmdhelp::bcolumn_vector(pvec = vec_u, ps_name = 'u'), collapse = '\n'), '\n') cat(paste(rmdhelp::bcolumn_vector(pvec = vec_res, ps_name = 'e'), collapse = '\n'), '\n') cat("$$\n")
The design matrices $X$ and $W$ correspond to
mat_x_bv <- matrix(1, nrow = n_nr_animal, ncol = 1) mat_w_bv <- diag(nrow = n_nr_animal) cat("$$\n") cat(paste(rmdhelp::bmatrix(pmat = mat_x_bv, ps_name = 'X'), collapse = '\n'), '\n') cat(paste(rmdhelp::bmatrix(pmat = mat_w_bv, ps_name = 'W'), collapse = '\n'), '\n') cat("$$\n")
The expected values of the random effects are
$$E(u) = 0$$ $$E(e) = 0$$ $$E(y) = X\beta$$
The variance-covariance matrices of the random effects are
$$var(u) = G * \sigma_u^2$$ where $G$ is the genomic relationship matrix and $\sigma_u^2$ the genetic additive variance explained by the SNPs
$$var(e) = I * \sigma_e^2 = R$$ where $I$ is the identity matrix and $\sigma_e^2$ the residual variance.
$$var(y) = WGW^T * \sigma_u^2 + R$$
The solutions for the fixed effects are obtained from mixed model equations.
\begin{equation} \left[ \begin{array}{lr} X^TX & X^TW \ W^TX & W^TW + G^{-1}* \lambda \end{array} \right] \left[ \begin{array}{c} \hat{\beta} \ \hat{u} \end{array} \right] = \left[ \begin{array}{c} X^Ty \ W^Ty \end{array} \right] \notag \end{equation}
lambda <- 3
The parameter $\lambda = \sigma_e^2 / \sigma_u^2$ is the ratio between residual variance and genetic variance. We assume that this value corresponds to $\lambda = r lambda
$.
The single components of the mixed model equations are
mat_xtx <- crossprod(mat_x_bv) mat_xtw <- crossprod(mat_x_bv, mat_w_bv) mat_wtx <- t(mat_xtw) mat_wtw_ginv_lam <- crossprod(mat_w_bv) + solve(matG_star) * lambda mat_coeff <- rbind(cbind(mat_xtx, mat_xtw), cbind(mat_wtx, mat_wtw_ginv_lam)) mat_rhs <- rbind(crossprod(mat_x_bv, mat_obs_y), crossprod(mat_w_bv, mat_obs_y)) mat_sol <- solve(mat_coeff, mat_rhs)
cat("$$\n") cat(paste(rmdhelp::bmatrix(pmat = mat_xtx, ps_name = 'X^TX'), collapse = '\n'), '\n') cat(", ") cat(paste(rmdhelp::bmatrix(pmat = mat_xtw, ps_name = 'X^TW'), collapse = '\n'), '\n') cat(", ") cat(paste(rmdhelp::bmatrix(pmat = mat_wtx, ps_name = 'W^TX'), collapse = '\n'), '\n') cat("$$\n\n") cat(paste(rmdhelp::bmatrix(pmat = round(mat_wtw_ginv_lam, digits = 3), ps_name = 'W^TW + G^{-1}', ps_env = '$$'), collapse = '\n'), '\n') cat("with $$rhs = \\left[\\begin{array}{c} X^Ty \\\\ W^Ty \\end{array}\\right]$$\n") cat(paste(rmdhelp::bmatrix(pmat = round(mat_rhs, digits = 3), ps_name = 'rhs', ps_env = '$$'), collapse = '\n'), '\n')
The solution vector for the estimate of the fixed effect $\mu$ and the genomic breeding values for all animals are given by
$$sol = \left[\begin{array}{c} \hat{\beta} \ \hat{u} \end{array}\right]$$
cat(paste(rmdhelp::bmatrix(pmat = round(mat_sol, digits = 4), ps_name = 'sol', ps_env = '$$'), collapse = '\n'), '\n')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.