knitr::opts_chunk$set(echo = FALSE, results = 'asis')
n_nr_sire <- 3 n_nr_dam <- 8 n_nr_parents <- n_nr_sire + n_nr_dam n_nr_offspring <- 16 n_nr_animals <- n_nr_parents + n_nr_offspring dam_id <- rep(4:11,2) tbl_beef_data <- tibble::tibble(Animal = (n_nr_parents + 1):n_nr_animals, Sire = c(rep(1,8), rep(2,6), rep(3,2)), # Dam = dam_id[order(dam_id)], # Herd = c(rep(1,4), rep(2,4), rep(1,4), rep(2,4)), `Weaning Weight`= c(2.61,2.31,2.44,2.41,2.51,2.55,2.14,2.61,2.34,1.99,3.1,2.81,2.14,2.41,2.54,3.16)) # names(tbl_beef_data) <- c("Animal", "Sire", "Weaning Weight") n_nr_observation <- nrow(tbl_beef_data) ### # parameters h2 <- .25 n_var_p <- round(var(tbl_beef_data$`Weaning Weight`), digits = 4) n_var_g <- round(h2 * n_var_p, digits = 4) n_pop_mean <- round(mean(tbl_beef_data$`Weaning Weight`), digits = 2)
We are using the dataset shown in Table \@ref(tab:TabBeefExample) for this exercise. For the animals listed in Table \@ref(tab:TabBeefExample), the weaning weight (in 100kg) was observed as phenotypic records. The following parameters are associated with the observed data
r n_pop_mean
$r n_var_p
$r h2
$r h2
* r n_var_p
= r n_var_g
$knitr::kable(tbl_beef_data, booktabs = TRUE, longtable = TRUE, caption = "Example Data Set To Predict Breeding Values")
Compute the predicted breeding values and the reliabilities for the animals listed in Table \@ref(tab:TabBeefExample). Compare the ranking of the animals according to their phenotypic values and according to their predicted breeding values. Compare the reliabilities of the predicted breeding values.
The predicted breeding values based on own performance are computed as
$$\hat{a_i} = h^2(y_i - \mu)$$ The reliabilities are constant and correspond to
$$B = r_{a,y}^2 = h^2$$
The results are listed in the following table.
suppressPackageStartupMessages( require(dplyr) ) tbl_beef_pbv_op <- tbl_beef_data %>% mutate(`Predicted Breeding Value` = h2 * (`Weaning Weight` - n_pop_mean)) %>% mutate(Reliability = h2) %>% select(Animal, `Weaning Weight`, `Predicted Breeding Value`, Reliability) knitr::kable(tbl_beef_pbv_op, booktabs = TRUE, longtable = TRUE, caption = "Predicted Breeding Values Using Own Performance Records")
The ranking according to the phenotypic records and according to the predicted breeding values are the same, because each phenotypic record is corrected for the same population mean and is multiplied with the same factor which corresponds to $h^2$. The main difference between the phenotypic records and the predicted breeding values is the variability. The predicted breeding values have a much smaller variability compared to the phenotypic records.
Compute the predicted breeding values and the reliabilities for the sires based on the progeny records. We are assuming that all progeny for a given sire are half-sibbs. Compare the ranking of the sires according to the average progeny performance values and according to the predicted breeding values.
k <- (4-h2)/h2
The predicted breeding values for the sires based on the average performance of their progeny is computed as
$$\hat{a_i} = \frac{2n_i}{n_i + k}(\bar{y_i} - \mu)$$
where $n_i$ is the number of progeny of sire $i$, $\bar{y_i}$ is the average of the performance values of the progeny of sire $i$ and $k = \frac{4-h^2}{h^2} = \frac{4 - r h2
}{r h2
} = r k
$
The reliabilities are no longer constant, but they depend on the number of progeny
$$B_i = \frac{n_i}{n_i+k}$$
Before we compute the predicted values, we have to prepare a few intermediate quantities that are needed for the computation such as
tbl_beef_pap <- tbl_beef_data %>% group_by(Sire) %>% summarise(`Number of Progeny Records` = n(), `Average Progeny Performance` = mean(`Weaning Weight`)) knitr::kable(tbl_beef_pap, booktabs = TRUE, longtable = TRUE, caption = "Intermediate Results To Predict Breeding Values")
We are using the intermediate results to compute the predicted breeding values for each sire.
tbl_beef_pap_res <- tbl_beef_pap %>% mutate( `Predicted Breeding Value` = 2 * `Number of Progeny Records` / (`Number of Progeny Records` + k) * (`Average Progeny Performance` - n_pop_mean) ) %>% mutate( Reliability = `Number of Progeny Records` / (`Number of Progeny Records` + k) ) %>% select( Sire, `Predicted Breeding Value`, Reliability) knitr::kable(tbl_beef_pap_res, booktabs = TRUE, longtable = TRUE, caption = "Predicted Breeding Values and Reliabilities for all Sires")
### # compare rankings vec_order_pa <- order(tbl_beef_pap$`Average Progeny Performance`, decreasing = TRUE) vec_order_pbv <- order(tbl_beef_pap_res$`Predicted Breeding Value`, decreasing = TRUE) s_compare_result <- "not the same" if(all(vec_order_pa == vec_order_pbv)) s_compare_result <- "the same" ### # compare reliabilities vec_order_rel <- order(tbl_beef_pap_res$`Reliability`, decreasing = TRUE) n_best_sire <- vec_order_rel[1]
The rankings of the sires according to the progeny averages and according to the predicted breeding values are r s_compare_result
. The reliability for sire r n_best_sire
is the highest. This is mainly due to the larger number of progeny of sire r n_best_sire
.
bonline <- FALSE s_wwg_path <- rprojroot::find_rstudio_root_file() ### # read tibble from file s_wwg_path <- 'https://charlotte-ngs.github.io/lbgfs2020/misc/weaningweightbeef.csv' tbl_beef_data <- readr::read_csv(file = s_wwg_path) ### # count number of observations n_nr_observation <- nrow(tbl_beef_data) ### # number of sires and dams n_nr_sire <- nlevels(as.factor(tbl_beef_data$Sire)) n_nr_dam <- nlevels(as.factor(tbl_beef_data$Dam)) n_nr_parent <- n_nr_sire + n_nr_dam n_nr_offspring <- n_nr_observation n_nr_animals <- n_nr_parent + n_nr_offspring n_nr_herd <- nlevels(as.factor(tbl_beef_data$Herd)) ### # parameters h2 <- .25 n_var_p <- round(var(tbl_beef_data$`Weaning Weight`), digits = 4) n_var_g <- round(h2 * n_var_p, digits = 4) n_var_e <- n_var_p - n_var_g n_pop_mean <- round(mean(tbl_beef_data$`Weaning Weight`), digits = 2)
We are using the following dataset shown in Table \@ref(tab:tablebeefdatasiremodel) to predict breeding values using a sire model.
### # show the data frame knitr::kable( tbl_beef_data, format = "latex", booktabs = TRUE, longtable = TRUE, caption = "Example Data Set for Weaning Weight in Beef Cattle" )
We assume that the sires are unrelated and that the genetic additive variance $\sigma_u^2 = r n_var_g
$. Hence the variance-covariance matrix $G$ of all sire effects corresponds to
$$var(s) = G = I * \sigma_s^2 = I * {\sigma_u^2 \over 4}$$
Furthermore, the residuals $e$ are not correlated which means that the variance-covariance matrix $R$ is
$$var(e) = R = I * \sigma_e^2$$
with $\sigma_e^2 = r n_var_e
$.
The sire model for the data set given in Table \@ref(tab:tablebeefdatasiremodel) has the following structure
$$y = X\beta + Zs + e$$
\begin{tabular}{lll}
where & & \
& $y$ & vector of length $r n_nr_observation
$ of phenotypic observations \
& $\beta$ & vector of length $r n_nr_herd
$ of unknown fixed herd effects \
& $X$ & $r n_nr_observation
\times r n_nr_herd
$ design matrix linking observations to fixed effects \
& $s$ & vector of length $r n_nr_sire
$ of unknown random sire effects \
& $Z$ & $r n_nr_observation
\times r n_nr_sire
$ design matrix linking observations to sire effects \
& $e$ & vector of length $r n_nr_observation
$ of unknown random residual effects
\end{tabular}
As in the lecture notes, we can put the information from the dataset into the model leaning to
mat_x_sire <- matrix(data = c(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1), ncol = 2, byrow = TRUE) # vec_betahat_sire <- c("\\beta_1", "\\beta_2") vec_beta_sire <- rmdhelp::vecGetVecElem(psBaseElement = "\\beta", pnVecLen = n_nr_herd, psResult = "latex") mat_z_sire <- matrix(data = c(1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1), ncol = 3, byrow = TRUE) # vec_sirehat_sire <- c("s_1", "s_2", "s_3") vec_sire_sire <- rmdhelp::vecGetVecElem(psBaseElement = "s", pnVecLen = n_nr_sire, psResult = "latex") # vec_res_sire <- c("e_1", "e_2", "e_3", "e_4", "e_5", "e_6", "e_7", "e_8", "e_9", "e_{10}", "e_{11}", "e_{12}", "e_{13}", "e_{14}", "e_{15}", "e_{16}") vec_res_sire <- rmdhelp::vecGetVecElem(psBaseElement = "e", pnVecLen = n_nr_observation, psResult = "latex") cat("$$ \n") cat(paste(rmdhelp::bcolumn_vector(pvec = tbl_beef_data$`Weaning Weight`), sep = "\n"),"\n") cat("= \n") cat(paste(rmdhelp::bmatrix(pmat = mat_x_sire), sep = "\n"), "\n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_beta_sire), sep = "\n"), "\n") cat("+ \n") cat(paste(rmdhelp::bmatrix(pmat = mat_z_sire), sep = "\n"), "\n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_sire_sire), sep = "\n"), "\n") cat("+ \n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_res_sire), sep = "\n"), "\n") cat("$$ \n")
We fix the expected values $E(s)$ and $E(e)$ of the random components $s$ and $e$ to be
$$E(s) = 0 \quad \text{and} \quad E(e) = 0$$
From this we can compute $E(y) = X\beta$.
The variances $var(s)$ and $var(e)$ of the random components are defined as
$$var(s) = G \quad \text{and} \quad var(e) = R$$
The variance $var(y)$ can be computed as $var(y) = V = Z^TGZ + R$.
Using the assumptions, we can further specify
$$var(s) = G = I * \sigma_s^2 = I * {\sigma_a^2 \over 4} \quad \text{and} \quad var(e) = R = I * \sigma_e^2$$
The general form of the mixed model equations was presented in the lecture notes. We now use that form for the sire model which results in
\begin{equation} \left[ \begin{array}{lr} X^T R^{-1} X & X^T R^{-1} Z \ Z^T R^{-1} X & Z^T R^{-1} Z + G^{-1} \end{array} \right] \left[ \begin{array}{c} \hat{\beta} \ \hat{s} \end{array} \right] = \left[ \begin{array}{c} X^T R^{-1} y \ Z^T R^{-1} y \end{array} \right] \notag \end{equation}
Using the above specified assumptions regarding the variance-covariance matrices $G$ and $R$, the mixed model equations can be simplified to
\begin{equation} \left[ \begin{array}{lr} X^T X & X^T Z \ Z^T X & Z^T Z + I * 4 * \lambda \end{array} \right] \left[ \begin{array}{c} \hat{\beta} \ \hat{s} \end{array} \right] = \left[ \begin{array}{c} X^T y \ Z^T y \end{array} \right] \notag \end{equation}
where $\lambda = \frac{\sigma_e^2}{\sigma_a^2}$.
Inserting the number leads to
### # variance ratio lambda <- n_var_e/n_var_g ### # components of coefficient matrix mat_xtx_sire <- crossprod(mat_x_sire) mat_xtz_sire <- crossprod(mat_x_sire, mat_z_sire) mat_ztx_sire <- t(mat_xtz_sire) mat_ztz_sire <- crossprod(mat_z_sire) mat_ztzginv_sire <- mat_ztz_sire + diag(1, nrow = nrow(mat_ztz_sire)) * 4 * lambda ### # put together coefficient matrix mat_coeff_sire <- rbind(cbind(mat_xtx_sire, mat_xtz_sire), cbind(mat_ztx_sire, mat_ztzginv_sire)) ### # vector of estimates vec_betahat_sire <- rmdhelp::vecGetVecElem(psBaseElement = "\\hat{\\beta}", pnVecLen = n_nr_herd, psResult = "latex") vec_sirehat_sire <- rmdhelp::vecGetVecElem(psBaseElement = "\\hat{s}", pnVecLen = n_nr_sire, psResult = "latex") vec_estpred_sire <- c(vec_betahat_sire, vec_sirehat_sire) ### # righthandside vec_weight_sire <- as.vector(tbl_beef_data$`Weaning Weight`) vec_xty_sire <- crossprod(mat_x_sire, vec_weight_sire) vec_zty_sire <- crossprod(mat_z_sire, vec_weight_sire) vec_rhs_sire <- c(vec_xty_sire, vec_zty_sire) ### # solutions vec_sol <- solve(mat_coeff_sire, vec_rhs_sire)
cat("$$ \n") cat(paste(rmdhelp::bmatrix(pmat = mat_coeff_sire), sep = "\n"), "\n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_estpred_sire), sep = "\n"),"\n") cat("= \n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_rhs_sire), sep = "\n"),"\n") cat("$$ \n")
The solution consists of the estimates for the fixed effects and the predictions of the sire breeding values. Because, we are working with a very small dataset, we can obtain the solutions by pre-multiplying both sides of the mixed model equations with the inverse of the coefficient matrix. Hence
cat("$$ \n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_estpred_sire), sep = "\n"),"\n") cat("= \n") cat(paste(rmdhelp::bmatrix(pmat = mat_coeff_sire), sep = "\n"), "\n") cat("^{-1} \n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_rhs_sire), sep = "\n"),"\n") cat("= \n") cat(paste(rmdhelp::bcolumn_vector(pvec = vec_sol), sep = "\n"),"\n") cat("$$ \n")
vec_sire_pbv <- vec_sol[(n_nr_herd+1):length(vec_sol)] vec_sire_rank <- order(vec_sire_pbv, decreasing = TRUE)
Now that we have predicted breeding values for all sire, we can rank the accordingly. For our example the ranking of the sires is
tbl_sire_rank <- tibble::tibble(Sire = c(1:n_nr_sire), `Predicted Breeding Value` = vec_sire_pbv, Rank = vec_sire_rank) knitr::kable(tbl_sire_rank, booktabs = TRUE, longtable = TRUE, caption = "Ranking of Sires According To Predicted Breeding Values")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.