Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup_data---------------------------------------------------------------
library(selection.index)
library(MASS) # For robust ridge regression implementation (lm.ridge)
# 1. Load the built-in synthetic maize datasets
data("maize_pheno")
data("maize_geno")
# Ensure markers are numeric matrices
X <- as.matrix(maize_geno)
# Ensure pheno lines up perfectly with geno
# (Note: maize_pheno contains 4 repetitions per genotype; we take the means)
Y_agg <- mean_performance(
data = maize_pheno[, c("Yield", "PlantHeight", "DaysToMaturity")],
genotypes = maize_pheno$Genotype,
replications = maize_pheno$Block
)
# Sort both datasets to ensure identical ordering
Y_agg <- Y_agg[order(Y_agg$Genotypes), ]
X <- X[order(rownames(X)), ]
# Ensure overlapping genotypes
common_genotypes <- intersect(Y_agg$Genotypes, rownames(X))
Y_agg <- Y_agg[Y_agg$Genotypes %in% common_genotypes, ]
X <- X[rownames(X) %in% common_genotypes, ]
# Select only multi-traits relevant to breeding
# Yield, PlantHeight, DaysToMaturity
Y <- as.matrix(Y_agg[, c("Yield", "PlantHeight", "DaysToMaturity")])
## ----calculate_gebv-----------------------------------------------------------
# Simulate Genomic Estimated Breeding Values (GEBVs) using Ridge Regression
# In best practice, you'd use cross-validation or a separate training/testing set
# We use lambda = 100 to handle the p >> n dimensionality problem
gebv_mat <- matrix(0, nrow = nrow(Y), ncol = ncol(Y))
colnames(gebv_mat) <- colnames(Y)
for (i in seq_len(ncol(Y))) {
# Fit a ridge regression model for trait `i` using markers `X`
model_ridge <- lm.ridge(Y[, i] ~ X, lambda = 100)
# Predict values using coef() which correctly un-scales the coefficients: Include intercept
betas <- coef(model_ridge)
intercept <- betas[1]
beta <- betas[-1] # Exclude intercept
# Calculate the GEBV for the trait
gebv_mat[, i] <- intercept + (X %*% beta)
}
head(gebv_mat, 3)
## ----calculated_lgsi----------------------------------------------------------
# 2. Compute Trait Covariances
pmat <- cov(Y) # Phenotypic Covariance (Approximation)
# In optimal practice, the true Genomic Covariance Matrix (\Gamma) is
# estimated using Restricted Maximum Likelihood (REML) utilizing both general
# phenotypic and genotypic information. Here, we simulate \Gamma as proportional
# to the phenotypic variance (assuming a high heritability correlation).
gmat <- pmat * 0.4 # Approximate Genotypic Covariance (Approximating \Gamma)
# 3. Define Economic Weights
weights <- data.frame(
Trait = c("Yield", "PlantHeight", "DaysToMaturity"),
Weight = c(5, -0.1, -0.1)
)
wmat <- weight_mat(weights)
# 4. Calculate Linear Genomic Selection Index (LGSI)
# For the testing population where we only use genomic values
lgsi_result <- lgsi(
gebv_mat = gebv_mat,
gmat = gmat,
wmat = wmat
)
# Output Summary
lgsi_result$summary
## ----calculate_clgsi----------------------------------------------------------
clgsi_result <- clgsi(
phen_mat = Y, # Observed phenotypic data
gebv_mat = gebv_mat, # Genomic Estimated Breeding Values
pmat = pmat, # Expected Phenotypic traits covariance
gmat = gmat, # Expected Genotypic traits covariance
wmat = wmat
)
clgsi_result$summary
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.