#' Predict cross variances
#'
#' Function to predict the variances (and trait-trait co-variances) expected in the offspring of a cross. Takes a list of crosses to predict, marker effects, parental haplotype matrix and recombination frequency matrix as input. Predicts potentially over multiple crosses and multiple traits. When multiple traits are supplied, trait-trait co-variances are also predicted.
#'
#' @param CrossesToPredict data.frame or tibble, col/colnames: sireID, damID. sireID and damID must both be in the haploMat.
#' @param modelType string, "A" or "AD", should additive only or additive and dominance variances be predicted?
#' @param AddEffectList list of ADDITIVE effect matrices, one matrix per trait, Each element of the list is named with a string identifying the trait and the colnames of each matrix are labelled with snpIDs.
#' @param DomEffectList list of DOMINANCE effect matrices, one matrix per trait, Each element of the list is named with a string identifying the trait and the colnames of each matrix are labelled with snpIDs.
#' @param predType, string, default \code{predType="VPM"}, "VPM" or "PMV". Choose option "VPM" if you have REML marker effect estimates (or posterior-means from MCMC) one set of marker effect estimates per trait. Variance of posterior means is faster but the alternative predType=="PMV" is expected to be less biassed. PMV requires user to supply a (probably LARGE) variance-covariance matrix of effects estimates.
#' @param haploMat matrix of phased haplotypes, 2 rows per sample, cols = loci, {0,1}, rownames assumed to contain GIDs with a suffix, separated by "_" to distinguish haplotypes
#' @param recombFreqMat a square symmetric matrix with values = (1-2*c1), where c1=matrix of expected recomb. frequencies. The choice to do 1-2c1 outside the function was made for computation efficiency; every operation on a big matrix takes time.
#' @param ncores number of cores, parallelizes across \code{CrossesToPredict}, in multi-trait cases, process traits for each family in serial within each worker.
#' @param nBLASthreads number of cores for each worker to use for multi-thread BLAS
#' @param ...
#'
#' @return tibble, each row contains predictions for a single cross. Columns:
#' \itemize{
#' \item \code{"Nsegsnps"}: give the number of segregating sites in the cross and thus those used for variance predictions
#' \item \code{"ComputeTime"}: time in minutes
#' \item \code{"predVars"}: list-column, each element is a tibble, containing all predicted variances and covariances in a long-format.
#' }
#' @export
#' @family predCrossVar
predCrossVars<-function(CrossesToPredict,modelType,
AddEffectList,DomEffectList=NULL,
predType="VPM",
haploMat,recombFreqMat,
ncores=1,nBLASthreads=NULL,...){
starttime<-proc.time()[3]
# Center posterior distribution of effects
## on posterior mean across MCMC samples
AddEffectList<-purrr::map(AddEffectList,~scale(.,center = T, scale = F));
if(modelType=="AD"){
DomEffectList<-purrr::map(DomEffectList,~scale(.,center = T, scale = F)) }
## Extract the posterior mean effects vectors
## If predType="VPM" this just recovers the original effects
postMeanAddEffects<-purrr::map(AddEffectList,~attr(.,which = "scaled:center"))
if(modelType=="AD"){
postMeanDomEffects<-purrr::map(DomEffectList,~attr(.,which = "scaled:center"))
} else {
postMeanDomEffects<-NULL
}
parents<-union(CrossesToPredict$sireID,
CrossesToPredict$damID)
haploMat<-haploMat[sort(c(paste0(parents,"_HapA"),paste0(parents,"_HapB"))),]
if(predType=="VPM"){
AddEffectList<-NULL;
if(predType=="VPM" & modelType=="AD"){ DomEffectList<-NULL; } }
# Set-up a loop over the crosses
suppressMessages(require(furrr));
if(ncores>1){ plan(multisession, workers = ncores); }
options(future.globals.maxSize=+Inf); options(future.rng.onMisuse="ignore")
crossespredicted<-CrossesToPredict %>%
dplyr::mutate(predVars=future_pmap(.,
predOneCross,
modelType=modelType,
haploMat=haploMat,
recombFreqMat=recombFreqMat,
predType=predType,
postMeanAddEffects=postMeanAddEffects,
postMeanDomEffects=postMeanDomEffects,
AddEffectList=AddEffectList,
DomEffectList=DomEffectList,
nBLASthreads=nBLASthreads))
if(ncores>1){ plan(sequential) }
crossespredicted %<>%
unnest(predVars)
totcomputetime<-proc.time()[3]-starttime
print(paste0("Done predicting fam vars. ",
"Took ",round((totcomputetime)/60,2),
" mins for ",nrow(crossespredicted)," crosses"))
return(crossespredicted)
}
# INTERNAL FUNCTION - predict all variance-covariance components for one cross
#' Title
#'
#' @param sireID
#' @param damID
#' @param modelType
#' @param haploMat
#' @param recombFreqMat
#' @param predType
#' @param postMeanAddEffects
#' @param postMeanDomEffects
#' @param AddEffectList
#' @param DomEffectList
#' @param nBLASthreads
#' @param ...
#'
#' @return
predOneCross<-function(sireID,damID,modelType,
haploMat,recombFreqMat,
predType,
postMeanAddEffects,
postMeanDomEffects=NULL,
AddEffectList=NULL,DomEffectList=NULL,
nBLASthreads,...){
starttime<-proc.time()[3]
if(!is.null(nBLASthreads)) { RhpcBLASctl::blas_set_num_threads(nBLASthreads) }
# Before predicting variances
# check for and remove SNPs that
# won't segregate, i.e. are fixed in parents
### hopes to save time / mem
x<-colSums(rbind(haploMat[grep(paste0("^",sireID,"_"),rownames(haploMat)),],
haploMat[grep(paste0("^",damID,"_"),rownames(haploMat)),]))
segsnps2keep<-names(x[x>0 & x<4])
if(length(segsnps2keep)>0){
recombFreqMat<-recombFreqMat[segsnps2keep,segsnps2keep,drop=F]
haploMat<-haploMat[,segsnps2keep,drop=F]
postMeanAddEffects<-purrr::map(postMeanAddEffects,~.[segsnps2keep])
if(modelType=="AD"){
postMeanDomEffects<-purrr::map(postMeanDomEffects,~.[segsnps2keep]) }
# calc cross LD matrix
progenyLD<-calcCrossLD(sireID,damID,recombFreqMat,haploMat)
rm(recombFreqMat,haploMat); gc()
# Set-up loop over variance and covarance parameters
## Trait variances to-be-predicted
traits<-names(postMeanAddEffects)
varcovars<-tibble::tibble(Trait1=traits,
Trait2=traits)
## If multiple traits
if(length(traits)>1){
## Add covariances to-be-predicted
varcovars<-dplyr::bind_rows(varcovars, # trait variances
combn(traits,2,simplify = T) %>% # covariances
t(.) %>% #
`colnames<-`(.,c("Trait1","Trait2")) %>%
tibble::as_tibble(.)) }
varcovars<-varcovars %>%
dplyr::mutate(predVars=purrr::pmap(.,
predOneCrossVar,
modelType=modelType,
progenyLD=progenyLD,
# haploMat=haploMat,
# recombFreqMat=recombFreqMat,
predType=predType,
postMeanAddEffects=postMeanAddEffects,
postMeanDomEffects=postMeanDomEffects,
AddEffectList=AddEffectList,
DomEffectList=DomEffectList)) %>%
unnest(predVars)
computetime<-proc.time()[3]-starttime
out_thiscross<-tibble::tibble(Nsegsnps=length(segsnps2keep),
ComputeTime=computetime,
predVars=list(varcovars))
} else {
computetime<-proc.time()[3]-starttime
out_thiscross<-tibble::tibble(Nsegsnps=length(segsnps2keep),
ComputeTime=computetime,
predVars=list()) }
return(out_thiscross)
}
# INTERNAL FUNCTION - predict one variance-covariance component for one cross
#' Title
#'
#' @param Trait1
#' @param Trait2
#' @param progenyLD
#' @param modelType
#' @param predType
#' @param postMeanAddEffects
#' @param postMeanDomEffects
#' @param AddEffectList
#' @param DomEffectList
#' @param segsnps2keep
#' @param ...
#'
#' @return
predOneCrossVar<-function(Trait1,Trait2,progenyLD,modelType,
#haploMat,recombFreqMat,
predType,
postMeanAddEffects,
postMeanDomEffects=NULL,
AddEffectList=NULL,
DomEffectList=NULL,
segsnps2keep=NULL,...){
if(predType=="PMV"){
# Posterior Sample Variance-Covariance Matrix of Marker Effects
postVarCovarOfAddEffects<-(1/(nrow(AddEffectList[[Trait1]])-1))*crossprod(AddEffectList[[Trait1]],AddEffectList[[Trait2]])
postVarCovarOfAddEffects<-postVarCovarOfAddEffects[segsnps2keep,
segsnps2keep,drop=F]
if(modelType=="AD"){
postVarCovarOfDomEffects<-(1/(nrow(DomEffectList[[Trait1]])-1))*crossprod(DomEffectList[[Trait1]],DomEffectList[[Trait2]])
postVarCovarOfDomEffects<-postVarCovarOfDomEffects[segsnps2keep,
segsnps2keep,drop=F]
}
} else {
postVarCovarOfAddEffects<-NULL;
postVarCovarOfDomEffects<-NULL;
}
## Predicts variance for one cross and
### one variance or covariance paramater (Trait1-Trait2)
## Predict cross additive variance
#### posterior mean (VPM)
predVarA_vpm<-genomicMateSelectR::quadform(D=progenyLD,
x=postMeanAddEffects[[Trait1]],
y=postMeanAddEffects[[Trait2]])
#### posterior mean (co)variance (PMV)
if(predType=="PMV"){ predVarA_pmv<-predVarA_vpm+sum(diag(progenyLD%*%postVarCovarOfAddEffects)) }
if(modelType=="AD"){
## Predict cross dominance variance
#### VPM
progenyLDsq<-progenyLD*progenyLD
predVarD_vpm<-genomicMateSelectR::quadform(D=progenyLDsq,
x=postMeanDomEffects[[Trait1]],
y=postMeanDomEffects[[Trait2]])
#### PMV
if(predType=="PMV"){ predVarD_pmv<-predVarD_vpm+sum(diag(progenyLDsq%*%postVarCovarOfDomEffects)) }
}
if(modelType=="A"){ rm(progenyLD); gc() }
if(modelType=="AD"){ rm(progenyLD,progenyLDsq); gc() }
# Tidy the results
out<-tibble::tibble(predOf="VarA",predVar=predVarA_vpm)
### VarA
if(predType=="PMV"){
out<-out %>%
rename(VPM=predVar) %>%
mutate(PMV=predVarA_pmv) }
### If modelType=="AD" - VarD
if(modelType=="AD"){
if(predType=="VPM"){
out %<>% dplyr::bind_rows(tibble::tibble(predOf="VarD",predVar=predVarD_vpm)) }
if(predType=="PMV"){
out %<>% dplyr::bind_rows(tibble::tibble(predOf="VarD",
VPM=predVarD_vpm,
PMV=predVarD_pmv)) }
}
return(out)
}
#' Calculate the gametic LD matrix for a single parent
#'
#' Function to compute gametic LD matrix for a single parent. Uses a matrix of recombination frequencies and the supplied haplotypes of the parent as in Bijma et al. 2020 and Lehermeier et al. 2017b (and others).
#'
#' @param parentGID string, the GID of an individual. Needs to correspond to renames in haploMat
#' @param recombFreqMat a square symmetric matrix with values = (1-2*c1), where c1=matrix of expected recomb. frequencies. The choice to do 1-2c1 outside the function was made for computation efficiency; every operation on a big matrix takes time.
#' @param haploMat matrix of phased haplotypes, 2 rows per sample, cols = loci, {0,1}, rownames assumed to contain GIDs with a suffix, separated by "_" to distinguish haplotypes
#'
#' @details Columns of haploMat and row/cols of recombFreqMat should be in same order. May be worth computing in an R session using multi-threaded BLAS.
#' @return Potentially really large matrix representing the LD between loci in gametes
#' @family predCrossVar
calcGameticLD<-function(parentGID,recombFreqMat,haploMat){
X<-haploMat[paste0(parentGID,c("_HapA","_HapB")),,drop=F]
p<-colMeans(X);
D<-recombFreqMat*((0.5*t(X)%*%X)-p%*%t(p));
return(D) }
#' Calculate the cross LD matrix based on both parents
#'
#' Wraps the \code{\link{calcGameticLD}} function. CrossLD = sireLD + damLD.
#'
#' @param sireID string, Male parent genotype ID.
#' @param damID string, Female parent genotype ID.
#' @param recombFreqMat a square symmetric matrix with values = (1-2*c1), where c1=matrix of expected recomb. frequencies. The choice to do 1-2c1 outside the function was made for computation efficiency; every operation on a big matrix takes time.
#' @param haploMat matrix of phased haplotypes, 2 rows per sample, cols = loci, {0,1}, rownames assumed to contain GIDs with a suffix, separated by "_" to distinguish haplotypes
#'
#' @details Columns of haploMat and row/cols of recombFreqMat should be in same order. May be worth computing in an R session using multi-threaded BLAS.
#' @return Potentially really large matrix representing the LD between loci in the cross
#' @export
#' @family predCrossVar
calcCrossLD<-function(sireID,damID,recombFreqMat,haploMat){
return(calcGameticLD(sireID,recombFreqMat,haploMat)+
calcGameticLD(damID,recombFreqMat,haploMat)) }
#' Predict cross means
#'
#' Function to predict the mean performances of the offspring of crosses. Takes
#' a list of crosses to predict, marker effects, parental allele dosage matrix
#' as input. Predicts potentially over multiple crosses and multiple traits.
#' With \code{predType="BV"} predicts the mid-parent of crosses by computing
#' parental GEBV. With \code{predType="TGV"} predicts the mean total merit of
#' cross offspring using a Falconer-MacKay Eqn. 14.6 and takes user supplied
#' additive and dominance effects as input. The additive-dominance effects
#' should be partitioned according to the "genotypic" marker codings (see
#' Vitezica et al. 2013. GENETICS).
#'
#'
#' @param CrossesToPredict data.frame or tibble, col/colnames: sireID, damID.
#' sireID and damID must both be in the haploMat.
#' @param predType string, "BV" or "TGV". "BV" predicts cross mean breeding
#' values as the mean GEBV of parents. "TGV" predicts the cross total genetic
#' value. Warning: prediction of meanTGV with F-M Eqn. 14.6 appropriate only
#' using a+d partition not allele sub. + dom. dev.; genotypic NOT classical in
#' terms used by Vitezica et al. 2013. For that reason,
#' \code{\link{predCrossMeans}} has a "predType" not a "modelType" argument
#' predType="TGV" uses Falconer-MacKay Eqn. 14.6 and takes add and dom
#' effects. predType="BV" input should be allele subst. effs, computes
#' mid-parent GEBV there is no equivalent to predicting the dominance variance
#' for the mean thus the difference from the predCrossVars() function. NOTICE:
#' NOT SAME as predType argument used in \code{\link{predCrossVars}}, sorry.
#' @param AddEffectList list of ADDITIVE effect matrices, one matrix per trait,
#' Each element of the list is named with a string identifying the trait and
#' the colnames of each matrix are labelled with snpIDs.
#' @param DomEffectList list of DOMINANCE effect matrices, one matrix per trait,
#' Each element of the list is named with a string identifying the trait and
#' the colnames of each matrix are labelled with snpIDs.
#' @param doseMat dosage matrix. required only for modelType=="DirDom". Assumes
#' SNPs coded 0, 1, 2. Nind rows x Nsnp cols, numeric matrix, with rownames
#' and colnames to indicate SNP/ind ID
#' @param ncores number of cores, parallelizes across \code{CrossesToPredict},
#' in multi-trait cases, process traits for each family in serial within each
#' worker.
#' @param ...
#'
#' @return tibble, each row contains predictions for a single cross. Columns:
#' \itemize{
#' \item \code{"Trait"}:
#' \item \code{"sireID"}:
#' \item \code{"damID"}:
#' \item \code{"sireGEBV"}: genomic estimated breeding value (GEBV) of the male parent of the cross
#' \item \code{"damGEBV"}: genomic estimated breeding value (GEBV) of the female parent of the cross
#' \item \code{"predOf"}: "MeanBV" or "MeanTGV"
#' \item \code{"predMean"}: The predicted mean value for the cross
#' }
#' @export
#' @family predCrossVar
predCrossMeans<-function(CrossesToPredict,predType,
AddEffectList,DomEffectList=NULL,
doseMat,
ncores=1,
...){
#nBLASthreads=NULL,
means<-tibble::tibble(Trait=names(AddEffectList))
parents<-CrossesToPredict %$% union(sireID,damID)
doseMat<-doseMat[parents,colnames(AddEffectList[[1]])]
suppressMessages(require(furrr));
if(ncores>1){ plan(multisession, workers = ncores); }
options(future.globals.maxSize=+Inf); options(future.rng.onMisuse="ignore")
if(predType=="BV"){
means<-means %>%
mutate(predictedMeans=future_map(Trait,function(Trait,
#nBLASthreads=nBLASthreads,
...){
#if(!is.null(nBLASthreads)) { RhpcBLASctl::blas_set_num_threads(nBLASthreads) }
parentGEBVs<-tcrossprod(doseMat,AddEffectList[[Trait]])
predmeans<-CrossesToPredict %>%
dplyr::left_join(tibble::tibble(sireID=rownames(parentGEBVs),
sireGEBV=as.numeric(parentGEBVs)),
by = "sireID") %>%
dplyr::left_join(tibble::tibble(damID=rownames(parentGEBVs),
damGEBV=as.numeric(parentGEBVs)),
by = "damID") %>%
dplyr::mutate(predOf="MeanBV",
predMean=(sireGEBV+damGEBV)/2)
return(predmeans) }))
if(ncores>1){ plan(sequential) }
}
if(predType=="TGV"){
plan(multisession, workers = ncores)
means<-means %>%
dplyr::mutate(predictedMeans=future_map(Trait,function(Trait,
#BLASthreads=nBLASthreads,
...){
#if(!is.null(nBLASthreads)) { RhpcBLASctl::blas_set_num_threads(nBLASthreads) }
predmeans<-CrossesToPredict %>%
dplyr::mutate(predOf="MeanTGV",
predMean=map2_dbl(sireID,damID,function(sireID,damID,...){
# Eqn 14.6 from Falconer+MacKay
p1<-doseMat[sireID,]/2
p2<-doseMat[damID,]/2
q<-1-p1
y<-p1-p2
g<-AddEffectList[[Trait]]*(p1-q-y) + DomEffectList[[Trait]]*((2*p1*q)+y*(p1-q))
meanG<-sum(g)
return(meanG)}))
return(predmeans) }))
if(ncores>1){ plan(sequential) }
}
means<-means %>%
unnest(predictedMeans)
return(means)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.