BreedSimCost: Functions And Data Structures Added To AlphaSimR

Documented in calcMinMaxEntries calcVDPcovMat calcVDPGain chooseTrialEntries iidPhenoEval makeNEntryGrid makeVarietyCandidates multistageGain multistageTruncPt runVDPtrial

#' runVDPtrial function
#'
#' Function to run a variety development pipeline stage
#'
#' @param bsd The breeding scheme data list
#' @param trialType String name of the kind of trial to run. Must link back to
#' a trial type with parameters in bsd
#' @param entries A vector of entry IDs
#'
#' @return Updated bsd
#'
#' @details Add records to phenoRecords
#'
#' @examples
#' params <- runVDPtrial(bsd, trialType)
#'
#' @export
runVDPtrial <- function(bsd, trialType){
  entries <- bsd$entries
  # Calculate the error variance resulting from the number of locations and reps
  # Note that the number of years for a trial is always 1
  nL <- bsd$nLoc[trialType]; nR <- bsd$nRep[trialType]
  eV <- bsd$errVars[trialType]
  impliedVarE <- bsd$gxlVar/nL + bsd$gxyVar + bsd$gxyxlVar/nL + eV/nL/nR
  # Phenotypic evaluation of experimental lines
  pheno <- AlphaSimR::setPheno(bsd$varietyCandidates[entries],
                               varE=impliedVarE, simParam=bsd$SP)
  # Add the information to phenoRecords
  newRec <- tibble(year=bsd$year, trialID=bsd$nextTrialID, trialType=trialType,
                   id=pheno@id, pheno=pheno@pheno, selCrit=NA,
                   errVar=impliedVarE)
  bsd$phenoRecords <- bsd$phenoRecords %>% bind_rows(newRec)
  
  # Manage the trialID number
  bsd$nextTrialID <- bsd$nextTrialID + 1
  
  return(bsd)
}

#' chooseTrialEntries function
#'
#' Function to select which varieties to advance to the next stage assuming
#' they are IID (i.e., estimating genotypic values), by truncation selection
#'
#' @param bsd The breeding scheme data list
#' @param toTrial String the trial type the entries are going to 
#' @param fromTrial String the trial type the entries are coming from 
#' @return bsd updated with a vector of entry IDs in bsd$entries
#' @details Accesses all data in phenoRecords to pick the highest 
#' performing candidates.
#'
#' @examples
#' bsd <- chooseTrialEntries(bsd, toTrial, fromTrial)]
#'
#' @export
chooseTrialEntries <- function(bsd, toTrial, fromTrial=NULL){
  if (bsd$debug){
    require(here)
    on.exit(expr={
      print(traceback())
      saveRDS(mget(ls()), file=here::here("data", "chooseTrialEntries.rds"))
    })
  }
  
  if (toTrial == bsd$stageNames[1]){
    nInd <- nInd(bsd$varietyCandidates)
    nIndMax <- min(nInd, bsd$nEntries[1])
    entries <- bsd$varietyCandidates@id[nInd - (nIndMax - 1):0]
  } else{
    if (toTrial == "MarketingDept"){
      nToSelect <- bsd$nToMarketingDept
      fromYear <- 0
    } else{
      nToSelect <- bsd$nEntries[toTrial]
      fromYear <- -1
    }
    phenoRecords <- bsd$phenoRecords
    candidates <- phenoRecords %>% pull(id) %>% unique
    if (!is.null(fromTrial)){
      candidates <- phenoRecords %>% 
        dplyr::filter(trialType == fromTrial & year == bsd$year + fromYear) %>% 
        pull(id) %>% unique
      phenoRecords <- phenoRecords %>% dplyr::filter(id %in% candidates)
    }
    if (nrow(phenoRecords) > length(candidates)){ # There is some replication
      crit <- iidPhenoEval(phenoRecords)
    } else{
      crit <- phenoRecords %>% pull(pheno)
      names(crit) <- phenoRecords %>% pull(id)
    }
    # Add crit to phenoRecords
    for (n in names(crit)){
      bsd$phenoRecords$selCrit[bsd$phenoRecords$id == n] <- crit[n]
    }
    if (length(candidates) < nToSelect){
      # Workaround for a problem that should only happen in the transition from
      # burnin to two-part: the VDP nEntries between the two are mismatched
      print("There are too few variety candidates for trial")
      nCand <- nToSelect - length(candidates)
      stageNum <- which(bsd$stageNames == toTrial) # How many years back parents
      nBreedInd <- AlphaSimR::nInd(bsd$breedingPop)
      nIndBack <- bsd$initNBreedingProg * 
        (bsd$nPopImpCycPerYear * (stageNum - 1) + 1)
      nIndBack <- min(nIndBack, nBreedInd)
      breedPopIDs <- bsd$breedingPop@id[nBreedInd - nIndBack + 
                                          1:bsd$nBreedingProg]
      bsd <- makeVarietyCandidates(bsd, breedPopIDs, nCand) 
      nVarInd <- AlphaSimR::nInd(bsd$varietyCandidates)
      entries <- c(candidates, 
                   bsd$varietyCandidates@id[nVarInd - (nCand - 1):0])
    } else{
      crit <- crit[candidates]
      entries <- crit[(crit %>% order(decreasing=T))[1:nToSelect]] %>% names
    }
  }
  bsd$entries <- entries
  if (bsd$debug) on.exit()
  return(bsd)
}

#' makeVarietyCandidates function
#'
#' @param bsd List of breeding program data
#' @param breedPopIDs String vector IDs of breeding pop progenitors
#' of the variety candidates.  If NULL, the last nBreedingProg individuals used
#' @param nCandidates Make this number of candidates. If NULL, defaults to the
#' number of individuals in the first stage of the VDP.
#' If more individuals come out of the PIC than are needed in the first stage of
#' the VDP, then requisite individuals are chosen at random.  If fewer
#' individuals come out of the PIC than are needed, then individuals from past
#' generations of the PIC are chosen.
#'
#' @return Updated bsd with new variety candidates in bsd$varietyCandidates
#' @details Here, creates DHs evenly distributed among the breeding progeny
#' in the last generation
#' @examples
#' bsd <- makeVarietyCandidates(bsd)]
#'
#' @export
makeVarietyCandidates <- function(bsd, breedPopIDs=NULL, nCandidates=NULL){
  if (is.null(nCandidates)) nCandidates <- bsd$nEntries[1]
  nInd <- nInd(bsd$breedingPop)
  switch(bsd$varietyType,
         clonal = {
           if (is.null(breedPopIDs)){ # Take the last breeding pop progeny
             if (nInd < nCandidates) stop("Not enough breeding pop progeny")
             if (bsd$nPopImpCycPerYear*bsd$nBreedingProg < nCandidates)
               cat("Previous-year Breeding pop progeny will be reused", bsd$nPopImpCycPerYear*bsd$nBreedingProg, nCandidates, "\n")
             newCand <- bsd$breedingPop[nInd - (nCandidates - 1):0]
           } else{
             newCand <- bsd$breedingPop[breedPopIDs]
           }
         }, 
         inbred = {
           if (is.null(breedPopIDs)){ # Take the last bsd$nBreedingProg
             nIndMax <- min(nInd, bsd$nBreedingProg)
             breedPopIDs <- bsd$breedingPop@id[nInd - (nIndMax - 1):0]
           }
           nInd <- length(breedPopIDs)
           nDH <- nCandidates %/% nInd
           if (nDH > 0){
             newCand <- makeDH(bsd$breedingPop[breedPopIDs], 
                               nDH=nDH, simParam=bsd$SP)
           }
           nExtra <- nCandidates %% nInd
           if (nExtra > 0){
             whichPar <- sample(nInd, nExtra)
             extraCand <- makeDH(bsd$breedingPop[breedPopIDs][whichPar], 
                                 nDH=1, simParam=bsd$SP)
             if (exists("newCand")){
               newCand <- c(newCand, extraCand)
             } else{
               newCand <- extraCand
             }
           }
         }#END inbred
  )#END switch
  
  # Update varietyCandidates population
  if (exists("varietyCandidates", bsd)){
    bsd$varietyCandidates <- c(bsd$varietyCandidates, newCand)
  } else{
    bsd$varietyCandidates <- newCand
  }
  
  return(bsd)
}

#' iidPhenoEval function
#'
#' Function to estimate variety candidate BLUPs assuming they are IID
#'
#' @param phenoRecords Tibble of phenotypic observations on variety candidates.
#' @return Named real vector of the BLUPs of all individuals in phenoRecords,
#' (names are the individual ids), with appropriate weights by error variance
#' @details Given all the phenotypic records calculate the best prediction of
#' the genotypic value for each individual using all its records
#'
#' @examples
#' iidBLUPs <- iidPhenoEval(phenoRecords)
#'
#' @export
iidPhenoEval <- function(phenoRecords){
  require(lme4)
  # Make error variances into weights
  phenoRecords <- phenoRecords %>% dplyr::mutate(wgt=1/errVar)
  fm <- lmer(pheno ~ (1|id), weights=wgt, data=phenoRecords)
  blup <- ranef(fm)[[1]]
  namesBlup <- rownames(blup)
  blup <- unlist(blup)
  # Ensure output has variation: needed for optimal contributions
  if (sd(blup) == 0){
    blup <- phenoRecords %>% group_by(id) %>%
      summarise(wm = weighted.mean(x=pheno, w=wgt)) %>% pull(wm)
  }
  names(blup) <- namesBlup
  return(blup)
}

#' calcVDPcovMat function
#' 
#' @param bsd List of breeding program data
#'
#' @return Covariance matrix of the true genotypic values of the variety
#' candidates with the phenotypic means from the VDP stages
#' 
#' @details This function came essentially from the selectiongain package 
#' developed by Melchinger's group: Mi et al. 2014 Crop Science. 
#' Simplified just to the "LonginII" method.
#'
#' @examples
#' vdpCovMat <- calcVDPcovMat(bsd)
#' 
#' @export
calcVDPcovMat <- function(bsd){
  nS1 = bsd$nStages + 1
  Vg <- bsd$genVar
  Vgy <- bsd$gxyVar
  Vgl <- bsd$gxlVar
  Vgyl <- bsd$gxyxlVar
  Ve <- bsd$errVars
  covMat <- diag(nS1)*Vg
  covMat[1,] <- Vg
  covMat[,1] <- Vg
  for (i in 2:nS1){
    covMat[i,i] <- Vg + Vgy +
      (Vgl+Vgyl)/bsd$nLocs[i-1] +
      Ve[i-1]/bsd$nLocs[i-1]/bsd$nReps[i-1]
  }
  for (i in 2:(nS1-1)){
    for (j in (i+1):nS1){
      covMat[i,j] <- Vg +
        Vgl/max(bsd$nLocs[i-1], bsd$nLocs[j-1])
      covMat[j,i] <- covMat[i,j]
    }
  }
  
  return(covMat)
}

#' multistageTrucPt function
#' 
#' @param alpha vector the selected fractions from one VDP stage to the next
#' @param corr matrix the correlation matrix from calcVDPcovMat
#' @param alg function the truncation point calculation algorithm from
#' package mvtnorm. Options are Miwa() and GenzBretz(). The latter is slower
#'
#' @return Truncation points consistent with the vector of selected fractions
#' 
#' @details This function came from the selectiongain package (Mi et al. 2014
#'   Crop Science) but uses the newer tmvtnorm package
#'
#' @examples
#' corMat <- cov2cor(vdpCovMat)
#' truncPts <- multistageTruncPt(alpha=alpha, corr=corMat)
#' 
#' @export
multistageTruncPt <- function(alpha,  corr=NA){
  if (any(is.na(corr))) stop("The corr matrix must not be NA")
  
  if (nrow(corr) == length(alpha)+1){
    corr <- corr[-1,-1]
  }
  else{
    stop ("The dimension of corr matrix must be one more than the length alpha")
  }
  
  nStg=length(alpha)
  alpha[alpha == 1] <- 0.9999 # Error thrown if fraction selected == 1
  
  lower <- rep(-Inf, nStg)
  for (stage in 1:nStg){
    truncPt <- tmvtnorm::qtmvnorm.marginal(p=1-alpha[stage], interval=c(-5,5), 
                                           n=stage, tail="lower.tail", 
                                           sigma=corr, lower=lower)
    lower[stage] <- truncPt$root
  }

  return(lower)
}

#' multistageGain function
#' 
#' @param corr matrix the correlation matrix from calcVDPcovMat
#' @param truncPts vector the truncation points that came from multistageTruncPt
#' @param Vg numeric the genotypic variance to provide a scale
#' 
#' @return numeric the expected gain from selection
#' 
#' @details This function came essentially from the selectiongain package 
#' but then I simplified it very much by using a current R package, tmvtnorm
#'
#' @examples
#' vdpCovMat <- calcVDPcovMat(bsd)
#' corMat <- cov2cor(vdpCovMat)
#' truncPts <- multistageTruncPt(alpha=alpha, corr=corMat)
#' gain <- multistageGain(corMat, truncPts)
#'  
#' @export
multistageGain <- function(corr, truncPts, Vg=1){
  require(tmvtnorm)
  # check if truncPts and corr have corresponding dimensions
  if (length(truncPts)!=nrow(corr)-1){
    stop("Dimension of truncPts must be same as nrow(corr)-1")
  }
  meanGenVal <- tmvtnorm::mtmvnorm(sigma=corr, lower=c(-Inf, truncPts))$tmean[1]
  return(meanGenVal*sqrt(Vg))
}

# Order of operations
# 0. From the full optimization, get the VDP budget
# 0.1 Calculate the implied covariance matrix in the VDP: calcVDPcovMat
# 1. With the VDP budget, calculate the min max entries: calcMinMaxEntries
# 2. With the min max entries, make the grid: makeNEntryGrid
# 3. Calculate the gains across the grid: calcVDPgain
# 4. Determine if the max is balanced more toward the end than the full two-part

#' makeNEntryGrid function
#'
#' @param nIndMin nStages vector minimum number of individuals at each stage
#' @param nIndMax nStages vector maximum number of individuals at each stage
#' @param step nStages vector: interval of number of individuals. If NULL will
#' increment for last stage and have the same number of steps for other stages
#' @param maxBudget the maximum cost of all evaluations
#' @param costProd per individual, how much does it cost to produce the seed?
#' @param costTest per individual, how much does it cost to run all the plots?
#'
#' @return List with feasible entry number schemes
#'
#' @details Call this function to set up optimization
#'
#' @examples
#' bsd <- makeNEntryGrid(bsd)
#'
#' @export
makeNEntryGrid <- function(nIndMin, nIndMax, step=NULL, 
                           minBudget=0.9*maxBudget, maxBudget, indCost){
  nStages <- length(nIndMin)
  if (is.null(step)){
    nSteps <- nIndMax[nStages] - nIndMin[nStages]
    step <- (nIndMax - nIndMin) / nSteps
  }
  nEntryList <- as.list(seq(from=nIndMin[1], to=nIndMax[1], by=step[1]) %>% round)
  for (stg in 2:nStages){
    nEntryNew <- seq(from=nIndMin[stg], to=nIndMax[stg], by=step[stg]) %>% round
    nEntryList <- mapply(c, rep(nEntryList, each=length(nEntryNew)),
                         rep(nEntryNew, length(nEntryList)), SIMPLIFY=F)
  }
  
  # Make sure that nEntries gets smaller going forward
  nIndDecreases <- function(nEntryVec){
    all(diff(nEntryVec) <= 0)
  }
  nEntryList <- nEntryList[sapply(nEntryList, nIndDecreases)]
  
  # Calculate budgets to eliminate ones that are too small or too big
  calcBudget <- function(nEntryVec, indCost){
    return(nEntryVec %*% indCost)
  }
  budgets <- sapply(nEntryList, calcBudget, indCost=indCost)
  
  return(nEntryList[budgets > minBudget & budgets < maxBudget])
}#END makeNEntryGrid

#' calcVDPGain function
#'
#' @param nEntryVec nStages vector with number of individuals at each stage
#' @param nFinal numeric how many individuals are going to the marketing dept
#' @param cov matrix covariances as came out of calcVDPcovMat
#'
#' @return numeric expected gain from the VDP
#'
#' @details Call this function on a grid of budget equal VDPs. Wrapper function 
#' to call the truncation point and gain functions with elements from the grid
#'
#' @examples
#' gains <- sapply(testGrid, calcVDPGain, 
#'                 nFinal=bsd$nToMarketingDept, cov=vdpCovMat)
#'
#' @export
calcVDPGain <- function(nEntryVec, nFinal, cov){
  nEntryVec <- c(nEntryVec, nFinal)
  genVar <- cov[1, 1]
  corr <- cov2cor(cov)
  selFrac <- nEntryVec[-1] / nEntryVec[-length(nEntryVec)]
  truncPts <- multistageTruncPt(selFrac, corr)
  return(multistageGain(corr=corr, truncPts=truncPts, Vg=genVar))
}

#' calcMinMaxEntries function
#'
#' @param bsd List of breeding program data
#' @param budgetVec vector as comes out of calcBudget. If NULL look in bsd
#' @param includePIC logical should a min and max for nProg in PIC be figured?
#'
#' @return List with vectors for minimum entries and maximum entries and cost
#' per individual entry
#'
#' @details Assume the location costs are fixed. 
#'
#' @examples
#' minMaxList <- calcMinMaxEntries(bsd)
#'
#' @export
calcMinMaxEntries <- function(bsd, budgetVec=NULL, includePIC=F){
  if (is.null(budgetVec)){
    if (exists("realizedBudget", bsd)){
      budgetVec <- bsd$realizedBudget
    }
    else{
      budgetVec <- bsd$initBudget
    }
  }
  indCost <- bsd$plotCosts * bsd$nReps * bsd$nLocs + bsd$qcGenoCost
  indCost[1] <- indCost[1] + 
    bsd$candidateDevelCost + bsd$wholeGenomeCost
  varBudget <- budgetVec["budget"] - budgetVec["locationCosts"]
  if (includePIC){
    costPerPIC <- bsd$nPopImpCycPerYear * 
      (bsd$crossingCost + bsd$wholeGenomeCost)
    indCost <- c(costPerPIC, indCost)
  }
  else{
    varBudget <- varBudget - budgetVec["picBudget"]
  }
  
  minLast <- bsd$nToMarketingDept
  minEntries <- rep(minLast, length(indCost))
  minStgCosts <- indCost * minEntries
  maxEntries <- NULL
  for (stg in 1:length(indCost)){
    costRest <- sum(minStgCosts[-(1:stg)])
    perEntToStg <- sum(indCost[1:stg])
    maxEntries <- c(maxEntries, floor((varBudget - costRest)/perEntToStg))
  }
  minEntries[1] <- maxEntries[stg]
  return(list(minEntries=minEntries, maxEntries=maxEntries, 
              indCost=indCost, varBudget=varBudget))
}

#' calcMultivarVDPcovMat function
#' 
#' @param bsd List of breeding program data
#' The following objects should be in bsd:
#' @param nStages The number of stages in the variety development pipeline (VDP)
#' @param nTraits The number of traits being selected upon
#' @param genVar A nTraits x nTraits genetic covariance matrix
#' @param gxyVar Same dimension genotype x year covariance matrix
#' @param gxlVar Same dimension genotype x location covariance matrix
#' @param gxyxlVar Same dimension genotype x year x location covariance matrix
#' @param errVars A list, nStages long, each element being the nTraits x nTraits
#'   error covariance matrix of that stage in the VDP
#' @param whichTwhen A list, nStages long, each element being an integer vector
#'   of which traits are being selected upon during that stage
#' @return Covariance matrix of the true genotypic values of the variety
#' candidates with the phenotypic means from the VDP stages
#' 
#' @details This function is a multivariate extension from the selectiongain
#'   package (Mi et al. 2014 Crop Science) of the "LonginII" method.
#'
#' @examples
#' vdpCovMat <- calcMultivarVDPcovMat(bsd)
#' 
#' @export
calcMultivarVDPcovMat <- function(bsd){
  # Assume that all Var matrices are multivariate d x d, and d=number of traits
  nS1 <- bsd$nStages + 1
  nT <- bsd$nTraits
  Vg <- bsd$genVar
  Vgy <- bsd$gxyVar
  Vgl <- bsd$gxlVar
  Vgyl <- bsd$gxyxlVar
  Ve <- bsd$errVars
  covMat <- kronecker(diag(nS1), Vg)
  covMat[1:nT,] <- kronecker(matrix(1, ncol=nS1), Vg)
  covMat[,1:nT] <- kronecker(matrix(1, nrow=nS1), Vg)
  for (i in 1:bsd$nStages){
    covMat[i*nT+1:nT,i*nT+1:nT] <- Vg + Vgy +
      (Vgl+Vgyl)/bsd$nLocs[i] +
      Ve[[i]]/bsd$nLocs[i]/bsd$nReps[i]
  }
  for (i in 1:(bsd$nStages-1)){
    for (j in (i+1):bsd$nStages){
      covMat[i*nT+1:nT,j*nT+1:nT] <- Vg +
        Vgl/max(bsd$nLocs[i], bsd$nLocs[j])
      covMat[j*nT+1:nT,i*nT+1:nT] <- 
        t(covMat[i*nT+1:nT,j*nT+1:nT])
    }
  }
  keep <- 1:nT
  for (i in 1:bsd$nStages){
    keep <- c(keep, i*nT+bsd$whichTwhen[[i]])
  }
  return(covMat[keep, keep])
}
jeanlucj/BreedSimCost documentation built on July 21, 2023, 1:59 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
jeanlucj/BreedSimCost
Functions And Data Structures Added To AlphaSimR

R/VarietyDevPipe.R
In jeanlucj/BreedSimCost: Functions And Data Structures Added To AlphaSimR

Defines functions calcMultivarVDPcovMat calcMinMaxEntries calcVDPGain makeNEntryGrid multistageGain multistageTruncPt calcVDPcovMat iidPhenoEval makeVarietyCandidates chooseTrialEntries runVDPtrial

Documented in calcMinMaxEntries calcVDPcovMat calcVDPGain chooseTrialEntries iidPhenoEval makeNEntryGrid makeVarietyCandidates multistageGain multistageTruncPt runVDPtrial

R Package Documentation

Browse R Packages

We want your feedback!

jeanlucj/BreedSimCost Functions And Data Structures Added To AlphaSimR

R/VarietyDevPipe.R In jeanlucj/BreedSimCost: Functions And Data Structures Added To AlphaSimR

Defines functions calcMultivarVDPcovMat calcMinMaxEntries calcVDPGain makeNEntryGrid multistageGain multistageTruncPt calcVDPcovMat iidPhenoEval makeVarietyCandidates chooseTrialEntries runVDPtrial

Documented in calcMinMaxEntries calcVDPcovMat calcVDPGain chooseTrialEntries iidPhenoEval makeNEntryGrid makeVarietyCandidates multistageGain multistageTruncPt runVDPtrial

R Package Documentation

Browse R Packages

We want your feedback!

jeanlucj/BreedSimCost
Functions And Data Structures Added To AlphaSimR

R/VarietyDevPipe.R
In jeanlucj/BreedSimCost: Functions And Data Structures Added To AlphaSimR