R/gen_diploid.R
In PROSPER: Simulation of Weed Population Dynamics

Documented in gen_diploid

#' @title Diploid genetics: recombination of alleles
#' @seealso \code{\link{pop_reprod}}
#' @description \code{gen_diploid} recombines the alleles of weeds with diploid genome. 
#' @export
#  @template dfgenotype
#' @template start
#' @template result
#' @template max_vec_length
#' @param newseeds number of new seeds calculated by \code{\link{pop_reprod}}. \code{positive} \code{integer}.
#  @param xprobab  probabilities of occurence (F1 generation) for all possible genotypes with all possible parent genotypes. A \code{table} that is generated by \code{\link{struc_preparation}}.
#  @param mf       all possible combinations of parental genotypes. A \code{data.frame} that is generated by \code{\link{struc_preparation}}
#' @template start_comb 


#' @details \code{gen_diploid} assumes independent allele recombination.
#' @return A column with the name of \code{result} and, if necessary, a column with the name of \code{start_comb} is added to the \code{data.frame} \code{dfgenotype}.

#' @section warning: newseeds are always coerced to a whole number.

#' @examples 
#'\dontrun{
#' # generate a 'dfgenotype' data.frame:
#' struc_preparation2(Rmx=10, af=c(0.01, 0.2), epis=0, dom=1)
#' #Distribute 10000 individuals of the starting population across the genotypes. 
#' #The two gene loci have initial frequencies of 0.01 and 0.8.
#' gen_freq(af=c(0.01,0.8), n_seeds=10000)
#' # The column "initialSB" represents the parent generation, which recombines and 
#' #therefore defines the genetics of the new seeds
#' newseeds <- 10000
#' gen_diploid(start="initialSB", result="followingSB")
#' # If a second cohort is reprodusing in the same time
#'gen_diploid(start=c("initialSB", "followingSB"), 
#'                   start_comb="two_cohorts_combind", result="followingSB2")
#' rm(dfgenotype, mf, newseeds, xprobab)
#'}






gen_diploid <-
function(start, start_comb=NA, result, newseeds=get0("newseeds", envir = parent.frame(n = 1)), max_vec_length=1e+07){
cat("gen_diploid starts...")

if(is.na(newseeds)) {stop("newseeds must be given")}

dfgenotype <- get0("dfgenotype", envir = parent.frame(n = 1))
###------- if no genetics are included --------
loci <- get0("n_loci", envir = parent.frame(n = 1))

if(loci==0){
            dfgenotype[[result]] <- newseeds
            assign("dfgenotype", value=dfgenotype, pos = -1, envir = parent.frame(n = 1))   
            cat(" finished!\n")
            invisible(dfgenotype)
            }
            
###------- start of the main function --------
mf <- get0("mf", envir = parent.frame(n = 1))
xprobab <- get0("xprobab", envir = parent.frame(n = 1))

#if(!is.character(start)) stop("gen_diploid: start must be of type character.")
if(typeof(start)!="character") stop("gen_diploid: start must be of type character.")

if(newseeds != round(newseeds,digits=0)){
                                newseeds <- round(newseeds, digits=0)}

nrows <- nrow(dfgenotype)
 first_amount <- data.frame(matrix(nrow=nrows,ncol=length(start)), stringsAsFactors = TRUE)
 names(first_amount) <- start            
 
 gt_seeds <- rep(0,nrows)
for(cohort in seq_along(start)){   
 first_amount[cohort] <- dfgenotype[[start[cohort]]]  #the amount that gives the start for each cohort
}#END for(cohort)

if(length(start)>1){
                    if(is.na(start_comb)) {start_comb <- "def_col_name"
                                           warning("gen_diploid: if length(start)>1 'start_comb' should be defined")}
                   eval(parse(text=paste("dfgenotype$",start_comb,"<-rowSums(first_amount)",sep=""))) #the amount that gives the start summed up
                   #dfgenotype[[start_comb]] <- rowSums(first_amount)
                   }#END if(length(start))

if(is.null(mf)){
                cat("No genetics!\nNo recombination!\n")
                dfgenotype[[result]] <- newseeds
                assign("dfgenotype", value=dfgenotype, pos = -1, envir=parent.frame(n = 1) )  
                return(dfgenotype)}


#-------------------------------------------------------------------------------
### Genotypes of new seeds  ----------------------------------------------------
# - which parents do combine?
cat(" step 1...") 
dfgenotype[[result]] <- 0


if(newseeds > 0){                  #the recombination starts only if seeds were produced
 surv <- rowSums(first_amount)     #all living individuals for each genotype
 tmp   <- mf
 tmp$n <- 0
 if(sum(surv)!=0){
  w <- c(surv/sum(surv))                        #proportion of genotype / probability to get as parant
  }else{w<-0}
csw <- c(0, cumsum(w))                        #used for the changing base of probabilities

ngt <- nrow(xprobab)

i1 <- newseeds %/% max_vec_length
i2 <- newseeds %% max_vec_length

if(i1 > 0){
  for(i in 1:i1){
    mother <- rep(0, ngt) ;  father <- rep(0, ngt)
    for(j in 1:2){
      asum <- 0
      wstop <- max(which(w > 0))                              
      for(k in which(w > 0)){
        if(k == wstop){
          a <- max_vec_length - asum
          }else{
          a <- rbinom(1, max_vec_length - asum, prob = w[k]/(1-csw[k]))       
        }#END if(k)
        if(j == 1){mother[k] <- mother[k] + a}
        if(j == 2){father[k]  <- father[k]  + a}
        asum <- asum + a
      }#END for(k)
    }#END for(j)
    parents <- data.frame(table(paste(sample(rep(dfgenotype$genotype, mother)), sample(rep(dfgenotype$genotype, father)), sep = "")), stringsAsFactors = TRUE)
    tmp   <- merge(tmp, parents, by.x = "mf", by.y = "Var1", all.x = TRUE)
    tmp[is.na(tmp)] <- 0
    tmp$n <- tmp$n + tmp$Freq
    tmp$Freq <- NULL
  }#END for(i)
}# END if(i1)

if(i2>0){
mother <- rep(0, ngt) ;  father <- rep(0, ngt)
for(j in 1:2){
  asum <- 0                                #collects how many individuals are already defined
  wstop <- max(which(w > 0))               #stop procedure when the maximum value of w is reached, if more then 1, the latest

  #for every genotype, which is present
  for(pres in which(w > 0)){
        if(pres == wstop){a <- i2 - asum                            #the last genotype does not need calculation, just a number?
        }else{a <- rbinom(1, i2 - asum, prob = w[pres]/(1-csw[pres]))}
        if(j == 1){mother[pres] <- mother[pres] + a}
        if(j == 2){father[pres]  <- father[pres]  + a}
        asum <- asum + a
  }#END for(pres)
}#END for(j)
parents <- data.frame(table(paste(sample(rep(dfgenotype$genotype, mother)), sample(rep(dfgenotype$genotype, father)), sep = "")), stringsAsFactors = TRUE)
tmp <- merge(tmp, parents, by.x = "mf", by.y = "Var1", all.x = TRUE)
tmp[is.na(tmp)] <- 0
tmp$n <- tmp$n + tmp$Freq
tmp$Freq <- NULL
}
###-----------------------------------------------------------------------------
cat(" step 2...")
#what is the outcome in f1 generation?
for(pres in which(tmp$n > 0)){                                #only present combinations of parents are used
  i1 <- tmp$n[pres] %/% max_vec_length
  i2 <- tmp$n[pres] %% max_vec_length
  if(i1 > 0){
    for(i in 1:i1){
      asum <- 0
      xprob <- xprobab[,pres]
      wstop <- max(which(xprob > 0))
      csw <- c(0, cumsum(xprob))
      for(k in which(xprob > 0)){
        if(k == wstop){
          a <- max_vec_length - asum
          }else{
          a <- rbinom(1, max_vec_length - asum, prob = xprob[k]/(1-csw[k]))
        }
        gt_seeds[k] <- gt_seeds[k] + a
        asum <- asum + a
      }
    }
  }
  asum <- 0
  xprob <- xprobab[,pres]                     #the probabilities for a certain f1 generation by the actual parental combination
  wstop <- max(which(xprob > 0))
  csw <- c(0, cumsum(xprob))
  for(k in which(xprob > 0)){
     if(k == wstop){a <- i2 - asum}
     else{a <- rbinom(1, i2 - asum, prob = xprob[k]/(1-csw[k]))} 
     gt_seeds[k] <- gt_seeds[k] + a
     asum <- asum + a
  }
}#END for(pres)
}#END if(newseeds>0)

dfgenotype[[result]] <- gt_seeds



assign("dfgenotype", value=dfgenotype, pos = -1, envir=parent.frame(n=1)) 
cat(" finished!\n")  
return(dfgenotype)
}