R/sim_dof1_pedigree.R

Defines functions sim_dof1_pedigree

Documented in sim_dof1_pedigree

# sim_dof1_pedigree
#
#' Simulate pedigree for F1 between diversity outbreds and another inbred line
#'
#' Simulate a pedigree for a set of DOF1 individuals: the F1 offspring
#' of a set of diversity outbred mice and another inbred strain (such
#' as a mutant line).
#'
#' @param ngen Number of generations of outbreeding
#' @param npairs Number of breeding pairs at each generation
#' @param ccgen Vector of length `npairs`, with the number of
#' generations for each CC line. If length 1, it is repeated to a
#' vector of length `npairs`.
#' @param nkids_per Number of offspring per pair for the last
#' DO generation (each will be crossed to produce one F1)
#' @param design How to choose crosses: either random but avoiding
#' siblings, or completely at random
#'
#' @return A data frame with seven columns: individual ID, mother ID,
#' father ID, sex, generation, a TRUE/FALSE indicator for whether DO
#' or pre-DO, and a TRUE/FALSE indicator for whether DOF1.  Founders
#' have `0` for mother and father ID. Sex is coded 0 for female
#' and 1 for male.
#'
#' @details Diversity outbred (DO) mice are generated from a set of 8 inbred
#' lines. We need two individuals from each line (one female and one
#' male) as the order of the initial crosses will be randomized; for
#' example, sometimes the individual from line 1 will be a mother and
#' sometimes a father. The founders are numbered 1-8 for the females
#' from the 8 lines, and 9-16 for the corresponding males.
#'
#' Diversity Outbred mice are generated by first creating a panel of
#' partially-inbred 8-way RIL (the so-called pre-CC, for
#' pre-Collaborative Cross). The `ccgen` argument specifies the
#' number of inbreeding generations for each of the CC lines. We
#' generate a pre-CC line for each of the `npairs` breeding
#' pairs, and generate a sibling pair from each as the starting
#' material.
#'
#' The subsequent `ngen` outbreeding generations then proceed by
#' crossing a male and female from the preceding generation (mated
#' completely at random, with `design="random"`, or avoiding
#' siblings, with `design="nosib"`). Each breeding pair gives a
#' single female and a single male to the next generation, except at
#' the last generation `nkids_per` offspring are mated, in equal
#' numbers male and female. (If `nkids_per` is an odd number, the
#' number of males and females in each sibship will differ by one,
#' alternating between sibships, with one additional female and then
#' one additional male.
#'
#' The default for `ccgen` is taken from Figure 1 of Svenson et
#' al. (2012).
#'
#' We assume that the F1 offspring are all from a cross DO female x
#' line 17 male, and so the last generation of the DO is taken to be
#' all females.
#'
#' @references
#' Svenson KL, Gatti DM, Valdar W, Welsh CE, Cheng R, Chesler EJ,
#' Palmer AA, McMillan L, Churchill GA (2012) High-resolution genetic
#' mapping using the mouse Diversity Outbred population. Genetics
#' 190:437-447
#'
#' @keywords datagen
#' @export
#' @seealso [sim_from_pedigree()],
#' [sim_ril_pedigree()], [sim_ail_pedigree()],
#' [sim_4way_pedigree()]
#'
#' @examples
#' tab <- sim_dof1_pedigree(8)
sim_dof1_pedigree <-
    function(ngen=12, npairs=144,
             ccgen=rep(4:12, c(21, 64, 24, 10, 5, 9, 5, 3, 3)),
             nkids_per=5, design=c("nosib", "random"))
{
    design <- match.arg(design)
    if(length(ccgen)==1) ccgen <- rep(ccgen, npairs)
    stopifnot(length(ccgen) == npairs)
    stopifnot(all(ccgen >= 0))

    # initial generation : need to double each strain
    #     1-8 are the mothers; 9-16 are the corresponding dads
    id <- 1:16
    mom <- rep(0, 16)
    dad <- rep(0, 16)
    sex <- rep(0:1, each=8)
    gen <- rep(0, 16)
    result <- data.frame(id=id, mom=mom, dad=dad, sex=sex, gen=gen)

    cur_nind <- 16
    lastgen <- NULL
    for(i in 1:npairs) {
        # random cross among the 8 strains
        parents <- sample(1:8)
        parents[seq(2, 8, by=2)] <- parents[seq(2, 8, by=2)] + 8

        tab <- sim_ril_pedigree(ccgen[i], selfing=FALSE, parents=parents, firstind=cur_nind+1)[-(1:8),]
        result <- rbind(result, tab)
        lastgen <- c(lastgen, tab[tab[,"gen"] == max(tab[,"gen"]),1])
        cur_nind <- max(tab[,1])
    }
    result <- data.frame(result, do=rep(FALSE, nrow(result)))

    moms <- lastgen[seq(1, length(lastgen), by=2)]
    dads <- lastgen[seq(2, length(lastgen), by=2)]
    for(i in 1:ngen) {
        dads <- sample(dads)
        while(design=="nosib" && any(dads - moms == 1)) { # sample until no sibs
            dads <- sample(dads)
        }

        if(i < ngen) {
            id <- 1:(npairs*2) + cur_nind

            mom <- rep(moms, each=2)
            dad <- rep(dads, each=2)
            sex <- rep(c(0,1), npairs)
            gen <- rep(i, npairs*2)

            moms <- id[seq(1, length(id), by=2)]
            dads <- id[seq(2, length(id), by=2)]
        }
        else { # last generation: expand
            id <- 1:(npairs*nkids_per)+cur_nind

            mom <- rep(moms, each=nkids_per)
            dad <- rep(dads, each=nkids_per)
            sex <- rep(c(0,1), ceiling(npairs*nkids_per/2))[1:(npairs*nkids_per)]
            gen <- rep(i, npairs*nkids_per)
        }
        cur_nind <- max(id)

        newrows <- data.frame(id=id, mom=mom, dad=dad, sex=sex, gen=gen,
                              do=rep(TRUE, length(id)))
        result <- rbind(result, newrows)
    }

    offspring <- result[-(1:16),]
    offspring$id <- offspring$id + 1
    offspring$mom[offspring$mom > 16]  <- offspring$mom[offspring$mom > 16] + 1
    offspring$dad[offspring$dad > 16]  <- offspring$dad[offspring$dad > 16] + 1
    offspring$sex[offspring$do & offspring$gen==ngen] <- 0

    result <- rbind(result[1:16,],
                    data.frame(id=17, mom=0, dad=0, sex=1, gen=0, do=FALSE),
                    offspring)

    last_gen <- result$id[result$do & result$gen==ngen]
    n_last_gen <- length(last_gen)

    result <- rbind(cbind(result, dof1=FALSE),
                    data.frame(id=max(last_gen)+seq(along=last_gen),
                               mom=last_gen,
                               dad=17,
                               sex=sample(0:1, n_last_gen, replace=TRUE),
                               gen=ngen+1,
                               do=FALSE,
                               dof1=TRUE))
    rownames(result) <- 1:nrow(result)
    result
}
kbroman/simcross documentation built on Jan. 13, 2024, 10:31 p.m.