R/sim_do_pedigree.R

Defines functions sim_do_pedigree

Documented in sim_do_pedigree

# sim_do_pedigree
#
#' Simulate a pedigree for Diversity Outbred mice
#'
#' Simulate a pedigree for generating Diversity Outbred (DO) mice (a
#' table of individual, mom, dad, sex).
#'
#' @param ngen Number of generations of outbreeding
#' @param npairs Number of breeding pairs at each generation
#' @param ccgen Vector of length `npairs`, with the number of
#' generations for each CC line. If length 1, it is repeated to a
#' vector of length `npairs`.
#' @param nkids_per Number of offspring per pair for the last
#' generation
#' @param design How to choose crosses: either random but avoiding
#' siblings, or completely at random
#'
#' @return A data frame with six columns: individual ID, mother ID, father
#' ID, sex, generation, and TRUE/FALSE indicator for whether DO or pre-DO.
#' Founders have `0` for mother and father ID. Sex is coded 0 for
#' female and 1 for male.
#'
#' @details Diversity outbred (DO) mice are generated from a set of 8 inbred
#' lines. We need two individuals from each line (one female and one
#' male) as the order of the initial crosses will be randomized; for
#' example, sometimes the individual from line 1 will be a mother and
#' sometimes a father. The founders are numbered 1-8 for the females
#' from the 8 lines, and 9-16 for the corresponding males.
#'
#' Diversity Outbred mice are generated by first creating a panel of
#' partially-inbred 8-way RIL (the so-called pre-CC, for
#' pre-Collaborative Cross). The `ccgen` argument specifies the
#' number of inbreeding generations for each of the CC lines. We
#' generate a pre-CC line for each of the `npairs` breeding
#' pairs, and generate a sibling pair from each as the starting
#' material.
#'
#' The subsequent `ngen` outbreeding generations then proceed by
#' crossing a male and female from the preceding generation (mated
#' completely at random, with `design="random"`, or avoiding
#' siblings, with `design="nosib"`). Each breeding pair gives a
#' single female and a single male to the next generation, except at
#' the last generation `nkids_per` offspring are mated, in equal
#' numbers male and female. (If `nkids_per` is an odd number, the
#' number of males and females in each sibship will differ by one,
#' alternating between sibships, with one additional female and then
#' one additional male.
#'
#' The default for `ccgen` is taken from Figure 1 of Svenson et
#' al. (2012).
#'
#' @references
#' Svenson KL, Gatti DM, Valdar W, Welsh CE, Cheng R, Chesler EJ,
#' Palmer AA, McMillan L, Churchill GA (2012) High-resolution genetic
#' mapping using the mouse Diversity Outbred population. Genetics
#' 190:437-447
#'
#' @keywords datagen
#' @export
#' @seealso [sim_from_pedigree()],
#' [sim_ril_pedigree()], [sim_ail_pedigree()],
#' [sim_4way_pedigree()]
#'
#' @examples
#' tab <- sim_do_pedigree(8)
sim_do_pedigree <-
    function(ngen=12, npairs=144,
             ccgen=rep(4:12, c(21, 64, 24, 10, 5, 9, 5, 3, 3)),
             nkids_per=5, design=c("nosib", "random"))
{
    design <- match.arg(design)
    if(length(ccgen)==1) ccgen <- rep(ccgen, npairs)
    stopifnot(length(ccgen) == npairs)
    stopifnot(all(ccgen >= 0))

    # initial generation : need to double each strain
    #     1-8 are the mothers; 9-16 are the corresponding dads
    id <- 1:16
    mom <- rep(0, 16)
    dad <- rep(0, 16)
    sex <- rep(0:1, each=8)
    gen <- rep(0, 16)
    result <- data.frame(id=id, mom=mom, dad=dad, sex=sex, gen=gen)

    cur_nind <- 16
    lastgen <- NULL
    for(i in 1:npairs) {
        # random cross among the 8 strains
        parents <- sample(1:8)
        parents[seq(2, 8, by=2)] <- parents[seq(2, 8, by=2)] + 8

        tab <- sim_ril_pedigree(ccgen[i], selfing=FALSE, parents=parents, firstind=cur_nind+1)[-(1:8),]
        result <- rbind(result, tab)
        lastgen <- c(lastgen, tab[tab[,"gen"] == max(tab[,"gen"]),1])
        cur_nind <- max(tab[,1])
    }
    result <- data.frame(result, do=rep(FALSE, nrow(result)))

    moms <- lastgen[seq(1, length(lastgen), by=2)]
    dads <- lastgen[seq(2, length(lastgen), by=2)]
    for(i in 1:ngen) {
        dads <- sample(dads)
        while(design=="nosib" && any(dads - moms == 1)) { # sample until no sibs
            dads <- sample(dads)
        }

        if(i < ngen) {
            id <- 1:(npairs*2) + cur_nind

            mom <- rep(moms, each=2)
            dad <- rep(dads, each=2)
            sex <- rep(c(0,1), npairs)
            gen <- rep(i, npairs*2)

            moms <- id[seq(1, length(id), by=2)]
            dads <- id[seq(2, length(id), by=2)]
        }
        else { # last generation: expand
            id <- 1:(npairs*nkids_per)+cur_nind

            mom <- rep(moms, each=nkids_per)
            dad <- rep(dads, each=nkids_per)
            sex <- rep(c(0,1), ceiling(npairs*nkids_per/2))[1:(npairs*nkids_per)]
            gen <- rep(i, npairs*nkids_per)
        }
        cur_nind <- max(id)

        newrows <- data.frame(id=id, mom=mom, dad=dad, sex=sex, gen=gen,
                              do=rep(TRUE, length(id)))
        result <- rbind(result, newrows)
    }

    result
}
kbroman/simcross documentation built on Jan. 13, 2024, 10:31 p.m.