R/convert2geno.R

Defines functions convert2geno

Documented in convert2geno

# convert2geno:
#
#' Convert continuous allele information into marker genotypes
#'
#' Convert the continuous crossover location information produced by
#' sim_from_pedigree to marker genotypes
#'
#' @param xodat The sort of detailed genotype/crossover data generated by
#' [sim_from_pedigree()]
#' @param map vector of marker locations; can also be a list of such
#' vectors (one per chromosome), in which case xodat and founder_geno
#' must be lists with the same length.
#' @param founder_geno Optional matrix (size `n_founders` x
#' `length(map)`) of founder genotypes. If coded as 1/2 (or 1/3),
#' results are 1/2/3 genotypes. If coded as A/T/G/C/N, results are
#' A/T/G/C/N/H genotypes. If coded as letters A-H for the 8 founders,
#' results are two-letter genotypes AA-HH with 36 possible values.
#' @param shift_map If TRUE, shift genetic map to start at 0
#'
#' @return If `founder_geno` is provided or there are just two
#' founders, the result is a numeric matrix of genotypes, individuals
#' x markers, with genotypes 1/2/3 codes for 11/12/22 genotypes.
#'
#' If `founder_geno` is not provided and there are more than two
#' founders, the result is a 3-dimensional array, individuals x
#' markers x alleles, with the third dimensional corresponding to the
#' maternal and paternal allele.
#'
#' If the input `map` is a list (the components being
#' chromosomes), then `xodat` and `founder_geno` must be
#' lists of the same length, and the result will be a list of
#' matrices.
#'
#' @export
#' @keywords utilities
#' @seealso [get_geno()], [sim_from_pedigree()]
#'
#' @examples
#' # simulate AIL pedigree
#' tab <- sim_ail_pedigree(12, 30)
#' # simulate data from that pedigree
#' dat <- sim_from_pedigree(tab)
#' # marker map (could also use sim.map in R/qtl)
#' map <- seq(0, 100, by=5)
#' names(map) <- paste0("marker", seq(along=map))
#' # convert data to marker genotypes
#' geno <- convert2geno(dat, map)
#'
#'
#' # AIL with multiple chromosomes
#' dat <- sim_from_pedigree(tab, c("1"=100, "2"=75, "X"=100), xchr="X")
#' # marker map
#' multmap <- list("1"=seq(0, 100, by=5),
#'                 "2"=seq(0, 75, by=5),
#'                 "X"=seq(0, 100, by=5))
#' for(i in 1:3)
#'   names(multmap[[i]]) <- paste0("marker", i, "_", 1:length(map[[i]]))
#' geno <- convert2geno(dat, multmap)
#'
#' # simulate DO pedigree
#' tab <- sim_do_pedigree(8)
#' # simulate data from that pedigree
#' dat <- sim_from_pedigree(tab)
#' # simulate founder snp alleles
#' fg <- matrix(sample(1:2, 8*length(map), repl=TRUE), nrow=8)
#' # for DO, need female & male founders (to deal with X chr)
#' fg <- rbind(fg, fg)
#' # convert dat to SNP genotypes
#' geno <- convert2geno(dat, map, fg)
#' # if fg not provided, result is a 3d array
#' genoarray <- convert2geno(dat, map)
convert2geno <-
    function(xodat, map, founder_geno=NULL, shift_map=FALSE)
{
    if(is.list(map)) { # map is a list of multiple chromosomes
        if(length(xodat) != length(map))
            stop("length(xodat) != length(map)")
        if(!is.null(founder_geno)) {
            if(length(founder_geno) != length(map))
                stop("length(founder_geno) != length(map)")
        }
        result <- vector("list", length(map))
        names(result) <- names(map)
        for(i in seq(along=map))
            result[[i]] <- convert2geno(xodat[[i]], map[[i]],
                                        founder_geno[[i]], shift_map)
        return(result)
    }

    if(shift_map) map <- map - map[1]
    if(max(map) > max(xodat[[1]]$mat$locations))
        stop("maximum simulated position is less than the length of the map.")
    if(any(map < 0))
        stop("Some markers at < 0 cM")

    # maximum founder genotype
    max_geno <- max(sapply(xodat, function(a) max(a$mat$alleles, a$pat$alleles)))

    if(is.null(founder_geno))
        founder_geno <- matrix(nrow=0, ncol=0)
    else {
        if(nrow(founder_geno) < max_geno)
            stop("founder_geno should have at least ", max_geno,
                 " rows but has ", nrow(founder_geno))
        if(ncol(founder_geno) != length(map))
            stop("founder_geno should have ", length(map),
                 " columns but has ", ncol(founder_geno))
        if(any(is.na(founder_geno) | founder_geno==0)) # 0 as missing
            stop("founder genotypes cannot be missing")
    }

    if(length(founder_geno) > 0 || max_geno <= 2) {
        output <- t(.convert2geno(xodat, map, founder_geno))

        if(any(founder_geno != 1 & founder_geno != 2)) {
            if(all(founder_geno == 1 | founder_geno==3)) {
                founder_geno[founder_geno==3] <- 2
                output <- t(.convert2geno(xodat, map, founder_geno))
            }
            else if(all(c("A","T","G","C") %in% unique(c(founder_geno)))){
                output <- t(convert2geno_char(xodat, map, founder_geno))
            } else {
                output <- t(convert2geno_char_paste(xodat, map, founder_geno))
            }
        } else {
            output <- t(.convert2geno(xodat, map, founder_geno))
        }

        dimnames(output) <- list(names(xodat), names(map))
    }
    else {
        output <- convert2genoarray(xodat, map)
        # output was markers x ind x alleles; switch first two dimensions
        output <- aperm(output, c(2, 1, 3))

        dimnames(output) <- list(names(xodat), names(map), c("mat", "pat"))
    }

    output
}
kbroman/simcross documentation built on Jan. 13, 2024, 10:31 p.m.