R/likelihood.R

Defines functions .informative .peel_MM .peel_MD .peel_M .eliminate_X .build_genolist_X .reduce_alleles .eliminate .build_genolist .startdata_MM .startdata_MD .startdata_M likelihood.list likelihood.singleton likelihood.linkdat likelihood

Documented in likelihood likelihood.linkdat likelihood.list likelihood.singleton

#' Pedigree likelihood
#'
#' Calculates various forms of pedigree likelihoods.
#'
#' All likelihoods are calculated using the Elston-Stewart algorithm.
#'
#' If \code{locus2 = NULL}, the result is simply the likelihood of the genotypes
#' observed at the marker in locus1.
#'
#' If \code{locus2 = 'disease'}, the result is the likelihood of the marker
#' genotypes in locus1, given the affection statuses of the pedigree members,
#' the disease model and the recombination rate \code{theta} between the marker
#' and disease loci.  The main use of this is for computation of LOD scores in
#' parametric linkage analysis.
#'
#' If \code{locus2} is a marker object, the result is the likelihood of the
#' genotypes at the two markers, given the recombination rate theta between
#' them.
#'
#' The function \code{likelihood_LINKAGE} is a fast version of
#' \code{likelihood.linkdat} in the case where \code{locus2 = 'disease'} and the
#' marker in locus1 has less than 5 alleles.
#'
#' @param x a \code{\link{linkdat}} object, a \code{\link{singleton}} object, or
#'   a list of such objects. In \code{likelihood_LINKAGE}, \code{x} must be a
#'   \code{linkdat} object, with \code{x$model} different from NULL.
#' @param locus1 a \code{\link{marker}} object compatible with \code{x}. If
#'   \code{x} is a list, then \code{locus1} must be a list of corresponding
#'   \code{marker} objects.
#' @param locus2 either NULL, the character 'disease', or a \code{\link{marker}}
#'   object compatible with \code{x}. See Details.
#' @param theta the recombination rate between locus1 and locus2 (in
#'   \code{likelihood_LINKAGE}: between the marker and the disease locus).  To
#'   make biological sense theta should be between 0 and 0.5.
#' @param eliminate mostly for internal use: a non-negative integer indicating
#'   the number of iterations in the internal genotype-compatibility algorithm.
#'   Positive values can save time if \code{partialmarker} is non-empty and the
#'   number of alleles is large.
#' @param logbase a numeric, or NULL. If numeric the log-likelihood is returned,
#'   with \code{logbase} as basis for the logarithm.
#' @param loop_breakers a numeric containing IDs of individuals to be used as
#'   loop breakers. If NULL, automatic selection of loop breakers will be
#'   performed. See \code{\link{breakLoops}}.
#' @param returnprod a logical; if TRUE, the product of the likelihoods is
#'   returned, otherwise a vector with the likelihoods for each pedigree in the
#'   list.
#' @param marker an integer between 0 and \code{x$nMark}, indicating which
#'   marker to use in the calculation.
#' @param afreq a numeric containing the marker allele frequencies.
#' @param startdata for internal use.
#' @param TR.MATR,initialCalc,singleNum.geno for internal use, speeding up
#'   linkage computations with few-allelic markers.
#' @param \dots further arguments.
#' @return The likelihood of the data. If the parameter \code{logbase} is a
#'   positive number, the output is \code{log(likelihood, logbase)}.
#' @seealso \code{\link{lod}}
#'
#' @examples
#'
#' x = linkdat(toyped, model=1) #dominant model
#'
#' lod1 = likelihood_LINKAGE(x, marker=1, theta=0, logbase=10) -
#'        likelihood_LINKAGE(x, marker=1, theta=0.5, logbase=10)
#' lod2 = lod(x, markers=1, theta=0)
#'
#' # these should be the same:
#' stopifnot(identical(lod1, as.numeric(lod2)), round(lod1, 2)==0.3)
#'
#' # likelihood of inbred pedigree (grandfather/granddaughter incest)
#' y = addOffspring(addDaughter(nuclearPed(1, sex=2), 3), father=1, mother=5, 1)
#' m = marker(y, 1, 1, 6, 1:2)
#' l1 = likelihood(y, m)
#' l2 = likelihood(y, m, loop_breaker=5) # manual specification of loop_breaker
#' stopifnot(l1==0.09375, l2==l1)
#'
#' @export
likelihood = function(x, ...) UseMethod("likelihood", x)

#' @export
#' @rdname likelihood
likelihood.linkdat = function(x, locus1, locus2 = NULL, theta = NULL, startdata = NULL, eliminate = 0,
    logbase = NULL, loop_breakers = NULL, ...) {
    if (!inherits(locus1, "marker") && is.numeric(locus1))
        locus1 = x$markerdata[[locus1]]
    if (!inherits(locus2, "marker") && is.numeric(locus2))
        locus2 = x$markerdata[[locus2]]

    # analysisType = switch(class(locus2), `NULL` = 1, character = 2, marker = 3)
    if(inherits(locus2, "marker"))
      analysisType = 3
    else if(is.character(locus2)) {
      analysisType = 2
      locus2 = NULL  # 'disease'
    }
    else
      analysisType = 1

    locus1 = .reduce_alleles(locus1)
    locus2 = .reduce_alleles(locus2)  # unchanged if NULL

    if (x$hasLoops) {
        cat("Tip: To optimize speed consider breaking loops before calling 'likelihood'. See ?breakLoops.\n")
        m = list(locus1)
        m[[2]] = locus2  # no effect if NULL
        x = breakLoops(setMarkers(x, m), loop_breakers = loop_breakers, verbose = TRUE)
        locus1 = x$markerdata[[1]]
        if (analysisType == 3)
            locus2 = x$markerdata[[2]]
    }

    chrom = if (identical(attr(locus1, "chrom"), 23))
        "X" else "AUTOSOMAL"
    SEX = x$pedigree[, "SEX"]
    mutmat = attr(locus1, "mutmat")
    inform_subnucs = x$subnucs

    if (is.null(startdata)) {
        if (analysisType != 2) {
            inform = .informative(x, locus1, locus2)
            inform_subnucs = inform$subnucs
            x$founders = c(x$founders, inform$newfounders)
            x$nonfounders = .mysetdiff(x$nonfounders, inform$newfounders)
        }
        dat = switch(analysisType, .startdata_M(x, marker = locus1, eliminate = eliminate),
            .startdata_MD(x, marker = locus1, eliminate = eliminate), .startdata_MM(x, marker1 = locus1,
                marker2 = locus2, eliminate = eliminate))
    } else dat = startdata

    peelFUN = switch(analysisType, function(dat, sub) .peel_M(dat, sub, chrom, SEX, mutmat = mutmat),
        function(dat, sub) .peel_MD(dat, sub, theta, chrom, SEX), function(dat, sub) .peel_MM(dat,
            sub, theta, chrom, SEX))

    if (attr(dat, "impossible"))
        return(ifelse(is.numeric(logbase), -Inf, 0))

    if (is.null(dups <- x$loop_breakers)) {
        for (sub in inform_subnucs) {
            dat = peelFUN(dat, sub)
            if (sub$pivtype > 0 && attr(dat, "impossible"))
                return(ifelse(is.numeric(logbase), -Inf, 0))
        }
        likelihood = dat
    } else {
        two2one = function(matr) if (is.matrix(matr))
            1000 * matr[1, ] + matr[2, ] else matr  #if input is vector (i.e. X-linked male genotypes), return it unchanged
        origs = match(dups[, 1], x$orig.ids)
        copies = match(dups[, 2], x$orig.ids)

        # For each orig, find the indices of its haplos (in orig$hap) that also occur in its copy.
        # Then take cross product of these vectors.
        loopgrid = fast.grid(lapply(seq_along(origs), function(i) {
            ori = two2one(dat[[c(origs[i], 1)]])
            seq_along(ori)[ori %in% two2one(dat[[c(copies[i], 1)]])]
        }), as.list = TRUE)

        likelihood = 0
        for (r in loopgrid) {
            # r a vector of indices: r[i] gives a column number of the hap matrix of orig[i].
            dat1 = dat
            attr(dat1, "impossible") = FALSE
            for (i in seq_along(origs)) {
                orig.int = origs[i]
                copy.int = copies[i]
                orighap = dat[[orig.int]]$hap
                origprob = dat[[orig.int]]$prob
                hap = if (is.matrix(orighap))
                  orighap[, r[i], drop = F] else orighap[r[i]]
                prob = origprob[r[i]]
                if (sum(prob) == 0)
                  print("Loop-loekke: Alle sannsynligheter er null. Magnus lurer paa om dette kan gi feilmelding.")
                dat1[[orig.int]] = list(hap = hap, prob = prob)
                dat1[[copy.int]] = list(hap = hap, prob = 1)
            }
            for (sub in inform_subnucs) {
                pivtype = sub$pivtype
                dat1 = peelFUN(dat1, sub)
                if (pivtype > 0 && attr(dat1, "impossible"))
                  {
                    break
                  }  #if impossible data - break out of ES-algorithm and go to next r in loopgrid.
                if (pivtype == 0)
                  likelihood = likelihood + dat1
            }
        }
    }
    if (is.numeric(logbase))
        log(likelihood, logbase) else likelihood
}

#' @export
#' @rdname likelihood
likelihood.singleton = function(x, locus1, logbase = NULL, ...) {
    if (!inherits(locus1, "marker") && is.numeric(locus1))
        locus1 = x$markerdata[[locus1]]
    if (is.null(locus1) || all(locus1 == 0))
        return(if (is.numeric(logbase)) 0 else 1)

    m = locus1
    chrom = as.integer(attr(m, "chrom"))
    afreq = attr(m, "afreq")
    if (identical(chrom, 23L) && x$pedigree[, "SEX"] == 1) {
        # X chrom and male
        if (all(m > 0) && m[1] != m[2])
            stop("Heterozygous genotype detected for X-linked marker in male individual.")
        res = afreq[m[1]]
    } else if (0 %in% m) {
        p = afreq[m[m != 0]]
        res = p^2 + 2 * p * (1 - p)
    } else res = prod(afreq[m]) * ifelse(m[1] != m[2], 2, 1)  # assumes HWE
    return(if (is.numeric(logbase)) log(res, logbase) else res)
}

#' @export
#' @rdname likelihood
likelihood.list = function(x, locus1, locus2 = NULL, ..., returnprod = TRUE) {
    if (!is.linkdat.list(x))
        stop("x must be either a 'linkdat' object, a 'singleton' object, or a list of such")
    if (is.atomic(locus1))
        locus1 = rep(list(locus1), length = length(x))
    if (is.atomic(locus2))
        locus2 = rep(list(locus2), length = length(x))  # Note: NULL is atomic
    liks = vapply(1:length(x), function(i) likelihood(x[[i]], locus1[[i]], locus2[[i]], ...),
        numeric(1))

    if (returnprod)
        return(prod(liks)) else liks
}


#### FUNCTIONS FOR CREATING THE INTITIAL HAPLOTYPE COMBINATIONS W/PROBABILITIES.

.startdata_M = function(x, marker, eliminate = 0) {
    afreq = attr(marker, "afreq")
    chromX = identical(attr(marker, "chrom"), 23)

    impossible = FALSE
    if (chromX) {
        glist = .build_genolist_X(x, marker, eliminate)
        if (attr(glist, "impossible")) {
            dat = list()
            attr(dat, "impossible") = TRUE
            return(dat)
        }
        sex = x$pedigree[, "SEX"]
        dat = lapply(1:x$nInd, function(i) {
            h = glist[[i]]
            if (i %in% x$founders) {
                prob = switch(sex[i], afreq[h], afreq[h[1, ]] * afreq[h[2, ]] * ((h[1, ] !=
                  h[2, ]) + 1))
                if (sum(prob) == 0)
                  impossible = TRUE
            } else prob = rep.int(1, length(h)/sex[i])
            list(hap = h, prob = as.numeric(prob))
        })
    } else {
        glist = .build_genolist(x, marker, eliminate)
        if (attr(glist, "impossible")) {
            dat = list()
            attr(dat, "impossible") = TRUE
            return(dat)
        }
        dat = lapply(1:x$nInd, function(i) {
            h = glist[[i]]
            if (i %in% x$founders) {
                prob = afreq[h[1, ]] * afreq[h[2, ]] * ((h[1, ] != h[2, ]) + 1)
                if (sum(prob) == 0)
                  impossible = TRUE
            } else prob = rep.int(1, ncol(h))
            list(hap = h, prob = as.numeric(prob))
        })
    }
    attr(dat, "impossible") = impossible
    dat
}


.startdata_MD = function(x, marker, eliminate = 0) {
    startprob <- function(h, model, afreq, aff, founder) {
        pat = h[1, ]
        mat = h[2, ]
        al1 = abs(pat)
        al2 = abs(mat)
        d.no = (pat < 0) + (mat < 0)
        prob = switch(aff + 1, rep.int(1, length(d.no)), (1 - model$penetrances)[d.no + 1],
            model$penetrances[d.no + 1])
        if (founder)
            prob = prob * afreq[al1] * afreq[al2] * ((al1 != al2) + 1) * model$dfreq^d.no *
                (1 - model$dfreq)^(2 - d.no)
        as.numeric(prob)
    }

    startprob_X <- function(h, model, afreq, sex, aff, founder) {
        switch(sex, {
            mat = h  #vector
            d.no = as.numeric(mat < 0)
            prob <- switch(aff + 1, rep.int(1, length(d.no)), (1 - model$penetrances$male)[d.no +
                1], model$penetrances$male[d.no + 1])
            if (founder) prob <- prob * afreq[abs(mat)] * c(1 - model$dfreq, model$dfreq)[d.no +
                1]
        }, {
            pat = h[1, ]
            mat = h[2, ]
            al1 = abs(pat)
            al2 = abs(mat)
            d.no = (pat < 0) + (mat < 0)
            prob <- switch(aff + 1, rep.int(1, length(d.no)), (1 - model$penetrances$female)[d.no +
                1], model$penetrances$female[d.no + 1])
            if (founder) prob <- prob * afreq[al1] * afreq[al2] * ((al1 != al2) + 1) * model$dfreq^d.no *
                (1 - model$dfreq)^(2 - d.no)
        })
        as.numeric(prob)
    }

    ## MAIN ###
    afreq = attr(marker, "afreq")
    chromX = identical(attr(marker, "chrom"), 23)
    AFF = x$pedigree[, "AFF"]
    FOU = (1:x$nInd) %in% x$founders
    dlist = cbind(c(-1, -1), c(-1, 1), c(1, -1), c(1, 1))  #D=-1; N=1
    impossible = FALSE
    if (chromX) {
        glist = .build_genolist_X(x, marker, eliminate)
        if (attr(glist, "impossible")) {
            dat = list()
            attr(dat, "impossible") = TRUE
            return(dat)
        }

        SEX = x$pedigree[, "SEX"]
        dat = lapply(1:x$nInd, function(i) {
            switch(SEX[i], {
                hap = c(glist[[i]], -glist[[i]])
                prob = startprob_X(hap, model = x$model, afreq = afreq, sex = 1, aff = AFF[i],
                  founder = FOU[i])
                list(hap = hap[prob > 0], prob = prob[prob > 0])
            }, {
                gl = ncol(glist[[i]])
                hap = glist[[i]][, rep(1:gl, each = 4), drop = F] * dlist[, rep(1:4, times = gl),
                  drop = F]
                prob = startprob_X(hap, model = x$model, afreq = afreq, sex = 2, aff = AFF[i],
                  founder = FOU[i])
                if (sum(prob) == 0) impossible = TRUE
                list(hap = hap[, prob > 0, drop = F], prob = as.numeric(prob[prob > 0]))
            })
        })
    } else {
        glist = .build_genolist(x, marker, eliminate)
        if (attr(glist, "impossible")) {
            dat = list()
            attr(dat, "impossible") = TRUE
            return(dat)
        }

        dat = lapply(1:x$nInd, function(i) {
            gl = ncol(glist[[i]])
            hap = glist[[i]][, rep(1:gl, each = 4), drop = F] * dlist[, rep(1:4, times = gl),
                drop = F]
            prob = startprob(hap, model = x$model, afreq = afreq, aff = AFF[i], founder = FOU[i])
            if (sum(prob) == 0)
                impossible = TRUE
            list(hap = hap[, prob > 0, drop = F], prob = as.numeric(prob[prob > 0]))
        })
    }
    attr(dat, "impossible") = impossible
    dat
}


.startdata_MM = function(x, marker1, marker2, eliminate = 0) {

    startprob <- function(h, afreq1, afreq2, founder) {
        if (founder) {
            m1_1 = h[1, ]
            m1_2 = h[2, ]
            m2_1 = h[3, ]
            m2_2 = h[4, ]
            hetfact = ((m1_1 != m1_2 | m2_1 != m2_2) + 1)  #multiply with two if heteroz for at least 1 marker. If heteroz for both, then both phases are included in h, hence the factor 2 (not 4) in this case as well.
            prob = afreq1[m1_1] * afreq1[m1_2] * afreq2[m2_1] * afreq2[m2_2] * hetfact
            return(as.numeric(prob))
        }
        return(rep.int(1, ncol(h)))
    }

    startprob_X <- function(h, afreq1, afreq2, sex, founder) {
        if (founder && sex == 1)
            return(as.numeric(afreq1[h[1, ]] * afreq2[h[2, ]]))
        startprob(h, afreq1, afreq2, founder)
    }

    afreq1 = attr(marker1, "afreq")
    afreq2 = attr(marker2, "afreq")
    chromX = identical(attr(marker1, "chrom"), 23)
    impossible = FALSE

    if (chromX) {
        sex = x$pedigree[, "SEX"]
        m1_list = .build_genolist_X(x, marker1, eliminate)
        m2_list = .build_genolist_X(x, marker2, eliminate)
        if (attr(m1_list, "impossible") || attr(m2_list, "impossible")) {
            dat = list()
            attr(dat, "impossible") = TRUE
            return(dat)
        }

        dat = lapply(1:x$nInd, function(i) {
            sexi = sex[i]
            founder = i %in% x$founders
            h1 = m1_list[[i]]
            h2 = m2_list[[i]]
            if (sexi == 1)
                hap = rbind(rep(h1, each = length(h2)), rep(h2, times = length(h1)))  #matrix with two rows: m1, m2
 else {
                hl1 = dim(h1)[2]
                hl2 = dim(h2)[2]
                hap = rbind(h1[, rep(seq_len(hl1), each = hl2), drop = F], h2[, rep(seq_len(hl2),
                  , times = hl1), drop = FALSE])  #matrix with four rows: m1_1, m1_2, m2_1, m2_2
                if (founder) {
                  # doubly heterozygous founders: Include the other phase as well. (This is necessary since
                  # .build_genolist returns unordered genotypes for founders.)
                  doublyhet = hap[1, ] != hap[2, ] & hap[3, ] != hap[4, ]
                  if (any(doublyhet))
                    hap = cbind(hap, hap[c(1, 2, 4, 3), doublyhet, drop = FALSE])
                }
            }
            prob = startprob_X(hap, afreq1 = afreq1, afreq2 = afreq2, sex = sexi, founder = founder)
            keep = prob > 0
            if (!any(keep))
                impossible = TRUE
            list(hap = hap[, keep, drop = F], prob = as.numeric(prob[keep]))
        })
    } else {
        m1_list = .build_genolist(x, marker1, eliminate)
        m2_list = .build_genolist(x, marker2, eliminate)
        if (attr(m1_list, "impossible") || attr(m2_list, "impossible")) {
            dat = list()
            attr(dat, "impossible") = TRUE
            return(dat)
        }

        dat = lapply(1:x$nInd, function(i) {
            h1 = m1_list[[i]]
            hl1 = dim(h1)[2]
            h2 = m2_list[[i]]
            hl2 = dim(h2)[2]
            hap = rbind(h1[, rep(seq_len(hl1), each = hl2), drop = F], h2[, rep(seq_len(hl2),
                , times = hl1), drop = FALSE])  #matrix with four rows: m1_1, m1_2, m2_1, m2_2
            if (i %in% x$founders) {
                # doubly heterozygous founders: Include the other phase as well. (This is necessary since
                # .build_genolist returns unordered genotypes for founders.)
                doublyhet = hap[1, ] != hap[2, ] & hap[3, ] != hap[4, ]
                if (any(doublyhet))
                  hap = cbind(hap, hap[c(1, 2, 4, 3), doublyhet, drop = FALSE])
            }
            prob = startprob(hap, afreq1 = afreq1, afreq2 = afreq2, founder = (i %in% x$founders))
            keep = prob > 0
            if (!any(keep))
                impossible = TRUE
            list(hap = hap[, keep, drop = F], prob = as.numeric(prob[keep]))
        })
    }
    attr(dat, "impossible") = impossible
    dat
}


#### .BUILD_GENOLIST and ELIMINATE

.build_genolist <- function(x, marker, eliminate = 0) {
    # mm: marker matrix, dim = (nInd , 2)
    n = attr(marker, "nalleles")
    nseq = seq_len(n)

    .COMPLETE = {
        tmp1 = rep(nseq, each = n)
        tmp2 = rep.int(nseq, times = n)
        fath = c(tmp1, tmp2)
        moth = c(tmp2, tmp1)
        rbind(fath, moth, deparse.level = 0)[, !duplicated.default(fath * 1000 + moth), drop = F]  #faster than unique(m, MARGIN=2)
    }
    genolist = lapply(1:x$nInd, function(i) {
        g_i = marker[i, ]
        m = switch(sum(g_i == 0) + 1, {
            a = g_i[1]
            b = g_i[2]
            if (a == b) cbind(g_i, deparse.level = 0) else cbind(g_i, c(b, a), deparse.level = 0)
        }, {
            nz = g_i[g_i != 0]
            rbind(c(nseq, rep.int(nz, n - 1)), c(rep.int(nz, n), nseq[-nz]), deparse.level = 0)
        }, {
            .COMPLETE
        })
        if ((i %in% x$founders) && (!x$orig.ids[i] %in% x$loop_breakers[, 2]))
            m = m[, m[1, ] <= m[2, ], drop = FALSE]
        m
    })
    attr(genolist, "impossible") = FALSE

    # If mutations, don't eliminate any genotypes
    if (!is.null(attr(marker, "mutmat")))
        return(genolist)

    .eliminate(x, genolist, n, repeats = eliminate)
}


.eliminate = function(x, genolist, nall, repeats = 0) {
    if (repeats == 0 || attr(genolist, "impossible"))
        return(genolist)
    offs = lapply(1:x$nInd, function(i) offspring(x, i, original.id = FALSE))
    ncols_ny = unlist(lapply(genolist, ncol))
    p = x$pedigree
    informative = logical(x$nInd)
    for (k in seq_len(repeats)) {
        ncols = ncols_ny
        informative[x$founders] = (ncols[x$founders] < nall * (nall + 1)/2)
        informative[x$nonfounders] = (ncols[x$nonfounders] < nall^2)
        for (i in 1:x$nInd) {
            if (ncols[i] == 1)
                next
            g = genolist[[i]]
            kjonn = p[i, "SEX"]
            if (i %in% x$nonfounders && informative[far <- p[i, "FID"]])
                g = g[, g[1, ] %in% genolist[[far]][1, ] | g[1, ] %in% genolist[[far]][2, ],
                  drop = F]
            if (i %in% x$nonfounders && informative[mor <- p[i, "MID"]])
                g = g[, g[2, ] %in% genolist[[mor]][1, ] | g[2, ] %in% genolist[[mor]][2, ],
                  drop = F]
            barn = offs[[i]]
            for (b in barn[informative[barn]]) {
                g = g[, g[1, ] %in% genolist[[b]][kjonn, ] | g[2, ] %in% genolist[[b]][kjonn,
                  ], drop = F]
            }
            genolist[[i]] = g
        }
        ncols_ny = unlist(lapply(genolist, ncol))
        if (any(ncols_ny == 0)) {
            attr(genolist, "impossible") = TRUE
            return(genolist)
        }
        if (sum(ncols_ny) == sum(ncols))
            return(genolist)
    }
    genolist
}


.reduce_alleles = function(marker) {
    if (all(marker != 0))
        return(marker)  # no reduction needed (OK!)
    attrs = attributes(marker)

    if (!is.null(attrs$mutmat)) {
        malem = attrs$mutmat$male
        femalem = attrs$mutmat$female
        male_lump = identical(attr(malem, "lumpability"), "always")
        female_lump = identical(attr(femalem, "lumpability"), "always")
        if (!male_lump || !female_lump)
            return(marker)
    }
    orig_alleles = attrs$alleles

    # indices of observed alleles
    present = sort.int(setdiff(unique.default(as.numeric(marker)), 0))
    if (length(present) >= length(orig_alleles) - 1)
        return(marker)  # return unchanged if all, or all but one, are observed

    redund = setdiff(1:attrs$nalleles, present)
    dummylab = paste(orig_alleles[redund], collapse = "_")

    if (length(present) == 0) {
        new_marker = rep.int(0, length(marker))
        attributes(new_marker) = modifyList(attrs, list(alleles = dummylab, nalleles = 1, afreq = 1))
        if (!is.null(attrs$mutmat)) {
            mm = matrix(1, dimnames = list(dummylab, dummylab))
            attr(new_marker, "mutmat") = list(male = mm, female = mm)
        }
        return(new_marker)
    }

    new_marker = match(marker, present, nomatch = 0)
    new_alleles = c(orig_alleles[present], dummylab)
    present_freq = attrs$afreq[present]
    new_freq = c(present_freq, 1 - sum(present_freq))
    n = length(present) + 1

    attributes(new_marker) = modifyList(attrs, list(alleles = new_alleles, nalleles = n, afreq = new_freq))

    if (!is.null(attrs$mutmat)) {
        if (male_lump) {
            mm = malem[c(present, redund[1]), c(present, redund[1])]
            mm[, n] = 1 - rowSums(mm[, -n, drop = F])
        }
        if (female_lump) {
            mf = femalem[c(present, redund[1]), c(present, redund[1])]
            mf[, n] = 1 - rowSums(mf[, -n, drop = F])
        }
        # for(i in 1:(n-1)) { present_i = present[i] #m_weight = f_weight = attrs$afreq[redund]
        # m_weight = malem[present_i, redund] f_weight = femalem[present_i, redund] mm[n, i] =
        # (m_weight/sum(m_weight)) %*% malem[redund, present_i] mf[n, i] = (f_weight/sum(f_weight))
        # %*% femalem[redund, present_i] }
        attr(new_marker, "mutmat") = list(male = mm, female = mf)
    }
    new_marker
}


#------------X-linked-------------------

.build_genolist_X <- function(x, marker, eliminate) {
    # marker: marker matrix, dim = (nInd , 2).
    n = attr(marker, "nalleles")
    nseq = seq_len(n)

    .COMPLETE = {
        tmp1 = rep(nseq, each = n)
        tmp2 = rep.int(nseq, times = n)
        fath = c(tmp1, tmp2)
        moth = c(tmp2, tmp1)
        rbind(fath, moth, deparse.level = 0)[, !duplicated.default(fath * 1000 + moth), drop = F]  #faster than unique(m, MARGIN=2)
    }
    SEX = x$pedigree[, "SEX"]
    females = (1:x$nInd)[SEX == 2]
    genolist = list()
    genolist[SEX == 1 & marker[, 1] == 0] = list(nseq)
    genolist[SEX == 1 & marker[, 1] != 0] = marker[SEX == 1 & marker[, 1] != 0, 1]
    genolist[females] = lapply(females, function(i) {
        g_i = marker[i, ]
        m = switch(sum(g_i == 0) + 1, {
            a = g_i[1]
            b = g_i[2]
            if (a == b) cbind(g_i, deparse.level = 0) else cbind(g_i, c(b, a), deparse.level = 0)
        }, {
            nz = g_i[g_i != 0]
            rbind(c(nseq, rep.int(nz, n - 1)), c(rep.int(nz, n), nseq[-nz]), deparse.level = 0)
        }, {
            .COMPLETE
        })
        if ((i %in% x$founders) && (!x$orig.ids[i] %in% x$loop_breakers))
            m = m[, m[1, ] <= m[2, ], drop = FALSE]
        m
    })
    attr(genolist, "impossible") = FALSE

    # If mutations, don't eliminate any genotypes
    if (!is.null(attr(marker, "mutmat")))
        return(genolist)

    .eliminate_X(x, genolist, n, eliminate)
}


.eliminate_X = function(x, genolist, nall, repeats = 0) {
    if (repeats == 0 || attr(genolist, "impossible"))
        return(genolist)
    SEX = x$pedigree[, "SEX"]
    FID = x$pedigree[, "FID"]
    MID = x$pedigree[, "MID"]
    males = (1:x$nInd)[SEX == 1]
    females = (1:x$nInd)[SEX == 2]
    fem_fou = .myintersect(females, x$founders)
    fem_nonfou = .myintersect(females, x$nonfounders)
    offs = lapply(1:x$nInd, function(i) offspring(x, i, original.id = FALSE))
    p = x$pedigree
    informative = logical(x$nInd)
    ncols_ny = lengths(genolist)/SEX  #males are vectors, females matrices w/ 2 rows
    for (k in seq_len(repeats)) {
        ncols = ncols_ny
        informative[males] = (ncols[males] < nall)
        informative[fem_fou] = (ncols[fem_fou] < nall * (nall + 1)/2)
        informative[fem_nonfou] = (ncols[fem_nonfou] < nall^2)
        for (i in males) {
            if (ncols[i] == 1)
                next
            g = genolist[[i]]
            if (i %in% x$nonfounders && informative[mor <- MID[i]])
                g = g[g %in% genolist[[mor]][1, ] | g %in% genolist[[mor]][2, ]]
            barn = offs[[i]]
            for (b in barn[informative[barn] & SEX[barn] == 2]) g = g[g %in% genolist[[b]][1,
                ]]
            genolist[[i]] = g
        }
        for (i in females) {
            if (ncols[i] == 1)
                next
            g = genolist[[i]]
            if (i %in% x$nonfounders && informative[far <- FID[i]])
                g = g[, g[1, ] %in% genolist[[far]], drop = F]
            if (i %in% x$nonfounders && informative[mor <- MID[i]])
                g = g[, g[2, ] %in% genolist[[mor]][1, ] | g[2, ] %in% genolist[[mor]][2, ],
                  drop = F]
            barn = offs[[i]]
            for (b in barn[informative[barn]]) {
                if (SEX[b] == 1)
                  g = g[, g[1, ] %in% genolist[[b]] | g[2, ] %in% genolist[[b]], drop = F] else g = g[, g[1, ] %in% genolist[[b]][2, ] | g[2, ] %in% genolist[[b]][2,
                  ], drop = F]
            }
            genolist[[i]] = g
        }
        ncols_ny = lengths(genolist)/SEX
        if (any(ncols_ny == 0)) {
            attr(genolist, "impossible") = TRUE
            return(genolist)
        }
        if (sum(ncols_ny) == sum(ncols))
            return(genolist)
    }
    genolist
}


#### PEELING FUNCTIONS (one for each case: single Marker, Marker-Disease, Marker-Marker)

.peel_M <- function(dat, sub, chrom, SEX, mutmat = NULL) {
    far = sub[["father"]]
    mor = sub[["mother"]]
    offs = sub[["offspring"]]
    piv = sub[["pivot"]]
    pivtype = sub[["pivtype"]]
    if (pivtype == 3)
        offs = offs[offs != piv]  #pivtype indicates who is pivot: 0 = none; 1 = father; 2 = mother; 3 = an offspring
    farh = dat[[c(far, 1)]]
    morh = dat[[c(mor, 1)]]
    likel = dat[[c(far, 2)]] %*% t.default(dat[[c(mor, 2)]])
    dims = dim(likel)
    fa_len = dims[1L]
    mo_len = dims[2L]

    .trans_M <- function(parent, childhap, mutmat = NULL) {
        # parent = matrix with 2 rows; childhap = vector of any length (parental allele); mutmat =
        # mutation matrix
        if (is.null(mutmat))
            unlist(lapply(seq_len(ncol(parent)), function(i) ((parent[1, i] == childhap) +
                (parent[2, i] == childhap))/2)) else unlist(lapply(seq_len(ncol(parent)), function(i) (mutmat[parent[1, i], childhap] +
            mutmat[parent[2, i], childhap])/2))
    }

    switch(chrom, AUTOSOMAL = {
        for (datb in dat[offs]) {
            bh = datb[[1]]
            bp = datb[[2]]
            bl = length(bp)
            trans_pats = .trans_M(farh, bh[1, ], mutmat = mutmat$male)
            trans_mats = .trans_M(morh, bh[2, ], mutmat = mutmat$female)
            dim(trans_mats) = c(bl, mo_len)
            trans_mats_rep = as.numeric(do.call(rbind, rep(list(trans_mats), fa_len)))
            mm = .colSums((trans_pats * bp) * trans_mats_rep, bl, fa_len * mo_len)
            likel = likel * mm
        }
        if (pivtype == 0) return(sum(likel))

        res = switch(pivtype, .rowSums(likel, fa_len, mo_len), .colSums(likel, fa_len, mo_len),
            {
                pivh = dat[[c(piv, 1)]]
                pivp = dat[[c(piv, 2)]]
                pi_len = length(pivp)
                T = numeric(fa_len * mo_len * pi_len)
                dim(T) = c(fa_len, mo_len, pi_len)
                trans_pats = .trans_M(farh, pivh[1, ], mutmat = mutmat$male)
                dim(trans_pats) = c(pi_len, fa_len)
                trans_mats = .trans_M(morh, pivh[2, ], mutmat = mutmat$female)
                dim(trans_mats) = c(pi_len, mo_len)
                for (i in seq_len(fa_len)) {
                  transpat = trans_pats[, i]
                  for (j in seq_len(mo_len)) T[i, j, ] = transpat * trans_mats[, j]
                }
                arr = as.vector(T) * as.vector(likel)
                dim(arr) = dim(T)
                res = .colSums(arr, fa_len * mo_len, pi_len)  #sum for each entry of haps[[piv]]
                res * pivp
            })
        pivhap_update = dat[[c(piv, 1)]][, res > 0, drop = F]
    }, X = {
        for (b in offs) {
            datb = dat[[b]]
            bh = datb[[1]]
            bp = datb[[2]]
            bl = length(bp)
            switch(SEX[b], {
                trans_mats = .trans_M(morh, bh, mutmat = mutmat$female)
                mm = rep(.colSums(trans_mats * bp, bl, mo_len), each = fa_len)
            }, {
                trans_pats = if (is.null(mutmat)) unlist(lapply(farh, function(fh) as.numeric(fh ==
                  bh[1, ]))) else unlist(lapply(farh, function(fh) mutmat$male[fh, bh[1, ]]))

                trans_mats = .trans_M(morh, bh[2, ], mutmat = mutmat$female)
                dim(trans_mats) = c(bl, mo_len)
                trans_mats_rep = as.numeric(do.call(rbind, rep(list(trans_mats), fa_len)))
                mm = .colSums((trans_pats * bp) * trans_mats_rep, bl, fa_len * mo_len)
            })
            likel = likel * mm
        }
        if (pivtype == 0) return(sum(likel))

        res = switch(pivtype, .rowSums(likel, fa_len, mo_len), .colSums(likel, fa_len, mo_len),
            {
                pivh = dat[[c(piv, 1)]]
                pivp = dat[[c(piv, 2)]]
                pi_len = length(pivp)
                switch(SEX[piv], {
                  trans_mats = .trans_M(morh, pivh, mutmat = mutmat$female)
                  T = rep(trans_mats, each = fa_len)
                }, {
                  T = numeric(fa_len * mo_len * pi_len)
                  dim(T) = c(fa_len, mo_len, pi_len)
                  trans_mats = .trans_M(morh, pivh[2, ], mutmat = mutmat$female)
                  dim(trans_mats) = c(pi_len, mo_len)
                  for (i in seq_len(fa_len)) {
                    trans_pats = if (is.null(mutmat)) as.numeric(farh[i] == pivh[1, ]) else mutmat$male[farh[i],
                      pivh[1, ]]
                    T[i, , ] = t.default(trans_pats * trans_mats)  #TODO:make faster?
                  }
                })
                arr = as.vector(T) * as.vector(likel)
                dim(arr) = dim(T)
                res = .colSums(arr, fa_len * mo_len, pi_len)  #sum for each entry of haps[[piv]]
                res * pivp
            })
        pivhap_update = switch(SEX[piv], dat[[c(piv, 1)]][res > 0], dat[[c(piv, 1)]][, res >
            0, drop = F])
    })
    dat[[piv]] = list(hap = pivhap_update, prob = res[res > 0])
    if (sum(res) == 0)
        attr(dat, "impossible") = TRUE
    return(dat)
}


.peel_MD <- function(dat, sub, theta, chrom, SEX) {
    far = sub[["father"]]
    mor = sub[["mother"]]
    offs = nonpiv.offs = sub[["offspring"]]
    piv = sub[["pivot"]]
    pivtype = sub[["pivtype"]]
    if (pivtype == 3)
        non.pivoffs = offs[offs != piv]
    farh = dat[[c(far, 1)]]
    morh = dat[[c(mor, 1)]]
    likel = dat[[c(far, 2)]] %*% t.default(dat[[c(mor, 2)]])
    dims = dim(likel)
    fa_len = dims[1L]
    mo_len = dims[2L]

    .trans_MD <- function(parent_ph, child_haplo, theta) {
        if (theta == 0)
            return(((parent_ph[1] == child_haplo) + (parent_ph[2] == child_haplo))/2)
        parent_rec = abs(parent_ph) * sign(parent_ph[2:1])
        ((parent_ph[1] == child_haplo) * (1 - theta) + (parent_ph[2] == child_haplo) * (1 -
            theta) + (parent_rec[1] == child_haplo) * theta + (parent_rec[2] == child_haplo) *
            theta)/2
    }

    switch(chrom, AUTOSOMAL = {
        alloffs_pat = unique.default(unlist(lapply(offs, function(i) dat[[c(i, 1)]][1, ])))
        alloffs_mat = unique.default(unlist(lapply(offs, function(i) dat[[c(i, 1)]][2, ])))
        fathertrans = unlist(lapply(seq_len(fa_len), function(i) .trans_MD(farh[, i], alloffs_pat,
            theta)))
        mothertrans = unlist(lapply(seq_len(mo_len), function(j) .trans_MD(morh[, j], alloffs_mat,
            theta)))
        dim(fathertrans) = c(length(alloffs_pat), fa_len)
        dim(mothertrans) = c(length(alloffs_mat), mo_len)
        mm_init = numeric(fa_len * mo_len)
        dim(mm_init) = dims
        for (b in nonpiv.offs) {
            mm = mm_init
            bh = dat[[c(b, 1)]]
            bp = dat[[c(b, 2)]]
            b_pat = match(bh[1, ], alloffs_pat)
            b_mat = match(bh[2, ], alloffs_mat)
            for (i in seq_len(fa_len)) {
                trans_pats = fathertrans[b_pat, i]
                for (j in seq_len(mo_len)) mm[i, j] = (trans_pats * mothertrans[b_mat, j]) %*%
                  bp
            }
            likel = likel * mm
        }
        if (pivtype == 0) return(sum(likel))

        res = switch(pivtype, .rowSums(likel, fa_len, mo_len), .colSums(likel, fa_len, mo_len),
            {
                pivh = dat[[c(piv, 1)]]
                pivp = dat[[c(piv, 2)]]
                pi_len = length(pivp)
                piv_pat = match(pivh[1, ], alloffs_pat)
                piv_mat = match(pivh[2, ], alloffs_mat)
                T = numeric(fa_len * mo_len * pi_len)
                dim(T) = c(fa_len, mo_len, pi_len)
                for (i in seq_len(fa_len)) {
                  trans_pats = fathertrans[piv_pat, i]
                  for (j in seq_len(mo_len)) T[i, j, ] <- trans_pats * mothertrans[piv_mat,
                    j]
                }
                arr = as.vector(T) * as.vector(likel)
                dim(arr) = dim(T)
                res = .colSums(arr, fa_len * mo_len, pi_len)  #sum for each entry of haps[[piv]]
                res = res * pivp
            })
        pivhap_update = dat[[c(piv, 1)]][, res > 0, drop = F]
    }, X = {
        mm_init = numeric(length(likel))
        dim(mm_init) = dims
        for (b in nonpiv.offs) {
            mm = mm_init
            switch(SEX[b], for (j in seq_len(mo_len)) mm[, j] = .trans_MD(morh[, j], dat[[c(b,
                1)]], theta) %*% dat[[c(b, 2)]], {
                for (j in seq_len(mo_len)) {
                  transmother = .trans_MD(morh[, j], dat[[c(b, 1)]][2, ], theta)
                  for (i in seq_len(fa_len)) mm[i, j] = (as.numeric(farh[i] == dat[[c(b, 1)]][1,
                    ]) * transmother) %*% dat[[c(b, 2)]]
                }
            })
            likel = likel * mm
        }
        if (pivtype == 0) return(sum(likel))

        res = switch(pivtype, .rowSums(likel, fa_len, mo_len), .colSums(likel, fa_len, mo_len),
            {
                pivh = dat[[c(piv, 1)]]
                pivp = dat[[c(piv, 2)]]
                pi_len = length(pivp)
                T = numeric(fa_len * mo_len * pi_len)
                dim(T) = c(fa_len, mo_len, pi_len)
                switch(SEX[piv], {
                  for (j in seq_len(mo_len)) {
                    transmother = .trans_MD(morh[, j], pivh, theta)
                    for (i in seq_len(fa_len)) T[i, j, ] = transmother
                  }
                }, {
                  for (j in seq_len(mo_len)) {
                    transmother = .trans_MD(morh[, j], pivh[2, ], theta)
                    for (i in seq_len(fa_len)) T[i, j, ] = (as.numeric(farh[i] == pivh[1, ]) *
                      transmother)
                  }
                })

                arr = as.vector(T) * as.vector(likel)
                dim(arr) = dim(T)
                res = .colSums(arr, fa_len * mo_len, pi_len)  #sum for each entry of haps[[piv]]
                res * pivp
            })
        pivhap_update = switch(SEX[piv], dat[[c(piv, 1)]][res > 0], dat[[c(piv, 1)]][, res >
            0, drop = F])
    })
    dat[[piv]] = list(hap = pivhap_update, prob = res[res > 0])
    if (all(res == 0))
        attr(dat, "impossible") = TRUE
    return(dat)
}


.peel_MM <- function(dat, sub, theta, chrom, SEX) {
    far = sub[["father"]]
    mor = sub[["mother"]]
    offs = sub[["offspring"]]
    piv = sub[["pivot"]]
    pivtype = sub[["pivtype"]]
    if (pivtype == 3)
        offs = offs[offs != piv]

    farh = dat[[c(far, 1)]]
    morh = dat[[c(mor, 1)]]
    likel = dat[[c(far, 2)]] %*% t.default(dat[[c(mor, 2)]])
    dims = dim(likel)
    fa_len = dims[1L]
    mo_len = dims[2L]
    mm_init = numeric(length(likel))
    dim(mm_init) = dims

    .trans_MM <- function(parent.haps, gamete.hap, theta) {
        # parent.haps = c(M1_1, M1_2, M2_1, M2_2)
        if (is.matrix(gamete.hap))
            vapply(seq_len(ncol(gamete.hap)), function(kol) .trans_MM(parent.haps, gamete.hap[,
                kol], theta), 1) else sum(c(all(parent.haps[c(1, 3)] == gamete.hap), all(parent.haps[c(2, 4)] == gamete.hap),
            all(parent.haps[c(1, 4)] == gamete.hap), all(parent.haps[c(2, 3)] == gamete.hap)) *
            c(1 - theta, 1 - theta, theta, theta))/2
    }

    switch(chrom, AUTOSOMAL = {
        for (b in offs) {
            mm = mm_init
            bh = dat[[c(b, 1)]]
            bp = dat[[c(b, 2)]]
            for (i in seq_len(fa_len)) {
                transfather = .trans_MM(farh[, i], bh[c(1, 3), , drop = F], theta)
                for (j in seq_len(mo_len)) mm[i, j] = (transfather * .trans_MM(morh[, j], bh[c(2,
                  4), , drop = F], theta)) %*% bp
            }
            likel <- likel * mm
        }
        if (pivtype == 0) return(sum(likel))

        res = switch(pivtype, .rowSums(likel, fa_len, mo_len), .colSums(likel, fa_len, mo_len),
            {
                pivh = dat[[c(piv, 1)]]
                pivp = dat[[c(piv, 2)]]
                pi_len = length(pivp)
                T = numeric(fa_len * mo_len * pi_len)
                dim(T) = c(fa_len, mo_len, pi_len)
                for (i in seq_len(fa_len)) {
                  transfather = .trans_MM(farh[, i], pivh[c(1, 3), , drop = F], theta)
                  for (j in seq_len(mo_len)) T[i, j, ] <- transfather * .trans_MM(morh[, j],
                    pivh[c(2, 4), , drop = F], theta)
                }
                arr = as.vector(T) * as.vector(likel)
                dim(arr) = dim(T)
                res = .colSums(arr, fa_len * mo_len, pi_len)  #sum for each entry of haps[[piv]]
                res = res * pivp
            })
        pivhap_update = dat[[c(piv, 1)]][, res > 0, drop = F]
    }, X = {
        for (b in offs) {
            mm = mm_init
            bh = dat[[c(b, 1)]]
            bp = dat[[c(b, 2)]]
            if (SEX[b] == 1) for (j in seq_len(mo_len)) mm[, j] = .trans_MM(morh[, j], bh,
                theta) %*% bp else for (j in seq_len(mo_len)) {
                transmother = .trans_MM(morh[, j], bh[c(2, 4), , drop = F], theta)
                for (i in seq_len(fa_len)) mm[i, j] = (as.numeric(farh[1, i] == bh[1, ] & farh[2,
                  i] == bh[3, ]) * transmother) %*% bp
            }
            likel = likel * mm
        }
        if (pivtype == 0) return(sum(likel))

        res = switch(pivtype, .rowSums(likel, fa_len, mo_len), .colSums(likel, fa_len, mo_len),
            {
                pivh = dat[[c(piv, 1)]]
                pivp = dat[[c(piv, 2)]]
                pi_len = length(pivp)
                T = numeric(fa_len * mo_len * pi_len)
                dim(T) = c(fa_len, mo_len, pi_len)
                if (SEX[piv] == 1) {
                  for (j in seq_len(mo_len)) {
                    transmother = .trans_MM(morh[, j], pivh, theta)
                    for (i in seq_len(fa_len)) T[i, j, ] = transmother
                  }
                } else {
                  for (j in seq_len(mo_len)) {
                    transmother = .trans_MM(morh[, j], pivh[c(2, 4), ], theta)
                    for (i in seq_len(fa_len)) T[i, j, ] = (as.numeric(farh[1, i] == pivh[1,
                      ] & farh[2, i] == pivh[3, ]) * transmother)
                  }
                }
                arr = as.vector(T) * as.vector(likel)
                dim(arr) = dim(T)
                res = .colSums(arr, fa_len * mo_len, pi_len)  #sum for each entry of haps[[piv]]
                res * pivp
            })
        pivhap_update = dat[[c(piv, 1)]][, res > 0, drop = F]
    })
    dat[[piv]] = list(hap = pivhap_update, prob = res[res > 0])
    if (sum(res) == 0)
        attr(dat, "impossible") = TRUE
    return(dat)
}



###### OTHER AUXILIARY FUNCTIONS

.informative = function(x, marker, marker2 = NULL) {
    # Trim pedigree by removing leaves without genotypes, and also remove completely
    # uninformative subnucs.
    if (!is.null(marker2))
        marker = marker + marker2
    if (all(marker[, 1] > 0))
        return(list(subnucs = x$subnucs, newfounders = numeric(0)))
    newfounders = numeric(0)
    new_subnucs = list()
    p = x$pedigree
    is_miss = marker[, 1] == 0 & marker[, 2] == 0
    is_miss[loop_int <- .internalID(x, x$loop_breakers)] = F  # works (and quick) also if no loops.
    is_uninf_leaf = is_miss & !p[, "ID"] %in% p[, c("FID", "MID")]
    is_uninf_fou = is_miss & seq_len(x$nInd) %in% x$founders

    for (sub in x$subnucs) {
        fa = sub[["father"]]
        mo = sub[["mother"]]
        offs = sub[["offspring"]]
        pivot = sub[["pivot"]]
        sub[["offspring"]] = offs[!is_uninf_leaf[offs]]
        noffs = length(sub[["offspring"]])
        switch(sub[["pivtype"]], {
            if (noffs == 0 && is_uninf_fou[mo]) sub = NULL
        }, {
            if (noffs == 0 && is_uninf_fou[fa]) sub = NULL
        }, {
            if (noffs == 1 && is_uninf_fou[fa] && is_uninf_fou[mo]) {
                newfounders = c(newfounders, pivot)
                sub = NULL
            }
        })
        if (!is.null(sub)) {
            new_subnucs = c(new_subnucs, list(sub))
            is_uninf_fou[pivot] = FALSE  #added in v0.8-1 to correct a bug marking certain 'middle' subnucs uninformative
        }
    }
    list(subnucs = new_subnucs, newfounders = newfounders)
}

Try the paramlink package in your browser

Any scripts or data that you put into this service are public.

paramlink documentation built on April 15, 2022, 9:06 a.m.