qtl: Tools for Analyzing QTL Experiments

Documented in fitstahl

######################################################################
#
# fitstahl.R
#
# copyright (c) 2006-2019, Karl W Broman
# last modified Dec, 2019
# first written Aug, 2006
#
#     This program is free software; you can redistribute it and/or
#     modify it under the terms of the GNU General Public License,
#     version 3, as published by the Free Software Foundation.
#
#     This program is distributed in the hope that it will be useful,
#     but without any warranty; without even the implied warranty of
#     merchantability or fitness for a particular purpose.  See the GNU
#     General Public License, version 3, for more details.
#
#     A copy of the GNU General Public License, version 3, is available
#     at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: fitstahl, fitstahl.estp, fitstahl.este, fitstahl.estpe,
#           fitstahl.estp.sub, fitstahl.este.sub, fitstahl.estpe.sub
#
######################################################################

######################################################################
# fitstahl
#
# Fit the Stahl model for crossover interference (or the chi-square
# model, which is a special case)
#
# cross: the cross object
#
# chr:   Chromosome(s) to use; if unspecified, pooled estimates for
#        all chromosomes are obtained
#
# m:     Interference parameter (a non-negative integer); if unspecified,
#        this is estimated
#
# p:     The proportion of chiasmata coming from the no interference
#        mechanism in the Stahl model (0 <= p <= 1).  p=0 gives the
#        chi-square model.  If unspecified, this is estimated
#
# error.prob  The genotyping error probability.  If = NULL, it is
#             estimated
#
# NOTE: If m, p, error.prob are all specified, they can be vectors
#       or have length 1; any with length > 1 should all have the
#       same length
#
# maxit  Maximum number of iterations
#
# tol    Tolerance for convergence
#
# maxm   Maximum value of m to consider, if m is unspecified.
#
######################################################################

fitstahl <-
    function(cross, chr, m, p, error.prob=0.0001, maxit=4000, tol=1e-4,
             maxm=15, verbose=TRUE)
{
    if(!inherits(cross, "cross"))
        stop("Input should have class \"cross\".")

    type <- crosstype(cross)
    if(type != "bc" && type != "f2")
        stop("fitstahl only working for backcrosses and intercrosses.")

    if(!missing(chr)) cross <- subset(cross, chr)

    if(!missing(error.prob) && any(error.prob > 0.5))
        warning("You probably want to use error.prob <= 0.5.")

    if(!missing(m)) { # m was specified
        if(!missing(p)) { # p was specified

            # m, p, error.prob all specified
            if(!is.null(error.prob)) {

                n <- c(length(m), length(p), length(error.prob))
                mn <- max(n)
                if(mn > 1) {
                    if(any(n > 1 & n < mn))
                        stop("Any m, p, error.prob with length > 1 must have same length")
                    if(length(m) == 1) m <- rep(m, mn)
                    if(length(p) == 1) p <- rep(p, mn)
                    if(length(error.prob) == 1) error.prob <- rep(error.prob, mn)
                }

                result <- matrix(ncol=4, nrow=mn)
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                for(i in 1:mn) {
                    temp <- fitstahl.estp.sub(p[i], cross, error.prob[i],
                                              m[i], tol, maxit)
                    result[i,] <- c(m[i], p[i], error.prob[i], as.numeric(temp))
                    cross <- replace.map(cross, attr(temp, "map"))
                    if(verbose) cat(i,result[i,], "\n")
                }
            }

            # m,p specified but error.prob wasn't
            else {
                if(length(m) == 1)
                    m <- rep(m, length(p))
                else {
                    if(length(p) == 1)
                        p <- rep(p, length(m))
                    else if(length(m) != length(p))
                        stop("Any m, p, error.prob with length > 1 must have same length")
                }
                mn <- length(m)

                result <- matrix(ncol=4, nrow=mn)
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                for(i in 1:mn) {
                    temp <- fitstahl.este(cross, m[i], p[i], tol, maxit)
                    cross <- replace.map(cross, attr(temp, "map"))
                    result[i,] <- c(m[i], p[i], temp$est, temp$loglik)
                    if(verbose) cat(i, result[i,], "\n")
                }
            }
        }
        else {
            # m, error.prob specified; p unspecified
            if(!is.null(error.prob)) {
                if(length(m) == 1)
                    m <- rep(m, length(error.prob))
                else {
                    if(length(error.prob) == 1)
                        error.prob <- rep(error.prob, length(m))
                    else if(length(m) != length(error.prob))
                        stop("Any m, p, error.prob with length > 1 must have same length")
                }
                mn <- length(m)

                result <- matrix(ncol=4, nrow=mn)
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                for(i in 1:mn) {
                    temp <- fitstahl.estp(cross, error.prob[i], m[i], tol, maxit)
                    cross <- replace.map(cross, attr(temp, "map"))
                    result[i,] <- c(m[i], temp$est, error.prob[i], temp$loglik)
                    if(verbose) cat(i, result[i,], "\n")
                }
            }

            # only m specified
            else {
                result <- matrix(ncol=4, nrow=length(m))
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                for(i in 1:length(m)) {
                    temp <- fitstahl.estpe(cross, m[i], tol, maxit)
                    cross <- replace.map(cross, attr(temp, "map"))
                    result[i,] <- c(m[i], temp$est, temp$loglik)
                    if(verbose) cat(i, result[i,], "\n")
                }
            }

        }
    }
    else { # m unspecified
        if(!missing(p)) { # p was specified

            # p, e specified
            if(!is.null(error.prob)) {
                if(length(p) == 1)
                    p <- rep(p, length(error.prob))
                else {
                    if(length(error.prob) == 1)
                        error.prob <- rep(error.prob, length(p))
                    else if(length(p) != length(error.prob))
                        stop("Any m, p, error.prob with length > 1 must have same length")
                }
                mn <- length(p)

                result <- matrix(ncol=4, nrow=mn)
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                for(i in 1:mn) {
                    # fit the case m=0
                    maxll <- fitstahl.estp.sub(p[i], cross, error.prob[i], 0, tol, maxit)
                    cross <- replace.map(cross, attr(maxll, "map"))
                    themax <- 0
                    if(verbose) cat(i, 0, maxll, "\n")
                    for(j in 1:maxm) {
                        curll <- fitstahl.estp.sub(p[i], cross, error.prob[i], j, tol, maxit)
                        cross <- replace.map(cross, attr(curll, "map"))
                        if(verbose) cat(i, j, curll, "\n")
                        if(curll < maxll) break
                        if(curll > maxll) {
                            maxll <- curll
                            themax <- j
                        }
                    }
                    result[i,] <- c(themax, p[i], error.prob[i], maxll)
                    if(verbose) cat(i, result[i,], "\n")
                }
            }

            # only p specified
            else {
                mn <- length(p)

                result <- matrix(ncol=4, nrow=mn)
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                for(i in 1:mn) {
                    # fit the case m=0
                    temp <- fitstahl.este(cross, 0, p[i], tol, maxit)
                    cross <- replace.map(cross, attr(temp, "map"))
                    maxll <- temp$loglik
                    themax <- 0
                    themaxe <- temp$est
                    if(verbose) cat(i, 0, 0, temp$est, maxll, "\n")
                    for(j in 1:maxm) {
                        temp <- fitstahl.este(cross, j, p[i], tol, maxit)
                        cross <- replace.map(cross, attr(temp, "map"))
                        curll <- temp$loglik
                        if(verbose) cat(i, j, p[i], temp$est, curll, "\n")
                        if(curll < maxll) break
                        if(curll > maxll) {
                            maxll <- curll
                            themax <- j
                            themaxe <- temp$est
                        }
                    }
                    result[i,] <- c(themax, p[i], themaxe, maxll)
                    if(verbose) cat(i, result[i,], "\n")
                }
            }
        }
        else {
            if(!is.null(error.prob)) { # error.prob specified; p unspecified
                mn <- length(error.prob)

                result <- matrix(ncol=4, nrow=mn)
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                for(i in 1:mn) {
                    # fit the case m=0 (in which case p doesn't matter)
                    maxll <- fitstahl.estp.sub(0, cross, error.prob[i], 0, tol, maxit)
                    cross <- replace.map(cross, attr(maxll, "map"))
                    themax <- 0
                    themaxp <- 0
                    if(verbose) cat(i, 0, 0, error.prob[i], maxll, "\n")
                    for(j in 1:maxm) {
                        temp <- fitstahl.estp(cross, error.prob[i], j, tol, maxit)
                        cross <- replace.map(cross, attr(temp, "map"))
                        curll <- temp$loglik
                        if(verbose) cat(i, j, temp$est, error.prob[i], curll, "\n")
                        if(curll < maxll) break
                        if(curll > maxll) {
                            maxll <- curll
                            themax <- j
                            themaxp <- temp$est
                        }
                    }
                    result[i,] <- c(themax, themaxp, error.prob[i], maxll)
                    if(verbose) cat(i, result[i,], "\n")
                }
            }

            else { # nothing specified

                result <- matrix(ncol=4, nrow=1)
                colnames(result) <- c("m", "p", "error.prob", "loglik")
                # fit the case m=0 (in which case p doesn't matter)
                temp <- fitstahl.este(cross, 0, 0, tol, maxit)
                cross <- replace.map(cross, attr(temp, "map"))
                maxll <- temp$loglik
                themax <- 0
                themaxest <- c(0, temp$est)
                if(verbose) cat(0, 0, temp$est, maxll, "\n")
                for(j in 1:maxm) {
                    temp <- fitstahl.estpe(cross, j, tol, maxit)
                    cross <- replace.map(cross, attr(temp, "map"))
                    curll <- temp$loglik
                    if(verbose) cat(j, temp$est, curll, "\n")
                    if(curll < maxll) break
                    if(curll > maxll) {
                        maxll <- curll
                        themax <- j
                        themaxest <- temp$est
                    }
                }
                result[1,] <- c(themax, themaxest, maxll)
                if(verbose) cat(1, result[1,], "\n")
            }
        }
    }
    as.data.frame(result, stringsAsFactors=TRUE)
}



######################################################################
# fitstahl.estp:  estimate p for fixed m and error.prob
######################################################################
fitstahl.estp <-
    function(cross, error.prob=0.0001, m=0, tol=1e-4, maxit=4000)
{
    out <- optimize(fitstahl.estp.sub, interval=c(0,1), maximum=TRUE,
                    cross=cross, m=m, error.prob=error.prob, thetol=tol,
                    maxit=maxit)

    # make sure we fit p=0
    temp <- fitstahl.estp.sub(0, cross, error.prob, m, tol, maxit)

    if(temp >= out[[2]]) {
        est <- 0
        loglik <- temp
        themap <- attr(temp, "map")
    }
    else {
        est <- out[[1]]
        loglik <- out[[2]]
        themap <- attr(out[[2]], "map")
    }
    out <- list(est=est, loglik=loglik)
    attr(out, "map") <- themap
    out
}

fitstahl.estp.sub <-
    function(p, cross, error.prob=0.0001, m=0, thetol=1e-4, maxit=4000)
{
    newmap <- est.map(cross, error.prob=error.prob, m=m, p=p, tol=thetol,
                      maxit=maxit)
    out <- sum(sapply(newmap, function(a) attr(a, "loglik")))

    attr(out, "map") <- newmap
    out
}

######################################################################
# fitstahl.este: estimate error.prob for fixed m and p
######################################################################
fitstahl.este <-
    function(cross, m=0, p=0, tol=1e-4, maxit=4000)
{
    out <- optimize(fitstahl.este.sub, interval=c(0,0.5), maximum=TRUE,
                    cross=cross, m=m, p=p, thetol=tol, maxit=maxit)

    # make sure we fit error.prob=0
    temp <- fitstahl.este.sub(0, cross, m, p, tol, maxit)
    if(temp >= out[[2]]) {
        est <- 0
        loglik <- temp
        themap <- attr(temp, "map")
    }
    else {
        est <- out[[1]]
        loglik <- out[[2]]
        themap <- attr(out[[2]], "map")
    }

    out <- list(est=est, loglik=loglik)
    attr(out, "map") <- themap
    out
}

fitstahl.este.sub <-
    function(error.prob, cross, m=0, p=0, thetol=1e-4, maxit=4000)
{
    newmap <- est.map(cross, error.prob=error.prob, m=m, p=p, tol=thetol,
                      maxit=maxit)
    out <- sum(sapply(newmap, function(a) attr(a, "loglik")))
    attr(out, "map") <- newmap
    out
}


######################################################################
# fitstahl.estpe: estimate p and error.prob for fixed m
######################################################################
fitstahl.estpe <-
    function(cross, m=0, tol=1e-4, maxit=4000)
{
    if(m==0) { # don't need to estimate p
        out <- fitstahl.este(cross, 0, 0, tol, maxit)
        out$est <- c(0, out$est)
        return(out)
    }

    out <- optim(c(0.1,0.01), fitstahl.estpe.sub, method="L-BFGS-B",
                 lower=c(0,0), upper=c(1,0.5),
                 control=list(fnscale=-1, maxit=maxit, factr=tol/1e-15),
                 cross=cross, m=m, thetol=tol, maxit=maxit)

    if(out$convergence !=0)
        warning(" Didn't converge.")

    # make sure we fit the case p=0
    temp <- fitstahl.este(cross, m, 0, tol, maxit)
    templl <- temp$loglik
    if(templl >= out$value) {
        est <- c(0, temp$est)
        loglik <- templl
        themap <- attr(temp, "map")
    }
    else {
        est <- out$par
        loglik <- out$value
        themap <- est.map(cross, error.prob=out$par[2], m=m, p=out$par[1],
                          maxit=maxit, tol=tol)
    }
    names(est) <- c("p", "error.prob")
    out <- list(est=est, loglik=loglik)
    attr(out, "map") <- themap
    out
}

fitstahl.estpe.sub <-
    function(x, cross, m=0, thetol=1e-4, maxit=4000)
{
    newmap <- est.map(cross, error.prob=x[2], m=m, p=x[1], tol=thetol,
                      maxit=maxit)
    out <- sum(sapply(newmap, function(a) attr(a, "loglik")))
    attr(out, "map") <- newmap
    out
}

# end of fitstahl.R