R/fitqtl.R
In qtl: Tools for Analyzing QTL Experiments

Documented in checkformula deparseQTLformula fitqtl fitqtlengine parseformula print.summary.fitqtl summary.fitqtl

######################################################################
#
# fitqtl.R
#
# copyright (c) 2002-2019, Hao Wu and Karl W. Broman
# last modified Dec, 2019
# first written Apr, 2002
#
#     This program is free software; you can redistribute it and/or
#     modify it under the terms of the GNU General Public License,
#     version 3, as published by the Free Software Foundation.
#
#     This program is distributed in the hope that it will be useful,
#     but without any warranty; without even the implied warranty of
#     merchantability or fitness for a particular purpose.  See the GNU
#     General Public License, version 3, for more details.
#
#     A copy of the GNU General Public License, version 3, is available
#     at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: fitqtl, fitqtlengine, parseformula, summary.fitqtl,
#           print.summary.fitqtl, mybinaryrep, deparseQTLformula
#           printQTLformulanicely
#
######################################################################

######################################################################
#
# This is the function to fit a model and generate some tables
#
# Only Haley-Knott regression and multiple imputation are implemented.
#
######################################################################

fitqtl <-
    function(cross, pheno.col=1, qtl, covar=NULL, formula, method=c("imp", "hk"),
             model=c("normal", "binary"), dropone=TRUE, get.ests=FALSE,
             run.checks=TRUE, tol=1e-4, maxit=1000, forceXcovar=FALSE)
{
    # some input checking stuff in here
    if( !inherits(cross, "cross") )
        stop("The cross argument must be an object of class \"cross\".")

    if( !inherits(qtl, "qtl") )
        stop("The qtl argument must be an object of class \"qtl\".")

    if(!is.null(covar) && !is.data.frame(covar)) {
        if(is.matrix(covar) && is.numeric(covar))
            covar <- as.data.frame(covar, stringsAsFactors=TRUE)
        else stop("covar should be a data.frame")
    }

    if(LikePheVector(pheno.col, nind(cross), nphe(cross))) {
        cross$pheno <- cbind(pheno.col, cross$pheno)
        pheno.col <- 1
    }

    if(length(pheno.col) > 1) {
        pheno.col <- pheno.col[1]
        warning("fitqtl can take just one phenotype; only the first will be used")
    }

    if(is.character(pheno.col)) {
        num <- find.pheno(cross, pheno.col)
        if(is.na(num))
            stop("Couldn't identify phenotype \"", pheno.col, "\"")
        pheno.col <- num
    }

    if(pheno.col < 1 | pheno.col > nphe(cross))
        stop("pheno.col values should be between 1 and the no. phenotypes")

    pheno <- cross$pheno[,pheno.col]
    if(!is.null(covar) && nrow(covar) != length(pheno))
        stop("nrow(covar) != no. individuals in cross.")

    method <- match.arg(method)
    model <- match.arg(model)

    if(model=="binary" && any(!is.na(pheno) & pheno != 0 & pheno != 1))
        stop("For model=\"binary\", phenotypes must by 0 or 1.")

    # allow formula to be a character string
    if(!missing(formula) && is.character(formula))
        formula <- as.formula(formula)

    if(method=="imp") {
        if(!("geno" %in% names(qtl))) {
            if("prob" %in% names(qtl)) {
                warning("The qtl object doesn't contain imputations; using method=\"hk\".")
                method <- "hk"
            }
            else
                stop("The qtl object needs to be created with makeqtl with what=\"draws\".")
        }
    }
    else {
        if(!("prob" %in% names(qtl))) {
            if("geno" %in% names(qtl)) {
                warning("The qtl object doesn't contain QTL genotype probabilities; using method=\"imp\".")
                method <- "imp"
            }
            else
                stop("The qtl object needs to be created with makeqtl with what=\"prob\".")
        }
    }

    if(qtl$n.ind != nind(cross)) {
        warning("No. individuals in qtl object doesn't match that in the input cross; re-creating qtl object.")
        if(method=="imp")
            qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
        else
            qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="prob")
    }
    if(method=="imp" && dim(qtl$geno)[3] != dim(cross$geno[[1]]$draws)[3])  {
        warning("No. imputations in qtl object doesn't match that in the input cross; re-creating qtl object.")
        qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
    }

    fitqtlengine(pheno=pheno, qtl=qtl, covar=covar, formula=formula,
                 method=method, model=model, dropone=dropone, get.ests=get.ests,
                 run.checks=run.checks, cross.attr=attributes(cross), crosstype=crosstype(cross),
                 sexpgm=getsex(cross), tol=tol, maxit=maxit, forceXcovar=forceXcovar)
}


fitqtlengine <-
    function(pheno, qtl, covar=NULL, formula, method=c("imp", "hk"),
             model=c("normal", "binary"),
             dropone=TRUE, get.ests=FALSE, run.checks=TRUE, cross.attr,
             crosstype, sexpgm, tol, maxit, forceXcovar=FALSE)
{
    model <- match.arg(model)
    method <- match.arg(method)

    # local variables
    n.ind <- qtl$n.ind # number of individuals
    n.qtl <- qtl$n.qtl # number of selected markers
    n.gen <- qtl$n.gen # number of genotypes
    if(method=="imp")
        n.draws <- dim(qtl$geno)[3] # number of draws

    if( is.null(covar) )  # number of covariates
        n.covar <- 0
    else
        n.covar <- ncol(covar)

    # if formula is missing, build one
    # all QTLs and covariates will be additive by default
    if(missing(formula)) {
        tmp.Q <- paste("Q", 1:n.qtl, sep="") # QTL term names
        formula <- "y~Q1"
        if(n.qtl > 1)
            for (i in 2:n.qtl)
                formula <- paste(formula, tmp.Q[i], sep="+")
        if (n.covar) { # if covariate is not empty
            tmp.C <- colnames(covar) # covariate term names
            for(i in 1:n.covar)
                formula <- paste(formula, tmp.C[i], sep="+")
        }
        formula <- as.formula(formula)
    }

    # check input formula
    if(run.checks) {
        formula <- checkformula(formula, qtl$altname, colnames(covar))

        if(qtl$n.ind != length(pheno))
            stop("No. individuals in qtl object doesn't match length of input phenotype.")

        # drop covariates that are not in the formula
        if(!is.null(covar)) {
            theterms <- rownames(attr(terms(formula), "factors"))
            m <- match(colnames(covar), theterms)
            if(all(is.na(m))) covar <- NULL
            else covar <- covar[,!is.na(m),drop=FALSE]
        }

        # check phenotypes and covariates; drop ind'ls with missing values
        if(!is.null(covar)) phcovar <- cbind(pheno, covar)
        else phcovar <- as.data.frame(pheno, stringsAsFactors=TRUE)
        if(any(is.na(phcovar))) {
            if(ncol(phcovar)==1) hasmissing <- is.na(phcovar)
            else hasmissing <- apply(phcovar, 1, function(a) any(is.na(a)))
            if(all(hasmissing))
                stop("All individuals are missing phenotypes or covariates.")
            if(any(hasmissing)) {
                warning("Dropping ", sum(hasmissing), " individuals with missing phenotypes.\n")
                pheno <- pheno[!hasmissing]
                n.ind <- qtl$n.ind <- sum(!hasmissing)
                if(method=="imp")
                    qtl$geno <- qtl$geno[!hasmissing,,,drop=FALSE]
                else
                    qtl$prob <- lapply(qtl$prob, function(a) a[!hasmissing,,drop=FALSE])

                if(!is.null(covar)) covar <- covar[!hasmissing,,drop=FALSE]

                # subset sexpgm
                for(i in seq(along=sexpgm))
                    if(!is.null(sexpgm[[i]]))
                        sexpgm[[i]] <- sexpgm[[i]][!hasmissing]
            }
        }
    }

    # parse the input formula
    p <- parseformula(formula, qtl$altname, colnames(covar))
    # make an array n.gen.QC to represent the genotype numbers
    # for all input QTLs and covariates. For covariates the
    # number of genotyps is 1. This makes programming easier
    n.gen.QC <- c(n.gen[p$idx.qtl]-1, rep(1, p$n.covar))

    # covariates to be passed to C function
    # This is done in case of that user input covar but has no covar in formula
    covar.C <- NULL
    if(!is.null(p$idx.covar))
        covar.C <- as.matrix(covar[,p$idx.covar,drop=FALSE])

    sizefull <- 1+sum(n.gen.QC)
    if(p$n.int > 0) {
        form <- p$formula.intmtx*n.gen.QC
        if(!is.matrix(form)) {
            sizefull <- sizefull + prod(form[form!=0])
        }
        else {
            form <- apply(form,2,function(a) prod(a[a != 0]))
            sizefull <- sizefull + sum(form)
        }
    }

    if(method != "imp") {
        # form genotype probabilities as a matrix
        prob <- matrix(ncol=sum(qtl$n.gen[p$idx.qtl]), nrow=n.ind)

        curcol <- 0
        for(i in p$idx.qtl) {
            prob[,curcol+1:n.gen[i]] <- qtl$prob[[i]]
            curcol <- curcol + n.gen[i]
        }
    }

    Xadjustment <- scanoneXnull(crosstype, sexpgm, cross.attr)
    n.origcovar <- p$n.covar
    if((sum(qtl$chrtype[p$idx.qtl]=="X") >= 1 || forceXcovar) && Xadjustment$adjustX) { # need to include X chromosome covariates
        adjustX <- TRUE

        n.newcovar <- ncol(Xadjustment$sexpgmcovar)
        n.gen.QC <- c(n.gen.QC, rep(1, n.newcovar))
        p$n.covar <- p$n.covar + n.newcovar
        covar.C <- cbind(covar.C, Xadjustment$sexpgmcovar)
        sizefull <- sizefull + n.newcovar

        if(p$n.int==1)
            p$formula.intmtx <- c(p$formula.intmtx, rep(0,n.newcovar))
        if(p$n.int>1) {
            for(i in 1:n.newcovar)
                p$formula.intmtx <- rbind(p$formula.intmtx, rep(0,p$n.int))
        }
    }
    else adjustX <- FALSE

    # call C function to do the genome scan
    if(model=="normal") {
        if(method == "imp") {
            z <- .C("R_fitqtl_imp",
                    as.integer(n.ind), # number of individuals
                    as.integer(p$n.qtl), # number of qtls
                    as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                    as.integer(n.draws), # number of draws
                    as.integer(qtl$geno[,p$idx.qtl,]), # genotypes for selected marker
                    as.integer(p$n.covar), # number of covariate
                    as.double(covar.C), # covariate
                    as.integer(p$formula.intmtx),  # formula matrix for interactive terms
                    as.integer(p$n.int), # number of interactions in the formula
                    as.double(pheno), # phenotype
                    as.integer(get.ests), # get estimates?
                    # return variables
                    lod=as.double(0), # LOD score
                    df=as.integer(0), # degree of freedom
                    ests=as.double(rep(0,sizefull)),
                    ests.cov=as.double(rep(0,sizefull*sizefull)),
                    design.mat=as.double(rep(0,sizefull*n.ind)),
                    matrix.rank=as.integer(0), # on return, minimum of matrix rank across imputations
                    resid=as.double(rep(0,n.ind)), # on return, the average residuals across imputations
                    PACKAGE="qtl")
        }
        else {
            z <- .C("R_fitqtl_hk",
                    as.integer(n.ind), # number of individuals
                    as.integer(p$n.qtl), # number of qtls
                    as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                    as.double(prob),      # QTL genotype probabilities
                    as.integer(p$n.covar), # number of covariate
                    as.double(covar.C), # covariates
                    as.integer(p$formula.intmtx),  # formula matrix for interactive terms
                    as.integer(p$n.int), # number of interactions in the formula
                    as.double(pheno), # phenotype
                    as.integer(get.ests), # get estimates?
                    # return variables
                    lod=as.double(0), # LOD score
                    df=as.integer(0), # degree of freedom
                    ests=as.double(rep(0,sizefull)),
                    ests.cov=as.double(rep(0,sizefull*sizefull)),
                    design.mat=as.double(rep(0,sizefull*n.ind)),
                    matrix.rank=as.integer(0), # on return, rank of matrix
                    resid=as.double(rep(0,n.ind)), # on return, residuals from the fit
                    PACKAGE="qtl")
        }
    }
    else {
        if(method=="imp") {
            z <- .C("R_fitqtl_imp_binary",
                    as.integer(n.ind), # number of individuals
                    as.integer(p$n.qtl), # number of qtls
                    as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                    as.integer(n.draws), # number of draws
                    as.integer(qtl$geno[,p$idx.qtl,]), # genotypes for selected marker
                    as.integer(p$n.covar), # number of covariate
                    as.double(covar.C), # covariate
                    as.integer(p$formula.intmtx),  # formula matrix for interactive terms
                    as.integer(p$n.int), # number of interactions in the formula
                    as.double(pheno), # phenotype
                    as.integer(get.ests), # get estimates?
                    # return variables
                    lod=as.double(0), # LOD score
                    df=as.integer(0), # degree of freedom
                    ests=as.double(rep(0,sizefull)),
                    ests.cov=as.double(rep(0,sizefull*sizefull)),
                    design.mat=as.double(rep(0,sizefull*n.ind)),
                    as.double(tol),
                    as.integer(maxit),
                    matrix.rank=as.integer(0), # on return, minimum of matrix rank across imputations
                    PACKAGE="qtl")
        }
        else {
            z <- .C("R_fitqtl_hk_binary",
                    as.integer(n.ind), # number of individuals
                    as.integer(p$n.qtl), # number of qtls
                    as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                    as.double(prob),      # QTL genotype probabilities
                    as.integer(p$n.covar), # number of covariate
                    as.double(covar.C), # covariates
                    as.integer(p$formula.intmtx),  # formula matrix for interactive terms
                    as.integer(p$n.int), # number of interactions in the formula
                    as.double(pheno), # phenotype
                    as.integer(get.ests), # get estimates?
                    # return variables
                    lod=as.double(0), # LOD score
                    df=as.integer(0), # degree of freedom
                    ests=as.double(rep(0,sizefull)),
                    ests.cov=as.double(rep(0,sizefull*sizefull)),
                    design.mat=as.double(rep(0,sizefull*n.ind)),
                    # convergence
                    as.double(tol),
                    as.integer(maxit),
                    matrix.rank=as.integer(0), # on return, rank of matrix
                    PACKAGE="qtl")
        }

    }
    matrix.rank <- z$matrix.rank
    matrix.ncol <- sizefull
    residuals <- z$resid

    if(get.ests) {
        # first, construct the new design matrix
        #  X = the matrix used in the C coe
        #  Z = the matrix we want
        thenames <- qtl$name[p$idx.qtl]
        if(n.covar > 0) thenames <- c(thenames,names(covar)[p$idx.covar])

        ests <- z$ests
        ests.cov <- matrix(z$ests.cov,ncol=sizefull)

        if(adjustX) {
            keep <- 1:(length(ests)-n.newcovar)
            ests <- ests[keep]
            ests.cov <- ests.cov[keep,keep]
        }

        if(any(qtl$n.gen[p$idx.qtl]>=4)) {
            type <- crosstype
            if(type == "4way")
                genotypes <- c("AC","BC","AD","BD")
            else {
                genotypes <- cross.attr$genotypes
                if(is.null(genotypes))
                    genotypes <- as.character(1:max(qtl$n.gen))
            }

            # just attach rownames for this case
            thenames <- "Intercept"

            if(length(p$idx.qtl) > 0) { # qtl names
                qtlnames <- vector("list", length(p$idx.qtl))
                names(qtlnames) <- paste("Q", 1:length(p$idx.qtl), sep="")
                for(i in seq(along=p$idx.qtl)) {
                    qtlnames[[i]] <- paste(qtl$name[p$idx.qtl[i]], genotypes[2:qtl$n.gen[p$idx.qtl[i]]], sep=".")
                    thenames <- c(thenames, qtlnames[[i]])
                }
            }

            if(p$n.covar > 0) {
                covnames <- colnames(covar)[p$idx.covar]
                thenames <- c(thenames, covnames)
            }

            if(p$n.int > 0) { # interactions
                if(!is.matrix(p$formula.intmtx)) {
                    nam <- names(p$formula.intmtx)
                    p$formula.intmtx <- matrix(p$formula.intmtx, nrow=p$n.int)
                    colnames(p$formula.intmtx) <- nam
                }

                for(i in 1:p$n.int) {
                    wh <- which(p$formula.intmtx[i,]==1)
                    nam <- colnames(p$formula.intmtx)[wh]

                    if(length(grep("^Q[0-9]+$", nam[1])) > 0)
                        curnam <- qtlnames[[nam[1]]]
                    else
                        curnam <- nam[1]

                    for(j in 2:length(nam)) {
                        if(length(grep("Q[0-9]+", nam[j])) > 0)
                            curnam <- paste(curnam, qtlnames[[nam[j]]], sep=".")
                        else
                            curnam <- paste(curnam, nam[j], sep=".")
                    }
                    thenames <- c(thenames, curnam)
                }
            }

            if(length(thenames) ==length(ests)) {
                names(ests) <- thenames
                dimnames(ests.cov) <- list(thenames, thenames)
            }
            else
                warning("Estimated QTL effects not yet made meaningful for this case.\n   ")

        }
        else {
            X <- matrix(z$design.mat,ncol=sizefull)
            Z <- matrix(0,nrow=n.ind,ncol=sizefull)
            colnames(Z) <- rep("",sizefull)

            # mean column
            Z[,1] <- 1
            colnames(Z)[1] <- "Intercept"

            # ZZ stores the main effects matrices, for creating the interactions
            ZZ <- vector("list",p$n.qtl+p$n.covar)
            curcol <- 1
            # covariates
            if(p$n.covar > 0) {
                for(j in 1:p$n.covar) {
                    Z[,curcol+j] <- ZZ[[p$n.qtl+j]] <- as.matrix(covar[,p$idx.covar[j],drop=FALSE])
                    colnames(Z)[curcol+j] <- colnames(ZZ[[p$n.qtl+j]]) <- names(covar)[p$idx.covar[j]]
                }
                curcol <- curcol + p$n.covar
            }
            # QTL main effects
            for(i in seq(along=p$idx.qtl)) {
                if(n.gen[i]==2) {
                    if(method=="imp") {
                        if(crosstype == "bc") {
                            Z[qtl$geno[,p$idx.qtl[i],1]==1,curcol+1] <- -0.5
                            Z[qtl$geno[,p$idx.qtl[i],1]==2,curcol+1] <- 0.5
                        } else {
                            Z[qtl$geno[,p$idx.qtl[i],1]==1,curcol+1] <- -1
                            Z[qtl$geno[,p$idx.qtl[i],1]==2,curcol+1] <- 1
                        }
                    }
                    else
                        if(crosstype == "bc") {
                            Z[,curcol+1] <- (qtl$prob[[p$idx.qtl[i]]][,2] - qtl$prob[[p$idx.qtl[i]]][,1])/2
                        } else {
                            Z[,curcol+1] <- (qtl$prob[[p$idx.qtl[i]]][,2] - qtl$prob[[p$idx.qtl[i]]][,1])
                        }
                    colnames(Z)[curcol+1] <- thenames[i]
                }
                else { # 3 genotypes
                    if(method=="imp") {
                        Z[qtl$geno[,p$idx.qtl[i],1]==1,curcol+1] <- -1
                        Z[qtl$geno[,p$idx.qtl[i],1]==3,curcol+1] <- 1
                        Z[qtl$geno[,p$idx.qtl[i],1]==2,curcol+2] <- 0.5
                        Z[qtl$geno[,p$idx.qtl[i],1]!=2,curcol+2] <- -0.5
                    }
                    else {
                        Z[,curcol+1] <- qtl$prob[[p$idx.qtl[i]]][,3] - qtl$prob[[p$idx.qtl[i]]][,1]
                        Z[,curcol+2] <- (qtl$prob[[p$idx.qtl[i]]][,2] - qtl$prob[[p$idx.qtl[i]]][,1] -
                                         qtl$prob[[p$idx.qtl[i]]][,3])/2
                    }
                    colnames(Z)[curcol+1:2] <- paste(thenames[i],c("a","d"),sep="")
                }
                ZZ[[i]] <- Z[,curcol+1:(n.gen[i]-1),drop=FALSE]
                curcol <- curcol + n.gen[i]-1
            }

            if(p$n.int>0) {
                for(i in 1:p$n.int) {
                    if(p$n.int==1)
                        intform <- p$formula.intmtx
                    else
                        intform <- p$formula.intmtx[,i]
                    tempZ <- matrix(1,ncol=1,nrow=nrow(Z))
                    colnames(tempZ) <- ""
                    for(j in seq(along=intform)) {
                        if(intform[j]==1) {
                            tZ <- NULL
                            for(k in 1:ncol(ZZ[[j]])) {
                                tZZ <- tempZ * ZZ[[j]][,k]
                                if(all(colnames(tempZ) == ""))
                                    colnames(tZZ) <- colnames(ZZ[[j]])[k]
                                else
                                    colnames(tZZ) <- paste(colnames(tempZ),colnames(ZZ[[j]])[k],sep=":")
                                tZ <- cbind(tZ,tZZ)
                            }
                            tempZ <- tZ
                        }
                    }
                    Z[,curcol+1:ncol(tempZ)] <- tempZ
                    colnames(Z)[curcol+1:ncol(tempZ)] <- colnames(tempZ)
                    curcol <- curcol + ncol(tempZ)
                }
            }

            b <- solve(t(Z) %*% Z, t(Z) %*% X)
            ests <- as.numeric(b %*% ests)
            ests.cov <- b %*% ests.cov %*% t(b)
            names(ests) <- colnames(Z)
            dimnames(ests.cov) <- list(colnames(Z),colnames(Z))
        }
    }

    ##### output ANOVA table for full model #####
    result.full <- matrix(NA, 3, 7)
    colnames(result.full) <- c("df", "SS", "MS", "LOD", "%var", "Pvalue(Chi2)",
                               "Pvalue(F)")
    rownames(result.full) <- c("Model", "Error", "Total")
    result.full[1,1] <- z$df # model degree of freedom

    if(model=="normal") {
        # compute the SS for total

        if(adjustX) {
            mpheno <- mean(pheno)
            Rss0 <- sum( (pheno-mpheno)^2 )

            Rss0adj <- Rss0x <- sum( lm(pheno ~ Xadjustment$sexpgmcovar)$resid^2 )
            OrigModellod <- z$lod
            Modellod <- z$lod + length(pheno)/2 * (log10(Rss0x) - log10(Rss0))
        } else {
            mpheno <- mean(pheno)
            Rss0adj <- Rss0 <- sum( (pheno-mpheno)^2 )
            OrigModellod <- Modellod <- z$lod
        }

        # third row, for Total
        result.full[3,1] <- length(pheno) - 1 # total degree of freedom
        result.full[3,2] <- Rss0adj # total sum of squares

        # first row, for Model
        result.full[1,1] <- z$df # df for Model
        # Variance explained by model
        result.full[1,5] <- 100 * (1 - exp(-2*Modellod*log(10)/n.ind))
        result.full[1,2] <- Rss0adj * result.full[1,5]/100  # SS for model
        result.full[1,3] <- result.full[1,2]/z$df # MS for model
        result.full[1,4] <- Modellod # Model LOD score

        # Second row, for Error
        # df
        result.full[2,1] <- result.full[3,1] - result.full[1,1]
        # SS
        result.full[2,2] <- result.full[3,2] - result.full[1,2]
        # MS
        result.full[2,3] <- result.full[2,2] / result.full[2,1]

        # first row, P values
        # P value (chi2) for model
        result.full[1,6] <- 1 - pchisq(2*log(10)*Modellod, z$df)
        # P value (F statistics) for model
        df0 <- result.full[3,1]; df1 <- result.full[2,1];
        Rss1 <- result.full[2,2]
        Fstat <- ((Rss0adj-Rss1)/(df0-df1)) / (Rss1/df1)
        result.full[1,7] <- 1 - pf(Fstat, df0-df1, df1)
    }
    else {
        # third row, for Total
        result.full[3,1] <- length(pheno) - 1 # total degree of freedom
        result.full[3,2] <- NA # total sum of squares

        # first row, for Model
        result.full[1,1] <- z$df # df for Model
        # Variance explained by model
        result.full[1,5] <- 100 * (1 - exp(-2*z$lod*log(10)/n.ind))
        result.full[1,2] <- NA  # SS for model
        result.full[1,3] <- NA # MS for model
        result.full[1,4] <- z$lod # Model LOD score
        OrigModellod <- Modellod <- z$lod

        # Second row, for Error
        # df
        result.full[2,1] <- result.full[3,1] - result.full[1,1]
        # SS
        result.full[2,2] <- NA
        # MS
        result.full[2,3] <- NA

        # first row, P values
        # P value (chi2) for model
        result.full[1,6] <- 1 - pchisq(2*log(10)*z$lod, z$df)
        # P value (F statistics) for model
        result.full[1,7] <- NA
    }
    ############# Finish ANOVA table for full model


    # initialize output object
    output <- NULL
    output$result.full <- result.full

    # drop one at a time?
    if(dropone && (p$n.qtl+n.origcovar)>1) {
        # user wants to do drop one term at a time and output anova table

        # get the terms etc. for input formula
        f.terms <- terms(formula)
        f.order <- attr(f.terms, "order")
        f.label <- attr(f.terms, "term.labels")

        # initialize output matrix
        # ANOVA table will have five columns, e.g., df,Type III SS,
        # LOD, %var, Pvalue for each dropping term
        # Full model result will not be in this table
        result <- matrix(0, length(f.order), 7)
        colnames(result) <- c("df", "Type III SS", "LOD", "%var", "F value",
                              "Pvalue(Chi2)", "Pvalue(F)")
        rownames(result) <- rep("",length(f.order))

        drop.term.name <- NULL
        formulas <- rep("", length(f.order))
        lods <- rep(NA, length(f.order))

        for( i in (1:length(f.order)) ) {
            # loop thru all terms in formula, from the highest order
            # the label of the term to be droped
            label.term.drop <- f.label[i]

            ### find the corresponding QTL name for this term ###
            # This is used for output ANOVA table
            if(f.order[i] == 1) {
                # this is a first order term
                # if the term label is like Q(q)1, Q(q)2, etc., then it's a QTL
                if( length(grep("Q[0-9]", label.term.drop, ignore.case=TRUE)) != 0) {
                    idx.qtlname <- as.integer(substr(label.term.drop, 2, nchar(label.term.drop)))
                    drop.term.name[i] <- qtl$name[idx.qtlname]
                }
                else { # this is a covariate
                    drop.term.name[i] <- label.term.drop
                }
            }
            else {
                # this is a 2nd (or higher)order and the term is a string like "Q2:Q3:C1"
                # I use strsplit to split it to a string vector "Q2" "Q3" "C1".
                # then take out 2 and 3 as integer. Then find out the
                # QTL name from the input QTL object and concatenate them
                tmp.str <- strsplit(label.term.drop,":")[[1]]
                for(j in 1:length(tmp.str)) {
                    if( length(grep("Q[0-9]", tmp.str[j], ignore.case=TRUE)) != 0 ) {
                        # this is a QTL
                        idx.qtlname <- as.integer(substr(tmp.str[j], 2, nchar(tmp.str[j])))
                        tmp.str[j] <- qtl$name[idx.qtlname]
                    }
                    if(j == 1) # first term
                        drop.term.name[i] <- tmp.str[j]
                    else # not the first term
                        drop.term.name[i] <- paste(drop.term.name[i], tmp.str[j], sep=":")
                }
            }
            ### Finish QTL name ###

            # find the indices of the term(s) to be dropped
            # All terms contain label.term.drop will be dropped
            idx.term.drop <- NULL
            tmp.str.drop <- tolower(strsplit(label.term.drop,":")[[1]])
            for(j in 1:length(f.label)) {
                tmp.str.label <- tolower(strsplit(f.label[j], ":")[[1]])
                if(all(tmp.str.drop %in% tmp.str.label))
                    idx.term.drop <- c(idx.term.drop, j)
            }

            # the indices of term(s) to be kept
            idx.term.kept <- setdiff(1:length(f.order), idx.term.drop)

            #### regenerate a formula with the kept terms additive ###
            if(length(idx.term.kept) == 0) # nothing left after drop label.term.drop
                stop("There will be nothing left if drop ", drop.term.name[i])
            else {
                # All terms for idx.term.kept will be additive
                formula.new <- as.formula(paste("y~", paste(f.label[idx.term.kept], collapse="+"), sep=""))
            }

            ### Start fitting model again
            # parse the input formula
            p.new <- parseformula(formula.new, qtl$altname, colnames(covar))
            n.gen.QC <- c(n.gen[p.new$idx.qtl]-1, rep(1, p.new$n.covar))

            formulas[i] <- deparseQTLformula(formula.new)

            # covariate to be passed to C function
            covar.C <- NULL
            if(!is.null(p.new$idx.covar))
                covar.C <- as.matrix(covar[,p.new$idx.covar,drop=FALSE])

            if(method != "imp") {
                # form genotype probabilities as a matrix
                prob <- matrix(ncol=sum(qtl$n.gen[p.new$idx.qtl]), nrow=n.ind)

                curcol <- 0
                for(z in p.new$idx.qtl) {
                    prob[,curcol+1:n.gen[z]] <- qtl$prob[[z]]
                    curcol <- curcol + n.gen[z]
                }
            }

            if(adjustX)  { # need to include X chromosome covariates
                n.newcovar <- ncol(Xadjustment$sexpgmcovar)
                n.gen.QC <- c(n.gen.QC, rep(1, n.newcovar))
                p.new$n.covar <- p.new$n.covar + n.newcovar
                covar.C <- cbind(covar.C, Xadjustment$sexpgmcovar)
                sizefull <- sizefull + n.newcovar

                if(p.new$n.int==1)
                    p.new$formula.intmtx <- c(p.new$formula.intmtx, rep(0,n.newcovar))
                if(p.new$n.int>1) {
                    for(i2 in 1:n.newcovar)
                        p.new$formula.intmtx <- rbind(p.new$formula.intmtx, rep(0,p.new$n.int))
                }
            }

            # call C function fit model
            if(model=="normal") {
                if(method == "imp") {
                    z <- .C("R_fitqtl_imp",
                            as.integer(n.ind), # number of individuals
                            as.integer(p.new$n.qtl), # number of qtls
                            as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                            as.integer(n.draws), # number of draws
                            as.integer(qtl$geno[,p.new$idx.qtl,]), # genotypes for selected marker
                            as.integer(p.new$n.covar), # number of covariate
                            as.double(covar.C), # covariate
                            as.integer(p.new$formula.intmtx),  # formula matrix for interactive terms
                            as.integer(p.new$n.int), # number of interactions in the formula
                            as.double(pheno), # phenotype
                            as.integer(0),
                            # return variables
                            lod=as.double(0), # LOD score
                            df=as.integer(0), # degree of freedom
                            as.double(rep(0,sizefull)),
                            as.double(rep(0,sizefull*sizefull)),
                            as.double(rep(0,n.ind*sizefull)),
                            matrix.rank=as.integer(0),
                            resid=as.double(rep(0,n.ind)), # on return, the average residuals across imputations
                            PACKAGE="qtl")
                }

                else {
                    z <- .C("R_fitqtl_hk",
                            as.integer(n.ind), # number of individuals
                            as.integer(p.new$n.qtl), # number of qtls
                            as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                            as.double(prob),
                            as.integer(p.new$n.covar), # number of covariate
                            as.double(covar.C), # covariate
                            as.integer(p.new$formula.intmtx),  # formula matrix for interactive terms
                            as.integer(p.new$n.int), # number of interactions in the formula
                            as.double(pheno), # phenotype
                            as.integer(0),
                            # return variables
                            lod=as.double(0), # LOD score
                            df=as.integer(0), # degree of freedom
                            as.double(rep(0,sizefull)),
                            as.double(rep(0,sizefull*sizefull)),
                            as.double(rep(0,n.ind*sizefull)),
                            matrix.rank=as.integer(0),
                            resid=as.double(rep(0,n.ind)), # on return, the residuals
                            PACKAGE="qtl")
                }
            }
            else { # binary trait
                if(method=="imp") {
                    z <- .C("R_fitqtl_imp_binary",
                            as.integer(n.ind), # number of individuals
                            as.integer(p.new$n.qtl), # number of qtls
                            as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                            as.integer(n.draws), # number of draws
                            as.integer(qtl$geno[,p.new$idx.qtl,]), # genotypes for selected marker
                            as.integer(p.new$n.covar), # number of covariate
                            as.double(covar.C), # covariate
                            as.integer(p.new$formula.intmtx),  # formula matrix for interactive terms
                            as.integer(p.new$n.int), # number of interactions in the formula
                            as.double(pheno), # phenotype
                            as.integer(0),
                            # return variables
                            lod=as.double(0), # LOD score
                            df=as.integer(0), # degree of freedom
                            as.double(rep(0,sizefull)),
                            as.double(rep(0,sizefull*sizefull)),
                            as.double(rep(0,n.ind*sizefull)),
                            as.double(tol),
                            as.integer(maxit),
                            matrix.rank=as.integer(0),
                            PACKAGE="qtl")
                }
                else {
                    z <- .C("R_fitqtl_hk_binary",
                            as.integer(n.ind), # number of individuals
                            as.integer(p.new$n.qtl), # number of qtls
                            as.integer(n.gen.QC), # number of genotypes QTLs and covariates
                            as.double(prob),
                            as.integer(p.new$n.covar), # number of covariate
                            as.double(covar.C), # covariate
                            as.integer(p.new$formula.intmtx),  # formula matrix for interactive terms
                            as.integer(p.new$n.int), # number of interactions in the formula
                            as.double(pheno), # phenotype
                            as.integer(0),
                            # return variables
                            lod=as.double(0), # LOD score
                            df=as.integer(0), # degree of freedom
                            as.double(rep(0,sizefull)),
                            as.double(rep(0,sizefull*sizefull)),
                            as.double(rep(0,n.ind*sizefull)),
                            # convergence
                            as.double(tol),
                            as.integer(maxit),
                            matrix.rank = as.integer(0),
                            PACKAGE="qtl")
                }
            }

            if(model=="normal" && adjustX) # adjust for X chromosome covariates
                z$lod <- z$lod + length(pheno)/2 * (log10(Rss0x) - log10(Rss0))

            # record the result for dropping this term
            # df
            result[i,1] <- result.full[1,1] - z$df
            # LOD score
            result[i,3] <- Modellod - z$lod
            # % variance explained
            result[i,4] <- result.full[1,5] - 100*(1 - 10^(-2*z$lod/n.ind))

            # lod score for reduced model
            lods[i] <- z$lod

            # Type III SS for this term - computed from %var
            if(model=="normal")
                result[i,2] <- result.full[3,2] * result[i,4] / 100
            else
                result[i,2] <- NA
            # F value
            if(model=="normal") {
                df0 <- length(pheno) - z$df - 1; df1 <- result.full[2,1];
                Rss0p <- result.full[2,2] + result[i,2];
                Rss1p <- result.full[2,2]
                Fstat <- ((Rss0p-Rss1p)/(df0-df1)) / (Rss1/df1)
                result[i,5] <- Fstat
                # P value (F)
                result[i,7] <- 1 - pf(Fstat, df0-df1, df1)
            }
            else # ignore F stat for binary trait
                result[i,c(5,7)] <- NA
            # P value (chi2)
            result[i,6] <- 1 - pchisq(2*log(10)*result[i,3], result[i,1])
            # assign row name
            rownames(result)[i] <- drop.term.name[i]
        } # finish dropping terms loop

        attr(result, "formulas") <- formulas
        attr(result, "lods") <- lods

        # assign output object
        output$result.drop <- result

    }  ## if(dropone)

    if(get.ests)
        output$ests <- list(ests=ests, covar=ests.cov)

    output$lod <- output$result.full[1,4]

    class(output) <- "fitqtl"
    attr(output, "method") <- method
    attr(output, "model") <- model
    attr(output, "formula") <- deparseQTLformula(formula)
    attr(output, "type") <- qtl$type
    attr(output, "nind") <- length(pheno)

    attr(output, "matrix.rank") <- matrix.rank
    attr(output, "matrix.ncol") <- matrix.ncol

    if(!is.null(residuals))
        attr(output, "residuals") <- residuals

    output
}


######################################################################
# checkformula
#
# check that input formula satisfies our imposed hiearchy: that
# main effects for terms in any interactions are also included
######################################################################
checkformula <-
    function(formula, qtl.name, covar.name)
{
    factors <- attr(terms(formula), "factors")
    altform <- deparseQTLformula(formula)

    if(sum(factors[1,])==0) factors <- factors[-1,,drop=FALSE]

    rn <- rownames(factors)
    # if mentions of "q1" or such, convert to "Q1" and such
    g <- grep("^[Qq][0-9]+$", rn)
    todrop <- NULL
    if(length(g) >= 1) {
        rownames(factors)[g] <- rn[g] <- toupper(rn[g])
        if(any(table(rn) > 1)) { # now there are some duplicates
            urn <- unique(rn)
            for(i in urn) {
                wh <- which(rn == i)
                if(length(wh) > 1) {
                    factors[wh[1],] <- apply(factors[wh,], 2, sum)
                    todrop <- c(todrop, wh[-1])
                    rownames(factors)[wh[1]] <- rn[wh[1]]
                }
            }
        }
    }
    if(length(todrop) > 0) factors <- factors[-todrop,]
    rn <- rownames(factors)

    if(!missing(qtl.name) || !missing(covar.name)) {
        m <- match(rn, c(qtl.name, covar.name))
        if(any(is.na(m)))
            warning("Terms ", paste(rn[is.na(m)], collapse=" "), " not understood.")
    }

    # paste rows together
    zo <- factors
    zo[zo>1] <- 1
    pzo <- apply(zo, 2, paste, collapse="")
    nt <- apply(zo, 2, sum)

    # form binary representations
    maxnt <- max(nt)
    v <- vector("list", maxnt)
    for(i in 2:maxnt) {
        v[[i]] <- mybinaryrep(i)
        v[[i]] <- v[[i]][,-ncol(v[[i]])+c(0,1)]
    }

    # for each higher-order column, form all lower-order terms
    for(i in which(nt > 1)) {
        cur <- zo[,i]
        wh <- which(cur==1)
        z <- v[[nt[i]]]
        zz <- matrix(0, ncol=ncol(z), nrow=length(cur))
        for(j in seq(along=wh))
            zz[wh[j],] <- z[j,]
        pzo <- unique(c(pzo, apply(zz, 2, paste, collapse="")))
    }

    zo <- matrix(as.numeric(unlist(strsplit(pzo, ""))), ncol=length(pzo))
    nt <- apply(zo, 2, sum)
    zo <- zo[,order(nt, apply(1-zo, 2, paste, collapse="")), drop=FALSE]
    rownames(zo) <- rn

    # form column names
    theterms <- apply(zo, 2, function(a, b) paste(b[as.logical(a)], collapse=":"),
                      rownames(zo))

    as.formula(paste("y ~ ", paste(theterms, collapse=" + ")))
}

#####################################################################
#
# parseformula
#
# Function to be called by fitqtl. It's used to
# parse the input formula
#
# This is the internal function and not supposed to be used by user
#
#####################################################################

parseformula <- function(formula, qtl.dimname, covar.dimname)
{
    # The terms for input formula
    f.formula <- terms(formula)
    order.term <- attr(f.formula, "order") # get the order of the terms
    idx.term <- which(order.term==1) # get the first order terms
    label.term <- attr(f.formula, "term.labels")[idx.term]
    formula.mtx <- attr(f.formula, "factors") # formula matrix

    idx.qtl <- NULL
    idx.covar <- NULL

    # loop thru all terms and find out how many QTLs and covariates
    # are there in the formula. Construct idx.qtl and idx.covar at the same time
    termisqtl <- rep(0, length(idx.term))
    for (i in 1:length(idx.term)) {
        # find out if there term is a QTL or a covariate
        # ignore the case for QTLs, e.g., Q1 is equivalent to q1
        idx.tmp <- grep(paste(label.term[i],"$", sep=""),
                        qtl.dimname, ignore.case=TRUE)
        if( length(idx.tmp) ) { # it's a QTL
            idx.qtl <- c(idx.qtl, idx.tmp)
            termisqtl[i] <- 1
        }
        else if(label.term[i] %in% covar.dimname) # it's a covariate
            idx.covar <- c(idx.covar, which(label.term[i]==covar.dimname))
        else
            stop("Unrecognized term ", label.term[i], " in formula")
    }
    n.qtl <- length(idx.qtl) # number of QTLs in formula
    n.covar <- length(idx.covar) # number of covariates in formula
    # now idx.qtl and idx.covar are the indices for genotype
    # and covariate matrices according to input formula

    # loop thru all terms again and reorganize formula.mtx
    formula.idx <- NULL
    ii <- 1
    jj <- 1
    for (i in 1:length(idx.term)) {
        #    if(label.term[i] %in% qtl.dimname) {  # it's a QTL
        if(termisqtl[i]) {
            formula.idx <- c(formula.idx, ii)
            ii <- ii+1
        }
        else { # it's a covariate
            formula.idx <- c(formula.idx, jj+n.qtl)
            jj <- jj+1
        }
    }

    # reorganize formula.mtx according to formula.idx
    # remove the first row (for y)
    formula.mtx <- formula.mtx[2:nrow(formula.mtx),]
    # rearrange the rows according to formula.idx if there's more than one row
    if(length(formula.idx) > 1)
        formula.mtx <- formula.mtx[order(formula.idx),]
    # take out only part of the matrix for interactions and pass to C function
    # all the input QTLs and covariates for C function will be additive
    n.int <- length(order.term) - length(idx.term) # number of interactions
    if(n.int != 0)
        formula.intmtx <- formula.mtx[,(length(idx.term)+1):length(order.term)]
    else # no interaction terms
        formula.intmtx <- NULL

    # return object
    result <- NULL
    result$idx.qtl <- idx.qtl
    result$n.qtl <- n.qtl
    result$idx.covar <- idx.covar
    result$n.covar <- n.covar
    result$formula.intmtx <- formula.intmtx
    result$n.int <- n.int

    result

}


#####################################################################
#
# summary.fitqtl
#
#####################################################################
summary.fitqtl <-
    function(object, pvalues=TRUE, simple=FALSE, ...)
{
    if(!inherits(object, "fitqtl"))
        stop("Input should have class \"fitqtl\".")

    # this is just an interface.
    if("ests" %in% names(object)) {
        ests <- object$ests$ests
        se <- sqrt(diag(object$ests$covar))
        object$ests <- cbind(est=ests, SE=se, t=ests/se)
    }
    class(object) <- "summary.fitqtl"
    if(simple) pvalues <- FALSE
    attr(object, "pvalues") <- pvalues
    attr(object, "simple") <- simple
    object
}


#####################################################################
#
# print.summary.fitqtl
#
#####################################################################
print.summary.fitqtl <- function(x, ...)
{
    cat("\n")
    cat("\t\tfitqtl summary\n\n")
    meth <- attr(x, "method")
    mod <- attr(x, "model")
    if(is.null(mod)) mod <- "normal"
    if(meth=="imp") meth <- "multiple imputation"
    else if(meth=="hk") meth <- "Haley-Knott regression"
    cat("Method:", meth, "\n")
    cat("Model: ", mod, "phenotype\n")
    cat("Number of observations :", attr(x, "nind"), "\n\n")

    # print ANOVA table for full model
    cat("Full model result\n")
    cat("----------------------------------  \n")
    cat("Model formula:")
    w <- options("width")[[1]]
    printQTLformulanicely(attr(x, "formula"), "                   ", w+5, w)
    cat("\n")

    pval <- attr(x, "pvalues")
    simple <- attr(x, "simple")
    if(!is.null(pval) && !pval)
        x$result.full <- x$result.full[,-ncol(x$result.full)+(0:1)]
    if(mod=="binary" || (!is.null(simple) && simple))
        x$result.full <- x$result.full[1,-c(2:3,7),drop=FALSE]

    print(x$result.full, quote=FALSE, na.print="")
    cat("\n")

    # print ANOVA table for dropping one at a time analysis (if any)
    if("result.drop" %in% names(x)) {
        cat("\n")
        cat("Drop one QTL at a time ANOVA table: \n")
        cat("----------------------------------  \n")
        # use printCoefmat instead of print.data.frame
        # make sure the last column is P value
        if(!is.null(pval) && !pval)
            x$result.drop <- x$result.drop[,-ncol(x$result.drop)+(0:1)]
        if(mod=="binary" || (!is.null(simple) && simple))
            x$result.drop <- x$result.drop[,-c(2,5,7),drop=FALSE]

        printCoefmat(x$result.drop, digits=4, cs.ind=1, P.values=TRUE, has.Pvalue=TRUE)
        cat("\n")
    }

    if("ests" %in% names(x)) {
        cat("\n")
        cat("Estimated effects:\n")
        cat("-----------------\n")
        printCoefmat(x$ests,digits=4)
        cat("\n")
    }

}

######################################################################
# binary repreentation of the numbers 1...2^n;
# used in checkformula
######################################################################
mybinaryrep <-
    function(n)
{
    lx <- 2^n
    x <- 1:lx
    ans <- 0:(n-1)
    x <- matrix(rep(x,rep(n, lx)), ncol=lx)
    (x %/% 2^ans) %% 2
}

######################################################################
# deparseQTLformula: turn QTL formula into a string
######################################################################
deparseQTLformula <-
    function(formula, reorderterms=FALSE)
{
    if(is.null(formula)) return(NULL)
    if(reorderterms) {
        if(is.character(formula)) formula <- as.formula(formula)
        factors <- colnames(attr(terms(formula), "factors"))
        wh <- grep("^[Qq][0-9]+$", factors)
        if(length(wh)>0)
            factors[wh] <- paste("Q", sort(as.numeric(substr(factors[wh], 2, nchar(factors[wh])))), sep="")
        wh <- grep(":", factors)
        if(length(wh)>0) {
            temp <- strsplit(factors[wh], ":")
            temp <- sapply(temp, function(a) {
                wh <- grep("^[Qq][0-9]+$", a)
                if(any(wh)) a[wh] <- paste("Q", sort(as.numeric(substr(a[wh], 2, nchar(a[wh])))), sep="")
                paste(a[order(is.na(match(seq(along=a),wh)))], collapse=":")
            })
            factors[wh] <- temp
        }
        return(paste("y ~ ", paste(factors, collapse=" + "), sep=""))
    }


    if(is.character(formula)) return(formula)
    paste(as.character(formula)[c(2,1,3)], collapse=" ")
}

printQTLformulanicely <-
    function(formula, header, width, width2, sep=" ")
{
    if(!is.character(formula)) formula <- deparseQTLformula(formula)
    thetext <- unlist(strsplit(formula, " "))

    if(missing(width2)) width2 <- width
    nleft <- width - nchar(header)
    nsep <- nchar(sep)
    if(length(thetext) < 2) cat("", thetext, "\n", sep=sep)
    else {
        z <- paste("", thetext[1], sep=sep, collapse=sep)
        for(j in 2:length(thetext)) {
            if(nchar(z) + nsep + nchar(thetext[j]) > nleft) {
                cat(z, "\n")
                nleft <- width2
                z <- paste(header, thetext[j], sep=sep)
            }
            else {
                z <- paste(z, thetext[j], sep=sep)
            }
        }
        cat(z, "\n")
    }
}

# end of fitqtl.R
Any scripts or data that you put into this service are public.
qtl documentation built on Nov. 28, 2023, 1:09 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
qtl
Tools for Analyzing QTL Experiments

R/fitqtl.R
In qtl: Tools for Analyzing QTL Experiments

Defines functions printQTLformulanicely deparseQTLformula mybinaryrep print.summary.fitqtl summary.fitqtl parseformula checkformula fitqtlengine fitqtl

Documented in checkformula deparseQTLformula fitqtl fitqtlengine parseformula print.summary.fitqtl summary.fitqtl

Try the qtl package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

qtl Tools for Analyzing QTL Experiments

R/fitqtl.R In qtl: Tools for Analyzing QTL Experiments

Defines functions printQTLformulanicely deparseQTLformula mybinaryrep print.summary.fitqtl summary.fitqtl parseformula checkformula fitqtlengine fitqtl

Documented in checkformula deparseQTLformula fitqtl fitqtlengine parseformula print.summary.fitqtl summary.fitqtl

Try the qtl package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

qtl
Tools for Analyzing QTL Experiments

R/fitqtl.R
In qtl: Tools for Analyzing QTL Experiments