R/summary.cross.R

Defines functions chrlen print.cross ntyped nmissing nphe totmar nmar nchr nind print.summary.cross summary.cross

Documented in chrlen nchr nind nmar nmissing nphe ntyped print.cross print.summary.cross summary.cross totmar

######################################################################
#
# summary.cross.R
#
# copyright (c) 2001-2022, Karl W Broman
# last modified Oct, 2022
# first written Feb, 2001
#
#     This program is free software; you can redistribute it and/or
#     modify it under the terms of the GNU General Public License,
#     version 3, as published by the Free Software Foundation.
#
#     This program is distributed in the hope that it will be useful,
#     but without any warranty; without even the implied warranty of
#     merchantability or fitness for a particular purpose.  See the GNU
#     General Public License, version 3, for more details.
#
#     A copy of the GNU General Public License, version 3, is available
#     at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: summary.cross, print.summary.cross, nind, nchr, nmar,
#           totmar, nphe, nmissing, ntyped, print.cross, chrlen
#
######################################################################

summary.cross <-
    function(object,...)
{
    if(!inherits(object, "cross"))
        stop("Input should have class \"cross\".")

    n.ind <- nind(object)
    tot.mar <- totmar(object)
    n.phe <- nphe(object)
    n.chr <- nchr(object)
    n.mar <- nmar(object)
    type <- crosstype(object)

    if(!(type %in% c("f2", "bc", "4way", "riself", "risib", "dh", "haploid",
                     "ri4self", "ri4sib", "ri8self", "ri8selfIRIP1", "ri8sib", "bcsft", "bgmagic16")))
        stop("Cross type ", type, " is not supported.")

    # combine genotype data into one big matrix
    Geno <- pull.geno(object)

    # A and X-specific genotypes?
    chr_type <- sapply(object$geno, chrtype)
    if(any(chr_type=="A")) {
        GenoA <- pull.geno(object, chr=chrnames(object)[chr_type=="A"])
    } else {
        GenoA <- NULL
    }
    if(any(chr_type=="X") && type %in% c("f2", "bc", "bcsft")) {
        GenoX <- pull.geno(object, chr=chrnames(object)[chr_type=="X"])
        GenoX <- reviseXdata(type, "full", getsex(object), geno=GenoX, cross.attr=attributes(object))
    } else{
        GenoX <- NULL
    }

    # proportion of missing genotype data
    missing.gen <- mean(is.na(Geno))

    # Get cross scheme for BCsFt.
    if(type == "bcsft") {
        cross.scheme <- attr(object, "scheme")
        is.bcs <- (cross.scheme[2] == 0)
    }
    else {
        cross.scheme <- rep(0,2)
        is.bcs <- FALSE
    }

    # table of genotype values
    typings <- typingsA <- typingsX <- NULL
    if(type %in% c("f2", "bcsft") && !is.bcs) {
        if(is.null(GenoX)) {
            typings <- table(factor(Geno[!is.na(Geno)], levels=1:5))
            temp <- getgenonames("f2", "A", cross.attr=attributes(object))
            names(typings) <- c(temp, paste("not", temp[c(3,1)]))
        } else {
            if(!is.null(GenoA)) {
                typingsA <- table(factor(GenoA[!is.na(GenoA)], levels=1:5))
                temp <- getgenonames("f2", "A", cross.attr=attributes(object))
                names(typingsA) <- c(temp, paste("not", temp[c(3,1)]))
            }
            if(!is.null(GenoX)) {
                gnames <- getgenonames("f2", "X", "full", getsex(object), cross.attr=attributes(object))
                typingsX <- table(factor(GenoX[!is.na(GenoX)], levels=1:length(gnames)))
                names(typingsX) <- gnames
            }
        }
    }
    else if(type %in% c("bc", "riself", "risib", "dh", "haploid", "bcsft")) {
        if(is.null(GenoX)) {
            typings <- table(factor(Geno[!is.na(Geno)], levels=1:2))
            temp <- getgenonames(type, "A", cross.attr=attributes(object))
            names(typings) <- temp
        } else {
            if(!is.null(GenoA)) {
                typingsA <- table(factor(GenoA[!is.na(GenoA)], levels=1:2))
                temp <- getgenonames(type, "A", cross.attr=attributes(object))
                names(typingsA) <- temp
            }
            if(!is.null(GenoX)) {
                gnames <- getgenonames(type, "X", "full", getsex(object), cross.attr=attributes(object))
                typingsX <- table(factor(GenoX[!is.na(GenoX)], levels=1:length(gnames)))
                names(typingsX) <- gnames
            }
        }
    }
    else if(type=="4way") {
        typings <- table(factor(Geno[!is.na(Geno)], levels=1:14))

        temp <- getgenonames("4way", "A", cross.attr=attributes(object))
        names(typings) <- c(temp,
                            paste(temp[c(1,3)], collapse="/"),
                            paste(temp[c(2,4)], collapse="/"),
                            paste(temp[c(1,2)], collapse="/"),
                            paste(temp[c(3,4)], collapse="/"),
                            paste(temp[c(1,4)], collapse="/"),
                            paste(temp[c(2,3)], collapse="/"),
                            paste("not", temp[1]),
                            paste("not", temp[2]),
                            paste("not", temp[3]),
                            paste("not", temp[4]))
    }
    else typings <- table(factor(Geno[!is.na(Geno)]))

    # turn into fractions
    if(!is.null(typings)) typings <- typings/sum(typings)
    if(!is.null(typingsA)) typingsA <- typingsA/sum(typingsA)
    if(!is.null(typingsX)) typingsX <- typingsX/sum(typingsX)


    # amount of missing phenotype data
    if(ncol(object$pheno) <= 30)
        missing.phe <- as.numeric(cbind(apply(object$pheno,2,function(a) mean(is.na(a)))))
    else
        missing.phe <- mean(as.numeric(is.na(object$pheno)))

    # check that, in the case of a "4way" cross, the genetic
    #     maps are matrices with 2 rows, and that for other crosses,
    #     the genetic maps are numeric vectors
    if(type=="4way") {
        if(any(!sapply(object$geno, function(a) (is.matrix(a$map) && nrow(a$map)==2))))
            warning("The genetic maps should all be matrices with two rows.")
    }
    else {
        if(any(sapply(object$geno, function(a) is.matrix(a$map))))
            warning("The genetic maps should all be numeric vectors rather than matrices.")
    }

    # check that object$geno[[i]]$data has colnames and that they match
    #     the names in object$geno[[i]]$map
    jitterwarning <- NULL
    for(i in 1:n.chr) {
        nam1 <- colnames(object$geno[[i]]$data)
        map <- object$geno[[i]]$map
        if(is.matrix(map)) nam2 <- colnames(map)
        else nam2 <- names(map)
        chr <- names(object$geno)[[i]]
        if(is.null(nam1)) {
            warn <- paste("The data matrix for chr", chr,
                          "lacks column names")
            warning(warn)
        }
        if(is.null(nam2)) {
            warn <- paste("The genetic map for chr", chr,
                          "lacks column names")
            warning(warn)
        }
        if(any(nam1 != nam2))
            stop("Marker names in the data matrix and genetic map\n",
                 "for chr ", chr, " do not match.")

        if((is.matrix(map) && (any(diff(map[1,])<0) || any(diff(map[2,])<0))) ||
           (!is.matrix(map) && any(diff(map)<0)))
            stop("Markers out of order on chr ", chr)


        # check that no two markers are on top of each other
        if(is.matrix(map)) { # sex-specific maps
            n <- ncol(map)
            if(n > 1) {
                d1 <- diff(map[1,])
                d2 <- diff(map[2,])
                if(any(d1 < 1e-14 & d2 < 1e-14)) {
                    if (is.null(jitterwarning)) jitterwarning<-list()
                    jitterwarning[[names(object$geno)[i]]]<-which(d1 < 1e-14 & d2 < 1e-14)
                }
            }
        }
        else {
            n <- length(map)
            if(n > 1) {
                d <- diff(map)
                if(any(d < 1e-14)) {
                    if (is.null(jitterwarning)) jitterwarning<-list()
                    jitterwarning[[names(object$geno)[i]]]<-which(d < 1e-14)
                }
            }
        }

    }

    if (!is.null(jitterwarning))
        warning("Some markers at the same position on chr ",
                paste(names(jitterwarning),collapse=",",sep=""),"; use jittermap().")

    if(!is.data.frame(object$pheno))
        warning("Phenotypes should be a data.frame.")

    if(is.null(colnames(object$pheno)))
        stop("Phenotype data needs column names")

    x <- table(colnames(object$pheno))

    if(any(x > 1))
        warning("Some phenotypes have the same name:\n",
                paste(names(x)[x>1], collapse="  "))

    # check genotype data
    if(type %in% c("bc", "riself", "risib", "dh", "haploid") | (type == "bcsft" & is.bcs)) {
        # Invalid genotypes?
        if(any(!is.na(Geno) & Geno != 1 & Geno != 2)) {
            u <- unique(as.numeric(Geno))
            u <- sort(u[!is.na(u)])
            warn <- paste("Invalid genotypes.",
                          "\n    Observed genotypes:",
                          paste(u, collapse=" "))
            warning(warn)
        }

        # Missing genotype category on autosomes?
        if(sum(!is.na(Geno) & Geno==2) == 0 ||
           sum(!is.na(Geno) & Geno==1) == 0) {
            warn <- paste("Strange genotype pattern on chr ", chr, ".", sep="")
            warning(warn)
        }
    }
    else if(type %in% c("f2","bcsft") & !is.bcs) {
        # invalid genotypes
        if(any(!is.na(Geno) & Geno!=1 & Geno!=2 & Geno!=3 &
               Geno!=4 & Geno!=5)) {
            u <- unique(as.numeric(Geno))
            u <- sort(u[!is.na(u)])
            warn <- paste("Invalid genotypes on chr", chr, ".",
                          "\n    Observed genotypes:",
                          paste(u, collapse=" "))
            warning(warn)
        }

        # X chromosome
        for(i in 1:n.chr) {
            if(chrtype(object$geno[[i]]) == "X") {
                dat <- object$geno[[i]]$data
                if(any(!is.na(dat) & dat!=1 & dat!=2)) {
                    u <- unique(as.numeric(dat))
                    u <- sort(u[!is.na(u)])
                    warn <- paste("Invalid genotypes on X chromosome:",
                                  "\n    Observed genotypes:",
                                  paste(u, collapse=" "))
                    warning(warn)
                }
            }
        }

        # Missing genotype category on autosomes?
        dat <- NULL; flag <- 0
        for(i in 1:n.chr) {
            if(chrtype(object$geno[[i]]) != "X") {
                dat <- cbind(dat,object$geno[[i]]$data)
                flag <- 1
            }
        }
        if(flag && (sum(!is.na(dat) & dat==2) == 0 ||
                    sum(!is.na(dat) & dat==1) == 0 ||
                    sum(!is.na(dat) & dat==3) == 0))
            warning("Strange genotype pattern.")
    }
    else if(type=="4way") {
        # Invalid genotypes?
        if(any(!is.na(Geno) & (Geno != round(Geno) | Geno < 1 | Geno > 14))) {
            u <- unique(as.numeric(Geno))
            u <- sort(u[!is.na(u)])
            warn <- paste("Invalid genotypes.",
                          "\n    Observed genotypes:",
                          paste(u, collapse=" "))
            warning(warn)
        }
    }
    else if(type %in% c("ri4sib", "ri4self", "ri8sib", "ri8self", "ri8selfIRIP1", "bgmagic16")) {
        if(type=="bgmagic16") n.str <- 16
        else n.str <- as.numeric(substr(type, 3, 3))
        if(any(!is.na(Geno) & (Geno != round(Geno) | Geno < 1 | Geno > 2^n.str-1))) {
            u <- unique(as.numeric(Geno))
            u <- sort(u[!is.na(u)])
            warn <- paste("Invalid genotypes.",
                          "\n    Observed genotypes:",
                          paste(u, collapse=" "))
            warning(warn)
        }
    }

    # Look for duplicate marker names
    mnames <- NULL
    for(i in 1:nchr(object))
        mnames <- c(mnames,colnames(object$geno[[i]]$data))

    o <- table(mnames)
    if(any(o > 1))
        warning("Duplicate markers [", paste(names(o)[o>1], collapse=", "), "]")

    # make sure the genotype data are matrices rather than data frames
    if(any(sapply(object$geno, function(a) is.data.frame(a$data))))
        warning("The $data objects should be simple matrices, not data frames.")

    # make sure each chromosome has class "A" or "X"
    chr.class <- sapply(object$geno, chrtype)
    if(!all(chr.class == "A" | chr.class == "X"))
        warning("Each chromosome should have class \"A\" or \"X\".")
    chr.nam <- names(object$geno)
    if(is.null(chr.nam)) {
        warning("The chromosome names are missing.")
        chr.nam <- as.character(1:length(chr.class))
    }

    if(type != "riself" && any(chr.class=="A" & (chr.nam=="X" | chr.nam=="x"))) {
        wh <- which(chr.nam=="X" | chr.nam=="x")
        warning("Chromosome \"", chr.nam[wh], "\" has class \"A\" but probably ",
                "should have class \"X\".")
    }

    if(length(chr.nam) > length(unique(chr.nam))) {
        tab <- table(chr.nam)
        dups <- names(tab[tab > 1])
        warning("Duplicate chromosome names: ", paste(dups,sep=", "))
    }
    g <- grep("^-", chr.nam)
    if(length(g) > 0)
        warning("Chromosome names shouldn't start with '-': ", paste(chr.nam[g], sep=", "))

    # if more than one X chromosome, print a warning
    if(sum(chr.class=="X") > 1)
        warning("More than one X chromosome: [",
                paste(chr.nam[chr.class == "X"], collapse=", "), "]")

    if(any(chr.class=="A"))
        autosomes <- chr.nam[chr.class == "A"]
    else autosomes <- NULL
    if(any(chr.class=="X"))
        Xchr <- chr.nam[chr.class == "X"]
    else Xchr <- NULL

    # check individual IDs
    id <- getid(object)
    if(!is.null(id) && length(id) != length(unique(id)))
        warning("The individual IDs are not unique.")

    # check that chromosomes aren't too long
    mapsum <- summaryMap(object)
    if(ncol(mapsum)==7)  # sex-specific map
        maxlen <- max(mapsum[1:(nrow(mapsum)-1),2:3])
    else
        maxlen <- max(mapsum[1:(nrow(mapsum)-1),2])
    if(maxlen > 1000)
        warning(paste("Some chromosomes > 1000 cM in length; there may",
                      "be a problem with the genetic map.\n  (Perhaps it is in basepairs?)"))

    cross.summary <- list(type=type, n.ind = n.ind, n.phe=n.phe,
                          n.chr=n.chr, n.mar=n.mar,
                          missing.gen=missing.gen,
                          typing.freq=typings, typing.freq.A=typingsA, typing.freq.X=typingsX,
                          missing.phe=missing.phe,
                          autosomes=autosomes, Xchr=Xchr, cross.scheme=cross.scheme)
    class(cross.summary) <- "summary.cross"
    cross.summary

}


print.summary.cross <-
    function(x,...)
{
    print.genotypes <- TRUE

    if(x$type=="f2") cat("    F2 intercross\n\n")
    else if(x$type=="bc") cat("    Backcross\n\n")
    else if(x$type=="4way") cat("    4-way cross\n\n")
    else if(x$type=="riself") cat("    RI strains via selfing\n\n")
    else if(x$type=="risib") cat("    RI strains via sib matings\n\n")
    else if(x$type=="dh") cat("    Doubled haploids\n\n")
    else if(x$type=="haploid") cat("    Haploids\n\n")
    else if(x$type %in% c("ri4self", "ri4sib", "ri8self", "ri8selfIRIP1", "ri8sib")) {
        n.str <- substr(x$type, 3, 3)
        if(substr(x$type, 4, min(6, nchar(x$type)))=="sib") crosstype <- "sib-mating"
        else crosstype <- "selfing"
        print.genotypes <- FALSE
        cat("    ", n.str, "-way RIL by ", crosstype, "\n\n", sep="")
    }
    else if(x$type=="bcsft") cat(paste("    BC(", x$cross.scheme[1], ")F(", x$cross.scheme[2], ") cross\n\n", sep = ""))
    else if(x$type %in% c("bgmagic16")) {
        n.str <- 16
        print.genotypes <- FALSE
        cat("    ", n.str, "-way Biogemma MAGIC lines\n\n", sep="")
    }
    else cat("    cross", x$type, "\n\n",sep=" ")

    cat("    No. individuals:   ", x$n.ind,"\n\n")
    cat("    No. phenotypes:    ", x$n.phe,"\n")

    header <- "                       "
    width <- options("width")$width

    cat("    Percent phenotyped:")

    ######################################################################
    # function to print things nicely
    printnicely <-
        function(thetext, header, width, sep=" ")
        {
            nleft <- width - nchar(header)
            nsep <- nchar(sep)
            if(length(thetext) < 2) cat("", thetext, "\n", sep=sep)
            else {
                z <- paste("", thetext[1], sep=sep, collapse=sep)
                for(j in 2:length(thetext)) {
                    if(nchar(z) + nsep + nchar(thetext[j]) > nleft) {
                        cat(z, "\n")
                        nleft <- width
                        z <- paste(header, thetext[j], sep=sep)
                    }
                    else {
                        z <- paste(z, thetext[j], sep=sep)
                    }
                }
                cat(z, "\n")
            }
        }
    ######################################################################
    # function to pre-pad with spaces
    pad_w_spaces <-
        function(x, width, pre=TRUE)
        {
            padding <- vapply(nchar(x), function(n) paste(rep(" ", width-n), collapse=""), "")
            if(pre) return(paste0(padding, x))
            paste0(x, padding)
        }
    ######################################################################

    printnicely(round((1-x$missing.phe)*100,1), header, width)
    cat("\n")

    cat("    No. chromosomes:   ", x$n.chr,"\n")
    if(!is.null(x$autosomes)) {
        cat("        Autosomes:     ")
        printnicely(x$autosomes, header, width)
    }
    if(!is.null(x$Xchr)) {
        cat("        X chr:         ")
        printnicely(x$Xchr, header, width)
    }
    cat("\n")
    cat("    Total markers:     ", sum(x$n.mar), "\n")
    cat("    No. markers:       ")
    printnicely(x$n.mar, header, width)
    cat("    Percent genotyped: ", round((1-x$missing.gen)*100,1), "\n")
    if(print.genotypes) {
        cat("    Genotypes (%):    ")
        header <- "                      "
        if(!is.null(x$typing.freq.X)) {
            # contortions to line things up
            roundedX <- sprintf("%.1f", x$typing.freq.X*100)

            genoX <- paste(names(x$typing.freq.X),roundedX,sep=":")
            if(!is.null(x$typing.freq.A)) {
                roundedA <- sprintf("%.1f", x$typing.freq.A*100)
                genoA <- paste(names(x$typing.freq.A),roundedA,sep=":")

                # line up values
                maxchar <- max(nchar(c(roundedA, roundedX)))
                roundedA <- pad_w_spaces(roundedA, maxchar, pre=FALSE)
                roundedX <- pad_w_spaces(roundedX, maxchar, pre=FALSE)
                genoX <- paste(names(x$typing.freq.X),roundedX,sep=":")
                genoA <- paste(names(x$typing.freq.A),roundedA,sep=":")

                # line things up again
                maxchar <- max(nchar(c(genoA, genoX)))
                genoA <- pad_w_spaces(genoA, maxchar)
                genoX <- pad_w_spaces(genoX, maxchar)

                cat("\n          Autosomes:  ")
                printnicely(genoA, header, width, "  ")
                cat("       X chromosome:  ")
            }
            printnicely(genoX, header, width, "  ")
        } else {
            if(!is.null(x$typing.freq.A) && !is.null(x$typing.freq))
                x$typing.freq <- x$typing.freq.A
            geno <- paste(names(x$typing.freq),sprintf("%.1f", x$typing.freq*100), sep=":")
            printnicely(geno, header, width, "  ")
        }
    }
}



nind <-
    function(object)
{
    if(!inherits(object, "cross"))
        stop("Input should have class \"cross\".")

    n.ind1 <- nrow(object$pheno)
    n.ind2 <- sapply(object$geno,function(x) nrow(x$data))
    if(any(n.ind2 != n.ind1))
        stop("Different numbers of individuals in genotypes and phenotypes.")
    n.ind1
}

nchr <-
    function(object)
{
    if(inherits(object, "map")) {
        return(length(object))
    } else if(inherits(object, "cross")) {
        return(length(object$geno))
    } else {
        # neither cross nor map object
        stop("Input should have class \"cross\" or \"map\".")
    }
}

nmar <-
    function(object)
{
    if(inherits(object, "map")) {
        if(is.matrix(object[[1]]))
            return(sapply(object, function(x) ncol(x)))
        else return(sapply(object, function(x) length(x)))
    }
    else if(!inherits(object, "cross")) {
        stop("Input should have class \"cross\" or \"map\".")
    }


    if(length(object$geno) == 0)
        stop("There is no genotype data.")

    if(!is.matrix(object$geno[[1]]$map))
        n.mar1 <- sapply(object$geno, function(x) length(x$map))
    else # sex-specific maps
        n.mar1 <- sapply(object$geno, function(x) ncol(x$map))

    n.mar2 <- sapply(object$geno, function(x) ncol(x$data))
    if(any(n.mar1 != n.mar2))
        stop("Different numbers of markers in genotypes and maps.")
    n.mar1
}

totmar <-
    function(object)
{
    if(inherits(object, "map")) {
        if(is.matrix(object[[1]]))
            return(sum(sapply(object, function(x) ncol(x))))
        else return(sum(sapply(object, function(x) length(x))))
    } else if(!inherits(object, "cross")) {
        stop("Input should have class \"cross\" or \"map\".")
    }

    if(length(object$geno) == 0)
        stop("There is no genotype data.")

    if(!is.matrix(object$geno[[1]]$map))
        totmar1 <- sum(sapply(object$geno, function(x) length(x$map)))
    else # sex-specific maps
        totmar1 <- sum(sapply(object$geno, function(x) ncol(x$map)))
    totmar2 <- sum(sapply(object$geno, function(x) ncol(x$data)))
    if(totmar1 != totmar2)
        stop("Different numbers of markers in genotypes and maps.")
    totmar1
}

nphe <-
    function(object)
{
    if(!inherits(object, "cross"))
        stop("Input should have class \"cross\".")

    ncol(object$pheno)
}

# count number of missing genotypes for each individual or each marker
nmissing <-
    function(cross,what=c("ind","mar"))
{
    if(!inherits(cross, "cross"))
        stop("Input should have class \"cross\".")

    what <- match.arg(what)

    if(what=="ind") {
        n.missing <- rep(0,nind(cross))
        for(i in 1:nchr(cross))
            n.missing <- n.missing +
                apply(cross$geno[[i]]$data,1,function(a) sum(is.na(a)))

        # individual IDs
        id <- getid(cross)
        if(!is.null(id)) names(n.missing) <- id
    }
    else {
        n.missing <- NULL
        for(i in 1:nchr(cross))
            n.missing <- c(n.missing,
                           apply(cross$geno[[i]]$data,2,function(a) sum(is.na(a))))
    }

    n.missing
}


# like nmissing, but for the opposite value
ntyped <-
    function(cross, what=c("ind","mar"))
{
    if(!inherits(cross, "cross"))
        stop("Input should have class \"cross\".")

    what <- match.arg(what)

    if(what=="ind") n <- totmar(cross)
    else n <- nind(cross)

    n - nmissing(cross, what)
}

# "print" method for cross object
#
# to avoid ever printing the entire object, print just a little
#     warning message and then the summary
print.cross <-
    function(x, ...)
{
    cat("  This is an object of class \"cross\".\n")
    cat("  It is too complex to print, so we provide just this summary.\n")
    print(summary(x))
    return(summary(x))
}


# get chromosome lengths
chrlen <-
    function(object)
{
    if(!inherits(object, "map") && !inherits(object, "cross"))
        stop("Input should have class \"map\" or \"cross\".")

    if(!inherits(object, "map"))
        x <- pull.map(object)
    else x <- object

    if(is.matrix(x[[1]]))
        return(sapply(x, apply, 1, function(a) diff(range(a))))

    sapply(x, function(a) diff(range(a)))
}


# end of summary.cross.R
kbroman/qtl documentation built on Jan. 13, 2024, 10:14 p.m.