tests/testthat/test-fit1_binary.R

context("fit1 for binary traits")

test_that("fit1 for binary traits works in intercross", {

    iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2"))
    iron <- iron[,c(18:19,"X")]
    map <- insert_pseudomarkers(iron$gmap, step=1)
    probs <- calc_genoprob(iron, map, error_prob=0.002)

    pheno <- setNames(as.numeric(iron$pheno[,1] > median(iron$pheno[,1])),
                      ind_ids_pheno(iron))
    covar <- match(iron$covar$sex, c("f", "m")) # make numeric
    names(covar) <- rownames(iron$covar)
    Xcovar <- get_x_covar(iron)

    # calculate LOD scores
    out <- scan1(probs, pheno, addcovar=covar, Xcovar=Xcovar, model="binary")

    # estimate coefficients; no covariates for X chromosome
    coef <- lapply(seq_len(length(probs)), function(i) {
        if(i==3) cov <- NULL
        else cov <- covar
        scan1coef(subset(probs, chr=names(probs)[i]), pheno, addcovar=cov, model="binary", zerosum=FALSE) })

    # fit1, no missing data
    npos <- sapply(probs, function(a) dim(a)[3])
    pmar <- c(3, 4, 12)
    out_fit1 <- lapply(seq(along=pmar),
                       function(i) {
        if(i==3) { nullcov <- Xcovar; cov <- NULL } # need Xcovar under null on X chr but no other covariates
        else { nullcov <- NULL; cov <- covar }      # sex as covariate; no additional covariates under null
        fit1(probs[[i]][,,pmar[i]], pheno, addcovar=cov, nullcovar=nullcov, model="binary", zerosum=FALSE) })

    pos <- cumsum(c(0, npos[-3])) + pmar
    # check LOD vs scan1, plus ind'l contributions to LOD
    for(i in 1:3) {
        expect_equal(out_fit1[[i]]$lod, out[pos[i],1])
        expect_equal(sum(out_fit1[[i]]$ind_lod), out_fit1[[i]]$lod)
    }

    # check coefficients
    for(i in 1:3)
        expect_equal(out_fit1[[i]]$coef, coef[[i]][pmar[i],])

    # repeat the whole thing with a couple of missing phenotypes
    pheno[c(187, 244)] <- NA

    # calculate LOD scores
    out <- scan1(probs, pheno, addcovar=covar, Xcovar=Xcovar, model="binary")

    # estimate coefficients; no covariates for X chromosome
    coef <- lapply(seq_len(length(probs)), function(i) {
        if(i==3) cov <- NULL
        else cov <- covar
        scan1coef(subset(probs, chr=names(probs)[i]), pheno, addcovar=cov, se=TRUE, model="binary", zerosum=FALSE) })

    # fit1, missing data
    out_fit1 <- lapply(seq(along=pmar),
                       function(i) {
        if(i==3) { nullcov <- Xcovar; cov <- NULL } # need Xcovar under null on X chr but no other covariates
        else { nullcov <- NULL; cov <- covar }      # sex as covariate; no additional covariates under null
        fit1(probs[[i]][,,pmar[i]], pheno, addcovar=cov, nullcovar=nullcov, se=TRUE, model="binary", zerosum=FALSE) })

    # check LOD vs scan1, plus ind'l contributions to LOD
    for(i in 1:3) {
        expect_equal(out_fit1[[i]]$lod, out[pos[i],1])
        expect_equal(sum(out_fit1[[i]]$ind_lod), out_fit1[[i]]$lod)
    }

    # check coefficients
    for(i in 1:3)
        expect_equal(out_fit1[[i]]$coef, coef[[i]][pmar[i],])

    # check SEs
    for(i in 1:3)
        expect_equal(out_fit1[[i]]$SE, attr(coef[[i]], "SE")[pmar[i],])

    # direct calculations, chr 18
    glm0 <- glm(pheno ~ covar, family=binomial(link=logit))
    X <- cbind(probs[[1]][,,pmar[1]], covar)
    colnames(X) <- c("SS", "SB", "BB", "ac1")
    glm1 <- glm(pheno ~ -1 + X, family=binomial(link=logit))
    glm_lod <- (glm1$deviance - glm0$deviance)/(-2*log(10))
    p1 <- glm1$fitted
    p0 <- glm0$fitted
    y <- pheno[!is.na(pheno)]
    glm_ind_lod <- (y * log10(p1) + (1-y)*log10(1-p1)) -
        (y * log10(p0) + (1-y)*log10(1-p0))

    expect_equal(out_fit1[[1]]$lod, glm_lod)
    expect_equal(out_fit1[[1]]$ind_lod, glm_ind_lod)

    expect_equal(out_fit1[[1]]$coef, stats::setNames(glm1$coef, c("SS", "SB", "BB", "ac1")))
    expect_equal(out_fit1[[1]]$SE, stats::setNames(summary(glm1)$coef[,2], c("SS", "SB", "BB", "ac1")), tol=1e-6)

    # direct calculations, chr X
    glm0 <- glm(pheno ~ Xcovar, family=binomial(link=logit))
    X <- probs[[3]][,,pmar[3]]
    colnames(X) <- c("SS", "SB", "BS", "BB", "SY", "BY")
    glm1 <- glm(pheno ~ -1 + X, family=binomial(link=logit))
    glm_lod <- (glm1$deviance - glm0$deviance)/(-2*log(10))
    p1 <- glm1$fitted
    p0 <- glm0$fitted
    y <- pheno[!is.na(pheno)]
    glm_ind_lod <- (y * log10(p1) + (1-y)*log10(1-p1)) -
        (y * log10(p0) + (1-y)*log10(1-p0))

    expect_equal(out_fit1[[3]]$lod, glm_lod)
    expect_equal(out_fit1[[3]]$ind_lod, glm_ind_lod)
    expect_equal(out_fit1[[3]]$coef, stats::setNames(glm1$coef, c("SS", "SB", "BS", "BB", "SY", "BY")))
    expect_equal(out_fit1[[3]]$SE, stats::setNames(summary(glm1)$coef[,2], c("SS", "SB", "BS", "BB", "SY", "BY")), tol=1e-6)

})


test_that("fit1 by H-K works in riself", {

    grav2 <- read_cross2(system.file("extdata", "grav2.zip", package="qtl2"))
    grav2 <- grav2[,4:5]
    map <- insert_pseudomarkers(grav2$gmap, step=1)
    probs <- calc_genoprob(grav2, map, error_prob=0.002)

    pheno <- setNames(as.numeric(grav2$pheno[,219] > 101.25), ind_ids_pheno(grav2))

    # calculate LOD scores
    out <- scan1(probs, pheno, model="binary")

    # estimate coefficients
    coef <- lapply(seq_len(length(probs)), function(i) scan1coef(subset(probs, chr=names(probs)[i]), pheno, model="binary", zerosum=FALSE))

    # fit1, no missing data
    npos <- sapply(probs, function(a) dim(a)[3])
    pmar <- c(1, 172)
    out_fit1 <- lapply(seq(along=pmar), function(i) fit1(probs[[i]][,,pmar[i]], pheno, model="binary", zerosum=FALSE))

    pos <- c(0,npos[1]) + pmar
    # check LOD vs scan1, plus ind'l contributions to LOD
    for(i in 1:2) {
        expect_equal(out_fit1[[i]]$lod, out[pos[i],1])
        expect_equal(sum(out_fit1[[i]]$ind_lod), out_fit1[[i]]$lod)
    }

    # check coefficients
    for(i in 1:2)
        expect_equal(out_fit1[[i]]$coef, coef[[i]][pmar[i],])

    # repeat the whole thing with a couple of missing phenotypes
    pheno[c(24, 106)] <- NA

    # calculate LOD scores
    out <- scan1(probs, pheno, model="binary")

    # estimate coefficients
    coef <- lapply(seq_len(length(probs)), function(i) scan1coef(subset(probs, chr=names(probs)[i]), pheno, se=TRUE, model="binary", zerosum=FALSE))

    # fit1, missing data
    out_fit1 <- lapply(seq(along=pmar), function(i) fit1(probs[[i]][,,pmar[i]], pheno, se=TRUE, model="binary", zerosum=FALSE))

    # check LOD vs scan1, plus ind'l contributions to LOD
    for(i in 1:2) {
        expect_equal(out_fit1[[i]]$lod, out[pos[i],1])
        expect_equal(sum(out_fit1[[i]]$ind_lod), out_fit1[[i]]$lod)
    }

    # check coefficients
    for(i in 1:2)
        expect_equal(out_fit1[[i]]$coef, coef[[i]][pmar[i],])


    # check SEs
    for(i in 1:2)
        expect_equal(out_fit1[[i]]$SE, attr(coef[[i]], "SE")[pmar[i],])

    # direct calculations, chr 18
    glm0 <- glm(pheno ~ 1, family=binomial(link=logit))
    X <- probs[[1]][,,pmar[1]]
    colnames(X) <- c("LL", "CC")
    glm1 <- glm(pheno ~ -1 + X, family=binomial(link=logit))
    glm_lod <- (glm1$deviance - glm0$deviance)/(-2*log(10))
    p1 <- glm1$fitted
    p0 <- glm0$fitted
    y <- pheno[!is.na(pheno)]
    glm_ind_lod <- (y * log10(p1) + (1-y)*log10(1-p1)) -
        (y * log10(p0) + (1-y)*log10(1-p0))

    expect_equal(out_fit1[[1]]$lod, glm_lod)
    expect_equal(out_fit1[[1]]$ind_lod, glm_ind_lod)
    expect_equal(out_fit1[[1]]$coef, stats::setNames(glm1$coef, c("LL", "CC")))
    expect_equal(out_fit1[[1]]$SE, stats::setNames(summary(glm1)$coef[,2], c("LL", "CC")), tol=1e-6)

})

test_that("fit1 works for binary traits with weights", {

    set.seed(17262911)

    iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2"))
    iron <- iron[,c(2,"X")]
    map <- insert_pseudomarkers(iron$gmap, step=1)
    probs <- calc_genoprob(iron, map, err=0.002)

    phe <- iron$pheno[,1]
    phe <- setNames(as.numeric(phe > quantile(phe, 0.7)),
                    ind_ids(iron))
    phe[c(108,142,268)] <- NA
    weights <- setNames(sample(1:10, n_ind(iron), replace=TRUE), names(phe))

    npos <- dim(probs)[3,]
    pos <- sapply(npos, sample, 1)
    pr <- list(probs[[1]][,,pos[1]],
               probs[[2]][,,pos[2]])

    out_fit1_1 <- fit1(pr[[1]], phe, model="binary", se=TRUE, weights=weights, zerosum=FALSE)
    out_fit1_2 <- fit1(pr[[2]], phe, model="binary", se=TRUE, weights=weights, zerosum=FALSE)

    # coefficients and SEs
    co2 <- scan1coef(probs[,"2"], phe, model="binary", se=TRUE, weights=weights, zerosum=FALSE)
    expect_equal(out_fit1_1$coef, co2[pos[1],])
    expect_equal(out_fit1_1$SE, attr(co2, "SE")[pos[1],])
    coX <- scan1coef(probs[,"X"], phe, model="binary", se=TRUE, weights=weights, zerosum=FALSE)
    expect_equal(out_fit1_2$coef, coX[pos[2],])
    expect_equal(out_fit1_2$SE, attr(coX, "SE")[pos[2],])

    # lod
    out <- scan1(probs, phe, model="binary", weights=weights)
    expect_equal(out_fit1_1$lod, out[pos[1]])
    expect_equal(out_fit1_2$lod, out[npos[1] + pos[2]])

    # compare to glm
    out_glm_1 <- glm(phe ~ -1 + pr[[1]], family=binomial(link=logit), weights=weights,
                     control=list(epsilon=1e-11))
    out_glm_2 <- glm(phe ~ -1 + pr[[2]], family=binomial(link=logit), weights=weights,
                     control=list(epsilon=1e-11))
    out_glm_0 <- glm(phe ~ 1, family=binomial(link=logit), weights=weights)
    sum_glm_1 <- summary(out_glm_1)
    sum_glm_2 <- summary(out_glm_2)

    # function to calc lod
    lod_glm <- function(out_glm_alt, out_glm_null)
        (out_glm_null$deviance - out_glm_alt$deviance)/(2*log(10))

    # lod scores
    expect_equivalent(out_fit1_1$lod, lod_glm(out_glm_1, out_glm_0))
    expect_equivalent(out_fit1_2$lod, lod_glm(out_glm_2, out_glm_0))

    # coefficients and SEs
    expect_equivalent(out_fit1_1$coef, out_glm_1$coef)
    expect_equivalent(out_fit1_2$coef, out_glm_2$coef)
    expect_equivalent(out_fit1_1$SE, sum_glm_1$coef[,2])
    expect_equivalent(out_fit1_2$SE, sum_glm_2$coef[,2])

    # add a covariate
    X <- setNames(rnorm(n_ind(iron)), names(phe))

    out_fit1_1 <- fit1(pr[[1]], phe, model="binary", se=TRUE, weights=weights, addcovar=X, zerosum=FALSE)
    out_fit1_2 <- fit1(pr[[2]], phe, model="binary", se=TRUE, weights=weights, addcovar=X, zerosum=FALSE)

    # coefficients and SEs
    co2 <- scan1coef(probs[,"2"], phe, model="binary", se=TRUE, weights=weights, addcovar=X, zerosum=FALSE)
    expect_equal(out_fit1_1$coef, co2[pos[1],])
    expect_equal(out_fit1_1$SE, attr(co2, "SE")[pos[1],])
    coX <- scan1coef(probs[,"X"], phe, model="binary", se=TRUE, weights=weights, addcovar=X, zerosum=FALSE)
    expect_equal(out_fit1_2$coef, coX[pos[2],])
    expect_equal(out_fit1_2$SE, attr(coX, "SE")[pos[2],])

    # lod
    out <- scan1(probs, phe, model="binary", weights=weights, addcovar=X)
    expect_equal(out_fit1_1$lod, out[pos[1]])
    expect_equal(out_fit1_2$lod, out[npos[1] + pos[2]])

    # compare to glm
    out_glm_1 <- glm(phe ~ -1 + pr[[1]] + X, family=binomial(link=logit), weights=weights,
                     control=list(epsilon=1e-11))
    out_glm_2 <- glm(phe ~ -1 + pr[[2]] + X, family=binomial(link=logit), weights=weights,
                     control=list(epsilon=1e-11))
    out_glm_0 <- glm(phe ~ X, family=binomial(link=logit), weights=weights)
    sum_glm_1 <- summary(out_glm_1)
    sum_glm_2 <- summary(out_glm_2)

    # lod scores
    expect_equivalent(out_fit1_1$lod, lod_glm(out_glm_1, out_glm_0))
    expect_equivalent(out_fit1_2$lod, lod_glm(out_glm_2, out_glm_0))

    # coefficients and SEs
    expect_equivalent(out_fit1_1$coef, out_glm_1$coef)
    expect_equivalent(out_fit1_2$coef, out_glm_2$coef)
    expect_equivalent(out_fit1_1$SE, sum_glm_1$coef[,2])
    expect_equivalent(out_fit1_2$SE, sum_glm_2$coef[,2])

    # interactive covariate, autosome only
    out_fit1_1 <- fit1(pr[[1]], phe, model="binary", se=TRUE, weights=weights, addcovar=X, intcovar=X, zerosum=FALSE)

    # coefficients and SEs
    co2 <- scan1coef(probs[,"2"], phe, model="binary", se=TRUE, weights=weights, addcovar=X, intcovar=X, zerosum=FALSE)
    expect_equal(out_fit1_1$coef, co2[pos[1],])
    expect_equal(out_fit1_1$SE, attr(co2, "SE")[pos[1],])

    # lod
    out <- scan1(probs, phe, model="binary", weights=weights, addcovar=X, intcovar=X)
    expect_equal(out_fit1_1$lod, out[pos[1]])

    # compare to glm
    out_glm_1 <- glm(phe ~ -1 + pr[[1]] + X + I(pr[[1]][,2]*X) + I(pr[[1]][,3]*X),
                     family=binomial(link=logit), weights=weights,
                     control=list(epsilon=1e-11))
    out_glm_0 <- glm(phe ~ X, family=binomial(link=logit), weights=weights)
    sum_glm_1 <- summary(out_glm_1)

    # lod scores
    expect_equivalent(out_fit1_1$lod, lod_glm(out_glm_1, out_glm_0))

    # coefficients and SEs
    expect_equivalent(out_fit1_1$coef, out_glm_1$coef)
    expect_equivalent(out_fit1_1$SE, sum_glm_1$coef[,2])

})

test_that("fit one for binary traits handles NA case", {

    iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2"))
    iron <- iron[,c("2", "X")]
    map <- insert_pseudomarkers(iron$gmap, step=1)
    probs <- calc_genoprob(iron, map, err=0.002)

    phe <- iron$pheno[,1]
    phe <- setNames(as.numeric(phe > quantile(phe, 0.95)),
                    ind_ids(iron))

    suppressWarnings(out <- scan1(probs, phe, model="binary"))

    mar <- c("D2Mit304", "DXMit186")
    p1 <- pull_genoprobpos(probs, mar[1])
    p2 <- pull_genoprobpos(probs, mar[2])

    suppressWarnings(ll1a <- glm(phe ~ -1 + p1, family=binomial(link=logit))$deviance)
    suppressWarnings(ll1b <- glm(phe ~ -1 + p2, family=binomial(link=logit))$deviance)
    ll0 <- glm(phe ~ 1, family=binomial(link=logit))$deviance

    expect_equal(out[mar[1],1], (ll0-ll1a)/2/log(10), tol=1e-5)
    expect_equal(out[mar[2],1], (ll0-ll1b)/2/log(10), tol=1e-6)

    # repeat with weights
    set.seed(20180720)
    w <- setNames( runif(length(phe), 1, 5), names(phe))
    suppressWarnings(outw <- scan1(probs, phe, model="binary", weights=w))

    suppressWarnings(ll1a_w <- glm(phe ~ -1 + p1, family=binomial(link=logit), weights=w)$deviance)
    suppressWarnings(ll1b_w <- glm(phe ~ -1 + p2, family=binomial(link=logit), weights=w)$deviance)
    suppressWarnings(ll0_w <- glm(phe ~ 1, family=binomial(link=logit), weights=w)$deviance)

    expect_equal(outw[mar[1],1], (ll0_w-ll1a_w)/2/log(10), tol=1e-4)
    expect_equal(outw[mar[2],1], (ll0_w-ll1b_w)/2/log(10), tol=1e-6)

})

test_that("fit one for binary traits handles NA case with DO data", {

    skip_if(isnt_karl(), "this test only run locally")

    file <- paste0("https://raw.githubusercontent.com/rqtl/",
                   "qtl2data/main/DOex/DOex.zip")
    DOex <- read_cross2(file)
    probs <- calc_genoprob(DOex, error_prob=0.002, cores=2)
    apr <- genoprob_to_alleleprob(probs)

    phe <- setNames((DOex$pheno[,1] > quantile(DOex$pheno[,1], 0.95, na.rm=TRUE))*1, rownames(DOex$pheno))

    suppressWarnings(out <- scan1(apr, phe, model="binary"))

    mar <- c("backupUNC021331957", "UNC020459944")
    p1 <- pull_genoprobpos(apr, mar[1])
    p2 <- pull_genoprobpos(apr, mar[2])

    suppressWarnings(ll1a <- glm(phe ~ -1 + p1, family=binomial(link=logit))$deviance)
    suppressWarnings(ll1b <- glm(phe ~ -1 + p2, family=binomial(link=logit))$deviance)
    ll0 <- glm(phe ~ 1, family=binomial(link=logit))$deviance

    expect_equal(out[mar[1],1], (ll0-ll1a)/2/log(10))
    expect_equal(out[mar[2],1], (ll0-ll1b)/2/log(10))


    # repeat with weights
    set.seed(20180720)
    w <- setNames( runif(length(phe), 1, 5), names(phe))
    suppressWarnings(outw <- scan1(apr, phe, model="binary", weights=w))

    suppressWarnings(ll1a_w <- glm(phe ~ -1 + p1, family=binomial(link=logit), weights=w)$deviance)
    suppressWarnings(ll1b_w <- glm(phe ~ -1 + p2, family=binomial(link=logit), weights=w)$deviance)
    suppressWarnings(ll0_w <- glm(phe ~ 1, family=binomial(link=logit), weights=w)$deviance)

    expect_equal(outw[mar[1],1], (ll0_w-ll1a_w)/2/log(10))
    expect_equal(outw[mar[2],1], (ll0_w-ll1b_w)/2/log(10), tol=1e-6)

})
rqtl/qtl2 documentation built on March 20, 2024, 6:35 p.m.