tests/testthat/test-linreg.R

context("linear regression")
library(qtl)

test_that("lin regr works for simple example", {

    set.seed(27343534)

    n <- 1000
    x <- rnorm(n, 50, 10)
    X <- cbind(1, x)
    y <- 30 + 0.5*x + rnorm(n, 0, 2.5)
    Y <- as.matrix(y)

    lm.out <- lm(y ~ x)
    resid <- lm.out$resid
    names(resid) <- NULL
    rss <- sum(resid^2)

    # QR-based regression
    qrFit <- fit_linreg_eigenqr(X, y, TRUE)
    expect_equal(rss, qrFit$rss)
    expect_equivalent(lm.out$coef, qrFit$coef)
    expect_equivalent(lm.out$fitted, qrFit$fitted)
    expect_equivalent(summary(lm.out)$coef[,2], qrFit$SE)

    qrRSS <- calc_rss_eigenqr(X, y)
    expect_equal(rss, qrRSS)

    # eigen residuals
    eigenqr_resid <- calc_resid_eigenqr(X,Y)
    expect_equal(eigenqr_resid, as.matrix(resid))
    eigenchol_resid <- calc_resid_eigenqr(X,Y)
    expect_equal(eigenchol_resid, as.matrix(resid))

    # generic
    linreg_rss <- calc_rss_linreg(X,Y)
    expect_equal(linreg_rss, rss)
    linreg_resid <- calc_resid_linreg(X,Y)
    expect_equal(linreg_resid, as.matrix(resid))

    # just the coefficients
    coef <- calc_coef_linreg(X,Y)
    expect_equal(coef, as.numeric(lm.out$coef))

    # coefficients and SEs
    coefSE <- calc_coefSE_linreg(X,Y)
    expected <- summary(lm.out)$coef
    expected <- list(coef=as.numeric(expected[,1]),
                     SE=as.numeric(expected[,2]))
    expect_equal(coefSE, expected)

})

test_that("lin regr works for reduced-rank example", {

    dd <- data.frame(f1 = gl(4, 6, labels = LETTERS[1:4]),
                     f2 = gl(3, 2, labels = letters[1:3]))[-(7:8), ]
    mm <- model.matrix(~ f1*f2, dd)
    y <- mm %*% seq_len(ncol(mm)) + rnorm(nrow(mm), sd = 0.1)
    Y <- as.matrix(y)

    lm.out <- lm(y ~ -1 + mm)
    resid <- lm.out$resid
    names(resid) <- NULL
    rss <- sum(resid^2)
    lm_se <- lm.out$coef
    lm_se[!is.na(lm_se)] <- summary(lm.out)$coef[,2]

    # (cholesky-based regression won't work here)

    # QR-based regression
    qrFit <- fit_linreg_eigenqr(mm, y, TRUE)
    expect_equal(rss, qrFit$rss)
    expect_equivalent(lm.out$fitted, qrFit$fitted)
    lm_reduced <- lm(y ~ -1 + mm[,!is.na(qrFit$coef)])
    lm_reduced_se <- lm_reduced$coef
    lm_reduced_se <- summary(lm_reduced)$coef[,2]
    expect_equivalent(lm_reduced$coef, qrFit$coef[!is.na(qrFit$coef)])
    expect_equivalent(lm_reduced_se, qrFit$SE[!is.na(qrFit$coef)])

    qrRSS <- calc_rss_eigenqr(mm, y)
    expect_equal(rss, qrRSS)

    # eigen resid
    eigenqr_resid <- calc_resid_eigenqr(mm, Y)
    expect_equal(eigenqr_resid, as.matrix(resid))

    # generic
    linreg_rss <- calc_rss_linreg(mm,Y)
    expect_equal(linreg_rss, rss)
    linreg_resid <- calc_resid_linreg(mm,Y)
    expect_equal(linreg_resid, as.matrix(resid))

    # just the coefficients
    coef <- calc_coef_linreg(mm, y)
    expect_equivalent(coef[!is.na(coef)], lm_reduced$coef)

    # coefficients and SEs
    coefSE <- calc_coefSE_linreg(mm, y)
    expect_equivalent(coefSE$coef[!is.na(coefSE$coef)], lm_reduced$coef)
    expect_equivalent(coefSE$SE[!is.na(coefSE$coef)], lm_reduced_se)

})


test_that("lin regr works in a QTL situation", {

    data(hyper)
    hyper <- calc.genoprob(hyper, step=1, err=0.001)
    qtl <- makeqtl(hyper, c(1,4,6,15), c(68.3, 30, 66.7, 17.5),
                   what="prob")

    n <- nind(hyper)
    y <- hyper$pheno[,1]
    Y <- as.matrix(y)
    X <- matrix(ncol=6, nrow=n)
    X[,1] <- 1
    for(i in 2:5) X[,i] <- qtl$prob[[i-1]][,2]
    X[,6] <- X[,4]*X[,5]

    lodfull <- fitqtl(hyper, qtl=qtl, method="hk", dropone=FALSE,
                      formula=y~q1+q2+q3*q4)$lod
    lodadd <- fitqtl(hyper, qtl=qtl, method="hk", dropone=FALSE,
                     formula=y~q1+q2+q3+q4)$lod
    lod14 <- fitqtl(hyper, qtl=qtl, method="hk", dropone=FALSE,
                    formula=y~q1+q2)$lod
    lod4 <- fitqtl(hyper, qtl=qtl, method="hk", dropone=FALSE,
                    formula=y~q2)$lod

    # eigen qr
    rss0 <- calc_rss_eigenqr(X[,1,drop=FALSE], y)
    expect_equal(rss0, sum(lm(y~1)$resid^2))

    rssfull <- calc_rss_eigenqr(X, y)
    expect_equal(rssfull, sum(lm(y ~ X)$resid^2))
    expect_equal(lodfull, (n/2)*log10(rss0/rssfull))

    rssadd <- calc_rss_eigenqr(X[,1:5], y)
    expect_equal(rssadd, sum(lm(y ~ X[,2:5])$resid^2))
    expect_equal(lodadd, (n/2)*log10(rss0/rssadd))

    rss14 <- calc_rss_eigenqr(X[,1:3], y)
    expect_equal(rss14, sum(lm(y ~ X[,2:3])$resid^2))
    expect_equal(lod14, (n/2)*log10(rss0/rss14))

    rss4 <- calc_rss_eigenqr(X[,c(1,3)], y)
    expect_equal(rss4, sum(lm(y ~ X[,3,drop=FALSE])$resid^2))
    expect_equal(lod4, (n/2)*log10(rss0/rss4))

})

test_that("lin regr works for multiple columns", {

    set.seed(27343534)

    n <- 1000
    x <- rnorm(n, 50, 10)
    X <- cbind(1, x)
    y <- 30 + 0.5*x + rnorm(n, 0, 2.5)

    ncolY <- 10
    Y <- permute_nvector(10, y)

    resid <- lm.fit(X,Y)$resid
    lm.rss <- colSums(resid^2)

    # RSS
    expect_equal(lm.rss, calc_mvrss_eigenchol(X, Y))
    expect_equal(lm.rss, calc_mvrss_eigenqr(X, Y))

    # residuals
    expect_equal(resid, calc_resid_eigenqr(X, Y))
    expect_equal(resid, calc_resid_eigenchol(X, Y))

    # generic
    expect_equal(lm.rss, calc_rss_linreg(X, Y))
    expect_equal(as.matrix(resid), calc_resid_linreg(X, Y))

})

test_that("lin regr works for multiple columns, reduced-rank X", {

    dd <- data.frame(f1 = gl(4, 6, labels = LETTERS[1:4]),
                     f2 = gl(3, 2, labels = letters[1:3]))[-(7:8), ]
    mm <- model.matrix(~ f1*f2, dd)
    y <- mm %*% seq_len(ncol(mm)) + rnorm(nrow(mm), sd = 0.1)
    Y <- permute_nvector(10, y)

    resid <- lm.fit(mm, Y)$resid
    lm.rss <- colSums(resid^2)

    # RSS
    expect_equal(lm.rss, calc_mvrss_eigenqr(mm, Y))

    # resid
    expect_equal(resid, calc_resid_eigenqr(mm, Y))

    # generic
    expect_equal(lm.rss, calc_rss_linreg(mm, Y))
    expect_equal(as.matrix(resid), calc_resid_linreg(mm, Y))
})


test_that("calculation of residuals for 3d arrays works", {

    library(qtl)
    data(hyper)
    hyper <- hyper[1,]
    hyper2 <- convert2cross2(hyper)
    map <- insert_pseudomarkers(hyper2$gmap, step=1)
    pr <- calc_genoprob(hyper2, map, error_prob=0.002)
    pr <- aperm(pr[[1]], c(1,3,2)) # reorient to have genomic position last

    # residuals with intercept plus the phenotype
    X <- cbind(1, hyper$pheno[,1])
    expected <- array(0, dim=dim(pr))
    for(i in 1:dim(pr)[3])
        expected[,,i] <- lm(pr[,,i] ~ X)$resid

    resid <- calc_resid_linreg_3d(X, pr)
    expect_equal(expected, resid)

    # residuals with just the intercept
    expected <- array(0, dim=dim(pr))
    for(i in 1:dim(pr)[3])
        expected[,,i] <- lm(pr[,,i] ~ 1)$resid

    resid <- calc_resid_linreg_3d(X[,1,drop=FALSE], pr)
    expect_equal(expected, resid)

})
rqtl/qtl2 documentation built on March 20, 2024, 6:35 p.m.