ligera: LIght GEnetic Robust Association

library(tibble)
library(popkin) # inbr

# simulate X (to share across tests)
# create a simple matrix with random valid data
n <- 20
m <- 1000
# create ancestral allele frequencies
p_anc <- runif( m )
# create genotypes
X <- rbinom( n * m, 2, p_anc )
# turn into matrix
X <- matrix( X, nrow = m, ncol = n )

# redraw fixed loci, seem to be a big problem in my evals
redraw_fixed <- function( X ) {
    x_bar <- rowMeans( X, na.rm = TRUE )
    indexes_fixed <- x_bar == 0 | x_bar == 2
    m_fixed <- sum( indexes_fixed )
    while( m_fixed > 0 ) {
        X[ indexes_fixed, ] <- rbinom( ncol(X) * m_fixed, 2, 0.5 ) # draw from mid frequency, preventing future issues
        # if there were NAs, won't preserve them, whatever
        # re-evaluate
        x_bar <- rowMeans( X, na.rm = TRUE )
        indexes_fixed <- x_bar == 0 | x_bar == 2
        m_fixed <- sum( indexes_fixed )
    }
    return( X )
}
# removed fixed loci, they appear problematic for *_multi methods under X missingness, for unknown reasons
X <- redraw_fixed( X )

# X with missing values
miss <- 0.1
X_miss <- X # copy first
X_miss[ sample( n * m, n * m * miss ) ] <- NA
# no fixed loci here either
X_miss <- redraw_fixed( X_miss )

# true kinship matrix in this unstructured case is I / 2
# don't use that trivial case in examples (fails to test WG bias), but do use its true mean kinship for other things
mean_kinship <- mean( diag( n ) / 2 )

# simulate phenotype (to share across tests)
trait <- rnorm( n )
# to have some significant cases, add genetic effect (
m_causal <- 5
causal_indexes <- sample( m, m_causal )
causal_coeffs <- rnorm( m_causal )
trait <- trait + drop( causal_coeffs %*% X[ causal_indexes, ] )
# matrix version, for code that takes covariates
Y <- cbind( trait, 1 )
# get rid of names for comparisons
dimnames( Y ) <- NULL

# trait with missingness
trait_miss <- trait # copy first
trait_miss[ sample( n, n * miss ) ] <- NA
indexes_ind <- !is.na( trait_miss ) # to test for ind removals
Y_ind_rm <- Y[ indexes_ind , ]
# sadly, loci become fixed when individuals with missingness are removed, so redraw those again as needed
X[ , indexes_ind ] <- redraw_fixed( X[ , indexes_ind ] )
X_miss[ , indexes_ind ] <- redraw_fixed( X_miss[ , indexes_ind ] )

# for ligera2 (full BOLT-like trick)
# construct exact kinship matrix estimate we'll test in trick version
# NOTE: this all requires that X have no NAs
x_bar <- rowMeans( X )
b <- (1 - mean( x_bar * ( 2 - x_bar ) ) - mean_kinship ) / ( 1 - mean_kinship )
kinship_est <- ( crossprod( X - 1 ) / m - b ) / ( 1 - b ) # here we do normalize properly for a plot
kinship_est_inv <- solve( kinship_est )
inbr_est <- inbr( kinship_est ) # for a comparison
# a basic validation
expect_true( !anyNA( inbr_est ) )

# version of b and inbr_est with ind removals (but no X missingness, first)
x_bar <- rowMeans( X[ , indexes_ind ] ) # overwrite x_bar, not reused elsewhere
b_ind_rm <- mean( x_bar * ( 2 - x_bar ) ) # compute in two parts for troubleshooting
b_ind_rm <- (1 - b_ind_rm - mean_kinship ) / ( 1 - mean_kinship )
kinship_est_ind_rm <- ( crossprod( X[ , indexes_ind ] - 1 ) / m - b_ind_rm ) / ( 1 - b_ind_rm ) # here we do normalize properly for a plot
inbr_est_ind_rm <- inbr( kinship_est_ind_rm ) # for a comparison

# true product for tests
KY <- kinship_est %*% Y    
# compare to vanilla `solve` (using actual inversion, which is least scalable solution)
Z <- solve( kinship_est, Y )
# a basic validation
expect_true( !anyNA( Z ) )

# versions with ind removals
# true product for tests
KY_ind_rm <- kinship_est_ind_rm %*% Y_ind_rm
# compare to vanilla `solve` (using actual inversion, which is least scalable solution)
Z_ind_rm <- solve( kinship_est_ind_rm, Y_ind_rm )

test_that("trait missingness is as desired", {
    # I had simulated this wrong, it was terrible, it deserves its own tests
    n_ind_kept <- sum( !is.na( trait_miss ) )
    # we should have repeated drawing above, until there was at least one NA, so this is a strict inequality
    expect_true( n_ind_kept < n )
    # check that the other items above agree
    expect_equal( n_ind_kept, sum( indexes_ind ) )
    expect_equal( n_ind_kept, nrow( Y_ind_rm ) )
    expect_equal( n_ind_kept, ncol( kinship_est_ind_rm ) )
    expect_equal( n_ind_kept, nrow( kinship_est_ind_rm ) )
    expect_equal( n_ind_kept, length( inbr_est_ind_rm ) )
    expect_equal( n_ind_kept, nrow( KY_ind_rm ) )
    expect_equal( n_ind_kept, nrow( Z_ind_rm ) )
    # other basic validations
    expect_true( !anyNA( b_ind_rm ) )
    expect_true( !anyNA( kinship_est_ind_rm ) )
    expect_true( !anyNA( inbr_est_ind_rm ) )
    expect_true( !anyNA( KY_ind_rm ) )
    expect_true( !anyNA( Z_ind_rm ) )
})

# missingness versions
x_bar_miss <- rowMeans( X_miss, na.rm = TRUE )
b_miss <- (1 - mean( x_bar_miss * ( 2 - x_bar_miss ), na.rm = TRUE ) - mean_kinship ) / ( 1 - mean_kinship )
expect_true( !is.na( b_miss ) )
# this one takes more steps
# missing values are just treated as zeroes internally, if missingness is random and uninformative then we're fine
Xc_miss <- X_miss - 1
Xc_miss[ is.na( Xc_miss ) ] <- 0
kinship_est_miss <- ( crossprod( Xc_miss ) / m - b_miss ) / ( 1 - b_miss )
# inbreding doesn't come from kinship under missigness, do things much more carefully actually!
# inbr_est_miss <- inbr( kinship_est_miss ) # WRONG in this implementation
inbr_est_miss <- colMeans( ( X_miss - 1 )^2, na.rm = TRUE ) # the first pass calculation
inbr_est_miss <- ( 2 * inbr_est_miss - 1 - b_miss ) / ( 1 - b_miss ) # this completes normalization
# a basic validation
expect_true( !anyNA( inbr_est_miss ) )
# true product for tests
KY_miss <- kinship_est_miss %*% Y    
# compare to vanilla `solve` (using actual inversion, which is least scalable solution)
Z_miss <- solve( kinship_est_miss, Y )
# a basic validation
expect_true( !anyNA( Z_miss ) )

# simulate some simple covariates
# add two
covar <- cbind(
    rnorm( n ), # random continuous covariate
    rbinom( n, 1, 0.5 ) # kinda like sex (binary, 1:1 proportion)
)
# also a version with missingness
covar_miss <- covar
covar_miss[ sample( 2 * n, 2 * n * miss ) ] <- NA

# for ld_prune tests, draw some more global variables
# draw random "p-values"
pvals_prune_in <- runif( m )
# leave constant loci but make sure they are always most insignificant, as they should be in reality
# because fixed loci were already excluded, only all-hetz case is left
# use same pvalues for both cases (missingness and non-missingness; they are technically subsets but because of fixed redraws it might not be exactly so, this guarantees that it'll all work)
pvals_prune_in[ rowMeans( X == 1L ) == 1 ] <- 1
pvals_prune_in[ rowMeans( X_miss == 1L, na.rm = TRUE ) == 1 ] <- 1
indexes_prune_in <- sample.int( m, 20L ) # works if m=1000
r2_max <- 0.3 # default

test_that( "ld_prune works", {
    # when selecting a single random index, it should always be kept!
    index <- sample.int( m, 1L )
    expect_equal( ld_prune( X, pvals_prune_in, index, r2_max = r2_max ), index )

    # bigger test on random data
    # NOTE: use <<- to remember variable globally (outside this scope)
    # (will compare to bedmatrix versions)
    expect_silent(
        indexes_prune_out <<- ld_prune( X, pvals_prune_in, indexes_prune_in, r2_max = r2_max )
    )
    # output should be subset of input
    expect_true( all( indexes_prune_out %in% indexes_prune_in ) )
    # confirm that remaining dataset has all squared correlations below cutoff
    # NOTE: default is correlations of columns, but we want rows! (hence transpose)
    r2s <- cor( t( X[ indexes_prune_out, , drop = FALSE ] ), use = 'pairwise.complete.obs' )^2
    # exclude diagonal from test, of course
    # all of these should be non-missing
    expect_true( all( r2s[ lower.tri( r2s ) ] <= r2_max ) )
    
    # repeat test with missingness in X
    expect_silent(
        indexes_prune_out_miss <<- ld_prune( X_miss, pvals_prune_in, indexes_prune_in, r2_max = r2_max )
    )
    # output should be subset of input
    expect_true( all( indexes_prune_out_miss %in% indexes_prune_in ) )
    # also suppress warnings here!
    suppressWarnings(
        r2s <- cor( t( X_miss[ indexes_prune_out_miss, , drop = FALSE ] ), use = 'pairwise.complete.obs' )^2
    )
    # exclude diagonal from test, of course
    # these may have missingness where there are no complete pairwise cases (which were treated as independent)
    expect_true( all( abs( r2s[ lower.tri( r2s ) ] ) <= r2_max, na.rm = TRUE ) )

    # test with bim
    # will only test here (no BEDMatrix version) so no need for global copies of random data
    # minimal bim table with just coordinates, all that is needed
    pos_window <- 10000000L # default 10 Mb
    n_chr <- 10L # 10 chrs, equal numbers of SNPs each case (100)
    bim <- tibble(
        chr = ceiling( ( 1L : m ) / m * n_chr ) ,
        pos = seq.int( 1L, by = pos_window/10L, length.out = m ) # LD window contains exactly 10 SNPs in each case
    )
    # test case with missingness only
    expect_silent(
        indexes <- ld_prune( X_miss, pvals_prune_in, indexes_prune_in, r2_max = r2_max, bim = bim, pos_window = pos_window )
    )
    # is it the case that these indexes are always supersets of the case without chr/pos pass?
    chrs <- bim$chr[ indexes ]
    for ( chr in 1L : n_chr ) {
        # test each chr separately (correlations across chromosomes exceeding r2_max would be left there if they happen)
        indexes_chr <- indexes[ chrs == chr ]
        # some chrs have no hits, just test ones with hits
        # also singletons are uninteresting, only test bigger sets
        if ( length( indexes_chr ) > 1L ) {
            suppressWarnings(
                r2s <- cor( t( X_miss[ indexes_chr, , drop = FALSE ] ), use = 'pairwise.complete.obs' )^2
            )
            # this is not true in general, because of the window threshold!
            # treat things farther than window as having zero correlation
            pos <- bim$pos[ indexes_chr ]
            for ( i in 2L : length( pos ) ) {
                for ( j in 1L : (i-1L) ) {
                    if ( abs( pos[i] - pos[j] ) > pos_window ) {
                        r2s[i,j] <- 0 # only lower triangle needs editing
                        # r2s[j,i] <- 0
                    }
                }
            }
            expect_true( all( abs( r2s[ lower.tri( r2s ) ] ) <= r2_max, na.rm = TRUE ) )
        }
    }

    # repeat test providing BIM but setting window size to zero, which is code for ignoring BIM even if it is provided
    # test case with missingness only
    expect_silent(
        indexes <- ld_prune( X_miss, pvals_prune_in, indexes_prune_in, r2_max = r2_max, bim = bim, pos_window = 0L )
    )
    expect_equal( indexes, indexes_prune_out_miss )
})


test_that("covar_fix_na works", {
    out <- covar_fix_na( covar_miss )
    expect_true( !anyNA( out ) )
    expect_equal( nrow( out ), nrow( covar_miss ) )
    expect_equal( ncol( out ), ncol( covar_miss ) )
    # find non-NA subsets, these should agree too
    indexes_complete <- which( !is.na( covar_miss ) )
    expect_equal( out[ indexes_complete ], covar_miss[ indexes_complete ] )
})

test_that("get_proj_denom_basic and get_proj_denom_multi work and agree with each other", {
    # run basic version first
    # nothing to compare it to yet, just save values
    # these are treated as true values/direct calculations later on
    obj <- get_proj_denom_basic(Z, trait)
    proj <- obj$proj
    beta_var <- obj$var
    # make sure dimensions make sense
    expect_equal( length( proj ), n )
    expect_equal( length( beta_var ), 1 )
    # non-negative (it's a variance scale)
    expect_true( beta_var >= 0 )

    # now run fancier version for covariates!
    obj <- get_proj_denom_multi(Z, Y) # , trait_only = TRUE
    expect_equal( proj, obj$proj )
    expect_equal( beta_var, obj$var )

    # and lastly, test complete outputs (not just for trait)
    K <- ncol(Y)
    obj <- get_proj_denom_multi(Z, Y, trait_only = FALSE)
    expect_equal( nrow( obj$proj ), n )
    expect_equal( ncol( obj$proj ), K )
    expect_equal( nrow( obj$cov_mat ), K )
    expect_equal( ncol( obj$cov_mat ), K )
})

test_that("cgsolve_mat works", {
    # test direct version on kinship and covariates
    expect_silent(
        Z2 <- cgsolve_mat( kinship_est, Y )
    )
    # vanilla "solve" version was precalculated, test here
    expect_equal( Z2, Z )

    # test transposed version
    expect_silent(
        Z2 <- cgsolve_mat( kinship_est, t(Y), transpose = TRUE )
    )
    # the result is aligned with input t(Y), so comparison to truth requires transposing earlier result too:
    expect_equal( Z2, t(Z) )
})

test_that("ligera stops when needed", {
    # test that there are errors when crucial data is missing
    # there are 3 mandatory arguments
    # everything missing
    expect_error( ligera() )
    # singletons (2 things missing)
    expect_error( ligera( X ) )
    expect_error( ligera( trait = trait ) )
    expect_error( ligera( kinship = kinship_est ) )
    # pairs (1 thing missing)
    expect_error( ligera( X, trait ) )
    expect_error( ligera( X, kinship = kinship_est ) )
    expect_error( ligera( trait = trait, kinship = kinship_est ) )

    # other validations, when all three are present but some are wrong (wrong dimensions, lack of symmetry, etc)
    # here genotype has wrong number of individuals, compared to other two
    expect_error( ligera( X[ , -1 ], trait, kinship_est ) )
    # here trait is wrong length
    expect_error( ligera( X, trait[ -1 ], kinship_est ) )
    # here kinship is wrong dim
    expect_error( ligera( X, trait, kinship_est[ -1, -1 ] ) )
    # here kinship is not symmetric
    expect_error( ligera( X, trait, matrix(1:n^2, nrow = n) ) )
})

test_that("ligera runs on random data without missingness, matches basic version", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent( tib1 <<- ligera( X, trait, kinship_est ) )
    expect_true( is_tibble( tib1 ) )
    expect_equal( names( tib1 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib1 ), m )
    expect_true( !anyNA( tib1 ) )
    # range for some things
    expect_true( all( tib1$pval >= 0 ) )
    expect_true( all( tib1$pval <= 1 ) )
    expect_true( all( tib1$beta_std_dev > 0 ) )
    expect_true( all( tib1$p_q > 0 ) )
    #    expect_true( all( tib1$p_q < 1 ) ) # because of inbreeding weights, there is no upper limit technically

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_basic <- ligera_basic( X, trait, kinship_est, kinship_est_inv )
    expect_equal( tib1, tib_basic )
})

test_that("ligera_f V=0 runs on random data without missingness, matches basic version", {
    # V==0 is most like basic version in how residuals are calculated, makes sure it'd be most stable
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent( tib1_f <<- ligera_f( X, trait, kinship_est, V = 0 ) )
    expect_true( is_tibble( tib1_f ) )
    expect_equal( names( tib1_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib1_f ), m )
    expect_true( !anyNA( tib1_f ) )
    # range for some things
    expect_true( all( tib1_f$pval >= 0 ) )
    expect_true( all( tib1_f$pval <= 1 ) )
    expect_true( all( tib1_f$f_stat >= 0 ) )
    expect_true( all( tib1_f$df > 0 ) )

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_f_basic <- ligera_f_basic( X, trait, kinship_est_inv )
    expect_equal( tib1_f, tib_f_basic )
})

test_that("ligera_f V=1 runs on random data without missingness, matches V=0", {
    expect_silent( tib1_f1 <- ligera_f( X, trait, kinship_est, V = 1 ) )
    expect_equal( tib1_f1, tib1_f )
})

test_that("ligera_f V=2 runs on random data without missingness, matches V=0", {
    expect_silent( tib1_f2 <- ligera_f( X, trait, kinship_est, V = 2 ) )
    expect_equal( tib1_f2, tib1_f )
})

# this failed to agree, singularity appears to be an issue here, or maybe other details of how the pseudoinverse is used
## test_that("ligera with kinship_std_inv agrees with true kinship, on random data without missingness", {
##     # first calculate biased kinship matrix
##     library(popkinsuppl)
##     kinship_est_std <- kinship_std_limit( kinship_est )
##     # invert, but it is singular so use pseudoinverse
##     library(MASS)
##     kinship_est_std_inv <- ginv( kinship_est_std )
##     # now use that to calculate statistics
##     # note regular kinship is still passed to calculate inbreeding coefficients the same way as before
##     expect_silent( tib1_std <- ligera( X, trait, kinship_est, kinship_inv = kinship_est_std_inv ) )
##     # if theory holds with pseudoinverse, entire statistics table should be the same!
##     expect_equal( tib1_std, tib1 )
## })

# calculate biased kinship matrix (shared by two tests)
kinship_est_wg <- popkinsuppl::kinship_wg_limit( kinship_est )
kinship_est_miss_wg <- popkinsuppl::kinship_wg_limit( kinship_est_miss )

# failing unexpectedly, not as numerically stable as it should be?  Or variance estimate is not bias-invariant?
## test_that("ligera with WG bias agrees with unbiased kinship, on random data without missingness", {
##     # note inbreeding coefficients are true kinship to calculate variance the same way as before
##     expect_silent( tib1_wg <- ligera( X, trait, kinship_est_wg, inbr = inbr( kinship_est ) ) )
##     # if theory holds, entire statistics table should be the same!
##     expect_equal( tib1_wg, tib1 )
## })

test_that("ligera_f with WG bias agrees with unbiased kinship, on random data without missingness", {
    # this version pass WG only (as main matrix; the F version doesn't have inbreeding variance stuff)
    expect_silent( tib1_f_wg <- ligera_f( X, trait, kinship_est_wg ) )
    # if theory holds, entire statistics table should be the same!
    expect_equal( tib1_f_wg, tib1_f )
})


test_that("ligera runs on random data with missingness in X", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent( tib2 <<- ligera( X_miss, trait, kinship_est_miss ) )
    expect_true( is_tibble( tib2 ) )
    expect_equal( names( tib2 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib2 ), m )
    expect_true( !anyNA( tib2 ) )
    # range for some things
    expect_true( all( tib2$pval >= 0 ) )
    expect_true( all( tib2$pval <= 1 ) )
    expect_true( all( tib2$beta_std_dev > 0 ) )
    expect_true( all( tib2$p_q > 0 ) )
})

test_that("ligera_f runs on random data with missingness in X", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent( tib2_f <<- ligera_f( X_miss, trait, kinship_est_miss ) )
    expect_true( is_tibble( tib2_f ) )
    expect_equal( names( tib2_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib2_f ), m )
    expect_true( !anyNA( tib2_f ) )
    # range for some things
    expect_true( all( tib2_f$pval >= 0 ) )
    expect_true( all( tib2_f$pval <= 1 ) )
    expect_true( all( tib2_f$f_stat >= 0 ) )
    expect_true( all( tib2_f$df > 0 ) )
})

# failing unexpectedly, not as numerically stable as it should be?  Or variance estimate is not bias-invariant?
## test_that("ligera with WG bias agrees with unbiased kinship, on random data with missingness in X", {
##     # note inbreeding coefficients are true kinship to calculate variance the same way as before
##     expect_silent( tib2_wg <- ligera( X_miss, trait, kinship_est_miss_wg, inbr = inbr( kinship_est_miss ) ) )
##     # if theory holds, entire statistics table should be the same!
##     expect_equal( tib2_wg, tib2 )
## })

test_that("ligera_f with WG bias agrees with unbiased kinship, on random data with missingness in X", {
    # this version pass WG only (as main matrix; the F version doesn't have inbreeding variance stuff)
    expect_silent( tib2_f_wg <- ligera_f( X_miss, trait, kinship_est_miss_wg ) )
    # if theory holds, entire statistics table should be the same!
    expect_equal( tib2_f_wg, tib2_f )
})

test_that("ligera runs on random data with missingness in trait", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib3 <<- ligera( X, trait_miss, kinship_est )
    )
    expect_true( is_tibble( tib3 ) )
    expect_equal( names( tib3 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib3 ), m )
    expect_true( !anyNA( tib3 ) )
    # range for some things
    expect_true( all( tib3$pval >= 0 ) )
    expect_true( all( tib3$pval <= 1 ) )
    expect_true( all( tib3$beta_std_dev > 0 ) )
    expect_true( all( tib3$p_q > 0 ) )
})

test_that("ligera_f runs on random data with missingness in trait", {
    expect_silent( tib3_f <<- ligera_f( X, trait_miss, kinship_est ) )
    expect_true( is_tibble( tib3_f ) )
    expect_equal( names( tib3_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib3_f ), m )
    expect_true( !anyNA( tib3_f ) )
    # range for some things
    expect_true( all( tib3_f$pval >= 0 ) )
    expect_true( all( tib3_f$pval <= 1 ) )
    expect_true( all( tib3_f$f_stat >= 0 ) )
    expect_true( all( tib3_f$df > 0 ) )
})

# failing unexpectedly, not as numerically stable as it should be?  Or variance estimate is not bias-invariant?
## test_that("ligera with WG bias agrees with unbiased kinship, on random data with missingness in trait", {
##     # note inbreeding coefficients are true kinship to calculate variance the same way as before
##     expect_silent( tib3_wg <- ligera( X, trait_miss, kinship_est_wg, inbr = inbr( kinship_est ) ) )
##     # if theory holds, entire statistics table should be the same!
##     expect_equal( tib3_wg, tib3 )
## })

test_that("ligera_f with WG bias agrees with unbiased kinship, on random data with missingness in trait", {
    # this version pass WG only (as main matrix; the F version doesn't have inbreeding variance stuff)
    expect_silent( tib3_f_wg <- ligera_f( X, trait_miss, kinship_est_wg ) )
    # if theory holds, entire statistics table should be the same!
    expect_equal( tib3_f_wg, tib3_f )
})



test_that("ligera runs on random data with missingness in X and trait", {
    expect_silent( tib4 <<- ligera( X_miss, trait_miss, kinship_est_miss ) )
    expect_true( is_tibble( tib4 ) )
    expect_equal( names( tib4 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib4 ), m )
    expect_true( !anyNA( tib4 ) )
    # range for some things
    expect_true( all( tib4$pval >= 0 ) )
    expect_true( all( tib4$pval <= 1 ) )
    expect_true( all( tib4$beta_std_dev > 0 ) )
    expect_true( all( tib4$p_q > 0 ) )
})

test_that("ligera_f runs on random data with missingness in X and trait", {
    expect_silent( tib4_f <<- ligera_f( X_miss, trait_miss, kinship_est_miss ) )
    expect_true( is_tibble( tib4_f ) )
    expect_equal( names( tib4_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib4_f ), m )
    expect_true( !anyNA( tib4_f ) )
    # range for some things
    expect_true( all( tib4_f$pval >= 0 ) )
    expect_true( all( tib4_f$pval <= 1 ) )
    expect_true( all( tib4_f$f_stat >= 0 ) )
    expect_true( all( tib4_f$df > 0 ) )
})

# failing unexpectedly, not as numerically stable as it should be?  Or variance estimate is not bias-invariant?
## test_that("ligera with WG bias agrees with unbiased kinship, on random data with missingness in X and trait", {
##     # note inbreeding coefficients are true kinship to calculate variance the same way as before
##     expect_silent( tib4_wg <- ligera( X_miss, trait_miss, kinship_est_miss_wg, inbr = inbr( kinship_est_miss ) ) )
##     # if theory holds, entire statistics table should be the same!
##     expect_equal( tib4_wg, tib4 )
## })

test_that("ligera_f with WG bias agrees with unbiased kinship, on random data with missingness in X and trait", {
    # this version pass WG only (as main matrix; the F version doesn't have inbreeding variance stuff)
    expect_silent( tib4_f_wg <- ligera_f( X_miss, trait_miss, kinship_est_miss_wg ) )
    # if theory holds, entire statistics table should be the same!
    expect_equal( tib4_f_wg, tib4_f )
})


test_that("ligera works with covariates", {
    expect_silent( tib <- ligera( X, trait, kinship_est, covar = covar ) )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$beta_std_dev > 0 ) )
    expect_true( all( tib$p_q > 0 ) )
    #    expect_true( all( tib$p_q < 1 ) ) # because of inbreeding weights, there is no upper limit technically

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_basic <- ligera_basic( X, trait, kinship_est, kinship_est_inv, covar = covar )
    expect_equal( tib, tib_basic )
})

test_that("ligera_f works with covariates", {
    expect_silent( tib <- ligera_f( X, trait, kinship_est, covar = covar ) )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$f_stat >= 0 ) )
    expect_true( all( tib$df > 0 ) )

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_basic <- ligera_f_basic( X, trait, kinship_est_inv, covar = covar )
    expect_equal( tib, tib_basic )
})

test_that("ligera works with covariates with missingness", {
    expect_silent( tib <- ligera( X, trait, kinship_est, covar = covar_miss ) )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$beta_std_dev > 0 ) )
    expect_true( all( tib$p_q > 0 ) )
    #    expect_true( all( tib$p_q < 1 ) ) # because of inbreeding weights, there is no upper limit technically

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_basic <- ligera_basic( X, trait, kinship_est, kinship_est_inv, covar = covar_miss )
    expect_equal( tib, tib_basic )
})

test_that("ligera_f works with covariates with missingness", {
    expect_silent( tib <- ligera_f( X, trait, kinship_est, covar = covar_miss ) )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$f_stat >= 0 ) )
    expect_true( all( tib$df > 0 ) )
    
    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_basic <- ligera_f_basic( X, trait, kinship_est_inv, covar = covar_miss )
    expect_equal( tib, tib_basic )
})

test_that("ligera_multi runs without errors", {
    expect_silent(
        tib <- ligera_multi( X, trait, kinship_est )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$beta_std_dev > 0 ) )
    expect_true( all( tib$p_q > 0 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_f_multi runs without errors", {
    expect_silent(
        tib <- ligera_f_multi( X, trait, kinship_est )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_multi `one_per_iter = TRUE` runs without errors", {
    expect_silent(
        tib <- ligera_multi( X, trait, kinship_est, one_per_iter = TRUE )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$beta_std_dev > 0 ) )
    expect_true( all( tib$p_q > 0 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_f_multi `one_per_iter = TRUE` runs without errors", {
    expect_silent(
        tib <- ligera_f_multi( X, trait, kinship_est, one_per_iter = TRUE )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_multi runs with X missingness without errors", {
    expect_silent(
        tib <- ligera_multi( X_miss, trait, kinship_est_miss )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$beta_std_dev > 0 ) )
    expect_true( all( tib$p_q > 0 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_f_multi runs with X missingness without errors", {
    expect_silent(
        tib <- ligera_f_multi( X_miss, trait, kinship_est_miss )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_multi runs with covariates without errors", {
    expect_silent(
        tib <- ligera_multi( X, trait, kinship_est, covar = covar )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$beta_std_dev > 0 ) )
    expect_true( all( tib$p_q > 0 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_f_multi runs with covariates without errors", {
    expect_silent(
        tib <- ligera_f_multi( X, trait, kinship_est, covar = covar )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_multi runs with covariates with missingness without errors", {
    expect_silent(
        tib <- ligera_multi( X, trait, kinship_est, covar = covar_miss )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$beta_std_dev > 0 ) )
    expect_true( all( tib$p_q > 0 ) )
    expect_true( all( tib$sel >= 0 ) )
})

test_that("ligera_f_multi runs with covariates with missingness without errors", {
    expect_silent(
        tib <- ligera_f_multi( X, trait, kinship_est, covar = covar_miss )
    )
    expect_true( is_tibble( tib ) )
    expect_equal( names( tib ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib ), m )
    expect_true( !anyNA( tib ) )
    # range for some things
    expect_true( all( tib$pval >= 0 ) )
    expect_true( all( tib$pval <= 1 ) )
    expect_true( all( tib$qval >= 0 ) )
    expect_true( all( tib$qval <= 1 ) )
    expect_true( all( tib$sel >= 0 ) )
})


#################################
### LIGERA2 (Full BOLT trick) ###
#################################

test_that("popkin_prod stops when needed", {
    # NOTE either mean_kinship or b must be supplied (test all combos)
    # everything missing
    expect_error( popkin_prod() )
    # singletons (2 things missing)
    expect_error( popkin_prod( X = X ) )
    expect_error( popkin_prod( P = Y ) )
    expect_error( popkin_prod( mean_kinship = mean_kinship ) )
    expect_error( popkin_prod( b = b ) )
    # pairs (1 thing missing)
    expect_error( popkin_prod( X = X, P = Y ) )
    expect_error( popkin_prod( X = X, mean_kinship = mean_kinship ) )
    expect_error( popkin_prod( P = Y, mean_kinship = mean_kinship ) )
    expect_error( popkin_prod( X = X, b = b ) )
    expect_error( popkin_prod( P = Y, b = b ) )

    # other validations
    # X and Y must be matrices
    expect_error( popkin_prod( X = 1:10, P = Y, mean_kinship = mean_kinship ) )
    expect_error( popkin_prod( X = X, P = 1:10, mean_kinship = mean_kinship ) )
    # X and Y dimensions disagree
    expect_error( popkin_prod( X = X, P = Y[ , -1 ], mean_kinship = mean_kinship ) )
    # mean kinship is not scalar
    expect_error( popkin_prod( X = X, P = Y, mean_kinship = 1:10 ) )
})

test_that("popkin_prod matches direct product", {
    # for exact comparisons, use X without missingness

    # version with b present
    obj <- popkin_prod( X = X, P = Y, b = b ) # , mean_kinship = mean_kinship
    # compare to direct calculation
    expect_equal( obj$KP, KY )
    
    # version with missingness, b present
    obj <- popkin_prod( X = X_miss, P = Y, b = b_miss ) # , mean_kinship = mean_kinship
    # compare to direct calculation
    expect_equal( obj$KP, KY_miss )

    # versions without inbreeding coefficients calculated at all
    # return value is always a list though
    obj <- popkin_prod( X = X, P = Y, b = b, want_inbr = FALSE )
    expect_equal( obj$KP, KY )
    obj <- popkin_prod( X = X_miss, P = Y, b = b_miss, want_inbr = FALSE )
    expect_equal( obj$KP, KY_miss )

    # version with b missing
    obj <- popkin_prod( X = X, P = Y, mean_kinship = mean_kinship )
    # compare to direct calculations
    expect_equal( obj$KP, KY )
    expect_equal( obj$b, b )
    expect_equal( obj$inbr, inbr_est )

})

test_that("conj_grad_scan stops when needed", {
    # everything missing
    expect_error( conj_grad_scan() )
    # singletons (2 things missing)
    expect_error( conj_grad_scan( X = X ) )
    expect_error( conj_grad_scan( Y = Y ) )
    expect_error( conj_grad_scan( mean_kinship = mean_kinship ) )
    # pairs (1 thing missing)
    expect_error( conj_grad_scan( X = X, Y = Y ) )
    expect_error( conj_grad_scan( X = X, mean_kinship = mean_kinship ) )
    expect_error( conj_grad_scan( Y = Y, mean_kinship = mean_kinship ) )

    # other validations
    # X and Y must be matrices
    expect_error( conj_grad_scan( X = 1:10, Y = Y, mean_kinship = mean_kinship ) )
    expect_error( conj_grad_scan( X = X, Y = 1:10, mean_kinship = mean_kinship ) )
    # X and Y dimensions disagree
    expect_error( conj_grad_scan( X = X, Y = Y[ , -1 ], mean_kinship = mean_kinship ) )
    # mean kinship is not scalar
    expect_error( conj_grad_scan( X = X, Y = Y, mean_kinship = 1:10 ) )
})

test_that("conj_grad_scan matches solve", {
    obj <- conj_grad_scan( X = X, Y = Y, mean_kinship = mean_kinship )
    # compare to precomputed Z from solve (considered ground truth)
    expect_equal( Z, obj$Z )
    # ditto direct computation
    expect_equal( inbr_est, obj$inbr )
    
    # here we just want to know that missingness in X doesn't result in missingness in outputs
    obj <- conj_grad_scan( X = X_miss, Y = Y, mean_kinship = mean_kinship )
    # compare to precomputed values
    expect_equal( Z_miss, obj$Z )
    expect_equal( inbr_est_miss, obj$inbr )

    # now test missingness in trait! (individuals removed)
    obj <- conj_grad_scan(
        X = X,
        Y = Y_ind_rm,
        mean_kinship = mean_kinship,
        indexes_ind = indexes_ind
    )
    # compare to precomputed values
    expect_equal( Z_ind_rm, obj$Z )
    expect_equal( inbr_est_ind_rm, obj$inbr )

    # repeat without inbreeding coefficients, asking for b (as done first time with ligera2_f)
    # only X missingness case
    obj <- conj_grad_scan( X = X_miss, Y = Y, mean_kinship = mean_kinship, want_inbr = FALSE, want_b = TRUE )
    # compare to precomputed Z from solve (considered ground truth)
    expect_equal( Z_miss, obj$Z )
    # ditto direct computation
    expect_equal( b_miss, obj$b )

    # and now other mode, providing b but not mean_kinship, and not asking for anything else
    # only X missingness case
    Z_out <- conj_grad_scan( X = X_miss, Y = Y, b = b_miss, want_inbr = FALSE )
    # compare to precomputed Z from solve (considered ground truth)
    expect_equal( Z_miss, Z_out )
})

test_that("ligera2 stops when needed", {
    # test that there are errors when crucial data is missing
    # there are 3 mandatory arguments
    # everything missing
    expect_error( ligera2() )
    # singletons (2 things missing)
    expect_error( ligera2( X = X ) )
    expect_error( ligera2( trait = trait ) )
    expect_error( ligera2( mean_kinship = mean_kinship ) )
    # pairs (1 thing missing)
    expect_error( ligera2( X = X, trait = trait ) )
    expect_error( ligera2( X = X, mean_kinship = mean_kinship ) )
    expect_error( ligera2( trait = trait, mean_kinship = mean_kinship ) )

    # other validations, when all three are present but some are wrong (wrong dimensions, etc)
    # X must be matrix
    expect_error( ligera2( X = 1:10, trait = trait, mean_kinship = mean_kinship ) )
    # here genotype has wrong number of individuals, compared to other two
    expect_error( ligera2( X = X[ , -1 ], trait = trait, mean_kinship = mean_kinship ) )
    # here trait is wrong length
    expect_error( ligera2( X = X, trait = trait[ -1 ], mean_kinship = mean_kinship ) )
    # mean_kinship is not scalar
    expect_error( ligera2( X = X, trait = trait, mean_kinship = 1:10 ) )
})

test_that("ligera2_f stops when needed", {
    # test that there are errors when crucial data is missing
    # there are 3 mandatory arguments
    # everything missing
    expect_error( ligera2_f() )
    # singletons (2 things missing)
    expect_error( ligera2_f( X = X ) )
    expect_error( ligera2_f( trait = trait ) )
    expect_error( ligera2_f( mean_kinship = mean_kinship ) )
    # pairs (1 thing missing)
    expect_error( ligera2_f( X = X, trait = trait ) )
    expect_error( ligera2_f( X = X, mean_kinship = mean_kinship ) )
    expect_error( ligera2_f( trait = trait, mean_kinship = mean_kinship ) )

    # other validations, when all three are present but some are wrong (wrong dimensions, etc)
    # X must be matrix
    expect_error( ligera2_f( X = 1:10, trait = trait, mean_kinship = mean_kinship ) )
    # here genotype has wrong number of individuals, compared to other two
    expect_error( ligera2_f( X = X[ , -1 ], trait = trait, mean_kinship = mean_kinship ) )
    # here trait is wrong length
    expect_error( ligera2_f( X = X, trait = trait[ -1 ], mean_kinship = mean_kinship ) )
    # mean_kinship is not scalar
    expect_error( ligera2_f( X = X, trait = trait, mean_kinship = 1:10 ) )
})

test_that("ligera2 runs on random data without missingness, matches basic version", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib5 <<- ligera2(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib5 ) )
    expect_equal( names( tib5 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib5 ), m )
    expect_true( !anyNA( tib5 ) )
    # range for some things
    expect_true( all( tib5$pval >= 0 ) )
    expect_true( all( tib5$pval <= 1 ) )
    expect_true( all( tib5$beta_std_dev > 0 ) )
    expect_true( all( tib5$p_q > 0 ) )
    #    expect_true( all( tib1$p_q < 1 ) ) # because of inbreeding weights, there is no upper limit technically

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib5_basic <- ligera_basic( X, trait, kinship_est, kinship_est_inv )
    expect_equal( tib5, tib5_basic )
})

test_that("ligera2_f V=0 runs on random data without missingness, matches basic version", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib5_f <<- ligera2_f(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib5_f ) )
    expect_equal( names( tib5_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib5_f ), m )
    expect_true( !anyNA( tib5_f ) )
    # range for some things
    expect_true( all( tib5_f$pval >= 0 ) )
    expect_true( all( tib5_f$pval <= 1 ) )
    expect_true( all( tib5_f$f_stat >= 0 ) )
    expect_true( all( tib5_f$df > 0 ) )
    
    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib5_f_basic <- ligera_f_basic( X, trait, kinship_est_inv )
    expect_equal( tib5_f, tib5_f_basic )
})

test_that("ligera2_f V=1 runs on random data without missingness, matches V=0", {
    expect_silent( tib5_f1 <- ligera2_f( X, trait, mean_kinship, V = 1 ) )
    expect_equal( tib5_f1, tib5_f )
})

test_that("ligera2_f V=2 runs on random data without missingness, matches V=0", {
    expect_silent( tib5_f2 <- ligera2_f( X, trait, mean_kinship, V = 2 ) )
    expect_equal( tib5_f2, tib5_f )
})

test_that("ligera2_f works on weird case, matches ligera_f", {
    # this is an odd example I just came up with for multiscan, which reliably makes ligera2_f fail!
    # OK, problem is constant loci (includes fixed but also all-hetz case), which are not being removed in example!  New code catches that and handles correctly!
    n_ind <- 5
    m_loci <- 100
    X <- matrix(
        rbinom( m_loci * n_ind, 2, 0.5 ),
        nrow = m_loci
    )
    # introduce fixed locus on purpose to cause problem reliably
    X[1, ] <- 1
    # random trait
    trait <- rnorm( n_ind )
    # a required parameter
    mean_kinship <- mean( diag( n_ind ) / 2 ) # unstructured case
    
    tib2 <- ligera2_f( X, trait, mean_kinship )
    
    # additional work to get this other version to work
    x_bar <- rowMeans( X )
    b <- (1 - mean( x_bar * ( 2 - x_bar ) ) - mean_kinship ) / ( 1 - mean_kinship )
    kinship_est <- ( crossprod( X - 1 ) / m_loci - b ) / ( 1 - b ) # here we do normalize properly for a plot
    # for ligera_f_basic, weirdly cases that should be NaN are sometimes random numbers (including negative f_stats, or Inf, etc) with different versions, not worth debugging at this point to ensure compatibility in this edge case
    #tib1 <- ligera_f_basic( X, trait, solve( kinship_est ) ) # direct sol is more numerically sensitive???
    tib1 <- ligera_f( X, trait, kinship_est ) # uses cgsolve, which has the same solution to issue
    expect_equal( tib2, tib1 )
})

test_that("ligera2 runs on random data with missingness in X", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib6 <<- ligera2(
            X = X_miss,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib6 ) )
    expect_equal( names( tib6 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib6 ), m )
    expect_true( !anyNA( tib6 ) )
    # range for some things
    expect_true( all( tib6$pval >= 0 ) )
    expect_true( all( tib6$pval <= 1 ) )
    expect_true( all( tib6$beta_std_dev > 0 ) )
    expect_true( all( tib6$p_q > 0 ) )

    # compare to earlier ligera with exact same kinship matrix for comparison
    tib6_basic <- ligera(
        X = X_miss,
        trait = trait,
        kinship = kinship_est_miss,
        inbr = inbr_est_miss
    )
    expect_equal( tib6, tib6_basic )

})

test_that("ligera2_f runs on random data with missingness in X", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib6_f <<- ligera2_f(
            X = X_miss,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib6_f ) )
    expect_equal( names( tib6_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib6_f ), m )
    expect_true( !anyNA( tib6_f ) )
    # range for some things
    expect_true( all( tib6_f$pval >= 0 ) )
    expect_true( all( tib6_f$pval <= 1 ) )
    expect_true( all( tib6_f$f_stat >= 0 ) )
    expect_true( all( tib6_f$df > 0 ) )

    # compare to earlier ligera with exact same kinship matrix for comparison
    tib6_f_basic <- ligera_f(
        X = X_miss,
        trait = trait,
        kinship = kinship_est_miss
    )
    expect_equal( tib6_f, tib6_f_basic )

})

test_that("ligera2 runs on random data with missingness in trait", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent( tib7 <<- ligera2( X = X, trait = trait_miss, mean_kinship = mean_kinship ) )
    expect_true( is_tibble( tib7 ) )
    expect_equal( names( tib7 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib7 ), m )
    expect_true( !anyNA( tib7 ) )
    # range for some things
    expect_true( all( tib7$pval >= 0 ) )
    expect_true( all( tib7$pval <= 1 ) )
    expect_true( all( tib7$beta_std_dev > 0 ) )
    expect_true( all( tib7$p_q > 0 ) )
})

test_that("ligera2_f runs on random data with missingness in trait", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent( tib7_f <<- ligera2_f( X = X, trait = trait_miss, mean_kinship = mean_kinship ) )
    expect_true( is_tibble( tib7_f ) )
    expect_equal( names( tib7_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib7_f ), m )
    expect_true( !anyNA( tib7_f ) )
    # range for some things
    expect_true( all( tib7_f$pval >= 0 ) )
    expect_true( all( tib7_f$pval <= 1 ) )
    expect_true( all( tib7_f$f_stat >= 0 ) )
    expect_true( all( tib7_f$df > 0 ) )
})

test_that("ligera2 runs on random data with missingness in X and trait", {
    expect_silent( tib8 <- ligera2( X = X_miss, trait = trait_miss, mean_kinship = mean_kinship ) )
    expect_true( is_tibble( tib8 ) )
    expect_equal( names( tib8 ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib8 ), m )
    expect_true( !anyNA( tib8 ) )
    # range for some things
    expect_true( all( tib8$pval >= 0 ) )
    expect_true( all( tib8$pval <= 1 ) )
    expect_true( all( tib8$beta_std_dev > 0 ) )
    expect_true( all( tib8$p_q > 0 ) )
})

test_that("ligera2_f runs on random data with missingness in X and trait", {
    expect_silent( tib8_f <- ligera2_f( X = X_miss, trait = trait_miss, mean_kinship = mean_kinship ) )
    expect_true( is_tibble( tib8_f ) )
    expect_equal( names( tib8_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib8_f ), m )
    expect_true( !anyNA( tib8_f ) )
    # range for some things
    expect_true( all( tib8_f$pval >= 0 ) )
    expect_true( all( tib8_f$pval <= 1 ) )
    expect_true( all( tib8_f$f_stat >= 0 ) )
    expect_true( all( tib8_f$df > 0 ) )
})

test_that("ligera2 works with covariates", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_covar <<- ligera2(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship,
            covar = covar
        )
    )
    expect_true( is_tibble( tib_covar ) )
    expect_equal( names( tib_covar ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib_covar ), m )
    expect_true( !anyNA( tib_covar ) )
    # range for some things
    expect_true( all( tib_covar$pval >= 0 ) )
    expect_true( all( tib_covar$pval <= 1 ) )
    expect_true( all( tib_covar$beta_std_dev > 0 ) )
    expect_true( all( tib_covar$p_q > 0 ) )
    #    expect_true( all( tib1$p_q < 1 ) ) # because of inbreeding weights, there is no upper limit technically

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_covar_basic <- ligera_basic( X, trait, kinship_est, kinship_est_inv, covar = covar )
    expect_equal( tib_covar, tib_covar_basic )
})

test_that("ligera2_f works with covariates", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_covar_f <<- ligera2_f(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship,
            covar = covar
        )
    )
    expect_true( is_tibble( tib_covar_f ) )
    expect_equal( names( tib_covar_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib_covar_f ), m )
    expect_true( !anyNA( tib_covar_f ) )
    # range for some things
    expect_true( all( tib_covar_f$pval >= 0 ) )
    expect_true( all( tib_covar_f$pval <= 1 ) )
    expect_true( all( tib_covar_f$f_stat >= 0 ) )
    expect_true( all( tib_covar_f$df > 0 ) )
    
    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_covar_f_basic <- ligera_f_basic( X, trait, kinship_est_inv, covar = covar )
    expect_equal( tib_covar_f, tib_covar_f_basic )
})

test_that("ligera2 works with covariates with missingness", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_covar_miss <<- ligera2(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship,
            covar = covar_miss
        )
    )
    expect_true( is_tibble( tib_covar_miss ) )
    expect_equal( names( tib_covar_miss ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat') )
    expect_equal( nrow( tib_covar_miss ), m )
    expect_true( !anyNA( tib_covar_miss ) )
    # range for some things
    expect_true( all( tib_covar_miss$pval >= 0 ) )
    expect_true( all( tib_covar_miss$pval <= 1 ) )
    expect_true( all( tib_covar_miss$beta_std_dev > 0 ) )
    expect_true( all( tib_covar_miss$p_q > 0 ) )

    # this is the basic version (requires non-missingness, so it can only be compared here)
    tib_covar_miss_basic <- ligera_basic( X, trait, kinship_est, kinship_est_inv, covar = covar_miss )
    expect_equal( tib_covar_miss, tib_covar_miss_basic )
})

test_that("ligera2_f works with covariates with missingness", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_covar_miss_f <<- ligera2_f(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship,
            covar = covar_miss
        )
    )
    expect_true( is_tibble( tib_covar_miss_f ) )
    expect_equal( names( tib_covar_miss_f ), c('pval', 'beta', 'f_stat', 'df') )
    expect_equal( nrow( tib_covar_miss_f ), m )
    expect_true( !anyNA( tib_covar_miss_f ) )
    # range for some things
    expect_true( all( tib_covar_miss_f$pval >= 0 ) )
    expect_true( all( tib_covar_miss_f$pval <= 1 ) )
    expect_true( all( tib_covar_miss_f$f_stat >= 0 ) )
    expect_true( all( tib_covar_miss_f$df > 0 ) )

    # this is the basic version (requires non-missingness in X and trait, so it can only be compared here)
    tib_covar_miss_f_basic <- ligera_f_basic( X, trait, kinship_est_inv, covar = covar_miss )
    expect_equal( tib_covar_miss_f, tib_covar_miss_f_basic )
})

test_that("ligera2_multi runs without errors, matches ligera_multi", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_multi <<- ligera2_multi(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib_multi ) )
    expect_equal( names( tib_multi ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib_multi ), m )
    expect_true( !anyNA( tib_multi ) )
    # range for some things
    expect_true( all( tib_multi$pval >= 0 ) )
    expect_true( all( tib_multi$pval <= 1 ) )
    expect_true( all( tib_multi$qval >= 0 ) )
    expect_true( all( tib_multi$qval <= 1 ) )
    expect_true( all( tib_multi$beta_std_dev > 0 ) )
    expect_true( all( tib_multi$p_q > 0 ) )
    expect_true( all( tib_multi$sel >= 0 ) )

    # compare to earlier ligera with exact same kinship matrix for comparison
    tib_multi_basic <- ligera_multi( X, trait, kinship_est )
    expect_equal( tib_multi, tib_multi_basic )
})

test_that("ligera2_f_multi runs without errors, matches ligera_f_multi", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_multi_f <<- ligera2_f_multi(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib_multi_f ) )
    expect_equal( names( tib_multi_f ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib_multi_f ), m )
    expect_true( !anyNA( tib_multi_f ) )
    # range for some things
    expect_true( all( tib_multi_f$pval >= 0 ) )
    expect_true( all( tib_multi_f$pval <= 1 ) )
    expect_true( all( tib_multi_f$qval >= 0 ) )
    expect_true( all( tib_multi_f$qval <= 1 ) )
    expect_true( all( tib_multi_f$f_stat >= 0 ) )
    expect_true( all( tib_multi_f$df > 0 ) )
    expect_true( all( tib_multi_f$sel >= 0 ) )
    
    # compare to earlier ligera with exact same kinship matrix for comparison
    tib_multi_f_basic <- ligera_f_multi( X, trait, kinship_est )
    expect_equal( tib_multi_f, tib_multi_f_basic )
})

test_that("ligera2_multi `one_per_iter = TRUE` runs without errors, matches ligera_multi", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_multi_opi <<- ligera2_multi(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship,
            one_per_iter = TRUE
        )
    )
    expect_true( is_tibble( tib_multi ) )
    expect_equal( names( tib_multi ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib_multi ), m )
    expect_true( !anyNA( tib_multi ) )
    # range for some things
    expect_true( all( tib_multi$pval >= 0 ) )
    expect_true( all( tib_multi$pval <= 1 ) )
    expect_true( all( tib_multi$qval >= 0 ) )
    expect_true( all( tib_multi$qval <= 1 ) )
    expect_true( all( tib_multi$beta_std_dev > 0 ) )
    expect_true( all( tib_multi$p_q > 0 ) )
    expect_true( all( tib_multi$sel >= 0 ) )

    # compare to earlier ligera with exact same kinship matrix for comparison
    tib_multi_opi_basic <- ligera_multi( X, trait, kinship_est, one_per_iter = TRUE )
    expect_equal( tib_multi_opi, tib_multi_opi_basic )
})

test_that("ligera2_f_multi `one_per_iter = TRUE` runs without errors, matches ligera_f_multi", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_multi_f_opi <<- ligera2_f_multi(
            X = X,
            trait = trait,
            mean_kinship = mean_kinship,
            one_per_iter = TRUE
        )
    )
    expect_true( is_tibble( tib_multi_f ) )
    expect_equal( names( tib_multi_f ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib_multi_f ), m )
    expect_true( !anyNA( tib_multi_f ) )
    # range for some things
    expect_true( all( tib_multi_f$pval >= 0 ) )
    expect_true( all( tib_multi_f$pval <= 1 ) )
    expect_true( all( tib_multi_f$qval >= 0 ) )
    expect_true( all( tib_multi_f$qval <= 1 ) )
    expect_true( all( tib_multi_f$f_stat >= 0 ) )
    expect_true( all( tib_multi_f$df > 0 ) )
    expect_true( all( tib_multi_f$sel >= 0 ) )

    # compare to earlier ligera with exact same kinship matrix for comparison
    tib_multi_f_opi_basic <- ligera_f_multi( X, trait, kinship_est, one_per_iter = TRUE )
    expect_equal( tib_multi_f_opi, tib_multi_f_opi_basic )
})

test_that("ligera2_multi runs with X missingness without errors, matches ligera_multi", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_multi_miss <<- ligera2_multi(
            X = X_miss,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib_multi_miss ) )
    expect_equal( names( tib_multi_miss ), c('pval', 'beta', 'beta_std_dev', 'p_q', 't_stat', 'qval', 'sel') )
    expect_equal( nrow( tib_multi_miss ), m )
    expect_true( !anyNA( tib_multi_miss ) )
    # range for some things
    expect_true( all( tib_multi_miss$pval >= 0 ) )
    expect_true( all( tib_multi_miss$pval <= 1 ) )
    expect_true( all( tib_multi_miss$qval >= 0 ) )
    expect_true( all( tib_multi_miss$qval <= 1 ) )
    expect_true( all( tib_multi_miss$beta_std_dev > 0 ) )
    expect_true( all( tib_multi_miss$p_q > 0 ) )
    expect_true( all( tib_multi_miss$sel >= 0 ) )

    # compare to earlier ligera with exact same kinship matrix for comparison
    tib_multi_miss_basic <- ligera_multi(
        X = X_miss,
        trait = trait,
        kinship = kinship_est_miss,
        inbr = inbr_est_miss
    )
    expect_equal( tib_multi_miss, tib_multi_miss_basic )
})

test_that("ligera2_f_multi runs with X missingness without errors, matches ligera_f_multi", {
    # NOTE: use <<- to remember variable globally (outside this scope)
    expect_silent(
        tib_multi_f_miss <<- ligera2_f_multi(
            X = X_miss,
            trait = trait,
            mean_kinship = mean_kinship
        )
    )
    expect_true( is_tibble( tib_multi_f_miss ) )
    expect_equal( names( tib_multi_f_miss ), c('pval', 'beta', 'f_stat', 'df', 'qval', 'sel') )
    expect_equal( nrow( tib_multi_f_miss ), m )
    expect_true( !anyNA( tib_multi_f_miss ) )
    # range for some things
    expect_true( all( tib_multi_f_miss$pval >= 0 ) )
    expect_true( all( tib_multi_f_miss$pval <= 1 ) )
    expect_true( all( tib_multi_f_miss$qval >= 0 ) )
    expect_true( all( tib_multi_f_miss$qval <= 1 ) )
    expect_true( all( tib_multi_f_miss$f_stat >= 0 ) )
    expect_true( all( tib_multi_f_miss$df > 0 ) )
    expect_true( all( tib_multi_f_miss$sel >= 0 ) )
    
    # compare to earlier ligera with exact same kinship matrix for comparison
    tib_multi_f_miss_basic <- ligera_f_multi(
        X = X_miss,
        trait = trait,
        kinship = kinship_est_miss
    )
    expect_equal( tib_multi_f_miss, tib_multi_f_miss_basic )
})


##############################################################
### LIGERA2_BED (Full BOLT trick, Rcpp optimized BED only) ###
##############################################################

test_that("popkin_prod_bed stops when needed", {
    # everything missing
    expect_error( popkin_prod_bed() )
    # singletons (2 things missing)
    expect_error( popkin_prod_bed( X = X ) )
    expect_error( popkin_prod_bed( P = Y ) )
    expect_error( popkin_prod_bed( b = b ) )
    # pairs (1 thing missing)
    expect_error( popkin_prod_bed( X = X, P = Y ) )
    expect_error( popkin_prod_bed( X = X, b = b ) )
    expect_error( popkin_prod_bed( P = Y, b = b ) )

    # other validations
    # X and Y must be matrices
    expect_error( popkin_prod_bed( X = 1:10, P = Y, b = b ) )
    expect_error( popkin_prod_bed( X = X, P = 1:10, b = b ) )
    # X and Y dimensions disagree
    expect_error( popkin_prod_bed( X = X, P = Y[ , -1 ], b = b ) )
    # b is not scalar
    expect_error( popkin_prod_bed( X = X, P = Y, b = 1:10 ) )
    # b is not numeric
    expect_error( popkin_prod_bed( X = X, P = Y, b = 'b' ) )
})

test_that("popkin_prod_bed matches direct product", {
    # for exact comparisons, use X without missingness

    # version with b present
    KP <- popkin_prod_bed( X = X, P = Y, b = b )
    # compare to direct calculation
    expect_equal( KP, KY )

    # version with missingness, b present
    KP <- popkin_prod_bed( X = X_miss, P = Y, b = b_miss )
    # compare to direct calculation
    expect_equal( KP, KY_miss )
})

# test BEDMatrix version, which also requires us to write the random data out (with genio)
if (
    suppressMessages( suppressWarnings( require(BEDMatrix) ) ) &&
    suppressMessages( suppressWarnings( require(genio) ) )
) {
    # write the random data to a temporary file
    # write the version with missingness separately
    name <- tempfile('delete-me-random-test') # output name without extensions!
    name_miss <- tempfile('delete-me-random-test-with-missingness') # output name without extensions!
    # write BED/BIM/FAM files
    write_plink( name, X, verbose = FALSE )
    write_plink( name_miss, X_miss, verbose = FALSE )
    
    # load back using BEDMatrix
    # load this way, passing dims, for speed and to avoid dimnames (complicates testing)
    X_BEDMatrix <- suppressMessages( suppressWarnings( BEDMatrix( name, n = n, p = m ) ) )
    X_miss_BEDMatrix <- suppressMessages( suppressWarnings( BEDMatrix( name_miss, n = n, p = m ) ) )

    test_that( "ld_prune works on BEDMatrix data, recovers R matrix outputs", {
        # just want equality, noting else matters
        expect_equal(
            ld_prune( X_BEDMatrix, pvals_prune_in, indexes_prune_in, r2_max = r2_max ),
            indexes_prune_out
        )
        expect_equal(
            ld_prune( X_miss_BEDMatrix, pvals_prune_in, indexes_prune_in, r2_max = r2_max ),
            indexes_prune_out_miss
        )
    })

    ### ligera ###
    
    test_that("ligera runs correctly on BEDMatrix data, recovers R matrix outputs", {
        # this one compares to tib1
        expect_silent( tib1_BEDMatrix <- ligera( X_BEDMatrix, trait, kinship_est ) )
        expect_equal( tib1, tib1_BEDMatrix )
        
        # and this one compares to tib2
        expect_silent( tib2_BEDMatrix <- ligera( X_miss_BEDMatrix, trait, kinship_est_miss ) )
        expect_equal( tib2, tib2_BEDMatrix )
    })

    ### ligera2 ###
    
    test_that("popkin_prod matches direct product", {
        # version with b present
        expect_silent(
            obj <- popkin_prod( X = X_BEDMatrix, P = Y, b = b )
        )
        expect_equal( KY, obj$KP )

        # version with missingness, b present
        expect_silent(
            obj <- popkin_prod( X = X_miss_BEDMatrix, P = Y, b = b_miss )
        )
        expect_equal( KY_miss, obj$KP )
    })
    
    test_that("conj_grad_scan runs correctly on BEDMatrix data, recovers R matrix outputs", {
        expect_silent(
            obj <- conj_grad_scan( X = X_BEDMatrix, Y = Y, mean_kinship = mean_kinship )
        )
        expect_equal( Z, obj$Z )
        expect_equal( inbr_est, obj$inbr )
        
        expect_silent(
            obj <- conj_grad_scan( X = X_miss_BEDMatrix, Y = Y, mean_kinship = mean_kinship )
        )
        expect_equal( Z_miss, obj$Z )
        expect_equal( inbr_est_miss, obj$inbr )

        # now test missingness in trait! (individuals removed)
        expect_silent(
            obj <- conj_grad_scan(
                X = X_BEDMatrix,
                Y = Y_ind_rm,
                mean_kinship = mean_kinship,
                indexes_ind = indexes_ind
            )
        )
        # compare to precomputed values
        expect_equal( Z_ind_rm, obj$Z )
        expect_equal( inbr_est_ind_rm, obj$inbr )
    })

    test_that("ligera2 runs correctly on BEDMatrix data, recovers R matrix outputs", {
        tib5_BEDMatrix <- ligera2( X = X_BEDMatrix, trait = trait, mean_kinship = mean_kinship )
        expect_equal( tib5, tib5_BEDMatrix )
        
        tib6_BEDMatrix <- ligera2( X = X_miss_BEDMatrix, trait = trait, mean_kinship = mean_kinship )
        expect_equal( tib6, tib6_BEDMatrix )
    })

    ### ligera2_bed ###

    file_bed <- paste0(name, '.bed')
    file_bed_miss <- paste0(name_miss, '.bed')
    
    test_that("get_b_inbr_bed_cpp works", {
        # version with no individuals removed
        # NULL = no individuals to remove
        expect_silent(
            obj <- get_b_inbr_bed_cpp( file_bed, m, n, mean_kinship, NULL )
        )
        expect_equal( class( obj ), 'list' )
        expect_equal( length( obj ), 2 )
        expect_equal( names( obj ), c('b', 'inbr') )
        expect_equal( obj$b, b )
        expect_equal( obj$inbr, inbr_est )

        # version with X missingness, no individuals removed
        # NULL = no individuals to remove
        expect_silent(
            obj <- get_b_inbr_bed_cpp( file_bed_miss, m, n, mean_kinship, NULL )
        )
        expect_equal( class( obj ), 'list' )
        expect_equal( length( obj ), 2 )
        expect_equal( names( obj ), c('b', 'inbr') )
        expect_equal( obj$b, b_miss )
        expect_equal( obj$inbr, inbr_est_miss )

        # version with individuals removed
        expect_silent(
            obj <- get_b_inbr_bed_cpp( file_bed, m, n, mean_kinship, indexes_ind )
        )
        expect_equal( class( obj ), 'list' )
        expect_equal( length( obj ), 2 )
        expect_equal( names( obj ), c('b', 'inbr') )
        expect_equal( obj$b, b_ind_rm )
        expect_equal( obj$inbr, inbr_est_ind_rm )
    })
    
    test_that("popkin_prod_bed matches direct product", {
        # version with b present
        expect_silent(
            KY_BM <- popkin_prod_bed( X = X_BEDMatrix, P = Y, b = b )
        )
        expect_equal( KY, KY_BM )

        # version with missingness, b present
        expect_silent(
            KY_miss_BM <- popkin_prod_bed( X = X_miss_BEDMatrix, P = Y, b = b_miss )
        )
        expect_equal( KY_miss, KY_miss_BM )

        # version with individuals removed
        expect_silent(
            KY_ind_rm_BM <- popkin_prod_bed(
                X = X_BEDMatrix,
                P = Y_ind_rm,
                b = b_ind_rm,
                indexes_ind = indexes_ind
            )
        )
        expect_equal( KY_ind_rm, KY_ind_rm_BM )
    })
    
    test_that("popkin_prod_bed_cpp matches direct product", {
        # version with b present
        # NULL = no individuals to remove
        expect_silent(
            KY_cpp <- popkin_prod_bed_cpp( file_bed, m, n, Y, b, NULL )
        )
        expect_equal( KY, KY_cpp )

        # version with missingness, b present
        # NULL = no individuals to remove
        expect_silent(
            KY_miss_cpp <- popkin_prod_bed_cpp( file_bed_miss, m, n, Y, b_miss, NULL )
        )
        expect_equal( KY_miss, KY_miss_cpp )

        # version with individuals removed
        expect_silent(
            KY_ind_rm_cpp <- popkin_prod_bed_cpp( file_bed, m, n, Y_ind_rm, b_ind_rm, indexes_ind )
        )
        expect_equal( KY_ind_rm, KY_ind_rm_cpp )
    })
    
    test_that("conj_grad_scan_bed_wcpp matches precomputed values", {
        # this one compares to Z
        expect_silent(
            Z_BM <- conj_grad_scan_bed_wcpp(
                file = file_bed,
                m_loci = m,
                n_ind = n,
                Y = Y,
                b = b
            )
        )
        expect_equal( Z, Z_BM )
        
        # and this one compares to Z_miss
        expect_silent(
            Z_miss_BM <- conj_grad_scan_bed_wcpp(
                file = file_bed_miss,
                m_loci = m,
                n_ind = n,
                Y = Y,
                b = b_miss
            )
        )
        expect_equal( Z_miss, Z_miss_BM )

        # this one has individuals removed
        expect_silent(
            Z_ind_rm_BM <- conj_grad_scan_bed_wcpp(
                file = file_bed,
                m_loci = m,
                n_ind = n,
                Y = Y_ind_rm,
                b = b_ind_rm,
                indexes_ind = indexes_ind
            )
        )
        expect_equal( Z_ind_rm, Z_ind_rm_BM )
    })

    test_that("ligera2_bed recovers R matrix outputs", {
        tib5_bed <- ligera2_bed(
            file = name,
            m_loci = m,
            n_ind = n,
            trait = trait,
            mean_kinship = mean_kinship
        )
        expect_equal( tib5, tib5_bed )
        
        tib6_bed <- ligera2_bed(
            file = name_miss,
            m_loci = m,
            n_ind = n,
            trait = trait,
            mean_kinship = mean_kinship
        )
        expect_equal( tib6, tib6_bed )

        tib7_bed <- ligera2_bed(
            file = name,
            m_loci = m,
            n_ind = n,
            trait = trait_miss,
            mean_kinship = mean_kinship
        )
        expect_equal( tib7, tib7_bed )
    })

    test_that("ligera2_bed_f recovers R matrix outputs", {
        tib5_bed_f <- ligera2_bed_f(
            file = name,
            m_loci = m,
            n_ind = n,
            trait = trait,
            mean_kinship = mean_kinship
        )
        expect_equal( tib5_f, tib5_bed_f )
        
        tib6_bed_f <- ligera2_bed_f(
            file = name_miss,
            m_loci = m,
            n_ind = n,
            trait = trait,
            mean_kinship = mean_kinship
        )
        expect_equal( tib6_f, tib6_bed_f )

        tib7_bed_f <- ligera2_bed_f(
            file = name,
            m_loci = m,
            n_ind = n,
            trait = trait_miss,
            mean_kinship = mean_kinship
        )
        expect_equal( tib7_f, tib7_bed_f )
    })
    
    test_that("ligera2_bed works with covariates", {
        expect_silent(
            tib_covar_bed <- ligera2_bed(
                file = name,
                m_loci = m,
                n_ind = n,
                trait = trait,
                mean_kinship = mean_kinship,
                covar = covar
            )
        )
        expect_equal( tib_covar, tib_covar_bed )
    })

    test_that("ligera2_bed_f works with covariates", {
        expect_silent(
            tib_covar_bed_f <- ligera2_bed_f(
                file = name,
                m_loci = m,
                n_ind = n,
                trait = trait,
                mean_kinship = mean_kinship,
                covar = covar
            )
        )
        expect_equal( tib_covar_f, tib_covar_bed_f )
    })

    test_that("ligera2_bed works with covariates with missingness", {
        expect_silent(
            tib_covar_miss_bed <- ligera2_bed(
                file = name,
                m_loci = m,
                n_ind = n,
                trait = trait,
                mean_kinship = mean_kinship,
                covar = covar_miss
            )
        )
        expect_equal( tib_covar_miss, tib_covar_miss_bed )
    })

    test_that("ligera2_bed_f works with covariates with missingness", {
        expect_silent(
            tib_covar_miss_bed_f <- ligera2_bed_f(
                file = name,
                m_loci = m,
                n_ind = n,
                trait = trait,
                mean_kinship = mean_kinship,
                covar = covar_miss
            )
        )
        expect_equal( tib_covar_miss_f, tib_covar_miss_bed_f )
    })
    
    test_that("ligera2_bed_f V=1 agrees with V=0 with covariates with missingness", {
        expect_silent(
            tib_covar_miss_bed_f1 <- ligera2_bed_f(
                file = name,
                m_loci = m,
                n_ind = n,
                trait = trait,
                mean_kinship = mean_kinship,
                covar = covar_miss,
                V = 1
            )
        )
        expect_equal( tib_covar_miss_bed_f1, tib_covar_miss_f )
    })
    
    test_that("ligera2_bed_f V=2 agrees with V=0 with covariates with missingness", {
        expect_silent(
            tib_covar_miss_bed_f2 <- ligera2_bed_f(
                file = name,
                m_loci = m,
                n_ind = n,
                trait = trait,
                mean_kinship = mean_kinship,
                covar = covar_miss,
                V = 2
            )
        )
        expect_equal( tib_covar_miss_bed_f2, tib_covar_miss_f )
    })
    
    test_that("ligera2_bed_multi runs without errors, matches ligera2_multi", {
        expect_silent(
            tib_multi_bed <- ligera2_bed_multi(
                file = name,
                trait = trait,
                mean_kinship = mean_kinship
            )
        )
        # remove bim portion before comparing
        tib_multi_bed[ c('chr', 'id', 'posg', 'pos', 'ref', 'alt') ] <- NULL
        # stupid subset ([]) used to make classes match, otherwise the latter has "spec_tbl_df" "tbl_df" "tbl" "data.frame" , the latter is missing the first one
        # https://www.tidyverse.org/blog/2018/12/readr-1-3-1/#tibble-subclass
        expect_equal( tib_multi, tib_multi_bed[] )
    })

    test_that("ligera2_bed_f_multi runs without errors, matches ligera2_f_multi", {
        expect_silent(
            tib_multi_bed_f <- ligera2_bed_f_multi(
                file = name,
                trait = trait,
                mean_kinship = mean_kinship
            )
        )
        # remove bim portion before comparing
        tib_multi_bed_f[ c('chr', 'id', 'posg', 'pos', 'ref', 'alt') ] <- NULL
        # stupid subset ([]) used to make classes match, otherwise the latter has "spec_tbl_df" "tbl_df" "tbl" "data.frame" , the latter is missing the first one
        # https://www.tidyverse.org/blog/2018/12/readr-1-3-1/#tibble-subclass
        expect_equal( tib_multi_f, tib_multi_bed_f[] )
    })

    test_that("ligera2_bed_multi `one_per_iter = TRUE` runs without errors, matches ligera2_multi", {
        expect_silent(
            tib_multi_opi_bed <- ligera2_bed_multi(
                file = name,
                trait = trait,
                mean_kinship = mean_kinship,
                one_per_iter = TRUE
            )
        )
        # remove bim portion before comparing
        tib_multi_opi_bed[ c('chr', 'id', 'posg', 'pos', 'ref', 'alt') ] <- NULL
        # stupid subset ([]) used to make classes match, otherwise the latter has "spec_tbl_df" "tbl_df" "tbl" "data.frame" , the latter is missing the first one
        # https://www.tidyverse.org/blog/2018/12/readr-1-3-1/#tibble-subclass
        expect_equal( tib_multi_opi, tib_multi_opi_bed[] )
    })

    test_that("ligera2_bed_f_multi `one_per_iter = TRUE` runs without errors, matches ligera2_f_multi", {
        expect_silent(
            tib_multi_opi_bed_f <- ligera2_bed_f_multi(
                file = name,
                trait = trait,
                mean_kinship = mean_kinship,
                one_per_iter = TRUE
            )
        )
        # remove bim portion before comparing
        tib_multi_opi_bed_f[ c('chr', 'id', 'posg', 'pos', 'ref', 'alt') ] <- NULL
        # stupid subset ([]) used to make classes match, otherwise the latter has "spec_tbl_df" "tbl_df" "tbl" "data.frame" , the latter is missing the first one
        # https://www.tidyverse.org/blog/2018/12/readr-1-3-1/#tibble-subclass
        expect_equal( tib_multi_f_opi, tib_multi_opi_bed_f[] )
    })

    test_that("ligera2_bed_f_multi runs with `prune_ld = TRUE`", {
        # don't check output in this case, just hope there are no errors/warnings/messages
        expect_silent(
            tib <- ligera2_bed_f_multi(
                file = name,
                trait = trait,
                mean_kinship = mean_kinship,
                prune_ld = TRUE
            )
        )
    })

    test_that("ligera2_bed_multi runs with `prune_ld = TRUE`", {
        # don't check output in this case, just hope there are no errors/warnings/messages
        expect_silent(
            tib <- ligera2_bed_multi(
                file = name,
                trait = trait,
                mean_kinship = mean_kinship,
                prune_ld = TRUE
            )
        )
    })

    # delete temporary files now
    delete_files_plink( name )
    delete_files_plink( name_miss )
}
OchoaLab/ligera documentation built on Jan. 5, 2023, 8:29 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
OchoaLab/ligera
LIght GEnetic Robust Association

tests/testthat/test-ligera.R
In OchoaLab/ligera: LIght GEnetic Robust Association

R Package Documentation

Browse R Packages

We want your feedback!

OchoaLab/ligera LIght GEnetic Robust Association

tests/testthat/test-ligera.R In OchoaLab/ligera: LIght GEnetic Robust Association

R Package Documentation

Browse R Packages

We want your feedback!

OchoaLab/ligera
LIght GEnetic Robust Association

tests/testthat/test-ligera.R
In OchoaLab/ligera: LIght GEnetic Robust Association