library(tibble)
library(Matrix)
test_that( "close_relatives_sparse_cpp works", {
# create a matrix with only unrelated people first, most trivial case but one that happens in every first generation of a pedigree
# my basic function requires the matrix to already be in the right format
n <- 10
cutoff <- 1 / 4^3 # match sim_pedigree default
kinship <- Diagonal( n, 0.5 )
kinship <- as( kinship, 'symmetricMatrix' )
# successful yet trivial run
expect_silent(
close_relatives <- close_relatives_sparse_cpp( kinship, cutoff )
)
expect_true( is.list( close_relatives ) )
expect_equal( length( close_relatives ), n )
expect_equal( length( unlist( close_relatives ) ), 0 )
# now a non-empty case, constructed manually
# start from previous example
kinship[ 1, 2 ] <- 0.25
kinship[ 9, 10 ] <- 0.25
# previous edits turn class to dgCMatrix (no longer symmetric), but this does exactly what we want:
kinship <- forceSymmetric( kinship )
expect_silent(
close_relatives <- close_relatives_sparse_cpp( kinship, cutoff )
)
expect_true( is.list( close_relatives ) )
expect_equal( length( close_relatives ), n )
expect_equal( close_relatives[ c(1:2, 9:10) ], list(2,1,10,9) )
expect_equal( length( unlist( close_relatives[ 3:8 ] ) ), 0 )
# continue to make example more complex
kinship[ 1, 3 ] <- 0.25
kinship[ 2, 3 ] <- 0.25
kinship <- forceSymmetric( kinship )
expect_silent(
close_relatives <- close_relatives_sparse_cpp( kinship, cutoff )
)
expect_true( is.list( close_relatives ) )
expect_equal( length( close_relatives ), n )
expect_equal( close_relatives[ c(1:3, 9:10) ], list( 2:3, c(1,3), 1:2, 10, 9 ) )
expect_equal( length( unlist( close_relatives[ 4:8 ] ) ), 0 )
# and introduce relatives that are too distant and don't change anything, and one that's right at the threshold
kinship[ 4, 5 ] <- 1 / 4^3
kinship[ 6, 7 ] <- 1 / 4^4
kinship[ 6, 8 ] <- 1 / 4^4
kinship <- forceSymmetric( kinship )
expect_silent(
close_relatives <- close_relatives_sparse_cpp( kinship, cutoff )
)
expect_true( is.list( close_relatives ) )
expect_equal( length( close_relatives ), n )
expect_equal( close_relatives[ c(1:5, 9:10) ], list( 2:3, c(1,3), 1:2, 5, 4, 10, 9 ) )
expect_equal( length( unlist( close_relatives[ 6:8 ] ) ), 0 )
})
test_that( "draw_couples_nearest works", {
# create kinship for unrelated founders first
n <- 10
kinship_local <- diag( n ) / 2
# test sparse version too
kinship_local_sparse <- Matrix( kinship_local, sparse = TRUE )
kinship_local_sparse <- as( kinship_local_sparse, 'symmetricMatrix' )
# even sex distribution to ensure prefect pairing
sex <- rep.int( c(1, 2), n / 2 )
# cause errors on purpose
# first two arguments are mandatory
expect_error( draw_couples_nearest( ) )
expect_error( draw_couples_nearest( kinship_local = kinship_local ) )
expect_error( draw_couples_nearest( sex = sex ) )
expect_error( draw_couples_nearest( 1:n, sex = sex ) ) # must be matrix
expect_error( draw_couples_nearest( cbind( 1:n ), sex = sex ) ) # must be square
expect_error( draw_couples_nearest( kinship_local, sex[-1] ) ) # dimensions must agree
# a successful run
expect_silent(
parents <- draw_couples_nearest( kinship_local, sex )
)
expect_true( is.matrix( parents ) )
expect_equal( nrow( parents ), 2 )
expect_equal( ncol( parents ), n / 2 ) # equality only because everybody is unrelated in this case
expect_true( !anyNA( parents ) )
# parents are a subset of all individuals available (as indexes)
expect_true( all( parents %in% 1 : n ) )
# and no parent appears twice
expect_equal( length( parents ), length( unique( parents ) ) )
# verify parent sexes
expect_true( all( sex[ parents[ 1, ] ] == 1 ) )
expect_true( all( sex[ parents[ 2, ] ] == 2 ) )
# no need to verify kinship here because it was all trivial
# test sparse matrix input
expect_silent(
parents <- draw_couples_nearest( kinship_local_sparse, sex, sparse = TRUE )
)
expect_true( is.matrix( parents ) )
expect_equal( nrow( parents ), 2 )
expect_equal( ncol( parents ), n / 2 ) # equality only because everybody is unrelated in this case
expect_true( !anyNA( parents ) )
# parents are a subset of all individuals available (as indexes)
expect_true( all( parents %in% 1 : n ) )
# and no parent appears twice
expect_equal( length( parents ), length( unique( parents ) ) )
# verify parent sexes
expect_true( all( sex[ parents[ 1, ] ] == 1 ) )
expect_true( all( sex[ parents[ 2, ] ] == 2 ) )
# no need to verify kinship here because it was all trivial
# create a smaller case where a unique solution exists
# start with two nuclear families (can't be paired within because of thresholds)
# the numbers are hacked so that across only one pairing works for each sibling (not realistic numbers but good for test)
# pairings must have local kinship strictly < 1 / 4^3 = 0.015625
n <- 4
sex <- c(1, 2, 2, 1)
kinship_local <- matrix(
c(
0.50, 0.25, 0.00, 0.06,
0.25, 0.50, 0.06, 0.00,
0.00, 0.06, 0.50, 0.25,
0.06, 0.00, 0.25, 0.50
),
nrow = n,
ncol = n
)
expect_silent(
parents <- draw_couples_nearest( kinship_local, sex )
)
expect_true( is.matrix( parents ) )
expect_equal( nrow( parents ), 2 )
expect_equal( ncol( parents ), n / 2 ) # exact solution also ensures equality here
expect_true( !anyNA( parents ) )
# parents are a subset of all individuals available (as indexes)
expect_true( all( parents %in% 1 : n ) )
# and no parent appears twice
expect_equal( length( parents ), length( unique( parents ) ) )
# unfortunately, though the pairings are unique, there are two column permutations in which they can appear in output
# NOTE: sex means fathers (first row) can only be individuals 1 and 4
parents_exp = cbind( c(1,3), c(4,2) )
expect_true( all( parents == parents_exp ) || all( parents == parents_exp[, 2:1 ] ) )
# verify parent sexes
expect_true( all( sex[ parents[ 1, ] ] == 1 ) )
expect_true( all( sex[ parents[ 2, ] ] == 2 ) )
# the restricted parent pairings confirms kinship filter was correct (no need for additional kinship validation)
# now a more challenging random example
cutoff <- 1 / 4^3 # have out here for tests (same as default)
# also make it odd, should work now!
n <- 11
# a random positive definite matrix with all values between 0 and 1
# divided by extra 1/n to make numbers tiny, and as it is there are frequently between 0-2 pairs, great test for that edge case
kinship_local <- crossprod( matrix( runif( n*n ), nrow = n, ncol = n ) ) / n^2
expect_true( min( kinship_local ) >= 0 )
expect_true( max( kinship_local ) <= 1 )
sex <- sample( c(1, 2), n, replace = TRUE ) # completely random sex too
# start tests
expect_silent(
parents <- draw_couples_nearest( kinship_local, sex, cutoff = cutoff )
)
expect_true( is.matrix( parents ) )
expect_equal( nrow( parents ), 2 )
expect_true( ncol( parents ) < n / 2 ) # must be fewer here because n is odd!
#message( 'pairs: ', ncol( parents ) )
expect_true( !anyNA( parents ) )
# parents are a subset of all individuals available (as indexes)
expect_true( all( parents %in% 1 : n ) )
# and no parent appears twice
expect_equal( length( parents ), length( unique( parents ) ) )
# verify parent sexes
expect_true( all( sex[ parents[ 1, ] ] == 1 ) )
expect_true( all( sex[ parents[ 2, ] ] == 2 ) )
# verify kinship (only non-trivial test here)
# (sometimes there's no parents at all)
n_fam <- ncol( parents )
if ( n_fam > 0 ) {
kinship_parents <- vector( 'numeric', n_fam )
for ( j in 1 : n_fam ) {
kinship_parents[ j ] <- kinship_local[ parents[ 1, j ], parents[ 2, j ] ]
}
expect_true( all( kinship_parents < cutoff ) )
}
})
test_that( "draw_num_children_per_fam works", {
pop_size <- 30
n_fam <- 10
children_min <- 1
expect_silent(
children_per_fam <- draw_num_children_per_fam( n_fam, pop_size, children_min )
)
expect_equal( length( children_per_fam ), n_fam )
expect_equal( sum( children_per_fam ), pop_size )
expect_true( min( children_per_fam ) >= children_min )
})
test_that( "match_fam_founders works", {
# smallest toy example
fam <- tibble(
id = c('a', 'b', 'c'),
pat = c(NA, NA, 'a'),
mat = c(NA, NA, 'b')
)
names_founders <- c('a', 'b')
# start with case that works
expect_silent(
data <- match_fam_founders( fam, names_founders, 'X', 'column' )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'indexes') )
# check that new columns are as they should be
fam2 <- data$fam
expect_equal( fam2$founder, c(TRUE, TRUE, FALSE) )
expect_equal( fam2$pati, c(NA, NA, 1) )
expect_equal( fam2$mati, c(NA, NA, 2) )
expect_equal( data$indexes, 1:2 )
# ultimately we want this to be true
expect_equal( fam2$id[ fam2$founder ], names_founders[ data$indexes ] )
# works with founders in reversed order!
expect_silent(
data <- match_fam_founders( fam, rev(names_founders), 'X', 'column' )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'indexes') )
# check that new columns are as they should be
fam2 <- data$fam
expect_equal( fam2$founder, c(TRUE, TRUE, FALSE) )
expect_equal( fam2$pati, c(NA, NA, 1) )
expect_equal( fam2$mati, c(NA, NA, 2) )
expect_equal( data$indexes, 2:1 )
# ultimately we want this to be true
expect_equal( fam2$id[ fam2$founder ], rev(names_founders)[ data$indexes ] )
# works with founders with extra IDs
names_founders <- c('a', 'd', 'b')
expect_silent(
data <- match_fam_founders( fam, names_founders, 'X', 'column' )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'indexes') )
# check that new columns are as they should be
fam2 <- data$fam
expect_equal( fam2$founder, c(TRUE, TRUE, FALSE) )
expect_equal( fam2$pati, c(NA, NA, 1) )
expect_equal( fam2$mati, c(NA, NA, 2) )
expect_equal( data$indexes, c(1, 3) )
# ultimately we want this to be true
expect_equal( fam2$id[ fam2$founder ], names_founders[ data$indexes ] )
# cause errors on purpose
# missing one founder
expect_error( match_fam_founders( fam, c('a'), 'X', 'column' ) )
expect_error( match_fam_founders( fam, c('b'), 'X', 'column' ) )
# larger toy example
# founders intermingled with non-founders
fam <- tibble(
id = c('a', 'b', 'c', 'd', 'e', 'f'),
pat = c( NA, NA, 'a', NA, NA, 'e'),
mat = c( NA, NA, 'b', NA, NA, 'd')
)
names_founders <- c('d', 'e', 'a', 'b')
# expect to work
expect_silent(
data <- match_fam_founders( fam, names_founders, 'X', 'column' )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'indexes') )
# check that new columns are as they should be
fam2 <- data$fam
expect_equal( fam2$founder, c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE) )
expect_equal( fam2$pati, c(NA, NA, 1, NA, NA, 5) )
expect_equal( fam2$mati, c(NA, NA, 2, NA, NA, 4) )
expect_equal( data$indexes, c(3, 4, 1, 2) )
# ultimately we want this to be true
expect_equal( fam2$id[ fam2$founder ], names_founders[ data$indexes ] )
# make sure there's no problems with empty `missing_vals` vector
expect_silent(
data2 <- match_fam_founders( fam, names_founders, 'X', 'column', missing_vals = c() )
)
expect_equal( data, data2 ) # redundant check
# cause errors on purpose
fam$id[1] <- '' # tests check for missingness and for mapping to NAs
expect_error( match_fam_founders( fam, names_founders, 'X', 'column' ) )
fam$id[1] <- 'b' # set to a repeat value (though it also fails because "a" is completely missing, though that check happens later)
expect_error( match_fam_founders( fam, names_founders, 'X', 'column' ) )
fam$id[1] <- 'a' # set back to normal
names_founders[1] <- '' # now here
expect_error( match_fam_founders( fam, names_founders, 'X', 'column' ) )
names_founders[1] <- 'e' # set to a repeat value
expect_error( match_fam_founders( fam, names_founders, 'X', 'column' ) )
names_founders[1] <- 'd' # set back
})
# tests for recurrent data
validate_kinship_proper <- function( kinship, ids ) {
expect_true( is.matrix( kinship ) || is( kinship, 'Matrix' ) )
# this test fails with sparse matrices for stupid reasons... TODO: revisit if we can force symmetry more smartly
if ( is.matrix( kinship ) )
expect_true( isSymmetric( kinship ) )
expect_equal( nrow( kinship ), length( ids ) )
expect_true( !anyNA( kinship ) )
expect_true( min( kinship ) >= 0 )
expect_true( max( kinship ) <= 1 )
expect_equal( rownames( kinship ), ids )
}
# kinship calculated from FAM table using external package `kinship2`, for validation
kin2 <- function( fam ) {
# create the pedigree object
pedigree <- kinship2::pedigree(
id = fam$id,
dadid = fam$pat,
momid = fam$mat,
sex = fam$sex,
affected = fam$pheno
)
# calculate full kinship matrix using this package, which assumes (as we do) that founders are unrelated
return( kinship2::kinship( pedigree ) )
}
test_that( "kinship_fam works", {
# here any kinship matrix will work
n <- 9
n2 <- floor( n / 2 )
# a different number for more fun
n_kids <- 11
# a random kinship matrix for founders
# a random positive definite matrix with all values between 0 and 1
# divided by extra 1/n to make numbers tiny, and as it is there are frequently between 0-2 pairs, great test for that edge case
kinship <- crossprod( matrix( runif( n*n ), nrow = n, ncol = n ) ) / n^2
# new code needs names, use letters to be extra non-trivial
rownames( kinship ) <- letters[ 1 : n ]
colnames( kinship ) <- letters[ 1 : n ]
# individuals just paired consecutively
# (parents matrix in old format)
parents <- matrix( letters[ 1 : ( 2 * n2 ) ], nrow = 2, ncol = n2 )
# draw family sizes as usual
children_per_fam <- draw_num_children_per_fam( n2, n_kids )
# minimal plink FAM table
# contains founders and their children! (complete pedigree)
fam <- tibble(
# kid names should be letters after that, non-overlapping with parents
id = c( letters[ 1 : n ], letters[ n + (1 : n_kids) ] ),
# founders have missing parents
# for children, expand parents * children_per_fam to make FAM pat/mat vectors
pat = c( rep.int( NA, n ), rep.int( parents[ 1, ], children_per_fam ) ),
mat = c( rep.int( NA, n ), rep.int( parents[ 2, ], children_per_fam ) )
)
# make kinship for all
expect_silent(
kinship_all <- kinship_fam( kinship, fam )
)
validate_kinship_proper( kinship_all, fam$id )
# and check that kinship of founders appears in output as it should (here trivial because orders agree)
expect_equal( kinship_all[ 1:n, 1:n ], kinship )
# a trivial case for unrelated founders results in agreement with kinship2
if ( suppressMessages(suppressWarnings(require(kinship2))) ) {
# recalculate our way first, with unrelated founders
kinship <- diag( n ) / 2
# new code needs names, use letters to be extra non-trivial
rownames( kinship ) <- letters[ 1 : n ]
colnames( kinship ) <- letters[ 1 : n ]
expect_silent(
kinship_all <- kinship_fam( kinship, fam )
)
# calculate the kinship matrix with their code
# first fix `fam` (kinship2 triggers checks that are irrelevant here, but meh)
fam$sex <- rep_len( c(1L, 2L), nrow( fam ) ) # this agrees with our parent setup
fam$pheno <- 0
kinship_kinship2 <- kin2( fam )
# these should be identical!
expect_equal( kinship_all, kinship_kinship2 )
# repeat with sparse input and sparse output
kinship_sparse <- Matrix( kinship, sparse = TRUE )
expect_silent(
kinship_all_sparse <- kinship_fam( kinship_sparse, fam, sparse = TRUE )
)
# these should be identical! After previous output is also made sparse
#expect_equal( kinship_all_sparse, Matrix( kinship_all, sparse = TRUE ) ) # this doesnt work because the first one isn't encoded as a symmetric matrix
expect_equal( as.matrix( kinship_all_sparse ), kinship_all ) # sadly, to compare, we need to unsparsen
}
# a smaller toy example with unrelated founders, but with interlaced founders and non-founders, and different kinship order, to check that case explicitly (and also not dependent on kinship2 being available for testing)
fam <- tibble(
id = c('a', 'b', 'c', 'd', 'e', 'f', 'g'),
pat = c( NA, NA, 'a', NA, NA, 'e', 'f'),
mat = c( NA, NA, 'b', NA, NA, 'd', 'c')
)
n <- 4
# since all are unrelated, order is unimportant for numeric values, though column names will change that
names_founders <- c('d', 'e', 'a', 'b')
kinship <- diag( n ) / 2
colnames( kinship ) <- names_founders
rownames( kinship ) <- names_founders
# expected output
# note order is that of `fam`
kinship_all_exp <- matrix(
c(
1/2, 0, 1/4, 0, 0, 0, 1/8,
0, 1/2, 1/4, 0, 0, 0, 1/8,
1/4, 1/4, 1/2, 0, 0, 0, 1/4,
0, 0, 0, 1/2, 0, 1/4, 1/8,
0, 0, 0, 0, 1/2, 1/4, 1/8,
0, 0, 0, 1/4, 1/4, 1/2, 1/4,
1/8, 1/8, 1/4, 1/8, 1/8, 1/4, 1/2
),
nrow = nrow(fam),
ncol = nrow(fam)
)
colnames( kinship_all_exp ) <- fam$id
rownames( kinship_all_exp ) <- fam$id
expect_silent(
kinship_all <- kinship_fam( kinship, fam )
)
expect_equal( kinship_all, kinship_all_exp )
})
test_that( "kinship_last_gen works", {
# test toy example here only, G==3
fam <- tibble(
id = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'),
pat = c( NA, NA, 'a', NA, NA, 'e', 'f', 'f'),
mat = c( NA, NA, 'b', NA, NA, 'd', 'c', 'c')
)
n <- 4
# since all are unrelated, order is unimportant for numeric values, though column names will change that
names_1 <- c('d', 'e', 'a', 'b')
kinship <- diag( n ) / 2
colnames( kinship ) <- names_1
rownames( kinship ) <- names_1
# expected output
# note order is that of `fam` (subset of last generation
kinship_G_exp <- matrix(
c(
1/2, 1/4,
1/4, 1/2
),
nrow = 2,
ncol = 2
)
names_2 <- c('c', 'f')
names_G <- c('g', 'h')
colnames( kinship_G_exp ) <- names_G
rownames( kinship_G_exp ) <- names_G
ids <- list( names_1, names_2, names_G )
expect_silent(
kinship_G <- kinship_last_gen( kinship, fam, ids )
)
expect_equal( kinship_G, kinship_G_exp )
})
validate_fam <- function( fam, n, G ) {
# fix n if needed
if ( length( n ) == 1 )
n <- rep.int( n, G )
expect_true( is.data.frame( fam ) )
expect_equal( names( fam ), c('fam', 'id', 'pat', 'mat', 'sex', 'pheno') )
expect_equal( nrow( fam ), sum( n ) )
# check values broadly
expect_true( all( fam$fam == 'fam1' ) ) # dummy default
expect_true( all( fam$pheno == 0 ) ) # dummy default
expect_true( all( fam$sex %in% c(1L, 2L) ) )
# check that all IDs we expect are indeed in FAM file, in the expected order!
# need a loop to build vector
ids_exp <- NULL
for ( g in 1 : G ) {
ids_exp <- c( ids_exp, paste0( g, '-', 1 : n[g] ) )
}
expect_equal( fam$id, ids_exp )
# check that everybody's parents are the sex the should be
# NOTE: list of parents exclude founders (who have no parents)
expect_true( all( fam$sex[ fam$id %in% fam$pat[ -(1:n[1]) ] ] == 1L ) ) # all fathers are male
expect_true( all( fam$sex[ fam$id %in% fam$mat[ -(1:n[1]) ] ] == 2L ) ) # all mothers are female
}
validate_ids <- function( ids, n, G ) {
# construct expected `ids` from scratch
# this makes it easier
if ( length(n) == 1 )
n <- rep.int( n, G )
ids_exp <- vector( 'list', G )
for ( g in 1 : G ) {
ids_exp[[ g ]] <- paste0( g, '-', 1 : n[g] )
}
expect_equal( ids, ids_exp )
}
test_that( "sim_pedigree works", {
G <- 3
n <- 16
# cause errors on purpose
expect_error( sim_pedigree() )
expect_error( sim_pedigree( n = n ) ) # fails because n is scalar only
expect_error( sim_pedigree( G = G ) )
# a minimal, successful run
# NOTE: had to set sex of founders to be exactly half male/female because otherwise they are set randomly, and there is a good chance (for small `n`) that they are all the same sex, in which case there's no solution!
expect_silent(
data <- sim_pedigree( n, G, sex = rep_len( c(1L, 2L), n ) )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'ids', 'kinship_local') )
# check fam
validate_fam( data$fam, n, G )
# check ids
validate_ids( data$ids, n, G )
# check kinship_local
names_kinship_exp <- paste0( G, '-', 1 : n )
validate_kinship_proper( data$kinship_local, names_kinship_exp )
# it'd be nice to compare kinship calculated by `kinship2` package!
if ( suppressMessages(suppressWarnings(require(kinship2))) ) {
# calculate the kinship matrix directly from FAM
kinship_kinship2 <- kin2( data$fam )
# subset because we only have last generation (though that suffices, since it couldn't be correct if the previous steps weren't also correct due to the recursivity of calculations)
indexes <- (G-1) * n + 1:n # these are the indexes we want
kinship_kinship2 <- kinship_kinship2[ indexes, indexes ]
# finally, compare!
expect_equal( data$kinship_local, kinship_kinship2 )
}
# do a version with variable `n` per generation and full output
# better to have population grow since there's a minimum number of children
G <- 3
n <- c(16, 19, 21)
# another type of error if G and n are mismatched in dimensions
expect_error( sim_pedigree( n[1:2], G ) )
# and now the proper run
# again set sex to ensure at least one pair in first generation
expect_silent(
data <- sim_pedigree( n, sex = rep_len( c(1L, 2L), n[1] ), full = TRUE )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'ids', 'kinship_local') )
# check fam
validate_fam( data$fam, n, G )
# check ids
validate_ids( data$ids, n, G )
# check kinship_local
# this time it's a list because `full = TRUE`
expect_true( is.list( data$kinship_local ) )
expect_equal( length( data$kinship_local ), G )
for ( g in 1 : G ) {
names_kinship_exp <- paste0( g, '-', 1 : n[g] )
validate_kinship_proper( data$kinship_local[[g]], names_kinship_exp )
}
# compare to `kinship2` package again!
if ( suppressMessages(suppressWarnings(require(kinship2))) ) {
# calculate the kinship matrix directly from FAM
kinship_kinship2 <- kin2( data$fam )
# here we subset each generation and compare
for ( g in 1 : G ) {
# subset the current generation
# these are the indexes we want
indexes <- sum( n[ seq_len(g-1) ] ) + 1 : n[g]
kinship_kinship2_g <- kinship_kinship2[ indexes, indexes ]
# finally, compare!
expect_equal( data$kinship_local[[ g ]], kinship_kinship2_g )
}
}
})
test_that( "sim_pedigree sparse works", {
G <- 3
n <- 16
# cause errors on purpose
expect_error( sim_pedigree() )
expect_error( sim_pedigree( n = n ) ) # fails because n is scalar only
expect_error( sim_pedigree( G = G ) )
# a minimal, successful run
# NOTE: had to set sex of founders to be exactly half male/female because otherwise they are set randomly, and there is a good chance (for small `n`) that they are all the same sex, in which case there's no solution!
expect_silent(
data <- sim_pedigree( n, G, sex = rep_len( c(1L, 2L), n ), sparse = TRUE )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'ids', 'kinship_local') )
# check fam
validate_fam( data$fam, n, G )
# check ids
validate_ids( data$ids, n, G )
# check kinship_local
names_kinship_exp <- paste0( G, '-', 1 : n )
validate_kinship_proper( data$kinship_local, names_kinship_exp )
# it'd be nice to compare kinship calculated by `kinship2` package!
if ( suppressMessages(suppressWarnings(require(kinship2))) ) {
# calculate the kinship matrix directly from FAM
kinship_kinship2 <- kin2( data$fam )
# subset because we only have last generation (though that suffices, since it couldn't be correct if the previous steps weren't also correct due to the recursivity of calculations)
indexes <- (G-1) * n + 1:n # these are the indexes we want
kinship_kinship2 <- kinship_kinship2[ indexes, indexes ]
# finally, compare!
#expect_equal( data$kinship_local, Matrix( kinship_kinship2, sparse = TRUE ) ) # doesn't work because first one isn't encoded as symmetric
expect_equal( as.matrix( data$kinship_local ), kinship_kinship2 ) # unsparsen for comparison
}
# do a version with variable `n` per generation and full output
# better to have population grow since there's a minimum number of children
G <- 3
n <- c(16, 19, 21)
# another type of error if G and n are mismatched in dimensions
expect_error( sim_pedigree( n[1:2], G ) )
# and now the proper run
# again set sex to ensure at least one pair in first generation
expect_silent(
data <- sim_pedigree( n, sex = rep_len( c(1L, 2L), n[1] ), full = TRUE, sparse = TRUE )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c('fam', 'ids', 'kinship_local') )
# check fam
validate_fam( data$fam, n, G )
# check ids
validate_ids( data$ids, n, G )
# check kinship_local
# this time it's a list because `full = TRUE`
expect_true( is.list( data$kinship_local ) )
expect_equal( length( data$kinship_local ), G )
for ( g in 1 : G ) {
names_kinship_exp <- paste0( g, '-', 1 : n[g] )
validate_kinship_proper( data$kinship_local[[g]], names_kinship_exp )
}
# compare to `kinship2` package again!
if ( suppressMessages(suppressWarnings(require(kinship2))) ) {
# calculate the kinship matrix directly from FAM
kinship_kinship2 <- kin2( data$fam )
# here we subset each generation and compare
for ( g in 1 : G ) {
# subset the current generation
# these are the indexes we want
indexes <- sum( n[ seq_len(g-1) ] ) + 1 : n[g]
kinship_kinship2_g <- kinship_kinship2[ indexes, indexes ]
# finally, compare!
expect_equal( as.matrix( data$kinship_local[[ g ]] ), kinship_kinship2_g )
}
}
})
test_that( "draw_allele (cpp) works", {
# test homozygotes, which are deterministic
expect_equal( draw_allele(2), 1 )
expect_equal( draw_allele(0), 0 )
# heterozygote is a random coin toss
for ( i in 1:10 ) { # repeat enough times to know weird stuff doesn't happen
expect_true( draw_allele(1) %in% c(0L, 1L) )
}
# NA should return NA
expect_true( is.na( draw_allele( NA ) ) )
# values out of range must cause errors
expect_error( draw_allele( -1 ) )
expect_error( draw_allele( 3 ) )
})
test_that( "geno_fam works", {
# construct genotypes for parents
G <- 2
n <- c( 4, 5 )
m <- 10
# random genotype data
X <- matrix( rbinom( m * n[1], 2, 0.5 ), nrow = m, ncol = n[1] )
# and FAM table
# NOTE: had to set sex of founders to be exactly half male/female because otherwise they are set randomly, and there is a good chance (for small `n`) that they are all the same sex, in which case there's no solution!
expect_silent(
data <- sim_pedigree( n, sex = rep_len( c(1L, 2L), n[1] ) )
)
# extract table including both generations
fam <- data$fam
ids <- data$ids
# cause errors on purpose
# stuff missing
expect_error( geno_fam() )
expect_error( geno_fam( X = X ) )
expect_error( geno_fam( fam = fam ) )
# X is missing colnames!
expect_error( geno_fam( X, fam ) )
# add colnames to X now
colnames( X ) <- ids[[ 1 ]]
# successful run
expect_silent(
X_all <- geno_fam( X, fam )
)
expect_true( is.matrix( X_all ) )
expect_equal( ncol( X_all ), sum(n) )
expect_equal( nrow( X_all ), m )
expect_true( !anyNA( X_all ) )
expect_true( all( X_all %in% 0L:2L ) )
expect_equal( colnames( X_all ), fam$id )
expect_equal( X_all[ , 1 : n[1] ], X ) # submatrix of parents must be equal! (here there was no reordering)
# a more explicit toy case with deterministic solution
# also tests for edge case of a single child
X <- cbind( rep.int( 0L, m ), rep.int( 2L, m ) )
colnames( X ) <- letters[1:2]
X_all_exp <- cbind( X, rep.int( 1L, m ) )
colnames( X_all_exp ) <- letters[1:3]
# minimal FAM table for this
fam <- tibble(
id = c('a', 'b', 'c'),
pat = c(NA, NA, 'a'),
mat = c(NA, NA, 'b')
)
expect_silent(
X_all <- geno_fam( X, fam )
)
expect_equal( X_all, X_all_exp )
# same but reorder parents in input
X <- X[ , 2:1 ]
expect_silent(
X_all <- geno_fam( X, fam )
)
expect_equal( X_all, X_all_exp )
# same homozygous, and add NAs!
x_exp <- c(0L, 2L, NA)
X <- cbind( x_exp, x_exp )
colnames( X ) <- letters[1:2]
X_all_exp <- cbind( X, x_exp )
colnames( X_all_exp ) <- letters[1:3]
expect_silent(
X_all <- geno_fam( X, fam )
)
expect_equal( X_all, X_all_exp )
# cause a new error by including a parent missing in X
fam <- tibble(
id = c('a', 'b', 'c'),
pat = c(NA, NA, 'a'),
mat = c(NA, NA, 'd') # not "b"
)
expect_error( geno_fam( X, fam ) )
})
test_that( "geno_last_gen works", {
# construct genotypes for parents
G <- 3
n <- c( 14, 15, 17 )
m <- 10
# random genotype data
X <- matrix( rbinom( m * n[1], 2, 0.5 ), nrow = m, ncol = n[1] )
# and FAM table
# NOTE: had to set sex of founders to be exactly half male/female because otherwise they are set randomly, and there is a good chance (for small `n`) that they are all the same sex, in which case there's no solution!
expect_silent(
data <- sim_pedigree( n, sex = rep_len( c(1L, 2L), n[1] ) )
)
# extract table including both generations
fam <- data$fam
ids <- data$ids
# cause errors on purpose
# stuff missing
expect_error( geno_last_gen() )
# singletons
expect_error( geno_last_gen( X = X ) )
expect_error( geno_last_gen( fam = fam ) )
expect_error( geno_last_gen( ids = ids ) )
# pairs
expect_error( geno_last_gen( X = X, fam = fam ) )
expect_error( geno_last_gen( X = X, ids = ids ) )
expect_error( geno_last_gen( fam = fam, ids = ids ) )
# X is missing colnames!
expect_error( geno_last_gen( X, fam, ids ) )
# add colnames to X now
colnames( X ) <- ids[[ 1 ]]
# successful run
expect_silent(
X_G <- geno_last_gen( X, fam, ids )
)
expect_true( is.matrix( X_G ) )
expect_equal( ncol( X_G ), n[[G]] )
expect_equal( nrow( X_G ), m )
expect_true( !anyNA( X_G ) )
expect_true( all( X_G %in% 0L:2L ) )
expect_equal( colnames( X_G ), ids[[ G ]] )
})
test_that( "prune_fam works", {
# create a pedigree with a founder without descendants
fam <- tibble(
id = letters[1:4],
pat = c(NA, NA, NA, 'a'),
mat = c(NA, NA, NA, 'b')
)
# only want 'd' and its ancestors
ids <- 'd'
# cause errors on purpose
expect_error( prune_fam() )
expect_error( prune_fam( fam ) )
expect_error( prune_fam( ids = ids ) )
# successful run
expect_silent(
fam_out <- prune_fam( fam, ids )
)
expect_equal( fam_out, fam[ -3, ] ) # exclude 3rd row (id="c")
# more complicated
# here 'c' mated with both 'd' and 'e'
fam <- tibble(
id = c('a', 'b', 'c', 'd', 'e', 'f', 'g'),
pat = c( NA, NA, NA, NA, 'a', 'c', 'e'),
mat = c( NA, NA, NA, NA, 'b', 'd', 'c')
)
# listed both 'g' (leaf node) and one of its ancestors (to try to trip it up)
ids <- c('g', 'c')
expect_silent(
fam_out <- prune_fam( fam, ids )
)
expect_equal( fam_out, fam[ -c(4, 6), ] ) # exclude c('d', 'f')
})
test_that( "admix_fam works", {
# toy example, with interspersed founders and reordered founders in `admix`
fam <- tibble(
id = c('a', 'b', 'c', 'd', 'e', 'f', 'g'),
pat = c( NA, NA, 'a', NA, NA, 'e', 'f'),
mat = c( NA, NA, 'b', NA, NA, 'd', 'c')
)
k_subpops <- 3
names_subpops <- paste0( 'S', 1:3 )
admix <- matrix(
c(
1.0, 0.0, 0.0, # a
0.0, 1.0, 0.0, # b
0.0, 0.0, 1.0, # e
0.2, 0.3, 0.5 # d
),
byrow = TRUE,
ncol = k_subpops
)
rownames( admix ) <- c('a', 'b', 'e', 'd') # scrambled from FAM
# expected output
admix2_exp <- matrix(
c(
1.0, 0.0, 0.0, # a
0.0, 1.0, 0.0, # b
0.5, 0.5, 0.0, # c
0.2, 0.3, 0.5, # d
0.0, 0.0, 1.0, # e
0.1, 0.15, 0.75, # f
0.3, 0.325, 0.375 # g
),
byrow = TRUE,
ncol = k_subpops
)
rownames( admix2_exp ) <- fam$id
# a succcessful run
expect_silent(
admix2 <- admix_fam( admix, fam )
)
expect_equal( admix2, admix2_exp )
})
test_that( "admix_last_gen works", {
# toy example, with interspersed founders and reordered founders in `admix`
fam <- tibble(
id = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'),
pat = c( NA, NA, 'a', NA, NA, 'e', 'f', 'f'),
mat = c( NA, NA, 'b', NA, NA, 'd', 'c', 'c')
)
ids <- list( c('a', 'b', 'd', 'e'), c('c', 'f'), c('g', 'h') )
k_subpops <- 3
names_subpops <- paste0( 'S', 1:3 )
admix <- matrix(
c(
1.0, 0.0, 0.0, # a
0.0, 1.0, 0.0, # b
0.0, 0.0, 1.0, # e
0.2, 0.3, 0.5 # d
),
byrow = TRUE,
ncol = k_subpops
)
rownames( admix ) <- c('a', 'b', 'e', 'd') # scrambled from FAM
# expected output
admix_G_exp <- c( 0.3, 0.325, 0.375 ) # g,h
admix_G_exp <- rbind( admix_G_exp, admix_G_exp ) # duplicate sibs
rownames( admix_G_exp ) <- ids[[ 3 ]]
# a succcessful run
expect_silent(
admix_G <- admix_last_gen( admix, fam, ids )
)
expect_equal( admix_G, admix_G_exp )
})
test_that( "fam_ancestors works", {
# only one argument is mandatory
expect_error( fam_ancestors() )
# zero or negative generations are invalid
expect_error( fam_ancestors( 0 ) )
expect_error( fam_ancestors( -10 ) )
# test trivial example
G <- 1L
expect_silent(
data <- fam_ancestors( G )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c( 'fam', 'ids' ) )
fam <- data$fam
ids <- data$ids
# validate fam
expect_true( is.data.frame( fam ) )
expect_equal( names( fam ), c('id', 'pat', 'mat', 'sex') )
expect_true( all( fam$pat[ !is.na( fam$pat ) ] %in% fam$id ) )
expect_true( all( fam$mat[ !is.na( fam$mat ) ] %in% fam$id ) )
# confirm that all fathers are male, mothers female
expect_true( all( fam$sex[ fam$id %in% fam$pat ] == 1L ) )
expect_true( all( fam$sex[ fam$id %in% fam$mat ] == 2L ) )
# validate IDs
expect_true( is.list( ids ) )
expect_equal( length( ids ), G )
for ( ids_g in ids ) {
expect_true( all( ids_g %in% fam$id ) )
}
# test substantially larger example
G <- 8L
expect_silent(
data <- fam_ancestors( G )
)
expect_true( is.list( data ) )
expect_equal( names( data ), c( 'fam', 'ids' ) )
fam <- data$fam
ids <- data$ids
# validate fam
expect_true( is.data.frame( fam ) )
expect_equal( names( fam ), c('id', 'pat', 'mat', 'sex') )
expect_true( all( fam$pat[ !is.na( fam$pat ) ] %in% fam$id ) )
expect_true( all( fam$mat[ !is.na( fam$mat ) ] %in% fam$id ) )
# confirm that all fathers are male, mothers female
expect_true( all( fam$sex[ fam$id %in% fam$pat ] == 1L ) )
expect_true( all( fam$sex[ fam$id %in% fam$mat ] == 2L ) )
# validate IDs
expect_true( is.list( ids ) )
expect_equal( length( ids ), G )
for ( ids_g in ids ) {
expect_true( all( ids_g %in% fam$id ) )
}
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.