Nothing
context("genoprobs -> snpprobs")
test_that(".alleleprob_to_snpprob works in simple cases", {
# conversion correct, with 1 map position
pr <- array(c(0.15, 0.03, 0.20, 0.08, 0.18, 0.09, 0.15, 0.11),
dim=c(1, 8, 1))
sdp <- 1:255
interval <- rep(0, length(sdp))
on_map <- rep(TRUE, length(sdp))
# sdp -> genotype matrix
g <- matrix(rep(sdp,rep(8, length(sdp))), ncol=length(sdp))
g <- (g %/% 2^(0:7)) %% 2
expected <- array(dim=c(1, 2, length(sdp)))
for(i in 1:length(sdp)) {
expected[1,1,i] <- sum(pr[1,g[,i]==0,1])
expected[1,2,i] <- sum(pr[1,g[,i]==1,1])
}
expect_equal(.alleleprob_to_snpprob(pr, sdp, interval, on_map),
expected)
# add a second map position
pr <- array(c(0.15, 0.03, 0.20, 0.08, 0.18, 0.09, 0.15, 0.11,
0.07, 0.05, 0.17, 0.02, 0.46, 0.03, 0.11, 0.08),
dim=c(1, 8, 2))
prsum <- (pr[,,1] + pr[,,2])/2
interval[201:255] <- 1 # last 50 at 2nd marker
on_map[151:200] <- FALSE # 50 before that in-between
expected <- array(dim=c(1, 2, length(sdp)))
for(i in 1:150) {
expected[1,1,i] <- sum(pr[1,g[,i]==0,1])
expected[1,2,i] <- sum(pr[1,g[,i]==1,1])
}
for(i in 151:200) {
expected[1,1,i] <- sum((pr[1,g[,i]==0,1] + pr[1,g[,i]==0,2])/2)
expected[1,2,i] <- sum((pr[1,g[,i]==1,1] + pr[1,g[,i]==1,2])/2)
}
for(i in 201:255) {
expected[1,1,i] <- sum(pr[1,g[,i]==0,2])
expected[1,2,i] <- sum(pr[1,g[,i]==1,2])
}
expect_equal(.alleleprob_to_snpprob(pr, sdp, interval, on_map),
expected)
})
test_that("genocol_to_snpcol works with 8 alleles", {
# A has one allele and rest have another
expected <- rep(0, 36)
expected[1] <- 2
expected[c(2,4,7,11,16,22,29)] <- 1
expect_equal(genocol_to_snpcol(8,1), expected)
# opposite
expect_equal(genocol_to_snpcol(8,254), 2-expected)
# B has one allele and rest have another
expected <- rep(0, 36)
expected[3] <- 2
expected[c(2,5,8,12,17,23,30)] <- 1
expect_equal(genocol_to_snpcol(8,2), expected)
# opposite
expect_equal(genocol_to_snpcol(8,253), 2-expected)
# E has one allele and rest have another
expected <- rep(0, 36)
expected[15] <- 2
expected[c(11,12,13,14,20,26,33)] <- 1
expect_equal(genocol_to_snpcol(8,16), expected)
# opposite
expect_equal(genocol_to_snpcol(8,239), 2-expected)
# H has one allele and rest have another
expected <- rep(0, 36)
expected[36] <- 2
expected[29:35] <- 1
expect_equal(genocol_to_snpcol(8,128), expected)
# opposite
expect_equal(genocol_to_snpcol(8,127), 2-expected)
# ABCD have one allele and EFGH has
expected <- rep(1, 36)
expected[1:10] <- 2
expected[c(15,20,21,26:28,33:36)] <- 0
expect_equal(genocol_to_snpcol(8,15), expected)
# opposite
expect_equal(genocol_to_snpcol(8,255-15), 2-expected)
# ACEG have one allele and BDFH has
expected <- rep(1, 36)
expected[c(1,4,6,11,13,15,22,24,26,28)] <- 2
expected[c(3,8,10,17,19,21,30,32,34,36)] <- 0
expect_equal(genocol_to_snpcol(8,85), expected)
# opposite
expect_equal(genocol_to_snpcol(8,255-85), 2-expected)
# BE have one allele and ACDFGH have other
expected <- rep(0, 36)
expected[c(3,12,15)] <- 2
expected[c(2,5,8,17,23,30,11,13,14,20,26,33)] <- 1
expect_equal(genocol_to_snpcol(8,18), expected)
# opposite
expect_equal(genocol_to_snpcol(8,255-18), 2-expected)
# test all opposites
for(g in 1:254)
expect_equal(genocol_to_snpcol(8, g),
2 - genocol_to_snpcol(8, 255-g))
})
test_that("genocol_to_snpcol works with 4 alleles", {
# A,BCD (1)
expect_equal(genocol_to_snpcol(4, 1),
c(2,1,0,1,0,0,1,0,0,0))
expect_equal(genocol_to_snpcol(4, 15-1),
2-c(2,1,0,1,0,0,1,0,0,0))
# B,ACD (2)
expect_equal(genocol_to_snpcol(4, 2),
c(0,1,2,0,1,0,0,1,0,0))
expect_equal(genocol_to_snpcol(4, 15-2),
2-c(0,1,2,0,1,0,0,1,0,0))
# C,ABD (4)
expect_equal(genocol_to_snpcol(4, 4),
c(0,0,0,1,1,2,0,0,1,0))
expect_equal(genocol_to_snpcol(4, 15-4),
2-c(0,0,0,1,1,2,0,0,1,0))
# D,ABC (8)
expect_equal(genocol_to_snpcol(4, 8),
c(0,0,0,0,0,0,1,1,1,2))
expect_equal(genocol_to_snpcol(4, 15-8),
2-c(0,0,0,0,0,0,1,1,1,2))
# AB,CD (3)
expect_equal(genocol_to_snpcol(4, 3),
c(2,2,2,1,1,0,1,1,0,0))
expect_equal(genocol_to_snpcol(4, 15-3),
2-c(2,2,2,1,1,0,1,1,0,0))
# AC,BD (5)
expect_equal(genocol_to_snpcol(4, 5),
c(2,1,0,2,1,2,1,0,1,0))
expect_equal(genocol_to_snpcol(4, 15-5),
2-c(2,1,0,2,1,2,1,0,1,0))
# AD,BC (9)
expect_equal(genocol_to_snpcol(4, 9),
c(2,1,0,1,0,0,2,1,1,2))
expect_equal(genocol_to_snpcol(4, 15-9),
2-c(2,1,0,1,0,0,2,1,1,2))
})
test_that(".genoprob_to_snpprob works with 8 alleles", {
# make a fake probability matrix
set.seed(36319176)
npos <- 8
nind <- 5
x <- matrix(runif(npos*nind*36), ncol=36)
x <- x/rowSums(x)
pr <- array(dim=c(nind, 36, npos))
k <- 1
for(i in 1:nind) {
for(j in 1:npos) {
pr[i,,j] <- x[k,]
k <- k+1
}
}
# errors
expect_error(.genoprob_to_snpprob(pr[,-1,], 1, 0, TRUE)) # not enough columns
expect_error(.genoprob_to_snpprob(pr[,-(1:2),], 1, 0, TRUE)) # not enough columns
expect_error(.genoprob_to_snpprob(pr, c(1,0,200), c(0,1,2), c(TRUE,FALSE,FALSE))) # SDP out of range
expect_error(.genoprob_to_snpprob(pr, c(1,2,256), c(0,1,2), c(TRUE,FALSE,FALSE))) # SDP out of range
expect_error(.genoprob_to_snpprob(pr, c(1,255,200), c(0,1,-1), c(TRUE,FALSE,FALSE))) # snp out of map range
expect_error(.genoprob_to_snpprob(pr, c(1,2,254), c(0,1,npos-1), c(TRUE,FALSE,FALSE))) # snp out of map range
nsnp <- 200
sdp <- sample(255, nsnp, replace=TRUE)
interval <- sample(0:(npos-1), nsnp, replace=TRUE)
on_map <- sample(c(FALSE, TRUE), nsnp, replace=TRUE)
on_map[interval==npos-1] <- TRUE
# calculate snp genotype probabilities
result <- .genoprob_to_snpprob(pr, sdp, interval, on_map)
expect_equal(dim(result), c(nind,3,nsnp))
# calculate in R
expected <- array(dim=c(nind, 3, nsnp))
for(snp in 1:nsnp) {
snpcol <- genocol_to_snpcol(8, sdp[snp])
for(snpg in 1:3) {
if(on_map[snp])
expected[,snpg,snp] <- rowSums(pr[,snpcol==snpg-1,interval[snp]+1,drop=FALSE])
else
expected[,snpg,snp] <- rowSums((pr[,snpcol==snpg-1,interval[snp]+1,drop=FALSE] +
pr[,snpcol==snpg-1,interval[snp]+2,drop=FALSE])/2)
}
}
expect_equal(result, expected)
})
test_that("Xgenocol_to_snpcol works with 8 alleles", {
# flip expected when flipping SDP
opp <- function(x) c(2-x[1:36],7-x[37:44])
# A has one allele and rest have another
expected <- rep(0, 44)
expected[1] <- 2
expected[c(2,4,7,11,16,22,29)] <- 1
expected[37:44] <- c(4,3,3,3,3,3,3,3)
expect_equal(Xgenocol_to_snpcol(8,1), expected)
# opposite
expect_equal(Xgenocol_to_snpcol(8,254), opp(expected))
# B has one allele and rest have another
expected <- rep(0, 44)
expected[3] <- 2
expected[c(2,5,8,12,17,23,30)] <- 1
expected[37:44] <- c(3,4,3,3,3,3,3,3)
expect_equal(Xgenocol_to_snpcol(8,2), expected)
# opposite
expect_equal(Xgenocol_to_snpcol(8,253), opp(expected))
# E has one allele and rest have another
expected <- rep(0, 44)
expected[15] <- 2
expected[c(11,12,13,14,20,26,33)] <- 1
expected[37:44] <- c(3,3,3,3,4,3,3,3)
expect_equal(Xgenocol_to_snpcol(8,16), expected)
# opposite
expect_equal(Xgenocol_to_snpcol(8,239), opp(expected))
# H has one allele and rest have another
expected <- rep(0, 44)
expected[36] <- 2
expected[29:35] <- 1
expected[37:44] <- c(3,3,3,3,3,3,3,4)
expect_equal(Xgenocol_to_snpcol(8,128), expected)
# opposite
expect_equal(Xgenocol_to_snpcol(8,127), opp(expected))
# ABCD have one allele and EFGH has
expected <- rep(1, 44)
expected[1:10] <- 2
expected[c(15,20,21,26:28,33:36)] <- 0
expected[37:44] <- c(4,4,4,4,3,3,3,3)
expect_equal(Xgenocol_to_snpcol(8,15), expected)
# opposite
expect_equal(Xgenocol_to_snpcol(8,255-15), opp(expected))
# ACEG have one allele and BDFH has
expected <- rep(1, 44)
expected[c(1,4,6,11,13,15,22,24,26,28)] <- 2
expected[c(3,8,10,17,19,21,30,32,34,36)] <- 0
expected[37:44] <- c(4,3,4,3,4,3,4,3)
expect_equal(Xgenocol_to_snpcol(8,85), expected)
# opposite
expect_equal(Xgenocol_to_snpcol(8,255-85), opp(expected))
# BE have one allele and ACDFGH have other
expected <- rep(0, 36)
expected[c(3,12,15)] <- 2
expected[c(2,5,8,17,23,30,11,13,14,20,26,33)] <- 1
expected[37:44] <- c(3,4,3,3,4,3,3,3)
expect_equal(Xgenocol_to_snpcol(8,18), expected)
# opposite
expect_equal(Xgenocol_to_snpcol(8,255-18), opp(expected))
# test all opposites
for(g in 1:254)
expect_equal(Xgenocol_to_snpcol(8, g),
opp(Xgenocol_to_snpcol(8, 255-g)))
})
test_that("Xgenocol_to_snpcol works with 4 alleles", {
# A,BCD (1)
expect_equal(Xgenocol_to_snpcol(4, 1),
c(2,1,0,1,0,0,1,0,0,0,4,3,3,3))
expect_equal(Xgenocol_to_snpcol(4, 15-1),
c(0,1,2,1,2,2,1,2,2,2,3,4,4,4))
# B,ACD (2)
expect_equal(Xgenocol_to_snpcol(4, 2),
c(0,1,2,0,1,0,0,1,0,0,3,4,3,3))
expect_equal(Xgenocol_to_snpcol(4, 15-2),
c(2,1,0,2,1,2,2,1,2,2,4,3,4,4))
# C,ABD (4)
expect_equal(Xgenocol_to_snpcol(4, 4),
c(0,0,0,1,1,2,0,0,1,0,3,3,4,3))
expect_equal(Xgenocol_to_snpcol(4, 15-4),
c(2,2,2,1,1,0,2,2,1,2,4,4,3,4))
# D,ABC (8)
expect_equal(Xgenocol_to_snpcol(4, 8),
c(0,0,0,0,0,0,1,1,1,2,3,3,3,4))
expect_equal(Xgenocol_to_snpcol(4, 15-8),
c(2,2,2,2,2,2,1,1,1,0,4,4,4,3))
# AB,CD (3)
expect_equal(Xgenocol_to_snpcol(4, 3),
c(2,2,2,1,1,0,1,1,0,0,4,4,3,3))
expect_equal(Xgenocol_to_snpcol(4, 15-3),
c(0,0,0,1,1,2,1,1,2,2,3,3,4,4))
# AC,BD (5)
expect_equal(Xgenocol_to_snpcol(4, 5),
c(2,1,0,2,1,2,1,0,1,0,4,3,4,3))
expect_equal(Xgenocol_to_snpcol(4, 15-5),
c(0,1,2,0,1,0,1,2,1,2,3,4,3,4))
# AD,BC (9)
expect_equal(Xgenocol_to_snpcol(4, 9),
c(2,1,0,1,0,0,2,1,1,2,4,3,3,4))
expect_equal(Xgenocol_to_snpcol(4, 15-9),
c(0,1,2,1,2,2,0,1,1,0,3,4,4,3))
})
test_that(".Xgenoprob_to_snpprob works with 8 alleles", {
# make a fake probability matrix
set.seed(20939472)
npos <- 8
nind <- 5
x <- matrix(runif(npos*nind*36), ncol=36)
x <- x/rowSums(x)
x2 <- matrix(runif(npos*nind*8), ncol=8)
x2 <- x2/rowSums(x2)
x <- rbind(cbind(x, matrix(0, ncol=8, nrow=nrow(x))),
cbind(matrix(0, ncol=36, nrow=nrow(x2)), x2))
nind <- nind*2
pr <- array(dim=c(nind, 44, npos))
k <- 1
for(i in 1:nind) {
for(j in 1:npos) {
pr[i,,j] <- x[k,]
k <- k+1
}
}
# errors
expect_error(.Xgenoprob_to_snpprob(pr[,-1,], 1, 0, TRUE)) # not enough columns
expect_error(.Xgenoprob_to_snpprob(pr[,-(1:2),], 1, 0, TRUE)) # not enough columns
expect_error(.Xgenoprob_to_snpprob(pr, c(1,0,200), c(0,1,2), c(TRUE,FALSE,FALSE))) # SDP out of range
expect_error(.Xgenoprob_to_snpprob(pr, c(1,2,256), c(0,1,2), c(TRUE,FALSE,FALSE))) # SDP out of range
expect_error(.Xgenoprob_to_snpprob(pr, c(1,255,200), c(0,1,-1), c(TRUE,FALSE,FALSE))) # snp out of map range
expect_error(.Xgenoprob_to_snpprob(pr, c(1,2,254), c(0,1,npos-1), c(TRUE,FALSE,FALSE))) # snp out of map range
nsnp <- 200
sdp <- sample(255, nsnp, replace=TRUE)
interval <- sample(0:(npos-1), nsnp, replace=TRUE)
on_map <- sample(c(FALSE, TRUE), nsnp, replace=TRUE)
on_map[interval==npos-1] <- TRUE
# calculate snp genotype probabilities
result <- .Xgenoprob_to_snpprob(pr, sdp, interval, on_map)
expect_equal(dim(result), c(nind,5,nsnp))
# calculate in R
expected <- array(dim=c(nind, 5, nsnp))
for(snp in 1:nsnp) {
snpcol <- Xgenocol_to_snpcol(8, sdp[snp])
for(snpg in 1:5) {
if(on_map[snp])
expected[,snpg,snp] <- rowSums(pr[,snpcol==snpg-1,interval[snp]+1,drop=FALSE])
else
expected[,snpg,snp] <- rowSums((pr[,snpcol==snpg-1,interval[snp]+1,drop=FALSE] +
pr[,snpcol==snpg-1,interval[snp]+2,drop=FALSE])/2)
}
}
expect_equal(result, expected)
})
test_that("the genoprob_to_snpprob R function works", {
skip_if(isnt_karl(), "this test only run locally")
# load example data and calculate genotype probabilities
file <- paste0("https://raw.githubusercontent.com/rqtl/",
"qtl2data/main/DO_Recla/recla.zip")
recla <- read_cross2(file)
recla <- recla[c(1:2,53:54), c("19","X")] # subset to 4 mice and 2 chromosomes
probs <- calc_genoprob(recla, err=0.002)
# founder genotypes for a set of SNPs
snpgeno <- rbind(m1=c(3,1,1,3,1,1,1,1),
m2=c(1,3,1,3,1,3,1,3),
m3=c(1,1,1,1,3,3,3,3),
m4=c(1,3,1,3,1,3,1,3),
m5=c(1,1,1,3,1,3,1,1),
m6=c(1,1,3,1,1,1,3,1))
sdp <- calc_sdp(snpgeno)
snpinfo <- data.frame(chr=c("19", "19", "X", "X", "19", "X"),
pos=c(40.36, 40.53, 110.91, 111.21, 56.480843, 56.480843),
sdp=sdp,
snp=c("m1", "m2", "m3", "m4", "m5", "m6"), stringsAsFactors=FALSE)
# identify equivalent snps
snpinfo19 <- index_snps(recla$pmap, snpinfo[snpinfo$chr=="19",])
snpinfoX <- index_snps(recla$pmap, snpinfo[snpinfo$chr=="X",])
snpinfo <- index_snps(recla$pmap, snpinfo)
snpprob <- genoprob_to_snpprob(probs, snpinfo)
snpprob19 <- genoprob_to_snpprob(probs, snpinfo19)
snpprobX <- genoprob_to_snpprob(probs, snpinfoX)
# test combination
expect_equal(snpprob, cbind(snpprob19, snpprobX))
# construct expected for chr 19
int19 <- find_intervals(snpinfo$pos[snpinfo$chr=="19"], recla$pmap[["19"]])
expected19 <- array(dim=c(nrow(probs[["19"]]), 3, nrow(int19)))
for(i in 1:nrow(int19)) {
gencol <- genocol_to_snpcol(8, snpinfo$sdp[snpinfo$chr=="19"][i])
for(j in 1:3) {
if(int19[i,2]==0)
expected19[,j,i] <- rowSums((probs[["19"]][,gencol==j-1,int19[i,1]+1] +
probs[["19"]][,gencol==j-1,int19[i,1]+2])/2)
else
expected19[,j,i] <- rowSums(probs[["19"]][,gencol==j-1,int19[i,1]+1])
}
}
dimnames(expected19) <- dimnames(snpprob19[["19"]])
expect_equivalent(snpprob19[["19"]], expected19)
# construct expected for X chr
intX <- find_intervals(snpinfo$pos[snpinfo$chr=="X"], recla$pmap$X)
expectedX <- array(dim=c(nrow(probs[["X"]]), 5, nrow(intX)))
for(i in 1:nrow(intX)) {
sdp <- snpinfo$sdp[snpinfo$chr=="X"][i]
gencol <- genocol_to_snpcol(8, sdp)
yg <- invert_sdp(sdp, 8)
gencol <- c(gencol, (yg-1)/2+3) # add hemizygous males
for(j in 1:5) {
if(intX[i,2]==0)
expectedX[,j,i] <- rowSums((probs[["X"]][,gencol==j-1,intX[i,1]+1] +
probs[["X"]][,gencol==j-1,intX[i,1]+2])/2)
else
expectedX[,j,i] <- rowSums(probs[["X"]][,gencol==j-1,intX[i,1]+1])
}
}
dimnames(expectedX) <- dimnames(snpprob$X)
expect_equivalent(snpprobX$X, expectedX)
expect_equivalent(snpprob, list("19"=expected19, X=expectedX))
# repeat with some redundant SNPs
snpinfo2 <- rbind(snpinfo[,1:4], data.frame(chr=c("19", "19", "X", "X"),
pos=c(40.41, 40.49, 110.99, 111.01),
sdp=c(170,9,170,240),
snp=c("m7","m8","m9","m10"),
stringsAsFactors=FALSE))
# identify equivalent snps
snpinfo19 <- index_snps(recla$pmap, snpinfo2[snpinfo2$chr=="19",])
snpinfoX <- index_snps(recla$pmap, snpinfo2[snpinfo2$chr=="X",])
snpinfo2 <- index_snps(recla$pmap, snpinfo2)
snpprob2 <- genoprob_to_snpprob(probs, snpinfo2)
expect_equivalent(snpprob2$probs, snpprob$probs)
# convert to allele probabilities
aprobs <- genoprob_to_alleleprob(probs)
# collapse to snp probabilities
snpaprob <- genoprob_to_snpprob(aprobs, snpinfo2)
snpaprob19 <- genoprob_to_snpprob(aprobs, snpinfo19)
snpaprobX <- genoprob_to_snpprob(aprobs, snpinfoX)
# test combination
expect_equal(snpaprob, cbind(snpaprob19, snpaprobX))
# construct expected for chr 19
expected19 <- array(dim=c(nrow(probs[["19"]]), 2, nrow(int19)))
for(i in 1:nrow(int19)) {
gencol <- (invert_sdp(snpinfo$sdp[snpinfo$chr=="19"][i], 8)-1)/2
for(j in 1:2) {
if(int19[i,2]==0)
expected19[,j,i] <- rowSums((aprobs[["19"]][,gencol==j-1,int19[i,1]+1] +
aprobs[["19"]][,gencol==j-1,int19[i,1]+2])/2)
else
expected19[,j,i] <- rowSums(aprobs[["19"]][,gencol==j-1,int19[i,1]+1])
}
}
expect_equivalent(snpaprob19, list("19"=expected19))
# construct expected for X chr
intX <- find_intervals(snpinfo$pos[snpinfo$chr=="X"], recla$pmap$X)
expectedX <- array(dim=c(nrow(probs[["X"]]), 2, nrow(int19)))
for(i in 1:nrow(intX)) {
sdp <- snpinfo$sdp[snpinfo$chr=="X"][i]
gencol <- (invert_sdp(sdp, 8)-1)/2
for(j in 1:2) {
if(intX[i,2]==0)
expectedX[,j,i] <- rowSums((aprobs[["X"]][,gencol==j-1,intX[i,1]+1] +
aprobs[["X"]][,gencol==j-1,intX[i,1]+2])/2)
else
expectedX[,j,i] <- rowSums(aprobs[["X"]][,gencol==j-1,intX[i,1]+1])
}
}
expect_equivalent(snpaprobX, list(X=expectedX))
expect_equivalent(snpaprob, list("19"=expected19, X=expectedX))
# repeat with some redundant SNPs
snpaprob2 <- genoprob_to_snpprob(aprobs, snpinfo2)
expect_equivalent(snpprob2, snpprob)
# expect error if snpinfo is missing columns
snpinfo2a <- snpinfo2[,colnames(snpinfo2) != "sdp"]
expect_error(genoprob_to_snpprob(aprobs, snpinfo2a))
snpinfo2a <- snpinfo2[,colnames(snpinfo2) != "index"]
expect_error(genoprob_to_snpprob(aprobs, snpinfo2a))
snpinfo2a <- snpinfo2[,colnames(snpinfo2) != "interval"]
expect_error(genoprob_to_snpprob(aprobs, snpinfo2a))
snpinfo2a <- snpinfo2[,colnames(snpinfo2) != "on_map"]
expect_error(genoprob_to_snpprob(aprobs, snpinfo2a))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.