test_probToGenotype <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
prob <- matrix(as.character(c(0.001, 0, 0.999, 1, 0, 0,
0.33, 0.34, 0.33, 0, 1, 0,
0.91, 0.04, 0.05, 0, 0, 1,
0.89, 0.05, 0.06, 0.80, 0.10, 0.10,
0.45, 0.45, 0.1, 0.1, 0.45, 0.45)), nrow=5, byrow=TRUE)
geno.exp <- matrix(c(0, 2,
NA, 1,
2, 0,
NA, NA,
NA, NA), nrow=5, byrow=TRUE)
geno <- GWASTools:::.probToGenotype(prob, BB=TRUE)
checkIdentical(geno, geno.exp)
geno <- GWASTools:::.probToGenotype(prob[, c(TRUE, TRUE, FALSE)], BB=FALSE)
checkIdentical(geno, geno.exp)
geno.exp <- matrix(c(0, 2,
1, 1,
2, 0,
2, 2,
NA, NA), nrow=5, byrow=TRUE)
geno <- GWASTools:::.probToGenotype(prob, BB=TRUE, prob.threshold=0)
checkIdentical(geno, geno.exp)
geno <- GWASTools:::.probToGenotype(prob[, c(TRUE, TRUE, FALSE)], BB=FALSE, prob.threshold=0)
checkIdentical(geno, geno.exp)
}
test_probToDosage_beagle <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
prob <- read.table(probfile, as.is=TRUE, header=TRUE)
prob <- as.matrix(prob[,4:ncol(prob)])
dose <- read.table(dosefile, as.is=TRUE, header=TRUE)
dose <- 2 - as.matrix(dose[,4:ncol(dose)])
checkEquals(dose, GWASTools:::.probToDosage(prob, BB=TRUE), tolerance=0.0001)
}
test_probToDosage_mach <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
prob <- read.table(probfile, as.is=TRUE, header=FALSE)
prob <- as.matrix(prob[,3:ncol(prob)])
dose <- read.table(dosefile, as.is=TRUE, header=FALSE)
dose <- as.matrix(dose[,3:ncol(dose)])
test <- GWASTools:::.probToDosage(prob, BB=FALSE)
# make colnames agree to pass check
colnames(test) <- colnames(dose)
checkEquals(dose, test, tolerance=0.001)
}
test_probToDosage_impute2 <- function() {
mat <- matrix(c('1','0','0'), nrow=1)
checkEquals(GWASTools:::.probToDosage(mat), matrix(2))
mat <- matrix(c('0','1','0'), nrow=1)
checkEquals(GWASTools:::.probToDosage(mat), matrix(1))
mat <- matrix(c('0','0','1'), nrow=1)
checkEquals(GWASTools:::.probToDosage(mat), matrix(0))
# check missing/equal values
mat <- matrix(c('0','0','0'), nrow=1)
checkEquals(GWASTools:::.probToDosage(mat), matrix(as.numeric(NA)))
mat <- matrix(c("0.33", "0.33", "0.33"), nrow=1)
checkEquals(GWASTools:::.probToDosage(mat), matrix(as.numeric(NA)))
mat <- matrix(c('-1','-1','-1'), nrow=1)
checkEquals(GWASTools:::.probToDosage(mat), matrix(as.numeric(NA)))
x <- runif(3)
x <- x / sum(x)
mat <- matrix(as.character(round(x, digits=4)), nrow=1)
checkEquals(GWASTools:::.probToDosage(mat), matrix(2*round(x[1], digits=4) + round(x[2], digits=4)), tolerance=0.000101)
}
test_beagle_ncdf <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
ncfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99)
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile), filename=ncfile, file.type="ncdf", chromosome=22,
input.type="BEAGLE", input.dosage=inputs[i], block.size=b, genotypeDim="snp,scan",
snp.annot.filename=snpfile, scan.annot.filename=scanfile)
nc <- NcdfGenotypeReader(ncfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(nc, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getVariable(genoData, "alleleA")
alleleB <- getVariable(genoData, "alleleB")
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(dosefile, as.is=TRUE, header=TRUE)
dose <- 2 - as.matrix(dat[,4:ncol(dat)])
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.0001)
checkIdentical(names(dat)[-1:-3], scanAnnot$sampleID)
mark <- read.table(markfile, as.is=TRUE, header=FALSE)
checkIdentical(mark[,1], snpAnnot$snp)
checkIdentical(mark[,2], snpAnnot$position)
checkIdentical(mark[,3], snpAnnot$alleleA)
checkIdentical(mark[,4], snpAnnot$alleleB)
close(genoData)
}
}
unlink(c(ncfile, snpfile, scanfile))
}
test_mach_ncdf <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
ncfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 500 lines in file
blocks <- c(5000, 200, 499)
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=ncfile, file.type="ncdf", chromosome=22,
input.type="MaCH", input.dosage=inputs[i], block.size=b, genotypeDim="snp,scan",
snp.annot.filename=snpfile, scan.annot.filename=scanfile)
nc <- NcdfGenotypeReader(ncfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(nc, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getVariable(genoData, "alleleA")
alleleB <- getVariable(genoData, "alleleB")
checkIdentical(as.character(snpAnnot$alleleA), alleleA)
checkIdentical(as.character(snpAnnot$alleleB), alleleB)
dat <- read.table(dosefile, as.is=TRUE, header=FALSE)
## samples <- as.data.frame(matrix(unlist(strsplit(dat[,1], "->")), ncol=2, byrow=TRUE))
## checkIdentical(scanAnnot$ID_1, samples[,1])
## checkIdentical(scanAnnot$ID_2, samples[,2])
checkIdentical(scanAnnot$sampleID, dat[,1])
dose <- t(as.matrix(dat[,3:ncol(dat)]))
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.001)
mark <- read.table(markfile, as.is=TRUE, header=TRUE)
checkIdentical(mark[,1],snpAnnot$snp)
checkIdentical(mark[,2], snpAnnot$alleleA)
checkIdentical(mark[,3], snpAnnot$alleleB)
pos <- read.table(posfile, as.is=TRUE, header=TRUE)
checkIdentical(pos[,1],snpAnnot$snp)
checkIdentical(pos[,2], snpAnnot$position)
close(genoData)
}
}
unlink(c(ncfile, snpfile, scanfile))
}
test_impute2_ncdf <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
ncfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
# 33 lines in file
blocks <- c(5000, 10, 32)
for (b in blocks) {
imputedDosageFile(input.files=c(probfile, sampfile), filename=ncfile, file.type="ncdf", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b, genotypeDim="snp,scan",
snp.annot.filename=snpfile, scan.annot.filename=scanfile)
nc <- NcdfGenotypeReader(ncfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(nc, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getVariable(genoData, "alleleA")
alleleB <- getVariable(genoData, "alleleB")
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(probfile, as.is=TRUE, header=FALSE)
dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.0001)
checkIdentical(dat[,1],snpAnnot$snp)
checkIdentical(dat[,2], snpAnnot$rsID)
checkIdentical(dat[,3], snpAnnot$position)
checkIdentical(dat[,4], snpAnnot$alleleA)
checkIdentical(dat[,5], snpAnnot$alleleB)
samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)
close(genoData)
}
unlink(c(ncfile, snpfile, scanfile))
}
test_beagle <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
# subsets of data to read in
header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99)
for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(dosefile, as.is=TRUE, header=TRUE)
dose <- 2 - as.matrix(dat[,4:ncol(dat)])
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.0001)
checkIdentical(names(dat)[-1:-3], scanAnnot$sampleID)
mark <- read.table(markfile, as.is=TRUE, header=FALSE)
checkIdentical(mark[,1], snpAnnot$snp)
checkIdentical(mark[,2], snpAnnot$position)
checkIdentical(mark[,3], snpAnnot$alleleA)
checkIdentical(mark[,4], snpAnnot$alleleB)
close(genoData)
}
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_beagle_genotype <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
# subsets of data to read in
header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
# 100 lines in file
blocks <- c(5000, 40, 99)
for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
imputedDosageFile(input.files=c(probfile, markfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
output.type="genotype")
checkException(imputedDosageFile(input.files=c(dosfile, markfile), filename=gdsfile,
file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=TRUE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim, output.type="genotype"))
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(probfile, as.is=TRUE, header=TRUE)
geno.exp <- GWASTools:::.probToGenotype(as.matrix(dat[, 4:ncol(dat)]), BB=TRUE)
checkTrue(length(setdiff(geno.exp, c(0, 1, 2, NA))) == 0)
checkEquals(geno.exp, geno)
mark <- read.table(markfile, as.is=TRUE, header=FALSE)
checkIdentical(mark[,1], snpAnnot$snp)
checkIdentical(mark[,2], snpAnnot$position)
checkIdentical(mark[,3], snpAnnot$alleleA)
checkIdentical(mark[,4], snpAnnot$alleleB)
# check it
checkImputedDosageFile(genoData, snpAnnot=snpAnnot, scanAnnot=scanAnnot,
input.files = c(probfile, markfile), input.dosage=FALSE,
input.type="BEAGLE",
output.type="genotype", block.size=b)
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_beagle_missing <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
# subsets of data to read in
header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
newprobfile <- tempfile()
prob <- read.table(probfile, header=TRUE, stringsAsFactors=FALSE)
prob[1, 4:6] <- -1
write.table(prob, file=newprobfile, row.names=FALSE, col.names=TRUE)
x <- read.table(newprobfile, header=TRUE, stringsAsFactors=FALSE)
checkEquals(prob, x)
newdosefile <- tempfile()
dose <- read.table(dosefile, header=TRUE, stringsAsFactors=FALSE)
dose[1, 4] <- -1
write.table(dose, file=newdosefile, row.names=FALSE, col.names=TRUE)
x <- read.table(newdosefile, header=TRUE, stringsAsFactors=FALSE)
checkEquals(dose, x)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
files <- c(newprobfile, newdosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99)
for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
dat <- read.table(newdosefile, as.is=TRUE, header=TRUE)
dose <- 2 - as.matrix(dat[,4:ncol(dat)])
dose[dose < 0 | dose > 2 | is.na(dose)] <- NA
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.0001)
close(genoData)
}
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
# tests adding only a subset of samples/snps
test_beagle_subset <- function() {
probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
package="GWASdata")
# subsets of data to read in
header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
# reverse the samples and insert a missing sample between
scan.df <- data.frame(sampleID=c(header[1,5], "missing", header[1,4]), scanID=c(200, 205, 208))
markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
i_snp_rm <- sample(1:nrow(markers), 5)
#snp.names <- markers$V1[i_snp_rm] # remove 5 random SNPs
snp.id.start <- 100
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 100 lines in file
blocks <- c(5000, 40, 99, 1)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
# test reading snp.names and scan.df
imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
scan.df=scan.df, snp.exclude=i_snp_rm, genotypeDim=genoDim,
snp.id.start=snp.id.start)
dat <- read.table(dosefile, as.is=TRUE, header=TRUE)
dose <- 2 - as.matrix(dat[,4:ncol(dat)])
dimnames(dose) <- NULL
# samples were switched and a missing sample inserted in the middle
# five random SNPs not included
dose <- cbind(dose[-i_snp_rm, 2], rep(NA, nrow(dose)-length(i_snp_rm)), dose[-i_snp_rm, 1])
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
checkEquals(dose, geno, tolerance=0.0001)
#sampIDs <- c(names(dat)[5], "missing", names(dat)[4])
checkIdentical(scan.df$sampleID, scanAnnot$sampleID)
checkEquals(scanAnnot$scanID, scan.df$scanID)
mark <- read.table(markfile, as.is=TRUE, header=FALSE)
checkIdentical(mark[-i_snp_rm, 1], snpAnnot$snp)
checkIdentical(mark[-i_snp_rm, 2], snpAnnot$position)
checkIdentical(mark[-i_snp_rm, 3], snpAnnot$alleleA)
checkIdentical(mark[-i_snp_rm, 4], snpAnnot$alleleB)
checkIdentical(1:nsnp(genoData) + as.integer(snp.id.start-1), getSnpID(genoData))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_mach <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 500 lines in file
blocks <- c(5000, 200, 499)
for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="MaCH", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(dosefile, as.is=TRUE, header=FALSE)
#samples <- as.data.frame(matrix(unlist(strsplit(dat[,1], "->")), ncol=2, byrow=TRUE))
checkIdentical(scanAnnot$sampleID, dat[,1])
#checkIdentical(scanAnnot$ID_2, dat[,1])
dose <- t(as.matrix(dat[,3:ncol(dat)]))
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.001)
mark <- read.table(markfile, as.is=TRUE, header=TRUE)
checkIdentical(mark[,1], snpAnnot$snp)
checkIdentical(mark[,2], snpAnnot$alleleA)
checkIdentical(mark[,3], snpAnnot$alleleB)
pos <- read.table(posfile, as.is=TRUE, header=TRUE)
checkIdentical(pos[,1], snpAnnot$snp)
checkIdentical(pos[,2], snpAnnot$position)
close(genoData)
}
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_mach_genotype <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
blocks <- c(5000, 200, 499)
for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
imputedDosageFile(input.files=c(probfile, markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="MaCH", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim, output.type="genotype")
checkException(imputedDosageFile(input.files=c(dosfile, markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="MaCH", input.dosage=TRUE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim, output.type="genotype"))
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(probfile, as.is=TRUE, header=FALSE)
#samples <- as.data.frame(matrix(unlist(strsplit(dat[,1], "->")), ncol=2, byrow=TRUE))
checkIdentical(scanAnnot$sampleID, dat[,1])
geno.exp <- t(GWASTools:::.probToGenotype(as.matrix(dat[, 3:ncol(dat)]), BB=FALSE))
checkTrue(length(setdiff(geno.exp, c(0, 1, 2, NA))) == 0)
checkEquals(geno.exp, geno)
mark <- read.table(markfile, as.is=TRUE, header=TRUE)
checkIdentical(mark[,1], snpAnnot$snp)
checkIdentical(mark[,2], snpAnnot$alleleA)
checkIdentical(mark[,3], snpAnnot$alleleB)
pos <- read.table(posfile, as.is=TRUE, header=TRUE)
checkIdentical(pos[,1], snpAnnot$snp)
checkIdentical(pos[,2], snpAnnot$position)
# check it
checkImputedDosageFile(genoData, snpAnnot=snpAnnot, scanAnnot=scanAnnot,
input.files = c(probfile, markfile, posfile), input.dosage=FALSE,
input.type="MaCH",
output.type="genotype", block.size=b)
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_mach_missing <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
newprobfile <- tempfile()
newdosefile <- tempfile()
dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
dosages[1, 3] <- -1
write.table(dosages, file=newdosefile, row.names=FALSE, col.names=FALSE)
x <- read.table(newdosefile, stringsAsFactors=FALSE, header=FALSE)
checkEquals(x, dosages)
# mach probs are AA, AB not AA, AB, BB
prob <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
prob[1, 3:4] <- -1
write.table(prob, file=newprobfile, row.names=FALSE, col.names=FALSE)
x <- read.table(newprobfile, stringsAsFactors=FALSE, header=FALSE)
checkEquals(x, prob)
markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
files <- c(newprobfile, newdosefile)
inputs <- c(FALSE, TRUE)
# 500 lines in file
blocks <- c(5000, 1)
genoDim <- c("snp,scan")
for (b in blocks) {
for (i in 1:2) {
imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="MaCH", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
dat <- read.table(newdosefile, as.is=TRUE, header=FALSE)
dose <- t(as.matrix(dat[,3:ncol(dat)]))
dose[dose < 0 | dose > 2 | is.na(dose)] <- NA
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.001)
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile, newprobfile, newdosefile))
}
# tests adding only a subset of samples/snps
test_mach_subset <- function() {
probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
package="GWASdata")
dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
package="GWASdata")
markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
package="GWASdata")
posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
package="GWASdata")
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
# set up scan.df for subsetting later.
dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
# remove 5 random samples
i_samp_rm <- sample(1:nrow(dosages), nrow(dosages)-5)
scan.df <- data.frame(sampleID=c("MISSING->MISSING", dosages$V1[i_samp_rm]), stringsAsFactors=FALSE)
scan.df$scanID <- 200:(200+nrow(scan.df)-1)
markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
i_snp_rm <- sample(1:nrow(markers), 5)
#message(paste(i_snp_rm, collapse=" "))
#snp.names <- markers$SNP[i_snp_rm] # remove 5 random SNPs
snp.id.start <- 100
files <- c(probfile, dosefile)
inputs <- c(FALSE, TRUE)
# 500 lines in file
blocks <- c(5000, 200, 499, 1)
genoDim <- "snp,scan"
for (b in blocks) {
for (i in 1:2) {
# test scan.df and snp.names
imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds",
chromosome=22,
input.type="MaCH", input.dosage=inputs[i], block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
genotypeDim=genoDim,
scan.df=scan.df, snp.exclude=i_snp_rm,
snp.id.start=snp.id.start)
#dose <- cbind(dose[i_snp_rm, 2], rep(NA, nrow(dose)-length(i_snp_rm)), dose[i_snp_rm, 1])
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(dosefile, as.is=TRUE, header=FALSE, stringsAsFactors=FALSE)
checkIdentical(scanAnnot$sampleID, scan.df$sampleID)
dose <- t(as.matrix(dat[,3:ncol(dat)]))
# 5 random samples were removed and a missing sample was inserted at the top
# first SNP was not included
dose <- dose[-i_snp_rm, i_samp_rm]
dose <- cbind(rep(NA, nrow(dose)), dose)
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.001)
checkIdentical(scan.df$sampleID, scanAnnot$sampleID)
checkEquals(scanAnnot$scanID, scan.df$scanID)
mark <- read.table(markfile, as.is=TRUE, header=TRUE)
checkIdentical(mark[-i_snp_rm,1], snpAnnot$snp)
checkIdentical(mark[-i_snp_rm,2], snpAnnot$alleleA)
checkIdentical(mark[-i_snp_rm,3], snpAnnot$alleleB)
pos <- read.table(posfile, as.is=TRUE, header=TRUE)
checkIdentical(pos[-i_snp_rm, 1], snpAnnot$snp)
checkIdentical(pos[-i_snp_rm, 2], snpAnnot$position)
checkIdentical(1:nsnp(genoData) + as.integer(snp.id.start-1), getSnpID(genoData))
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_impute2 <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
samp <- samp[-1, ]
dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
snps <- dos[, 2]
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
# 33 lines in file
blocks <- c(5000, 10, 32)
for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
# make a normal one
imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(probfile, as.is=TRUE, header=FALSE)
dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.0001)
checkIdentical(dat[,1], snpAnnot$snp)
checkIdentical(dat[,2], snpAnnot$rsID)
checkIdentical(dat[,3], snpAnnot$position)
checkIdentical(dat[,4], snpAnnot$alleleA)
checkIdentical(dat[,5], snpAnnot$alleleB)
samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_impute2_genotype <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
samp <- samp[-1, ]
dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
snps <- dos[, 2]
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
# 33 lines in file
blocks <- c(5000, 10, 32)
for (genoDim in c("snp,scan", "scan,snp")) {
for (b in blocks) {
# make a normal one
imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
output.type="genotype")
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(probfile, as.is=TRUE, header=FALSE)
geno.exp <- GWASTools:::.probToGenotype(as.matrix(dat[,6:ncol(dat)]))
checkEquals(geno, geno.exp)
checkIdentical(dat[,1], snpAnnot$snp)
checkIdentical(dat[,2], snpAnnot$rsID)
checkIdentical(dat[,3], snpAnnot$position)
checkIdentical(dat[,4], snpAnnot$alleleA)
checkIdentical(dat[,5], snpAnnot$alleleB)
samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)
# check it
checkImputedDosageFile(genoData, snpAnnot=snpAnnot, scanAnnot=scanAnnot,
input.files = c(probfile, sampfile), input.dosage=FALSE,
input.type="IMPUTE2",
output.type="genotype", block.size=b)
close(genoData)
}
}
unlink(c(gdsfile, snpfile, scanfile))
}
test_impute2_missing <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
samp <- samp[-1, ]
dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
snps <- dos[, 2]
# make a dosage missing
newprobfile <- tempfile()
dos[1, 6:8] <- -1
write.table(dos, file=newprobfile, row.names=FALSE, col.names=FALSE)
x <- read.table(newprobfile, stringsAsFactors=FALSE, header=FALSE)
checkEquals(x, dos)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
# 33 lines in file
genoDim <- "snp,scan"
blocks <- c(5000, 1)
for (b in blocks) {
# make a normal one
imputedDosageFile(input.files=c(newprobfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
dat <- read.table(newprobfile, as.is=TRUE, header=FALSE)
dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
dose[dose < 0 | dose > 2 | is.na(dose)] <- NA
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.0001)
close(genoData)
}
unlink(c(gdsfile, snpfile, scanfile, newprobfile))
}
# tests adding only a subset of samples/snps
test_impute2_subset <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
samp <- samp[-1, ]
i_samp_rm <- sample(-1:-nrow(samp), 5)
scan.df <- data.frame(sampleID=paste(samp$ID_1[i_samp_rm], samp$ID_2[i_samp_rm]), stringsAsFactors=FALSE)
scan.df <- rbind(data.frame(sampleID="MISSING MISSING", stringsAsFactors=FALSE), scan.df)
scan.df$scanID <- 200:(200+nrow(scan.df)-1)
dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
snps <- dos[, 2]
# remove 5 random snps
i_snp_rm <- sample(1:length(snps), 5)
snp.id.start <- 100
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
# 33 lines in file
blocks <- c(5000, 10, 32, 1)
genoDim <- "snp,scan"
for (b in blocks) {
# now the subset of samples/snps
imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
snp.annot.filename=snpfile, scan.annot.filename=scanfile,
scan.df=scan.df, snp.exclude=i_snp_rm, genotypeDim=genoDim,
snp.id.start=snp.id.start)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(probfile, as.is=TRUE, header=FALSE)
dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
dimnames(dose) <- NULL
dose <- dose[-i_snp_rm, i_samp_rm]
dose <- cbind(rep(NA, nrow(dose)), dose)
checkEquals(dose, geno, tolerance=0.0001)
checkIdentical(dat[-i_snp_rm, 1], snpAnnot$snp)
checkIdentical(dat[-i_snp_rm, 2], snpAnnot$rsID)
checkIdentical(dat[-i_snp_rm, 3], snpAnnot$position)
checkIdentical(dat[-i_snp_rm, 4], snpAnnot$alleleA)
checkIdentical(dat[-i_snp_rm, 5], snpAnnot$alleleB)
samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
#checkIdentical(paste(samp[, 1], samp[, 2]), scanAnnot$sampleID)
checkIdentical(scan.df$sampleID, scanAnnot$sampleID)
checkEquals(scan.df$scanID, scanAnnot$scanID)
checkEquals(scan.df$scanID, getScanID(genoData))
checkIdentical(1:nsnp(genoData) + as.integer(snp.id.start-1), getSnpID(genoData))
close(genoData)
}
unlink(c(gdsfile, snpfile, scanfile))
}
## tests including phenotype columns in .samples file
test_impute2_phen <- function() {
probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
package="GWASdata")
sampfile1 <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
package="GWASdata")
samp <- read.table(sampfile1, stringsAsFactors=FALSE, header=TRUE)
samp$phen <- c("B", sample(c(0,1), nrow(samp)-1, replace=TRUE))
samp$sex <- c("D", sample(c(1,2), nrow(samp)-1, replace=TRUE))
samp.hdr <- names(samp)
sampfile <- tempfile()
write.table(samp, file=sampfile, quote=FALSE, row.names=FALSE)
gdsfile <- tempfile()
snpfile <- tempfile()
scanfile <- tempfile()
imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
input.type="IMPUTE2", input.dosage=FALSE,
snp.annot.filename=snpfile, scan.annot.filename=scanfile)
gds <- GdsGenotypeReader(gdsfile)
scanAnnot <- getobj(scanfile)
snpAnnot <- getobj(snpfile)
genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
geno <- getGenotype(genoData)
alleleA <- getAlleleA(genoData)
alleleB <- getAlleleB(genoData)
checkIdentical(snpAnnot$alleleA, alleleA)
checkIdentical(snpAnnot$alleleB, alleleB)
dat <- read.table(probfile, as.is=TRUE, header=FALSE)
dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
dimnames(dose) <- NULL
checkEquals(dose, geno, tolerance=0.0001)
checkIdentical(dat[,1], snpAnnot$snp)
checkIdentical(dat[,2], snpAnnot$rsID)
checkIdentical(dat[,3], snpAnnot$position)
checkIdentical(dat[,4], snpAnnot$alleleA)
checkIdentical(dat[,5], snpAnnot$alleleB)
samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
names(samp) <- samp.hdr
checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)
for (i in setdiff(names(samp), "sex")) checkIdentical(samp[[i]], scanAnnot[[i]])
checkEquals(samp$sex == 1, scanAnnot$sex == "M")
checkEquals(samp$sex == 2, scanAnnot$sex == "F")
close(genoData)
unlink(c(gdsfile, snpfile, scanfile, sampfile))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.