inst/unitTests/imputedDosageFile_test.R

test_probToGenotype <- function() {
  probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
                          package="GWASdata")

  
  prob <- matrix(as.character(c(0.001, 0, 0.999,  1, 0, 0,
                                0.33, 0.34, 0.33, 0, 1, 0,
                                0.91, 0.04, 0.05, 0, 0, 1,
                                0.89, 0.05, 0.06, 0.80, 0.10, 0.10,
                                0.45, 0.45, 0.1, 0.1, 0.45, 0.45)), nrow=5, byrow=TRUE)
  geno.exp <- matrix(c(0, 2,
                       NA, 1,
                       2, 0,
                       NA, NA,
                       NA, NA), nrow=5, byrow=TRUE)
  geno <- GWASTools:::.probToGenotype(prob, BB=TRUE)
  checkIdentical(geno, geno.exp)
  geno <- GWASTools:::.probToGenotype(prob[, c(TRUE, TRUE, FALSE)], BB=FALSE)
  checkIdentical(geno, geno.exp)
  
  
  geno.exp <- matrix(c(0, 2,
                       1, 1,
                       2, 0,
                       2, 2,
                       NA, NA), nrow=5, byrow=TRUE)
  geno <- GWASTools:::.probToGenotype(prob, BB=TRUE, prob.threshold=0)
  checkIdentical(geno, geno.exp)  
  geno <- GWASTools:::.probToGenotype(prob[, c(TRUE, TRUE, FALSE)], BB=FALSE, prob.threshold=0)
  checkIdentical(geno, geno.exp)
  
}

test_probToDosage_beagle <- function() {
  probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
                          package="GWASdata")

  prob <- read.table(probfile, as.is=TRUE, header=TRUE)
  prob <- as.matrix(prob[,4:ncol(prob)])

  dose <- read.table(dosefile, as.is=TRUE, header=TRUE)
  dose <- 2 - as.matrix(dose[,4:ncol(dose)])

  checkEquals(dose, GWASTools:::.probToDosage(prob, BB=TRUE), tolerance=0.0001)
}


test_probToDosage_mach <- function() {
  probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
                          package="GWASdata")

  prob <- read.table(probfile, as.is=TRUE, header=FALSE)
  prob <- as.matrix(prob[,3:ncol(prob)])

  dose <- read.table(dosefile, as.is=TRUE, header=FALSE)
  dose <- as.matrix(dose[,3:ncol(dose)])

  test <- GWASTools:::.probToDosage(prob, BB=FALSE)
  # make colnames agree to pass check
  colnames(test) <- colnames(dose)
  checkEquals(dose, test, tolerance=0.001)
}


test_probToDosage_impute2 <- function() {

  mat <- matrix(c('1','0','0'), nrow=1)
  checkEquals(GWASTools:::.probToDosage(mat), matrix(2))

  mat <- matrix(c('0','1','0'), nrow=1)
  checkEquals(GWASTools:::.probToDosage(mat), matrix(1))

  mat <- matrix(c('0','0','1'), nrow=1)
  checkEquals(GWASTools:::.probToDosage(mat), matrix(0))

  # check missing/equal values
  mat <- matrix(c('0','0','0'), nrow=1)
  checkEquals(GWASTools:::.probToDosage(mat), matrix(as.numeric(NA)))

  mat <- matrix(c("0.33", "0.33", "0.33"), nrow=1)
  checkEquals(GWASTools:::.probToDosage(mat), matrix(as.numeric(NA)))

  mat <- matrix(c('-1','-1','-1'), nrow=1)
  checkEquals(GWASTools:::.probToDosage(mat), matrix(as.numeric(NA)))

  x <- runif(3)
  x <- x / sum(x)
  mat <- matrix(as.character(round(x, digits=4)), nrow=1)
  checkEquals(GWASTools:::.probToDosage(mat), matrix(2*round(x[1], digits=4) + round(x[2], digits=4)), tolerance=0.000101)

}



test_beagle_ncdf <- function() {
  probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
                        package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
                      package="GWASdata")
  markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
                      package="GWASdata")

  ncfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  files <- c(probfile, dosefile)
  inputs <- c(FALSE, TRUE)
  # 100 lines in file
  blocks <- c(5000, 40, 99)
  for (b in blocks) {
    for (i in 1:2) {
      imputedDosageFile(input.files=c(files[i], markfile), filename=ncfile, file.type="ncdf", chromosome=22,
                        input.type="BEAGLE", input.dosage=inputs[i], block.size=b, genotypeDim="snp,scan",
                        snp.annot.filename=snpfile, scan.annot.filename=scanfile)

      nc <- NcdfGenotypeReader(ncfile)
      scanAnnot <- getobj(scanfile)
      snpAnnot <- getobj(snpfile)
      genoData <- GenotypeData(nc, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
      geno <- getGenotype(genoData)
      alleleA <- getVariable(genoData, "alleleA")
      alleleB <- getVariable(genoData, "alleleB")
      checkIdentical(snpAnnot$alleleA, alleleA)
      checkIdentical(snpAnnot$alleleB, alleleB)

      dat <- read.table(dosefile, as.is=TRUE, header=TRUE)
      dose <- 2 - as.matrix(dat[,4:ncol(dat)])
      dimnames(dose) <- NULL
      checkEquals(dose, geno, tolerance=0.0001)
      checkIdentical(names(dat)[-1:-3], scanAnnot$sampleID)

      mark <- read.table(markfile, as.is=TRUE, header=FALSE)
      checkIdentical(mark[,1], snpAnnot$snp)
      checkIdentical(mark[,2], snpAnnot$position)
      checkIdentical(mark[,3], snpAnnot$alleleA)
      checkIdentical(mark[,4], snpAnnot$alleleB)

      close(genoData)
    }
  }

  unlink(c(ncfile, snpfile, scanfile))
}

test_mach_ncdf <- function() {
  probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
                          package="GWASdata")
  markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
                          package="GWASdata")
  posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
                          package="GWASdata")

  ncfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  files <- c(probfile, dosefile)
  inputs <- c(FALSE, TRUE)
  # 500 lines in file
  blocks <- c(5000, 200, 499)
  for (b in blocks) {
    for (i in 1:2) {
      imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=ncfile, file.type="ncdf", chromosome=22,
                        input.type="MaCH", input.dosage=inputs[i], block.size=b, genotypeDim="snp,scan",
                        snp.annot.filename=snpfile, scan.annot.filename=scanfile)

      nc <- NcdfGenotypeReader(ncfile)
      scanAnnot <- getobj(scanfile)
      snpAnnot <- getobj(snpfile)
      genoData <- GenotypeData(nc, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
      geno <- getGenotype(genoData)
      alleleA <- getVariable(genoData, "alleleA")
      alleleB <- getVariable(genoData, "alleleB")
      checkIdentical(as.character(snpAnnot$alleleA), alleleA)
      checkIdentical(as.character(snpAnnot$alleleB), alleleB)

      dat <- read.table(dosefile, as.is=TRUE, header=FALSE)
      ## samples <- as.data.frame(matrix(unlist(strsplit(dat[,1], "->")), ncol=2, byrow=TRUE))
      ## checkIdentical(scanAnnot$ID_1, samples[,1])
      ## checkIdentical(scanAnnot$ID_2, samples[,2])
      checkIdentical(scanAnnot$sampleID, dat[,1])
      dose <-  t(as.matrix(dat[,3:ncol(dat)]))
      dimnames(dose) <- NULL
      checkEquals(dose, geno, tolerance=0.001)

      mark <- read.table(markfile, as.is=TRUE, header=TRUE)
      checkIdentical(mark[,1],snpAnnot$snp)
      checkIdentical(mark[,2], snpAnnot$alleleA)
      checkIdentical(mark[,3], snpAnnot$alleleB)

      pos <- read.table(posfile, as.is=TRUE, header=TRUE)
      checkIdentical(pos[,1],snpAnnot$snp)
      checkIdentical(pos[,2], snpAnnot$position)

      close(genoData)
    }
  }

  unlink(c(ncfile, snpfile, scanfile))
}

test_impute2_ncdf <- function() {
  probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
                          package="GWASdata")
  sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
                          package="GWASdata")

  ncfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  # 33 lines in file
  blocks <- c(5000, 10, 32)
  for (b in blocks) {
    imputedDosageFile(input.files=c(probfile, sampfile), filename=ncfile, file.type="ncdf", chromosome=22,
                      input.type="IMPUTE2", input.dosage=FALSE, block.size=b, genotypeDim="snp,scan",
                      snp.annot.filename=snpfile, scan.annot.filename=scanfile)

    nc <- NcdfGenotypeReader(ncfile)
    scanAnnot <- getobj(scanfile)
    snpAnnot <- getobj(snpfile)
    genoData <- GenotypeData(nc, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
    geno <- getGenotype(genoData)
    alleleA <- getVariable(genoData, "alleleA")
    alleleB <- getVariable(genoData, "alleleB")
    checkIdentical(snpAnnot$alleleA, alleleA)
    checkIdentical(snpAnnot$alleleB, alleleB)

    dat <- read.table(probfile, as.is=TRUE, header=FALSE)
    dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
    dimnames(dose) <- NULL
    checkEquals(dose, geno, tolerance=0.0001)
    checkIdentical(dat[,1],snpAnnot$snp)
    checkIdentical(dat[,2], snpAnnot$rsID)
    checkIdentical(dat[,3], snpAnnot$position)
    checkIdentical(dat[,4], snpAnnot$alleleA)
    checkIdentical(dat[,5], snpAnnot$alleleB)

    samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
    checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)

    close(genoData)
  }

  unlink(c(ncfile, snpfile, scanfile))
}

test_beagle <- function() {
  probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
                          package="GWASdata")
  markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
                          package="GWASdata")

  # subsets of data to read in
  header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)

  markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)



  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  files <- c(probfile, dosefile)
  inputs <- c(FALSE, TRUE)
  # 100 lines in file
  blocks <- c(5000, 40, 99)
  for (genoDim in c("snp,scan", "scan,snp")) {
    for (b in blocks) {
      for (i in 1:2) {
        imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
                         input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
                         snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)

        gds <- GdsGenotypeReader(gdsfile)
        scanAnnot <- getobj(scanfile)
        snpAnnot <- getobj(snpfile)
        genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
        geno <- getGenotype(genoData)
        alleleA <- getAlleleA(genoData)
        alleleB <- getAlleleB(genoData)
        checkIdentical(snpAnnot$alleleA, alleleA)
        checkIdentical(snpAnnot$alleleB, alleleB)

        dat <- read.table(dosefile, as.is=TRUE, header=TRUE)
        dose <- 2 - as.matrix(dat[,4:ncol(dat)])
        dimnames(dose) <- NULL
        checkEquals(dose, geno, tolerance=0.0001)
        checkIdentical(names(dat)[-1:-3], scanAnnot$sampleID)

        mark <- read.table(markfile, as.is=TRUE, header=FALSE)
        checkIdentical(mark[,1], snpAnnot$snp)
        checkIdentical(mark[,2], snpAnnot$position)
        checkIdentical(mark[,3], snpAnnot$alleleA)
        checkIdentical(mark[,4], snpAnnot$alleleB)

        close(genoData)
      }
    }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}


test_beagle_genotype <- function() {
  probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
                          package="GWASdata")
  
  markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
                          package="GWASdata")
  
  # subsets of data to read in
  header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
  
  markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
  
  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()
  
  # 100 lines in file
  blocks <- c(5000, 40, 99)
  for (genoDim in c("snp,scan", "scan,snp")) {
    for (b in blocks) {
      imputedDosageFile(input.files=c(probfile, markfile), filename=gdsfile, file.type="gds", chromosome=22,
                        input.type="BEAGLE", input.dosage=FALSE, block.size=b,
                        snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
                        output.type="genotype")
      checkException(imputedDosageFile(input.files=c(dosfile, markfile), filename=gdsfile,
                                       file.type="gds", chromosome=22,
                                       input.type="BEAGLE", input.dosage=TRUE, block.size=b,
                                       snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                                       genotypeDim=genoDim, output.type="genotype"))
      
        gds <- GdsGenotypeReader(gdsfile)
        scanAnnot <- getobj(scanfile)
        snpAnnot <- getobj(snpfile)
        genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
        geno <- getGenotype(genoData)
        alleleA <- getAlleleA(genoData)
        alleleB <- getAlleleB(genoData)
        checkIdentical(snpAnnot$alleleA, alleleA)
        checkIdentical(snpAnnot$alleleB, alleleB)
        
        dat <- read.table(probfile, as.is=TRUE, header=TRUE)
        geno.exp <- GWASTools:::.probToGenotype(as.matrix(dat[, 4:ncol(dat)]), BB=TRUE)
        checkTrue(length(setdiff(geno.exp, c(0, 1, 2, NA))) == 0)
        checkEquals(geno.exp, geno)
        
        mark <- read.table(markfile, as.is=TRUE, header=FALSE)
        checkIdentical(mark[,1], snpAnnot$snp)
        checkIdentical(mark[,2], snpAnnot$position)
        checkIdentical(mark[,3], snpAnnot$alleleA)
        checkIdentical(mark[,4], snpAnnot$alleleB)
        
        # check it
        checkImputedDosageFile(genoData, snpAnnot=snpAnnot, scanAnnot=scanAnnot,
                               input.files = c(probfile, markfile), input.dosage=FALSE,
                               input.type="BEAGLE",
                               output.type="genotype", block.size=b)
        
        close(genoData)
      }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}

test_beagle_missing <- function() {
  probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
                          package="GWASdata")
  markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
                          package="GWASdata")

  # subsets of data to read in
  header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)

  markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)

  newprobfile <- tempfile()
  prob <- read.table(probfile, header=TRUE, stringsAsFactors=FALSE)
  prob[1, 4:6] <- -1
  write.table(prob, file=newprobfile, row.names=FALSE, col.names=TRUE)
  x <- read.table(newprobfile, header=TRUE, stringsAsFactors=FALSE)
  checkEquals(prob, x)

  newdosefile <- tempfile()
  dose <- read.table(dosefile, header=TRUE, stringsAsFactors=FALSE)
  dose[1, 4] <- -1
  write.table(dose, file=newdosefile, row.names=FALSE, col.names=TRUE)
  x <- read.table(newdosefile, header=TRUE, stringsAsFactors=FALSE)
  checkEquals(dose, x)

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  files <- c(newprobfile, newdosefile)
  inputs <- c(FALSE, TRUE)
  # 100 lines in file
  blocks <- c(5000, 40, 99)
  for (genoDim in c("snp,scan", "scan,snp")) {
    for (b in blocks) {
      for (i in 1:2) {
        imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
                         input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
                         snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)

        gds <- GdsGenotypeReader(gdsfile)
        scanAnnot <- getobj(scanfile)
        snpAnnot <- getobj(snpfile)
        genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
        geno <- getGenotype(genoData)

        dat <- read.table(newdosefile, as.is=TRUE, header=TRUE)
        dose <- 2 - as.matrix(dat[,4:ncol(dat)])
        dose[dose < 0 | dose > 2 | is.na(dose)] <- NA
        dimnames(dose) <- NULL
        checkEquals(dose, geno, tolerance=0.0001)

        close(genoData)
      }
    }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}


# tests adding only a subset of samples/snps
test_beagle_subset <- function() {
  probfile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.gprobs",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "BEAGLE", "example.hapmap.unphased.bgl.dose",
                          package="GWASdata")
  markfile <- system.file("extdata", "imputation", "BEAGLE", "hapmap.markers",
                          package="GWASdata")

  # subsets of data to read in
  header <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE, nrow=1)
  # reverse the samples and insert a missing sample between
  scan.df <- data.frame(sampleID=c(header[1,5], "missing", header[1,4]), scanID=c(200, 205, 208))
  markers <- read.table(markfile, header=FALSE, stringsAsFactors=FALSE)
  i_snp_rm <- sample(1:nrow(markers), 5)
  #snp.names <- markers$V1[i_snp_rm] # remove 5 random SNPs

  snp.id.start <- 100

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  files <- c(probfile, dosefile)
  inputs <- c(FALSE, TRUE)
  # 100 lines in file
  blocks <- c(5000, 40, 99, 1)
  genoDim <- "snp,scan"
  for (b in blocks) {
    for (i in 1:2) {

      # test reading snp.names and scan.df
      imputedDosageFile(input.files=c(files[i], markfile), filename=gdsfile, file.type="gds", chromosome=22,
                       input.type="BEAGLE", input.dosage=inputs[i], block.size=b,
                       snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                       scan.df=scan.df, snp.exclude=i_snp_rm, genotypeDim=genoDim,
                       snp.id.start=snp.id.start)

      dat <- read.table(dosefile, as.is=TRUE, header=TRUE)
      dose <- 2 - as.matrix(dat[,4:ncol(dat)])
      dimnames(dose) <- NULL
      # samples were switched and a missing sample inserted in the middle
      # five random SNPs not included
      dose <- cbind(dose[-i_snp_rm, 2], rep(NA, nrow(dose)-length(i_snp_rm)), dose[-i_snp_rm, 1])
      gds <- GdsGenotypeReader(gdsfile)
      scanAnnot <- getobj(scanfile)
      snpAnnot <- getobj(snpfile)
      genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
      geno <- getGenotype(genoData)
      alleleA <- getAlleleA(genoData)
      alleleB <- getAlleleB(genoData)
      checkIdentical(snpAnnot$alleleA, alleleA)
      checkIdentical(snpAnnot$alleleB, alleleB)

      checkEquals(dose, geno, tolerance=0.0001)
      #sampIDs <- c(names(dat)[5], "missing", names(dat)[4])
      checkIdentical(scan.df$sampleID, scanAnnot$sampleID)
      checkEquals(scanAnnot$scanID, scan.df$scanID)

      mark <- read.table(markfile, as.is=TRUE, header=FALSE)
      checkIdentical(mark[-i_snp_rm, 1], snpAnnot$snp)
      checkIdentical(mark[-i_snp_rm, 2], snpAnnot$position)
      checkIdentical(mark[-i_snp_rm, 3], snpAnnot$alleleA)
      checkIdentical(mark[-i_snp_rm, 4], snpAnnot$alleleB)

      checkIdentical(1:nsnp(genoData) + as.integer(snp.id.start-1), getSnpID(genoData))

      close(genoData)
    }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}


test_mach <- function() {
  probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
                          package="GWASdata")
  markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
                          package="GWASdata")
  posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
                         package="GWASdata")

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)

  markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)


  files <- c(probfile, dosefile)
  inputs <- c(FALSE, TRUE)
  # 500 lines in file
  blocks <- c(5000, 200, 499)
  for (genoDim in c("snp,scan", "scan,snp")) {
    for (b in blocks) {
      for (i in 1:2) {
        imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
                         input.type="MaCH", input.dosage=inputs[i], block.size=b,
                         snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                         genotypeDim=genoDim)

        gds <- GdsGenotypeReader(gdsfile)
        scanAnnot <- getobj(scanfile)
        snpAnnot <- getobj(snpfile)
        genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
        geno <- getGenotype(genoData)
        alleleA <- getAlleleA(genoData)
        alleleB <- getAlleleB(genoData)
        checkIdentical(snpAnnot$alleleA, alleleA)
        checkIdentical(snpAnnot$alleleB, alleleB)

        dat <- read.table(dosefile, as.is=TRUE, header=FALSE)
        #samples <- as.data.frame(matrix(unlist(strsplit(dat[,1], "->")), ncol=2, byrow=TRUE))
        checkIdentical(scanAnnot$sampleID, dat[,1])
        #checkIdentical(scanAnnot$ID_2, dat[,1])
        dose <-  t(as.matrix(dat[,3:ncol(dat)]))
        dimnames(dose) <- NULL
        checkEquals(dose, geno, tolerance=0.001)

        mark <- read.table(markfile, as.is=TRUE, header=TRUE)
        checkIdentical(mark[,1], snpAnnot$snp)
        checkIdentical(mark[,2], snpAnnot$alleleA)
        checkIdentical(mark[,3], snpAnnot$alleleB)

        pos <- read.table(posfile, as.is=TRUE, header=TRUE)
        checkIdentical(pos[,1], snpAnnot$snp)
        checkIdentical(pos[,2], snpAnnot$position)

        close(genoData)
      }
    }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}


test_mach_genotype <- function() {
  probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
                          package="GWASdata")
  
  markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
                          package="GWASdata")
  posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
                         package="GWASdata")
  
  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()
  
  markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
  
  
  blocks <- c(5000, 200, 499)
  for (genoDim in c("snp,scan", "scan,snp")) {
    for (b in blocks) {
        imputedDosageFile(input.files=c(probfile, markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
                          input.type="MaCH", input.dosage=FALSE, block.size=b,
                          snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                          genotypeDim=genoDim, output.type="genotype")

        checkException(imputedDosageFile(input.files=c(dosfile, markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
                                                 input.type="MaCH", input.dosage=TRUE, block.size=b,
                                                 snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                                                 genotypeDim=genoDim, output.type="genotype"))
        
        gds <- GdsGenotypeReader(gdsfile)
        scanAnnot <- getobj(scanfile)
        snpAnnot <- getobj(snpfile)
        genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
        geno <- getGenotype(genoData)
        alleleA <- getAlleleA(genoData)
        alleleB <- getAlleleB(genoData)
        checkIdentical(snpAnnot$alleleA, alleleA)
        checkIdentical(snpAnnot$alleleB, alleleB)
        
        dat <- read.table(probfile, as.is=TRUE, header=FALSE)
        
        
        #samples <- as.data.frame(matrix(unlist(strsplit(dat[,1], "->")), ncol=2, byrow=TRUE))
        checkIdentical(scanAnnot$sampleID, dat[,1])
        
        geno.exp <- t(GWASTools:::.probToGenotype(as.matrix(dat[, 3:ncol(dat)]), BB=FALSE))
        checkTrue(length(setdiff(geno.exp, c(0, 1, 2, NA))) == 0)
        checkEquals(geno.exp, geno)
        
        
        mark <- read.table(markfile, as.is=TRUE, header=TRUE)
        checkIdentical(mark[,1], snpAnnot$snp)
        checkIdentical(mark[,2], snpAnnot$alleleA)
        checkIdentical(mark[,3], snpAnnot$alleleB)
        
        pos <- read.table(posfile, as.is=TRUE, header=TRUE)
        checkIdentical(pos[,1], snpAnnot$snp)
        checkIdentical(pos[,2], snpAnnot$position)
        
        # check it
        checkImputedDosageFile(genoData, snpAnnot=snpAnnot, scanAnnot=scanAnnot,
                               input.files = c(probfile, markfile, posfile), input.dosage=FALSE,
                               input.type="MaCH",
                               output.type="genotype", block.size=b)
        
        close(genoData)
      }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}



test_mach_missing <- function() {
  probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
                          package="GWASdata")
  markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
                          package="GWASdata")
  posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
                         package="GWASdata")

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()
  newprobfile <- tempfile()
  newdosefile <- tempfile()

  dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
  dosages[1, 3] <- -1
  write.table(dosages, file=newdosefile, row.names=FALSE, col.names=FALSE)
  x <- read.table(newdosefile, stringsAsFactors=FALSE, header=FALSE)
  checkEquals(x, dosages)

  # mach probs are AA, AB not AA, AB, BB
  prob <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
  prob[1, 3:4]  <- -1
  write.table(prob, file=newprobfile, row.names=FALSE, col.names=FALSE)
  x <- read.table(newprobfile, stringsAsFactors=FALSE, header=FALSE)
  checkEquals(x, prob)

  markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)


  files <- c(newprobfile, newdosefile)
  inputs <- c(FALSE, TRUE)
  # 500 lines in file
  blocks <- c(5000, 1)
  genoDim <- c("snp,scan")
  for (b in blocks) {
    for (i in 1:2) {
      imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds", chromosome=22,
                       input.type="MaCH", input.dosage=inputs[i], block.size=b,
                       snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                       genotypeDim=genoDim)

      gds <- GdsGenotypeReader(gdsfile)
      scanAnnot <- getobj(scanfile)
      snpAnnot <- getobj(snpfile)
      genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
      geno <- getGenotype(genoData)

      dat <- read.table(newdosefile, as.is=TRUE, header=FALSE)
      dose <-  t(as.matrix(dat[,3:ncol(dat)]))
      dose[dose < 0 | dose > 2 | is.na(dose)] <- NA
      dimnames(dose) <- NULL
      checkEquals(dose, geno, tolerance=0.001)

      close(genoData)

    }
  }

  unlink(c(gdsfile, snpfile, scanfile, newprobfile, newdosefile))
}


# tests adding only a subset of samples/snps
test_mach_subset <- function() {
  probfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlprob",
                          package="GWASdata")
  dosefile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mldose",
                          package="GWASdata")
  markfile <- system.file("extdata", "imputation", "MaCH", "mach1.out.mlinfo",
                          package="GWASdata")
  posfile <- system.file("extdata", "imputation", "MaCH", "mach1.snp.position",
                         package="GWASdata")

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  # set up scan.df for subsetting later.
  dosages <- read.table(dosefile, header=FALSE, stringsAsFactors=FALSE)
  # remove 5 random samples
  i_samp_rm <- sample(1:nrow(dosages), nrow(dosages)-5)
  scan.df <- data.frame(sampleID=c("MISSING->MISSING", dosages$V1[i_samp_rm]), stringsAsFactors=FALSE)
  scan.df$scanID <- 200:(200+nrow(scan.df)-1)

  markers <- read.table(markfile, header=TRUE, stringsAsFactors=FALSE)
  i_snp_rm <- sample(1:nrow(markers), 5)
  #message(paste(i_snp_rm, collapse=" "))
  #snp.names <- markers$SNP[i_snp_rm] # remove 5 random SNPs

  snp.id.start <- 100

  files <- c(probfile, dosefile)
  inputs <- c(FALSE, TRUE)
  # 500 lines in file
  blocks <- c(5000, 200, 499, 1)
  genoDim <- "snp,scan"
  for (b in blocks) {
    for (i in 1:2) {
      # test scan.df and snp.names
      imputedDosageFile(input.files=c(files[i], markfile, posfile), filename=gdsfile, file.type="gds",
                       chromosome=22,
                       input.type="MaCH", input.dosage=inputs[i], block.size=b,
                       snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                       genotypeDim=genoDim,
                       scan.df=scan.df, snp.exclude=i_snp_rm,
                       snp.id.start=snp.id.start)

      #dose <- cbind(dose[i_snp_rm, 2], rep(NA, nrow(dose)-length(i_snp_rm)), dose[i_snp_rm, 1])

      gds <- GdsGenotypeReader(gdsfile)
      scanAnnot <- getobj(scanfile)
      snpAnnot <- getobj(snpfile)
      genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
      geno <- getGenotype(genoData)
      alleleA <- getAlleleA(genoData)
      alleleB <- getAlleleB(genoData)
      checkIdentical(snpAnnot$alleleA, alleleA)
      checkIdentical(snpAnnot$alleleB, alleleB)

      dat <- read.table(dosefile, as.is=TRUE, header=FALSE, stringsAsFactors=FALSE)
      checkIdentical(scanAnnot$sampleID, scan.df$sampleID)
      dose <-  t(as.matrix(dat[,3:ncol(dat)]))
      # 5 random samples were removed and a missing sample was inserted at the top
      # first SNP was not included
      dose <- dose[-i_snp_rm, i_samp_rm]
      dose <- cbind(rep(NA, nrow(dose)), dose)
      dimnames(dose) <- NULL
      checkEquals(dose, geno, tolerance=0.001)

      checkIdentical(scan.df$sampleID, scanAnnot$sampleID)
      checkEquals(scanAnnot$scanID, scan.df$scanID)

      mark <- read.table(markfile, as.is=TRUE, header=TRUE)
      checkIdentical(mark[-i_snp_rm,1], snpAnnot$snp)
      checkIdentical(mark[-i_snp_rm,2], snpAnnot$alleleA)
      checkIdentical(mark[-i_snp_rm,3], snpAnnot$alleleB)

      pos <- read.table(posfile, as.is=TRUE, header=TRUE)
      checkIdentical(pos[-i_snp_rm, 1], snpAnnot$snp)
      checkIdentical(pos[-i_snp_rm, 2], snpAnnot$position)

      checkIdentical(1:nsnp(genoData) + as.integer(snp.id.start-1), getSnpID(genoData))

      close(genoData)

    }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}



test_impute2 <- function() {
  probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
                          package="GWASdata")
  sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
                          package="GWASdata")

  samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
  samp <- samp[-1, ]

  dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
  snps <- dos[, 2]


  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  # 33 lines in file
  blocks <- c(5000, 10, 32)
  for (genoDim in c("snp,scan", "scan,snp")) {
    for (b in blocks) {
      # make a normal one
      imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
                       input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
                       snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)

      gds <- GdsGenotypeReader(gdsfile)
      scanAnnot <- getobj(scanfile)
      snpAnnot <- getobj(snpfile)
      genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
      geno <- getGenotype(genoData)
      alleleA <- getAlleleA(genoData)
      alleleB <- getAlleleB(genoData)
      checkIdentical(snpAnnot$alleleA, alleleA)
      checkIdentical(snpAnnot$alleleB, alleleB)

      dat <- read.table(probfile, as.is=TRUE, header=FALSE)
      dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
      dimnames(dose) <- NULL
      checkEquals(dose, geno, tolerance=0.0001)
      checkIdentical(dat[,1], snpAnnot$snp)
      checkIdentical(dat[,2], snpAnnot$rsID)
      checkIdentical(dat[,3], snpAnnot$position)
      checkIdentical(dat[,4], snpAnnot$alleleA)
      checkIdentical(dat[,5], snpAnnot$alleleB)

      samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
      checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)

      close(genoData)

    }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}



test_impute2_genotype <- function() {
  probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
                          package="GWASdata")
  sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
                          package="GWASdata")
  
  samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
  samp <- samp[-1, ]
  
  dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
  snps <- dos[, 2]
  
  
  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()
  
  # 33 lines in file
  blocks <- c(5000, 10, 32)
  for (genoDim in c("snp,scan", "scan,snp")) {
    for (b in blocks) {
      # make a normal one
      imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
                        input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
                        snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim,
                        output.type="genotype")
      
      gds <- GdsGenotypeReader(gdsfile)
      scanAnnot <- getobj(scanfile)
      snpAnnot <- getobj(snpfile)
      genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
      geno <- getGenotype(genoData)
      alleleA <- getAlleleA(genoData)
      alleleB <- getAlleleB(genoData)
      checkIdentical(snpAnnot$alleleA, alleleA)
      checkIdentical(snpAnnot$alleleB, alleleB)
      
      dat <- read.table(probfile, as.is=TRUE, header=FALSE)
      geno.exp <- GWASTools:::.probToGenotype(as.matrix(dat[,6:ncol(dat)]))
      checkEquals(geno, geno.exp)
      
      checkIdentical(dat[,1], snpAnnot$snp)
      checkIdentical(dat[,2], snpAnnot$rsID)
      checkIdentical(dat[,3], snpAnnot$position)
      checkIdentical(dat[,4], snpAnnot$alleleA)
      checkIdentical(dat[,5], snpAnnot$alleleB)
      
      samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
      checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)
      
      # check it
      checkImputedDosageFile(genoData, snpAnnot=snpAnnot, scanAnnot=scanAnnot,
                             input.files = c(probfile, sampfile), input.dosage=FALSE,
                             input.type="IMPUTE2",
                             output.type="genotype", block.size=b)
      
      close(genoData)
      
    }
  }
  unlink(c(gdsfile, snpfile, scanfile))
}

test_impute2_missing <- function() {
  probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
                          package="GWASdata")
  sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
                          package="GWASdata")

  samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
  samp <- samp[-1, ]

  dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
  snps <- dos[, 2]

  # make a dosage missing
  newprobfile <- tempfile()
  dos[1, 6:8] <- -1
  write.table(dos, file=newprobfile, row.names=FALSE, col.names=FALSE)
  x <- read.table(newprobfile, stringsAsFactors=FALSE, header=FALSE)
  checkEquals(x, dos)

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  # 33 lines in file
  genoDim <- "snp,scan"
  blocks <- c(5000, 1)
  for (b in blocks) {
    # make a normal one
    imputedDosageFile(input.files=c(newprobfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
                     input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
                     snp.annot.filename=snpfile, scan.annot.filename=scanfile, genotypeDim=genoDim)

    gds <- GdsGenotypeReader(gdsfile)
    scanAnnot <- getobj(scanfile)
    snpAnnot <- getobj(snpfile)
    genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
    geno <- getGenotype(genoData)

    dat <- read.table(newprobfile, as.is=TRUE, header=FALSE)
    dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
    dose[dose < 0 | dose > 2 | is.na(dose)] <- NA
    dimnames(dose) <- NULL
    checkEquals(dose, geno, tolerance=0.0001)

    close(genoData)

  }

  unlink(c(gdsfile, snpfile, scanfile, newprobfile))
}



# tests adding only a subset of samples/snps
test_impute2_subset <- function() {
  probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
                          package="GWASdata")
  sampfile <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
                          package="GWASdata")

  samp <- read.table(sampfile, stringsAsFactors=FALSE, header=TRUE)
  samp <- samp[-1, ]
  i_samp_rm <- sample(-1:-nrow(samp), 5)
  scan.df <- data.frame(sampleID=paste(samp$ID_1[i_samp_rm], samp$ID_2[i_samp_rm]), stringsAsFactors=FALSE)
  scan.df <- rbind(data.frame(sampleID="MISSING MISSING", stringsAsFactors=FALSE), scan.df)
  scan.df$scanID <- 200:(200+nrow(scan.df)-1)

  dos <- read.table(probfile, header=FALSE, stringsAsFactors=FALSE)
  snps <- dos[, 2]
  # remove 5 random snps
  i_snp_rm <- sample(1:length(snps), 5)

  snp.id.start <- 100

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  # 33 lines in file
  blocks <- c(5000, 10, 32, 1)
  genoDim <- "snp,scan"
  for (b in blocks) {

    # now the subset of samples/snps
    imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
                     input.type="IMPUTE2", input.dosage=FALSE, block.size=b,
                     snp.annot.filename=snpfile, scan.annot.filename=scanfile,
                     scan.df=scan.df, snp.exclude=i_snp_rm, genotypeDim=genoDim,
                     snp.id.start=snp.id.start)

    gds <- GdsGenotypeReader(gdsfile)
    scanAnnot <- getobj(scanfile)
    snpAnnot <- getobj(snpfile)
    genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
    geno <- getGenotype(genoData)
    alleleA <- getAlleleA(genoData)
    alleleB <- getAlleleB(genoData)
    checkIdentical(snpAnnot$alleleA, alleleA)
    checkIdentical(snpAnnot$alleleB, alleleB)

    dat <- read.table(probfile, as.is=TRUE, header=FALSE)
    dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
    dimnames(dose) <- NULL
    dose <- dose[-i_snp_rm, i_samp_rm]
    dose <- cbind(rep(NA, nrow(dose)), dose)
    checkEquals(dose, geno, tolerance=0.0001)
    checkIdentical(dat[-i_snp_rm, 1], snpAnnot$snp)
    checkIdentical(dat[-i_snp_rm, 2], snpAnnot$rsID)
    checkIdentical(dat[-i_snp_rm, 3], snpAnnot$position)
    checkIdentical(dat[-i_snp_rm, 4], snpAnnot$alleleA)
    checkIdentical(dat[-i_snp_rm, 5], snpAnnot$alleleB)

    samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
    #checkIdentical(paste(samp[, 1], samp[, 2]), scanAnnot$sampleID)
    checkIdentical(scan.df$sampleID, scanAnnot$sampleID)
    checkEquals(scan.df$scanID, scanAnnot$scanID)
    checkEquals(scan.df$scanID, getScanID(genoData))

    checkIdentical(1:nsnp(genoData) + as.integer(snp.id.start-1), getSnpID(genoData))

    close(genoData)
  }
  unlink(c(gdsfile, snpfile, scanfile))
}


## tests including phenotype columns in .samples file
test_impute2_phen <- function() {
  probfile <- system.file("extdata", "imputation", "IMPUTE2", "example.chr22.study.gens",
                          package="GWASdata")
  sampfile1 <- system.file("extdata", "imputation", "IMPUTE2", "example.study.samples",
                          package="GWASdata")

  samp <- read.table(sampfile1, stringsAsFactors=FALSE, header=TRUE)
  samp$phen <- c("B", sample(c(0,1), nrow(samp)-1, replace=TRUE))
  samp$sex <- c("D", sample(c(1,2), nrow(samp)-1, replace=TRUE))
  samp.hdr <- names(samp)
  sampfile <- tempfile()
  write.table(samp, file=sampfile, quote=FALSE, row.names=FALSE)

  gdsfile <- tempfile()
  snpfile <- tempfile()
  scanfile <- tempfile()

  imputedDosageFile(input.files=c(probfile, sampfile), filename=gdsfile, file.type="gds", chromosome=22,
                    input.type="IMPUTE2", input.dosage=FALSE,
                    snp.annot.filename=snpfile, scan.annot.filename=scanfile)

  gds <- GdsGenotypeReader(gdsfile)
  scanAnnot <- getobj(scanfile)
  snpAnnot <- getobj(snpfile)
  genoData <- GenotypeData(gds, scanAnnot=scanAnnot, snpAnnot=snpAnnot)
  geno <- getGenotype(genoData)
  alleleA <- getAlleleA(genoData)
  alleleB <- getAlleleB(genoData)
  checkIdentical(snpAnnot$alleleA, alleleA)
  checkIdentical(snpAnnot$alleleB, alleleB)

  dat <- read.table(probfile, as.is=TRUE, header=FALSE)
  dose <- GWASTools:::.probToDosage(as.matrix(dat[,6:ncol(dat)]))
  dimnames(dose) <- NULL
  checkEquals(dose, geno, tolerance=0.0001)
  checkIdentical(dat[,1], snpAnnot$snp)
  checkIdentical(dat[,2], snpAnnot$rsID)
  checkIdentical(dat[,3], snpAnnot$position)
  checkIdentical(dat[,4], snpAnnot$alleleA)
  checkIdentical(dat[,5], snpAnnot$alleleB)

  samp <- read.table(sampfile, as.is=TRUE, header=FALSE, skip=2)
  names(samp) <- samp.hdr
  checkIdentical(paste(samp[,1], samp[,2]), scanAnnot$sampleID)
  for (i in setdiff(names(samp), "sex")) checkIdentical(samp[[i]], scanAnnot[[i]])
  checkEquals(samp$sex == 1, scanAnnot$sex == "M")
  checkEquals(samp$sex == 2, scanAnnot$sex == "F")

  close(genoData)

  unlink(c(gdsfile, snpfile, scanfile, sampfile))
}
smgogarten/GWASTools documentation built on July 4, 2023, 2:32 a.m.