tests/testthat/test-import.R

context("Data import tests")

data(monpop, package = "poppr")
data(Pinf, package = "poppr")
data(H3N2, package = "adegenet")
pr <- recode_polyploids(Pinf, newploidy = TRUE)
y <- "13	6	1	6											
			7_09_BB											
Ind	Pop	CHMFc4	CHMFc5	CHMFc12	SEA	SED	SEE	SEG	SEI	SEL	SEN	SEP	SEQ	SER
A004	7_09_BB	224	85	163	132	133	156	144	116	143	227	257	142	145
A002	7_09_BB	224	97	159	156	129	156	144	113	143	231	261	136	153
A011	7_09_BB	224	97	159	160	133	156	126	119	147	227	257	134	149
A009	7_09_BB	224	97	159	160	133	156	126	119	147	227	261	134	149
A006	7_09_BB	224	97	159	160	133	156	126	119	147	235	261	134	149
A013	7_09_BB	224	97	163	160	133	156	126	119	147	235	257	134	149"

yd <- "13	6	1	6											
			7_09_BB											
Ind	Pop	CHMFc4	CHMFc5	CHMFc12	SEA	SED	SEE	SEG	SEI	SEL	SEN	SEP	SEQ	SER
4	7_09_BB	224	85	163	132	133	156	144	116	143	227	257	142	145
2	7_09_BB	224	97	159	156	129	156	144	113	143	231	261	136	153
2	7_09_BB	224	97	159	160	133	156	126	119	147	227	257	134	149
9	7_09_BB	224	97	159	160	133	156	126	119	147	227	261	134	149
6	7_09_BB	224	97	159	160	133	156	126	119	147	235	261	134	149
3	7_09_BB	224	97	163	160	133	156	126	119	147	235	257	134	149"

zz <- "1	6	1	6
7_09_BB			
Ind	Pop	CHMFc4	CHMFc5
A004	7_09_BB	224	85
A002	7_09_BB	224	97
A011	7_09_BB	224	97
A009	7_09_BB	224	97
A006	7_09_BB	224	97
A013	7_09_BB	224	97"


zzna <- "13	6	1	6											
			7_09_BB											
Ind	Pop	CHMFc4	CHMFc5	CHMFc12	SEA	SED	SEE	SEG	SEI	SEL	SEN	SEP	SEQ	SER
A004	7_09_BB	224	85	163	132	133	156	 	116	143	227	257	142	145
A002	7_09_BB	224	97	159	156	129	156	144	113	143	231	261	136	153
A011	7_09_BB	224	97	159	160	133	156	126	119	147	227	257	134	149
A009	7_09_BB	224	97	159	160	133	156	126	119	147	227	261	134	149
A006	7_09_BB	224	97	159	160	133	156	126	119	147	235	261	134	149
A013	7_09_BB	224	97	163	160	133	156	126	119	147	235	257	134	149"

z <- "1	6	1	6
7_09_BB			
Ind	Pop	CHMFc4	
A004	7_09_BB	224	 
A002	7_09_BB	224	 
A011	7_09_BB	224	 
A009	7_09_BB	224	 
A006	7_09_BB	224	 
A013	7_09_BB	224	 "

zna <- "3	6	1	6											
			7_09_BB											
Ind	Pop	CHMFc4	CHMFc5	CHMFc12	SEA	SED	SEE	SEG	SEI	SEL	SEN	SEP	SEQ
A002	7_09_BB	224	97	159	156	129	156	144	113	143	231	261	136
A004	7_09_BB	224	85	163	0	133	156	144		143	227	257	0
A011	7_09_BB	224	97	159	160	133	156	126	119	147	227	257	134
A009	7_09_BB	224	97	159	160	133	156	126	119	147	227	261	134
A006	7_09_BB	224	97	159	160	133	156	126	119	147	235	261	134
A013	7_09_BB	224	97	163	160	133	156	126	119	147	235	257	134"

hapdip <- "6	4	1	4											
			7_09_BB											
Ind	Pop	CHMFc4	CHMFc5	CHMFc12	SEA	SED	SEE	SEG	SEI	SEL	SEN	SEP	SEQ
A011	7_09_BB	224	0	159	0	133	0	126	0	147		257	0
A009	7_09_BB	224	97	159	160	133	156	126	119	147	227	261	134
A006	7_09_BB	224	97	159	160	133	156	126	119	147	235	261	134
A013	7_09_BB	224	97	163	160	133	156	126	119	147	235	257	134"

bad_genalex <- "1	6	1	6
Ind	Pop	CHMFc4	
A004	7_09_BB	224	 
A002	7_09_BB	224	 
A011	7_09_BB	224	 
A009	7_09_BB	224	 
A006	7_09_BB	224	 
A013	7_09_BB	224	 "

missing_single <-  "13	6	1	6											
			7_09_BB											
Ind	Pop	CHMFc4	CHMFc5	CHMFc12	SEA	SED	SEE	SEG	SEI	SEL	SEN	SEP	SEQ	SER
A004	7_09_BB_A004	224	85	163	132	133	156	144	116	143	227	257	142	145
A002	7_09_BB_A002	0	0	0	0	0	0	0	0	0	0	0	0	0
A011	7_09_BB_A011	224	97	159	160	133	156	126	119	147	227	257	134	149
A009	7_09_BB_A009	224	97	159	160	133	156	126	119	147	227	261	134	149
A006	7_09_BB_A006	224	97	159	160	133	156	126	119	147	235	261	134	149
A013	7_09_BB_A013	224	97	163	160	133	156	126	119	147	235	257	134	149"

test_that("basic text connections work", {
	gen <- read.genalex(textConnection(y), sep = "\t")
	expect_equivalent(tab(gen), tab(monpop[1:6, drop = TRUE]))
})

test_that("names are corrected properly", {
	expect_warning(gen <- read.genalex(textConnection(yd), sep = "\t"),
                 "duplicate labels detected")
  expect_false(anyNA(strata(gen)))
  expect_named(other(gen), "original_names")
  expect_identical(indNames(gen), as.character(1:6))
  indNames(gen) <- sprintf("A%03d", c(4, 2, 11, 9, 6, 13))
	expect_equivalent(tab(gen), tab(monpop[1:6, drop = TRUE]))
})

test_that("missing rows and columns are eliminated", {
  gen <- read.genalex(textConnection(zzna), sep = "\t")
  expect_true(any(is.na(tab(gen))))
  expect_equal(nInd(gen), 6L)
  expect_equal(nLoc(gen), 13L)
})

test_that("single locus diploids can be imported", {
  gen <- read.genalex(textConnection(zz), sep = "\t")
  expect_equivalent(nLoc(gen), 1L)
  expect_output(show(gen), "diploid")
})

test_that("single locus haploids can be imported", {
  gen <- read.genalex(textConnection(z), sep = "\t")
  expect_equivalent(nLoc(gen), 1L)
  expect_output(show(gen), "haploid")
})

test_that("missing cells are converted to zeroes for polyploids", {
  skip_on_cran()
  gen <- read.genalex(textConnection(zna), sep = "\t", ploidy = 4L)
  expect_equivalent(nLoc(gen), 3L)
  expect_output(show(gen), "tetraploid")
  expect_output(show(recode_polyploids(gen, newploidy = TRUE)), "triploid \\(1\\) and tetraploid \\(5\\)")
})

test_that("haplodiploids can be imported correctly", {
  skip_on_cran()
  gen <- read.genalex(textConnection(hapdip), sep = "\t")
  expect_equivalent(nLoc(gen), 6L)
  expect_output(show(gen), "diploid")
  expect_output(show(recode_polyploids(gen, newploidy = TRUE)), "haploid \\(1\\) and diploid \\(3\\)")
})

test_that("duplicate columns are flagged and fixed", {
  skip_on_cran()
  f <- "4,5,1,5,,,,,,
,,,Admix,,,,,,
Ind,Pop,RM127, ,RM22, ,RM22, ,RM127, 
1,Admix,210,210,200,200,195,195,130,110
2,Admix,230,230,185,185,200,200,110,120
3,Admix,210,210,200,200,195,195,130,130
4,Admix,230,230,200,200,195,195,130,130
5,Admix,210,230,200,200,200,200,120,120"
  expect_warning(read.genalex(textConnection(f)), "col 7: RM22 -> RM22_1")
})

test_that("missing samples do not shift strata", {
  skip_on_cran()
  expect_warning(ms <- read.genalex(textConnection(missing_single), sep = "\t"), 
    "[Ii]ndividual[s(][ s][)]?(deleted|with no scored loci have been removed)")
  expect_equal(as.character(strata(ms)$Pop), as.character(pop(ms)))
  expect_equal(rownames(strata(ms)), indNames(ms))
})

test_that("missing samples do not shift strata, even with duplicated names", {
  skip_on_cran()
  missing_single2 <- gsub("A004\t", "A011\t", missing_single)
  expect_warning(ms <- read.genalex(textConnection(missing_single2), sep = "\t"), "duplicate labels detected")
  expect_equal(as.character(strata(ms)$Pop), as.character(pop(ms)))
  expect_equal(rownames(strata(ms)), indNames(ms))
})

test_that("improperly-formatted data causes an error", {
  skip_on_cran()
  msg <- "^.+?6 individuals.+?5 rows.+?Please inspect "
  tcmsg  <- paste0(msg, "textConnection\\(bad_genalex\\).+?$")
  expect_error(read.genalex(textConnection(bad_genalex), sep = "\t"), tcmsg)
  skip_on_os("windows")
  f <- tempfile()
  writeLines(bad_genalex, f)
  fmsg <- paste0(msg, f, ".+?$")
  expect_error(read.genalex(f, sep = "\t"), fmsg)
})

test_that("sample names with apostrophes can be imported", {
  skip_on_cran()
  better_than_yar <- "1,5,1,5
,,,7_09_BB,
Ind,Pop,CHMFc4,CHMFc5
phaser,7_09_BB,224,85
rock,7_09_BB,224,97
bat'leth,7_09_BB,224,97
paper,7_09_BB,224,97
scissors,7_09_BB,224,97"
  
  res <- poppr::read.genalex(textConnection(better_than_yar))
  expect_is(res, "genclone")
  expect_equal(nInd(res), 5L)
  expect_equal(nLoc(res), 1L)
  expect_equal(indNames(res), c("phaser", "rock", "bat'leth", "paper", "scissors"))
})

test_that("loci with entirely T loci are not converted to TRUE", {
  # https://github.com/grunwaldlab/poppr/issues/214
  tea <- read.genalex(test_path("genalex", "test.txt"))
  expected <- list(
    `605-4471` = c("T", "C"), 
    `681-4471` = c("G", "T"), 
    `682-4471` = c("G", "T")
  )
  expect_equal(alleles(tea), expected)

})

context("Data export tests")

test_that("not specifying a file for genind2genalex will generate a tempfile", {
  skip_on_cran()
  expect_warning(f <- genind2genalex(monpop, quiet = TRUE), "temporary file")
  expect_match(f, "^.+?file.+\\.csv$")
  expect_is(read.genalex(f), "genclone")
})

test_that("genind2genalex() handles snp data appropriately", {
  # context: https://github.com/grunwaldlab/poppr/issues/231
  tmp <- tempfile(fileext = ".csv")
  on.exit(unlink(tmp), add = TRUE)
  x <- new("genind", tab = structure(c(NA, 2L, 2L, 2L, 2L, NA, 0L, 0L,
0L, 0L, NA, 2L, 2L, 2L, 2L, NA, 0L, 0L, 0L, 0L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 0L, 0L, 1L), .Dim = 5:6, .Dimnames = list(c("TT056001.trim",
"TT060001.trim", "TT062001.trim", "TT063001.trim", "TT064001.trim"
), c("loc87_pos30.A", "loc87_pos30.G", "loc106_pos31.G", "loc106_pos31.T",
"loc345_pos27.G", "loc345_pos27.T"))), loc.fac = structure(c(1L,
1L, 2L, 2L, 3L, 3L), .Label = c("loc87_pos30", "loc106_pos31",
"loc345_pos27"), class = "factor"), loc.n.all = c(loc87_pos30 = 2L,
loc106_pos31 = 2L, loc345_pos27 = 2L), all.names = list(loc87_pos30 = c("A",
"G"), loc106_pos31 = c("G", "T"), loc345_pos27 = c("G", "T")),
    ploidy = c(2L, 2L, 2L, 2L, 2L), type = "codom", other = list(),
    call = .local(x = x, i = i, j = j, loc = ..1, drop = drop),
    pop = NULL, strata = NULL, hierarchy = NULL)
  expect_output(genind2genalex(x, tmp), "Extracting the table ...")
  y <- read.genalex(tmp)
  expect_equal(genind2df(x, pop = FALSE), genind2df(y, pop = FALSE))
})

test_that("fill_zero() works with character and numeric data", {
  char <- "A"
  num  <- "13"

  # Default
  expect_equal(fill_zero(char, 2), "0/A")
  expect_equal(fill_zero(num, 2), "0/13")
  expect_equal(fill_zero(char, 3, character(0)), "0/0/A")
  expect_equal(fill_zero(num, 3, character(0)), "0/0/13")

  # As character vector
  expect_equal(fill_zero(char, 3, "character"), c("0", "0", "A"))
  expect_equal(fill_zero(num, 3, "character"), c("0", "0", "13"))

  # As numeric vector
  expect_equal(expect_warning(fill_zero(char, 3, "numeric")), c(0, 0, NA_real_))
  expect_equal(fill_zero(num, 3, "numeric"), c(0.0, 0.0, 13.0))
})

test_that("genind2genalex will prevent a file from being overwritten", {
  skip_on_cran()
  f <- tempfile()
  writeLines("hey!\n", f)
  expect_error(genind2genalex(monpop, filename = f, quiet = TRUE), "exists and will not be overwritten")
  expect_match(readLines(f)[1], "hey")
})

test_that("genclone objects can be saved and restored", {
	mp <- file()
	genind2genalex(monpop, filename = mp, quiet = TRUE)
	gen <- read.genalex(mp)
	close(mp)
	
	expect_equal(gen@tab, monpop@tab)
})

test_that("genalex will give a warning if user asks for geo data when there is none", {
  skip_on_cran()
  mp <- file()
  expect_warning(genind2genalex(monpop, filename = mp, quiet = TRUE, geo = TRUE), 
                 "monpop@other")
  close(mp)
})

test_that("polyploids can be saved", {
	skip_on_cran()
	file1 <- tempfile()
	file2 <- tempfile()
	genind2genalex(Pinf, filename = file1, quiet = TRUE)
	genind2genalex(pr, filename = file2, quiet = TRUE)
	Pinf2 <- read.genalex(file1, ploidy = 4)
	pr2   <- read.genalex(file2, ploidy = 3)
	expect_equal(summary(Pinf, verbose = FALSE)$He, summary(Pinf2, verbose = FALSE)$He)
	expect_equal(summary(pr2, verbose = FALSE)$NA.perc, summary(Pinf, verbose = FALSE)$NA.perc)
	expect_true(all(ploidy(pr2) == 3))
	expect_true(all(ploidy(Pinf2) == 4))
})

test_that("diploid missing data is handled correctly", {
  skip_on_cran()
  data("nancycats", package = "adegenet")
  file1 <- tempfile()
  genind2genalex(nancycats, file1, quiet = TRUE)
  nan <- read.genalex(file1, genclone = FALSE)
  expect_identical(summary(nancycats, verbose = FALSE), summary(nan, verbose = FALSE))
})

test_that("sequence data is handled correctly", {
  skip_on_cran()
  tmp <- tempfile()
  htab <- tab(H3N2[1:10, loc = 1:10, drop = TRUE])
  genind2genalex(H3N2[1:10, loc = 1:10], filename = tmp, quiet = TRUE, overwrite = TRUE)
  h3n2 <- read.genalex(tmp)
  # The alleles are imported in a different order, so I have to resort with the
  # column names.
  expect_equivalent(htab, tab(h3n2)[, colnames(htab)])
  genind2genalex(H3N2[1:10, loc = 1:10], filename = tmp, sequence = TRUE, quiet = TRUE, overwrite = TRUE)
  h3n2numbers <- read.genalex(tmp)
  # The sequence option converts letters to numbers. If the last test worked,
  # then this test should work, too.
  expect_equivalent(tab(h3n2), tab(h3n2numbers))
})

test_that("errors are reported", {
	skip_on_cran()
	file1 <- tempfile()
	file2 <- tempfile()
	genind2genalex(Pinf, filename = file1, quiet = TRUE)
	genind2genalex(pr, filename = file2, quiet = TRUE)
	expect_error(Pinf2 <- read.genalex(file1, ploidy = 4), NA)

	expect_error(Pinf2 <- read.genalex(file1), "set the flag?")
	expect_error(Pinf2 <- read.genalex(file1, geo = TRUE), "geo = TRUE")
	expect_error(Pinf2 <- read.genalex(file1, region = TRUE), "region = TRUE")
})

context("Extra info data import tests")

test_that("genalex data can be imported with region data to genind and genclone", {
	skip_on_cran()
	rr1 <- system.file("files/rootrot.csv", package = "poppr")
	rr2 <- system.file("files/rootrot2.csv", package = "poppr")
	Xcoord <- rnorm(187)
	Ycoord <- rnorm(187)
	rrg <- read.csv(rr2, header = FALSE) 
	blank <- rep("", nrow(rrg))
	rrg <- cbind(rrg, blank, data.frame(X = c(NA, NA, "x", Xcoord), 
	                                    Y = c(NA, NA, "y", Ycoord)))
	rrfile <- tempfile()
	write.table(rrg, file = rrfile, quote = FALSE, sep = ",", row.names = FALSE,
	            col.names = FALSE, na = "")
	root1gc <- read.genalex(rr1)
	root1gd <- read.genalex(rr1, genclone = FALSE)
	
	root2gc <- read.genalex(rr2)
	root2gd <- read.genalex(rr2, genclone = FALSE)
	
	root2re <- read.genalex(rr2, region = TRUE)

	root2reg <- read.genalex(rrfile, region = TRUE, geo = TRUE)
	
	expect_is(root1gc, "genclone")
	expect_is(root1gd, "genind")

	expect_is(root2gc, "genclone")
	expect_is(root2gd, "genind")

	expect_is(root2reg, "genclone")
	
	expect_equal(length(nameStrata(root2gc)), 1L)
	expect_equal(length(nameStrata(root2re)), 2L)
	expect_identical(nameStrata(root2re), c("Pop", "Region"))
	expect_identical(nameStrata(root2reg), c("Pop", "Region"))
	expect_equivalent(other(root2reg)$xy, 
	                  data.frame(x = Xcoord, y = Ycoord))
})

test_that("genalex data can be imported with a region column", {
  skip_on_cran()
  yr <- read.table(textConnection(y), sep = "\t", header = FALSE, row.names = NULL)
  yr <- as.matrix(yr)
  yrnums <- yr[1, 1:4]
  region <- c("", "", "Region", rep(c("one", "two"), 3))
  blank <- rep("", 9)
  Xcoords <- rnorm(6)
  X       <- c("", "", "x", Xcoords)
  Ycoords <- rnorm(6)
  Y       <- c("", "", "y", Ycoords)
  yrg         <- yr
  yr          <- cbind(yr[, 2], region, yr[, -c(1:2)], blank, yr[, 1])
  yr[1, ]     <- c(yrnums, rep("", ncol(yr) - 4))
  yr[1, 5:7]  <- c("2", "3", "3")
  yr[2, 6:7]  <- c("one", "two")
  yrg         <- cbind(yr, X, Y)
  yrfile  <- tempfile()
  yrgfile <- tempfile()
  write.table(yr, file = yrfile, quote = FALSE, sep = ",", row.names = FALSE,
              col.names = FALSE)
  write.table(yrg, file = yrgfile, quote = FALSE, sep = ",", row.names = FALSE,
              col.names = FALSE)
  
  genind_region     <- read.genalex(yrfile, region = TRUE)
  genind_region_geo <- read.genalex(yrgfile, region = TRUE, geo = TRUE)
  
  expect_is(genind_region, "genclone")
  expect_is(genind_region_geo, "genclone")
  
  expect_equal(nameStrata(genind_region), c("Pop", "Region"))
  expect_equal(nameStrata(genind_region_geo), c("Pop", "Region"))
  
  setPop(genind_region) <- ~Region
  setPop(genind_region_geo) <- ~Region
  
  expect_equal(popNames(genind_region), c("one", "two"))
  expect_equal(popNames(genind_region_geo), c("one", "two"))
  
  expect_equivalent(other(genind_region_geo)$xy, 
                    data.frame(x = Xcoords, y = Ycoords))
})


test_that("genalex can import geographic information", {
	skip_on_cran()
  skip_on_os("windows")
	data("Pram", package = "poppr")
	filepram <- tempfile()
	sourpram <- tempfile()
	staypram <- tempfile()
	custpram <- tempfile()
	custpop  <- sample(.genlab("p", 10), nInd(Pram), replace = TRUE)

	expect_output(show(genind2genalex(Pram, filename = filepram, geo = TRUE)), filepram)
	expect_output(show(genind2genalex(Pram, pop = ~SOURCE, allstrata = FALSE, filename = sourpram, geo = TRUE)), sourpram)
	expect_output(show(genind2genalex(Pram, pop = ~STATE/YEAR, allstrata = FALSE, filename = staypram, geo = TRUE)), staypram)
	expect_output(show(genind2genalex(Pram, pop = custpop, allstrata = FALSE, filename = custpram, geo = TRUE)), custpram)

	expect_error(read.genalex(filepram))
	pall <- read.genalex(filepram, geo = TRUE)
	sour <- read.genalex(sourpram, geo = TRUE)
	stay <- read.genalex(staypram, geo = TRUE)
	cust <- read.genalex(custpram, geo = TRUE)

	splitStrata(pall) <- ~SOURCE/YEAR/STATE
	nameStrata(sour)  <- ~SOURCE
	splitStrata(stay) <- ~STATE/YEAR

	expect_equal(nInd(pall), nInd(Pram))
	expect_equal(nInd(sour), nInd(Pram))
	expect_equal(nInd(stay), nInd(Pram))
	expect_equal(nInd(cust), nInd(Pram))

	expect_equivalent(strata(pall), strata(Pram))
	expect_equivalent(strata(sour), strata(Pram, ~SOURCE, combine = FALSE))
	expect_equivalent(strata(stay), strata(Pram, ~STATE/YEAR, combine = FALSE))

	expect_equivalent(other(pall)$xy[1:513, ], other(Pram)$xy)
	expect_equivalent(other(sour)$xy[1:513, ], other(Pram)$xy)
	expect_equivalent(other(stay)$xy[1:513, ], other(Pram)$xy)
	expect_equivalent(other(cust)$xy[1:513, ], other(Pram)$xy)

})
grunwaldlab/poppr documentation built on March 18, 2024, 11:24 p.m.