compcodeR: RNAseq data simulation, differential expression analysis and performance comparison of differential expression methods

test_that("generateSyntheticData fails with wrong inputs", {
  tdir <- tempdir()

  expect_error(generateSyntheticData(
    dataset = "B_625_625", n.vars = 500,
    samples.per.cond = 5, n.diffexp = 50,
    output.file = file.path(tdir, "tmp.txt")
  ), "output.file must be an .rds file.")

  expect_error(generateSyntheticData(
    dataset = "B_625_625", n.vars = 500,
    samples.per.cond = 5, n.diffexp = 50,
    effect.size = 1:3,
    output.file = file.path(tdir, "tmp.rds")
  ), "The length of the effect.size vector must be the same as the number of simulated genes.")

  expect_error(generateSyntheticData(
    dataset = "B_625_625", n.vars = 500,
    samples.per.cond = 5, n.diffexp = 50,
    relmeans = 1:3,
    output.file = file.path(tdir, "tmp.rds")
  ), "The length of the relmeans vector must be the same as the number of simulated genes.")

  expect_error(generateSyntheticData(
    dataset = "B_625_625", n.vars = 500,
    samples.per.cond = 5, n.diffexp = 50,
    dispersions = 1:3,
    output.file = file.path(tdir, "tmp.rds")
  ), "The number of provided dispersions must be the same as the number of simulated genes.")
})

test_that("compData object checks work", {
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 5, n.diffexp = 0,
    output.file = NULL
  )

  expect_equal(checkDataObject(tmp), "Data object looks ok.")

  l <- convertcompDataToList(tmp)
  expect_is(l, "list")
  cpd <- convertListTocompData(l)
  expect_is(cpd, "compData")
  expect_equal(checkDataObject(cpd), "Data object looks ok.")
  l$count.matrix <- NULL
  expect_message({cpd2 <- convertListTocompData(l)},
                 "Cannot convert list to compData object")
  expect_equal(cpd2, NULL)

  tmp2 <- tmp; expect_error({count.matrix(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({sample.annotations(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({filtering(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({analysis.date(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({package.version(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({info.parameters(tmp2) <- 1:3})

  expect_equal(check_compData(tmp), TRUE)
  expect_equal(check_compData(count.matrix(tmp)), "This is not an S4 object.")
  tmp2 <- tmp; count.matrix(tmp2) <- as.matrix(numeric(0)); expect_equal(check_compData(tmp2), "Object must contain a non-empty count matrix.")
  tmp2 <- tmp; sample.annotations(tmp2) <- as.data.frame(NULL); expect_equal(check_compData(tmp2), "Object must contain a non-empty sample annotation data frame.")
  tmp2 <- tmp; sample.annotations(tmp2) <- data.frame(condition = numeric(0)); expect_equal(check_compData(tmp2), "The sample.annotations must contain a column named condition.")
  tmp2 <- tmp; info.parameters(tmp2) <- list(); expect_equal(check_compData(tmp2), "Object must contain a non-empty list called info.parameters.")
  tmp2 <- tmp; info.parameters(tmp2) <- list(tmp = 1); expect_equal(check_compData(tmp2), "info.parameters list must contain an entry named 'dataset'.")
  tmp2 <- tmp; info.parameters(tmp2) <- list(dataset = ""); expect_equal(check_compData(tmp2), "info.parameters list must contain an entry named 'uID'.")
  tmp2 <- tmp; count.matrix(tmp2) <- count.matrix(tmp2)[1:10, ]; expect_equal(check_compData(tmp2), "count.matrix and variable.annotations do not contain the same number of rows.")
  tmp2 <- tmp; rownames(count.matrix(tmp2)) <- paste0("r", rownames(count.matrix(tmp2))); expect_equal(check_compData(tmp2), "The rownames of count.matrix and variable.annotations are not the same.")
  tmp2 <- tmp; count.matrix(tmp2) <- count.matrix(tmp2)[, 1:2]; expect_equal(check_compData(tmp2), "The number of columns of count.matrix is different from the number of rows of sample.annotations.")
  tmp2 <- tmp; colnames(count.matrix(tmp2)) <- paste0("r", colnames(count.matrix(tmp2))); expect_equal(check_compData(tmp2), "The colnames of count.matrix are different from the rownames of sample.annotations.")

  expect_equal(check_compData_results(tmp), "Object must contain a list named 'method.names' identifying the differential expression method used.")
})

test_that("phyloCompData object checks work", {
  tree <- ape::read.tree(text = "(((A1:0,A2:0,A3:0):1,B1:1):1,((C1:0,C2:0):1.5,(D1:0,D2:0):1.5):0.5);")
  idsp <- as.factor(c("A", "A", "A", "B", "C", "C", "D", "D"))
  names(idsp) <- tree$tip.label
  idcond <- c(1, 1, 1, 1, 2, 2, 2, 2)
  names(idcond) <- tree$tip.label

  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 4, n.diffexp = 10,
    tree = tree,
    id.species =  idsp,
    id.condition = idcond,
    lengths.relmeans = "auto",
    lengths.dispersions = "auto",
    output.file = NULL
  )

  expect_equal(checkDataObject(tmp), "Data object looks ok.")

  l <- convertphyloCompDataToList(tmp)
  expect_is(l, "list")

  cpd <- convertListTocompData(l)
  expect_is(cpd, "compData")
  expect_equal(checkDataObject(cpd), "Data object looks ok.")
  expect_null(length.matrix(cpd))
  expect_null(phylo.tree(cpd))
  expect_error(phylo.tree(cpd) <- l$tree, "There is no 'phylo.tree' slot in a 'compData' object. Please use a 'phyloCompData' object.")
  expect_error(length.matrix(cpd) <- l$length.matrix, "There is no 'lenght.matrix' slot in a 'compData' object. Please use a 'phyloCompData' object.")


  cpd <- convertListTophyloCompData(l)
  expect_is(cpd, "phyloCompData")
  expect_equal(checkDataObject(cpd), "Data object looks ok.")

  cpdbis <- phyloCompData(l$count.matrix, l$sample.annotations,
                          l$info.parameters, l$variable.annotations,
                          l$filtering, character(0),
                          l$package.version, l$method.names,
                          l$code, l$result.table,
                          l$tree,
                          l$length.matrix)
  expect_equal(cpd, cpdbis)

  l$count.matrix <- NULL
  expect_message({cpd2 <- convertListTocompData(l)},
                 "Cannot convert list to compData object")
  expect_message({cpd2 <- convertListTophyloCompData(l)},
                 "Cannot convert list to compData object")
  expect_equal(cpd2, NULL)

  tmp2 <- tmp; expect_error({count.matrix(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({sample.annotations(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({filtering(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({analysis.date(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({package.version(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({info.parameters(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({length.matrix(tmp2) <- 1:3})
  tmp2 <- tmp; expect_error({phylo.tree(tmp2) <- 1:3})

  expect_equal(check_compData(tmp), TRUE)
  expect_equal(check_compData(count.matrix(tmp)), "This is not an S4 object.")
  tmp2 <- tmp; count.matrix(tmp2) <- as.matrix(numeric(0)); expect_equal(check_phyloCompData(tmp2), "Object must contain a non-empty count matrix.")
  tmp2 <- tmp; sample.annotations(tmp2) <- as.data.frame(NULL); expect_equal(check_phyloCompData(tmp2), "Object must contain a non-empty sample annotation data frame.")
  tmp2 <- tmp; sample.annotations(tmp2) <- data.frame(condition = numeric(0)); expect_equal(check_phyloCompData(tmp2), "The sample.annotations must contain a column named condition.")
  tmp2 <- tmp; info.parameters(tmp2) <- list(); expect_equal(check_phyloCompData(tmp2), "Object must contain a non-empty list called info.parameters.")
  tmp2 <- tmp; info.parameters(tmp2) <- list(tmp = 1); expect_equal(check_phyloCompData(tmp2), "info.parameters list must contain an entry named 'dataset'.")
  tmp2 <- tmp; info.parameters(tmp2) <- list(dataset = ""); expect_equal(check_phyloCompData(tmp2), "info.parameters list must contain an entry named 'uID'.")
  tmp2 <- tmp; count.matrix(tmp2) <- count.matrix(tmp2)[1:10, ]; expect_equal(check_phyloCompData(tmp2), "count.matrix and variable.annotations do not contain the same number of rows.")
  tmp2 <- tmp; rownames(count.matrix(tmp2)) <- paste0("r", rownames(count.matrix(tmp2))); expect_equal(check_phyloCompData(tmp2), "The rownames of count.matrix and variable.annotations are not the same.")
  tmp2 <- tmp; count.matrix(tmp2) <- count.matrix(tmp2)[, 1:2]; expect_equal(check_phyloCompData(tmp2), "The number of columns of count.matrix is different from the number of rows of sample.annotations.")
  tmp2 <- tmp; colnames(count.matrix(tmp2)) <- paste0("r", colnames(count.matrix(tmp2))); expect_equal(check_phyloCompData(tmp2), "The colnames of count.matrix are different from the rownames of sample.annotations.")

  tmp2 <- tmp; length.matrix(tmp2) <- length.matrix(tmp2)[1:10, ]; expect_equal(check_phyloCompData(tmp2), "The dimension of count.matrix is different from the dimension of length.matrix.")
  tmp2 <- tmp; rownames(length.matrix(tmp2)) <- paste0("r", rownames(length.matrix(tmp2))); expect_equal(check_phyloCompData(tmp2), "The rownames of count.matrix are different from the rownames of length.matrix.")
  tmp2 <- tmp; length.matrix(tmp2) <- length.matrix(tmp2)[, 1:2]; expect_equal(check_phyloCompData(tmp2), "The dimension of count.matrix is different from the dimension of length.matrix.")
  tmp2 <- tmp; colnames(length.matrix(tmp2)) <- paste0("r", colnames(length.matrix(tmp2))); expect_equal(check_phyloCompData(tmp2), "The colnames of count.matrix are different from the colnames of length.matrix.")

  tmp2 <- tmp; phylo.tree(tmp2)$tip.label <- NULL; expect_equal(check_phyloCompData(tmp2), "The tips of the phylogeny are not named.")
  tmp2 <- tmp; phylo.tree(tmp2)$tip.label <- paste0("r", phylo.tree(tmp2)$tip.label); expect_equal(check_phyloCompData(tmp2), "Column names of count.matrix do not match the tip labels.")
  tmp2 <- tmp; phylo.tree(tmp2)$tip.label <- phylo.tree(tmp2)$tip.label[c(2, 1, 3:8)]; expect_equal(check_phyloCompData(tmp2), "Column names of count.matrix do not match the tip labels.")
  tmp2 <- tmp; colnames(count.matrix(tmp2)) <- colnames(count.matrix(tmp2))[c(2, 1, 3:8)]; expect_equal(check_phyloCompData(tmp2), "The colnames of count.matrix are different from the rownames of sample.annotations.")
  tmp2 <- tmp; colnames(length.matrix(tmp2)) <- colnames(length.matrix(tmp2))[c(2, 1, 3:8)]; expect_equal(check_phyloCompData(tmp2), "The colnames of count.matrix are different from the colnames of length.matrix.")
  tmp2 <- tmp; rownames(sample.annotations(tmp2)) <- rownames(sample.annotations(tmp2))[c(2, 1, 3:8)]; expect_equal(check_phyloCompData(tmp2), "The colnames of count.matrix are different from the rownames of sample.annotations.")
  tmp2 <- tmp; sample.annotations(tmp2)$id.species <- rep(1, 8); expect_equal(check_phyloCompData(tmp2), "Error in checkSpecies(ids, \"id.species\", phylo.tree(object), tol = 1e-10,  : \n  The provided species do not match with the tree branch lengths. Please check the 'id.species' vector.\n")
  tmp2 <- tmp; sample.annotations(tmp2)$id.species <- NULL; expect_equal(check_phyloCompData(tmp2), "The sample.annotations must contain a column named id.species.")

  expect_equal(check_compData_results(tmp), "Object must contain a list named 'method.names' identifying the differential expression method used.")
})

test_that("generateSyntheticData works", {
  tdir <- tempdir()

  ## No DEGs
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 5, n.diffexp = 0,
    output.file = NULL
  )
  expect_is(tmp, "compData")
  expect_equal(variable.annotations(tmp)$truedispersions.S1,
               variable.annotations(tmp)$truedispersions.S2)
  expect_equal(variable.annotations(tmp)$truemeans.S1,
               variable.annotations(tmp)$truemeans.S2)
  expect_equal(sum(variable.annotations(tmp)$n.random.outliers.up.S1 +
                     variable.annotations(tmp)$n.random.outliers.up.S2 +
                     variable.annotations(tmp)$n.random.outliers.down.S1 +
                     variable.annotations(tmp)$n.random.outliers.down.S2 +
                     variable.annotations(tmp)$n.single.outliers.up.S1 +
                     variable.annotations(tmp)$n.single.outliers.up.S2 +
                     variable.annotations(tmp)$n.single.outliers.down.S1 +
                     variable.annotations(tmp)$n.single.outliers.down.S2), 0)
  expect_equal(sum(abs(variable.annotations(tmp)$truelog2foldchanges)), 0)
  expect_equal(sum(variable.annotations(tmp)$upregulation +
                     variable.annotations(tmp)$downregulation +
                     variable.annotations(tmp)$differential.expression), 0)

  ## Specify effect sizes individually
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 5, n.diffexp = 10,
    effect.size = c(exp(abs(rnorm(5))), exp(-abs(rnorm(5))), rep(1, 40)),
    output.file = NULL
  )
  expect_is(tmp, "compData")
  expect_equal(variable.annotations(tmp)$truedispersions.S1,
               variable.annotations(tmp)$truedispersions.S2)
  expect_equal(variable.annotations(tmp)$truemeans.S1[-(1:10)],
               variable.annotations(tmp)$truemeans.S2[-(1:10)])
  expect_equal(sum(variable.annotations(tmp)$n.random.outliers.up.S1 +
                     variable.annotations(tmp)$n.random.outliers.up.S2 +
                     variable.annotations(tmp)$n.random.outliers.down.S1 +
                     variable.annotations(tmp)$n.random.outliers.down.S2 +
                     variable.annotations(tmp)$n.single.outliers.up.S1 +
                     variable.annotations(tmp)$n.single.outliers.up.S2 +
                     variable.annotations(tmp)$n.single.outliers.down.S1 +
                     variable.annotations(tmp)$n.single.outliers.down.S2), 0)
  expect_equal(sum(abs(variable.annotations(tmp)$truelog2foldchanges[-(1:10)])), 0)
  expect_equal(sign(variable.annotations(tmp)$truelog2foldchanges),
               c(rep(1, 5), rep(-1, 5), rep(0, 40)))
  expect_equal(variable.annotations(tmp)$upregulation,
               c(rep(1, 5), rep(0, 45)))
  expect_equal(variable.annotations(tmp)$downregulation,
               c(rep(0, 5), rep(1, 5), rep(0, 40)))
  expect_equal(sum(variable.annotations(tmp)$upregulation +
                     variable.annotations(tmp)$downregulation), 10)

  ## Different dispersions between groups
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 5, n.diffexp = 10,
    between.group.diffdisp = TRUE,
    output.file = NULL
  )
  expect_is(tmp, "compData")
  expect_equal(any(variable.annotations(tmp)$truedispersions.S1 !=
                     variable.annotations(tmp)$truedispersions.S2),
               TRUE)
  expect_equal(variable.annotations(tmp)$upregulation,
               c(rep(1, 10), rep(0, 40)))
  expect_equal(variable.annotations(tmp)$downregulation,
               rep(0, 50))
  expect_equal(sum(variable.annotations(tmp)$upregulation +
                     variable.annotations(tmp)$downregulation), 10)

  ## Not overdispersed
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 5, n.diffexp = 10,
    between.group.diffdisp = FALSE,
    fraction.non.overdispersed = 0.5,
    output.file = NULL
  )
  expect_is(tmp, "compData")
  expect_equal(variable.annotations(tmp)$truedispersions.S1,
               variable.annotations(tmp)$truedispersions.S2)
  expect_equal(any(variable.annotations(tmp)$truedispersions.S1 == 0), TRUE)
  expect_equal(info.parameters(tmp)$fraction.non.overdispersed, 0.5)

  ## Outliers
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 5, n.diffexp = 10,
    random.outlier.high.prob = 0.1,
    random.outlier.low.prob = 0.1,
    single.outlier.high.prob = 0.1,
    single.outlier.low.prob = 0.1,
    output.file = NULL
  )
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.up.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.up.S2 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.down.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.down.S2 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.up.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.up.S2 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.down.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.down.S2 > 0), TRUE)

  ## Summary report
  expect_error(summarizeSyntheticDataSet(tmp, file.path(tdir, "tmp.rds")),
               "output.file must be an .html file.")
  summarizeSyntheticDataSet(tmp, file.path(tdir, "tmp_summaryrep.html"))
  expect_equal(file.exists(normalizePath(file.path(tdir, "tmp_summaryrep.html"),
                                         winslash = "/")), TRUE)
})

test_that("generateSyntheticData works - with lengths and phylo", {
  tdir <- tempdir()

  ## Errors with lengths
  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 50,
      samples.per.cond = 4, n.diffexp = 5,
      repl.id = 1,
      lengths.relmeans = rpois(40, 1e4),
      lengths.dispersions = rgamma(50, 1, 1),
      output.file = NULL
    ),
    "The length of the 'lengths.relmeans' vector must be the same as the number of simulated genes.")
  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 50,
      samples.per.cond = 4, n.diffexp = 5,
      repl.id = 1,
      lengths.relmeans = rpois(50, 1e4),
      lengths.dispersions = rgamma(40, 1, 1),
      output.file = NULL
    ),
    "The length of the 'lengths.dispersions' vector must be the same as the number of simulated genes.")
  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      lengths.relmeans = rpois(50, 1e4),
      output.file = NULL
    ),
    "For lengths to be used, both the 'lengths.relmeans' and 'lengths.dispersions' vectors must be provided.")

  ## Errors and warnings with tree
  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      tree = "(((A1:0,A2:0,A3:0):1,B1:1):1,((C1:0,C2:0):1.5,(D1:0,D2:0):1.5):0.5);",
      output.file = NULL
    ),
    "The `tree` must be of class `phylo` from package `ape`.")

  tree <- ape::read.tree(text = "(((A1:0,A2:0,A3:0):0.5,B1:1):1,((C1:0,C2:0):1.5,(D1:0,D2:0):1.5):0.5);")

  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      tree = tree,
      output.file = NULL
    ),
    "The tree should be ultrametric.")

  tree <- ape::read.tree(text = "(((A1:0,A2:0,A3:0):1,B1:1):1,((C1:0,C2:0):1.5,(D1:0,D2:0):1.5):0.5);")

  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 5, n.diffexp = 50,
      repl.id = 1,
      tree = tree,
      output.file = NULL
    ),
    "The tree should have as many species as `samples.per.cond` times two")
  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      tree = tree,
      id.species =  as.factor(c("A", "A", "A", "B", "C", "C", "D")),
      output.file = NULL
    ),
    "`id.species` should have the same length as the number of taxa in the tree.")
  expect_warning(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      tree = tree,
      id.species =  c("A", "A", "A", "B", "C", "C", "D", "D"),
      output.file = NULL
    ),
    "Vector 'id.species' must be a factor.")
  expect_warning(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      tree = tree,
      id.species =  as.factor(c("A", "A", "A", "B", "C", "C", "D", "D")),
      output.file = NULL
    ),
    "`id.species` is not named. I'm naming them, assuming they are in the same order as the tree.")

  idsp <- as.factor(c("A", "A", "A", "B", "C", "C", "D", "D"))
  names(idsp) <- c("F", tree$tip.label[-1])

  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      tree = tree,
      id.species =  idsp,
      output.file = NULL
    ),
    "`id.species` names do not match the tip labels.")

  idsp <- as.factor(c("A", "A", "A", "B", "C", "C", "D", "D"))
  names(idsp) <- tree$tip.label

  expect_warning(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 500,
      samples.per.cond = 4, n.diffexp = 50,
      repl.id = 1,
      tree = tree,
      id.condition = c(1, 1, 1, 1, 2, 2, 2, 2),
      id.species =  idsp,
      output.file = NULL
    ),
    "`id.condition` is not named. I'm naming them, assuming they are in the same order as the tree.")

  idcond <- c(1, 1, 1, 1, 2, 2, 2, 2)
  names(idcond) <- tree$tip.label

  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 50,
      samples.per.cond = 4, n.diffexp = 0,
      tree = tree,
      id.species =  idsp,
      id.condition = idcond,
      output.file = NULL,
      prop.var.tree = c(0.1, 0.1)
    ),
    "should be a vector of length the number of genes"
  )

  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 50,
      samples.per.cond = 4, n.diffexp = 0,
      tree = tree,
      id.species =  idsp,
      id.condition = idcond,
      output.file = NULL,
      prop.var.tree = c(1.1, rep(0.1, 48), 2)
    ),
    "All entries of `prop.var.tree` should be between 0 and 1"
  )

  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 50,
      samples.per.cond = 4, n.diffexp = 0,
      tree = tree,
      id.species =  idsp,
      id.condition = idcond,
      output.file = NULL,
      prop.var.tree = 1.1
    ),
    "All entries of `prop.var.tree` should be between 0 and 1"
  )

  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 50,
      samples.per.cond = 4, n.diffexp = 0,
      tree = tree,
      id.species =  idsp,
      id.condition = idcond,
      output.file = NULL,
      prop.var.tree = matrix(0.1, 10, 5)
    ),
    "should be a vector"
  )

  ## No DEGs
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 4, n.diffexp = 0,
    tree = tree,
    id.species =  idsp,
    id.condition = idcond,
    output.file = NULL
  )
  expect_is(tmp, "phyloCompData")
  expect_equal(variable.annotations(tmp)$truedispersions.S1,
               variable.annotations(tmp)$truedispersions.S2)
  expect_equal(variable.annotations(tmp)$truemeans.S1,
               variable.annotations(tmp)$truemeans.S2)
  expect_equal(sum(variable.annotations(tmp)$n.random.outliers.up.S1 +
                     variable.annotations(tmp)$n.random.outliers.up.S2 +
                     variable.annotations(tmp)$n.random.outliers.down.S1 +
                     variable.annotations(tmp)$n.random.outliers.down.S2 +
                     variable.annotations(tmp)$n.single.outliers.up.S1 +
                     variable.annotations(tmp)$n.single.outliers.up.S2 +
                     variable.annotations(tmp)$n.single.outliers.down.S1 +
                     variable.annotations(tmp)$n.single.outliers.down.S2), 0)
  expect_equal(sum(abs(variable.annotations(tmp)$truelog2foldchanges)), 0)
  expect_equal(sum(variable.annotations(tmp)$upregulation +
                     variable.annotations(tmp)$downregulation +
                     variable.annotations(tmp)$differential.expression), 0)

  ## Specify effect sizes individually
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 4, n.diffexp = 10,
    tree = tree,
    id.species =  idsp,
    id.condition = idcond,
    effect.size = c(exp(abs(rnorm(5))), exp(-abs(rnorm(5))), rep(1, 40)),
    output.file = NULL
  )
  expect_is(tmp, "phyloCompData")
  expect_equal(variable.annotations(tmp)$truedispersions.S1,
               variable.annotations(tmp)$truedispersions.S2)
  expect_equal(variable.annotations(tmp)$truemeans.S1[-(1:10)],
               variable.annotations(tmp)$truemeans.S2[-(1:10)])
  expect_equal(sum(variable.annotations(tmp)$n.random.outliers.up.S1 +
                     variable.annotations(tmp)$n.random.outliers.up.S2 +
                     variable.annotations(tmp)$n.random.outliers.down.S1 +
                     variable.annotations(tmp)$n.random.outliers.down.S2 +
                     variable.annotations(tmp)$n.single.outliers.up.S1 +
                     variable.annotations(tmp)$n.single.outliers.up.S2 +
                     variable.annotations(tmp)$n.single.outliers.down.S1 +
                     variable.annotations(tmp)$n.single.outliers.down.S2), 0)
  expect_equal(sum(abs(variable.annotations(tmp)$truelog2foldchanges[-(1:10)])), 0)
  expect_equal(sign(variable.annotations(tmp)$truelog2foldchanges),
               c(rep(1, 5), rep(-1, 5), rep(0, 40)))
  expect_equal(variable.annotations(tmp)$upregulation,
               c(rep(1, 5), rep(0, 45)))
  expect_equal(variable.annotations(tmp)$downregulation,
               c(rep(0, 5), rep(1, 5), rep(0, 40)))
  expect_equal(sum(variable.annotations(tmp)$upregulation +
                     variable.annotations(tmp)$downregulation), 10)

  ## Different dispersions between groups
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 4, n.diffexp = 10,
    between.group.diffdisp = TRUE,
    tree = tree,
    id.species =  idsp,
    id.condition = idcond,
    output.file = NULL
  )
  expect_is(tmp, "phyloCompData")
  expect_equal(any(variable.annotations(tmp)$truedispersions.S1 !=
                     variable.annotations(tmp)$truedispersions.S2),
               TRUE)
  expect_equal(variable.annotations(tmp)$upregulation,
               c(rep(1, 10), rep(0, 40)))
  expect_equal(variable.annotations(tmp)$downregulation,
               rep(0, 50))
  expect_equal(sum(variable.annotations(tmp)$upregulation +
                     variable.annotations(tmp)$downregulation), 10)

  ## Not overdispersed
  expect_error(
    generateSyntheticData(
      dataset = "B_625_625", n.vars = 50,
      samples.per.cond = 4, n.diffexp = 10,
      between.group.diffdisp = FALSE,
      fraction.non.overdispersed = 0.5,
      tree = tree,
      id.species =  idsp,
      id.condition = idcond,
      output.file = NULL
    ),
    "The Phylogenetic Poisson lognormal distribution is always over-dispersed.")

  ## Outliers
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 4, n.diffexp = 10,
    tree = tree,
    id.species =  idsp,
    random.outlier.high.prob = 0.1,
    random.outlier.low.prob = 0.1,
    single.outlier.high.prob = 0.1,
    single.outlier.low.prob = 0.1,
    output.file = NULL
  )
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.up.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.up.S2 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.down.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.random.outliers.down.S2 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.up.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.up.S2 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.down.S1 > 0), TRUE)
  expect_equal(any(variable.annotations(tmp)$n.single.outliers.down.S2 > 0), TRUE)

  ## Summary report
  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 4, n.diffexp = 10,
    tree = tree,
    id.species =  idsp,
    id.condition = idcond,
    lengths.relmeans = "auto",
    lengths.dispersions = "auto",
    output.file = NULL
  )

  expect_error(summarizeSyntheticDataSet(tmp, file.path(tdir, "tmp.rds")),
               "output.file must be an .html file.")
  summarizeSyntheticDataSet(tmp, file.path(tdir, "tmp_summaryrep.html"))
  expect_equal(file.exists(normalizePath(file.path(tdir, "tmp_summaryrep.html"),
                                         winslash = "/")), TRUE)
})

test_that("help functions work", {
  listcreateRmd()

  expect_warning(expect_error(checkRange("hello", "name", 0, 1), "Illegal value"), "NAs introduced by coercion")
  expect_equal(checkRange(-1, "name", 0, 1), 0)
  expect_equal(checkRange(2, "name", 0, 1), 1)
  expect_equal(checkRange("-1", "name", 0, 1), 0)

  expect_equal(shorten.method.names(c("AUC", "ROC, all replicates")),
               c("auc", "rocall"))

  set.seed(1)
  tmp <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 50,
    samples.per.cond = 5, n.diffexp = 10,
    output.file = NULL
  )

  expect_error(
    computeMval(count.matrix(tmp), c(3, sample.annotations(tmp)$condition[-1])),
    "Must have exactly two groups to calculate M-value"
  )
  expect_error(
    computeAval(count.matrix(tmp), c(3, sample.annotations(tmp)$condition[-1])),
    "Must have exactly two groups to calculate A-value"
  )

  mval <- computeMval(count.matrix(tmp), sample.annotations(tmp)$condition)
  aval <- computeAval(count.matrix(tmp), sample.annotations(tmp)$condition)

  expect_is(mval, "numeric")
  expect_is(aval, "numeric")
  expect_equal(length(mval), nrow(count.matrix(tmp)))
  expect_equal(length(aval), nrow(count.matrix(tmp)))
})

test_that("runDiffExp works", {

  tdir <- tempdir()
  set.seed(1)  ## note that with other seeds, the number of genes
  ## passing the filtering threshold could be different

  testdat <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 500,
    samples.per.cond = 5, n.diffexp = 50,
    repl.id = 1, seqdepth = 1e5,
    fraction.upregulated = 0.5,
    between.group.diffdisp = FALSE,
    filter.threshold.total = 1,
    filter.threshold.mediancpm = 0,
    fraction.non.overdispersed = 0,
    output.file = file.path(tdir, "B_625_625_5spc_repl1.rds")
  )

  expect_equal(checkDataObject(testdat), "Data object looks ok.")

  tmp <- readRDS(normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"))
  expect_is(tmp, "compData")
  expect_is(count.matrix(tmp), "matrix")
  expect_equal(nrow(count.matrix(tmp)), 500)
  expect_equal(ncol(count.matrix(tmp)), 10)
  expect_is(sample.annotations(tmp), "data.frame")
  expect_equal(nrow(sample.annotations(tmp)), 10)
  expect_equal(ncol(sample.annotations(tmp)), 2)
  expect_is(variable.annotations(tmp), "data.frame")
  expect_equal(nrow(variable.annotations(tmp)), 500)
  expect_equal(ncol(variable.annotations(tmp)), 18)
  expect_is(info.parameters(tmp), "list")
  expect_equal(info.parameters(tmp)$n.diffexp, 50)
  expect_equal(info.parameters(tmp)$dataset, "B_625_625")
  expect_equal(info.parameters(tmp)$fraction.upregulated, 0.5)
  expect_equal(info.parameters(tmp)$between.group.diffdisp, FALSE)
  expect_equal(info.parameters(tmp)$filter.threshold.total, 1)
  expect_equal(info.parameters(tmp)$filter.threshold.mediancpm, 0)
  expect_equal(info.parameters(tmp)$fraction.non.overdispersed, 0)
  expect_equal(info.parameters(tmp)$random.outlier.high.prob, 0)
  expect_equal(info.parameters(tmp)$random.outlier.low.prob, 0)
  expect_equal(info.parameters(tmp)$single.outlier.high.prob, 0)
  expect_equal(info.parameters(tmp)$single.outlier.low.prob, 0)
  expect_equal(info.parameters(tmp)$effect.size, 1.5)
  expect_equal(info.parameters(tmp)$samples.per.cond, 5)
  expect_equal(info.parameters(tmp)$repl.id, 1)
  expect_equal(info.parameters(tmp)$seqdepth, 1e5)
  expect_equal(info.parameters(tmp)$minfact, 0.7)
  expect_equal(info.parameters(tmp)$maxfact, 1.4)
  expect_equal(filtering(tmp), "total count >= 1 ;  median cpm >= 0")
  expect_is(analysis.date(tmp), "character")
  expect_equal(analysis.date(tmp), "")
  expect_is(package.version(tmp), "character")
  expect_equal(package.version(tmp), "")
  expect_is(method.names(tmp), "list")
  expect_equal(method.names(tmp), list())

  # if (requireNamespace("baySeq", quietly = TRUE)) {
  #   runDiffExp(
  #     data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
  #     result.extent = "baySeq",
  #     Rmdfunction = "baySeq.createRmd",
  #     output.directory = tdir, norm.method = "edgeR",
  #     equaldisp = TRUE
  #   )
  # }
  if (requireNamespace("DESeq2", quietly = TRUE)) {
    expect_message(
      runDiffExp(
        data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
        result.extent = "DESeq2",
        Rmdfunction = "DESeq2.createRmd",
        output.directory = tdir, fit.type = "parametric",
        test = "Wald", beta.prior = TRUE,
        independent.filtering = TRUE, cooks.cutoff = TRUE,
        impute.outliers = TRUE,
        nas.as.ones = FALSE
      ),
      "there might be some NAs in the adjusted p values computed by DESeq2"
    )
    expect_message(
      runDiffExp(
        data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
        result.extent = "DESeq2_nona",
        Rmdfunction = "DESeq2.createRmd",
        output.directory = tdir, fit.type = "parametric",
        test = "Wald", beta.prior = TRUE,
        independent.filtering = TRUE, cooks.cutoff = TRUE,
        impute.outliers = TRUE,
        nas.as.ones = TRUE
      ),
      "all NAs in adjusted p values are replaced by 1 to allow for benchmarking with other methods"
    )
  }
  if (requireNamespace("DSS", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "DSS",
      Rmdfunction = "DSS.createRmd",
      output.directory = tdir, norm.method = "quantile",
      disp.trend = FALSE
    )
  }
  if (requireNamespace("EBSeq", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "EBSeq",
      Rmdfunction = "EBSeq.createRmd",
      output.directory = tdir, norm.method = "median"
    )
  }
  # edgeR is in Imports -> always installed
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "edgeR.exact",
    Rmdfunction = "edgeR.exact.createRmd",
    output.directory = tdir, norm.method = "TMM",
    trend.method = "movingave", disp.type = "tagwise"
  )
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "edgeR.GLM",
    Rmdfunction = "edgeR.GLM.createRmd",
    output.directory = tdir, norm.method = "TMM",
    disp.type = "tagwise", disp.method = "CoxReid",
    trended = TRUE
  )
  # limma is in Imports -> always installed
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "logcpm.limma",
    Rmdfunction = "logcpm.limma.createRmd",
    output.directory = tdir, norm.method = "TMM"
  )
  if (requireNamespace("NBPSeq", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "NBPSeq",
      Rmdfunction = "NBPSeq.createRmd",
      output.directory = tdir, norm.method = "TMM",
      disp.method = "NBP"
    )
  }
  if (requireNamespace("NOISeq", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "NOISeq",
      Rmdfunction = "NOISeq.prenorm.createRmd",
      output.directory = tdir, norm.method = "TMM"
    )
  }
  # limma is in Imports -> always installed
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "sqrtcpm.limma",
    Rmdfunction = "sqrtcpm.limma.createRmd",
    output.directory = tdir, norm.method = "TMM"
  )
  if (requireNamespace("TCC", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "TCC",
      Rmdfunction = "TCC.createRmd",
      output.directory = tdir, norm.method = "tmm",
      test.method = "edger", iteration = 3,
      normFDR = 0.1, floorPDEG = 0.05
    )
  }
  if (requireNamespace("genefilter", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "ttest",
      Rmdfunction = "ttest.createRmd",
      output.directory = tdir, norm.method = "TMM"
    )
  }
  # limma is in Imports -> always installed
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "voom.limma",
    Rmdfunction = "voom.limma.createRmd",
    output.directory = tdir, norm.method = "TMM"
  )
  if (requireNamespace("genefilter", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "voom.ttest",
      Rmdfunction = "voom.ttest.createRmd",
      output.directory = tdir, norm.method = "TMM"
    )
  }

  methods <- c("DESeq2", "DESeq2_nona", "DSS", "EBSeq", "edgeR.exact",
               "edgeR.GLM", "logcpm.limma", "NBPSeq", "NOISeq",
               "sqrtcpm.limma", "TCC", "ttest", "voom.limma",
               "voom.ttest")
  pkgs <- c("DESeq2", "DESeq2", "DSS", "EBSeq", "edgeR",
            "edgeR", "limma", "NBPSeq", "NOISeq",
            "limma", "TCC", "genefilter", "limma",
            "genefilter")

  ## Test show() method
  m <- "edgeR.exact" # edgeR always installed
  tmp <- readRDS(normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")),
                               winslash = "/"))
  show(tmp)
  count.matrix(tmp) <- count.matrix(tmp)[, 1:4]
  show(tmp)

  for (i in seq_len(length(methods))) {
    m <- methods[i]
    pkg <- pkgs[i]
    if (requireNamespace(pkg, quietly = TRUE)) {
      tmp <- readRDS(normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")), winslash = "/"))

      expect_is(tmp, "compData")
      expect_is(result.table(tmp), "data.frame")
      expect_equal(nrow(result.table(tmp)), 500)
      expect_is(code(tmp), "character")
      expect_is(analysis.date(tmp), "character")
      expect_is(compcodeR:::package.version(tmp), "character")
      expect_is(method.names(tmp), "list")
      expect_named(method.names(tmp), c("short.name", "full.name"))

      tmp2 <- tmp; result.table(tmp2) <- result.table(tmp2)[1:10, ]; expect_equal(check_compData_results(tmp2), "result.table must have the same number of rows as count.matrix.")
      tmp2 <- tmp; result.table(tmp2) <- data.frame(); expect_equal(check_compData_results(tmp2), "Object must contain a data frame named 'result.table'.")
      tmp2 <- tmp; result.table(tmp2)$score <- NULL; expect_equal(check_compData_results(tmp2), "result.table must contain a column named 'score'.")
    }
  }

  for (i in seq_len(length(methods))) {
    m <- methods[i]
    pkg <- pkgs[i]
    if (requireNamespace(pkg, quietly = TRUE)) {
      generateCodeHTMLs(
        normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")),
                      winslash = "/"), normalizePath(tdir)
      )
      expect_true(file.exists(normalizePath(file.path(
        tdir, paste0("B_625_625_5spc_repl1_",
                     m, "_code.html")), winslash = "/")))
    }
  }

  ## Comparison report
  file.table <- data.frame(input.files = normalizePath(file.path(
    tdir, paste0("B_625_625_5spc_repl1_",
                 c("voom.limma", "sqrtcpm.limma", "edgeR.exact", "edgeR.GLM"), ## Only packages in import
                 ".rds")), winslash = "/"))
  parameters <- NULL
  comp <- runComparison(file.table = file.table, output.directory = tdir,
                        parameters = parameters)

  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = parameters, save.result.table = FALSE, knit.results = FALSE),
               "At least on of 'save.result.table' or 'knit.results' must be set to TRUE, otherwise the function does not produce anything.")
  comp2 <- runComparison(file.table = file.table, output.directory = tdir,
                         parameters = parameters, save.result.table = TRUE, knit.results = FALSE)
  ff <- list.files(path = tdir, pattern = "compcodeR_result_table_.*.rds", full.names = TRUE)
  resTable <- readRDS(ff[length(ff)])
  expect_equal(resTable$fp + resTable$tp + resTable$fn + resTable$tn, rep(500, nrow(resTable)))
  expect_equal(ncol(resTable), 14)
  expect_equal(nrow(resTable), 4)

  parameters <- list()
  par2 <- parameters; par2$incl.dataset <- "missing"
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with datasets")
  par2 <- parameters; par2$incl.nbr.samples <- 10
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with nbr.samples")
  par2 <- parameters; par2$incl.replicates <- 10
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with replicates")
  par2 <- parameters; par2$incl.de.methods <- "missing"
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with DE methods")

})

test_that("runDiffExp works - with lengths", {

  tdir <- tempdir()
  set.seed(1)  ## note that with other seeds, the number of genes
  ## passing the filtering threshold could be different

  testdat <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 500,
    samples.per.cond = 5, n.diffexp = 50,
    repl.id = 1, seqdepth = 1e5,
    fraction.upregulated = 0.5,
    between.group.diffdisp = FALSE,
    filter.threshold.total = 1,
    filter.threshold.mediancpm = 0,
    fraction.non.overdispersed = 0,
    id.species = factor(1:10),
    lengths.relmeans = rpois(500, 1e4),
    lengths.dispersions = rgamma(500, 1, 1),
    output.file = file.path(tdir, "B_625_625_5spc_repl1.rds")
  )

  expect_equal(checkDataObject(testdat), "Data object looks ok.")

  tmp <- readRDS(normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"))
  expect_is(tmp, "compData")
  expect_is(count.matrix(tmp), "matrix")
  expect_equal(nrow(count.matrix(tmp)), 498) # some are filtered
  expect_equal(ncol(count.matrix(tmp)), 10)
  expect_is(sample.annotations(tmp), "data.frame")
  expect_equal(nrow(sample.annotations(tmp)), 10)
  expect_equal(ncol(sample.annotations(tmp)), 2)
  expect_is(variable.annotations(tmp), "data.frame")
  expect_equal(nrow(variable.annotations(tmp)), 498)
  expect_equal(ncol(variable.annotations(tmp)), 22)
  expect_is(info.parameters(tmp), "list")
  expect_equal(info.parameters(tmp)$n.diffexp, 50)
  expect_equal(info.parameters(tmp)$dataset, "B_625_625")
  expect_equal(info.parameters(tmp)$fraction.upregulated, 0.5)
  expect_equal(info.parameters(tmp)$between.group.diffdisp, FALSE)
  expect_equal(info.parameters(tmp)$filter.threshold.total, 1)
  expect_equal(info.parameters(tmp)$filter.threshold.mediancpm, 0)
  expect_equal(info.parameters(tmp)$fraction.non.overdispersed, 0)
  expect_equal(info.parameters(tmp)$random.outlier.high.prob, 0)
  expect_equal(info.parameters(tmp)$random.outlier.low.prob, 0)
  expect_equal(info.parameters(tmp)$single.outlier.high.prob, 0)
  expect_equal(info.parameters(tmp)$single.outlier.low.prob, 0)
  expect_equal(info.parameters(tmp)$effect.size, 1.5)
  expect_equal(info.parameters(tmp)$samples.per.cond, 5)
  expect_equal(info.parameters(tmp)$repl.id, 1)
  expect_equal(info.parameters(tmp)$seqdepth, 1e5)
  expect_equal(info.parameters(tmp)$minfact, 0.7)
  expect_equal(info.parameters(tmp)$maxfact, 1.4)
  expect_equal(info.parameters(tmp)$nEff, 2.5)
  expect_equal(info.parameters(tmp)$nEffRatio, 1.0)
  expect_equal(filtering(tmp), "total count >= 1 ;  median cpm >= 0")
  expect_is(analysis.date(tmp), "character")
  expect_equal(analysis.date(tmp), "")
  expect_is(package.version(tmp), "character")
  expect_equal(package.version(tmp), "")
  expect_is(method.names(tmp), "list")
  expect_equal(method.names(tmp), list())

  if (requireNamespace("DESeq2", quietly = TRUE)) {
    expect_message(
      runDiffExp(
        data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
        result.extent = "DESeq2.length",
        Rmdfunction = "DESeq2.length.createRmd",
        output.directory = tdir, fit.type = "parametric",
        test = "Wald", beta.prior = TRUE,
        independent.filtering = TRUE, cooks.cutoff = TRUE,
        impute.outliers = TRUE,
        extra.design.covariates = NULL,
        nas.as.ones = FALSE
      ),
      "there might be some NAs in the adjusted p values computed by DESeq2"
    )
  }
  # limma is in Imports
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "lengthNorm.limma",
    Rmdfunction = "lengthNorm.limma.createRmd",
    output.directory = tdir, norm.method = "TMM",
    extra.design.covariates = NULL,
    length.normalization = "RPKM",
    data.transformation = "log2",
    block.factor = NULL
  )
  # phylolm is in Imports
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "phylolm",
    Rmdfunction = "phylolm.createRmd",
    output.directory = tdir, norm.method = "TMM",
    model = "BM", measurement_error = TRUE,
    extra.design.covariates = NULL,
    length.normalization = "RPKM",
    data.transformation = "log2"
  )

  methods <- c("DESeq2.length", "lengthNorm.limma", "phylolm")

  ## Test show() method
  m <- "lengthNorm.limma"
  tmp <- readRDS(normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")),
                               winslash = "/"))
  show(tmp)
  count.matrix(tmp) <- count.matrix(tmp)[, 1:4]
  show(tmp)

  for (m in methods) {
    if (m != "DESeq2.length" || requireNamespace("DESeq2", quietly = TRUE)) {
      tmp <- readRDS(normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")), winslash = "/"))

      expect_is(tmp, "phyloCompData")
      expect_is(result.table(tmp), "data.frame")
      expect_equal(nrow(result.table(tmp)), 498)
      expect_is(code(tmp), "character")
      expect_is(analysis.date(tmp), "character")
      expect_is(compcodeR:::package.version(tmp), "character")
      expect_is(method.names(tmp), "list")
      expect_named(method.names(tmp), c("short.name", "full.name"))

      tmp2 <- tmp; result.table(tmp2) <- result.table(tmp2)[1:10, ]; expect_equal(check_compData_results(tmp2), "result.table must have the same number of rows as count.matrix.")
      tmp2 <- tmp; result.table(tmp2) <- data.frame(); expect_equal(check_compData_results(tmp2), "Object must contain a data frame named 'result.table'.")
      tmp2 <- tmp; result.table(tmp2)$score <- NULL; expect_equal(check_compData_results(tmp2), "result.table must contain a column named 'score'.")
    }
  }

  for (m in methods) {
    if (m != "DESeq2.length" || requireNamespace("DESeq2", quietly = TRUE)) {
      generateCodeHTMLs(
        normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")),
                      winslash = "/"), normalizePath(tdir)
      )
      expect_true(file.exists(normalizePath(file.path(
        tdir, paste0("B_625_625_5spc_repl1_",
                     m, "_code.html")), winslash = "/")))
    }
  }

  ## Comparison report
  file.table <- data.frame(input.files = normalizePath(file.path(
    tdir, paste0("B_625_625_5spc_repl1_",
                 methods[-1],
                 ".rds")), winslash = "/"))
  parameters <- NULL
  comp <- runComparison(file.table = file.table, output.directory = tdir,
                        parameters = parameters)

  comp2 <- runComparison(file.table = file.table, output.directory = tdir,
                         parameters = parameters, save.result.table = TRUE, knit.results = FALSE)
  ff <- list.files(path = tdir, pattern = "compcodeR_result_table_.*.rds", full.names = TRUE)
  resTable <- readRDS(ff[length(ff)])
  expect_equal(resTable$fp + resTable$tp + resTable$fn + resTable$tn, rep(498, nrow(resTable)))
  expect_equal(ncol(resTable), 14)
  expect_equal(nrow(resTable), length(methods) - 1)

  parameters <- list()
  par2 <- parameters; par2$incl.dataset <- "missing"
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with datasets")
  par2 <- parameters; par2$incl.nbr.samples <- 10
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with nbr.samples")
  par2 <- parameters; par2$incl.replicates <- 10
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with replicates")
  par2 <- parameters; par2$incl.de.methods <- "missing"
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with DE methods")
})

test_that("runDiffExp works - phylo", {

  tdir <- tempdir()

  tree <- ape::read.tree(text = "(((A1:0,A2:0,A3:0):1,B1:1):1,((C1:0,C2:0):1.5,(D1:0,D2:0):1.5):0.5);")

  idsp <- as.factor(c("A", "A", "A", "B", "C", "C", "D", "D"))
  names(idsp) <- tree$tip.label

  idcond <- c(1, 1, 1, 1, 2, 2, 2, 2)
  names(idcond) <- tree$tip.label

  set.seed(1)  ## note that with other seeds, the number of genes
  ## passing the filtering threshold could be different

  testdat <- generateSyntheticData(
    dataset = "B_625_625", n.vars = 500,
    samples.per.cond = 4, n.diffexp = 50,
    repl.id = 1, seqdepth = 1e5,
    fraction.upregulated = 0.5,
    between.group.diffdisp = FALSE,
    filter.threshold.total = 1,
    filter.threshold.mediancpm = 0,
    fraction.non.overdispersed = 0,
    tree = tree,
    id.condition = idcond,
    id.species =  idsp,
    lengths.relmeans = "auto",
    lengths.dispersions = "auto",
    output.file = file.path(tdir, "B_625_625_5spc_repl1.rds")
  )

  expect_equal(checkDataObject(testdat), "Data object looks ok.")

  tmp <- readRDS(normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"))
  expect_is(tmp, "phyloCompData")
  expect_is(count.matrix(tmp), "matrix")
  expect_equal(nrow(count.matrix(tmp)), 489) # some are filtered
  expect_equal(ncol(count.matrix(tmp)), 8)
  expect_equal(nrow(length.matrix(tmp)), 489) # some are filtered
  expect_equal(ncol(length.matrix(tmp)), 8)
  expect_is(sample.annotations(tmp), "data.frame")
  expect_equal(nrow(sample.annotations(tmp)), 8)
  expect_equal(ncol(sample.annotations(tmp)), 4)
  expect_is(variable.annotations(tmp), "data.frame")
  expect_equal(nrow(variable.annotations(tmp)), 489)
  expect_equal(ncol(variable.annotations(tmp)), 23)
  expect_is(info.parameters(tmp), "list")
  expect_equal(info.parameters(tmp)$n.diffexp, 50)
  expect_equal(info.parameters(tmp)$dataset, "B_625_625")
  expect_equal(info.parameters(tmp)$fraction.upregulated, 0.5)
  expect_equal(info.parameters(tmp)$between.group.diffdisp, FALSE)
  expect_equal(info.parameters(tmp)$filter.threshold.total, 1)
  expect_equal(info.parameters(tmp)$filter.threshold.mediancpm, 0)
  expect_equal(info.parameters(tmp)$fraction.non.overdispersed, 0)
  expect_equal(info.parameters(tmp)$random.outlier.high.prob, 0)
  expect_equal(info.parameters(tmp)$random.outlier.low.prob, 0)
  expect_equal(info.parameters(tmp)$single.outlier.high.prob, 0)
  expect_equal(info.parameters(tmp)$single.outlier.low.prob, 0)
  expect_equal(info.parameters(tmp)$effect.size, 1.5)
  expect_equal(info.parameters(tmp)$samples.per.cond, 4)
  expect_equal(info.parameters(tmp)$repl.id, 1)
  expect_equal(info.parameters(tmp)$seqdepth, 1e5)
  expect_equal(info.parameters(tmp)$minfact, 0.7)
  expect_equal(info.parameters(tmp)$maxfact, 1.4)
  expect_equal(info.parameters(tmp)$nEff, 0.3636364, tolerance = 1e-7)
  expect_equal(info.parameters(tmp)$nEffRatio, info.parameters(tmp)$nEff) # n/4 = 1
  expect_equal(unique(variable.annotations(tmp)$prop.var.tree), 1.0)
  expect_equal(filtering(tmp), "total count >= 1 ;  median cpm >= 0")
  expect_is(analysis.date(tmp), "character")
  expect_equal(analysis.date(tmp), "")
  expect_is(package.version(tmp), "character")
  expect_equal(package.version(tmp), "")
  expect_is(method.names(tmp), "list")
  expect_equal(method.names(tmp), list())

  if (requireNamespace("DESeq2", quietly = TRUE)) {
    expect_message(
      runDiffExp(
        data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
        result.extent = "DESeq2.length",
        Rmdfunction = "DESeq2.length.createRmd",
        output.directory = tdir, fit.type = "parametric",
        test = "Wald", beta.prior = TRUE,
        independent.filtering = TRUE, cooks.cutoff = TRUE,
        impute.outliers = TRUE,
        extra.design.covariates = NULL,
        nas.as.ones = TRUE
      ),
      "all NAs in adjusted p values are replaced by 1"
    )
  }
  # limma is in Imports
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "lengthNorm.limma",
    Rmdfunction = "lengthNorm.limma.createRmd",
    output.directory = tdir, norm.method = "TMM",
    extra.design.covariates = NULL,
    length.normalization = "TPM",
    data.transformation = "log2",
    block.factor = NULL
  )
  if (requireNamespace("statmod", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "lengthNorm.limma.cor",
      Rmdfunction = "lengthNorm.limma.createRmd",
      output.directory = tdir, norm.method = "TMM",
      extra.design.covariates = NULL,
      length.normalization = "TPM",
      data.transformation = "log2",
      block.factor = "id.species"
    )
  }
  if (requireNamespace("sva", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "lengthNorm.sva.limma",
      Rmdfunction = "lengthNorm.sva.limma.createRmd",
      output.directory = tdir, norm.method = "TMM",
      extra.design.covariates = NULL,
      length.normalization = "TPM",
      data.transformation = "log2"
    )
  }
  # phylolm is in Imports
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "phylolm_cpm",
    Rmdfunction = "phylolm.createRmd",
    output.directory = tdir, norm.method = "TMM",
    model = "OUfixedRoot", measurement_error = TRUE,
    extra.design.covariates = NULL,
    length.normalization = "none",
    data.transformation = "sqrt"
  )
  # phylolm is in Imports
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "phylolm",
    Rmdfunction = "phylolm.createRmd",
    output.directory = tdir, norm.method = "TMM",
    model = "BM", measurement_error = TRUE,
    extra.design.covariates = NULL,
    length.normalization = "TPM",
    data.transformation = "log2"
  )

  # with extra factor
  tmp <- readRDS(normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"))
  sample.annotations(tmp)$test_reg <- rnorm(nrow(sample.annotations(tmp)))
  sample.annotations(tmp)$test_fac <- factor(sample(c(0, 1)))
  saveRDS(tmp, normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"))

  if (requireNamespace("DESeq2", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "DESeq2.length.factor",
      Rmdfunction = "DESeq2.length.createRmd",
      output.directory = tdir, fit.type = "parametric",
      test = "Wald", beta.prior = TRUE,
      independent.filtering = TRUE, cooks.cutoff = TRUE,
      impute.outliers = FALSE,
      extra.design.covariates = c("test_reg", "test_fac")
    )
  }
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "lengthNorm.limma.factor",
    Rmdfunction = "lengthNorm.limma.createRmd",
    output.directory = tdir, norm.method = "TMM",
    extra.design.covariates = c("test_reg", "test_fac"),
    trend = TRUE,
    length.normalization = "TPM",
    data.transformation = "log2",
    block.factor = NULL
  )
  if (requireNamespace("sva", quietly = TRUE)) {
    runDiffExp(
      data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
      result.extent = "lengthNorm.sva.limma.factor",
      Rmdfunction = "lengthNorm.sva.limma.createRmd",
      output.directory = tdir, norm.method = "TMM",
      length.normalization = "TPM",
      data.transformation = "log2",
      extra.design.covariates = c("test_reg", "test_fac"),
      n.sv = 1
    )
  }
  runDiffExp(
    data.file = normalizePath(file.path(tdir, "B_625_625_5spc_repl1.rds"), winslash = "/"),
    result.extent = "phylolm.factor",
    Rmdfunction = "phylolm.createRmd",
    output.directory = tdir, norm.method = "TMM",
    model = "BM", measurement_error = TRUE,
    extra.design.covariates = c("test_reg", "test_fac"),
    length.normalization = "TPM",
    data.transformation = "log2"
  )

  methods <- c("DESeq2.length", "DESeq2.length.factor", "lengthNorm.limma", "lengthNorm.limma.cor", "lengthNorm.limma.factor", "lengthNorm.sva.limma", "lengthNorm.sva.limma.factor", "phylolm.factor", "phylolm_cpm", "phylolm")

  ## Test show() method
  m <- "lengthNorm.limma"
  tmp <- readRDS(normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")),
                               winslash = "/"))
  show(tmp)
  count.matrix(tmp) <- count.matrix(tmp)[, 1:4]
  show(tmp)

  for (m in methods) {
    if (!(m %in% c("DESeq2.length", "DESeq2.length.factor")) || requireNamespace("DESeq2", quietly = TRUE)) {
      if (!(m %in% c("lengthNorm.limma.cor")) || requireNamespace("statmod", quietly = TRUE)) {
        if (!(m %in% c("lengthNorm.sva.limma", "lengthNorm.sva.limma.factor")) || requireNamespace("sva", quietly = TRUE)) {
          tmp <- readRDS(normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")), winslash = "/"))

          expect_is(tmp, "phyloCompData")
          expect_is(result.table(tmp), "data.frame")
          expect_equal(nrow(result.table(tmp)), 489)
          expect_is(code(tmp), "character")
          expect_is(analysis.date(tmp), "character")
          expect_is(compcodeR:::package.version(tmp), "character")
          expect_is(method.names(tmp), "list")
          expect_named(method.names(tmp), c("short.name", "full.name"))

          tmp2 <- tmp; result.table(tmp2) <- result.table(tmp2)[1:10, ]; expect_equal(check_compData_results(tmp2), "result.table must have the same number of rows as count.matrix.")
          tmp2 <- tmp; result.table(tmp2) <- data.frame(); expect_equal(check_compData_results(tmp2), "Object must contain a data frame named 'result.table'.")
          tmp2 <- tmp; result.table(tmp2)$score <- NULL; expect_equal(check_compData_results(tmp2), "result.table must contain a column named 'score'.")
        }
      }
    }
  }

  for (m in methods) {
    if (!(m %in% c("DESeq2.length", "DESeq2.length.factor")) || requireNamespace("DESeq2", quietly = TRUE)) {
      if (!(m %in% c("lengthNorm.limma.cor")) || requireNamespace("statmod", quietly = TRUE)) {
        if (!(m %in% c("lengthNorm.sva.limma", "lengthNorm.sva.limma.factor")) || requireNamespace("sva", quietly = TRUE)) {
          generateCodeHTMLs(
            normalizePath(file.path(tdir, paste0("B_625_625_5spc_repl1_", m, ".rds")),
                          winslash = "/"), normalizePath(tdir)
          )
          expect_true(file.exists(normalizePath(file.path(
            tdir, paste0("B_625_625_5spc_repl1_",
                         m, "_code.html")), winslash = "/")))
        }
      }
    }
  }

  ## Comparison report
  file.table <- data.frame(input.files = normalizePath(file.path(
    tdir, paste0("B_625_625_5spc_repl1_",
                 methods[!(methods %in% c("DESeq2.length", "DESeq2.length.factor", "lengthNorm.limma.cor", "lengthNorm.sva.limma", "lengthNorm.sva.limma.factor"))],
                 ".rds")), winslash = "/"))
  parameters <- NULL

  comp <- runComparison(file.table = file.table, output.directory = tdir,
                        parameters = parameters, save.result.table = TRUE, knit.results = FALSE)
  ff <- list.files(path = tdir, pattern = "compcodeR_result_table_.*.rds", full.names = TRUE)
  resTable <- readRDS(ff[length(ff)])
  expect_equal(resTable$fp + resTable$tp + resTable$fn + resTable$tn, rep(489, nrow(resTable)))
  expect_equal(ncol(resTable), 14)
  expect_equal(nrow(resTable), length(methods) - 5)

  parameters <- list()
  par2 <- parameters; par2$incl.dataset <- "missing"
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with datasets")
  par2 <- parameters; par2$incl.nbr.samples <- 10
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with nbr.samples")
  par2 <- parameters; par2$incl.replicates <- 10
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with replicates")
  par2 <- parameters; par2$incl.de.methods <- "missing"
  expect_error(runComparison(file.table = file.table, output.directory = tdir,
                             parameters = par2),
               "No methods left to compare after matching with DE methods")
})
csoneson/compcodeR documentation built on Dec. 23, 2024, 10:42 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
csoneson/compcodeR
RNAseq data simulation, differential expression analysis and performance comparison of differential expression methods

tests/testthat/test-runComparison.R
In csoneson/compcodeR: RNAseq data simulation, differential expression analysis and performance comparison of differential expression methods

R Package Documentation

Browse R Packages

We want your feedback!

csoneson/compcodeR RNAseq data simulation, differential expression analysis and performance comparison of differential expression methods

tests/testthat/test-runComparison.R In csoneson/compcodeR: RNAseq data simulation, differential expression analysis and performance comparison of differential expression methods

R Package Documentation

Browse R Packages

We want your feedback!

csoneson/compcodeR
RNAseq data simulation, differential expression analysis and performance comparison of differential expression methods

tests/testthat/test-runComparison.R
In csoneson/compcodeR: RNAseq data simulation, differential expression analysis and performance comparison of differential expression methods