PeakSegJoint: Joint Peak Detection in Several ChIP-Seq Samples

library(testthat)
library(PeakSegJoint)
context("Faster")

data(overflow.list, envir=environment())
if(FALSE){#for testing.
  bad.df <- overflow.list[[2]]
}
flat.loss <- sapply(overflow.list, function(bad.df){
  w <- with(bad.df, chromEnd-chromStart)
  x <- as.numeric(bad.df$count)
  cusum <- sum(x*w)
  bases <- sum(w)
  m <- cusum/bases
  PoissonLoss(x, m, w)
})
fit <- PeakSegJointFasterOne(overflow.list, 2)
test_that("flat_loss_vec is finite", {
  expect_equal(as.numeric(fit$flat_loss_vec), as.numeric(flat.loss))
})

data(H3K36me3.TDH.other.chunk1, envir=environment())
nogroup <- subset(
  H3K36me3.TDH.other.chunk1$counts,
  43000000 < chromEnd &
  chromStart < 43200000)
test_that("Faster needs group", {
  expect_error({
    fit <- PeakSegJointFaster(nogroup, 2:7)
  }, "no sample.group")
})

some.counts <- data.frame(
  nogroup,
  sample.group=nogroup$cell.type)
plist <- ProfileList(some.counts)
fit <- PeakSegJointFaster(plist, 2:7)
max.samples <- fit$sample.modelSelection$complexity[1]
sample.id.vec <- names(fit$sample.loss.diff.vec[1:max.samples])
mean.mat <- fit$mean_mat[sample.id.vec,]
is.feasible <- mean.mat[,1] < mean.mat[,2] & mean.mat[,2] > mean.mat[,3]
test_that("all selectable samples are feasible", {
  expect_true(all(is.feasible))
})
test_that("sample loss diff is sorted", {
  expect_identical(sort(fit$sample.loss.diff.vec), fit$sample.loss.diff.vec)
})
test_that("biggest selectable model has min.loss", {
  expect_equal(fit$sample.modelSelection$loss[1], fit$min.loss)
})

fit <- PeakSegJointFaster(some.counts, 2:7)
max.groups <- fit$group.modelSelection$complexity[1]
group.id.vec <- names(fit$group.loss.diff.vec[1:max.groups])
sample.id.vec <- unlist(fit$group.list[group.id.vec])
mean.mat <- fit$mean_mat[sample.id.vec,]
is.feasible <- mean.mat[,1] < mean.mat[,2] & mean.mat[,2] > mean.mat[,3]
test_that("all selectable samples are feasible for max groups", {
  expect_true(all(is.feasible))
})

## what if there are no samples up?
pathological <- data.frame(
  sample.id="foo",
  sample.group="bar",
  chromStart=0:9,
  chromEnd=1:10,
  count=1:10)
fit <- PeakSegJointFaster(pathological, 2L)
test_that("no feasible samples is fine", {
  expect_identical(fit$sample.modelSelection$complexity, 0L)
  expect_identical(fit$group.modelSelection$complexity, 0L)
})

test_that("informative error", {
  expect_error({
    fit <- PeakSegJointFaster(pathological, 3L)
  }, "bin factor too large")
})

data(demo.profiles)

test_that("mean always positive, no memory issues", {
  fit <- PeakSegJointFaster(demo.profiles)
  print(fit$peak_start_end)
  expect_true(all(0 <= fit$mean_mat))
  if(FALSE){##more
    for(i in 1:1000){
      fit <- PeakSegJointFasterOne(demo.profiles, 2L)
      print(fit$peak_start_end)
      expect_true(all(0 <= fit$mean_mat))
    }
    peak.df <- data.frame(
      peakStart=fit$peak_start_end[1],
      peakEnd=fit$peak_start_end[2])
    ggplot()+
      geom_rect(aes(
        xmin=chromStart/1e3, xmax=chromEnd/1e3,
        ymin=0, ymax=count),
        color="grey",
        data=demo.profiles)+
      facet_grid(sample.id + sample.group ~ .)+
      geom_segment(aes(
        peakStart/1e3, 0,
        xend=peakEnd/1e3, yend=0),
        data=peak.df)+
      geom_point(aes(
        peakStart/1e3, 0),
        shape=1,
        data=peak.df)+
      theme_bw()+
      theme(panel.margin=grid::unit(0, "lines"))
  }
})

test_that("same model for Heuristic and Faster", {
  N <- 10
  N.bases <- 10
  rmat <- function(Nr, Nc, mu){
    matrix(rpois(Nr*Nc, mu), Nr, Nc)
  }
  set.seed(1)
  big.mat <- cbind(
    rmat(N, N.bases, 5),
    rmat(N, N.bases, 10),
    rmat(N, N.bases, 5))
  big.df <- data.frame(
    sample.id=as.integer(row(big.mat)),
    sample.group="all",
    chromStart=as.integer(col(big.mat)-1),
    chromEnd=as.integer(col(big.mat)),
    count=as.integer(big.mat))
  full.list <- ProfileList(big.df)
  fit.h <- PeakSegJointHeuristicStep2(full.list, 2L)
  fit.f <- PeakSegJointFaster(full.list, 2L)
  expect_equal(fit.f$min.loss, fit.h$models[[11]]$loss)
})

tdhock/PeakSegJoint documentation built on Jan. 27, 2024, 9 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

tdhock/PeakSegJoint
Joint Peak Detection in Several ChIP-Seq Samples

tests/testthat/test-CRAN.R
In tdhock/PeakSegJoint: Joint Peak Detection in Several ChIP-Seq Samples

R Package Documentation

Browse R Packages

We want your feedback!

tdhock/PeakSegJoint Joint Peak Detection in Several ChIP-Seq Samples

tests/testthat/test-CRAN.R In tdhock/PeakSegJoint: Joint Peak Detection in Several ChIP-Seq Samples

R Package Documentation

Browse R Packages

We want your feedback!

tdhock/PeakSegJoint
Joint Peak Detection in Several ChIP-Seq Samples

tests/testthat/test-CRAN.R
In tdhock/PeakSegJoint: Joint Peak Detection in Several ChIP-Seq Samples