test_de_analysis.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

context("de_analysis")

test_that("le_diff_rcpp gives the same result as le.diff",{
  set.seed(1)
  n  <- 1000
  m  <- 20
  X  <- matrix(rnorm(n*m),n,m)
  Y1 <- t(apply(X,1,le.diff))
  Y2 <- le_diff_rcpp(X)
  expect_equal(Y1,Y2,scale = 1,tolerance = 1e-15)

  # Next check the calculations for the special case of two elements.
  m  <- 2
  X  <- matrix(rnorm(n*m),n,m)
  Y1 <- t(apply(X,1,le.diff))
  Y2 <- le_diff_rcpp(X)
  expect_equal(Y1,Y2,scale = 1,tolerance = 1e-15)
})

test_that(paste("R and C++ implementations of simulate_posterior_poisson",
                "produce the same output, and the Monte Carlo estimate of",
                "the covariance matrix is roughly similar to the covariance",
                "matrix computed using Laplace's method"),{
  
  # Simulate a Poisson data set.
  set.seed(1)
  n  <- 200
  f1 <- 0.1
  f2 <- 1
  s  <- sample(10,n,replace = TRUE)
  q  <- runif(n)
  u  <- (1-q)*f1 + q*f2
  x  <- rpois(n,s*u)
  L  <- cbind(s*(1-q),s*q)

  # Fit the Poisson glm.
  fit <- fit_poisson_glm(x,L)

  # Compute the covariance of log(f) using Laplace's method.
  f <- fit$coef
  S <- compute_poisson_covariance(x,L,f)

  # Draw samples from the posterior using the R and C++
  # implementations of the random-walk Metropolis algorithm.
  set.seed(1)
  ns   <- 1e4
  out1 <- simulate_posterior_poisson(x,L,f,ns = ns,s = 0.3)
  set.seed(1)
  i <- which(x > 0)
  D <- matrix(rnorm(2*ns),ns,2)
  U <- matrix(runif(2*ns),ns,2)
  M <- matrix(sample(2,2*ns,replace = TRUE),ns,2) - 1
  out2 <- simulate_posterior_poisson_rcpp(x,L,f,D,U,M,0.3,1e-15)
  out3 <- simulate_posterior_poisson_sparse_rcpp(x[i],L[i,],colSums(L),f,
                                                 D,U,M,0.3,1e-15)
  out2$ar <- drop(out2$ar)
  out3$ar <- drop(out3$ar)
  
  # The outputs from the R and C++ implementations should be the same.
  expect_equal(out1,out2,scale = 1,tolerance = 1e-15)
  expect_equal(out1,out3,scale = 1,tolerance = 1e-15)
  
  # The Laplace and MCMC estimates of the covariance matrix should be
  # roughly similar.
  Smc <- cov(out1$samples)
  expect_equal(S,Smc,scale = 1,tolerance = 0.01)
})

test_that(paste("All variants of fit_poisson_models should produce the",
                "same, or nearly the same, result"),{

  # Simulate gene expression data.
  set.seed(1)
  n   <- 100
  m   <- 200
  k   <- 4
  s   <- 10^runif(n,-1,1)
  dat <- simulate_poisson_gene_data(n,m,k,s)
  X   <- dat$X
  L   <- dat$L

  # Remove all-zero columns.
  X <- X[,colSums(X) > 0]

  # Add "pseudocounts" to the data.
  out <- add_pseudocounts(X,s*L,0.1)
  X   <- out$X
  L   <- out$L
  Y   <- as(X,"CsparseMatrix")
  
  # Fit the univariate Poisson models using glm and scd.
  F1 <- fit_poisson_models(X,L,method = "glm")
  F2 <- fit_poisson_models(Y,L,method = "glm")
  F3 <- fit_poisson_models(X,L,method = "scd")
  F4 <- fit_poisson_models(Y,L,method = "scd")

  # All four implementations should produce the same, or nearly the
  # same, estimates of the model parameters.
  expect_equal(F1,F2,scale = 1,tolerance = 1e-15)
  expect_equal(F3,F4,scale = 1,tolerance = 1e-15)
  expect_equal(F1,F3,scale = 1,tolerance = 1e-5)
})

test_that(paste("de_analysis with and without multithreading, using a",
                "sparse or dense counts matrix, with shrink.method =",
                "\"none\", produces the same result"),{

  # Skip this test on CRAN and CI because it takes too long.
  skip_if(on_cran)
  skip_on_ci()

  # Simulate gene expression data.
  set.seed(1)
  n   <- 800
  m   <- 1000
  k   <- 4
  dat <- simulate_multinom_gene_data(n,m,k,sparse = FALSE)
  X   <- dat$X
  Y   <- as(X,"CsparseMatrix")
  L   <- dat$L

  # Run de_analysis twice, using the single-threaded computations (nc
  # = 1) and again using the multithreaded computations (nc = 2). As
  # long as the sequence of pseudorandom numbers is the same, the
  # output should be the same.
  fit <- init_poisson_nmf(X,L = L,init.method = "random")
  nc <- ifelse(.Platform$OS.type == "windows",1,2)
  for (lfc.stat in c("le","vsnull",paste0("k",1:4))) {
    set.seed(1)
    capture.output(de1 <- de_analysis(fit,X,lfc.stat = lfc.stat,
                                      shrink.method = "none",
                                      control = list(nc = 1)))
    set.seed(1)
    capture.output(de2 <- de_analysis(fit,X,lfc.stat = lfc.stat,
                                      shrink.method = "none",
                                      control = list(nc = nc)))
    set.seed(1)
    capture.output(de3 <- de_analysis(fit,Y,lfc.stat = lfc.stat,
                                      shrink.method = "none",
                                      control = list(nc = 1)))
    set.seed(1)
    capture.output(de4 <- de_analysis(fit,Y,lfc.stat = lfc.stat,
                                      shrink.method = "none",
                                      control = list(nc = nc)))
    expect_equal(de1,de2,scale = 1,tolerance = 1e-12)
    expect_equal(de1,de3,scale = 1,tolerance = 1e-7)
    expect_equal(de1,de4,scale = 1,tolerance = 1e-7)
  }
})

test_that(paste("de_analysis with and without multithreading, using a",
                "sparse or dense counts matrix, with shrink.method =",
                "\"ash\", produces the same result"),{
                    
  # Skip this test on CRAN and CI because it takes too long.
  skip_if(on_cran)
  skip_on_ci()

  # Simulate gene expression data.
  set.seed(1)
  n   <- 800
  m   <- 100
  k   <- 4
  dat <- simulate_multinom_gene_data(n,m,k,sparse = FALSE)
  X   <- dat$X
  Y   <- as(X,"CsparseMatrix")
  L   <- dat$L

  # Run de_analysis twice, using the single-threaded computations (nc
  # = 1) and again using the multithreaded computations (nc = 2). As
  # long as the sequence of pseudorandom numbers is the same, the
  # output should be the same.
  fit <- init_poisson_nmf(X,L = L,init.method = "random")
  nc <- ifelse(.Platform$OS.type == "windows",1,2)
  for (lfc.stat in c("le","vsnull",paste0("k",1:4))) {
    set.seed(1)
    capture.output(de1 <- de_analysis(fit,X,lfc.stat = lfc.stat,
                                      shrink.method = "ash",
                                      control = list(nc = 1)))
    set.seed(1)
    capture.output(de2 <- de_analysis(fit,X,lfc.stat = lfc.stat,
                                      shrink.method = "ash",
                                      control = list(nc = nc)))
    set.seed(1)
    capture.output(de3 <- de_analysis(fit,Y,lfc.stat = lfc.stat,
                                      shrink.method = "ash",
                                      control = list(nc = 1)))
    set.seed(1)
    capture.output(de4 <- de_analysis(fit,Y,lfc.stat = lfc.stat,
                                      shrink.method = "ash",
                                      control = list(nc = nc)))
    de1$ash$logLR <- NA
    de2$ash$logLR <- NA
    de3$ash$logLR <- NA
    de4$ash$logLR <- NA
    de1$svalue <- NA
    de2$svalue <- NA
    de3$svalue <- NA
    de4$svalue <- NA
    expect_equal(de1,de2,scale = 1,tolerance = 1e-7)
    expect_equal(de1,de3,scale = 1,tolerance = 1e-7)
    expect_equal(de1,de4,scale = 1,tolerance = 1e-7)
  }
})

test_that(paste("de_analysis with s = rowSums(X) closely recovers true",
                "probabilities (relative gene expression levels) when",
                "provided with the true topic proportions"),{
                    
  # Skip this test on CRAN and CI because it takes too long.
  skip_if(on_cran)
  skip_on_ci()
  
  # Simulate gene expression data.
  set.seed(1)
  n   <- 800
  m   <- 1000
  k   <- 4
  dat <- simulate_multinom_gene_data(n,m,k,sparse = TRUE)
  X   <- dat$X
  L   <- dat$L

  # Fit a Poisson model (approximating a binomial model) to each gene
  # and topic, and compute the log-fold change statistics.
  fit <- init_poisson_nmf(X,L = L,init.method = "random")
  de  <- de_analysis(fit,X,verbose = FALSE)

  # The parameter estimates should be close to the multinomial
  # probabilities that were used to simulate the data.
  expect_equal(dat$F,de$F,scale = 1,tolerance = 1e-4)

  # Create a volcano plot from the de_analysis output.
  p1 <- volcano_plot(de,k = 1,y = "z")
  p2 <- volcano_plot(de,k = 1,y = "f0")
  p3 <- volcano_plotly(de,k = 1,y = "z")
  p4 <- volcano_plotly(de,k = 1,y = "f0")
  expect_s3_class(p1,"ggplot")
  expect_s3_class(p2,"ggplot")
  expect_s3_class(p3,"plotly")
  expect_s3_class(p4,"plotly")

  # Check that the volcano plots also work without the adaptive
  # shrinkage step.
  de <- de_analysis(fit,X,shrink.method = "none",verbose = FALSE)
  p1 <- volcano_plot(de,k = 1,y = "z")
  p2 <- volcano_plot(de,k = 1,y = "f0")
  p3 <- volcano_plotly(de,k = 1,y = "z")
  p4 <- volcano_plotly(de,k = 1,y = "f0")
  expect_s3_class(p1,"ggplot")
  expect_s3_class(p2,"ggplot")
  expect_s3_class(p3,"plotly")
  expect_s3_class(p4,"plotly")
})

test_that(paste("Pairwise and \"least extreme\" LFC statistics are correct",
                "for k = 2 topics, with shrink.method = \"none\""),{

  # Skip this test on CRAN and CI because it takes too long.
  skip_if(on_cran)
  skip_on_ci()
  
  # Simulate gene expression data.
  set.seed(1)
  n   <- 100
  m   <- 200
  k   <- 2
  s   <- 10^runif(n,-1,1)
  dat <- simulate_poisson_gene_data(n,m,k,s)
  X   <- dat$X
  L   <- dat$L

  # Remove all-zero columns.
  X <- X[,colSums(X) > 0]
  m <- ncol(X)

  # Fit a multinomial topic model with k = 2 topics.
  capture.output(fit <- fit_topic_model(X,k = 2))
  
  # Compute "pairwise" LFC statistics.
  capture.output(de1 <- de_analysis(fit,X,lfc.stat="k1",shrink.method="none"))
  capture.output(de2 <- de_analysis(fit,X,lfc.stat="k2",shrink.method="none"))

  # By definition, LFC(k) where k is the same as lfc.stat should be
  # zero, and the z-scores and p-values should be NA.
  expect_equivalent(de1$est[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$est[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$postmean[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$postmean[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$lower[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$lower[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$upper[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$upper[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$z[,1],rep(0,m))
  expect_equivalent(de2$z[,2],rep(0,m))
  expect_equivalent(de1$lpval[,1],rep(0,m))
  expect_equivalent(de2$lpval[,2],rep(0,m))
  
  # Compute "least extreme" LFC statistics.
  capture.output(de <- de_analysis(fit,X,lfc.stat="le",shrink.method="none"))

  # By definition, LFC(1) should always be the same as -LFC(2) when
  # there are only two topics. Other quantities follow similarly.
  expect_equal(de$est[,1],-de$est[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$postmean[,1],-de$postmean[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$lower[,1],-de$upper[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$upper[,1],-de$lower[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$z[,1],-de$z[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$lpval[,1],de$lpval[,2],scale = 1,tolerance = 1e-15)
})

test_that(paste("Pairwise and \"least extreme\" LFC statistics are correct",
                "for k = 2 topics, with shrink.method = \"ash\""),{

  # Skip this test on CRAN and CI because it takes too long.
  skip_if(on_cran)
  skip_on_ci()
  
  # Simulate gene expression data.
  set.seed(1)
  n   <- 100
  m   <- 200
  k   <- 2
  s   <- 10^runif(n,-1,1)
  dat <- simulate_poisson_gene_data(n,m,k,s)
  X   <- dat$X
  L   <- dat$L

  # Remove all-zero columns.
  X <- X[,colSums(X) > 0]
  m <- ncol(X)

  # Fit a multinomial topic model with k = 2 topics.
  capture.output(fit <- fit_topic_model(X,k = 2))
  
  # Compute "pairwise" LFC statistics.
  capture.output(de1 <- de_analysis(fit,X,lfc.stat="k1",shrink.method="ash"))
  capture.output(de2 <- de_analysis(fit,X,lfc.stat="k2",shrink.method="ash"))

  # By definition, LFC(k) where k is the same as lfc.stat should be
  # zero, and the z-scores and p-values should be NA.
  expect_equivalent(de1$est[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$est[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$postmean[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$postmean[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$lower[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$lower[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$upper[,1],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de2$upper[,2],rep(0,m),scale = 1,tolerance = 1e-5)
  expect_equivalent(de1$z[,1],rep(0,m))
  expect_equivalent(de2$z[,2],rep(0,m))
  expect_equivalent(de1$lfsr[,1],rep(0.5,m))
  expect_equivalent(de2$lfsr[,2],rep(0.5,m))
  
  # Compute "least extreme" LFC statistics.
  capture.output(de <- de_analysis(fit,X,lfc.stat="le",shrink.method="ash"))

  # By definition, LFC(1) should always be the same as -LFC(2) when
  # there are only two topics. Other quantities follow similarly.
  expect_equal(de$est[,1],-de$est[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$postmean[,1],-de$postmean[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$lower[,1],-de$upper[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$upper[,1],-de$lower[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$z[,1],-de$z[,2],scale = 1,tolerance = 1e-15)
  expect_equal(de$lfsr[,1],de$lfsr[,2],scale = 1,tolerance = 1e-15)
})

test_that("no output from de_analysis when verbose = FALSE",{

  # Skip this test on CRAN and CI because it takes too long.
  skip_if(on_cran)
  skip_on_ci()
  
  set.seed(1)
  X <- simulate_multinom_gene_data(n = 50,m = 100,k = 3,sparse = FALSE)$X
  fit0 <- init_poisson_nmf(X,k = 3,verbose = "none")
  out1 <- capture.output(
      result <- de_analysis(fit0,X,control = list(nc = 1),verbose = FALSE),
    type = "output")
  out2 <- capture.output(
      result <- de_analysis(fit0,X,control = list(nc = 2),verbose = FALSE),
    type = "output")
  out3 <- capture.output(
      result <- de_analysis(fit0,X,control = list(nc = 1),verbose = FALSE),
    type = "message")
  out4 <- capture.output(
      result <- de_analysis(fit0,X,control = list(nc = 2),verbose = FALSE),
    type = "message")
  expect_equal(out1,character(0))
  expect_equal(out2,character(0))
  expect_equal(out3,character(0))
  expect_equal(out4,character(0))
})

test_that("de_analysis provided with fit and fit$L give the same result",{

  # Simulate a gene expression data set.
  set.seed(1)
  n   <- 100
  m   <- 200
  k   <- 4
  dat <- simulate_multinom_gene_data(n,m,k,sparse = TRUE)
  X   <- dat$X
  L   <- dat$L

  # Fit a topic model with k = 4 topics to the simulatd gene
  # expression data.
  capture.output(fit <- fit_topic_model(X,k = 4,init.method = "random"))

  set.seed(1)
  capture.output(de1 <- de_analysis(fit,X,control = list(ns = 100)))
  
  set.seed(1)
  capture.output(de2 <- de_analysis(fit$L,X,control = list(ns = 100)))

  de1["ash"] <- NULL
  de2["ash"] <- NULL
  de1["svalue"] <- NULL
  de2["svalue"] <- NULL
  expect_equal(de1,de2,scale = 1,tolerance = 1e-8)
})
Any scripts or data that you put into this service are public.
fastTopics documentation built on Sept. 11, 2024, 5:14 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
fastTopics
Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

tests/testthat/test_de_analysis.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

Try the fastTopics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastTopics Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

tests/testthat/test_de_analysis.R In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

Try the fastTopics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastTopics
Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data

tests/testthat/test_de_analysis.R
In fastTopics: Fast Algorithms for Fitting Topic Models and Non-Negative Matrix Factorizations to Count Data