SSPA.R
In SSPA: General Sample Size and Power Analysis for Microarray and Next-Generation Sequencing Data

## ----style, echo = FALSE, results = 'asis', warning=FALSE---------------------
suppressPackageStartupMessages(library(SSPA))
suppressPackageStartupMessages(library(lattice))
BiocStyle::markdown()

## ----simulated-data-generation------------------------------------------------
library(SSPA)
library(genefilter)
set.seed(12345)
m <- 5000
J <- 10
pi0 <- 0.8
m0 <- as.integer(m * pi0)
mu <- rbitri(m - m0,
             a = log2(1.2),
             b = log2(4),
             m = log2(2))
data <- simdat(
  mu,
  m = m,
  pi0 = pi0,
  J = J,
  noise = 0.01
)
statistics <-
  rowttests(data, factor(rep(c(0, 1), each = J)))$statistic

## ----simulated-data-plotting, fig.cap="Exploratory plots of the pilot-data: Upper left panel shows a histogram of the test statistics, upper right panel a histogram of the p-values. Lower left panel shows a qq-plot for the test statistics using the user-defined null distributions. Lower right panel shows the sorted p-values against their ranks."----
pdD <- pilotData(
  statistics = statistics,
  samplesize = sqrt(1 / (1 / J + 1 / J)),
  distribution = "norm"
)
pdD
plot(pdD)

## ----simulated-data-deconvolution, fig.cap="Estimated density of effect sizes: True density of effect sizes, the bitriangular distribution, and estimated density of effect sizes in blue."----
ssD <- sampleSize(pdD, control = list(from = -6, to = 6))
ssD
plot(
  ssD,
  panel = function(x, y, ...)
  {
    panel.xyplot(x, y)
    panel.curve(dbitri(x),
                lwd = 2,
                lty = 2,
                n = 500)
  },
  ylim = c(0, 0.6)
)

## ----simulated-data-power, fig.cap="Predicted power curve: For sample sizes from 10 to 20 the power is predicted based on the information obtained from the pilot-data."----
Jpred <- seq(10, 20, by = 2)
N <- sqrt(Jpred / 2)
pwrD <- predictpower(ssD, samplesizes = N, alpha = 0.05)
matplot(
  Jpred,
  pwrD,
  type = "b",
  pch = 16,
  ylim = c(0, 1),
  ylab = "predicted power",
  xlab = "sample size (per group)"
)
grid()

## ----simulated-data-effectsize, fig.cap="Conjugate gradient method: Estimated density of effect sizes using the conjugate gradient method."----
pdC <- pilotData(
  statistics = statistics,
  samplesize = sqrt(2 * J),
  distribution = "t",
  df = 2 * J - 2
)
ssC <- sampleSize(pdC,
                  method = "congrad",
                  control = list(
                    from = -6,
                    to = 6,
                    resolution = 250
                  ))
plot(
  ssC,
  panel = function(x, y, ...)
  {
    panel.xyplot(x, y)
    panel.curve(2 * dbitri(2 * x),
                lwd = 2,
                lty = 2,
                n = 500)
  },
  xlim = c(-2, 2),
  ylim = c(0, 1.2)
)

## ----tikohonov, fig.cap="Tikohonov regularization: Estimated density of effect sizes using the Tikohonov regularization."----
ssT <- sampleSize(
  pdC,
  method = "tikhonov",
  control = list(
    resolution = 250,
    scale = "pdfstat",
    lambda = 10 ^ seq(-10, 10, length = 50),
    verbose = FALSE,
    modelselection = "lcurve"
  )
)
plot(
  ssT,
  panel = function(x, y, ...)
  {
    panel.xyplot(x, y, type = "b")
    panel.curve(2 * dbitri(2 * x),
                lwd = 2,
                lty = 2,
                n = 500)
  },
  xlim = c(-2, 2),
  ylim = c(0, 1.2)
)

## ----nutrigenomics-effectsize, fig.cap="Nutrigenomic example: Estimated density of effect sizes for the five treatment exposure conditions."----
library(lattice)
data(Nutrigenomics)
str(Nutrigenomics)
pd <- apply(Nutrigenomics, 2,
            function(x)
              pilotData(
                statistics = x[-1],
                samplesize = sqrt(x[1]),
                distribution = "norm"
              ))
ss <- lapply(pd,
             sampleSize,
             control = list(
               pi0Method = "Storey",
               a = 0,
               resolution = 2 ^ 10,
               verbose = FALSE
             ))

##ss <- lapply(pd, sampleSize,
##             method = "congrad",
##             control=list(verbose=FALSE, resolution=2^10, from=-10, to=10))

compounds <-
  c("Wy14,643",
    "fenofibrate",
    "trilinolenin (C18:3)",
    "Wy14,643",
    "fenofibrate")
exposure <- c("5 Days", "6 Hours")

effectsize <-
  data.frame(
    exposure = factor(rep(rep(exposure, c(
      2, 3
    )), each = 1024)),
    compound = factor(rep(compounds, each = 1024)),
    lambda = as.vector(sapply(ss,
                              function(x)
                                x@lambda)),
    theta = as.vector(sapply(ss,
                             function(x)
                               x@theta))
  )

print(
  xyplot(
    lambda ~ theta | exposure,
    group = compound,
    data = effectsize,
    type = c("g", "l"),
    layout = c(1, 2),
    lwd = 2,
    xlab = "effect size",
    ylab = "",
    auto.key = list(
      columns = 3,
      lines = TRUE,
      points = FALSE,
      cex = 0.7
    )
  )
)

## ----nutrigenomics-power, fig.cap="Nutrigenomic example: Predicted power curves for the five treatment exposure conditions."----
samplesize <- seq(2, 8)
averagepower <- data.frame(
  power = as.vector(sapply(ss,
                           function(x)
                             as.numeric(
                               predictpower(x, samplesize = sqrt(samplesize))
                             ))),
  exposure = factor(rep(rep(exposure, c(
    2, 3
  )), each = length(samplesize))),
  compound = factor(rep(compounds, each = length(samplesize))),
  samplesize = rep(2 * samplesize, 5)
)

print(
  xyplot(
    power ~ samplesize |
      exposure,
    group = compound,
    data = averagepower,
    type = c("g", "b"),
    layout = c(1, 2),
    lwd = 2,
    pch = 16,
    xlab = "sample size (per group)",
    ylab = "",
    auto.key = list(
      columns = 3,
      lines = TRUE,
      points = FALSE,
      cex = 0.7
    )
  )
)

## ----obtain-teststatistics, eval=FALSE----------------------------------------
#  ##files contains the full path and file names of each sample
#  targets <- data.frame(files = files,
#                        group = rep(c("DCLK", "WT"), 4),
#                        description = rep(
#                          c(
#                            "transgenic (Dclk1) mouse hippocampus",
#                            "wild-type mouse hippocampus"
#                          ),
#                          4
#                        ))
#  d <- readDGE(targets) ##reading the data
#  ##filter out low read counts
#  cpm.d <- cpm(d)
#  d <- d[rowSums(cpm.d > 1) >= 4,]
#  
#  design <- model.matrix( ~ group, data = d$samples)
#  ##estimate dispersion
#  disp <- estimateGLMCommonDisp(d, design)
#  disp <- estimateGLMTagwiseDisp(disp, design)
#  ##fit model
#  glmfit.hoen <-
#    glmFit(d, design, dispersion = disp$tagwise.dispersion)
#  ##perform likelihood ratio test
#  lrt.hoen <- glmLRT(glmfit.hoen)
#  ##extract results
#  tbl <- topTags(lrt.hoen, n = nrow(d))[[1]]
#  statistics <- tbl$LR

## ----deepsage-effectsize, fig.cap="Deep SAGE example: Estimated density of effect size and predicted power curve."----
library(lattice)
data(deepSAGE)
str(deepSAGE)
pd <- pilotData(
  statistics = deepSAGE,
  samplesize = 8,
  distribution = "chisq",
  df = 1
)
ss <- sampleSize(pd,
                 method = "congrad",
                 control = list(
                   trim = c(0, 0.98),
                   symmetric = FALSE,
                   from = 0,
                   to = 10
                 ))
pwr <- predictpower(ss, samplesize = c(8, 16, 32))
op <- par(mfcol = c(2, 1), mar = c(5, 4, 1, 1))
plot(ss@theta,
     ss@lambda,
     xlab = "effect size",
     ylab = "",
     type = "l")
plot(
  c(8, 16, 32),
  pwr,
  xlab = "sample size",
  ylab = "power",
  type = "b",
  ylim = c(0, 1)
)
grid(col = 1)
par(op)

## ----session info, echo=FALSE-------------------------------------------------
sessionInfo()

Any scripts or data that you put into this service are public.

SSPA documentation built on Nov. 8, 2020, 5:50 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

SSPA
General Sample Size and Power Analysis for Microarray and Next-Generation Sequencing Data

inst/doc/SSPA.R
In SSPA: General Sample Size and Power Analysis for Microarray and Next-Generation Sequencing Data

Try the SSPA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SSPA General Sample Size and Power Analysis for Microarray and Next-Generation Sequencing Data

inst/doc/SSPA.R In SSPA: General Sample Size and Power Analysis for Microarray and Next-Generation Sequencing Data

Try the SSPA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

SSPA
General Sample Size and Power Analysis for Microarray and Next-Generation Sequencing Data

inst/doc/SSPA.R
In SSPA: General Sample Size and Power Analysis for Microarray and Next-Generation Sequencing Data