inst/doc/SSPA.R

## ----style, echo = FALSE, results = 'asis', warning=FALSE---------------------
suppressPackageStartupMessages(library(SSPA))
suppressPackageStartupMessages(library(lattice))
BiocStyle::markdown()

## ----simulated-data-generation------------------------------------------------
library(SSPA)
library(genefilter)
set.seed(12345)
m <- 5000
J <- 10
pi0 <- 0.8
m0 <- as.integer(m * pi0)
mu <- rbitri(m - m0,
             a = log2(1.2),
             b = log2(4),
             m = log2(2))
data <- simdat(
  mu,
  m = m,
  pi0 = pi0,
  J = J,
  noise = 0.01
)
statistics <-
  rowttests(data, factor(rep(c(0, 1), each = J)))$statistic

## ----simulated-data-plotting, fig.cap="Exploratory plots of the pilot-data: Upper left panel shows a histogram of the test statistics, upper right panel a histogram of the p-values. Lower left panel shows a qq-plot for the test statistics using the user-defined null distributions. Lower right panel shows the sorted p-values against their ranks."----
pdD <- pilotData(
  statistics = statistics,
  samplesize = sqrt(1 / (1 / J + 1 / J)),
  distribution = "norm"
)
pdD
plot(pdD)

## ----simulated-data-deconvolution, fig.cap="Estimated density of effect sizes: True density of effect sizes, the bitriangular distribution, and estimated density of effect sizes in blue."----
ssD <- sampleSize(pdD, control = list(from = -6, to = 6))
ssD
plot(
  ssD,
  panel = function(x, y, ...)
  {
    panel.xyplot(x, y)
    panel.curve(dbitri(x),
                lwd = 2,
                lty = 2,
                n = 500)
  },
  ylim = c(0, 0.6)
)

## ----simulated-data-power, fig.cap="Predicted power curve: For sample sizes from 10 to 20 the power is predicted based on the information obtained from the pilot-data."----
Jpred <- seq(10, 20, by = 2)
N <- sqrt(Jpred / 2)
pwrD <- predictpower(ssD, samplesizes = N, alpha = 0.05)
matplot(
  Jpred,
  pwrD,
  type = "b",
  pch = 16,
  ylim = c(0, 1),
  ylab = "predicted power",
  xlab = "sample size (per group)"
)
grid()

## ----simulated-data-effectsize, fig.cap="Conjugate gradient method: Estimated density of effect sizes using the conjugate gradient method."----
pdC <- pilotData(
  statistics = statistics,
  samplesize = sqrt(2 * J),
  distribution = "t",
  df = 2 * J - 2
)
ssC <- sampleSize(pdC,
                  method = "congrad",
                  control = list(
                    from = -6,
                    to = 6,
                    resolution = 250
                  ))
plot(
  ssC,
  panel = function(x, y, ...)
  {
    panel.xyplot(x, y)
    panel.curve(2 * dbitri(2 * x),
                lwd = 2,
                lty = 2,
                n = 500)
  },
  xlim = c(-2, 2),
  ylim = c(0, 1.2)
)

## ----tikohonov, fig.cap="Tikohonov regularization: Estimated density of effect sizes using the Tikohonov regularization."----
ssT <- sampleSize(
  pdC,
  method = "tikhonov",
  control = list(
    resolution = 250,
    scale = "pdfstat",
    lambda = 10 ^ seq(-10, 10, length = 50),
    verbose = FALSE,
    modelselection = "lcurve"
  )
)
plot(
  ssT,
  panel = function(x, y, ...)
  {
    panel.xyplot(x, y, type = "b")
    panel.curve(2 * dbitri(2 * x),
                lwd = 2,
                lty = 2,
                n = 500)
  },
  xlim = c(-2, 2),
  ylim = c(0, 1.2)
)

## ----nutrigenomics-effectsize, fig.cap="Nutrigenomic example: Estimated density of effect sizes for the five treatment exposure conditions."----
library(lattice)
data(Nutrigenomics)
str(Nutrigenomics)
pd <- apply(Nutrigenomics, 2,
            function(x)
              pilotData(
                statistics = x[-1],
                samplesize = sqrt(x[1]),
                distribution = "norm"
              ))
ss <- lapply(pd,
             sampleSize,
             control = list(
               pi0Method = "Storey",
               a = 0,
               resolution = 2 ^ 10,
               verbose = FALSE
             ))

##ss <- lapply(pd, sampleSize,
##             method = "congrad",
##             control=list(verbose=FALSE, resolution=2^10, from=-10, to=10))

compounds <-
  c("Wy14,643",
    "fenofibrate",
    "trilinolenin (C18:3)",
    "Wy14,643",
    "fenofibrate")
exposure <- c("5 Days", "6 Hours")

effectsize <-
  data.frame(
    exposure = factor(rep(rep(exposure, c(
      2, 3
    )), each = 1024)),
    compound = factor(rep(compounds, each = 1024)),
    lambda = as.vector(sapply(ss,
                              function(x)
                                x@lambda)),
    theta = as.vector(sapply(ss,
                             function(x)
                               x@theta))
  )

print(
  xyplot(
    lambda ~ theta | exposure,
    group = compound,
    data = effectsize,
    type = c("g", "l"),
    layout = c(1, 2),
    lwd = 2,
    xlab = "effect size",
    ylab = "",
    auto.key = list(
      columns = 3,
      lines = TRUE,
      points = FALSE,
      cex = 0.7
    )
  )
)

## ----nutrigenomics-power, fig.cap="Nutrigenomic example: Predicted power curves for the five treatment exposure conditions."----
samplesize <- seq(2, 8)
averagepower <- data.frame(
  power = as.vector(sapply(ss,
                           function(x)
                             as.numeric(
                               predictpower(x, samplesize = sqrt(samplesize))
                             ))),
  exposure = factor(rep(rep(exposure, c(
    2, 3
  )), each = length(samplesize))),
  compound = factor(rep(compounds, each = length(samplesize))),
  samplesize = rep(2 * samplesize, 5)
)

print(
  xyplot(
    power ~ samplesize |
      exposure,
    group = compound,
    data = averagepower,
    type = c("g", "b"),
    layout = c(1, 2),
    lwd = 2,
    pch = 16,
    xlab = "sample size (per group)",
    ylab = "",
    auto.key = list(
      columns = 3,
      lines = TRUE,
      points = FALSE,
      cex = 0.7
    )
  )
)

## ----obtain-teststatistics, eval=FALSE----------------------------------------
#  ##files contains the full path and file names of each sample
#  targets <- data.frame(files = files,
#                        group = rep(c("DCLK", "WT"), 4),
#                        description = rep(
#                          c(
#                            "transgenic (Dclk1) mouse hippocampus",
#                            "wild-type mouse hippocampus"
#                          ),
#                          4
#                        ))
#  d <- readDGE(targets) ##reading the data
#  ##filter out low read counts
#  cpm.d <- cpm(d)
#  d <- d[rowSums(cpm.d > 1) >= 4,]
#  
#  design <- model.matrix( ~ group, data = d$samples)
#  ##estimate dispersion
#  disp <- estimateGLMCommonDisp(d, design)
#  disp <- estimateGLMTagwiseDisp(disp, design)
#  ##fit model
#  glmfit.hoen <-
#    glmFit(d, design, dispersion = disp$tagwise.dispersion)
#  ##perform likelihood ratio test
#  lrt.hoen <- glmLRT(glmfit.hoen)
#  ##extract results
#  tbl <- topTags(lrt.hoen, n = nrow(d))[[1]]
#  statistics <- tbl$LR

## ----deepsage-effectsize, fig.cap="Deep SAGE example: Estimated density of effect size and predicted power curve."----
library(lattice)
data(deepSAGE)
str(deepSAGE)
pd <- pilotData(
  statistics = deepSAGE,
  samplesize = 8,
  distribution = "chisq",
  df = 1
)
ss <- sampleSize(pd,
                 method = "congrad",
                 control = list(
                   trim = c(0, 0.98),
                   symmetric = FALSE,
                   from = 0,
                   to = 10
                 ))
pwr <- predictpower(ss, samplesize = c(8, 16, 32))
op <- par(mfcol = c(2, 1), mar = c(5, 4, 1, 1))
plot(ss@theta,
     ss@lambda,
     xlab = "effect size",
     ylab = "",
     type = "l")
plot(
  c(8, 16, 32),
  pwr,
  xlab = "sample size",
  ylab = "power",
  type = "b",
  ylim = c(0, 1)
)
grid(col = 1)
par(op)

## ----session info, echo=FALSE-------------------------------------------------
sessionInfo()

Try the SSPA package in your browser

Any scripts or data that you put into this service are public.

SSPA documentation built on Nov. 8, 2020, 5:50 p.m.