inst/doc/splat_params.R

## ----knitr-options, include = FALSE-------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library("splatter")
library("scater")
library("ggplot2")

## ----splat-params-------------------------------------------------------------
params <- newSplatParams()
params

## ----nGenes-------------------------------------------------------------------
# Set the number of genes to 1000
params <- setParam(params, "nGenes", 1000)

sim <- splatSimulate(params, verbose = FALSE)
dim(sim)

## ----batches------------------------------------------------------------------
# Simulation with two batches of 100 cells
sim <- splatSimulate(params, batchCells = c(100, 100), verbose = FALSE)

# PCA plot using scater
sim <- logNormCounts(sim)
sim <- runPCA(sim)
plotPCA(sim, colour_by = "Batch")

## ----batch-factors------------------------------------------------------------
# Simulation with small batch effects
sim1 <- splatSimulate(params, batchCells = c(100, 100),
                      batch.facLoc = 0.001, batch.facScale = 0.001,
                      verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Batch") + ggtitle("Small batch effects")

# Simulation with big batch effects
sim2 <- splatSimulate(params, batchCells = c(100, 100),
                      batch.facLoc = 0.5, batch.facScale = 0.5,
                      verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Batch") + ggtitle("Big batch effects")

## ----batch-rmEffect-----------------------------------------------------------
sim1 <- splatSimulate(params, batchCells = c(100, 100), batch.rmEffect = FALSE,
                      verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Batch") + ggtitle("With batch effects")

sim2 <- splatSimulate(params, batchCells = c(100, 100), batch.rmEffect = TRUE,
                      verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Batch") + ggtitle("Batch effects removed")

## ----outlier-prob-------------------------------------------------------------
# Few outliers
sim1 <- splatSimulate(out.prob = 0.001, verbose = FALSE)
ggplot(as.data.frame(rowData(sim1)),
       aes(x = log10(GeneMean), fill = OutlierFactor != 1)) +
    geom_histogram(bins = 100) +
    ggtitle("Few outliers")

# Lots of outliers
sim2 <- splatSimulate(out.prob = 0.2, verbose = FALSE)
ggplot(as.data.frame(rowData(sim2)),
       aes(x = log10(GeneMean), fill = OutlierFactor != 1)) +
    geom_histogram(bins = 100) +
    ggtitle("Lots of outliers")

## ----groups-------------------------------------------------------------------
params.groups <- newSplatParams(batchCells = 500, nGenes = 1000)

# One small group, one big group
sim1 <- splatSimulateGroups(params.groups, group.prob = c(0.9, 0.1),
                            verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") + ggtitle("One small group, one big group")

# Five groups
sim2 <- splatSimulateGroups(params.groups,
                            group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
                            verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") + ggtitle("Five groups")

## ----de-prob------------------------------------------------------------------
# Few DE genes
sim1 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5),
                            de.prob = 0.01, verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") + ggtitle("Few DE genes")

# Lots of DE genes
sim2 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5),
                            de.prob = 0.3, verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") + ggtitle("Lots of DE genes")

## ----de-factors---------------------------------------------------------------
# Small DE factors
sim1 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5),
                            de.facLoc = 0.01, verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") + ggtitle("Small DE factors")

# Big DE factors
sim2 <- splatSimulateGroups(params.groups, group.prob = c(0.5, 0.5),
                            de.facLoc = 0.3, verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") + ggtitle("Big DE factors")

## ----complex-de---------------------------------------------------------------
# Different DE probs
sim1 <- splatSimulateGroups(params.groups,
                            group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
                            de.prob = c(0.01, 0.01, 0.1, 0.1, 0.3),
                            verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") +
    labs(title = "Different DE probabilities",
         caption = paste("Groups 1 and 2 have very few DE genes,",
                         "Groups 3 and 4 have the default number,",
                         "Group 5 has many DE genes"))
    

# Different DE factors
sim2 <- splatSimulateGroups(params.groups,
                            group.prob = c(0.2, 0.2, 0.2, 0.2, 0.2),
                            de.facLoc = c(0.01, 0.01, 0.1, 0.1, 0.2),
                            de.facScale = c(0.2, 0.5, 0.2, 0.5, 0.4),
                            verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") +
    labs(title = "Different DE factors",
         caption = paste("Group 1 has factors with small location (value),",
                         "and scale (variability),",
                         "Group 2 has small location and greater scale.\n",
                         "Groups 3 and 4 have greater location with small,",
                         "and large scales",
                         "Group 5 has bigger factors with moderate",
                         "variability"))

# Combination of everything
sim3 <- splatSimulateGroups(params.groups,
                            group.prob = c(0.05, 0.2, 0.2, 0.2, 0.35),
                            de.prob = c(0.3, 0.1, 0.2, 0.01, 0.1),
                            de.downProb = c(0.1, 0.4, 0.9, 0.6, 0.5),
                            de.facLoc = c(0.6, 0.1, 0.1, 0.01, 0.2),
                            de.facScale = c(0.1, 0.4, 0.2, 0.5, 0.4),
                            verbose = FALSE)
sim3 <- logNormCounts(sim3)
sim3 <- runPCA(sim3)
plotPCA(sim3, colour_by = "Group") +
    labs(title = "Different DE factors",
         caption = paste(
             "Group 1 is small with many very up-regulated DE genes,",
             "Group 2 has the default DE parameters,\n",
             "Group 3 has many down-regulated DE genes,",
             "Group 4 has very few DE genes,",
             "Group 5 is large with moderate DE factors")
         )

## ----paths--------------------------------------------------------------------
# Linear paths
sim1 <- splatSimulatePaths(params.groups,
                           group.prob = c(0.25, 0.25, 0.25, 0.25),
                           de.prob = 0.5, de.facLoc = 0.2,
                           path.from = c(0, 1, 2, 3),
                           verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Group") + ggtitle("Linear paths")

# Branching path
sim2 <- splatSimulatePaths(params.groups,
                           group.prob = c(0.25, 0.25, 0.25, 0.25),
                           de.prob = 0.5, de.facLoc = 0.2,
                           path.from = c(0, 1, 1, 3),
                           verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Group") + ggtitle("Branching path")

## ----paths-steps--------------------------------------------------------------
# Few steps
sim1 <- splatSimulatePaths(params.groups, path.nSteps = 3,
                           de.prob = 0.5, de.facLoc = 0.2, verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Step") + ggtitle("Few steps")

# Lots of steps
sim2 <- splatSimulatePaths(params.groups, path.nSteps = 1000,
                           de.prob = 0.5, de.facLoc = 0.2, verbose = FALSE)
sim2 <- logNormCounts(sim2)
sim2 <- runPCA(sim2)
plotPCA(sim2, colour_by = "Step") + ggtitle("Lots of steps")

## ----paths-skew---------------------------------------------------------------
# Skew towards the end
sim1 <- splatSimulatePaths(params.groups, path.skew = 0.1,
                           de.prob = 0.5, de.facLoc = 0.2, verbose = FALSE)
sim1 <- logNormCounts(sim1)
sim1 <- runPCA(sim1)
plotPCA(sim1, colour_by = "Step") + ggtitle("Skewed towards the end")

Try the splatter package in your browser

Any scripts or data that you put into this service are public.

splatter documentation built on Dec. 3, 2020, 2:01 a.m.