# inst/doc/Introduction.R

## ---- eval = FALSE------------------------------------------------------------
#  ## try http:// if https:// URLs are not supported
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("scFeatureFilter")

## ---- message=FALSE, warning=FALSE, collapse=TRUE-----------------------------
library(scFeatureFilter)

library(ggplot2)
library(cowplot) # multipanel figures + nice theme

## ---- collapse=TRUE-----------------------------------------------------------
# Quick start. The bare expressions in this section rely on top-level
# auto-printing to display their values.
# example dataset included with the package:
scData_hESC

# filtering of the dataset with a single function call:
sc_feature_filter(scData_hESC)

## ---- collapse=TRUE-----------------------------------------------------------
# Show the input table again before going through the individual steps.
scData_hESC

## ---- collapse=TRUE-----------------------------------------------------------
# Run calculate_cvs() on the raw table and print the result (presumably the
# input augmented with per-gene summary statistics such as the coefficient of
# variation -- confirm against the package documentation).
calculate_cvs(scData_hESC)

## ---- collapse=TRUE-----------------------------------------------------------
# magrittr provides the %>% pipe operator used throughout this script.
library(magrittr)

# Feed the calculate_cvs() output into the mean-variance plot, with bin
# colouring switched off. The plot is displayed through auto-printing.
scData_hESC %>%
    calculate_cvs() %>%
    plot_mean_variance(colourByBin = FALSE)

## ---- collapse=TRUE-----------------------------------------------------------
# Chain calculate_cvs() -> define_top_genes(window_size = 100) ->
# bin_scdata(window_size = 1000) and auto-print the binned result.
calculate_cvs(scData_hESC) %>%
    define_top_genes(window_size = 100) %>%
    bin_scdata(window_size = 1000)

## ----collapse=TRUE------------------------------------------------------------
# Build the mean-variance plot from the binned data (top window of 100,
# bins of 1000), coloured by bin, and keep the plot object for reuse.
myPlot <- plot_mean_variance(
    calculate_cvs(scData_hESC) %>%
        define_top_genes(window_size = 100) %>%
        bin_scdata(window_size = 1000),
    colourByBin = TRUE,
    density_color = "blue"
)

# Display the stored plot.
myPlot

## ---- collapse=TRUE-----------------------------------------------------------
# The stored plot can be extended with further ggplot2 layers; here log tick
# marks are added on the left ("l") side.
myPlot + annotation_logticks(sides = "l")

## ---- collapse=TRUE-----------------------------------------------------------
# Bin the data (top window of 100, bins of 1000) and pass the result to
# correlate_windows() with n_random = 3.
corDistrib <- correlate_windows(
    calculate_cvs(scData_hESC) %>%
        define_top_genes(window_size = 100) %>%
        bin_scdata(window_size = 1000),
    n_random = 3
)

## ---- collapse=TRUE-----------------------------------------------------------
# Convert the correlations to densities (absolute_cc = TRUE) and plot them
# in a 5-column facet layout.
corDens <- correlations_to_densities(corDistrib, absolute_cc = TRUE)

# X axis shared by both distribution plots: breaks at 0, 0.5 and 1 with
# compact labels.
unitAxis <- scale_x_continuous(
    breaks = c(0, 0.5, 1),
    labels = c("0", "0.5", "1")
)

plot_correlations_distributions(corDens, facet_ncol = 5) + unitAxis

## ---- collapse=TRUE-----------------------------------------------------------
# Summarise the correlation distributions, print the summary, then redraw the
# distributions with the summary passed through the `metrics` argument.
metrics <- corDistrib %>% get_mean_median()
metrics
plot_correlations_distributions(corDens, metrics = metrics, facet_ncol = 5) +
    unitAxis

## ---- collapse=TRUE-----------------------------------------------------------
# Metric plot without the control and without the threshold.
metrics %>% plot_metric(show_ctrl = FALSE, show_threshold = FALSE)

## ---- collapse=TRUE-----------------------------------------------------------
# Same plot with the control shown.
metrics %>% plot_metric(show_ctrl = TRUE, show_threshold = FALSE)

## ---- collapse=TRUE-----------------------------------------------------------
# Control and threshold both shown, with threshold = 2.
metrics %>% plot_metric(show_ctrl = TRUE, show_threshold = TRUE, threshold = 2)

## ---- collapse=TRUE-----------------------------------------------------------
# Bin cutoff selected for the same threshold value.
metrics %>% determine_bin_cutoff(threshold = 2)

## ---- collapse=TRUE-----------------------------------------------------------
# Step-by-step filtering: bin the data, derive the metrics from the window
# correlations, then filter the binned table at the cutoff computed from
# those metrics.
binned_data <- bin_scdata(
    define_top_genes(calculate_cvs(scData_hESC), window_size = 100),
    window_size = 1000
)
metrics <- get_mean_median(correlate_windows(binned_data, n_random = 3))

# The cutoff uses determine_bin_cutoff()'s default threshold.
filtered_data <- binned_data %>%
    filter_expression_table(bin_cutoff = determine_bin_cutoff(metrics))

# Compare the dimensions before and after filtering, then print the result.
dim(scData_hESC)
dim(filtered_data)
filtered_data

## ---- message=FALSE, warning=FALSE, collapse=TRUE-----------------------------
# Example of running the filter on a SingleCellExperiment object.
library(SingleCellExperiment)
library(scRNAseq) # example datasets

# Load the example dataset via ReprocessedAllenData().
sce_allen <- ReprocessedAllenData()

# sce_allen is a SingleCellExperiment object
sce_allen

# sce_assay = "rsem_tpm" names the assay of sce_allen passed to the filter.
filtered_allen <- sc_feature_filter(sce_allen, sce_assay = "rsem_tpm")
is.matrix(filtered_allen) # check the return type; NOTE(review): the result is indexed by rownames() below, so it is presumably a matrix rather than a tibble -- confirm

# Keep only the rows of the original object whose names appear in the
# filtered result.
sce_filtered_allen <- sce_allen[rownames(filtered_allen), ]
sce_filtered_allen

## ---- collapse=TRUE-----------------------------------------------------------
# Autocorrelation plot for the top window, computed from the calculate_cvs()
# output.
scData_hESC %>%
    calculate_cvs() %>%
    plot_top_window_autocor()

## ---- collapse=TRUE-----------------------------------------------------------
# Compare two bin sizes. The helper runs the full chain (top window fixed at
# 100, n_random = 3) for the requested bin size and returns the summary
# produced by get_mean_median().
metrics_for_bin_size <- function(bin_size) {
    binned <- bin_scdata(
        define_top_genes(calculate_cvs(scData_hESC), window_size = 100),
        window_size = bin_size
    )
    get_mean_median(correlate_windows(binned, n_random = 3))
}

# Evaluated in the same order as before: large bins first, then small bins.
metrics_bigBins <- metrics_for_bin_size(1000)
metrics_smallBins <- metrics_for_bin_size(500)

# One titled metric plot per bin size, arranged side by side with cowplot.
plotBigBins <- plot_metric(metrics_bigBins) +
    labs(title = "1000 genes per bin")
plotSmallBins <- plot_metric(metrics_smallBins) +
    labs(title = "500 genes per bin")

plot_grid(plotBigBins, plotSmallBins)

# Try the scFeatureFilter package in your browser
#
# Any scripts or data that you put into this service are public.
#
# scFeatureFilter documentation built on Nov. 8, 2020, 7:49 p.m.