inst/doc/introduction.R

## ---- echo=FALSE, include=FALSE-----------------------------------------------
library(knitr)
knitr::opts_chunk$set(
    cache=TRUE, warning=FALSE,
    message=FALSE, cache.lazy=FALSE
)

## ---- eval=FALSE--------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("tidySingleCellExperiment")

## ----message=FALSE------------------------------------------------------------
# Bioconductor single-cell packages
library(scater)
library(scran)
library(SingleR)
library(SingleCellSignalR)

# Tidyverse-compatible packages
library(ggplot2)
library(purrr)
library(tidyHeatmap)

# Both
library(tidySingleCellExperiment)

## -----------------------------------------------------------------------------
pbmc_small_tidy <- tidySingleCellExperiment::pbmc_small %>% tidy()

## -----------------------------------------------------------------------------
pbmc_small_tidy

## -----------------------------------------------------------------------------
assay(pbmc_small_tidy)

## -----------------------------------------------------------------------------
pbmc_small_tidy$file[1:5]

## -----------------------------------------------------------------------------
# Create sample column
pbmc_small_polished <-
    pbmc_small_tidy %>%
    extract(file, "sample", "../data/([a-z0-9]+)/outs.+", remove=FALSE)

# Reorder to have sample column up front
pbmc_small_polished %>%
    select(sample, everything())

## -----------------------------------------------------------------------------
# Use colourblind-friendly colours
friendly_cols <- dittoSeq::dittoColors()

# Set theme
custom_theme <-
    list(
        scale_fill_manual(values=friendly_cols),
        scale_color_manual(values=friendly_cols),
        theme_bw() +
            theme(
                panel.border=element_blank(),
                axis.line=element_line(),
                panel.grid.major=element_line(size=0.2),
                panel.grid.minor=element_line(size=0.1),
                text=element_text(size=12),
                legend.position="bottom",
                aspect.ratio=1,
                strip.background=element_blank(),
                axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)),
                axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10))
            )
    )

## ----plot1--------------------------------------------------------------------
pbmc_small_polished %>%
    tidySingleCellExperiment::ggplot(aes(nFeature_RNA, fill=groups)) +
    geom_histogram() +
    custom_theme

## ----plot2--------------------------------------------------------------------
pbmc_small_polished %>%
    tidySingleCellExperiment::ggplot(aes(groups, nCount_RNA, fill=groups)) +
    geom_boxplot(outlier.shape=NA) +
    geom_jitter(width=0.1) +
    custom_theme

## -----------------------------------------------------------------------------
pbmc_small_polished %>%
    join_transcripts(transcripts=c("HLA-DRA", "LYZ")) %>%
    ggplot(aes(groups, abundance_counts + 1, fill=groups)) +
    geom_boxplot(outlier.shape=NA) +
    geom_jitter(aes(size=nCount_RNA), alpha=0.5, width=0.2) +
    scale_y_log10() +
    custom_theme

## ----preprocess---------------------------------------------------------------
# Identify variable genes with scran
variable_genes <-
    pbmc_small_polished %>%
    modelGeneVar() %>%
    getTopHVGs(prop=0.1)

# Perform PCA with scater
pbmc_small_pca <-
    pbmc_small_polished %>%
    runPCA(subset_row=variable_genes)

pbmc_small_pca

## ----pc_plot------------------------------------------------------------------
# Create pairs plot with GGally
pbmc_small_pca %>%
    as_tibble() %>%
    select(contains("PC"), everything()) %>%
    GGally::ggpairs(columns=1:5, ggplot2::aes(colour=groups)) +
    custom_theme

## ----cluster------------------------------------------------------------------
pbmc_small_cluster <- pbmc_small_pca

# Assign clusters to the 'colLabels' of the SingleCellExperiment object
colLabels(pbmc_small_cluster) <-
    pbmc_small_pca %>%
    buildSNNGraph(use.dimred="PCA") %>%
    igraph::cluster_walktrap() %$%
    membership %>%
    as.factor()

# Reorder columns
pbmc_small_cluster %>% select(label, everything())

## ----cluster count------------------------------------------------------------
# Count number of cells for each cluster per group
pbmc_small_cluster %>%
    tidySingleCellExperiment::count(groups, label)

## -----------------------------------------------------------------------------
# Identify top 10 markers per cluster
marker_genes <-
    pbmc_small_cluster %>%
    findMarkers(groups=pbmc_small_cluster$label) %>%
    as.list() %>%
    map(~ .x %>%
        head(10) %>%
        rownames()) %>%
    unlist()

# Plot heatmap
pbmc_small_cluster %>%
    join_transcripts(transcripts=marker_genes) %>%
    group_by(label) %>%
    heatmap(transcript, cell, abundance_counts, .scale="column")

## ----umap---------------------------------------------------------------------
pbmc_small_UMAP <-
    pbmc_small_cluster %>%
    runUMAP(ncomponents=3)

## ----umap plot, eval=FALSE----------------------------------------------------
#  pbmc_small_UMAP %>%
#      plot_ly(
#          x=~`UMAP1`,
#          y=~`UMAP2`,
#          z=~`UMAP3`,
#          color=~label,
#          colors=friendly_cols[1:4]
#      )

## ----eval=FALSE---------------------------------------------------------------
#  # Get cell type reference data
#  blueprint <- celldex::BlueprintEncodeData()
#  
#  # Infer cell identities
#  cell_type_df <-
#  
#      assays(pbmc_small_UMAP)$logcounts %>%
#      Matrix::Matrix(sparse = TRUE) %>%
#      SingleR::SingleR(
#          ref = blueprint,
#          labels = blueprint$label.main,
#          method = "single"
#      ) %>%
#      as.data.frame() %>%
#      as_tibble(rownames="cell") %>%
#      select(cell, first.labels)

## -----------------------------------------------------------------------------
# Join UMAP and cell type info
pbmc_small_cell_type <-
    pbmc_small_UMAP %>%
    left_join(cell_type_df, by="cell")

# Reorder columns
pbmc_small_cell_type %>%
    tidySingleCellExperiment::select(cell, first.labels, everything())

## -----------------------------------------------------------------------------
# Count number of cells for each cell type per cluster
pbmc_small_cell_type %>%
    count(label, first.labels)

## -----------------------------------------------------------------------------
pbmc_small_cell_type %>%

    # Reshape and add classifier column
    pivot_longer(
        cols=c(label, first.labels),
        names_to="classifier", values_to="label"
    ) %>%

    # UMAP plots for cell type and cluster
    ggplot(aes(UMAP1, UMAP2, color=label)) +
    geom_point() +
    facet_wrap(~classifier) +
    custom_theme

## -----------------------------------------------------------------------------
pbmc_small_cell_type %>%

    # Add some mitochondrial abundance values
    mutate(mitochondrial=rnorm(dplyr::n())) %>%

    # Plot correlation
    join_transcripts(transcripts=c("CST3", "LYZ"), shape="wide") %>%
    ggplot(aes(CST3 + 1, LYZ + 1, color=groups, size=mitochondrial)) +
    geom_point() +
    facet_wrap(~first.labels, scales="free") +
    scale_x_log10() +
    scale_y_log10() +
    custom_theme

## -----------------------------------------------------------------------------
pbmc_small_nested <-
    pbmc_small_cell_type %>%
    filter(first.labels != "Erythrocytes") %>%
    mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
    nest(data=-cell_class)

pbmc_small_nested

## ----warning=FALSE------------------------------------------------------------
pbmc_small_nested_reanalysed <-
    pbmc_small_nested %>%
    mutate(data=map(
        data, ~ {
            .x <- runPCA(.x, subset_row=variable_genes)

            variable_genes <-
                .x %>%
                modelGeneVar() %>%
                getTopHVGs(prop=0.3)

            colLabels(.x) <-
                .x %>%
                buildSNNGraph(use.dimred="PCA") %>%
                igraph::cluster_walktrap() %$%
                membership %>%
                as.factor()

            .x %>% runUMAP(ncomponents=3)
        }
    ))

pbmc_small_nested_reanalysed

## -----------------------------------------------------------------------------
pbmc_small_nested_reanalysed %>%

    # Convert to tibble otherwise SingleCellExperiment drops reduced dimensions when unifying data sets.
    mutate(data=map(data, ~ .x %>% as_tibble())) %>%
    unnest(data) %>%

    # Define unique clusters
    unite("cluster", c(cell_class, label), remove=FALSE) %>%

    # Plotting
    ggplot(aes(UMAP1, UMAP2, color=cluster)) +
    geom_point() +
    facet_wrap(~cell_class) +
    custom_theme

## ---- eval=FALSE--------------------------------------------------------------
#  pbmc_small_nested_interactions <-
#      pbmc_small_nested_reanalysed %>%
#  
#      # Unnest based on cell category
#      unnest(data) %>%
#  
#      # Create unambiguous clusters
#      mutate(integrated_clusters=first.labels %>% as.factor() %>% as.integer()) %>%
#  
#      # Nest based on sample
#      tidySingleCellExperiment::nest(data=-sample) %>%
#      tidySingleCellExperiment::mutate(interactions=map(data, ~ {
#  
#          # Produce variables. Yuck!
#          cluster <- colData(.x)$integrated_clusters
#          data <- data.frame(assays(.x) %>% as.list() %>% .[[1]] %>% as.matrix())
#  
#          # Ligand/Receptor analysis using SingleCellSignalR
#          data %>%
#              cell_signaling(genes=rownames(data), cluster=cluster) %>%
#              inter_network(data=data, signal=., genes=rownames(data), cluster=cluster) %$%
#              `individual-networks` %>%
#              map_dfr(~ bind_rows(as_tibble(.x)))
#      }))
#  
#  pbmc_small_nested_interactions %>%
#      select(-data) %>%
#      unnest(interactions)

## -----------------------------------------------------------------------------
tidySingleCellExperiment::pbmc_small_nested_interactions

## -----------------------------------------------------------------------------
sessionInfo()

Try the tidySingleCellExperiment package in your browser

Any scripts or data that you put into this service are public.

tidySingleCellExperiment documentation built on Nov. 8, 2020, 6:54 p.m.