inst/doc/manuscript_transcriptional_signatures.R

## ---- echo=FALSE, include=FALSE-----------------------------------------------
library(knitr)
#library(kableExtra)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
                      message = FALSE, cache.lazy = FALSE)
#options(width = 120)
options(pillar.min_title_chars = Inf)

library(magrittr)
library(tibble)
library(dplyr)
library(magrittr)
library(tidyr)
library(ggplot2)
library(rlang)
library(purrr)
library(tidybulk)

my_theme = 	
	theme_bw() +
	theme(
		panel.border = element_blank(),
		axis.line = element_line(),
		panel.grid.major = element_line(size = 0.2),
		panel.grid.minor = element_line(size = 0.1),
		text = element_text(size=12),
		legend.position="bottom",
		aspect.ratio=1,
		strip.background = element_blank(),
		axis.title.x  = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
		axis.title.y  = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10))
	)


## ---- eval=FALSE,echo=FALSE, include=FALSE------------------------------------
#  load("../dev/counts_cell_type.rda")
#  options(tidybulk_do_validate = FALSE)

## ---- eval = FALSE------------------------------------------------------------
#  counts_scaled =
#    counts_cell_type %>%
#  	
#  	# Convert to tidybulk tibble
#    tidybulk(sample, symbol, count) %>%
#  	
#  	# Preprocess and scale the data
#    aggregate_duplicates() %>%
#    identify_abundant() %>%
#    scale_abundance() %>%
#  	
#  	# Impute missing sample-transcript pairs
#  	impute_missing_abundance(~cell_type) %>%
#  	mutate(.abundant = TRUE)
#  

## ---- eval = FALSE------------------------------------------------------------
#  counts_non_red =
#    counts_scaled %>%
#  	
#  	# Perform operation for each cell type
#  	nest(data = -cell_type) %>%
#  	mutate(data = map(
#  		data,
#  		~ .x %>%
#  			remove_redundancy(
#  		    method="correlation",
#  		    correlation_threshold = 0.99,
#  		    top=1000
#  		 )
#  	)) %>%
#  	unnest(data)
#  
#  

## ---- eval = FALSE------------------------------------------------------------
#  # Select genes that are in at least one sample for all cell types
#  gene_all =
#      counts_non_red %>%
#  		distinct(symbol, cell_type) %>%
#      count(symbol) %>%
#      filter(n == max(n))
#  
#  # filter dataset and impute missing transcripts-samples pairs
#  counts_non_red_common =
#      counts_non_red %>%
#      inner_join(gene_all)
#  
#  
#  

## ---- eval = FALSE------------------------------------------------------------
#  counts_non_red_common %>%
#    reduce_dimensions(method = "tSNE", action="get") %>%
#    ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) +
#    geom_point(size =2)
#  

## ---- echo=F------------------------------------------------------------------

# saveRDS(counts_non_red_common, "dev/counts_non_red_common.rds", compress = "xz")

tidybulk::vignette_manuscript_signature_tsne %>%
  ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) +
  geom_point(size =2)

## ---- eval = FALSE------------------------------------------------------------
#  markers =
#  
#    # Define all-versus-all cell type permutations
#    counts_non_red_common %>%
#    distinct(cell_type) %>%
#    pull(cell_type) %>%
#    gtools::permutations(n = length(.), r = 2, v = .) %>%
#    as_tibble() %>%
#    setNames(c("cell_type1", "cell_type2")) %>%
#    mutate(contrast = sprintf("cell_type%s - cell_type%s", cell_type1, cell_type2)) %>%
#  
#    # Rank marker genes
#    mutate(de =
#      pmap(
#        list(cell_type1, cell_type2, contrast),
#        ~   counts_non_red_common %>%
#          filter(cell_type %in% c(..1, ..2)) %>%
#          test_differential_abundance(~ 0 + cell_type, .contrasts = ..3, fill_missing_values = TRUE, action="get", omit_contrast_in_colnames = T) %>%
#          filter(logFC > 0) %>%
#          arrange(FDR) %>%
#         rowid_to_column(var = "i")
#      )) %>%
#    unnest(de)
#  

## ---- eval = FALSE------------------------------------------------------------
#  markers %>%
#  
#      # Filter best markers for monocytes
#      filter(cell_type1=="monocyte" & i==1) %>%
#  
#      # Prettify contrasts for plotting
#      unite(pair, c("cell_type1", "cell_type2"), remove = FALSE, sep = "\n") %>%
#  
#      # Reshape
#      gather(which, cell_type, cell_type1, cell_type2) %>%
#      distinct(pair,  symbol,   which, cell_type) %>%
#  
#      # Attach counts
#      left_join(counts_non_red) %>%
#  
#      # Plot
#      ggplot(aes(y = count_scaled + 1, x = cell_type, fill = cell_type)) +
#      geom_boxplot() +
#      facet_wrap(~pair+ symbol, scales ="free_x", nrow = 2) +
#      scale_y_log10()
#  
#  

## ---- echo=F------------------------------------------------------------------
# saveRDS(markers, "dev/vignette_markers.rds", compress = "xz")

tidybulk::vignette_manuscript_signature_boxplot  %>%

    # Plot
    ggplot(aes(y = count_scaled + 1, x = cell_type, fill = cell_type)) +
    geom_boxplot() +
    facet_wrap(~pair+ symbol, scales ="free_x", nrow = 2) +
    scale_y_log10()



## ---- eval = FALSE------------------------------------------------------------
#  markers %>%
#  
#    # Select first 5 markers from each cell-type pair
#    filter(i <= 5) %>%
#    unite(pair, c("cell_type1", "cell_type2"), remove = FALSE, sep = "\n") %>%
#  
#    # Reshape
#    gather(which, cell_type, cell_type1, cell_type2) %>%
#    distinct(symbol) %>%
#  
#    # Attach counts
#    left_join(counts_non_red, by = c("symbol"))  %>%
#  
#    # Plot
#    reduce_dimensions(sample, symbol, count_scaled, method = "tSNE", action="get") %>%
#    pivot_sample(sample) %>%
#    ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) +
#    geom_point(size =2)
#  

## ---- echo=F------------------------------------------------------------------
tidybulk::vignette_manuscript_signature_tsne2   %>%

  pivot_sample(sample) %>%
  ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) +
  geom_point(size =2) 

Try the tidybulk package in your browser

Any scripts or data that you put into this service are public.

tidybulk documentation built on April 7, 2021, 6 p.m.