Nothing
## ---- echo=FALSE, include=FALSE-----------------------------------------------
library(knitr)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
message = FALSE, cache.lazy = FALSE)
library(dplyr)
library(tidyr)
library(tibble)
library(magrittr)
library(ggplot2)
library(ggrepel)
library(tidybulk)
my_theme =
theme_bw() +
theme(
panel.border = element_blank(),
axis.line = element_line(),
panel.grid.major = element_line(size = 0.2),
panel.grid.minor = element_line(size = 0.1),
text = element_text(size=12),
legend.position="bottom",
aspect.ratio=1,
strip.background = element_blank(),
axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10))
)
## -----------------------------------------------------------------------------
tt = counts_mini %>% tidybulk(sample, transcript, count)
## ----aggregate, cache=TRUE, message=FALSE, warning=FALSE, results='hide', class.source='yellow'----
tt.aggr = tt %>% aggregate_duplicates()
## ----aggregate long, eval=FALSE-----------------------------------------------
# temp = data.frame(
# symbol = dge_list$genes$symbol,
# dge_list$counts
# )
# dge_list.nr <- by(temp, temp$symbol,
# function(df)
# if(length(df[1,1])>0)
# matrixStats:::colSums(as.matrix(df[,-1]))
# )
# dge_list.nr <- do.call("rbind", dge_list.nr)
# colnames(dge_list.nr) <- colnames(dge_list)
## ----normalise, cache=TRUE----------------------------------------------------
tt.norm = tt.aggr %>% identify_abundant(factor_of_interest = condition) %>% scale_abundance()
## ----normalise long, eval=FALSE-----------------------------------------------
# library(edgeR)
#
# dgList <- DGEList(count_m=x,group=group)
# keep <- filterByExpr(dgList)
# dgList <- dgList[keep,,keep.lib.sizes=FALSE]
# [...]
# dgList <- calcNormFactors(dgList, method="TMM")
# norm_counts.table <- cpm(dgList)
## ----filter variable, cache=TRUE----------------------------------------------
tt.norm.variable = tt.norm %>% keep_variable()
## ----filter variable long, eval=FALSE-----------------------------------------
# library(edgeR)
#
# x = norm_counts.table
#
# s <- rowMeans((x-rowMeans(x))^2)
# o <- order(s,decreasing=TRUE)
# x <- x[o[1L:top],,drop=FALSE]
#
# norm_counts.table = norm_counts.table[rownames(x)]
#
# norm_counts.table$cell_type = tidybulk::counts[
# match(
# tidybulk::counts$sample,
# rownames(norm_counts.table)
# ),
# "Cell type"
# ]
## ----mds, cache=TRUE----------------------------------------------------------
tt.norm.MDS =
tt.norm %>%
reduce_dimensions(method="MDS", .dims = 2)
## ---- eval = FALSE------------------------------------------------------------
# library(limma)
#
# count_m_log = log(count_m + 1)
# cmds = limma::plotMDS(ndim = .dims, plot = FALSE)
#
# cmds = cmds %$%
# cmdscale.out %>%
# setNames(sprintf("Dim%s", 1:6))
#
# cmds$cell_type = tidybulk::counts[
# match(tidybulk::counts$sample, rownames(cmds)),
# "Cell type"
# ]
## ----pca, cache=TRUE, message=FALSE, warning=FALSE, results='hide'------------
tt.norm.PCA =
tt.norm %>%
reduce_dimensions(method="PCA", .dims = 2)
## ----eval=FALSE---------------------------------------------------------------
# count_m_log = log(count_m + 1)
# pc = count_m_log %>% prcomp(scale = TRUE)
# variance = pc$sdev^2
# variance = (variance / sum(variance))[1:6]
# pc$cell_type = counts[
# match(counts$sample, rownames(pc)),
# "Cell type"
# ]
## ----tsne, cache=TRUE, message=FALSE, warning=FALSE, results='hide'-----------
tt.norm.tSNE =
breast_tcga_mini %>%
tidybulk( sample, ens, count_scaled) %>%
identify_abundant() %>%
reduce_dimensions(
method = "tSNE",
perplexity=10,
pca_scale =TRUE
)
## ---- eval=FALSE--------------------------------------------------------------
# count_m_log = log(count_m + 1)
#
# tsne = Rtsne::Rtsne(
# t(count_m_log),
# perplexity=10,
# pca_scale =TRUE
# )$Y
# tsne$cell_type = tidybulk::counts[
# match(tidybulk::counts$sample, rownames(tsne)),
# "Cell type"
# ]
## ----rotate, cache=TRUE-------------------------------------------------------
tt.norm.MDS.rotated =
tt.norm.MDS %>%
rotate_dimensions(`Dim1`, `Dim2`, rotation_degrees = 45, action="get")
## ---- eval=FALSE--------------------------------------------------------------
# rotation = function(m, d) {
# r = d * pi / 180
# ((bind_rows(
# c(`1` = cos(r), `2` = -sin(r)),
# c(`1` = sin(r), `2` = cos(r))
# ) %>% as_matrix) %*% m)
# }
# mds_r = pca %>% rotation(rotation_degrees)
# mds_r$cell_type = counts[
# match(counts$sample, rownames(mds_r)),
# "Cell type"
# ]
## ----de, cache=TRUE, message=FALSE, warning=FALSE, results='hide'-------------
tt.de =
tt %>%
test_differential_abundance( ~ condition, action="get")
tt.de
## ---- eval=FALSE--------------------------------------------------------------
# library(edgeR)
#
# dgList <- DGEList(counts=counts_m,group=group)
# keep <- filterByExpr(dgList)
# dgList <- dgList[keep,,keep.lib.sizes=FALSE]
# dgList <- calcNormFactors(dgList)
# design <- model.matrix(~group)
# dgList <- estimateDisp(dgList,design)
# fit <- glmQLFit(dgList,design)
# qlf <- glmQLFTest(fit,coef=2)
# topTags(qlf, n=Inf)
## ----adjust, cache=TRUE, message=FALSE, warning=FALSE, results='hide'---------
tt.norm.adj =
tt.norm %>% adjust_abundance( ~ condition + time)
## ---- eval=FALSE--------------------------------------------------------------
# library(sva)
#
# count_m_log = log(count_m + 1)
#
# design =
# model.matrix(
# object = ~ condition + time,
# data = annotation
# )
#
# count_m_log.sva =
# ComBat(
# batch = design[,2],
# mod = design,
# ...
# )
#
# count_m_log.sva = ceiling(exp(count_m_log.sva) -1)
# count_m_log.sva$cell_type = counts[
# match(counts$sample, rownames(count_m_log.sva)),
# "Cell type"
# ]
#
## ----cibersort, cache=TRUE----------------------------------------------------
tt.cibersort =
tt %>%
deconvolve_cellularity(action="get", cores=1)
## ---- eval=FALSE--------------------------------------------------------------
#
# source(‘CIBERSORT.R’)
# count_m %>% write.table("mixture_file.txt")
# results <- CIBERSORT(
# "sig_matrix_file.txt",
# "mixture_file.txt",
# perm=100, QN=TRUE
# )
# results$cell_type = tidybulk::counts[
# match(tidybulk::counts$sample, rownames(results)),
# "Cell type"
# ]
#
## ----cluster, cache=TRUE------------------------------------------------------
tt.norm.cluster = tt.norm.MDS %>%
cluster_elements(method="kmeans", centers = 2, action="get" )
## ---- eval=FALSE--------------------------------------------------------------
# count_m_log = log(count_m + 1)
#
# k = kmeans(count_m_log, iter.max = 1000, ...)
# cluster = k$cluster
#
# cluster$cell_type = tidybulk::counts[
# match(tidybulk::counts$sample, rownames(cluster)),
# c("Cell type", "Dim1", "Dim2")
# ]
#
## ----SNN, cache=TRUE, message=FALSE, warning=FALSE, results='hide'------------
tt.norm.SNN =
tt.norm.tSNE %>%
cluster_elements(method = "SNN")
## ---- eval=FALSE--------------------------------------------------------------
# library(Seurat)
#
# snn = CreateSeuratObject(count_m)
# snn = ScaleData(
# snn, display.progress = TRUE,
# num.cores=4, do.par = TRUE
# )
# snn = FindVariableFeatures(snn, selection.method = "vst")
# snn = FindVariableFeatures(snn, selection.method = "vst")
# snn = RunPCA(snn, npcs = 30)
# snn = FindNeighbors(snn)
# snn = FindClusters(snn, method = "igraph", ...)
# snn = snn[["seurat_clusters"]]
#
# snn$cell_type = tidybulk::counts[
# match(tidybulk::counts$sample, rownames(snn)),
# c("Cell type", "Dim1", "Dim2")
# ]
#
## ----drop, cache=TRUE---------------------------------------------------------
tt.norm.non_redundant =
tt.norm.MDS %>%
remove_redundancy( method = "correlation" )
## ---- eval=FALSE--------------------------------------------------------------
# library(widyr)
#
# .data.correlated =
# pairwise_cor(
# counts,
# sample,
# transcript,
# rc,
# sort = TRUE,
# diag = FALSE,
# upper = FALSE
# ) %>%
# filter(correlation > correlation_threshold) %>%
# distinct(item1) %>%
# rename(!!.element := item1)
#
# # Return non redundant data frame
# counts %>% anti_join(.data.correlated) %>%
# spread(sample, rc, - transcript) %>%
# left_join(annotation)
#
#
#
## ----heat, eval=FALSE---------------------------------------------------------
# tt.norm.MDS %>%
#
# # filter lowly abundant
# keep_abundant() %>%
#
# # extract 500 most variable genes
# keep_variable( .abundance = count_scaled, top = 500) %>%
#
# # create heatmap
# as_tibble() %>%
# heatmap(sample, transcript, count_scaled, transform = log1p) %>%
# add_tile(`Cell type`)
## ---- eval=FALSE--------------------------------------------------------------
# # Example taken from airway dataset from BioC2020 workshop.
# dgList <- SE2DGEList(airway)
# group <- factor(dgList$samples$`Cell type`)
# keep.exprs <- filterByExpr(dgList, group=group)
# dgList <- dgList[keep.exprs,, keep.lib.sizes=FALSE]
# dgList <- calcNormFactors(dgList)
# logcounts <- cpm(dgList, log=TRUE)
# var_genes <- apply(logcounts, 1, var)
# select_var <- names(sort(var_genes, decreasing=TRUE))[1:500]
# highly_variable_lcpm <- logcounts[select_var,]
# colours <- c("#440154FF", "#21908CFF", "#fefada" )
# col.group <- c("red","grey")[group]
# gplots::heatmap.2(highly_variable_lcpm, col=colours, trace="none", ColSideColors=col.group, scale="row")
## ----density, eval=FALSE------------------------------------------------------
# # Example taken from airway dataset from BioC2020 workshop.
# airway %>%
# tidybulk() %>%
# identify_abundant() %>%
# scale_abundance() %>%
# pivot_longer(cols = starts_with("counts"), names_to = "source", values_to = "abundance") %>%
# filter(!lowly_abundant) %>%
# ggplot(aes(x=abundance + 1, color=sample)) +
# geom_density() +
# facet_wrap(~source) +
# scale_x_log10()
## ---- eval=FALSE--------------------------------------------------------------
# # Example taken from airway dataset from BioC2020 workshop.
# dgList <- SE2DGEList(airway)
# group <- factor(dgList$samples$dex)
# keep.exprs <- filterByExpr(dgList, group=group)
# dgList <- dgList[keep.exprs,, keep.lib.sizes=FALSE]
# dgList <- calcNormFactors(dgList)
# logcounts <- cpm(dgList, log=TRUE)
# var_genes <- apply(logcounts, 1, var)
# select_var <- names(sort(var_genes, decreasing=TRUE))[1:500]
# highly_variable_lcpm <- logcounts[select_var,]
# colours <- c("#440154FF", "#21908CFF", "#fefada" )
# col.group <- c("red","grey")[group]
# gplots::heatmap.2(highly_variable_lcpm, col=colours, trace="none", ColSideColors=col.group, scale="row")
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.