Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----getPackage, eval=FALSE---------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("cicero")
## ---- eval = FALSE------------------------------------------------------------
# BiocManager::install(cole-trapnell-lab/cicero)
## ----Load, message=FALSE------------------------------------------------------
library(cicero)
## -----------------------------------------------------------------------------
data(cicero_data)
## ---- eval=TRUE---------------------------------------------------------------
input_cds <- make_atac_cds(cicero_data, binarize = TRUE)
## ---- eval=TRUE---------------------------------------------------------------
set.seed(2017)
input_cds <- detectGenes(input_cds)
input_cds <- estimateSizeFactors(input_cds)
input_cds <- reduceDimension(input_cds, max_components = 2, num_dim=6,
reduction_method = 'tSNE', norm_method = "none")
## ---- eval=TRUE---------------------------------------------------------------
tsne_coords <- t(reducedDimA(input_cds))
row.names(tsne_coords) <- row.names(pData(input_cds))
cicero_cds <- make_cicero_cds(input_cds, reduced_coordinates = tsne_coords)
## ---- eval=TRUE---------------------------------------------------------------
data("human.hg19.genome")
sample_genome <- subset(human.hg19.genome, V1 == "chr18")
sample_genome$V2[1] <- 10000000
conns <- run_cicero(cicero_cds, sample_genome, sample_num = 2) # Takes a few minutes to run
head(conns)
## ---- fig.width = 7, fig.height = 4, fig.align='center', eval=TRUE------------
data(gene_annotation_sample)
plot_connections(conns, "chr18", 8575097, 8839855,
gene_model = gene_annotation_sample,
coaccess_cutoff = .25,
connection_width = .5,
collapseTranscripts = "longest" )
## ---- eval=TRUE---------------------------------------------------------------
chia_conns <- data.frame(Peak1 = c("chr18_10000_10200", "chr18_10000_10200",
"chr18_49500_49600"),
Peak2 = c("chr18_10600_10700", "chr18_111700_111800",
"chr18_10600_10700"))
conns$in_chia <- compare_connections(conns, chia_conns)
## ---- eval=TRUE---------------------------------------------------------------
conns$in_chia_100 <- compare_connections(conns, chia_conns, maxgap=100)
head(conns)
## ---- eval=TRUE---------------------------------------------------------------
# Add a column of 1s called "coaccess"
chia_conns <- data.frame(Peak1 = c("chr18_10000_10200", "chr18_10000_10200",
"chr18_49500_49600"),
Peak2 = c("chr18_10600_10700", "chr18_111700_111800",
"chr18_10600_10700"),
coaccess = c(1, 1, 1))
plot_connections(conns, "chr18", 10000, 112367,
gene_model = gene_annotation_sample,
coaccess_cutoff = 0,
connection_width = .5,
comparison_track = chia_conns,
include_axis_track = FALSE,
collapseTranscripts = "longest")
## ---- eval=TRUE---------------------------------------------------------------
CCAN_assigns <- generate_ccans(conns)
head(CCAN_assigns)
## ---- eval=TRUE---------------------------------------------------------------
#### Add a column for the pData table indicating the gene if a peak is a promoter ####
# Create a gene annotation set that only marks the transcription start sites of
# the genes. We use this as a proxy for promoters.
# To do this we need the first exon of each transcript
pos <- subset(gene_annotation_sample, strand == "+")
pos <- pos[order(pos$start),]
pos <- pos[!duplicated(pos$transcript),] # remove all but the first exons per transcript
pos$end <- pos$start + 1 # make a 1 base pair marker of the TSS
neg <- subset(gene_annotation_sample, strand == "-")
neg <- neg[order(neg$start, decreasing = TRUE),]
neg <- neg[!duplicated(neg$transcript),] # remove all but the first exons per transcript
neg$start <- neg$end - 1
gene_annotation_sub <- rbind(pos, neg)
# Make a subset of the TSS annotation columns containing just the coordinates
# and the gene name
gene_annotation_sub <- gene_annotation_sub[,c(1:3, 8)]
# Rename the gene symbol column to "gene"
names(gene_annotation_sub)[4] <- "gene"
input_cds <- annotate_cds_by_site(input_cds, gene_annotation_sub)
head(fData(input_cds))
#### Generate gene activity scores ####
# generate unnormalized gene activity matrix
unnorm_ga <- build_gene_activity_matrix(input_cds, conns)
# remove any rows/columns with all zeroes
unnorm_ga <- unnorm_ga[!Matrix::rowSums(unnorm_ga) == 0, !Matrix::colSums(unnorm_ga) == 0]
# make a list of num_genes_expressed
num_genes <- pData(input_cds)$num_genes_expressed
names(num_genes) <- row.names(pData(input_cds))
# normalize
cicero_gene_activities <- normalize_gene_activities(unnorm_ga, num_genes)
# if you had two datasets to normalize, you would pass both:
# num_genes should then include all cells from both sets
unnorm_ga2 <- unnorm_ga
cicero_gene_activities <- normalize_gene_activities(list(unnorm_ga, unnorm_ga2), num_genes)
## ---- eval=TRUE---------------------------------------------------------------
data("cicero_data")
input_cds <- make_atac_cds(cicero_data)
# Add some cell meta-data
data("cell_data")
pData(input_cds) <- cbind(pData(input_cds), cell_data[row.names(pData(input_cds)),])
pData(input_cds)$cell <- NULL
agg_cds <- aggregate_nearby_peaks(input_cds, distance = 10000)
agg_cds <- detectGenes(agg_cds)
agg_cds <- estimateSizeFactors(agg_cds)
agg_cds <- estimateDispersions(agg_cds)
## ---- eval=TRUE---------------------------------------------------------------
# This takes a few minutes to run
diff_timepoint <- differentialGeneTest(agg_cds,
fullModelFormulaStr="~timepoint + num_genes_expressed")
# We chose a very high q-value cutoff because there are
# so few sites in the sample dataset, in general a q-value
# cutoff in the range of 0.01 to 0.1 would be appropriate
ordering_sites <- row.names(subset(diff_timepoint, qval < .5))
length(ordering_sites)
## ---- fig.show='hold', eval=FALSE---------------------------------------------
# plot_pc_variance_explained(agg_cds, return_all = FALSE) #Choose 2 PCs
# agg_cds <- reduceDimension(agg_cds,
# max_components = 2,
# norm_method = 'log',
# num_dim = 2,
# reduction_method = 'tSNE',
# verbose = TRUE)
#
# agg_cds <- clusterCells(agg_cds, verbose = FALSE)
#
# plot_cell_clusters(agg_cds, color_by = 'as.factor(Cluster)') + theme(text = element_text(size=8))
# clustering_DA_sites <- differentialGeneTest(agg_cds[1:10,], #Subset for example only
# fullModelFormulaStr = '~Cluster')
#
# # Not run because using Option 1 to continue
# # ordering_sites <-
# # row.names(clustering_DA_sites)[order(clustering_DA_sites$qval)][1:1000]
#
## ---- eval=TRUE---------------------------------------------------------------
agg_cds <- setOrderingFilter(agg_cds, ordering_sites)
## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------------
agg_cds <- reduceDimension(agg_cds, max_components = 2,
residualModelFormulaStr="~num_genes_expressed",
reduction_method = 'DDRTree')
agg_cds <- orderCells(agg_cds)
plot_cell_trajectory(agg_cds, color_by = "timepoint")
## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------------
plot_cell_trajectory(agg_cds, color_by = "State")
## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------------
agg_cds <- orderCells(agg_cds, root_state = 4)
plot_cell_trajectory(agg_cds, color_by = "Pseudotime")
## ---- fig.align='center', fig.height=4, fig.width=4, eval=TRUE----------------
pData(input_cds)$Pseudotime <- pData(agg_cds)[colnames(input_cds),]$Pseudotime
pData(input_cds)$State <- pData(agg_cds)[colnames(input_cds),]$State
## ---- fig.width = 3, fig.height = 4, fig.align='center', eval=TRUE------------
input_cds_lin <- input_cds[,row.names(subset(pData(input_cds), State != 5))]
plot_accessibility_in_pseudotime(input_cds_lin[c("chr18_38156577_38158261",
"chr18_48373358_48374180",
"chr18_60457956_60459080")])
## ---- eval=TRUE---------------------------------------------------------------
pData(input_cds_lin)$cell_subtype <- cut(pData(input_cds_lin)$Pseudotime, 10)
binned_input_lin <- aggregate_by_cell_bin(input_cds_lin, "cell_subtype")
## ---- eval=TRUE---------------------------------------------------------------
diff_test_res <- differentialGeneTest(binned_input_lin[1:10,], #Subset for example only
fullModelFormulaStr="~sm.ns(Pseudotime, df=3) + sm.ns(num_genes_expressed, df=3)",
reducedModelFormulaStr="~sm.ns(num_genes_expressed, df=3)", cores=1)
head(diff_test_res)
## -----------------------------------------------------------------------------
citation("cicero")
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.