In dleelab/leedonghyung: a set of useful R functions

Here, we used a version of IA-SVA (iasva2) using residuals as dependent variables in each iteration.

Load packages

rm(list = ls())
library(Seurat)
library(leedonghyung)
library(SummarizedExperiment)
library(irlba)
library(Rtsne)
library(RColorBrewer)
library(pheatmap)
library(corrplot)
library(tmod)
library(tagcloud)
library(limma)
library(VennDiagram)
library(ggrepel)
library(data.table)

# normalize function
normalize <- function (counts) 
{
  normfactor <- colSums(counts)
  return(t(t(counts)/normfactor)*median(normfactor))
}
color.vec <- c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "black", "#a65628", "#f781bf", "#999999")

Filter genes based on over-dispersion

path <- ("~/Dropbox/CZI_project/Public_Data/Kang_2017/")
name <- "Kang.2017.all.cells"

ctrl.data <- as.data.frame(fread(paste0("gzcat ", path,"immune_control_expression_matrix.txt.gz")))
dim(ctrl.data)
rownames(ctrl.data) <- ctrl.data$V1
ctrl.data$V1 <- NULL
dim(ctrl.data)
#head(ctrl.data[,1:5])

stim.data <- as.data.frame(fread(paste0("gzcat ", path,"immune_stimulated_expression_matrix.txt.gz")))
dim(stim.data)
rownames(stim.data) <- stim.data$V1
stim.data$V1 <- NULL
dim(stim.data)
#head(stim.data[,1:5])

# Set up control object
ctrl <- CreateSeuratObject(raw.data = ctrl.data, project = "IMMUNE_CTRL", min.cells = 5)
ctrl@meta.data$stim <- "CTRL"
ctrl <- FilterCells(ctrl, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf)
ctrl <- NormalizeData(ctrl)
ctrl <- ScaleData(ctrl, display.progress = F)
# Set up stimulated object
stim <- CreateSeuratObject(raw.data = stim.data, project = "IMMUNE_STIM", min.cells = 5)
stim@meta.data$stim <- "STIM"
stim <- FilterCells(stim, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf)
stim <- NormalizeData(stim)
stim <- ScaleData(stim, display.progress = F)

# Gene selection for input to CCA
ctrl <- FindVariableGenes(ctrl, do.plot = F)
stim <- FindVariableGenes(stim, do.plot = F)
g.1 <- head(rownames(ctrl@hvg.info), 2000)
g.2 <- head(rownames(stim@hvg.info), 2000)
genes.use <- union(g.1, g.2)

# combine data
ctrl.data.sel <- ctrl.data[genes.use, rownames(ctrl@meta.data)]
stim.data.sel <- stim.data[genes.use, rownames(stim@meta.data)]
all.counts <- cbind(ctrl.data.sel, stim.data.sel)
dim(all.counts)
# overdispersed genes
#saveRDS(all.counts, file = "immune_control_stimulated_filtered_counts.Rds")

Genes Detected

# load data
all.counts <- readRDS("~/Dropbox/Kang.2017/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.overdispersed_raw_counts.Rds")
dim(all.counts)

# load in seurat annotations
imm.comb <- readRDS("~/Dropbox/Kang.2017/Kang.2017.PBMC.stimulated.Seurat.immune.combined.Rds")
imm.meta <- imm.comb@meta.data
imm.meta <- cbind(imm.meta, imm.comb@ident)
colnames(imm.meta)[ncol(imm.meta)] <- "celltype"
dim(imm.meta)
all.meta <- imm.meta
imm.meta <- imm.meta[imm.meta$celltype == "CD8 T",]
table(rownames(imm.meta) == colnames(all.counts))

# calculate lib size
#pdf(file = paste(name, cell_interest, "IASVA.genes.detected.hist.pdf", sep = "."))
Num_Detected_Genes <- colSums(all.counts > 0)
hist(Num_Detected_Genes, col = "dodgerblue4", breaks = 20, ylab = "Number of Cells")
summ <- summary(Num_Detected_Genes)
legend("topright", legend = paste(names(summ), summ))
abline(v = 500, col = "red", lty = 2)
legend("right", legend = paste("< 500 genes = ", length(which(Num_Detected_Genes < 500)), " Cells\n",
                                  "> 500 genes = ", length(which(Num_Detected_Genes > 500)), " Cells", sep = ""))
#dev.off()

Known Factors

Here, the only known factors are stimulation, celltype, and library size

# make vector of condition
Condition <- factor(imm.meta$stim)
CellType <- factor(imm.meta$celltype)
# normalize counts
counts.filt <- normalize(all.counts)
Geo_Lib <- colSums(log(counts.filt+1))
Num_Detected_Genes <- colSums(all.counts > 0)
# categorize lib size
Geo_Lib_Cat <- cut(Num_Detected_Genes, breaks = 4)
summary(Geo_Lib)
barplot(Geo_Lib, xlab="Cell", las=2, ylab = "Geometric Library Size")               
lcounts <- log(counts.filt + 1)
lcounts <- as.matrix(lcounts)
# PC1 and Geometric library size correlation
pc1 = irlba(lcounts - rowMeans(lcounts), 1)$v[,1] ## partial SVD
cor(Geo_Lib, pc1)
cor(Num_Detected_Genes, pc1)
cor(as.numeric(Condition), pc1)
cor(Num_Detected_Genes, Geo_Lib)

T-SNE prior to IASVA analysis

#tsne.res <- Rtsne(unique(t(lcounts)), dims = 2)
#saveRDS(tsne.res, file = paste(name, "tsne.preIASVA.Rds", sep = "."))

# load tsne results
tsne.res <- readRDS("~/Dropbox/Kang.2017/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.tsne.preIASVA.Rds")

# by celltype
par(mfrow=c(2,2))
#par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE)
plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1",
     ylab="tSNE Dim2", pch=20, col=color.vec[CellType], 
     bg=color.vec[CellType], cex = 0.5)
#par(xpd=TRUE)
legend(50,20, levels(CellType), border="white", fill=color.vec, bty="n", title = "Cell Type")
# by condition
#par(mar=c(5, 4, 4, 2), xpd=FALSE)
plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1",
     ylab="tSNE Dim2", pch=20, col=color.vec[Condition], 
     bg=color.vec[Condition], oma=c(4,4,6,12), cex = 0.5)
legend("topleft", levels(Condition), border="white", fill=color.vec, bty="n", title = "Condition")
# by genes detect
plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1",
     ylab="tSNE Dim2", pch=20, col=color.vec[Geo_Lib_Cat], 
     bg=color.vec[Geo_Lib_Cat], oma=c(4,4,6,12), cex = 0.5)
legend("topleft", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected")

Run IASVA using num.sv = 10

# run iasva
mod <- model.matrix(~Num_Detected_Genes)
summ_exp <- SummarizedExperiment(assays = as.matrix(counts.filt))
pct_cutt <- 1
iasva.res<- fast_iasva2(summ_exp, mod[,-1, drop = F], verbose=FALSE, pct.cutoff = pct_cutt, num.sv = 10)
# load results
#iasva.res <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.IASVA.1.output.Rds")
iasva.sv <- iasva.res$sv
#saveRDS(iasva.res, file = paste(name, "IASVA", pct_cutt, "output.Rds", sep = "."))
#markers_pct1 <- find_markers(Y = summ_exp, iasva.sv = iasva.sv)
#markers_pct1 <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.IASVA.1.markers.Rds")

# revised find markers function
#source("/Users/lawlon/iasva_markers_update/R/find_markers.R")
#all_markers <- .GlobalEnv$find_markers(Y = summ_exp, iasva.sv = iasva.sv)
# all_markers <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/All_Cells/Kang.2017.all.cells.IASVA.by.SV.markers.Rds")

#saveRDS(markers_pct1, file = paste(name, "IASVA", pct_cutt, "markers.Rds", sep = "."))
# tsne on markers
# set.seed(100)
# if (ncol(markers_pct1) > 100) {
#   perplex <- 30
# } else {
#   perplex <- round(ncol(markers_pct1)/4)
# }
# tsne.res.iasva <- Rtsne(unique(t(log(markers_pct1+1))), dims = 2, perplexity = perplex)
# saveRDS(tsne.res.iasva, file = paste(name, cell_interest, stim_cond, "IASVA", pct_cutt, "tsne.markers.Rds", sep = "."))

Visualize SVs

ncol(iasva.sv)
# if (ncol(iasva.sv) > 5) {
#   pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[CellType],
#       bg=color.vec[CellType], cex=0.5, oma=c(4,4,6,12))
#   legend("right", levels(CellType), border="white", fill=color.vec, bty="n")
#   pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Condition],
#       bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12))
#   legend("right", levels(Condition), border="white", fill=color.vec, bty="n")
#   pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Geo_Lib_Cat],
#       bg=color.vec[Geo_Lib_Cat], cex=0.5, oma=c(4,4,6,12))
#   legend("right", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n")
# } else {
  par(mar=c(5.1, 4.1, 4.1, 8.2), xpd=TRUE)
  pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[Condition],
      bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12))
  legend("right", levels(Condition), border="white", fill=color.vec, bty="n")


# }

Use SVs separate cells. For example, SV1, SV4, SV8 are good candidates for further analysis.

Correlate SVs

iasva_vars <- cbind(iasva.sv, Geo_Lib, Condition)
corrplot(cor(iasva_vars))

Marker genes for SV1

SV1.marker.counts <- leedonghyung::find_markers(summ_exp, as.matrix(iasva.sv[,1]), rsq.cutoff=0.2)
dim(SV1.marker.counts)

anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, 
                       SV1 = iasva.sv[,1]) #SV4= iasva.sv[,4])
rownames(anno.col) <- colnames(SV1.marker.counts)

pheatmap(log(SV1.marker.counts+1), show_colnames =FALSE, 
         clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col)
         #annotation_colors = anno.colors)

Marker genes for SV4

SV4.marker.counts <- leedonghyung::find_markers(summ_exp, as.matrix(iasva.sv[,4]), rsq.cutoff=0.2)
dim(SV4.marker.counts)

anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, 
                       SV4 = iasva.sv[,4]) 
rownames(anno.col) <- colnames(SV4.marker.counts)

pheatmap(log(SV4.marker.counts+1), show_colnames =FALSE, 
         clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col)
         #annotation_colors = anno.colors)

Marker genes for SV8

SV8.marker.counts <- leedonghyung::find_markers(summ_exp, as.matrix(iasva.sv[,8]), rsq.cutoff=0.1)
dim(SV8.marker.counts)

anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, 
                       SV8 = iasva.sv[,8]) 
rownames(anno.col) <- colnames(SV8.marker.counts)

pheatmap(log(SV8.marker.counts+1), show_colnames =FALSE, 
         clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col)
         #annotation_colors = anno.colors)

Marker genes for SV1 and SV4

SV1.SV4.marker.counts <- leedonghyung::find_markers(summ_exp, iasva.sv[,c(1,4)], rsq.cutoff=0.2)
dim(SV1.SV4.marker.counts)

anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, 
                       SV1 = iasva.sv[,1], SV4= iasva.sv[,4])
rownames(anno.col) <- colnames(SV1.SV4.marker.counts)

pheatmap(log(SV1.SV4.marker.counts+1), show_colnames =FALSE, 
         clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col)
         #annotation_colors = anno.colors)

Modules enriched in each SV

all_markers <- find_markers2(summ_exp, iasva.sv[,c(1,4,8)], rsq.cutoff=0.1)

design <- model.matrix(~Condition)
fit <- eBayes(lmFit(counts.filt, design))
tt <- topTable(fit, coef=2, number=Inf, genelist = rownames(counts.filt))
head(tt, 10)

# iterative analysis of SVs an modules
all_sv_res <- list()
par(mfrow = c(3,1), mar = c(4,2,2,2))
for (j in 1:3) {
    sv_genes <- all_markers[[j]]
    # module analysis
    fg <- tt[sv_genes,]
    fg.ord <- fg[order(fg$logFC, decreasing = T),]
    res <- tmodHGtest(fg = fg.ord$ID, bg = tt$ID)
    res.ord <- res[order(res$E, decreasing = T),]
    # append results
    all_sv_res[[j]] <- res.ord
    names(all_sv_res)[j] <- names(all_markers)[j]
    if (nrow(res.ord) > 1) {
      # make plots
    bp1 <- barplot(res.ord$E, main = paste(names(all_markers)[j], " Module Enrichment", sep = ""), xlab = "", horiz = T,
                  cex.main = 2, cex.lab = 2)
    #barplot(res.ord$E, xlab = "Module Enrichment", main = paste(names(all_markers)[j], " Genes", sep = ""), horiz = T)
    par(xpd=TRUE)
    text(x = max(res.ord$E)/2, y = bp1[,1], labels = paste(res.ord$Title, " (", res$b, "/", res$B, ")", sep = ""), cex = 1, font = 2)
    }
}

Session Information

sessionInfo()

dleelab/leedonghyung documentation built on May 7, 2019, 8:43 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

dleelab/leedonghyung
a set of useful R functions

In dleelab/leedonghyung: a set of useful R functions

Load packages

Filter genes based on over-dispersion

Genes Detected

Known Factors

T-SNE prior to IASVA analysis

Run IASVA using num.sv = 10

Visualize SVs

Correlate SVs

Marker genes for SV1

Marker genes for SV4

Marker genes for SV8

Marker genes for SV1 and SV4

Modules enriched in each SV

Session Information

R Package Documentation

Browse R Packages

We want your feedback!

dleelab/leedonghyung a set of useful R functions

In dleelab/leedonghyung: a set of useful R functions

Load packages

Filter genes based on over-dispersion

Genes Detected

Known Factors

T-SNE prior to IASVA analysis

Run IASVA using num.sv = 10

Visualize SVs

Correlate SVs

Marker genes for SV1

Marker genes for SV4

Marker genes for SV8

Marker genes for SV1 and SV4

Modules enriched in each SV

Session Information

R Package Documentation

Browse R Packages

We want your feedback!

dleelab/leedonghyung
a set of useful R functions