knitr::opts_chunk$set(echo = T, warning = F, message = F, cache = T, results = 'hide')
library(spacexr) library(Matrix) library(devtools) library(ggplot2) library(ggpubr) library(reshape2) library(dplyr) library(ggrepel) library(fields) library(stringr) library(GSA) load_all() # Load in spatialRNA data and Reference data pwd = getwd() datadir <- paste0('../../../slideseq/Cell Demixing/ContentStructure/DEGLAM/data/tumor','/') resultsdir <- paste0('../../../slideseq/Cell Demixing/ContentStructure/DEGLAM/results/ResultsTumor','/') myRCTD = readRDS(paste0(resultsdir,'myRCTDde.rds')) cell_types = c("CAF","hepatocyte 2","vascular smooth mc") cell_types_present = c("CAF","hepatocyte 2","vascular smooth mc", 'monocyte/DC') myRCTD@de_results$gene_fits$s_mat <- myRCTD@de_results$gene_fits$I_mat de_results = myRCTD@de_results gene_fits <- de_results$gene_fits cell_type <- 'CAF' gene_list_type <- get_gene_list_type_wrapper(myRCTD, cell_type, cell_types_present) res_genes <- find_sig_genes_individual(cell_type, cell_types, myRCTD@de_results$gene_fits, gene_list_type,myRCTD@internal_vars_de$X2, fdr = 0.01, p_thresh = 0.001, log_fc_thresh = 0.4)$sig_genes my_genes <- gene_list_type[grep("^(Rps|Rpl|mt-)",gene_list_type)] my_genes <- intersect(rownames(res_genes),my_genes) my_beta <- myRCTD@internal_vars_de$my_beta cell_type <- 'CAF' barcodes_sing <- names(which(my_beta[myRCTD@internal_vars_de$all_barc,cell_type] > 0.999)) big_sing <- intersect(barcodes_sing,names(which(myRCTD@internal_vars_de$X2[,2] > 0.5))) sm_sing <- intersect(barcodes_sing,names(which(myRCTD@internal_vars_de$X2[,2] < 0.5))) Y <- colSums(myRCTD@spatialRNA@counts[my_genes,]) Yn <- Y / myRCTD@spatialRNA@nUMI p <- plot_puck_continuous(myRCTD@spatialRNA, barcodes_sing, Y, ylimit= c(0,100)) p <- plot_puck_continuous(myRCTD@spatialRNA, barcodes_sing, Yn, ylimit= c(0,0.05)) mean(Y[big_sing]) mean(Y[sm_sing]) mean(Yn[big_sing]) mean(Yn[sm_sing]) mean(myRCTD@spatialRNA@nUMI[big_sing]) mean(myRCTD@spatialRNA@nUMI[sm_sing]) gene_list_type <- gene_list_type[-grep("^(Rps|Rpl|mt-)",gene_list_type)] res_genes <- res_genes[intersect(rownames(res_genes),gene_list_type),] dim(res_genes)
my_barc <- rownames(myRCTD@results$results_df)[which((myRCTD@results$results_df$first_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class != 'reject') | (myRCTD@results$results_df$second_type == 'monocyte/DC' & myRCTD@results$results_df$spot_class == 'doublet_certain' ))] p1 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , myRCTD@internal_vars_de$X2[,2], title ='') + geom_point(data = myRCTD@spatialRNA@coords[my_barc,], size = 0.5, color = "#D55E00") + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ scale_y_continuous(limits = c(1100,3750)) + scale_x_continuous( limits = c(2300,4800))+ geom_segment(aes(x = 2400, y = 1300, xend = 2784.6, yend = 1300), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) + scale_colour_gradientn("Myeloid Density",breaks = c(0,1), labels = c(0,1), colors = pals::brewer.blues(20)[2:20], lim = c(0,1)) results_df <- myRCTD@results$results_df puck <- myRCTD@spatialRNA barcodes = rownames(results_df[results_df$spot_class != "reject" & puck@nUMI >= 100,]) my_table = puck@coords[barcodes,] my_table$class = results_df[barcodes,]$first_type n_levels = myRCTD@cell_type_info$info[[3]] my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)] names(my_pal) = myRCTD@cell_type_info$info[[2]] my_pal_curr <- my_pal my_pal_curr["vascular smooth mc"] <- "#CC79A7" my_pal_curr["LSEC"] <- "#E69F00" my_pal_curr["monocyte/DC"] <- "#D55E00" my_pal_curr["CAF"] <- "#009E73" my_pal_curr["hepatocyte 2"] <- "#0072B2" pres = unique(as.integer(my_table$class)) pres = pres[order(pres)] p2 <- ggplot2::ggplot(my_table, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .4, shape=19,color=class)) + ggplot2::scale_color_manual("",values = my_pal_curr[pres], breaks = c('CAF','monocyte/DC','hepatocyte 2','vascular smooth mc', 'LSEC'), labels = c('Cancer cells','Myeloid cells','Hepatocytes','Vascular Smooth MC', 'LSEC'))+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ scale_y_continuous(limits = c(1100,3750)) + scale_x_continuous( limits = c(2300,4800))+ geom_segment(aes(x = 2400, y = 1300, xend = 2784.6, yend = 1300), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank()) ggarrange(p1,p2,nrow = 2)
# Get a list of the overexpressed and under expressed genes in CAF cells over_genes <- tolower(rownames(res_genes[res_genes$log_fc > 0,])) under_genes <- tolower(rownames(res_genes[res_genes$log_fc < 0,])) # Load hallmark gene sets and change the genes to all lowercase so it's easier to work with. gene_sets = GSA.read.gmt(file.path(datadir,'hallmark_genesets.gmt')) length(intersect(Reduce(union,gene_sets), tolower(rownames(myRCTD@cell_type_info$info[[1]])))) / length(Reduce(union,gene_sets)) #check for overlap of genes gene_set_names = gene_sets$geneset.names gene_set_descriptions = gene_sets$geneset.descriptions gene_sets = gene_sets$genesets names(gene_sets)=gene_set_names gene_sets = lapply(gene_sets, tolower) n_sets = length(gene_sets) #### new GSA NUM_GENE_THRESH <- 5 Q_THRESH <- 0.05 N_samp <- 100000 log_fc_thresh <- 0.4 data_res <- matrix(0, n_sets, 3) for(i in 1:n_sets) { my_genes <- gene_list_type[tolower(gene_list_type) %in% gene_sets[[i]]] my_genes_big <- names(which(abs(myRCTD@de_results$gene_fits$mean_val[my_genes,'CAF']) > log_fc_thresh)) my_Z_big <- (myRCTD@de_results$gene_fits$mean_val[my_genes_big,'CAF'] / myRCTD@de_results$gene_fits$I_mat[my_genes_big,2]) Z_mean <- mean(my_Z_big) n <- length(my_Z_big) my_genes <- gene_list_type[tolower(gene_list_type) %in% Reduce(union, gene_sets)] my_genes_big <- names(which(abs(myRCTD@de_results$gene_fits$mean_val[my_genes,'CAF']) > log_fc_thresh)) my_Z_big <- (myRCTD@de_results$gene_fits$mean_val[my_genes_big,'CAF'] / myRCTD@de_results$gene_fits$I_mat[my_genes_big,2]) sample_res <- numeric(N_samp) for(j in 1:N_samp) sample_res[j] <- mean(sample(my_Z_big, n)) if(n > 0) { if(Z_mean > 0) p_val <- mean(Z_mean < sample_res)*2 else p_val <- mean(Z_mean > sample_res)*2 } else { p_val <- 1 Z_mean <- 0 } data_res[i,] <- c(n, p_val, Z_mean) } # NULL q_vals <- p.adjust(data_res[data_res[,1] >= NUM_GENE_THRESH,2], method = 'BH') sig_gene_sets <- which(data_res[,1] >= NUM_GENE_THRESH)[ which(q_vals < Q_THRESH)] final_df <- data.frame(data_res[sig_gene_sets,]) colnames(final_df) <- c('n', 'p', 'Z') final_df$name <- gene_set_names[sig_gene_sets] final_df$q <- q_vals[q_vals < Q_THRESH] saveRDS(final_df, file.path(resultsdir, 'gsa_results.rds')) final_df <- readRDS(file.path(resultsdir, 'gsa_results.rds'))
cell_type <- 'CAF' log_fc <- myRCTD@de_results$gene_fits$mean_val[gene_list_type,cell_type] cell_type_ind <- which(myRCTD@internal_vars_de$cell_types == cell_type)*2 z_score <- log_fc / myRCTD@de_results$gene_fits$I_mat[gene_list_type, cell_type_ind] p_vals <- 2*(1-pnorm(abs(z_score))) p_vals[p_vals == 0] <- 1e-16 plot_df <- data.frame(log_fc, -log(p_vals,10)) colnames(plot_df) <- c('log_fc','p_val') MAX_P_VAL <- -log(max(res_genes$p_val),10) plot_df$gene <- rownames(plot_df) #plot_df$label <- plot_df$gene #plot_df$label[!(plot_df$label %in% rownames(res_genes))] <- NA plot_df$sig <- (rownames(plot_df) %in% rownames(res_genes)) plot_df$label <- NA plot_df[c('Dpysl3', 'Col7a1','Col6a1','Col6a2','Lrp1','Inhba','Pmepa1','Timp3','Lrp1','Mgp','S100a13','Krt19','Krt79','S100a1','Ccl2','Spred1','Ecm1','Nfkb1','S100a10','S100a6','Krt18','Krt8','Krt14','Krtcap2'), 'label'] <- c('Dpysl3', 'Col7a1','Col6a1','Col6a2','Lrp1','Inhba','Pmepa1','Timp3','Lrp1','Mgp','S100a13','Krt19','Krt79','S100a1','Ccl2','Spred1','Ecm1','Nfkb1','S100a10','S100a6','Krt18','Krt8','Krt14','Krtcap2') plot_df$log_fc <- plot_df$log_fc*log(exp(1),2) plot_df$group <- 'reg' gene_sets = GSA.read.gmt(file.path(datadir,'hallmark_genesets.gmt')) gene_set_names = gene_sets$geneset.names gene_set_descriptions = gene_sets$geneset.descriptions gene_sets = gene_sets$genesets names(gene_sets)=gene_set_names gene_sets = lapply(gene_sets, tolower) my_genes <- gene_list_type[tolower(gene_list_type) %in% gene_sets[[30]]] my_gl <- intersect(my_genes,rownames(res_genes)) plot_df[rownames(plot_df) %in% my_gl, 'group'] <- 'emt' p <- ggplot(plot_df, aes(x=log_fc, y = p_val, color = group, alpha = sig)) + geom_point() + theme_classic() + geom_vline(xintercept = 0.4*log(exp(1),2), linetype = 'dotted') + geom_vline(xintercept = -0.4*log(exp(1),2), linetype = 'dotted') + geom_label_repel(data = plot_df,aes(label = label),nudge_x = 0,na.rm = TRUE, show.legend = F, size = 4, label.padding = 0.1, max.overlaps = 15)+ xlab('Estimated cell type-specific DE by CSIDE') + ylab('CSIDE p-value') + scale_color_manual("", labels = c('EMT', 'Other'), values = c('#D55E00','black')) + xlab('Estimated cell type-specific DE by CSIDE') + ylab('CSIDE p-value') + scale_alpha_manual("", labels = c('Not significant', 'Significant'), values = c(0.2,1))#+ geom_label_repel(data = plot_df[plot_df$group == 'emt',],aes(label = label),nudge_x = 0,na.rm = TRUE, show.legend = F, size = 4, label.padding = 0.1, max.overlaps = 15) #plot_df[plot_df$group != 'emt',] p
emt_list <- my_gl X2 <- myRCTD@internal_vars_de$X2 gene_fits <- myRCTD@de_results$gene_fits all_barc <- myRCTD@internal_vars_de$all_barc my_beta <- myRCTD@internal_vars_de$my_beta puck <- myRCTD@spatialRNA cell_type <- 'CAF' barcodes_sing <- names(which(my_beta[all_barc,cell_type] > 0.999)) MULT = 500 density_thresh <- 0.2 barc_plot <- intersect(barcodes_sing,colnames(puck@counts)[puck@nUMI >= 200]) Y_plot <- MULT*colSums(puck@counts[emt_list,])/puck@nUMI ge_thresh <- 2.5 my_class <- rep(0,length(barc_plot)); names(my_class) <- barc_plot my_class[(X2[barc_plot,2] <= density_thresh) & (Y_plot[barc_plot] <= ge_thresh)] <- 1 my_class[(X2[barc_plot,2] <= density_thresh) & (Y_plot[barc_plot] > ge_thresh)] <- 3 my_class[(X2[barc_plot,2] > density_thresh) & (Y_plot[barc_plot] <= ge_thresh)] <- 2 my_class[(X2[barc_plot,2] > density_thresh) & (Y_plot[barc_plot] > ge_thresh)] <- 4 p3 <- plot_class(puck, barc_plot[order(my_class[barc_plot])], factor(my_class)) suppressMessages(p3 <- p3 + scale_color_manual(values=c("#CCE2EF","#F6DECC","#0072B2","#D55E00"))+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ scale_y_continuous(limits = c(1100,3750)) + scale_x_continuous( limits = c(2300,4800))+ geom_segment(aes(x = 2400, y = 1300, xend = 2784.6, yend = 1300), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())) p3
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.