Using the position-developmental time relationship on the t-SNE plot we can define a pseudotime by fitting a principal line though the t-SNE plot and assigning each cell a position on this line based on which part of the line is closest. This approach is similar (the same?) to that used in SCUBA. We can then see how average component scores change over time and the consistency with gene expression changes over time - either by smoothed curves, or heatmaps.
library(testisAtlas) load2("../data/cache") load_component_orderings()
Low library size cells were removed prior to construction of the principal curve.
# create principal curve using Tsne (without somatic cells) # build up curve principal_curves <- testisAtlas::princurve_bootstrap(as.matrix(cell_data[somatic4==FALSE & V38<3, .(Tsne1_QC1, Tsne2_QC1)])) principal_curves[["df_8_unbootstrapped"]] <- princurve::principal.curve(as.matrix(cell_data[somatic4==FALSE & V38<3, .(Tsne1_QC1, Tsne2_QC1)]), df=8) saveRDS(principal_curves, "../data/cache/principal_curves.rds")
principal_curves <- readRDS("../data/cache/principal_curves.rds") # plot Principal curve on Tsne plot print_tsne("Prm2", curve = T, principal_curve = "df_4", curve_width = 1) + ggtitle("Principal Curve on tSNE, df=4") print_tsne("Prm2", curve = T, principal_curve = "df_8_unbootstrapped", curve_width = 1) + ggtitle("Principal Curve on tSNE, df=8, without bootstrap") print_tsne("Prm2", curve = T, principal_curve = "df_9", curve_width = 1) + ggtitle("Principal Curve on tSNE, bootstrap to df=9") print_tsne("Prm2", curve = T, principal_curve = "df_10", curve_width = 1) + ggtitle("Principal Curve on tSNE, bootstrap to df=10")
# Assign pseudotime based on position along principal line pseudotime <- cbind(principal_curves[["df_9"]]$s[principal_curves[["df_9"]]$tag, 1], principal_curves[["df_9"]]$s[principal_curves[["df_9"]]$tag, 2]) find_pseudotime <- function(sample.point){ which.min(colSums((t(pseudotime) - c(sample.point))^2)) } cell_pseudotime <- apply(as.matrix(cell_data[,.(Tsne1_QC1,Tsne2_QC1)]), 1, find_pseudotime) cell_data[,PseudoTime1 := pseudotime[cell_pseudotime,][,1]] cell_data[,PseudoTime2 := pseudotime[cell_pseudotime,][,2]] cell_data[,PseudoTime := cell_pseudotime] # points closer to line # ggplot(cell_data, aes(PseudoTime1, PseudoTime2, colour=sqrt(library_size))) + # geom_jitter(width=0.5, height=0.5, size=0.5) + scale_colour_distiller(palette="YlOrRd", direction=-1) + ggtitle("Cells moved towards point on principal Curve") cell_data[somatic4==TRUE, PseudoTime := NA] ggplot(cell_data, aes(Tsne1_QC1, Tsne2_QC1, color=PseudoTime)) + geom_point(size=0.25) + scale_color_viridis() + theme(legend.position = "bottom") + ggtitle("SDA t-SNE - cells coloured by PseudoTime") ggplot(cell_data, aes(Tsne1_QC2, Tsne2_QC2, color=PseudoTime)) + geom_point(size=0.25) + scale_color_viridis() + theme(legend.position = "bottom") + ggtitle("SDA t-SNE (differnt seed) - cells coloured by PseudoTime") ggplot(cell_data, aes(Tsne1, Tsne2, color=PseudoTime)) + geom_point(size=0.25) + scale_color_viridis() + theme(legend.position = "bottom") + ggtitle("Raw t-SNE - cells coloured by New PseudoTime")
The component scores and gene loadings show a cascade of expression programmes as spermatogenesis progresses.
tmp <- cell_data[,paste0("V",1:50), with=FALSE] names(tmp) <- component_order_dt$name tmp <- as.matrix(tmp)[order(-cell_data$PseudoTime,cell_data$hclust_group),component_order_dt[order(-pseudotime_average,name)]$name] colnames(tmp) <- paste0("V",component_order_dt[order(-pseudotime_average,name)]$component_number," ",colnames(tmp)) t=20 tmp[tmp>t] <- t tmp[tmp<(-t)] <- (-t) pdf("../results/scores_heatmap.pdf", width = 12, height = 9) draw(Heatmap(t(tmp[,!grepl("Single Cell",colnames(tmp))]), col = log_colour_scale(tmp, scale = 0.5), cluster_columns = F, cluster_rows = F, row_names_max_width = unit(10, "npc"), row_gap = unit(0, "mm"), border = "grey", row_split = c(1:45), heatmap_legend_param = list(title = "Cell Score", legend_direction="horizontal")), heatmap_legend_side="bottom") dev.off()
components_w_pseudotime <- paste0("V",component_order_dt[!is.na(pseudotime_average)][order(-pseudotime_average)]$component_number) # Plot component loading by pseudotime merge_sda3_melt <- melt(cell_data[somatic4==FALSE, c("cell", "PseudoTime", components_w_pseudotime), with=FALSE], id.vars = c("cell","PseudoTime"), variable.name = "Component") levels(merge_sda3_melt$Component) <- paste(levels(merge_sda3_melt$Component), component_order_dt[!is.na(pseudotime_average)][order(-pseudotime_average)]$name)
tmp <- merge_sda3_melt[Component %in% levels(merge_sda3_melt$Component)[c(4,16,19,25,28)]] #c(4,12,13,16,19,21,25,28) tmp[as.integer(Component) %in% c(4,13,16,19,25), value := value*(-1)] component_colours <- c(RColorBrewer::brewer.pal(12,"Paired"),"black","white")# ,"yellow") component_colours <- c("#6A3D9A", "#33A02C", "#FB9A99", "#B15928", "white", "#A6CEE3", "#FDBF6F", "#E31A1C", "#1F78B4", "#CAB2D6", "#B2DF8A", "black", "#FF7F00", "#FFFF99") scores_pseudotime <- ggplot(tmp, aes(-PseudoTime, value, colour=Component, group=Component)) + geom_point_rast(alpha=0.4, size=1, stroke=0) + geom_smooth(size=1, method = "gam", formula = y ~ s(x, k = 20), se = F) +#colour="black", ylab("Cell Component Score") + xlab("Pseudotime") + theme_minimal() + scale_colour_manual(values=component_colours[c(2,8,9,12,13)])+ #RColorBrewer::brewer.pal(9,"Set1")[-6] theme(legend.position = "none") + ylim(-2,8) scores_pseudotime saveRDS(scores_pseudotime,"../data/plots/scores_pseudotime.rds")
scores_pseudotime <- ggplot(merge_sda3_melt, aes(-PseudoTime, value, colour=Component)) + geom_point(alpha=0.1, size=0.2) + geom_smooth(colour="black", size=0.2) + ylab("Cell Component Score") + xlab("Pseudotime") + ggtitle("Component score through pseudotime") + facet_wrap(~Component, scales="free_y") + theme_minimal() + theme(legend.position = "none") scores_pseudotime
# 1, 30 #brewer.pal(3, "Set1")[3] scores_pseudotime <- ggplot(merge_sda3_melt[Component %in% levels(merge_sda3_melt$Component)[c(2, 4, 8, 10,12,13, 16, 19, 21, 24, 25, 28)]], aes(-PseudoTime, value)) + geom_point(alpha=0.1, size=0.2, aes(colour=Component)) + geom_smooth(colour="black", size=0.4, method = "gam", formula = y ~ s(x, bs = "ad", k=250)) + ylab("Cell Component Score") + xlab("Pseudotime") + facet_wrap(~Component, scales="free_y", ncol=4) + theme_minimal() + theme(legend.position = "none") scores_pseudotime pdf("../results/scores_pseudotime_facet.pdf", width = 16, height = 9) scores_pseudotime dev.off()
tmp <- merge_sda3_melt[Component %in% levels(merge_sda3_melt$Component)[c(2,4,6,8,10,12,13,16,19,21,24,25,28,30)]] tmp[as.integer(Component) %in% c(4,6,8,13,16,19,25,30), value := value*(-1)] tmp[, cell2 := factor(cell, levels = merge_sda3_melt[order(-PseudoTime)][Component=="V5 (pre)Leptotene"]$cell)] tmp[,value_pos := max(c(0,value)),by=.(cell,Component)] #component_colours <- c(RColorBrewer::brewer.pal(9,"Set1"),"skyblue", "black","white") component_colours <- c(RColorBrewer::brewer.pal(12,"Paired"),"black","white") component_colours <- c("#6A3D9A", "#33A02C", "#FB9A99", "#B15928", "white", "#A6CEE3", "#FDBF6F", "#E31A1C", "#1F78B4", "#CAB2D6", "#B2DF8A", "black", "#FF7F00", "#FFFF99") structure <- ggplot(tmp, aes(cell2, value_pos, fill=Component, colour=Component)) + geom_col(position = "fill") + scale_fill_manual(values = component_colours) + scale_colour_manual(values = component_colours) + labs(x="Pseudotime ordered cells", y="Normalised Cell Component Score") + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = "bottom")+ guides(colour = guide_legend(ncol = 2), fill = guide_legend(ncol = 2)) + scale_y_continuous(expand=c(0,0)) structure saveRDS(structure, file = "../data/plots/structure_plot.rds") pdf("../results/structure.pdf", height = 7, width = 10) structure dev.off()
png("../results/striking_image_4B.png",height = 900*5, width = 1800*5) structure + theme_void() + theme(legend.position = "none") dev.off()
Non-normalised Structure plot
ggplot(tmp, aes(cell2, value, fill=Component, colour=Component)) + geom_col() + scale_fill_manual(values = component_colours) + scale_colour_manual(values = component_colours) + labs(x="Pesudotime ordered cells", y="Cell Component Score") + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = "bottom" ) + ylim(0,NA)
print_tsne(2)+ facet_zoom(x = Tsne1_QC1 > (-8) & Tsne1_QC1<25, y= Tsne2_QC1>25, zoom.size = 1) print_tsne(5)+ facet_zoom(x = Tsne1_QC1 > (-8) & Tsne1_QC1<25, y= Tsne2_QC1>25, zoom.size = 1) # make bi colour plot like GFP tmp <- cell_data[,.(Tsne1_QC1, Tsne2_QC1, V2, V5)] tmp[V2>0, V2 := 0] tmp[,V2 := abs(V2)] tmp[,V2 := V2/max(V2)] tmp[V5>0, V5 := 0] tmp[,V5 := abs(V5)] tmp[,V5 := V5/max(V5)] tmp <- tmp[Tsne1_QC1 > (-8) & Tsne1_QC1<25 & Tsne2_QC1>25] ggplot(tmp, aes(Tsne1_QC1, Tsne2_QC1)) + geom_point(colour=rgb(red=tmp$V2, green=0, blue=tmp$V5), size=1) + ggtitle("t-SNE - experiment") + theme_dark() + theme(panel.background = element_rect(fill = "grey20", colour = NA))
V2 & 5 Cell Scores through Pseudotime
ggplot(merge_sda3_melt[Component %in% c("V2 Pre-leptotene","V5 Leptotene") & PseudoTime>15000], aes(-PseudoTime, value, colour=Component, group=Component)) + geom_point(size=0.8) + geom_smooth(se=FALSE) + ylab("Cell Component Score") + xlab("Pseudotime") + ggtitle("V5 Smoothed component score over pseudotime")
# plot example genes with predictions V2_genes <- get_top_genes(2) V5_genes <- get_top_genes(5) compare_two_genes <- unique(c(V2_genes, V5_genes)) tmp <- cell_data[somatic4==FALSE,c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1"), with=FALSE] tmp <- merge(tmp, sda_predict(compare_two_genes, name_extension = "")) tmp_m <- melt(tmp, id.vars = c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1")) tmp_m$gene = factor(tmp_m$variable, levels=compare_two_genes) #tmp$gene = factor(tmp$gene, levels = sort(as.character(unique(tmp$gene)))) tmp_m[,component := "V5 Leptotene"] tmp_m[gene %in% V2_genes, component := "V2 Pre-leptotene"] ggplot(tmp_m[PseudoTime>14000], aes(-PseudoTime, value, colour=component, group=gene)) + geom_smooth(colour="black") + geom_smooth(se=FALSE, size=0.4, aes(group=gene)) + ylab("Gene Expression") + xlab("Pseudotime") + facet_wrap(~component, scales="free_y", ncol = 1) + ggtitle("Smoothed gene expression over pseudotime - example gene from ~each component") + theme(legend.position = "none") + geom_vline(xintercept=-16100)
top.genes <- unlist(lapply(meiotic_component_order, function(x) get_top_genes(x, n = 20))) top.genes <- top.genes[top.genes %in% colnames(data)] top.genes <- unique(top.genes) tmp <- cell_data[somatic4==FALSE,c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1"), with=FALSE] tmp <- merge(tmp, sda_predict(top.genes, name_extension = "")) tmp <- melt(tmp, id.vars = c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1")) tmp$variable = factor(tmp$variable, levels=top.genes) tmp_m <- dcast(tmp, formula = PseudoTime + cell ~ variable, value.var = "value") cols3 <- colorRampPalette(brewer.pal(9, "YlOrRd"))(100) cols4 <- viridis(100, direction = -1) aheatmap(asinh(t(as.matrix(tmp_m[order(-PseudoTime)][,c(top.genes), with=FALSE]))), Colv=NA, Rowv=NA, labCol = NA, main="Gene expression over pseudotime", color=rev(colorRampPalette(brewer.pal(11, "RdYlBu"))(100)) )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.