Pseudotime

Using the position-developmental time relationship on the t-SNE plot we can define a pseudotime by fitting a principal line though the t-SNE plot and assigning each cell a position on this line based on which part of the line is closest. This approach is similar (the same?) to that used in SCUBA. We can then see how average component scores change over time and the consistency with gene expression changes over time - either by smoothed curves, or heatmaps.

library(testisAtlas)
load2("../data/cache")
load_component_orderings()

Pseudotime Construction

Low library size cells were removed prior to construction of the principal curve.

# create principal curve using Tsne (without somatic cells)
# build up curve

principal_curves <- testisAtlas::princurve_bootstrap(as.matrix(cell_data[somatic4==FALSE & V38<3, .(Tsne1_QC1, Tsne2_QC1)]))
principal_curves[["df_8_unbootstrapped"]] <- princurve::principal.curve(as.matrix(cell_data[somatic4==FALSE & V38<3, .(Tsne1_QC1, Tsne2_QC1)]), df=8)

saveRDS(principal_curves, "../data/cache/principal_curves.rds")
principal_curves <- readRDS("../data/cache/principal_curves.rds")

# plot Principal curve on Tsne plot

print_tsne("Prm2", curve = T, principal_curve = "df_4", curve_width = 1) + ggtitle("Principal Curve on tSNE, df=4")

print_tsne("Prm2", curve = T, principal_curve = "df_8_unbootstrapped", curve_width = 1) + ggtitle("Principal Curve on tSNE, df=8, without bootstrap")

print_tsne("Prm2", curve = T, principal_curve = "df_9", curve_width = 1) + ggtitle("Principal Curve on tSNE, bootstrap to df=9")

print_tsne("Prm2", curve = T, principal_curve = "df_10", curve_width = 1) + ggtitle("Principal Curve on tSNE, bootstrap to df=10")
# Assign pseudotime based on position along principal line
pseudotime <- cbind(principal_curves[["df_9"]]$s[principal_curves[["df_9"]]$tag, 1], principal_curves[["df_9"]]$s[principal_curves[["df_9"]]$tag, 2])
find_pseudotime <- function(sample.point){ which.min(colSums((t(pseudotime) - c(sample.point))^2)) }
cell_pseudotime <- apply(as.matrix(cell_data[,.(Tsne1_QC1,Tsne2_QC1)]), 1, find_pseudotime)

cell_data[,PseudoTime1 := pseudotime[cell_pseudotime,][,1]]
cell_data[,PseudoTime2 := pseudotime[cell_pseudotime,][,2]]
cell_data[,PseudoTime := cell_pseudotime]

# points closer to line
# ggplot(cell_data, aes(PseudoTime1, PseudoTime2, colour=sqrt(library_size))) +
#     geom_jitter(width=0.5, height=0.5, size=0.5) + scale_colour_distiller(palette="YlOrRd", direction=-1) + ggtitle("Cells moved towards point on principal Curve")

cell_data[somatic4==TRUE, PseudoTime := NA]

ggplot(cell_data, aes(Tsne1_QC1, Tsne2_QC1, color=PseudoTime)) +
  geom_point(size=0.25) +
  scale_color_viridis() +
  theme(legend.position = "bottom") +
  ggtitle("SDA t-SNE - cells coloured by PseudoTime")

ggplot(cell_data, aes(Tsne1_QC2, Tsne2_QC2, color=PseudoTime)) +
  geom_point(size=0.25) +
  scale_color_viridis() +
  theme(legend.position = "bottom") +
  ggtitle("SDA t-SNE (differnt seed) - cells coloured by PseudoTime")

ggplot(cell_data, aes(Tsne1, Tsne2, color=PseudoTime)) +
    geom_point(size=0.25) +
    scale_color_viridis() +
    theme(legend.position = "bottom") +
    ggtitle("Raw t-SNE - cells coloured by New PseudoTime")

Component scores through Pseudotime

The component scores and gene loadings show a cascade of expression programmes as spermatogenesis progresses.

tmp <- cell_data[,paste0("V",1:50), with=FALSE]
names(tmp) <- component_order_dt$name
tmp <- as.matrix(tmp)[order(-cell_data$PseudoTime,cell_data$hclust_group),component_order_dt[order(-pseudotime_average,name)]$name]

colnames(tmp) <- paste0("V",component_order_dt[order(-pseudotime_average,name)]$component_number," ",colnames(tmp))
t=20
tmp[tmp>t] <- t
tmp[tmp<(-t)] <- (-t)
pdf("../results/scores_heatmap.pdf", width = 12, height = 9)
draw(Heatmap(t(tmp[,!grepl("Single Cell",colnames(tmp))]),
        col = log_colour_scale(tmp, scale = 0.5),
        cluster_columns = F, cluster_rows = F,
        row_names_max_width = unit(10, "npc"),
        row_gap = unit(0, "mm"), border = "grey",
        row_split = c(1:45),
        heatmap_legend_param = list(title = "Cell Score", legend_direction="horizontal")),
heatmap_legend_side="bottom")
dev.off()

Components overlap and vary continuously

components_w_pseudotime <- paste0("V",component_order_dt[!is.na(pseudotime_average)][order(-pseudotime_average)]$component_number)

# Plot component loading by pseudotime
merge_sda3_melt <- melt(cell_data[somatic4==FALSE, c("cell", "PseudoTime", components_w_pseudotime), with=FALSE], id.vars = c("cell","PseudoTime"), variable.name = "Component")

levels(merge_sda3_melt$Component) <- paste(levels(merge_sda3_melt$Component), component_order_dt[!is.na(pseudotime_average)][order(-pseudotime_average)]$name)
tmp <- merge_sda3_melt[Component %in% levels(merge_sda3_melt$Component)[c(4,16,19,25,28)]] #c(4,12,13,16,19,21,25,28)

tmp[as.integer(Component) %in% c(4,13,16,19,25), value := value*(-1)]

component_colours <- c(RColorBrewer::brewer.pal(12,"Paired"),"black","white")# ,"yellow")
component_colours <- c("#6A3D9A", "#33A02C", "#FB9A99", "#B15928",
                       "white", "#A6CEE3", "#FDBF6F", "#E31A1C",
                       "#1F78B4", "#CAB2D6", "#B2DF8A", "black", 
                       "#FF7F00", "#FFFF99")


scores_pseudotime <- ggplot(tmp, aes(-PseudoTime, value, colour=Component, group=Component)) +
  geom_point_rast(alpha=0.4, size=1, stroke=0) +
  geom_smooth(size=1, method = "gam", formula = y ~ s(x, k = 20), se = F) +#colour="black",
  ylab("Cell Component Score") +
  xlab("Pseudotime") +
  theme_minimal() +
  scale_colour_manual(values=component_colours[c(2,8,9,12,13)])+ #RColorBrewer::brewer.pal(9,"Set1")[-6]
 theme(legend.position = "none") +
  ylim(-2,8)

scores_pseudotime

saveRDS(scores_pseudotime,"../data/plots/scores_pseudotime.rds")

All scores faceted

scores_pseudotime <- ggplot(merge_sda3_melt, aes(-PseudoTime, value, colour=Component)) +
  geom_point(alpha=0.1, size=0.2) +
  geom_smooth(colour="black", size=0.2) +
  ylab("Cell Component Score") +
  xlab("Pseudotime") +
  ggtitle("Component score through pseudotime") +
  facet_wrap(~Component, scales="free_y") +
  theme_minimal() +
  theme(legend.position = "none")

scores_pseudotime

selected scores faceted

# 1, 30
#brewer.pal(3, "Set1")[3]

scores_pseudotime <- ggplot(merge_sda3_melt[Component %in% levels(merge_sda3_melt$Component)[c(2, 4, 8, 10,12,13, 16, 19, 21, 24, 25, 28)]], aes(-PseudoTime, value)) +
  geom_point(alpha=0.1, size=0.2, aes(colour=Component)) +
  geom_smooth(colour="black", size=0.4, method = "gam", formula = y ~ s(x, bs = "ad", k=250)) +
  ylab("Cell Component Score") +
  xlab("Pseudotime") +
  facet_wrap(~Component, scales="free_y", ncol=4) +
  theme_minimal() +
  theme(legend.position = "none")

scores_pseudotime

pdf("../results/scores_pseudotime_facet.pdf", width = 16, height = 9)
scores_pseudotime
dev.off()

Structure-esque plot

tmp <- merge_sda3_melt[Component %in% levels(merge_sda3_melt$Component)[c(2,4,6,8,10,12,13,16,19,21,24,25,28,30)]]

tmp[as.integer(Component) %in% c(4,6,8,13,16,19,25,30), value := value*(-1)]

tmp[, cell2 := factor(cell, levels = merge_sda3_melt[order(-PseudoTime)][Component=="V5 (pre)Leptotene"]$cell)]
tmp[,value_pos := max(c(0,value)),by=.(cell,Component)]

#component_colours <- c(RColorBrewer::brewer.pal(9,"Set1"),"skyblue", "black","white")
component_colours <- c(RColorBrewer::brewer.pal(12,"Paired"),"black","white")
component_colours <- c("#6A3D9A", "#33A02C", "#FB9A99", "#B15928",
                       "white", "#A6CEE3", "#FDBF6F", "#E31A1C",
                       "#1F78B4", "#CAB2D6", "#B2DF8A", "black", 
                       "#FF7F00", "#FFFF99")

structure <- ggplot(tmp, aes(cell2, value_pos, fill=Component, colour=Component)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = component_colours) +
  scale_colour_manual(values = component_colours) +
  labs(x="Pseudotime ordered cells", y="Normalised Cell Component Score") +
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = "bottom")+
  guides(colour = guide_legend(ncol = 2), fill = guide_legend(ncol = 2)) +
  scale_y_continuous(expand=c(0,0))

structure

saveRDS(structure, file = "../data/plots/structure_plot.rds")

pdf("../results/structure.pdf", height = 7, width = 10)
structure
dev.off()
png("../results/striking_image_4B.png",height = 900*5, width = 1800*5)
structure + theme_void() + theme(legend.position = "none")
dev.off()

Non-normalised Structure plot

ggplot(tmp, aes(cell2, value, fill=Component, colour=Component)) +
  geom_col() +
  scale_fill_manual(values = component_colours) +
  scale_colour_manual(values = component_colours) +
  labs(x="Pesudotime ordered cells", y="Cell Component Score") +
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), legend.position = "bottom" ) +
  ylim(0,NA)

Gene Expression through Pseudotime

Pre-Leptotene vs Leptotene (V2 vs V5)

print_tsne(2)+
  facet_zoom(x = Tsne1_QC1 > (-8) & Tsne1_QC1<25, y= Tsne2_QC1>25, zoom.size = 1)

print_tsne(5)+
  facet_zoom(x = Tsne1_QC1 > (-8) & Tsne1_QC1<25, y= Tsne2_QC1>25, zoom.size = 1)

# make bi colour plot like GFP
tmp <- cell_data[,.(Tsne1_QC1, Tsne2_QC1, V2, V5)]
tmp[V2>0, V2 := 0]
tmp[,V2 := abs(V2)]
tmp[,V2 := V2/max(V2)]
tmp[V5>0, V5 := 0]
tmp[,V5 := abs(V5)]
tmp[,V5 := V5/max(V5)]

tmp <- tmp[Tsne1_QC1 > (-8) & Tsne1_QC1<25 & Tsne2_QC1>25]

ggplot(tmp, aes(Tsne1_QC1, Tsne2_QC1)) +
  geom_point(colour=rgb(red=tmp$V2, green=0, blue=tmp$V5), size=1) +
  ggtitle("t-SNE - experiment") +
  theme_dark() + theme(panel.background = element_rect(fill = "grey20", colour = NA))

V2 & 5 Cell Scores through Pseudotime

ggplot(merge_sda3_melt[Component %in% c("V2 Pre-leptotene","V5 Leptotene") & PseudoTime>15000], aes(-PseudoTime, value, colour=Component, group=Component)) +
  geom_point(size=0.8) +
  geom_smooth(se=FALSE) +
  ylab("Cell Component Score") +
  xlab("Pseudotime") +
  ggtitle("V5 Smoothed component score over pseudotime")
# plot example genes with predictions
V2_genes <- get_top_genes(2)
V5_genes <- get_top_genes(5)
compare_two_genes <- unique(c(V2_genes, V5_genes))

tmp <- cell_data[somatic4==FALSE,c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1"), with=FALSE]
tmp <- merge(tmp, sda_predict(compare_two_genes, name_extension = ""))

tmp_m <- melt(tmp, id.vars = c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1"))
tmp_m$gene = factor(tmp_m$variable, levels=compare_two_genes)
#tmp$gene = factor(tmp$gene, levels = sort(as.character(unique(tmp$gene))))

tmp_m[,component := "V5 Leptotene"]
tmp_m[gene %in% V2_genes, component := "V2 Pre-leptotene"]

ggplot(tmp_m[PseudoTime>14000], aes(-PseudoTime, value, colour=component, group=gene)) +
  geom_smooth(colour="black") +
  geom_smooth(se=FALSE, size=0.4, aes(group=gene)) +
  ylab("Gene Expression") + xlab("Pseudotime") +
  facet_wrap(~component, scales="free_y", ncol = 1) +
  ggtitle("Smoothed gene expression over pseudotime - example gene from ~each component") +
  theme(legend.position = "none") +
  geom_vline(xintercept=-16100)

Heatmap of imputed expression

top.genes <- unlist(lapply(meiotic_component_order, function(x) get_top_genes(x, n = 20)))
top.genes <- top.genes[top.genes %in% colnames(data)]
top.genes <- unique(top.genes)

tmp <- cell_data[somatic4==FALSE,c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1"), with=FALSE]
tmp <- merge(tmp, sda_predict(top.genes, name_extension = ""))
tmp <- melt(tmp, id.vars = c("cell","PseudoTime","Tsne1_QC1", "Tsne2_QC1"))
tmp$variable = factor(tmp$variable, levels=top.genes)

tmp_m <- dcast(tmp, formula = PseudoTime + cell ~ variable, value.var = "value")

cols3 <- colorRampPalette(brewer.pal(9, "YlOrRd"))(100)
cols4 <- viridis(100, direction = -1)

aheatmap(asinh(t(as.matrix(tmp_m[order(-PseudoTime)][,c(top.genes), with=FALSE]))), 
         Colv=NA, Rowv=NA, labCol = NA, 
         main="Gene expression over pseudotime",
         color=rev(colorRampPalette(brewer.pal(11, "RdYlBu"))(100))
         )


MyersGroup/testisAtlas documentation built on Nov. 29, 2020, 9:21 p.m.