library(pdacR) library(ggplot2) library(reshape2) library(GGally) library(sva) library(plyr) library(tinytex) library(ggpubr) library(survival) library(ggrepel) library(pdacmolgrad)
dataset <- pdacR::PACA_CA_seq geneMeans <- rowMeans(dataset$ex) genesToDelete <- which(geneMeans < .01) dataset$ex <- log2(1+dataset$ex[-genesToDelete,]) dataset$featInfo <- dataset$featInfo[-genesToDelete,] gene_lists <- pdacR::gene_lists
# ===================================================================== # perform single sample classifier for guidance tumor.classifier <- Moffitt_classifier_2019 dataset$sampInfo$SST_subtypes<- as.numeric(create.classif(dat=dataset$ex, fit=tumor.classifier$fit, classifier=tumor.classifier)$predprob) gene_lists$ADEX_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "ADEX")] gene_lists$Immunogenic_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "Immunogenic")] gene_lists$Progenitor_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "Pancreatic progenitor")] gene_lists$Squamous_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "Squamous")] # ===================================================================== # Calculate expression scores for(i in names(gene_lists)){ this_gene_list <- gene_lists[[i]] if(class(this_gene_list) %in% "data.frame"){ this_gene_list <- this_gene_list[,1] } tmp <- which(dataset$featInfo$SYMBOL %in% this_gene_list) dataset$sampInfo[i] <- colMeans((dataset$ex[tmp,]),na.rm = TRUE) } # ====================================================================== # Append purIST and molgrad predictions temp <- projectMolGrad(dataset$ex, geneSymbols = dataset$featInfo$SYMBOL) names(temp) <- paste0('molgrad_', names(temp)) temp$icgc_sample_id <- rownames(temp) # print(head(temp)) dataset$sampInfo <- join(dataset$sampInfo, temp, by = 'icgc_sample_id') tmp <- as.matrix(dataset$ex) rownames(tmp) <- dataset$featInfo$SYMBOL dataset$ex <- tmp dataset$sampInfo$purIST <- as.numeric(create.classif(dataset$ex, Moffitt_classifier_2019, fit = Moffitt_classifier_2019$fit)$predprob) dataset$sampInfo$molgrad_scaled <- GGally::rescale01(dataset$sampInfo$molgrad_PDX)
# Nanostring classifier # -------------------------------------- # Old # tmp <- create.classif(dat=dataset$ex, # fit=tumor.classifier$fit, # classifier=tumor.classifier) # # gene_lists$TSP.tumor <- rownames(tmp$indmat) # # dataset$featInfo_new <- join(dataset$featInfo, # data.frame(SYMBOL = rownames(tmp$indmat)), # type = "full") # # colnames(tmp$indmat) <- colnames(dataset$ex_new) # # dataset$ex_new <- rbind(dataset$ex_new, # tmp$indmat)
sampleset <- 1:nrow(dataset$sampInfo) tmp.k <- 2 tmp.ncusts <- 2 featureset <- which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Basal.25))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Classical.25)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Normal.25)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Activated.25)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Collisson.Exocrine)))) smallx <- t(scale(t(dataset$ex[featureset,sampleset]))) sampletree <- convert_order_to_dendrogram(order(dataset$sampInfo$actual_type)) ColSideColors <- getSideColors(sampInfo = dataset$sampInfo[sampleset,], sampleTracks = c("actual_type"), colorlists = list(c("chartreuse2", "orange", "black"), drop.levels = TRUE)) RowSideColors <- getSideColors(sampInfo = data.frame(basal =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Basal.25, classical =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Classical.25, normal =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Normal.25, activated =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Activated.25, exocrine = dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Collisson.Exocrine), sampleTracks = c("basal", "classical", "normal", "activated", "exocrine"), colorlists = list(b=c("white","orange"), c=c("white","blue"), n=c("white","lightblue"), a=c("white","brown"), e=c("white","purple")))
heatmap.3(x = smallx, scale="row", labRow = dataset$featInfo$SYMBOL[featureset], col = colorRampPalette(c("blue", "white", "red"))(n = 299), Colv = sampletree, dendrogram = c("column"), Rowv = FALSE, distfun = function(x) as.dist((1-cor(t(x)))/2), ColSideColors = ColSideColors$SideColors, ColSideColorsSize = 6, RowSideColorsSize = 6, RowSideColors = t(RowSideColors$SideColors), margins = c(5,20)) legend(xy.coords(x=.90,y=1), legend=c(ColSideColors$text), fill=c(ColSideColors$colors), border=FALSE, bty="n", y.intersp = 0.9, cex=0.5)
df <- dataset for(genes in noquote(c(names(gene_lists), "molgrad_PDX", "molgrad_Puleo", "molgrad_ICGCarray", "molgrad_ICGCrnaseq", "purIST", "molgrad_scaled"))){ d <- data.frame(score = df$sampInfo[,genes], type = df$sampInfo$actual_type) p <- ggplot(d, aes(y = score, x = type)) + geom_dotplot(aes(fill = type), binaxis='y', stackdir='center', dotsize=1) + stat_compare_means(ref.group = "Cell line") + labs(title = paste(genes, " score")) + theme_pubr() print(p) }
dataset <- pdacR::Moffitt_S2 geneMeans <- rowMeans(dataset$ex) genesToDelete <- which(geneMeans < .01) dataset$ex <- log2(1+dataset$ex[-genesToDelete,]) dataset$featInfo <- dataset$featInfo[-genesToDelete,] gene_lists <- pdacR::gene_lists
# dataset$corrected <- ComBat(dat = dataset$ex, # batch = as.numeric(dataset$sampInfo$specimen_type))
pca <- prcomp((dataset$ex)) ggpairs(data = data.frame(data.frame(pca$rotation[,1:3]), kit = dataset$sampInfo$sample_type, sst = factor(dataset$sampInfo$sample_type)), columns = 1:5, aes(color = sst)) # pca <- prcomp((dataset$corrected)) # # ggpairs(data = data.frame(data.frame(pca$rotation[,1:3]), # kit = dataset$sampInfo$specimen_type, # sst = factor(dataset$sampInfo$specimen_type)), # columns = 1:5, # aes(color = sst))
# ===================================================================== # perform single sample classifier for guidance tumor.classifier <- Moffitt_classifier_2015 dataset$sampInfo$SST_subtypes<- as.numeric(create.classif(dat=dataset$ex, fit=tumor.classifier$fit, classifier=tumor.classifier)$predprob) gene_lists$ADEX_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "ADEX")] gene_lists$Immunogenic_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "Immunogenic")] gene_lists$Progenitor_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "Pancreatic progenitor")] gene_lists$Squamous_unique <- gene_lists$ICGC.SAM$symbols[which(gene_lists$ICGC.SAM$type %in% "Squamous")] # ===================================================================== # Calculate expression scores for(i in names(gene_lists)){ this_gene_list <- gene_lists[[i]] if(class(this_gene_list) %in% "data.frame"){ this_gene_list <- this_gene_list[,1] } tmp <- which(dataset$featInfo$SYMBOL %in% this_gene_list) dataset$sampInfo[i] <- colMeans((dataset$ex[tmp,]),na.rm = TRUE) } # ====================================================================== # Append purIST and molgrad predictions temp <- projectMolGrad(dataset$ex, geneSymbols = dataset$featInfo$SYMBOL) names(temp) <- paste0('molgrad_', names(temp)) temp$joiner <- rownames(temp) dataset$sampInfo$joiner <- rownames(dataset$sampInfo) print(head(temp)) dataset$sampInfo <- join(dataset$sampInfo, temp, by = 'joiner') tmp <- as.matrix(dataset$ex) rownames(tmp) <- dataset$featInfo$SYMBOL dataset$ex <- tmp dataset$sampInfo$purIST <- as.numeric(create.classif(dataset$ex, Moffitt_classifier_2019, fit = Moffitt_classifier_2019$fit)$predprob) dataset$sampInfo$molgrad_scaled <- GGally::rescale01(dataset$sampInfo$molgrad_PDX)
# Nanostring classifier # -------------------------------------- # Old # tmp <- create.classif(dat=dataset$ex, # fit=tumor.classifier$fit, # classifier=tumor.classifier) # # gene_lists$TSP.tumor <- rownames(tmp$indmat) # # dataset$featInfo_new <- join(dataset$featInfo, # data.frame(SYMBOL = rownames(tmp$indmat)), # type = "full") # # colnames(tmp$indmat) <- colnames(dataset$ex_new) # # dataset$ex_new <- rbind(dataset$ex_new, # tmp$indmat)
sampleset <- 1:nrow(dataset$sampInfo) featureset <- which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$ICGC.Immunogenic.Up) ) & dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$ICGC.SAM[[1]]) ) ) #smallx <- t(scale(t(dataset$ex[featureset,sampleset]))) smallx <- dataset$ex[featureset,sampleset] sampletree <- convert_order_to_dendrogram(order(dataset$sampInfo$sample_type)) ColSideColors <- getSideColors(sampInfo = dataset$sampInfo[sampleset,], sampleTracks = c("sample_type"), colorlists = list(c("dodgerblue2", "chartreuse2", "orange", "black"), drop.levels = TRUE)) RowSideColors <- getSideColors(sampInfo = data.frame(immunogenic =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$ICGC.Immunogenic.Up, ADEX =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$ICGC.ADEX.Up, progenitor =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$ICGC.Progenitor.Up, classical =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Classical.25, squamous =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$ICGC.Squamous.Up, SAM =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$ICGC.SAM[[1]]), sampleTracks = c("immunogenic", "ADEX", "progenitor", "classical", "squamous", "SAM"), colorlists = list(i = c("white", "darkred"), a = c("white", "deeppink"), p = c("white", "blue"), c = c("white", "blue"), s = c("white", "orange"), S = c("white", "grey") ) ) heatmap.3(x = smallx, scale="row", col = colorRampPalette(c("blue", "white", "red"))(n = 299), Colv = sampletree, dendrogram = c("row"), labCol = "", labRow = "", Rowv = TRUE, distfun = function(x) as.dist((1-cor(t(x)))/2), ColSideColors = ColSideColors$SideColors, ColSideColorsSize = 1, RowSideColorsSize = 4, RowSideColors = t(RowSideColors$SideColors), margins = c(10,10)) legend(xy.coords(x=.90,y=1), legend=c(ColSideColors$text), fill=c(ColSideColors$colors), border=FALSE, bty="n", y.intersp = 0.9, cex=0.5) print("% of progentior overlap with immunogenic") print(length(which(pdacR::gene_lists$ICGC.Immunogenic.Up %in% pdacR::gene_lists$ICGC.Progenitor.Up)) / length(pdacR::gene_lists$ICGC.Immunogenic.Up)) print("% of ADEX overlap with immunogenic") print(length(which(pdacR::gene_lists$ICGC.Immunogenic.Up %in% pdacR::gene_lists$ICGC.ADEX.Up)) / length(pdacR::gene_lists$ICGC.Immunogenic.Up))
sampleset <- 1:nrow(dataset$sampInfo) tmp.k <- 2 tmp.ncusts <- 2 featureset <- which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Basal.25))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Classical.25)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Collisson.QM)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Collisson.Classical)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Normal.25)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Moffitt.Activated.25)))) featureset <- c(featureset, which(dataset$featInfo$SYMBOL %in% c(as.character(pdacR::gene_lists$Collisson.Exocrine)))) smallx <- t(scale(t(dataset$ex[featureset,sampleset]))) sampletree <- convert_order_to_dendrogram(order(dataset$sampInfo$sample_type)) ColSideColors <- getSideColors(sampInfo = dataset$sampInfo[sampleset,], sampleTracks = c("sample_type"), colorlists = list(c("dodgerblue2", "chartreuse2", "orange", "black"), drop.levels = TRUE)) RowSideColors <- getSideColors(sampInfo = data.frame(basal =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Normal.25, classical =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Classical.25, qm =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Collisson.QM, col.classical =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Collisson.Classical, normal =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Normal.25, activated =dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Moffitt.Activated.25, exocrine = dataset$featInfo$SYMBOL[featureset] %in% pdacR::gene_lists$Collisson.Exocrine), sampleTracks = c("basal", "classical", "qm", "col.classical", "normal", "activated", "exocrine"), colorlists = list(b=c("white","orange"), c=c("white","blue"), qm = c("white", "darkgreen"), cc = c("white", "deeppink"), n=c("white","lightblue"), a=c("white","brown"), e=c("white","purple")))
heatmap.3(x = smallx, scale="row", labRow = dataset$featInfo$SYMBOL[featureset], col = colorRampPalette(c("blue", "white", "red"))(n = 299), Colv = sampletree, dendrogram = c("column"), Rowv = FALSE, distfun = function(x) as.dist((1-cor(t(x)))/2), ColSideColors = ColSideColors$SideColors, ColSideColorsSize = 6, RowSideColorsSize = 6, RowSideColors = t(RowSideColors$SideColors), margins = c(5,20)) legend(xy.coords(x=.90,y=1), legend=c(ColSideColors$text), fill=c(ColSideColors$colors), border=FALSE, bty="n", y.intersp = 0.9, cex=0.5)
df <- dataset IDs <- sapply(X = df$sampInfo$publicID, FUN = function(x){ y <- strsplit(x = as.character(x), split = "-", fixed = TRUE) return(as.numeric(y[[1]][2])) }) df$sampInfo$easyID <- IDs noscale <- c("molgrad_PDX", "molgrad_Puleo", "molgrad_ICGCarray", "molgrad_ICGCrnaseq", "purIST", "molgrad_scaled") for(genes in noquote(c(names(gene_lists), "molgrad_PDX", "molgrad_Puleo", "molgrad_ICGCarray", "molgrad_ICGCrnaseq", "purIST", "molgrad_scaled"))){ if(genes %in% noscale){ d <- data.frame(score = df$sampInfo[,genes], type = df$sampInfo$sample_type, ID = df$sampInfo$easyID) p <- ggplot(d, aes(y = score, x = type)) + geom_dotplot(aes(fill = type), binaxis='y', stackdir='center', binwidth = 0.05, dotsize= 0.5) + stat_compare_means(ref.group = "Primary") + labs(title = paste(genes, " score")) + theme_pubr() print(p) }else{ d <- data.frame(score = scale(df$sampInfo[,genes], center = TRUE, scale = TRUE), type = df$sampInfo$sample_type, ID = df$sampInfo$easyID) p <- ggplot(d, aes(y = score, x = type)) + geom_dotplot(aes(fill = type), binaxis='y', stackdir='center', binwidth = 0.115, dotsize=1.25) + stat_compare_means(ref.group = "Primary") + ylim(-2.1,4) + labs(title = paste(genes, " score")) + theme_pubr() print(p) } }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.