knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE, fig.width=10, fig.height=6) library(knitr) library(data.table) library(tidyr) library(dplyr) library(reshape2) library(ggplot2) library(plotly) library(scales) library(kableExtra) library(DT) library(RColorBrewer) theme_set(theme_bw()) show_text <- TRUE
if ("{{DMP_ID}}" != "") { asis_output("### DMP Patient ID: {{DMP_ID}} \n") }
if ("{{DMP_SAMPLE_ID}}" != "") { asis_output("### DMP Sample ID: {{DMP_SAMPLE_ID}} \n") }
dmp_id <- "{{DMP_ID}}" has_dmp <- F if (dmp_id != "") { has_dmp = T } if (dmp_id != "") { page_title = "{{PATIENT_ID}} ({{DMP_ID}})" } else { page_title = "{{PATIENT_ID}}" }
if ("{{TUMOR_TYPE}}" != "") { asis_output("### {{TUMOR_TYPE}} \n") }
sample="{{PATIENT_ID}}" access_data_analysis_results<-"{{RESULTS}}" access_data_analysis_cna_dir<-"{{CNA_RESULTS_DIR}}" impact_sample_id<-"{{DMP_SAMPLE_ID}}" impact_ccf_file<-"{{DMP_MAF_PATH}}" metadata_file<-"{{METADATA}}" ## read metadata files ## metadata <- fread(metadata_file) ## Possible timepoints in the treatment info file, and their associated colors timepointcolors <- NULL if ("timepoint" %in% colnames(metadata)) { metadata <- metadata[,c("sample_id", "sex", "collection_day", "timepoint")] timepoints <- unique(metadata$timepoint) colorcount <- length(timepoints) getPalette = colorRampPalette(brewer.pal(12, "Paired")) timepointcolors <- getPalette(colorcount) names(timepointcolors) <- timepoints } else { metadata <- metadata[,c("sample_id", "sex", "collection_day")] }
# Function to get variant colors # gg_color_hue <- function(n) { hues = seq(15, 375, length = n + 1) hcl(h = hues, l = 65, c = 100)[1:n] } # Function to generate VAF plots vaf_plot <- function(variant_table, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE, cnadjusted=FALSE){ tmp<-variant_table tmp$VarName<-factor(tmp$VarName,levels=names(varcolors)) if(log == FALSE){ if(cnadjusted==FALSE){ fig<-ggplot(tmp, aes(x=collection_day, y=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) + scale_y_continuous(labels = scales::percent_format(accuracy = yaccuracy)) + ylab("VAF") } else{ fig<-ggplot(tmp, aes(x=collection_day, y=adjustedvaf, vaf=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) + scale_y_continuous(labels = scales::percent_format(accuracy = yaccuracy)) + ylab("Adjusted VAF") } } else{ if(cnadjusted==FALSE){ tmp$vafforlog = tmp$vaf + 0.0001 fig<-ggplot(tmp, aes(x=collection_day, y=vafforlog, vaf=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) + scale_y_log10(labels = scales::percent_format(accuracy = yaccuracy)) + ylab("VAF") } else{ tmp$vafforlog = tmp$adjustedvaf + 0.0001 fig<-ggplot(tmp, aes(x=collection_day, y=vafforlog, adjustedvaf=adjustedvaf, vaf=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) + scale_y_log10(labels = scales::percent_format(accuracy = yaccuracy)) + ylab("VAF") } } fig<-fig + geom_point() + geom_line(aes(group=VarName)) + facet_grid(clonality~.) + theme( legend.title=element_blank(), strip.background = element_blank(), legend.text=element_text(size=6), legend.spacing.y = unit(0.5, "cm"), legend.key.height = unit(2, "cm"), axis.text.x = element_text(angle=45)) + scale_color_manual(values = varcolors) + scale_x_continuous(breaks=xbreaks, limits=xlimits, labels=xlabels) + labs(y="VAF") + xlab("") if(cnadjusted==FALSE){ fig<-ggplotly(fig, tooltip=c("collection_day","vaf","VarName","Hotspot","sample_id")) } else{ fig<-ggplotly(fig, tooltip=c("collection_day","vaf","adjustedvaf","VarName","Hotspot","sample_id")) } fig<-ggplotly(fig) return(fig) } # Function to generate treatment plot treatment_plot <- function(treatment_table, xbreaks, xlimits, colors, ids=c("collection_day","sample_id")){ # Any column other than the ids columns will be plotted tmp<-melt(treatment_table, id=ids) tmp$value<-factor(tmp$value, levels=names(colors)) fig<-ggplot(tmp, aes(x=collection_day, y=variable, fill=value)) + geom_tile() + theme(panel.grid=element_blank(), legend.title = element_blank()) + scale_x_continuous(breaks=xbreaks) + scale_fill_manual(values=colors) fig<-ggplotly(fig) fig<-style(fig, showlegend=FALSE) return(fig) } cna_plot<-function(cna, xlimits, xbreaks, xlabels){ cna$fc<-round(cna$fc,2) fig<-ggplot(cna, aes(x=collection_day, sampleid=sample_id, gene=Hugo_Symbol)) + geom_bar(aes(y=1, fill=fc), stat="identity",width=10) + scale_fill_gradient2(low="white",mid="red",high="red3",midpoint=1.5, limits=c(0,max(cna$fc))) + theme(legend.title=element_blank(), panel.grid.major.y=element_blank(), strip.background = element_blank(), axis.text.x=element_text(angle=45), axis.ticks.y=element_blank(), axis.text.y=element_blank()) + scale_x_continuous(breaks=xbreaks, limits=xlimits, labels=xlabels) + xlab("") + facet_grid(Hugo_Symbol~.) fig<-ggplotly(fig, tooltip=c("collection_day","CNA","sample_id","fc","Hugo_Symbol")) return(fig) } print_table<-function(table){ datatable(table, class='cell-border stripe compact', filter = 'top', rownames=FALSE, escape=FALSE, extensions = 'Buttons', options=list(scrollX=T, autoWidth = TRUE, dom = 'Bfrtip',buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) }
## read variant files ## accesstab<-fread(access_data_analysis_results, na.strings=c("NA","")) dmp <- data.frame() if (impact_ccf_file != "") { dmp<-fread(impact_ccf_file)[,c("Hugo_Symbol", "Chromosome", "Start_Position", "Reference_Allele", "Tumor_Seq_Allele2", "tcn", "expected_alt_copies", "clonality")] dmp$Chromosome<-as.character(dmp$Chromosome) } ## ACCESS variants ## tab1<-accesstab[,c("Hugo_Symbol","Chromosome", "Start_Position", "Variant_Classification", "HGVSp_Short", "Reference_Allele", "Tumor_Seq_Allele2", "Hotspot", "DMP", "CH")] tab3 <- NULL dmpsamples <- NULL dmpsamples.ids <- NULL if ('{{PLOT_IMPACT}}') { tab2<-accesstab %>% select(matches("(___total|___DMP_Tumor)$")) colnames(tab2)<-gsub("___total","",colnames(tab2)) colnames(tab2)<-gsub("___DMP_Tumor","",colnames(tab2)) dmpsamples.ids <- accesstab %>% select(matches("___DMP_Tumor$")) colnames(dmpsamples.ids)<-gsub("___DMP_Tumor","",colnames(dmpsamples.ids)) dmpsamples.ids <- colnames(dmpsamples.ids) } else { tab2<-accesstab %>% select(matches("___total$")) colnames(tab2)<-gsub("___total","",colnames(tab2)) tab3<-accesstab %>% select(matches("___DMP_(Tumor|Normal)$")) colnames(tab3)<-gsub("___DMP_Tumor","",colnames(tab3)) colnames(tab3)<-gsub("___DMP_Normal","",colnames(tab3)) dmpsamples<-colnames(tab3) } cmo_sample_id_order<-metadata$sample_id[order(metadata$collection_day)] cmo_sample_id_order<-cmo_sample_id_order[cmo_sample_id_order %in% colnames(tab2)] tab2<-data.frame(tab2,check.names=FALSE)[,cmo_sample_id_order, drop=FALSE] tab<-cbind(tab1,tab2) if (!is.null(tab3)) { tab<-cbind(tab,tab3) } tab$VarType <- "SNV_INDEL" tab[grep("__",tab$Hugo_Symbol),"VarType"]<-"SV" tab$Hugo_Symbol<- gsub("__","-",tab$Hugo_Symbol) tab$Chromosome<-as.character(tab$Chromosome) if (nrow(dmp) > 0) { tabletoprint<-merge(tab, dmp, all.x=TRUE) } else { tabletoprint <- tab } ## read CNA files ## all.cna.files <- list.files(path = access_data_analysis_cna_dir, pattern = sample, full.names=TRUE) cna<-do.call('rbind', lapply(all.cna.files,fread)) %>% rename(sample_id=Tumor_Sample_Barcode) if( nrow(cna) > 0){ cna<-merge(cna, metadata,by=c("sample_id")) cna$fc<-as.numeric(cna$fc) }
## Get VAFs of variants ## temp <- melt(tab, id=c("Hugo_Symbol","Chromosome", "Start_Position", "Variant_Classification", "HGVSp_Short", "Reference_Allele", "Tumor_Seq_Allele2", "Hotspot", "DMP", "CH", "VarType", dmpsamples)) %>% separate(col=value, into=c("num1", "num2", "num3")) temp$vaf = as.numeric(temp$num1)/as.numeric(temp$num2) temp$total=temp$num2 tempsv<-subset(temp,temp$VarType=="SV") temp[which(temp$VarType=="SV"), "vaf"] = (as.numeric(tempsv$num1)+as.numeric(tempsv$num2))/as.numeric(tempsv$num3) temp[which(temp$VarType=="SV"), "total"]=tempsv$num3 temp$Chromosome <- as.character(temp$Chromosome) temp$vaf=temp$vaf if (nrow(dmp) > 0) { # merge with DMP combined <- merge(temp, dmp, all.x=TRUE) %>% rename(sample_id = variable) } else { combined <- temp %>% rename(sample_id = variable) combined[, "clonality"] <- NA } final <- merge(combined, metadata, by=c("sample_id"), all.x=TRUE) # Create a variant name column final$VarName=paste(final$Hugo_Symbol, final$HGVSp_Short) final[is.na(final$HGVSp_Short), "VarName"] <- paste0(final$Hugo_Symbol, " ", final$Chromosome, ":", final$Start_Position, " ", final$Reference_Allele, ">", final$Tumor_Seq_Allele2)[is.na(final$HGVSp_Short)] final[is.na(final$HGVSp_Short) & nchar(final$Reference_Allele)>5,"VarName"] <- paste0(final$Hugo_Symbol, " ", final$Chromosome, ":", final$Start_Position, " ", substr(final$Reference_Allele,1,3),"..", ">", final$Tumor_Seq_Allele2)[is.na(final$HGVSp_Short)] final[is.na(final$HGVSp_Short) & nchar(final$Tumor_Seq_Allele2)>5,"VarName"] <- paste0(final$Hugo_Symbol, " ", final$Chromosome, ":", final$Start_Position, " ", final$Reference_Allele,1,3, ">", substr(final$Tumor_Seq_Allele2,1,3),"..")[is.na(final$HGVSp_Short)] final[which(final$VarType=="SV"),"VarName"]<-final$Hugo_Symbol[which(final$VarType=="SV")] # Select variant colors final$vaf<-round(final$vaf,4) final<-final[!is.na(final$vaf),] final$total<-as.numeric(final$total) final<-subset(final, final$total>=100) vafsum<-aggregate(final$vaf,by=list(VarName=final$VarName),FUN=sum) final<-subset(final, !final$VarName %in% vafsum[which(vafsum$x==0),"VarName"]) varcolors<-c(gg_color_hue(length(unique(final$VarName))),"black") names(varcolors)<-c(unique(final$VarName),"average") tabletoprint<-unique(merge(unique(final[,c("Hugo_Symbol","Chromosome","Start_Position")]), tabletoprint, by=c("Hugo_Symbol","Chromosome","Start_Position"))) final_snv<-subset(final,final$VarType=="SNV_INDEL") if ('{{COMBINE_ACCESS}}') { final_snv$clonality <- "MSK-ACCESS" } else { final_snv[is.na(final_snv$clonality),"clonality"]<-"ACCESS-only" final_snv$clonality<-factor( final_snv$clonality, levels=c("CLONAL","SUBCLONAL","INDETERMINATE","ACCESS-only")) } if (!is.null(dmpsamples.ids)) { final_snv[which(final_snv$sample_id %in% dmpsamples.ids), "clonality"] <- "IMPACT" } final_sv<-subset(final,final$VarType=="SV") if (nrow(final_sv) > 0) { final_sv[, "clonality"] = "ACCESS-only" final_sv[which(final_sv$DMP=="Signed out"),"clonality"]="IMPACT" } xlimits<-c(min(final$collection_day), max(final$collection_day)) xbreaks<-final$collection_day xlabels <- final$timepoint
cols <- c("sample_id", "collection_day") if (!is.null(timepointcolors)) { cols <- c(cols, "timepoint") } unique(final[, cols]) %>% kable() %>% kable_styling()
treatment_table <- NULL fig1 <- NULL if (!is.null(timepointcolors) && nrow(final_snv) > 0) { treatment_table<-unique(final[,c("sample_id","collection_day","timepoint")]) fig1<-treatment_plot(treatment_table, xbreaks, xlimits, timepointcolors, ids=c("collection_day","sample_id")) fig2<-vaf_plot(final_snv, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE) subplot(fig1, fig2, nrows=2,shareX=TRUE, heights=c(0.1,0.9)) } else if (nrow(final_snv) > 0) { fig1<-vaf_plot(final_snv, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE) subplot(fig1, nrows=1) }
if (nrow(final_snv) > 0) { fig2<-vaf_plot(final_snv, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=TRUE) subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.1,0.9)) }
print_table(subset(tabletoprint, tabletoprint$VarType=="SNV_INDEL"))
r toString(impact_sample_id)
to identify CLONAL, SUBCLONAL or INDETERMINATE mutations.r toString(impact_sample_id)
are labeled as ACCESS-only.```{asis echo=nrow(cna)>0}
```r fig2<-cna_plot(cna, xlimits, xbreaks, xlabels) subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.2,0.7), which_layout=1)
```{asis echo=has_dmp}
```r sample="{{PATIENT_ID}}" clonal <- subset(final, final$clonality=="CLONAL") if (nrow(clonal)>0) { clonal$ncn <- 2 clonal[which((clonal$Chromosome=="X" | clonal$Chromosome=="Y") & clonal$sex == "M"),"ncn"] <- 1 clonal$adjustedvaf <- clonal$vaf*clonal$ncn / (clonal$expected_alt_copies + (clonal$ncn - clonal$tcn)*clonal$vaf) clonal$adjustedvaf <- round(clonal$adjustedvaf,4) clonaltoplot <- clonal if (length(unique(clonal$VarName))>1) { clonal_mean <- data.frame( clonal[,c("vaf", "adjustedvaf", "collection_day", "sample_id")] %>% group_by(sample_id, collection_day) %>% summarize_all(mean) %>% mutate(clonality="CLONAL", VarName="average") ) common_cols <- c("sample_id", "collection_day", "vaf", "adjustedvaf", "Hotspot", "clonality", "VarName") clonal_mean[,"Hotspot"] <- NA clonaltoplot<-rbind(clonal[, common_cols], clonal_mean[, common_cols]) clonaltoplot$vaf<-round(clonaltoplot$vaf,4) clonaltoplot$adjustedvaf<-round(clonaltoplot$adjustedvaf,4) } fig2<-vaf_plot(clonaltoplot, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE, cnadjusted = TRUE) subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.2,0.8)) }
if(nrow(clonal)>0){ sample="{{PATIENT_ID}}" filename = paste(sample,"_clonal_adjvaf.csv",sep='') fwrite(clonal, file=paste(getwd(),filename,sep='/')) }
```{asis echo=has_dmp}
```r if(nrow(clonal)>0){ fig2<-vaf_plot(clonaltoplot, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=TRUE, cnadjusted = TRUE) subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.2,0.8)) }
```{asis echo=has_dmp}
```r if (nrow(clonal) > 0) { print_table(clonal[,c("sample_id","collection_day","VarName","tcn","expected_alt_copies","ncn", "vaf","adjustedvaf")]) }
```{asis echo=has_dmp}
We adjust the variant allele fractions to account for the copy number alterations of the segments they are in. \ Since it is not easy to call copy number changes from ACCESS data, here we rely on the copy number alterations called by FACETS in the IMPACT sample.\
Note: This assumes that there are no changes to copy numbers of these segments between the IMPACT and ACCESS samples. \
$$VAF = \frac{T_{ALT}P}{T_{CN}P + N_{CN}*(1-P)}$$
where
By solving for this, we get: $$P = \frac{N_{CN}VAF}{T_{ALT} + (N_{CN} - T_{CN})VAF}$$
For ACCESS samples, we compute this value and call it adjusted VAF (VAF~adj~). ```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.