knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE, fig.width=10, fig.height=6)
library(data.table)
library(tidyr)
library(dplyr)
library(reshape2)
library(ggplot2)
library(plotly)
library(scales)
library(kableExtra)
library(DT)
library(RColorBrewer)
theme_set(theme_bw())

show_text <- TRUE

dmp_id <- "{{DMP_ID}}"
has_dmp <- F
if (dmp_id != "") {
  has_dmp = T
}
if (dmp_id != "") {
  page_title = "{{PATIENT_ID}} ({{DMP_ID}})"
} else {
  page_title = "{{PATIENT_ID}}"
}

title: "r page_title"

if ("{{TUMOR_TYPE}}" != "") {
  asis_output("### {{TUMOR_TYPE}} \n")
}

sample="{{PATIENT_ID}}"
access_data_analysis_results<-"{{RESULTS}}"
access_data_analysis_cna_dir<-"{{CNA_RESULTS_DIR}}"
impact_sample_id<-"{{DMP_SAMPLE_ID}}"
impact_ccf_file<-"{{DMP_MAF_PATH}}"
metadata_file<-"{{METADATA}}"

## read metadata files ##
metadata <- fread(metadata_file)
metadata$collection_date <- as.Date(metadata$collection_date, format="%m/%d/%y")

## Possible timepoints in the treatment info file, and their associated colors

timepointcolors <- NULL

if ("timepoint" %in% colnames(metadata)) {

  metadata <- metadata[,c("sample_id", "sex", "collection_date", "timepoint")]

  timepoints <- unique(metadata$timepoint)
  colorcount <- length(timepoints)
  getPalette = colorRampPalette(brewer.pal(12, "Paired"))
  timepointcolors <- getPalette(colorcount)
  names(timepointcolors) <- timepoints
} else {
  metadata <- metadata[,c("sample_id", "sex", "collection_date")]
}
# Function to get variant colors #
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

# Function to generate VAF plots
vaf_plot <- function(variant_table, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE, cnadjusted=FALSE){
  tmp<-variant_table
  tmp$VarName<-factor(tmp$VarName,levels=names(varcolors))
  if(log == FALSE){
    if(cnadjusted==FALSE){
      fig<-ggplot(tmp, aes(x=collection_date, y=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) +
        scale_y_continuous(labels = scales::percent_format(accuracy = yaccuracy)) +
        ylab("VAF")
      } else{
        fig<-ggplot(tmp, aes(x=collection_date, y=adjustedvaf, vaf=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) +
          scale_y_continuous(labels = scales::percent_format(accuracy = yaccuracy)) +
          ylab("Adjusted VAF")
      }
  } else{
    if(cnadjusted==FALSE){
      tmp$vafforlog = tmp$vaf + 0.0001
      fig<-ggplot(tmp, aes(x=collection_date, y=vafforlog, vaf=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) +
        scale_y_log10(labels = scales::percent_format(accuracy = yaccuracy)) +
        ylab("VAF")
    } else{
      tmp$vafforlog = tmp$adjustedvaf + 0.0001
      fig<-ggplot(tmp, aes(x=collection_date, y=vafforlog, adjustedvaf=adjustedvaf, vaf=vaf, col=VarName, hotspot=Hotspot, sampleid=sample_id)) +
        scale_y_log10(labels = scales::percent_format(accuracy = yaccuracy)) +
        ylab("VAF")
    }
  }

  fig<-fig + geom_point() +
    geom_line(aes(group=VarName)) +
    facet_grid(clonality~.) +
    theme(
      legend.title=element_blank(),
      strip.background = element_blank(),
      legend.text=element_text(size=6),
      legend.spacing.y = unit(0.5, "cm"),
      legend.key.height = unit(2, "cm"),
      axis.text.x = element_text(angle=45)) +
    scale_color_manual(values = varcolors) +
    scale_x_date(breaks=xbreaks, limits=xlimits, labels=xlabels) +
    labs(y="VAF") +
    xlab("")
  if(cnadjusted==FALSE){
    fig<-ggplotly(fig, tooltip=c("collection_date","vaf","VarName","Hotspot","sample_id"))
  } else{
    fig<-ggplotly(fig, tooltip=c("collection_date","vaf","adjustedvaf","VarName","Hotspot","sample_id"))
  }
  fig<-ggplotly(fig)
  return(fig)
}

# Function to generate treatment plot
treatment_plot <- function(treatment_table, xbreaks, colors, ids=c("collection_date","sample_id")){
  # Any column other than the ids columns will be plotted
  tmp<-melt(treatment_table, id=ids)
  tmp$value<-factor(tmp$value, levels=names(colors))
  fig<-ggplot(tmp, aes(x=collection_date, y=variable, fill=value)) +
    geom_tile() +
    theme(panel.grid=element_blank(), legend.title = element_blank()) +
    scale_x_date(breaks=xbreaks) +
    scale_fill_manual(values=colors)
  fig<-ggplotly(fig)
  fig<-style(fig, showlegend=FALSE)
  return(fig)
}

cna_plot<-function(cna, xlimits, xbreaks){
  cna$fc<-round(cna$fc,2)
  fig<-ggplot(cna, aes(x=collection_date, sampleid=sample_id, gene=Hugo_Symbol)) +
    geom_bar(aes(y=1, fill=fc), stat="identity",width=10) +
    scale_fill_gradient2(low="white",mid="red",high="red3",midpoint=1.5, limits=c(0,max(cna$fc))) +
    theme(legend.title=element_blank(), panel.grid.major.y=element_blank(), strip.background = element_blank(), axis.text.x=element_text(angle=90), axis.ticks.y=element_blank(), axis.text.y=element_blank()) +
    scale_x_date(breaks=xbreaks, limits=xlimits) +
    xlab("") + facet_grid(Hugo_Symbol~.)
  fig<-ggplotly(fig, tooltip=c("collection_date","CNA","sample_id","fc","Hugo_Symbol"))
  return(fig)
}

print_table<-function(table){
  datatable(table, rownames=FALSE, escape=FALSE, options=list(scrollX=T, autoWidth = TRUE))
}
## read variant files ##
accesstab<-fread(access_data_analysis_results, na.strings=c("NA",""))

dmp <- data.frame()
if (impact_ccf_file != "") {
  dmp<-fread(impact_ccf_file)[,c("Hugo_Symbol", "Chromosome", "Start_Position", "Reference_Allele", "Tumor_Seq_Allele2", "tcn", "expected_alt_copies", "clonality")]
  dmp$Chromosome<-as.character(dmp$Chromosome)
}

## ACCESS variants ##
tab1<-accesstab[,c("Hugo_Symbol","Chromosome", "Start_Position", "Variant_Classification", "HGVSp_Short", "Reference_Allele", "Tumor_Seq_Allele2", "Hotspot", "DMP", "CH")]
tab2<-NULL
tab3 <- NULL
dmpsamples <- NULL
dmpsamples.ids <- NULL

if ('{{PLOT_IMPACT}}') {
  tab2<-accesstab %>% select(matches("(___total|___DMP_Tumor)$"))
  colnames(tab2)<-gsub("___total","",colnames(tab2))
  colnames(tab2)<-gsub("___DMP_Tumor","",colnames(tab2))
  dmpsamples.ids <- accesstab %>% select(matches("___DMP_Tumor$"))
  colnames(dmpsamples.ids)<-gsub("___DMP_Tumor","",colnames(dmpsamples.ids))
  dmpsamples.ids <- colnames(dmpsamples.ids)
} else {
  tab2<-accesstab %>% select(matches("___total$"))
  colnames(tab2)<-gsub("___total","",colnames(tab2))
  tab3<-accesstab %>% select(matches("___DMP_(Tumor|Normal)$"))
  colnames(tab3)<-gsub("___DMP_Tumor","",colnames(tab3))
  colnames(tab3)<-gsub("___DMP_Normal","",colnames(tab3))
  dmpsamples<-colnames(tab3)
}
cmo_sample_id_order<-metadata$sample_id[order(metadata$collection_date)]
cmo_sample_id_order<-cmo_sample_id_order[cmo_sample_id_order %in% colnames(tab2)]
tab2<-data.frame(tab2,check.names=FALSE)[,cmo_sample_id_order, drop=FALSE]
tab<-cbind(tab1,tab2)
if (!is.null(tab3)) {
  tab<-cbind(tab,tab3)
}
tab$VarType <- "SNV_INDEL"
tab[grep("__",tab$Hugo_Symbol),"VarType"]<-"SV"
tab$Hugo_Symbol<- gsub("__","-",tab$Hugo_Symbol)
tab$Chromosome<-as.character(tab$Chromosome)
if (nrow(dmp) > 0) {
  tabletoprint<-merge(tab, dmp, all.x=TRUE)
} else {
  tabletoprint <- tab
}
## read CNA files ##
all.cna.files <- list.files(path = access_data_analysis_cna_dir, pattern = sample, full.names=TRUE)
cna<-do.call('rbind', lapply(all.cna.files,fread)) %>% rename(sample_id=Tumor_Sample_Barcode)
if( nrow(cna) > 0){
  cna<-merge(cna, metadata,by=c("sample_id"))
  cna$collection_date<-as.Date(cna$collection_date, format="%m/%d/%y")
  cna$fc<-as.numeric(cna$fc)
}
## Get VAFs of variants ##
temp <- melt(tab, id=c("Hugo_Symbol","Chromosome", "Start_Position", "Variant_Classification", "HGVSp_Short", "Reference_Allele", "Tumor_Seq_Allele2", "Hotspot", "DMP", "CH", "VarType", dmpsamples)) %>% separate(col=value, into=c("num1", "num2", "num3"))
temp$vaf = as.numeric(temp$num1)/as.numeric(temp$num2)
temp$total=temp$num2
tempsv<-subset(temp,temp$VarType=="SV")
temp[which(temp$VarType=="SV"), "vaf"] = (as.numeric(tempsv$num1)+as.numeric(tempsv$num2))/as.numeric(tempsv$num3)
temp[which(temp$VarType=="SV"), "total"]=tempsv$num3
temp$Chromosome <- as.character(temp$Chromosome)
temp$vaf=temp$vaf
if (nrow(dmp) > 0) {
  # merge with DMP
  combined <- merge(temp, dmp, all.x=TRUE) %>% rename(sample_id = variable)
} else {
  combined <- temp %>% rename(sample_id = variable)
  combined[, "clonality"] <- NA
}
final <- merge(combined, metadata, by=c("sample_id"), all.x=TRUE)
final$collection_date <- as.Date(final$collection_date, format="%m/%d/%y")
# Create a variant name column
final$VarName=paste(final$Hugo_Symbol, final$HGVSp_Short)
final[is.na(final$HGVSp_Short), "VarName"] <- paste0(final$Hugo_Symbol, " ", final$Chromosome, ":", final$Start_Position, " ", final$Reference_Allele, ">", final$Tumor_Seq_Allele2)[is.na(final$HGVSp_Short)]
final[is.na(final$HGVSp_Short) & nchar(final$Reference_Allele)>5,"VarName"] <- paste0(final$Hugo_Symbol, " ", final$Chromosome, ":", final$Start_Position, " ", substr(final$Reference_Allele,1,3),"..", ">", final$Tumor_Seq_Allele2)[is.na(final$HGVSp_Short)]
final[is.na(final$HGVSp_Short) & nchar(final$Tumor_Seq_Allele2)>5,"VarName"] <- paste0(final$Hugo_Symbol, " ", final$Chromosome, ":", final$Start_Position, " ", final$Reference_Allele,1,3, ">", substr(final$Tumor_Seq_Allele2,1,3),"..")[is.na(final$HGVSp_Short)]
final[which(final$VarType=="SV"),"VarName"]<-final$Hugo_Symbol[which(final$VarType=="SV")]
# Select variant colors
final$vaf<-round(final$vaf,4)
final<-final[!is.na(final$vaf),]
final$total<-as.numeric(final$total)
final<-subset(final, final$total>=100)
vafsum<-aggregate(final$vaf,by=list(VarName=final$VarName),FUN=sum)
final<-subset(final, !final$VarName %in% vafsum[which(vafsum$x==0),"VarName"])
varcolors<-c(gg_color_hue(length(unique(final$VarName))),"black")
names(varcolors)<-c(unique(final$VarName),"average")
tabletoprint<-unique(merge(unique(final[,c("Hugo_Symbol","Chromosome","Start_Position")]), tabletoprint, by=c("Hugo_Symbol","Chromosome","Start_Position")))
final_snv<-subset(final,final$VarType=="SNV_INDEL")
if ('{{COMBINE_ACCESS}}') {
  final_snv$clonality <- "MSK-ACCESS"
} else {
  final_snv[is.na(final_snv$clonality),"clonality"]<-"ACCESS-only"
  final_snv$clonality<-factor(
    final_snv$clonality, levels=c("CLONAL","SUBCLONAL","INDETERMINATE","ACCESS-only"))
}
if (!is.null(dmpsamples.ids)) {
  final_snv[which(final_snv$sample_id %in% dmpsamples.ids), "clonality"] <-  "IMPACT"
}
final_sv<-subset(final,final$VarType=="SV")
if (nrow(final_sv) > 0) {
  final_sv[, "clonality"] = "ACCESS-only"
  final_sv[which(final_sv$DMP=="Signed out"),"clonality"]="IMPACT"
}
xlimits<-c(min(final$collection_date)-10, max(final$collection_date)+10)
xbreaks<-final$collection_date
xlabels <- final$timepoint

Sample information

cols <- c("sample_id", "collection_date")
if (!is.null(timepointcolors)) {
  cols <- c(cols, "timepoint")
}

unique(final[, cols]) %>% kable() %>% kable_styling()

SNVs and INDELs{.tabset}

Linear

treatment_table <- NULL
fig1 <- NULL

if (!is.null(timepointcolors) && nrow(final_snv) > 0) {
  treatment_table<-unique(final[,c("sample_id","collection_date","timepoint")])
  fig1<-treatment_plot(treatment_table, xbreaks, timepointcolors, ids=c("collection_date","sample_id"))
  fig2<-vaf_plot(final_snv, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE)
  subplot(fig1, fig2, nrows=2,shareX=TRUE, heights=c(0.1,0.9))
} else if (nrow(final_snv) > 0) {
  fig1<-vaf_plot(final_snv, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE)
  subplot(fig1, nrows=1)
}

Log

if (nrow(final_snv) > 0) {
  fig2<-vaf_plot(final_snv, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=TRUE)
  subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.1,0.9))
}

Table

print_table(subset(tabletoprint, tabletoprint$VarType=="SNV_INDEL"))

Description

```{asis echo=nrow(cna)>0}

Copy number alterations

```r
fig2<-cna_plot(cna, xlimits, xbreaks)
subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.2,0.7), which_layout=1)

```{asis echo=has_dmp}

Clonal SNVs/INDELs VAF adjusted for copy number {.tabset}

Linear

```r
sample="{{PATIENT_ID}}"
filename = paste(sample,"_clona.csv")
path = getwd()
file_path = file.path(path,filename)
clonal <- subset(final, final$clonality=="CLONAL")

if (nrow(clonal)>0) {
  clonal$ncn <- 2
  clonal[which((clonal$Chromosome=="X" | clonal$Chromosome=="Y") & clonal$sex == "M"),"ncn"] <- 1
  clonal$adjustedvaf <- clonal$vaf*clonal$ncn / (clonal$expected_alt_copies + (clonal$ncn - clonal$tcn)*clonal$vaf)
  clonal$adjustedvaf <- round(clonal$adjustedvaf,4)
  clonaltoplot <- clonal

  if (length(unique(clonal$VarName))>1) {

    write.csv(clonal, file_path)

    clonal_mean <- data.frame(
      clonal[,c("vaf", "adjustedvaf", "collection_date", "sample_id")] %>%
      group_by(sample_id, collection_date) %>%
      summarize_all(mean) %>%
      mutate(clonality="CLONAL", VarName="average")
    )

    common_cols <- c("sample_id", "collection_date", "vaf", "adjustedvaf", "Hotspot", "clonality", "VarName")
    clonal_mean[,"Hotspot"] <- NA

    clonaltoplot<-rbind(clonal[, common_cols], clonal_mean[, common_cols])
    clonaltoplot$vaf<-round(clonaltoplot$vaf,4)
    clonaltoplot$adjustedvaf<-round(clonaltoplot$adjustedvaf,4)
  }

  fig2<-vaf_plot(clonaltoplot, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=FALSE, cnadjusted = TRUE)
  subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.2,0.8))
}

```{asis echo=has_dmp}

Log

```r
if(nrow(clonal)>0){
  fig2<-vaf_plot(clonaltoplot, xlimits, xbreaks, xlabels, varcolors, yaccuracy=0.01, log=TRUE, cnadjusted = TRUE)
  subplot(fig1,fig2,nrows=2,shareX=TRUE, heights=c(0.2,0.8))
}

```{asis echo=has_dmp}

Table

```r
if (nrow(clonal) > 0) {
  print_table(clonal[,c("sample_id","collection_date","VarName","tcn","expected_alt_copies","ncn", "vaf","adjustedvaf")])
}

```{asis echo=has_dmp}

Description

We adjust the variant allele fractions to account for the copy number alterations of the segments they are in. \ Since it is not easy to call copy number changes from ACCESS data, here we rely on the copy number alterations called by FACETS in the IMPACT sample r toString(impact_sample_id).\ Note: This assumes that there are no changes to copy numbers of these segments between the IMPACT and ACCESS samples. \

$$VAF = \frac{T_{ALT}P}{T_{CN}P + N_{CN}*(1-P)}$$

where

By solving for this, we get: $$P = \frac{N_{CN}VAF}{T_{ALT} + (N_{CN} - T_{CN})VAF}$$

For ACCESS samples, we compute this value and call it adjusted VAF (VAF~adj~). ```



msk-access/access_data_analysis documentation built on Nov. 13, 2023, 12:43 p.m.