So, let's see coocurence between position of TP53 mutation and mutation in other genes

library(knitr)
library(RTCGA)

calculate_cooccurence <- function(path = "gdac.broadinstitute.org_BRCA.Mutation_Packager_Calls.Level_3.2015020400.0.0/", 
                                  gene = "TP53", 
                                  position = "amino_acid_change_WU",
                                  onlyMissense = TRUE,
                                  minCount = 5) {

  allPatients <- lapply(list.files(path, pattern = ".maf"), function(p) {
      t <- read.table(paste0(path,"/",p), header=TRUE, fill=TRUE, sep="\t", quote="\"")
      colnames(t) <- tolower(colnames(t))
      cbind(t[,c("hugo_symbol", "variant_classification",position)], 
            patientID = strsplit(p, split=".", fixed=T)[[1]][1])
  })
  allPatientsDF <- do.call(rbind, allPatients)
  # only digits
  allPatientsDF[,position] <- gsub(as.character(allPatientsDF[,position]), 
                                   pattern="[^0-9]+", replacement="")

  # patients with change in gene
  ids <- unique(as.character(allPatientsDF[allPatientsDF$hugo_symbol == gene,"patientID"]))
  mutatedPatientsDF <- unique(allPatientsDF[allPatientsDF$patientID %in% ids,])

  toChange <- mutatedPatientsDF[mutatedPatientsDF$hugo_symbol == gene, ]
  if (nrow(toChange)==0) return(toChange)
  for (i in 1:nrow(toChange)) {
      mutatedPatientsDF[mutatedPatientsDF$patientID == toChange[i,"patientID"],position] = toChange[i,position]
  }

  tab <- table(factor(mutatedPatientsDF$hugo_symbol), factor(mutatedPatientsDF[,position]))
  tab <- tab[,tab[gene,] >= minCount, drop = FALSE]
  tab <- tab[rowSums(tab) > 0,,drop=FALSE]
  tab <- tab[order(-rowSums(tab)),,drop=FALSE]
  tab
}


dirs <- grep(list.dirs("/Users/pbiecek/_TCGA_wszystkie_raki_/data"), pattern="Mutation_Packager_Calls", value = TRUE)

for (di in dirs) {
    if (length(list.files(di)) > 50) {
        tab <- calculate_cooccurence(path = di, 
                                      gene = "TP53", 
                                      position = "start_position",
                                      minCount = 0)
        if (min(dim(tab))>1) {
            tab <- head(tab[order(-rowSums(tab)), order(-colSums(tab))], 20)
            tab <- tab[,1:min(15, ncol(tab))]
            cat(paste0("<h1>", di, "</h1>\n\n"))
            cat(kable(tab, format = "html"))
        }
    }
}


RTCGA/RTCGA documentation built on Nov. 1, 2022, 8:15 p.m.