R/oncoprintISB.R
In BiocOncoTK: Bioconductor components for general cancer genomics

Documented in oncoPrintISB TcgaMutCounts TcgaNIndWithAnyMut

Chromosome <- HGNC_gene_symbol <- Hugo_Symbol <- NCBI_Build <- 
   ParticipantBarcode <- Reference_Allele <- Start_Position <- Study <- 
   Subst <- Tumor_SampleTypeLetterCode <- Tumor_Seq_Allele1 <- 
   Tumor_Seq_Allele2 <- Variant_Classification <- Variant_Type <- 
   cDNA_Change <- cbounds <- chr <- chrmid <- 
   genome <- ml10dbp <- normalized_count <- 
   reference_allele <- seqinfo <- seqlengths <- 
   sym <- totalgd <- tumor_seq_allele1 <- 
   tumor_seq_allele2 <- yloc <- NULL

# define gene sets from cbioPortal

glioRTK = c(
"EGFR","ERBB2","PDGFRA","MET","KRAS","NRAS","HRAS","NF1","SPRY2","FOXO1","FOXO3","AKT1","AKT2","AKT3","PIK3R1","PIK3CA","PTEN"
)

pi3k = c(
"PIK3CA","PIK3R1","PIK3R2","PTEN","PDPK1","AKT1","AKT2","FOXO1","FOXO3","MTOR","RICTOR","TSC1","TSC2","RHEB","AKT1S1","RPTOR","MLST8"
)

ovtumsupp=c("DIRAS3","RASSF1","DLEC1","SPARC","DAB2","PLAGL1","RPS6KA2","PTEN","OPCML","BRCA2","ARL11","WWOX","TP53","DPH1","BRCA1","PEG3")

rasraf = c("KRAS","HRAS","BRAF","RAF1","MAP3K1","MAP3K2","MAP3K3","MAP3K4","MAP3K5","MAP2K1","MAP2K2","MAP2K3","MAP2K4","MAP2K5","MAPK1","MAPK3","MAPK4","MAPK6","MAPK7","MAPK8","MAPK9","MAPK12","MAPK14","DAB2","RASSF1","RAB25")

someSets = list(rasraf=rasraf, glioRTK=glioRTK,
   pi3k=pi3k,
   ovtumsupp = ovtumsupp)

attr(someSets, "fullTitle") = list(
   glioRTK="Glioblastoma: RTK/Ras/PI3K/AKT Signaling (17 genes)",
   pi3k="General: PI3K-AKT-mTOR signaling (17 genes)",
   ovtumsupp = "Ovarian Cancer: Putative tumor-suppressor genes in epithelial ovarian cancer (16 genes)",
   rasraf = "General: Ras-Raf-MEK-Erk/JNK signaling (26 genes)")

names(someSets) = unlist(attr(someSets, "fullTitle")[names(someSets)])
   
# some utilities for ISB bigquery content

isbTables = function(bq) {
# bq is a src_bigquery instance
  src_tbls(bq)
}

studiesInTable = function(bq, tabletag = "Somatic_Mutation_calls") {
  select = dplyr::select
  bq %>% tbl(tabletag) %>% select(Study) %>% group_by(Study) %>% summarise(nrec=n())
}

isbVarsInTable = function(bq, tabletag = "Somatic_Mutation_calls") {
  bq %>% tbl(tabletag) %>% tbl_vars()
}

# prepare query for data frame with table of mutation counts per gene symbol
# use query_exec( querystring, projectstring ) to retrieve table
.mutq <- function(studytag = "BRCA", limit=NULL, db="isb-cgc:tcga_201607_beta") {
  ans = sprintf("SELECT COUNT (DISTINCT(pb)) AS sampleN, hugosymbols AS gene FROM ( SELECT ParticipantBarcode AS pb, Hugo_Symbol AS hugosymbols FROM [%s.Somatic_Mutation_calls] WHERE Study = '%s' GROUP BY hugosymbols, pb) GROUP BY gene ORDER BY sampleN DESC", db, studytag)
  if (!is.null(limit)) ans = paste0(ans, paste0(" LIMIT ", limit))
  ans
}

#' obtain data frame with counts of mutation per gene symbol for selected tumor type
#' @import bigrquery
#' @import magrittr
#' @param tumor character(1) defaults to 'BRCA'
#' @param limit numeric(1) defaults to NULL, appended as limit to number of records returned if non-null
#' @param db character(1) BigQuery database name
#' @param project character(1) project code
#' @note This function returns overall mutation count, and many individuals have multiple
#' mutations recorded per gene.
#' @return table as returned by bigrquery::query_exec
#' @examples
#' if (interactive()) {
#' requireNamespace("bigrquery")
#' tt = TcgaMutCounts("BRCA", project="cgc-05-0009") # substitute your project name
#' head(tt)
#' }  # need authentication
#' @export
TcgaMutCounts = function(tumor, limit=NULL, db="isb-cgc:tcga_201607_beta", project) {
 requireNamespace("bigrquery")
 qq = .mutq(studytag = tumor, limit=limit, db=db)
 bigrquery::query_exec(qq, project=project)
}

.genesWmutInStudyDFq = function(studytag="OV", limit=NULL, db="isb-cgc:tcga_201607_beta") {
 # return data frame with columns sampleN, gene
 .mutq(studytag, limit, db=db)
}

.participantBarcodesInTableInStudyq = function(tabletag = "Somatic_Mutation_calls", studytag="OV", limit=NULL,
  db="isb-cgc:tcga_201607_beta") {
#  sub1 = sub( "%%STUDYTAG%%", paste0("'", studytag, "'"), "SELECT ParticipantBarcode FROM [isb-cgc:tcga_201510_alpha.%%TABLETAG%%] WHERE Study = %%STUDYTAG%% ")
  sub1 = sprintf("SELECT UNIQUE( ParticipantBarcode ) FROM [%s.%s] WHERE Study = '%s' ", db, tabletag, studytag)
  ans = sub("%%TABLETAG%%", tabletag, sub1 )
  if (!is.null(limit)) ans = paste0(ans, paste0(" LIMIT ", limit))
  ans
}
#' Give count of individuals with a mutation recorded for selected tumor
#' @importFrom bigrquery query_exec
#' @param tumor character(1) defaults to 'BRCA'
#' @param limit numeric(1) defaults to NULL, appended as limit to number of records returned if non-null
#' @param db character(1) BigQuery database name
#' @param project character(1) project code
#' @return numeric(1)
#' @examples
#' if (interactive()) TcgaNIndWithAnyMut(project="cgc-05-0009")
#' @export
TcgaNIndWithAnyMut = function(tumor="BRCA", limit=NULL, db="isb-cgc:tcga_201607_beta", project) {
 qq = .participantBarcodesInTableInStudyq(tabletag = "Somatic_Mutation_calls",
           studytag = tumor, limit=limit, db=db)
 nrow(query_exec(qq, project=project))
}


genesWmutInStudyDF = function(project, studytag="OV", limit=NULL, db="isb-cgc:tcga_201607_beta")
  query_exec( .genesWmutInStudyDFq(studytag="OV", limit=NULL, db=db), project = project )

mutsInGeneInStudyDF = function(bq, gene = "KRAS", studytag = "LUAD") {
  select = dplyr::select
  bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
         Variant_Classification, Study) %>% filter(Study == studytag) %>% 
             filter(Hugo_Symbol == gene) %>% as.data.frame()
  }
mutsInGenesInStudyDF = function(bq, genevec = c("KRAS", "HRAS"), studytag = "LUAD") {
  select = dplyr::select
  bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
         Variant_Classification, Study) %>% filter(Study == studytag) %>% 
             filter(Hugo_Symbol %in% genevec) %>% as.data.frame()
  }
 
exprsByMutation = function( bq, gene = "GATA3", studytag = "BRCA", project, limit=NULL,
   dup.expr.action=function(x) 2^mean(log2(x)+1,na.rm=TRUE),
   substNAMut = "None" ) {
#
#  need to merge mutation data with expression data by participant bar code
#  alternate dup.expr.action could be to filter among the mutations reported
#
  select = dplyr::select
  if (missing(project)) stop("project must be supplied")
  ginstud = genesWmutInStudyDF( project, studytag=studytag, limit=limit ) # uses direct SQL
  stopifnot(gene %in% ginstud$gene)
  exprdata = bq %>% tbl("mRNA_UNC_HiSeq_RSEM") %>% select(ParticipantBarcode, HGNC_gene_symbol,
         normalized_count, Study) %>% filter(Study == studytag) %>% 
             filter(HGNC_gene_symbol == gene) %>% as.data.frame()
  mutdata = bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
         Variant_Classification, Study) %>% filter(Study == studytag) %>% 
             filter(Hugo_Symbol == gene) %>% as.data.frame()
  mrg = merge(exprdata, mutdata, all.x=TRUE, by="ParticipantBarcode")
  hasdup = sum(duplicated(mrg$ParticipantBarcode))
  if (any(lkna <- is.na(mrg$Variant_Classification)))
     mrg$Variant_Classification[which(lkna)] = substNAMut
  if (hasdup==0) return(mrg)
  smrg = split(mrg, mrg$ParticipantBarcode)
  hasdup = which(vapply(smrg,nrow,numeric(length(smrg)))>0)
  smrg[hasdup] = lapply(smrg[hasdup], function(x) { 
             tmp = dup.expr.action(x$normalized_count)
             fixd = x[1,,drop=FALSE]
             fixd$normalized_count = tmp
             fixd
             })
  do.call(rbind,smrg)
}

# very ad hoc support for oncoprint app

col = c("MIS"="blue", FRA="#008000", SPL="gold", "NONS"="red", "OTH"="pink")

alter_fun = list(
    background = function(x, y, w, h) {
        grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "#CCCCCC", col = NA))
    },
    MIS = function(x, y, w, h) {
        grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "blue", col = NA))
    },
    NONS = function(x, y, w, h) {
        grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "red", col = NA))
    },
    FRA = function(x, y, w, h) {
        grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "#008000", col = NA))
    },
    OTH = function(x, y, w, h) {
        grid.rect(x, y, w-unit(0.5, "mm"), h*0.33, gp = gpar(fill = "orange", col = NA))
    },
    SPL = function(x, y, w, h) {
        grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "gold", col = NA))
    }
)

# completely ad hoc management of mutation types here, must be
# pushed out to parameters

geneVecToOPInputByStudy = function(bq, genevec, studytag="LUAD") {
 jjj = mutsInGenesInStudyDF(bq, studytag=studytag, genevec=genevec)
 jjjs = split(jjj, jjj$Vari)
 jjjss = lapply(jjjs, function(x) split(x, x$Hugo_Symbol))
 types = names(jjjs)
 types= c("3'UTR", "Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del", 
 "In_Frame_Ins", "Intron", "Missense_Mutation", "Nonsense_Mutation", 
   "Silent", "Splice_Site")
 ttags = c("OTH;", "FRA;", "FRA;", "OTH;", "OTH;", "OTH;", "MIS;", "NONS;", "OTH;", "SPL;")
 names(ttags) = types
 targ = matrix("", nrow=length(unique(jjj$Hugo_Symbol)), ncol=length(unique(jjj$Particip)))
 rownames(targ) = unique(jjj$Hugo_Symbol)
 colnames(targ) = unique(jjj$Particip)
 for (i in names(jjjs)) {  # types
   for (j in names(jjjss[[i]])) { # genes 
       ini = targ[j, jjjss[[i]][[j]]$ParticipantBarcode] 
       targ[j, jjjss[[i]][[j]]$ParticipantBarcode] = paste(ini, rep(ttags[i], length(ini)), sep="")
       }
 }
 targ
}


#' interactive interface to ComplexHeatmap oncoPrint with inputs from ISB Cancer Genomics Cloud BigQuery back end
#' @import ComplexHeatmap
#' @import grid
#' @importFrom methods is
#' @param bq an instance of \code{\link[bigrquery:DBI]{BigQueryConnection-class}} authenticated for ISB Cancer Genomics Cloud access
#' @note This function will start a shiny app and will generate queries to
#' Google BigQuery tables representing TCGA.
#' @return only used for side effect of running shiny app
#' @examples
#' if (interactive()) {
#'  bcode = Sys.getenv("CGC_BILLING")
#'  if (nchar(bcode)>0) {
#'   con <- DBI::dbConnect(bigrquery::bigquery(), project = "isb-cgc", 
#'        dataset = "tcga_201607_beta", billing = bcode)
#'   oncoPrintISB(con)
#'  }
#' }
#' @export
oncoPrintISB = function(bq) {
  stopifnot(is(bq, "BigQueryConnection"))
  if (!requireNamespace("bigrquery")) stop("install bigrquery to run oncoPrintISB")
  if (!requireNamespace("shiny")) stop("install shiny to run oncoPrintISB")
  if (!requireNamespace("dbplyr")) stop("install dbplyr to run oncoPrintISB")
  if (!requireNamespace("magrittr")) stop("install magrittr to run oncoPrintISB")
  gnsets = someSets
  studies = sort((studiesInTable(bq) %>% as.data.frame())[,1])
  ui = shiny::fluidPage(
    shiny::titlePanel("TCGA/ISB/bigQuery interface"),
    shiny::sidebarPanel(
     shiny::fluidRow(
      shiny::selectInput("study", "Tumor", choices=studies, selected="LUAD", selectize=TRUE),
      shiny::selectInput("geneset", "Gene set", choices=names(gnsets), selected=names(gnsets)[1], selectize=TRUE)
      ), width=3
     ), # end sidebar
    shiny::mainPanel(
     shiny::tabsetPanel(
      shiny::tabPanel("oncoPrint", shiny::textOutput("nind"), shiny::plotOutput("onco")),
      shiny::tabPanel("genes", shiny::dataTableOutput("setelem"))
      )
    )
   )
  server = function(input, output) {
   output$nind = shiny::renderText({
      nind = TcgaNIndWithAnyMut( input$study, project = bq@billing )
      sprintf("Using data on %d individuals with any mutation for %s", nind, input$study)
      })
   output$onco = shiny::renderPlot({
      curstudy = input$study
      curset = gnsets[[input$geneset]]
      op = geneVecToOPInputByStudy(bq, curset, studytag=curstudy) 
      if (any(is.na(op))) op[is.na(op)] = "OTH;"
      if (any(op=="NA")) op[op=="NA"] = "OTH;" # Aug 2017 still yields NAFRA and NA
#   print(table(op))
      op = gsub("NA", "", op)
      draw(oncoPrint(op, get_type = function(x) strsplit(x, ";")[[1]], alter_fun=alter_fun, col=col))
      #plot(1,1)
   })
   output$setelem = shiny::renderDataTable({
      curset = gnsets[[input$geneset]]
      mkln = function(x) gsub("%%GENE%%", x, "<a href='http://www.genecards.org/cgi-bin/carddisp.pl?gene=%%GENE%%&keywords=%%GENE%%'>%%GENE%%</a>")
      lns = vapply(curset, mkln, character(length(curset)))
#      anc = function(x) paste0("<a href=\"", x, "\">", x, "</a>")
#      lns = sapply(lns, anc, character(length(lns)))
      data.frame(HGNC=lns) #, card=lns)
      }, escape=FALSE)
   }
  shiny::shinyApp(ui, server)
}

Any scripts or data that you put into this service are public.

BiocOncoTK documentation built on Nov. 8, 2020, 6:03 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

BiocOncoTK
Bioconductor components for general cancer genomics

R/oncoprintISB.R
In BiocOncoTK: Bioconductor components for general cancer genomics

Defines functions oncoPrintISB geneVecToOPInputByStudy exprsByMutation mutsInGenesInStudyDF mutsInGeneInStudyDF genesWmutInStudyDF TcgaNIndWithAnyMut .participantBarcodesInTableInStudyq .genesWmutInStudyDFq TcgaMutCounts .mutq isbVarsInTable studiesInTable isbTables

Documented in oncoPrintISB TcgaMutCounts TcgaNIndWithAnyMut

Try the BiocOncoTK package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BiocOncoTK Bioconductor components for general cancer genomics

R/oncoprintISB.R In BiocOncoTK: Bioconductor components for general cancer genomics

Defines functions oncoPrintISB geneVecToOPInputByStudy exprsByMutation mutsInGenesInStudyDF mutsInGeneInStudyDF genesWmutInStudyDF TcgaNIndWithAnyMut .participantBarcodesInTableInStudyq .genesWmutInStudyDFq TcgaMutCounts .mutq isbVarsInTable studiesInTable isbTables

Documented in oncoPrintISB TcgaMutCounts TcgaNIndWithAnyMut

Try the BiocOncoTK package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BiocOncoTK
Bioconductor components for general cancer genomics

R/oncoprintISB.R
In BiocOncoTK: Bioconductor components for general cancer genomics