# define gene sets from cbioPortal
glioRTK = c(
"EGFR","ERBB2","PDGFRA","MET","KRAS","NRAS","HRAS","NF1","SPRY2","FOXO1","FOXO3","AKT1","AKT2","AKT3","PIK3R1","PIK3CA","PTEN"
)
pi3k = c(
"PIK3CA","PIK3R1","PIK3R2","PTEN","PDPK1","AKT1","AKT2","FOXO1","FOXO3","MTOR","RICTOR","TSC1","TSC2","RHEB","AKT1S1","RPTOR","MLST8"
)
ovtumsupp=c("DIRAS3","RASSF1","DLEC1","SPARC","DAB2","PLAGL1","RPS6KA2","PTEN","OPCML","BRCA2","ARL11","WWOX","TP53","DPH1","BRCA1","PEG3")
rasraf = c("KRAS","HRAS","BRAF","RAF1","MAP3K1","MAP3K2","MAP3K3","MAP3K4","MAP3K5","MAP2K1","MAP2K2","MAP2K3","MAP2K4","MAP2K5","MAPK1","MAPK3","MAPK4","MAPK6","MAPK7","MAPK8","MAPK9","MAPK12","MAPK14","DAB2","RASSF1","RAB25")
someSets = list(rasraf=rasraf, glioRTK=glioRTK,
pi3k=pi3k,
ovtumsupp = ovtumsupp)
attr(someSets, "fullTitle") = list(
glioRTK="Glioblastoma: RTK/Ras/PI3K/AKT Signaling (17 genes)",
pi3k="General: PI3K-AKT-mTOR signaling (17 genes)",
ovtumsupp = "Ovarian Cancer: Putative tumor-suppressor genes in epithelial ovarian cancer (16 genes)",
rasraf = "General: Ras-Raf-MEK-Erk/JNK signaling (26 genes)")
names(someSets) = unlist(attr(someSets, "fullTitle")[names(someSets)])
# some utilities for ISB bigquery content
isbTables = function(bq) {
# bq is a src_bigquery instance
src_tbls(bq)
}
studiesInTable = function(bq, tabletag = "Somatic_Mutation_calls") {
select = dplyr::select
bq %>% tbl(tabletag) %>% select(Study) %>% group_by(Study) %>% summarise(nrec=n())
}
isbVarsInTable = function(bq, tabletag = "Somatic_Mutation_calls") {
bq %>% tbl(tabletag) %>% tbl_vars()
}
.mutq <- function(studytag = "BRCA", limit=NULL) {
ans = sub( "%%STUDYTAG%%", paste0("'", studytag, "'"), "SELECT COUNT (DISTINCT(pb)) AS sampleN, hugosymbols AS gene FROM ( SELECT ParticipantBarcode AS pb, Hugo_Symbol AS hugosymbols FROM [isb-cgc:tcga_201510_alpha.Somatic_Mutation_calls] WHERE Study = %%STUDYTAG%% GROUP BY hugosymbols, pb) GROUP BY gene ORDER BY sampleN DESC")
if (!is.null(limit)) ans = paste0(ans, paste0(" LIMIT ", limit))
ans
}
.genesWmutInStudyDFq = function(studytag="OV", limit=NULL) {
# return data frame with columns sampleN, gene
.mutq(studytag, limit)
}
.participantBarcodesInTableInStudyq = function(tabletag = "Somatic_Mutation_calls", studytag="OV", limit=NULL) {
sub1 = sub( "%%STUDYTAG%%", paste0("'", studytag, "'"), "SELECT ParticipantBarcode FROM [isb-cgc:tcga_201510_alpha.%%TABLETAG%%] WHERE Study = %%STUDYTAG%% ")
ans = sub("%%TABLETAG%%", tabletag, sub1 )
if (!is.null(limit)) ans = paste0(ans, paste0(" LIMIT ", limit))
ans
}
genesWmutInStudyDF = function(project, studytag="OV", limit=NULL)
query_exec( .genesWmutInStudyDFq(studytag="OV", limit=NULL), project = project )
mutsInGeneInStudyDF = function(bq, gene = "KRAS", studytag = "LUAD") {
select = dplyr::select
bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
Variant_Classification, Study) %>% filter(Study == studytag) %>%
filter(Hugo_Symbol == gene) %>% as.data.frame()
}
mutsInGenesInStudyDF = function(bq, genevec = c("KRAS", "HRAS"), studytag = "LUAD") {
select = dplyr::select
bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
Variant_Classification, Study) %>% filter(Study == studytag) %>%
filter(Hugo_Symbol %in% genevec) %>% as.data.frame()
}
exprsByMutation = function( bq, gene = "GATA3", studytag = "BRCA", project, limit=NULL,
dup.expr.action=function(x) 2^mean(log2(x)+1,na.rm=TRUE),
substNAMut = "None" ) {
#
# need to merge mutation data with expression data by participant bar code
# alternate dup.expr.action could be to filter among the mutations reported
#
select = dplyr::select
if (missing(project)) stop("project must be supplied")
ginstud = genesWmutInStudyDF( project, studytag=studytag, limit=limit ) # uses direct SQL
stopifnot(gene %in% ginstud$gene)
exprdata = bq %>% tbl("mRNA_UNC_HiSeq_RSEM") %>% select(ParticipantBarcode, HGNC_gene_symbol,
normalized_count, Study) %>% filter(Study == studytag) %>%
filter(HGNC_gene_symbol == gene) %>% as.data.frame()
mutdata = bq %>% tbl("Somatic_Mutation_calls") %>% select(ParticipantBarcode, Hugo_Symbol,
Variant_Classification, Study) %>% filter(Study == studytag) %>%
filter(Hugo_Symbol == gene) %>% as.data.frame()
mrg = merge(exprdata, mutdata, all.x=TRUE, by="ParticipantBarcode")
hasdup = sum(duplicated(mrg$ParticipantBarcode))
if (any(lkna <- is.na(mrg$Variant_Classification)))
mrg$Variant_Classification[which(lkna)] = substNAMut
if (hasdup==0) return(mrg)
smrg = split(mrg, mrg$ParticipantBarcode)
hasdup = which(sapply(smrg,nrow)>0)
smrg[hasdup] = lapply(smrg[hasdup], function(x) {
tmp = dup.expr.action(x$normalized_count)
fixd = x[1,,drop=FALSE]
fixd$normalized_count = tmp
fixd
})
do.call(rbind,smrg)
}
# very ad hoc support for oncoprint app
col = c("MIS"="blue", FRA="#008000", SPL="gold", "NONS"="red", "OTH"="pink")
alter_fun = list(
background = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "#CCCCCC", col = NA))
},
MIS = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "blue", col = NA))
},
NONS = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "red", col = NA))
},
FRA = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "#008000", col = NA))
},
OTH = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h*0.33, gp = gpar(fill = "orange", col = NA))
},
SPL = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"), gp = gpar(fill = "gold", col = NA))
}
)
# completely ad hoc management of mutation types here, must be
# pushed out to parameters
geneVecToOPInputByStudy = function(bq, genevec, studytag="LUAD") {
jjj = mutsInGenesInStudyDF(bq, studytag=studytag, genevec=genevec)
jjjs = split(jjj, jjj$Vari)
jjjss = lapply(jjjs, function(x) split(x, x$Hugo_Symbol))
types = names(jjjs)
types= c("3'UTR", "Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del",
"In_Frame_Ins", "Intron", "Missense_Mutation", "Nonsense_Mutation",
"Silent", "Splice_Site")
ttags = c("OTH;", "FRA;", "FRA;", "OTH;", "OTH;", "OTH;", "MIS;", "NONS;", "OTH;", "SPL;")
names(ttags) = types
targ = matrix("", nr=length(unique(jjj$Hugo_Symbol)), nc=length(unique(jjj$Particip)))
rownames(targ) = unique(jjj$Hugo_Symbol)
colnames(targ) = unique(jjj$Particip)
for (i in names(jjjs)) { # types
for (j in names(jjjss[[i]])) { # genes
ini = targ[j, jjjss[[i]][[j]]$ParticipantBarcode]
targ[j, jjjss[[i]][[j]]$ParticipantBarcode] = paste(ini, rep(ttags[i], length(ini)), sep="")
}
}
targ
}
library(shiny)
#' A simple oncoprint App based on Institute for Systems Biology NCI Cloud Pilot
#'
#' This function is a proof-of-concept for using the BigQuery interface
#' to TCGA data provided by the ISB Cancer Genomics Cloud Pilot. To use
#' this function, you must have a valid billing account with Google. If
#' you have an account with the ISB Cancer Genomics Cloud, then you can
#' use that account.
#'
#' @param bq A \code{\link{src_bigquery}} object
#'
#' @export
#'
#' @examples
#' \dontrun{
#' library(shiny)
#' library(dplyr)
#' isbApp(src_bigquery(project='isb-cgc',
#' billing='isb-cgc-01-0006',dataset='tcga_201510_alpha')
#' }
isbApp = function(bq) {
gnsets = someSets
studies = sort((studiesInTable(bq) %>% as.data.frame())[,1])
ui = fluidPage(
titlePanel("TCGA/ISB/bigQuery interface"),
sidebarPanel(
fluidRow(
selectInput("study", "Tumor", choices=studies, selected="LUAD", selectize=TRUE),
selectInput("geneset", "Gene set", choices=names(gnsets), selected=names(gnsets)[1], selectize=TRUE)
)
), # end sidebar
mainPanel(
tabsetPanel(
tabPanel("oncoPrint", plotOutput("onco")),
tabPanel("genes", dataTableOutput("setelem"))
)
)
)
server = function(input, output) {
output$onco = renderPlot({
curstudy = input$study
curset = gnsets[[input$geneset]]
op = geneVecToOPInputByStudy(bq, curset, studytag=curstudy)
if (any(is.na(op))) op[is.na(op)] = "OTH;"
if (any(op=="NA")) op[op=="NA"] = "OTH;"
draw(oncoPrint(op, get_type = function(x) strsplit(x, ";")[[1]], alter_fun=alter_fun, col=col))
#plot(1,1)
})
output$setelem = renderDataTable({
curset = gnsets[[input$geneset]]
mkln = function(x) gsub("%%GENE%%", x, "<a href='http://www.genecards.org/cgi-bin/carddisp.pl?gene=%%GENE%%&keywords=%%GENE%%'>%%GENE%%</a>")
lns = sapply(curset, mkln)
# anc = function(x) paste0("<a href=\"", x, "\">", x, "</a>")
# lns = sapply(lns, anc)
data.frame(HGNC=lns) #, card=lns)
}, escape=FALSE)
}
shinyApp(ui, server)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.