inst/doc/BiocOncoTK.R

## ----setup,echo=FALSE,results="hide"------------------------------------------
suppressPackageStartupMessages({
suppressMessages({
library(BiocOncoTK)
library(BiocStyle)
library(dplyr)
library(DBI)
library(magrittr)
library(pogos)
library(org.Hs.eg.db)
library(restfulSE)
})
})

## ----lkgbm,message=FALSE------------------------------------------------------
library(ontoProc)
library(ontologyPlot)
oto = getOncotreeOnto()
glioTag = names(grep("Glioblastoma$", oto$name, value=TRUE))
st = siblings_TAG(glioTag, oto, justSibs=FALSE)
if (.Platform$OS.type != "windows") {
onto_plot(oto, slot(st, "ontoTags"), fontsize=50)
}

## ----lktata-------------------------------------------------------------------
BiocOncoTK::pancan_sampTypeMap

## ----lkl, eval=FALSE----------------------------------------------------------
#  library(BiocOncoTK)
#  if (nchar(Sys.getenv("CGC_BILLING"))>0) {
#   pcbq = pancan_BQ() # basic connection
#   BRCA_mir = restfulSE::pancan_SE(pcbq)
#  }

## ----lknor,eval=FALSE---------------------------------------------------------
#   BRCA_mir_nor = restfulSE::pancan_SE(pcbq, assaySampleTypeCode="NT")

## ----dotab, eval=FALSE--------------------------------------------------------
#  bqcon %>% tbl(pancan_longname("rnaseq")) %>% filter(Study=="GBM") %>%
#     group_by(SampleTypeLetterCode) %>% summarise(n=n())

## ----lkgbmr, eval=FALSE-------------------------------------------------------
#  pancan_SE(bqcon, colDFilterValue="GBM", tumorFieldValue="GBM",
#    assayDataTableName=pancan_longname("rnaseq"),
#    assaySampleTypeCode="TR", assayFeatureName="Symbol",
#    assayValueFieldName="normalized_count")

## ----dose2, eval=FALSE--------------------------------------------------------
#  BRCA_mrna = pancan_SE(pcbq,
#     assayDataTableName = pancan_longname("rnaseq"),
#     assayFeatureName = "Entrez",
#     assayValueFieldName = "normalized_count")
#  BRCA_rppa = pancan_SE(pcbq,
#     assayDataTableName = pancan_longname("RPPA"),
#     assayFeatureName = "Protein",
#     assayValueFieldName = "Value")
#  BRCA_meth = pancan_SE(pcbq,
#     assayDataTableName = pancan_longname("27k")[2],
#     assayFeatureName = "ID",
#     assayValueFieldName = "Beta")

## ----dofig1,eval=FALSE--------------------------------------------------------
#  library(BiocOncoTK)
#  
#   infilGenes = c(`PD-L1`="CD274", `PD-L2`="PDCD1LG2", CD8A="CD8A")
#   tumcodes = c("COAD", "STAD", "UCEC")
#   combs = expand.grid(tumcode=tumcodes, ali=names(infilGenes),
#      stringsAsFactors=FALSE)
#   combs$sym = infilGenes[combs$ali]
#  
#   bq = pancan_BQ()
#   exprByMSI = function(bq, tumcode, genesym, alias) {
#    print(tumcode)
#    if (missing(alias)) alias=genesym
#    ex = bindMSI(buildPancanSE(bq, tumcode, assay="RNASeqv2"))
#    ex = replaceRownames(ex)
#    data.frame(
#     patient_barcode=colnames(ex),
#     acronym=tumcode,
#     symbol = genesym,
#     alias = alias,
#     log2ex=log2(as.numeric(SummarizedExperiment::assay(ex[genesym,]))+1),
#     msicode = ifelse(ex$msiTest >= 4, ">=4", "<4"))
#   }
#   allshow = lapply(1:nrow(combs), function(x) exprByMSI(bq, combs$tumcode[x],
#      combs$sym[x], combs$ali[x]))
#  
#   rr = do.call(rbind, allshow)
#  
#   library(ggplot2)
#   png(file="microsatpan2.png")
#   ggplot(rr,
#      aes(msicode, log2ex)) + geom_boxplot() +
#      facet_grid(acronym~alias) +
#      ylab("log2(normalized expr. + 1)") +
#      xlab("microsatellite instability score")
#   dev.off()

## ----lkapi--------------------------------------------------------------------
args(restfulSE::pancan_SE)

## ----lklo---------------------------------------------------------------------
pancan_longname("rnaseq")

## ----lktarg, message=FALSE,eval=FALSE-----------------------------------------
#  billco = Sys.getenv("CGC_BILLING")
#  if (nchar(billco)>0) {
#    con = DBI::dbConnect(bigrquery::bigquery(), project="isb-cgc",
#       dataset="TARGET_hg38_data_v0", billing=billco)
#    DBI::dbListTables(con)
#    con %>% tbl("RNAseq_Gene_Expression") %>% glimpse()
#    }

## ----lklk, message=FALSE, warning=FALSE,eval=FALSE----------------------------
#  if (nchar(billco)>0) {
#    con %>% tbl("RNAseq_Gene_Expression") %>%
#        select(project_short_name) %>%
#        group_by(project_short_name) %>%
#        summarise(n=n())
#  }

## ----lkccle2, message=FALSE, eval=FALSE---------------------------------------
#  billco = Sys.getenv("CGC_BILLING")
#  if (nchar(billco)>0) {
#    con = DBI::dbConnect(bigrquery::bigquery(), project="isb-cgc",
#       dataset="ccle_201602_alpha", billing=billco)
#    DBI::dbListTables(con)
#  }

## ----lkmucc,eval=FALSE--------------------------------------------------------
#  muttab = con %>% tbl("Mutation_calls")
#  length(muttab %>% colnames())
#  muttab %>% select(Cell_line_primary_name, Hugo_Symbol,
#     Variant_Classification, cDNA_Change)%>% glimpse()

## ----lknras, warning=FALSE,eval=FALSE-----------------------------------------
#  nrastab = muttab %>% select(Variant_Classification, Hugo_Symbol,
#      Cell_line_primary_name, CCLE_name) %>%
#       filter(Hugo_Symbol == "NRAS") %>% group_by(Hugo_Symbol)
#  nrastab %>% summarise(n=n())
#  nrasdf = nrastab %>% as.data.frame()

## ----dospl,eval=FALSE---------------------------------------------------------
#  spl = function(x) {
#    z = strsplit(x, "_")
#    fir = vapply(z, function(x)x[1], character(1))
#    rest = vapply(z, function(x) paste(x[-1], collapse="_"), character(1))
#    list(fir, rest)
#  }
#  nrasdf$organ = spl(nrasdf$CCLE_name)[[2]]

## ----getmodnr,echo=FALSE------------------------------------------------------
nrasdf = load_nrasdf()

## ----illus--------------------------------------------------------------------
head(nrasdf)
table(nrasdf$organ)
prim_names = as.character(nrasdf$Cell_line_primary_name)

## ----lkccleex, message=FALSE, warning=FALSE, eval=FALSE-----------------------
#  ccexp = con %>% tbl("AffyU133_RMA_expression")
#  ccexp %>% glimpse()
#  ccexp %>% select(Cell_line_primary_name, RMA_normalized_expression,
#      HGNC_gene_symbol) %>% filter(HGNC_gene_symbol == "AHR") %>%
#      filter(Cell_line_primary_name %in% nrasdf$Cell_line_primary_name) %>%
#      as.data.frame() -> NRAS_AHR
#  head(NRAS_AHR)

## ----domockahr,echo=FALSE-----------------------------------------------------
NRAS_AHR = load_NRAS_AHR()
head(NRAS_AHR)

## ----dopog,eval=FALSE---------------------------------------------------------
#  library(pogos)
#  ccleNRAS = DRTraceSet(NRAS_AHR[,1], drug="PD-0325901")
#  plot(ccleNRAS)

## ----dopogmock,echo=FALSE,results="hide"--------------------------------------
ccleNRAS = load_ccleNRAS()
if (.Platform$OS.type != "windows") {
plot(ccleNRAS)
}

## ----drrr---------------------------------------------------------------------
responsiveness = function (x, f) 
{
    r = sapply(slot(x, "traces"), function(x) f(slot(slot(x,"DRProfiles")[[1]],"responses")))
    data.frame(Cell_line_primary_name = slot(x,"cell_lines"), resp = r, 
        drug = slot(x,"drug"), dataset = x@dataset)
}

## ----lkaa---------------------------------------------------------------------
AA = function(x) sum((pmax(0, x/100)))
head(rr <- responsiveness(ccleNRAS, AA))
summary(rr$resp)

## ----mrg----------------------------------------------------------------------
rexp = merge(rr, NRAS_AHR)
rexp[1:2,]

## ----lkda---------------------------------------------------------------------
data(cell_70138)
names(cell_70138)
table(cell_70138$primary_site)
data(pert_70138)
dim(pert_70138)
names(pert_70138)

## ----lkdem--------------------------------------------------------------------
cd = clueDemos()
names(cd)
cd$sigs

## ----lkp1---------------------------------------------------------------------
if (nchar(Sys.getenv("CLUE_KEY"))>0) {
lkbytarg = query_clue(service="perts", filter=list(where=list(target="EGFR")))
print(names(lkbytarg[[1]]))
sig1 = lkbytarg[[1]]$sig_id_gold[1]
}

## ----lkp2---------------------------------------------------------------------
if (nchar(Sys.getenv("CLUE_KEY"))>0) {
sig1d = query_clue(service="sigs", filter=list(where=list(sig_id=sig1)))
print(names(sig1d[[1]]))
print(head(sig1d[[1]]$pert_iname)) # perturbagen
print(head(sig1d[[1]]$cell_id))  # cell type
print(head(sig1d[[1]]$dn50_lm))  # some downregulated genes among the landmark
print(head(sig1d[[1]]$up50_lm))  # some upregulated genes among the landmark
}

## ----lknpc, cache=TRUE--------------------------------------------------------
# use pertClasses() to get names of perturbagen classes in Clue
if (nchar(Sys.getenv("CLUE_KEY"))>0) {
tuinh = query_clue("perts", 
   filter=list(where=list(pcl_membership=list(inq=list("CP_HDAC_INHIBITOR"))))) 
inames_tu = sapply(tuinh, function(x)x$pert_iname)

npcSigs = query_clue(service="sigs", filter=list(where=list(cell_id="NPC")))
length(npcSigs)
gns = lapply(npcSigs, function(x) x$up50_lm)
perts = lapply(npcSigs, function(x) x$pert_iname)
touse = which(perts %in% inames_tu)
rec = names(tab <- sort(table(unlist(gns[touse])),decreasing=TRUE)[1:5])
cbind(select(org.Hs.eg.db, keys=rec, columns="SYMBOL"), n=as.numeric(tab))
}

## ----trylop-------------------------------------------------------------------
if (interactive()) {
 patelSE = loadPatel() # uses BiocFileCache
 patelSE
 assay(patelSE[1:4,1:3]) # in memory
}

## ----lkdar--------------------------------------------------------------------
 darmSE = BiocOncoTK::darmGBMcls  # count_lstpm from CONQUER
 darmSE
 assay(darmSE)  # out of memory

Try the BiocOncoTK package in your browser

Any scripts or data that you put into this service are public.

BiocOncoTK documentation built on Nov. 8, 2020, 6:03 p.m.