inst/doc/casestudy.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(dpi = 300)
knitr::opts_chunk$set(cache=FALSE)

## ----message = FALSE, warning = FALSE, include = FALSE------------------------
library(TCGAbiolinks)
library(SummarizedExperiment)
library(dplyr)
library(DT)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  library(SummarizedExperiment)
#  library(TCGAbiolinks)
#  query.exp <- GDCquery(project = "TCGA-BRCA",
#  legacy = TRUE,
#  data.category = "Gene expression",
#  data.type = "Gene expression quantification",
#  platform = "Illumina HiSeq",
#  file.type = "results",
#  experimental.strategy = "RNA-Seq",
#  sample.type = c("Primary Tumor","Solid Tissue Normal"))
#  GDCdownload(query.exp)
#  brca.exp <- GDCprepare(query = query.exp, save = TRUE, save.filename = "brcaExp.rda")
#  
#  # get subtype information
#  dataSubt <- TCGAquery_subtype(tumor = "BRCA")
#  
#  # get clinical data
#  dataClin <- GDCquery_clinic(project = "TCGA-BRCA","clinical")
#  
#  # Which samples are Primary Tumor
#  dataSmTP <- TCGAquery_SampleTypes(getResults(query.exp,cols="cases"),"TP")
#  # which samples are solid tissue normal
#  dataSmNT <- TCGAquery_SampleTypes(getResults(query.exp,cols="cases"),"NT")

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  dataPrep <- TCGAanalyze_Preprocessing(object = brca.exp, cor.cut = 0.6)
#  
#  dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
#                                        geneInfo = geneInfo,
#                                        method = "gcContent")
#  
#  dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
#                                    method = "quantile",
#                                    qnt.cut =  0.25)
#  
#  dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,dataSmNT],
#                              mat2 = dataFilt[,dataSmTP],
#                              Cond1type = "Normal",
#                              Cond2type = "Tumor",
#                              fdr.cut = 0.01 ,
#                              logFC.cut = 1,
#                              method = "glmLRT")

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  ansEA <- TCGAanalyze_EAcomplete(TFname="DEA genes Normal Vs Tumor",
#                                  RegulonList = rownames(dataDEGs))
#  
#  TCGAvisualize_EAbarplot(tf = rownames(ansEA$ResBP),
#                          GOBPTab = ansEA$ResBP,
#                          GOCCTab = ansEA$ResCC,
#                          GOMFTab = ansEA$ResMF,
#                          PathTab = ansEA$ResPat,
#                          nRGTab = rownames(dataDEGs),
#                          nBar = 20)

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("case1_EA.png")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  
#  group1 <- TCGAquery_SampleTypes(colnames(dataFilt), typesample = c("NT"))
#  group2 <- TCGAquery_SampleTypes(colnames(dataFilt), typesample = c("TP"))
#  
#  dataSurv <- TCGAanalyze_SurvivalKM(clinical_patient = dataClin,
#                                     dataGE = dataFilt,
#                                     Genelist = rownames(dataDEGs),
#                                     Survresult = FALSE,
#                                     ThreshTop = 0.67,
#                                     ThreshDown = 0.33,
#                                     p.cut = 0.05, group1, group2)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  
#  require(dnet)  # to change
#  org.Hs.string <- dRDataLoader(RData = "org.Hs.string")
#  
#  TabCoxNet <- TCGAvisualize_SurvivalCoxNET(dataClin,
#                                            dataFilt,
#                                            Genelist = rownames(dataSurv),
#                                            scoreConfidence = 700,
#                                            org.Hs.string = org.Hs.string,
#                                            titlePlot = "Case Study n.1 dnet")

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("case1_dnet.png")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  library(TCGAbiolinks)
#  library(SummarizedExperiment)
#  
#  query.exp <- GDCquery(project = "TCGA-LGG",
#                        legacy = TRUE,
#                        data.category = "Gene expression",
#                        data.type = "Gene expression quantification",
#                        platform = "Illumina HiSeq",
#                        file.type = "results",
#                        experimental.strategy = "RNA-Seq",
#                        sample.type = "Primary Tumor")
#  GDCdownload(query.exp)
#  lgg.exp <- GDCprepare(query = query.exp, save = TRUE, save.filename = "lggExp.rda")

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  library(dplyr)
#  
#  dataPrep <- TCGAanalyze_Preprocessing(object = lgg.exp, cor.cut = 0.6)
#  dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
#                                        geneInfo = geneInfo,
#                                        method = "gcContent")
#  
#  datFilt <- dataNorm %>% TCGAanalyze_Filtering(method = "varFilter") %>%
#      TCGAanalyze_Filtering(method = "filter1") %>%  TCGAanalyze_Filtering(method = "filter2",foldChange = 0.2)
#  
#  data_Hc2 <- TCGAanalyze_Clustering(tabDF = datFilt,
#                                     method = "consensus",
#                                     methodHC = "ward.D2")
#  # Add  cluster information to Summarized Experiment
#  colData(lgg.exp)$groupsHC <- paste0("EC",data_Hc2[[4]]$consensusClass)
#  

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  TCGAanalyze_survival(data = colData(lgg.exp),
#                       clusterCol = "groupsHC",
#                       main = "TCGA kaplan meier survival plot from consensus cluster",
#                       legend = "RNA Group",height = 10,
#                       risk.table = T,conf.int = F,
#                       color = c("black","red","blue","green3"),
#                       filename = "survival_lgg_expression_subtypes.png")

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("case2_surv.png")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  TCGAvisualize_Heatmap(t(datFilt),
#                        col.metadata =  colData(lgg.exp)[,c("barcode",
#                                                            "groupsHC",
#                                                            "subtype_Histology",
#                                                            "subtype_IDH.codel.subtype")],
#                        col.colors =  list(
#                            groupsHC = c("EC1"="black",
#                                         "EC2"="red",
#                                         "EC3"="blue",
#                                         "EC4"="green3")),
#                        sortCol = "groupsHC",
#                        type = "expression", # sets default color
#                        scale = "row", # use z-scores for better visualization. Center gene expression level around 0.
#                        title = "Heatmap from concensus cluster",
#                        filename = "case2_Heatmap.png",
#                        cluster_rows = TRUE,
#                        color.levels = colorRampPalette(c("green", "black", "red"))(n = 11),
#                        extrems =seq(-5,5,1),
#                        cluster_columns = FALSE,
#                        width = 1000,
#                        height = 1000)

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(jpeg)
library(grid)
img <- readJPEG("case2_Heatmap.jpg")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  LGGmut <- GDCquery_Maf(tumor = "LGG", pipelines = "muse")
#  # Selecting gene
#  mRNAsel <- "ATRX"
#  LGGselected <- LGGmut[LGGmut$Hugo_Symbol == mRNAsel,]
#  
#  dataMut <- LGGselected[!duplicated(LGGselected$Tumor_Sample_Barcode),]
#  dataMut$Tumor_Sample_Barcode <- substr(dataMut$Tumor_Sample_Barcode,1,12)
#  
#  # Adding the Expression Cluster classification found before
#  dataMut <- merge(dataMut, cluster, by.y="patient", by.x="Tumor_Sample_Barcode")
#  dataMut <- dataMut[dataMut$Variant_Classification!=0,]

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  library(TCGAbiolinks)
#  library(SummarizedExperiment)
#  dir.create("case3")
#  setwd("case3")
#  #-----------------------------------
#  # STEP 1: Search, download, prepare |
#  #-----------------------------------
#  # 1.1 - DNA methylation
#  # ----------------------------------
#  query.met <- GDCquery(project = "TCGA-ACC",
#                        legacy = TRUE,
#                        data.category = "DNA methylation",
#                        platform = "Illumina Human Methylation 450")
#  GDCdownload(query.met)
#  
#  acc.met <- GDCprepare(query = query.met,
#                        save = TRUE,
#                        save.filename = "accDNAmet.rda",
#                        summarizedExperiment = TRUE)
#  
#  #-----------------------------------
#  # 1.2 - RNA expression
#  # ----------------------------------
#  query.exp <- GDCquery(project = "TCGA-ACC",
#                        legacy = TRUE,
#                        data.category = "Gene expression",
#                        data.type = "Gene expression quantification",
#                        platform = "Illumina HiSeq",
#                        file.type = "results")
#  GDCdownload(query.exp)
#  acc.exp <- GDCprepare(query = query.exp, save = TRUE, save.filename = "accExp.rda")

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  # na.omit
#  acc.met <- subset(acc.met,subset = (rowSums(is.na(assay(acc.met))) == 0))
#  
#  # Volcano plot
#  acc.met <- TCGAanalyze_DMC(acc.met, groupCol = "subtype_MethyLevel",
#                             group1 = "CIMP-high",
#                             group2="CIMP-low",
#                             p.cut = 10^-5,
#                             diffmean.cut = 0.25,
#                             legend = "State",
#                             plot.filename = "CIMP-highvsCIMP-low_metvolcano.png")

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("CIMP-highvsCIMP-low_metvolcano.png")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  #-------------------------------------------------
#  # 2.3 - DEA - Expression analysis - volcano plot
#  # ------------------------------------------------
#  acc.exp.aux <- subset(acc.exp,
#                        select = colData(acc.exp)$subtype_MethyLevel %in% c("CIMP-high","CIMP-low"))
#  
#  idx <- colData(acc.exp.aux)$subtype_MethyLevel %in% c("CIMP-high")
#  idx2 <- colData(acc.exp.aux)$subtype_MethyLevel %in% c("CIMP-low")
#  
#  dataPrep <- TCGAanalyze_Preprocessing(object = acc.exp.aux, cor.cut = 0.6)
#  
#  dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
#                                        geneInfo = geneInfo,
#                                        method = "gcContent")
#  
#  dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
#                                    qnt.cut = 0.25,
#                                    method='quantile')
#  
#  dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,idx],
#                              mat2 = dataFilt[,idx2],
#                              Cond1type = "CIMP-high",
#                              Cond2type = "CIMP-low",
#                              method = "glmLRT")
#  
#  TCGAVisualize_volcano(x = dataDEGs$logFC,
#                        y = dataDEGs$FDR,
#                        filename = "Case3_volcanoexp.png",
#                        x.cut = 3,
#                        y.cut = 10^-5,
#                        names = rownames(dataDEGs),
#                        color = c("black","red","darkgreen"),
#                        names.size = 2,
#                        xlab = " Gene expression fold change (Log2)",
#                        legend = "State",
#                        title = "Volcano plot (CIMP-high vs CIMP-low)",
#                        width = 10)

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("figure5exp.png")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  
#  #------------------------------------------
#  # 2.4 - Starburst plot
#  # -----------------------------------------
#  # If true the argument names of the geenes in circle
#  # (biologically significant genes, has a change in gene
#  # expression and DNA methylation and respects all the thresholds)
#  # will be shown
#  # these genes are returned by the function see starburst object after the function is executed
#  starburst <- TCGAvisualize_starburst(met = acc.met,
#                                       exp = dataDEGs,
#                                       genome = "hg19"
#                                       group1 = "CIMP-high",
#                                       group2 = "CIMP-low",
#                                       filename = "starburst.png",
#                                       met.platform = "450K",
#                                       met.p.cut = 10^-5,
#                                       exp.p.cut = 10^-5,
#                                       diffmean.cut = 0.25,
#                                       logFC.cut = 3,
#                                       names = FALSE,
#                                       height=10,
#                                       width=15,
#                                       dpi=300)

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("figure5star.png")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  library(TCGAbiolinks)
#  library(SummarizedExperiment)
#  library(ELMER)
#  library(parallel)
#  dir.create("case4")
#  setwd("case4")
#  #-----------------------------------
#  # STEP 1: Search, download, prepare |
#  #-----------------------------------
#  # 1.1 - DNA methylation
#  # ----------------------------------
#  query.met <- GDCquery(project = "TCGA-KIRC",
#                        data.category = "DNA Methylation",
#                        platform = "Illumina Human Methylation 450")
#  GDCdownload(query.met)
#  kirc.met <- GDCprepare(query = query.met,
#                         save = TRUE,
#                         save.filename = "kircDNAmet.rda",
#                         summarizedExperiment = TRUE)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  # Step 1.2 download expression data
#  #-----------------------------------
#  # 1.2 - RNA expression
#  # ----------------------------------
#  query.exp <- GDCquery(project = "TCGA-KIRC",
#                        data.category = "Transcriptome Profiling",
#                        data.type = "Gene Expression Quantification",
#                        workflow.type = "HTSeq - FPKM-UQ")
#  GDCdownload(query.exp)
#  kirc.exp <- GDCprepare(query = query.exp,
#                         save = TRUE,
#                         save.filename = "kircExp.rda")
#  kirc.exp <- kirc.exp

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  distal.probes <- get.feature.probe(genome = "hg38", met.platform = "450K")

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  library(MultiAssayExperiment)
#  mae <- createMAE(exp = kirc.exp,
#                   met = kirc.met,
#                   save = TRUE,
#                   linearize.exp = TRUE,
#                   filter.probes = distal.probes,
#                   save.filename = "mae_kirc.rda",
#                   met.platform = "450K",
#                   genome = "hg38",
#                   TCGA = TRUE)
#  # Remove FFPE samples
#  mae <- mae[,!mae$is_ffpe]

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  group.col <- "definition"
#  group1 <-  "Primary Tumor"
#  group2 <- "Solid Tissue Normal"
#  direction <- "hypo"
#  dir.out <- file.path("kirc",direction)
#  dir.create(dir.out, recursive = TRUE)
#  #--------------------------------------
#  # STEP 3: Analysis                     |
#  #--------------------------------------
#  # Step 3.1: Get diff methylated probes |
#  #--------------------------------------
#  sig.diff <- get.diff.meth(data = mae,
#                            group.col = group.col,
#                            group1 =  group1,
#                            group2 = group2,
#                            minSubgroupFrac = 0.2,
#                            sig.dif = 0.3,
#                            diff.dir = direction, # Search for hypomethylated probes in group 1
#                            cores = 1,
#                            dir.out = dir.out,
#                            pvalue = 0.01)
#  
#  #-------------------------------------------------------------
#  # Step 3.2: Identify significant probe-gene pairs            |
#  #-------------------------------------------------------------
#  # Collect nearby 20 genes for Sig.probes
#  nearGenes <- GetNearGenes(data = mae,
#                            probes = sig.diff$probe,
#                            numFlankingGenes = 20, # 10 upstream and 10 dowstream genes
#                            cores = 1)
#  
#  pair <- get.pair(data = mae,
#                   group.col = group.col,
#                   group1 =  group1,
#                   group2 = group2,
#                   nearGenes = nearGenes,
#                   minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
#                   permu.dir = file.path(dir.out,"permu"),
#                   permu.size = 100, # Please set to 100000 to get significant results
#                   raw.pvalue  = 0.05,
#                   Pe = 0.01, # Please set to 0.001 to get significant results
#                   filter.probes = TRUE, # See preAssociationProbeFiltering function
#                   filter.percentage = 0.05,
#                   filter.portion = 0.3,
#                   dir.out = dir.out,
#                   cores = 1,
#                   label = direction)
#  
#  # Identify enriched motif for significantly hypomethylated probes which
#  # have putative target genes.
#  enriched.motif <- get.enriched.motif(data = mae,
#                                       probes = pair$Probe,
#                                       dir.out = dir.out,
#                                       label = direction,
#                                       min.incidence = 10,
#                                       lower.OR = 1.1)
#  TF <- get.TFs(data = mae,
#                group.col = group.col,
#                group1 =  group1,
#                group2 = group2,
#                minSubgroupFrac = 0.4,
#                enriched.motif = enriched.motif,
#                dir.out = dir.out,
#                cores = 1,
#                label = direction)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  scatter.plot(data = mae,
#               byProbe = list(probe = sig.diff$probe[1], numFlankingGenes = 20),
#               category = "definition",
#               dir.out = "plots",
#               lm = TRUE, # Draw linear regression curve
#               save = TRUE)

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("case4_elmer.png")
grid.raster(img)

## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  scatter.plot(data = mae,
#               byTF = list(TF = c("RUNX1","RUNX2","RUNX3"),
#                           probe = enriched.motif[[names(enriched.motif)[10]]]),
#               category = "definition",
#               dir.out = "plots",
#               save = TRUE,
#               lm_line = TRUE)

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("elmer1.png")
grid.raster(img)


## ----eval=FALSE,echo=TRUE,message=FALSE,warning=FALSE-------------------------
#  heatmapPairs(data = mae,
#               group.col = "definition",
#               group1 = "Primary Tumor",
#               annotation.col = c("gender"),
#               group2 = "Solid Tissue Normal",
#               pairs = pair,
#               filename =  "heatmap.pdf")

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(jpeg)
library(grid)
img <- readJPEG("elmer2.jpg")
grid.raster(img)

## ---- fig.width=6, fig.height=4, echo = FALSE, fig.align="center",hide=TRUE, message=FALSE,warning=FALSE----
library(png)
library(grid)
img <- readPNG("elmer3.png")
grid.raster(img)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()

Try the TCGAbiolinks package in your browser

Any scripts or data that you put into this service are public.

TCGAbiolinks documentation built on Nov. 8, 2020, 5:37 p.m.