inst/doc/query.R

## ----setup, include=FALSE-----------------------------------------------------
# Default chunk option for the document: echo the source of every chunk.
knitr::opts_chunk$set(echo = TRUE)
# Package-level knitr option: turn the progress display off.
knitr::opts_knit$set(progress = FALSE)

## ----message=FALSE, warning=FALSE, include=FALSE------------------------------
library(TCGAbiolinks)
library(SummarizedExperiment)
library(dplyr)
library(DT)

## ---- eval = TRUE, echo = FALSE-----------------------------------------------
# Render the result of getGDCprojects() as an interactive table with
# per-column filters on top, horizontal scrolling and 10 rows per page.
TCGAbiolinks:::getGDCprojects() %>%
  datatable(
    caption = "List of projects",
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 10)
  )

## ---- eval = TRUE, echo = FALSE-----------------------------------------------
# Render the result of getBarcodeDefinition() as an interactive table with
# per-column filters on top, horizontal scrolling and 10 rows per page.
TCGAbiolinks:::getBarcodeDefinition() %>%
  datatable(
    caption = "List sample types",
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 10)
  )

## -----------------------------------------------------------------------------
# Read the CSV export of the spreadsheet tab with gid 2046985454 (passing an
# empty readr::cols() specification as col_types) and show it as an
# interactive table, 40 rows per page.
readr::read_csv(
  "https://docs.google.com/spreadsheets/d/1f98kFdj9mxVDc1dv4xTZdx8iWgUiDYO-qiFJINvmTZs/export?format=csv&gid=2046985454",
  col_types = readr::cols()
) %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 40)
  )

## -----------------------------------------------------------------------------
# Read the CSV export of the spreadsheet tab with gid 1817673686 (passing an
# empty readr::cols() specification as col_types) and show it as an
# interactive table, 40 rows per page.
readr::read_csv(
  "https://docs.google.com/spreadsheets/d/1f98kFdj9mxVDc1dv4xTZdx8iWgUiDYO-qiFJINvmTZs/export?format=csv&gid=1817673686",
  col_types = readr::cols()
) %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 40)
  )

## ----message=FALSE, warning=FALSE---------------------------------------------
# Query DNA methylation files on the 450 platform for the "Recurrent Tumor"
# sample type in the TCGA-GBM and TCGA-LGG projects (legacy = FALSE).
query <- GDCquery(
  project = c("TCGA-GBM", "TCGA-LGG"),
  data.category = "DNA Methylation",
  platform = "Illumina Human Methylation 450",
  sample.type = "Recurrent Tumor",
  legacy = FALSE
)

# Show the query results as an interactive table, 5 rows per page.
query %>%
  getResults() %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5)
  )

## ----message=FALSE, warning = FALSE, eval = FALSE-----------------------------
#  query.met <- GDCquery(project = "TCGA-COAD",
#                        data.category = "DNA Methylation",
#                        legacy = FALSE,
#                        platform = c("Illumina Human Methylation 450"))
#  query.exp <- GDCquery(project = "TCGA-COAD",
#                        data.category = "Transcriptome Profiling",
#                        data.type = "Gene Expression Quantification",
#                        workflow.type = "HTSeq - FPKM-UQ")
#  
#  # Get all patients that have DNA methylation and gene expression.
#  common.patients <- intersect(substr(getResults(query.met, cols = "cases"), 1, 12),
#                               substr(getResults(query.exp, cols = "cases"), 1, 12))
#  
#  # Only select the first 5 patients
#  query.met <- GDCquery(project = "TCGA-COAD",
#                        data.category = "DNA Methylation",
#                        legacy = FALSE,
#                        platform = c("Illumina Human Methylation 450"),
#                        barcode = common.patients[1:5])
#  query.exp <- GDCquery(project = "TCGA-COAD",
#                        data.category = "Transcriptome Profiling",
#                        data.type = "Gene Expression Quantification",
#                        workflow.type = "HTSeq - FPKM-UQ",
#                        barcode = common.patients[1:5])

## ----results_matched, message=FALSE, warning=FALSE, eval = FALSE--------------
#  datatable(getResults(query.met, cols = c("data_type","cases")),
#            filter = 'top',
#            options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
#            rownames = FALSE)
#  datatable(getResults(query.exp, cols = c("data_type","cases")),
#            filter = 'top',
#            options = list(scrollX = TRUE, keys = TRUE, pageLength = 5),
#            rownames = FALSE)
#  
#  

## ----message=FALSE, warning=FALSE---------------------------------------------
# Query "Sequencing Reads" files for "Primary Tumor" samples in TCGA-BRCA.
query <- GDCquery(
  project = "TCGA-BRCA",
  sample.type = "Primary Tumor",
  data.category = "Sequencing Reads"
)

# Restrict the table to the first 100 rows so the document renders faster,
# keeping only the file_name and cases columns.
query %>%
  getResults(rows = seq_len(100), cols = c("file_name", "cases")) %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5)
  )

## ----message=FALSE, warning=FALSE---------------------------------------------
# Query DNA methylation files on the 450 and 27 platforms for TCGA-GBM and
# TCGA-LGG with legacy = TRUE.
methylation_platforms <- c(
  "Illumina Human Methylation 450",
  "Illumina Human Methylation 27"
)
query <- GDCquery(
  project = c("TCGA-GBM", "TCGA-LGG"),
  data.category = "DNA methylation",
  platform = methylation_platforms,
  legacy = TRUE
)

# Show the first 100 result rows as an interactive table, 5 rows per page.
query %>%
  getResults(rows = seq_len(100)) %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5)
  )

## ----message = FALSE, warning = FALSE, eval = FALSE---------------------------
#  
#  query <- GDCquery(project = c("TCGA-LUAD"),
#                    legacy = TRUE,
#  		  data.category = "DNA methylation",
#                    data.type = "Methylation percentage",
#                    experimental.strategy = "Bisulfite-Seq")
#  
#  # VCF - controlled data
#  query <- GDCquery(project = c("TCGA-LUAD"),
#                    legacy = TRUE,
#                    data.category = "DNA methylation",
#  		  data.type = "Bisulfite sequence alignment",
#                    experimental.strategy = "Bisulfite-Seq")
#  
#  
#  # WGBS BAM files - controlled data
#  query <- GDCquery(project = c("TCGA-LUAD"),
#                    legacy = TRUE,
#                    data.type = "Aligned reads",
#                    data.category = "Raw sequencing data",
#                    experimental.strategy = "Bisulfite-Seq")

## ----message=FALSE, warning=FALSE---------------------------------------------
# Gene expression aligned against hg19: RNA-Seq "normalized_results" files
# for two TCGA-GBM barcodes, queried with legacy = TRUE.
query.exp.hg19 <- GDCquery(
  project = "TCGA-GBM",
  legacy = TRUE,
  data.category = "Gene expression",
  data.type = "Gene expression quantification",
  experimental.strategy = "RNA-Seq",
  platform = "Illumina HiSeq",
  file.type = "normalized_results",
  barcode = c(
    "TCGA-14-0736-02A-01R-2005-01",
    "TCGA-06-0211-02A-02R-2005-01"
  )
)

# Show the query results as an interactive table, 5 rows per page.
query.exp.hg19 %>%
  getResults() %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5)
  )

## ----message=FALSE, warning=FALSE---------------------------------------------
# Print the manifest for the hg19 expression query; save = FALSE is passed
# to getManifest().
query.exp.hg19 %>%
  getManifest(save = FALSE)

## ----message=FALSE, warning=FALSE---------------------------------------------

# Show the file_name and file_size columns of the GDCquery_ATAC_seq() query
# results as an interactive table, 5 rows per page.
TCGAbiolinks:::GDCquery_ATAC_seq() %>%
  getResults() %>%
  .[, c("file_name", "file_size")] %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5)
  )

## ----message=FALSE, warning=FALSE,eval = FALSE--------------------------------
#  query <- TCGAbiolinks:::GDCquery_ATAC_seq(file.type = "rds")
#  GDCdownload(query,method = "client")
#  
#  query <- TCGAbiolinks:::GDCquery_ATAC_seq(file.type = "bigWigs")
#  GDCdownload(query,method = "client")
#  

## ----message=FALSE, warning=FALSE,eval = TRUE---------------------------------
# Fetch the sample files summary for TCGA-ACC and show its first rows
# (head() default) as an interactive table, 5 rows per page.
tab <- getSampleFilesSummary(project = "TCGA-ACC")
tab %>%
  head() %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5)
  )

Try the TCGAbiolinks package in your browser

Any scripts or data that you put into this service are public.

TCGAbiolinks documentation built on Nov. 8, 2020, 5:37 p.m.