inst/doc/usecase.R

## ---- eval=FALSE--------------------------------------------------------------
#  library(MultiAssayExperiment)
#  library(ELMER.data)
#  library(ELMER)
#  # get distal probes that are 2kb away from TSS on chromosome 1
#  distal.probes <- get.feature.probe(genome = "hg19",
#                                     met.platform = "450K",
#                                     rm.chr = paste0("chr",c(2:22,"X","Y")))
#  data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp
#  data(LUSC_meth_refined,package = "ELMER.data") # Meth
#  
#  mae <- createMAE(exp = GeneExp,
#                   met = Meth,
#                   save = TRUE,
#                   linearize.exp = TRUE,
#                   save.filename = "mae.rda",
#                   filter.probes = distal.probes,
#                   met.platform = "450K",
#                   genome = "hg19",
#                   TCGA = TRUE)
#  
#  group.col <- "definition"
#  group1 <-  "Primary solid Tumor"
#  group2 <- "Solid Tissue Normal"
#  dir.out <- "result"
#  diff.dir <-  "hypo" # Search for hypomethylated probes in group 1
#  
#  sig.diff <- get.diff.meth(data = mae,
#                            group.col = group.col,
#                            group1 = group1,
#                            group2 = group2,
#                            minSubgroupFrac = 0.2,
#                            sig.dif = 0.3,
#                            diff.dir = diff.dir,
#                            cores = 1,
#                            dir.out = dir.out,
#                            pvalue = 0.01)
#  
#  
#  nearGenes <- GetNearGenes(data = mae,
#                            probes = sig.diff$probe,
#                            numFlankingGenes = 20) # 10 upstream and 10 dowstream genes
#  
#  pair <- get.pair(data = mae,
#                   group.col = group.col,
#                   group1 = group1,
#                   mode = "unsupervised",
#                   group2 = group2,
#                   nearGenes = nearGenes,
#                   diff.dir = diff.dir,
#                   minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
#                   permu.dir = file.path(dir.out,"permu"),
#                   permu.size = 100, # Please set to 100000 to get significant results
#                   raw.pvalue = 0.05,
#                   Pe = 0.01, # Please set to 0.001 to get significant results
#                   filter.probes = TRUE, # See preAssociationProbeFiltering function
#                   filter.percentage = 0.05,
#                   filter.portion = 0.3,
#                   dir.out = dir.out,
#                   cores = 1,
#                   label = diff.dir)
#  
#  # Identify enriched motif for significantly hypomethylated probes which
#  # have putative target genes.
#  enriched.motif <- get.enriched.motif(data = mae,
#                                       probes = pair$Probe,
#                                       dir.out = dir.out,
#                                       label = diff.dir,
#                                       min.incidence = 10,
#                                       lower.OR = 1.1)
#  
#  TF <- get.TFs(data = mae,
#                mode = "unsupervised",
#                group.col = group.col,
#                group1 = group1,
#                group2 = group2,
#                enriched.motif = enriched.motif,
#                dir.out = dir.out,
#                cores = 1,
#                label = diff.dir)
#  
#  

## ---- eval=FALSE--------------------------------------------------------------
#  library(stringr)
#  library(TCGAbiolinks)
#  library(dplyr)
#  library(ELMER)
#  library(MultiAssayExperiment)
#  library(parallel)
#  library(readr)
#  dir.create("~/paper_elmer/",showWarnings = FALSE)
#  setwd("~/paper_elmer/")
#  
#  file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
#  if(file.exists(file)) {
#    mae <- get(load(file))
#  } else {
#    getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
#            basedir = "DATA", # Where data will be downloaded
#            genome  = "hg38") # Genome of refenrece "hg38" or "hg19"
#  
#    distal.probes <- get.feature.probe(feature = NULL,
#                                       genome = "hg38",
#                                       met.platform = "450K")
#  
#    mae <- createMAE(exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda",
#                     met = "~/paper_elmer/Data/BRCA/BRCA_meth_hg38.rda",
#                     met.platform = "450K",
#                     genome = "hg38",
#                     linearize.exp = TRUE,
#                     filter.probes = distal.probes,
#                     met.na.cut = 0.2,
#                     save = FALSE,
#                     TCGA = TRUE)
#    # Remove FFPE samples from the analysis
#    mae <- mae[,!mae$is_ffpe]
#  
#    # Get molecular subytpe information from cell paper and more metadata (purity etc...)
#    # https://doi.org/10.1016/j.cell.2015.09.033
#    file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
#    downloader::download(file, basename(file))
#    subtypes <- readxl::read_excel(basename(file), skip = 2)
#  
#    subtypes$sample <- substr(subtypes$Methylation,1,16)
#    meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
#    meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
#    meta.data <- S4Vectors::DataFrame(meta.data)
#    rownames(meta.data) <- meta.data$sample
#    stopifnot(all(meta.data$patient == colData(mae)$patient))
#    colData(mae) <- meta.data
#    save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
#  }
#  dir.out <- "BRCA_unsupervised_hg38/hypo"
#  cores <- 10
#  diff.probes <- get.diff.meth(data = mae,
#                               group.col = "definition",
#                               group1 = "Primary solid Tumor",
#                               group2 = "Solid Tissue Normal",
#                               diff.dir = "hypo", # Get probes hypometh. in group 1
#                               cores = cores,
#                               minSubgroupFrac = 0.2, # % group samples  used.
#                               pvalue = 0.01,
#                               sig.dif = 0.3,
#                               dir.out = dir.out,
#                               save = TRUE)
#  
#  # For each differently methylated probes we will get the
#  # 20 nearby genes (10 downstream and 10 upstream)
#  nearGenes <- GetNearGenes(data = mae,
#                            probes =  diff.probes$probe,
#                            numFlankingGenes = 20)
#  
#  # This step is the most time consuming. Depending on the size of the groups
#  # and the number of probes found previously it migh take hours
#  Hypo.pair <- get.pair(data = mae,
#                        nearGenes = nearGenes,
#                        group.col = "definition",
#                        group1 = "Primary solid Tumor",
#                        group2 = "Solid Tissue Normal",
#                        permu.dir = paste0(dir.out,"/permu"),
#                        permu.size = 10000,
#                        mode = "unsupervised",
#                        minSubgroupFrac = 0.4, # 40% of samples to create U and M
#                        raw.pvalue = 0.001,
#                        Pe = 0.001,
#                        filter.probes = TRUE,
#                        filter.percentage = 0.05,
#                        filter.portion = 0.3,
#                        dir.out = dir.out,
#                        cores = cores,
#                        label = "hypo")
#  # Number of pairs: 2950
#  
#  
#  enriched.motif <- get.enriched.motif(data = mae,
#                                       min.motif.quality = "DS",
#                                       probes = unique(Hypo.pair$Probe),
#                                       dir.out = dir.out,
#                                       label = "hypo",
#                                       min.incidence = 10,
#                                       lower.OR = 1.1)
#  TF <- get.TFs(data = mae,
#                group.col = "definition",
#                group1 = "Primary solid Tumor",
#                group2 = "Solid Tissue Normal",
#                minSubgroupFrac = 0.4, # Set to 1 if supervised mode
#                enriched.motif = enriched.motif,
#                dir.out = dir.out,
#                cores = cores,
#                label = "hypo")
#  

## ---- eval=FALSE--------------------------------------------------------------
#  library(stringr)
#  library(TCGAbiolinks)
#  library(dplyr)
#  library(ELMER)
#  library(MultiAssayExperiment)
#  library(parallel)
#  library(readr)
#  #-----------------------------------
#  # 1 - Samples
#  # ----------------------------------
#  dir.create("~/paper_elmer/",showWarnings = FALSE)
#  setwd("~/paper_elmer/")
#  
#  file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
#  if(file.exists(file)) {
#    mae <- get(load(file))
#  } else {
#    getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
#            basedir = "DATA", # Where data will be downloaded
#            genome  = "hg38") # Genome of refenrece "hg38" or "hg19"
#  
#    distal.probes <- get.feature.probe(feature = NULL,
#                                       genome = "hg38",
#                                       met.platform = "450K")
#  
#    mae <- createMAE(exp = "DATA/BRCA/BRCA_RNA_hg38.rda",
#                     met = "DATA/BRCA/BRCA_meth_hg38.rda",
#                     met.platform = "450K",
#                     genome = "hg38",
#                     linearize.exp = TRUE,
#                     filter.probes = distal.probes,
#                     met.na.cut = 0.2,
#                     save = FALSE,
#                     TCGA = TRUE)
#    # Remove FFPE samples from the analysis
#    mae <- mae[,!mae$is_ffpe]
#  
#    # Get molecular subytpe information from cell paper and more metadata (purity etc...)
#    # https://doi.org/10.1016/j.cell.2015.09.033
#    file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
#    downloader::download(file, basename(file))
#    subtypes <- readxl::read_excel(basename(file), skip = 2)
#  
#    subtypes$sample <- substr(subtypes$Methylation,1,16)
#    meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
#    meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
#    meta.data <- S4Vectors::DataFrame(meta.data)
#    rownames(meta.data) <- meta.data$sample
#    stopifnot(all(meta.data$patient == colData(mae)$patient))
#    colData(mae) <- meta.data
#    save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
#  }
#  
#  cores <- 6
#  direction <- c( "hypo","hyper")
#  genome <- "hg38"
#  group.col  <- "PAM50"
#  groups <- t(combn(na.omit(unique(colData(mae)[,group.col])),2))
#  for(g in 1:nrow(groups)) {
#    group1 <- groups[g,1]
#    group2 <- groups[g,2]
#    for (j in direction){
#      tryCatch({
#        message("Analysing probes ",j, "methylated in ", group1, " vs ", group2)
#        dir.out <- paste0("BRCA_supervised_",genome,"/",group1,"_",group2,"/",j)
#        dir.create(dir.out, recursive = TRUE)
#        #--------------------------------------
#        # STEP 3: Analysis                     |
#        #--------------------------------------
#        # Step 3.1: Get diff methylated probes |
#        #--------------------------------------
#        Sig.probes <- get.diff.meth(data       = mae,
#                                    group.col  = group.col,
#                                    group1     = group1,
#                                    group2     = group2,
#                                    sig.dif    = 0.3,
#                                    minSubgroupFrac = 1,
#                                    cores      = cores,
#                                    dir.out    = dir.out,
#                                    diff.dir   = j,
#                                    pvalue     = 0.01)
#        if(nrow(Sig.probes) == 0) next
#        #-------------------------------------------------------------
#        # Step 3.2: Identify significant probe-gene pairs            |
#        #-------------------------------------------------------------
#        # Collect nearby 20 genes for Sig.probes
#        nearGenes <- GetNearGenes(data  = mae,
#                                  probe = Sig.probes$probe)
#  
#        pair <- get.pair(data       = mae,
#                         nearGenes  = nearGenes,
#                         group.col  = group.col,
#                         group1     = group1,
#                         group2     = group2,
#                         permu.dir  = paste0(dir.out,"/permu"),
#                         dir.out    = dir.out,
#                         mode       = "supervised",
#                         diff.dir   = j,
#                         cores      = cores,
#                         label      = j,
#                         permu.size = 10000,
#                         raw.pvalue = 0.001)
#  
#        Sig.probes.paired <- readr::read_csv(paste0(dir.out,
#                                             "/getPair.",j,
#                                             ".pairs.significant.csv"))[,1, drop = TRUE]
#  
#  
#        #-------------------------------------------------------------
#        # Step 3.3: Motif enrichment analysis on the selected probes |
#        #-------------------------------------------------------------
#        if(length(Sig.probes.paired) > 0 ){
#          #-------------------------------------------------------------
#          # Step 3.3: Motif enrichment analysis on the selected probes |
#          #-------------------------------------------------------------
#          enriched.motif <- get.enriched.motif(probes  = Sig.probes.paired,
#                                               dir.out = dir.out,
#                                               data    = mae,
#                                               label   = j,
#                                               plot.title =  paste0("BRCA: OR for paired probes ",
#                                                                    j, "methylated in ",
#                                                                    group1, " vs ",group2))
#          motif.enrichment <- readr::read_csv(paste0(dir.out,
#                                              "/getMotif.",j,
#                                              ".motif.enrichment.csv"))
#          if(length(enriched.motif) > 0){
#            #-------------------------------------------------------------
#            # Step 3.4: Identifying regulatory TFs                        |
#            #-------------------------------------------------------------
#            print("get.TFs")
#  
#            TF <- get.TFs(data           = mae,
#                          enriched.motif = enriched.motif,
#                          dir.out        = dir.out,
#                          mode           = "supervised",
#                          group.col      = group.col,
#                          group1         = group1,
#                          diff.dir       = j,
#                          group2         = group2,
#                          cores          = cores,
#                          label          = j)
#            TF.meth.cor <- get(load(paste0(dir.out,
#                                           "/getTF.",j,
#                                           ".TFs.with.motif.pvalue.rda")))
#            save(mae,TF, enriched.motif, Sig.probes.paired,
#                 pair, nearGenes, Sig.probes, motif.enrichment,
#                 TF.meth.cor,
#                 file = paste0(dir.out,"/ELMER_results_",j,".rda"))
#          }
#        }
#      }, error = function(e){
#        message(e)
#      })
#    }
#  }

Try the ELMER package in your browser

Any scripts or data that you put into this service are public.

ELMER documentation built on Nov. 8, 2020, 4:59 p.m.