createMAE: Construct a Multi Assay Experiment for ELMER analysis

Description Usage Arguments Value Examples

Description

This function will receive a gene expression and DNA methylation data objects and create a Multi Assay Experiment.

Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
createMAE(
  exp,
  met,
  colData,
  sampleMap,
  linearize.exp = FALSE,
  filter.probes = NULL,
  met.na.cut = 0.2,
  filter.genes = NULL,
  met.platform = "450K",
  genome = NULL,
  save = TRUE,
  save.filename,
  TCGA = FALSE
)

Arguments

exp

A Summaerized Experiment, a matrix or path of rda file only containing the data. Rownames should be either Ensembl gene id (ensembl_gene_id) or gene symbol (external_gene_name)

met

A Summaerized Experiment, a matrix or path of rda file only containing the data.

colData

A DataFrame or data.frame of the phenotype data for all participants. Must have column primary (sample ID).

sampleMap

A DataFrame or data.frame of the matching samples and colnames of the gene expression and DNA methylation matrix. This should be used if your matrix have different columns names. This object must have following columns: assay ("DNA methylation" and "Gene expression"), primary (sample ID) and colname (names of the columns of the matrix).

linearize.exp

Take log2(exp + 1) in order to linearize relation between methylation and expression

filter.probes

A GRanges object contains the coordinate of probes which locate within promoter regions or distal feature regions such as union enhancer from REMC and FANTOM5. See get.feature.probe function.

met.na.cut

Define the percentage of NA that the line should have to remove the probes for humanmethylation platforms.

filter.genes

List of genes ensemble ids to filter from object

met.platform

DNA methylation platform "450K" or "EPIC"

genome

Which is the default genome to make gene information. Options hg19 and hg38

save

If TRUE, MAE object will be saved into a file named as the argument save.file if this was set, otherwise as mae_genome_met.platform.rda.

save.filename

Name of the rda file to save the object (must end in .rda)

TCGA

A logical. FALSE indicate data is not from TCGA (FALSE is default). TRUE indicates data is from TCGA and sample section will automatically filled in.

Value

A MultiAssayExperiment object

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# NON TCGA example: matrices has different column names
gene.exp <- S4Vectors::DataFrame(sample1.exp = c("ENSG00000141510"=2.3,"ENSG00000171862"=5.4),
                  sample2.exp = c("ENSG00000141510"=1.6,"ENSG00000171862"=2.3))
dna.met <- S4Vectors::DataFrame(sample1.met = c("cg14324200"=0.5,"cg23867494"=0.1),
                       sample2.met =  c("cg14324200"=0.3,"cg23867494"=0.9))
sample.info <- S4Vectors::DataFrame(primary =  c("sample1","sample2"), 
                                    sample.type = c("Normal", "Tumor"))
sampleMap <- S4Vectors::DataFrame(
                 assay = c("Gene expression","DNA methylation","Gene expression","DNA methylation"),
                 primary = c("sample1","sample1","sample2","sample2"), 
                 colname = c("sample1.exp","sample1.met","sample2.exp","sample2.met"))
mae <- createMAE(exp = gene.exp, 
                 met = dna.met, 
                 sampleMap = sampleMap, 
                 met.platform ="450K",
                 colData = sample.info, 
                 genome = "hg38") 
# You can also use sample Mapping and Sample information tables from a tsv file
# You can use the createTSVTemplates function to create the tsv files
readr::write_tsv(as.data.frame(sampleMap), path = "sampleMap.tsv")
readr::write_tsv(as.data.frame(sample.info), path = "sample.info.tsv")
mae <- createMAE(exp = gene.exp, 
                 met = dna.met, 
                 sampleMap = "sampleMap.tsv", 
                 met.platform ="450K",
                 colData = "sample.info.tsv", 
                 genome = "hg38") 
                 
# NON TCGA example: matrices has same column names
gene.exp <- S4Vectors::DataFrame(sample1 = c("ENSG00000141510"=2.3,"ENSG00000171862"=5.4),
                  sample2 = c("ENSG00000141510"=1.6,"ENSG00000171862"=2.3))
dna.met <- S4Vectors::DataFrame(sample1 = c("cg14324200"=0.5,"cg23867494"=0.1),
                       sample2=  c("cg14324200"=0.3,"cg23867494"=0.9))
sample.info <- S4Vectors::DataFrame(primary =  c("sample1","sample2"), 
                                    sample.type = c("Normal", "Tumor"))
sampleMap <- S4Vectors::DataFrame(
                 assay = c("Gene expression","DNA methylation","Gene expression","DNA methylation"),
                 primary = c("sample1","sample1","sample2","sample2"), 
                 colname = c("sample1","sample1","sample2","sample2")
)
mae <- createMAE(exp = gene.exp, 
                 met = dna.met, 
                 sampleMap = sampleMap, 
                 met.platform ="450K",
                 colData = sample.info, 
                 genome = "hg38") 

## Not run: 
   # TCGA example using TCGAbiolinks
   # Testing creating MultyAssayExperiment object
   # Load library
   library(TCGAbiolinks)
   library(SummarizedExperiment)
   
   samples <- c("TCGA-BA-4074", "TCGA-BA-4075", "TCGA-BA-4077", "TCGA-BA-5149",
                "TCGA-UF-A7JK", "TCGA-UF-A7JS", "TCGA-UF-A7JT", "TCGA-UF-A7JV")
   
   #1) Get gene expression matrix
   query.exp <- GDCquery(project = "TCGA-HNSC", 
                         data.category = "Transcriptome Profiling", 
                         data.type = "Gene Expression Quantification", 
                         workflow.type = "HTSeq - FPKM-UQ",
                         barcode = samples)
   
   GDCdownload(query.exp)
   exp.hg38 <- GDCprepare(query = query.exp)
   
   
   # Aligned against Hg19
   query.exp.hg19 <- GDCquery(project = "TCGA-HNSC", 
                              data.category = "Gene expression",
                              data.type = "Gene expression quantification",
                              platform = "Illumina HiSeq", 
                              file.type  = "normalized_results",
                              experimental.strategy = "RNA-Seq",
                              barcode = samples,
                              legacy = TRUE)
   GDCdownload(query.exp.hg19)
   exp.hg19 <- GDCprepare(query.exp.hg19)
   
   # Our object needs to have emsembl gene id as rownames
   rownames(exp.hg19) <- values(exp.hg19)$ensembl_gene_id
   
   # DNA Methylation
   query.met <- GDCquery(project = "TCGA-HNSC",
                         legacy = TRUE,
                         data.category = "DNA methylation",
                         barcode = samples,
                         platform = "Illumina Human Methylation 450")
   
   GDCdownload(query.met)
   met <- GDCprepare(query = query.met)
   
   distal.enhancer <- get.feature.probe(genome = "hg19",met.platform = "450k")
   
   # Consisering it is TCGA and SE
   mae.hg19 <- createMAE(exp = exp.hg19, 
                         met =  met, 
                         TCGA = TRUE, 
                         genome = "hg19",  
                         filter.probes = distal.enhancer)
   values(getExp(mae.hg19))
   
   mae.hg38 <- createMAE(exp = exp.hg38, met = met, 
                        TCGA = TRUE, genome = "hg38",  
                        filter.probes = distal.enhancer)
   values(getExp(mae.hg38))
   
   # Consisering it is TCGA and not SE
   mae.hg19.test <- createMAE(exp = assay(exp.hg19), met =  assay(met), 
                              TCGA = TRUE, genome = "hg19",  
                              filter.probes = distal.enhancer)
   
   mae.hg38 <- createMAE(exp = assay(exp.hg38), met = assay(met), 
                         TCGA = TRUE, genome = "hg38",  
                         filter.probes = distal.enhancer)
   values(getExp(mae.hg38))
   
   # Consisering it is not TCGA and SE
   # DNA methylation and gene expression Objects should have same sample names in columns
   not.tcga.exp <- exp.hg19 
   colnames(not.tcga.exp) <- substr(colnames(not.tcga.exp),1,15)
   not.tcga.met <- met 
   colnames(not.tcga.met) <- substr(colnames(not.tcga.met),1,15)
   
   phenotype.data <- data.frame(row.names = colnames(not.tcga.exp),
                                primary = colnames(not.tcga.exp),
                                samples = colnames(not.tcga.exp), 
                                group = c(rep("group1",4),rep("group2",4)))
   distal.enhancer <- get.feature.probe(genome = "hg19",met.platform = "450k")
   mae.hg19 <- createMAE(exp = not.tcga.exp, 
                         met =  not.tcga.met, 
                         TCGA = FALSE, 
                         filter.probes = distal.enhancer,
                         genome = "hg19", 
                         colData = phenotype.data)

## End(Not run)
createMAE

ELMER documentation built on Nov. 8, 2020, 4:59 p.m.