```r knitr::opts_chunk$set(echo = TRUE)

# Use Case 2: Evaluation of genomic signatures on cancer methylation profiles

```r
###############################################
# Use Case 2
# Title - Building cancer profiles with genomic signatures of DNA methylation 
# Example - Lung adenocarcinoma GSE32866 data series
###############################################



library(MeinteR)

gse.accession <- "GSE32866"
annotation.file <- "GSE32866_annotation.csv" #Change path to annotation file appropriately (file included in the vignette folder)
working.dir <- getwd()
name<-"LUAD"
offset <- 100
db <- 0.25

#-----FUNCTIONS----------------------
calcP <- function(input1, input2) {
  if (length(input1)<200 | length(input2)<200) {
    shap.inc <- shapiro.test(input1)
    shap.dec <- shapiro.test(input2)
    if (shap.inc$p.value < 0.05 | shap.dec$p.value <0.05) { # Not normal distribution
      wilc <- suppressWarnings(wilcox.test(input1,input2))
      message("p-value: ", round(wilc$p.value,4))
    } else { #Normal distribution
      ttest <- t.test(input1,input2)
      message("p-value: ", round(ttest$p.value,4))
    }
  } else {
    ttest <- t.test(input1,input2)
    message("p-value: ", round(ttest$p.value,4))
  }
}
#------------------------------------

geo.data <- importGEO(gse.acc=gse.accession, annotation.file=annotation.file)
bed.data<-na.omit(reorderBed(geo.data[[1]],3,4,5,2))
head(bed.data)
group1 <- merge(geo.data[[4]],geo.data[[1]], by="probes")
par(mfrow = c(1, 2))
nameStudy(paste(colnames(group1[2]), name))
group1 <- na.omit(reorderBed(group1,4,5,6,2))
plotBeta(group1)
group2 <- merge(geo.data[[5]],geo.data[[1]], by="probes")
nameStudy(paste(colnames(group2[2]), name))
group2<-na.omit(reorderBed(group2,4,5,6,2))
plotBeta(group2)


dec <- subset(bed.data, bed.data$score < -1*db) 
inc <- subset(bed.data, bed.data$score > db) 
message("Number of sequences with DMS+ (hypermethylated in cancer): ", nrow(inc))
message("Number of sequences with DMS- (hypomethylated in cancer): ", nrow(dec))

#### G-quadruplex
quads.inc <- findQuads(inc, offset=offset)
quads.dec <- findQuads(dec, offset=offset)
message("Mean quadruplexes per sequence with DMS+: ",round(mean(quads.inc[[2]]$quads),3))
message("Mean quadruplexes per sequence with DMS-: ",round(mean(quads.dec[[2]]$quads),3))
calcP(quads.inc[[2]]$quads,quads.dec[[2]]$quads)
#### TFBS
tfbs.dec <- findTFBS(dec, mcores=1, persim=0.9)
tfbs.inc <- findTFBS(inc, mcores=1, persim=0.9)
message("Mean TFBS in promoters per sequence DMS+: ",round(mean(tfbs.inc[[2]]$tfbs),3), " Promoters: ", length(tfbs.inc[[2]]$tfbs))
message("Mean TFBS in promoters per sequence DMS-: ",round(mean(tfbs.dec[[2]]$tfbs),3), " Promoters: ", length(tfbs.dec[[2]]$tfbs))
calcP(tfbs.inc[[2]]$tfbs,tfbs.dec[[2]]$tfbs)
#### Conserved TFBS
ctfbs.dec <- findConservedTFBS(dec)
ctfbs.inc <- findConservedTFBS(inc)
message("Mean conserved TFBS per sequence in DMS+: ",round(mean(ctfbs.inc[[3]]$freq),3))
message("Mean conserved TFBS per sequence in DMS-: ",round(mean(ctfbs.dec[[3]]$freq),3))
calcP(ctfbs.inc[[3]]$freq,ctfbs.dec[[3]]$freq)

#### DNA shapes
shapes.inc <- findShapes(inc, offset=offset)
shapes.dec <- findShapes(dec, offset=offset)
message("Frequency of significant MGW changes in DMS+ ", sum(shapes.inc[[1]]$p.MGW < 0.05)/nrow(inc))
message("Frequency of significant MGW changes in DMS- ", sum(shapes.dec[[1]]$p.MGW < 0.05)/nrow(dec))
message("Frequency of significant HelT changes in DMS+ ", sum(shapes.inc[[2]]$p.HelT < 0.05)/nrow(inc))
message("Frequency of significant HelT changes in DMS- ", sum(shapes.dec[[2]]$p.HelT < 0.05)/nrow(dec))
message("Frequency of significant ProT changes in DMS+ ", sum(shapes.inc[[3]]$p.ProT < 0.05)/nrow(inc))
message("Frequency of significant ProT changes in DMS- ", sum(shapes.dec[[3]]$p.ProT < 0.05)/nrow(dec))
message("Frequency of significant Roll changes in DMS+ ", sum(shapes.inc[[4]]$p.Roll < 0.05)/nrow(inc))
message("Frequency of significant Roll changes in DMS- ", sum(shapes.dec[[4]]$p.Roll < 0.05)/nrow(dec))

#### Splice sites
splice.inc <- findSpliceSites(inc, persim = 0.8)
splice.dec <- findSpliceSites(dec, persim = 0.8)
message("Mean splice sites per sequence in DMS+: ",round(mean(splice.inc[[1]]$freq),3))
message("Mean splice sites per sequence in DMS-: ",round(mean(splice.dec[[1]]$freq),3))
calcP(splice.inc[[1]]$freq,splice.dec[[1]]$freq)

#### Alternative splicing events
alt.inc <- findAltSplicing(inc)
alt.dec <- findAltSplicing(dec)
message("Mean alternative splicing events per sequence in DMS+: ",round(mean(alt.inc[[3]]$obs),3))
message("Mean alternative splicing events per sequence in DMS-: ",round(mean(alt.dec[[3]]$obs),3))
calcP(alt.inc[[3]]$obs,alt.dec[[3]]$obs)


#Pals
pals.dec <- findPals(dec, offset=offset)
pals.inc <- findPals(inc, offset=offset)
message("Mean palindromes in DMS+: ",round(mean(pals.inc[[3]]$pals),3))
message("Mean palindromes in DMS-: ", round(mean(pals.dec[[3]]$pals),3))
calcP(pals.dec[[3]]$pals,pals.inc[[3]]$pals)

#Example plots (more plots available in vignette file)
scatterConsTF(ctfbs.inc[[2]])
plotTF(tfbs.inc[[1]])
alt.inc[[4]]


andigoni/MeinteR documentation built on Oct. 1, 2021, 9:33 p.m.