knitr::opts_chunk$set(echo = TRUE)

Use Case 1: Genome-wide association of G-quadruplexes with DNA methylation using TCGA/GEO breast cancer datasets

###############################################
# Use Case 1
# Title - TCGA/GSE66695 cancer profiles for breast cancer (BRCA)
###############################################

library(dplyr)
library(MeinteR)
library(TCGAbiolinks)
library(ggplot2)
#library(lattice)


thr = 0.3
query <- GDCquery(project= "TCGA-BRCA", 
                            data.category = "DNA methylation", 
                            platform = "Illumina Human Methylation 450", 
                            sample.type = c("Primary solid Tumor","Solid Tissue Normal"),
                            legacy = TRUE)

# Primary solid tumor samples
Smp.TP <- TCGAquery_SampleTypes(query$results[[1]]$cases,"TP")
# Solid normal tissue samples
Smp.NT <- TCGAquery_SampleTypes(query$results[[1]]$cases,"NT")
matched_bars <- TCGAquery_MatchedCoupledSampleTypes(query$results[[1]]$cases,c("NT","TP"))

query.m <- GDCquery(project= "TCGA-BRCA", 
                            data.category = "DNA methylation", 
                            platform = "Illumina Human Methylation 450", 
                            barcode = matched_bars,
                            sample.type = c("Primary solid Tumor","Solid Tissue Normal"),
                            legacy = TRUE)

#Download DNA methylation data (~4GB of data)
GDCdownload(query.m)
gdc.pr <- GDCprepare(query.m)

#Plot beta-value distribution
TCGAvisualize_meanMethylation(gdc.pr, groupCol = "shortLetterCode",filename = NULL)

beta.df <- data.frame(gdc.pr@assays$data)
#trim data frame
beta.df <- na.omit(beta.df[,-c(1,2)])
row.ranges <-as.data.frame(gdc.pr@rowRanges)
#Calculate Δβ values
colnames(beta.df) <- gsub("\\.", "-",colnames(beta.df))
B.NT <- beta.df[,colnames(beta.df) %in% Smp.NT]
B.TP <- beta.df[,colnames(beta.df) %in% Smp.TP]
mB.NT <- data.frame(rowMeans(B.NT, na.rm = TRUE))
mB.NT$probes <- rownames(mB.NT)
mB.TP <- data.frame(rowMeans(B.TP, na.rm = TRUE))
mB.TP$probes <- rownames(mB.TP)
db.df <- merge(mB.NT,mB.TP, by= "probes")
db.df$db <- db.df$rowMeans.B.NT..na.rm...TRUE. - db.df$rowMeans.B.TP..na.rm...TRUE.
db.df.3d <- sample_n(subset(db.df, db.df$db > thr),1000) # hypoTP
db.df.3d.ranges <- merge(db.df.3d, row.ranges, by.x="probes", by.y="probeID")
db.df.3d.r <- reorderBed(db.df.3d.ranges, 5,6,7,4)
g4.db.df.3d.r <- findQuads(db.df.3d.r)

db.df.3i <- sample_n(subset(db.df, db.df$db < -1*thr),1000) # hyperTP
db.df.3i.ranges <- merge(db.df.3i, row.ranges, by.x="probes", by.y="probeID")
db.df.3i.r <- reorderBed(db.df.3i.ranges, 5,6,7,4)
g4.db.df.3i.r <- findQuads(db.df.3i.r)

g4.dms <- c(g4.db.df.3i.r[[2]]$quads,g4.db.df.3d.r[[2]]$quads)
class <- c(rep("DMS+", length(g4.db.df.3i.r[[2]]$quads)), rep("DMS-",length(g4.db.df.3d.r[[2]]$quads)))
dens.df <- data.frame(g4.dms,class)
dens.df$g4.dms[dens.df$g4.dms > 6] <- 6

p <- ggplot(dens.df, aes(x = g4.dms, fill = factor(class))) +
  geom_histogram(aes(y = ..density..),position="identity", binwidth = 1, alpha = 0.7) + 
  stat_function(fun = dnorm, aes(color = "DMS-"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS-"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS-"]))) +
  stat_function(fun = dnorm, aes(color = "DMS+"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS+"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS+"]))) +
  scale_color_manual(values=c("red", "blue","#999999", "#E69F00", "#56B4E9"))+
  scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"),name=" ")+
  labs( x="G-Quadruplex", y = "Density")+
  scale_x_continuous(breaks=seq(0,6,1)) +
  theme_bw() +
  theme(axis.text.x=element_text(size=30, angle=0))+
  theme(axis.text.y=element_text(size=30, angle=90))+
  theme(axis.title=element_text(size=30))+
  theme(legend.text=element_text(size=20))+
  theme(legend.position=c(0.8,0.8))+
  theme(legend.title=element_blank()) +
  ggtitle("TCGA-BRCA") + 
  theme(plot.title = element_text(margin = margin(t = 10, b = -30))) +
  theme(plot.title = element_text(hjust = 0.5, size=22))

p
gse.accession <- "GSE66695"
annotation.file <- "GSE66695_annotation.csv" #Change path to annotation file appropriately (file included in the vignette folder)
working.dir <- "UC1" # set working directory
name<-"BRCA"
offset <- 100
thr <- 0.3

geo.data <- importGEO(gse.acc=gse.accession, annotation.file=annotation.file)
bed.data<-na.omit(reorderBed(geo.data[[1]],3,4,5,2))

tumor <- merge(geo.data[[4]],geo.data[[1]], by="probes")
tumor <- na.omit(reorderBed(tumor,4,5,6,2))
par(mfrow = c(1, 2))
nameStudy(paste("Tumor ", name))
plotBeta(tumor)
normal <- merge(geo.data[[5]],geo.data[[1]], by="probes")
normal<-na.omit(reorderBed(normal,4,5,6,2))
nameStudy(paste("Normal ", name))
plotBeta(normal)

db.df.3d <- subset(bed.data, bed.data$score < -1*thr) #hypomethylated in cancer
db.df.3i <- subset(bed.data, bed.data$score > thr)  #hypermethylated in cancer

#### G-quadruplex
g4.db.df.3i <- findQuads(db.df.3i, offset=offset)
g4.db.df.3d <- findQuads(db.df.3d, offset=offset)


g4.dms <- c(g4.db.df.3i[[2]]$quads,g4.db.df.3d[[2]]$quads)
class <- c(rep("DMS+", nrow(db.df.3i)), rep("DMS-",nrow(db.df.3d)))
dens.df <- data.frame(g4.dms,class)
dens.df$g4.dms[dens.df$g4.dms > 6] <- 6

p <- ggplot(dens.df, aes(x = g4.dms, fill = factor(class))) +
  geom_histogram(aes(y = ..density..),position="identity", binwidth = 1, alpha = 0.7) + 
  stat_function(fun = dnorm, aes(color = "DMS+"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS+"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS+"]))) +
  stat_function(fun = dnorm, aes(color = "DMS-"), size = 1, args = list(mean = mean(dens.df$g4.dms[dens.df$class=="DMS-"]), sd = sd(dens.df$g4.dms[dens.df$class=="DMS-"]))) +
  scale_color_manual(values=c("red", "blue","#999999", "#E69F00", "#56B4E9"))+
  scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"),name=" ")+
  labs( x="G-Quadruplex", y = "Density")+
  scale_x_continuous(breaks=seq(0,6,1)) +
  theme_bw() +
  theme(axis.text.x=element_text(size=30, angle=0))+
  theme(axis.text.y=element_text(size=30, angle=90))+
  theme(axis.title=element_text(size=30))+
  theme(legend.text=element_text(size=20))+
  theme(legend.position=c(0.8,0.8))+
  theme(legend.title=element_blank()) +
  ggtitle("GEO-BRCA") + 
  theme(plot.title = element_text(margin = margin(t = 10, b = -30))) +
  theme(plot.title = element_text(hjust = 0.5, size=22))

p


andigoni/MeinteR documentation built on Oct. 1, 2021, 9:33 p.m.