inst/doc/parp1_methylation.R

## ----setupKnitr, echo=FALSE, results='hide'------------------------------
knitr::opts_chunk$set(dev='png')

## ----load_packages-------------------------------------------------------
library(GenomicRanges)
library(ggplot2)
library(fmcorrelationbreastcaparp1)
library(fmdatabreastcaparp1)
library(BSgenome.Hsapiens.UCSC.hg19)

## ----load_tss_parp1------------------------------------------------------
data(parp1_ln4_unique)
data(parp1_ln5_unique)
data(methyl_rep1)
data(methyl_rep2)
data(tss_windows)

## ----ln4_ln5_counts------------------------------------------------------
mcf7_cov <- coverage(parp1_ln4_unique, weight = "n_count")
mdamb231_cov <- coverage(parp1_ln5_unique, weight = "n_count")

tss_windows <- binned_function(tss_windows, mcf7_cov, "sum", "parp1_mcf7")
tss_windows <- binned_function(tss_windows, mdamb231_cov, "sum", "parp1_mdamb231")

## ----read_methylation----------------------------------------------------
mcols(methyl_rep1)$methyl_read <- mcols(methyl_rep1)$mcols.readCount * mcols(methyl_rep1)$mcols.percentMeth / 100
mcols(methyl_rep2)$methyl_read <- mcols(methyl_rep2)$mcols.readCount * mcols(methyl_rep2)$mcols.percentMeth / 100

methyl_r1_cov <- coverage(methyl_rep1, weight = "methyl_read")
methyl_r2_cov <- coverage(methyl_rep2, weight = "methyl_read")
tss_windows <- binned_function(tss_windows, methyl_r1_cov, "sum", "methyl_r1")
tss_windows <- binned_function(tss_windows, methyl_r2_cov, "sum", "methyl_r2")

## ----set_non_zero--------------------------------------------------------
non_zero <- "both"

## ----graphit-------------------------------------------------------------
r1_v_mcf7 <- subsample_nonzeros(mcols(tss_windows), c("methyl_r1", "parp1_mcf7"), non_zero = non_zero, n_points = 10000)
ggplot(r1_v_mcf7, aes(x = methyl_r1, y = parp1_mcf7)) + geom_point() + scale_y_log10() + scale_x_log10()
cor(log10(r1_v_mcf7[,1]+1), log10(r1_v_mcf7[,2]+1))

r2_v_mcf7 <- subsample_nonzeros(mcols(tss_windows), c("methyl_r2", "parp1_mcf7"), non_zero = non_zero, n_points = 10000)
ggplot(r2_v_mcf7, aes(x = methyl_r2, y = parp1_mcf7)) + geom_point() + scale_y_log10() + scale_x_log10()
cor(log10(r2_v_mcf7[,1]+1), log10(r2_v_mcf7[,2]+1))

## ----grab_all------------------------------------------------------------
all_comb <- expand.grid(c("methyl_r1", "methyl_r2"), c("parp1_mcf7", "parp1_mdamb231"), stringsAsFactors = FALSE)
out_cor <- lapply(seq(1, nrow(all_comb)), function(i_row){
  #print(i_row)
  correlate_non_zero(mcols(tss_windows), as.character(all_comb[i_row,]), log_transform = TRUE, non_zero = non_zero, test = TRUE)
})
all_comb_names <- paste(all_comb[,1], all_comb[,2], sep = "_v_")
out_cor <- do.call(rbind, out_cor)
rownames(out_cor) <- all_comb_names

## ----tss_correlations, results='asis', echo=FALSE------------------------
knitr::kable(out_cor)

## ----graphs--------------------------------------------------------------
out_graphs <- lapply(seq(1, nrow(all_comb)), function(i_row){
  use_vars <- as.character(all_comb[i_row,])
  subpoints <- subsample_nonzeros(mcols(tss_windows), use_vars, non_zero = non_zero, n_points = 10000)
  ggplot(subpoints, aes_string(x = use_vars[1], y = use_vars[2])) + geom_point() + scale_y_log10() + scale_x_log10()
})
out_graphs

## ----genome_tiles--------------------------------------------------------
genome_tiles <- tileGenome(seqinfo(Hsapiens), tilewidth = 2000, cut.last.tile.in.chrom = TRUE)

genome_tiles <- binned_function(genome_tiles, mcf7_cov, "sum", "parp1_mcf7")
genome_tiles <- binned_function(genome_tiles, mdamb231_cov, "sum", "parp1_mdamb231")
genome_tiles <- binned_function(genome_tiles, methyl_r1_cov, "sum", "methyl_r1")
genome_tiles <- binned_function(genome_tiles, methyl_r2_cov, "sum", "methyl_r2")

## ----genome_graph_test---------------------------------------------------
genome_r1_v_mcf7 <- subsample_nonzeros(mcols(genome_tiles), c("methyl_r1", "parp1_mcf7"), non_zero = non_zero, n_points = 10000)
ggplot(genome_r1_v_mcf7, aes(x = methyl_r1, y = parp1_mcf7)) + scale_x_log10() + scale_y_log10() + geom_point()
cor(log(genome_r1_v_mcf7[,1]+1), log(genome_r1_v_mcf7[,2]+1))

## ----genome_cor----------------------------------------------------------
genome_cor <- lapply(seq(1, nrow(all_comb)), function(i_row){
  #print(i_row)
  correlate_non_zero(mcols(genome_tiles), as.character(all_comb[i_row,]), log_transform = TRUE, non_zero = non_zero, test = TRUE)
})
all_comb_names <- paste(all_comb[,1], all_comb[,2], sep = "_v_")
genome_cor <- do.call(rbind, genome_cor)
rownames(genome_cor) <- all_comb_names

## ----genome_cor_display, results='asis', echo=FALSE----------------------
knitr::kable(genome_cor)

## ----save_tables---------------------------------------------------------
saveloc <- "../inst/correlation_tables"
write.table(out_cor, file = file.path(saveloc, "methylation_tss.txt"), sep = "\t")
write.table(genome_cor, file = file.path(saveloc, "methylation_genome.txt"), sep = "\t")

## ----session_info, echo=FALSE--------------------------------------------
Sys.time()
sessionInfo()
rmflight/fmcorrelationbreastcaparp1 documentation built on May 27, 2019, 9:31 a.m.