# data-raw/cpg.R

# cpgislandExt_hg19.txt: cpgislandExt track from the UCSC genome browser for the hg19 genome build. Downloaded April 10, 2020
# from https://genome.ucsc.edu/cgi-bin/hgTables with the following settings:
# clade: Mammal
# genome: Human
# assembly: Feb. 2009 (GRCh37/hg19)
# group: Regulation
# track: CpG islands
# table: cpgislandExt

library(GenomicRanges)

# Build the `cpg` GRanges object from the UCSC CpG-island table dump and save
# it to cpg.rda.
#
# Columns are addressed by position: V2 is matched against chromosome names,
# V3 is used as the interval start and V4 as the interval end.
# NOTE(review): this corresponds to the UCSC cpgIslandExt layout
# (bin, chrom, chromStart, chromEnd, ...) -- confirm against the raw file.
# skip = 1 drops the first line of the file (presumably the header row).
cpg.df <- read.table("/dcl01/scharpf1/data/dbruhm/delfi_followup/split-reads/pcawg_analysis/data/raw_data/cpgislandExt_hg19.txt",
                     header = FALSE, stringsAsFactors = FALSE, skip = 1)

# Keep only chr1-chr22, chrX and chrY; rows on any other sequence are dropped.
chrs <- paste0("chr", c(1:22, "X", "Y"))
cpg.df <- cpg.df[cpg.df$V2 %in% chrs, , drop = FALSE]

# UCSC database tables store 0-based, half-open intervals (chromStart is
# 0-based, chromEnd is exclusive), whereas IRanges/GRanges use 1-based, closed
# intervals. Shift the start by one so each range covers exactly the bases of
# the island; the end coordinate needs no adjustment.
cpg.gr <- GRanges(seqnames = cpg.df$V2,
                  ranges = IRanges(start = cpg.df$V3 + 1L, end = cpg.df$V4))
cpg <- cpg.gr

# The object is saved under the name `cpg`.
save(cpg, file = "/dcl01/scharpf1/data/dbruhm/delfi_followup/split-reads/pcawg_analysis/data/processed_data/cpg.rda")
# cancer-genomics/plasmasv documentation built on May 15, 2020, 11:35 a.m.