# data-raw/alu.R

# rmsk_hg19.txt: rmsk track from the UCSC genome browser for the hg19 genome build. Downloaded April 10, 2020
# clade: Mammal
# genome: Human
# assembly: Feb. 2009 (GRCh37/hg19)
# group: Repeats
# track: RepeatMasker
# table: rmsk

library(GenomicRanges)

# Build `alu`: a GRanges object holding the Alu repeat intervals on the
# canonical hg19 chromosomes (chr1-chr22, chrX, chrY), derived from the UCSC
# RepeatMasker (rmsk) table, and save it as alu.rda.

# comment.char = "" turns off comment handling so no line is dropped;
# presumably needed because the header row of the UCSC dump starts with "#"
# (TODO confirm against the raw file).
repeat.df <- read.table("/dcl01/scharpf1/data/dbruhm/delfi_followup/split-reads/pcawg_analysis/data/raw_data/rmsk_hg19.txt",
                        header = TRUE, stringsAsFactors = FALSE, comment.char = "")

# Keep only the autosomes and sex chromosomes; this drops every other
# sequence name present in the table.
chrs <- paste0("chr", c(seq(1, 22, 1), "X", "Y"))
repeat.df <- subset(repeat.df, genoName %in% chrs)

# Restrict to repeats whose family is annotated as Alu.
alu.df <- subset(repeat.df, repFamily == "Alu")

# UCSC database tables store 0-based, half-open intervals: genoStart is
# 0-based and genoEnd is exclusive. GRanges/IRanges are 1-based and closed, so
# the start is shifted by one while the end is used unchanged. Without the
# shift every range would begin one base too early and be one base too long.
alu.gr <- GRanges(seqnames = alu.df$genoName,
                  ranges = IRanges(start = alu.df$genoStart + 1,
                                   end = alu.df$genoEnd))
alu <- alu.gr

# The saved object is named `alu`; load() restores it under that name.
save(alu, file = "/dcl01/scharpf1/data/dbruhm/delfi_followup/split-reads/pcawg_analysis/data/processed_data/alu.rda")
# cancer-genomics/plasmasv documentation built on May 15, 2020, 11:35 a.m.