# R/referenceData.R

library(ggplot2)

# define constants; the reference data (centromere and telomere positions, chromosome size)

# Column names an analysis requires, following the standard sequenza naming
# conventions.
seq.cols.needed <- c(
  "chromosome", "start.pos", "end.pos",
  "CNt", "A", "B"
)

# Reference coordinates for hg19/GRCh37 (definitions come from UCSC).
# One row per chromosome, holding the centromere span, the end of the p-arm
# telomere, the start of the q-arm telomere, and the chromosome size.
grch37.ref.dat <- data.frame(
  chromosome = paste0("chr", c(1:22, "X", "Y")),
  centromere.start = c(
    121535434, 92326171, 90504854, 49660117,
    46405641, 58830166, 58054331, 43838887,
    47367679, 39254935, 51644205, 34856694,
    16000000, 16000000, 17000000, 35335801,
    22263006, 15460898, 24681782, 26369569,
    11288129, 13000000, 58632012, 10104553
  ),
  centromere.end = c(
    124535434, 95326171, 93504854, 52660117,
    49405641, 61830166, 61054331, 46838887,
    50367679, 42254935, 54644205, 37856694,
    19000000, 19000000, 20000000, 38335801,
    25263006, 18460898, 27681782, 29369569,
    14288129, 16000000, 61632012, 13104553
  ),
  # the same p-arm telomere end is used for all 24 chromosomes
  p.telomere.end = rep(10000, times = 24),
  q.telomere.start = c(
    249240621, 243189373, 198012430, 191144276,
    180905260, 171105067, 159128663, 146354022,
    141203431, 135524747, 134996516, 133841895,
    115159878, 107339540, 102521392, 90344753,
    81185210, 78067248, 59118983, 63015520,
    48119895, 51294566, 155260560, 59363566
  ),
  chr.size = c(
    249250621, 243199373, 198022430, 191154276,
    180915260, 171115067, 159138663, 146364022,
    141213431, 135534747, 135006516, 133851895,
    115169878, 107349540, 102531392, 90354753,
    81195210, 78077248, 59128983, 63025520,
    48129895, 51304566, 155270560, 59373566
  )
)

# The Y chromosome won't be considered for now: keep only the first 23 rows
# (chr1-chr22 and chrX), dropping the final chrY row.
grch37.ref.dat <- grch37.ref.dat[seq_len(23), ]

# Reference coordinates for GRCh38.p13 (release date: 3/1/19).
# One row per chromosome, holding the centromere span, the end of the p-arm
# telomere, the start of the q-arm telomere, and the chromosome size.
grch38.ref.dat <- data.frame(
  chromosome = paste0("chr", c(1:22, "X", "Y")),
  centromere.start = c(
    122026460, 92188145, 90772459, 49708101,
    46485901, 58553889, 58169654, 44033745,
    43236168, 39686683, 51078349, 34769408,
    16000001, 16000001, 17000001, 36311159,
    22813680, 15460900, 24498981, 26436233,
    10864561, 12954789, 58605580, 10316945
  ),
  # chr14 (14th entry) previously read 1817523, which lies *before* its
  # centromere.start of 16000001 and produced a negative-length centromere.
  # A digit had been dropped; the value is 18173523.
  # NOTE(review): corrected value taken from the UCSC hg38 centromeres
  # table -- confirm against the source used for the other entries.
  centromere.end = c(
    125184587, 94090557, 93655574, 51743951,
    50059807, 59829934, 60828234, 45877265,
    45518558, 41593521, 54425074, 37185252,
    18051248, 18173523, 19725254, 38280682,
    26885980, 20861206, 27190874, 30038348,
    12915508, 15054318, 62412542, 10544039
  ),
  # the same p-arm telomere end is used for all 24 chromosomes
  p.telomere.end = rep(10000, times = 24),
  q.telomere.start = c(
    248946422, 242183529, 198285559, 190204555,
    181528259, 170795979, 159335973, 145128636,
    138384717, 133787422, 135076622, 133265309,
    114354328, 107033718, 101981189, 90328345,
    83247441, 80363285, 58607616, 64434167,
    46699983, 50808468, 156030895, 57217415
  ),
  chr.size = c(
    248956422, 242193529, 198295559, 190214555,
    181538259, 170805979, 159345973, 145138636,
    138394717, 133797422, 135086622, 133275309,
    114364328, 107043718, 101991189, 90338345,
    83257441, 80373285, 58617616, 64444167,
    46709983, 50818468, 156040895, 57227415
  )
)

# Keep only the first 23 rows (chr1-chr22 and chrX), dropping the final chrY
# row.
grch38.ref.dat <- grch38.ref.dat[seq_len(23), ]
# maxwell-lab/HRDex documentation built on May 3, 2020, 9:01 p.m.