simulate_background <- function(blocks, snp_distance_kb) {
blocks[,.(chr,start=seq(from = round(start/1000), to = round(end/1000), by = snp_distance_kb)*1000),by=id][,.(chr,start,end=start)][]
}
simulate_background_from_genes <- function(genes, snp_distance_kb) {
distances <- vector(mode="numeric",length=genes[,.N]-1)
for(i in 1:(genes[,.N]-1)) {
if(genes[i,chr] == genes[i+1,chr]){
distances[i] <- genes[i+1,start] - genes[i,end]
}
if(genes[i,chr] != genes[i+1,chr]){
distances[i] <- NA
}
}
distances <- distances[!distances %in% NA]
distances <- distances[distances > 0]
mean_distance <- mean(distances)
genes[,.(chr,start=seq(from = round( (min(start)-mean_distance) /1000), to = round( (max(end)+mean_distance)/1000), by = snp_distance_kb)*1000),by=chr][,.(chr,start,end=start)][]
}
simulate_random_candidates <- function(background,n_candidates){
background[sample(.N,size = n_candidates,replace = F)][order(chr,start)][]
}
simulate_genic_enriched_candidates <- function(background,features,n_candidates,genic_fraction=0.5){
if(!identical(data.table::key(background), c("chr", "start", "end"))){
data.table::setkey(background,chr,start,end)
}
if(!identical(data.table::key(features), c("chr", "start", "end"))){
data.table::setkey(features,chr,start,end)
}
intersect <- data.table::foverlaps(background,features)
# if enrichment is 10, this means n_nongenic/n_genic == 0.9
n_genic <- round(n_candidates * genic_fraction)
n_non_genic <- n_candidates - n_genic
genic_cands <- intersect[!gene %in% NA,.(chr,start=i.start,end=i.end)][sample(.N,n_genic,replace = F)]
non_genic_cands <- intersect[gene %in% NA,.(chr,start=i.start,end=i.end)][sample(.N,n_non_genic,replace = F)]
data.table::rbindlist(list(genic_cands,non_genic_cands))[]
}
# simulate a genome
poisson_snp_positions <- function(chr,chr_length_Mb, mean_snps_per_kb){
# expected number of snps is the poison rate parameter, lambda
lambda <- chr_length_Mb * 1000 * mean_snps_per_kb
# a n of snps can is form the (log) distribution function
n_snps <- sample(1:(chr_length_Mb * 1000),size = 1, prob = ppois(1:(chr_length_Mb * 1000), lambda = lambda, lower.tail = FALSE))
snp_positions <- cumsum(round(rexp(n = n_snps,rate = lambda)*chr_length_Mb*1e6))
snp_positions <- round(snp_positions/max(snp_positions)*chr_length_Mb*1e6)
snp_positions <- data.table::data.table(chr=chr,start=snp_positions,end=snp_positions)
return(snp_positions)
}
# chrs <- 1:22
# lengths <- c(250,240,200,190,180,170,160,145,140,133,135,133,115,107,101,90,83,80,60,64,46,50)
# data.table::data.table(chr=)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.