optimise_blocks <- function(background,
candidates,
features,
initial_block_size_kb=500,
tolerance_percent=0.5,
n_perms=25,
increment_step_size_kb=25,
genic_only=TRUE,
combined_test="multiple_hits") {
candidatesDT <- rbindlist(candidates,idcol = T)[,.(chr,start, end,stat= .id)]
setkey(candidatesDT,chr,start,end)
backgroundDT <- rbindlist(background,idcol = T)[,.(chr,start, end,stat= .id)]
setkey(backgroundDT,chr,start,end)
#backgroundDT <- unique(backgroundDT, by = key(backgroundDT), fromLast = TRUE)
setkey(features, chr, start, end)
before_remap_gene <- foverlaps(candidatesDT, features, nomatch = NULL)[,.(gene=unique(gene)),by=stat]
# initial blocks
remap_list <- blocks_to_remapped(block_size = initial_block_size_kb*1000,
background = backgroundDT,
candidates = candidatesDT,
features = features,
genic_only = genic_only)
after_remap_gene <- data.table::foverlaps(data.table::setkey(remap_list[[2]],chr,start,end),
data.table::setkey(remap_list[[3]][,.(chr=id,start=block.relative_start,end=block.relative_end,gene)],chr,start,end),
nomatch = 0)[,.(gene=unique(gene)),by=stat]
# tolerance range of genic n
obs_gene_number <- 0
if(combined_test=="multiple_hits"){
obs_gene_number <- after_remap_gene[,.N]
}
if(combined_test=="unique_hits"){
obs_gene_number <- after_remap_gene[,uniqueN(gene)]
}
# independent option is signposted here, but not yet implemented
if(combined_test=="independent"){
obs_gene_number <- after_remap_gene[,.(N=uniqueN(gene)),by=stat]
}
upper_limit <- round((100+tolerance_percent)/100*obs_gene_number)
lower_limit <- round((100-tolerance_percent)/100*obs_gene_number)
# average permutation genic n in permutations with initial block size
test_permutations <- data.table::rbindlist(parallel::mclapply(1:n_perms,
function(x) { permuted_genes <- permute_genes(features = remap_list[[3]]);
out <- data.table::foverlaps(remap_list[[2]],
permuted_genes,
nomatch = NULL)[,.(gene=unique(gene),perm_n=x),by=stat];}))
mean_perm_n <- 0
if(combined_test=="multiple_hits"){
mean_perm_n <- round(test_permutations[,.N,by=perm_n][,mean(N)])
}
if(combined_test=="unique_hits"){
obs_gene_number <- round(test_permutations[,uniqueN(gene),by=perm_n][,mean(V1)])
}
if(mean_perm_n >= lower_limit & mean_perm_n <= upper_limit){
# already optimised... lucky!
return(list(remap_list,optimised_block_size=initial_block_size_kb*1000,before_remap_gene,after_remap_gene))
}
max_increments <- round((initial_block_size_kb - increment_step_size_kb)
/ increment_step_size_kb)
# we assume that making the blocks SMALLER will increase the genic n
if(mean_perm_n < lower_limit) {
increment_step <- 1
optimised_block_size_kb <- 0
while(mean_perm_n < lower_limit & increment_step < max_increments) {
optimised_block_size_kb <- initial_block_size_kb - (increment_step_size_kb*increment_step)
remap_list <- blocks_to_remapped(block_size = optimised_block_size_kb*1000,
background = backgroundDT,
candidates = candidatesDT,
features = features,
genic_only = genic_only)
test_permutations <- data.table::rbindlist(parallel::mclapply(1:n_perms,
function(x) { permuted_genes <- permute_genes(features = remap_list[[3]]);
out <- data.table::foverlaps(remap_list[[2]],
permuted_genes,
nomatch = NULL)[,.(gene=unique(gene),perm_n=x),by=stat];}))
if(combined_test=="multiple_hits"){
mean_perm_n <- round(test_permutations[,.N,by=perm_n][,mean(N)])
}
if(combined_test=="unique_hits"){
obs_gene_number <- round(test_permutations[,uniqueN(gene),by=perm_n][,mean(V1)])
}
increment_step <- increment_step+1
}
after_remap_gene <- data.table::foverlaps(data.table::setkey(remap_list[[2]],chr,start,end),
data.table::setkey(remap_list[[3]][,.(chr=id,start=block.relative_start,end=block.relative_end,gene)],chr,start,end),
nomatch = 0)[,.(gene=unique(gene)),by=stat]
return(list(remap_list,optimised_block_size=optimised_block_size_kb*1000,before_remap_gene,after_remap_gene))
#return(list(remap_list,optimised_block_size_kb*1000,before_remap_gene,after_remap_gene))
}
# if(mean_perm_n > upper_limit) {
# increment_step <- 1
# optimised_block_size_kb <- 0
# while(mean_perm_n < lower_limit & increment_step < max_increments) {
# optimised_block_size_kb <- initial_block_size_kb + (increment_step_size_kb*increment_step)
# remap_list <- blocks_to_remapped(block_size = optimised_block_size_kb*1000,
# background = background,
# candidates = candidates,
# features = features,
# genic_only = genic_only)
# mean_perm_n <- mean(make_permutation_n_vector(n_perms = n_perms,
# stats = remap_list[[2]],
# features = remap_list[[3]],
# blocks = remap_list[[1]]))
# increment_step <- increment_step+1
# }
# return(list(remap_list,optimised_block_size_kb*1000))
# }
}
blocks_to_remapped <- function(block_size,
background,
candidates,
features,
genic_only) {
blocks <- if (genic_only) {
block_maker(
block_size = block_size,
stats = background,
features = features
)
} else {
block_maker(
block_size = block_size,
stats = background,
features = NULL
)
}
clean_blocks <- block_cleaner(blocks,
stats = background,
features = NULL
)
remapped_candidates <- remapper(blocks = clean_blocks, to_remap = candidates)[,.(chr=id,start=block.relative_start,end=block.relative_end,stat)]
setkey(remapped_candidates,chr,start,end)
remapped_features <- remapper(blocks = clean_blocks, to_remap = features)
return(list(clean_blocks, remapped_candidates, remapped_features))
}
make_permutation_n_vector <- function(n_perms, stats, features, blocks) {
perm_gene_n <- vector(mode = "numeric", length = n_perms)
for (i in 1:n_perms) {
perm_gene_n[i] <- melt_permuation_list(lapply(stats, function(x) data.table::foverlaps(x, permute_blocks(features = features, blocks = blocks), nomatch = 0, by.x = c("id", "block.relative_start", "block.relative_end"))))[,.N]#[, uniqueN(gene)]
}
return(perm_gene_n)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.