annotate_by_feature <- function(stat, feature) {
if (!all.equal(key(stat), c("chr", "start", "end"))) {
data.table::setkey(stat, chr, start, end)
}
if (!all.equal(key(feature), c("chr", "start", "end"))) {
data.table::setkey(feature, chr, start, end)
}
annotated <- foverlaps(stat, remapped.genes)[, c(1, 9, 10, 8, 11, 12, 7), with = F]
data.table::setnames(annotated, c("i.start", "i.end", "i.id", "i.block.relative_start", "i.block.relative_end"), c("start", "end", "id", "block.relative_start", "block.relative_end"))
cols <- names(annotated)[-7]
annotated[, gene = list(gene), by = cols]
return(annotated)
}
gene_set_enrichment <- function(stats, features, feature_set_map, n_permutations) {
# stat_block_ids <- vector()
# if (inherits(stats, "list")) {
# stat_block_ids <- sort(Reduce(union, lapply(stats, function(x) x[, unique(id)])))
# }
# if (inherits(stats, "data.table")) {
# stat_block_ids <- stats[, sort(unique(id))]
# }
# n_associated_blocks <- length(stat_block_ids)
n.cores <- data.table::getDTthreads()
observed <- get_observed_multi_stat2(
statDT = stats,
features = features,
feature_set_map = feature_set_map,
permutation_n = 0,
allow_multiple_hits = T
)
permuted <- data.table::rbindlist(parallel::mclapply(1:n_permutations,
function(x) {
get_permutation_multi_stat2(statDT = stats,
features = features,
feature_set_map = feature_set_map,
permutation_n = x,
allow_multiple_hits = T)
},
mc.cores = n.cores
))
# include observed set as perm_n 0
permuted <- data.table::rbindlist(list(permuted, observed))
return(permuted[])
}
annotate_permutation_multi <- function(permDT, feature_set_map, permutation_n, output_genes) {
permDT[, stat_n_gene := .N]
permDT[, stat_unique_gene := uniqueN(gene)]
permDT <- unique(feature_set_map[permDT, on = .(gene)
][!id %in% NA
][, .(stat = "multi_hits",
set_n = length(gene),
stat_n_gene,
stat_unique_gene,
genes = ifelse(output_genes,
paste(sort(gene),
collapse = " "),
NA)), by = .(id)])[]
if(permDT[,uniqueN(id)] < feature_set_map[,uniqueN(id)]) {
permDT <- rbind(permDT,
data.table(id = feature_set_map[!id %in% permDT$id,
unique(id)],
stat = "multi_hits",
set_n = 0,
stat_n_gene = permDT[1]$stat_n_gene,
stat_unique_gene = permDT[1]$stat_unique_gene,
genes = NA))
}
permDT[, set_pr := ifelse(set_n > 0, set_n / stat_n_gene, 0)]
permDT[, perm_n := permutation_n]
return(permDT)
}
annotate_permutation_unique <- function(permDT, feature_set_map, permutation_n, output_genes) {
permDT[, total_n_gene := uniqueN(gene)]
permDT <- unique(feature_set_map[out, on = .(gene)][!id %in% NA][, .(stat = "unique_hits", set_n = uniqueN(gene), stat_n_gene = total_n_gene, genes = ifelse(output_genes, paste(sort(gene), collapse = " "), NA)), by = .(id)])
permDT <- rbind(permDT, data.table(id = feature_set_map[!id %in% permDT$id, unique(id)], stat = "unique_hits", set_n = 0, stat_n_gene = out[1]$stat_n_gene, genes = NA))
permDT[, set_pr := ifelse(set_n > 0, set_n / stat_n_gene, 0)]
permDT[, perm_n := permutation_n]
return(permDT)
}
get_permutation_multi_stat <- function(stats,
features,
feature_set_map,
blocks,
stat_block_ids,
n_associated_blocks,
permutation_n,
allow_multiple_hits) {
perm <- permute_blocks(features = features, blocks = blocks)
out <- melt_permuation_list(lapply(stats, function(x) data.table::foverlaps(x, perm, nomatch = 0, by.x = c("id", "block.relative_start", "block.relative_end"))))
if (allow_multiple_hits) {
out <- annotate_permutation_multi(permDT = out, feature_set_map = feature_set_map, permutation_n = permutation_n, output_genes = FALSE)
# out[,total_n_gene:= .N]
# out[,unique_n_gene:= uniqueN(gene)]
# out <- unique(feature_set_map[out,on=.(gene)][!id %in% NA][,.(stat='multi_hits',set_n=length(gene),stat_n_gene=total_n_gene,stat_unique_gene=unique_n_gene,genes=NA),by=.(id)])[]
# out <- rbind(out,data.table(id=feature_set_map[!id %in% out$id,unique(id)],stat='multi_hits',set_n=0,stat_n_gene=out[1]$stat_n_gene,stat_unique_gene=out[1]$stat_unique_gene,genes=NA))
} else {
out <- annotate_permutation_unique(permDT = out, feature_set_map = feature_set_map, permutation_n = permutation_n, output_genes = FALSE)
# out[,total_n_gene:=uniqueN(gene)]
# out <- unique(feature_set_map[out,on=.(gene)][!id %in% NA][,.(stat='unique_hits',set_n=uniqueN(gene),stat_n_gene=total_n_gene,genes=NA),by=.(id)])
# out <- rbind(out,data.table(id=feature_set_map[!id %in% out$id,unique(id)],stat='unique_hits',set_n=0,stat_n_gene=out[1]$stat_n_gene,genes=NA))
}
# out[,set_pr:=ifelse(set_n>0,set_n/stat_n_gene,0)]
# out[,perm_n:=permutation_n]
return(out[])
}
get_permutation_multi_stat2 <- function(statDT,
features,
feature_set_map,
permutation_n,
allow_multiple_hits) {
permuted_genes <- permute_genes(features = features)
setkey(permuted_genes,chr,start,end)
# out <- data.table::foverlaps(statDT,
# data.table::setkey(features[,.(chr=id,start=block.relative_start,end=block.relative_end,gene)],chr,start,end),
# nomatch = NULL)[,.(gene=unique(gene)),by=stat]
#
out <- data.table::foverlaps(statDT,
permuted_genes,
nomatch = NULL)[,.(gene=unique(gene)),by=stat]
if (allow_multiple_hits) {
out <- annotate_permutation_multi(permDT = out,
feature_set_map = feature_set_map,
permutation_n = permutation_n,
output_genes = FALSE)
} else {
out <- annotate_permutation_unique(permDT = out,
feature_set_map = feature_set_map,
permutation_n = permutation_n,
output_genes = FALSE)
}
return(out[])
}
permute_genes <- function(features){
perm.genes <- features[features[,.(id=unique(id),
new=sample(unique(id),
uniqueN(id),
replace = FALSE)),by=chr
], on=.(id)][,.(chr=new,
start=block.relative_start,
end=block.relative_end,gene)]
setkey(perm.genes,chr,start,end)[]
}
get_observed_multi_stat2 <- function(statDT,
features,
feature_set_map,
permutation_n,
allow_multiple_hits){
out <- data.table::foverlaps(data.table::setkey(statDT,chr,start,end),
data.table::setkey(features[,.(chr=id,start=block.relative_start,end=block.relative_end,gene)],chr,start,end),
nomatch = 0)[,.(gene=unique(gene)),by=stat]
if (allow_multiple_hits) {
out <- annotate_permutation_multi(permDT = out,
feature_set_map = feature_set_map,
permutation_n = permutation_n,
output_genes = TRUE)
} else {
out <- annotate_permutation_unique(permDT = out,
feature_set_map = feature_set_map,
permutation_n = permutation_n,
output_genes = TRUE)
}
return(out[])
}
get_observed_multi_stat <- function(stats,
features,
feature_set_map,
stat_block_ids,
allow_multiple_hits) {
out <- melt_permuation_list(lapply(stats, function(x) data.table::foverlaps(x, setkey(features[id %in% stat_block_ids][, .(id, block.relative_start, block.relative_end, gene)], id, block.relative_start, block.relative_end), nomatch = 0, by.x = c("id", "block.relative_start", "block.relative_end"))))
if (allow_multiple_hits) {
out <- annotate_permutation_multi(permDT = out, feature_set_map = feature_set_map, permutation_n = 0, output_genes = TRUE)
# out[,total_n_gene:= .N]
# out[,unique_n_gene:= uniqueN(gene)]
# out <- unique(feature_set_map[out,on=.(gene)][!id %in% NA][,.(stat='multi_hits',set_n=length(gene),stat_n_gene=total_n_gene,stat_unique_gene=unique_n_gene,genes=paste(sort(gene),collapse = " ")),by=.(id)])[]
# out <- rbind(out,data.table(id=feature_set_map[!id %in% out$id,unique(id)],stat='multi_hits',set_n=0,stat_n_gene=out[1]$stat_n_gene,stat_unique_gene=out[1]$stat_unique_gene,genes=NA))
} else {
out <- annotate_permutation_unique(permDT = out, feature_set_map = feature_set_map, permutation_n = 0, output_genes = TRUE)
# out[,total_n_gene:=uniqueN(gene)]
# out <- unique(feature_set_map[out,on=.(gene)][!id %in% NA][,.(stat='unique_hits',set_n=uniqueN(gene),stat_n_gene=total_n_gene,genes=paste(sort(gene),collapse = " ")),by=.(id)])
# out <- rbind(out,data.table(id=feature_set_map[!id %in% out$id,unique(id)],stat='unique_hits',set_n=0,stat_n_gene=out[1]$stat_n_gene,genes=NA))
}
# out[,set_pr:=ifelse(set_n>0,set_n/stat_n_gene,0)]
# out[,perm_n:=0]
return(out[])
}
# permute feature locations, and get new stat -> feature mapping
# permute_blocks <- function(features, blocks, stat_block_ids, n_associated_blocks) {
# setkey(features[blocks[,.(old=stat_block_ids,id=sample(unique(id),size = n_associated_blocks,replace = F))],on=.(id)][!gene %in% NA,.(id=old,block.relative_start,block.relative_end,gene)], id, block.relative_start,block.relative_end)[]
# }
permute_blocks <- function(features, blocks) {
blocks$id2 <- blocks$id
setkey(features[blocks[sample(.N, .N, replace = F), .(chr, start, end, old = id, id = blocks$id2)], on = .(id)][!gene %in% NA][, .(id = old, block.relative_start, block.relative_end, gene)], id, block.relative_start, block.relative_end)[]
}
force_genic_permute_blocks <- function(features, blocks, stat_block_ids, n_associated_blocks) {
features[blocks[, .(chr, start, end, id)], on = .(id)][, uniqueN(gene)]
blocks[features[, .(chr, start, end, id, gene)], on = .(id)][, uniqueN(gene)]
}
#genic_stat_block_ids <- sort(Reduce(union, lapply(remapped_stats, function(x) data.table::fovrelapsx[, unique(id)])))
# melt output of permute_blocks to a two column DT. (gene,stat)
melt_permuation_list <- function(permutation_list) {
list_names <- names(permutation_list)
feature <- (names(permutation_list[[1]])[4])
data.table::rbindlist(lapply(seq_along(permutation_list), function(i) permutation_list[[i]][, .(gene = unique(get(feature)), stat = names(permutation_list)[[i]])]))
}
melt_permuation_list2 <- function(permutation_list) {
list_names <- names(permutation_list)
feature <- (names(permutation_list[[1]])[4])
data.table::rbindlist(lapply(seq_along(permutation_list), function(i) permutation_list[[i]][, .(gene = unique(get(feature)), stat = names(permutation_list)[[i]])]))
}
##
# roll blocks within chromosome
roll_within <- function(chr_ids, shift, direction) {
# n_ids <- length(chr_ids)
range_ids <- range(chr_ids)
max_ids <- range_ids[2] - range_ids[1] + 1
new_ids <- chr_ids + shift * direction
ifelse(new_ids > range_ids[2], new_ids - max_ids, ifelse(new_ids < range_ids[1], new_ids + max_ids, new_ids))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.