R/tidy_functions.R

Defines functions create_results_table

create_results_table <- function(permDT, feature_set_map) {
  # mean set_n and total gene number over re-samples
  # need to add total set n in Feature Set
  mean_resamples <- permDT[perm_n!=0][,.("mean permutation set n"=mean(set_n),"mean permutation set pr."=mean(set_pr),"mean permutation stat n gene"=mean(stat_n_gene)),by=id]
  body <- permDT[perm_n==0]
  body <- body[mean_resamples,on=.(id)]
  body[,"normalised permutation set n":= `mean permutation set pr.`*stat_n_gene]
  body <- body[unique(feature_set_map[,.(name),by=id]),on=.(id)]
  body <- body[,.(id,name,"observed set n"=set_n,`mean permutation set n`,`normalised permutation set n`,"observed set pr."=set_pr,`mean permutation set pr.`,"stat gene n"=stat_n_gene,"stat unique gene"=stat_unique_gene, `mean permutation stat n gene`, enriched_p, enriched_fdr, enriched_BH,  depleted_p, depleted_fdr, depleted_BH,  two_tailed_p, two_tailed_fdr, two_tailed_BH,genes,stat)][order(two_tailed_p)]
  return(body[])
}

plot_results_table <- function(resultsDT,show_n,sort_by="enriched_p"){
  resultsDT <- resultsDT[order(get(sort_by))][1:show_n]
  table <- resultsDT %>%
    gt::gt() %>%
    gt::tab_header(title = "Feature Set enrichment test") %>%
    gt::fmt_number(columns = gt::matches("ed_p$|_fdr|_BH"), decimals = 4) %>%
    gt::tab_spanner(label = "Feature Set", columns = c("id", "name")) %>%
    gt::tab_spanner(label = "Feature Set Summaries", columns = gt::matches("set n|set pr")) %>%
    gt::fmt_number(columns = gt::matches("set pr"), decimals = 4) %>%
    gt::fmt_number(columns = gt::matches("permutation set n"), decimals = 3) %>%
    gt::tab_spanner(label = "Total Genes", columns = c("stat gene n", "stat unique gene","mean permutation stat n gene")) %>%
    gt::tab_spanner(label = "Enrichment", columns = gt::matches("enriched")) %>%
    gt::tab_spanner(label = "Depletion", columns = gt::matches("depleted")) %>%
    gt::tab_spanner(label = "Two-tailed", columns = gt::matches("two_tailed")) %>%
    gt::cols_label(
      enriched_p = "empP",
      enriched_fdr = "empFDR",
      enriched_BH = "BH-FDR",
      depleted_p = "empP",
      depleted_fdr = "empFDR",
      depleted_BH = "BH-FDR",
      two_tailed_p = "empP",
      two_tailed_fdr = "empFDR",
      two_tailed_BH = "BH-FDR"
    ) %>%
    gt::tab_options(table.font.size = 14, heading.title.font.size = 24 , column_labels.font.size = 18,column_labels.font.weight = 'bold',heading.title.font.weight = 'bold') %>%
    gt::cols_width(gt::vars("enriched_p","enriched_fdr","enriched_BH","depleted_p","depleted_fdr","depleted_BH","two_tailed_p","two_tailed_fdr","two_tailed_BH") ~ px(75),
                   gt::everything() ~ px(100))
  return(table)
}

add_length_distribution <- function(resultsDT, features){
  lengthDT <- 
  # Basic box plot
  bp <- ggplot2::ggplot(PlantGrowth, aes(x=group, y=weight))+
    geom_boxplot()+coord_flip()
}

#add_jaccard <- 
# 
# example_genes <- data.table(chr=rep.int(1,3),start=c(500,1000,1500),end=c(1500,2000,2000),gene=c("a","b","c"))
# # pairwise intersects
# intersects <- foverlaps(example_genes,example_genes)[!gene==i.gene,.( intersect = min(i.end,end) - max(start,i.start), union=i.end-i.start + end-start  ), by=.(gene,i.gene)]
# intersects[,pair:=paste(sort(c(gene,i.gene)),collapse = "_"),by=.(gene,i.gene)]
# intersects <- unique(intersects[,.(intersect,union,pair)])
# intersects[,jaccard:= intersect/(union - intersect),by=pair]
# 
# 
# dt <- clean_kegg[,.(id,gene)][genes,on=.(gene)][!id %in% NA]
# setkey(dt,chr,start,end)
# # unions
# unions <- dt[dt,on=.(id),allow.cartesian=TRUE][!gene==i.gene]
# unions <- unions[,.(union=i.end-i.start + end-start,max_intersect=(min(c(i.end-i.start , end-start)))),by=.(id,gene,i.gene)]
# unions[,geneA:=ifelse(gene < i.gene,gene,i.gene)]
# unions[,geneB:=ifelse(gene > i.gene,gene,i.gene)]
# unions <- unique(unions, by= c('id','geneA','geneB'))[,.(id,geneA,geneB,union,max_intersect)]
# 
# #
# intersects <- foverlaps(dt,dt)[!gene==i.gene][id==i.id][,.( intersect = min(i.end,end) - max(start,i.start)), by=.(id,gene,i.gene)]
# intersects[,geneA:=ifelse(gene < i.gene,gene,i.gene)]
# intersects[,geneB:=ifelse(gene > i.gene,gene,i.gene)]
# intersects <- unique(intersects, by= c('id','geneA','geneB'))[,.(id,geneA,geneB,intersect)]
# jaccardDT <- intersects[unions,on=.(id,geneA,geneB)]
# jaccardDT[intersect %in% NA,intersect:=0]
# jaccardDT[,jaccard:= intersect/(union - intersect),by=.(id,geneA,geneB)]
# jaccardDT[,max_jaccard:= max_intersect/(union - max_intersect),by=.(id,geneA,geneB)]
# jaccardDT[,scaled_jaccard:=jaccard/max_jaccard]
# jaccardDT[,.(meanJI=mean(jaccard)),by=id][order(meanJI)]
# 
# # scaled jaccard
# 
# 
# intersect_pr <- jaccardDT[intersect!=0][,.(n_overlap_pairs=.N),by=id]
# intersect_pr <- intersect_pr[jaccardDT[,.(n_total_pairs=.N),by=id],on=.(id)]
# intersect_pr[n_overlap_pairs %in% NA, n_overlap_pairs:=0]
# intersect_pr[,pr_overlap_pairs:= n_overlap_pairs / n_total_pairs]
# [,.(n_overlap_pairs,n,pr=n_overlap_pairs/n),by=id][order(-pr)]
# 
# jaccardDT[intersect!=0]
# jaccardDT[,.(meanJI=mean(jaccard),medianJI=median(jaccard),maxJI=max(jaccard)),by=id][]
joshuamschmidt/multiPermr documentation built on Oct. 12, 2020, 11:42 a.m.