R_tmp/GoShifter.R

####----- Go Shifter: Main Function -----####

#' Run \code{GoShifter} enrichment pipeline
#'
#' \code{GoShifter}: Locus-specific enrichment tool.
#'
#' @source
#' \href{https://github.com/immunogenomics/goshifter}{GitHub}
#' \href{https://pubmed.ncbi.nlm.nih.gov/26140449/}{Publication}
#'
#' @export
#' @importFrom dplyr %>%
#' @importFrom data.table fread
#' @examples
#' locus_dir <- echodata::locus_dir
#' dat <- echodata::BST1
#' gs_out <- echolocatoR:::GOSHIFTER(locus_dir = locus_dir, dat = dat)
GOSHIFTER <- function(locus_dir,
                     dat,
                     SNP.Group = "",
                     goshifter_path = NULL,
                     permutations = 1000,
                     ROADMAP_search = "",
                     ROADMAP_type = NA, # PrimaryTissue
                     chromatin_states = c("TssA"),
                     R2_filter = 0.8, # Include LD SNPs at rsquared >= NUM [default: 0.8]
                     overlap_threshold = 1,
                     force_new_goshifter = FALSE,
                     remove_tmps = TRUE,
                     verbose = TRUE,
                     save_results = TRUE){
 # locus_dir <-  "./Data/GWAS/Nalls23andMe_2019/LRRK2"; Roadmap_chromatinMarks_search <- "monocytes"; CredibleSet_only = T; permutations = 1000; chromatin_state = NA;  R2_filter = 0.8
 # Cleanup pyc tmp files
 goshifter <- GOSHIFTER.find_goshifter_folder(goshifter=goshifter_path)
 out <- suppressWarnings(file.remove(
   file.path(goshifter_path,
             c("chromtree.pyc",
               "data.pyc",
               "docopt.pyc",
               "functions.pyc")) ) )
 # Term key for chromatin states
 chromState_key <- data.table::fread(system.file("extdata/ROADMAP",
                                                 "ROADMAP_chromatinState_HMM.tsv",
                                                 package = "echoannot"))

 # Create GoShifter folder
 # locus_dir="Data/GWAS/Nalls23andMe_2019/LRRK2"
 GS_results_path <- file.path(locus_dir,"GoShifter")
 dir.create(GS_results_path, showWarnings = FALSE, recursive = TRUE)
 GS.RESULTS.path <- file.path(GS_results_path,
                              paste0("GOSHIFTER.",
                                     paste(chromatin_states, collapse = "-"),
                                     ".",SNP.Group,".txt"))

 # snpmap file
 GOSHIFTER.create_snpmap(dat = dat,
                         GS_results = GS_results_path,
                         verbose = verbose)
 # OR, copy file directly from example
 # file.copy(file.path(goshifter_path,"test_data","bc.snpmappings.hg19.txt"),
 #           file.path(GS_results_path,"snpmap.txt"),overwrite = TRUE)

 # LD file
 GOSHIFTER.create_LD(locus_dir = locus_dir,
                     dat = dat,
                     verbose = verbose)
 # Gather Annotation BED files
 # RoadMap_ref <- GOSHIFTER.search_ROADMAP(fuzzy_search = ROADMAP_search)
 grl.roadmap <- echoannot::ROADMAP_query(
   results_path = file.path(locus_dir,"annotations"),
   gr.snp = dat,
   keyword_query = ROADMAP_search,
   nThread = 1)

 # if(any(!is.null(ROADMAP_type))){RoadMap_ref <- subset(RoadMap_ref, TYPE %in% ROADMAP_type)}

 # Use existing results
 if(file.exists(GS.RESULTS.path) & force_new_goshifter==FALSE){
   GS.RESULTS <- data.table::fread(GS.RESULTS.path)
 } else{
   # Run GS over each chromatin mark subset
   GS.RESULTS <- lapply(chromatin_states, function(chromatin_state){
     try({
       messager("+ GoShifter:: Running on chromatin state subset:",chromatin_state)
       GS_results <- GOSHIFTER.run(goshifter_path = goshifter_path,
                                   locus_dir = locus_dir,
                                   chromatin_state = chromatin_state,
                                   R2_filter = R2_filter,
                                   permutations = permutations,
                                   remove_tmps = remove_tmps,
                                   verbose = verbose,
                                   overlap_threshold = overlap_threshold)
       message("GoShifter results data.frame: ",nrow(GS_results),
               " rows x ", ncol(GS_results)," cols" )
     })
     return(GS_results)
   }) %>% data.table::rbindlist(fill=TRUE)
 }

 if(save_results){
   data.table::fwrite(GS.RESULTS,
                      GS.RESULTS.path,
                      sep="\t", quote  = FALSE)
 }
 if(remove_tmps){
   # Remove GS output files to avoid reading in old files in future runs
   suppressWarnings(
     file.remove(file.path(GS_results_path,
                           paste0("GS_results",c(".nperm1000.enrich",
                                                 ".nperm1000.locusscore",
                                                 ".nperm1000.snpoverlap"))) ) )
 }
 return(GS.RESULTS)
}




# ######## PLOTS  # ########

GOSHIFTER.histograms_pvals <- function(GS_results){
 GS <- GS_results[,c("Annotation","pval","GS.pval")] %>% unique() %>%
   data.table::melt.data.table(id.vars = "Annotation",
                               variable.name = "P.value Source",
                               value.name = "P.value")
 hst <- ggplot(data = GS, aes(x=P.value, fill=`P.value Source`)) +
   geom_histogram(alpha=.5) +
   theme_bw()
 print(hst)
}

GOSHIFTER.histograms_SNPgroups <- function(GS.groups, show_plot = TRUE){
 GS <- GS.groups[,c("SNP.Group","Annotation","pval","GS.pval")] %>% unique()
 hst <- ggplot(GS, aes(x=pval, fill=SNP.Group)) +
   geom_histogram(alpha=.7)
 if(show_plot){print(hst)}
 return(hst)
}

GOSHIFTER.heatmap <- function(GS.groups, show_plot = TRUE){
 # GS_results <- GS_results_CS
 sig.tests <- subset(GS.groups, pval<=0.05)
 mat <- reshape2::acast(sig.tests,
                        Epigenome.name ~ chromatin_state + SNP.Group,
                        value.var="mean.enrichment",
                        fun.aggregate = mean, drop = FALSE, fill = 0)
 # library(heatmaply)
 hm <- heatmaply::heatmaply(mat, dendrogram = "row", k_row=3)
 # hm <- htmltools::tagList(list(hm))
 if(show_plot){print(hm)}
 return(hm)
}


GOSHIFTER.find_goshifter_folder <- function(goshifter=NULL){
if(is.null(goshifter)){
  goshifter <- system.file("tools/goshifter",package = "echolocatoR")
}
return(goshifter)
}


#' Prepare SNP input for \emph{GoShifter}
#'
#' @family GOSHIFTER
#' @examples
#' BST1 <- echodata::BST1; locus_dir <- echodata::locus_dir
#' snpmap <- GOSHIFTER.create_snpmap(dat=BST1, GS_results=file.path(locus_dir,"GoShifter"))
GOSHIFTER.create_snpmap <- function(dat,
                                  GS_results,
                                  nThread=1,
                                  verbose=TRUE){
messager("+ GOSHIFTER:: Creating snpmap file...", v=verbose)
snpmap <- file.path(GS_results,"snpmap.txt")
dir.create(GS_results, showWarnings = FALSE, recursive = TRUE)
dat %>%
  dplyr::rename(Chrom = CHR, BP = POS) %>%
  dplyr::mutate(Chrom = paste0("chr",Chrom)) %>%
  dplyr::select(SNP, Chrom, BP) %>%
  data.table::fwrite(snpmap,sep="\t")
messager("++ GOSHIFTER:: snpmap written to :",snpmap)
return(snpmap)
}


#' Prepare LD input for \emph{GoShifter}
#'
#' @family GOSHIFTER
#' @importFrom data.table fread merge.data.table
#' @importFrom echoconda find_packages
#' @examples
#' BST1 <- echodata::BST1; locus_dir <- echodata::locus_dir
#' LD_folder <- GOSHIFTER.create_LD(locus_dir=locus_dir, dat=subset(BST1, P<5e-8))
GOSHIFTER.create_LD <- function(locus_dir,
                              dat=NULL,
                              LD_path=NULL,
                              conda_env="goshifter",
                              nThread=1,
                              verbose=TRUE){
messager("+ GOSHIFTER:: Creating LD file(s)...", v=verbose)
# (chrA\tposA\trsIdA\tposB\trsIdB\tRsquared\tDPrime)
GS_results_path <- file.path(locus_dir,"GoShifter")
if(is.null(LD_path)) LD_path <- list.files(file.path(locus_dir,'LD'),".RDS", full.names = TRUE)[1]
if( !(any(endsWith(LD_path,"_LD.RDS"), na.rm = TRUE) | any(endsWith(LD_path,"plink.ld"), na.rm = TRUE))){
  stop("No LD RDS files detected.")
}

if(any(endsWith(LD_path,"_LD.RDS"), na.rm = TRUE)){
  messager("+ GOSHIFTER:: Reformatting LD",v=verbose)
  LD_matrix <- readSparse(LD_path, convert_to_df  = FALSE)
  LD_df <- as.data.frame(as.table(as.matrix(LD_matrix))) %>% `colnames<-`(c('rsIdA', 'rsIdB', 'Rsquared'))
  LD_df$Rsquared <- LD_df$Rsquared^2

  if(is.null(dat)){
    dat <- data.table::fread(list.files(file.path(locus_dir,"Multi-finemap"),
                                                "_Multi-finemap.tsv.gz", full.names = TRUE)[1])
  }
  LD_df <- subset(LD_df, rsIdA %in% dat$SNP | rsIdB %in% dat$SNP)
  ld_file <- data.table::merge.data.table(data.table::data.table(LD_df),
                                         subset(dat, select=c("SNP","CHR","POS"))%>% dplyr::rename(chrA=CHR,posA=POS),
                                         by.x = "rsIdA",
                                         by.y="SNP")  %>%
    data.table::merge.data.table(subset(dat, select=c("SNP","CHR","POS"))%>% dplyr::rename(chrB=CHR,posB=POS),
                                          by.x = "rsIdB",
                                          by.y="SNP") %>%
    dplyr::mutate(chrA=paste0("chr",gsub("chr","",chrA)),
                  chrB=paste0("chr",gsub("chr","",chrB)) ) %>%
    dplyr::arrange(chrA,posA) %>%
    dplyr::select(chrA,posA,rsIdA,posB,rsIdB,Rsquared)
}

if(any(endsWith(LD_path,"plink.ld"), na.rm = TRUE)){
  messager("++ GoShifter:: Reformatting 1000 Genomes LD",v=verbose)
  ld_file <- LD_path %>% dplyr::rename(chrA = CHR_A,
                                        posA = BP_A,
                                        rsIdA = SNP_A,
                                        posB = BP_B,
                                        rsIdB = SNP_B,
                                        Rsquared = R,
                                        DPrime = DP) %>%
    dplyr::mutate(Rsquared = Rsquared^2, chrA = paste0("chr",chrA)) %>%
    dplyr::select(chrA,posA,rsIdA,posB,rsIdB,Rsquared,DPrime)
}

# Create tabix file(s)
LD_folder <- file.path(GS_results_path,"LD")
dir.create(LD_folder, showWarnings = FALSE, recursive = TRUE)
for(chr in unique(ld_file$chrA)){
  ld_path <- file.path(LD_folder, paste0(chr,".EUR.tsv"))
  gz_path <- paste0(ld_path,".gz")
  out <- suppressWarnings(file.remove(gz_path))
  data.table::fwrite(subset(ld_file, chrA==chr),
                     ld_path,
                     sep="\t",
                     quote = FALSE,
                     col.names = FALSE,
                     nThread=nThread)
  # gzip(ld_path, destname = gz_path)
  bgzip <- echoconda::find_packages(package = "bgzip",
                                   conda_env = conda_env)
  tabix <- echoconda::find_packages(package = "tabix",
                                   conda_env = conda_env)
  system(paste(bgzip,ld_path))
  cmd <- paste(tabix,
               "-b 2",
               "-e 2",
               "-f",
               gz_path)
  system(cmd)
}
messager("+++ LD file(s) written to :",LD_folder)
return(LD_folder)
}





GOSHIFTER.search_ROADMAP <- function(Roadmap_reference =
                                     system.file("extdata/ROADMAP",
                                                 "ROADMAP_Epigenomic.js",
                                                 package = "echoannot"),
                                   EID_filter = NA,
                                   GROUP_filter = NA,
                                   ANATOMY_filter = NA,
                                   GR_filter = NA,
                                   fuzzy_search = NA){
messager("++ GoShifter: Searching for Roadmap annotation BED files...", v=verbose)
fuzzy_search <- paste(fuzzy_search, collapse="|")
RM_ref <- suppressWarnings(data.table::fread(Roadmap_reference, skip = 1 , header = FALSE,
                                             col.names = c("EID",
                                                           "GROUP",
                                                           "ANATOMY",
                                                           "GR",
                                                           "Epigenome.Mnemonic",
                                                           "Standardized.Epigenome.name",
                                                           "Epigenome.name",
                                                           "TYPE")) )
if(!is.na(EID_filter)){RM_ref <- subset(RM_ref, EID %in% EID_filter)}
if(!is.na(GROUP_filter)){RM_ref <- subset(RM_ref, GROUP %in% GROUP_filter)}
if(!is.na(ANATOMY_filter)){RM_ref <- subset(RM_ref, ANATOMY %in% ANATOMY_filter)}
if(!is.na(GR_filter)){RM_ref <- subset(RM_ref, GR %in% GR_filter)}
if(!is.na(fuzzy_search)){
  RM_ref <- RM_ref[(tolower(GROUP) %like% fuzzy_search) | (tolower(ANATOMY)  %like% fuzzy_search) | (tolower(GR) %like% fuzzy_search) | (tolower(Standardized.Epigenome.name) %like% fuzzy_search) | (tolower(Epigenome.name) %like% fuzzy_search) | (tolower(TYPE) %like% fuzzy_search )]
}
total_found <- dim(RM_ref)[1]
if(total_found > 0){
  messager("++ GoShifter: Found",dim(RM_ref)[1],"annotation BED files matching search query.", v=verbose)
} else {stop("No annotation BED files found :(")}
return(RM_ref)
}


GOSHIFTER.bed_names <- function(RM_ref,
                              suffix="_15_coreMarks_mnemonics.bed.gz"){
# All files found here:
# https://egg2.wustl.edu/roadmap/data/byFileType/chromhmmSegmentations/ChmmModels/coreMarks/jointModel/final/

# Alternative suffixes
# _15_coreMarks_dense.bed.gz
# _15_coreMarks_dense.bed.gz

# Create URLs
bed_names <- lapply(unique(RM_ref$EID), function(eid,
                                                 v=verbose,
                                                 suffix.=suffix){
  return(paste0(eid,suffix))
}) %>% unlist()
return(bed_names)
}




GOSHIFTER.list_chromatin_states <- function(annotations_path = "./annotations"){
# Term key for chromatin states
chromState_key <- data.table::fread(file.path(annotations_path, "ROADMAP",
                                              "ROADMAP_chromatinState_HMM.tsv"))
return(chromState_key)
}



GOSHIFTER.get_roadmap_annotations <- function(annotations_path = "./annotations",
                                            bed.list,
                                            chromatin_state = "TssA",
                                            verbose = TRUE){
output_paths <- lapply(bed.list, function(bed, v=verbose){
  eid <- strsplit(bed, "_")[[1]][1]
  roadmap.annot.path <- file.path(annotations_path,"ROADMAP/Chromatin_Marks")
  dir.create(roadmap.annot.path, showWarnings = FALSE, recursive = TRUE)
  output_path <- file.path(roadmap.annot.path, bed)
  bed_subset <- file.path(dirname(output_path), paste0(eid,"_",chromatin_state,"_subset.bed"))
  bed_subset.gz <- paste0(bed_subset,".gz")
  bed_url <- file.path("https://egg2.wustl.edu/roadmap/data/byFileType",
                       "chromhmmSegmentations/ChmmModels/coreMarks/jointModel/final",
                       bed)

  # If you have the exact file subset you need, just load it
  if(file.exists(bed_subset.gz)){
    messager("+++ GoShifter:: Importing Previously downloaded BED subset:",bed_subset.gz, v=v);
    dat <- data.table::fread(bed_subset.gz, col.names = c("Chrom", "Start","End","State","NA") )
  } else {
    # If you have the right file but it needs to be subset
    if (file.exists(output_path)){
      messager("+++ GoShifter:: Importing Previously downloaded full BED:",bed_subset.gz, v=v);
      dat <- data.table::fread(output_path, col.names = c("Chrom", "Start","End","State") )
    } else {
      messager("+++ GoShifter:: Downloading annotation BED file from Roadmap server...", v=v)
      dat <- data.table::fread(bed_url, col.names = c("Chrom", "Start","End","State") )
      data.table::fwrite(dat, file.path(roadmap.annot.path, bed), col.names = FALSE, sep = " ")
    }
    # Subset data to just the chromatin state(s) you want to check enrichment for
    dat[, c("Num", "State") := tstrsplit(State, "_", fixed=TRUE)] # inplace transform
    if(!is.na(chromatin_state)){
      messager("+++ GoShifter:: Subsetting BED file by chromatin state: ",chromatin_state)
      dat <- dat[State == chromatin_state,]
      data.table::fwrite(dat, bed_subset, col.names = FALSE,sep = "\t")
      gzip(bed_subset, destname=bed_subset.gz, overwrite = TRUE)
    } else {
      messager("+++ GoShifter:: Including all",length(unique(dat$State)),"chromatin states.")
      bed_subset.gz <- file.path(roadmap.annot.path, bed)
    }
    messager("+++ GoShifter:: BED file saved to  ==> ", bed_subset.gz)

  }
  return(bed_subset.gz)
}) %>% unlist()
return(output_paths)
}

GOSHIFTER.process_results <- function(locus_dir,
                                    output_bed,
                                    out.prefix){
# From GitHub Repo: https://github.com/immunogenomics/goshifter
# ------- .LOCUSSCORE -------
## *.locusscore – the likelihood of a locus to overlap an annotation under the null.
##The smaller the value the more likely a locus overlaps an annotation not by chance.
##Loci not overlapping any annotation are denoted as “N/A”.
## ** NOTE **: This file is terribly named. Each SNP is not necessarily a "locus".
.locusscore<- data.table::fread(file.path(locus_dir,"GoShifter",
                                          paste0(out.prefix,".nperm1000.locusscore")),sep="\t")
.locusscore$Annotation <- basename(output_bed)
eid <- strsplit(basename(output_bed),"_")[[1]][1]
.locusscore$EID <- eid
RoadMap_ref <- ROADMAP.construct_reference()
data <- subset(RoadMap_ref, EID == eid)
GS.stats <- data.table:::merge.data.table(.locusscore, data.table::data.table(data), by="EID")

# ------- .ENRICH -------
## *.enrich – output file with observed and permuted overlap values
# nperm = 0 is the observed overlap nSnpOverlap –
## number of loci where at least one SNP overlaps an annotation allSnps
## – total number of tested loci enrichment – nSnpOverlap/allSnps
# ***Note***, P-value is the number of times the “enrichment” is greater or equal to the observed overlap divided by total number of permutations.
.enrich <- data.table::fread(file.path(locus_dir,"GoShifter",
                                                  paste0(out.prefix,".nperm1000.enrich")) ,sep="\t")
# Manually calculate p-value...annoying that they don't just provide this in output files
GS.stats$pval <- sum(.enrich$enrichment >= .enrich$nSnpOverlap) / max(.enrich$nperm)

mean.enrich <- .enrich %>% summarise(mean.nSnpOverlap = mean(nSnpOverlap),
                        allSnps=mean(allSnps),
                        mean.enrichment = mean(enrichment))
GS.stats$mean.nSnpOverlap <- mean.enrich$mean.nSnpOverlap
GS.stats$allSnps <- mean.enrich$allSnps
GS.stats$mean.enrichment <- mean.enrich$mean.enrichment
GS.stats <- dt_replace(GS.stats, "N/A", NA)
return(GS.stats)
}


GOSHIFTER.check_overlap <- function(output_bed,
                                  GS_results_path,
                                  overlap_threshold = 1){
bed.file <- data.table::fread(output_bed, col.names = c("Chrom", "Start","End","State","NA"))
snpmap <- data.table::fread(file.path(GS_results_path,"snpmap.txt"))
# Find overlappying bed
bed.overlap <- lapply(1:nrow(snpmap), function(i){
  row <- snpmap[i,]
  bed.sub <- subset(bed.file, (Chrom==row$Chrom) & (Start<=row$BP & End>=row$BP))
  return(bed.sub)
}) %>% data.table::rbindlist(fill=TRUE)
return(bed.overlap)
}


#' Run \code{GoShifter} enrichment test
#'
#' \code{GoShifter}: Locus-specific enrichment tool.
#'
#' @source
#' \href{https://github.com/immunogenomics/goshifter}{GitHub}
#' \href{https://pubmed.ncbi.nlm.nih.gov/26140449/}{Publication}
#' @importFrom echodata granges_to_bed
#' @examples
#' BST1 <- echodata::BST1; locus_dir <- echodata::locus_dir
#' peaks <- echoannot::NOTT2019_get_epigenomic_peaks()
#' grl.peaks <- GenomicRanges::makeGRangesListFromDataFrame(peaks, split.field ="Cell_type")
#' GS_results <- GOSHIFTER.run(dat=subset(BST1, P<5e-8), locus_dir=locus_dir, GRlist=grl.peaks)
GOSHIFTER.run <- function(dat,
                        locus_dir,
                        GRlist,
                        permutations = 1000,
                        goshifter_path = NULL,
                        chromatin_state = "TssA",
                        R2_filter = 0.8,
                        overlap_threshold = 1,
                        remove_tmps = TRUE,
                        verbose = TRUE){
goshifter_path <- GOSHIFTER.find_goshifter_folder(goshifter = goshifter_path)
# python <- echoconda::find_python_path(conda_env = "goshifter")

# Iterate over each bed file
GS_results <- lapply(names(GRlist), function(gr.name,
                                             .dat = dat,
                                             .remove_tmps = remove_tmps,
                                             .goshifter_path=goshifter_path,
                                             .locus_dir = locus_dir,
                                             .R2_filter = R2_filter,
                                             .overlap_threshold = overlap_threshold,
                                             .permutations=permutations,
                                             .verbose=verbose){
    message(gr.name)
    gr <- GRlist[[gr.name]]
    GS_results_path <- file.path(.locus_dir,"GoShifter")
    # Check if there's any overlap first
    gr.hits <- GRanges_overlap(dat1 = .dat,
                               chrom_col.1 = "CHR",
                               start_col.1 = "POS",
                               end_col.1 = "POS",
                               dat2 = gr)

    if(length(gr.hits) >= .overlap_threshold){
      messager("++ GoShifter:: Converting GRanges to BED", v=.verbose)
      # Prepare annotation
      bed_dir <- file.path(.locus_dir,"annotations",gr.name)
      bed_file <- echodata::granges_to_bed(GR.annotations = GRlist[gr.name],
                                 output_path = bed_dir,
                                 gzip = TRUE,
                                 nThread = 1)
      # Prepare LD
      LD_folder <- GOSHIFTER.create_LD(locus_dir=.locus_dir,
                                       dat=.dat)
      # Run
      messager("GoShifter: Overlapping SNPs detected", v=.verbose)
      python <- echoconda::find_python_path(conda_env = "goshifter")
      if(python=="python") python <- "python2.7"
      cmd <- paste(python,
                   file.path(.goshifter_path,"goshifter.py"),
        "--snpmap",file.path(GS_results_path,"snpmap.txt"),
        "--annotation",bed_file,
        "--permute",.permutations,
        "--ld",file.path(GS_results_path,"LD"),
        "--out",file.path(GS_results_path, gr.name)
        # "--rsquared",.R2_filter # Optional
        # "--window",window, # Optional
        # "--min-shift",min_shift, # Optional
        # "--max-shift",max_shift, # Optional
        # "--ld-extend",ld_extend, # Optional
        # "--nold" # Optional
      )
      cmd.out <- system(cmd, intern = TRUE)
      cat(paste(cmd.out,collapse="\n"))
      # Gather results from written output files
      GS.stats <- GOSHIFTER.process_results(locus_dir = .locus_dir.,
                                            output_bed = output_bed,
                                            out.prefix = gr.name)
      GS.stats$chromatin_state <- chromatin_state
      # Extract p-value from console output
      GS.pval <- gsub("p-value = ","",cmd.out[startsWith(cmd.out, "p-value")]) %>% as.numeric()
      GS.stats$GS.pval <- GS.pval
      GS.stats$true.overlap <- nrow(bed.overlap)
      if(remove_tmps.){ file.remove(output_bed)}
    } else{
      messager("GoShifter:: There is no overlap between the snpmap and bed files ( at overlap_threshold =",
              overlap_threshold,")." )
      GS.stats <- data.table::data.table(Ennotation=output_bed,
                                         pval=NA,
                                         GS.pval=NA,
                                         true.overlap=0)
    }
  return(GS.stats)
}) %>% data.table::rbindlist(fill=TRUE)
messager(".")
messager(".")
messager(".")
return(GS_results)
}



GOSHIFTER.summarise_results <- function(GS_results){
# Summarise results
sig_results <- GS_results %>%
  # Apply bonferonni correction
  # subset(enrichment <= 0.05/nrow(GS_results) ) %>%
  subset(enrichment <= 0.05) %>%
 dplyr::arrange(desc(enrichment),dplyr::desc(nSnpOverlap)) %>%
  dplyr::mutate(enrichment = formatC(enrichment,format="e", digits=7) )
echodata::createDT(sig_results) %>% print()

# Get sig enrichments per tissue per chomatin_state
sig_count <- sig_results %>%
  dplyr::group_by(EID) %>%
  slice(1) %>%
  dplyr::group_by(chromatin_state) %>%
  count()
messager("+++",sig_count$n,"/",nrow(RoadMap_ref),
        "tissues had significant enrichment for the chomatin state(s):",
        paste(chromatin_state, collapse = ", "), v = verbose)
return(sig_results)
}
RajLabMSSM/echolocatoR documentation built on Jan. 29, 2023, 6:04 a.m.