R/p04_get_TF_binding_data.R

Defines functions get_TF_binding_data

Documented in get_TF_binding_data

##################################################################################
#' Get the TF peak annotation data
#'
#' This function extracts columns from the peak annotation file of each sample
#' (the file path stored in \code{exptInfo$peakTargetFile}, e.g.
#' SAMPLE_ID_peaks.annotated.tab) and left-joins them onto \code{genesDf}
#' by \code{geneId}, one sample after another.
#'
#' A row of \code{exptInfo} is skipped when its \code{peakTargetFile} is
#' \code{NA}, its \code{IP_tag} is \code{"polII"} or its \code{TF} is
#' \code{"untagged"}. A missing (\code{NA}) \code{IP_tag} or \code{TF} is
#' treated as "not excluded", so such a sample is still processed.
#'
#' @param genesDf Dataframe with genes of interest. Must have a \code{geneId}
#' column, which is used as the join key.
#' @param exptInfo Experiment information dataframe generated by
#' get_sample_information(). The columns \code{sampleId}, \code{IP_tag},
#' \code{TF} and \code{peakTargetFile} are read from it.
#' @param allColumns Whether to get all the columns TRUE or FALSE
#'
#' @return \code{genesDf} with the peak annotation columns of every processed
#' sample joined to it. When allColumns is TRUE, every column of the peak
#' annotation file except "chr", "start", "end" and "strand" is joined.
#' If allColumns = FALSE, only "hasPeak.sampleId", "peakPosition.sampleId",
#' "peakType.sampleId", "peakId.sampleId", "peakDist.sampleId",
#' "summitDist.sampleId", "peakEnrichment.sampleId", "peakCoverage.sampleId" and
#' "peakPval.sampleId" columns are joined. If \code{exptInfo} has no
#' \code{peakTargetFile} column, a warning is raised and \code{genesDf} is
#' returned unchanged.
#' @export
#'
#' @examples NA
get_TF_binding_data <- function(genesDf, exptInfo, allColumns = FALSE){

  # exact-name lookup: `$` would partially match a longer column name
  if (is.null(exptInfo[["peakTargetFile"]])) {
    warning("TF binding data not found...")
    return(genesDf)
  }

  # other columns available in the peak annotation file:
  # "peakQval", "peakSummit", "bidirectional", "targetOverlap", "peakOverlap",
  # "relativeSummitPos", "peakRegion", "relativePeakPos"
  peakCols <- c("hasPeak", "peakPosition", "peakType", "peakId", "peakDist",
                "summitDist", "peakEnrichment", "peakCoverage", "peakPval")

  # seq_len() keeps the loop empty for a zero-row exptInfo (1:0 would run twice)
  for (i in seq_len(nrow(exptInfo))) {

    peakFile <- exptInfo$peakTargetFile[i]

    # isTRUE() keeps an NA tag/TF from turning the condition into NA, which
    # would abort the whole call inside if()
    if (is.na(peakFile) ||
        isTRUE(exptInfo$IP_tag[i] == "polII") ||
        isTRUE(exptInfo$TF[i] == "untagged")) {
      next
    }

    df <- data.table::fread(
      input = peakFile, header = TRUE, stringsAsFactors = FALSE,
      drop = c("chr", "start", "end", "strand"), sep = "\t", data.table = FALSE
    )

    if (!allColumns) {
      # column names in the file carry the sample id as suffix
      keepCols <- c("geneId", paste(peakCols, exptInfo$sampleId[i], sep = "."))

      missingCols <- setdiff(keepCols, names(df))
      if (length(missingCols) > 0) {
        stop("Columns not found in ", peakFile, ": ",
             paste(missingCols, collapse = ", "), call. = FALSE)
      }

      df <- df[, keepCols, drop = FALSE]
    }

    genesDf <- dplyr::left_join(x = genesDf, y = df, by = "geneId")
  }

  genesDf
}

##################################################################################
lakhanp1/chipmine documentation built on Oct. 23, 2019, 7:54 p.m.