##################################################################################
## get the polII expression values for list of samples
#' PolII signal list from multiple samples
#'
#' @param genesDf dataframe with geneId column
#' @param exptInfo dataframe with experiment information for polII samples
#' @param log2 Logical. Whether to conver polII signal to log2. Default: FALSE
#'
#' @return dataframe with polII expression and isExpressed columns for samples of interest
#' @export
#'
#' @examples NA
get_polII_expressions <- function(genesDf, exptInfo, log2 = FALSE){
if(is.null(exptInfo$polIIExpFile)){
warning("Expression data not found...")
return(genesDf)
}
for(i in 1:nrow(exptInfo)){
if(toupper(exptInfo$IP_tag[i]) == "POLII" & !is.na(exptInfo$polIIExpFile[i])){
df <- suppressMessages(readr::read_tsv(file = exptInfo$polIIExpFile[i]))
if(log2){
df[[exptInfo$sampleId[i]]] <- log2(pmax(df[[exptInfo$sampleId[i]]], 1))
}
genesDf <- dplyr::left_join(x = genesDf, y = df, by = c("geneId" = "geneId"))
}
}
return(genesDf)
}
##################################################################################
##################################################################################
#' Read polII binding signal data for a sample
#'
#' This function reads the polII binding data for single sample and add it
#' to the provided dataframe. It also convert the polII signal values to log2
#' which can be used for plotting polII signal heatmap
#'
#' @param file polII binding signal file
#' @param title Title of the polII sample
#' @param clusterData A dataframe to which polII binding signal column should be added.
#' This dataframe should have 'geneId' column
#'
#' @return A list with three elements:
#' \itemize{
#' \item log2_mat: a matrix with log2(polII binding signal) values for heatmap generation
#' \item clusterDf: dataframe with polII binding signal columns
#' \item quantiles: polII binding signal quantile values generated by quantile() function
#' }
#' @export
#'
#' @examples NA
get_polII_signal <- function(file, title, clusterData){
polII_df <- suppressMessages(readr::read_tsv(file = file))
clusterData <- clusterData %>% dplyr::left_join(y = polII_df, by = c("geneId" = "geneId"))
polII_mat <- as.matrix(clusterData[[title]])
rownames(polII_mat) <- clusterData$geneId
polII_log2_mat <- log2(pmax(polII_mat, 1))
polII_q <- quantile(polII_log2_mat, c(seq(0, 0.9, by = 0.1), 0.95, 0.99, 0.992, 0.995, 0.999, 1), na.rm = T)
print(paste("Quantiles for polII data", file, sep = ": "), quote = F)
print(polII_q)
# return: log2(polII expression) matrix, clusterData
return(list(
"log2_mat" = polII_log2_mat,
"clusterDf" = clusterData,
"quantiles" = polII_q
))
}
##################################################################################
##################################################################################
#' Read peak file as dataframe with selected columns
#'
#' @param file Peak file
#' @param peakFormat one of \code{c("narrowPeak", "broadPeak", "bed")}
#' @param peakCols Column to select in dataframe. Column names should be from this list:
#' \code{c("peakChr", "peakStart", "peakEnd", "peakId", "peakScore", "peakStrand",
#' "peakEnrichment", "peakPval", "peakQval", "peakSummit")}.
#' Default: \code{c("peakId", "peakEnrichment", "peakPval")}
#' @param sampleId sampleId to be used as suffix in column names
#' @param column_suffix Logical: whether to add sampleId suffix to column name. Default: TRUE
#'
#' @return A dataframe with columns specified
#' @export
#'
#' @examples NA
import_peaks_as_df <- function(
file, peakFormat = "narrowPeak",
peakCols = c("peakId", "peakEnrichment", "peakPval"),
sampleId = NULL, column_suffix = TRUE){
peakFormat <- match.arg(arg = peakFormat, choices = c("narrowPeak", "broadPeak", "bed"))
colNames <- c("peakChr", "peakStart", "peakEnd", "peakId", "peakScore", "peakStrand",
"peakEnrichment", "peakPval", "peakQval", "peakSummit")
if(peakFormat == "narrowPeak"){
peakCols <- match.arg(arg = peakCols, choices = colNames, several.ok = TRUE)
} else if(peakFormat == "broadPeak"){
peakCols <- match.arg(arg = peakCols, choices = colNames[1:9], several.ok = TRUE)
colNames <- colNames[1:9]
} else{
peakCols <- match.arg(arg = peakCols, choices = colNames[1:6], several.ok = TRUE)
colNames <- colNames[1:6]
}
peaksDf <- suppressMessages(readr::read_tsv(file = file, col_names = colNames))
if(nrow(peaksDf) == 0){
return(NULL)
}
peaksDf <- dplyr::select(peaksDf, !!!peakCols)
if(column_suffix && !is.null(sampleId)){
renameCols <- peakCols
names(renameCols) <- paste(peakCols, sampleId, sep = ".")
peaksDf <- dplyr::rename(peaksDf, !!! renameCols)
}
return(peaksDf)
}
##################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.