##################################################################################
## function to process polII expression matrix and add top N% expression flag
#' Pre-process polII expression data
#'
#' Reads a headerless, tab-separated polII expression file, keeps the gene id
#' and expression value columns, flags the top \code{expFraction} percent of
#' rows (ranked by expression value) as expressed and writes the result as a
#' tab-separated file with a header.
#'
#' Columns 1, 2, 3, 5 and 6 of the input are dropped; the two remaining columns
#' are read as the gene id and the expression value (this presumes a 7-column
#' input, i.e. columns 4 and 7 are kept -- confirm against the generating
#' script).
#'
#' The output has three columns: \code{geneId}, \code{<sampleId>} and
#' \code{is_expressed.<sampleId>}, with exactly one output row per input row.
#' Rows tied at the cut-off value are all flagged as expressed, so slightly
#' more than the requested number of rows can be \code{TRUE}. Rows with a
#' missing expression value are never flagged.
#'
#' @param expMat Path to the polII expression file generated by Miao's script
#' @param sampleId Sample id to be used. It becomes the name of the expression
#' value column and the suffix of the \code{is_expressed.} column.
#' @param expFraction Percentage (0 to 100) of the genes to be considered as
#' expressed, e.g. \code{10} for the top 10\% (normally 10\% genes are under
#' active transcription in the cell). Note that this is a percentage, not a
#' proportion: the value is divided by 100 internally.
#' @param polIIExpFile Complete path for processed output file
#'
#' @return complete output file path
#' @export
#'
#' @examples
#' \dontrun{
#' preProcess_polII_expression(
#'   expMat = "sample1_polII_expr.tab", sampleId = "sample1",
#'   expFraction = 10, polIIExpFile = "sample1_polII_expr.processed.tab"
#' )
#' }
preProcess_polII_expression <- function(expMat, sampleId, expFraction,
                                        polIIExpFile) {
  ## expFraction is a percentage; values outside 0-100 make no sense and a
  ## negative value would otherwise silently select nothing meaningful
  stopifnot(
    is.numeric(expFraction), length(expFraction) == 1L,
    !is.na(expFraction), expFraction >= 0, expFraction <= 100
  )

  polIIDf <- data.table::fread(
    input = expMat, header = FALSE, drop = c(1, 2, 3, 5, 6),
    col.names = c("geneId", sampleId),
    stringsAsFactors = FALSE, sep = "\t", data.table = FALSE
  )

  ## number of rows that make up the requested top percentage
  topFraction <- round(nrow(polIIDf) * expFraction / 100)
  isExpCol <- paste0("is_expressed.", sampleId)

  ## Rank rows by decreasing expression; ties share the smallest rank and NA
  ## values stay NA. Flagging rows directly (instead of joining a top-N subset
  ## back on geneId) keeps one output row per input row even when a gene id
  ## occurs more than once in the input.
  exprRank <- rank(
    -xtfrm(polIIDf[[sampleId]]),
    ties.method = "min", na.last = "keep"
  )
  polIIDf[[isExpCol]] <- !is.na(exprRank) & exprRank <= topFraction

  utils::write.table(
    x = polIIDf, file = polIIExpFile, sep = "\t",
    col.names = TRUE, quote = FALSE, row.names = FALSE
  )

  # cat("Processed polII expression for sample ", sampleId, "\n")
  polIIExpFile
}
##################################################################################
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.