#' Read deeptools computeMatrix files
#'
#' @param PATH An abosulte path to a computeMatrix file
#' @param FUNC Which summarising function? Default: mean
#' @param sortOnMiddle Sort the matrices on the centers?
#' @param quantiles A two-number vector. c(0.05,0.95) will dampen the top and bottem 5%. Handy for outliers.
#' @return A list:
#' \itemize{
#' \item{matrixList: }{A named list of matrices: one per bigwig/bed combination. Names are bigwig<>bed.}
#' \item{metaData: }{A named list of metadata from you computeMatrix-run.}
#' \item{tidyDF: }{A dataframe for easy ggplotting.}
#' \item{enrichmentDF: }{A dataframe with per-loop enrichment of average signal.}
#' \item{BED: }{A dataframe of the BED-entries.}
#' }
#' @export
read.deeptools <- function(PATH, FUNC = mean , sortOnMiddle = T, quantiles = c(0,1)){
require(reshape2)
require(dplyr)
json <- read.delim(PATH, header = F, nrows = 1, stringsAsFactors = F)
json <- unlist(json[,1])
json <- gsub(json, pattern = "^@", replacement = "")
json <- strsplit(json, split = ",")[[1]]
jsonLE <- list()
jsonListEntries <- cbind(grep(json,pattern = "\\["),grep(json,pattern = "\\]"))
rmEntriesIndexes <- c()
for(i in 1:nrow(jsonListEntries)){
row = json[jsonListEntries[i ,1]:jsonListEntries[i ,2] ]
NAAM = strsplit(row[1], split = ":\\[")[[1]][1]
row[1] = strsplit(row[1], split = ":\\[")[[1]][2]
row[length(row)] <- gsub(row[length(row)], pattern = "\\]", replacement = "")
if(any("null" %in% row)){
row = NULL
}
if(!any(grepl(row, pattern = "[a-zA-Z]"))){
row = as.numeric(row)
}
jsonLE[[gsub(NAAM, pattern = " ",replacement = "_")]] = row
rmEntriesIndexes <- c(rmEntriesIndexes, jsonListEntries[i ,1]:jsonListEntries[i ,2])
}
json = json[-rmEntriesIndexes]
json = gsub(json , pattern = "[\\{\\}]", replacement = "")
for(i in 1:length(json)){
row = strsplit(json[i], split = ":")[[1]]
NAAM = row[1]
entry = row[2]
if(entry == 'null'){
entry = NULL
} else if( entry %in% c('false', 'true') ){
entry = as.logical(toupper(entry))
} else if( grepl(entry, pattern = "[a-zA-Z]") & !is.logical(entry) ){
entry = as.character(entry)
} else {
entry = as.numeric(entry)
}
jsonLE[[gsub(NAAM, pattern = " ",replacement = "_")]] = entry
}
mat <- read.delim(PATH, skip = 1, h = F)
meta <- mat[, 1:6]
mat <- mat[, -c(1:6)]
matList <- list()
for(Si in 1:(length(jsonLE$sample_boundaries)-1)){
SB = (jsonLE$sample_boundaries[Si]+1):jsonLE$sample_boundaries[Si +1]
for(Gi in 1:(length(jsonLE$group_boundaries)-1)){
GB = (jsonLE$group_boundaries[Gi]+1):jsonLE$group_boundaries[Gi +1]
matChunk <- mat[GB,SB]
for(C in 1:ncol(matChunk)){
col = matChunk[, C]
Q = quantile(col, probs = quantiles, na.rm = T)
col[col > Q[2]] <- Q[2]
col[col < Q[1]] <- Q[1]
matChunk[, C] = col
}
if(sortOnMiddle){
before = jsonLE$upstream[Si] / jsonLE$bin_size[Si]
middle = jsonLE$body[Si] / jsonLE$bin_size[Si]
after = jsonLE$downstream[Si] / jsonLE$bin_size[Si]
leftIDX <- ncol(matChunk) - (before + middle) + 1
rightIDX <- ncol(matChunk) - (before )
O <- order(rowMeans(matChunk[, leftIDX:rightIDX]))
matChunk <- matChunk[O, ]
}
matList[[paste0(c(jsonLE$sample_labels[Si], jsonLE$group_labels[Gi]), collapse = "<>")]] = matChunk
}
}
before = jsonLE$upstream[Si] / jsonLE$bin_size[Si]
middle = jsonLE$body[Si] / jsonLE$bin_size[Si]
after = jsonLE$downstream[Si] / jsonLE$bin_size[Si]
if(jsonLE$body[1] == 0){
# refpoint
newBinNames <- c( rev(1:before * jsonLE$bin_size[1])*-1 ,
1:after * jsonLE$bin_size[1])
} else {
newBinNames <- c( rev(1:before * jsonLE$bin_size[1])*-1 ,
seq(-jsonLE$bin_size[1],
jsonLE$bin_size[1],
length.out = middle+2)[-c(1, middle+2)],
1:after * jsonLE$bin_size[1])
}
forGG <- data.frame()
enrich <- data.frame()
for(i in 1:length(matList)){
NAAM = names(matList)[i]
NAAM = strsplit(NAAM, split = "<>", fixed = T)[[1]]
BW = NAAM[1]
BED = NAAM[2]
if(BED == "genes"){BED = ""}
matChunk = matList[[i]]
colnames(matChunk) <- newBinNames
rownames(matChunk) <- 1:nrow(matChunk)
melted = reshape2::melt(as.matrix(matChunk))
melted$BW = BW
melted$BED = BED
colnames(melted)[2] = 'bin'
melted$binNumber <- match(as.character(melted$bin), as.character(newBinNames))
forGG <- rbind(forGG, melted)
midSize <- unique(jsonLE$body/jsonLE$bin_size)
if(jsonLE$body[1] == 0){
midSize = 1
}
midStart <- unique(jsonLE$upstream/jsonLE$bin_size)
middleTidy <- melted[melted$binNumber >= midStart + 1 &
melted$binNumber <= midStart + midSize,]
surroundingTidy <- melted[melted$binNumber <= midSize |
melted$binNumber > max(melted$binNumber) - midSize,]
middleScore <- dplyr::summarise( dplyr::group_by(middleTidy, Var1, BW, BED),
value = FUNC(value, na.rm = T))
surroundingScore <- dplyr::summarise( dplyr::group_by(surroundingTidy, Var1, BW, BED),
value = FUNC(value, na.rm = T))
enrichRow <- merge(middleScore, surroundingScore, by = c(1,2,3))
colnames(enrichRow) <- c('loopID', 'BW', 'BED','middle','surrounding')
enrichRow$log2FC <- log2(enrichRow$middle/enrichRow$surrounding)
enrich <- rbind(enrich, enrichRow)
}
forGG = dplyr::summarise(dplyr::group_by(forGG, bin, BED, BW, binNumber),
value = FUNC(value, na = T))
return(list(matrixList = matList, metaData = jsonLE,
tidyDF = forGG, enrichmentDF = enrich))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.