#' A function to generate an FPWM class object.
#'
#' This function assigns proper data to their associated slots of a S4 classe. This information is either provided by user, or exported from TFregulomeR's dataware using user specified data.
#' @param mainTF character with the name of the main TF.
#' @param partners List or character vector with the names of the partner TFs.
#' @param cell character with the name of the main TF.
#' @param mainTF_MMID character with the name of the main TF.
#' @param partners_MMID character with the name of the main TF.
#' @param forkPosition This argument, defines from which point on, the matrix needs to be forked, or in the other words, up to which point two exclusive matrices need to be aggregated.
#' @param probabilityMatrix Logical, whether the function should return a frequency matrix or probability matrix (Default FALSE).
#' @param scaleFrequencyCounts Logical, whether the count matrix should have equal rowSums across all the rows (Default FALSE).
#' @param flipMatrix Logical, whether to apply reverse complement in case the core motif is after the forkPosition (Default FALSE).
#' @examples
#' fpwm <- createFPWM(mainTF ="CEBPB", partners = c("ATF4","ATF7","JUND"), cell = "K562", forkPosition = 5)
#' @return This component, returns a FPWM class object that can be used to plot or written in transfact format.
#' @export
createFPWM <- function( mainTF = NULL,
partners = NULL,
cell = NULL,
mainTF_MMID = NULL,
partners_MMID = NULL,
forkPosition = NULL,
probabilityMatrix = FALSE,
scaleFrequencyCounts = FALSE,
flipMatrix = FALSE)
{
tfnames <- FALSE
tfIDs <- FALSE
# Check if inputs are compatible
if(!is.null(mainTF) & !is.null(partners) & !is.null(cell) & !is.null(forkPosition) ) { tfnames <- TRUE }
if(!is.null(mainTF_MMID) & !is.null(partners_MMID) & !is.null(forkPosition) ) { tfIDs <- TRUE }
if(tfnames & tfIDs) { stop("Incompatible input. Provide 'mainTF', 'partners' and 'cell'. !!OR!! 'mainTF_MMID' and 'partners_MMID'. ") }
if(!(tfnames | tfIDs) ) { stop("Missing input.\nProvide: 'mainTF', 'partners', 'cell' and 'forkPosition' \nOR\n 'mainTF_MMID', 'partners_MMID' and 'forkPosition'") }
if( probabilityMatrix == TRUE & scaleFrequencyCounts == TRUE ) { stop("scaleFrequencyCounts only works for count matrices, disable probabilityMatrix.") }
if (tfnames){
xTF_cell_tissue_name <- suppressMessages(dataBrowser(tf = mainTF, cell_tissue_name = cell)$ID[1])
partners <- as.list(unlist(partners)) ;
MMpartners <- partners
for(i in 1:length(partners)){ suppressMessages( MMpartners[[i]] <- dataBrowser(tf = partners[[i]], cell_tissue_name = cell)$ID[1] ) }
if(is.null(xTF_cell_tissue_name)){ stop("Please check the spelling of your mainTF or cell. \nOr there might no be information for this TF-cell combination.") }
if( length(xTF_cell_tissue_name) != 1 ){ stop("More than one record for the combination of TF and cell tissue") ; }
if( sum(MMpartners %in% partners) ){
message( paste("The following partners were not found in combination with the cell:", unlist(partners[which(MMpartners %in% partners)] ) ) )
stop("\n")
}
peak_id_y_list <- MMpartners
peak_id_x <- xTF_cell_tissue_name
}
if (tfIDs){
partners_MMID = as.list(unlist(partners_MMID))
peak_id_y_list = partners_MMID ; peak_id_x = mainTF_MMID
}
if( length(peak_id_y_list) < 2) { stop("The list of partner TF should contain 2 or more elements.") }
Motif <- TFregulomeR::intersectPeakMatrix(peak_id_x = peak_id_x, motif_only_for_id_x = TRUE, peak_id_y = peak_id_y_list, motif_only_for_id_y = TRUE)
motif_length <- dim(Motif[1,1][[1]]@MethMotif_x@MMBetaScore)[2] # get number of positions in motif
if(as.numeric(forkPosition)>=motif_length){ stop("The forkPosition is larger than the motif length.") }
if(flipMatrix==TRUE){
for( i in 1:length(peak_id_y_list) ){
Motif[1,i][[1]]@MethMotif_x@MMBetaScore <- Motif[1,i][[1]]@MethMotif_x@MMBetaScore[,motif_length:1] # reverse meth info
colnames(Motif[1,i][[1]]@MethMotif_x@MMBetaScore) <- rev(colnames(Motif[1,i][[1]]@MethMotif_x@MMBetaScore)) # reverse meth info colnames
Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[motif_length:1,] # reverse
tmp_matrix <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix
tmp_matrix[,'A'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'T'] # complementary
tmp_matrix[,'T'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'A'] # complementary
tmp_matrix[,'C'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'G'] # complementary
tmp_matrix[,'G'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'C'] # complementary
Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix <- tmp_matrix
}
forkPosition <- which(motif_length:1 %in% forkPosition)-1 # adjust fork position
}
TFregulomeDataovlaplist <- list()
TFregulomeDataBetalist <- list()
TFregulomeDatamatrixlist <- list()
TFregulomeDatanPeaks <- vector()
TFregulomeDatanSites <- vector()
for( i in 1:length(peak_id_y_list) ){
y <- Motif[1,i]
TFregulomeDataovlaplist[[i]] = y[[1]]@overlap_percentage_x
TFregulomeDataBetalist[[i]] = y[[1]]@MethMotif_x@MMBetaScore
TFregulomeDatamatrixlist[[i]] = y[[1]]@MethMotif_x@MMmotif@motif_matrix
TFregulomeDatanPeaks[i] <- y[[1]]@MethMotif_x@MMmotif@nPeaks
TFregulomeDatanSites[i] <- y[[1]]@MethMotif_x@MMmotif@nsites
}
FPWM <- new("FPWMClassObj")
FPWM <- updateFPWMClassObj(FPWM, id = peak_id_y_list,
nSites = TFregulomeDatanSites,
nPeaks = TFregulomeDatanPeaks,
matrix=TFregulomeDatamatrixlist,
betalevel = TFregulomeDataBetalist,
score= TFregulomeDataovlaplist,
forkPosition = forkPosition)
FPWM <- MatrixAdder( FPWM, forkPosition, probabilityMatrix)
FPWM <- BetaAdder(FPWM, forkPosition)
FPWM <- ConvertToFTRANSFAC(FPWM, probabilityMatrix, scaleFrequencyCounts)
for (i in c(1:length(FPWM@betalevel))) {
X<-FPWM@betalevel[[i]]
FPWM@betalevel[[i]] <- X[,(forkPosition+1):ncol(FPWM@betalevel[[i]])]
}
FPWM@xid <- peak_id_x
FPWM <- ModifyBetaFormat(FPWM)
# order object based on overlapping score
FPWM_tmp <- FPWM
objOrder <- order(unlist(FPWM@score),decreasing=TRUE)
FPWM_tmp@betalevel <- FPWM@betalevel[objOrder]
FPWM_tmp@id <- FPWM@id[objOrder]
FPWM_tmp@matrix <- FPWM@matrix[objOrder]
FPWM_tmp@score <- FPWM@score[objOrder]
FPWMPO <- FPWM@forked$PO
from <- min(FPWMPO[duplicated(FPWMPO)])
to <- max(FPWMPO)
ix <- cbind(which(FPWMPO %in% from) , which(FPWMPO %in% to))
for ( jx in 1:length(objOrder) ){
from_row_ix <- ix[objOrder[jx],1] : ix[objOrder[jx],2]
to_row_ix <- ix[jx,1] : ix[jx,2]
FPWM_tmp@forked[to_row_ix ,2:5] <- FPWM@forked[ from_row_ix, 2:5]
}
# add colnames
FPWM <- FPWM_tmp
colnames(FPWM@forked) <- c("PO","A","C","G","T")
colnames(FPWM@parentbeta) <- c("PO","number","meth")
for( i in 1:length(FPWM@betalevel) ) { colnames(FPWM@betalevel[[i]]) <- c("PO","number","meth") }
return(FPWM)
}
MatrixAdder <- function( fpwmObject, forkPosition, probabilityMatrix)
{
nSites <- fpwmObject@nSites
if(probabilityMatrix==TRUE){
total_nSites <- sum(nSites)
S <- (fpwmObject@matrix[[1]][1:forkPosition,]) * (nSites[1]/total_nSites)
for ( i in 2:length(fpwmObject@matrix) ) { S <- S + (fpwmObject@matrix[[i]][1:forkPosition,] * (nSites[i]/total_nSites) ) }
}
if(probabilityMatrix==FALSE){
S <- round(fpwmObject@matrix[[1]][1:forkPosition,] * nSites[1])
for ( i in 2:length(fpwmObject@matrix) ) { S <- S + round(fpwmObject@matrix[[i]][1:forkPosition,] * nSites[i]) }
}
fpwmObject@parentmatrix <- S
return(fpwmObject)
}
BetaAdder <- function( fpwmObject, forkPosition)
{
S <- fpwmObject@betalevel[[1]][,1:forkPosition]
for ( i in 2:length(fpwmObject@id) ) {
S <- S + fpwmObject@betalevel[[i]][,1:forkPosition]
}
fpwmObject@parentbeta <- S
return(fpwmObject)
}
ConvertToFTRANSFAC <- function(fpwmObject, probabilityMatrix, scaleFrequencyCounts)
{
Cnumber = length(fpwmObject@matrix)
RowNum = nrow(fpwmObject@matrix[[1]])
forkPosition = fpwmObject@forkPosition
R = rep(c((forkPosition + 1):RowNum), times = Cnumber)
Step = (RowNum - forkPosition)
DF <- data.frame(colnames(c("PO","A","C","G","T")))
DF[1:forkPosition,"PO"]=c(1:forkPosition)
DF[(forkPosition+1):(length(R)+forkPosition),"PO"] <- R
DF[1:forkPosition,2:5]=fpwmObject@parentmatrix
c = 1
if(probabilityMatrix == TRUE){
for(i in seq(forkPosition+1, dim.data.frame(x = DF)[1], Step)){
DF[i:((Step+i)-1),2:5] <- fpwmObject@matrix[[c]][(forkPosition+1):RowNum,]
c <- c+1
}
}
if(probabilityMatrix == FALSE){
for(i in seq(forkPosition+1, dim.data.frame(x = DF)[1], Step)){
if(scaleFrequencyCounts == FALSE){
DF[i:((Step+i)-1),2:5] <- round(fpwmObject@matrix[[c]][(forkPosition+1):RowNum,] * fpwmObject@nSites[c])
}
if(scaleFrequencyCounts == TRUE){
scaleNumber <- sum(DF[1,2:5])
tmp_scaled_mat <- round(fpwmObject@matrix[[c]][(forkPosition+1):RowNum,] * scaleNumber)
# check the round didn't add an extra count
if( sum(rowSums(tmp_scaled_mat) != scaleNumber) ){
for(ifix in which(rowSums(tmp_scaled_mat) != scaleNumber) ){
diff_val <- sum(tmp_scaled_mat[ifix,]) - scaleNumber
tmp_scaled_mat[ ifix, which.max(tmp_scaled_mat[ifix,]) ] <- tmp_scaled_mat[ ifix, which.max(tmp_scaled_mat[ifix,]) ] - diff_val
}
}
DF[i:((Step+i)-1),2:5] <- tmp_scaled_mat
}
c <- c+1
}
}
fpwmObject@forked <- DF
return(fpwmObject)
}
ModifyBetaFormat <- function(fpwmObject)
{
BS1 <- fpwmObject@parentbeta
BS1 <-
cbind(c("beta score<10%", "beta score 10-90%", "beta score>90%"),
BS1)
BS1 <- rbind(c("position", c(1:(ncol(BS1) - 1))), BS1)
M1 <- matrix(nrow = ((ncol(BS1) - 1) * 3), ncol = 3)
pos1 <- rep(t(BS1[1, 2:ncol(BS1)]), times = 3)
M1[, 1] <- pos1
pos1 <- t(BS1[2, 2:ncol(BS1)])
M1[1:(ncol(BS1) - 1), 2] <- pos1
pos1 <- t(BS1[3, 2:ncol(BS1)])
M1[ncol(BS1):((ncol(BS1) - 1) * 2), 2] <- pos1
pos1 <- t(BS1[4, 2:ncol(BS1)])
M1[(((ncol(BS1) - 1) * 2) + 1):((ncol(BS1) - 1) * 3), 2] <- pos1
pos1 <- rep(t(BS1[2, 1]), times = (ncol(BS1) - 1))
M1[1:(ncol(BS1) - 1), 3] <- pos1
pos1 <- rep(t(BS1[3, 1]), times = (ncol(BS1) - 1))
M1[ncol(BS1):((ncol(BS1) - 1) * 2), 3] <- pos1
pos1 <- rep(t(BS1[4, 1]), times = (ncol(BS1) - 1))
M1[(((ncol(BS1) - 1) * 2) + 1):((ncol(BS1) - 1) * 3), 3] <- pos1
fpwmObject@parentbeta <- M1
for (i in c(1:length(fpwmObject@betalevel))) {
BS2 <- as.matrix(fpwmObject@betalevel[[i]])
BS2 <-
cbind(c("beta score<10%", "beta score 10-90%", "beta score>90%"),
BS2)
BS2 <- rbind(c("position", c(fpwmObject@forkPosition + 1:(ncol(
BS2
) - 1))), BS2)
M2 <- matrix(nrow = ((ncol(BS2) - 1) * 3), ncol = 3)
pos2 <- rep(t(BS2[1, 2:ncol(BS2)]), times = 3)
M2[, 1] <- pos2
pos2 <- t(BS2[2, 2:ncol(BS2)])
M2[1:(ncol(BS2) - 1), 2] <- pos2
pos2 <- t(BS2[3, 2:ncol(BS2)])
M2[ncol(BS2):((ncol(BS2) - 1) * 2), 2] <- pos2
pos2 <- t(BS2[4, 2:ncol(BS2)])
M2[(((ncol(BS2) - 1) * 2) + 1):((ncol(BS2) - 1) * 3), 2] <- pos2
pos2 <- rep(t(BS2[2, 1]), times = (ncol(BS2) - 1))
M2[1:(ncol(BS2) - 1), 3] <- pos2
pos2 <- rep(t(BS2[3, 1]), times = (ncol(BS2) - 1))
M2[ncol(BS2):((ncol(BS2) - 1) * 2), 3] <- pos2
pos2 <- rep(t(BS2[4, 1]), times = (ncol(BS2) - 1))
M2[(((ncol(BS2) - 1) * 2) + 1):((ncol(BS2) - 1) * 3), 3] <- pos2
fpwmObject@betalevel[[i]] <- M2}
return(fpwmObject)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.