R/createFPWM.R

Defines functions createFPWM MatrixAdder BetaAdder ConvertToFTRANSFAC ModifyBetaFormat

Documented in createFPWM

#' A function to generate an FPWM class object.
#'
#' This function assigns proper data to their associated slots of a S4 classe. This information is either provided by user, or exported from TFregulomeR's dataware using user specified data.
#' @param mainTF character with the name of the main TF.
#' @param partners List or character vector with the names of the partner TFs.
#' @param cell character with the name of the main TF.
#' @param mainTF_MMID character with the name of the main TF.
#' @param partners_MMID character with the name of the main TF.
#' @param forkPosition This argument, defines from which point on, the matrix needs to be forked, or in the other words, up to which point two exclusive matrices need to be aggregated.
#' @param probabilityMatrix Logical, whether the function should return a frequency matrix or probability matrix (Default FALSE). 
#' @param scaleFrequencyCounts Logical, whether the count matrix should have equal rowSums across all the rows (Default FALSE). 
#' @param flipMatrix Logical, whether to apply reverse complement in case the core motif is after the forkPosition (Default FALSE). 
#' @examples
#' fpwm <- createFPWM(mainTF ="CEBPB", partners = c("ATF4","ATF7","JUND"), cell = "K562", forkPosition = 5)
#' @return This component, returns a FPWM class object that can be used to plot or written in transfact format.
#' @export
createFPWM <- function( mainTF = NULL,
						partners = NULL,
						cell = NULL,
						mainTF_MMID = NULL,
						partners_MMID = NULL,
						forkPosition = NULL,
            probabilityMatrix = FALSE,
            scaleFrequencyCounts = FALSE,
            flipMatrix = FALSE)
						{
  
  tfnames <- FALSE
  tfIDs <- FALSE
  
  # Check if inputs are compatible
  if(!is.null(mainTF) & !is.null(partners) & !is.null(cell) & !is.null(forkPosition) ) { tfnames <- TRUE }
  if(!is.null(mainTF_MMID) & !is.null(partners_MMID) & !is.null(forkPosition) ) { tfIDs <- TRUE }
  
  if(tfnames & tfIDs) { stop("Incompatible input. Provide 'mainTF', 'partners' and 'cell'. !!OR!! 'mainTF_MMID' and 'partners_MMID'. ") }

  if(!(tfnames | tfIDs) ) { stop("Missing input.\nProvide: 'mainTF', 'partners', 'cell' and 'forkPosition' \nOR\n 'mainTF_MMID', 'partners_MMID' and 'forkPosition'") }

  if( probabilityMatrix == TRUE & scaleFrequencyCounts == TRUE ) { stop("scaleFrequencyCounts only works for count matrices, disable probabilityMatrix.") }

  if (tfnames){
        xTF_cell_tissue_name <- suppressMessages(dataBrowser(tf = mainTF, cell_tissue_name = cell)$ID[1]) 
  
        partners <- as.list(unlist(partners)) ; 
        MMpartners <- partners
        for(i in 1:length(partners)){ suppressMessages( MMpartners[[i]] <- dataBrowser(tf = partners[[i]], cell_tissue_name = cell)$ID[1] ) }
        
	    if(is.null(xTF_cell_tissue_name)){ stop("Please check the spelling of your mainTF or cell. \nOr there might no be information for this TF-cell combination.") }
	
	    if( length(xTF_cell_tissue_name) != 1 ){ stop("More than one record for the combination of TF and cell tissue") ; } 
  
  		if( sum(MMpartners %in% partners) ){
 			 message( paste("The following partners were not found in combination with the cell:", unlist(partners[which(MMpartners %in% partners)] ) ) )
 			 stop("\n")
 			 }
 	    peak_id_y_list <- MMpartners
 	    peak_id_x <- xTF_cell_tissue_name
   }
   
  if (tfIDs){
      partners_MMID = as.list(unlist(partners_MMID))
      peak_id_y_list = partners_MMID ; peak_id_x = mainTF_MMID
    }

    if( length(peak_id_y_list) < 2) { stop("The list of partner TF should contain 2 or more elements.") }

    Motif <- TFregulomeR::intersectPeakMatrix(peak_id_x = peak_id_x, motif_only_for_id_x = TRUE, peak_id_y = peak_id_y_list, motif_only_for_id_y = TRUE)

    motif_length <- dim(Motif[1,1][[1]]@MethMotif_x@MMBetaScore)[2] # get number of positions in motif

    if(as.numeric(forkPosition)>=motif_length){ stop("The forkPosition is larger than the motif length.") }

    if(flipMatrix==TRUE){
      for( i in 1:length(peak_id_y_list) ){
        Motif[1,i][[1]]@MethMotif_x@MMBetaScore <- Motif[1,i][[1]]@MethMotif_x@MMBetaScore[,motif_length:1] # reverse meth info
        colnames(Motif[1,i][[1]]@MethMotif_x@MMBetaScore) <- rev(colnames(Motif[1,i][[1]]@MethMotif_x@MMBetaScore)) # reverse meth info colnames
        Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[motif_length:1,] # reverse
        tmp_matrix <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix
        tmp_matrix[,'A'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'T'] # complementary
        tmp_matrix[,'T'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'A'] # complementary
        tmp_matrix[,'C'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'G'] # complementary
        tmp_matrix[,'G'] <- Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix[,'C'] # complementary
        Motif[1,i][[1]]@MethMotif_x@MMmotif@motif_matrix <- tmp_matrix

      }
      forkPosition <- which(motif_length:1 %in% forkPosition)-1 # adjust fork position
    }

    TFregulomeDataovlaplist <- list()
    TFregulomeDataBetalist <- list()
    TFregulomeDatamatrixlist <- list()
    TFregulomeDatanPeaks <- vector()
    TFregulomeDatanSites <- vector()

    for( i in 1:length(peak_id_y_list) ){
      y <- Motif[1,i]
      TFregulomeDataovlaplist[[i]] = y[[1]]@overlap_percentage_x
      TFregulomeDataBetalist[[i]] = y[[1]]@MethMotif_x@MMBetaScore
      TFregulomeDatamatrixlist[[i]] = y[[1]]@MethMotif_x@MMmotif@motif_matrix
      TFregulomeDatanPeaks[i] <- y[[1]]@MethMotif_x@MMmotif@nPeaks
      TFregulomeDatanSites[i] <- y[[1]]@MethMotif_x@MMmotif@nsites
      }
  
  FPWM <- new("FPWMClassObj")
  FPWM <- updateFPWMClassObj(FPWM, id = peak_id_y_list,
                                 nSites = TFregulomeDatanSites,
                                 nPeaks = TFregulomeDatanPeaks,
                                 matrix=TFregulomeDatamatrixlist,
                                 betalevel = TFregulomeDataBetalist,
                                 score= TFregulomeDataovlaplist,
                                 forkPosition = forkPosition)
  FPWM <- MatrixAdder( FPWM, forkPosition, probabilityMatrix)
  FPWM <- BetaAdder(FPWM, forkPosition)
  FPWM <- ConvertToFTRANSFAC(FPWM, probabilityMatrix, scaleFrequencyCounts)
  
  for (i in c(1:length(FPWM@betalevel))) {
    X<-FPWM@betalevel[[i]]
    FPWM@betalevel[[i]] <- X[,(forkPosition+1):ncol(FPWM@betalevel[[i]])]
  }
  FPWM@xid <- peak_id_x
  FPWM <- ModifyBetaFormat(FPWM)


  # order object based on overlapping score
  FPWM_tmp <- FPWM
  objOrder <- order(unlist(FPWM@score),decreasing=TRUE)
  FPWM_tmp@betalevel <- FPWM@betalevel[objOrder]
  FPWM_tmp@id <- FPWM@id[objOrder]
  FPWM_tmp@matrix <- FPWM@matrix[objOrder]
  FPWM_tmp@score <- FPWM@score[objOrder]

  FPWMPO <- FPWM@forked$PO
  from <- min(FPWMPO[duplicated(FPWMPO)])
  to <- max(FPWMPO)
  ix <- cbind(which(FPWMPO %in% from) , which(FPWMPO %in% to))

  for ( jx in 1:length(objOrder) ){
  from_row_ix <- ix[objOrder[jx],1] : ix[objOrder[jx],2]
  to_row_ix <- ix[jx,1] : ix[jx,2]
  FPWM_tmp@forked[to_row_ix ,2:5] <-  FPWM@forked[ from_row_ix, 2:5]
  }
  
  # add colnames
   FPWM <- FPWM_tmp
   colnames(FPWM@forked) <- c("PO","A","C","G","T")
   colnames(FPWM@parentbeta) <- c("PO","number","meth")
   for( i in 1:length(FPWM@betalevel) ) { colnames(FPWM@betalevel[[i]]) <- c("PO","number","meth")  }
   
return(FPWM)
}

MatrixAdder <- function( fpwmObject, forkPosition, probabilityMatrix)
{ 
  nSites <- fpwmObject@nSites
  if(probabilityMatrix==TRUE){
    total_nSites <- sum(nSites)
    S <- (fpwmObject@matrix[[1]][1:forkPosition,]) * (nSites[1]/total_nSites)
    for ( i in 2:length(fpwmObject@matrix) ) { S <- S + (fpwmObject@matrix[[i]][1:forkPosition,] * (nSites[i]/total_nSites) ) }
  }

  if(probabilityMatrix==FALSE){
    S <- round(fpwmObject@matrix[[1]][1:forkPosition,] * nSites[1])
    for ( i in 2:length(fpwmObject@matrix) ) { S <- S + round(fpwmObject@matrix[[i]][1:forkPosition,] * nSites[i])  }
  }

  fpwmObject@parentmatrix <- S
  return(fpwmObject)
}

BetaAdder <- function( fpwmObject, forkPosition)
{
  S <- fpwmObject@betalevel[[1]][,1:forkPosition]
  for ( i in 2:length(fpwmObject@id) ) {
      S <- S + fpwmObject@betalevel[[i]][,1:forkPosition]
      }
  fpwmObject@parentbeta <- S
  return(fpwmObject)
}

ConvertToFTRANSFAC <- function(fpwmObject, probabilityMatrix, scaleFrequencyCounts)
{ 
  Cnumber = length(fpwmObject@matrix)
  RowNum = nrow(fpwmObject@matrix[[1]])
  forkPosition = fpwmObject@forkPosition
  R = rep(c((forkPosition + 1):RowNum), times = Cnumber)
  Step = (RowNum - forkPosition)
  DF <- data.frame(colnames(c("PO","A","C","G","T")))
  DF[1:forkPosition,"PO"]=c(1:forkPosition)
  DF[(forkPosition+1):(length(R)+forkPosition),"PO"] <- R
  DF[1:forkPosition,2:5]=fpwmObject@parentmatrix
  c = 1

  if(probabilityMatrix == TRUE){
    for(i in seq(forkPosition+1, dim.data.frame(x = DF)[1], Step)){
      DF[i:((Step+i)-1),2:5] <- fpwmObject@matrix[[c]][(forkPosition+1):RowNum,]
      c <- c+1
    }
  }

  if(probabilityMatrix == FALSE){
    for(i in seq(forkPosition+1, dim.data.frame(x = DF)[1], Step)){

      if(scaleFrequencyCounts == FALSE){
        DF[i:((Step+i)-1),2:5] <- round(fpwmObject@matrix[[c]][(forkPosition+1):RowNum,] * fpwmObject@nSites[c])
      }

      if(scaleFrequencyCounts == TRUE){
        scaleNumber <- sum(DF[1,2:5])
        tmp_scaled_mat <- round(fpwmObject@matrix[[c]][(forkPosition+1):RowNum,] * scaleNumber)
        # check the round didn't add an extra count
        if( sum(rowSums(tmp_scaled_mat) != scaleNumber) ){
          for(ifix in which(rowSums(tmp_scaled_mat) != scaleNumber) ){
            diff_val <- sum(tmp_scaled_mat[ifix,]) - scaleNumber
            tmp_scaled_mat[ ifix, which.max(tmp_scaled_mat[ifix,]) ] <- tmp_scaled_mat[ ifix, which.max(tmp_scaled_mat[ifix,]) ] - diff_val
          }


        }

        DF[i:((Step+i)-1),2:5] <- tmp_scaled_mat
      }
      c <- c+1
    }
  }

  fpwmObject@forked <- DF
  return(fpwmObject)
}

ModifyBetaFormat <- function(fpwmObject)
{
  BS1 <- fpwmObject@parentbeta
  BS1 <-
    cbind(c("beta score<10%", "beta score 10-90%", "beta score>90%"),
          BS1)
  BS1 <- rbind(c("position", c(1:(ncol(BS1) - 1))), BS1)


  M1 <- matrix(nrow = ((ncol(BS1) - 1) * 3), ncol = 3)


  pos1 <- rep(t(BS1[1, 2:ncol(BS1)]), times = 3)
  M1[, 1] <- pos1

  pos1 <- t(BS1[2, 2:ncol(BS1)])
  M1[1:(ncol(BS1) - 1), 2] <- pos1
  pos1 <- t(BS1[3, 2:ncol(BS1)])
  M1[ncol(BS1):((ncol(BS1) - 1) * 2), 2] <- pos1
  pos1 <- t(BS1[4, 2:ncol(BS1)])
  M1[(((ncol(BS1) - 1) * 2) + 1):((ncol(BS1) - 1) * 3), 2] <- pos1

  pos1 <- rep(t(BS1[2, 1]), times = (ncol(BS1) - 1))
  M1[1:(ncol(BS1) - 1), 3] <- pos1
  pos1 <- rep(t(BS1[3, 1]), times = (ncol(BS1) - 1))
  M1[ncol(BS1):((ncol(BS1) - 1) * 2), 3] <- pos1
  pos1 <- rep(t(BS1[4, 1]), times = (ncol(BS1) - 1))
  M1[(((ncol(BS1) - 1) * 2) + 1):((ncol(BS1) - 1) * 3), 3] <- pos1

  fpwmObject@parentbeta <- M1
  
  for (i in c(1:length(fpwmObject@betalevel))) {
    
    BS2 <- as.matrix(fpwmObject@betalevel[[i]])
    BS2 <-
      cbind(c("beta score<10%", "beta score 10-90%", "beta score>90%"),
            BS2)
    BS2 <- rbind(c("position", c(fpwmObject@forkPosition + 1:(ncol(
      BS2
    ) - 1))), BS2)
    M2 <- matrix(nrow = ((ncol(BS2) - 1) * 3), ncol = 3)
  
  
    pos2 <- rep(t(BS2[1, 2:ncol(BS2)]), times = 3)
    M2[, 1] <- pos2
    
    pos2 <- t(BS2[2, 2:ncol(BS2)])
    M2[1:(ncol(BS2) - 1), 2] <- pos2
    pos2 <- t(BS2[3, 2:ncol(BS2)])
    M2[ncol(BS2):((ncol(BS2) - 1) * 2), 2] <- pos2
    pos2 <- t(BS2[4, 2:ncol(BS2)])
    M2[(((ncol(BS2) - 1) * 2) + 1):((ncol(BS2) - 1) * 3), 2] <- pos2
    
    pos2 <- rep(t(BS2[2, 1]), times = (ncol(BS2) - 1))
    M2[1:(ncol(BS2) - 1), 3] <- pos2
    pos2 <- rep(t(BS2[3, 1]), times = (ncol(BS2) - 1))
    M2[ncol(BS2):((ncol(BS2) - 1) * 2), 3] <- pos2
    pos2 <- rep(t(BS2[4, 1]), times = (ncol(BS2) - 1))
    M2[(((ncol(BS2) - 1) * 2) + 1):((ncol(BS2) - 1) * 3), 3] <- pos2
    
    fpwmObject@betalevel[[i]] <- M2}
  
  return(fpwmObject)
}
rtmag/FPWM documentation built on March 23, 2020, 12:46 a.m.