R/highedges.R

Defines functions generateMechanismGraph

#' @title calculating edge scores, determining the change-point and generating the KO-gene associated sub-graph
#' @description It is not executed itself, instead, it is invoked by pathwayko.
#' @details Reference: Hanoudi S., et al. (2017) Identifying biologically relevant putative mechanisms in a given phenotype comparison. PLoS ONE 12, e0176950. 
#' @param KEGGgraph graphical representation of pathways in KEGG
#' @param DataObject Object generated by calling preprocess.R
#' @param condition_FC named list of logFC of all genes in DataObject
#' @param condition_pVal named list of adjusted pval of all genes in DataObject
#' @param mc.cores number of cores to be used in parallel lapply
#' @return various information grouped together as a list
#'
#' @author  Original implementation by Samer Hanoudi and Sorin Draghici, modification by Hannan Ai

highedges <- function (KEGGgraph = NULL, DataObject = NULL, condition_FC = NULL, condition_pVal = NULL, mc.cores=1){
  if(is.null(KEGGgraph)||is.null(DataObject)||is.null(condition_FC)||is.null(condition_pVal)){
    stop("\nERROR: missing argument in highedges.")
  }

  # Calculate and assign edge weights to KEGG graph
  edgeDataDefaults(KEGGgraph, attr = "weight") <- 0
  eScores <- mclapply(names(edgeData(KEGGgraph)), function(X){
      genesInEdge <- unlist(str_split(X, pattern = "[|]"))
      firstGene <- genesInEdge[1]
      secondGene <- genesInEdge[2]
      tempEdgeScore <- ((abs(as.numeric(condition_FC[firstGene]) * (1-as.numeric(condition_pVal[firstGene]))))+(abs(as.numeric(condition_FC[secondGene]) * (1-as.numeric(condition_pVal[secondGene])))))
      names(tempEdgeScore) <- X
      return(tempEdgeScore)
    },mc.cores = mc.cores, mc.preschedule = TRUE, mc.silent = TRUE)
  eScores <- unlist(eScores)
  oddSeq <- seq.int(from = 1, by = 2, length.out = length(eScores))
  evenSeq <- seq.int(from = 2, by = 2, length.out = length(eScores))
  fromNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[oddSeq]
  toNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[evenSeq]
  edgeData(self = KEGGgraph, from = fromNodeList, to = toNodeList, attr = "weight") <- unlist(eScores)

  # Obtain, remove NA in and order by decending all edge weights in KEGG graph
  allEdgeScores <- unlist(edgeData(KEGGgraph))
  names(allEdgeScores) <- gsub(".weight$","", names(allEdgeScores))
  allEdgeScores <- allEdgeScores[!is.na(allEdgeScores)]
  allEdgeScores <- allEdgeScores[order(allEdgeScores, decreasing=TRUE)]
 
  # Generate numBreaks histogram domains
  numBreaks <- 1000
  testData <- allEdgeScores
  histObject <- hist(x = testData, breaks = numBreaks, plot = FALSE)
  histObject$counts <- histObject$counts[2:length(histObject$counts)]

  #apply change point analysis
  ansmeanvar <- cpt.meanvar(data = histObject$counts, test.stat = "Normal")
  changePoints <- list()
  changePoints[1] <- ansmeanvar@cpts[1]
  changePoints[2] <- ansmeanvar@cpts[1] + ((ansmeanvar@cpts[2] - ansmeanvar@cpts[1]) * 0.25)
  changePoints[3] <- ansmeanvar@cpts[2]

  return(list(changePoints=changePoints, histObject=histObject, testData=testData))
}

generateMechanismGraph <- function(hes=NULL,CP_index=0){
  if(is.null(hes)){
    stop("ERR: in generateMechanismGraph NULL first argument. Exit.\n")
  }
  if(CP_index<1||CP_index>3){
    return(NULL)
  }
  # Extract all edges after the middle changepoint
  changePoint <- hes$changePoints[[CP_index]]
  cutOffNumber  <- sum(hes$histObject$counts[changePoint:length(hes$histObject$counts)])
  sigEdges <- hes$testData[1:cutOffNumber]

  # Construct Mechanism Graph
  temp <- str_split(names(sigEdges), pattern = "[|]")
  startNodes <- unlist(lapply(temp, FUN = function(X) {return (X[1])}))
  endNodes <- unlist(lapply(temp, FUN = function(X) {return (X[2])}))
  relations <- data.frame(startNodes, endNodes)

  mechanismGraph <- graph.data.frame(d = relations, directed = TRUE, vertices = NULL)
  mechanismGraph <- igraph.to.graphNEL(mechanismGraph)
  temp <- nodes(mechanismGraph)
  suppressMessages(requireNamespace("org.Mm.eg.db"))
  x <- org.Mm.eg.db
  temp <- suppressMessages(select(x,temp,columns="SYMBOL",keytype="ENTREZID"))[,"SYMBOL"]
  nodes(mechanismGraph) <- temp
  # To not interfere with other graphical packages
  return(mechanismGraph)
}

#
# Original Code
# 
# #' HighEdgeS_B: Identifies the mechanism in a phenotype comparison. This function was written for benchmarking ONLY
# #' @param f_kpg all pathways in KEGG (ROntoTools object)
# #' @param f_KEGGgraph Graph with all interactions from all pathways in KEGG
# #' @param KO.gene The KO gene that the analysis will be applied on.
# #' @param exprTable Table with genes (rows) and genes "ID", "logFC", and "p-value" (columns).
# #' @return mechanismGraph a graph that contain the mechanism of a particular experiment, and other valriables
# #' for benchmarking
# #'
# #' @author
# #'
# #' Samer Hanoudi and Sorin Draghici
# #'
# #' @examples
# #'
# #' @export
# HighEdgeS_B = function (f_kpg = kpg, f_KEGGgraph = KEGGgraph, KO.gene  = "No KO", exprTable){
#   # Store FC and p-vlaues in two variables
#   temp <- cbind(as.matrix(exprTable[, "ID"]), as.numeric(exprTable[, "logFC"]))
#   row.names(temp)<-temp[, 1]
#   temp <- temp[, -1]
#   condition <- temp
#   condition <- as.numeric(condition)
#   names(condition) <- names(temp)
#   temp <- cbind(as.matrix(exprTable[, "ID"]), as.numeric(exprTable[, "adj.P.Val"]))
#   row.names(temp) <- temp[, 1]
#   temp <- temp[, -1]
#   condition_pVal <- temp
#   condition_pVal <- as.numeric(condition_pVal)
#   names(condition_pVal) <- names(temp)

#   KEGGgenes <- nodes(KEGGgraph)
#   #analyze with the genes that are available in KEGG Database
#   assign(x = "condition", condition[ names(condition)%in%KEGGgenes], envir = .GlobalEnv)
#   assign(x = "condition_pVal", condition[ names(condition_pVal)%in%KEGGgenes], envir = .GlobalEnv)

#   namesSameTokpg <- paste("mmu:", names(condition), sep="")
#   f_kpg <- lapply(X = f_kpg, function (X, y) {return(subGraph( nodes(X)[nodes(X)%in%y], X))}, y = namesSameTokpg )


#   edgeDataDefaults(f_KEGGgraph, attr = "weight")<-0
#   conditionTemp <- condition
#   condition_pVal_temp <- condition_pVal

#   Condition_FC_Pvalue <- cbind(conditionTemp, condition_pVal_temp)
#   colnames(Condition_FC_Pvalue) <- c("FC", "pVal")

#   # Compute edges scores
#   eScores = getFCEdgesGraph(f_graph = f_KEGGgraph, f_condition = Condition_FC_Pvalue[, "FC"], f_condition_pVal = Condition_FC_Pvalue[, "pVal"])
#   allEdgesScores = edgeData(eScores)
#   allEdgesScores <- pbsapply(allEdgesScores, unlist)
#   names(allEdgesScores) <- str_replace(names(allEdgesScores), pattern = ".weight", replacement="")
#   allEdgesScores <- allEdgesScores[!is.na(allEdgesScores)]
#   allEdgesScores <- sort(allEdgesScores, decreasing = TRUE)
#   nBreakes <- 1000
#   mainL <- paste("KO gene ", KO.gene, " dataset")
#   testData <- allEdgesScores
#   histObject <- hist(testData, breaks = nBreakes, xlab = 'edges scores', main = "")
#   histObject$counts <- histObject$counts[2:length(histObject$counts)]

#   #apply change point analysis
#   ansmeanvar <- cpt.meanvar(data = (histObject$counts), test.stat = "Normal")
#   changePoint <- ansmeanvar@cpts[1]
#   changePoint <- changePoint + ((ansmeanvar@cpts[2] - ansmeanvar@cpts[1]) * 0.25)
#   mLable <- paste("Histogram for dataset ", DataObject$GEOid, " \n KO gene ", KO.gene, sep= "")
#   allEdgesScores <- sort(allEdgesScores,decreasing = TRUE)

#   edgesT  <- sum(histObject$counts[changePoint:length(histObject$counts)])
#   sigEdges <- allEdgesScores[1:edgesT]

#   changePoints <- seq(from = ansmeanvar@cpts[1],to = ansmeanvar@cpts[2],length.out = numIntervals)

#   changePoints <- round(changePoints)
#   numOfEdgesToSelect <- list()
#   for (i in 1:length(changePoints)){
#     numOfEdgesToSelect[i] <- sum(histObject$counts[changePoints[i]:length(histObject$counts)])
#   }
#   temp <- str_split(names(sigEdges), pattern = "[|]")
#   startNodes <- unlist(pblapply(temp, FUN = function(X) {return (X[1]) }))
#   endNodes <- unlist(pblapply(temp, FUN = function(X) {return (X[2]) }))
#   relations <- data.frame(startNodes, endNodes)
#   FCgraphiGraph <- graph.data.frame(d = relations, directed = TRUE, vertices = NULL)
#   mechanismGraph <- igraph.to.graphNEL(FCgraphiGraph)
#   temp <- (nodes(mechanismGraph))
#   temp <- unlist(mapedEntrez2Symb[temp])
#   names(temp) <- NULL
#   nodes(mechanismGraph) <- temp
#   graphics.off()

#   return(list(mechanismGraph=mechanismGraph,kpg=f_kpg,ansmeanvar=ansmeanvar,testData=testData,histObject=histObject,
#               numOfEdgesToSelect=numOfEdgesToSelect,changePoints=changePoints,changePoint=changePoint))
# }# end HighEdgeS_B



# getFCEdgesGraph <- function (f_graph, f_condition, f_condition_pVal){
#   genesInGraph <- nodes(f_graph)
#   genesInCondition <- names(f_condition)
#   allEdges <- edgeData(f_graph)
#   edgeScores <-names(allEdges)

#   FCedges <- function (fx){
#     genesInEdge <- str_split(fx, pattern = "[|]")
#     firstGene <- genesInEdge[[1]][1]
#     secondGene <- genesInEdge[[1]][2]
#     tempEdgeScore <- NULL
#      tempEdgeScore[1] <- ((abs(f_condition[firstGene] * (1-f_condition_pVal[ firstGene ][[1]])))
#                        +(abs(f_condition[secondGene]) * (1-f_condition_pVal[ secondGene ][[1]])) ) #*0.5
#     names(tempEdgeScore)<-fx
#     return(tempEdgeScore)
#   }
#   eScores <- lapply(X = edgeScores, FUN = FCedges)
#   eScores <- unlist(eScores)
#   oddSeq <- seq.int(from = 1, by = 2, length.out = length(eScores))
#   evenSeq <- seq.int(from = 2, by = 2, length.out = length(eScores))
#   fromNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[oddSeq]
#   toNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[evenSeq]
#   edgeData(self = f_graph, from = fromNodeList, to = toNodeList, attr = "weight") <- unlist(eScores)
#   return(f_graph)
# }# end getFCEdgesGraph
allenaigit/pathwayko documentation built on Nov. 23, 2022, 5:43 p.m.