#' @title calculating edge scores, determining the change-point and generating the KO-gene associated sub-graph
#' @description It is not executed itself, instead, it is invoked by pathwayko.
#' @details Reference: Hanoudi S., et al. (2017) Identifying biologically relevant putative mechanisms in a given phenotype comparison. PLoS ONE 12, e0176950.
#' @param KEGGgraph graphical representation of pathways in KEGG
#' @param DataObject Object generated by calling preprocess.R
#' @param condition_FC named list of logFC of all genes in DataObject
#' @param condition_pVal named list of adjusted pval of all genes in DataObject
#' @param mc.cores number of cores to be used in parallel lapply
#' @return various information grouped together as a list
#'
#' @author Original implementation by Samer Hanoudi and Sorin Draghici, modification by Hannan Ai
highedges <- function (KEGGgraph = NULL, DataObject = NULL, condition_FC = NULL, condition_pVal = NULL, mc.cores=1){
if(is.null(KEGGgraph)||is.null(DataObject)||is.null(condition_FC)||is.null(condition_pVal)){
stop("\nERROR: missing argument in highedges.")
}
# Calculate and assign edge weights to KEGG graph
edgeDataDefaults(KEGGgraph, attr = "weight") <- 0
eScores <- mclapply(names(edgeData(KEGGgraph)), function(X){
genesInEdge <- unlist(str_split(X, pattern = "[|]"))
firstGene <- genesInEdge[1]
secondGene <- genesInEdge[2]
tempEdgeScore <- ((abs(as.numeric(condition_FC[firstGene]) * (1-as.numeric(condition_pVal[firstGene]))))+(abs(as.numeric(condition_FC[secondGene]) * (1-as.numeric(condition_pVal[secondGene])))))
names(tempEdgeScore) <- X
return(tempEdgeScore)
},mc.cores = mc.cores, mc.preschedule = TRUE, mc.silent = TRUE)
eScores <- unlist(eScores)
oddSeq <- seq.int(from = 1, by = 2, length.out = length(eScores))
evenSeq <- seq.int(from = 2, by = 2, length.out = length(eScores))
fromNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[oddSeq]
toNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[evenSeq]
edgeData(self = KEGGgraph, from = fromNodeList, to = toNodeList, attr = "weight") <- unlist(eScores)
# Obtain, remove NA in and order by decending all edge weights in KEGG graph
allEdgeScores <- unlist(edgeData(KEGGgraph))
names(allEdgeScores) <- gsub(".weight$","", names(allEdgeScores))
allEdgeScores <- allEdgeScores[!is.na(allEdgeScores)]
allEdgeScores <- allEdgeScores[order(allEdgeScores, decreasing=TRUE)]
# Generate numBreaks histogram domains
numBreaks <- 1000
testData <- allEdgeScores
histObject <- hist(x = testData, breaks = numBreaks, plot = FALSE)
histObject$counts <- histObject$counts[2:length(histObject$counts)]
#apply change point analysis
ansmeanvar <- cpt.meanvar(data = histObject$counts, test.stat = "Normal")
changePoints <- list()
changePoints[1] <- ansmeanvar@cpts[1]
changePoints[2] <- ansmeanvar@cpts[1] + ((ansmeanvar@cpts[2] - ansmeanvar@cpts[1]) * 0.25)
changePoints[3] <- ansmeanvar@cpts[2]
return(list(changePoints=changePoints, histObject=histObject, testData=testData))
}
generateMechanismGraph <- function(hes=NULL,CP_index=0){
if(is.null(hes)){
stop("ERR: in generateMechanismGraph NULL first argument. Exit.\n")
}
if(CP_index<1||CP_index>3){
return(NULL)
}
# Extract all edges after the middle changepoint
changePoint <- hes$changePoints[[CP_index]]
cutOffNumber <- sum(hes$histObject$counts[changePoint:length(hes$histObject$counts)])
sigEdges <- hes$testData[1:cutOffNumber]
# Construct Mechanism Graph
temp <- str_split(names(sigEdges), pattern = "[|]")
startNodes <- unlist(lapply(temp, FUN = function(X) {return (X[1])}))
endNodes <- unlist(lapply(temp, FUN = function(X) {return (X[2])}))
relations <- data.frame(startNodes, endNodes)
mechanismGraph <- graph.data.frame(d = relations, directed = TRUE, vertices = NULL)
mechanismGraph <- igraph.to.graphNEL(mechanismGraph)
temp <- nodes(mechanismGraph)
suppressMessages(requireNamespace("org.Mm.eg.db"))
x <- org.Mm.eg.db
temp <- suppressMessages(select(x,temp,columns="SYMBOL",keytype="ENTREZID"))[,"SYMBOL"]
nodes(mechanismGraph) <- temp
# To not interfere with other graphical packages
return(mechanismGraph)
}
#
# Original Code
#
# #' HighEdgeS_B: Identifies the mechanism in a phenotype comparison. This function was written for benchmarking ONLY
# #' @param f_kpg all pathways in KEGG (ROntoTools object)
# #' @param f_KEGGgraph Graph with all interactions from all pathways in KEGG
# #' @param KO.gene The KO gene that the analysis will be applied on.
# #' @param exprTable Table with genes (rows) and genes "ID", "logFC", and "p-value" (columns).
# #' @return mechanismGraph a graph that contain the mechanism of a particular experiment, and other valriables
# #' for benchmarking
# #'
# #' @author
# #'
# #' Samer Hanoudi and Sorin Draghici
# #'
# #' @examples
# #'
# #' @export
# HighEdgeS_B = function (f_kpg = kpg, f_KEGGgraph = KEGGgraph, KO.gene = "No KO", exprTable){
# # Store FC and p-vlaues in two variables
# temp <- cbind(as.matrix(exprTable[, "ID"]), as.numeric(exprTable[, "logFC"]))
# row.names(temp)<-temp[, 1]
# temp <- temp[, -1]
# condition <- temp
# condition <- as.numeric(condition)
# names(condition) <- names(temp)
# temp <- cbind(as.matrix(exprTable[, "ID"]), as.numeric(exprTable[, "adj.P.Val"]))
# row.names(temp) <- temp[, 1]
# temp <- temp[, -1]
# condition_pVal <- temp
# condition_pVal <- as.numeric(condition_pVal)
# names(condition_pVal) <- names(temp)
# KEGGgenes <- nodes(KEGGgraph)
# #analyze with the genes that are available in KEGG Database
# assign(x = "condition", condition[ names(condition)%in%KEGGgenes], envir = .GlobalEnv)
# assign(x = "condition_pVal", condition[ names(condition_pVal)%in%KEGGgenes], envir = .GlobalEnv)
# namesSameTokpg <- paste("mmu:", names(condition), sep="")
# f_kpg <- lapply(X = f_kpg, function (X, y) {return(subGraph( nodes(X)[nodes(X)%in%y], X))}, y = namesSameTokpg )
# edgeDataDefaults(f_KEGGgraph, attr = "weight")<-0
# conditionTemp <- condition
# condition_pVal_temp <- condition_pVal
# Condition_FC_Pvalue <- cbind(conditionTemp, condition_pVal_temp)
# colnames(Condition_FC_Pvalue) <- c("FC", "pVal")
# # Compute edges scores
# eScores = getFCEdgesGraph(f_graph = f_KEGGgraph, f_condition = Condition_FC_Pvalue[, "FC"], f_condition_pVal = Condition_FC_Pvalue[, "pVal"])
# allEdgesScores = edgeData(eScores)
# allEdgesScores <- pbsapply(allEdgesScores, unlist)
# names(allEdgesScores) <- str_replace(names(allEdgesScores), pattern = ".weight", replacement="")
# allEdgesScores <- allEdgesScores[!is.na(allEdgesScores)]
# allEdgesScores <- sort(allEdgesScores, decreasing = TRUE)
# nBreakes <- 1000
# mainL <- paste("KO gene ", KO.gene, " dataset")
# testData <- allEdgesScores
# histObject <- hist(testData, breaks = nBreakes, xlab = 'edges scores', main = "")
# histObject$counts <- histObject$counts[2:length(histObject$counts)]
# #apply change point analysis
# ansmeanvar <- cpt.meanvar(data = (histObject$counts), test.stat = "Normal")
# changePoint <- ansmeanvar@cpts[1]
# changePoint <- changePoint + ((ansmeanvar@cpts[2] - ansmeanvar@cpts[1]) * 0.25)
# mLable <- paste("Histogram for dataset ", DataObject$GEOid, " \n KO gene ", KO.gene, sep= "")
# allEdgesScores <- sort(allEdgesScores,decreasing = TRUE)
# edgesT <- sum(histObject$counts[changePoint:length(histObject$counts)])
# sigEdges <- allEdgesScores[1:edgesT]
# changePoints <- seq(from = ansmeanvar@cpts[1],to = ansmeanvar@cpts[2],length.out = numIntervals)
# changePoints <- round(changePoints)
# numOfEdgesToSelect <- list()
# for (i in 1:length(changePoints)){
# numOfEdgesToSelect[i] <- sum(histObject$counts[changePoints[i]:length(histObject$counts)])
# }
# temp <- str_split(names(sigEdges), pattern = "[|]")
# startNodes <- unlist(pblapply(temp, FUN = function(X) {return (X[1]) }))
# endNodes <- unlist(pblapply(temp, FUN = function(X) {return (X[2]) }))
# relations <- data.frame(startNodes, endNodes)
# FCgraphiGraph <- graph.data.frame(d = relations, directed = TRUE, vertices = NULL)
# mechanismGraph <- igraph.to.graphNEL(FCgraphiGraph)
# temp <- (nodes(mechanismGraph))
# temp <- unlist(mapedEntrez2Symb[temp])
# names(temp) <- NULL
# nodes(mechanismGraph) <- temp
# graphics.off()
# return(list(mechanismGraph=mechanismGraph,kpg=f_kpg,ansmeanvar=ansmeanvar,testData=testData,histObject=histObject,
# numOfEdgesToSelect=numOfEdgesToSelect,changePoints=changePoints,changePoint=changePoint))
# }# end HighEdgeS_B
# getFCEdgesGraph <- function (f_graph, f_condition, f_condition_pVal){
# genesInGraph <- nodes(f_graph)
# genesInCondition <- names(f_condition)
# allEdges <- edgeData(f_graph)
# edgeScores <-names(allEdges)
# FCedges <- function (fx){
# genesInEdge <- str_split(fx, pattern = "[|]")
# firstGene <- genesInEdge[[1]][1]
# secondGene <- genesInEdge[[1]][2]
# tempEdgeScore <- NULL
# tempEdgeScore[1] <- ((abs(f_condition[firstGene] * (1-f_condition_pVal[ firstGene ][[1]])))
# +(abs(f_condition[secondGene]) * (1-f_condition_pVal[ secondGene ][[1]])) ) #*0.5
# names(tempEdgeScore)<-fx
# return(tempEdgeScore)
# }
# eScores <- lapply(X = edgeScores, FUN = FCedges)
# eScores <- unlist(eScores)
# oddSeq <- seq.int(from = 1, by = 2, length.out = length(eScores))
# evenSeq <- seq.int(from = 2, by = 2, length.out = length(eScores))
# fromNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[oddSeq]
# toNodeList <- unlist(str_split(names(eScores), pattern = "[|]"))[evenSeq]
# edgeData(self = f_graph, from = fromNodeList, to = toNodeList, attr = "weight") <- unlist(eScores)
# return(f_graph)
# }# end getFCEdgesGraph
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.