#' Network Analysis
#' @description The phosphorylation signaling network shows the inferred causal relations between the kinases/phosphatases and downstream peptides. Network inference is based on dynamic Bayesian principles. For more than one condition, the common phosphopeptides between the conditions are used as input and a network is generated for each condition separately. As such, the network results might differ slightly when running each condition separately through NetPhorce compared to running the conditions together.
#' @param netPhorceData (Required). Processed netPhorceData data
#' @param requestPlotData (Required). If TRUE, data required to visualize the network in R via \code{\link{visNetwork}} package will be saved.
#' @return The netPhorce object with the following slots:
#' \describe{
#' \item{Design}{Contains data design information and filtering parameters}
#' \item{data.filtered}{Contains all the data points in a `Long` format that passed filtering criteria.}
#' \item{data.filtered.aov.summary}{Contains all the data points in a `Long` format with anova results.}
#' \item{Misc}{Contains accessory data including default plotting colors and FASTA Keys, if present.}
#' \item{regulationData}{Contains regulation data calculated through \code{\link{regulationCheck}} function}
#' \item{networkPhorceResutls}{Contains the network results}
#' \item{networkPlotData}{Contains data required to generate the network via \code{\link{visNetwork}} package}
#' }
#' @export
#' @examples
#' \dontrun{
#' ## Loading One Condition Data
#' data("oneConditionExample")
#' ## Identify the Key Columns
#' identifiedCols <- confirmColumnNames(rawMaxQuant = oneConditionExample,
#' positionCol = "Position",
#' reverseCol = "Reverse",
#' localizationProbCol = "Localization prob",
#' potentialContaminationCol = "Potential contaminant",
#' aminoAcidCol = "Amino acid",
#' uniqueIDCol = "Protein",
#' seqWindowIDCol = "Sequence window",
#' fastaIDCol = "Fasta headers")
#' ## Identify the Intensity Columns with Condition, Time Point and Replication Information
#' intensityCols <- confirmIntensityColumns(rawMaxQuant = oneConditionExample,
#' intensityPattern = "con_time_rep",
#' verbose = TRUE)
#' ## Process the data based on the identified columns
#' netPhorceData <- processData(rawMaxQuant = oneConditionExample,
#' processedColNames = identifiedCols,
#' processedIntensity = intensityCols,
#' minReplication = 3,
#' minLocalProb = 0.75)
#' ## Validating the Kinase/Phosphatase Information
#' netPhorceData <- validateKinaseTable(netPhorceData = netPhorceData,
#' defaultKinaseTable = TRUE,
#' abbrev = "Ath")
#' ## Regulation Validation based on user inputs
#' netPhorceData <- regulationCheck(netPhorceData = netPhorceData,
#' upReg = 0.25,
#' downReg = 0.25,
#' absMinThreshold = 0.1,
#' qValueCutOff = 0.05,
#' verbose = TRUE)
#' ## Network Analysis
#' networkData <- networkAnalysis(netPhorceData = netPhorceData,
#' requestPlotData = TRUE)
#' }
networkAnalysis <- function(netPhorceData = netPhorceData,
requestPlotData = TRUE){
ESS = 1e-15 ### Dirichlet distr param.
data.significant.proteins <- netPhorceData@regulationData
n_levels = data.significant.proteins %>% filter(!is.na(discreteState)) %>% distinct(discreteState) %>% nrow ### determined by numberOfDiscreteStates
conditions = data.significant.proteins %>% filter(!is.na(experiment)) %>% distinct(experiment) %>% nrow ### determined by numberOfDiscreteStates
maxConditionMatching = conditions-1
percentChangeThreshold = 0.5
# returnList <- list()
pb = txtProgressBar(min = 0, max = 9,
initial = 0, style = 3)
data_V4 <-
netPhorceData@data.filtered %>%
mutate(timepoint = factor(timepoint, levels = c(as.vector(netPhorceData@Design$uniqueTimes$Origin)))) %>%
arrange(timepoint) %>%
dplyr::select(timepoint, replicate) %>%
group_by(timepoint) %>%
distinct() %>%
summarise(ReplicateCount = n())
# print("1")
setTxtProgressBar(pb, 1)
################ . . 3.3.1 Identify all possible target-regulator combinations ############
data.scoring = data.significant.proteins %>% group_by(condition) %>% nest()
# print(head(data.scoring$data))
# print("1.5")
data.scoring = data.scoring %>% ### expand the possible edges from kinases to genes, then filter to keep only data where changes correspond.
mutate(potentialEdges = map(data, ~ { ### Reg -> target are filtered by sigSignChange, not by change in state
allEdges = .x %>% dplyr::select(UniqueID, isKinase, experiment, sigChangeSign) %>% filter(!is.na(sigChangeSign)) %>%
tidyr::expand(nesting(regulator = UniqueID, regulatorCondition = experiment, regulatorChange = sigChangeSign, regulatorisKinase = isKinase),
nesting(target = UniqueID, targetCondition = experiment, targetChange = sigChangeSign)) %>%
filter(regulatorisKinase) %>% dplyr::select(-regulatorisKinase) %>%
filter(regulator != target) %>%
filter(regulatorChange != 0 & targetChange != 0) %>%
filter(!is.na(levels(targetCondition)[as.numeric(targetCondition)-1])) %>% ### The regulators must have the proper timepoints and proper % matching changes
filter(regulatorCondition == targetCondition | regulatorCondition == levels(targetCondition)[as.numeric(targetCondition)-1]) ### Allow co-regulators
allEdges.filterd = allEdges %>% group_by(regulator, target) %>%
add_count(coReg = regulatorCondition == targetCondition, wt = (abs(regulatorChange) == abs(targetChange))/maxConditionMatching, name = "percentChange") %>%
ungroup() %>% filter(percentChange > percentChangeThreshold)
return(allEdges.filterd)
}))
# print(head(data.scoring))
# print("2")
setTxtProgressBar(pb, 2)
message(paste0("\nUndergoing a time consuming step, please wait... "))
data.scoring.searches = data.scoring %>% dplyr::select(condition, potentialEdges) %>% unnest(cols = potentialEdges) %>%
mutate(regulator = paste(regulator, coReg, sep = "**"), .keep = "unused") %>% group_by(condition, target) %>% nest() %>%
mutate(neighborCombinations = map(data, possibly( ~ { ### possibly for error catching.
allSearchCombinations = bind_rows( ### Generate combinations of regulators whithin each searchID to be scored.
.x %>% distinct(regulator) %>% pull(regulator) %>% as.list %>% possibly(~combn(.x, m=1), tibble())() %>% as.data.frame %>% gather(searchID,regulator) %>% mutate(searchSizeK = 1),
.x %>% distinct(regulator) %>% pull(regulator) %>% as.list %>% possibly(~combn(.x, m=2), tibble())() %>% as.data.frame %>% gather(searchID,regulator) %>% mutate(searchSizeK = 2),
.x %>% distinct(regulator) %>% pull(regulator) %>% as.list %>% possibly(~combn(.x, m=3), tibble())() %>% as.data.frame %>% gather(searchID,regulator) %>% mutate(searchSizeK = 3)
) %>% as_tibble %>% unnest(cols = c(regulator)) %>% tidyr::separate(regulator, c("regulator", "coReg"), sep = "\\*\\*", remove = T)
},tibble()))) %>% ungroup()
# print(head(data.scoring.searches))
# print("3")
setTxtProgressBar(pb, 3)
######### . . 3.3.2 Scoring of the identified target - (co)regulator combinations ###############
coRegulater = data.scoring.searches %>% dplyr::select(condition, target, neighborCombinations) %>% unnest(neighborCombinations) %>%
dplyr::select(condition, target, regulator, coReg) %>% distinct()
allEdgesCompareState.Nj = data.significant.proteins %>% group_by(condition) %>% nest() %>%
mutate(data = map(data, ~ {.x %>% dplyr::select(UniqueID, isKinase, experiment, discreteState) %>%
filter(!is.na(discreteState)) %>%
tidyr::expand(nesting(regulator = UniqueID, regulatorCondition = experiment, regulatorState = discreteState, regulatorisKinase = isKinase),
nesting(target = UniqueID, targetCondition = experiment, targetState = discreteState)) %>% filter(regulatorisKinase) })) %>%
unnest(cols = c(data)) %>% right_join(coRegulater, by = c("condition", "regulator", "target")) %>% group_by(condition) %>% nest() %>%
mutate(data = map(data, ~ {.x %>% filter((as.numeric(regulatorCondition) == as.numeric(targetCondition) & coReg == TRUE) | (as.numeric(regulatorCondition) == as.numeric(targetCondition)-1) & coReg == FALSE) %>% filter(regulator != target)})) %>% unnest(cols = c(data))
# print(head(allEdgesCompareState.Nj))
# print("4")
setTxtProgressBar(pb, 4)
allEdgesCompareState.Nj.full = data.scoring.searches %>%
dplyr::select(condition, target, neighborCombinations) %>%
unnest(neighborCombinations) %>% left_join(allEdgesCompareState.Nj, by = c("condition", "target", "regulator", "coReg"))
data.scoring.genist.1reg = allEdgesCompareState.Nj.full %>% filter(searchSizeK == 1) %>% dplyr::select(-regulatorisKinase) %>%
group_by(condition, target, searchID) %>%
mutate(Nj = case_when(
regulatorState == -1 & targetState == -1 ~ "A",
regulatorState == -1 & targetState == 1 ~ "B",
regulatorState == -1 & targetState == 5 ~ "C",
regulatorState == 1 & targetState == -1 ~ "D",
regulatorState == 1 & targetState == 1 ~ "E",
regulatorState == 1 & targetState == 5 ~ "F",
regulatorState == 5 & targetState == 1 ~ "G",
regulatorState == 5 & targetState == -1 ~ "H",
regulatorState == 5 & targetState == 5 ~ "I")) %>%
mutate(qi = case_when(
regulatorState == -1 ~ "A",
regulatorState == 1 ~ "B",
regulatorState == 5 ~ "C")) %>%
add_count(qi) %>% dplyr::count(condition, target, coReg, searchID, regulator, searchSizeK, Nj, n, name = "Nj.count") %>%
mutate(BDei =
log(gamma(Nj.count+ESS/(n_levels*n_levels^searchSizeK))/gamma(ESS/(n_levels*n_levels^searchSizeK)))) %>% ungroup() %>%
group_by(condition, target, regulator, searchID, searchSizeK, coReg, n) %>% summarize(BDei = sum(BDei)) %>%
mutate_at(vars(BDei), ~ BDei + log(gamma(ESS/n_levels^searchSizeK)/gamma(n+ESS/n_levels^searchSizeK))) %>%
group_by(condition, target, regulator, searchID, searchSizeK, coReg) %>% summarize(BDei = sum(BDei)) %>% ungroup() %>%
group_by(condition, target) %>% filter(BDei == max(BDei))
tp0 = as.character(data_V4$timepoint)[1] #QUESTION KUNCHENG; IS DATA_V4 ALWAYS ordered chronologically??
# print("5")
setTxtProgressBar(pb, 5)
# message(paste0("\nUndergoing a time consuming step, please wait... "))
data.scoring.genist.2reg = allEdgesCompareState.Nj.full %>% filter(searchSizeK == 2) %>%
dplyr::select(-regulatorisKinase) %>%
group_by(condition, target, searchID) %>%
filter(regulatorCondition != tp0 | targetCondition != tp0 | all(coReg == TRUE)) %>%
ungroup() %>% group_by(condition, target, searchID, targetCondition) %>% mutate(Reg = row_number()) %>%
pivot_wider(names_from = Reg, values_from= c(regulator, regulatorState, coReg, regulatorCondition)) %>%
ungroup() %>% group_by(condition, target, searchID) %>%
mutate(Nj = case_when(
regulatorState_1 == -1 & regulatorState_2 == -1 & targetState == -1 ~ "A",
regulatorState_1 == -1 & regulatorState_2 == -1 & targetState == 1 ~ "B",
regulatorState_1 == -1 & regulatorState_2 == -1 & targetState == 5 ~ "C",
regulatorState_1 == -1 & regulatorState_2 == 1 & targetState == 1 ~ "D",
regulatorState_1 == -1 & regulatorState_2 == 1 & targetState == -1 ~ "E",
regulatorState_1 == -1 & regulatorState_2 == 1 & targetState == 5 ~ "F",
regulatorState_1 == -1 & regulatorState_2 == 5 & targetState == -1 ~ "G",
regulatorState_1 == -1 & regulatorState_2 == 5 & targetState == 1 ~ "H",
regulatorState_1 == -1 & regulatorState_2 == 5 & targetState == 5 ~ "I",
regulatorState_1 == 1 & regulatorState_2 == -1 & targetState == -1 ~ "J",
regulatorState_1 == 1 & regulatorState_2 == -1 & targetState == 1 ~ "K",
regulatorState_1 == 1 & regulatorState_2 == -1 & targetState == 5 ~ "L",
regulatorState_1 == 1 & regulatorState_2 == 1 & targetState == 1 ~ "M",
regulatorState_1 == 1 & regulatorState_2 == 1 & targetState == -1 ~ "N",
regulatorState_1 == 1 & regulatorState_2 == 1 & targetState == 5 ~ "O",
regulatorState_1 == 1 & regulatorState_2 == 5 & targetState == -1 ~ "P",
regulatorState_1 == 1 & regulatorState_2 == 5 & targetState == 1 ~ "Q",
regulatorState_1 == 1 & regulatorState_2 == 5 & targetState == 5 ~ "R",
regulatorState_1 == 5 & regulatorState_2 == -1 & targetState == -1 ~ "S",
regulatorState_1 == 5 & regulatorState_2 == -1 & targetState == 1 ~ "T",
regulatorState_1 == 5 & regulatorState_2 == -1 & targetState == 5 ~ "U",
regulatorState_1 == 5 & regulatorState_2 == 1 & targetState == 1 ~ "V",
regulatorState_1 == 5 & regulatorState_2 == 1 & targetState == -1 ~ "W",
regulatorState_1 == 5 & regulatorState_2 == 1 & targetState == 5 ~ "X",
regulatorState_1 == 5 & regulatorState_2 == 5 & targetState == -1 ~ "Y",
regulatorState_1 == 5 & regulatorState_2 == 5 & targetState == 1 ~ "Z",
regulatorState_1 == 5 & regulatorState_2 == 5 & targetState == 5 ~ "AA")) %>%
mutate(qi = case_when(
regulatorState_1 == -1 & regulatorState_2 == -1 ~ "A",
regulatorState_1 == -1 & regulatorState_2 == 1 ~ "B",
regulatorState_1 == -1 & regulatorState_2 == 5 ~ "C",
regulatorState_1 == 1 & regulatorState_2 == -1 ~ "D",
regulatorState_1 == 1 & regulatorState_2 == 1 ~ "E",
regulatorState_1 == 1 & regulatorState_2 == 5 ~ "F",
regulatorState_1 == 5 & regulatorState_2 == -1 ~ "G",
regulatorState_1 == 5 & regulatorState_2 == 1 ~ "H",
regulatorState_1 == 5 & regulatorState_2 == 5 ~ "I")) %>% add_count(qi) %>%
dplyr::count(condition, target, coReg_1, coReg_2, searchID, regulator_1, regulator_2, searchSizeK, n, Nj, name = "Nj.count") %>%
mutate(BDei =
log(gamma(Nj.count+ESS/(n_levels*n_levels^searchSizeK))/gamma(ESS/(n_levels*n_levels^searchSizeK)))) %>% ungroup() %>%
group_by(condition, target, regulator_1, regulator_2, coReg_1, coReg_2, searchID, searchSizeK, n) %>%
summarize(BDei = sum(BDei)) %>%
mutate_at(vars(BDei), ~ BDei + log(gamma(ESS/n_levels^searchSizeK)/gamma(n+ESS/n_levels^searchSizeK))) %>%
group_by(condition, target, regulator_1, regulator_2, coReg_1, coReg_2, searchID, searchSizeK) %>%
summarize(BDei = sum(BDei)) %>% ungroup() %>% group_by(condition, target) %>% filter(BDei == max(BDei))
# print("6")
setTxtProgressBar(pb, 6)
##we keep only the max scores across entire dataset instead of grouping per target##
data.scoring.genist = bind_rows(data.scoring.genist.1reg %>% mutate(set = "1"), data.scoring.genist.2reg %>% pivot_longer(cols = c(ends_with("_1"), ends_with("_2")), names_to = c(".value", "set"), names_pattern = "(.+)_(.+)")) %>%
distinct(condition, target, regulator, coReg, BDei) %>% group_by(condition, target) %>%
filter(BDei == max(BDei)) %>% mutate(BDei = exp(BDei), .keep = "unused")
# print("7")
setTxtProgressBar(pb, 7)
### Sign identification
data.scoring.sign = data.scoring %>% ### expand with all identified SigChangeSign, count the changes in the same and opposite direction
dplyr::select(potentialEdges, condition) %>% unnest(c(condition, potentialEdges)) %>% dplyr::select(-percentChange) %>%
mutate(coReg = as.character(coReg), .keep = "unused") %>%
right_join(data.scoring.genist, by = c("regulator", "target", "coReg", "condition")) %>%
group_by(condition, target, regulator, coReg) %>% dplyr::count(regulatorChange == targetChange) %>%
pivot_wider(names_from = "regulatorChange == targetChange", values_from = n, values_fill = 0) %>%
mutate(regulation = case_when(
`FALSE` > `TRUE` ~ "dephosphorylation",
`TRUE` > `FALSE` ~ "phosphorylation",
TRUE ~ "undetermined"
))
# print("8")
setTxtProgressBar(pb, 8)
network.final = data.scoring.sign %>% dplyr::select(-`FALSE`, -`TRUE`) %>% dplyr::rename(timelapse = coReg) %>%
mutate_at(vars(timelapse), ~ case_when(timelapse == TRUE ~ "tl.1", timelapse == FALSE ~ "tl.0")) %>%
left_join(data.scoring.genist %>% dplyr::select(-coReg), by = c("condition", "target", "regulator")) %>% distinct()
setTxtProgressBar(pb, 9)
netPhorceData@netPhorceResult <- network.final
if(requestPlotData == TRUE){
plotData = list()
plotData[["data.scoring.sign"]] <- data.scoring.sign
plotData[["data.scoring.genist"]] <- data.scoring.genist
netPhorceData@networkPlotData <- plotData
}
return(netPhorceData)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.