vignettes/netOmics.R

## ----echo=FALSE---------------------------------------------------------------
knitr::opts_chunk$set(fig.align = "center")


## ----eval=FALSE---------------------------------------------------------------
## # install the package via BioConductor
## if (!requireNamespace("BiocManager", quietly = TRUE))
##     install.packages("BiocManager")
## 
## BiocManager::install("netOmics")


## ----eval=FALSE---------------------------------------------------------------
## # install the package via github
## library(devtools)
## install_github("abodein/netOmics")


## ----eval=TRUE, message=FALSE-------------------------------------------------
# load the package
library(netOmics)


## ----eval=TRUE, message=FALSE-------------------------------------------------
# usefull packages to build this vignette
library(timeOmics)
library(tidyverse)
library(igraph)


## ----load_data----------------------------------------------------------------
# load data
data("hmp_T2D")


## ----timeOmics_1, eval=FALSE--------------------------------------------------
## # not evaluated in this vignette
## 
## #1 filter fold-change
## remove.low.cv <- function(X, cutoff = 0.5){
##     # var.coef
##     cv <- unlist(lapply(as.data.frame(X),
##                     function(x) abs(sd(x, na.rm = TRUE)/mean(x, na.rm= TRUE))))
##     return(X[,cv > cutoff])
## }
## fc.threshold <- list("RNA"= 1.5, "CLINICAL"=0.2, "GUT"=1.5, "METAB"=1.5,
##                      "PROT" = 1.5, "CYTO" = 1)
## 
## # --> hmp_T2D$raw
## data.filter <- imap(raw, ~{remove.low.cv(.x, cutoff = fc.threshold[[.y]])})
## 
## #2 scale
## data <- lapply(data.filter, function(x) log(x+1))
## # --> hmp_T2D$data
## 
## 
## #3 modelling
## lmms.func <- function(X){
##     time <- rownames(X) %>% str_split("_") %>%
##       map_chr(~.x[[2]]) %>% as.numeric()
##     lmms.output <- lmms::lmmSpline(data = X, time = time,
##                                    sampleID = rownames(X), deri = FALSE,
##                                    basis = "p-spline", numCores = 4,
##                                    keepModels = TRUE)
##     return(lmms.output)
## }
## data.modelled <- lapply(data, function(x) lmms.func(x))
## 
## # 4 clustering
## block.res <- block.pls(data.modelled, indY = 1, ncomp = 1)
## getCluster.res <- getCluster(block.res)
## # --> hmp_T2D$getCluster.res
## 
## 
## # 5 signature
## list.keepX <- list("CLINICAL" = 4, "CYTO" = 3, "GUT" = 10, "METAB" = 3,
##                    "PROT" = 2,"RNA" = 34)
## sparse.block.res  <- block.spls(data.modelled, indY = 1, ncomp = 1, scale =TRUE,
##                                 keepX =list.keepX)
## getCluster.sparse.res <- getCluster(sparse.block.res)
## # --> hmp_T2D$getCluster.sparse.res


## ----timeOmics_2--------------------------------------------------------------
# clustering results
cluster.info <- hmp_T2D$getCluster.res


## ----graph.rna, warning=FALSE-------------------------------------------------
cluster.info.RNA <- timeOmics::getCluster(cluster.info, user.block = "RNA")
graph.rna <- get_grn(X = hmp_T2D$data$RNA, cluster = cluster.info.RNA)

# to get info about the network
get_graph_stats(graph.rna)


## ----PROT_graph, warning=FALSE------------------------------------------------
# Utility function to get the molecules by cluster
get_list_mol_cluster <- function(cluster.info, user.block){
  require(timeOmics)
    tmp <- timeOmics::getCluster(cluster.info, user.block) 
    res <- tmp %>% split(.$cluster) %>% 
        lapply(function(x) x$molecule)
    res[["All"]] <- tmp$molecule
    return(res)
}

cluster.info.prot <- get_list_mol_cluster(cluster.info, user.block = 'PROT')
graph.prot <-  get_interaction_from_database(X = cluster.info.prot, 
                                             db = hmp_T2D$interaction.biogrid, 
                                             type = "PROT", user.ego = TRUE)
# get_graph_stats(graph.prot)


## ----GUT_graph, eval = FALSE--------------------------------------------------
## # not evaluated in this vignette
## library(SpiecEasi)
## 
## get_sparcc_graph <- function(X, threshold = 0.3){
##     res.sparcc <- sparcc(data = X)
##     sparcc.graph <- abs(res.sparcc$Cor) >= threshold
##     colnames(sparcc.graph) <-  colnames(X)
##     rownames(sparcc.graph) <-  colnames(X)
##     res.graph <- graph_from_adjacency_matrix(sparcc.graph,
##                                              mode = "undirected") %>% simplify
##     return(res.graph)
## }
## 
## gut_list <- get_list_mol_cluster(cluster.info, user.block = 'GUT')
## 
## graph.gut <- list()
## graph.gut[["All"]] <- get_sparcc_graph(hmp_T2D$raw$GUT, threshold = 0.3)
## graph.gut[["1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>%
##                                        dplyr::select(gut_list[["1"]]),
##                                      threshold = 0.3)
## graph.gut[["-1"]] <- get_sparcc_graph(hmp_T2D$raw$GUT %>%
##                                         dplyr::select(gut_list[["-1"]]),
##                                       threshold = 0.3)
## class(graph.gut) <- "list.igraph"


## ----GUT----------------------------------------------------------------------
graph.gut <- hmp_T2D$graph.gut
# get_graph_stats(graph.gut)


## ----CYTO_graph, warning=FALSE------------------------------------------------
# CYTO -> from database (biogrid)
cyto_list = get_list_mol_cluster(cluster.info = cluster.info, 
                                 user.block = "CYTO")
graph.cyto <-  get_interaction_from_database(X = cyto_list,
                                             db = hmp_T2D$interaction.biogrid, 
                                             type = "CYTO", user.ego = TRUE)
# get_graph_stats(graph.cyto)

# METAB -> inference
cluster.info.metab <-  timeOmics::getCluster(X = cluster.info, 
                                             user.block = "METAB")
graph.metab <-  get_grn(X = hmp_T2D$data$METAB, 
                        cluster = cluster.info.metab)
# get_graph_stats(graph.metab)

# CLINICAL -> inference
cluster.info.clinical <- timeOmics::getCluster(X = cluster.info, 
                                               user.block = 'CLINICAL')
graph.clinical <- get_grn(X = hmp_T2D$data$CLINICAL,
                          cluster = cluster.info.clinical)
# get_graph_stats(graph.clinical)


## ----merged_0-----------------------------------------------------------------
full.graph <- combine_layers(graph1 = graph.rna, graph2 = graph.prot)
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.cyto)

full.graph <- combine_layers(graph1 = full.graph,
                             graph2 = hmp_T2D$interaction.TF)
# get_graph_stats(full.graph)


## ----merged_1_gut, warning=FALSE----------------------------------------------
all_data <- reduce(hmp_T2D$data, cbind)

# omic = gut
gut_list <- get_list_mol_cluster(cluster.info, user.block = "GUT")
omic_data <- lapply(gut_list, function(x)dplyr::select(hmp_T2D$data$GUT, x))

# other data = "RNA", "PROT", "CYTO"
other_data_list <- get_list_mol_cluster(cluster.info,
                                        user.block = c("RNA", "PROT", "CYTO"))
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))

# get interaction between gut data and other data
interaction_df_gut <- get_interaction_from_correlation(X = omic_data,
                                                       Y = other_data,
                                                       threshold = 0.99)

# and merge with full graph
full.graph <- combine_layers(graph1 = full.graph,
                             graph2 = graph.gut,
                             interaction.df = interaction_df_gut$All)


## ----merged_2_clinical, warning=FALSE-----------------------------------------
# omic =  Clinical
clinical_list <- get_list_mol_cluster(cluster.info, user.block = "CLINICAL")
omic_data <- lapply(clinical_list, 
                    function(x)dplyr::select(hmp_T2D$data$CLINICAL, x))

# other data = "RNA", "PROT", "CYTO", "GUT"
other_data_list <- get_list_mol_cluster(cluster.info,
                                        user.block = c("RNA", "PROT", 
                                                       "CYTO", "GUT"))
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))


# get interaction between gut data and other data
interaction_df_clinical <- get_interaction_from_correlation(X = omic_data
                                                            , Y = other_data,
                                                            threshold = 0.99)

# and merge with full graph
full.graph <- combine_layers(graph1 = full.graph,
                             graph2 = graph.clinical, 
                             interaction.df = interaction_df_clinical$All)


## ----merged_3_metab, warning=FALSE--------------------------------------------
# omic =  Metab
metab_list <- get_list_mol_cluster(cluster.info, user.block = "METAB")
omic_data <- lapply(metab_list, function(x)dplyr::select(hmp_T2D$data$METAB, x))

# other data = "RNA", "PROT", "CYTO", "GUT", "CLINICAL"
other_data_list <- get_list_mol_cluster(cluster.info,
                                        user.block = c("RNA", "PROT", "CYTO", 
                                                       "GUT", "CLINICAL"))
other_data <- lapply(other_data_list, function(x)dplyr::select(all_data, x))

# get interaction between gut data and other data
interaction_df_metab <- get_interaction_from_correlation(X = omic_data,
                                                         Y = other_data, 
                                                         threshold = 0.99)

# and merge with full graph
full.graph <- combine_layers(graph1 = full.graph, 
                             graph2 = graph.metab, 
                             interaction.df = interaction_df_metab$All)


## -----------------------------------------------------------------------------
# ORA by cluster/All
mol_ora <- get_list_mol_cluster(cluster.info, 
                                user.block = c("RNA", "PROT", "CYTO"))

# get ORA interaction graph by cluster
graph.go <- get_interaction_from_ORA(query = mol_ora,
                                     sources = "GO",
                                     organism = "hsapiens",
                                     signif.value = TRUE)

# merge
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.go)


## -----------------------------------------------------------------------------
# medlineRanker -> database
medlineranker.res.df <- hmp_T2D$medlineranker.res.df %>% 
  dplyr::select(Disease, symbol) %>% 
  set_names(c("from", "to"))
  
mol_list <-  get_list_mol_cluster(cluster.info = cluster.info,
                                  user.block = c("RNA", "PROT", "CYTO"))
graph.medlineranker <-  get_interaction_from_database(X = mol_list,
                                                      db = medlineranker.res.df, 
                                                      type = "Disease",
                                                      user.ego = TRUE)
# get_graph_stats(graph.medlineranker)

# merging
full.graph <- combine_layers(graph1 = full.graph, graph2 = graph.medlineranker)


## -----------------------------------------------------------------------------
# graph cleaning
graph_cleaning <- function(X, cluster.info){
    # no reusability
    X <- igraph::simplify(X)
    va <- vertex_attr(X)
    viewed_mol <- c()
    for(omic in unique(cluster.info$block)){
        mol <- intersect(cluster.info %>% dplyr::filter(.$block == omic) %>%
                           pull(molecule), V(X)$name)
        viewed_mol <- c(viewed_mol, mol)
        X <- set_vertex_attr(graph = X, 
                             name = "type", 
                             index = mol, 
                             value = omic)
        X <- set_vertex_attr(graph = X, 
                             name = "mode",
                             index = mol,
                             value = "core")
    }
    # add medline ranker and go
    mol <- intersect(map(graph.go, ~ as_data_frame(.x)$to) %>%
                       unlist %>% unique(), V(X)$name) # only GO terms
    viewed_mol <- c(viewed_mol, mol)
    X <- set_vertex_attr(graph = X, name = "type", index = mol, value = "GO")
    X <- set_vertex_attr(graph = X, name = "mode", 
                         index = mol, value = "extended")
    
    mol <- intersect(as.character(medlineranker.res.df$from), V(X)$name)
    viewed_mol <- c(viewed_mol, mol)
    X <- set_vertex_attr(graph = X, name = "type",
                         index = mol, value = "Disease")
    X <- set_vertex_attr(graph = X, name = "mode",
                         index = mol, value = "extended")
    
    other_mol <- setdiff(V(X), viewed_mol)
    if(!is_empty(other_mol)){
        X <- set_vertex_attr(graph = X, name = "mode",
                             index = other_mol, value = "extended")
    }
    X <- set_vertex_attr(graph = X, name = "mode", 
                         index = intersect(cluster.info$molecule, V(X)$name), 
                         value = "core")
    
    # signature
    mol <-  intersect(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule)
    X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = TRUE)
    mol <-  setdiff(V(X)$name, hmp_T2D$getCluster.sparse.res$molecule)
    X <- set_vertex_attr(graph = X, name = "sparse", index = mol, value = FALSE)
    
    return(X)
}


## -----------------------------------------------------------------------------
FULL <- lapply(full.graph, function(x) graph_cleaning(x, cluster.info))
get_graph_stats(FULL)


## ----eval = FALSE-------------------------------------------------------------
## # degree analysis
## d <- degree(FULL$All)
## hist(d)
## d[max(d)]
## 
## # modularity # Warnings: can take several minutes
## res.mod <- walktrap.community(FULL$All)
## # ...
## 
## # modularity
## sp <- shortest.paths(FULL$All)


## -----------------------------------------------------------------------------
# seeds = all vertices -> takes 5 minutes to run on regular computer
# seeds <- V(FULL$All)$name
# rwr_res <- random_walk_restart(FULL, seeds)

# seed = some GO terms
seeds <- head(V(FULL$All)$name[V(FULL$All)$type == "GO"])
rwr_res <- random_walk_restart(FULL, seeds)


## -----------------------------------------------------------------------------
rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res, 
                                                  attribute = "type", k = 15)

# a summary plot function
summary_plot_rwr_attributes(rwr_type_k15)
summary_plot_rwr_attributes(rwr_type_k15$All)


## -----------------------------------------------------------------------------
rwr_type_k15 <- rwr_find_seeds_between_attributes(X = rwr_res$All, 
                                                  attribute = "cluster", k = 15)
summary_plot_rwr_attributes(rwr_type_k15)


## -----------------------------------------------------------------------------
sub_res <- rwr_type_k15$`GO:0005737`
sub <- plot_rwr_subnetwork(sub_res, legend = TRUE, plot = TRUE)


## -----------------------------------------------------------------------------
rwr_res <- random_walk_restart(FULL$All, seed = "ZNF263")

# closest GO term
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", 
                      value = "GO", top = 5)

# closest Disease
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "type", 
                      value = "Disease", top = 5)

# closest nodes with an attribute "cluster" and the value "-1"
rwr_find_closest_type(rwr_res, seed = "ZNF263", attribute = "cluster",
                      value = "-1", top = 5)


## ----eval = FALSE-------------------------------------------------------------
## seeds <- V(FULL$All)$name[V(FULL$All)$type %in% c("GO", "Disease")]


## -----------------------------------------------------------------------------
sessionInfo()
abodein/netOmics documentation built on April 16, 2024, 2:59 p.m.