## omnipath_intro.R
## In OmnipathR: OmniPath web service client

## ---- fig1, dpi=300, fig.width=10, fig.height=10, fig.cap="Overview of the resources featured in OmniPath. Causal resources (including activity-flow and enzyme-substrate resources) can provide direction (*) or sign and direction (+) of interactions.", echo=FALSE----
## Attach knitr and embed the overview figure stored under man/figures.
library(knitr)
include_graphics("../man/figures/page1_1.png")

## ----installation, eval=FALSE-------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("OmnipathR")

## ----libraries, message=FALSE-------------------------------------------------
library(OmnipathR)
library(tidyr)
library(dnet)
library(gprofiler2)

## ----interactions-------------------------------------------------------------
## List some of the databases that provide interaction data.
get_interaction_resources()

## Download the interactions from three of these resources; the result
## is kept in a data frame.
interactions <- import_omnipath_interactions(
    resources = c("SignaLink3", "PhosphoSite", "SIGNOR")
)

## Show the first interactions of the data frame.
interactions %>%
    head() %>%
    print_interactions()

## ----sp, message=TRUE---------------------------------------------------------
## Build a graph from the interactions data frame.
OPI_g <- interaction_graph(interactions = interactions)

## Shortest path on the directed network between two proteins of
## interest. Only the first edge path returned is printed.
print_path_es(
    shortest_paths(
        OPI_g,
        from = "TYRO3",
        to = "STAT3",
        output = "epath"
    )$epath[[1]],
    OPI_g
)

## All shortest paths between another pair of proteins of interest.
print_path_vs(
    all_shortest_paths(OPI_g, from = "DYRK2", to = "MAPKAPK2")$res,
    OPI_g
)

## ----clustering, message=FALSE------------------------------------------------
## We apply a clustering algorithm to group the proteins in our network.
## We use the fast greedy modularity optimisation (cluster_fast_greedy),
## which is run here on an undirected, simplified copy of the network.
## Other clustering algorithms can deal with directed networks but with
## longer computational times, such as cluster_edge_betweenness. These
## cluster methods are directly available in the igraph package.
OPI_g_undirected <- as.undirected(OPI_g, mode = "mutual")
OPI_g_undirected <- simplify(OPI_g_undirected)
cl_results <- cluster_fast_greedy(OPI_g_undirected)

## We extract the cluster where a protein of interest is contained
cluster_id <- cl_results$membership[which(cl_results$names == "ERBB2")]

## The membership vector refers to the vertices of the clustered
## (undirected) graph, so the vertex names are read from that same graph
## rather than from the original directed one.
module_graph <- induced_subgraph(
    OPI_g_undirected,
    V(OPI_g_undirected)$name[which(cl_results$membership == cluster_id)]
)

## ----pathwayextra-------------------------------------------------------------
## Query the interactions and keep them in a data frame
interactions <- import_pathwayextra_interactions(
    resources = c("BioGRID", "STRING"),
    organism = 10090
)

## Keep every interaction in which the Amfr gene takes part, either as
## source or as target
interactions_Amfr <- interactions %>%
    dplyr::filter(
        source_genesymbol == "Amfr" | target_genesymbol == "Amfr"
    )

## Print the selected interactions
interactions_Amfr %>%
    print_interactions()

## ----kinaseextra--------------------------------------------------------------
## Query the interactions and keep them in a data frame
interactions <- import_kinaseextra_interactions(
    resources = c("PhosphoPoint", "PhosphoSite"),
    organism = 10116
)

## Keep the interactions whose target is the Dpysl2 gene
interactions_TargetDpysl2 <- interactions %>%
    dplyr::filter(target_genesymbol == "Dpysl2")

## Print the selected interactions
interactions_TargetDpysl2 %>%
    print_interactions()

## ----ligrecextra--------------------------------------------------------------
## Query the interactions and keep them in a data frame
interactions <- import_ligrecextra_interactions(
    resources = c("iTALK", "Baccin2019"),
    organism = 9606
)

## Receptors of the ADM2 ligand: interactions whose source is ADM2.
interactions_ADM2 <- interactions %>%
    dplyr::filter(source_genesymbol == "ADM2")

## Build a graph from the selected interactions
OPI_g <- interaction_graph(interactions = interactions_ADM2)

## Induce a network with the genes (vertices) of this graph
Induced_Network <- dNetInduce(
    g = OPI_g,
    nodes_query = as.character(V(OPI_g)$name),
    knn = 0,
    remove.loops = FALSE,
    largest.comp = FALSE
)

## ----dorothea-----------------------------------------------------------------
## Query the interactions and keep them in a data frame
interactions <- import_dorothea_interactions(
    resources = "DoRothEA",
    dorothea_levels = "A",
    organism = 9606
)

## Keep the level "A" interactions of one transcription factor (GLI1)
## and print them to check how it regulates its different targets
interactions_A_GLI1 <- interactions %>%
    dplyr::filter(dorothea_level == "A" & source_genesymbol == "GLI1")
interactions_A_GLI1 %>%
    print_interactions()

## ----mirnatarget--------------------------------------------------------------
## Query the interactions and keep them in a data frame
interactions <- import_mirnatarget_interactions(
    resources = c("miRTarBase", "miRecords")
)

## Keep the interactions in which a miRNA targets the TF used in the
## previous code chunk (GLI1), and print them.
interactions_miRNA_GLI1 <- interactions %>%
    dplyr::filter(target_genesymbol == "GLI1")
interactions_miRNA_GLI1 %>%
    print_interactions()

## Turn both selections (TF targets and miRNA interactions) into graphs
## (igraph objects)
OPI_g_1 <- interaction_graph(interactions = interactions_A_GLI1)
OPI_g_2 <- interaction_graph(interactions = interactions_miRNA_GLI1)

## ----PTMs---------------------------------------------------------------------
## List the different PTMs databases
get_enzsub_resources()

## Download the ptms into a data frame, without filtering by database.
ptms <- import_omnipath_enzsub()

## Print the reactions between one specific kinase and one specific
## substrate
ptms %>%
    dplyr::filter(
        enzyme_genesymbol == "MAP2K1" & substrate_genesymbol == "MAPK3"
    ) %>%
    print_interactions()

## The output above carries no sign (activation/inhibition) for the
## ptms. This information can be generated from the protein-protein
## OmniPath interaction dataset.
interactions <- import_omnipath_interactions()
ptms <- get_signed_ptms(ptms, interactions)

## Same kinase and substrate as before; the printed ptms now include
## information about inhibition or activation
ptms %>%
    dplyr::filter(
        enzyme_genesymbol == "MAP2K1" & substrate_genesymbol == "MAPK3"
    ) %>%
    print_interactions()

## The ptms can also be turned into a graph.
ptms_g <- ptms_graph(ptms = ptms)

## Download PTMs for mouse
ptms <- import_omnipath_enzsub(
    resources = c("PhosphoSite", "SIGNOR"),
    organism = 10090
)

## ----complexes----------------------------------------------------------------
## List the different complexes databases
get_complex_resources()

## Download the complexes of some sources into a data frame.
complexes <- import_omnipath_complexes(resources = c("CORUM", "hu.MAP"))

## Genes whose molecular complexes we want to look up
query_genes <- c("WRN", "PARP1")

## Complexes where any of the input genes participate
complexes_query_genes_any <- complexes %>%
    get_complex_genes(query_genes, total_match = FALSE) %>%
    unique()

## Print the components of the first selected complexes
head(complexes_query_genes_any$components_genesymbols, 6)

## Complexes where all the input genes participate jointly
complexes_query_genes_join <- complexes %>%
    get_complex_genes(query_genes, total_match = TRUE) %>%
    unique()

## Print the components of the selected complexes
complexes_query_genes_join$components_genesymbols

## ----enrichment---------------------------------------------------------------
## Split the "_"-separated component strings of the selected complexes
## into a single vector of gene symbols
genes_complex <- complexes_query_genes_join$components_genesymbols %>%
    strsplit("_") %>%
    unlist()

## Enrichment analysis with the genes in the complex
EnrichmentResults <- gost(
    genes_complex,
    significant = TRUE,
    user_threshold = 0.001,
    correction_method = "fdr",
    sources = c("GO:BP", "GO:CC", "GO:MF")
)

## Show the most significant results (the 5 smallest p-values)
dplyr::top_n(
    dplyr::select(
        EnrichmentResults$result,
        term_id, source, term_name, p_value
    ),
    5,
    -p_value
)

## ----complex_annotations------------------------------------------------------
## List the different annotation databases
get_annotation_resources()

## The features of the complex selected in the previous section can be
## investigated further.

## First, the annotations of the complex itself: the query is built by
## prefixing the component string with "COMPLEX:".
annotations <- import_omnipath_annotations(
    proteins = paste0(
        "COMPLEX:",
        complexes_query_genes_join$components_genesymbols
    )
)

annotations %>%
    dplyr::select(source, label, value) %>%
    head(10)

## ----annotations_components---------------------------------------------------
## Then, we explore some annotations of its individual components

## Pathways where the proteins belong:
annotations <- import_omnipath_annotations(
    proteins = genes_complex,
    resources = c("NetPath")
)

dplyr::select(annotations, genesymbol, value)

## Cellular localization of our proteins
annotations <- import_omnipath_annotations(
    proteins = genes_complex,
    resources = c("ComPPI")
)

## Since we have same record_id for some results of our query, we spread
## these records across columns: every distinct `label` becomes a column
## holding the matching `value`. pivot_wider() is used instead of the
## superseded spread(), and desc() is namespace-qualified because dplyr
## is not attached in this script.
## NOTE(review): `score` is taken from the `value` column; if that column
## is stored as text the ordering below is lexicographic - confirm.
tidyr::pivot_wider(annotations, names_from = label, values_from = value) %>%
    dplyr::arrange(dplyr::desc(score)) %>%
    dplyr::top_n(10, score)

## ----intercell----------------------------------------------------------------
## List some of the different intercell categories
get_intercell_generic_categories()

## Download the intercell data into a data frame
intercell <- import_omnipath_intercell(
    scope = "generic",
    aspect = "locational"
)

## Intercell annotations of the individual components of our previous
## complex: one row per gene symbol/parent pair, reduced to three columns
## and sorted by gene symbol so that it prints in a good format
intercell %>%
    dplyr::filter(genesymbol %in% genes_complex) %>%
    dplyr::distinct(genesymbol, parent, .keep_all = TRUE) %>%
    dplyr::select(category, genesymbol, parent) %>%
    dplyr::arrange(genesymbol)

## Close every graphics device that is still open
while (!is.null(dev.list())) {
    dev.off()
}

## ----sessionInfo, echo=FALSE--------------------------------------------------
## Report the R version and the attached/loaded packages of this session
utils::sessionInfo()