# Set the knitr chunk option `echo` to TRUE for all chunks in this document.
knitr::opts_chunk$set(echo = TRUE)

This article shows how to install the PItools package and download protein interaction data for one species (for example, human) or for two species/taxonomy groups (for example, human and viruses), and how to filter this data by interaction detection method, publication ID (PMID) or a list of protein IDs (UniProtKB).

Install PItools

# Install PItools only when it is missing, then attach it without startup
# messages. requireNamespace() tests one package directly instead of scanning
# the whole installed.packages() matrix, and BiocManager is installed only
# when it is not already available.
if (!requireNamespace("PItools", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager") # for installing BioConductor dependencies
  }
  # Install R PItools package
  BiocManager::install("vitkl/PItools", dependencies = TRUE)
}
suppressPackageStartupMessages({
  library(PItools)
})

Load all interactions between human proteins

Interactome search by species is taxonomy-hierarchy aware.

# Load the interactome for taxid 9606 (human) from the IntAct FTP source in
# MITAB 2.7 format, with cleaning and the protein-only option switched on.
human <- fullInteractome(
  taxid = 9606,
  database = "IntActFTP",
  format = "tab27",
  clean = TRUE,
  protein_only = TRUE,
  directory = NULL # keep data files inside R library - default
)

Load all human-viral interactions

10239 is the top viral clade identifier.

# Interactions between human (taxid 9606) and viral (taxid 10239) proteins,
# using the same source, format and cleaning options as for `human`.
human_viral <- interSpeciesInteractome(
  taxid1 = 9606,
  taxid2 = 10239,
  database = "IntActFTP",
  format = "tab27",
  clean = TRUE,
  protein_only = TRUE
)

Load all human-mouse interactions

# Interactions between human (taxid 9606) and mouse (taxid 10090) proteins,
# using the same source, format and cleaning options as for `human`.
human_mouse <- interSpeciesInteractome(
  taxid1 = 9606,
  taxid2 = 10090,
  database = "IntActFTP",
  format = "tab27",
  clean = TRUE,
  protein_only = TRUE
)

Count interactors and interactions

# Human-human dataset: number of unique interactions, then unique interactors
uniqueNinteractions(human)
uniqueNinteractors(human)

# Human-viral dataset
uniqueNinteractions(human_viral)
uniqueNinteractors(human_viral)

# Human-mouse dataset
uniqueNinteractions(human_mouse)
uniqueNinteractors(human_mouse)

Filter human-human data by detection method and recount interactors and interactions

# Each subset below filters the `human` interactome by PSI-MI method
# identifiers and then reports its unique interaction and interactor counts.

# subset two-hybrid interactions
human_two_hybrid <- subsetMITABbyMethod(
  MITABdata = human,
  Interaction_detection_methods = "MI:0018"
)
uniqueNinteractions(human_two_hybrid)
uniqueNinteractors(human_two_hybrid)

# subset all interactions but two-hybrid
# (TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned)
human_NOT_two_hybrid <- subsetMITABbyMethod(
  MITABdata = human,
  Interaction_detection_methods = "MI:0018",
  inverse_filter = TRUE
)
uniqueNinteractions(human_NOT_two_hybrid)
uniqueNinteractors(human_NOT_two_hybrid)

# subset affinity purification - mass spectrometry interactions
human_AP_MS <- subsetMITABbyMethod(
  MITABdata = human,
  Interaction_detection_methods = "MI:0004",
  Identification_method = "MI:0433"
)
uniqueNinteractions(human_AP_MS)
uniqueNinteractors(human_AP_MS)

Filter human-human data by PMID and recount interactors and interactions

# Each subset below keeps the interactions of `human` reported under the given
# publication identifiers, then reports its unique counts.

# subset both published and unpublished Vidal group data
Vidal_all <- subsetMITABbyPMIDs(
  MITABdata = human,
  PMIDs = c("25416956", "unassigned1304")
)
uniqueNinteractions(Vidal_all)
uniqueNinteractors(Vidal_all)

# subset Matthias Mann 2015 paper data
Mann <- subsetMITABbyPMIDs(
  MITABdata = human,
  PMIDs = "26496610"
)
uniqueNinteractions(Mann)
uniqueNinteractors(Mann)

You can get help and more details on these functions (for example, how to find which molecular ontology terms correspond to which methods): ?subsetMITABbyMethod

Find interactions between components of the mediator complex in the Vidal dataset

# Download the UniProtKB accessions annotated with GO:0016592 within taxonomy
# 9606. The legacy "www.uniprot.org/uniprot/?...&format=tab&columns=id"
# endpoint has been retired, so the current REST API is used instead; the
# "stream" endpoint returns all hits without pagination.
# NOTE(review): the accession column is expected to keep the header "Entry",
# which the code below relies on - confirm against a live response.
mediator_complex_proteins <- fread(
  "https://rest.uniprot.org/uniprotkb/stream?query=go:0016592%20AND%20taxonomy_id:9606&format=tsv&fields=accession"
)
# Keep only interactions of the Vidal dataset where both partners are in the
# seed list (within_seed = TRUE, only_seed2nonseed = FALSE).
mediator_complex <- subsetMITABbyID(
  Vidal_all,
  ID_seed = mediator_complex_proteins$Entry,
  within_seed = TRUE,
  only_seed2nonseed = FALSE
)
uniqueNinteractions(mediator_complex)
uniqueNinteractors(mediator_complex)

Find interactions of the components of the mediator complex with other proteins

# Same seed list as above, but with the flags inverted
# (within_seed = FALSE, only_seed2nonseed = TRUE) to select interactions of
# seed proteins with proteins outside the seed list.
# TRUE/FALSE are spelled out because `T`/`F` are reassignable variables.
mediator_complex_interactions <- subsetMITABbyID(
  Vidal_all,
  ID_seed = mediator_complex_proteins$Entry,
  within_seed = FALSE,
  only_seed2nonseed = TRUE
)
uniqueNinteractions(mediator_complex_interactions)
uniqueNinteractors(mediator_complex_interactions)

2.3 Advanced search

This can be useful if you need interactions for a small number of proteins or if you want to query non-IMEx databases. Note that queryPSICQUIC doesn't keep track of database version data, while queryPSICQUICrlib does.

# Query for interactions of bacterial RNA polymerase sigma factor SigA identified using two-hybrid methods in all imex databases
queryPSICQUIC(query = "id:P74565 AND detmethod:\"MI:0018\"",
              format = "tab27",
              database = "imex",
              file = "./P74565_2H_interactions_imex_tab27.tsv")

# Query for interactions of sigma factor SigA identified using two-hybrid methods in mentha (a database that aggregates data from all primary databases, but does no interaction prediction)
queryPSICQUIC(query = "id:P74565 AND detmethod:\"MI:0018\"",
              format = "tab25",
              database = "mentha",
              file = "./P74565_2H_interactions_mentha_tab25.tsv")

# Query for interactions of sigma factor SigA in mentha
# This query is not restricted to two-hybrid methods, so it is written to its
# own file rather than overwriting the two-hybrid result saved above.
queryPSICQUIC(query = "id:P74565",
              format = "tab25",
              database = "mentha",
              file = "./P74565_interactions_mentha_tab25.tsv")

# Retrieve interactions of any proteins encoded by Beta-adrenergic receptor kinase 1 gene (Entrez GeneID 156) from BioGRID (which recognises only this type of ID)
queryPSICQUIC(query = "id:156",
              format = "tab25",
              database = "BioGrid",
              file = "./entrezgene156_interactions_BioGrid_tab25.tsv")
# The function returns the report of how many interactions were found in each database, not the data itself. Reading data into R.
fread("./entrezgene156_interactions_BioGrid_tab25.tsv", header = TRUE, stringsAsFactors = FALSE)[1:5]

All the same operations can be done using the function queryPSICQUICrlib, with the added convenience of automatic tracking of the database release date and the exact query text. This function also returns the data as an object of class RAW_MItab which, once cleaned, is ready for use with the other tools in the package.

# All interactions for Entrez GeneID 156 from BioGRID, stored in the current
# directory.
BioGrid_156 <- queryPSICQUICrlib(
  query = "id:156",
  format = "tab25",
  database = "BioGrid",
  directory = "./"
)
# The same protein, but only two-hybrid interactions
BioGrid_156_2H <- queryPSICQUICrlib(
  query = "id:156 AND detmethod:\"MI:0018\"",
  format = "tab25",
  database = "BioGrid",
  directory = "./"
)
# The data returned by queryPSICQUICrlib contains auxiliary information that is not necessary for most analyses. Let's clean the data.
BioGrid_156 <- cleanMITAB(BioGrid_156)
BioGrid_156$data[1:5]

Date and packages used

# Record and print the current date and the session details
# (as reported by devtools::session_info()).
Sys.Date. <- Sys.Date()
Sys.Date.
session_info. <- devtools::session_info()
session_info.


vitkl/MItools documentation built on May 29, 2019, 2:55 p.m.