# Set the knitr chunk option `echo = TRUE` for all chunks in this document.
knitr::opts_chunk$set(echo = TRUE)
This article shows how to install the PItools package and download protein interaction data for one species (for example, human) or for two species/taxonomy groups (for example, human and viruses), and how to filter this data by interaction detection method, publication ID (PMID) or a list of protein IDs (UniProtKB).
# Install PItools from GitHub (via BiocManager) only when it is not already
# available, then attach it while silencing package start-up messages.
if (!requireNamespace("PItools", quietly = TRUE)) {
  # BiocManager is used for installing Bioconductor dependencies
  install.packages("BiocManager")
  BiocManager::install("vitkl/PItools", dependencies = TRUE)
}

suppressPackageStartupMessages({
  library(PItools)
})
Interactome search by species is taxonomy-hierarchy aware.
# Retrieve the interactome for taxid 9606 (human) from the "IntActFTP"
# database in "tab27" format.
human <- fullInteractome(
  taxid = 9606,
  database = "IntActFTP",
  format = "tab27",
  clean = TRUE,
  protein_only = TRUE,
  directory = NULL # default: keep data files inside the R library
)
10239 is the top viral clade identifier.
# Interactions between taxid 9606 (human) and taxid 10239 (viruses)
human_viral <- interSpeciesInteractome(
  taxid1 = 9606,
  taxid2 = 10239,
  database = "IntActFTP",
  format = "tab27",
  clean = TRUE,
  protein_only = TRUE
)

# Interactions between taxid 9606 (human) and taxid 10090 (mouse)
human_mouse <- interSpeciesInteractome(
  taxid1 = 9606,
  taxid2 = 10090,
  database = "IntActFTP",
  format = "tab27",
  clean = TRUE,
  protein_only = TRUE
)
# Report the number of unique interactions and unique interactors in each
# of the three data sets. Each call is on its own line so that its result
# is evaluated and printed separately.
uniqueNinteractions(human)
uniqueNinteractors(human)

uniqueNinteractions(human_viral)
uniqueNinteractors(human_viral)

uniqueNinteractions(human_mouse)
uniqueNinteractors(human_mouse)
# Filter the human interactome by interaction detection method.
# Methods are given as "MI:" term identifiers; see ?subsetMITABbyMethod.

# subset two-hybrid interactions
human_two_hybrid <- subsetMITABbyMethod(
  MITABdata = human,
  Interaction_detection_methods = "MI:0018"
)
uniqueNinteractions(human_two_hybrid)
uniqueNinteractors(human_two_hybrid)

# subset all interactions but two-hybrid
human_NOT_two_hybrid <- subsetMITABbyMethod(
  MITABdata = human,
  Interaction_detection_methods = "MI:0018",
  inverse_filter = TRUE
)
uniqueNinteractions(human_NOT_two_hybrid)
uniqueNinteractors(human_NOT_two_hybrid)

# subset affinity purification - mass spectrometry interactions
human_AP_MS <- subsetMITABbyMethod(
  MITABdata = human,
  Interaction_detection_methods = "MI:0004",
  Identification_method = "MI:0433"
)
uniqueNinteractions(human_AP_MS)
uniqueNinteractors(human_AP_MS)
# Filter the human interactome by publication ID (PMID).

# subset both published and unpublished Vidal group data
Vidal_all <- subsetMITABbyPMIDs(
  MITABdata = human,
  PMIDs = c("25416956", "unassigned1304")
)
uniqueNinteractions(Vidal_all)
uniqueNinteractors(Vidal_all)

# subset Matthias Mann 2015 paper data
Mann <- subsetMITABbyPMIDs(
  MITABdata = human,
  PMIDs = "26496610"
)
uniqueNinteractions(Mann)
uniqueNinteractors(Mann)
You can get help and more details on these functions (for example, how to find which molecular ontology terms correspond to which methods): ?subsetMITABbyMethod
# Download the UniProt entries matching GO:0016592 in taxonomy 9606 and use
# them as seed IDs to filter the Vidal group data.
# NOTE(review): this is the legacy www.uniprot.org/uniprot/ query URL; confirm
# it still resolves and still returns a column named `Entry`.
mediator_complex_proteins <- fread(
  "https://www.uniprot.org/uniprot/?query=GO:0016592%20AND%20taxonomy:9606&format=tab&columns=id"
)

# presumably keeps only interactions where both partners are seed proteins;
# confirm against ?subsetMITABbyID
mediator_complex <- subsetMITABbyID(
  Vidal_all,
  ID_seed = mediator_complex_proteins$Entry,
  within_seed = TRUE,
  only_seed2nonseed = FALSE
)
uniqueNinteractions(mediator_complex)
uniqueNinteractors(mediator_complex)
# Filter the Vidal group data with the same seed IDs, this time with
# within_seed = FALSE and only_seed2nonseed = TRUE.
# presumably keeps only interactions between a seed protein and a non-seed
# protein; confirm against ?subsetMITABbyID
mediator_complex_interactions <- subsetMITABbyID(
  Vidal_all,
  ID_seed = mediator_complex_proteins$Entry,
  within_seed = FALSE,
  only_seed2nonseed = TRUE
)
uniqueNinteractions(mediator_complex_interactions)
uniqueNinteractors(mediator_complex_interactions)
This can be useful if you need interactions for a small number of proteins or if you want to query non-IMEx databases. Note that queryPSICQUIC doesn't keep track of database version data, while queryPSICQUICrlib does.
# Examples of querying PSICQUIC services directly. Each call writes its
# result to the file given in `file`.

# Query for interactions of bacterial RNA polymerase sigma factor SigA
# identified using two-hybrid methods in all IMEx databases
queryPSICQUIC(
  query = "id:P74565 AND detmethod:\"MI:0018\"",
  format = "tab27",
  database = "imex",
  file = "./P74565_2H_interactions_imex_tab27.tsv"
)

# Query for interactions of sigma factor SigA identified using two-hybrid
# methods in mentha (a database that aggregates data from all primary
# databases, but does no interaction prediction)
queryPSICQUIC(
  query = "id:P74565 AND detmethod:\"MI:0018\"",
  format = "tab25",
  database = "mentha",
  file = "./P74565_2H_interactions_mentha_tab25.tsv"
)

# Query for interactions of sigma factor SigA in mentha (any detection
# method). Written to its own file: the original reused the "_2H_" file name
# of the previous query and would have overwritten that result.
queryPSICQUIC(
  query = "id:P74565",
  format = "tab25",
  database = "mentha",
  file = "./P74565_interactions_mentha_tab25.tsv"
)

# Retrieve interactions of any proteins encoded by the Beta-adrenergic
# receptor kinase 1 gene (Entrez GeneID 156) from BioGRID (which recognises
# only this type of ID)
queryPSICQUIC(
  query = "id:156",
  format = "tab25",
  database = "BioGrid",
  file = "./entrezgene156_interactions_BioGrid_tab25.tsv"
)

# The function returns a report of how many interactions were found in each
# database, not the data itself. Read the data into R from the saved file.
fread(
  "./entrezgene156_interactions_BioGrid_tab25.tsv",
  header = TRUE,
  stringsAsFactors = FALSE
)[1:5]
All the same operations can be done using the function queryPSICQUICrlib, but with the convenience of automatic tracking of the database release date and the exact query text. This function also returns the data as an object of class RAW_MItab which, once cleaned, is ready for use with other tools in the package.
# Query BioGRID for Entrez GeneID 156, storing the downloaded data under the
# current directory.
BioGrid_156 <- queryPSICQUICrlib(
  query = "id:156",
  format = "tab25",
  database = "BioGrid",
  directory = "./"
)

# The same protein, but only two-hybrid interactions
BioGrid_156_2H <- queryPSICQUICrlib(
  query = "id:156 AND detmethod:\"MI:0018\"",
  format = "tab25",
  database = "BioGrid",
  directory = "./"
)

# The data returned by queryPSICQUICrlib contains auxiliary information that
# is not necessary for most analyses. Let's clean the data.
BioGrid_156 <- cleanMITAB(BioGrid_156)
BioGrid_156$data[1:5]
# Store and print the current date and the session information.
Sys.Date. <- Sys.Date()
Sys.Date.

session_info. <- devtools::session_info()
session_info.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.