R/filterSif.R

Defines functions filterSif

Documented in filterSif

#' Keep interactions in SIF network based on certain criteria
#' 
#' Each supplied (non-NULL) filter selects a set of rows of \code{sif}; the rows 
#' returned are those selected by every supplied filter.
#' 
#' @param sif a binary SIF as a data.frame with three columns: 
#'   "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B"
#' @param ids a vector of IDs to be kept
#' @param interactionTypes a vector of interaction types to be kept
#'   (List of interaction types: http://www.pathwaycommons.org/pc2/formats)
#' @param dataSources a vector of data sources to be kept. For Extended SIF.
#' @param interactionPubmedIds a vector of Pubmed IDs to be kept. For Extended SIF.
#' @param pathwayNames a vector of pathway names to be kept. For Extended SIF.
#' @param mediatorIds a vector of mediator IDs to be kept. 
#'   Mediator IDs are the full BioPAX objects that were simplified to interaction 
#'   given in the SIF. For Extended SIF.
#' @param edgelist a two-column data.frame where each row is an interaction to be kept.
#'   A SIF row is kept only if its (PARTICIPANT_A, PARTICIPANT_B) pair equals one 
#'   edgelist row. By default directionality is ignored 
#'   (e.g. Edge A B will return interactions A B and B A from SIF); 
#'   see \code{edgelistCheckReverse}.
#' @param idsBothParticipants a boolean whether both interaction participants should be in 
#'   a given interaction when using the ids parameter; TRUE if both (DEFAULT: FALSE)
#' @param edgelistCheckReverse a boolean whether to check for edges in the reverse order (DEFAULT: TRUE)
#' @param verbose Show debugging information (DEFAULT: FALSE). 
#'   Currently not used by this function.
#'  
#' @return the rows of \code{sif} (all of its columns, including 
#'   "PARTICIPANT_A", "INTERACTION_TYPE", "PARTICIPANT_B") that pass the filters. 
#'   The intersection of multiple filters is returned. If no filter is supplied, 
#'   all rows are returned.
#' 
#' @examples 
#' results <- readSif(system.file("extdata", "test_sif.txt", package="paxtoolsr"))
#' intTypes <- c("controls-state-change-of", "controls-expression-of", "catalysis-precedes")
#' filteredNetwork <- filterSif(results, intTypes)
#' 
#' tmp <- readSifnx(system.file("extdata", "test_sifnx_250.txt", package = "paxtoolsr"))
#' results <- filterSif(tmp$edges, ids=c("CHEBI:17640", "MCM3"))
#' results <- filterSif(tmp$edges, dataSources=c("INOH", "KEGG"))
#' results <- filterSif(tmp$edges, dataSources=c("IntAct"), ids=c("CHEBI:17640", "MCM3"))
#' results <- filterSif(tmp$edges, pathwayNames=c("Metabolic pathways"))
#' results <- filterSif(tmp$edges, 
#'   mediatorIds=c("http://purl.org/pc2/8/MolecularInteraction_1452626895158"))
#' results <- filterSif(tmp$edges, interactionPubmedIds="17654400")
#' 
#' tmp <- readSifnx(system.file("extdata", "test_sifnx_250.txt", package = "paxtoolsr"))
#' edgelist <- read.table(system.file("extdata", "test_edgelist.txt", package = "paxtoolsr"), 
#'   sep="\t", header=FALSE, stringsAsFactors=FALSE)
#' results <- filterSif(tmp$edges, edgelist=edgelist)
#' 
#' @concept paxtoolsr
#' @export
filterSif <- function(sif, ids=NULL, interactionTypes=NULL, dataSources=NULL, interactionPubmedIds=NULL, pathwayNames=NULL, mediatorIds=NULL, edgelist=NULL, idsBothParticipants=FALSE, edgelistCheckReverse=TRUE, verbose=FALSE) {
    # Must be a list from the start: `[[<-` applied to NULL with a length-one 
    # value creates an atomic vector, and a later filter with several matching 
    # rows could then not be stored in it.
    idxList <- list()
    
    # Guard for index vectors built with unlist(): a NULL result would make 
    # `[[<-` store nothing and the filter would silently disappear.
    asIndex <- function(x) {
        if(is.null(x)) integer(0) else x
    }
    
    # One key per (from, to) pair so that edges are compared as pairs. 
    # Tab is the separator because SIF is a tab-delimited format, so IDs are 
    # not expected to contain it.
    edgeKey <- function(from, to) {
        paste(as.character(from), as.character(to), sep="\t")
    }
    
    if(!is.null(ids)) {
        aIdx <- which(sif$PARTICIPANT_A %in% ids) 
        bIdx <- which(sif$PARTICIPANT_B %in% ids) 
        
        if(idsBothParticipants) {
            idxIds <- intersect(aIdx, bIdx)
        } else {
            idxIds <- unique(c(aIdx, bIdx))
        }
        
        idxList[["idxIds"]] <- idxIds
    } 
    
    if(!is.null(interactionTypes)) {
        idxList[["idxInteractionTypes"]] <- which(sif$INTERACTION_TYPE %in% interactionTypes) 
    } 
    
    if(!is.null(dataSources)) {
        results <- searchListOfVectors(dataSources, sif$INTERACTION_DATA_SOURCE)
        
        idxList[["idxDataSources"]] <- asIndex(unique(unlist(results)))
    } 
    
    if(!is.null(interactionPubmedIds)) {
        # NOTE(review): plain %in% is used here, unlike dataSources/mediatorIds 
        # which go through searchListOfVectors; if this column can hold several 
        # values per row, confirm this still matches as intended.
        idxList[["idxInteractionPubmedId"]] <- which(sif$INTERACTION_PUBMED_ID %in% interactionPubmedIds) 
    }
    
    if(!is.null(pathwayNames)) {
        # NOTE(review): same remark as for interactionPubmedIds -- confirm the 
        # column layout of PATHWAY_NAMES.
        idxList[["idxPathwayNames"]] <- which(sif$PATHWAY_NAMES %in% pathwayNames) 
    } 
    
    if(!is.null(mediatorIds)) {
        results <- searchListOfVectors(mediatorIds, sif$MEDIATOR_IDS)
        
        idxList[["idxMediatorIds"]] <- asIndex(unique(unlist(results)))
    } 
    
    if(!is.null(edgelist)) {
        sifKeys <- edgeKey(sif$PARTICIPANT_A, sif$PARTICIPANT_B)
        
        # A SIF row matches only when both participants come from the same 
        # edgelist row (not merely A in column 1 and B in column 2 somewhere).
        idxEdgelist <- which(sifKeys %in% edgeKey(edgelist[,1], edgelist[,2]))
        
        # Same in reverse
        if(edgelistCheckReverse) {
            idxReverse <- which(sifKeys %in% edgeKey(edgelist[,2], edgelist[,1]))
            
            # unique(): a row matching in both orientations is kept only once
            idxEdgelist <- unique(c(idxEdgelist, idxReverse))
        }
        
        idxList[["idxEdgelist"]] <- idxEdgelist
    } 

    if(length(idxList) == 0) {
        # No filter supplied: nothing to exclude, keep every row
        idx <- seq_len(nrow(sif))
    } else {
        idx <- Reduce(intersect, idxList)
    }
    
    filteredNetwork <- sif[idx, ]
    
    return(filteredNetwork)
}
BioPAX/paxtoolsr documentation built on Jan. 31, 2023, 4:22 a.m.