############################################################
#
# author: Ludwig Geistlinger
# date: 2021-02-26 08:22:09
#
# descr: obtain BioPlex PPIs
#
############################################################
#' @title Obtain BioPlex protein-protein interaction data
#' @description Functionality for retrieving the BioPlex protein-protein
#' interaction data. Available networks include:
#' \itemize{
#' \item BioPlex 293T cells (versions 1.0, 2.0, and 3.0),
#' \item BioPlex HCT116 cells (version 1.0).
#' } See references.
#' @param cell.line character. Valid options include:
#' \itemize{
#' \item \code{"293T"}: derivative of human embryonic kidney 293 cell line,
#' \item \code{"HCT116"}: human colon cancer cell line 116.
#' }
#' Defaults to \code{"293T"}.
#' @param version character. Valid options include \code{"1.0"}, \code{"2.0"},
#' and \code{"3.0"} for 293T cells. For HCT116 cells, only \code{"1.0"} is
#' available; any other value is replaced by \code{"1.0"} for that cell line.
#' Defaults to \code{"3.0"}.
#' @param remap.uniprot.ids logical. Should the protein-to-gene mappings from
#' BioPlex (i.e. UNIPROT-to-SYMBOL and UNIPROT-to-ENTREZID) be updated using
#' Bioc annotation functionality?
#' Defaults to \code{FALSE} which will then keep the mappings provided by
#' BioPlex.
#' @param cache logical. Should a locally cached version be used if available?
#' Defaults to \code{TRUE}.
#' @return A \code{data.frame} with one row per interaction and the columns
#' \code{GeneA}, \code{GeneB}, \code{UniprotA}, \code{UniprotB},
#' \code{SymbolA}, \code{SymbolB}, \code{pW}, \code{pNI}, and \code{pInt}.
#' \code{GeneA} and \code{GeneB} are returned as character vectors.
#' @references BioPlex: \url{https://bioplex.hms.harvard.edu/interactions.php}
#' @examples
#' # (1) Obtain the latest version of the 293T PPI network
#' bp.293t <- getBioPlex(cell.line = "293T", version = "3.0")
#'
#' # (2) Obtain the latest version of the HCT116 PPI network
#' bp.hct116 <- getBioPlex(cell.line = "HCT116", version = "1.0")
#' @importFrom utils read.delim
#' @export
getBioPlex <- function(cell.line = c("293T", "HCT116"),
                       version = c("3.0", "1.0", "2.0"),
                       remap.uniprot.ids = FALSE,
                       cache = TRUE)
{
    bioplex.url <- "https://bioplex.hms.harvard.edu/data/BioPlex_"

    cell.line <- match.arg(cell.line)
    version <- match.arg(version)

    # we only have version 1.0 for HCT116 cells currently
    if(cell.line == "HCT116") version <- "1.0"

    # key identifying the requested network, e.g. "293T.3.0"
    clver <- paste(cell.line, version, sep = ".")
    rname <- paste("bioplex", clver, sep = ".")

    # should a cached version be used? A NULL result counts as a cache miss.
    ppi.file <- NULL
    if(cache) ppi.file <- .getResourceFromCache2(rname)

    if(is.null(ppi.file))
    {
        # file name stem of the requested network on the BioPlex server;
        # clver can only take one of these four values (see match.arg above)
        file.stem <- switch(clver,
                            `293T.1.0` = "interactionList_v2",
                            `293T.2.0` = "interactionList_v4a",
                            `293T.3.0` = "293T_Network_10K_Dec_2019",
                            `HCT116.1.0` = "HCT116_Network_5.5K_Dec_2019")
        remote.file <- paste0(bioplex.url, file.stem, ".tsv")

        # presumably downloads the file into the local cache and returns
        # its local path -- confirm against .cacheResource2
        ppi.file <- .cacheResource2(rname, remote.file)
    }

    bioplex <- read.delim(ppi.file)

    # harmonize column names across the different BioPlex releases
    colnames(bioplex) <- c("GeneA", "GeneB",
                           "UniprotA", "UniprotB",
                           "SymbolA", "SymbolB",
                           "pW", "pNI", "pInt")
    bioplex$GeneA <- as.character(bioplex$GeneA)
    bioplex$GeneB <- as.character(bioplex$GeneB)

    # remap gene ids
    if(remap.uniprot.ids) bioplex <- .remapUniprotIdsBP(bioplex)

    return(bioplex)
}
#' @title Representation of BioPlex PPIs in a graph data structure
#' @description Representation of BioPlex PPIs in a \code{graphNEL} object
#' from the \code{graph} package. Isoform suffixes (a trailing dash followed
#' by digits) are removed from the UniProt IDs before they are used as node
#' names; interactions that become duplicates as a result are collapsed,
#' keeping the first occurrence.
#' @param bioplex.df a \code{data.frame} storing the Bioplex PPIs in a flat
#' from-to format. Typically obtained via \code{\link{getBioPlex}}. Must
#' contain the columns \code{UniprotA} and \code{UniprotB}.
#' @return An object of class \code{graphNEL} with directed edges from
#' \code{UniprotA} to \code{UniprotB}.
#' @references BioPlex: \url{https://bioplex.hms.harvard.edu/interactions.php}
#' @seealso \code{\link{getBioPlex}}, \code{\link{ftM2graphNEL}}
#' @examples
#' # (1) Obtain the latest version of the 293T PPI network
#' bp.293t <- getBioPlex(cell.line = "293T", version = "3.0")
#'
#' # (2) Turn the data into a graph
#' bp.gr <- bioplex2graph(bp.293t)
#'
#' @export
bioplex2graph <- function(bioplex.df)
{
    stopifnot(is.data.frame(bioplex.df))

    node.cols <- paste0("Uniprot", c("A", "B"))
    missing.cols <- setdiff(node.cols, colnames(bioplex.df))
    if(length(missing.cols) > 0L)
        stop("bioplex.df lacks required column(s): ",
             paste(missing.cols, collapse = ", "))

    # two-column from-to matrix of UniProt IDs without isoform suffix
    ftm <- as.matrix(bioplex.df[, node.cols])
    ftm <- sub("-[0-9]+$", "", ftm)

    # keep the first occurrence of each from-to pair; drop = FALSE keeps
    # the matrix shape when only a single interaction remains
    ind <- !duplicated(ftm)
    ftm <- ftm[ind, , drop = FALSE]
    bioplex.df <- bioplex.df[ind, , drop = FALSE]

    gr <- graph::ftM2graphNEL(ftm, edgemode = "directed")
    gr <- .annotateBioplexGraph(gr, bioplex.df)
    return(gr)
}
#' @title Annotate PFAM domains to BioPlex PPI graph
#' @description Looks up the PFAM domains of every node of the BioPlex PPI
#' graph and stores them in the node metadata under the attribute
#' \code{"PFAM"}.
#' @param bp.gr an object of class \code{\linkS4class{graph}} storing the
#' BioPlex PPIs. Typically obtained via \code{\link{bioplex2graph}}. The node
#' names are used as UNIPROT keys for the lookup.
#' @param orgdb an \code{orgdb} object storing annotation data for human.
#' @return An object of class \code{graphNEL} containing PFAM domain annotations
#' in the \code{nodeData}.
#' @references
#' BioPlex: \url{https://bioplex.hms.harvard.edu/interactions.php}
#'
#' PFAM: \url{http://pfam.xfam.org}
#' @seealso \code{\link{nodeData}}
#' @examples
#' # (1) Obtain the latest version of the 293T PPI network
#' bp.293t <- getBioPlex(cell.line = "293T", version = "3.0")
#'
#' # (2) Turn the data into a graph
#' bp.gr <- bioplex2graph(bp.293t)
#'
#' # (3) Obtain orgdb package from AnnotationHub
#' ah <- AnnotationHub::AnnotationHub()
#' orgdb <- AnnotationHub::query(ah, c("orgDb", "Homo sapiens"))
#' orgdb <- orgdb[[1]]
#'
#' # (4) Annotate PFAM domains
#' bp.gr <- annotatePFAM(bp.gr, orgdb)
#'
#' @export
annotatePFAM <- function(bp.gr, orgdb)
{
    node.ids <- graph::nodes(bp.gr)

    # one list element per node, holding all PFAM IDs mapped to that node
    pfam.by.node <- suppressMessages(
        AnnotationDbi::mapIds(orgdb,
                              keys = node.ids,
                              keytype = "UNIPROT",
                              column = "PFAM",
                              multiVals = "list")
    )

    # register the attribute with an NA default, then fill in the lookups
    graph::nodeDataDefaults(bp.gr, "PFAM") <- NA
    graph::nodeData(bp.gr, node.ids, "PFAM") <- pfam.by.node

    return(bp.gr)
}
# Attach BioPlex annotation to a graph built from the PPI table.
#
# Slots of the graph object that we will want to annotate somewhat:
# 'edgeData': An 'attrData' instance for edge attributes.
# 'nodeData': An 'attrData' instance for node attributes.
# 'graphData': A 'list' for graph-level attributes. Only mandatory
#              list item is 'edgemode' which indicates whether edges are
#              '"directed"' or '"undirected"'.
#
# gr: graph whose node names are UniProt IDs without isoform suffix
# df: data.frame of interactions with the columns UniprotA/B, SymbolA/B,
#     GeneA/B, pW, pNI, and pInt
#
# Returns gr with the node attributes ENTREZID, SYMBOL, and ISOFORM and the
# edge attributes pW, pNI, and pInt.
.annotateBioplexGraph <- function(gr, df)
{
    strip.isoform <- function(ids) sub("-[0-9]+$", "", ids)

    uniprot.cols <- paste0("Uniprot", c("A", "B"))
    symbol.cols <- paste0("Symbol", c("A", "B"))
    entrez.cols <- paste0("Gene", c("A", "B"))

    # stack the A and B columns so that each protein is listed once per
    # appearance, with symbol and entrez ID at the same position
    isoform.ids <- unlist(df[,uniprot.cols])
    symbols <- unlist(df[,symbol.cols])
    entrez.ids <- unlist(df[,entrez.cols])

    # keep the first appearance of each (isoform-level) UniProt ID
    first.seen <- !duplicated(isoform.ids)
    isoform.ids <- isoform.ids[first.seen]
    symbols <- symbols[first.seen]
    entrez.ids <- entrez.ids[first.seen]

    # lookup tables keyed by UniProt ID without isoform suffix:
    # uniprot -> entrez, uniprot -> symbol, uniprot -> isoform
    base.ids <- strip.isoform(isoform.ids)
    names(entrez.ids) <- base.ids
    names(symbols) <- base.ids
    names(isoform.ids) <- base.ids

    # graph data annotation:
    # when starting off with an ordinary data.frame we lose the ability
    # to annotate graph-level annotation such as cell.line, version, PMID, ...
    # we might need to work with DataFrames where we have mcols and metadata

    # node data annotation
    node.ids <- graph::nodes(gr)
    node.maps <- list(ENTREZID = entrez.ids,
                      SYMBOL = symbols,
                      ISOFORM = isoform.ids)
    for(attr.name in names(node.maps))
    {
        graph::nodeDataDefaults(gr, attr.name) <- NA
        graph::nodeData(gr, node.ids, attr.name) <-
            node.maps[[attr.name]][node.ids]
    }

    # edge data annotation: edges are addressed by suffix-free UniProt IDs
    from <- strip.isoform(df[,uniprot.cols[1]])
    to <- strip.isoform(df[,uniprot.cols[2]])
    for(score in c("pW", "pNI", "pInt"))
    {
        graph::edgeDataDefaults(gr, score) <- numeric(0L)
        graph::edgeData(gr, from, to, score) <- df[,score]
    }

    return(gr)
}
#' @title Map experimental data onto a graph
#' @description Functionality for mapping experimental data stored in
#' a \code{\linkS4class{SummarizedExperiment}} onto a
#' \code{\linkS4class{graph}} object. Rows of \code{se} and nodes of
#' \code{gr} are matched by name; an error is raised if they have no IDs in
#' common.
#' @param gr an object of class \code{\linkS4class{graph}}.
#' @param se an object of class \code{\linkS4class{SummarizedExperiment}}.
#' @param col.names character. Column names of \code{se} for which assay
#' data should be mapped onto the nodes of \code{gr}. Defaults to \code{NULL}
#' which will then use all column names of \code{se}.
#' @param rowdata.cols character. Column names of \code{rowData(se)} which
#' should be mapped onto the nodes of \code{gr}. Defaults to \code{NULL}
#' which will then use all column names of \code{rowData(se)}.
#' @param prefix character. Informative prefix that should be pasted together
#' with the selected \code{col.names} and \code{rowdata.cols} to allow easy
#' identification of columns of interest when mapping from multiple experimental
#' datasets. Defaults to the empty string, i.e. no prefix.
#' @return An object of class \code{\linkS4class{graph}} with one node
#' attribute per selected assay column and per selected \code{rowData}
#' column. Each attribute has \code{NA} as its default value.
#' @examples
#' # (1) Obtain the latest version of the 293T PPI network ...
#' bp.293t <- getBioPlex(cell.line = "293T", version = "3.0")
#'
#' # (2) ... and turn into a graph
#' bp.gr <- bioplex2graph(bp.293t)
#'
#' # (3) Obtain the BioPlex3 proteome data ...
#' se <- getBioplexProteome()
#'
#' # (4) ... and map onto the graph
#' bp.gr <- mapSummarizedExperimentOntoGraph(bp.gr, se)
#' @export
mapSummarizedExperimentOntoGraph <- function(gr, se,
                                             col.names = NULL,
                                             rowdata.cols = NULL,
                                             prefix = "")
{
    # IDs present both as row names of se and as nodes of gr
    shared.ids <- intersect(rownames(se), graph::nodes(gr))
    if(length(shared.ids) == 0L)
    {
        stop("SummarizedExperiment (se)", " and ",
             "graph (gr) have no node IDs in common")
    }

    # resolve / validate the requested assay columns
    if(is.null(col.names))
    {
        col.names <- colnames(se)
    }
    else if(any(!(col.names %in% colnames(se))))
    {
        stop("Invalid col.names provided")
    }

    # resolve / validate the requested rowData columns
    if(is.null(rowdata.cols))
    {
        rowdata.cols <- colnames(rowData(se))
    }
    else if(any(!(rowdata.cols %in% colnames(rowData(se)))))
    {
        stop("Invalid rowdata.cols provided")
    }

    # map assay data onto nodes
    for(assay.col in col.names)
    {
        attr.name <- paste0(prefix, assay.col)
        graph::nodeDataDefaults(gr, attr.name) <- NA
        graph::nodeData(gr, shared.ids, attr.name) <-
            assay(se)[shared.ids, assay.col]
    }

    # map rowData onto nodes
    for(rd.col in rowdata.cols)
    {
        attr.name <- paste0(prefix, rd.col)
        graph::nodeDataDefaults(gr, attr.name) <- NA
        graph::nodeData(gr, shared.ids, attr.name) <-
            rowData(se)[shared.ids, rd.col]
    }

    return(gr)
}
# Replace the gene annotation shipped with BioPlex by a fresh lookup.
#
# df: data.frame of interactions with the columns UniprotA and UniprotB
#
# The UniProt IDs (without isoform suffix) are mapped to ENTREZID and SYMBOL
# using the first human orgDb record returned by AnnotationHub. The results
# overwrite the columns GeneA/GeneB and SymbolA/SymbolB of df, which is
# returned.
.remapUniprotIdsBP <- function(df)
{
    stopifnot(is.data.frame(df))

    # UniProt IDs without trailing isoform suffix ("-<digits>")
    isoform.suffix <- "-[0-9]+$"
    uniprot.a <- sub(isoform.suffix, "", df$UniprotA)
    uniprot.b <- sub(isoform.suffix, "", df$UniprotB)

    # first hit for a human orgDb in AnnotationHub
    orgdb <- suppressMessages({
        hub <- AnnotationHub::AnnotationHub()
        AnnotationHub::query(hub, c("orgDb", "Homo sapiens"))[[1]]
    })

    # quiet UNIPROT -> <column> lookup against the orgDb
    lookup <- function(keys, column)
    {
        suppressMessages(
            AnnotationDbi::mapIds(orgdb,
                                  keys = keys,
                                  keytype = "UNIPROT",
                                  column = column)
        )
    }

    df$GeneA <- lookup(uniprot.a, "ENTREZID")
    df$GeneB <- lookup(uniprot.b, "ENTREZID")
    df$SymbolA <- lookup(uniprot.a, "SYMBOL")
    df$SymbolB <- lookup(uniprot.b, "SYMBOL")

    return(df)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.