R/getPPI_String.R

Defines functions getPPI_String

Documented in getPPI_String

#' get PPI from STRING 11.0
#'
#' get PPI from STRING 11.0
#'
#' @param object an Seurat object which contains the expression matrix
#' @param species NCBI taxonomy identifier of the species to query for homologs, default is 9606(Homosapiens)
#' @param score_threshold threshold on the score of the interaction, default is 600
#' @param save whether save the result as a Rdata file, default is FALSE
#'
#' @return PPI matrix
#' @import STRINGdb data.table
#' @export
#'
#' @examples
getPPI_String <- function(object = NULL, species = 9606, score_threshold = 600, save = FALSE)
{
  linkFiles <- paste('https://stringdb-static.org/download/protein.links.v11.0/', species, '.protein.links.v11.0.txt.gz', sep="")
  if(!file.exists(sub(pattern = '.gz', replacement = '', x = basename(linkFiles))))
  {
    if(!file.exists(basename(linkFiles)))
      download.file(linkFiles, destfile = basename(linkFiles))
    gf <- gzfile(basename(linkFiles), 'rt')
  }
  PPI <- read.table(gf, header = T, sep = '')
  close(gf)

  infoFiles <- paste('https://stringdb-static.org/download/protein.info.v11.0/', species, '.protein.info.v11.0.txt.gz', sep = '')
  if(!file.exists(sub(pattern = '.gz', replacement = '', x = basename(infoFiles))))
  {
    if(!file.exists(basename(infoFiles)))
      download.file(infoFiles, destfile = basename(infoFiles))
    gf <- gzfile(basename(infoFiles), 'rt')
  }
  Pinfo <- read.table(gf, header = T, sep = '\t', colClasses = c('character','character','NULL','NULL'), quote = '', row.names = 1)
  close(gf)

  PPI <- subset(PPI, combined_score > score_threshold)
  ENSP1 <- levels(PPI[,1])
  levels(PPI[,1]) <- toupper(Pinfo[ENSP1,])
  ENSP2 <- levels(PPI[,2])
  levels(PPI[,2]) <- toupper(Pinfo[ENSP2,])

  if(!is.null(object))
  {
    gene_data <- rownames(object)
    gene_data_upper <- toupper(gene_data)
    gene_data <- as.data.frame(unique(as.data.table(data.frame(gene_data, gene_data_upper)), by = 'gene_data_upper'))
    rownames(gene_data) <- gene_data[,2]
    PPI <- PPI[which(is.element(PPI[,1], gene_data[,2])),]
    PPI <- PPI[which(is.element(PPI[,2], gene_data[,2])),]
    levels(PPI[,1]) <- gene_data[levels(PPI[,1]),1]
    levels(PPI[,2]) <- gene_data[levels(PPI[,2]),1]
  }
  nodes <- union(PPI[,1],PPI[,2])
  links <- PPI[,1:2]
  net <- graph_from_data_frame(d = links,vertices = nodes,directed = FALSE)
  net <- igraph::simplify(net)
  if(save){
    saveRDS(as_adj(net),paste(species,'_ppi_matrix_STRING-11.0.Rda',sep=""))
  }
  file.remove(paste(species,'.protein.links.v11.0.txt.gz',sep=""))
  file.remove(paste(species,'.protein.info.v11.0.txt.gz',sep=""))
  return(as_adj(net))
}
wycwycpku/RSCORE documentation built on Sept. 5, 2021, 4:25 p.m.