R/GetExpression.R

Defines functions GetExpression

Documented in GetExpression

#' Connect and parse UniProt Expression information.
#'
#' Retrieves Expression data from UniProt for a list of protein
#' accessions. For more information about what is included in the
#' Expression data, see https://www.uniprot.org/help/return_fields.
#'
#' @usage GetExpression(ProteinAccList , directorypath = NULL)
#'
#' @param ProteinAccList Vector of UniProt Accession/s
#'
#' @param directorypath Path to a directory in which to save a CSV file containing the results returned by the function.
#'
#'
#' @return A data frame whose row names are the accessions
#'      and whose columns contain the information retrieved from UniProt.
#'
#' @note When \code{directorypath} is supplied, the function also creates a CSV file with the retrieved information.
#'
#'
#' @export
#'
#' @author Mohmed Soudy \email{Mohamed.soudy@57357.com} and Ali Mostafa \email{ali.mo.anwar@std.agr.cu.edu.eg}

GetExpression <- function(ProteinAccList , directorypath = NULL){
  # Queries the UniProt REST search endpoint once per accession for the
  # developmental-stage, induction and tissue-specificity fields and returns
  # a data frame with one row per successfully parsed accession (row names
  # are the accessions).
  #
  # Returns NULL when there is no internet connection or when a request
  # fails at the connection level. When directorypath is not NULL the result
  # is also written to "<directorypath>/Expression Information.csv".

  if (!has_internet()) {
    message("Please connect to the internet as the package requires internect connection.")
    return(NULL)
  }
  message("Please wait we are processing your accessions ...")
  pb <- progress::progress_bar$new(total = length(ProteinAccList))

  # Expression information to be collected
  # (see https://www.uniprot.org/help/return_fields)
  columns <- "cc_developmental_stage,cc_induction,cc_tissue_specificity"
  baseUrl <- "https://rest.uniprot.org/uniprotkb/search?query=accession:"

  # Collect one single-row data frame per accession and bind them once after
  # the loop instead of growing a data frame with rbind() on every iteration.
  ParsedRows <- vector("list", length(ProteinAccList))
  ParsedCount <- 0L

  for (ProteinAcc in ProteinAccList) {
    # Tab-separated response restricted to the requested fields
    RequestUrl <- URLencode(
      paste0(baseUrl, ProteinAcc, "&format=tsv&fields=", columns)
    )

    # A single request per accession: the same response provides both the
    # status code and the body that is parsed below.
    Request <- tryCatch(
      GET(RequestUrl),
      error = function(cond) {
        message("Internet connection problem occurs and the function will return the original error")
        message(cond)
        NULL
      }
    )

    # The error handler yields NULL, so an empty result means the request
    # itself could not be performed.
    if (length(Request) == 0) {
      message("Internet connection problem occurs")
      return(NULL)
    }

    if (Request$status_code == 200) {
      # Parse the response body into a data frame; an unparsable body is
      # skipped for this accession rather than aborting the whole run.
      ProteinDataTable <- tryCatch(
        read.csv(
          text = httr::content(Request, as = "text", encoding = "UTF-8"),
          header = TRUE,
          sep = "\t"
        ),
        error = function(e) NULL
      )
      if (!is.null(ProteinDataTable)) {
        # Keep only the first hit; drop = FALSE guarantees a data frame
        ProteinDataTable <- ProteinDataTable[1, , drop = FALSE]
        ParsedCount <- ParsedCount + 1L
        ParsedRows[[ParsedCount]] <- as.data.frame(
          ProteinDataTable,
          row.names = ProteinAcc
        )
      }
    } else {
      HandleBadRequests(Request$status_code)
    }
    pb$tick()
  }

  # Combine the per-accession rows; an empty data frame if nothing was parsed
  if (ParsedCount > 0L) {
    ProteinInfoParsed_total <- do.call(rbind, ParsedRows[seq_len(ParsedCount)])
  } else {
    ProteinInfoParsed_total <- data.frame()
  }

  if (!is.null(directorypath)) {
    write.csv(
      ProteinInfoParsed_total,
      file.path(directorypath, "Expression Information.csv")
    )
  }
  return(ProteinInfoParsed_total)
}

Try the UniprotR package in your browser

Any scripts or data that you put into this service are public.

UniprotR documentation built on Aug. 27, 2023, 5:06 p.m.