R/accdpx.R

Defines functions stru.part res.dpx atom.dpx dpx get.area acc.dssp mkdssp parse.dssp

Documented in acc.dssp atom.dpx dpx get.area mkdssp parse.dssp res.dpx stru.part

## --------------- accdpx.R ----------------- ##
#                                              #
#       parse.dssp                             #
#       mkdssp                                 #
#       acc.dssp                               #
#       get.area                               #
#       dpx                                    #
#       atom.dpx                               #
#       res.dpx                                #
#       stru.part                              #
#                                              #
## ------------------------------------------ ##


## ---------------------------------------------------------------- ##
#      parse.dssp <- function(file, keepfiles = FALSE)               #
## ---------------------------------------------------------------- ##
#' Parse a DSSP File to Return a Dataframe
#' @description Parses a DSSP file to return a dataframe.
#' @usage parse.dssp(file, keepfiles = FALSE)
#' @param file input dssp file.
#' @param keepfiles logical, if TRUE the dataframe will be saved in the working directory and we will keep the dssp file.
#' @details If the argument 'keepfiles' is not set to TRUE, the dssp file used to get the parsed dataframe will be removed.
#' @return Returns a dataframe providing data for 'acc', 'ss', 'phi' and 'psi' for each residues from the structure.
#' @author Juan Carlos Aledo
#' @examples \dontrun{compute.dssp('3cwm'); parse.dssp('3cwm.dssp')}
#' @references Touw et al (2015) Nucl. Ac. Res. 43(Database issue): D364-D368 (PMID: 25352545).
#' @seealso download.dssp(), compute.dssp(), mkdssp() and acc.dssp()
#' @export

parse.dssp <- function(file, keepfiles = FALSE){
  ## --------------- Reading the dssp file ------------------ ##
  con <- tryCatch(
    {
      file(file, 'r')
    },
    error = function(cond){
      return(NULL)
    },
    warning = function(w) conditionMessage(w)
  )
  if (is.null(con) | grepl("no fue posible", con) | grepl("No such file", con)){
    message("Sorry, no connection could be established")
    return(NULL)
  }

  counter <- 0
  resnum <- c()
  respdb <- c()
  chain <- c()
  aa <- c()
  ss <- c()
  sasa <- c()
  phi <- c()
  psi <- c()

  while(TRUE){
    line <- readLines(con, n = 1)
    counter <- counter + 1

    if (counter == 1){
      l <- strsplit(line, split = "")[[1]]
      l <- paste(l, collapse = "")
      if ("have bz2" %in% l){
        first_valid_line <- 29 # dssp file coming from the API
      } else {
        first_valid_line <- 28 # dssp file coming from the sync
      }
    }

    if (counter > first_valid_line & length(line) != 0){
      a <- strsplit(line, split = "")[[1]]
      resnum <- c(resnum, paste(a[1:5], collapse = ""))
      respdb <- c(respdb, paste(a[6:10], collapse = ""))
      chain <- c(chain, paste(a[11:12], collapse = ""))
      aa <- c(aa, paste(a[13:14], collapse = ""))
      ss <- c(ss, paste(a[15:17], collapse = ""))
      sasa <- c(sasa, paste(a[36:38], collapse = ""))
      phi <- c(phi, paste(a[104:109], collapse = ""))
      psi <- c(psi, paste(a[110:115], collapse = ""))
    }
    if (length(line) == 0){
      break
    }
  }
  close(con)

  ## ------ Setting the variable types ------------- ##
  resnum <- as.numeric(resnum)
  respdb <- as.numeric(respdb)
  chain <- gsub(" ", "", chain)
  aa <- gsub(" ", "", aa)
  ss <- gsub("   ", "C", ss)
  ss <- gsub(" ", "", ss)

  ## -------- Building the dataframe ---------------- ##
  df <- as.data.frame(matrix(c(resnum, respdb, chain, aa,
                               ss, sasa, phi, psi), ncol = 8),
                      stringsAsFactors = FALSE)

  colnames(df) <- c('resnum', 'respdb', 'chain', 'aa', 'ss',
                    'sasa', 'phi', 'psi')

  df$resnum <- as.numeric(df$resnum)
  df$respdb <- as.numeric(df$respdb)
  df$sasa <- as.numeric(df$sasa)
  df$phi <- as.numeric(df$phi)
  df$psi <- as.numeric(df$psi)

  if (keepfiles == TRUE){
    save(df, file = paste(file, ".Rda", sep = ""))
  } else {
    file.remove(file)
  }

  ## --------------- Remove empty lines between chains ------------- ##
  badlines <- c()
  for (i in 1:nrow(df)){
    if (df$aa[i] == '!' | df$aa[i] == 'X'){
      badlines <- c(badlines, i)
    }
  }
  if (length(badlines) != 0){
    df <- df[-badlines,]
    df$resnum <- 1:nrow(df)
  }
  return(df)
}


## ---------------------------------------------------------------- ##
#         mkdssp <- function(pdb, method, exefile = "dssp")          #
## ---------------------------------------------------------------- ##
#' Compute DSSP File Using an In-House Version of the DSSP Software
#' @description Computes the DSSP file using an in-house version of the DSSP software.
#' @usage mkdssp(pdb, method = 'ptm', exefile = "dssp")
#' @param pdb is either a 4-character identifier of the PDB structure, or the path to a pdb file.
#' @param method a character string specifying the desired method to get the dssp dataframe; it should be one of 'ptm' or 'bio3d'.
#' @param exefile  file path to the DSSP executable on your system (i.e. how is DSSP invoked).
#' @details The structure of the output data depends on the method chosen, but it will always contain the DSSP-related data.
#' @return Returns either a dataframe containing the information extracted from the dssp file (method ptm), or a list with that information (method bio3d).
#' @author Juan Carlos Aledo
#' @examples \dontrun{mkdssp('3cwm', method = 'ptm')}
#' @references Touw et al (2015) Nucl. Ac. Res. 43(Database issue): D364-D368 (PMID: 25352545).
#' @seealso download.dssp(), parse.dssp(), compute.dssp() and acc.dssp()
#' @importFrom bio3d dssp
#' @importFrom bio3d read.pdb
#' @importFrom bio3d get.pdb
#' @export

mkdssp <- function(pdb, method = 'ptm', exefile = "dssp"){

  ## --- pdb id or path to its file
  del <- FALSE
  if (nchar(pdb) == 4){ # when input is a PDB ID
    suppressWarnings(bio3d::get.pdb(pdb)) # avoids warning: 'pdb exists. Skipping download'
    file <- paste("./", pdb, ".pdb", sep = "")
    del <- TRUE
  } else {
    file <- pdb
  }

  ## ---- Path to DSSP executable
  exefile <- .get.exepath(exefile)
  success <- .test.exefile(exefile)

  if (!success){
    message(paste("Launching external program 'dssp' (or 'mkdssp') failed\n",
               "  make sure '", exefile, "' is in your search path", sep = ""))
    return(NULL)
  }

  ## ------ Method to compute DSSP
  if(method == 'ptm'){
    mydssp <- tryCatch(
      {
        system(paste(exefile, " -i ", file, " -o ./temp.dssp", sep = ""))
      },
      error = function(cond){
        message(cond)
        return(NULL)
      }
    )
    if (is.null(mydssp) | mydssp == 1){
      return(NULL)
    }

    mydssp <- parse.dssp('./temp.dssp')

  } else if (method == 'bio3d'){

    mydssp <- tryCatch(
      {
        suppressWarnings(bio3d::dssp(read.pdb(file) , exefile = exefile))
      },
      error = function(cond){
        return(NULL)
      }
    )
    if (is.null(mydssp)){
      return(NULL)
    }
  }

  if (del){ # if a pdb file was downloaded now is deleted
    file.remove(file)
  }
  return(mydssp)
}


## ---------------------------------------------------------------- ##
#         acc.dssp <- function(pdb, aa = 'all')                      #
## ---------------------------------------------------------------- ##
#' Compute Residue Accessibility and SASA
#' @description Computes the accessibility as well as the SASA for each reside from the indicated protein.
#' @usage acc.dssp(pdb, aa = 'all')
#' @param pdb is either a PDB id, or the path to a pdb file.
#' @param aa one letter code for the amino acid of interest, or 'all' for all the protein residues.
#' @details You must have installed DSSP on your system and in the search path for executables.
#' @return A dataframe where each row is an individual residue of the selected protein. The variables computed, among others, are:
#' (i) the secondary structure (ss) element to which the residue belongs,
#' (ii) the solvent accessible surface area (sasa) of each residue in square angstrom (Ų), and
#' (iii) the accessibility (acc) computed as the percent of the sasa that the residue X would have in the tripeptide GXG with the polypeptide skeleton in an extended conformation and the side chain in the conformation most frequently observed in proteins.
#' @author Juan Carlos Aledo
#' @examples \dontrun{acc.dssp('3cwm')}
#' @references Miller et al (1987) J. Mol. Biol. 196: 641-656 (PMID: 3681970).
#' @references Touw et al (2015) Nucl. Ac. Res. 43(Database issue): D364-D368 (PMID: 25352545).
#' @seealso atom.dpx(), res.dpx(), str.part()
#' @importFrom bio3d get.pdb
#' @importFrom bio3d pdbsplit
#' @importFrom bio3d read.pdb
#' @export

acc.dssp <- function(pdb, aa = 'all'){

  ## --------------- Download and split the PDB -------------- ##
  del <- FALSE
  if (nchar(pdb) == 4){
    del <- TRUE
    suppressWarnings(bio3d::get.pdb(pdb)) # avoids warning: 'pdb exists. Skipping download'
    file <- paste("./", pdb, ".pdb", sep = "")
    id <- pdb
  } else {
    file <- pdb
    t <- strsplit(file, split = "/")[[1]]
    id <- strsplit(t[length(t)], split = "\\.")[[1]][1]
  }
  chains <- pdb.chain(file, keepfiles = TRUE)
  if (is.null(chains)){
    return(NULL)
  }

  # df <- mkdssp(id, method = 'ptm')     #---------------- sustituido
  df <- mkdssp(file, method = 'ptm')

  if (is.null(df)){
    message()
    return(NULL)
  }

  ## -------- Starting the dataframe construction ------------ ##
  df <- df[,1:6] # keep only relevant variables
  colnames(df)[6] <- 'sasa_complex'
  df$sasa_chain <- NA
  df$delta_sasa <- NA
  df$acc_complex <- NA
  df$acc_chain <- NA
  df$delta_acc <- NA

  ## --------- Get dssp files for individual chains  --------- ##
  chains <- unique(df$chain) # Sometimes we have to remove non-protein chains
  chain_files <- paste('./split_chain/', id, '_', chains,  '.pdb', sep = "")
  dssp_files <- paste('./split_chain/', id, '_', chains, '.dssp', sep = "")

  df$sasa_chain <- unlist(lapply(chain_files, function(x) mkdssp(x, method = "ptm")$sasa))

  df$delta_sasa <- df$sasa_chain - df$sasa_complex

  ## ---------------- Compute accessibility ------------------ ##
  df$acc_complex[which(df$aa == "A")] <-
    round(df$sasa_complex[which(df$aa == "A")]/113, 3)
  df$acc_chain[which(df$aa == "A")] <-
    round(df$sasa_chain[which(df$aa == "A")]/113, 3)

  df$acc_complex[which(df$aa == "R")] <-
    round(df$sasa_complex[which(df$aa == "R")]/241, 3)
  df$acc_chain[which(df$aa == "R")] <-
    round(df$sasa_chain[which(df$aa == "R")]/241, 3)

  df$acc_complex[which(df$aa == "N")] <-
    round(df$sasa_complex[which(df$aa == "N")]/158, 3)
  df$acc_chain[which(df$aa == "N")] <-
    round(df$sasa_chain[which(df$aa == "N")]/158, 3)

  df$acc_complex[which(df$aa == "D")] <-
    round(df$sasa_complex[which(df$aa == "D")]/151, 3)
  df$acc_chain[which(df$aa == "D")] <-
    round(df$sasa_chain[which(df$aa == "D")]/151, 3)

  df$acc_complex[which(df$aa == "C")] <-
    round(df$sasa_complex[which(df$aa == "C")]/140, 3)
  df$acc_chain[which(df$aa == "C")] <-
    round(df$sasa_chain[which(df$aa == "C")]/140, 3)
  # Because dssp convention, lower case letters (a, b, ...)
  # are introduced for bridged cysteines:
  df$acc_complex[which(df$aa %in% letters)] <-
    round(df$sasa_complex[which(df$aa %in% letters)]/140, 3)
  df$acc_chain[which(df$aa %in% letters)] <-
    round(df$sasa_chain[which(df$aa %in% letters)]/140, 3)

  df$acc_complex[which(df$aa == "Q")] <-
    round(df$sasa_complex[which(df$aa == "Q")]/189, 3)
  df$acc_chain[which(df$aa == "Q")] <-
    round(df$sasa_chain[which(df$aa == "Q")]/189, 3)

  df$acc_complex[which(df$aa == "E")] <-
    round(df$sasa_complex[which(df$aa == "E")]/183, 3)
  df$acc_chain[which(df$aa == "E")] <-
    round(df$sasa_chain[which(df$aa == "E")]/183, 3)

  df$acc_complex[which(df$aa == "G")] <-
    round(df$sasa_complex[which(df$aa == "G")]/85, 3)
  df$acc_chain[which(df$aa == "G")] <-
    round(df$sasa_chain[which(df$aa == "G")]/85, 3)

  df$acc_complex[which(df$aa == "H")] <-
    round(df$sasa_complex[which(df$aa == "H")]/194, 3)
  df$acc_chain[which(df$aa == "H")] <-
    round(df$sasa_chain[which(df$aa == "H")]/194, 3)

  df$acc_complex[which(df$aa == "I")] <-
    round(df$sasa_complex[which(df$aa == "I")]/182, 3)
  df$acc_chain[which(df$aa == "I")] <-
    round(df$sasa_chain[which(df$aa == "I")]/182, 3)

  df$acc_complex[which(df$aa == "L")] <-
    round(df$sasa_complex[which(df$aa == "L")]/180, 3)
  df$acc_chain[which(df$aa == "L")] <-
    round(df$sasa_chain[which(df$aa == "L")]/180, 3)

  df$acc_complex[which(df$aa == "K")] <-
    round(df$sasa_complex[which(df$aa == "K")]/211, 3)
  df$acc_chain[which(df$aa == "K")] <-
    round(df$sasa_chain[which(df$aa == "K")]/211, 3)

  df$acc_complex[which(df$aa == "M")] <-
    round(df$sasa_complex[which(df$aa == "M")]/204, 3)
  df$acc_chain[which(df$aa == "M")] <-
    round(df$sasa_chain[which(df$aa == "M")]/204, 3)

  df$acc_complex[which(df$aa == "F")] <-
    round(df$sasa_complex[which(df$aa == "F")]/218, 3)
  df$acc_chain[which(df$aa == "F")] <-
    round(df$sasa_chain[which(df$aa == "F")]/218, 3)

  df$acc_complex[which(df$aa == "P")] <-
    round(df$sasa_complex[which(df$aa == "P")]/143, 3)
  df$acc_chain[which(df$aa == "P")] <-
    round(df$sasa_chain[which(df$aa == "P")]/143, 3)

  df$acc_complex[which(df$aa == "S")] <-
    round(df$sasa_complex[which(df$aa == "S")]/122, 3)
  df$acc_chain[which(df$aa == "S")] <-
    round(df$sasa_chain[which(df$aa == "S")]/122, 3)

  df$acc_complex[which(df$aa == "T")] <-
    round(df$sasa_complex[which(df$aa == "T")]/146, 3)
  df$acc_chain[which(df$aa == "T")] <-
    round(df$sasa_chain[which(df$aa == "T")]/146, 3)

  df$acc_complex[which(df$aa == "W")] <-
    round(df$sasa_complex[which(df$aa == "W")]/259, 3)
  df$acc_chain[which(df$aa == "W")] <-
    round(df$sasa_chain[which(df$aa == "W")]/259, 3)

  df$acc_complex[which(df$aa == "Y")] <-
    round(df$sasa_complex[which(df$aa == "Y")]/229, 3)
  df$acc_chain[which(df$aa == "Y")] <-
    round(df$sasa_chain[which(df$aa == "Y")]/229, 3)

  df$acc_complex[which(df$aa == "V")] <-
    round(df$sasa_complex[which(df$aa == "V")]/160, 3)
  df$acc_chain[which(df$aa == "V")] <-
    round(df$sasa_chain[which(df$aa == "V")]/160, 3)

  df$delta_acc <- df$acc_chain - df$acc_complex

  ## ---------------- Cleaning and Tidying ------------------- ##
  unlink("./temp_split", recursive = TRUE)
  # aai <- as.character(aai$aa) # ------------------------------------------ Sustituido:
  aai <- c('A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V')
  if (del & file.exists(file)){
    file.remove(file)
  }

  ## ------------------ Returning Results -------------------- ##
  if (aa != 'all' & aa %in% aai){
      df <- df[which(df$aa == aa),]
    return(df)
  } else if (aa == 'all'){
    return(df)
  } else {
    message("A proper amino acid must be selected!")
    return(NULL)
  }
}

## ---------------------------------------------------------------- ##
#         get.area <- function(pdb, keepfiles = FALSE)               #
## ---------------------------------------------------------------- ##
#' Atomic Solvation Energies.
#' @description Computes online surface energies using the Getarea server.
#' @usage get.area(pdb, keepfiles = FALSE)
#' @param pdb is either a PDB id, or the path to a pdb file.
#' @param keepfiles logical, if TRUE the dataframe will be saved in the working directory and we will keep the getarea txt file.
#' @details If the option keepfiles is set as TRUE, then txt and Rda files are saved in the working directory.
#' @return This function returns a dataframe containing the requested information.
#' @author Juan Carlos Aledo
#' @examples \dontrun{get.area('3cwm')}
#' @references Fraczkiewicz, R. and Braun, W. (1998) J. Comp. Chem., 19, 319-333.
#' @seealso compute.dssp(), atom.dpx(), res.dpx(), acc.dssp(), str.part()
#' @importFrom bio3d aa321
#' @importFrom RCurl fileUpload
#' @importFrom RCurl postForm
#' @export

get.area <- function(pdb, keepfiles = FALSE){

  del <- FALSE
  if (nchar(pdb) == 4){
    del <- TRUE
    suppressWarnings(get.pdb(pdb)) # avoids warning: 'pdb exists. Skipping download'
    file <- paste("./", pdb, ".pdb", sep = "")
    id <- pdb
  } else {
    file <- pdb
    t <- strsplit(file, split = "/")[[1]]
    id <- strsplit(t[length(t)], split = "\\.")[[1]][1]
  }

  output_file = gsub("\\.pdb", "_getarea.txt", file)
  url <- "http://curie.utmb.edu/cgi-bin/getarea.cgi"

  result <- tryCatch(
    {
      RCurl::postForm(url,
                      "water" = "1.4",
                      "gradient" = "n",
                      "name" = "test",
                      "email" = 'metosite2018@gmail.com',
                      "Method" = "4",
                      "PDBfile" = RCurl::fileUpload(file))
    },
    error = function(cond){
      return(NULL)
    }
  )
  if (is.null(result)){
    message("Sorry, getare failed")
    return(NULL)
  }

  writeLines(gsub("[</pre></td>|<td><pre>]","", result), con = output_file)
  con <- file(output_file, 'r')
  counter <- 0
  eleno <- c()
  elety <- c()
  alt <- c()
  resid <- c()
  resno <- c()
  areaenergy <- c()

  while(TRUE){
    counter <- counter + 1
    line <- readLines(con, n = 1)

    if (counter > 17 & length(line) != 0){
      a <- strsplit(line, split = "")[[1]]
      eleno <- c(eleno, paste(a[1:6], collapse = ""))
      elety <- c(elety, paste(a[7:10], collapse = ""))
      alt <- c(alt, paste(a[11:12], collapse = ""))
      resid <- c(resid, paste(a[13:15], collapse = ""))
      resno <- c(resno, paste(a[16:23], collapse = ""))
      areaenergy <- c(areaenergy, paste(a[24:30], collapse = ""))
    } else if (length(line) == 0) {
      break
    }
  }
  close(con)

  ## ------ Setting the variable types ------------- ##
  eleno <- eleno[1:(length(eleno)-11)]
  elety <- gsub(" ", "", elety[1:(length(elety)-11)])
  alt <- alt[1:(length(alt)-11)]
  resid <- aa321(resid[1:(length(resid)-11)])
  resno <- gsub(" ", "", resno[1:(length(resno)-11)])
  areaenergy <- gsub(" ", "", areaenergy[1:(length(areaenergy)-11)])

  ## -------- Building the dataframe ---------------- ##
  df <- as.data.frame(matrix(c(alt, eleno, elety, resid, resno, areaenergy),
                             ncol = 6), stringsAsFactors = FALSE)

  colnames(df) <- c('alt','eleno', 'elety', 'resid', 'resno', 'areaenergy')

  df$alt <- gsub(" ", "", as.character(df$alt))
  df$eleno <- as.numeric(df$eleno)
  df$resno <- as.numeric(df$resno)
  df$areaenergy <- as.numeric(df$areaenergy)

  ## --- When PDB has ALT records, taking A only --- ##
  df <- df[which(df$alt != "B"),]
  df <- df[,-1]

  if (keepfiles == TRUE){
    rda_file <- gsub(".txt", ".Rda", output_file)
    save(df, file = rda_file)
  } else {
    file.remove(output_file)
    if (del){
      file.remove(file)
    }
  }
  return(df)
}


## ---------------------------------------------------------------- ##
#                   dpx <- function(pdb)                             #
## ---------------------------------------------------------------- ##
#' Atom Depth Analysis
#' @description Computes the depth from the surface for each protein's atom.
#' @usage dpx(pdb)
#' @param pdb is either a PDB id, or the path to a pdb file.
#' @details This function computes the depth, defined as the distance in angstroms between the target atom and the closest atom on the protein surface.
#' @return A dataframe with the computed depths.
#' @author Juan Carlos Aledo
#' @examples \dontrun{dpx('3cwm')}
#' @references Pintar et al. 2003. Bioinformatics 19:313-314 (PMID: 12538266)
#' @seealso compute.dssp(), atom.dpx(), res.dpx(), acc.dssp(), str.part()
#' @importFrom bio3d read.pdb
#' @export

dpx <- function(pdb){

  ## --------------- Generate a dataframe ---------------- ##
  atom <- tryCatch(
    {
      get.area(pdb)
    },
    error = function(cond){
      return(NULL)
    }
  )
  if (is.null(atom)){
    message("Sorry, get.area failed")
    return(NULL)
  }

  mypdb <- tryCatch(
   {
     suppressWarnings(read.pdb(pdb))
   },
   error = function(cond){
     return(NULL)
   }
  )

  if (is.null(mypdb)){
   return(NULL)
  }

  mypdb <- mypdb$atom[which(mypdb$atom$type == 'ATOM'),]
  atom$x <- mypdb$x
  atom$y <- mypdb$y
  atom$z <- mypdb$z
  atom$dpx <- NA
  atom$dpx[which(atom$areaenergy != 0)] = 0

  exposed <- atom[which(atom$areaenergy != 0), c(6:8)]
  exposed <- as.matrix(exposed) # xyz coordinates of exposed atoms
  buried <- as.matrix(atom[is.na(atom$dpx), 6:8]) # xyz coordinates of buried atoms
  dpx <- round(pairwise.dist(buried, exposed, FALSE),2)
  dpx_buried <- apply(dpx, 1, min) # distance to the closest exposed atom
  atom$dpx[is.na(atom$dpx)] <- dpx_buried

  return(atom)
}


## ---------------------------------------------------------------- ##
#                atom.dpx <- function(pdb)                           #
## ---------------------------------------------------------------- ##
#' Atom Depth Analysis
#' @description Computes the depth from the surface for each protein's atom.
#' @usage atom.dpx(pdb)
#' @param pdb is either a PDB id, or the path to a pdb file.
#' @details This function computes the depth, defined as the distance in angstroms between the target atom and the closest atom on the protein surface. When the protein is composed of several subunits, the calculations are made for both, the atom being part of the complex, and the atom being only part of the polypeptide chain to which it belongs.
#' @return A dataframe with the computed depths.
#' @author Juan Carlos Aledo
#' @examples \dontrun{atom.dpx('1cll')}
#' @references Pintar et al. 2003. Bioinformatics 19:313-314 (PMID: 12538266)
#' @seealso res.dpx(), acc.dssp(), str.part()
#' @importFrom bio3d read.pdb
#' @export

atom.dpx <- function(pdb){
  del <- FALSE
  if (nchar(pdb) == 4){
    del <- TRUE
    id <- pdb
  } else {
    t <- strsplit(pdb, split = "/")[[1]]
    id <- strsplit(t[length(t)], split = "\\.")[[1]][1]
  }

  ## ------------ For the whole structure ---------------- ##
  atom <- dpx(pdb)
  if (is.null(atom)){
    message("Sorry, dpx failed")
    return(NULL)
  }
  energies <- atom$areaenergy
  dpx <- atom$dpx
  atom <- atom[,1:4]
  atom$chain <- NA
  atom$areaenergy <- energies
  atom$dpx_complex <- dpx
  atom$dpx_chain <- NA
  atom$delta_dpx <- NA

  ## ----------- For multi-chains structure -------------- ##
  chains <- suppressWarnings(pdb.chain(pdb, keepfiles = TRUE))
  if (is.null(chains)){
    message("Sorry, pdb.chain failed")
    return(NULL)
  }
  counter_a <- 0
  for (i in seq_len(length(chains))){
    t <- paste('./split_chain/', id, '_', chains[i], '.pdb', sep = "")
    atom_chain <- dpx(t)
    counter_b <- counter_a + nrow(atom_chain)
    atom$chain[(counter_a + 1) : counter_b] <- chains[i]
    atom$dpx_chain[(counter_a + 1) : counter_b] <- atom_chain$dpx
    counter_a <- counter_b
  }
  atom$delta_dpx <- atom$dpx_complex - atom$dpx_chain

  ## ---------------- Cleaning and Tidying ------------------- ##
  unlink("./split_chain", recursive = TRUE)
  if (del){
    file.remove(paste("./", id, ".pdb", sep = ""))
  }

  return(atom)
}


## ---------------------------------------------------------------- ##
#             res.dpx <- function(pdb, aa = 'all')                   #
## ---------------------------------------------------------------- ##
#' Residue Depth Analysis
#' @description Computes the depth from the surface for each protein's residue.
#' @usage res.dpx(pdb, aa = 'all')
#' @param pdb is either a PDB id, or the path to a pdb file.
#' @param aa one letter code for the amino acid of interest, or 'all' for all the protein residues.
#' @details This function computes the depth, defined as the distance in angstroms between the residue and the closest atom on the protein surface.
#' @return A dataframe with the computed depths.
#' @author Juan Carlos Aledo
#' @examples \dontrun{res.dpx('1cll')}
#' @references Pintar et al. 2003. Bioinformatics 19:313-314 (PMID: 12538266)
#' @seealso atom.dpx(), acc.dssp(), str.part()
#' @export

res.dpx <- function(pdb, aa = 'all'){

  atom <- atom.dpx(pdb)
  if (is.null(atom)){
    message("Sorry, atom.dpx failed")
    return(NULL)
  }
  atom$res <- paste(atom$resid, atom$resno, atom$chain, sep ="-")
  res <- unique(atom$res)

  ## ------------------ Dataframe structure -------------------- ##
  df <- as.data.frame(matrix(rep(NA, length(res) * 10), ncol = 10))
  colnames(df) <- c('res','resno', 'resid', 'chain', 'min_dpx_complex',
                    'avg_dpx_complex', 'max_dpx_complex',
                    'min_dpx_chain', 'avg_dpx_chain', 'max_dpx_chain')
  df$res <- res
  for (i in 1:nrow(df)){
    t <- strsplit(res[i], split = '-')[[1]]
    df$resno[i] <- as.numeric(t[2])
    df$resid[i] <- t[1]
    df$chain[i] <- t[3]
  }

  ## -------------------- Depth computation -------------------- ##
  df$min_dpx_complex <- df$min_dpx_chain <- NA
  df$max_dpx_complex <- df$max_dpx_chain <- NA
  df$avg_dpx_complex <- df$avg_dpx_chain <- NA

  for (i in 1:length(res)){
    temp <- atom[which(atom$res == res[i]),]

    df$min_dpx_chain[i] <- min(temp$dpx_chain)
    df$avg_dpx_chain[i] <- round(mean(temp$dpx_chain), 2)
    df$max_dpx_chain[i] <- max(temp$dpx_chain)

    df$min_dpx_complex[i] <- min(temp$dpx_complex)
    df$avg_dpx_complex[i] <- round(mean(temp$dpx_complex), 2)
    df$max_dpx_complex[i] <- max(temp$dpx_complex)
  }

  ## ------------------ Returning Results -------------------- ##
  if (aa != 'all' & aa %in% aai$aa){
    df <- df[which(df$resid == aa),]
    return(df)
  } else if (aa == 'all'){
    return(df)
  } else {
    message("A proper amino acid must be provided!")
    return(NULL)
  }
}

## ---------------------------------------------------------------- ##
#           stru.part <- function(pdb, cutoff = 0.25)                #
## ---------------------------------------------------------------- ##
#' Partition of Structural Regions
#' @description Carries out a partition of the structural regions of a given protein.
#' @usage stru.part(pdb, cutoff = 0.25)
#' @param pdb is either a PDB id, or the path to a pdb file
#' @param cutoff accessibility below which a residue is considered to be buried.
#' @details The accessibilities of a residue computed in the complex (ACCc) and in the monomer (ACCm) allow to distinguish four structural regions as follows.
#'
#' Interior: ACCc < cutoff & (ACCm - ACCc) = 0.
#'
#' Surface: ACCc > cutoff &  (ACCm - ACCc) = 0.
#'
#' Support: ACCm < cutoff & (ACCm - ACCc) > 0.
#'
#' Rim: ACCc > cutoff & (ACCm - ACCc) > 0.
#'
#' Core: ACCm > cutoff & ACCc < cutoff.
#' @return A dataframe where each residue is assigned to one of the four structural groups considered.
#' @author Juan Carlos Aledo
#' @examples \dontrun{stru.part('1u8f')}
#' @references Levy (2010) J. Mol. Biol. 403: 660-670 (PMID: 20868694).
#' @seealso  atom.dpx(), res.dpx(), acc.dssp()
#' @export

stru.part <- function(pdb, cutoff = 0.25){

  ## --- Getting Accessibilities
  t <- acc.dssp(pdb, aa = 'all')
  if (is.null(t)){
    message("Sorry, acc.dssp failed")
    return(NULL)
  }
  output <- t[, c(1:5, 9:11)]
  names(output) <- c(names(t)[1:5], "ACCc", "ACCm", "DACC")
  output$str <- NA

  ## --- Sorting Structural Regions
  for (i in 1:nrow(output)){
    if (output$DACC[i] == 0 & output$ACCc[i] < cutoff){
      output$str[i] <- "interior"
    } else if (output$DACC[i] == 0 & output$ACCc[i] > cutoff){
      output$str[i] <- 'surface'
    } else if (output$DACC[i] > 0 & output$ACCm[i] < cutoff){
      output$str[i] <- 'support'
    } else if (output$DACC[i] > 0 & output$ACCc[i] > cutoff){
      output$str[i] <- 'rim'
    } else {
      output$str[i] <- 'core'
    }
  }
  return(output)
}

Try the ptm package in your browser

Any scripts or data that you put into this service are public.

ptm documentation built on Aug. 7, 2022, 5:05 p.m.