prodyna: Protein Dynamics Analysis

#' Read dihedrals file.
#'
#' Read a file containing dihedral angles. A .dih.info file as generated by
#' \code{\link{write.dihedrals.info}} is needed to correctly identify the
#' corresponding residues.
#'
#' @param dihedrals Character, name of the .dih file containing the dihedral
#'  angles.
#' @param dihedralsInfo Character, name of the .dih.info file as generated by
#'  \code{\link{generate.dihedrals}}. If \code{NULL} the filename is assumed to
#'  be <dihedrals>.info
#' @param resnos Numeric vector, residue numbers for which dihedral angles
#'  should be read. If \code{NULL} (default) read dihedrals for all residues.
#' @return A data.frame with \eqn{\phi} and \eqn{\psi} angles.
#' @importFrom data.table fread
#' @export
read.dihedrals <- function(dihedrals, resnos=NULL, dihedralsInfo=NULL) {

  # Assume default naming
  if (is.null(dihedralsInfo)) {
    dihedralsInfo <- paste(dihedrals, ".info", sep="")
  }

  if (file.exists(dihedralsInfo)) {
    dInf <- read.dihedrals.info(dihedralsInfo)
    storedResnos <- (1:dInf$nRes)[-dInf$skippedResnos]
  } else {
    # No info file exists, assume default case: first and last dihedral missing
    n_dih <- ncol(fread(dihedrals, verbose=F, showProgress=F, nrows=1))
    storedResnos <- 2:(n_dih/2+1)
    message(paste0("No .info file found. Assuming .dih file ",
                   "contains dihedrals of all but the incomplete ",
                   "terminal residues."))
  }

  if (is.null(resnos)){
    # Read all dihedrals.
    dih <- fread(dihedrals, verbose=FALSE, showProgress=FALSE)
    resnos <- storedResnos

  } else if (!all(resnos %in% storedResnos)) {
    stop(paste("read.dihedrals: File", dihedrals,
               "does not contain all of the requested residues."))
  } else {
    # Compute the columns corresponding to the given residue numbers.
    cols <- do.call("c", lapply(resnos,
                                function(i){c(2*(which(i==storedResnos))-1,
                                              2*(which(i==storedResnos)))}))
    dih <- fread(dihedrals, select=cols, verbose=FALSE, showProgress=FALSE)
  }
  colnames(dih) <- paste(c("phi", "psi"), rep(resnos, each=2), sep="")
  return(dih)
}

#' Read dihedrals information.
#'
#' Reads a .dih.info file as generated by \code{\link{write.dihedrals.info}}.
#'
#' @param dihedralsInfo Character, name of the .dih.info file.
#' @export
#' @importFrom data.table fread
#' @return named list with elements \code{ref}, \code{traj}, \code{nRes},
#'   \code{skippedResnos}
read.dihedrals.info <- function(dihedralsInfo) {

  onError <- function(c) {stop(msg("format", dihedralsInfo))}

  tryCatch(
    info <- fread(dihedralsInfo, sep="\t", header=F, verbose=F, select=c(2)),
    error   = onError,
    warning = onError
  )

  ref  <- info[[1,1]]
  traj <- info[[2,1]]

  tryCatch(
    {
      nRes          <- as.numeric(info[[3,1]])
      skippedResnos <- as.numeric(strsplit(info[[4,1]], split=" ")[[1]])
    },
    error   = onError,
    warning = onError
  )

  return(list(ref=ref, traj=traj, nRes=nRes, skippedResnos = skippedResnos))
}

#' Write dihedrals infomation.
#'
#' Writes a .dih.info file with the following format (fields are tab separated)
#' \preformatted{
#' reference <normalized path to .pdb file>
#' trajectory <normalized path to .xtc file>r
#' nResidues <integer>
#' skippedResidues <space separated integers increasingly ordered>
#' }
#'
#' @param ref Character, name of the PDB file describing the reference structure.
#' @param traj Character, name of the XTC file describing the trajectory.
#' @param nRes Numeric, largest residue index
#' @param skipCA Numeric vector, C\eqn{_\alpha} indices to be skipped in
#'   increasing order.
#' @param fname Character, filename that should end in .dih.info \cr
#'   If \code{NULL} (default) the file is named <traj>.dih.info
#' @export
write.dihedrals.info <- function(ref, traj, nRes, skipCA, fname=NULL) {
  if (is.null(fname)) {
    fname <- paste(traj, ".dih.info", sep="")
  }
  write(paste(paste("reference",  ref, sep="\t"),
              paste("trajectory", traj, sep="\t"),
              paste("nResidues", nRes, sep="\t"),
              paste("skippedResidues", paste(skipCA, collapse=" "), sep="\t"),
              sep = "\n"),
        fname)
}

#' Read C\eqn{_\alpha} information.
#'
#' Read a .caDist.info file as generated by \code{\link{write.caDistances.info}}.
#'
#' @param caDistInfo Character, name of the .caDist.info file.
#' @export
#' @importFrom data.table fread
#' @return named list with elements \code{ref}, \code{traj}, \code{mindist},
#'   \code{maxdist}
read.caDistances.info <- function(caDistInfo) {

  onError <- function(c) {stop(msg("format", caDistInfo))}

  tryCatch(
    info <- fread(caDistInfo, sep="\t", header=F, verbose=F, select=c(2)),
    error   = onError,
    warning = onError
  )

  ref  <- info[[1,1]]
  traj <- info[[2,1]]

  tryCatch(
    {
      mindist <- as.numeric(info[[3,1]])
      maxdist <- as.numeric(info[[4,1]])
    },
    error   = onError,
    warning = onError
  )

  return(list(ref=ref, traj=traj, mindist=mindist, maxdist = maxdist))
}

#' Write C\eqn{_\alpha} infomation.
#'
#' Writes a .caDist.info file with the following format (fields are tab
#' separated)
#' \preformatted{
#' reference <normalized path to .pdb file>
#' trajectory <normalized path to .xtc file>
#' mindist <integer>
#' maxdist <integer>
#' }
#'
#' @param ref Character, name of the PDB file describing the reference structure.
#' @param traj Character, name of the XTC file describing the trajectory.
#' @param residue.mindist Numeric
#' @param residue.maxdist Numeric
#' @param fname Character, filename that should end in .caDist.info \cr
#'   If \code{NULL} (default) the file is named <traj>.caDist.info
#' @export
write.caDistances.info <- function(ref, traj, mindist, maxdist, fname=NULL) {
  if (is.null(fname)) {
    fname <- paste(traj, ".caDist.info", sep="")
  }
  write(paste(paste("reference",  ref, sep="\t"),
              paste("trajectory", traj, sep="\t"),
              paste("mindist", mindist, sep="\t"),
              paste("maxdist", maxdist, sep="\t"),
              sep = "\n"),
        fname)
}

#' Cumulative fluctuations.
#'
#' @param vals Character or numeric vector, either name of the .val/.valn file
#'  or vector of eigenvalues.
#' @return cumulative fluctuations
#' @importFrom data.table fread
#' @export
read.cumFlucts <- function(vals) {

  if (is.character(vals)) {
    vals <- fread(vals, showProgress=F, verbose=F)
    vals <- vals$V1
  }
  return(cumsum(vals/sum(vals)))
}

#' Read population files.
#'
#' Read population files with the given prefix and return them as a
#' data frame.
#'
#' @param prefix_pop Character, population files prefix.
#'  File names are assumed to be <prefix_pop>_<radius>.
#' @param radii Numeric vector, selection of radii. If \code{NULL} (default),
#'  read all available population files.
#' @return data frame where each column stores the neighbourhood population
#'  per frame for a particular neighbourhood radius.
#' @export
read.populations <- function(pop_prefix, radii=NULL) {

  dir        <- normalizePath(dirname(pop_prefix))
  pop_prefix <- basename(pop_prefix)

  if (is.null(radii)) {
    popfiles <- list.files(dir,
                           pattern=paste(pop_prefix, "*", sep="_"),
                           full.names=TRUE)
  } else {
    popfiles <- sapply(radii, function(r) {
      list.files(dir,
                 pattern=paste(pop_prefix, format(r, nsmall=6), sep="_"),
                 full.names=TRUE)
    })
  }

  do.call(
    data.frame,
    lapply(popfiles, function(fname){
      pops        <- data.frame(read.table(fname)[[1]])
      R           <- as.numeric(tail(strsplit(fname, split="_")[[1]], n=1))
      names(pops) <- paste0("R", R)

      return(pops)
  }))
}

#' Read waiting time distributions.
#'
#' Read waiting time distribution files <wtd_prefix>_<wsize>_<state> as
#' generated by \code{\link{clustering.wtDistributions}} for given states
#' and window sizes.
#'
#' @param wtd_prefix Character, prefix of the distribution files
#'  (window size and state id will be appended).
#' @param state Numeric, vector of state ids.
#' @param wsizes Numeric, vector of coring window sizes.
#' @return data frame with columns `frame`, `probability`, `state` and `wsize`.
#' @importFrom data.table fread
#' @export
read.wtDistributions <- function(wtd_prefix, states, wsizes) {

  dists <- list()
  k     <- 1

  for (j in 1:length(states)) {

    state <- states[j]

    for (i in 1:length(wsizes)) {

      w <- wsizes[i]
      dists[[k]] <- fread(paste(wtd_prefix, w, state, sep="_"), select=c(1,2))
      dists[[k]]$state <- state
      dists[[k]]$wsize <- w

      k <- k + 1
    }
  }

  wtDistribution        <- data.frame(do.call(rbind, dists))
  names(wtDistribution) <- c("frame", "probability", "state", "wsize")

  return(wtDistribution)
}