R/protDistanceCompute.R

Defines functions nearestProts

Documented in nearestProts

# compute distances, and find nearest genes

# first, create a distance matrix, n.genes by n.genes


#' Find the proteins nearest to a reference protein
#'
#' Looks up \code{protName} in \code{profile} with \code{protIndex}, takes the
#' corresponding row of the distance matrix, sorts it in increasing order and
#' lists the \code{n.nearest} entries with the smallest distances. The
#' reference protein itself is included in the listing, because its own
#' entry is part of that row of the distance matrix.
#'
#' @param protName  Name of protein to which distances are to be computed
#' @param n.nearest Number of nearest proteins to list (default 5). If it
#'   exceeds the number of proteins in \code{distProts}, all proteins are
#'   listed.
#' @param distProts Distance object or square distance matrix, created by,
#'   for example, \code{dist}. Its rows must be in the same order as the
#'   rows of \code{profile}, since the row index found in \code{profile} is
#'   used to index it.
#' @param protNames A list of all proteins in a dataset. Currently not used
#'   by the function; retained so that existing calls keep working.
#' @param profile dataframe of profiles for proteins; passed to
#'   \code{protIndex} to locate \code{protName}
#' @return A data frame with columns \code{"protName"} and
#'   \code{"euclidean distance"} holding the nearest proteins, closest
#'   first. The second column carries that label whatever method was used to
#'   build \code{distProts}. If \code{protIndex} returns a character value
#'   (presumably its "not found" message) it is returned unchanged; if more
#'   than one protein matches, a note is printed and the matches reported by
#'   \code{protIndex} are returned instead.
#' @export
#' @examples
#' data(protNSA_AT5tmtMS2)
#' distUse <- dist(protNSA_AT5tmtMS2[,seq_len(9)], method="euclidean")
#' protsUse <- rownames(protNSA_AT5tmtMS2)
#' nearestProts(protName="CTSD", n.nearest=10,  distProts=distUse,
#'   protNames=protsUse, profile=protNSA_AT5tmtMS2[,seq_len(9)])
nearestProts <- function(protName, n.nearest=5, distProts, protNames, profile) {
  # Locate the reference protein first; both early exits hand back
  # whatever protIndex reported.
  ref <- protIndex(protName, profile)
  if (is.character(ref)) {
    return(ref)
  }
  if (nrow(ref) > 1) {
    cat("More than one protein matches protName\n")
    return(ref)
  }

  # Expand to a full square matrix so one row can be pulled out by index.
  distProtsMat <- as.matrix(distProts)
  ind.ref <- ref[1, 1]
  vect.dist <- distProtsMat[ind.ref, ]  # distances to the reference protein

  nearest.list <- sort(vect.dist)

  resultAll <- data.frame(names(nearest.list), as.numeric(nearest.list))
  names(resultAll) <- c("protName", "euclidean distance")

  # Never ask for more rows than exist: indexing a data frame past its last
  # row would pad the result with all-NA rows.
  n.keep <- min(n.nearest, nrow(resultAll))
  resultAll[seq_len(n.keep), ]
}

#nearestProts("AADAC", n.nearest=10,  distProts=distUse, protNames=protsUse,
#  profile=protNSA_AT5tmtMS2[,seq_len(9)])

#nearestProts("AAD", n.nearest=10,  distProts=distUse, protNames=protsUse,
#  profile=protNSA_AT5tmtMS2[,seq_len(9)])
mooredf22/protlocassign0p1p1 documentation built on Feb. 7, 2022, 1:55 a.m.