# R/expr_dist.R

# expr_dist.R

#' Expression Distance.
#'
#' \code{expr_dist} calculates the pairwise distance of gene expression profiles.
#'
#' Similarity between 2 expression profiles is calculated by taking the absolute
#' value of the Spearman correlation. Spearman correlation is used because we do
#' not care whether the correlation is linear. Absolute value ensures that both strong
#' positive and negative correlations result in high similarity. Distance is then
#' taken as 1 - similarity.
#' Distance is returned as 1 (maximum possible) if the expression profile for
#' either gene is unavailable.
#'
#' @param gene1 Character, HGNC symbol of the first gene.
#' @param gene2 Character, HGNC symbol of the second gene.
#' @param profiles Matrix of expression profiles, generated by calling
#' \code{fetchData("GEOprofiles")}
#' @return Numeric, distance score.
#'
#'
#' @author \href{https://orcid.org/0000-0001-5724-2252}{Rachel Silverstein} (aut)
#'
#' @seealso See \code{\link{fetchData}} for the format of expression profiles used by
#' this function.
#'
#' @examples
#' \dontrun{
#' # Find the distance between the expression profiles of BRCA1 and BRCA2
#' GEO <- fetchData("GEOprofiles")
#' expr_dist("BRCA1", "BRCA2", GEO)
#' }
#'
#' @export

expr_dist <- function(gene1, gene2, profiles) {
  # A symbol with no matching row in `profiles` has no expression profile.
  # Indexing a matrix by an unknown row name stops with "subscript out of
  # bounds", so report the maximum distance before indexing. Only character
  # symbols are checked; any other kind of index is passed through unchanged.
  known <- rownames(profiles)
  is_absent <- function(gene) {
    is.character(gene) && !all(gene %in% known)
  }
  if (is_absent(gene1) || is_absent(gene2)) {
    return(1)
  }

  prof1 <- profiles[gene1, ]
  prof2 <- profiles[gene2, ]

  # "na.or.complete" restricts the correlation to samples observed in both
  # profiles and yields NA (instead of an error) when there are none.
  similarity <- abs(stats::cor(x = prof1,
                               y = prof2,
                               method = "spearman",
                               use = "na.or.complete"))

  # An undefined correlation (e.g. no shared observations or a constant
  # profile) is treated as no similarity, i.e. maximum distance.
  if (is.na(similarity)) {
    similarity <- 0
  }

  1 - similarity
}

# [END]
# hyginn/BCB420.2019.ESA documentation built on May 29, 2019, 1:23 p.m.