# R/tf_dist.R

# tf_dist.R

#' Transcription Factor Distance
#'
#' \code{tf_dist} Calculate upstream transcription factor binding site distance between 2
#' genes.
#'
#' Calculate the distance/dissimilarity of upstream transcription factors of 2 genes.
#' Similarity is taken to be the Jaccard Index of the set overlap between the 2
#' transcription factors sets. Distance is then taken to be 1 - similarity.
#'
#' @param gene1 String, HGNC symbol for the first gene.
#' @param gene2 String, HGNC symbol for the second gene.
#' @param geneList A list of lists of transcription factors that have binding
#' sites upstream of genes. Generated by calling \code{\link{fetchData}("GTRDgeneTFs")}.
#' @return The distance between gene1 and gene2: 1 minus the Jaccard index of
#' their upstream transcription factor sets, a number between 0 and 1.
#'
#'
#' @author \href{https://orcid.org/0000-0001-5724-2252}{Rachel Silverstein} (aut)
#'
#' @seealso \code{\link{fetchData}} For format of geneList
#'
#' @examples
#' # Calculate the transcription factor distance of 2 related genes "BRCA1" and "BRCA2"
#' \dontrun{
#' geneList <- fetchData("GTRDgeneTFs")
#' tf_dist("BRCA1", "BRCA2", geneList)
#' }
#'
#' @export

tf_dist <- function(gene1, gene2, geneList) {
  # Look up the upstream transcription factors recorded for each gene. When
  # geneList is a plain list, `[[` yields NULL for a symbol that is absent,
  # and the set operations below treat NULL as an empty set.
  tfs1 <- geneList[[gene1]]
  tfs2 <- geneList[[gene2]]

  # Local names deliberately differ from base::intersect / base::union so the
  # functions are not masked inside this body.
  shared <- intersect(tfs1, tfs2)
  combined <- union(tfs1, tfs2)

  # With no transcription factors on either side the Jaccard index would be
  # 0 / 0 (NaN); such a pair is scored as similarity 0, i.e. distance 1.
  if (length(combined) == 0) {
    return(1)
  }

  # Jaccard distance: 1 - |intersection| / |union|.
  1 - length(shared) / length(combined)
}

# [END]
# hyginn/BCB420.2019.ESA documentation built on May 29, 2019, 1:23 p.m.