# tf_dist.R
#' Transcription Factor Distance
#'
#' \code{tf_dist} calculates the upstream transcription factor binding site
#' distance between 2 genes.
#'
#' Calculate the distance/dissimilarity of the upstream transcription factors
#' of 2 genes. Similarity is taken to be the Jaccard index of the two
#' transcription factor sets, i.e. the size of their intersection divided by
#' the size of their union. Distance is then taken to be 1 - similarity.
#'
#' If neither gene has any transcription factors (the union is empty) the
#' Jaccard index is undefined (0/0); in that case the similarity is treated as
#' 0 and the returned distance is 1. When \code{geneList} is a plain list, a
#' gene symbol that is not one of its names is looked up as \code{NULL} and is
#' therefore handled as a gene with no transcription factors.
#'
#' @param gene1 String, HGNC symbol for the first gene.
#' @param gene2 String, HGNC symbol for the second gene.
#' @param geneList A list of lists of transcription factors that have binding
#' sites upstream of genes. Generated by calling
#' \code{\link{fetchData}("GTRDgeneTFs")}.
#' @return The distance between gene1 and gene2: a single number between 0
#' and 1, computed as 1 minus the Jaccard index of their upstream
#' transcription factor sets. 0 means the two sets are identical (and
#' non-empty); 1 means they share no transcription factors.
#'
#'
#' @author \href{https://orcid.org/0000-0001-5724-2252}{Rachel Silverstein} (aut)
#'
#' @seealso \code{\link{fetchData}} For format of geneList
#'
#' @examples
#' # Calculate the transcription factor distance of 2 related genes "BRCA1" and "BRCA2"
#' \dontrun{
#' geneList <- fetchData("GTRDgeneTFs")
#' tf_dist("BRCA1", "BRCA2", geneList)
#' }
#'
#' @export
tf_dist <- function(gene1, gene2, geneList) {
  tfs1 <- geneList[[gene1]]
  tfs2 <- geneList[[gene2]]

  # intersect() and union() both de-duplicate, so repeated entries within a
  # gene's transcription factor list do not inflate either count.
  nShared <- length(intersect(tfs1, tfs2))
  nTotal <- length(union(tfs1, tfs2))

  # Empty union: the Jaccard index would be 0/0 (NaN). Treat the pair as
  # having zero similarity, i.e. maximal distance.
  if (nTotal == 0) {
    return(1)
  }

  1 - nShared / nTotal
}
# [END]
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.