# make_distance_matrices_list.R
# Purpose: Build one distance matrix per requested metric. This is a helper
#          function for clusterSystems and is unlikely to be useful elsewhere.
#
# Parameters:
#   distances  Character vector indicating the distance metrics to be used.
#              Recognized values: "expression_profile",
#              "transcription_factor", "network_jaccard" and
#              "network_distance".
#   all_genes  Character vector of HGNC symbols with which to make distance
#              matrices.
#
# Value: A list with one element per entry of `distances`, in the same order
#        and named by `distances`. The "network_distance" element is a `dist`
#        object created with stats::as.dist(); every other element is whatever
#        make_matrix() returns (presumably also a distance object - confirm
#        against make_matrix).
#
# Errors: Stops with "<metric> is not a recognized distance metric." if any
#         entry of `distances` is not one of the recognized values. This is
#         checked before any data set is fetched.
#
# Author: Rachel Silverstein
make_distance_matrices_list <- function(distances, all_genes) {
  metrics <- as.character(distances)
  network_metrics <- c("network_jaccard", "network_distance")
  recognized <- c("expression_profile", "transcription_factor",
                  network_metrics)

  # Validate everything up front so that a bad metric name fails immediately
  # instead of after the earlier metrics have already been computed.
  unrecognized <- metrics[!(metrics %in% recognized)]
  if (length(unrecognized) > 0) {
    msg <- paste0("\n", unrecognized[1],
                  " is not a recognized distance metric.")
    stop(msg)
  }

  # Both network metrics work on the same graph, so fetch the edge list and
  # build the graph at most once, and only if a network metric was requested.
  STRINGgraph <- NULL
  if (any(metrics %in% network_metrics)) {
    STRING <- fetchData("STRINGedges0.8")
    # convert the STRINGedges object into an igraph object
    STRINGgraph <- igraph::graph_from_edgelist(as.matrix(STRING[, 1:2]))
    # The string graph does not contain any unconnected vertices, only
    # interactions, so any genes that have no annotated interactions will be
    # missing. Add these missing genes as unconnected nodes in the graph.
    # NOTE(review): assumes columns `a` and `b` are the two edge-list columns
    # used to build the graph above - confirm against the STRINGedges schema.
    string_genes <- unique(c(STRING$a, STRING$b))
    missing_genes <- setdiff(all_genes, string_genes)
    STRINGgraph <- igraph::add_vertices(STRINGgraph,
                                        nv = length(missing_genes),
                                        name = missing_genes)
  }

  # one slot per requested metric, filled in request order
  distanceMatrices <- vector("list", length(metrics))
  for (i in seq_along(metrics)) {
    metric_matrix <- switch(
      metrics[i],
      expression_profile = make_matrix(all_genes,
                                       dist_fn = expr_dist,
                                       data_source = fetchData("GEOprofiles")),
      transcription_factor = make_matrix(all_genes,
                                         dist_fn = tf_dist,
                                         data_source = fetchData("GTRDgeneTFs")),
      network_jaccard = make_matrix(all_genes,
                                    dist_fn = jaccard_dist,
                                    data_source = STRINGgraph),
      network_distance = {
        string_matrix <- igraph::distances(STRINGgraph,
                                           v = all_genes,
                                           to = all_genes)
        # remove all the infinite values generated by unconnected nodes:
        # get a number one larger than the largest finite distance ...
        large_num <- max(string_matrix[is.finite(string_matrix)]) + 1
        # ... and replace any infinite distances with this distance
        string_matrix[is.infinite(string_matrix)] <- large_num
        # convert to a scale of 0 to 1 (unconnected pairs end up at exactly 1)
        string_matrix <- string_matrix / large_num
        # put it into distance format recognized by clustering functions
        stats::as.dist(string_matrix)
      }
    )
    # Assign through a one-element list so a NULL result keeps its slot
    # instead of deleting it.
    distanceMatrices[i] <- list(metric_matrix)
  }

  # add names to each of the distance matrices
  names(distanceMatrices) <- distances
  distanceMatrices
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.