options(stringsAsFactors = F)
options(warn = 1)
get_celltype_medians <- function(tab)
{
tab <- tab[tab$cellType != 0,]
ret <- ddply(tab, ~cellType, colwise(median))
}
mark_nearest_neighbour <- function(G)
{
E(G)$nearest_neigh <- 0
for(i in 1:vcount(G))
{
if(V(G)$type[i] == 2)
{
sel.edges <- incident(G, i)
max.edge <- sel.edges[which.max(E(G)[sel.edges]$weight)]
E(G)[max.edge]$nearest_neigh <- 1
}
}
return(G)
}
cosine_similarity_from_matrix <- function(v, m)
{
m <- as.matrix(m[, names(v), drop = F])
ret <- apply(m, 1, function(x, v) {return(crossprod(x, v)/sqrt(crossprod(x) * crossprod(v)))}, v)
return(ret)
}
cosine_similarity_matrix <- function(m)
{
ret <- t(apply(m, 1, function(x, m) {cosine_similarity_from_matrix(x, m)}, m = m))
return(ret)
}
filter_similarity_matrix <- function(m, T)
{
ret <- t(apply(m, 1, function(x)
{
if(max(x) <= T)
x[x < max(x)] <- 0
else
x[x < T] <- 0
return(x)
}))
return(ret)
}
filter_similarity_matrix_by_rank <- function(m, T)
{
ret <- t(apply(m, 1, function(x)
{
r <- rank(x, ties.method = "first")
r <- max(r) - r + 1
x[r > T] <- 0
return(x)
}))
return(ret)
}
distance_from_attractor_hard_filter <- function(dd, tab, col.names, thresh = 0.5)
{
tab <- tab[, col.names]
w <- apply(tab[,col.names], 1, function(x, thresh) {all(x < thresh)}, thresh = thresh)
if(any(w))
print("Hard removing some connections to unstained landmarks")
dd[, w] <- 0
return(dd)
}
get_distances_from_attractors <- function(m, tab, col.names, dist.thresh)
{
att <- as.matrix(tab[, col.names])
row.names(att) <- as.character(1:nrow(tab))
m <- as.matrix(m[, col.names])
dd <- t(apply(m, 1, function(x, att) {cosine_similarity_from_matrix(x, att)}, att))
dd <- distance_from_attractor_hard_filter(dd, tab, col.names, thresh = 1)
dist.thresh <- quantile(dd, probs = 0.85, na.rm = T)
dist.thresh <- max(c(dist.thresh, 0.5))
dd[is.na(dd)] <- 0 #This can happen if one of the attractors has all 0's for the markers of interest
dd <- filter_similarity_matrix(dd, dist.thresh)
return(dd)
}
add_attractors_labels <- function(G, v)
{
V(G)$name[1:length(v)] <- V(G)$Label[1:length(v)] <- v
return(G)
}
set_visual_attributes <- function(G)
{
att <- V(G)$type == 1
V(G)$r <- 79
V(G)$g <- 147
V(G)$b <- 222
V(G)$size <- 10
V(G)[att]$r <- 255
V(G)[att]$g <- 117
V(G)[att]$b <- 128
V(G)[att]$size <- 20
E(G)$r <- 180
E(G)$g <- 180
E(G)$b <- 180
return(G)
}
add_vertices_to_attractors_graph <- function(G, tab.clustered, tab.median, col.names, dist.thresh = 0.7)
{
dd <- get_distances_from_attractors(tab.clustered, tab.median, col.names, dist.thresh)
n <- nrow(dd)
num.vertices <- length(V(G))
G <- add.vertices(G, n)
v.seq <- (num.vertices + 1):length(V(G))
V(G)[v.seq]$name <- as.character(v.seq)
V(G)[v.seq]$Label <- paste("c", tab.clustered$groups, sep = "")
row.names(dd) <- as.character(v.seq)
for(i in 1:nrow(dd))
{
v <- dd[i,]
v <- v[v > 0]
if(length(v) > 0)
{
e.list <- c(rbind(as.character(num.vertices + i), names(v)))
G <- G + edges(e.list, weight = v)
}
}
# weight <- E(G)$weight
# E(G)$weight <- weight ^ 10
maxx <- maxy <- rep(Inf, vcount(G))
minx <- miny <- rep(-Inf, vcount(G))
maxx[1:num.vertices] <- minx[1:num.vertices] <- V(G)$x[1:num.vertices]
maxy[1:num.vertices] <- miny[1:num.vertices] <- V(G)$y[1:num.vertices]
lay <- layout.kamada.kawai(G, minx = minx, maxx = maxx, miny = miny, maxy = maxy)
colnames(lay) <- c("x", "y")
G <- set.vertex.attribute(G, name = "x", value = lay[, "x"])
G <- set.vertex.attribute(G, name = "y", value = lay[, "y"])
V(G)[1:num.vertices]$type <- 1 #attractor
V(G)[(num.vertices + 1):vcount(G)]$type <- 2 #cell
for(i in names(tab.clustered))
G <- set.vertex.attribute(G, name = i, index = (num.vertices + 1):vcount(G), value = tab.clustered[, i])
G <- set_visual_attributes(G)
return(G)
}
get_vertex_table <- function(G)
{
att <- list.vertex.attributes(G)
ret <- NULL
for(a in att)
{
d <- data.frame(get.vertex.attribute(G, a), stringsAsFactors = FALSE)
if(is.null(ret))
ret <- d
else
ret <- cbind(ret, d, stringsAsFactors = FALSE)
}
names(ret) <- att
return(ret)
}
build_graph <- function(tab, col.names, filtering_T = 0.8)
{
m <- as.matrix(tab[, col.names])
row.names(m) <- tab$cellType
dd <- cosine_similarity_matrix(m)
diag(dd) <- 0
dd[is.na(dd)] <- 0 #This can happen if one of the attractors has all 0's for the markers of interest
if(filtering_T >= 1)
dd <- filter_similarity_matrix_by_rank(dd, filtering_T)
else
dd <- filter_similarity_matrix(dd, filtering_T)
G <- graph.adjacency(dd, mode = "undirected", weighted = T)
n.vertices <- length(V(G))
lay <- layout.kamada.kawai(G)
colnames(lay) <- c("x", "y")
G <- set.vertex.attribute(G, name = "x", value = lay[, "x"])
G <- set.vertex.attribute(G, name = "y", value = lay[, "y"])
for(i in names(tab))
G <- set.vertex.attribute(G, name = i, value = tab[, i])
return(G)
}
cluster_data <- function(tab, col.names, k = 200, algorithm = "", ...)
{
m <- as.matrix(tab[, col.names])
if(algorithm == "clara")
{
print("Performing clara clustering")
groups <- clara(m, k, ...)$clustering
}
else if(algorithm == "hierarchical")
{
print("Performing hierarchical clustering")
dend <- hclust(dist(m), ...)
groups <- cutree(dend, k)
}
print("Clustering done")
tab <- cbind(tab, groups, stringsAsFactors = FALSE)
return(tab)
}
add_inter_clusters_connections <- function(G, col.names, weight.factor)
{
tab <- get_vertex_table(G)
tab <- tab[tab$type == 2,]
m <- as.matrix(tab[, col.names])
row.names(m) <- tab$name
dd <- cosine_similarity_matrix(m)
diag(dd) <- 0
dd[is.na(dd)] <- 0
dist.thresh <- quantile(dd, probs = 0.85, na.rm = T)
dist.thresh <- max(c(dist.thresh, 0.5))
dd <- filter_similarity_matrix(dd, dist.thresh)
dd <- filter_similarity_matrix_by_rank(dd, 3)
e.list <- NULL
for(i in 1:nrow(dd))
{
v <- dd[i,]
v <- v[v > 0]
if(length(v) > 0)
{
e.list <- rbind(e.list, data.frame(a = tab[i, "name"], b = names(v), weight = v, stringsAsFactors = FALSE))
}
}
temp <- as.matrix(e.list[, c("a", "b")])
temp <- t(apply(temp, 1, sort))
e.list <- data.frame(temp, weight = e.list$weight, stringsAsFactors = FALSE)
names(e.list)[1:2] <- c("a", "b")
e.list <- e.list[!duplicated(e.list[, c("a", "b")]),]
e.list.igraph <- c(t(as.matrix(e.list[, c("a", "b")])))
#G <- G + edges(e.list, weight = (v ^ 30))
G <- G + edges(e.list.igraph, weight = e.list$weight * weight.factor)
return(G)
}
process_data <- function(tab, G.attractors = NULL, tab.attractors = NULL, col.names = NULL, att.labels = NULL, dist.thresh = 0.7,
already.clustered = FALSE, inter.cluster.connections = FALSE, col.names.inter_cluster = NULL, inter_cluster.weight_factor = 0.7, ew_influence,
overlap_method = NULL)
{
if(!already.clustered)
{
tab <- cluster_data(tab, col.names)
tab.clustered <- ddply(tab, ~groups, colwise(median))
}
else
tab.clustered <- tab
if(is.null(col.names.inter_cluster) || col.names.inter_cluster == "")
col.names.inter_cluster = col.names
if(is.null(G.attractors))
{
G.attractors <- build_graph(tab.attractors, col.names)
G.complete <- add_vertices_to_attractors_graph(G.attractors, tab.clustered, tab.attractors, col.names, dist.thresh)
G.complete <- complete.forceatlas2(G.complete, first.iter = 50000,
overlap.iter = 20000, ew_influence = ew_influence, overlap_method = "repel")
if(inter.cluster.connections)
{
print("Adding inter-cluster connections with markers:")
print(col.names.inter_cluster)
print(sprintf("Weight factor:%f", inter_cluster.weight_factor))
G.complete <- add_inter_clusters_connections(G.complete, col.names.inter_cluster, weight.factor = inter_cluster.weight_factor)
G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
ew_influence = ew_influence, overlap_method = overlap_method)
}
V(G.attractors)$x <- V(G.complete)$x[1:vcount(G.attractors)]
V(G.attractors)$y <- V(G.complete)$y[1:vcount(G.attractors)]
}
else
{
G.complete <- add_vertices_to_attractors_graph(G.attractors, tab.clustered, tab.attractors, col.names, dist.thresh)
fixed <- rep(FALSE, vcount(G.complete))
fixed[1:vcount(G.attractors)] <- TRUE
G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
overlap_method = "repel", ew_influence = ew_influence, fixed = fixed)
if(inter.cluster.connections)
{
print("Adding inter-cluster connections with markers:")
print(col.names.inter_cluster)
print(sprintf("Weight factor:%f", inter_cluster.weight_factor))
G.complete <- add_inter_clusters_connections(G.complete, col.names.inter_cluster, weight.factor = inter_cluster.weight_factor)
G.complete <- complete.forceatlas2(G.complete, first.iter = 50000, overlap.iter = 20000,
overlap_method = overlap_method, ew_influence = ew_influence, fixed = fixed)
}
}
G.complete <- add_attractors_labels(G.complete, att.labels)
V(G.complete)$name <- gsub(".fcs", "", V(G.complete)$name)
return(list(G.attractors = G.attractors, G.complete = G.complete, tab.attractors = tab.attractors, tab = tab, col.names = col.names))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.