## Heteromotility plotting and analysis functions
library(devtools)
#source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## Plotting
pca_plots <- function(df, df.class, df.plate, plot_path, class_num, clust_pcs=10, prefix=NULL){
# Generates 2D PCA plots with a defined set of labels and k-means clusters.
#
# Parameters
# ----------
# df : data frame, N x M. samples as rows, features as columns.
# df.class : vector numeric. N x 1. class labels.
# df.plate : vector numeric. N x 1. plate number labels.
# plot_path : string. directory for plot outputs.
# class_num : integer. k argument for k-means clustering.
# clust_pcs : integer. number of PCs to use for k-means clustering.
# prefix : character vector. prefix for filenames. Default = None.
#
# Returns
# -------
# df.comp : data frame. N x 30 of principal component scores.
#
require(ggplot2)
# PCA
df.pca_eigens <- prcomp(df)
df.comp <- data.frame(df.pca_eigens$x[,1:30])
df.k <- kmeans(df.comp[,1:clust_pcs], class_num, nstart=25, iter.max=1000)
# plot PC1 vs PC2
pca_theme <- theme(axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
legend.position="right",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
pca_k <- ggplot(df.comp, aes(x = PC1, y = PC2, col = as.factor(df.k$cluster))) + geom_point(size=1) + pca_theme + scale_colour_discrete(name = "Cluster")
ggsave(pca_k, filename = paste(prefix, "pca_k.png", sep=''), path = plot_path, width = 4, height = 4, units = "in")
pca_class <- ggplot(df.comp, aes(x = PC1, y = PC2, col = as.factor(df.class))) + geom_point(size=1) + pca_theme + scale_colour_discrete(name = "Class")
ggsave(pca_class, filename = paste(prefix, "pca_class.png", sep=''), path = plot_path, width = 4, height = 4, units = "in")
pca_plate <- ggplot(df.comp, aes(x = PC1, y = PC2, col = as.factor(df.plate))) + geom_point(size=1) + pca_theme + scale_colour_discrete(name = "Plate")
ggsave(pca_plate, filename = paste(prefix, "pca_plate.png", sep=''), path = plot_path, width = 4, height = 4, units = "in")
return(df.comp)
}
tsne_plots <- function(df, df.groups, df.class, df.plate, plot_path, df.comp = NULL, class_num = NULL, experiment = NULL, method='ward.D2', perplexity=NULL, max_iter=5000, width=4, height=4){
# Generates tSNE plots with cluster, class, and plate labels.
#
# Parameters
# ----------
# df : data frame, N x M. samples as rows, features as columns.
# df.groups : vector numeric. N x 1. cluster labels.
# df.class : vector numeric. N x 1. class labels.
# df.plate : vector numeric. N x 1. plate number labels.
# plot_path : string. directory for plot outputs.
# df.comp : data frame. N x D matrix of principal component scores, if plotting
# of both raw and PCA values is desired.
# class_num : integer. k parameter for h-clustering of PCA values.
# experiment : string. Used as the prefix title for plot exports.
# method : string. hierarchical clustering method for PCA values.
# perplexity : vector numeric. perplexity values to try.
# if NULL, a default set [10,...,70] is used.
# max_iter : integer. maximum iterations of the tSNE Barnes-Hut algorithm.
# width : integer. inches width of plots.
# height : integer. inches height of plots.
#
# Returns
# -------
# None.
#
require(Rtsne)
require(ggplot2)
no_axes_bg <- theme(axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position="none",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
no_axes_w_legend <- theme(axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position="right",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
if (is.null(perplexity)){
pset = c(10, 20, 30, 50, 70)
} else {
pset = c(perplexity)
}
costs <- numeric(length(pset))
i <- 1
for (perplexity in pset){
df.tsne <- Rtsne(df, perplexity = perplexity, max_iter=max_iter)
tsne_class <- ggplot(data.frame(df.tsne$Y), aes(x=X1, y=X2, col = as.factor(df.class))) + geom_point(size=1) + labs(title = paste(experiment, 'tSNE', sep = ' ')) + no_axes_bg
ggsave(tsne_class, path = plot_path, filename = paste(experiment,"tsne_class_p", perplexity, ".png", sep=''), width = width, height = height, units = "in")
tsne_wardD <- ggplot(data.frame(df.tsne$Y), aes(x=X1, y=X2, col = as.factor(df.groups))) + geom_point(size=1) + labs(title = paste(experiment, 'tSNE', sep = ' ')) + no_axes_bg
ggsave(tsne_wardD, path = plot_path, filename = paste(experiment,"tsne_hclust_p", perplexity, ".png", sep=''), width = width, height = height, units = "in")
tsne_plate <- ggplot(data.frame(df.tsne$Y), aes(x=X1, y=X2, col = as.factor(df.plate))) + geom_point() + labs(title = paste(experiment, 'tSNE', sep = ' ')) + no_axes_w_legend
ggsave(tsne_plate, path = plot_path, filename = paste(experiment,"tsne_plate_p", perplexity, ".png", sep=''), width = width+3, height = height, units = "in") # extra width for legend
costs[i] <- sum(df.tsne$costs)
}
if (!is.null(df.comp)){
df.comp.tsne <- Rtsne(df.comp, perplexity = 50)
df.comp.fit <- hclust(dist(df.comp), method = method)
df.comp.groups <- cutree(df.comp.fit, k = class_num)
tsne_pca_class <- ggplot(data.frame(df.comp.tsne$Y), aes(x=X1, y=X2, col = as.factor(df.class))) + geom_point(size=1) + labs(title = paste(experiment, 'tSNE', sep = ' ')) + no_axes_bg
ggsave(tsne_pca_class, path = plot_path, filename = "tsne_pca_class.png", width = 4, height = 4, units = "in")
tsne_pca_wardD <- ggplot(data.frame(df.comp.tsne$Y), aes(x=X1, y=X2, col = as.factor(df.comp.groups))) + geom_point(size=1) + labs(title = paste(experiment, 'tSNE', sep = ' ')) + no_axes_bg
ggsave(tsne_pca_wardD, path = plot_path, filename = "tsne_pca_hclust.png", width = 4, height = 4, units = "in")
}
}
## Plotting Colors and Themes
fgf_class_colors <- function(class_vector) {
return_vector <- numeric(0)
for (i in class_vector) {
if (i == "FGF2+") {
return_vector <- append(return_vector, 'blue')
} else {
return_vector <- append(return_vector, 'red')
}
}
return(return_vector)
}
mef_class_colors <- function(class_vector) {
return_vector <- numeric(0)
for (i in class_vector) {
if (i == "MycRas") {
return_vector <- append(return_vector, 'purple')
} else {
return_vector <- append(return_vector, 'blue')
}
}
return(return_vector)
}
## Clustering
som_plots <- function(df, grid_size, class_num, plot_path, method="ward.D2", iter=500){
require(kohonen)
som_grid <- somgrid(xdim = grid_size, ydim = grid_size, topo = "hexagonal") # set 20x20 hexagonal perceptron grid
df.som_model <- som(as.matrix(df), grid=som_grid, rlen = iter, alpha = c(0.05, 0.01), keep.data = TRUE)
require(RColorBrewer)
somcol <- brewer.pal(11, 'RdBu')
png(filename = paste(plot_path, "som_changes.png", sep = ''), width = 4, height = 4, units = "in", res = 600)
plot(df.som_model, type = "changes")
dev.off()
png(filename = paste(plot_path, "som_counts.png", sep = ''), width = 4, height = 4, units = "in", res = 600)
plot(df.som_model, type="count")
dev.off()
png(filename = paste(plot_path, "som_neighbor_distances.png", sep = ''), width = 4, height = 4, units = "in", res = 600)
plot(df.som_model, type="dist.neighbours")
dev.off()
df.som_cluster <- cutree( hclust(dist(data.frame(df.som_model$codes)), method = method), k = class_num)
pretty_palette <- c("#1f77b4", '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2')
png(filename = paste(plot_path, "som_clusters.png", sep = ''), width = 4, height = 4, units = "in", res = 600)
plot(df.som_model, type="mapping", bgcol = pretty_palette[df.som_cluster], main = "Clusters")
add.cluster.boundaries(df.som_model, df.som_cluster)
dev.off()
}
heatmap2_plots <- function(df, df.class, plot_path, class_num, method="ward.D2", prefix=''){
# Heirarchical clustering
require(gplots)
df.d <- dist(df, method = "euclidean")
df.fit <- hclust(df.d, method = method)
df.groups <- cutree(df.fit, k = class_num)
# make RowSideColors vector for heatmap.2
rsc <- as.character(df.groups)
hclust_fnx <- function(x) hclust(x, method=method)
out_file <- paste(plot_path, prefix, "heatmap_groups_only.png", sep ='')
png(out_file, width = 10, height = 10, units = "in", res = 300)
heatmap.2(as.matrix(df), hclustfun = hclust_fnx, scale = "row", trace = 'none', col=redgreen, RowSideColors = rsc, margins = c(10,10))
dev.off()
}
heatmap3_plots <- function(df, df.class, plot_path, class_num, method="ward.D2", prefix=''){
# Heirarchical clustering
require(gplots)
df.d <- dist(df, method = "euclidean")
df.fit <- hclust(df.d, method = method)
df.groups <- cutree(df.fit, k = class_num)
# make RowSideColors vector for heatmap.3
r1 <- fgf_class_colors(df.class)
r2 <- as.character(df.groups)
rsc <- t( cbind(r1,r2) )
rownames(rsc) <- c("Class", "Cluster")
hclust_fnx <- function(x) hclust(x, method=method)
out_file <- paste(plot_path, prefix, "heatmap_class.png", sep ='')
png(out_file, width = 10, height = 10, units = "in", res = 300)
heatmap.3(as.matrix(df), hclustfun = hclust_fnx, scale = "row", trace = 'none', col=redgreen, RowSideColors = rsc, margins = c(10,10))
dev.off()
return(df.groups)
}
## Cluster Membership
cluster_props <- function(df, df.class, c1, c2, df.groups){
# Generic cluster proportion calculation
# Parameters
# ----------
# df : data.frame
# df.class : factor. vector of unique class labels.
# c1 : string or numeric. class in df.class.
# c2 : string or numeric. class in df.class.
# df.groups : factor, numeric. cluster assignments.
#
if (is.factor(df.groups)){
df.groups = as.numeric(df.groups)
}
class1 = rep(0, max(df.groups))
class2 = rep(0, max(df.groups))
for (i in 1:nrow(df)){
if (df.class[i] == c1){
class1[df.groups[i]] <- class1[df.groups[i]] + 1
}
else {
class2[df.groups[i]] <- class2[df.groups[i]] + 1
}
}
props1 <- class1/sum(class1)
props2 <- class2/sum(class2)
return( list(c1=class1, c2=class2, c1_p = props1, c2_p = props2) )
}
fgf_cluster_props <- function(class_df, df.groups){
class1 = rep(0, max(df.groups))
class2 = rep(0, max(df.groups))
for (i in 1:nrow(class_df)){
if (class_df$class[i] == 'FGF2+'){
class1[df.groups[i]] <- class1[df.groups[i]] + 1
}
else {
class2[df.groups[i]] <- class2[df.groups[i]] + 1
}
}
props1 <- class1/sum(class1)
props2 <- class2/sum(class2)
return( list(fgf=class1, nofgf=class2, fgf_p = props1, nofgf_p = props2) )
}
plot_cluster_memberships <- function(cluster_membership, plot_path, c1='Young', c2='Aged', bar_order=NULL, prefix=''){
d <- data.frame(rbind(cluster_membership$c1_p, cluster_membership$c2_p))
clust_nb <- ncol(d)
names = c()
for (i in 1:clust_nb){ names[i] = paste('Cluster', i, sep=' ') }
colnames(d) <- names
d$class <- c(c1, c2)
d.m <- melt(d, id.vars = "class")
if (!is.null(bar_order)){
d.m$variable <- factor(d.m$variable, levels=bar_order)
}
cluster_theme <- theme(
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
axis.line.x = element_line(size = 0.5),
axis.line.y = element_line(size = 0.5),
axis.text.x = element_text(size = 10),
text= element_text(size = 10)
)
clust_p <- ggplot(d.m, aes(variable, value, fill = class)) + geom_bar(position = "dodge", stat = "identity")
clust_p <- clust_p + cluster_theme + scale_y_continuous(expand = c(0,0), limits = c(0, 1.10*max(d.m$value))) + labs(title = "Cluster Membership", y = "Proportion in Cluster", x = "")
ggsave(clust_p, filename = paste(prefix,"cluster_membership.png", sep=''), path = plot_path, width = 4, height = 3, units = "in")
}
fgf_plot_cluster_memberships <- function(cluster_membership, plot_path){
d <- data.frame(rbind(cluster_membership$fgf_p, cluster_membership$nofgf_p))
clust_nb <- ncol(d)
names = c()
for (i in 1:clust_nb){ names[i] = paste('Cluster', i, sep=' ') }
colnames(d) <- names
d$class <- c("FGF2+", "FGF2-")
d.m <- melt(d, id.vars = "class")
cluster_theme <- theme(
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
axis.line.x = element_line(size = 0.5),
axis.line.y = element_line(size = 0.5),
axis.text.x = element_text(size = 10),
text= element_text(size = 10)
)
clust_p <- ggplot(d.m, aes(variable, value, fill = class)) + geom_bar(position = "dodge", stat = "identity")
clust_p <- clust_p + cluster_theme + scale_y_continuous(expand = c(0,0), limits = c(0, 1.10*max(d.m$value))) + labs(title = "Cluster Membership", y = "Proportion in Cluster", x = "")
ggsave(clust_p, filename = "cluster_membership.png", path = plot_path, width = 4, height = 3, units = "in")
}
mef_cluster_props <- function(class_df, df.groups){
class1 = rep(0, max(df.groups))
class2 = rep(0, max(df.groups))
for (i in 1:nrow(class_df)){
if (class_df$class[i] == 'MycRas'){
class1[df.groups[i]] <- class1[df.groups[i]] + 1
}
else {
class2[df.groups[i]] <- class2[df.groups[i]] + 1
}
}
props1 <- class1/sum(class1)
props2 <- class2/sum(class2)
return( list(mycras=class1, wt=class2, mycras_p = props1, wt_p = props2) )
}
mef_plot_cluster_memberships <- function(cluster_membership, plot_path, prefix=''){
d <- data.frame(rbind(cluster_membership$mycras_p, cluster_membership$wt_p))
names = c()
for (i in 1:ncol(d)){ names[i] = paste('Cluster', i, sep=' ') }
colnames(d) <- names
d$class <- c("MycRas", "WT")
d.m <- melt(d, id.vars = "class")
cluster_theme <- theme(
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
axis.line.x = element_line(size = 0.5),
axis.line.y = element_line(size = 0.5),
axis.text.x = element_text(size = 10),
text= element_text(size = 10)
)
clust_p <- ggplot(d.m, aes(variable, value, fill = class)) + geom_bar(position = "dodge", stat = "identity")
clust_p <- clust_p + cluster_theme + scale_y_continuous(expand = c(0,0), limits = c(0, 1.10*max(d.m$value))) + labs(title = "Cluster Membership", y = "Proportion in Cluster", x = "")
ggsave(clust_p, filename = paste(prefix, "cluster_membership.png", sep=''), path = plot_path, width = 4, height = 3, units = "in")
}
density_cluster_membership <- function(df.groups, density.state, plot_path, prefix='', chi2test=T){
n_groups <- max(df.groups)
n_dens <- max(density.state)
groups_mat <- matrix(nrow=n_groups, ncol=n_dens, 0)
# Generate matrix of all proportions of each density state in each group
for (i in 1:length(df.groups)){
groups_mat[df.groups[i], density.state[i]] = groups_mat[df.groups[i], density.state[i]] + 1
}
density_contingency_table_preftest(groups_mat, plot_path, prefix=prefix)
# Get proportion of density state in each cluster
sums_mat <- t(replicate(n_groups, apply(groups_mat, 2, sum)))
props_mat <- groups_mat/sums_mat
props_df <- data.frame(props_mat)
densclust_names <- c()
for (i in 1:n_dens){densclust_names <- c(densclust_names, paste('Density ', i, sep=''))}
colnames(props_df) <- densclust_names
return(props_df)
}
plot_density_cluster_membership <- function(props_df, plot_path, prefix=''){
# props_df : data.frame of State x Density_Cluster, containing proportion of each density cluster
# in each state.
require(reshape2)
cluster_theme <- theme(
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
axis.line.x = element_line(size = 0.5),
axis.line.y = element_line(size = 0.5),
axis.text.x = element_text(size = 10),
text= element_text(size = 10)
)
clust_names <- c()
for (i in 1:nrow(props_df)){clust_names <- c(clust_names, paste('Cluster ', i, sep=''))}
props_df$Cluster <- clust_names
props.m <- melt(props_df, id.vars=c('Cluster'))
clust_p <- ggplot(props.m, aes(variable, value, fill = as.factor(Cluster))) + geom_bar(position = "dodge", stat = "identity")
clust_p <- clust_p + cluster_theme + scale_y_continuous(expand = c(0,0), limits = c(0, 1.10*max(props.m$value))) + labs(title = "Density ~ Cluster Membership", y = "Proportion in Cluster", x = "")
ggsave(clust_p, path=plot_path, filename = paste(prefix, '_density_clusterprops.png', sep=''), height=3, width=4, units='in')
clust_p2 <- ggplot(props.m, aes(Cluster, value, fill = variable)) + geom_bar(position = "dodge", stat = "identity")
clust_p2 <- clust_p2 + cluster_theme + scale_y_continuous(expand = c(0,0), limits = c(0, 1.10*max(props.m$value))) + labs(title = "Cluster Membership ~ Density", y= 'Proportion in Cluster', x = "")
ggsave(clust_p2, path=plot_path, filename = paste(prefix, '_cluster_densityprops.png', sep=''), height=3, width=4, units='in')
}
pca_loading_variation <- function(df, plot_path, prefix=''){
# Saves PCA loadings and percentage variation captured at different levels of components
df.pc <- princomp(df)
aload <- abs( with(df.pc, unclass(loadings)) ) # get abs of all loadings
pload <- sweep(aload, 2, colSums(aload), "/") # proportion per PC
write.csv(x = pload, file = paste(plot_path, prefix, 'pca_loading_proportional.csv', sep=''), row.names = F)
# get prop variance
df.pca_eigens <- prcomp(df)
s <- summary(df.pca_eigens)
capture.output(s, file = paste(plot_path, prefix, 'pca_var_explained.txt', sep=''))
write.csv(s$importance[3,], file = paste(plot_path, prefix, 'pca_cum_var_explained.csv', sep=''), row.names = F)
}
## Feature Specific Plots
add_sig_bars <- function(plot, plt_df, test_results, y_shift = 0.05, alpha = 0.05, text.size=5, lab.space=0.003){
# Adds significance bars to a provided ggplot object
#
# Parameters
# ----------
# plot : ggplot geom_bar object.
# plt_df : longform data frame with Mean and SE for a set of variables
# test_results : wideform data frame with a column titled adj_p_val of p-values to use for sig bars
# y_shift : float. Distance above highest error bar val to place sig bar.
# alpha : float, (0, 1). significance level.
# text.size : integer.
# lab.space : float. Distance above bar to place sig mark.
#
# Returns
# -------
# plot : ggplot geom_bar object with error bars.
sig <- data.frame(matrix(nrow = length(levels(plt_df$variable)), ncol=2))
colnames(sig) <- c('variable', 'adj_p_val')
for (i in 1:length(levels(plt_df$variable))){
sig[i,] <- c(levels(plt_df$variable)[i], test_results[test_results$Feature_Name == levels(plt_df$variable)[i],]$adj_p_value)
}
sig$adj_p_val <- as.numeric(sig$adj_p_val)
plt_df$high_pt <- plt_df$Mean + plt_df$SE
if (sum(colnames(plt_df)=='class') > 0){
max_arr <- acast(plt_df, variable ~ class, value.var = 'high_pt')
} else {
max_arr <- acast(plt_df, variable ~ groups, value.var = 'high_pt')
}
y <- apply(max_arr, 1, max) + y_shift
sig_marks <- data.frame(matrix(nrow=sum(sig$adj_p_val < alpha), ncol=5))
colnames(sig_marks) <- c('x', 'xend', 'ylow', 'ylow', 'yhigh')
j <- 1
for (i in 1:nrow(sig)){
if (sig[i,]$adj_p_val < alpha){
sig_marks[j,] <- c(0.8+(i-1), 1.2+(i-1), y[i], y[i], y[i] + 0.02)
j <- j + 1
}
}
if (nrow(sig_marks) > 0 ){
for (i in 1:nrow(sig_marks)){
plot <- plot + sigbra(sig_marks[i,1], sig_marks[i,2], sig_marks[i,3], sig_marks[i,4], sig_marks[i,5], label = '*', text.size = text.size, lab.space = lab.space)
}
}
return(plot)
}
plot_class_feature_means <- function(df, df.class, plot_path, ttest_results=NULL, width = NULL, height = NULL, limited = F, prefix = NULL){
require(reshape2)
require(ggplot2)
if (limited == T){
limited_features <- c("total_distance",
"net_distance",
"linearity",
"spearmanrsq",
"progressivity",
"avg_speed",
"MSD_slope",
"hurst_RS",
"nongauss",
"rw_kurtosis01",
"rw_kurtosis05",
"avg_moving_speed05",
"time_moving05",
"autocorr_5")
class_df <- df[limited_features]
class_df$class <- as.factor(df.class)
} else {
class_df <- df
class_df$class <- as.factor(df.class)
}
sem <- function(x){sd(x)/sqrt(length(x))}
means <- aggregate(. ~ class, data = class_df, FUN = mean)
stderr <- aggregate(. ~ class, data = class_df, FUN = sem)
means.m <- melt(means, value.name = "Mean")
stderr.m <- melt(stderr, value.name = "SE")
plt_df <- merge(means.m, stderr.m)
write.csv(plt_df, file = paste(plot_path, "class_feature_means.txt"), row.names = F)
if (is.null(width)){
width = 8
}
if (is.null(height)){
height = 5
}
feature_theme <- theme(
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1),
text= element_text(size = 10),
plot.title = element_text(hjust = 0.5)
)
mean_limits <- aes(ymax = plt_df$Mean + plt_df$SE, ymin = plt_df$Mean - plt_df$SE)
p_mean <- ggplot(plt_df, aes(variable, Mean, fill = class)) + geom_bar(position = "dodge", stat = "identity") + geom_errorbar(mean_limits, position = position_dodge(0.9), width = 0.3) + feature_theme + labs(title = "Feature Means", x = "Feature", y = "Mean (+/- SE)")
## add sig bars
# Find sig levels
if (!is.null(ttest_results)){
p_mean <- add_sig_bars(p_mean, plt_df, ttest_results)
}
ggsave(p_mean, filename = paste(prefix, 'class_feature_means.png', sep = ''), path = plot_path, width = width, height = height, units = 'in')
}
plot_class_feature_medians <- function(df, df.class, plot_path){
require(reshape2)
require(ggplot2)
class_df <- df
class_df$class <- as.factor(df.class)
sem <- function(x){sd(x)/sqrt(length(x))}
medians <- aggregate(. ~ class, data = class_df, FUN = median)
stderr <- aggregate(. ~ class, data = class_df, FUN = sem)
medians.m <- melt(medians, value.name = "Median")
stderr.m <- melt(stderr, value.name = "SE")
plt_df <- merge(medians.m, stderr.m)
write.csv(plt_df, file = paste(plot_path, "class_feature_medians.txt"), row.names = F)
mean_limits <- aes(ymax = plt_df$Median + plt_df$SE, ymin = plt_df$Median - plt_df$SE)
p_median <- ggplot(plt_df, aes(variable, Median, fill = class)) + geom_bar(position = "dodge", stat = "identity") + geom_errorbar(mean_limits, position = position_dodge(0.9), width = 0.3) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text= element_text(size = 10)) + labs(title = "Feature Means", x = "Feature", y = "Mean (+/- SE)")
ggsave(p_median, filename = 'feature_medians.png', path = plot_path, width = 8, height = 5, units = 'in')
}
plot_cluster_features <- function(df, df.groups, plot_path, anova_results=NULL, width = 7, height = 4, limited = F, prefix=NULL){
require(reshape2)
require(ggplot2)
if (limited == T){
limited_features <- c("total_distance",
"net_distance",
"linearity",
"spearmanrsq",
"progressivity",
"avg_speed",
"MSD_slope",
"hurst_RS",
"nongauss",
"rw_kurtosis01",
"rw_kurtosis05",
"avg_moving_speed05",
"time_moving05",
"autocorr_5")
group_df <- df[limited_features]
group_df$groups <- as.factor(df.groups)
} else {
group_df <- df
group_df$groups <- as.factor(df.groups)
}
sem <- function(x){sd(x)/sqrt(length(x))}
means = aggregate(. ~ groups, data = group_df, FUN = mean)
medians = aggregate(. ~ groups, data = group_df, FUN = median)
stderr <- aggregate(. ~ groups, data = group_df, FUN = sem)
# melt data
means.m <- melt(means, value.name = "Mean")
medians.m <- melt(medians, value.name = "Median")
stderr.m <- melt(stderr, value.name = "SE")
# make data.frame from melted data
plt_df <- merge(means.m, stderr.m)
plt_df <- merge(plt_df, medians.m)
write.csv(plt_df, file = paste(plot_path, prefix, "cluster_feature_means.csv", sep =''), row.names = F)
feature_theme <- theme(
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1),
text= element_text(size = 10),
plot.title = element_text(hjust = 0.5)
)
mean_limits <- aes(ymax = plt_df$Mean + plt_df$SE, ymin = plt_df$Mean - plt_df$SE)
p_mean <- ggplot(plt_df, aes(variable, Mean, fill = groups)) + geom_bar(position = "dodge", stat = "identity") + geom_errorbar(mean_limits, position = position_dodge(0.9), width = 0.3) + labs(title = "Feature Means", x = "Feature", y = "Mean (+/- SE)") + feature_theme
# Add sig bars
if (!is.null(anova_results)){
p_mean <- add_sig_bars(p_mean, plt_df, anova_results)
}
ggsave(p_mean, filename = paste(prefix, "cluster_feature_means.png", sep =''), path = plot_path, width = width, height = height, units = "in")
median_limits <- aes(ymax = plt_df$Median + plt_df$SE, ymin = plt_df$Median - plt_df$SE)
p_median <- ggplot(plt_df, aes(variable, Median, fill = groups)) + geom_bar(position = "dodge", stat = "identity") + geom_errorbar(median_limits, position = position_dodge(0.9), width = 0.3) + labs(title = "Feature Medians", x = "Feature", y = "Median (+/- SE)") + feature_theme
ggsave(p_median, filename = paste(prefix, "cluster_feature_medians.png", sep =''), path = plot_path, width = width, height = height, units = "in")
}
single_class_plots <- function(class_df, plot_path, class_num){
fgf_sub <- subset(class_df, class_df$class == "FGF2+")
fgf_sub.noturn <- data.frame(scale(fgf_sub[1:56]))
nofgf_sub <- subset(class_df, class_df$class == "FGF2-")
nofgf_sub.noturn <- data.frame(scale(nofgf_sub[1:56]))
fgf_sub.t <- Rtsne(as.matrix(fgf_sub.noturn), perplexity = 50)
nofgf_sub.t <- Rtsne(as.matrix(nofgf_sub.noturn), perplexity = 50)
fgf_sub.d <- dist(fgf_sub.noturn)
fgf_sub.fit <- hclust(fgf_sub.d, method = "ward.D")
fgf_sub.groups <- cutree(fgf_sub.fit, k = class_num)
nofgf_sub.d <- dist(nofgf_sub.noturn)
nofgf_sub.fit <- hclust(nofgf_sub.d, method = "ward.D")
nofgf_sub.groups <- cutree(nofgf_sub.fit, k = class_num)
no_axes_bg <- theme(axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position="none",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
tsne_groups_fgf <- ggplot(data.frame(fgf_sub.t$Y), aes(x=X1, y=X2, col = as.factor(fgf_sub.groups))) + geom_point(size=1) + labs(title = 'SC FGF+ tSNE') + no_axes_bg
ggsave(tsne_groups_fgf, path = plot_path, filename = "tsne_groups_fgf_only.png", width = 4, height = 4, units = "in")
tsne_groups_nofgf <- ggplot(data.frame(nofgf_sub.t$Y), aes(x=X1, y=X2, col = as.factor(nofgf_sub.groups))) + geom_point(size=1) + labs(title = 'SC FGF- tSNE') + no_axes_bg
ggsave(tsne_groups_nofgf, path = plot_path, filename = "tsne_groups_nofgf_only.png", width = 4, height = 4, units = "in")
plot_cluster_features(fgf_sub.noturn, fgf_sub.groups, file_prefix = "fgfonly_")
plot_cluster_features(nofgf_sub.noturn, nofgf_sub.groups, file_prefix = "nofgfonly_")
both_subs <- rbind(fgf_sub.noturn, nofgf_sub.noturn)
both_subs <- data.frame( scale(both_subs) )
both_subs.groups <- c(fgf_sub.groups, (nofgf_sub.groups + 4))
plot_cluster_features(both_subs, both_subs.groups, file_prefix = "bothsub_")
}
## Statistics
ttest_feature <- function(df, df.class, feature_name, class1 = 'Aged', class2 = 'Young', plot_path=FALSE, prefix=''){
class1_sub <- subset(df, df.class == class1)
class2_sub <- subset(df, df.class == class2)
t_result <- t.test(class1_sub[,feature_name], class2_sub[,feature_name], alternative = "two.sided")
if (is.character(plot_path)){
capture.output(t_result, file = paste(plot_path, prefix, "ttest_", feature_name, ".txt", sep=''))
}
return(t_result$p.value)
}
fgf_ttest_feature <- function(class_df, feature_name, plot_path, prefix=''){
fgf2_sub <- subset(class_df, class_df$class == "FGF2+")
nofgf2_sub <- subset(class_df, class_df$class == "FGF2-")
t_result <- t.test(fgf2_sub[,feature_name], nofgf2_sub[,feature_name], alternative = "two.sided")
capture.output(t_result, file = paste(plot_path, prefix, "ttest_", feature_name, ".txt", sep=''))
return(t_result$p.value)
}
mef_ttest_feature <- function(class_df, feature_name, plot_path, prefix=''){
myc_sub <- subset(class_df, class_df$class == "MycRas")
wt_sub <- subset(class_df, class_df$class == "WT")
t_result <- t.test(myc_sub[,feature_name], wt_sub[,feature_name], alternative = "two.sided")
capture.output(t_result, file = paste(plot_path, prefix, "ttest_", feature_name, ".txt", sep=''))
return(t_result$p.value)
}
anova_feature <- function(df, df.groups, feature_name, plot_path, prefix=''){
groups <- as.factor(df.groups)
av <- aov(df[,feature_name] ~ groups, data = df)
capture.output(summary(av), file = paste(plot_path, prefix, "anova_", feature_name, ".txt", sep =''))
p <- summary(av)[[1]][['Pr(>F)']][1]
return(p)
}
contingency_table_preftest <- function(df.cluster_membership, df.groups, plot_path, experiment = NULL){
if (is.factor(df.groups)){
df.groups = as.numeric(df.groups)
}
ct <- matrix(0, ncol = max(df.groups), nrow = 2)
ct[1,] <- df.cluster_membership$c1
ct[2,] <- df.cluster_membership$c2
# Fisher test
fisher <- fisher.test(ct)
capture.output(fisher, file = paste(plot_path, experiment, '_fisher.txt', sep = ''))
# Chi^2 test
chi2 <- chisq.test(ct)
capture.output(fisher, file = paste(plot_path, experiment, '_chi2.txt', sep = ''))
}
fgf_contingency_table_preftest <- function(df.cluster_membership, df.groups, plot_path, experiment = NULL){
ct <- matrix(0, ncol = max(df.groups), nrow = 2)
ct[1,] <- df.cluster_membership$fgf
ct[2,] <- df.cluster_membership$nofgf
# Fisher test
fisher <- fisher.test(ct)
capture.output(fisher, file = paste(plot_path, experiment, '_fisher.txt', sep = ''))
# Chi^2 test
chi2 <- chisq.test(ct)
capture.output(fisher, file = paste(plot_path, experiment, '_chi2.txt', sep = ''))
}
mef_contingency_table_preftest <- function(df.cluster_membership, df.groups, plot_path, experiment = NULL){
ct <- matrix(0, ncol = max(df.groups), nrow = 2)
ct[1,] <- df.cluster_membership$mycras
ct[2,] <- df.cluster_membership$wt
# Fisher test
fisher <- fisher.test(ct)
capture.output(fisher, file = paste(plot_path, experiment, '_fisher.txt', sep = ''))
# Chi^2 test
chi2 <- chisq.test(ct)
capture.output(chi2, file = paste(plot_path, experiment, '_chi2.txt', sep = ''))
}
density_contingency_table_preftest <- function(groups_mat, plot_path, prefix=''){
chi2 <- chisq.test(t(groups_mat))
capture.output(chi2, file = paste(plot_path, prefix, '_chi2.txt', sep = ''))
}
density_regression_tests <- function(df, density){
df$density <- density
features <- colnames(df)
features <- features[features != 'density']
results <- data.frame(matrix(nrow=length(features), ncol=3, 0))
colnames(results) <- c('Fstat', 'Rsq', 'p')
for (i in 1:length(features)){
f <- features[i]
f.lm <- lm(df[,f] ~ density, df)
f.s <- summary(f.lm)
rsq <- f.s$r.squared
fstat <- f.s$fstatistic[1]
p <- pf(f.s$fstatistic[1], f.s$fstatistic[2], f.s$fstatistic[3], lower.tail = F)
results[i,] <- c(fstat, rsq, p)
}
results$feature <- features
return(results)
}
#####
real_units_speed <- function(df.raw, df.class, time_interval=6.5, unit_multiplier=0.325){
# Find the mean displacement speed of each cell category in df.class
# in real units, as specified by a multiplier in unit_multiplier
# which converts pixels to real distance
# i.e. 6.5 um / px on the Hamamatsu camera
# Parameters
# ----------
# df.raw : data frame with raw (unscaled) feature values
# df.class : vector of class labels
# time_interval : float, time in minutes per frame.
# unit_multiplier : float, um distance per pixel (make sure to include effect of Mag!).
# Returns
# -------
# real_speeds : data frame, k x 3, class name, <speed in um/min>, speed SEM
#
real_speeds = data.frame(matrix(nrow = length(unique(df.class)), ncol = 3))
for ( i in 1:length(unique(df.class)) ){
real_speeds[i,] = c(unique(df.class)[i],
mean(df.raw[df.class==unique(df.class)[i],]$avg_speed)/time_interval * unit_multiplier,
(sd(df.raw[df.class==unique(df.class)[i],]$avg_speed)/time_interval * unit_multiplier)/sqrt(sum(df.class==unique(df.class)[i])))
}
colnames(real_speeds) <- c('Class', 'SpeedMean', 'SpeedSE')
return(real_speeds)
}
## Cluster Validation
clustval <- function(df, class_num, plot_path, prefix = NULL, method = 'ward.D2'){
df.groups <- cutree(hclust(dist(df), method = "ward.D"), k = class_num)
require(NbClust)
df.nbclust <- NbClust(df, min.nc = 2, max.nc = (class_num + 3), method = method)
capture.output(df.nbclust$All.index, file = paste(plot_path, prefix, "nbclust_indices.txt", sep = ''))
capture.output(df.nbclust$All.CriticalValues, file = paste(plot_path, prefix, "nbclust_critvals.txt", sep = ''))
capture.output(df.nbclust$Best.nc, file = paste(plot_path, prefix, "nbclust_bestnc.txt", sep = ''))
# library(clv)
# df.scattd <- cls.scatt.data(df, df.groups, dist="euclidean")
# df.conn <- connectivity(df, df.groups, neighbour.num = 10)
# df.dunn <- clv.Dunn(df.scattd, "complete", intercls = "complete")
#library(clusteval)
#hclust_fun <- function(x, num_clusters){ cutree(hclust(dist(x), method = "ward.D"), num_clusters)}
#df.clustomit <- clustomit(df, class_num, hclust_fun)
#capture.output(df.clustomit$boot_similarity, file = paste(plot_path, prefix, "clustomit_bootsimilarity.txt", sep = ''))
#capture.output(df.clustomit$boot_aggregate, file = paste(plot_path, prefix, "clustomit_bootaggregate.txt", sep = ''))
}
### LESS COMMON
## Cluster centroid distances
centroid_distances <- function(df, df.groups){
# Takes data.frame and factor of cluster assignments
# Returns distance matrix b/w centroids of each cluster
# d = [d(i1,i2), ]
# [d(i1,i3), d(i2,i3)], &c.
numc <- max(df.groups)
centroids <- matrix(0, numc, ncol(df))
for (i in 1:numc){
clust <- df[which(df.groups == i),]
centroid <- colMeans(clust, na.rm = T)
centroids[i,] = centroid
}
d <- dist(centroids, diag = T, upper = T)
return(d)
}
diff_centroid_distance <- function(class_df, df.groups){
fgf2_sub.groups <- df.groups[which(class_df$class == "FGF2+")]
nofgf2_sub.groups <- df.groups[which(class_df$class == "FGF2-")]
fgf2_sub <- subset(class_df, class_df$class == "FGF2+")
nofgf2_sub <- subset(class_df, class_df$class == "FGF2-")
fgf2_sub$class <- NULL
nofgf2_sub$class <- NULL
fgf2_sub.d <- centroid_distances(fgf2_sub, fgf2_sub.groups)
nofgf2_sub.d <- centroid_distances(nofgf2_sub, nofgf2_sub.groups)
diff_d <- fgf2_sub.d - nofgf2_sub.d
return(diff_d)
}
plot_diff_cent_dist <- function(centroid_dist, plot_path, prefix = NULL){
require(reshape2)
require(ggplot2)
d.m <- melt(as.matrix(centroid_dist))
diff_theme <- theme(#legend.position = "none",
#axis.text.x=element_blank(),
#axis.text.y=element_blank(),
axis.ticks=element_blank(),
legend.position="right",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
p <- ggplot(d.m, aes(x = Var2, y = Var1, fill = value))
p <- p + geom_tile() + labs(x = "Cluster", y = "Cluster") + diff_theme + coord_fixed() + scale_fill_distiller(palette = "RdBu", limits=c(-1.5,1.5))
ggsave(p, filename= paste(prefix, "centroid_distance_diff.png", sep = ''), path = plot_path, width = 4, height = 4)
}
## ICA
ica_plots <- function(df, plot_path, class_num){
require(ica)
require(ggplot2)
df.ica <- icafast(df, n = class_num, center = TRUE, maxit = 1000, alpha = 1)
df.ica.d <- dist(df.ica$S, method = "euclidean")
df.ica.fit <- hclust(df.ica.d, method = 'ward.D')
df.ica.groups <- as.factor(cutree(df.ica.fit, k = class_num))
ic_labs <- c("IC1", "IC2", "IC3", "IC4", "IC5", "IC6")
df.ics <- data.frame(df.ica$S)
colnames(df.ics) <- ic_labs[1:ncol(df.ics)]
ica_theme <- theme(legend.position = "none",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank())
ica_wardD <- ggplot(df.ics, aes(x = IC1, y = IC2, col = df.ica.groups)) + geom_point() + ica_theme + labs(title= "ICA" )
ggsave(ica_wardD, filename = "ica_wardD.png", path = plot_path, width = 4, height = 4, units = "in")
}
sem <- function(x){sd(x, na.rm = T)/sqrt(length(x) - sum(is.nan(x)))}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.