# Global knitr setup: apply the `size` chunk option "tiny" to every chunk.
library(knitr)
opts_chunk$set(size = "tiny")

Purpose

An example of fuzzy c-means clustering with visualization. The example is not as polished as it could be, but it serves the purpose of demonstrating the code.

Start

library(vp.misc)
data("cptac_oca")  # loads `oca.set`

# Keep only the rows that have no missing values in any sample.
oca.set <- oca.set[complete.cases(exprs(oca.set)), ]

# Keep the (up to) 100 most variable rows; otherwise there is no strong
# clustering. order() sorts ascending by default, so decreasing = TRUE is
# required to put the largest standard deviations first. head() avoids NA
# indices when fewer than 100 rows remain.
row_sd <- apply(exprs(oca.set), 1, sd)
oca.set <- oca.set[head(order(row_sd, decreasing = TRUE), 100), ]
ym <- exprs(oca.set)

# Scale each row by its standard deviation.
ym <- sweep(ym, 1, apply(ym, 1, sd, na.rm = TRUE), "/")

library(e1071)
# Fix the seed so that the cmeans() result is reproducible between runs.
set.seed(0)
res <- cmeans(ym, centers = 2, iter.max = 1000, verbose = FALSE,
              dist = "euclidean", method = "cmeans")

# Number of rows whose membership score in each cluster exceeds 0.5.
colSums(res$membership > 0.5)


library(reshape2)
library(ggplot2)

# Draw one line plot per cluster, showing the rows whose membership score in
# that cluster exceeds 0.5, coloured by that score.
for (cluster_num in colnames(res$membership)) {
    # Membership scores for this cluster, restricted to rows scoring above 0.5.
    # Subsetting the named vector (rather than the matrix) keeps the row names
    # even when only a single row qualifies.
    membership <- res$membership[, cluster_num]
    members <- membership[membership > 0.5]

    # Nothing to plot for a cluster without members.
    if (length(members) == 0) {
        next
    }

    # drop = FALSE keeps a matrix even when the cluster has a single member,
    # which melt() needs in order to produce the protein/sample columns.
    ym1 <- ym[names(members), , drop = FALSE]

    # Long format: one row per (protein, sample) pair.
    x <- melt(ym1, varnames = c("protein", "sample"),
              value.name = "relative_abundance")

    # Attach the membership score; by.y = 0 joins on the matrix row names.
    x <- merge(x, res$membership[, cluster_num, drop = FALSE],
               by.x = "protein", by.y = 0)
    colnames(x)[ncol(x)] <- "membership"

    # Order levels according to protein membership score.
    x$protein <- ordered(x$protein, levels = names(sort(members)))

    p <-
    ggplot(x, aes(x = sample, y = relative_abundance,
                  color = membership, group = protein)) +
        # `fun` replaces the deprecated `fun.y` argument of stat_summary().
        stat_summary(aes(group = protein), fun = mean, geom = "line",
                     size = 1.5) +
        # `values` gives one position per colour. The original listed five
        # positions for four colours; 0.9 was dropped to make them match.
        # NOTE(review): add a fifth colour instead if that was the intent.
        scale_color_gradientn(colours = c("cyan", "blue", "magenta", "red"),
                              values = c(0, 0.5, 0.75, 1)) +
        # ylim(-2.2,+2.2) +
        theme_classic() +
        ylab("log2(sample/average)") +
        xlab("sample name")
    plot(p)
    # ggsave(filename = paste0("fuzzy_cluster_", cluster_num,".png"), scale = 0.4, dpi=500)
}


vladpetyuk/vp.misc documentation built on June 25, 2021, 6:35 a.m.