inst/doc/ANF.R

## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
dev="png",
message=FALSE, error=FALSE, warning=TRUE)

## -----------------------------------------------------------------------------
library(MASS)
true.class = rep(c(1,2),each=100)
feature.mat1 = mvrnorm(100, rep(0, 20), diag(runif(20,0.2,2)))
feature.mat2 = mvrnorm(100, rep(0.5, 20), diag(runif(20,0.2,2)))
feature1 = rbind(feature.mat1, feature.mat2)

## -----------------------------------------------------------------------------
library(igraph)
set.seed(1)
km = kmeans(feature1, 2)
compare(km$cluster, true.class, method='nmi')

## -----------------------------------------------------------------------------
library(ANF)
d = dist(feature1)
d = as.matrix(d)
A1 = affinity_matrix(d, 10)
labels = spectral_clustering(A1, 2)
compare(labels, true.class, method='nmi')

## -----------------------------------------------------------------------------
feature.mat1 = mvrnorm(100, rep(10, 30), diag(runif(30,0.2,3)))
feature.mat2 = mvrnorm(100, rep(9.5, 30), diag(runif(30,0.2,3)))
feature2 = rbind(feature.mat1, feature.mat2)

## -----------------------------------------------------------------------------
set.seed(123)
km = kmeans(feature2, 2)
compare(km$cluster, true.class, method='nmi')

d = dist(feature2)
d = as.matrix(d)
A2 = affinity_matrix(d, 10)
labels = spectral_clustering(A2, 2)
compare(labels, true.class, method='nmi')

## -----------------------------------------------------------------------------
feature.cat = cbind(feature1, feature2)
set.seed(1)
km = kmeans(feature.cat, 2)
compare(km$cluster, true.class, method='nmi')

## -----------------------------------------------------------------------------
W = ANF(list(A1, A2), K=30)
labels = spectral_clustering(W,2)
compare(labels, true.class, method='nmi')

## -----------------------------------------------------------------------------
library(ExperimentHub)
eh <- ExperimentHub()
myfiles <- query(eh, "HarmonizedTCGAData")
Wall <- myfiles[[1]]
project_ids <- myfiles[[2]]
surv.plot <- myfiles[[3]]

## -----------------------------------------------------------------------------
affinity.mat <- Wall[["adrenal_gland"]][["log.sel"]][["fpkm"]]
labels <- spectral_clustering(affinity.mat, k = 2)

## -----------------------------------------------------------------------------
true.disease.types <- as.factor(project_ids[rownames(affinity.mat)])
table(labels, true.disease.types)

nmi <- igraph::compare(true.disease.types, labels, method = "nmi")

adjusted.rand = igraph::compare(true.disease.types, labels, method = "adjusted.rand")

# we can also calculate p-value using `surv.plot` data
surv.plot <- surv.plot[rownames(affinity.mat), ]
f <- survival::Surv(surv.plot$time, !surv.plot$censored)
fit <- survival::survdiff(f ~ labels)
pval <- stats::pchisq(fit$chisq, df = length(fit$n) - 1, lower.tail = FALSE)

message(paste("NMI =", nmi, ", ARI =", adjusted.rand, ", p-val =", pval))

## -----------------------------------------------------------------------------
res <- eval_clu(project_ids, w = affinity.mat, surv = surv.plot)

## -----------------------------------------------------------------------------
res <- eval_clu(project_ids, w = Wall$uterus$raw.all$fpkm)

## -----------------------------------------------------------------------------
# fuse three matrices: "fpkm" (gene expression), "mirnas" (miRNA expression) and "methy450" (DNA methylation)
fused.mat <- ANF(Wall = Wall$uterus$raw.all)
# Spectral clustering on fused patient affinity matrix
labels <- spectral_clustering(A = fused.mat, k = 2)
# Or we can directly evaluate clustering results using function `eval_clu`, which calls `spectral_clustering` and calculate NMI and ARI (and p-value if patient survival data is available. `surv.plot` does not contain information for uterus cancer patients)
res <- eval_clu(true_class = project_ids[rownames(fused.mat)], w = fused.mat)

Try the ANF package in your browser

Any scripts or data that you put into this service are public.

ANF documentation built on Nov. 8, 2020, 7:51 p.m.