# inst/doc/coca-vignette.R

## ----build_coca, fig.show='hold', message=FALSE, warning=FALSE, cache=TRUE----
### Load the three example CSV files found in the package's "extdata"
### directory. Each file is read with its first column as row names and
### converted to a matrix; `data` ends up as an unnamed list of matrices.
data <- lapply(
  c("dataset1.csv", "dataset2.csv", "dataset3.csv"),
  function(fileName) {
    csvPath <- system.file("extdata", fileName, package = "coca")
    as.matrix(read.csv(csvPath, row.names = 1))
  }
)

### Build the matrix of clusters from the loaded datasets
### (called with M = 3, K = 5 and distances = "cor")
outputBuildMOC <- coca::buildMOC(data, M = 3, K = 5, distances = "cor")

### Keep the two components of the result that the following chunks use:
### the matrix of clusters and the dataset indicator vector
moc <- outputBuildMOC[["moc"]]
datasetIndicator <- outputBuildMOC[["datasetIndicator"]]

## ----plot_moc, fig.show='hold', message=FALSE, warning=FALSE, cache=TRUE------

### Read the reference cluster labels (first CSV column = row names) and put
### them, as a factor, into the data frame passed to the plot as annotations
true_labels <- as.matrix(
  read.csv(
    system.file("extdata", "cluster_labels.csv", package = "coca"),
    row.names = 1
  )
)
annotations <- data.frame(true_labels = as.factor(true_labels))

### Draw the matrix of clusters together with the annotation column
coca::plotMOC(moc, datasetIndicator, annotations = annotations)

## ----plotmoc_wnames, fig.show='hold', message=FALSE, warning=FALSE, cache=TRUE----

### Read the reference cluster labels (first CSV column = row names) and put
### them, as a factor, into the data frame passed to the plot as annotations
true_labels <- as.matrix(
  read.csv(
    system.file("extdata", "cluster_labels.csv", package = "coca"),
    row.names = 1
  )
)
annotations <- data.frame(true_labels = as.factor(true_labels))

### Dataset names: "A", "B" and "C", each repeated five times in a row
datasetNames <- rep(c("A", "B", "C"), each = 5)

### Draw the matrix of clusters, now also supplying the dataset names
coca::plotMOC(
  moc,
  datasetIndicator,
  datasetNames = datasetNames,
  annotations = annotations
)

## ----coca, fig.show='hold', message=FALSE, warning=FALSE, cache=TRUE----------
### COCA

# Run COCA on the matrix of clusters with K = 5 to get a global clustering
coca <- coca::coca(moc, K = 5)

# Adjusted Rand index between the true labels and the COCA cluster labels;
# the bare name on the next line prints the value
ari <- mclust::adjustedRandIndex(true_labels, coca[["clusterLabels"]])
ari

### Add the COCA labels as a second annotation column and redraw the
### matrix of clusters
annotations[["coca"]] <- as.factor(coca[["clusterLabels"]])
coca::plotMOC(
  moc,
  datasetIndicator,
  datasetNames = datasetNames,
  annotations = annotations
)

## ----coca_unknownK, fig.show='hold', message=FALSE, warning=FALSE, cache=TRUE----

# Run COCA to find a global clustering and choose the number of clusters
# (called with maxK = 10 and hclustMethod = "average")
coca <- coca::coca(moc, maxK = 10, hclustMethod = "average")

# Adjusted Rand index between the true labels and the COCA cluster labels;
# the bare name on the next line prints the value
ari <- mclust::adjustedRandIndex(true_labels, coca[["clusterLabels"]])
ari

### Replace the COCA annotation column with the new labels and redraw the
### matrix of clusters
annotations[["coca"]] <- as.factor(coca[["clusterLabels"]])
coca::plotMOC(
  moc,
  datasetIndicator,
  datasetNames = datasetNames,
  annotations = annotations
)

# Try the coca package in your browser
#
# Any scripts or data that you put into this service are public.
#
# coca documentation built on July 8, 2020, 7:29 p.m.