Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----echo = FALSE-------------------------------------------------------------
options(crayon.enabled = FALSE, cli.num_colors = 0)
## -----------------------------------------------------------------------------
# Load the package
library(metasnf)
dl <- data_list(
list(subc_v, "subcortical_volume", "neuroimaging", "continuous"),
list(income, "household_income", "demographics", "continuous"),
list(pubertal, "pubertal_status", "demographics", "continuous"),
list(anxiety, "anxiety", "behaviour", "ordinal"),
list(depress, "depressed", "behaviour", "ordinal"),
uid = "unique_id"
)
set.seed(42)
sc <- snf_config(
dl = dl,
n_solutions = 5,
max_k = 40
)
## -----------------------------------------------------------------------------
# Available functions
sc$"clust_fns_list"
# Which functions will be used
sc$"settings_df"$"clust_alg"
## -----------------------------------------------------------------------------
# The default list:
sc <- snf_config(
dl = dl,
n_solutions = 5,
use_default_clust_fns = TRUE
)
sc$"clust_fns_list"
# Adding algorithms provided by the package
sc <- snf_config(
dl = dl,
n_solutions = 5,
clust_fns = list(
"two_cluster_spectral" = spectral_two,
"five_cluster_spectral" = spectral_five
),
use_default_clust_fns = TRUE
)
# Note that this one has the default algorithms as well as the newly added ones
sc$"clust_fns_list"
# This list has only the newly added ones
sc <- snf_config(
dl = dl,
n_solutions = 5,
clust_fns = list(
"two_cluster_spectral" = spectral_two,
"five_cluster_spectral" = spectral_five
)
)
sc$"clust_fns_list"
## -----------------------------------------------------------------------------
sc
## ----eval = FALSE-------------------------------------------------------------
# sol_df <- batch_snf(
# dl = dl,
# sc = sc
# )
## ----eval = FALSE-------------------------------------------------------------
# # Default clustering algorithm #1
# spectral_eigen <- function(similarity_matrix) {
# estimated_n <- estimate_nclust_given_graph(
# W = similarity_matrix,
# NUMC = 2:10
# )
# nclust_estimate <- estimated_n$`Eigen-gap best`
# solution <- SNFtool::spectralClustering(
# similarity_matrix,
# nclust_estimate
# )
# return(solution)
# }
#
# # Default clustering algorithm #2
# spectral_rot <- function(similarity_matrix) {
# estimated_n <- estimate_nclust_given_graph(
# W = similarity_matrix,
# NUMC = 2:10
# )
# nclust_estimate <- estimated_n$`Rotation cost best`
# solution <- SNFtool::spectralClustering(
# similarity_matrix,
# nclust_estimate
# )
# return(solution)
# }
## -----------------------------------------------------------------------------
sol_df <- batch_snf(
dl,
sc,
return_sim_mats = TRUE
)
# Similarity matrices are in the list below:
similarity_matrices <- sim_mats_list(sol_df)
length(similarity_matrices)
dim(similarity_matrices[[1]])
# Your manual clustering goes here...
## ----eval = FALSE-------------------------------------------------------------
# library(dbscan)
# ## Example 1: use dbscan on the iris data set
# data(iris)
# iris <- as.matrix(iris[, 1:4])
# iris_dist <- dist(iris)
#
# ## Find suitable DBSCAN parameters:
# ## 1. We use minPts = dim + 1 = 5 for iris. A larger value can also be used.
# ## 2. We inspect the k-NN distance plot for k = minPts - 1 = 4
# kNNdistplot(iris, minPts = 5)
#
# ## Noise seems to start around a 4-NN distance of .7
# abline(h=.7, col = "red", lty = 2)
#
# results <- dbscan(iris_dist, eps = 0.7, minPts = 5)
#
# # The 1 is added to ensure that those with no cluster (cluster 0) are still
# # plotted.
# pairs(iris, col = results$cluster + 1)
## ----fig.width = 5, fig.height = 4.5------------------------------------------
library(dbscan)
library(ggplot2)
dl <- data_list(
list(
data = expression_df,
name = "genes_1_and_2_exp",
domain = "gene_expression",
type = "continuous"
),
list(
data = methylation_df,
name = "genes_1_and_2_meth",
domain = "gene_methylation",
type = "continuous"
),
uid = "patient_id"
)
set.seed(42)
sc <- snf_config(
dl = dl,
n_solutions = 5
)
sol_df <- batch_snf(
dl = dl,
sc = sc,
return_sim_mats = TRUE
)
similarity_matrices <- sim_mats_list(sol_df)
representative_sm <- similarity_matrices[[1]]
representative_sms <- similarity_matrices[c(1, 2)]
distance_matrix1 <- as.dist(
max(representative_sm) - representative_sm
)
kNNdistplot(
distance_matrix1,
minPts = 10
)
## Maybe there?
abline(h=0.4872, col = "red", lty = 2)
dbscan_results <- dbscan(distance_matrix1, eps = 0.4872, minPts = 10)$"cluster"
spectral_results <- t(sol_df[1, ])[, 2]
dbscan_vs_spectral <- data.frame(
dbscan = dbscan_results,
spectral = spectral_results
)
ggplot(dbscan_vs_spectral, aes(x = dbscan, y = spectral)) +
geom_jitter(height = 0.1, width = 0.1, alpha = 0.5) +
theme_bw()
## ----eval = FALSE-------------------------------------------------------------
# for (i in seq(0.485, 0.488, by = 0.0001)) {
# results <- dbscan(distance_matrix1, eps = i, minPts = 10)
# if (length(unique(results$"cluster")) == 3) {
# print(i)
# }
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.