# metasnf vignette code, extracted from the R Markdown source via knitr::purl()
## ----include = FALSE----------------------------------------------------------
# knitr chunk defaults: collapse source and output, prefix output lines with "#>"
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## ----echo = FALSE-------------------------------------------------------------
# Disable ANSI colouring (crayon/cli) so rendered output is plain text
options(crayon.enabled = FALSE, cli.num_colors = 0)
## -----------------------------------------------------------------------------
library(metasnf)
# Inspect the cortical thickness data provided by metasnf:
# class, dimensions, structure, and a small preview
class(cort_t)
dim(cort_t)
str(cort_t[1:5, 1:5])
cort_t[1:5, 1:5]
## -----------------------------------------------------------------------------
# Same inspection for the household income data
dim(income)
str(income[1:5, ])
income[1:5, ]
## -----------------------------------------------------------------------------
# Collect all raw data frames in one list for bulk checks
df_list <- list(anxiety, depress, cort_t, cort_sa, subc_v, income, pubertal)
# Dimensions (rows, columns) of each data frame:
lapply(df_list, dim)
# Whether or not each data frame has missing values:
lapply(df_list, function(x) any(is.na(x)))
## -----------------------------------------------------------------------------
complete_uids <- get_complete_uids(df_list, uid = "unique_id")
print(length(complete_uids))
# Reducing data frames to only common observations with no missing data.
# Helper: keep only rows whose unique_id is in the complete-case set.
keep_complete <- function(df) {
  df[df$"unique_id" %in% complete_uids, ]
}
anxiety <- keep_complete(anxiety)
depress <- keep_complete(depress)
cort_t <- keep_complete(cort_t)
cort_sa <- keep_complete(cort_sa)
subc_v <- keep_complete(subc_v)
income <- keep_complete(income)
pubertal <- keep_complete(pubertal)
## -----------------------------------------------------------------------------
# Components can also be given positionally as (data, name, domain, type);
# the explicit names (data = ..., name = ..., etc.) used elsewhere are optional.
input_dl <- data_list(
  list(cort_t, "cortical_thickness", "neuroimaging", "continuous"),
  list(cort_sa, "cortical_surface_area", "neuroimaging", "continuous"),
  list(subc_v, "subcortical_volume", "neuroimaging", "continuous"),
  list(income, "household_income", "demographics", "continuous"),
  list(pubertal, "pubertal_status", "demographics", "continuous"),
  uid = "unique_id"
)
## -----------------------------------------------------------------------------
summary(input_dl)
## -----------------------------------------------------------------------------
# Out-of-model (target) features, kept separate from the clustering inputs
target_dl <- data_list(
  list(data = anxiety, name = "anxiety", domain = "behaviour", type = "ordinal"),
  list(data = depress, name = "depressed", domain = "behaviour", type = "ordinal"),
  uid = "unique_id"
)
summary(target_dl)
## -----------------------------------------------------------------------------
# Fix the random seed so the randomly generated settings are reproducible.
# Build a config of 20 settings combinations; the hyperparameter k is
# bounded by min_k / max_k.
set.seed(42)
my_sc <- snf_config(
dl = input_dl,
n_solutions = 20,
min_k = 20,
max_k = 50
)
my_sc
## -----------------------------------------------------------------------------
# The settings data frame stored inside the config object
my_sc$"settings_df"
## -----------------------------------------------------------------------------
# Coerce to a plain data frame to see the underlying columns
head(as.data.frame(my_sc$"settings_df"))
## -----------------------------------------------------------------------------
names(my_sc)
## -----------------------------------------------------------------------------
# Run SNF + clustering once per settings row, yielding one solution per row
sol_df <- batch_snf(input_dl, my_sc)
sol_df
## -----------------------------------------------------------------------------
# Transposed view: solutions as columns rather than rows
cluster_solutions <- t(sol_df)
cluster_solutions
## -----------------------------------------------------------------------------
# Pairwise adjusted Rand indices (similarity) between all cluster solutions
sol_aris <- calc_aris(sol_df)
head(sol_aris)
## ----eval = FALSE-------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# # To make heatmaps with metasnf
# BiocManager::install("ComplexHeatmap")
#
# # To use heatmap shiny app (later in vignette) with metasnf
# BiocManager::install("InteractiveComplexHeatmap")
## -----------------------------------------------------------------------------
# Ordering of solutions that places similar (high-ARI) solutions adjacently
meta_cluster_order <- get_matrix_order(sol_aris)
# Just a vector of numbers
meta_cluster_order
## ----eval = FALSE-------------------------------------------------------------
# ari_hm <- meta_cluster_heatmap(
# sol_aris,
# order = meta_cluster_order
# )
#
# ari_hm
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# save_heatmap(
# heatmap = ari_hm,
# path = "vignettes/meta_cluster_heatmap.png",
# width = 330,
# height = 300,
# res = 100
# )
## ----eval = FALSE-------------------------------------------------------------
# shiny_annotator(ari_hm)
## -----------------------------------------------------------------------------
# Boundary positions (within the ordered solutions) that separate meta
# clusters; presumably chosen by inspecting the ARI heatmap / shiny
# annotator above -- adjust for your own data
split_vec <- c(2, 5, 12, 17)
## -----------------------------------------------------------------------------
# Attach meta cluster labels to the solutions data frame
mc_sol_df <- label_meta_clusters(
sol_df,
order = meta_cluster_order,
split_vector = split_vec
)
mc_sol_df
## ----eval = FALSE-------------------------------------------------------------
# ari_mc_hm <- meta_cluster_heatmap(
# sol_aris,
# order = meta_cluster_order,
# split_vector = split_vec
# )
#
# ari_mc_hm
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# save_heatmap(
# heatmap = ari_mc_hm,
# path = "vignettes/ari_mc_hm.png",
# width = 330,
# height = 300,
# res = 100
# )
## -----------------------------------------------------------------------------
# Only looking at our out-of-model p-values (features in target_dl)
ext_sol_df <- extend_solutions(
mc_sol_df,
target_dl = target_dl
)
ext_sol_df
# Re-running to calculate the p-value for every single input and out-of-model
# feature (supplying dl = input_dl adds the in-model features):
ext_sol_df <- extend_solutions(
mc_sol_df,
dl = input_dl,
target_dl = target_dl
)
ext_sol_df
## ----eval = FALSE-------------------------------------------------------------
# # No features are used to calculate summary p-values
# ext_sol_df_no_summaries <- extend_solutions(
# mc_sol_df,
# dl = c(input_dl, target_dl)
# )
#
# # Every features are used to calculate summary p-values
# ext_sol_df_all_summaries <- extend_solutions(
# mc_sol_df,
# target_dl = c(input_dl, target_dl)
# )
## ----eval = FALSE-------------------------------------------------------------
# annotated_ari_hm <- meta_cluster_heatmap(
# sol_aris,
# order = meta_cluster_order,
# split_vector = split_vec,
# data = as.data.frame(ext_sol_df),
# top_hm = list(
# "Depression p-value" = "cbcl_depress_r_pval",
# "Anxiety p-value" = "cbcl_anxiety_r_pval",
# "Overall outcomes p-value" = "mean_pval"
# ),
# bottom_bar = list(
# "Number of Clusters" = "nclust"
# ),
# annotation_colours = list(
# "Depression p-value" = colour_scale(
# ext_sol_df$"cbcl_depress_r_pval",
# min_colour = "purple",
# max_colour = "black"
# ),
# "Anxiety p-value" = colour_scale(
# ext_sol_df$"cbcl_anxiety_r_pval",
# min_colour = "green",
# max_colour = "black"
# ),
# "Overall outcomes p-value" = colour_scale(
# ext_sol_df$"mean_pval",
# min_colour = "lightblue",
# max_colour = "black"
# )
# )
# )
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# save_heatmap(
# heatmap = annotated_ari_hm,
# path = "vignettes/annotated_ari_hm.png",
# width = 700,
# height = 500,
# res = 100
# )
## ----eval = FALSE-------------------------------------------------------------
# annotation_data <- ext_sol_df |>
# as.data.frame(keep_attributes = TRUE) |>
# dplyr::mutate(
# key_subjects_cluster_together = dplyr::case_when(
# uid_NDAR_INVLF3TNDUZ == uid_NDAR_INVLDQH8ATK ~ TRUE,
# TRUE ~ FALSE
# )
# )
#
# annotated_ari_hm2 <- meta_cluster_heatmap(
# sol_aris,
# order = meta_cluster_order,
# split_vector = split_vec,
# data = annotation_data,
# top_hm = list(
# "Depression p-value" = "cbcl_depress_r_pval",
# "Anxiety p-value" = "cbcl_anxiety_r_pval",
# "Key Subjects Clustered Together" = "key_subjects_cluster_together"
# ),
# bottom_bar = list(
# "Number of Clusters" = "nclust"
# ),
# annotation_colours = list(
# "Depression p-value" = colour_scale(
# ext_sol_df$"cbcl_depress_r_pval",
# min_colour = "purple",
# max_colour = "black"
# ),
# "Anxiety p-value" = colour_scale(
# ext_sol_df$"cbcl_anxiety_r_pval",
# min_colour = "purple",
# max_colour = "black"
# ),
# "Key Subjects Clustered Together" = c(
# "TRUE" = "blue",
# "FALSE" = "black"
# )
# )
# )
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# save_heatmap(
# heatmap = annotated_ari_hm2,
# path = "vignettes/annotated_ari_hm2.png",
# width = 700,
# height = 500,
# res = 100
# )
## ----eval = FALSE-------------------------------------------------------------
# rep_solutions <- get_representative_solutions(sol_aris, ext_sol_df)
#
# mc_manhattan <- mc_manhattan_plot(
# rep_solutions,
# dl = input_dl,
# target_dl = target_dl,
# hide_x_labels = TRUE,
# point_size = 2,
# text_size = 12,
# threshold = 0.05,
# neg_log_pval_thresh = 5
# )
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# ggplot2::ggsave(
# "vignettes/mc_manhattan.png",
# mc_manhattan,
# height = 4,
# width = 8,
# dpi = 100
# )
## ----eval = FALSE-------------------------------------------------------------
# rep_solutions_no_cort <- dplyr::select(rep_solutions, -dplyr::contains("mrisdp"))
#
# mc_manhattan2 <- mc_manhattan_plot(
# ext_sol_df = rep_solutions_no_cort,
# dl = input_dl,
# target_dl = target_dl,
# point_size = 4,
# threshold = 0.01,
# text_size = 12,
# domain_colours = c(
# "neuroimaging" = "cadetblue",
# "demographics" = "purple",
# "behaviour" = "firebrick"
# )
# )
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# ggplot2::ggsave(
# "vignettes/mc_manhattan2.png",
# mc_manhattan2,
# height = 8,
# width = 12,
# dpi = 100
# )
## ----eval = FALSE-------------------------------------------------------------
# config_hm <- config_heatmap(
# sc = my_sc,
# order = meta_cluster_order,
# hide_fixed = TRUE
# )
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# save_heatmap(
# heatmap = config_hm,
# path = "vignettes/config_hm.png",
# width = 400,
# height = 500,
# res = 75
# )
## ----eval = FALSE-------------------------------------------------------------
# annotation_data <- ext_sol_df |>
# as.data.frame(keep_attributes = TRUE) |>
# dplyr::mutate(
# key_subjects_cluster_together = dplyr::case_when(
# uid_NDAR_INVLF3TNDUZ == uid_NDAR_INVLDQH8ATK ~ TRUE,
# TRUE ~ FALSE
# )
# )
#
# annotation_data$"clust_alg" <- as.factor(annotation_data$"clust_alg")
#
# annotated_ari_hm3 <- meta_cluster_heatmap(
# sol_aris,
# order = meta_cluster_order,
# split_vector = split_vec,
# data = annotation_data,
# top_hm = list(
# "Depression p-value" = "cbcl_depress_r_pval",
# "Anxiety p-value" = "cbcl_anxiety_r_pval",
# "Key Subjects Clustered Together" = "key_subjects_cluster_together"
# ),
# left_hm = list(
# "Clustering Algorithm" = "clust_alg" # from the original settings
# ),
# bottom_bar = list(
# "Number of Clusters" = "nclust" # also from the original settings
# ),
# annotation_colours = list(
# "Depression p-value" = colour_scale(
# ext_sol_df$"cbcl_depress_r_pval",
# min_colour = "purple",
# max_colour = "black"
# ),
# "Anxiety p-value" = colour_scale(
# ext_sol_df$"cbcl_anxiety_r_pval",
# min_colour = "purple",
# max_colour = "black"
# ),
# "Key Subjects Clustered Together" = c(
# "TRUE" = "blue",
# "FALSE" = "black"
# )
# )
# )
#
# annotated_ari_hm3
## -----------------------------------------------------------------------------
# Re-run SNF keeping the fused similarity matrices, which the quality
# metrics below require
sol_df <- batch_snf(dl = input_dl, sc = my_sc, return_sim_mats = TRUE)
## ----eval = FALSE-------------------------------------------------------------
# silhouette_scores <- calculate_silhouettes(sol_df)
# dunn_indices <- calculate_dunn_indices(sol_df)
# db_indices <- calculate_db_indices(sol_df)
## -----------------------------------------------------------------------------
ext_sol_df <- extend_solutions(sol_df, target_dl)
## ----eval = FALSE-------------------------------------------------------------
# pval_hm <- pval_heatmap(ext_sol_df, order = meta_cluster_order)
#
# pval_hm
## ----eval = FALSE, echo = FALSE-----------------------------------------------
# save_heatmap(
# heatmap = pval_hm,
# path = "vignettes/pval_heatmap_ordered.png",
# width = 400,
# height = 500,
# res = 100
# )
## -----------------------------------------------------------------------------
# All the observations present in all data frames with no NAs
all_observations <- uids(input_dl)
all_observations
# Strip the "uid_" prefix so values match the original data's "unique_id" column
all_observations <- gsub("uid_", "", all_observations)
# Randomly assign 80% of observations to "train" and the rest to "test"
assigned_splits <- train_test_assign(train_frac = 0.8, uids = all_observations)
# Pulling the training and testing observations specifically
train_obs <- assigned_splits$"train"
test_obs <- assigned_splits$"test"
# Helper: keep only the rows of df whose unique_id appears in obs
subset_by_uid <- function(df, obs) {
  df[df$"unique_id" %in% obs, ]
}
# Partition a training set
train_cort_t <- subset_by_uid(cort_t, train_obs)
train_cort_sa <- subset_by_uid(cort_sa, train_obs)
train_subc_v <- subset_by_uid(subc_v, train_obs)
train_income <- subset_by_uid(income, train_obs)
train_pubertal <- subset_by_uid(pubertal, train_obs)
train_anxiety <- subset_by_uid(anxiety, train_obs)
train_depress <- subset_by_uid(depress, train_obs)
# Partition a test set
test_cort_t <- subset_by_uid(cort_t, test_obs)
test_cort_sa <- subset_by_uid(cort_sa, test_obs)
test_subc_v <- subset_by_uid(subc_v, test_obs)
test_income <- subset_by_uid(income, test_obs)
test_pubertal <- subset_by_uid(pubertal, test_obs)
test_anxiety <- subset_by_uid(anxiety, test_obs)
test_depress <- subset_by_uid(depress, test_obs)
# A data list with just training observations (clustering inputs only;
# anxiety/depress stay out as they are the prediction targets)
train_dl <- data_list(
list(train_cort_t, "cortical_thickness", "neuroimaging", "continuous"),
list(train_cort_sa, "cortical_sa", "neuroimaging", "continuous"),
list(train_subc_v, "subcortical_volume", "neuroimaging", "continuous"),
list(train_income, "household_income", "demographics", "continuous"),
list(train_pubertal, "pubertal_status", "demographics", "continuous"),
uid = "unique_id"
)
# A data list with training and testing observations (used later for
# propagating labels to the test set)
full_dl <- data_list(
list(cort_t, "cortical_thickness", "neuroimaging", "continuous"),
list(cort_sa, "cortical_surface_area", "neuroimaging", "continuous"),
list(subc_v, "subcortical_volume", "neuroimaging", "continuous"),
list(income, "household_income", "demographics", "continuous"),
list(pubertal, "pubertal_status", "demographics", "continuous"),
uid = "unique_id"
)
# Construct the target lists (out-of-model features, training observations only)
train_target_dl <- data_list(
list(train_anxiety, "anxiety", "behaviour", "ordinal"),
list(train_depress, "depressed", "behaviour", "ordinal"),
uid = "unique_id"
)
# Find a clustering solution in your training data: 5 settings
# combinations, k bounded between 10 and 30
set.seed(42)
my_sc <- snf_config(
train_dl,
n_solutions = 5,
min_k = 10,
max_k = 30
)
train_sol_df <- batch_snf(
train_dl,
my_sc
)
# Add out-of-model association p-values for the training targets
ext_sol_df <- extend_solutions(
train_sol_df,
train_target_dl
)
# The first row had the lowest minimum p-value across our outcomes
# (note: which() returns all rows tied at the minimum)
lowest_min_pval <- min(ext_sol_df$"min_pval")
which(ext_sol_df$"min_pval" == lowest_min_pval)
# Keep track of your top solution
top_row <- ext_sol_df[1, ]
# Use the solutions data frame from the training observations and the data list from
# the training and testing observations to propagate labels to the test observations
propagated_labels <- label_propagate(top_row, full_dl)
head(propagated_labels)
tail(propagated_labels)
## -----------------------------------------------------------------------------
# Propagate labels for every training solution (not just the top row)
propagated_labels_all <- label_propagate(
ext_sol_df,
full_dl
)
head(propagated_labels_all)
tail(propagated_labels_all)
# End of extracted vignette code. (Web-page footer text from the hosting
# service was removed here; it was not part of the script.)