spatialr.dev: Research, Development and Testing of Spatial Clustering Project

library(bluster)
library(mclust)
library(dplyr)
library(BayesSpace)
library(Matrix)
library(ggplot2)
library(ggpubr)
library(gridExtra)

safe_ari <- purrr::safely(mclust::adjustedRandIndex)
results_raw_1 <- read.table("G:/My Drive/Dissertation/Spatial/spatial_analysis/cv_2022_06_27_01/summary.csv")
results_raw_2 <- read.table("G:/My Drive/Dissertation/Spatial/spatial_analysis/cv_2022_07_02_01/summary.csv")
results_raw <- rbind(results_raw_1, results_raw_2) 
colnames(results_raw) <- c("sample","time","lambda","k", "penal_lik", "cv_like")
lambda_filt <- c(5, 10,15,20,25)
results <- results_raw %>% 
  tibble() %>% 
  filter(lambda %in% lambda_filt) %>%
  mutate(sample = as.numeric(sample)) %>% 
  group_by(sample, lambda, k) %>% 
  dplyr::slice(n = 1) %>% 
  arrange(sample, lambda, k) %>% 
  group_by(sample, time, lambda, k, penal_lik) %>% 
  summarize(cv_like_sum = sum(cv_like),
            cv_like_mean = mean(cv_like),
            cv_like_median = median(cv_like)) %>% 
  arrange(sample, k, lambda) %>% 
  filter(k > 5)


# results %>% filter(k==5,sample=="151673")


# Compile selection plots

input <- "151674"
if(input %in% c("151669", "151670", "151671", "151672")){
  centers <- 5
} else {
  centers <- 7
}
set.seed(1)

p_grid <- results %>%
  filter(sample == as.numeric(input)) %>%
  ggplot(aes(x = lambda, y = cv_like_mean)) +
  geom_point() +
  geom_line() +
  facet_wrap(~k) +
  ylab("Mean of CV Likelihood")

p_k <- results %>%
  filter(sample == input) %>%
  group_by(k) %>%
  slice_max(order_by = cv_like_mean, n = 1) %>%
  ungroup() %>%
  mutate(selected = ifelse(penal_lik == max(penal_lik), "Yes", "No")) %>%
  ggplot(aes(x = k, y = penal_lik, label = lambda)) +
  geom_point() +
  geom_line() +
  geom_label(aes(color = selected)) +
  ylab("Penalized Likelihood")
p_k

lambda <- results %>%
  filter(sample == input) %>%
  group_by(k) %>%
  slice_max(order_by = cv_like_mean, n = 1) %>% 
  ungroup() %>% 
  slice_max(order_by = penal_lik, n = 1) %>% 
  pull(lambda)

k <- results %>%
  filter(sample == input) %>%
  group_by(k) %>%
  slice_max(order_by = cv_like_mean, n = 1) %>% 
  ungroup() %>% 
  slice_max(order_by = penal_lik, n = 1) %>% 
  pull(k)
norm_string <- "NULL"
seed <- 1
in_string <- paste0("/N/project/zhangclab/alex/spatial/cv_2022_06_29_01/output/clust_",input, "_",k,"_", scales::number(lambda, accuracy = 0.1),"_",norm_string,"_",seed,".rds")

in_string

model_path <- paste0("G:/My Drive/Dissertation/Spatial/spatial_analysis/cv_2022_06_29_01/output/clust_",input, "_",k,"_", scales::number(lambda, accuracy = 0.1),"_",norm_string,"_",seed,".rds")
out_path <- paste0("G:/My Drive/Dissertation/Spatial/spatial_analysis/2022_07_06_selection_EEE/")
result <- readRDS(model_path)
model <- result$model
data <- model$v

true_data <- getRDS("2020_maynard_prefrontal-cortex", input)
true <- as.numeric(true_data$layer_guess_reordered)


# Compile cluster plots

plots <- NULL
clust <- kmeans(data,centers, nstart = 50, iter.max = 100)$cluster
shuffled <- clue::solve_LSAP(table(clust, true), maximum = TRUE)[clust]
ari <- safe_ari(true,shuffled)
p0 <- clusterPlot(true_data, label=shuffled, palette=NULL, size=0.05) +
  scale_fill_viridis_d(option = "A") +
  labs(title=paste("kmeans ari =",round(ari$result, 4))) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none")
plots <- c(plots,list(p0))

clust <- Mclust(data, G = centers, modelNames = "EEE")$classification
shuffled <- clue::solve_LSAP(table(clust, true), maximum = TRUE)[clust]
ari <- safe_ari(true,shuffled)
p1 <- clusterPlot(true_data, label=shuffled, palette=NULL, size=0.05) +
  scale_fill_viridis_d(option = "A") +
  labs(title=paste("mclust ari =",round(ari$result, 4))) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none")
plots <- c(plots,list(p1))

clust <- readRDS(paste0("G:/My Drive/Dissertation/Spatial/spatial_analysis/cv_2022_06_17_01/output/clust_",input,"_1_t.rds"))
shuffled <- clue::solve_LSAP(table(clust, true), maximum = TRUE)[clust]
ari <- safe_ari(true,shuffled)
p2 <- clusterPlot(true_data, label=shuffled, palette=NULL, size=0.05) +
  scale_fill_viridis_d(option = "A") +
  labs(title=paste("bayes t ari =",round(ari$result, 4))) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none")
plots <- c(plots,list(p2))

clust <- readRDS(paste0("G:/My Drive/Dissertation/Spatial/spatial_analysis/cv_2022_06_17_01/output/clust_",input,"_1_norm.rds"))
shuffled <- clue::solve_LSAP(table(clust, true), maximum = TRUE)[clust]
ari <- safe_ari(true,shuffled)
p3 <- clusterPlot(true_data, label=shuffled, palette=NULL, size=0.05) +
  scale_fill_viridis_d(option = "A") +
  labs(title=paste("bayes n ari =",round(ari$result, 4))) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none")
plots <- c(plots,list(p3))


p4 <- clusterPlot(true_data, label=true, palette=NULL, size=0.05) +
  scale_fill_viridis_d(option = "A") +
  labs(title=paste("true ari = 1")) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none")
plots <- c(plots,list(p4))


pdf(file = paste0(out_path,input,".pdf"), width = 10, height = 10)
do.call("grid.arrange", c(list(p_grid), ncol=1))
do.call("grid.arrange", c(list(p_k), ncol=1))
do.call("grid.arrange", c(plots, ncol=2))
dev.off()

# 
# pdf(file = paste0(out_path,input,"_",k,"_",lambda,".pdf"), width = 10, height = 10)
# # do.call("grid.arrange", c(list(p1), ncol=1))
# # do.call("grid.arrange", c(list(p2), ncol=1))
# do.call("grid.arrange", c(plots, ncol=3))
# dev.off()
# 
# 
# set.seed(1)
# M <- 30
# clust_range <- 1:M %>% 
#   purrr::map_dfr(.f = function(.x){
#     print(.x)
#     clust <- mclust::Mclust(data, G = centers)$classification
#     # clust <- kmeans(data,centers, nstart = 50, iter.max = 100)$cluster
#     ari <- safe_ari(clust,clue::solve_LSAP(table(true, clust), maximum = TRUE)[true])
#     tibble(id = .x, ari = ari$result)
# })
# 
# clust_range %>% 
#   summarize(med_ari = median(ari),
#             mean_ari = mean(ari),
#             sd_ari = sd(ari),
#             max_ari = max(ari),
#             min_ari = min(ari))
# 
#

alexanderjwhite/spatialr.dev documentation built on Sept. 4, 2022, 4:49 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

alexanderjwhite/spatialr.dev
Research, Development and Testing of Spatial Clustering Project

development/cluster_2022_06_23_GREAT.R
In alexanderjwhite/spatialr.dev: Research, Development and Testing of Spatial Clustering Project

R Package Documentation

Browse R Packages

We want your feedback!

alexanderjwhite/spatialr.dev Research, Development and Testing of Spatial Clustering Project

development/cluster_2022_06_23_GREAT.R In alexanderjwhite/spatialr.dev: Research, Development and Testing of Spatial Clustering Project

R Package Documentation

Browse R Packages

We want your feedback!

alexanderjwhite/spatialr.dev
Research, Development and Testing of Spatial Clustering Project

development/cluster_2022_06_23_GREAT.R
In alexanderjwhite/spatialr.dev: Research, Development and Testing of Spatial Clustering Project