development/cluster_human.R

library(bluster)
library(mclust)
library(dplyr)
library(BayesSpace)
library(Matrix)
library(ggplot2)
library(ggpubr)
library(gridExtra)
library(Seurat)

results_raw <- read.table("G:/My Drive/Dissertation/Spatial/spatial_analysis/cv_2022_07_13_02/summary.csv")

colnames(results_raw) <- c("sample","time","lambda","k", "penal_lik", "cv_like")
# lambda_filt <- c(5,10,15,20,25)
# k_filt <- c(10,20,30,40)
results <- results_raw %>% 
  tibble() %>% 
  # filter(lambda %in% lambda_filt & k %in% k_filt) %>% 
  # mutate(sample = as.numeric(sample)) %>% 
  # group_by(sample, lambda, k) %>% 
  # dplyr::slice(n = 1) %>% 
  # arrange(sample, lambda, k) %>% 
  group_by(sample, time, lambda, k, penal_lik) %>% 
  summarize(cv_like_sum = sum(cv_like),
            cv_like_mean = mean(cv_like),
            cv_like_median = median(cv_like)) %>% 
  group_by(sample, lambda, k) %>% 
  dplyr::slice(n = 1) %>% 
  arrange(sample, k, lambda)

input <- "breast2"
visiumDataPath <- "G:/My Drive/Dissertation/Spatial/data/spatial_data/HumanHeart"
visiumDataFileName <- "V1_Human_Heart_filtered_feature_bc_matrix.h5"

#no update
visiumData <- Load10X_Spatial(
  data.dir = visiumDataPath,
  filename = visiumDataFileName,
  assay = 'ExtractSpatial',
  slice = 'A1',
  filter.matrix = TRUE,
  to.upper = FALSE
)
sce <- as.SingleCellExperiment(visiumData)
# cdata <- visiumData@images$B1@coordinates
# cdata[,"spatial.cluster"] <- 
colData(sce)<- cbind(colData(sce),visiumData@images$A1@coordinates)
# sce <- visiumData
sce <- spatialPreprocess(sce, platform="Visium", skip.PCA=TRUE)
# bs_colnames <- c("col","row","spatial.cluster")

n_clust <- 2:8
p_grid <- results %>%
  filter(sample == input) %>%
  ggplot(aes(x = lambda, y = cv_like_mean)) +
  geom_point() +
  geom_line() +
  facet_wrap(~k, scales = "free") +
  ylab("Mean of CV Likelihood")

p_k <- results %>%
  filter(sample == input) %>%
  group_by(k) %>%
  slice_max(order_by = cv_like_mean, n = 1) %>%
  ungroup() %>%
  mutate(selected = ifelse(penal_lik == max(penal_lik), "Yes", "No")) %>%
  ggplot(aes(x = k, y = penal_lik, label = lambda)) +
  geom_point() +
  geom_line() +
  geom_label(aes(color = selected)) +
  ylab("Penalized Likelihood")

lambda <- results %>%
  filter(sample == input) %>%
  group_by(k) %>%
  slice_max(order_by = cv_like_mean, n = 1) %>% 
  ungroup() %>% 
  slice_max(order_by = penal_lik, n = 1) %>% 
  pull(lambda)

k <- results %>%
  filter(sample == input) %>%
  group_by(k) %>%
  slice_max(order_by = cv_like_mean, n = 1) %>% 
  ungroup() %>% 
  slice_max(order_by = penal_lik, n = 1) %>% 
  pull(k)
norm_string <- "NULL"
seed <- 1
# in_string <- paste0("/N/project/zhangclab/alex/spatial/cv_2022_06_29_01/output/clust_",input, "_",k,"_", scales::number(lambda, accuracy = 0.1),"_",norm_string,"_",seed,".rds")
# 
# in_string

set.seed(seed)
model_path <- paste0("G:/My Drive/Dissertation/Spatial/spatial_analysis/cv_2022_07_13_10/output/clust_",input, "_",k,"_", scales::number(lambda, accuracy = 0.1),"_",norm_string,"_",seed,".rds")
out_path <- paste0("G:/My Drive/Dissertation/Spatial/spatial_analysis/human_selection/")
result <- readRDS(model_path)
model <- result$model
data <- model$v


# Compile cluster plots

plots <<- NULL
# SpatialDimPlot(sce)
n_clust %>% 
  purrr::walk(.f = function(.x){
    clust <- Mclust(data, G = .x, modelNames = "EEE")$classification
    saveRDS(clust, paste0("G:/My Drive/Dissertation/Spatial/spatial_analysis/human_selection/",input,"_",.x,".rds"))
    # p <- clusterPlot(sce, label=clust, palette=NULL, size=0.05, color = NA) +
    #   scale_fill_viridis_d(option = "B") +
    #   labs(title=paste("k =",.x)) +
    #   theme(axis.title.x = element_blank(),
    #         axis.title.y = element_blank())
    # plots <<- c(plots,list(p))
  })



# pdf(file = paste0(out_path,input,".pdf"), width = 10, height = 10)
# do.call("grid.arrange", c(list(p_grid), ncol=1))
# do.call("grid.arrange", c(list(p_k), ncol=1))
# do.call("grid.arrange", c(plots, ncol=2))
# dev.off()

# 
# pdf(file = paste0(out_path,input,"_",k,"_",lambda,".pdf"), width = 10, height = 10)
# # do.call("grid.arrange", c(list(p1), ncol=1))
# # do.call("grid.arrange", c(list(p2), ncol=1))
# do.call("grid.arrange", c(plots, ncol=3))
# dev.off()
# 
# 
# set.seed(1)
# M <- 30
# clust_range <- 1:M %>% 
#   purrr::map_dfr(.f = function(.x){
#     print(.x)
#     clust <- mclust::Mclust(data, G = centers)$classification
#     # clust <- kmeans(data,centers, nstart = 50, iter.max = 100)$cluster
#     ari <- safe_ari(clust,clue::solve_LSAP(table(true, clust), maximum = TRUE)[true])
#     tibble(id = .x, ari = ari$result)
# })
# 
# clust_range %>% 
#   summarize(med_ari = median(ari),
#             mean_ari = mean(ari),
#             sd_ari = sd(ari),
#             max_ari = max(ari),
#             min_ari = min(ari))
# 
# 
alexanderjwhite/spatialr.dev documentation built on Sept. 4, 2022, 4:49 a.m.