R/main.R
In PRECAST: Embedding and Clustering with Alignment for Spatial Omics Datasets

Documented in getAdj_fixedNumber getAdj_reg ICM.EM ICM.EM_structure model_set SelectModel SelectModel.SeqK_PRECAST_Object

# library(pkgdown)
# pkgdown::build_site()
# pkgdown::build_reference()
# build_home()
# build_article(name="PRECAST.DLPFC4") # Solely compile one article for updating.
# build_article(name="PRECAST.BreastCancer")
# build_article(name="PRECAST.Simu")
# build_article(name="PRECAST.DLPFC")
# R CMD check --as-cran PRECAST_1.8.tar.gz
# devtools::check_win_release()
# setwd("D:\\Working\\Research paper\\GithubCode\\PRECAST\\vignettes_data")


# Basic functions ---------------------------------------------------------
.logDiffTime <- function(main = "", t1 = NULL, verbose = TRUE, addHeader = FALSE,
                         t2 = Sys.time(), units = "mins", header = "*****",
                         tail = "elapsed.", precision = 3){
  
  # main = ""; t1 = NULL; verbose = TRUE; addHeader = FALSE;
  # t2 = Sys.time(); units = "mins"; header = "###########";
  # tail = "elapsed."; precision = 3
  if (verbose) {
    timeStamp <- tryCatch({
      dt <- abs(round(difftime(t2, t1, units = units),
                      precision))
      if (addHeader) {
        msg <- sprintf("%s\n%s : %s, %s %s %s\n%s",
                       header, Sys.time(), main, dt, units, tail,
                       header)
      }
      else {
        msg <- sprintf("%s : %s, %s %s %s", Sys.time(),
                       main, dt, units, tail)
      }
      if (verbose)
        message(msg)
    }, error = function(x) {
      if (verbose)
        message("Time Error : ", x)
    })
  }
  
  return(invisible(0))
}


.logTime <- function(main='', prefix='*****', versoe=TRUE){
  
  if(versoe){
    message(paste0(Sys.time()," : ", prefix," ",  main))
  }
  
  
}
# model_set <- function(Sigma_equal=FALSE, Sigma_diag=TRUE,mix_prop_heter=TRUE,
#                       error_heter=TRUE, Sp2=TRUE, wpca_int=FALSE,int.model='EEE',
#                       coreNum = 1, coreNum_int=coreNum,
#                       beta_grid=seq(0.2,4, by=0.2),
#                       maxIter_ICM=6,maxIter=20, epsLogLik=1e-5, verbose=TRUE, seed=1){
#   para_settings <- list(Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
#                         mix_prop_heter=mix_prop_heter,
#                         error_heter=error_heter, Sp2=Sp2, wpca_int=wpca_int,int.model=int.model,
#                         coreNum = coreNum, coreNum_int=coreNum_int,
#                         beta_grid= beta_grid,
#                         maxIter_ICM = maxIter_ICM, maxIter = maxIter, epsLogLik = epsLogLik,
#                         verbose = verbose, seed = seed)
#   return(para_settings)
#   
# }
# 参数验证函数
validate_parameters <- function(Sigma_equal, Sigma_diag, mix_prop_heter, error_heter,
                                Sp2, wpca_int, int.model, coreNum, coreNum_int, beta_grid, init.nstart,
                                maxIter_ICM, maxIter, epsLogLik, verbose, seed) {
  
  # 检查逻辑参数
  logical_params <- list(Sigma_equal, Sigma_diag, mix_prop_heter, error_heter, 
                         Sp2, wpca_int, verbose)
  if (!all(sapply(logical_params, is.logical))) {
    stop("All flag parameters (Sigma_equal, Sigma_diag, etc.) must be logical values")
  }
  
  # 检查整型参数
  if (!is.numeric(coreNum) || coreNum < 1 || coreNum != round(coreNum)) {
    stop("coreNum must be a positive integer")
  }
  if (!is.numeric(coreNum_int) || coreNum_int < 1 || coreNum_int != round(coreNum_int)) {
    stop("coreNum_int must be a positive integer")
  }
  if (!is.numeric(maxIter_ICM) || maxIter_ICM < 1 || maxIter_ICM != round(maxIter_ICM)) {
    stop("maxIter_ICM must be a positive integer")
  }
  if (!is.numeric(maxIter) || maxIter < 1 || maxIter != round(maxIter)) {
    stop("maxIter must be a positive integer")
  }
  if (!is.numeric(seed) || seed != round(seed)) {
    stop("seed must be an integer")
  }
  
  # 检查数值参数
  if (!is.numeric(epsLogLik) || epsLogLik <= 0) {
    stop("epsLogLik must be a positive numeric value")
  }
  if (!is.numeric(beta_grid) || any(beta_grid <= 0)) {
    stop("beta_grid must contain positive numeric values")
  }
  if (is.unsorted(beta_grid)) {
    warning("beta_grid is not sorted. Consider sorting it for better performance.")
  }
  if (!is.numeric(init.nstart) || init.nstart < 1 || init.nstart != round(init.nstart)) {
    stop("init.nstart must be a positive integer")
  }
  
  # 检查字符串参数
  valid_models <- c('kmeans', 'mclust')
  if (!int.model %in% valid_models) {
    stop("int.model must be one of: ", paste(valid_models, collapse = ", "))
  }
}
model_set <- function(Sigma_equal = FALSE, 
                      Sigma_diag = TRUE,
                      mix_prop_heter = TRUE,
                      error_heter = TRUE, 
                      Sp2 = TRUE, 
                      wpca_int = FALSE,
                      int.model = c('mclust', 'kmeans'),
                      coreNum = 1, 
                      coreNum_int = coreNum,
                      beta_grid = seq(0.2, 4, by = 0.2),
                      init.nstart=5,
                      maxIter_ICM = 6,
                      maxIter = 20, 
                      epsLogLik = 1e-5, 
                      verbose = TRUE, 
                      seed = 1) {
  
  int.model <- match.arg(int.model)
  # 参数验证
  validate_parameters(Sigma_equal, Sigma_diag, mix_prop_heter, error_heter, 
                      Sp2, wpca_int, int.model, coreNum, coreNum_int, beta_grid,init.nstart,
                      maxIter_ICM, maxIter, epsLogLik, verbose, seed)
  
  # 使用 modifyList 处理默认值，更灵活
  default_settings <- list(
    Sigma_equal = FALSE,
    Sigma_diag = TRUE,
    mix_prop_heter = TRUE,
    error_heter = TRUE,
    Sp2 = TRUE,
    wpca_int = FALSE,
    int.model = 'kmeans',
    coreNum = 1,
    coreNum_int = 1,
    beta_grid = seq(0.2, 4, by = 0.2),
    init.nstart = 5,
    maxIter_ICM = 6,
    maxIter = 20,
    epsLogLik = 1e-5,
    verbose = TRUE,
    seed = 1
  )
  
  # 合并用户提供的参数和默认值
  user_settings <- list(Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                          mix_prop_heter=mix_prop_heter,
                          error_heter=error_heter, Sp2=Sp2, wpca_int=wpca_int,int.model=int.model,
                          coreNum = coreNum, coreNum_int=coreNum_int,
                          beta_grid= beta_grid, init.nstart=init.nstart,
                          maxIter_ICM = maxIter_ICM, maxIter = maxIter, epsLogLik = epsLogLik,
                          verbose = verbose, seed = seed)
  para_settings <- modifyList(default_settings, user_settings)
  
  # 确保 coreNum_int 正确设置
  if (!"coreNum_int" %in% names(user_settings)) {
    para_settings$coreNum_int <- para_settings$coreNum
  }
  return(para_settings)
}


ICM.EM_structure  <- function(XList,  K, AdjList, q=15,parameterList=NULL){
  
  if(is.null(parameterList)){
    parameterList <- model_set()
  }
  ### initialize variables
  beta_grid <- maxIter_ICM<- maxIter<- epsLogLik<- verbose<-mix_prop_heter<- Sigma_equal<- Sigma_diag<- NULL
  error_heter<-Sp2<- wpca_int<-int.model<- seed<- coreNum <- coreNum_int <- init.nstart<- NULL
  n_par <- length(parameterList)
  for(i in 1:n_par){
    assign(names(parameterList)[i], parameterList[[i]])
  }
  
  ## Centering
  ## XList <- lapply(XList, scale, scale=FALSE)
  ## centering in ICM.EM() function
  
  resList <- ICM.EM(XList, q, K, AdjList=AdjList, 
              beta_grid=beta_grid, init.nstart=init.nstart,
              maxIter_ICM=maxIter_ICM,maxIter=maxIter, epsLogLik=epsLogLik, verbose=verbose,
              mix_prop_heter=mix_prop_heter, Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
              error_heter=error_heter, Sp2=Sp2,
              wpca_int=wpca_int,int.model=int.model, seed=seed,coreNum = coreNum, coreNum_int=coreNum_int)
  return(resList)
}
ICM.EM <- function(XList, q, K, AdjList=NULL,  Adjlist_car=NULL, posList = NULL, platform = "ST",
                   beta_grid=seq(0.2,4, by=0.2), init.nstart=5, 
                   maxIter_ICM=6,maxIter=20, epsLogLik=1e-5, verbose=TRUE,
                   mix_prop_heter=TRUE, Sigma_equal=FALSE, Sigma_diag=TRUE,error_heter=TRUE, Sp2=TRUE,
                   wpca_int=FALSE,int.model=c('kmeans', 'mclust'), seed=1,coreNum = 1, coreNum_int=coreNum){
  
  int.model <- match.arg(int.model)
  # library(harmony)
  # library(irlba)
  
  r_max <- length(XList)
  
  pf <- unlist(lapply(XList, ncol))
  # if(is.null(posList) && is.null(AdjList)) stop("Check the values of arguments!")
  if(sum(pf!=pf[1]) > 0) stop("ncols of XList's components must be same!")
  if(is.null(posList) && is.null(AdjList)) stop("posList or AdjList must be provided one!")
  nvec <- sapply(XList, nrow)
  n <- sum(nvec)
  p <-  pf[1]
  if(verbose){
    message("-----Intergrative data info.: ", r_max, ' samples, ', p , " genes X ", n, " spots------")
    message("-----Numbers of spots are: ", paste(nvec, collapse = ", "), '-----')
  }
  
  
  if(is.null(AdjList) && !is.null(posList))
    AdjList <- getAdjList(posList, platform=platform)
  if(any(sapply(AdjList, nrow)!=sapply(XList, nrow)))  
    stop('The dimension of Adj matrix does not match that of X!\n')
  
  if(is.null(Adjlist_car)) Adjlist_car <- AdjList
  
  ## Centering
  XList <- lapply(XList, scale, scale=FALSE)
  
  # Xmat <- NULL
  # for(r in 1:r_max){
  #   Xmat <- rbind(Xmat, XList[[r]])
  # }
  Xmat <- do.call("rbind", XList)
 
  message(paste0("Starting computing initial values using ", int.model ," ..."))
  set.seed(seed)
  tstart <- Sys.time()
  if(wpca_int){
    princ1 <- wpca(Xmat, q, weighted = wpca_int)
  }else{
    princ1 <- approxPCA(Xmat, q)
  }
  W0 <- princ1$loadings
  sampleID <- get_sampleID(XList)
  if(r_max >1){
    hZ <- RunHarmony(princ1$PCs, meta_data=data.frame(batch=factor(sampleID)), vars_use='batch', verbose=FALSE)
  }else{
    hZ <- princ1$PCs
  }
  ## Delete the big objects and free the memory
  rm(princ1)
  rm(Xmat)
  gc()
  
  
  
  
  ### parallel to obtain the initial values 
  alpha <- FALSE
  nK <- length(K)
  if(nK>1){
    ## at most use 10 cores
    if(is.null(coreNum_int)){
      cores <- ifelse(nK < 10,  
                      nK, 10)
    }else{
      cores <- coreNum_int
    }
    
    if(Sys.info()[1]=="Windows"){
      cl <- parallel::makeCluster(cores) # memory can not be shared type in Windows.
    }else{
      cl <- parallel::makeCluster(cores, type='FORK') # memory-shared type in linux or Mac.
    }
    # Mclust <- mclust::Mclust
    # mclustBIC <- mclust::mclustBIC
    
    # parallel::clusterExport(cl, varlist = c("Mclust", "mclustBIC"))
    intList <- parallel::parLapply(cl, X=K, parafun_int, Z=hZ, alpha=alpha, 
                                   Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                                   int.model =int.model, init.nstart=init.nstart, verbose=verbose)
    parallel::stopCluster(cl)
  }else{
    intList <- list(parafun_int(K, Z=hZ, alpha=alpha, Sigma_equal, Sigma_diag,
                                int.model =int.model,init.nstart=init.nstart, verbose=verbose))
  }
  
  .logDiffTime(sprintf(paste0("%s Initialization finished!"), "*****"), t1 = tstart, verbose = verbose)
  
  alpha0List = list()
  Mu0List = list()
  Sigma0List = list()
  ymat = matrix(0, n, nK)
  for (kk in 1:nK){
    # print(kk)
    
    ymat[,kk] <- intList[[kk]]$ymatk
    
    alpha0List[[kk]] <- intList[[kk]]$alpha0k
    
    Mu0List[[kk]] <- intList[[kk]]$Mu0k
    
    Sigma0List[[kk]] <- intList[[kk]]$Sigma0k
  }
  
  
  
  message("----Fitting PRECAST model...----------------\n")
  tstart <- Sys.time()
  beta0  =1.5
  resList <- idrsc2Cpp(XList, AdjList, Adjlist_car, hZ, ymat, Mu0List, 
                       Sigma0List, W0, alpha0List, beta0, 
                       beta_grid, maxIter_ICM, 
                       maxIter, epsLogLik, verbose,(!error_heter),
                       Sigma_equal, Sigma_diag, mix_prop_heter, Sp2, max(K), min(K), coreNum)
  
  .logDiffTime(sprintf(paste0("%s PRECAST model fitting finished!"), "*****"), t1 = tstart, verbose = verbose)
  
  
  
  para_settings <- list(K=K, n=n, p=p,q=q,r_max=r_max,
                        Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                        mix_prop_heter=mix_prop_heter)
  attr(resList, "para_settings") <- para_settings
  class(resList) <- "SeqK_PRECAST_Object"
  return(resList)
  
  
}



### Following function is the early version of ICM.EM
idrsc <- function(XList, q, K, AdjList=NULL,  Adjlist_car=NULL, posList = NULL, platform = "ST",
                  beta_grid=seq(0.2,4, by=0.2), init.nstart=5,
                   maxIter_ICM=6,maxIter=20, epsLogLik=1e-5, verbose=TRUE,
                   mix_prop_heter=TRUE, Sigma_equal=FALSE, Sigma_diag=TRUE,error_heter=TRUE, Sp2=TRUE,
                   wpca_int=FALSE,int.model=c('kmeans', 'mclust'), seed=1,coreNum = 1, coreNum_int=coreNum){
  
  
  
  
  int.model <- match.arg(int.model)
  
  r_max <- length(XList)
  
  pf <- unlist(lapply(XList, ncol))
  # if(is.null(posList) && is.null(AdjList)) stop("Check the values of arguments!")
  if(sum(pf!=pf[1]) > 0) stop("ncols of XList's components must be same!")
  if(is.null(posList) && is.null(AdjList)) stop("posList or AdjList must be provided one!")
  n <- sum(sapply(XList, nrow))
  p <-  pf[1]
  message("Intergrative data info.: ", r_max, ' samples, ', p , " genes X ", n, " spots------")
  message("iDR-SC model setting: ", 'error_heter=',error_heter,", Sigma_equal=",Sigma_equal,
          ", Sigma_diag=", Sigma_diag, ', mix_prop_heter=', mix_prop_heter)
  
  if(is.null(AdjList) && !is.null(posList))
    AdjList <- getAdjList(posList,  platform=platform)
  if(any(sapply(AdjList, nrow)!=sapply(XList, nrow)))  
    stop('The dimension of Adj matrix does not match that of X!\n')
  
  if(is.null(Adjlist_car)) Adjlist_car <- AdjList
 
  # Xmat <- NULL
  # for(r in 1:r_max){
  #   Xmat <- rbind(Xmat, XList[[r]])
  # }
  Xmat <- do.call("rbind", XList)
  
  message(paste0("Starting computing initial values using ", int.model ," ..."))
  set.seed(seed)
  tstart <- Sys.time()
  if(wpca_int){
    princ1 <- wpca(Xmat, q, weighted = wpca_int)
  }else{
    princ1 <- approxPCA(Xmat, q)
  }
  W0 <- princ1$loadings
  sampleID <- get_sampleID(XList[[1]])
  if(r_max >1){
    hZ <- RunHarmony(princ1$PCs, meta_data=data.frame(batch=factor(sampleID)), vars_use='batch', verbose=FALSE)
  }else{
    hZ <- princ1$PCs
  }
  ## Delete the big objects and free the memory
  rm(princ1)
  rm(Xmat)
  gc()
  
  
  
  ### parallel to obtain the initial values 
  alpha <- FALSE
  nK <- length(K)
  if(nK>1){
    ## at most use 10 cores
    if(is.null(coreNum_int)){
      cores <- ifelse(nK < 10,  
                      nK, 10)
    }else{
      cores <- coreNum_int
    }
    
    if(Sys.info()[1]=="Windows"){
      cl <- parallel::makeCluster(cores) # memory can not be shared type in Windows.
    }else{
      cl <- parallel::makeCluster(cores, type='FORK') # memory-shared type in linux or Mac.
    }
    # Mclust <- mclust::Mclust
    # mclustBIC <- mclust::mclustBIC
    
    # parallel::clusterExport(cl, varlist = c("Mclust", "mclustBIC"))
    # Run
    
    intList <- parallel::parLapply(cl, X=K, parafun_int, Z=hZ, alpha=alpha, 
                                   Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                                   int.model =int.model, init.nstart=init.nstart, verbose=verbose)
    parallel::stopCluster(cl)
  }else{
    intList <- list(parafun_int(K, Z=hZ, alpha=alpha, Sigma_equal, Sigma_diag,
                                int.model =int.model, init.nstart=init.nstart,verbose=verbose))
  }
  
  .logDiffTime(sprintf(paste0("%s Initialization finished!"), "*****"), t1 = tstart, verbose = verbose)
  
  alpha0List = list()
  Mu0List = list()
  Sigma0List = list()
  ymat = matrix(0, n, nK)
  for (kk in 1:nK){
    # print(kk)
    
    ymat[,kk] <- intList[[kk]]$ymatk
    
    alpha0List[[kk]] <- intList[[kk]]$alpha0k
    
    Mu0List[[kk]] <- intList[[kk]]$Mu0k
    
    Sigma0List[[kk]] <- intList[[kk]]$Sigma0k
  }
  
  
    
  message("----Fitting PRECAST model----------------\n")
  beta0  =1.5
  resList <- idrsc2Cpp(XList, AdjList, Adjlist_car, hZ, ymat, Mu0List, 
                         Sigma0List, W0, alpha0List, beta0, 
                         beta_grid, maxIter_ICM, 
                         maxIter, epsLogLik, verbose,(!error_heter),
                         Sigma_equal, Sigma_diag, mix_prop_heter, Sp2, max(K), min(K), coreNum)
    
  
  
  
  para_settings <- list(K=K, n=n, p=p,q=q,r_max=r_max,
                        Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                        mix_prop_heter=mix_prop_heter)
  attr(resList, "para_settings") <- para_settings
  class(resList) <- "SeqK_PRECAST_Object"
  return(resList)
    

}


## Define a new function mycluster to avoid using parallel::clusterExport in parallel
mycluster <- function(Z, G, int.model=c('kmeans', 'mclust'), init.nstart=5, verbose=FALSE){
  # require(mclust)
  int.model <- match.arg(int.model)
  q <- ncol(Z)
  clslist <- list()
  if(int.model == 'mclust'){
    #mclust_z <- mclust::Mclust(Z, G=G,modelNames ='EEE' ,verbose=verbose)
    results <- replicate(init.nstart, mclust::Mclust(data = Z, G = G,
                                                     modelNames = "EEE", verbose = FALSE), simplify = FALSE)
    best_index <- which.max(sapply(results, function(x) x$bic))
    mclust_z <- results[[best_index]]
    clslist$cluster <- mclust_z$classification
    clslist$mu <- t(mclust_z$parameters$mean) # K * q
    clslist$sigma <- mclust_z$parameters$variance$sigma
    clslist$pro <- mclust_z$parameters$pro
    rm(mclust_z)
  }else if(int.model == 'kmeans'){
    km_z <- stats::kmeans(x = Z, centers = G,
                          iter.max = 100, nstart = init.nstart)
    clslist$cluster <- km_z$cluster
    clslist$mu <- km_z$centers
    clslist$pro <- rep(1/G, G)
    Sigma_int <- array(dim=c(q, q, G))
    for(k in 1:G){
      Sigma_int[,,k] <- var(Z[km_z$cluster==k,])
    }
    clslist$sigma <- Sigma_int
    rm(km_z)
  }
  
  return(clslist)
}
parafun_int <- function(k, Z, alpha, Sigma_equal, Sigma_diag, int.model=c('kmeans', 'mclust'), init.nstart=5, verbose=FALSE){
  
  int.model <- match.arg(int.model)
  
  clslist <- mycluster(Z, G=k, int.model = int.model, init.nstart=init.nstart, verbose=verbose)
  
  ymatk <- clslist$cluster
  
  if(alpha){
    alpha0k <- clslist$pro
  }else{
    alpha0k <- rep(0, k)
  }
  
  Mu0k <- clslist$mu
  Sigmak <- clslist$sigma
  Sigma0k <- Sigmak
  if(Sigma_diag){
    Sigma0k <- array(0, dim=dim(Sigmak))
    for(kk in 1:k){
      diag(Sigma0k[,,kk]) <- diag(Sigmak[,,kk])
    }
    Sigmak <- Sigma0k
  }
  
  if(Sigma_equal){
    for(kk in 1:k){
      Sigma0k[,,kk] <- apply(Sigmak, c(1,2), mean)
    }
  }
  Pi0k <- clslist$pro
  return(list(ymatk=ymatk, alpha0k=alpha0k, Mu0k=Mu0k, Sigma0k=Sigma0k, Pi0k=Pi0k))
}


degree_freedom <- function(K, paraList){
  
  p  <- paraList$p; q <- paraList$q
  r_max <- paraList$r_max; Sigma_equal <- paraList$Sigma_equal
  Sigma_diag <- paraList$Sigma_diag;  mix_prop_heter <- paraList$mix_prop_heter
  if(!Sigma_equal){
    if(mix_prop_heter){
      
      if(Sigma_diag){ # our model
        
        # # beta_r + W + Lam_r + Mu_k + Sigma_k + Psi_r
        dfree <- 1*r_max+p*(q+r_max) + K*(q+ q) + q*(q+1)/2*r_max
      }else{
        dfree <- 1*r_max+p*(q+r_max) + K*(q + q*(q+1)/2.0)+  q*(q+1)/2*r_max
      }
    }else{
      
      if(Sigma_diag){
        
        # # beta + W + Lam_r + Mu+Sigma + tau_r
        dfree <- 1+p*(q+r_max) + K*(q+q) +  q*(q+1)/2*r_max
      }else{
        dfree <- 1+p*(q+r_max) + K*(q+q*(q+1)/2.0)+  q*(q+1)/2*r_max
      }
    }
  }else{
    if(mix_prop_heter){
      if(Sigma_diag){ ## This is our model setting
        message("Sigma is set to diagonal matrices \n")
        # # beta_r + W + Lam_r + Mu+ Sigma_r + Phi_r
        dfree <- 1*r_max+p*(q+r_max) + K*q+ q + r_max *q*(q+1)/2
      }else{
        message("Sigma is set to dense matrices \n")
        dfree <- 1*r_max+p*(q+r_max) + K*q+ q*(q+1)/2.0+ r_max *q*(q+1)/2
      }
    }else{
      if(Sigma_diag){
        message("Sigma is set to diagonal matrices \n")
        # # beta + W + Lam_r + Mu_k +Sigma_r + Phi_r
        dfree <- 1+p*(q+r_max) + K*q+ q  + r_max *q*(q+1)/2
      }else{
        message("Sigma is set to dense matrices \n")
        dfree <- 1+p*(q+r_max) + K*q+ q*(q+1)/2.0 + r_max *q*(q+1)/2
      }
    }
  }
  return(dfree)
}

SelectModel <- function(obj, criteria = 'MBIC',pen_const=1, return_para_est=FALSE) UseMethod("SelectModel")
SelectModel.SeqK_PRECAST_Object <- function(obj, criteria = 'MBIC',pen_const=1, return_para_est=FALSE){
  if(!inherits(obj, 'SeqK_PRECAST_Object')) stop('obj must be "SeqK_PRECAST_Object"!\n')
  para_settings <- attr(obj, 'para_settings')
  K_set <- para_settings$K
  nK <- length(K_set)
  icVec <-  rep(Inf, nK)
  if(length(obj) != nK) stop("The length of obj must be equal to length of K!")
  n <- para_settings$n; p <- para_settings$p
  for(k in 1:nK){
      resList <- obj[[k]]
      dfree <- degree_freedom(K_set[k], para_settings)
      icVec[k] <- switch(criteria, 
             MAIC = -2.0* resList$loglik +dfree * 2 * log(log(p+n))*pen_const,
             AIC = -2.0* resList$loglik +dfree * 2,
             MBIC =  -2.0* resList$loglik +dfree * log(n) * log(log(p+n))*pen_const, 
             BIC =  -2.0* resList$loglik +dfree * log(n) * log(log(p+n))*pen_const )
      
    
    
  }
  bestK <- K_set[which.min(icVec)]
  icMat <- cbind(K=K_set, IC=icVec)
  resList <- obj[[which.min(icVec)]]
  cluster_PCList <- list(bestK= bestK, cluster=resList$cluster, hZ = resList$hZ, Rf = resList$Rf,
                         hV=resList$hV, hW = resList$W,icMat=icMat)
  if(return_para_est){
    attr(cluster_PCList, 'fit') <- resList[-c(1:4)]
  }
  
  return(cluster_PCList)
  
}



# gendataInte_sp <- function(height1=30, width1=30,height2=height1, width2=width1, 
#                            p =100, q=10, K=7,  G=4, beta=1.2, sigma2=1, 
#                            alpha=8, seed=1, view=TRUE){
#   # height <- 70
#   # width <- 70
#   # G <- 4
#   # beta <- 1.0
#   # K <- 7
#   # q <- 10
#   # p <- 1000
#   if(q <2) stop("error:gendata_sp::q must be greater than 2!")
#   
#   if(length(beta) <2) beta <- rep(beta, 2)
#   # require(GiRaF)
#   # require(MASS)
#   n1 <- height1 * width1 # # of cell in each indviduals 
#   n2 <- height2 * width2
#   index1 <- 1:n1; index2 <- (n1+1):(n1+n2)
#   ## generate deterministic parameters, fixed after generation
#  
#   if(length(sigma2)==1){
#     Lambda1 <- sigma2*(abs(rnorm(p, sd=1)))
#     Lambda2 <- sigma2*(abs(rnorm(p, sd=1)))
#   }else{
#     Lambda1 <- rep(sigma2[1], p)
#     Lambda2 <- rep(sigma2[2], p)
#   }
#   
#   
#   W1 <- matrix(rnorm(p*q), p, q)
#   W <- qr.Q(qr(W1))
#   
#   mu <- matrix(0, q,  K)
#   diagmat = array(0, dim = c(q, q, K))
#   if(q > K){
#     q1 <- floor(K/2)
#     for(j in 1:q1){
#       if(j <= (q1/2)) mu[j,j] <- alpha
#       if(j > (q1/2)) mu[j,j] <- -alpha
#     }
#     mu[(q1+1):q, K] <- -alpha
#     
#   }else if(q <= K){
#     for(k in 1:K)
#       mu[,k] <- rep(alpha/8 *k, q) #
#   }
#   for(k in 1:K){
#     tmp  <- rep(1, q)
#     if(k <= K/2){
#       tmp[q] <- alpha
#     }
#     diag(diagmat[,,k]) <- tmp
#   }
#   
#   Mu <- t(mu)
#   Sigma <- diagmat
#   tau2 <- rep(1, q); tau2[1] <- 4; tau2[2] <- -4
#   set.seed(seed)
#   # generate the spatial dependence for state variable y, a hidden Markov RF
#   y1 <- sampler.mrf(iter = n1, sampler = "Gibbs", h = height1, w = width1, ncolors = K, nei = G, param = beta[1],
#                     initialise = FALSE, view = view)
#   y1 <- c(y1) + 1
#   y2 <- sampler.mrf(iter = n2, sampler = "Gibbs", h = height2, w = width2, ncolors = K, nei = G, param = beta[2],
#                     initialise = FALSE, view = view)
#   y2 <- c(y2) + 1
#   
#   Z1 <- matrix(0, n1, q)
#   Z2 <- matrix(0, n2, q)
#   for(k in 1:K){
#     nk1 <- sum(y1==k)
#     Z1[y1==k, ] <- MASS::mvrnorm(nk1, Mu[k,], Sigma[,,k])
#     
#     nk2 <- sum(y2==k)
#     Z2[y2==k, ] <- MASS::mvrnorm(nk2, Mu[k,]+tau2, Sigma[,,k])
#   }
#   X1 <- Z1 %*% t(W)+  MASS::mvrnorm(n1, rep(0,p), diag(Lambda1))
#   X2 <- Z2 %*% t(W) + MASS::mvrnorm(n2, rep(0,p), diag(Lambda2))
#   # compute the strength of signal
#   svd_Sig1 <- svd(cov(Z1))
#   W11 <- W %*% svd_Sig1$u %*% diag(sqrt(svd_Sig1$d))
#   snr1 <- sum(svd(W11)$d^2) / (sum(svd(W11)$d^2)+ sum(Lambda1))
#   svd_Sig2 <- svd(cov(Z2))
#   W22 <- W %*% svd_Sig2$u %*% diag(sqrt(svd_Sig2$d))
#   snr2 <- sum(svd(W22)$d^2) / (sum(svd(W22)$d^2)+ sum(Lambda2))
#   
#   message("SNR1=", round(snr1,4),", SNR2=", round(snr2, 4),  '\n')
#   
#   # make position
#   pos1 <- cbind(rep(1:height1, width1), rep(1:height1, each=width1))
#   pos2 <- cbind(rep(1:height2, width2), rep(1:height2, each=width2))
#   return(list(X1=X1,X2=X2, Z1=Z1, Z2=Z2, cluster=c(y1,y2), Mu=Mu, Sigma=Sigma,
#               W=W, Lambda1=Lambda1, Lambda2=Lambda2, tau2=tau2, 
#               beta=beta,pos1=pos1,pos2=pos2, snr=c(snr1,snr2),
#               index1=index1,index2=index2))
#   
# }
# 


find_neighbors <- function(pos, platform=c('ST', "Visium")) {
  # require(purrr)
  # require(S4Vectors)
  if (tolower(platform) == "visium") {
    ## Spots to left and right, two above, two below
    offsets <- data.frame(x.offset=c(-2, 2, -1,  1, -1, 1),
                          y.offset=c( 0, 0, -1, -1,  1, 1))
  } else if (tolower(platform) == "st") {
    ## L1 radius of 1 (spots above, right, below, and left)
    offsets <- data.frame(x.offset=c( 0, 1, 0, -1),
                          y.offset=c(-1, 0, 1,  0))
  } else {
    stop(".find_neighbors: Unsupported platform \"", platform, "\".")
  }
  
  ## Get array coordinates (and label by index of spot in SCE)
  colnames(pos) <- c("row", "col")
  # pos <- DataFrame(pos) # reduce the dependency on S4Vector
  pos <- as.data.frame(pos)
  spot.positions <- pos
  spot.positions$spot.idx <- seq_len(nrow(spot.positions))
  
  ## Compute coordinates of each possible spot neighbor
  neighbor.positions <- merge(spot.positions, offsets)
  neighbor.positions$x.pos <- neighbor.positions$col + neighbor.positions$x.offset
  neighbor.positions$y.pos <- neighbor.positions$row + neighbor.positions$y.offset
  
  ## Select spots that exist at neighbor coordinates
  neighbors <- merge(as.data.frame(neighbor.positions), 
                     as.data.frame(spot.positions), 
                     by.x=c("x.pos", "y.pos"), by.y=c("col", "row"),
                     suffixes=c(".primary", ".neighbor"),
                     all.x=TRUE)
  
  ## Shift to zero-indexing for C++
  #neighbors$spot.idx.neighbor <- neighbors$spot.idx.neighbor - 1
  
  ## Group neighbor indices by spot 
  ## (sort first for consistency with older implementation)
  neighbors <- neighbors[order(neighbors$spot.idx.primary, 
                               neighbors$spot.idx.neighbor), ]
  df_j <- split(neighbors$spot.idx.neighbor, neighbors$spot.idx.primary)
  df_j <- unname(df_j)
  
  ## Discard neighboring spots without spot data
  ## This can be implemented by eliminating `all.x=TRUE` above, but
  ## this makes it easier to keep empty lists for spots with no neighbors
  ## (as expected by C++ code)
  ## df_j <- map(df_j, function(nbrs) discard(nbrs, function(x) is.na(x)))
  df_j <- lapply(df_j, function(nbrs) discard(nbrs, function(x) is.na(x)))
  
  ## Log number of spots with neighbors
  n_with_neighbors <- length(keep(df_j, function(nbrs) length(nbrs) > 0))
  message("Neighbors were identified for ", n_with_neighbors, " out of ",
          nrow(pos), " spots.")
  
  n <- length(df_j) 
  
  D <- matrix(0,  nrow = n, ncol = n)
  for (i in 1:n) {
    if(length(df_j[[i]]) != 0)
      D[i, df_j[[i]]] <- 1
  }
  ij <- which(D != 0, arr.ind = T)
  ij
}
getAdj_reg <- function(pos, platform= "Visium"){
  ij <- find_neighbors(pos, platform)
  # library(Matrix)
  n <- nrow(pos)
  Adj_sp <- Matrix::sparseMatrix(ij[,1], ij[,2], x = 1, dims=c(n, n))
  return(Adj_sp)
}
# getAdj <- function(pos, platform="Visisum"){
#   
#   if(tolower(platform) %in% tolower(c('ST', "Visium"))){
#     Adj_sp <- getAdj_reg(pos, platform)
#   }else{
#     Adj_sp <- getAdj_auto(pos)
#   }
# }

# getAdj_auto <- function(pos){
#   if (!inherits(pos, "matrix"))
#     stop("method is only for  matrix object!")
#   
#   radius.lower <- 1
#   radius.upper <- 50
#   start.radius <- 1
#   Med <- 0
#   while(!(Med >= 4 && Med <=6)){
#     
#     Adj_sp <- getneighborhood_fast(pos, radius=start.radius)
#     Med <- summary(Matrix::rowSums(Adj_sp))['Median']
#     if(Med < 4){
#       radius.lower <- start.radius
#       start.radius <- (radius.lower + radius.upper)/2
#     }else if(Med >6){
#       radius.upper <- start.radius
#       start.radius <- (radius.lower + radius.upper)/2
#     }
#     message("Current radius is ", start.radius, '\n')
#     message("Median of neighbors is ", Med, '\n')
#   }
#   return(Adj_sp)
# }
# 

# getAdj_manual <- function(pos, radius){
#   if (!inherits(pos, "matrix"))
#     stop("pos must be a matrix!")
#   if(radius <=0){
#     stop('radius must be a positive real!')
#   }
#   
#   Adj_sp <- getneighborhood_fast(pos, radius=radius)
#   return(Adj_sp)
# }

getAdj_fixedNumber <- function(pos, number=6){
  if(nrow(pos)< 6*4) stop("nrow of pos must be greater than 4*number!")
  # require(Matrix)
  Adj_sp <- get_fixedNumber_neighbors(pos, number)
  return(Adj_sp)
}

wpca <- function(X, q, weighted=T){
  if(!is.matrix(X)) stop("wpca: X must be a matrix!")
  if(q< 1) stop("wpca: q must be a positive integer!")
  X <- scale(X, scale=F) # centralize
  out <- wpcaCpp(X, q, weighted)
  return(out)
}
approxPCA <- function (X, q) {
  # require(irlba)
  n <- nrow(X)
  svdX <- irlba(A = X, nv = q)
  PCs <- svdX$u %*% diag(svdX$d[1:q])
  loadings <- svdX$v
  dX <- PCs %*% t(loadings) - X
  Lam_vec <- colSums(dX^2)/n
  return(list(PCs = PCs, loadings = loadings, Lam_vec = Lam_vec))
}
getAdjList <- function(posList, platform='Visium', ...){
  
  if(!inherits(posList, "list")) stop("posList must be a list consisting of matrix.")
  if(tolower(platform) %in% c("st", "visium")){
      AdjList <- pbapply::pblapply(posList, getAdj_reg, platform=platform)
  }else{
      AdjList <- pbapply::pblapply(posList, function(x, ...)getAdj_auto(x, ...))
  }
  return(AdjList)
}


# Access data from Seurat object ------------------------------------------
## To compatible with Seurat V5

get_data_fromSeurat <- function(seu, assay=NULL, slot='counts'){
  
  if(is.null(assay)) assay <- DefaultAssay(seu)
  dat <- GetAssayData(seu, assay = assay, layer= slot)
  
  
  return(dat)
}
get_varfeature_fromSeurat <- function(seu, assay=NULL){
  
  if(is.null(assay)) assay <- DefaultAssay(seu)
  
  if(inherits(seu[[assay]], "Assay5")){
    var.features <- seu[[assay]]@meta.data$var.features
    var.features <- var.features[!is.na(var.features)] 
    
  }else{
    var.features <- seu[[assay]]@var.features
  }
  return(var.features)
}



# Used in real data analysis ----------------------------------------------



## Get basic information
## format the log-normalized gene expression based on the selected genes.
getXList <- function(seuList, genelist){
  # require(Seurat)
  
  seuList <- lapply(seuList, NormalizeData)
  XList <- list()
  nsample <- length(seuList)
  indexList <- list()
  nr <- 0
  posList <- list()
  for(i in 1:nsample){
    XList[[i]] <- Matrix::t(seuList[[i]]@assays$RNA@data[genelist,])
    indexList[[i]] <- (nr+1):(nrow(XList[[i]] )+nr)
    nr <- nr + nrow(XList[[i]] )
    posList[[i]] <- cbind(seuList[[i]]$row, seuList[[i]]$col)
  }
  
  return(list(XList=XList, posList=posList, 
              indxList=indexList))
}
Any scripts or data that you put into this service are public.
PRECAST documentation built on Dec. 15, 2025, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PRECAST
Embedding and Clustering with Alignment for Spatial Omics Datasets

R/main.R
In PRECAST: Embedding and Clustering with Alignment for Spatial Omics Datasets

Defines functions getXList get_varfeature_fromSeurat get_data_fromSeurat getAdjList wpca getAdj_fixedNumber getAdj_reg find_neighbors SelectModel.SeqK_PRECAST_Object SelectModel degree_freedom parafun_int mycluster idrsc ICM.EM ICM.EM_structure model_set validate_parameters .logTime .logDiffTime

Documented in getAdj_fixedNumber getAdj_reg ICM.EM ICM.EM_structure model_set SelectModel SelectModel.SeqK_PRECAST_Object

Try the PRECAST package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PRECAST Embedding and Clustering with Alignment for Spatial Omics Datasets

R/main.R In PRECAST: Embedding and Clustering with Alignment for Spatial Omics Datasets

Defines functions getXList get_varfeature_fromSeurat get_data_fromSeurat getAdjList wpca getAdj_fixedNumber getAdj_reg find_neighbors SelectModel.SeqK_PRECAST_Object SelectModel degree_freedom parafun_int mycluster idrsc ICM.EM ICM.EM_structure model_set validate_parameters .logTime .logDiffTime

Documented in getAdj_fixedNumber getAdj_reg ICM.EM ICM.EM_structure model_set SelectModel SelectModel.SeqK_PRECAST_Object

Try the PRECAST package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PRECAST
Embedding and Clustering with Alignment for Spatial Omics Datasets

R/main.R
In PRECAST: Embedding and Clustering with Alignment for Spatial Omics Datasets