R/main.R

Defines functions getXList get_varfeature_fromSeurat get_data_fromSeurat getAdjList wpca getAdj_fixedNumber getAdj_reg find_neighbors SelectModel.SeqK_PRECAST_Object SelectModel degree_freedom parafun_int mycluster idrsc ICM.EM ICM.EM_structure model_set validate_parameters .logTime .logDiffTime

Documented in getAdj_fixedNumber getAdj_reg ICM.EM ICM.EM_structure model_set SelectModel SelectModel.SeqK_PRECAST_Object

# library(pkgdown)
# pkgdown::build_site()
# pkgdown::build_reference()
# build_home()
# build_article(name="PRECAST.DLPFC4") # Solely compile one article for updating.
# build_article(name="PRECAST.BreastCancer")
# build_article(name="PRECAST.Simu")
# build_article(name="PRECAST.DLPFC")
# R CMD check --as-cran PRECAST_1.8.tar.gz
# devtools::check_win_release()
# setwd("D:\\Working\\Research paper\\GithubCode\\PRECAST\\vignettes_data")


# Basic functions ---------------------------------------------------------
.logDiffTime <- function(main = "", t1 = NULL, verbose = TRUE, addHeader = FALSE,
                         t2 = Sys.time(), units = "mins", header = "*****",
                         tail = "elapsed.", precision = 3){
  
  # main = ""; t1 = NULL; verbose = TRUE; addHeader = FALSE;
  # t2 = Sys.time(); units = "mins"; header = "###########";
  # tail = "elapsed."; precision = 3
  if (verbose) {
    timeStamp <- tryCatch({
      dt <- abs(round(difftime(t2, t1, units = units),
                      precision))
      if (addHeader) {
        msg <- sprintf("%s\n%s : %s, %s %s %s\n%s",
                       header, Sys.time(), main, dt, units, tail,
                       header)
      }
      else {
        msg <- sprintf("%s : %s, %s %s %s", Sys.time(),
                       main, dt, units, tail)
      }
      if (verbose)
        message(msg)
    }, error = function(x) {
      if (verbose)
        message("Time Error : ", x)
    })
  }
  
  return(invisible(0))
}


.logTime <- function(main='', prefix='*****', versoe=TRUE){
  
  if(versoe){
    message(paste0(Sys.time()," : ", prefix," ",  main))
  }
  
  
}
# model_set <- function(Sigma_equal=FALSE, Sigma_diag=TRUE,mix_prop_heter=TRUE,
#                       error_heter=TRUE, Sp2=TRUE, wpca_int=FALSE,int.model='EEE',
#                       coreNum = 1, coreNum_int=coreNum,
#                       beta_grid=seq(0.2,4, by=0.2),
#                       maxIter_ICM=6,maxIter=20, epsLogLik=1e-5, verbose=TRUE, seed=1){
#   para_settings <- list(Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
#                         mix_prop_heter=mix_prop_heter,
#                         error_heter=error_heter, Sp2=Sp2, wpca_int=wpca_int,int.model=int.model,
#                         coreNum = coreNum, coreNum_int=coreNum_int,
#                         beta_grid= beta_grid,
#                         maxIter_ICM = maxIter_ICM, maxIter = maxIter, epsLogLik = epsLogLik,
#                         verbose = verbose, seed = seed)
#   return(para_settings)
#   
# }
# 参数验证函数
validate_parameters <- function(Sigma_equal, Sigma_diag, mix_prop_heter, error_heter,
                                Sp2, wpca_int, int.model, coreNum, coreNum_int, beta_grid, init.nstart,
                                maxIter_ICM, maxIter, epsLogLik, verbose, seed) {
  
  # 检查逻辑参数
  logical_params <- list(Sigma_equal, Sigma_diag, mix_prop_heter, error_heter, 
                         Sp2, wpca_int, verbose)
  if (!all(sapply(logical_params, is.logical))) {
    stop("All flag parameters (Sigma_equal, Sigma_diag, etc.) must be logical values")
  }
  
  # 检查整型参数
  if (!is.numeric(coreNum) || coreNum < 1 || coreNum != round(coreNum)) {
    stop("coreNum must be a positive integer")
  }
  if (!is.numeric(coreNum_int) || coreNum_int < 1 || coreNum_int != round(coreNum_int)) {
    stop("coreNum_int must be a positive integer")
  }
  if (!is.numeric(maxIter_ICM) || maxIter_ICM < 1 || maxIter_ICM != round(maxIter_ICM)) {
    stop("maxIter_ICM must be a positive integer")
  }
  if (!is.numeric(maxIter) || maxIter < 1 || maxIter != round(maxIter)) {
    stop("maxIter must be a positive integer")
  }
  if (!is.numeric(seed) || seed != round(seed)) {
    stop("seed must be an integer")
  }
  
  # 检查数值参数
  if (!is.numeric(epsLogLik) || epsLogLik <= 0) {
    stop("epsLogLik must be a positive numeric value")
  }
  if (!is.numeric(beta_grid) || any(beta_grid <= 0)) {
    stop("beta_grid must contain positive numeric values")
  }
  if (is.unsorted(beta_grid)) {
    warning("beta_grid is not sorted. Consider sorting it for better performance.")
  }
  if (!is.numeric(init.nstart) || init.nstart < 1 || init.nstart != round(init.nstart)) {
    stop("init.nstart must be a positive integer")
  }
  
  # 检查字符串参数
  valid_models <- c('kmeans', 'mclust')
  if (!int.model %in% valid_models) {
    stop("int.model must be one of: ", paste(valid_models, collapse = ", "))
  }
}
model_set <- function(Sigma_equal = FALSE, 
                      Sigma_diag = TRUE,
                      mix_prop_heter = TRUE,
                      error_heter = TRUE, 
                      Sp2 = TRUE, 
                      wpca_int = FALSE,
                      int.model = c('mclust', 'kmeans'),
                      coreNum = 1, 
                      coreNum_int = coreNum,
                      beta_grid = seq(0.2, 4, by = 0.2),
                      init.nstart=5,
                      maxIter_ICM = 6,
                      maxIter = 20, 
                      epsLogLik = 1e-5, 
                      verbose = TRUE, 
                      seed = 1) {
  
  int.model <- match.arg(int.model)
  # 参数验证
  validate_parameters(Sigma_equal, Sigma_diag, mix_prop_heter, error_heter, 
                      Sp2, wpca_int, int.model, coreNum, coreNum_int, beta_grid,init.nstart,
                      maxIter_ICM, maxIter, epsLogLik, verbose, seed)
  
  # 使用 modifyList 处理默认值,更灵活
  default_settings <- list(
    Sigma_equal = FALSE,
    Sigma_diag = TRUE,
    mix_prop_heter = TRUE,
    error_heter = TRUE,
    Sp2 = TRUE,
    wpca_int = FALSE,
    int.model = 'kmeans',
    coreNum = 1,
    coreNum_int = 1,
    beta_grid = seq(0.2, 4, by = 0.2),
    init.nstart = 5,
    maxIter_ICM = 6,
    maxIter = 20,
    epsLogLik = 1e-5,
    verbose = TRUE,
    seed = 1
  )
  
  # 合并用户提供的参数和默认值
  user_settings <- list(Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                          mix_prop_heter=mix_prop_heter,
                          error_heter=error_heter, Sp2=Sp2, wpca_int=wpca_int,int.model=int.model,
                          coreNum = coreNum, coreNum_int=coreNum_int,
                          beta_grid= beta_grid, init.nstart=init.nstart,
                          maxIter_ICM = maxIter_ICM, maxIter = maxIter, epsLogLik = epsLogLik,
                          verbose = verbose, seed = seed)
  para_settings <- modifyList(default_settings, user_settings)
  
  # 确保 coreNum_int 正确设置
  if (!"coreNum_int" %in% names(user_settings)) {
    para_settings$coreNum_int <- para_settings$coreNum
  }
  return(para_settings)
}


ICM.EM_structure  <- function(XList,  K, AdjList, q=15,parameterList=NULL){
  
  if(is.null(parameterList)){
    parameterList <- model_set()
  }
  ### initialize variables
  beta_grid <- maxIter_ICM<- maxIter<- epsLogLik<- verbose<-mix_prop_heter<- Sigma_equal<- Sigma_diag<- NULL
  error_heter<-Sp2<- wpca_int<-int.model<- seed<- coreNum <- coreNum_int <- init.nstart<- NULL
  n_par <- length(parameterList)
  for(i in 1:n_par){
    assign(names(parameterList)[i], parameterList[[i]])
  }
  
  ## Centering
  ## XList <- lapply(XList, scale, scale=FALSE)
  ## centering in ICM.EM() function
  
  resList <- ICM.EM(XList, q, K, AdjList=AdjList, 
              beta_grid=beta_grid, init.nstart=init.nstart,
              maxIter_ICM=maxIter_ICM,maxIter=maxIter, epsLogLik=epsLogLik, verbose=verbose,
              mix_prop_heter=mix_prop_heter, Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
              error_heter=error_heter, Sp2=Sp2,
              wpca_int=wpca_int,int.model=int.model, seed=seed,coreNum = coreNum, coreNum_int=coreNum_int)
  return(resList)
}
ICM.EM <- function(XList, q, K, AdjList=NULL,  Adjlist_car=NULL, posList = NULL, platform = "ST",
                   beta_grid=seq(0.2,4, by=0.2), init.nstart=5, 
                   maxIter_ICM=6,maxIter=20, epsLogLik=1e-5, verbose=TRUE,
                   mix_prop_heter=TRUE, Sigma_equal=FALSE, Sigma_diag=TRUE,error_heter=TRUE, Sp2=TRUE,
                   wpca_int=FALSE,int.model=c('kmeans', 'mclust'), seed=1,coreNum = 1, coreNum_int=coreNum){
  
  int.model <- match.arg(int.model)
  # library(harmony)
  # library(irlba)
  
  r_max <- length(XList)
  
  pf <- unlist(lapply(XList, ncol))
  # if(is.null(posList) && is.null(AdjList)) stop("Check the values of arguments!")
  if(sum(pf!=pf[1]) > 0) stop("ncols of XList's components must be same!")
  if(is.null(posList) && is.null(AdjList)) stop("posList or AdjList must be provided one!")
  nvec <- sapply(XList, nrow)
  n <- sum(nvec)
  p <-  pf[1]
  if(verbose){
    message("-----Intergrative data info.: ", r_max, ' samples, ', p , " genes X ", n, " spots------")
    message("-----Numbers of spots are: ", paste(nvec, collapse = ", "), '-----')
  }
  
  
  if(is.null(AdjList) && !is.null(posList))
    AdjList <- getAdjList(posList, platform=platform)
  if(any(sapply(AdjList, nrow)!=sapply(XList, nrow)))  
    stop('The dimension of Adj matrix does not match that of X!\n')
  
  if(is.null(Adjlist_car)) Adjlist_car <- AdjList
  
  ## Centering
  XList <- lapply(XList, scale, scale=FALSE)
  
  # Xmat <- NULL
  # for(r in 1:r_max){
  #   Xmat <- rbind(Xmat, XList[[r]])
  # }
  Xmat <- do.call("rbind", XList)
 
  message(paste0("Starting computing initial values using ", int.model ," ..."))
  set.seed(seed)
  tstart <- Sys.time()
  if(wpca_int){
    princ1 <- wpca(Xmat, q, weighted = wpca_int)
  }else{
    princ1 <- approxPCA(Xmat, q)
  }
  W0 <- princ1$loadings
  sampleID <- get_sampleID(XList)
  if(r_max >1){
    hZ <- RunHarmony(princ1$PCs, meta_data=data.frame(batch=factor(sampleID)), vars_use='batch', verbose=FALSE)
  }else{
    hZ <- princ1$PCs
  }
  ## Delete the big objects and free the memory
  rm(princ1)
  rm(Xmat)
  gc()
  
  
  
  
  ### parallel to obtain the initial values 
  alpha <- FALSE
  nK <- length(K)
  if(nK>1){
    ## at most use 10 cores
    if(is.null(coreNum_int)){
      cores <- ifelse(nK < 10,  
                      nK, 10)
    }else{
      cores <- coreNum_int
    }
    
    if(Sys.info()[1]=="Windows"){
      cl <- parallel::makeCluster(cores) # memory can not be shared type in Windows.
    }else{
      cl <- parallel::makeCluster(cores, type='FORK') # memory-shared type in linux or Mac.
    }
    # Mclust <- mclust::Mclust
    # mclustBIC <- mclust::mclustBIC
    
    # parallel::clusterExport(cl, varlist = c("Mclust", "mclustBIC"))
    intList <- parallel::parLapply(cl, X=K, parafun_int, Z=hZ, alpha=alpha, 
                                   Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                                   int.model =int.model, init.nstart=init.nstart, verbose=verbose)
    parallel::stopCluster(cl)
  }else{
    intList <- list(parafun_int(K, Z=hZ, alpha=alpha, Sigma_equal, Sigma_diag,
                                int.model =int.model,init.nstart=init.nstart, verbose=verbose))
  }
  
  .logDiffTime(sprintf(paste0("%s Initialization finished!"), "*****"), t1 = tstart, verbose = verbose)
  
  alpha0List = list()
  Mu0List = list()
  Sigma0List = list()
  ymat = matrix(0, n, nK)
  for (kk in 1:nK){
    # print(kk)
    
    ymat[,kk] <- intList[[kk]]$ymatk
    
    alpha0List[[kk]] <- intList[[kk]]$alpha0k
    
    Mu0List[[kk]] <- intList[[kk]]$Mu0k
    
    Sigma0List[[kk]] <- intList[[kk]]$Sigma0k
  }
  
  
  
  message("----Fitting PRECAST model...----------------\n")
  tstart <- Sys.time()
  beta0  =1.5
  resList <- idrsc2Cpp(XList, AdjList, Adjlist_car, hZ, ymat, Mu0List, 
                       Sigma0List, W0, alpha0List, beta0, 
                       beta_grid, maxIter_ICM, 
                       maxIter, epsLogLik, verbose,(!error_heter),
                       Sigma_equal, Sigma_diag, mix_prop_heter, Sp2, max(K), min(K), coreNum)
  
  .logDiffTime(sprintf(paste0("%s PRECAST model fitting finished!"), "*****"), t1 = tstart, verbose = verbose)
  
  
  
  para_settings <- list(K=K, n=n, p=p,q=q,r_max=r_max,
                        Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                        mix_prop_heter=mix_prop_heter)
  attr(resList, "para_settings") <- para_settings
  class(resList) <- "SeqK_PRECAST_Object"
  return(resList)
  
  
}



### Following function is the early version of ICM.EM
idrsc <- function(XList, q, K, AdjList=NULL,  Adjlist_car=NULL, posList = NULL, platform = "ST",
                  beta_grid=seq(0.2,4, by=0.2), init.nstart=5,
                   maxIter_ICM=6,maxIter=20, epsLogLik=1e-5, verbose=TRUE,
                   mix_prop_heter=TRUE, Sigma_equal=FALSE, Sigma_diag=TRUE,error_heter=TRUE, Sp2=TRUE,
                   wpca_int=FALSE,int.model=c('kmeans', 'mclust'), seed=1,coreNum = 1, coreNum_int=coreNum){
  
  
  
  
  int.model <- match.arg(int.model)
  
  r_max <- length(XList)
  
  pf <- unlist(lapply(XList, ncol))
  # if(is.null(posList) && is.null(AdjList)) stop("Check the values of arguments!")
  if(sum(pf!=pf[1]) > 0) stop("ncols of XList's components must be same!")
  if(is.null(posList) && is.null(AdjList)) stop("posList or AdjList must be provided one!")
  n <- sum(sapply(XList, nrow))
  p <-  pf[1]
  message("Intergrative data info.: ", r_max, ' samples, ', p , " genes X ", n, " spots------")
  message("iDR-SC model setting: ", 'error_heter=',error_heter,", Sigma_equal=",Sigma_equal,
          ", Sigma_diag=", Sigma_diag, ', mix_prop_heter=', mix_prop_heter)
  
  if(is.null(AdjList) && !is.null(posList))
    AdjList <- getAdjList(posList,  platform=platform)
  if(any(sapply(AdjList, nrow)!=sapply(XList, nrow)))  
    stop('The dimension of Adj matrix does not match that of X!\n')
  
  if(is.null(Adjlist_car)) Adjlist_car <- AdjList
 
  # Xmat <- NULL
  # for(r in 1:r_max){
  #   Xmat <- rbind(Xmat, XList[[r]])
  # }
  Xmat <- do.call("rbind", XList)
  
  message(paste0("Starting computing initial values using ", int.model ," ..."))
  set.seed(seed)
  tstart <- Sys.time()
  if(wpca_int){
    princ1 <- wpca(Xmat, q, weighted = wpca_int)
  }else{
    princ1 <- approxPCA(Xmat, q)
  }
  W0 <- princ1$loadings
  sampleID <- get_sampleID(XList[[1]])
  if(r_max >1){
    hZ <- RunHarmony(princ1$PCs, meta_data=data.frame(batch=factor(sampleID)), vars_use='batch', verbose=FALSE)
  }else{
    hZ <- princ1$PCs
  }
  ## Delete the big objects and free the memory
  rm(princ1)
  rm(Xmat)
  gc()
  
  
  
  ### parallel to obtain the initial values 
  alpha <- FALSE
  nK <- length(K)
  if(nK>1){
    ## at most use 10 cores
    if(is.null(coreNum_int)){
      cores <- ifelse(nK < 10,  
                      nK, 10)
    }else{
      cores <- coreNum_int
    }
    
    if(Sys.info()[1]=="Windows"){
      cl <- parallel::makeCluster(cores) # memory can not be shared type in Windows.
    }else{
      cl <- parallel::makeCluster(cores, type='FORK') # memory-shared type in linux or Mac.
    }
    # Mclust <- mclust::Mclust
    # mclustBIC <- mclust::mclustBIC
    
    # parallel::clusterExport(cl, varlist = c("Mclust", "mclustBIC"))
    # Run
    
    intList <- parallel::parLapply(cl, X=K, parafun_int, Z=hZ, alpha=alpha, 
                                   Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                                   int.model =int.model, init.nstart=init.nstart, verbose=verbose)
    parallel::stopCluster(cl)
  }else{
    intList <- list(parafun_int(K, Z=hZ, alpha=alpha, Sigma_equal, Sigma_diag,
                                int.model =int.model, init.nstart=init.nstart,verbose=verbose))
  }
  
  .logDiffTime(sprintf(paste0("%s Initialization finished!"), "*****"), t1 = tstart, verbose = verbose)
  
  alpha0List = list()
  Mu0List = list()
  Sigma0List = list()
  ymat = matrix(0, n, nK)
  for (kk in 1:nK){
    # print(kk)
    
    ymat[,kk] <- intList[[kk]]$ymatk
    
    alpha0List[[kk]] <- intList[[kk]]$alpha0k
    
    Mu0List[[kk]] <- intList[[kk]]$Mu0k
    
    Sigma0List[[kk]] <- intList[[kk]]$Sigma0k
  }
  
  
    
  message("----Fitting PRECAST model----------------\n")
  beta0  =1.5
  resList <- idrsc2Cpp(XList, AdjList, Adjlist_car, hZ, ymat, Mu0List, 
                         Sigma0List, W0, alpha0List, beta0, 
                         beta_grid, maxIter_ICM, 
                         maxIter, epsLogLik, verbose,(!error_heter),
                         Sigma_equal, Sigma_diag, mix_prop_heter, Sp2, max(K), min(K), coreNum)
    
  
  
  
  para_settings <- list(K=K, n=n, p=p,q=q,r_max=r_max,
                        Sigma_equal=Sigma_equal, Sigma_diag=Sigma_diag,
                        mix_prop_heter=mix_prop_heter)
  attr(resList, "para_settings") <- para_settings
  class(resList) <- "SeqK_PRECAST_Object"
  return(resList)
    

}


## Define a new function mycluster to avoid using parallel::clusterExport in parallel
mycluster <- function(Z, G, int.model=c('kmeans', 'mclust'), init.nstart=5, verbose=FALSE){
  # require(mclust)
  int.model <- match.arg(int.model)
  q <- ncol(Z)
  clslist <- list()
  if(int.model == 'mclust'){
    #mclust_z <- mclust::Mclust(Z, G=G,modelNames ='EEE' ,verbose=verbose)
    results <- replicate(init.nstart, mclust::Mclust(data = Z, G = G,
                                                     modelNames = "EEE", verbose = FALSE), simplify = FALSE)
    best_index <- which.max(sapply(results, function(x) x$bic))
    mclust_z <- results[[best_index]]
    clslist$cluster <- mclust_z$classification
    clslist$mu <- t(mclust_z$parameters$mean) # K * q
    clslist$sigma <- mclust_z$parameters$variance$sigma
    clslist$pro <- mclust_z$parameters$pro
    rm(mclust_z)
  }else if(int.model == 'kmeans'){
    km_z <- stats::kmeans(x = Z, centers = G,
                          iter.max = 100, nstart = init.nstart)
    clslist$cluster <- km_z$cluster
    clslist$mu <- km_z$centers
    clslist$pro <- rep(1/G, G)
    Sigma_int <- array(dim=c(q, q, G))
    for(k in 1:G){
      Sigma_int[,,k] <- var(Z[km_z$cluster==k,])
    }
    clslist$sigma <- Sigma_int
    rm(km_z)
  }
  
  return(clslist)
}
parafun_int <- function(k, Z, alpha, Sigma_equal, Sigma_diag, int.model=c('kmeans', 'mclust'), init.nstart=5, verbose=FALSE){
  
  int.model <- match.arg(int.model)
  
  clslist <- mycluster(Z, G=k, int.model = int.model, init.nstart=init.nstart, verbose=verbose)
  
  ymatk <- clslist$cluster
  
  if(alpha){
    alpha0k <- clslist$pro
  }else{
    alpha0k <- rep(0, k)
  }
  
  Mu0k <- clslist$mu
  Sigmak <- clslist$sigma
  Sigma0k <- Sigmak
  if(Sigma_diag){
    Sigma0k <- array(0, dim=dim(Sigmak))
    for(kk in 1:k){
      diag(Sigma0k[,,kk]) <- diag(Sigmak[,,kk])
    }
    Sigmak <- Sigma0k
  }
  
  if(Sigma_equal){
    for(kk in 1:k){
      Sigma0k[,,kk] <- apply(Sigmak, c(1,2), mean)
    }
  }
  Pi0k <- clslist$pro
  return(list(ymatk=ymatk, alpha0k=alpha0k, Mu0k=Mu0k, Sigma0k=Sigma0k, Pi0k=Pi0k))
}


degree_freedom <- function(K, paraList){
  
  p  <- paraList$p; q <- paraList$q
  r_max <- paraList$r_max; Sigma_equal <- paraList$Sigma_equal
  Sigma_diag <- paraList$Sigma_diag;  mix_prop_heter <- paraList$mix_prop_heter
  if(!Sigma_equal){
    if(mix_prop_heter){
      
      if(Sigma_diag){ # our model
        
        # # beta_r + W + Lam_r + Mu_k + Sigma_k + Psi_r
        dfree <- 1*r_max+p*(q+r_max) + K*(q+ q) + q*(q+1)/2*r_max
      }else{
        dfree <- 1*r_max+p*(q+r_max) + K*(q + q*(q+1)/2.0)+  q*(q+1)/2*r_max
      }
    }else{
      
      if(Sigma_diag){
        
        # # beta + W + Lam_r + Mu+Sigma + tau_r
        dfree <- 1+p*(q+r_max) + K*(q+q) +  q*(q+1)/2*r_max
      }else{
        dfree <- 1+p*(q+r_max) + K*(q+q*(q+1)/2.0)+  q*(q+1)/2*r_max
      }
    }
  }else{
    if(mix_prop_heter){
      if(Sigma_diag){ ## This is our model setting
        message("Sigma is set to diagonal matrices \n")
        # # beta_r + W + Lam_r + Mu+ Sigma_r + Phi_r
        dfree <- 1*r_max+p*(q+r_max) + K*q+ q + r_max *q*(q+1)/2
      }else{
        message("Sigma is set to dense matrices \n")
        dfree <- 1*r_max+p*(q+r_max) + K*q+ q*(q+1)/2.0+ r_max *q*(q+1)/2
      }
    }else{
      if(Sigma_diag){
        message("Sigma is set to diagonal matrices \n")
        # # beta + W + Lam_r + Mu_k +Sigma_r + Phi_r
        dfree <- 1+p*(q+r_max) + K*q+ q  + r_max *q*(q+1)/2
      }else{
        message("Sigma is set to dense matrices \n")
        dfree <- 1+p*(q+r_max) + K*q+ q*(q+1)/2.0 + r_max *q*(q+1)/2
      }
    }
  }
  return(dfree)
}

SelectModel <- function(obj, criteria = 'MBIC',pen_const=1, return_para_est=FALSE) UseMethod("SelectModel")
SelectModel.SeqK_PRECAST_Object <- function(obj, criteria = 'MBIC',pen_const=1, return_para_est=FALSE){
  if(!inherits(obj, 'SeqK_PRECAST_Object')) stop('obj must be "SeqK_PRECAST_Object"!\n')
  para_settings <- attr(obj, 'para_settings')
  K_set <- para_settings$K
  nK <- length(K_set)
  icVec <-  rep(Inf, nK)
  if(length(obj) != nK) stop("The length of obj must be equal to length of K!")
  n <- para_settings$n; p <- para_settings$p
  for(k in 1:nK){
      resList <- obj[[k]]
      dfree <- degree_freedom(K_set[k], para_settings)
      icVec[k] <- switch(criteria, 
             MAIC = -2.0* resList$loglik +dfree * 2 * log(log(p+n))*pen_const,
             AIC = -2.0* resList$loglik +dfree * 2,
             MBIC =  -2.0* resList$loglik +dfree * log(n) * log(log(p+n))*pen_const, 
             BIC =  -2.0* resList$loglik +dfree * log(n) * log(log(p+n))*pen_const )
      
    
    
  }
  bestK <- K_set[which.min(icVec)]
  icMat <- cbind(K=K_set, IC=icVec)
  resList <- obj[[which.min(icVec)]]
  cluster_PCList <- list(bestK= bestK, cluster=resList$cluster, hZ = resList$hZ, Rf = resList$Rf,
                         hV=resList$hV, hW = resList$W,icMat=icMat)
  if(return_para_est){
    attr(cluster_PCList, 'fit') <- resList[-c(1:4)]
  }
  
  return(cluster_PCList)
  
}



# gendataInte_sp <- function(height1=30, width1=30,height2=height1, width2=width1, 
#                            p =100, q=10, K=7,  G=4, beta=1.2, sigma2=1, 
#                            alpha=8, seed=1, view=TRUE){
#   # height <- 70
#   # width <- 70
#   # G <- 4
#   # beta <- 1.0
#   # K <- 7
#   # q <- 10
#   # p <- 1000
#   if(q <2) stop("error:gendata_sp::q must be greater than 2!")
#   
#   if(length(beta) <2) beta <- rep(beta, 2)
#   # require(GiRaF)
#   # require(MASS)
#   n1 <- height1 * width1 # # of cell in each indviduals 
#   n2 <- height2 * width2
#   index1 <- 1:n1; index2 <- (n1+1):(n1+n2)
#   ## generate deterministic parameters, fixed after generation
#  
#   if(length(sigma2)==1){
#     Lambda1 <- sigma2*(abs(rnorm(p, sd=1)))
#     Lambda2 <- sigma2*(abs(rnorm(p, sd=1)))
#   }else{
#     Lambda1 <- rep(sigma2[1], p)
#     Lambda2 <- rep(sigma2[2], p)
#   }
#   
#   
#   W1 <- matrix(rnorm(p*q), p, q)
#   W <- qr.Q(qr(W1))
#   
#   mu <- matrix(0, q,  K)
#   diagmat = array(0, dim = c(q, q, K))
#   if(q > K){
#     q1 <- floor(K/2)
#     for(j in 1:q1){
#       if(j <= (q1/2)) mu[j,j] <- alpha
#       if(j > (q1/2)) mu[j,j] <- -alpha
#     }
#     mu[(q1+1):q, K] <- -alpha
#     
#   }else if(q <= K){
#     for(k in 1:K)
#       mu[,k] <- rep(alpha/8 *k, q) #
#   }
#   for(k in 1:K){
#     tmp  <- rep(1, q)
#     if(k <= K/2){
#       tmp[q] <- alpha
#     }
#     diag(diagmat[,,k]) <- tmp
#   }
#   
#   Mu <- t(mu)
#   Sigma <- diagmat
#   tau2 <- rep(1, q); tau2[1] <- 4; tau2[2] <- -4
#   set.seed(seed)
#   # generate the spatial dependence for state variable y, a hidden Markov RF
#   y1 <- sampler.mrf(iter = n1, sampler = "Gibbs", h = height1, w = width1, ncolors = K, nei = G, param = beta[1],
#                     initialise = FALSE, view = view)
#   y1 <- c(y1) + 1
#   y2 <- sampler.mrf(iter = n2, sampler = "Gibbs", h = height2, w = width2, ncolors = K, nei = G, param = beta[2],
#                     initialise = FALSE, view = view)
#   y2 <- c(y2) + 1
#   
#   Z1 <- matrix(0, n1, q)
#   Z2 <- matrix(0, n2, q)
#   for(k in 1:K){
#     nk1 <- sum(y1==k)
#     Z1[y1==k, ] <- MASS::mvrnorm(nk1, Mu[k,], Sigma[,,k])
#     
#     nk2 <- sum(y2==k)
#     Z2[y2==k, ] <- MASS::mvrnorm(nk2, Mu[k,]+tau2, Sigma[,,k])
#   }
#   X1 <- Z1 %*% t(W)+  MASS::mvrnorm(n1, rep(0,p), diag(Lambda1))
#   X2 <- Z2 %*% t(W) + MASS::mvrnorm(n2, rep(0,p), diag(Lambda2))
#   # compute the strength of signal
#   svd_Sig1 <- svd(cov(Z1))
#   W11 <- W %*% svd_Sig1$u %*% diag(sqrt(svd_Sig1$d))
#   snr1 <- sum(svd(W11)$d^2) / (sum(svd(W11)$d^2)+ sum(Lambda1))
#   svd_Sig2 <- svd(cov(Z2))
#   W22 <- W %*% svd_Sig2$u %*% diag(sqrt(svd_Sig2$d))
#   snr2 <- sum(svd(W22)$d^2) / (sum(svd(W22)$d^2)+ sum(Lambda2))
#   
#   message("SNR1=", round(snr1,4),", SNR2=", round(snr2, 4),  '\n')
#   
#   # make position
#   pos1 <- cbind(rep(1:height1, width1), rep(1:height1, each=width1))
#   pos2 <- cbind(rep(1:height2, width2), rep(1:height2, each=width2))
#   return(list(X1=X1,X2=X2, Z1=Z1, Z2=Z2, cluster=c(y1,y2), Mu=Mu, Sigma=Sigma,
#               W=W, Lambda1=Lambda1, Lambda2=Lambda2, tau2=tau2, 
#               beta=beta,pos1=pos1,pos2=pos2, snr=c(snr1,snr2),
#               index1=index1,index2=index2))
#   
# }
# 


find_neighbors <- function(pos, platform=c('ST', "Visium")) {
  # require(purrr)
  # require(S4Vectors)
  if (tolower(platform) == "visium") {
    ## Spots to left and right, two above, two below
    offsets <- data.frame(x.offset=c(-2, 2, -1,  1, -1, 1),
                          y.offset=c( 0, 0, -1, -1,  1, 1))
  } else if (tolower(platform) == "st") {
    ## L1 radius of 1 (spots above, right, below, and left)
    offsets <- data.frame(x.offset=c( 0, 1, 0, -1),
                          y.offset=c(-1, 0, 1,  0))
  } else {
    stop(".find_neighbors: Unsupported platform \"", platform, "\".")
  }
  
  ## Get array coordinates (and label by index of spot in SCE)
  colnames(pos) <- c("row", "col")
  # pos <- DataFrame(pos) # reduce the dependency on S4Vector
  pos <- as.data.frame(pos)
  spot.positions <- pos
  spot.positions$spot.idx <- seq_len(nrow(spot.positions))
  
  ## Compute coordinates of each possible spot neighbor
  neighbor.positions <- merge(spot.positions, offsets)
  neighbor.positions$x.pos <- neighbor.positions$col + neighbor.positions$x.offset
  neighbor.positions$y.pos <- neighbor.positions$row + neighbor.positions$y.offset
  
  ## Select spots that exist at neighbor coordinates
  neighbors <- merge(as.data.frame(neighbor.positions), 
                     as.data.frame(spot.positions), 
                     by.x=c("x.pos", "y.pos"), by.y=c("col", "row"),
                     suffixes=c(".primary", ".neighbor"),
                     all.x=TRUE)
  
  ## Shift to zero-indexing for C++
  #neighbors$spot.idx.neighbor <- neighbors$spot.idx.neighbor - 1
  
  ## Group neighbor indices by spot 
  ## (sort first for consistency with older implementation)
  neighbors <- neighbors[order(neighbors$spot.idx.primary, 
                               neighbors$spot.idx.neighbor), ]
  df_j <- split(neighbors$spot.idx.neighbor, neighbors$spot.idx.primary)
  df_j <- unname(df_j)
  
  ## Discard neighboring spots without spot data
  ## This can be implemented by eliminating `all.x=TRUE` above, but
  ## this makes it easier to keep empty lists for spots with no neighbors
  ## (as expected by C++ code)
  ## df_j <- map(df_j, function(nbrs) discard(nbrs, function(x) is.na(x)))
  df_j <- lapply(df_j, function(nbrs) discard(nbrs, function(x) is.na(x)))
  
  ## Log number of spots with neighbors
  n_with_neighbors <- length(keep(df_j, function(nbrs) length(nbrs) > 0))
  message("Neighbors were identified for ", n_with_neighbors, " out of ",
          nrow(pos), " spots.")
  
  n <- length(df_j) 
  
  D <- matrix(0,  nrow = n, ncol = n)
  for (i in 1:n) {
    if(length(df_j[[i]]) != 0)
      D[i, df_j[[i]]] <- 1
  }
  ij <- which(D != 0, arr.ind = T)
  ij
}
getAdj_reg <- function(pos, platform= "Visium"){
  ij <- find_neighbors(pos, platform)
  # library(Matrix)
  n <- nrow(pos)
  Adj_sp <- Matrix::sparseMatrix(ij[,1], ij[,2], x = 1, dims=c(n, n))
  return(Adj_sp)
}
# getAdj <- function(pos, platform="Visisum"){
#   
#   if(tolower(platform) %in% tolower(c('ST', "Visium"))){
#     Adj_sp <- getAdj_reg(pos, platform)
#   }else{
#     Adj_sp <- getAdj_auto(pos)
#   }
# }

# getAdj_auto <- function(pos){
#   if (!inherits(pos, "matrix"))
#     stop("method is only for  matrix object!")
#   
#   radius.lower <- 1
#   radius.upper <- 50
#   start.radius <- 1
#   Med <- 0
#   while(!(Med >= 4 && Med <=6)){
#     
#     Adj_sp <- getneighborhood_fast(pos, radius=start.radius)
#     Med <- summary(Matrix::rowSums(Adj_sp))['Median']
#     if(Med < 4){
#       radius.lower <- start.radius
#       start.radius <- (radius.lower + radius.upper)/2
#     }else if(Med >6){
#       radius.upper <- start.radius
#       start.radius <- (radius.lower + radius.upper)/2
#     }
#     message("Current radius is ", start.radius, '\n')
#     message("Median of neighbors is ", Med, '\n')
#   }
#   return(Adj_sp)
# }
# 

# getAdj_manual <- function(pos, radius){
#   if (!inherits(pos, "matrix"))
#     stop("pos must be a matrix!")
#   if(radius <=0){
#     stop('radius must be a positive real!')
#   }
#   
#   Adj_sp <- getneighborhood_fast(pos, radius=radius)
#   return(Adj_sp)
# }

getAdj_fixedNumber <- function(pos, number=6){
  if(nrow(pos)< 6*4) stop("nrow of pos must be greater than 4*number!")
  # require(Matrix)
  Adj_sp <- get_fixedNumber_neighbors(pos, number)
  return(Adj_sp)
}

wpca <- function(X, q, weighted=T){
  if(!is.matrix(X)) stop("wpca: X must be a matrix!")
  if(q< 1) stop("wpca: q must be a positive integer!")
  X <- scale(X, scale=F) # centralize
  out <- wpcaCpp(X, q, weighted)
  return(out)
}
approxPCA <- function (X, q) {
  # require(irlba)
  n <- nrow(X)
  svdX <- irlba(A = X, nv = q)
  PCs <- svdX$u %*% diag(svdX$d[1:q])
  loadings <- svdX$v
  dX <- PCs %*% t(loadings) - X
  Lam_vec <- colSums(dX^2)/n
  return(list(PCs = PCs, loadings = loadings, Lam_vec = Lam_vec))
}
getAdjList <- function(posList, platform='Visium', ...){
  
  if(!inherits(posList, "list")) stop("posList must be a list consisting of matrix.")
  if(tolower(platform) %in% c("st", "visium")){
      AdjList <- pbapply::pblapply(posList, getAdj_reg, platform=platform)
  }else{
      AdjList <- pbapply::pblapply(posList, function(x, ...)getAdj_auto(x, ...))
  }
  return(AdjList)
}


# Access data from Seurat object ------------------------------------------
## To compatible with Seurat V5

get_data_fromSeurat <- function(seu, assay=NULL, slot='counts'){
  
  if(is.null(assay)) assay <- DefaultAssay(seu)
  dat <- GetAssayData(seu, assay = assay, layer= slot)
  
  
  return(dat)
}
get_varfeature_fromSeurat <- function(seu, assay=NULL){
  
  if(is.null(assay)) assay <- DefaultAssay(seu)
  
  if(inherits(seu[[assay]], "Assay5")){
    var.features <- seu[[assay]]@meta.data$var.features
    var.features <- var.features[!is.na(var.features)] 
    
  }else{
    var.features <- seu[[assay]]@var.features
  }
  return(var.features)
}



# Used in real data analysis ----------------------------------------------



## Get basic information
## format the log-normalized gene expression based on the selected genes.
getXList <- function(seuList, genelist){
  # require(Seurat)
  
  seuList <- lapply(seuList, NormalizeData)
  XList <- list()
  nsample <- length(seuList)
  indexList <- list()
  nr <- 0
  posList <- list()
  for(i in 1:nsample){
    XList[[i]] <- Matrix::t(seuList[[i]]@assays$RNA@data[genelist,])
    indexList[[i]] <- (nr+1):(nrow(XList[[i]] )+nr)
    nr <- nr + nrow(XList[[i]] )
    posList[[i]] <- cbind(seuList[[i]]$row, seuList[[i]]$col)
  }
  
  return(list(XList=XList, posList=posList, 
              indxList=indexList))
}

Try the PRECAST package in your browser

Any scripts or data that you put into this service are public.

PRECAST documentation built on Dec. 15, 2025, 5:07 p.m.