R/FindCell.R
In SCIntRuler: Guiding the Integration of Multiple Single-Cell RNA-Seq Datasets

Documented in FindCell

#' Find cells indicating shared biological features across conditions
#'
#' @param seuratobj The Seurat object that all samples/subjects were merged together.
#' @param seuratlist A list of Seurat objects, usually can be got by SplitObject().
#' @param fullcluster A list of clusters that generated by the function GetCluster().
#' @param distmat A list of distance vectors generated by the function FindNNDist().
#' @param firstn The number of nearest cells were detected that you want to include in the permutation test. Default to be 15.
#' @return A list of two vectors: one is for which cluster of which sample will be highlighted and the second one is which cells will be selected.
#' @export
#'
#' @examples
#' \donttest{
#' data(sim_data_sce)
#' data(sim_result)
#' sim_data <- SCEtoSeurat(sim_data_sce)
#' seuratlist <- Seurat::SplitObject(sim_data, split.by = "Study")
#' FindCell(sim_data, seuratlist, sim_result[[1]], sim_result[[3]], 15)
#' }



FindCell <- function(seuratobj,seuratlist,fullcluster, distmat,firstn = 15){

  sample_cluster <- c()

  for (i in seq_along(seuratlist)){

    nclust <- max(fullcluster[[i]]$finecluster)
    testres <- matrix(NA,nrow = nclust, ncol = 2)
    disttest <- list()
    attid <- c()

    for (q in seq_len(nclust)){
      dist1 <- as.vector(do.call(cbind, distmat[[i]][[1]][[q]])[,1:firstn])
      dist2 <- as.vector(do.call(cbind, distmat[[i]][[2]][[q]])[,1:firstn])
      # disttest[[q]] <- data.frame(rbind(cbind(dist1,rep("Internal Dist",length(dist1))),
      #                                   cbind(dist2,rep("External Dist",length(dist1)))))%>%
      #   dplyr::mutate(dist = as.numeric(dist1),group = factor(V2)) %>% dplyr::select(3,4)

      # disttest[[q]] <- data.frame(rbind(cbind(dist1, rep("Internal Dist", length(dist1))),
      #                                   cbind(dist2, rep("External Dist", length(dist1))))) |>
      #   (\(df) dplyr::mutate(df, dist = as.numeric(dist1), group = factor(V2)))() |>
      #   (\(df) dplyr::select(df, 3, 4))()

      # Combine the distances and create the data frame
      dist_combined <- data.frame(rbind(cbind(dist1, rep("Internal Dist", length(dist1))),
                                        cbind(dist2, rep("External Dist", length(dist1)))))

      # Mutate the data frame
      dist_mutated <- dplyr::mutate(dist_combined, dist = as.numeric(dist1), group = factor(V2))

      # Select the relevant columns and assign to disttest[[q]]
      disttest[[q]] <- dplyr::select(dist_mutated, 3, 4)


      testres[q,1] <- round(coin::pvalue(coin::independence_test(dist ~ group, data = disttest[[q]],
                                                     alternative = "greater")),3)

      testres[q,2] <- round((mean(dist2) - mean(dist1))/max(mean(dist2),mean(dist1)),4)

      if(testres[q,1] > 0.9 & testres[q,2] < 0){
        # print(paste0("Sample",i,"_",q))
        attid <- c(attid,paste0("Sample",i,"_",q))
      }

    }

    sample_cluster <- c(sample_cluster,attid)

  }


  meta <- seuratobj@meta.data

  fullcluster1 <- fullcluster

  # for (i in seq_along(fullcluster)) {
  #   fullcluster1[[i]] <- fullcluster[[i]] %>%
  #     dplyr::mutate(finecluster = factor(finecluster),
  #            rarecluster = factor(rarecluster))
  #   temp <- apply(fullcluster1[[i]], 2,
  #                 function(x){paste0("Sample",i,"_",x)}) %>% data.frame()
  #   rownames(temp) <- rownames(fullcluster[[i]])
  #   fullcluster1[[i]] <- temp
  # }

  for (i in seq_along(fullcluster)) {
    fullcluster1[[i]] <- dplyr::mutate(fullcluster[[i]],
                                       finecluster = factor(finecluster),
                                       rarecluster = factor(rarecluster))
    temp <- apply(fullcluster1[[i]], 2, function(x) { paste0("Sample", i, "_", x) })
    temp <- data.frame(temp)
    rownames(temp) <- rownames(fullcluster[[i]])
    fullcluster1[[i]] <- temp
  }



  clusters <- dplyr::bind_rows(fullcluster1)
  clusters[,"rowname"] <- rownames(clusters)

  meta[,"rowname"] <- rownames(seuratobj@meta.data)

  # meta <- meta %>% dplyr::left_join(clusters, by = "rowname")
  meta <- dplyr::left_join(meta, clusters, by = "rowname")

  rownames(meta) <- meta$rowname

  plotc <- sample_cluster

  # meta <- meta %>% dplyr::mutate(
  #   plot_cluster = dplyr::if_else(finecluster %in% plotc,finecluster, NA))
  meta <- dplyr::mutate(meta, plot_cluster = dplyr::if_else(finecluster %in% plotc, finecluster, NA))


  seuratobj@meta.data <- meta

  cell.use <- meta[which(!is.na(meta$plot_cluster)),"rowname"]

  return(list(sample_cluster,cell.use))
}