R/runA-Z.R

Defines functions runBayesSpaceClustering

Documented in runBayesSpaceClustering

# runA --------------------------------------------------------------------

# runB --------------------------------------------------------------------

#' @title Clustering with BayesSpace
#'
#' @description A wrapper around the BayesSpace clustering pipeline introduced
#' by *Zhao et al. (2021)*.
#'
#' @param q_force Numeric value or `FALSE`. If numeric, it forces the number
#' of output clusters with input value. If `FALSE`, the optimal number
#' of clusters is chosen for `q` determined by the elbow point of `BayesSpace::qTune()`.
#' @param name Character value. The name the cluster variable has in
#' the feature data of the \code{SPATA2} object. Defaults to \emph{bayes_space}.
#' @param prefix Character value. Prefix of the cluster groups.
#' @param overwrite Logical value. If TRUE, \code{name} overwrites features
#' in feature data of the \code{SPATA2} object.
#' @param assign_sce Character value or `NULL`. If character, specifies the
#' name under which the bayes space output (object of class `SingleCellExperiment`)
#' is assigned to the global environment. This makes the whole output
#' of the bayes space pipeline available instead of only adding the clustering
#' output as a grouping variable to the `SPATA2` object.
#' @param qs The values of q to evaluate. If `qs` is only one value exactly
#' that is given to `q` of `BayesSpace::spatialCluster()`. Else the optimal
#' `q` from all provided values is identified using `BayesSpace::qTune()`.
#'
#' @inherit BayesSpace::readVisium params
#' @inherit BayesSpace::qTune params
#' @inherit BayesSpace::spatialPreprocess params
#' @inherit BayesSpace::spatialCluster params
#'
#' @param ... Additional arguments given to `BayesSpace::spatialCluster()`. Exception:
#' `sce`, `q` are specified within the function.
#'
#' @inherit argument_dummy params
#' @inherit update_dummy return
#'
#' @details This function is a wrapper around \code{readVisium()},
#' \code{spatialPreprocess()}, \code{qTune()} and \code{spatialCluster()}
#' of the `BayesSpace` package. The results are stored in form of a grouping
#' variable in the feature data.frame of the returned \code{SPATA2} object.
#'
#' @references Zhao E, Stone MR, Ren X, Guenthoer J, Smythe KS, Pulliam T,
#'  Williams SR, Uytingco CR, Taylor SEB, Nghiem P, Bielas JH, Gottardo R.
#'  Spatial transcriptomics at subspot resolution with BayesSpace.
#'  Nat Biotechnol. 2021 Nov;39(11):1375-1384. doi: 10.1038/s41587-021-00935-2.
#'  Epub 2021 Jun 3. PMID: 34083791; PMCID: PMC8763026.
#'
#' @export
#'
#' @seealso [`plotLoglik()`], [`find_elbow_point()`]
#'
#' @examples
#' library(SPATA2)
#'
#' data("example_data")
#'
#' object <- example_data$object_UKF313T_diet
#'
#' # tests options for q from 3 to 15 and picks the best
#' object <- runBayesSpaceClustering(object, name = "new_bspace", qs = 3:15)
#'
#' plotLoglik(object)
#'
#' # run with q = 10 to force 10 clusters in the output
#' object <- runBayesSpaceClustering(object, name = "bspace_10", qs = 10)
#'
runBayesSpaceClustering <- function(object,
                                    name = "bayes_space",
                                    # given to spatialPreprocess()
                                    n.Pcs = 15,
                                    n.HVGs = 2000,
                                    skip.PCA = FALSE,
                                    log.normalize = TRUE,
                                    assay.type = "logcounts",
                                    BSPARAM = BiocSingular::ExactParam(),
                                    # given to qTune()
                                    qs = 3:15,
                                    burn.in = c(100, 1000),
                                    nrep = c(1000, 50000),
                                    # given to spatialCluster()
                                    use.dimred = "PCA",
                                    d = 15,
                                    init.method = "mclust",
                                    model = "t",
                                    gamma = 3,
                                    mu0 = NULL,
                                    lambda0 = NULL,
                                    alpha = 1,
                                    beta = 0.01,
                                    save.chain = FALSE,
                                    chain.fname = NULL,
                                    # miscellaneous
                                    prefix = "B",
                                    return_model = TRUE,
                                    empty_remove = FALSE,
                                    overwrite = FALSE,
                                    assign_sce = FALSE,
                                    assign_envir = .GlobalEnv,
                                    seed = 123,
                                    verbose = NULL,
                                    ...){

  if(!"BayesSpace" %in% base::rownames(installed.packages())){

    install <- utils::askYesNo(msg = "BayesSpace is not installed. Do you want to install it?")

    if(base::isTRUE(install)){

      if(!requireNamespace("BiocManager", quietly = TRUE)){ install.packages("BiocManager") }

      BiocManager::install("BayesSpace")

    } else {

      stop("Can not continue without BayesSpace package.")

    }

  }

  deprecated(...)

  hlpr_assign_arguments(object)

  containsMethod(object, method_name = c("VisiumSmall", "VisiumLarge"), error = TRUE)

  confuns::is_vec(x = burn.in, mode = "numeric", of.length = 2)
  confuns::is_vec(x = nrep, mode = "numeric", of.length = 2)

  platform <-
    stringr::str_extract(getSpatialMethod(object)@name, pattern = "Visium|ST")

  confuns::check_none_of(
    input = name,
    against = getFeatureNames(object),
    ref.against = "feature names",
    overwrite = overwrite
  )

  sce <- asSingleCellExperiment(object, bayes_space = T)

  if(FALSE){

    if(base::isTRUE(empty_remove)){

      require(SingleCellExperiment)

      sce <- sce[, colSums(SingleCellExperiment::counts(sce)) > 0]

    } else {

      spots_no_read <-
        SingleCellExperiment::counts(sce) %>%
        base::as.matrix() %>%
        base::colSums() %>%
        base::as.data.frame() %>%
        tibble::rownames_to_column("barcodes") %>%
        dplyr::filter(.==0) %>%
        dplyr::pull(barcodes)

      if(base::length(spots_no_read) > 0){

        confuns::give_feedback(
          msg = " --- Size factor was optimized ---- ",
          verbose = verbose
        )

        row_sub <-
          stats::runif(
            n = base::length(spots_no_read),
            min = 1,
            max = base::nrow(sce@assays@data$counts)
          ) %>%
          base::round()

        sce@assays@data$counts[row_sub, spots.no.read] <-  1

      }

    }

  }

  if(base::is.numeric(seed)){ base::set.seed(seed) }

  confuns::give_feedback(
    msg = "Running BayesSpace::spatialPreprocess().",
    verbose = verbose
  )

  bayes_space_out <-
    BayesSpace::spatialPreprocess(
      sce = sce,
      platform = platform,
      n.PCs = n.Pcs,
      n.HVGs = n.HVGs,
      skip.PCA = skip.PCA,
      log.normalize = log.normalize,
     # BSPARAM = BSPARAM,
      assay.type = assay.type
    )

  if(base::length(qs) >= 2){

    confuns::give_feedback(
      msg = "Running BayesSpace::qTune().",
      verbose = verbose
    )

    bayes_space_out <-
      BayesSpace::qTune(
        sce = bayes_space_out,
        qs = qs,
        burn.in = burn.in[1],
        nrep = nrep[1]
      )

    logliks <- base::attr(bayes_space_out, "q.logliks")

    optimal_cluster <- find_elbow_point(logliks)

    confuns::give_feedback(
      msg = glue::glue("Calculated optimal input for `q`: {optimal_cluster}."),
      verbose = verbose
    )

    ma <- getAssay(object, assay_name = "gene")
    ma@analysis$bayes_space <- list(logliks = logliks)
    object <- setAssay(object, assay = ma)

  } else {

    optimal_cluster <- base::as.integer(qs[1])

    confuns::give_feedback(
      msg = glue::glue("Using input for `q`: {optimal_cluster}."),
      verbose = verbose
    )

  }

  confuns::give_feedback(
    msg = "Running BayesSpace::spatialCluster().",
    verbose = verbose
  )

  bayes_space_out <-
    BayesSpace::spatialCluster(
      sce = bayes_space_out,
      q = optimal_cluster,
      use.dimred = use.dimred,
      d = d,
      platform = platform,
      init.method = init.method,
      model = model,
      nrep = nrep[2],
      burn.in = burn.in[2],
      gamma = gamma,
      mu0 = mu0,
      lambda0 = lambda0,
      alpha = alpha,
      beta = beta,
      save.chain = save.chain,
      chain.fname = chain.fname
    )

  cluster_df <-
    bayes_space_out@colData %>%
    base::as.data.frame() %>%
    tibble::as_tibble() %>%
    dplyr::select(spot, spatial.cluster) %>%
    dplyr::rename(barcodes = spot, !!rlang::sym(name) := spatial.cluster) %>%
    dplyr::mutate(!!rlang::sym(name) := base::as.factor(!!rlang::sym(name)))

  cluster_levels <-
    base::levels(cluster_df[[name]]) %>%
    stringr::str_c(prefix, .)

  cluster_df <-
    dplyr::mutate(
      .data = cluster_df,
      !!rlang::sym(name) := stringr::str_c(prefix, !!rlang::sym(name)),
      !!rlang::sym(name) := base::factor(!!rlang::sym(name), levels = cluster_levels)
    )

  if(base::is.character(assign_sce)){

    message(glue::glue("Assigning SingleCell Experiment object in global environment under {assign_sce}."))

    base::assign(x = assign_sce, value = bayes_space_out, envir = assign_envir)

  }

  object <- addFeatures(object, feature_df = cluster_df, overwrite = overwrite)

  returnSpataObject(object)

}



# runC --------------------------------------------------------------------



#' @title Compute chromosomal instability
#'
#' @description Computes **c**hromosomal **in**stability scores based on copy number
#' variation results according to *Drews et al., 2022*. Requires the results
#' of [`runCNV()`]. See details for more.
#'
#' @param bin_size The size used for the chromosomal binning. Defaults 1.000.000
#' base pairs.
#' @param window_k Given to [`window.k`] of [`imputeTS::na_ma`].
#' @param noise Numeric value. Sets the level of noise to add.
#' @param gene_pos_df Data.frame defining the positions of genes on the chromosomes.
#' @param chrom_regions_df Data.frame defining the chromosomal regions.
#' @param coverage_model A formula.
#'
#' @inherit argument_dummy params
#'
#' @note Requires the packages `CINSignatureQuantification` and `imputeTS` to be installed.
#'
#' @details Adds a total of 18 new numeric variables to the meta data.frame.
#'
#' \itemize{
#'   \item{*CX1*, *CX2*, *CX3*, ..., *CX17*:}{ Chromosomal instability scores as described in the paper
#'   referenced below.}
#'   \item{*ploidy*:}{ Quantification of abnormal ploidy.}
#'   }
#'
#' Adding these variables is forced! Existing variable with equal names will be overwritten!
#'
#' @references Drews, R.M., Hernando, B., Tarabichi, M. et al.
#' A pan-cancer compendium of chromosomal instability. Nature 606, 976–983 (2022).
#' https://doi.org/10.1038/s41586-022-04789-9
#'
#' @keywords internal
runCIN <- function(object,
                   bin_size = 1000000,
                   window_k = 10,
                   noise = 0.035,
                   gene_pos_df = SPATA2::gene_pos_df,
                   chrom_regions_df = SPATA2::cnv_regions_df,
                   coverage_model = SPATA2::coverage_model,
                   verbose = NULL){

  hlpr_assign_arguments(object)

  check_packages(pkgs = c("CINSignatureQuantification", "imputeTS"))

  containsCNV(object, error = TRUE)

  object <-
    activateAssay(object, assay_name = "gene", verbose = FALSE)

  # SPATAwrappers -> Create.ref.bins()

  confuns::give_feedback(
    msg = "Creating reference bins.",
    verbose = verbose
  )

  ref_bins <-
    purrr::map_df(
      .x = 1:nrow(chrom_regions_df),
      .f = function(i){

        dat <-
          SPATAwrappers::hlpr_bins(
            Chr = rownames(chrom_regions_df)[i],
            start = chrom_regions_df$Start[i],
            end = chrom_regions_df$End[i],
            bin.size = bin_size
          )

        dat$Chr = chrom_regions_df$Chrom[i]
        dat$Chr.arm = rownames(chrom_regions_df)[i]

        return(dat)

      }) %>%
    tibble::as_tibble()

  # SPATAwrappers -> runCNV.Coverage(object)

  confuns::give_feedback(
    msg = "Computing CNV coverage.",
    verbose = verbose
  )

  cnv_genes <-
    dplyr::mutate(gene_pos_df, chrom = chromosome_name) %>%
    dplyr::filter(chrom %in% unique(ref_bins$Chr)) %>%
    dplyr::mutate(
      bins =
        merge_cnv_bins(
          chr = .$chrom,
          start_pos = .$start_position,
          end_pos = .$end_position,
          ref_bins = ref_bins,
          verbose = verbose
        )
    )

  coverage <-
    dplyr::count(cnv_genes, bins) %>%
    dplyr::rename(`:=`("bin", bins))

  ref_bins_cov <- dplyr::left_join(x = ref_bins, y = coverage, by = "bin")
  ref_bins_cov$n[is.na(ref_bins_cov$n)] = 0
  ref_bins_cov$xaxis <- 1:nrow(ref_bins_cov)
  ref_bins_cov$Arm <- "q"
  ref_bins_cov[ref_bins_cov$Chr.arm %>% str_detect(., patter = "p"),]$Arm <- "p"

  # SPATAwrappers -> runCNV.Normalization()
  mat <- SPATA2::getCnvResults(object)[["cnv_mtr"]]
  sample <- getSampleName(object)

  confuns::give_feedback(
    msg = "Merging data to reference bins.",
    verbose = verbose
  )

  data.cnv <-
    reshape2::melt(mat) %>%
    dplyr::rename(`:=`("hgnc_symbol", Var1)) %>%
    dplyr::rename(`:=`("barcodes", Var2)) %>%
    dplyr::left_join(x = ., y = cnv_genes, by = "hgnc_symbol") %>%
    dplyr::filter(!is.na(bins))

  confuns::give_feedback(
    msg = "Summarizing bins.",
    verbose = verbose
  )

  data.cnv <-
    dplyr::group_by(data.cnv, barcodes, bins) %>%
    dplyr::summarise(CNV = mean(value, na.rm = TRUE), CNV.var = var(value, na.rm = TRUE))

  if(!base::is.null(coverage_model)) {

    data.cnv <-
      dplyr::mutate(
        .data = data.cnv,
        CNV = stats::predict(coverage_model,  data.frame(InferCNV.val = CNV))
      )

  }

  data.cnv$CNV.var[is.na(data.cnv$CNV.var)] <- 0

  confuns::give_feedback(
    msg = "Adding noise.",
    verbose = verbose
  )

  CNV.mat <- matrix(NA, nrow = nrow(ref_bins), ncol = ncol(mat))

  colnames(CNV.mat) <- colnames(mat)
  rownames(CNV.mat) <- ref_bins$bin

  data <-
    reshape2::melt(CNV.mat) %>% dplyr::rename(`:=`("bins", Var1)) %>%
    dplyr::rename(`:=`("barcodes", Var2)) %>%
    dplyr::left_join(., data.cnv, by = c("bins", "barcodes")) %>%
    dplyr::mutate(
      impute = imputeTS::na_ma(CNV, k = window_k, weighting = "simple"),
      impute.var = imputeTS::na_ma(CNV.var, k = window_k, weighting = "simple"),
      noise = seq(from = 1 - noise, to = 1 + noise, length.out = nrow(.)) %>% base::sample(),
      CNV.out.noise = c(impute + noise + impute.var) %>%
        scales::rescale(c(min(data.cnv$CNV), max(data.cnv$CNV)))
    ) %>%
    reshape2::acast(data = ., formula = bins ~  barcodes, value.var = "CNV.out.noise") %>%
    reshape2::melt(data = .) %>%
    dplyr::rename("bin" := Var1)

  data <-
    dplyr::left_join(x = data, y = ref_bins_cov, by="bin") %>%
    dplyr::select(Chr, start, end, value, Var2)

  base::names(data) <- c("chromosome", "start", "end", "segVal", "sample")
  data$sample <- base::as.character(data$sample)

  gc()

  # quantify instability
  confuns::give_feedback(
    msg = "Computing chromosomal instability. (This can take time.)",
    verbose = verbose
  )

  instability <-
    CINSignatureQuantification::quantifyCNSignatures(data, build = "hg38")

  cin_scores <-
    base::as.data.frame(instability@activities$scaledAct3) %>%
    tibble::rownames_to_column(var = "barcodes") %>%
    tibble::as_tibble()

  object <- addFeatures(object, feature_df = cin_scores, overwrite = TRUE)

  ploidy_score <-
    base::as.data.frame(instability@samplefeatData) %>%
    tibble::rownames_to_column(var = "barcodes") %>%
    tibble::as_tibble() %>%
    dplyr::select(barcodes, ploidy)

  object <- addFeatures(object, feature_df = ploidy_score, overwrite = TRUE)

  returnSpataObject(object)

}

#' @title Identify large-scale chromosomal copy number variations
#'
#' @description This functions integrates large-scale copy number variations analysis
#' using the `inferncnv` package. For more detailed information about infercnv works
#' visit \emph{https://github.com/broadinstitute/inferCNV/wiki}.
#'
#' @inherit argument_dummy params
#'
#' @param ref_annotation A data.frame in which the row names refer to the barcodes of
#' the reference matrix provided in argument \code{ref_mtr} and
#' and a column named \emph{sample} that refers to the reference group names.
#'
#' Defaults to the data.frame stored in slot \code{$annotation} of list \code{SPATA2::cnv_ref}.
#'
#' If you provide your own reference, make sure that barcodes of the reference
#' input do not overlap with barcodes of the `SPATA2` object. (e.g. by suffixing as
#' exemplified in the default list \code{SPATA2::cnv_ref}.)
#'
#' @param ref_mtr The count matrix that is supposed to be used as the reference.
#' Row names must refer to the gene names and column names must refer to
#' the barcodes. Barcodes must be identical to the row names of the data.frame
#' provided in argument \code{ref_annotation.}
#'
#' Defaults to the count matrix stored in slot \code{$mtr} of list \code{SPATA2::cnv_ref}.
#'
#' If you provide your own reference, make sure that barcodes of the reference
#' input do not overlap with barcodes of the `SPATA2` object. (e.g. by suffixing as
#' exemplified in the default list \code{SPATA2::cnv_ref}.)
#'
#' @param ref_regions A data.frame that contains information about chromosome positions.
#'
#' Defaults to the data.frame stored in slot \code{$regions} of list \code{SPATA2::cnv_ref}.
#'
#' If you provide your own regions reference, make sure that the data.frame has equal column names
#' and row names as the default input.
#'
#' @param directory_cnv_folder Character value. A directory that leads to the folder
#' in which to store temporary files, the infercnv-object as well as the output
#' heatmap.
#'
#' @param gene_pos_df Either NULL or a data.frame. If data.frame, it replaces
#' the output of \code{CONICsmat::getGenePositions()}. Must contain three
#' character variables \emph{ensembl_gene_id}, \emph{hgnc_symbol}, \emph{chromosome_name}
#' and two numeric variables \emph{start_position} and \emph{end_position.}.
#'
#' If NULL the data.frame is created via \code{CONICsmat::getGenePositions()} using
#' all gene names that appear in the count matrix and in the reference matrix.
#'
#' Defaults to the SPATA2 intern data.frame \code{SPATA2::gene_pos_df}.
#'
#' @param cnv_prefix Character value. Denotes the string with which the
#' the feature variables in which the information about the chromosomal gains and
#' losses are stored are prefixed.
#' @param save_infercnv_object Logical value. If set to TRUE the infercnv-object
#' is stored in the folder denoted in argument \code{directory_cnv_folder} under
#' \emph{'infercnv-object.RDS}.
#' @param CreateInfercnvObject A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param require_above_min_mean_expr_cutoff A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param require_above_min_cells_ref A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param normalize_counts_by_seq_depth A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param anscombe_transform A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param log2xplus1 A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param apply_max_threshold_bounds A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param smooth_by_chromosome A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param center_cell_expr_across_chromosome A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param subtract_ref_expr_from_obs A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param invert_log2 A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param clear_noise_via_ref_mean_sd A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param remove_outliers_norm A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param define_signif_tumor_subclusters A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} must not be specified.
#' @param plot_cnv A list of arguments with which the function is supposed
#' to be called. Make sure that your input does not conflict with downstream function
#' calls. Input for argument \code{infercnv_obj} and  must not be specified. Input for argument
#' \code{out_dir} is taken from argument \code{directory_cnv_folder}.
#'
#' @details \code{runCnvAnalysis()} is a wrapper around all functions the infercnv-pipeline
#' is composed of. Argument \code{directory_cnv_folder} should lead to an empty folder as
#' temporary files as well as the output heatmap and the infercnv-object are stored
#' there without asking for permission which can lead to overwriting due to naming issues.
#'
#' Results (including a PCA) are stored in the slot @@cnv of the `SPATA2` object
#' which can be obtained via \code{getCnvResults()}. Additionally, the variables
#' that store the copy-number-variations for each barcode-spot are added to
#' the `SPATA2` object's feature data. The corresponding feature variables are named according
#' to the chromosome's number and the prefix denoted with the argument \code{cnv_prefix.}
#'
#' Regarding the reference data:
#' In the list \code{SPATA2::cnv_ref} we offer reference data including a count matrix
#' that results from stRNA-seq of healthy human brain tissue, an annotation data.frame as
#' well as a data.frame containing information regarding the chromosome positions.
#' You can choose to provide your own reference data by specifying the \code{ref_*}-arguments.
#' Check out the content of list \code{SPATA2::cnv_ref} and make sure that your own
#' reference input is of similiar structure regarding column names, rownames, etc.
#'
#' @note `runCnvAnalysis()` has been deprecated in favor of `runCNV()`.
#'
#' @return An updated `SPATA2` object containg the results in the respective slot.
#' @export
#'
#' @examples
#' library(SPATA2)
#'
#' data("example_data")
#'
#' object <- example_data$object_UKF269T_diet
#'
#' # make sure that "/my_cnv_folder" exists
#' dir.exists("my_cnv_folder")
#'
#' # this can take some time
#' object <- runCNV(object, directory_cnv_folder = "my_cnv_folder")
#'
#' plotCnvHeatmap(object, across = "histology")
#'
#' # chromosomal alterations are immediately added to the objects
#' # meta features
#'
#' getFeatureNames(object)
#'
#' plotSurface(object, color_by = "Chr7", pt_clrsp = "Reds 3")
#'

runCNV <- function(object,
                   ref_annotation = cnv_ref[["annotation"]], # data.frame denoting reference data as reference
                   ref_mtr = cnv_ref[["mtr"]], # reference data set of healthy tissue
                   ref_regions = cnv_ref[["regions"]], # chromosome positions
                   gene_pos_df = SPATA2::gene_pos_df,
                   directory_cnv_folder = "data-development/cnv-results", # output folder
                   directory_regions_df = NA, # deprecated (chromosome positions)
                   cnv_prefix = "Chr",
                   save_infercnv_object = TRUE,
                   verbose = NULL,
                   CreateInfercnvObject = list(ref_group_names = "ref"),
                   require_above_min_mean_expr_cutoff = list(min_mean_expr_cutoff = 0.1),
                   require_above_min_cells_ref = list(min_cells_per_gene = 3),
                   normalize_counts_by_seq_depth = list(),
                   anscombe_transform = list(),
                   log2xplus1 = list(),
                   apply_max_threshold_bounds = list(),
                   smooth_by_chromosome = list(window_length = 101, smooth_ends = TRUE),
                   center_cell_expr_across_chromosome = list(method = "median"),
                   subtract_ref_expr_from_obs = list(inv_log = TRUE),
                   invert_log2 = list(),
                   clear_noise_via_ref_mean_sd = list(sd_amplifier = 1.5),
                   remove_outliers_norm = list(),
                   define_signif_tumor_subclusters = list(p_val = 0.05, hclust_method = "ward.D2", cluster_by_groups = TRUE, partition_method = "qnorm"),
                   plot_cnv = list(k_obs_groups = 5, cluster_by_groups = TRUE, output_filename = "infercnv.outliers_removed", color_safe_pal = FALSE,
                                   x.range = "auto", x.center = 1, output_format = "pdf", title = "Outliers Removed")){


  if(!"infercnv" %in% rownames(installed.packages())){

    install <- utils::askYesNo(msg = "infercnv is not installed. Do you want to install it?")

    if(base::isTRUE(install)){

      if(!requireNamespace("BiocManager", quietly = TRUE)){ install.packages("BiocManager") }

      BiocManager::install("infercnv")

    } else {

      stop("Can not continue without infercnv package.")

    }

  }

  # 1. Control --------------------------------------------------------------

  hlpr_assign_arguments(object)

  containsModality(object, modality = "gene", error = TRUE)

  confuns::are_values(c("save_infercnv_object"), mode = "logical")

  confuns::check_directories(directories = directory_cnv_folder, type = "folders")

  if(!base::is.na(directory_regions_df)){

    base::message(
      "Redirecting input for argument 'directory_regions_df' (deprecated) to ",
      "argument 'ref_regions'. Please use 'ref_regions' instead."
    )

    ref_regions <- directory_regions_df

    base::warning(
      "The argument 'directory_regions_df' is deprecated in favor of 'ref_regions'. ",
      "See documentation for more details."
    )

  }

  # -----


  # 2. Data extraction ------------------------------------------------------

  # preparing object derived data
  count_mtr <- getCountMatrix(object = object, assay_name = "gene")

  obj_anno <-
    getMetaDf(object = object) %>%
    dplyr::select(barcodes, sample) %>%
    tibble::column_to_rownames(var = "barcodes")

  # reading and preparing reference data
  confuns::give_feedback(msg = "Checking input for reference data.", verbose = verbose)

  if(base::is.character(ref_mtr) && stringr::str_detect(ref_mtr, pattern = "\\.RDS$")){

    confuns::give_feedback(
      msg = glue::glue("Reading in reference matrix from directory '{ref_mtr}'."),
      verbose = verbose
    )

    ref_mtr <- base::readRDS(file = ref_mtr)

    confuns::give_feedback(msg = "Done.", verbose = verbose)

  } else {

    ref_mtr <- base::as.matrix(ref_mtr)

  }


  if(base::is.character(ref_annotation) && stringr::str_detect(ref_annotation, pattern = "\\.RDS$")){

    confuns::give_feedback(
      msg = glue::glue("Reading in reference annotation from directory '{ref_annotation}'."),
      verbose = verbose
    )

    ref_anno <- base::readRDS(file = ref_annotation)

    confuns::give_feedback(msg = "Done.", verbose = verbose)

  } else if(base::is.data.frame(ref_annotation)){

    ref_anno <- ref_annotation

  } else {

    base::stop("Input for argument 'ref_annotation' must either be a directory leading to an .RDS-file or a data.frame.")

  }


  # combine data sets
  confuns::give_feedback(msg = "Combining input and reference data.", verbose = verbose)

  genes_inter <-
    base::intersect(x = base::rownames(count_mtr), y = base::rownames(ref_mtr)) %>% base::unique()

  if(base::length(genes_inter) < 500){

    msg <- "Less than 500 genes match ref and count matrix."

    confuns::give_feedback(msg = msg, fdb.fn = "stop", with.time = FALSE)

  }

  expr_inter <- base::cbind(count_mtr[genes_inter, ], ref_mtr[genes_inter, ])

  anno_inter <- base::rbind(obj_anno, ref_anno)

  # read and process gene positions
  confuns::give_feedback(msg = "Getting gene positions.", verbose = verbose)

  if(base::is.character(ref_regions) && stringr::str_detect(ref_regions, pattern = "\\.RDS$")){

    regions_df <- base::readRDS(file = ref_regions)

  } else if(base::is.data.frame(ref_regions)) {

    regions_df <- ref_regions

  } else {

    base::stop("Input for argument 'ref_regions' must either be a directory leading to an .RDS-file or a data.frame")

  }

  if(base::is.data.frame(gene_pos_df)){

    confuns::check_data_frame(
      df = gene_pos_df,
      var.class = list(
        ensembl_gene_id = "character",
        hgnc_symbol = "character",
        chromosome_name = "character",
        start_position = "integer",
        end_position = "integer"
      )
    )


  } else {

    gene_pos_df <-
      CONICSmat::getGenePositions(gene_names = base::rownames(expr_inter))

  }


  # -----


  # 3. Analysis pipeline ----------------------------------------------------

  gene_order_df <-
    dplyr::select(gene_pos_df, chromosome_name, start_position, end_position, hgnc_symbol) %>%
    magrittr::set_rownames(value = gene_pos_df$hgnc_symbol)

  confuns::give_feedback(msg = "Starting analysis pipeline.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "CreateInfercnvObject",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("raw_counts_matrix" = expr_inter,
                     "annotations_file" = anno_inter,
                     "gene_order_file" = gene_order_df
      )
    )


  confuns::give_feedback(msg = glue::glue("Removing genes from matrix with mean expression below threshold."), verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "require_above_min_mean_expr_cutoff",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )

  confuns::give_feedback(msg = "Removing low quality barcode spots.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "require_above_min_cells_ref",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )


  confuns::give_feedback(msg = "Normalizing counts by sequencing depth.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "normalize_counts_by_seq_depth",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )

  confuns::give_feedback(msg = "Conducting anscombe and logarithmic transformation.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "anscombe_transform",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "log2xplus1",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )

  confuns::give_feedback(msg = "Applying maximal threshold bounds.", verbose = verbose)

  threshold <-
    base::mean(base::abs(infercnv:::get_average_bounds(infercnv_obj))) %>%
    base::round(digits = 2)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "apply_max_threshold_bounds",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj, "threshold" = threshold),
      v.fail = infercnv_obj
    )

  confuns::give_feedback(msg = "Smoothing by chromosome.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "smooth_by_chromosome",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )


  confuns::give_feedback(msg = "Centering cell expression across chromosome.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "center_cell_expr_across_chromosome",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )

  confuns::give_feedback(msg = "Subtracting reference expression from observed expression.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "subtract_ref_expr_from_obs",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )

  confuns::give_feedback(msg = "Clearing noise via reference mean standard deviation.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "invert_log2",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj

    )

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "clear_noise_via_ref_mean_sd",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )


  confuns::give_feedback(msg = "Removing outliers.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "remove_outliers_norm",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )


  confuns::give_feedback(msg = "Defining significant tumor subclusters.", verbose = verbose)

  infercnv_obj <-
    confuns::call_flexibly(
      fn = "define_signif_tumor_subclusters",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj),
      v.fail = infercnv_obj
    )


  confuns::give_feedback(msg = "Copy number variation pipeline completed.", verbose = verbose)

  if(base::isTRUE(save_infercnv_object)){

    save_dir <- stringr::str_c(directory_cnv_folder, "infercnv-obj.RDS", sep = "/")

    msg <- glue::glue("Saving infercnv-object under '{save_dir}'.")

    confuns::give_feedback(msg = msg, verbose = verbose)

    if(class(infercnv_obj)!="infercnv"){infercnv_obj <- infercnv_obj[[1]]}

    base::saveRDS(infercnv_obj, file = save_dir)

  }

  confuns::give_feedback(msg = "Plotting results.", verbose = verbose)


  if(class(infercnv_obj)!="infercnv"){infercnv_obj <- infercnv_obj[[1]]}


  plot_results <-
    confuns::call_flexibly(
      fn = "plot_cnv",
      fn.ns = "infercnv",
      fn.ns.sep = ":::",
      default = list("infercnv_obj" = infercnv_obj, "out_dir" = directory_cnv_folder, "write_expr_matrix"=T),
      v.fail = NULL
    )


  # work around a weird error in plot_cnv()
  if(base::is.null(plot_results)){

    confuns::give_feedback(
      msg = "infercnv:::plot_cnv() failed. Attempting to plot with default setting.",
      verbose = TRUE
    )

    plot_results <-
      base::tryCatch({

        infercnv::plot_cnv(infercnv_obj = infercnv_obj, out_dir = directory_cnv_folder, write_expr_matrix=T)

      }, error = function(error){

        NULL

      })

    if(base::is.null(plot_results)){

      confuns::give_feedback(
        msg = "inferncnv::plot_cnv() failed with default setting, too.",
        verbose = TRUE
      )

    }

    if(base::isTRUE(save_infercnv_object)){

      msg <-
        glue::glue(
          "The infercnv-object has been saved under '{save_dir}'.",
          "Please try to plot the heatmap manually."
        )

      confuns::give_feedback(msg = msg, verbose = TRUE)

    } else {

      msg <-
        glue::glue(
          "Please consider to run runCnvAnalysis() again with argument 'save_infercnv_object' set to TRUE.",
          "This way you can plot the results manually."
        )

      confuns::give_feedback(msg = msg, verbose = TRUE)

    }

    msg <- "If the error in infercnv:::plot_cnv() persists, consider to open an issue at https://github.com/theMILOlab/SPATA2/issues."

    confuns::give_feedback(msg = msg, verbose = TRUE)

  }

  # ----


  # 4. Storing results ------------------------------------------------------

  result_dir <-
    stringr::str_c(directory_cnv_folder, "/", plot_cnv$output_filename, ".observations.txt")

  results <- utils::read.table(result_dir)

  barcodes <- base::colnames(results)

  confuns::give_feedback(msg = "Summarizing cnv-results by chromosome.", verbose = verbose)

  # join cnv results (per gene) with chromosome positions and summarize by chromosome
  ordered_cnv_df <-
    base::as.data.frame(results) %>%
    tibble::rownames_to_column("hgnc_symbol") %>%
    dplyr::left_join(., gene_pos_df, by = "hgnc_symbol") %>%
    dplyr::group_by(chromosome_name) %>%
    dplyr::select(chromosome_name, dplyr::any_of(barcodes)) %>%
    dplyr::summarise_all(base::mean) %>%
    dplyr::mutate(Chr = stringr::str_c(cnv_prefix, chromosome_name)) %>%
    dplyr::select(Chr, dplyr::everything()) %>%
    dplyr::ungroup() %>%
    dplyr::select(-chromosome_name)

  cnames <- c("barcodes", ordered_cnv_df$Chr)

  ordered_cnv_df2 <-
    dplyr::select(ordered_cnv_df, -Chr) %>%
    base::t() %>%
    base::as.data.frame() %>%
    tibble::rownames_to_column(var = "barcodes") %>%
    magrittr::set_colnames(value = cnames) %>%
    dplyr::mutate(barcodes = stringr::str_replace_all(string = barcodes, pattern = "\\.", replacement = "-")) %>%
    dplyr::mutate(dplyr::across(dplyr::starts_with(match = cnv_prefix), .fns = base::as.numeric)) %>%
    tibble::as_tibble()

  # add results to spata object
  confuns::give_feedback(msg = "Adding results to the `SPATA2` object's feature data.", verbose = verbose)

  # feature variables
  object <-
    addFeatures(
      object = object,
      feature_df = ordered_cnv_df2,
      overwrite = TRUE
    )

  # cnv matrix
  base::colnames(results) <-
    stringr::str_replace_all(
      string = base::colnames(results),
      pattern = "\\.",
      replacement = "-"
    )

  cnv_mtr <- base::as.matrix(results)

  # cnv list
  cnv_res <-
    list(
      prefix = cnv_prefix,
      cnv_df = ordered_cnv_df2,
      cnv_mtr = cnv_mtr,
      gene_pos_df = gene_pos_df,
      regions_df = regions_df
    )

  # post processing of data structure

  # mainly renamining
  cnv_res$regions_df <-
    tibble::rownames_to_column(cnv_res$regions_df, var = "chrom_arm") %>%
    dplyr::mutate(
      chrom_arm = base::factor(chrom_arm, levels = chrom_arm_levels),
      chrom = base::factor(Chrom, levels = chrom_levels),
      arm =
        stringr::str_extract(string = chrom_arm, pattern = "p|q") %>%
        base::factor(levels = c("p", "q")),
      start = Start,
      end = End,
      length = Length
    ) %>%
    dplyr::select(chrom_arm, chrom, arm, start, end, length) %>%
    tibble::as_tibble()

  # create wide format with observational unit = chromosome instead of = chromosome arm
  regions_df_wide <-
    dplyr::select(cnv_res$regions_df, -length, -chrom_arm) %>%
    tidyr::pivot_wider(
      names_from = arm,
      values_from = c(start, end),
      names_sep = "_"
    ) %>%
    dplyr::select(chrom, start_p, end_p, start_q, end_q)

  gene_pos_df <-
    tibble::as_tibble(cnv_res$gene_pos_df) %>%
    dplyr::rename(chrom = chromosome_name) %>%
    dplyr::filter(chrom %in% {{chrom_levels}}) %>% # remove not annotated genes
    dplyr::mutate(
      chrom = base::factor(chrom, levels = chrom_levels),
      genes = hgnc_symbol
    ) %>%
    # join wide format to compute gene wise arm location
    dplyr::left_join(
      x = .,
      y = regions_df_wide,
      by = "chrom"
    ) %>%
    dplyr::mutate(
      arm = dplyr::case_when(
        # if gene starts at position bigger than end of arm p it must be located
        # on arm q
        start_position > end_p ~ "q",
        # else it's located on arm p
        TRUE ~ "p"
      ),
      arm = base::factor(x = arm, levels = c("p", "q")),
      chrom_arm = stringr::str_c(chrom, arm, sep = ""),
      chrom_arm = base::factor(chrom_arm, levels = chrom_arm_levels)
    ) %>%
    dplyr::select(-start_p, -end_p, -start_q, -end_q) %>%
    dplyr::select(genes, chrom_arm, chrom, arm, start_position, end_position, dplyr::everything())

  cnv_res$gene_pos_df <- gene_pos_df

  # remove genes that are not annotated by chromosome
  cnv_res$cnv_mtr <-
    cnv_res$cnv_mtr[base::rownames(cnv_res$cnv_mtr) %in% gene_pos_df$genes,]

  object <-
    setCnvResults(
      object = object,
      cnv_list = cnv_res
    )

  object <- computeCnvByChrArm(object, overwrite = TRUE)

  # -----

  confuns::give_feedback(msg = "Done.", verbose = verbose)

  returnSpataObject(object)

}

#' @rdname runCNV
#' @export
runCnvAnalysis <- function(object, ...){

  deprecated(fn = T)

  runCNV(object = object, ...)

}


# runD --------------------------------------------------------------------

#' @title Differential expression analysis (DEA)
#'
#' @description This function makes use of \code{Seurat::FindAllMarkers()} to identify
#' the differently expressed variables across the groups of
#' the grouping variable denoted in the argument \code{across}.
#'
#' See details for more.
#'
#' @inherit argument_dummy params
#' @inherit check_method params
#' @param fc_name,base Given to corresponding arguments of \code{Seurat::FindAllMarkers()}.
#' @param ... Additional arguments given to \code{Seurat::FindAllMarkers()}
#'
#' @details This function is a wrapper around the DEA pipeline from the `Seurat`
#' package. It creates a temporary `Seurat` object via `Seurat::CreateSeuratObject()`,
#' and `Seurat::SCTransform()`. Then, `Seurat::FindAllMarkers()` is run. The output data.frame
#' is stored in the `SPATA2` object which is returned at the end.
#'
#' If \code{across} and/or \code{method_de} are vectors instead of single
#' values \code{runDEA()} iterates over all combinations in a for-loop and
#' stores the results in the respective slots. (e.g.: If \code{across} = \emph{'seurat_clusters'}
#' and \code{method_de} = \emph{c('wilcox', 'bimod')} the function computes the differently expressed genes
#' across all groups found in the feature variable \emph{seurat_clusters} according to method \emph{wilcox} and
#' stores the results in the respective slot. Then it does the same according to method \emph{bimod}.)
#'
#' The results are obtainable via \code{getDeaResults()}, `getDeaResultsDf()` and \code{getDeaGenes()}.
#'
#' @inherit update_dummy return
#'
#' @export
#'
#' @inherit plotDeaDotPlot examples
#'
#' @examples
#'
#' library(SPATA2)
#'
#' data("example_data")
#' object <- example_data$object_UKF269T_diet
#'
#' getGroupingOptions(object)
#'
#' plotSurface(object, color_by = "histology")
#'
#' object <- runDEA(object, across = "histology")
#'
#' # extract best marker gene for each group by lowest p-value
#' top_marker_genes <-
#'   getDeaGenes(object, across = "histology", n_lowest_pval = 1)
#'
#' print(top_marker_genes)
#'
#' plotSurfaceComparison(object, color_by = top_marker_genes)
#'

runDEA <- function(object,
                   across,
                   method_de = NULL,
                   verbose = NULL,
                   base = 2,
                   assay_name = activeAssay(object),
                   ...){

  hlpr_assign_arguments(object)

  purrr::walk(.x = method_de, .f = ~ check_method(method_de = .x))

  valid_across <- check_features(object, features = across, valid_classes = "factor")

  # prepare seurat object
  seurat_object <-
    Seurat::CreateSeuratObject(counts = getCountMatrix(object, assay_name = assay_name)) %>%
    Seurat::NormalizeData(object = .) %>%
    Seurat::ScaleData(object = .)

  seurat_object@meta.data <-
    getMetaDf(object) %>%
    tibble::column_to_rownames(var = "barcodes") %>%
    base::as.data.frame()

  for(across in valid_across){

    for(method in method_de){

      if(base::isTRUE(verbose)){base::message(glue::glue("Running DEA across '{across}' with method '{method}'."))}

      object <-
        base::tryCatch({

          # set the grouping based on which DEA is conducted
          groups <-
            purrr::set_names(
              x = seurat_object@meta.data[[across]],
              nm = base::rownames(seurat_object@meta.data) # set barcodes as names
            )

          seurat_object@meta.data$orig.ident <- base::unname(groups)

          seurat_object@active.ident <- groups

          n_groups <- dplyr::n_distinct(groups)

          if(n_groups < 2){

            base::stop(glue::glue("There is only one unique group in the object's '{across}'-variable. runDEA() needs a minimum of two different groups."))

          } else {

            base::message(glue::glue("Number of groups/clusters: {n_groups}"))

          }

          # De analysis ----------------------------------------------------------

          # perform analysis and remove seurat object afterwards
          dea_results <-
            Seurat::FindAllMarkers(
              object = seurat_object,
              test.use = method_de,
              base = base,
              ...
            )

          # save results in spata object
          object <-
            setDeaResultsDf(
              object = object,
              grouping_variable = across,
              dea_results = dea_results,
              across = across,
              method_de = method_de,
              assay_name = assay_name,
              ...
            )

          object

        },

        error = function(error){

          base::message(glue::glue("Skipping DEA on across-input '{across}' with method '{method}' as it resulted in the following error message: {error}"))

          returnSpataObject(object)

        }
        )

    }

  }


  returnSpataObject(object)

}

#' @rdname runDEA
#' @export
runDeAnalysis <- function(...){

  deprecated(fn = TRUE)

  object <- runDEA(...)

  returnSpataObject(object)

}


# runG --------------------------------------------------------------------

#' @title Compute gene set enrichment
#'
#' @description Computes gene set enrichment based on the results of
#' [`runDEA()`]. See details for more.
#'
#' @param across Character vector. All grouping variables of interest.
#' @param methods_de Character vector. All differential expression methods
#' of interest.
#' @inherit runDeAnalysis params
#' @inherit getDeaResultsDf params
#' @inherit hypeR::hypeR params
#' @inherit argument_dummy params
#' @param gene_set_list A named list of character vectors. Names of slots correspond to the
#' gene set names. The slot contains the genes of the gene sets.Holds priority over
#' \code{gene_set_names}.
#' @param signatures Character vector of signature names that are taken
#' from the assays stored signatures. Defaults to all signatures of
#' the currently active assay.
#' @param reduce Logical value. If set to TRUE (the default) the return value
#' of \code{hypeR::hypeR()} is reduced to what is necessary for \code{SPATA2}s
#' function to work. If `FALSE`, the complete objects are stored. This will
#' grow the `SPATA2` object's size quickly!
#'
#' @inherit update_dummy return
#'
#' @details Computes gene set enrichment analysis using \code{hypeR::hypeR()}.
#' It does so by iterating about all possible combinations of \code{across} and
#' \code{methods_de}. Combinations for which no DE-results are found are silently
#' skipped.
#'
#' @export
#'
#' @examples
#' library(SPATA2)
#'
#' data("example_data")
#'
#' object <- example_data$object_UKF269T_diet
#'
#' # requires the results of runDEA(object, across = "histology")!
#' object <- runDEA(object, across = "histology")
#'
#' object <- runGSEA(object, across = "histology")
#'
#' plotGseaDotplot(object, across = "histology", )
#'

runGSEA <- function(object,
                    across,
                    methods_de = "wilcox",
                    max_adj_pval = 0.05,
                    min_lfc = 0,
                    n_highest_lfc = NULL,
                    n_lowest_pval = NULL,
                    signatures = NULL,
                    test = c("hypergeometric", "kstest"),
                    absolute = FALSE,
                    background = 20000,
                    power = 1,
                    pval = 0.05,
                    fdr = 0.05,
                    reduce = TRUE,
                    quiet = TRUE,
                    chr_to_fct = TRUE,
                    assay_name = activeAssay(object),
                    verbose = NULL,
                    ...){

  hlpr_assign_arguments(object)

  deprecated(...)

  if(!"hypeR" %in% base::rownames(installed.packages())){

    install <- utils::askYesNo(msg = "hypeR is not installed. Do you want to install it?")

    if(base::isTRUE(install)){

      if (!require("BiocManager", quietly = TRUE)){ install.packages("BiocManager") }

      BiocManager::install("hypeR")

    }

  }

  if(!base::is.numeric(background)){

    background <- nMolecules(object)

  } else {

    background <- background[1]

  }

  dea_overview <- getDeaOverview(object)

  across <- base::unique(across)

  check_one_of(
    input = across,
    against = base::names(dea_overview),
    fdb.opt = 2,
    ref.opt.2 = "grouping options across which DEA has been computed"
  )

  methods_de <- base::unique(methods_de)

  check_one_of(
    input = methods_de,
    against = validDeaMethods()
  )

  ma <- getAssay(object, assay_name = assay_name)

  # prepare gene set list
  signature_list <- getSignatureList(object, assay_name = assay_name)

  if(base::is.character(signatures)){

    confuns::check_one_of(
      input = signatures,
      against = base::names(signature_list),
      fdb.opt = 2,
      ref.opt.2 = "known signatures in assay '{assay_name}'"
    )

    signature_list <- signature_list[signatures]

  }

  for(across_value in across){

    for(method_de in methods_de){

      dea_df <-
        getDeaResultsDf(
          object = object,
          across = across_value,
          method_de = method_de,
          max_adj_pval = max_adj_pval,
          min_lfc = min_lfc,
          n_highest_lfc = n_highest_lfc,
          n_lowest_pval = n_lowest_pval
        )

      if(!base::is.null(dea_df)){

        group_names <- getGroupNames(object, grouping = across_value)

        n_groups <- base::length(group_names)

        msg <-
          glue::glue(
            "Calculating enrichment of signatures across '{across_value}' (n = {n_groups}). ",
            "Based on DEA results of method '{method_de}'."
          )

        give_feedback(msg = msg, verbose = verbose)

        ma@analysis$dea[[across_value]][[method_de]][["hypeR_gsea"]] <-
          purrr::map2(
            .x = group_names,
            .y = base::seq_along(group_names),
            .f = function(group, index){

              signature <-
                dplyr::filter(dea_df, !!rlang::sym(across_value) == {{group}}) %>%
                dplyr::pull(var = "gene")

              give_feedback(
                msg = glue::glue("Working on group: '{group}' ({index}/{n_groups})"),
                verbose = verbose
              )

              out <-
                base::tryCatch({

                  hypeR::hypeR(
                    signature = signature,
                    genesets = signature_list,
                    test = test,
                    background = background,
                    power = power,
                    absolute = absolute,
                    fdr = fdr,
                    pval = pval,
                    quiet = quiet
                  )

                }, error = function(error){

                  msg <-
                    glue::glue(
                      "Computing enrichment for group '{group}' resulted in an error: {error}."
                    ) %>%
                    base::as.character()

                })

              if(base::is.character(out)){

                give_feedback(msg = out, fdb.fn = "warning")

                out <- NA

              } else {

                if(base::isTRUE(reduce)){

                  out <- confuns::lselect(lst = base::as.list(out), any_of(c("args", "info")), data)

                }

                out$data <-
                  dplyr::mutate(
                    .data = out$data,
                    overlap_perc = overlap/geneset,
                    label = base::as.factor(label)
                  )

              }

              return(out)

            }
          ) %>%
          purrr::set_names(nm = group_names) %>%
          purrr::discard(.p = ~ base::any(base::is.na(.x)))

      } else {

        give_feedback(msg = "GSEA results already present.", verbose = verbose)

      }

    }

  }

  object <- setAssay(object, assay = ma)

  give_feedback(msg = "Done.", verbose = verbose)

  returnSpataObject(object)

  }


# runI --------------------------------------------------------------------




# runK --------------------------------------------------------------------

#' @title Clustering with Kmeans
#'
#' @description A wrapper around the Kmeans clustering algorithm. Iterates over all
#' combinations of `ks` and `methods_kmeans` and stores the resulting clustering
#' variables in the feature data.frame.
#'
#' @inherit argument_dummy params
#' @param ks Numeric vector. Denotes all options for k-clusters over which
#' to iterate. Values <1 are discarded. (Givent o `centers` of `stats::kmeans()`).
#' @param methods_kmeans A character vector of kmeans methods. Should be one
#' or more of *c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen")*. (Given to `algorithm`
#' of `stats::kmeans()`).
#' @param naming A [`glue::glue()`] instruction on how to name the resulting cluster variables.
#' use *method_kmeans* to refer to the method and *k* for the value of k.
#' @param n_pcs Integer value. The number of principal components to use for
#' the clustering.
#' @param ... Additional arguments given to [`stats::kmeans()`].
#'
#' @inherit update_dummy return
#'
#' @seealso [`getFeatureDf()`], [`getFeatureNames()`], [`getGroupingOptions()`],
#' [`getGroupNames()`]
#'
#' @export
#'
#' @examples
#' library(SPATA2)
#'
#' data("example_data")
#'
#' object <- example_data$object_UKF269T_diet
#'
#' object <- runPCA(object, n_pcs = 20)
#'
#' object <- runKmeansClustering(object, ks = 3:10, n_pcs = 20)
#'
#' getFeatureNames(object)
#'

runKmeansClustering <- function(object,
                                ks,
                                methods_kmeans = "Hartigan-Wong",
                                prefix = "K",
                                naming = "{method_kmeans}_k{k}",
                                n_pcs = NULL,
                                overwrite = TRUE,
                                ...){

  pca_df <-
    getPcaDf(object, n_pcs = n_pcs) %>%
    dplyr::select(barcodes, dplyr::where(fn = base::is.numeric))

  cluster_df <-
    confuns::initiateAnalysis(
      data = pca_df,
      key_name = "barcodes",
      verbose = FALSE
      ) %>%
    confuns::computeClusteringKmeans(
      object = .,
      ks = ks,
      methods_kmeans = methods_kmeans,
      ...
    ) %>%
    confuns::getClusterVarsKmeans(
      object = .,
      ks = ks,
      methods_kmeans = methods_kmeans,
      prefix = prefix,
      naming = naming
    )

  object <- addFeatures(object, feature_df = cluster_df, overwrite = overwrite)

  returnSpataObject(object)

}



# runN --------------------------------------------------------------------

#' @title Clustering with nearest neighbor search
#'
#' @description
#' A wrapper around the [`RANN::nn2()`] function - nearest neighbor clustering.
#'
#'
#' @param k The maximum number of nearest neighbours to compute. The default value
#'  is set to the smaller of the number of columnns in data.
#' @param treetype Character vector. Character vector specifying the standard
#'  \emph{'kd'} tree or a \emph{'bd'} (box-decomposition, AMNSW98) tree which
#'   may perform better for larger point sets.
#' @param searchtypes Character value. Either \emph{'priority', 'standard'} or \emph{'radius '}. See details for more.
#' @param naming Character value. A glue expression for the new cluster variable name.
#' @inherit RANN::nn2 params
#' @inherit argument_dummy params
#' @inherit update_dummy return
#'
#' @note Requires the `RANN` packge.
#'
#' @details
#'
#' Search types: priority visits cells in increasing order of distance from the
#' query point, and hence, should converge more rapidly on the true nearest neighbour,
#' but standard is usually faster for exact searches. radius only searches for neighbours
#' within a specified radius of the point. If there are no neighbours then nn.idx will
#' contain 0 and nn.dists will contain 1.340781e+154 for that point.
#'
runNearestNeighborClustering <- function(object,
                                         n_pcs = NULL,
                                         k = 50,
                                         naming = "nn2_{priority}_{treetype}",
                                         searchtype = "priority",
                                         treetype = "bd",
                                         radius = 0,
                                         eps = 0,
                                         overwrite = FALSE,
                                         verbose = TRUE){


  check_cran_packages("RANN")

  confuns::check_one_of(
    input = searchtype,
    against = c("standard", "priority", "radius"),
    fdb.fn = "stop",
    ref.input = "input for argument 'searchtype'",
    ref.against = "valid searchtypes"
  )

  confuns::check_one_of(
    input = treetype,
    against = c("kd", "bd"),
    fdb.fn = "stop",
    ref.input = "input for argument 'treetype'",
    ref.against = "valid treetypes"
  )

  cluster_name <- glue::glue(naming)

  confuns::check_none_of(
    against = getFeatureNames(object),
    input = cluster_name,
    overwrite = overwrite
  )

  # 2. Data extraction and for loop -----------------------------------------

  pca_mtr <- getPcaMtr(object, n_pcs = n_pcs)

  cluster_df <- tibble::tibble(barcodes = base::rownames(pca_mtr))

  confuns::give_feedback(msg = msg, verbose = verbose)

  nearest <-
    RANN::nn2(
      data = pca_mtr,
      k = k,
      treetype = treetype,
      searchtype = searchtype,
      radius = radius,
      eps = eps
    )

  edges <-
    reshape::melt(base::t(nearest$nn.idx[, 1:k])) %>%
    dplyr::select(A = X2, B = value) %>%
    dplyr::mutate(C = 1)

  edges <-
    base::transform(edges, A = base::pmin(A, B), B = base::pmax(A, B)) %>%
    base::unique() %>%
    dplyr::rename(V1 = A, V2 = B, weight = C)

  edges$V1 <- base::rownames(pca_mtr)[edges$V1]
  edges$V2 <- base::rownames(pca_mtr)[edges$V2]

  g_df <- igraph::graph.data.frame(edges, directed = FALSE)

  graph_out <- igraph::cluster_louvain(g_df)

  clust_assign <-
    base::factor(x = graph_out$membership, levels = base::sort(base::unique(graph_out$membership)))

  cluster_df <-
    dplyr::mutate(.data = cluster_df, cluster_var = base::factor(clust_assign)) %>%
    dplyr::rename({{cluster_name}} := cluster_var)

  object <- addFeatures(object, feature_df = cluster_df, overwrite = TRUE)

  returnSpataObject(object)

}

# runP --------------------------------------------------------------------

#' @title Run Principal Component Analysis
#'
#' @description Takes the data matrix of choice and passes it to
#' \code{irlba::prcomp_irlba()}.
#'
#' @param variables Character vector or `NULL`. If character, subsets the data matrix
#' by variable/molecule names such that only the specified ones are used for dimensional reduction.
#' If `NULL`, the unsubstted matrix denoted in `mtr_name` is used.
#' @param n_pcs Numeric value. Denotes the number of principal components to be computed.
#' @param ... Additional arguments given to \code{irlba::prcomp_irlba()}.
#'
#' @inherit argument_dummy params
#'
#' @inherit update_dummy return
#'
#' @seealso [`identifyVariableMolecules()`], [`plotPcaElbow()`]
#'
#' @export
#'
#' @examples
#' library(SPATA2)
#'
#' data("example_data")
#'
#' object <- example_data$object_UKF269T_diet
#'
#' object <- runPCA(object)
#' object <- runTSNE(object)
#' object <- runUMAP(object)
#'
#' plotPCA(object, color_by = "histology")
#' plotTSNE(object, color_by = "histology")
#' plotUMAP(object, color_by = "histology")
#'

runPCA <- function(object,
                   n_pcs = 30,
                   variables = NULL,
                   mtr_name = activeMatrix(object),
                   assay_name = activeAssay(object),
                   ...){

  expr_mtr <-
    getMatrix(object, mtr_name = mtr_name, assay_name = assay_name) %>%
    base::as.matrix()

  if(base::is.character(variables)){

    confuns::check_one_of(
      input = variables,
      against = base::rownames(expr_mtr)
    )

    expr_mtr <- expr_mtr[variables, ]

  }

  pca_res <- irlba::prcomp_irlba(x = base::t(expr_mtr), n = n_pcs, ...)

  pca_df <-
    base::as.data.frame(x = pca_res[["x"]]) %>%
    dplyr::mutate(barcodes = base::colnames(expr_mtr)) %>%
    dplyr::select(barcodes, dplyr::everything())

  object <- setPcaDf(object = object, pca_df = pca_df)

  returnSpataObject(object)

}

#' @rdname runPCA
runPca <- function(...){

  deprecated(fn = T)

  runPCA(...)

}

#' @keywords internal
runPca2 <- function(object, n_pcs = 30, mtr_name = NULL, ...){

  check_object(object)

  expr_mtr <- getMatrix(object, mtr_name = mtr_name)

  pca_res <- irlba::prcomp_irlba(x = base::t(expr_mtr), n = n_pcs, ...)

  return(pca_res)

}


# runS --------------------------------------------------------------------

#' @title Run spatial differential expression analysis
#'
#' @description This function conducts differential expression analysis (*DEA*)
#' where data points are grouped based on their distance to a [SpatialAnnotation].
#'
#' @param interval Distance measure. The width of the spatial intervals in which
#' the data points are grouped starting from the boundary of the spatial annotation
#' till the edge of the tissue is reached.
#' @param naming A [`glue::glue()`] instruction on how to create the name of the
#' grouping variable.Providing a simple string without glue syntax works, too.
#' @inherit getSpatialAnnotation params
#' @inherit runDEA params
#' @inherit argument_dummy params
#'
#' @inherit update_dummy return
#'
#' @seealso [`createGroupAnnotations()`], [`createImageAnnotations()`],
#' [`createNumericAnnotations()`] to create spatial annotations.
#'
#' The function [`getCoordsDfSA()`] relates data points to a spatial annotation.
#' This information is used by `runSDEA()` to create the spatial grouping.
#'
#' [`plotDeaDotplot()`] to visualize results.
#'
#' @details This function bases on the concept of [`runDEA()`] where gene expression
#' is compared across different groups within a grouping variable. However, in contrast to
#' `runDEA()` where the grouping variable is denoted via `across`, the function [`runSDEA()`]
#' creates a grouping variable in which data points are grouped based on their distance
#' to a [`SpatialAnnotation`]. This approach aims to identify genes that are upregulated
#' within certain distance intervals to the spatial annotation of interest. The spatial
#' interval is defined via the argument `interval`. E.g. if `interval = 500um` data points
#' are grouped in 500um intervals starting from the boundaries of the spatial annotation
#' until the edge of the tissue is reached. The names of the groups correspond
#' to the distance itself: *500um*, *1000um*, *1500um*, etc. If `interval = 0.5mm`, which
#' is equivalent to 500um, the grouping will be the same but the group names correspond
#' to *0.5mm*, *1mm*, *1.5mm*, etc. Data points that lie within the boundaries
#' of the spatial annotation are assigned to group *core*.
#'
#' The grouping variable created this way is stored in the feature data.frame as
#' any other grouping variable and the DEA results are stored in slot @dea as all
#' other DEA results.
#'
#' @inheritSection section_dummy Distance measures
#'
#' @keywords internal
#'
runSDEA <- function(object,
                    interval,
                    id = idSA(object),
                    naming = "sdea_{id}",
                    method_de = "wilcox",
                    base = 2,
                    overwrite = FALSE,
                    ...){

  var_name <-
    glue::glue(naming) %>%
    base::as.character()

  # get genes
  genes <- getGenes(object)
  genes <- genes[!genes %in% genes_rm]

  spatial_parameters <-
    check_sas_input(
      distance = distToEdge(object, id = id),
      binwidth = interval,
      n_bins_dist = NA_integer_,
      object = object,
      verbose = FALSE
    )

  # which unit
  unit <- extract_unit(interval)

  sdea_groups <-
    stringr::str_c(
      extract_value(interval) * spatial_parameters$n_bins_dist,
      extract_unit(interval)
    )

  sdea_levels <- c("core", sdea_groups)

  # get grouping
  coords_df <-
    getCoordsDfSA(
      object = object,
      id = id,
      distance = spatial_parameters$distance,
      binwidth = spatial_parameters$binwidth,
      dist_unit = unit,
      verbose = FALSE
    ) %>%
    dplyr::mutate(
      bins_sdea = extract_bin_dist_val(bins_dist, fn = "max"),
      bins_sdea = stringr::str_c(bins_sdea, {{unit}}),
      bins_sdea =
        dplyr::case_when(
          rel_loc == "core" ~ "core",
          rel_loc == "outside" ~ "control",
          TRUE ~ bins_sdea
        ),
      bins_sdea = base::factor(bins_sdea, levels = sdea_levels)
    )

  coords_df[[var_name]] <- coords_df$bins_sdea

  object <-
    addFeatures(
      object = object,
      feature_df = coords_df[,c("barcodes", var_name)],
      overwrite = overwrite
    )

  object <- runDEA(object, across = var_name, method_de = method_de)

  confuns::give_feedback(
    msg = glue::glue("Added variable '{var_name}' and DEA results to the object."),
    verbose = verbose
  )

  returnSpataObject(object)

}


#' @title Clustering with Seurat
#'
#' @description A wrapper around the Seurat clustering pipeline suggested by
#' *Hao and Hao et al., 2021*.
#'
#' @param FindVariableFeatures,RunPCA,FindNeighbors,FindClusters Each argument
#' takes a list of arguments that is given to the equivalent function.
#'
#' @inherit update_dummy return
#' @export
#'
#' @examples
#' library(SPATA2)
#'
#' data("example_data")
#'
#' object <- example_data$object_UKF269T_diet
#'
#' object <- runSeuratClustering(object, name = "new_seurat_clst")
#'
#' plotSurface(object, color_by = "new_seurat_clst")
#'
runSeuratClustering <- function(object,
                                name = "seurat_clusters",
                                mtr_name = activeMatrix(object),
                                assay_name = activeAssay(object),
                                NormalizeData = list(),
                                ScaleData = list(),
                                FindVariableFeatures = list(selection.method = "vst", nfeatures = 2000),
                                RunPCA = list(npcs = 60),
                                FindNeighbors = list(dims = 1:30),
                                FindClusters = list(resolution = 0.8),
                                prefix = "S",
                                overwrite = FALSE){

  confuns::check_none_of(
    input = name,
    against = getFeatureNames(object),
    ref.against = "feature names",
    overwrite = overwrite
  )

  seurat_object <-
    Seurat::CreateSeuratObject(count = getCountMatrix(object = object))

  seurat_object <-
    confuns::call_flexibly(
      fn = "NormalizeData",
      fn.ns = "Seurat",
      default = list(object = seurat_object),
      v.fail = seurat_object
    )

  seurat_object <-
    confuns::call_flexibly(
      fn = "ScaleData",
      fn.ns = "Seurat",
      default = list(object = seurat_object),
      v.fail = seurat_object
    )

  seurat_object <-
    confuns::call_flexibly(
      fn = "FindVariableFeatures",
      fn.ns = "Seurat",
      default = list(object = seurat_object),
      v.fail = seurat_object
    )

  seurat_object <-
    confuns::call_flexibly(
      fn = "RunPCA",
      fn.ns = "Seurat",
      default = list(object = seurat_object),
      v.fail = seurat_object
    )

  seurat_object <-
    confuns::call_flexibly(
      fn = "FindNeighbors",
      fn.ns = "Seurat",
      default = list(object = seurat_object),
      v.fail = seurat_object
    )

  seurat_object <-
    confuns::call_flexibly(
      fn = "FindClusters",
      fn.ns = "Seurat",
      default = list(object = seurat_object)
    )

  nc <- dplyr::n_distinct(seurat_object@meta.data[["seurat_clusters"]])


  cluster_df <-
    seurat_object@meta.data %>%
    tibble::rownames_to_column(var = "barcodes") %>%
    dplyr::mutate(
      seurat_clusters = stringr::str_c(prefix, seurat_clusters),
      seurat_clusters = base::factor(seurat_clusters, levels = stringr::str_c(prefix, 0:(nc-1)))
    ) %>%
    dplyr::select(barcodes, !!rlang::sym(name) := seurat_clusters)

  object <- addFeatures(object, feature_df = cluster_df, overwrite = TRUE)

  returnSpataObject(object)

}

#' @title Identify spatially significant features with SPARKX
#'
#' @description A wrapper around the algorithm introduced by \emph{Zhu et al. 2021}
#' to identify features with non-random spatial expression pattern with SPARK-X.
#'
#' @inherit SPARK::sparkx params
#' @inherit argument_dummy params
#' @inherit update_dummy return
#'
#' @author Zhu, J., Sun, S. & Zhou, X. SPARK-X: non-parametric modeling enables
#'  scalable and robust detection of spatial expression patterns for large spatial
#'  transcriptomic studies. Genome Biol 22, 184 (2021). https://doi.org/10.1186/s13059-021-02404-0
#'
#' @export
#'
#' @examples
#' library(SPATA2)
#'
#' data("example_data")
#'
#' object <- example_data$object_UKF313T_diet
#'
#' object <- runSPARKX(object)
#'
#' sparkx_genes <- getSparkxGenes(object, threshold = 0.05)
#'
#' sparkx_df <- getSparkxGenesDf(object)
#'
#'
runSPARKX <- function(object,
                      assay_name = activeAssay(object),
                      numCores = 1,
                      option = "mixture",
                      verbose = NULL){

  hlpr_assign_arguments(object)

  # install if required
  if(!"SPARK" %in% base::rownames(installed.packages())){

    install <- utils::askYesNo(msg = "SPARK is not installed. Do you want to install it?")

    if(base::isTRUE(install)){

      if("devtools" %in% base::rownames(installed.packages())){

        install.packages("devtools")

      }

      devtools::install_github('xzhoulab/SPARK')

    }

  }

  # run algorithm
  coords_mtr <- if (object@platform == 'MERFISH') {
    getCoordsMtr(object, orig = TRUE)
  } else {
    getCoordsMtr(object)
  }

  count_mtr <- getCountMatrix(object)

  barcodes <- base::colnames(count_mtr)

  sparkx_out <-
    SPARK::sparkx(
      count_in = count_mtr[ ,barcodes],
      locus_in = coords_mtr[barcodes, ],
      numCores = numCores,
      option = option,
      verbose = verbose
    )

  ma <- getAssay(object, assay_name = assay_name)

  ma@analysis[["sparkx"]] <- sparkx_out

  object <- setAssay(object, assay = ma)

  returnSpataObject(object)

}

#' @export
runSparkx <- function(...){

  deprecated(fn = T)

  runSPARKX(...)

}

# runT --------------------------------------------------------------------

#' @title Run t-Stochastic Neighbour Embedding
#'
#' @description Takes the pca-data of the object up to the principal component denoted
#' in argument \code{n_pcs} and performs tSNE with it.
#'
#' @param n_pcs Numeric value. Denotes the number of principal components used. Must be
#' smaller or equal to the number of principal components the pca data.frame contains.
#' @param tsne_perplexity Numeric value. Given to argument \code{perplexity} of
#' \code{Rtsne::Rtsne()}.
#' @param ... Additional arguments given to \code{Rtsne::Rtsne()}.
#'
#' @inherit argument_dummy params
#' @inherit update_dummy return
#'
#' @seealso [`runPCA()`], [`getPcaDf()`], [`getTsneDf()`], [`getUmapDf()`]
#'
#' @export
#'
#' @inherit runPCA examples
#'

runTSNE <- function(object, n_pcs = NULL, tsne_perplexity = 30, ...){

  hlpr_assign_arguments(object)

  tsne_res <-
    runTsne2(
      object = object,
      tsne_perplexity = tsne_perplexity,
      ...
    )

  pca_mtr <- getPcaMtr(object = object, n_pcs = n_pcs)

  tsne_df <-
    base::data.frame(
      barcodes = base::rownames(pca_mtr),
      tsne1 = tsne_res$Y[,1],
      tsne2 = tsne_res$Y[,2]
    )

  object <- setTsneDf(object = object, tsne_df = tsne_df)

  returnSpataObject(object)

}

#' @rdname runTSNE
#' @export
runTsne <- function(...){

  deprecated(fn = T)

  runTSNE(...)

}

#' @keywords internal
runTsne2 <- function(object, n_pcs = 20, tsne_perplexity = 30, ...){

  check_object(object)

  pca_mtr <- getPcaMtr(object = object, n_pcs = n_pcs)

  tsne_res <- Rtsne::Rtsne(pca_mtr, perplexity = tsne_perplexity, ...)

  return(tsne_res)

}



# runU --------------------------------------------------------------------

#' @title Run Uniform Manifold Approximation and Projection
#'
#' @description Takes the pca data of the object up to the principal component denoted
#' in argument \code{n_pcs} and performs UMAP with it.
#'
#' @inherit runTsne params
#' @param ... Additional arguments given to \code{umap::umap()}.
#' @inherit argument_dummy params
#'
#' @inherit update_dummy return
#'
#' @export
#'
#' @seealso [`runPCA()`]
#'
#' @inherit runPCA examples
#'

runUMAP <- function(object, n_pcs = NULL, ...){

  check_object(object)

  umap_res <-
    runUmap2(object = object, n_pcs = n_pcs, ...)

  pca_mtr <- getPcaMtr(object = object, n_pcs = n_pcs)

  umap_df <-
    tibble::tibble(
      barcodes = base::rownames(pca_mtr),
      umap1 = umap_res$layout[,1],
      umap2 = umap_res$layout[,2]
    )

  object <- setUmapDf(object = object, umap_df = umap_df)

  returnSpataObject(object)

}

#' @rdname runUMAP
#' @export
runUmap <- function(...){

  deprecated(fn = T)

  runUMAP(...)

}

#' @keywords internal
runUmap2 <- function(object, n_pcs = 20, ...){

  deprecated(...)

  check_object(object)

  pca_mtr <- getPcaMtr(object = object, n_pcs = n_pcs)

  umap_res <- umap::umap(d = pca_mtr, ...)

  return(umap_res)

}
theMILOlab/SPATA2 documentation built on Feb. 8, 2025, 11:41 p.m.