singleCellTK: Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data

Documented in plotScanpyDotPlot plotScanpyEmbedding plotScanpyHeatmap plotScanpyHVG plotScanpyMarkerGenes plotScanpyMarkerGenesDotPlot plotScanpyMarkerGenesHeatmap plotScanpyMarkerGenesMatrixPlot plotScanpyMarkerGenesViolin plotScanpyMatrixPlot plotScanpyPCA plotScanpyPCAGeneRanking plotScanpyPCAVariance plotScanpyViolin runScanpyFindClusters runScanpyFindHVG runScanpyFindMarkers runScanpyNormalizeData runScanpyPCA runScanpyScaleData runScanpyTSNE runScanpyUMAP

########################################
### Helper Functions ##################
#######################################

#' .updateAssaySCEFromScanpy
#' Update/Modify/Add an assay in the provided SingleCellExperiment object from
#' an AnnData object
#' @param inSCE Input SingleCellExperiment object
#' @param scanpyObject Input annData object
#' @param assaySlotSCE Selected assay to update in the input
#' SingleCellExperiment object
#' @param scanpyAssaySlot Selected assay from annData object. Default
#' \code{"X"}.
#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object with
#'  data from annData object appended to the \link{assay} slot.
#' @importFrom SummarizedExperiment assay<-
#' @noRd
.updateAssaySCEFromScanpy <- function(inSCE,
                                      scanpyObject,
                                      assaySlotSCE,
                                      scanpyAssaySlot = "X") {
  assay(inSCE, assaySlotSCE) <- NULL
  temp.matrix <- t(scanpyObject[[scanpyAssaySlot]])
  colnames(temp.matrix) <- colnames(inSCE)
  rownames(temp.matrix) <- rownames(inSCE)
  assay(inSCE, assaySlotSCE) <- temp.matrix
  
  return(inSCE)
}


#####################################################
#### Normalization and Scaling function ############
#####################################################

#' runScanpyNormalizeData
#' Wrapper for NormalizeData() function from scanpy library
#' Normalizes the sce object according to the input parameters
#' @param inSCE (sce) object to normalize
#' @param useAssay Assay containing raw counts to use for normalization.
#' @param targetSum If NULL, after normalization, each observation (cell) has a 
#' total count equal to the median of total counts for observations (cells) 
#' before normalization. Default \code{1e4}
#' @param maxFraction Include cells that have more counts than max_fraction of 
#' the original total counts in at least one cell. Default \code{0.05}
#' @param normAssayName Name of new assay containing normalized data. Default
#' \code{scanpyNormData}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'")
#' rownames(sce) <- rowData(sce)$feature_name
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' }
#' @return Normalized \code{SingleCellExperiment} object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyNormalizeData <- function(inSCE,
                                   useAssay,
                                   targetSum = 1e4,
                                   maxFraction = 0.05,
                                   normAssayName = "scanpyNormData") {
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if (missing(useAssay)) {
    useAssay <- SummarizedExperiment::assayNames(inSCE)[1]
    message(
      "'useAssay' parameter missing. Using the first available assay ",
      "instead: '",
      useAssay,
      "'"
    )
  }
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE, 
                                             X_name = useAssay,
                                             assays = FALSE,
                                             colData = TRUE,
                                             rowData = TRUE,
                                             varm = TRUE,
                                             reducedDims = FALSE,
                                             metadata = FALSE,
                                             colPairs = FALSE,
                                             rowPairs = FALSE,
                                             skip_assays = FALSE,
                                             verbose = NULL)
  normValue <- sc$pp$normalize_total(scanpyObject, 
                                     target_sum = targetSum, 
                                     max_fraction =  maxFraction,
                                     inplace = FALSE)
  
  scanpyObject$obs$n_counts <- normValue$norm_factor
  scanpyObject$X <- normValue$X
  
  # log transform the data.
  scanpyObject <- sc$pp$log1p(scanpyObject, copy = TRUE)
  
  inSCE <-
    .updateAssaySCEFromScanpy(inSCE, scanpyObject, normAssayName)
  metadata(inSCE)$scanpy$normValues <- normValue$X
  colData(inSCE)$n_counts <- 
    as.factor(unlist(scanpyObject$obs['n_counts']))
  
  inSCE <- expSetDataTag(inSCE = inSCE,
                         assayType = "normalized",
                         assays = normAssayName)
  return(inSCE)
}

#' runScanpyScaleData
#' Scales the input sce object according to the input parameters
#' @param inSCE (sce) object to scale
#' @param useAssay Assay containing normalized counts to scale.
#' @param scaledAssayName Name of new assay containing scaled data. Default
#' \code{scanpyScaledData}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' }
#' @return Scaled \code{SingleCellExperiment} object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyScaleData <- function(inSCE,
                               useAssay = "scanpyNormData",
                               scaledAssayName = "scanpyScaledData") {
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE, 
                                             X_name = useAssay,
                                             assays = FALSE,
                                             colData = TRUE,
                                             rowData = TRUE,
                                             varm = TRUE,
                                             reducedDims = FALSE,
                                             metadata = TRUE,
                                             colPairs = FALSE,
                                             rowPairs = FALSE,
                                             skip_assays = FALSE,
                                             verbose = NULL)
  sc$pp$scale(scanpyObject, max_value=10)
  inSCE <-
    .updateAssaySCEFromScanpy(inSCE, scanpyObject, scaledAssayName, "X")
  rowData(inSCE)$"Scanpy_mean" <- scanpyObject$var$mean 
  rowData(inSCE)$"Scanpy_std" <- scanpyObject$var$std 
  
  inSCE <- expSetDataTag(inSCE = inSCE,
                         assayType = "scaled",
                         assays = scaledAssayName)
  
  return(inSCE)
}

######################################
### High Variable Genes ###############
######################################

#' runScanpyFindHVG
#' Find highly variable genes and store in the input sce object
#' @param inSCE (sce) object to compute highly variable genes from and to store
#' back to it
#' @param useAssay Specify the name of the assay to use for computation
#'  of variable genes. It is recommended to use log normalized data, except when 
#'  flavor='seurat_v3', in which counts data is expected.
#' @param method selected method to use for computation of highly variable
#' genes. One of \code{'seurat'}, \code{'cell_ranger'}, or \code{'seurat_v3'}.
#' Default \code{"seurat"}.
#' @param altExpName Character. Name of the alternative experiment object to
#' add if \code{returnAsAltExp = TRUE}. Default \code{featureSubset}.
#' @param altExp Logical value indicating if the input object is an
#' altExperiment. Default \code{FALSE}.
#' @param hvgNumber numeric value of how many genes to select as highly
#' variable. Default \code{2000}
#' @param minMean If n_top_genes unequals None, this and all other cutoffs for 
#' the means and the normalized dispersions are ignored. Ignored if 
#' flavor='seurat_v3'. Default \code{0.0125}
#' @param maxMean If n_top_genes unequals None, this and all other cutoffs for 
#' the means and the normalized dispersions are ignored. Ignored if 
#' flavor='seurat_v3'. Default \code{3}
#' @param minDisp If n_top_genes unequals None, this and all other cutoffs for 
#' the means and the normalized dispersions are ignored. Ignored if 
#' flavor='seurat_v3'. Default \code{0.5}
#' @param maxDisp If n_top_genes unequals None, this and all other cutoffs for 
#' the means and the normalized dispersions are ignored. Ignored if 
#' flavor='seurat_v3'. Default \code{Inf}
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' g <- getTopHVG(sce, method = "seurat", hvgNumber = 500)
#' }
#' @return Updated \code{SingleCellExperiment} object with highly variable genes
#' computation stored
#' \code{\link{getTopHVG}}, \code{\link{plotTopHVG}}
#' @export
#' @importFrom SummarizedExperiment rowData rowData<-
#' @importFrom S4Vectors metadata<-
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyFindHVG <- function(inSCE,
                             useAssay = "scanpyNormData",
                             method = c("seurat", "cell_ranger", "seurat_v3"),
                             altExpName = "featureSubset",
                             altExp = FALSE,
                             hvgNumber = 2000,
                             minMean = 0.0125,
                             maxMean = 3,
                             minDisp = 0.5,
                             maxDisp = Inf) {
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  method <- match.arg(method)
  fullSCE_cellranger <- NULL
  
  if (!altExp) {
    if (method == "cell_ranger"){
      # for cell_ranger method only
      fullSCE_cellranger <- inSCE
      inSCE <- inSCE[which(rowSums(assay(inSCE, useAssay)) > 0), ]
      #############################
    }
    scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE, 
                                               X_name = useAssay,
                                               assays = FALSE,
                                               colData = TRUE,
                                               rowData = TRUE,
                                               varm = TRUE,
                                               reducedDims = FALSE,
                                               metadata = TRUE,
                                               colPairs = FALSE,
                                               rowPairs = FALSE,
                                               skip_assays = FALSE,
                                               verbose = NULL)
  }
  else{
    if (method == "cell_ranger"){
      # for cell_ranger method only
      tempSCE <- inSCE
      inSCE <- altExp(inSCE, altExpName)[which(rowSums(assay(altExp(inSCE, altExpName), useAssay)) > 0), ]
      #############################
    }
    scanpyObject <- zellkonverter::SCE2AnnData(sce = altExp(inSCE, altExpName), 
                                               X_name = useAssay,
                                               assays = FALSE,
                                               colData = TRUE,
                                               rowData = TRUE,
                                               varm = TRUE,
                                               reducedDims = FALSE,
                                               metadata = FALSE,
                                               colPairs = FALSE,
                                               rowPairs = FALSE,
                                               skip_assays = FALSE,
                                               verbose = NULL)
    
  }
  
  if (method == "seurat") {
    scanpyObject$var['mean'] <- rowData(inSCE)$"Scanpy_mean"
    scanpyObject$var['std'] <- rowData(inSCE)$"Scanpy_std"
    sc$pp$highly_variable_genes(scanpyObject, 
                                flavor = method,
                                n_top_genes = as.integer(hvgNumber),
                                min_mean = minMean,
                                max_mean = maxMean,
                                min_disp = minDisp,
                                max_disp = maxDisp)
    
    #tmpSCE <- zellkonverter::AnnData2SCE(adata = scanpyObject)
    metadata(inSCE)$hvg <- scanpyObject$uns['hvg']
    rowData(inSCE)$dispersions <-
      unlist(scanpyObject$var['dispersions'])
    rowData(inSCE)$dispersions_norm <-
      unlist(scanpyObject$var['dispersions_norm'])
    rowData(inSCE)$means <-
      unlist(scanpyObject$var['means'])
    rowData(inSCE)$highly_variable <-
      unlist(scanpyObject$var['highly_variable'])
    
    metadata(inSCE)$sctk$runFeatureSelection$seurat <-
      list(
        useAssay = useAssay,
        rowData = c(
          "dispersions",
          "dispersions_norm",
          "means"
        )
      )
  } 
  #for this approach it is required that sce basic filering of cells and genes 
  #must be done
  else if (method == "cell_ranger") {
    sc$pp$highly_variable_genes(scanpyObject, 
                                flavor = method,
                                n_top_genes = as.integer(hvgNumber),
                                min_mean = minMean,
                                max_mean = maxMean,
                                min_disp = minDisp,
                                max_disp = maxDisp)
    
    
    #metadata(inSCE)$hvg <- metadata(tmpSCE)['hvg'][['hvg']] 
    metadata(fullSCE_cellranger)$hvg <- scanpyObject$uns['hvg']
    if (!altExp) {
      rowData(inSCE)$dispersions <-
        unlist(scanpyObject$var['dispersions'])
      rowData(inSCE)$dispersions_norm <-
        unlist(scanpyObject$var['dispersions_norm'])
      rowData(inSCE)$means <-
        unlist(scanpyObject$var['means'])
      rowData(inSCE)$highly_variable <-
        unlist(scanpyObject$var['highly_variable'])
      
    }
    else{
      altExpRows <- match(rownames(inSCE), rownames(scanpyObject))
      rowData(inSCE)$dispersions <-
        unlist(scanpyObject$var['dispersions'])[altExpRows]
      rowData(inSCE)$dispersions_norm <-
        unlist(scanpyObject$var['dispersions_norm'])[altExpRows]   
      rowData(inSCE)$means <-
        unlist(scanpyObject$var['means'])[altExpRows]
      rowData(inSCE)$highly_variable <-
        unlist(scanpyObject$var['highly_variable'])
    }
    
    metadata(inSCE)$sctk$runFeatureSelection$cell_ranger <-
      list(
        useAssay = useAssay,
        rowData = c(
          "dispersions",
          "dispersions_norm",
          "means",
          "highly_variable"
        )
      )
    
    mergedRowData <- merge(rowData(fullSCE_cellranger), rowData(inSCE)[, metadata(inSCE)$sctk$runFeatureSelection$cell_ranger$rowData],
                           by = 'row.names', all = TRUE)
    row.names(mergedRowData) <- mergedRowData$Row.names
    mergedRowData$Row.names <- NULL
    rowData(fullSCE_cellranger) <- mergedRowData
    inSCE <- fullSCE_cellranger
  }
  
  else if (method == "seurat_v3") {
    sc$pp$highly_variable_genes(scanpyObject, 
                                flavor = method,
                                n_top_genes = as.integer(hvgNumber),
                                min_mean = minMean,
                                max_mean = maxMean,
                                min_disp = minDisp,
                                max_disp = maxDisp)
    
    metadata(inSCE)$hvg <- scanpyObject$uns['hvg']
    
    rowData(inSCE)$variances <-
      unlist(scanpyObject$var['variances'])
    rowData(inSCE)$variances_norm <-
      unlist(scanpyObject$var['variances_norm'])   
    rowData(inSCE)$means <-
      unlist(scanpyObject$var['means'])
    rowData(inSCE)$highly_variable <-
      unlist(scanpyObject$var['highly_variable'])
    metadata(inSCE)$sctk$runFeatureSelection$seurat_v3 <-
      list(
        useAssay = useAssay,
        rowData = c(
          "variances",
          "variances_norm",
          "means"
        )
      )
  }
  return(inSCE)
}


#' plotScanpyHVG
#'
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param log Plot on logarithmic axes. Default \code{FALSE}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' plotScanpyHVG(sce)
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyHVG <- function(inSCE,
                          log = FALSE){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             assays = FALSE,
                                             colData = TRUE,
                                             rowData = TRUE,
                                             varm = TRUE,
                                             reducedDims = FALSE,
                                             metadata = TRUE,
                                             colPairs = FALSE,
                                             rowPairs = FALSE,
                                             skip_assays = TRUE,
                                             verbose = NULL)
  if(is.null(scanpyObject$uns['hvg'])){
    stop(
      " High variable genes not found. Please run the 'runScanpyFindHVG' first."
    )
  }
  return(sc$pl$highly_variable_genes(scanpyObject,
                                     log = log))
}

################################################
######## PCA Function #########################
################################################

#' runScanpyPCA
#' Computes PCA on the input sce object and stores the calculated principal
#' components within the sce object
#' @param inSCE (sce) object on which to compute PCA
#' @param useAssay Assay containing scaled counts to use in PCA. Default
#' \code{"scanpyScaledData"}.
#' @param reducedDimName Name of new reducedDims object containing Scanpy PCA.
#' Default \code{scanpyPCA}.
#' @param nPCs numeric value of how many components to compute. Default
#' \code{50}.
#' @param method selected method to use for computation of pca. 
#' One of \code{'arpack'}, \code{'randomized'}, \code{'auto'} or \code{'lobpcg'}.
#' Default \code{"arpack"}.
#' @param use_highly_variable boolean value of whether to use highly variable 
#' genes only. By default uses them if they have been determined beforehand. 
#' @param seed Specify numeric value to set as a seed. Default \code{12345}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' }
#' @return Updated \code{SingleCellExperiment} object which now contains the
#' computed principal components
#' @export
#' @importFrom SingleCellExperiment reducedDim<- rowSubset
#' @importFrom S4Vectors metadata<-
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyPCA <- function(inSCE,
                         useAssay = "scanpyScaledData",
                         reducedDimName = "scanpyPCA",
                         nPCs = 50,
                         method = c("arpack", "randomized", "auto", "lobpcg"),
                         use_highly_variable = TRUE,
                         seed = 12345){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if (!is.null(seed)) {
    reticulate::py_set_seed(seed = seed)
  }
  
  params <- as.list(environment())
  params$inSCE <- NULL
  
  method <- match.arg(method)
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             X_name = useAssay,
                                             assays = FALSE,
                                             colData = TRUE,
                                             rowData = TRUE,
                                             varm = TRUE,
                                             reducedDims = FALSE,
                                             metadata = FALSE,
                                             colPairs = FALSE,
                                             rowPairs = FALSE,
                                             skip_assays = FALSE,
                                             verbose = NULL)
  sc$tl$pca(scanpyObject, 
            svd_solver= method, 
            n_comps = as.integer(nPCs), 
            use_highly_variable = use_highly_variable)
  
  metadata(inSCE)$scanpy$PCA <- scanpyObject
  
  temp <- scanpyObject$obsm[['X_pca']]
  #colnames(temp) <- paste0(rep("PC", ncol(temp)), seq(ncol(10)))
  rownames(temp) <- colnames(inSCE)
  rotation <- scanpyObject$varm[['PCs']]
  rownames(rotation) <- rownames(inSCE)
  
  reducedDim(inSCE, reducedDimName) <- temp
  attr(reducedDim(inSCE, reducedDimName), "percentVar") <- 
    scanpyObject$uns[['pca']][['variance_ratio']]
  attr(reducedDim(inSCE, reducedDimName), "rotation") <- rotation
  metadata(inSCE)$sctk$runDimReduce$reddim[[reducedDimName]] <- params
  
  return(inSCE)
}


#' plotScanpyPCA
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param reducedDimName Name of new reducedDims object containing Scanpy PCA. 
#' @param color Keys for annotations of observations/cells or variables/genes.
#' @param title Provide title for panels either as string or list of strings
#' @param legend Location of legend, either 'on data', 'right margin' or a 
#' valid keyword for the loc parameter of Legend.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' plotScanpyPCA(sce)
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyPCA <- function(inSCE,
                          reducedDimName = "scanpyPCA", 
                          color = NULL,
                          title = '',
                          legend = 'right margin'){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if(!reducedDimName %in% reducedDimNames(inSCE)){
    stop(
      "PCA results not found. Please run the 'runScanpyPCA' function first."
    )
  }
  reducedDim (inSCE, "X_pca") <- reducedDim(inSCE, reducedDimName)
  useAssay = metadata(inSCE)$sctk$runDimReduce$reddim[[reducedDimName]]$useAssay
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             X_name = useAssay,
                                             colData = TRUE,
                                             rowData = TRUE,
                                             varm = TRUE,
                                             reducedDims = TRUE,
                                             metadata = FALSE,
                                             colPairs = TRUE,
                                             rowPairs = TRUE,
                                             verbose = NULL)
  return(sc$pl$pca(scanpyObject,
                   color = color,
                   title = title, 
                   legend_loc = legend))
}


#' plotScanpyPCAGeneRanking
#'
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param PC_comp For example, '1,2,3' means [1, 2, 3], first, second, 
#' third principal component.
#' @param includeLowest Whether to show the variables with both highest and 
#' lowest loadings. Default \code{TRUE}
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' plotScanpyPCAGeneRanking(sce)
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyPCAGeneRanking <- function(inSCE, 
                                     PC_comp = "1,2,3",
                                     includeLowest = TRUE){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- metadata(inSCE)$scanpy$PCA
  return(sc$pl$pca_loadings(scanpyObject,
                            components = PC_comp,
                            include_lowest = includeLowest))
  
}

#' plotScanpyPCAVariance
#'
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param nPCs Number of PCs to show. Default \code{50}.
#' @param log Plot on logarithmic scale. Default \code{FALSE}
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' plotScanpyPCAVariance(sce)
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyPCAVariance <- function(inSCE,
                                  nPCs = 50,
                                  log = FALSE){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- metadata(inSCE)$scanpy$PCA
  return(sc$pl$pca_variance_ratio(scanpyObject,
                                  n_pcs = as.integer(nPCs),
                                  log = log))
  
}

########################################################
###### Clustering function ############################
#######################################################

#' runScanpyFindClusters
#' Computes the clusters from the input sce object and stores them back in sce
#' object
#' @param inSCE (sce) object from which clusters should be computed and stored
#' in
#' @param useAssay Assay containing scaled counts to use for clustering.
#' @param useReducedDim Reduction method to use for computing clusters. 
#' Default \code{"scanpyPCA"}.
#' @param nNeighbors The size of local neighborhood (in terms of number of 
#' neighboring data points) used for manifold approximation. Larger values 
#' result in more global views of the manifold, while smaller values result in 
#' more local data being preserved. Default \code{10}.
#' @param dims numeric value of how many components to use for computing
#' clusters. Default \code{40}.
#' @param method selected method to compute clusters. One of "louvain",
#' and "leiden". Default \code{louvain}.
#' @param colDataName Specify the name to give to this clustering result. 
#'  Default is \code{NULL} that will generate a meaningful name automatically.
#' @param resolution A parameter value controlling the coarseness of the 
#' clustering. Higher values lead to more clusters Default \code{1}.
#' @param niterations How many iterations of the Leiden clustering method to 
#' perform. Positive values above 2 define the total number of iterations to 
#' perform, -1 has the method run until it reaches its optimal clustering.
#' Default \code{-1}.
#' @param flavor Choose between to packages for computing the clustering.
#' Default \code{vtraag} 
#' @param use_weights Boolean. Use weights from knn graph. Default \code{FALSE}
#' @param cor_method correlation method to use. Options are ‘pearson’, 
#' ‘kendall’, and ‘spearman’. Default \code{pearson}.
#' @param inplace If True, adds dendrogram information to annData object, 
#' else this function returns the information. Default \code{TRUE}
#' @param externalReduction Pass DimReduce object if PCA computed through
#' other libraries. Default \code{NULL}.
#' @param seed Specify numeric value to set as a seed. Default \code{12345}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' }
#' @return Updated sce object which now contains the computed clusters
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyFindClusters <- function(inSCE,
                                  useAssay = "scanpyScaledData",
                                  useReducedDim = "scanpyPCA",
                                  nNeighbors = 10,
                                  dims = 40,
                                  method = c("leiden", "louvain"),
                                  colDataName = NULL,
                                  resolution = 1,
                                  niterations = -1,
                                  flavor = 'vtraag',
                                  use_weights = FALSE,
                                  cor_method = 'pearson',
                                  inplace = TRUE,
                                  externalReduction = NULL,
                                  seed = 12345) {
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if (!reticulate::py_module_available(module = "louvain")) {
    warning("Cannot find python module 'louvain', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, louvain can be installed on the local machine",
            "with pip (e.g. pip install louvain) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if (!reticulate::py_module_available(module = "leidenalg")) {
    warning("Cannot find python module 'leidenalg', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, leidenalg can be installed on the local machine",
            "with pip (e.g. pip install leidenalg) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if (!is.null(seed)) {
    reticulate::py_set_seed(seed = seed)
  }
  
  method <- match.arg(method)
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,  
                                             X_name = useAssay,
                                             assays = FALSE,
                                             colData = TRUE,
                                             rowData = TRUE,
                                             varm = TRUE,
                                             reducedDims = useReducedDim,
                                             metadata = FALSE,
                                             colPairs = FALSE,
                                             rowPairs = FALSE,
                                             skip_assays = FALSE,
                                             verbose = NULL)
  
  if (!is.null(externalReduction)) {
    scanpyObject$obsm <- list(pca = externalReduction)
    useReducedDim <- "pca"
  } 
  
  if(is.null(colDataName)){
    colDataName = paste0("Scanpy", "_", method, "_", resolution) 
  }
  
  sc$pp$neighbors(scanpyObject, 
                  n_neighbors = as.integer(nNeighbors), 
                  n_pcs = as.integer(dims),
                  use_rep = useReducedDim)
  
  if (method == "louvain") {
    sc$tl$louvain(adata = scanpyObject,
                  key_added = colDataName,
                  flavor = flavor,
                  use_weights = use_weights)
  } else if (method == "leiden") {
    sc$tl$leiden(adata = scanpyObject,
                 key_added = colDataName,
                 n_iterations = as.integer(niterations))
  } 
  
  colData(inSCE)[[colDataName]] <-
    as.factor(unlist(scanpyObject$obs[colDataName]))
  S4Vectors::metadata(inSCE)$scanpy[method] <- colDataName
  
  return(inSCE)
}

###############################################
###### Embedding functions #####################
###############################################

#' runScanpyUMAP
#' Computes UMAP from the given sce object and stores the UMAP computations back
#' into the sce object
#' @param inSCE (sce) object on which to compute the UMAP
#' @param useAssay Specify name of assay to use. Default is \code{NULL}, so
#' \code{useReducedDim} param will be used instead.
#' @param useReducedDim Reduction to use for computing UMAP. 
#' Default is \code{"scanpyPCA"}.
#' @param reducedDimName Name of new reducedDims object containing Scanpy UMAP
#' Default \code{scanpyUMAP}.
#' @param dims Numerical value of how many reduction components to use for UMAP
#' computation. Default \code{40}.
#' @param minDist Sets the \code{"min_dist"} parameter to the underlying UMAP
#' call. Default \code{0.5}.
#' @param nNeighbors Sets the \code{"n_neighbors"} parameter to the underlying
#' UMAP call. Default \code{10}.
#' @param spread Sets the \code{"spread"} parameter to the underlying UMAP call.
#' Default \code{1}.
#' @param alpha Sets the \code{"alpha"} parameter to the underlying UMAP call.
#' Default \code{1}.
#' @param gamma Sets the \code{"gamma"} parameter to the underlying UMAP call.
#' Default \code{1}.
#' @param externalReduction Pass DimReduce object if PCA computed through
#' other libraries. Default \code{NULL}.
#' @param seed Specify numeric value to set as a seed. Default \code{12345}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyUMAP(sce, useReducedDim = "scanpyPCA")
#' }
#' @return Updated sce object with UMAP computations stored
#' @export
#' @importFrom SingleCellExperiment reducedDim<-
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyUMAP <- function(inSCE,
                          useAssay = NULL,
                          useReducedDim = "scanpyPCA",
                          reducedDimName = "scanpyUMAP",
                          dims = 40,  
                          minDist = 0.5,
                          nNeighbors = 10, 
                          spread = 1,
                          alpha=1.0, 
                          gamma=1.0, 
                          externalReduction = NULL,
                          seed = 12345) {
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if (!is.null(seed)) {
    reticulate::py_set_seed(seed = seed)
  }
  
  params <- as.list(environment())
  params$inSCE <- NULL
  
  if(!is.null(useAssay)){
    scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE, 
                                               X_name = useAssay,
                                               assays = FALSE,
                                               colData = TRUE,
                                               rowData = TRUE,
                                               varm = TRUE,
                                               reducedDims = useReducedDim,
                                               metadata = FALSE,
                                               colPairs = FALSE,
                                               rowPairs = FALSE,
                                               skip_assays = FALSE,
                                               verbose = NULL)
  } else{
    scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE, 
                                               reducedDims = useReducedDim,
                                               assays = FALSE,
                                               colData = TRUE,
                                               rowData = TRUE,
                                               varm = TRUE,
                                               metadata = FALSE,
                                               colPairs = FALSE,
                                               rowPairs = FALSE,
                                               skip_assays = FALSE,
                                               verbose = NULL)
  }
  
  if (!is.null(externalReduction)) {
    scanpyObject$obsm <- list(pca = externalReduction)
    useReducedDim <- "pca"
  } 
  
  sc$pp$neighbors(scanpyObject, 
                  n_neighbors = as.integer(nNeighbors), 
                  n_pcs = as.integer(dims),
                  use_rep = useReducedDim)
  
  
  sc$tl$umap(scanpyObject, 
             n_components = 2L,
             min_dist = minDist,
             alpha = alpha, 
             gamma = gamma,
             spread = spread)
  
  
  temp <- scanpyObject$obsm[['X_umap']]
  rownames(temp) <- colnames(inSCE)
  reducedDim(inSCE, reducedDimName) <- temp
  metadata(inSCE)$sctk$runDimReduce$reddim[[reducedDimName]] <- params
  
  return(inSCE)
}



#' runScanpyTSNE
#' Computes tSNE from the given sce object and stores the tSNE computations back
#' into the sce object
#' @param inSCE (sce) object on which to compute the tSNE
#' @param useAssay Specify name of assay to use. Default is \code{NULL}, so
#' \code{useReducedDim} param will be used instead.
#' @param useReducedDim selected reduction method to use for computing tSNE.
#' Default \code{"scanpyPCA"}.
#' @param reducedDimName Name of new reducedDims object containing Scanpy tSNE
#' Default \code{scanpyTSNE}.
#' @param dims Number of reduction components to use for tSNE computation.
#' Default \code{40}.
#' @param perplexity Adjust the perplexity tuneable parameter for the underlying
#' tSNE call. Default \code{30}.
#' @param externalReduction Pass DimReduc object if PCA computed through
#' other libraries. Default \code{NULL}.
#' @param seed Specify numeric value to set as a seed. Default \code{12345}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyTSNE(sce, useReducedDim = "scanpyPCA")
#' }
#' @return Updated sce object with tSNE computations stored
#' @export
#' @importFrom SingleCellExperiment reducedDim<-
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyTSNE <- function(inSCE,
                          useAssay = NULL,
                          useReducedDim = "scanpyPCA", 
                          reducedDimName = "scanpyTSNE",
                          dims = 40,
                          perplexity = 30,
                          externalReduction = NULL,
                          seed = 12345){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if (!is.null(seed)) {
    reticulate::py_set_seed(seed = seed)
  }
  
  params <- as.list(environment())
  params$inSCE <- NULL
  
  if(!is.null(useAssay)){
    scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE, 
                                               X_name = useAssay,
                                               assays = FALSE,
                                               colData = TRUE,
                                               rowData = TRUE,
                                               varm = TRUE,
                                               reducedDims = useReducedDim,
                                               metadata = FALSE,
                                               colPairs = FALSE,
                                               rowPairs = FALSE,
                                               skip_assays = FALSE,
                                               verbose = NULL)
  } else{
    scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                               reducedDims = useReducedDim,
                                               assays = FALSE,
                                               colData = TRUE,
                                               rowData = TRUE,
                                               varm = TRUE,
                                               metadata = FALSE,
                                               colPairs = FALSE,
                                               rowPairs = FALSE,
                                               skip_assays = FALSE,
                                               verbose = NULL)
  }
  
  
  if (!is.null(externalReduction)) {
    scanpyObject$obsm <- list(pca = externalReduction)
    useReducedDim <- "pca"
  }
  
  sc$tl$tsne(scanpyObject, 
             n_pcs = as.integer(dims),
             use_rep = useReducedDim, 
             perplexity = as.integer(perplexity))
  
  
  
  temp <- scanpyObject$obsm[['X_tsne']]
  rownames(temp) <- colnames(inSCE)
  reducedDim(inSCE, reducedDimName) <- temp
  metadata(inSCE)$sctk$runDimReduce$reddim[[reducedDimName]] <- params
  
  return(inSCE)
}

#' plotScanpyEmbedding
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param reducedDimName Name of reducedDims object containing embeddings.
#' Eg. scanpyUMAP.
#' @param useAssay Specify name of assay to use. Default is \code{NULL},
#' which will use scaled assay by default. 
#' @param color Keys for annotations of observations/cells or variables/genes.
#' @param title Provide title for panels either as string or list of strings
#' @param legend Location of legend, either 'on data', 'right margin' or a 
#' valid keyword for the loc parameter of Legend.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyUMAP(sce, useReducedDim = "scanpyPCA")
#' plotScanpyEmbedding(sce, reducedDimName = "scanpyUMAP", color = 'Scanpy_louvain_1')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyEmbedding <- function(inSCE,
                                reducedDimName,
                                useAssay = NULL,
                                color = NULL,
                                legend = 'right margin',
                                title = ''){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if(!reducedDimName %in% reducedDimNames(inSCE)){
    stop(
      "Embedding result not found. Please run the 'runScanpyUMAP' or 
      'runScanpyTSNE' first."
    )
  }
  useAssay <- metadata(inSCE)$sctk$runDimReduce$reddim[[reducedDimName]]$useAssay
  if(is.null(useAssay)){
    useAssay <- "scanpyScaledData"
  }
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             X_name = useAssay,
                                             assays = FALSE,
                                             colData = TRUE,
                                             rowData = TRUE,
                                             varm = TRUE,
                                             reducedDims = reducedDimName,
                                             metadata = FALSE,
                                             colPairs = FALSE,
                                             rowPairs = FALSE,
                                             skip_assays = FALSE,
                                             verbose = NULL)
  
  return(sc$pl$embedding(scanpyObject,
                         basis = reducedDimName,
                         color = color,
                         legend_loc = legend,
                         title = title))
  
}

###################################################
########## Marker Genes function ###################
###################################################

#' runScanpyFindMarkers
#'
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param nGenes The number of genes that appear in the returned tables. 
#' Defaults to all genes.
#' @param useAssay Specify the name of the assay to use for computation
#'  of marker genes. It is recommended to use log normalized assay. 
#' @param colDataName colData to use as the key of the observations grouping to 
#' consider.
#' @param group1 Name of group1. Subset of groups, to which comparison shall be 
#' restricted, or 'all' (default), for all groups.
#' @param group2 Name of group2. If 'rest', compare each group to the union of 
#' the rest of the group. If a group identifier, compare with respect to this 
#' group. Default is 'rest'
#' @param test Test to use for DE. Default \code{"t-test"}.
#' @param corr_method p-value correction method. Used only for 't-test', 
#' 't-test_overestim_var', and 'wilcoxon'.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyFindMarkers(sce, colDataName = "Scanpy_louvain_1" )
#' }
#' @return A \code{SingleCellExperiment} object that contains marker genes
#' populated in a data.frame stored inside metadata slot.
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
runScanpyFindMarkers <- function(inSCE,
                                 nGenes = NULL,
                                 useAssay = "scanpyNormData",
                                 colDataName,
                                 group1 = "all",
                                 group2 = "rest",
                                 test = c("wilcoxon", "t-test", "t-test_overestim_var", "logreg"),
                                 corr_method = c("benjamini-hochberg", "bonferroni")) {
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  test <- match.arg(test)
  corr_method <- match.arg(corr_method)
  
  #store results in a temporary sce object
  tmpSCE <- SingleCellExperiment(
    assays = list(counts = counts(inSCE), temp = as.matrix(assay(inSCE, useAssay))))
  # store back colData from sce into the tmpSCE colData slot
  colData(tmpSCE)[colDataName] <- as.character(colData(inSCE)[[colDataName]])
  rowData(tmpSCE)$id <- rownames(inSCE)
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = tmpSCE, X_name = "temp")
  if(!is.null(nGenes)){
    nGenes = as.integer(nGenes)
  }
  sc$tl$rank_genes_groups(scanpyObject, 
                          groupby = colDataName, 
                          groups = group1,
                          reference = group2,
                          method = test, 
                          n_genes = nGenes,
                          corr_method = corr_method)
  
  py <- reticulate::py
  py$scanpyObject <- scanpyObject
  reticulate::py_run_string("import pandas as pd")
  reticulate::py_run_string(
    "names = pd.DataFrame(scanpyObject.uns['rank_genes_groups']['names'])", 
    convert = TRUE)
  reticulate::py_run_string(
    "logFoldChanges = pd.DataFrame(scanpyObject.uns['rank_genes_groups']['logfoldchanges'])", 
    convert = TRUE)
  reticulate::py_run_string(
    "pvals_adj = pd.DataFrame(scanpyObject.uns['rank_genes_groups']['pvals_adj'])", 
    convert = TRUE)
  reticulate::py_run_string(
    "scores = pd.DataFrame(scanpyObject.uns['rank_genes_groups']['scores'])",
    convert = TRUE)
  
  markerGenesNames <- utils::stack(py$names)
  colnames(markerGenesNames) <- c("Gene","findMarker_cluster")
  Log2_FC <- unlist(py$logFoldChanges)
  Pvalue <- unlist(py$pvals_adj)
  zscore <- unlist(py$scores)
  
  
  markerGenesTable <- data.frame()
  markerGenesTable <- cbind(markerGenesNames, Log2_FC, Pvalue, zscore)
  
  S4Vectors::metadata(inSCE)$"findMarkerScanpyObject" <- scanpyObject
  S4Vectors::metadata(inSCE)$scanpyMarkersTable <- markerGenesTable
  return(inSCE)
}

#' plotScanpyMarkerGenes
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param groups The groups for which to show the gene ranking. Default \code{NULL}
#' means that all groups will be considered. 
#' @param nGenes Number of genes to show. Default \code{10}
#' @param nCols Number of panels shown per row. Default \code{4}
#' @param sharey Controls if the y-axis of each panels should be shared. 
#' Default \code{FALSE} allows each panel to have its own y-axis range.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyFindMarkers(sce, colDataName = "Scanpy_louvain_1" )
#' plotScanpyMarkerGenes(sce, groups = '0')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyMarkerGenes <- function(inSCE,
                                  groups = NULL,
                                  nGenes = 10,
                                  nCols = 4,
                                  sharey = FALSE){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if(is.null(metadata(inSCE)[["findMarkerScanpyObject"]])){
    stop("Marker genes not found. Please run the 'runScanpyFindMarkers' function first.")
  }
  scanpyObject <- metadata(inSCE)[["findMarkerScanpyObject"]]
  return(sc$pl$rank_genes_groups(scanpyObject, 
                                 groups = groups, 
                                 n_genes = as.integer(nGenes),
                                 ncols = as.integer(nCols), 
                                 sharey = sharey))
  
}

#' plotScanpyMarkerGenesViolin
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param groups The groups for which to show the gene ranking. Default \code{NULL}
#' means that all groups will be considered. 
#' @param features List of genes to plot. Is only useful if interested in a 
#' custom gene list
#' @param nGenes Number of genes to show. Default \code{10}
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyFindMarkers(sce, colDataName = "Scanpy_louvain_1" )
#' plotScanpyMarkerGenesViolin(sce, groups = '0')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyMarkerGenesViolin <- function(inSCE,
                                        groups = NULL,
                                        features = NULL,
                                        nGenes = 10){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if(is.null(metadata(inSCE)["findMarkerScanpyObject"])){
    stop("Marker genes not found. Please run the 'runScanpyFindMarkers' function first.")
  }
  scanpyObject <- metadata(inSCE)[["findMarkerScanpyObject"]]
  return(sc$pl$rank_genes_groups_violin(scanpyObject,
                                        groups = groups,
                                        gene_names = features,
                                        n_genes = as.integer(nGenes)))
  
}

#' plotScanpyMarkerGenesHeatmap
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param groups The groups for which to show the gene ranking. Default \code{NULL}
#' means that all groups will be considered.
#' @param groupBy The key of the observation grouping to consider. By default, 
#' the groupby is chosen from the rank genes groups parameter.
#' @param nGenes Number of genes to show. Default \code{10}
#' @param features Genes to plot. Sometimes is useful to pass a specific list of
#'  var names (e.g. genes). The var_names could be a dictionary or a list. 
#' @param log2fcThreshold Only output DEGs with the absolute values of log2FC
#' larger than this value. Default \code{NULL}.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyFindMarkers(sce, colDataName = "Scanpy_louvain_1" )
#' plotScanpyMarkerGenesHeatmap(sce, groupBy = 'Scanpy_louvain_1')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyMarkerGenesHeatmap <- function(inSCE,
                                         groups = NULL,
                                         groupBy,
                                         nGenes = 10,
                                         features = NULL,
                                         log2fcThreshold = NULL){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if(is.null(metadata(inSCE)["findMarkerScanpyObject"])){
    stop("Marker genes not found. Please run the 'runScanpyFindMarkers' function first.")
  }
  scanpyObject <- metadata(inSCE)[["findMarkerScanpyObject"]]
  
  if(!is.null(features)){
    return(sc$pl$rank_genes_groups_heatmap(scanpyObject,
                                           groups = groups,
                                           groupby = groupBy,
                                           var_names = features,
                                           min_logfoldchange = log2fcThreshold,
                                           show_gene_labels = TRUE,
                                           dendrogram = FALSE))
  }
  else
    return(sc$pl$rank_genes_groups_heatmap(scanpyObject,
                                           groups = groups,
                                           groupby = groupBy,
                                           n_genes = as.integer(nGenes),
                                           var_names = NULL,
                                           min_logfoldchange = log2fcThreshold,
                                           show_gene_labels = TRUE,
                                           dendrogram = FALSE))
  
}

#' plotScanpyMarkerGenesDotPlot
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param groups The groups for which to show the gene ranking. Default \code{NULL}
#' means that all groups will be considered.
#' @param nGenes Number of genes to show. Default \code{10}
#' @param groupBy The key of the observation grouping to consider. By default, 
#' the groupby is chosen from the rank genes groups parameter.
#' @param log2fcThreshold Only output DEGs with the absolute values of log2FC
#' larger than this value. Default \code{NULL}.
#' @param parameters The options for marker genes results to plot are: 
#' ‘scores’, ‘logfoldchanges’, ‘pvals’, ‘pvals_adj’, ‘log10_pvals’, ‘log10_pvals_adj’.
#' If NULL provided then it uses mean gene value to plot.  
#' @param standardScale Whether or not to standardize the given dimension 
#' between 0 and 1, meaning for each variable or group, subtract the minimum and 
#' divide each by its maximum. Default \code{NULL} means that it doesn't perform
#' any scaling. 
#' @param features Genes to plot. Sometimes is useful to pass a specific list of
#'  var names (e.g. genes) to check their fold changes or p-values, instead of 
#'  the top/bottom genes. The gene names could be a dictionary or a list. 
#'  Default \code{NULL}
#' @param title Provide title for the figure.
#' @param vmin The value representing the lower limit of the color scale. 
#' Values smaller than vmin are plotted with the same color as vmin. 
#' Default \code{NULL}
#' @param vmax The value representing the upper limit of the color scale. 
#' Values larger than vmax are plotted with the same color as vmax. 
#' Default \code{NULL}
#' @param colorBarTitle Title for the color bar. 
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyFindMarkers(sce, colDataName = "Scanpy_louvain_1" )
#' plotScanpyMarkerGenesDotPlot(sce, groupBy = 'Scanpy_louvain_1')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyMarkerGenesDotPlot <- function(inSCE,
                                         groups = NULL,
                                         nGenes = 10, 
                                         groupBy,
                                         log2fcThreshold = NULL,
                                         parameters = "logfoldchanges",
                                         standardScale = NULL,
                                         features = NULL,
                                         title = '',
                                         vmin = NULL,
                                         vmax = NULL,
                                         colorBarTitle = "log fold change"){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if(is.null(metadata(inSCE)["findMarkerScanpyObject"])){
    stop("Marker genes not found. Please run the 'runScanpyFindMarkers' function first.")
  }
  scanpyObject <- metadata(inSCE)[["findMarkerScanpyObject"]]
  
  if(!is.null(features)){
    return(sc$pl$rank_genes_groups_dotplot(scanpyObject,
                                           groups = groups,
                                           groupby = groupBy,
                                           min_logfoldchange = log2fcThreshold,
                                           values_to_plot = parameters,
                                           standard_scale = standardScale,
                                           var_names = features,
                                           title = title,
                                           vmin = vmin,
                                           vmax = vmax,
                                           cmap = 'bwr',
                                           dendrogram = FALSE,
                                           colorbar_title = colorBarTitle))
  }
  else
    return(sc$pl$rank_genes_groups_dotplot(scanpyObject,
                                           groups = groups,
                                           n_genes = as.integer(nGenes),
                                           groupby = groupBy,
                                           min_logfoldchange = log2fcThreshold,
                                           values_to_plot = parameters,
                                           standard_scale = standardScale,
                                           var_names = NULL,
                                           title = title,
                                           vmin = vmin,
                                           vmax = vmax,
                                           cmap = 'bwr',
                                           dendrogram = FALSE,
                                           colorbar_title = colorBarTitle))
  
  
  
}

#' plotScanpyMarkerGenesMatrixPlot
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param groups The groups for which to show the gene ranking. Default \code{NULL}
#' means that all groups will be considered.
#' @param nGenes Number of genes to show. Default \code{10}
#' @param groupBy The key of the observation grouping to consider. By default, 
#' the groupby is chosen from the rank genes groups parameter.
#' @param log2fcThreshold Only output DEGs with the absolute values of log2FC
#' larger than this value. Default \code{NULL}.
#' @param parameters The options for marker genes results to plot are: 
#' ‘scores’, ‘logfoldchanges’, ‘pvals’, ‘pvals_adj’, ‘log10_pvals’, ‘log10_pvals_adj’.
#' If NULL provided then it uses mean gene value to plot.  
#' @param standardScale Whether or not to standardize the given dimension 
#' between 0 and 1, meaning for each variable or group, subtract the minimum and 
#' divide each by its maximum. Default \code{NULL} means that it doesn't perform
#' any scaling. 
#' @param features Genes to plot. Sometimes is useful to pass a specific list of
#'  var names (e.g. genes) to check their fold changes or p-values, instead of 
#'  the top/bottom genes. The var_names could be a dictionary or a list. 
#'  Default \code{NULL}
#' @param title Provide title for the figure.
#' @param vmin The value representing the lower limit of the color scale. 
#' Values smaller than vmin are plotted with the same color as vmin. 
#' Default \code{NULL}
#' @param vmax The value representing the upper limit of the color scale. 
#' Values larger than vmax are plotted with the same color as vmax. 
#' Default \code{NULL}
#' @param colorBarTitle Title for the color bar. 
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyFindMarkers(sce, colDataName = "Scanpy_louvain_1" )
#' plotScanpyMarkerGenesMatrixPlot(sce, groupBy = 'Scanpy_louvain_1')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyMarkerGenesMatrixPlot <- function(inSCE,
                                            groups = NULL,
                                            nGenes = 10, 
                                            groupBy,
                                            log2fcThreshold = NULL,
                                            parameters = "logfoldchanges",
                                            standardScale = 'var',
                                            features = NULL,
                                            title = '',
                                            vmin = NULL,
                                            vmax = NULL,
                                            colorBarTitle = "log fold change"){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  if(is.null(metadata(inSCE)["findMarkerScanpyObject"])){
    stop("Marker genes not found. Please run the 'runScanpyFindMarkers' function first.")
  }
  scanpyObject <- metadata(inSCE)[["findMarkerScanpyObject"]]
  
  if(!is.null(features)){
    return(sc$pl$rank_genes_groups_matrixplot(scanpyObject,
                                              groups = groups,
                                              groupby = groupBy,
                                              min_logfoldchange = log2fcThreshold,
                                              values_to_plot = parameters,
                                              standard_scale = standardScale,
                                              var_names = features,
                                              title = title,
                                              vmin = vmin,
                                              vmax = vmax,
                                              cmap = 'bwr',
                                              dendrogram = FALSE,
                                              colorbar_title = colorBarTitle))
  }
  else
    return(sc$pl$rank_genes_groups_matrixplot(scanpyObject,
                                              groups = groups,
                                              n_genes = as.integer(nGenes),
                                              groupby = groupBy,
                                              min_logfoldchange = log2fcThreshold,
                                              values_to_plot = parameters,
                                              standard_scale = standardScale,
                                              var_names = NULL,
                                              title = title,
                                              vmin = vmin,
                                              vmax = vmax,
                                              cmap = 'bwr',
                                              dendrogram = FALSE,
                                              colorbar_title = colorBarTitle))
  
  
}



###############################################
####### General plotting functions ############
################################################

#' plotScanpyHeatmap
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param useAssay Assay to use for plotting. By default it will use counts
#' assay.
#' @param features Genes to plot. Sometimes is useful to pass a specific list of
#'  var names (e.g. genes). The var_names could be a dictionary or a list. 
#' @param groupBy The key of the observation grouping to consider.
#' @param standardScale Whether or not to standardize the given dimension 
#' between 0 and 1, meaning for each variable or group, subtract the minimum and 
#' divide each by its maximum. Default \code{NULL} means that it doesn't perform
#' any scaling. 
#' @param vmin The value representing the lower limit of the color scale. 
#' Values smaller than vmin are plotted with the same color as vmin.
#' Default \code{NULL}
#' @param vmax The value representing the upper limit of the color scale. 
#' Values larger than vmax are plotted with the same color as vmax. 
#' Default \code{NULL}
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyUMAP(sce, useReducedDim = "scanpyPCA")
#' markers <- c("MALAT1" ,"RPS27" ,"CST3")
#' plotScanpyHeatmap(sce, features = markers, groupBy = 'Scanpy_louvain_1')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyHeatmap <- function(inSCE,
                              useAssay = NULL,
                              features,
                              groupBy,
                              standardScale = 'var',
                              vmin = NULL,
                              vmax = NULL){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             X_name = useAssay)
  
  return(sc$pl$heatmap(scanpyObject,
                       var_names = features,
                       groupby = groupBy,
                       standard_scale = standardScale,
                       vmin = vmin,
                       vmax = vmax,
                       show_gene_labels = TRUE,
                       dendrogram = FALSE))
  
}


#' plotScanpyDotPlot
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param useAssay Assay to use for plotting. By default it will use counts assay.
#' @param features Genes to plot. Sometimes is useful to pass a specific list of
#'  var names (e.g. genes). The var_names could be a dictionary or a list. 
#' @param groupBy The key of the observation grouping to consider.
#' @param standardScale Whether or not to standardize the given dimension 
#' between 0 and 1, meaning for each variable or group, subtract the minimum and 
#' divide each by its maximum. Default \code{NULL} means that it doesn't perform
#' any scaling. 
#' @param title Provide title for the figure.
#' @param vmin The value representing the lower limit of the color scale. 
#' Values smaller than vmin are plotted with the same color as vmin.
#' Default \code{NULL}
#' @param vmax The value representing the upper limit of the color scale. 
#' Values larger than vmax are plotted with the same color as vmax. 
#' Default \code{NULL}
#' @param title Provide title for the figure.
#' @param vmin The value representing the lower limit of the color scale. 
#' Values smaller than vmin are plotted with the same color as vmin. 
#' Default \code{NULL}
#' @param vmax The value representing the upper limit of the color scale. 
#' Values larger than vmax are plotted with the same color as vmax. 
#' Default \code{NULL}
#' @param colorBarTitle Title for the color bar. 
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyUMAP(sce, useReducedDim = "scanpyPCA")
#' markers <- c("MALAT1" ,"RPS27" ,"CST3")
#' plotScanpyDotPlot(sce, features = markers, groupBy = 'Scanpy_louvain_1')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyDotPlot <- function(inSCE,
                              useAssay = NULL,
                              features,
                              groupBy,
                              standardScale = NULL,
                              title = '',
                              vmin = NULL,
                              vmax = NULL,
                              colorBarTitle = "Mean expression in group"){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             X_name = useAssay)
  
  return(sc$pl$dotplot(scanpyObject,
                       var_names = features,
                       groupby = groupBy,
                       standard_scale = standardScale,
                       title = title,
                       vmin = vmin,
                       vmax = vmax,
                       cmap = 'bwr',
                       dendrogram = FALSE,
                       colorbar_title = colorBarTitle))
  
  
}

#' plotScanpyMatrixPlot
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param useAssay Assay to use for plotting. By default it will use counts assay.
#' @param features Genes to plot. Sometimes is useful to pass a specific list of
#'  var names (e.g. genes). The var_names could be a dictionary or a list. 
#' @param groupBy The key of the observation grouping to consider.
#' @param standardScale Whether or not to standardize the given dimension 
#' between 0 and 1, meaning for each variable or group, subtract the minimum and 
#' divide each by its maximum. Default \code{NULL} means that it doesn't perform
#' any scaling. 
#' @param title Provide title for the figure.
#' @param vmin The value representing the lower limit of the color scale. 
#' Values smaller than vmin are plotted with the same color as vmin.
#' Default \code{NULL}
#' @param vmax The value representing the upper limit of the color scale. 
#' Values larger than vmax are plotted with the same color as vmax. 
#' Default \code{NULL}
#' @param title Provide title for the figure.
#' @param vmin The value representing the lower limit of the color scale. 
#' Values smaller than vmin are plotted with the same color as vmin. 
#' Default \code{NULL}
#' @param vmax The value representing the upper limit of the color scale. 
#' Values larger than vmax are plotted with the same color as vmax. 
#' Default \code{NULL}
#' @param colorBarTitle Title for the color bar. 
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyUMAP(sce, useReducedDim = "scanpyPCA")
#' markers <- c("MALAT1" ,"RPS27" ,"CST3")
#' plotScanpyMatrixPlot(sce, features = markers, groupBy = 'Scanpy_louvain_1')
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyMatrixPlot <- function(inSCE,
                              useAssay = NULL,
                              features,
                              groupBy,
                              standardScale = NULL,
                              title = '',
                              vmin = NULL,
                              vmax = NULL,
                              colorBarTitle = "Mean expression in group"){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             X_name = useAssay)
  
  return(sc$pl$matrixplot(scanpyObject,
                       var_names = features,
                       groupby = groupBy,
                       standard_scale = standardScale,
                       title = title,
                       vmin = vmin,
                       vmax = vmax,
                       cmap = 'bwr',
                       dendrogram = FALSE,
                       colorbar_title = colorBarTitle))
  
  
}


#' plotScanpyViolin
#' 
#' @param inSCE Input \code{SingleCellExperiment} object.
#' @param useAssay Assay to use for plotting. By default it will use counts
#' assay.
#' @param features Genes to plot. Sometimes is useful to pass a specific list of
#'  var names (e.g. genes). The var_names could be a dictionary or a list. 
#' @param groupBy The key of the observation grouping to consider.
#' @param xlabel Label of the x axis. Defaults to groupBy.
#' @param ylabel Label of the y axis. If NULL and groupBy is NULL, 
#' defaults to 'value'. If NULL and groupBy is not NULL, defaults to features.
#' @examples
#' data(scExample, package = "singleCellTK")
#' \dontrun{
#' sce <- runScanpyNormalizeData(sce, useAssay = "counts")
#' sce <- runScanpyFindHVG(sce, useAssay = "scanpyNormData", method = "seurat")
#' sce <- runScanpyScaleData(sce, useAssay = "scanpyNormData")
#' sce <- runScanpyPCA(sce, useAssay = "scanpyScaledData")
#' sce <- runScanpyFindClusters(sce, useReducedDim = "scanpyPCA")
#' sce <- runScanpyUMAP(sce, useReducedDim = "scanpyPCA")
#' markers <- c("MALAT1" ,"RPS27" ,"CST3")
#' plotScanpyViolin(sce, features = markers, groupBy = "Scanpy_louvain_1")
#' }
#' @return plot object
#' @export
#' @importFrom reticulate py_module_available py_set_seed import
plotScanpyViolin <- function(inSCE,
                             useAssay = NULL,
                             features,
                             groupBy, 
                             xlabel = '', 
                             ylabel = NULL){
  
  if (!reticulate::py_module_available(module = "scanpy")) {
    warning("Cannot find python module 'scanpy', please install Conda and",
            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
            "If one of these have been previously run to install the modules,",
            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
            " respectively, if R has been restarted since the module installation.",
            " Alternatively, Scanpy can be installed on the local machine",
            "with pip (e.g. pip install scanpy) and then the 'use_python()'",
            " function from the 'reticulate' package can be used to select the",
            " correct Python environment.")
    return(inSCE)
  }
  
  scanpyObject <- zellkonverter::SCE2AnnData(sce = inSCE,
                                             X_name = useAssay)
  
  return(sc$pl$violin(scanpyObject,
                      keys = features,
                      groupby = groupBy,
                      xlabel = xlabel,
                      ylabel = ylabel))
  
}
compbiomed/singleCellTK documentation built on May 8, 2024, 6:58 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
compbiomed/singleCellTK
Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data

R/scanpyFunctions.R
In compbiomed/singleCellTK: Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data

R Package Documentation

Browse R Packages

We want your feedback!

compbiomed/singleCellTK Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data

R/scanpyFunctions.R In compbiomed/singleCellTK: Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data

R Package Documentation

Browse R Packages

We want your feedback!

compbiomed/singleCellTK
Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data

R/scanpyFunctions.R
In compbiomed/singleCellTK: Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data