SPATA2: A Toolbox for Spatial Expression Analysis

Documented in identify_obs_in_polygon identify_obs_in_spat_ann identifySpatialOutliers identifyVariableMolecules idSA idST include_tissue_outline increase_n_data_points increase_polygon_vertices interpolate_points_along_path intersect_polygons is_area is_area_pixel is_area_si is_dist is_dist_pixel is_dist_si is_inside_plg is_outlier isSPATA2 is_unit_area is_unit_dist polygon_inside_polygon polygon_intersects_polygon

# id ----------------------------------------------------------------------

identify_artefact_threshold <- function(numbers) {
  # Calculate the median and MAD
  median_value <- median(numbers)
  mad_value <- mad(numbers)

  # Calculate the threshold multiplier based on the MAD
  threshold_multiplier <- 3.5  # Adjust this value based on your needs
  if (mad_value > 0) {
    threshold_multiplier <- qnorm(0.75) * (median(abs(numbers - median_value)) / mad_value)
  }

  # Calculate the artifact threshold based on the median and MAD
  artifact_threshold <- median_value + threshold_multiplier * mad_value

  # Return the calculated artifact threshold and threshold multiplier
  return(list(threshold = artifact_threshold, threshold_multiplier = threshold_multiplier))
}


identify_zero_inflated_variables <- function(df,
                                             variables,
                                             coef = 1.5,
                                             verbose = TRUE){

  confuns::check_one_of(
    input = variables,
    against = base::colnames(df)
  )

  confuns::give_feedback(
    msg = glue::glue("Identifying zero inflated variables."),
    verbose = verbose
  )

  if(base::isTRUE(verbose)){

    pb <- confuns::create_progress_bar(total = base::length(variables))

  }

  zero_inflated <-
    purrr::map_lgl(
      .x = variables,
      .f = function(var){

        if(base::isTRUE(verbose)){ pb$tick() }

        outlier <- is_outlier(df[[var]], coef = coef)

        x <- df[[var]][!outlier]

        res <- base::all(x == 0)

        return(res)

      }
    )

  out <- variables[zero_inflated]

  return(out)

}


#' @title Identify observations inside a polygon
#'
#' @description This function determines whether points (observations) in a given data
#' frame are located inside a specified polygon.
#'
#' @param coords_df A data frame containing the coordinates of the observations.
#' Must contain columns specified in `cvars`.
#' @param polygon_df A data frame containing the coordinates of the polygon's vertices.
#' Must contain columns corresponding to those specified in `cvars`.
#' @param outline_df A data.frame as returned by [`getSpatAnnOutlineDf()`].
#' @param strictly A logical value indicating whether to consider only points strictly inside the polygon (`TRUE`)
#' or to include points on the border (`FALSE`).
#' @param opt A character string specifying the operation to perform on identified points.
#' Options are "keep" (default), "remove", or any other string to create a new column in `coords_df`
#' indicating whether each point is inside (`TRUE`) or outside (`FALSE`) the polygon.
#' @param cvars A character vector specifying the variable names in `coords_df` and `polygon_df`
#' that represent the x and y coordinates. Defaults to `c("x", "y")`.
#'
#' @return A modified version of `coords_df` based on the operation specified in `opt`.
#'
#' @details The function can be configured to strictly consider points within the polygon
#' or to include points on the border. Additionally, the function allows for either keeping
#' or removing the identified points, or marking them based on their position relative to the polygon.
#'
#' @examples
#' # Example data frames
#' coords_df <- data.frame(x = c(1, 2, 3), y = c(3, 2, 1))
#' polygon_df <- data.frame(x = c(1, 2, 3, 1), y = c(1, 2, 3, 1))
#'
#' # Identify and keep points inside the polygon
#' inside_points <- identify_obs_in_polygon(coords_df, polygon_df, strictly = TRUE)
#'
#' # Identify and remove points inside the polygon
#' outside_points <- identify_obs_in_polygon(coords_df, polygon_df, strictly = TRUE, opt = "remove")
#'
#' @export
identify_obs_in_polygon <- function(coords_df,
                                    polygon_df,
                                    strictly,
                                    opt = "keep",
                                    cvars = c("x", "y")){

  var.class <-
    purrr::map_chr(
      .x = cvars,
      .f = function(i){ return("numeric")}
    ) %>%
    purrr::set_names(nm = cvars)

  confuns::check_data_frame(
    df = polygon_df,
    var.class = var.class
  )

  confuns::check_data_frame(
    df = coords_df,
    var.class = var.class
  )

  res <-
    sp::point.in.polygon(
      point.x = coords_df[[cvars[1]]],
      point.y = coords_df[[cvars[2]]],
      pol.x = polygon_df[[cvars[1]]],
      pol.y = polygon_df[[cvars[2]]]
    )

  inside <- if(base::isTRUE(strictly)){ 1 } else { c(1,2,3) }

  if(opt == "keep"){

    coords_df <- coords_df[res %in% inside, ]

  } else if(opt == "remove"){

    coords_df <- coords_df[!res %in% inside, ]

  } else {

    coords_df[[opt]] <- res %in% inside

  }

  return(coords_df)

}

#' @rdname identify_obs_in_polygon
#' @export
identify_obs_in_spat_ann <- function(coords_df,
                                     outline_df,
                                     strictly,
                                     opt = "keep",
                                     cvars = c("x", "y")){

  # identify "hole" obs
  if(containsInnerBorders(outline_df)){

    inner_bcs <-
      purrr::map(
        .x =
          stringr::str_subset(outline_df$border, pattern = "inner") %>%
          base::unique(),
        .f = function(ib){

          dplyr::filter(outline_df, border == {{ib}}) %>%
            identify_obs_in_polygon(
              coords_df = coords_df,
              polygon_df = .,
              strictly = strictly,
              cvars = cvars,
              opt = "keep"
            ) %>%
            dplyr::pull(barcodes)

        }
      ) %>%
      purrr::flatten_chr()

  } else {

    inner_bcs <- base::character()

  }


  outer_df <- dplyr::filter(outline_df, border == "outer")

  core_bcs <-
    identify_obs_in_polygon(
      coords_df = coords_df,
      polygon_df = outer_df,
      strictly = strictly,
      opt = "keep"
    ) %>%
    dplyr::pull(barcodes)

  core_bcs <- core_bcs[!core_bcs %in% inner_bcs]

  if(opt == "keep"){

    coords_df <- dplyr::filter(coords_df, barcodes %in% {{core_bcs}})

  } else if(opt == "remove"){

    coords_df <- dplyr::filter(coords_df, !barcodes %in% {{core_bcs}})

  } else {

    coords_df[[opt]] <- coords_df$barcodes %in% core_bcs

  }

  return(coords_df)

}


#' @title Quick access to IDs
#'
#' @description Handy functions to access the ID of a spatial annotation
#' or a spatial trajectory if there exist only one of each in the object. Mostly
#' used to define the default of dependent functions. Return an error if there
#' are no or more than one IDs found.
#'
#' @inherit argument_dummy params
#'
#' @return Character value.
#' @export
#' @keywords internal

idSA <- function(object, verbose = NULL){

  hlpr_assign_arguments(object)

  id <- getSpatAnnIds(object)

  if(base::length(id) == 0){

    stop("No spatial annotations found in this object.")

  } else if(base::length(id) > 1){

    stop("More than one spatial annotation found in this object. Please specify argument `ids`.")

  }

  confuns::give_feedback(
    msg = glue::glue("Spatial annotation: '{id}'"),
    verbose = verbose
  )

  return(id)

}

#' @rdname idSA
#' @export
idST <- function(object, verbose = NULL){

  hlpr_assign_arguments(object)

  id <- getSpatialTrajectoryIds(object)

  if(base::length(id) == 0){

    stop("No spatial trajectories found in this object.")

  } else if(base::length(id) > 1){

    stop("More than one spatial trajectories found in this object. Please specify argument `id`.")

  }

  confuns::give_feedback(
    msg = glue::glue("Spatial trajectory: '{id}'"),
    verbose = verbose
  )

  return(id)

}


#' @title Identifies the background color
#'
#' @description Identifies the background color based on the results
#' of [`identifyPixelContent()`] by averaging the color values of
#' all pixels identified as background.
#'
#' @inherit argument_dummy params
#' @inherit update_dummy params
#'
#' @export
#' @keywords internal
#'
setGeneric(name = "identifyBackgroundColor", def = function(object, ...){

  standardGeneric(f = "identifyBackgroundColor")

})

#' @rdname identifyBackgroundColor
#' @export
setMethod(
  f = "identifyBackgroundColor",
  signature = "SPATA2",
  definition = function(object, img_name = activeImage(object), verbose = NULL, ...){

    hlpr_assign_arguments(object)

    sp_data <- getSpatialData(object)

    sp_data <- identifyBackgroundColor(sp_data, img_name = img_name, verbose = verbose)

    object <- setSpatialData(object, sp_data = sp_data)

    returnSpataObject(object)

  }
)

#' @rdname identifyBackgroundColor
#' @export
setMethod(
  f = "identifyBackgroundColor",
  signature = "SpatialData",
  definition = function(object, img_name = activeImage(object), verbose = TRUE, ...){

    if(base::is.null(img_name)){

      img_name <- activeImage(object)

    }

    confuns::check_one_of(
      input = img_name,
      against = getImageNames(object)
    )

    for(i in base::seq_along(img_name)){

      hist_img <- getHistoImage(object, img_name = img_name[i])

      hist_img <- identifyBackgroundColor(hist_img)

      object <- setHistoImage(object, hist_img = hist_img)

    }

    return(object)

  }
)

#' @rdname identifyBackgroundColor
#' @export
setMethod(
  f = "identifyBackgroundColor",
  signature = "HistoImage",
  definition = function(object, verbose = TRUE, ...){

    confuns::give_feedback(
      msg = glue::glue("Identifying background color for image '{object@name}'."),
      verbose = verbose
    )

    col_df <-
      getPixelDf(object, colors = TRUE, content = TRUE, transform = FALSE) %>%
      dplyr::filter(content == "background") %>%
      dplyr::summarise(
        dplyr::across(
          .cols = dplyr::starts_with("col"),
          .fns = ~ base::mean(.x, na.rm = TRUE)
        )
      ) %>%
      magrittr::set_colnames(value = c("red", "green", "blue"))

    object@bg_color <-
      grDevices::rgb(
        red = col_df$red,
        green = col_df$green,
        blue = col_df$blue
      )

    return(object)

  })


#' @title Identify pixel content
#'
#' @description Determines the type of content displayed by each pixel in the image,
#' categorizing it as tissue from tissue segments or fragments, artifacts, or background.
#'
#' @param method Character value. The method to use. Either *'otsu'* or *'sps'*. See details
#' for more information. Defaults to *'otsu'*. (*'sps'* takes significantly longer, only
#' recommened if *'otsu'* does not provide any useful results).
#' @param superpixel Numeric value specifying the number of superpixels to compute.
#' Given as an argument to `$spixel_segmentation()` function. Increased values can
#' improve the output but increase runtime.
#' @param compactness_factor Numeric value controlling the compactness of superpixels.
#' Given as an argument to `$spixel_segmentation()` function.
#' @param eps Numeric value specifying the value of `eps` parameter used in `dbscan::dbscan()`
#' when applied on the tissue pixels. If the value is less than 1, it is calculated
#' as a percentage of the width or height of the image, depending on which is larger.
#' If the value is greater than or equal to 1, it is taken as an absolute value.
#'
#' Defaults to 1.414*2*2, twice the distance between two diagonally adjacent pixels in a
#' uniform pixel space.
#'
#' @param minPts Numeric value specifying the value of `minPts` parameter used in `dbscan::dbscan()`
#' when applied on the tissue pixels identified as potential tissue. If the value is less than 1,
#' it is calculated as a percentage of the width or height of the image, depending on which is larger.
#' If the value is greater than or equal to 1, it is taken as an absolute value.
#'
#' Defaults to 0.005, which is 0.5% of the total number of pixels.
#'
#' @param frgmt_threshold Numeric vector of length 2 specifying the range of the number of pixels
#' an identified non-background-object in the image must have to be considered a tissue fragment. Objects with a lower number
#' of pixels than the minimum threshold are considered artifacts, and objects with a higher number
#' of pixels than the maximum threshold are considered tissue sections. If a threshold value is less than 1,
#' it is calculated as a percentage of the total number of pixels in the image.
#' If a threshold value is greater than or equal to 1, it is taken as an absolute value.
#'
#' @param capture_area Data.frame or `NULL`. If data.frame contains the vertices of the polygon
#' that corresponds to the capture area in which the tissue is located in. Data.frame should have
#' at least three rows with variables idx (integer), x (numeric), y (numeric).
#'
#' @param use_capture_area Logical value. If `TRUE`, the capture area as obtained by [`getCaptureArea()`]
#' is used to guide the analysis. Defaults to `TRUE` as long as the object contains a valid
#' capture area data.frame.
#'
#' @param bg_dark Logical value. Should be set to `TRUE`, if the background is darker than
#' the tissue. Defaults to `FALSE`.
#'
#' @inherit make_binary_image params
#' @inherit background_white params details
#' @inherit argument_dummy params
#'
#' @note If `img_name` specifies multiple images, the function
#' iterates over all of them. If it is `NULL` the active image is picked.
#'
#' @seealso
#' For subsequent image processing: [`identifyTissueOutline(..., method = 'image')`].
#' For visualization of results: [`plotImageMask()`], [`plotPixelContent()`].
#' For extraction of results: [`getPixelDf()`].
#'
#' @details
#' This function classifies pixels in an image as tissue, background, or artifacts using two primary methods:
#' Otsu's method or Superpixel Segmentation (SPS), followed by DBSCAN clustering for refinement.
#'
#' \itemize{
#'   \item \strong{Otsu's Method:} Applies global thresholding to separate tissue from the background
#'    using Otsu's method, which minimizes intra-class variance.
#'    Parameters that affect the output: `sigma`
#'   \item \strong{Superpixel Segmentation (SPS):} Divides the image into superpixels using the SLIC algorithm.
#'    Superpixels are classified into tissue or background using k-means clustering.
#'    Parameters that affect the output: `percentile`, `compactness_factor`, `superpixel`
#' }
#'
#' The resulting binary image is refined with DBSCAN clustering to identify contiguous
#' tissue sections, fragments, and artifacts.
#'
#' @references
#' Nobuyuki Otsu, "A threshold selection method from gray-level histograms".
#' IEEE Trans. Sys., Man., Cyber. 9 (1): 62-66. doi:10.1109/TSMC.1979.4310076 (1979)
#'
#' @return The method for class `Image` returns a data.frame of the following
#' variables.
#'
#' \itemize{
#'  \item{*pixel*:}{ character. Pixel index.}
#'  \item{*width*:}{ numeric. Pixel position on horizontal axis of the image.}
#'  \item{*height*:}{ numeric. Pixel position on the vertical axis of the image.}
#'  \item{*clusterK2*:}{ character. Either *'background'* or *'tissue'*. (if *method = 'sps'*)}
#'  \item{*colTiss#* :}{ numeric. Numeric variables that correspond to the color dimensions
#'  of the image mask based on which the clustering of *clusterK2* was conducted. (if *method = 'sps'*)}
#'  \item{*clusterDBSCAN*:}{ character. Cluster results of dbscan::dbscan() after removal
#'  of background pixels.}
#'  \item{*clusterDBSCAN_size*:}{numeric. Size of each dbscan cluster.}
#'  \item{*content*:}{ character. The identified content of each pixel.}
#' }
#'
#' Methods for S4-classes serving as containers return the input object with the
#' the results stored in the corresponding slots.

setGeneric(name = "identifyPixelContent", def = function(object, ...){

  standardGeneric(f = "identifyPixelContent")

})

#' @rdname identifyPixelContent
#' @export
setMethod(
  f = "identifyPixelContent",
  signature = "SPATA2",
  definition = function(object,
                        method = "otsu",
                        img_name = activeImage(object),
                        sigma = 0,
                        percentile = 0,
                        compactness_factor = 10,
                        superpixel = 600,
                        eps = 1.414*2,
                        minPts = 0.005,
                        frgmt_threshold = c(0.001, 0.05),
                        use_capture_area = containsCaptureArea(object),
                        verbose = TRUE){

    if(base::is.null(img_name)){

      img_name <- activeImage(object)

    }

    sp_data <- getSpatialData(object)

    sp_data <-
      identifyPixelContent(
        object = sp_data,
        method = method,
        sigma = sigma,
        img_name = img_name,
        percentile = percentile,
        compactness_factor = compactness_factor,
        superpixel = superpixel,
        eps = eps,
        minPts = minPts,
        frgmt_threshold = frgmt_threshold,
        use_capture_area = use_capture_area,
        verbose = verbose
      )

    object <- setSpatialData(object, sp_data = sp_data)

    returnSpataObject(object)

  }
)

#' @rdname identifyPixelContent
#' @export
setMethod(
  f = "identifyPixelContent",
  signature = "SpatialData",
  definition = function(object,
                        method = "otsu",
                        sigma = 0,
                        img_name = activeImage(object),
                        percentile = 0,
                        compactness_factor = 10,
                        superpixel = 1000,
                        eps = 1.414*2214,
                        minPts = 0.005,
                        frgmt_threshold = c(0.001, 0.05),
                        use_capture_area = containsCaptureArea(object),
                        bg_dark = FALSE,
                        verbose = TRUE){

    containsHistoImages(object, error = TRUE)

    confuns::check_one_of(
      input = img_name,
      against = getImageNames(object)
    )

    for(i in base::seq_along(img_name)){

      hist_img <- getHistoImage(object, img_name = img_name[i])

      if(use_capture_area){

        capture_area <- getCaptureArea(object, img_name = img_name)

      } else {

        capture_area <- NULL

      }

      hist_img <-
        identifyPixelContent(
          object = hist_img,
          sigma = sigma,
          method = method,
          percentile = percentile,
          compactness_factor = compactness_factor,
          superpixel = superpixel,
          eps = eps,
          minPts = minPts,
          frgmt_threshold = frgmt_threshold,
          capture_area = capture_area,
          bg_dark = bg_dark,
          verbose = verbose
        )

      object <- setHistoImage(object, hist_img = hist_img)

    }

    return(object)

  })

#' @rdname identifyPixelContent
#' @export
setMethod(
  f = "identifyPixelContent",
  signature = "HistoImage",
  definition = function(object,
                        method = "otsu",
                        sigma = 0,
                        percentile = 0,
                        compactness_factor = 10,
                        superpixel = 1000,
                        eps = 1.414*2214,
                        minPts = 0.005,
                        frgmt_threshold = c(0.001, 0.05),
                        capture_area = NULL,
                        bg_dark = FALSE,
                        verbose = TRUE){

    confuns::give_feedback(
      msg = glue::glue("Identifying pixel content of image '{object@name}'."),
      verbose = verbose
    )

    if(!containsImage(object)){

      object <- loadImage(object)

    }

    pxl_df_out <-
      identifyPixelContent(
        object = getImage(object),
        sigma = sigma,
        method = method,
        percentile = percentile,
        compactness_factor = compactness_factor,
        superpixel = superpixel,
        eps = eps,
        minPts = minPts,
        frgmt_threshold = frgmt_threshold,
        capture_area = capture_area,
        bg_dark = bg_dark,
        verbose = verbose
      )

    out_vec <- pxl_df_out[["content"]]

    base::names(out_vec) <-
      stringr::str_c(pxl_df_out[["pixel"]], "_w", pxl_df_out[["width"]], "_h", pxl_df_out[["height"]])

    object@pixel_content <- out_vec

    return(object)

  }
)

#' @rdname identifyPixelContent
#' @export
setMethod(
  f = "identifyPixelContent",
  signature = "Image",
  definition = function(object,
                        method = "otsu",
                        sigma = 0,
                        percentile = 0,
                        compactness_factor = 10,
                        superpixel = 1000,
                        frgmt_threshold = c(0.001, 0.05),
                        eps = 1.414*2214,
                        minPts = 0.005,
                        capture_area = NULL,
                        bg_dark = FALSE,
                        verbose = TRUE,
                        ...){

    confuns::check_one_of(
      input = method,
      against = c("otsu", "sps")
    )

    image_orig <- object

    # extract image data and create base pixel df
    img_dims <- base::dim(image_orig@.Data)

    bg_val <- ifelse(bg_dark, yes = 0, no = 1)

    if(eps < 1){

      eps <- eps * base::max(img_dims[1:2])

    }

    if(minPts < 1){

      minPts <- minPts * base::max(img_dims[1:2])

    }

    pxl_df_base <- getPixelDf(object)

    # start algorithm
    if(method == "sps"){

      # use greyscaled image, if desired
      if(FALSE){

        # temporarily padd image to square for clahe()
        image_orig <- padd_image(image_orig)

        # use greyscale and enhance contrast, then reduce to original dims
        EBImage::colorMode(image_orig) <- EBImage::Grayscale
        image_orig <- EBImage::clahe(image_orig)

      }

      if(base::length(img_dims) == 3){

        n <- img_dims[3]

      } else {

        n <- 1

      }

      # increase contrast by setting potential background pixels to white
      if(percentile != 0){

        image_proc <- background_white(image_orig, percentile = percentile)

      } else {

        image_proc <- image_orig

      }


      # use slicap to create a binary image with a tissue mask
      if (!requireNamespace("SuperpixelImageSegmentation", quietly = TRUE)) {
        stop("Please install 'SuperpixelImageSegmentation' to identify the pixel content")
      }

      init <- SuperpixelImageSegmentation::Image_Segmentation$new()

      spx_masks <-
        init$spixel_segmentation(
          input_image = image_proc,
          method = "slic",
          compactness_factor = compactness_factor,
          superpixel = superpixel,
          verbose = verbose,
          # can not be adjusted
          AP_data = TRUE,
          kmeans_method = "kmeans",
          adjust_centroids_and_return_masks = TRUE
        )

      # potentially problematic:
      # assumes that all background pixel are identified as one cluster (what if heterogeneous background?)
      # assumes that the background is the cluster with the highest area / number of pixels
      # (as the tissue is usually composed of several different clusters each being small in size)
      # masks are presented in white (white value = 1, black value = 0)
      # ---> pick mask with highest mean to obtain background cluster
      mm <- purrr::map_dbl(spx_masks[["masks"]], .f = base::mean)

      mask_tissue <- base::which(mm == base::max(mm))

      image_mask <- EBImage::as.Image(spx_masks[["masks"]][[mask_tissue]])

      # extract the color values of the processed image
      for(i in 1:n){

        if (!requireNamespace("reshape", quietly = TRUE)) {
          stop("Please install 'reshape'")
        }

        temp_df <-
          reshape::melt(image_mask@.Data[ , ,i]) %>%
          magrittr::set_colnames(value = c("width", "height", stringr::str_c("colTiss", i))) %>%
          tibble::as_tibble()

        pxl_df_base <-
          dplyr::left_join(x = pxl_df_base, y = temp_df, by = c("width", "height")) %>%
          dplyr::filter(width <= img_dims[1], height <= img_dims[2])

      }

      # cluster color values with k = 2 in order to get background and tissue cluster
      k_out <-
        stats::kmeans(
          x = base::as.matrix(dplyr::select(pxl_df_base, dplyr::starts_with("colTiss"))),
          centers = 2
        )

      pxl_df_base$clusterK2 <- base::as.character(k_out$cluster)

      # identify background based on mean color intensity
      background_cluster <-
        dplyr::group_by(pxl_df_base, clusterK2) %>%
        dplyr::summarise(
          dplyr::across(
            .cols = dplyr::starts_with("col"),
            .fns = base::mean
          )
        )

      background_cluster[["rowMean"]] <-
        dplyr::select(background_cluster, dplyr::starts_with("col")) %>%
        base::as.matrix() %>%
        base::rowMeans()

      background_cluster_group <-
        dplyr::filter(background_cluster, rowMean == base::max(rowMean, na.rm = TRUE)) %>%
        dplyr::pull(clusterK2)

      pxl_df_base <-
        dplyr::mutate(
          .data = pxl_df_base,
          clusterK2 =
            dplyr::if_else(
              condition = clusterK2 == {background_cluster_group},
              true = "background",
              false = "tissue"
            )
        )

      # cluster pixel based on dbscan to identify possible tissue fragments
      pxl_df_tissue <-
        # 1. identify and remove background pixel, such that alleged tissue pixel remain
        dplyr::mutate(.data = pxl_df_base, background = clusterK2 == "background") %>%
        dplyr::filter(!background) %>%
        # 2. identify different tissue sections / parted tissue fragments / artefacts by ...
        # 2.1 ...running dbscan to identify contiguous pixel groups
        add_dbscan_variable(
          eps = eps,
          minPts = minPts,
          name = "clusterDBSCAN",
          x = "width",
          y = "height"
        ) %>%
        # 2.2 ... quantifying their size by counting the pixels per DSCAN group
        dplyr::group_by(clusterDBSCAN) %>%
        dplyr::mutate(clusterDBSCAN_size = dplyr::n()) %>%
        dplyr::ungroup()

      pxl_df_method <-
        dplyr::left_join(
          x = pxl_df_base,
          y = pxl_df_tissue[c("pixel", "background", "clusterDBSCAN", "clusterDBSCAN_size")],
          by = "pixel"
        )

    } else if(method == "otsu") {

      if(is.data.frame(capture_area)){

        pxl_df_base <- getPixelDf(object, colors = TRUE)

        pxl_df <-
          identify_obs_in_polygon(
            coords_df = dplyr::mutate(pxl_df_base, x = width, y = height),
            polygon_df = capture_area,
            strictly = TRUE,
            opt = "inside"
          )

        pxl_df$col1[!pxl_df$inside] <- bg_val
        pxl_df$col2[!pxl_df$inside] <- bg_val
        pxl_df$col3[!pxl_df$inside] <- bg_val

        object <- pixel_df_to_image(pxl_df)

      }

      # revert
      if(bg_dark){

        object[,,1:3] <- 1-object[,,1:3]

      }

      pxl_df_base <-
        object %>%
        #EBImage::flip() %>%
        EBImage::transpose() %>%
        make_binary_image(sigma = sigma) %>%
        make_pixel_dataframe() # creates background variable

      pxl_df_tissue <-
        dplyr::filter(pxl_df_base, !background) %>%
        # 2. identify different tissue sections / parted tissue fragments / artefacts by ...
        # 2.1 ...running dbscan to identify contiguous pixel groups
        add_dbscan_variable(
          eps = eps,
          minPts = minPts,
          name = "clusterDBSCAN",
          x = "width",
          y = "height"
        ) %>%
        # 2.2 ... quantifying their size by counting the pixels per DSCAN group
        dplyr::group_by(clusterDBSCAN) %>%
        dplyr::mutate(clusterDBSCAN_size = dplyr::n()) %>%
        dplyr::ungroup() %>%
        dplyr::select(-background)

      pxl_df_method <-
        dplyr::left_join(
          x = pxl_df_base,
          y = pxl_df_tissue[c("pixel", "clusterDBSCAN", "clusterDBSCAN_size")],
          by = "pixel"
        )

    }

    # set the frgmt threshold as an absolute measure based on the input
    threshold <- c(0, 0)

    for(i in 1:2){

      if(frgmt_threshold[i] > 1){

        threshold[i] <- frgmt_threshold[i]

      } else {

        threshold[i] <- base::nrow(pxl_df_base)*frgmt_threshold[i]

      }

    }

    threshold <- base::ceiling(threshold)

    # add results to base pxl_df
    pxl_df <-
      dplyr::mutate(
        .data = pxl_df_method,
        content = dplyr::case_when(
          clusterDBSCAN == "0" ~ "artefact",
          !background & clusterDBSCAN_size > {threshold[2]} ~ stringr::str_c("tissue_section", clusterDBSCAN),
          !background & clusterDBSCAN_size > {threshold[1]} ~ stringr::str_c("tissue_fragment", clusterDBSCAN),
          !background & clusterDBSCAN_size < {threshold[1]} ~ "artefact",
          TRUE ~ "background"
        ),
        content_type = stringr::str_remove(string = content, pattern = "\\d*$")
      ) %>%
      dplyr::arrange(dplyr::desc(content_type))

    pxl_df_out <-
      purrr::map_dfr(
        .x = base::unique(pxl_df[["content_type"]]),
        .f = function(ctype){

          df_ctype <- dplyr::filter(pxl_df, content_type == {{ctype}})

          if(ctype %in% c("background", "artefact")){

            out <-
              dplyr::mutate(
                .data = df_ctype,
                content_index = 1L,
                content_type = {{ctype}}
              )

          } else {

            smrd_df <-
              dplyr::group_by(df_ctype, content) %>%
              dplyr::summarise(mean_height = base::mean(height, na.rm = TRUE), .groups = "drop") %>%
              dplyr::arrange(mean_height) %>%
              dplyr::mutate(content_index = dplyr::row_number()) %>%
              dplyr::select(-mean_height)

            out <-
              dplyr::left_join(x = df_ctype, y = smrd_df, by = "content") %>%
              dplyr::mutate(
                content =
                  stringr::str_remove(content, pattern = "\\d*$") %>%
                  stringr::str_c(., content_index, sep = "_")
              )

          }

          # create levels
          levels_ordered <-
            dplyr::distinct(out, content, content_index) %>%
            dplyr::arrange(content_index) %>%
            dplyr::pull(content)

          out[["content"]] <- base::factor(out[["content"]], levels = levels_ordered)

          # sort factor

          return(out)

        }
      )

    return(pxl_df_out)

  }
)


#' @title Identify spatial outliers
#'
#' @description Labels data points as spatial outliers depending on certain
#' criteria with a new meta variable called *sp_outlier*. See details for more.
#'
#' Requires the results of [`identifyTissueOutline()`].
#'
#' @param method Character vector. The method(s) to use. A combination of *'obs'*
#' and/or *'image'*. Defaults to *'obs'*. See details for more.
#' @param img_name Character value. The name of the image whose tissue outline
#' is used if `method` contains *'image'*.
#' @param buffer Numeric value. Expands the tissue outline to include observations
#' that lie on the edge of the outline and are mistakenly removed.
#' @param eps,minPts Given to the corresponding arguments of
#' [`dbscan::dbscan()`] if `method` contains *'obs'*.
#' @param test Character value. Only required if `method = c('obs', 'image')`. If
#' *'any'*, spots are labeled as outliers if at least one method identifies them
#' as outliers. If *'all'*, spots are labeled as outliers if both methods identify
#' them as outliers.
#' @param min_section Numeric value. The minimum number of observations a spatial cluster must
#' contain such that the whole cluster is identified as a contiguous tissue section.
#'
#' @inherit argument_dummy params
#' @inherit dbscan::dbscan params
#' @inherit update_dummy return
#'
#' @details
#'
#' In case of `method = 'obs'`, the results from `identifyTissueOutline(object, method = 'obs')`
#' are directly transferred to the new meta feature *sp_outlier*, which declares
#' whether an observation is a spatial outlier or not.
#'
#' In case of `method = 'image'`, the image based tissue outline from the
#' `identifyTissueOutline()` function is used. This function has created polygons that
#' outline the tissue or tissue sections identified in the image. For each data point,
#' the function checks which polygon it falls within and assigns it to the corresponding
#' group. If an observation does not fall within any of the tissue polygons, it is
#' considered a spatial outlier.
#'
#' If `method = c('obs', 'image')`, the results of both methods are used Whether a
#' data point is considered a spatial outlier depends on the `test` argument:
#'
#' \itemize{
#'  \item{`test = 'any'`:} The data point is considered a spatial outlier if
#'   either of the two tests classifies it as an outlier.
#'  \item{`test = 'all'`:} The data point is considered a spatial outlier
#'   only if both tests classify it as an outlier.
#' }
#'
#' The results can be visualized using `plotSurface(object, color_by = "sp_outlier")`.
#' In case of bad results the function can be run over and over again with
#' changing parameters as the results are simply overwritten.
#'
#' @seealso [`identifyTissueOutline()`], [`mergeTissueSections()`]
#'
#' @export

identifySpatialOutliers <- function(object,
                                    method = "obs",
                                    img_name = activeImage(object),
                                    buffer = 0,
                                    eps = recDbscanEps(object),
                                    minPts = recDbscanMinPts(object),
                                    min_section = nObs(object)*0.05,
                                    test = "any",
                                    verbose = NULL){

  hlpr_assign_arguments(object)

  confuns::give_feedback(
    msg = "Identifying spatial outliers.",
    verbose = verbose
  )

  confuns::check_one_of(
    input = method,
    against = c("obs", "image")
  )

  confuns::check_one_of(
    input = test,
    against = c("all", "any")
  )

  # overwrite active image temporarily
  if(containsHistoImages(object)){

    active_image <- activeImage(object)
    object <- activateImageInt(object, img_name = img_name)

  }

  meta_df <- getMetaDf(object)

  if("obs" %in% method){

    if(!"tissue_section" %in% base::colnames(meta_df)){

      stop("Need output of `identifyTissueSection(object, method = 'obs').`")

    }

    meta_df$outlier_obs <- meta_df$tissue_section == "tissue_section_0"

    # check min_section
    meta_df <-
      dplyr::group_by(meta_df, tissue_section) %>%
      dplyr::mutate(n = dplyr::n()) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(
        outlier_obs = dplyr::if_else(n < {{min_section}}, TRUE, outlier_obs),
        n = NULL
        )

  }

  if("image" %in% method){

    containsTissueOutline(object, img_name = img_name, error = TRUE)

    outline_df <-
      getTissueOutlineDf(
        object = object,
        img_name = img_name,
        method = "image",
        by_section = TRUE
      )

    # declare all obs as artefacts
    coords_df <- getCoordsDf(object)
    coords_df$sample <- NULL
    coords_df[["section_outline"]] <- "artefact"

    # then set actual section name
    for(section in base::unique(outline_df$section)){

      section_df <-
        dplyr::filter(outline_df, section == {{section}})

      if(buffer != 0){

        section_df <-
          dplyr::select(section_df, x,y) %>%
          buffer_area(buffer = buffer)

      }

      ob_in_section <-
        identify_obs_in_polygon(
          coords_df = coords_df,
          polygon_df = section_df,
          strictly = FALSE # may lie on edge of outline -> allow
        ) %>%
        dplyr::pull(barcodes)

      coords_df[coords_df[["barcodes"]] %in% ob_in_section, "section_outline"] <- section

    }

    # make sure is NULL
    meta_df$section_outline <- NULL

    meta_df <-
      dplyr::left_join(x = meta_df, y = coords_df, by = "barcodes") %>%
      dplyr::mutate(outlier_image = section_outline == "artefact")

    meta_df <-
      dplyr::group_by(meta_df, section_outline) %>%
      dplyr::mutate(n = dplyr::n()) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(outlier_image = dplyr::if_else(n < {{min_section}}, TRUE, outlier_image))

  }

  if(base::all(c("obs", "image") %in% method)){

    if(test == "any"){

      meta_df <-
        dplyr::mutate(
          .data = meta_df,
          sp_outlier = outlier_obs | outlier_image
        )

    } else if(test == "all") {

      meta_df <-
        dplyr::mutate(
          .data = meta_df,
          sp_outlier = outlier_obs & outlier_image
        )

    }

  } else if(method == "obs"){

    meta_df$sp_outlier <- meta_df$outlier_obs

  } else if(method == "image"){

    meta_df$sp_outlier <- meta_df$outlier_image

  }

  n_outliers <- base::sum(meta_df$sp_outlier)

  confuns::give_feedback(
    msg = glue::glue("Spatial outliers: {n_outliers}"),
    verbose = verbose
  )

  object <- addFeatures(object, feature_df = meta_df, feature_names = "sp_outlier", overwrite = TRUE)

  if(containsHistoImages(object)){

    # restore original active image
    object <- activateImageInt(object, img_name = active_image)

  }

  return(object)

  returnSpataObject(object)

}


#' @title Identify tissue outline
#'
#' @description Identifies and stores the spatial boundaries of the tissue section(s).
#' Then it assigns \link[=concept_observations]{observations} to their respective section.
#' Note that information about the boundaries of the tissue is crucial for many SPATA2
#' intern function. This information should not be missing in the `SPATA2` object.
#' See details for more.
#'
#' @param method Character value. Defines the origin based on which the outline
#' is computed. Either *'image'* or *'obs'*.
#'
#' @inherit getPixelDf params
#' @inherit argument_dummy params
#' @inherit dbscan::dbscan params
#' @inherit update_dummy return
#'
#' @details
#' In case of `method = obs`, DBSCAN is applied to categorize the data points
#' of the object based on their spatial proximity, grouping those that are close
#' enough to be deemed part of a single contiguous tissue section. Data points that
#' are isolated and situated at a significant distance from others are identified
#' as spatial outliers.The resulting classifications are saved in a *tissue_section*
#' variable within the object's meta data.frame. Afterwards, polygons are created
#' to outline the groups of data points which represent a tissue section.
#' See example 'method = obs'.
#'
#' In case of `method = image` the object must contain an image named as
#' indicated by the input of argument `img_name`. Furthermore, the results
#' of [`identifyPixelContent()`] for that image are required. If `img_name` specifies
#' multiple images, the function iterates over all of them. Since results of both methods
#' are stored in different locations, the object can contain results of both methods.
#' When extracting the tissue outline via [`getTissueOutlineDf()`] or [`ggpLayerTissueOutline()`]
#' use argument `method` to decide on which results to use.
#' See example 'method = image'.
#'
#' @section DBSCAN input:
#' For objects derived from the Visium platform with a fixed center to center
#' distance, we recommend to set `eps = getCCD(object, unit = "px")*1.25`
#' and `minPts = 3` which has worked well for us. For objects derived
#' from platforms that do not rely on a fixed grid of data points (MERFISH, SlideSeq, etc.)
#' we recommend the average minimal distance between the data points times 10 for
#' `eps` and `minPts = 25`, which might require manual adjustments. The function defaults to these recommendations using
#' [`recDbscanEps()`] and [`recDbscanMinPts()`] by default. This can, of course,
#' be overwritten manually by the user by specifying the parameters otherwise!
#' Note that you can visualize the results with `plotSurface(object, color_by = 'tissue_section')`
#' and repeat the process with different parameter input to overwrite the last
#' results untill you are satisfied with the output.
#'
#' @seealso [`getTissueOutlineDf()`], [`ggpLayerTissueOutline()`], [`identifySpatialOutliers()`],
#' [`useVarForTissueOutline()`]
#'
#' @export
#'
#' @examples
#'
#' library(SPATA2)
#' library(ggplot2)
#'
#' data("example_data")
#'
#' obj1 <- loadExampleObject("UKF275T")
#'
#' obj2 <- loadExampleObject("LMU_MCI")
#'
#' # example: 'method = obs'
#' obj1 <- identifyTissueOutline(obj1, method = "obs")
#'
#' obj2 <- identifyTissueOutline(obj2, method = "obs")
#'
#' ## visualize the categorization of spots with the new meta variable 'tissue_section'
#'
#' plotSurface(obj1, color_by = "tissue_section") # one single contiguous section
#'
#' plotSurface(obj2, color_by = "tissue_section") # two contiguous sections
#'
#' ## visualize the outline
#'
#' df1_obs <- getTissueOutlineDf(obj1)
#'
#' ggplot(df1_obs, mapping = aes(x, y, group = section)) +
#'  geom_polygon(fill = NA, color = "black")
#'
#' df2_obs <- getTissueOutlineDf(obj2)
#'
#' ggplot(df2_obs, mapping = aes(x, y, group = section)) +
#'  geom_polygon(fill = NA, color = "black")
#'
#' # example 'method = image'
#'
#' activeImage(obj1)
#'
#' obj1 <- identifyPixelContent(obj1, img_name = "image1") # image processing needed
#' obj1 <- identifyTissueOutline(obj1, method = "image", img_name = "image1")
#'
#' plotImage(obj1, outline = T)
#'
#' df1_img <- getTissueOutlineDf(obj1, method = "image")
#'
#' ggplot(mapping = aes(x, y, group = section)) +
#'  geom_polygon(fill = NA, color = "black", data = df1_obs) +
#'  geom_polygon(fill = NA, color = "red", data = df1_img)
#'

setGeneric(name = "identifyTissueOutline", def = function(object, ...){

  standardGeneric(f = "identifyTissueOutline")

})

#' @rdname identifyTissueOutline
#' @export
setMethod(
  f = "identifyTissueOutline",
  signature = "SPATA2",
  definition = function(object,
                        method = "obs",
                        minPts = recDbscanMinPts(object),
                        eps = recDbscanEps(object),
                        img_name = activeImage(object),
                        concavity = 2,
                        verbose = NULL,
                        ...){

    hlpr_assign_arguments(object)

    confuns::give_feedback(
      msg = glue::glue("Identifying tissue outline with `method = {method}`."),
      verbose = verbose
    )

    sp_data <- getSpatialData(object)

    sp_data <-
      identifyTissueOutline(
        object = sp_data,
        method = method,
        minPts = minPts,
        eps = eps,
        img_name = img_name,
        concavity = concavity,
        verbose = verbose
        )

    if(method == "obs"){

      # transfer tissue_section variable to meta data
      coords_df <- getCoordsDf(sp_data, as_is = TRUE)

      object <-
        addFeatures(object, feature_df = coords_df, feature_name = "tissue_section", overwrite = TRUE)

      coords_df$tissue_section <- NULL
      sp_data <- setCoordsDf(sp_data, coords_df = coords_df, force = TRUE)

    }

    object <- setSpatialData(object, sp_data = sp_data)

    returnSpataObject(object)

  }
)

#' @rdname identifyTissueOutline
#' @export
setMethod(
  f = "identifyTissueOutline",
  signature = "SpatialData",
  definition = function(object,
                        method,
                        img_name = activeImage(object),
                        eps = recDbscanEps(object),
                        minPts = recDbscanMinPts(object),
                        concavity = 2,
                        verbose = TRUE,
                        ...){

    confuns::check_one_of(
      input = method,
      against = c("image", "obs")
    )

    # sets outline of slot @outline in the respective HistoImage
    if(method == "image"){

      containsHistoImages(object, error = TRUE)

      check_cran_packages(pkgs_req = c("OpenImageR", "SuperpixelImageSegmentation"))

      if(base::is.null(img_name)){

        img_name <- activeImage(object)

      }

      confuns::check_one_of(
        input = img_name,
        against = getImageNames(object)
      )

      purrr::walk(
        .x = img_name,
        .f = ~ containsPixelContent(object, img_name = .x, error = TRUE)
      )

      for(i in base::seq_along(img_name)){

        hist_img <- getHistoImage(object, img_name = img_name[i])

        hist_img <- identifyTissueOutline(object = hist_img, verbose = verbose, ...)

        object <- setHistoImage(object, hist_img = hist_img)

      }

      # sets slot @outline of SpatialData object
    } else if(method == "obs"){

      eps = as_pixel(eps, object = object)

      # for complete tissue
      mtr <- getCoordsMtr(object, orig = TRUE)
      mtr[,1] <- as.numeric(mtr[,1])
      mtr[,2] <- as.numeric(mtr[,2])

      object@outline[["tissue_whole"]] <-
        concaveman::concaveman(points = mtr, concavity = concavity) %>%
        tibble::as_tibble() %>%
        magrittr::set_colnames(value = c("x_orig", "y_orig"))

      # for possible sections
      coords_df <-
        getCoordsDf(object, exclude = TRUE) %>%
        add_dbscan_variable(
          eps = as_pixel(input = eps, object),
          minPts = minPts,
          name = "ts"
        ) %>%
        dplyr::mutate(ts = stringr::str_c("tissue_section", ts, sep = "_"))

      sections_ordered <-
        dplyr::filter(coords_df, ts != "tissue_section_0") %>%
        dplyr::group_by(ts) %>%
        dplyr::summarise(max_y = base::max(y, na.rm = TRUE)) %>%
        dplyr::arrange(max_y) %>%
        dplyr::pull(ts)

      coords_df$tissue_section <- "tissue_section_0"

      for(i in seq_along(sections_ordered)){

        section <- sections_ordered[i]

        coords_df$tissue_section[coords_df$ts == section] <-
          stringr::str_c("tissue_section", i, sep = "_")

      }

      coords_df_flt <- dplyr::filter(coords_df, ts != "tissue_section_0")

      coords_df$ts <- NULL

      object@outline[["tissue_section"]] <-
        purrr::map_df(
          .x = base::unique(coords_df_flt[["tissue_section"]]),
          .f = function(section){

            dplyr::filter(coords_df_flt, tissue_section == {{section}}) %>%
              dplyr::select(x = x_orig, y = y_orig) %>%
              #increase_n_data_points(fct = 10, cvars = c("x", "y")) %>%
              base::as.matrix() %>%
              concaveman::concaveman(concavity = concavity) %>%
              tibble::as_tibble() %>%
              magrittr::set_colnames(value = c("x_orig", "y_orig")) %>%
              dplyr::mutate(section = {{section}}) %>%
              dplyr::select(section, x_orig, y_orig)

          }
        )

      coords_df$tissue_section <- base::factor(coords_df$tissue_section)

      # return coords_df! ; not coords_df_flt
      object@coordinates <-
        dplyr::left_join(
          x = object@coordinates,
          y = coords_df[,c("barcodes", "tissue_section")],
          by = "barcodes"
        )

    }

    return(object)

  }
)


#' @rdname identifyTissueOutline
#' @export
setMethod(
  f = "identifyTissueOutline",
  signature = "HistoImage",
  definition = function(object, verbose = TRUE){

    containsPixelContent(object, error = TRUE)

    confuns::give_feedback(
      msg = glue::glue("Identifying tissue outline of image '{object@name}'."),
      verbose = verbose
    )

    if(!containsImage(object)){

      object <- loadImage(object, verbose = verbose)

    }

    img_dims <- getImageDims(object)

    pxl_df <-
      getPixelDf(
        object = object,
        content =  TRUE,
        transform = FALSE
      ) %>%
      dplyr::filter(!content %in% c("artefact", "background"))

    outline <- list()

    mtr_whole <-
      dplyr::select(pxl_df, x = width, y = height) %>%
      base::as.matrix()

    outline$tissue_whole <-
      concaveman::concaveman(points = mtr_whole, concavity = 1) %>%
      tibble::as_tibble() %>%
      magrittr::set_colnames(value = c("x", "y")) %>%
      dplyr::mutate(section = "whole")

    content_groups <-
      base::droplevels(pxl_df[["content"]]) %>%
      base::levels()

    outline$tissue_sections <-
      purrr::map_df(
        .x = content_groups,
        .f = function(cg){

          out <-
            dplyr::filter(pxl_df, content == {{cg}}) %>%
            dplyr::select(x = width, y = height) %>%
            base::as.matrix() %>%
            concaveman::concaveman(points = ., concavity = 1) %>%
            tibble::as_tibble() %>%
            dplyr::mutate(section = {{cg}}) %>%
            dplyr::select(x = V1, y = V2, section)

          return(out)

        }
      )

    object@outline <- outline

    return(object)

  }
)



#' @title Identify molecules of high variability
#'
#' @description
#' Identfies molecules that are outliers on a mean-variability plot as proposed by
#' `Seurat`.
#'
#' @param n_mol Numeric value. The number of molecules to be identified. Given
#' to `nfeatures` of [`Seurat::FindVariableFeatures()`].
#' @param method Character value. The identification method. One of *'vst'*,
#' *'mean.var.plot'* or *'dispersion'*. Given to `selection.method` of
#' [`Seurat::FindVariableFeatures()`].
#'
#' @inherit argument_dummy params
#' @inherit update_dummy return
#'
#' @export
#'
#' @examples
#'
#' library(SPATA2)
#' data("example_data")
#'
#' object <- loadExampleObject("UKF269T")
#'
#' # default is method = 'vst'
#' object <- identifyVariableMolecules(object, n_mol = 2000)
#'
#' # multiple results can be stored simultaneously
#' object <- identifyVariableMolecules(object, n_mol = 3000, method = "mean.var.plot")
#'
#'# fails, cause results for two methods are stored and method = NULL
#' vars_vst <- getVariableMolecules(object)
#'
#' vars_vst <- getVariableMoleculs(object, method = "vst") # works
#'
#' # should work
#' vars_mvp <- getVariableMolecules(object, method = "mean.var.plot")
#'
#' length(vars_vst)
#' length(vars_mvp)
#'
identifyVariableMolecules <- function(object,
                                      n_mol = 2500,
                                      method = "vst",
                                      assay_name = activeAssay(object),
                                      ...){

  confuns::check_one_of(
    input = method,
    against = c("vst", "mean.var.plot", "dispersion")
  )

  ma <- getAssay(object, assay_name = assay_name)

  count_mtr <- ma@mtr_counts

  vf <-
    Seurat::CreateSeuratObject(counts = count_mtr) %>%
    Seurat::FindVariableFeatures(object = ., nfeatures = n_mol, selection.method = method, ...) %>%
    Seurat::VariableFeatures()

  ma@analysis$variable_molecules[[method]] <- vf

  object <- setAssay(object, assay = ma)

  returnSpataObject(object)

}

# if ----------------------------------------------------------------------

if_null <- function(x, out){

  if(base::is.null(x)){

    x <- out

  }

  return(x)

}


# im ----------------------------------------------------------------------

#' @keywords internal
img_ann_highlight_group_button <- function(){

  shiny::splitLayout(
    shinyWidgets::checkboxGroupButtons(
      inputId = "highlight",
      label = NULL,
      choices = c("Highlight" = "highlight"),
      status = "default",
      justified = TRUE
    ),
    cellWidths = "100%"
  )

}


# in ----------------------------------------------------------------------

#' @title Include spatial extent of tissue sections in analysis
#'
#' @description Ensures section specific processing of observations
#' in relation by identifying the outline of the tissue section
#' (or -sections in case of multiple tissue sections per sample). Additionally,
#' allows to relate observations to the spatial position and extent of image
#' annotations.
#'
#' @inherit spatialAnnotationScreening params
#' @param input_df A data.frame that contains at least numeric *x* and *y*
#' variables.
#' @param outline_df A data.frame that contains the ouline/hull of all tissue sections.
#' Must contain variables *x*, *y* and *section*.
#' @inherit argument_dummy params
#' @param sas_circles Logical value. If `TRUE`, input data.frame is assumed
#' to contain polygon coordinates of the expanded spatial annotation encircling
#' and sorts them after filtering for those that lie inside the tissue section
#' in order to plot them via `ggplot2::geom_path()`.
#' @param opt Either *'concaveman'*' or *'chull'*. Defines with which function
#' the tissue outline is computed.
#' @param buffer Distance measure with which to buffer the tissue outline data.frame.
#' @return Filtered input data.frame.
#' @keywords internal
#'
include_tissue_outline <- function(input_df,
                                   outline_df = NULL, # hull_df should be used by calling function!
                                   coords_df = NULL, # either of both must be not NULL
                                   spat_ann_center = NULL,
                                   sas_circles = FALSE,
                                   ccd = NULL,
                                   outside_rm = TRUE,
                                   inside_if = c(1,2),
                                   opt = "concaveman",
                                   buffer = 0,
                                   ...){

  deprecated(...)
  outline_var <- "tissue_section"

  # identify sections
  if(base::is.null(outline_df)){

    is_dist_pixel(input = ccd, error = TRUE)

    if(!"tissue_section" %in% base::colnames(coords_df)){

      coords_df <- add_tissue_section_variable(coords_df, ccd = ccd, name = "tissue_section")

      coords_df <- dplyr::filter(coords_df, section != "0")

    }

    sections <- base::unique(coords_df[[outline_var]])

  } else {

    sections <- base::unique(outline_df$section)

  }

  buffer <- base::as.numeric(buffer)

  # iterate over sections
  proc_df <-
    purrr::map_df(
      .x = sections,
      .f = function(section){

        if(base::is.null(outline_df)){

          if(opt == "concaveman"){

            df_sub <-
              dplyr::filter(coords_df, !!rlang::sym(outline_var) == {{section}}) %>%
              dplyr::select(x, y)

            hull_df <-
              concaveman::concaveman(
                points = base::as.matrix(df_sub),
                concavity = 1
              ) %>%
              base::as.data.frame() %>%
              magrittr::set_colnames(value = c("x", "y"))

          } else if(opt == "chull") {

            df_sub <-
              dplyr::filter(coords_df, !!rlang::sym(outline_var) == {{section}})

            hull_points <- grDevices::chull(x = df_sub[["x"]], y = df_sub[["y"]])
            hull_df <- df_sub[hull_points, ]

          }

          if(buffer != 0){

            hull_df <- buffer_area(df = hull_df, buffer = buffer, close_plg = TRUE)

          }

        } else {

          hull_df <- dplyr::filter(outline_df, section == {{section}})

        }

        input_df$obs_in_section <-
          sp::point.in.polygon(
            point.x = input_df[["x"]],
            point.y = input_df[["y"]],
            pol.x = hull_df[["x"]],
            pol.y = hull_df[["y"]]
          ) %>%
          base::as.character()

        out_df <-
          dplyr::mutate(
            .data = input_df,
            pos_rel = dplyr::if_else(obs_in_section %in% {{inside_if}}, true = "inside", false = "outside"),
            section = {{section}}
          )

        if("inside" %in% out_df[["pos_rel"]]){

          if(base::isTRUE(sas_circles)){

            out_df[["part"]] <- 0
            out_df[["number"]] <- 0

            parts <- list(outside = 0, inside = 0)

            # walk along the drawing direction and mark entering and exit of line
            for(i in 1:base::nrow(out_df)){

              current_pos <- base::as.character(out_df[i, "pos_rel"])

              # switch
              if((i == 1) || (current_pos != prev_pos)){

                parts[[current_pos]] <- parts[[current_pos]]+1

                number <- 1

              } else {

                number <- number + 1

              }

              out_df[i, "part"] <- parts[[current_pos]]
              out_df[i, "number"] <- number

              prev_pos <- current_pos

            }

            out_df <-
              dplyr::mutate(
                .data = out_df,
                pos_rel_group = stringr::str_c(pos_rel, part),
                intersect = number == 1
              ) %>%
              dplyr::group_by(pos_rel_group) %>%
              dplyr::arrange(number, .by_group = TRUE) %>%
              dplyr::ungroup()

          }

          if(base::isTRUE(outside_rm)){

            out_df <- dplyr::filter(out_df, pos_rel == "inside")

          }

        } else {

          out_df <- NULL

        }

        return(out_df)

      }
    )

  # if multiple sections on visium slide
  # identify to which image section the spat ann belongs
  if(base::length(sections) > 1 &
     base::is.numeric(spat_ann_center) &
     base::nrow(proc_df) != 0){

    section_of_spat_ann <-
      dplyr::group_by(.data = coords_df, barcodes) %>%
      dplyr::mutate(
        dist = compute_distance(starting_pos = c(x,y), final_pos = spat_ann_center)
      ) %>%
      dplyr::ungroup() %>%
      dplyr::filter(dist == base::min(dist)) %>%
      dplyr::pull({{outline_var}}) %>%
      base::as.character()

    proc_df <- dplyr::filter(proc_df, section == {{section_of_spat_ann}})

  } else {

    if(base::nrow(proc_df) == 0){

      proc_df <- NULL

    }

  }

  return(proc_df)


}



#' @title Increase the Number of Data Points in a Coordinate Data Frame
#'
#' @description This function artificially increases the number of data points in a given coordinate data frame.
#' It generates additional points by adding random noise to the existing coordinates,
#' ensuring the new points stay within the original range of the data.
#'
#' @param coords_df A data frame containing the original coordinates.
#'                  Must include columns specified in `cvars`.
#' @param fct A numeric factor indicating how many times the number of data points should be increased.
#'            The function will round this number up to the nearest integer.
#' @param cvars A character vector specifying the column names of the coordinates in `coords_df`.
#'
#' @return A data frame with the increased number of data points.
#'
#' @keywords internal
increase_n_data_points <- function(coords_df, fct = 10, cvars = c("x", "y")){

  confuns::is_value(fct, mode = "numeric")
  fct <- base::ceiling(fct)

  frame_orig <-
    purrr::map(
      .x = coords_df[,cvars],
      .f = base::range
    ) %>%
    purrr::set_names(nm = cvars)

  size <- base::nrow(coords_df)*fct

  dist <-
    dplyr::select(coords_df, dplyr::all_of(cvars)) %>%
    base::as.matrix() %>%
    FNN::knn.dist(data = ., k = 1) %>%
    base::mean()

  noise <- stats::runif(n = size, min = dist*0.1, max = dist*0.5)

  neg <- base::sample(c(TRUE, FALSE), size = size, replace = TRUE)

  noise[neg] <- noise[neg]*-1

  new_df <-
    tidyr::expand_grid(
      barcodes = coords_df$barcodes,
      n = 1:fct
    ) %>%
    dplyr::left_join(y = coords_df[,c("barcodes", cvars)], by = "barcodes") %>%
    dplyr::mutate(
      dplyr::across(
        .cols = dplyr::any_of(x = cvars),
        .fns = ~ .x + noise
      )
    )

  in_xrange <-
    dplyr::between(
      x = new_df[[cvars[1]]],
      left = frame_orig[[cvars[1]]][1],
      right = frame_orig[[cvars[1]]][2]
    )

  new_df <- new_df[in_xrange, ]

  in_yrange <-
    dplyr::between(
      x = new_df[[cvars[2]]],
      left = frame_orig[[cvars[2]]][1],
      right = frame_orig[[cvars[2]]][2]
    )

  new_df <- new_df[in_yrange, ]

  return(new_df)

}


#' @title Increase the Number of Vertices in a Polygon
#'
#' @description This function interpolates additional vertices in a polygon data frame to increase its vertex density.
#' It ensures that the polygon remains closed and that the new vertices are evenly distributed.
#'
#' @param polygon_df A data frame representing the polygon, with columns for x and y coordinates.
#' @param avg_dist The average distance between vertices after interpolation.
#' @param skip Logical; if TRUE, the function skips the interpolation process and returns the original polygon.
#'
#' @return A data frame representing the polygon with increased vertex density.
#'
#' @keywords internal
increase_polygon_vertices <- function(polygon_df, avg_dist, skip = FALSE) {

  if (!base::isTRUE(skip)) {

    polygon_df <- base::as.data.frame(polygon_df)

    # Ensure the polygon is closed (first and last point are the same)
    if (!base::identical(polygon_df[1, ], polygon_df[nrow(polygon_df), ])) {
      polygon_df <- base::rbind(polygon_df, polygon_df[1, ])
    }

    # Calculate differences between consecutive points
    diffs <- base::diff(as.matrix(polygon_df))

    # Calculate distances between consecutive points
    dist_between_vertices <- sqrt(rowSums(diffs^2))

    # Calculate the number of points to interpolate between each pair of points
    num_interpolated <- base::pmax(1, floor(dist_between_vertices / avg_dist))

    # Generate sequences for interpolation
    interpolated_points <- lapply(1:(nrow(polygon_df) - 1), function(i) {
      n <- num_interpolated[i]
      x_seq <- seq(polygon_df[i, "x"], polygon_df[i + 1, "x"], length.out = n + 1)[-1]
      y_seq <- seq(polygon_df[i, "y"], polygon_df[i + 1, "y"], length.out = n + 1)[-1]
      data.frame(x = x_seq, y = y_seq)
    })

    # Combine all interpolated points into a single data frame
    interpolated_df <- do.call(rbind, interpolated_points)

    # Add the first point to close the polygon
    interpolated_df <- rbind(interpolated_df, interpolated_df[1, ])

    return(tibble::as_tibble(interpolated_df))
  }

  return(tibble::as_tibble(polygon_df))
}

#' @keywords internal
infer_gradient <- function(loess_model,
                           expr_est_pos,
                           ro = c(0, 1),
                           ...){

  grad <- stats::predict(loess_model, data.frame(dist = expr_est_pos))

  if(base::is.numeric(ro)){

    grad <- scales::rescale(grad, to = ro)

  }

  return(grad)


}

#' @keywords internal
initiate_plot <- function(xlim = c(1, 600), ylim = c(1,600), main = "") {

  plot(0, 0, type = "n", xlim = xlim, ylim = ylim, xlab = "x", ylab = "y", main = main, asp = 1)

}


#' @title Interpolate points along path
#'
#' @description Ensures equally distributed number of points along a
#' curved trajectory.
#'
#' @param data Data.frame of x- and y-coordinates.
#'
#' @keywords internal
interpolate_points_along_path <- function(data, max_distance = 1) {

  interpolated_data <- data[1,]

  for(i in 2:nrow(data)){

    prev_point <- data[i-1, ]
    current_point <- data[i, ]

    distance <- sqrt((current_point$x - prev_point$x)^2 + (current_point$y - prev_point$y)^2)

    if(distance > max_distance){

      num_interpolations <- ceiling(distance / max_distance)

      for(j in 1:num_interpolations){

        interpolation_fraction <- j / (num_interpolations + 1)

        interpolated_x <- prev_point$x + interpolation_fraction * (current_point$x - prev_point$x)

        interpolated_y <- prev_point$y + interpolation_fraction * (current_point$y - prev_point$y)

        interpolated_data <-
          rbind(
            interpolated_data,
            data.frame(
              x = interpolated_x,
              y = interpolated_y
            )
          )

      }

    }

    interpolated_data <-
      rbind(
        interpolated_data,
        current_point
      )

  }

  out <- interpolated_data

  return(out[reduce_vec(x = 1:nrow(out), nth = 2), ])

}

#' @title Test polygon intersection
#'
#' @description Tests which vertices of polygon `a` lay inside polygon `b`.
#'
#' @param a,b Matrix or data.frame with columns *x* and *y*.
#' @inherit getBarcodesInPolygon params
#'
#' @return Logical vector of the same length as the number of rows in `a`.
#' @keywords internal

intersect_polygons <- function(a, b, strictly = FALSE){

  a <- as.data.frame(a)
  b <- as.data.frame(b)

  res <-
    sp::point.in.polygon(
      point.x = a[["x"]],
      point.y = a[["y"]],
      pol.x = b[["x"]],
      pol.y = b[["y"]]
    )

  if(base::isTRUE(strictly)){

    out <- res == 1

  } else {

    out <- res %in% c(1,2)

  }

  return(out)

}


#' Check polygon containment and intersection
#'
#' @description This set of functions checks whether one polygon is completely inside another, or whether two polygons intersect.
#'
#' @param a A data frame or matrix with two columns named \code{x} and \code{y}, representing the vertices of the first polygon.
#' @param b A data frame or matrix with two columns named \code{x} and \code{y}, representing the vertices of the second polygon.
#' @param strictly Logical, if \code{TRUE}, the functions perform strict checks (i.e., points on the edges of the polygons are excluded). If \code{FALSE}, points on the edges are considered as inside or intersecting.
#'
#' @return
#' \itemize{
#'   \item \code{polygon_inside_polygon()}: A logical value \code{TRUE} if all points of polygon \code{a} are inside polygon \code{b}, \code{FALSE} otherwise.
#'   \item \code{polygon_intersects_polygon()}: A logical value \code{TRUE} if the polygons intersect, \code{FALSE} otherwise.
#' }
#'
#' @details
#' These functions help in determining spatial relationships between two polygons. They work with data frames or matrices where the columns represent the \code{x} and \code{y} coordinates of the polygon's vertices.
#'
#' \itemize{
#'   \item \code{polygon_inside_polygon()}: Checks whether all points of polygon \code{a} lie inside polygon \code{b}.
#'   \item \code{polygon_intersects_polygon()}: Checks whether the polygons \code{a} and \code{b} intersect, considering their vertices.
#'   \item If \code{strictly = TRUE}, the checks exclude points that lie on the edges of the polygons. If \code{strictly = FALSE}, points on the edges are considered inside or intersecting.
#' }
#'
#' @examples
#' polygon_a <- data.frame(x = c(1, 2, 2, 1), y = c(1, 1, 2, 2))
#' polygon_b <- data.frame(x = c(0, 3, 3, 0), y = c(0, 0, 3, 3))
#'
#' # Check if polygon_a is completely inside polygon_b
#' polygon_inside_polygon(polygon_a, polygon_b) # TRUE
#'
#' # Check if polygon_a intersects with polygon_b
#' polygon_intersects_polygon(polygon_a, polygon_b) # TRUE
#'
#' @keywords internal
#' @export
polygon_inside_polygon <- function(a, b, strictly = TRUE){
  res <- sp::point.in.polygon(
    point.x = a[["x"]],
    point.y = a[["y"]],
    pol.x = b[["x"]],
    pol.y = b[["y"]]
  )

  if(base::isTRUE(strictly)){
    out <- res == 1
  } else {
    out <- res %in% c(1, 2)
  }

  all(out)
}

#' @rdname polygon_inside_polygon
#' @export
polygon_intersects_polygon <- function(a, b, strictly = TRUE){
  x <- intersect_polygons(a = a, b = b, strictly = strictly)

  out <- any(x) & any(!x)

  return(out)
}



# is_ ----------------------------------------------------------------------



#' @title Test area input
#'
#' @description Tests if input refers to an area using international area
#' units according to the `SPATA2` area framework.
#'
#' \itemize{
#'  \item{`is_area()`:}{ Tests if input can be interpreted as an area}
#'  \item{`is_area_si()`:} {Tests if input can be interpreted as an area in SI units.}
#'  \item{`is_area_pixel()`:} {Tests if input can be interpreted as an area
#'  in pixel.}
#'  }
#'
#' @param input Character vector. Elements must match the requirements of
#' the `SPATA2` area framework. See details for more information.
#'
#' @return Logical vector of the same length as input and/or an error if `verbose`
#' is `TRUE`.
#'
#' @examples
#'
#' library(SPATA2)
#'
#' ##### provide input as character vectors
#'
#' # will return TRUE
#'
#' is_area(input = c('2mm2', '4mm2'))
#'
#' # will return FALSE
#'
#' is_area(input = c('200 m2')) # space between value and unit
#'
#' # will return TRUE
#'
#' area_values <- c(200, 400)
#'
#' area_values <- as_area(area_values, unit = "mm2")
#'
#' is_area(input = area_values)
#'
#' ###### use units package
#'
#' library(units)
#'
#' area_values2 <- set_units(x = c(200, 300), value = "mm2")
#'
#' is_area(area_values2)
#'
#'
#' @export
#'
is_area <- function(input, error = FALSE){

  if(base::is.character(input)){

    res <- stringr::str_detect(string = input, pattern = regex_area)

    feedback_area_input(x = res, error = error)

  }  else if(base::is.numeric(input)){

    res <- base::rep(TRUE, base::length(input))

  }  else if(base::all(base::class(input) == "units")){

    unit_attr <- attr(input, which = "units")

    test <- base::logical(2)

    test[1] <- base::length(unit_attr$numerator) == 2

    test[2] <-
      purrr::map_lgl(
        .x = unit_attr$numerator,
        .f = ~ .x %in% validEuropeanUnitsOfLength()
        ) %>%
      base::all()

    res <- base::all(test)

    if(base::isFALSE(res) & base::isTRUE(error)){

      stop("Input is of class 'units' but does not correspond to an area.")

    }

    res <- base::rep(res, base::length(input))

  }

  return(res)

}

#' @rdname is_area
#' @export
is_area_pixel <- function(input, error = FALSE){

  if(base::is.character(input) | is_numeric_input(input)){

    res <- stringr::str_detect(input, pattern = regex_pxl_area)

    feedback_area_pixel_input(x = res, error = error)

  } else {

    if(base::isTRUE(error)){

      stop(invalid_area_pixel_input)

    } else {

      res <- base::rep(FALSE, base::length(input))

    }

  }

  return(res)

}

#' @rdname is_area
#' @export
is_area_si <- function(input, error = FALSE){

  if(base::is.character(input) | is_numeric_input(input)){

    res <- stringr::str_detect(input, pattern = regex_si_area)

    feedback_area_si_input(x = res, error = error)

  } else {

    if(base::isTRUE(error)){

      stop(invalid_area_si_input)

    } else {

      res <- base::rep(FALSE, base::length(input))

    }

  }

  return(res)

}



#' @title Test distance input
#'
#' @description Tests if input that refers to a distance is of valid input.
#'
#' \itemize{
#'  \item{`is_dist()`:}{ Tests if input can be interpreted as a distance.}
#'  \item{`is_dist_si()`:} {Tests if input can be interpreted as a distance in SI units.}
#'  \item{`is_dist_pixel()`:} {Tests if input can be interpreted as a distance
#'  in pixel.}
#'  }
#'
#' @param input Character or numeric vector. Elements must match the
#' requirements of the \code{SPATA2} distance framework. See details
#' for more information.
#'
#' @inherit argument_dummy params
#'
#' @return Logical vector of the same length as `input`. If `error` is `TRUE`
#' and one or more elements of the input values can not be interpreted approapriately
#' the functions throws an error.
#'
#' @details Several functions in `SPATA2` have arguments that take *distance input*.
#' To specifically refer to a distance the unit must be specified. There are
#' three ways to create valid input for these arguments.
#'
#' **1. In pixel:**
#'
#' There are two valid input options to specify the distance in pixel:
#'
#' \itemize{
#'  \item{numeric:}{ Single numeric values, e.g. `arg_input <- c(2, 3.554, 69, 100.67)`. If no unit
#'  is specified the input will be interpreted as pixels.}
#'  \item{character:}{ Suffixed with *'px'*, e.g. `arg_input <- c('2px', '3.554px', '69px', '100.67px')`}
#'  }
#'
#'  Note: The unit pixel (px) is used for distances as well as for areas. If pixel
#'  refers to a distance the pixel side length is meant. If pixel refers to an area the
#'  number of pixels is meant.
#'
#' **2. According to the Systeme international d`unites (SI):**
#'
#'  Specifying distances in SI units e.g. `arg_input <- c('2mm', '4mm')` etc.
#'  requires the input to be a character as the unit must be provided as suffix.
#'  Between the numeric value and the unit must be no empty space! Valid suffixes
#'  can be obtained using the function `validUnitsOfLengthSI()`.
#'
#'  **3. As vectors of class `unit`:**
#'
#' Behind the scenes `SPATA2` works with the `units` package. Input
#' is converted into vectors of class `units`. Therefore, input can be directly
#' provided this way: `arg_input <- units::set_unit(x = c(2,4), value = 'mm')`
#' Note that *pixel* is not a valid unit in the `units` package. If you want
#' to specify the input in pixel you have to use input option 1. In pixel.
#'
#' @export
#'
#' @examples
#'
#' ##### use numeric or character vectors
#'
#' library(SPATA2)
#'
#' # will return TRUE
#' is_dist(input = 200) # -> 200 pixel
#' is_dist(input = "20px") # > 20 pixel
#'
#' is_dist(input = "40.5mm") # -> 40.5 mm
#'
#' # will return FALSE
#' is_dist(input = "30.5 mm") # -> empty space between 30.5 and mm
#'
#' is_dist(input = ".4mm") # -> must start with a number
#'
#' ##### use units package
#'
#' library(units)
#'
#' dist_input <- set_units(x = c(2, 3, 4.4), value = "mm")
#'
#' is_dist(dist_input)
#'
is_dist <- function(input, error = FALSE){

  if(base::is.null(input)){

    res <- FALSE

  } else {

    res <- is_dist_si(input, error = FALSE) | is_dist_pixel(input, error = FALSE)

  }

  feedback_distance_input(x = res, error = error)

  return(res)

}

#' @rdname is_dist
#' @export
is_dist_si <- function(input, error = FALSE){

  if(base::is.null(input)){

    res <- NULL

    feedback_distance_input(res, error = error)

  } else if(base::is.character(input)){

    res <- stringr::str_detect(input, pattern = regex_si_dist)

    feedback_distance_input(x = res, error = error)

  } else if(base::all(base::class(input) == "units")){

    unit_attr <- base::attr(input, which = "units")

    test <- base::logical(2)

    test[1] <- base::length(unit_attr$numerator) == 1

    test[2] <- base::all(unit_attr$numerator %in% validUnitsOfLengthSI())

    res <- base::all(test)

    if(base::isFALSE(res) & base::isTRUE(error)){

      stop("Input is of class 'units' but can not be interpreted as a SI unit.")

    }

    res <- base::rep(res, base::length(input))

  } else if(base::is.numeric(input)){

    res <- base::rep(TRUE, base::length(input))

  } else {

    if(base::isTRUE(error)){

      stop(invalid_dist_si_input)

    } else {

      res <- base::rep(FALSE, base::length(input))

    }

  }

  return(res)

}

#' @rdname is_dist
#' @export
is_dist_pixel <- function(input, error = FALSE){

  if(base::is.null(input)){

    res <- FALSE

    feedback_distance_input(res, error = error)

  } else if(base::is.character(input) | is_numeric_input(input)){

    res <- stringr::str_detect(input, pattern = regex_pxl_dist)

    feedback_distance_input(x = res, error = error)

  } else {

    if(base::isTRUE(error)){

      stop(invalid_dist_pixel_input)

    } else {

      res <- base::rep(FALSE, base::length(input))

    }

  }

  return(res)

}

#' @keywords internal
is_exclam <- function(input, error = FALSE){

  res <-
    stringr::str_detect(string = input, pattern = regex_exclam) &
    stringr::str_detect(string = input , pattern = "!$")

  return(res)

}

#' @keywords internal
is_image_dir <- function(input, error = FALSE){

  res <-
    stringr::str_detect(
      string = input,
      pattern = "\\.png$|\\.jpeg$\\.tiff$|\\.PNG$|\\.JPEG$\\.TIFF$"
      )

  if(base::any(!res)){

    stop("Image directories must end with either '.png', '.jpeg', '.tiff'")

  }

  return(res)

}


#' Check if a Point is Inside a Polygon
#'
#' This function determines whether a given point lies inside a polygon defined by its vertices.
#'
#' @param point A numeric vector with two elements representing the x (first value) and y (second value) coordinates of the point.
#' @param polygon_df A data frame with columns 'x' and 'y', containing the vertices of the polygon.
#' @param strictly Logical, indicating whether the point must strictly lie inside the polygon (TRUE) or might be part of
#' the polygon boundary (FALSE).
#'
#' @return Logical value indicating whether the point is inside the polygon.
#'
#' @examples
#' point <- c(2, 3)
#' polygon_df <- data.frame(x = c(1, 3, 3, 1), y = c(1, 1, 4, 4))
#' is_inside_plg(point, polygon_df) # Returns TRUE
#'
#' @seealso [`sp::point.in.polygon()`]
#'
#' @keywords internal
is_inside_plg <- function(point, polygon_df, strictly = TRUE){

  pos <-
    sp::point.in.polygon(
      point.x = point[1],
      point.y = point[2],
      pol.x = polygon_df[["x"]],
      pol.y = polygon_df[["y"]]
    )

  if(base::isTRUE(strictly)){

    out <- pos == 1

  } else {

    out <- pos != 0

  }

  return(out)

}

#' @keywords internal
is_number <- function(x){

  !(base::is.na(x) | base::is.na(x) | base::is.infinite(x))

}

#' @keywords internal
is_numeric_input <- function(input){

  (base::is.numeric(input)) &
  (!"units" %in% base::class(input))

}


#' @title Check for outliers in a numeric vector
#'
#' @description This function identifies outliers in a numeric vector `x`.
#'
#' @param x A numeric vector for which outliers need to be identified.
#' @param ... Additional arguments passed to `boxplot.stats`.
#'
#' @return A logical vector of the same length as `x`, where `TRUE` indicates that
#'  the corresponding element in `x` is an outlier.
#'
#' @details
#' An element is considered an outlier if it falls outside the range as defined
#' by `boxplot.stats` from the `grDevices` package. The function is useful
#' for quickly flagging or filtering outliers in a dataset.
#'
#' @examples
#' # Example vector
#' data <- c(-10, 1, 2, 3, 4, 5, 100)
#'
#' # Check for outliers
#' is_outlier(data)
#'
#' # Using the function to filter out outliers
#' data[!is_outlier(data)]
#'
#' @export
is_outlier <- function(x, ...) {

  x %in% grDevices::boxplot.stats(x = x, ...)[["out"]]

}


#' @keywords internal
is_percentage <- function(input, error = FALSE){

  res <- stringr::str_detect(string = input, pattern = regex_percentage)

  feedback_percentage_input(x = res, error = error)

  return(res)

}

#' @keywords internal
is_spatial_measure <- function(input, error = FALSE){

  res <- is_dist(input, error = FALSE) | is_area(input, error = FALSE)

  feedback_spatial_measure(res, error = error)

  return(res)

}



#' @title Test unit of area input
#'
#' @description Tests if input is a valid unit of area.
#'
#' @param input Character vector of area units. Obtain valid
#' input options with `validUnitsOfArea()`.
#'
#' @inherit argument_dummy params
#'
#' @return Logical value and/or error if argument `error` is `TRUE`.
#'
#' @export
is_unit_area <- function(input, error = FALSE){

  res <- input %in% validUnitsOfArea()

  if(base::isFALSE(res) & base::isTRUE(error)){

    stop("Invalid unit input. Must be a valid unit of area. Obtain valid input options with `validUnitsOfArea().`")

  }

  return(res)

}

#' @title Test unit of length input
#'
#' @description Tests if input is a valid unit of distance.
#'
#' @param input Character vector of distance units. Obtain valid
#' input options with `validUnitsOfLength()`.
#'
#' @inherit argument_dummy params
#'
#' @return Logical value and/or error if argument `error` is `TRUE`.
#'
#' @export
is_unit_dist <- function(input, error = FALSE){

  res <- input %in% validUnitsOfLength()

  if(base::isFALSE(res) & base::isTRUE(error)){

    stop("Invalid unit input. Must be a valid unit of length. Obtain valid input options with `validUnitsOfLength().`")

  }

  return(res)

}










# isG ---------------------------------------------------------------------


#' @keywords internal
#' @export
isGene <- function(object, gene){

  genes <- getGenes(object)

  out <- gene %in% genes

  base::isTRUE(out)

}

#' @keywords internal
#' @export
isGeneSet <- function(object, gene_set){

  gene_sets <- getGeneSets(object)

  out <- gene_set %in% gene_sets

  base::isTRUE(out)

}



# isF ---------------------------------------------------------------------


#' @keywords internal
#' @export
isFeature <- function(object, feature){

  features <- getFeatureNames(object)

  out <- feature %in% features

  base::isTRUE(out)

}



# isN ---------------------------------------------------------------------

#' @keywords internal
#' @export
isNumericVariable <- function(object, variable){

  out <- is.numeric(joinWithVariables(object, variables = variable)[[variable]])

  return(out)

}



# isS ---------------------------------------------------------------------

#' @title General test for SPATA2 object
#'
#' @description
#' Logical test of an object being interpretable as a [`SPATA2`] object.
#'
#' @inherit argument_dummy params
#' @param warn,error Logical values, allowing the function to throw warnings
#' or errors. If `FALSE`, the default, only a logical value is returned.
#'
#' @return Logical value.
#'
#' @export
#'
isSPATA2 <- function(object, warn = TRUE, error = FALSE){

  out <- all(class(object) == "SPATA2")

  if(isTRUE(out) & isTRUE(warn)){

    check_object(object)

  }

  if(isFALSE(out) & isTRUE(error)){

    stop("Input object is not of class `SPATA2`.")

  }

  return(out)

}

#' @export
#' @keywords internal
isSpatialTrajectory <- function(object){

  class_test <-
    SpatialTrajectory() %>%
    base::class()

  methods::is(object = object, class = class_test)

}


# isT ---------------------------------------------------------------------

#' @export
#' @keywords internal
isTrajectory <- function(object){

  class_test <-
    Trajectory() %>%
    base::class()

  methods::is(object = object, class = class_test)

}