R/h.R

Defines functions hlpr_widen_trajectory_df hlpr_vector_projection hlpr_transfer_slot_content hlpr_summarize_trajectory_df hlpr_subset_across hlpr_smooth_shiny hlpr_smooth hlpr_scatterplot hlpr_save_spata_object hlpr_process_spatial_correlation_cluster hlpr_one_distinct hlpr_normalize_vctr hlpr_normalize_imap hlpr_labs_add_on hlpr_join_with_color_by hlpr_image_to_df hlpr_image_add_on2 hlpr_image_add_on hlpr_geom_trajectory_fit hlpr_gene_set_name hlpr_drop_all_na hlpr_dist_mtr_to_df hlpr_display_subtitle hlpr_display_title hlpr_compare_samples hlpr_breaks hlpr_assign_arguments hlpr_adjust_legend_size hlpr_add_old_coords hlpr_add_barcode_suffix hide_unit

Documented in hlpr_add_barcode_suffix hlpr_add_old_coords hlpr_adjust_legend_size hlpr_assign_arguments hlpr_breaks hlpr_compare_samples hlpr_dist_mtr_to_df hlpr_gene_set_name hlpr_geom_trajectory_fit hlpr_image_add_on hlpr_join_with_color_by hlpr_labs_add_on hlpr_normalize_imap hlpr_normalize_vctr hlpr_one_distinct hlpr_process_spatial_correlation_cluster hlpr_save_spata_object hlpr_scatterplot hlpr_smooth hlpr_smooth_shiny hlpr_subset_across hlpr_summarize_trajectory_df hlpr_vector_projection hlpr_widen_trajectory_df

#' @import SingleCellExperiment
#'
NULL

#' @keywords internal
hide_unit <- function(input){

  unit <- stringr::str_remove(string = input, pattern = regex_dist_value)

}


#' Make sure that barcodes are spata-like
#'
#' @param input A matrix with columns = barcodes, or a data.frame with a barcode-variable
#' @param sample_name Character value.
#' @keywords internal
#' @export
hlpr_add_barcode_suffix <- function(input, sample_name){

  pattern <- stringr::str_c("_", sample_name, "$", sep = "")

  if(base::is.data.frame(input)){

    input <-
      dplyr::mutate(.data = input,
                    barcodes = dplyr::case_when(
                      stringr::str_detect(barcodes, pattern = pattern) ~ barcodes,
                      !stringr::str_detect(barcodes, pattern = pattern) ~ stringr::str_c(barcodes, {{sample_name}}, sep = "_")
                    ))


  } else {

    barcodes <- base::colnames(input)

    barcodes <-
      dplyr::case_when(
        stringr::str_detect(barcodes, pattern = pattern) ~ barcodes,
        !stringr::str_detect(barcodes, pattern = pattern) ~ stringr::str_c(barcodes, {{sample_name}}, sep = "_")
      )

    base::colnames(input) <- barcodes

  }

 return(input)

}

#' @title Adds old coordinates
#'
#' @description Adds old coordinates of subsetted object to
#' plot_df in \code{plotSurface()}.
#'
#' @inherit check_object params
#' @param plot_df The plot_df.
#' @param complete Logical.
#'
#' @export
#' @keywords internal
hlpr_add_old_coords <- function(object, plot_df, complete){

  # currently deprecated!
  if(FALSE){

    old_coords_df <- object@information$old_coordinates

    cnames <- base::colnames(plot_df)
    variable <- cnames[!cnames %in% coords_df_vars]

    res_df <-
      dplyr::add_row(.data = plot_df,
                     barcodes = old_coords_df$barcodes,
                     sample = old_coords_df$sample,
                     x = old_coords_df$x,
                     y = old_coords_df$y)

    variable_vec <- res_df[[variable]]

    if(base::is.factor(variable_vec)){

      variable_vec <- base::factor(x = variable_vec,
                                   levels = c(base::levels(variable_vec), "subs.by.segment")
                                   )

      res_df[[variable]][base::is.na(res_df[[variable]])] <- "subs.by.segment"

    }

  } else {

    res_df <- plot_df

  }


 return(res_df)
}


#' @title Adjusts the size of discrete legend points
#'
#' @inherit check_pt params
#' @param variable The variable mapped to the color/fill aesthetic.
#' @param aes The aesthetic used.
#' @keywords internal
#' @export
hlpr_adjust_legend_size <- function(variable, aes, pt_size){

  if(!base::is.numeric(variable)){

    if(aes == "color"){

      ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = pt_size * 2.5)))

    } else if(aes == "fill"){

      ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(size = pt_size * 2.5)))

    }

  } else {

    list()

  }

}


#' Assign arguments to calling function
#'
#' @description Mediates between the input of the calling function and
#' the default instructions within the spata object. Every 'default'-argument
#' of the calling function that is NULL is replaced by what is defined as
#' the default.
#'
#' @inherit check_object params
#' @keywords internal
#' @export
hlpr_assign_arguments <- function(object){

  check_object(object)

  default_instructions <- getDefaultInstructions(object)

  ce <- rlang::caller_env()

  cfn <- rlang::caller_fn()

  cargs <- rlang::fn_fmls_names(fn = cfn)

  default_args <- cargs[cargs %in% methods::slotNames(default_instructions_object)]

  for(arg in default_args){

    arg_value <-
      base::parse(text = arg) %>%
      base::eval(envir = ce)

    if(base::is.null(arg_value)){

      arg_value <- methods::slot(default_instructions, name = arg)

      base::assign(
        x = arg,
        value = arg_value,
        envir = ce
      )
    }

  }

  base::invisible(TRUE)

}


#' @title Calculates breaks for heatmap according to input matrix
#'
#' @keywords internal
#' @export
hlpr_breaks <- function(mtr, length_out){

  quantiles <-
    base::as.numeric(mtr) %>%
    stats::quantile()

  breaks <- base::seq(quantiles[2], quantiles[4], length.out = length_out)

 return(breaks)

}


#' @title Compare samples within an object
#'
#' @description Checks whether the sample column in \code{df}
#' complies with the sample names of the provided object.
#'
#' @param object A valid spata-object.
#' @param df A data.frame
#' @param messages The message vector that is generated by the function in which
#' \code{hlpr_compare_samples()} is called in.
#'
#' @return Updated message vector.
#'
#' @details This function is to be used within the \code{SPATA::validateSpataObject()}-
#' functions. It expands the message-vector that is being generated by the calling
#' function and returns it.
#' @keywords internal
#' @export
hlpr_compare_samples <- function(object, df, messages){

  df_samples <- stringr::str_c(base::unique(df$sample), collapse = ", ") %>% base::sort()
  o_samples <- stringr::str_c(samples(object), collapse = ", ") %>% base::sort()

  if(!base::identical(df_samples, o_samples)){

    feedback <-
      stringr::str_c("Inavlid samples in column 'sample'.:",
                     "\n In coordinates: ", df_samples,
                     "\n In object:      ", o_samples,
                     sep = "")

    messages <-
      base::append(x = messages,
                   values = feedback)

  }

  return(messages)

}


#' @keywords internal
#' @export
hlpr_display_title <- function(display_title, title){

  if(base::isTRUE(display_title)){

    add_on <- ggplot2::labs(title = title)

   return(add_on)

  } else {

   return(NULL)

  }

}

#' @keywords internal
#' @export
hlpr_display_subtitle <- function(display_subtitle, subtitle){

  if(base::isTRUE(display_subtitle)){

    add_on <- ggplot2::labs(subtitle = subtitle)

   return(add_on)

  } else {

   return(NULL)

  }

}


#' @title Convert distance matrix to distance data.frame
#'
#' @description Reshapes a distance matrix with a wrapper around \code{reshape2::melt()}.
#'
#' @param dist_mtr A distance matrix gene expression - gene expression distances
#' @keywords internal
#' @export
hlpr_dist_mtr_to_df <- function(dist_mtr, varnames = c("gene1", "gene2")){

  dist_mtr <- base::as.matrix(dist_mtr)

  dist_mtr[base::upper.tri(x = dist_mtr, diag = TRUE)] <- NA

  reshape2::melt(data = dist_mtr,
                 na.rm = TRUE,
                 varnames = varnames,
                 value.name = "distance") %>%
    dplyr::mutate_if(.predicat = base::is.factor, .funs = base::as.character)

}

#' @keywords internal
#' @export
hlpr_drop_all_na <- function(df, ref_var = "variables", na_var = "values", verbose = TRUE){

  traj_length <- df$trajectory_order %>% base::unique() %>% base::length()

  if(base::any(base::is.na(df$values))){

    remove_vars <-
      dplyr::mutate(df, boolean_na = base::is.na(!!rlang::sym(na_var))) %>%
      dplyr::group_by(!!rlang::sym(ref_var)) %>%
      dplyr::summarise(total_na = base::sum(boolean_na)) %>%
      dplyr::filter(total_na == {{traj_length}}) %>%
      dplyr::pull(var = {{ref_var}})

    n_rv <- base::length(remove_vars)

    if(n_rv >= 1){

      ref1 <- adapt_reference(input = remove_vars, sg = "variable")
      ref2 <- scollapse(string = remove_vars, width = 100)

      msg <-
        glue::glue(
          "Discarding {n_rv} {ref1} as no changes between bins have been detected. Discarded {ref1}: '{ref2}'"
        )

      give_feedback(
        msg = msg,
        verbose = verbose
      )

      df <- dplyr::filter(df, !{{ref_var}} %in% remove_vars)

    }
  }

  return(df)

}

#' Removes the class part of a gene set string
#'
#' @param string Gene sets as a character vector
#'
#' @return Gene set name
#' @keywords internal
#' @export

hlpr_gene_set_name <- function(string){

  stringr::str_remove(string = string, pattern = "^.+?_")

}


#' Easy switch between geom line and and geom smooth
#'
#' @description To be used in plotTrajectoryFit()/-Customized()
#' @keywords internal
#' @export
hlpr_geom_trajectory_fit <- function(smooth, smooth_span, plot_df, ref_model, ref_variable, linesize, linealpha, smooth_se = FALSE){

  argument_list <- list(size = linesize, alpha = linealpha)

  #customized_df <- dplyr::filter(.data = plot_df, origin %in% c("Customized", ref_model))
  #expression_df <- dplyr::filter(.data = plot_df, origin %in% c("Residuals", ref_variable))

  # construct add on
  if(base::isTRUE(smooth)){

    out <-
      ggplot2::geom_smooth(
        data = plot_df,
        mapping = ggplot2::aes(linetype = origin),
        size = linesize,
        alpha = linealpha,
        method = "loess",
        span = smooth_span,
        formula = as.formula(y ~ x),
        se = smooth_se
      )

  } else {

    out <-
      ggplot2::geom_line(
        data = plot_df,
        mapping = ggplot2::aes(linetype = origin),
        size = linesize,
        alpha = linealpha
      )

  }

  return(out)

}


#' @title Provides the image as ggplot background
#'
#' @inherit check_sample params
#' @param image Image input.
#' @param display_image Logical value.
#'
#' @return Either null or a ggplot2::geom_annotation_raster
#' @keywords internal
#' @export
hlpr_image_add_on <- function(object, display_image, ...){

  deprecated(...)

  if(!containsImage(object) & base::isTRUE(display_image)){

    image_add_on <- NULL

    warning("`display_image` = TRUE but `spata2` object does not contain an image.")

  } else if(base::isTRUE(display_image)){

    sample_image <- getImage(object)

    if("Image" %in% base::class(sample_image)){

      image_raster <-
        grDevices::as.raster(x = sample_image)

      img_info <-
        image_raster %>%
        magick::image_read() %>%
        magick::image_info()

      st_image <-
        image_raster %>%
        magick::image_read()

      image_add_on <-
        ggplot2::annotation_raster(
          raster = st_image,
          xmax = 0,
          ymax = 0,
          xmin = img_info$width,
          ymin = img_info$height
        )

    } else {

      base::warning(glue::glue("Content of slot 'image' for sample '{of_sample}' must be of class 'Image' not of class '{base::class(sample_image)}'."))

      image_add_on <- list()

    }

  } else {

    image_add_on <- list()

  }

 return(image_add_on)

}

#' @keywords internal
#' @export
hlpr_image_add_on2 <- function(image){

  if(!base::is.null(image)){

    if(!"Image" %in% base::class(image)){

      base::warning("Argument 'image' is neither NULL nor an object of class 'Image'. ")
      image_add_on <- NULL

    } else {

      image_raster <- grDevices::as.raster(image)

      image_info <-
        magick::image_read(image_raster) %>%
        magick::image_info()

      image_flipped <-
        magick::image_read(image_raster)

      image_add_on <-
        ggplot2::annotation_raster(raster = image_flipped,
                                   xmin = 0, ymin = 0,
                                   xmax = image_info$width,
                                   ymax = image_info$height)

    }

  }

}

#' @keywords internal
#' @export
hlpr_image_to_df <- function(img){

  grDevices::as.raster(img) %>%
    base::as.matrix() %>%
    reshape2::melt(formula = x ~ y, value.name = "colors") %>%
    tibble::as_tibble() %>%
    dplyr::rename(x = Var1, y = Var2)

}


#' @title Join with single value
#'
#' @export
#' @keywords internal
#' @export
hlpr_join_with_color_by <- function(object, df, color_by = NULL, variables = NULL, ...){

  if(!base::is.null(color_by)){

    variables <- color_by

  }

  if(base::length(variables) >= 1){

    for(var in variables){

      if(isGene(object, var)){

        df <- joinWith(object, spata_df = df, genes = var, ...)

      } else if(isGeneSet(object, var)){

        df <- joinWith(object, spata_df = df, gene_set = var, ...)

      } else if(isFeature(object, var)) {

        df <- joinWith(object, spata_df = df, features = var, ...)

      }

    }

  }

  return(df)

}

#' @export
#' @keywords internal
#' @export
hlpr_join_with_aes <- hlpr_join_with_color_by


#' @title Return customized ggplot:labs()
#'
#' @description Helper function
#'
#' @param input The color_to argument
#' @param input_str Title-prefix. Should be one of \emph{'Genes:', 'Gene set:'} or \code{'Feature:'}.
#' @param color_str Legend title
#' @param display_title Logical. If set to FALSE only the legend-title will be specified.
#'
#' @return A customized \code{ggplot2::labs()}-function.
#' @keywords internal
#' @export
hlpr_labs_add_on <- function(input,
                             input_str,
                             color_str,
                             display_title){

  if(base::isTRUE(display_title)){

    if(base::length(input) > 5){

      input <- c(input[1:5], stringr::str_c("... +", (base::length(input)-5), sep = " "))

    }

    input_clpsd <- stringr::str_c(input, collapse = ", ")

    plot_title <- stringr::str_c(input_str, input_clpsd, sep = " ")

   return(ggplot2::labs(title = plot_title, color = color_str))

  } else {

   return(ggplot2::labs(color = color_str))

  }

}



#' @title Normalize gene or gene set values
#'
#' @description Helper function to use within \code{purrr::imap()}
#'
#' @param variable The variable to normalize (if matches requirements).
#' @param var_name The name of the variable to smooth.
#' @param aspect Gene or Gene set
#' @param subset A character vector of variable names that are to be normalized
#' @param pb An R6 progress bar object.
#'
#' @return A normalized variable (data.frame within \code{purrr::imap()})
#' @keywords internal
#' @export
hlpr_normalize_imap <- function(variable,
                                var_name,
                                aspect,
                                subset){

  if(!base::is.numeric(variable) | !var_name %in% subset){

     return(variable)

  } else if(base::all(variable == 0)){

      if(var_name == "mean_genes"){

        var_name <- "average"

      }

      base::warning(stringr::str_c(aspect, var_name, "contains only 0s. Returning NULL.", sep = " "))
     return(NULL)

  } else if(base::length(base::unique(variable)) == 1){

      if(var_name == "mean_genes"){

        var_name <- "average"

      }

      base::warning(stringr::str_c(aspect, var_name, "is uniformly expressed. Returning NULL.", sep = " "))
     return(NULL)

  } else {

      # normalize variable
      res <-
        (variable - base::min(variable)) /
        (base::max(variable) - base::min(variable))

      if(!base::any(base::is.na(res))){

     return(res)

      } else {

        base::warning(stringr::str_c(aspect, var_name, "normalization resulted in NaNs. Returning NULL.", sep = " "))
       return(NULL)

      }

  }

}



#' @inherit hlpr_normalize_imap params title
#'
#' @return A normalized variable
#' @export
#' @keywords internal
#' @export
hlpr_normalize_vctr <- function(variable){

  res <-
    (variable - base::min(variable)) /
    (base::max(variable) - base::min(variable))

  if(base::any(base::is.na(res))){

   return(variable)

  } else {

   return(res)

  }


}


#' @title Number of distinct values equal to 1?
#'
#' @description Helper function to use within purrr::map_lgl. Returns
#' TRUE if the gene row of the provided rna assay is uniformly expressed.
#'
#' @param x Input variable.
#' @param rna_assay Expression matrix
#' @param pb Progress bar object
#'
#' @export
#'
#' @keywords internal
#' @export
hlpr_one_distinct <- function(x, rna_assay, pb = NULL, verbose = TRUE){

  if(!base::is.null(pb)){pb$tick()}

  res <- dplyr::n_distinct(rna_assay[x,]) == 1

 return(res)

}


#' @title Process spatial correlation results
#'
#' @description Helper function to be used in \code{clusterSpatialCorrelationResults()}.
#' Takes a cutree data.frame and a distance data.frame and returns a named list:
#'
#' \itemize{
#'  \item{\emph{assessment_df}: A data.frame that attempts to evaluate all cluster's
#'   quality by providing the average distance between it's genes.}
#'   \item{\emph{distances_list}: A named list of data.frames. Each data.frame contains the
#'   gene-gene distances between all genes the cluster it corresponds to contains.}
#'   \item{\emph{gene_names_list}: A named list of character vectors. Each vector contains the
#'   unique gene names of the cluster it corresponds to.}
#'   \item{\emph{k}: k-value}
#'   \item{\emph{h}: h-value}
#'   }
#'
#' @param cutree_df Data.frame of two variables: gene and cluster-belonging.
#' @param dist_df Data.frame of three variables: gene1, gene2, distance
#'
#' @keywords internal
#' @export
hlpr_process_spatial_correlation_cluster <- function(cutree_df, dist_df, input){

  cluster_list_genes <-
    purrr::map(.x = base::unique(cutree_df$cluster),
               cutree_df = cutree_df,
               .f = function(cluster, cutree_df){

                 dplyr::filter(.data = cutree_df, cluster == {{cluster}}) %>%
                   dplyr::pull(var = "genes")

               }) %>%
    purrr::set_names(nm = base::unique(cutree_df$cluster))


  cluster_list_distances <-
    purrr::map(.x = cluster_list_genes,
               dist_df = dist_df,
               .f = function(cluster_genes, dist_df){

                 arranged_dist_df <-
                   dplyr::filter(dist_df,
                                 gene1 %in% {{cluster_genes}} &
                                   gene2 %in% {{cluster_genes}}) %>%
                   dplyr::filter(gene1 != gene2) %>%
                   dplyr::arrange(distance) %>%
                   tibble::as_tibble()

               }) %>%
    purrr::keep(.p = ~ base::nrow(.x) >= 2)

  cluster_df_assessment <-
    purrr::map_df(.x = cluster_list_distances,
                  .f = function(dist_df){

                    dplyr::summarise(.data = dist_df,
                                     mean_distance = base::mean(distance),
                                     median_distance = stats::median(distance),
                                     n_genes = dplyr::n_distinct(gene1))


                  }) %>%
    dplyr::mutate(cluster = base::names(cluster_list_distances)) %>%
    dplyr::select(cluster, dplyr::everything()) %>%
    dplyr::arrange(mean_distance)

  cluster_list_genes <-
    purrr::map(.x = cluster_list_distances,
               .f = function(dist_df){

                 dplyr::select(dist_df, gene1, gene2) %>%
                   base::as.matrix() %>%
                   base::t() %>%
                   base::as.character() %>%
                   base::unique()

               })

  cluster_list <-
    list("assessment_df" = cluster_df_assessment,
         "distances_list" = cluster_list_distances,
         "gene_names_list" = cluster_list_genes,
         "h" = input$h,
         "k" = input$k,
         "method" = input$method)

 return(cluster_list)

}


#' @title Save spata object inside functions
#'
#' @inherit argument_dummy params
#' @inherit check_object params
#' @param object_file The directory under which to store the object.
#' @param ref_step Character value.
#'
#' @keywords internal
#' @export
hlpr_save_spata_object <- function(object, object_file, ref_step, verbose){

  if(!base::is.null(object_file)){

    if(base::isTRUE(verbose)){glue::glue(base::message("Step {ref_step}: Saving spata-object."))}

    base::saveRDS(object, file = object_file)

    if(base::isTRUE(verbose)){

      base::message(glue::glue("The spata-object has been saved under '{object_file}'."))
      base::message("Done.")

    }

  } else {

    if(base::isTRUE(verbose)){
      base::message(glue::glue("Skipping step {ref_step} (saving) as 'output_path' was set to NULL."))
      base::message("Done.")
    }

  }


}




#' @title Color to + scatterplot helper
#'
#' @inherit joinWith params
#' @param color_to Named list.
#'
#' @return A named list. Slot \emph{data} contains a data.frame as input for \code{data} of \code{ggplot2::ggplot()} and
#' slot \emph{add_on} contains a list of ggplot-add-ons.
#'
#' @keywords internal
#' @export
hlpr_scatterplot <- function(object,
                             spata_df,
                             color_to,
                             method_gs = "mean",
                             display_title = FALSE,
                             pt_size = 2,
                             pt_alpha = 1,
                             pt_clrsp = "inferno",
                             pt_clrp = "milo",
                             pt_clr = "black",
                             smooth = FALSE,
                             smooth_span = 0.02,
                             normalize = TRUE,
                             verbose = TRUE,
                             complete = FALSE,
                             ...){

  # if feature
  if("features" %in% base::names(color_to)){

    feature <- color_to$features

    spata_df <- joinWithFeatures(object = object,
                                 spata_df = spata_df,
                                 features = feature,
                                 smooth = smooth,
                                 smooth_span = smooth_span,
                                 verbose = verbose)

    if(is_subsetted_by_segment(object)){

      spata_df <-
        hlpr_add_old_coords(
          object = object,
          plot_df = spata_df,
          complete = complete
        )

    }

    # assemble ggplot add on
    ggplot_add_on <- list(
      ggplot2::geom_point(data = spata_df, size = pt_size, alpha = pt_alpha,
                          mapping = ggplot2::aes(color = .data[[feature]])),
      confuns::scale_color_add_on(aes = "color", clrsp = pt_clrsp, clrp = pt_clrp,
                                  variable = spata_df[[feature]], ...),
      hlpr_adjust_legend_size(aes = "color", pt_size = pt_size, variable = spata_df[[feature]]),
      ggplot2::labs(color = feature)
    )

    # if gene set
  } else if("gene_sets" %in% base::names(color_to)){

    gene_set <- color_to$gene_sets

    spata_df <- joinWithGeneSets(object = object,
                                 spata_df = spata_df,
                                 gene_sets = gene_set,
                                 method_gs = method_gs,
                                 smooth = smooth,
                                 smooth_span = smooth_span,
                                 normalize = normalize,
                                 verbose = verbose)

    # currently not in use
    if(is_subsetted_by_segment(object)){

      spata_df <-
        hlpr_add_old_coords(
          object = object,
          plot_df = spata_df,
          complete = complete
        )

    }


    # display informative title
    if(base::isTRUE(display_title)){

      title <-
        stringr::str_c("Gene set: ", gene_set, " (", method_gs, ")", sep = "")

    } else {

      title <- NULL

    }


    # assemble ggplot add-on
    ggplot_add_on <- list(
      ggplot2::geom_point(data = spata_df, size = pt_size, alpha = pt_alpha,
                          mapping = ggplot2::aes(color = .data[[gene_set]])),
      confuns::scale_color_add_on(aes = "color", clrsp = pt_clrsp, ...),
      ggplot2::labs(color = NULL, title = title, caption = gene_set)
    )

    # if genes
  } else if("genes" %in% base::names(color_to)){

    genes <- color_to$genes

    spata_df <- joinWithGenes(object = object,
                              spata_df = spata_df,
                              genes = color_to$genes,
                              average_genes = TRUE,
                              smooth = smooth,
                              smooth_span = smooth_span,
                              normalize = normalize,
                              verbose = verbose)

    # currently not in use
    if(is_subsetted_by_segment(object)){

      spata_df <-
        hlpr_add_old_coords(
          object = object,
          plot_df = spata_df,
          complete = complete
        )

    }


    if(base::isTRUE(display_title)){

      title <-
        glue::glue("Gene: {genes}",
                   genes = glue::glue_collapse(x = color_to$genes, sep = ", ", width = 7, last = " and "))

    } else {

      title <- NULL

    }


    # assemble ggplot add-on
    ggplot_add_on <- list(
      ggplot2::geom_point(data = spata_df, size = pt_size, alpha = pt_alpha,
                          mapping = ggplot2::aes(color = .data[["mean_genes"]])),
      confuns::scale_color_add_on(aes = "color", clrsp = pt_clrsp, ...),
      ggplot2::labs(color = base::ifelse(base:::length(genes) == 1, genes, "Mean\nExpr."), title = title)
    )

    # else if color_to has not been specified
  } else if("color" %in% base::names(color_to)){

    ggplot_add_on <-
      ggplot2::geom_point(data = spata_df, size = pt_size, alpha = pt_alpha, color = color_to$color)

  }

 return(
    list(data = spata_df,
         add_on = ggplot_add_on
    )
  )

}

#' @title Smooth variables spatially
#'
#' @description Helper function to be used within \code{purrr::imap()}
#'
#' @param variable The variable to smooth
#' @param var_name Name of the variable to smooth
#' @param coords_df Data.frame that contains x and y coordinates
#' @param smooth_span Span to smooth with
#' @param aspect Gene or Gene set
#' @param subset Vector of variable names to smooth
#' @param pb A progress-bar or NULL.
#'
#' @return Smoothed variable (data.frame within \code{purrr::imap()})
#' @export
#' @keywords internal
#' @export
hlpr_smooth <- function(variable,
                        var_name,
                        coords_df,
                        smooth_span,
                        aspect,
                        subset,
                        pb = NULL){

  if(!base::is.null(pb)){

    pb$tick()

  }

  data <-
    base::cbind(variable, coords_df[, c("x", "y")]) %>%
    magrittr::set_colnames(value = c("rv", "x", "y"))

  if(!var_name %in% subset){

   return(variable)

  } else if(!base::is.numeric(data$rv)){

    if(var_name == "mean_genes"){

      var_name <- "average"

    }

   return(variable)

  } else if(base::any(base::is.na(data$rv)) |
            base::any(base::is.nan(data$rv))|
            base::any(base::is.infinite(data$rv))){

    if(var_name == "mean_genes"){

      var_name <- "average"

    }

    n <- base::sum(!is_number(data$rv))

    mn <- base::min(data$rv[is_number(data$rv)])

    warning(
      glue::glue(
        "Exchanging {n} Inf/NA values with {mn} for smoothing."
      )
    )

    data$rv[!is_number(data$rv)] <- mn

    smooth_span <- smooth_span/10

    model <- stats::loess(formula = rv ~ x * y, data = data, span = smooth_span)

    out <- stats::predict(object = model)

    return(out)

  } else {

    smooth_span <- smooth_span/10

    model <- stats::loess(formula = rv ~ x * y, data = data, span = smooth_span)

    out <- stats::predict(object = model)

   return(out)

  }

}

#' @title Smooth variable spatially in mini-shiny-apps
#'
#' @description Helper function to use independently (or in a pipe)
#'
#' @inherit hlpr_smooth params
#'
#' @return Data.frame with the smoothed variable specified in \code{variable}.
#' @keywords internal
#' @export
hlpr_smooth_shiny <- function(variable,
                              coords_df,
                              smooth_span){

  base::colnames(coords_df)[base::which(base::colnames(coords_df) == variable)] <- "response_variable"

  if(base::is.numeric(coords_df$response_variable)){

    model <- stats::loess(formula = response_variable ~ x * y, span = smooth_span, data = coords_df)

    smoothed_df_prel <-
      broom::augment(model) %>%
      dplyr::select(x, y, .fitted) %>%
      magrittr::set_colnames(value = c("x", "y", variable))

    selected_df <- dplyr::select(coords_df, -c("x", "y", "response_variable"))

    smoothed_df <-
      base::cbind(smoothed_df_prel, selected_df) %>%
      dplyr::select(barcodes, sample, x, y, dplyr::everything()) %>%
      as.data.frame()


    # if coords_df derived from trajectory analysis
    if("trajectory_order" %in% base::colnames(coords_df)){

      smoothed_df$trajectory_order <- coords_df$trajectory_order

    }

    if(base::nrow(smoothed_df) == base::nrow(coords_df)){

     return(smoothed_df)

    } else {

      shiny::showNotification(ui = "Smoothing failed. Return original values.",
                              type = "warning")

     return(coords_df)

    }


  } else {

    shiny::showNotification(ui = "Can not smooth features that aren't of class 'numeric'. Skip smoothing.",
                            type = "warning")

    base::colnames(coords_df)[base::which(base::colnames(coords_df) == "response_variable")] <- variable

   return(coords_df)

  }

}


#' Subset the across-variables
#'
#' @description Checks across and across_subset input and if at least one
#' of the across_subset values exists filters the data accordingly.
#'
#' @param data A data.frame that contains the variable specified in \code{across}.
#' @param across Character value. Denotes the discrete variable in the data.frame
#' across which something is to be analyzed or displayed.
#' @param across_subset Character vector. The groups of interest that the \code{across}-
#' variable contains.
#'
#' @return A filtered data.frame, informative messages or an error.
#' @export
#'
#' @keywords internal
#' @export
hlpr_subset_across <- function(data, across, across_subset){


  if(base::is.null(across_subset)){

   return(data)

  } else {

    data[[across]] <- confuns::unfactor(data[[across]])

    ref.against <-
      glue::glue("'{across}'-variable of the specified spata-object") %>%
      base::as.character()

    across_subset <-
      confuns::check_vector(
        input = across_subset,
        against = base::unique(data[[across]]),
        verbose = TRUE,
        ref.input = "'across_subset'",
        ref.against = ref.against) %>%
      base::as.character()

    data <- dplyr::filter(.data = data, !!rlang::sym(across) %in% {{across_subset}})

   return(data)

  }

}



#' @title Join and summarize compiled trajectory data.frames
#'
#' @description Joins a compiled trajectory data.frame with
#' the desired information and summarizes those.
#'
#' @inherit argument_dummy params
#' @inherit check_compiled_trajectory_df params
#' @inherit check_method params
#' @inherit check_object params
#' @inherit check_trajectory_binwidth params
#' @inherit check_variables params
#' @inherit normalize params
#'
#' @details Initially the compiled trajectory data.frame of the specified trajectory
#' is joined with the respective input of variables via \code{joinWithVariables()}.
#'
#' The argument \code{binwidth} refers to the amount of which the barcode-spots of the
#' given trajectory will be summarized with regards to the trajectory's direction:
#' The amount of \code{binwidth} and the previously specified 'trajectory width' in \code{createTrajectories()}
#' determine the length and width of the sub-rectangles in which the rectangle the
#' trajectory embraces are splitted and in which all barcode-spots are binned.
#' Via \code{dplyr::summarize()} the variable-means of every sub-rectangle are calculated.
#' These mean-values are then arranged according to the trajectory's direction.
#'
#' Eventually the data.frame is shifted via \code{tidyr::pivot_longer()} to a data.frame in which
#' every observation refers to the mean-value of one of the specified variable-elements (e.g. a specified
#' gene set) of the particular sub-rectangle. The returned data.frame contains the following variables:
#'
#' \itemize{
#'  \item{\emph{trajectory_part}: Character. Specifies the trajectory's sub-part of the observation. (Negligible if there is
#'  only one trajectory part.)}
#'  \item{\emph{trajectory_part_order}: Numeric. Indicates the order within the trajectory-part. (Negligible if there is
#'  only one trajectory part.)}
#'  \item{\emph{trajectory_order}: Numeric. Indicates the order within the whole trajectory.}
#'  \item{\emph{gene_sets, genes or features}: Character. The respective gene sets, gene or feature the value refers to.}
#'  \item{\emph{values}: Numeric. The actual summarized values.}}
#'
#' @export
#' @keywords internal
#' @export
hlpr_summarize_trajectory_df <- function(object,
                                         ctdf,
                                         binwidth = 5,
                                         variables,
                                         whole_sample = FALSE,
                                         method_gs = "mean",
                                         verbose = TRUE,
                                         summarize_with = c("mean"),
                                         with_sd = TRUE,
                                         drop_all_na = TRUE,
                                         normalize = FALSE){


  # 1. Control --------------------------------------------------------------

  # lazy check
  check_object(object)
  check_compiled_trajectory_df(ctdf = ctdf)

  stopifnot(base::is.numeric(binwidth))
  stopifnot(base::is.character(variables))
  stopifnot(base::is.logical(verbose))

  # adjusting check

  variables <- check_variables(variables = variables,
                               all_features = getFeatureNames(object, of_class = c("numeric", "integer")),
                               all_gene_sets = getGeneSets(object),
                               all_genes = getGenes(object),
                               max_slots = 3,
                               max_length = Inf)

  variables_list <- variables

  if("features" %in% base::names(variables)){

    variables[["features"]] <- check_features(object = object,
                                     features = variables[["features"]],
                                     valid_classes = c("numeric", "integer"))

  }

  # -----

  # 2. Summarize and join compiled trajectory data.frame --------------------

  var_df <-
    joinWithVariables(
      object = object,
      variables = variables,
      method_gs = method_gs,
      average_genes = FALSE,
      smooth = FALSE,
      normalize = FALSE,
      verbose = verbose
    )

  # join data.frame with variables
  joined_df <-
    dplyr::mutate(
      .data = ctdf,
      order_binned = plyr::round_any(
        x = projection_length,
        accuracy = binwidth,
        f = base::floor
        )
      ) %>%
    dplyr::left_join(
      x = .,
      y = var_df %>% dplyr::select(-x, -y, -sample),
      by = "barcodes"
    )


  # keep only variables that were successfully joined
  variables <- base::unlist(variables, use.names = FALSE)
  variables <- variables[variables %in% base::colnames(joined_df)]

  # summarize data.frame
  confuns::give_feedback(
    msg = glue::glue("Summarizing trajectory data.frame with {summarize_with}."),
    verbose = verbose
  )

  summarized_df <-
    dplyr::group_by(.data = joined_df, trajectory_part, order_binned) %>%
    dplyr::summarise(
      dplyr::across(
        .cols = dplyr::all_of(x = variables),
        .fns = list(
          mean = ~ mean(.x, na.rm = TRUE),
          median = ~ median(.x, na.rm = TRUE)
        )[[summarize_with]]
      ),
      .groups = "drop_last") %>%
    dplyr::mutate(trajectory_part_order = dplyr::row_number()) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(trajectory_order = dplyr::row_number()) %>%
    dplyr::select(-order_binned)

  if(base::isTRUE(normalize)){

    if(base::isTRUE(whole_sample)){

      confuns::give_feedback(
        msg = "Normalizing values by 'whole sample'.",
        verbose = verbose
      )

      pb <- confuns::create_progress_bar(total = base::length(variables))

      for(var_name in variables){

        if(base::isTRUE(verbose)){ pb$tick() }

        range_var <- base::range(var_df[[var_name]], na.rm = TRUE)

        var <- summarized_df[[var_name]]

        ref_min <- base::min(range_var)
        ref_max <- base::max(range_var)

        summarized_df[[var_name]] <- (var - ref_min)/(ref_max - ref_min)

      }

      shifted_df <-
        tidyr::pivot_longer(
          data = summarized_df,
          cols = dplyr::all_of(x = variables),
          names_to = "variables",
          values_to = "values"
        )

    } else {

      confuns::give_feedback(
        msg = "Normalizing values by 'trajectory'.",
        verbose = verbose
      )

      shifted_df <-
        tidyr::pivot_longer(
          data = summarized_df,
          cols = dplyr::all_of(x = variables),
          names_to = "variables",
          values_to = "values"
        ) %>%
        dplyr::group_by(variables) %>%
        dplyr::mutate(values = confuns::normalize(x = values)) %>%
        dplyr::ungroup()

    }

  } else {

    shifted_df <-
      tidyr::pivot_longer(
        data = summarized_df,
        cols = dplyr::all_of(variables),
        names_to = "variables",
        values_to = "values"
      )

  }

  if(base::isTRUE(with_sd)){

    give_feedback(
      msg = "Summarizing standard deviation.",
      verbose = verbose
    )

    sd_df <-
      dplyr::group_by(.data = joined_df, trajectory_part, order_binned) %>%
      dplyr::summarise(
        dplyr::across(
          .cols = dplyr::all_of(x = variables),
          .fns = ~ stats::sd(.x, na.rm = TRUE)
        ),
        .groups = "drop_last"
      ) %>%
      dplyr::mutate(trajectory_part_order = dplyr::row_number()) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(trajectory_order = dplyr::row_number()) %>%
      dplyr::select(-order_binned) %>%
      tidyr::pivot_longer(
        cols = dplyr::all_of(variables),
        names_to = "variables",
        values_to = "values_sd"
      )

    shifted_df <-
      dplyr::left_join(
        x = shifted_df,
        y = sd_df[,c("trajectory_order", "variables", "values_sd")],
        by = c("trajectory_order", "variables")
      )

  }

  if(base::isTRUE(FALSE)){

    df <- hlpr_drop_all_na(df = shifted_df, verbose= verbose)

  }


  confuns::give_feedback(msg = "Done.", verbose = verbose)

  # -----

 return(shifted_df)

}


#' @keywords internal
#' @export
hlpr_transfer_slot_content <- function(recipient, donor, skip = character(0), verbose = TRUE){

  snames_rec <- methods::slotNames(recipient)
  snames_don <- methods::slotNames(donor)

  for(snr in snames_rec){

    if(snr %in% snames_don & !snr %in% skip){

      give_feedback(
        msg = glue::glue("Transferring content of slot '{snr}'."),
        with.time = FALSE,
        verbose = verbose
      )

      recipient <-
        base::tryCatch({

          methods::slot(recipient, name = snr) <-
            methods::slot(donor, name = snr)

          recipient

        }, error = function(error){

          give_feedback(msg = error$message, verbose = verbose, with.time = FALSE)

          recipient

        })

    }

  }

  return(recipient)

}



#' @title Perform vector projection
#'
#' @description Helper function for trajectory-analysis to use within
#' \code{dplyr::mutate()}. Performs vector-projection with a spatial position
#' and a local coordinates system to arrange the barcodes that fall into a
#' trajectory square according to the trajectory direction.
#'
#' @param lcs A data.frame specifying the local coordinates system with variables
#' \code{x, y, xend, yend} and the observations \emph{local length axis} and
#' \emph{local width axis}.
#' @param x_coordinate x-coordinate
#' @param y_coordinate y-coordinate
#'
#' @return The projected length.
#'
#' @export
#' @keywords internal
#' @export
hlpr_vector_projection <- function(lcs, x_coordinate, y_coordinate){

  # vector from point of interest to origin of local coord system: 'vto'
  vto <- c((x_coordinate - lcs$x[1]), (y_coordinate - lcs$y[1]))

  # define local length axis (= relocated trajectory): 'lla'
  lla <- c((lcs$xend[1] - lcs$x[1]), (lcs$yend[1] - lcs$y[1]))

  # define lambda coefficient
  lambda <-
    ((vto[1] * lla[1]) + (vto[2] * lla[2])) / base::sqrt((lla[1])^2 + (lla[2])^2)^2

  # projecting vector on length axis
  pv <- lambda * (lla)

  # compute the length of the projected vector
  res <- base::sqrt((pv[1])^2 + (pv[2])^2)

 return(res)


}



#' @title Widen trajectory data.frame
#'
#' @param stdf A summarized trajectory data.frame - output
#' from \code{hlpr_summarize_trajectory_df()}.
#' @param variable Character value. The variable of \code{stdf}:
#' \emph{'gene_sets', 'genes'} or \emph{'features'}.
#'
#'
#' @return A widened data.frame in which  every observation is a
#' trajectory and every variable describes the value of the trajectory
#' at a specific position.
#'
#' @export
#'
#' @keywords internal
#' @export
hlpr_widen_trajectory_df <- function(stdf,
                                     variable){

  tidyr::pivot_wider(data = tdf,
                     id_cols = dplyr::all_of(c("trajectory_order", variable)),
                     names_from = dplyr::all_of(c("trajectory_part", "trajectory_order")),
                     values_from = "values")


}
kueckelj/SPATA2 documentation built on March 16, 2024, 10:25 a.m.