R/traj_utils.R

Defines functions add_data_to_model calculate_traj_metrics cal_cor_dist .change_info

Documented in add_data_to_model cal_cor_dist calculate_traj_metrics

.change_info <- function(model_ref, match_result){

  model_ref$cell_ids <- match_result[["cell_pair"]][["simulation"]]
  model_ref[["progressions"]][["cell_id"]] <- match_result[["cell_pair"]][["simulation"]]
  times <- dim(model_ref[["milestone_percentages"]])[1]/length(model_ref[["cell_ids"]])
  model_ref[["milestone_percentages"]][["cell_id"]] <- rep(match_result[["cell_pair"]][["simulation"]], times)

  return(model_ref)

}


#' Calculate the Correlation Between Geodesic Distances
#'
#' This function calculate the correlation between geodesic distances which refer
#' to the relative distance of one cell to all other cell in the trajectory. The
#' result is obtained from the mean values of choosing 0.05, 0.1, 0.15, 0.2,
#' 0.3, 0.35, 0.4, 0.45, 0.5 percent cells as waypoints.
#'
#'
#' @param model_ref,model_sim A matrix.
#' @param match_result The result generated by \code{\link[simutils]{match_cells}} function.
#' @return A value ranged from 0 to 1
#' @export
#' @importFrom dplyr arrange desc
#' @importFrom stats quantile
#'
#' @examples
#' # Check the docker status
#' # dynwrap::test_docker_installation(detailed = TRUE)
#'
#' # Open Terminal and execute the command
#' # docker pull dynverse/ti_slingshot:v1.0.3
#'
#' # Generate a reference data
#' set.seed(1)
#' a <- matrix(rpois(n = 2500, lambda = 2), nrow = 50)
#' rownames(a) <- paste0("cell_", 1:ncol(a))
#' colnames(a) <- paste0("gene_", 1:nrow(a))
#' dataset_ref <- dynwrap::wrap_expression(
#'   counts = a,
#'   expression = log2(a+1)
#' )
#' # Trajectory inference

#'
#' #Generate a simulation data
#' set.seed(1)
#' b <- matrix(rpois(n = 2500, lambda = 1.5), nrow = 50)
#' rownames(b) <- paste0("fcell_", 1:ncol(b))
#' colnames(b) <- paste0("fgene_", 1:nrow(b))
#' dataset_sim <- dynwrap::wrap_expression(
#'   counts = b,
#'   expression = log2(b+1)
#' )
#' # Trajectory inference

#'
#' # Match cells
#' match_result <- match_cells(ref_data = dataset_ref,
#'                             sim_data = dataset_sim)


cal_cor_dist <- function(model_ref, model_sim, match_result){
  m <- arrange(match_result$cell_pair,
               desc(match_result$cell_pair$match_value))
  model_ref <- .change_info(model_ref = model_ref,
                            match_result = match_result)
  model_sim <- dynwrap::add_cell_waypoints(trajectory = model_sim,
                                           num_cells_selected = 50)
  model_ref <- dynwrap::add_cell_waypoints(trajectory = model_ref,
                                           num_cells_selected = 50)
  waypoints_num <- round(quantile(1:length(model_ref[["cell_ids"]]), seq(0.05, 0.5, 0.05)))
  record <- data.frame()
  for(cell_num in waypoints_num){
    cat(paste0("Waypoints of ", cell_num, "\n"))
    waypoints_cell <- m$simulation[1:cell_num]
    model_sim$waypoint_cells <- waypoints_cell
    model_ref$waypoint_cells <- waypoints_cell
    Correlation <- dyneval::calculate_metrics(dataset = model_ref,
                                              model = model_sim,
                                              metrics = "correlation")
    record <- rbind(record, Correlation)
  }
  return(cor_dist = list(cor_dist = mean(record$correlation),
                         every_result = record$correlation))
}


#' Calculate Four Metrics to Compare Two Trajectories
#'
#' @param model_ref,model_sim The object generated by \code{\link[dynwrap]{infer_trajectory}}.
#' @param algorithm Optional. Which algorithm used for matching cells in simulated and real data. Improved_Hungarian (default), Hungarian.
#'
#' @return A list containing the results of four metrics.
#' @export
#'
#' @examples
#' # Check the docker status
#' # dynwrap::test_docker_installation(detailed = TRUE)
#'
#' # Generate a reference data
#' set.seed(1)
#' a <- matrix(rpois(n = 2500, lambda = 2), nrow = 50)
#' rownames(a) <- paste0("cell_", 1:ncol(a))
#' colnames(a) <- paste0("gene_", 1:nrow(a))
#' dataset_ref <- dynwrap::wrap_expression(
#'   counts = a,
#'   expression = log2(a+1)
#' )

#' #Generate a simulation data
#' set.seed(1)
#' b <- matrix(rpois(n = 2500, lambda = 1.5), nrow = 50)
#' rownames(b) <- paste0("fcell_", 1:ncol(b))
#' colnames(b) <- paste0("fgene_", 1:nrow(b))
#' dataset_sim <- dynwrap::wrap_expression(
#'   counts = b,
#'   expression = log2(b+1)
#' )


calculate_traj_metrics <- function(model_ref,
                                   model_sim,
                                   algorithm){
  ## Match cells
  message("Match cells in simulated and real data...")
  match_result <- match_cells(ref_data = model_ref,
                              sim_data = model_sim,
                              algorithm = algorithm)

  ## Calculate correlation
  message("Calculating correlation of distances...")
  cor_dist <- cal_cor_dist(model_ref = model_ref,
                           model_sim = model_sim,
                           match_result = match_result)

  ## Change information in reference data
  message("Change information in reference data...")
  model_ref <- .change_info(model_ref = model_ref,
                            match_result = match_result)

  ## Calculate metrics
  message("Calculating HIM...")
  him <- dyneval::calculate_metrics(dataset = model_ref,
                                    model = model_sim,
                                    metrics = "him")
  message("Calculating F1 branches...")
  f1_branches <- dyneval::calculate_metrics(dataset = model_ref,
                                            model = model_sim,
                                            metrics = "F1_branches")
  message("Calculating F1 milestones...")
  f1_milestones <- dyneval::calculate_metrics(dataset = model_ref,
                                              model = model_sim,
                                              metrics = "F1_milestones")
  message("Done...")
  return(dplyr::lst(HIM = him %>% dplyr::pull(him) %>% as.numeric(),
                    F1_branches = f1_branches %>% dplyr::pull(F1_branches),
                    F1_milestones = f1_milestones %>% dplyr::pull(F1_milestones),
                    Cor_dist = list(cor_dist = cor_dist,
                                    match_result = match_result))
         )
}


#' Add Gene Expression Data to Model
#'
#' This function is used to add the gene expression data into the result of
#' trajectory inference generated by \code{\link[dynwrap]{infer_trajectory}} function.
#'
#' @param model A dynwrap::with_trajectory object generated by \code{\link[dynwrap]{infer_trajectory}}.
#' function.
#' @param dataset A dynwrap::data_wrapper object generated by \code{\link[dynwrap]{infer_trajectory}}.
#' function.
#'
#' @return A new dynwrap::with_trajectory object.
#' @export
#'
#' @examples
#' # Generate a reference data
#' set.seed(1)
#' a <- matrix(rpois(n = 2500, lambda = 2), nrow = 50)
#' rownames(a) <- paste0("cell", 1:ncol(a))
#' colnames(a) <- paste0("gene", 1:nrow(a))
#' dataset_ref <- dynwrap::wrap_expression(
#'   counts = a,
#'   expression = log2(a+1)
#' )
#' # Trajectory inference

add_data_to_model <- function(model, dataset){

  if(!requireNamespace("checkmate", quietly = TRUE)){
    message("Install checkmate...")
    install.packages('checkmate')
  }
  checkmate::assertClass(model, "dynwrap::with_trajectory")
  checkmate::assertClass(dataset, "dynwrap::data_wrapper")
  if(is.null(model[["counts"]])){
    model <- dynwrap::add_expression(dataset = model,
                                     counts = dataset[['counts']])
  }
  if(is.null(model[["expression"]])){
    model <- dynwrap::add_expression(dataset = model,
                                     expression = dataset[['expression']])
  }
  return(model)
}
duohongrui/simutils documentation built on March 12, 2024, 8:40 p.m.