R/validate.R

Defines functions validate

Documented in validate

#' Perform cross-validation for the ranking.
#'
#' This function reoptimizes the ranking leaving out one of the original
#' reference genes at a time.
#'
#' @param ranking The ranking to validate.
#' @param reference_gene_ids The reference gene IDs whose ranking should be
#'   validated.
#' @param method_ids IDs of the methods that were used.
#' @param progress An optional progress function that should accept a single
#'   value between 0.0 and 1.0 for progress information.
#'
#' @returns A validation object with the following items:
#'   \describe{
#'     \item{`validation`}{A `data.table` containing percentiles of the
#'       comparison genes from the original ranking as well as their validation.
#'     }
#'     \item{`mean_score`}{The mean score of the genes.}
#'     \item{`mean_percentile_original`}{The mean percentile of the genes in
#'       the original ranking.
#'     }
#'     \item{`mean_percentile_validation`}{The mean percentile of the genes
#'       when optimizing without themselves.
#'     }
#'     \item{`mean_error`}{The mean absolute error.}
#'   }
#'
#' @export
validate <- function(ranking, reference_gene_ids, method_ids, progress = NULL) {
  if (!inherits(ranking, "geposan_ranking")) {
    stop("Ranking is invalid. Use geposan::ranking().")
  }

  if (is.null(progress)) {
    progress_bar <- progress::progress_bar$new()
    progress_bar$update(0.0)

    progress <- function(progress_value) {
      if (!progress_bar$finished) {
        progress_bar$update(progress_value)
        if (progress_value >= 1.0) {
          progress_bar$terminate()
        }
      }
    }
  }

  progress_state <- 0.0
  progress_step <- 1.0 / length(reference_gene_ids)

  results <- ranking[gene %chin% reference_gene_ids, .(gene, percentile)]

  for (gene_id in reference_gene_ids) {
    included_gene_ids <- reference_gene_ids[
      reference_gene_ids != gene_id
    ]

    weights <- optimal_weights(
      ranking,
      method_ids,
      included_gene_ids
    )

    ranking_validation <- ranking(ranking, weights)

    results[
      gene == gene_id,
      percentile_validation := ranking_validation[
        gene == gene_id,
        percentile
      ]
    ]

    if (!is.null(progress)) {
      progress_state <- progress_state + progress_step
      progress(progress_state)
    }
  }

  results[, error := percentile - percentile_validation]
  setorder(results, error)

  structure(
    list(
      validation = results,
      mean_percentile_original = results[, mean(percentile)],
      mean_percentile_validation = results[, mean(percentile_validation)],
      mean_error = results[, mean(error)]
    ),
    class = "geposan_validation"
  )
}

#' S3 method to print a validation object.
#'
#' @param x The validation to print.
#' @param ... Other parameters.
#'
#' @seealso [validate()]
#'
#' @export
print.geposan_validation <- function(x, ...) {
  cat(sprintf(
    paste0(
      "geposan validation:",
      "\n  Mean percentile original: %.1f%%",
      "\n  Mean percentile validation: %.1f%%",
      "\n  Mean error: %.1f percent points",
      "\n"
    ),
    x$mean_percentile_original * 100,
    x$mean_percentile_validation * 100,
    x$mean_error * 100
  ))

  invisible(x)
}
johrpan/geposan documentation built on Feb. 28, 2025, 3:48 a.m.