R/make_deconv_pred_obs_data.R

Defines functions make_deconv_pred_obs_data

Documented in make_deconv_pred_obs_data

#' Make tidy data for use in deconvolution plots
#'
#' Produces data with varied deconvolution performance metrics.
#' @param dat data.frame with predictions as columns, each row should be a prediction for a given sample and given group/celltype
#' @param true_values_col A string with the name of the column with the true values in `dat`. true values should be between 0 and 1.
#' @param predicted_cols A vector of strings with the name of the columns with the predictions for different methods in `dat`. predictions should be between 0 and 1
#' @param sample_id_col A string with the name of the column with the sample name or ID in `dat`.
#' @param group_col A string with the name of the column containing the cell types or groups in `dat`. group col should be a factor, otherwise the function will make it a factor
#' @return tibble with tidied up deconvolution performance data in nested fields
#' @export
make_deconv_pred_obs_data <- function(
    dat,
    true_values_col,
    predicted_cols,
    sample_id_col,
    group_col) {
  dfit <- data <- NULL

  assertthat::assert_that(is.data.frame(dat))
  assertthat::assert_that(
    all(c(group_col, predicted_cols, true_values_col) %in% colnames(dat))
  )
  assertthat::assert_that(all(dat[, c(predicted_cols, true_values_col)] >= 0))
  assertthat::assert_that(all(dat[, c(predicted_cols, true_values_col)] <= 1))

  # if grouping column is not a factor, make it a factor
  if (!is.factor(dat[[group_col]])) {
    dat <- dat |> dplyr::mutate("{group_col}" := as.factor(!!dplyr::sym(group_col)))
  }
  assertthat::assert_that(is.factor(dat[[group_col]]))

  # make data tbl for each prediction
  names(predicted_cols) <- predicted_cols

  tidy_dat <-
    purrr::map_dfr(.x = predicted_cols, .id = "method", .f = function(pcol) {
      tidy_subset_dat <-
        dat |>
        tibble::as_tibble() |>
        tidyr::nest(data = -tidyr::all_of(group_col)) |>
        dplyr::mutate(
          # fit on the prediction values
          dfit = purrr::map(data, ~ lm(!!dplyr::sym(pcol) ~ !!dplyr::sym(true_values_col), data = .x)),
          tidied = purrr::map(dfit, broom::tidy),
          glanced = purrr::map(dfit, broom::glance),
          augmented = purrr::map(dfit, broom::augment),
          # metrics (RMSE, R2, AIC)
          metrics = purrr::map_dfr(data, ~ prediction_stats(
            expected_values = .x[[true_values_col]],
            predicted_values = .x[[pcol]]
          ))
        )
      return(tidy_subset_dat)
    })
  return(tidy_dat)
}

Try the CimpleG package in your browser

Any scripts or data that you put into this service are public.

CimpleG documentation built on Dec. 7, 2025, 1:07 a.m.