#' Single imputation
#'
#' Performs a single imputation run and returns the data with NA values replaced
#' by imputed values.
#'
#' @param data a `data.frame` to perform the imputation on, missing values should
#' be `NA`.
#' @param design a design or model matrix as produced by
#' \code{\link[stats]{model.matrix}} with column names corresponding to the
#' different conditions.
#' @param id_col a character for the name of the column containing the
#' name of the features in data (e.g., peptides, proteins, etc.).
#'
#' @return a `data.frame` with `NA` values replaced by imputed values.
#' @export
#'
#' @examples
#' # Generate a design matrix for the data
#' design <- model.matrix(~ 0 + factor(rep(1:2, each = 3)))
#'
#' # Set correct colnames, this is important for fit_gamma_weights
#' colnames(design) <- paste0("ng", c(50, 100))
#'
#' yeast %>%
#' # Normalize and log-transform the data
#' psrn("identifier") %>%
#' # Run the imputation
#' single_imputation(design, "identifier")
single_imputation <- function(data,
design,
id_col = "id") {
conditions <- design %>%
get_conditions()
data <- data %>%
tibble::rowid_to_column()
gamma_reg <- data %>%
fit_gamma_imputation(design, id_col)
LOQ <- data %>%
purrr::keep(is.numeric) %>%
unlist(T, F) %>%
{quantile(., .25, na.rm = T) - 1.5*IQR(., na.rm = T)} %>%
unname()
missing_data <- data %>%
dplyr::filter(dplyr::if_any(where(is.numeric), is.na))
complete_data <- data %>%
dplyr::filter(dplyr::if_all(where(is.numeric), purrr::negate(is.na)))
char_cols <- missing_data %>%
dplyr::select(where(is.character), rowid)
order <- data %>%
colnames()
data %>%
prep_data_for_imputation(conditions, gamma_reg, LOQ) %>%
impute(char_cols, order) %>%
dplyr::bind_rows(complete_data) %>%
dplyr::arrange(rowid) %>%
dplyr::select(-rowid)
}
impute <- function(data, char_cols, order) {
data %>%
purrr::map(
dplyr::mutate,
data = purrr::pmap(list(mean, sd, data), impute_row)
) %>%
purrr::map(dplyr::select, data) %>%
purrr::map(tidyr::unnest, data) %>%
dplyr::bind_cols(char_cols) %>%
dplyr::select(dplyr::all_of(order))
}
impute_nest <- function(data, condition, gamma_reg_model, LOQ) {
if (anyNA(data)) {
data <- data %>%
tibble::rownames_to_column(var = "tmp_id") %>%
dplyr::mutate(
mean = rowMeans(dplyr::across(where(is.numeric)), na.rm = T),
mean = tidyr::replace_na(mean, LOQ)
)
data[["sd"]] <- stats::predict(gamma_reg_model[[condition]], data, type = "response")
data %>%
tidyr::nest(data = -c(mean, sd, tmp_id))
} else {
return(data)
}
}
impute_row <- function(mean, sd, data) {
if (!anyNA(data)) {
return(data)
} else {
data <- as.data.frame(data)
data[is.na(data)] <- stats::rnorm(n = sum(is.na(data)), mean = mean, sd = sd)
return(data)
}
}
prep_data_for_imputation <- function(data, conditions, gamma_reg_imputation, LOQ) {
data %>%
purrr::keep(is.numeric) %>%
dplyr::filter(dplyr::if_any(everything(), is.na)) %>%
split.default(stringr::str_extract(names(.), conditions)) %>%
purrr::imap(impute_nest, gamma_reg_imputation, LOQ)
}
estimate_loq <- function(data) {
data %>%
purrr::keep(is.numeric) %>%
unlist(T, F) %>%
{
stats::quantile(., .25, na.rm = T) - 1.5 * stats::IQR(., na.rm = T)
} %>%
unname()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.