#' compute group mean by LOD
#'
#' weight lod by nr of NA's $(LOD * nrNas + meanAbundance *nrObs)/(nrMeasured)$
#'
#' @export
#' @examples
#' Contrasts <- c("group.b-a" = "group_A - group_B", "group.a-ctrl" = "group_A - group_Ctrl")
#' dd <- prolfqua::sim_lfq_data_protein_config(Nprot = 100,weight_missing = 2)
#' mh <- prolfqua::MissingHelpers$new(dd$data, dd$config, prob = 0.8,weighted = TRUE)
#' xx <- mh$get_stats()
#' xx <- mh$get_LOD()
#' xx <- mh$impute_weighted_lod()
#' xx <- mh$impute_lod()
#' xx <- mh$get_poolvar()
#' bb <- mh$get_contrast_estimates(Contrasts)
#' mh$get_contrasts(Contrasts)
#'
#' dd <- prolfqua::sim_lfq_data_2Factor_config(Nprot = 100,weight_missing = 0.1)
#'
#' Contrasts <- c("c1" = "TreatmentA - TreatmentB",
#' "C2" = "BackgroundX- BackgroundZ",
#' "c3" = "`TreatmentA:BackgroundX` - `TreatmentA:BackgroundZ`",
#' "c4" = "`TreatmentB:BackgroundX` - `TreatmentB:BackgroundZ`"
#' )
#' mh <- prolfqua::MissingHelpers$new(dd$data, dd$config, prob = 0.8,weighted = TRUE)
#' mh$get_stats()$interaction |> table()
#' mh$get_contrast_estimates(Contrasts)
#'
MissingHelpers <- R6::R6Class(
"MissingHelpers",
public = list(
#' @field data data
data = NULL,
#' @field config config
config = NULL,
#' @field prob quantile of groups with one observed value to estimate LOD
prob = 0.5,
#' @field stats data.frame with group statistics
stats = NULL,
#' @field weighted should we weight the LOD
weighted = TRUE,
# constructor/initializer
#' @description
#' initialize
#' @param data data
#' @param config config
#' @param prob default 0.5, median of groups with one observed value
#' @param weighted should group average be computed used weighting, default TRUE.
initialize = function(data, config, prob = 0.5, weighted = TRUE)
{
self$data = data
self$config = config
self$prob = prob
self$weighted = weighted
},
#' @description
#' get data.frame with statistics
#' @return data.frame
get_stats = function(){
if (is.null(self$stats)) {
self$stats = prolfqua::summarize_stats_factors(self$data, self$config)
}
return(self$stats)
},
#' @description
#' determine limit of detection
#' computes quantile of abundances in groups with a single observation
#' @return integer LOD
get_LOD = function(){
LOD <- self$get_stats() |> dplyr::filter(nrMeasured == 1) |>
dplyr::summarize(LOD = quantile(meanAbundance, probs = self$prob ,na.rm = TRUE)) |>
pull()
return(LOD)
},
#' @description
#' compute group averages using weighted lod
#'
impute_weighted_lod = function(){
toimp <- self$get_stats()
toimp$meanAbundanceZero <- ifelse(is.na(toimp$meanAbundance), 0, toimp$meanAbundance)
impDat <- toimp |> mutate(meanAbundanceImp = (.data$nrMeasured * .data$meanAbundanceZero + .data$nrNAs * self$get_LOD()) / .data$nrReplicates )
return(impDat)
},
#' @description
#' if group average absent substitute with LOD
#'
impute_lod = function(){
toimp <- self$get_stats()
toimp$meanAbundanceImp <- ifelse(is.na(toimp$meanAbundance), self$get_LOD(), toimp$meanAbundance)
return(toimp)
},
#' @description
#' compute pooled var per protein
#' @param prob prob of sd from proteins where it can be computed
get_poolvar = function(prob = 0.75) {
if (self$weighted) {
impDat <- self$impute_weighted_lod()
} else {
impDat <- self$impute_lod()
}
pooled <- prolfqua::poolvar(impDat, self$config, method = "V1")
pooled <- dplyr::select(pooled ,-all_of(c(self$config$table$factor_keys_depth()[1],"var")))
pooled_zero <- pooled[pooled$df > 0 & pooled$sd > 0,]
meandf <- pooled_zero |> summarize(
n = 1, df = 1,
sd = quantile(sd, prob = prob, na.rm = TRUE),
sdT = quantile(sdT, prob = prob, na.rm = TRUE))
minsd <- 1
meandf$sd <- ifelse(meandf$sd > 0, meandf$sd, minsd)
meandf$sdT <- ifelse(meandf$sdT > 0, meandf$sdT, minsd)
pooled <- pooled |> mutate(sd = ifelse(is.na(sd) | sd == 0 ,meandf$sd, sd))
pooled <- pooled |> mutate(sdT = ifelse(is.na(sdT) | sdT == 0, meandf$sdT, sdT ))
pooled <- pooled |> mutate(df = ifelse(df == 0, 1, df))
return(pooled)
},
#' @description
#' get contrast estimates
#' @param Contrasts named array with contrasts
get_contrast_estimates = function(
Contrasts
){
if (self$weighted) {
lt <- self$impute_weighted_lod()
} else {
lt <- self$impute_lod()
}
abundance_column = "meanAbundanceImp"
hierarchy_keys <- self$config$table$hierarchy_keys()
imp <- lt |> pivot_wider(id_cols = hierarchy_keys,
names_from = interaction,
values_from = !!sym(abundance_column))
imputed <- prolfqua::get_contrast(ungroup(imp), hierarchy_keys, Contrasts)
imputed$avgAbd <- (imputed$group_1 + imputed$group_2)/2
imputed <- imputed |> dplyr::rename(
!!paste0(abundance_column,"_group_1") := "group_1",
!!paste0(abundance_column, "_group_2") := "group_2")
nr <- lt |> mutate(is_missing = ifelse( .data$nrNAs == .data$nrReplicates , 1 , 0) )
nr <- nr |> pivot_wider(id_cols = hierarchy_keys, names_from = interaction, values_from = .data$is_missing)
nrs <- prolfqua::get_contrast(ungroup(nr), hierarchy_keys, Contrasts)
nrs <- nrs |> select(all_of(c(hierarchy_keys,"contrast", "estimate" )))
nrs <- nrs |> rename(indic = estimate)
imputed <- inner_join(imputed, nrs, by = c(hierarchy_keys, "contrast"))
nrMeasured <- lt |> pivot_wider(id_cols = hierarchy_keys,
names_from = interaction,
values_from = nrMeasured)
nrMeasured <- prolfqua::get_contrast(ungroup(nrMeasured), hierarchy_keys, Contrasts)
nrMeasured <- nrMeasured |> select(all_of(c(hierarchy_keys, "contrast", nrMeasured_group_1 = "group_1", nrMeasured_group_2 = "group_2")))
imputed <- inner_join(imputed, nrMeasured, by = c(hierarchy_keys, "contrast"))
imputed2 <- imputed |> mutate(estimate = ifelse(.data$indic < 0 & .data$estimate < 0, 0, .data$estimate))
imputed2 <- imputed2 |> mutate(estimate = ifelse(.data$indic > 0 & .data$estimate > 0, 0, .data$estimate))
return(imputed2)
},
#' @description
#' compute contrasts
#' @param Contrasts vector with contrasts
#' @param confint compute confint
#' @param all return all columns, default FALSE
get_contrasts = function(Contrasts, confint = 0.95, all = FALSE) {
imputed <- self$get_contrast_estimates(Contrasts = Contrasts)
pooled <- self$get_poolvar()
result <- inner_join(imputed, pooled, by = self$config$table$hierarchy_keys())
result <- private$add_p_values(result, confint = confint)
if (!all) {
result <- select(result, -all_of( c("nrMeasured" , "mean" ,"n.groups", "n", "meanAll") ) )
}
return(result)
}
),
private = list(
add_p_values = function(result, confint = 0.95, all = TRUE){
result <- dplyr::mutate(result, statistic = .data$estimate / .data$sdT,
p.value = 2*pt(abs(.data$statistic), df = .data$df, lower.tail = FALSE))
prqt <- -qt((1 - confint)/2, df = result$df)
result$conf.low <- result$estimate - prqt * (result$sdT)
result$conf.high <- result$estimate + prqt * (result$sdT)
return(result)
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.