#' @title Clean bio
#'
#' @description This function cleans the results
#' obtained after biological weighting
#'
#' @param annot_table_wei_bio Table containing your
#' biologically weighted annotation
#' @param edges_table Table containing the edges between features
#' @param minimal_consistency Minimal consistency score for a class. FLOAT
#'
#' @return A table containing the biologically weighted annotation
#' where only a given number of initial candidates are kept
#'
#' @export
#'
#' @seealso weight_bio
#'
#' @examples NULL
clean_bio <-
function(annot_table_wei_bio = get("annot_table_wei_bio",
envir = parent.frame()
),
edges_table = get("edges_table",
envir = parent.frame()
),
minimal_consistency = get("minimal_consistency",
envir = parent.frame()
)) {
df <- annot_table_wei_bio |>
tidytable::distinct(
feature_id,
candidate_structure_inchikey_no_stereo,
candidate_structure_tax_cla_01kin,
candidate_structure_tax_npc_01pat,
candidate_structure_tax_cla_02sup,
candidate_structure_tax_npc_02sup,
candidate_structure_tax_cla_03cla,
candidate_structure_tax_npc_03cla,
candidate_structure_tax_cla_04dirpar,
score_pondered_bio,
.keep_all = TRUE
)
log_debug("calculating chemical consistency
features with at least 2 neighbors ... \n")
df3 <-
tidytable::right_join(
edges_table |>
tidytable::group_by(feature_source) |>
tidytable::add_count() |>
tidytable::ungroup() |>
tidytable::filter(n >= 2) |>
tidytable::select(-n),
df |>
tidytable::distinct(
feature_id,
candidate_structure_tax_cla_01kin,
candidate_structure_tax_npc_01pat,
candidate_structure_tax_cla_02sup,
candidate_structure_tax_npc_02sup,
candidate_structure_tax_cla_03cla,
candidate_structure_tax_npc_03cla,
candidate_structure_tax_cla_04dirpar,
score_pondered_bio
),
by = stats::setNames("feature_id", "feature_target")
) |>
tidytable::filter(!is.na(feature_source))
log_debug("... among all edges ... \n")
clean_per_level_bio <-
function(df,
candidates,
consistency_name,
feature_score_name,
feature_val_name) {
freq <- df |>
tidytable::distinct(
feature_source,
feature_target,
!!as.name(candidates),
score_pondered_bio
) |>
tidytable::mutate(
count = tidytable::n_distinct(feature_target),
.by = c(
feature_source,
!!as.name(candidates)
)
) |>
tidytable::mutate(
!!as.name(consistency_name) := count /
tidytable::n_distinct(feature_target),
.by = c(feature_source)
) |>
tidytable::distinct(feature_source,
!!as.name(candidates),
.keep_all = TRUE
) |>
tidytable::mutate(
!!as.name(feature_score_name) :=
!!as.name(consistency_name) * score_pondered_bio,
.by = c(
feature_source,
!!as.name(candidates)
)
) |>
tidytable::arrange(-!!as.name(feature_score_name)) |>
tidytable::distinct(feature_source, .keep_all = TRUE) |>
tidytable::select(
feature_source,
!!as.name(feature_val_name) := !!as.name(candidates),
!!as.name(consistency_name),
!!as.name(feature_score_name)
) |>
tidytable::mutate(
!!as.name(feature_val_name) := ifelse(
test = !!as.name(feature_score_name) >= minimal_consistency,
yes = !!as.name(feature_val_name),
no = "notConsistent"
)
)
}
log_debug("... at the (classyfire) kingdom level \n")
freq_cla_kin <- df3 |>
clean_per_level_bio(
candidates = "candidate_structure_tax_cla_01kin",
consistency_name = "consistency_structure_cla_kin",
feature_score_name = "feature_pred_tax_cla_01kin_score",
feature_val_name = "feature_pred_tax_cla_01kin_val"
)
log_debug("... at the (NPC) pathway level \n")
freq_npc_pat <- df3 |>
clean_per_level_bio(
candidates = "candidate_structure_tax_npc_01pat",
consistency_name = "consistency_structure_npc_pat",
feature_score_name = "feature_pred_tax_npc_01pat_score",
feature_val_name = "feature_pred_tax_npc_01pat_val"
)
log_debug("... at the (classyfire) superclass level \n")
freq_cla_sup <- df3 |>
clean_per_level_bio(
candidates = "candidate_structure_tax_cla_02sup",
consistency_name = "consistency_structure_cla_sup",
feature_score_name = "feature_pred_tax_cla_02sup_score",
feature_val_name = "feature_pred_tax_cla_02sup_val"
)
log_debug("... at the (NPC) superclass level \n")
freq_npc_sup <- df3 |>
clean_per_level_bio(
candidates = "candidate_structure_tax_npc_02sup",
consistency_name = "consistency_structure_npc_sup",
feature_score_name = "feature_pred_tax_npc_02sup_score",
feature_val_name = "feature_pred_tax_npc_02sup_val"
)
log_debug("... at the (classyfire) class level \n")
freq_cla_cla <- df3 |>
clean_per_level_bio(
candidates = "candidate_structure_tax_cla_03cla",
consistency_name = "consistency_structure_cla_cla",
feature_score_name = "feature_pred_tax_cla_03cla_score",
feature_val_name = "feature_pred_tax_cla_03cla_val"
)
log_debug("... at the (NPC) class level \n")
freq_npc_cla <- df3 |>
clean_per_level_bio(
candidates = "candidate_structure_tax_npc_03cla",
consistency_name = "consistency_structure_npc_cla",
feature_score_name = "feature_pred_tax_npc_03cla_score",
feature_val_name = "feature_pred_tax_npc_03cla_val"
)
log_debug("... at the (classyfire) parent level \n")
freq_cla_par <- df3 |>
clean_per_level_bio(
candidates = "candidate_structure_tax_cla_04dirpar",
consistency_name = "consistency_structure_cla_par",
feature_score_name = "feature_pred_tax_cla_04dirpar_score",
feature_val_name = "feature_pred_tax_cla_04dirpar_val"
)
log_debug("splitting already computed predictions \n")
df1 <- df |>
tidytable::filter(!is.na(feature_pred_tax_cla_02sup_val))
df1b <- df1 |>
tidytable::select(-tidytable::contains("feature_pred_tax"))
df2 <- df |>
tidytable::select(-tidytable::contains("feature_pred_tax")) |>
tidytable::anti_join(df1) |>
tidytable::bind_rows(df1b)
log_debug("joining all except -1 together \n")
annot_table_wei_bio_preclean <-
tidytable::left_join(df2,
freq_cla_kin,
by = stats::setNames(
"feature_source",
"feature_id"
)
) |>
tidytable::left_join(freq_npc_pat,
by = stats::setNames(
"feature_source",
"feature_id"
)
) |>
tidytable::left_join(freq_cla_sup,
by = stats::setNames(
"feature_source",
"feature_id"
)
) |>
tidytable::left_join(freq_npc_sup,
by = stats::setNames(
"feature_source",
"feature_id"
)
) |>
tidytable::left_join(freq_cla_cla,
by = stats::setNames(
"feature_source",
"feature_id"
)
) |>
tidytable::left_join(freq_npc_cla,
by = stats::setNames(
"feature_source",
"feature_id"
)
) |>
tidytable::left_join(freq_cla_par,
by = stats::setNames(
"feature_source",
"feature_id"
)
) |>
tidytable::select(
feature_id,
tidytable::everything()
) |>
## In case there are no consensus at all because no network
tidytable::mutate(tidytable::across(
.cols = tidytable::where(is.logical),
.fns = as.character
)) |>
log_pipe("adding dummy consistency for features
with less than 2 neighbors \n") |>
tidytable::mutate(
feature_pred_tax_cla_01kin_val = tidytable::coalesce(
feature_pred_tax_cla_01kin_val,
"dummy"
),
consistency_structure_cla_kin = tidytable::coalesce(
consistency_structure_cla_kin,
1
),
feature_pred_tax_cla_01kin_score = tidytable::coalesce(
feature_pred_tax_cla_01kin_score,
0
),
feature_pred_tax_npc_01pat_val = tidytable::coalesce(
feature_pred_tax_npc_01pat_val,
"dummy"
),
consistency_structure_npc_pat = tidytable::coalesce(
consistency_structure_npc_pat,
1
),
feature_pred_tax_npc_01pat_score = tidytable::coalesce(
feature_pred_tax_npc_01pat_score,
0
),
feature_pred_tax_cla_02sup_val = tidytable::coalesce(
feature_pred_tax_cla_02sup_val,
"dummy"
),
consistency_structure_cla_sup = tidytable::coalesce(
consistency_structure_cla_sup,
1
),
feature_pred_tax_cla_02sup_score = tidytable::coalesce(
feature_pred_tax_cla_02sup_score,
0
),
feature_pred_tax_npc_02sup_val = tidytable::coalesce(
feature_pred_tax_npc_02sup_val,
"dummy"
),
consistency_structure_npc_sup = tidytable::coalesce(
consistency_structure_npc_sup,
1
),
feature_pred_tax_npc_02sup_score = tidytable::coalesce(
feature_pred_tax_npc_02sup_score,
0
),
feature_pred_tax_cla_03cla_val = tidytable::coalesce(
feature_pred_tax_cla_03cla_val,
"dummy"
),
consistency_structure_cla_cla = tidytable::coalesce(
consistency_structure_cla_cla,
1
),
feature_pred_tax_cla_03cla_score = tidytable::coalesce(
feature_pred_tax_cla_03cla_score,
0
),
feature_pred_tax_npc_03cla_val = tidytable::coalesce(
feature_pred_tax_npc_03cla_val,
"dummy"
),
consistency_structure_npc_cla = tidytable::coalesce(
consistency_structure_npc_cla,
1
),
feature_pred_tax_npc_03cla_score = tidytable::coalesce(
feature_pred_tax_npc_03cla_score,
0
),
feature_pred_tax_cla_04dirpar_val = tidytable::coalesce(
feature_pred_tax_cla_04dirpar_val,
"dummy"
),
consistency_structure_cla_par = tidytable::coalesce(
consistency_structure_cla_par,
1
),
feature_pred_tax_cla_04dirpar_score = tidytable::coalesce(
feature_pred_tax_cla_04dirpar_score,
0
)
)
log_debug("adding already computed predictions back \n")
annot_table_wei_bio_clean <- annot_table_wei_bio_preclean |>
tidytable::anti_join(df1b) |>
tidytable::bind_rows(df1)
rm(
annot_table_wei_bio,
annot_table_wei_bio_preclean,
df,
df1,
df1b,
df2,
df3
)
return(annot_table_wei_bio_clean)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.