Nothing
#' Format the output of olink_normalization for seamless use with downstream
#' analysis functions.
#'
#' @author
#' Danai G. Topouza
#' Klev Diamanti
#'
#' @description
#' For within-product bridging and subset normalization:
#' * Adds non-overlapping assays between projects to the bridged file without
#' adjustment.
#' * Removes external controls, except sample controls.
#'
#' For cross-product bridging:
#' * Adds non-overlapping assays between projects and not bridgeable assays to
#' the bridged file without adjustment.
#' * Removes external controls, except sample controls.
#' * Replaces the NPX values of the non-reference project by the Median Centered
#' or QS Normalized NPX, according to the Bridging Recommendation.
#' * Edits the BridgingRecommendation column to indicate whether an assay is
#' NotBridgeable, NotOverlapping, MedianCentering, or QuantileSmoothing bridged.
#' * Replaces OlinkID by the concatenation of each product's OlinkIDs to
#' record the OlinkIDs from both projects for bridgeable assays. Assays that
#' are NotBridgeable or NotOverlapping retain their original OlinkIDs and NPX
#' values.
#' * Replaces Panel by the concatenation of each product panel per assay. Assays
#' that are NotBridgeable or NotOverlapping retain their original Panel value.
#' * Removes MedianCenteredNPX, QSNormalizedNPX, OlinkID_E3072 columns.
#'
#' For reference median normalization:
#' * Adds non-overlapping assays from the dataset, but not from the reference
#' medians, to the bridged file without adjustment.
#' * Removes external controls, except sample controls.
#'
#' In all cases, normalization and formatting changes are applied to the NPX
#' column. The contents of the Count and PCNormalizedNPX columns remain
#' unchanged.
#'
#' @param df_norm A "tibble" of Olink data in long format resulting from the
#' `olink_normalization` function.
#' @param lst_check Normalization input list checks generated by
#' `olink_norm_input_check`.
#'
#' @return A "tibble" of Olink data in long format containing both input
#' datasets with the bridged NPX quantifications, with the above
#' modifications.
#'
#' @examples
#' \donttest{
#' # bridge samples
#' bridge_samples <- intersect(
#' x = unique(OlinkAnalyze:::data_ht_small$SampleID),
#' y = unique(OlinkAnalyze:::data_3k_small$SampleID)
#' ) |>
#' (\(x) x[!grepl("CONTROL", x)])()
#'
#' # run olink_normalization
#' df_norm <- olink_normalization(
#' df1 = OlinkAnalyze:::data_ht_small,
#' df2 = OlinkAnalyze:::data_3k_small,
#' overlapping_samples_df1 = bridge_samples,
#' df1_project_nr = "Explore HT",
#' df2_project_nr = "Explore 3072",
#' reference_project = "Explore HT"
#' )
#'
#' # generate lst_check
#' lst_check <- OlinkAnalyze:::olink_norm_input_check(
#' df1 = OlinkAnalyze:::data_3k_small,
#' df2 = OlinkAnalyze:::data_ht_small,
#' overlapping_samples_df1 = bridge_samples,
#' overlapping_samples_df2 = NULL,
#' df1_project_nr = "P1",
#' df2_project_nr = "P2",
#' reference_project = "P2",
#' reference_medians = NULL
#' )
#'
#' # format output
#' OlinkAnalyze:::olink_normalization_format(
#' df_norm = df_norm,
#' lst_check = lst_check
#' )
#' }
#'
olink_normalization_format <- function(df_norm,
                                       lst_check) {
  # Formats the output of olink_normalization. Depending on
  # lst_check$norm_mode it appends the non-overlapping assays, resolves the
  # cross-product bridging columns, removes external controls and sorts the
  # rows by project and sample identifier.

  # Extract data from non-overlapping assays ----
  # Only look them up when the input checks recorded at least one such assay.
  if (!is.null(lst_check$non_overlapping_oid)
      && length(unlist(lst_check$non_overlapping_oid)) > 0L) {
    no_overlap_oid <- olink_format_oid_no_overlap(
      lst_check = lst_check
    )
  } else {
    no_overlap_oid <- NULL
  }

  # Combine data with df_norm ----
  if (lst_check$norm_mode %in% c(olink_norm_modes$bridge,
                                 olink_norm_modes$subset,
                                 olink_norm_modes$ref_median)) {
    # Bridging, subset and reference median normalization: the normalized
    # data is used as is, with the non-overlapping assays appended.
    if (!is.null(no_overlap_oid)) {
      df_combo <- df_norm |>
        dplyr::bind_rows(
          no_overlap_oid
        )
    } else {
      df_combo <- df_norm
    }
  } else if (lst_check$norm_mode == olink_norm_modes$norm_cross_product) {
    # Column names used throughout the cross-product branch
    oid_col_name <- lst_check$ref_cols$olink_id
    not_ref_oid_col_name <- paste0(lst_check$ref_cols$olink_id,
                                   "_", lst_check$not_ref_product)
    quant_col_name <- lst_check$ref_cols$quant

    # Extract data for assays = "NotBridgeable" ----
    # Each row keeps the OlinkID of its own project: the reference project
    # uses the OlinkID column, the other project the product-suffixed one.
    df_not_bridgeable <- df_norm |>
      dplyr::filter(
        # keep only assays that are not bridgeable
        .data[["BridgingRecommendation"]] == "NotBridgeable"
      ) |>
      dplyr::mutate(
        !!oid_col_name := dplyr::if_else(
          .data[["Project"]] == lst_check$ref_name,
          .data[[oid_col_name]],
          .data[[not_ref_oid_col_name]]
        )
      )

    # Number of distinct not bridgeable OlinkIDs, reported to the user
    not_bridgeable_assays <- df_not_bridgeable |>
      dplyr::distinct(
        .data[[oid_col_name]]
      ) |>
      nrow()

    if (not_bridgeable_assays > 0L) {
      cli::cli_inform(
        c("i" = "{.val {not_bridgeable_assays}} not bridgeable assays are
included in the bridged dataset without adjustment.")
      )
    }

    # Keep the data following BridgingRecommendation for bridgeable assays:
    # the OlinkID becomes the concatenation of both products' OlinkIDs and the
    # quantification column is replaced by the recommended normalized value.
    df_bridgeable <- df_norm |>
      dplyr::filter(
        # keep only assays that are bridgeable
        .data[["BridgingRecommendation"]] != "NotBridgeable"
      ) |>
      dplyr::mutate(
        !!oid_col_name := paste0(.data[[oid_col_name]], "_",
                                 .data[[not_ref_oid_col_name]]),
        !!quant_col_name := dplyr::case_when(
          .data[["BridgingRecommendation"]] == "MedianCentering" ~
            .data[["MedianCenteredNPX"]],
          .data[["BridgingRecommendation"]] == "QuantileSmoothing" ~
            .data[["QSNormalizedNPX"]],
          .default = .data[[quant_col_name]]
        )
      )

    # combine data
    df_combo <- df_bridgeable |>
      dplyr::bind_rows(
        df_not_bridgeable
      )

    # add missing assays
    if (!is.null(no_overlap_oid)) {
      df_combo <- df_combo |>
        dplyr::bind_rows(
          no_overlap_oid
        )
    }

    # concatenate panel column
    # keep reference product first
    ref_product_panels <- c("HT" = "Explore_HT",
                            "Reveal" = "Reveal")

    # reorder Panel to keep reference product first
    # then concatenate panels per OlinkID
    # NotBridgeable or NotOverlapping assays retain their original panel
    df_combo <- df_combo |>
      dplyr::mutate(
        Panel_order = forcats::fct_relevel(
          .data[["Panel"]],
          ref_product_panels[[lst_check$ref_product]]
        )
      ) |>
      dplyr::group_by(.data[[oid_col_name]]) |>
      dplyr::mutate(
        Panel = paste(sort(unique(.data[["Panel_order"]])), collapse = "_")
      ) |>
      # drop the grouping so the returned tibble is ungrouped, as it is for
      # the other normalization modes
      dplyr::ungroup() |>
      # tidyselect helper instead of the `.data` pronoun, whose use inside
      # selections is deprecated
      dplyr::select(-dplyr::all_of("Panel_order"))

    # clean up
    df_combo <- df_combo |>
      dplyr::select( # Remove extra columns
        -dplyr::any_of(
          c("MedianCenteredNPX", "QSNormalizedNPX", not_ref_oid_col_name)
        )
      )
  }

  # Remove external controls and sort by project
  df_full <- df_combo |>
    olink_format_rm_ext_ctrl(lst_check = lst_check) |>
    dplyr::arrange(
      .data[["Project"]], .data[[lst_check$ref_cols$sample_id]]
    )

  return(df_full)
}
#' Remove negative controls and plate controls from dataset. For use in
#' olink_normalization_format function. Generates a message stating which
#' control samples were removed.
#'
#' @author
#' Danai G. Topouza
#' Klev Diamanti
#'
#' @param df NPX dataset to be processed.
#' @param lst_check Normalization input list checks generated by
#' `olink_norm_input_check`.
#'
#' @return A "tibble" of Olink data in long format containing the input dataset
#' with negative controls and plate controls removed.
#'
olink_format_rm_ext_ctrl <- function(df,
                                     lst_check) {
  # Drops every row whose sample identifier belongs to a negative control or
  # a plate control, and informs the user about what was removed.

  sid_col <- lst_check$ref_cols$sample_id
  ref_type_col <- lst_check$ref_cols$sample_type
  not_ref_type_col <- lst_check$not_ref_cols$sample_type

  # The sample type columns are used only when both datasets report one;
  # otherwise controls are guessed from the sample identifiers.
  use_sample_type <- length(ref_type_col) > 0L &&
    length(not_ref_type_col) > 0L

  if (use_sample_type) {
    type_cols <- c(ref_type_col, not_ref_type_col)

    # Unique sample identifiers with the given value in any of the sample
    # type columns present in df.
    find_ctrl_sid <- function(ctrl_type) {
      matching_rows <- dplyr::filter(
        df,
        dplyr::if_any(
          dplyr::any_of(type_cols),
          ~ .x %in% .env[["ctrl_type"]]
        )
      )
      unique(dplyr::pull(matching_rows, .data[[sid_col]]))
    }

    nc_sid <- find_ctrl_sid("NEGATIVE_CONTROL")
    pc_sid <- find_ctrl_sid("PLATE_CONTROL")
  } else {
    # Alternations of the substrings that mark negative and plate controls
    neg_ctrl_regex <- paste(
      c("Negative", "NEGATIVE", "NEG", "Neg"),
      collapse = "|"
    )
    pc_ctrl_regex <- paste(
      c("PLATE", "IPC", "Plate", "plate", "Plate Control",
        "plate control", "plate_control", "Plate_Control"),
      collapse = "|"
    )

    # Distinct sample identifiers matching the regular expression
    find_ctrl_sid <- function(ctrl_regex) {
      unique_ids <- dplyr::distinct(df, .data[[sid_col]])
      flagged_ids <- dplyr::filter(
        unique_ids,
        stringr::str_detect(
          string = .data[[sid_col]],
          pattern = ctrl_regex
        )
      )
      dplyr::pull(flagged_ids, .data[[sid_col]])
    }

    nc_sid <- find_ctrl_sid(neg_ctrl_regex)
    pc_sid <- find_ctrl_sid(pc_ctrl_regex)
  }

  ext_ctrl_sid <- c(nc_sid, pc_sid)

  # Nothing to remove: hand the data back untouched and stay silent
  if (length(ext_ctrl_sid) == 0L) {
    return(df)
  }

  # remove NCs and PCs from the dataset
  df <- dplyr::filter(
    df,
    !(.data[[sid_col]] %in% ext_ctrl_sid)
  )

  if (length(nc_sid) > 0L) {
    cli::cli_inform(
      c("i" = "{.val {length(nc_sid)}} Negative Control{?s} {?was/were}
removed from dataset: {.val {nc_sid}}")
    )
  }

  if (length(pc_sid) > 0L) {
    cli::cli_inform(
      c("i" = "{.val {length(pc_sid)}} Plate Control{?s} {?was/were} removed
from dataset: {.val {pc_sid}}")
    )
  }

  # Pattern matching on identifiers may catch regular samples, so warn
  if (!use_sample_type) {
    cli::cli_inform(
      c("!" = "Negative Control and Plate Control samples were identified and
removed based on common patterns in sample identifiers. Please verify
that no other samples were removed unintentionally!"
      )
    )
  }

  df
}
#' Retrieve non-overlapping assays between two NPX datasets
#'
#' @description
#' For use in `olink_normalization_format` function. Generates a message stating
#' how many assays were not overlapping. Appends additional columns depending on
#' the normalization type to match normalized data output. For cross-product
#' normalization, splits any concatenated OlinkIDs.
#'
#' @author
#' Danai Topouza
#' Klev Diamanti
#'
#' @param lst_check Normalization input list checks generated by
#' `olink_norm_input_check`.
#'
#' @return A combined "tibble" of Olink data in long format containing only the
#' non-overlapping assays from each input dataset.
#'
olink_format_oid_no_overlap <- function(lst_check) {
  # Collects the rows of the original datasets whose OlinkID is listed in
  # lst_check$non_overlapping_oid, and appends the column the normalized
  # output carries for the current normalization mode.

  mode <- lst_check$norm_mode
  oid_no_overlap <- lst_check$non_overlapping_oid
  # NOTE(review): the reference dataset's OlinkID column name is used for
  # both datasets - confirm the two datasets always share this column name.
  oid_col <- lst_check$ref_cols$olink_id

  # Rows of `dataset` whose OlinkID is listed as non-overlapping under
  # `dataset_name`, labelled with that name in the Project column.
  pick_no_overlap <- function(dataset, dataset_name) {
    assay_ids <- character(0L)

    if (dataset_name %in% names(oid_no_overlap)) {
      listed_ids <- oid_no_overlap[[dataset_name]]
      ## Identifiers containing an underscore are combined product OlinkIDs
      ## from cross-product normalization; split them to catch all assays
      split_ids <- unlist(
        stringr::str_split(
          string = stringr::str_subset(listed_ids, "_"),
          pattern = "_"
        )
      )
      ## Look for both the listed and the split identifiers
      assay_ids <- c(unlist(listed_ids), split_ids)
    }

    dataset |>
      dplyr::filter(.data[[oid_col]] %in% assay_ids) |>
      dplyr::mutate(Project = dataset_name)
  }

  # Non-overlapping assays in the reference dataset ----
  ref_df_no_overlap <- pick_no_overlap(
    dataset = lst_check$ref_original_df,
    dataset_name = lst_check$ref_name
  )

  # Reference median normalization ----
  # Only the dataset contributes rows; the reference medians do not.
  if (mode == olink_norm_modes$ref_median) {
    num_non_overlap <- unique( # nolint object_usage_linter
      dplyr::pull(ref_df_no_overlap, .data[[oid_col]])
    )

    cli::cli_inform(
      c("i" = "{.val {length(num_non_overlap)}} non-overlapping assay{?s} found
in the dataset but not in the reference medians. Assay{?s} {?is/are}
included in the normalized dataset without adjustment.")
    )

    # Non-overlapping assays are not adjusted
    return(dplyr::mutate(ref_df_no_overlap, Adj_factor = 0))
  }

  # All other normalization types ----
  not_ref_df_no_overlap <- pick_no_overlap(
    dataset = lst_check$not_ref_original_df,
    dataset_name = lst_check$not_ref_name
  )

  cli::cli_inform(
    c("i" = "{.val {length(unlist(lst_check$non_overlapping_oid))}}
non-overlapping assay{?s} {?is/are} included in the normalized dataset
without adjustment. Assays found in only one project will have decreased
statistical power due to the lower number of samples.")
  )

  # Stack the non-overlapping assays of both datasets, reference first
  df_non_overlapping <- dplyr::bind_rows(
    ref_df_no_overlap,
    not_ref_df_no_overlap
  )

  if (mode %in% c(olink_norm_modes$bridge, olink_norm_modes$subset)) {
    # Within-product bridging and subset normalization: no adjustment
    df_non_overlapping <- dplyr::mutate(
      df_non_overlapping,
      Adj_factor = 0
    )
  } else if (mode == olink_norm_modes$norm_cross_product) {
    # Cross-product bridging: flag the assays as not overlapping
    df_non_overlapping <- dplyr::mutate(
      df_non_overlapping,
      BridgingRecommendation = "NotOverlapping"
    )
  }

  df_non_overlapping
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.