#### history ####
# 2025-01-27 first partly working version
# 2025-01-31 working
# 2025-02-08 adding euctr protocol, isrctn
#' Calculate number of arms or groups with investigational medicines in a study
#'
#' Trial concept calculated: number of active arms with different
#' investigational medicines, after excluding non-active comparator, auxiliary
#' and placebo arms / medicines.
#' For ISRCTN, this is imprecise because arms are not identified in a field.
#' Most registers provide no or only limited information on phase 1 trials,
#' so that this number typically cannot be calculated for these trials.
#' Requires packages stringdist to be installed; stringdist is used for
#' evaluating names of active substances, which are considered similar when
#' the similarity is 0.8 or higher.
#'
#' @param df data frame such as from \link{dbGetFieldsIntoDf}. If `NULL`,
#' prints fields needed in `df` for calculating this trial concept, which can
#' be used with \link{dbGetFieldsIntoDf}.
#'
#' @return data frame with columns `_id` and `.numTestArmsSubstances`, an integer
#'
#' @export
#'
#' @importFrom dplyr mutate case_when if_else pull `%>%`
#' @importFrom stringdist stringsimmatrix
#' @importFrom stringi stri_count_fixed stri_split_fixed
#' @importFrom rlang .data
#'
#' @examples
#' # fields needed
#' f.numTestArmsSubstances()
#'
#' # apply trial concept when creating data frame
#' dbc <- nodbi::src_sqlite(
#' dbname = system.file("extdata", "demo.sqlite", package = "ctrdata"),
#' collection = "my_trials", flags = RSQLite::SQLITE_RO)
#' trialsDf <- dbGetFieldsIntoDf(
#' calculate = "f.numTestArmsSubstances",
#' con = dbc)
#' trialsDf
#'
f.numTestArmsSubstances <- function(df = NULL) {
# check generic, do not edit
stopifnot(is.data.frame(df) || is.null(df))
#### fields ####
fldsNeeded <- list(
"euctr" = c(
"e822_placebo",
"e823_other",
"e824_number_of_treatment_arms_in_the_trial",
"subjectDisposition.postAssignmentPeriods.postAssignmentPeriod.arms.arm"
),
"ctgov" = c(
"arm_group"
),
"ctgov2" = c(
"protocolSection.armsInterventionsModule.armGroups"
),
"isrctn" = c(
"trialDesign.secondaryStudyDesign",
"interventions.intervention.drugNames",
"interventions.intervention.interventionType"
),
"ctis" = c(
# CTIS1
"authorizedPartI.productRoleGroupInfos",
# CTIS2
"authorizedApplication.authorizedPartI.productRoleGroupInfos"
))
#### describe ####
if (is.null(df)) {
# generic, do not edit
return(fldsNeeded)
} # end describe
#### calculate ####
# check generic, do not edit
fctChkFlds(names(df), fldsNeeded)
# helper definitions
thresholdSimilar <- 0.8
# helper function
`%>%` <- dplyr::`%>%`
# helper function, to be call per group
asTestSimilarityArms <- function(x) {
# early exit
if (is.null(x) || !length(x) || all(is.na(x))) return(NULL)
# normalise
x <- tolower(x)
# early exit if single arm
if (length(x) == 1L) return(0L)
# calculate similarities
t <- stringdist::stringsimmatrix(x, x)
# early exit
if (ncol(t) < 1L) return(NA_integer_)
# extract max similarity of arm n with other arm(s)
diag(t) <- NA_integer_
if (all(is.na(t))) return(0L)
apply(t, 2, max, na.rm = TRUE)
} # asTestSimilarityArms
# helper function, to be called per trial
asTestSimilarityTrial <- function(x) {
# collapse active substances into one string per treatment arm / group
t <- sapply(x, function(i) paste0(unique(i), collapse = " / "))
# return vector of similarities of treatment arms
asTestSimilarityArms(t)
} # end asTestSimilarityTrial
#### . EUCTR ####
df %>% dplyr::mutate(
#
# protocol data
helper_protocol = dplyr::case_when(
.data$e822_placebo ~ .data$e824_number_of_treatment_arms_in_the_trial - 1L,
.data$e823_other ~ .data$e824_number_of_treatment_arms_in_the_trial - 1L,
.default = .data$e824_number_of_treatment_arms_in_the_trial
),
# results data
helper_asNamesPerTestGroup = lapply(
.data$subjectDisposition.postAssignmentPeriods.postAssignmentPeriod.arms.arm,
function(i) {if (any(names(i) == "title"))
as.list(i)[["title"]][
as.list(i)[["type"]][["value"]] == "ARM_TYPE.experimental"] else NA}),
#
helper_simTestGroupsInTrial = lapply(
.data$helper_asNamesPerTestGroup, function(i) asTestSimilarityTrial(i)),
#
helper_numTestGroupsInTrial = sapply(
.data$helper_simTestGroupsInTrial, length, USE.NAMES = FALSE, simplify = TRUE),
#
analysis_numDifferentTestGroupsInTrial = dplyr::case_when(
is.na(.data$helper_asNamesPerTestGroup) ~ NA_integer_,
.data$helper_numTestGroupsInTrial == 1L ~ 1L,
.default = sapply(.data$helper_simTestGroupsInTrial, function(i)
max(sum(i < thresholdSimilar), 1L, na.rm = TRUE), simplify = TRUE)
),
out = if_else(
!is.na(.data$analysis_numDifferentTestGroupsInTrial),
.data$analysis_numDifferentTestGroupsInTrial,
.data$helper_protocol
)
) %>%
dplyr::pull("out") -> df$euctr
#### . CTGOV ####
df %>% dplyr::mutate(
#
helper_asNamesPerExpArm = lapply(
.data$arm_group,
function(i) as.list(i)[["arm_group_label"]][
as.list(i)[["arm_group_type"]] == "Experimental"]),
#
helper_simExpArmsInTrial = lapply(
.data$helper_asNamesPerExpArm, function(i) asTestSimilarityTrial(i)),
#
helper_numExpArmsInTrial = sapply(
.data$helper_simExpArmsInTrial, length, USE.NAMES = FALSE, simplify = TRUE),
#
analysis_numDifferentExpArmsInTrial = dplyr::case_when(
is.na(.data$helper_asNamesPerExpArm) ~ NA_integer_,
.data$helper_numExpArmsInTrial == 1L ~ 1L,
.data$helper_numExpArmsInTrial > 1L ~
sapply(.data$helper_simExpArmsInTrial, function(i)
max(sum(i < thresholdSimilar), 1L, na.rm = TRUE), simplify = TRUE
)
),
# if at least one test arm im trial
out = .data$analysis_numDifferentExpArmsInTrial
) %>%
dplyr::pull("out") -> df$ctgov
#### . CTGOV2 ####
df %>% dplyr::mutate(
#
helper_asNamesPerExpArm = lapply(
.data$protocolSection.armsInterventionsModule.armGroups,
function(i){
if (!any(names(i) == "type")) return(NA)
i["label"][i["type"] == "EXPERIMENTAL"]
}),
#
helper_simExpArmsInTrial = lapply(
.data$helper_asNamesPerExpArm, function(i) asTestSimilarityTrial(i)),
#
helper_numExpArmsInTrial = sapply(
.data$helper_simExpArmsInTrial, length, USE.NAMES = FALSE, simplify = TRUE),
#
analysis_numDifferentExpArmsInTrial = dplyr::case_when(
is.na(.data$helper_asNamesPerExpArm) ~ NA_integer_,
.data$helper_numExpArmsInTrial == 1L ~ 1L,
.data$helper_numExpArmsInTrial > 1L ~ sapply(
.data$helper_simExpArmsInTrial, function(i)
max(sum(i < thresholdSimilar), 1L, na.rm = TRUE), simplify = TRUE)
),
#
out = .data$analysis_numDifferentExpArmsInTrial
) %>%
dplyr::pull("out") -> df$ctgov2
#### . ISRCTN ####
df %>%
dplyr::mutate(
#
helper_drugNames = stringi::stri_split_fixed(
.data$interventions.intervention.drugNames, ", "),
#
helper_numberDrugs = sapply(.data$helper_drugNames, function(i)
length(i), USE.NAMES = FALSE, simplify = TRUE),
#
helper_simDrugNames = lapply(
.data$helper_drugNames, function(i) asTestSimilarityTrial(i)),
#
analysis_numDifferentExpArmsInTrial = dplyr::case_when(
is.na(.data$helper_numberDrugs) ~ NA_integer_,
.data$helper_numberDrugs == 1L ~ 1L,
.data$helper_numberDrugs > 1L ~ sapply(
.data$helper_simDrugNames, function(i)
sum(i < thresholdSimilar),
simplify = TRUE
)
),
#
helper_numberarms = if_else(
grepl("controlled|parallel.?group", .data$trialDesign.secondaryStudyDesign, ignore.case = TRUE),
.data$analysis_numDifferentExpArmsInTrial, 1L),
#
out = if_else(
grepl("drug|biological|vaccine", .data$interventions.intervention.interventionType, ignore.case = TRUE),
.data$helper_numberarms, NA_integer_
)
) %>%
dplyr::pull("out")-> df$isrctn
#### . CTIS ####
df %>% dplyr::mutate(
#
helper_asNamesPerTestGroupCtis1 = lapply(
.data$authorizedPartI.productRoleGroupInfos,
function(i) {if (any(names(i) == "products"))
lapply(i[["products"]], "[[", "productName")[
i[["productRoleName"]] == "Test"] else NA}),
#
helper_simTestGroupsInTrialCtis1 = lapply(
.data$helper_asNamesPerTestGroupCtis1, function(i) asTestSimilarityTrial(i)),
#
helper_numTestGroupsInTrialCtis1 = sapply(
.data$helper_simTestGroupsInTrialCtis1, length, USE.NAMES = FALSE, simplify = TRUE),
#
analysis_numDifferentTestGroupsInTrialCtis1 = dplyr::case_when(
is.na(.data$helper_asNamesPerTestGroupCtis1) ~ NA_integer_,
.data$helper_numTestGroupsInTrialCtis1 == 1L ~ 1L,
.default = sapply(.data$helper_simTestGroupsInTrialCtis1, function(i)
max(sum(i < thresholdSimilar), 1L, na.rm = TRUE), simplify = TRUE)
),
#
#
helper_asNamesPerTestGroupCtis2 = lapply(
.data$authorizedApplication.authorizedPartI.productRoleGroupInfos,
function(i) {if (any(names(i) == "products"))
lapply(i[["products"]], "[[", "productName")[
i[["productRoleName"]] == "Test"] else NA_integer_}),
#
helper_simTestGroupsInTrialCtis2 = lapply(
.data$helper_asNamesPerTestGroupCtis2, function(i) asTestSimilarityTrial(i)),
#
helper_numTestGroupsInTrialCtis2 = sapply(
.data$helper_simTestGroupsInTrialCtis2, length, USE.NAMES = FALSE, simplify = TRUE),
#
analysis_numDifferentTestGroupsInTrialCtis2 = dplyr::case_when(
is.na(.data$helper_asNamesPerTestGroupCtis2) ~ NA_integer_,
.data$helper_numTestGroupsInTrialCtis2 == 1L ~ 1L,
.default = sapply(.data$helper_simTestGroupsInTrialCtis2, function(i)
max(sum(i < thresholdSimilar), 1L, na.rm = TRUE), simplify = TRUE)
),
#
#
out = dplyr::case_when(
!is.na(.data$analysis_numDifferentTestGroupsInTrialCtis1) ~
.data$analysis_numDifferentTestGroupsInTrialCtis1,
!is.na(.data$analysis_numDifferentTestGroupsInTrialCtis2) ~
.data$analysis_numDifferentTestGroupsInTrialCtis2,
.default = NA_integer_
)
) %>%
dplyr::pull("out") -> df$ctis
# merge into vector (ordered factor)
df[[".numTestArmsSubstances"]] <- as.integer(
dfMergeVariablesRelevel(
df = df,
colnames = names(fldsNeeded)
)
)
# keep only outcome columns
df <- df[, c("_id", ".numTestArmsSubstances"), drop = FALSE]
#### checks ####
stopifnot(inherits(df[[".numTestArmsSubstances"]], "integer"))
stopifnot(ncol(df) == 2L)
# return
return(df)
} # end f.numTestArmsSubstances
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.