# R/calculate_group_frequency.R In AJFOWLER/comorbidgroupr: Identify Groups of Comorbid Diseases Associated with Outcomes

#### Documented in calculate_group_frequency

```r
#' Calculate group frequency for all unique combinations
#' @description Generate an ordered data.frame of different disease combinations on the basis of the most frequent or most strongly associated with outcomes.
#'
#' @param unique_combinations List of unique combinations of disease positions for a given number of combinations, generated using \code{unique_combos}.
#' @param all_diseases List of positions associated with each disease, generated using \code{get_disease_counts()}.
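#' @param outcome_positions Numeric vector where each element refers to a record that suffered a particular outcome. A vector of length one is treated as "no outcome data", and results are then ranked by frequency only.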
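#' @param min_freq Number between 0 and 1; minimum proportion of records (out of \code{tots}) that a combination must cover to be included in the stem. When outcomes are used (\code{use_outcome = TRUE} and \code{outcome_positions} supplied), \code{min_freq} is instead the minimum event rate a combination must reach to be considered.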
#' @param tots Numeric, total length of \code{comorbid_column} initially profiled to calculate frequency proportions to compare against \code{min_freq}.
#' @param use_outcome Logical; whether to use the outcome variable for stem generation.
#' @return data.frame ordered from lowest to highest proportion of those suffering outcomes (if \code{outcome_positions} entered) or number of records associated with that combination (if no \code{outcome_positions} entered).
#'
#' @examples
#' disease_counts = list(c(1), c(1,4), c(1,2,3,4), c(1))
#' unique_pos = structure(c(1, 1, 1, 2, 2, 3, 2, 3, 4, 3, 4, 4), .Dim = c(6L, 2L))
#'
#' calculate_group_frequency(unique_combinations = unique_pos,
#' all_diseases = disease_counts,
#' outcome_positions = 0, tots = 4)
#'
#' calculate_group_frequency(unique_combinations = unique_pos,
#' all_diseases = disease_counts,
#' outcome_positions = c(0,1,1,0), tots = 4)
#'
#' @export

calculate_group_frequency <- function(unique_combinations,
                                      all_diseases,
                                      outcome_positions,
                                      min_freq = 0,
                                      tots,
                                      use_outcome = FALSE) {
  # Score every disease combination (one row of `unique_combinations`) by
  #   freq      - number of records carrying every disease in the combination
  #   outcome   - how many of those records are also in `outcome_positions`
  #   propr_out - outcome / freq (event rate within the combination)
  # then filter by `min_freq` and return the rows in ascending order.
  #
  # Columns are always accessed by exact name with `[[`; `$` would silently
  # fall back to partial matching (e.g. `$out` resolving to `outcome`).

  # Closure mapping a disease position to the record indices of that disease;
  # position 0 is padding and maps to NULL.
  lookup_positions <- .get_list_pos(all_diseases)

  combos_outcome <- data.frame(unique_combinations, stringsAsFactors = FALSE)
  combo_matrix <- as.matrix(combos_outcome)

  # Records shared by all diseases of each combination. Iterating over
  # seq_len(nrow()) keeps this well-defined when there are no combinations.
  matched_records <- lapply(seq_len(nrow(combo_matrix)), function(i) {
    .reduce_set_overlap(lapply(combo_matrix[i, ], lookup_positions))
  })

  combos_outcome[["freq"]] <- lengths(matched_records)
  combos_outcome[["outcome"]] <- vapply(
    matched_records,
    function(records) length(intersect(records, outcome_positions)),
    integer(1)
  )
  combos_outcome[["propr_out"]] <-
    combos_outcome[["outcome"]] / combos_outcome[["freq"]]

  # A length-one `outcome_positions` is the "no outcome data" sentinel.
  frequency_only <- length(outcome_positions) == 1L ||
    !isTRUE(as.logical(use_outcome))

  if (frequency_only) {
    # No outcome data: keep combinations covering at least `min_freq` of the
    # profiled records and rank by raw frequency.
    share <- combos_outcome[["freq"]] / tots
    keep <- !is.na(share) & share >= min_freq
    combos_outcome <- combos_outcome[keep, ]
    return(combos_outcome[order(combos_outcome[["freq"]], decreasing = FALSE), ])
  }

  # Outcome data: keep combinations whose event rate reaches `min_freq`.
  # Combinations with no records have an undefined rate (0 / 0 = NaN); they
  # are dropped explicitly, because indexing rows with NA would otherwise
  # inject all-NA rows into the result.
  rate <- combos_outcome[["propr_out"]]
  keep <- !is.na(rate) & rate >= min_freq
  combos_outcome <- combos_outcome[keep, ]

  # Rank by event rate first, then by frequency to break ties.
  combos_outcome[
    order(
      combos_outcome[["propr_out"]],
      combos_outcome[["freq"]],
      decreasing = FALSE
    ),
  ]
}

# helper functions below #
.reduce_set_overlap <- function(vector_list) {
  # Intersect all non-empty vectors in `vector_list`; empty (or NULL) entries
  # are ignored rather than collapsing the result to nothing. Returns NULL
  # when no non-empty entry remains.
  non_empty <- Filter(function(entry) length(entry) > 0, vector_list)
  Reduce(intersect, non_empty)
}

.get_list_pos <- function(list) {
  # Build a lookup closure over `list`: given a position it returns the
  # element stored there, and NULL for position 0.
  lookup <- function(pos) {
    if (pos != 0) {
      return(list[[pos]])
    }
    NULL
  }
  # Returned invisibly so a bare call at the console prints nothing.
  invisible(lookup)
}
```
AJFOWLER/comorbidgroupr documentation built on May 11, 2021, 6:01 a.m.