################################################################################
#
# Functions which will query relevant data from database and mutate data frames
# for better usage
#
################################################################################
#' Get all generated cohort IDs
#'
#' This function queries all generated cohorts in Atlas
#'
#' @param cdm common data model reference object
#'
#' @return A sorted vector of generated cohorts
#' @keywords internal
getCohorts <- function(cdm) {
allCohorts <- cdm$cohort %>% dplyr::distinct(.data$cohort_definition_id) %>% dplyr::pull(1)
allCohorts <- sort(allCohorts)
return(allCohorts)
}
#' Get selected generated cohorts patient data
#'
#' This function queries all patient ID's in selected cohorts
#'
#' @param cdm common data model reference object
#' @param selectedTarget The sample cohort ID
#' @param baseUrl the base URL for the WebApi instance
#' @param pathToStudy path to folder which contains folder named after studyName
#' @param studyName name chosen for study, is used as a folder name
#' @param selectedStates Vector of included cohort ID's
#' @param studyEnv environment created with cohort2TrajectoryConfiguration
#' @param stateCohortLabels Vector of the customized labels of the state cohorts
#'
#' @return A dataframe with selected patients. Columns: cohort_definition_id, subject_id, cohort_start_date, cohort_end_date
#' @keywords internal
getCohortData <- function(cdm,
studyEnv,
selectedTarget,
selectedStates,
baseUrl = "http://localhost:8080/WebAPI",
pathToStudy = studyEnv$pathToStudy,
studyName = studyEnv$studyName,
stateCohortLabels = NULL) {
##############################################################################
#
# Save selected cohorts as JSON
#
##############################################################################
selectedCohorts <- c(selectedTarget, selectedStates)
# Read json files from atlas
if (!is.null(baseUrl)) {
for (i in 1:length(selectedCohorts)) {
cohortId = as.integer(selectedCohorts[i])
object <-
ROhdsiWebApi::getCohortDefinition(cohortId = cohortId, baseUrl = baseUrl)
json <- .toJSON(object$expression, pretty = TRUE)
# Target cohort always has "0" value
if (i == 1) {
cohortId = 0
selectedCohorts[i] = "0"
}
fileConn <-
file(paste(
pathToStudy,
"/",
studyName,
"/JSON/",
as.character(cohortId),
".json",
sep = ""
))
writeLines(json, fileConn)
close(fileConn)
}
}
data = NULL
cohortSetCSV <- generateCohortSetCSV(selectedCohorts, stateCohortLabels, pathToStudy, studyName)
selectedCohortIds <- cohortSetCSV$cohortId
selectedCohortNames <- cohortSetCSV$cohortName
cohortSet <- CDMConnector::readCohortSet(paste(pathToStudy, "/", studyName, "/JSON/", sep = ""))
cdm <- CDMConnector::generateCohortSet(cdm, cohortSet, name = "cohort")
data <- cdm$cohort %>%
dplyr::filter(.data$cohort_definition_id %in% selectedCohortIds) %>%
dplyr::arrange(.data$subject_id,
.data$cohort_start_date,
.data$cohort_end_date) %>%
dplyr::collect()
data$cohort_definition_id = cohortSetCSV$cohortName[match(data$cohort_definition_id, cohortSetCSV$cohortId)]
##############################################################################
#
# We can have a situation where the target cohort is also given
# as a state cohort
#
##############################################################################
data_tmp_target <- NULL
if (selectedTarget %in% selectedStates) {
data_tmp_target = dplyr::filter(data, .data$cohort_definition_id == selectedTarget)
}
##############################################################################
#
# For the purpose of easier target cohort handling we should have a special
# notation for target cohort id, therefore
#
##############################################################################
data <- dplyr::mutate(
data,
cohort_definition_id = ifelse(
.data$cohort_definition_id == "target",
0,
.data$cohort_definition_id
)
)
if (!is.null(data_tmp_target)) {
data = rbind(data, data_tmp_target)
}
data <- dplyr::select(
data,
"subject_id",
"cohort_definition_id",
"cohort_start_date",
"cohort_end_date"
)
return(data)
}
#' This function adds personal data to subjects: age, gender
#'
#' @param cdm a cdm object
#' @param cohortData Imported cohort data
#' @return A dataframe with selected patients. Columns: cohort_definition_id, subject_id, cohort_start_date, cohort_end_date
#' @keywords internal
addPersonalData <- function(cohortData, cdm) {
data <- cohortData
##############################################################################
#
# Add state specific person data to each row
#
##############################################################################
subjects <- data %>% dplyr::distinct(.data$subject_id) %>% dplyr::pull(1) #getCohorts(data, useCDM = FALSE) #TODO: The $ method is for internal use only
personal_data <- cdm$person %>%
dplyr::filter(.data$person_id %in% subjects)
personal_data <- personal_data %>%
dplyr::select("person_id", "gender_concept_id", "birth_datetime") %>%
dplyr::rename(subject_id = "person_id")
##############################################################################
#
# Merge two datasets
#
##############################################################################
data_merged <-
merge(x = data,
y = personal_data,
by = "subject_id",
all.x = TRUE)
##############################################################################
#
# Calculate age when entering state and time in cohort
#
##############################################################################
data_merged <- dplyr::mutate(data_merged, age = round(as.numeric(
difftime(
as.Date(.data$state_start_date),
as.Date(.data$birth_datetime),
units = "days"
) / 365.25
), 3))
data_merged <- dplyr::select(data_merged, !"birth_datetime")
return(data_merged)
}
#' This function eliminates patients which do not fulfill the inclusion criteria
#'
#' @param cohortData Imported cohort data
#' @param mandatoryStates States which have to be present in the trajectory, otherwise dropped#'
#' @param mergeStates Boolean, if you want to merge states when they overlap
#' @param outOfCohortAllowed boolean whether the patient trajectory can surpass the target cohort's observation-period
#' @param mergeThreshold Value from 0 to 1. If mergeStates is TRUE the states will be label-merged given they overlap more than the specified threshold. Can be given as vector, then multiple iterations are runned,
#'
#' @return A dataframe with selected patients. Columns: cohort_definition_id, subject_id, cohort_start_date, cohort_end_date
#' @keywords internal
cleanCohortData <- function(cohortData,
mandatoryStates,
outOfCohortAllowed = FALSE,
mergeStates = FALSE,
mergeThreshold = 0.5) {
data_tmp <- cohortData
data_tmp$subject_id = as.integer(data_tmp$subject_id) #convert int64
##############################################################################
#
# Preserving only patients which are present in the target cohort
#
##############################################################################
# Preserving only patients present in target cohort
patientsEligible <- data_tmp %>%
dplyr::filter(.data$cohort_definition_id == "0") %>%
dplyr::pull(.data$subject_id) %>%
unique()
data_tmp <-
dplyr::filter(data_tmp, .data$subject_id %in% patientsEligible)
##############################################################################
#
# Cleaning data from observations before and after target cohort
#
##############################################################################
# Removing subjects information which have been included before
# target cohort start date (maybe should just switch the start date
# then for the same as target cohort)
# Selecting the first occurring target cohort row
data_target <- data_tmp %>%
dplyr::filter(.data$cohort_definition_id == "0") %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::arrange(.data$cohort_start_date, .by_group = TRUE) %>%
dplyr::slice(1L)
# Selecting information about the states
data_states <-
dplyr::filter(data_tmp, .data$cohort_definition_id != "0")
# Merging all cases where patient has same state overlaps' can occur with custom datasets not cohorts
data_states <- data_states %>%
dplyr::group_by(.data$subject_id, .data$cohort_definition_id) %>%
dplyr::do(merge_overlaps(.)) %>%
dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)
# If mergeStates true
if (mergeStates) {
cli::cli_progress_step("Merging labels according to the specified threshold!")
data_states <-
combineCohorts(data_states,
mergeThreshold,
unique(data_states$subject_id))
data_states <-data_states %>% dplyr::arrange(.data$cohort_start_date, .data$cohort_end_date)
cli::cli_alert_success("Label merging completed!")
}
data_tmp <- rbind(data_target, data_states)
data_target <-
dplyr::select(data_target, "subject_id", "cohort_start_date", "cohort_end_date")
colnames(data_target) <-
c("subject_id", "reference_start_date", "reference_end_date")
data_tmp <- merge(data_tmp, data_target, by = "subject_id")
# If the state start date is after inclusion criteria end date then let's filter it out
if (!outOfCohortAllowed) {
data_tmp <-
dplyr::filter(data_tmp,
!(.data$reference_end_date < .data$cohort_start_date))
}
# If the state end date is before inclusion criteria start date then let's filter it out
data_tmp <-
dplyr::filter(data_tmp,
!(.data$reference_start_date > .data$cohort_end_date))
data_tmp$cohort_end_date <- as.Date(data_tmp$cohort_end_date)
data_tmp$reference_end_date <-
as.Date(data_tmp$reference_end_date)
# If the state start date is inside the interval but the end date is outside the interval
# then cut the endpoint to reference end point
if (!outOfCohortAllowed) {
data_tmp <- data_tmp %>%
dplyr::mutate(
cohort_end_date = dplyr::if_else(
.data$reference_end_date < .data$cohort_end_date,
as.Date(.data$reference_end_date),
as.Date(.data$cohort_end_date)
)
)
}
# If state start date is before inclusion criteria start date then let's change it to the target cohort start date
data_tmp <- dplyr::mutate(
data_tmp,
cohort_start_date =
dplyr::if_else(
.data$reference_start_date <= .data$cohort_start_date,
.data$cohort_start_date,
.data$reference_start_date
)
)
# Lets prioritize the cohorts giving the target cohort priority 0 others 1
# We use priority feature to always order target cohort first per patient in the dataframe
# this positioning is needed for calculating the feature time_in_cohort
data_tmp <- dplyr::mutate(data_tmp,
PRIORITY = dplyr::if_else(.data$cohort_definition_id == "0", 0, 1))
data_tmp <-
dplyr::arrange(data_tmp,
.data$subject_id,
.data$PRIORITY,
.data$cohort_start_date)
##############################################################################
#
# Preserving only patients which have the mandatory state(s)
#
##############################################################################
# Mandatory states
if (!(length(mandatoryStates) == 0 |
"No mandatory state" %in% mandatoryStates)) {
for (state in mandatoryStates) {
tmpPatientsEligible = unique(dplyr::filter(data_tmp, .data$cohort_definition_id == state)$subject_id)
patientsEligible = intersect(patientsEligible, tmpPatientsEligible)
}
}
data_tmp <-
dplyr::filter(data_tmp, .data$subject_id %in% patientsEligible)
##############################################################################
#
# Adding feature "TIME IN COHORT"
#
##############################################################################
# Order by patientId, start & end date
#data_merged = data_merged[order(data_merged[, 1], data_merged[, 3], data_merged[, 4]),]
data_tmp <-
dplyr::mutate(data_tmp, time_in_cohort = round(as.numeric(
difftime(
as.Date(.data$cohort_start_date),
as.Date(.data$reference_start_date),
units = "days"
) /
365.25
), 3))
data_tmp <- dplyr::select(
data_tmp,
"subject_id",
"cohort_definition_id",
"cohort_start_date",
"cohort_end_date",
"time_in_cohort"
)
return(data_tmp)
}
#' Build table for cohorts which shows the inclusion statistics
#'
#' This function creates an inclusion table from getCohortsPatients() function data
#'
#' @param cohortData Output of getCohortsPatients()
#' @param selectedCohorts Vector of included cohort ID's
#' @param cohortLabels Labels for cohort
#'
#' @return A dataframe with each selected cohort and its relation to the target cohort
#' @keywords internal
getInclusionTable <- function(cohortData,
selectedCohorts,
cohortLabels = NULL) {
data <- cohortData
nr_unique_cohorts <- length(unique(selectedCohorts))
cohortbyinclusion <-
as.data.frame(matrix(NA, nrow = nr_unique_cohorts, ncol = 3))
colnames(cohortbyinclusion) <-
c('(#) of patients', '(#) in target cohort', "(%) in target cohort")
rownames(cohortbyinclusion) <- c(unique(selectedCohorts))
data_inclusion <- data %>%
dplyr::select(.data$cohort_definition_id, .data$subject_id) %>%
dplyr::distinct()
patients_target <- data_inclusion %>%
dplyr::filter(.data$cohort_definition_id == "0") %>%
dplyr::select("subject_id")
##############################################################################
#
# It's important that the state cohorts are defined in Atlas so that they are
# always a subset of the target cohort, this way the statistics won't be time consuming
#
##############################################################################
for (cohortID in unique(selectedCohorts)) {
patients_observed <- data_inclusion %>%
dplyr::filter(.data$cohort_definition_id == cohortID) %>%
dplyr::select("subject_id")
nr_of_patients <- nrow(patients_observed)
nr_in_target <-
length(intersect(
patients_target$subject_id,
patients_observed$subject_id
))
prc_in_target <-
ifelse(nr_of_patients == 0, 0.00, round((nr_in_target /
nr_of_patients) * 100, 2))
cohortbyinclusion[toString(cohortID), ] = c(nr_of_patients, nr_in_target, prc_in_target)
}
cohortbyinclusion[, 2] <- as.integer(cohortbyinclusion[, 2])
cohortbyinclusion[, 1] <- as.integer(cohortbyinclusion[, 1])
if (is.null(cohortLabels) |
length(cohortLabels) != length(selectedCohorts))
return(cohortbyinclusion)
if (cohortLabels[1] == "0") {
cohortLabels[1] <- "TARGET"
}
rownames(cohortbyinclusion) = cohortLabels
return(cohortbyinclusion)
}
#' Convert the imported cohort data to patient treatmnet trajectories
#'
#' This function outputs a data.frame object which describes the movement of patients between the defined states
#'
#' @param cohortData A data.frame object which is queried with getCohortsPatients function
#' @param stateDuration The length of a discrete state
#' @param stateSelection The chosen state selection method
#' @param statePriorityVector The order of priority for states
#' @param absorbingStates The absorbing states: patient trajectory ends with it
#' @param cdm a cdm object
#' @param oocFix The method to use for replacing "OUT OF COHORT" states with more relevant states
#' @param studyName name chosen for study, is used as a folder name
#' @param allowedStatesList A list object which indicates accessible states from said state
#' @param addPersonalData Logical, indicating whether or not to add personal data for each patient
#' @keywords internal
getTrajectoriesDiscrete <- function(cdm,
cohortData,
stateDuration = 30,
stateSelection = 1,
statePriorityVector = NULL,
absorbingStates = NULL,
oocFix = "None",
studyName = "",
addPersonalData = TRUE,
allowedStatesList = list()) {
tmp_data <- dplyr::filter(cohortData, .data$cohort_definition_id != "0")
tmp_data <-
dplyr::arrange(tmp_data, "subject_id", "cohort_start_date")
# Getting all relevant patient ID's
patientIds <- unique(tmp_data$subject_id)
newPatientData <- getDiscreteStates(
stateSelection = as.numeric(stateSelection),
oocFix = oocFix,
stateDuration = stateDuration,
patientIDs = patientIds,
patientData = tmp_data,
statePriorityVector = statePriorityVector,
allowedStatesList = allowedStatesList
)
################################################################################
#
# Removing absorbing states
#
################################################################################
newPatientData <- removeAfterAbsorbingStatesDiscrete(
patientData <- newPatientData,
patientIDs <- patientIds,
absorbingStates <- if (is.null(absorbingStates))
c("No absorbing state")
else
absorbingStates
)
##############################################################################
#
# Adding personal data
#
##############################################################################
if (addPersonalData) {
newPatientData <-
addPersonalData(newPatientData, cdm)
}
else {
newPatientData$gender_concept_id = 0
newPatientData$birth_datetime = "1900-01-01"
}
################################################################################
#
# Saving data
#
################################################################################
states = as.character(c("START", setdiff(
unique(newPatientData$state_label), c('START', 'EXIT')
), "EXIT"))
n = length(states)
newPatientData$state_id =
plyr::mapvalues(
x = newPatientData$state_label,
from = states,
to = 1:n,
warn_missing = FALSE
)
return(newPatientData)
}
################################################################################
#
# Continuous trajectory functions
#
################################################################################
#' Prepare patientData dataframe for msm package
#'
#' This function prepares patientData dataframe for msm package
#'
#' @param patientData Object of class data.frame with columns subject_id, state_label, STATE_START_DATE, STATE_END_DATE
#' @param stateSelection Selection of the type of ordering
#' @param statePriorityVector All states in prioritized order
#' @param absorbingStates Absorbing states in dataframe
#' @param cdm a cdm object
#' @param studyName name chosen for study, is used as a folder name
#' @param allowedStatesList A list object which indicates accessible states from said state
#' @param addPersonalData Logical, indicating whether or not to add personal data for each patient
#' @return A dataframe ready for using msm package
#' @keywords internal
getTrajectoriesContinuous <- function(cdm,
patientData,
stateSelection,
statePriorityVector,
absorbingStates = NULL,
studyName = "",
addPersonalData = TRUE,
allowedStatesList = list()) {
data <- patientData
data <- dplyr::mutate(
data,
cohort_definition_id = ifelse(
.data$cohort_definition_id == "0",
"START",
.data$cohort_definition_id
)
)
### Let's find the ending date of last active state
data_ls <- data %>%
dplyr::filter(.data$cohort_definition_id != "START") %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::summarise(LAST_STATE_DATE = max(as.Date(.data$cohort_end_date), na.rm = TRUE)) %>%
dplyr::select("subject_id", "LAST_STATE_DATE")
data <- merge(data, data_ls, by = "subject_id")
data <- dplyr::mutate(
data,
cohort_end_date = dplyr::if_else(
.data$cohort_definition_id == "START",
as.Date(.data$cohort_start_date),
as.Date(.data$cohort_end_date)
)
)
# data$cohort_definition_id = as.character(data$cohort_definition_id)
# Adding "EXIT" state
data <- data %>%
dplyr::group_by(.data$subject_id) %>%
dplyr::summarise(
LAST_STATE_DATE = max(.data$LAST_STATE_DATE, na.rm = TRUE),
cohort_start_date = max(as.Date(.data$LAST_STATE_DATE) + 1, na.rm = TRUE),
.groups = "drop"
) %>%
dplyr::mutate(
cohort_end_date = .data$cohort_start_date,
cohort_definition_id = "EXIT"
) %>%
dplyr::bind_rows(data) %>%
dplyr::arrange(.data$subject_id, .data$cohort_start_date)
##############################################################################
#
# When we have prioritized states, we have to make sure that a state with smaller
# priority won't be used instead of a state with higher priority
#
##############################################################################
# Lets just create a new dataframe and add all the states in the order of priorities
if (stateSelection == 3) {
newData <-
dplyr::filter(data, .data$cohort_definition_id %in% c("START", "EXIT"))
for (patientID in unique(newData$subject_id)) {
patientData <- dplyr::filter(
data,
.data$subject_id == patientID &
.data$cohort_definition_id != "EXIT" &
.data$cohort_definition_id != "START"
)
for (p in 1:length(statePriorityVector)) {
priorityData <- dplyr::filter(patientData,
.data$cohort_definition_id == statePriorityVector[p])
if (nrow(priorityData) == 0) {
next
}
patientData <- dplyr::filter(patientData,
.data$cohort_definition_id != statePriorityVector[p])
if (nrow(patientData) > 0) {
for (i in 1:nrow(priorityData)) {
newpatientData <- patientData[0, ]
for (j in 1:nrow(patientData)) {
if (nrow(patientData) == 0) {
break
}
if (priorityData[i, ]$cohort_start_date <= patientData[j, ]$cohort_start_date &
priorityData[i, ]$cohort_end_date >= patientData[j, ]$cohort_end_date) {
# In this case the smaller priority state is absorbed by a higher priority state
next
}
else if (priorityData[i, ]$cohort_start_date >= patientData[j, ]$cohort_start_date &
priorityData[i, ]$cohort_end_date <= patientData[j, ]$cohort_end_date) {
if (priorityData[i, ]$cohort_start_date > patientData[j, ]$cohort_start_date) {
head <- patientData[j, ]
head$cohort_end_date <-
priorityData[i, ]$cohort_start_date
newpatientData <- rbind(newpatientData, head)
}
if (priorityData[i, ]$cohort_end_date <= patientData[j, ]$cohort_end_date) {
tail <- patientData[j, ]
tail$cohort_start_date <-
priorityData[i, ]$cohort_end_date + 1
newpatientData <- rbind(newpatientData, tail)
}
}
else if (priorityData[i, ]$cohort_start_date <= patientData[j, ]$cohort_end_date &
priorityData[i, ]$cohort_end_date >= patientData[j, ]$cohort_end_date) {
head <- patientData[j, ]
head$cohort_end_date <-
priorityData[i, ]$cohort_start_date
newpatientData <- rbind(newpatientData, head)
next
}
else if (priorityData[i, ]$cohort_start_date <= patientData[j, ]$cohort_start_date &
priorityData[i, ]$cohort_end_date >= patientData[j, ]$cohort_start_date) {
tail <- patientData[j, ]
tail$cohort_start_date <-
priorityData[i, ]$cohort_end_date
newpatientData <- rbind(newpatientData, tail)
}
else {
newpatientData <- rbind(newpatientData, patientData[j, ])
}
}
patientData <- newpatientData
}
}
newData <- rbind(newData, priorityData)
}
}
# Inherit the new value
data <- newData
}
# Calculating other time_in_cohort values
data_target <- dplyr::slice(dplyr::arrange(
dplyr::group_by(
dplyr::filter(data, .data$cohort_definition_id == "START"),
.data$subject_id
),
.data$cohort_start_date
),
1L)
# Selecting information about the states
data_states <-
dplyr::filter(data, .data$cohort_definition_id != "START")
data <- rbind(data_target, data_states)
data_target <-
dplyr::select(data_target, "subject_id", "cohort_start_date", "cohort_end_date")
colnames(data_target) <-
c("subject_id", "reference_start_date", "reference_end_date")
data <- merge(data, data_target, by = "subject_id")
data <- dplyr::mutate(data, time_in_cohort = round(as.numeric(
difftime(
as.Date(.data$cohort_start_date),
as.Date(.data$reference_start_date),
units = "days"
) /
365.25
), 3))
data <- dplyr::select(
data,
"subject_id",
"cohort_definition_id",
"cohort_start_date",
"cohort_end_date",
"time_in_cohort"
)
data <- dplyr::arrange(data, .data$subject_id, .data$time_in_cohort)
# We have to map states to 1,..., n.
states <- as.character(c("START", setdiff(
unique(data$cohort_definition_id), c('START', 'EXIT')
), "EXIT"))
n <- length(states)
data$state_label <-
plyr::mapvalues(
x = data$cohort_definition_id,
from = states,
to = 1:n,
warn_missing = FALSE
)
data <- dplyr::mutate(data, state_label = as.numeric(.data$state_label))
data <- dplyr::arrange(data, .data$subject_id, .data$time_in_cohort, .data$state_label)
data <- dplyr::select(
data,
"subject_id",
"cohort_definition_id",
"cohort_start_date",
"cohort_end_date",
"state_label",
"time_in_cohort"
)
colnames(data) <- c(
"subject_id",
"state_label",
"state_start_date",
"state_end_date",
"state_id",
"time_in_cohort"
)
# We should make sure that the time_in_cohort column has differing values for each state for the same patient
# for developement case, let's just create an artificial difference of 1 day for each colliding date
data <- dplyr::arrange(data, .data$subject_id, .data$time_in_cohort, .data$state_id)
paient_id <- NA
last_patient_id <- data$subject_id[1]
last_observed_ts <- data$time_in_cohort[1]
coef <- 1
impact <- 1 / 365.25
data$time_in_cohort <- round(data$time_in_cohort, 6)
for (row in 2:nrow(data)) {
patient_id <- data$subject_id[row]
if (patient_id != last_patient_id |
last_observed_ts < data$time_in_cohort[row]) {
last_patient_id <- patient_id
last_observed_ts <- data$time_in_cohort[row]
coef <- 1
}
else {
data$time_in_cohort[row] <- data$time_in_cohort[row] + impact * coef
last_patient_id <- patient_id
coef <- coef + 1
last_observed_ts <- data$time_in_cohort[row]
}
}
# Fix state end dates
data <- data %>%
dplyr::arrange(.data$subject_id, .data$time_in_cohort) %>% # Arrange by SUBJECT_ID and TIME_IN_COHORT
dplyr::group_by(.data$subject_id) %>% # Group by SUBJECT_ID
dplyr::mutate(
state_end_date = dplyr::case_when(
state_label %in% c("START", "EXIT") ~ state_end_date,
# Keep START and EXIT as is
TRUE ~ dplyr::lead(state_start_date) # Use next STATE_START_DATE
)
) %>%
dplyr::ungroup() # Ungroup for safety
################################################################################
#
# Removing absorbing states
#
################################################################################
data <- removeAfterAbsorbingStatesContinuous(
patientData = data,
patientIDs = unique(data$subject_id),
absorbingStates = if (is.null(absorbingStates)) {
c("No absorbing state")
}
else {
absorbingStates
}
)
##############################################################################
#
# Remove prohibited transitions
#
##############################################################################
data <- removeProhibitedTransitionsContinuous(
patientData = data,
patientIDs = unique(data$subject_id),
allowedStatesList = allowedStatesList
)
##############################################################################
#
# Adding personal data
#
##############################################################################
if (addPersonalData) {
data <- addPersonalData(data, cdm)
}
else {
data$gender_concept_id = 0
data$birth_datetime = "1900-01-01"
}
return(data)
}
#' Load settings of the study from trajectorySettings.csv according to the customized paramater studyName
#'
#' @param studyName Customized name for the study
#' @keywords internal
loadSettings <- function(studyName, pathToStudy) {
env <-
rlang::new_environment(data = list(), parent = rlang::empty_env())
jsonFiles = list.files(
path = paste(pathToStudy, "/", studyName, "/JSON", sep = ""),
pattern = NULL,
all.files = FALSE,
full.names = TRUE
)
stateNamesJSON <- list.files(
path = paste(pathToStudy, "/", studyName, "/JSON", sep = ""),
pattern = NULL,
all.files = FALSE,
full.names = FALSE
)
stateNamesJSON <-
substr(stateNamesJSON, 1, nchar(stateNamesJSON) - 5)
targetIndex <- which(stateNamesJSON == "0")
env$stateNamesJSON <- stateNamesJSON[-targetIndex]
env$targetJSON <-
if (identical(jsonFiles[targetIndex], character(0))) {
""
}
else {
paste(readLines(jsonFiles[targetIndex]), collapse = "\n")
}
env$jsonFiles <- jsonFiles[-targetIndex]
insertedJSONs <- c()
for (jsonFile in env$jsonFiles) {
insertedJSONs <-
c(insertedJSONs, paste(readLines(jsonFile), collapse = "\n"))
}
env$insertedJSONs <- insertedJSONs
settings <-
utils::read.csv(paste(
pathToStudy,
"/",
studyName,
"/Settings/trajectorySettings.csv",
sep = ""
))
savedStudyNames <- settings$studyName
studyIndex <- which(savedStudyNames == studyName)
if (length(studyIndex) == 0) {
return(NULL)
}
env$savedTrajectoryType <- settings$trajectoryType[studyIndex]
env$savedTrajectoryStates <-
strsplit(settings$trajectoryStates[studyIndex], ",")[[1]]
env$savedPriorityOrder <-
strsplit(settings$priorityOrder[studyIndex], ",")[[1]]
env$savedStateSelectionType <-
settings$stateSelectionType[studyIndex]
env$savedAbsorbingStates <- settings$absorbingStates[studyIndex]
env$savedMandatoryStates <- settings$mandatoryStates[studyIndex]
env$savedLengthOfStay <- settings$lengthOfStay[studyIndex]
env$savedOutOfCohortAllowed <-
settings$outOfCohortAllowed[studyIndex]
env$outOfCohortFix <- settings$outOfCohortFix[studyIndex]
return(env)
}
#' Generate cohort set CSV
#'
#' internal function for generating cohort set CSV for CDMConnector generateCohorts
#'
#' @param selectedStates all states to be included in db query
#' @param pathToStudy path to folder that contains study folder
#' @param studyName name of the study directory
#' @param stateCohortLabels user inserted labels for non target states
#'
#' @return cohortSetCSV df
#' @keywords internal
generateCohortSetCSV <- function(selectedStates,
stateCohortLabels,
pathToStudy,
studyName) {
if (file.exists(paste0(pathToStudy, studyName, '/JSON/CohortsToCreate.csv'))) {
file.remove(paste0(pathToStudy, studyName, '/JSON/CohortsToCreate.csv'))
}
cohortName <- c("target", stateCohortLabels)
jsonPath <- paste0(selectedStates, ".json")
max <- length(cohortName)
cohortId <- c(1:max)
cohortSet <- data.frame(cohortId, cohortName, jsonPath)
cohortList <- list(
cohortId = cohortSet$cohortId,
cohortName = cohortSet$cohortName,
jsonPath = cohortSet$jsonPath
)
save_object(cohortSet,
paste0(pathToStudy, "/", studyName, '/JSON/CohortsToCreate.csv'))
return (cohortSet)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.