#' Calculates medication adherence based on a fixed, specified refill time period
#'
#' @param serialDf the data frame with the serial data
#' @param startDates dataframe with ID and a date to start the adherence calculation (optional)
#' @param endDates dataframe with ID and a date to end the adherence calculation (optional)
#' @param atcCode regular expression for the class of medication ATC code (default: C09 which stands for any RAAS)
#' @param refillPeriod length of the refill period (default 90 days)
#' @param stopPeriod number of days after which the medication is considered to have stopped (optional)
#' @param idColumn name of ID column: default is PATIENT
#' @param dateColumn name of date column: default is VISIT. This column has to be of class Date
#' @param atcColumn name of the column with the ATC codes: default is ATC
#' @param doPCAScore flag whether a PCA score from the medication data should be calculated
#' @return time block data and dates in wide format
#' @export
#' @importFrom magrittr %>%
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr select_if
#' @importFrom dplyr starts_with
#' @importFrom dplyr ends_with
calculateFixedAdherence <- function(serialDf, startDates = NA, endDates = NA, atcCode = "C09", refillPeriod = 90, stopPeriod = NA,
idColumn = "PATIENT", dateColumn = "VISIT", atcColumn = "ATC", doPCAScore = F) {
# check data frames
if (nrow(serialDf) == 0 ) {
stop("Serial prescription data is empty")
} else {
serialDf <- serialDf[ , colSums(is.na(serialDf)) == 0]
if (nrow(serialDf) == 0 ) {
stop("Serial prescription data is empty after removing missing data")
}
}
# check block length
if (refillPeriod <= 0) {
stop("The refill period cannot be 0 or negative")
}
if(!is.na(stopPeriod)){
if(stopPeriod <= refillPeriod){
stop("Stop period needs to be bigger than the refill period")
}
}
# check baseline data
if(!is.null(nrow(startDates))){
startDate <- checkBaselineFormat(startDates, idColumn = idColumn, dateColumn = dateColumn)
if (is.null(startDate)) {
stop("incorrect format of start dates. Check column class. No missing values allowed.")
}
}
#chek end date data
if(!is.null(nrow(endDates))){
endDate <- checkBaselineFormat(endDates, idColumn = idColumn, dateColumn = dateColumn)
if (is.null(endDate)) {
stop("incorrect format of end dates. Check column class. No missing values allowed.")
}
}
# check serial prescription data
serialDf <- checkBinaryPrescriptionFormat(serialDf, idColumn = idColumn, dateColumn = dateColumn, atcColumn = atcColumn)
if (is.null(serialDf)) {
stop("incorrect format of serial prescription input data. Check column class. No missing values allowed.")
}
# check start and end date patients are the same
if(!is.null(nrow(startDates)) & !is.null(nrow(endDates))){
if(!all(startDates$PATIENT %in% endDates$PATIENT) & !all(endDates$PATIENT %in% startDates$PATIENT)){
stop("start and end dates must be specified for the same patients")
}
}
# restrict serial data to patients in startDates (if available)
if(!is.null(nrow(startDates))){
serialDf <- serialDf %>% filter(PATIENT %in% startDates$PATIENT)
if (nrow(serialDf) == 0) {
stop("Serial prescription data does not contain any data for the patients.")
}
}
# restrict serial data to patients in endDates (if available)
if(!is.null(nrow(endDates))){
serialDf <- serialDf %>% filter(PATIENT %in% endDates$PATIENT)
if (nrow(serialDf) == 0) {
stop("Serial prescription data does not contain any data for the patients.")
}
}
# restrict serial to ATC code under investigation
serialDf <- serialDf %>% filter(grepl(atcCode, ATC))
if (nrow(serialDf) == 0) {
stop('Serial data contains no prescriptions with the specific ATC code expression.')
}
#if start dates are available, start looking one refill period back to see whether the patient was medicated at start
if(!is.null(nrow(startDates))){
startDates <- startDates %>%
mutate(VISIT = VISIT - refillPeriod)
}
#create results data frame
resultsDf <- data.frame(PATIENT=character(),adherence=double(),firstPrescription=character(),lastPrescription=character(),
lastCoverDate=character(), numPrescriptions=numeric(), startStops=numeric(), stoppedYears = double(),
coveredYears = double(), stringsAsFactors = F)
#if there are start dates, use the start date patients to iterate over to find adherence, otherwise use all patients in the prescriptions
if(!is.null(nrow(startDates))){
allPatients <- unique(startDates$PATIENT)
} else {
allPatients <- unique(serialDf$PATIENT)
}
#go through all patients
for(i in 1:length(allPatients)){
myPatient <- allPatients[i]
resultsDf[i,'PATIENT'] <- myPatient
myPrescriptions <- serialDf %>%
filter(PATIENT == myPatient)
#restrict prescription for this patient to start at a certain time (optional)
if(nrow(myPrescriptions) > 0){
if(!is.null(nrow(startDates))){
myStartDate <- (startDates %>% filter(PATIENT == myPatient))$VISIT
myPrescriptions <- myPrescriptions %>%
filter(VISIT >= myStartDate)
}
if(!is.null(nrow(endDates))){
myEndDate <- (endDates %>% filter(PATIENT == myPatient))$VISIT
myPrescriptions <- myPrescriptions %>%
filter(VISIT <= myEndDate)
}
}
#if there are 0 prescriptions, there is 0 adherence, if there is 1 prescription, the adhrence period is the refill period
if(nrow(myPrescriptions) > 1){
resultsDf[i,'firstPrescription'] <- as.character(as.Date(min(myPrescriptions$VISIT)))
resultsDf[i,'lastPrescription'] <- as.character(as.Date(max(myPrescriptions$VISIT)))
resultsDf[i,'lastCoverDate'] <- as.character(as.Date(max(myPrescriptions$VISIT)) + refillPeriod)
resultsDf[i,'numPrescriptions'] <- nrow(myPrescriptions)
daysToCover <- as.numeric(max(myPrescriptions$VISIT) - min(myPrescriptions$VISIT)) + refillPeriod
#go through the precsriptions in a sorted way starting from the earliest
myPrescriptions <- myPrescriptions[order(myPrescriptions$VISIT),]
uncoveredDays <- 0
stoppedDays <- 0
numStops <- 0
for(j in 2:nrow(myPrescriptions)){
pOld <- myPrescriptions[(j-1),'VISIT'][[1]]
pNew <- myPrescriptions[j,'VISIT'][[1]]
if(as.numeric(pNew - pOld) > refillPeriod){
if(is.na(stopPeriod)){
uncoveredDays <- uncoveredDays + as.numeric((pNew - pOld)) - refillPeriod
} else if(as.numeric(pNew - pOld) > (stopPeriod + refillPeriod)){
stoppedDays <- stoppedDays + as.numeric((pNew - pOld))
numStops <- numStops + 1
daysToCover <- daysToCover - as.numeric((pNew - pOld))
} else {
uncoveredDays <- uncoveredDays + as.numeric((pNew - pOld)) - refillPeriod
}
}
}
resultsDf[i,'adherence'] <- (daysToCover - uncoveredDays)/daysToCover
resultsDf[i,'coveredYears'] <- (daysToCover - uncoveredDays)/365.25
resultsDf[i,'startStops'] <- numStops
resultsDf[i,'stoppedYears'] <- stoppedDays/365.25
} else if(nrow(myPrescriptions) == 0){
resultsDf[i,'adherence'] <- 0
resultsDf[i,'firstPrescription'] <- NA
resultsDf[i,'lastPrescription'] <- NA
resultsDf[i,'lastCoverDate'] <- NA
resultsDf[i,'numPrescriptions'] <- 0
resultsDf[i,'startStops'] <- NA
resultsDf[i,'stoppedYears'] <- NA
resultsDf[i,'coveredYears'] <- 0
} else if(nrow(myPrescriptions) == 1){
resultsDf[i,'adherence'] <- 1
resultsDf[i,'firstPrescription'] <- as.character(as.Date(min(myPrescriptions$VISIT)))
resultsDf[i,'lastPrescription'] <- as.character(as.Date(min(myPrescriptions$VISIT)))
resultsDf[i,'lastCoverDate'] <- as.character(as.Date(min(myPrescriptions$VISIT)) + refillPeriod)
resultsDf[i,'numPrescriptions'] <- 1
resultsDf[i,'startStops'] <- 0
resultsDf[i,'stoppedYears'] <- 0
resultsDf[i,'coveredYears'] <- refillPeriod/365.25
}
}
resultsDf$firstPrescription <- as.Date(resultsDf$firstPrescription)
resultsDf$lastPrescription <- as.Date(resultsDf$lastPrescription)
resultsDf$lastCoverDate <- as.Date(resultsDf$lastCoverDate)
if(!is.null(nrow(endDates))){
resultsDf <- resultsDf %>%
merge(endDates %>% select(PATIENT, VISIT))
resultsDf <- resultsDf %>%
mutate(gapEndFollowUpYears = safe.ifelse(lastCoverDate < VISIT,as.numeric(VISIT - lastCoverDate)/365.25,0)) %>%
select(-VISIT)
}
if(doPCAScore){
#if there is only one prescription, we set the adherence from perfect to 0
scoring <- resultsDf %>%
mutate(adherence = ifelse(numPrescriptions == 1,0,adherence))%>%
select(-ends_with('Prescription'), -ends_with('Date'), -numPrescriptions) %>%
mutate(gapEndFollowUpYears = gapEndFollowUpYears * -1, startStops = startStops * -1, stoppedYears = stoppedYears * -1) #all effects in the same direction
scoring[is.na(scoring)] <- 0
s <- prcomp(scoring %>% select(-PATIENT, -starts_with('ATC')) %>% select_if(~ length(unique(.)) > 1), center = TRUE,scale. = TRUE)
s <- scale(s$x[,1])
resultsDf <- cbind(resultsDf,s) %>%
as.data.frame()
names(resultsDf)[which(names(resultsDf) == 's')] <- 'score_all'
#remove gap to end
s <- prcomp(scoring %>% select(-PATIENT, -gapEndFollowUpYears, -starts_with('ATC')) %>% select_if(~ length(unique(.)) > 1),
center = TRUE,scale. = TRUE)
s <- scale(s$x[,1])
resultsDf <- cbind(resultsDf,s) %>%
as.data.frame()
names(resultsDf)[which(names(resultsDf) == 's')] <- 'score_withoutEndGap'
}
resultsDf
}
#' Calculates medication adherence based on a fixed, specified refill time period
#'
#' @param serialDf the data frame with the serial data
#' @param startDates dataframe with ID and a date to start the adherence calculation (optional)
#' @param endDates dataframe with ID and a date to end the adherence calculation (optional)
#' @param atcCode list of regular expressions for the classes of medication ATC code (default: c('C09', 'C10'))
#' @param refillPeriod length of the refill period (default 90 days)
#' @param stopPeriod number of days after which the medication is considered to have stopped (optional)
#' @param idColumn name of ID column: default is PATIENT
#' @param dateColumn name of date column: default is VISIT. This column has to be of class Date
#' @param atcColumn name of the column with the ATC codes: default is ATC
#' @param doPCAScore flag whether a PCA score from the medication data should be calculated
#' @param combinedScore flag whether a combined PCA scores for all ATC codes in the list should be calculated
#' @return time block data and dates in wide format
#' @export
#' @importFrom magrittr %>%
#' @importFrom plyr ldply
#' @importFrom tidyr pivot_wider
#' @importFrom dplyr select
#' @importFrom dplyr select_if
#' @importFrom dplyr starts_with
#' @importFrom dplyr ends_with
calculateFixedAdherenceList <- function(serialDf, startDates = NA, endDates = NA, atcCodeList = c("C09", "C10"), refillPeriod = 90, stopPeriod = NA,
idColumn = "PATIENT", dateColumn = "VISIT", atcColumn = "ATC", doPCAScore = F, combinedScore = T) {
results <- atcCodeList %>%
lapply(listAllAdherences, serialDf, startDates, endDates, refillPeriod, stopPeriod, idColumn, dateColumn, atcColumn) %>%
ldply()
if(doPCAScore){
#if there is only one prescription, we set the adherence from perfect to 0
scoring <- results %>%
mutate(adherence = ifelse(numPrescriptions == 1,0,adherence)) %>%
select(-ends_with('Prescription'), -ends_with('Date'), -numPrescriptions) %>%
mutate(gapEndFollowUpYears = gapEndFollowUpYears * -1, startStops = startStops * -1, stoppedYears = stoppedYears * -1) #all effects in the same direction
scoring <- results %>%
select(-ends_with('Prescription'), -ends_with('Date'), -numPrescriptions)
scoring <- scoring %>%
pivot_wider(id_cols = PATIENT, names_from = ATC,
values_from = names(scoring)[which(names(scoring) != 'PATIENT')])
scoring[is.na(scoring)] <- 0
if(combinedScore){
s <- prcomp(scoring %>% select(-PATIENT, -starts_with('ATC')) %>% select_if(~ length(unique(.)) > 1), center = TRUE,scale. = TRUE)
s <- scale(s$x[,1])
results <- cbind(results,s) %>%
as.data.frame()
names(results)[which(names(results) == 's')] <- 'score'
} else {
results[,'score'] <- NA
codes <- unique(results$ATC)
for(i in 1:length(codes)){
s <- prcomp(scoring %>% select(ends_with(paste0('_',codes[i]))) %>% select_if(~ length(unique(.)) > 1), center = TRUE,scale. = TRUE)
s <- scale(s$x[,1])
results[which(results$ATC == codes[i]),'score'] <- s
}
}
}
results
}
#' Helper function to call individual (one ATC code) adherence calculation for a list of ATC codes
#'
#' @param atcCode regular expression for the class of medication ATC code
#' @param serialDf the data frame with the serial data
#' @param startDates dataframe with ID and a date to start the adherence calculation (optional)
#' @param endDates dataframe with ID and a date to end the adherence calculation (optional)
#' @param refillPeriod length of the refill period (default 90 days)
#' @param stopPeriod number of days after which the medication is considered to have stopped (optional)
#' @param idColumn name of ID column: default is PATIENT
#' @param dateColumn name of date column: default is VISIT. This column has to be of class Date
#' @param atcColumn name of the column with the ATC codes: default is ATC
#' @return time block data and dates in wide format
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate
listAllAdherences <- function(atcCode, serialDf, startDates, endDates, refillPeriod, stopPeriod, idColumn, dateColumn, atcColumn){
results <- calculateFixedAdherence(serialDf = serialDf, startDates = startDates, endDates = endDates, atcCode = atcCode,
refillPeriod = refillPeriod, stopPeriod = stopPeriod, idColumn = idColumn, dateColumn = dateColumn,
atcColumn = atcColumn, doPCAScore = F) %>%
mutate(ATC = atcCode)
results
}
#' Calculates medication adherence between start and end dates
#'
#' @param serialDf the data frame with the serial data
#' @param startDates dataframe with ID and a date to start the adherence calculation
#' @param endDates dataframe with ID and a date to end the adherence calculation
#' @param atcCode regular expression for the class of medication ATC code (default: C09 which stands for any RAAS)
#' @param refillPeriod length of the refill period (default 90 days)
#' @param idColumn name of ID column: default is PATIENT
#' @param dateColumn name of date column: default is VISIT. This column has to be of class Date
#' @param atcColumn name of the column with the ATC codes: default is ATC
#' @return time block data and dates in wide format
#' @export
#' @importFrom magrittr %>%
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom dplyr select_if
#' @importFrom dplyr starts_with
#' @importFrom dplyr ends_with
calculateFixedAdherenceFixedPeriod <- function(serialDf, startDates, endDates, atcCode = "C09", refillPeriod = 90,
idColumn = "PATIENT", dateColumn = "VISIT", atcColumn = "ATC") {
# check data frames
if (nrow(serialDf) == 0 ) {
stop("Serial prescription data is empty")
} else {
serialDf <- serialDf[ , colSums(is.na(serialDf)) == 0]
if (nrow(serialDf) == 0 ) {
stop("Serial prescription data is empty after removing missing data")
}
}
# check block length
if (refillPeriod <= 0) {
stop("The refill period cannot be 0 or negative")
}
# check start date data
if (nrow(startDates) == 0 ) {
stop("Start date data is empty")
} else {
startDate <- checkBaselineFormat(startDates, idColumn = idColumn, dateColumn = dateColumn)
if (is.null(startDate)) {
stop("incorrect format of start dates. Check column class. No missing values allowed.")
}
}
#chek end date data
if (nrow(endDates) == 0 ) {
stop("End date data is empty")
} else {
endDate <- checkBaselineFormat(endDates, idColumn = idColumn, dateColumn = dateColumn)
if (is.null(endDate)) {
stop("incorrect format of end dates. Check column class. No missing values allowed.")
}
}
# check serial prescription data
serialDf <- checkBinaryPrescriptionFormat(serialDf, idColumn = idColumn, dateColumn = dateColumn, atcColumn = atcColumn)
if (is.null(serialDf)) {
stop("incorrect format of serial prescription input data. Check column class. No missing values allowed.")
}
# check start and end date patients are the same
if(!is.null(nrow(startDates)) & !is.null(nrow(endDates))){
if(!all(startDates$PATIENT %in% endDates$PATIENT) & !all(endDates$PATIENT %in% startDates$PATIENT)){
stop("start and end dates must be specified for the same patients")
}
}
# restrict serial data to patients in startDates (if available)
if(!is.null(nrow(startDates))){
serialDf <- serialDf %>% filter(PATIENT %in% startDates$PATIENT)
if (nrow(serialDf) == 0) {
stop("Serial prescription data does not contain any data for the patients.")
}
}
# restrict serial data to patients in endDates (if available)
if(!is.null(nrow(endDates))){
serialDf <- serialDf %>% filter(PATIENT %in% endDates$PATIENT)
if (nrow(serialDf) == 0) {
stop("Serial prescription data does not contain any data for the patients.")
}
}
# restrict serial to ATC code under investigation
serialDf <- serialDf %>% filter(grepl(atcCode, ATC))
if (nrow(serialDf) == 0) {
stop('Serial data contains no prescriptions with the specific ATC code expression.')
}
#if start dates are available, start looking one refill period back to see whether the patient was medicated at start
if(!is.null(nrow(startDates))){
startDates <- startDates %>%
mutate(VISIT = VISIT - refillPeriod)
}
#create results data frame
resultsDf <- data.frame(PATIENT=character(),adherence=double(),firstPrescription=character(),lastPrescription=character(),
lastCoverDate=character(), numPrescriptions=numeric(), startStops=numeric(), stoppedYears = double(),
coveredYears = double(), stringsAsFactors = F)
#if there are start dates, use the start date patients to iterate over to find adherence, otherwise use all patients in the prescriptions
if(!is.null(nrow(startDates))){
allPatients <- unique(startDates$PATIENT)
} else {
allPatients <- unique(serialDf$PATIENT)
}
#go through all patients
for(i in 1:length(allPatients)){
myPatient <- allPatients[i]
resultsDf[i,'PATIENT'] <- myPatient
myPrescriptions <- serialDf %>%
filter(PATIENT == myPatient)
#restrict prescription for this patient to start at a certain time (optional)
if(nrow(myPrescriptions) > 0){
if(!is.null(nrow(startDates))){
myStartDate <- (startDates %>% filter(PATIENT == myPatient))$VISIT
myPrescriptions <- myPrescriptions %>%
filter(VISIT >= myStartDate)
}
if(!is.null(nrow(endDates))){
myEndDate <- (endDates %>% filter(PATIENT == myPatient))$VISIT
myPrescriptions <- myPrescriptions %>%
filter(VISIT <= myEndDate)
}
}
#if there are 0 prescriptions, there is 0 adherence
if(nrow(myPrescriptions) > 0){
resultsDf[i,'firstPrescription'] <- as.character(as.Date(min(myPrescriptions$VISIT)))
resultsDf[i,'lastPrescription'] <- as.character(as.Date(max(myPrescriptions$VISIT)))
resultsDf[i,'lastCoverDate'] <- as.character(as.Date(max(myPrescriptions$VISIT)) + refillPeriod)
resultsDf[i,'numPrescriptions'] <- nrow(myPrescriptions)
resultsDf[i,'startDate'] <- as.character(myStartDate)
resultsDf[i,'endDate'] <- as.character(myEndDate)
#we need to cover from start to end dates
daysToCover <- as.numeric(myEndDate - myStartDate)
#go through the precsriptions in a sorted way starting from the earliest
myPrescriptions <- myPrescriptions[order(myPrescriptions$VISIT),]
uncoveredDays <- 0
#add gaps at end and start to uncovered days
myLastCoverDate <- as.Date(max(myPrescriptions$VISIT)) + refillPeriod
if(myLastCoverDate < myEndDate){
uncoveredDays <- as.numeric(myEndDate - myLastCoverDate)
}
myFirstCoverDate <- as.Date(min(myPrescriptions$VISIT))
if(myFirstCoverDate > myStartDate){
uncoveredDays <- uncoveredDays + as.numeric(myFirstCoverDate - myStartDate)
}
#if we only have one prescription, there is nothing more to do
if(nrow(myPrescriptions) > 1){
for(j in 2:nrow(myPrescriptions)){
pOld <- myPrescriptions[(j-1),'VISIT'][[1]]
pNew <- myPrescriptions[j,'VISIT'][[1]]
if(as.numeric(pNew - pOld) > refillPeriod){
uncoveredDays <- uncoveredDays + as.numeric((pNew - pOld)) - refillPeriod
}
}
}
resultsDf[i,'adherence'] <- (daysToCover - uncoveredDays)/daysToCover
resultsDf[i,'coveredYears'] <- (daysToCover - uncoveredDays)/365.25
} else if(nrow(myPrescriptions) == 0){
resultsDf[i,'adherence'] <- 0
resultsDf[i,'firstPrescription'] <- NA
resultsDf[i,'lastPrescription'] <- NA
resultsDf[i,'lastCoverDate'] <- NA
resultsDf[i,'numPrescriptions'] <- 0
resultsDf[i,'coveredYears'] <- 0
}
}
resultsDf$firstPrescription <- as.Date(resultsDf$firstPrescription)
resultsDf$lastPrescription <- as.Date(resultsDf$lastPrescription)
resultsDf$lastCoverDate <- as.Date(resultsDf$lastCoverDate)
resultsDf$startDate <- as.Date(resultsDf$startDate)
resultsDf$endDate <- as.Date(resultsDf$endDate)
if(!is.null(nrow(endDates))){
resultsDf <- resultsDf %>%
merge(endDates %>% select(PATIENT, VISIT))
resultsDf <- resultsDf %>%
mutate(gapEndFollowUpYears = safe.ifelse(lastCoverDate < VISIT,as.numeric(VISIT - lastCoverDate)/365.25,0)) %>%
select(-VISIT)
}
resultsDf
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.