R/StudyPopulation.R

Defines functions plotTimeToEvent getCounts getAttritionTable createStudyPopulation fastDuplicated

Documented in createStudyPopulation getAttritionTable plotTimeToEvent

# Copyright 2021 Observational Health Data Sciences and Informatics
#
# This file is part of CohortMethod
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

fastDuplicated <- function(data, columns) {
  if (nrow(data) == 0) {
    return(vector())
  } else if (nrow(data) == 1) {
    return(c(FALSE))
  } else {
    results <- lapply(columns, function(column, data) data[2:nrow(data), column] == data[1:(nrow(data) - 1), column], data = data)
    result <- results[[1]]
    if (length(columns) > 1) {
      for (i in 2:length(columns)) {
        result <- result & results[[i]]
      }
    }
    return(c(FALSE, result))
  }
}

#' Create a study population
#'
#' @details
#' Create a study population by enforcing certain inclusion and exclusion criteria, defining a risk
#' window, and determining which outcomes fall inside the risk window.
#'
#' The `removeduplicateSubjects` argument can have one of the following values:
#'
#' - `"keep all"`: Do not remove subjects that appear in both target and comparator cohort
#' - `"keep first"`: When a subjects appear in both target and comparator cohort, only keep whichever cohort is first in time. If both cohorts start simultaneous, the person is removed from the analysis.
#' - `"remove all"`: Remove subjects that appear in both target and comparator cohort completely from the analysis."
#'
#' @template CohortMethodData
#'
#' @param population                       If specified, this population will be used as the starting
#'                                         point instead of the cohorts in the `cohortMethodData` object.
#' @param outcomeId                        The ID of the outcome. If not specified, no outcome-specific
#'                                         transformations will be performed.
#' @param firstExposureOnly                Should only the first exposure per subject be included?
#' @param removeDuplicateSubjects          Remove subjects that are in both the target and comparator
#'                                         cohort? See details for allowed values.
#' @param restrictToCommonPeriod           Restrict the analysis to the period when both exposures are observed?
#' @param washoutPeriod                    The minimum required continuous observation time prior to
#'                                         index date for a person to be included in the cohort.
#' @param removeSubjectsWithPriorOutcome   Remove subjects that have the outcome prior to the risk
#'                                         window start?
#' @param priorOutcomeLookback             How many days should we look back when identifying prior
#'                                         outcomes?
#' @param minDaysAtRisk                    The minimum required number of days at risk. Risk windows with fewer
#'                                         days than this number are removed from the analysis.
#' @param maxDaysAtRisk                    The maximum allowed number of days at risk. Risk windows that are
#'                                         longer will be truncated to this number of days.
#' @param riskWindowStart                  The start of the risk window (in days) relative to the `startAnchor`.
#' @param addExposureDaysToStart           DEPRECATED: Add the length of exposure the start of the risk window?
#'                                         Use `startAnchor` instead.
#' @param startAnchor                      The anchor point for the start of the risk window. Can be `"cohort start"`
#'                                         or `"cohort end"`.
#' @param riskWindowEnd                    The end of the risk window (in days) relative to the `endAnchor`.
#' @param addExposureDaysToEnd             DEPRECATED: Add the length of exposure the risk window?
#'                                         Use `endAnchor` instead.
#' @param endAnchor                        The anchor point for the end of the risk window. Can be `"cohort start"`
#'                                         or `"cohort end"`.
#' @param censorAtNewRiskWindow            If a subject is in multiple cohorts, should time-at-risk be censored
#'                                         when the new time-at-risk starts to prevent overlap?
#' @return
#' A `tibble` specifying the study population. This `tibble` will have the following columns:
#'
#' - `rowId`: A unique identifier for an exposure.
#' - `personSeqId`: The person sequence ID of the subject.
#' - `cohortStartdate`: The index date.
#' - `outcomeCount` The number of outcomes observed during the risk window.
#' - `timeAtRisk`: The number of days in the risk window.
#' - `survivalTime`: The number of days until either the outcome or the end of the risk window.
#'
#' @export
createStudyPopulation <- function(cohortMethodData,
                                  population = NULL,
                                  outcomeId,
                                  firstExposureOnly = FALSE,
                                  restrictToCommonPeriod = FALSE,
                                  washoutPeriod = 0,
                                  removeDuplicateSubjects = FALSE,
                                  removeSubjectsWithPriorOutcome = TRUE,
                                  priorOutcomeLookback = 99999,
                                  minDaysAtRisk = 1,
                                  maxDaysAtRisk = 99999,
                                  riskWindowStart = 0,
                                  addExposureDaysToStart = NULL,
                                  startAnchor = "cohort start",
                                  riskWindowEnd = 0,
                                  addExposureDaysToEnd = NULL,
                                  endAnchor = "cohort end",
                                  censorAtNewRiskWindow = FALSE) {
  if (!missing(addExposureDaysToStart) && !is.null(addExposureDaysToStart)) {
    warning("The addExposureDaysToStart argument is deprecated. Please use the startAnchor argument instead.")
    if (addExposureDaysToStart) {
      startAnchor <- "cohort end"
    } else {
      startAnchor <- "cohort start"
    }
  }
  if (!missing(addExposureDaysToEnd) && !is.null(addExposureDaysToEnd)) {
    warning("The addExposureDaysToEnd argument is deprecated. Please use the endAnchor argument instead.")
    if (addExposureDaysToEnd) {
      endAnchor <- "cohort end"
    } else {
      endAnchor <- "cohort start"
    }
  }
  if (!grepl("start$|end$", startAnchor, ignore.case = TRUE)) {
    stop("startAnchor should have value 'cohort start' or 'cohort end'")
  }
  if (!grepl("start$|end$", endAnchor, ignore.case = TRUE)) {
    stop("endAnchor should have value 'cohort start' or 'cohort end'")
  }
  isEnd <- function(anchor) {
    return(grepl("end$", anchor, ignore.case = TRUE))
  }
  if (is.logical(removeDuplicateSubjects)) {
    if (removeDuplicateSubjects)
      removeDuplicateSubjects <- "remove all"
    else
      removeDuplicateSubjects <- "keep all"
  }
  if (!(removeDuplicateSubjects %in% c("keep all", "keep first", "remove all")))
    stop("removeDuplicateSubjects should have value \"keep all\", \"keep first\", or \"remove all\".")
  if (missing(outcomeId))
    ParallelLogger::logTrace("Creating study population without outcome ID")
  else
    ParallelLogger::logTrace("Creating study population for outcome ID ", outcomeId)

  if (is.null(population)) {
    metaData <- attr(cohortMethodData, "metaData")
    population <- cohortMethodData$cohorts %>%
      collect() %>%
      select(-.data$personId)
  } else {
    metaData <- attr(population, "metaData")
  }

  if (firstExposureOnly) {
    ParallelLogger::logInfo("Keeping only first exposure per subject")
    population <- population[order(population$personSeqId, population$treatment, population$cohortStartDate), ]
    # idx <- duplicated(population[, c("personSeqId", "treatment")])
    idx <- fastDuplicated(population, c("personSeqId", "treatment"))
    population <- population[!idx, ]
    metaData$attrition <- rbind(metaData$attrition, getCounts(population, "First exposure only"))
  }
  if (restrictToCommonPeriod) {
    ParallelLogger::logInfo("Restrict to common period")
    if (nrow(population) > 0) {
      population <- population %>%
        group_by(.data$treatment) %>%
        summarise(treatmentStart = min(.data$cohortStartDate), treatmentEnd = max(.data$cohortStartDate)) %>%
        ungroup() %>%
        summarise(periodStart = max(.data$treatmentStart), periodEnd = min(.data$treatmentEnd)) %>%
        full_join(population, by = character()) %>%
        filter(population$cohortStartDate >= .data$periodStart & population$cohortStartDate <= .data$periodEnd) %>%
        select(-.data$periodStart, -.data$periodEnd)
    }
    metaData$attrition <- rbind(metaData$attrition, getCounts(population, "Restrict to common period"))
  }
  if (removeDuplicateSubjects == "remove all") {
    ParallelLogger::logInfo("Removing all subject that are in both cohorts (if any)")
    targetSubjectIds <- population$personSeqId[population$treatment == 1]
    comparatorSubjectIds <- population$personSeqId[population$treatment == 0]
    duplicateSubjectIds <- targetSubjectIds[targetSubjectIds %in% comparatorSubjectIds]
    population <- population[!(population$personSeqId %in% duplicateSubjectIds), ]
    metaData$attrition <- rbind(metaData$attrition,
                                getCounts(population, paste("Removed subjects in both cohorts")))
  } else if (removeDuplicateSubjects == "keep first") {
    ParallelLogger::logInfo("For subject that are in both cohorts, keeping only whichever cohort is first in time.")
    if (nrow(population) > 0) {
      population <- population[order(population$personSeqId, population$cohortStartDate), ]
      # Remove ties:
      idx <- fastDuplicated(population, c("personSeqId", "cohortStartDate"))
      idx[1:(length(idx) - 1)] <- idx[1:(length(idx) - 1)] | idx[2:length(idx)]
      if (all(idx)) {
        warning("All cohort entries are ties, with same subject ID and cohort start date")
      }
      population <- population[!idx, ]
      # Keeping first:
      idx <- fastDuplicated(population, "personSeqId")
      population <- population[!idx, ]
    }
    metaData$attrition <- rbind(metaData$attrition,
                                getCounts(population, paste("Restricting duplicate subjects to first cohort")))
  }

  if (washoutPeriod) {
    ParallelLogger::logInfo(paste("Requiring", washoutPeriod, "days of observation prior index date"))
    population <- population[population$daysFromObsStart >= washoutPeriod, ]
    metaData$attrition <- rbind(metaData$attrition, getCounts(population, paste("At least",
                                                                                washoutPeriod,
                                                                                "days of observation prior")))
  }
  if (removeSubjectsWithPriorOutcome) {
    if (missing(outcomeId) || is.null(outcomeId)) {
      ParallelLogger::logInfo("No outcome specified so skipping removing people with prior outcomes")
    } else {
      ParallelLogger::logInfo("Removing subjects with prior outcomes (if any)")
      outcomes <- cohortMethodData$outcomes %>%
        filter(.data$outcomeId == !!outcomeId) %>%
        collect()
      if (isEnd(startAnchor)) {
        outcomes <- merge(outcomes, population[, c("rowId", "daysToCohortEnd")])
        priorOutcomeRowIds <- outcomes$rowId[outcomes$daysToEvent > -priorOutcomeLookback & outcomes$daysToEvent <
                                               outcomes$daysToCohortEnd + riskWindowStart]
      } else {
        priorOutcomeRowIds <- outcomes$rowId[outcomes$daysToEvent > -priorOutcomeLookback & outcomes$daysToEvent <
                                               riskWindowStart]
      }
      population <- population[!(population$rowId %in% priorOutcomeRowIds), ]
      metaData$attrition <- rbind(metaData$attrition,
                                  getCounts(population, paste("No prior outcome")))
    }
  }
  # Create risk windows:
  population$riskStart <- rep(riskWindowStart, nrow(population))
  if (isEnd(startAnchor)) {
    population$riskStart <- population$riskStart + population$daysToCohortEnd
  }
  population$riskEnd <- rep(riskWindowEnd, nrow(population))
  if (isEnd(endAnchor)) {
    population$riskEnd <- population$riskEnd + population$daysToCohortEnd
  }
  idx <- population$riskEnd > population$daysToObsEnd
  population$riskEnd[idx] <- population$daysToObsEnd[idx]

  if (!missing(maxDaysAtRisk) && !is.null(maxDaysAtRisk)) {
    idx <- population$riskEnd > population$riskStart + maxDaysAtRisk
    if (any(idx)) {
      population$riskEnd[idx] <- population$riskStart[idx] + maxDaysAtRisk
    }
  }
  if (censorAtNewRiskWindow) {
    ParallelLogger::logInfo("Censoring time at risk of recurrent subjects at start of new time at risk")
    if (nrow(population) > 1) {
      population$startDate <- population$cohortStartDate + population$riskStart
      population$endDate <- population$cohortStartDate + population$riskEnd
      population <- population[order(population$personSeqId, population$riskStart), ]
      idx <- 1:(nrow(population) - 1)
      idx <- which(population$endDate[idx] >= population$startDate[idx + 1] &
                     population$personSeqId[idx] == population$personSeqId[idx + 1])
      if (length(idx) > 0) {
        population$endDate[idx] <- population$startDate[idx + 1] - 1
        population$riskEnd[idx] <- population$endDate[idx] - population$cohortStartDate[idx]
        idx <- population$riskEnd < population$riskStart
        if (any(idx)) {
          population <- population[!idx, ]
        }
      }
      population$startDate <- NULL
      population$endDate <- NULL
      metaData$attrition <- rbind(metaData$attrition,
                                  getCounts(population, paste("Censoring at start of new time-at-risk")))
    }
  }
  if (minDaysAtRisk != 0) {
    ParallelLogger::logInfo(paste("Removing subjects with less than", minDaysAtRisk, "day(s) at risk (if any)"))
    population <- population[population$riskEnd - population$riskStart >= minDaysAtRisk, ]
    metaData$attrition <- rbind(metaData$attrition, getCounts(population, paste("Have at least",
                                                                                minDaysAtRisk,
                                                                                "days at risk")))
  }
  if (missing(outcomeId) || is.null(outcomeId)) {
    ParallelLogger::logInfo("No outcome specified so not creating outcome and time variables")
  } else {
    # Select outcomes during time at risk
    outcomes <- cohortMethodData$outcomes %>%
      filter(.data$outcomeId == !!outcomeId) %>%
      collect()
    outcomes <- merge(outcomes, population[, c("rowId", "riskStart", "riskEnd")])
    outcomes <- outcomes[outcomes$daysToEvent >= outcomes$riskStart & outcomes$daysToEvent <= outcomes$riskEnd, ]

    # Create outcome count column
    if (nrow(outcomes) == 0) {
      population$outcomeCount <- rep(0, nrow(population))
    } else {
      outcomeCount <- aggregate(outcomeId ~ rowId, data = outcomes, length)
      colnames(outcomeCount)[colnames(outcomeCount) == "outcomeId"] <- "outcomeCount"
      population$outcomeCount <- 0
      population$outcomeCount[match(outcomeCount$rowId, population$rowId)] <- outcomeCount$outcomeCount
    }

    # Create time at risk column
    population$timeAtRisk <- population$riskEnd - population$riskStart + 1

    # Create survival time column
    firstOutcomes <- outcomes[order(outcomes$rowId, outcomes$daysToEvent), ]
    firstOutcomes <- firstOutcomes[!duplicated(firstOutcomes$rowId), ]
    # population <- merge(population, firstOutcomes[, c("rowId", "daysToEvent")], all.x = TRUE)
    population$daysToEvent <- rep(NA, nrow(population))
    population$daysToEvent[match(firstOutcomes$rowId, population$rowId)] <- firstOutcomes$daysToEvent
    population$survivalTime <- population$timeAtRisk
    population$survivalTime[population$outcomeCount != 0] <- population$daysToEvent[population$outcomeCount !=
                                                                                      0] - population$riskStart[population$outcomeCount != 0] + 1
  }
  attr(population, "metaData") <- metaData
  ParallelLogger::logDebug("Study population has ", nrow(population), " rows")
  return(population)
}

#' Get the attrition table for a population
#'
#' @param object   Either an object of type [CohortMethodData], a population object generated by
#'                 functions like [createStudyPopulation()], or an object of type
#'                 `outcomeModel`.
#'
#' @return
#' A `tibble` specifying the number of people and exposures in the population after specific steps
#' of filtering.
#'
#'
#' @export
getAttritionTable <- function(object) {
  if (is(object, "OutcomeModel")) {
    return(object$attrition)
  } else {
    return(attr(object, "metaData")$attrition)
  }
}

getCounts <- function(population, description = "") {
  targetPersons <- length(unique(population$personSeqId[population$treatment == 1]))
  comparatorPersons <- length(unique(population$personSeqId[population$treatment == 0]))
  targetExposures <- length(population$personSeqId[population$treatment == 1])
  comparatorExposures <- length(population$personSeqId[population$treatment == 0])
  counts <- dplyr::tibble(description = description,
                          targetPersons = targetPersons,
                          comparatorPersons = comparatorPersons,
                          targetExposures = targetExposures,
                          comparatorExposures = comparatorExposures)
  return(counts)
}


#' Plot time-to-event
#'
#' @details
#' Creates a plot showing the number of events over time in the target and comparator cohorts, both before and after
#' index date. The plot also distinguishes between events inside and outside the time-at-risk period. This requires
#' the user to (re)specify the time-at-risk using the same arguments as the [createStudyPopulation()] function.
#' Note that it is not possible to specify that people with the outcome prior should be removed, since the plot will
#' show these prior events.
#'
#' @template CohortMethodData
#'
#' @param population                       If specified, this population will be used as the starting
#'                                         point instead of the cohorts in the `cohortMethodData` object.
#' @param outcomeId                        The ID of the outcome. If not specified, no outcome-specific
#'                                         transformations will be performed.
#' @param firstExposureOnly                Should only the first exposure per subject be included?
#' @param removeDuplicateSubjects          Remove subjects that are in both the target and comparator
#'                                         cohort? See details for allowed values.
#' @param restrictToCommonPeriod           Restrict the analysis to the period when both exposures are observed?
#' @param washoutPeriod                    The minimum required continuous observation time prior to
#'                                         index date for a person to be included in the cohort.
#' @param minDaysAtRisk                    The minimum required number of days at risk.
#' @param riskWindowStart                  The start of the risk window (in days) relative to the `startAnchor`.
#' @param addExposureDaysToStart           DEPRECATED: Add the length of exposure the start of the risk window?
#'                                         Use `startAnchor` instead.
#' @param startAnchor                      The anchor point for the start of the risk window. Can be `"cohort start"`
#'                                         or `"cohort end"`.
#' @param riskWindowEnd                    The end of the risk window (in days) relative to the `endAnchor`.
#' @param addExposureDaysToEnd             DEPRECATED: Add the length of exposure the risk window?
#'                                         Use `endAnchor` instead.
#' @param endAnchor                        The anchor point for the end of the risk window. Can be `"cohort start"`
#'                                         or `"cohort end"`.
#' @param censorAtNewRiskWindow            If a subject is in multiple cohorts, should time-at-risk be censored
#'                                         when the new time-at-risk starts to prevent overlap?
#' @param periodLength                     The length in days of each period shown in the plot.
#' @param numberOfPeriods                  Number of periods to show in the plot. The periods are
#'                                         equally divided before and after the index date.
#' @param showFittedLines                  Fit lines to the proportions and show them in the plot?
#' @param targetLabel                      A label to us for the target cohort.
#' @param comparatorLabel                  A label to us for the comparator cohort.
#' @param title                            Optional: the main title for the plot.
#' @param fileName              Name of the file where the plot should be saved, for example
#'                              'plot.png'. See [ggplot2::ggsave()] for supported file formats.
#'
#' @return
#' A ggplot object. Use the [ggplot2::ggsave()] function to save to file in a different
#' format.
#'
#' @export
plotTimeToEvent <- function(cohortMethodData,
                            population = NULL,
                            outcomeId,
                            firstExposureOnly = FALSE,
                            restrictToCommonPeriod = FALSE,
                            washoutPeriod = 0,
                            removeDuplicateSubjects = FALSE,
                            minDaysAtRisk = 1,
                            riskWindowStart = 0,
                            addExposureDaysToStart = NULL,
                            startAnchor = "cohort start",
                            riskWindowEnd = 0,
                            addExposureDaysToEnd = NULL,
                            endAnchor = "cohort end",
                            censorAtNewRiskWindow = FALSE,
                            periodLength = 7,
                            numberOfPeriods = 52,
                            showFittedLines = TRUE,
                            targetLabel = "Target",
                            comparatorLabel = "Comparator",
                            title = NULL,
                            fileName = NULL) {
  if (!missing(addExposureDaysToStart) && !is.null(addExposureDaysToStart)) {
    warning("The addExposureDaysToStart argument is deprecated. Please use the startAnchor argument instead.")
    if (addExposureDaysToStart) {
      startAnchor <- "cohort end"
    } else {
      startAnchor <- "cohort start"
    }
  }
  if (!missing(addExposureDaysToEnd) && !is.null(addExposureDaysToEnd)) {
    warning("The addExposureDaysToEnd argument is deprecated. Please use the endAnchor argument instead.")
    if (addExposureDaysToEnd) {
      endAnchor <- "cohort end"
    } else {
      endAnchor <- "cohort start"
    }
  }
  if (is.null(population)) {
    population <- cohortMethodData$cohorts %>%
      collect()
  }
  population <- createStudyPopulation(cohortMethodData = cohortMethodData,
                                      population = population,
                                      outcomeId = outcomeId,
                                      firstExposureOnly = firstExposureOnly,
                                      restrictToCommonPeriod = restrictToCommonPeriod,
                                      washoutPeriod = washoutPeriod,
                                      removeDuplicateSubjects = removeDuplicateSubjects,
                                      removeSubjectsWithPriorOutcome = FALSE,
                                      minDaysAtRisk = minDaysAtRisk,
                                      riskWindowStart = riskWindowStart,
                                      startAnchor = startAnchor,
                                      riskWindowEnd = riskWindowEnd,
                                      endAnchor = endAnchor,
                                      censorAtNewRiskWindow = censorAtNewRiskWindow)
  outcomes <- cohortMethodData$outcomes %>%
    filter(.data$outcomeId == !!outcomeId) %>%
    collect()
  outcomes <- merge(population[, c("rowId", "treatment", "daysFromObsStart", "daysToObsEnd", "riskStart", "riskEnd")],
                    outcomes[, c("rowId", "daysToEvent")])
  outcomes <- outcomes[-outcomes$daysFromObsStart <= outcomes$daysToEvent & outcomes$daysToObsEnd >= outcomes$daysToEvent, ]
  outcomes$exposed <- 0
  outcomes$exposed[outcomes$daysToEvent >= outcomes$riskStart & outcomes$daysToEvent <= outcomes$riskEnd] <- 1

  idxExposed <- outcomes$exposed == 1
  idxTarget <- outcomes$treatment == 1
  createPeriod <- function(number) {
    start <- number*periodLength
    end <- number*periodLength + periodLength
    idxInPeriod <- outcomes$daysToEvent >= start & outcomes$daysToEvent < end
    idxPopInPeriod <- -population$daysFromObsStart <= start & population$daysToObsEnd >= end
    dplyr::tibble(number = number,
                  start = start,
                  end = end,
                  eventsExposed = 0,
                  eventsUnexposed = 0,
                  observed = 0,
                  eventsExposedTarget = sum(idxInPeriod & idxExposed & idxTarget),
                  eventsExposedComparator = sum(idxInPeriod & idxExposed & !idxTarget),
                  eventsUnexposedTarget = sum(idxInPeriod & !idxExposed & idxTarget),
                  eventsUnexposedComparator = sum(idxInPeriod & !idxExposed & !idxTarget),
                  observedTarget = sum(idxPopInPeriod & population$treatment),
                  observedComparator = sum(idxPopInPeriod & !population$treatment))
  }
  periods <- lapply(-floor(numberOfPeriods/2):ceiling(numberOfPeriods/2), createPeriod)
  periods <- do.call("rbind", periods)
  periods$rateExposedTarget <- periods$eventsExposedTarget / periods$observedTarget
  periods$rateUnexposedTarget <- periods$eventsUnexposedTarget / periods$observedTarget
  periods$rateExposedComparator <- periods$eventsExposedComparator / periods$observedComparator
  periods$rateUnexposedComparator <- periods$eventsUnexposedComparator / periods$observedComparator
  periods$rateTarget <- (periods$eventsExposedTarget + periods$eventsUnexposedTarget) / periods$observedTarget
  periods$rateComparator <- (periods$eventsExposedComparator + periods$eventsUnexposedComparator) / periods$observedComparator
  vizData <- rbind(dplyr::tibble(start = periods$start,
                                 end = periods$end,
                                 rate = periods$rateExposedTarget,
                                 status = "Exposed events",
                                 type = targetLabel),
                   dplyr::tibble(start = periods$start,
                                 end = periods$end,
                                 rate = periods$rateUnexposedTarget,
                                 status = "Unexposed events",
                                 type = targetLabel),
                   dplyr::tibble(start = periods$start,
                                 end = periods$end,
                                 rate = periods$rateExposedComparator,
                                 status = "Exposed events",
                                 type = comparatorLabel),
                   dplyr::tibble(start = periods$start,
                                 end = periods$end,
                                 rate = periods$rateUnexposedComparator,
                                 status = "Unexposed events",
                                 type = comparatorLabel))
  vizData$type <- factor(vizData$type, levels = c(targetLabel, comparatorLabel))

  plot <- ggplot2::ggplot(vizData, ggplot2::aes(x = .data$start + periodLength / 2,
                                                y = .data$rate * 1000,
                                                fill = .data$status)) +
    ggplot2::geom_col(width = periodLength, alpha = 0.8) +
    ggplot2::geom_vline(xintercept = 0, colour = "#000000", lty = 1, size = 1) +
    ggplot2::scale_fill_manual(values = c(rgb(0.8, 0, 0, alpha = 0.5),
                                          rgb(0, 0, 0.8, alpha = 0.5))) +
    ggplot2::scale_x_continuous("Days since exposure start") +
    ggplot2::scale_y_continuous("Proportion (per 1,000 persons)") +
    ggplot2::facet_grid(type~., scales = "free_y") +
    ggplot2::theme(panel.grid.minor = ggplot2::element_blank(),
                   panel.background = ggplot2::element_rect(fill = "#FAFAFA", colour = NA),
                   panel.grid.major = ggplot2::element_line(colour = "#AAAAAA"),
                   axis.ticks = ggplot2::element_blank(),
                   strip.background = ggplot2::element_blank(),
                   legend.title = ggplot2::element_blank(),
                   legend.position = "top")

  if (showFittedLines) {
    preTarget <- periods[periods$start < 0, ]
    preTarget <- cbind(preTarget, predict(lm(rateTarget ~ poly(number, 3), data = preTarget), interval = "confidence"))
    preTarget$type <- targetLabel
    preTarget$period <- "Pre"
    postTarget <- periods[periods$start >= 0, ]
    postTarget <- cbind(postTarget, predict(lm(rateTarget ~ poly(number, 3), data = postTarget), interval = "confidence"))
    postTarget$type <- targetLabel
    postTarget$period <- "Post"
    preComparator <- periods[periods$start < 0, ]
    preComparator <- cbind(preComparator, predict(lm(rateComparator ~ poly(number, 3), data = preComparator), interval = "confidence"))
    preComparator$type <- comparatorLabel
    preComparator$period <- "Pre"
    postComparator <- periods[periods$start >= 0, ]
    postComparator <- cbind(postComparator, predict(lm(rateComparator ~ poly(number, 3), data = postComparator), interval = "confidence"))
    postComparator$type <- comparatorLabel
    postComparator$period <- "Post"
    curve <- rbind(preTarget, postTarget, preComparator, postComparator)
    curve$rate <- 0
    curve$status <- "Exposed events"
    curve$type <- factor(curve$type, levels = c(targetLabel, comparatorLabel))

    plot <- plot + ggplot2::geom_ribbon(ggplot2::aes(x = start + periodLength/2,
                                                     ymin = .data$lwr * 1000,
                                                     ymax = .data$upr * 1000,
                                                     group = .data$period),
                                        fill = rgb(0,0,0),
                                        alpha = 0.3,
                                        data = curve) +
      ggplot2::geom_line(ggplot2::aes(x = start + periodLength/2,
                                      y = .data$fit * 1000,
                                      group = .data$period),
                         size = 1.5,
                         alpha = 0.8,
                         data = curve)
  }

  if (!is.null(title)) {
    plot <- plot + ggplot2::ggtitle(title)
  }
  if (!is.null(fileName))
    ggplot2::ggsave(fileName, plot, width = 7, height = 5, dpi = 400)
  return(plot)
}
escott12/CohortMethod documentation built on Dec. 20, 2021, 6:37 a.m.