tests/testthat/test-eventDepObservation.R

library(testthat)
library(SelfControlledCaseSeries)
# Simulation:
set.seed(123)
n <- 2e+05
observationDays <- 100
rr <- 2
minBaselineRate <- 5e-05
maxBaselineRate <- 1e-04

data <- tibble(personId = as.character(1:n))
data$observationStartDate <- 1
data$observationEndDate <- observationDays
data$exposureStartDate <- round(runif(n, 1, observationDays))
data$exposureEndDate <- round(runif(n, data$exposureStartDate, observationDays))
data$daysExposed <- data$exposureEndDate - data$exposureStartDate + 1
data$daysUnexposed <- observationDays - data$daysExposed
data$baselineRisk <- runif(n, minBaselineRate, maxBaselineRate)
data$eventsUnexposed <- rpois(n, data$baselineRisk * data$daysUnexposed)
data$eventsExposed <- rpois(n, rr * data$baselineRisk * data$daysExposed)
# data$ageInDays <- 1 # Age on the first day. Can be zero
data$ageInDays <- round(runif(n, 1, 100))
data$eventDate <- observationDays + 1

peopleUnexpEvent <- data$eventsUnexposed > 0
# Date of event is random day when unexposed:
data$eventDate[peopleUnexpEvent] <- round(runif(
  sum(peopleUnexpEvent),
  1,
  data$daysUnexposed[peopleUnexpEvent]
))
# If day greater than exposure start day, at exposure time so it falls in period post exposure:
data$eventDate[peopleUnexpEvent & data$eventDate > data$exposureStartDate] <- data$eventDate[peopleUnexpEvent &
  data$eventDate > data$exposureStartDate] + data$daysExposed[peopleUnexpEvent & data$eventDate > data$exposureStartDate]

# For people with event during exposure, and no event in the period prior exposure, randomly pick an
# event date during exposure:
peopleExpEvent <- data$eventsExposed > 0 & data$eventDate > data$exposureStartDate
data$eventDate[peopleExpEvent] <- round(runif(
  sum(peopleExpEvent),
  data$exposureStartDate[peopleExpEvent],
  data$exposureEndDate[peopleExpEvent]
))

# Remove non-cases:
data <- data[data$eventDate <= observationDays, ]

### Event-dependent observation period ###
preEventCensorRate <- 0.001
postEventCensorRate <- 0.05

# Pre event censoring
data$censorDate <- round(rexp(nrow(data), preEventCensorRate))

# Post event censoring
notCensored <- data$censorDate > data$eventDate
data$censorDate[notCensored] <- data$eventDate[notCensored] +
  round(rexp(sum(notCensored), postEventCensorRate))

# Remove patients where outcome or exposure falls after censor date:
data <- data[data$exposureStartDate <= data$censorDate, ]
data <- data[data$eventDate <= data$censorDate, ]

# Truncate exposure end date at censor date:
data$exposureEndDate[data$exposureEndDate > data$censorDate] <- data$censorDate[data$exposureEndDate >
  data$censorDate]

data$censorDate[data$censorDate > observationDays] <- observationDays
nrow(data)

### Use SCCS library. Note differences in convention: - end dates are NOT included to compute length of
### a period, so +1 for end dates - events are considered part of a period if period start data < event
### date <= period end date, so +1 for even dates


# Note: SCCS package currently not in CRAN.  Also note: I had to modify SCCS, remove adding 0.5 days
# when event date is end date
# library(SCCS)
# x <- eventdepenobs(formula = event ~ exposure + strata(indivL) + offset(logw),
#                    adrug = data$ageInDays + data$exposureStartDate - data$observationStartDate,
#                    aedrug = data$ageInDays + data$exposureEndDate - data$observationStartDate + 1,
#                    data = tibble(Indiv = data$personId,
#                                      aevent = data$ageInDays + data$eventDate - data$observationStartDate + 1,
#                                      astart = data$ageInDays,
#                                      aend = data$ageInDays + data$censorDate - data$observationStartDate + 1,
#                                      aestudy = data$ageInDays + observationDays))


x <- list(summary = list())
x$summary$coefficients <- c(0.762933)
x$modelfit <- matrix(c(-3122.776, -3122.776, -3122.776, -3122.776), nrow = 2)

test_that("Produces same results as SCCS package when using event-dependent observation periods", {
  cases <- tibble(
    observationPeriodId = as.numeric(data$personId),
    caseId = as.numeric(data$personId),
    personId = data$personId,
    startDay = 0,
    endDay = data$censorDate - data$observationStartDate,
    ageAtObsStart = data$ageInDays,
    observationPeriodStartDate = as.Date("2000-5-1")
  )

  cases$noninformativeEndCensor <- cases$endDay == max(cases$endDay)
  heiEras <- tibble(
    eraType = "rx",
    caseId = as.numeric(data$personId),
    eraId = 1,
    eraValue = 1,
    eraStartDay = data$exposureStartDate - data$observationStartDate,
    eraEndDay = data$exposureEndDate - data$observationStartDate
  )
  hoiEras <- tibble(
    eraType = "hoi",
    caseId = as.numeric(data$personId),
    eraId = 2,
    eraValue = 1,
    eraStartDay = data$eventDate - data$observationStartDate,
    eraEndDay = data$eventDate - data$observationStartDate
  )
  eras <- rbind(heiEras, hoiEras)
  eras <- eras[order(eras$caseId), ]

  eraRef <- eras %>%
    distinct(.data$eraId, .data$eraType) %>%
    mutate(eraName = "")

  sccsData <- Andromeda::andromeda(
    cases = cases,
    eras = eras,
    eraRef = eraRef
  )
  attr(sccsData, "metaData") <- list(
    outcomeIds = 2,
    attrition = tibble(outcomeId = 2)
  )
  class(sccsData) <- "SccsData"
  attr(class(sccsData), "package") <- "SelfControlledCaseSeries"

  studyPop <- createStudyPopulation(sccsData = sccsData)

  sccsIntervalData <- createSccsIntervalData(
    studyPopulation = studyPop,
    sccsData = sccsData,
    eraCovariateSettings = createEraCovariateSettings(
      includeEraIds = 1,
      start = 0,
      end = 0,
      endAnchor = "era end"
    ),
    eventDependentObservation = TRUE
  )

  expect_equal(attr(sccsIntervalData, "metaData")$censorModel$aic, min(x$modelfit[2, ]), tolerance = 1e-04)

  fit <- fitSccsModel(sccsIntervalData)

  expect_equal(x$summary$coefficients[1], as.vector(coef(fit)), tolerance = 1e-04)
})

# exp(x$summary$coefficients[1])
# exp(coef(fit))
#
# myData <- merge(ff::as.ram(sccsIntervalData$outcomes), ff::as.ram(sccsIntervalData$covariates), all.x = TRUE)
# myData$covariateValue[is.na(myData$covariateValue)] <- 0
# myData <- tibble(stratumId = myData$stratumId, exposure = myData$covariateValue,
#                      w = myData$time, event = myData$y)
# clogit(event ~ exposure + strata(stratumId) + offset(log(w)), data = myData)
#
# chopdat <- readRDS("s:/temp/chopdat.rds")
# chopdat$w <- exp(chopdat$logw)*365.25
# chopdat$stratumId <- as.numeric(as.character(chopdat$Indiv))
# cd1 <- aggregate(w ~ stratumId + exposure, data = chopdat, sum)
# cd2 <- aggregate(event ~ stratumId + exposure, data = chopdat, sum)
# cd <- merge(cd1,cd2)
# cd <- cd[order(cd$stratumId),]
# rownames(cd) <- NULL
# clogit(event ~ exposure + strata(stratumId) + offset(log(w)), data = cd)
#
# head(cd)
# head(myData)
# all.equal(cd,myData)
# which(myData$w > 10000)
# myData[myData$w > 100000,]
# (cd$w[1] - myData$w[1])
# sccsData = tibble(Indiv = as.factor(data$personId),
#                       aevent = data$ageInDays + data$eventDate - data$observationStartDate + 1,
#                       astart = data$ageInDays,
#                       aend = data$ageInDays + data$censorDate - data$observationStartDate + 1,
#                       aestudy = data$ageInDays + observationDays)
# sccsData$present <- ifelse(sccsData$aend>=sccsData$aestudy, 1, 0)
# chopdat <- formatdata(data$ageInDays + data$exposureStartDate - data$observationStartDate,
#                       data$ageInDays + data$exposureEndDate - data$observationStartDate + 1,
#                       NULL,
#                       0,
#                       0,
#                       TRUE,
#                       data = sccsData) # Expanded data based on the age and exposure groups
# exposedCases1 <- as.numeric(levels(sccsData$Indiv)[chopdat$indiv[chopdat$event == 1 & chopdat$exposure == 1]])
# exposedCases2 <- myData$stratumId[myData$y == 1 & myData$covariateValue == 1]
# all.equal(exposedCases1, exposedCases2)
#
#
#
# sccsData = tibble(Indiv = c(1,2,3),
#                       aevent = c(2,3,4),
#                       astart = c(1,1,1),
#                       aend = c(10,10,10),
#                       aestudy = c(10,10,10))
# chopdat <- formatdata(adrug = c(2,2,2),
#                       aedrug = c(4,4,4),
#                       NULL,
#                       0,
#                       0,
#                       TRUE,
#                       data = sccsData)
# chopdat
#
# data[data$personId == 99141,]
# chopdat[chopdat$indivL == which(levels(sccsData$Indiv) == 99141),]
#
# sccsIntervalData <- createSccsIntervalData(sccsData = sccsData,
#                                  exposureId = 1,
#                                  exposureOfInterestSettings = createCovariateSettings(start = 0,
#                                                                                       end = 0,
#                                                                                       addExposedDaysToEnd = TRUE),
#                                  naivePeriod = 0,
#                                  firstOutcomeOnly = TRUE,
#                                  includeAgeEffect = FALSE,
#                                  includePreExposureOfInterest = FALSE,
#                                  eventDependentObservation = FALSE)
#
# fit <- fitSccsModel(sccsIntervalData, exposureId = 1, prior = createPrior("none"))
# exp(coef(fit))
# # })
OHDSI/SelfControlledCaseSeries documentation built on Jan. 31, 2024, 7:30 p.m.