extras/InvestigateTestForCorrelatedEase.R

# Some code to explore how we can test whether two correlated EASE (expected absolute systematic error) estimates are different

# Null hypothesis: both negative control estimate sets are generated by the same process from the same data.
# Alternative hypothesis: the two sets are generated by different methods. Because both methods are applied to
# the same controls, the two sets are likely to be highly correlated.

library(EmpiricalCalibration)
library(ggplot2)

method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.5, 1))
# second method slightly more biased:
method2Ncs <- method1Ncs
method2Ncs$logRr <- method2Ncs$logRr + rnorm(nrow(method2Ncs), mean = 0.05, sd = 0.1)



null1 <- fitMcmcNull(method1Ncs$logRr, method1Ncs$seLogRr)
null2 <- fitMcmcNull(method2Ncs$logRr, method2Ncs$seLogRr)

plotCalibrationEffect(method1Ncs$logRr, method1Ncs$seLogRr, null = null1, showCis = TRUE)

ease1 <- computeExpectedAbsoluteSystematicError(null1)
ease1
ease2 <- computeExpectedAbsoluteSystematicError(null2)
ease2

deltaLogRr <- method2Ncs$logRr - method1Ncs$logRr
mean(deltaLogRr)
sd(deltaLogRr)
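
# As a simple baseline (a sketch, not part of the original exploration): since the
# estimates are paired per control, a paired t-test on the per-control differences
# tests for a systematic shift between the two methods (ignoring the EASE metric):
t.test(deltaLogRr, mu = 0)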


# Generic EASE null distribution (null hypothesis: EASE = 0) -----------------------------------------------

method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.5, 1))
null <- fitNull(method1Ncs$logRr, method1Ncs$seLogRr)
ease1 <- computeExpectedAbsoluteSystematicError(null)

# seLogRr <- method1Ncs$seLogRr
# Sample the EASE under the null hypothesis of no systematic error by drawing
# log RRs from normal distributions centered at 0 with the observed standard errors:
sampleEaseUnderNull <- function(i, seLogRr) {
  logRr <- rnorm(length(seLogRr), mean = 0, sd = seLogRr)
  null <- fitNull(logRr, seLogRr)
  return(computeExpectedAbsoluteSystematicError(null))
}
nullSamples <- sapply(1:10000, sampleEaseUnderNull, seLogRr = method1Ncs$seLogRr)
# hist(nullSamples)

p <- mean(nullSamples > ease1)
p
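
# A common Monte Carlo refinement (an assumption, standard practice rather than
# part of the original script): adding 1 to numerator and denominator avoids
# reporting p = 0 when no null sample exceeds the observed EASE:
pCorrected <- (sum(nullSamples > ease1) + 1) / (length(nullSamples) + 1)
pCorrected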


# Null distribution for paired EASE (null hypothesis: set 1 and 2 derive from same process) ------------------
# If set 1 and 2 are interchangeable, it shouldn't matter whether we pick NC 1 from set 1 or set 2, etc.

method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.5, 1))
# second method slightly more biased:
method2Ncs <- method1Ncs
method2Ncs$logRr <- method2Ncs$logRr + rnorm(nrow(method2Ncs), mean = 0.01, sd = 0.01)

deltaLogRr <- method2Ncs$logRr - method1Ncs$logRr
mean(deltaLogRr)
sd(deltaLogRr)

ggplot(data.frame(method1 = method1Ncs$logRr, method2 = method2Ncs$logRr), aes(x = method1, y = method2)) +
  geom_abline(slope = 1) +
  geom_point()


null1 <- fitNull(method1Ncs$logRr, method1Ncs$seLogRr)
ease1 <- computeExpectedAbsoluteSystematicError(null1)
null2 <- fitNull(method2Ncs$logRr, method2Ncs$seLogRr)
ease2 <- computeExpectedAbsoluteSystematicError(null2)

# ncs1 <- method1Ncs
# ncs2 <- method2Ncs 
# For each control, randomly pick the estimate from set 1 or set 2, then compute
# the EASE of the mixed set. Under the null hypothesis the two sets are interchangeable:
sampleEaseFromTwoSets <- function(i, ncs1, ncs2) {
  idx <- runif(nrow(ncs1)) < 0.5
  null <- fitNull(ifelse(idx, ncs1$logRr, ncs2$logRr),
                  ifelse(idx, ncs1$seLogRr, ncs2$seLogRr))
  return(computeExpectedAbsoluteSystematicError(null))
}
nullSamples <- sapply(1:1000, sampleEaseFromTwoSets, ncs1 = method1Ncs, ncs2 = method2Ncs)
ggplot(data.frame(x = nullSamples), aes(x = x)) +
  geom_density(color = rgb(0.2, 0.2, 0.8), fill = rgb(0.2, 0.2, 0.8), alpha = 0.6) +
  geom_vline(xintercept = c(ease1, ease2))

# hist(nullSamples)
if (ease1 > ease2) {
  p <-  mean(nullSamples > ease1 | nullSamples < ease2)
} else {
  p <-  mean(nullSamples < ease1 | nullSamples > ease2)
}
p
# Note: this is not a proper test; comparing each EASE separately against the pooled
# null distribution does not directly test the difference between the two.
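
# A sharper variant of the same swapping idea (a sketch, not in the original
# script): use the EASE *difference* between the two randomly swapped sets as
# the test statistic, turning this into a standard two-sided permutation test:
sampleSwappedDelta <- function(i, ncs1, ncs2) {
  swap <- runif(nrow(ncs1)) < 0.5
  nullA <- fitNull(ifelse(swap, ncs2$logRr, ncs1$logRr),
                   ifelse(swap, ncs2$seLogRr, ncs1$seLogRr))
  nullB <- fitNull(ifelse(swap, ncs1$logRr, ncs2$logRr),
                   ifelse(swap, ncs1$seLogRr, ncs2$seLogRr))
  return(computeExpectedAbsoluteSystematicError(nullA) -
           computeExpectedAbsoluteSystematicError(nullB))
}
permSamples <- sapply(1:1000, sampleSwappedDelta, ncs1 = method1Ncs, ncs2 = method2Ncs)
pPerm <- mean(abs(permSamples) >= abs(ease1 - ease2))
pPerm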


# Null distribution for paired EASE (null hypothesis: set 1 and 2 derive from same process) ------------------
# Bootstrapping


method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.1, 1))
# second method with added noise but no extra bias:
method2Ncs <- method1Ncs
method2Ncs$logRr <- method2Ncs$logRr + rnorm(nrow(method2Ncs), mean = 0, sd = 0.01)

# deltaLogRr <- method2Ncs$logRr - method1Ncs$logRr
# mean(deltaLogRr)
# sd(deltaLogRr)

# ggplot(data.frame(method1 = method1Ncs$logRr, method2 = method2Ncs$logRr), aes(x = method1, y = method2)) +
#   geom_abline(slope = 1) +
#   geom_point()


null1 <- fitNull(method1Ncs$logRr, method1Ncs$seLogRr)
ease1 <- computeExpectedAbsoluteSystematicError(null1)
ease1
null2 <- fitNull(method2Ncs$logRr, method2Ncs$seLogRr)
ease2 <- computeExpectedAbsoluteSystematicError(null2)
ease2
delta <- ease1 - ease2

# ncs1 <- method1Ncs
# ncs2 <- method2Ncs 
# Bootstrap the paired EASE difference: resample controls (using the same indices
# for both sets, preserving the pairing) and recompute the difference in EASE:
sampleDeltaEase <- function(i, ncs1, ncs2) {
  idx <- sample.int(nrow(ncs1), nrow(ncs1), replace = TRUE)
  null1 <- fitNull(ncs1$logRr[idx], ncs1$seLogRr[idx])
  ease1 <- computeExpectedAbsoluteSystematicError(null1)
  null2 <- fitNull(ncs2$logRr[idx], ncs2$seLogRr[idx])
  ease2 <- computeExpectedAbsoluteSystematicError(null2)
  return(ease1 - ease2)
}
nullSamples <- sapply(1:1000, sampleDeltaEase, ncs1 = method1Ncs, ncs2 = method2Ncs)
ggplot(data.frame(x = nullSamples), aes(x = x)) +
  geom_vline(xintercept = 0, linewidth = 1) +
  geom_density(color = rgb(0.2, 0.2, 0.8), fill = rgb(0.2, 0.2, 0.8), alpha = 0.6) +
  geom_vline(xintercept = delta, linetype = "dashed")
# hist(nullSamples)
if (delta > 0) {
  p <-  mean(nullSamples < 0) 
} else {
  p <-  mean(nullSamples > 0) 
}
p
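
# A symmetric two-sided alternative (an assumption, standard bootstrap practice
# rather than part of the original script): double the smaller tail probability:
pTwoSided <- min(1, 2 * min(mean(nullSamples <= 0), mean(nullSamples >= 0)))
pTwoSided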



# Estimate the type 1 error rate: simulate pairs of estimate sets that differ
# only by noise (same underlying bias), and record the p-value of compareEase:
sampleP <- function(i) {
  ncs1 <- simulateControls(n = 50, mean = 0.01, sd = 0.01, seLogRr = runif(50, 0.1, 1))

  # Simulate second method with added noise but no extra bias:
  ncs2 <- ncs1
  ncs2$logRr <- ncs2$logRr + rnorm(nrow(ncs2), mean = 0, sd = 0.01)

  delta <- compareEase(logRr1 = ncs1$logRr,
                       seLogRr1 = ncs1$seLogRr,
                       logRr2 = ncs2$logRr,
                       seLogRr2 = ncs2$seLogRr)
  return(delta$p)
}


cluster <- ParallelLogger::makeCluster(20)
ParallelLogger::clusterRequire(cluster, "EmpiricalCalibration")
p <- ParallelLogger::clusterApply(cluster, 1:500, sampleP)
p <- do.call(c, p)
mean(p < 0.05)
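# Monte Carlo uncertainty of this type 1 error estimate (a sketch, not in the
# original): an exact binomial confidence interval around the rejection rate:
binom.test(sum(p < 0.05), length(p))$conf.int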
ParallelLogger::stopCluster(cluster)




# Bootstrap the EASE of a single set to get a percentile confidence interval:
bootstrapEase <- function(i, ncs) {
  idx <- sample.int(nrow(ncs), nrow(ncs), replace = TRUE)
  null <- fitNull(ncs$logRr[idx], ncs$seLogRr[idx])
  return(computeExpectedAbsoluteSystematicError(null))
}
nullSamples <- sapply(1:1000, bootstrapEase, ncs = method1Ncs)
quantile(nullSamples, c(0.5, 0.025, 0.975))

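# For comparison (a note, not in the original script): the MCMC-based null provides
# its own credible interval for the EASE, which should roughly agree with the
# bootstrap percentile interval above: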
null <- fitMcmcNull(method1Ncs$logRr, method1Ncs$seLogRr)
computeExpectedAbsoluteSystematicError(null)



# Simulate results of first method:
ncs1 <- simulateControls(n = 50)

# Simulate second method with added noise but no extra bias:
ncs2 <- ncs1
ncs2$logRr <- ncs2$logRr + rnorm(nrow(ncs2), mean = 0, sd = 0.01)

delta <- compareEase(logRr1 = ncs1$logRr,
                     seLogRr1 = ncs1$seLogRr,
                     logRr2 = ncs2$logRr,
                     seLogRr2 = ncs2$seLogRr)
delta
attr(delta, "ease1") 
attr(delta, "ease2") 