# Some code to explore how we can test if two correlated EASE are different
# Null hypotheses: both negative control estimate sets are generated by the same process from the same data
# Alternative hypothesis: The two sets are generated by different methods, but for the same controls, so likely to be
# highly correlated
library(EmpiricalCalibration)
library(ggplot2)
method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.5, 1))
# second method slightly more biased:
method2Ncs <- method1Ncs
method2Ncs$logRr <- method2Ncs$logRr + rnorm(nrow(method2Ncs), mean = 0.05, sd = 0.1)
null1 <- fitMcmcNull(method1Ncs$logRr, method1Ncs$seLogRr)
null2 <- fitMcmcNull(method2Ncs$logRr, method2Ncs$seLogRr)
plotCalibrationEffect(method1Ncs$logRr, method1Ncs$seLogRr, null = null1, showCis = TRUE)
ease1 <- computeExpectedAbsoluteSystematicError(null1)
ease1
ease2 <- computeExpectedAbsoluteSystematicError(null2)
ease2
deltaLogRr <- method2Ncs$logRr - method1Ncs$logRr
mean(deltaLogRr)
sd(deltaLogRr)
# Generic EASE null distribution (null hypothesis: EASE = 0) -----------------------------------------------
method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.5, 1))
null <- fitNull(method1Ncs$logRr, method1Ncs$seLogRr)
ease1 <- computeExpectedAbsoluteSystematicError(null)
# seLogRr <- method1Ncs$seLogRr
sampleEaseUnderNull <- function(i, seLogRr) {
logRr <- rnorm(length(seLogRr), mean = 0, sd = seLogRr)
null <- fitNull(logRr, seLogRr)
return(computeExpectedAbsoluteSystematicError(null))
}
nullSamples <- sapply(1:10000, sampleEaseUnderNull, seLogRr = method1Ncs$seLogRr)
# hist(nullSamples)
p <- mean(nullSamples > ease1)
p
# Null distribution for paired EASE (null hypothesis: set 1 and 2 derive from same process) ------------------
# If set 1 and 2 are interchangeable, it shouldn't matter whether we pick NC 1 from set 1 or set 2, etc.
method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.5, 1))
# second method slightly more biased:
method2Ncs <- method1Ncs
method2Ncs$logRr <- method2Ncs$logRr + rnorm(nrow(method2Ncs), mean = 0.01, sd = 0.01)
deltaLogRr <- method2Ncs$logRr - method1Ncs$logRr
mean(deltaLogRr)
sd(deltaLogRr)
ggplot(data.frame(method1 = method1Ncs$logRr, method2 = method2Ncs$logRr), aes(x = method1, y = method2)) +
geom_abline(slope = 1) +
geom_point()
null1 <- fitNull(method1Ncs$logRr, method1Ncs$seLogRr)
ease1 <- computeExpectedAbsoluteSystematicError(null1)
null2 <- fitNull(method2Ncs$logRr, method2Ncs$seLogRr)
ease2 <- computeExpectedAbsoluteSystematicError(null2)
# ncs1 <- method1Ncs
# ncs2 <- method2Ncs
sampleEaseFromTwoSets <- function(i, ncs1, ncs2) {
idx <- runif(nrow(ncs1)) < 0.5
null <- fitNull(ifelse(idx, ncs1$logRr, ncs2$logRr),
ifelse(idx, ncs1$seLogRr, ncs2$seLogRr))
return(computeExpectedAbsoluteSystematicError(null))
}
nullSamples <- sapply(1:1000, sampleEaseFromTwoSets, ncs1 = method1Ncs, ncs2 = method2Ncs)
ggplot(data.frame(x = nullSamples), aes(x = x)) +
geom_density(color = rgb(0.2, 0.2, 0.8), fill = rgb(0.2, 0.2, 0.8), alpha = 0.6) +
geom_vline(xintercept = c(ease1, ease2))
# hist(nullSamples)
if (ease1 > ease2) {
p <- mean(nullSamples > ease1 | nullSamples < ease2)
} else {
p <- mean(nullSamples < ease1 | nullSamples > ease2)
}
p
# Note: this is dumb
# Null distribution for paired EASE (null hypothesis: set 1 and 2 derive from same process) ------------------
# Boostrapping
method1Ncs <- simulateControls(n = 50, mean = 0, sd = 0, seLogRr = runif(50, 0.1, 1))
# second method slightly more biased:
method2Ncs <- method1Ncs
method2Ncs$logRr <- method2Ncs$logRr + rnorm(nrow(method2Ncs), mean = 0, sd = 0.01)
# deltaLogRr <- method2Ncs$logRr - method1Ncs$logRr
# mean(deltaLogRr)
# sd(deltaLogRr)
# ggplot(data.frame(method1 = method1Ncs$logRr, method2 = method2Ncs$logRr), aes(x = method1, y = method2)) +
# geom_abline(slope = 1) +
# geom_point()
null1 <- fitNull(method1Ncs$logRr, method1Ncs$seLogRr)
ease1 <- computeExpectedAbsoluteSystematicError(null1)
ease1
null2 <- fitNull(method2Ncs$logRr, method2Ncs$seLogRr)
ease2 <- computeExpectedAbsoluteSystematicError(null2)
ease2
delta <- ease1 - ease2
# ncs1 <- method1Ncs
# ncs2 <- method2Ncs
sampleDeltaEase <- function(i, ncs1, ncs2) {
idx <- sample.int(nrow(ncs1), nrow(ncs1), replace = TRUE)
null1 <- fitNull(method1Ncs$logRr[idx], method1Ncs$seLogRr[idx])
ease1 <- computeExpectedAbsoluteSystematicError(null1)
null2 <- fitNull(method2Ncs$logRr[idx], method2Ncs$seLogRr[idx])
ease2 <- computeExpectedAbsoluteSystematicError(null2)
return(ease1 - ease2)
}
nullSamples <- sapply(1:1000, sampleDeltaEase, ncs1 = method1Ncs, ncs2 = method2Ncs)
ggplot(data.frame(x = nullSamples), aes(x = x)) +
geom_vline(xintercept = 0, size = 1) +
geom_density(color = rgb(0.2, 0.2, 0.8), fill = rgb(0.2, 0.2, 0.8), alpha = 0.6) +
geom_vline(xintercept = delta, linetype = "dashed")
#
# hist(nullSamples)
if (delta > 0) {
p <- mean(nullSamples < 0)
} else {
p <- mean(nullSamples > 0)
}
p
sampleP <- function(i) {
ncs1 <- simulateControls(n = 50, mean = 0.01, sd = 0.01, seLogRr = runif(50, 0.1, 1))
# Simulate second method to be more biased:
ncs2 <- ncs1
ncs2$logRr <- ncs2$logRr + rnorm(nrow(ncs2), mean = 0, sd = 0.01)
delta <- compareEase(logRr1 = ncs1$logRr,
seLogRr1 = ncs1$seLogRr,
logRr2 = ncs2$logRr,
seLogRr2 = ncs2$seLogRr)
return(delta$p)
}
cluster <- ParallelLogger::makeCluster(20)
ParallelLogger::clusterRequire(cluster, "EmpiricalCalibration")
p <- ParallelLogger::clusterApply(cluster, 1:500, sampleP)
p <- do.call(c, p)
mean(p < 0.05)
ParallelLogger::stopCluster(cluster)
boostrapEase <- function(i, ncs) {
idx <- sample.int(nrow(ncs), nrow(ncs), replace = TRUE)
null <- fitNull(ncs$logRr[idx], ncs$seLogRr[idx])
return(computeExpectedAbsoluteSystematicError(null))
}
nullSamples <- sapply(1:1000, boostrapEase, ncs = method1Ncs)
quantile(nullSamples, c(0.5, 0.025, 0.975))
null <- fitMcmcNull(method1Ncs$logRr, method1Ncs$seLogRr)
computeExpectedAbsoluteSystematicError(null)
# Simulate results of first method:
ncs1 <- simulateControls(n = 50)
# Simulate second method to be more biased:
ncs2 <- ncs1
ncs2$logRr <- ncs2$logRr + rnorm(nrow(ncs2), mean = 0, sd = 0.01)
delta <- compareExpectedAbsoluteSystematicError(logRr1 = ncs1$logRr,
seLogRr1 = ncs1$seLogRr,
logRr2 = ncs2$logRr,
seLogRr2 = ncs2$seLogRr)
delta
attr(delta, "ease1")
attr(delta, "ease2")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.