fastCI: Fast Concordance Index

source("~/Code/fastCI/computeNullCI.R")


naive_approximate_normal_from_density <- function(x){
  
  max.x <- which.max(x)
  mean <- mean(which(x == x[max.x]))
  
  var = abs(which.min(abs(x - x[mean]/2)) - mean)/(sqrt(2*log(2)))
  
  return(c("mean" = mean - 1, "var"=var))
  
}

computeExpectedApproximation <- function(N){
  
  mean <- choose(N, 2)/2 
  
  var <- sqrt((2*N^3 + 3*N^2 - 5*N)/72)
  return(c("mean" = mean, "var"=var))
}


plotNormalApproxN <- function(N){
  
  plot.new()
  
  test <- nullCIDist(N,1,cumulative = 0, force_sym = 1)
  
  tt <- naive_approximate_normal_from_density(test)
  
  tt2 <- computeExpectedApproximation(N)
  
  plot(0:(length(test)-1), test, type='l', col='red')
  
  lines(0:(length(test)), dnorm(0:length(test), tt[1], tt[2]), col='blue')
  
  
  lines(0:(length(test)), dnorm(0:length(test), tt2[1], tt2[2]), col='green')
  
  
  legend("topleft", legend = c("True", "Fitted Approximation", "Expected Approximation"),fill = c("red", "blue", "green"))
}

library(entropy)

n.min <- 5
n.max <- 150

ns <- seq(n.min, n.max)

KL.fitted <- numeric(n.max - n.min + 1)
KL.analytic <- numeric(n.max - n.min + 1)

for(i in seq_len(n.max - n.min + 1)){
  
  exact.null <- nullCIDist(ns[i],1,cumulative = 0, force_sym = 1)
  
  pars <- naive_approximate_normal_from_density(exact.null)
  pars2 <- computeExpectedApproximation(ns[i])
  
  normal.fitted <- dnorm(0:(length(exact.null)-1), pars[1], pars[2])
  
  normal.analytic <- dnorm(0:(length(exact.null)-1), pars2[1], pars2[2])
  
  KL.fitted[i] <- KL.empirical(exact.null, normal.fitted)
  KL.analytic[i] <- KL.empirical(exact.null, normal.analytic)
  
}

plot(x = ns, y = KL.fitted, col="blue")
points(x = ns, y = KL.analytic, col = "green")
legend("topright", legend = c("Fitted Approximation", "Expected Approximation"),fill = c("blue", "green"))


plot(x = ns, y = -log10(KL.analytic), col = "green")
points(x = ns, y = -log10(KL.fitted), col="blue")
legend("topleft", legend = c("Fitted Approximation", "Expected Approximation"),fill = c("blue", "green"))


## The analytic formula clearly fits much better
## To check for whether this is a conservative approximation at alpha = 0.05, we will look at differencs in the tails as a function of N


tail.diffs <- numeric(n.max - n.min + 1)
cvs <- numeric(n.max - n.min + 1)

for(i in seq_len(n.max - n.min + 1)){
  
  exact.null <- nullCIDist(ns[i],1,cumulative = 1, force_sym = 1)
  
  critical.val <- which.min(abs(exact.null - 0.025)) # alpha = 0.05 two sided
  cvs[i] <- critical.val
  
  pars <- computeExpectedApproximation(ns[i])
  
  
  # exact.null is 0 based, so we subtract 1 to get actual number of inversions
  tail.diffs[i] <- pnorm(critical.val - 1, mean = pars[1], sd = pars[2]) - exact.null[critical.val]
  
}

plot(ns, tail.diffs)

## Above method underestimates the probability of the tails in this regime consistently, so we will add an inversion into our normal estimate (or alternatively subtract an inversion if using other tail). 

## Testing above for extra inversion added:

tail.diffs <- numeric(n.max - n.min + 1)
cvs <- numeric(n.max - n.min + 1)

for(i in seq_len(n.max - n.min + 1)){
  
  exact.null <- nullCIDist(ns[i],1,cumulative = 1, force_sym = 1)
  
  critical.val <- which.min(abs(exact.null - 0.025)) # alpha = 0.05 two sided
  cvs[i] <- critical.val
  
  pars <- computeExpectedApproximation(ns[i])
  
  
  # exact.null is 0 based, so we subtract 1 to get actual number of inversions
  tail.diffs[i] <- pnorm(critical.val, mean = pars[1], sd = pars[2]) - exact.null[critical.val]
  
}

plot(ns, tail.diffs)


## What is the effect of adding the extra inversion depending on the alpha level? sweeping through all critical values for n = 100

tail.diffs <- numeric(choose(100,2)/2)

null.dist <- nullCIDist(100)
pars <- computeExpectedApproximation(100)

for(i in 1:(choose(100,2)/2)){
  
  tail.diffs[i] <- pnorm(i , mean = pars[1], sd = pars[2]) - null.dist[i]
  
  
}

plot(1:(choose(100,2)/2), tail.diffs)

bhklab/fastCI documentation built on Dec. 3, 2020, 12:17 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

bhklab/fastCI
Fast Concordance Index

inst/extscripts/naive_approximate_normal_from_density.R
In bhklab/fastCI: Fast Concordance Index

R Package Documentation

Browse R Packages

We want your feedback!

bhklab/fastCI Fast Concordance Index

inst/extscripts/naive_approximate_normal_from_density.R In bhklab/fastCI: Fast Concordance Index

R Package Documentation

Browse R Packages

We want your feedback!

bhklab/fastCI
Fast Concordance Index

inst/extscripts/naive_approximate_normal_from_density.R
In bhklab/fastCI: Fast Concordance Index