R/diagnostics.R

Defines functions mni.leave.one.out.diagnosis mni.diagnostic.capabilities.of.vertices mni.vertex.diagnostic.permutation mni.vertex.sensitivity mni.stepwise.diagnostics predplot.jpl predplot.single.jpl simulate.discriminant discriminate.on.areas discriminate.two.areas expand.grid.jpl

library(MASS)
library(nnet)

# check the ability of a single predictor to reproduce the true
# diagnosis using the leave one out method (i.e. it will do as many
# lda fits as there are subjects).
mni.leave.one.out.diagnosis <- function(values, true.diagnosis, method="lda") {
  number.cases <- length(true.diagnosis)
  diagnosis <- matrix(NA, ncol=1, nrow=number.cases)
  if (method == "qda") {
    diagnosis = qda(as.matrix(values), true.diagnosis, CV=TRUE)$class
  }
  else if (method == "lda") {
    diagnosis = lda(as.matrix(values), true.diagnosis, CV=TRUE)$class
  }
  else if (method == "multinom") {
    for (i in 1:number.cases) {
      multinom.fit <- multinom(true.diagnosis ~ values, subset=-i)
      tmp.diagnosis <- predict(multinom.fit, values)
      diagnosis[i] <- tmp.diagnosis[i]
    }
  }
  return(diagnosis)
}

# runs the mni.leave.one.out.diagnosis at each vertex using only that
# vertex as a predictor. Returns a matrix with nrows = nvertices and
# each column corresponding to the diagnosis of each subject at that
# vertex.
mni.diagnostic.capabilities.of.vertices <- function(data.table, true.diagnosis, method="lda") {
  number.cases <- length(true.diagnosis)
  number.vertices <- nrow(data.table)
  results <- matrix(NA, ncol=number.cases, nrow=number.vertices)
  modulo <- 1000
  for (i in 1:number.vertices) {
    try(results[i,] <- mni.leave.one.out.diagnosis(data.table[i,], true.diagnosis, method=method))

    if (i %% modulo == 0) {
      cat(format((i/number.vertices)*100, digits=3))
      cat("%  ")
    }
  }
  return(results)
}

mni.vertex.diagnostic.permutation <- function(data.table, true.diagnosis,
                                              num=100, method="lda") {
  number.subjects <- ncol(data.table)
  results <- vector(length=num)


  for (i in 1:num) {
    cat("Permutation: ")
    cat(i)
    cat("\n")
    
    g <- sample(true.diagnosis)
    d <- mni.diagnostic.capabilities.of.vertices(data.table, g,
                                                 method)
    a <- mni.vertex.sensitivity(d, g)$accuracy
    results[i] <- max(a[!is.na(a)])

  }
  return(results)
}

mni.vertex.sensitivity <- function(diagnostic.results, true.diagnosis) {
  number.vertices = nrow(diagnostic.results)
  sensitivity <-  vector(length=number.vertices)
  specificity <- vector(length=number.vertices)
  ppv <- vector(length=number.vertices)
  npv <- vector(length=number.vertices)
  accuracy <- vector(length=number.vertices)
  modula <- 0
  for (i in 1:number.vertices) {
    t <- try(table(diagnostic.results[i,], true.diagnosis))
    #print(i)
    #print(t)
    if (! inherits(t, "try-error") & nrow(t) == 2 & ncol(t) == 2) {
      accuracy[i] <- (t[1,1] + t[2,2]) / sum(t)
      specificity[i] <- t[1,1] / ( t[1,1] + t[2,1])
      sensitivity[i] <- t[2,2] / (t[2,2] + t[1,2])
    }
  }
#     t <- try(as.data.frame(table(diagnostic.results[i,], true.diagnosis)))
#     if (!inherits(t, "try-error")) {
#       sensitivity[i] <- t[4,3] / (t[4,3] + t[3,3])
#       specificity[i] <- t[1,3] / (t[1,3] + t[2,3])
#       ppv[i] <- t[4,3] / (t[4,3] + t[2,3])
#       npv[i] <- t[1,3] / (t[1,3] + t[3,3])
#       accuracy[i] <- (t[4,3] + t[1,3]) / (t[1,3] + t[2,3] + t[3,3] + t[4,3])
#    }
 #   tmp <- i %/% 1000
 #   if (tmp > modula) {
 #     print((i / number.vertices) * 100)
 #     modula <- tmp
 #   }
 # }
  return(list(accuracy=accuracy, sensitivity=sensitivity,
              specificity=specificity))
}

mni.stepwise.diagnostics <- function(diagnostic.results, true.diagnosis,
                                     data.table, max.steps = 1000) {
  attach(diagnostic.results)
  combined <- sensitivity + specificity #+ ppv + npv
  sorted <- sort(combined, index.return=TRUE, decreasing=TRUE)
  i <- 2
  perfect <- FALSE
  outcomes <- matrix(NA, nrow=max.steps, ncol=4)
  colnames(outcomes) <- c("TN", "FN", "FP", "TP")
  while (i < max.steps && perfect == FALSE) {
    #l <- lda(t(data.table[sorted$ix[1:i],]), true.diagnosis)
    #pl <- predict(l, t(data.table[sorted$ix[1:i],]))
    #diagnosis <- mni.leave.one.out.diagnosis(t(data.table[sorted$ix[1:i],]),
    #true.diagnosis)
    diagnosis <- qda(t(as.matrix(data.table[sorted$ix[1:i],])), true.diagnosis, CV=T)
    results <- as.data.frame(table(diagnosis$class, true.diagnosis))
    if (results[2,3] == 0 && results[3,3] == 0) {
      perfect <- TRUE
    }
    outcomes[i,] <- results[,3]
    i <- i + 1
  }
  detach(diagnostic.results)
  return(i, outcomes)
}
  

predplot.jpl <- function(x,y,truth, len=100, xlab="x", ylab="y") {
  r1 <- range(x)
  r2 <- range(y)
  s1 <- seq(r1[1], r1[2], length=len)
  s2 <- seq(r2[1], r2[2], length=len)

  grid <- expand.grid(x=s1, y=s2)

  l <- lda(cbind(x=x, y=y), truth)
  q <- qda(cbind(x=x, y=y), truth)
  m <- multinom(truth ~ x + y)
  
  pred.l <- predict(l, newdata=grid)
  pred.q <- predict(q, newdata=grid, type="predictive")
  pred.m <- predict(m, grid, type = "probs")

  plot(x,y,type="n", xlab=xlab, ylab=ylab)
  #text(x,y,labels=as.character(truth))
  points(x,y,col=as.numeric(truth), pch=19)
  contour(s1, s2, matrix(pred.l$post[,1], len), add=T,
          levels=0.5, drawlabels=F)
  contour(s1, s2, matrix(pred.q$post[,1], len), add=T, levels=0.5,
          drawlabels=F, col="red")
  contour(s1, s2, matrix(pred.m, len), add=T, levels=0.5,
          drawlabels=F, col="blue")

  legend(locator(n=1), c(levels(truth)[1], levels(truth)[2], "lda", "qda",
                    "logistic"),
         pch=c(19,19,-1, -1, -1), col=c(1, 2, "black", "red", "blue"),
         lty=c(0,0,1, 1,1))
  
}

predplot.single.jpl <- function(x, truth, len=100, xlab="x", ylab="y") {
  r1 <- range(x)
  s1 <- seq(r1[1], r1[2], length=len)

  n <- x
  
  l <- lda(as.matrix(x), truth)
  q <- qda(as.matrix(x), truth)
  m <- multinom(truth ~ n)

  n <- s1
  pred.l <- predict(l, newdata=as.matrix(s1))
  pred.q <- predict(q, newdata=as.matrix(s1), type="predictive")

  pred.m <- predict(m, newdata=as.matrix(n), type="probs")
  
  index.l <- which.min(abs(pred.l$post[,1] -0.5))
  index.q <- which.min(abs(pred.q$post[,1] -0.5))
  index.m <- which.min(abs(pred.m - 0.5))
  #plot(as.factor(truth), x)

  stripplot(x ~ as.factor(truth), xlab=xlab, ylab=ylab,
            panel=function(x,y) {
              panel.stripplot(x,y);
              panel.abline(h=s1[index.l]);
              panel.abline(h=s1[index.q], col="red");
              panel.abline(h=s1[index.m], col="blue")
            })

  #return(list(lda=s1[index.l], qda=s1[index.q], multinom=s1[index.m]))
}

simulate.discriminant <- function(group.means, group.sds, group.ns,
                                  num.sims=100, method="lda") {
  truth <- c(rep(1, group.ns[1]), rep(0, group.ns[2]))

  results <- list(sensitivity = vector(length=num.sims),
                  specificity = vector(length=num.sims),
                  accuracy = vector(length=num.sims))

  for (i in 1:num.sims) {
    data <- c(rnorm(group.ns[1], group.means[1], group.sds[1]),
              rnorm(group.ns[2], group.means[2], group.sds[2]))
    l <- lda(as.matrix(data), truth)
    t <- as.data.frame(table(predict(l)$class, truth))
    results$sensitivity[i] <- t[4,3] / (t[4,3] + t[3,3])
    results$specificity[i] <- t[1,3] / (t[1,3] + t[2,3])
    results$accuracy[i] <- (t[4,3] + t[1,3]) / (t[1,3] + t[2,3] + t[3,3] + t[4
,3])
  }
  return(results)
}

discriminate.on.areas <- function(data.table, animal.table, truth,
                                  method="lda", areas=NULL) {
  if (is.null(areas)) {
    areas <- unique(animal.table)
  }
  num.areas <- length(areas)

  results <- list(area = vector(length=num.areas),
                  accuracy = vector(length=num.areas),
                  sensitivity = vector(length=num.areas),
                  specificity = vector(length=num.areas))
  
  for (i in 1:num.areas) {
    results$area[i] <- areas[i]
    if (sum(animal.table == areas[i]) > 1) {
      d <- mni.leave.one.out.diagnosis(colMeans(data.table[animal.table == areas[i],]),
                                       truth, method=method)
      r <- mni.vertex.sensitivity(t(as.matrix(d)), truth)
      
      results$accuracy[i] <- r$accuracy
      results$sensitivity[i] <- r$sensitivity
      results$specificity[i] <- r$specificity
    }
  }
  return(results)
}

discriminate.two.areas <- function(data.table, animal.table, truth,
                                   method="lda", areas=NULL) {
  if (is.null(areas)) {
    areas <- unique(animal.table)
  }
  combo <- expand.grid.jpl(areas, areas)
  num.combo <- nrow(combo)

  results <- list(area1 = vector(length=num.combo),
                  area2 = vector(length=num.combo),
                  accuracy = vector(length=num.combo),
                  sensitivity = vector(length=num.combo),
                  specificity = vector(length=num.combo),
                  accuracy.a1 = vector(length=num.combo),
                  accuracy.a2 = vector(length=num.combo))
  
  for (i in 1:num.combo) {
    results$area1[i] <- combo[i,1]
    results$area2[i] <- combo[i,2]
    print(i)
    if (combo[i,1] != combo[i,2] && sum(animal.table == combo[i,1]) > 1
        && sum(animal.table == combo[i,2]) > 1) {
      a1 <- colMeans(data.table[animal.table == combo[i, 1],])
      a2 <- colMeans(data.table[animal.table == combo[i, 2],])
      d <- mni.leave.one.out.diagnosis(cbind(a1,a2), truth, method=method)
      r <- mni.vertex.sensitivity(t(as.numeric(d)), truth)

      d1 <- mni.leave.one.out.diagnosis(a1, truth, method=method)
      d2 <- mni.leave.one.out.diagnosis(a2, truth, method=method)
      r1 <- mni.vertex.sensitivity(t(as.numeric(d1)), truth)
      r2 <- mni.vertex.sensitivity(t(as.numeric(d2)), truth)
      
      results$accuracy[i] <- r$accuracy
      results$sensitivity[i] <- r$sensitivity
      results$specificity[i] <- r$specificity
      results$accuracy.a1[i] <- r1$accuracy
      results$accuracy.a2[i] <- r2$accuracy
    }
  }
  return(results)
    
}
  
# like the standard function expand.grid, except that no duplicates
# are allowed
expand.grid.jpl <- function(x,y) {
  # do the standard full expansion
  grid <- expand.grid(x,y)

  # remove identical elements, e.g. c(2,2)
  w <- which(grid[,1] == grid[,2])
  if (length(w) > 0) {
    grid[w,] <- NA
  }
  
  # now remove duplicates, e.g. c(3,4) and c(4,3) are considered duplicates
  for (i in 1:length(x)) {
    for (j in 1:length(y)) {
      w <- which(rowSums(cbind(grid[,1] == x[i], grid[,2] == y[j])) == 2 |
                 rowSums(cbind(grid[,1] == y[j], grid[,2] == x[i])) == 2)
      if (length(w) > 1) {
        grid[w[-1],] <- NA
      }
    }
  }
  return(na.omit(grid))
}
      
             
BIC-MNI/mni.cortical.statistics documentation built on May 5, 2019, 10:25 a.m.