R/F_validation.R

Defines functions RandomlyPartition ComputeInformationCriteria PlotPredDens PlotValidationResults OutsideAndInBounds GetRow GetPercentilesLeftOut PlotValResults

#----------------------------------------
# F_validation
# Leontine Alkema
#----------------------------------------

# note: PlotValidationResults (at end) is the main function, that calls this function:
PlotValResults <- function(# Summarize and plot validation results
  ### Summarize and plot validation results for one type of left-out observation (e.g. unmet, total, trad or modern),
  ### Note: returns summary results
  include.j, ##<< vector with logicals, include observation or not?
  P.j,##<< Percentiles to be summarized
  ## (element of P.jp data frame constructed in \code{\link{GetPercentilesLeftOut}})
  nameplot, ##<< Main for overview figure
  error.est.j, ##<< Errors to be summarized
  country.info, ##<< Object from class country.info
  winbugs.data,
  data,
  ## (element of error.est.jp data frame constructed in \code{\link{GetPercentilesLeftOut}})
  validation.list ##<< Info about validation exercise (from mcmc.meta)
  ){
    ## specify subgroups of left-out observations:
    if(isTRUE(validation.list$at.random.no.data) || isTRUE(validation.list$leave.iso.out)) {
        ## 'data' , 'country.info', already the concatenation of training and test data (done in PlotValidationResults()).
        getc.j.test <-
            c(winbugs.data$getc.j
             ,winbugs.data$getc.j.test
              )
        reg.j <- country.info$reg.c[getc.j.test]
        ssa.j <- country.info$ssa.c[getc.j.test]
        mics.j <- ifelse(data$source.j=="MICS", T, F)
        dhs.j <- ifelse(data$source.j=="DHS", T, F)
        dev.j <- country.info$dev.c[getc.j.test]
        if("sex.ac.unm.c" %in% colnames(country.info)) {
            sex.ac.j <- country.info$name.sex.ac.unm.c[getc.j.test]
            }
    } else {
        reg.j <-  country.info$reg.c[winbugs.data$getc.j]
        ssa.j <- country.info$ssa.c[winbugs.data$getc.j]
        mics.j <- ifelse(data$source.j=="MICS", T, F)
        dhs.j <- ifelse(data$source.j=="DHS", T, F)
        dev.j <- country.info$dev.c[winbugs.data$getc.j]
        if("sex.ac.unm.c" %in% colnames(country.info)) {
            sex.ac.j <- country.info$name.sex.ac.unm.c[winbugs.data$getc.j]
            }
    }

  select.ji <- cbind(rep(TRUE, length(ssa.j)), ssa.j=="No", ssa.j=="Yes",
                     dhs.j, mics.j, !mics.j & !dhs.j, dev.j=="Poor", dev.j=="Rich")
    namesselect <- c("All", "OutsideSSA", "SSA", "DHS", "MICS", "Other", "Poor", "Rich")

    if("sex.ac.unm.c" %in% colnames(country.info)) {
        select.ji <- cbind(select.ji, sex.ac.j == 0, sex.ac.j == 1)
        namesselect <- c(namesselect, "LowSexlActy", "HighSexlActy")
    }
  I <- dim(select.ji)[2] # number of subgroups
  table.ix <- NULL # x refers to table columns (outputs)
  table2.ix <- NULL # x refers to table columns (outputs)
  for (i in 1:I){
  #  nobs.i[i] <- sum(!is.na(Psselect[select.ji[,i]]))
    select <- select.ji[,i]& (include.j == TRUE)
    table.ix <- rbind(table.ix, GetRow(P.c = P.j[select]))
    table2.ix <- rbind(table2.ix, c(mean(error.est.j[select], na.rm= T),
                                  median((error.est.j[select]), na.rm= T),
                                     median(abs(error.est.j[select]), na.rm= T),
                      mean(P.j[select] <0.5, na.rm = T) )) # if P.j is less than 0.5, the obs fell below median
  }
  nobs.i <- table.ix[, "# Obs"]
  rownames(table.ix) <- paste0(namesselect, " (", nobs.i, ")")
  rownames(table2.ix) <- paste0(namesselect, " (", nobs.i, ")")
  colnames(table2.ix) <- c("Mean error", "Median error", "Median abs error", "Prop below median")

  par(mfrow = c(2,4), cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)

  # 1. inside 80%
  par(mar = c(5,12,5,1))
  plot(1, type = "n", ylim = c(I, 0), main = "" , xlim = c(0.5,1),
         xlab = "Coverage of 80% PI", ylab = "", yaxt = "n")
  # for inside 80
  for (i in 1:I){
    res1 <- qbinom(p = c(0.025, 0.975), size = nobs.i[i], prob = 0.8)/nobs.i[i]
    segments(res1[1], i, res1[2], i, lwd = 10, col = "grey")
    points(i~table.ix[i, "Within 80% CI"], col = 1, pch = 19, lwd = 3)
  }
  abline(v = 0.8)
  axis(2, las = 2, labels = rownames(table.ix), at  = seq(1, I))

  par(mar = c(5,5,5,1))
  # 2. outside 80% CI
  plot(1, type = "n", ylim = c(I,0), xlim = c(0,0.3), xlab = "Proportion below/above 80% PI",
       ylab = "", yaxt = "n", main = nameplot)
  for (i in 1:I){
    res1 <- qbinom(p = c(0.025, 0.975), size = nobs.i[i], prob = 0.1)/nobs.i[i]
    segments(res1[1], i, res1[2], i, lwd = 10, col = "grey")
    points((i+0.1)~table.ix[i, "Below 80% CI"], col = 2, pch = 19, lwd = 3)
    points((i-0.1)~table.ix[i, "Above 80% CI"], col = 1, pch = 19, lwd = 3)
  }
  axis(1, las = 2, labels = NULL, at  = i)
  abline(v = 0.1)
  legend("bottomright", legend = c("Above", "Below"), col = c(1,2), lwd = 3, pch = 19, lty = -1, bty = "n")

  # 3. inside 95% CI
  plot(1, type = "n", ylim = c(I,0),
        main = "", xlim = c(0.7,1),
         xlab = "Coverage of 95% PI", ylab = "", yaxt = "n")
  for (i in 1:I){
    res1 <- qbinom(p = c(0.025, 0.975), size = nobs.i[i], prob = 0.95)/nobs.i[i]
    segments(res1[1], i, res1[2], i, lwd = 10, col = "grey")
     points(i~table.ix[i, "Within 95% CI"], col = 1, pch = 19, lwd = 3)
  }
  abline(v = 0.95)

  # 4. outside 95%
  #par(mar = c(5,5,5,12))
  plot(1, type = "n", ylim = c(I,0), xlim = c(0,0.3), xlab = "Proportion below/above 95% PI",
       ylab = "", yaxt = "n")
  for (i in 1:I){
    res1 <- qbinom(p = c(0.025, 0.975), size = nobs.i[i], prob = 0.025)/nobs.i[i]
    segments(res1[1], i, res1[2], i, lwd = 10, col = "grey")
    points((i+0.1)~table.ix[i, "Below 95% CI"],col = 2, pch = 19, lwd = 3)
    points((i-0.1)~table.ix[i, "Above 95% CI"],col = 1, pch = 19, lwd = 3)
  }
  axis(1, las = 2, labels = NULL, at  = i)
  abline(v = 0.025)
  legend("bottomright", legend = c("Above", "Below"), col = c(1,2), lwd = 3,
         pch = 19, lty = -1, bty = "n")

  hist(P.j[include.j], freq = FALSE, main = "Percentiles", xlab = "P")
    abline(h=1, col = 2)
    if(sum(ssa.j=="Yes"&include.j) > 0) {
        hist(P.j[ssa.j=="Yes"&include.j], freq = FALSE, breaks = 10, main = "Percentiles SSA", xlab = "P")
  abline(h=1, col = 2)
        } else plot(1)
  hist(error.est.j[include.j], freq = T, breaks = 20, main = "Errors", xlab = "Error")
  abline(v=0, col = 1)
  abline(v = mean(error.est.j[include.j], na.rm = T), col = 3)
  abline(v = median(error.est.j[include.j], na.rm = T), col = 4)
#  hist(st.error.j, freq = FALSE, main = "St. Errors", xlab = "St. Error")
                                #  abline(v=0, col = 2)
        if(sum(ssa.j=="Yes"&include.j) > 0) {
  hist(error.est.j[ssa.j=="Yes" & include.j], freq = T,  breaks = 20, main = "Errors SSA", xlab = "St. Error")
  abline(v=0, col = 1)
  abline(v = mean(error.est.j[ssa.j=="Yes" & include.j], na.rm = T), col = 3)
  abline(v = median(error.est.j[ssa.j=="Yes" & include.j], na.rm = T), col = 4)
        } else plot(1)
  ##value<<
  return(list(table.ix = table.ix,##<< Summary table for P.j
              table2.ix = table2.ix ##<< Summary table for error.est.j
              ))
}

#----------------------------------------------------------------------------------
GetPercentilesLeftOut <- function(
  ### Calculate where left-out obs falls in pred. posterior distribution:
  ### P.j  =(approx) P(Ynew <= yleftout) = percentile of left-out obs j in posterior predictive distribution
  ### and errors:
  ### error.est.j  = y.j - median(Ynew.j).
                                  data, #<- mcmc.meta$data.raw$data
                                  mcmc.array,
                                  winbugs.data,
                                  validation.list){
    ## If doing 'at.random.no.data' validation exercise do 'else' below.

    if(!isTRUE(validation.list$at.random.no.data) && !isTRUE(validation.list$leave.iso.out)) {

    Punmet.j <- error.unmet.j <- error.est.unmet.j <-
    Ptot.j <- Ptrad.j <- Pmodern.j <-
        error.est.trad.j <-
        error.est.modern.j <-
            error.est.tot.j <- rep(NA, winbugs.data$J)
  # if trad, mod and unmet were left out:
  if (!is.null(winbugs.data$getj.test.k)){
      for (j in winbugs.data$getj.test.k[1:winbugs.data$n.test.breakdown]){
      tradobs <- data$props.trad.j[j]
      modobs <- data$props.modern.j[j]
      totCPobs <-  tradobs + modobs
      parname.tr <- paste0("pred.logratio.ytrad.j[", j, "]")
      parname.mod <- paste0("pred.logratio.ymodern.j[", j, "]")
      a <- exp(c(mcmc.array[, , parname.tr])) + exp(c(mcmc.array[, , parname.mod]))
      totCP <- a/(a+1)
      trad <- (1-totCP)*exp(c(mcmc.array[, , parname.tr]))
      mod <- (1-totCP)*exp(c(mcmc.array[, , parname.mod]))
      Ptrad.j[j] <- mean(trad <= tradobs)
      Pmodern.j[j] <- mean(mod <= modobs)
      Ptot.j[j] <- mean(totCP <= totCPobs)
      error.est.trad.j[j] <- tradobs - median(trad)
      error.est.modern.j[j] <- modobs - median(mod)
      error.est.tot.j[j] <- totCPobs - median(totCP)
#      # from mcmc sample directly, using median of mean (expected to be slightly different)
#      parname.tr <- paste0("q.trad.j[", j, "]")
#      parname.mod <- paste0("q.modern.j[", j, "]")
#      error.trad.j[j] <- tradobs - median(c(mcmc.array[, , parname.tr]))
#      error.modern.j[j] <- modobs - median(c(mcmc.array[, , parname.mod]))
#      error.tot.j[j] <- totCPobs - median(c(mcmc.array[, , parname.tr])+c(mcmc.array[, , parname.mod]))
      if (is.element(j, winbugs.data$getj.test.unmet.k)){
          unmetobs <- data$props.unmet.j[j]
          parname <- paste0("pred.logitratio.yunmet.j[", j, "]")
          # use totCP to get sampled unmet
          unmet <- (1-totCP)*invlogit(c(mcmc.array[, , parname]))
          Punmet.j[j] <- mean(unmet <= unmetobs)
          error.est.unmet.j[j] <- unmetobs - median(unmet)
#          parname <- paste0("q.unmet.j[", j, "]")
#          error.unmet.j[j] <- unmetobs - median(c(mcmc.array[, , parname]))
      }
    } #end j-loop
  } else { # unmet only
      for (j in winbugs.data$getj.test.unmet.k){
          tradobs <- data$props.trad.j[j]
          modobs <- data$props.modern.j[j]
          totCPobs <-  tradobs + modobs
          parname.tr <- paste0("pred.logratio.ytrad.j[", j, "]")
          parname.mod <- paste0("pred.logratio.ymodern.j[", j, "]")
          a <- exp(c(mcmc.array[, , parname.tr])) + exp(c(mcmc.array[, , parname.mod]))
          totCP <- a/(a+1)

          # copied from above
          unmetobs <- data$props.unmet.j[j]
          parname <- paste0("pred.logitratio.yunmet.j[", j, "]")
          # use totCP to get sampled unmet
          unmet <- (1-totCP)*invlogit(c(mcmc.array[, , parname]))
          Punmet.j[j] <- mean(unmet <= unmetobs)
          error.est.unmet.j[j] <- unmetobs - median(unmet)
#         parname <- paste0("q.unmet.j[", j, "]")
#          error.unmet.j[j] <- unmetobs - median(c(mcmc.array[, , parname]))
      } # end j-loop
  }# end else
    } else if(isTRUE(validation.list$at.random.no.data)) { ## end '!isTRUE(validation.list$at.random.no.data)

        ## If doing 'at.random.no.data' validation exercise:

        Punmet.j <- error.unmet.j <- error.est.unmet.j <-
            Ptot.j <- Ptrad.j <- Pmodern.j <-
                error.est.trad.j <-
                    error.est.modern.j <-
                        error.est.tot.j <- rep(NA, winbugs.data$J + winbugs.data$J.test)

        ## Breakdown into mod, trad, or for tot only:
        for (j in winbugs.data$getj.test.k[1:winbugs.data$n.test.breakdown]){
            tradobs <- winbugs.data$data.test$props.trad.j[j - winbugs.data$J]
            modobs <- winbugs.data$data.test$props.modern.j[j - winbugs.data$J]
            totCPobs <-  tradobs + modobs
            parname.tr <- paste0("pred.logratio.ytrad.j[", j, "]")
            parname.mod <- paste0("pred.logratio.ymodern.j[", j, "]")
            a <- exp(c(mcmc.array[, , parname.tr])) + exp(c(mcmc.array[, , parname.mod]))
            totCP <- a/(a+1)
            trad <- (1-totCP)*exp(c(mcmc.array[, , parname.tr]))
            mod <- (1-totCP)*exp(c(mcmc.array[, , parname.mod]))
            Ptrad.j[j] <- mean(trad <= tradobs)
            Pmodern.j[j] <- mean(mod <= modobs)
            Ptot.j[j] <- mean(totCP <= totCPobs)
            error.est.trad.j[j] <- tradobs - median(trad)
            error.est.modern.j[j] <- modobs - median(mod)
            error.est.tot.j[j] <- totCPobs - median(totCP)

            ## Unmet need
            if (is.element(j, winbugs.data$getj.test.unmet.k)){
                unmetobs <- winbugs.data$data.test$props.unmet.j[j - winbugs.data$J]
                parname <- paste0("pred.logitratio.yunmet.j[", j, "]")
                unmet <- (1-totCP)*invlogit(c(mcmc.array[, , parname]))
                Punmet.j[j] <- mean(unmet <= unmetobs)
                error.est.unmet.j[j] <- unmetobs - median(unmet)
            }
        }
        ## No breakdown obs

        for(j in winbugs.data$getj.test.k[(winbugs.data$n.test.breakdown + 1):(winbugs.data$J.test)]){
            totCPobs <-  winbugs.data$data.test$props.tot.j[j - winbugs.data$J]
            parname.tot <- paste0("pred.logit.ytot.j[", j, "]")
            a <- exp(mcmc.array[, , parname.tot])
            totCP <- a/(a+1)
            Ptot.j[j] <- mean(totCP <= totCPobs)
            error.est.tot.j[j] <- totCPobs - median(totCP)
        }
    } else if (isTRUE(validation.list$leave.iso.out)) {
        ## No 'unmet' or 'tot'
        Ptot.j <- Ptrad.j <- Pmodern.j <-
            error.est.trad.j <-error.est.tot.j <-
                error.est.modern.j <-rep(NA, winbugs.data$J + winbugs.data$J.test)

        ## Breakdown into mod, trad, or for tot only:
        for (j in winbugs.data$getj.test.k[1:winbugs.data$n.test.breakdown]){
            tradobs <- winbugs.data$data.test$props.trad.j[j - winbugs.data$J]
            modobs <- winbugs.data$data.test$props.modern.j[j - winbugs.data$J]
            totCPobs <-  tradobs + modobs
            parname.tr <- paste0("pred.logratio.ytrad.j[", j, "]")
            parname.mod <- paste0("pred.logratio.ymodern.j[", j, "]")
            a <- exp(c(mcmc.array[, , parname.tr])) + exp(c(mcmc.array[, , parname.mod]))
            totCP <- a/(a+1)
            trad <- (1-totCP)*exp(c(mcmc.array[, , parname.tr]))
            mod <- (1-totCP)*exp(c(mcmc.array[, , parname.mod]))
            Ptrad.j[j] <- mean(trad <= tradobs)
            Pmodern.j[j] <- mean(mod <= modobs)
            Ptot.j[j] <- mean(totCP <= totCPobs)
            error.est.trad.j[j] <- tradobs - median(trad)
            error.est.modern.j[j] <- modobs - median(mod)
            error.est.tot.j[j] <- totCPobs - median(totCP)

            ## ## Unmet need
            ## if (is.element(j, winbugs.data$getj.test.unmet.k)){
            ##     unmetobs <- winbugs.data$data.test$props.unmet.j[j - winbugs.data$J]
            ##     parname <- paste0("pred.logitratio.yunmet.j[", j, "]")
            ##     unmet <- (1-totCP)*invlogit(c(mcmc.array[, , parname]))
            ##     Punmet.j[j] <- mean(unmet <= unmetobs)
            ##     error.est.unmet.j[j] <- unmetobs - median(unmet)
            ## }
        }
        ## ## No breakdown obs
        ## for(j in winbugs.data$getj.test.k[(winbugs.data$n.test.breakdown + 1):(winbugs.data$J.test)]){
        ##     totCPobs <-  winbugs.data$data.test$props.tot.j[j - winbugs.data$J]
        ##     parname.tot <- paste0("pred.logit.ytot.j[", j, "]")
        ##     a <- exp(mcmc.array[, , parname.tot])
        ##     totCP <- a/(a+1)
        ##     Ptot.j[j] <- mean(totCP <= totCPobs)
        ##     error.est.tot.j[j] <- totCPobs - median(totCP)
        ## }
    }

  ##value<< List with data.frames P.jp and error.est.jp
    ## with percentiles and errors respectively as explained in the description,
    ## where j refers to the observation index, and p
    ## refers to trad, modern, tot, unmet.

    if(isTRUE(validation.list$leave.iso.out)) {
        return(list(P.jp =
               data.frame(Ptrad.j,
                      Pmodern.j,
                      Ptot.j),
          error.est.jp = data.frame(error.est.trad.j,
                                   error.est.modern.j,
                                    error.est.tot.j
                                    )
          ))
        } else {

  return(list(P.jp =   # ##<<P.jp is a data frame with
    # ##describe<<
               data.frame(Ptrad.j, ##<<,
                      Pmodern.j, ##<<,
                      Ptot.j,##<<,
                      Punmet.j##<<,
                           ),
    # ##end<<

#           error.jp = data.frame(error.trad.j,  ##<<,
#                        error.modern.j,  ##<<,
#                                 error.tot.j, ##<<,
#                                 error.unmet.j ##<<,
#                        ),


          error.est.jp = data.frame(error.est.trad.j,
                                   error.est.modern.j,
                                    error.est.tot.j,
                                    error.est.unmet.j
                                    )
          ))
            }
}

#------------------------------------------------------------------------------
GetRow <- function(# Summarize proportion of left-out observations outside various PIs
  ###  Summarize proportion of left-out observations outside various PIs
  P.c ##<< Vector with percentiles (see \code{\link{GetPercentilesLeftOut}})
  ){
  out.in.CI95 <- round(OutsideAndInBounds(P.c = P.c),3)
  out.in.CI80 <- round(OutsideAndInBounds(P.c = P.c, CIlow = 0.1, CIup = 0.9),3)
  n <- sum(!is.na(P.c))
  res <- data.frame(n, out.in.CI80, out.in.CI95)
  colnames(res) <- c("# Obs",
        "Below 80% CI", "Within 80% CI", "Above 80% CI",
        "Below 95% CI", "Within 95% CI", "Above 95% CI")
  ##value<< Data frame with one row and columns
  ##c("# Obs",  "Below 80% CI", "Within 80% CI", "Above 80% CI",
  ##      "Below 95% CI", "Within 95% CI", "Above 95% CI")
  return(res)
}

#------------------------------------------------------------------------------
OutsideAndInBounds <- function(# Find proportion of left-out observations outside a specific PI
  ### Find proportion of left-out observations outside a specific PI
  P.c, ##<< Vector with percentiles (see \code{\link{GetPercentilesLeftOut}})
  CIlow = 0.025, ##<< Lower bound PI
  CIup = 0.975 ##<< Upper bound PI
  ){
  res <- c(mean(P.c < CIlow, na.rm = T),
          mean((P.c >= CIlow) & (P.c <= CIup), na.rm = T),
          mean(P.c > CIup, na.rm = T))
  ##value<<
  ## ("Below CI","In CI","Above CI")
  return(data.frame("Below CI" = res[1], "In CI" = res[2], "Above CI" = res[3]))
}

#----------------------------------------------------------------------------------
GetTraining <- function (# Construct training sets for validation exercise
  ### Construct training sets for validation exercise
  ### Choose one option from (at.random, at.end, exclude.unmet.only)
                         data, winbugs.data = NULL, ##<< winbugs.data needs to be provided when leaving out obs at the end
                         country.info,
  validation.list,
  leave1 = FALSE, ##<< Not yet implemented
  at.random.min.c = 1, at.end.not.1.obs.c = FALSE
  ,seed = 12345##<< Set seed when sampling observation to leave out
  ) {

    ##
    ## CHANGES (2017):
    ## --------------
    ##
    ## [MCW-2017-02-14-2] :: Argument 'at.random.no.data = FALSE' added to
    ## perform validation for countries with no data. Countries /with/ data are
    ## randomly selected and all of their observations are left out.
    ##

  # note: make sure training/test sets are not empty for breakdown/unmet
  # and that the training set is not empty for total
    set.seed(seed)

    at.random <- validation.list$at.random
    at.random.test.prop <- validation.list$at.random.test.prop
    at.end <- validation.list$at.end
    at.random.no.data <- validation.list$at.random.no.data
    at.random.no.data.strata <- validation.list$at.random.no.data.strata
    at.random.no.data.test.prop <- validation.list$at.random.no.data.test.prop
    year.cutoff <- validation.list$year.cutoff
    exclude.unmet.only <- validation.list$exclude.unmet.only
    exclude.unmet.only.test.prop <- validation.list$exclude.unmet.only.test.prop
    leave.iso.out <- validation.list$leave.iso.out
    leave.iso.out.iso.test <-  validation.list$leave.iso.out.iso.test

    J <- nrow(data)
    iso.c <- unique(data$iso.j)
    C <- length(iso.c)

    if (at.random){
        ## 20% at random (from break-down into modern/trad only) (ok if some
        ## countries end up without observations in training, they're still in
        ## test).
        ##
        ## Countries having all data removed has big impact for UWRA as
        ## data are fewer and countries with 1 data point are common. Ensure
        ## that each country has at least 'at.random.min.c' data points in
        ## training set.
        iso.all <- as.numeric(unique(data$iso.j))
        ## only breakdown obs
        na.modern.TF <- is.na(data$props.modern.j)
        na.modern.j <- (1:J)[na.modern.TF]
        iso.keep <-             #the ones with so few obs
            as.numeric(subset(as.data.frame(table(data[!na.modern.TF, "iso.j"])
                                           ,stringsAsFactors = FALSE)
                             ,Freq <= at.random.min.c)$Var1)
        iso.keep.j <- unique(c((1:J)[data$iso.j %in% iso.keep], na.modern.j))
        eligible.j <-
            (1:J)[!((1:J) %in% iso.keep.j | (1:J) %in% na.modern.j)]
        M <- length(eligible.j)
        M.prop <-
            ((1 - at.random.test.prop) * nrow(data) - (nrow(data) - M)) / M
                                #prop. to retain from eligible.j
        n.training.breakdown <- max(1, min(M-1, round(M.prop * M)))
        ## make sure unmet test or traning set is not empty (else loop does not work)
        unmet.empty <- TRUE
        ## at least 'at.random.min.c'
        min.not.satisfied <- TRUE
        ## Limit the number of tries
        max.tries <- 1000
        tries.so.far <- 0
        while ((unmet.empty || min.not.satisfied) && tries.so.far < max.tries) {
            tries.so.far <- tries.so.far + 1

            sample.j <- sample(eligible.j, size = n.training.breakdown)
            getj.training.k <- sort(c(iso.keep.j, sample.j))

            iso.all.intraining <- iso.all %in% data[getj.training.k,]$iso.j
            if(isTRUE(all(iso.all.intraining))) {
                sample.iso.freq <-
                    as.data.frame(table(data[getj.training.k, "iso.j"]))
            less.than.min <-
                sample.iso.freq[sample.iso.freq$Freq < at.random.min.c,]$Var1
                if(identical(length(less.than.min), 0L)) {
                    min.not.satisfied <- FALSE
                } else { min.not.satisfied <- TRUE }
            } else { min.not.satisfied <- TRUE }

            if (sum(!is.na(data$props.unmet.j[getj.training.k])) !=0 &
                sum(!is.na(data$props.unmet.j[-getj.training.k])) !=0){
                unmet.empty <- FALSE
            } else { unmet.empty <- TRUE }
        }
        if((unmet.empty || min.not.satisfied) && tries.so.far >= max.tries) {
            stop("Unable to a draw training set that satisfies the requirements in after ", max.tries, " attempts.")
        }
    }
    if (at.end){
        ## leave out obs after AND IN year.cutoff
        getj.training.k <- seq(1, length(data$years.j))[ (data$years.j <= year.cutoff)]
        if(at.end.not.1.obs.c) { #put all countries with one obs only in training
            c.tbl <- with(data, table(iso.j))
            c.tbl.1 <- c.tbl[c.tbl == 1]
            getj.training.k <-
                seq(1, length(data$years.j))[data$years.j <= year.cutoff | data$iso.j %in% names(c.tbl.1)]
            }
        ## make sure there's one obs in test/training for unmet, and one in training for total
        ## (note: test set is not constructed for total)
    if( sum(!is.na(data$props.unmet.j[getj.training.k])) == 0){
          print("Warning: training set unmet is empty")
    }
    if( sum(!is.na(data$props.unmet.j[-getj.training.k])) == 0){
          print("Warning: test set unmet is empty")
    }
    if( sum(!is.na(data$props.tot.j[getj.training.k])) == 0){
          print("Warning: training set tot is empty")
    }
  }
  if (exclude.unmet.only){
    ##details<<  If \code{exclude.unmet.only}, leave out all data in round(20%  no countries with data)
    n.unmet.c <- rep(NA, C)
    for (c in 1:C){
      n.unmet.c[c] <- sum(!is.na(data$props.unmet.j[data$iso.j == iso.c[c]]))
    }
    ncountrieswithdata <- sum(n.unmet.c !=0) # 108 countries
    nleaveout <- round(0.2*ncountrieswithdata)
    # sample countries to leave out
    indicesofcountriestoleaveout <-  sample(seq(1,C)[n.unmet.c>0], nleaveout)
    getj.training.k <- seq(1, length(data$years.j))[ is.element(data$iso.j, iso.c[-indicesofcountriestoleaveout])]
  }

    if(at.random.no.data) {
        n.training <- max(1, min(C - 1, round((1 - at.random.no.data.test.prop) * C))) #irrespective of breakdown
        unmet.empty <- TRUE
        if(is.null(at.random.no.data.strata)) {
            while(unmet.empty) {
                get.training.c <- iso.c[sample(1:C, n.training)]
                idx <- data$iso.j %in% get.training.c
                getj.training.k <- (1:J)[idx]
                unmet.train <- sum(!is.na(data$props.unmet.j[idx]))
                unmet.test <-  sum(!is.na(data$props.unmet.j[-idx]))
                if(unmet.train != 0 && unmet.test != 0) unmet.empty <- FALSE
            }
            ## if stratified
        } else {
            ## Read in the countryinfo
           if(!all(validation.list$at.random.no.data.strata %in% colnames(country.info))) {
                stop("Not all 'at.random.no.data.strata' are in 'regioninfo.csv'.")
            }
            iso.c <- intersect(iso.c, country.info$ISO.code)
            strata <-
                country.info[country.info$ISO.code %in% iso.c
                            ,c("ISO.code"
                              ,validation.list$at.random.no.data.strata
                               )
                            ,drop = FALSE]
            ## Must be at least two countries in strata to take some out.
            s.tbl <-
                as.data.frame(table(strata[,validation.list$at.random.no.data.strata])
                            ,stringsAsFactors = TRUE)
            if(identical(ncol(strata), 2L)) colnames(s.tbl)[1] <- colnames(strata)[2]
            strata <- merge(s.tbl, strata)
            strata <- strata[strata$Freq > 2,]
            tries <- 0
            while(unmet.empty) {
                if(tries > 99) stop("Have tried to find training set 100 times and have not succeeded. Please reduce, or change, the strata.")
                get.training.c <-
                    tapply(strata$ISO.code, strata[,validation.list$at.random.no.data.strata]
                          ,FUN = function(z) {
                              sample(z, size = (1 - exclude.unmet.only.test.prop) * length(z))
                          }
                          )
                idx <- data$iso.j %in% unlist(get.training.c)
                getj.training.k <- (1:J)[idx]
                unmet.train <- sum(!is.na(data$props.unmet.j[idx]))
                unmet.test <-  sum(!is.na(data$props.unmet.j[-idx]))
                if(unmet.train != 0 && unmet.test != 0) unmet.empty <- FALSE
                tries <- tries + 1
        }
        }}
    if(leave.iso.out) {
        ## 'data$iso.j' is made numeric in 'ReadDataAll()' so make sure
        ## 'leave.iso.out.iso.test' matches.
        if(is.factor(leave.iso.out.iso.test)) {
            leave.iso.out.iso.test <-
                levels(leave.iso.out.iso.test)[leave.iso.out.iso.test]
        }
        leave.iso.out.iso.test <-
            as.numeric(leave.iso.out.iso.test)
        if(!(leave.iso.out.iso.test %in% data$iso.j)) {
            stop("No country in input data has ISO Code '", leave.iso.out.iso.test, "'; please choose a different one for the test country.")
        } else {
            getj.training.k <- which(!(data$iso.j %in% leave.iso.out.iso.test))
        }
    }
  if (leave1) {
    # leave out all observations but one in set of countries with > 1 obs.
    # not yet implemented!
  }
  # save(getj.training.k, file = "getj.training.k.rda")
  # Note: reset seed after calling this function?
  ##value<< vector with indices for training set (getj.training.k)
  return(getj.training.k)
}
#----------------------------------------------------------
PlotValidationResults <- function(# Plot lots of results!
  ### Wrapper function to plot lots of results.
  run.name = "test", ##<< Run name
  output.dir = NULL, ##<< Directory where MCMC array and meta are stored.
  ## If NULL, it's \code{output/run.name}, default from \code{runMCMC}.
  fig.dir = NULL, ##<< Directory to store overview plots. If NULL, folder "fig" in current working directory.
  #  plot.ind.country.results = FALSE ##<< Create zillion plots for all countries?
  ## If TRUE, plots are saved in subdirectory "country.plots" in fig.dir.
  plot.CIs  = TRUE, # create contr use overview plots for all countries?
  return.res.paper = FALSE,     #[MCW-2017-03-20-1] :: Setting to 'TRUE' creates data frame with all validation results stored.
  keep.all = FALSE,
  UWRA = FALSE                  # unmarried women run?
  ){
    if (is.null(fig.dir)){
        fig.dir <- file.path(getwd(), "fig/")
        dir.create(fig.dir, showWarnings = FALSE)
    }
                                # put separate?
    if (is.null(output.dir)){
        output.dir <- file.path(getwd(), "output", run.name, "/")
    }

    load(file = file.path(output.dir,"res.country.rda")) # change JR, 20140418
    load(file = file.path(output.dir,"mcmc.meta.rda")) # change JR, 20140418
    validation <- !is.null(mcmc.meta$validation.list)
    validation.list <- mcmc.meta$validation.list

    if(isTRUE(validation.list$at.random.no.data) || isTRUE(validation.list$leave.iso.out)) {
        mcmc.meta$data.raw$data <- rbind(mcmc.meta$data.raw$data
                                        ,mcmc.meta$winbugs.data$data.test)
        mcmc.meta$data.raw$country.info <-
            rbind(mcmc.meta$data.raw$country.info
                 ,mcmc.meta$data.raw$country.info.no.data)
        C <- mcmc.meta$winbugs.data$C + mcmc.meta$winbugs.data$C.no.data
        getc.j.test <-
            c(rep(NA, mcmc.meta$winbugs.data$J)
             ,mcmc.meta$winbugs.data$getc.j.test
              )
    } else {
        C <- mcmc.meta$winbugs.data$C
        getc.j.test <- mcmc.meta$winbugs.data$getc.j
        }
    data <- mcmc.meta$data.raw$data
    country.info <- mcmc.meta$data.raw$country.info
    region.info <- mcmc.meta$data.raw$region.info
                                #This is already the union of regions for
                                #countries with and without data
    winbugs.data <- mcmc.meta$winbugs.data

    ## Test set proportion
    test.prop <-
        with(validation.list, {
            c(exclude.unmet.only.test.prop, at.random.test.prop
             ,at.random.no.data.test.prop)[c(exclude.unmet.only, at.random, at.random.no.data)]
            })

  if (!validation){
    print("Not a validation exercise!")
    return()
  }
  load(file = file.path(output.dir,"Ps_validation.rda")) # change JR, 20140418
  if (validation.list$exclude.unmet.only){
    select.unmet.c <- unique(getc.j.test[winbugs.data$getj.test.unmet.k])
  } else {
    select.unmet.c <- NULL
  }
  if (plot.CIs){
    PlotDataAndEstimates(data.raw = mcmc.meta$data.raw, #country.info = country.info,
                         validation = validation,
                         select.c = select.unmet.c,
                         getj.test.k = mcmc.meta$winbugs.data$getj.test.k,
                         getj.test.unmet.k = mcmc.meta$winbugs.data$getj.test.unmet.k,
                         CI.Lg.Lcat.qt = res.country$CIprop.Lg.Lcat.qt,
                         CIstar.Lg.Lcat.qt = res.country$CIstar.Lg.Lcat.qt,
                         CIratio.Lg.Lcat.qt = res.country$CIratio.Lg.Lcat.qt,
                         fig.name = file.path(fig.dir, paste0(run.name, "CIs.pdf")) # change JR, 20140418
                         ,UWRA = UWRA
                         )
    PlotDataAndEstimates(data.raw = mcmc.meta$data.raw, #country.info = country.info,
                         validation = validation,
                         select.c = select.unmet.c,
                         getj.test.k = mcmc.meta$winbugs.data$getj.test.k,
                         getj.test.unmet.k = mcmc.meta$winbugs.data$getj.test.unmet.k,
                         CI.Lg.Lcat.qt = res.country$CIprop.Lg.Lcat.qt,
                         CIstar.Lg.Lcat.qt = res.country$CIstar.Lg.Lcat.qt,
                         CIratio.Lg.Lcat.qt = res.country$CIratio.Lg.Lcat.qt,
                         fig.name = file.path(fig.dir, paste0(run.name, "CIs_diag.pdf")) # change JR, 20140418
                        ,UWRA = UWRA
                         ,ymin.at.0 = "data", ##<< Set lower bound on y-axis at 0?
                         ,ymax.at.100 = "data", ##<< Set upper bound on y-axis at percent = 100%? Only applies if plot.prop is TRUE.
                         )
  }
  ## open text file to write results
  validation.res.file <- file.path(fig.dir, paste0(run.name, "validationres.html")) # change JR, 20140418
  cat("", file = validation.res.file, append = F)

    ## when summarizing results in table: select only last observation year in each country
    ## Note: can be different for unmet because less observations!
    ## if(isTRUE(validation.list$at.random.no.data)) {
    ##     j.include.c <- j.include.unmet.c <- rep(0, C)
    ##     J <- length(data$props.unmet.j)
    ##     select.unmet.c <- unique(getc.j.test[winbugs.data$getj.test.unmet.k])
    ##     for (c in select.unmet.c) {
    ##         select <-
    ##             seq(1, J)[getc.j.test==c & is.element(seq(1, J), winbugs.data$getj.test.unmet.k) & !is.na(data$props.unmet.j)]
    ##         kk <- which.max(data$years.j[select])
    ##         j.include.unmet.c[c] <- seq(1, J)[select][kk]
    ##     }
    ## } else {
        j.include.c <- j.include.unmet.c <- j.include.c.tot <- rep(0, C)
        J <- length(data$props.unmet.j)
        select.unmet.c <- unique(getc.j.test[winbugs.data$getj.test.unmet.k])
        for (c in select.unmet.c){
            select <- seq(1, J)[getc.j.test==c
                                & is.element(seq(1, J), winbugs.data$getj.test.unmet.k)
                                & !is.na(data$props.unmet.j) ]
            kk <- which.max(data$years.j[select])
            j.include.unmet.c[c] <- seq(1, J)[select][kk]
        }
    ## }
    ## T/F indicator
    include.unmet.j <- is.element(seq(1, length(data$years.j)), j.include.unmet.c)

#   # which country has P.j missing?
#   length(select.unmet.c)
#   sum(include.unmet.j)
#   sum(!is.na(Ps$P.jp[include.unmet.j,3]))
#   sum(!is.na(winbugs.data$logitratio.yunmet.j[include.unmet.j]))
#
#   sum(is.na(Ps$P.jp[include.unmet.j,3]))
#   which.max(is.na(Ps$P.jp[include.unmet.j,3]))
#   getc.j.test[include.unmet.j]
#   data$name.j[include.unmet.j][40]
#  length(include.unmet.j)


  # now make the one for trad/modern
    if (!is.null(winbugs.data$getj.test.k)){
            select.c <- unique(getc.j.test[winbugs.data$getj.test.k[1:winbugs.data$n.test.breakdown]])
            for (c in select.c){
                select <- seq(1, J)[getc.j.test==c
                                    & is.element(seq(1, J), winbugs.data$getj.test.k[1:winbugs.data$n.test.breakdown])
                                    & !is.na(data$props.trad.j)]
                                # most recent when left out at end
                                # random for 20%
                if (length(select)==1){
                    kk <- select # kk different here from above!
                } else {
                    kk <- ifelse(validation.list$at.end, select[which.max(data$years.j[select])],
                                #                  which.min(data$years.j[select]) )
                                #                  which.max(data$years.j[select]) )
                                 sample(select, size = 1) )
                                # watch out: sample (x,1) gives a sample from 1:x!
                }
                j.include.c[c] <- seq(1, J)[kk]
            }
            include.j <- is.element(seq(1, length(data$years.j)), j.include.c)
            if(isTRUE(validation.list$at.random.no.data) || isTRUE(validation.list$leave.iso.out)) {
                idx.tot.j <- seq(1, J)[is.na(data$props.trad.j)]
                idx.tot.k <- (winbugs.data$n.test.breakdown + 1):winbugs.data$J.test
                ## Need to count those with only 'tot' as well.
                select.c <-
                    unique(getc.j.test[winbugs.data$getj.test.k[idx.tot.k]])
                for (c in select.c){
                    select <- seq(1, J)[getc.j.test==c & is.element(seq(1, J)
                                                    ,winbugs.data$getj.test.k[idx.tot.k]
                                                     )]
                                # most recent when left out at end
                                # random for 20%
                    if (length(select)==1){
                        kk <- select # kk different here from above!
                    } else {
                        kk <- sample(select, size = 1)
                                # watch out: sample (x,1) gives a sample from 1:x!
                    }
                    j.include.c.tot[c] <- seq(1, J)[kk]
                }
                include.tot.j <- include.j
                include.tot.j[idx.tot.j] <-
                    is.element(seq(1, length(data$years.j)), j.include.c.tot)[idx.tot.j]
            }
        } else {
            include.tot.j <- include.j <- rep(FALSE, length(data$years.j))
  }
  # check:
  #data.frame(data$iso.j, data$years.j, data$props.modern.j, include.j)
  #data.frame(data$iso.j, data$years.j, data$props.unmet.j, include.unmet.j)

    nameplot2 <-
        ifelse(mcmc.meta$validation.list$exclude.unmet.only, "Unmet only",
        ifelse(mcmc.meta$validation.list$at.random, paste0(test.prop * 100, "%"),
        ifelse(mcmc.meta$validation.list$at.end
              ,paste("Up to (excl) year", mcmc.meta$validation.list$year.cutoff),
        ifelse(mcmc.meta$validation.list$at.random.no.data, paste0(test.prop * 100, "% (no data exercise)")
              ,NA
               ))))
    res.paper <- NULL
        CI.df <- Error.df <- data.frame()
  pdf(file.path(fig.dir, paste0(run.name, "propoutside.pdf")), width = 16, height = 10) # change JR, 20140418
    for (p in 1:4){
        if (p==4){
            includeforp.j <- include.unmet.j
        } else if (p==3 && (isTRUE(validation.list$at.random.no.data) || isTRUE(validation.list$leave.iso.out))) {
            includeforp.j <- include.tot.j
        } else {
            includeforp.j <- include.j
        }
            if(!(p == 4 && validation.list$leave.iso.out)) {
        P.j <- Ps$P.jp[,p]
        ##error.j <- Ps$error.jp[,p]
        error.est.j <- Ps$error.est.jp[,p]
        nameplot1s <- c("Traditional", "Modern", "Total", "Unmet")
        if (sum(!is.na(P.j))>0){
            nameplot1 <- nameplot1s[p]
            res <- PlotValResults(include.j = includeforp.j, P.j = P.j,
                                  data = data,
                                  country.info = country.info,
                                  winbugs.data = winbugs.data,
                                  nameplot = paste(nameplot1, nameplot2, sep = " - "),
                                  error.est.j = error.est.j
                                 ,validation.list = validation.list)
            ## print table: html and tex
            print(xtable::xtable(res$table.ix, ##digits = c(0,0,0,0,0),
                         caption = paste(nameplot1, nameplot2)),
                  file = validation.res.file, append = T, type = "html")
            print(xtable::xtable(res$table2.ix, ##digits = c(0,0,0,0,0),
                         caption = paste(nameplot1, nameplot2)),
                  type="html", file = validation.res.file, append = T)
            addpaper <-unlist( c(
                res$table.ix[1, c("# Obs")],
                100*res$table.ix[1, c("Below 95% CI","Within 95% CI","Above 95% CI")] , # first one is all obs
                100*unlist(res$table2.ix[1, c("Prop below median")]),
                res$table2.ix[1, c("Median error","Median abs error")]
            ))
            if (is.null(res.paper)){
                res.paper <- addpaper
            } else {
                res.paper <- rbind(res.paper, addpaper)
            }
            if(keep.all) {
                CI.df <-
                    rbind(CI.df
                         ,data.frame(res$table.ix, group = rownames(res$table.ix)
                                    ,indicator = nameplot1s[p]
                                    ,check.names = FALSE
                                    ,row.names = NULL
                                     )
                          )
                Error.df <-
                    rbind(Error.df
                         ,data.frame(res$table2.ix
                                    ,group = rownames(res$table2.ix)
                                    ,indicator = nameplot1s[p]
                                    ,check.names = FALSE
                                    ,row.names = NULL
                                     )
                          )
            }
            }
            }
    }
    dev.off()
  if (prod(dim(res.paper))>1) rownames(res.paper) <- nameplot1s[1:nrow(res.paper)]
  # # plot(exp(Ps$error.j$error.trad.j) ~ data$props.trad.j)
  # # plot(exp(Ps$error.j$error.trad.j[ssa.j=="Yes"]) ~ data$props.trad.j[ssa.j=="Yes"])
  # # plot(Ps$error.j$error.trad.j ~ data$props.trad.j)
  # # plot(Ps$error.j$error.trad.j ~ winbugs.data$ratios.trad.modern.jn[,1])
  # # plot(Ps$error.j$error.modern.j ~ winbugs.data$ratios.trad.modern.jn[,2])
  # # plot(Ps$error.j$error.unmet.j ~ winbugs.data$logitratio.yunmet.j)
  #
  # #---------------------------------------------------------------------------------
  # # plot two sets of estimates for comparison
  # load(file = file.path(work.dir.save, paste0(name.dir.all,"CIs.rda")))
  # CIs.all <- CIs
  # load(file.path(work.dir.save, paste0(name.dir, "CIs.rda")))
  # CIs.tr <- CIs
  #
  # pdf(file.path(work.dir.save, paste0(name.dir, "_comparison_", name.dir.all, "CIs.pdf")), width = 21, height = 12)
  #   PlotDataAndEstimates(data = data, country.info = country.info,
  #                     # start.year = start.year, end.year = end.year,
  #                            # par.ciq = par.ciq, plot.blue.line = FALSE,
  #                             validation = validation,
  #                             getj.test.k = winbugs.data$getj.test.k,
  #                             getj.test.unmet.k = winbugs.data$getj.test.unmet.k,
  #                             CIs = CIs.all, CIs2 = CIs.tr,
  #                             name.dir = name.dir.all, name.dir2 = name.dir,
  #                             overview.country.plot = F, country.plot = F)
  # dev.off()

  #---------------------------------------------------------------------------------------------
  # Compare CIs in comp.year (e.g. 2008)
  # when leaving out data at the end for countries where data was left-out
  # or for unmet
  # a. get countries where data was left out
  # Note: for unmet we use the same countries as where trad/mod were left out
  # if (!is.null(winbugs.data$getj.test.k)){
  #   select.c <- unique(getc.j.test[winbugs.data$getj.test.k[1:winbugs.data$n.test.breakdown]])
  # } else {
  #   select.c <- unique(getc.j.test[winbugs.data$getj.test.unmet.k])
  # }
  #
  # comp.year <- 2008.5
  # # b. results
  # # diff = all - training
  # # all (updated) CI outside old (training) 80% CI?
  # diff.cp <- above80.cp <- below80.cp <- matrix(NA, length(country.info$iso.c),4) # trad, mod, tot, unmet
  # for (c in select.c){
  #   for (p in 1:4){
  #     CIall.median <- ifelse(p==1, CIs.all$CIs.trad.cqt[c,3,CIs.all$est.years==comp.year],
  #                     ifelse(p==2, CIs.all$CIs.modern.cqt[c,3,CIs.all$est.years==comp.year],
  #                     ifelse(p==3, CIs.all$CIs.tot.cqt[c,3,CIs.all$est.years==comp.year],
  #                       CIs.all$CIs.unmet.cqt[c,3,CIs.all$est.years==comp.year])))
  #     CItr.q <- ifelse(rep(p,5)==1, CIs.tr$CIs.trad.cqt[c,,CIs.tr$est.years==comp.year],
  #                     ifelse(rep(p,5)==2, CIs.tr$CIs.modern.cqt[c,,CIs.tr$est.years==comp.year],
  #                     ifelse(rep(p,5)==3, CIs.tr$CIs.tot.cqt[c,,CIs.tr$est.years==comp.year],
  #                       CIs.tr$CIs.unmet.cqt[c,,CIs.tr$est.years==comp.year])))
  #     diff.cp[c,p] <- CIall.median - CItr.q[3]
  #     below80.cp[c,p] <- ifelse ((CIall.median < CItr.q[2]), 1, 0)
  #     above80.cp[c,p] <- ifelse ((CIall.median > CItr.q[4]), 1, 0)
  # }}
  #
  # # specify subgroups of left-out observations:
  # reg.c <-  country.info$reg.c
  # ssa.c <- country.info$ssa.c
  # dev.c <- country.info$dev.c
  # select.ci <- cbind(rep(TRUE, C), ssa.c=="No", ssa.c=="Yes",
  #                    dev.c=="Poor", dev.c=="Rich")
  # namesselect <- c("All", "OutsideSSA", "SSA", "Poor", "Rich")
  # I <- dim(select.ci)[2] # number of subgroups
  # # already defined
  # # nameplot2 <-ifelse(mcmc.meta$validation.list$exclude.unmet.only, "Unmet only",
  # #                    ifelse(mcmc.meta$validation.list$at.random, "20%",
  # #                     ifelse(mcmc.meta$validation.list$at.end,  paste("Up to (excl) year",
  # #                     mcmc.meta$validation.list$year.cutoff),NA)))
  # for (p in 1:4){
  #   table.ix <- NULL # x refers to table columns (outputs)
  #   nameplot1 <- c("Traditional", "Modern", "Total", "Unmet")[p]
  #   for (i in 1:I){
  #     select <- select.ci[,i]
  #     table.ix <- rbind(table.ix,
  #                     c(sum(!is.na(diff.cp[select,p])),
  #                       mean(diff.cp[select,p], na.rm = T), mean(abs(diff.cp[select,p]), na.rm = T),
  #                       mean(below80.cp[select,p], na.rm = T), mean(above80.cp[select,p], na.rm = T)))
  #   }
  #   nobs.i <- table.ix[, 1]
  #   rownames(table.ix) <- paste0(namesselect, " (", nobs.i, ")")
  #   colnames(table.ix) <- c("# Obs", "Mean diff", "Medan abs diff", "% Below 80CI", "% Above 80CI")
  #   # print table
  #   print(xtable::xtable(table.ix, #digits = c(0,0,0,0,0),
  #                 caption = paste("Change in median/CI: Results for ", comp.year, ", ", nameplot1, " (",
  #                                 nameplot2, ")")),
  #                 file = validation.res.file, append = T, type = "html")
  # }
  # # plots
  # # hist(diff.cp[,1])
  if (return.res.paper){
    propleftoutunmet <- (sum(!is.na(winbugs.data$logitratio.yunmet.j[winbugs.data$getj.test.unmet.k]))/
          sum(!is.na(winbugs.data$logitratio.yunmet.j)      ))
    propleftoutmod <-  (sum(!is.na(winbugs.data$ratios.trad.modern.jn[,2][winbugs.data$getj.test.k]))/
           sum(!is.na(winbugs.data$ratios.trad.modern.jn[,2])      ))

    return(  list(res.paper = res.paper, propleftoutunmet = propleftoutunmet,
                  propleftoutmod = propleftoutmod,
                  nunmetleftout = length(select.unmet.c ),
                  nunmetleftout2 = sum(include.unmet.j)))
                  #nleftout = length(select.c ))) # matches up for non-unmet exe, error for unmet exercise
    # select.unmet.c relevant for unmet val exericise only
  }
    if(keep.all){
        validation.results <- list(CI.df, Error.df)
        save.path <- file.path(output.dir, "validn_repts"
                             ,"validation.results.rda"
                              )
        save(validation.results
            ,file = save.path)
        message("Validation results saved to ", save.path)
        }
}# end non-validation results
##----------------------------------------------------------------------
##' Plot samples from predictive densities with data point overlaid.
##'
##' A histogram of the posterior predictive density for each observed
##' data point is produced with the observed value itself marked on
##' the plot.
##'
##' .. content for \details{} ..
##' @param run.name
##' @param output.dir
##' @return
##' @author Mark Wheldon with lots taken from other ContraceptiveUse functions
##' @noRd
PlotPredDens <- function(run.name = "test", ##<< Run name
                         output.dir = NULL,
                         fig.dir = NULL ##<< Directory where MCMC array and meta are stored.
                         ) {

    ## -------* SET UP

    load(file = file.path(output.dir,"mcmc.meta.rda")) # change JR, 20140418
    if(!is.null(mcmc.meta$validation.list)) {
        warning("Predictive density plots only available for non-validation runs.")
        return(invisible())
    }
    if(mcmc.meta$general$do.country.specific.run) {
        warning("Predictive density plots not available for one country runs.")
        return(invisible())
    }
    if(!ModelFunctionPredDens(mcmc.meta$general$write.model.fun)) {
        warning(paste0("Predictive density plots only available for the following types of model:\n"
                      ,paste(ModelFunctionPredDens(mcmc.meta$general$write.model.fun
                                                  ,return.functions = TRUE)
                             ,collapse = ", "
                             ))
                ,collapse = ", ")
        return(invisible())
    }

    load(file = file.path(output.dir, "mcmc.array.rda"))

    if (is.null(output.dir)){
        output.dir <- file.path(getwd(), "output", run.name, "/")
    }
    if(is.null(fig.dir)) {
        fig.dir <- file.path(output.dir, "fig")
    }

    winbugs.data <- mcmc.meta$winbugs.data
    parnames.list <- mcmc.meta$parnames.list

    mod.fit.dir <- file.path(output.dir, "model_fit")
    if(!dir.exists(file.path(mod.fit.dir))) dir.create(file.path(mod.fit.dir), recursive = TRUE)

    ## -------* PLOTS and P-Values

    ## -------** Unmet

    pvals.df <- data.frame()

    pdf(width = 12, height = 12
       ,file = file.path(mod.fit.dir, "post_pred_samples_unmet.pdf"))
    n.pages <- ceiling(winbugs.data$n.training.unmet / 25)
    for(p in 1:n.pages) {
        par(mfrow = c(5, 5))
        for(k in (1 + (p - 1) * 25):min(winbugs.data$n.training.unmet, p * 25)) {
            parname <- paste0("pred.logitratio.yunmet.j["
                             ,winbugs.data$getj.training.unmet.k[k]
                             ,"]")
            j.c.name <-
                mcmc.meta$data.raw$data[winbugs.data$getj.training.unmet.k[k], "name.j"]
            j.year <-
                mcmc.meta$data.raw$data[winbugs.data$getj.training.unmet.k[k], "years.j"]
            obs <- winbugs.data$logitratio.yunmet.j[winbugs.data$getj.training.unmet.k[k]]
            pvals <- data.frame(parname = parname, Pr.lt = mean(mcmc.array[,,parname] < obs)
                               ,Pr.gt = mean(mcmc.array[,,parname] > obs))
            pvals.df <- rbind(pvals.df, pvals)
            hist(mcmc.array[,,parname], freq = FALSE
                ,xlim = c(min(obs, mcmc.array[,,parname]) - 0.2 * abs(min(obs, mcmc.array[,,parname]))
                         ,max(obs, mcmc.array[,,parname]) + 0.2 * abs(max(obs, mcmc.array[,,parname])))
                ,main = paste(parname, "\n", abbreviate(j.c.name, 15), " "
                             ,round(j.year, 1))
                ,xlab = "", sub = paste0("Pr < obs ", round(pvals$Pr.lt, 2), ". Pr > obs ", round(pvals$Pr.gt, 2)))
            lines(density(mcmc(mcmc.array[,,parname])))
            abcol <- "blue"
            if(pvals[2] < 0.05 || pvals[3] < 0.05) abcol <- "red"
            abline(v = obs, col = abcol, lwd = 2)
        }
    }
    dev.off()
    write.csv(pvals.df, file = file.path(mod.fit.dir, "post_pred_samples_unmet.csv")
             ,row.names = FALSE)

    ## -------** Mod/Trad Breakdown

    pvals.df <- data.frame()

    pdf(width = 12, height = 12
       ,file = file.path(mod.fit.dir, "post_pred_samples_bdown.pdf"))
    n.pages <- ceiling(2 * winbugs.data$n.training.breakdown / 24)
    for(p in 1:n.pages) {
        par(mfrow = c(5, 5))
        for(k in (1 + (p - 1) * 24 / 2):(min(winbugs.data$n.training.breakdown, p * 24 / 2))) {

            j.c.name <-
                mcmc.meta$data.raw$data[winbugs.data$getj.training.k[k], "name.j"]
            j.year <-
                mcmc.meta$data.raw$data[winbugs.data$getj.training.k[k], "years.j"]

            ## Trad '[k,1]'
            parname <- paste0("pred.ratios.trad.modern.jn["
                             ,winbugs.data$getj.training.k[k]
                             ,",1]")
            obs <- winbugs.data$ratios.trad.modern.jn[winbugs.data$getj.training.k[k], 1]
            pvals <- data.frame(parname = parname, Pr.lt = mean(mcmc.array[,,parname] < obs)
                               ,Pr.gt = mean(mcmc.array[,,parname] > obs))
            pvals.df <- rbind(pvals.df, pvals)
            hist(mcmc.array[,,parname], freq = FALSE
                ,xlim = c(min(obs, mcmc.array[,,parname]) - 0.2 * abs(min(obs, mcmc.array[,,parname]))
                         ,max(obs, mcmc.array[,,parname]) + 0.2 * abs(max(obs, mcmc.array[,,parname])))
                ,main = paste(parname, "\n", abbreviate(j.c.name, 15), " "
                             ,round(j.year, 1))
                ,xlab = "", sub = paste0("Pr < obs ", round(pvals$Pr.lt, 2), ". Pr > obs ", round(pvals$Pr.gt, 2)))
            lines(density(mcmc(mcmc.array[,,parname])))
            abcol <- "blue"
            if(pvals[2] < 0.05 || pvals[3] < 0.05) abcol <- "red"
            abline(v = obs, col = abcol, lwd = 2, lty = 2) #different lty to distinguish easily

            ## Modern '[k,2]'
            parname <- paste0("pred.ratios.trad.modern.jn["
                             ,winbugs.data$getj.training.k[k]
                             ,",2]")
            obs <- winbugs.data$ratios.trad.modern.jn[winbugs.data$getj.training.k[k], 2]
            pvals <- data.frame(parname = parname, Pr.lt = mean(mcmc.array[,,parname] < obs)
                               ,Pr.gt = mean(mcmc.array[,,parname] > obs))
            pvals.df <- rbind(pvals.df, pvals)
            hist(mcmc.array[,,parname], freq = FALSE
                ,xlim = c(min(obs, mcmc.array[,,parname]) - 0.2 * abs(min(obs, mcmc.array[,,parname]))
                         ,max(obs, mcmc.array[,,parname]) + 0.2 * abs(max(obs, mcmc.array[,,parname])))
                ,main = paste(parname, "\n", abbreviate(j.c.name, 15), " "
                             ,round(j.year, 1))
                ,xlab = ""
                ,sub = paste0("Pr < obs ", round(pvals$Pr.lt, 2), ". Pr > obs ", round(pvals$Pr.gt, 2)))
            lines(density(mcmc(mcmc.array[,,parname])))
            abcol <- "blue"
            if(pvals[2] < 0.05 || pvals[3] < 0.05) abcol <- "red"
            abline(v = obs, col = abcol, lwd = 2)
        }

        plot(1, type = "n", xlab = "", ylab = "", axes = F)
        legend("topright", lty = c(1, 2), col = "blue", legend = c("Mod", "Trad"))
        legend("bottomleft", lty = c(1, 2), col = "red", legend = c("Pr </> 0.05", "Pr </> 0.05"))
    }
    dev.off()
    write.csv(pvals.df, file = file.path(mod.fit.dir, "post_pred_samples_bdown.csv")
             ,row.names = FALSE)

    ## -------** No Mod/Trad Breakdown

    pvals.df <- data.frame()

    pdf(width = 12, height = 12
       ,file = file.path(mod.fit.dir, "post_pred_samples_tot.pdf"))
    n.pages <- ceiling(winbugs.data$n.training.tot / 25)
    for(p in 1:n.pages) {
        par(mfrow = c(5, 5))
        for(k in (1 + (p - 1) * 25):min(winbugs.data$n.training.tot, p * 25)) {
            j.c.name <-
                mcmc.meta$data.raw$data[winbugs.data$getj.training.tot.k[k], "name.j"]
            j.year <-
                mcmc.meta$data.raw$data[winbugs.data$getj.training.tot.k[k], "years.j"]
            parname <- paste0("pred.ytot.j["
                             ,winbugs.data$getj.training.tot.k[k]
                             ,"]")
            obs <- winbugs.data$logit.ytot.j[winbugs.data$getj.training.tot.k[k]]
            pvals <- data.frame(parname = parname, Pr.lt = mean(mcmc.array[,,parname] < obs)
                               ,Pr.gt = mean(mcmc.array[,,parname] > obs))
            pvals.df <- rbind(pvals.df, pvals)
            hist(mcmc.array[,,parname], freq = FALSE
                ,xlim = c(min(obs, mcmc.array[,,parname]) - 0.2 * abs(min(obs, mcmc.array[,,parname]))
                         ,max(obs, mcmc.array[,,parname]) + 0.2 * abs(max(obs, mcmc.array[,,parname])))
                ,main = paste(parname, "\n", abbreviate(j.c.name, 15), " "
                             ,round(j.year, 1))
                ,xlab = "", sub = paste0("Pr < obs ", round(pvals$Pr.lt, 2), ". Pr > obs ", round(pvals$Pr.gt, 2)))
            lines(density(mcmc(mcmc.array[,,parname])))
            abcol <- "blue"
            if(pvals[2] < 0.05 || pvals[3] < 0.05) abcol <- "red"
            abline(v = obs, col = abcol, lwd = 2)
        }
    }
    dev.off()
    write.csv(pvals.df, file = file.path(mod.fit.dir, "post_pred_samples_tot.csv")
             ,row.names = FALSE)

    message("Histograms and csv files saved to ", file.path(mod.fit.dir))

}
##----------------------------------------------------------------------

## Compute information criteria (AIC, WAIC, BIC).
##
## .. content for \description{} (no empty lines) ..
##
## .. content for \details{} ..
## @param run.name
## @param output.dir
## @return
## @author Mark Wheldon with lots taken from other ContraceptiveUse functions
## @noRd
ComputeInformationCriteria <-
    function(run.name = "test", ##<< Run name
             output.dir = NULL
             ,fig.dir = NULL) {##<< Directory where MCMC array and meta are stored.

        ## -------* SET UP

        load(file = file.path(output.dir,"mcmc.meta.rda")) # change JR, 20140418
        winbugs.data <- mcmc.meta$winbugs.data
        if(!is.null(mcmc.meta$validation.list)) {
            warning("Information criteria only available for non-validation runs.")
            return(invisible())
        }

        if(mcmc.meta$general$do.country.specific.run) {
            warning("Information criteria not available for one country runs.")
            return(invisible())
        }

    if(!ModelFunctionPredDens(mcmc.meta$general$write.model.fun)) {
        warning(paste("Predictive density plots only available for the following types of model:\n"
                      ,ModelFunctionPredDens(mcmc.meta$general$write.model.fun
                                            ,return.functions = TRUE)
                             ,collapse = ", "
                             ))
        return(invisible())
    }

        load(file = file.path(output.dir, "mcmc.array.rda"))

        if (is.null(output.dir)){
            output.dir <- file.path(getwd(), "output", run.name, "/")
        }

        winbugs.data <- mcmc.meta$winbugs.data
        parnames.list <- mcmc.meta$parnames.list

        mod.fit.dir <- file.path(output.dir, "model_fit")
        if(!dir.exists(file.path(mod.fit.dir))) dir.create(file.path(mod.fit.dir), recursive = TRUE)

        ## -------* MAIN

        ## -------** WAIC

        ## From Gelman et al. BDA Ed 3

        ## Indices of log-densities in 'mcmc.array'
        unmet.idx <- which(dimnames(mcmc.array)[[3]] %in% parnames.list$parnames.logdens.unmet)
        bdown.idx <- which(dimnames(mcmc.array)[[3]] %in% parnames.list$parnames.logdens.bdown)
        tot.idx <- which(dimnames(mcmc.array)[[3]] %in% parnames.list$parnames.logdens.tot)

        ## Pointwise predictive density
        ppd <-
            apply(mcmc.array[,,c(unmet.idx, bdown.idx, tot.idx)], 3
                , function(z) mean(exp(z)))
                                #take 'exp' because elements are log-densities
                                #takes average over posterior of theta

        ## Log pointwise predictive density
        lppd <-
            apply(mcmc.array[,,c(unmet.idx, bdown.idx, tot.idx)], 3
                , function(z) mean(z))
                                #don't exponentiate because already log-densities
                                #takes average over posterior of theta

        ## WAIC Correction version 1
        pWAIC1 <- 2 * sum(log(ppd) - lppd)

        ## WAIC Correction version 2
        pWAIC2 <- sum(apply(mcmc.array[,,c(unmet.idx, bdown.idx, tot.idx)], 3
                          , function(z) var(as.numeric(z))))
                                # 'as.numeric' because z is a matrix;
                                # 'var' would then return a var-covar
                                # matrix

        ## WAICs
        WAIC1 <- -2 * sum(lppd) + 2 * pWAIC1
        WAIC2 <- -2 * sum(lppd) + 2 * pWAIC2

        ## Save
        out <- data.frame(WAIC1 = WAIC1, WAIC2 = WAIC2)
        write.csv(out, file = file.path(mod.fit.dir, "WAIC.csv")
                 ,row.names = FALSE)

        cat("\n--------------------\nWAIC(1) =", WAIC1, "\nWAIC(2) =", WAIC2, "\n--------------------\n")

        return(invisible(out))

        }
##----------------------------------------------------------------------
## Randomly partition a vector into 'n.parts'.
RandomlyPartition <-
    function(x, n.parts = 10, seed = 1) {
        set.seed(seed)

        lx <- length(x)

        ## Randomly re-order
        x <- sample(x, size = lx, replace = FALSE)

        ## Now cut into 'n.parts'
        splits <- as.numeric(cut(1:lx, n.parts))
        tapply(x, splits, "[", simplify = FALSE)
        }
##----------------------------------------------------------------------
## The End!
FPcounts/FPEMglobal documentation built on July 20, 2024, 2:35 a.m.