R/ACC.R
In s2dverification: Set of Common Tools for Forecast Verification

Documented in ACC

#'Computes Anomaly Correlation Coefficient
#'
#'Calculates the Anomaly Correlation Coefficient for the ensemble mean of 
#'each model and the corresponding references for each startdate and each 
#'leadtime.\cr
#'The domain of interest can be specified by providing the list 
#'of longitudes/latitudes (lon/lat) of the grid together with the corners 
#'of the domain:\cr
#' lonlatbox = c(lonmin, lonmax, latmin, latmax).
#'
#'@param var_exp Array of experimental anomalies with dimensions: 
#'  c(nexp, nsdates, nltimes, nlat, nlon) or 
#'  c(nexp, nsdates, nmembers, nltimes, nlat, nlon).
#'@param var_obs Array of observational anomalies, same dimensions as var_exp 
#'  except along the first dimension and the second if it corresponds to the 
#'  member dimension.
#'@param lon Array of longitudes of the var_exp/var_obs grids, optional.
#'@param lat Array of latitudes of the var_exp/var_obs grids, optional.
#'@param lonlatbox Domain to select: c(lonmin, lonmax, latmin, latmax), 
#'  optional.
#'@param conf TRUE/FALSE: confidence intervals and significance level 
#'  provided or not.
#'@param conftype "Parametric" provides a confidence interval for the ACC 
#'  computed by a Fisher transformation and a significance level for the ACC 
#'  from a one-sided student-T distribution. "Bootstrap" provides a confidence 
#'  interval for the ACC and MACC computed from bootstrapping on the members 
#'  with 100 drawings with replacement. To guarantee the statistical robustness 
#'  of the result, make sure that your experiments/oservations/startdates/
#'  leadtimes always have the same number of members.
#'@param siglev The confidence level for the computed confidence intervals.
#'
#'@return 
#'\item{ACC}{
#'  If \code{conf = TRUE}, array with dimensions: 
#'  c(nexp, nobs, nsdates, nleadtimes, 4) 
#'  The fifth dimension of length 4 corresponds to the lower limit of the 
#'  \code{siglev}\% confidence interval, the ACC, the upper limit of the 
#'  \code{siglev}\% confidence interval and the \code{siglev}\% significance 
#'  level. If \code{conf = FALSE}, the array of the Anomaly Correlation 
#'  Coefficient has dimensions c(nexp, nobs, nsdates, nleadtimes).
#'}
#'\item{MACC}{
#'  The array of the Mean Anomaly Correlation Coefficient with dimensions 
#'  c(nexp, nobs, nleadtimes).
#'}
#'
#'@examples
#'# See ?Load for explanations on the first part of this example.
#'  \dontrun{
#'data_path <- system.file('sample_data', package = 's2dverification')
#'expA <- list(name = 'experiment', path = file.path(data_path, 
#'             'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly',
#'             '$VAR_NAME$_$START_DATE$.nc'))
#'obsX <- list(name = 'observation', path = file.path(data_path,
#'             '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$',
#'             '$VAR_NAME$_$YEAR$$MONTH$.nc'))
#'
#'# Now we are ready to use Load().
#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
#'sampleData <- Load('tos', list(expA), list(obsX), startDates, 
#'                   leadtimemin = 1, leadtimemax = 4, output = 'lonlat',
#'                   latmin = 27, latmax = 48, lonmin = -12, lonmax = 40)
#'  }
#'  \dontshow{
#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
#'sampleData <- s2dverification:::.LoadSampleData('tos', c('experiment'),
#'                                                c('observation'), startDates,
#'                                                leadtimemin = 1,
#'                                                leadtimemax = 4,
#'                                                output = 'lonlat',
#'                                                latmin = 27, latmax = 48,
#'                                                lonmin = -12, lonmax = 40)
#'  }
#'sampleData$mod <- Season(sampleData$mod, 4, 11, 12, 2)
#'sampleData$obs <- Season(sampleData$obs, 4, 11, 12, 2)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
#'acc <- ACC(Mean1Dim(ano_exp, 2), Mean1Dim(ano_obs, 2))
#'  \donttest{
#'PlotACC(acc$ACC, startDates)
#'  }
#'@references Joliffe and Stephenson (2012). Forecast Verification: A 
#'  Practitioner's Guide in Atmospheric Science. Wiley-Blackwell.
#'@keywords datagen
#'@author 
#'History:\cr
#'  0.1 - 2013-08 (V. Guemas) - Original code\cr
#'  1.0 - 2013-09 (N. Manubens) - Formatting to CRAN\cr
#'  1.1 - 2013-09 (C. Prodhomme) - optimization\cr
#'  1.2 - 2014-08 (V. Guemas) - Bug-fixes: handling of NA & selection of domain + Simplification of code\cr
#'  1.3.0 - 2014-08 (V. Guemas) - Boostrapping over members\cr
#'  1.3.1 - 2014-09 (C. Prodhomme) - Add comments and minor style changes\cr
#'  1.3.2 - 2015-02 (N. Manubens) - Fixed ACC documentation and examples
#'@importFrom abind abind
#'@importFrom stats qt qnorm quantile
#'@export
ACC <- function(var_exp, var_obs, lon = NULL, lat = NULL,
                lonlatbox = NULL, conf = TRUE, conftype = "parametric",
                siglev = 0.95) {
  #library(abind)

  # Security checks and getting dimensions
  dimsvar <- dim(var_exp)
  if (length(dimsvar) == 5) {
    checkfirst <- 2
  }else if (length(dimsvar) == 6) {
    checkfirst <- 3
    nmembexp <- dimsvar[2]
    nmembobs <- dim(var_obs)[2]
  }else{
    stop("var_exp & var_obs should have dimensions (nexp/nsobs, nsdates, nltimes, nlat, nlon) 
                          or  dimensions (nexp/nsobs, nmembers, nsdates, nltimes, nlat, nlon) ")
  }
  for (iind in checkfirst:length(dimsvar)) {
    if (dim(var_obs)[iind] != dimsvar[iind]) {
      stop("var_exp & var_obs must have same dimensions except the first one (number of experiments or number of observational datasets) ")
    }
  }
  nexp <- dimsvar[1]
  nobs <- dim(var_obs)[1]
  nsdates <- dimsvar[checkfirst]
  nltimes <- dimsvar[checkfirst+1]
  nlat <- dimsvar[checkfirst+2]
  nlon <- dimsvar[checkfirst+3]

  # Selecting the domain
  if (is.null(lon) == FALSE & is.null(lat) == FALSE & 
      is.null(lonlatbox) == FALSE) {
    for (jind in 1:2) {
      while (lonlatbox[jind] < 0) {
        lonlatbox[jind] <- lonlatbox[jind] + 360
      }
      while (lonlatbox[jind] > 360) {
        lonlatbox[jind] <- lonlatbox[jind] - 360
      }
    }
    indlon <- which((lon >= lonlatbox[1] & lon <= lonlatbox[2]) | 
                    (lonlatbox[1] > lonlatbox[2] & (lon > lonlatbox[1] | 
                                                    lon < lonlatbox[2])))
    indlat <- which(lat >= lonlatbox[3] & lat <= lonlatbox[4])
  } else {
    indlon <- 1:nlon
    indlat <- 1:nlat
  }

  # Defining the outputs
  if(conf == TRUE) {
    ACC <- array(NA, dim = c(nexp, nobs, nsdates, nltimes, 4))
  } else {
    ACC <- array(NA, dim = c(nexp, nobs, nsdates, nltimes))
  }         
  MACCaux <- array(0, dim = c(nexp, nobs, nsdates, nltimes, 3))

  # Selecting the domain and preparing the ensemble-mean
  if (length(dimsvar) == 6) {
    var_exp <- array(var_exp[,,,,indlat, indlon], 
                     dim = c(nexp, nmembexp, nsdates, nltimes, 
                                length(indlat), length(indlon)))
    var_obs <- array(var_obs[,,,,indlat, indlon], 
                     dim = c(nobs, nmembobs, nsdates, nltimes, 
                                length(indlat), length(indlon)))
    tmp01 <- Mean1Dim(var_exp,2)
    tmp02 <- Mean1Dim(var_obs,2)
  }else{
    var_exp <- array(var_exp[,,,indlat, indlon], 
                     dim = c(nexp, nsdates, nltimes, 
                                length(indlat), length(indlon)))
    var_obs <- array(var_obs[,,,indlat, indlon], 
                     dim = c(nobs, nsdates, nltimes, 
                                length(indlat), length(indlon)))
    tmp01 <- var_exp
    tmp02 <- var_obs
  } 

  for( iobs in 1:nobs) {
    for( iexp in 1:nexp) {

      # tmp1 and tmp2 are splitted to handle NA before building tmp
      tmp1 <- array(tmp01[iexp, , , , ], dim = c(1, nsdates, nltimes,
                                    length(indlon) * length(indlat))) 
      tmp2 <- array(tmp02[iobs, , , , ], dim = c(1, nsdates, nltimes,
                                    length(indlon) * length(indlat)))

      # Variance(tmp1)should not take into account any point 
      # that is not available in tmp2 and therefore not accounted for 
      # in covariance(tmp1,tmp2) and vice-versa 
      tmp1[ is.na(tmp2) ] <- NA
      tmp2[ is.na(tmp1) ] <- NA

      tmp <- abind(tmp1, tmp2, along = 1)

      top <- apply(tmp, c(2, 3), function(x)
                      sum(x[1, ]*x[2, ], na.rm = TRUE) )

      bottom1 <- apply(tmp, c(2, 3), function(x)
                      sum(x[1, ]*x[1, ], na.rm = TRUE) )

      bottom2 <- apply(tmp, c(2, 3), function(x)
                      sum(x[2, ]*x[2, ], na.rm = TRUE) )
    
      bottom <- sqrt(bottom1 * bottom2 )
      
      ACCaux <- top / bottom

      #handle NA
      tmpallNA <- which(is.na(bottom) | bottom == 0)
      ACCaux[tmpallNA] <- NA
      
      top[tmpallNA] = NA
      bottom1[tmpallNA] = NA
      bottom2[tmpallNA] = NA

      #store the value to calculate the MACC
      MACCaux[iexp, iobs, , , 1] <- top
      MACCaux[iexp, iobs, , , 2] <- bottom1
      MACCaux[iexp, iobs, , , 3] <- bottom2
      
      if (conf == TRUE) {
        ACC[iexp, iobs, , , 2] <- ACCaux
        
        #calculate parametric confidence interval
        if (conftype == "parametric") {
        
          eno <- Mean1Dim( Eno(tmp2, 4), 1)
          t <- apply(eno, c(1, 2), 
                   function(x) qt(siglev, x - 2))
          enot <- abind(eno, t, along = 3)

          ACC[iexp, iobs, , , 4] <- apply(enot, c(1, 2), function(x)
                 sqrt((x[2] * x[2]) / ((x[2] * x[2]) + x[1] - 2)))
        
          correno <- abind(ACCaux, eno, along = 3)
        
          ACC[iexp, iobs, , , 1] <- apply(correno, c(1, 2), function(x)
                  tanh(atanh(x[1]) + qnorm(1 - (1 - siglev) / 2) / sqrt(x[2] - 3)))
          ACC[iexp, iobs, , , 3] <- apply(correno, c(1, 2), function(x)
                  tanh(atanh(x[1]) + qnorm((1 - siglev) / 2) / sqrt(x[2] - 3)))
        }
      } else {
        ACC[iexp, iobs, , ] <- ACCaux
      }

    }
  }

  #   #na.rm should be TRUE to obtain a MACC even if a few
  #   #start dates are missing 
  topfinal <- apply(MACCaux, c(1, 2, 4), function(x) 
                                 sum(x[, 1], na.rm = TRUE) )
               
  bottomfinal <- apply(MACCaux, c(1, 2, 4), function(x) 
               sqrt(sum(x[, 2], na.rm = TRUE) * sum(x[, 3], na.rm = TRUE)))

  #to avoid that some NA are called NaN or Inf
  tmpNA <- which(is.na(bottomfinal) | bottomfinal == 0)

  MACC <- topfinal / bottomfinal 
  MACC[tmpNA] <- NA  
  
  if (conf == TRUE & conftype == "bootstrap") {
    if (length(dimsvar) != 6) {
      stop("Var_exp and var_obs must have a member dimension")
    }
    ndraw <- 100
    #create the matrix to store the random values
    ACC_draw  = array(dim=c(nexp,nobs,nsdates,nltimes,ndraw))
    MACC_draw = array(dim=c(nexp,nobs,nltimes,ndraw))

    #put the member dimension first
    var_exp <- aperm(var_exp, c(2, 1, 3, 4, 5, 6))
    var_obs <- aperm(var_obs, c(2, 1, 3, 4, 5, 6))

    for (jdraw in 1:ndraw) {

      #choose a randomly member index for each point of the matrix 
      indexp <- array(sample(nmembexp, size = (nexp*nmembexp*nsdates*nltimes), 
                             replace = TRUE), dim = c(nmembexp, nexp, nsdates, nltimes, 
                                                    length(indlat), length(indlon)) )
      indobs <- array(sample(nmembobs, size = (nobs*nmembobs*nsdates*nltimes), 
                             replace = TRUE), dim = c(nmembobs, nobs, nsdates, nltimes,
                                                    length(indlat), length(indlon)) )

      #combine maxtrix of data and random index
      varindexp <- abind(var_exp, indexp, along = 7 )
      varindobs <- abind(var_obs, indobs, along = 7 )

      #select randomly the members for each point of the matrix
      varexpdraw <- aperm( array( 
                    apply( varindexp, c(2, 3, 4, 5, 6), function(x) x[,1][x[,2]] ),
                                 dim = c(nmembexp, nexp, nsdates, nltimes, 
                                       length(indlat), length(indlon))),
                           c(2, 1, 3, 4, 5, 6)) 
      varobsdraw <- aperm( array(
                    apply( varindobs, c(2, 3, 4, 5, 6), function(x) x[,1][x[,2]] ),
                                 dim = c(nmembobs, nobs, nsdates, nltimes, 
                                       length(indlat), length(indlon))),
                           c(2, 1, 3, 4, 5, 6)) 

      #calculate the ACC of the randomized field
      tmpACC <- ACC(varexpdraw, varobsdraw, conf = FALSE)
      ACC_draw[,,,,jdraw] <- tmpACC$ACC
      MACC_draw[,,,jdraw] <- tmpACC$MACC
    }

    #calculate the confidence interval
    ACC[ , , , , 3] <- apply(ACC_draw, c(1, 2, 3, 4), function(x)
                                       quantile(x, 1 - (1 - siglev) / 2, na.rm = TRUE))  
    ACC[ , , , , 1] <- apply(ACC_draw, c(1, 2, 3, 4), function(x)
                                       quantile(x, (1 - siglev) / 2, na.rm = TRUE))  

    MACC <- InsertDim(MACC, 4, 3)
    MACC[ , , , 3] <- apply(MACC_draw, c(1, 2, 3), function(x)
                                       quantile(x, 1 - (1 - siglev) / 2, na.rm = TRUE))  
    MACC[ , , , 1] <- apply(MACC_draw, c(1, 2, 3), function(x)
                                       quantile(x, (1 - siglev) / 2, na.rm = TRUE))  
  }

  invisible(list(ACC = ACC, MACC = MACC))
}