## R/georob.cv.R

##  ############################################################################

## S3 generic for cross-validation of fitted spatial models; dispatches on
## the class of 'object' (e.g. cv.georob for objects of class "georob")
cv <- function( object, ... ) {
  UseMethod( "cv" )
}

##  ############################################################################

# cv.default <- function( 
#   object, 
#   formula = NULL, subset = NULL,
#   nset = 10, seed = NULL, sets = NULL,
#   
#   ... )
# {
#   
#   
#   
# }

##  ############################################################################

cv.georob <-
  function(
    object,
    formula = NULL, subset = NULL,
    nset = 10, seed = NULL, sets = NULL,
    duplicates.in.same.set = TRUE,
    re.estimate = TRUE, param = object[["param"]], 
    fit.param = object[["initial.objects"]][["fit.param"]],
    aniso = object[["aniso"]][["aniso"]], 
    fit.aniso = object[["initial.objects"]][["fit.aniso"]],
    return.fit = FALSE, reduced.output = TRUE,
    lgn = FALSE,
    mfl.action = c( "offset", "stop" ),
    ncores = min( nset, detectCores() ),
    verbose = 0,
    ...
  )
{
  
  ## Function computes nset-fold cross-validation predictions from a
  ## fitted georob object
  ##
  ## Arguments:
  ##   object                  fitted model of class "georob"
  ##   formula                 optional updating formula for the fixed-effects model
  ##   subset                  optional subset of observations used for cross-validation
  ##   nset                    number of cross-validation subsets
  ##   seed                    seed for the random assignment of observations to subsets
  ##   sets                    optional user-defined vector coding the subsets
  ##                           (length equal to the number of observations)
  ##   duplicates.in.same.set  if TRUE replicated observations (same element of
  ##                           object[["Tmat"]]) are kept in the same subset
  ##   re.estimate             if TRUE variogram parameters are re-estimated for
  ##                           each calibration set
  ##   param, fit.param        initial values and fitting flags of the variogram
  ##                           parameters; matrices/data frames with nset rows
  ##                           provide subset-specific initial values
  ##   aniso, fit.aniso        dito for the anisotropy parameters
  ##   return.fit              if TRUE the fits of the calibration sets are returned
  ##   reduced.output          if TRUE only a reduced set of fit components is kept
  ##   lgn                     if TRUE predictions are back-transformed by
  ##                           (approximately unbiased) lognormal kriging
  ##   mfl.action              action when factor levels are missing in some
  ##                           calibration set: treat factors as offset ("offset")
  ##                           or abort ("stop")
  ##   ncores                  number of cores used for parallel processing
  ##   verbose                 verbosity level passed to georob
  ##   ...                     further arguments passed via update() to georob
  ##
  ## Returns an object of class "cv.georob": a list with components 'pred'
  ## (data frame with cross-validation predictions) and 'fit' (list of fitted
  ## models or NULL).
  
  
  ## History:
  
  ## 2011-10-24 correction of exclusion of variables not needed for lognormal kriging
  ## 2011-12-23 AP modified for replicated observations and for parallel computing
  ## 2012-03-02 AP eliminated possibility for logging to file in parallel processing
  ## 2012-03-19 AP correction of error in parallel processing on Windows
  ## 2012-05-01 AP correct handling of NAs
  ## 2012-05-04 AP modifications for lognormal block kriging
  ## 2012-05-09 AP correction of error if a new formula is passed via update to georob
  ## 2012-05-22 AP correction of error in passing param and fit.param to georob
  ## 2012-06-05 AP correction of error in handling optional subset argument
  ## 2012-11-04 AP handling compressed cov.betahat
  ## 2012-12-04 AP modifiction for changes in predict.georob
  ## 2013-04-24 AP changes for parallelization on windows os
  ## 2013-05-23 AP correct handling of missing observations
  ## 2013-05-24 AP separate initial variogram parameters for each cross-validation set
  ## 2013-06-12 AP substituting [["x"]] for $x in all lists
  ## 2013-07-02 AP passing initial values of aniso and fit.aniso to georob via update
  ## 2013-07-05 AP return "variogram.model" as part of fit componnent
  ## 2014-02-21 AP catching problem when factor are very unbalanced
  ## 2014-05-15 AP changes for version 3 of RandomFields
  ## (review)   correction of copy-paste errors in dimension checks of 'aniso',
  ##            'fit.param' and 'fit.aniso'; robustification for data without
  ##            factor variables and for a single cross-validation set
    
  ## auxiliary function that fits the model to the complement of a
  ## cross-validation set and computes the predictions for the set
  ## NOTE: 'reduced.output' is not passed as an argument but captured
  ## lexically from the frame of cv.georob
  
  f.aux <- function( 
    ..i.., object, formula, data, sets, re.estimate, 
    param, fit.param, aniso, fit.aniso, lgn, verbose, ...
  ){  ## cv function
    
    if (verbose) cat( "\n\n  processing cross-validation set", ..i.., "\n" ) 
    
    ## fit model to complement of current set
    
    if( !re.estimate ){
      ## keep all variogram and anisotropy parameters fixed at their
      ## current values
      fit.param <- c( 
        variance = FALSE, snugget = FALSE, nugget = FALSE, scale = FALSE, 
        alpha = FALSE, beta = FALSE, delta = FALSE, 
        gamma = FALSE, kappa = FALSE, lambda = FALSE, mu = FALSE, nu = FALSE,
        f1 = FALSE, f2 = FALSE, omega = FALSE, phi = FALSE, zeta = FALSE      
      )[names( param )]
      fit.aniso <- c( f1 = FALSE, f2 = FALSE, omega = FALSE, phi = FALSE, zeta = FALSE )
    }
    
    ## change environment of terms and formula so that subset selection works for update            
    
    environment( formula ) <- environment()
    environment( object[["terms"]] ) <- environment()
    
    ## read-off initial values of variogram parameters for the current set
    ## (matrices/data frames provide one row per cross-validation set)
    
    if( is.matrix( param ) || is.data.frame( param ) )  param <- param[..i..,]
    if( is.matrix( fit.param ) || is.data.frame( fit.param ) )  fit.param <- fit.param[..i..,]
    ## BUG FIX: second condition formerly tested is.data.frame( param )
    if( is.matrix( aniso ) || is.data.frame( aniso ) )  aniso <- aniso[..i..,]
    if( is.matrix( fit.aniso ) || is.data.frame( fit.aniso ) )  fit.aniso <- fit.aniso[..i..,]

    t.georob <- update( 
      object, 
      formula = formula,
      data = data,
      subset = -sets[[..i..]] ,
      param = param, fit.param = fit.param,
      aniso = aniso, fit.aniso = fit.aniso,
      verbose = verbose,
      ...
    )
    
    if( verbose > 0 ){
      cat( "\n\n" )
      print( summary( t.georob ) )
    }
    
    ## compute predictions for current set
    
    t.predict <- predict( 
      t.georob, newdata = data[sets[[..i..]], ], type = "response",
      mmax = length( sets[[..i..]] ),
      extended.output = lgn,
      ncores = 1
    )
    
    ## backtransformation for log-normal kriging
    
    if( lgn ){
      t.predict <- lgnpp( t.predict )
      ## drop auxiliary columns needed only for the back-transformation
      t.predict <- t.predict[, -match( 
        c( "trend", "var.pred", "cov.pred.target", "var.target" ), names( t.predict ) 
      )]
    }
    
    ## prepend original row indices so that results can be re-ordered later
    
    t.predict <- data.frame( i = sets[[..i..]], t.predict )
    
    ## drop prediction interval limits
    
    t.ex <- c( 
      grep( "lower", colnames( t.predict ), fixed = TRUE ),
      grep( "upper", colnames( t.predict ), fixed = TRUE )
    )
    
    t.predict <- t.predict[, -t.ex]
    
    if( reduced.output ){
      
      ## keep only standard errors of regression coefficients plus a small
      ## set of fit components
      
      if( !is.null( t.georob[["cov"]][["cov.betahat"]] ) ){
        t.se.coef <- sqrt( diag( expand( t.georob[["cov"]][["cov.betahat"]] ) ) )
      } else {
        t.se.coef <- NULL
      }
      
      t.georob <- t.georob[c(  
        "tuning.psi", "converged", "convergence.code",
        "gradient", "variogram.model", "param", "aniso",
        "coefficients"
      )]
      
      t.georob[["aniso"]] <- t.georob[["aniso"]][["aniso"]]
      
      if( !is.null( t.se.coef ) ) t.georob[["se.coefficients"]] <- t.se.coef
      
    }
    
    return( list( pred = t.predict, fit = t.georob ) )
    ## end cv function
  }
  
  mfl.action <- match.arg( mfl.action )
  
  ## update terms of object if a formula is provided
  
  if( !is.null( formula ) ){
    formula <- update( formula( object ), formula )
    object[["terms"]] <- terms( formula )
  } else {
    formula <- formula( object )
  }
    
  ## get data.frame with required variables (note that the data.frame passed
  ## as data argument to georob must exist in GlobalEnv)

  data <- cbind(
    get_all_vars( 
      formula( object ), data = eval( getCall(object)[["data"]] )
    ),
    get_all_vars( 
      object[["locations.objects"]][["locations"]], eval( getCall(object)[["data"]] )
    )
  )
  
  if( identical( class( object[["na.action"]] ), "omit" ) ) data <- na.omit(data)
  
  ## select subset if appropriate
  
  if( !is.null( subset ) ){
    data <- data[subset, ]
    object[["Tmat"]] <- object[["Tmat"]][subset]
  } else if( !is.null( getCall(object)[["subset"]] ) ){
    data <- data[eval( getCall(object)[["subset"]] ), ]
  }
  
  ## define cross-validation sets
  
  if( is.null( sets ) ){
    
    ## random partition into nset subsets of (nearly) equal size
    
    if( !is.null( seed ) ) set.seed( seed )
    sets <- runif( NROW( data ) )
    sets <- cut( 
      sets, 
      breaks = c( -0.1, quantile( sets, probs = ( 1:(nset-1)/nset ) ), 1.1 )
    )
    sets <- factor( as.integer( sets ) )

  } else {
    
    nset <- length( unique( sets ) )
    if( length( sets ) != NROW( data ) ) stop(
      "sets must be an integer vector with length equal to the number of observations"      
    )
    
  }
  
  if( duplicates.in.same.set ){
    ## keep replicated observations of the same location in the same subset
    dups <- duplicated( object[["Tmat"]] )
    idups <- match( object[["Tmat"]][dups], object[["Tmat"]][!dups] )
    sets[dups] <- (sets[!dups])[idups]
  }
  
  ## convert set codes to a list of row-index vectors, one per subset
  
  sets <- tapply(
    1:NROW( data ),
    sets,
    function( x ) x
  )
  
  ## check whether all levels of factors are present in all calibration sets
  
  isf <- sapply( data, is.factor )
  
  ## BUG FIX: vapply instead of sapply so that an empty result is logical(0)
  ## (sapply returned list() when no factors are present, and any(list())
  ## errors below)
  
  mfl <- vapply(
    names( data )[isf],
    function( v, data, sets ){
      ## TRUE if some level of factor v is absent from some calibration set
      nolevel <- sapply( 
        sets, 
        function(s, x) any( table( x[-s] ) < 1 ),
        x = data[, v]
      )
      if( any( nolevel ) ){
        switch(
          mfl.action,
          "stop" = stop(
            "for factor '", v, "' some levels are missing in some calibration set(s),\n",
            "  try defining other cross-validation sets to avoid this problem"
          ),
          "offset" = {
            warning(
              "for factor '", v, "' some levels are missing in some calibration set(s),\n",
              "  respective factors are treated as offset terms in model"
            )
            cat(
              "  for factor '", v, "' some levels are missing in some calibration set(s),\n",
              "  respective factors are treated as offset terms in model"
            )
          }
        )
        TRUE
      } else {
        FALSE
      }
    },
    FUN.VALUE = logical( 1 ),
    data = data, sets = sets
  )
  
  if( any( mfl ) ){
    v <- names( mfl )[mfl]
    
    ## construct offset from the terms of the problematic factors and
    ## append to data
    
    offset <- apply( predict( object, type = "terms", terms = v )$fit, 1, sum )
    
    if( length( offset ) != nrow( data ) ) stop( "offset and data with incompatible dimensions" )
    data[, "..offset.."] <- offset
    
    ## update formula: replace the factors by the offset term
    formula <- update( 
      formula, 
      as.formula(
        paste( 
          ". ~ . -", paste( v, collapse = " - " ) , "+ offset(..offset..)" )
      ) 
    )
    
  }
  
  
  ## redefine na.action component of object
  
  if( identical( class( object[["na.action"]] ), "exclude" ) ){
    class( object[["na.action"]] ) <- "omit"
  }
  

  
  ## check dimension of param, fit.param, aniso and fit.aniso
  
  if( ( is.matrix( param ) || is.data.frame( param ) ) && nrow( param ) != nset ) stop(
    "'param' must have 'nset' rows if it is a matrix or data frame"  
  )
    
  ## BUG FIX: formerly checked nrow( param )
  if( ( is.matrix( fit.param ) || is.data.frame( fit.param ) ) && nrow( fit.param ) != nset ) stop(
    "'fit.param' must have 'nset' rows if it is a matrix or data frame"  
  )
    
  if( ( is.matrix( aniso ) || is.data.frame( aniso ) ) && nrow( aniso ) != nset ) stop(
    "'aniso' must have 'nset' rows if it is a matrix or data frame"  
  )
    
  ## BUG FIX: formerly checked nrow( aniso )
  if( ( is.matrix( fit.aniso ) || is.data.frame( fit.aniso ) ) && nrow( fit.aniso ) != nset ) stop(
    "'fit.aniso' must have 'nset' rows if it is a matrix or data frame"  
  )
    
  ## loop over all cross-validation sets
  
  if( .Platform[["OS.type"]] == "windows" ){
    
    ## create a SNOW cluster on windows OS
    
    cl <- makePSOCKcluster( ncores, outfile = "")
    
    ## export required items to workers
    
    junk <- clusterEvalQ( cl, require( georob, quietly = TRUE ) )
    
    t.result <- parLapply(
      cl, 
      1:length( sets ),
      f.aux, 
      object = object,
      formula = formula,
      data = data,
      sets = sets,
      re.estimate = re.estimate,
      param = param, fit.param = fit.param,
      aniso = aniso, fit.aniso = fit.aniso,
      lgn = lgn,
      verbose = verbose,
      ...
    )
    
    stopCluster(cl)
    
  } else {
        
    ## fork child processes on non-windows OS
    
    t.result <- mclapply(
      1:length( sets ),
      f.aux, 
      object = object,
      formula = formula,
      data = data,
      sets = sets,
      re.estimate = re.estimate,
      param = param, fit.param = fit.param,
      aniso = aniso, fit.aniso = fit.aniso,
      lgn = lgn,
      verbose = verbose,
      mc.cores = ncores,
      mc.allow.recursive = FALSE,
      ...
    )
    
  }
    
  ## create single data frame with cross-validation results 
  
  result <- t.result[[1]][["pred"]]
  result[["subset"]] <- rep( 1, nrow( t.result[[1]][["pred"]] ) )
  
  ## BUG FIX: guard loop so that a single cross-validation set does not
  ## trigger the 2:1 reverse-sequence problem
  if( length( t.result ) > 1 ){
    for( t.i in 2:length( t.result ) ) {
      result <- rbind( 
        result, 
        data.frame( 
          t.result[[t.i]][["pred"]],
          subset = rep( t.i, nrow( t.result[[t.i]][["pred"]] ) )
        )
      )
    }
  }
  
  ## restore original order of the observations
  
  t.ix <- sort( result[["i"]], index.return = TRUE )[["ix"]]
  result <- result[t.ix, ]
  result[["data"]] <- model.response( 
    model.frame( formula( object), data, na.action = na.pass ) 
  )
  
  if( lgn ) result[["lgn.data"]] <- exp( result[["data"]] )
  
  result <- result[, -match("i", colnames( result) )]
  
  ## reorder columns: coordinates etc. first, then subset, data, pred and se
  
  isubset <- match( "subset", colnames( result ) )
  idata <- grep( "data", colnames( result ), fixed = TRUE )
  ipred <- grep( "pred", colnames( result ), fixed = TRUE )
  ise <-   grep( "se", colnames( result ), fixed = TRUE )
  ise <- ise[ise != isubset]
  
  result <- cbind(
    result[, -c(isubset, idata, ipred, ise)],
    result[,  c(isubset, idata, ipred, ise)]
  )
  
  t.fit <- lapply( t.result, function( x ) return( x[["fit"]] ) )
  
  if( re.estimate && !all( sapply( t.fit, function(x) x[["converged"]] ) ) ) warning(
    "lack of covergence for  ", 
    sum( !sapply( t.fit, function(x) x[["converged"]] ) ), " cross-validation sets"
  )
  
  result <- list( 
    pred = result, 
    fit = if( return.fit ) t.fit else NULL
  )
  
  class( result ) <- "cv.georob"
  
  invisible( result )
  
}

##  ###########################################################################

plot.cv.georob <-
  function( 
    x, type = c( "sc", "lgn.sc", "ta", "qq", "pit", "mc", "bs" ), 
    ncutoff = NULL, 
    add = FALSE, 
    col, pch, lty,
    main, xlab, ylab, 
    ... 
  )
{
  
  ## plot method for class "cv.georob"
  ##
  ## Arguments:
  ##   x        object of class "cv.georob"
  ##   type     kind of plot: "sc" scatterplot data vs. predictions,
  ##            "lgn.sc" dito for back-transformed lognormal predictions,
  ##            "ta" Tukey-Anscombe plot, "qq" normal QQ-plot of
  ##            standardized prediction errors, "pit" histogram of
  ##            PIT-values, "mc" marginal calibration plot, "bs" Brier
  ##            score vs. cutoff
  ##   ncutoff  number of cutoffs passed to validate.predictions
  ##   add      if TRUE points/lines are added to an existing plot
  ##   col, pch, lty, main, xlab, ylab, ...  usual graphical parameters
  
  ## 2011-12-21 A. Papritz
  ## 2013-06-12 AP substituting [["x"]] for $x in all lists
  ## (review)   correction of unreachable check for missing lognormal
  ##            results (tested "sc.lgn" instead of "lgn.sc")
  
  x <- x[["pred"]]
  
  type <- match.arg( type )
  
  ## BUG FIX: the valid type is "lgn.sc", the former comparison with
  ## "sc.lgn" could never be TRUE
  if( type == "lgn.sc" && !"lgn.pred" %in% names( x ) ) stop(
    "lognormal kriging results missing, use 'lgn = TRUE' for cross-validation"
  )
  
  if( type %in% c( "pit", "mc", "bs" ) ){
    
    ## statistics required for PIT, marginal calibration and Brier score plots
    
    result <- validate.predictions( 
      data = x[["data"]],
      pred = x[["pred"]],
      se.pred = x[["se"]],
      statistic = type, ncutoff = ncutoff
    )
    
  }
  
  if( missing( col ) ) col <- 1
  if( missing( pch ) ) pch <- 1
  if( missing( lty ) ) lty <- 1

  
    
  switch(
    type,
    sc = {
      
      ##  scatterplot of (transformed) measurements vs. predictions
      
      if( missing( main ) ) main <- "data vs. predictions"
      if( missing( xlab ) ) xlab <- "predictions"
      if( missing( ylab ) ) ylab <- "data"
      
      if( add ){
        points( data ~ pred, x, col = col, pch = pch, ... )
      } else {        
        plot( 
          data ~ pred, x, col = col, pch = pch, 
          main = main, xlab = xlab, ylab = ylab, ... 
        )
      }
      
      
    },        
    lgn.sc = {
      
      ##  scatterplot of original measurements vs. back-transformed
      ##  lognormal predictions
      
      if( missing( main ) ) main <- "data vs. back-transformed predictions"
      if( missing( xlab ) ) xlab <- "back-transformed predictions"
      if( missing( ylab ) ) ylab <- "data"
      
      if( add ){
        points( lgn.data ~ lgn.pred, x, col = col, pch = pch, ... )
      } else {        
        plot( 
          lgn.data ~ lgn.pred, x, col = col, pch = pch, 
          main = main, xlab = xlab, ylab = ylab, ... 
        )
      }
      
      
      
    }, 
    ta = {
      
      ##  Tukey-Anscombe plot (standardized prediction errors vs. predictions)
      
      if( missing( main ) ) main <- "Tukey-Anscombe plot"
      if( missing( xlab ) ) xlab <- "predictions"
      if( missing( ylab ) ) ylab <- "standardized prediction errors"
      
      if( add ){
        points( I((data-pred)/se) ~ pred, x, col = col, pch = pch, ... )
      } else {        
        plot( 
          I((data-pred)/se) ~ pred, x, col = col, pch = pch, 
          main = main, xlab = xlab, ylab = ylab, ... 
        )
      }
      
    },
    qq = {
      
      ##  normal QQ-plot of standardized prediction errors
      
      if( missing( main ) ) main <- "normal-QQ-plot of standardized prediction errors"
      if( missing( xlab ) ) xlab <- "quantile N(0,1)"
      if( missing( ylab ) ) ylab <- "quantiles of standardized prediction errors"
      
      r.qq <- with( x, qqnorm( ( data - pred ) / se, plot.it = FALSE ) )
      
      if( add ){
        points( r.qq, col = col, pch = pch, ... )
      } else {
        plot( r.qq, col = col, pch = pch, main = main, xlab = xlab, ylab = ylab, ... )
      }
    },
    pit = {
      
      ##  histogram of probability-integral-transformation values
      
      if( missing( main ) ) main <- "histogram PIT-values"
      if( missing( xlab ) ) xlab <- "PIT"
      if( missing( ylab ) ) ylab <- "density"
      
      r.hist <- hist( 
        result, 
        col = col, lty = lty, 
        main = main, xlab = xlab, ylab = ylab, freq = FALSE, ... )
    },
    mc = {
      
      ##  marginal calibration plot: ecdf of measurements and mean
      ##  predictive cdf
      
      if( missing( main ) ) main <- "empirical cdf of data and mean predictive cdfs"
      if( missing( xlab ) ) xlab <- "data or predictions"
      if( missing( ylab ) ) ylab <- "probability"
      
      matplot( 
        result[["y"]], 
        result[, c( "ghat", "fbar" )], type = "l",
        col = c( "black", "red" ),
        lty = c( "solid", "dashed" ),
        main = main, xlab = xlab, ylab = ylab,
        ...
      )
      
      ## second y-axis (right, blue) for the difference fbar - ghat
      t.usr <- par( "usr" )
      t.usr[3:4] <- with( result, range( fbar - ghat ) ) *1.04
      par( usr = t.usr )
      with( result, lines( y, fbar-ghat, col= "blue", lty = "dotted" ) )
      axis(2, pos = t.usr[2], col.axis = "blue", col.ticks = "blue" )
      legend( 
        "topleft",
        lty = c("solid", "dashed", "dotted" ), 
        col = c( "black", "red", "blue" ), bty = "n", cex = 1,
        legend = c(
          expression( paste( "empirical cdf ", hat(G) ) ),
          expression( paste( "mean predictive cdf ", bar(F) ) ),
          expression( bar(F)-hat(G) )
        )
      )
    },
    bs  ={
      
      ## plot of Brier score vs. cutoff
      
      if( missing( main ) ) main <- "Brier score vs. cutoff"
      if( missing( xlab ) ) xlab <- "cutoff"
      if( missing( ylab ) ) ylab <- "Brier score"
      
      if( add ){
        lines( result[["y"]], result[["bs"]], col = col, lty = lty, ... )
      } else {
        plot( result[["y"]], result[["bs"]], type = "l", col = col, lty = lty, 
          main = main, xlab = xlab, ylab = ylab, ...
        )
      }
    }
  )
  invisible( NULL )
}

  

##  ###########################################################################

print.cv.georob <-
  function( 
    x, digits = max(3, getOption("digits") - 3), ...
  )
{
  
  ## print method for class "cv.georob": displays summary statistics of
  ## the cross-validation prediction errors
  ##
  ## Arguments:
  ##   x       object of class "cv.georob"
  ##   digits  number of significant digits used for printing
  ##   ...     further arguments passed to validate.predictions
  
  ## 2011-10-13 A. Papritz
  ## 2012-12-18 AP invisible(x)
  ## 2013-06-12 AP substituting [["x"]] for $x in all lists
  
  pred.df <- x[["pred"]]
  
  ## compute error statistics from data, predictions and kriging
  ## standard errors
  
  stats <- validate.predictions(
    data = pred.df[["data"]],
    pred = pred.df[["pred"]],
    se.pred = pred.df[["se"]],
    statistic = "st",
    ...
  )
  
  print( format( stats, digits = digits ), quote = FALSE, print.gap = 2 )
  
  invisible( pred.df )
}


##  ###########################################################################

summary.cv.georob <-
  function( object, se = FALSE, ... )
{
  
  ## summary method for class "cv.georob" 
  
  ## function computes statistics of the cross-validation errors
  
  ## Arguments:
  ##   object  object of class "cv.georob"
  ##   se      if TRUE standard errors of the criteria are computed across
  ##           the cross-validation subsets
  ##   ...     further arguments (currently not used)
  
  ## 2011-10-13 A. Papritz
  ## 2012-05-21 ap
  ## 2013-06-12 AP substituting [["x"]] for $x in all lists
  
  
  ## work on the data frame with the cross-validation predictions
  
  object <- object[["pred"]]
  
  ## Brier scores for all distinct data values, needed to approximate the
  ## continuous ranked probability score (crps) below
  
  bs <- validate.predictions( 
    data = object[["data"]],
    pred = object[["pred"]],
    se.pred = object[["se"]],
    ncutoff = length( object[["data"]] ),
    statistic = "bs"
  )
  
  ## crps = integral of the Brier score over the cutoffs, approximated by
  ## the trapezoidal rule
  
  t.d <- diff( bs[["y"]] )
  crps <- sum( bs[["bs"]] * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) )
  
  ## summary statistics of the (standardized) prediction errors
  
  st <- validate.predictions( 
    data = object[["data"]],
    pred = object[["pred"]],
    se.pred = object[["se"]],
    statistic = "st"
  )
  
  ## same statistics for back-transformed lognormal predictions if present
  
  if( !is.null( object[["lgn.pred"]] ) ){
    st.lgn <- validate.predictions( 
      data = object[["lgn.data"]],
      pred = object[["lgn.pred"]],
      se.pred = object[["lgn.se"]],
      statistic = "st"
    )
  } else {
    st.lgn <- NULL
  }
  
  ## collect results
  
  result <- list( st = st, crps = crps, st.lgn = st.lgn )

  ## compute standard errors of criteria across cross-validation sets
  
  if( se && !is.null( object[["subset"]] ) ){
    
    ## compute the criteria separately for each cross-validation subset;
    ## rows of 'criteria' = subsets, columns = criteria
    
    criteria <- t( sapply(
        tapply(
          1:nrow( object ),
          factor( object[["subset"]] ),
          function( i, data, pred, se.pred, lgn.data, lgn.pred, lgn.se.pred ){
            
            ## crps for subset i (same trapezoidal approximation as above)
            
            bs <- validate.predictions( 
              data = data[i],
              pred = pred[i],
              se.pred = se.pred[i],
              ncutoff = length( data[i] ),
              statistic = "bs"
            )
            
            t.d <- diff( bs[["y"]] )
            crps <- c( crps = sum( bs[["bs"]] * 0.5 * ( c( 0., t.d ) + c( t.d, 0. ) ) ) )
            
            ## error statistics for subset i
            
            st <- validate.predictions( 
              data = data[i],
              pred = pred[i],
              se.pred = se.pred[i],
              statistic = "st"
            )
            
            ## dito for lognormal predictions; names are suffixed with
            ## ".lgn" to distinguish them from the untransformed statistics
            
            if( !is.null( lgn.pred ) ){
              st.lgn <- validate.predictions( 
                data = lgn.data[i],
                pred = lgn.pred[i],
                se.pred = lgn.se.pred[i],
                statistic = "st"
              )
              names( st.lgn ) <- paste( names( st.lgn ), "lgn", sep = "." )
            } else {
              st.lgn <- NULL
            }
            
            
            return( c( st, st.lgn, crps ) )
            
          },
          data = object[["data"]],
          pred = object[["pred"]],
          se.pred = object[["se"]],
          lgn.data = object[["lgn.data"]],
          lgn.pred = object[["lgn.pred"]],
          lgn.se.pred = object[["lgn.se"]]
        ),
        function( x ) x
      ))
    
    ## standard error of the mean of each criterion across the subsets
    
    se.criteria <- apply(
      criteria, 2,
      function( x ) sd( x ) / sqrt( length( x ) )
    )
    
    result[["se.st"]] <- se.criteria[c( "me", "mede", "rmse", "made", "qne", "msse", "medsse")]
    result[["se.crps"]] <- se.criteria["crps"]
    if( !is.null( st.lgn ) ){
      result[["se.st.lgn"]] <- se.criteria[
        c( "me.lgn", "mede.lgn", "rmse.lgn", "made.lgn", "qne.lgn", "msse.lgn", "medsse.lgn")
      ]
      names( result[["se.st.lgn"]] ) <- gsub( ".lgn", "", names( result[["se.st.lgn"]] ) )
    }
    
  }
  
  class( result ) <- "summary.cv.georob"
 
  
  return( result )
  
}

##  ###########################################################################

print.summary.cv.georob <-
  function( 
    x, digits = max(3, getOption("digits") - 3), ...
  )
{
  
  ## print method for class "summary.cv.georob": prints the statistics of
  ## the cross-validation prediction errors, optionally with a second row
  ## of standard errors, and the same for back-transformed (lognormal)
  ## predictions if present
  
  ## 2011-12-20 A. Papritz
  ## 2012-05-21 ap
  ## 2012-12-18 AP invisible(x)
  ## 2013-06-12 AP substituting [["x"]] for $x in all lists
  
  ## helper: prints a table of statistics, adding a row of standard
  ## errors when available
  
  f.show <- function( title, stats, se.stats ){
    tbl <- stats
    if( !is.null( se.stats ) ){
      tbl <- rbind( stats, se.stats )
      rownames( tbl ) <- c( "", "se" )
    }
    cat( title )
    print(
      format( tbl, digits = digits ), print.gap = 2, 
      quote = FALSE
    )
  }
  
  f.show(
    "\nStatistics of cross-validation prediction errors\n",
    c( x[["st"]], crps = x[["crps"]] ),
    if( !is.null( x[["se.st"]] ) ) c( x[["se.st"]], crps = x[["se.crps"]] ) else NULL
  )
  
  if( !is.null( x[["st.lgn"]] ) ){
    f.show(
      "\nStatistics of back-transformed cross-validation prediction errors\n",
      x[["st.lgn"]],
      x[["se.st.lgn"]]
    )
  }
  
  invisible( x )
  
}

##  ###########################################################################

rstudent.cv.georob <-
  function( model, ... )
{
  
  ## Extracts the standardized cross-validation prediction errors
  ## (data - prediction) / kriging standard error from a cv.georob object
  
  ## Arguments:
  
  ## model     cv.georob object
  ## ...       further arguments (currently not used)
  
  ## 2011-10-13 A. Papritz
  ## 2013-06-12 AP substituting [["x"]] for $x in all lists
  
  ## guard against objects of a different class
  
  if( !identical( class( model )[1], "cv.georob" ) ) stop(
    "model is not of class 'cv.georob'" 
  )
  
  cv.pred <- model[["pred"]]
  
  ( cv.pred[["data"]] - cv.pred[["pred"]] ) / cv.pred[["se"]]
  
}

# ##  ###########################################################################
# 
# cv.variomodel <-
#   function( object, geodata, ... )
# {
#   
#   ## Wrapper function for cross-validation of object of class variomodel{geoR}
#   ## by function xvalid{geoR}
#   
#   ## Arguments:
#   
#   ## model     an object of class "variomodel{geoR}
#   ## ...       further arguements passed to xvalid{geoR), cf. respective help page
#   
#   ## 2012-11-22 A. Papritz
#   
#   call.fc <- match.call()
# 
#   res <- geoR::xvalid( model = object, ... )
#   
#   if( !is.null( attr( res, "geodata.xvalid" ) ) ){
#     attr( res, "geodata.xvalid" ) <- call.fc[["geodata"]]
#   }
#   if( !is.null( attr( res, "locations.xvalid" ) ) ){
#     attr( res, "locations.xvalid" ) <- call.fc[["locations.xvalid"]]
#   }
#   
#   return(res)
#   
# }
# 
# cv.likGRF <-
#   function( object, geodata, ... )
# {
#   
#   ## Wrapper function for cross-validation of object of class variomodel{geoR}
#   ## by function xvalid{geoR}
#   
#   ## Arguments:
#   
#   ## model     an object of class "likGRF{geoR}
#   ## ...       further arguements passed to xvalid{geoR), cf. respective help page
#   
#   ## 2012-11-22 A. Papritz
#   
#   call.fc <- match.call()
# 
#   res <- geoR::xvalid( model = object, geodata = geodata, ... )
#   
#   if( !is.null( attr( res, "geodata.xvalid" ) ) ){
#     attr( res, "geodata.xvalid" ) <- call.fc[["geodata"]]
#   }
#   if( !is.null( attr( res, "locations.xvalid" ) ) ){
#     attr( res, "locations.xvalid" ) <- call.fc[["locations.xvalid"]]
#   }
#   
#   return(res)
#   
# }

## ======================================================================
validate.predictions <- 
  function( 
    data,
    pred,
    se.pred,
    statistic = c( "pit", "mc", "bs", "st" ),	
    ncutoff = NULL
  )
{
  
  ## function computes several statistics to validate probabilistic
  ## predictions, cf.  Gneiting et al., 2007, JRSSB
  ##
  ## Arguments:
  ##   data       vector of (transformed) observations
  ##   pred       vector of predictions (means of predictive distributions)
  ##   se.pred    vector of prediction standard errors
  ##   statistic  which statistic to compute:
  ##              "pit" probability integral transform values,
  ##              "mc"/"bs" marginal calibration and Brier scores,
  ##              "st" summary statistics of (standardized) prediction errors
  ##   ncutoff    number of cutoffs for "mc"/"bs" (default: min(500, n))
  
  # 2011-10-21 A. Papritz
  # 2012-05-04 AP coping with NAs
  ## 2013-06-12 AP substituting [["x"]] for $x in all lists
  ## (review)   correction: warning() instead of warnings() so that a
  ##            warning is actually signalled for incomplete cases
  
  statistic <- match.arg( statistic )
  
  ## exclude items with NAs
  
  t.sel <- complete.cases( data, pred, se.pred )
  
  ## BUG FIX: warnings() only prints the deferred warning list and does
  ## not signal a condition; warning() is required here
  if( sum( t.sel ) < length( t.sel ) ) warning(
    "missing values encountered when validating predictions"
  )
  
  data    <- data[t.sel]
  pred    <- pred[t.sel]
  se.pred <- se.pred[t.sel]
  
  if( missing( ncutoff ) || is.null( ncutoff ) ) ncutoff <- min( 500, length( data ) )
  
  result <- switch(
    statistic,
    pit = {
      
      ## probability integral transform: value of the predictive cdf at
      ## the observed datum
      
      pnorm( data, mean = pred, sd = se.pred )
    },
    mc = ,
    bs = {
      
      ## marginal calibration and Brier score
      
      ## empirical cdf ghat of the data at the distinct data values
      margin.calib <- data.frame(
        y = t.x <- unique( t.y <- sort( c( data ) ) ),
        ghat = cumsum( tabulate( match( t.y, t.x ) ) ) / length(t.y)
      )
      ## thin out to at most ncutoff equally spaced cutoffs
      t.sel <- trunc( 
        seq( 
          from = as.integer(1), 
          to = nrow( margin.calib ), 
          length.out = min( nrow( margin.calib ), ncutoff ) 
        ) 
      )
      margin.calib <- margin.calib[t.sel,]
      
      ## fbar = mean predictive cdf, bs = Brier score at each cutoff q
      t.bla <- t(
        sapply(
          margin.calib[["y"]],
          function( q, m, s, y ){
            t.p <- pnorm( q, mean = m, sd = s )
            c( 
              fbar = mean( t.p ), 
              bs = mean( ( t.p - as.numeric( y <= q ) )^2 )
            )
          },
          m = pred,
          s = se.pred,
          y = data
        )
      )
      cbind(
        margin.calib, as.data.frame( t.bla ) 
      )
    },
    st = {
      
      ## summary statistics of (standardized) prediction errors
      
      error <- data - pred
      std.error <- error / se.pred
      
      ## NOTE(review): Qn() is presumably robustbase::Qn (robust scale
      ## estimator) imported by the package -- confirm the namespace import
      c( 
        me = mean( error ),
        mede = median( error ),
        rmse = sqrt( mean( error^2 ) ),
        made = mad( error, center = 0 ),
        qne = Qn( error, finite.corr = TRUE ),
        msse = mean( std.error^2 ),
        medsse = median( std.error^2 )
      )
      
    }
  )
  
  return( result )   
  
}

## (web-scrape residue, commented out so the file parses as valid R)
## Try the georob package in your browser
## Any scripts or data that you put into this service are public.
## georob documentation built on May 2, 2019, 6:53 p.m.