R/cv_model.R

### =========================================================================
### cv model
### =========================================================================
#' Cross-validation of fitted model
#'
#' @param model a model object (GLM or GAM)
#' @param data data.frame for evaluation. Must contain columns with the same names
#' as were supplied to the model fitting function
#' @param K the number of folds/strata in block cross-validation for random generation
#' (if strata vector is not supplied)
#' Minimum the same number as starta, maxuimum nrow(df)/10
#' @param strata (optional) vector of the same length as the number of rows in
#' the 'data' data.frame with integers (1:n) corresponding to n pre-defined strata to be used
#' @return vector with cross-validation predictions for each
#' observation (to be used for model evaluation)
#' @author Niklaus Zimmermann, Philipp Brun
#' @export
#' @examples
#'
#' # Fit a simple model
#' mod1=glm(X3740 ~ poly(bio_01,2) + poly(bio_01,2) + poly(forest_fraction,2),
#' data=obs.sel,family="binomial")
#'
#' # Use basic cv.model set-up for cross-validation predictions
#' cvpr1=cv.model(mod1,K=5)
#'
#' # Use cv.model for block cross-validation  with few blocks
#' par(mfrow=c(1,2))
#' strt1=make_blocks(df=obs.sel[,c("bio_01","bio_03")],nstrat=3,nclusters=3)
#' cvpr2=cv.model(mod1,strata=strt1)
#' # Quick comparison
#' plot(cvpr1[,2],cvpr2[,2],col=strt1,main="few blocks vs random")
#' lines(c(-1,2),c(-1,2),col="red")
#'
#' # Use cv.model for block cross-validation with many blocks
#' strt2=make_blocks(df=obs.sel[,c("bio_01","bio_03")],nstrat=5,nclusters=15)
#' cvpr3=cv.model(mod1,strata=strt2)
#' # Quick comparison
#' plot(cvpr1[,2],cvpr3[,2],col=strt2,main="many blocks vs random")
#' lines(c(-1,2),c(-1,2),col="red")
#'
cv.model <- function(model,strata=numeric(),K=numeric(),data = model$data){
  if(length(strata)==0 & length(K)==0){
    stop("Either strata or K argument must be supplied!")
  } else if(length(strata)==0){
    ks = make_blocks(nstrat=K,npoints=nrow(data))
  } else {
    ks = strata
  }

  cvpreds <- data.frame(row = row.names(data), cvpred = numeric(length = nrow(data)))
  for(i in unique(ks)){
    train <- data[ks!=i,]
    test <- data[ks==i,]
    modtmp <- update(model, data = train)
    cvpreds[which(ks==i),2] <- predict(modtmp, newdata = test, type = 'response')
  }
  return(cvpreds)
}
filBe87/PKUss documentation built on June 29, 2019, 12:12 a.m.