R/ensemble.VIF.R

Defines functions `ensemble.VIF.dataframe` `ensemble.VIF`

`ensemble.VIF` <- function(
    x=NULL, a=NULL, an=10000, 
    VIF.max=10, keep=NULL,
    layer.drops=NULL, factors=NULL, dummy.vars=NULL
)
{
    if(is.null(x) == T) {stop("value for parameter x is missing (RasterStack object)")}
    if(inherits(x,"RasterStack") == F) {stop("x is not a RasterStack object")}
    if(is.null(a) == F) {names(a) <- c("x", "y")}
#
    layer.drops <- as.character(layer.drops)
    factors <- as.character(factors)
    dummy.vars <- as.character(dummy.vars)
#
    vars <- names(x)
#
    if (length(layer.drops) > 0) {
        var.drops <- layer.drops
        nd <- length(layer.drops)
        for (i in 1:nd) {     
            if (any(vars == layer.drops[i]) == FALSE) {
                cat(paste("\n", "WARNING: variable to exclude '", layer.drops[i], "' not among grid layers", sep = ""))
            }else{
                cat(paste("\n", "NOTE: variable '", layer.drops[i], "' will not be included as explanatory variable", sep = ""))
                x <- raster::dropLayer(x, which(names(x) %in% c(layer.drops[i]) ))
                x <- raster::stack(x)
                vars <- names(x)
                if (length(factors) > 0) {
                    factors <- factors[factors != layer.drops[i]]
                }
                if (length(dummy.vars) > 0) {
                    dummy.vars <- dummy.vars[dummy.vars != layer.drops[i]]
                }
            }
        }
    }else{
        var.drops <- NULL
    }
#
    var.drops <- as.character(var.drops)
#
    vars <- names(x)
    if (length(factors) > 0) {
        var.drops <- c(var.drops, factors)
        for (i in 1:length(factors)) {vars <- vars[which(vars != factors[i])]}
    }
#
    nv <- length(vars)
#
    result <- data.frame(array(dim=c(nv, nv)))
    names(result) <- vars
    row.names(result) <- paste("step_", c(1:nv), sep="")
#
    if (is.null(a) == T) {a <- dismo::randomPoints(x[[1]], n=an, p=NULL, excludep=F)}
# presence locations will not be used, but are required for the ensemble.calibrate.models function
    p <- dismo::randomPoints(x[[1]], n=30, p=NULL, excludep=F)
#
    i <- 0
    VIF.result.max <- Inf

# for ensemble.calibrate.models, need to use NULL again for factors etc.
    if (length(var.drops) == 0) {var.drops <- NULL}
    if (length(dummy.vars) == 0) {dummy.vars <- NULL}

    cat(paste("\n", "Selection of explanatory variables based on the Variance Explanation Factor", sep = ""))
    cat(paste("\n", "Data obtained from  ", nrow(a), " point locations", sep = ""))
    cat(paste("\n", "If some variables have VIF > VIF.max, then the variable with largest VIF is excluded", sep = ""))
    cat(paste("\n", "The procedure stops when all variables have VIF <= VIF.max", "\n", sep = ""))

    while(VIF.result.max >= VIF.max) { 
        VIF.result <- ensemble.calibrate.models(x, p=p, a=a,
            layer.drops=var.drops, factors=NULL,
            VIF=T, ENSEMBLE.tune=F, 
            MAXENT=0, MAXNET=0, MAXLIKE=0, GBM=0, GBMSTEP=0, RF=0, CF=0,
            GLM=0, GLMSTEP=0, GAM=0, 
            GAMSTEP=0, MGCV=0, MGCVFIX=0,EARTH=0, RPART=0, NNET=0, FDA=0, 
            SVM=0, SVME=0, GLMNET=0,
            BIOCLIM.O=0, BIOCLIM=0, DOMAIN=0, MAHAL=0, MAHAL01=0)$VIF

        i <- i+1
        for (v in 1:length(VIF.result)) {result[i, which(names(result) == names(VIF.result)[v])] <- VIF.result[which(names(VIF.result) == names(VIF.result)[v])]}

	j <- 1
	while(names(VIF.result[j]) %in% keep && j <= length(VIF.result)) {j <- j+1}
	if (j <= length(VIF.result)){
	    VIF.result.max <- VIF.result[j]
            var.drops <- c(var.drops, names(VIF.result)[j])
	}else{
            VIF.result.max <- VIF.max-1
        }
    }

# remove last variable included
    if (length(var.drops) == 1) {
        var.drops <- as.character(NULL)
    }else{
        nvd <- length(var.drops)-1
        var.drops <- var.drops[1:nvd]
    }

    # include factors again as no information to exclude (drop)
    if (length(factors) > 0) {
        for (i in 1:length(factors)) {var.drops <- var.drops[which(var.drops != factors[i])]}
        vars.included <- c(names(VIF.result), factors)
    }else{
        vars.included <- names(VIF.result)
        factors <- NULL
    }

    dummy.vars <- as.character(dummy.vars)
    if (length(dummy.vars) > 0) {
        for (i in 1:length(var.drops)) {
            if (var.drops[i] %in% dummy.vars) {dummy.vars <- dummy.vars[which(dummy.vars != var.drops[i])]}
        }
    }else{
        dummy.vars <- NULL
    }

    if (length(var.drops) == 0) {var.drops <- NULL}

    result <- result[rowSums(result, na.rm=T) > 0, , drop=F]

    cat(paste("Summary of VIF selection process:", "\n", sep = ""))
    print(result)
    cat(paste("\n", sep = ""))

    cat(paste("Final selection of variables:", "\n", sep = ""))
    print(vars.included)
    cat(paste("\n", sep = ""))

    result <- result[rowSums(result, na.rm=T) > 0, ]
    return(list(stepwise.results=result, var.drops=var.drops, vars.included=vars.included,
        factors=factors, dummy.vars.included=dummy.vars, VIF.final=VIF.result))
}

`ensemble.VIF.dataframe` <- function(
    x=NULL, VIF.max=10, keep=NULL,
    car=TRUE, silent=F
)
{
    if(is.null(x) == T) {stop("value for parameter x is missing (data.frame)")}
    if(inherits(x, "data.frame") == F) {stop("x is not a data.frame")}
#
    VIFcalc <- function(x, var.drops=NULL, car=T, silent=F) {
        x1 <- x[, (names(x) %in% var.drops) == F]
        varnames <- names(x1)
        x1$RandomizedResponse <- x1[sample(nrow(x1)), 1]
        LM.formula <- as.formula(paste("RandomizedResponse ~ ", paste(varnames, sep="", collapse="+"), sep="", collapse="+"))

        if (car==T && silent==F) {
            vifresult <- car::vif(lm(formula=LM.formula, data=x1))
            if (is.null(vifresult) == F) {
                cat(paste("\n", "Variance inflation (package: car)", "\n", sep = ""))        
                print(vifresult)
                cat(paste("\n"))
            }else{
               cat(paste("\n", "NOTE: no result from car::vif", "\n", sep = ""))    
            }
        }

        if (silent == F) {
            cat(paste("VIF directly calculated from linear model with focal numeric variable as response", "\n", sep = ""))
        }
        newVIF <- numeric(length=length(varnames))
        newVIF[] <- NA
        names(newVIF) <- varnames
        for (i in 1:length(varnames)) {
            response.name <- varnames[i]
            explan.names <- varnames[-i]
            if (is.factor(x[, varnames[i]]) == F) {
                LM.formula <- as.formula(paste(response.name, "~", paste(explan.names, collapse="+"), sep=""))
                newVIF[i] <- summary(lm(formula=LM.formula, data=x1))$r.squared
            }
        }
        newVIF <- 1/(1-newVIF)
        newVIF <- sort(newVIF, decreasing=T, na.last=T)
        if (silent == F) {print(newVIF)}
        return(newVIF)
    }
#
    vars <- names(x)
    nv <- length(vars)
#
    result <- data.frame(array(dim=c(nv, nv)))
    names(result) <- vars
    row.names(result) <- paste("step_", c(1:nv), sep="")
#
    i <- 0
    VIF.result.max <- Inf
    var.drops <- NULL

    while(VIF.result.max >= VIF.max) { 
        VIF.result <- VIFcalc(x, car=car, silent=silent, var.drops=var.drops)
        i <- i+1
        for (v in 1:length(VIF.result)) {result[i, which(names(result) == names(VIF.result)[v])] <- VIF.result[which(names(VIF.result) == names(VIF.result)[v])]}

	j <- 1
	while(names(VIF.result[j]) %in% keep && j <= length(VIF.result)) {j <- j+1}
	if (j <= length(VIF.result)){
	    VIF.result.max <- VIF.result[j]
            var.drops <- c(var.drops, names(VIF.result)[j])
	}else{
            VIF.result.max <- VIF.max-1
        }
    }

# remove last variable included
    if (length(var.drops) == 1) {
        var.drops <- as.character(NULL)
    }else{
        nvd <- length(var.drops)-1
        var.drops <- var.drops[1:nvd]
    }

    result <- result[rowSums(result, na.rm=T) > 0, , drop=F]
    vars.included <- names(VIF.result)

    if (silent == F) {
        cat(paste("\n", "Summary of VIF selection process:", "\n", sep = ""))
        print(result)
        cat(paste("\n", "Final selection of variables:", "\n", sep = ""))
        print(vars.included)
        cat(paste("\n", sep = ""))
    }

    return(list(stepwise.results=result, var.drops=var.drops, vars.included=vars.included,
        VIF.final=VIF.result))
}

Try the BiodiversityR package in your browser

Any scripts or data that you put into this service are public.

BiodiversityR documentation built on Jan. 6, 2023, 5:18 p.m.