R/fCover.R
In RStoolbox: Remote Sensing Data Analysis

Documented in fCover

#' Fractional Cover Analysis
#' 
#' fCover takes a classified high resolution image, e.g. vegetation and non-vegetation based on Landsat and calculates cover fractions for
#' pixels of a coarser resolution, e.g. MODIS.
#' 
#' @param classImage high resolution SpatRaster containing a landcover classification, e.g. as obtained by \link{superClass}.
#' @param predImage coarse resolution SpatRaster for which fractional cover will be estimated.
#' @param nSamples Integer. Number of pixels to sample from predImage to train the regression model
#' @param classes Integer. Classes for which fractional cover should be estimated (one or more). 
#' @param model Character. Which model to fit for image regression. See \link[caret]{train} for options. Defaults to randomForest ('rf')
#' @param maxNA Numeric. Maximal proportion of NAs allowed in training pixels. 
#' @param tuneLength Integer. Number of levels for each tuning parameters that should be generated by train. See Details and \code{\link[caret]{train}}. 
#' @param trControl \code{\link[caret]{trainControl}} object, specifying resampling, validation etc.
#' @param method DEPREACTED! in favor of \code{trControl=trainControl(method="cv")} Resampling method for parameter tuning. Defaults to 10 fold cross-validation. See \code{\link[caret]{trainControl}} for options.
#' @param verbose Logical. Print progress information.
#' @param clamp Logical. Enforce results to stay within [0,1] interval. Values <0 are reset to 0 and values >1 to 1. 
#' @param filename Character. Filename of the output raster file (optional).
#' @param overwrite Logical. if \code{TRUE}, \code{filename} will be overwritten.
#' @param ... further arguments to be passed to  \code{\link[terra]{writeRaster}}
#' @details 
#' fCover gets the pixel values in a high resolution classified image that correspond to 
#' randomly selected moderate resolution pixels and then calculates the percent of 
#' the classified image pixels that represent your cover type of interest. In other words, if 
#' your high resolution image has a pixel size of 1m and your moderate resolution image has a 
#' pixel size of 30m the sampling process would take a block of 900 of the 1m resolution pixels 
#' that correspond to a single 30m pixel and calculate the percentage of the 1m pixels that 
#' are forest. For example, if there were 600 forest pixels and 300 non-forest pixels the value 
#' given for the output pixel would be 0.67 since 67% of the block of 1m pixels were forest. 
#' 
#' fCover relies on the train() function from the caret package which provides access to a huge number of classifiers.
#' Please see the available options at \link[caret]{train}. The default classifier (randomForest) we chose has been shown
#' to provide very good results in image regression and hence it is recomended you start with this one. If you choose a different
#' classifier, make sure it can run in regression mode.
#' 
#' Many models require tuning of certain parameters. Again, this is handled by \link[caret]{train} from the caret package.
#' With the tuneLength argument you can specify how many different values of each tuning parameter should be tested. The Random Forest
#' algorithm for example can be tuned by varying the mtry parameter. Hence, by specifying tuneLength = 10, ten different levels
#' of mtry will be tested in a cross-validation scheme and the best-performing value will be chosen for the final model.
#'
#' 
#' If the total no-data values for a block of high resolution pixels is greater than maxNA then 
#' it will not be included in the training data set since there is too much missing data to provide 
#' a reliable cover percentage. If the no-data proporton is less then maxNA the no-data pixels are removed 
#' from the total number of pixels when calculating the percent cover. 
#' 
#' @return 
#' Returns a list with two elements: models contains the fitted models evaluated in tenfold cross-validation (caret train objects); 
#' fCover contains a SpatRaster with a fractional cover layer for each requested class.
#' @export 
#' @seealso \link{superClass}
#' @examples
#' \donttest{
#' library(terra)
#' library(caret)
#' ## Create fake input images
#' agg.level <- 9
#' modis <- terra::aggregate(rlogo, agg.level)
#' 
#' ## Perform an exemplary classification
#' lc      <- unsuperClass(rlogo, nClass=2)
#' 
#' ## Calculate the true cover, which is of course only possible in this example, 
#' ## because the fake corse resolution imagery is exactly res(rlogo)*9
#' trueCover <- terra::aggregate(lc$map, agg.level, 
#'                    fun = function(x, ...){sum(x == 1, ...)/sum(!is.na(x))})
#' 
#' ## Run with randomForest and support vector machine (radial basis kernel)
#' ## Of course the SVM is handicapped in this example,
#' ## due to poor tuning (tuneLength)
#' par(mfrow=c(2,3))
#' for(model in c("rf", "svmRadial")){
#'    fc <- fCover(
#'            classImage = lc$map ,
#'            predImage = modis,
#'            classes=1,
#'            trControl = trainControl(method = "cv", number = 3),
#'            model=model,
#'            nSample = 50,
#'            tuneLength=2
#'    )           
#'    
#'    ## How close is it to the truth?
#'    compare.rf <- trueCover - fc$map
#'    plot(fc$map, main = paste("Fractional Cover: Class 1\nModel:", model))
#'    plot(compare.rf, main = "Diffence\n true vs. predicted")
#'    plot(trueCover[],fc$map[],  xlim = c(0,1), ylim =c(0,1),
#'            xlab = "True Cover", ylab = "Predicted Cover" )
#'    abline(coef=c(0,1))
#
#'    rmse <- sqrt(global(compare.rf^2, "sum", na.rm = TRUE))/ncell(compare.rf)
#'    r2 <- cor(trueCover[], fc$map[], "complete.obs")
#'    text(0.9,0.1, adj=1, labels = 
#'         paste(c("RMSE:","\nR2:"), round(unlist(c(rmse, r2)),3), collapse=""))
#' }
#'
#' ## Reset par
#' par(mfrow=c(1,1))
#' }
fCover <- function(classImage, predImage, 
                  nSamples = 1000, classes = 1,maxNA = 0,clamp = TRUE,
                  model = "rf", tuneLength = 3, trControl = trainControl(method = "cv"), method = deprecated(),  
                  filename = NULL, overwrite = FALSE, verbose, ...){
  
  if(is_present(method)){
    deprecate_warn("0.4.0", "fCover(method)", "fCover(trControl=trainControl(method,...))")
  }
  
  predImage <- .toTerra(predImage)
  classImage <- .toTerra(classImage)
  if(!missing("verbose")) .initVerbose(verbose)
  
  ## Resolution check
  r1  <- res(predImage)
  if(max(res(classImage)) >= max(r1)) stop("Resolution of classImage must be smaller than the resolution of predImage")
  
  ## Draw random sample from coarse res image
  .vMessage("Collecting random samples")
  dummy <- terra::crop(rast(predImage), classImage, snap = "in") ## crop properly to avoid sampling in marginal pixels 
  ranSam <- .iterativeRandomSample(predImage, nSamples, ext = ext(dummy))
  
  ## Extract classified (high res) values
  .vMessage("Extracting classified pixels")
  d   <- ranSam[,c("x","y")]
  
  exts <- do.call(c, apply(cbind(d[,1] - r1[1]/2, d[,1] + r1[1]/2, 
                                 d[,2] - r1[2]/2, d[,2] + r1[2]/2), 1, function(x) {
                                   st_as_sfc(st_bbox(ext(x)))
                                 }))
  st_crs(exts) <- st_crs(classImage)
  exts <- st_as_sf(exts)
  st_geometry(exts) <- "geometry"
  coverage_fraction <- value <- NULL
  fCov <- exact_extract(classImage, exts ,
                        function(x) {
                          ts <- sum(x$coverage_fraction, na.rm = TRUE)
                          o <- x %>% filter(value %in% classes) %>%
                            mutate(value = addNA(factor(value, classes))) %>%
                            complete(value, fill = list(coverage_fraction = 0 )) %>%  
                            group_by(value) %>% 
                            summarize(fCover = sum(coverage_fraction)/ts) %>%
                            pivot_wider(names_from = value , values_from = fCover, values_fill = 0, names_expand = TRUE)
                        },
                        summarize_df = TRUE, progress = FALSE)
  
  missingClasses <- classes[!colSums(fCov[,as.character(classes)])]
  if(length(missingClasses)) {
    stop(sprintf("One or more classes are not represented in the sampled pixels. Missing class/classes: %s\nEither this class does not occur in classImage or nSample was too small for a representative sample.", 
                 paste(missingClasses,collapse=", ")), call.=FALSE)
  }
  
  ## NA handling
  ##
  ## This means_ that if there is a NA pixel, it will be weighted proportionately and
  ## to the area of the occuring classes. This is a design decision. Alternatively we
  ## could count it as non-class pixel for either class, however, then these don't sum up 
  ## to unity anymore. ==> In general we should not make use of this option because of the
  ## required assumptions and simply ignore samples with NAs ==> default argument maxNA=0
  ## Maybe we should ditch the maxNA argument alltogether
  include <- fCov[,'NA'] <= maxNA
  if(!any(include)) stop("No non-NA samples!")
  # Normalize by 1-NAfrequency
  fCov[include,classes]   <- fCov[include,classes]/(1-fCov[include,'NA',drop=TRUE])
  

  ## Fit regression model and predict
  wopt <- list(...)
  fn <- if(length(classes) == 1 & !is.null(filename)) filename else "" ## write raster here already during predict if only one layer is output
  fCL <- lapply(seq_along(classes), function(cl){
    .vMessage("Fitting regression model for class ", cl)
    
    ## Assemble training data (and remove cells exceeding maxNA)
    trainingData <- data.frame(response = fCov[include, cl, drop = TRUE], ranSam[include, -c(1:2)])
    
    ## Fit model
    modelFit      <- caret::train(response ~ ., data = trainingData, method = model, tuneLength = tuneLength,  
                           trControl = trControl)
    ## TODO: enforce spatial separation between training and validation samples 
    
    ## Predict  
    .vMessage(paste0("Predicting fractional cover for class ", cl))               
    out <- terra::predict(predImage, model = modelFit, na.rm = TRUE, filename = fn, overwrite = overwrite, wopt =  wopt)              
    list(modelFit, out)                
  })
  
  ## Prepare output and return
  out     <- do.call(c, lapply(fCL, "[[", 2))
  models  <- lapply(fCL, "[[", 1)
  if(clamp) out <- clamp(out, lower = 0, upper = 1, values = TRUE)
  names(models) <- names(out) <- paste0("fC_", classes)
  
  names(fCov) <-  paste0("fC_", c(classes, NA))
  exts <- cbind(exts[include,],fCov[include,])
  
  if(!is.null(filename) & length(classes) > 1) {
    out <- terra::writeRaster(out, filename, overwrite = overwrite,  ...)
  }
  structure(list(model = models, trainData = exts, map = out), class = c("fCover", "RStoolbox"))
  
}



.iterativeRandomSample <- function(x, nSamples = 100, maxIterations = 5, ext = NULL, xy = TRUE) {
  
  if(!is.null(ext)) {
    x <- terra::crop(x, ext, snap = "in")
  }
  if(nSamples > ncell(x)) {
    nSamples <- ncell(x)
    warning("nSamples > ncell(predImage). Resetting nSamples to ncell(predImage)")   
  }
  cells <- 1:ncell(x)
  
  valsList <- list()
  while(maxIterations) {
    samples  <- sample(cells, nSamples, replace = FALSE)
    vals  <- terra::extract(x, y = samples, xy = xy)
    if(xy && !"x" %in% colnames(vals)) {  ## TODO: report bug in terra::extract -> is not returning xy
      vals <- data.frame(xyFromCell(x, samples), vals)
    }
    valsList[[length(valsList) + 1]]  <-  vals[complete.cases(vals),]
    cells <- setdiff(samples, cells)
    if(NROW(vals)>=nSamples || !length(cells)) break
    maxIterations <- maxIterations - 1
  }
  do.call(rbind, valsList)
}