pemgeneratr: Predictive Ecosystem Mapping

Documented in predict_landscape

#' Predict Landscape
#'
#' Takes a mlr model object and associated covariate rasters to generate a thematic map.
#' In order to process the landscape level prediction the input co-variated are tiled
#' and then mosaic'd together at the end.
#'
#' **Action** _is there a bug in tilemaker or with stars causing a xy offset error.
#' Solution is currently a hack.  See `load tile area`.
#'
#' @param model Location of a `mlr` model object (i.e. path to it.)
#' @param cov   A list of raster files.  These will be loaded as a `stars` object
#' @param tilesize Specify the number of pixels in the `x` and `y` directions for the tiles to be generated.  If your computer is mememory limited use a smaller tile (e.g. 500).
#' @param outDir directory for the output file.
#' @keywords predict, landscape
#' @export
#' @examples
#' ### Testing
#` cov <- list.files("e:/tmpGIS/PEM_cvs/", pattern = "*.tif",full.names = TRUE)
#` cov <- cov[-(grep(cov, pattern = "xml"))]
#'
# predict_landscape(model = "e:/tmp/model_gen_test/model.rds",
#'                  cov = cov,
#'                  tilesize = 1000,
#'                  outDir = "e:/tmp/predict_landscape")

predict_landscape <- function(model, cov,tilesize = 500,
                              outDir = "./predicted") {
  ## libraries  -----
  library(dplyr)
  library(mlr)

  ## Adjust names
  ## This will be used in the loop to rename stars object
  n <- basename(cov)
  n <- gsub(".tif", "", n)

  ## Load the model -----
  mod <- readRDS(model)


  ## Error handle -- model vs. cov -----------
  ## If names in model features are found in the cov list continue.
  ## ELSE exit with message
  if (length(setdiff(mod$features, n)) != 0) { ## tests if all model features are found in the cov list
    ## On model vs. cov error ---------------
    print("Name mis-match between the model features and the names of the rasters.")
    print("The following raster co-variates are not found in the model features list:")
    print(setdiff(mod$features, n))

    print("the following raster ")
  } else {

    ## drop rasters if not included in model
    if (length(setdiff(n, mod$features) > 0 )) {
    print("The following rasters are removed as they were not included in the model:")
    print(setdiff(n, mod$features))

    ## drop layers not used
    drop_layers <- paste0(setdiff(n, mod$features), ".tif")
    cov <- subset(cov, !(basename(cov) %in% drop_layers) )
    n <- basename(cov) ; n <- gsub(".tif", "", n)  ## fix n
    }


    ## create output dir -----------
    ifelse(!dir.exists(outDir), dir.create(outDir, recursive = TRUE), print("model folder already exists"))

    ## alternate -- outside of pemgeneratr
    # source(here::here('_functions', 'tile_index.R'))


    ## create tiles ---------------
    tiles <- pemgeneratr::tile_index(cov[1], tilesize)

    ## alternate -- outside of pemgeneratr
    # source(here::here('_functions', 'tile_index.R'))
    # tiles <- tile_index(cov[1], tilesize)


    ## begin loop through tiles -----

    ## set up progress messaging
    a <- 0 ## running total of area complete
    ta <- sum(as.numeric(sf::st_area(tiles)))


    for (i in 1:nrow(tiles)) {    ## testing first 2 tiles       ##nrow(tiles)) {
      t <- tiles[i,]  ## get tile
      print(paste("working on ", i, "of", nrow(tiles)))
      print("...")


      ## * load tile area---------
      print("... loading new data (from rasters)...")
      r <- stars::read_stars(cov,
                             RasterIO = list(nXOff  = t$offset.x[1]+1, ## hack -- stars and tile_maker issue??
                                             nYOff  = t$offset.y[1]+1,
                                             nXSize = t$region.dim.x[1],
                                             nYSize = t$region.dim.y[1]))

      ### Create an alternate template ------
      ### This forces a numeric template
      tdim <- dim(r[1])
      ext <- sf::st_bbox(t)
      emty <- raster::raster(nrow=tdim[1], ncol = tdim[2],
                             xmn=ext[1], xmx=ext[3], ymn=ext[2], ymx=ext[4],
                             vals=1.1)
      template <- stars::st_as_stars(emty)
      sf::st_crs(template) <- sf::st_crs(t)
      ### ----------------------------------



      ## * update names ---------
      names(r) <- n

      ## * convert tile to dataframe ---------
      rsf <- sf::st_as_sf(r, as_points = TRUE)
      # rsf <- na.omit(rsf)  ## na.omit caused issues

      ## * Test if tile is empty -------------
      na_table <- as.data.frame(sapply(rsf, function(x) all(is.na(x))))
      ## * determine na counts -- this will be used to restore NA values if tile is run.
      na_table$count <- as.data.frame(sapply(rsf, function(x) sum(is.na(x))))[,1]

      ## If any attribute is empty skip the tile
      if(any(na_table[,1] == TRUE)) {

        print("some variables with all NA values, skipping tile")

      } else{

        ## * Test if tile is empty -- original version
        # naTest <- as.data.frame(rsf[,1])
        # naTest <- naTest[,1]
        # if (sum(!is.na(naTest)) == 0) { ## if all the values in the tile are NA skip to next tile.
        #   print("... Empty tile moving to next...")
        # } else {

        ## * predict ---------

        ## * * Managing NA values ----------------
        ## When some of the values are NA change them to zero
        ## * Identify the attribute with the highest number of NA values.
        na_max <- na_table[na_table$count ==  max(na_table$count),]
        na_max <- row.names(na_max[1,]) ## if multiple attributes -- take the first

        ## make a copy of the attribute with the highest number of na values
        rsf_bk <- rsf[,na_max]  ## -- this will be used to restore NA values
        rsf[is.na(rsf)] <- 0 ## convert NA to zero as the predict function cannot handle NA

        print("... modelling outcomes (predicting)...")
        pred <- predict(mod, newdata = rsf)

        ## Restore NA values
        pred_dat <- pred$data ## predicted values extracted
        pred_dat[is.na(rsf_bk[,1]), 1:length(pred_dat)] <- NA ## if originally NA restore NA
        pred$data <- pred_dat ## values restored to pred -- allows for cbind without issue.

        ## * geo-link predicted values ---------
        r_out <- cbind(rsf, pred)

        ## layers to keep (i.e. newly predicted layers)
        keep <- setdiff(names(r_out),
                        names(r))
        keep <- keep[-length(keep)] ## drops the last entry (geometry field, not a name)

        r_out <- r_out %>% dplyr::select(keep)


        ## Save the names of the model response -----
        ## The levels are in the multiclass 'response'
        wkey <- 0
        if (wkey == 0)  {
          respNames <- levels(r_out$response) ## this becomes the dictionary to describe the raster values
          write.csv(respNames, paste(outDir, "response_names.csv",
                                     sep = "/"),
                    row.names = TRUE)
          wkey <- 1 ## Change this value so this small is statement does not execute again.
        }



        ## change the text values to numeric values.
        r_out$response <- as.numeric(r_out$response)

        ## Set up subdirectories for rastertile outputs
        print("... exporting raster tiles...")
        for (k in keep) {
          dir.create(paste(outDir, k, sep = "/"))
        }



        ## * save tile (each pred item saved) ---------
        for (j in 1:length(keep)) {
          # j <- 2  ## testing
          out <- stars::st_rasterize(r_out[j],
                                     # template = r[1])
                                     template = template) ## fix template bug
          stars::write_stars(out,
                             paste0(outDir,"/",
                                    keep[j], "/",             #sub-directoy
                                    keep[j], "_", i, ".tif")) #tile name
        }

        ## * report progress -----
        a <- a + as.numeric(sf::st_area(t))
        print(paste(round(a/ta*100,1), "% complete"))
        print("") ## blank line

      } ## end if statement -- for when tile is empty
    } ## END LOOP -------------
    print("All predicted tiles generated")





    ## Mosaic Tiles ---------------

    print("Generating raster mosaics")
    for (k in keep) {
      # get list of tiles
      r_tiles <- list.files(paste(outDir, k, sep = "/"),
                            pattern = ".tif",
                            full.names = TRUE)
      # remove pot. xml files
      # this was causing script to fail -- returned empty vector
      # r_tiles <- r_tiles[-(grep(r_tiles, pattern = "xml"))] ## drop any associated xml files


      ## mosaic
      gdalUtils::mosaic_rasters(gdalfile = r_tiles, ## list of rasters to mosaic
                                dst_dataset = paste0(outDir, "/", k, ".tif"),  #output: dir and filename
                                output_Raster = TRUE) ## saves the raster (not just a virtual raster)

    }


  } ### end positive if statment ----------

} ### end function