#' Predict Landscape
#'
#' Takes a mlr model object and associated covariate rasters to generate a thematic map.
#' In order to process the landscape level prediction the input co-variated are tiled
#' and then mosaic'd together at the end.
#'
#' **Action** _is there a bug in tilemaker or with stars causing a xy offset error.
#' Solution is currently a hack. See `load tile area`.
#'
#' @param model Location of a `mlr` model object (i.e. path to it.)
#' @param cov A list of raster files. These will be loaded as a `stars` object
#' @param tilesize Specify the number of pixels in the `x` and `y` directions for the tiles to be generated. If your computer is mememory limited use a smaller tile (e.g. 500).
#' @param outDir directory for the output file.
#' @keywords predict, landscape
#' @export
#' @examples
#' ### Testing
#` cov <- list.files("e:/tmpGIS/PEM_cvs/", pattern = "*.tif",full.names = TRUE)
#` cov <- cov[-(grep(cov, pattern = "xml"))]
#'
# predict_landscape(model = "e:/tmp/model_gen_test/model.rds",
#' cov = cov,
#' tilesize = 1000,
#' outDir = "e:/tmp/predict_landscape")
predict_landscape <- function(model, cov,tilesize = 500,
outDir = "./predicted") {
## libraries -----
library(dplyr)
library(mlr)
## Adjust names
## This will be used in the loop to rename stars object
n <- basename(cov)
n <- gsub(".tif", "", n)
## Load the model -----
mod <- readRDS(model)
## Error handle -- model vs. cov -----------
## If names in model features are found in the cov list continue.
## ELSE exit with message
if (length(setdiff(mod$features, n)) != 0) { ## tests if all model features are found in the cov list
## On model vs. cov error ---------------
print("Name mis-match between the model features and the names of the rasters.")
print("The following raster co-variates are not found in the model features list:")
print(setdiff(mod$features, n))
print("the following raster ")
} else {
## drop rasters if not included in model
if (length(setdiff(n, mod$features) > 0 )) {
print("The following rasters are removed as they were not included in the model:")
print(setdiff(n, mod$features))
## drop layers not used
drop_layers <- paste0(setdiff(n, mod$features), ".tif")
cov <- subset(cov, !(basename(cov) %in% drop_layers) )
n <- basename(cov) ; n <- gsub(".tif", "", n) ## fix n
}
## create output dir -----------
ifelse(!dir.exists(outDir), dir.create(outDir, recursive = TRUE), print("model folder already exists"))
## alternate -- outside of pemgeneratr
# source(here::here('_functions', 'tile_index.R'))
## create tiles ---------------
tiles <- pemgeneratr::tile_index(cov[1], tilesize)
## alternate -- outside of pemgeneratr
# source(here::here('_functions', 'tile_index.R'))
# tiles <- tile_index(cov[1], tilesize)
## begin loop through tiles -----
## set up progress messaging
a <- 0 ## running total of area complete
ta <- sum(as.numeric(sf::st_area(tiles)))
for (i in 1:nrow(tiles)) { ## testing first 2 tiles ##nrow(tiles)) {
t <- tiles[i,] ## get tile
print(paste("working on ", i, "of", nrow(tiles)))
print("...")
## * load tile area---------
print("... loading new data (from rasters)...")
r <- stars::read_stars(cov,
RasterIO = list(nXOff = t$offset.x[1]+1, ## hack -- stars and tile_maker issue??
nYOff = t$offset.y[1]+1,
nXSize = t$region.dim.x[1],
nYSize = t$region.dim.y[1]))
### Create an alternate template ------
### This forces a numeric template
tdim <- dim(r[1])
ext <- sf::st_bbox(t)
emty <- raster::raster(nrow=tdim[1], ncol = tdim[2],
xmn=ext[1], xmx=ext[3], ymn=ext[2], ymx=ext[4],
vals=1.1)
template <- stars::st_as_stars(emty)
sf::st_crs(template) <- sf::st_crs(t)
### ----------------------------------
## * update names ---------
names(r) <- n
## * convert tile to dataframe ---------
rsf <- sf::st_as_sf(r, as_points = TRUE)
# rsf <- na.omit(rsf) ## na.omit caused issues
## * Test if tile is empty -------------
na_table <- as.data.frame(sapply(rsf, function(x) all(is.na(x))))
## * determine na counts -- this will be used to restore NA values if tile is run.
na_table$count <- as.data.frame(sapply(rsf, function(x) sum(is.na(x))))[,1]
## If any attribute is empty skip the tile
if(any(na_table[,1] == TRUE)) {
print("some variables with all NA values, skipping tile")
} else{
## * Test if tile is empty -- original version
# naTest <- as.data.frame(rsf[,1])
# naTest <- naTest[,1]
# if (sum(!is.na(naTest)) == 0) { ## if all the values in the tile are NA skip to next tile.
# print("... Empty tile moving to next...")
# } else {
## * predict ---------
## * * Managing NA values ----------------
## When some of the values are NA change them to zero
## * Identify the attribute with the highest number of NA values.
na_max <- na_table[na_table$count == max(na_table$count),]
na_max <- row.names(na_max[1,]) ## if multiple attributes -- take the first
## make a copy of the attribute with the highest number of na values
rsf_bk <- rsf[,na_max] ## -- this will be used to restore NA values
rsf[is.na(rsf)] <- 0 ## convert NA to zero as the predict function cannot handle NA
print("... modelling outcomes (predicting)...")
pred <- predict(mod, newdata = rsf)
## Restore NA values
pred_dat <- pred$data ## predicted values extracted
pred_dat[is.na(rsf_bk[,1]), 1:length(pred_dat)] <- NA ## if originally NA restore NA
pred$data <- pred_dat ## values restored to pred -- allows for cbind without issue.
## * geo-link predicted values ---------
r_out <- cbind(rsf, pred)
## layers to keep (i.e. newly predicted layers)
keep <- setdiff(names(r_out),
names(r))
keep <- keep[-length(keep)] ## drops the last entry (geometry field, not a name)
r_out <- r_out %>% dplyr::select(keep)
## Save the names of the model response -----
## The levels are in the multiclass 'response'
wkey <- 0
if (wkey == 0) {
respNames <- levels(r_out$response) ## this becomes the dictionary to describe the raster values
write.csv(respNames, paste(outDir, "response_names.csv",
sep = "/"),
row.names = TRUE)
wkey <- 1 ## Change this value so this small is statement does not execute again.
}
## change the text values to numeric values.
r_out$response <- as.numeric(r_out$response)
## Set up subdirectories for rastertile outputs
print("... exporting raster tiles...")
for (k in keep) {
dir.create(paste(outDir, k, sep = "/"))
}
## * save tile (each pred item saved) ---------
for (j in 1:length(keep)) {
# j <- 2 ## testing
out <- stars::st_rasterize(r_out[j],
# template = r[1])
template = template) ## fix template bug
stars::write_stars(out,
paste0(outDir,"/",
keep[j], "/", #sub-directoy
keep[j], "_", i, ".tif")) #tile name
}
## * report progress -----
a <- a + as.numeric(sf::st_area(t))
print(paste(round(a/ta*100,1), "% complete"))
print("") ## blank line
} ## end if statement -- for when tile is empty
} ## END LOOP -------------
print("All predicted tiles generated")
## Mosaic Tiles ---------------
print("Generating raster mosaics")
for (k in keep) {
# get list of tiles
r_tiles <- list.files(paste(outDir, k, sep = "/"),
pattern = ".tif",
full.names = TRUE)
# remove pot. xml files
# this was causing script to fail -- returned empty vector
# r_tiles <- r_tiles[-(grep(r_tiles, pattern = "xml"))] ## drop any associated xml files
## mosaic
gdalUtils::mosaic_rasters(gdalfile = r_tiles, ## list of rasters to mosaic
dst_dataset = paste0(outDir, "/", k, ".tif"), #output: dir and filename
output_Raster = TRUE) ## saves the raster (not just a virtual raster)
}
} ### end positive if statment ----------
} ### end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.