vignettes/classify_dirty.R

# a quick & dirty script for computing classifications (winter/summer, multiple classes) based on tilesDir content
# - it is assumed that the modelFile contains three models sorted from most to less precise one
# - for performance reasons (1000 times speedup) it's better to read whole rasters at once but it makes the script very memory hungry
#   so choose the number of threads carefully
modelFile = '/eodc/private/boku/ACube2/models/LC/2018y1_MODEL_LC.RData'
tilesDir = '/eodc/private/boku/ACube2/tiles'
tmpDir = '/eodc/private/boku/ACube2/tmp'
year = 2018
outBand = 'CLASS'
blockSize = 1000000
learnerNumThreads = 15
nCores = 3
cubeRpath = '/eodc/private/boku/software/cubeR'
skipExisting = TRUE

devtools::load_all(cubeRpath, quiet = TRUE)
library(dplyr, quietly = TRUE)
library(mlr3, quietly = TRUE)
library(mlr3learners, quietly = TRUE)
library(doParallel, quietly = TRUE)

e = new.env()
load(modelFile, envir = e)
models = get(ls(envir = e)[1], envir = e)
rm(e)
models = lapply(models, function(x) {
  x$learner$predict_type = 'prob'
  x$learner$param_set$values$num.threads = learnerNumThreads
  x
})

cols = lapply(models, function(x) {dplyr::tibble(var = x$cols)}) %>%
  dplyr::bind_rows() %>%
  dplyr::distinct() %>%
  dplyr::mutate(var  = gsub('-', '.', .data$var)) %>%
  dplyr::mutate(var2 = gsub('[.]', '-', .data$var)) %>%
  tidyr::separate(.data$var2, c('band', 'date'), sep = '_') %>%
  dplyr::mutate(band = sub('Q([0-9]{2})', 'q\\1', toupper(.data$band)))

registerDoParallel()
options(cores = nCores)
tiles = list.dirs(tilesDir, full.names = FALSE, recursive = FALSE)
tiles = tiles[nchar(tiles) == 6]
output = foreach(tls = tiles, .combine = bind_rows) %dopar% {
#output = foreach(tls = tiles, .combine = bind_rows) %do% {
  cat(tls, '\n', sep = '')

  outFiles = getTilePath(tilesDir, tls, paste0(year, 'y1'), paste0(outBand, c('', 'PROB')), 'tif')
  processed = FALSE
  if (!skipExisting | any(!file.exists(outFiles))) {
    try({
      tmpFiles = sub('^.*/', paste0(tmpDir, '/'), outFiles)
      unlink(c(tmpFiles, outFiles))

      cols = cols %>%
        dplyr::mutate(tileFile = getTilePath(tilesDir, tls, .data$date, .data$band, 'tif'))
      outputClass = raster::raster(raster::raster(cols$tileFile[1]))
      outputProb = raster::raster(outputClass)

      input = vector('list', nrow(cols))
      names(input) = c(cols$var)
      for (i in seq_along(cols$var)) {
        #cat(cols$var[i], '\n')
        input[[i]] = raster::getValues(raster::raster(cols$tileFile[i]))
      }
      input = dplyr::as_tibble(input) %>%
        dplyr::mutate(
          .dummy = rep_len(factor(models[[1]]$levels), n()),
          block = as.integer((row_number() - 1L) / blockSize)
        )
      cat(tls, ' input data read', sep = '')

      output = input %>%
        dplyr::group_by(block) %>%
        dplyr::do({
          x = .data
          cat(tls, ' block ', x$block[1], '\n', sep = '')
          result = dplyr::tibble(
            class = rep(NA_integer_, nrow(x)),
            prob = rep(NA_integer_, nrow(x))
          )
          mask1 = rowSums(is.na(x[, models[[1]]$cols])) == 0
          mask2 = rowSums(is.na(x[, models[[2]]$cols])) == 0 & !mask1
          mask3 = rowSums(is.na(x[, models[[3]]$cols])) == 0 & !mask1 & !mask2
          if (sum(mask1) > 0) {
            tmpVal1 = models[[1]]$learner$predict(mlr3::TaskClassif$new('tmp', x[mask1, ], '.dummy'))
            result$class[mask1] = as.integer(tmpVal1$response)
            result$prob[mask1] = as.integer(100 * apply(tmpVal1$prob, 1, max))
          }
          if (sum(mask2) > 0) {
            tmpVal2 = models[[2]]$learner$predict(mlr3::TaskClassif$new('tmp', x[mask2, ], '.dummy'))
            result$class[mask2] = as.integer(tmpVal2$response)
            result$prob[mask2] = as.integer(100 * apply(tmpVal2$prob, 1, max))
          }
          if (sum(mask3) > 0) {
            tmpVal3 = models[[3]]$learner$predict(mlr3::TaskClassif$new('tmp', x[mask3, ], '.dummy'))
            result$class[mask3] = as.integer(tmpVal3$response)
            result$prob[mask3] = as.integer(100 * apply(tmpVal3$prob, 1, max))
          }
          result
        })
      raster::values(outputClass) = output$class
      raster::values(outputProb) = output$prob

      raster::writeRaster(outputClass, tmpFiles[1], datatype = 'INT1U', overwrite = TRUE, options = c('COMPRESS=DEFLATE', 'TILED=YES', 'BLOCKXSIZE=512', 'BLOCKYSIZE=512'))
      raster::writeRaster(outputProb, tmpFiles[2], datatype = 'INT1U', overwrite = TRUE, options = c('COMPRESS=DEFLATE', 'TILED=YES', 'BLOCKXSIZE=512', 'BLOCKYSIZE=512'))
      file.rename(tmpFiles, outFiles)
      processed = TRUE
      cat(tls, ' finished\n', sep = '')
    })
  }
  dplyr::tibble(tileFile = outFiles, processed = processed)
}
logProcessingResults(output, t0)
IVFL-BOKU/landsupport documentation built on Sept. 13, 2022, 6:23 a.m.