disaggregation: Disaggregation Modelling

Documented in predict.disag_model predict_model predict_uncertainty

#' Predict mean and uncertainty from the disaggregation model result
#'
#' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and
#' predicts mean and uncertainty maps.
#'
#' To predict over a different spatial extent to that used in the model,
#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}.
#' If this is not given predictions are made over the data used in the fit.
#'
#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.
#'
#' For the uncertainty calculations, the number of the realisations and the size of the credible interval to be calculated
#' are given by the arguments \emph{N} and \emph{CI} respectively.
#'
#' @param object disag_model object returned by disag_model function.
#' @param new_data If NULL, predictions are made using the data in model_output.
#'   If this is a raster stack or brick, predictions will be made over this data.
#' @param predict_iid logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.
#' @param N Number of realisations. Default: 100.
#' @param CI Credible interval to be calculated from the realisations. Default: 0.95.
#' @param ... Further arguments passed to or from other methods.
#' @param newdata Deprecated.
#'
#' @return An object of class \emph{disag_prediction} which consists of a list of two objects:
#'  \item{mean_prediction }{List of:
#'   \itemize{
#'    \item \emph{prediction} Raster of mean predictions based.
#'    \item \emph{field} Raster of the field component of the linear predictor.
#'    \item \emph{iid} Raster of the iid component of the linear predictor.
#'    \item \emph{covariates} Raster of the covariate component of the linear predictor.
#'   }}
#'  \item{uncertainty_prediction: }{List of:
#'   \itemize{
#'    \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}.
#'    \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}.
#'   }}
#'
#'
#' @method predict disag_model
#'
#' @examples
#' \dontrun{
#' predict(fit_result)
#' }
#'
#' @export


predict.disag_model <- function(object, new_data = NULL, predict_iid = FALSE, N = 100, CI = 0.95, newdata = NULL, ...) {

  if (!is.null(newdata) && missing(new_data)) {
    new_data <- newdata
    message("newdata is deprecated and will be removed in a future version - please use new_data instead")
  }

  mean_prediction <- predict_model(object, new_data = new_data, predict_iid)

  uncertainty_prediction <- predict_uncertainty(object, new_data = new_data, predict_iid, N, CI)

  prediction <- list(mean_prediction = mean_prediction,
                     uncertainty_prediction = uncertainty_prediction)

  class(prediction) <- c('disag_prediction', 'list')

  return(prediction)
}

#' Function to predict mean from the model result
#'
#' \emph{predict_model} function takes a \emph{disag_model} object created by
#' \emph{disaggregation::disag_model} and predicts mean maps.
#'
#' Function returns rasters of the mean predictions as well as the  covariate and field contributions
#' to the linear predictor.
#'
#' To predict over a different spatial extent to that used in the model,
#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}.
#' If this is not given predictions are made over the data used in the fit.
#'
#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.
#'
#' @param model_output disag_model object returned by disag_model function
#' @param new_data If NULL, predictions are made using the data in model_output.
#'   If this is a SpatRaster, predictions will be made over this data. Default NULL.
#' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.
#' @param newdata Deprecated.
#'
#' @return The mean prediction, which is a list of:
#'   \itemize{
#'    \item \emph{prediction} Raster of mean predictions based.
#'    \item \emph{field} Raster of the field component of the linear predictor.
#'    \item \emph{iid} Raster of the iid component of the linear predictor.
#'    \item \emph{covariates} Raster of the covariate component of the linear predictor.
#'   }
#'
#' @name predict_model
#'
#' @examples
#' \dontrun{
#' predict_model(result)
#' }
#'
#' @export

predict_model <- function(model_output, new_data = NULL, predict_iid = FALSE, newdata = NULL) {

  if (!is.null(newdata) && missing(new_data)) {
    new_data <- newdata
    message("newdata is deprecated and will be removed in a future version - please use new_data instead")
  }

  objects_for_prediction <- setup_objects(model_output, new_data = new_data, predict_iid)

  pars <- model_output$obj$env$last.par.best
  pars <- split(pars, names(pars))

  prediction <- predict_single_raster(pars,
                                      objects_for_prediction,
                                      link_function = model_output$model_setup$link)

  return(prediction)

}

#' Function to predict uncertainty from the model result
#'
#' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by
#' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps.
#'
#' Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters.
#'
#' To predict over a different spatial extent to that used in the model,
#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{new_data}.
#' If this is not given predictions are made over the data used in the fit.
#'
#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.
#'
#' The number of the realisations and the size of the credible interval to be calculated.
#' are given by the arguments \emph{N} and \emph{CI} respectively.
#'
#' @param model_output disag_model object returned by disag_model function.
#' @param new_data If NULL, predictions are made using the data in model_output.
#'   If this is a raster stack or brick, predictions will be made over this data. Default NULL.
#' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.
#' @param N number of realisations. Default: 100.
#' @param CI credible interval. Default: 0.95.
#' @param newdata Deprecated.
#'
#' @return The uncertainty prediction, which is a list of:
#'   \itemize{
#'    \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}.
#'    \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}.
#'   }
#'
#' @name predict_uncertainty
#'
#' @examples
#' \dontrun{
#' predict_uncertainty(result)
#' }
#'
#' @export

predict_uncertainty <- function(model_output, new_data = NULL, predict_iid = FALSE, N = 100, CI = 0.95, newdata = NULL) {

  if (!is.null(newdata) && missing(new_data)) {
    new_data <- newdata
    message("newdata is deprecated and will be removed in a future version - please use new_data instead")
  }

  objects_for_prediction <- setup_objects(model_output, new_data = new_data, predict_iid)

  parameters <- model_output$obj$env$last.par.best

  # If we have either of the random effects, we have the jointPrecision matrix.
  #   but if we have neither, we don't get that matrix and should use the
  #   covariance matrix instead

  #CH <- Matrix::Cholesky(as(S, 'dsCMatrix'))
  #x <- rmvn.sparse(10, mu, CH, prec=FALSE) ## 10 random draws of x
  #d <- dmvn.sparse(x, mu, CH, prec=FALSE) ## densities of the 10 draws


  if(model_output$model_setup$iid | model_output$model_setup$field){
    ch <- Matrix::Cholesky(model_output$sd_out$jointPrecision)
    par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = TRUE)
  } else {
    covariance_matrix <- Matrix::Matrix(model_output$sd_out$cov.fixed, sparse = TRUE)
    ch <- Matrix::Cholesky(covariance_matrix)
    par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = FALSE)
  }

  predictions <- list()

  for(r in seq_len(N)) {

    p <- split(par_draws[r, ], names(parameters))

    prediction_result <- predict_single_raster(p,
                                               objects_for_prediction,
                                               link_function = model_output$model_setup$link)

    predictions[[r]] <- prediction_result$prediction
  }

  predictions <- terra::rast(predictions)

  probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2)
  predictions_ci <- terra::app(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE))


  names(predictions_ci) <- c('lower CI', 'upper CI')

  uncertainty <- list(realisations = predictions,
                      predictions_ci = predictions_ci)

  return(uncertainty)
}

# Get coordinates from raster
#
# @param data disag_data object
#
# @return A matrix of the coordinates of the raster
#
# @name getCoords

getCoords <- function(data) {

  points_raster <- data$covariate_rasters[[1]]
  points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999
  raster_pts <- terra::as.points(points_raster)
  coords <- terra::crds(raster_pts)

    return(coords)
}

# Helper to check and sort out new raster data.
check_new_data <- function(new_data, model_output){
  if(is.null(new_data)) return(NULL)
  if(!is.null(new_data)){
    if(!(inherits(new_data, c('SpatRaster')))){
      stop('new_data should be NULL or a SpatRaster')
    }
    if(!all(names(model_output$data$covariate_rasters) %in% names(new_data))){
      stop('All covariates used to fit the model must be in new_data')
    }
    # Take just the covariates we need and in the right order
    new_data <- new_data[[names(model_output$data$covariate_rasters)]]
  }
  return(new_data)
}

# Function to setup covariates, field and iid objects for prediction
setup_objects <- function(model_output, new_data = NULL, predict_iid = FALSE) {

  new_data <- check_new_data(new_data, model_output)

  # Pull out original data
  data <- model_output$data

  # Decide which covariates to predict over
  if(is.null(new_data)){
    covariates <- data$covariate_rasters
  } else {
    covariates <- new_data
  }

  data$covariate_rasters <- covariates

  # If there is no iid effect in the model, it cannot be predicted
  if(!model_output$model_setup$iid) {
    predict_iid <- FALSE
  }

  if(model_output$model_setup$field) {
    if(is.null(new_data)) {
      coords <- data$coords_for_prediction
    } else {
      coords <- getCoords(data)
    }
    Amatrix <- fmesher::fm_evaluator(data$mesh, loc = as.matrix(coords))$proj$A
    field_objects <- list(coords = coords, Amatrix = Amatrix)
  } else {
    field_objects <- NULL
  }

  if(predict_iid) {
    tmp_shp <- model_output$data$polygon_shapefile
    #needed to avoid errors in testing
    if (!("area_id" %in% names(model_output$data$polygon_shapefile))){
      tmp_shp <- dplyr::bind_cols(tmp_shp,
                                  area_id =
                                    factor(model_output$data$polygon_data$area_id))
    }
    iid_objects <- list(shapefile = tmp_shp, template = model_output$data$covariate_rasters)
  } else {
    iid_objects <- NULL
  }
  return(list(covariates = covariates,
              field_objects = field_objects,
              iid_objects = iid_objects))
}

# Function to take model parameters and predict a single raster
predict_single_raster <- function(model_parameters, objects, link_function) {

  # Create linear predictor
  covs_by_betas <- list()
  for(i in seq_len(terra::nlyr(objects$covariates))){
    covs_by_betas[[i]] <- model_parameters$slope[i] * objects$covariates[[i]]
  }

  cov_by_betas <- terra::rast(covs_by_betas)
  if(terra::nlyr(cov_by_betas) > 1){
    sum_cov_by_betas <- sum(cov_by_betas)
  } else {
    # With only 1 covariate, there's nothing to sum. Do this to avoid warnings.
    sum_cov_by_betas <- cov_by_betas
  }
  cov_contribution <- sum_cov_by_betas + model_parameters$intercept

  linear_pred <- cov_contribution

  if(!is.null(objects$field_objects)){
    # Extract field values
    field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1]
    field_ras <- terra::rast(cbind(objects$field_objects$coords, field),
                             type = 'xyz',
                             crs = terra::crs(linear_pred))
    linear_pred <- linear_pred + field_ras
  } else {
    field_ras <- NULL
  }

    if(!is.null(objects$iid_objects)) {

    objects$iid_objects$shapefile$iid <- model_parameters$iideffect

    iid_ras <- terra::rasterize(objects$iid_objects$shapefile, objects$iid_objects$template, field = 'iid')
    iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau))

    na_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE)))
    if (length(na_pixels) != 0){
      na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd)
      iid_ras[na_pixels] <- na_iid_values
    }

    if(terra::ext(iid_ras) != terra::ext(linear_pred)) {
      # Extent of prediction space is different to the original model. Keep any overlapping iid values but predict to the new extent
      raster_new_extent <- linear_pred
      raster_new_extent[1:terra::ncell(raster_new_extent)] <- NA
      #iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent))
      # NOt sure why we no longer need the ext argument
      # SS - added a crop which I think does the same thing
      iid_ras <- terra::merge(iid_ras, raster_new_extent)
      iid_ras <- terra::crop(iid_ras, raster_new_extent)
      missing_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE)))
      missing_iid_values <- stats::rnorm(length(missing_pixels), 0, iideffect_sd)
      iid_ras[missing_pixels] <- missing_iid_values
    }
    linear_pred <- linear_pred + iid_ras
  } else {
    iid_ras <- NULL
  }

    if(link_function == 'logit') {
    prediction_ras <- 1 / (1 + exp(-1 * linear_pred))
  } else if(link_function == 'log') {
    prediction_ras <- exp(linear_pred)
  } else if(link_function == 'identity') {
    prediction_ras <- linear_pred
  }

    predictions <- list(prediction = prediction_ras,
                      field = field_ras,
                      iid = iid_ras,
                      covariates = cov_contribution)

  return(predictions)
}