stAirPol: Spatio-temporal Modelling of Air Pollution Data

Documented in fit_sp_model

#' fit_sp_model
#'
#' Fitting a spatio-temporal Bayesian model. This function is a wrapper for
#' \link{spT.Gibbs}. For details on the fitting procedure see \link{spT.Gibbs}.
#' This function makes it very esay to use \link{spT.Gibbs}.
#'
#' @param data Modelling data.frame which contains information about the
#' covariables and the target variable
#' @param knots_method Only used if the model choice is GPP" or "truncatedGPP",
#' knots_method specifies the distribution method of the knots. Default is a
#' random distribution with respect to the location of the sensor. With
#' \code{knots_method = 'grid'} the knots will be arranged as a grid.
#' @param training_set an object generated by \link{get_test_and_training_set},
#' if those an object is delivered, the data argument will be reduced to the
#' in training_set specified training sensors.
#' @param model the spatio-temporal models to be fitted, current choices are:
#' "GP", "truncatedGP", "AR", "GPP", and "truncatedGPP",
#' with the first one as the default.
#' @param knots matrix of knots locations
#' @param knots_plot Should be knots be plotted, Boolean, with FALSE as default
#' @param knots_seed Random seed for the knots sampling
#' @param knots_count How much knots should be used?
#' @param ... additional arguments for \link{spT.Gibbs}
#'
#' @seealso \link{spT.Gibbs}, \link{spT.initials}, \link{spT.geodist},
#' \link{fit_subintervalls}, \link{predict.stAirPol.model}, \link{predict_split}
#'
#' @return An object of the class spT, see \link{spT.Gibbs} for more details
#'
#' @export
#' @import spTimer
#' @importFrom MASS kde2d
#' @examples
#' data("mini_dataset")
#' #' remove outliers
#' mini_dataset <- clean_model_data(mini_dataset)
#' #' simple formula
#' formula <-  value ~ rainhist + windhist + trafficvol
#'
#' #' fit three kind of models and compair the PMCC
#' model.gp <- fit_sp_model(data = mini_dataset, formula = formula, model = 'GP')
#' model.ar <- fit_sp_model(data = mini_dataset, formula = formula, model = 'AR')
#' model.gpp <- fit_sp_model(data = mini_dataset, formula = formula, model = 'GPP')
#' print(model.gp$PMCC)
#' print(model.ar$PMCC)
#' print(model.gpp$PMCC)
#'
#' #' different options for GPP model
#' model.gpp <- fit_sp_model(data = mini_dataset, formula = formula, model = 'GPP',
#'                           knots_method = 'random', knots_plot = TRUE,
#'                           knots_count = 20, knots_seed = 2202)
#'
#' #' prioris
#' prio <- spT.priors(model = "GPP",
#'                    inv.var.prior = Gamm(0.5, 1),
#'                    beta.prior = Norm(0, 10^2)
#' )
#' model.gpp <- fit_sp_model(data = mini_dataset, formula = formula, model = 'GPP',
#'                           priors = prio)
#' #' see ?spTimer::spT.Gibbs for more options like scale.transform, spatial.decay
#'
#'
#' plot(model.gpp) #' trace plot of the MCMC-Iterations
#' summary(model.gpp) #' summary
#' #' perform the modelfit only on a trainingsset with 75% of the values
#' training_set <- get_test_and_training_set(mini_dataset, sampel_size = 0.75,
#'                                           random.seed = 220292)
#' model.gp <- fit_sp_model(data = mini_dataset, formula = formula,
#'                             model = 'GP', training_set = training_set)
fit_sp_model <- function(data, model = 'GPP',
                         knots_method = 'grid',
                         training_set = NULL,
                         knots_plot = FALSE,
                         knots = NULL,
                         knots_seed = NULL,
                         knots_count = 5, ...) {

  if (!is.null(training_set)) {
    data <- data[sensor_id %in% training_set$train]
  }
  data <- data[order(sensor_id)][order(timestamp)]
  time_data <- spTimer::spT.time(segments = 1,
                        t.series = length(unique(data$timestamp)))
  data <- data.frame(data)
  coords <- unique(data[, c('lon', 'lat')])
  if (model %in% c('GPP', 'truncatedGPP')) {
    if (is.null(knots)) {
      if (knots_method == 'grid') {
        if (knots_count^2 > nrow(coords)) {
          knots_count <- floor(sqrt(nrow(coords)))
        }
        if (length(knots_count) == 1) {
          knots <- spTimer::spT.grid.coords(Longitude = c(max(coords[,1]),
                                                        min(coords[,1])),
                                            Latitude = c(max(coords[,2]),
                                                         min(coords[,2])),
                                            by = c(knots_count,knots_count))
        } else if (length(knots_count) == 2) {
          knots <- spTimer::spT.grid.coords(Longitude = c(max(coords[,1]),
                                                          min(coords[,1])),
                                            Latitude = c(max(coords[,2]),
                                                         min(coords[,2])),
                                            by = c(knots_count[1],
                                                   knots_count[2]))
        }
      } else {
        if (!is.null(knots_seed)) set.seed(knots_seed)
        f1 <- MASS::kde2d(coords$lon, coords$lat, n = 100)
        Z <- melt(f1$z)
        row <- sample(1:nrow(Z),
                      size = min(knots_count, nrow(coords) - 1),
                      prob = Z$value)
        knots <- list()
        for (i in row) {
          knots[[i]] <- data.frame(Longitude = f1$x[Z[i, ]$Var1],
                                   Latitude = f1$y[Z[i, ]$Var2])
        }
        knots <- as.matrix(rbindlist(knots))
      }
    }
    if (knots_plot) {
      knots.df <- as.data.frame(knots)
      names(knots.df) <- c('lon', 'lat')
      knots.df$type <- 'knots'
      coords.plot <- coords
      coords.plot$type <- 'sensors'
      p <- rbind(coords.plot, knots.df)
      g1 <- ggplot(data = p) +
        geom_point(aes(x = lon, y = lat, col = type, shape = type)) +
        theme_classic()
      print(g1)
    }
    post.gp <-
      spTimer::spT.Gibbs(
        data = data,
        time.data = time_data,
        coords = coords,
        model = model,
        knots.coords = knots,
        ...)
  } else {
    post.gp <-
      spTimer::spT.Gibbs(
        data = data,
        time.data = time_data,
        coords = coords,
        model = model,
        ...)
  }
  class(post.gp) <- c('stAirPol.model', class(post.gp))
  post.gp
  }