## R/SREutils.R

#' @rdname loglik
#' @export
setMethod("loglik", signature="SRE", function(object) {
  .Deprecated("logLik")
  return(logLik(object))
  
})

setMethod("binned_data", signature = "SRE", function(object) {
  # This will contain the binned data for each BAU, and will
  # feature many missing values in the case of point-referenced data.
  Cmat_dgT <- .as(object@Cmat, "dgTMatrix")
  obs_BAUs <- Cmat_dgT@j + 1 # BAUs associated with each observation
  data_idx <- Cmat_dgT@i + 1 # data index
  Z <- rep(NA, length(object@BAUs))
  Z[obs_BAUs] <- object@Z[data_idx] # TODO delete the following if the new method works without errors: Z[obs_BAUs] <- object@Z[, 1][data_idx]
  return(Z)
})

#' @rdname SRE
#' @export
setMethod("logLik", signature="SRE", function(object) {
  # This is structured this way so that extra models for fs-variation
  # can be implemented later
  if (length(object@log_likelihood) != 0) {
    return(object@log_likelihood)
  } else if (object@fs_model == "ind") {
    return(.loglik.ind(object))
  } else {
    stop("Currently only independent fine-scale model is implemented")
  }
})

#' @rdname SRE
#' @export
setMethod("nobs", signature="SRE", function(object, ...) length(object@Z))

#' @rdname observed_BAUs
#' @export
setMethod("observed_BAUs", signature(object = "SRE"), function (object) {
  return(object@obsidx)
})


## this function is defined so that we can call it in SRE(), before an object of 
## class SRE is defined
## Note that Cmat maps BAUs to the observations. The dimension of object@Cmat is
## (number of observations) * (number of BAUs).
.observed_BAUs_from_Cmat <- function(Cmat) { 
  return(unique(.as(Cmat, "dgTMatrix")@j) + 1)
}
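
## Illustrative sketch (hypothetical, kept inside if (FALSE) so it is never run at
## package load): a 3 x 5 incidence matrix maps 3 observations to 5 BAUs; the
## observations fall in BAUs 2, 4 and 2, so the observed BAU indices are 2 and 4.
## Assumes the Matrix package is available and that the internal .as() coercion
## accepts a dgCMatrix as input.
if (FALSE) {
  Cmat_toy <- Matrix::sparseMatrix(i = c(1, 2, 3),  # observation indices
                                   j = c(2, 4, 2),  # BAU indices
                                   x = 1, dims = c(3, 5))
  .observed_BAUs_from_Cmat(Cmat_toy)  # c(2, 4) (possibly in a different order)
}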

#' @rdname observed_BAUs
#' @export
setMethod("unobserved_BAUs",signature(object = "SRE"), function (object) {
  ## ncol(object@Cmat) is the total number of BAUs:
  unobsidx <- (1:ncol(object@Cmat))[-object@obsidx]
  return(unobsidx)
})
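
## Usage sketch (hypothetical object name M, assumed to be a fitted SRE object,
## e.g., from FRK() or SRE.fit()): the observed and unobserved index sets
## partition the full set of BAU indices.
if (FALSE) {
  obs   <- observed_BAUs(M)
  unobs <- unobserved_BAUs(M)
  setequal(c(obs, unobs), seq_len(ncol(M@Cmat)))  # should be TRUE
}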


## Print/Show SRE
print.SRE <- function(x,...) {
  print_list <- list(
    formula =  deparse(x@f),
    ndatasets = length(x@data),
    nbasis = x@basis@n,
    basis_class = class(x@basis)[1],
    nBAUs = length(x@BAUs),
    nobs = length(x@Z),
    mean_obsvar = mean(x@Ve@x), 
    fs_var = x@sigma2fshat,
    dimC = deparse(dim(x@Cmat)),
    dimS = deparse(dim(x@S)),
    ncovars = ncol(x@X), 
    response = x@response, 
    link = x@link, 
    method = x@method)
  
  cat("Formula:",print_list$formula,"\n")
  cat("Assumed response distribution:",print_list$response,"\n")
  cat("Specified link function:",print_list$link,"\n")
  cat("Method of model fitting:",print_list$method,"\n")
  cat("Number of datasets:",print_list$ndatasets,"\n")
  cat("Number of basis functions:",print_list$nbasis,"\n")
  cat("Class of basis functions:",print_list$basis_class,"\n")
  cat("Number of BAUs [extract using object@BAUs]: ",print_list$nBAUs,"\n")
  cat("Number of observations [extract using object@Z]: ",print_list$nobs,"\n")
  cat("Mean obs. variance at BAU level [extract using object@Ve]:",print_list$mean_obsvar,"\n")
  cat("Fine-scale variance proportionality constant [extract using object@sigma2fshat]:",print_list$fs_var,"\n")
  cat("Dimensions of C in Z = C*Y + e [extract using object@Cmat]: ",print_list$dimC,"\n")
  cat("Dimensions of S in Y = X*alpha + S*eta + delta [extract using object@S]: ",print_list$dimS,"\n")
  cat("Number of covariates:",print_list$ncovars,"\n\n")
}
setMethod("show",signature(object="SRE"),function(object) print(object))

## Summary SRE
summary.SRE <- function(object,...) {
  summ_list <- list(
    summ_Kdiag = summary(diag(object@Khat)),
    summ_mueta = summary(object@mu_eta[,1]),
    summ_vareta = summary(diag(object@S_eta)),
    summ_muxi = summary(object@mu_xi[, 1]),
    finescale_var = deparse(as.vector(object@sigma2fshat)),
    reg_coeff = deparse(as.vector(object@alphahat)) 
  )
  class(summ_list) <- "summary.SRE"
  summ_list
}
setMethod("summary",signature(object="SRE"),summary.SRE)

## Set method for retrieving info_fit
#' @rdname info_fit
#' @export
setMethod("info_fit", signature(object = "SRE"),
          function(object) {object@info_fit})


# Retrieve coefficients of SRE model
#' @rdname SRE
#' @export 
setMethod("coef",signature(object = "SRE"),function(object,...) {
  l <- list(...)
  random_effects <- !is.null(l$random_effects) && l$random_effects == TRUE
  
  ## Variable names
  f   <- object@f
  tms <- labels(terms(f))
  varnames  <- all.vars(f)[-1]
  
  ## Fixed-effects 
  gamma_idx <- grepl("\\|", tms) # "|" is a special regex character (alternation), so it is escaped with backslashes
  alpha_idx <- !gamma_idx
  alpha_nms <- varnames[alpha_idx]
  ## Determine if there is an intercept (this doesn't get included in the variable names above)
  if (attr(terms(f), "intercept")) alpha_nms <- c("Intercept", alpha_nms)
  coeff <- as.numeric(object@alphahat)
  names(coeff) <- alpha_nms
  
  if (random_effects) {
    # NB we only return estimates of the observed random effects: the unobserved random effects follow a mean-zero Gaussian distribution, so their "estimate" is zero
    # gamma_group_nms <- varnames[gamma_idx] # TODO use the random-effect group names to provide more informative names (e.g., fct:A, fct:B)
    gamma_nms <- lapply(object@G, colnames)
    gamma_nms <- do.call(c, gamma_nms)
    gamma <- as.numeric(object@mu_gamma)
    names(gamma) <- gamma_nms 
    
    ## Combine fixed and random effects
    coeff <- c(coeff, gamma)
  } 

  return(coeff)
})
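
## Usage sketch (hypothetical; M assumed to be a fitted SRE object): extract the
## fixed-effect estimates only, or the fixed effects together with the estimates
## of the observed random effects.
if (FALSE) {
  coef(M)                         # named vector of fixed-effect estimates (alpha)
  coef(M, random_effects = TRUE)  # fixed effects followed by E(gamma | Z)
}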


# Retrieve uncertainty quantification on the coefficients of SRE model
#' @rdname SRE
#' @export
setMethod("coef_uncertainty",signature(object = "SRE"),function(object, percentiles = c(5, 95),  nsim = 400, random_effects = FALSE) {
  
  if(object@simple_kriging_fixed) stop("Uncertainty quantification on the fixed effects is only possible if simple_kriging_fixed is set to FALSE in SRE.fit()")
  
  fixed_effects_only <- !random_effects
  
  ## Compute the Cholesky factor of the permuted precision matrix.
  Q_L <- sparseinv::cholPermute(Q = object@Q_posterior)
  
  ## Generate sample of regression coefficients (most of these arguments can be 
  ## NULL because the function exits before they are used). 
  x <- .simulate(
    fixed_effects_only = fixed_effects_only, 
    fixed_effects_and_gamma = random_effects, 
    object = object, nsim = nsim, X = NULL, Q_L = Q_L, kriging = "universal", 
    type = NULL, obs_fs = NULL, k = NULL,predict_BAUs = NULL, CP = NULL, newdata = NULL
  )
  
  
  ## If random_effects = FALSE, x is just the matrix of samples of the fixed effects alpha.
  ## Otherwise, x is a list containing alpha and the matrix of samples of the random effects gamma. 
  if (random_effects) {
    alpha <- x[[1]]
    gamma <- x[[2]]
  } else {
    alpha <- x 
  }
  
  ## Variable names
  f   <- object@f
  tms <- labels(terms(f))
  varnames  <- all.vars(f)[-1]
  
  ## Fixed-effects 
  gamma_idx <- grepl("\\|", tms) # "|" is a special regex character (alternation), so it is escaped with backslashes
  alpha_idx <- !gamma_idx
  alpha_nms <- varnames[alpha_idx]
  ## Determine if there is an intercept (this doesn't get included in the variable names above)
  if (attr(terms(f), "intercept")) alpha_nms <- c("Intercept", alpha_nms)
  
  alpha <- apply(alpha, 1, quantile, percentiles / 100)
  if (is.matrix(alpha)) {
    colnames(alpha) <- alpha_nms  
  } else {
    names(alpha) <- alpha_nms
  }
  
  ## Random-effects
  if (random_effects) {
    # gamma_group_nms <- varnames[gamma_idx] # TODO use the random-effect group names to provide more informative names (e.g., fct:A, fct:B)
    gamma_nms <- lapply(object@G0, colnames)
    gamma_nms <- do.call(c, gamma_nms)
    gamma <- apply(gamma, 1, quantile, percentiles / 100)
    colnames(gamma) <- gamma_nms 
    
    ## Combine fixed and random effects
    alpha <- cbind(alpha, gamma)
  }

  return(alpha)
})
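
## Usage sketch (hypothetical; M assumed to have been fitted with
## simple_kriging_fixed = FALSE): Monte Carlo percentiles of the fixed effects.
if (FALSE) {
  coef_uncertainty(M, percentiles = c(2.5, 50, 97.5), nsim = 400)
}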

## Print summary of SRE
print.summary.SRE <- function(x, ...) {
  cat("Summary of Var(eta) [extract using object@Khat]: \n")
  print(x$summ_Kdiag)
  cat("\n")
  cat("Summary of E(eta | Z) [extract using object@mu_eta]: \n")
  print(x$summ_mueta)
  cat("\n")
  cat("Summary of Var(eta | Z) [extract using object@S_eta]: \n")
  print(x$summ_vareta)
  cat("\n")    
  cat("Summary of E(xi | Z) [extract using object@mu_xi]: \n")
  print(x$summ_muxi)
  cat("\n")    
  cat("Fine-scale variance estimate [extract using object@sigma2fshat]:", x$finescale_var, "\n")
  cat("Regression coefficients [extract using object@alpha]:",x$reg_coeff,"\n")
  cat("For object properties use show().\n")
  invisible(x)
}



## The function below is used to facilitate the computation of multiple marginal variances
## by splitting up the problem into batches (prediction is an embarrassingly parallel procedure)
.batch_compute_var <- function(S0,Cov,fs_in_process = TRUE) {
  
  ## Don't consider more than 1e4 elements at a time
  batch_size <- 1e4
  
  ## Create batching indices by simply dividing the n rows into batches of size 10000
  batching=cut(1:nrow(S0),
               breaks = seq(0,nrow(S0)+batch_size,
                            by=batch_size),labels=F)
  r <- ncol(S0) # number of columns
  
  ## At first this was parallelised, but the memory demand was becoming too large in many instances
  ## So for now parallelism is disabled. The following line can be uncommented if we wish to
  ## re-enable parallelism in the predictions
  
  #if(opts_FRK$get("parallel") > 1 & batch_size < nrow(X)) {
  if(0) {
    ## Export variables to the cluster
    clusterExport(opts_FRK$get("cl"),
                  c("batching","S0","Cov"),envir=environment())
    
    ## Compute the variances over at most 10,000 BAUs per batch
    var_list <- parLapply(opts_FRK$get("cl"),1:max(unique(batching)),
                          function(i) {
                            ## Find which BAUs this batch predicts over
                            idx = which(batching == i)
                            
                            ## Compute the marginal variance for these BAUs
                            if(fs_in_process) {
                              rowSums((S0[idx,] %*% Cov[1:r,1:r]) * S0[idx,]) + # eta contribution
                                diag(Cov)[-(1:r)][idx] +                        # fs contribution
                                2*rowSums(Cov[r+idx,1:r] * S0[idx,])            # cross-cov. between eta and fs
                            } else {
                              rowSums((S0[idx,] %*% Cov) * S0[idx,])            # just the eta contribution
                            }
                          })
    clusterEvalQ(opts_FRK$get("cl"), {gc()})   # clear memory of cluster
    temp <- do.call(c,var_list)                # concatenate results
  } else {
    temp <- rep(0,nrow(S0))                    # initialise the vector of variances
    for(i in 1:max(unique(batching))) {        # for each batch
      
      ## Find which BAUs this batch predicts over
      idx = which(batching==i)
      
      ## If we have fs variation in the process
      if(fs_in_process)
        temp[idx] <- rowSums((S0[idx,] %*% Cov[1:r,1:r]) * S0[idx,]) +  # eta contribution
          diag(Cov)[-(1:r)][idx] +                                    # fs contribution
          2*rowSums(Cov[(r+idx),1:r] * S0[idx,])                      # cross-cov between eta
      else                                                                # and fs variation
        temp[idx] <- rowSums((S0[idx,] %*% Cov) * S0[idx,])             # otherwise we just have the eta
      # contribution
      
    }
  }
  
  ## Return all variances
  temp
}
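
## Minimal sketch (kept inside if (FALSE), not run at package load) of the identity
## used above: rowSums((S0 %*% Cov) * S0) equals diag(S0 %*% Cov %*% t(S0)), i.e.,
## the marginal variances, without forming the full n x n covariance matrix.
if (FALSE) {
  set.seed(1)
  S0  <- matrix(rnorm(20), 5, 4)      # 5 prediction locations, r = 4 basis functions
  A   <- matrix(rnorm(16), 4, 4)
  Cov <- crossprod(A)                 # a valid 4 x 4 covariance matrix
  v1  <- rowSums((S0 %*% Cov) * S0)   # diagonal-only (batched) computation
  v2  <- diag(S0 %*% Cov %*% t(S0))   # full, memory-hungry computation
  all.equal(v1, v2)                   # TRUE
}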


## This function attempts to estimate the measurement error by fitting a variogram to the data
## and seeing where it crosses the y-axis. This captures the super-fine-scale variation that we
## characterise as measurement error. FRK then effectively fits a smooth variogram -- the difference
## between where this crosses the y-axis and the measurement error is the (estimated) fs-variation
.est_obs_error <- function(sp_pts,variogram.formula,vgm_model = NULL,BAU_width = NULL) {
  
  ## Notify the user (only if verbose)
  if(opts_FRK$get("verbose")) cat("Fitting variogram for estimating measurement error...\n")
  
  ## Basic checks
  if(!is(variogram.formula,"formula"))
    stop("variogram.formula needs to be of class formula")
  if(!is(sp_pts,"Spatial"))
    stop("sp_pts needs to be of class Spatial")
  if(!requireNamespace("gstat"))
    stop("gstat is required for variogram estimation. Please install gstat")
  
  ## Make sure we're not on sphere here, otherwise variogram fitting is too slow. Just remove
  ## CRS (this is only approximate anyway)
  if(!is.na(proj4string(sp_pts))) {
    sp_pts <- SpatialPointsDataFrame(coords = coordinates(sp_pts),
                                     data = sp_pts@data,proj4string = CRS())
  }
  
  
  ## If any of the coordinates are zero range (i.e., they contain only the same 
  ## value repeatedly), then we need to jitter the coordinates so that gstat can 
  ## estimate the measurement error using the variogram. 
  ## This situation typically only arises if we are trying to fit 1D spatial data.
  zero_range_dims <- which(apply(coordinates(sp_pts), 2, .zero_range))
  if (length(zero_range_dims) > 0) {
    new_coords <- coordinates(sp_pts)
    for (i in zero_range_dims) {
      new_coords[, i] <- jitter(new_coords[, i], amount = 1e-5)
    }
    
    ## An error occurs if we try to overwrite the original coordinates:
    # coordinates(sp_pts) <- new_coords 
    ## Just create a new Spatial* object:
    form <- as.formula(paste("~", paste(coordnames(sp_pts), collapse = "+")))
    sp_pts <- cbind(sp_pts@data, new_coords)
    coordinates(sp_pts) <- form
  }
  

  
  ## If we have many points (say > 50000) then subsample
  if(length(sp_pts) > 50000) {
    if(opts_FRK$get("verbose") > 0)
      cat("Selecting 50000 data points at random for estimating the measurement error variance\n")
    sp_pts_sub <- sp_pts[sample(1:length(sp_pts),50000),]
  } else sp_pts_sub <- sp_pts
  
  ## Find the maximum extent in each dimension
  coords_range <-  apply(coordinates(sp_pts_sub),2,
                         function(x) diff(range(x)))
  
  ## Find a maximum effective length scale by computing the "diagonal"
  diag_length <- sqrt(sum(coords_range^2))
  
  ## Compute the area of the domain
  area <- prod(coords_range)
  
  ## Consider a region containing about 100 data points (we only want to study
  ## variogram points close to the origin): the cutoff is roughly the side length
  ## of a square containing 100 data points
  cutoff <- sqrt(area * 100 / length(sp_pts_sub))
  
  ## Remove covariates since we are only interested in behaviour close to the origin,
  ## and since the binning into BAUs hasn't (shouldn't have) occurred yet
  variogram.formula <- .formula_no_covars(variogram.formula)
  
  ## Extract  data values from data object
  L <- .extract.from.formula(variogram.formula,data=sp_pts_sub)
  
  ## Create a gstat object with this formula and data
  g <- gstat::gstat(formula=variogram.formula,data=sp_pts_sub)
  
  ## Compute the empirical variogram
  v <- gstat::variogram(g,cressie=T,            # Cressie's robust variogram estimate
                        cutoff = cutoff,        # maximum spatial separation distance
                        width = cutoff/10)      # width for semivariance estimates
  
  ## Fit the model. If the user did not supply a variogram model, default to a linear
  ## model with initial values as given to vgm() below
  if(is.null(vgm_model))
    vgm_model <-  gstat::vgm(psill = var(L$y)/2,
                             model = "Lin",
                             range = mean(v$dist),
                             nugget = var(L$y)/2)
  
  ## Try to fit the model. First, fit a straight line to just the first two to
  ## four points of the empirical variogram (cf. Kang and Cressie)
  OK <- FALSE                                    # init. OK
  num_points <- 2                                # start with 2 points
  while(!OK & num_points <= 4) {                 # while NOT OK
    ## fit only to first 1:num_points points
    linfit <- lm(gamma~dist,data=v[1:num_points,])
    
    ## ans. will be returned in vgm.fit$psill later on
    vgm.fit <- list(singular = 0)
    vgm.fit$psill <- coefficients(linfit)[1]   # extract psill from intercept
    if(vgm.fit$psill > 0) OK <- TRUE           # if variance > 0 OK = TRUE
    num_points <- num_points + 1               # increment num_points
  }
  
  
  ## If we have estimated a non-positive variance, then try to fit a linear variogram
  ## using all the variogram points
  if(vgm.fit$psill[1] <= 0) {
    
    vgm.fit <- suppressWarnings(gstat::fit.variogram(v, model = vgm_model))
    
    ## Check whether the fitting process generates a warning. If it did, OK is set to 0;
    ## otherwise OK is set to 1 (this fits twice, but since it's so quick it's not an issue)
    OK <- tryCatch({vgm.fit <- gstat::fit.variogram(v, model = vgm_model); 1},
                   warning=function(w) 0)
    ## If the reported psill is less than or equal to zero, or fit.variogram reported a singularity,
    ## or a warning was thrown, then retry fitting using an exponential model
    if(vgm.fit$psill[1] <= 0 | attributes(vgm.fit)$singular | !OK) {
      vgm_model <-  gstat::vgm(psill = var(L$y)/2,
                               model = "Exp",
                               range = mean(v$dist),
                               nugget = var(L$y)/2)
      OK <- tryCatch({vgm.fit = gstat::fit.variogram(v, model = vgm_model); OK <- 1},warning=function(w) 0)
      vgm.fit <- suppressWarnings(gstat::fit.variogram(v, model = vgm_model))
    }
    
    if(vgm.fit$psill[1] <= 0 | attributes(vgm.fit)$singular | !OK) {
      ## Try with Gaussian, maybe process is very smooth or data has a large support
      vgm_model <-  gstat::vgm(var(L$y)/2, "Gau", mean(v$dist), var(L$y)/2)
      
      ## Try to fit the model.
      vgm.fit <- suppressWarnings(gstat::fit.variogram(v, model = vgm_model))
      
      ## Like above, we return OK = 0 if fit is still not good
      OK <- tryCatch({vgm.fit = gstat::fit.variogram(v, model = vgm_model); OK <- 1},warning=function(w) 0)
    }
    
    ## If we still have problems, then just take the first point of the empirical semivariogram and
    ## throw a warning that this estimate is probably not very good
    if(vgm.fit$psill[1] <= 0 | attributes(vgm.fit)$singular | !OK) {
      vgm.fit$psill[1] <- v$gamma[1]
      warning("Estimate of measurement error is probably inaccurate.
                    Please consider setting it through the std variable
                    in the data object if known.")
    }
  }
  
  ## Store the sqrt of the psill as the measurement-error standard deviation (std) and return
  sp_pts$std <- sqrt(vgm.fit$psill[1])
  sp_pts
}
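
## Illustrative sketch (hypothetical, kept inside if (FALSE) so it is never run at
## package load): calling the helper on simulated 2D point data with measurement
## noise of sd 0.1; the returned object carries the estimated measurement-error
## standard deviation in its std column. Assumes sp and gstat are available.
if (FALSE) {
  set.seed(1)
  df <- data.frame(x = runif(500), y = runif(500))
  df$z <- sin(4 * pi * df$x) + cos(4 * pi * df$y) + rnorm(500, sd = 0.1)
  sp::coordinates(df) <- ~ x + y                           # convert to SpatialPointsDataFrame
  df_std <- .est_obs_error(df, variogram.formula = z ~ 1)
  summary(df_std$std)   # rough estimate of the measurement-error standard deviation
}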