R/AgroSoil_generic_functions.R

#################################################### LIST OF FUNCTIONS USED ####
  ## Idea
    # Generic functions used in AgroSoil R package

################################################################################

#' FAO 56 Crop-specific Database
#' @title FAO 56 Crop-specific Database
#'
#' @description Crop water requirements. The crop water need (ET crop) is defined as the depth (or amount) of water needed to meet the water loss through evapotranspiration.
#' @name FAO56_DB_agrosoil
#' @docType data
#' @usage FAO56_DB_agrosoil
#' @format A list, stored on disk in .rds format.
#' @keywords datasets
#' @references FAO 56. Crop Water Needs.
#' @examples
#' data(FAO56_DB_agrosoil)
#' crop_factor <- FAO56_DB_agrosoil$crop_factor
#' growing_days <- FAO56_DB_agrosoil$growing_days
NULL


#'
#' Check, install and/or load R packages
#'
#' @description Loads multiple R packages without having to load them individually. If a package is not installed, this function installs it from CRAN first.
#'
#' @param require_pkg Character vector of required package names
#'
#' @return NULL. Called for its side effect of installing and/or loading the required packages
#'
#' @export
#'
#' @examples
#' \dontrun{
#' Packages <- c("pkg1", "pkg2")
#' install_and_or_load_pkg(Packages)
#' }

install_and_or_load_pkg <- function(require_pkg){
  for (i in require_pkg) {
    # Install from CRAN if the package is not yet available locally
    if (!is.element(i, installed.packages()[, 1])) {
      install.packages(i, dependencies = TRUE)
    }
    library(i, character.only = TRUE)
  }
}


#' Save RDS.xz
#'
#' @param x The respective R object to be written to file
#'
#' @param file Either a character string naming a file path or a connection open for writing.
#'
#' @param threads Number of cores to allocate for writing to disk. Default value is parallel::detectCores()
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ## Assumes the "pxz" command-line tool is installed and on the PATH
#' saveRDS.xz(iris, file = "iris.rds", threads = 4)
#' }

saveRDS.xz <- function(x, file, threads = parallel::detectCores()) {
  con <- pipe(paste0("pxz -T", threads," > ", file), "wb")
  saveRDS(x, file = con, compress = FALSE)
  close(con)
}


#' Read RDS.xz
#'
#' @param file Either a character string naming a file path or a connection open for reading.
#'
#' @param threads Number of cores to allocate for reading the file. Default value is parallel::detectCores()
#'
#' @return R object
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ## Assumes the "pxz" command-line tool is installed and on the PATH
#' obj <- readRDS.xz("iris.rds", threads = 4)
#' }

readRDS.xz <- function(file, threads = parallel::detectCores()) {
  con <- pipe(paste0("pxz -d -k -c -T", threads, " ", file))
  object <- readRDS(file = con)
  close(con)
  return(object)
}


#' Save RDS.gz
#'
#' @param x The respective R object to be written to file
#'
#' @param file Either a character string naming a file path or a connection open for writing.
#'
#' @param threads Number of cores to allocate for writing to disk. Default value is parallel::detectCores()
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ## Assumes the "pigz" command-line tool is installed and on the PATH
#' saveRDS.gz(iris, file = "iris.rds", threads = 4)
#' }

saveRDS.gz <- function(x, file, threads = parallel::detectCores()) {
  con <- pipe(paste0("pigz -p", threads, " > ", file), "wb")
  saveRDS(x, file = con)
  close(con)
}


#' Read RDS.gz
#'
#' @param file Either a character string naming a file path or a connection open for reading.
#'
#' @param threads Number of cores to allocate for reading the file. Default value is parallel::detectCores()
#'
#' @return R object
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ## Assumes the "pigz" command-line tool is installed and on the PATH
#' obj <- readRDS.gz("iris.rds", threads = 4)
#' }

readRDS.gz <- function(file, threads = parallel::detectCores()) {
  con <- pipe(paste0("pigz -d -c -p", threads, " ", file))
  object <- readRDS(file = con)
  close(con)
  return(object)
}


#' Read RDS.p
#'
#' @param file Either a character string naming a file path or a connection open for reading.
#'
#' @param threads Number of cores to allocate for reading the file. Default value is parallel::detectCores()
#'
#' @return R object
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ## Assumes the "file" command-line tool is available for format detection
#' obj <- readRDS.p("iris.rds")
#' }

readRDS.p <- function(file, threads = parallel::detectCores()) {
  # Detect the compression format from the file signature
  fileDetails <- system2("file", args = file, stdout = TRUE)
  selector <- sapply(c("gzip", "XZ"), function(x){grepl(x, fileDetails)})
  format <- names(selector)[selector]

  if (length(format) == 0) {
    # Uncompressed or unrecognised: fall back to plain readRDS
    object <- readRDS(file)
  } else if (format == "gzip") {
    object <- readRDS.gz(file, threads = threads)
  } else {
    object <- readRDS.xz(file, threads = threads)
  }
  return(object)
}
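
# A minimal usage sketch (hypothetical file names; assumes the "file", "pigz"
# and "pxz" command-line tools are installed):
#   saveRDS.gz(iris, "iris_gz.rds")                                 # gzip-compressed
#   saveRDS.xz(iris, "iris_xz.rds")                                 # xz-compressed
#   identical(readRDS.p("iris_gz.rds"), readRDS.p("iris_xz.rds"))   # TRUE: format auto-detected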


#' Extract point information (values) from a stack of raster files
#'
#' @description Extracts pixel values from multiple stacked raster files at specific point locations.
#'
#' @param x Character vector of raster file paths (e.g. from list.files)
#'
#' @param y Object of class SpatialPoints
#'
#' @return A matrix (or vector) of values extracted at the point locations from the respective raster layers
#'
#' @export
#'
#' @examples
#' library(raster)
#' x <- list.files(path = "folder containing raster files", pattern = glob2rx("*.tif"), full.names = TRUE)
#'
#' pnts <- data.frame(Long = c(10:15), Lat = c(1:5))
#' coordinates(pnts) = ~ Long + Lat
#' proj4string(pnts)<- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
#' y <- pnts
#'
#' extract_stack_raster(x, y)

extract_stack_raster = function(x, y){
  set.seed(2410)
  # Stack all raster files so values are extracted from every layer
  image_raster = raster::stack(x)
  if(is.na(proj4string(image_raster))){
    # No CRS defined on the rasters: assume they share the point data's CRS
    proj4string(image_raster) <- proj4string(y)
  } else {
    # Reproject the points into the rasters' CRS before extraction
    y = spTransform(y, proj4string(image_raster))
  }
  out = raster::extract(x = image_raster, y = y)
  return(out)
}


#' Splits a Raster object e.g. a digital elevation model (DEM) into smaller spatial tiles
#'
#' @description Splitting a large raster dataset into chunks of smaller spatial tiles speeds up data processing and optimizes the use of computer resources. This function reads a single tile of the input DEM and writes it to disk for further processing.
#'
#' @param i Vector of spatial tile IDs
#'
#' @param tile.tbl Output of the "getSpatialTiles" function from the GSIF package.
#'
#' @param in.covs List of DEM(s) to split into smaller spatial tiles
#'
#' @param out.path Path to folder containing tile directories
#'
#'
#' @return Writes two files per tile: a chunk of the original DEM saved as an .rds file, and a .sdat file for later use with SAGA-GIS
#'
#' @export
#'
#' @examples
#' library(GSIF)
#' library(rgdal)
#'
#' i <- 20
#' DEM_ObjInfo <- GDALinfo("filename of DEM/Raster file")
#' tile.tbl <- getSpatialTiles(DEM_ObjInfo, block.x=2000, return.SpatialPolygons=FALSE, overlap.percent=10)
#' tile.tbl$ID = as.character(1:nrow(tile.tbl))
#'
#' in.covs <- "filename of DEM/Raster file"
#' out.path <- "path to folder containing tile directories"
#'
#' tiles_DEM4Deriv(i, tile.tbl, in.covs, out.path)

tiles_DEM4Deriv <- function(i, tile.tbl, in.covs, out.path){
  set.seed(2410)
  out.rds <- paste0(out.path, "/T", tile.tbl[i, "ID"], "/T", tile.tbl[i, "ID"], ".rds")
  if(!file.exists(out.rds)){
    m = readGDAL(in.covs, offset = unlist(tile.tbl[i, c("offset.y", "offset.x")]), region.dim = unlist(tile.tbl[i, c("region.dim.y", "region.dim.x")]), output.dim = unlist(tile.tbl[i, c("region.dim.y", "region.dim.x")]), silent = FALSE)

    # Write to file
    saveRDS(m, out.rds)
    writeGDAL(m, gsub(".rds", ".tif", out.rds), type = "Int32", mvFlag = -32767, options = "COMPRESS=DEFLATE")

    # Convert DEM from .tif to .sdat for SAGA-GIS
    system(paste0('gdal_translate ', gsub(".rds", ".tif", out.rds), ' ', gsub(".rds", ".sdat", out.rds), ' -ot \"Int32\" -of \"SAGA\" -a_nodata \"-32768\" -co \"COMPRESS=DEFLATE\"'))
  }
  gc(); gc()
}
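
# A tiling sketch (hypothetical objects from the @examples above; assumes the
# per-tile output directories out.path/T1, out.path/T2, ... already exist).
# Swap lapply for a parallel apply on large DEMs:
#   lapply(1:nrow(tile.tbl), tiles_DEM4Deriv, tile.tbl = tile.tbl,
#          in.covs = in.covs, out.path = out.path)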


#' Convert SMAP .h5 raster into .tif raster file
#'
#' @description Loads SMAP Level 4 Global 3-hourly 9 km soil moisture products and converts them to .tif files for easier downstream processing. The output .tif file is additionally reprojected to WGS84.
#'
#' @param h5.lst Path to folder containing downloaded SMAP .h5 files
#'
#' @param mapInfo.crs_EASE2 SMAP CRS string. E.g. CRS("+proj=cea +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
#'
#' @param mapInfo.crs_WGS84 CRS of the destination raster file.
#'
#' @param out_dir Path to folder to write .tif raster files
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(rhdf5)
#' library(raster)
#'
#' h5.lst <- list.files(".h5 folder path", pattern = glob2rx("*.h5"), full.names = T)
#' mapInfo.crs_EASE2 <- CRS("+proj=cea +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs")
#' mapInfo.crs_WGS84 <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
#' out_dir <- "path to folder for writing .tif files"
#'
#' h5_to_tiff(h5.lst, mapInfo.crs_EASE2, mapInfo.crs_WGS84, out_dir)

h5_to_tiff = function(h5.lst, mapInfo.crs_EASE2, mapInfo.crs_WGS84, out_dir){
  set.seed(2410)
  # Set output tif filename
    out_tiff = paste0(out_dir, gsub(".h5", ".tif", basename(h5.lst)))

  if(!file.exists(out_tiff)){
    message(paste0("Processing and exporting .h5 file: ", basename(h5.lst), " ..."))
    # Get structure of .h5 file
      mapInfo.structure = h5ls(h5.lst)

    # Surface soil moisture
      SSM <- h5read(h5.lst, "Analysis_Data/sm_surface_analysis", read.attributes = TRUE)
      h5_dim = dim(SSM)
      SSM = ifelse(SSM < 0, NA, SSM)

    # Lat/Long - WGS84
      SSM_lat <- h5read(h5.lst, "/cell_lat", read.attributes = TRUE)
      SSM_long <- h5read(h5.lst, "/cell_lon", read.attributes = TRUE)

    # Cartesian coordinates
      SSM_x<- h5read(h5.lst, "/x", read.attributes = TRUE)
      SSM_y<- h5read(h5.lst, "/y", read.attributes = TRUE)

    # XY grid (interpolate at regular grids)
      xy_grid = as.data.frame(xy.grid(SSM_x, SSM_y))
      SSM_x <-xy_grid$V1
      SSM_y <- xy_grid$V2

    # Final data frame
      SSM2 = as.vector(t(SSM)); SSM_lat2 = as.vector(SSM_lat);SSM_long2 = as.vector(SSM_long); SSM_y = as.vector(SSM_y);SSM_x = as.vector(SSM_x)
      SSM_df = data.frame(Lat = SSM_lat2, Long = SSM_long2, SM = SSM2, x = SSM_x, y=SSM_y)

    # Get spatial extent
      SSM_extent <- extent(SSM_df[,c(4,5)])

    # Rasterize extent
      dummy_raster <- raster(SSM_extent, ncol=h5_dim[1], nrow=h5_dim[2], crs=mapInfo.crs_EASE2)
      SSM_raster <- rasterize(SSM_df[,c(4,5)], dummy_raster, SSM_df[,3], fun=mean)

    # Reproject to Global WGS84 and save to file
      SSM_raster = projectRaster(SSM_raster, crs = mapInfo.crs_WGS84)
      writeRaster(SSM_raster, out_tiff, overwrite=T)
  }
}
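
# A batch-conversion sketch (hypothetical paths; assumes the rhdf5 and raster
# packages are loaded and the CRS objects from the @examples are defined).
# Note the trailing slash on out_dir, which the function concatenates directly:
#   h5.files <- list.files("SMAP_downloads", pattern = glob2rx("*.h5"), full.names = TRUE)
#   lapply(h5.files, h5_to_tiff, mapInfo.crs_EASE2 = mapInfo.crs_EASE2,
#          mapInfo.crs_WGS84 = mapInfo.crs_WGS84, out_dir = "SMAP_tiff/")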


# Function to aggregate depth-specific raster layers into standard soil depth intervals
#' Aggregate Raster Files for Standard Soil Depth
#'
#' @description Loads a list of raster files for specific soil depths and aggregates them over a user-specified soil depth interval, e.g. aggregate clay content maps at 0, 5, 15, 30 and 60 cm into a single 0-30 cm or 0-60 cm map
#'
#' @param in.cov_name Base name of the raster files to aggregate
#'
#' @param depth Numeric vector of standard soil depths (cm) to aggregate, e.g. c(0, 5, 15, 30, 60)
#'
#' @param in.covs_path Path to the directory holding the raster files.
#'
#' @param ot Output raster data type. Default value is "Int32".
#'
#' @param dstnodata Value imputed for missing or NO_DATA pixels. Default value is -32768.
#'
#' @param in.lst Character vector of input raster file paths, one per depth
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(raster)
#' library(rgdal)
#'
#' \dontrun{
#' ## Hypothetical SoilGrids-style layer names for clay content at five depths
#' in.lst <- paste0("path/CLYPPT_M_sl", 1:5, "_250m_ll.tif")
#' aggregate_cov("CLYPPT", depth = c(0, 5, 15, 30, 60), in.covs_path = "path", in.lst = in.lst)
#' }


aggregate_cov <- function(in.cov_name, depth, in.covs_path, ot = "Int32", dstnodata = -32768, in.lst){
  set.seed(2410)
  out.tif <- paste0(in.covs_path, "/", in.cov_name, "_agg_",depth[length(depth)] - depth[1], "cm.tif")

  if (!file.exists(out.tif)){
    stack.covs <- raster::stack(in.lst)
    stack.covs <- as(stack.covs, "SpatialGridDataFrame")

    # Apply trapezoidal rule for data aggregation
    for (dps in 1:(length(depth) - 1)){
      stack.covs@data[, paste0("sd", dps)] <- rowMeans(stack.covs@data[,dps:(dps + 1)])
    }

    stack.covs$agg_depth <- rowSums(as.matrix(stack.covs@data[, paste0("sd", 1:(length(depth) - 1))]) %*% diag(diff(depth)))/sum(diff(depth))
    writeGDAL(stack.covs["agg_depth"], out.tif, type = paste(ot), mvFlag = paste(dstnodata), options = "COMPRESS=DEFLATE")
    gc();gc()
  }
}
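
# Illustration (a minimal sketch with made-up values): the trapezoidal rule used
# above, applied to one hypothetical pixel with clay contents at depths
# 0, 5, 15, 30 and 60 cm:
#   depth <- c(0, 5, 15, 30, 60)
#   clay  <- c(22, 24, 27, 30, 31)
#   layer_means <- (head(clay, -1) + tail(clay, -1)) / 2  # means of consecutive layers
#   sum(layer_means * diff(depth)) / sum(diff(depth))     # thickness-weighted 0-60 cm value, ~28.5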


# Function to derive terrain variables from DEM
#' Derive geomorphometric parameters from digital elevation model (DEM)
#'
#' @description Compute gridded process-based covariates that help explain the dynamics of soil properties (in this case soil moisture) along a pedon or a soilscape, following the SCORPAN concept of soil mapping (McBratney et al., 2003). The covariates are computed at the pedon and catchment scale.
#'
#' @param saga_cmd Path to the SAGA-GIS command line program (saga_cmd)
#'
#' @param DEM_Input Path to the DEM raster file (".tif" or SAGA ".sgrd")
#'
#' @param MASK Masking layer file. Default value is NULL
#'
#' @param sel Geomorphometric parameters to compute. Check the SAGA-GIS documentation for further details
#'
#' @param RADIUS Search radii (in cells) for the "Deviation from Mean Value" derivative. Default value is c(9,13)
#'
#' @param cpus Number of CPU cores passed to SAGA-GIS. Default value is 5
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(rgdal)
#'
#' if(.Platform$OS.type == 'windows'){
#'    saga_cmd = shortPathName("C:/Program Files (x86)/SAGA-GIS/saga_cmd.exe")
#' }else{
#'    saga_cmd = "saga_cmd"
#' }
#'
#' DEM_Input <- "filename of DEM/Raster file"
#' sel <- c("SLP","TWI")
#'
#' saga_DEM_derivatives(saga_cmd, DEM_Input, sel = sel)
#'



saga_DEM_derivatives <- function(saga_cmd, DEM_Input, MASK=NULL, sel=c("SLP","CPR","TWI","CRV","VBF","VDP","OPN","DVM","MRN","TPI"), RADIUS=c(9,13), cpus=5){
  if(pkgmaker::file_extension(DEM_Input)=="tif"){
    system(paste0('gdal_translate ', DEM_Input, ' ', gsub(".tif", ".sdat", DEM_Input), ' -of \"SAGA\" -ot \"Int16\"'))
    DEM_Input = gsub(".tif", ".sgrd", DEM_Input)
  }
  if(!is.null(MASK)){
    ## Fill in missing DEM pixels:
    suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' grid_tools 25 -GRID=\"', DEM_Input, '\" -MASK=\"', MASK, '\" -CLOSED=\"', DEM_Input, '\"')) )
  }
  # Up- and downslope curvature:
  if(any(sel %in% "CRV")){
    if(!file.exists(gsub(".sgrd", "_downlocal.sgrd", DEM_Input))){
      try( suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_morphometry 26 -DEM=\"', DEM_Input, '\" -C_DOWN_LOCAL=\"', gsub(".sgrd", "_downlocal.sgrd", DEM_Input), '\" -C_UP_LOCAL=\"', gsub(".sgrd", "_uplocal.sgrd", DEM_Input), '\" -C_UP=\"tmp.sgrd\" -C_LOCAL=\"tmp.sgrd\" -C_DOWN=\"', gsub(".sgrd", "_down.sgrd", DEM_Input), '\"') ) ) )
    }
  }
  # Slope:
  if(any(sel %in% "SLP")){
    if(!file.exists(gsub(".sgrd", "_slope.sgrd", DEM_Input))){
      try( suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_morphometry 0 -ELEVATION=\"', DEM_Input, '\" -SLOPE=\"', gsub(".sgrd", "_slope.sgrd", DEM_Input), '\"') ) ) )
    }
  }
  # Profile Curvature:
  if(any(sel %in% "CPR")){
    if(!file.exists(gsub(".sgrd", "_cprof.sgrd", DEM_Input))){
      try( suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_morphometry 0 -ELEVATION=\"', DEM_Input, '\" -C_PROF=\"', gsub(".sgrd", "_cprof.sgrd", DEM_Input), '\"') ) ) )
    }
  }
  # Multiresolution Index of Valley Bottom Flatness MrVBF:
  if(any(sel %in% "VBF")){
    if(!file.exists(gsub(".sgrd", "_vbf.sgrd", DEM_Input))){
      try( suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_morphometry 8 -DEM=\"', DEM_Input, '\" -MRVBF=\"', gsub(".sgrd", "_vbf.sgrd", DEM_Input), '\" -T_SLOPE=10 -P_SLOPE=3') ) ) )
    }
  }
  # Valley depth:
  if(any(sel %in% "VDP")){
    if(!file.exists(gsub(".sgrd", "_vdepth.sgrd", DEM_Input))){
      try( suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_channels 7 -ELEVATION=\"', DEM_Input, '\" -VALLEY_DEPTH=\"', gsub(".sgrd", "_vdepth.sgrd", DEM_Input), '\"') ) ) )
    }
  }
  # Openness:
  if(any(sel %in% "OPN")){
    if(!file.exists(gsub(".sgrd", "_openp.sgrd", DEM_Input))){
      try( suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_lighting 5 -DEM=\"', DEM_Input, '\" -POS=\"', gsub(".sgrd", "_openp.sgrd", DEM_Input), '\" -NEG=\"', gsub(".sgrd", "_openn.sgrd", DEM_Input), '\" -METHOD=0' ) ) ) )
    }
  }
  # Deviation from Mean Value:
  if(any(sel %in% "DVM")){
    if(!file.exists(gsub(".sgrd", "_devmean.sgrd", DEM_Input))){
      suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' statistics_grid 1 -GRID=\"', DEM_Input, '\" -DEVMEAN=\"', gsub(".sgrd", "_devmean.sgrd", DEM_Input), '\" -RADIUS=', RADIUS[1] ) ) )
    }
    if(!file.exists(gsub(".sgrd", "_devmean2.sgrd", DEM_Input))){
      suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' statistics_grid 1 -GRID=\"', DEM_Input, '\" -DEVMEAN=\"', gsub(".sgrd", "_devmean2.sgrd", DEM_Input), '\" -RADIUS=', RADIUS[2] ) ) )
    }
  }
  # TWI:
  if(any(sel %in% "TWI")){
    if(!file.exists(gsub(".sgrd", "_twi.sgrd", DEM_Input))){
      try( suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_hydrology 15 -DEM=\"', DEM_Input, '\" -SLOPE_MIN=0.04 -SLOPE_OFF=0.3 -AREA_MOD=\"', gsub(".sgrd", "_catchm.sgrd", DEM_Input), '\" -SLOPE_TYPE=0 -TWI=\"', gsub(".sgrd", "_twi.sgrd", DEM_Input), '\"') ) ) )
    }
  }
  # Melton Ruggedness Number:
  if(any(sel %in% "MRN")){
    if(!file.exists(gsub(".sgrd", "_mrn.sgrd", DEM_Input))){
      suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_hydrology 23 -DEM=\"', DEM_Input, '\" -AREA=\"tmp.sgrd\" -MRN=\"', gsub(".sgrd", "_mrn.sgrd", DEM_Input), '\" -ZMAX=\"tmp.sgrd\"' ) ) )
    }
  }
  # Topographic Position Index:
  if(any(sel %in% "TPI")){
    if(!file.exists(gsub(".sgrd", "_tpi.sgrd", DEM_Input))){
      suppressWarnings( system(paste0(saga_cmd,' -c=', cpus,' ta_morphometry 18 -DEM=\"', DEM_Input, '\" -STANDARD=1 -TPI=\"', gsub(".sgrd", "_tpi.sgrd", DEM_Input), '\" -RADIUS_MIN=0 -RADIUS_MAX=2000 -DW_WEIGHTING=3 -DW_BANDWIDTH=75' ) ) )
    }
  }
}
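
# A typical pipeline sketch (hypothetical paths; assumes SAGA-GIS and GDAL are
# installed): derive the parameters, then convert the SAGA outputs to GeoTIFF
# with dem_derivatives2geotiff() below:
#   saga_DEM_derivatives(saga_cmd, "ROI_DEM.tif", sel = c("SLP", "TWI"))
#   deriv.lst <- list.files(pattern = glob2rx("ROI_DEM_*.sdat"), full.names = TRUE)
#   lapply(deriv.lst, dem_derivatives2geotiff)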

# Function to convert DEM derivatives to GeoTIFFs
#' Convert derived geomorphometric parameters to GeoTIFFs
#'
#' @description Converts derived geomorphometric parameters from SAGA-GIS format to GeoTIFF, rescaling value ranges per derivative where appropriate.
#'
#' @param x SAGA-GIS '.sdat' file
#'
#' @param imin Input minimum for the gdal_translate -scale option
#'
#' @param imax Input maximum for the gdal_translate -scale option
#'
#' @param omin Output minimum for the gdal_translate -scale option
#'
#' @param omax Output maximum for the gdal_translate -scale option
#'
#' @param ot Output raster data type. Default value is "Int16"
#'
#' @param mv Value assigned to missing or NO_DATA pixels. Default value is -32768
#'
#' @return NULL
#'
#' @export
#'
#' @examples
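#' \dontrun{
#' ## A minimal sketch (hypothetical file name; assumes GDAL's gdal_translate
#' ## is on the PATH). Scaling ranges are set internally from the file name:
#' dem_derivatives2geotiff("ROI_DEM_slope.sdat")
#' }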
dem_derivatives2geotiff <- function(x, imin, imax, omin, omax, ot="Int16", mv=-32768){
  if(!file.exists(gsub(".sdat", ".tif", x))){
    if(length(grep(pattern = "cprof", x))>0){imin = -0.01; imax = 0.01; omin = -1000; omax = 1000}
    if(length(grep(pattern = "devmean", x))>0){imin = -15; imax = 15; omin = -1500; omax = 1500}
    if(length(grep(pattern = "down", x))>0){imin = -50; imax = 50; omin = -5000; omax = 5000}
    if(length(grep(pattern = "uplocal", x))>0){imin = -50; imax = 50; omin = -5000; omax = 5000}
    if(length(grep(pattern = "vbf", x))>0){imin = 0; imax = 10; omin = 0; omax = 1000}
    if(length(grep(pattern = "mrn", x))>0){imin = 0; imax = 50; omin = 0; omax = 500}
    if(length(grep(pattern = "open", x))>0){imin = 0; imax = 10; omin = 0; omax = 1000}
    if(length(grep(pattern = "slope", x))>0){imin = 0; imax = 1; omin = 0; omax = 100}
    if(length(grep(pattern = "tpi", x))>0){imin = -100; imax = 100; omin = -1000; omax = 1000}
    if(length(grep(pattern = "twi", x))>0){imin = 0; imax = 500; omin = 0; omax = 5000}
    if(length(grep(pattern = "catchm", x))>0){ot = "Int32"}
    if(missing(omin)){
      system(paste0('gdal_translate ', x, ' ', gsub(".sdat", ".tif", x), ' -co \"COMPRESS=DEFLATE\" -ot \"', ot, '\"'))
    } else {
      system(paste0('gdal_translate ', x, ' ', gsub(".sdat", ".tif", x), ' -co \"COMPRESS=DEFLATE\" -scale ', imin, ' ', imax, ' ', omin, ' ', omax, ' -ot \"', ot, '\" -a_nodata \"', mv, '\"'))
    }
  }
}


#' Create permutations of prediction tasks for parallel (snowfall) soil moisture modelling
#'
#' @description Creates all year x month x target-variable combinations for use in parallelized soil moisture prediction
#'
#' @param pred.year.lst A vector of prediction years as numeric.
#'
#' @param month.lst A vector of prediction months as string.
#'
#' @param target_var Target variable to predict
#'
#' @return A list of character vectors, each of the form c(year, month, target_var)
#'
#' @export
#'
#' @examples
#' pred.year.lst <- c(2015,2020)
#' month.lst <- c("Jan", "Dec")
#' target_var <- "SM"
#'
#' x <- permutations(pred.year.lst, month.lst, target_var)

permutations <- function(pred.year.lst,month.lst,target_var){
  nana1 = list(); nana2 = list(); nana3 = list()
  for(i in 1:length(pred.year.lst)){
    for(k in 1:length(month.lst)){
      for(g in 1:length(target_var)){
        nana1[[g]] = as.character(c(pred.year.lst[i], month.lst[k], target_var[g]))
      }
      nana2[[k]] = nana1
    }
    nana3[[i]] = nana2
  }
  kan = do.call(rbind,do.call(rbind,do.call(rbind,nana3)))
  return(split(kan,seq(nrow(kan))))
}
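
# Illustration: permutations(c(2015, 2020), c("Jan", "Dec"), "SM") returns a
# list of four character vectors, each of the form c(year, month, target_var),
# e.g. c("2015", "Jan", "SM") -- one prediction task per snowfall worker.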


#' Generate ground stations for Raster file Extraction
#'
#' @description Generate a set of ground stations based on a spatial coverage sampling approach, for use in raster data extraction
#'
#' @param N_samples Numeric. Number of required ground stations. Default value is 200
#'
#' @param ROI_shp Spatial object (e.g. a region-of-interest polygon) to stratify
#'
#' @param out_dir Directory to save outputs
#'
#' @param iterations Numeric. Number of resampling iterations. Default value is 10
#'
#' @param crs_ Spatial reference string for the ground stations, e.g. WGS84 "+proj=longlat +datum=WGS84 +no_defs"
#'
#' @param ROI_state String. Name of ROI for which stations are being generated.
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(spcosa)
#' library(raster)
#'
#' generate_ref_stations(N_samples = 200, ROI_shp = ROI_shp, out_dir = out_dir,
#'                       crs_ = "+proj=longlat +datum=WGS84 +no_defs", ROI_state = ROI_state)
#'

generate_ref_stations <- function(N_samples = 200, ROI_shp, out_dir, iterations = 10, crs_, ROI_state){
  set.seed(2410)
  myStrata_n10 <- stratify(ROI_shp, nStrata = N_samples, nTry = iterations)
  mySample <- as.data.frame(myStrata_n10@centroids@coords)
  colnames(mySample) <- c("Long", "Lat")
  mySample$ID = paste0("ST", rownames(mySample))
  write.csv(mySample, file = paste0(out_dir, "/ref_stations.csv"))
  coordinates(mySample) = ~Long + Lat
  proj4string(mySample) <- CRS(crs_)
  unlink(paste0(out_dir, "/", ROI_state, "_ref_stations.shp"))
  writeOGR(mySample, paste0(out_dir, "/", ROI_state, "_ref_stations.shp"),paste0(ROI_state, "_ref_stations"), "ESRI Shapefile")
}

#' Create stacked spatial tiles for a set of ancillary datasets
#'
#' @description For purposes of parallel processing, this function creates a set of stacked spatial tiles for an unlimited number of covariates; each tile can be loaded individually for further processing
#'
#' @param i Vector of spatial tile IDs
#'
#' @param tile.tbl Output of the "getSpatialTiles" function from the GSIF package.
#'
#' @param in.covs List of covariates to split and stack as list of smaller spatial tiles
#'
#' @param out.path Path to folder containing output tile directories
#'
#' @param mask.w Surface water mask Raster layer (currently unused)
#'
#' @param mask.l Land cover mask Raster layer
#'
#' @param ROI_name Name of region of interest
#'
#' @param Landform Landform Raster layer
#'
#' @param crss CRS string
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(rgdal)
#' library(raster)
#'
#' tile_anc.lst(i, tile.tbl, in.covs, out.path, mask.w, mask.l, ROI_name, Landform, crss)
#'

tile_anc.lst <- function(i, tile.tbl, in.covs, out.path, mask.w, mask.l, ROI_name, Landform, crss){
  # Set seed
  set.seed(2410)
  out.rds <- paste0(out.path, "/T", tile.tbl[i,"ID"], "/T", tile.tbl[i,"ID"], ".rds")

  if(!file.exists(out.rds)){
    message(paste0("Loading rasterized ROI tif ..."))

    # Load rasterized ROI tif
    m = readGDAL(fname=mask.l,offset=unlist(tile.tbl[i,c("offset.y","offset.x")]),region.dim=unlist(tile.tbl[i,c("region.dim.y","region.dim.x")]), output.dim=unlist(tile.tbl[i,c("region.dim.y","region.dim.x")]),silent = TRUE)
    names(m) = ROI_name
    message(paste0("Loading water cover tif ..."))

    m$Landform = readGDAL(fname=Landform, offset=unlist(tile.tbl[i,c("offset.y","offset.x")]), region.dim=unlist(tile.tbl[i,c("region.dim.y","region.dim.x")]), output.dim=unlist(tile.tbl[i,c("region.dim.y","region.dim.x")]), silent = TRUE)$band1

    m = as(m["Landform"], "SpatialPixelsDataFrame")
    m = m[!is.na(m$Landform), ]

    x = spTransform(m, CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))

    m$X_Cat <- x@coords[,1]
    m$Y_Cat <- x@coords[,2]

    for(j in 1:length(in.covs)){
      m@data[,tools::file_path_sans_ext(basename(in.covs[j]))] <- round(readGDAL(in.covs[j], offset=unlist(tile.tbl[i,c("offset.y","offset.x")]), region.dim=unlist(tile.tbl[i,c("region.dim.y","region.dim.x")]), output.dim=unlist(tile.tbl[i,c("region.dim.y","region.dim.x")]), silent = TRUE)$band1[m@grid.index])
    }

    # Fill-in missing values (if necessary):
    sel.mis = sapply(m@data, function(x){sum(is.na(x))>0})
    if(sum(sel.mis) > 0){
      for(j in which(sel.mis)){
        if(!is.factor(m@data[,j])){
          repn = quantile(m@data[,j], probs=.5, na.rm=TRUE)
          m@data[,j] = ifelse(is.na(m@data[,j]), repn, m@data[,j])
        }
      }
    }


    message(paste0("Saving stack of tiles as rds file ..."))
    saveRDS(m, out.rds)
    gc(); gc()
  }
}


#' Impute NAs with medians
#'
#' @description Replace missing column values with column median.
#'
#' @param x Numeric vector
#'
#' @return Numeric vector with NA values replaced by the column median
#'
#' @export
#'
#' @examples
#' df <- data.frame(kan = c(1:4, NA), nana = c(10:14))
#' df[] <- lapply(df, NA2median)

NA2median <- function(x) replace(x, is.na(x), median(x, na.rm = TRUE))


# Function to make space-time predictions at standard soil depths
#' Derive daily space-time predictions at standard soil depths
#'
#' @description Derives daily gridded distribution of soil moisture at standard soil depth intervals for a list of spatial tiles.
#'
#' @param i Vector of spatial tile IDs
#'
#' @param model_ Saved model (random forest or xgboost models) to be used for spatial predictions on the larger grid or stacked tile grids (.rds file)
#'
#' @param in_path Directory holding the stacked spatial tiles
#'
#' @param out_path Directory holding the stacked spatial tiles. Just to keep predictions with their respective stacked covariates.
#'
#' @param targ_var Target variable to predict
#'
#' @param method Machine learning algorithm to be used for the prediction. Values are "ranger" and "xgboost". Default value is "ranger" (random forest).
#'
#' @param days_ Numeric. Days in the respective month of the targeted soil moisture prediction year, typically all days in that month
#'
#' @param month_ Character. Single month of the targeted soil moisture prediction year, given as an abbreviated month name, e.g. "Jan", "Mar", "Oct".
#'
#' @param month.lst_ Vector or list. All months of the targeted soil moisture prediction year.
#'
#' @param pred.year_ Numeric. Single soil moisture prediction year
#'
#' @param rds_file Saved stacked-spatial tile of covariates (i.e. prediction grid)
#'
#' @param DDepth TRUE/FALSE. Whether to predict for top soils (FALSE) or for specific standard soil depths (TRUE).
#'
#' @param stdps Vector. Numeric values of standard soil depths at which predictions are required. Default values are c(5,15,30,60,100)
#'
#' @param weather_data TRUE/FALSE. Whether temporal weather covariates are included. Default value is FALSE
#'
#' @param weather_source Source of the weather covariates, e.g. 'ERA5' or 'DWD'. Default value is 'ERA5'
#'
#' @param era5_depth TRUE/FALSE. Whether to predict at the ERA5 soil layer depths c(7,28,100) instead of the standard depths. Default value is FALSE
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(rgdal)
#' library(raster)
#' library(lubridate)
#' library(plyr)
#' library(caret)
#'
#' out <- predict_per_tiles(i, model_, in_path, out_path, targ_var, method, days_, month_, month.lst_, pred.year_, rds_file, DDepth, stdps)

predict_per_tiles <- function(i, model_, in_path, out_path, targ_var, method, days_, month_, month.lst_,pred.year_, rds_file, DDepth, stdps=c(5,15,30,60,100),weather_data=FALSE,weather_source='ERA5',era5_depth=FALSE){

  if(era5_depth==TRUE){stdps=c(7,28,100)}

  # Set seed
    set.seed(2410)

  if(method == "ranger"){rds_out = paste0(out_path, "/", i, "/", targ_var,"_", month.lst_[grep(month_, month.lst_)],"_", i, "_rf.rds")}
  if(method == "xgboost"){rds_out = paste0(out_path, "/", i, "/", targ_var,"_", month.lst_[grep(month_, month.lst_)],"_", i, "_xgb.rds")}

  if(any(c(!file.exists(rds_out),file.size(rds_out)==0))){

    if(missing(rds_file)){rds_file = paste0(in_path, "/", i, "/", i, ".rds")}

    if(file.exists(rds_file)&file.size(rds_file)> 1e3){
      gc(); gc()

      grids <- readRDS(rds_file)
      # Extra check to fill missing tile grid data as zeros
        grids@data[is.na(grids@data)] <- 0

      if(DDepth==TRUE){

        # Dummy df to hold predictions
          df_dum <- matrix(data = NA, nrow=nrow(grids), ncol=length(stdps))
          daily.pred.lst=list()

        for(day_kount in 1:length(days_)){
          # message(paste0("Predicting Day: ", sprintf("%02d", days_[day_kount]), " of ", month.lst_[grep(month_, month.lst_)], ", ", pred.year_))

          for(dpth in 1:length(stdps)){
            grids@data[,"DEPTH"] = stdps[dpth]

            # Define temporal autocorrelation
              pred_date = as.Date(paste0(pred.year_, "-" , as.numeric(paste(match(month_,month.abb))), "-", days_[day_kount]), "%Y-%m-%d")
              grids@data[, "Datum"] = pred_date
              grids@data[, "cum_day"] = floor(unclass(as.POSIXct(as.POSIXct(paste(pred_date), format = "%Y-%m-%d")))/86400)
              grids@data[, "doy"] = as.integer(strftime(as.POSIXct(paste(pred_date), format = "%Y-%m-%d"), format = "%j"))

            # Get specific soil property based on depth
              # Extract depth information from CLAY, SILT, BD, OrgC, CrfVol:
              sg.props = c("CLYPPT","SLTPPT","BLDFIE", "ORCDRC", "CRFVOL")

              if(era5_depth==TRUE){
                for(sg in 1:length(sg.props)){
                  if(sg.props[sg]=="CLYPPT" | sg.props[sg]=="SLTPPT"| sg.props[sg]=="ORCDRC" | sg.props[sg]=="BLDFIE" | sg.props[sg]=="CRFVOL"){grids@data[,paste0("sg_", sg.props[sg])] = ifelse(grids@data$DEPTH==7, grids@data[,paste0(sg.props[sg], "_M_sl1_90m_ll")],ifelse(grids@data$DEPTH==28, grids@data[,paste0(sg.props[sg], "_M_sl3_90m_ll")],ifelse(grids@data$DEPTH==100, grids@data[,paste0(sg.props[sg], "_M_sl5_90m_ll")],NA)))}
                }
              }else{
                for(sg in 1:length(sg.props)){
                  if(sg.props[sg]=="CLYPPT" | sg.props[sg]=="SLTPPT"| sg.props[sg]=="ORCDRC" | sg.props[sg]=="BLDFIE" | sg.props[sg]=="CRFVOL"){grids@data[,paste0("sg_", sg.props[sg])] = ifelse(grids@data$DEPTH==5, grids@data[,paste0(sg.props[sg], "_M_sl1_250m_ll")],ifelse(grids@data$DEPTH==15, grids@data[,paste0(sg.props[sg], "_M_sl2_250m_ll")],ifelse(grids@data$DEPTH==30, grids@data[,paste0(sg.props[sg], "_M_sl3_250m_ll")],ifelse(grids@data$DEPTH==60, grids@data[,paste0(sg.props[sg], "_M_sl4_250m_ll")],ifelse(grids@data$DEPTH==100, grids@data[,paste0(sg.props[sg], "_M_sl5_250m_ll")],NA)))))}
                }
              }
              gc(); gc()
              sg_Colnames <- names(grids@data)[grep("_M_sl",names(grids@data)) ]
              grids@data <- grids@data[ ,!(names(grids@data) %in% sg_Colnames)]
              gc(); gc()

            # Get specific soil property based on depth
              # Extract temporal weather information from ERA5 variables:
              if(weather_data == TRUE){
                if(!weather_source=='DWD'){
                  # Extract date information from Prep and Evap:
                  grids@data[,"wd_ERA5_prep"] <- 1; grids@data[,"wd_ERA5_evap"] <- 1
                  for(j in 1:nrow(grids@data)){
                    grids@data[j,]$wd_ERA5_prep <- as.numeric(grids@data[j, paste0('X',gsub('-','.',grids@data[j,'Datum']),'.00.00.00_ERA5_prep')])
                    grids@data[j,]$wd_ERA5_evap <- as.numeric(grids@data[j, paste0('X',gsub('-','.',grids@data[j,'Datum']),'.00.00.00_ERA5_evap')])
                  }
                  gc(); gc()
                }
                weather_Colnames <- names(grids@data)[grep("00_ERA5_prep|00_ERA5_evap",names(grids@data))]
                grids@data <- grids@data[ ,!(names(grids@data) %in% weather_Colnames)]
              }

            if(method == "ranger"){
              pred_df = predict(model_, grids@data, na.action = na.pass)$predictions
              gc(); gc()
            }else{
              pred_df = predict(model_, grids@data, na.action = na.pass)
              gc(); gc()
            }

            df_dum[, dpth] <- round(pred_df, 3)
            rm(pred_df);gc(); gc()
          }
          cat("\n")
          daily.pred.lst[[day_kount]] = df_dum
        }

        # Export predictions
          daily.pred.lst.df <- matrix(unlist(daily.pred.lst), nrow = nrow(grids))
          saveRDS(daily.pred.lst.df, file = rds_out)
          rm(daily.pred.lst.df);gc(); gc()

      }else{

        # Dummy df to hold predictions
          df_dum <- matrix(data = NA, nrow = nrow(grids), ncol = length(days_))

        for(day_kount in 1:length(days_)){
          # message(paste0("Predicting for Day: ", sprintf("%02d", day_kount), " of ", month.lst_[grep(month_, month.lst_)], ", ", pred.year_))
          pred_date = as.Date(paste0(pred.year_,"-",as.numeric(paste(match(month_,month.abb))),"-",days_[day_kount]), "%Y-%m-%d")

          # define temporal auto-correlation
            grids@data[, "Datum"] = pred_date
            grids@data[, "cum_day"] = floor(unclass(as.POSIXct(as.POSIXct(paste(pred_date), format = "%Y-%m-%d")))/86400)
            grids@data[, "doy"] = as.integer(strftime(as.POSIXct(paste(pred_date), format = "%Y-%m-%d"), format = "%j"))

          # Get specific soil property based on depth
            # Extract temporal weather information from ERA5 variables:
            if(weather_data == TRUE){
              if(!weather_source=='DWD'){
                # Extract date information from Prep and Evap:
                grids@data[,"wd_ERA5_prep"] <- 1; grids@data[,"wd_ERA5_evap"] <- 1
                for(j in 1:nrow(grids@data)){
                  grids@data[j,]$wd_ERA5_prep <- as.numeric(grids@data[j, paste0('X',gsub('-','.',grids@data[j,'Datum']),'.00.00.00_ERA5_prep')])
                  grids@data[j,]$wd_ERA5_evap <- as.numeric(grids@data[j, paste0('X',gsub('-','.',grids@data[j,'Datum']),'.00.00.00_ERA5_evap')])
                }
                gc(); gc()
              }
              weather_Colnames <- names(grids@data)[grep("00_ERA5_prep|00_ERA5_evap",names(grids@data))]
              grids@data <- grids@data[ ,!(names(grids@data) %in% weather_Colnames)]
            }

          if(method == "ranger"){
            pred_df = predict(model_, grids@data, na.action = na.pass)$predictions
            gc(); gc()
          }else{
            pred_df = predict(model_, grids@data, na.action = na.pass)
            gc(); gc()
          }

          df_dum[,day_kount] <- round(pred_df, 3)
          rm(pred_df);gc(); gc()
          cat("\n")
        }

        # Export predictions
          saveRDS(df_dum, file = rds_out)
          rm(df_dum);gc(); gc()
      }
      gc(); gc()
    }
    rm(grids); gc(); gc()
  }
}
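
# A minimal parallel-prediction sketch (hypothetical objects: "tile.lst" holds
# tile IDs such as "T1", "T2", ...; assumes the snowfall package and a fitted
# ranger model "model_"):
#   library(snowfall)
#   sfInit(parallel = TRUE, cpus = 4)
#   sfExport("model_", "in_path", "out_path")
#   sfLapply(tile.lst, predict_per_tiles, model_ = model_, in_path = in_path,
#            out_path = out_path, targ_var = "SM", method = "ranger",
#            days_ = 1:31, month_ = "Jan", month.lst_ = month.abb,
#            pred.year_ = 2020, DDepth = FALSE)
#   sfStop()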


#' Make ensemble space-time soil moisture predictions
#'
#' @description Ensembles the outputs of the base machine learning algorithms (i.e. ranger and xgboost) and estimates their weighted average as the final soil moisture estimate at each pixel location of the tiled grid. Output is a .tif raster object.
#'
#' @param i Vector of spatial tile IDs
#'
#' @param targ_var Target variable to ensemble its base predictions
#'
#' @param in_path Directory holding the stacked spatial tiles
#'
#' @param out_path Directory holding the stacked spatial tiles. Just to keep ensemble predictions (.tif files) with their respective stacked covariates.
#'
#' @param in_situ_min Numeric. Minimum value of the in situ / validation dataset
#'
#' @param in_situ_max Numeric. Maximum value of the in situ / validation dataset
#'
#' @param days_ Numeric. Days in the respective month of the targeted soil moisture prediction year, typically all days in that month
#'
#' @param month_ Character. Single month of the targeted soil moisture prediction year, given as an abbreviated month name, e.g. "Jan", "Mar", "Oct".
#'
#' @param month.lst_ Vector or list. All months of the targeted soil moisture prediction year.
#'
#' @param pred.year_ Numeric. Single soil moisture prediction year
#'
#' @param model1_w Ranger model estimation error used as weights in the weighted average formula of the ensemble model.
#'
#' @param model2_w Xgboost model estimation error used as weights in the weighted average formula of the ensemble model.
#'
#' @param raster_dtype Raster data type of the destination raster file. See Raster package for other options
#'
#' @param mvFlag Value imputed for missing or NO_DATA pixels. Default value is -32768
#'
#' @param DDepth TRUE/FALSE. Whether to predict for top soils (FALSE) or for specific standard soil depths (TRUE).
#'
#' @param stdps Vector. Numeric values of standard soil depths at which predictions are required. Default values are c(5,15,30,60,100)
#'
#' @param rds_file Saved stacked-spatial tile of covariates (i.e. prediction grid)
#'
#' @param era5_depth TRUE/FALSE. Whether predictions were made at the ERA5 soil layer depths c(7,28,100). Default value is FALSE
#'
#' @param ensMLA TRUE/FALSE. Whether to ensemble both base models (TRUE) or export the xgboost predictions only (FALSE). Default value is FALSE
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(rgdal)
#' library(plyr)
#'
#' out <- ensemble_predict(i, targ_var, in_path, out_path, in_situ_min, in_situ_max, days_, month_, month.lst_, pred.year_, model1_w, model2_w, raster_dtype, mvFlag, DDepth, stdps, rds_file)

ensemble_predict <- function(i, targ_var, in_path, out_path, in_situ_min, in_situ_max,days_, month_, month.lst_, pred.year_, model1_w, model2_w, raster_dtype, mvFlag, DDepth, stdps=c(5,15,30,60,100),rds_file,era5_depth=FALSE,ensMLA = FALSE){

  if(era5_depth==TRUE){stdps=c(7,28,100)}

  # Set seed
    set.seed(2410)

  if(missing(rds_file)){rds_file = paste0(in_path, "/", i, "/", i, ".rds") }

  if(file.exists(rds_file)){
    out_tiff = paste0(out_path, "/", i, "/", targ_var, "_", month.lst_[grep(month_, month.lst_)], "_", sprintf("%02d", days_[1]), ".tif")

    if(any(c(!file.exists(out_tiff), file.size(out_tiff) == 0))){

      tile_grid <- readRDS(rds_file)
      gc(); gc()

      if(nrow(tile_grid@data) > 1){

        if(ensMLA==TRUE){
          rf_pred = paste0(out_path, "/", i, "/", targ_var,"_", month.lst_[grep(month_, month.lst_)], "_", i, "_rf.rds")
          gb_pred = paste0(out_path, "/", i, "/", targ_var,"_", month.lst_[grep(month_, month.lst_)], "_", i, "_xgb.rds")

          if(all(file.exists(c(gb_pred,rf_pred)))){

            # Load already predicted grids
            pred_grid.rf <- readRDS(rf_pred)
            pred_grid.gb <- readRDS(gb_pred)
            gc(); gc()

            # Estimate weighted averages of gridded predictions
            tile_grid@data <- data.frame(Reduce("+", list(pred_grid.rf*model1_w, pred_grid.gb*model2_w)) / (model1_w + model2_w))
            rm(pred_grid.rf);rm(pred_grid.gb);gc(); gc()

            if(DDepth==TRUE){

              # Subset each day's root zone predictions
              preds.lst=list()
              bgn.seq =  seq(1, length(days_)*length(stdps), length(stdps))
              end.seq =  seq(length(stdps), length(days_)*length(stdps), length(stdps))
              for(it in 1:length(days_)){preds.lst[[it]] <- tile_grid[ , bgn.seq[it]:end.seq[it]]}

              # Write tiffs to file
              for(it in 1:length(days_)){
                for(dpth in 1:length(stdps)){
                  message(paste0("Exporting rootzone prediction for Day: ", sprintf("%02d", days_[it]), " of ", month.lst_[grep(month_, month.lst_)], ", ", pred.year_))
                  out_tif = paste0(out_path, "/", i, "/", targ_var, "_", month.lst_[grep(month_, month.lst_)],"_", sprintf("%02d", days_[it]), "_sl", dpth,".tif")
                  if(!file.exists(out_tif)){
                    preds.lst[[it]]@data[ , dpth] <- ifelse(preds.lst[[it]]@data[ , dpth] < in_situ_min, in_situ_min, ifelse(preds.lst[[it]]@data[ , dpth] > in_situ_max, in_situ_max, preds.lst[[it]]@data[ , dpth]))
                    writeGDAL(preds.lst[[it]][dpth], out_tif, type = raster_dtype, mvFlag = mvFlag, options = "COMPRESS=DEFLATE")
                  }
                }
              }
              rm(preds.lst); gc(); gc()

            }else{

              # Write GeoTiffs (per time step per standard soil depths)
              for(day_kount in 1:length(tile_grid@data)){
                # message(paste0("Exporting prediction for Day: ", sprintf("%02d", day_kount), " of ", month.lst_[grep(month_, month.lst_)], ", ", pred.year_))
                out_tif = paste0(out_path, "/", i, "/", targ_var, "_", month.lst_[grep(month_, month.lst_)],"_", sprintf("%02d", day_kount), ".tif")
                if(!file.exists(out_tif)){
                  tile_grid@data[ , day_kount] <- ifelse(tile_grid@data[ , day_kount] < in_situ_min, in_situ_min, ifelse(tile_grid@data[ , day_kount] > in_situ_max, in_situ_max, tile_grid@data[ , day_kount]))
                  writeGDAL(tile_grid[day_kount], out_tif, type = raster_dtype, mvFlag = mvFlag, options = "COMPRESS=DEFLATE")
                }
              }
              gc(); gc()
            }
          }
          gc();gc()

        }else{

          gb_pred = paste0(out_path, "/", i, "/", targ_var,"_", month.lst_[grep(month_, month.lst_)],  "_", i, "_xgb.rds")

          if (file.exists(gb_pred)) {
            pred_grid.gb <- readRDS(gb_pred)
            tile_grid@data <- data.frame(pred_grid.gb)
            rm(pred_grid.gb)

            if (DDepth == TRUE) {
              preds.lst = list()
              bgn.seq = seq(1, length(days_) * length(stdps), length(stdps))
              end.seq = seq(length(stdps), length(days_) * length(stdps), length(stdps))

              for (it in 1:length(days_)) {
                preds.lst[[it]] <- tile_grid[, bgn.seq[it]:end.seq[it]]
              }

              for (it in 1:length(days_)) {
                pred.stdps = preds.lst[[it]]
                for (dpth in 1:length(stdps)) {
                  message(paste0("Exporting rootzone prediction for Day: ", sprintf("%02d", days_[it]), " of ", month.lst_[grep(month_, month.lst_)],  ", ", pred.year_))
                  out_tif = paste0(out_path, "/", i, "/", targ_var, "_", month.lst_[grep(month_, month.lst_)], "_", sprintf("%02d", days_[it]), "_sl", dpth, ".tif")
                  if (!file.exists(out_tif)) {
                    pred.stdps@data[, dpth] <- ifelse(pred.stdps@data[,  dpth] < in_situ_min, in_situ_min, ifelse(pred.stdps@data[, dpth] > in_situ_max, in_situ_max, pred.stdps@data[, dpth]))
                    writeGDAL(pred.stdps[dpth], out_tif, type = raster_dtype, mvFlag = mvFlag, options = "COMPRESS=DEFLATE")
                  }
                }
              }

            }else {
              for (day_kount in 1:length(tile_grid@data)) {
                message(paste0("Exporting prediction for Day: ", sprintf("%02d", day_kount), " of ", month.lst_[grep(month_, month.lst_)], ", ", pred.year_))
                out_tif = paste0(out_path, "/", i, "/", targ_var, "_", month.lst_[grep(month_, month.lst_)], "_", sprintf("%02d", day_kount), ".tif")
                if (!file.exists(out_tif)) {
                  tile_grid@data[, day_kount] <- ifelse(tile_grid@data[, day_kount] < in_situ_min, in_situ_min,  ifelse(tile_grid@data[, day_kount] > in_situ_max, in_situ_max, tile_grid@data[, day_kount]))
                  writeGDAL(tile_grid[day_kount], out_tif,  type = raster_dtype, mvFlag = mvFlag, options = "COMPRESS=DEFLATE")
                }
              }
              gc(); gc()
            }
          }
        }
      }
    }
    rm(tile_grid);gc(); gc()
  }
  gc(); gc()
}

#' Round all values in a data frame
#'
#' @description Round all numeric values in a data frame to a user-specified number of decimal places
#'
#' @param dataframe Data frame. Non-numeric columns are left unchanged.
#'
#' @param num.digits Numeric. Number of decimal places
#'
#' @return Data frame
#'
#' @export
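#'
#' @examples
#' # A minimal sketch with made-up values:
#' df <- data.frame(a = c(1.234, 5.678), b = c("x", "y"))
#' Round_All_Vals_in_df(df, num.digits = 1)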
#'
Round_All_Vals_in_df <- function(dataframe, num.digits) {
  # Identify numeric columns and round only those
  nums <- vapply(dataframe, is.numeric, FUN.VALUE = logical(1))
  dataframe[, nums] <- round(dataframe[, nums], digits = num.digits)
  return(dataframe)
}

#' Mosaic soil moisture prediction
#'
#' @description Mosaic soil moisture predictions from the individual spatial tiles for all overlapping regions
#'
#' @param i Character. Abbreviated month name identifying the prediction files to mosaic, e.g. "Jan", "Mar", "Oct".
#'
#' @param in_path Directory holding the predictions (saved as .tif files)
#'
#' @param targ_var Predicted target variable
#'
#' @param r Spatial resampling algorithm. Default value is "bilinear". Value options are "cubicspline", "near". See GDAL documentation for further details.
#'
#' @param ot Raster data type of the destination raster file. See GDAL documentation for other options
#'
#' @param dstnodata Impute for missing or NO_DATA value. Default value is -32768
#'
#' @param out_path Directory to hold the final mosaicked predictions
#'
#' @param te Spatial extent of the destination raster file. See GDAL documentation for other options
#'
#' @param month_ Character. Single month of the targeted soil moisture prediction year, given as an abbreviated month name, e.g. "Jan", "Mar", "Oct".
#'
#' @param month.lst_ Vector or list. All months of the targeted soil moisture prediction year.
#'
#' @param pred.year_ Numeric. Single soil moisture prediction year
#'
#' @param pixel Expected spatial pixel size
#'
#' @param days_ Numeric. Days in the respective month of the targeted soil moisture prediction year, typically all days in that month
#'
#' @param stdps Vector. Numeric values of standard soil depths at which predictions are required. Default values are c(5,15,30,60,100)
#'
#' @param DDepth TRUE/FALSE. Whether to predict for top soils (FALSE) or for specific standard soil depths (TRUE).
#'
#' @param Spatial_res Expected spatial resolution of the destination raster file.
#'
#' @param Temporal_res Temporal aggregation level. Values are "daily", "weekly", "biweekly" and "monthly"
#'
#' @param shp Polygon (e.g. region-of-interest boundary) used to mask the aggregated rasters
#'
#' @param era5_depth TRUE/FALSE. Whether predictions were made at the ERA5 soil layer depths c(7,28,100). Default value is FALSE
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#'
#' out <- mosaic_ensemble_pred_tiles(i, in_path, targ_var, r, ot, dstnodata, out_path, te, month_, month.lst_, pred.year_, pixel, days_, stdps, DDepth, Spatial_res, Temporal_res, shp)

mosaic_ensemble_pred_tiles <- function(i, in_path, targ_var, r="bilinear", ot, dstnodata, out_path, te, month_, month.lst_, pred.year_, pixel, days_, stdps=c(5,15,30,60,100), DDepth, Spatial_res, Temporal_res,shp,era5_depth=FALSE){

  if(era5_depth==TRUE){stdps=c(7,28,100)}

  # Set seed
  set.seed(2410)

  out_tiff <- paste0(out_path, "/", targ_var, "_", i, "_",sprintf("%02d", days_[1]),"_", Spatial_res,"m.tif")

  if(any(c(!file.exists(out_tiff),file.size(out_tiff)==0))){

    # Loop mosaic for all days in the month
      if(DDepth==TRUE){

        for(day_kount in 1:length(days_)){
          #message(paste0("Mosiacking ensemble prediction for Day: ", sprintf("%02d", days_[day_kount]), " of ", month.lst_[grep(month_, month.lst_)], ", ", pred.year_))

          # Write each depth tiffs to storage
          for(dpth in 1:length(stdps)){
            #message(paste0("Exporting rootzone prediction for depth: ", stdps[dpth], " cm ..."))
            out_tif = paste0(out_path, "/", targ_var, "_", month.lst_[grep(month_, month.lst_)],"_", sprintf("%02d", days_[day_kount]), "_sl", dpth,".tif")

            if(!file.exists(out_tif)){

              tmp.lst <- list.files(path=in_path, pattern=glob2rx(paste0("^",targ_var, "_", i, "_", sprintf("%02d", days_[day_kount]),"*_sl", dpth,".tif$")), full.names=TRUE, recursive=TRUE)

              out.tmp <- tempfile(fileext = ".txt")
              vrt.tmp <- tempfile(fileext = ".vrt")

              cat(tmp.lst, sep="\n", file=out.tmp)
              system(paste0('gdalbuildvrt -input_file_list ', out.tmp, ' ',vrt.tmp))
              system(paste0('gdalwarp ', vrt.tmp,' ', out_tif,' -ot ', ot,' -dstnodata ',dstnodata,' -te ',te,' -co "BIGTIFF=YES" -co "COMPRESS=DEFLATE" -overwrite -r ', r, ' -tr ',pixel))
              gc();gc()
              unlink(out.tmp);  unlink(vrt.tmp); rm(out.tmp); rm(vrt.tmp)
            }
          }
        }

       # Aggregations

        # Daily predictions
          if(Temporal_res=="daily"){

            message(paste0("Executing ",Temporal_res, " aggregation ..."))
            output_dir2 <- paste0(out_path, "/",Temporal_res)
            lapply(output_dir2, dir.create, showWarnings = FALSE)

            predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)
            move.out <- lapply(predd.lst, function(x) {file.copy(x, paste0(output_dir2, "/Daily_",basename(x)))})
            unlink(predd.lst,recursive = T)

          }else if(Temporal_res=="weekly"){

            # Weekly predictions
            recurr_ = 7
            message(paste0("Executing ",Temporal_res, " aggregation ..."))
            output_dir2 <- paste0(out_path, "/",Temporal_res)
            lapply(output_dir2, dir.create, showWarnings = FALSE)

            predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)

            # Actual aggregation
            bgn_seq = seq(1, length(days_), recurr_)
            end_seq = seq(recurr_, length(days_), recurr_)
            if(!length(end_seq)==length(bgn_seq)){
              end_seq = c(end_seq,length(days_))
            }

            for(dpf in 1:length(stdps)){
              predd.lst2 <- predd.lst[grep(paste0("_sl",dpf),predd.lst)]

              for(map_kount in 1:length(bgn_seq)){

                map.lst = predd.lst2[bgn_seq[map_kount]:end_seq[map_kount]]

                datess = as.Date(paste0(pred.year_,"-",as.numeric(paste(match(month_,month.abb))),"-",days_),format = "%Y-%m-%d")
                datess = datess[bgn_seq[map_kount]:end_seq[map_kount]]

                if(length(map.lst) < 2){
                  tiff_ = raster(map.lst)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)

                }else{
                  tiff_ = raster::stack(map.lst)
                  tiff_ = raster::calc(tiff_, median, na.rm = T)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)
                }
                writeRaster(tiff_, paste0(output_dir2,"/Wk_comp_", targ_var, "_", i, "_", sprintf("%02d",min(day(datess))),"_to_",sprintf("%02d",max(day(datess))) ,"_",Spatial_res,"m_sl",dpf,".tif"), overwrite=T, options="COMPRESS=DEFLATE")
              }
            }
            unlink(predd.lst,recursive = T)

          }else if(Temporal_res=="biweekly"){

            # Bi-weekly predictions
            recurr_ = 14
            message(paste0("Executing ",Temporal_res, " aggregation ..."))
            output_dir2 <- paste0(out_path, "/",Temporal_res)
            lapply(output_dir2, dir.create, showWarnings = FALSE)

            predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)

            # Actual aggregation
            bgn_seq = seq(1, length(days_), recurr_)
            end_seq = seq(recurr_, length(days_), recurr_)
            if(!length(end_seq)==length(bgn_seq)){
              end_seq = c(end_seq,length(days_))
            }

            for(dpf in 1:length(stdps)){
              predd.lst2 <- predd.lst[grep(paste0("_sl",dpf),predd.lst)]

              for(map_kount in 1:length(bgn_seq)){

                map.lst = predd.lst2[bgn_seq[map_kount]:end_seq[map_kount]]

                datess = as.Date(paste0(pred.year_,"-",as.numeric(paste(match(month_,month.abb))),"-",days_),format = "%Y-%m-%d")
                datess = datess[bgn_seq[map_kount]:end_seq[map_kount]]

                if(length(map.lst) < 2){
                  tiff_ = raster(map.lst)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)

                }else{
                  tiff_ = raster::stack(map.lst)
                  tiff_ = raster::calc(tiff_, median, na.rm = T)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)
                }
                writeRaster(tiff_, paste0(output_dir2,"/BiWk_comp_", targ_var, "_", i, "_", sprintf("%02d",min(day(datess))),"_to_",sprintf("%02d",max(day(datess))) ,"_", Spatial_res,"m_sl",dpf,".tif"), overwrite=T, options="COMPRESS=DEFLATE")
              }
            }
            unlink(predd.lst,recursive = T)

          }else if(Temporal_res=="monthly"){

            # Monthly predictions
            message(paste0("Executing ",Temporal_res, " aggregation ..."))

            output_dir2 <- paste0(out_path, "/",Temporal_res)
            lapply(output_dir2, dir.create, showWarnings = FALSE)

            predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)

            for(dpf in 1:length(stdps)){
              predd.lst2 <- predd.lst[grep(paste0("_sl",dpf),predd.lst)]

              tiff_ = raster::stack(predd.lst2)
              tiff_ = raster::calc(tiff_, median, na.rm = T)
              values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
              tiff_ = mask(tiff_, shp)

              writeRaster(tiff_, paste0(output_dir2,"/Monthly_comp_", targ_var, "_", i, "_", Spatial_res,"m_sl",dpf,".tif"), overwrite=T, options="COMPRESS=DEFLATE")
            }
            unlink(predd.lst,recursive = T)
          }

      }else{

        for(day_kount in days_){

          out_tif <- paste0(out_path, "/", targ_var, "_", i, "_", sprintf("%02d", day_kount) ,"_",Spatial_res,"m.tif")

          if(!file.exists(out_tif)){

            tmp.lst <- list.files(path=in_path, pattern=glob2rx(paste0("^",targ_var, "_", i, "_", sprintf("%02d", day_kount),"*.tif$")), full.names=TRUE, recursive=TRUE)

            out.tmp <- tempfile(fileext = ".txt")
            vrt.tmp <- tempfile(fileext = ".vrt")

            cat(tmp.lst, sep="\n", file=out.tmp)

            system(paste0('gdalbuildvrt -input_file_list ', out.tmp, ' ',vrt.tmp))
            system(paste0('gdalwarp ', vrt.tmp,' ', out_tif,' -ot ', ot,' -dstnodata ',dstnodata,' -te ',te,' -co "BIGTIFF=YES" -co "COMPRESS=DEFLATE" -overwrite -r ', r, ' -tr ',pixel))

            unlink(out.tmp);  unlink(vrt.tmp)
            rm(out.tmp); rm(vrt.tmp)
          }
        }

        # Aggregations

          # Daily predictions
            if(Temporal_res=="daily"){
              message(paste0("Executing ",Temporal_res, " aggregation ..."))
              output_dir2 <- paste0(out_path, "/",Temporal_res)
              lapply(output_dir2, dir.create, showWarnings = FALSE)

              predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)
              move.out <- lapply(predd.lst, function(x) {file.copy(x, paste0(output_dir2, "/Daily_",basename(x)))})
              unlink(predd.lst,recursive = T)

            }else if(Temporal_res=="weekly"){

              # Weekly predictions
              recurr_ = 7
              message(paste0("Executing ",Temporal_res, " aggregation ..."))
              output_dir2 <- paste0(out_path, "/",Temporal_res)
              lapply(output_dir2, dir.create, showWarnings = FALSE)

              predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)

              # Actual aggregation
              bgn_seq = seq(1, length(days_), recurr_)
              end_seq = seq(recurr_, length(days_), recurr_)
              if(!length(end_seq)==length(bgn_seq)){
                end_seq = c(end_seq,length(days_))
              }

              for(map_kount in 1:length(bgn_seq)){

                map.lst = predd.lst[bgn_seq[map_kount]:end_seq[map_kount]]

                datess = as.Date(paste0(pred.year_,"-",as.numeric(paste(match(month_,month.abb))),"-",days_),format = "%Y-%m-%d")
                datess = datess[bgn_seq[map_kount]:end_seq[map_kount]]

                if(length(map.lst) < 2){
                  tiff_ = raster(map.lst)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)

                }else{
                  tiff_ = raster::stack(map.lst)
                  tiff_ = raster::calc(tiff_, median, na.rm = T)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)
                }
                writeRaster(tiff_, paste0(output_dir2,"/Wk_comp_", targ_var, "_", i, "_", sprintf("%02d",min(day(datess))),"_to_",sprintf("%02d",max(day(datess))) ,"_",Spatial_res,"m.tif"), overwrite=T, options="COMPRESS=DEFLATE")
              }
              unlink(predd.lst,recursive = T)

            }else if(Temporal_res=="biweekly"){

              # Bi-weekly predictions
              recurr_ = 14
              message(paste0("Executing ",Temporal_res, " aggregation ..."))
              output_dir2 <- paste0(out_path, "/",Temporal_res)
              lapply(output_dir2, dir.create, showWarnings = FALSE)

              predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)

              # Actual aggregation
              bgn_seq = seq(1, length(days_), recurr_)
              end_seq = seq(recurr_, length(days_), recurr_)
              if(!length(end_seq)==length(bgn_seq)){
                end_seq = c(end_seq,length(days_))
              }

              for(map_kount in 1:length(bgn_seq)){

                map.lst = predd.lst[bgn_seq[map_kount]:end_seq[map_kount]]

                datess = as.Date(paste0(pred.year_,"-",as.numeric(paste(match(month_,month.abb))),"-",days_),format = "%Y-%m-%d")
                datess = datess[bgn_seq[map_kount]:end_seq[map_kount]]

                if(length(map.lst) < 2){
                  tiff_ = raster(map.lst)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)

                }else{
                  tiff_ = raster::stack(map.lst)
                  tiff_ = raster::calc(tiff_, median, na.rm = T)
                  values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
                  tiff_ = mask(tiff_, shp)
                }
                writeRaster(tiff_, paste0(output_dir2,"/BiWk_comp_", targ_var, "_", i, "_", sprintf("%02d",min(day(datess))),"_to_",sprintf("%02d",max(day(datess))) ,"_", Spatial_res,"m.tif"), overwrite=T, options="COMPRESS=DEFLATE")
              }
              unlink(predd.lst,recursive = T)

            }else if(Temporal_res=="monthly"){

              # Monthly predictions
              message(paste0("Executing ",Temporal_res, " aggregation ..."))

              output_dir2 <- paste0(out_path, "/",Temporal_res)
              lapply(output_dir2, dir.create, showWarnings = FALSE)

              predd.lst = list.files(out_path, pattern = glob2rx("*.tif"), full.names = T)

              tiff_ = raster::stack(predd.lst)
              tiff_ = raster::calc(tiff_, median, na.rm = T)
              values(tiff_)[is.na(values(tiff_))] = median(values(tiff_),na.rm = T)
              tiff_ = mask(tiff_, shp)

              writeRaster(tiff_, paste0(output_dir2,"/Monthly_comp_", targ_var, "_", i, "_", Spatial_res,"m.tif"), overwrite=T, options="COMPRESS=DEFLATE")
              unlink(predd.lst,recursive = T)
            }
      }

  }
}
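
## A standalone sketch (hypothetical values, not part of the package API) of the
## weekly/biweekly windowing used in the aggregation steps above: day indices are
## cut into consecutive windows of recurr_ days, and a shorter trailing window is
## appended when the month length is not a multiple of the window size. Wrapped
## in if(FALSE) so sourcing this file does not execute it.
if(FALSE){
  days_demo   <- 1:31   # day indices of a 31-day month
  recurr_demo <- 7      # weekly windows
  bgn <- seq(1, length(days_demo), recurr_demo)            # 1  8 15 22 29
  end <- seq(recurr_demo, length(days_demo), recurr_demo)  # 7 14 21 28
  if(length(end) != length(bgn)){ end <- c(end, length(days_demo)) }  # pad: 7 14 21 28 31
  mapply(function(b, e){ paste0("days ", b, " to ", e) }, bgn, end)
}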


#' Check for missing tiles
#'
#' @description Loops through the predicted tile directories to find any missing ensemble predictions
#'
#' @param targ_var Predicted target variable
#'
#' @param pr.dirs Directory names of the stacked spatial tiles, used to match ensemble predictions (.tif files) to their respective stacked covariates
#'
#' @param in_path Input directory, searched recursively for predicted tiles (.tif files)
#'
#' @return A list
#'
#' @export
#'
#' @examples
#'
#' out <- missing.tiles(targ_var, in_path, pr.dirs)

missing.tiles <- function(targ_var, in_path, pr.dirs){
  dir.lst <- list.files(path=in_path, pattern=glob2rx(paste0("^", targ_var, "*.tif")), full.names=TRUE, recursive=TRUE)
  out.lst <- pr.dirs[which(!pr.dirs %in% basename(dirname(dir.lst)))]
  return(out.lst)
}
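
## Hedged usage sketch for missing.tiles(); the target variable, path and tile
## ids below are hypothetical. Wrapped in if(FALSE) so it is never executed on
## source.
if(FALSE){
  pr.dirs_demo <- c("T0001", "T0002", "T0003")   # expected tile directories
  miss <- missing.tiles(targ_var = "SM",
                        in_path  = "/data/pred_tiles",  # searched recursively for SM*.tif
                        pr.dirs  = pr.dirs_demo)
  miss  # tile directories still lacking an ensemble prediction
}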


#' Mosaic Sentinel-1 images
#'
#' @description Mosaic Sentinel-1 images (from different orbits) for VV and VH polarisations
#'
#' @param j Vector or a list of all required months
#'
#' @param orbit.lst Vector containing the list of Sentinel-1 orbits
#'
#' @param pixel Expected spatial pixel size
#'
#' @param Sen1_lst.byMonth Named list of Sentinel-1 file paths, grouped by month
#'
#' @param sens.year.lst Vector or a list of required years.
#'
#' @param out_dir Main output directory
#'
#' @return NULL
#'
#' @export
#'
#' @examples
#' library(rgdal)
#' library(lubridate)
#'
#' out <- mosaic_sentinel(j, orbit.lst, pol.lst = c("VV", "VH"), pixel, Sen1_lst.byMonth, sens.year.lst, out_dir)

mosaic_sentinel <- function(j, orbit.lst, pol.lst = c("VV", "VH"), pixel, Sen1_lst.byMonth, sens.year.lst, out_dir){
  # Set seed
  set.seed(2410)

  # Subset per month
    Sen1_lst.new = Sen1_lst.byMonth[[paste(j)]]

  # Sort by polarization
    Sen1_lst.pol = lapply(pol.lst, function(x){unlist(Sen1_lst.new)[grep(x, basename(Sen1_lst.new))]})
    names(Sen1_lst.pol) = pol.lst

  # Loop per polarization
    for(k in pol.lst){
      Sen1_lst.new2 = Sen1_lst.pol[[paste(k)]]

      # Sort by Orbit
        Sen1_lst.orb = lapply(orbit.lst, function(x){unlist(Sen1_lst.new2)[grep(x, Sen1_lst.new2)]})

      # Sort by Date and Mosaic
        # Get recurring dates
          dates_chr = unlist(lapply(basename(Sen1_lst.orb[[1]]), function(x){substr(x, 18, 25)}))
          recurr_ = length(which(dates_chr == dates_chr[1]))  # number of files sharing the first acquisition date

      # Check date sequence
        if(lengths(Sen1_lst.orb[1]) != lengths(Sen1_lst.orb[2])){
          message(paste0("Number of scenes per orbit is not equal for the requested area for ", j, ". ", orbit.lst[1], " has length ", lengths(Sen1_lst.orb[1]), " while ", orbit.lst[2], " has length ", lengths(Sen1_lst.orb[2])))
        }else{
          message(paste0("Number of scenes per orbit is equal for the requested area for ", j, ". Proceeding with mosaic for ", j, " ..."))
        }

      # Earliest to Latest
        df_ = as.data.frame(unlist(Sen1_lst.orb))
        colnames(df_) = "Sentinel_links"
        df_$Date = as.Date( paste0( substr( substr(basename(as.character(df_$Sentinel_links)),18,25),1,4),"-",substr( substr(basename(as.character(df_$Sentinel_links)),18,25),5,6),"-",substr( substr(basename(as.character(df_$Sentinel_links)),18,25),7,8)))
        df_ = dplyr::arrange(df_, Date)

        Sen1_lst.new3 = as.character(df_$Sentinel_links)
        rm(df_)
        Sen1_lst.orb2 = lapply(orbit.lst, function(x){unlist(Sen1_lst.new3)[grep(x, Sen1_lst.new3)]})

      # Create output directory
        dir_ = paste0(out_dir,"/",sens.year.lst, "/", j,"/Mosaic")
        for(i in dir_){
          lapply(paste0(dir_,"/",k), dir.create, recursive = TRUE, showWarnings = FALSE)
        }

      # Actual mosaic
        bgn_seq = seq(1, length(Sen1_lst.orb2[[1]]), recurr_)
        end_seq = seq(recurr_, length(Sen1_lst.orb2[[1]]), recurr_)

      # Loop through sequence
        for(tile_kount in 1:length(bgn_seq)){

          tmp.lst2 = list()

          for(tile_kount2 in 1:length(orbit.lst)){
            tmp.lst2[[tile_kount2]] <- unlist(c(Sen1_lst.orb2[[tile_kount2]][bgn_seq[tile_kount]:end_seq[tile_kount]]))
          }

          tmp.lst = unlist(tmp.lst2)

          set_dates <- unique(unlist(lapply(basename(tmp.lst), function(x){substr(x,18,25)})))
          set_dates <- as.Date(paste0(substr(set_dates,1,4),"-", substr(set_dates,5,6),"-", substr(set_dates,7,8)),format = "%Y-%m-%d")

          # Output tiff name - TO DO -  use length of difference btn dates to label (>10 days)
            out_tif = paste0(dir_,"/",k,"/Sent_",sprintf("%02d",min(day(set_dates))),"_to_",sprintf("%02d",max(day(set_dates))),"_",k,"_",j,"_",sens.year.lst,".tif")

          # Check if file exists
            if(!file.exists(out_tif)){

              # Create temp files to hold rasters files
                out.tmp <- tempfile(fileext = ".txt")
                vrt.tmp <- tempfile(fileext = ".vrt")

              # print to .txt temp file
                cat(tmp.lst, sep="\n", file=out.tmp)

              # Builds VRT from list of raster files
                system(paste0('gdalbuildvrt -srcnodata 0 -input_file_list ', out.tmp, ' ',vrt.tmp))

              # Save mosaic to file
                system(paste0('gdalwarp ', vrt.tmp,' ', out_tif,' -ot Float32 -srcnodata 0 -dstnodata 0 -co "BIGTIFF=YES" -co "COMPRESS=DEFLATE" -overwrite -r bilinear -tr ',pixel))
            }
        }
    }
}
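
## Hedged usage sketch for mosaic_sentinel(); the file names, orbit labels and
## year are hypothetical, and assume Sentinel-1 file names whose characters
## 18-25 encode the acquisition date (YYYYMMDD), as the function itself expects.
## Wrapped in if(FALSE) so it is never executed on source.
if(FALSE){
  library(lubridate)
  Sen1_demo <- list(Jan = c("/s1/S1A_IW_GRDH_1SDV_20200101T050000_A1_VV.tif",
                            "/s1/S1A_IW_GRDH_1SDV_20200101T050000_A1_VH.tif",
                            "/s1/S1A_IW_GRDH_1SDV_20200113T050000_D1_VV.tif",
                            "/s1/S1A_IW_GRDH_1SDV_20200113T050000_D1_VH.tif"))
  mosaic_sentinel(j = "Jan", orbit.lst = c("A1", "D1"), pol.lst = c("VV", "VH"),
                  pixel = "10 10", Sen1_lst.byMonth = Sen1_demo,
                  sens.year.lst = 2020, out_dir = tempdir())
}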


#' Leave Location Time Out Cross Validation
#'
#' @description Assess model performance (R2, RMSE, ME) via a Leave Location Time Out Cross validation algorithm (LLTO-CV).
#'
#' @param log_ TRUE/FALSE. Whether to log-transform (log1p) observed and predicted soil moisture measurements when estimating the performance metrics.
#'
#' @param targ_var Predicted target variable
#'
#' @param reg_mat_ Regression matrix for cross validation
#'
#' @param pnts_cv Object of class SpatialPoints dataframe
#'
#' @param idcol Column name / Station ID of in situ measurement
#'
#' @param nfold Numeric. Number of cross validation folds
#'
#' @param fmula Regression formula
#'
#' @param kan Character. Single month of the targeted soil moisture prediction year. Values are abbreviated month names, e.g. "Jan", "Mar", "Oct".
#'
#' @param month.lst_ Vector or a list. All months of the targeted soil moisture prediction year.
#'
#' @param pred.year_ Numeric. Single soil moisture prediction year
#'
#' @param method Algorithm for LLTO. Default value is "ranger". Other option is "ensemble" for coupling Random forest (ranger) and Gradient boost (Xgboost) algorithms.
#'
#' @return A list with CV results (observed, predicted and performance metrics)
#'
#' @export
#'
#' @examples
#' library(raster)
#' library(caret)
#' library(ranger)
#' library(DescTools)
#' library(dismo)
#'
#' out <- LLTO_cv(log_, targ_var, reg_mat_, pnts_cv, idcol, nfold, fmula, kan, month.lst_, pred.year_, method)

LLTO_cv = function(log_ = FALSE,targ_var,reg_mat_, pnts_cv, idcol, nfold, fmula, kan, month.lst_,pred.year_, method = "ranger"){
  # Set seed
  set.seed(2410)

  # Create folds for partitioning using measurement station's id
    parti <- dismo::kfold(levels(as.factor(reg_mat_[ ,idcol])), k = nfold)

  # A list to populate LLTO_CV results into
    out.rf = list(); out.rf.w = list()
    out.xgb = list(); out.xgb.w = list()

  # Generic model tuning settings for caret wrapper package
    ctrl <- trainControl(method = "repeatedcv",number = 10,repeats = 3)
    xgb.tuneGrid <- expand.grid(eta = c(0.3, 0.4, 0.5),nrounds = c(50, 100, 150),max_depth = 2:4,gamma = 0,colsample_bytree = 0.8,min_child_weight = 1,subsample=1)
    rf.tuneGrid <- expand.grid(mtry = seq(0, 20, by = 2), splitrule="variance", min.node.size = 5)

  # 1 sd quantiles
    quant <- c((1-0.682)/2, 0.5, 1-(1-0.682)/2)  # approx. 0.159, 0.5 and 0.841, i.e. median +/- 1 sd

  # LLTO approach
    if(method=="ranger"){
      # Via Random forest - Ranger
        for(fold_kount in 1:nfold){

          message(paste0("Cross validating SM predictions via ranger for fold: ", fold_kount, " of ", nfold, " .Variable: ", targ_var, " of month ", grep(kan, month.lst_), " of ", length(month.lst_)," for year ", pred.year_, " ..."))

          # Split regression matrix
            reg_mat_stay = reg_mat_[which(reg_mat_[ , idcol] %in% levels(as.factor(reg_mat_[ , idcol]))[parti == fold_kount]), ]
            reg_mat_pred = reg_mat_[which(reg_mat_[ , idcol] %in% levels(as.factor(reg_mat_[ , idcol]))[!parti == fold_kount]), ]

          # Model
            t.mrfX <- caret::train(fmula, data=reg_mat_pred, method="ranger", trControl=ctrl, tuneGrid=rf.tuneGrid)
            rf_model_SM <- ranger::ranger(fmula, data=reg_mat_pred, write.forest=TRUE, mtry=t.mrfX$bestTune$mtry, num.trees=100, quantreg = T)  # quantile regression forest fitted on the training split

          # Predict
            pred_SM = data.frame(predict(rf_model_SM, reg_mat_stay, type="quantiles",quantiles = quant, na.action = na.pass)$predictions)

          # Assign residuals
            df_rf = data.frame(Observed=reg_mat_stay[paste0(targ_var)], Predicted = pred_SM[,2]); colnames(df_rf) = c("Observed", "Predicted")
            out.rf[[fold_kount]] = df_rf; rm(df_rf)
            cat("\n")
        }
        gc(); gc()

      # Calculate performance accuracies
        nana = do.call(rbind, out.rf)

      # Calculate performance accuracies
        if(log_==FALSE){
          # Mean Error
            ME = mean(nana$Observed - nana$Predicted, na.rm=TRUE)

          # Mean Absolute Error
            MAE = mean(abs(nana$Observed - nana$Predicted), na.rm=TRUE)

          # Root Mean Squared Error
            RMSE = sqrt(mean((nana$Observed - nana$Predicted)^2, na.rm=TRUE))

          # Concordance Correlation Coefficient (CCC)
            ccc = DescTools::CCC(nana$Observed, nana$Predicted, ci = "z-transform", conf.level = 0.95, na.rm=TRUE)$rho.c
            LLTO.cv_F <- list(nana, data.frame(ME=ME, MAE=MAE, RMSE=RMSE, CCC=ccc)) #R.squared = R.sqaured,

        }else{

          # Mean Error
            logME = mean(log1p(nana$Observed) - log1p(nana$Predicted), na.rm=TRUE)

          # Mean Absolute Error
            logMAE = mean(abs(log1p(nana$Observed) - log1p(nana$Predicted)), na.rm=TRUE)

          # Log of Root Mean Squared Error
            logRMSE = sqrt(mean((log1p(nana$Observed) - log1p(nana$Predicted))^2, na.rm=TRUE))

          # Log of Concordance Correlation Coefficient (CCC)
            logccc = DescTools::CCC(log1p(nana$Observed), log1p(nana$Predicted), ci = "z-transform", conf.level = 0.95, na.rm=TRUE)$rho.c
            LLTO.cv_F <- list(nana, data.frame(ME=logME, MAE=logMAE, RMSE=logRMSE, CCC=logccc)) #R.sqaured = logR.sqaured,
        }

      # Final output of performance metrics
        names(LLTO.cv_F) <- c(paste0("Base_MLA_Obs_Pred_", targ_var, "_",month.lst_[grep(kan, month.lst_)], "_", pred.year_),paste0("Base_MLA_Summary_", targ_var, "_", month.lst_[grep(kan, month.lst_)],"_", pred.year_))
      gc(); gc()

    }else{
      # Ensemble of Random forest and Xgboost
      # Model1 via Random forest - Ranger
        for(fold_kount in 1:nfold){

          message(paste0("Cross validating SM predictions via ranger for fold: ", fold_kount, " of ", nfold, " .Variable: ", targ_var, " of month ", grep(kan, month.lst_), " of ", length(month.lst_)," for year ", pred.year_, " ..."))

          # Split regression matrix
            reg_mat_stay = reg_mat_[which(reg_mat_[ , idcol] %in% levels(as.factor(reg_mat_[ , idcol]))[parti == fold_kount]), ]
            reg_mat_pred = reg_mat_[which(reg_mat_[ , idcol] %in% levels(as.factor(reg_mat_[ , idcol]))[!parti == fold_kount]), ]

          # Model
            t.mrfX <- caret::train(fmula, data=reg_mat_pred, method="ranger", trControl=ctrl, tuneGrid=rf.tuneGrid)
            rf_model_SM <- ranger::ranger(fmula, data=reg_mat_pred, write.forest=TRUE, mtry=t.mrfX$bestTune$mtry, num.trees=100, quantreg = T)  # quantile regression forest fitted on the training split
            rf_model_SM.w = 1/rf_model_SM$prediction.error

          # Predict
            pred_SM = data.frame(predict(rf_model_SM, reg_mat_stay, type="quantiles",quantiles = quant, na.action = na.pass)$predictions)

          # Assign residuals
            df_rf = data.frame(Observed=reg_mat_stay[paste0(targ_var)], Predicted = pred_SM[,2]); colnames(df_rf) = c("Observed", "Predicted")
            out.rf.w[[fold_kount]] = rf_model_SM.w; rm(rf_model_SM.w)
            out.rf[[fold_kount]] = df_rf; rm(df_rf)
            cat("\n")
        }
      gc(); gc()


      # Model2 via XGBoost
        for(fold_kount in 1:nfold){

          message(paste0("Cross validating SM predictions via XGboost for fold: ", fold_kount, " of ", nfold, ". LLTO-CV for variable: ", targ_var, " of month ", grep(kan, month.lst_), " of ", length(month.lst_)," for year ", pred.year_, " ..."))

          # Split regression matrix
            reg_mat_stay = reg_mat_[which(reg_mat_[ , idcol] %in% levels(as.factor(reg_mat_[ , idcol]))[parti == fold_kount]), ]
            reg_mat_pred = reg_mat_[which(reg_mat_[ , idcol] %in% levels(as.factor(reg_mat_[ , idcol]))[!parti == fold_kount]), ]

          # Train
            xgb_mT.SM <- caret::train(fmula, data=reg_mat_pred, method="xgbTree", trControl=ctrl, tuneGrid=xgb.tuneGrid)
            xgb_mT.SM.w = 1/(min(xgb_mT.SM$results$RMSE, na.rm=TRUE)^2)

          # Predict
            pred_SM = predict(xgb_mT.SM, reg_mat_stay)

          # Assign residuals
            df_rf = data.frame(Observed=reg_mat_stay[paste0(targ_var)], Predicted=pred_SM); colnames(df_rf) = c("Observed", "Predicted")
            out.xgb.w[[fold_kount]] = xgb_mT.SM.w; rm(xgb_mT.SM.w)
            out.xgb[[fold_kount]] = df_rf; rm(df_rf)
            cat("\n")
        }
      gc(); gc()

      # Ensemble LLTO_CV
        # Estimate weights
          cv_model1.w = mean(unlist(out.rf.w))   # mean ranger weight across folds
          cv_model2.w = mean(unlist(out.xgb.w))  # mean XGBoost weight across folds

        # Weighted average
          cv_weights = list(cv_model1.w,cv_model2.w);names(cv_weights) = c("out.rf","out.xgb");
          cv_df.lst = list(do.call(rbind, out.rf),do.call(rbind, out.xgb)); names(cv_df.lst) = c("out.rf","out.xgb")
          df_ens_cv <- data.frame(
            Predicted = (do.call(rbind, out.rf)$Predicted * cv_model1.w +
                         do.call(rbind, out.xgb)$Predicted * cv_model2.w) / (cv_model1.w + cv_model2.w),
            Observed = do.call(rbind, out.rf)$Observed)

      # Calculate performance accuracies
        if(log_==FALSE){
          # Mean Error
            ME = mean(df_ens_cv$Observed - df_ens_cv$Predicted, na.rm=TRUE)

          # Mean Absolute Error
            MAE = mean(abs(df_ens_cv$Observed - df_ens_cv$Predicted), na.rm=TRUE)

          # Root Mean Squared Error
            RMSE = sqrt(mean((df_ens_cv$Observed - df_ens_cv$Predicted)^2, na.rm=TRUE))

          # Concordance Correlation Coefficient (CCC)
            ccc = DescTools::CCC(df_ens_cv$Observed, df_ens_cv$Predicted, ci = "z-transform", conf.level = 0.95, na.rm=TRUE)$rho.c
            LLTO.cv_F <- list(cv_weights,cv_df.lst, df_ens_cv, data.frame(ME=ME, MAE=MAE, RMSE=RMSE, CCC=ccc)) #R.squared = R.sqaured,

        }else{

          # Mean Error
            logME = mean(log1p(df_ens_cv$Observed) - log1p(df_ens_cv$Predicted), na.rm=TRUE)

          # Mean Absolute Error
            logMAE = mean(abs(log1p(df_ens_cv$Observed) - log1p(df_ens_cv$Predicted)), na.rm=TRUE)

          # Log of Root Mean Squared Error
            logRMSE = sqrt(mean((log1p(df_ens_cv$Observed) - log1p(df_ens_cv$Predicted))^2, na.rm=TRUE))

          # Log of Concordance Correlation Coefficient (CCC)
            logccc = DescTools::CCC(log1p(df_ens_cv$Observed), log1p(df_ens_cv$Predicted), ci = "z-transform", conf.level = 0.95, na.rm=TRUE)$rho.c
            LLTO.cv_F <- list(cv_weights, cv_df.lst, df_ens_cv, data.frame(ME=logME, MAE=logMAE, RMSE=logRMSE, CCC=logccc)) #R.sqaured = logR.sqaured,
        }

      # Final output of performance metrics
        names(LLTO.cv_F) <- c(paste0("Base_MLA_RMSE_", targ_var, "_",month.lst_[grep(kan, month.lst_)], "_", pred.year_),paste0("Base_MLA_Obs_Pred_", targ_var, "_",month.lst_[grep(kan, month.lst_)], "_", pred.year_),paste0("Ensemble_Obs_Pred_", targ_var, "_",month.lst_[grep(kan, month.lst_)], "_", pred.year_),paste0("Ensemble_Summary_", targ_var, "_", month.lst_[grep(kan, month.lst_)],"_", pred.year_))
        gc(); gc()
    }

  return(LLTO.cv_F)
}
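
## Hedged usage sketch for LLTO_cv(); reg_mat_demo and pnts_demo are
## hypothetical placeholders for a prepared regression matrix (with a station
## id column and a soil moisture target) and the matching SpatialPoints object.
## Wrapped in if(FALSE) so it is never executed on source.
if(FALSE){
  library(caret); library(ranger); library(DescTools); library(dismo)
  fmula_demo <- as.formula("SM ~ ndvi + dem + precip")
  cv_out <- LLTO_cv(log_ = FALSE, targ_var = "SM", reg_mat_ = reg_mat_demo,
                    pnts_cv = pnts_demo, idcol = "station_id", nfold = 5,
                    fmula = fmula_demo, kan = "Jan", month.lst_ = month.abb,
                    pred.year_ = 2020, method = "ranger")
  cv_out[[2]]  # data.frame with ME, MAE, RMSE and CCC
}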


#' Create a Soil Moisture spline over user-specified soil depths, based on the 'ithir' R Package
#'
#' @description Fits a mass-preserving depth spline to a set of soil profiles. Function modified from the 'ithir' R package (see ea_spline).
#'
#' @param obj Object of class SoilProfileCollection, or a data.frame whose first three columns are profile id, upper depth and lower depth (cm)
#' @param var.name Character. Column name of the target variable
#' @param lam Numeric. Spline smoothing (lambda) parameter. Default value is 0.1
#' @param d Numeric vector of depth intervals (cm) at which spline averages are returned
#' @param vlow Numeric. Lower bound imposed on fitted values
#' @param vhigh Numeric. Upper bound imposed on fitted values
#' @param show.progress TRUE/FALSE. Whether to display a progress bar
#'
#' @return A list of soil profile collections based on user specified soil depth intervals.
#'
#' @export
#'
#' @examples
#' # Check the 'ithir' R package for examples and further details

sm_spline <- function(obj, var.name, lam = 0.1, d = c(0,5,15,30,60,100,200), vlow = 0, vhigh = 1, show.progress = TRUE){
  # Set seed
  set.seed(2410)

  if (is(obj,"SoilProfileCollection") == TRUE){
    depthcols = obj@depthcols
    idcol = obj@idcol
    obj@horizons = obj@horizons[,c(idcol, depthcols, var.name)]
    d.dat<- as.data.frame(obj@horizons)
  }

  if (is(obj,"data.frame") == TRUE){
    d.dat<- as.data.frame(obj[,c(1:3,which(colnames(obj)==var.name))])
  }

  if (is(obj,"data.frame") == FALSE & is(obj,"SoilProfileCollection") == FALSE){
    stop("ERROR:: Data must be either class data.frame or SoilProfileCollection")
  }

  mxd<- max(d)
  sp_dat<-split(d.dat,d.dat[,1])

  # Matrix of the continuous splines for each data point
  m_fyfit<- matrix(NA,ncol=length(c(1:mxd)),nrow=length(sp_dat))

  # Matrix in which the averaged values of the spline are fitted. The depths are specified in the (d) object
  yave<- matrix(NA,ncol=length(d),nrow=length(sp_dat))

  # Matrix in which the sum of square errors of each lambda iteration for the working profile are stored
  sse<- matrix(NA,ncol=length(lam),nrow=1)

  # Matrix in which the sum of square errors for each lambda iteration for each profile are stored
  sset<- matrix(NA,ncol=2,nrow=length(sp_dat))

  # Profile ids
  mat_id<- d.dat[0,]

  # Combined data frame for observations and spline predictions
  dave<- d.dat[1,]
  dave$predicted<- 0
  dave$FID<- 0
  dave<- dave[0,]

  ## Fit splines profile by profile:
  if (show.progress) pb <- txtProgressBar(min=0, max=length(sp_dat), style=3)
  cnt<- 1
  for(st in 1:length(sp_dat)){
    subs<-sp_dat[[st]]  # subset the profile required
    subs<-as.matrix(subs)
    mat_id[st,1]<- subs[1,1]


    # manipulate the profile data to the required form
    ir<- c(1:nrow(subs))
    ir<-as.matrix(t(ir))
    u<- as.numeric(subs[ir,2])
    u<-as.matrix(t(u))   # upper
    v<- as.numeric(subs[ir,3])
    v<-as.matrix(t(v))   # lower
    y<- as.numeric(subs[ir,4])
    y<-as.matrix(t(y))   # concentration
    n<- length(y);       # number of observations in the profile
    d<- t(d)

    ############################################################################################################################################################
    ### routine for handling profiles with one observation
    if (n == 1){
      dave[cnt:(cnt-1+nrow(subs)),1:4]<- subs
      dave[cnt:(cnt-1+nrow(subs)),5]<- y
      dave[cnt:(cnt-1+nrow(subs)),6]<- st
      xfit<- as.matrix(t(c(1:mxd))) # spline will be interpolated onto these depths (1cm res)
      nj<- max(v)
      if (nj > mxd)
      {nj<- mxd}
      yfit<- xfit
      yfit[,1:nj]<- y   # values extrapolated onto yfit
      if (nj < mxd)
      {yfit[,(nj+1):mxd]=-9999}
      m_fyfit[st,]<- yfit

      # Averages of the spline at specified depths
      nd<- length(d)-1  # number of depth intervals
      dl<-d+1     #  increase d by 1
      for (cj in 1:nd){
        xd1<- dl[cj]
        xd2<- dl[cj+1]-1
        if (nj>=xd1 & nj<=xd2)
        {xd2<- nj-1
        yave[st,cj]<- mean(yfit[,xd1:xd2])}
        else
        {yave[st,cj]<- mean(yfit[,xd1:xd2])}   # average of the spline at the specified depth intervals
        yave[st,cj+1]<- max(v)
      } #maximum soil depth
      cnt<- cnt+nrow(subs)
      ##################################
      # error
      sset[st,1:2] <- 0
    }
    # End of single observation profile routine
    ###############################################################################################################################################################

    ## Start of routine for fitting spline to profiles with multiple observations
    else {
      ###############################################################################################################################################################
      dave[cnt:(cnt-1+nrow(subs)),1:4]<- subs
      dave[cnt:(cnt-1+nrow(subs)),6]<- st
      ## ESTIMATION OF SPLINE PARAMETERS
      np1 <- n+1  # number of interval boundaries
      nm1 <- n-1
      delta <- v-u  # depths of each layer
      del <- c(u[2:n],u[n])-v   # del is (u1-v0,u2-v1, ...)

      ## create the (n-1)x(n-1) matrix r; first create r with 1's on the diagonal and upper diagonal, and 0's elsewhere
      r <- matrix(0,ncol=nm1,nrow=nm1)
      for(dig in 1:nm1){
        r[dig,dig]<-1
      }
      for(udig in seq_len(nm1 - 1)){  # upper diagonal; seq_len() avoids the 1:0 trap when nm1 == 1
        r[udig,udig+1]<-1
      }

      ## then create a diagonal matrix d2 of differences to premultiply the current r
      d2 <- matrix(0, ncol=nm1, nrow=nm1)
      diag(d2) <- delta[2:n]  # delta = depth of each layer

      ## then premultiply and add the transpose; this gives half of r
      r <- d2 %*% r
      r <- r + t(r)

      ## then create a new diagonal matrix for differences to add to the diagonal
      d1 <- matrix(0, ncol=nm1, nrow=nm1)
      diag(d1) <- delta[1:nm1]  # delta = depth of each layer

      d3 <- matrix(0, ncol=nm1, nrow=nm1)
      diag(d3) <- del[1:nm1]  # del =  differences

      r <- r+2*d1 + 6*d3

      ## create the (n-1)xn matrix q
      q <- matrix(0,ncol=n,nrow=n)
      for (dig in 1:n){
        q[dig,dig]<- -1
      }
      for (udig in seq_len(n - 1)){  # upper diagonal; seq_len() avoids the 1:0 trap
        q[udig,udig+1]<-1
      }
      q <- q[1:nm1,1:n]
      dim.mat <- matrix(q[],ncol=length(1:n),nrow=length(1:nm1))

      ## inverse of r
      rinv <- try(solve(r), TRUE)

      ## Note: in some cases this will fail due to singular matrix problems, hence you need to check if the object is meaningful:
      if(is.matrix(rinv)){
        ## identity matrix i
        ind <- diag(n)

        ## create the matrix coefficent z
        pr.mat <- matrix(0,ncol=length(1:nm1),nrow=length(1:n))
        pr.mat[] <- 6*n*lam
        fdub <- pr.mat*t(dim.mat)%*%rinv
        z <- fdub%*%dim.mat+ind

        ## solve for the fitted layer means
        sbar <- solve(z,t(y))
        dave[cnt:(cnt-1+nrow(subs)),5]<- sbar
        cnt<- cnt+nrow(subs)


        ## calculate the fitted value at the knots
        b <- 6*rinv%*%dim.mat%*% sbar
        b0 <- rbind(0,b) # add a row to top = 0
        b1 <- rbind(b,0) # add a row to bottom = 0
        gamma <- (b1-b0) / t(2*delta)
        alfa <- sbar-b0 * t(delta) / 2-gamma * t(delta)^2/3

        ## END ESTIMATION OF SPLINE PARAMETERS
        ###############################################################################################################################################################

        ## fit the spline
        xfit<- as.matrix(t(c(1:mxd))) ## spline will be interpolated onto these depths (1cm res)
        nj<- max(v)
        if (nj > mxd){
          nj<- mxd
        }
        yfit<- xfit
        for (k in 1:nj){
          xd<-xfit[k]
          if (xd< u[1])
          {p<- alfa[1]} else
          {for (its in 1:n) {
            if(its < n)
            {tf2=as.numeric(xd>v[its] & xd<u[its+1])} else {tf2<-0}
            if (xd>=u[its] & xd<=v[its])
            {p=alfa[its]+b0[its]*(xd-u[its])+gamma[its]*(xd-u[its])^2} else if (tf2)
            {phi=alfa[its+1]-b1[its]*(u[its+1]-v[its])
            p=phi+b1[its]*(xd-v[its])}
          }}
          yfit[k]=p
        }
        if (nj < mxd)
        {yfit[,(nj+1):mxd]=NA}

        yfit[which(yfit > vhigh)] <- vhigh
        yfit[which(yfit < vlow)]  <-vlow
        m_fyfit[st,]<- yfit

        ## Averages of the spline at specified depths
        nd<- length(d)-1  # number of depth intervals
        dl<-d+1     #  increase d by 1
        for (cj in 1:nd) {
          xd1<- dl[cj]
          xd2<- dl[cj+1]-1
          if (nj>=xd1 & nj<=xd2)
          {xd2<- nj-1
          yave[st,cj]<- mean(yfit[,xd1:xd2])}
          else
          {yave[st,cj]<- mean(yfit[,xd1:xd2])}   # average of the spline at the specified depth intervals
          yave[st,cj+1]<- max(v)
        } #maximum soil depth

        ## CALCULATION OF THE ERROR BETWEEN OBSERVED AND FITTED VALUES
        rmse <- sqrt(sum((t(y)-sbar)^2)/n)
        rmseiqr <- rmse/IQR(y)
        sset[st,1] <- rmse
        sset[st,2] <- rmseiqr
      }
    }

    if (show.progress){
      setTxtProgressBar(pb, st)
    }
  }
  if (show.progress) {
    close(pb)
  }

  ## aesthetics for output
  ## yave
  yave<- as.data.frame(yave)
  jmat<- matrix(NA,ncol=1,nrow=length(d))
  for (i in seq_len(length(d) - 1)) {  # seq_len() avoids iterating over the unintended index 0
    a1<-paste(d[i],d[i+1],sep="-")
    a1<-paste(a1,"cm",sep=" ")
    jmat[i]<- a1
  }
  jmat[length(d)]<- "soil depth"
  for (jj in 1:length(jmat)){
    names(yave)[jj]<- jmat[jj]
  }
  yave<- cbind(mat_id[,1],yave)
  names(yave)[1]<- "id"

  # sset
  sset<- as.data.frame(sset)
  names(sset)<- c("rmse", "rmseiqr")

  # save outputs
  retval <- list(harmonised=yave, obs.preds=dave, splineFitError=sset, var.1cm=t(m_fyfit))
  return(retval)
}
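
## Hedged toy example for sm_spline(): a two-profile data.frame whose first
## three columns are profile id, upper depth and lower depth (cm), the layout
## the function expects for data.frame input. All values are made up. Wrapped
## in if(FALSE) so it is never executed on source.
if(FALSE){
  prof_demo <- data.frame(id    = c(1, 1, 1, 2, 2),
                          upper = c(0, 10, 30, 0, 20),
                          lower = c(10, 30, 60, 20, 50),
                          SM    = c(0.31, 0.28, 0.22, 0.35, 0.25))
  sp_out <- sm_spline(prof_demo, var.name = "SM", lam = 0.1,
                      d = c(0, 5, 15, 30, 60), vlow = 0, vhigh = 1)
  sp_out$harmonised  # depth-harmonised values per profile
}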


#' Plot Soil Moisture Splines
#'
#' @description Plots a set of soil profile collections. Function modified from the 'ithir' R package.
#'
#' @param splineOuts Output list from sm_spline
#' @param d Depth intervals (cm) used when fitting the spline
#' @param maxd Numeric. Maximum depth (cm) shown on the y-axis
#' @param type Numeric. 1 = raw data and fitted spline; 2 = raw data and spline averages; 3 = raw data, spline averages and fitted spline
#' @param label Character. x-axis label
#' @param plot.which Numeric. Index (FID) of the profile to plot
#'
#' @return Plots all or a subset of a soil profile collection based on user specified soil depth intervals.
#'
#' @export
#'
#' @examples
#' # Check the 'ithir' R package for examples and further details

plot_sm_spline <- function(splineOuts, d = t(c(0,5,15,30,60,100,200)), maxd, type = 1, label = "", plot.which = 1){
  # Type 1 (raw data and spline fit)
  if (type==1){ #plot the observed
    d1<- splineOuts$obs.preds
    d1<- d1[d1$FID==plot.which,]
    vals<- d1[,4]
    depths<- d1[,2:3]
    matX<- matrix(NA, nrow=nrow(d1), ncol= 4)
    matY<- matrix(NA, nrow=nrow(d1), ncol= 4)
    for (i in 1: nrow(d1)){
      matX[i,]<- c(vals[i]-vals[i], vals[i], vals[i], vals[i]-vals[i])
      matY[i,]<- c(depths[i,1], depths[i,1], depths[i,2], depths[i,2])
    }
    raw.plot<-plot(matX[1,], matY[1,],type="n",ylim=c(maxd,0),xlim=c(min(vals), (max(vals)*1.1)),main=paste("soil profile:",d1[1,1], sep=" "), ylab="depth", xlab= label, lty=2, lwd=1.5, xaxs="i", col="black", font.lab=2,cex.lab=1.5)
    for (i in 1: nrow(d1)){
      polygon (matX[i,],matY[i,], lty=1, lwd=2, border="black")
    }
    #plot the fitted spline
    lines(splineOuts$var.1cm[,plot.which],seq(1,d[length(d)]),lwd=2,col="red" )
  }

  # Type 2 (raw data and spline fitted averages)
  if (type==2){#plot the observed
    d1<- splineOuts$obs.preds
    d1<- d1[d1$FID==plot.which,]
    vals<- d1[,4]
    depths<- d1[,2:3]
    matX<- matrix(NA, nrow=nrow(d1), ncol= 4)
    matY<- matrix(NA, nrow=nrow(d1), ncol= 4)
    for (i in 1: nrow(d1)){
      matX[i,]<- c(vals[i]-vals[i], vals[i], vals[i], vals[i]-vals[i])
      matY[i,]<- c(depths[i,1], depths[i,1], depths[i,2], depths[i,2])
    }
    raw.plot<-plot(matX[1,], matY[1,],type="n",ylim=c(maxd,0),xlim=c(min(vals), (max(vals)*1.1)),main=paste("soil profile:",d1[1,1], sep=" "), ylab="depth", xlab= label, lty=2, lwd=1.5, xaxs="i", col="black", font.lab=2,cex.lab=1.5)

    d2<- as.matrix(splineOuts$harmonised[plot.which,2:length(d)])
    matX1<- matrix(NA, nrow=ncol(d2), ncol= 4)
    matY1<- matrix(NA, nrow=ncol(d2), ncol= 4)
    for (i in 1: ncol(d2)){
      matX1[i,]<- c(d2[i]-d2[i], d2[i], d2[i], d2[i]-d2[i])
      matY1[i,]<- c(d[1,i], d[1,i], d[1,i+1], d[1,i+1])
    }
    #plot the spline averages
    for (i in 1: ncol(d2)){
      polygon (matX1[i,],matY1[i,], lty=1, lwd=1,col="green", border="green")
    }

    for (i in 1: nrow(d1)){
      polygon (matX[i,],matY[i,], lty=1, lwd=2, border="black")
    }
  }

  # Type 3 (raw data and spline fitted averages)
  if (type==3){#plot the observed
    d1<- splineOuts$obs.preds
    d1<- d1[d1$FID==plot.which,]
    vals<- d1[,4]
    depths<- d1[,2:3]
    matX<- matrix(NA, nrow=nrow(d1), ncol= 4)
    matY<- matrix(NA, nrow=nrow(d1), ncol= 4)
    for (i in 1: nrow(d1)){
      matX[i,]<- c(vals[i]-vals[i], vals[i], vals[i], vals[i]-vals[i])
      matY[i,]<- c(depths[i,1], depths[i,1], depths[i,2], depths[i,2])
    }
    raw.plot<-plot(matX[1,], matY[1,],type="n",ylim=c(maxd,0),xlim=c(min(vals), (max(vals)*1.1)),main=paste("soil profile:",d1[1,1], sep=" "), ylab="depth", xlab= label, lty=2, lwd=1.5, xaxs="i", col="black", font.lab=2,cex.lab=1.5)

    d2<- as.matrix(splineOuts$harmonised[plot.which,2:length(d)])
    matX1<- matrix(NA, nrow=ncol(d2), ncol= 4)
    matY1<- matrix(NA, nrow=ncol(d2), ncol= 4)
    for (i in 1: ncol(d2)){
      matX1[i,]<- c(d2[i]-d2[i], d2[i], d2[i], d2[i]-d2[i])
      matY1[i,]<- c(d[1,i], d[1,i], d[1,i+1], d[1,i+1])
    }

    #plot the spline averages
    for (i in 1: ncol(d2)){
      polygon (matX1[i,],matY1[i,], lty=1, lwd=1,col="green", border="green")
    }
    #plot the observations
    for (i in 1: nrow(d1)){
      polygon (matX[i,],matY[i,], lty=1, lwd=2, border="black")
    }
    #plot the spline
    lines(splineOuts$var.1cm[,plot.which],seq(1,d[length(d)]),lwd=2,col="red" )
  }
}
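
## Hedged usage sketch for plot_sm_spline(), continuing the hypothetical
## sm_spline() example above. Wrapped in if(FALSE) so it is never executed on
## source.
if(FALSE){
  plot_sm_spline(sp_out, d = t(c(0, 5, 15, 30, 60)), maxd = 60,
                 type = 1, label = "Soil moisture (v/v)", plot.which = 1)
}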


##### Adapted from the GSIF R package (deprecated on CRAN)
## create a table with tile dimensions
makeTiles <- function(bb, block.x, block.y, overlap.percent, limit.bbox, columns = NULL, rows = NULL){

  ## number of tiles:
  xn = ceiling(signif(diff(bb[1,]),5)/block.x)
  yn = ceiling(signif(diff(bb[2,]),5)/block.y)

  # report the tiling:
  message(paste("Generating", xn*yn, "tiles..."))
  xminl = bb[1,1]
  yminl = bb[2,1]
  xmaxl = bb[1,1] + (xn-1) * block.x
  ymaxl = bb[2,1] + (yn-1) * block.y
  xminu = bb[1,1] + block.x
  yminu = bb[2,1] + block.y
  xmaxu = bb[1,1] + xn * block.x
  ymaxu = bb[2,1] + yn * block.y

  b.l <- expand.grid(KEEP.OUT.ATTRS=FALSE, xl=seq(xminl, xmaxl, by=block.x), yl=seq(yminl, ymaxl, by=block.y))
  b.u <- expand.grid(KEEP.OUT.ATTRS=FALSE, xu=seq(xminu, xmaxu, by=block.x), yu=seq(yminu, ymaxu, by=block.y))
  btiles <- cbind(b.l, b.u)
  # expand if necessary:
  btiles$xl <- btiles$xl - block.x * overlap.percent/100
  btiles$yl <- btiles$yl - block.y * overlap.percent/100
  btiles$xu <- btiles$xu + block.x * overlap.percent/100
  btiles$yu <- btiles$yu + block.y * overlap.percent/100

  if(limit.bbox == TRUE){
    ## fix min max coordinates:
    btiles$xl <- ifelse(btiles$xl < bb[1,1], bb[1,1], btiles$xl)
    btiles$yl <- ifelse(btiles$yl < bb[2,1], bb[2,1], btiles$yl)
    btiles$xu <- ifelse(btiles$xu > bb[1,2], bb[1,2], btiles$xu)
    btiles$yu <- ifelse(btiles$yu > bb[2,2], bb[2,2], btiles$yu)
  }

  # add offset for rgdal (optional):
  if(!is.null(columns)&!is.null(rows)){
    btiles$offset.y <- round(rows*(bb[2,2]-btiles$yu)/(bb[2,2]-bb[2,1]))
    btiles$offset.x <- columns + round(columns*(btiles$xl-bb[1,2])/(bb[1,2]-bb[1,1]))
    btiles$region.dim.y <- round(rows*(btiles$yu-btiles$yl)/(bb[2,2]-bb[2,1]))
    btiles$region.dim.x <- round(columns*(btiles$xu-btiles$xl)/(bb[1,2]-bb[1,1]))
  }

  return(btiles)

}
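
## Hedged sketch of makeTiles() on a hand-made bounding box: a 2 x 2 matrix
## with rows x and y and columns min and max, tiled into 100 m blocks with no
## overlap. Wrapped in if(FALSE) so it is never executed on source.
if(FALSE){
  bb_demo <- matrix(c(0, 0, 1000, 500), nrow = 2,
                    dimnames = list(c("x", "y"), c("min", "max")))
  tiles_demo <- makeTiles(bb_demo, block.x = 100, block.y = 100,
                          overlap.percent = 0, limit.bbox = TRUE)
  head(tiles_demo)  # xl, yl, xu, yu for each of the 10 x 5 tiles
}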

.tiles2pol <- function(bb, btiles, proj4string){
  ## get coordinates for each tile:
  coords.lst <- lapply(as.list(as.data.frame(t(as.matrix(btiles)))), function(x){
    matrix(c(x[1], x[1], x[3], x[3], x[1], x[2], x[4], x[4], x[2], x[2]),
           ncol=2, dimnames=list(paste("p", 1:5, sep=""), attr(bb, "dimnames")[[1]]))
  })

  ## create an object of class "SpatialPolygons"
  srl = lapply(coords.lst, Polygon)
  Srl = list()
  for(i in 1:length(srl)){ Srl[[i]] <- Polygons(list(srl[[i]]), ID=row.names(btiles)[i]) }
  pol = SpatialPolygons(Srl, proj4string=proj4string)

  return(pol)
}


#' Make Spatial Tiles for a Raster File
#'
#' @description Spatial tiles of a raster file.
#'
#' @param obj Object of class "GDALobj", as returned by rgdal::GDALinfo
#' @param block.x Tile size in the x direction (map units)
#' @param block.y Tile size in the y direction (map units). Defaults to block.x
#' @param overlap.percent Numeric. Percentage overlap between neighbouring tiles
#' @param limit.bbox TRUE/FALSE. Whether to clip tiles to the raster's bounding box
#' @param return.SpatialPolygons TRUE/FALSE. Whether to return a SpatialPolygons object instead of a data.frame
#'
#' @return Dataframe (or SpatialPolygons object) of spatial tiles of a raster file.
#'
#' @export
#'
#' @examples
#' # Check the GSIF R package for details
make_spatial_tiles <- function(obj, block.x, block.y = block.x, overlap.percent = 0, limit.bbox = TRUE, return.SpatialPolygons = FALSE){

  if(!class(obj)=="GDALobj"){
    stop("Object of class \"GDALobj\" required.")
  }

  if(overlap.percent<0){
    stop("'overlap.percent' argument must be a positive number")
  }

  ## create bbox:
  bb <- matrix(c(obj[["ll.x"]], obj[["ll.y"]], obj[["ll.x"]]+obj[["columns"]]*obj[["res.x"]], obj[["ll.y"]]+obj[["rows"]]*obj[["res.y"]]), nrow=2)
  attr(bb, "dimnames") <- list(c("x","y"), c("min","max"))
  ## tile using rows and columns:
  btiles <- makeTiles(bb, block.x, block.y, overlap.percent, limit.bbox, rows = obj[["rows"]], columns = obj[["columns"]])

  if(return.SpatialPolygons == TRUE){
    pol <- .tiles2pol(bb=bb, btiles=btiles, proj4string=CRS(attr(obj, "projection")))
  } else {
    pol = btiles
  }

  message(paste("Returning a list of tiles for an object of class", class(obj), "with", signif(overlap.percent, 3), "percent overlap"))
  return(pol)

}
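
## Hedged usage sketch for make_spatial_tiles(); it expects the "GDALobj"
## returned by rgdal::GDALinfo() for an existing raster (the path below is
## hypothetical). Wrapped in if(FALSE) so it is never executed on source.
if(FALSE){
  library(rgdal)
  obj_demo <- GDALinfo("/data/dem.tif")
  tiles_df <- make_spatial_tiles(obj_demo, block.x = 5000, overlap.percent = 5)
}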