# GSTools: Grass Tools

# Documented in generate_test_data

#' @name generate_test_data
#' @title Generate test data
#' @description Function that uses the original dataset, the centers of the
#'   model to be analysed and weather information to create a test dataset that
#'   will be scaled and ready to use in posterior evaluations.
#' @usage generate_test_data(input_data, center_mean, center_stddev, grid,
#'   weather_dataset, wt_f=NULL, scaled = T, soil_type = NULL, points=NULL)
#' @param input_data Dataset in spread (wide) format containing LSUs as columns
#'   and cell numbers as rows.
#' @param center_mean Center attributes from the scaled training data of the
#'   model used in the parameter model. Used only if \code{scaled} is TRUE.
#'   Example: \code{train_data <- scale(train_data)} followed by
#'   \code{col_means_train_harvest <- attr(train_data, "scaled:center")}.
#' @param center_stddev Scale attributes from the scaled training data of the
#'   model. Used only if \code{scaled} is TRUE.
#' @param grid dataset with grid cells information. Has to be the same size as the \code{input_data}
#' @param weather_dataset Data set containing weather information.
#' @param wt_f Koppen-Geiger climate classification dataset created by the file
#'   koppen_geiger_pre_processing.R. Its \code{Cls} column is used to drop the
#'   EF and ET climate zones.
#' @param scaled Boolean that defines whether the data will be scaled or not.
#' @param soil_type Dataset with information on soil type on cell level.
#' @param points A list of values that will substitute the original columns with
#'   the number of LSUs in case a finer LSU resolution is desired for
#'   finding the optimal LSU. Ex: points = as.list(seq(0,25,0.5))
#' @details If \code{wt_f} is NULL, all cells are scaled and output. If
#'   \code{wt_f} is filled with the Koppen-Geiger climate classification
#'   generated by the file koppen_geiger_pre_processing.R, the EF and ET climate
#'   zones are removed from the analysis.
#' @return A matrix of test data, standardized with \code{center_mean} and
#'   \code{center_stddev} when \code{scaled} is TRUE.
#' @author Marcos Alves \email{mppalves@gmail.com}
#' @examples
#' ##Scaled test data
#' generate_test_data(map_harvest_real, col_means_train_harvest, col_stddevs_train_harvest, grid, weather_dataset, soil_type = soil)
#'
#' ##Unscaled test data
#' generate_test_data(map_harvest_real, col_means_train_harvest, col_stddevs_train_harvest, grid, weather_dataset, scaled = F, soil_type = soil)
#'
#' ##Unscaled test data with LSU points substitutes
#' generate_test_data(map_harvest_real, col_means_train_harvest, col_stddevs_train_harvest, grid, weather_dataset, scaled = F, soil_type = soil, points = as.list(seq(0,25,0.5)))
#' @export generate_test_data

generate_test_data <- function(input_data, center_mean, center_stddev, grid,
                               weather_dataset, wt_f = NULL, scaled = TRUE,
                               soil_type = NULL, points = NULL) {
  # Builds the long-format predictor matrix used to evaluate a model: one row
  # per (cell, LSU level) pair, holding the grid columns, the environmental
  # columns and a final `LSU` column.
  #
  # Only the column NAMES of `input_data` (or the values in `points`) feed the
  # output; they are read as LSU * 10 and divided by 10. The cell values of
  # `input_data` are not part of the returned matrix.
  #
  # Rows are stacked LSU level by LSU level (all cells for the first level,
  # then all cells for the second, ...).
  #
  # Returns a matrix; when `scaled` is TRUE it is standardized with
  # `center_mean` / `center_stddev`, which must have one entry per column.

  n_lsu <- ncol(input_data)
  if (is.null(n_lsu) || n_lsu < 1L) {
    stop("`input_data` must have at least one LSU column", call. = FALSE)
  }

  # Environmental predictors: weather columns 1, 3 and 5 (selected by
  # position), plus the soil type when it is supplied.
  environmental_var <- weather_dataset[, c(1, 3, 5)]
  if (!is.null(soil_type)) {
    environmental_var <- cbind(environmental_var, soil_type)
  }

  combined <- as.data.frame(cbind(input_data, grid, environmental_var))

  if (!is.null(wt_f)) {
    # Drop cells classified as EF or ET (presumably the polar zones such as
    # Greenland and Antarctica -- confirm against the classification file).
    # The climate columns are only used for filtering and are never added to
    # `combined`, so no hard-coded column count is needed to strip them again.
    climate_class <- cbind(combined, wt_f)[["Cls"]]
    if (is.null(climate_class)) {
      stop("`wt_f` must provide a `Cls` column", call. = FALSE)
    }
    # which() also discards cells whose class is NA.
    keep_rows <- which(climate_class != "EF" & climate_class != "ET")
    combined <- combined[keep_rows, , drop = FALSE]
    print("EF and ET climates removed from analysis")
  }

  if (is.null(points)) {
    lsu_labels <- colnames(combined)[seq_len(n_lsu)]
  } else {
    # The supplied points replace the original LSU columns entirely.
    lsu_labels <- as.character(points)
    print("Orginal data points substituted by list")
  }
  lsu_values <- as.numeric(lsu_labels) / 10

  # Everything after the LSU columns identifies a cell. Selecting by position
  # keeps this independent of column names, so it works with or without a
  # soil type column.
  id_columns <- combined[, -seq_len(n_lsu), drop = FALSE]
  n_cells <- nrow(id_columns)

  # Repeat the cell block once per LSU level and attach the level.
  stacked_rows <- rep(seq_len(n_cells), times = length(lsu_values))
  test_data <- id_columns[stacked_rows, , drop = FALSE]
  test_data$LSU <- rep(lsu_values, each = n_cells)
  rownames(test_data) <- NULL # keep the matrix free of row names

  test_data <- as.matrix(test_data)

  if (scaled) {
    # Standardize with the centers/scales of the training data.
    test_data <- scale(test_data, center = center_mean, scale = center_stddev)
  }
  test_data
}