_workshop/2_wgen-unconditional/simulate_farms_Salado_A.r

# ------------------------------------------------------------------------------
# --- Script to generate synthetic farm boundaries and other data
# --- for the Salado A basin.
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Clean up all objects ----

remove(list = ls()); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Install needed R packages ----

# update.packages()

if (!require(lubridate)) {install.packages("lubridate"); library(lubridate)}
if (!require(rgdal)) {install.packages("rgdal"); library(rgdal)}
if (!require(sp)) {install.packages("sp"); library(sp)}
if (!require(PBSmapping)) {install.packages("PBSmapping"); library(PBSmapping)}
if (!require(spdep)) {install.packages("spdep"); library(spdep)}
if (!require(RColorBrewer)) {install.packages("RColorBrewer"); library(RColorBrewer)}
if (!require(spatstat)) {install.packages("spatstat"); library(spatstat)}
if (!require(rgeos)) {install.packages("rgeos"); library(rgeos)}
if (!require(rgdal)) {install.packages("rgdal"); library(rgdal)}
if (!require(maptools)) {install.packages("maptools"); library(maptools)}
if (!require(dplyr)) {install.packages("dplyr"); library(dplyr)}
if (!require(Hmisc)) {install.packages("Hmisc"); library(Hmisc)}
if (!require(RColorBrewer)) {install.packages("RColorBrewer"); library(RColorBrewer)}
if (!require(RANN)) {install.packages("RANN"); library(RANN)}

#  update.packages(ask=FALSE)
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Define function to trim number of farms within a department ----

# --- The list of synthetic farms to generate was derived from the AG Census
# --- and involves multiple assumptions.
# --- It may happen sometimes that the number of farms to generate
# --- within the department/partido exceeds the available area.
# --- This function compares the number of available 'unit tiles' in a partido
# --- with the number of tiles required by all farms to be simulated.
# --- The list of farms is trimmed to a certain proportion of the available tiles.

FilterFarms <- function(synth.farms, farm.comparable.field,
  max.aggregated.value,
  initial.seed = 0, max.rate = 0.95) {
  
  # synth.farms <- farm.table.this.dept
  # farm.comparable.field <- 'adjusted.farm.size'
  # max.aggregated.value <- area
  # initial.seed <- 0
  # max.rate <- 0.50
  
  set.seed(initial.seed)
  filtered.synth.farms <- synth.farms
  
  aggregated.farm.value <- sum(filtered.synth.farms[, farm.comparable.field])
 
   while (aggregated.farm.value > (max.rate * max.aggregated.value)) {
    row.number            <- sample(x = seq(from = 1, 
      to = nrow(filtered.synth.farms)), size = 1)
    filtered.synth.farms  <- filtered.synth.farms[-c(row.number), ]
    aggregated.farm.value <-
      sum(filtered.synth.farms[, farm.comparable.field])
  }
  
  return (filtered.synth.farms)
  
}
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Decide whether results will be plotted ----

plot.results <- FALSE
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Define Coordinate Reference Systems for various projections ----

gk.crs.string <- '+proj=tmerc +lat_0=-90 +lon_0=-60 +k=1 +x_0=5500000 +y_0=0
  +ellps=intl +twogs84=-148,136,90,0,0,0,0 +units=m +no_defs'

utm.crs.string <- '+proj=utm +zone=20H +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0'

ll.crs.string <- '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Read outline of Salado Basin A  ----

proj.dir <- getwd()

basin.dir <- "D:/ENSO/Projects/CNH3/data/shapes_cuenca/"

tt0 <- file.info(basin.dir)   # Info about directory where basin shapefile is located
if (!tt0$isdir)
  stop("ERROR: Specified directory for basin shapefile does not exist... check name...\n")              

setwd(basin.dir)

# --- Read shapefile with Basin A outline

cuenca.A.ll <- readOGR(".", layer="Salado_A_latlon", verbose = TRUE)

setwd(proj.dir)

# --- Convert basin shape to Gauss-Kruger coordinates (in meters)

cuenca.A.GK <- sp::spTransform(cuenca.A.ll,
  CRS = CRS(gk.crs.string)) 

# --- Convert basin shape to UTM coordinates (in meters)

cuenca.A.UTM <- sp::spTransform(cuenca.A.ll,
  CRS = CRS(utm.crs.string))

if (plot.results) { 
  plot(cuenca.A.ll, axes = TRUE)
  plot(cuenca.A.GK, axes = TRUE)
  plot(cuenca.A.UTM, axes = TRUE)
}

rm(proj.dir, basin.dir, tt0); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Create individual shapefiles for partidos ----

# --- Read corrected shapefiles for Argentina's departments

orig.dept.dir <- "D:/ENSO-Data/Other Data/ARG_departments/"

tt0 <- dir.create(orig.dept.dir, showWarnings = TRUE)

tt0 <- file.info(orig.dept.dir)   # Informacion sobre el directorio especificado
if (tt0$isdir) {    				      # Es un directorio?
  setwd(orig.dept.dir)						# Cambiar al directorio especificado
} else {
  stop("ERROR: Specified directory does not exist... verify the name...\n")
}          			
rm(tt0)

dept.archivo <- paste(orig.dept.dir, "deptscorrected.shp", sep="")
if (!file.exists(dept.archivo)) {
  stop("Specified SHP file for departments", dept.archivo, "does not exist...\n")
}

oldPath <- getwd() 
setwd(orig.dept.dir)
depts <- rgdal::readOGR(".", layer="deptscorrected", verbose = TRUE) # Read departments
setwd(oldPath)

rm(orig.dept.dir, dept.archivo); gc()

# --- Aislar la parte de datos de departamentos

pp2 <- depts@data

# --- Identificar los partidos/departamentos que contienen
# --- a la cuenca A del Salado

depts.en.cuenca <- c("9 DE JULIO","ADOLFO ALSINA","BOLIVAR","BRAGADO",
 "CARLOS CASARES","CARLOS TEJEDOR","COLON",
 "DAIREAUX","FLORENTINO AMEGHINO","GENERAL ARENALES",
	"GENERAL PINTO","GENERAL VIAMONTE","GENERAL VILLEGAS","GUAMINI",
	"HIPOLITO YRIGOYEN","JUNIN","LEANDRO N. ALEM", "LINCOLN","PEHUAJO",
	"PELLEGRINI","RIVADAVIA","SALLIQUELO","TRENQUE LAUQUEN","TRES LOMAS",
  "GENERAL ROCA","PRESIDENTE ROQUE SAENZ PENA",
	"ATREUCO","CATRILO","CHAPALEUFU","CONHELO",
	"MARACO","QUEMU QUEMU","TRENEL",
	"GENERAL LOPEZ")

codigos.deptos.en.cuenca <- c(6588,6007,6105,6112,
  6147,6154,6175,
  6231,6277,6294,
  6351,6385,6392,6399,
  6406,6413,6462,6469,6609,
  6616,6679,6721,6826,6847,
  14035,14084,
  42007,42028,42056,42035,
  42105,42119,42147,
  82042)

pp3 <- pp2$code %in% codigos.deptos.en.cuenca

deptos.en.cuenca <- pp2[pp3, ]  # Seleccionar departamentos dentro de cuenca

# --- Iterate to create a shape file for each department inside the basin

for (i in 1:nrow(deptos.en.cuenca)) {
  
  code <- deptos.en.cuenca[i, "code"]
  dept <- as.character(deptos.en.cuenca[i, "dept"])
  
  indiv.shape <- depts[depts@data$code == code, ]
  
  oldPath <- getwd()
  setwd("D:/ENSO/Projects/CNH3/data/shapes_depts/")
  writeOGR(indiv.shape, dsn=".", layer = dept, driver = "ESRI Shapefile",
    check_exists = TRUE, overwrite_layer = TRUE, verbose = TRUE)
  setwd(oldPath)   
}

# --- Merge all shapefiles for departments in the Salado A basin.
# --- Build a single "outer boundary" for all polygons.

library(rgeos); library(maptools)

all.depts <- depts[pp3, ]

dept.IDs <- all.depts@data$code

dissolved.depts <- maptools::unionSpatialPolygons(all.depts, IDs = dept.IDs)

outer.shape.ll <- maptools::unionSpatialPolygons(all.depts,
  IDs = rep(1, times= length(dept.IDs)))

outer.shape.GK <- sp::spTransform(outer.shape.ll,
  CRS = CRS(gk.crs.string) )

outer.shape.UTM <- sp::spTransform(outer.shape.ll,
  CRS = CRS(utm.crs.string) )

dept.dir <- "D:/ENSO/Projects/CNH3/data/deptos_en_cuenca/"

if (plot.results) { 
  
  Cairo::CairoPDF(file = paste0(dept.dir,"Salado_depts.pdf"),
    width = 6, height = 6, onefile = TRUE, family = "Arial")
  
  plot(dissolved.depts, axes = TRUE, border = "grey80")
  plot(cuenca.A.ll, add = TRUE, border = "tomato")
  plot(outer.shape.ll, add = TRUE, border = "steelblue")
  
  dev.off()
}

rm(all.depts,pp2,pp3,code,dept,indiv.shape,codigos.deptos.en.cuenca,
   i,oldPath,dept.IDs,dissolved.depts); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Read table with departments inside Salado Basin ----

file1 <- "Salado_A_departments.txt"
infile1 <- paste(dept.dir, file1, sep="")

if (file.exists(infile1)) {
  basin.depts <- read.table(infile1, sep = "\t", header = TRUE, as.is = TRUE)
  basin.depts <- dplyr::arrange(basin.depts, dept.code, dept.name)
} else {
  cat("File with list of Salado departments not found...\n")
}

dept.names <- basin.depts$dept.name

# --- Convert all upper case characters to upper and lower case

capwords <- function(s, strict = FALSE) {
  cap <- function(s) paste(toupper(substring(s,1,1)),
{s <- substring(s,2); if(strict) tolower(s) else s},
sep = "", collapse = " " )
sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s)))
}

dept.names.lc <- capwords(dept.names, strict=TRUE) # Upper and lower case

dept.names.noblanks <- stringr::str_replace_all(dept.names, " ", "_")

# --- Let us make sure we have individual shape files for
# --- each department inside the basin.

dept.dir <- "D:/ENSO/Projects/CNH3/data/shapes_depts/"

dept.files <- list.files(path = dept.dir, pattern = ".shp$",
  full.names = TRUE)

dept.files.short <- list.files(path = dept.dir, pattern = ".shp$",
  full.names = FALSE)

dept.files.short <- sort(stringr::str_replace(dept.files.short,
  pattern = ".shp$",
  replacement = ''))

# --- Check that the department names read from a table
# --- have corresponding shapefiles.

if (!all.equal(sort(dept.names), dept.files.short)) {
  stop("Check departments' table and shapefile...\n")
}

n.of.depts <- length(dept.files)  # Number of depts in Salado Basin

remove(capwords, file1, infile1, dept.files.short); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Generate a regular grid inside the outer shape of departments ----

# --- Each grid point will represent the center of a tile or unit area.
# --- The grid generated extends outside the polygon; it will be trimmed later.
# --- The tiles will be the "building blocks" to assemble synthetic farms.
# --- NOTE THAT THE POINTS MAY HAVE DIFFERENT DELTA-X and DELTA-Y VALUES.
# --- That is, tiles created from these points may NOT be square, but rectangular.

tile.area <-  20                  # area represented by a point (in hectares)
point.spacing.x <- 1000  					# point separation along x-axis (in meters)
point.spacing.y <- 200						# point separation along y-axis (in meters)

if (((point.spacing.x * point.spacing.y) / 10000) != tile.area)
  stop("Error in point spacing along x and y dimensions...\n")

point.offset.x <- point.spacing.x / 2
point.offset.y <- point.spacing.y / 2

# --- Generate regular grid of points inside merged polygon
# --- The number of points in the grid is roughly equal to the area
# --- of the target region divided by the area encompassed by one grid point.

gg1 <- bbox(outer.shape.GK)		# bounding box of outer polygon
range.x <- range(pretty(range(gg1["x",]), n = 40, high.u.bias = 100))	# range of lon (x) values
range.y <- range(pretty(range(gg1["y",]), n = 40, high.u.bias = 100))	# range of lat (y) values

# --- The x- and y-values generated exceed the boundaries of the outer polygon
# --- so it can be easier to generate unit-area tiles afterwards...

x.values <- seq(from=(floor(range.x[1]) - (2 * point.spacing.x) + point.offset.x),
  to=(ceiling(range.x[2] + (2 * point.spacing.x))),
  by=point.spacing.x)

y.values <- seq(from=(floor(range.y[1]) - (2 * point.spacing.y) + point.offset.y),
  to=ceiling(range.y[2] + (2 * point.spacing.y)),
  by=point.spacing.y)

gg2 <- as.data.frame(expand.grid(lon = x.values, lat = y.values))

# --- Reorder the points so they are ordered along rows of constant latitude.
# --- Latitude is sorted in reverse so the first line is the northernmost lat.

gg2 <- dplyr::arrange(gg2, desc(lat), lon)
row.names(gg2) <- 1:nrow(gg2)	# matrix of coordinates for points in grid

# --- Create a spatial points object with what will be the centers
# --- of the tiles to be created later.

points.info <- data.frame(point.ID = row.names(gg2),
  x.coord = gg2[, "lon"], y.coord = gg2[, "lat"])

gg3 <- SpatialPointsDataFrame(coords = gg2, data = points.info,
  proj = CRS(gk.crs.string), match.ID = TRUE)

# --- If plot.results == TRUE, plot grid of points

if (plot.results) {
  plot(gg3, axes=TRUE, pch="16", col="tomato", cex=0.1)
  plot(outer.shape.GK, lwd=2, border="steelblue4", add=TRUE)
  title("Departments in Salado A Basin")
}

remove(gg1, gg2, range.x, range.y, x.values, y.values)
remove(point.offset.x, point.offset.y); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Create SpatialPolygons object with unit-area tiles ----

polys <- vector(mode="list", length = length(coordinates(gg3)[,1]))

# --- From the points, first we generate a grid topology,
# --- and then polygons around each point.

hh1 <- points2grid(gg3, round=NULL)

hh2 <- GridTopology(cellcentre.offset=hh1@cellcentre.offset,
  cellsize=hh1@cellsize,
  cells.dim=hh1@cells.dim)

hh3 <- as.SpatialPolygons.GridTopology(hh2,
  proj4string = CRS(gk.crs.string) )
gc()

hh4 <- sp::spChFIDs(hh3, x=as.character(points.info$point.ID))	# assign unique polygon IDs
gc()

hh5 <- points.info$point.ID

polys[hh5] <- slot(hh4[hh5],"polygons"); gc()

tiles.SP <- SpatialPolygons(polys,
  proj4string = CRS(gk.crs.string))
gc()

# --- Plot tiles and tile IDs

if (plot.results) {
  plot(tiles.SP, axes=TRUE, asp=1, border="grey70", lwd=0.5)
  title("Salado A Basin")
  plot(cuenca.A.UTM, add=TRUE, lwd=2, border="steelblue3")
  
  # plot(gg3,add=TRUE, pch=16, col="tomato", cex=0.5)
  # text(points.info[,"x.coord"], points.info[,"y.coord"],
  #	labels=points.info[,"point.ID"], cex=0.5)
}

rm(hh1,hh2,hh3,hh4,hh5,polys); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Identify and select points and tiles inside the outer shape of depts ----

# --- Select polygons that are COMPLETELY inside outer department shape. 

ccc <- rgeos::gWithin(tiles.SP, outer.shape.GK, byid = TRUE)
points.in <- which(ccc)
cc1 <- tiles.SP[points.in]       # Tiles inside outer shape

tiles.SP2 <- sp::spChFIDs(cc1,
  x = as.character(1:length(cc1)))	# assign unique polygon IDs

# --- Plot tiles inside target region...

if (plot.results) {
  plot(tiles.SP2[1:5000], axes = TRUE, col = "grey70", lwd = 0.5)
  plot(outer.shape.GK, lwd = 2, add = TRUE, border = "steelblue1")
  title("Salado A Basin")
}

remove(tiles.SP, ccc, cc1, points.in); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Read or build a 'master.tile.list' data frame ----

# --- This master list has information about all tiles in the outer shape,
# --- of departments, given a tile size and shape.
# --- If a text file exists, the list will be read at the start of each run,
# --- to avoid selecting  tiles used in a previous run.
# --- If the text file does not exist, create a 'master.tile.list' data frame
# --- with the tile information.

# --- Define name of text file with the master tile list. 

master.tile.outdir <- "D:/ENSO/Projects/CNH3/data/campos_sinteticos/tablas_datos/"
master.list.outfile <- paste(master.tile.outdir,"master_tile_data.txt", sep = "")

# --- Check if file exists. 
# --- If it exists, then read it... else, create the data frame 'master.tile.list'

# --- WARNING: If a department/partido has already been processed and
# --- the master list is read from a file, only few available tiles may be left
# --- for that department.
# --- If a department has to be re-run, make sure that master list is not read.

if (file.exists(master.list.outfile)) {
  # Master file list exists, then READ it....
  cat('Reading existing master tile file...')
  master.tile.list <- read.table(master.list.outfile,
    header = TRUE, sep = '\t', strip.white = TRUE)
} else {
  # Master file list DOES NOT exist, CREATE it....
  cat('Creating master tile data frame')
  tile.IDs <- as.numeric(sapply(slot(tiles.SP2, "polygons"),
    function(x) {slot(x,"ID")}))
  tile.coords <- coordinates(tiles.SP2)
  
  # --- Generate a data frame with ALL tiles inside outer shape of departments.
  # --- The list will be used to identify tiles already assigned to a farm.
  
  master.tile.list <- data.frame(tile.ID = tile.IDs,
    x.coord = tile.coords[, 1],
    y.coord = tile.coords[, 2],
    available = rep(TRUE, length.out = length(tile.IDs)),
    stringsAsFactors = FALSE)
}

table(master.tile.list$available, useNA = 'always')
# ------------------------------------------------------------------------------


# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
# ***** Start working with farms *****
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#

# -----------------------------------------------------------------------------#
# --- Read table with sizes of farms to be simulated ----

indir1 <- "D:/ENSO/R stuff/SaladoBasin/"    # Input dir for Federico's table
file1 <- "Salado_A_to_GIS_2002.txt"         # Input file for Federico's table

# --- Either read the table with farm sizes provided by Federico or,
# --- alternatively,
# --- read a test file with a few synthetic farm sizes for testing...

test.farm.sizes <- FALSE		# If TRUE, use test farm size data set

if (!test.farm.sizes) {			# Do not use test farm sizes
  # --- Table with farm sizes from Federico
  infile1 <- paste(indir1, file1, sep="")
  if (!exists("infile1")) {
    stop("ERROR: Specified input file", file1, "does not exist...\n")
  }
  farm.table <- read.table(infile1, sep="\t", na.strings=c("",NA), header=TRUE)
  # --- Retain only a few variables:   
  farm.table <- farm.table[,c("region.id","province.id",
    "department.id","farm.id","owner.id","eap.id",
    "original.farm.size", "adjusted.farm.size")]
} else {
  # --- Table with TEST farm sizes
  file1 <- "TEST.farm.sizes.txt"
  infile1 <- paste(indir1, file1, sep="")
  if (!exists("infile1")) {
    stop("ERROR: Specified input file", file1, "does not exist...\n")
  }
  farm.table <- read.table(infile1, sep="\t", na.strings=c("",NA), header=TRUE)
  # --- Retain only a few variables:
  farm.table <- farm.table[,c("region.id","farm.id","owner.id",
    "original.farm.size")]
}

rm(indir1, file1, infile1); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Adjust synthetic farm sizes ----
# --- so they are integer multiples of the unit tile size.

# --- If a farm is too large ( > 5000 has or 250 times the area of the unit tile)
# --- then truncate adjusted size to about 5000 has (final size depends on tile area).
# --- If a farm is too small ( < 20 has) then increase adjusted area to minimum area
# --- (one unit tile).

#  tile.area <- 20
max.farm.size <-  250 * tile.area # 5000 has with a tile of 20 has
min.farm.size <- tile.area        # about 20 has

farm.table <- dplyr::tbl_df(farm.table) %>%
  dplyr::mutate(adjusted.farm.size = round((original.farm.size /
    tile.area), 0) * tile.area) %>%
  dplyr::mutate(adjusted.farm.size = ifelse(adjusted.farm.size >= max.farm.size,
    max.farm.size, adjusted.farm.size)) %>%
  dplyr::mutate(adjusted.farm.size = ifelse(adjusted.farm.size <= min.farm.size,
    min.farm.size, adjusted.farm.size)) %>%
  dplyr::rename(prov.code = province.id, dept.code = department.id)

if (any(farm.table$adjusted.farm.size %% tile.area != 0)) {
  stop('There are adjusted areas not integer multiples of tile area')
}

# --- Check the areas of all adjusted farm sizes against
# --- the total area of departments considered.

sum.farms <- sum(farm.table$adjusted.farm.size)  # Sum of adjusted sizes

sum.depts <- ceiling(sum(basin.depts$area) * 100)  # sum of department areas in km^2, expressed in has

if (sum.depts >= sum.farms) {
  cat("Sum of adjusted farm areas is lower than sum of total departments area...\n")
} else {
  stop("Sum of adjusted farm areas is HIGHER than sum of total departments area!\n")
}

rm(min.farm.size, max.farm.size, sum.farms, sum.depts); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Identify departments with problems in farm areas ----

# --- Identify those departments for which area of farms to simulate is
# --- larger or equal than the total department area.

ddd <- dplyr::tbl_df(farm.table) %>%
  dplyr::group_by(prov.code, dept.code) %>%
  dplyr::summarise(count.a = n(),
    sum.o = round(sum(original.farm.size), 0),
    sum.a = sum(adjusted.farm.size) )

dd2 <- dplyr::inner_join(ddd, dplyr::tbl_df(basin.depts)) %>%
  dplyr::mutate(dept.area = (area * 100),
    prop = round(sum.a / (area * 100), 3)) %>%
  dplyr::select(prov.code, prov.name, dept.code, dept.name,
    count.a, sum.o, sum.a, dept.area, prop) %>%
  dplyr::arrange(prov.code, count.a)

dd3 <- dplyr::filter(dd2, prop >= 0.97)

if (any(dd2$prop >= 1)) {
  warning('Farm areas > dept size for SOME depts...\n')
  print(dd3)
}

rm(ddd, dd2, dd3); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Make a dotchart with number of farms to be simulated by department ----
# --- Se usa un dotplot algo mas sofisticado, usando funcion dotchart2()
# --- en paquete Hmisc. Los departamentos se agrupan por provincia.

dept.dir <- "D:/ENSO/Projects/CNH3/data/shapes_depts/"

if (plot.results) { 
  dd1 <- dplyr::tbl_df(farm.table) %>%
    dplyr::group_by(dept.code) %>%
    dplyr::summarise(n.farms = n()) %>%
    dplyr::arrange(n.farms)
  
  dd3 <- dplyr::inner_join(dd1, dplyr::tbl_df(basin.depts)) %>%
    dplyr::select(dept.name, prov.name, n.farms) %>%
    dplyr::arrange(prov.name, desc(n.farms))
  
  dd3$dept.name <-stringr::str_to_title(dd3$dept.name)
  
  Cairo::CairoPNG(filename = paste0(dept0.dir,"synth_farms_dotplot.png"),
    width = 840, height = 940,
    pointsize = 12, bg = "white",  res = NA)
  
  Hmisc::dotchart2(dd3$n.farms,
    labels = dd3$dept.name,
    groups = dd3$prov.name,
    auxdata = dd3$n.farms,
    auxtitle="Synth\n farms",
    xlab="Number of farms",
    main="Number of synthetic farms - Salado A Basin",
    dotsize = 1.1,
    cex.labels = 0.6,
    cex.group.labels = 0.8,
    sort. = FALSE,
    pch = 16, col = "tomato")
  
  dev.off()
  rm(dd1, dd2, dd3); gc()
  
} # Fin de if que define si se grafican resultados
# ------------------------------------------------------------------------------


# --- Iterate over each department inside the Salado A basin

# for (iii in 1:n.of.depts) {

for (iii in 18:34) {

# iii <- 20

# -----------------------------------------------------------------------------#
# --- Read shape file for THIS department ----

dept.dir <- "D:/ENSO/Projects/CNH3/data/shapes_depts/"

oldPath <- getwd()
setwd(dept.dir)
cat("Reading shape file for department", dept.names[iii], "...\n")
dept.boundary <- rgdal::readOGR(".", layer = dept.names[iii], verbose = TRUE)
setwd(oldPath)

dept.data <- slot(dept.boundary, "data") # Data associated with THIS department

dept.name <- dept.names[iii]
dept.name.lc <- dept.names.lc[iii]

rm(oldPath); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#  
# --- Convert boundary for THIS department into GK and UTM coordinates (in meters) ----

dept.boundary.GK <- sp::spTransform(dept.boundary,
    CRS = CRS(gk.crs.string))

dept.boundary.UTM <- sp::spTransform(dept.boundary,
  CRS = CRS(utm.crs.string))

# --- Plot outer boundary of THIS department in GK coordinates
  
if (plot.results) {
  plot(dept.boundary.GK, axes=TRUE)
  plot(dept.boundary.GK, border="steelblue4",
    col=rgb(246/255, 232/255, 195/255),
    lwd=2, add=T)
  title(dept.name.lc)
}

# --- Plot outer boundary of THIS department in lat-lon coordinates

if (plot.results) {
  plot(dept.boundary, axes=TRUE, border="steelblue4",
    col=rgb(246/255, 232/255, 195/255))
  title(dept.name.lc)
}
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Calculate area of department in hectares ----
# --- Divide by 10,000 because 1 ha = 10,000 m^2
# --- Also, 1 km^2 = 100 ha

# area <- (sapply(slot(dept.boundary.GK,"polygons"),
#   function(x) {slot(x,"area")} )) / 10000.
  
area <- ceiling(rgeos::gArea(dept.boundary.GK) / 10000.)
  
cat(paste("Area of department", dept.name.lc, "is", area, "hectares...\n"))
cat(paste("Area of department", dept.name.lc, "is", area/100, "km^2...\n"))
# ------------------------------------------------------------------------------ 


# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
# --- NOW LET US GENERATE PSEUDO_FARMS
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#

# -----------------------------------------------------------------------------#
# --- Identify and select tiles inside THIS department ----

tiles.SP3 <- tiles.SP2[dept.boundary.GK, ]     # Tiles inside polygon

tile.IDs.this.dept <- sapply(slot(tiles.SP3, "polygons"),
  function(x) {slot(x,"ID")})

# --- Tiles NOT available within entire region.
# --- Some tiles inside THIS department may have been used
# --- while processing previous departments.

tiles.unavailable.all.region <- as.numeric(master.tile.list$tile.ID[
  master.tile.list$available == FALSE])

uu1 <- tile.IDs.this.dept %in% tiles.unavailable.all.region

if (any(uu1)) {
  # Must eliminate tiles from this dept used before...
  cat('SOME tiles in this department have been used previously...\n')
  cat('The number of tiles inside this department will be reduced accordingly.\n')
  tiles.SP3 <- tiles.SP3[!uu1]
} else {
  # No need to eliminate any tiles wiuthin this dept
  cat('ALL tiles inside department are available...')
}

n.tiles.in.dept <- length(tiles.SP3)  # Number of tiles inside polygon

# --- Plot tiles inside target region...

if (plot.results) {
  sp::plot(dept.boundary.GK, axes = TRUE, border = 'steelblue1')
  sp::plot(tiles.SP3, add = TRUE)
  title(dept.name.lc)
}
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Build table of farms (trimmed, if needed) for THIS department ----

# --- Select farms inside THIS department (the one we are working on)

tt1 <- match(dept.name, basin.depts$dept.name)
this.dept.code <- as.numeric(basin.depts[tt1, "dept.code"])

farm.table.this.dept <- dplyr::tbl_df(farm.table) %>%
  dplyr::filter(dept.code == this.dept.code) %>%
  dplyr::mutate(tiles.required = round((adjusted.farm.size / tile.area), 0))  %>%
  dplyr::arrange(desc(adjusted.farm.size))

n.sim.farms <- nrow(farm.table.this.dept)  # number of simulated farms for THIS dept

n.tiles.required.this.dept  <- sum(farm.table.this.dept$tiles.required)

# --- Check if number of tiles within THIS dept is sufficient
# --- to place all farms in the dept.
# --- The number of tiles needed for all farms is compared to 
# --- the value of 'n.tiles.in.dept', calculated in the previous step.
# --- This number of tiles lists those tiles AVAILABLE for this
# --- department, as some tiles may have been used when assembling farms
# --- for an adjacent department.

cat(paste('Number of tiles in THIS department:', n.tiles.in.dept,'\n'))
cat(paste('Number of tiles needed for farms:', n.tiles.required.this.dept,'\n'))

# Proportion of available tiles in dept that can assigned to farms
max.prop.tiles <- 0.97 

if (n.tiles.required.this.dept > n.tiles.in.dept) {
  # Are there sufficient tiles inside the department?
  warning("Number of tiles required is > tiles in THIS department")
  warning("List of farms MUST be adjusted to fit in department")
  
  cat('Trimming list of farms within THIS department...\n')

  farm.table.this.dept <- FilterFarms(farm.table.this.dept,
    farm.comparable.field = 'tiles.required',
    max.aggregated.value = n.tiles.in.dept,
    initial.seed = 0,
    max.rate = max.prop.tiles)
  
  n.sim.farms <- nrow(farm.table.this.dept)  # number of simulated farms for THIS dept
  cat(paste('Trimmed number of farms within THIS department:',
    n.sim.farms, '\n'))
  
} # End of if that checks if total number of tiles required is > tiles in department

if (plot.results) {
  hist(farm.table.this.dept$adjusted.farm.size,
    breaks = c(0,100,250,500, 1000, 2500, 5000.01),
    freq = FALSE, col = "thistle",
    xlab = "Simulated farm areas (ha)")
  box()
}

table(farm.table.this.dept$tiles.required)

rm(tt1, this.dept.code, max.prop.tiles)
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Build a list to store tile IDs used for each simulated farm ----
# --- The list has length equal to the number of farms to be simulated.

tile.IDs.used <- vector(mode = "list", length = n.sim.farms)
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Build a data frame with info on tiles in THIS department ----
# --- The data frame lists the IDs of tiles, their x- and y-coordinates.
# --- A fourth column indicates if tiles are available to be used
# --- to construct a farm. Initially all tiles are set to TRUE.

tile.IDs <- sapply(slot(tiles.SP3, "polygons"), function(x) {slot(x, "ID")})
tile.coords <- coordinates(tiles.SP3)

tiles.available.this.dept <- data.frame(tile.ID = tile.IDs,
  x.coord = tile.coords[, 1],
  y.coord = tile.coords[, 2],
  available = rep(TRUE, length.out = length(tile.IDs)),
  stringsAsFactors = FALSE)

tiles.available.this.dept.2 <- rep(TRUE, length.out = length(tile.IDs),
  stringsAsFactors = FALSE)
names(tiles.available.this.dept.2) <- tile.IDs
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Find the k nearest neighbors for each tile inside THIS department ----
# --- inside the region considered.
# --- The number of neighbors found (k) is equal to the highest number
# --- of tiles required (i.e., the number of tiles needed to simulate
# --- the largest synthetic farm) plus an extra (here, 10%).
# --- The calculation returns a matrix that has (a) a number of rows equal to
# --- the number of tiles inside the region and (b) k columns (for the k nearest neigbors).
# --- NOTE: the values of the elements of the matrix of nearest neighbors are the _indices_
# --- of tiles inthe input object.

cc3 <- spdep::knearneigh(tile.coords,
  k = ceiling(max(farm.table.this.dept[, "tiles.required"]) * 1.3),
  longlat = FALSE,
  RANN = TRUE); gc()

nn.indices <- cc3$nn    # Nearest neighbors that are available
row.names(nn.indices) <- tiles.available.this.dept[, "tile.ID"]

# --- We add one column at the beginning of the matrix of neighbors
# --- that includes the index of the tile for which neighbors
# --- are found.

nn.indices.with.base.tile <- cbind((1:nrow(nn.indices)), nn.indices)

rm(cc3);gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Iterate over the number of synthetic farms to be generated ----

set.seed(256)

for (i in 1:n.sim.farms) {
 
  ##  i <- 1

  cat(paste("Simulating farm", i, "of", n.sim.farms, "...\n"))
  sim.farm.area <- farm.table.this.dept[i, "adjusted.farm.size"]  # area of THIS simulated farm
  n.tiles <- as.numeric(farm.table.this.dept[i, "tiles.required"])  # how many unit tiles  required for THIS farm?
  cat("Tiles needed for this farm:", as.numeric(n.tiles),"\n")
  
  # Find IDs of tiles that are still available for allocation to a farm
  tile.IDs.still.available <- dplyr::filter(tiles.available.this.dept, available == TRUE)
  
  if (nrow(tile.IDs.still.available) < n.tiles) {
    # There are no tiles available for this farm
    stop(paste("No tiles available for farm", i, "of", nrow(farm.table.this.dept)))
  
  } else {
    
    uu0 <- tile.IDs.still.available[, "tile.ID"]
    cat("Tiles still available:", length(uu0), "...\n")
    
    if (n.tiles == 1) {
      # Only ONE tile needed for this farm...
            
      # --- ID for the first (and, here, only) tile in a simulated farm
      first.tile.ID <- sample(uu0, size = 1)
      
    } else {
      # More than one tile needed for this farm...
      
      n.neighbors.still.available <- apply(X = nn.indices.with.base.tile, MARGIN = 1,
        FUN = function(x) {length(x[!is.na(x)])})
      indices.tiles.with.enough.neighbors <- which(n.neighbors.still.available >= n.tiles)
      names(indices.tiles.with.enough.neighbors) <- NULL
      
      # --- ID for the first tile in a simulated farm
      
      first.tile.ID <- sample(as.numeric(
        tiles.available.this.dept[indices.tiles.with.enough.neighbors, "tile.ID"]),
        size = 1)
      
      # Select the first tile selected plus "n.tiles - 1" nearest neighbors.
      # Remember the first column in "nn.indices.with.base.tile" is the index of the
      # first tile selected.
      
    } # End of else for only one tile needed
 
    # --- Find out the row number for the tile ID selected
    first.tile.row <- which(tiles.available.this.dept[, "tile.ID"] == first.tile.ID)
    
    oo1 <- nn.indices.with.base.tile[first.tile.row, ]
    oo2 <- oo1[!is.na(oo1)]
    # Indices and positions of all tiles used for THIS farm
    all.tiles <- oo2[1:n.tiles]
    all.tiles.IDs <- as.numeric(tiles.available.this.dept[all.tiles, "tile.ID"])
     
  } # End of else for tiles still available

  # --- Now that we have finished selecting tiles for THIS synthetic farm
  # --- let's save the tiles used for it in a list.
  
  tile.IDs.used[[i]] <- all.tiles.IDs  # store points used for this farm
  
  # --- Mark those tiles used for THIS farm
  # --- as unavailable for use in other farms
  tiles.available.this.dept[all.tiles, "available"] <- FALSE
  
  # --- In the list of available neighbors, mark tiles used as unavailable (NA)
  
  for (k in 1:nrow(nn.indices.with.base.tile)) {
    uu1 <- nn.indices.with.base.tile[k, ] %in% all.tiles
    nn.indices.with.base.tile[k, which(uu1)] <- NA
  }
  
  gc()

}   # End of iterating over number of farms to be simulated

# --- Update the master tile list (for entire area) setting to FALSE
# --- column 'available' for those rows which have been used in this department.

uuu <- as.numeric(tiles.available.this.dept[tiles.available.this.dept$available == FALSE, 'tile.ID'])
uu2 <- match(uuu, as.numeric(master.tile.list$tile.ID))
master.tile.list[uu2, 'available'] <- FALSE

table(master.tile.list$available)

remove(oo1,oo2,first.tile.ID,first.tile.row,
       tile.IDs.still.available, uuu, uu2)
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Check that there are no duplicates among selected tile IDs ----

oo5 <- sort(as.numeric(unlist(tile.IDs.used)))
if (any(duplicated(oo5))) {stop("ERROR: Duplicated tile IDs...\n")}

oo5[which(duplicated(oo5))]

# --- Check that areas of generated farms coincide with those specified

ww1 <- lapply(tile.IDs.used, FUN=length)
ww2 <- unlist(ww1) * tile.area

if (any(ww2 != farm.table.this.dept[,"adjusted.farm.size"])) {
  stop("ERROR: Specified and simulated farm areas do not coincide...\n")
  ww3 <- which(ww2 != farm.table.this.dept[, "adjusted.farm.size"])
  #ww2[ww3]
  #sim.farms[ww3,"area"]
  rm(ww3)
}

remove(ww1,ww2,oo5); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Plot tiles used for each simulated farm and their center points ----


if (plot.results) {
  
  uu1 <- unlist(tile.IDs.used)
  uu2 <- which(uu1 %in% tile.IDs)
  
  library(RColorBrewer)
  farm.colors <- sample(brewer.pal(8, "Pastel2"),
    size = n.sim.farms, replace = TRUE)

  plot(dept.boundary.GK, axes=TRUE, asp=1)
  plot(dept.boundary.GK, lwd=2, border="steelblue4", add=T)
  title(dept.name.lc)
  
  for (i in 1:n.sim.farms) {
    ppp <- sort(tile.IDs.used[[i]])
    pp2 <- match(ppp, tile.IDs)
    plot(tiles.SP3[pp2, ], add=TRUE,
      col = farm.colors[i], border = farm.colors[i], lwd=0.01)
  }	# End of iteration through simulated farms
  
  remove(ppp, pp2, farm.colors)
  
}   # End of "if plot.results"

rm(uu1, uu2); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Merge all unit polygons for each farm into a single polygon ----
# --- the unionSpatialPolygons() function produces an object of
# --- SpatialPolygon class, so we extract the polygon.

list.of.polygons <- vector("list", length = n.sim.farms)  # store farm polygons here

for (i in 1:n.sim.farms) {
  
  #  i <- 25
  sim.farm.ID <- as.numeric(farm.table.this.dept[i, "farm.id"])
  
  cat("Working on farm",i,"of", n.sim.farms,"... Farm ID:", sim.farm.ID,"...\n")
 
  pp1 <- sort(as.numeric(tile.IDs.used[[i]]))	# Tile IDs used for THIS farm
  pp2 <- match(pp1, as.numeric(tiles.available.this.dept[, "tile.ID"]))	# Row numbers in tiles object
  
  pp3 <- maptools::unionSpatialPolygons(tiles.SP3[pp2],
    IDs = as.numeric(rep(sim.farm.ID, times = length(pp2))),
    threshold = NULL)
  
  chk <- sapply(slot(pp3, "polygons"), function(x) slot(x, "ID"))
  
  if (length(chk) != 1)
    stop("ERROR: More than one polygon...")
  
  pp5 <- pp3@polygons   			# extract polygon
  list.of.polygons[i] <- pp5  # store polygon for THIS farm into list
  
}	# End of iterating through simulated farms to merge all unit polygons

remove(pp1, pp2, pp3, pp5, chk); gc()
remove(nn.indices, tile.coords, tiles.available.this.dept); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Assemble polygons for all simulated farms ----
# --- into a single SpatialPolygons object

farms.SP <- SpatialPolygons(list.of.polygons)
slot(farms.SP, "proj4string") <- CRS(gk.crs.string)

# --- In some cases, the same polygon may have two Polygons inside
# --- after performing the union of the individual SpatialPolygons.
# --- This may mean a farm formed by tiles that are geographically disjoint.
# --- WARNING: There may be issues with calculation of centroids in these cases.

nn1 <- sapply(farms.SP@polygons, function(x) {length(slot(x,"Polygons")) })
nn2 <- which(nn1 > 1)

# --- Check if more than one polygon with same ID

chk <- sapply(slot(farms.SP, "polygons"), function(x) slot(x, "ID"))
chk2 <- any(duplicated(chk))
if (chk2) stop("ERROR: More than one polygon in farms.SP...\n")

remove(nn1,nn2,chk,chk2,list.of.polygons); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Convert polygon coordinates from Gauss-Kruger to lat-lon and UTM ----

# Latitude and longitude

farms.SP2 <- sp::spTransform(farms.SP, CRS(ll.crs.string))

# UTM

farms.SP3 <- sp::spTransform(farms.SP, CRS(utm.crs.string))
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Find centroids for farms in disjoint polygons ----
# --- There are a few farms that involve disjointed polygons (see above).
# --- In these cases, the centroid does not coincide with the "labpt" slot.

# --- First do it in Gauss-Kruger coordinates

farms.centroids.SP <- rgeos::gCentroid(farms.SP, byid = TRUE)

# --- Now do it in lat-lon coordinates

farms.centroids.SP2 <- rgeos::gCentroid(farms.SP2, byid = TRUE)

# --- Now do it in UTM coordinates

farms.centroids.SP3 <- rgeos::gCentroid(farms.SP3, byid = TRUE)
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Plot simulated farms ----
# --- using Spatial Polygons objects generated so far...

if (plot.results) {
  
  library(RColorBrewer)
  farm.colors <- sample(brewer.pal(8, "Pastel2"), size=n.sim.farms, replace=TRUE)
  
  # --- First plot in GK coordinates
  
  plot(dept.boundary.GK, axes = TRUE, border = "steelblue", lwd = 2)
  plot(farms.SP, lwd = 0.2, col = farm.colors, border = "darkgrey", add = TRUE)
  plot(farms.centroids.SP, add=TRUE, pch=16, cex=0.3, col="tomato")
  plot(dept.boundary.GK, add = TRUE, border = "steelblue", lwd = 2)
  title(paste("Synthetic Farms -", dept.name.lc))
  
  # --- Then plot in lat-lon coordinates
  
  plot(dept.boundary, axes = TRUE, border = "steelblue", lwd = 2)
  plot(farms.SP2, lwd = 0.2, col = farm.colors, border = "darkgrey", add = TRUE)
  plot(farms.centroids.SP2, add=TRUE, pch=16, cex=0.5, col="tomato")
  plot(dept.boundary, add = TRUE, border = "steelblue", lwd = 2)
  title(paste("Synthetic Farms -", dept.name.lc))
  
  # --- Then plot in UTM coordinates
  
  plot(dept.boundary.UTM, axes = TRUE, border = "steelblue", lwd = 2)
  plot(farms.SP3, lwd = 0.2, col = farm.colors, border = "darkgrey", add = TRUE)
  plot(farms.centroids.SP3, add=TRUE, pch=16, cex=0.5, col="tomato")
  plot(dept.boundary.UTM, add = TRUE, border = "steelblue", lwd = 2)
  title(paste("Synthetic Farms -", dept.name.lc))
  
  #farm.id <- sapply(slot(farms.SP2, "polygons"), function(x) {slot(x, "ID")})
  #pp1 <- t(sapply(slot(farms.SP2, "polygons"), function(x) {slot(x, "labpt")}))
  #text(pp1, labels=farm.id, cex=0.4)
  
}

gc()
# ------------------------------------------------------------------------------


# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
# --- UP TO THIS POINT WE HAVE CREATED SPATIAL POLYGONS
# --- WITH SYNTHETIC FARM BORDERS.
# --- NOW WE NEED TO ASSEMBLE A DATA FRAME THAT WILL GO WITH THE SPATIAL DATA
# --- AND THAT WILL HAVE ADDITIONAL INFO ABOUT EACH SYNTHETIC FARM.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#

# -----------------------------------------------------------------------------#
# --- Create an empty data frame to store info about each simulated farm ----

farms.DF <- data.frame(farmId = rep(NA, n.sim.farms),
  eapId = rep(NA, n.sim.farms),
  regnId = rep(NA, n.sim.farms),
  provId = rep(NA, n.sim.farms),
  provnm = rep(NA, n.sim.farms),
  deptId = rep(NA, n.sim.farms),
  deptnm = rep(NA, n.sim.farms),
  ownerId = rep(NA, n.sim.farms),
  orgFrmSz = rep(NA, n.sim.farms),
  adjFrmSz = rep(NA, n.sim.farms),
  GKlon = rep(NA, n.sim.farms),
  GKlat = rep(NA, n.sim.farms),
  deglon = rep(NA, n.sim.farms),
  deglat = rep(NA, n.sim.farms),
  UTMlon = rep(NA, n.sim.farms),
  UTMlat = rep(NA, n.sim.farms),
  perim = rep(NA, n.sim.farms),
  soilUnit = rep(NA, n.sim.farms),
  unitType = rep(NA, n.sim.farms),
  prodIndx = rep(NA, n.sim.farms),
  pctgSl1 = rep(NA, n.sim.farms),
  sgrpSl1 = rep(NA, n.sim.farms),
  pctgSl2 = rep(NA, n.sim.farms),
  sgrpSl2 = rep(NA, n.sim.farms),
  pctgSl3 = rep(NA, n.sim.farms),
  sgrpSl3 = rep(NA, n.sim.farms),
  creaGpId = rep(NA, n.sim.farms),
  advsrId= rep(NA, n.sim.farms) )
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Start filling in data frame with info about each simulated farm ----

# --- First, complete information using the synthetic farms' Spatial Polygons objects.

farms.DF[, "farmId"] <- as.numeric(sapply(slot(farms.SP2, "polygons"),
  function(x) {slot(x, "ID")}))
farms.DF[, "adjFrmSz"] <- sapply(slot(farms.SP, "polygons"),
  function(x) {slot(x, "area")}) / 10000
# Extract Gauss-Kruger coordinates for farm centroids from farms.centroids.SP3 object
farms.DF[, "GKlon"] <- round(coordinates(farms.centroids.SP)[,1], 0)
farms.DF[, "GKlat"] <- round(coordinates(farms.centroids.SP)[,2], 0)
# Extract lat-lon coordinates for farm centroids from farms.centroids.SP2 object
farms.DF[, "deglon"] <- round(coordinates(farms.centroids.SP2)[,1], 3)
farms.DF[, "deglat"] <- round(coordinates(farms.centroids.SP2)[,2], 3)
# Extract UTM coordinates for farm centroids from farms.centroids.SP object
farms.DF[, "UTMlon"] <- coordinates(farms.centroids.SP3)[,1]
farms.DF[, "UTMlat"] <- coordinates(farms.centroids.SP3)[,2]

# --- Second, complete  information using "farm.table.this.dept"

tt11 <- farm.table.this.dept$farm.id
tt12 <- match(farms.DF[,"farmId"], tt11)

farms.DF[, "eapId"] <- farm.table.this.dept[tt12,"eap.id"]									# Fill in EAP ID
farms.DF[, "regnId"] <- farm.table.this.dept[tt12,"region.id"]							# Fill in region ID
farms.DF[, "ownerId"] <- farm.table.this.dept[tt12,"owner.id"]							# Fill in farm owner ID
farms.DF[, "orgFrmSz"] <- round(farm.table.this.dept[tt12,"original.farm.size"], 1)		# Fill in farm size in original table

remove(tt11, tt12); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Calculate perimeter of each farm and store in data frame ----
# --- To do this, we must convert data into polygons used in PBSmapping library.
# --- In some cases, a farm includes separate polygons (e.g., a separate farm lot), so we need
# --- to sum up perimeters for all polygons coresponding to a given farm ID.

farms.PS <- SpatialPolygons2PolySet(farms.SP)			# Convert spatialpolygons into PBS PolySet
farms.perimeter <- calcLength(farms.PS, rollup=1)	# calculate perimeter (in meters)
farms.perimeter2 <- tapply(farms.perimeter[,"length"], INDEX=farms.perimeter[,"PID"], FUN=sum)	# add up perimeter for isolated polygons

farms.DF[, "perim"] <- as.vector(round(farms.perimeter2, 0))
# table(farms.DF$adjFrmSz, farms.DF$perim)

remove(farms.PS, farms.perimeter, farms.perimeter2); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Fetch province corresponding to each farm centroid ----
# --- First, read CORRECTED shape files with Argentine provinces

prov.dir <- "D:/ENSO-Data/Other Data/ARG_provinces/"
prov.file <- paste(prov.dir, "provs.corrected.shp", sep="")
if (!exists("prov.file")) {
  stop("ERROR: Specified SHP file for provinces", prov.file, "does not exist...\n") }

oldPath <- getwd()
setwd(prov.dir)
pp1 <- readOGR(".", layer="provs.corrected", verbose = TRUE)
pp1 <- spTransform(pp1, CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))
setwd(oldPath)

# --- Isolate the data part of provinces file

pp2 <- pp1@data

# --- Use centroids of farms to extract province data

pp6 <- over(farms.centroids.SP2, pp1)

farms.DF[, "provId"] <- as.numeric(pp6[, "indec.code"])		# Fill in INDEC province code
farms.DF[, "provnm"] <- as.character(pp6[,"prov.name"])		# Fill in province name

remove(prov.dir, prov.file, oldPath, pp1, pp2, pp6); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Fetch department corresponding to each farm centroid ----
# --- First, read CORRECTED shape files with Argentine departments

dept.dir <- "D:/ENSO-Data/Other Data/ARG_departments/"
dept.file <- paste(dept.dir, "deptscorrected.shp", sep="")
if (!exists("dept.file")) {
  stop("ERROR: Specified SHP file for departments", dept.file, "does not exist...\n") }

oldPath <- getwd()
setwd(dept.dir)
pp1 <- readOGR(".", layer="deptscorrected", verbose = TRUE)
pp1 <- spTransform(pp1, CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))
setwd(oldPath)

# --- Isolate the data part of department file

pp2 <- pp1@data

# --- Use centroids of farms to extract department data

pp6 <- over(farms.centroids.SP2, pp1)

dept.code1 <- as.numeric(pp6[, "code"])
dept.name1 <- as.character(pp6[, "dept"])

# --- Convert all upper case characters to upper and lower case

capwords <- function(s, strict = FALSE) {
  cap <- function(s) paste(toupper(substring(s,1,1)),
  {s <- substring(s,2); if(strict) tolower(s) else s},
  sep = "", collapse = " " )
  sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s)))
}

dept.name1 <- capwords(dept.name1, strict=TRUE)

farms.DF[, "deptId"] <- dept.code1		# Fill in department code
farms.DF[, "deptnm"] <- dept.name1		# Fill in department name

remove(dept.dir, dept.file, oldPath, pp1, pp2, pp6, dept.code1, dept.name1); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Add soil data for each synthetic farm from INTA soil atlas ----

# --- First, read data files downloaded from INTA.
# --- URL: http://geointa.inta.gov.ar/publico/INTA_SUELOS/suelos_500000_v8.zip.
# --- These files have soils by province, NOT by department.
# --- WARNING: "soils by department" file has different column names.

soils.dir <- "D:/ENSO-Data/Other Data/ARG_soils_by_province/"
oldPath <- getwd()
setwd(soils.dir)
soils <- readOGR(".", layer="suelos_500000_v9", verbose = TRUE)
soils <- spTransform(soils, CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))
setwd(oldPath)

# --- Get table with soil data

tt1 <- soils@data

# --- Extract soil data for farm centroids

pp7 <- over(farms.centroids.SP2, soils)

# --- Fill in dataframe with soil data

farms.DF[, "soilUnit"] <- as.character(pp7[, "SIMBC"])
farms.DF[, "unitType"] <- as.character(pp7[, "TIPO_UC"])
farms.DF[, "prodIndx"] <- as.numeric(pp7[, "IND_PROD"])
farms.DF[, "pctgSl1"] <- as.numeric(pp7[, "PORC_SUE1"])
farms.DF[, "pctgSl2"] <- as.numeric(pp7[, "PORC_SUE2"])
farms.DF[, "pctgSl3"] <- as.numeric(pp7[, "PORC_SUE3"])
farms.DF[, "sgrpSl1"] <- as.character(pp7[, "SGRUP_SUE1"])
farms.DF[, "sgrpSl2"] <- as.character(pp7[, "SGRUP_SUE2"])
farms.DF[, "sgrpSl3"] <- as.character(pp7[, "SGRUP_SUE3"])

remove(soils.dir,oldPath,soils,tt1,pp7); gc()
# ------------------------------------------------------------------------------


# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#
# --- WE ARE DONE WITH GENERATION OF THE ANCILLARY RECORDS FOR EACH FARM.
# --- NOW JOIN THE POLYGONS WITHIN THE DATA FRAME AND SAVE THE RESULTS.
# -----------------------------------------------------------------------------#
# -----------------------------------------------------------------------------#

# -----------------------------------------------------------------------------#
# --- Reorder rows in farms data frame ----
# ---- so they coincide with the order of farm IDs in polygons.

tt1 <- order(farms.DF[, "farmId"])
farms.DF <- farms.DF[tt1, ]

IDs <- as.numeric(sapply(slot(farms.SP, "polygons"), function(x) slot(x, "ID")))

pp1 <- match(IDs, farms.DF$farmId)

farms.DF <- farms.DF[pp1, ]
row.names(farms.DF) <- as.character(farms.DF[, "farmId"])

remove(tt1, pp1, IDs); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Create Spatial Polygons Data Frame objects in various coordinates ----

farms.SPDF <- SpatialPolygonsDataFrame(farms.SP, farms.DF)

# --- Check that polygon IDs and farm IDs in data slot coincide

tile.IDs.this.dept <- sapply(slot(farms.SPDF, "polygons"),
  function(x) {slot(x,"ID")})

uu0 <- as.numeric(tile.IDs.this.dept) == farms.SPDF$farmId
if (any(! uu0)) {
  Log.error('Polygon IDs do not coincide with farm ID in data slot')
}

# --- Convert coordinates from GK (meters) back to lat-lon
# --- and create a second Spatial Polygons Data Frame.

farms.SPDF2 <- spTransform(farms.SPDF, CRSobj	= CRS(ll.crs.string) )

# --- Convert coordinates from Gauss-Kruger (meters) to UTM coordinates
# --- and create a new Spatial Polygons Data Frame.

farms.SPDF3 <- spTransform(farms.SPDF, CRS(utm.crs.string) )

rm(tile.IDs.this.dept, uu0)
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Write out created shape files using rgdal library ----

synth.farms.outdir <- "D:/ENSO/Projects/CNH3/data/campos_sinteticos/"
tt0 <- dir.create(synth.farms.outdir, showWarnings = TRUE)

# --- Write out polygons with lat-lon coordinates
synth.farms.outdir.ll <- paste0(synth.farms.outdir, 'latlon_coords/')
tt0 <- dir.create(synth.farms.outdir.ll, showWarnings = TRUE)

oldPath = getwd()
setwd(synth.farms.outdir.ll)

outfile <- paste(dept.names.noblanks[iii],"_synth_farms", sep = "")

rgdal::writeOGR(farms.SPDF2, dsn=".", layer=outfile, driver="ESRI Shapefile",
         overwrite_layer=TRUE, verbose = TRUE)

setwd(oldPath)

# --- Now write out polygons with Gauss-Kruger coordinates

synth.farms.outdir.GK <- paste0(synth.farms.outdir,'GK_coords/')
tt0 <- dir.create(synth.farms.outdir.GK, showWarnings = TRUE)

oldPath = getwd()
setwd(synth.farms.outdir.GK)

outfile <- paste(dept.names.noblanks[iii],"_synth_farms_GK", sep = "")

rgdal::writeOGR(farms.SPDF, dsn=".", layer=outfile, driver="ESRI Shapefile",
    overwrite_layer=TRUE, verbose = TRUE)

setwd(oldPath)

rm(synth.farms.outdir, outfile); gc()
# ------------------------------------------------------------------------------

# -----------------------------------------------------------------------------#
# --- Write out filled-in farm table to output text file ----

synth.farms.tables.outdir <- "D:/ENSO/Projects/CNH3/data/campos_sinteticos/tablas_datos/"
tt0 <- dir.create(synth.farms.tables.outdir, showWarnings = TRUE)

oldPath = getwd()
setwd(synth.farms.tables.outdir)

farm.outfile <- paste(dept.names.noblanks[iii],"_synth_farm_data.txt", sep = "")
write.table(farms.DF, file = farm.outfile,
            append = FALSE, sep = "\t", row.names=FALSE)

setwd(oldPath)
rm(synth.farms.tables.outdir, farm.outfile); gc()
# ------------------------------------------------------------------------------

}   # END of iteration over depts in Salado Basin


# -----------------------------------------------------------------------------#
# --- Write out master tile list to text file brefore ending this run ----

# --- The 'master.tile.list' object is written after a run is finished
# --- and is read before another run begins. In this way, a run that starts
# --- can knw if a tile has been used previously.

master.list.outfile <- paste(master.tile.outdir,"master_tile_data.txt", sep = "")
write.table(master.tile.list, file = master.list.outfile,
  append = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)

rm(master.tile.outdir, master.list.outfile); gc()
# ------------------------------------------------------------------------------






# ------------------------------------------------------------------------------
# --- Clean up all objects ----

remove(list = ls()); gc()
# ------------------------------------------------------------------------------













depts.en.cuenca <- c("9 DE JULIO","ADOLFO ALSINA","BOLIVAR","BRAGADO",
  "CARLOS CASARES","CARLOS TEJEDOR","COLON",
  "DAIREAUX","FLORENTINO AMEGHINO","GENERAL ARENALES",
  "GENERAL PINTO","GENERAL VIAMONTE","GENERAL VILLEGAS","GUAMINI",
  "HIPOLITO YRIGOYEN","JUNIN","LEANDRO N. ALEM", "LINCOLN","PEHUAJO",
  "PELLEGRINI","RIVADAVIA","SALLIQUELO","TRENQUE LAUQUEN","TRES LOMAS",
  "GENERAL ROCA","PRESIDENTE ROQUE SAENZ PENA",
  "ATREUCO","CATRILO","CHAPALEUFU","CONHELO",
  "MARACO","QUEMU QUEMU","TRENEL",
  "GENERAL LOPEZ")




















ccc <- rgeos::gWithin(urban.areas.utm, cuenca.A.UTM, byid = TRUE)
cc2 <- which(ccc)

plot(cuenca.A.UTM, axes = TRUE)
plot(urban.areas.utm[cc2, ], add=TRUE, border = "tomato")

urban.areas.utm@data[cc2, ]

uuu <- (sapply(slot(urban.areas.utm,"polygons"),
               function(x) slot(x,"area"))) / 10000

ccc <- rgeos::gWithin(water.areas, cuenca.A.ll, byid = TRUE)
cc3 <- which(ccc)


plot(water.areas[cc3, ], add=TRUE, border = "steelblue")
plot(road.areas[ , ], add=TRUE, col = "orange")

ppp <- GSIF::getSpatialTiles(cuenca.A.ll, block.x = 0.5, block.y = 0.5, 
                             overlap.percent = 0, limit.bbox = TRUE,
                             return.SpatialPolygons = TRUE)





# --- If the farms do not fit inside the department, because there
# --- not enought available tiles, the number of synthetic farm
# --- needs to be reduced until they fit.
# --- First, we compute 95% of all tiles (not all tiles in a department
# --- are occupied by farms).
# --- Then, original list of synthetic farms is trimmed until the
# --- number of tiles needed reaches the number of available tiles.
# --- The trimming proceeds form the largest farm towards smaller farms.

tt1 <- floor(n.tiles.in.dept * 0.95)  # 95% of tiles in department
tt2 <- cumsum(sim.farms$tiles)        # Cumulative sum of tiles needed
tt3 <- which(tt2 <= tt1)        # List of farms (from largest) that fit inside department

# --- Now sim.farms will be trimmed to eliminate farms that
# --- do not fit inside department (insufficient tiles).
# --- Smallest farms are deleted first until the sum of areas reaches
# --- 95% of tiles inside department.

sim.farms <- sim.farms[tt3, ]   # Delete smallest farms

# --- Adjust these vectors after trimming farms

sim.farm.areas <- sim.farm.areas[tt3] # Adjusted farm size (multiples of point area)
sim.farm.IDs <- sim.farm.IDs[tt3]     # IDs of simulated farms
n.sim.farms <- nrow(sim.farms)    		# number of simulated farms

# --- Plot cumulative number of tiles and tiles in department

if (plot.results) {
  plot(tt2, type="l", col="steelblue",
    xlab="Number of farms",
    ylab="Cumulative number of tiles",
    main = dept.name.lc)
  abline(h = c(n.tiles.in.dept, n.tiles.in.dept * 0.95), col="grey60")
} 
rm(tt1,tt2,tt3)

} #End of check for number of needed tiles > tiles in department

remove(ww1,tiles.required, total.tiles.required, n.tiles.in.dept, uuu); gc()
# ------------------------------------------------------------------------------






# -----------------------------------------------------------------------------#
# --- Function to reduce number of synthetic farms if necessary ----

FilterFarms <- function(synth.farms, max.area.rate = 0.95) {
  
  filtered.synth.farms <- NULL
  
  FilterFarmsPerDept <- function(dept, synth.farms, max.area.rate) {
    dept.farms    <- synth.farms[synth.farms$dept.code == dept$dept.code, ]
    adjusted.area <- sum(dept.farms$adjusted.farm.size)
    
    while (adjusted.area > (max.area.rate * dept$area)) {
      row.number    <- sample(x = seq(from = 1, to = nrow(dept.farms)),
        size=1)
      adjusted.area <- sum(dept.farms$adjusted.farm.size)
      dept.farms    <- dept.farms[-c(row.number),]
    }
    
    if (is.null(filtered.synth.farms)) {
      filtered.synth.farms <<- dept.farms
    } else {
      filtered.synth.farms <<- rbind(filtered.synth.farms, dept.farms)
    }
  }
  
  depts.df  <- unique(synth.farms[c("dept.code", "area")])
  depts.lol <- apply(depts.df, MARGIN=1, FUN=as.list)
  lapply(depts.lol, FUN=FilterFarmsPerDept, synth.farms=synth.farms,
    max.area.rate=max.area.rate)
  return (filtered.synth.farms)
}
# ------------------------------------------------------------------------------


# -----------------------------------------------------------------------------#
# --- Reduce number of farms if their total area is > dept area ----

ddd <- dplyr::inner_join(farm.table, dplyr::tbl_df(basin.depts)) %>%
  dplyr::select(-c(dept.head:cent.lat, area.igm, perim)) %>%
  dplyr::mutate(area = area * 100) %>%
  dplyr::arrange(prov.code, dept.code, farm.id) %>%
  dplyr::arrange(prov.code, dept.code, owner.id, farm.id)

set.seed(128)
farm.table <- FilterFarms(ddd, max.area.rate = 0.95)

dd3 <- dplyr::tbl_df(ddd) %>%
  dplyr::group_by(dept.code, dept.name) %>%
  dplyr::summarise(total.orig = sum(adjusted.farm.size))

dd4 <- dplyr::tbl_df(farm.table) %>%
  dplyr::group_by(dept.code) %>%
  dplyr::summarise(total.adj = sum(adjusted.farm.size))

dplyr::inner_join(dd3, dd4) %>%
  dplyr::filter(total.orig != total.adj)

rm(ddd, dd2, dd3, dd4)
# ------------------------------------------------------------------------------

nacol <- function(spdf) {
  resample <- function(x, ...) x[sample.int(length(x), ...)]
  nunique <- function(x){unique(x[!is.na(x)])}
  
  np = nrow(spdf)
  adjl = spdep::poly2nb(spdf)
  cols = rep(NA, np)
  cols[1]=1
  nextColour = 2
  
  for(k in 2:np){
    adjcolours = nunique(cols[adjl[[k]]])
    if(length(adjcolours)==0){
      cols[k]=resample(cols[!is.na(cols)],1)
    }else{
      avail = setdiff(nunique(cols), nunique(adjcolours))
      if(length(avail)==0){
        cols[k]=nextColour
        nextColour=nextColour+1
      }else{
        cols[k]=resample(avail,size=1)
      }
    }
  }
  return(cols)
}
schmidtfederico/glmwgen documentation built on May 29, 2019, 3:41 p.m.