R/gen_lookup_tax.R

Defines functions gen_lookup_tax

Documented in gen_lookup_tax

#' This function generates the lookup table with a tax policy.
#' @param tax_amount                     is the amount of tax on the unit of groundwater extracted. Defaults to 1.1.
#' @param DSSAT_files                    is the directory where DSSAT files are located. Defaults to "C:/Users/manirad/Dropbox/DSSAT subregions work pre-2018 annual meeting/subregion KS files/outputs_for_econ/revised".
#' @param soil_file                      is the file that contains base soil file. Defaults to "C:/Users/manirad/Downloads/test/Well_Soil Type_generator_07.csv".
#' @param well_capacity_file             is the file that contains base well capacity. Defaults to "C:/Users/manirad/Downloads/test/Well_Capacity_ganarator.csv".
#' @param price_file                     is the file that includes crop prices. Defaults to "C:/Users/manirad/Dropbox/DSSAT subregions work pre-2018 annual meeting/subregion KS files/crop_prices.csv".
#' @param fixed_cost_file                is the file tha includes fixed costs (per acre) costs of production. Defaults to "C:/Users/manirad/Downloads/test/fixed_cost_input.csv".
#' @param pumping_cost                   is the cost of pumping an acre-inch of groundwater excluding taxes. Defaults to 3.56 which is the cost of pumpin an acre-inch of groundwater in parts of Kansas.
#' @param default_well_capacity_col_name is the name of the well capacity column generated from the MODFLOW model. Defaults to 'Well_Capacity(gpm)' as this is the original column name we started with.
#' @param soil_moisture_targets          is the vector of soil moisture targets that are generated by the DSSAT model. (25, 35, 45, 55, 65, 75).
#' @param IFREQ_seq                      is the difference between two consequtive irrigation frequencies (IFREQ) in DSSAT. Defaults to 2.
#' @param IFREQ_interpolate              is the size of interpolation interval. Defaults to 0.1 so that IFREQ_seq of 2 adds 0, 0.1, 0.2, ..., 1.9.
#' @return                               returns the output table.
#' @examples
#' \dontrun{
#' gen_lookup_tax(tax_amount = 2)
#' }
#' @export
gen_lookup_tax = function(tax_amount              = 1.1,
                          DSSAT_files         = "./input_files/DSSAT_files",
                          soil_file           = "./input_files/Well_Soil Type_generator_07.csv",
                          well_capacity_file  = "./input_files/Well_Capacity_ganarator.csv",
                          price_file          = "./input_files/crop_prices.csv",
                          fixed_cost_file     = "./input_files/fixed_cost_input.csv",
                          pumping_cost = 3.56,
                          default_well_capacity_col_name = "Well_Capacity(gpm)",
                          soil_moisture_targets = c(25, 35, 45, 55, 65, 75),
                          IFREQ_seq = 2,
                          IFREQ_interpolate = .1){
  library(data.table)
  tax_amount = tax_amount - 0.1
  # tax_amount = 1
  print(paste("this is the tax:", tax_amount, sep = " "))


  #========================================================================================
  # # # # # # # #                       Input files                        # # # # # # # #
  #========================================================================================
  #----------------
  # 1) DSSAT data
  #----------------
  # the output of DSSAT is a bit strange and it needs some cleaning.
  # there are some header rows which result in the columns not having names assigned. The code below is solving that issue to have the data formatted cleanly.
  # Notice that FNAM....  column is the key here. The way it is written now, it breaks column V9 which includes both PAW and IFREQ to two separate columns.

  # these are the actual column names that will be assigned:
  col_new = c("RUNNO", "TRNO", "R_pound", "O_pound", "C_pound", "CR", "MODEL", "EXNAME", "FNAM", "WSTA", "SOIL_ID", "SDAT", "PDAT", "EDAT", "ADAT", "MDAT", "HDAT", "DWAP", "CWAM",
              "HWAM", "HWAH", "BWAH", "PWAM", "HWUM", "H_AM", "H_UM",  "HIAM",  "LAIX",  "IR_M" , "IRCM",  "PRCM",  "ETCM",  "EPCM",  "ESCM",  "ROCM",  "DRCM",  "SWXM",  "NI_M",  "NICM",
              "NFXM",  "NUCM",  "NLCM",  "NIAM",  "CNAM",  "GNAM", "N2OEC",  "PI_M",  "PICM",  "PUPC",  "SPAM",  "KI_M",  "KICM",  "KUPC",  "SKAM",  "RECM",  "ONTAM",   "ONAM",  "OPTAM",
              "OPAM",   "OCTAM",    "OCAM",   "CO2EC",    "DMPPM",    "DMPEM",    "DMPTM",    "DMPIM",     "YPPM",     "YPEM",     "YPTM",     "YPIM",    "DPNAM",    "DPNUM",    "YPNAM",
              "YPNUM",  "NDCH", "TMAXA", "TMINA", "SRADA", "DAYLA",   "CO2A",   "PRCP",   "ETCP",   "ESCP",   "EPCP", "PAW", "IFREQ")


  # note: each file includes 1 crop-soil type-climate and it includes different PAW and IFREQ's
  filenames = list.files(path = DSSAT_files , pattern="*.OSU", full.names=TRUE)

  # read all the OSU files and remove NA's and fix the header issues
  ldf <- lapply(filenames, read.table, fill=T)                                       # these few lines are a bit complicated, but they read in all the files at the same time
  ldf <- lapply(ldf, function(x) x[-2,])                                             # remove the NA rows that will be generated becase of DSSAT formatting
  ldf <- mapply(cbind, ldf, "file_name"=filenames, SIMPLIFY=F)                       # and put them back together as a data.tabale rather than a list
  KS_DSSAT = data.table::rbindlist(ldf)                                                          # KS_DSSAT is the dataset we will be working on
  KS_DSSAT = data.table(KS_DSSAT)
  KS_DSSAT = KS_DSSAT[complete.cases(V85)]                                           # remove other potential NA's
  KS_DSSAT[, file_name:= NULL]                                                       # we don't need the file names
  rm(ldf)                                                                            # ldf was a auxilary thing so remove ldf here

  # break down V9 into IFREQ and PAW
  KS_DSSAT[, V9 := as.character(V9)]                                                 # make it a character vector
  KS_DSSAT[, PAW := substr(V9, 1, regexpr("_", V9)-2)]                               # create PAW from V9 by subsetting
  KS_DSSAT[, IFREQ := substr(V9, regexpr("Q", V9)+2, nchar(V9))]                     # create IFREQ from V9 by subsetting
  KS_DSSAT[, V9 := NULL]                                                             # we don't need V9 anumore

  data.table::setnames(KS_DSSAT, old = colnames(KS_DSSAT), new = col_new)                        # let's change the VXX col names to col_new

  ##### From here, it will loop over the soil types that you have for your region. Let's setup some basics:

  unique_soil = KS_DSSAT[, unique(SOIL_ID)]                                          # unique_soil is the unique soil types in the area... there are 7 soil types in Finney county

  KS_DSSAT_2 = data.table::copy(KS_DSSAT)                                                        # we need a copy of KS_DSSAT for later use because KS_DSSAT will be modified and we need the original version


  lookup_table_all_years_2 = data.table::data.table()                                            # 1) we need to create some empty data.tables. Otherwise, R won't recognize them.
  lookup_table_quarter_2   = data.table::data.table()                                            # 2) these will store outputs of each run for each soil type
  lookup_table_well_2      = data.table::data.table()                                            # 3) and they will be the outputs that we will use in Econ_model_annual_meeting.R

  for (i in 1:length(unique_soil)) { # the loop starts here.

    #----------------
    # 2) Well capacity data
    #----------------
    # we want to generate profit-maximizing decisions for each well capacity and each soil type. To do se, we want to generate a look up table that provides us with profit-maximizing
    # decisions for each well capacity (notice that the loop is over soil types, so we are conditioning each run of the for loop on soil type.)

    soil_type     = data.table::fread(soil_file)                  # this is just a tempelate Well_ID Soil_Type table. The value that is 07 doesn't matter. We will change it. The important thing is that Well_ID's go from 1001 to 3901 capturing a reasonable range of capacities for irrigation: 100-3000 gpm.
    soil_type[, Soil_Type := unique_soil[i]]

    ### ### ### @: this needs to be updated

    soil_type[, Soil_Type:= gsub("KSFC00000", "KS0000000", Soil_Type)]        # this is about naming soil types. Maybe different in your area. In KS hydrology and DSSAT had different names for soil types. This is trying to unify them.

    well_capacity = data.table::fread(well_capacity_file)                      # read in well capacity for each well by well id. This is the 100-3000 gpm range connected to Well_ID

    data.table::setkey(soil_type, Well_ID)                                                # set key is needed for merging two data.tables. We want to merge them by their Well_ID
    data.table::setkey(well_capacity, Well_ID)                                            # set key is needed for merging two data.tables. We want to merge them by their Well_ID

    well_capacity_data = soil_type[well_capacity]                             # Now we have soil type and well capacity combination
    data.table::setnames(well_capacity_data, default_well_capacity_col_name, "Well_capacity")       # change the name of the column to Well_capacity.


    #----------------
    # 3) Price data
    #----------------
    # add price of each crop. Here, we are assuming irrigated and dryland crops have the same price.
    # FA -> fallow
    # MZ -> corn
    # WH -> wheat
    # SG -> sorghum
    # SB -> soybean


    #                                                      ******************** you can change these **********************
    price_dt = data.table::fread(price_file) # price per kg
    data.table::setkey(price_dt, CR)                                                                              # setkey for merging later

    #----------------
    # 4) Cost data
    #----------------
    # add fixed and variable costs: fixed    costs               come from the crop budgets. it's a new input added.
    # add fixed and variable costs: variable costs of water also come from the crop budgets that Bill provided.
    # >>>>> create the fixed_cost_input.csv from the crop budgets for your region. <<<<<

    #                                                      ******************** you can change these **********************

    cost_dt = well_capacity_data[,.(Well_ID)]                                    # we want to assign marginal pumping cost by well, so get Well_ID's
    data.table::setkey(cost_dt, Well_ID)
    # cost_dt[, cost_per_acre_mm := 3.2/25.4]
    cost_dt[, cost_per_acre_mm := (pumping_cost + tax_amount)/25.4]                                     # and assign the SAME pumping cost (for now) to every well. This means that pumping cost does not change over time either.

    fixed_cost     = data.table::fread(fixed_cost_file)                               # read costs per acre that are independent of water.
    fixed_cost[irr==0, Crop := paste("dry", Crop, sep = "-")]                    # separate dryland and irrigated costs
    fixed_cost[, irr := NULL]                                                    # remove the irr column
    fixed_cost = rbind(fixed_cost, data.table::data.table(Crop="FA", f_cost = 0))            # assign a 0 fixed cost to fallow acres. If you don't have these yet, you can fake values and run the model.
    data.table::setkey(fixed_cost, Crop)                                                     # setkey for merging later.


    #========================================================================================
    # # # # # # # #                       Data Management                     # # # # # # # #
    #========================================================================================

    # Now, we have all the inputs that we need. We first, clean the data and create something that we can eun the optimization on.
    # in the future we may need to add more data above including the elevation of the surface and bedrock to get depth to water.

    #----------------
    # 1) clean DSSAT data
    #----------------
    KS_DSSAT=  KS_DSSAT_2[,.(SOIL_ID, CR, IFREQ, PAW, SDAT, IRCM, PRCP, PRCM, HWAM)]                            # select the columns we are going to work with: soil type, crop, ifreq, paw, year, irrigation, rainfall, crop yield
    KS_DSSAT[, IRCM := as.numeric(as.character(IRCM))]                                                    # clean a little: convert to numbers from characters
    KS_DSSAT=  KS_DSSAT[complete.cases(IRCM)]                                                             # remove NA's
    KS_DSSAT[, SDAT := substr(SDAT, 1, 4)]                                                                # get year from SDAT
    cols_change = colnames(KS_DSSAT)[c(3:9)]                                                       # The columns we want to convert to numeric
    KS_DSSAT[, (cols_change):= lapply(.SD, function(x) as.numeric(as.character(x))), .SDcols=cols_change] # change all those columns to numeric
    cols_change = colnames(KS_DSSAT)[c(1,2)]                                                              # The columns we want to convert to character
    KS_DSSAT[, (cols_change):= lapply(.SD, as.character), .SDcols=cols_change]                            # change those columns from factor to character

    KS_DSSAT_0 = KS_DSSAT[IFREQ==0]                                                                       # generate all PAW's for dryland crops. IFREQ does not matter for dryland crops, but we generate them for computational reasons.
    KS_DSSAT   = KS_DSSAT[IFREQ!=0]
    KS_DSSAT_0 <- KS_DSSAT_0[rep(seq_len(nrow(KS_DSSAT_0)), each=6)]                                      # repeat each line 6 times because we want to have these 6 PAW's: 25, 35, 45, 55, 65, 75


    KS_DSSAT_0[, PAW := rep(soil_moisture_targets, nrow(KS_DSSAT_0)/length(soil_moisture_targets))]
    KS_DSSAT   = rbind(KS_DSSAT, KS_DSSAT_0)                                                                # add the dryland crops to the irrigated ones and create the final output.
    data.table::setkey(KS_DSSAT, SOIL_ID, CR, IFREQ, PAW, SDAT)


    #----------------
    # 2) Interpolate to find crop yield and water use for every IFREQ
    #----------------
    KS_DSSAT= KS_DSSAT[PRCP > -10 & HWAM > -10]                                                # remove the years that did not generate crop yield

    KS_DSSAT= KS_DSSAT[,.(SOIL_ID, CR, PAW, SDAT, IFREQ, IRCM, PRCP, PRCM, HWAM)]                    # reorganize the columns
    data.table::setkey(KS_DSSAT,      SOIL_ID, CR, PAW, SDAT, IFREQ)

    # this part is trying to interpolate water use and yield for IFREQ's between values.

    # >>>>>>>>>>>>>>>>>>>>>>>>>
    KS_DSSAT[, lead_yield  :=     dplyr::lead(HWAM, n = 1L), by=c("SOIL_ID", "CR", "PAW", "SDAT")]           # create a lead variable; ignore the warnings
    KS_DSSAT[, lead_irr_mm :=     dplyr::lead(IRCM, n = 1L), by=c("SOIL_ID", "CR", "PAW", "SDAT")]           # create a lead variable; ignore the warnings

    KS_DSSAT <- KS_DSSAT[rep(seq_len(nrow(KS_DSSAT)), each=IFREQ_seq*10)]                                # repeat each line 20 times so that we can interpolate

    df_foo=data.table::data.table(x=seq(0, IFREQ_seq-.1, IFREQ_interpolate))                                                       # 2/20 = .1, so create a data.table at distances of 0.1
    df_foo=do.call(rbind, replicate(nrow(KS_DSSAT)/nrow(df_foo), df_foo, simplify = F))        # and repeat it so that it's the same size as the KS_DSSAT
    KS_DSSAT=data.table::data.table(KS_DSSAT, IFREQ_int=df_foo$x)                                          # merge KS_DSSAT and df_foo. notice that IFREQ is intervals of 2. IFREQ_int is the interpolated value.
    KS_DSSAT[, foo := max(IFREQ), by=c("SOIL_ID", "CR")]                                       # this is to not extrapolate beyond max IFREQ
    KS_DSSAT[, IFREQ := IFREQ + IFREQ_int]                                                     # now we have IFREQ's that are by 0.1
    KS_DSSAT=KS_DSSAT[IFREQ<=foo]                                                              # obviously, we don't want to extrapolate beyong our IFREQ
    KS_DSSAT[, foo := NULL]                                                                    # we are done with foo, so we remove it.

    KS_DSSAT[is.na(lead_yield), lead_yield :=0]                                                # set NA yield to 0.
    KS_DSSAT[, yield_int := HWAM + (lead_yield - HWAM)*IFREQ_int]                              # now that we have all the IFREQ's, let's fnd interpolated yield for each IFREQ

    KS_DSSAT[is.na(lead_irr_mm), lead_irr_mm :=0]
    KS_DSSAT[, irr_int := IRCM + (lead_irr_mm - IRCM)*IFREQ_int]                               # same thing for irrigation

    KS_DSSAT= KS_DSSAT[,.(SOIL_ID, CR, IFREQ, PAW, SDAT, irr_mm=irr_int, PRCP, PRCM, yield_kg_ac= yield_int)] # Let's keep only the interpolated columns. yield is still kg/ha here
    KS_DSSAT[, yield_kg_ac := yield_kg_ac * 0.4046]                                                     # it becomes kg/acre here
    data.table::setkey(KS_DSSAT, SOIL_ID, CR, IFREQ, PAW, SDAT)

    KS_DSSAT = KS_DSSAT[IFREQ == 0 | IFREQ >= IFREQ_seq]                                                # IFREQ's of between 0 and 2 don't make sense because they are interpolation of dryland and very high well capacities, so we remove them.
    KS_DSSAT = KS_DSSAT[IFREQ  !=0 | PAW   == soil_moisture_targets[1]]                                                # We don't need PAW of 25, 35, 45, 55, 65, 75 for dryland anymore. so we remove them here.
    KS_DSSAT[IFREQ==0, CR := paste("dry", CR, sep = "-")]                                       # let's be specific about dryland crops. IF IFREQ is 0, then it's a dryland


    #----------------
    # 3) IFREQ for each well capacity-acre
    #----------------
    # This part generates a data.table that devides each 130-acre circle into 4 quarter circles. and creates 5 different combinations:
    # 1. all four dryland,
    # 2. 32.5 acres irrigated 97.5 dryland
    # 3. 65 acres irrigated 65 dryland
    # 4. 97.5 acres irrigated 32.5 dryland
    # 5. all four irrigated.
    # dryland quarters are only allowed to grow dryland crops or be fallow.
    # irrigated quarters can only be irrigated. Total acres is always 130. However, NOTE that the variable 'tot_acres' is really total irrigated acres not total acres planted.


    number_of_crops = length(KS_DSSAT[, unique(CR)])                                  # we have 6 crops: 3 irrigated and 3 dryland crops.

    # 0 Fallow/ dryland for each crop. We add 0 for fallow. The idea is that at the end of the day there is no benefit in doing fallow. It is possible that expected profit become 0 and one might do fallow.
    # 1 MZ
    # 2 WH
    # 3 SG
    # 4 dry-MZ
    # 5 dry-WH
    # 6 dry-SG

    well_capacity_data = rbind(data.table::data.table(Well_ID=1, Soil_Type=unique(well_capacity_data$Soil_Type), Well_capacity=0), well_capacity_data) # add the first line which is dryland with well capacity of 0. Well_ID ==1 for the dryland well.
    well_capacity_data[, quarter_1 := 0]                                                     # define four quarters. Assign 0 to all of them and then we will modify it.
    well_capacity_data[, quarter_2 := 0]                                                     # define four quarters. Assign 0 to all of them and then we will modify it.
    well_capacity_data[, quarter_3 := 0]                                                     # define four quarters. Assign 0 to all of them and then we will modify it.
    well_capacity_data[, quarter_4 := 0]                                                     # define four quarters. Assign 0 to all of them and then we will modify it.


    well_capacity_data <- well_capacity_data[rep(seq_len(nrow(well_capacity_data)), each=5)] # repeat each of them 5 tmes because we have: 0, 32.5, 65, 97.5, 130


    # >>>>>>>>>>>>>>>>>>>>>>>>>
    well_capacity_data[,  tot_acres := rep(c(0, 32.5, 65, 97.5, 130), nrow(well_capacity_data)/5)]                            # assign total acres to them. This is total irrigated acres in a quarter circle
    well_capacity_data <- well_capacity_data[rep(seq_len(nrow(well_capacity_data)), each=(number_of_crops+1))] # repeat them again by crops+1 because of fallow

    # >>>>>>>>>>>>>>>>>>>>>>>>>
    well_capacity_data[,quarter_4 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))]                                      # just call them from fallow to # of crops. ignore the warnings.
    well_capacity_data[,quarter_3 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))]                                      # just call them from fallow to # of crops. ignore the warnings.
    well_capacity_data[,quarter_2 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))]                                      # just call them from fallow to # of crops. ignore the warnings.
    well_capacity_data[,quarter_1 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))]                                      # just call them from fallow to # of crops. ignore the warnings.

    well_capacity_data[, ifreq    := round((tot_acres * 1)/( Well_capacity * 0.053030303149011), 1)] # find IFREQ from well capacity for each total number of acres irrigated. Formula by Allan
    well_capacity_data[!(complete.cases(ifreq) & ifreq < 100), ifreq := 0]
    well_capacity_data[ifreq<2 & ifreq > 0, ifreq := 2]                                      # set to low IFREQ's equal to 2. Note that too low of IFREQ means very high well capacities, so it doesn't matter much.

    # we want to find which crop-PAW combination produces the largest expected profit here for any quarter circle. So we have all the possible crops for each quarter circle.
    # the trick is that making it vertical, which we do below, will make it easy for optimization then. Notice that tot_acres plays a key role here because the toal number of acres
    #  irrigated together with well capacity determines the IFREQ which is important for crop yield and profitability.

    well_capacity_data= reshape2::melt(well_capacity_data, id=c("Well_ID", "Soil_Type", "Well_capacity", "tot_acres", "ifreq")) # let's melt it to create a vertical data.table
    well_capacity_data= data.table::data.table(well_capacity_data)
    data.table::setkey(well_capacity_data, Well_ID, Soil_Type, tot_acres)


    well_capacity_data[, CR := ifelse(value==1, "MZ",  ##            >>>> THIS NEEDS TO BE UPDATED for our region.<<<< Now you have all crop-quarter combinations by name.
                                      ifelse(value==2, "WH",
                                             ifelse(value==3, "SG",
                                                    ifelse(value==4, "dry-MZ",
                                                           ifelse(value==5, "dry-WH",
                                                                  ifelse(value==6, "dry-SG", "FA"))))))]
    well_capacity_data[, value:=NULL]                                                                     # and we dont need this anymore.
    well_capacity_data[, quarter:= ifelse(variable== "quarter_1", 1,                                      # let's just define a variable called quarter.
                                          ifelse(variable== "quarter_2", 2,
                                                 ifelse(variable== "quarter_3", 3, 4)))]
    well_capacity_data[, variable:=NULL]                                                                  # and get rid of this.


    tot_acres_0       = well_capacity_data[tot_acres == 0]                                                # dryland circles can only do dryalnd or fallow
    tot_acres_0       = tot_acres_0[data.table::like(CR, "FA") | data.table::like(CR, "dry")]

    tot_acres_325     = well_capacity_data[tot_acres == 32.5]                                             # if 32.5 acres are irrigated, only one quarter can be irrigated, the rest is dryland or fallow
    tot_acres_325     = tot_acres_325[(quarter==1 & !(data.table::like(CR, "FA") | data.table::like(CR, "dry"))) | (quarter!=1 & (data.table::like(CR, "FA") | data.table::like(CR, "dry")))]

    tot_acres_65      = well_capacity_data[tot_acres == 65]                                               # if 65 acres are irrigated, only two quarters can be irrigated, the rest is dryland or fallow
    tot_acres_65      = tot_acres_65[(quarter <3 & !(data.table::like(CR, "FA") | data.table::like(CR, "dry"))) | (quarter  >2 & (data.table::like(CR, "FA") | data.table::like(CR, "dry")))]

    tot_acres_975     = well_capacity_data[tot_acres == 97.5]                                             # if 97.5 acres are irrigated, only three quarter can be irrigated, the rest is dryland or fallow
    tot_acres_975     = tot_acres_975[(quarter<4 & !(data.table::like(CR, "FA") | data.table::like(CR, "dry"))) | (quarter  >3 & (data.table::like(CR, "FA") | data.table::like(CR, "dry")))]

    tot_acres_130     = well_capacity_data[tot_acres == 130]                                             # if 130 acres are irrigated, all of the circle should irrigated, there is no dryland or fallow
    tot_acres_130     = tot_acres_130[!(data.table::like(CR, "FA") | data.table::like(CR, "dry"))]


    well_capacity_data = rbind(tot_acres_0, tot_acres_325, tot_acres_65, tot_acres_975, tot_acres_130) # merge  the 5 datasets generated above. This is the new well_capacity_data
    rm(tot_acres_0, tot_acres_325, tot_acres_65, tot_acres_975, tot_acres_130)                         # remove the 5 datasets generated above. we don't need them anymore.
    data.table::setkey(well_capacity_data, Well_ID, tot_acres, quarter)

    #========================================================================================
    # # # # # # # #                        Optimization                       # # # # # # # #
    #========================================================================================
    # we start by merging DSSAT outputs with well capacities
    KS_DSSAT[, IFREQ := round(IFREQ, 1)]                        # round it so that it merges. Otherwise, some of them won't be exactly the same.
    number_of_years = nrow(unique(KS_DSSAT, by="SDAT"))         # number of years that we have data for

    foo = data.table::data.table(SOIL_ID = unique_soil, CR = "FA", IFREQ = 0, PAW = soil_moisture_targets[1],
                     SDAT = min(KS_DSSAT$SDAT), irr_mm=0, PRCP = 200, PRCM = 200, yield_kg_ac = 0) # in these few lines we generate fallow acres for all years
    foo = foo[rep(seq_len(nrow(foo)), each=number_of_years)]                                                                    # repeat each line by the number of years
    baz = unique(KS_DSSAT, by="SDAT")

    # >>>>>>>>>>>>>>>>>>>>>>>>>
    foo[, SDAT := rep(baz$SDAT, nrow(foo)/nrow(baz))]
    foo[, PRCP := rep(baz$PRCP, nrow(foo)/nrow(baz))]
    KS_DSSAT= rbind(KS_DSSAT, foo)                                                                                              # and then merge fallow's with other acres.

    data.table::setkey(KS_DSSAT, SOIL_ID, CR, IFREQ)
    well_capacity_data[data.table::like(CR, "dry"), ifreq := 0]                                                                             # making sure that if crop is dryland, then ifreq = 0
    well_capacity_data[CR == "FA",      ifreq := 0]                                                                             # same for fallow

    data.table::setkey(well_capacity_data, Soil_Type, CR, ifreq)

    # foo_irr is what will be working with from here.

    foo_irr = merge(well_capacity_data, KS_DSSAT, by.x = c("Soil_Type", "CR", "ifreq"), by.y = c("SOIL_ID", "CR", "IFREQ"), allow.cartesian = T) #so, KS_DSSAT has DSSAT outputs for each combination of soil, crop and IFREQ: water use, yield. On the other hand, well_capacity_Data has IFREQ for each well capacity and acres. So, we need to merge them.

    foo_irr = foo_irr[,.(Well_ID, SOIL_ID=Soil_Type, Well_capacity, tot_acres, IFREQ=ifreq, CR, quarter, PAW, SDAT, irr_mm, PRCP, PRCM, yield_kg_ac)] # rename columns
    data.table::setkey(foo_irr, CR)
    foo_irr=foo_irr[price_dt]                                                                                                                   # merge it with crop price
    foo_irr = foo_irr[complete.cases(Well_ID)]
    data.table::setkey( foo_irr, Well_ID)
    cost_dt = rbind(data.table::data.table(Well_ID=1, cost_per_acre_mm = unique(cost_dt$cost_per_acre_mm)), cost_dt)                                        # add costs for dryland acres to. It will not affect profits at the end, because irrigation is zero.

    data.table::setkey(cost_dt, Well_ID)
    foo_irr=foo_irr[cost_dt]                                                                                                                    # merge with cost of pumping
    data.table::setkey( foo_irr, CR)
    data.table::setkey( fixed_cost, Crop)
    foo_irr=foo_irr[fixed_cost]                                                                                                                 # merge with fixed costs per acre
    foo_irr=foo_irr[complete.cases(Well_ID)]                                                                                                    # remove NA's

    foo_irr[, profit := 32.5 * (yield_kg_ac * price - irr_mm * cost_per_acre_mm - f_cost)]    # Estimate profits for each quarter
    foo_irr[, irrigation := 32.5 * irr_mm * 0.0393701]                                        # Estimate irrigation for each quarter
    data.table::setkey(foo_irr, Well_ID, tot_acres, quarter, SDAT)

    foo_irr_2 = data.table::copy(foo_irr)                                                                 # we need this for later

    # optimization starts here:

    foo_irr[, mean_profit_PAW := mean(profit),      by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW")]    # find mean profit for each "Well_ID", "tot_acres", "quarter", "CR", "PAW"
    foo_irr[, sd_profit_PAW   := sd(profit),        by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW")]    # find standard deviation of profit for each "Well_ID", "tot_acres", "quarter", "CR", "PAW"
    foo_irr[, mean_irr_PAW    := mean(irrigation),  by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW")]    # find mean irrigation for each "Well_ID", "tot_acres", "quarter", "CR", "PAW"
    foo_irr=  unique(foo_irr,                       by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW"))    # after that, we need unique observations by "Well_ID", "tot_acres", "quarter", "CR", "PAW"

    # the caulation above shows expected profit and water use for each quarter circle for each crop and for each PAW.
    # next, we need to find the crop-PAW combination that maximizes expected profit for each quarter.

    foo_irr[,  profit_quarter     := max(mean_profit_PAW), by=c("Well_ID", "tot_acres", "quarter")]
    foo_irr=   foo_irr[profit_quarter == mean_profit_PAW]
    foo_irr[,  count := .N ,    by=c("Well_ID", "tot_acres", "quarter")]                                     # these next few lines makes sure that we have unique observations
    foo_irr  = unique(foo_irr,  by=c("Well_ID", "tot_acres", "quarter"))
    foo_irr = foo_irr[Well_capacity>0 | tot_acres == 0]
    foo_irr[, count := .N , by=c("SOIL_ID", "Well_capacity", "tot_acres", "SDAT")]

    # then we want to find maximum profit for each well capacity.

    foo_irr[,  profit_tot_acres    := sum(profit_quarter), by=c("Well_ID", "tot_acres")]
    foo_irr[,  irr_tot_acres       := sum(mean_irr_PAW),   by=c("Well_ID", "tot_acres")]
    foo_irr[, profit_Well_ID := max(profit_tot_acres), by=c("Well_ID")]
    foo_irr=  foo_irr[  profit_Well_ID == profit_tot_acres]  ####        now, we have irrigation decisions at the beginning of the season for each well capacity including dryland


    # next, we want to create 3 look up tables
    # one for each well capacity: lookup_table_well
    # one for each quarter circle in a well capacity: lookup_table_quarter
    # one for each quarter in a well capacity in a given year in our historical data from 1981-2009: lookup_table_all_years


    lookup_table_quarter = foo_irr[Well_capacity>0,.(Well_capacity, SOIL_ID, tot_acres, quarter, CR, PAW, mean_irr_PAW, profit_quarter)]
    data.table::setkey(lookup_table_quarter, Well_capacity, quarter)

    lookup_table_well = unique(foo_irr,   by=c("Well_ID", "tot_acres"))
    lookup_table_well=  lookup_table_well[Well_capacity>0,.(Well_capacity, SOIL_ID, tot_acres, irr_tot_acres, profit_Well_ID)]
    data.table::setkey(lookup_table_well, Well_capacity)

    baz = lookup_table_quarter[,.(Well_capacity, tot_acres, quarter, CR, PAW, id=1)]
    data.table::setkey(baz, Well_capacity, tot_acres, quarter, CR, PAW)
    data.table::setkey(foo_irr_2, Well_capacity, tot_acres, quarter, CR, PAW)
    baz = foo_irr_2[baz]
    lookup_table_all_years = baz[,.(Well_ID, Well_capacity, SOIL_ID, tot_acres, IFREQ, CR, quarter, PAW, SDAT, PRCP, PRCM, yield_kg_ac, irrigation, profit)]

    lookup_table_all_years_2       = rbind(lookup_table_all_years_2, lookup_table_all_years)       # these three lines are merging outputs of each i in the loop, i.e., this is the final output for all soil types.
    lookup_table_quarter_2         = rbind(lookup_table_quarter_2,   lookup_table_quarter)         # these three lines are merging outputs of each i in the loop, i.e., this is the final output for all soil types.
    lookup_table_well_2            = rbind(lookup_table_well_2,      lookup_table_well)            # these three lines are merging outputs of each i in the loop, i.e., this is the final output for all soil types.

    print(i) # so that we know the loop is working

  } # end of loop

  data.table::setkey(lookup_table_all_years_2, SOIL_ID, Well_capacity, SDAT)
  data.table::setkey(lookup_table_quarter_2,   SOIL_ID, Well_capacity, quarter)
  data.table::setkey(lookup_table_well_2,      SOIL_ID, Well_capacity)

  write.csv(lookup_table_well_2,      "lookup_table_well_2.csv")         # save the data.
  saveRDS(lookup_table_quarter_2,     "lookup_table_quarter_2.rds")      # save the data.
  saveRDS(lookup_table_all_years_2,   "lookup_table_all_years_2.rds")    # save the data.
}
manirouhirad/MODSSAT documentation built on April 15, 2024, 11:31 p.m.