#' This function generates the lookup table with a tax policy.
#' @param tax_amount is the amount of tax on the unit of groundwater extracted. Defaults to 1.1.
#' @param DSSAT_files is the directory where DSSAT files are located. Defaults to "C:/Users/manirad/Dropbox/DSSAT subregions work pre-2018 annual meeting/subregion KS files/outputs_for_econ/revised".
#' @param soil_file is the file that contains base soil file. Defaults to "C:/Users/manirad/Downloads/test/Well_Soil Type_generator_07.csv".
#' @param well_capacity_file is the file that contains base well capacity. Defaults to "C:/Users/manirad/Downloads/test/Well_Capacity_ganarator.csv".
#' @param price_file is the file that includes crop prices. Defaults to "C:/Users/manirad/Dropbox/DSSAT subregions work pre-2018 annual meeting/subregion KS files/crop_prices.csv".
#' @param fixed_cost_file is the file tha includes fixed costs (per acre) costs of production. Defaults to "C:/Users/manirad/Downloads/test/fixed_cost_input.csv".
#' @param pumping_cost is the cost of pumping an acre-inch of groundwater excluding taxes. Defaults to 3.56 which is the cost of pumpin an acre-inch of groundwater in parts of Kansas.
#' @param default_well_capacity_col_name is the name of the well capacity column generated from the MODFLOW model. Defaults to 'Well_Capacity(gpm)' as this is the original column name we started with.
#' @param soil_moisture_targets is the vector of soil moisture targets that are generated by the DSSAT model. (25, 35, 45, 55, 65, 75).
#' @param IFREQ_seq is the difference between two consequtive irrigation frequencies (IFREQ) in DSSAT. Defaults to 2.
#' @param IFREQ_interpolate is the size of interpolation interval. Defaults to 0.1 so that IFREQ_seq of 2 adds 0, 0.1, 0.2, ..., 1.9.
#' @return returns the output table.
#' @examples
#' \dontrun{
#' gen_lookup_tax(tax_amount = 2)
#' }
#' @export
gen_lookup_tax = function(tax_amount = 1.1,
DSSAT_files = "./input_files/DSSAT_files",
soil_file = "./input_files/Well_Soil Type_generator_07.csv",
well_capacity_file = "./input_files/Well_Capacity_ganarator.csv",
price_file = "./input_files/crop_prices.csv",
fixed_cost_file = "./input_files/fixed_cost_input.csv",
pumping_cost = 3.56,
default_well_capacity_col_name = "Well_Capacity(gpm)",
soil_moisture_targets = c(25, 35, 45, 55, 65, 75),
IFREQ_seq = 2,
IFREQ_interpolate = .1){
library(data.table)
tax_amount = tax_amount - 0.1
# tax_amount = 1
print(paste("this is the tax:", tax_amount, sep = " "))
#========================================================================================
# # # # # # # # Input files # # # # # # # #
#========================================================================================
#----------------
# 1) DSSAT data
#----------------
# the output of DSSAT is a bit strange and it needs some cleaning.
# there are some header rows which result in the columns not having names assigned. The code below is solving that issue to have the data formatted cleanly.
# Notice that FNAM.... column is the key here. The way it is written now, it breaks column V9 which includes both PAW and IFREQ to two separate columns.
# these are the actual column names that will be assigned:
col_new = c("RUNNO", "TRNO", "R_pound", "O_pound", "C_pound", "CR", "MODEL", "EXNAME", "FNAM", "WSTA", "SOIL_ID", "SDAT", "PDAT", "EDAT", "ADAT", "MDAT", "HDAT", "DWAP", "CWAM",
"HWAM", "HWAH", "BWAH", "PWAM", "HWUM", "H_AM", "H_UM", "HIAM", "LAIX", "IR_M" , "IRCM", "PRCM", "ETCM", "EPCM", "ESCM", "ROCM", "DRCM", "SWXM", "NI_M", "NICM",
"NFXM", "NUCM", "NLCM", "NIAM", "CNAM", "GNAM", "N2OEC", "PI_M", "PICM", "PUPC", "SPAM", "KI_M", "KICM", "KUPC", "SKAM", "RECM", "ONTAM", "ONAM", "OPTAM",
"OPAM", "OCTAM", "OCAM", "CO2EC", "DMPPM", "DMPEM", "DMPTM", "DMPIM", "YPPM", "YPEM", "YPTM", "YPIM", "DPNAM", "DPNUM", "YPNAM",
"YPNUM", "NDCH", "TMAXA", "TMINA", "SRADA", "DAYLA", "CO2A", "PRCP", "ETCP", "ESCP", "EPCP", "PAW", "IFREQ")
# note: each file includes 1 crop-soil type-climate and it includes different PAW and IFREQ's
filenames = list.files(path = DSSAT_files , pattern="*.OSU", full.names=TRUE)
# read all the OSU files and remove NA's and fix the header issues
ldf <- lapply(filenames, read.table, fill=T) # these few lines are a bit complicated, but they read in all the files at the same time
ldf <- lapply(ldf, function(x) x[-2,]) # remove the NA rows that will be generated becase of DSSAT formatting
ldf <- mapply(cbind, ldf, "file_name"=filenames, SIMPLIFY=F) # and put them back together as a data.tabale rather than a list
KS_DSSAT = data.table::rbindlist(ldf) # KS_DSSAT is the dataset we will be working on
KS_DSSAT = data.table(KS_DSSAT)
KS_DSSAT = KS_DSSAT[complete.cases(V85)] # remove other potential NA's
KS_DSSAT[, file_name:= NULL] # we don't need the file names
rm(ldf) # ldf was a auxilary thing so remove ldf here
# break down V9 into IFREQ and PAW
KS_DSSAT[, V9 := as.character(V9)] # make it a character vector
KS_DSSAT[, PAW := substr(V9, 1, regexpr("_", V9)-2)] # create PAW from V9 by subsetting
KS_DSSAT[, IFREQ := substr(V9, regexpr("Q", V9)+2, nchar(V9))] # create IFREQ from V9 by subsetting
KS_DSSAT[, V9 := NULL] # we don't need V9 anumore
data.table::setnames(KS_DSSAT, old = colnames(KS_DSSAT), new = col_new) # let's change the VXX col names to col_new
##### From here, it will loop over the soil types that you have for your region. Let's setup some basics:
unique_soil = KS_DSSAT[, unique(SOIL_ID)] # unique_soil is the unique soil types in the area... there are 7 soil types in Finney county
KS_DSSAT_2 = data.table::copy(KS_DSSAT) # we need a copy of KS_DSSAT for later use because KS_DSSAT will be modified and we need the original version
lookup_table_all_years_2 = data.table::data.table() # 1) we need to create some empty data.tables. Otherwise, R won't recognize them.
lookup_table_quarter_2 = data.table::data.table() # 2) these will store outputs of each run for each soil type
lookup_table_well_2 = data.table::data.table() # 3) and they will be the outputs that we will use in Econ_model_annual_meeting.R
for (i in 1:length(unique_soil)) { # the loop starts here.
#----------------
# 2) Well capacity data
#----------------
# we want to generate profit-maximizing decisions for each well capacity and each soil type. To do se, we want to generate a look up table that provides us with profit-maximizing
# decisions for each well capacity (notice that the loop is over soil types, so we are conditioning each run of the for loop on soil type.)
soil_type = data.table::fread(soil_file) # this is just a tempelate Well_ID Soil_Type table. The value that is 07 doesn't matter. We will change it. The important thing is that Well_ID's go from 1001 to 3901 capturing a reasonable range of capacities for irrigation: 100-3000 gpm.
soil_type[, Soil_Type := unique_soil[i]]
### ### ### @: this needs to be updated
soil_type[, Soil_Type:= gsub("KSFC00000", "KS0000000", Soil_Type)] # this is about naming soil types. Maybe different in your area. In KS hydrology and DSSAT had different names for soil types. This is trying to unify them.
well_capacity = data.table::fread(well_capacity_file) # read in well capacity for each well by well id. This is the 100-3000 gpm range connected to Well_ID
data.table::setkey(soil_type, Well_ID) # set key is needed for merging two data.tables. We want to merge them by their Well_ID
data.table::setkey(well_capacity, Well_ID) # set key is needed for merging two data.tables. We want to merge them by their Well_ID
well_capacity_data = soil_type[well_capacity] # Now we have soil type and well capacity combination
data.table::setnames(well_capacity_data, default_well_capacity_col_name, "Well_capacity") # change the name of the column to Well_capacity.
#----------------
# 3) Price data
#----------------
# add price of each crop. Here, we are assuming irrigated and dryland crops have the same price.
# FA -> fallow
# MZ -> corn
# WH -> wheat
# SG -> sorghum
# SB -> soybean
# ******************** you can change these **********************
price_dt = data.table::fread(price_file) # price per kg
data.table::setkey(price_dt, CR) # setkey for merging later
#----------------
# 4) Cost data
#----------------
# add fixed and variable costs: fixed costs come from the crop budgets. it's a new input added.
# add fixed and variable costs: variable costs of water also come from the crop budgets that Bill provided.
# >>>>> create the fixed_cost_input.csv from the crop budgets for your region. <<<<<
# ******************** you can change these **********************
cost_dt = well_capacity_data[,.(Well_ID)] # we want to assign marginal pumping cost by well, so get Well_ID's
data.table::setkey(cost_dt, Well_ID)
# cost_dt[, cost_per_acre_mm := 3.2/25.4]
cost_dt[, cost_per_acre_mm := (pumping_cost + tax_amount)/25.4] # and assign the SAME pumping cost (for now) to every well. This means that pumping cost does not change over time either.
fixed_cost = data.table::fread(fixed_cost_file) # read costs per acre that are independent of water.
fixed_cost[irr==0, Crop := paste("dry", Crop, sep = "-")] # separate dryland and irrigated costs
fixed_cost[, irr := NULL] # remove the irr column
fixed_cost = rbind(fixed_cost, data.table::data.table(Crop="FA", f_cost = 0)) # assign a 0 fixed cost to fallow acres. If you don't have these yet, you can fake values and run the model.
data.table::setkey(fixed_cost, Crop) # setkey for merging later.
#========================================================================================
# # # # # # # # Data Management # # # # # # # #
#========================================================================================
# Now, we have all the inputs that we need. We first, clean the data and create something that we can eun the optimization on.
# in the future we may need to add more data above including the elevation of the surface and bedrock to get depth to water.
#----------------
# 1) clean DSSAT data
#----------------
KS_DSSAT= KS_DSSAT_2[,.(SOIL_ID, CR, IFREQ, PAW, SDAT, IRCM, PRCP, PRCM, HWAM)] # select the columns we are going to work with: soil type, crop, ifreq, paw, year, irrigation, rainfall, crop yield
KS_DSSAT[, IRCM := as.numeric(as.character(IRCM))] # clean a little: convert to numbers from characters
KS_DSSAT= KS_DSSAT[complete.cases(IRCM)] # remove NA's
KS_DSSAT[, SDAT := substr(SDAT, 1, 4)] # get year from SDAT
cols_change = colnames(KS_DSSAT)[c(3:9)] # The columns we want to convert to numeric
KS_DSSAT[, (cols_change):= lapply(.SD, function(x) as.numeric(as.character(x))), .SDcols=cols_change] # change all those columns to numeric
cols_change = colnames(KS_DSSAT)[c(1,2)] # The columns we want to convert to character
KS_DSSAT[, (cols_change):= lapply(.SD, as.character), .SDcols=cols_change] # change those columns from factor to character
KS_DSSAT_0 = KS_DSSAT[IFREQ==0] # generate all PAW's for dryland crops. IFREQ does not matter for dryland crops, but we generate them for computational reasons.
KS_DSSAT = KS_DSSAT[IFREQ!=0]
KS_DSSAT_0 <- KS_DSSAT_0[rep(seq_len(nrow(KS_DSSAT_0)), each=6)] # repeat each line 6 times because we want to have these 6 PAW's: 25, 35, 45, 55, 65, 75
KS_DSSAT_0[, PAW := rep(soil_moisture_targets, nrow(KS_DSSAT_0)/length(soil_moisture_targets))]
KS_DSSAT = rbind(KS_DSSAT, KS_DSSAT_0) # add the dryland crops to the irrigated ones and create the final output.
data.table::setkey(KS_DSSAT, SOIL_ID, CR, IFREQ, PAW, SDAT)
#----------------
# 2) Interpolate to find crop yield and water use for every IFREQ
#----------------
KS_DSSAT= KS_DSSAT[PRCP > -10 & HWAM > -10] # remove the years that did not generate crop yield
KS_DSSAT= KS_DSSAT[,.(SOIL_ID, CR, PAW, SDAT, IFREQ, IRCM, PRCP, PRCM, HWAM)] # reorganize the columns
data.table::setkey(KS_DSSAT, SOIL_ID, CR, PAW, SDAT, IFREQ)
# this part is trying to interpolate water use and yield for IFREQ's between values.
# >>>>>>>>>>>>>>>>>>>>>>>>>
KS_DSSAT[, lead_yield := dplyr::lead(HWAM, n = 1L), by=c("SOIL_ID", "CR", "PAW", "SDAT")] # create a lead variable; ignore the warnings
KS_DSSAT[, lead_irr_mm := dplyr::lead(IRCM, n = 1L), by=c("SOIL_ID", "CR", "PAW", "SDAT")] # create a lead variable; ignore the warnings
KS_DSSAT <- KS_DSSAT[rep(seq_len(nrow(KS_DSSAT)), each=IFREQ_seq*10)] # repeat each line 20 times so that we can interpolate
df_foo=data.table::data.table(x=seq(0, IFREQ_seq-.1, IFREQ_interpolate)) # 2/20 = .1, so create a data.table at distances of 0.1
df_foo=do.call(rbind, replicate(nrow(KS_DSSAT)/nrow(df_foo), df_foo, simplify = F)) # and repeat it so that it's the same size as the KS_DSSAT
KS_DSSAT=data.table::data.table(KS_DSSAT, IFREQ_int=df_foo$x) # merge KS_DSSAT and df_foo. notice that IFREQ is intervals of 2. IFREQ_int is the interpolated value.
KS_DSSAT[, foo := max(IFREQ), by=c("SOIL_ID", "CR")] # this is to not extrapolate beyond max IFREQ
KS_DSSAT[, IFREQ := IFREQ + IFREQ_int] # now we have IFREQ's that are by 0.1
KS_DSSAT=KS_DSSAT[IFREQ<=foo] # obviously, we don't want to extrapolate beyong our IFREQ
KS_DSSAT[, foo := NULL] # we are done with foo, so we remove it.
KS_DSSAT[is.na(lead_yield), lead_yield :=0] # set NA yield to 0.
KS_DSSAT[, yield_int := HWAM + (lead_yield - HWAM)*IFREQ_int] # now that we have all the IFREQ's, let's fnd interpolated yield for each IFREQ
KS_DSSAT[is.na(lead_irr_mm), lead_irr_mm :=0]
KS_DSSAT[, irr_int := IRCM + (lead_irr_mm - IRCM)*IFREQ_int] # same thing for irrigation
KS_DSSAT= KS_DSSAT[,.(SOIL_ID, CR, IFREQ, PAW, SDAT, irr_mm=irr_int, PRCP, PRCM, yield_kg_ac= yield_int)] # Let's keep only the interpolated columns. yield is still kg/ha here
KS_DSSAT[, yield_kg_ac := yield_kg_ac * 0.4046] # it becomes kg/acre here
data.table::setkey(KS_DSSAT, SOIL_ID, CR, IFREQ, PAW, SDAT)
KS_DSSAT = KS_DSSAT[IFREQ == 0 | IFREQ >= IFREQ_seq] # IFREQ's of between 0 and 2 don't make sense because they are interpolation of dryland and very high well capacities, so we remove them.
KS_DSSAT = KS_DSSAT[IFREQ !=0 | PAW == soil_moisture_targets[1]] # We don't need PAW of 25, 35, 45, 55, 65, 75 for dryland anymore. so we remove them here.
KS_DSSAT[IFREQ==0, CR := paste("dry", CR, sep = "-")] # let's be specific about dryland crops. IF IFREQ is 0, then it's a dryland
#----------------
# 3) IFREQ for each well capacity-acre
#----------------
# This part generates a data.table that devides each 130-acre circle into 4 quarter circles. and creates 5 different combinations:
# 1. all four dryland,
# 2. 32.5 acres irrigated 97.5 dryland
# 3. 65 acres irrigated 65 dryland
# 4. 97.5 acres irrigated 32.5 dryland
# 5. all four irrigated.
# dryland quarters are only allowed to grow dryland crops or be fallow.
# irrigated quarters can only be irrigated. Total acres is always 130. However, NOTE that the variable 'tot_acres' is really total irrigated acres not total acres planted.
number_of_crops = length(KS_DSSAT[, unique(CR)]) # we have 6 crops: 3 irrigated and 3 dryland crops.
# 0 Fallow/ dryland for each crop. We add 0 for fallow. The idea is that at the end of the day there is no benefit in doing fallow. It is possible that expected profit become 0 and one might do fallow.
# 1 MZ
# 2 WH
# 3 SG
# 4 dry-MZ
# 5 dry-WH
# 6 dry-SG
well_capacity_data = rbind(data.table::data.table(Well_ID=1, Soil_Type=unique(well_capacity_data$Soil_Type), Well_capacity=0), well_capacity_data) # add the first line which is dryland with well capacity of 0. Well_ID ==1 for the dryland well.
well_capacity_data[, quarter_1 := 0] # define four quarters. Assign 0 to all of them and then we will modify it.
well_capacity_data[, quarter_2 := 0] # define four quarters. Assign 0 to all of them and then we will modify it.
well_capacity_data[, quarter_3 := 0] # define four quarters. Assign 0 to all of them and then we will modify it.
well_capacity_data[, quarter_4 := 0] # define four quarters. Assign 0 to all of them and then we will modify it.
well_capacity_data <- well_capacity_data[rep(seq_len(nrow(well_capacity_data)), each=5)] # repeat each of them 5 tmes because we have: 0, 32.5, 65, 97.5, 130
# >>>>>>>>>>>>>>>>>>>>>>>>>
well_capacity_data[, tot_acres := rep(c(0, 32.5, 65, 97.5, 130), nrow(well_capacity_data)/5)] # assign total acres to them. This is total irrigated acres in a quarter circle
well_capacity_data <- well_capacity_data[rep(seq_len(nrow(well_capacity_data)), each=(number_of_crops+1))] # repeat them again by crops+1 because of fallow
# >>>>>>>>>>>>>>>>>>>>>>>>>
well_capacity_data[,quarter_4 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))] # just call them from fallow to # of crops. ignore the warnings.
well_capacity_data[,quarter_3 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))] # just call them from fallow to # of crops. ignore the warnings.
well_capacity_data[,quarter_2 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))] # just call them from fallow to # of crops. ignore the warnings.
well_capacity_data[,quarter_1 := rep(0:number_of_crops, nrow(well_capacity_data)/(number_of_crops+1))] # just call them from fallow to # of crops. ignore the warnings.
well_capacity_data[, ifreq := round((tot_acres * 1)/( Well_capacity * 0.053030303149011), 1)] # find IFREQ from well capacity for each total number of acres irrigated. Formula by Allan
well_capacity_data[!(complete.cases(ifreq) & ifreq < 100), ifreq := 0]
well_capacity_data[ifreq<2 & ifreq > 0, ifreq := 2] # set to low IFREQ's equal to 2. Note that too low of IFREQ means very high well capacities, so it doesn't matter much.
# we want to find which crop-PAW combination produces the largest expected profit here for any quarter circle. So we have all the possible crops for each quarter circle.
# the trick is that making it vertical, which we do below, will make it easy for optimization then. Notice that tot_acres plays a key role here because the toal number of acres
# irrigated together with well capacity determines the IFREQ which is important for crop yield and profitability.
well_capacity_data= reshape2::melt(well_capacity_data, id=c("Well_ID", "Soil_Type", "Well_capacity", "tot_acres", "ifreq")) # let's melt it to create a vertical data.table
well_capacity_data= data.table::data.table(well_capacity_data)
data.table::setkey(well_capacity_data, Well_ID, Soil_Type, tot_acres)
well_capacity_data[, CR := ifelse(value==1, "MZ", ## >>>> THIS NEEDS TO BE UPDATED for our region.<<<< Now you have all crop-quarter combinations by name.
ifelse(value==2, "WH",
ifelse(value==3, "SG",
ifelse(value==4, "dry-MZ",
ifelse(value==5, "dry-WH",
ifelse(value==6, "dry-SG", "FA"))))))]
well_capacity_data[, value:=NULL] # and we dont need this anymore.
well_capacity_data[, quarter:= ifelse(variable== "quarter_1", 1, # let's just define a variable called quarter.
ifelse(variable== "quarter_2", 2,
ifelse(variable== "quarter_3", 3, 4)))]
well_capacity_data[, variable:=NULL] # and get rid of this.
tot_acres_0 = well_capacity_data[tot_acres == 0] # dryland circles can only do dryalnd or fallow
tot_acres_0 = tot_acres_0[data.table::like(CR, "FA") | data.table::like(CR, "dry")]
tot_acres_325 = well_capacity_data[tot_acres == 32.5] # if 32.5 acres are irrigated, only one quarter can be irrigated, the rest is dryland or fallow
tot_acres_325 = tot_acres_325[(quarter==1 & !(data.table::like(CR, "FA") | data.table::like(CR, "dry"))) | (quarter!=1 & (data.table::like(CR, "FA") | data.table::like(CR, "dry")))]
tot_acres_65 = well_capacity_data[tot_acres == 65] # if 65 acres are irrigated, only two quarters can be irrigated, the rest is dryland or fallow
tot_acres_65 = tot_acres_65[(quarter <3 & !(data.table::like(CR, "FA") | data.table::like(CR, "dry"))) | (quarter >2 & (data.table::like(CR, "FA") | data.table::like(CR, "dry")))]
tot_acres_975 = well_capacity_data[tot_acres == 97.5] # if 97.5 acres are irrigated, only three quarter can be irrigated, the rest is dryland or fallow
tot_acres_975 = tot_acres_975[(quarter<4 & !(data.table::like(CR, "FA") | data.table::like(CR, "dry"))) | (quarter >3 & (data.table::like(CR, "FA") | data.table::like(CR, "dry")))]
tot_acres_130 = well_capacity_data[tot_acres == 130] # if 130 acres are irrigated, all of the circle should irrigated, there is no dryland or fallow
tot_acres_130 = tot_acres_130[!(data.table::like(CR, "FA") | data.table::like(CR, "dry"))]
well_capacity_data = rbind(tot_acres_0, tot_acres_325, tot_acres_65, tot_acres_975, tot_acres_130) # merge the 5 datasets generated above. This is the new well_capacity_data
rm(tot_acres_0, tot_acres_325, tot_acres_65, tot_acres_975, tot_acres_130) # remove the 5 datasets generated above. we don't need them anymore.
data.table::setkey(well_capacity_data, Well_ID, tot_acres, quarter)
#========================================================================================
# # # # # # # # Optimization # # # # # # # #
#========================================================================================
# we start by merging DSSAT outputs with well capacities
KS_DSSAT[, IFREQ := round(IFREQ, 1)] # round it so that it merges. Otherwise, some of them won't be exactly the same.
number_of_years = nrow(unique(KS_DSSAT, by="SDAT")) # number of years that we have data for
foo = data.table::data.table(SOIL_ID = unique_soil, CR = "FA", IFREQ = 0, PAW = soil_moisture_targets[1],
SDAT = min(KS_DSSAT$SDAT), irr_mm=0, PRCP = 200, PRCM = 200, yield_kg_ac = 0) # in these few lines we generate fallow acres for all years
foo = foo[rep(seq_len(nrow(foo)), each=number_of_years)] # repeat each line by the number of years
baz = unique(KS_DSSAT, by="SDAT")
# >>>>>>>>>>>>>>>>>>>>>>>>>
foo[, SDAT := rep(baz$SDAT, nrow(foo)/nrow(baz))]
foo[, PRCP := rep(baz$PRCP, nrow(foo)/nrow(baz))]
KS_DSSAT= rbind(KS_DSSAT, foo) # and then merge fallow's with other acres.
data.table::setkey(KS_DSSAT, SOIL_ID, CR, IFREQ)
well_capacity_data[data.table::like(CR, "dry"), ifreq := 0] # making sure that if crop is dryland, then ifreq = 0
well_capacity_data[CR == "FA", ifreq := 0] # same for fallow
data.table::setkey(well_capacity_data, Soil_Type, CR, ifreq)
# foo_irr is what will be working with from here.
foo_irr = merge(well_capacity_data, KS_DSSAT, by.x = c("Soil_Type", "CR", "ifreq"), by.y = c("SOIL_ID", "CR", "IFREQ"), allow.cartesian = T) #so, KS_DSSAT has DSSAT outputs for each combination of soil, crop and IFREQ: water use, yield. On the other hand, well_capacity_Data has IFREQ for each well capacity and acres. So, we need to merge them.
foo_irr = foo_irr[,.(Well_ID, SOIL_ID=Soil_Type, Well_capacity, tot_acres, IFREQ=ifreq, CR, quarter, PAW, SDAT, irr_mm, PRCP, PRCM, yield_kg_ac)] # rename columns
data.table::setkey(foo_irr, CR)
foo_irr=foo_irr[price_dt] # merge it with crop price
foo_irr = foo_irr[complete.cases(Well_ID)]
data.table::setkey( foo_irr, Well_ID)
cost_dt = rbind(data.table::data.table(Well_ID=1, cost_per_acre_mm = unique(cost_dt$cost_per_acre_mm)), cost_dt) # add costs for dryland acres to. It will not affect profits at the end, because irrigation is zero.
data.table::setkey(cost_dt, Well_ID)
foo_irr=foo_irr[cost_dt] # merge with cost of pumping
data.table::setkey( foo_irr, CR)
data.table::setkey( fixed_cost, Crop)
foo_irr=foo_irr[fixed_cost] # merge with fixed costs per acre
foo_irr=foo_irr[complete.cases(Well_ID)] # remove NA's
foo_irr[, profit := 32.5 * (yield_kg_ac * price - irr_mm * cost_per_acre_mm - f_cost)] # Estimate profits for each quarter
foo_irr[, irrigation := 32.5 * irr_mm * 0.0393701] # Estimate irrigation for each quarter
data.table::setkey(foo_irr, Well_ID, tot_acres, quarter, SDAT)
foo_irr_2 = data.table::copy(foo_irr) # we need this for later
# optimization starts here:
foo_irr[, mean_profit_PAW := mean(profit), by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW")] # find mean profit for each "Well_ID", "tot_acres", "quarter", "CR", "PAW"
foo_irr[, sd_profit_PAW := sd(profit), by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW")] # find standard deviation of profit for each "Well_ID", "tot_acres", "quarter", "CR", "PAW"
foo_irr[, mean_irr_PAW := mean(irrigation), by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW")] # find mean irrigation for each "Well_ID", "tot_acres", "quarter", "CR", "PAW"
foo_irr= unique(foo_irr, by = c("Well_ID", "tot_acres", "quarter", "CR", "PAW")) # after that, we need unique observations by "Well_ID", "tot_acres", "quarter", "CR", "PAW"
# the caulation above shows expected profit and water use for each quarter circle for each crop and for each PAW.
# next, we need to find the crop-PAW combination that maximizes expected profit for each quarter.
foo_irr[, profit_quarter := max(mean_profit_PAW), by=c("Well_ID", "tot_acres", "quarter")]
foo_irr= foo_irr[profit_quarter == mean_profit_PAW]
foo_irr[, count := .N , by=c("Well_ID", "tot_acres", "quarter")] # these next few lines makes sure that we have unique observations
foo_irr = unique(foo_irr, by=c("Well_ID", "tot_acres", "quarter"))
foo_irr = foo_irr[Well_capacity>0 | tot_acres == 0]
foo_irr[, count := .N , by=c("SOIL_ID", "Well_capacity", "tot_acres", "SDAT")]
# then we want to find maximum profit for each well capacity.
foo_irr[, profit_tot_acres := sum(profit_quarter), by=c("Well_ID", "tot_acres")]
foo_irr[, irr_tot_acres := sum(mean_irr_PAW), by=c("Well_ID", "tot_acres")]
foo_irr[, profit_Well_ID := max(profit_tot_acres), by=c("Well_ID")]
foo_irr= foo_irr[ profit_Well_ID == profit_tot_acres] #### now, we have irrigation decisions at the beginning of the season for each well capacity including dryland
# next, we want to create 3 look up tables
# one for each well capacity: lookup_table_well
# one for each quarter circle in a well capacity: lookup_table_quarter
# one for each quarter in a well capacity in a given year in our historical data from 1981-2009: lookup_table_all_years
lookup_table_quarter = foo_irr[Well_capacity>0,.(Well_capacity, SOIL_ID, tot_acres, quarter, CR, PAW, mean_irr_PAW, profit_quarter)]
data.table::setkey(lookup_table_quarter, Well_capacity, quarter)
lookup_table_well = unique(foo_irr, by=c("Well_ID", "tot_acres"))
lookup_table_well= lookup_table_well[Well_capacity>0,.(Well_capacity, SOIL_ID, tot_acres, irr_tot_acres, profit_Well_ID)]
data.table::setkey(lookup_table_well, Well_capacity)
baz = lookup_table_quarter[,.(Well_capacity, tot_acres, quarter, CR, PAW, id=1)]
data.table::setkey(baz, Well_capacity, tot_acres, quarter, CR, PAW)
data.table::setkey(foo_irr_2, Well_capacity, tot_acres, quarter, CR, PAW)
baz = foo_irr_2[baz]
lookup_table_all_years = baz[,.(Well_ID, Well_capacity, SOIL_ID, tot_acres, IFREQ, CR, quarter, PAW, SDAT, PRCP, PRCM, yield_kg_ac, irrigation, profit)]
lookup_table_all_years_2 = rbind(lookup_table_all_years_2, lookup_table_all_years) # these three lines are merging outputs of each i in the loop, i.e., this is the final output for all soil types.
lookup_table_quarter_2 = rbind(lookup_table_quarter_2, lookup_table_quarter) # these three lines are merging outputs of each i in the loop, i.e., this is the final output for all soil types.
lookup_table_well_2 = rbind(lookup_table_well_2, lookup_table_well) # these three lines are merging outputs of each i in the loop, i.e., this is the final output for all soil types.
print(i) # so that we know the loop is working
} # end of loop
data.table::setkey(lookup_table_all_years_2, SOIL_ID, Well_capacity, SDAT)
data.table::setkey(lookup_table_quarter_2, SOIL_ID, Well_capacity, quarter)
data.table::setkey(lookup_table_well_2, SOIL_ID, Well_capacity)
write.csv(lookup_table_well_2, "lookup_table_well_2.csv") # save the data.
saveRDS(lookup_table_quarter_2, "lookup_table_quarter_2.rds") # save the data.
saveRDS(lookup_table_all_years_2, "lookup_table_all_years_2.rds") # save the data.
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.