Set Up

library(dplyr)
library(lubridate)
library(tidyr)
library(ggplot2)
library(gridExtra)
library(weathergen)

theme_set(theme_bw())
data(climate_cities)
clim.da <- climate_cities[['boston']]
clim.da$TEMP <- (clim.da$TMIN+clim.da$TMAX)/2
head(clim.da)

head(clim.da)
clim.mon <- group_by(clim.da, DATE=floor_date(DATE, 'month')) %>%
  summarise(PRCP=sum(PRCP),
            TMAX=mean(TMAX),
            TMIN=mean(TMIN),
            TEMP=mean(TEMP))
clim.wyr <- group_by(clim.da, WYEAR=wyear(DATE)) %>%
  summarise(N=n(),
            PRCP=sum(PRCP),
            TMAX=mean(TMAX),
            TMIN=mean(TMIN),
            TEMP=mean(TEMP))

complete_years <- clim.wyr$WYEAR[which(clim.wyr$N>=365)]
clim.da <- filter(clim.da, wyear(DATE) %in% complete_years)
clim.mon <- filter(clim.mon, wyear(DATE) %in% complete_years)
clim.wyr <- filter(clim.wyr, WYEAR %in% complete_years)
gather(clim.mon, VAR, VALUE, PRCP:TEMP) %>%
ggplot(aes(DATE, VALUE)) +
  geom_line() +
  labs(x='', y='') +
  facet_wrap(~VAR, scales='free_y', ncol=1)
gather(clim.wyr, VAR, VALUE, PRCP:TEMP) %>%
ggplot(aes(WYEAR, VALUE)) +
  geom_line() +
  labs(x='', y='') +
  facet_wrap(~VAR, scales='free_y', ncol=1)

Annual Simulation

wgen_wave <- wavelet_analysis(clim.wyr$PRCP, 
                              years = clim.wyr$WYEAR,
                              sig.level=0.90, noise.type='white')
plot(wgen_wave$gws, wgen_wave$period, type="b",
     xlab="Global Wavelet Spectrum", ylab="Fourier Period (Years)",
     log="y", ylim=rev(range(wgen_wave$period)), 
     xlim=range(c(wgen_wave$gws, wgen_wave$gws.sig$signif)))
lines(wgen_wave$gws.sig$signif, wgen_wave$period, lty=2, col='red') 

signif.periods.idx <- which(wgen_wave$gws > wgen_wave$gws.sig$signif)
signif.periods <- wgen_wave$period[signif.periods.idx]
print(signif.periods)

wgen_wave.comp <- wavelet_components(x=clim.wyr$PRCP, 
                                     wt=wgen_wave, 
                                     n.periods=2, 
                                     sig.periods=signif.periods.idx, 
                                     n.comp.periods=c(2, 2))
library(forecast)
wgen_models <- arima_fit_components(components=wgen_wave.comp)
par(mfrow=c(3,1))
plot(forecast(wgen_models[["NOISE"]], h=nrow(clim.wyr)))
plot(forecast(wgen_models[["COMPONENT 1"]], h=nrow(clim.wyr)))
plot(forecast(wgen_models[["COMPONENT 2"]], h=nrow(clim.wyr)))
par(mfrow=c(1,1))
NUM_TRIALS <- 1
num_year_sim <- 20
water_yr_change <- TRUE
month_list <- c(10:12, 1:9)
water_year_start <- min(clim.wyr$WYEAR)
water_year_end <- max(clim.wyr$WYEAR)
wind_included <- FALSE
annual_adjust <- TRUE
max_adjust <- 0.1
Markov_Adjust <- TRUE
transform_data <- FALSE
wgen_sim.warm <- arimas_simulate(models=wgen_models, n=num_year_sim)
plot(wgen_sim.warm$sum, type='l')
mean(wgen_sim.warm$sum)
mean(clim.wyr$PRCP)
sd(wgen_sim.warm$sum)
sd(clim.wyr$PRCP)

Adjustment Factors

factors <- list(
  wet_spell=rep(1, 12),
  dry_spell=rep(1, 12),
  prcp_mean=rep(1, 12),
  prcp_cv=rep(1, 12),
  temp_mean=rep(0, 12)
)
START_WYEAR_SIM <- 2001
k1 <- 1

sim.wyr <- data.frame(WYEAR=seq(START_WYEAR_SIM, length.out=num_year_sim),
                      PRCP=wgen_sim.warm$sum)

clim.da <- mutate(clim.da,
                  WYEAR=wyear(DATE),
                  YEAR=year(DATE),
                  MONTH=month(DATE),
                  DAY=day(DATE)) %>%
  select(DATE, WYEAR, YEAR, MONTH, DAY, PRCP, TMIN, TMAX, TEMP)

clim.mon <- mutate(clim.mon,
                  WYEAR=wyear(DATE),
                  YEAR=year(DATE),
                  MONTH=month(DATE),
                  DAY=day(DATE)) %>%
  select(DATE, WYEAR, YEAR, MONTH, DAY, PRCP, TMIN, TMAX, TEMP)

Daily Simulation

sim.da <- data.frame(DATE=seq(as.Date(paste(min(sim.wyr$WYEAR)-1, '-10-01', sep='')), 
                           as.Date(paste(max(sim.wyr$WYEAR), '-09-30', sep='')),
                           by='day')) %>%
  mutate(MONTH=month(DATE),
         WYEAR=wyear(DATE),
         JDAY=yday(DATE),
         WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1)
         WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY),
         STATE=NA,
         TRANSITION=NA,
         PRCP=NA,
         TEMP=NA,
         TMIN=NA,
         TMAX=NA,
         DATE_SAMPLED=NA)
sim.da[1, 'STATE'] <- 0
sim.da[1, 'PRCP'] <- 0
sim.da[1, 'TEMP'] <- 25
sim.da[1, 'TMAX'] <- 30
sim.da[1, 'TMIN'] <- 20

Single Year Simulation

i_wyr <- 1
kk <- max(round(sqrt(length(clim.wyr$PRCP)),0),round(length(clim.wyr$PRCP)*0.5,0))

For the first simulation year the simulated annual precipitation is r format(sim.wyr[i_wyr, 'PRCP'], digits=4) mm. Using this annual precipitation amount, find the r kk nearest-neighbors based on the observed annual precipitation. Perform a weighted sampling (IDW) on these nearest neighbors by randomly choosing 100 years with replacement.

KNN_ANNUAL <- function(sim_annual_prcp,ANNUAL_PRCP,WATER_YEAR_A,k1,y) {
  # returns array of years (length of 100) by weighted sampling of kk nearest neighbors
  kk <- max(round(sqrt(length(ANNUAL_PRCP)),0),round(length(ANNUAL_PRCP)*0.5,0))
  var_order <- 1:length(ANNUAL_PRCP)
    distance <- sqrt((sim_annual_prcp - ANNUAL_PRCP)^2)
    ordered_distances <- matrix(cbind(var_order,distance)[order(distance),],ncol=2)
    K_Distances <- matrix(ordered_distances[1:kk,],ncol=2)
    PROBS <- (1/row(K_Distances)[,1]) / sum((1/row(K_Distances)[,1]))
    set.seed(k1*y)
    selection <- sample(row(K_Distances)[,1],size=100,prob=PROBS,replace=TRUE)
    FINAL_YEARS <- WATER_YEAR_A[K_Distances[selection,1]]
}

CUR_YEARS <- KNN_ANNUAL(sim.wyr[i_wyr, 'PRCP'], clim.wyr$PRCP, clim.wyr$WYEAR, k1, i_wyr)
CUR_YEARS

This figure shows the annual precipitation for each water year, with the r kk nearest neighbors highlighted in red. The horizontal red line is the simulated annual precip for the current year (r format(sim.wyr[i_wyr, 'PRCP'], digits=4) mm).

clim.wyr %>%
  ggplot(aes(WYEAR, PRCP, color=WYEAR %in% unique(CUR_YEARS))) +
  geom_point() +
  geom_hline(yintercept=sim.wyr[i_wyr, 'PRCP'], linetype=2, color='red') +
  geom_text(aes(label=WYEAR), hjust=-0.1, vjust=0.5, size=4) +
  scale_color_manual('', values=c('TRUE'='orangered', 'FALSE'='grey50'), guide=FALSE) +
  labs(x="Water Year", y="Annual Precip (mm/yr)")

From these r kk nearest neighbors, the random sample of length 100 years is:

data.frame(INDEX=seq_along(CUR_YEARS), CUR_YEARS=CUR_YEARS) %>%
  ggplot(aes(INDEX, CUR_YEARS)) +
  geom_point() +
  geom_line() +
  labs(x="Sampled Index", y="Sampled Year")

Using this sequence of 100 years, create a continuous timeseries of the observed daily climate variables.

conditional_selection <- NULL
for (yy in 1:length(CUR_YEARS)) {
    conditional_selection <- c(conditional_selection, which(clim.da$WYEAR==CUR_YEARS[yy]))
}

current <- data.frame(DATE=clim.da$DATE[conditional_selection]) %>%
  left_join(clim.da)
mutate(current, INDEX=row_number()) %>%
  ggplot(aes(INDEX, PRCP, color=factor(WYEAR))) +
  geom_point() +
  labs(x="Sampled Day Index", y="Daily Precip (mm)") +
  guides(color=guide_legend(title='Water Year', ncol=2))

State Thresholds

Using this time series, define the dry/wet state thresholds. There are three states:

# get 80th percentile of daily precip > 0.3 for each month
thresh1 <- 0.3*25.4
thresh2_quantile <- 0.8

# Jeff's method
thresholds <- mutate(current, MONTH=ordered(MONTH, levels=month_list)) %>%
  filter(PRCP>thresh1) %>%
  group_by(MONTH) %>%
  summarise(THRESH_2=quantile(PRCP, thresh2_quantile),
            THRESH_MAX=max(PRCP)) %>%
  mutate(THRESH_1=thresh1) %>%
  arrange(MONTH) %>%
  select(MONTH, THRESH_1, THRESH_2, THRESH_MAX)
mutate(thresholds, THRESH_21=THRESH_2-THRESH_1, THRESH_MAX2=THRESH_MAX-THRESH_2) %>%
  select(-THRESH_2, -THRESH_MAX) %>%
  gather(VAR, VALUE, THRESH_1, THRESH_21, THRESH_MAX2) %>%
  ggplot(aes(MONTH, VALUE, fill=VAR)) +
  geom_bar(stat='identity', position='stack') +
  scale_fill_discrete('Thresholds', labels=c('THRESH_1'='Dry', 'THRESH_21'='Wet', 'THRESH_MAX2'='Extreme')) +
  labs(x='Month', y='Precipitaion Threshold (mm/day)')

Current States

Merge the precipitation timeseries with the state thresholds, and add julian/water days.

current <- current %>%
  left_join(select(thresholds, MONTH, THRESH_1, THRESH_2) %>% 
            mutate(MONTH=as.numeric(as.character(MONTH)))) %>%
  mutate(JDAY=yday(DATE),
         WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1)
         WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>%
  select(DATE, MONTH, JDAY, WDAY, PRCP, TEMP, TMAX, TMIN, THRESH_1, THRESH_2)

Determine state for precip and lagged precip, and transition. Note that Scott's method applies the current month threshold to the next month (e.g. if the Date is 6/30, the June threshold is applied to the July 1 precip to determine next state). I'll use this same method for now to maintain consistency.

current <- mutate(current,
                  STATE=ifelse(PRCP<=THRESH_1, 0,
                               ifelse(PRCP<=THRESH_2, 1, 2)),
                  STATE_NEXT=lead(STATE), # uses correct threshold
     #              STATE_NEXT=ifelse(lead(PRCP)<=THRESH_1, 0,
     #                                ifelse(lead(PRCP)<=THRESH_2, 1, 2)), # uses scott's threshold
                  TRANSITION=ifelse(is.na(STATE_NEXT), NA, paste(STATE, STATE_NEXT, sep='')))
head(current)

This figure shows the precipitation for the first year with the symbols colored by state. The dashed lines shows the two thresholds for each month.

# head(current, 365) %>%
current %>%
  ggplot(aes(WDAY, PRCP, color=factor(STATE))) +
  geom_point() +
  geom_line(aes(y=THRESH_1), color='black', linetype=2) +
  geom_line(aes(y=THRESH_2), color='black', linetype=2) +
  scale_color_discrete('State', labels=c('0'='Dry', '1'='Wet', '2'='Extreme')) +
  labs(x='Julian Day (1=Oct 1)', y='Daily Precip (mm)')
current.mon <- select(current, MONTH, PRCP, TEMP) %>%
  gather(VAR, VALUE, PRCP, TEMP) %>%
  group_by(VAR, MONTH) %>%
  summarise(MEAN=mean(VALUE),
            SD=sd(VALUE)) %>%
  ungroup

current.mon <- select(current, MONTH, PRCP, TEMP) %>%
  gather(VAR, VALUE, PRCP, TEMP) %>%
  plyr::dlply(c("VAR"), function(x) { 
    plyr::dlply(x, c("MONTH"), function(x) {
      c(MEAN=mean(x$VALUE), SD=sd(x$VALUE)) 
    })
  })

current.mon[["PRCP"]][[1]]

Transition Matrix

This figure shows the frequency of each transition state by month.

filter(current, !is.na(TRANSITION)) %>%
  mutate(MONTH=ordered(MONTH, levels=month_list)) %>%
  ggplot(aes(MONTH, fill=factor(TRANSITION))) +
  geom_bar(position='fill') +
  labs(x="Month", y="Fraction") +
  scale_fill_discrete('Transition')
GET_PI <- function(m) {
  P <- rbind(m, rep(1, 3))
  P[1,1] <- P[1,1] - 1
  P[2,2] <- P[2,2] - 1
    P[3,3] <- P[3,3] - 1
    B <- c(0,0,0,1)
    PI <- solve(P[c(1,2,4),],B[c(1,2,4)])
    return(PI)
}
mc_p <- lapply(month_list, function(m) {
  x <- filter(current, MONTH==m)
  with(x, prop.table(table(STATE_NEXT, STATE), 2))    
})
names(mc_p) <- as.character(month_list)
lapply(names(mc_p), function(m) {
  return(data.frame(mc_p[[m]]) %>% mutate(MONTH=m))
}) %>%
  do.call(rbind, .) %>%
  mutate(MONTH=ordered(MONTH, levels=month_list),
         FreqString=format(Freq, digits=1)) %>%
  ggplot(aes(STATE, STATE_NEXT, fill=Freq)) +
  geom_tile() +
  geom_text(aes(label=FreqString), size=3) +
  scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) +
  scale_x_discrete(expand=c(0,0)) +
  scale_y_discrete(expand=c(0,0)) +
  labs(x="From State", y="To State") + 
  facet_wrap(~MONTH)

Adjust Dry/Wet Spells

Now, we can adjust the transition matrix using the dry/wet spell changes

adjust_spell <- function(p, dry_change=1, wet_change=1) {
  pnew <- p

  # adjust dry spell
  p01 <- (p['1', '0'] + p['2', '0'])/dry_change - p['2', '0']
  p00 <- p['0', '0'] + (p['1', '0'] - p01)
  pnew['1','0'] <- p01
  pnew['0','0'] <- p00

  #adjust wet spell
  p10 <- (p['0', '1'] + p['2', '1'])/wet_change - p['2', '1']
  p11 <- p['1', '1'] + (p['0', '1'] - p10)
  pnew['0','1'] <- p10
  pnew['1','1'] <- p11

  pnew
}
p_june <- mc_p[['6']]
p_june_new <- adjust_spell(p_june, dry=2, wet=1)

p1 <- data.frame(p_june) %>%
  mutate(FreqString=format(Freq, digits=1)) %>%
  ggplot(aes(STATE, STATE_NEXT, fill=Freq)) +
  geom_tile() +
  geom_text(aes(label=FreqString), size=3) +
  scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) +
  scale_x_discrete(expand=c(0,0)) +
  scale_y_discrete(expand=c(0,0)) +
  labs(x="From State", y="To State") +
  ggtitle('Original June Matrix')  
p2 <- data.frame(p_june_new) %>%
  mutate(FreqString=format(Freq, digits=1)) %>%
  ggplot(aes(STATE, STATE_NEXT, fill=Freq)) +
  geom_tile() +
  geom_text(aes(label=FreqString), size=3) +
  scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) +
  scale_x_discrete(expand=c(0,0)) +
  scale_y_discrete(expand=c(0,0)) +
  labs(x="From State", y="To State")   +
  ggtitle('Adjusted June Matrix')  

grid.arrange(grobs=list(p1, p2), top="\nChange in Transition Matrix with dry=2", ncol=2)
p_june <- mc_p[['6']]
p_june_new <- adjust_spell(p_june, dry=1, wet=1.5)

p1 <- data.frame(p_june) %>%
  mutate(FreqString=format(Freq, digits=1)) %>%
  ggplot(aes(STATE, STATE_NEXT, fill=Freq)) +
  geom_tile() +
  geom_text(aes(label=FreqString), size=3) +
  scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) +
  scale_x_discrete(expand=c(0,0)) +
  scale_y_discrete(expand=c(0,0)) +
  labs(x="From State", y="To State") +
  ggtitle('Original June Matrix')
p2 <- data.frame(p_june_new) %>%
  mutate(FreqString=format(Freq, digits=1)) %>%
  ggplot(aes(STATE, STATE_NEXT, fill=Freq)) +
  geom_tile() +
  geom_text(aes(label=FreqString), size=3) +
  scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) +
  scale_x_discrete(expand=c(0,0)) +
  scale_y_discrete(expand=c(0,0)) +
  labs(x="From State", y="To State") +
  ggtitle('Adjusted June Matrix')  

grid.arrange(grobs=list(p1, p2),
             top="\nChange in Transition Matrix with wet=1.5", ncol=2)

Markov Chain Simulation

For each day of the current year, first generate a markov state

set.seed(k1)
next_state <- function(prev_state, p) {
  # given previous state and transition probability matrix p
  # simulate next state
  pp1 <- p['0', as.character(prev_state)]
  pp2 <- p['1', as.character(prev_state)] + pp1
  rn <- runif(1)
  if (rn < pp1) {
    next_state <- 0
  } else if (rn >= pp1 & rn < pp2) {
    next_state <-1
  } else {
    next_state <- 2
  }
  next_state
}
next_state(prev_state=0, p=mc_p[['10']])

Simulate All States

for (i in 2:(nrow(filter(sim.da, WYEAR==min(WYEAR)))+1)) {
  sim.da$STATE[i] <- next_state(sim.da$STATE[i-1], mc_p[[as.character(sim.da$MONTH[i-1])]])
  sim.da$TRANSITION[i-1] <- paste0(sim.da$STATE[i-1], sim.da$STATE[i])
}
prop.table(table(MONTH=sim.da$MONTH, STATE=sim.da$STATE), 1) %>% 
  data.frame %>% 
  ggplot(aes(MONTH, Freq, fill=STATE)) + 
  geom_bar(stat='identity')
prop.table(table(MONTH=current$MONTH, STATE=current$STATE), 1) %>% 
  data.frame %>% 
  ggplot(aes(MONTH, Freq, fill=STATE)) + 
  geom_bar(stat='identity')
filter(sim.da, !is.na(TRANSITION)) %>% 
  ggplot(aes(DATE, STATE)) +
  geom_point()
filter(sim.da, !is.na(STATE)) %>% 
  mutate(STATE_NEXT=lead(STATE)) %>%
  select(STATE, STATE_NEXT) %>%
  table %>%
  prop.table(2)

KNN of Daily Climate

wday_range <- function(j, n=7) {
  if (n %% 2 == 1) {
    l <- (n-1)/2
    u <- (n-1)/2
  } else {
    l <- n/2
    u <- n/2 - 1
  }
  rng <- seq(j-l, j+u)
  rng <- ifelse(rng<=0, 365+rng, rng)
  rng <- ifelse(rng>365, rng-365, rng)
  rng
}

i <- 2
j <- 1

current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(j))) %>%
  filter(!is.na(SELECTED))

mutate(current.sel, INDEX=row_number()) %>%
  arrange(SELECTED) %>%
#   filter(INDEX<=365) %>%
  ggplot(aes(INDEX, PRCP, color=SELECTED)) +
    geom_point()
stopifnot(sum(current.sel$SELECTED)>0)

KNN Resampling

# possible_days <- cur_day[cur_day_cur_state]
PRCP_TODAY <- (filter(current.sel, SELECTED))$PRCP
TEMP_TODAY <- (filter(current.sel, SELECTED))$TEMP
PRCP_TOMORROW <- (mutate(current.sel, PRCP=lead(PRCP)) %>% filter(SELECTED))$PRCP
TEMP_TOMORROW <- (mutate(current.sel, TEMP=lead(TEMP)) %>% filter(SELECTED))$TEMP
TMAX_TOMORROW <- (mutate(current.sel, TMAX=lead(TMAX)) %>% filter(SELECTED))$TMAX
TMIN_TOMORROW <- (mutate(current.sel, TMIN=lead(TMIN)) %>% filter(SELECTED))$TMIN
DATE_TOMORROW <- (mutate(current.sel, DATE=lead(DATE)) %>% filter(SELECTED))$DATE

cur_sim_PRCP <- sim.da$PRCP[i-1]
cur_sim_TEMP <- sim.da$TEMP[i-1]
KNN <- function(cur_sim_PRCP,cur_sim_TEMP,PRCP_TODAY,TEMP_TODAY,PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW,k,sd_monthly_PRCP,sd_monthly_TEMP,mean_monthly_PRCP,mean_monthly_TEMP,k1,count) {
  w_PRCP <- 100/sd_monthly_PRCP
  w_TEMP <- 10/sd_monthly_TEMP
    var_order <- 1:length(PRCP_TODAY)
    distance <- sqrt(w_PRCP*((cur_sim_PRCP-mean_monthly_PRCP) - (PRCP_TODAY-mean_monthly_PRCP))^2 + w_TEMP*((cur_sim_TEMP-mean_monthly_TEMP) - (TEMP_TODAY-mean_monthly_TEMP))^2)
  ordered_distances <- matrix(cbind(var_order,distance)[order(distance),],ncol=2)
    K_Distances <- matrix(ordered_distances[1:k,],ncol=2)
    PROBS <- (1/row(K_Distances)[,1]) / sum((1/row(K_Distances)[,1]))
    set.seed(k1*count)
    selection <- sample(row(K_Distances)[,1],size=1,prob=PROBS,replace=TRUE)
    FINAL_PRCP <- PRCP_TOMORROW[K_Distances[selection,1]]
    FINAL_TEMP <- TEMP_TOMORROW[K_Distances[selection,1]]
    FINAL_TMAX <- TMAX_TOMORROW[K_Distances[selection,1]]
    FINAL_TMIN <- TMIN_TOMORROW[K_Distances[selection,1]]
    FINAL_DATE <- DATE_TOMORROW[K_Distances[selection,1]]
    return(c(FINAL_PRCP,FINAL_TEMP,FINAL_TMAX,FINAL_TMIN,FINAL_DATE))
}
k <- round(sqrt(length(PRCP_TODAY)))
RESULT <- KNN(cur_sim_PRCP,cur_sim_TEMP,
              PRCP_TODAY,TEMP_TODAY,
              PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW,
              k,
              current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['SD']],
              current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['SD']],
              current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['MEAN']],
              current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['MEAN']],
              k1,i)

sim.da$PRCP[i] <- RESULT[1]
sim.da$TEMP[i] <- RESULT[2]
sim.da$TMAX[i] <- RESULT[3]
sim.da$TMIN[i] <- RESULT[4]
sim.da$DATE_SAMPLED[i] <- RESULT[5]

Repeat this for all days in the year

for (i in 2:(nrow(filter(sim.da, WYEAR==min(WYEAR)))+1)) {
  j <- sim.da$WDAY[i]

  current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(j))) %>%
    filter(!is.na(SELECTED))

  if (sum(current.sel$SELECTED)==0) {
  current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(j, n=61))) %>%
    filter(!is.na(SELECTED))    
  }


  PRCP_TODAY <- (filter(current.sel, SELECTED))$PRCP
  TEMP_TODAY <- (filter(current.sel, SELECTED))$TEMP
  PRCP_TOMORROW <- (mutate(current.sel, PRCP=lead(PRCP)) %>% filter(SELECTED))$PRCP
  TEMP_TOMORROW <- (mutate(current.sel, TEMP=lead(TEMP)) %>% filter(SELECTED))$TEMP
  TMAX_TOMORROW <- (mutate(current.sel, TMAX=lead(TMAX)) %>% filter(SELECTED))$TMAX
  TMIN_TOMORROW <- (mutate(current.sel, TMIN=lead(TMIN)) %>% filter(SELECTED))$TMIN
  DATE_TOMORROW <- (mutate(current.sel, DATE=lead(DATE)) %>% filter(SELECTED))$DATE

  cur_sim_PRCP <- sim.da$PRCP[i-1]
  cur_sim_TEMP <- sim.da$TEMP[i-1]

  k <- round(sqrt(length(PRCP_TODAY)))
  RESULT <- KNN(cur_sim_PRCP,cur_sim_TEMP,
                PRCP_TODAY,TEMP_TODAY,
                PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW,
                k,
                current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['SD']],
                current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['SD']],
                current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['MEAN']],
                current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['MEAN']],
                k1,i)

  sim.da$PRCP[i] <- RESULT[1]
  sim.da$TEMP[i] <- RESULT[2]
  sim.da$TMAX[i] <- RESULT[3]
  sim.da$TMIN[i] <- RESULT[4]
  sim.da$DATE_SAMPLED[i] <- RESULT[5]
}
sim.da$DATE_SAMPLED <- as.Date(sim.da$DATE_SAMPLED)

Complete Simulation

sim.da <- data.frame(DATE=seq(as.Date(paste(min(sim.wyr$WYEAR)-1, '-10-01', sep='')), 
                           as.Date(paste(max(sim.wyr$WYEAR), '-09-30', sep='')),
                           by='day')) %>%
  mutate(MONTH=month(DATE),
         WYEAR=wyear(DATE),
         JDAY=yday(DATE),
         WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1)
         WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY),
         STATE=NA,
         TRANSITION=NA,
         PRCP=NA,
         TEMP=NA,
         TMIN=NA,
         TMAX=NA,
         DATE_SAMPLED=NA)
sim.da[1, 'STATE'] <- 0
sim.da[1, 'PRCP'] <- 0
sim.da[1, 'TEMP'] <- 25
sim.da[1, 'TMAX'] <- 30
sim.da[1, 'TMIN'] <- 20

i <- 1
for (i_wyr in seq_along(sim.wyr$WYEAR)) {
  # KNN of Annual Precip
  CUR_YEARS <- KNN_ANNUAL(sim.wyr[i_wyr, 'PRCP'], clim.wyr$PRCP, clim.wyr$WYEAR, k1, i_wyr)

  # Create current daily population
  conditional_selection <- NULL
  for (yy in 1:length(CUR_YEARS)) {
    conditional_selection <- c(conditional_selection, which(clim.da$WYEAR==CUR_YEARS[yy]))
  }

  current <- data.frame(DATE=clim.da$DATE[conditional_selection]) %>%
    left_join(clim.da)

  # Get thresholds
  thresh1 <- 0.3
  thresh2_quantile <- 0.8

  thresholds <- mutate(current, MONTH=ordered(MONTH, levels=month_list)) %>%
    filter(PRCP>thresh1) %>%
    group_by(MONTH) %>%
    summarise(THRESH_2=quantile(PRCP, thresh2_quantile),
              THRESH_MAX=max(PRCP)) %>%
    mutate(THRESH_1=thresh1) %>%
    arrange(MONTH) %>%
    select(MONTH, THRESH_1, THRESH_2, THRESH_MAX)

  # Merge current with thresholds
  current <- current %>%
    left_join(select(thresholds, MONTH, THRESH_1, THRESH_2) %>% 
              mutate(MONTH=as.numeric(as.character(MONTH)))) %>%
    mutate(JDAY=yday(DATE),
           WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1)
           WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>%
    select(DATE, MONTH, JDAY, WDAY, PRCP, TEMP, TMAX, TMIN, THRESH_1, THRESH_2) %>%
    mutate(STATE=ifelse(PRCP<=THRESH_1, 0,
                        ifelse(PRCP<=THRESH_2, 1, 2)),
           STATE_NEXT=lead(STATE),
           TRANSITION=ifelse(is.na(STATE_NEXT), NA, paste(STATE, STATE_NEXT, sep='')))

  current.mon <- select(current, MONTH, PRCP, TEMP) %>%
    gather(VAR, VALUE, PRCP, TEMP) %>%
    plyr::dlply(c("VAR"), function(x) { 
      plyr::dlply(x, c("MONTH"), function(x) {
        c(MEAN=mean(x$VALUE), SD=sd(x$VALUE)) 
      })
    })

  for (j in 1:(nrow(filter(sim.da, WYEAR==sim.wyr$WYEAR[i_wyr])))) {
    i <- i+1
    if (i > nrow(sim.da)) {
      break
    }
#     print(paste(i_wyr, i, sim.da$WYEAR[i], sim.da$WDAY[i]))

    # Simulate state transition
    sim.da$STATE[i] <- next_state(sim.da$STATE[i-1], mc_p[[as.character(sim.da$MONTH[i-1])]])
    sim.da$TRANSITION[i-1] <- paste0(sim.da$STATE[i-1], sim.da$STATE[i])

#     print(paste(sim.da$DATE[i], j, sim.da$WDAY[i]))

    current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(sim.da$WDAY[i]))) %>%
      filter(!is.na(SELECTED))

    if (sum(current.sel$SELECTED)==0) {
      current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(sim.da$WDAY[i], n=61))) %>%
        filter(!is.na(SELECTED))    
    }
    if (sum(current.sel$SELECTED)==0) {
      current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1])) %>%
        filter(!is.na(SELECTED))    
    }

    PRCP_TODAY <- (filter(current.sel, SELECTED))$PRCP
    TEMP_TODAY <- (filter(current.sel, SELECTED))$TEMP
    PRCP_TOMORROW <- (mutate(current.sel, PRCP=lead(PRCP)) %>% filter(SELECTED))$PRCP
    TEMP_TOMORROW <- (mutate(current.sel, TEMP=lead(TEMP)) %>% filter(SELECTED))$TEMP
    TMAX_TOMORROW <- (mutate(current.sel, TMAX=lead(TMAX)) %>% filter(SELECTED))$TMAX
    TMIN_TOMORROW <- (mutate(current.sel, TMIN=lead(TMIN)) %>% filter(SELECTED))$TMIN
    DATE_TOMORROW <- (mutate(current.sel, DATE=lead(DATE)) %>% filter(SELECTED))$DATE

    cur_sim_PRCP <- sim.da$PRCP[i-1]
    cur_sim_TEMP <- sim.da$TEMP[i-1]

    k <- round(sqrt(length(PRCP_TODAY)))
    RESULT <- KNN(cur_sim_PRCP,cur_sim_TEMP,
                  PRCP_TODAY,TEMP_TODAY,
                  PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW,
                  k,
                  current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['SD']],
                  current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['SD']],
                  current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['MEAN']],
                  current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['MEAN']],
                  k1,i)

    sim.da$PRCP[i] <- RESULT[1]
    sim.da$TEMP[i] <- RESULT[2]
    sim.da$TMAX[i] <- RESULT[3]
    sim.da$TMIN[i] <- RESULT[4]
    sim.da$DATE_SAMPLED[i] <- RESULT[5]
  }
}
sim.da$DATE_SAMPLED <- as.Date(sim.da$DATE_SAMPLED)
p1 <- ggplot(sim.da, aes(WDAY, TEMP)) + 
  geom_line(aes(group=WYEAR), alpha=0.2) +
  stat_summary(fun.y='mean', geom="line", color='cyan') +
  ggtitle('Simulated')
p2 <- mutate(clim.da,
             JDAY=yday(DATE),
             WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1)
             WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>%
  ggplot(aes(WDAY, TEMP)) + 
  geom_line(aes(group=WYEAR), alpha=0.2) +
  stat_summary(fun.y='mean', geom="line", color='cyan') +
  ggtitle('Historical')
grid.arrange(p1, p2, ncol=1)
p1 <- ggplot(sim.da, aes(WDAY, PRCP)) + 
  geom_line(aes(group=WYEAR), alpha=0.2) +
  stat_summary(fun.y='mean', geom="line", color='cyan') +
  ggtitle('Simulated')
p2 <- mutate(clim.da,
             JDAY=yday(DATE),
             WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1)
             WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>%
  ggplot(aes(WDAY, PRCP)) + 
  geom_line(aes(group=WYEAR), alpha=0.2) +
  stat_summary(fun.y='mean', geom="line", color='cyan') +
  ggtitle('Historical')
grid.arrange(p1, p2, ncol=1)


HydrosystemsGroup/Weather-Generator documentation built on May 8, 2019, 8:34 a.m.