library(dplyr) library(lubridate) library(tidyr) library(ggplot2) library(gridExtra) library(weathergen) theme_set(theme_bw())
data(climate_cities) clim.da <- climate_cities[['boston']] clim.da$TEMP <- (clim.da$TMIN+clim.da$TMAX)/2 head(clim.da) head(clim.da)
clim.mon <- group_by(clim.da, DATE=floor_date(DATE, 'month')) %>% summarise(PRCP=sum(PRCP), TMAX=mean(TMAX), TMIN=mean(TMIN), TEMP=mean(TEMP)) clim.wyr <- group_by(clim.da, WYEAR=wyear(DATE)) %>% summarise(N=n(), PRCP=sum(PRCP), TMAX=mean(TMAX), TMIN=mean(TMIN), TEMP=mean(TEMP)) complete_years <- clim.wyr$WYEAR[which(clim.wyr$N>=365)] clim.da <- filter(clim.da, wyear(DATE) %in% complete_years) clim.mon <- filter(clim.mon, wyear(DATE) %in% complete_years) clim.wyr <- filter(clim.wyr, WYEAR %in% complete_years)
gather(clim.mon, VAR, VALUE, PRCP:TEMP) %>% ggplot(aes(DATE, VALUE)) + geom_line() + labs(x='', y='') + facet_wrap(~VAR, scales='free_y', ncol=1)
gather(clim.wyr, VAR, VALUE, PRCP:TEMP) %>% ggplot(aes(WYEAR, VALUE)) + geom_line() + labs(x='', y='') + facet_wrap(~VAR, scales='free_y', ncol=1)
wgen_wave <- wavelet_analysis(clim.wyr$PRCP, years = clim.wyr$WYEAR, sig.level=0.90, noise.type='white') plot(wgen_wave$gws, wgen_wave$period, type="b", xlab="Global Wavelet Spectrum", ylab="Fourier Period (Years)", log="y", ylim=rev(range(wgen_wave$period)), xlim=range(c(wgen_wave$gws, wgen_wave$gws.sig$signif))) lines(wgen_wave$gws.sig$signif, wgen_wave$period, lty=2, col='red') signif.periods.idx <- which(wgen_wave$gws > wgen_wave$gws.sig$signif) signif.periods <- wgen_wave$period[signif.periods.idx] print(signif.periods) wgen_wave.comp <- wavelet_components(x=clim.wyr$PRCP, wt=wgen_wave, n.periods=2, sig.periods=signif.periods.idx, n.comp.periods=c(2, 2))
library(forecast) wgen_models <- arima_fit_components(components=wgen_wave.comp) par(mfrow=c(3,1)) plot(forecast(wgen_models[["NOISE"]], h=nrow(clim.wyr))) plot(forecast(wgen_models[["COMPONENT 1"]], h=nrow(clim.wyr))) plot(forecast(wgen_models[["COMPONENT 2"]], h=nrow(clim.wyr))) par(mfrow=c(1,1))
NUM_TRIALS <- 1 num_year_sim <- 20 water_yr_change <- TRUE month_list <- c(10:12, 1:9) water_year_start <- min(clim.wyr$WYEAR) water_year_end <- max(clim.wyr$WYEAR) wind_included <- FALSE annual_adjust <- TRUE max_adjust <- 0.1 Markov_Adjust <- TRUE transform_data <- FALSE
wgen_sim.warm <- arimas_simulate(models=wgen_models, n=num_year_sim) plot(wgen_sim.warm$sum, type='l')
mean(wgen_sim.warm$sum) mean(clim.wyr$PRCP) sd(wgen_sim.warm$sum) sd(clim.wyr$PRCP)
factors <- list( wet_spell=rep(1, 12), dry_spell=rep(1, 12), prcp_mean=rep(1, 12), prcp_cv=rep(1, 12), temp_mean=rep(0, 12) )
START_WYEAR_SIM <- 2001 k1 <- 1 sim.wyr <- data.frame(WYEAR=seq(START_WYEAR_SIM, length.out=num_year_sim), PRCP=wgen_sim.warm$sum) clim.da <- mutate(clim.da, WYEAR=wyear(DATE), YEAR=year(DATE), MONTH=month(DATE), DAY=day(DATE)) %>% select(DATE, WYEAR, YEAR, MONTH, DAY, PRCP, TMIN, TMAX, TEMP) clim.mon <- mutate(clim.mon, WYEAR=wyear(DATE), YEAR=year(DATE), MONTH=month(DATE), DAY=day(DATE)) %>% select(DATE, WYEAR, YEAR, MONTH, DAY, PRCP, TMIN, TMAX, TEMP)
sim.da <- data.frame(DATE=seq(as.Date(paste(min(sim.wyr$WYEAR)-1, '-10-01', sep='')), as.Date(paste(max(sim.wyr$WYEAR), '-09-30', sep='')), by='day')) %>% mutate(MONTH=month(DATE), WYEAR=wyear(DATE), JDAY=yday(DATE), WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1) WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY), STATE=NA, TRANSITION=NA, PRCP=NA, TEMP=NA, TMIN=NA, TMAX=NA, DATE_SAMPLED=NA) sim.da[1, 'STATE'] <- 0 sim.da[1, 'PRCP'] <- 0 sim.da[1, 'TEMP'] <- 25 sim.da[1, 'TMAX'] <- 30 sim.da[1, 'TMIN'] <- 20
i_wyr <- 1 kk <- max(round(sqrt(length(clim.wyr$PRCP)),0),round(length(clim.wyr$PRCP)*0.5,0))
For the first simulation year the simulated annual precipitation is r format(sim.wyr[i_wyr, 'PRCP'], digits=4)
mm. Using this annual precipitation amount, find the r kk
nearest-neighbors based on the observed annual precipitation. Perform a weighted sampling (IDW) on these nearest neighbors by randomly choosing 100 years with replacement.
KNN_ANNUAL <- function(sim_annual_prcp,ANNUAL_PRCP,WATER_YEAR_A,k1,y) { # returns array of years (length of 100) by weighted sampling of kk nearest neighbors kk <- max(round(sqrt(length(ANNUAL_PRCP)),0),round(length(ANNUAL_PRCP)*0.5,0)) var_order <- 1:length(ANNUAL_PRCP) distance <- sqrt((sim_annual_prcp - ANNUAL_PRCP)^2) ordered_distances <- matrix(cbind(var_order,distance)[order(distance),],ncol=2) K_Distances <- matrix(ordered_distances[1:kk,],ncol=2) PROBS <- (1/row(K_Distances)[,1]) / sum((1/row(K_Distances)[,1])) set.seed(k1*y) selection <- sample(row(K_Distances)[,1],size=100,prob=PROBS,replace=TRUE) FINAL_YEARS <- WATER_YEAR_A[K_Distances[selection,1]] } CUR_YEARS <- KNN_ANNUAL(sim.wyr[i_wyr, 'PRCP'], clim.wyr$PRCP, clim.wyr$WYEAR, k1, i_wyr) CUR_YEARS
This figure shows the annual precipitation for each water year, with the r kk
nearest neighbors highlighted in red. The horizontal red line is the simulated annual precip for the current year (r format(sim.wyr[i_wyr, 'PRCP'], digits=4)
mm).
clim.wyr %>% ggplot(aes(WYEAR, PRCP, color=WYEAR %in% unique(CUR_YEARS))) + geom_point() + geom_hline(yintercept=sim.wyr[i_wyr, 'PRCP'], linetype=2, color='red') + geom_text(aes(label=WYEAR), hjust=-0.1, vjust=0.5, size=4) + scale_color_manual('', values=c('TRUE'='orangered', 'FALSE'='grey50'), guide=FALSE) + labs(x="Water Year", y="Annual Precip (mm/yr)")
From these r kk
nearest neighbors, the random sample of length 100 years is:
data.frame(INDEX=seq_along(CUR_YEARS), CUR_YEARS=CUR_YEARS) %>% ggplot(aes(INDEX, CUR_YEARS)) + geom_point() + geom_line() + labs(x="Sampled Index", y="Sampled Year")
Using this sequence of 100 years, create a continuous timeseries of the observed daily climate variables.
conditional_selection <- NULL for (yy in 1:length(CUR_YEARS)) { conditional_selection <- c(conditional_selection, which(clim.da$WYEAR==CUR_YEARS[yy])) } current <- data.frame(DATE=clim.da$DATE[conditional_selection]) %>% left_join(clim.da)
mutate(current, INDEX=row_number()) %>% ggplot(aes(INDEX, PRCP, color=factor(WYEAR))) + geom_point() + labs(x="Sampled Day Index", y="Daily Precip (mm)") + guides(color=guide_legend(title='Water Year', ncol=2))
Using this time series, define the dry/wet state thresholds. There are three states:
dry: 0
: precip < 0.3wet: 1
: 0.3 < precip < 80th ptile by month above dry thresholdextreme: 2
: precip > 80th ptile by month above dry threshold# get 80th percentile of daily precip > 0.3 for each month thresh1 <- 0.3*25.4 thresh2_quantile <- 0.8 # Jeff's method thresholds <- mutate(current, MONTH=ordered(MONTH, levels=month_list)) %>% filter(PRCP>thresh1) %>% group_by(MONTH) %>% summarise(THRESH_2=quantile(PRCP, thresh2_quantile), THRESH_MAX=max(PRCP)) %>% mutate(THRESH_1=thresh1) %>% arrange(MONTH) %>% select(MONTH, THRESH_1, THRESH_2, THRESH_MAX)
mutate(thresholds, THRESH_21=THRESH_2-THRESH_1, THRESH_MAX2=THRESH_MAX-THRESH_2) %>% select(-THRESH_2, -THRESH_MAX) %>% gather(VAR, VALUE, THRESH_1, THRESH_21, THRESH_MAX2) %>% ggplot(aes(MONTH, VALUE, fill=VAR)) + geom_bar(stat='identity', position='stack') + scale_fill_discrete('Thresholds', labels=c('THRESH_1'='Dry', 'THRESH_21'='Wet', 'THRESH_MAX2'='Extreme')) + labs(x='Month', y='Precipitaion Threshold (mm/day)')
Merge the precipitation timeseries with the state thresholds, and add julian/water days.
current <- current %>% left_join(select(thresholds, MONTH, THRESH_1, THRESH_2) %>% mutate(MONTH=as.numeric(as.character(MONTH)))) %>% mutate(JDAY=yday(DATE), WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1) WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>% select(DATE, MONTH, JDAY, WDAY, PRCP, TEMP, TMAX, TMIN, THRESH_1, THRESH_2)
Determine state for precip and lagged precip, and transition. Note that Scott's method applies the current month threshold to the next month (e.g. if the Date is 6/30, the June threshold is applied to the July 1 precip to determine next state). I'll use this same method for now to maintain consistency.
current <- mutate(current, STATE=ifelse(PRCP<=THRESH_1, 0, ifelse(PRCP<=THRESH_2, 1, 2)), STATE_NEXT=lead(STATE), # uses correct threshold # STATE_NEXT=ifelse(lead(PRCP)<=THRESH_1, 0, # ifelse(lead(PRCP)<=THRESH_2, 1, 2)), # uses scott's threshold TRANSITION=ifelse(is.na(STATE_NEXT), NA, paste(STATE, STATE_NEXT, sep=''))) head(current)
This figure shows the precipitation for the first year with the symbols colored by state. The dashed lines shows the two thresholds for each month.
# head(current, 365) %>% current %>% ggplot(aes(WDAY, PRCP, color=factor(STATE))) + geom_point() + geom_line(aes(y=THRESH_1), color='black', linetype=2) + geom_line(aes(y=THRESH_2), color='black', linetype=2) + scale_color_discrete('State', labels=c('0'='Dry', '1'='Wet', '2'='Extreme')) + labs(x='Julian Day (1=Oct 1)', y='Daily Precip (mm)')
current.mon <- select(current, MONTH, PRCP, TEMP) %>% gather(VAR, VALUE, PRCP, TEMP) %>% group_by(VAR, MONTH) %>% summarise(MEAN=mean(VALUE), SD=sd(VALUE)) %>% ungroup current.mon <- select(current, MONTH, PRCP, TEMP) %>% gather(VAR, VALUE, PRCP, TEMP) %>% plyr::dlply(c("VAR"), function(x) { plyr::dlply(x, c("MONTH"), function(x) { c(MEAN=mean(x$VALUE), SD=sd(x$VALUE)) }) }) current.mon[["PRCP"]][[1]]
This figure shows the frequency of each transition state by month.
filter(current, !is.na(TRANSITION)) %>% mutate(MONTH=ordered(MONTH, levels=month_list)) %>% ggplot(aes(MONTH, fill=factor(TRANSITION))) + geom_bar(position='fill') + labs(x="Month", y="Fraction") + scale_fill_discrete('Transition')
GET_PI <- function(m) { P <- rbind(m, rep(1, 3)) P[1,1] <- P[1,1] - 1 P[2,2] <- P[2,2] - 1 P[3,3] <- P[3,3] - 1 B <- c(0,0,0,1) PI <- solve(P[c(1,2,4),],B[c(1,2,4)]) return(PI) }
mc_p <- lapply(month_list, function(m) { x <- filter(current, MONTH==m) with(x, prop.table(table(STATE_NEXT, STATE), 2)) }) names(mc_p) <- as.character(month_list)
lapply(names(mc_p), function(m) { return(data.frame(mc_p[[m]]) %>% mutate(MONTH=m)) }) %>% do.call(rbind, .) %>% mutate(MONTH=ordered(MONTH, levels=month_list), FreqString=format(Freq, digits=1)) %>% ggplot(aes(STATE, STATE_NEXT, fill=Freq)) + geom_tile() + geom_text(aes(label=FreqString), size=3) + scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) + scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0)) + labs(x="From State", y="To State") + facet_wrap(~MONTH)
Now, we can adjust the transition matrix using the dry/wet spell changes
adjust_spell <- function(p, dry_change=1, wet_change=1) { pnew <- p # adjust dry spell p01 <- (p['1', '0'] + p['2', '0'])/dry_change - p['2', '0'] p00 <- p['0', '0'] + (p['1', '0'] - p01) pnew['1','0'] <- p01 pnew['0','0'] <- p00 #adjust wet spell p10 <- (p['0', '1'] + p['2', '1'])/wet_change - p['2', '1'] p11 <- p['1', '1'] + (p['0', '1'] - p10) pnew['0','1'] <- p10 pnew['1','1'] <- p11 pnew }
p_june <- mc_p[['6']] p_june_new <- adjust_spell(p_june, dry=2, wet=1) p1 <- data.frame(p_june) %>% mutate(FreqString=format(Freq, digits=1)) %>% ggplot(aes(STATE, STATE_NEXT, fill=Freq)) + geom_tile() + geom_text(aes(label=FreqString), size=3) + scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) + scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0)) + labs(x="From State", y="To State") + ggtitle('Original June Matrix') p2 <- data.frame(p_june_new) %>% mutate(FreqString=format(Freq, digits=1)) %>% ggplot(aes(STATE, STATE_NEXT, fill=Freq)) + geom_tile() + geom_text(aes(label=FreqString), size=3) + scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) + scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0)) + labs(x="From State", y="To State") + ggtitle('Adjusted June Matrix') grid.arrange(grobs=list(p1, p2), top="\nChange in Transition Matrix with dry=2", ncol=2)
p_june <- mc_p[['6']] p_june_new <- adjust_spell(p_june, dry=1, wet=1.5) p1 <- data.frame(p_june) %>% mutate(FreqString=format(Freq, digits=1)) %>% ggplot(aes(STATE, STATE_NEXT, fill=Freq)) + geom_tile() + geom_text(aes(label=FreqString), size=3) + scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) + scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0)) + labs(x="From State", y="To State") + ggtitle('Original June Matrix') p2 <- data.frame(p_june_new) %>% mutate(FreqString=format(Freq, digits=1)) %>% ggplot(aes(STATE, STATE_NEXT, fill=Freq)) + geom_tile() + geom_text(aes(label=FreqString), size=3) + scale_fill_continuous(low='#ffeda0', high='#f03b20', space='Lab', limits=c(0,1)) + scale_x_discrete(expand=c(0,0)) + scale_y_discrete(expand=c(0,0)) + labs(x="From State", y="To State") + ggtitle('Adjusted June Matrix') grid.arrange(grobs=list(p1, p2), top="\nChange in Transition Matrix with wet=1.5", ncol=2)
For each day of the current year, first generate a markov state
set.seed(k1) next_state <- function(prev_state, p) { # given previous state and transition probability matrix p # simulate next state pp1 <- p['0', as.character(prev_state)] pp2 <- p['1', as.character(prev_state)] + pp1 rn <- runif(1) if (rn < pp1) { next_state <- 0 } else if (rn >= pp1 & rn < pp2) { next_state <-1 } else { next_state <- 2 } next_state } next_state(prev_state=0, p=mc_p[['10']])
for (i in 2:(nrow(filter(sim.da, WYEAR==min(WYEAR)))+1)) { sim.da$STATE[i] <- next_state(sim.da$STATE[i-1], mc_p[[as.character(sim.da$MONTH[i-1])]]) sim.da$TRANSITION[i-1] <- paste0(sim.da$STATE[i-1], sim.da$STATE[i]) }
prop.table(table(MONTH=sim.da$MONTH, STATE=sim.da$STATE), 1) %>% data.frame %>% ggplot(aes(MONTH, Freq, fill=STATE)) + geom_bar(stat='identity')
prop.table(table(MONTH=current$MONTH, STATE=current$STATE), 1) %>% data.frame %>% ggplot(aes(MONTH, Freq, fill=STATE)) + geom_bar(stat='identity')
filter(sim.da, !is.na(TRANSITION)) %>% ggplot(aes(DATE, STATE)) + geom_point()
filter(sim.da, !is.na(STATE)) %>% mutate(STATE_NEXT=lead(STATE)) %>% select(STATE, STATE_NEXT) %>% table %>% prop.table(2)
wday_range <- function(j, n=7) { if (n %% 2 == 1) { l <- (n-1)/2 u <- (n-1)/2 } else { l <- n/2 u <- n/2 - 1 } rng <- seq(j-l, j+u) rng <- ifelse(rng<=0, 365+rng, rng) rng <- ifelse(rng>365, rng-365, rng) rng } i <- 2 j <- 1 current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(j))) %>% filter(!is.na(SELECTED)) mutate(current.sel, INDEX=row_number()) %>% arrange(SELECTED) %>% # filter(INDEX<=365) %>% ggplot(aes(INDEX, PRCP, color=SELECTED)) + geom_point()
stopifnot(sum(current.sel$SELECTED)>0)
# possible_days <- cur_day[cur_day_cur_state] PRCP_TODAY <- (filter(current.sel, SELECTED))$PRCP TEMP_TODAY <- (filter(current.sel, SELECTED))$TEMP PRCP_TOMORROW <- (mutate(current.sel, PRCP=lead(PRCP)) %>% filter(SELECTED))$PRCP TEMP_TOMORROW <- (mutate(current.sel, TEMP=lead(TEMP)) %>% filter(SELECTED))$TEMP TMAX_TOMORROW <- (mutate(current.sel, TMAX=lead(TMAX)) %>% filter(SELECTED))$TMAX TMIN_TOMORROW <- (mutate(current.sel, TMIN=lead(TMIN)) %>% filter(SELECTED))$TMIN DATE_TOMORROW <- (mutate(current.sel, DATE=lead(DATE)) %>% filter(SELECTED))$DATE cur_sim_PRCP <- sim.da$PRCP[i-1] cur_sim_TEMP <- sim.da$TEMP[i-1]
KNN <- function(cur_sim_PRCP,cur_sim_TEMP,PRCP_TODAY,TEMP_TODAY,PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW,k,sd_monthly_PRCP,sd_monthly_TEMP,mean_monthly_PRCP,mean_monthly_TEMP,k1,count) { w_PRCP <- 100/sd_monthly_PRCP w_TEMP <- 10/sd_monthly_TEMP var_order <- 1:length(PRCP_TODAY) distance <- sqrt(w_PRCP*((cur_sim_PRCP-mean_monthly_PRCP) - (PRCP_TODAY-mean_monthly_PRCP))^2 + w_TEMP*((cur_sim_TEMP-mean_monthly_TEMP) - (TEMP_TODAY-mean_monthly_TEMP))^2) ordered_distances <- matrix(cbind(var_order,distance)[order(distance),],ncol=2) K_Distances <- matrix(ordered_distances[1:k,],ncol=2) PROBS <- (1/row(K_Distances)[,1]) / sum((1/row(K_Distances)[,1])) set.seed(k1*count) selection <- sample(row(K_Distances)[,1],size=1,prob=PROBS,replace=TRUE) FINAL_PRCP <- PRCP_TOMORROW[K_Distances[selection,1]] FINAL_TEMP <- TEMP_TOMORROW[K_Distances[selection,1]] FINAL_TMAX <- TMAX_TOMORROW[K_Distances[selection,1]] FINAL_TMIN <- TMIN_TOMORROW[K_Distances[selection,1]] FINAL_DATE <- DATE_TOMORROW[K_Distances[selection,1]] return(c(FINAL_PRCP,FINAL_TEMP,FINAL_TMAX,FINAL_TMIN,FINAL_DATE)) }
k <- round(sqrt(length(PRCP_TODAY))) RESULT <- KNN(cur_sim_PRCP,cur_sim_TEMP, PRCP_TODAY,TEMP_TODAY, PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW, k, current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['SD']], current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['SD']], current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['MEAN']], current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['MEAN']], k1,i) sim.da$PRCP[i] <- RESULT[1] sim.da$TEMP[i] <- RESULT[2] sim.da$TMAX[i] <- RESULT[3] sim.da$TMIN[i] <- RESULT[4] sim.da$DATE_SAMPLED[i] <- RESULT[5]
Repeat this for all days in the year
for (i in 2:(nrow(filter(sim.da, WYEAR==min(WYEAR)))+1)) { j <- sim.da$WDAY[i] current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(j))) %>% filter(!is.na(SELECTED)) if (sum(current.sel$SELECTED)==0) { current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(j, n=61))) %>% filter(!is.na(SELECTED)) } PRCP_TODAY <- (filter(current.sel, SELECTED))$PRCP TEMP_TODAY <- (filter(current.sel, SELECTED))$TEMP PRCP_TOMORROW <- (mutate(current.sel, PRCP=lead(PRCP)) %>% filter(SELECTED))$PRCP TEMP_TOMORROW <- (mutate(current.sel, TEMP=lead(TEMP)) %>% filter(SELECTED))$TEMP TMAX_TOMORROW <- (mutate(current.sel, TMAX=lead(TMAX)) %>% filter(SELECTED))$TMAX TMIN_TOMORROW <- (mutate(current.sel, TMIN=lead(TMIN)) %>% filter(SELECTED))$TMIN DATE_TOMORROW <- (mutate(current.sel, DATE=lead(DATE)) %>% filter(SELECTED))$DATE cur_sim_PRCP <- sim.da$PRCP[i-1] cur_sim_TEMP <- sim.da$TEMP[i-1] k <- round(sqrt(length(PRCP_TODAY))) RESULT <- KNN(cur_sim_PRCP,cur_sim_TEMP, PRCP_TODAY,TEMP_TODAY, PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW, k, current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['SD']], current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['SD']], current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['MEAN']], current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['MEAN']], k1,i) sim.da$PRCP[i] <- RESULT[1] sim.da$TEMP[i] <- RESULT[2] sim.da$TMAX[i] <- RESULT[3] sim.da$TMIN[i] <- RESULT[4] sim.da$DATE_SAMPLED[i] <- RESULT[5] } sim.da$DATE_SAMPLED <- as.Date(sim.da$DATE_SAMPLED)
sim.da <- data.frame(DATE=seq(as.Date(paste(min(sim.wyr$WYEAR)-1, '-10-01', sep='')), as.Date(paste(max(sim.wyr$WYEAR), '-09-30', sep='')), by='day')) %>% mutate(MONTH=month(DATE), WYEAR=wyear(DATE), JDAY=yday(DATE), WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1) WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY), STATE=NA, TRANSITION=NA, PRCP=NA, TEMP=NA, TMIN=NA, TMAX=NA, DATE_SAMPLED=NA) sim.da[1, 'STATE'] <- 0 sim.da[1, 'PRCP'] <- 0 sim.da[1, 'TEMP'] <- 25 sim.da[1, 'TMAX'] <- 30 sim.da[1, 'TMIN'] <- 20 i <- 1 for (i_wyr in seq_along(sim.wyr$WYEAR)) { # KNN of Annual Precip CUR_YEARS <- KNN_ANNUAL(sim.wyr[i_wyr, 'PRCP'], clim.wyr$PRCP, clim.wyr$WYEAR, k1, i_wyr) # Create current daily population conditional_selection <- NULL for (yy in 1:length(CUR_YEARS)) { conditional_selection <- c(conditional_selection, which(clim.da$WYEAR==CUR_YEARS[yy])) } current <- data.frame(DATE=clim.da$DATE[conditional_selection]) %>% left_join(clim.da) # Get thresholds thresh1 <- 0.3 thresh2_quantile <- 0.8 thresholds <- mutate(current, MONTH=ordered(MONTH, levels=month_list)) %>% filter(PRCP>thresh1) %>% group_by(MONTH) %>% summarise(THRESH_2=quantile(PRCP, thresh2_quantile), THRESH_MAX=max(PRCP)) %>% mutate(THRESH_1=thresh1) %>% arrange(MONTH) %>% select(MONTH, THRESH_1, THRESH_2, THRESH_MAX) # Merge current with thresholds current <- current %>% left_join(select(thresholds, MONTH, THRESH_1, THRESH_2) %>% mutate(MONTH=as.numeric(as.character(MONTH)))) %>% mutate(JDAY=yday(DATE), WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1) WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>% select(DATE, MONTH, JDAY, WDAY, PRCP, TEMP, TMAX, TMIN, THRESH_1, THRESH_2) %>% mutate(STATE=ifelse(PRCP<=THRESH_1, 0, ifelse(PRCP<=THRESH_2, 1, 2)), STATE_NEXT=lead(STATE), TRANSITION=ifelse(is.na(STATE_NEXT), NA, paste(STATE, STATE_NEXT, sep=''))) current.mon <- select(current, MONTH, PRCP, TEMP) %>% gather(VAR, VALUE, PRCP, TEMP) %>% plyr::dlply(c("VAR"), function(x) { plyr::dlply(x, c("MONTH"), function(x) { c(MEAN=mean(x$VALUE), SD=sd(x$VALUE)) }) }) for (j in 1:(nrow(filter(sim.da, WYEAR==sim.wyr$WYEAR[i_wyr])))) { i <- i+1 if (i > nrow(sim.da)) { break } # print(paste(i_wyr, i, sim.da$WYEAR[i], sim.da$WDAY[i])) # Simulate state transition sim.da$STATE[i] <- next_state(sim.da$STATE[i-1], mc_p[[as.character(sim.da$MONTH[i-1])]]) sim.da$TRANSITION[i-1] <- paste0(sim.da$STATE[i-1], sim.da$STATE[i]) # print(paste(sim.da$DATE[i], j, sim.da$WDAY[i])) current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(sim.da$WDAY[i]))) %>% filter(!is.na(SELECTED)) if (sum(current.sel$SELECTED)==0) { current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1] & WDAY %in% wday_range(sim.da$WDAY[i], n=61))) %>% filter(!is.na(SELECTED)) } if (sum(current.sel$SELECTED)==0) { current.sel <- mutate(current, SELECTED=(TRANSITION==sim.da$TRANSITION[i-1])) %>% filter(!is.na(SELECTED)) } PRCP_TODAY <- (filter(current.sel, SELECTED))$PRCP TEMP_TODAY <- (filter(current.sel, SELECTED))$TEMP PRCP_TOMORROW <- (mutate(current.sel, PRCP=lead(PRCP)) %>% filter(SELECTED))$PRCP TEMP_TOMORROW <- (mutate(current.sel, TEMP=lead(TEMP)) %>% filter(SELECTED))$TEMP TMAX_TOMORROW <- (mutate(current.sel, TMAX=lead(TMAX)) %>% filter(SELECTED))$TMAX TMIN_TOMORROW <- (mutate(current.sel, TMIN=lead(TMIN)) %>% filter(SELECTED))$TMIN DATE_TOMORROW <- (mutate(current.sel, DATE=lead(DATE)) %>% filter(SELECTED))$DATE cur_sim_PRCP <- sim.da$PRCP[i-1] cur_sim_TEMP <- sim.da$TEMP[i-1] k <- round(sqrt(length(PRCP_TODAY))) RESULT <- KNN(cur_sim_PRCP,cur_sim_TEMP, PRCP_TODAY,TEMP_TODAY, PRCP_TOMORROW,TEMP_TOMORROW,TMAX_TOMORROW,TMIN_TOMORROW,DATE_TOMORROW, k, current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['SD']], current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['SD']], current.mon[["PRCP"]][[sim.da[i, 'MONTH']]][['MEAN']], current.mon[["TEMP"]][[sim.da[i, 'MONTH']]][['MEAN']], k1,i) sim.da$PRCP[i] <- RESULT[1] sim.da$TEMP[i] <- RESULT[2] sim.da$TMAX[i] <- RESULT[3] sim.da$TMIN[i] <- RESULT[4] sim.da$DATE_SAMPLED[i] <- RESULT[5] } } sim.da$DATE_SAMPLED <- as.Date(sim.da$DATE_SAMPLED)
p1 <- ggplot(sim.da, aes(WDAY, TEMP)) + geom_line(aes(group=WYEAR), alpha=0.2) + stat_summary(fun.y='mean', geom="line", color='cyan') + ggtitle('Simulated') p2 <- mutate(clim.da, JDAY=yday(DATE), WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1) WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>% ggplot(aes(WDAY, TEMP)) + geom_line(aes(group=WYEAR), alpha=0.2) + stat_summary(fun.y='mean', geom="line", color='cyan') + ggtitle('Historical') grid.arrange(p1, p2, ncol=1)
p1 <- ggplot(sim.da, aes(WDAY, PRCP)) + geom_line(aes(group=WYEAR), alpha=0.2) + stat_summary(fun.y='mean', geom="line", color='cyan') + ggtitle('Simulated') p2 <- mutate(clim.da, JDAY=yday(DATE), WDAY=JDAY-(273+leap_year(DATE)), # water year day (1=Oct 1) WDAY=ifelse(WDAY<=0, 365+WDAY+leap_year(DATE), WDAY)) %>% ggplot(aes(WDAY, PRCP)) + geom_line(aes(group=WYEAR), alpha=0.2) + stat_summary(fun.y='mean', geom="line", color='cyan') + ggtitle('Historical') grid.arrange(p1, p2, ncol=1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.