#' @export
CIReg.Copy.Hindcast.Results <- function(EnvList, tyear){
prjdir = EnvList$prjdir
eyear_obs = as.numeric(EnvList$eyear_obs)
if(tyear > eyear_obs){
srcdir = paste(prjdir, "/CIReg/", eyear_obs, "/01_Regression-all", sep="")
dstdir = paste(prjdir, "/CIReg/", tyear, sep="")
SetWorkingDir(dstdir)
file.copy(srcdir, dstdir, recursive = T)
}
}
#' @export
CIReg.Create.Summary.Table <- function(EnvList){
prjdir = EnvList$prjdir
vardir = EnvList$vardir
varfile = EnvList$varfile
syear_mme = as.numeric(EnvList$syear_mme)
AcuMonths = as.numeric(EnvList$AcuMonths)
precopt = as.logical(EnvList$precopt)
CRAdj = as.double(EnvList$CRAdj)
options(warn=-1)
options(stringsAsFactors = FALSE)
VarDFile = paste(vardir, "/", varfile, sep="")
var = read.csv(VarDFile, header=T)
varnms = colnames(var)[2:ncol(var)]
varcnt = length(varnms)
ltcnt = 6
# remove all existing summary files
outdir = paste(prjdir, "/0_Analysis/CIReg", sep="")
SetWorkingDir(outdir)
flist = list.files(outdir, pattern = glob2rx("*.csv"), full.names = T)
file.remove(flist)
monoutdir = paste(prjdir, "/0_Analysis/CIReg/Monthly-TSeries", sep="")
SetWorkingDir(monoutdir)
flist = list.files(monoutdir, pattern = glob2rx("*.csv"), full.names = T)
file.remove(flist)
if(AcuMonths > ltcnt){
AcuMonths = ltcnt
cat(sprintf(" CIR: Accumulation month is greater than max. lead time!"))
}
for(i in 1:varcnt){
#for(k in 1:ltcnt){
varnm = varnms[i]
#lagtime = k
### Combine forecasted output into table and convert unit
smry = CIReg.Combine.Forecast.Output(EnvList, varnm, lagtime=1, ltcnt)
if(!is.na(smry)){
if(varnm == "prec" | precopt == T){ smry = prcp.mmday2mmmon(smry) }
for(ii in 1:AcuMonths){
acumon = ii
colcnt = ncol(smry); rowcnt = nrow(smry)
acusmry = as.data.frame(array(NA, dim=c((rowcnt - acumon + 1), colcnt))) #Empty array
colnames(acusmry) = colnames(smry)
acusmry$yearmon = smry$yearmon[1:(rowcnt - acumon + 1)]
for(jj in 1:(rowcnt - acumon + 1)){
acusmry[jj, 2:colcnt] = colMeans(smry[jj:(jj+acumon-1), 2:colcnt])
}
# Define output dir and file
fname = paste("CIReg-", varnm, sprintf("-AM%02d",acumon), "-Summary.csv", sep="")
OutDFile = paste(outdir, "/", fname, sep="")
write.csv(acusmry, OutDFile, row.names=F)
for(j in 1:12){
#ctdata = smry[which(as.numeric(substr(smry$yearmon, 6,7)) == j),c("yearmon", "MME", "OBS")]
ctdata = acusmry[which(as.numeric(substr(acusmry$yearmon, 6,7)) == j), ]
colcnt = ncol(ctdata) - 3
outdata = as.data.frame(ctdata$yearmon); colnames(outdata) = "yearmon"
for(jj in 2:colcnt){
colnm = names(ctdata)[jj]
monnm = sprintf("M%02d", j)
ltnm = paste("MinLag", substr(colnm, (nchar(colnm)-1), nchar(colnm)), sep="")
errdata = ctdata[,c(colnm,"OBS")]
idxthold = nrow(errdata[!is.na(errdata$OBS),]) * 0.5
errdata = na.omit(errdata)
colnames(errdata) = c("SIM", "OBS")
CR = critical.r(nrow(errdata))
cor = format(cor(errdata$SIM, errdata$OBS, method="pearson"), digits=2)
if(cor >= CR * CRAdj){
bmdir = paste(prjdir, "/CIReg", sep="")
dlist = list.dirs(bmdir, recursive=F, full.names=T)
dcnt = length(dlist)
for(k in 1:dcnt){
dirnm = dlist[k]
BmdlDFile = paste(dirnm, "/02_Best_multi-model/02_BestModel/", varnm, "-", ltnm, "-Observed.csv", sep="")
bmdata = read.csv(BmdlDFile, header=T)
bmdata$sel = paste(bmdata$x1, bmdata$l1, sep="")
idxsel = bmdata[j, c("sel")]
if(k == 1){
idxout = idxsel
} else {
idxtmp = idxsel
idxout = rbind(idxout, idxtmp)
}
} # Year Loop
idxtbl = as.data.frame(table(idxout))
idxtbl = idxtbl[order(idxtbl$Freq, decreasing=T), ]
mainidxcnt = idxtbl[1,c("Freq")]
idxcnt = nrow(idxtbl)
if(mainidxcnt >= idxthold){
outtmp = ctdata[, c("yearmon", colnm)]
outdata = merge(outdata, outtmp, by="yearmon", all=T)
}
} # TCC is greater Critical Value
} # Column Loop
if(ncol(outdata) > 1){
rcnt = nrow(outdata)
outdata$MME = NA
for(kk in 1:rcnt){
if(!all(is.na(outdata[kk, 2:(ncol(outdata)-1)]))){
outdata[kk, c("MME")] = mean(as.numeric(outdata[kk, 2:(ncol(outdata)-1)]), na.rm=T)
}
}
obsdata = ctdata[c("yearmon", "OBS")]
outdata = merge(outdata, obsdata, by="yearmon", all=T)
climdata = ctdata[c("yearmon", "CLIM")]
outdata = merge(outdata, climdata, by="yearmon", all=T)
outfile = paste("CIReg-", varnm, sprintf("-M%02d", j), sprintf("-AM%02d",acumon), ".csv", sep="")
MonDFile = paste(monoutdir, "/", outfile, sep="")
write.csv(ctdata, MonDFile, row.names=F)
} # End if
} # Month Loop
} # Accumulation Loop
}
} # Variable Loop
}
#' @export
CIReg.Monthly.Cross.Split.Validation <- function(VMode, currow, BestModels, regtbl, varnm, blagnms, syear_mme, eyear_obs, eyear_sim, tyear, precopt){
ptrcnt = ncol(BestModels)/3
bptrcnt = length(blagnms)
lagnms = as.vector(t(BestModels[currow,(1:ptrcnt)]))
lagmons = as.vector(t(BestModels[currow,(1+ptrcnt):(ptrcnt*2)]))
bestptrs = as.vector(t(BestModels[currow,(1+ptrcnt*2):(ptrcnt*3)]))
ptrcoeff = bestptrs
# Check the minimum value of the variable
minval = min(regtbl[, c(varnm)], na.rm=T)
if(VMode == "Cross"){
regtbl = regtbl[which(as.numeric(substr(regtbl$yearmon,1,4)) >= syear_mme & as.numeric(substr(regtbl$yearmon,1,4)) <= eyear_obs), c("yearmon", varnm, blagnms)]
} else {
regtbl = regtbl[which(as.numeric(substr(regtbl$yearmon,1,4)) >= syear_mme & as.numeric(substr(regtbl$yearmon,1,4)) <= eyear_sim), c("yearmon", varnm, blagnms)]
}
colnames(regtbl) = c("yearmon", "var", blagnms)
curmon = as.numeric(unique(substr(regtbl$yearmon,6,7)))
#### Cross / Split Validation
cnt = 1
if(VMode == "Cross"){
startyear = syear_mme
endyear = eyear_obs
} else {
startyear = (eyear_obs + 1)
endyear = eyear_sim
}
for(m in startyear:endyear){
curxs = regtbl[which(as.numeric(substr(regtbl$yearmon,1,4)) == m), c(blagnms)]
cvaldata = regtbl[which(!as.numeric(substr(regtbl$yearmon,1,4)) == m), c("yearmon", "var", blagnms)]
#Exclude target year
cvaldata = cvaldata[which(!(as.numeric(substr(cvaldata$yearmon,1,4)) == tyear)), ]
cvaldata = cvaldata[,c("var", blagnms)]
fit = lm(var ~ ., data=cvaldata)
summary = summary(fit)
coeff = coefficients(fit)
R2 = summary$r.squared
sigma = summary$sigma
pVal = anova(fit)$'Pr(>F)'[1]
if(any(is.na(curxs)) | length(curxs) == 0){
cury = NA
} else {
cury = coeff[1] + as.vector(coeff[2:(bptrcnt+1)]) %*% as.vector(t(curxs))
}
# save estimated regression model parameters
for(kk in 1:ptrcnt){
bcoln = which(blagnms[] == lagnms[kk])
if(length(bcoln) != 0){ ptrcoeff[kk] = coeff[bcoln+1] }
}
# change negative values to zero
if((varnm == "prec" | precopt == T) & (!is.na(cury)) & (cury <= 0)) { cury = 0}
if(cnt == 1){
valout = cbind(varnm, m, curmon, t(lagnms), t(lagmons), coeff[1], t(ptrcoeff), R2, sigma, pVal, cury)
colnames(valout) = c("name", "RmYear", "month", rep(paste("idx",1:ptrcnt, sep="")), rep(paste("lag",1:ptrcnt, sep="")), "intercept", rep(paste("x",1:ptrcnt, sep="")), "R2", "Sigma", "p-value", "estimation")
cnt = cnt + 1
} else {
valtmp = cbind(varnm, m, curmon, t(lagnms), t(lagmons), coeff[1], t(ptrcoeff), R2, sigma, pVal, cury)
colnames(valtmp) = c("name", "RmYear", "month", rep(paste("idx",1:ptrcnt, sep="")), rep(paste("lag",1:ptrcnt, sep="")), "intercept", rep(paste("x",1:ptrcnt, sep="")), "R2", "Sigma", "p-value", "estimation")
valout = rbind(valout, valtmp)
cnt = cnt + 1
}
}
return(valout)
}
#' @export
CIReg.Extract.Best.Model.Parameters <- function(BestModels, curmon){
BestModels[BestModels == "NA"] = NA
ptrcnt = ncol(BestModels)/3
lagnms = as.vector(t(BestModels[curmon,(1:ptrcnt)]))
lagmons = as.vector(t(BestModels[curmon,(1+ptrcnt):(ptrcnt*2)]))
bestptrs = as.vector(t(BestModels[curmon,(1+ptrcnt*2):(ptrcnt*3)]))
# remove predictors which is not selected
ptrtbl = rbind(lagnms, lagmons, bestptrs)
ptrtbl = ptrtbl[,colSums(is.na(ptrtbl)) == 0]
# if one perdictor is selected
if(is.null(ncol(ptrtbl))){
bptrcnt = 1
blagnms = ptrtbl[1]
blagmons = ptrtbl[2]
}else{
bptrcnt = ncol(ptrtbl)
blagnms = ptrtbl[1,]
blagmons = ptrtbl[2,]
}
outList = list("blagnms"=blagnms, "blagmons"=blagmons, "bestptrs"=bestptrs)
return(outList)
}
#' @export
CIReg.Select.BestModel.and.Run.Best.Regression <- function(EnvList, tyear){
prjdir = EnvList$prjdir
vardir = EnvList$vardir
varfile = EnvList$varfile
idxdir = EnvList$idxdir
idxfile = EnvList$idxfile
NBest = as.numeric(EnvList$NBest)
syear_mme = as.numeric(EnvList$syear_mme)
eyear_mme = as.numeric(EnvList$eyear_mme)
eyear_obs = as.numeric(EnvList$eyear_obs)
eyear_sim = as.numeric(EnvList$eyear_sim)
smonth = as.numeric(EnvList$smonth)
emonth = as.numeric(EnvList$emonth)
precopt = as.logical(EnvList$precopt)
MaxLTime =6
eyear_mme
regdir = paste(prjdir, "/CIReg/", tyear, "/01_Regression-all", sep="")
bmmdir = paste(prjdir, "/CIReg/", tyear, "/02_Best_multi-model", sep="")
SetWorkingDir(bmmdir)
tbldir = paste(bmmdir, "/01_PtrTable", sep="")
SetWorkingDir(tbldir)
bestdir = paste(bmmdir, "/02_BestModel", sep="")
SetWorkingDir(bestdir)
# read a sample file and get the number of predictors
ptrcnt = NBest
# Get variable(predictand) names
VarDFile = paste(vardir, "/", varfile, sep="")
var = read.csv(VarDFile, header=T)
varnms = colnames(var)[2:ncol(var)]
varcnt = length(varnms)
# Get index names (predictors)
IdxDFile = paste(idxdir, "/", idxfile, sep="")
idx = read.csv(IdxDFile, header=T)
idxnms = colnames(idx)
idxnms = idxnms[2:length(idxnms)]
#outfilecheck = paste(outdir, "/", varnms[varcnt], "-MinLag", sprintf("%02d", MaxLTime), "-Observed.csv", sep="")
#if(!file.exists(outfilecheck)){
########## Combine Predictor Tables ############################################
for(i in 1:varcnt){
varnm = varnms[i]
#for(j in 0:MaxLTime){
for(j in 1:MaxLTime){
MinLagTime = j
#### Write Predictor Table
infile = paste(regdir, "/", varnm, "-MinLag", sprintf("%02d", MinLagTime), "-Observed.csv", sep="")
outfile = paste(bmmdir, "/01_PtrTable/", varnm, "-MinLag", sprintf("%02d", MinLagTime), "-Observed.csv", sep="")
data = read.csv(infile, header=T)
PtrTable = array(NA, dim=c(12, ptrcnt*2)) #Empty array
colnames(PtrTable) = c(rep(paste("x",1:ptrcnt, sep="")), rep(paste("l",1:ptrcnt, sep="")))
for(ii in 1:12){
tem = data[which(data$month == ii), c("index", "lag")]
for(jj in 1:ptrcnt){
PtrTable[ii,jj] = as.character(tem$index[jj])
PtrTable[ii,jj+ptrcnt] = tem$lag[jj]
}
}
write.csv(PtrTable, outfile, row.names=F)
}
}
cat(sprintf(" CIR: Predictors have in combined into table\n"))
######## Select Best Model######################################################
varcnt = length(varnms)
for(i in 1:varcnt){
varnm = varnms[i]
for(j in 1:MaxLTime){
#for(j in 0:MaxLTime){
MinLagTime = j
#### Write Predictor Table
ptrfile = paste(bmmdir, "/01_PtrTable/", varnm, "-MinLag", sprintf("%02d", MinLagTime), "-Observed.csv", sep="")
bestfile = paste(bmmdir, "/02_BestModel/", varnm, "-MinLag", sprintf("%02d", MinLagTime), "-Observed.csv", sep="")
########
BestModels = array(NA, dim=c(12, ptrcnt*3)) #Empty array
colnames(BestModels) = c(rep(paste("x",1:ptrcnt, sep="")), rep(paste("l",1:ptrcnt, sep="")), rep(paste("b",1:ptrcnt, sep="")))
PtrTable = read.csv(ptrfile, header=T)
for(ii in smonth:emonth){
curmon = ii
lagnms = as.vector(t(PtrTable[ii,(1:ptrcnt)]))
lagmons = as.vector(t(PtrTable[ii,(1+ptrcnt):(ptrcnt*2)]))
lagnms2 = lagnms[!is.na(lagnms)]
lagmons2 = lagmons[!is.na(lagmons)]
if(length(lagnms2) == 0){
BestModels[ii,1:ptrcnt] = NA
BestModels[ii,(ptrcnt+1):(ptrcnt*2)] = NA
BestModels[ii,(ptrcnt*2+1):(ptrcnt*3)] = ""
} else {
#### Combine predictors based on lags
data = CIReg.Combine.Multiple.Observed.CIndex(VarDFile, IdxDFile, curmon, varnms, lagnms2, lagmons2)
# Exclude target year
data = data[which(!(as.numeric(substr(data$yearmon,1,4)) == tyear)), ]
#data = data[which(as.numeric(substr(data$yearmon,1,4)) >= syear_mme & as.numeric(substr(data$yearmon,1,4)) <= eyear_mme), c(varnm, lagnms2)]
data = data[which(as.numeric(substr(data$yearmon,1,4)) >= syear_mme & as.numeric(substr(data$yearmon,1,4)) <= eyear_obs), c(varnm, lagnms2)]
colnames(data) = c("var", lagnms2)
# leaps : http://www.inside-r.org/packages/cran/leaps/docs/regsubsets
# http://rstudio-pubs-static.s3.amazonaws.com/2897_9220b21cfc0c43a396ff9abf122bb351.html
if(length(lagnms2) > 1){
leaps = regsubsets(var~., data, nbest=1)
#plot(leaps, scale="adjr2")
sumry = summary(leaps)
brow = which.max(sumry$adjr2)
if(length(brow) == 0){
bmodel = array("", dim=c(NBest))
} else {
bmodel = as.vector(t(as.data.frame(sumry$outmat)[brow,]))
imsi = array("", dim=c(NBest))
imsi[match(lagnms2, lagnms)] = bmodel
bmodel = imsi
}
BestModels[ii,1:ptrcnt] = lagnms
BestModels[ii,(ptrcnt+1):(ptrcnt*2)] = lagmons[1:ptrcnt]
BestModels[ii,(ptrcnt*2+1):(ptrcnt*3)] = bmodel
} else {
BestModels[ii,1:ptrcnt] = lagnms
BestModels[ii,(ptrcnt+1):(ptrcnt*2)] = lagmons[1:ptrcnt]
BestModels[ii,(ptrcnt*2+1)] = "*"
}
} # If lagnms2 = NA
} # Month Loop
write.csv(BestModels, bestfile, row.names=F)
}
}
cat(sprintf(" CIR: Best Multivariate Regression Model has been selected.\n"))
######## Run Regression Models #################################################
varcnt = length(varnms)
for(i in 1:varcnt){
varnm = varnms[i]
for(j in 1:MaxLTime){
#for(j in 0:MaxLTime){
MinLagTime = j
#### Write Predictor Table
bestfile = paste(bmmdir, "/02_BestModel/", varnm, "-MinLag", sprintf("%02d", MinLagTime), "-Observed.csv", sep="")
regfile = paste(bmmdir, "/", varnm, "-MinLag", sprintf("%02d", MinLagTime), "-Observed.csv", sep="")
BestModels = read.csv(bestfile, header=T, na.strings=c(" "))
cnt=0
for(ii in smonth:emonth){
curmon = ii
# extract best model parameters from Best Model Table
out = CIReg.Extract.Best.Model.Parameters(BestModels, curmon)
blagnms = out$blagnms; blagmons = out$blagmons; bestptrs = out$bestptrs
blagnms[blagnms == "NA"] = NA
blagmons[blagmons == "NA"] = NA
blagnms2 = blagnms[!is.na(blagnms)]
blagmons2 = blagmons[!is.na(blagmons)]
if(length(blagnms2) > 0) {
cnt = cnt + 1
#### Combine predictors and predictands based on lags
regtbl = CIReg.Combine.Multiple.Observed.CIndex(VarDFile, IdxDFile, curmon, varnms, blagnms2, blagmons2)
#### Do cross validation
cvalout = CIReg.Monthly.Cross.Split.Validation(VMode="Cross", curmon, BestModels, regtbl, varnm, blagnms2, syear_mme, eyear_obs, eyear_sim, tyear, precopt=F)
### Do split validation
if(eyear_sim > eyear_mme){
svalout = CIReg.Monthly.Cross.Split.Validation(VMode="Split", curmon, BestModels, regtbl, varnm, blagnms2, syear_mme, eyear_obs, eyear_sim, tyear, precopt=F)
}
if(cnt == 1){
if(eyear_sim > eyear_mme){
sumout = rbind(cvalout, svalout)
} else {
sumout = cvalout
}
} else {
if(eyear_sim > eyear_mme){
imsi = rbind(cvalout, svalout)
sumout = rbind(sumout, imsi)
} else {
imsi = cvalout
sumout = rbind(sumout, imsi)
}
}
} # if(length(blagnms2) > 0)
} # Loop for month
cat(sprintf(" CIR: Cross and Split Validation has been finished: Var=%s Lag=%s\n", varnm, MinLagTime))
write.csv(sumout, regfile, row.names=F)
}
}
#}# IF file exists
}
#' @export
CIReg.Combine.Multiple.Observed.CIndex <- function(VarDFile, IdxDFile, curmon, varnms, lagnms, lagmons) {
var = read.csv(VarDFile, header=T)
var = var[c("yearmon", varnms)]
idx = read.csv(IdxDFile, header=T)
idx = idx[c("yearmon", lagnms)]
idxcnt = length(lagnms)
for(i in 1:idxcnt){
lagval = as.numeric(lagmons[i])
colnum = i + 1
if(lagval < 0){
srowt = 1- lagval
erowt = nrow(idx)
srows = 1
erows = nrow(idx) + lagval
idx[1:srowt-1, colnum] = NA
idx[srowt:erowt, colnum] = idx[srows:erows, colnum]
} else {
srowt = 1
erowt = nrow(idx) - lagval
srows = 1 + lagval
erows = nrow(idx)
idx[erowt+1:erows, colnum] = NA
idx[srowt:erowt, colnum] = idx[srows:erows, colnum]
}
}
out = merge(var, idx, by="yearmon", all=T)
if(curmon <= 12){
out = out[which(as.numeric(substr(out$yearmon,6,7)) == curmon),]
}
return(out)
}
#' @export
CIReg.Calculate.All.Regression.Split.Validation <- function(EnvList, tyear, precopt) {
#prjdir, vardir, varfile, idxdir, idxfile, NBest, mmetype, syear_mme, eyear_mme, eyear_obs, tyear, smonth, emonth, precopt
prjdir = EnvList$prjdir
vardir = EnvList$vardir
varfile = EnvList$varfile
idxdir = EnvList$idxdir
idxfile = EnvList$idxfile
NBest = as.numeric(EnvList$NBest)
syear_mme = as.numeric(EnvList$syear_mme)
eyear_mme = as.numeric(EnvList$eyear_mme)
eyear_obs = as.numeric(EnvList$eyear_obs)
smonth = as.numeric(EnvList$smonth)
emonth = as.numeric(EnvList$emonth)
precopt = as.logical(EnvList$precopt)
#MaxLTime = 6; syear_trn=1983; eyear_trn=2007; precopt=F
#LTime = as.numeric(substr(mmetype, 1, 1))
#if(LTime == 3) {MaxLag = -6; MaxLTime =3}
#if(LTime == 6) {MaxLag = -8; MaxLTime =6}
LTime = 6
MaxLag = -8; MaxLTime =6
options(stringsAsFactors = FALSE)
outdir = paste(prjdir, "/CIReg/", tyear, "/01_Regression-all", sep="")
SetWorkingDir(outdir)
# Get variable(predictand) names
VarDFile = paste(vardir, "/", varfile, sep="")
var = read.csv(VarDFile, header=T)
varnms_cir = colnames(var)[2:ncol(var)]
# Get index names (predictors)
IdxDFile = paste(idxdir, "/", idxfile, sep="")
idx = read.csv(IdxDFile, header=T)
idxnms = colnames(idx)
idxnms = idxnms[2:length(idxnms)]
varcnt = length(varnms_cir)
idxcnt = length(idxnms)
#moncnt = (emonth - smonth + 1)
for(i in 1:varcnt){
varnm = varnms_cir[i]
for(j in smonth:emonth){
curmon = j
outfile = paste(outdir, "/Reg_", varnm, "_", sprintf("%02d", j), "-SplitTest.csv", sep="")
#if(!file.exists(outfile)){
CR = critical.r(round((eyear_mme - syear_mme + 1)*2/3, digits = 0))
#CR = critical.r(eyear_mme - syear_mme + 1)
MaxCR = CR
repeat{
cnt = 1
#curmon = j
for(k in 1:idxcnt){
idxnm = idxnms[k]
# Need 2 month lag for forecasting next month (data is avilalble at previous month)
for(ii in MaxLag:-2){
#for(ii in MaxLag:0){
lagmon = ii
#data = CIReg.combine.Obs_Cindex(VarDFile, IdxDFile, curmon, varnm, idxnm, lagmon)
data = CIReg.Combine.Multiple.Observed.CIndex(VarDFile, IdxDFile, curmon, varnm, idxnm, lagmon)
data = data[which(as.numeric(substr(data$yearmon,1,4)) >= syear_mme & as.numeric(substr(data$yearmon,1,4)) <= eyear_mme), ]
# Exclude target_year for cross validation
data = data[which(!(as.numeric(substr(data$yearmon,1,4)) == tyear)), ]
data = na.omit(data)
colnames(data) = c("yearmon", "y", "x")
if(nrow(data) >= 15){
SdTCC = 0; MeanTCC = 0
# 2/3 of data
cend = round(nrow(data)*2/3, digits=0)
vend = nrow(data)
vstart = vend - cend + 1
cdata = data[1:cend,]
CTcc = cor(cdata$x, cdata$y, method="pearson")
vdata = data[vstart:vend,]
VTcc = cor(vdata$x, vdata$y, method="pearson")
nrsmpls = (vend-cend) * 3
for(tt in 1:nrsmpls){
rdatanm = paste("rdata", tt, sep="")
RTccnm = paste("RTcc", tt, sep="")
assign(rdatanm, data[sample(1:vend, cend, replace=F),])
assign(RTccnm, cor(get(rdatanm)[c("x")], get(rdatanm)[c("y")], method="pearson"))
}
# Check the correlation sign is on the sameway and greater than critical TCC.
ntsmpls = nrsmpls + 2
tcc = array(0, dim = c(ntsmpls))
mcnt = 0; pcnt = 0
if(CTcc > 0 & CTcc >= MaxCR){
pcnt = pcnt + 1
} else if(CTcc < 0 & abs(CTcc) >= MaxCR) {
mcnt = mcnt + 1
}
tcc[1] = CTcc
if(VTcc > 0 & CTcc >= MaxCR){
pcnt = pcnt + 1
} else if(CTcc < 0 & abs(CTcc) >= MaxCR) {
mcnt = mcnt + 1
}
tcc[2] = VTcc
for(tt in 1:nrsmpls){
RTccnm = paste("RTcc", tt, sep="")
if(get(RTccnm) > 0 & get(RTccnm) >= MaxCR) {
pcnt = pcnt + 1
} else if(get(RTccnm) < 0 & abs(get(RTccnm)) >= MaxCR) {
mcnt = mcnt + 1
}
tcc[tt+2] = get(RTccnm)
}
if(pcnt == ntsmpls | mcnt == ntsmpls){
SdTCC = sd(tcc)
MeanTCC = mean(tcc)
if(cnt == 1){
splitout = cbind(varnm, curmon, idxnm, lagmon, SdTCC, MeanTCC)
cnt = cnt + 1
} else {
splittmp = cbind(varnm, curmon, idxnm, lagmon, SdTCC, MeanTCC)
splitout = rbind(splitout, splittmp)
cnt = cnt + 1
}
}
}
} # End of Loop for lag-time for index
} # End of Index loop
if(cnt > 1){
splitout = as.data.frame(splitout)
colnames(splitout) = c("name", "month", "index", "lag", "SdTCC", "MeanTCC")
splitout = splitout[order(abs(as.numeric(splitout$MeanTCC)), decreasing = T),][1:nrow(splitout),]
#splitout = splitout[order(abs(as.numeric(splitout$MeanTCC)), decreasing = T),][1:MaxLTime,]
bcnt = 1
selcnt = nrow(splitout)
for(k in 1:selcnt){
idxnm = splitout[k, c("index")]
lagmon = as.numeric(splitout[k, c("lag")])
regdata = CIReg.Combine.Multiple.Observed.CIndex(VarDFile, IdxDFile, curmon, varnm, idxnm, lagmon)
data = regdata[which(as.numeric(substr(regdata$yearmon,1,4)) >= syear_mme & as.numeric(substr(regdata$yearmon,1,4)) <= eyear_mme), ]
# if there is at least one of NA in predictor, skip the regression
#if(!any(is.na(data[c(idxnm)]))){
cnt = 1
for(jj in syear_mme:eyear_mme){
curx = data[which(as.numeric(substr(data$yearmon,1,4)) == jj), c(idxnm)]
if(!is.na(curx)){
cvaldata = data[which(!as.numeric(substr(data$yearmon,1,4)) == jj), c("yearmon", varnm, idxnm)]
cvaldata = cvaldata[which(!(as.numeric(substr(cvaldata$yearmon,1,4)) == tyear)), ]
colnames(cvaldata) = c("yearmon", "y", "x")
fit = lm(y ~ x, data=cvaldata)
summary = summary(fit)
coeff = coefficients(fit)
R2 = summary$r.squared
sigma = summary$sigma
pVal = anova(fit)$'Pr(>F)'[1]
cury = coeff[1] + coeff[2]*curx
if((varnm == "prec" | precopt == T) & (!is.na(cury)) & (cury <= 0)) { cury = 0}
} else {
coeff[1:2] = NA; R2 = NA; sigma = NA; pVal = NA; cury = NA
}
if(cnt == 1){
sumout = cbind(varnm, curmon, idxnm, lagmon, jj, t(coeff), R2, sigma, pVal, cury)
cnt = cnt + 1
} else {
sumtmp = cbind(varnm, curmon, idxnm, lagmon, jj, t(coeff), R2, sigma, pVal, cury)
sumout = rbind(sumout, sumtmp)
cnt = cnt + 1
}
} # End of Year Loop for calibration
cvaldata = data
cvaldata = cvaldata[which(!(as.numeric(substr(cvaldata$yearmon,1,4)) == tyear)), ]
colnames(cvaldata) = c("yearmon", "y", "x")
fit = lm(y ~ x, data=cvaldata)
summary = summary(fit)
coeff = coefficients(fit)
R2 = summary$r.squared
sigma = summary$sigma
pVal = anova(fit)$'Pr(>F)'[1]
# Remaining period for verification
data = regdata[which(as.numeric(substr(regdata$yearmon,1,4)) >= (eyear_mme + 1) & as.numeric(substr(regdata$yearmon,1,4)) <= eyear_obs), ]
for(jj in (eyear_mme + 1):eyear_obs){
curx = data[which(as.numeric(substr(data$yearmon,1,4)) == jj), c(idxnm)]
if(!is.na(curx)){
coeff = coefficients(fit)
R2 = summary$r.squared
sigma = summary$sigma
pVal = anova(fit)$'Pr(>F)'[1]
cury = coeff[1] + coeff[2]*curx
if((varnm == "prec" | precopt == T) & (!is.na(cury)) & (cury <= 0)) { cury = 0}
} else {
coeff[1:2] = NA; R2 = NA; sigma = NA; pVal = NA; cury = NA
}
if(cnt == 1){
sumout = cbind(varnm, curmon, idxnm, lagmon, jj, t(coeff), R2, sigma, pVal, cury)
cnt = cnt + 1
} else {
sumtmp = cbind(varnm, curmon, idxnm, lagmon, jj, t(coeff), R2, sigma, pVal, cury)
sumout = rbind(sumout, sumtmp)
cnt = cnt + 1
}
} # End of Year Loop for Cross Validation
# Check TCC for training and verification period
sumout = as.data.frame(sumout)
colnames(sumout) = c("name", "month", "index", "lag", "RmYear", "Intercept", "X", "R2", "Sigma", "p-value", "estimation")
sumout$yearmon = paste(sumout$RmYear, "-", sprintf("%02d", as.numeric(sumout$month)), sep="")
sim = sumout[c("yearmon", "estimation")]
obs = var[c("yearmon", varnm)]
colnames(obs) = c("yearmon", "observed")
outdata = merge(sim, obs, by="yearmon")
trndata = outdata[which(as.numeric(substr(outdata$yearmon,1,4)) >= syear_mme & as.numeric(substr(outdata$yearmon,1,4)) <= eyear_mme), ]
veridata = outdata[which(as.numeric(substr(outdata$yearmon,1,4)) >= (eyear_mme + 1) & as.numeric(substr(outdata$yearmon,1,4)) <= eyear_obs), ]
TrnTcc = cor(as.numeric(trndata$estimation), trndata$observed, method="pearson")
VeriTcc = cor(as.numeric(veridata$estimation), veridata$observed, method="pearson")
if(TrnTcc * VeriTcc > 0){
if(bcnt == 1){
bptr = splitout[k,]
bcnt = bcnt + 1
} else {
btmp = splitout[k,]
bptr = rbind(bptr, btmp)
bcnt = bcnt + 1
}
}
} # End of SelCnt
} else {
bcnt = 1
} # IF (cnt > 1)
if(bcnt >1 | MaxCR < 0){
#if(bcnt > MaxLTime | MaxCR < CR){
if(MaxCR < CR){
cat(sprintf(" CIR: Correlation Coefficient is smaller than Critical CC: Critical=%3.2f, Selected=%3.2f, Year=%d, Month=%s, Variable=%s\n", CR, MaxCR, tyear, curmon, varnm))
}
break
}
MaxCR = MaxCR - 0.05
} # End of Repeat
if(bcnt > 1){
cat(sprintf(" CIR: Analysis has been finished Current Month=%s, Variable=%s\n", curmon, varnm))
outfile = paste(outdir, "/Reg_", varnm, "_", sprintf("%02d", j), "-SplitTest.csv", sep="")
#outfile = paste(outdir, "/Reg_", varnm, "_", sprintf("%02d", j), ".csv", sep="")
write.csv(bptr, outfile, row.names=F)
} else {
cat(sprintf(" CIR: Predictors are not available : Year=%d, Month=%s, Variable=%s\n", tyear, curmon, varnm))
}
} # End of Month loop
} # End of Station Loop
#### Select the best N models for each lag time #################
for(ii in 1:varcnt){
varnm = varnms_cir[ii]
for(jj in 1:MaxLTime){
#for(jj in 0:MaxLTime){
MinLagTime = jj
#### calculate mean of R2 for each combination
cnt = 0
for(j in smonth:emonth){
#regfile = paste(outdir, "/Reg_", varnm, "_", sprintf("%02d", j), ".csv", sep="")
regfile = paste(outdir, "/Reg_", varnm, "_", sprintf("%02d", j), "-SplitTest.csv", sep="")
if(file.exists(regfile)){
regdata = read.csv(regfile, header=T)
cnt = cnt + 1
if(cnt == 1){
rdata = regdata
} else {
temp = regdata
rdata = rbind(rdata, temp)
}
}
} # Month Loop
cnt = 0
for(j in smonth:emonth){
#mondata = data[which(data$month == j & data$lag <= -MinLagTime), ]
if(exists("rdata")){
mondata = rdata[which(rdata$month == j & rdata$lag <= (-MinLagTime+1)), ]
srtdata = mondata[order(mondata$MeanTCC, decreasing=T),]
#if same index is slected, exclude it
srtdata = subset(srtdata, !duplicated(srtdata[,c("index")]))
if(nrow(srtdata)>0){
cnt = cnt + 1
if(cnt == 1){
bestdata = srtdata[1:NBest,]
bestdata$rank = seq(1:NBest)
cnt = cnt + 1
} else {
tempdata = srtdata[1:NBest,]
tempdata$rank = seq(1:NBest)
bestdata = rbind(bestdata, tempdata)
cnt = cnt + 1
}
}
} # File exists
} # Month Loop
if(exists("bestdata")){
bestdata$FcstMode = "Obs"
outfile = paste(outdir, "/", varnm, "-MinLag", sprintf("%02d", MinLagTime), "-Observed.csv", sep="")
write.csv(bestdata, outfile, row.names=F)
cat(sprintf(" CIR: Best Index been selected for Variable=%s, MinLagtime=%s\n", varnm, jj))
}
} # Lag Loop
} # Var Loop
}
#' @export
CIReg.Combine.Forecast.Output <- function(EnvList, varnm, lagtime, MaxLTime){
prjdir = EnvList$prjdir
vardir = EnvList$vardir
varfile = EnvList$varfile
syear_mme = as.numeric(EnvList$syear_mme)
eyear_obs = as.numeric(EnvList$eyear_obs)
syear = syear_mme
eyear = as.numeric(substr(Sys.Date(),1,4))+1
#eyear = eyear_obs
# Max LeadTime
cnt = 1
out = NA
for(i in lagtime:MaxLTime){
mcnt = 1
for(k in syear:eyear){
#bcadir = paste(prjdir, "/CIReg/", mmetype, "/", k, "/02_Best_multi-model", sep="")
bcadir = paste(prjdir, "/CIReg/", k, "/02_Best_multi-model", sep="")
if(file.exists(bcadir)){
srchstr = paste(varnm, sprintf("-MinLag%02d",i), "-Observed.csv", sep="")
OutDFile = list.files(bcadir, pattern = glob2rx(srchstr), full.names = T)
data = read.csv(OutDFile, header=T)
data$yearmon = sprintf("%s-%02d", data$RmYear, data$month)
data = data[which(data$RmYear == k), c("yearmon", "estimation")]
if(mcnt == 1){
mout = data
mcnt = mcnt + 1
} else {
mtmp = data
mout = rbind(mout, mtmp)
}
} # IF file.exists
} # Year Loop
colnames(mout) = c("yearmon", sprintf("Lag%02d",i))
if(cnt == 1){
out = mout
cnt = cnt + 1
} else {
tmp = mout
out = merge(out, tmp, by="yearmon", all=T)
cnt = cnt + 1
}
} # LeadTime Loop
# Overall MME by considering different models and lead times
if(!is.na(out)){
rcnt = nrow(out)
out$MME = NA
for(i in 1:rcnt){
if(!all(is.na(out[i, 2:(ncol(out)-1)]))){
out[i, c("MME")] = mean(as.numeric(out[i, 2:(ncol(out)-1)]), na.rm=T)
}
}
### Get observed data and merge to output df
VarDFile = paste(vardir, "/", varfile, sep="")
obs = read.csv(VarDFile, header=T)
obs = obs[,c("yearmon", varnm)]
colnames(obs) = c("yearmon", "OBS")
out = merge(out, obs, by="yearmon", all=T)
### Calculate Climatology and merge to output df
obs$month = as.numeric(substr(obs$yearmon, 6, 7))
clim = aggregate(OBS ~ month, data = obs, FUN = mean)
out$CLIM = NA
for(i in 1:12){
out[which(as.numeric(substr(out$yearmon,6,7)) == i), c("CLIM")] = clim$OBS[i]
}
out = out[which(as.numeric(substr(out$yearmon,1,4)) >= syear & as.numeric(substr(out$yearmon,1,4)) <= eyear), ]
out = out[order(as.numeric(substr(out$yearmon, 1, 4)), as.numeric(substr(out$yearmon, 6, 7))),]
out = out[!(is.na(out$MME)) | !(is.na(out$OBS)),]
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.