#######################################################################################
################### master script part 4 - variogram scores #########################
#######################################################################################
# This script compares the downweigh sample covariance matrix and then PCAing it based on their variogram scores.
#
#
# Files generated:
#
# Directories: PCACov, GeoStat, ECC
# Data files (* in 1:12): PCA/CovRes_mon*.RData, PCA/diff_var_by_PC_m*.RData, PCA/var_sc_by_PC.RData, PCA/var_sc_by_PC_no_marg_corr.RData
# GeoStat/diff_var_geoStat_m*.RData, GeoStat/variogram_exp_m*.RData, GeoStat/var_sc.RData,
# ECC/ECC_fc.RData, ECC/diff_var_ECC_m*.RData, ECC/var_sc.RData
#
# Plots: mean_variogram_scores.pdf
#
# Requires previous run of 03.master.var.est.R
# with the same value of name_abbr as below.
##### setting up ######
rm(list = ls())
setwd("~/NR/SFE")
options(max.print = 1e3)
library(PostProcessing)
library(data.table)
name_abbr = "NAO_2"
save_dir = paste0("~/PostClimDataNoBackup/SFE/Derived/", name_abbr,"/")
load(file = paste0(save_dir,"setup.RData"))
########################################
for_res_cov = function(dt,
weight_mat,
Y = 1985:2000,
M = 1:12,
save_dir = "~/PostClimDataNoBackup/SFE/PCACov/",
ens_size = 9,
version = "wrt_ens_mean")
{
land_ids <- which(dt[, is.na(Ens_bar) | is.na(SST_bar)])
if(!identical(land_ids,integer(0)))
{
dt = dt[-land_ids,]
}
for(mon in M)
{
print(paste0("month =",mon))
dt_SC = copy(dt[month == mon & year %in% Y,])
if(version == "wrt_ens_mean")
{
dt_SC[,"Res":= SST_bar - trc(Ens_bar + Bias_Est)]
cor_factor = 1 / sqrt( length(Y))
sqrt_cov_mat = as.matrix(na.omit(dt_SC[,Res]))
res_cov_sqrt = cor_factor * matrix(sqrt_cov_mat,ncol = length(Y) )
Sigma = res_cov_sqrt %*% t(res_cov_sqrt)
Sigma = weight_mat * Sigma
}
save(Sigma, file = paste0(save_dir,"CovRes_mon",mon,".RData"))
rm(dt_SC)
}
}
###############################
########### PCA ###############
###############################
# to skip this section, run instead:
# PCA_dir = paste0(save_dir, "PCA/")
# load(file = paste0(PCA_dir,"variogram_scores.RData"))
##### setting up ######
PCA_dir = paste0(save_dir,"PCA_dw/")
dir.create(PCA_dir, showWarnings = FALSE)
training_years = DT[!(year %in% validation_years),unique(year)]
# get weight matrix:
L = 2000
NA_rows = DT[,which(is.na(SST_bar) | is.na(SST_hat) | is.na(SD_hat))]
DT_NA_free = DT[-NA_rows,]
sp <- sp::SpatialPoints(cbind(x=DT_NA_free[YM == min(YM), Lon],
y=DT_NA_free[YM == min(YM), Lat]),
proj4string = sp::CRS("+proj=longlat +datum=WGS84"))
Dist <- sp::spDists(sp, longlat = TRUE)
weights = GneitingWeightFct(Dist, L=L)
weight_mat = matrix(weights, nrow = dim(Dist)[1])
# get downweighted covariance matrices:
for_res_cov(Y = training_years,
dt = DT,
weight_mat = weight_mat,
save_dir = PCA_dir,
ens_size = ens_size,
version = "wrt_mean")
PCs = c(10,20,30,40,50,100,150,200,250) # range of PCs to test
#### variogram score computation: ####
for(m in months)
{
# setup: get principal components and marginal variances for the given month m:
print(paste0("m = ",m))
load(file = paste0(PCA_dir,"CovRes_mon",m,".RData"))
PCA = irlba::irlba(Sigma, nv = max(PCs))
assign(paste0("PCA",m), PCA)
land_ids = which(DT[year == min(year) & month == min(month),is.na(Ens_bar) | is.na(SST_bar)])
PCA_DT = DT[year == min(year) & month == min(month),][-land_ids,.(Lon,Lat,grid_id)]
for(d in 1:max(PCs))
{
PCA_DT [,paste0("PC",d) := PCA$d[d]*PCA$u[,d]]
}
# also get marginal variances
variances = list()
d = 1
vec = PCA_DT[,eval(parse(text = paste0("PC",d)))]
variances[[d]] = vec^2
if(max(PCs)>1)
{
for(d in 2:max(PCs)){
vec = PCA_DT[,eval(parse(text = paste0("PC",d)))]
variances[[d]] = variances[[d-1]] + vec^2
}
}
names(variances) = paste0("var",1:max(PCs))
PCA_DT = data.table(PCA_DT,rbindlist(list(variances)))
# now move to getting the variogram scores:
# without marginal correction:
dummy_fct = function(y)
{
var_sc_PCA_old(m, y, DT, PCA = PCA, PCA_DT = PCA_DT, dvec = PCs, ens_size = ens_size,
file_name = "var_sc_by_PC_dw_before",
marginal_correction = FALSE,
cov_dir = PCA_dir, save_dir = PCA_dir)
}
parallel::mclapply(validation_years,dummy_fct,mc.cores = length(validation_years))
# with marginal correction:
dummy_fct = function(y)
{
var_sc_PCA_old(m, y, DT, PCA = PCA, PCA_DT = PCA_DT, dvec = PCs, ens_size = ens_size,
file_name = "var_sc_by_PC_dw_before",
marginal_correction = TRUE,
cov_dir = PCA_dir, save_dir = PCA_dir)
}
parallel::mclapply(validation_years,dummy_fct,mc.cores = length(validation_years))
}
###### combine: ######
scores_nmc = list()
k=0
for(m in months){
for(y in validation_years)
{k=k+1
load(file = paste0(PCA_dir,"var_sc_by_PC_dw_before_nmc_m",m,"_y",y,".RData"))
scores_nmc[[k]] = scores
}
}
scores_nmc = rbindlist(scores_nmc)
scores_mc = list()
k=0
for(m in months){
for(y in validation_years)
{k=k+1
load(file = paste0(PCA_dir,"var_sc_by_PC_dw_before_m",m,"_y",y,".RData"))
scores_mc[[k]] = scores
}
}
scores_mc = rbindlist(scores_mc)
pdf(file = paste0(plot_dir,"vs_by_dist_Gn_wf_PCA50_wrtm.pdf"))
plot(score_by_dist[,.(dist,mean_sc_mc)],type = "l",
main = "variogram score for comp. supp. cov. fct.",
xlab = "Parameter L", ylab = "mean vs")
dev.off()
print(paste0("Minimal variogram score is achieved for ",mean_sc[mean_sc == min(mean_sc),d][1],
" principal components with a score of ",mean_sc[,min(mean_sc)],
". Without marginal correction it is achieved for ",mean_sc_nmc[mean_sc == min(mean_sc),d][1],
" principal components with a score of ",mean_sc_nmc[,min(mean_sc)],"."))
# save
mean_sc = scores_mc[,mean_sc := mean(sc), by = d][,.(d,mean_sc)]
mean_sc = unique(mean_sc)
mean_sc_nmc = scores_nmc[,mean_sc := mean(sc), by = d][,.(d,mean_sc)]
mean_sc_nmc = unique(mean_sc_nmc)
save(scores_mc,scores_nmc,mean_sc,mean_sc_nmc,file = paste0(PCA_dir,"variogram_scores",weight_name_addition,".RData"))
#########################################
########### Geostationary ###############
#########################################
# to skip this section, run instead:
# geostat_dir = paste0(save_dir, "GeoStat/")
# load(file = paste0(geostat_dir,"variogram_scores.RData"))
###########################################
geostat_dir = paste0(save_dir, "GeoStat/")
dir.create(geostat_dir, showWarnings = FALSE)
geostationary_training(dt = DT, save_dir = geostat_dir,m = months)
for(m in months)
{
# setup: get principal components and marginal variances for the given month m:
print(paste0("m = ",m))
load(paste0(geostat_dir,"variogram_exp_m",m,".RData"))
# variogram scores without marginal correction:
dummy_fct = function(y)
{
var_sc_geoStat_new( DT, m, y, Mod = Mod, Dist = Dist,
weighted = weight_a_minute, weight_fct = weight_fct,
file_name = paste0("var_sc",weight_name_addition),
save_dir = geostat_dir, data_dir = geostat_dir,
mar_var_cor = FALSE)
}
parallel::mclapply(validation_years,dummy_fct,mc.cores = length(validation_years))
# with marginal correction:
dummy_fct = function(y)
{
var_sc_geoStat_newnew( DT,m, y, Mod = Mod, Dist = Dist,
weighted = weight_a_minute, weight_fct = weight_fct,
file_name = paste0("var_sc",weight_name_addition),
save_dir = geostat_dir, data_dir = geostat_dir,
mar_var_cor = TRUE)
}
parallel::mclapply(validation_years,dummy_fct,mc.cores = length(validation_years))
}
####### combine #########
scores_geostat_nmc = list()
k=0
for(m in months){
for(y in validation_years)
{k=k+1
load(file = paste0(geostat_dir,"var_sc",weight_name_addition,"_nmc_m",m,"_y",y,".RData"))
scores_geostat_nmc[[k]] = scores
}
}
scores_geostat_nmc = rbindlist(scores_geostat_nmc)
scores_geostat_mc = list()
k=0
for(m in months){
for(y in validation_years)
{k=k+1
load(file = paste0(geostat_dir,"var_sc",weight_name_addition,"_m",m,"_y",y,".RData"))
scores_geostat_mc[[k]] = scores
}
}
scores_geostat_mc = rbindlist(scores_geostat_mc)
mean_geostat_sc = scores_geostat_mc[,mean_sc := mean(sc)][,.(mean_sc)]
mean_geostat_sc = unique(mean_geostat_sc)
mean_geostat_sc_nmc = scores_geostat_nmc[,mean_sc := mean(sc)][,.(mean_sc)]
mean_geostat_sc_nmc = unique(mean_geostat_sc_nmc)
# save
save(scores_geostat_mc,scores_geostat_nmc,mean_geostat_sc,mean_geostat_sc_nmc,file = paste0(geostat_dir,"variogram_scores",weight_name_addition,".RData"))
#########################################
################ ECC ####################
#########################################
# to skip this section, run instead:
# ECC_dir = paste0(save_dir,"ECC/")
# load(file = paste0(ECC_dir,"var_sc.RData"))
# sc_ECC = sc
#########################################
ECC_dir = paste0(save_dir,"ECC/")
dir.create(ECC_dir, showWarnings = FALSE)
forecast_ECC(dt = DT[year %in% validation_years],save_dir = ECC_dir)
setup_var_sc_ECC(eval_years = validation_years,
months = months,
weighted = weight_a_minute, weight_fct = weight_fct,
ens_size = ens_size,
file_name = paste0("diff_var_ECC",weight_name_addition,"m"),
data_dir = ECC_dir, save_dir = ECC_dir)
var_sc_ECC(months = months,eval_years = validation_years,save_dir = ECC_dir,
data_name = paste0("diff_var_ECC",weight_name_addition,"m"),file_name = paste0("var_sc",weight_name_addition,".RData"))
load(file = paste0(ECC_dir,"var_sc",weight_name_addition,".RData"))
sc_ECC = sc
#########################################
########### plotting scores: ##############
###########################################
# we leave out ECC, because its score is just too bad:
print(paste0("variogram score for ECC is ",sc_ECC[,mean(sc)]))
pdf(paste0(plot_dir,"/mean_variogram_scores_dw_before.pdf"))
rr = range(c(scores_mc[,mean(sc),by = d][,2],scores_nmc[,mean(sc),by = d][,2],mean_geostat_sc[,mean_sc],mean_geostat_sc_nmc[,mean_sc]))
#### with marginal correction ####
plot(x = mean_sc[[1]],
y = mean_sc[[2]],
ylim = rr,
type = "b",
col = "blue",
main = paste0("mean variogram scores"),
xlab = "number of principal components",
ylab = "mean score")
#---- add minima: -----
#abline(h = min(mean_sc[[2]]), lty = "dashed", col = adjustcolor("blue",alpha = .5))
opt_num_PCs = mean_sc[,which.min(mean_sc)]
points(x = mean_sc[[1]][opt_num_PCs],
y = mean_sc[[2]][opt_num_PCs],
col = "blue",
bg = "blue",
pch = 21)
#### without marginal correction ####
lines(x = mean_sc_nmc[[1]],
y = mean_sc_nmc[[2]],
type = "b",
col = "darkgreen")
#---- add minima: -----
#abline(h = min(mean_sc_nmc[[2]]), lty = "dashed", col = adjustcolor("darkgreen",alpha = .5))
opt_num_PCs = mean_sc_nmc[,which.min(mean_sc)]
points(x = mean_sc_nmc[[1]][opt_num_PCs],
y = mean_sc_nmc[[2]][opt_num_PCs],
col = "darkgreen",
bg = "darkgreen",
pch = 21)
# --- add geostat value: ----
abline(h = mean_geostat_sc[,mean_sc], lty = "dashed", col = adjustcolor("darkred"))
abline(h = mean_geostat_sc_nmc[,mean_sc], lty = "dashed", col = adjustcolor("black"))
#abline(h = sc_ECC[,mean(sc)], lty = "dashed", col = adjustcolor("pink"))
legend("topright",legend = c("PCA, m.c.v.","PCA","geostat"),
col = c("blue","darkgreen","darkred"),lty = c(1,1,2))
# legend("topright",legend = c("PCA, m.c.v.","PCA","geostat, m.c.v.","geostat","ECC"),
# col = c("blue","darkgreen","darkred","black","pink"),lty = c(1,1,2,2,2))
dev.off()
##################
save.image(file = paste0(save_dir,"setup.RData"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.