#' Calculates the Fourier terms for modeling seasonality
#' @param x Coefficients
#' @param K Number of Fourier terms (default = 2)
#' @param m Number of observations per period (default = 365.25*4)
#' @export
#' @return A matrix with 2xK Fourier terms
fourier = function(x, K = 2, m = 365.25*4) {
n = length(x)
idx = 1:n
fourier.x = matrix(nrow = n, ncol = 2*K)
coln = rep(NA, 2*K)
for(k in 1:K) {
fourier.x[,2*k-1] = sin(2*pi*k*idx/m)*x
fourier.x[,2*k] = cos(2*pi*k*idx/m)*x
coln[2*k-1] = paste("sin", k, sep = "")
coln[2*k] = paste("cos", k, sep = "")
}
colnames(fourier.x) = coln
colnames(fourier.x) = paste("intercept", colnames(fourier.x), sep = "_")
return(fourier.x)
}
#' Splitting the data in a traing set and a test set
#' @param years A vector containg the year of each observation
#' @param trainingPeriod A vector defining for which time period the model is trained on (default = 2006:2014)
#' @param testPeriod Defining for which time period the model is tested on (default = 2015)
#' @export
#' @return A list containing the training set and the test set
split.data = function(years, trainingPeriod = 2006:2014, testPeriod = 2015) {
training.test = list()
training.test[[1]] = which(years %in% trainingPeriod)
training.test[[2]] = which(years %in% testPeriod)
return(training.test)
}
#' Compute quantiles of the Box Cox distribution
#' @param obs The probability
#' @param mean The mean of the distribution
#' @param sd The standard deviation of the distribution
#' @param lambda The lambda used in the Box Cox transformation
#' @export
#' @return The quantiles of the Box Cox distribution
qBoxCox = function(p, mean, sd, lambda) {
if(round(lambda,2) < 0) {
q = (lambda*(sd*qnorm(p)+mean)+1)^(1/lambda)
} else if(round(lambda,2) == 0) {
q = exp(mean + sd*qnorm(p))
} else { # lambda > 0
T = (1/lambda + mean)/sd
Vp = 1 - (1-p)*pnorm(T)
q = (lambda*(sd*qnorm(Vp)+mean)+1)^(1/lambda)
}
return(q)
}
#' Compute the Inverse Box Cox tranformation
#' @param obst Values to be detransformed
#' @param lambda The lambda used in the Box Cox transformation
#' @export
#' @return The quantiles of the Box Cox distribution
InvBoxCox = function(obst, lambda){
if(is.na(lambda)) {
obs = NA
} else {
if(lambda == 0) {
obs = exp(obst)
} else {
obs = (lambda*obst + 1)^(1/lambda)
}
}
return(obs)
}
#' Compute the Inverse Box Cox tranformation
#' @param n Number of samples to simulate
#' @param mean Mean of truncated normal distribution
#' @param sd Standard deviation of truncated normal distribution
#' @param a Lower truncation limit
#' @param b Upper truncation limit
#' @export
#' @return Samples from the truncated normal distribution
rtnorm = function(n, mean, sd, a, b) {
q = c(a,b)
p = pnorm(q, mean, sd)
nt = round(n/(p[2] - p[1]))
x = rnorm(nt, mean, sd)
x = x[x > a & x <b]
return(x)
}
#' Generate samples from the predictive distribution
#' @param mean Mean of untruncated normal distribution
#' @param sd Standard deviation of untruncated normal distribution
#' @param lambda paramter in Box Cox transformation
#' @export
#' @return Samples from the predictive distribution
rpred = function(n, mean, sd, lambda) {
if(lambda < -1e-6) {
a = -Inf
b = -1/lambda
xbc = rtnorm(n, mean, sd, a, b)
} else if(lambda > 1e-6) {
a = -1/lambda
b = Inf
xbc = rtnorm(n, mean, sd, a, b)
} else {
xbc = rnorm(n, mean, sd)
}
x = InvBoxCox(xbc, lambda)
return(x)
}
#' Compute density of the Box Cox distribution
#' @param obs The probability
#' @param mean The mean of the distribution
#' @param sd The standard deviation of the distribution
#' @param lambda The lambda used in the Box Cox transformation
#' @param log Logical value to switch between log or not
#' @export
#' @return The density values of the Box Cox distribution
dboxcox = function(obs, mean, sd, lambda, log = FALSE) {
if(log == FALSE) {
val = rep(0, length(obs))
if(round(lambda,2) < 0) {
val[obs>0] = obs[obs>0]^(lambda-1)*sd^(-1)*dnorm(((obs[obs>0]^lambda-1)/lambda-mean)/sd)*pnorm((-1/lambda-mean)/sd)^(-1)
} else if(round(lambda,2) == 0) {
val = dlnorm(obs, mean, sd, log = FALSE)
} else {
val[obs>0] = obs[obs>0]^(lambda-1)*sd^(-1)*dnorm(((obs[obs>0]^lambda-1)/lambda-mean)/sd)*pnorm((1/lambda+mean)/sd)^(-1)
}
} else {
val = rep(-Inf, length(obs))
if(round(lambda,2) < 0) {
val[obs>0] = (lambda-1)*log(obs[obs>0]) - log(sd) + dnorm(((obs[obs>0]^lambda-1)/lambda-mean )/sd, log = TRUE) - log(pnorm( (-1/lambda - mean)/sd))
} else if(round(lambda,2) == 0) {
val = dlnorm(obs, mean, sd, log = TRUE)
} else {
val[obs>0] = (lambda-1)*log(obs[obs>0]) - log(sd) + dnorm(((obs[obs>0]^lambda-1)/lambda-mean)/sd, log = TRUE) - log(pnorm((1/lambda+mean)/sd))
}
}
return(val)
}
#' Compute the mean absolute error
#' @param obs Observations
#' @param mean The mean of the fitted distribution
#' @param sd The standard deviation of the fitted distribution
#' @param lambda The lambda used in the Box Cox transformation
#' @param print2screen Logical parameter to print results to screen
#' @export
#' @return The mean absolute error of the Box Cox distribution
maeEst = function(obs,mean, sd, lambda, print2screen = TRUE) {
median <- qBoxCox(0.5, mean, sd, lambda)
mae <- mean(abs(obs - median))
if(print2screen) cat("\n The mean absolute error is:",mae,"\n")
return(mae)
}
#' Compute the log score
#' @param obs Observations
#' @param mean The mean of the fitted distribution
#' @param sd The standard deviation of the fitted distribution
#' @param lambda The lambda used in the Box Cox transformation
#' @param print2screen Logical parameter to print results to screen
#' @export
#' @return The -log score. The lower value, the better.
logsEst = function(obs,
mean,
sd,
lambda,
log = TRUE,
print2screen = TRUE) {
logsD = dboxcox(obs = obs,
mean = mean,
sd = sd,
lambda = lambda,
log = log)
logs = mean(logsD)
if(print2screen) cat("\n The log score is:",logs,"\n")
return(logs)
}
#' Calculates the PIT values
#' @param obs.bc Box Cox transformed observations
#' @param mean The mean of the fitted distribution
#' @param sd The standard deviation of the fitted distribution
#' @param lambda The lambda used in the Box Cox transformation
#' @export
#' @return The PIT values normalized to the zero one intervall
ppredbc = function(obs, mean, sd, lambda) {
obs.bc <- BoxCoxLambdaKnown(obs, lambda)
if(round(lambda,2) < 0) {
p = pnorm(obs.bc, mean = mean, sd = sd)/pnorm(-1/lambda, mean = mean, sd = sd)
} else if(round(lambda,2) == 0) {
p = pnorm(obs.bc, mean = mean, sd = sd)
} else { # lambda > 0
p = pnorm(obs.bc, mean = mean, sd = sd)/(1-pnorm(-1/lambda, mean = mean, sd = sd))
}
# if(round(lambda,2) < 0) {
# p = pnorm((g.x-mean)/sd) / pnorm((-1/lambda - mean)/sd)
# } else if(round(lambda,2) == 0) {
# p = pnorm((g.x-mean)/sd)
# } else { # lambda > 0
# p = (pnorm((g.x-mean)/sd) - pnorm((-1/lambda - mean)/sd)) / pnorm((1/lambda + mean)/sd)
# }
return(p)
}
#' Computes the reliability index
#' @param obs.bc Box Cox transformed observations
#' @param mean The mean of the fitted distribution
#' @param sd The standard deviation of the fitted distribution
#' @param lambda The lambda used in the Box Cox transformation
#' @param n.bins Number of bins used
#' @param print2screen Logical parameter to print results to screen
#' @export
#' @return The reliability index (RIDX)
ribxEst = function(obs,
mean,
sd,
lambda,
n.bins = 20,
print2screen = TRUE) {
U = ppredbc(obs = obs,
mean = mean,
sd = sd,
lambda = lambda)
U = U[ !is.na(U) ]
n = length(U)
if(n > 0) {
seps = seq(0, 1, length.out = n.bins+1)
probs = rep(NA, n.bins)
for(i in 1:n.bins) {
probs[i] = sum( U >= seps[i] & U <= seps[i+1] )
}
probs = probs/n
unif.prob = rep(1/n.bins, n.bins)
RI = mean(abs(probs - unif.prob)*100)
} else {
RI = NA
}
if(print2screen) cat("\n The reliability index is:",RI,"\n")
return(RI)
}
#' Computes the Continuous Ranked Probability Score (CRPS)
#' @param obs.bc Box Cox transformed observations
#' @param mean The mean of the fitted distribution
#' @param sd The standard deviation of the fitted distribution
#' @param lambda The lambda used in the Box Cox transformation
#' @param print2screen Logical parameter to turn on and off print to screen
#' @export
#' @return The Continuous Ranked Probability Score (CRPS)
crpsEst = function(obs,
mean,
sd,
lambda,
Nsamples = 10000,
print2screen = TRUE) {
nobs = length(mean)
crps = rep(NA,nobs)
for(i in 1:nobs) {
EXz = 0
EXXm = 0
# Generate samples from Box Cox distribution
x = rpred(Nsamples, mean[i], sd, lambda)
EXz = mean(abs(x - obs))
# Generate samples from Box Cox distribution
x = rpred(Nsamples, mean[i], sd, lambda)
nsamp = 0.0;
for(j in 0:(Nsamples/2-1)) {
EXXm = EXXm + abs(x[2*j+1] - x[2*j+2])
nsamp = nsamp + 1.0
}
EXXm = EXXm/nsamp
crps[i] = EXz - 0.5*EXXm;
}
crps = mean(crps)
if(print2screen) cat("\n The continuous ranked probability score is:",crps,"\n")
return(crps);
}
#' Compute the root mean squared error
#' @param obs Observations
#' @param mean The mean of the fitted distribution
#' @param sd The standard deviation of the fitted distribution
#' @param lambda The lambda for the Box-Cox distribution
#' @param Nsamples The number of samples to generate from the predictive distribution.
#' @param print2screen Logical parameter to turn on and off print to screen
#' @export
#' @return The root mean squared error of the predictive distribution
rmseEst = function(obs,
mean,
sd,
lambda,
Nsamples = 10000,
print2screen = TRUE){
nobs = length(mean)
# For every observation and prediction (temporal position) we must first estimate the expectation of the Box Cox distribution by simulation (lines 83 - 84). Next compute squared difference to observation. Som over position, finally take the quare root of the mean.
RMSE = 0
for(i in 1:nobs) {
x = rpred(Nsamples,
mean[i],
sd,
lambda)
Ex = mean(x)
RMSE = RMSE + (obs[i] - Ex)^2
}
RMSE = sqrt(RMSE/nobs)
if(print2screen) cat("\n The root mean squared error is:",RMSE,"\n")
return(RMSE)
}
#' Summary function to calculate a table of performance measures
#' @param obs Observations
#' @param mean The mean of the fitted distribution
#' @param sd The standard deviation of the fitted distribution
#' @param lambda The lambda for the Box-Cox distribution
#' @param Nsamples The number of samples to generate from the predictive distribution.
#' @param print2screen Logical parameter to print results to screen
#' @export
#' @return The root mean squared error of the predictive distribution
perfMeasures = function(obs,
mean,
sd,
lambda,
Nsamples = 10000,
print2screen = TRUE){
df = data.frame("Performance measure" = NA,"Value" = NA)
if(print2screen){
cat("\n =======================================================\n")
cat("\n Summary table of the performance measures\n")
cat(" -----------------------------------------\n")
}
df[1,1] = "MAE"
df[1,2] = maeEst(obs = obs,
mean = mean,
sd = sd,
lambda = lambda,
print2screen = FALSE)
if(print2screen) cat("\n The mean absolute error is:",df[1,2],"\n")
df[2,1] = "RMSE"
df[2,2] = rmseEst(obs = obs,
mean = mean,
sd = sd,
lambda = lambda,
Nsamples = 10000,
print2screen = FALSE)
if(print2screen) cat(" The root mean squared error is:",df[2,2],"\n")
df[3,1] = "Logs"
df[3,2] = logsEst(obs = obs,
mean = mean,
sd = sd,
lambda = lambda,
log = TRUE,
print2screen = FALSE)
if(print2screen) cat(" The log score is:",df[3,2],"\n")
df[4,1] = "RIBX"
df[4,2] = ribxEst(obs = obs,
mean = mean,
sd = sd,
lambda = lambda,
print2screen = FALSE)
if(print2screen) cat(" The reliability index is:",df[4,2],"\n")
df[5,1] = "CRPS"
df[5,2] = crpsEst(obs = obs,
mean = mean,
sd = sd,
lambda = lambda,
print2screen = FALSE)
if(print2screen) cat(" The continuous ranked probability score is:",df[5,2],"\n")
if(print2screen) cat("\n =======================================================\n")
return(df)
}
#' Performs Box-Cox transformation of the data. The transformation parameter (lambda) is chosen to minimize deviation from the normal distribution (minumum sum of squared skewness and squared kurtosis)
#' @param obs The observations to be transformed
#' @return The observations transformed
#' @return The estimated lambda
#' @export
BoxCoxLambda = function(obs) {
lambda = seq(0.0, 1.0, 0.1)
obs.trans = matrix(nrow = length(lambda), ncol = length(obs))
normdev = rep(NA, length(lambda)) # Holds the amount of deviation from the normal distribution
for(i in 1:length(lambda)) {
if(lambda[i] == 0) {
obs.trans[i,] = log(obs)
} else {
obs.trans[i,] = (obs^lambda[i]-1)/lambda[i]
}
normdev[i] = moments::skewness(obs.trans[i,],na.rm = TRUE)^2 + 0.25*(moments::kurtosis(obs.trans[i,],na.rm = TRUE))^2
}
return(list(data=obs.trans[which.min(normdev),],lambda = lambda[which.min(normdev)]))
}
#' Plot the predictive distribution
#' @param obs The observations to be transformed
#' @param t.period The time period for plotting
#' @param mean The mean value of the distribution
#' @param sd The sd value of the distribution
#' @param lambda The estimated lambda of the distribution
#' @param graphDev Logical variable to create a OS specific graphic device
#' @return A figure showing the predictive distribution
#' @export
plotPred = function(obs,t.period, mean, sd,lambda, graphD=FALSE) {
upper <- qBoxCox(0.95, mean = mean, sd = sd, lambda = lambda)
lower <- qBoxCox(0.05, mean = mean, sd = sd, lambda = lambda)
median <- qBoxCox(0.5, mean, sd, lambda)
t.upper <- upper[t.period]
t.lower <- lower[t.period]
if(graphD) graphDev(width = 7,height = 5)
plot(t.period, obs[t.period], type="l",
xlab="Time point in test period", ylab="SWH", ylim=c(0,15),
main="")
polygon(c(t.period, rev(t.period), t.period[1]),
c(t.lower, rev(t.upper), t.lower[1]), col="gray90", border="NA")
lines(t.period, obs[t.period])
lines(t.period, median[t.period], col="gray50", lty=2)
legend("topright", lty=c(1,2,1), lwd=c(1,1,4), col=c("black", "gray50", "gray90"),
legend=c("Observation", "Predictive median", "90% prediction interval"))
}
#' Plot random predictive trajectories for a selection of time points
#' @param obs The observations to be transformed
#' @param t.period The time period for plotting
#' @param mean The mean value of the distribution
#' @param sd The sd value of the distribution
#' @param lambda The estimated lambda of the distribution
#' @param n.random The number of random predictive trajectories
#' @param graphDev Logical variable to create a OS specific graphic device
#' @return A figure showing the random predictive trajectories
#' @export
rPlotPred = function(obs,
t.period,
mean,
sd,
lambda,
n.random,
graphD = FALSE) {
t.ind = t.period
n.period = length(t.ind)
random.q = array(NA, dim=c(n.random,n.period))
for(i in 1:n.period) random.q[,i] = qBoxCox(runif(n.random), mean[t.ind[i]], sd, lambda)
if(graphD) graphDev(width = 7,height = 5)
plot(t.ind, random.q[1,], type="l", col="gray50",
xlab="Time point in test period", ylab="SWH", ylim=c(5,12),
main="Random predictive trajectories")
for(i in 2:n.random) lines(t.ind, random.q[i,], col="gray50")
lines(t.ind, obs[t.ind], col="black", lwd=2)
}
#' Plot random correlation for a selection of time points
#' @param obs The observations to be transformed
#' @param t.period The time period for plotting
#' @param mean The mean value of the distribution
#' @param sd The sd value of the distribution
#' @param lambda The estimated lambda of the distribution
#' @param n.random The number of random predictive trajectories
#' @param training.test The training and the test period
#' @param SWHobs SWH observations for a particular location in the test period
#' @param graphDev Logical variable to create a OS specific graphic device
#' @return A figure showing the random correlations
#' @export
rCorr = function(obs,
t.period,
mean,
sd,
lambda,
n.random,
training.test,
SWHobs,
graphD = FALSE) {
n.period = length(t.period)
t.ind = t.period
random.q = array(NA, dim=c(n.random,n.period))
for(i in 1:n.period) random.q[,i] = qBoxCox(runif(n.random), pred.mean[t.ind[i]], pred.sd, pred.lambda)
sample.q <- array(NA, dim=c(n.random,n.period))
for(i in 1:n.period) sample.q[,i] <- rank(random.q[,i])
sample.q
nTest <- length(training.test[[2]])
h.ind <- training.test[[2]][(nTest-(n.random*n.period-1)):nTest]
hist.obs <- t(array(SWHobs[h.ind], dim=c(n.period,n.random)))
hist.q <- array(NA, dim=c(n.random,n.period))
for(i in 1:n.period) hist.q[,i] = rank(hist.obs[,i])
hist.q
sort.q <- random.q
for(i in 1:n.period) sort.q[,i] = sort(random.q[,i])[hist.q[,i]]
if (graphD) graphDev(width = 7,height = 5)
plot(t.ind, sort.q[1,], type="l", col="gray50",
xlab="Time point in test period", ylab="SWH", ylim=c(5,12),main="")
for(i in 2:n.random) lines(t.ind, sort.q[i,], col="gray50")
lines(t.ind, obs[t.ind], col="black", lwd=2)
}
#' Prephare graphical device for a given OS
#' @param width Width of the graphical window
#' @param height Height of the grapchical window
#' @return The an OS dependent graphical window
#' @export
graphDev = function(width = 7,height = 5) {
system = Sys.info()
if(system['sysname']=="Windows"){
windows(width = 7,height = 5)
}
if(system['sysname']=="Linux"){
X11(width = 7,height = 5)
}
if(system['sysname']=="Darwin"){
quartz("",width = 7,height = 5)
}
}
#' Compute PIT values of the Box Cox distribution
#' @param obs The observations to be transformed
#' @param mean The mean of the predictive distribution
#' @param sd The standard deviation of the predictive distribution
#' @param lambda The estimated Box Cox lambda parameter used in the transformation of the predictive distribution
#' @param plothist Set to TRUE if plotting the histogram of the PIT values
#' @param graphDev Logical variable to create a OS specific graphic device
#' @return The PIT values
#' @export
pBoxCox = function(x, mean, sd, lambda,plothist = TRUE,graphD = FALSE) {
g.x <- BoxCoxLambdaKnown(x, lambda)
if(round(lambda,2) < 0) {
p = pnorm((g.x-mean)/sd) / pnorm((-1/lambda - mean)/sd)
} else if(round(lambda,2) == 0) {
p = pnorm((g.x-mean)/sd)
} else { # lambda > 0
p = (pnorm((g.x-mean)/sd) - pnorm((-1/lambda - mean)/sd)) / pnorm((1/lambda + mean)/sd)
}
if(graphD) graphDev(width = 7,height = 5)
hist(p, freq=FALSE, nclass=10, col="gray", xlab="PIT value",main = "PIT histogram")
abline(a=1, b=0, lty=2,col = "red")
return(p)
}
#' Performs Box-Cox transformation with a known lambda parameter
#' @param obs The observations to be transformed
#' @param lambda The lambda parameter to be used in the transformation
#' @return The observations transformed
#' @export
BoxCoxLambdaKnown = function(obs, lambda) {
if(round(lambda,3) == 0) {
obs = log(obs)
} else {
obs = (obs^lambda - 1)/lambda
}
return(obs)
}
#' Calculates the AR lags
#' @param n.training Size of training data
#' @param n.test Size of training data
#' @param maxlag Order of AR process
#' @return AR terms for various lags used in model fitting
#' @export
makeARterms = function(SWH.bc.standard.training,
SWH.bc.fourier.training,
SWH.bc.standard.test,
SWH.bc.fourier.test,
n.training = 100,
n.test = 10,
maxlag = 10) {
SWH.bc.ar.training.names = rep("",maxlag)
SWH.bc.fourier.ar.training.names = rep("",maxlag)
SWH.bc.ar.test.names = rep("",maxlag)
SWH.bc.fourier.ar.test.names = rep("",maxlag)
SWH.bc.ar.training.list = list()
SWH.bc.fourier.ar.training.list = list()
SWH.bc.ar.test.list = list()
SWH.bc.fourier.ar.test.list = list()
for(lag in 1:maxlag){
#assign(paste("SWH.bc.ar.training.m",lag,sep = ""),c(rep(SWH.bc.standard.training[1],lag), SWH.bc.standard.training[1:(n.training-lag)]))
SWH.bc.ar.training.names[lag] = paste("SWH.bc.ar.training.m",lag,sep = "")
SWH.bc.ar.training.list[[lag]] = c(rep(SWH.bc.standard.training[1],lag), SWH.bc.standard.training[1:(n.training-lag)])
#assign(paste("SWH.bc.fourier.ar.training.m",lag,sep=""),rbind(matrix(rep(SWH.bc.fourier.training[1,],lag), nrow = lag, byrow = TRUE), SWH.bc.fourier.training[1:(n.training-lag),]))
SWH.bc.fourier.ar.training.names[lag] = paste("SWH.bc.fourier.ar.training.m",lag,sep = "")
SWH.bc.fourier.ar.training.list[[lag]] = rbind(matrix(rep(SWH.bc.fourier.training[1,],lag), nrow = lag, byrow = TRUE), SWH.bc.fourier.training[1:(n.training-lag),])
#assign(paste("SWH.bc.ar.test.m",lag,sep = ""), c(rep(SWH.bc.standard.test[1],lag), SWH.bc.standard.test[1:(n.test-lag)]))
SWH.bc.ar.test.names[lag] = paste("SWH.bc.ar.test.m",lag,sep = "")
SWH.bc.ar.test.list[[lag]] = c(rep(SWH.bc.standard.test[1],lag), SWH.bc.standard.test[1:(n.test-lag)])
#assign(paste("SWH.bc.fourier.ar.test.m",lag,sep = ""), rbind(matrix(rep(SWH.bc.fourier.test[1,],lag), nrow = lag, byrow = TRUE), SWH.bc.fourier.test[1:(n.test-lag),]))
SWH.bc.fourier.ar.test.names[lag] = paste("SWH.bc.fourier.ar.test.m",lag,sep = "")
SWH.bc.fourier.ar.test.list[[lag]] = rbind(matrix(rep(SWH.bc.fourier.test[1,],lag), nrow = lag, byrow = TRUE), SWH.bc.fourier.test[1:(n.test-lag),])
}
names(SWH.bc.ar.training.list) = SWH.bc.ar.training.names
names(SWH.bc.fourier.ar.training.list) = SWH.bc.fourier.ar.training.names
names(SWH.bc.ar.test.list) = SWH.bc.ar.test.names
names(SWH.bc.fourier.ar.test.list) = SWH.bc.fourier.ar.test.names
ARterms = list()
ARterms$SWH.bc.ar.training.list = SWH.bc.ar.training.list
ARterms$SWH.bc.fourier.ar.training.list = SWH.bc.fourier.ar.training.list
ARterms$SWH.bc.ar.test.list = SWH.bc.ar.test.list
ARterms$SWH.bc.fourier.ar.test.list = SWH.bc.fourier.ar.test.list
return(ARterms)
}
#' Training the model and generating the predictive distributions
#' @param SWH The SWH data
#' @param SLP The SLP data
#' @param SLP.grad The SLP gradient data
#' @param latCell Define the latitude cell of interest (default = 4)
#' @param longCell Define the longitude cell of interest (default = 4)
#' @param training.test The training and the test data
#' @param neig Define the number of spatial neighbours (default = 2)
#' @param na.thresh Define how many missing values is "good enough" (default = 500)
#' @param latSWH A vector containing the SWH latitudes
#' @param lonSWH A vector containing the SWH longitudes
#' @param latSLP A vector containing the SLP latitudes
#' @param lonSLP A vector containing the SLO longitudes
#' @param intercept.fourier Seasonal Fourier coefficients
#' @param maxlag Define the maximum order of AR processes to be constructed
#' @return A list of predictive means, predictive spread and lambda for Box-Cox transformation
#' @export
getPreddistr = function(SWH = NA,
SLP = NA,
SLP.grad = NA,
latCell = 4,
longCell = 4,
neig = 2,
na.thresh = 500,
latSWH = NA,
lonSWH = NA,
latSLP = NA,
longSLP = NA,
intercept.fourier = NA,
maxlag = 10) {
pred.mean = rep(NA, length(training.test[[2]]))
pred.sd = NA
pred.lambda = NA
## Divide in training and test set
idx.training = training.test[[1]]
idx.test = training.test[[2]]
idx.rank = 0
## Make sure covariates are from the correct location
idx.longSLP = which(longitudeSLP == longitudeSWH[longCell])
idx.latSLP = which(latitudeSLP == latitudeSWH[latCell])
## Only continue with analysis if sufficient available data
if(sum(is.na(SWH[longCell, latCell,])) < na.thresh &
sum(is.na( SLP[idx.longSLP, idx.latSLP, ])) < na.thresh &
sum(is.na( SLP.grad[idx.longSLP, idx.latSLP, ])) < na.thresh) {
## Remove missing data
not.NA = which(!is.na(SWH[longCell, latCell,]) &
!is.na( SLP[idx.longSLP, idx.latSLP, ]) &
!is.na( SLP.grad[idx.longSLP, idx.latSLP, ]))
idx.training = idx.training[idx.training %in% not.NA]
n.training = length(idx.training)
idx.test = idx.test[idx.test %in% not.NA]
n.test = length(idx.test)
## Estimate lambda and Box-Cox transform SWH in training period
SWH.bc.dummy.training = BoxCoxLambda(SWH[longCell, latCell,idx.training])
SWH.bc.lambda.training = SWH.bc.dummy.training$lambda
SWH.bc.training = SWH.bc.dummy.training$data
## Box-Cox transform SWH in test period with same lambda
SWH.bc.test = BoxCoxLambdaKnown(SWH[longCell, latCell,idx.test], SWH.bc.lambda.training)
## Estimate lambda and Box-Cox transform SLP.grad in training period
SLP.grad.bc.dummy.training = BoxCoxLambda(SLP.grad[idx.longSLP, idx.latSLP,idx.training])
SLP.grad.bc.lambda.training = SLP.grad.bc.dummy.training$lambda
SLP.grad.bc.training = SLP.grad.bc.dummy.training$data
## Box-Cox transform SLP.grad in test period with same lambda
SLP.grad.bc.test = BoxCoxLambdaKnown(SLP.grad[idx.longSLP, idx.latSLP,idx.test], SLP.grad.bc.lambda.training)
## Standardizing the variables
SWH.bc.mean.training = mean(SWH.bc.training)
SWH.bc.sd.training = sd(SWH.bc.training)
SWH.bc.standard.training = (SWH.bc.training - SWH.bc.mean.training)/
SWH.bc.sd.training
SWH.bc.standard.test = (SWH.bc.test - SWH.bc.mean.training)/
SWH.bc.sd.training
SLP.mean.training = mean( SLP[idx.longSLP, idx.latSLP, idx.training] )
SLP.sd.training = sd( SLP[idx.longSLP, idx.latSLP, idx.training] )
SLP.standard.training = (SLP[idx.longSLP, idx.latSLP, idx.training] - SLP.mean.training)/
SLP.sd.training
SLP.standard.test = (SLP[idx.longSLP, idx.latSLP, idx.test] - SLP.mean.training)/
SLP.sd.training
SLP.grad.bc.mean.training = mean(SLP.grad.bc.training)
SLP.grad.bc.sd.training = sd(SLP.grad.bc.training)
SLP.grad.bc.standard.training = (SLP.grad.bc.training - SLP.grad.bc.mean.training)/
SLP.grad.bc.sd.training
SLP.grad.bc.standard.test = (SLP.grad.bc.test - SLP.grad.bc.mean.training)/
SLP.grad.bc.sd.training
## Compute Fourier periodics of the explanatory variables
fourier.training = intercept.fourier[idx.training,]
fourier.test = intercept.fourier[idx.test,]
SLP.fourier.training = fourier.training*SLP.standard.training
SLP.fourier.test = fourier.test*SLP.standard.test
SLP.grad.bc.fourier.training = fourier.training*SLP.grad.bc.standard.training
SLP.grad.bc.fourier.test = fourier.test*SLP.grad.bc.standard.test
SWH.bc.fourier.training = fourier.training*SWH.bc.standard.training
SWH.bc.fourier.test = fourier.test*SWH.bc.standard.test
# Create AR terms for model selection
ARterms = makeARterms(SWH.bc.standard.training = SWH.bc.standard.training,
SWH.bc.fourier.training = SWH.bc.fourier.training,
SWH.bc.standard.test = SWH.bc.standard.test,
SWH.bc.fourier.test = SWH.bc.fourier.test,
n.training = n.training,
n.test = n.test,
maxlag = maxlag)
SWH.bc.ar.training.list = ARterms$SWH.bc.ar.training.list
SWH.bc.fourier.ar.training.list = ARterms$SWH.bc.fourier.ar.training.list
SWH.bc.ar.test.list = ARterms$SWH.bc.ar.test.list
SWH.bc.fourier.ar.test.list = ARterms$SWH.bc.fourier.ar.test.list
#Extract variables from the lists
list2env(setNames(SWH.bc.ar.training.list,paste0("SWH.bc.ar.training.m",seq_along(SWH.bc.ar.training.list))), envir = parent.frame())
list2env(setNames(SWH.bc.fourier.ar.training.list,paste0("SWH.bc.fourier.ar.training.m",seq_along(SWH.bc.fourier.ar.training.list))), envir = parent.frame())
list2env(setNames(SWH.bc.ar.test.list,paste0("SWH.bc.ar.test.m",seq_along(SWH.bc.ar.test.list))), envir = parent.frame())
list2env(setNames(SWH.bc.fourier.ar.test.list,paste0("SWH.bc.fourier.ar.test.m",seq_along(SWH.bc.fourier.ar.test.list))), envir = parent.frame())
## Vanem&Walker spatial model LASSO #####
## Compute mean, max og min of the neighborhood of the current point
SLP.spatmax = apply(SLP[ max(idx.longSLP-neig,1):min(idx.longSLP+neig, dim(SLP)[1]),
max(idx.latSLP-neig,1):min(idx.latSLP+neig, dim(SLP)[2]),], 3, max, na.rm = TRUE)
SLP.spatmin = apply(SLP[ max(idx.longSLP-neig,1):min(idx.longSLP+neig, dim(SLP)[1]),
max(idx.latSLP-neig,1):min(idx.latSLP+neig, dim(SLP)[2]),], 3, min, na.rm = T)
SLP.spatmean = apply(SLP[ max(idx.longSLP-neig,1):min(idx.longSLP+neig, dim(SLP)[1]),
max(idx.latSLP-neig,1):min(idx.latSLP+neig, dim(SLP)[2]),], 3, mean, na.rm = TRUE)
SLP.grad.spatmax = apply(SLP.grad[ max(idx.longSLP-neig,1):min(idx.longSLP+neig, dim(SLP)[1]),
max(idx.latSLP-neig,1):min(idx.latSLP+neig, dim(SLP)[2]),], 3, max, na.rm = TRUE )
SLP.grad.spatmin = apply(SLP.grad[ max(idx.longSLP-neig,1):min(idx.longSLP+neig, dim(SLP)[1]),
max(idx.latSLP-neig,1):min(idx.latSLP+neig, dim(SLP)[2]),], 3, min, na.rm = TRUE)
SLP.grad.spatmean = apply(SLP.grad[ max(idx.longSLP-neig,1):min(idx.longSLP+neig, dim(SLP)[1]),
max(idx.latSLP-neig,1):min(idx.latSLP+neig, dim(SLP)[2]),], 3, mean, na.rm = TRUE)
## Split in test and training
SLP.spatmax.training = SLP.spatmax[idx.training]
SLP.spatmin.training = SLP.spatmin[idx.training]
SLP.spatmean.training = SLP.spatmean[idx.training]
SLP.grad.spatmax.training = SLP.grad.spatmax[idx.training]
SLP.grad.spatmin.training = SLP.grad.spatmin[idx.training]
SLP.grad.spatmean.training = SLP.grad.spatmean[idx.training]
SLP.spatmax.test = SLP.spatmax[idx.test]
SLP.spatmin.test = SLP.spatmin[idx.test]
SLP.spatmean.test = SLP.spatmean[idx.test]
SLP.grad.spatmax.test = SLP.grad.spatmax[idx.test]
SLP.grad.spatmin.test = SLP.grad.spatmin[idx.test]
SLP.grad.spatmean.test = SLP.grad.spatmean[idx.test]
## Standardize SLP spatial variables
SLP.spatmax.mean.training = mean(SLP.spatmax.training)
SLP.spatmax.sd.training = sd(SLP.spatmax.training)
SLP.spatmax.standard.training = (SLP.spatmax.training - SLP.spatmax.mean.training)/
SLP.spatmax.sd.training
SLP.spatmax.standard.test = (SLP.spatmax.test - SLP.spatmax.mean.training)/
SLP.spatmax.sd.training
SLP.spatmin.mean.training = mean(SLP.spatmin.training)
SLP.spatmin.sd.training = sd(SLP.spatmin.training)
SLP.spatmin.standard.training = (SLP.spatmin.training - SLP.spatmin.mean.training)/
SLP.spatmin.sd.training
SLP.spatmin.standard.test = (SLP.spatmin.test - SLP.spatmin.mean.training)/
SLP.spatmin.sd.training
SLP.spatmean.mean.training = mean(SLP.spatmean.training)
SLP.spatmean.sd.training = sd(SLP.spatmean.training)
SLP.spatmean.standard.training = (SLP.spatmean.training - SLP.spatmean.mean.training)/
SLP.spatmean.sd.training
SLP.spatmean.standard.test = (SLP.spatmean.test - SLP.spatmean.mean.training)/
SLP.spatmean.sd.training
## Box-Cox transform and standardize SLP.grad
SLP.grad.spatmax.bc.dummy = BoxCoxLambda(SLP.grad.spatmax.training)
SLP.grad.spatmax.bc.lambda.training = SLP.grad.spatmax.bc.dummy$lambda
SLP.grad.spatmax.bc.training = SLP.grad.spatmax.bc.dummy$data
SLP.grad.spatmax.bc.test = BoxCoxLambdaKnown(SLP.grad.spatmax.test, SLP.grad.spatmax.bc.lambda.training)
SLP.grad.spatmax.bc.mean.training = mean(SLP.grad.spatmax.bc.training)
SLP.grad.spatmax.bc.sd.training = sd(SLP.grad.spatmax.bc.training)
SLP.grad.spatmax.bc.standard.training = (SLP.grad.spatmax.bc.training - SLP.grad.spatmax.bc.mean.training)/
SLP.grad.spatmax.bc.sd.training
SLP.grad.spatmax.bc.standard.test = (SLP.grad.spatmax.bc.test - SLP.grad.spatmax.bc.mean.training)/
SLP.grad.spatmax.bc.sd.training
SLP.grad.spatmin.bc.dummy = BoxCoxLambda(SLP.grad.spatmin.training)
SLP.grad.spatmin.bc.lambda.training = SLP.grad.spatmin.bc.dummy$lambda
SLP.grad.spatmin.bc.training = SLP.grad.spatmin.bc.dummy$data
SLP.grad.spatmin.bc.test = BoxCoxLambdaKnown(SLP.grad.spatmin.test, SLP.grad.spatmin.bc.lambda.training)
SLP.grad.spatmin.bc.mean.training = mean(SLP.grad.spatmin.bc.training)
SLP.grad.spatmin.bc.sd.training = sd(SLP.grad.spatmin.bc.training)
SLP.grad.spatmin.bc.standard.training = (SLP.grad.spatmin.bc.training - SLP.grad.spatmin.bc.mean.training)/
SLP.grad.spatmin.bc.sd.training
SLP.grad.spatmin.bc.standard.test = (SLP.grad.spatmin.bc.test - SLP.grad.spatmin.bc.mean.training)/
SLP.grad.spatmin.bc.sd.training
SLP.grad.spatmean.bc.dummy = BoxCoxLambda(SLP.grad.spatmean.training)
SLP.grad.spatmean.bc.lambda.training = SLP.grad.spatmean.bc.dummy$lambda
SLP.grad.spatmean.bc.training = SLP.grad.spatmean.bc.dummy$data
SLP.grad.spatmean.bc.test = BoxCoxLambdaKnown(SLP.grad.spatmean.test, SLP.grad.spatmean.bc.lambda.training)
SLP.grad.spatmean.bc.mean.training = mean(SLP.grad.spatmean.bc.training)
SLP.grad.spatmean.bc.sd.training = sd(SLP.grad.spatmean.bc.training)
SLP.grad.spatmean.bc.standard.training = (SLP.grad.spatmean.bc.training - SLP.grad.spatmean.bc.mean.training)/
SLP.grad.spatmean.bc.sd.training
SLP.grad.spatmean.bc.standard.test = (SLP.grad.spatmean.bc.test - SLP.grad.spatmean.bc.mean.training)/
SLP.grad.spatmean.bc.sd.training
## Compute Fourier predictors for seasonally varying coefficients
SLP.spatmax.fourier.training = fourier.training*SLP.spatmax.standard.training
SLP.spatmax.fourier.test = fourier.test*SLP.spatmax.standard.test
SLP.spatmin.fourier.training = fourier.training*SLP.spatmin.standard.training
SLP.spatmin.fourier.test = fourier.test*SLP.spatmin.standard.test
SLP.spatmean.fourier.training = fourier.training*SLP.spatmean.standard.training
SLP.spatmean.fourier.test = fourier.test*SLP.spatmean.standard.test
SLP.grad.spatmax.bc.fourier.training = fourier.training*SLP.grad.spatmax.bc.standard.training
SLP.grad.spatmax.bc.fourier.test = fourier.test*SLP.grad.spatmax.bc.standard.test
SLP.grad.spatmin.bc.fourier.training = fourier.training*SLP.grad.spatmin.bc.standard.training
SLP.grad.spatmin.bc.fourier.test = fourier.test*SLP.grad.spatmin.bc.standard.test
SLP.grad.spatmean.bc.fourier.training = fourier.training*SLP.grad.spatmean.bc.standard.training
SLP.grad.spatmean.bc.fourier.test = fourier.test*SLP.grad.spatmean.bc.standard.test
## Create full set of spatial predictors for the training and test sets
predictors.training.spatial = as.data.frame(cbind(SLP.spatmax.standard.training,
SLP.spatmin.standard.training,
SLP.spatmean.standard.training,
SLP.grad.spatmax.bc.standard.training,
SLP.grad.spatmin.bc.standard.training,
SLP.grad.spatmean.bc.standard.training,
SLP.spatmax.fourier.training,
SLP.spatmin.fourier.training,
SLP.spatmean.fourier.training,
SLP.grad.spatmax.bc.fourier.training,
SLP.grad.spatmin.bc.fourier.training,
SLP.grad.spatmean.bc.fourier.training))
predictors.test.spatial = as.data.frame(cbind(SLP.spatmax.standard.test,
SLP.spatmin.standard.test,
SLP.spatmean.standard.test,
SLP.grad.spatmax.bc.standard.test,
SLP.grad.spatmin.bc.standard.test,
SLP.grad.spatmean.bc.standard.test,
SLP.spatmax.fourier.test,
SLP.spatmin.fourier.test,
SLP.spatmean.fourier.test,
SLP.grad.spatmax.bc.fourier.test,
SLP.grad.spatmin.bc.fourier.test,
SLP.grad.spatmean.bc.fourier.test))
## Create set of local predictors with AR order 5
predictors.training = as.data.frame( cbind(predictors.training.spatial,
intercept.fourier[idx.training,],
SLP.standard.training,
SLP.grad.bc.standard.training,
SLP.fourier.training,
SLP.grad.bc.fourier.training,
SWH.bc.ar.training.m1,
SWH.bc.fourier.ar.training.m1,
SWH.bc.ar.training.m2,
SWH.bc.fourier.ar.training.m2,
SWH.bc.ar.training.m3,
SWH.bc.fourier.ar.training.m3,
SWH.bc.ar.training.m4,
SWH.bc.fourier.ar.training.m4,
SWH.bc.ar.training.m5,
SWH.bc.fourier.ar.training.m5) )
predictors.test = as.data.frame( cbind(predictors.test.spatial,
intercept.fourier[idx.test,],
SLP.standard.test,
SLP.grad.bc.standard.test,
SLP.fourier.test,
SLP.grad.bc.fourier.test,
SWH.bc.ar.test.m1,
SWH.bc.fourier.ar.test.m1,
SWH.bc.ar.test.m2,
SWH.bc.fourier.ar.test.m2,
SWH.bc.ar.test.m3,
SWH.bc.fourier.ar.test.m3,
SWH.bc.ar.test.m4,
SWH.bc.fourier.ar.test.m4,
SWH.bc.ar.test.m5,
SWH.bc.fourier.ar.test.m5) )
colnames(predictors.training) = paste("V", 1:dim(predictors.training)[2], sep = "")
colnames(predictors.test) = paste("V", 1:dim(predictors.test)[2], sep = "")
cat("Total number of potential predictors used in LASSO selection:",dim(predictors.training)[2], "\n")
## Start with LASSO selection
cv = glmnet::cv.glmnet(as.matrix(predictors.training), SWH.bc.standard.training, family = "gaussian", alpha = 1, nfold = 10)
lasso = glmnet::glmnet(as.matrix(predictors.training), SWH.bc.standard.training, alpha = 1)
minindex = which.min(abs(lasso$lambda - cv$lambda.min))
beta = lasso$beta[,minindex]
predictors.training = predictors.training[, which( abs(beta) > 1e-6 )]
predictors.test = predictors.test[, which( abs(beta) > 1e-6 )]
cat("Number of predictors selected by LASSO:",dim(predictors.training)[2], "\n")
cat("Fitting linear model with the variables from the LASSO selection...\n")
fit <- lm(SWH.bc.standard.training ~ ., data = predictors.training)
fits = summary(fit)
## predict
SWH.bc.standard.pred <- predict(object = fit, newdata = predictors.test) #lm
SWH.bc.standard.pred.se <- fits$sigma
## Descale and detransform before computing different raknkings
SWH.bc.pred = SWH.bc.standard.pred*SWH.bc.sd.training + SWH.bc.mean.training
SWH.bc.pred.se = SWH.bc.standard.pred.se*SWH.bc.sd.training
SWH.pred = InvBoxCox(SWH.bc.pred, SWH.bc.lambda.training) #detransform
pred.mean[idx.test - length(training.test[[1]])] = SWH.bc.pred
pred.sd = SWH.bc.pred.se
pred.lambda = SWH.bc.lambda.training
}
return(list(pred.SWH=SWH.pred, pred.mean=pred.mean, pred.sd=pred.sd, pred.lambda=pred.lambda, fits=fits))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.