Nothing
optimizer_smooth_model = function(m_select, method = c("nlminb","BFGS","ucminf","Nelder-Mead"), follow.on = FALSE,
itnmax = NULL, printParam = FALSE){
# Information ---------------------------------------------------------------------------------------------------
# example: optimizer_smooth_model(m_select = sd_m_select, method = "nlminb", save_name = "optim_smooth_model_sd")
# output: a list with 'summary' and 'coefficients' (see output details below)
#
# --- this function optimizes the coefficients of the best fitted linear models
# (from the function 'model_selection') via smooth modeling and with maximum likelihood estimation
#
# --- input:
# 1. 'm_select': this input should be a list including 'max_data', 'covariables' and 'models'
# as in the output of the function 'model_selection'
# --- optional input:
# 2. 'method': optimization method(s) for external function 'optimx', this can also be a vector
# possible methods are: 'Nelder-Mead','BFGS','CG','L-BFGS-B','nlm','nlminb','spg',
# 'ucminf','newuoa','bobyqa','nmkb','hjkb','Rcgmin','Rvmmin'
# default (if this input is missing) is 'c("nlminb","BFGS","ucminf","Nelder-Mead")'
# 3. 'follow.on': logical value
# if TRUE, and there are multiple methods, then the last set of coefficients from one method
# is used as the starting set for the next
# default (if this input is missing) is FALSE
# 4. 'itnmax': if provided as a vector of the same length as the length of 'method',
# this gives the maximum number of iterations or function values for the corresponding method
# if a single number is provided, this will be used for all methods
# 5. 'printParam': logical value
# if TRUE, the GEV parameters during the optimization are printed
# this might be useful to check the proper functioning of the optimization
# (shape parameter should be approximately between -0.5 and 0.5)
# default (if this input is missing) is FALSE
# 6. 'save_name': a character string defining the name of the function output to be saved
# default (if this input is missing) is 'optim_smooth_model'
# --- output: a list with
# 1. a summary of the optimization results, including an information message whether the
# optimization was successful or not and which method delivered the best coefficients: 'summary'
# 2. a list with the optimized coefficients: 'coefficients'
# containing: 'loccoeff', 'scalecoeff', 'shapecoeff' and 'all_coeff'
#---------------------------------------------------------------------------------------------------------------#
# Check required packages and input parameters ------------------------------------------------------------------
# # load required package 'SpatialExtremes'
# if (inherits(try(library(SpatialExtremes, warn.conflicts = FALSE, quietly = TRUE),
# silent = TRUE), "try-error")) {
# message("required package 'SpatialExtremes' is not installed yet -- trying to install package")
# install.packages("SpatialExtremes", quiet = TRUE)
# if (inherits(try(library(SpatialExtremes, warn.conflicts = FALSE, quietly = TRUE),
# silent = TRUE), "try-error")) {
# stop("package 'SpatialExtremes' couldn't be installed")
# } else {
# message("package successfully installed and loaded")
# }
# }
#
# # load required package 'optimx'
# if (inherits(try(library(optimx, warn.conflicts = FALSE, quietly = TRUE), silent = TRUE), "try-error")) {
# message("required package 'optimx' is not installed yet -- trying to install package")
# install.packages("optimx", quiet = TRUE)
# if (inherits(try(library(optimx, warn.conflicts = FALSE, quietly = TRUE), silent = TRUE), "try-error")) {
# stop("package 'optimx' couldn't be installed")
# } else {
# message("package successfully installed and loaded")
# }
# }
# 'm_select' should be a list including 'max_data', 'covariables' and
# 'models' as in the output of the function 'model_selection'
if (!(is.list(m_select) && all(c("max_data","covariables","models") %in% names(m_select)))) {
stop("please use the output of the function 'model_selection' as input 'm_select'")
}
# 'method' has to be a vector of possible methods from the external function 'optimx'
if (!(all(method %in% c("Nelder-Mead","BFGS","CG","L-BFGS-B","nlm","nlminb","spg",
"ucminf","newuoa","bobyqa","nmkb","hjkb","Rcgmin","Rvmmin")))) {
if (length(method) == 1) {
stop("the used optimization method is unnown -- please check possible methods of function 'optimx'")
} else {
x = sum(!(method %in% c("Nelder-Mead","BFGS","CG","L-BFGS-B","nlm","nlminb","spg","ucminf",
"newuoa","bobyqa","nmkb","hjkb","Rcgmin","Rvmmin")))
if (x == 1) {
stop(c("one of the used optimization methods is unnown -- please check possible methods",
" of function 'optimx'"))
} else {
stop(sprintf(
c("%i of the used optimization methods are unnown -- please check possible methods",
" of function 'optimx'"),x))
}
}
}
# 'follow.on' has to be a logical value
if (!(is.logical(follow.on))) {
stop(sprintf("'follow.on' has to be TRUE or FALSE -- '%s' is not allowed",follow.on))
}
# 'itnmax' has to be a positive integer and of length 1 or of same length as 'method'
if (!missing(itnmax)) {
if (length(itnmax) == 1){
if ((itnmax < 0) || (itnmax != as.integer(itnmax))) {
stop(sprintf("'itnmax' has to be a positive integer -- '%s' is not allowed",itnmax))
}
if (length(method) > 1) {
message(sprintf(c("since a single number is provided for the 'itnmax' input, this will be used",
" for all the %i chosen methods"),length(method)))
}
} else if (length(itnmax) != length(method)) {
stop(sprintf(
"'itnmax' has to be a vector of either the same length as 'method' (%i) or of length 1",length(method)))
} else {
if (any(itnmax < 0) || any(itnmax != as.integer(itnmax))) {
stop(sprintf("'itnmax' has to be a vector (possibly of length 1) of positive integer(s)"))
}
}
}
# 'printParam' has to be a logical value
if (!(is.logical(printParam))) {
stop(sprintf("'printParam' has to be TRUE or FALSE -- '%s' is not allowed",printParam))
}
#---------------------------------------------------------------------------------------------------------------#
# Perform calculations ------------------------------------------------------------------------------------------
# define max_data, cov and models from input argument 'm_select'
max_data = m_select$max_data
cov = m_select$covariables
models = m_select$models
loc_model = models$loc_model
scale_model = models$scale_model
shape_model = models$shape_model
# define number of stations
n.site = nrow(max_data)
# define total number of coefficients to be optimized for each GEV parameter
n.loc = length(loc_model$coefficients)
n.scale = length(scale_model$coefficients)
n.shape = length(shape_model$coefficients)
# define starting values for the optimization as the coefficients from the model selection
start = c(loc_model$coefficients, scale_model$coefficients, shape_model$coefficients)
# rename coefficients
loc_names = paste0("loc_", c("Int",names(loc_model$coefficients)[-1]), sep="")
scale_names = paste0("scale_", c("Int",names(scale_model$coefficients)[-1]), sep="")
shape_names = paste0("shape_", c("Int",names(shape_model$coefficients)[-1]), sep="")
coeff_names = c(loc_names, scale_names, shape_names)
names(start) = coeff_names
# add intercept column to covariable matrix
C = cbind("(Intercept)" = rep(1,times = n.site), cov)
# calculate the GEV parameters for given coefficients
dsgnmat2Param = function(loccoeff, scalecoeff, shapecoeff) {
locs = as.numeric(C[,names(loc_model$coefficients)]%*%loccoeff)
C_new = cbind(C,"loc" = locs)
scales = as.numeric(abs(C_new[,names(scale_model$coefficients)]%*%scalecoeff))
C_new = cbind(C_new,"scale" = scales)
shapes = as.numeric(C_new[,names(shape_model$coefficients)]%*%shapecoeff)
ans = cbind("locs" = locs,"scales" = scales,"shapes" = shapes)
return(ans)
}
# rescale the starting values for more stable optimization results
parscale = 10^(floor(log10(abs(start))))
start = start/parscale
# define the logarithmized likelihood function
loglikeli = function(p) {
# scale the coefficients back to original scaling
p = p*parscale
# define location, scale and shape coefficients
loccoeff = p[1:n.loc]
scalecoeff = p[(n.loc+1):(n.loc+n.scale)]
shapecoeff = p[(n.loc+n.scale+1):(n.loc+n.scale+n.shape)]
# calculate the GEV parameters according to the given coefficients
gev = dsgnmat2Param(loccoeff,scalecoeff,shapecoeff)
locs = gev[,"locs"]
scales = gev[,"scales"]
shapes = gev[,"shapes"]
if (printParam) {
print(summary(gev))
}
# calculate the log likelihood
sum = 0
for (k in 1:n.site) {
z = as.numeric(max_data[k,!is.na(max_data[k,])])
summand = sum(dgev(z, loc = locs[k], scale = scales[k], shape = shapes[k], log = TRUE))
sum = sum + summand
}
# in order to maximize we take the negative value
return(-sum)
}
# define the 'squared' log likelihood function
loglikeli2 = function(p) {
# scale the coefficients back to original scaling
p = p*parscale
# define location, scale and shape coefficients
loccoeff = p[1:n.loc]
scalecoeff = p[(n.loc+1):(n.loc+n.scale)]
shapecoeff = p[(n.loc+n.scale+1):(n.loc+n.scale+n.shape)]
# calculate the GEV parameters according to the given coefficients
gev = dsgnmat2Param(loccoeff,scalecoeff,shapecoeff)
locs = gev[,"locs"]
scales = gev[,"scales"]
shapes = gev[,"shapes"]
if (printParam) {
print(summary(gev))
}
# calculate the 'squared' log likelihood
sum = 0
for (k in 1:n.site) {
z = as.numeric(max_data[k,!is.na(max_data[k,])])
val = 1 + shapes[k]*((z - locs[k])/scales[k])
n.obs = length(z)
summand = -n.obs*log(scales[k]) - (1 + 1/shapes[k])*sum(1/2*log(val^2)) -
sum(exp((-1/shapes[k])*1/2*log(val^2)))
sum = sum + summand
}
# in order to maximize we take the negative value
return(-sum)
}
# if loglikeli(start) is infinite, try to eliminate the responsible stations
if (is.infinite(loglikeli(start))) {
# define the coefficients
coeff = start*parscale
loccoeff = coeff[1:n.loc]
scalecoeff = coeff[(n.loc+1):(n.loc+n.scale)]
shapecoeff = coeff[(n.loc+n.scale+1):(n.loc+n.scale+n.shape)]
# calculate the GEV parameters according to the given coefficients
gev = dsgnmat2Param(loccoeff,scalecoeff,shapecoeff)
locs = gev[,"locs"]
scales = gev[,"scales"]
shapes = gev[,"shapes"]
# identify the stations that cause the infinity (-> bad stations)
bad = NULL
j = 0
for (k in 1:n.site) {
z = as.numeric(max_data[k,!is.na(max_data[k,])])
# the term val should be positive!
val = 1 + shapes[k]*((z - locs[k])/scales[k])
if (any(val <= 0)) {
j = j + 1
bad[j] = k
}
}
# if only 10 % of all stations cause the problem, drop them
# else try to optimize the 'squared' log likelihood
# best results are most probably obtained with methods 'ucminf' or 'nlminb'
if (length(bad) <= floor(n.site*0.1)) {
max_data = max_data[-bad,]
n.site = nrow(max_data)
cov = cov[-bad,]
C = cbind("(Intercept)" = rep(1,times = n.site), cov)
} else {
warning(c("more than 10 % of all stations caused infinite log likelihood with starting values",
"\n tried to optimize 'squared' log likelihood -- use method 'ucminf' or 'nlminb'"))
message("bad stations:")
print(rownames(max_data[bad,]))
}
}
# if loglikeli(start) is still infinite, there is another problem,
# try to optimize the 'squared' log likelihood
# best results are most probably obtained with method 'ucminf' or 'nlminb'
if (is.infinite(loglikeli(start))) {
optloglik = loglikeli2
} else {
optloglik = loglikeli
}
# perform the actual optimization
# method 'nlminb' has an additional control parameter 'eval.max'
if (any(method == "nlminb") && !follow.on && !missing(itnmax)) {
if (length(method) > 1) {
pos = which(method == "nlminb")
optimizer1 = optimx(start, optloglik, method = method[-pos], itnmax = itnmax,
control = list(follow.on = follow.on, kkt = FALSE))
optimizer2 = optimx(start, optloglik, method = method[pos], itnmax = itnmax,
control = list(follow.on = follow.on, eval.max = itnmax, kkt = FALSE))
optimizer = rbind(optimizer1, optimizer2)
optimizer = optimizer[match(method, rownames(optimizer)),]
} else {
optimizer = optimx(start, optloglik, method = method, itnmax = itnmax,
control = list(follow.on = follow.on, eval.max = itnmax, kkt = FALSE))
}
} else {
optimizer = optimx(start, optloglik, method = method, itnmax = itnmax,
control = list(follow.on = follow.on, kkt = FALSE))
}
# scale the coefficients back to original scaling
for (l in 1:nrow(optimizer)) {
optimizer[l,coeff_names] = optimizer[l,coeff_names]*parscale
optimizer[l,"value"] = -optimizer[l,"value"]
}
# check whether the optimization was successful according to the convergence codes and
# find the method with the maximal log likelihood
if (any(optimizer$convcode %in% c(0,1))) {
ind = which(optimizer[,"value"] == max(optimizer[which(optimizer$convcode %in% c(0,1)),"value"]))
# if there are more than one method with the same (best) result choose the one with a convergence code of 0
# or the last one of them
if (any(optimizer$convcode[ind] == 0)) {
ind = ind[max(which(optimizer$convcode[ind] == 0))]
message = sprintf(
"optimization was successful, coefficients were chosen from method '%s'",row.names(optimizer)[ind])
} else {
ind = max(ind)
warning(sprintf(c("optimization method with the best result (method '%s') reached iteration limit;",
"\n coefficients were chosen from this method, but results might be bad;",
"\n compare with other methods or increase the optional input argument 'itnmax'!"),
row.names(optimizer)[ind]))
message = sprintf(c("optimization method with the best result (method '%s') reached iteration limit;",
"coefficients were chosen from this method, but results might be bad;",
"compare with other methods or increase the optional input argument 'itnmax'!"),
row.names(optimizer)[ind])
}
coeff = as.numeric(optimizer[ind,coeff_names])
if (all(abs(coeff - start*parscale) < 10^-3)) {
warning(
"optimized coefficients and starting values are (almost) the same -- difference smaller than 10^-3")
}
} else {
print(optimizer)
stop(c("optimization was NOT successful -- try other method(s)",
"\n you can use the optional input argument 'method = c(...)'"))
}
names(coeff) = coeff_names
# define the coefficients for each GEV parameter separately
loccoeff = coeff[1:n.loc]
scalecoeff = coeff[(n.loc+1):(n.loc+n.scale)]
shapecoeff = coeff[(n.loc+n.scale+1):(n.loc+n.scale+n.shape)]
names(loccoeff) = names(loc_model$coefficients)
names(scalecoeff) = names(scale_model$coefficients)
names(shapecoeff) = names(shape_model$coefficients)
# summarize optimization result and information message
summary = list(message = message, method = row.names(optimizer)[ind], result = optimizer)
print(summary)
coefficients = list("loccoeff" = loccoeff, "scalecoeff" = scalecoeff,
"shapecoeff" = shapecoeff, "all_coeff" = coeff)
#---------------------------------------------------------------------------------------------------------------#
# Define function output ----------------------------------------------------------------------------------------
ans = list(summary = summary, coefficients = coefficients)
return(ans)
#---------------------------------------------------------------------------------------------------------------#
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.