#' Negative log-likelihood for marginal threshold exceedances
#'
#' This function handles missing values in the data matrix that create
#' marginal only non-missingness. These are not accounted for in the pairwise
#' likelihood routine and so are handled outside of the C++ code in a uniform
#' fashion for each of the sub-models
#' @importFrom "stats" "na.omit" "quantile"
#' @param margdat an \code{m} by \code{d} matrix containing marginal exceedances, one per row
#' @param u a \code{d}-vector of thresholds
#' @param scale vector of scale parameters
#' @param shape vector of shape parameters
#' @return the negative log-likelihood
margexc <- function(margdat, u, scale, shape){
sum(sapply(1:ncol(margdat), function(i){
s <- na.omit(margdat[,i])
if(length(s)==0){
return(0)
} else{
-mev::gpd.ll(par=c(scale[i], shape[i]), dat=s-u[i])
}
}))
}
#' Wrapper for the scaled Dirichlet extremal model
#'
#' This function takes as argument the parameters of the scaled Dirichlet bivariate distribution
#' and passes them to the C++ function.
#'
#' @useDynLib ExtLiouv
#' @importFrom Rcpp evalCpp
#' @param optpar vector of parameters over which to optimize
#' @param fixedpar vector of parameters to keep constant
#' @param wfixed is vector of int with indices of data points
#' @param numpar vector integer giving the length of each parameter (scale, shape, ...)
#' @param transform boolean indicating whether to exp (expit for rho) the parameters for optimization
#' @param dat data matrix
#' @param thid logical matrix indicating whether entries exceed the marginal threshold
#' @param N total sample size
#' @param lambda vector of marginal rates of exceedance
#' @param u vector of thresholds
#' @return the negative log-likelihood value
#' @keywords internal
nlloptdir <- function(optpar, fixedpar, wfixed, numpar, transform=FALSE, dat, thid, N, lambda, u, datmarg) {
if((length(optpar)+length(fixedpar))!=sum(numpar)){
stop("Invalid arguments passed to function `nlloptdir'; please check")
}
#Transform the parameters if they are to be optimized (otherwise, don't)
if(1 %in% wfixed){ mscale = fixedpar[1:numpar[1]]; fixedpar <- fixedpar[-(1:numpar[1])]
} else { mscale = optpar[1:numpar[1]]; optpar <- optpar[-(1:numpar[1])]
if(transform){mscale <- exp(mscale)}
}
if(2 %in% wfixed){ mshape = fixedpar[1:numpar[2]]; fixedpar <- fixedpar[-(1:numpar[2])]
} else { mshape = optpar[1:numpar[2]]; optpar <- optpar[-(1:numpar[2])] }
if(3 %in% wfixed){ malpha = fixedpar[1:numpar[3]]; fixedpar <- fixedpar[-(1:numpar[3])]
} else { malpha = optpar[1:numpar[3]]; optpar <- optpar[-(1:numpar[3])]
if(transform){malpha <- exp(malpha)}
}
if(4 %in% wfixed){ mrho = fixedpar[1:numpar[4]]; fixedpar <- fixedpar[-(1:numpar[4])]
} else { mrho = optpar[1:numpar[4]]; optpar <- optpar[-(1:numpar[4])]
if(transform){
mrho <- exp(mrho)#/(1+exp(mrho)) #@TODO check whether rho must lie in (0,1)
}
}
#Case of same shape or scale is handled in Marginal transform
nllmvsdir(y=dat, thid=thid, N=N, lambda=lambda, u=u, alpha=malpha, rho=mrho, scale=mscale, shape=mshape) +
margexc(datmarg, u, mscale, mshape)
}
#' Wrapper for the scaled negative extremal Dirichlet model
#'
#' This function takes as argument the parameters of the scaled Dirichlet bivariate distribution
#' and passes them to the C++ function.
#'
#' @param optpar vector of parameters over which to optimize
#' @param fixedpar vector of parameters to keep constant
#' @param wfixed is vector of int with indices of data points
#' @param numpar vector integer giving the length of each parameter (scale, shape, ...)
#' @param transform boolean indicating whether to exponentiate the parameters for optimization
#' @param dat data matrix
#' @param thid logical matrix indicating whether entries exceed the marginal threshold
#' @param N total sample size
#' @param lambda vector of marginal rates of exceedance
#' @param u vector of thresholds
#' @return the negative log-likelihood value
#' @keywords internal
nlloptnegdir <- function(optpar, fixedpar, wfixed, numpar, transform=FALSE, dat, thid, N, lambda, u, datmarg) {
if((length(optpar)+length(fixedpar))!=sum(numpar)){
stop("Invalid arguments passed to function `nlloptnegdir'; please check")
}
#Transform the parameters if they are to be optimized (otherwise, don't)
if(1 %in% wfixed){ mscale = fixedpar[1:numpar[1]]; fixedpar <- fixedpar[-(1:numpar[1])]
} else { mscale = optpar[1:numpar[1]]; optpar <- optpar[-(1:numpar[1])]
if(transform){mscale <- exp(mscale)}
}
if(2 %in% wfixed){ mshape = fixedpar[1:numpar[2]]; fixedpar <- fixedpar[-(1:numpar[2])]
} else { mshape = optpar[1:numpar[2]]; optpar <- optpar[-(1:numpar[2])] }
if(3 %in% wfixed){ malpha = fixedpar[1:numpar[3]]; fixedpar <- fixedpar[-(1:numpar[3])]
} else { malpha = optpar[1:numpar[3]]; optpar <- optpar[-(1:numpar[3])]
if(transform){ malpha <- exp(malpha)}
}
if(4 %in% wfixed){ mrho = fixedpar[1:numpar[4]]; fixedpar <- fixedpar[-(1:numpar[4])]
} else { mrho = optpar[1:numpar[4]]; optpar <- optpar[-(1:numpar[4])]
if(transform){ mrho <- exp(mrho)
}
}
#Handle case restriction here
if(mrho >= min(malpha)){return(1e10)}
#Case of same shape or scale is handled in Marginal transform
nllmvsnegdir(y=dat, thid=thid, N=N, lambda=lambda, u=u, alpha=malpha, rho=mrho, scale=mscale, shape=mshape) +
margexc(datmarg, u, mscale, mshape)
}
#' Test for conditional negative semi-definitiveness
#' Is the matrix of rank n-1 satisfying sum-to-zero constraints conditionally negative definite?
#' @param X matrix to test for CNSD
#' @param tol a tolerance factor for the lowest eigenvalue
#' @return a logical indicating whether the matrix \code{X} is CNSD.
.is.CNSD <- function (X, tol = 1e-08){
n <- nrow(X)
P <- diag(n)
if(n>2){
diag(P[,-1]) <- -1
} else if(n==2){#error with one dimensional case...
P[1,-1] <- -1
}
Xhat <- P %*% X %*% t(P)
eigs <- eigen(Xhat[-n, -n], symmetric=TRUE, only.values=TRUE)$values
!eigs[1] > tol
}
#' Create a conditionally negative definite matrix from Cholesky decomposition
#'
#' This function takes a vector, fills a Cholesky upper triangular matrix and transforms it to a
#' positive definite matrix, then into a symmetric CNSD matrix with zero diagonal for the HR model
#'
#' @param vec vector of parameters for the Cholesky decomposition, with log-diagonal entries
#' @param dime dimension of the model
#' @return a CNSD matrix
#' @keywords internal
chol2Lambda <- function(vec, dime, EoW=TRUE){#vector of parameter and dimension of the model
if(!is.integer(dime <- as.integer(dime))){
stop("Invalid dimension for the Husler-Reiss model; argument must be an integer")
}
L = matrix(0, nrow=dime, ncol=dime)
#diagonal elements are on the log-scale to ensure uniqueness of Cholesky decompo
diag(L) <- exp(vec[1:dime])
#other elements stored per row
if(dime>1){
L[upper.tri(L, diag=FALSE)] <- vec[-(1:dime)]
}
#create Sigma
Sigma <- t(L)%*%L
#create Lambda
Sigma2Lambda(Sigma)
}
#' Create a correlation matrix from Cholesky decomposition
#'
#' This function takes a vector, fills a Cholesky upper triangular matrix and transforms it to a
#' positive definite matrix, then adds a diagonal of ones for the extremal Student model
#'
#' @param vec vector of parameters for the Cholesky decomposition
#' (upper diagonal matrix, excluding the diagonal)
#' @param dime dimension of the model
#' @return a correlation matrix
#' @keywords internal
chol2cor <- function(vec, dime){#vector of parameter and dimension of the model
if(!is.integer(dime <- as.integer(dime))){
stop("Invalid dimension for the extremal student model; argument must be an integer")
}
L = matrix(0, nrow=dime, ncol=dime)
if(dime>1){
L[upper.tri(L, diag=FALSE)] <- vec[-(1:dime)]
}
diag(L) <- 1 #Fix the diagonal entries, must be positive anyway to ensure uniqueness
for(i in 1:dime){
L[,i] <- L[,i]/sum(L[,i]^2)
}
#create Corr
t(L)%*%L
}
#' Transform covariance matrix to conditionally negative definite matrix
#'
#' @param Sigma symmetric positive definite matrix
#' @return a CND matrix
#' @keywords internal
Sigma2Lambda <- function(Sigma){
rbind(cbind(0, t(diag(Sigma))),cbind(diag(Sigma),
rep(1,nrow(Sigma))%*%t(diag(Sigma))+diag(Sigma)%*%t(rep(1, nrow(Sigma)))-2*Sigma))/4
}
#' Transform a symmetric conditionally negative definite matrix into a covariance matrix
#' @param Lambda symmetric conditionally negative definite matrix of parameters for the Husler-Reiss model
#' @return a symmetric positive definite matrix
#' @keywords internal
Lambda2Sigma <- function(Lambda){
2*(matrix(Lambda[,1],nrow(Lambda),nrow(Lambda))+t(matrix(Lambda[1,],nrow(Lambda),nrow(Lambda)))-Lambda)[-1,-1]
}
#' Wrapper for the Husler-Reiss model
#'
#' This function takes as argument the parameters of the Husler-Reiss bivariate distribution
#' and passes them to the C++ function.
#'
#' @inheritParams nlloptdir
#' @return the negative log-likelihood value
#' @keywords internal
nllopthr <- function(optpar, fixedpar, wfixed, numpar, transform=FALSE, dat, thid, N, lambda, u, datmarg) {
if((length(optpar)+length(fixedpar))!=sum(numpar)){
stop("Invalid arguments passed to function `nllopthr'; please check")
}
#Transform the parameters if they are to be optimized (otherwise, don't)
if(1 %in% wfixed){ mscale <- fixedpar[1:numpar[1]]; fixedpar <- fixedpar[-(1:numpar[1])]
} else { mscale <- optpar[1:numpar[1]]; optpar <- optpar[-(1:numpar[1])]
if(transform){mscale <- exp(mscale)}
}
if(2 %in% wfixed){ mshape <- fixedpar[1:numpar[2]]; fixedpar <- fixedpar[-(1:numpar[2])]
} else { mshape <- optpar[1:numpar[2]]; optpar <- optpar[-(1:numpar[2])] }
if(3 %in% wfixed){ mLambda = fixedpar;
#If fixed, there is one check once for the parameters and the latter simply need to be filled
lambdamat <- matrix(mLambda, ncol(dat),ncol(dat))
} else { mLambda <- optpar #just whatever is left over for this model
if(transform){lambdamat <- chol2Lambda(mLambda, ncol(dat)-1)
} else{#TODO determine whether to fill by row or column #DOES THIS WORK?
lambdamat <- matrix(0, ncol=ncol(dat),nrow=ncol(dat))
lambdamat[upper.tri(lambdamat)] <- mLambda
lambdamat <- lambdamat + t(lambdamat)
}
}
#Case of same shape or scale is handled in marginal transform
nllmvhr(y=dat, thid=thid, N=N, lambda=lambda, u=u, Lambda=lambdamat, scale=mscale, shape=mshape) +
margexc(datmarg, u, mscale, mshape)
}
#' Wrapper for the extremal Student model
#'
#' This function takes as argument the parameters of the extremal Student distribution
#' and passes them to the C++ function.
#'
#' @inheritParams nlloptdir
#' @return the negative log-likelihood value
#' @keywords internal
nlloptxstud <- function(optpar, fixedpar, wfixed, numpar, transform=FALSE, dat, thid, N, lambda, u, datmarg) {
if((length(optpar)+length(fixedpar))!=sum(numpar)){
stop("Invalid arguments passed to function `nllopthr'; please check")
}
#Transform the parameters if they are to be optimized (otherwise, don't)
if(1 %in% wfixed){ mscale <- fixedpar[1:numpar[1]]; fixedpar <- fixedpar[-(1:numpar[1])]
} else { mscale <- optpar[1:numpar[1]]; optpar <- optpar[-(1:numpar[1])]
if(transform){mscale <- exp(mscale)}
}
if(2 %in% wfixed){ mshape <- fixedpar[1:numpar[2]]; fixedpar <- fixedpar[-(1:numpar[2])]
} else { mshape <- optpar[1:numpar[2]]; optpar <- optpar[-(1:numpar[2])] }
if(3 %in% wfixed){ mdf <- fixedpar[1:numpar[3]]; fixedpar <- fixedpar[-(1:numpar[3])]
} else { mdf <- optpar[1:numpar[3]]; optpar <- optpar[-(1:numpar[3])] }
if(4 %in% wfixed){
#If fixed, there is one check once for the parameters and the latter simply need to be filled
corrmat <- matrix(fixedpar, ncol(dat),ncol(dat)) #fill by column
} else { mRho <- optpar #just whatever is left over for this model
if(transform){corrmat <- chol2cor(mRho, ncol(dat))
} else{#TODO determine whether to fill by row or column #DOES THIS WORK?
corrmat <- matrix(0, ncol=ncol(dat),nrow=ncol(dat))
corrmat[upper.tri(corrmat,diag=FALSE)] <- mRho
corrmat <- corrmat + t(corrmat)
diag(corrmat) <- 1
}
}
#Case of same shape or scale is handled in marginal transform
nllmvxstud(y=dat, thid=thid, N=N, Rho=corrmat, u=u, nu=mdf, scale=mscale, shape=mshape) +
margexc(datmarg, u, mscale, mshape)
}
#' Pairwise composite likelihood estimation for peaks-over-threshold
#'
#' This function finds the maximum composite likelihood estimate
#' for multivariate extreme value models based on pairs of data.
#' It returns a list with the result from \code{\link[stats]{optim}}
#' along with some of the information passed to the function.
#'
#' @section Warning: users should strip the data matrix from full \code{NA} cases or else provide \code{N}.
#' While these will be ignored in the routine, they are taken into account in the calculation of the total number of
#' non exceedances.
#'
#' @param dat \code{n} by \code{d} data matrix on original scale
#' @param u \code{d} vector of threshold parameters on probability scale
#' @param start vector of starting value for the optimization
#' @param N total number of observations if some observations falling under all threshold were removed from \code{x} (can be omitted)
#' @param lambda vector of percentage of threshold exceedances (can be omitted)
#' @param model family of multivariate extreme value distribution. \code{'ct'} and \code{'hr'} are the only family currently supported.
#' @param cscale logical indicating common scale for all variables
#' @param cshape logical indicating common shape for all variables
#' @param sym logical indicating whether equal dependence parameters in the Dirichlet model
#' @param std.err logical indicating whether to calculate standard errors numerically
#' @param method optimization method; see \link[stats]{optim}
#' @param warn.inf whether to check the validity of starting values before optimization
#' @param ... additional arguments (fixed parameters) passed to the \code{optim}
#' function to be held fixed during the optimization
#' @return a list containing the output of \code{\link[stats]{optim}}, notably
#' parameter values \code{par} and standard errors \code{se}
#' @export
#' @importFrom stats optim var
#' @importFrom numDeriv hessian
#' @importFrom mev gp.fit
#'
#' @author Leo Belzile, some Cpp code by Adrien de Casto and Leonid Rousniak for `hr' and `xstud' models
#' @examples
#' set.seed(4)
#' x <- mev::rmev(n=10000, d=2, param=c(1,2,0.5), model = "negdir")
#' qu <- apply(x, 2, quantile,probs <- 0.99)
#' y <- x[which(rowSums(isAbove(x,threshold = qu))>=1),]
#' fit <- fmvcpot(dat=y, u=qu, model="negdir",lambda=colSums(isAbove(y,qu))/(nrow(x)+1), N=nrow(x),
#' cscale=TRUE, cshape=TRUE, shape=1, scale=100)
#' fit$par
#' x <- mev::rmev(n=10000, d=2, param=c(1,2,0.5), model = "dir")
#' qu <- apply(x, 2, quantile,probs <- 0.99)
#' y <- x[which(rowSums(isAbove(x,threshold = qu))>=1),]
#' fit <- fmvcpot(dat=y, u=qu, model="dir",lambda=colSums(isAbove(y,qu))/(nrow(x)+1), N=nrow(x),
#' cscale=TRUE, cshape=TRUE, shape=1, scale=100)
#' fit$par
fmvcpot <- function(dat, u, lambda, N, model=c("log","neglog","ct","dir","negdir","hr","xstud"),
cscale=FALSE, cshape=FALSE, sym=FALSE,
start=NULL, std.err=TRUE, method="BFGS", warn.inf=TRUE,...){
#keep for the marginal analysis the cases - so contributions must be handled in the C++ function
d <- ncol(dat)
if(length(u)!=d){
stop("Invalid threshold; must match the dimension of dat");
}
if(missing(lambda)){ #sep.bvdata does this automatically
lambda <- sapply(1:d, function(ind){sum(dat[,ind]>u[ind], na.rm=TRUE)/(nrow(dat)+1)})
}
#if(model=="ct"){ model="dir"}
if(missing(N)){ N <- nrow(dat)}
thid <- isAbove(dat, u) #works with NA
nulls <- which(rowSums(thid)==0) #will strip observations that are only NA
#Also removes values that are unnecessary (because they are censored)
#This speeds up the calculations quite drastically, because optim must loop most of the zeros ow.
if(length(nulls)>0){
dat <- dat[-nulls,]
thid <- thid[-nulls,]
}
#Define name of parameters to be optimized - will automatically match `nm',
#but recall `start' may be provided by user
param <- c("scale", "shape")
if(model %in% c("ct","neglog","log","dir","negdir")){
param <- c(param, "alpha","rho")
numpar <- c(ifelse(cscale,1,d),ifelse(cshape,1,d),ifelse(sym,1,d), 1)
} else if(model=="hr"){
param <- c(param, "Lambda")
numpar <- c(ifelse(cscale,1,d),ifelse(cshape,1,d),d*(d-1)/2)
} else if(model=="xstud"){
param <- c(param, "df","Rho")
numpar <- c(ifelse(cscale,1,d),ifelse(cshape,1,d),1, d*(d-1)/2)
}
#Collect the fixed parameter values and switch model if restriction
nmdots <- names(list(...))
fixed.param <- list(...)[nmdots %in% param]
if(model=="ct"){
fixed.param[['rho']] <- 1;
model <- "dir"
} else if(model=="neglog"){
fixed.param[['alpha']] <- rep(1, d);
model <- "dir"
} else if(model=="log"){
fixed.param[['alpha']] <- rep(1, d);
model <- "negdir"
}
#COMMON ROUTINE
#Define starting values (if not provided by the user)
if (is.null(start)) {#default
start <- composite_pot_startvals(x=dat, u=u, model=model, sym=sym, cscale=cscale,
cshape=cshape, transform=TRUE) #TODO check incidence
} else if (!is.list(start)){
stop("`start' must be a named list")
}
nm <- names(start)
#For now, just matching the parameters; will deal with length later
if(isTRUE(any(!(param %in% c(nm,names(fixed.param)))))){
stop("Unspecified parameters")
}
m <- match(nm, param)
if(any(is.na(m))){
stop("`start' specifies unknown arguments")
}
fixed <- c()
wfixed <- c()
if("scale" %in% nmdots){
fixed <- c(fixed,as.vector(fixed.param['scale']))
start$scale <- NULL
if(length(unlist(fixed.param['scale']))!=numpar[1]){
stop("Invalid scale parameter: it does not match the constraint provided.")
}
wfixed <- c(wfixed, 1)
}
if("shape" %in% nmdots){
fixed <- c(fixed,as.vector(fixed.param['shape']))
start$shape <- NULL
if(length(unlist(fixed.param['shape']))!=numpar[2]){
stop("Invalid shape parameter: it does not match the constraint provided.")
}
wfixed <- c(wfixed, 2)
}
#Model specific arguments
if(model %in% c("dir","negdir")){
if("alpha" %in% nmdots){
fixed <- c(fixed,as.vector(fixed.param['alpha']))
start$alpha <- NULL
if(length(unlist(fixed.param['alpha']))!=numpar[3]){
stop("Invalid `alpha' parameter provided")
}
wfixed <- c(wfixed, 3)
}
if("rho" %in% nmdots){
fixed <- c(fixed,as.vector(fixed.param['rho']))
start$rho <- NULL
if(length(unlist(fixed.param['rho']))!=numpar[4]){
stop("Invalid `rho' parameter provided")
}
wfixed <- c(wfixed, 4)
}
} else if(model=="hr"){
if("Lambda" %in% nmdots){
start$Lambda <- NULL
numpar[3] <- d^2
#User can provide either parameters or the full matrix...
lambda_par <- fixed.param['Lambda'][[1]]
if(is.matrix(lambda_par)){
if(all(dim(lambda_par)==d)){
if(.is.CNSD(lambda_par)){
fixed <- c(fixed,as.vector(lambda_par))
}
}
} else if(is.vector(lambda_par)){
if(all.equal(length(lambda_par),d*(d-1)/2)){
Lambda_mat <- matrix(0,d,d)
Lambda_mat[upper.tri(Lambda_mat,diag=FALSE)] <- lambda_par
Lambda_mat <- Lambda_mat + t(Lambda_mat)
if(.is.CNSD(Lambda_mat)){
fixed <- c(fixed,as.vector(Lambda_mat))
}
}
} else {
stop("Invalid `Lambda' parameter provided")
}
wfixed <- c(wfixed, 3)
}
} else if(model=="xstud"){
if("df" %in% nmdots){
fixed <- c(fixed,as.vector(fixed.param['df']))
start$df <- NULL
wfixed <- c(wfixed, 3)
}
if("Rho" %in% nmdots){
start$Rho <- NULL
numpar[4] <- d^2 #because matrix is provided
#User can provide either parameters or the full matrix...
Rho_par <- fixed.param['Rho'][[1]]
if(is.matrix(Rho_par)){
if(all(dim(Rho_par)==d)){
if(all(eigen(Rho_par,only.values = TRUE)$values>0,
abs(Rho_par[-seq(1, d^2, d+1)])<1,
all.equal(diag(Rho_par),rep(1,d)))){
fixed <- c(fixed,as.vector(lambda_par))
}
}
} else if(is.vector(Rho_par)){
if(all.equal(length(Rho_par),d*(d-1)/2)){
Rho_mat <- matrix(0,d,d)
Rho_mat[upper.tri(Rho_mat,diag=FALSE)] <- Rho_par
Rho_mat <- Rho_mat + t(Rho_mat) + diag(1, d)
if(all(eigen(Rho_par,only.values = TRUE)$values>0,
abs(Rho_par[-seq(1, d^2, d+1)])<1)){
fixed <- c(fixed,as.vector(Lambda_mat))
}
}
} else {
stop("Invalid `Rho' parameter provided")
}
wfixed <- c(wfixed, 4)
}
}
#Deal with missing data
#Which rows contain only marginal exceedances?
margexc <- apply(is.na(dat), 1, which)
uninonNA <- which(unlist(lapply(margexc, length))==(d-1))
datmarg <- dat[uninonNA,]
if(length(uninonNA)>0){
dat <- dat[-uninonNA,]
}
#Starting values and optimization routine
start.arg <- unlist(start)
fixed <- unlist(fixed)
if(is.null(start.arg)){
#If all parameters are provided, simply evaluate the log-likelihood
nll <- switch(model,
dir=nlloptdir(start.arg, fixed, wfixed, numpar,transform=FALSE,
dat=dat, thid=thid, lambda=lambda, N=N, u=u, datmarg=datmarg),
negdir=nlloptnegdir(start.arg, fixed, wfixed, numpar,transform=FALSE,
dat=dat, thid=thid, lambda=lambda, N=N, u=u, datmarg=datmarg),
hr=nllopthr(start.arg, fixed, wfixed, numpar,transform=FALSE,
dat=dat, thid=thid, lambda=lambda, N=N, u=u, datmarg=datmarg),
xstud=nlloptxstud(start.arg, fixed, wfixed, numpar,transform=FALSE,
dat=dat, thid=thid, lambda=lambda, N=N, u=u, datmarg=datmarg)
)
return(nll)
} #Optimize
if(warn.inf && isTRUE(all.equal(do.call(switch(model,
dir="nlloptdir",
negdir="nlloptnegdir",
hr="nllopthr",
xstud="nlloptxstud"),#TODO add other models
list(optpar=start.arg, fixedpar=fixed, wfixed=wfixed, numpar=numpar, transform=TRUE,
dat=dat, thid=thid, lambda=lambda, N=N, u=u, datmarg=datmarg)),1e10))){
warning("Negative log-likelihood is infinite at starting values")
}
#If start.arg was provided by user, should be inv.transformed
opt <- optim(par=start.arg, fn=get(switch(model, dir="nlloptdir",
negdir="nlloptnegdir",
hr="nllopthr",
xstud="nlloptxstud")),
hessian = std.err, fixedpar=fixed, wfixed=wfixed,
numpar=numpar, transform=TRUE, method = method, control=list(maxit=2500),
dat=dat, thid=thid, lambda=lambda, N=N, u=u, datmarg=datmarg)
if(opt$convergence!=0){
warning("Optimization routine did not converge");
}
start <- NULL
tpar <- opt$par
if(opt$convergence==0){
if(! (1 %in% wfixed)){
start <- c(start,exp(tpar[1:numpar[1]])); tpar <- tpar[-(1:numpar[1])]
}
if(! (2 %in% wfixed)){
start <- c(start,tpar[1:numpar[2]]); tpar <- tpar[-(1:numpar[2])]
}
if(model %in% c("dir","negdir")){
if(! (3 %in% wfixed)){
start <- c(start,exp(tpar[1:numpar[3]])); tpar <- tpar[-(1:numpar[3])]
}
if(! (4 %in% wfixed)){
start <- c(start,exp(tpar[1:numpar[4]])); tpar <- tpar[-(1:numpar[4])]
}
}
opt$par <- start;
opt$hessian <- numDeriv::hessian(func=switch(model, dir=nlloptdir, negdir=nlloptnegdir),
x=opt$par, fixedpar=fixed, wfixed=wfixed,
numpar=numpar, transform=FALSE, dat=dat, thid=thid,
lambda=lambda, N=N, u=u, datmarg=datmarg)
}
opt$estimate <- list()
#Put scale and shape in the estimates
if(1 %in% wfixed){
opt$estimate[['scale']] <- rep(as.vector(unlist(fixed.param['scale'])),length.out = ncol(dat))
} else{
opt$estimate[['scale']] <- rep(as.vector(opt$par[grep('scale',names(opt$par))]),length.out = ncol(dat))
}
if(2 %in% wfixed){
opt$estimate[['shape']] <- rep(as.vector(unlist(fixed.param['shape'])),length.out = ncol(dat))
} else{
opt$estimate[['shape']] <- rep(as.vector(opt$par[grep('shape',names(opt$par))]),length.out = ncol(dat))
}
if(model %in% c("dir","negdir")){
if(3 %in% wfixed){
opt$estimate[['alpha']] <- rep(as.vector(unlist(fixed.param['alpha'])),length.out = ncol(dat))
} else{
opt$estimate[['alpha']] <- as.vector(opt$par[grep('alpha',names(opt$par))])
}
if(4 %in% wfixed){
opt$estimate['rho'] <- as.vector(unlist(fixed.param['rho']))
} else{
opt$estimate['rho'] <- opt$par[grep('rho',names(opt$par))]
}
} else if(model=="hr"){
#passing on the parameters will fail here because the function nllopthr recognizes fixed arguments
Lambda_mle <- chol2Lambda(tpar, ncol(dat)-1)
# start <- c(start,Lambda = c(Lambda_mle[upper.tri(Lambda_mle,diag=FALSE)]))
# opt$par <- start;
# opt$hessian <- numDeriv::hessian(func=nllopthr, x=start, fixedpar=fixed, wfixed=wfixed,
# numpar=numpar, transform=FALSE, dat=dat, thid=thid,
# lambda=lambda, N=N, u=u)
if(3 %in% wfixed){
opt$estimate[['Lambda']] <- fixed.param['Lambda']
} else{
opt$estimate[['Lambda']] <- Lambda_mle
}
} else if(model=="xstud"){
if(3 %in% wfixed){
opt$estimate[['df']] <- fixed.param['df']
} else{
opt$estimate[['df']] <- opt$par[grep('df',names(opt$par))]
}
Rho_mle <- chol2cor(tpar, ncol(dat))
if(3 %in% wfixed){
opt$estimate[['Rho']] <- fixed.param['Rho']
} else{
opt$estimate[['Rho']] <- Rho_mle
}
}
#Add the variance matrix and the standard errors to the opt list
if(std.err){
if(model %in% c("dir","negdir")){
opt$var <- try(solve(opt$hessian))
if(is.character(opt$var)){
opt$var <- NULL
} else{
opt$se <- sqrt(diag(opt$var))
}
}
}#TODO include the HR model
#Saving intermediate elements
opt$model <- model
opt$u <- as.vector(u)
opt$cscale <- cscale
opt$cshape <- cshape
if(model %in% c("dir","negdir")){opt$sym <- sym}
opt$N <- N
opt$lambda <- lambda
return(opt)
}
#' Starting values for multivariate peaks-over-threshold via pairwise composite likelihood methods
#'
#' This function is the analog of \link[evd]{bvstart.vals}, except that it handles higher dimensional models
#'
#' @param x \code{n} by \code{d} data matrix on original scale
#' @param u \code{d} vector of threshold parameters on probability scale
#' @param model string indicating the multivariate extreme value family
#' @param sym logical indicating whether model should be made symmetric
#' @param cscale logical indicating common scale for all variables
#' @param cshape logical indicating common shape for all variables
#' @param transform logical indicating transformation to unconstrained domain for parameters
#' @keywords internal
composite_pot_startvals <- function(x, u=NULL, model, sym=FALSE,
cscale=FALSE, cshape=FALSE, transform=FALSE){
if(length(u)!=ncol(x)){
stop("Threshold length must match the provided data")
}
#Define containers
start <- list()
scale <- rep(1, ncol(x))
shape <- rep(0.1, ncol(x))
#Marginal parameters
if(!cscale || !cshape){
for(i in 1:ncol(as.matrix(x))){
out <- mev::gp.fit(xdat=na.omit(x[,i]), threshold=u[i], method="Grimshaw", show=FALSE)
if(!cscale && !cshape){
scale[i] <- out$estimate[1][[1]]
shape[i] <- out$estimate[2][[1]]
#start[[paste0("scale",i)]] <- out$estimate[1][[1]]
#start[[paste0("shape",i)]] <- out$estimate[2][[1]]
} else if(cscale){
shape[i] <- out$estimate[2][[1]]
} else if(cshape){
scale[i] <- out$estimate[1][[1]]
}
}
}
if(cscale || cshape){
#If only one parameter is fixed, take average
out <- mev::gp.fit(xdat=na.omit(c(t(x)-u)), threshold=0, method="Grimshaw",show=FALSE)
if(cscale){ scale <- out$estimate[1][[1]] }
if(cshape){ shape <- out$estimate[2][[1]] }
}
if(transform){
start[["scale"]] <- log(scale)
} else{
start[["scale"]] <- scale
}
start[["shape"]] <- shape
if(model == "log"){
start["dep"] = 0.75
} else if(model %in% c("dir","negdir")){
if(!sym){
if(transform){
start[["alpha"]] <- rep(0.1, ncol(x))
} else{
start[["alpha"]] <- rep(1.1, ncol(x))
}
} else{ #if sym
if(transform){
start[["alpha"]] <- 0.1
} else{
start[["alpha"]] <- 1.1
}
}
rst = 0.7
if(transform){
start[["rho"]] = log(rst)#-log(1-rst) #logit(irv)
} else{
start[["rho"]] = rst
}
} else if(model == "hr"){
sigMatstart <- matrix(0.5,ncol(x)-1,ncol(x)-1)+diag(0.5,ncol(x)-1)
L <- chol(sigMatstart)
start[['Lambda']] <- c(log(diag(L)), L[upper.tri(L, diag=FALSE)])
} else if(model == "xstud"){
start[["df"]] <- 4
start[["Rho"]] <- rep(0.5, ncol(x)*(ncol(x)-1)/2) #TODO : check dimension
}
return(start)
}
#' Estimation of the variability and Godambe information matrix using a
#' nonparametric bootstrap scheme
#'
#' Composite likelihood estimators are consistent and asymptotically normal
#' under mild regularity conditions. The asymptotic covariance matrix is the
#' inverse Godambe information matrix \eqn{G^{-1}}, which is estimated by a
#' nonparametric bootstrap as the empirical covariance of \code{B} replicates.
#' The sensitivity matrix \eqn{H} is calculated from the Hessian matrix at the
#' maximum composite likelihood estimates. Lastly, the variability matrix \eqn{J} is
#' obtained from the relation \eqn{G=HJ^{-1}H}
#'
#' @param dat data matrix
#' @param fitted output from the call to \code{fmvcpot}.
#' @param B number of bootstrap replicates
#' @param ... fixed pararameters to pass to \code{fmvcpot} if any
#' @param use.start logical indicating whether to use MCLE as starting value
#' @return a list with matrices \code{godambe}, \code{sensitivity} and \code{variability}
#' @export
compositemat <- function(dat, fitted, B, use.start=FALSE, ...){
#Sensitivity matrix H
Hmat <- fitted$hessian
bootpar <- matrix(NA,nrow=B, ncol=length(fitted$par))
#Extract the composite maximum likelihood
st <- list()
if(fitted$model %in% c("dir","negdir")){
parnames <- c("scale","shape","alpha","rho")
}
#start must be a named list
whichpar <- sapply(substr(names(fitted$par),1,3), function(nm){pmatch(nm,parnames)})
for(p in parnames){
temp = which(whichpar==switch(p, "scale"=1, "shape"=2,"alpha"=3,"rho"=4))
if(length(temp)>0){
if(p!="shape"){ #since we use transform
st[[p]] <- log(fitted$par[temp])
} else{
st[[p]] <- fitted$par[temp]
}
}
}
#Boostrap loop
for(b in 1:B){
set.seed(b)
if(use.start){
optimal <- try(fmvcpot(dat[sample(1:nrow(dat), size=nrow(dat), replace = TRUE),],
u=fitted$u, model=fitted$model,start=st, std.err=FALSE, method="Nelder-Mead",
transform=TRUE,cscale=fitted$cscale, cshape=fitted$cshape, sym=fitted$sym,...))
if(!is.character(optimal)){ #If fail, try without the starting values
optimal <- try(fmvcpot(dat[sample(1:nrow(dat), size=nrow(dat), replace = TRUE),],
u=fitted$u, model=fitted$model, std.err=FALSE, method="Nelder-Mead",
transform=TRUE,cscale=fitted$cscale, cshape=fitted$cshape, sym=fitted$sym,...))
}
} else{
optimal <- try(fmvcpot(dat[sample(1:nrow(dat), size=nrow(dat), replace = TRUE),],
u=fitted$u, model=fitted$model, std.err=FALSE, method="Nelder-Mead",
transform=TRUE,cscale=fitted$cscale, cshape=fitted$cshape, sym=fitted$sym,...))
}
if(!is.character(optimal)){
if(optimal$convergence==0){
bootpar[b,] <- optimal$par
}
}
}
#print(bootpar)
covmat <- var(bootpar,na.rm = TRUE)
godambe <- solve(covmat)
Jmat <- Hmat%*%covmat%*%Hmat
return(list(sensitivity=Hmat, godambe=godambe, variability=Jmat, parreplicates=bootpar))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.