R/Conditional_RP_2D.R

#' Calculates joint and conditional return periods
#'
#' Univariate return period events are obtained from the GPDs to be consistent with the isolines produced by the \code{Design_Event_2D} function.
#' To find the conditional probabilities a large number of realizations are simulated from the copulas fit to the conditioned samples, in proportion with the sizes of the conditional samples. The realizations are transformed to the original scale and the relevant probabilities estimated empirically.
#'
#' @param Data Data frame of dimension \code{nx2} containing two co-occurring time series of length \code{n}.
#' @param Data_Con1 Data frame containing the conditional sample (declustered excesses paired with concurrent values of other variable), conditioned on the variable in the first column.
#' @param Data_Con2 Data frame containing the conditional sample (declustered excesses paired with concurrent values of other variable), conditioned on the variable in the second column. Can be obtained using the \code{Con_Sampling_2D} function.
#' @param u1 Numeric vector of length one specifying the (quantile) threshold above which the variable in the first column was sampled in Data_Con1.
#' @param u2 Numeric vector of length one specifying the (quantile) threshold above which the variable in the second column was sampled in Data_Con2.
#' @param Thres1 Numeric vector of length one specifying the threshold above which the variable in the first column was sampled in Data_Con1. Only one of \code{u1} and \code{Thres1} should be supplied. Default is \code{NA}.
#' @param Thres2 Numeric vector of length one specifying the threshold above which the variable in the second column was sampled in Data_Con2. Only one of \code{u2} and \code{Thres2} should be supplied. Default is \code{NA}.
#' @param Copula_Family1 Numeric vector of length one specifying the copula family used to model the \code{Data_Con1} dataset.
#' @param Copula_Family2 Numeric vector of length one specifying the copula family used to model the \code{Data_Con2} dataset. Best fitting of 40 copulas can be found using the \code{Copula_Threshold_2D} function.
#' @param Marginal_Dist1 Character vector of length one specifying (non-extreme) distribution used to model the marginal distribution of the non-conditioned variable in \code{Data_Con1}.
#' @param Marginal_Dist2 Character vector of length one specifying (non-extreme) distribution used to model the marginal distribution of the non-conditioned variable in \code{Data_Con2}.
#' @param Con1 Character vector of length one specifying the name of variable in the first column of \code{Data}.
#' @param Con2 Character vector of length one specifying the name of variable in the second column of \code{Data}.
#' @param mu Numeric vector of length one specifying the (average) occurrence frequency of events in \code{Data}. Default is \code{365.25}, daily data.
#' @param Con_Var Character vector of length one specifying the (column) name of the conditioning variable.
#' @param RP_Con Numeric vector of length one specifying the return period of the conditioning variable \code{Con_Var}.
#' @param RP_Non_Con Numeric vector of length one specifying the return period of the non-conditioning variable.
#' @param x_lab Character vector specifying the x-axis label.
#' @param y_lab Character vector specifying the y-axis label.
#' @param x_lim_min Numeric vector of length one specifying x-axis minimum. Default is \code{NA}.
#' @param x_lim_max Numeric vector of length one specifying x-axis maximum. Default is \code{NA}.
#' @param y_lim_min Numeric vector of length one specifying y-axis minimum. Default is \code{NA}.
#' @param y_lim_max Numeric vector of length one specifying y-axis maximum. Default is \code{NA}.
#' @param DecP Numeric vector of length one specifying the number of decimal places to round the data in the conditional samples to in order to identify observations in both conditional samples. Default is \code{2}.
#' @param N Numeric vector of length one specifying the size of the sample from the fitted joint distributions used to estimate the density along an isoline. Samples are collected from the two joint distribution with proportions consistent with the total number of extreme events conditioned on each variable. Default is \code{10^6}
#' @return Console output: \itemize{
#' \item Con_Var
#' Name of the conditioning variable
#' \item RP_Var1
#' Return period of variable Con1 i.e., variable in second column of \code{Data}
#' \item RP_Var2
#' Return period of variable Con2 i.e., variable in third column of \code{Data}
#' \item Var1
#' Value of Con1 at the return period of interest i.e. RP_Var1
#' \item Var2
#' Value of Con2 at the return period of interest i.e. RP_Var2
#' \item RP_Full_Dependence
#' Joint return period of the (Var1,Var2) event under full dependence
#' \item RP_Independence
#' Joint return period of the (Var1,Var2) event under independence
#' \item RP_Copula
#' Joint return period of the (Var1,Var2) event according to the two sided conditional sampling - copula theory approach
#' \item Prob
#' Probability associated with \code{RP_Copula}
#' \item N_Excess
#' Number of realizations of the \code{Con_Var} above \code{RP_Con}-year return period value
#' \item Non_Con_Var_X
#' Values of the non-conditioned variable of the (conditional) Cummulative Distribution Function (CDF) i.e. x-axis of bottom left plot
#' \item Con_Prob
#' \code{Con_Prob} CDF of the non-conditioned variable given the return period of \code{Con_Var} exceeds \code{RP_Con}
#' \item Con_Prob_Est
#' Probability the non-conditioned variable is less than or equal to \code{RP_Non_Con} given the return period of \code{Con_Var} exceeds \code{RP_Con}
#' }
#' Graphical output: \itemize{
#' \item Top left: Sample conditioned on Con1 (red crosses) and Con2 (blue circles). Black dot is the event with a marginal return period of the conditioned variable \code{Var_Con} and non-conditioned variable equal to \code{RP_Con} and \code{RP_Non_Con}, respectively. The joint return period of the event using the conditional sampling - copula theory approach and under the assumptions of full dependence and independence between the variables are printed.
#' \item Top right: Sample conditioned on Con1 (red crosses) and Con2 (blue circles). Only the region where Con_Var exceeds RP_Con is visible. This is the region for which the conditional distribution (of the non-conditioned variable given Con_Var exceeds RP_Con) and in turn conditional return periods are calculated.
#' \item Bottom left: Conditional Cumulative Distribution Function (CDF) of the non-conditioned variable given the marginal return period of the conditioned variable \code{Var_Con} exceeds \code{RP_Con} years i.e. the points visible in the top right plot.
#' \item Bottom right: Conditional return period of the non-conditioned variable given the conditioned variable \code{Var_Con} has a return period longer than \code{RP_Con}.
#' }
#' @seealso \code{\link{Design_Event_2D}}
#' @export
#' @examples
#' #Under a 10yr (or greater) rainfall event condition, what is the joint probability that a 10yr
#' #O-sWLevent occurs simultaneously?  What is the cumulative probability of events with the
#' #frequency equal to or less than a 10yr O-sWL event?
#' Conditional_RP_2D(Data=S22.Detrend.df,
#'                   Data_Con1=con.sample.Rainfall$Data, Data_Con2=con.sample.OsWL$Data,
#'                   u1=0.98, u2=0.98,
#'                   Copula_Family1=cop.Rainfall,Copula_Family2=cop.OsWL,
#'                   Marginal_Dist1="Logis", Marginal_Dist2="BS",
#'                   Con1 = "Rainfall", Con2 = "OsWL",
#'                   mu = 365.25,
#'                   Con_Var="Rainfall",
#'                   RP_Con=10, RP_Non_Con=10,
#'                   x_lab = "Rainfall (Inches)", y_lab = "O-sWL (ft NGVD 29)",
#'                   y_lim_max = 10,
#'                   N=10^7)
Conditional_RP_2D<-function (Data, Data_Con1, Data_Con2, u1, u2,
                             Thres1=NA, Thres2=NA, Copula_Family1,
                             Copula_Family2, Marginal_Dist1, Marginal_Dist2, Con1 = "Rainfall",
                             Con2 = "OsWL", mu = 365.25, Con_Var, RP_Con, RP_Non_Con,
                             Var1=NA,Var2=NA,
                             x_lab = "Rainfall (mm)", y_lab = "O-sWL (mNGVD 29)", x_lim_min = NA,
                             x_lim_max = NA, y_lim_min = NA, y_lim_max = NA, DecP = 2, N)
{
  ###Preliminaries

  #Remove 1st column of Data if it is a Date or factor object.
  if (class(Data[, 1])[1] == "Date" | class(Data[, 1])[1] == "factor" | class(Data[, 1])[1] == "POSIXct" | class(Data[, 1])[1] == "character") {
    Data <- Data[, -1]
  }

  #Find the columns in Data (which should be consistent in terms of column order of the other data input objects)
  #of conditioning variable 1 (Con1) and conditioning variable 2 (Con2).
  var1 <- Var1
  var2 <- Var2
  con1 <- which(names(Data) == Con1)
  con2 <- which(names(Data) == Con2)
  con_var <- which(names(Data) == Con_Var)
  if(is.na(Var1)==T){
  RP_Var1 <- ifelse(con_var == 1, RP_Con, RP_Non_Con)
  RP_Var2 <- ifelse(con_var == 2, RP_Con, RP_Non_Con)
  }

  #Axis limits for plots
  x_min <- ifelse(is.na(x_lim_min) == T, min(na.omit(Data[,con1])), x_lim_min)
  x_max <- ifelse(is.na(x_lim_max) == T, max(na.omit(Data[,con1])), x_lim_max)
  y_min <- ifelse(is.na(y_lim_min) == T, min(na.omit(Data[,con2])), y_lim_min)
  y_max <- ifelse(is.na(y_lim_max) == T, max(na.omit(Data[,con2])), y_lim_max)

  #Finding the threshold if specified as a quantile
  if(is.na(Thres1)==T){
    Thres1<-quantile(na.omit(Data[,con1]), u1)
  }

  ##Finding the value of variable con1 associated with a return peroid of RP_Var1
  #Fitting the GPD
  GPD_con1 <- evm(Data_Con1[, con1], th = Thres1, penalty = "gaussian", priorParameters = list(c(0,0), matrix(c(100^2, 0, 0, 0.25), nrow = 2)))
  #Calculate the time period spanned by the original dataset in terms of mu (only including occasions where both variables are observed).
  time.period<-nrow(Data[which(is.na(Data[,con1]) == F & is.na(Data[, con2]) == F),])/mu
  #Calculate the rate of occurrences of extremes (in terms of mu) in Data_Con1.
  rate<-nrow(Data_Con1)/time.period
  #Interarrival time
  EL_Con1<-1/rate
  #Value of con1 with return period RP_Var1
  if(is.na(var1)==T){
   Var1<-as.numeric(u2gpd((1-EL_Con1/RP_Var1), p = 1, th=Thres1 , sigma=exp(GPD_con1$coefficients[1]),xi= GPD_con1$coefficients[2]))
  }
  if(is.na(var2)==F){
   RP_Var1<-1/(1-pgpd(Var1, u=Thres1 , sigma=exp(GPD_con1$coefficients[1]),xi= GPD_con1$coefficients[2]))
  }

  ##Fit the specified marginal distribution (Marginal_Dist1) to the variable con2 in Data_Con1.
  if (Marginal_Dist1 == "BS") {
    bdata2 <- data.frame(shape = exp(-0.5), scale = exp(0.5))
    bdata2 <- transform(bdata2, y = Data_Con1[, con2])
    marginal_non_con1 <- vglm(y ~ 1, bisa, data = bdata2,
                              trace = FALSE)
  }
  if (Marginal_Dist1 == "Exp") {
    marginal_non_con1 <- fitdistr(Data_Con1[, con2], "exponential")
  }
  if (Marginal_Dist1 == "Gam(2)") {
    marginal_non_con1 <- fitdistr(Data_Con1[, con2], "gamma2")
  }
  if(Marginal_Dist1 == "Gam(3)"){
    data.gamlss<-data.frame(X=Data_Con1[,con2])
    marginal_non_con1 <- tryCatch(gamlss(X~1, data=data.gamlss, family=GG),
                                  error = function(e) "error")
  }
  if(Marginal_Dist1 == "GamMix(2)"){
    data.gamlss<-data.frame(X=Data_Con1[,con2])
    marginal_non_con1 <- tryCatch(gamlssMX(X~1, data=data.gamlss, family=GA, K=2),
                                  error = function(e) "error")
  }
  if(Marginal_Dist1 == "GamMix(3)"){
    data.gamlss<-data.frame(X=Data_Con1[,con2])
    marginal_non_con1 <- tryCatch(gamlssMX(X~1, data=data.gamlss, family=GA, K=3),
                                  error = function(e) "error")
  }
  if (Marginal_Dist1 == "Gaus") {
    marginal_non_con1 <- fitdistr(Data_Con1[, con2], "normal")
  }
  if (Marginal_Dist1 == "InvG") {
    marginal_non_con1 <- fitdist(Data_Con1[, con2], "invgauss",
                                 start = list(mean = 5, shape = 1))
  }
  if (Marginal_Dist1 == "Logis") {
    marginal_non_con1 <- fitdistr(Data_Con1[, con2], "logistic")
  }
  if (Marginal_Dist1 == "LogN") {
    marginal_non_con1 <- fitdistr(Data_Con1[, con2], "lognormal")
  }
  if (Marginal_Dist1 == "TNorm") {
    marginal_non_con1 <- fitdistr(Data_Con1[, con2], "normal")
  }
  if (Marginal_Dist1 == "Twe") {
    marginal_non_con1 <- tweedie.profile(Data_Con1[, con2] ~
                                           1, p.vec = seq(1.5, 2.5, by = 0.2), do.plot = FALSE)
  }
  if (Marginal_Dist1 == "Weib") {
    marginal_non_con1 <- fitdistr(Data_Con1[, con2], "weibull")
  }

  #Finding the threshold if specified as a quantile
  if(is.na(Thres2)==T){
    Thres2<-quantile(na.omit(Data[,con2]), u2)
  }

  if (is.na(var2)==F){
    if (Marginal_Dist1 == "BS") {
      RP_Var2_con1 <- 1/(1-pbisa(Var2, as.numeric(Coef(marginal_non_con1)[1]),
                            as.numeric(Coef(marginal_non_con1)[2])))
    }
    if (Marginal_Dist1 == "Exp") {
      RP_Var2_con1 <- 1/(1-pexp(Var2, rate = as.numeric(marginal_non_con1$estimate[1])))
    }
    if (Marginal_Dist1 == "Gam(2)") {
      RP_Var2_con1 <- 1/(1-pgamma(Var2, shape = as.numeric(marginal_non_con1$estimate[1]),
                             rate = as.numeric(marginal_non_con1$estimate[2])))
    }
    if(Marginal_Dist1 == "Gam(3)"){
      RP_Var2_con1<-1/(1-pGG(Var2, mu=exp(marginal_non_con1$mu.coefficients), sigma=exp(marginal_non_con1$sigma.coefficients), nu=marginal_non_con1$nu.coefficients))
    }
    if(Marginal_Dist1 == "GamMix(2)"){
      prob.MX1 <- round(marginal_non_con1$prob[1],3)
      prob.MX2 <- 1 - prob.MX1
      RP_Var2_con1<-1/(1-pMX(Var2, mu=list(mu1=exp(marginal_non_con1$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con1$models[[2]]$mu.coefficients)),
                             sigma=list(sigma1=exp(marginal_non_con1$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con1$models[[2]]$sigma.coefficients)),
                             pi = list(pi1=prob.MX1, pi2=prob.MX2), family=list(fam1="GA", fam2="GA")))
    }
    if(Marginal_Dist1 == "GamMix(3)"){
      prob.MX1 <- round(marginal_non_con1$prob[1],3)
      prob.MX2 <- round(marginal_non_con1$prob[2],3)
      prob.MX3 <- 1 - prob.MX1 - prob.MX2
      RP_Var2_con1<-1/(1-pMX(Var2, mu=list(mu1=exp(marginal_non_con1$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con1$models[[2]]$mu.coefficients), mu3=exp(marginal_non_con1$models[[3]]$mu.coefficients)),
                        sigma=list(sigma1=exp(marginal_non_con1$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con1$models[[2]]$sigma.coefficients), sigma3=exp(marginal_non_con1$models[[3]]$sigma.coefficients)),
                        pi = list(pi1=prob.MX1, pi2=prob.MX2, pi3=prob.MX3), family=list(fam1="GA", fam2="GA", fam3="GA")))
    }
    if (Marginal_Dist1 == "Gaus") {
      RP_Var2_con1 <- 1/(1-pnorm(Var2, mean = as.numeric(marginal_non_con1$estimate[1]),
                            sd = as.numeric(marginal_non_con1$estimate[2])))
    }
    if (Marginal_Dist1 == "InvG") {
      RP_Var2_con1 <- 1/(1-pinvgauss(Var2, mean = as.numeric(marginal_non_con1$estimate[1]),
                                shape = as.numeric(marginal_non_con1$estimate[2])))
    }
    if (Marginal_Dist1 == "Logis") {
      RP_Var2_con1 <- 1/(1-plogis(Var2, location = as.numeric(marginal_non_con1$estimate[1]),
                             scale = as.numeric(marginal_non_con1$estimate[2])))
    }
    if (Marginal_Dist1 == "LogN") {
      RP_Var2_con1 <- 1/(1-plnorm(Var2, meanlog = as.numeric(marginal_non_con1$estimate[1]),
                             sdlog = as.numeric(marginal_non_con1$estimate[2])))
    }
    if (Marginal_Dist1 == "TNorm") {
      RP_Var2_con1 <- 1/(1-ptruncnorm(Var2, a = min(Data_Con1[,con2]), mean = as.numeric(marginal_non_con1$estimate[1]),
                                 sd = as.numeric(marginal_non_con1$estimate[2])))
    }
    if (Marginal_Dist1 == "Twe") {
      RP_Var2_con1 <- 1/(1-ptweedie(Var2, power = marginal_non_con1$p.max,
                               mu = mean(Data_Con1[, con2]), phi = marginal_non_con1$phi.max))
    }
    if (Marginal_Dist1 == "Weib") {
      RP_Var2_con1 <- 1/(1-pweibull(Var2, shape = as.numeric(marginal_non_con1$estimate[1]),
                               scale = as.numeric(marginal_non_con1$estimate[2])))
    }
  }

  ##Finding the value of variable con2 associated with a return peroid of RP_Var2
  #Fitting the GPD to con2 in Data_Con2
  GPD_con2 <- evm(Data_Con2[, con2], th = Thres2, penalty = "gaussian", priorParameters = list(c(0, 0), matrix(c(100^2, 0, 0, 0.25), nrow = 2)))
  #Calculate the time period spanned by the original dataset in terms of mu (only including occasions where both variables are observed).
  time.period<-nrow(Data[which(is.na(Data[,con1]) == F & is.na(Data[, con2]) == F),])/mu
  #Calculate the rate of occurrences of extremes (in terms of mu) in Data_Con1.
  rate<-nrow(Data_Con2)/time.period
  #Calculate the inter-arrival time of extremes (in terms of mu) in Data_Con1.
  EL_Con2<-1/rate
  #Value of con2 with return period RP_Var2
  if(is.na(var2)==T){
   Var2<-as.numeric(u2gpd((1-EL_Con2/RP_Var2), p = 1, th=Thres2 , sigma=exp(GPD_con2$coefficients[1]),xi= GPD_con2$coefficients[2]))
  }
  if(is.na(var2)==F){
   RP_Var2<-1/(1-pgpd(Var2, u=Thres2 , sigma=exp(GPD_con2$coefficients[1]),xi= GPD_con2$coefficients[2]))
  }

  ##Fit the specified marginal distribution (Marginal_Dist2) to the non-conditioned variable con1 in Data_Con2.
  if (Marginal_Dist2 == "BS") {
    bdata2 <- data.frame(shape = exp(-0.5), scale = exp(0.5))
    bdata2 <- transform(bdata2, y = Data_Con2[, con1])
    marginal_non_con2 <- vglm(y ~ 1, bisa, data = bdata2,
                              trace = FALSE)
  }
  if (Marginal_Dist2 == "Exp") {
    marginal_non_con2 <- fitdistr(Data_Con2[, con1], "exponential")
  }
  if (Marginal_Dist2 == "Gam(2)") {
    marginal_non_con2 <- fitdistr(Data_Con2[, con1], "gamma")
  }
  if(Marginal_Dist2 == "Gam(3)"){
    data.gamlss<-data.frame(X=Data_Con2[,con1])
    marginal_non_con2 <- tryCatch(gamlss(X~1, data=data.gamlss, family=GG),
                                  error = function(e) "error")
  }
  if(Marginal_Dist2 == "GamMix(2)"){
    data.gamlss<-data.frame(X=Data_Con2[,con1])
    marginal_non_con2 <- tryCatch(gamlssMX(X~1, data=data.gamlss, family=GA, K=2),
                                  error = function(e) "error")
  }
  if(Marginal_Dist2 == "GamMix(3)"){
    data.gamlss<-data.frame(X=Data_Con2[,con1])
    marginal_non_con2 <- tryCatch(gamlssMX(X~1, data=data.gamlss, family=GA, K=3),
                                  error = function(e) "error")
  }
  if (Marginal_Dist2 == "Gaus") {
    marginal_non_con2 <- fitdistr(Data_Con2[, con1], "normal")
  }
  if (Marginal_Dist2 == "InvG") {
    marginal_non_con2 <- fitdist(Data_Con2[, con1], "invgauss",
                                 start = list(mean = 5, shape = 1))
  }
  if (Marginal_Dist2 == "Logis") {
    marginal_non_con2 <- fitdistr(Data_Con2[, con1], "logistic")
  }
  if (Marginal_Dist2 == "LogN") {
    marginal_non_con2 <- fitdistr(Data_Con2[, con1], "lognormal")
  }
  if (Marginal_Dist2 == "TNorm") {
    marginal_non_con2 <- fitdistr(Data_Con2[, con1], "normal")
  }
  if (Marginal_Dist2 == "Twe") {
    marginal_non_con2 <- tweedie.profile(Data_Con2[, con1] ~
                                           1, p.vec = seq(1.5, 2.5, by = 0.2), do.plot = FALSE)
  }
  if (Marginal_Dist2 == "Weib") {
    marginal_non_con2 <- fitdistr(Data_Con2[, con1], "weibull")
  }

  if (is.na(var2)==F){
    if (Marginal_Dist2 == "BS") {
      RP_Var1_con2 <- 1/(1-pbisa(Var1, as.numeric(Coef(marginal_non_con2)[1]),
                            as.numeric(Coef(marginal_non_con2)[2])))
    }
    if (Marginal_Dist2 == "Exp") {
      RP_Var1_con2 <- 1/(1-pexp(Var1, rate = as.numeric(marginal_non_con2$estimate[1])))
    }
    if (Marginal_Dist2 == "Gam(2)") {
      RP_Var1_con2 <- 1/(1-pgamma(Var1, shape = as.numeric(marginal_non_con2$estimate[1]),
                             rate = as.numeric(marginal_non_con2$estimate[2])))
    }
    if(Marginal_Dist2=="Gam(3)"){
      RP_Var1_con2<-1/(1-pGG(Var1, mu=exp(marginal_non_con2$mu.coefficients), sigma=exp(marginal_non_con2$sigma.coefficients), nu=marginal_non_con2$nu.coefficients))
    }
    if(Marginal_Dist2=="GamMix(2)"){
      prob.MX1 <- round(marginal_non_con2$prob[1],3)
      prob.MX2 <- 1 - prob.MX1
      RP_Var1_con2<-1/(1-pMX(Var1, mu=list(mu1=exp(marginal_non_con2$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con2$models[[2]]$mu.coefficients)),
                        sigma=list(sigma1=exp(marginal_non_con2$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con2$models[[2]]$sigma.coefficients)),
                        pi = list(pi1=prob.MX1, pi2=prob.MX2), family=list(fam1="GA", fam2="GA")))
    }
    if(Marginal_Dist2=="GamMix(3)"){
      prob.MX1 <- round(marginal_non_con2$prob[1],3)
      prob.MX2 <- round(marginal_non_con2$prob[2],3)
      prob.MX3 <- 1 - prob.MX1 - prob.MX2
      RP_Var1_con2<-1/(1-pMX(Var1, mu=list(mu1=exp(marginal_non_con2$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con2$models[[2]]$mu.coefficients), mu3=exp(marginal_non_con2$models[[3]]$mu.coefficients)),
                        sigma=list(sigma1=exp(marginal_non_con2$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con2$models[[2]]$sigma.coefficients), sigma3=exp(marginal_non_con2$models[[3]]$sigma.coefficients)),
                        pi = list(pi1=prob.MX1, pi2=prob.MX2, pi3=prob.MX3), family=list(fam1="GA", fam2="GA", fam3="GA")))
    }
    if (Marginal_Dist2 == "Gaus") {
      RP_Var1_con2 <- 1/(1-pnorm(Var1, mean = as.numeric(marginal_non_con2$estimate[1]),
                            sd = as.numeric(marginal_non_con2$estimate[2])))
    }
    if (Marginal_Dist2 == "InvG") {
      RP_Var1_con2 <- 1/(1-pinvgauss(Var1, mean = as.numeric(marginal_non_con2$estimate[1]),
                                shape = as.numeric(marginal_non_con2$estimate[2])))
    }
    if (Marginal_Dist2 == "Logis") {
      RP_Var1_con2 <- 1/(1-plogis(Var1, location = as.numeric(marginal_non_con2$estimate[1]),
                             scale = as.numeric(marginal_non_con2$estimate[2])))
    }
    if (Marginal_Dist2 == "LogN") {
      RP_Var1_con2 <- 1/(1-plnorm(Var1, meanlog = as.numeric(marginal_non_con2$estimate[1]),
                             sdlog = as.numeric(marginal_non_con2$estimate[2])))
    }
    if (Marginal_Dist2 == "TNorm") {
      RP_Var1_con2 <- 1/(1-ptruncnorm(Var1, a = min(Data_Con2[,con2]), mean = as.numeric(marginal_non_con2$estimate[1]),
                                 sd = as.numeric(marginal_non_con2$estimate[2])))
    }
    if (Marginal_Dist2 == "Twe") {
      RP_Var1_con2 <- 1/(1-ptweedie(Var1, power = marginal_non_con2$p.max,
                               mu = mean(Data_Con2[, con2]), phi = marginal_non_con2$phi.max))
    }
    if (Marginal_Dist2 == "Weib") {
      RP_Var1_con2 <- 1/(1-pweibull(Var1, shape = as.numeric(marginal_non_con2$estimate[1]),
                               scale = as.numeric(marginal_non_con2$estimate[2])))
    }
  }

  ###Simulating sample from the joint distribution (copula+marginals) fit to the sample conditioned on Con1
  #Fit the specified copula family (Copula_Family1) to the observations in Data_Con1.
  obj1 <- BiCopSelect(pobs(Data_Con1[, 1]), pobs(Data_Con1[,  2]), familyset = Copula_Family1, selectioncrit = "AIC",
                      indeptest = FALSE, level = 0.05, weights = NA, rotations = TRUE,
                      se = FALSE, presel = TRUE, method = "mle")
  #Simulate a sample from the fitted copula. Out of the sample size 'N' the proportion of the sample from the
  #copula assoicated with Data_Con1 is proportional to the size of Data_Con1 relative to Data_Con2.
  sample <- BiCopSim(round(N * nrow(Data_Con1)/(nrow(Data_Con1) + nrow(Data_Con2)), 0), obj1)
  #Transform the realizations of the variable con1 to the original scale using the inverse CDF (quantile function)
  #of the GPD contained in the u2gpd function.
  cop.sample1.con <- u2gpd(sample[, con1], p = 1, th = Thres1, sigma = exp(GPD_con1$coefficients[1]),xi = GPD_con1$coefficients[2])

  #Transform the realizations of variable con2 to the original scale using the inverse CDF (quantile function)
  #of the selected parametric (non-extreme value) distribution (Marginal_Dist1)
  if (Marginal_Dist1 == "BS") {
    cop.sample1.non.con <- qbisa(sample[, con2], as.numeric(Coef(marginal_non_con1)[1]),
                                 as.numeric(Coef(marginal_non_con1)[2]))
  }
  if (Marginal_Dist1 == "Exp") {
    cop.sample1.non.con <- qexp(sample[, con2], rate = as.numeric(marginal_non_con1$estimate[1]))
  }
  if (Marginal_Dist1 == "Gam(2)") {
    cop.sample1.non.con <- qgamma(sample[, con2], shape = as.numeric(marginal_non_con1$estimate[1]),
                                  rate = as.numeric(marginal_non_con1$estimate[2]))
  }
  if(Marginal_Dist1=="Gam(3)"){
    cop.sample1.non.con<-qGG(sample[,con2], mu=exp(marginal_non_con1$mu.coefficients), sigma=exp(marginal_non_con1$sigma.coefficients), nu=marginal_non_con1$nu.coefficients)
  }
  if(Marginal_Dist1=="GamMix(2)"){
    prob.MX1 <- round(marginal_non_con1$prob[1],3)
    prob.MX2 <- 1 - prob.MX1
    cop.sample1.non.con<-qMX(sample[,con2], mu=list(mu1=exp(marginal_non_con1$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con1$models[[2]]$mu.coefficients)),
                             sigma=list(sigma1=exp(marginal_non_con1$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con1$models[[2]]$sigma.coefficients)),
                             pi = list(pi1=prob.MX1, pi2=prob.MX2), family=list(fam1="GA", fam2="GA"))
  }
  if(Marginal_Dist1=="GamMix(3)"){
    prob.MX1 <- round(marginal_non_con1$prob[1],3)
    prob.MX2 <- round(marginal_non_con1$prob[2],3)
    prob.MX3 <- 1 - prob.MX1 - prob.MX2
    cop.sample1.non.con<-qMX(sample[,con2], mu=list(mu1=exp(marginal_non_con1$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con1$models[[2]]$mu.coefficients), mu3=exp(marginal_non_con1$models[[3]]$mu.coefficients)),
                             sigma=list(sigma1=exp(marginal_non_con1$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con1$models[[2]]$sigma.coefficients), sigma3=exp(marginal_non_con1$models[[3]]$sigma.coefficients)),
                             pi = list(pi1=prob.MX1, pi2=prob.MX2, pi3=prob.MX3), family=list(fam1="GA", fam2="GA", fam3="GA"))
  }
  if (Marginal_Dist1 == "Gaus") {
    cop.sample1.non.con <- qnorm(sample[, con2], mean = as.numeric(marginal_non_con1$estimate[1]),
                                 sd = as.numeric(marginal_non_con1$estimate[2]))
  }
  if (Marginal_Dist1 == "InvG") {
    cop.sample1.non.con <- qinvgauss(sample[, con2], mean = as.numeric(marginal_non_con1$estimate[1]),
                                     shape = as.numeric(marginal_non_con1$estimate[2]))
  }
  if (Marginal_Dist1 == "Logis") {
    cop.sample1.non.con <- qlogis(sample[, con2], location = as.numeric(marginal_non_con1$estimate[1]),
                                  scale = as.numeric(marginal_non_con1$estimate[2]))
  }
  if (Marginal_Dist1 == "LogN") {
    cop.sample1.non.con <- qlnorm(sample[, con2], meanlog = as.numeric(marginal_non_con1$estimate[1]),
                                  sdlog = as.numeric(marginal_non_con1$estimate[2]))
  }
  if (Marginal_Dist1 == "TNorm") {
    cop.sample1.non.con <- qtruncnorm(sample[, con2], a = min(Data_Con1[,
                                                                        con2]), mean = as.numeric(marginal_non_con1$estimate[1]),
                                      sd = as.numeric(marginal_non_con1$estimate[2]))
  }
  if (Marginal_Dist1 == "Twe") {
    cop.sample1.non.con <- qtweedie(sample[, con2], power = marginal_non_con1$p.max,
                                    mu = mean(Data_Con1[, con2]), phi = marginal_non_con1$phi.max)
  }
  if (Marginal_Dist1 == "Weib") {
    cop.sample1.non.con <- qweibull(sample[, con2], shape = as.numeric(marginal_non_con1$estimate[1]),
                                    scale = as.numeric(marginal_non_con1$estimate[2]))
  }
  #Put the realizations that have been transformed to the original scale in a data frame.
  cop.sample1 <- data.frame(cop.sample1.con, cop.sample1.non.con)
  colnames(cop.sample1) <- c("Var1", "Var2")

  ###Simulating sample from the joint distribution (copula+marginals) fit to the sample conditioned on Con2
  #Fit the specified copula family (Copula_Family2) to the observations in Data_Con2.
  obj2 <- BiCopSelect(pobs(Data_Con2[, 1]), pobs(Data_Con2[,2]), familyset = Copula_Family2, selectioncrit = "AIC",
                      indeptest = FALSE, level = 0.05, weights = NA, rotations = TRUE,
                      se = FALSE, presel = TRUE, method = "mle")
  #Simulate a sample from the fitted copula. Out of the sample size 'N' the proportion of the sample from the
  #copula assoicated with Data_Con1 is proportional to the size of Data_Con1 relative to Data_Con2.
  sample <- BiCopSim(round(N * nrow(Data_Con2)/(nrow(Data_Con1) +nrow(Data_Con2)), 0), obj2)

  #Transform the realizations of variable con2 to the original scale using the inverse CDF (quantile function)
  #of the GPD contained in the u2gpd function.
  cop.sample2.con <- u2gpd(sample[, con2], p = 1, th = Thres2, sigma = exp(GPD_con2$coefficients[1]),xi = GPD_con2$coefficients[2])

  #Transform the realizations of variable con1 to the original scale using the inverse CDF (quantile function)
  #of the selected parametric (non-extreme value) distribution (Marginal_Dist2)
  if (Marginal_Dist2 == "BS") {
    cop.sample2.non.con <- qbisa(sample[, con1], as.numeric(Coef(marginal_non_con2)[1]),
                                 as.numeric(Coef(marginal_non_con2)[2]))
  }
  if (Marginal_Dist2 == "Exp") {
    cop.sample2.non.con <- qexp(sample[, con1], rate = as.numeric(marginal_non_con2$estimate[1]))
  }
  if (Marginal_Dist2 == "Gam(2)") {
    cop.sample2.non.con <- qgamma(sample[, con1], shape = as.numeric(marginal_non_con2$estimate[1]),
                                  rate = as.numeric(marginal_non_con2$estimate[2]))
  }
  if(Marginal_Dist2=="Gam(3)"){
    cop.sample2.non.con<-qGG(sample[,con1], mu=exp(marginal_non_con2$mu.coefficients), sigma=exp(marginal_non_con2$sigma.coefficients), nu=marginal_non_con2$nu.coefficients)
  }
  if(Marginal_Dist2=="GamMix(2)"){
    prob.MX1 <- round(marginal_non_con2$prob[1],3)
    prob.MX2 <- 1 - prob.MX1
    cop.sample2.non.con<-qMX(sample[,con1], mu=list(mu1=exp(marginal_non_con2$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con2$models[[2]]$mu.coefficients)),
                             sigma=list(sigma1=exp(marginal_non_con2$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con2$models[[2]]$sigma.coefficients)),
                             pi = list(pi1=prob.MX1, pi2=prob.MX2), family=list(fam1="GA", fam2="GA"))
  }
  if(Marginal_Dist2=="GamMix(3)"){
    prob.MX1 <- round(marginal_non_con2$prob[1],3)
    prob.MX2 <- round(marginal_non_con2$prob[2],3)
    prob.MX3 <- 1 - prob.MX1 - prob.MX2
    cop.sample2.non.con<-qMX(sample[,con1], mu=list(mu1=exp(marginal_non_con2$models[[1]]$mu.coefficients), mu2=exp(marginal_non_con2$models[[2]]$mu.coefficients), mu3=exp(marginal_non_con2$models[[3]]$mu.coefficients)),
                             sigma=list(sigma1=exp(marginal_non_con2$models[[1]]$sigma.coefficients), sigma2=exp(marginal_non_con2$models[[2]]$sigma.coefficients), sigma3=exp(marginal_non_con2$models[[3]]$sigma.coefficients)),
                             pi = list(pi1=prob.MX1, pi2=prob.MX2, pi3=prob.MX3), family=list(fam1="GA", fam2="GA", fam3="GA"))
  }
  if (Marginal_Dist2 == "Gaus") {
    cop.sample2.non.con <- qnorm(sample[, con1], mean = as.numeric(marginal_non_con2$estimate[1]),
                                 sd = as.numeric(marginal_non_con2$estimate[2]))
  }
  if (Marginal_Dist2 == "InvG") {
    cop.sample2.non.con <- qinvgauss(sample[, con1], mean = as.numeric(marginal_non_con2$estimate[1]),
                                     shape = as.numeric(marginal_non_con2$estimate[2]))
  }
  if (Marginal_Dist2 == "LogN") {
    cop.sample2.non.con <- qlnorm(sample[, con1], meanlog = as.numeric(marginal_non_con2$estimate[1]),
                                  sdlog = as.numeric(marginal_non_con2$estimate[2]))
  }
  if (Marginal_Dist2 == "Logis") {
    cop.sample2.non.con <- qlogis(sample[, con1], location = as.numeric(marginal_non_con2$estimate[1]),
                                  scale = as.numeric(marginal_non_con2$estimate[2]))
  }
  if (Marginal_Dist2 == "TNorm") {
    cop.sample1.non.con <- qtruncnorm(sample[, con1], a = min(Data_Con2[,
                                                                        con1]), mean = as.numeric(marginal_non_con2$estimate[1]),
                                      sd = as.numeric(marginal_non_con2$estimate[2]))
  }
  if (Marginal_Dist2 == "Twe") {
    cop.sample2.non.con <- qtweedie(sample[, con1], power = marginal_non_con2$p.max,
                                    mu = mean(Data_Con2[, con1]), phi = marginal_non_con2$phi.max)
  }
  if (Marginal_Dist2 == "Weib") {
    cop.sample2.non.con <- qweibull(sample[, con1], shape = as.numeric(marginal_non_con2$estimate[1]),
                                    scale = as.numeric(marginal_non_con2$estimate[2]))
  }
  #Put the realizations that have been transformed to the original scale in a data frame.
  cop.sample2 <- data.frame(cop.sample2.non.con, cop.sample2.con)
  colnames(cop.sample2) <- c("Var1", "Var2")

  #Combine the data frames containg the samples from two joint models (on the original scale)
  cop.sample <- rbind(cop.sample1, cop.sample2)

  Data_Con_Combined <- rbind(round(Data_Con1,DecP),round(Data_Con2,DecP))
  Data_Con_N <- nrow(unique(Data_Con_Combined))
  RP_Copula <- (1 / (Data_Con_N / time.period)) / (length(which(cop.sample[, con1] > Var1 & cop.sample[, con2] > Var2))/ N)

  #Plotting the results so far
  par(mfrow = c(2, 2))
  par(mar = c(4.5, 4.2, 0.5, 0.5))
  plot(Data[, con1], Data[, con2], xlim = c(x_min, x_max),
       ylim = c(y_min, y_max), col = "Light Grey", xlab = x_lab,
       ylab = y_lab, cex.lab = 1.5, cex.axis = 1.5)
  points(Data_Con1[, con1], Data_Con1[, con2], col = 4, cex = 1.5)
  points(Data_Con2[, con1], Data_Con2[, con2], col = "Red",
         pch = 4, cex = 1.5)
  points(Var1, Var2, pch = 16, cex = 1.5)
  legend("topright", c(paste("Full dependence RP = ", round(min(RP_Var1,RP_Var2),0), " years", sep = ""), paste("Joint RP (copula) = ", round(RP_Copula,0), " years", sep = ""), paste("Independence RP = ", round(RP_Var1 * RP_Var2,0) , " years", sep = "")), bty = "n", cex = 1.25)
  segments(Var1,0,Var1,Var2,lty=2)
  axis(1,Var1,labels=paste(round(Var1,2)),line=1.2)
  segments(0,Var2,Var1,Var2,lty=2)
  axis(2,Var2,labels=paste(round(Var2,2)),line=1.2)

  ##Calculating the conditional probabilities using a simulation approach
  #Rate of observations in the conditional samples. The 'unique' command
  #ensures observations in both conditional samples are not counted twice.
  #rate <- nrow(unique(round(rbind(Data_Con1, Data_Con2), 2)))/time.period
  if (con_var == con1) {
    #Plotting the region the conditional probabilities correspond to.
    plot(Data[, con1], Data[, con2], xlim = c(x_min, x_max),
         ylim = c(y_min, y_max), col = "Light Grey", xlab = x_lab,
         ylab = y_lab, cex.lab = 1.5, cex.axis = 1.5)
    points(Data_Con1[, con1], Data_Con1[, con2], col = 4, cex = 1.5)
    points(Data_Con2[, con1], Data_Con2[, con2], col = "Red",
           pch = 4, cex = 1.5)
    axis(1,Var1,labels=paste(round(Var1,2)))
    rect(0,y_min-1000,Var1,y_max+1000,col=1)

    #Rate
    rate<-length(which(cop.sample[, con1] > Var1)) / (1 / (Data_Con_N / time.period) * N)
    #Linear interpolation of the cummulative distribution function (CDF) of con2 given con1 exceeds Var1
    #x is empirical probabilities, y is values of con2 in simulated sample where con1 exceeds Var1, xout is interpolation points
    CDF_Var <- approx(x=seq(1, length(which(cop.sample[, con1] > Var1)), 1)/length(which(cop.sample[, con1] > Var1)),
                      y=cop.sample[which(cop.sample[, con1] > Var1), con2][order(cop.sample[which(cop.sample[,con1] > Var1), con2])],
                      xout=seq(round(min(1/length(which(cop.sample[,con1] > Var1))) + 5e-04, 3), 1, 0.001))
    #Plotting CDF_Var the conditional CDF of con2 given con1 exceeds Var1
    plot(CDF_Var$y, CDF_Var$x, xlab = y_lab, ylab = paste("CDF given",Con_Var,">",round(Var1,2)),
         xlim = c(y_min, y_max), cex.lab = 1.5, cex.axis = 1.5,
         type = "l", lwd = 2.5)
    #Ensuring only interpolated values appoximately lower than the upper limit than the x-axis upper limit are plotted
    d <- abs(CDF_Var$y - y_max)
    max <- which(d == min(d, na.rm = T))[1]
    #Plotting CDF but with probabilities converted to return periods
    plot(1/(rate * (1 - CDF_Var$x[1:max])), CDF_Var$y[1:max],
         ylab = y_lab, xlab = paste("RP given",Con_Var,">",round(Var1,2),"(years)"),
         ylim = c(y_min, y_max), cex.lab = 1.5, cex.axis = 1.5,
         type = "l", lwd = 2.5)
    #Number of realizations in the sample where con1 exceeds Var1
    N_Excess <- length(which(cop.sample[, con1] > Var1))
    #Interpolating CDF
    CDF_Var <- approx(CDF_Var$y, CDF_Var$x, seq(round(min(CDF_Var$y),2), round(max(CDF_Var$y), 2), 0.01))
    #Extracting conditional probability that con2 is less than Var2 given con1 exceeds Var1
    #By finding value of the last interpolation of the CDF at Var2
    Con_Prob_Est <- approx(CDF_Var$x, CDF_Var$y, Var2)$y
  }
  if (con_var == con2) {
    #Plotting the region the conditional probabilities correspond to.
    plot(Data[, con1], Data[, con2], xlim = c(x_min, x_max),
         ylim = c(y_min, y_max), col = "Light Grey", xlab = x_lab,
         ylab = y_lab, cex.lab = 1.5, cex.axis = 1.5)
    points(Data_Con1[, con1], Data_Con1[, con2], col = 4, cex = 1.5)
    points(Data_Con2[, con1], Data_Con2[, con2], col = "Red",
           pch = 4, cex = 1.5)
    axis(2,Var2,labels=paste(round(Var2,2)))
    rect(x_min-1000,0,x_max+1000,Var2,col=1)

    #Rate
    rate<-length(which(cop.sample[, con2] > Var2)) / (1 / (Data_Con_N / time.period) * N)
    #Linear interpolation of the cummulative distribution function (CDF) of con1 given con2 exceeds Var2
    #x is empirical probabilities, y is values of con1 in simulated sample where con2 exceeds Var2, xout is interpolation points
    CDF_Var <- approx(x=seq(1, length(which(cop.sample[, con2] > Var2)), 1)/length(which(cop.sample[, con2] > Var2)),
                      y=cop.sample[which(cop.sample[, con2] > Var2), con1][order(cop.sample[which(cop.sample[,con2] > Var2), con1])],
                      xout=seq(round(min(1/length(which(cop.sample[,con2] > Var2))) + 5e-04, 3), 1, 0.001))
    #Plotting CDF_Var the conditional CDF of con1 given con2 exceeds Var2
    plot(CDF_Var$y, CDF_Var$x, xlab = x_lab, ylab = paste("CDF given",Con_Var,">",round(Var2,2)),
         xlim = c(x_min, x_max), cex.lab = 1.5, cex.axis = 1.5,
         type = "l", lwd = 2.5)
    #Ensuring only interpolated values approximately lower than the upper limit than the x-axis upper limit are plotted
    d <- abs(CDF_Var$y - x_max)
    max <- which(d == min(d, na.rm = T))[1]
    #Plotting CDF but with probabilities converted to return periods
    plot(1/(rate * (1 - CDF_Var$x[1:max])), CDF_Var$y[1:max],
         ylab = x_lab, xlab = paste("RP given",Con_Var,">",round(Var2,2),"(years)"),
         ylim = c(x_min, x_max), cex.lab = 1.5, cex.axis = 1.5,
         type = "l", lwd = 2.5)
    #Number of realizations in the sample where con2 exceeds Var2
    N_Excess <- length(which(cop.sample[, con2] > Var2))
    #Interpolating CDF
    print(summary(cop.sample))
    CDF_Var <- approx(CDF_Var$y, CDF_Var$x, seq(round(min(CDF_Var$y),2), round(max(CDF_Var$y), 2), 0.01))
    #Extracting conditional probability that con1 is less than Var1 given con2 exceeds Var2
    #By finding value of the last interpolation of the CDF at Var1
    Con_Prob_Est <- approx(CDF_Var$x, CDF_Var$y, Var1)$y
  }

  #Create a list of outputs.
  res <- list(Con_Var = Con_Var, RP_Var1 = RP_Var2, RP_Var2 = RP_Var2,
              Var1 = Var1, Var2 = Var2, RP_Full_Dependence = min(RP_Var1,RP_Var2),
              RP_Independence = RP_Var1 * RP_Var2,
              RP_Copula = RP_Copula,
              Prob = 1/RP_Copula,
              N_Excess = N_Excess,Non_Con_Var_X = CDF_Var$x,
              Con_Prob = CDF_Var$y, Con_RP = 1/(rate * (1 - CDF_Var$y)),
              Con_Prob_Est = Con_Prob_Est)
  return(res)
}
rjaneUCF/MultiHazard documentation built on April 20, 2024, 12:48 a.m.