R/hello.R

Defines functions Top_Incomes Table_Top_Share Pareto_diagram tidy_income Top_Share ReturnEPD ProbEPD .rhoEst .EPDneglogL EPDfit .EPDdirectMLE .EPDredMLE EPD Hill repd qepd pepd depd .EPDinput MLE.pareto1

Documented in depd EPD EPDfit Hill MLE.pareto1 Pareto_diagram pepd qepd repd ReturnEPD Table_Top_Share tidy_income Top_Incomes Top_Share

#' Density of the Pareto 1 distribution
#'
#' @param x a (positive) vector
#' @param mu a number (the lower bound, default is 1)
#' @param alpha a number (the power index)
#' @param xi a number (the inverse of \code{alpha}, corresponding to the tail index)
#' @return the density of the Pareto 1 distribution at points \code{x}
#' @seealso [ppareto1()], [qpareto1()] and [rpareto1()]
#' @examples
#' dpareto1(2, 1, alpha=1.5, xi=1/alpha)
dpareto1 <- function (x, mu=1, alpha=1/xi, xi=1/alpha)  { alpha*mu^alpha/x^(alpha+1) }

#' Cumulative distribution function of the Pareto 1 distribution
#'
#' @param x a (positive) vector
#' @param mu a number (the lower bound, default is 1)
#' @param alpha a number (the power index)
#' @param xi a number (the inverse of \code{alpha})
#' @return the c.d.f. of the Pareto 1 distribution at points \code{x}
#' @examples
#' ppareto1(2, 1, 1.5)
ppareto1 <- function (x, mu=1, alpha=1/xi, xi=1/alpha)  { 1 - ( x/mu )^(-alpha) }

#' Quantile function of the Pareto 1 distribution
#'
#' @param p a vector of probabilities (with values in [0,1])
#' @param mu a number (the lower bound)
#' @param alpha a number (the power index)
#' @param xi a number (the inverse of \code{alpha}, corresponding to the tail index)
#' @return the quantile function of the Pareto 1 distribution at points \code{p}
#' @examples
#' qpareto1(.5, 1, alpha=1.5)
qpareto1 <- function (p, mu, alpha=1/xi, xi=1/alpha)  { mu*(1-p)^(-1/alpha) }

#' Random generation of the Pareto 1 distribution
#'
#' @param n an integer
#' @param mu a number (the lower bound)
#' @param alpha a number (the power index)
#' @param xi a number (the inverse of \code{alpha}, corresponding to the tail index)
#' @return generates \code{n} values of the Pareto 1 distribution
#' @examples
#' set.seed(123)
#' rpareto1(6, 1, alpha=1.5)
rpareto1 <- function (n, mu, alpha=1/xi, xi=1/alpha)  { mu*(1-runif(n))^(-1/alpha) }

#' Maximum Likelihood estimation of the Pareto 1 distribution, with weights
#'
#' @param data a vector of observations
#' @param weights a vector of weights (default = 1)
#' @param threhold the threshold parameter of the Pareto 1 distribution (\code{mu})
#' @return a list with the index \code{alpha} and \code{k}, the number of observations above \code{threshold}
#' @examples
#' set.seed(123)
#' x <- rpareto1(100, 1, 1.5)
#' w <- rgamma(100, 10, 10)
#' estim <- MLE.pareto1(data=x, weights=w, threshold=1)
#' estim
MLE.pareto1 <- function(data, weights=rep(1,length(data)), threshold=min(data))
{
  foo <- cbind(data,weights)
  foo <- foo[foo[,1]>threshold,]
  xx <- foo[,1]
  ww <- foo[,2]/sum(foo[,2])
  m <- as.numeric(threshold)
  a <- 1/(sum(ww*log(xx))-log(m))
  k <- NROW(xx)
  return(list(alpha=a, xi=1/a, k=k))
}

#' Density of the Generalized Pareto distribution (GPD)
#'
#' @param x a (positive) vector
#' @param xi a number (the tail index)
#' @param mu a number (the lower bound)
#' @param beta a number (the scaling paramater, default = 1)
#' @return the c.d.f. of the Generalized Pareto distribution at points \code{x}
#' @examples
#' pgpd(2, xi=1/1.5, 1, 1)
pgpd <- function (x, xi, mu = 0, beta = 1)  { (1 - (1+(xi*(x-mu))/beta)^(-1/xi)) }

#' Density of the Generalized Pareto distribution (GPD)
#'
#' @param x a (positive) vector
#' @param xi a number (the tail index)
#' @param mu a number (the lower bound)
#' @param beta a number (the scaling paramater, default = 1)
#' @return the density of the Pareto 1 distribution at points \code{x}
#' @examples
#' dgpd(2, xi=1/1.5, 1, 1)
dgpd <- function (x, xi, mu = 0, beta = 1)  { (beta^(-1))*(1+(xi*(x-mu))/beta)^((-1/xi)-1) }

#' Random generation of the Generalized Pareto distribution (GPD)
#'
#' @param n an integer
#' @param xi a number (the tail index)
#' @param mu a number (the lower bound)
#' @param beta a number (the scaling paramater, default = 1)
#' @return generates \code{n} values of the Pareto 1 distribution at points \code{x}
#' @examples
#' rgpd(10, xi=1/1.5, 1, 1)
rgpd <- function (n, xi, mu = 0, beta = 1)  { mu + (beta/xi)*((1-runif(n))^(-xi)-1) }

#' Maximum Likelihood estimation of the Generalized Pareto distribution, with weights
#'
#' @param data a vector of observations
#' @param weights a vector of weights (default = 1)
#' @param threhold the threshold parameter of the Generalized Pareto distribution (\code{mu})
#' @param nextrmes the number of largest values considered (integer)
#' @param method method used for inference (\code{"ml"} for maximum likelihood)
#' @param information (not used)
#' @return a list \code{xi} the tail index, \code{mu} the threshold, \code{"beta"} the scaling coefficient, and \code{k} the number of observations above \code{mu}
#' @examples
#' set.seed(123)
#' x <- rpareto1(100, 1, 1.5)
#' w <- rgamma(100,10,10)
#' estim <- MLE.gpd(data=x, weights=w, threshold=1)
#' estim$par.ests
MLE.gpd <- function (data, weights=rep(1,length(data)),
                     threshold = NA, nextremes = NA, method="ml",
                     information = c("observed", "expected"), ...)
{
  n <- length(data)
  if (is.na(nextremes) && is.na(threshold))
    stop("Enter either a threshold or the number of upper extremes")
  if (!is.na(nextremes) && !is.na(threshold))
    stop("Enter EITHER a threshold or the number of upper extremes")
  if (!is.na(nextremes))

    data <- as.numeric(data)

  foo=cbind(data,weights)
  foo=foo[foo[,1]>threshold,]
  x=foo[,1]
  w=foo[,2]/sum(foo[,2])

  exceedances <- x
  excess <- exceedances - threshold
  Nu <- length(excess)
  xbar <- sum(w*excess)
  method <- "ml"
  s2 <- sum(w*(excess-xbar)^2)
  xi0 <- -0.5 * (((xbar * xbar)/s2) - 1)
  beta0 <- 0.5 * xbar * (((xbar * xbar)/s2) + 1)
  theta <- c(xi0, beta0)
  negloglik <- function(theta, tmp) {
    xi <- theta[1]
    beta <- theta[2]
    cond1 <- beta <= 0
    cond2 <- (xi <= 0) && (max(tmp) > (-beta/xi))
    if (cond1 || cond2)
      f <- 1e+06
    else {
      y <- logb(1 + (xi * tmp)/beta)
      y <- w*y/xi
      f <- logb(beta) + (1 + xi) * sum(y)
    }
    f
  }
  fit <- optim(theta, negloglik, hessian = TRUE, ..., tmp = excess)
  if (fit$convergence)
    warning("optimization may not have succeeded")
  par.ests <- fit$par
  converged <- fit$convergence
  nllh.final <- fit$value
  p.less.thresh <- 1 - Nu/n
  return(list(xi=par.ests[1], mu=threshold, beta=par.ests[2], k=Nu))
}

.EPDinput <- function(y, gamma, kappa, tau, kappaTau = TRUE) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #
  if (!is.numeric(gamma)) {
    stop("gamma should be numeric.")
  }

  if (!is.numeric(kappa)) {
    stop("kappa should be numeric.")
  }

  if (!is.numeric(tau)) {
    stop("tau should be numeric.")
  }


  if (any(tau >= 0)) {
    stop("tau should be strictly negative.")
  }

  if (any(gamma <= 0)) {
    stop("gamma should be strictly positive.")
  }
  if (kappaTau) {
    if (any(kappa <= pmax(-1, 1/tau))) {
      stop("kappa should be larger than max(-1,1/tau).")
    }
  }

  ly <- length(y)
  lg <- length(gamma)
  lk <- length(kappa)
  lt <- length(tau)

  l <- c(ly, lg, lk, lt)

  ind <- which(l > 1)

  if (length(ind) > 1) {

    if (!length(unique(l[ind])) == 1) {
      stop("All input arguments should have length 1 or equal length.")
    }
  }
}

#' Density of the Extended Pareto distribution
#'
#' @param x a (positive) vector
#' @param gamma a (strictly positive) number (the tail index)
#' @param kappa a number - must be larger than max{-1,1/tau}
#' @param tau a (negative) number (default is -1)
#' @param log logical indicating if logarithm of density should be returned
#' @return the density of the Extended Pareto distribution at points \code{x}
#' @source \url{https://github.com/TReynkens/ReIns/blob/master/R/EPD.R} Tom Reynkens, ReIns package version 1.0.7
#' @examples
#' depd(2,.5,1,-1)
depd <- function(x, gamma, kappa, tau = -1, log = FALSE) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #

  .EPDinput(x, gamma, kappa, tau, kappaTau = TRUE)

  d <- 1 / (gamma*x^(1/gamma+1)) * (1+kappa*(1-x^tau))^(-1/gamma-1) *
    (1+kappa*(1-(1+tau)*x^tau))
  d[x <= 1] <- 0
  if (log) d <- log(d)
  return(d)
}

#' Cumulative Distribution Function of the Extended Pareto distribution
#'
#' @param x a (positive) vector
#' @param gamma a (strictly positive) number (the tail index)
#' @param kappa a number - must be larger than max{-1,1/tau}
#' @param tau a (negative) number (default is -1)
#' @param log logical indicating if logarithm of density should be returned
#' @return the c.d.f. of the Extended Pareto distribution at points \code{x}
#' @source \url{https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R} Tom Reynkens, ReIns package version 1.0.7
#' @examples
#' pepd(2,.5,1,-1)
pepd <- function(x, gamma, kappa, tau = -1, lower.tail = TRUE, log.p = FALSE) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #

  .EPDinput(x, gamma, kappa, tau, kappaTau = FALSE)

  p <- 1 - (x * (1+kappa*(1-x^tau)))^(-1/gamma)

  p[x <= 1] <- 0

  if (any(kappa <= pmax(-1, 1/tau))) {
    if (length(kappa) > 1 | length(tau) > 1) {
      p[kappa <= pmax(-1, 1/tau)] <- NA
    } else {
      p <- NA
    }
  }

  if (!lower.tail) p <- 1-p

  if (log.p) p <- log(p)

  return(p)
}

#' Quantile Function of the Extended Pareto distribution
#'
#' @param p a vector of probabilities (in the interval [0,1])
#' @param gamma a (strictly positive) number (the tail index)
#' @param kappa a number - must be larger than max{-1,1/tau}
#' @param tau a (negative) number (default is -1)
#' @param log logical indicating if logarithm of density should be returned
#' @return the c.d.f. of the Extended Pareto distribution at points \code{x}
#' @source \url{https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R} Tom Reynkens, ReIns package version 1.0.7
#' @examples
#' qepd(.5,.5,1,-1)
qepd <-  function(p, gamma, kappa, tau = -1, lower.tail = TRUE, log.p = FALSE) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #

  .EPDinput(p, gamma, kappa, tau, kappaTau = TRUE)


  if (log.p) p <- exp(p)

  if (!lower.tail) p <- 1-p

  if (any(p < 0 | p > 1)) {
    stop("p should be between 0 and 1.")
  }

  l <- length(p)
  Q <- numeric(l)

  endpoint <- 10

  if (any(p < 1)) {

    mx <- max(p[p < 1])

    while (pepd(endpoint, gamma, kappa, tau) <= mx) {
      endpoint <- endpoint*10
    }
  }

  for (i in 1:l) {

    if (p[i] < .Machine$double.eps) {
      # p=0 case
      Q[i] <- 1

    } else if (abs(p[i]-1) > .Machine$double.eps) {
      # 0<p<1 case

      # Function to minimise
      f <- function(x) {
        ((1-p[i])^(-gamma) - x*(1+kappa*(1-x^tau)))^2
      }
      # If minimising fails return NA
      Q[i] <- tryCatch(optimise(f, lower=1, upper=endpoint)$minimum, error=function(e) NA)

    } else {
      # p=1 case
      Q[i] <- Inf
    }

  }

  return(Q)
}

#' Random Generation of the Extended Pareto distribution
#'
#' @param n integer, number of generations
#' @param gamma a (strictly positive) number (the tail index)
#' @param kappa a number - must be larger than max{-1,1/tau}
#' @param tau a (negative) number (default is -1)
#' @param log logical indicating if logarithm of density should be returned
#' @return a vector of \code{n} values generated from an Extended Pareto distribution
#' @source \url{https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R} Tom Reynkens, ReIns package version 1.0.7
#' @examples
#' set.seed(123)
#' repd(6, gamma=.5,kappa=1,tau=-1)
repd <-  function(n, gamma, kappa, tau = -1) {
  return(qepd(runif(n), gamma=gamma, kappa=kappa, tau=tau))
}

#' Hill estimator of the tail index, with weights
#'
#' @param data the vector of observations
#' @param weights the vector of weights (default is uniform weights)
#' @return Hill estimator of \code{xi} (the tail index) and \code{alpha}, as well as \code{k} the number of observations
#'
#'  Note that \code{weights} do not need to sum up to 1 (function will convert them via \code{weights/sum(weights)})
#' @examples
#' set.seed(123)
#' x <- rpareto1(100, 1, 1.5)
#' w <- rgamma(100,10,10)
#' Hill(x,w)
Hill = function(data,weights=rep(1,length(data))){
  w <- weights/sum(weights)
  n <- length(data)
  X <- as.numeric(sort(data))
  Hill <- sum(w[2:n]*(log(X[2:n])-log(X[1])))
  return(list(xi = Hill, alpha=1/Hill, k=n))
}

#' Fit the Extended Pareto distribution to a vector of observations, with weights
#'
#' @param data vector of observations
#' @param weights vector of (positive) weights
#' @param rho parameter of Fraga Alves et al. (2003) estimate
#' @param start vector of length 2 containing the starting values for the optimisation
#' @param direct logical indicating if the parameters are obtained by directly maximising the log-likelihood function
#' @param warnings logical indicating if possible warnings from the optimisation function are shown
#' @return a list with \code{k} the number of observations used, \code{gamma} the vector of the corresponding estimates for the tail parameter of the EPD, \code{kappa} the vector of the corresponding MLE estimates for the kappa parameter of the EPD and \code{tau} the vector of the corresponding estimates for the second order tail index parameter of the EPD using Hill estimates and values for \code{rho}
#' @source adapted from \url{https://github.com/TReynkens/ReIns/blob/master/R/EPD.R} Tom Reynkens, ReIns package version 1.0.7
#' @examples
#' set.seed(123)
#' x <- rpareto1(100,1,.5)
#' w <- rgamma(100,10,10)
#' EPD(data=x, weights=w)
EPD <- function(data, weights=rep(1,length(data)), rho = -1,
                start = NULL, direct = TRUE, warnings = FALSE, ...) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #

  df=data.frame(data,weights)
  df=df[order(df$data),]
  w <- df$weights/sum(df$weights)
  X <- as.numeric(df$data)
  n <- length(X)
  K <- (n-1)

  if (n == 1) {
    stop("We need at least two data points.")
  }

  if (direct) {
    EPD <- .EPDdirectMLE(data=data, weights=w, rho=rho, start=start, warnings=warnings)
  } else {
    # Select parameter using approach of Beirlant, Joosens and Segers (2009).
    EPD <- .EPDredMLE(data=data, weights=w, rho=rho)
  }

  if (length(rho) == 1) {
    EPD$gamma <- as.vector(EPD$gamma)
    EPD$kappa <- as.vector(EPD$kappa)
    EPD$tau <- as.vector(EPD$tau)

  }

  if (length(rho) == 1) {
    return(list(k=K, gamma=unlist(EPD$gamma[K]), kappa=unlist(EPD$kappa[K]), tau=unlist(EPD$tau[K])))
  } else {
    return(list(k=K, gamma=unlist(EPD$gamma[K,]), kappa=unlist(EPD$kappa[K,]), tau=unlist(EPD$tau[K,])))
  }
}


.EPDredMLE <- function(data, weights=rep(1,length(data)), rho = -1) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  #   original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #
  #   Fit EPD using approach of Beirlant, Joosens and Segers (2009).

  df=data.frame(data,weights)
  df=df[order(df$data),]
  w <- df$weights/sum(df$weights)
  X <- as.numeric(df$data)

  n <- length(X)
  K <- (n-1)

  if (n == 1) {
    stop("We need at least two data points.")
  }

  nrho <- length(rho)
  rho.orig <- rho

  H <- Hill(data, w)$xi

  if (all(rho > 0) & nrho == 1) {
    rho <- .rhoEst(data, alpha=1, tau=rho, weights=w)$rho
    beta <- -rho

  } else if (all(rho < 0)) {
    beta <- -rho

  } else {
    stop("rho should be a single positive number or a vector (of length >=1) of negative numbers.")
  }


  gamma <- matrix(0, n-1, nrho)
  kappa <- matrix(0, n-1, nrho)
  tau <- matrix(0, n-1, nrho)

  beta <- -rho

  for (j in 1:nrho) {

    if (nrho == 1 & all(rho.orig > 0)) {
      tau[K, 1] <- -beta[K]/H


      rhovec <- rho
    } else {

      tau[K, j] <- -beta[j]/H

      rhovec <- rep(rho[j], n-1)
    }


    E <- numeric(n-1)

    for (k in K) {
      i <- 1:k
      E[k] <- sum( w[n-k+i] * (X[n-k+i]/X[n-k])^tau[k,j] )
    }

    kappa[K,j] <- H * (1-2*rhovec[K]) * (1-rhovec[K])^3 / rhovec[K]^4 * (E[K] - 1 / (1-rhovec[K]))

    gamma[K,j] <- H - kappa[K,j] * rhovec[K] / (1 - rhovec[K])

  }

  return(list(gamma=gamma, kappa=kappa, tau=tau))
}

.EPDdirectMLE <- function(data, weights=rep(1,length(data)), rho = -1, start = NULL,  warnings = FALSE) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #


  df=data.frame(data,weights)
  df=df[order(df$data),]
  w <- df$weights/sum(df$weights)
  X <- as.numeric(df$data)
  n <- length(X)

  if (n == 1) {
    stop("We need at least two data points.")
  }

  nrho <- length(rho)
  rho.orig <- rho

  H <- Hill(data, w)$xi

  if (all(rho > 0) & nrho == 1) {
    rho <- .rhoEst(data, alpha=1, tau=rho, weights=w)$rho
    beta <- -rho

  } else if (all(rho < 0)) {
    beta <- -rho

  } else {
    stop("rho should be a single positive number or a vector (of length >=1) of negative numbers.")
  }

  gamma <- matrix(0, n-1, nrho)
  kappa <- matrix(0, n-1, nrho)
  tau <- matrix(0, n-1, nrho)

  for (j in 1:nrho) {

    for (k in (n-1):(n-1)) {

      epddf <- df[df$data > X[n-k],]
      epddata <- epddf$data/X[n-k]
      epdw    <- epddf$w

      if (nrho == 1 & all(rho.orig > 0)) {
        tau[k,1] <- -beta[k]/H

      } else {

        tau[k,j] <- -beta[j]/H
      }

      if (is.null(start)) {
        start2 <- numeric(2)
        start2[1] <- H
        start2[2] <- 0
      } else if (is.matrix(start)) {

        if (nrow(start >= n-1)) {
          start2 <- numeric(2)
          start2[1] <- start[k,1]
          start2[2] <- start[k,2]
        } else {
          stop("start does not contain enough rows.")
        }

      } else {
        start2 <- start
      }

      if (tau[k,j] < 0) {
        tmp <- EPDfit(epddata, start=start2, tau=tau[k,j], weights=epdw)
        gamma[k,j] <- tmp[1]
        kappa[k,j] <- tmp[2]
      } else {
        gamma[k,j] <- kappa[k,j] <- NA
      }

    }

  }

  return(list(gamma=gamma, kappa=kappa, tau=tau))
}

#' Fit the Extended Pareto distribution to a vector of observations, with weights, using maximum likelihood estimation
#'
#' @param data vector of observations
#' @param tau the value for tau in the EPD distribution
#' @param weight vector of (positive) weights
#' @param rho parameter of Fraga Alves et al. (2003) estimate
#' @param start vector of length 2 containing the starting values for the optimisation (default are \code{c(.1,1})
#' @param warnings logical indicating if possible warnings from the optimisation function are shown
#' @return a list with \code{gamma} the vector of the corresponding estimates for the tail parameter of the EPD, \code{kappa} the vector of the corresponding MLE estimates for the kappa parameter of the EPD, as well as \code{tau}
#' @source adapted from \url{https://github.com/TReynkens/ReIns/blob/master/R/EPD.R} Tom Reynkens, ReIns package version 1.0.7
#' @examples
#' set.seed(123)
#' x <- rpareto1(100, mu=1, alpha=.5)
#' w <- rgamma(100,10,10)
#' EPDfit(data=x, tau=-3.3, weights=w)
EPDfit <- function(data, tau, start = c(0.1, 1), warnings = FALSE, weights=rep(1,length(data))) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #

  w <- weights/sum(weights)
  if (is.numeric(start) & length(start) == 2) {
    gamma_start <- start[1]
    kappa_start <- start[2]
  } else {
    stop("start should be a 2-dimensional numeric vector.")
  }


  if (ifelse(length(data) > 1, var(data) == 0, 0)) {
    sg <- c(NA, NA)
  } else {

    fit <- optim(par=c(gamma_start, kappa_start), fn=.EPDneglogL, Y=data, tau=tau, weights=w)

    sg <- fit$par

    if (fit$convergence > 0 & warnings) {
      warning("Optimisation did not complete succesfully.")
      if (!is.null(fit$message)) {
        print(fit$message)
      }
    }
  }
  listsg=list(gamma=sg[1], kappa=sg[2], tau=tau)
  return(listsg)
}


.EPDneglogL <- function(theta, Y, tau, weights) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #

  w <- weights/sum(weights)
  gamma <- theta[1]
  kappa <- theta[2]

  if (kappa <= max(-1, 1/tau) | gamma <= 0) {
    logL <- -10^6
  } else {
    logL <- sum( w*log(depd(Y, gamma=gamma, kappa=kappa, tau=tau)) )
  }

  return(-logL)
}

.rhoEst <- function(data, alpha = 1, theta1 = 2, theta2 = 3, tau = 1, weights=rep(1,length(data))) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #


  if (alpha <= 0) {
    stop("alpha should be strictly positive.")
  }

  if (tau <= 0) {
    stop("tau should be strictly positive.")
  }

  df=data.frame(data,weights)
  df=df[order(df$data),]
  w <- df$weights/sum(df$weights)
  X <- as.numeric(df$data)

  n <- length(X)
  rho <- numeric(n)
  Tn <- numeric(n)
  K <- (n-1)

  M_alpha <- numeric(n)
  M_alpha_theta1 <- numeric(n)
  M_alpha_theta2 <- numeric(n)

  l <- log(X[n-K+1])
  for (k in K) {
    M_alpha[k] <- sum( (l[1:k]-log(X[n-k]))^alpha ) / k
    M_alpha_theta1[k] <- sum( (l[1:k]-log(X[n-k]))^(alpha*theta1) ) / k
    M_alpha_theta2[k] <- sum( (l[1:k]-log(X[n-k]))^(alpha*theta2) ) / k
  }

  Tn[K] <- ( (M_alpha[K]/gamma(alpha+1))^tau - (M_alpha_theta1[K]/gamma(alpha*theta1+1))^(tau/theta1)  ) /
    ( (M_alpha_theta1[K]/gamma(alpha*theta1+1))^(tau/theta1) - (M_alpha_theta2[K]/gamma(alpha*theta2+1))^(tau/theta2)  )

  rho[K] <- 1 - ( 2 * Tn[K] / ( 3 - Tn[K]) ) ^ (1/alpha)

  return(list(k=K, rho=rho[K], Tn=Tn[K]))
}


ProbEPD <- function(data, q, gamma, kappa, tau, ...) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #


  if ( length(gamma) != length(kappa) | length(gamma) != length(tau)) {
    stop("gamma, kappa and tau should have equal length.")
  }

  X <- as.numeric(sort(data))


  n <- length(X)
  prob <- numeric(n)
  K <- (n-1)

  K2 <- K[which(gamma[K] > 0)]

  prob[K2] <- (K2+1)/(n+1) * (1 - pepd(q/X[n-K2], gamma=gamma[K2], kappa=kappa[K2], tau=tau[K2]))
  prob[prob < 0 | prob > 1] <- NA


  return(list(k=K, P=prob[K], q=q))

}

#' Large Return Period associated to the Extended Pareto distribution
#'
#' @param data a vector of observations
#' @param q the large quantile to estimate 1/P[X>q]
#' @param gamma vector of \code{n-1} estimates for the EVD obtained from [EPD]
#' @param kappa vector of \code{n-1} estimates for the EVD obtained from [EPD]
#' @param tau vector of \code{n-1} estimates for the EVD obtained from [EPD]
#' @return a list with \code{k} the vector of the values of the tail parameter k, \code{R} the vector of the corresponding return period and \code{q} the used large quantile
#' @source \url{https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R} Tom Reynkens, ReIns package version 1.0.7
#' @examples
#' set.seed(123)
#' x <- rpareto1(100, mu=1, alpha=1.5)
#' fit <- EPD(x)
#' ReturnEPD(data=x, q=.01, gamma=fit$gamma, kappa=fit$kappa, tau=fit$tau)
ReturnEPD <- function(data, q, gamma, kappa, tau, ...) {
  #
  #   Modified EPD function from the ReIns R package to have:
  #        - weighted ML estimation
  #        - results from only one cutoff
  #        - direct ML estimation by default
  # original R code :  ReIns package version 1.0.7
  #    https://github.com/TReynkens/ReIns/blob/master/R/EPD.R
  #    https://github.com/TReynkens/ReIns/blob/master/R/Distributions.R
  #

  if ( length(gamma) != length(kappa) | length(gamma) != length(tau)) {
    stop("gamma, kappa and tau should have equal length.")
  }

  X <- as.numeric(sort(data))
  n <- length(X)
  r <- numeric(n)
  K <- (n-1)

  K2 <- K[which(gamma[K] > 0)]

  r[K2] <- (n+1)/(K2+1) / (1 - pepd(q/X[n-K2], gamma=gamma[K2], kappa=kappa[K2], tau=tau[K2]))


  r[which(gamma[K] <= 0)] <- NA

  r[r <= 0] <- NA

  return(list(k=K, R=r[K], q=q))

}

#' @title Top_Share
#' @description This function estimates Top Share
#'
#' @param data a vector of observations, from \code{\link{tidy_income}}
#' @param p numeric, the probability level (default 0.01 for the top 1\%)
#' @param q numeric (possibly a vector), the probability level to model a Pareto distribution (default 0.1)
#' @param method is the distribution considered (default \code{"edf"} for the empirical distribution function, but can be \code{"pareto1"}, \code{"gpd"} or  \code{"epd"})
#' @param edp.direct logical (default \code{TRUE}) for the method use for EPD fit
#' @return estimation of the share of income/wealth owned by the top 100p\% of the population, assuming that the top 100q\% of the distribution is Pareto distributed. The list contains elements for each value of \code{q} if it is a vector, with \code{index} the share, \code{alpha} the inverse of the tail index, \code{coef},  \code{share.index} the value of \code{p}, \code{share.pareto} the value of the threshold as a percentage (.1 for the 90\% quantile) and \code{threshold} for the numerical value.
#' @examples
#' url_1 <- "https://github.com/freakonometrics/TopIncome/raw/master/data_csv/dataframe_yw_1.csv"
#' df <- read.table(url_1,sep=";",header=TRUE)
#' data_1  <-  tidy_income(income = df$y, weights = df$w)
#' Top_Share(data_1)
Top_Share <- function(data, p=.01, q=.1, method="edf", epd.direct=TRUE) {
  if (!require("Hmisc")) install.packages("Hmisc")
  require(Hmisc)
  #
  # p : top 100p% share
  # q : top 100q% of the distribution being Pareto
  #
  # method="edf" : sample top share, that is, based on the EDF
  # method="pareto1" : EDF+Pareto1
  #
  x = data$y
  weights = data$weights
  if (p>1) stop('Error: p should be smaller than 1 \n\n')
  if (p<0) stop('Error: p should be greater than 0 \n\n')
  ## Top Share based on Pareto I and GPD Models

  if (min(q)>1) stop('Error: q should be smaller than 1 \n\n')
  if (max(q)<0) stop('Error: q should be greater than 0 \n\n')

  vindex=valpha=vcoef=vgamma=vkappa=vshare.index=vshare.pareto=vthreshold=vtau=vbeta=vxi=vk=rep(NA,length(q))


  ## Top Share based on the Empirical Distribution Function (EDF)

  if(method=="edf") {
    for(s in 1:length(q)){

      up=Hmisc::wtd.quantile(x, weights=weights, probs=1-p, normwt=TRUE) # weighted (1-p)-quantile
      up=as.numeric(up)

      vindex[s]=tis=sum(weights*x*(x>up))/sum(weights*x)
    }
    L=list(index=vindex,share=p,k=length(x),method="edf")
  }


  if(method=="pareto1" || method=="pareto2" || method=="gpd") {

    for(s in 1:length(q)){
      u=Hmisc::wtd.quantile(x, weights=weights, probs=1-q[s], normwt=TRUE)
      u=as.numeric(u)  # threshold = weighted (1-q)-quantile
      datax=cbind(x,weights)
      dataq=datax[x<u,]
      xq = Hmisc::wtd.mean(dataq[,1], weights=dataq[,2])
      # Estimate the Pareto distribution with weighted data
      if(method=="pareto1") {
        coef=MLE.pareto1(x,weights=weights,threshold=u)
        sigma=u
        alpha=coef$alpha
      }
      if(method=="pareto2" || method=="gpd") {
        coef=MLE.gpd(x,weights=weights,threshold=u)
        sigma=coef$beta/coef$xi
        alpha=1/coef$xi
      }
      # Top Income Shares with weighted data
      if (alpha<1) { tis=NaN
      } else if(p<=q[s]) {
        num = (alpha/(alpha-1))*sigma*(p/q[s])^(-1/alpha) + u - sigma
        den = (1-q[s])*xq + q[s]*sigma/(alpha-1) + q[s]*u
        tis = p*num/den
      } else if(p>q[s])  {
        up=Hmisc::wtd.quantile(x, weights=weights, probs=1-p, normwt=TRUE)
        up=as.numeric(up)
        datap=data[x<up,]
        xp = Hmisc::wtd.mean(datap[,1], weights=datap[,2])
        den = (1-q[s])*xq + q[s]*sigma/(alpha-1) + q[s]*u
        tis = 1 - (1-p)*xp/den
      }
      vindex[s] <- tis
      valpha[s] <- alpha
      if(method=="pareto2" || method=="gpd") vxi[s] <- coef$xi
      if(method=="pareto2" || method=="gpd") vbeta[s] <- coef$beta
      vshare.index[s]=p
      vshare.pareto[s]=q[s]
      vthreshold[s]=u
      vk[s]=sum(x>u)
    }
    L=list(index=vindex,alpha=valpha,xi=vxi,beta=vbeta,share.index=p,
           share.pareto=vshare.pareto,threshold=vthreshold,k=vk, method=method)}

  ## Top Share based on EPD Model
  if(method=="epd") {
    for(s in 1:length(q)){
      u=Hmisc::wtd.quantile(x, weights=weights, probs=1-q[s], normwt=TRUE)
      u=as.numeric(u)  # threshold = weighted (1-q)-quantile

      dataqq=data[data>=u,]
      coef=EPD(dataqq[,1], weights=dataqq[,2], direct=epd.direct)
      delta=coef$kappa
      tau=coef$tau
      alpha <- 1/coef$gamma

      up=Hmisc::wtd.quantile(x, weights=weights, probs=1-p, normwt=TRUE)
      up=as.numeric(up)
      datap=data[x<up,]
      xp = xq = Hmisc::wtd.mean(datap[,1], weights=datap[,2])

      pextpareto=function(x, u=1, delta=0, tau=-1, alpha){#Compute CDF
        d=1-((x/u)*(1+delta-delta*(x/u)^tau))^(-alpha)
        d[x<=u] <- 0
        return(d) }
      ff_bis <- function(x) (1-pextpareto(1/x, u=u, delta=delta, tau=tau, alpha=alpha))/x^2

      if (alpha<=1) {tis=NaN    # infinite mean (tail=alpha=1/xi < 1)
      } else if(delta<max(-1,1/tau)) {tis=NaN   # kappa should be largen than max(-1,1/tau)
      } else if(p<=q[s]) {
        uprim=u*qepd(1-p/q[s], gamma=coef$gamma, kappa=coef$kappa, tau=coef$tau)
        Eup = try( integrate(ff_bis, lower=0, upper=1/uprim)$value , TRUE)
        Eu = try( integrate(ff_bis, lower=0, upper=1/u)$value , TRUE)
        if(inherits(Eup, "try-error") && inherits(Eup, "try-error")) tis=NaN
        if(!(inherits(Eup, "try-error") && inherits(Eup, "try-error"))) tis=(p*uprim+q[s]*Eup)/((1-q[s])*xq+q[s]*(u+Eu))
      } else if(p>q[s])  {
        Eu = try( integrate(ff_bis, lower=0, upper=1/u)$value , TRUE)
        Ex = ((1-q[s])*xp+q[s]*(u+Eu))
        if(inherits(Eu, "try-error")) tis=NaN
        if(!inherits(Eu, "try-error")) tis = 1-(1-p)*xp/Ex
      }

      vindex[s] <- tis
      valpha[s] <- 1/coef$gamma
      vgamma[s]=coef$gamma
      vkappa[s]=coef$kappa
      vtau[s]=coef$tau
      vshare.index[s]=p
      vshare.pareto[s]=q[s]
      vthreshold[s]=u
      vk[s]=sum(x>u)
    }
    L=list(index=vindex,alpha=valpha,tau=vtau,kappa=vkappa,gamma=vgamma,share.index=p,
           share.pareto=vshare.pareto,threshold=vthreshold, k=vk, method=method)
  }
  return(L)
}

#' @title tidy_income
#' @description Convert income/wealth data into a datafrane that can be used with \code{TopShare} or \code{Pareto_diagram}
#'
#' @param income a vector of data (income or wealth)
#' @param weights a vector of weight (same length as \code{income}
#' @return a dataframe with 4 columns, \code{y} the vector income (or wealth), \code{weights} the vector of weights, \code{Fw} the cumulated proportion of people (with weights) and \code{Fx} the cumulated proportion of people (without weights)
#' @seealso \code{\link{Top_Share}}, \code{\link{Pareto_diagram}}, \code{\link{Table_Top_Share}}
#' @examples
#' url_1 <- "https://github.com/freakonometrics/TopIncome/raw/master/data_csv/dataframe_yw_1.csv"
#' df <- read.table(url_1,sep=";",header=TRUE)
#' data_1  <-  tidy_income(income = df$y, weights = df$w)
#' str(data_1)
tidy_income <- function(income, weights){
  df=data.frame(w=weights, y=income)
  df$w=df$w/sum(df$w)
  df=df[order(df$y),]
  Fw=cumsum(df$w)/(sum(df$w)+df$w[1])
  n=length(df$y)
  Fx=(1:n)/(n+1)
  data = data.frame(y=df$y, weights=df$w, Fw=Fw, Fx=Fx)
  return(data)
}

#' Pareto diagrams - Pareto 1, GPD and EPD
#'
#' @param data dataframe obtained from \code{\link{tidy_income}} function
#' @param p numeric, the probability level (default 0.01 for the top 1\%)
#' @param q numeric, the probability level to model a Pareto distribution (default 0.1)
#' @param viz logical \code{TRUE} to plot the estimates
#' @return a table with estimations of top share and a graph
#' @examples
#' url_1 <- "https://github.com/freakonometrics/TopIncome/raw/master/data_csv/dataframe_yw_1.csv"
#' df <- read.table(url_1,sep=";",header=TRUE)
#' data_1  <-  tidy_income(income = df$y, weights = df$w)
#' \dontrun{Pareto_diagram(data_1)}
Pareto_diagram = function(data, p=.01, q=.1, viz=TRUE){

  # graphical parameters
  ysup <- 5
  top.x <- .25
  top.y <- .4
  top.xx <- 38
  top.yy <- .11

  res1=Top_Share(data, p=p, q=q, method="pareto1")
  res2=Top_Share(data, p=p, q=q, method="gpd")
  res3=Top_Share(data, p=p, q=q, method="epd", epd.direct= TRUE)
  pot=data[data$y>0,]   # Keep positive data
  if(viz) par(mfrow=c(1,1), mar=c(4, 4, 4, 1))  # bottom, left, top, right
  if(viz) plot(log(pot$y), log(1-pot$Fw), xlab="log(x)", ylab="log(1-F(x))", cex=.6, col="gray")

  u=seq(log(res1$threshold), log(max(data$y)), length.out=500)
  yhat.par1=ppareto1(exp(u),mu=res1$threshold,alpha=res1$alpha)
  yhat.par2=pgpd(exp(u),xi=res2$xi,mu=res2$threshold,beta=res2$beta)
  yhat.epd=pepd(exp(u)/res3$threshold,gamma=res3$gamma,kappa=res3$kappa,tau=res3$tau)
  if(viz){
    lines(u,log(1-yhat.par1)+log(q), col="blue", lty=2, lwd=1.5)
    lines(u,log(1-yhat.epd)+log(q), col="red", lty=1, lwd=1.5)
    lines(u,log(1-yhat.par2)+log(q),col="green", lty=3, lwd=1.5)
    legend("topright", legend=c("Pareto 1", "GPD", "EPD"), col=c("blue","green", "red"), lty=c(2,3,1))
  }
  res90=Top_Share(data, p=p, q=.10, method="pareto1")
  if(viz) abline(v=log(res90$threshold), col="lightgrey", lty=2)  # percentile 90
  if(viz) legend(log(res90$threshold)-top.x, top.y, legend=expression(italic('q')[90]), cex=.9, bty="n")
  res95=Top_Share(data, p=p, q=.05, method="pareto1")
  if(viz) abline(v=log(res95$threshold), col="lightgrey", lty=2)  # percentile 95
  if(viz) legend(log(res95$threshold)-top.x, top.y, legend=expression(italic('q')[95]), cex=.9, bty="n")
  res99=Top_Share(data, p=p, q=.01, method="pareto1")
  if(viz) abline(v=log(res99$threshold), col="lightgrey", lty=2)  # percentile 99
  legend(log(res99$threshold)-top.x, top.y, legend=expression(italic('q')[99]), cex=.9, bty="n")
}

#' Table of top shares (using three thresholds)
#'
#' @param data dataframe obtained from \code{tidy_income} function
#' @param p numeric, the probability level (default 0.01 for the top 1\%)
#' @param q numeric (possibly a vector), the probability level to model a Pareto distribution (default \code{c(0.1, 0.05, 0.01)} for the top 10\%, 5\% and 1\%)
#' @param md logical \code{TRUE} to have the table in a markdown format (default is \code{FALSE})
#' @examples
#' set.seed(123)
#' x <- rpareto1(200, 1, alpha=1.5)
#' w <- rgamma(200, 10, 10)
#' data_0  <-  tidy_income(income = x, weights = w)
#' Table_Top_Share(data_0,q=c(.1,.05))
#' url_1 <- "https://github.com/freakonometrics/TopIncome/raw/master/data_csv/dataframe_yw_1.csv"
#' df <- read.table(url_1,sep=";",header=TRUE)
#' data_1  <-  tidy_income(income = df$y, weights = df$w)
#' Table_Top_Share(data_1)
Table_Top_Share = function(data, p=.01, q=c(.1,.05,.01), md=FALSE, verbose=FALSE){


  thresholds=q
  res0=Top_Share(data,p=.01,q=thresholds, method="edf")
  res1=Top_Share(data,p=.01,q=thresholds, method="pareto1")
  res2=Top_Share(data,p=.01,q=thresholds, method="gpd")
  res3=Top_Share(data,p=.01,q=thresholds, method="epd")

  alpha=rbind(100*(1-res1$share.pareto),res1$alpha,res2$alpha,res3$alpha)
  rownames(alpha) <- c("cutoff","Pareto 1", "GPD", "EPD")
  colnames(alpha)=paste("top",round(100*(1-q)),"%",sep="")
  res=rbind(100*(1-res1$share.pareto),res0$index,res1$index,res2$index,res3$index)
  rownames(res) <- c("cutoff","edf","Pareto 1", "GPD", "EPD")
  colnames(res)=paste("top",round(100*(1-q)),"%",sep="")


  if(verbose){
    cat("----- top shares ------------------\n")
    print(res)
    cat("----- alpha (tail index) ----------\n")
    print(alpha)
  }

  if(md){
    if (!require("knitr")) install.packages("knitr")
    require(knitr)
    dt=data.frame(res*100)[-1,]
    names(dt)=paste("top",round(100*(1-q)),"%",sep="")
    rownames(dt)=c("EDF","Pareto_1","GPD","EPD")
    cat(kable(dt,caption="Top Share (in percent)",
              bootstrap_options = c("striped", "hover"), full_width = FALSE))

    dt=data.frame(alpha)[-1,]
    names(dt)=paste("top",round(100*(1-q)),"%",sep="")
    rownames(dt)=c("Pareto_1","GPD","EPD")
    cat(kable(dt,caption="Tail Index (alpha)",
              bootstrap_options = c("striped", "hover"), full_width = FALSE))
  }

  return(list(TopShare=res,TailIndex=alpha))}

#' Top Incomea plot
#'
#' @param data dataframe obtained from \code{\link{tidy_income}} function
#' @param p probability level (default 0.01)
#' @param thr numeric vector of probability levels to model a Pareto distribution (default is \code{seq(.85,.999,by=.001)} from 0.85 up to 0.999)
#' @param tail_index logical to plot the tail index (default \code{TRUE})
#' @return one or two graphs (depending on \code{tail==TRUE})
#' @examples
#' url_1 <- "https://github.com/freakonometrics/TopIncome/raw/master/data_csv/dataframe_yw_1.csv"
#' df <- read.table(url_1,sep=";",header=TRUE)
#' data_1  <-  tidy_income(income = df$y, weights = df$w)
#' \dontrun{Top_Incomes(data_1)}
Top_Incomes = function(data, p=.01, thr=seq(.85,.999,by=.001), tail_index = TRUE,...){
  thr=round(thr,10)
  tail=matrix(0,NROW(thr),7)
  tis.index=matrix(0,NROW(thr),7)
  tis.alpha=matrix(0,NROW(thr),7)
  for(i in 1:NROW(thr)) {

    res1=Top_Share(data, p=p, q=1-thr[i], method="pareto1")
    res2=Top_Share(data, p=p, q=1-thr[i], method="gpd")
    res3=Top_Share(data, p=p, q=1-thr[i], method="epd", epd.direct=TRUE)
    res4=Top_Share(data, p=p, method="edf")

    tis.index[i,1]=res1$threshold     # threshold y0
    tis.index[i,2]=res1$k          # k largest observations
    tis.index[i,3]=thr[i]             # quantile threshold
    tis.index[i,4]=res1$index
    tis.index[i,5]=res2$index
    tis.index[i,6]=res3$index
    tis.index[i,7]=res4$index

    tis.alpha[i,1]=res2$threshold           # threshold y0
    tis.alpha[i,2]=res2$k          # k largest observations
    tis.alpha[i,3]=thr[i]             # quantile threshold
    tis.alpha[i,4]=res1$alpha
    tis.alpha[i,5]=res2$alpha
    tis.alpha[i,6]=res3$alpha
    tis.alpha[i,7]=0

  }

  # graphical parameters
  ysup <- 5
  top.x <- .25
  top.y <- .4
  top.xx <- 38
  top.yy <- .11

  if(tail_index){
    plot(tis.alpha[,2],tis.alpha[,4], ylim=c(0,8), type="b", cex=.75, pch=3, main="MLE estimates of the tail index", xlab="k largest values", ylab="tail index (alpha)", col="blue")
    lines(tis.alpha[,2],tis.alpha[,4], col="blue", type="l", cex=.75)
    lines(tis.alpha[,2],tis.alpha[,5], col="green", type="p", cex=.75, pch=2)
    lines(tis.alpha[,2],tis.alpha[,5], col="green", type="l", cex=.75)
    lines(tis.alpha[,2],tis.alpha[,6], col="red", type="b", cex=.75, pch=1)
    lines(tis.alpha[,2],tis.alpha[,6], col="red", type="l", cex=.75)
    abline(v=tis.alpha[(tis.alpha[,3]==.90),2], col="lightgray", lty=2) # 10% top obs
    abline(v=tis.alpha[(tis.alpha[,3]==.95),2], col="lightgray", lty=2) #  5% top obs
    abline(v=tis.alpha[(tis.alpha[,3]==.99),2], col="lightgray", lty=2) #  1% top obs
    legend("topright", legend=c("Pareto 1 (Hill estimator)","GPD", "EPD"), col=c("blue", "green", "red"), pch=c(3,2,1), lty=1,bty="n")

    legend(tis.alpha[(tis.alpha[,3]==.90),2]-top.xx,top.yy, legend=expression(italic('q')[90]), cex=.9, bty="n")
    legend(tis.alpha[(tis.alpha[,3]==.95),2]-top.xx,top.yy, legend=expression(italic('q')[95]), cex=.9, bty="n")
    legend(tis.alpha[(tis.alpha[,3]==.99),2]-top.xx,top.yy, legend=expression(italic('q')[99]), cex=.9, bty="n")
  }

  plot(tis.index[,2],tis.index[,4], ylim=c(0,.4), type="b", cex=.75, pch=3, main="Top 1% share", xlab="k largest values", ylab="share", col="blue")
  lines(tis.index[,2],tis.index[,4], col="blue", type="l", cex=.75)
  lines(tis.index[,2],tis.index[,5], col="green", type="p", cex=.75, pch=2)
  lines(tis.index[,2],tis.index[,5], col="green", type="l", cex=.75)
  lines(tis.index[,2],tis.index[,6], col="red", type="b", cex=.75, pch=1)
  lines(tis.index[,2],tis.index[,6], col="red", type="l", cex=.75)
  lines(tis.index[,2],tis.index[,7], col="gray", type="l", cex=.75)
  abline(v=tis.index[(tis.index[,3]==.90),2], col="lightgray", lty=2) # 10% top obs
  abline(v=tis.index[(tis.index[,3]==.95),2], col="lightgray", lty=2) #  5% top obs
  abline(v=tis.index[(tis.index[,3]==.99),2], col="lightgray", lty=2) #  1% top obs

  legend("topright", legend=c("Pareto 1","GPD", "EPD"), col=c("blue", "green", "red"), pch=c(3,2,1),lty=1,bty="n")
  legend(tis.index[(tis.index[,3]==.90),2]-top.xx,top.yy, legend=expression(italic('q')[90]), cex=.9, bty="n")
  legend(tis.index[(tis.index[,3]==.95),2]-top.xx,top.yy, legend=expression(italic('q')[95]), cex=.9, bty="n")
  legend(tis.index[(tis.index[,3]==.99),2]-top.xx,top.yy, legend=expression(italic('q')[99]), cex=.9, bty="n")
}

#' import HMisc
#' import knitr
#' importFrom("graphics", "abline", "legend", "lines", "par", "plot")
#' importFrom("stats", "integrate", "optim", "optimise", "runif", "var")
#' importFrom("utils", "install.packages")
freakonometrics/TopIncome documentation built on Oct. 16, 2021, 5:58 p.m.