R/ZIPF.R

# the ZIPF distrbution 
# The pdf could  have used the zeta() function on package require(Rmpfr)
# here we have used the function zetaP() which is a 
# simplify version of Thomas Lee zeta()
#------------------------------------------------------------
# the fitting function
#-------------------------------------------------------------
ZIPF <- function (mu.link = "log") 
{
  mstats <- checklink("mu.link", "ZIPF", substitute(mu.link),c("inverse", "log", "sqrt", "identity")) 
  structure(
    list(    family = c("ZIPF", "zipf distribution"),
         parameters = list(mu = TRUE),
              nopar = 1, 
               type = "Discrete", 
            mu.link = as.character(substitute(mu.link)), 
         mu.linkfun = mstats$linkfun, 
         mu.linkinv = mstats$linkinv, 
              mu.dr = mstats$mu.eta, 
               dldm = function(y,mu)
                 {
                 nd <- numeric.deriv(dZIPF(y, mu, log=TRUE), "mu", delta=0.001)
               dldm <- as.vector(attr(nd, "gradient"))
               dldm
                },
              d2ldm2 = function(y,mu)
                {
                 nd <- numeric.deriv(dZIPF(y, mu, log=TRUE), "mu", delta=0.001)
               dldm <- as.vector(attr(nd, "gradient"))
             d2ldv2 <- -dldm*dldm
             d2ldv2
                },
         G.dev.incr  = function(y,mu,...) -2*dZIPF(x = y, mu = mu, log = TRUE),
         rqres = expression(rqres(pfun="pZIPF", type="Discrete", ymin=1, y=y, mu=mu)), 
         mu.initial =expression({mu <- rep(.1,length(y)) } ),
         mu.valid = function(mu) all(mu > 0), 
         y.valid = function(y)  all(y >= 1),
            mean = function(mu) ifelse(mu > 1, zetaP(mu) / zetaP(mu +1), Inf),
        variance = function(mu) ifelse(mu > 2, (zetaP(mu + 1) * zetaP(mu - 1) - (zetaP(mu))^2) / (zetaP(mu + 1))^2, Inf)
    ),
    class = c("gamlss.family","family"))
}
#-----------------------------------------------------------------------------------------
dZIPF<- function(x, mu = 1, log = FALSE)
{
  if (any(mu <= 0) )  stop(paste("mu must be greater than 0 ", "\n", "")) 
  if (any(x < 1) )  stop(paste("x must be >=1", "\n", ""))
     ly <- max(length(x),length(mu)) 
      x <- rep(x, length = ly)      
     mu <- rep(mu, length = ly)   
   logL <- -(mu+1)*log(x)-log(zetaP(mu+1)) # or zeta
    lik <- if (log) logL else exp(logL)
  as.numeric(lik)
}
#----------------------------------------------------------------------------------------
pZIPF <- function(q, mu = 1, lower.tail = TRUE, log.p = FALSE)
{
#----------  
  Zeta.aux<- function (shape, qq) 
  {
    LLL <- max(length(shape), length(qq))
    if (length(shape) != LLL) 
      shape <- rep_len(shape, LLL)
    if (length(qq) != LLL) 
      qq <- rep_len(qq, LLL)
    if (any(qq < 12 - 1)) 
      warning("all values of argument 'q' should be 12 or more")
    aa <- qq
    B2 <- c(1/6, -1/30, 1/42, -1/30, 5/66, -691/2730, 7/6, -3617/510)
    kk <- length(B2)
    ans <- 1/((shape - 1) * (1 + aa)^(shape - 1)) + 0.5/(1 + aa)^shape
    term <- (shape/2)/(1 + aa)^(shape + 1)
    ans <- ans + term * B2[1]
    for (mm in 2:kk) 
    {
      term <- term * (shape + 2 * mm - 3) * (shape + 2 * mm - 
                                               2)/((2 * mm - 1) * 2 * mm * (1 + aa)^2)
      ans <- ans + term * B2[mm]
    }
    ifelse(aa - 1 <= qq, ans, rep(0, length(ans)))
  }
  #--------------------------------------------------
  if (any(mu <= 0) )  stop(paste("mu must be greater than 0 ", "\n", "")) 
  if (any(q < 1) )  stop(paste("y must be >=0", "\n", "")) 
       ly <- max(length(q),length(mu)) 
        q <- rep(q, length = ly)      
       mu <- rep(mu, length = ly)
      ans <- rep_len(0, ly)
   qfloor <- floor(q)
  for (nn in 1:(12 - 1)) ans <- ans + as.numeric(nn <= qfloor)/nn^(mu + 1)
          vecTF <- (12 - 1 <= qfloor)
  if (lower.tail) 
       {
            if (any(vecTF)) 
              ans[vecTF] <- zetaP(mu[vecTF] + 1) - Zeta.aux(mu[vecTF] + 
                                                                1, qfloor[vecTF] + 1)
       }
       else 
       {
            ans <- zetaP(mu + 1) - ans
            if (any(vecTF)) 
              ans[vecTF] <- Zeta.aux(mu[vecTF] + 1, qfloor[vecTF] + 
                                       1)
       }
   cdf <- ans/zetaP(mu + 1)
   cdf
}
#----------------------------------------------------------------------------------------
qZIPF <- function(p, mu = 1, lower.tail = TRUE, log.p = FALSE, max.value = 10000)
{      
  if (any(mu <= 0) )  stop(paste("mu must be greater than 0 ", "\n", "")) 
  if (any(p < 0) | any(p > 1.0001))  stop(paste("p must be between 0 and 1", "\n", "")) 
  if (log.p==TRUE) p <- exp(p) else p <- p
  if (lower.tail==TRUE) p <- p else p <- 1-p    
      ly <- length(p)                                                       
     QQQ <- rep(0,ly)                         
     nmu <- rep(mu, length = ly)                
  for (i in seq(along=p))                                                          
  {
    cumpro <- 0                                                                         
    if (p[i]+0.000000001 >= 1) QQQ[i] <- Inf
    else  
    {  
      for (j in seq(from = 1, to = max.value))
      {
        cumpro <-  pZIPF(j, mu = nmu[i],  log.p = FALSE) 
        # else  cumpro+dSICHEL(j, mu = nmu[i], sigma = nsigma[i], nu = nnu[i], log = FALSE)# the above is faster 
        QQQ[i] <- j 
        if  (p[i] <= cumpro ) break 
      } 
    }
  }          
  QQQ   
}
#----------------------------------------------------------------------------------------
rZIPF <- function(n, mu = 1, max.value = 10000)
{ 
  if (any(mu <= 0) )  stop(paste("mu must be greater than 0 ", "\n", "")) 
  if (any(n <= 0))  stop(paste("n must be a positive integer", "\n", "")) 
  n <- ceiling(n)
  p <- runif(n)
  r <- qZIPF(p, mu = mu, max.value = max.value)
  r
}
#----------------------------------------------------------------------------------------
#---------------------------------------------------------------------
# this is the function from VGAM
# it also need the function Zeta.derivative() which need C code
zetaP <-function (x) 
{
  Zeta.aux<- function (shape, qq) 
  {
    LLL <- max(length(shape), length(qq))
    if (length(shape) != LLL) 
      shape <- rep_len(shape, LLL)
    if (length(qq) != LLL) 
      qq <- rep_len(qq, LLL)
    if (any(qq < 12 - 1)) 
      warning("all values of argument 'q' should be 12 or more")
      aa <- qq
      B2 <- c(1/6, -1/30, 1/42, -1/30, 5/66, -691/2730, 7/6, -3617/510)
      kk <- length(B2)
     ans <- 1/((shape - 1) * (1 + aa)^(shape - 1)) + 0.5/(1 + aa)^shape
    term <- (shape/2)/(1 + aa)^(shape + 1)
     ans <- ans + term * B2[1]
    for (mm in 2:kk) 
      {
    term <- term * (shape + 2 * mm - 3) * (shape + 2 * mm - 
                       2)/((2 * mm - 1) * 2 * mm * (1 + aa)^2)
     ans <- ans + term * B2[mm]
      }
    ifelse(aa - 1 <= qq, ans, rep(0, length(ans)))
  }
   aa <- 12 
  ans <- 0
  for (ii in 0:(aa - 1)) ans <- ans + 1/(1 + ii)^x
   ans <- ans + Zeta.aux(shape = x, aa)    
  ans
}
Stan125/gamlss.dist documentation built on May 12, 2019, 7:38 a.m.