R/gamlssML.R

Defines functions predict.gamlssML gamlssML

Documented in gamlssML

# latest change done 31-1-18 MS to make sure that NET is working  
# changes done on 1-11-13 MS 
# this is an attempt to create a function which 
# fits a GAMLSS distribution using non-linear maximisation like optim()
# in fact here we use MLE() which is a copy of the mle() function of stat4
# The reason for doing this 
#     i) to improve the speed of fitting 
#     ii)  because it would be useful to extent this to Hidden Markov Models
#          and other models as ARMA and GARCH type
# Author Mikis Stasinopoulos 
# TO DO
# i)   weights implementation OK 
# ii)  residuals  OK
# iii) do I need mu.fv and mu.lp?? probably not
# iv)  mu.coef etc OK
# v) if implemented may histDist() should use it (Not working at the moment)
# vi) check all the methods OK
# vii) do I need data argument OK
# viii) vcoc() OK
# needs a summary() function OK
# new TO DO  
# i) If the Hessian is falling may still be able to find parameters (we need to change MLE to allow for that) ok 22-12-11
# ii) we need start.from as argument OK 21-12-11
# latest change 10-7-12
# latest changes are done to correct some of the problems with vcov() function
# only the MLE function is mainly effected
# the optimHess() function is now used for the hessian matrix
# the method optim.proc() in MLE allows to use  optim() or nlminb()

# can we use profile likelihood here?
# not yet but I am working on it now 
#######################################################################################
#names(m1)          
#[5]  "weights"                   
#        "iter"              "method"         
#[13] "converged"       "residuals"       "noObs"          
#[17] "mu.fv"           "mu.lp"            
#[21] "mu.link"              
#---------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------
# this should be a GAMLSS based mle procedure for fitting distributions
# the main problem is to create the likelihood function automatcally 
# not to be given by the user as in MLE below
# also it would be better to use the link function parameters rather the original
# to avoid boundary problems in the parameters
# this probably can be done by 
#   i)    using the eta.par as parameters in the likelihood fun say LogLikelihood(tparameters)
#   ii)   within the function use the inverse link to go to the original parameters
#   iii)  evalute the likelihood 
#   vi)   after exit transfer back (also use this for the se's)?? 
#---------------------------------------------------------------------------------------- 
#require(gamlss)
gamlssML<-function(formula, 
                   family = NO,  
                  weights = NULL, 
                 mu.start = NULL, 
              sigma.start = NULL, 
                 nu.start = NULL, 
                tau.start = NULL,
                   mu.fix = FALSE,
                sigma.fix = FALSE,
                   nu.fix = FALSE,
                  tau.fix = FALSE,
                     data, # it need start.from = NULL
               start.from = NULL,
                   ...) 
{
  #------------------------------------------------------------------------------------------
  # local functions
  #-------------------------------------------------------------------------------------------
  #-------------------------------------------------------------------------------------------
  # this is  copy of the stats::mle()
  # but it exports a S3 object rather than an S4 object
  # see  the examples in mle() for its use
  # here we use it as convenient way of calling optim()
  # and to cover the case that some parameters can be fixed
  #-------------------------------------------------------------------------------------------
  # local function taken from the mle() function
  # this function is not using hessian at the fitting
  #-------------------------------------------------------------------
  MLE <- function (minuslogl, 
                   start = formals(minuslogl), 
                  method = "BFGS", 
                   fixed = list(),
              optim.proc = c( "nlminb", "optim"),
           optim.control = NULL,
                   ...) 
  {
    #------------------------------------------------------------------------
       call <- match.call()
    optim.p <- match.arg(optim.proc)
          n <- names(fixed)
   fullcoef <- formals(minuslogl)
    if (any(!n %in% names(fullcoef))) 
      stop("some named arguments in 'fixed' are not arguments to the supplied log-likelihood")
fullcoef[n] <- fixed
    if (!missing(start) && (!is.list(start) || is.null(names(start)))) 
      stop("'start' must be a named list")
   start[n] <- NULL
      start <- sapply(start, eval.parent)
         nm <- names(start)
         oo <- match(nm, names(fullcoef))
    if (any(is.na(oo))) 
      stop("some named arguments in 'start' are not arguments to the supplied log-likelihood")
      start <- start[order(oo)]
         nm <- names(start)
          f <- function(p) 
    {
          l <- as.list(p)
   names(l) <- nm
       l[n] <- fixed
      do.call("minuslogl", l)
    }
    if (length(start))
    {
      switch(optim.p, 
             "nlminb"={
               oout <- nlminb(start = start, objective = f,  control=optim.control)
               if (oout$convergence > 0) # I took this from Ripley
                 warning("possible convergence problem: optim gave code=", 
                         oout$convergence, " ", oout$message)
               oout$hessian <- optimHess(oout$par, f) #HessianPB(pars=oout$par, fun=f)$Hessian
               value.of.f <- oout$objective
             },
             "optim"={
               oout <- optim(par= start, fn = f, method=method, control=optim.control,...)
               #oout <- optim(start, f, method = method, hessian = TRUE, ...)
               if (oout$convergence > 0) # I took this from Ripley
                 warning("possible convergence problem: optim gave code=", 
                         oout$convergence, " ", oout$message)
               oout$hessian <- optimHess(oout$par, f) #HessianPB(pars=oout$par, fun=f)$Hessian
               value.of.f <- oout$value
             })    
    } 
    else 
      oout <- list(par = numeric(0L), value = f(start))		
      coef <- oout$par
    if (length(coef))
    {
      vcov <- try(solve(oout$hessian))
      if (any(class(vcov)%in%"try-error")) vcov <- matrix(NA, dim(oout$hessian)[1], dim(oout$hessian)[2])		  	
    } 				
    else     vcov <- matrix(numeric(0L), 0L, 0L)
    fullcoef[nm] <- coef
    method <- if (optim.p =="nlminb") "nlminb" else method
    out <- list(call = call, coef = coef, fullcoef = unlist(fullcoef), 
                vcov = vcov, min = value.of.f, details = oout, minuslogl = minuslogl, 
                method = method)
    class(out) <- "MLE"
    out
  }
  # end of MLE
  #--------------------------------------------------------------------------------------
  #--------------------------------------------------------------------------------------
  # this is to replicate rqres within gamlss enviroment DS Friday, March 31, 2006 at 10:30
  # it is used as in gamlss()
  rqres <- function (pfun = "pNO", 
                     type = c("Continuous", "Discrete", "Mixed"),
                 censored = NULL,  
                     ymin = NULL, 
                   mass.p = NULL, 
                  prob.mp = NULL,
                        y = y,
                     ... )
  { }
  body(rqres) <-  eval(quote(body(rqres)), envir = getNamespace("gamlss"))
  ##---------------------------------------------------------------------------------------
  ##---------------------------------------------------------------------------------------
  #----------------------------------------------------------------------------------------
  #----------------------------------------------------------------------------------------
  # main function starts here
 mlFitcall <- match.call()  #   the function call  
  ## checking for NA in the data 
  if(!missing(data)) 
  {
    if  (any(is.na(data)))   
      stop("The data contains NA's, use data = na.omit(mydata)")
  }  
  ##       Evaluate the model frame
    mnames <- c("", "formula", "data", "weights" ) #  "subset"  "na.action"
    cnames <- names(mlFitcall)  # get the names of the arguments of the call
    cnames <- cnames[match(mnames,cnames,0)] # keep only the ones that match with mnames
     mcall <- mlFitcall[cnames] # get in mcall all the relevant information but remember
                                 # that the first element will be NULL
mcall[[1]] <- as.name("model.frame") # replace NULL with model.frame
mcall[[2]] <- if (any(grepl("~", deparse(substitute(formula)))))   mcall[[2]] 
              else as.formula(paste(deparse(substitute(formula)),"~1", sep=""), parent.frame(1))
#if (!any(grepl("~", deparse(substitute(formula)))))
 # {
  # Y_Y<- get(deparse(substitute(formula)), envir=parent.frame(1))
  # assign(deparse(substitute(formula)), Y_Y)
#   mcall[[2]] <-as.formula(paste(deparse(substitute(formula)),"~1", sep=""), parent.frame(1))
#  }
 mu.frame <- eval(mcall, sys.parent())  # evalute the data.frame at the model.frame
        Y <- model.extract(mu.frame, "response") #extracting the y variable from the formula
  if(is.null(dim(Y)))                       # if y not matrix
        N <- length(Y) else N <- dim(Y)[1]
        w <- model.extract(mu.frame, weights) # weights for the likelihood
   if(is.null(w))   w <- rep(1, N)
   else if(any(w < 0)) stop("negative weights not allowed") #
   fam  <- as.gamlss.family(family)
  fname <- fam$family[[1]] 
   dfun <- paste("d",fname,sep="")
  # pfun <- paste("p",fname,sep="")
    PDF <- eval(parse(text=dfun))
  #	CDF <- eval(parse(text=pfun))
  nopar <- fam$nopar
  ## extracting now the y and the binomial denominator in case we use BI or BB
  if(any(fam$family%in%.gamlss.bi.list)) 
  { 
    if (NCOL(Y) == 1) 
    {
      y <- if (is.factor(Y))  Y != levels(Y)[1] else Y
      bd <- rep(1, N)
      if (any(y < 0 | y > 1)) stop("y values must be 0 <= y <= 1")
    } 
    else if (NCOL(Y) == 2) 
    {
      if (any(abs(Y - round(Y)) > 0.001)) {
        warning("non-integer counts in a binomial GAMLSS!")
      }
      bd <- Y[,1] + Y[,2]
      y <-  Y[,1]
      if (any(y < 0 | y > bd)) stop("y values must be 0 <= y <= N") # MS Monday, October 17, 2005 
    } 
    else stop(paste("For the binomial family, Y must be", 
                    "a vector of 0 and 1's or a 2 column", "matrix where col 1 is no. successes", 
                    "and col 2 is no. failures"))
  }
  # multinomial checking
  else if(any(fam$family%in%.gamlss.multin.list))
  {
    y <- if(is.factor(Y))   unclass(Y)
    else Y
  } 
  ## For censoring
  else if(is.Surv(Y))
  { 
    ## checking that the family is censored
    if (length(grep("censored",family$family[[2]]))==0) 
      stop(paste("the family in not a censored distribution, use cens()"))
    ## checking compatability of Surv object and censored distribution
    if (length(grep(attr(Y,"type"),family$family[[2]]))==0) 
      stop(paste("the Surv object and the censored distribution are not of the same type"))
    y <- Y
  }     
  else {y <- Y }
  # here is the place to  start from ACTION HERE
  # check whether start.from is null
  if (!is.null(start.from)) # MS 21-12-11
  {
    if (is.gamlss(start.from)) # if not check whether model or vector
    {
      if ("mu"%in%names(fam$parameters))    {mu.start <- fitted(start.from, "mu")[1]}
      if ("sigma"%in%names(fam$parameters)) {sigma.start <- fitted(start.from, "sigma")[1]}
      if ("nu"%in%names(fam$parameters))    {nu.start <- fitted(start.from, "nu")[1]}
      if ("tau"%in%names(fam$parameters))   {tau.start <- fitted(start.from, "tau")[1]}
    }
    else 
    {
      if (is.numeric(start.from))
      {
        if (nopar<length(start.from)) stop("start.fom need to be as big as the number of parameters in the distribution")
        if ("mu"%in%names(fam$parameters))    {mu.start <- start.from[1]}
        if ("sigma"%in%names(fam$parameters)) {sigma.start <- start.from[2]}
        if ("nu"%in%names(fam$parameters))    {nu.start <- start.from[3]}
        if ("tau"%in%names(fam$parameters))   {tau.start <- start.from[4]}
      }
      else stop("start.from is a non numeric variable")
    }
  }
  ##  get the initial values if are not set by the user
  if ("mu"%in%names(fam$parameters))
  {
    mu <- if(is.null(mu.start))  mean(eval(fam$mu.initial, list(y=y)))
    else mu.start[1] 
    eta.mu <- fam$mu.linkfun(mu)
    mu.fix <- (mu.fix||!fam$parameters[["mu"]])   ### first check whether are fix     
  }
  if ("sigma"%in%names(fam$parameters))
  {
    sigma <- if(is.null(sigma.start)) mean(eval(fam$sigma.initial, list(y=y, mu=mu)))
    else sigma.start[1]
    eta.sigma <- fam$sigma.linkfun(sigma)
    sigma.fix <- (sigma.fix||!fam$parameters[["sigma"]]) 
  }
  if ("nu"%in%names(fam$parameters))
  {
    nu <- if(is.null(nu.start))  mean(eval(fam$nu.initial, list(y=y, mu=mu, sigma=sigma)))
    else nu.start[1]
    eta.nu <- fam$nu.linkfun(nu)
    nu.fix <- (nu.fix||!fam$parameters[["nu"]]) 
  }
  if ("tau"%in%names(fam$parameters))
  {  
    tau <- if(is.null(tau.start)) mean(eval(fam$tau.initial, list(y=y, mu=mu, sigma=sigma, nu=nu)))
    else tau.start[1]
    eta.tau <- fam$tau.linkfun(tau)
    tau.fix <- (tau.fix||!fam$parameters[["tau"]]) 
  }
  ## whether to fix parameters
  fixed <- list()
  if (mu.fix)      fixed <- c(fixed, eta.mu=mu[1])
  if (sigma.fix)   fixed <- c(fixed, eta.sigma=sigma[1])
  if (nu.fix)      fixed <- c(fixed, eta.nu=nu[1])
  if (tau.fix)     fixed <- c(fixed, eta.tau=tau[1])
  noFixed <- sum(c(mu.fix, sigma.fix, nu.fix, tau.fix))
  ## define the likelihood and find the maximum
  switch(nopar, 
         {# one parameter 
           ll1 <- function(eta.mu)
           {
             mu <- fam$mu.linkinv(eta.mu)
             if(any(fam$family%in%.gamlss.bi.list)) -sum(w*PDF(y, mu=mu, bd=bd, log=TRUE)) # BI
             else -sum(w*PDF(y, mu=mu, log=TRUE))# other
           }
           fit <-	MLE(ll1, start=list(eta.mu=eta.mu), fixed=fixed, ...)
         },
         {# two paremeters
           ll2 <- function(eta.mu, eta.sigma)
           {
             mu <- fam$mu.linkinv(eta.mu)
             sigma <- fam$sigma.linkinv(eta.sigma)
             if(any(fam$family%in%.gamlss.bi.list))  -sum(w*PDF(y, bd=bd, mu=mu, sigma=sigma, log=TRUE))  else   
               -sum(w*PDF(y, mu=mu, sigma=sigma, log=TRUE))
           }
           fit <-	MLE(ll2, start=list(eta.mu=eta.mu, eta.sigma=eta.sigma), fixed=fixed, ...)
         },
         {# three parameters
           ll3 <- function(eta.mu, eta.sigma, eta.nu)
           {
             mu <- fam$mu.linkinv(eta.mu)
             sigma <- fam$sigma.linkinv(eta.sigma)
             nu <- fam$nu.linkinv(eta.nu)
             if(any(fam$family%in%.gamlss.bi.list)) -sum(w*PDF(y, bd=bd, mu=mu, sigma=sigma, nu=nu, log=TRUE)) else
               -sum(w*PDF(y, mu=mu, sigma=sigma, nu=nu, log=TRUE))
           }
           fit <-	MLE(ll3, start=list(eta.mu=eta.mu, eta.sigma=eta.sigma, eta.nu=eta.nu), fixed=fixed, ...)
           
         },
         {# four parameters
           ll4 <- function(eta.mu, eta.sigma, eta.nu, eta.tau)
           {
             mu <- fam$mu.linkinv(eta.mu)
             sigma <- fam$sigma.linkinv(eta.sigma)
             nu <- fam$nu.linkinv(eta.nu)
             tau <- fam$tau.linkinv(eta.tau)
             -sum(w*PDF(y, mu=mu, sigma=sigma, nu=nu, tau=tau, log=TRUE))
           }
           fit <-	MLE(ll4, start=list(eta.mu=eta.mu, eta.sigma=eta.sigma, eta.nu=eta.nu, eta.tau=eta.tau), fixed=fixed, ...)
         }
  ) # end of switch
  # saving things
  df.fit <- nopar-noFixed
  out <-list(family = fam$family,  
         parameters =  as.character(names(fam$par)), 
               type = fam$type, 
               call = mlFitcall, 
                  y = y,
            weights = w,
         G.deviance = 2*fit$min, 
                  N = N,  
             df.fit = df.fit, 
        df.residual = N-df.fit, 
                aic = 2*fit$min+2*nopar,
                sbc = 2*fit$min+log(N)*nopar,
             method = fit$method,
               vcov = fit$vcov, 
             Allpar = fit$coef)
  if ("mu"%in%names(fam$parameters))
  {
           mu <- if (mu.fix)  fam$mu.linkinv(eta.mu) else fam$mu.linkinv(fit$coef["eta.mu"])
    names(mu) <- "mu"
    mu.coefficients <- fit$coef["eta.mu"]
    names(mu.coefficients) <- "mu.coefficients"
    out <- c(out, mu, mu.coefficients )
    out$mu.link <- fam$mu.link
  }
  ## Output for sigma model: ---------------------------------------------------------------
  if ("sigma"%in%names(fam$parameters))
  {
    sigma <- if (sigma.fix)  fam$mu.linkinv(eta.sigma) else fam$sigma.linkinv(fit$coef["eta.sigma"])
    names(sigma) <- "sigma"
    sigma.coefficients <- fit$coef["eta.sigma"]
    names(sigma.coefficients) <- "sigma.coefficients"	
    out <- c(out, sigma, sigma.coefficients )
    out$sigma.link <- fam$sigma.link
  }
  ##  output for nu ------------------------------------------------------------------------
  if ("nu"%in%names(fam$parameters))
  {
    nu <- if (nu.fix)  fam$mu.linkinv(eta.nu) else fam$nu.linkinv(fit$coef["eta.nu"])
    names(nu) <- "nu"
    nu.coefficients = fit$coef["eta.nu"]
    names(nu.coefficients) <- "nu.coefficients"			    
    out <- c(out, nu, nu.coefficients )
    out$nu.link <- fam$nu.link
    
  }
  ##  output for tau -----------------------------------------------------------------------
  if ("tau"%in%names(fam$parameters))
  {
    tau <- if (tau.fix)  fam$mu.linkinv(eta.tau) else fam$tau.linkinv(fit$coef["eta.tau"])
    names(tau) <- "tau"
    tau.coefficients = fit$coef["eta.tau"]
    names(tau.coefficients) <- "tau.coefficients"			    
    out <- c(out, tau, tau.coefficients )
    out$tau.link <- fam$tau.link
  }
  if(any(fam$family%in%.gamlss.bi.list)) 
  {
    out$bd <- bd
  }
  
  out$residuals <- eval(fam$rqres)
  out$rqres <- fam$rqres
  if (!missing(data) ) out$call$data <- substitute(data)
  class(out) <- c("gamlssML", "gamlss")
  out
}                                        
######################################################################################
# methods for gamlssML    
######################################################################################
fitted.gamlssML<-function (object, what = c("mu", "sigma", "nu", "tau"), parameter= NULL, ... ) 
{
  what <- if (!is.null(parameter))  {
    match.arg(parameter, choices=c("mu", "sigma", "nu", "tau"))} else  match.arg(what)
  if (! what%in%object$par) stop(paste(what,"is not a parameter in the gamlss object","\n"))
  x <- rep(object[[what]], object$N)
  x
}
######################################################################################
vcov.gamlssML<-function (object, type = c("vcov", "cor", "se",  "all"), ... ) 
{
  type <- match.arg(type)
  switch(type,
         "se"={x <- sqrt(diag(object$vcov))},
         "cor"={x <- cov2cor(object$vcov)},
         "vcov"={x<- object$vcov},
         "all"={x <-list(vcov=object$vcov, cor=cov2cor(object$vcov), se=sqrt(diag(object$vcov)))}
  )
  x
}
######################################################################################
# new at the 6-8-11 DS  
summary.gamlssML  <- function (object, digits = max(3, getOption("digits") - 3), ...) 
{   
  
  digits <- max(3, getOption("digits") - 3)
  cat("*******************************************************************")
  cat("\nFamily: ", deparse(object$family), "\n") 
  cat("\nCall: ",  deparse(object$call),  "\n", fill=TRUE)
  cat("Fitting method:", deparse(object$method), "\n\n") 
  est.disp <- FALSE
  # df.r <- object$noObs - object$mu.df
  coef  <- object$Allpar
  se.coef <- vcov(object, "se")
  tval <- coef/se.coef
  matcoef <- cbind(coef, se.coef, tval, 2*(1-pnorm(abs(tval))))
  dimnames(matcoef) <- list(names(tval), c(" Estimate", " Std. Error", " t value", "Pr(>|t|)"))
  cat("\nCoefficient(s):\n")
  printCoefmat(matcoef, digits = 6, signif.stars = TRUE)
  cat("\n Degrees of Freedom for the fit:", object$df.fit, "Residual Deg. of Freedom  ", 
      object$df.residual, "\n")
  cat("Global Deviance:    ", format(signif(object$G.deviance)), 
      "\n            AIC:    ", format(signif(object$aic)), "\n            SBC:    ", 
      format(signif(object$sbc)), "\n")
  invisible(object)
}
#-------------------------------------------------------------------------------------
# created 21-2-18 MS
# peobably it does not need data argument
predict.gamlssML <- function(object, what = c("mu", "sigma", "nu", "tau"), 
                                parameter = NULL,
                                  newdata = NULL, 
                                   se.fit = FALSE, 
                                     data = NULL, ...) 
{
   what <- if (!is.null(parameter))  {
  match.arg(parameter, choices=c("mu", "sigma", "nu", "tau", "all"))} else  match.arg(what)
  if (! what%in%object$par) stop(paste(what,"is not a parameter in the gamlss object","\n"))
# x <- rep(object[[what]], object$N)
#  If no new data just use fitted() and finish
if (is.null(newdata))
 {
  if (!se.fit)
     ret <- fitted(object,what)
  else
  {
  # this is the case se.fit=TRUE get the se
  se <- vcov(object,"se") 
     dmudeta <- abs(
  gamlss.family(eval(parse(text=paste(object$family[1],"(",what,".link=",
   eval(parse(text=(paste("object$",what,".link", sep="")))),")", sep=""))))[[paste(what,"dr",sep=".")]]
(coef(object, what)))
  ret<-list(fit = fitted(object,what),                  # eta:  obj[[paste(what, "lp", sep=".")]]
          se.fit = unname(rep(se[paste("eta.", what,sep="")]*dmudeta, length(fitted(object,what)))))          
  # },   
    
  }
 }
   else # if new data use only the length of the new data
 {
   lengthnewdata <- if (is(newdata,"data.frame")) dim(newdata)[1] else length(newdata)
   if (!se.fit)
     ret<- rep(fitted(object,what)[1], lengthnewdata)  
   else
   {
     se <- vcov(object,"se") 
     dmudeta <- abs(
       gamlss.family(eval(parse(text=paste(object$family[1],"(",what,".link=",
                                           eval(parse(text=(paste("object$",what,".link", sep="")))),")", sep=""))))[[paste(what,"dr",sep=".")]]
       (coef(object, what)))
   ret<-list(fit = rep(fitted(object,what)[1], lengthnewdata) ,                  # eta:  obj[[paste(what, "lp", sep=".")]]
               se.fit = unname(rep(se[paste("eta.", what,sep="")]*dmudeta, lengthnewdata)))   
   }
 } 
   ret
}
# methods are finish here  
#-------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------
mstasinopoulos/GAMLSS-original documentation built on March 27, 2024, 7:11 a.m.