R/wle.glm.R

Defines functions wle.glm wle.glm.fit wle.glm.weights print.wle.glm print.wle.glm.root deviance.wle.glm effects.wle.glm family.wle.glm model.frame.wle.glm fitted.wle.glm coef.wle.glm weights.wle.glm formula.wle.glm residuals.wle.glm summary.wle.glm summary.wle.glm.root print.summary.wle.glm

Documented in family.wle.glm print.summary.wle.glm print.wle.glm residuals.wle.glm summary.wle.glm weights.wle.glm wle.glm wle.glm.fit wle.glm.weights

#############################################################
#                                                           #
#	wle.glm function                                    #
#	Author: Claudio Agostinelli and Fatemah Alqallaf    #
#	E-mail: claudio@unive.it                            #
#	Date: March, 11, 2010                               #
#	Version: 0.3                                        #
#                                                           #
#	Copyright (C) 2010 Claudio Agostinelli              #
#                      and Fatemah Alqallaf                 #
#                                                           #
#############################################################

## Function developed from 'glm' R version 2.6.0
wle.glm <- function (formula, family = binomial, data, weights, subset, na.action, start = NULL, etastart, mustart, offset, control = list(glm=glm.control(...), wle=wle.glm.control()), model = TRUE, method = "wle.glm.fit", x = FALSE, y = TRUE, contrasts = NULL, dist.method="euclidean", ...) {
  call <- match.call()
  if (is.character(family)) 
    family <- get(family, mode = "function", envir = parent.frame())
  if (is.function(family)) 
    family <- family()
  if (is.null(family$family)) {
    print(family)
    stop("'family' not recognized")
  }
  if (missing(data)) 
    data <- environment(formula)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action", "etastart", "mustart", "offset"), names(mf), 0)
  mf <- mf[c(1, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  switch(method, model.frame = return(mf), wle.glm.fit = 1, stop("invalid 'method': ", method))
  mt <- attr(mf, "terms")
  Y <- model.response(mf, "any")
  if (length(dim(Y)) == 1) {
    nm <- rownames(Y)
    dim(Y) <- NULL
    if (!is.null(nm)) 
      names(Y) <- nm
  }
  X <- if (!is.empty.model(mt)) 
    model.matrix(mt, mf, contrasts)
  else matrix(, NROW(Y), 0)
  weights <- as.vector(model.weights(mf))
  if (!is.null(weights) && !is.numeric(weights)) 
    stop("'weights' must be a numeric vector")
  offset <- as.vector(model.offset(mf))
  if (!is.null(weights) && any(weights < 0)) 
    stop("negative weights not allowed")
  if (!is.null(offset)) {
    if (length(offset) == 1) 
      offset <- rep(offset, NROW(Y))
    else if (length(offset) != NROW(Y)) 
      stop(gettextf("number of offsets is %d should equal %d (number of observations)", length(offset), NROW(Y)), domain = NA)
  }
  mustart <- model.extract(mf, "mustart")
  etastart <- model.extract(mf, "etastart")

### check the wle.glm.control entries
  if (is.null(control$wle$group))
    control$wle$group <- max(round(NROW(Y)/2), NCOL(X)+1)
  if (control$wle$group > NROW(Y))
    stop("'group' parameter must be not greater than NROW(Y)")
  maxboot <- sum(log(1:NROW(Y)))-(sum(log(1:control$wle$group))+sum(log(1:(NROW(Y)-control$wle$group))))
  if (log(control$wle$boot) > maxboot)
    stop("Bootstrap replication not in the range")
  if (is.null(control$wle$smooth) & !(family$family=='poisson' | family$family=='binomial')) {
    stop("'smooth parameter is missing without default")
  } 
  tot.sol <- 0
  not.conv <- 0
  iboot <- 0
  fit.store <- list()
  while (tot.sol < control$wle$num.sol & iboot < control$wle$boot) {
    iboot <- iboot + 1
    index.boot <- sample(1:NROW(Y), size=control$wle$group, replace=FALSE)
    if (NCOL(X) > 1)
      X.boot <- X[index.boot,]
    else
      X.boot <- X[index.boot]
    if (NCOL(Y) > 1)
      Y.boot <- Y[index.boot,]
    else
      Y.boot <- Y[index.boot]
  # initial values

    fit.boot <- try(glm.fit(x=X.boot, y=Y.boot, weights = weights[index.boot], start = start, etastart = etastart[index.boot], mustart = mustart[index.boot], offset = offset, family = family, control = control$glm, intercept = attr(mt, "intercept") > 0))
    if (is.list(fit.boot) && !any(is.na(fit.boot$coefficients))) {
      fit.boot$x <- data.frame(X.boot)
      fit.boot$y <- Y.boot
      fit.boot$terms <- mt
      fit.boot$family <- family
      class(fit.boot) <- "glm"
      fitted.Y <- try(predict.glm(fit.boot, newdata = if(is.null(data)) data.frame(X) else data, type = "response"))      
      dispersion <- if (family$family %in% c("poisson", "binomial")) {
        est.disp <- FALSE
        1
      } else if (fit.boot$df.residual > 0) {
        est.disp <- TRUE
        if (any(fit.boot$weights == 0)) 
          warning("observations with zero weight not used for calculating dispersion")
        if (control$wle$mle.dispersion) {
          if (family$family=="Gamma") { 
            wle.gamma.shape.glm(y=Y.boot, mu=fit.boot$fitted.values, deviance=fit.boot$deviance, df.residual=fit.boot$df.residual, prior.weights=weights[index.boot], wle.weights=rep(1, length(Y.boot)), it.lim=control$glm$maxit, eps.max=control$glm$epsilon, verbose=control$wle$verbose)$dispersion
          } else if (family$family=="inverse.gaussian") {
            wle.inversegaussian.lambda.glm(y=Y.boot, mu=fit.boot$fitted.values, prior.weights=weights[index.boot], wle.weights=rep(1, length(Y.boot)))$dispersion
          } else {
             stop("'WLE' method is not available for the estimation of the dispersion parameter in this family, set control$wle$mle.dispersion=FALSE to use the weighted method of moments")
          }
        } else {
          sum((fit.boot$weights * fit.boot$residuals^2)[fit.boot$weights > 0])/fit.boot$df.residual
        }
      } else {
        est.disp <- FALSE
        1
      }
###      cat('At the boot time ',dispersion, '\n')
  # initial wle.weights
      wle.weights <- wle.glm.weights(Y, X, fitted.values=fitted.Y, family = family, dispersion=dispersion, raf=control$wle$raf, tau=control$wle$tau, smooth=control$wle$smooth, asy.smooth=control$wle$asy.smooth, window.size=control$wle$window.size, use.asymptotic=control$wle$use.asymptotic, use.smooth=control$wle$use.smooth, tol=control$wle$tol, dist.method=dist.method, cutpoint=control$wle$cutpoint, powerdown=control$wle$powerdown)$weights
    
      fit <- wle.glm.fit(x = X, y = Y, weights = weights, wle.weights=wle.weights, start = fit.boot$coefficients, etastart = etastart, mustart = fit.boot$fitted.values, offset = offset, family = family, control = control, intercept = attr(mt, "intercept") > 0, dispersion=dispersion)
      if (length(offset) && attr(mt, "intercept") > 0) {
        fit$null.deviance <- wle.glm.fit(x = X[, "(Intercept)", drop = FALSE], y = Y, weights = weights, wle.weights=wle.weights, offset = offset, family = family, control = control, intercept = TRUE, dispersion=dispersion)$deviance
      }
      if (fit$converged) {
        fit$wle.weights <- fit$wle.weights/max(fit$wle.weights)
        if (tot.sol==0) {
          fit.store[[1]] <- fit
          w.store <- matrix(fit$wle.weights, nrow=1)
          tot.sol <- 1
        } else {
#####        cat(min(apply(abs(w.store-matrix(rep(fit$wle.weights, NROW(w.store)), ncol=NROW(Y), byrow=TRUE)), 1, max)), '\n')
          if (min(apply(abs(w.store-matrix(rep(fit$wle.weights, NROW(w.store)), ncol=NROW(Y), byrow=TRUE)), 1, max))>control$wle$equal) {
            tot.sol <- tot.sol+1
            fit.store[[tot.sol]] <- fit
            w.store <- rbind(w.store,fit$wle.weights)
          }
        }
      } else not.conv <- not.conv + 1
    } else {
      not.conv <- not.conv + 1
    }
  }
  if (tot.sol==0) {
    fit.store[[1]] <- fit
  }
    names(fit.store) <- paste('root', 1:length(fit.store), sep='')
  if (model) 
    fit.store$model <- mf
  fit.store$na.action <- attr(mf, "na.action")
  if (x)
    fit.store$x <- X
  if (!y)
    fit.store$y <- NULL
  else
    fit.store$y <- Y
  fit <- c(fit.store, list(family=fit.store$root1$family, call = call, formula = formula, terms = mt, data = data, offset = offset, control = control, method = method, contrasts = attr(X, "contrasts"), xlevels = .getXlevels(mt, mf)), tot.sol=tot.sol, not.conv=not.conv)
  class(fit) <- "wle.glm"
  return(fit)
}

#############################################################
#                                                           #
#	wle.glm.fit function                                #
#	Author: Claudio Agostinelli                         #
#	E-mail: claudio@unive.it                            #
#	Date: December, 15, 2009                            #
#	Version: 0.2-1                                      #
#                                                           #
#	Copyright (C) 2009 Claudio Agostinelli              #
#                                                           #
#############################################################

wle.glm.fit <- function(x, y, weights = NULL, wle.weights = rep(1, NROW(y)), start = NULL, etastart = NULL,  mustart = NULL, offset = rep(0, NROW(y)), family = gaussian(), control = list(glm=glm.control(), wle=wle.glm.control()), dist.method='euclidean', intercept = TRUE, dispersion=NULL) {
  if (is.null(weights))
    weights <- rep.int(1, NROW(y))

  if (family$family=='binomial' & NCOL(y) == 1 & !all((y==0) | (y==1))) {
    if (any((weights-round(weights))!=0)) {
      stop("Please submit the response variable as two columns (first column successes, second column unsuccesses), use 'weights' only for weighting the observations not for the total")
    } else {
      y <- cbind(round(y*weights), weights-round(y*weights))      
      weights <- rep.int(1, NROW(y))
    }
  }
  if (!is.null(control$wle$use.asymptotic) && NCOL(y) > 1)
    stop("The use of asymptotic residuals for a two columns response variable is not implemented yet.")

  if (NCOL(y) > 1) {
    ny <- NROW(y)
    wle.weights <- as.vector(t(wle.weights))
    weights <- c(weights, weights)
    y01 <- rbind(cbind(y[,1], rep(0, ny)), cbind(rep(0, ny), y[,2])) 
    x01 <- rbind(x, x)    
  } else {
    if (is.null(control$wle$delta)) {
      y01 <- y
    } else {
      delta <- control$wle$delta
      pibar <- mean(y)
      pihat <-  max(delta, min(1-delta, pibar)) 
      delta0 <- pihat*delta/(1+delta)
      delta1 <- (1+pihat*delta)/(1+delta)
      y01 <- delta0+(delta1-delta0)*y
      ## if (verbose)
      ##   cat(y01, '\n')
    }
    x01 <- x
  }
  
  pesia <- wle.weights 
  pesib <- wle.weights+1+control$wle$tol
  iter <- 0
####    iterations over wle weights until convergence of the wle weights  
  while(any(abs(pesib-pesia) > control$wle$tol) & iter < control$wle$max.iter) {
    iter <- iter + 1
    pesib <- pesia
    res.glm.fit <- try(glm.fit(x=x01, y=y01, weights = pesia*weights, start = start, etastart = etastart,  mustart = mustart, offset = offset, family = family, control = control$glm, intercept = intercept))
    if (is.list(res.glm.fit)) {###FIX ME!!!!!
      nvars <- NCOL(x01)
      EMPTY <- nvars == 0
      n.ok <- sum(pesia) - sum(res.glm.fit$weights==0)
      rank <- if(EMPTY) 0 else res.glm.fit$qr$rank
      res.glm.fit$df.null <- n.ok - as.integer(intercept)
      res.glm.fit$df.residual <- n.ok - rank
###    cat('df.residual ', res.glm.fit$df.residual, '\n')
      dispersion <- if (family$family %in% c("poisson", "binomial")) {
         est.disp <- FALSE
         1
      } else if (res.glm.fit$df.residual > 0) {
        est.disp <- TRUE
        if (any(res.glm.fit$weights == 0)) 
          warning("observations with zero weight not used for calculating dispersion")
        if (control$wle$mle.dispersion) {
          if (family$family=="Gamma") {
            wle.gamma.shape.glm(y=y01, mu=res.glm.fit$fitted.values, deviance=res.glm.fit$deviance, df.residual=res.glm.fit$df.residual, prior.weights=weights, wle.weights=pesia, it.lim=control$glm$maxit, eps.max=control$glm$epsilon, verbose=control$wle$verbose, dispersion=dispersion)$dispersion
          } else if (family$family=="inverse.gaussian") {
            wle.inversegaussian.lambda.glm(y=y01, mu=res.glm.fit$fitted.values, prior.weights=weights, wle.weights=pesia)$dispersion
          } else {
            stop("'WLE' method is not available for the estimation of the dispersion parameter in this family, set control$wle$mle.dispersion=FALSE to use the weighted method of moments")
          }
        } else {
            sum((pesia * res.glm.fit$weights * res.glm.fit$residuals^2)[res.glm.fit$weights > 0])/res.glm.fit$df.residual
        }
      } else {
        est.disp <- FALSE
        1
      }
###    cat('In the iterations ',dispersion, '\n')
      res.weights <- wle.glm.weights(y=y, x=x, fitted.values=res.glm.fit$fitted.values, family=family, dispersion=dispersion, raf=control$wle$raf, tau=control$wle$tau, smooth=control$wle$smooth, asy.smooth=control$wle$asy.smooth, window.size=control$wle$window.size, use.asymptotic=control$wle$use.asymptotic, use.smooth=control$wle$use.smooth, tol=control$wle$tol, dist.method=dist.method, cutpoint=control$wle$cutpoint, powerdown=control$wle$powerdown)
####    plot(pesia, ylim=c(0,1))
      pesia <- res.weights$weights
    } else {
      iter <- control$wle$max.iter
      start <- NULL
    }
  }

  ## calculate df
  nvars <- NCOL(x)
  EMPTY <- nvars == 0
  nobs <- NROW(y)
  n.ok <- sum(pesia) - sum(res.glm.fit$weights==0)
  rank <- if(EMPTY) 0 else res.glm.fit$qr$rank
  res.glm.fit$df.null <- n.ok - as.integer(intercept)
  res.glm.fit$df.residual <- n.ok - rank
#  ## calculate AIC
#  aic <- family$aic
#  ## 
#  eval(family$initialize)
#  ### controllare passaggio dei parametri
#  res.glm.fit$aic <- aic(y, n, res.glm.fit$fitted.values, res.glm.fit$weights, res.glm.fit$deviance) + 2*rank
  
  if (iter == control$wle$max.iter) res.glm.fit$converged <- FALSE
  res.glm.fit$prior.weights <- weights
  res.glm.fit$wle.weights <- pesia
  res.glm.fit$wle.asymptotic <- res.weights$asymptotic
  res.glm.fit$dispersion <- dispersion
  return(res.glm.fit)
}

#############################################################
#                                                           #
#	wle.glm.weights function                            #
#	Author: Claudio Agostinelli                         #
#	E-mail: claudio@unive.it                            #
#	Date: May, 04, 2011                                 #
#	Version: 0.6-1                                      #
#                                                           #
#	Copyright (C) 2011 Claudio Agostinelli              #
#                                                           #
#############################################################

wle.glm.weights <- function(y, x, fitted.values, family = gaussian(), dispersion=1, raf='GKL', tau=0.1, smooth=NULL, asy.smooth=0.031, window.size=NULL, use.asymptotic=NULL, use.smooth=TRUE, tol=10^(-6), dist.method='euclidean', cutpoint=0, powerdown=1) {

  ## internal function for the evaluation of the weights given the Pearson residuals
  pesi <- function(x, raf, tau=0.1) {
    gkl <- function(x, tau) {
      if (tau!=0)
        x <- log(tau*x+1)/tau
      return(x)
    }
    pwd <- function(x, tau) {
      if(tau==Inf)
        x <- log(x+1)
      else
        x <- tau*((x + 1)^(1/tau) - 1)
      return(x)
    }
    x <- ifelse(x < 1e-10, 0, x)
    ww <- switch(raf,
       ##park+basu+2003.pdf
       GKL = gkl(x, tau),
       ##lindsay+1994.pdf                 
       PWD = pwd(x, tau),
       HD =  2*(sqrt(x + 1) - 1) ,
       NED =  2 - (2 + x)*exp(-x) ,
       SCHI2 =  1-(x^2/(x^2 +2)) )

    if (raf!='SCHI2') {
      ww <- (ww + 1)/(x + 1)
    }
    ww[ww > 1] <- 1
    ww[ww < 0] <- 0
    ww[is.infinite(x)] <- 0
    return(ww)
  }

  sottopesa <- function(x, cutpoint=0, powerdown=1)
    ifelse(x > cutpoint | x==0, x, x^((cutpoint/x)^powerdown))

  ## internal function to check duplicate in the 'x' matrix
  myunique <- function(x, window.size=NULL) {
  ## x: a distance matrix
    nx <- nrow(x)
    index0 <- index1 <- list()
    nodup <- rep(TRUE, nx)
    j <- 0
    while (any(nodup)) {
      j <- j + 1
      jj <- min((1:nx)[nodup])
      ind0 <- (1:nx)[x[jj,] <= .Machine$double.eps*2]
      if (!is.null(window.size))
        ind1 <- (1:nx)[x[jj,] <= window.size]
      else
        ind1 <- ind0
      nodup[ind0] <- FALSE
      index0[[j]] <- ind0
      index1[[j]] <- ind1
    }
    result <- list(index0, index1)
    return(result)
  }

########## BINOMIAL AND POISSON
  if (family$family=='binomial' | family$family=='poisson') {
    ny <- NROW(y)
    if (NCOL(y)==1)
      weights <- rep(0, ny)
    else
      weights <- matrix(0, nrow=ny, ncol=2)
    asy <- rep(FALSE, ny)
    dx <- as.matrix(dist(x, method=dist.method))
    ressplit <- myunique(dx, window.size=window.size)
    splitx <- ressplit[[1]]
    splitxx <- ressplit[[2]]
    nny <- sapply(splitx, length)
    nnny <- sapply(splitxx, length)
    if (!is.null(use.asymptotic) && any(nnny <= use.asymptotic)) {
      residall <- residualsAnscombe(y=y, mu=fitted.values, family=family)
##        cat(summary(residall), '\n')
##        cat(sd(residall), '\n')
##        cat(mad(residall), '\n')
      ff.den <- density(residall, bw=asy.smooth, na.rm=TRUE)
##    plot(ff.den)
      ff.smoothed <- approxfun(x=ff.den$x, y=ff.den$y, rule=2)
      ffall <- ff.smoothed(v=residall)
      mmall <- dnorm(residall, mean=0, sd=sqrt(1+asy.smooth))
      ddall <- ffall/mmall - 1    
      asy.weights <- pesi(x=ddall, raf=raf, tau=tau)

##      cat('maxww', maxww, '\n')
    }

    for (j in 1:length(splitx)) {
      if (is.null(use.asymptotic) || nnny[j] > use.asymptotic) {
        asymptotic <- FALSE
        if (family$family=='binomial') {
          if (NCOL(y) == 1) {
            ytemp <- y[splitx[[j]]]
            ff <- rep(1, nny[j])
            tff <- table(y[splitxx[[j]]])
            tff <- tff/sum(tff)
            if (length(tff)==2) {
              ff[ytemp==0] <- tff[1]
              ff[ytemp==1] <- tff[2]
            }
            mm <- dbinom(ytemp,size=1,prob=fitted.values[splitx[[j]]][1])
          } else {
            ytemp <- y[splitx[[j]], , drop=FALSE]
            tff <- apply(y[splitxx[[j]], , drop = FALSE], 2, sum)
            tff <- tff/sum(tff)
            ff <- matrix(tff, nrow=nny[j], ncol=2, byrow=TRUE)
            mm <- dbinom(c(1,0), size=1, prob=fitted.values[splitx[[j]]][1])
            mm <- matrix(mm, nrow=nny[j], ncol=2, byrow=TRUE)
          }
        } else if (family$family=='poisson') {
          ytemp <- y[splitx[[j]]]
          ff <- rep(0,nny[j])
          tff <- table(y[splitxx[[j]]])
          tff <- tff/sum(tff)
          nff <- as.numeric(names(tff))
          for (i in 1:nny[j]) {
            ff[i] <- tff[nff==ytemp[i]] 
          }
          mm <- dpois(ytemp,lambda=fitted.values[splitx[[j]]][1])
        }
#        else {
#          stop('family not implemented yet')
#        } 
      } else {
        ##### quasi--family
        asymptotic <- TRUE
        rtemp <- residall[splitx[[j]]]
        ff <- ff.smoothed(v=rtemp)
        mm <- dnorm(rtemp, mean=0, sd=sqrt(1+asy.smooth))
      }
      dd <- ff/mm - 1

#    cat('ff', ff, '\n')
#    cat('mm', mm, '\n')
#    cat('dd', dd, '\n')
#    cat('ww', ww, '\n')
      if (NCOL(y)==1) {
        ww <- pesi(x=dd, raf=raf, tau=tau)
        weights[splitx[[j]]] <- ww
        asy[splitx[[j]]] <- asymptotic
      } else {
        ww <- apply(dd, 2, function(x) pesi(x, raf=raf, tau=tau))
        weights[splitx[[j]],] <- ww
      }
    }
    if (sum(asy) < ny & sum(asy) >= ny/2) {
      weights[asy] <- quantile((weights[!asy]/asy.weights[!asy]), 0.75)*weights[asy]
    } else if (sum(asy) > 0 & sum(asy) < ny/2) {
      weights[!asy] <- quantile((weights[asy]/asy.weights[asy]), 0.75)*weights[!asy]
    }
    weights[weights > 1] <- 1
########## QUASIBINOMIAL AND QUASIPOISSON    
  } else if (family$family=='quasibinomial' | family$family=='quasipoisson') {
    resid <- (y - fitted.values)/sqrt(family$variance(fitted.values))
    ff.den <- density(resid, bw=asy.smooth, na.rm=TRUE)
    ff.smoothed <- approxfun(x=ff.den$x, y=ff.den$y, rule=2)
    ffall <- ff.smoothed(v=resid)
    mmall <- dnorm(resid, mean=0, sd=sqrt(1+asy.smooth))
    ddall <- ffall/mmall - 1
    weights <- pesi(x=ddall, raf=raf, tau=tau)
    asy <- rep(TRUE, length(weights))
########## GAMMA ####################################    
  } else if (family$family=='Gamma') {
    ny <- length(y)
    weights <- rep(0, ny)
    asy <- rep(FALSE, ny)
    dx <- as.matrix(dist(x, method=dist.method))
    ressplit <- myunique(dx, window.size=window.size)
    splitx <- ressplit[[1]]
    splitxx <- ressplit[[2]]
    nny <- sapply(splitx, length)
    nnny <- sapply(splitxx, length)
    if (!is.null(use.asymptotic) && any(nnny <= use.asymptotic)) {
      residall <- residualsAnscombe(y=y, mu=fitted.values, family=family)/sqrt(dispersion)
      residall <- residall - median(residall)
##        cat(summary(residall), '\n')
##        cat(sd(residall), '\n')
##        cat(mad(residall), '\n')
      ff.den <- density(residall, bw=asy.smooth, na.rm=TRUE)
##    plot(ff.den)
      ff.smoothed <- approxfun(x=ff.den$x, y=ff.den$y, rule=2)
      ffall <- ff.smoothed(v=residall)
      mmall <- dnorm(residall, mean=0, sd=sqrt(1+asy.smooth))
      ddall <- ffall/mmall - 1    
      asy.weights <- pesi(x=ddall, raf=raf, tau=tau)
##      asy.weights[is.na(asy.weights)] <- 0
##      cat('maxww', maxww, '\n')
    }

    for (j in 1:length(splitx)) {
      if (is.null(use.asymptotic) || nnny[j] > use.asymptotic) {
        asymptotic <- FALSE
        ytemp <- y[splitx[[j]]]
        xtemp <- y[splitxx[[j]]]
        ### phi dispersion parameter
        ### mu mean parameter
        shape <- 1/dispersion ### 1/phi
        rate <- 1/(fitted.values[splitx[[j]]][1]*dispersion) ### 1/(mu*phi)
        temp <- (fitted.values[splitx[[j]]][1])^2*dispersion ### temp <- shape/rate^2

###        cat('Shape ', shape, '\n')
###        cat('Rate ', rate, '\n')
###        cat('temp ', temp, '\n')
        dsup <- max(xtemp)+ 3*smooth*temp
###        cat('dsup ', dsup, '\n')       
        tol.int <- tol*10^(-4)
        z <- .Fortran("wlegamma",
	    as.double(xtemp),
            as.double(ytemp),
	    as.integer(nnny[j]),
	    as.integer(nny[j]),                     
	    as.integer(1), ### Here the RAF is fixed to Hellinger,
                           ### hence do not use the weights from this function
                           ### but use the function pesi from the Pearson
                           ### Residuals
            as.double(1.0),
	    as.double(smooth*temp),
            as.integer(1*use.smooth),
            as.double(dsup),
	    as.double(tol),
            as.double(tol.int),
	    as.double(rate),
	    as.double(shape),
	    ww=double(nny[j]),
	    ff=double(nny[j]),
	    mm=double(nny[j]),
            PACKAGE = "wle")
        ff <- z$ff
        mm <- z$mm
      } else {
        ##### quasi--family
        asymptotic <- TRUE
        rtemp <- residall[splitx[[j]]]
        ff <- ff.smoothed(v=rtemp)
        mm <- dnorm(rtemp, mean=0, sd=sqrt(1+asy.smooth))
      }
      dd <- ff/mm - 1

#    cat('ff', ff, '\n')
#    cat('mm', mm, '\n')
#    cat('dd', dd, '\n')
      ww <- pesi(x=dd, raf=raf, tau=tau)
#    cat('ww', ww, '\n')

      weights[splitx[[j]]] <- ww
      asy[splitx[[j]]] <- asymptotic
    }
    if (sum(asy) < ny & sum(asy) >= ny/2) {
      weights[asy] <- quantile((weights[!asy]/asy.weights[!asy]), 0.75)*weights[asy]
    } else if (sum(asy) > 0 & sum(asy) < ny/2) {
      weights[!asy] <- quantile((weights[asy]/asy.weights[asy]), 0.75)*weights[!asy]
    }
    weights[is.na(weights)] <- 0
    weights[weights > 1] <- 1
    weights <- sottopesa(weights, cutpoint=cutpoint, powerdown=powerdown)
########## INVERSE GAUSSIAN ####################################    
  } else if (family$family=='inverse.gaussian') {
    ny <- length(y)
    weights <- rep(0, ny)
    asy <- rep(FALSE, ny)
    dx <- as.matrix(dist(x, method=dist.method))
    ressplit <- myunique(dx, window.size=window.size)
    splitx <- ressplit[[1]]
    splitxx <- ressplit[[2]]
    nny <- sapply(splitx, length)
    nnny <- sapply(splitxx, length)
    if (!is.null(use.asymptotic) && any(nnny <= use.asymptotic)) {
      residall <- residualsAnscombe(y=y, mu=fitted.values, family=family)
      residall <- residall - median(residall)
##        cat(summary(residall), '\n')
##        cat(sd(residall), '\n')
##        cat(mad(residall), '\n')
      ff.den <- density(residall, bw=asy.smooth, na.rm=TRUE)
##    plot(ff.den)
      ff.smoothed <- approxfun(x=ff.den$x, y=ff.den$y, rule=2)
      ffall <- ff.smoothed(v=residall)
      mmall <- dnorm(residall, mean=0, sd=sqrt(1+asy.smooth))
      ddall <- ffall/mmall - 1
      asy.weights <- pesi(x=ddall, raf=raf, tau=tau)
##      asy.weights[is.na(asy.weights)] <- 0
##      cat('maxww', maxww, '\n')
    }

    for (j in 1:length(splitx)) {
      if (is.null(use.asymptotic) || nnny[j] > use.asymptotic) {
        asymptotic <- FALSE
        ytemp <- y[splitx[[j]]]
        xtemp <- y[splitxx[[j]]]
        ### mu mean parameter
        mu <- fitted.values[splitx[[j]]][1]
###        cat('Mu ', mu, '\n')
        ### dispersion is 1/lambda
        dsup <- max(xtemp)+ 3*smooth*dispersion
###        cat('dsup ', dsup, '\n')       
        tol.int <- tol*10^(-4)
        z <- .Fortran("wleinvga",
	    as.double(xtemp),
            as.double(ytemp),
	    as.integer(nnny[j]),
	    as.integer(nny[j]),                     
	    as.integer(1), ### Here the RAF is fixed to Hellinger,
                           ### hence do not use the weights from this function
                           ### but use the function pesi from the Pearson
                           ### Residuals
            as.double(1.0),
	    as.double(smooth*dispersion),
            as.integer(1*use.smooth),
            as.double(dsup),
	    as.double(tol),
            as.double(tol.int),
	    as.double(mu),
	    as.double(1/dispersion),
	    ww=double(nny[j]),
	    ff=double(nny[j]),
	    mm=double(nny[j]),
            PACKAGE = "wle")
        ff <- z$ff
        mm <- z$mm
      } else {
        ##### quasi--family
        asymptotic <- TRUE
        rtemp <- residall[splitx[[j]]]
        ff <- ff.smoothed(v=rtemp)
        mm <- dnorm(rtemp, mean=0, sd=sqrt(1+asy.smooth))
      }
      dd <- ff/mm - 1

#    cat('ff', ff, '\n')
#    cat('mm', mm, '\n')
#    cat('dd', dd, '\n')
      ww <- pesi(x=dd, raf=raf, tau=tau)
#    cat('ww', ww, '\n')

      weights[splitx[[j]]] <- ww
      asy[splitx[[j]]] <- asymptotic
    }
    if (sum(asy) < ny & sum(asy) >= ny/2) {
      weights[asy] <- quantile((weights[!asy]/asy.weights[!asy]), 0.75)*weights[asy]
    } else if (sum(asy) > 0 & sum(asy) < ny/2) {
      weights[!asy] <- quantile((weights[asy]/asy.weights[asy]), 0.75)*weights[!asy]
    }
    weights[is.na(weights)] <- 0
    weights[weights > 1] <- 1
    weights <- sottopesa(weights, cutpoint=cutpoint, powerdown=powerdown)
    weights[weights < 0.01] <- 0
#######################################################################
  } else 
    stop("Only family 'gaussian', 'Gamma', 'inverse.gaussian', 'binomial', 'poisson', 'quasibinomial' and 'quasipoisson' are implemented (for now)")
  ###cat('WLE weights ', weights, '\n')
  result <- list(weights=weights, asymptotic=asy, grouped=nny, grouped.windows=nny)
  return(result)
}   

#############################################################
#                                                           #
#	print.wle.glm function                              #
#	Author: Claudio Agostinelli                         #
#	E-mail: claudio@unive.it                            #
#	Date: February, 25, 2010                            #
#	Version: 0.1-2                                      #
#                                                           #
#	Copyright (C) 2010 Claudio Agostinelli              #
#                                                           #
#############################################################
## from print.glm in R 2.6.0

print.wle.glm <- function (x, digits = max(3, getOption("digits") - 3), ...) {
    cat("\nCall: ", deparse(x$call), "\n\n")
    if (x$tot.sol > 0) {
      for (i in 1:x$tot.sol) {
        cat("Root: ", i, "\n")
        y <- extractRoot(x, root=i)
        print.wle.glm.root(x=y, digits=digits, ...)
      }
    } else {
      cat('No converged solutions were found\n\n')
    }
    cat("\n")
    cat("\nNumber of solutions ", x$tot.sol, "\n")
    cat("\n")    
    invisible(x)
}

###############################################################
print.wle.glm.root <- function(x, digits = max(3, getOption("digits") - 3), ...) {
  cat("Coefficients")
  if (is.character(co <- x$contrasts)) 
    cat("  [contrasts: ", apply(cbind(names(co), co), 1, paste, collapse = "="), "]")
    cat(":\n")
    print.default(format(x$coefficients, digits = digits), 
    print.gap = 2, quote = FALSE)
  cat("\nThe following statistics are valid only if this model is the FULL model in a model selection procedure\n")
  cat("\nDegrees of Freedom:", x$df.null, "Total (i.e. Null); ", 
    x$df.residual, "Residual\n")
  if (nzchar(mess <- naprint(x$na.action))) 
    cat("  (", mess, ")\n", sep = "")
    cat("Null Deviance:\t   ", format(signif(x$null.deviance, 
      digits)), "\nResidual Deviance:", format(signif(x$deviance, 
      digits)), "\tAIC:", format(signif(x$aic, digits)), "\n")
    cat("Converged:\t   ", x$converged,"\n\n")
  invisible(x)
}

###############################################################
deviance.wle.glm <- function(object, root="all", ...) {
  if (root=="all")
    root <- 1:object$tot.sol
  root <- round(as.numeric(root))
  root <- root[root <= object$tot.sol & root > 0]
  if (!length(root))
    stop('not valid root numbers provided')
  dev <- rep(0, length(root))
  for (iroot in 1:length(root)) {
    dev[iroot] <- eval(parse(text=paste('object$root', root[iroot], '$deviance', sep='')))
  }
  names(dev) <- paste('root', root, sep='')
  return(dev)
}

## from effects.glm
effects.wle.glm <- function(object, root="all", ...) {
  if (root=="all")
    root <- 1:object$tot.sol
  root <- round(as.numeric(root))
  root <- root[root <= object$tot.sol & root > 0]
  if (length(root)==1) {
    eff <- eval(parse(text=paste('object$root', root, '$effects', sep='')))
  } else {
    eff <- vector(length=0)
    for (iroot in root) {
      eff <- rbind(eff, eval(parse(text=paste('object$root', iroot, '$effects', sep=''))))
    }
    rownames(eff) <- paste('root', root, sep='')
  }
  return(eff)
}

## from family.glm
family.wle.glm <- function(object, ...) object$family

## from model.frame.glm
model.frame.wle.glm <- function (formula, ...) {
    dots <- list(...)
    nargs <- dots[match(c("data", "na.action", "subset"), names(dots), 0L)]
    if (length(nargs) || is.null(formula$model)) {
        fcall <- formula$call
        fcall$method <- "model.frame"
        fcall[[1L]] <- as.name("wle.glm")
        fcall[names(nargs)] <- nargs
#       env <- environment(fcall$formula)  # always NULL
        env <- environment(formula$terms)
        if (is.null(env)) env <- parent.frame()
        eval(fcall, env)
    }
    else formula$model
}

fitted.wle.glm <- function(object, root="all", ...) {
  if (root=="all")
    root <- 1:object$tot.sol
  root <- round(as.numeric(root))
  root <- root[root <= object$tot.sol & root > 0]
  if (!length(root))
    stop('not valid root numbers provided')
  if (length(root)==1) {
    fitted.values <- eval(parse(text=paste('object$root', root, '$fitted.values', sep='')))
  } else {
    fitted.values <- vector(length=0)
    for (iroot in root) {
      fitted.values <- rbind(fitted.values, eval(parse(text=paste('object$root', iroot, '$fitted.values', sep=''))))
    }
    rownames(fitted.values) <- paste('root', root, sep='')
  }
  return(fitted.values)
}

coef.wle.glm <- function(object, root="all", ...) {
  if (root=="all")
    root <- 1:object$tot.sol
  root <- round(as.numeric(root))
  root <- root[root <= object$tot.sol & root > 0]
  if (!length(root))
    stop('not valid root numbers provided')
  if (length(root)==1) {
    coeff <- eval(parse(text=paste('object$root', root, '$coefficients', sep='')))
  } else {
    coeff <- vector(length=0)
    for (iroot in root) {
      coeff <- rbind(coeff, eval(parse(text=paste('object$root', iroot, '$coefficients', sep=''))))
    }
    rownames(coeff) <- paste('root', root, sep='')
  }
  return(coeff)
}

## from weights.glm
weights.wle.glm <- function(object, type = c("prior", "working", "wle"), root="all", ...) {
  type <- match.arg(type)
  type <- switch(type,
            prior = "prior.weights",
            working = "weights",
            wle = "wle.weights")  
  if (root=="all")
    root <- 1:object$tot.sol
  root <- round(as.numeric(root))
  root <- root[root <= object$tot.sol & root > 0]
  if (!length(root))
    stop('not valid root numbers provided')
  if (length(root)==1) {
    res <- eval(parse(text=paste('object$root', root, '$', type, sep='')))
  } else {
    res <- vector(length=0)
    for (iroot in root) {
      res <- rbind(res, eval(parse(text=paste('object$root', iroot, '$', type, sep=''))))
    }
    rownames(res) <- paste('root', root, sep='')
  }
    
  if(is.null(object$na.action)) res
  else naresid(object$na.action, res)
}

## from formula.glm
formula.wle.glm <- function(x, ...) {
    form <- x$formula
    if( !is.null(form) ) {
        form <- formula(x$terms) # has . expanded
        environment(form) <- environment(x$formula)
        form
    } else formula(x$terms)
}

## from residuals.glm
residuals.wle.glm <- function(object, type = c("deviance", "pearson", "working","response", "partial"), root="all", ...) {
  type <- match.arg(type)
  if (type == "partial") 
    .NotYetImplemented()
  res.internal <- function(type, y, dev.resids, variance, mu.eta, r, mu, wts, eta, df.res) {
    switch(type,
           deviance=,pearson=,response=
           if(is.null(y)) {
               y <-  mu + r * mu.eta(eta)
           })
    res <- switch(type,
                  deviance = if(df.res > 0) {
                      d.res <- sqrt(pmax(dev.resids(y, mu, wts),
 0))
                      ifelse(y > mu, d.res, -d.res)
                  } else rep.int(0, length(mu)),
                  pearson = (y-mu)*sqrt(wts)/sqrt(variance(mu)),
                  working = r,
                  response = y - mu,
                  partial = r
                  )
    if(!is.null(object$na.action))
        res <- naresid(object$na.action, res)
    if (type == "partial") ## need to avoid doing naresid() twice.
        .NotYetImplemented() ##res <- res+predict(object, type="terms")
    return(res)
  }

  y <- object$y
  dev.resids <- object$family$dev.resids
  variance <- object$family$variance
  mu.eta <- object$family$mu.eta

  if (root=="all")
    root <- 1:object$tot.sol
  root <- round(as.numeric(root))
  root <- root[root <= object$tot.sol & root > 0]
  if (!length(root))
    stop('not valid root numbers provided')
  if (length(root)==1) {
    r <- eval(parse(text=paste('object$root', root, '$residuals', sep='')))
    mu <- eval(parse(text=paste('object$root', root, '$fitted.values', sep='')))
    wts <- eval(parse(text=paste('object$root', root, '$prior.weights', sep='')))
    eta <- eval(parse(text=paste('object$root', root, '$linear.predictors', sep='')))
    df.res <- eval(parse(text=paste('object$root', root, '$df.residual', sep='')))
    res <- res.internal(type, y, dev.resids, variance, mu.eta, r, mu, wts, eta, df.res)
  } else {
    res <- vector(length=0)
    for (iroot in root) {
      r <- eval(parse(text=paste('object$root', iroot, '$residuals', sep='')))
      mu <- eval(parse(text=paste('object$root', iroot, '$fitted.values', sep='')))
      wts <- eval(parse(text=paste('object$root', iroot, '$prior.weights', sep='')))
      eta <- eval(parse(text=paste('object$root', iroot, '$linear.predictors', sep='')))
      df.res <- eval(parse(text=paste('object$root', iroot, '$df.residual', sep='')))
      res <- rbind(res, res.internal(type, y, dev.resids, variance, mu.eta, r, mu, wts, eta, df.res))
    }
    rownames(res) <- paste('root', root, sep='')
  }
  return(res)
}

#################################
summary.wle.glm <- function(object, root=1, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ...) {
  if (object$tot.sol>0) {
    root <- round(as.numeric(root))
    if (length(root)!=1)
      stop("'root' arguments must be a vector of length 1")
    if (root > object$tot.sol | root <= 0)
      stop("'root' must be between 1 and object$tot.sol")
    object <- extractRoot.wle.glm(object, root=root)
    res <- summary.wle.glm.root(object=object, dispersion=dispersion, correlation=correlation, symbolic.cor=symbolic.cor, ...)
    return(res)
  } else {
    cat('No converged solutions were found\n')
  }
}

## from summary.glm
summary.wle.glm.root <- function(object, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ...) {
    est.disp <- FALSE
    df.r <- object$df.residual
    if(is.null(dispersion))	# calculate dispersion if needed
	dispersion <-
	    if(object$family$family %in% c("poisson", "binomial"))  1
	    else if(df.r > 0) {
                est.disp <- TRUE
		if(any(object$weights==0))
		    warning("observations with zero weight not used for calculating dispersion")
                if (object$family$family=="Gamma" | object$family$family=="inverse.gaussian")
                  object$dispersion
                else 
  		  sum((object$wle.weights*object$weights*object$residuals^2)[object$weights > 0])/ df.r
	    } else {
                est.disp <- TRUE
                NaN
            }

    ## calculate scaled and unscaled covariance matrix

    aliased <- is.na(coef(object))  # used in print method
    p <- object$rank
    if (p > 0) {
        p1 <- 1L:p
        Qr <- object$qr
        ## WATCHIT! doesn't this rely on pivoting not permuting 1L:p? -- that's quaranteed
        coef.p <- object$coefficients[Qr$pivot[p1]]
        covmat.unscaled <- chol2inv(Qr$qr[p1,p1,drop=FALSE])
        dimnames(covmat.unscaled) <- list(names(coef.p),names(coef.p))
        covmat <- dispersion*covmat.unscaled
        var.cf <- diag(covmat)

        ## calculate coef table

        s.err <- sqrt(var.cf)
        tvalue <- coef.p/s.err

        dn <- c("Estimate", "Std. Error")
        if(!est.disp) { # known dispersion
            pvalue <- 2*pnorm(-abs(tvalue))
            coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
            dimnames(coef.table) <- list(names(coef.p),
                                         c(dn, "z value","Pr(>|z|)"))
        } else if(df.r > 0) {
            pvalue <- 2*pt(-abs(tvalue), df.r)
            coef.table <- cbind(coef.p, s.err, tvalue, pvalue)
            dimnames(coef.table) <- list(names(coef.p),
                                         c(dn, "t value","Pr(>|t|)"))
        } else { # df.r == 0
            coef.table <- cbind(coef.p, NaN, NaN, NaN)
            dimnames(coef.table) <- list(names(coef.p),
                                         c(dn, "t value","Pr(>|t|)"))
        }
        df.f <- NCOL(Qr$qr)
    } else {
        coef.table <- matrix(, 0L, 4L)
        dimnames(coef.table) <-
            list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
        covmat.unscaled <- covmat <- matrix(, 0L, 0L)
        df.f <- length(aliased)
    }
    ## return answer

    ## these need not all exist, e.g. na.action.
    keep <- match(c("call","terms","family","deviance", "aic",
		      "contrasts", "df.residual","null.deviance","df.null",
                      "iter", "na.action"), names(object), 0L)
    ans <- c(object[keep],
	     list(deviance.resid = residuals(object, type = "deviance"),
		  coefficients = coef.table,
                  aliased = aliased,
		  dispersion = dispersion,
		  df = c(object$rank, df.r, df.f),
		  cov.unscaled = covmat.unscaled,
		  cov.scaled = covmat))

    if(correlation && p > 0) {
	dd <- sqrt(diag(covmat.unscaled))
	ans$correlation <-
	    covmat.unscaled/outer(dd,dd)
	ans$symbolic.cor <- symbolic.cor
    }
    class(ans) <- "summary.wle.glm"
    return(ans)
}

# from print.summary.glm
print.summary.wle.glm <- function (x, digits = max(3, getOption("digits") - 3), symbolic.cor = x$symbolic.cor, signif.stars = getOption("show.signif.stars"), ...) {
    cat("\nCall:\n")
    cat(paste(deparse(x$call), sep="\n", collapse="\n"), "\n\n", sep="")
    cat("Deviance Residuals: \n")
    if(x$df.residual > 5) {
	x$deviance.resid <- quantile(x$deviance.resid,na.rm=TRUE)
	names(x$deviance.resid) <- c("Min", "1Q", "Median", "3Q", "Max")
    }
    print.default(x$deviance.resid, digits=digits, na.print = "",
                  print.gap = 2)

    if(length(x$aliased) == 0L) {
        cat("\nNo Coefficients\n")
    } else {
        ## df component added in 1.8.0
        ## partial matching problem here.
        df <- if ("df" %in% names(x)) x[["df"]] else NULL
        if (!is.null(df) && (nsingular <- df[3L] - df[1L]))
            cat("\nCoefficients: (", nsingular,
                " not defined because of singularities)\n", sep = "")
        else cat("\nCoefficients:\n")
        coefs <- x$coefficients
        if(!is.null(aliased <- x$aliased) && any(aliased)) {
            cn <- names(aliased)
            coefs <- matrix(NA, length(aliased), 4L,
                            dimnames=list(cn, colnames(coefs)))
            coefs[!aliased, ] <- x$coefficients
        }
        printCoefmat(coefs, digits=digits, signif.stars=signif.stars,
                     na.print="NA", ...)
    }
    ##
    cat("\n(Dispersion parameter for ", x$family$family,
	" family taken to be ", format(x$dispersion), ")\n\n","\nThe following statistics are valid only if this model is the FULL model in a model selection procedure\n",
	apply(cbind(paste(format(c("Null","Residual"), justify="right"),
                          "deviance:"),
		    format(unlist(x[c("null.deviance","deviance")]),
			   digits= max(5, digits+1)), " on",
		    format(unlist(x[c("df.null","df.residual")])),
		    " degrees of freedom\n"),
	      1L, paste, collapse=" "), sep="")
    if(nzchar(mess <- naprint(x$na.action))) cat("  (",mess, ")\n", sep="")
    cat("AIC: ", format(x$aic, digits= max(4, digits+1)),"\n\n",
	"Number of Fisher Scoring iterations: ", x$iter,
	"\n", sep="")

    correl <- x$correlation
    if(!is.null(correl)) {
# looks most sensible not to give NAs for undefined coefficients
#         if(!is.null(aliased) && any(aliased)) {
#             nc <- length(aliased)
#             correl <- matrix(NA, nc, nc, dimnames = list(cn, cn))
#             correl[!aliased, !aliased] <- x$correl
#         }
	p <- NCOL(correl)
	if(p > 1) {
	    cat("\nCorrelation of Coefficients:\n")
	    if(is.logical(symbolic.cor) && symbolic.cor) {# NULL < 1.7.0 objects
		print(symnum(correl, abbr.colnames = NULL))
	    } else {
		correl <- format(round(correl, 2), nsmall = 2, digits = digits)
		correl[!lower.tri(correl)] <- ""
		print(correl[-1, -p, drop=FALSE], quote = FALSE)
	    }
	}
    }
    cat("\n")
    invisible(x)
}

Try the wle package in your browser

Any scripts or data that you put into this service are public.

wle documentation built on May 29, 2017, 11:48 a.m.