R/gof_functions.R

Defines functions ZeroTest DispersionTest GOF_check getQnorm getEnvelope var_res_2 var_res rqr rqr_zinb2 rqr_zinb1 rqr_zip rqr_nb2 rqr_nb1 rqr_pois Pres_ZINB2 Pres_ZINB1 Pres_ZIP

Documented in DispersionTest GOF_check rqr rqr_nb1 rqr_nb2 rqr_pois rqr_zinb1 rqr_zinb2 rqr_zip ZeroTest

Pres_ZIP<-function(y,mu,p)  (y-mu)/sqrt(mu*(1+p*mu/(1-p)))
Pres_ZINB1<-function(y,mu,p,r)  (y-mu)/sqrt(mu*(1+r+p*mu/(1-p)))
Pres_ZINB2<-function(y,mu,p,r) (y-mu)/sqrt(mu+(r*(mu^2)/(1-p))+((mu/(1-p))^2)*(p^2+p))


#' RQR for Poisson GLMM
#' Computes the randomized quantile residuals for Poisson GLMM
#'
#' @keywords internal
#'
#' @param x An object of clas *iccc*.
#' @return A vector with the residuals.
rqr_pois<-function(x){
  mu<-predict(x$model,type="response")
  y<-x$model$frame$y
  rqr<-qnorm(runif(length(y),ppois(y-1,mu),ppois(y,mu)))
  rqr<-ifelse(rqr=="Inf",5,rqr)
  rqr<-ifelse(rqr=="-Inf",-5,rqr)
  return(rqr)
}


#' RQR for NegBin1 GLMM
#' Computes the randomized quantile residuals for NegBin1 GLMM
#'
#' @keywords internal
#' @param x An object of clas *iccc*.
#' @return A vector with the residuals.
rqr_nb1<-function(x){
  mu<-predict(x$model,type="response")
  y<-x$model$frame$y
  x$varcomp
  r<-as.numeric(x$varcomp[3])
  rqr<-qnorm(runif(length(y),pnbinom(y-1,mu=mu,size=mu/r),pnbinom(y,mu=mu,size=mu/r)))
  rqr<-ifelse(rqr=="Inf",5,rqr)
  rqr<-ifelse(rqr=="-Inf",-5,rqr)
  return(rqr)
}

#' RQR for NegBin2 GLMM
#' Computes the randomized quantile residuals for NegBin2 GLMM
#'
#' @keywords internal
#' @param x An object of clas *iccc*.
#' @return A vector with the residuals.
rqr_nb2<-function(x){
  mu<-predict(x$model,type="response")
  y<-x$model$frame$y
  r<-as.numeric(x$varcomp[3])
  rqr<-qnorm(runif(length(y),pnbinom(y-1,mu=mu,size=1/r),pnbinom(y,mu=mu,size=1/r)))
  rqr<-ifelse(rqr=="Inf",5,rqr)
  rqr<-ifelse(rqr=="-Inf",-5,rqr)
  return(rqr)
}

#' RQR for ZIP GLMM
#' Computes the randomized quantile residuals for ZIP GLMM
#' @keywords internal
#' @param x An object of clas *iccc*.
#' @return A vector with the residuals.
rqr_zip<-function(x){
  mu<-predict(x$model,type="response")
  y<-x$model$frame$y
  p<-as.numeric(x$varcomp[3])
  rqr<-qnorm(runif(length(y),pzipois(y-1,lambda=mu,pstr0=p),pzipois(y,lambda=mu,pstr0=p)))
  rqr<-ifelse(rqr=="Inf",5,rqr)
  rqr<-ifelse(rqr=="-Inf",-5,rqr)
  return(rqr)
}

#' RQR for ZINB1 GLMM
#' Computes the randomized quantile residuals for ZINB1 GLMM
#' @keywords internal
#' @param x An object of clas *iccc*.
#' @return A vector with the residuals.
rqr_zinb1<-function(x){
  mu<-predict(x$model,type="response")
  y<-x$model$frame$y
  r<-as.numeric(x$varcomp[3])
  p<-as.numeric(x$varcomp[4])
  rqr<-qnorm(runif(length(y),pzinegbin(y-1,munb=mu,size=mu/r),pzinegbin(y,munb=mu,size=mu/r)))
  rqr<-ifelse(rqr=="Inf",5,rqr)
  rqr<-ifelse(rqr=="-Inf",-5,rqr)
  return(rqr)
}

#' RQR for ZINB2 GLMM
#' Computes the randomized quantile residuals for ZINB2 GLMM
#' @keywords internal
#' @param x An object of clas *iccc*.
#' @return A vector with the residuals.

rqr_zinb2<-function(x){
  mu<-predict(x$model,type="response")
  y<-x$model$frame$y
  r<-as.numeric(x$varcomp[3])
  p<-as.numeric(x$varcomp[4])
  rqr<-qnorm(runif(length(y),pzinegbin(y-1,munb=mu,size=1/r),pzinegbin(y,munb=mu,size=1/r)))
  rqr<-ifelse(rqr=="Inf",5,rqr)
  rqr<-ifelse(rqr=="-Inf",-5,rqr)
  return(rqr)
}


#' RQR for GLMM
#' Computes the randomized quantile residuals for GLMM
#' @keywords internal
#' @param x An object of clas *iccc*.
#' @details Randomized quantile residuals (RQR) are computed for GLMMM. The within-cluster families considered
#' are Poisson, Negative Binomial with additive and proportional extra-dispersion and their zero-inflated extensions.
#' For further details on RQR see Dunn and Smyth (1996) and Feng et al (2020)
#' @return A vector with the residuals.
#' @references {
#'
#' Dunn PK, Smyth GK. (1996). Randomized quantile residuals. J Comput Graph Stat.5(3):236–44.
#'
#' Feng et al. (2020). A comparison of residual diagnosis tools for diagnosing regression models for count data. BMC Medical Research Methodology 20:175
#' }
rqr<-function(x){
  fam<-x$model$modelInfo$family$family

  zi<-x$model$modelInfo$allForm$ziformula

  if (zi!=~1){
    if (fam=="poisson") res<-rqr_pois(x)
    if (fam=="nbinom1") res<-rqr_nb1(x)
    if (fam=="nbinom2") res<-rqr_nb2(x)
  }

  if (zi==~1){
    if (fam=="poisson") res<-rqr_zip(x)
    if (fam=="nbinom1") res<-rqr_zinb1(x)
    if (fam=="nbinom2") res<-rqr_zinb2(x)
  }

    return(res)
}


var_res <- function(x) {
  n<-nrow(x$model$frame)
  ns<-length(unique(x$model$frame$id))
  p<-length(x$model$fit$par)
  rdf<-n-ns-p
  res<-rqr(x)
  var.res<-sum(res^2)/rdf
  out<-matrix(var.res,nrow=1)
  colnames(out)<-c("Residual Variance")
  return(out)
}

var_res_2 <- function(x,df) {
  var.res<-sum(x^2)/df
  out<-matrix(var.res,nrow=1)
  colnames(out)<-c("Residual Variance")
  return(out)
}


getEnvelope <- function(model.object,nsim=100){

  zi<-model.object$modelInfo$allForm$ziformula
  if (zi!=~1)  fam<-model.object$modelInfo$family$family
  if (zi==~1){
    fam2<-model.object$modelInfo$family$family
    if (fam2 == "poisson") fam<-"zip"
    if (fam2 == "nbinom1") fam<-"zinb1"
    if (fam2 == "nbinom2") fam<-"zinb2"
  }


  if (is.null(model.object$frame$met) ) {
    sim.data<-cbind(simulate(model.object,nsim=nsim), id=model.object$frame$id)

    message("Simulating...","\n")

    resid.matrix<-sapply(1:nsim,function(i){

      if (i %in% seq(10,nsim,10))  message(paste(c(i,"..."),sep=""),appendLF=F)
      sim.data$y<-sim.data[,i]
      est<-invisible(icc_counts(sim.data,y="y",id="id",type=c("rep"),fam=fam))
      sort(rqr(est))
    }
    )

  }

  if (is.null(model.object$frame$met)==F ) {
    sim.data<-cbind(simulate(model.object,nsim=nsim), id=model.object$frame$id, met=model.object$frame$met)

    message("Simulating...","\n")

    resid.matrix<-sapply(1:nsim,function(i){
      if (i %in% seq(10,nsim,10))  message(paste(c(i,"..."),sep=""),appendLF=F)
      sim.data$y<-sim.data[,i]
      est<-invisible(icc_counts(sim.data,y="y",id="id",met="met",type=c("con"),
                                fam=fam))
      sort(rqr(est))
    }
    )

  }


  return(list(simdata=sim.data,resid.matrix=resid.matrix))

}



getQnorm <- function(y){

  n  <- length(y)
  qu <- rep(NA, n)

  for(i in 1:n){

    qu[i] <- sum(y <= y[i])/n

  }

  qh <- qnorm(p = qu)

  return(qh)

}


#' Goodness of fit for GLMM
#'
#' Assessment of goodness of fit for GLMM
#'
#' @param x An object of clas *iccc*.
#' @param nsim Number of simulations to run. Default is set to 100.
#' @param alpha Level of significance
#' @details Randomized quantile residuals are computed for the fitted model. Simulations based on the fitted model are generated
#' and the model is refitted to each simulated dataset. Envelopes for RQR are built as the appropriate quantile (in relation to the level fo significance) of RQR from
#' the refitted models. Additionally, a test for dispersion and zero inflation are carried out by comparing the RQR dispersion and the
#' number of zeros from the original model and data to those from the refitted models and simulated data.
#' @export
#' @return An object of class *GOF* for which method *plot* is available. A list with the following components:
#' \itemize{
#' \item *plot_env*. Plot of RQR envelopes with the original RQR.
#' \item *plot_var*. Plot of the simulated RQR dispersion.
#' \item *plot_zi*. Plot of the count of zeros in the simulated datasets.
#' \item *res_var*. Dispersion of RQR from the original sample.
#' \item *pval_var*. Proportion of simulated RQR dispersion that are greater than the original dispersion that can be interpreted as a simulated P-value to check the goodness of fit on dispersion.
#' \item *zero_count*. Count of zeros in the original sample.
#' \item *pval_zi*. Proportion of simulated zero count that are greater than that of the original sample. It can be interpreted as a simulated P-value to check the hypothesis of zero-inflation.
#' }
#' @seealso [plot.GOF()], [DispersionTest()],[ZeroTest()]
#'
#' @examples
#' \donttest{
#' # Poisson model. Repeatability setting.
#' iccpois<-icc_counts(EPP,y="Social",id="id")
#' GOF_check(iccpois)
#' # Zero-inflated Poisson model. Repeatability setting
#' icczip<-icc_counts(EPP,y="Social",id="id",fam="zip")
#' GOF_check(icczip)
#' }
GOF_check<-function(x,nsim=100,alpha=0.05){
  Freq<-NULL
  sim_out<-getEnvelope(x$model,nsim=nsim)

  mat<-sim_out$resid.matrix

  envelope.df <- data.frame(
    min = apply(mat, 1, FUN = quantile,probs=alpha,type=6),
    max = apply(mat, 1, FUN = quantile,probs=1-alpha,type=6),
    mean = apply(mat, 1, FUN = mean))

  res<-sort(rqr(x))
  theo<-getQnorm(res)
  env<-as.data.frame(cbind(envelope.df,res,theo))

  zi<-x$model$modelInfo$allForm$ziformula
  if (zi!=~1)  plot_tit<-x$model$modelInfo$family$family
  if (zi==~1)  plot_tit<-paste("Zero Inflated",x$model$modelInfo$family$family)

  g1<-ggplot(data=env, aes(x=theo,y=res))+geom_point(shape = "+")+
    geom_line(aes(x=theo,y=mean),color="red",lty=2)+
    geom_line(aes(x=theo,y=min),color="black")+
    geom_line(aes(x=theo,y=max),color="black")+
    theme_bw()+labs(x = "Theoretical quantiles",
                    y = "Sample quantiles",
                    title = paste(plot_tit,"model"))

  # Dispersion test

  n<-nrow(x$model$frame)
  ns<-length(unique(x$model$frame$id))
  p<-length(x$model$fit$par)
  rdf<-n-ns-p
  act_var=as.numeric(var_res(x))
  out_var_res<-data.frame(theo=apply(sim_out$resid.matrix,2,var_res_2,df=rdf))
  pval_var<-(sum(act_var<out_var_res$theo)+1)/(nrow(out_var_res)+1)


  g2<-ggplot(out_var_res,aes(x=theo, label=paste("Sample dispersion \n",round(act_var,2) ) ) ) +
    geom_density()+
    labs(y = "Density",x = "Simulated Residual Dispersion",
         title = paste(plot_tit,"model"))+theme_bw()

  limy<-layer_scales(g2)$y$range$range
  limx<-layer_scales(g2)$x$range$range

  g2 <- g2 + geom_label(aes(x=limx[1]+0.2*diff(limx),y=limy[1]+0.05*diff(limy)),size=3)

  #ZI test
  obs_c<-count_zero(x$model$frame$y)
  y_sim<-sim_out$simdata %>% select(starts_with("sim"))
  c0<-apply(y_sim,2,count_zero)
  pval_zi<-(sum(obs_c<=c0)+1)/(length(c0)+1)
  c0<-factor(c0,levels=min(c0):max(c0))
  z<-prop.table(table(c0))


  lab.g3<-paste("Zeros in sample:",obs_c )
  g3<-ggplot(as.data.frame(z), aes(c0, Freq, label = lab.g3)) + geom_bar(stat="identity") +
    xlab("Simulated Zero Count") + labs(y="Proportion",
                                        title = paste(plot_tit,"model")) +
    theme_bw()

  g3 <- g3 + geom_label(aes(x=levels(c0)[1],y=0.1),size=3,hjust=-0.5)


  out<-list(plot_env=g1,plot_var=g2,res_var=act_var,pval_var=pval_var,
            plot_zi=g3,zero_count=obs_c,pval_zi=pval_zi)

  class(out)<-"GOF"
  return(out)
}

#' Dispersion test for GLMM
#'
#' @param x An object of class *GOF* generated by *GOF_check* function.
#' @details The function prints the dispersion of sample randomized quantile residuals (RQR) and the simulated P-value.
#' @export
#' @return A vector with the sample RQR dispersion and the P-value.
#'
#' @examples
#' \donttest{
#' # Poisson model. Repeatability setting.
#' iccpois<-icc_counts(EPP,y="Social",id="id")
#' iccpois.gof<-GOF_check(iccpois)
#' DispersionTest(iccpois.gof)
#' }
#' @seealso [GOF_check()]
DispersionTest<-function(x){
  out<-data.frame(S=x$res_var,P_value=x$pval_var,row.names="")
  return(out)
}

#' Zero-Inflation test for GLMM
#'
#' @param x An object of class *GOF* generated by *GOF_check* function.
#' @details The function prints the count of zeros in the sample and the simulated P-value.
#' @return A vector with the zero count and the P-value.
#' @export
#' @examples
#' \donttest{
#' # Poisson model. Repeatability setting.
#' iccpois<-icc_counts(EPP,y="Social",id="id")
#' iccpois.gof<-GOF_check(iccpois)
#' ZeroTest(iccpois.gof)
#' # Zero-inflated Poisson model. Repeatability setting
#' icczip<-icc_counts(EPP,y="Social",id="id",fam="zip")
#' icczip.gof<-GOF_check(icczip)
#' ZeroTest(icczip.gof)
#' }
#' @seealso [GOF_check()]
ZeroTest<-function(x){
  out<-data.frame(Count=x$zero_count,P_value=x$pval_zi,row.names="")
  return(out)
}

Try the iccCounts package in your browser

Any scripts or data that you put into this service are public.

iccCounts documentation built on May 29, 2024, 7:57 a.m.