R/flipscores.R
In flipscores: Robust Score Testing in GLMs, by Sign-Flip Contributions

Documented in flipscores

#' @title Robust testing in GLMs, by sign-flipping score contributions
#'
#' @description Provides robust tests for testing in GLMs, by sign-flipping score contributions. The tests are often robust against overdispersion, heteroscedasticity and, in some cases, ignored nuisance variables.
#' @param score_type The type of score that is computed. It can be "orthogonalized", "effective" or "basic". 
#' Both "orthogonalized" and "effective" take into account the nuisance estimation and they provide the same
#' test statistic. In case of small samples "effective score" might have a slight anti-conservative behaviour. 
#' "orthogonalized effective score" gives a solution for this issue.
#' Note that in case of a big model matrix, the "orthogonalized" may take a long time.
#'
#' @param n_flips The number of random flips of the score contributions.
#' When \code{n_flips} is equal or larger than the maximum number of possible flips (i.e. n^2), all possible flips are performed.
#'
#' @param id a \code{vector} identifying the clustered observations. If \code{NULL} (default) observations are assumed to be independent. If \code{id} is not \code{NULL}, only \code{score_type=="effective"} is allowed, yet.
#' @param alternative It can be "greater", "less" or "two.sided" (default)
#' @param seed \code{NULL} by default. 
#' @param formula see \code{glm} function. It can also be a model (usually generated by a call to \code{glm}); in this case, any other glm-related parameter (e.g. \code{family, data, etc.}) are discarded, the function will make use of the ones used to generate the model.  
#' @param family see \code{glm} function.
#' @param data see \code{glm} function.
# @param model can be a model (e.g. \code{lm} or \code{glm}). In this case other parameters used to fit a \code{glm} 
# (i.e. \code{formula}, \code{family}, \code{data}, etc) are not considered.  It is \code{NULL} by default (i.e. not used). 
#' @param ... see \code{glm} function.
#' 
#'
#' @usage flipscores(formula, family, data, score_type, 
#' n_flips=5000, alternative ="two.sided", 
#' id = NULL, seed = NULL, ...)
#'
#' @return glm class object with sign-flip score test.
#' See also the related functions (\code{summary.flipscores}, \code{anova.flipscores}, \code{print.flipscores}). 
#'
#' @details \code{flipscores} borrow the same parameters from function \code{glm} (and \code{glm.nb}). See these helps for more details about parameters such as \code{formula},
#' \code{data}, \code{family}. Note: in order to use Negative Binomial family, \code{family} reference must have quotes (i.e. \code{family="negbinom"}). 
#'
#' @author Livio Finos, Riccardo De Santis, Vittorio Giatti, Jesse Hemerik and Jelle Goeman
#'
#' @seealso \code{\link{anova.flipscores}}, \code{\link{summary.flipscores}}, \code{\link[flip]{flip}}
#'
#' @name flipscores
#' 
#' @references "Robust testing in generalized linear models by sign-flipping score contributions" by J.Hemerik, J.Goeman and L.Finos.
#' 
#' @examples
#' set.seed(1)
#' dt=data.frame(X=rnorm(20),
#'    Z=factor(rep(LETTERS[1:3],length.out=20)))
#' dt$Y=rpois(n=20,lambda=exp(dt$Z=="C"))
#' mod=flipscores(Y~Z+X,data=dt,family="poisson")
#' summary(mod)
#' 
#' # Equivalent to:
#' model=glm(Y~Z+X,data=dt,family="poisson")
#' mod2=flipscores(model)
#' summary(mod2)
#' @export



flipscores<-function(formula, family, data,
                     score_type = "standardized",
                     n_flips=5000, 
                     alternative ="two.sided", 
                     id = NULL,
                     seed=NULL,
                     ...){
  # if(FALSE) flip() #just a trick to avoid warnings in package building
  # temp=is(formula) #just a trick to avoid warnings in package building
  # catturo la call,
  fs_call <- mf <- match.call()
  
  score_type=match.arg(score_type,c("orthogonalized","standardized","effective","basic"))
  if(missing(score_type))
    stop("test type is not specified or recognized")
  
  
  # individuo i parametri specifici di flip score
  m <- match(c("score_type","n_flips","alternative","id","seed"), names(mf), 0L)
  m <- m[m>0]
  flip_param_call= mf[c(1L,m)]
  
  # rinomino la funzione da chiamare:
  flip_param_call[[1L]]=.flip_test#quote(flipscores:::.flip_test) #
  
  flip_param_call$n_flips <- eval(flip_param_call$n_flips, parent.frame())
  flip_param_call$score_type <- eval(flip_param_call$score_type, parent.frame()) 
  if(is.null(flip_param_call$score_type)) flip_param_call$score_type = "standardized"
  flip_param_call$seed <- eval(flip_param_call$seed, parent.frame())
  
  m2 <- match(c("to_be_tested"), names(mf), 0L)
  if(m2==0)
    to_be_tested=NULL
  else{
    m <- c(m,m2)
    to_be_tested=mf[[m2]]
  }
  
  #####check id not null only with effective score:
  if(!is.null(flip_param_call$id)&&(score_type=="orthogonalized")){
    print(warning("WARNING: Use of id is allowed only with score_type=='effective', yet. 
 Nothing done."))
    return(NULL)
  }
  
  # mi tengo solo quelli buoni per glm
  if(length(m)>0) mf <- mf[-m] 
  
  model = eval(mf$formula,parent.frame()) # most of the time it is not a model
  if("formula"%in%is(model)){ # usual input
    # if("model"%in%names(mf)){
    #   #compute H1 model
    #   model <- eval(mf$model, parent.frame())
    #   if(is.null(model[["x"]])){
    #     param_x_ORIGINAL=FALSE
    #     model=update(model,x=TRUE)
    #     } else param_x_ORIGINAL=TRUE
    #   
    # } else { #fit the glm or negbinom model
    #set the model to fit
    if(!is.null(mf$family)&&(mf$family=="negbinom")){
      mf[[1L]]=quote(glm.nb)
      mf$family=NULL
    } else{
      mf[[1L]]=quote(glm)
    }
    
    #compute H1 model
    param_x_ORIGINAL=mf$x
    mf$x=TRUE
    model <- eval(mf, parent.frame())
  } else { # input is a model
    param_x_ORIGINAL <- TRUE
    model <- update(model,x=TRUE,family=model$family)
  }
  
  if(is.null(model$y)) model$y=model$model[,1]
  
  #compute H0s models
  if(is.null(to_be_tested))
    to_be_tested=colnames(model[["x"]]) else
      to_be_tested=eval(to_be_tested,parent.frame())
  
  if(is.null(flip_param_call$seed)) flip_param_call$seed=Sys.time() #eval(.Random.seed[1], envir=.GlobalEnv)
  results=lapply(to_be_tested,socket_compute_scores_and_flip,
                 model,
                 flip_param_call=flip_param_call#score_type=score_type,
                 # id=eval(flip_param_call$id, parent.frame()),
                 # alternative=flip_param_call$alternative,
                 # n_flips=flip_param_call$n_flips,
                 # seed=flip_param_call$seed
  )
  model$scores=data.frame(lapply(results,function(x)x[[1]]$scores))
  nrm=sapply(results,function(x)attributes(x[[1]]$scores)$scale_objects$nrm)
  model$Tspace=data.frame(lapply(results,function(x)x[[1]]$Tspace)) 
  model$p.values=sapply(results,function(x)x[[1]]$p.values)
  names(nrm) <- names(model$scores)<- names(model$Tspace) <- names(model$p.values) <-to_be_tested
  attr(model$scores,"nrm")=nrm
  
  ### output
  model$call=fs_call
  model$flip_param_call=flip_param_call
  # model$id=flip_param_call$id
  model$score_type=score_type
  model$n_flips=n_flips
  
  
  if(is.null(param_x_ORIGINAL)||(!param_x_ORIGINAL)) model$x=NULL
  class(model) <- c("flipscores", class(model))
  return(model)
}