R/overlap_fc.R

Defines functions separation_qc overlap_qc overlap_fc

Documented in overlap_fc overlap_qc separation_qc

#' A fraction check for overlap.
#'
#' Intended for data where one suspects overlap, this function checks for overlap on an ever growing subset of the data. It is a low-level function for a response vector y and a design matrix X (or, alternatively, a structure vector matrix S).
#'
#' The function samples a fraction of observations from the data and checks for overlap. If overlap exists in a subset, then overlap exists in the overall data (Sablica et al., 2026). If no overlap is found, it takes a larger sample and checks again until all data are tested. If no overlap is found even for all data, it concludes there is separation.
#'
#' When working with y and X, a subsample that does not contain every distinct value of y is not evaluated; the function simply moves on to the next, larger subsample. The rows of each subsample are drawn at random without replacement, so call \code{set.seed} beforehand if the sequence of subsamples needs to be reproducible.
#'
#' Since solving the exact linear program on the full data via \code{\link{checkovl}} can take a long time for large data, this check can be quicker in case of overlap (especially if the overlapping categories are not rare). However, if there is separation this function usually takes longer.
#'
#' @param y outcome vector. Ignored if S is supplied.
#' @param X design matrix. Ignored if S is supplied.
#' @param S structure vector matrix. If supplied, the check is run on the rows of S and y, X and model are not used.
#' @param frac controls the size of the subsamples: round i checks i*floor(n/frac) randomly drawn rows (never more than n). Defaults to 10. If frac is below 1 or above n, frac=1 is used. Using frac=1 results in a single call to \code{\link{checkovl}} on all (randomly permuted) rows.
#' @param verbose should progress be reported. Defaults to 'FALSE'.
#' @param rational should rational arithmetic be used?
#' @param model what model class is intended to be fitted? Can be any of "b" for binary, "bcl" for baseline-category link, "cl" for cumulative link, "acl" for adjacent-category link. "sl" for sequential link, "osm" for ordered stereotype model. If missing it defaults to cumulative link for ordinal y and baseline-category for everything else.
#' @param backend which backend to use for the linear program. Can be 'rcdd' (default and only option for rational=TRUE) or 'ROI'.
#' @param solver the solver to be used in the backend. Defaults to "DualSimplex" for "rcdd" and the first LP solver returned by `ROI_applicable_solver()` for "ROI".
#' @return a Boolean; either 'TRUE' if we detect overlap or 'FALSE' if we do not (so the data show separation).
#'
#' @export
overlap_fc <- function(y, X, S, frac=10L, verbose=FALSE, rational=FALSE, model=c("b","bcl","cl","acl","sl","osm"), backend = c("rcdd", "ROI"), solver = NULL)
{
  backend <- .divorce_match_backend(backend)
  use_S <- !missing(S)
  if (use_S) {
    n <- dim(S)[1]
  } else {
    # A missing model is handed on as NULL so that checkovl picks its default.
    if (missing(model)) model <- NULL
    n <- length(y)
    # Loop-invariant: number of distinct outcome values in the full data.
    n_cat <- length(unique(y))
  }
  if (frac > n || frac < 1) frac <- 1
  step <- floor(n / frac)
  olcheck <- FALSE
  i <- 1
  repeat {
    target <- i * step
    # The last round may overshoot n when frac does not divide n; cap it.
    nc <- min(target, n)
    if (verbose > 0) cat("Checking ",nc,"rows.","\n")
    ind <- sample.int(n, nc)
    if (use_S) {
      # drop = FALSE keeps a matrix even for a single row or a single column.
      olcheck <- checkovl(S = S[ind, , drop = FALSE], rational = rational,
                          backend = backend, solver = solver)
    } else {
      ys <- y[ind]
      olcheck <- FALSE
      # We skip evaluation if not all categories are in the subsample.
      if (length(unique(ys)) == n_cat) {
        olcheck <- checkovl(y = ys, X = X[ind, , drop = FALSE],
                            rational = rational, model = model,
                            backend = backend, solver = solver)
      }
    }
    # Stop as soon as overlap is found or once all rows have been checked.
    if (isTRUE(olcheck) || target >= n) break
    i <- i + 1
  }
  olcheck
}
     

#' A quick check for overlap.
#'
#' Runs \code{sepcols} on the supplied data and reports overlap when none of the entries of its \code{separated} component is 'TRUE'. The data can be given either as an outcome vector y with a design matrix X, or as a structure vector matrix S.
#'
#' @param y outcome vector. Not used if S is supplied.
#' @param X design matrix. Not used if S is supplied.
#' @param S structure vector matrix. If supplied, y, X and model are not used.
#' @param rational should rational arithmetic be used?
#' @param model what model class is intended to be fitted? Can be any of "b" for binary, "bcl" for baseline-category link, "cl" for cumulative link, "acl" for adjacent-category link. "sl" for sequential link, "osm" for ordered stereotype model. If missing it defaults to cumulative link for ordinal y and baseline-category for everything else.
#' @param backend which backend to use for the linear program. Can be 'rcdd' (default and only option for rational=TRUE) or 'ROI'.
#' @param solver the solver to be used in the backend. Defaults to "DualSimplex" for "rcdd" and the first LP solver returned by `ROI_applicable_solver()` for "ROI".
#' @return a Boolean; either 'TRUE' if we detect overlap or 'FALSE' if we do not (so the data show separation).
#'
#'
#' @export
overlap_qc <- function(y, X, S,
                       rational = FALSE,
                       model = c("b", "bcl", "cl", "acl", "sl", "osm"),
                       backend = c("rcdd", "ROI"),
                       solver = NULL)
{
  if (!missing(S)) {
    # Structure-vector input: y, X and model play no role.
    sep_info <- sepcols(S = S, rational = rational, backend = backend,
                        solver = solver)
  } else {
    # A missing model is passed on as NULL.
    if (missing(model)) model <- NULL
    sep_info <- sepcols(y = y, X = X, rational = rational, model = model,
                        backend = backend, solver = solver)
  }
  found_separation <- any(sep_info$separated)
  !found_separation
}


#' A quick check for separation.
#'
#' Runs \code{sepcols} on the supplied data and reports separation when at least one entry of its \code{separated} component is 'TRUE'. The data can be given either as an outcome vector y with a design matrix X, or as a structure vector matrix S.
#'
#' @param y outcome vector. Not used if S is supplied.
#' @param X design matrix. Not used if S is supplied.
#' @param S structure vector matrix. If supplied, y, X and model are not used.
#' @param rational should rational arithmetic be used?
#' @param model what model class is intended to be fitted? Can be any of "b" for binary, "bcl" for baseline-category link, "cl" for cumulative link, "acl" for adjacent-category link. "sl" for sequential link, "osm" for ordered stereotype model. If missing it defaults to cumulative link for ordinal y and baseline-category for everything else.
#' @param backend which backend to use for the linear program. Can be 'rcdd' (default and only option for rational=TRUE) or 'ROI'.
#' @param solver the solver to be used in the backend. Defaults to "DualSimplex" for "rcdd" and the first LP solver returned by `ROI_applicable_solver()` for "ROI".
#' @return a Boolean; either 'TRUE' if we detect separation or 'FALSE' if we do not (so the data show overlap).
#'
#' @export
separation_qc <- function(y, X, S, rational=FALSE,model=c("b","bcl","cl","acl","sl","osm"), backend = c("rcdd", "ROI"), solver = NULL)
{
  if (missing(S)) {
    # A missing model is passed on as NULL.
    if (missing(model)) model <- NULL
    sep_info <- sepcols(y = y, X = X, rational = rational, model = model,
                        backend = backend, solver = solver)
  } else {
    # Structure-vector input: y, X and model play no role.
    sep_info <- sepcols(S = S, rational = rational, backend = backend,
                        solver = solver)
  }
  any(sep_info$separated)
}

Try the divoRce package in your browser

Any scripts or data that you put into this service are public.

divoRce documentation built on April 28, 2026, 3:01 p.m.