R/tcl_datcheck_full.R
In tcl: Testing in Conditional Likelihood Context

# adapted from eRm function datcheck.R
# addded AK 20-02-022

# datcheck <- function(X, W, mpoints, groupvec, model){

tcl_datcheck_full <- function(X,model){
  mpoints <- 1 # addded AK 20-02-2022

  if(is.data.frame(X)){
    X <- as.matrix(X)   # X as data frame allowed
  }

  if(is.null(colnames(X))){                                 #determine item names
    if(mpoints > 1){
      mpind <- paste("t",rep(1:mpoints,each=(ncol(X)/mpoints),1),sep="") #time points
      itemind <- paste("I",1:(ncol(X)/mpoints),sep="")
      colnames(X) <- paste(itemind,mpind)
    } else {
      colnames(X) <- paste("I",1:ncol(X),sep="")                         #item labels
    }
  }
  if(is.null(rownames(X))) rownames(X) <- paste0("P", seq_len(nrow(X)))   #person labels

  ### added AK 21.02.2022

  if (model == "RM") {

    func <- function(X) suppressWarnings("Rm" %in% class(try(eRm::RM(X), silent = TRUE)))

    #### Check for ill-conditioned data matrix X  in full model ######
    XWcheck <- !func(X)

    # if (XWcheck$ill_conditioned == TRUE ){
    if (XWcheck){
      warning(paste0(
        # stop(paste0(
        "\n",
        "\n",
        # prettyPaste("Estimation stopped due to ill-conditioned data matrix X! Suspicious items in full model):"),
        prettyPaste("No test statistic computable. Model not identified from the data."),
        "\n",
        "\n"
        # paste("No Estimation not possibke in full model!", collapse=" ")
      ),
      call. = FALSE, immediate.=TRUE)
      return(list(X = NA, X_original=NA, del_pos=NA, Xcheck="none"))
    } # end if
  }


  # #----------------------- check groupvec --------------------------
  #
  # if((length(groupvec) > 1L) && (length(groupvec) != nrow(X))){
  #   stop("Wrong specification of groupvec!")
  # }
  #
  # if(min(groupvec) != 1L){
  #   stop("Group specification must start with 1!")
  # }
  #
  # if(length(unique(groupvec)) != (max(groupvec))){
  #   stop("Group vector is incorrectly specified (perhaps a category is missing)!")   # rh 2011-03-03
  # }
  #
  # if((max(groupvec) > 1L) && (mpoints == 1)){
  #   stop(paste0("\n", prettyPaste("Model not identifiable! Group contrasts can only be imposed for repeated measurement designs.")))
  # }
  #
  # #  if ((length(groupvec) > 1) && any(is.na(X))) {
  # #    stop("Model with repeated measures, group specification and NAs cannot be computed!") }
  #
  # #----------------------- check X --------------------------------
  allna.vec <- apply(X,2,function(y) {all(is.na(y))})                 #eliminate items with all NA's
  if (any(allna.vec)) {stop("There are items with full NA responses which must be deleted!")}

  allna.vec <- apply(X,1,function(y) {all(is.na(y))})                 #eliminate items with all NA's
  if (any(allna.vec)) {stop("There are persons with full NA responses which must be deleted!")}

  allna.vec <- apply(X,1,function(y) {sum(is.na(y))})
  if (any(allna.vec == (ncol(X)-1L))) {stop("Subjects with only 1 valid response must be removed!")}

  ri.min <- apply(X,2,min,na.rm=TRUE)                                 #if no 0 responses
  if(any(ri.min > 0)){
    warning(paste0(
      "\n",
      prettyPaste("The following items have no 0-responses:"),
      "\n",
      paste(colnames(X)[ri.min > 0], collapse=" "),
      "\n",
      prettyPaste("Responses are shifted such that lowest category is 0.")
    ), call. = FALSE, immediate.=TRUE)
  }
  X <- t(apply(X,1,function(y) {y-ri.min}))                           #shift down to 0

  ri <- apply(X,2,sum,na.rm=TRUE)                                     #item raw scores
  n.NA <- colSums(apply(X,2,is.na))                                   #number of NA's per column
  maxri <- (dim(X)[1]*(apply(X,2,max,na.rm=TRUE)))-n.NA               #maximum item raw scores with NA
  TFcol <- ((ri==maxri) | (ri==0))
  X.n <- X[,!TFcol]                                                   #new matrix with excluded items
  item.ex <- (seq_len(ncol(X)))[TFcol]                                     #excluded items
  if(length(item.ex) > 0) {
    if(mpoints == 1){
      warning(paste0(
        "\n",
        # prettyPaste("The following items were excluded due to complete 0/full responses:"),
        prettyPaste("The following items were excluded for the computation of RS test due to complete 0/full responses:"),
        "\n",
        paste(colnames(X)[item.ex], collapse=" ")
      ), call. = FALSE, immediate.=TRUE)
    } else {
      stop(paste0(
        "\n",
        "The following items show complete 0/full responses:",
        "\n",
        paste(colnames(X)[item.ex], collapse=" "),
        "\n",
        prettyPaste("Estimation cannot be performed! Delete the corresponding items for the other measurement points as well!")
      ), call. = FALSE)
    }
  }

  if ((model=="PCM") || (model=="LPCM")) {                         #check if there are missing categories for PCM (for RSM doesn't matter)
    tablist <- apply(X,2,function(x) list(as.vector(table(x))))
    tablen <- sapply(tablist,function(x) length(x[[1]]))
    xmax <- apply(X,2,max)+1
    indwrong <- which(tablen != xmax)
    if(length(indwrong) > 0){
      warning(paste0(
        "\n",
        # prettyPaste("The following items do not have responses on each category:"),
        prettyPaste("The following items do not have responses on each category in the full model:"),
        "\n",
        paste(colnames(X)[indwrong], collapse=" "),
        "\n",
        prettyPaste("Estimation may not be feasible. Please check data matrix!")
      ), call. = FALSE, immediate.=TRUE)
    }
  }


  # #-------------------------- ill conditioned for RM and LLTM --------------
  # if ((model=="RM") || (model=="LLTM")) {
  #   if (length(table(X.n)) != 2L) stop("Dichotomous data matrix required!")
  #   k.t   <- dim(X.n)[2L]/mpoints                                    #check for each mpoint separately
  #   t.ind <- rep(seq_len(mpoints), 1L, each=k.t)
  #   X.nlv <- split(t(X.n),t.ind)                                  #split X due to mpoints
  #   cn.lv <- split(colnames(X.n),t.ind)
  #   X.nl  <- lapply(X.nlv,matrix,ncol=k.t,byrow=TRUE)
  #   for(i in seq_len(length(X.nl))) colnames(X.nl[[i]]) <- cn.lv[[i]]
  #
  #   for(l in seq_len(mpoints)){                                       #check within mpoints
  #     X.nll <- X.nl[[l]]
  #     k <- ncol(X.nll)
  #     adj <- matrix(0, ncol=k, nrow=k)
  #     for(i in seq_len(k)) for(j in seq_len(k)) {
  #       adj[i,j]<- 1*any(X.nll[,i] > X.nll[,j], na.rm = TRUE)
  #     }
  #     cd  <- component.dist(adj, connected = "strong")
  #     cm  <- cd$membership
  #     cmp <- max(cm)
  #     if(cmp > 1L) {
  #       cmtab <- table(cm)
  #       maxcm.n <- as.numeric(names(cmtab)[cmtab!=max(cmtab)])
  #       suspcol <- (seq_len(length(cm)))[tapply(cm, seq_len(length(cm)), function(x){ any(maxcm.n == x) })]
  #       n.suspcol <- colnames(X.nll)[suspcol]
  #       stop(paste0(
  #         "\n",
  #         prettyPaste("Estimation stopped due to ill-conditioned data matrix X! Suspicious items:"),
  #         "\n",
  #         paste(n.suspcol, collapse=" ")
  #       ), call. = FALSE)
  #     }
  #   }
  # }
  # #----------------------- end ill-conditioned check -------------------------------

  # return(list(X = X.n, groupvec = groupvec))

  if(rlang::is_empty(item.ex)) item.ex<-NA # addded AK 20-02-2022

  return(list(X = X.n, X_original=X, del_pos=item.ex, Xcheck="RS")) # addded AK 20-02-2022

}