R/collapseItemScores.R

# Collapses total scores (rows) if O row sum < 1
# Collapses categories (cols) if cell E < 1
# Mostly taken from mirt
collapseItemScores <- function (On, En, mincell = 1)
{
    tO <- On
    tE <- En

    #drop <- which(rowSums(is.na(En)) > 0)
    drop <- which(apply(En, 1, function(row) any(row ==0 )))
    En[is.na(En)] <- 0
    if (length(drop)) {
        up <- drop[1L]:drop[length(drop)/2]
        low <- drop[length(drop)/2 + 1L]:drop[length(drop)]
        En[max(up) + 1, ] <- colSums(En[c(up, max(up) +
                                              1), , drop = FALSE])
        On[max(up) + 1, ] <- colSums(On[c(up, max(up) +
                                              1), , drop = FALSE])
        En[min(low) - 1, ] <- colSums(En[c(low, min(low) -
                                               1), , drop = FALSE])
        On[min(low) - 1, ] <- colSums(On[c(low, min(low) -
                                               1), , drop = FALSE])
        En[c(up, low), ] <- On[c(up, low), ] <- NA
        En <- na.omit(En)
        On <- na.omit(On)
    }

    # remove rows with all 0's (no one with given total score)
    drop <- rowSums(On) == 0
    On <- On[!drop, ]
    En <- En[!drop, ]

    # collapse rows with only one observation with row+1
    drop <- c()
    for (j in seq_len(nrow(On) - 1L)) {
        ss <- sum(On[j, ])
        if (ss == 1L) {
            drop <- c(drop, j)
            On[j + 1L, ] <- On[j + 1L, ] + On[j, ]
            En[j + 1L, ] <- En[j + 1L, ] + En[j, ]
        }
    }
    if (length(drop)) {
        On <- On[-drop, ]
        En <- En[-drop, ]
    }
    # is last row only has 1 after above, collapse with row-1
    ss <- sum(On[nrow(On), ])
    if (ss == 1L) {
        On[nrow(On) - 1L, ] <- On[nrow(On) - 1L, ] + On[nrow(On),
        ]
        En[nrow(On) - 1L, ] <- En[nrow(On) - 1L, ] + En[nrow(On),
        ]
        On <- On[-nrow(On), ]
        En <- En[-nrow(En), ]
    }
    # If we have more than 2 columns, collapse where E < mincell
    L <- En < mincell
    if (ncol(En) > 2L) {
        for (j in seq_len(nrow(En))) {
            if (!any(L[j, ]))
                next
            tmp <- En[j, ]
            tmp2 <- On[j, ]
            while (length(tmp) > 2L) {
                m <- min(tmp)
                whc <- max(which(m == tmp))
                if (whc == 1L) { # collapse right if 1st
                    tmp[2L] <- tmp[2L] + tmp[1L]
                    tmp2[2L] <- tmp2[2L] + tmp2[1L]
                }
                else if (whc == length(tmp)) { # collapse left is last
                    tmp[length(tmp) - 1L] <- tmp[length(tmp) -
                                                     1L] + tmp[length(tmp)]
                    tmp2[length(tmp2) - 1L] <- tmp2[length(tmp2) -
                                                        1L] + tmp2[length(tmp2)]
                }
                else {
                    left <- min(tmp[whc - 1L], tmp[whc + 1L]) ==
                        c(tmp[whc - 1L], tmp[whc + 1L])[1L]
                    pick <- if (left)
                        whc - 1L
                    else whc + 1L
                    tmp[pick] <- tmp[pick] + tmp[whc]
                    tmp2[pick] <- tmp2[pick] + tmp2[whc]
                }
                tmp[whc] <- tmp2[whc] <- NA
                tmp <- na.omit(tmp)
                tmp2 <- na.omit(tmp2)
                if (all(tmp >= mincell))
                    break
            }
            tmp <- c(tmp, rep(NA, ncol(En) - length(tmp)))
            tmp2 <- c(tmp2, numeric(ncol(En) - length(tmp2)))
            En[j, ] <- tmp
            On[j, ] <- tmp2
        }
    }
    En[is.na(En)] <- 0 # set to 0 before this?

    # collapse columns if column sum is too small
    if (ncol(En) > 2L) {
        while (TRUE) {
            pick <- colSums(En) < mincell * ceiling(nrow(En) *
                                                        0.1)
            if (!pick[length(pick)] || ncol(En) == 2L)
                break
            if (pick[length(pick)]) {
                On[, length(pick) - 1L] <- On[, length(pick) -
                                                  1L] + On[, length(pick)]
                En[, length(pick) - 1L] <- En[, length(pick) -
                                                  1L] + En[, length(pick)]
                On <- On[, -length(pick)]
                En <- En[, -length(pick)]
            }
        }
    }
    # collapse columns if column sum is too small
    dropcol <- logical(ncol(En))
    for (j in ncol(En):2L) {
        tmp <- sum(En[, j] > 0)/nrow(En)
        if (tmp < 0.05) {
            dropcol[j] <- TRUE
            En[, j - 1L] <- En[, j - 1L] + En[, j]
            On[, j - 1L] <- On[, j - 1L] + On[, j]
        }
    }
    En <- En[, !dropcol]
    On <- On[, !dropcol]

    # If there still are any small ones
    L <- En < mincell & En != 0
    while (any(L, na.rm = TRUE)) {
        if (!is.matrix(L))
            break
        whc <- min(which(rowSums(L) > 0L))
        if (whc == 1L) {
            En[2L, ] <- En[2L, ] + En[1L, ]
            On[2L, ] <- On[2L, ] + On[1L, ]
            En <- En[-1L, ]
            On <- On[-1L, ]
        }
        else if (whc == nrow(En)) {
            En[nrow(En) - 1L, ] <- En[nrow(En) - 1L, ] +
                En[nrow(En), ]
            On[nrow(On) - 1L, ] <- On[nrow(On) - 1L, ] +
                On[nrow(On), ]
            En <- En[-nrow(En), ]
            On <- On[-nrow(On), ]
        }
        else {
            ss <- c(sum(On[whc - 1L, ]), sum(On[whc + 1L,
            ]))
            up <- (min(ss) == ss)[1L]
            pick <- if (up)
                whc - 1L
            else whc + 1L
            En[pick, ] <- En[pick, ] + En[whc, ]
            On[pick, ] <- On[pick, ] + On[whc, ]
            En <- En[-whc, ]
            On <- On[-whc, ]
        }
        L <- En < mincell & En != 0
    }
    En[En == 0] <- NA # we want NAs
    return(list(On = On, En = En))
}
joakimwallmark/PolyOptimalIRT documentation built on Dec. 21, 2021, 1:16 a.m.