#' POLYSIBTEST
#'
#' This function assesses uniform DIF for polytomous DATA using the POLYSIBTEST procedure (Chang et al., 1996).
#' The function outputs the uniform DIF magnitudes as well as the significance testing.
#'
#' @param data_ref The dataset that contains the reference group participants
#' @param data_foc The dataset that contains the focal group participants
#' @param minc The minimum number of individuals in a give total test score category. Initialized to 2 per Shealy and Stout (1993) recommendation
#' @param cusr The guess correction factor. This is initialized to 0, but can be changed based on the items used. The cusr value is the average of guess correction across the items in the dataset.
#' @param idw The weighting factor. This is initialized to use the reference and focal group weighting recommended by Shealy and Stout (1993)
#' @param suspect_items the item(s) to be assess for DIF. Listing one item number will assess for DIF on that item, multiple items (e.g. c(2,3,8)) will assess DIF for the bundle of items.
#' @param matching_items the list of items to match on.
#' @param listwise initialized to 1, this inidicates that the procedure will not delete any rows with missing data, changing this to 2 will listwise delete.
#' @param nch a vector indicating the number of categories for each item. Example if there are 5 items and each has 3 catgories nch <- c(3,3,3,3,3). If there are 5 items and items 1 - 3 have 4 categories and items 4 and 5 have 2 then nch <- (4,4,4,2,2)
#' @aliases psib
#' @export psib
#' @references Chang, H. H., Mazzeo, J. & Roussos, L. (1996). DIF for Polytomously Scored Items: An Adaptation of the SIBTEST Procedure. Journal of Educational Measurement, 33, 333-353.
#' @references DIF-Pack (2021) Measured Progress. https://psychometrics.onlinehelp.measuredprogress.org/tools/dif/
#' @references Shealy, R. & Stout, W. (1993). A model-based standardization approach that separates true bias/DIF from group ability differences and detect test bias/DTF as well as item bias/DIF. Psychometrika, 58, 159-194.
#' @references Weese, J. D., Turner, R. C., Liang, X., Ames, A., & Crawford, B. (accepted). Implementing a Standardized Effect Size in the POLYSIBTEST Procedure
#' @examples
#'
#' \dontrun{
#'
#' #perform POLYSIBTEST with one suspect item. Sample data has 20 items with 5 categories each.
#' nch <- rep(5,20)
#' psib(data_ref = data_refP, data_foc = data_focP,
#' minc = 2,suspect_items = c(20), matching_items = c(1:19),nch = nch)
#'
#' #perform POLYSIBTEST with a bundle of suspect items
#'
#' psib(data_ref = data_refP, data_foc = data_focP,
#' minc = 2, suspect_items = c(16,17,18,19,20), matching_items = c(1:15),nch = nch)
#'
#' }
psib <- function(data_ref, data_foc,minc=2,cusr = 0,idw = 0,suspect_items, matching_items,listwise = 1,nch){
############################################################################################################1
############################################################################################################1
# NOTE: THIS CODE IS ADAPTED FROM THE ORIGINAL FORTRAN CODE THAT POWERED THE SIBTEST V1.7 (2005 SOFTWARE
# RELEASE DATE). THAT CODE HAS BEEN RELEASED BY MEASURED PROGRESS AND IS NOW CONSIDERED OPEN SOURCED.
# YOU CAN FIND THAT CODE AT THE FOLLOWING WEBSITE
# https://psychometrics.onlinehelp.measuredprogress.org/tools/dif/.
# I take no credit for creating the code, only the translation and it's implementation in R
# This code will be updated to become vectorized so it is more efficient.
#
############################################################################################################1
############################################################################################################1
if(listwise == 2){
data_ref <- na.omit(data_ref)
data_foc <- na.omit(data_foc)
}
if (any(is.na(data_ref)) || any(is.na(data_foc))) {
return("POLYSIBTEST requires that both reference and focal group datasets do not have missing values. Please select TRUE to remove missing values to listwise remove missing values")}
if(ncol(data_ref) != ncol(data_foc)){
return("Verify that your data is correct. The number of columns in the reference group data does not equal the number of columns in the focal group data.")}
if(any(suspect_items %in% matching_items)){
return("POLYSIBTEST cannot run when when suspect items and matching subtest items contain the same the same items. Please check your suspect and matching subtest item selections")}
psibuni <- function(data_ref,data_foc,xmaj,ymaj,j0maj,xmin,ymin,j0min,itdifj,
itdifn,n0,m0,ni,minc,cusr,iflg,idw,
fnamer,fnamef,hvscore,hscore,hsscore,ndiv,idiv,ipv,jknmaj,jknmin){
estaujn <- estaunn <- estauj <-estaun <-
adjmj <- adjmn <- obprmj<-obprmn<-scores<-iclind <- NULL
itdfwj = itdifj
itdfwn = itdifn
ybarmj <- ybarmn <-ysum2j <- ysum2n <-
spbmaj <- spbmin <- spbmj2 <- spbmn2 <- rep(0,hvscore+1)
j0maj <- nrow(data_ref)
j0min <- nrow(data_foc)
for(j in 1:j0maj){
spbmaj[xmaj[j]] = spbmaj[xmaj[j]]+ymaj[j]
spbmj2[xmaj[j]] = spbmj2[xmaj[j]]+(ymaj[j]*ymaj[j])
}
for(j in 1:j0min){
spbmin[xmin[j]] = spbmin[xmin[j]]+ymin[j]
spbmn2[xmin[j]] = spbmn2[xmin[j]]+(ymin[j]*ymin[j])
}
mhsscor = hsscore/n0
ybarmj[c(2:(hvscore+1))] <- spbmaj[c(1:(hvscore))]/n0
ybarmn[c(2:(hvscore+1))] <- spbmin[c(1:(hvscore))]/n0
ysum2j[c(2:(hvscore+1))] <- spbmj2[c(1:(hvscore))]/(n0*n0)
ysum2n[c(2:(hvscore+1))] <- spbmn2[c(1:(hvscore))]/(n0*n0)
ysdmaj <- ifelse(jknmaj > 1,sqrt(((jknmaj*ysum2j)-(ybarmj^2))/(jknmaj*(jknmaj-1))),0)
ysdmin <- ifelse(jknmin > 1,sqrt(((jknmin*ysum2n)-(ybarmn^2))/(jknmin*(jknmin-1))),0)
ybarmj <-ifelse(jknmaj >0, ybarmj/jknmaj, ybarmj)
ybarmn <-ifelse(jknmin >0, ybarmn/jknmin, ybarmn)
adjmj <- ybarmj
adjmn <- ybarmn
rj0maj = j0maj
obprmj <- jknmaj/rj0maj
rj0min = j0min
obprmn = jknmin/(rj0min)
obprmj[is.na(obprmj)] <-0
obprmn[is.na(obprmn)] <-0
iclind <- rep(1,(hvscore+1))
iclind[1] <- 0
iclind[hvscore+1] <- 0
iclind <- ifelse(jknmaj < (minc),0, iclind)
iclind <- ifelse(jknmin < (minc),0, iclind)
lowcts = round(cusr*(ndiv),0)
iclind[c(1:(lowcts+1))] <- 0
ipelimf = 0
ipelimr = 0
ntsc = 0
ntsc <- sum(iclind,na.rm = T)
ipelimr <- sum(jknmaj[!iclind],na.rm = T)
ipelimf <- sum(jknmin[!iclind],na.rm = T)
pcel = (ntsc)/(hvscore-1)
pelimr = (ipelimr)/(j0maj)
pelimf = (ipelimf)/(j0min)
if(ntsc == 0 | pelimr == j0maj | pelimf == j0min){
return("No usable data in the datasets")
}
scores <- c(0:hvscore)
rm <- scores*obprmj
rm[!iclind] <- 0
rf <- obprmj
rf[!iclind] <- 0
vsavmj <- sum(rm)/sum(rf)
vsvrmj <- (((scores-vsavmj)^2)*obprmj)
vsvrmj[!iclind] <- 0
vsvrmj <- sum(vsvrmj)/sum(rf)
rm <- scores*obprmn
rm[!iclind] <- 0
rf <- obprmn
rf[!iclind] <- 0
vsavmn <- sum(rm)/sum(rf)
vsvrmn <- (((scores-vsavmn)^2)*obprmn)
vsvrmn[!iclind] <- 0
vsvrmn <- sum(vsvrmn)/sum(rf)
kk <- c(1:(hvscore+1))
estaun <- estauj <- (kk-1)/hvscore
mix = 1
npv = m0 - ndiv
if(ndiv > 0){
for(i in 1:ndiv){
k0=idiv[i]
itdfwj[k0] = (itdfwj[k0]-cusr)/(1-cusr)
if (itdfwj[k0] < 0){
itdfwj[k0] = 0
}
itdfwn[k0] = (itdfwn[k0]-cusr)/(1-cusr)
if (itdfwn[k0] < 0){
itdfwn[k0] = 0
}
}
divarj=sum(itdfwj*(1-itdfwj),na.rm=T)
divarn=sum(itdfwn*(1-itdfwn),na.rm=T)
}else{
divarj=0
divarn=0
}
if(ndiv != m0){
xbar <- xvar <- rep(0,npv)
for(i in 1:npv){
iv = ipv[i]
xbar[i] = sum(data_ref[,iv])
xvar[i] = sum(data_ref[,iv]^2)
}
njj = nrow(data_ref)
f1 = njj
xbar <- xbar/f1
xvar <- xvar/f1
xvar = xvar-xbar*xbar
vmj <- sum(xvar,na.rm = T)+divarj
xbar <- xvar <- rep(0,npv)
for(i in 1:npv){
iv = ipv[i]
xbar[i] = sum(data_foc[,iv])
xvar[i] = sum(data_foc[,iv]^2)
}
njj = nrow(data_foc)
f1 = njj
xbar <- xbar/f1
xvar <- xvar/f1
xvar = xvar-xbar*xbar
vmn <- sum(xvar,na.rm = T)+divarn
}
if(ndiv == m0){
vmj = divarj
vmn = divarn
}
kr20pj <- (m0/(m0-1))*(1-(vmj/vsvrmj))
kr20pn <- (m0/(m0-1))*(1-(vmn/vsvrmn))
kk <- c(1:(hvscore+1))
estauj <- (vsavmj+kr20pj*((kk-1)-vsavmj))/hvscore
estaun <- (vsavmn+kr20pn*((kk-1)-vsavmn))/hvscore
adjmj <- shnew(m=1,adjmj,ybarmj,iclind,estauj,estaun,iflg,hvscore,mhsscor)
if(length(adjmj) == 1){
stop("There was an error computing the adjusted mean for the reference group")
}
adjmn <- shnew(m=2,adjmn,ybarmn,iclind,estauj,estaun,iflg,hvscore,mhsscor)
if(length(adjmj) == 1){
stop("There was an error computing the adjusted mean for the focal group")
}
xnk <- adjmj - adjmn
xnk[!iclind] <- 0
dhatnk <- ((1/jknmaj)*ysdmaj^2)+((1/jknmin)*(ysdmin)^2)
dhatnk[!iclind] <- 0
yyy <- beta_calc(xnk,jknmaj,jknmin,dhatnk,iclind,hvscore)
se <- yyy[2]
buni <- yyy[1]
r1 <- yyy[3]
sd1 <- yyy[4]
es1 <- yyy[5]
se = se*n0
buni = buni*n0
if(r1 < 0){
p <- (pnorm(r1,0,1))*2
}else{
p <- (1-pnorm(r1,0,1))*2
}
return(c(buni,se,r1,p,sd1,es1,pelimr,pelimf))
}
beta_calc <- function(xnk,jknmaj,jknmin,dhatnk,iclind,n){
wgt <- sd_denom <- rep(0,n+1)
rn = 0.0
rd = 0.0
cntmaj=0.0
cntmin=0.0
for(k in 1:n){
if (iclind[k] == 1){
cntmaj = cntmaj+jknmaj[k]
cntmin = cntmin+jknmin[k]
}
}
for(k in 1:n){
if(iclind[k] == 1){
if(idw == 0){
wgt[k] = (jknmaj[k] + jknmin[k])/(cntmaj+cntmin)
sd_denom[k] = (wgt[k]^2)*(1/jknmaj[k]+1/jknmin[k])
}
if(idw == 1){
wgt[k] = jknmin[k]/cntmin
}
}
rn = rn + xnk[k]*wgt[k]
rd = rd + (wgt[k]^2.0)*dhatnk[k]
}
buni = rn
if(rd == 0){
warning("There was an error computing the standard error, it is 0 and therfore, no p-value can be calculated")
iflag=6
se=0.0
r1 = -99
return(c(buni,se,r1,0,0))
}else{
se = rd^0.5
if(sum(sd_denom) > 0){
sd = se/sqrt(sum(sd_denom))
es1 <- buni/sd
}
if(sum(sd_denom) == 0){
warning("There was an error computing the standard deviation, it is 0 and therfore, no effect size can be calculated")
sd = 0
es1 = 0
}
r1 = rn /se
}
return(c(buni,se,r1,sd,es1))
}
shnew <- function(m,ybardt,sercor,iclind,estauj,
estaun,iflg,n0,mhsscor){
ivec <- NULL
taumid <- c(rep(0,n0+1))
kount = 0
for(k in 1:(n0+1)){
if(iclind[k] == 1){
kount = kount + 1
ivec[kount] = k
}
}
for(i in 1:n0+1){
if(iclind[i] == 1){
l = i
break
}
}
for(i in rev(1:n0+1)){
if(iclind[i] == 1){
r = i
break }
}
if(r <= l){
iflg <- -99
return(iflg)
}
taumid[1] = max(estauj[l],estaun[l])
taumid[kount] = min(estauj[r],estaun[r])
for(kc in (2):(kount-1)){
x <- ivec[kc]
taumid[kc] = (estauj[x]+estaun[x])/2
}
if(m == 1){etprm1 <- rep(0,n0+1)}
if(m == 2){etprm2 <- rep(0,n0+1)}
if (m == 1){
for(kc in (2):(kount)){
etprm1[kc] = (sercor[ivec[kc+1]] - sercor[ivec[kc-1]]) /(estauj[ivec[kc+1]] - estauj[ivec[kc-1]] )
}
}
if(m == 2){
for(kc in (2):(kount)){
etprm2[kc] = (sercor[ivec[kc+1]] - sercor[ivec[kc-1]]) /(estaun[ivec[kc+1]] - estaun[ivec[kc-1]] )
}
}
estfix1 <- NULL
estfix <- 0
if (m == 1){
for(kc in (2):(kount-1)){
estfix = (estauj[ivec[kc]] - taumid[kc])*etprm1[kc]
ybardt[ivec[kc]] = ybardt[ivec[kc]] - estfix
estfix1 <- rbind(estfix1,estfix)
if(ybardt[ivec[kc]] < 0){ ybardt[ivec[kc]]=0 }
if(ybardt[ivec[kc]] > mhsscor){ ybardt[ivec[kc]]=mhsscor }
}
}else{
if(m == 2){
for(kc in (2):(kount-1)){
estfix = (estaun[ivec[kc]] - taumid[kc])*etprm2[kc]
ybardt[ivec[kc]] = ybardt[ivec[kc]] - estfix
if(ybardt[ivec[kc]] < 0){ ybardt[ivec[kc]]=0 }
if(ybardt[ivec[kc]] > mhsscor ){ ybardt[ivec[kc]]=mhsscor }
}
}
}
if(m == 1){
ybardt[l] = ethat1(taumid[1],estauj,sercor,n0)
ybardt[r] = ethat1(taumid[kount],estauj,sercor,n0)
}else{
if(m == 2){
ybardt[l] = ethat1(taumid[1],estaun,sercor,n0)
ybardt[r] = ethat1(taumid[kount],estaun,sercor,n0)
}
}
return(ybardt)
}
ethat1 <- function( tau,estauj,ybarmj,n0){
if (tau <= estauj[1]){
ethat1 = ybarmj[1]
return(ethat1)
}else{
if (estauj[n0+1] <= tau){
ethat1 = ybarmj[n0+1]
return(ethat1)
}
}
kfound = -1
for(k in (2):(n0)){
if (estauj[k] <= tau & tau < estauj[k+1]){
kfound = k
alpha = (tau - estauj[kfound]) /( estauj[kfound+1] - estauj[kfound] )
ethat1 = (alpha)*ybarmj[kfound+1] + (1-alpha)*ybarmj[kfound]
return(ethat1)
}
}
}
itdifj <- itdifn <- NULL
nitem = ncol(data_ref)
xnitem <- nitem
ni = nitem
netsum <- length(suspect_items)
n0 = netsum
isusp <- suspect_items
nthsum <- length(matching_items)
m0 = nthsum
ivalid <- matching_items
hscore = sum(nch)
iden <- idifj <- idifn <-ixtsum <- sad<- NULL
jknmin <- jknmaj <- rep(0,hscore)
idifj <- idifn <- ixtsum <- rep(0,nitem)
iden <- rep(2,nitem)
iflag = 0
iden[isusp] <- 0
iden[ivalid] <-1
hsscore = hscore
idiv <- ipv <- rep(0,nthsum)
hvscore = ndiv = npv = 0
for(i in 1:nthsum){
if(nch[ivalid[i]] == 1){
ndiv = ndiv + 1
idiv[ndiv] = ivalid[i]
}else{
npv = npv +1
ipv[npv] = ivalid[i]
}
hvscore = hvscore + nch[ivalid[i]]
}
xmaj <- ymaj <- rep(0,nrow(data_ref))
xmin <- ymin <- rep(0,nrow(data_foc))
u1 <- NULL
isum2 <- idifj <- idifn <- ixtsum <- rep(0,nitem)
ixsum <- ix2sum <- iyjsum <- ixjsum <- ix2jsum <- 0
for(j in 1:nrow(data_ref)){
xmaj[j] <- 0
ymaj[j] <- 0
xr = 0
u <- data_ref[j,]
for(ii in 1:nitem){
u1[ii] <- u[[ii]]
}
for(k in 1:nitem){
if(iden[k] == 1){
xmaj[j] = xmaj[j] + u1[k]
}
if(iden[k] == 0){
ymaj[j] = ymaj[j] + u1[k]
}
if((iden[k] != 0 & iden[k] != 1)){
xr = xr + u1[k]
}
idifj[k] = idifj[k] + u1[k]
isum2[k] = isum2[k] + u1[k]*u1[k]
}
xx <- xmaj[j]
jknmaj[xx]=jknmaj[xx]+1
iyjsum = iyjsum + ymaj[j]
ixjsum = ixjsum + xmaj[j]
ix2jsum = ix2jsum + xmaj[j]*xmaj[j]
ix = xmaj[j] + ymaj[j] + xr
ixsum = ixsum + ix
ix2sum = ix2sum + ix*ix
for(k in 1:nitem){
ixtsum[k] = ixtsum[k] + ix*u1[k]
}
}
jr = nrow(data_ref)
xjr = jr
xbr = ixsum/(xjr)
sxr = sqrt(ix2sum/xjr - xbr*xbr)
ix2sumt = ix2sum
ixsum = 0
ix2sum = 0
iynsum = 0
ixnsum = 0
ix2nsum = 0
for(j in 1:nrow(data_foc)){
xmin[j] <- 0
ymin[j] <- 0
xr = 0
u <- data_foc[j,]
for(ii in 1:nitem){
u1[ii] <- u[[ii]]
}
for(k in 1:nitem){
if(iden[k] == 1){
xmin[j] = xmin[j] + u1[k]
}
if(iden[k] == 0){
ymin[j] = ymin[j] + u1[k]
}
if(!(iden[k] == 0 | iden[k] == 1)){
xr = xr + u1[k]
}
idifn[k] = idifn[k] + u1[k]
isum2[k] = isum2[k] + u1[k]*u1[k]
}
xx <- xmin[j]
jknmin[xx]=jknmin[xx]+1
iynsum = iynsum + ymin[j]
ixnsum = ixnsum + xmin[j]
ix2nsum = ix2nsum + xmin[j]*xmin[j]
ix = xmin[j] + ymin[j] + xr
ixsum = ixsum + ix
ix2sum = ix2sum + ix*ix
for(k in 1:nitem){
ixtsum[k] = ixtsum[k] + ix*u1[k]
}
}
jf = nrow(data_foc)
xjf = jf
xbf = (ixsum)/(xjf)
sxf = sqrt((ix2sum/xjf) - (xbf*xbf))
ix2sumt = ix2sumt + ix2sum
pbsflag = 0
pbs <- rep(0,nitem)
fnex = xjr + xjf
ad = xbr - xbf
sdavg = sqrt((xjr*sxr*sxr + xjf*sxf*sxf)/(fnex))
if(sdavg > 0){sad = ad/sdavg}
sad1 <- sad
term2 <- 0
xbar = ((xjr)*xbr + (xjf)*xbf)/(fnex)
term1 = ((xjr)*xbr + (xjf)*xbf)*((xjr)*xbr + (xjf)*xbf)
sdx = sqrt((ix2sumt - (term1/(fnex)))/(fnex))
for(k in 1:nitem){
iterm4 <- idifn[k] +idifj[k]
iterm3 <- hscore*(jr + jf) - idifj[k] - idifn[k]
if(iterm3 > 0 & iterm4 > 0){
if(sdx < 0){
pbsflag = 1;
pbs[k] = -99
}
if(sdx >= 0){
sdk = sqrt((isum2[k]-(iterm4*iterm4)/fnex)/fnex)
xnum = ixtsum[k]/fnex - xbar*iterm4/fnex
pbs[k] = xnum/(sdx*sdk)
}
}
if(iterm3 <= 0 | iterm4 <= 0){
pbsflag = 1;
pbs[k] = -99
}
}
pp <- (idifj+idifn)/fnex
xbr = (ixjsum)/(xjr)
varr = (ix2jsum)/(xjr) - xbr*xbr
sdr = sqrt(varr)
xbf = (ixnsum)/(xjf)
varf = (ix2nsum)/(xjf) - xbf*xbf
sdf = sqrt(varf)
ad = xbr - xbf
sdavg = sqrt(((xjr)*varr + (xjf)*varf)/(fnex))
if(sdavg > 0) {sad = ad/sdavg }
sad2 <- sad
itdifj <- idifj/xjr
itdifn <- idifn/xjf
jknmaj1 <- jknmin1 <-rep(0,(hvscore+1))
jknmaj11 <- table(xmaj)
jknmin11 <-table(xmin)
jknmaj1[as.integer(names(jknmaj11))+1] <- jknmaj11
jknmin1[as.integer(names(jknmin11))+1] <- jknmin11
output <- psibuni(data_ref=data_ref,data_foc=data_foc,xmaj,ymaj,jr,xmin,ymin,
jf,itdifj,itdifn,
netsum,nthsum,nitem,minc,cusr,iflag,idw,
fnamer,fnamef,hvscore,hscore,hsscore,ndiv,idiv,ipv,jknmaj1,jknmin1)
out <- data.frame(suspect_items = paste(suspect_items,collapse = " "), beta = output[1],sigma_uni = output[2], std = output[5], effect_size = output[6],z = output[3],
p_value = output[4])
out <- cbind(out[1],round(out[2:7],3))
colnames(out) <- c("Suspect Item(s)","Beta","SE","SD","Delta","z","p")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.