R/tree_Rasch.R
In DIFtree: Item Focussed Trees for the Identification of Items in Differential Item Functioning

tree_Rasch <-
function(y,
                       DM_kov,
                       npersons,
                       nitems,
                       nvar,
                       ordered_values,
                       n_levels,
                       n_s,
                       alpha,
                       nperm,
                       trace,
                       penalize
  
){
  # design of rasch model 
  pp_design <- diag(npersons)                   # persons, person P reference 
  pp_design <- pp_design[rep(1:nrow(pp_design),each=nitems),]
  pp_design <- pp_design[,-npersons]
  
  ip_design <- -1*diag(nitems)                  # item parameter  
  ip_design <- ip_design[rep(1:nrow(ip_design),times=npersons),]
  
  dm_rasch <- cbind(pp_design,ip_design)               
  
  names_rasch    <- c(paste("theta",1:(npersons-1),sep=""),paste("beta",1:nitems,sep=""))
  colnames(dm_rasch) <- names_rasch      
  
  # functions to build design 
  thresholds <- lapply(1:nvar, function(j) ordered_values[[j]][-length(ordered_values[[j]])])
  
  v <- lapply(1:nvar,function(j) 1:(n_levels[j]-1)) 
  w <- lapply(1:nvar, function(j) rep(paste0("s",j),n_s[j]))
  
  design_one  <- function(x,threshold,upper){
    if(upper){
      ret <- ifelse(x > threshold,1,0)
    } else{
      ret <- ifelse(x > threshold,0,1)
    }
    return(ret)
  }
  
  design <- function(x,thresholds,upper){
    ret <- sapply(thresholds, function(j) design_one(x,j,upper))
    return(ret)
  }
  
  whole_design <- function(X,var,item,thresholds,upper=TRUE){
    design_tree <- matrix(0,nrow=nitems*npersons,ncol=length(thresholds[[var]]))
    rows        <- seq(item,(nitems*npersons),by=nitems)
    design_tree[rows,] <- design(X[,var],thresholds[[var]],upper)
    z <- rep(paste0(ifelse(upper,"_u","_l"),item),length(thresholds[[var]]))
    colnames(design_tree) <- paste0(w[[var]],v[[var]],z)
    return(design_tree)
  }
  
  designlists <- function(X,thresholds,upper=TRUE){
    ret <- lapply(1:nitems, function(j){
      lapply(1:nvar, function(var){
        whole_design(X,var,j,thresholds,upper)
      })
    })
    
    return(ret)
  }
  
  #########################################################################################
  
  # initializations 
  mod_potential <- list()
  devs          <- c()
  crits         <- c()
  splits        <- c()
  pvalues       <- c() 
  ip            <- list()
  vars_evtl     <- list()
  splits_evtl   <- list()
  which_obs     <- list()
  numbers       <- list()
  count <- 1
  
  pp     <- paste("theta",1:(npersons-1),sep="")
  help_p <- paste0(pp,collapse="+")
  
  numbers[[1]]     <- lapply(1:nitems,function(j) 1)
  which_obs[[1]]   <- lapply(1:nitems,function(j) matrix(1:npersons,nrow=1))
  splits_evtl[[1]] <- lapply(1:nitems,function(j) lapply(1:nvar, function(var) matrix(1:n_s[var],nrow=1)))
  vars_evtl[[1]]   <- lapply(1:nitems,function(j) nvar)
  ip[[1]]          <- lapply(1:nitems,function(j) paste0("beta",j))
  help0            <- formula(paste("y~",help_p,"+",paste0(unlist(ip[[1]]),collapse="+"),"-1"))
  dat0             <- data.frame(y,dm_rasch)
  mod0             <- mod_potential[[1]] <- glm(help0,family=binomial(link="logit"),data=dat0)
  start            <- predict(mod0)
  
  design_upper <- designlists(DM_kov,thresholds)
  design_lower <- designlists(DM_kov,thresholds,upper=FALSE)
  sig   <- TRUE
  anysplit <- TRUE
  
  # function to compute all models in one knot
  allmodels <- function(i,var,kn,design_lower,design_upper){
    
    deviances <- rep(0,n_s[var])
    help_kn <- ip[[count]][[i]][kn]
    help1   <- paste0(unlist(ip[[count]])[-which(unlist(ip[[count]])==help_kn)],collapse="+")
    splits_aktuell <- splits_evtl[[count]][[i]][[var]][kn,] 
    splits_aktuell <- splits_aktuell[!is.na(splits_aktuell)]
    
    param_knot <- coef(mod0)[help_kn]
    obs_aktuell <- which_obs[[count]][[i]][kn,]
    obs_aktuell <- obs_aktuell[!is.na(obs_aktuell)]
    rows_obs   <- seq(i,(nitems*npersons),by=nitems)[obs_aktuell]
    start[rows_obs] <- start[rows_obs]+param_knot
    
    if(length(splits_aktuell)>0){
      
      for(j in splits_aktuell){
        dat   <- data.frame(dat0,design_lower[[i]][[var]][,j,drop=FALSE],design_upper[[i]][[var]][,j,drop=FALSE])
        help2 <- paste(ip[[count]][[i]][kn],c(colnames(design_lower[[i]][[var]])[j],colnames(design_upper[[i]][[var]])[j]),sep=":")
        help3 <- paste(help2,collapse="+")
        help4 <- formula(paste("y~",help3,"-1"))
        suppressWarnings(
          mod <- glm(help4,family=binomial(link="logit"),data=dat,offset=start)
        )
        deviances[j] <-  deviance(mod0)-deviance(mod)
      }
    }
    return(deviances)
  }  
  
  # estimate tree 
  while(sig & anysplit){
    
    # compute all models
    dv <- lapply(1:nvar,function(var) {
      lapply(1:nitems,function(i) {
        n_knots   <- length(ip[[count]][[i]])
        deviances <- matrix(rep(0,n_s[var]*n_knots),ncol=n_knots)
        for(kn in 1:n_knots){
          deviances[,kn] <- allmodels(i,var,kn,design_lower,design_upper)
        }
        return(deviances)
      })
    })
    
    # select optimum
    variable <- which.max(lapply(1:nvar,function(j) max(unlist(dv[[j]]))))
    item     <- which.max(lapply(1:nitems, function(j) max(dv[[variable]][[j]])))
    split    <- as.numeric(which(dv[[variable]][[item]]==max(dv[[variable]][[item]]),arr.ind=TRUE)[,1])
    knoten   <- as.numeric(which(dv[[variable]][[item]]==max(dv[[variable]][[item]]),arr.ind=TRUE)[,2])
    if(length(split)>1){
      split  <- split[1]
      knoten <- knoten[1]
      warning(paste("Maximum in iteration ",count," not uniquely defined"))
    }
    ip_old   <- ip[[count]][[item]][knoten]
    level    <- length(strsplit(ip_old,":")[[1]])
    number   <- numbers[[count]][[item]][knoten]
    left     <- max(numbers[[count]][[item]])+1
    right    <- max(numbers[[count]][[item]])+2
    
    # compute permutation test 
    dev <- rep(NA,nperm)
    
    for(perm in 1:nperm){
      dv_perm <- rep(0,n_s[variable])
      obs_aktuell <- which_obs[[count]][[item]][knoten,]
      obs_aktuell <- obs_aktuell[!is.na(obs_aktuell)]
      DM_kov_perm <- DM_kov
      DM_kov_perm[obs_aktuell,variable] <- sample(DM_kov_perm[obs_aktuell,variable],length(obs_aktuell))
      design_upper_perm      <- design_upper
      design_upper_perm[[item]][[variable]] <- whole_design(DM_kov_perm,variable,item,thresholds)
      design_lower_perm      <- design_lower
      design_lower_perm[[item]][[variable]] <- whole_design(DM_kov_perm,variable,item,thresholds,upper=FALSE)
      dv_perm <- allmodels(item,variable,knoten,design_lower_perm,design_upper_perm)
      dev[perm] <- max(dv_perm)
      if(trace){
        cat(".")
      }
    }
    
    # test decision 
    crit_val <- quantile(dev,1-(alpha/vars_evtl[[count]][[item]][knoten]))
    proof    <- max(dv[[variable]][[item]]) > crit_val
    devs[count]   <- max(dv[[variable]][[item]])
    crits[count]   <- crit_val
    pvalues[count] <- length(which(dev>max(dv[[variable]][[item]])))/nperm
    
    if(proof){
      
      # get new formula 
      help_kn2 <- ip[[count]][[item]][knoten]
      help5 <- paste0(unlist(ip[[count]])[-which(unlist(ip[[count]])==help_kn2)],collapse="+")
      help6 <- paste(ip[[count]][[item]][knoten],c(colnames(design_lower[[item]][[variable]])[split],colnames(design_upper[[item]][[variable]])[split]),sep=":")
      help7 <- paste(help6,collapse="+")
      help8 <- formula(paste("y~",help_p,"+",help5,"+",help7,"-1"))
      
      ######################
      if(level>1){
        help_kn4 <- lu(c(),1,level-1,c())
        help_kn5 <- unlist(strsplit(help_kn2,""))
        help_kn6 <- paste0(help_kn5[which(help_kn5=="_")+1],collapse="")
        knoten2  <- which(help_kn4==help_kn6)
      } else{
        knoten2 <- knoten
      }
      ######################
      
      splits <- rbind(splits,c(variable,item,split,level,knoten2,number,left,right))   
      
      
      # fit new model 
      dat <- dat0 <- data.frame(dat0,design_lower[[item]][[variable]][,split,drop=FALSE],design_upper[[item]][[variable]][,split,drop=FALSE])
      suppressWarnings(
        mod0  <- mod_potential[[count+1]] <- glm(help8,family=binomial(link="logit"),data=dat,etastart=start)
      )
      start <- predict(mod0)
      
      # generiere neue itemparameter 
      ip[[count+1]]                             <- ip[[count]]
      ip[[count+1]][[item]]                     <- rep("",length(ip[[count]][[item]])+1)
      ip[[count+1]][[item]][c(knoten,knoten+1)] <- help6
      ip[[count+1]][[item]][-c(knoten,knoten+1)]<- ip[[count]][[item]][-knoten]
      
      # passe splits_evtl an
      n_knots                                                       <- length(ip[[count+1]][[item]])
      splits_evtl[[count+1]]                                        <- splits_evtl[[count]]
      for(var in 1:nvar){
        splits_evtl[[count+1]][[item]][[var]]                       <- matrix(0,nrow=n_knots,ncol=n_s[var])
        splits_evtl[[count+1]][[item]][[var]][c(knoten,knoten+1),]  <- matrix(rep(splits_evtl[[count]][[item]][[var]][knoten,],2),nrow=2,byrow=T)
        splits_evtl[[count+1]][[item]][[var]][-c(knoten,knoten+1),] <- splits_evtl[[count]][[item]][[var]][-knoten,]
      }
      splits_evtl[[count+1]][[item]][[variable]][knoten,splits_evtl[[count+1]][[item]][[variable]][knoten,]>=split] <- NA 
      splits_evtl[[count+1]][[item]][[variable]][(knoten+1),splits_evtl[[count+1]][[item]][[variable]][(knoten+1),]<=split] <- NA
      
      # any split? 
      anysplit <- !all(is.na(unlist(splits_evtl[[count+1]])))
      
      # passe vars_evtl an 
      vars_evtl[[count+1]]                             <- vars_evtl[[count]]
      vars_evtl[[count+1]][[item]]                     <- rep(0,n_knots)
      vars_evtl[[count+1]][[item]][c(knoten,knoten+1)] <- rep(vars_evtl[[count]][[item]][knoten],2)
      vars_evtl[[count+1]][[item]][-c(knoten,knoten+1)]<- vars_evtl[[count]][[item]][-knoten]
      
      if(length(which(!is.na(splits_evtl[[count+1]][[item]][[variable]][knoten,])))==0){ 
        vars_evtl[[count+1]][[item]][knoten] <- vars_evtl[[count+1]][[item]][knoten]-1 
      }
      if(length(which(!is.na(splits_evtl[[count+1]][[item]][[variable]][knoten+1,])))==0){ 
        vars_evtl[[count+1]][[item]][knoten+1] <- vars_evtl[[count+1]][[item]][knoten+1]-1 
      }
      
      # passe which_obs an 
      which_obs[[count+1]]                               <- which_obs[[count]]
      which_obs[[count+1]][[item]]                       <- matrix(0,nrow=n_knots,ncol=npersons)
      which_obs[[count+1]][[item]][c(knoten,knoten+1),]  <- matrix(rep(which_obs[[count]][[item]][knoten,],2),nrow=2,byrow=T)
      which_obs[[count+1]][[item]][-c(knoten,knoten+1),] <- which_obs[[count]][[item]][-knoten,]
      thresh <- ordered_values[[variable]][1:n_s[variable]][split]
      which_obs[[count+1]][[item]][knoten,DM_kov[,variable]>thresh] <- NA
      which_obs[[count+1]][[item]][(knoten+1),DM_kov[,variable]<=thresh] <- NA
      
      # passe numbers an 
      numbers[[count+1]]                              <- numbers[[count]]
      numbers[[count+1]][[item]]                      <- numeric(length=n_knots)
      numbers[[count+1]][[item]][c(knoten,knoten+1)]  <- c(left,right)
      numbers[[count+1]][[item]][-c(knoten,knoten+1)] <- numbers[[count]][[item]][-knoten] 
      
      # trace
      if(trace){
        cat(paste0("\n Split"," ",count,";"," ","Item"," ",item,"\n"))
      }
      
      # erhoehe counter
      count <- count+1 
      
    } else{
      sig <- FALSE
      if(penalize){
        if(count>1){
          help9 <- formula(paste("~",0,"+",help_p))
          help10 <- formula(paste("~",help5,"+",help7))
          mod_potential[[count]] <- penalized(y,penalized=help10,unpenalized=help9,lambda2=1e-3,data=dat0,trace=FALSE)
        } else{
          help9  <- formula(paste("~",0,"+",help_p))
          help10 <- formula(paste("~",paste0(unlist(ip[[1]]),collapse="+")))
          mod_potential[[count]] <- penalized(y,penalized=help10,unpenalized=help9,lambda2=1e-3,data=dat0,trace=FALSE)
        }
      }
    }
  }
  
  ################################################################################### 
  
  # prettify results 
  mod_opt     <- mod_potential[[count]]
  ip_opt      <- ip[[count]]
  theta_hat   <- c(coefficients(mod_opt)[1:(npersons-1)],0)
  beta_hat    <- coefficients(mod_opt)[npersons:length(coefficients(mod_opt))]
  
  if(count>1){
    
    dif_items   <- unique(splits[,2])
    nodif_items <- c(1:nitems)[-dif_items]
    
    beta_hat_nodif <- sapply(nodif_items,function(j) beta_hat[ip_opt[[j]]])
    beta_hat_dif   <- lapply(dif_items, function(j)  beta_hat[ip_opt[[j]]])
    names(beta_hat_dif) <- dif_items
    
    help9 <- cumsum(c(0,(n_levels-1)))
    colnames(splits) <- c("var","item","split","level","node","number","left","right")
    splits <- data.frame(cbind(splits[,1:5,drop=FALSE],"variable"=rep(NA,nrow(splits)),"threshold"=rep(NA,nrow(splits)),splits[,6:8,drop=FALSE]))
    for(i in 1:nrow(splits)){
      splits[i,6] <- colnames(DM_kov)[splits[i,1]]
      v2 <- lapply(1:nvar,function(j) ordered_values[[j]][-length(ordered_values[[j]])])
      splits[i,7] <- v2[[splits[i,1]]][splits[i,3]]
    }
    splits <- splits[,-1]
    
    for(i in dif_items){
      info <- splits[splits[,"item"]==i,]
      endnodes <- get_endnodes(info)
      names(beta_hat_dif[[paste(i)]]) <- endnodes 
    }
  
  } else{
    
    beta_hat_nodif <- beta_hat
    beta_hat_dif   <- c()
    
  }
  
  to_return <- list("splits"=splits,
                    "thetas"=theta_hat,
                    "betas_nodif"=beta_hat_nodif,
                    "betas_dif"=beta_hat_dif,
                    "pvalues"=pvalues,
                    "devs"=devs,
                    "crits"=crits)
  
  return(to_return)
  
}
Any scripts or data that you put into this service are public.
DIFtree documentation built on July 8, 2020, 6:49 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
DIFtree
Item Focussed Trees for the Identification of Items in Differential Item Functioning

R/tree_Rasch.R
In DIFtree: Item Focussed Trees for the Identification of Items in Differential Item Functioning

Defines functions tree_Rasch

Try the DIFtree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DIFtree Item Focussed Trees for the Identification of Items in Differential Item Functioning

R/tree_Rasch.R In DIFtree: Item Focussed Trees for the Identification of Items in Differential Item Functioning

Defines functions tree_Rasch

Try the DIFtree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DIFtree
Item Focussed Trees for the Identification of Items in Differential Item Functioning

R/tree_Rasch.R
In DIFtree: Item Focussed Trees for the Identification of Items in Differential Item Functioning