#' @title Create DIGRAM Object
#' @name digram.object
#' @description Create a digram.object.
#' @usage digram.object(project=NULL,data=data.frame(),variables=colnames(data),filter.conditions=data.frame(variable.number=numeric(0),min=numeric(0),max=numeric(0)),recursive.structure=c(),comments="")
#' @param project The name of the DIGRAM project
#' @param data The data.frame or matrix with the data
#' @param variables A vector of column names or numbers from the dataset to include in the recoded dataset (the order matters in the recoded data) *or* a list of variables each element in the form list(variable.name="",variable.label="",ncat=0,category.names=c(),variable.type=c("nominal","ordinal"),minimum=0,maximum=1,cutpoints=c())
#' @param filter.conditions A data.frame with three columns: variable.number, max, and min. Only cases, for which all values of filter variables belong to the intervals defined by the corresponding minimum and maximum values (both included), will be used in the analysis.
#' @param recursive.structure A vector of cutpoints to define the recursive blocks
#' @param comments A string
#' @export
#' @return Returns a digram.object
#' @author Jeppe Bundsgaard <jebu@@edu.au.dk>
#' @examples
#' library(iarm)
#' do<-digram.object(project = "desc2",data = desc2,variables = c(5:14,2:4,1),recursive.structure = c(10,13))
#' @references
#' Kreiner, S. (2003). *Introduction to DIGRAM*. Dept. of Biostatistics, University of Copenhagen.
digram.object<-function(project=NULL,data=data.frame(),variables=NULL,filter.conditions=NULL,recursive.structure=NULL,comments="") {
if(is.null(project)) stop("You need to provide a project name")
#if(inherits(data,"tbl_df")) data<-as.data.frame(data)
if(!inherits(variables,"list")) {
vars<-if(is.null(variables)) colnames(data) else if(inherits(variables,c("numeric","integer"))) colnames(data)[variables] else variables
num.vars<-length(vars)
variables<-list()
for(i in 1:num.vars) {
iminus<-i-1
numletters<-which(i<=(26^(1:5)))[1]
variable.label<-""
k<-iminus
for(j in numletters:1) {r<-(k%%26);variable.label<-paste0(LETTERS[r+ifelse(numletters<2||j>1,1,0)],variable.label); k<-floor(k/26)}
variable.name<-vars[i]
column.number<-which(colnames(data)%in%vars[i])
if(!is.null(attr(data[,vars[i]], "labels"))) {
categories<-attr(data[,vars[i]], "labels")
} else {
# Test for integer/float/character...
categories<-na.omit(unique(data[,vars[i]]))
categories<-categories[order(categories)]
}
ncat<-length(categories)
variable.type<-ifelse(inherits(categories,"numeric") | inherits(categories,"integer"),"ordinal","nominal")
minimum<-if(variable.type=="ordinal") min(categories) else 0
maximum<-if(variable.type=="ordinal") max(categories) else ncat-1
# Record 2: <Minimum> <Cutpoint(1)>...<Last cutpoint> ... <Maximum>
cutpoints<-if(variable.type=="ordinal") categories[order(categories)] else 0:(ncat-1)
cutpoints<-cutpoints[-ncat]
# Categories
if(ncat<1) stop(paste("Not enough categories in",variable.name))
category.names<-data.frame(Category=0:(ncat-1),Name=categories)
if((inherits(data[,vars[i]],c("double")))) data[,vars[i]]<-as.integer(data[,vars[i]])
if(!(inherits(data[,vars[i]],c("integer","numeric")))) {
r2<-paste(apply(category.names,1,function(x) {
paste0("'",x["Name"],"'=",x["Category"])
}),collapse = ";")
data[,vars[i]]<-car::recode(data[,vars[i]],r2,as.numeric = T)
}
variables[[i]]<-list(variable.name=variable.name,variable.label=variable.label,column.number=column.number,ncat=ncat,category.names=category.names,variable.type=variable.type,minimum=minimum,maximum=maximum,cutpoints=cutpoints)
}
}
# The number of recursive blocks should appear on a separate record.
if(is.null(recursive.structure)) recursive.structure<-length(variables)
recursive.blocks<-length(recursive.structure)
do<-list(project=project,data=data,recoded=NULL,variables=variables,filter.conditions=filter.conditions,recursive.blocks=recursive.blocks,recursive.structure=recursive.structure,comments=comments,commandsrun=c())
class(do)<-"digram.object"
# print(do)
do$recoded<-digram.recode(do)
do
}
onlyif<-function(y) {i<-0;for(z in y) {if(!is.na(z) & z>0) i<-i+z else break};i}
digram.recode<-function(do) {
data<-do$data
variables<-do$variables
filter.conditions<-do$filter.conditions
# Recode data
as.data.frame(sapply(variables,function(x) {
cutpoints2<-matrix(c(x$minimum,x$cutpoints+.000001,x$cutpoints,x$maximum),ncol = 2,byrow = F)
froms<-apply(cutpoints2,1,paste,collapse=":")
tos<-0:(length(x$cutpoints))
recodestr<-paste(c(apply(matrix(c(froms,tos),ncol=2,byrow = F),1,paste,collapse="="),paste0(x$maximum+.000001,":hi=NA")),collapse=";")
column.number<-x$column.number
datacol<-data[,column.number]
# If this is a combined variable, add the columns together
if(is.data.frame(datacol)) datacol<-apply(datacol,1,function(y) {
if(is.null(x$combine.type)) x$combine.type<-"sum"
switch (x$combine.type,
"sum" = sum(y),
"or" = max(y),
"xor" = ifelse(sum(y>0)==1,y[y>0],0),
"and" = ifelse(length(unique(y))==1,y[1],0),
"onlyif" = onlyif(y)
)
})
if(!is.null(x$reverse) && x$reverse) datacol<-car::recode(datacol,paste(tos,rev(tos),sep = "=",collapse = ";"))
if(!is.null(filter.conditions)) {
minmax<-filter.conditions[filter.conditions$variable.number==column.number,2:3]
if(nrow(minmax)>0) {
minval<-minmax[1]
maxval<-minmax[2]
datacol<-car::recode(datacol,paste0("lo:",(minval-.000001),"=NA;",(maxval+.000001),":hi=NA"))
}
}
datacol<-car::recode(datacol,recodestr)
recoded<-data.frame("col"=datacol)
colnames(recoded)<-x$variable.name
return(recoded)
}))
}
#' Create a tidygraph object from a digram.object
#'
#' @param do A digram.object.
#' @param LD A data.frame with columns for item1, item2 and gamma coefficient. Items can be variable.names, variable.columns or item numbers.
#' @param DIF A data.frame with columns for item, exogenous variable and gamma coefficient. Items and exogenous variables can be variable.names, variable.columns or item numbers.
#' @param summarize.testlets If true, don't collapse testlets, but summarize number of local dependent item pairs (both ways) in each testlet.
#' @usage as_tbl_graph(do)
#' @return Returns a tbl_graph
#' @export
#' @seealso [tidygraph::as_tbl_graph()]
#' @examples
#' library(ggraph)
#' library(tidygraph)
#' dograph<-as_tbl_graph(DHP)
#' ggraph(dograph,layout="fr")+geom_edge_link()+geom_node_label(mapping = aes(label=label))
#' ggraph(dograph,layout="fr")+geom_edge_link()+geom_node_label(mapping = aes(label=name))
#'
#' # Show arrows
#' ggraph(dograph,layout="fr")+geom_edge_link(end_cap = square(.5, 'cm'),arrow = arrow(angle=10,length=unit(.2,"cm")))+geom_node_label(mapping = aes(label=label))
#'
#' # A digram.object with a testlet
#' dograph<-as_tbl_graph(code.testlet(DHP,"a b c"))
#' ggraph(dograph,layout="fr")+geom_edge_link(end_cap = square(.5, 'cm'),arrow = arrow(angle=10,length=unit(.2,"cm")))+geom_node_label(mapping = aes(label=label))
#'
#' # Local dependecy and DIF
#' dograph<-as_tbl_graph(DHP,LD=data.frame(item1=c(5,3),item2=c(6,5),gamma=c(.53,-.38)),DIF=data.frame(item=c(4),exo=c(8),gamma=c(.27)))
#' ggraph(dograph,layout="fr")+geom_edge_link(mapping=aes(label=ifelse(!is.na(gamma),abs(gamma),""),alpha=ifelse(!is.na(gamma),gamma,1),color=ifelse(!is.na(gamma),2,1)),angle_calc="along",label_dodge=unit(.25,"cm"),end_cap = square(.5, 'cm'),arrow = arrow(angle=10,length=unit(.2,"cm")))+geom_node_label(mapping = aes(label=label))
as_tbl_graph.digram.object<-function(do,items=NULL,exo.names=NULL,exo.labels=exo.names,LD=NULL,DIF=NULL,summarize.testlets=F){
if(!inherits(do,"digram.object")) stop("do needs to be of class digram.object")
resp<-do$recoded
if(is.null(items)) items<-1:do$recursive.structure[1]
if(is.null(exo.names)) {
exo<-if(ncol(resp)>do$recursive.structure[1]) (do$recursive.structure[1]+1):ncol(resp) else NULL
exo.names<-get.variable.names(do,exo)
if(inherits(exo,"character")) exo<-match(exo,exo.names)
exo.labels<-get.labels(do,exo)
}
item.names<-get.variable.names(do,items)
if(inherits(items,"character")) items<-match(items,item.names)
item.labels<-get.labels(do,items)
nitems<-length(items)
nexo<-length(exo.names)
ntestlets<-0
if(!is.null(LD) || !is.null(DIF)) {
if(!summarize.testlets) {
environment(collapse.testlets) <- environment()
collapse.testlets()
ntestlets<-sum(items>length(do$variables))
}
} else if(!is.null(do$testlets)) {
testlet.edges<-lapply(do$testlets,function(x) {
testlet<-x$testlet
nitem<-length(testlet)
from<-unlist(sapply(1:(nitem-1),function(y) rep(testlet[y]+2,nitem-y))) # We add 2 because we have theta and total score
to<-unlist(sapply(2:nitem,function(y) testlet[y:nitem]+2)) # Ditto
c(from,to,to,from)
})
testlet.edges<-as.data.frame(matrix(unlist(testlet.edges),ncol=2))
colnames(testlet.edges)<-c("from","to")
testlet.edges$gamma<-NA
}
if(!is.null(do$split)) {
warning("Graphing of split items not implemented yet.")
}
if(!is.null(LD)) {
if(nrow(LD)>0){
if(ncol(LD)!=3) stop("LD needs to have three columns: item1, item2, and gamma")
colnames(LD)<-c("from","to","gamma")
maxnl<-max(nchar(LD$from),nchar(LD$to))
LD$from<-apply(array(LD$from),1,function(x) which(x==item.names.shorten(item.names,maxnl))) + 2 # We add 2 because we have theta and total score
LD$to<-apply(array(LD$to),1,function(x) which(x==item.names.shorten(item.names,maxnl))) + 2
}
}
if(!is.null(DIF)) {
if(nrow(DIF)>0){
if(ncol(DIF)!=3) stop("DIF needs to have three columns: item, exo, and gamma")
colnames(DIF)<-c("to","from","gamma")
#maxnl<-max(nchar(apply(array(DIF$from),1,sub,pattern=",.*",replacement="")),nchar(DIF$to))
maxnl<-max(nchar(DIF$to))
# DIF$from<-apply(array(DIF$from),1,function(x) which(sub(",.*","",x)==item.names.shorten(exo.names,maxnl))) + 2 + nitems # We add 2 and nitems because we have theta and total score and items before exos
DIF$to<-sapply(DIF$to,function(x) {a<-which(x==item.names.shorten(item.names,maxnl)); ifelse(is.null(a),NA,a)}) + 2
DIF$from<-sapply(DIF$from,match,exo.names) + 2 + nitems # We add 2 and nitems because we have theta and total score and items before exos
hasNA<-is.na(DIF$to) | is.na(DIF$from)
if(any(hasNA)) DIF<-DIF[!hasNA,]
#DIF$to<-apply(array(DIF$to),1,match,item.names) + 2 # We add 2 because we have theta and total score
}
}
# nitems<-length(items)
# nexo<-length(exo)
# c("Theta","Total Score")
nodes<-rbind(data.frame(name=c("θ","S"),label=c("θ","S"),type=rep("Ability",2),stringsAsFactors = F),#,column.number=c(0,0)
data.frame(name=item.names,label=item.labels,type=c(rep("Item",nitems-ntestlets),rep("Testlet",ntestlets)))
)
if(nexo>0) nodes<-rbind(nodes, #t(sapply(1:do$recursive.structure[1], function(x) unlist(do$variables[[x]][c("variable.name","variable.label","column.number")]))),
data.frame(name=exo.names,label=exo.labels,type=rep("Exo",nexo))#t(sapply((do$recursive.structure[1]+1):do$recursive.structure[2], function(x) unlist(do$variables[[x]][c("variable.name","variable.label","column.number")])))
)
edges<-data.frame(
from=c(1,rep(2,nitems)),
to=c(2,3:(nitems+2)))
if(nexo>0) edges<-rbind(edges,data.frame(
from=(nitems+3):(nitems+nexo+2),
to=rep(1,nexo)))
edges$gamma<-NA
if(exists("testlet.edges")) edges<-rbind(edges,testlet.edges)
if(exists("LD")) edges<-rbind(edges,LD)
if(exists("DIF")) edges<-rbind(edges,DIF)
tbl_graph(nodes=nodes,edges=edges,directed = T)
}
#' Code items as a testlet/local dependant
#'
#' @param do A digram.object
#' @param testlet String. The items that are part of a testlet/are local dependant. Give as a list of comma separated variable numbers, variable labels or variable names. If there is spaces in the variable names, they can be delimited by ".
#' @param names A vector of strings naming the testlets. If names are not given, they are composed of the testlet item names.
#' @param append Logical. Append new testlet variables to the existing ones.
#'
#' @return Returns a digram.object with the revised testlet-data.frame.
#' @export
#' @details Local dependence is often caused by items sharing a common stimulus. This is called testlets or item bundles (Wang & Wilson 2006. Coding for Local Dependence is the same as identifying a testlet or an item bundle.
#' @references
#' Wang, W.-C., & Wilson, M. (2005). The Rasch Testlet Model. *Applied Psychological Measurement*, 29(2), 126–149. https://doi.org/10.1177/0146621604271053
#' @examples
#' data(DHP)
#' do<-code.testlet(do=DHP,testlet=c("ab,dhp36 dhp37,5 6"))
code.testlet<-function(do,testlet=NULL,names=NULL,labels=NULL,append=F) {
do$commandsrun<-append(do$commandsrun,sys.call())
if(!inherits(do,"digram.object")) stop("do needs to be a digram.object")
if(is.null(testlet)) stop("You need to provide a list of variables which are local dependent")
testlet.strs<-ifelse(grepl("\"",testlet),strsplit(x = testlet, split ='(?<=")\\s*,\\s*(?=")',perl = T),testlet.strs<-strsplit(x = testlet, split ="\\s*,\\s*"))[[1]]
no<-0
testlets<-lapply(testlet.strs,function(x) {
no<<-no+1
if(grepl("\"",x)) {
res<-gregexpr('"[^"]+"\\s*',x)[[1]]
res<-c(res,nchar(x))
testlet<-c()
for(i in 2:length(res)) {
testlet<-c(testlet,trimws(substr(x,res[i-1],res[i]-1))) #gsub("\"","",)
}
} else testlet<-strsplit(x," |\\+")[[1]]
if(length(testlet)<2) testlet<-strsplit(testlet,"")[[1]]
testlet<-sapply(testlet,function(x) ifelse(grepl("^[0-9]$",x),as.numeric(x),x))
testlet<-sapply(testlet,get.column.no,do=do)
label<-ifelse(is.null(labels[no]),
paste(get.labels(do,testlet),collapse = " + "),
labels[no])
if(is.null(names[no])){
itemnames<-get.variable.names(do,testlet)
name<-str_overlap(itemnames)
if(name=="") name<-paste(itemnames,collapse = " + ")
} else name<-names[no]
list(testlet=testlet,name=name,label=label)
})
if(append && !is.null(do$testlets)) {
do$testlets<-append(do$testlets,testlets)
} else do$testlets<-testlets
do
}
str_overlap<-function(arr,str="") {
if(length(arr)==0) return(str)
if(str=="") return(str_overlap(arr[-1],arr[1]))
str2=arr[1]
for(i in 1:nchar(str)) {
if(substr(str,i,i)!=substr(str2,i,i)) {i<-i-1;break}
}
return(sub("[-_.:;,+?]\\s*$","",str_overlap(arr[-1],substr(str,1,i))))
}
# Internal function to collapse testlets
collapse.testlets<-function(){
if(!is.null(do$testlets)) {
testletno<-0
for(tlist in do$testlets){
testletno<-testletno+1
testlet<-tlist$testlet
testlet<-(testlet[testlet %in% items])
newitem<-length(do$variables)+testletno
resp[,newitem]<<-NA
if(length(testlet)>0) {
items<<-c(items,newitem)
olditems<-which(items %in% testlet)
# Recode
resp[,newitem]<<-apply(resp[,testlet],1,sum,na.rm=T)
# Combine names and labels
newname<-tlist$name
item.names<<-c(item.names,newname)
colnames(resp)[newitem]<<-newname
item.labels<<-c(item.labels,tlist$label)
item.names<<-item.names[-olditems]
item.labels<<-item.labels[-olditems]
# Remove item-nums
items<<-c(items[-olditems])
}
}
}
}
#' Code items to be split (as having DIF)
#'
#' @param do A digram.object
#' @param split.var String. The variables to split (having DIF). A comma separated list of variable numbers, labels or names.
#' @param split.on String. The exogenous variables to split on (causing DIF). A comma separated list of exogenous variable numbers, labels or names.
#' @param append Logical.
#' @details If more variables and exogenous variables are given, all possible combinations of these are split.
#' @return a modified digram.object
#' @export
#'
#' @examples
#' data(DHP)
#' do<-code.split(DHP,"a,b","under60")
code.split<-function(do,split.var,split.on,append=F) {
do$commandsrun<-append(do$commandsrun,sys.call())
if(!inherits(do,"digram.object")) stop("do needs to be a digram.object")
if(is.null(split.var)) stop("You need to provide one or more variables to split")
if(is.null(split.on)) stop("You need to provide one or more exogenous variables to split on")
if(append && !is.null(do$splits)) splits<-do$splits else splits<-c()
vars<-strsplit(x = split.var, split =" *, *")[[1]]
var.nums<-sapply(vars,get.column.no,do=do)
exos<-strsplit(x = split.on, split =" *, *")[[1]]
exo.nums<-sapply(exos,get.column.no,do=do)
do$splits<-rbind(splits,expand.grid(var=var.nums,exo=exo.nums))
# TODO: Update testlets
do
}
#' Combine several digram.objects into a combined.digram.objects.
#'
#' @param ... digram.objects to combine
#' @param project Name of project
#'
#' @return Returns a combined.digram.objects
#' @export
#' @details This function is useful when preparing a multidimensional analysis. Each digram.object represents a dimension.¸
#' @examples
#' do1<-DHP
#' do2<-DHP
#' digram.objects.combine(do1,do2)
digram.objects.combine<-function(...,project="") {
digram.objects<-list(...)
nr<-0
for(do in digram.objects) {
if(!inherits(do,what = "digram.object")) stop("The objects need to be of class digram.object.")
if(nr==0) nr<-nrow(do$data)
if(nrow(do$data)!=nr) stop("The digram.objects need to have the same number of data rows.")
nr<-nrow(do$data)
}
cdo<-list(project=project,digram.objects=digram.objects)
class(cdo)<-"combined.digram.objects"
cdo
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.