Nothing
DIFlasso <-
function (Y, X, l.lambda = 20, grouped = TRUE, trace = FALSE, df.type = c("YuanLin", "L2norm"))
{
vardiffs <- abs(apply(X,2,var)-1)
if(sum(vardiffs)>1e-6)
warning("X is not standardized. In most cases, X should be standardized!")
if(sum(!apply(Y,2,unique) %in% c(0,1))>0)
stop("Y either contains values unequal to 0 or 1 or items with only 0 or only 1")
if(!is.data.frame(X))
stop("X has to be a data.frame")
if(!is.data.frame(Y))
stop("Y has to be a data.frame")
# print trace?
if(grouped){
if (trace) {
trace <- 1
}else {
trace <- 0
}
}
# number of persons
P <- nrow(Y)
# number of items
I <- ncol(Y)
# number of covariates
l <- ncol(X)
# which type of dfs should be used
df.type <- match.arg(df.type)
if(!(df.type %in% c("YuanLin", "L2norm"))){
stop("Unknown df.type")
}
# Convert data.frames into matrices
names.y <- names(Y)
names.x <- names(X)
Y <- matrix(as.double(as.matrix(Y)),ncol=I,byrow=FALSE)
X <- matrix(as.double(as.matrix(X)),ncol=l,byrow=FALSE)
# which persons have to be excluded?
exclu1 <- rowSums(Y) != I & rowSums(Y) != 0
exclu <- rep(exclu1, each = I)
# create the part of the design matrix containing the covariates
xp <- matrix(0, ncol = l * I, nrow = P * I)
suppressWarnings(for (q in 1:P) {
xp[((q - 1) * I + 1):(q * I), ] <- matrix(as.double(c(X[q,
], rep(0, l * I))), byrow = T, ncol = l * I, nrow = I)
})
xp <- xp[exclu, ]
# create the response vector
XP <- c()
for (t in 1:P) {
XP <- c(XP, Y[t, ])
}
XP <- XP[exclu]
# new (reduced) number of persons
ex <- sum(!exclu1)
P <- P - ex
# create the part of the design matrix containing the columns for person parameters theta
help1 <- c(1, rep(0, P - 2))
help2 <- c(rep(help1, I), 0)
suppressWarnings(help3 <- matrix(help2, ncol = P - 1, nrow = P *
I, byrow = TRUE))
help3[((P - 1) * I + 1):(P * I), ] <- 0
# create the part of the design matrix containing the columns for item parameters beta
help4 <- rep(diag(I), P)
help5 <- matrix(help4, ncol = I, nrow = P * I, byrow = TRUE)
# design matrix
design.matrix <- cbind(help3, -help5, -xp)
# index vector for group lasso penalization
if(grouped){
index = c(rep(NA, sum(exclu1) + I - 1), rep(1:I, each = l))
# calculate maximal lambda
lmax <- lambdamax(design.matrix, XP, index, penscale = sqrt,
model = LogReg(), center = FALSE, standardize = FALSE)
# create sequence of lambdas
lambda.seq <- seq(from = lmax * 1.05, to = 1e-06, length = l.lambda)
# compute group lasso model
suppressWarnings(m2 <- grplasso(design.matrix, XP, index,
lambda = lambda.seq, penscale = sqrt, model = LogReg(),
center = FALSE, standardize = FALSE, control = grpl.control(trace = trace)))
# index for penalized parameters
index2 <- index[!is.na(index)]
# parameters for smallest lambda
coef.unpen <- m2$coef[!is.na(index), l.lambda]
# norm for parameters for smallest lambda
norm.unpen <- c()
for (o in 1:max(index2)) {
norm.unpen[o] <- sqrt(sum(coef.unpen[index2 == o]^2))
}
# group sizes
p.j <- table(index2)
# matrix of coefficients
coefs.unrest <- coefs <- m2$coef
# matrix for gammas
coefs.grp <- coefs[!is.na(index), ]
# norms of gammas
norm.gamma <- matrix(0, ncol = ncol(coefs), nrow = max(index2))
for (j in 1:ncol(coefs)) {
for (o in 1:max(index2)) {
norm.gamma[o, j] <- sqrt(sum(coefs.grp[index2 == o,
j]^2))
}
}
if(df.type == "YuanLin"){
# degrees of freedom of all gammas
df.grp <- colSums(norm.gamma > 0) + colSums((norm.gamma/norm.unpen) *
(c(p.j) - 1))
# degrees of freedom for thetas and betas
df.unpen <- sum(is.na(index))
# total degrees of freedom
df <- df.grp + df.unpen
}else{
# degrees of freedom of all gammas
# different from yuan and lin
df.grp <- colSums((norm.gamma/norm.unpen) * c(p.j))
# degrees of freedom for thetas and betas
df.unpen <- sum(is.na(index))
# total degrees of freedom
df <- df.grp + df.unpen
}
# predicted probabilities
pi.i <- predict(m2, type = "response")
# log likelihood
loglik <- colSums(XP * log(pi.i) - XP * log(1 - pi.i) + log(1 - pi.i))
}else{
m2 <- penalized(response = XP, penalized = design.matrix[,-(1:(P+I-1))],
unpenalized = design.matrix[,1:(P+I-1)], steps = l.lambda,
lambda1 = 1e-06, trace = trace)
coefs.unrest <- coefs <- matrix(unlist(lapply(m2,penalized::coef,"all")),ncol=l.lambda)
loglik <- sapply(m2,penalized::loglik)
coefs.grp <- matrix(unlist(lapply(m2,penalized::coef,"penalized")),ncol=l.lambda)
if(df.type == "YuanLin"){
df <- colSums(coefs!=0)
}else{
norm <- sqrt(coefs.grp^2)
norm <- norm/norm[,ncol(norm)]
df <- P + I - 1 + colSums(norm)
}
lambda.seq <- unlist(lapply(m2,function(x)x@lambda1))
}
# BIC
bic <- -2 * loglik + df * log(length(XP))
# AIC
aic <- -2 * loglik + df * 2
####################
# which item will be the reference/anchor item
ref.item <- NULL
ind0 <- l.lambda
while(is.null(ref.item)){
cand <- which(colSums(matrix(coefs.grp[,ind0],nrow=l))==0)
if(length(cand)>0){
ref.item <- max(cand)
}else{ind0 <- ind0-1}
}
restrict <- function(x){
x <- x - rep(x[(l*ref.item-l+1):(l*ref.item)],I)
return(x)}
# center gamma around refernce item, only relevant when gamma_ref.item not zero
coefs.grp <- apply(coefs.grp,2,restrict)
# coefs2: add one row to coefs matrix because theta_P is not set zero anymore
coefs2 <- matrix(0,nrow=nrow(coefs)+1,ncol=ncol(coefs))
coefs2[(P+I+1):(nrow(coefs2)),] <- coefs.grp
# estimated thetas, centered around reference item ref.item
if(grouped){
theta.hat <- predict(m2)[ref.item +seq(from=0, to= P*I-I, by=I),]
}else{
linpred <- matrix(unlist(lapply(m2,penalized::linear.predictors)),ncol=l.lambda)
theta.hat <- linpred[ref.item +seq(from=0, to= P*I-I, by=I),]
}
# estimated betas, centered around reference item ref.item
beta.hat <- coefs[(P):(P + I - 1), ]
beta.hat <- t(apply(beta.hat,1,function(x){return(x - beta.hat[ref.item,])}))
coefs2[1:P, ] <- theta.hat
# estimated betas
coefs2[(P+1):(P + I), ] <- beta.hat
dif.mat <- matrix(coefs.grp[,which.min(bic)],nrow=l, dimnames=list(names.x,names.y))
no.dif.cols <- which(coefs.grp[,which.min(bic)]==0)
design.matrix <- insertCol(design.matrix,P,c(rep(0,(P-1)*I),rep(1,I)))
design.matrix<- design.matrix[,-c(P+ref.item,P+I+no.dif.cols)]
design.matrix <- cbind(design.matrix,XP)
dif.items <- which(colSums(dif.mat)!=0)
returns <- list(theta = theta.hat, beta = beta.hat, gamma = coefs.grp, P = P, I = I, m = l,
logLik = logLik, BIC = bic, AIC = aic, df = df, refit.matrix = design.matrix,
lambda = lambda.seq, ref.item = ref.item, dif.mat = dif.mat, dif.items = dif.items,
names.y = names.y, names.x = names.x, removed.persons = which(!exclu1))
class(returns) <- "DIFlasso"
return(returns)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.