Nothing
### This is the marginal likelihood function to be maximized
# it is weighted if weight var is not null
#par - the score vector
#getIndLnL - logical, should the likelihood be calculated only for a subset
#X_ - the matrix of X values, defaults to all X , used for calculating group likelihoods
#i - list of indexes for individual likelihood (used to subset weights and r22)
#repweightVar - the replicate weight for replicate variance estimation
#inside - TRUE if inside mix, otherwise linear and inside=FALSE
fn.regression <- function(X_, i=NULL, wv=NULL, rr1, stuDat, nodes, inside=TRUE, fast=TRUE, verbose=FALSE) {
if(length(wv) > 1) {
stop("Must specify NULL weight or exactly one.")
}
# any lets the NULL case pass
if(any(! wv %in% colnames(stuDat))) {
stop("weight wv not in X_ wv=", dQuote(wv), " column names of X_", pasteItems(dQuote(colnames(X_))))
}
K <- ncol(X_)
# fix rr1 to just regard this data
if(!is.null(i)) {
rr1p <- rr1[,i,drop=FALSE]
} else{
rr1p <- rr1
i <- 1:nrow(X_)
}
# fix weights
if(!is.null(wv)) {
w <- stuDat[i, wv]
} else {
w <- rep(1, nrow(X_))
}
dTheta <- nodes[2] - nodes[1]
# for numerical stability, premultiply by rr1m, then remove it when calculating sum(w * log(cs))
rr1m <- (1/(.Machine$double.eps)^0.5)/apply(rr1p, 2, max)
rr1q <- t(t(rr1p) * rr1m)
insd <- inside
Xnodes <- t(matrix(nodes, nrow=length(nodes), ncol=nrow(X_)))
prev_gr <- rep(Inf, ncol(X_))
max_x_gr <- ncol(X_) * 1e-5
function(par, returnPosterior=FALSE, gr=FALSE, hess=FALSE) {
# break up par into beta and residual components
B <- par[1:K]
if(insd) {
s2 <- ifelse(par[(K+1)] < 1, exp(par[(K+1)] - 1), par[(K+1)]^2)
} else {
s2 <- par[(K+1)]^2
}
s2 <- max(s2, 1e-6)
s <- sqrt(s2)
if(hess) {
# form prediction
XB <- as.vector(X_ %*% B)
# residual, per indivudal (outer sapply), per node (inner sapply)
nodes.minus.XB <- t(Xnodes - XB)
# likelihood of normal distribution for residuals nodes.minus.XB
rr2 <- rr1p * ((1/(sqrt(2 * pi * s2))) * exp(-((nodes.minus.XB)^2/(2 * s2))))
# final parameter done numerically
par_ <- par[(K+1)] + 1e-6
# apply non-linear transform
s2_ <- ifelse(par_ < 1, exp(par_ - 1), par_^2)
s_ <- sqrt(s2_)
# likelihood of normal distribution for residuals nodes.minus.XB
rr2_ <- rr1p * ((1/(sqrt(2 * pi * s_^2))) * exp(-((nodes.minus.XB)^2/(2 * s_^2))))
trr2mxb <- t(rr2 * nodes.minus.XB)
colSums_rr2 <- colSums(rr2)
# Hess loop in rcpp
if(fast) {
H <- calcHess(K, rr2, rr2_, trr2mxb, X_, nodes.minus.XB, w, s2, s_)
if(any(H %in% c(NA, NaN, Inf, -Inf))) {
H <- slowHess(K, rr2, rr2_, trr2mxb, X_, nodes.minus.XB, w, s2, s_)
}
} else {
H <- slowHess(K, rr2, rr2_, trr2mxb, X_, nodes.minus.XB, w, s2, s_)
}
# now variance of variance term
gr0 <- -2*sum(w * colSums( rr2 * (-1/s + 1*(nodes.minus.XB)^2/s^3))/colSums_rr2)
# break up par into beta and residual components
par_ <- par[(K+1)] + 1e-6
if(insd) {
s2_ <- ifelse(par_ < 1, exp(par_ - 1), par_^2)
} else {
s2_ <- par_
}
s_ <- sqrt(s2_)
# likelihood of normal distribution for residuals nodes.minus.XB
rr2_ <- rr1p * ((1/(sqrt(2 * pi * s_^2))) * exp(-((nodes.minus.XB)^2/(2 * s_^2))))
gr_ <- -2*sum(w*colSums( rr2_ * (-1/s_ + 1*(nodes.minus.XB)^2/s_^3))/colSums(rr2_))
H[K+1,K+1] <- (gr_ - gr0)/1e-6
return(H)
}
if(gr) {
XB <- as.vector(X_ %*% B)
# residual, per indivudal (outer sapply), per node (inner sapply)
nodes.minus.XB <- t(Xnodes - XB)
# likelihood of normal distribution for residuals nodes.minus.XB
rr2 <- rr1p * ((1/(sqrt(2 * pi * s2))) * exp(-((nodes.minus.XB)^2/(2 * s2))))
gr_res <- rep(0, length(par))
trr2mxb <- t(rr2 * nodes.minus.XB)
denom <- colSums(rr2)
rs <- w * rowSums( trr2mxb / s2)/denom
for(i in 1:K){
gr_res[i] <- -2 * sum( X_[,i] * rs)
# works like: gr_res[i] <- -2 * sum(w * rowSums( trr2mxb * X_[,i] / s2)/denom)
}
gr_res[K+1] <- -2* sum(w * colSums( rr2 * (-1/s + 1*(nodes.minus.XB)^2/s^3))/denom)
if(par[(K+1)] < 1) {
gr_res[K+1] <- gr_res[K+1] * 0.5 * exp(0.5*(par[(K+1)]-1))
}
gr_res[is.na(gr_res)] <- 0
prev_gr <<- gr_res
return(gr_res)
}
# form prediction
XB <- X_ %*% B
# residual, per indivudal (outer sapply), per node (inner sapply)
nodes.minus.XB <- t(t(matrix(nodes, nrow=length(nodes), ncol=nrow(XB))) - as.vector(XB))
# likelihood of normal distribution for residuals nodes.minus.XB
rr2 <- rr1q * ((1/(sqrt(2 * pi * s2))) * exp(-((nodes.minus.XB)^2/(2 * s2))))
# aggregate likelihood, weight, multiply by -2 to make it a deviance
cs <- pmax(.Machine$double.eps, colSums(rr2))
if(returnPosterior) {
sd <- mu <- rep(0, ncol(rr1q))
for(i in 1:ncol(rr1q)) {
fi <- rr2[,i]
mu[i] <- sum(nodes*fi)/sum(fi)
sd[i] <- sqrt(sum((nodes - mu[i])^2 * fi)/sum(fi))
}
return(data.frame(id=rownames(X_), mu=mu, sd=sd, stringsAsFactors=FALSE))
}
res <- -2*( sum(w * (log(cs) - log(rr1m)) ) + sum(w) * log(dTheta) )
return( res )
}
}
interleaveMatrixRows <- function(A,B) {
stopifnot(nrow(A) == nrow(B))
stopifnot(ncol(A) == ncol(B))
res <- matrix(NA, nrow=nrow(A)*2, ncol=ncol(A))
for(i in 1:nrow(A)) {
res[1+(i-1)*2,] <- A[i,]
res[2+(i-1)*2,] <- B[i,]
}
return(res)
}
#' @importFrom Matrix Diagonal
#' @importFrom stats spline predict
fnCor <- function(Xb1, Xb2, s1, s2, w, rr1, rr2, nodes, fine=FALSE, fast = TRUE) {
# distance between nodes
dTheta <- nodes[2] - nodes[1]
# get residuals
numNodes <- length(nodes)
numUnits <- length(Xb1)
nodes.minus.Xb1 <- t(Xb1 - t(matrix(nodes, nrow=numNodes, ncol=numUnits)))
nodes.minus.Xb2 <- t(Xb2 - t(matrix(nodes, nrow=numNodes, ncol=numUnits)))
function(par, returnPosterior=FALSE, newXb1=NULL, newXb2=NULL, gr=FALSE, hess=FALSE, superFine=0) {
if(!is.null(newXb1)) {
if(length(newXb1) != numUnits) {
stop("The dimensions of Xb1 must remain the same.")
}
nodes.minus.Xb1 <- t(newXb1 - t(matrix(nodes, nrow=numNodes, ncol=numUnits)))
}
if(!is.null(newXb2)) {
if(length(newXb2) != numUnits) {
stop("The dimensions of Xb2 must remain the same.")
}
nodes.minus.Xb2 <- t(newXb2 - t(matrix(nodes, nrow=numNodes, ncol=numUnits)))
}
if(returnPosterior & gr) {
stop("cannot return posterior gradient")
}
if(hess & gr) {
stop("can only return one of Hessian or gradient")
}
# will we go fine
finef <- min(floor( (abs(par))^1.5 ), 10) # cap at 10 before factor
if(superFine > 0) {
finef <- superFine
}
dofine <- finef > 1
# par is propose correlation in Fisher-Z space, r is in correlation space
r <- tanh(par)
cov <- s1 * s2 * r
Sigma <- matrix(c(s1^2, cov, cov, s2^2), ncol=2)
detSigma <- s1^2*s2^2 - cov^2
if(gr) {
dcov <- s1 * s2 * 1/(cosh(par)^2)
dDetSigma <- -2 * cov * s1 * s2 * (1/ (cosh(par)^2) )
}
if(hess) {
dcov <- s1 * s2 * 1/(cosh(par)^2)
ddcov <- -2 * s1 * s2 * sinh(par)/(cosh(par)^3)
dDetSigma <- -2 * cov * s1 * s2 * (1/ (cosh(par)^2) )
ddDetSigma <- -2 * dcov * s1 * s2 * (1/ (cosh(par)^2) ) + 4 * cov * s1 * s2 * sinh(par)/(cosh(par)^3)
}
#
# calculates vector of (x-mu)T %*% SigmaInv %*% (x-mu)
# where x-mu is for an individual member is nodes.minux.Xb1/2, a vector of length 2
rr_ <- vector(length=numUnits)
if( (gr | hess) & !dofine) {
drr_ <- rr_ # another container for derivatives
if(hess) {
ddrr_ <- rr_ # another container for derivatives
}
}
# this is a 2D integral
# for nodes in dimension 1
mat <- matrix(NA, nrow=numNodes, ncol=numNodes)
if(returnPosterior) {
# set constants to zero
mui2 <- muj2 <- mui <- muj <- m2 <- rep(0, numUnits)
}
for(i in 1:numNodes) {
# node differences on subscale i
nodesi <- nodes.minus.Xb1[i, ]
# student probability of the nodes on subscale i
rr1i <- rr1[i, ]
# for nodes in dimension 2
for(j in 1:numNodes) {
# node differences on subscale j
nodesj <- nodes.minus.Xb2[j, ]
# student probability of the node on subscale j
rr2j <- rr2[j, ]
# for each
# the next line works, these are equal: mvnResid[4] (next line) and
# t(epsilon[,4]) %*% SigmaInv %*% epsilon[,4]
# also equivalent to: apply(epsilon * (SigmaInv %*% epsilon), 2, sum)
# these use this sigma inverse, though solve could be used as well
# SigmaInv <- 1/detSigma * matrix(c(s2^2, -1*cov, -1*cov, s1^2), ncol=2)
# and this defintion of epsilon
# epsilon <- rbind(nodesi, nodesj)
mvnResid <- (1/detSigma) * (s1^2 * nodesi^2 - 2 * cov * nodesi * nodesj + s2^2 * nodesj^2)
# check a single entry with this code:
# a <- c(nodes.minus.Xb1[i,k], nodes.minus.Xb2[j,k])
# t(a) %*% SigmaInv %*% a - mvnResid[k] # = 0, they are the same
# rr_:
# put 1/sqrt(detSigma) into exp because it can be incredibly large while mvnResid is
# incredibly small, making the entire expression resolve to zero.
rr_ij <- rr1i * rr2j * (2/pi) * exp(-1/2 * (log(detSigma) + mvnResid))
if(returnPosterior) {
mui <- mui + rr_ij * nodes[i]
muj <- muj + rr_ij * nodes[j]
mui2 <- mui2 + rr_ij * nodes[i]^2
muj2 <- muj2 + rr_ij * nodes[j]^2
m2 <- m2 + rr_ij * nodes[i] * nodes[j]
}
rr_ <- rr_ + rr_ij
if(gr & !dofine){
# mvnresid gradient
mvnResidNum <- (s1^2 * nodesi^2 - 2*cov * nodesi*nodesj + s2^2 * nodesj^2)
dMvnResidNum <- - 2* dcov * nodesi * nodesj
dMvnResid <- (1/detSigma^2) * (detSigma * dMvnResidNum - mvnResidNum * dDetSigma)
# rr_ij gradient
drr_ij <- -(1/2) * rr_ij * (dDetSigma/detSigma + dMvnResid)
drr_ <- drr_ + drr_ij
}
if(hess & !dofine) {
mvnResidNum <- (s1^2 * nodesi^2 - 2*cov * nodesi*nodesj + s2^2 * nodesj^2)
dMvnResidNum <- - 2* dcov * nodesi * nodesj
ddMvnResidNum <- - 2* ddcov * nodesi * nodesj
dMvnResid <- (1/detSigma^2) * (detSigma * dMvnResidNum - mvnResidNum * dDetSigma)
num <- (detSigma * dMvnResidNum - mvnResidNum * dDetSigma)
dnum <- (dDetSigma * dMvnResidNum + detSigma * ddMvnResidNum - dMvnResidNum * dDetSigma - mvnResidNum * ddDetSigma)
ddMvnResid <- (1/detSigma^4) * (detSigma^2 * dnum - num * 2 * detSigma * dDetSigma)
# rr_ij gradient
drr_ij <- -(1/2) * rr_ij * (dDetSigma/detSigma + dMvnResid)
drr_ <- drr_ + drr_ij
ddrr_ij <- -(1/2) * rr_ij * ((detSigma*ddDetSigma-dDetSigma^2)/detSigma^2 + ddMvnResid) -(1/2) * drr_ij * (dDetSigma/detSigma + dMvnResid)
ddrr_ <- ddrr_ + ddrr_ij
}
mat[i,j] <- sum(rr_ij)
}
} # end for(i in 1:numNodes)
cs <- pmax(.Machine$double.eps, rr_)
if( (gr | hess) & !dofine) {
dcs <- ifelse(rr_ < .Machine$double.eps, 0, drr_)
if(hess) {
ddcs <- ifelse(rr_ < .Machine$double.eps, 0, ddrr_)
}
}
if(returnPosterior){ # & finef <= 1) {
if(finef <= 1) {
mu1 <- mui/rr_ # mean of var1
mu2 <- muj/rr_ # mean of var2
Cov <- (m2/rr_ - mu1*mu2) # covariance
mu12 <- mui2/rr_ # mean of mu1^2
mu22 <- muj2/rr_ # mean of mu2^2
rho <- Cov/sqrt( (mu1^2 - mu12) * (mu2^2 - mu22) )
return(rho)
} else {
# reset sums to zero
mui2 <- muj2 <- mui <- muj <- m2 <- m2 * 0
}
}
if(!fine | finef <=1) {
if(hess) {
return(-2 * sum(w * (cs*ddcs - dcs^2)/(cs^2)))
}
if(gr) {
return(-2 * sum(w * dcs/cs))
}
return(-2 * (sum(w * log(cs)) + 2 * sum(w) * log(dTheta)))
}
rr1f <- matrix(NA, nrow=nrow(rr1)*finef, ncol=ncol(rr1))
nodesf <- nodes
for(i in 2:finef) {
nodesf <- c(nodesf, nodes + dTheta * (i-1)/finef)
}
nodesf <- sort(nodesf) - dTheta/2
for(i in 1:ncol(rr1)) {
rr1f[,i] <- spline(x=nodes, y=rr1[,i], n=length(nodesf), xmin=min(nodesf), xmax=max(nodesf))$y
}
rr2f <- matrix(NA, nrow=nrow(rr2)*finef, ncol=ncol(rr2))
for(i in 1:ncol(rr1)) {
rr2f[,i] <- spline(x=nodes, y=rr2[,i], n=length(nodesf), xmin=min(nodesf), xmax=max(nodesf))$y
}
nodes.minus.Xb1f <- t(Xb1 - t(matrix(nodesf, nrow=numNodes*finef, ncol=numUnits)))
nodes.minus.Xb2f <- t(Xb2 - t(matrix(nodesf, nrow=numNodes*finef, ncol=numUnits)))
rr2_ <- vector(length=numUnits)
if(gr | hess) {
drr2_ <- rr2_
if(hess) {
ddrr2_ <- rr2_
}
}
for(i in 2:(numNodes-1)) {
nodesfi <- nodes.minus.Xb1f[(i-1)*finef + 1:finef, ]
rr1fi <- rr1f[(i-1)*finef + 1:finef, ]
for(j in 2:(numNodes-1)) {
if(any( mat[(i-1):(i+1),(j-1):(j+1)] * 1e4 >= max(mat)) | (mat[i,j] * 1e8 >= max(mat))) {
# close to an important value,
rr2fj <- rr2f[(j-1)*finef + 1:finef, ]
nodesfj <- nodes.minus.Xb2f[(j-1)*finef + 1:finef, ]
for(fine_i in 1:finef) {
fnodesi <- nodesfi[fine_i, ]
for(fine_j in 1:finef) {
fnodesj <- nodesfj[fine_j, ]
mvnResid <- (1/detSigma) * (s1^2 * fnodesi^2 - 2*cov * fnodesi*fnodesj + s2^2 * fnodesj^2)
if(fast) {
rr_ij <- calcRrij(fine_i-1, fine_j-1, rr1fi, rr2fj, detSigma, mvnResid)
} else {
rr_ij <- rr1fi[fine_i,] * rr2fj[fine_j,] * (2/pi) * exp(-1/2 * (log(detSigma) + mvnResid))
}
if(gr) {
# mvnresid gradient
mvnResidNum <- (s1^2 * fnodesi^2 - 2*cov * fnodesi*fnodesj + s2^2 * fnodesj^2)
dMvnResidNum <- - 2* dcov * fnodesi * fnodesj
dMvnResid <- (1/detSigma^2) * (detSigma * dMvnResidNum - mvnResidNum * dDetSigma)
# rr_ij gradient
drr2_ij <- -(1/2) * rr_ij * (dDetSigma/detSigma + dMvnResid)
drr2_ <- drr2_ + drr2_ij
}
if(hess) {
mvnResidNum <- (s1^2 * fnodesi^2 - 2*cov * fnodesi*fnodesj + s2^2 * fnodesj^2)
dMvnResidNum <- - 2* dcov * fnodesi * fnodesj
ddMvnResidNum <- - 2* ddcov * fnodesi * fnodesj
dMvnResid <- (1/detSigma^2) * (detSigma * dMvnResidNum - mvnResidNum * dDetSigma)
num <- (detSigma * dMvnResidNum - mvnResidNum * dDetSigma)
dnum <- (dDetSigma * dMvnResidNum + detSigma * ddMvnResidNum - dMvnResidNum * dDetSigma - mvnResidNum * ddDetSigma)
ddMvnResid <- (1/detSigma^4) * (detSigma^2 * dnum - num * 2 * detSigma * dDetSigma)
drr_ij <- -(1/2) * rr_ij * (dDetSigma/detSigma + dMvnResid)
drr2_ <- drr2_ + drr_ij
ddrr_ij <- -(1/2) * rr_ij * ((detSigma*ddDetSigma-dDetSigma^2)/detSigma^2 + ddMvnResid) -(1/2) * drr_ij * (dDetSigma/detSigma + dMvnResid)
ddrr2_ <- ddrr2_ + ddrr_ij
}
if(returnPosterior) {
ii <- (i-1)*finef+fine_i
jj <- (j-1)*finef+fine_j
mui <- mui + rr_ij * nodesf[ii]
muj <- muj + rr_ij * nodesf[jj]
mui2 <- mui2 + rr_ij * nodesf[ii]^2
muj2 <- muj2 + rr_ij * nodesf[jj]^2
m2 <- m2 + rr_ij * nodesf[ii] * nodesf[jj]
}
rr2_ <- rr2_ + rr_ij
}
}
} else {
# approximately zero
}
}
}
# log requires a probability above zero, use the smallest double
# as a floor
cs2 <- pmax(.Machine$double.eps, rr2_)
if(gr) {
dcs2 <- ifelse(rr2_ <= .Machine$double.eps, 0, drr2_)
return(-2 * sum(w * dcs2/cs2))
}
if(hess) {
dcs2 <- ifelse(rr2_ <= .Machine$double.eps, 0, drr2_)
ddcs2 <- ifelse(rr2_ < .Machine$double.eps, 0, ddrr2_)
return(-2 * sum(w * (cs2*ddcs2-dcs2^2)/(cs2^2)))
}
if(returnPosterior) {
mu1 <- mui/rr2_ # mean of var1
mu2 <- muj/rr2_ # mean of var2
Cov <- (m2/rr2_ - mu1*mu2) # covariance
mu12 <- mui2/rr2_ # mean of mu1^2
mu22 <- muj2/rr2_ # mean of mu2^2
rho <- Cov/sqrt( (mu1^2 - mu12) * (mu2^2 - mu22) )
rho[!is.finite(rho)] <- 0 # replace division by zero
return(rho)
}
# likelihood of normal distribution for residuals nodes.minus.XB
# aggregate likelihood, weight, multiply by -2 to make it a deviance
# dTheta squared because there are two dimensions to the integral
# simpler version of formula
# return(-2*sum(w * (log(cs*dTheta^2))))
# rearange to:
return(-2 * (sum(w * log(cs2)) + 2 * sum(w) * log(dTheta/finef)))
}
}
# graded response model
grm <- function (theta, d, score, a, D) {
maxD <- length(d)
if(score == 0) {
pr <- 1/(1 + exp(D*a*(theta - d[(score+1)])))
}
else if(score == maxD) {
pr <- 1/(1 + exp(-D*a*(theta - d[score])))
} else {
pr <- 1/(1 + exp(D*a*(theta - d[(score+1)]))) - 1/(1 + exp(D*a*(theta - d[score])))
}
pr
}
# log of density of binomial where size=1, accepts x in [0,1] for partial credit
ldbinom3 <- function(x,pr) {
return(x*log(pr) + (1-x)*log(1-pr))
}
# graded partial credit model
gpcm <- function (theta, d, score, a, D) {
if(is.na(score)){
return(NA)
}
if (score > length(d)) {
stop (paste0("Score of ", score," higher than maximum (", length(d),")"))
}
if(score <= 0) {
stop (paste0("Score of ", score," lower than minimum (1)"))
}
Da <- D * a
exp(sum(Da * (theta - d[1:score]))) / sum(exp(cumsum(Da * (theta - d))))
}
# helper, get the graded partial credit model likelihood
gpcmLikelihood <- function (theta, d, score, a, D=1.7) {
exp(sum(D * (theta - d[1:score]))) / sum(exp(cumsum(D * (theta - d))))
}
# helper, get the graded response model likelihood
grmLikelihood <- function (theta, d, score, a, D=1.7) {
maxD <- length(d)
if(score == 0) {
pr <- 1/(1 + exp(D*a*(theta - d[(score+1)])))
}
else if(score == maxD) {
pr <- 1/(1 + exp(-D*a*(theta - d[score])))
} else {
pr <- 1/(1 + exp(D*a*(theta - d[(score+1)]))) - 1/(1 + exp(D*a*(theta - d[score])))
}
pr
}
slowHess <- function(K, rr2, rr2_, trr2mxb, X_, nodes.minus.XB, w, s2, s_) {
colSums_rr2_ <- colSums(rr2_)
H <- matrix(0, nrow=K+1, ncol=K+1)
denom <- colSums(rr2)
denom2 <- denom^2
denomPrimeList <- list()
for(i in 1:K) {
fi <- t( trr2mxb * X_[,i]/s2)
num <- colSums(fi)
for(j in i:K) {
fj <- t( trr2mxb * X_[,j]/s2)
numPrime <- colSums(t(t(rr2) * X_[,i] * X_[,j] / s2) - t( t(fj * nodes.minus.XB) * X_[,i])/s2)
denomPrime <- colSums(fj)
H[j,i] <- H[i,j] <- 2*sum(w*(numPrime * denom + num * denomPrime) / denom2)
}
# do numerical cross partial of variance term, taking care of the possibility of exp(x-1) transform
gr0 <- -2 * sum(w * num/denom)
gr_ <- -2 * sum(w * colSums( t( t((rr2_ * nodes.minus.XB)) * X_[,i]/s_^2))/colSums_rr2_)
H[K+1,i] <- H[i, K+1] <- (gr_ - gr0)/1e-6
}
return(H)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.