## FLAG NEED TO CHANGE TO CARROLL NOTATION FOR REST
npmle_deconv_biv <- function(X, csem, umin = c(-5,-5), umax = c(5,5), ngrid = c(100,100), lambda = 0.0005, lltol = 1e-7, psmall = 0.000005, discrete=FALSE, quietly = FALSE, startmiddle=FALSE){
## bivariate nonparametric MLE of distribution of error-free variables "(U1,U2)" given
## error-prone variables "(X1,X2)" where Xi = Ui + ei and ei|Ui ~ N(Ui, csem_i^2(Ui))
##
## see npmle_deconv for more details
##
## NOTE: csem is passed a list of functions csem[[i]] is csem fxn for variable i
## NOTE: reduced default on psmall because now we are dealing with joint probabilities
## NOTE: reduced the .05 on new value to 0.01 because it was taking very long
## NOTE: startmiddle==TRUE mean algorithm will start at values near middle of grid
stopifnot( all(!is.na(X)) && is.function(csem[[1]]) && is.function(csem[[2]]) && all(umin < umax) && (lambda > 0) && (lltol > 0) )
## NOTE: we collapse over repeat X values when possible, using "counts" to contribute to likelihood
.X <- unique(X)
.X <- .X[order(.X[,1], .X[,2]),]
nX <- nrow(.X)
.counts <- sapply(1:nX, function(i){ sum( (X[,1] == .X[i,1]) & (X[,2] == .X[i,2]) ) })
stopifnot(sum(.counts) == nrow(X))
X <- .X
## functions to map probabilities to reduced-dimension unconstrained scale, and inverse
theta_to_p <- function(theta){
if(length(theta) == 0){
return(1)
} else {
p <- exp(theta) / (1 + sum(exp(theta)))
return(c(p, 1-sum(p)))
}
}
p_to_theta <- function(p){
stopifnot( all(p > 0) && (abs((1 - sum(p))) < 1e-9) )
if(length(p) == 1){
return(numeric(0))
} else {
last <- length(p)
return(log(p[1:(last-1)] * (1 + ((1-p[last])/p[last])) ))
}
}
## CHECKS:
## p <- c(0.1, 0.2, 0.4, 0.3); print(ma(p - theta_to_p(p_to_theta(p))))
## theta <- c(-3.2, 1.0, -1.0); print(ma(theta - p_to_theta(theta_to_p(theta))))
## print(ma(1 - theta_to_p(p_to_theta(1))))
## p_to_theta(theta_to_p(numeric(0)))
## build (nX x ngrid) matrix of conditional densities by multipling due to local independence, and
## then vec'ing the grid. NOTE there is probably a better way to do this
grid <- expand.grid(seq(from=umin[1], to=umax[1], length=ngrid[1]), seq(from=umin[2], to=umax[2], length=ngrid[2]))
names(grid) <- c("u1","u2")
ngrid <- nrow(grid)
grid$index <- 1:ngrid
grid$csem1 <- csem[[1]](grid$u1)
grid$csem2 <- csem[[2]](grid$u2)
fxu <- matrix(0, nrow=nX, ncol=ngrid)
if(!discrete){
for(i in 1:nX){
fxu[i,] <- dnorm(rep(X[i,1],ngrid), mean=grid$u1, sd = grid$csem1) * dnorm(rep(X[i,2],ngrid), mean=grid$u2, sd = grid$csem2)
}
} else { ## uses pxgu() and conditional independence of (X1,X2) given (U1,U2)
## FLAG FIXME this is inefficient
.tab1 <- su(.X[,1])
.tab2 <- su(.X[,2])
p1 <- lapply(1:ngrid, function(i){ pxgu(grid$u1[i], grid$csem1[i], .tab1) })
p2 <- lapply(1:ngrid, function(i){ pxgu(grid$u2[i], grid$csem2[i], .tab2) })
p1 <- do.call("cbind", lapply(p1, function(x){ x[,"p"] })) ## sorted unique scale scores on rows, conditional probs at each grid point on columns
p2 <- do.call("cbind", lapply(p2, function(x){ x[,"p"] })) ## sorted unique scale scores on rows, conditional probs at each grid point on columns
for(i in 1:nX){
fxu[i,] <- p1[which(.tab1 == X[i,1]),] * p2[which(.tab2 == X[i,2]),]
}
rm(p1,p2)
gc()
}
## negative log likelihood for a set of probabilities given ".inds" which are
## indices of "grid" and which are continually updated
## NOTE: updated to use counts
negll <- function(theta){
-sum(.counts * log(as.vector(fxu[,.inds] %*% theta_to_p(theta))))
}
## find initial best grid point. NOTE this will often want the u with the
## biggest CSEM because a single point is inconsistent with most X unless the
## CSEM is large. the exception is if we pick ugrid to be very large, then it
## will find interior point. however even if it picks an extreme starting
## point, that point will be dropped later if it is too far in the tails.
ll <- apply(fxu, 2, function(x){ sum(.counts * log(x)) })
if(startmiddle){
.inds <- which.min( (grid$u1 - mean(grid$u1))^2 + (grid$u2 - mean(grid$u2))^2 )
} else {
.inds <- which.max(ll)
}
.probs <- 1
.eligible <- setdiff(1:ngrid, .inds)
ll.current <- ll[.inds]
.history <- list()
tmp <- grid[.inds,]
tmp$p <- 1
tmp$ll <- ll.current
tmp$eu1 <- tmp$u1
tmp$eu2 <- tmp$u2
tmp$varu1 <- tmp$varu2 <- 0
tmp$cor <- 0
.history[[1]] <- tmp
## now add grid points per the algorithm until there is no improvement
done <- FALSE
while(!done){
## evaluate each eligible point with a weight "lambda"
part.static <- as.vector(fxu[,.inds,drop=FALSE] %*% ((1 - lambda)*.probs))
part.total <- matrix(part.static, ncol=length(.eligible), nrow=nX) + (fxu[,.eligible] * lambda)
ll.candidate <- apply(part.total, 2, function(x){ sum(.counts * log(x)) })
if(all(ll.candidate - ll.current < lltol)){
done <- TRUE
} else {
w <- which.max(ll.candidate - ll.current)
.inds <- c(.inds, .eligible[w])
.eligible <- setdiff(.eligible, .eligible[w])
## set starting value: mass 0.01 at new value, normalized p on existing values
o <- optim(p_to_theta(c(0.99*(.probs/sum(.probs)), 0.01)), negll, method = "BFGS", control = list(maxit=1000, trace=5*(1 - as.numeric(quietly)), REPORT = 1))
.probs <- theta_to_p(o$par)
ll.current <- -o$value
## sometimes we might have picked up an early grid point but now it has virtually no probability, so dump it
w <- which(.probs < psmall)
if(length(w) > 0){
.probs <- .probs[-w]
.probs <- .probs/sum(.probs)
.inds <- .inds[-w]
}
## reorder the grid if need be
o <- order(.inds)
.inds <- .inds[o]
.probs <- .probs[o]
## summarize the state
tmp <- grid[.inds,]
tmp$p <- .probs
tmp$ll <- ll.current
tmp$eu1 <- sum(tmp$u1 * tmp$p)
tmp$eu2 <- sum(tmp$u2 * tmp$p)
tmp$varu1 <- sum(tmp$u1^2 * tmp$p) - (su(tmp$eu1))^2
tmp$varu2 <- sum(tmp$u2^2 * tmp$p) - (su(tmp$eu2))^2
tmp$cor <- (sum(tmp$u1 * tmp$u2 * tmp$p) - (su(tmp$eu1) * su(tmp$eu2))) / sqrt( su(tmp$varu1) * su(tmp$varu2))
.history[[length(.history)+1]] <- tmp
if(!quietly){
print(.history[[length(.history)]])
cat("\n\n\n")
}
}
}
## calculate nonparametric estimate of conditional mean fxn
## WARNING- extremely noisy
pu <- .history[[length(.history)]]
tmp <- data.frame(u1 = su(pu$u1), eu2 = 0)
for(i in 1:nrow(tmp)){
z <- subset(pu, u1 == tmp$u1[i])
tmp$eu2[i] <- weighted.mean(z$u2, w = z$p)
}
## a different way using moving average to smooth
## tmp <- data.frame(u1 = seq(from=min(pu$u1), to=max(pu$u1), length=1000), eu2 = 0)
## for(i in 1:nrow(tmp)){
## z <- subset(pu, (u1 > tmp$u1[i] - 0.6) & (u1 < tmp$u1[i] + 0.6))
## tmp$eu2[i] <- weighted.mean(z$u2, w = z$p)
## }
## return
return(list(.history = .history, pu = pu, eu2gu1 = tmp))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.