## This functions accepts the following arguments:
## y: univariate outcome
## z: endogenous predictors
## w: instruments
## x: exogenous predictors
## zeval: optional evaluation data for the endogenous predictors
## weval: optional evaluation data for the instruments
## xeval: optional evaluation data for the exogenous predictors
## ... optional arguments for crs()
## This function returns a list with the following elements:
## phi: the IV estimator of phi(z) corresponding to the estimated
## derivative phihat(z)
## phi.prime: the IV derivative estimator
## phi.mat: the matrix with colums phi_1, phi_2 etc. over all iterations
## phi.prime.mat: the matrix with colums phi'_1, phi'_2 etc. over all iterations
## num.iterations: number of iterations taken by Landweber-Fridman
## norm.stop: the stopping rule for each Landweber-Fridman iteration
## norm.value: the norm not multiplied by the number of iterations
## convergence: a character string indicating whether/why iteration terminated
crsivderiv <- function(y,
z,
w,
x=NULL,
zeval=NULL,
weval=NULL,
xeval=NULL,
iterate.max=1000,
iterate.diff.tol=1.0e-08,
constant=0.5,
penalize.iteration=TRUE,
start.from=c("Eyz","EEywz"),
starting.values=NULL,
stop.on.increase=TRUE,
smooth.residuals=TRUE,
opts=list("MAX_BB_EVAL"=10000,
"EPSILON"=.Machine$double.eps,
"INITIAL_MESH_SIZE"="r1.0e-01",
"MIN_MESH_SIZE"=paste("r",sqrt(.Machine$double.eps),sep=""),
"MIN_POLL_SIZE"=paste("r",1,sep=""),
"DISPLAY_DEGREE"=0),
...) {
crs.messages <- getOption("crs.messages")
console <- newLineConsole()
## Basic error checking
if(!is.logical(stop.on.increase)) stop("stop.on.increase must be logical (TRUE/FALSE)")
if(!is.logical(smooth.residuals)) stop("smooth.residuals must be logical (TRUE/FALSE)")
start.from <- match.arg(start.from)
if(constant <= 0 || constant >=1) stop("constant must lie in (0,1)")
if(missing(y)) stop("You must provide y")
if(missing(z)) stop("You must provide z")
if(missing(w)) stop("You must provide w")
if(NCOL(y) > 1) stop("y must be univariate")
if(NROW(y) != NROW(z) || NROW(y) != NROW(w)) stop("y, z, and w have differing numbers of rows")
if(!is.null(x) && NROW(y) != NROW(x)) stop("y and x have differing numbers of rows")
if(iterate.max < 2) stop("iterate.max must be at least 2")
if(iterate.diff.tol < 0) stop("iterate.diff.tol must be non-negative")
## Cast as data frames
w <- data.frame(w)
z <- data.frame(z)
if(!is.null(x)) x <- data.frame(x)
## Check for evaluation data
if(is.null(zeval)) zeval <- z
if(is.null(weval)) weval <- w
if(!is.null(x) && is.null(xeval)) xeval <- x
## Set up formulas for multivariate w, z, and x if provided
wnames <- names(w)
znames <- names(z)
names(weval) <- wnames
names(zeval) <- znames
## If there exist exogenous regressors X, append these to the
## formulas involving Z (can be manually added to W by the user if
## desired)
if(!is.null(x)) {
xnames <- names(x)
names(xeval) <- xnames
}
## Now create evaluation data
if(is.null(x)) {
traindata <- data.frame(y,z,w)
evaldata <- data.frame(zeval,weval)
} else {
traindata <- data.frame(y,z,w,x)
evaldata <- data.frame(zeval,weval,xeval)
}
if(!is.null(starting.values) && (NROW(starting.values) != NROW(evaldata))) stop(paste("starting.values must be of length",NROW(evaldata)))
## Formulae for derivative estimation
formula.muw <- as.formula(paste("mu ~ ", paste(wnames, collapse= "+")))
formula.yw <- as.formula(paste("y ~ ", paste(wnames, collapse= "+")))
formula.phiw <- as.formula(paste("phi ~ ", paste(wnames, collapse= "+")))
if(is.null(x)) {
formula.yz <- as.formula(paste("y ~ ", paste(znames, collapse= "+")))
formula.Eywz <- as.formula(paste("E.y.w ~ ", paste(znames, collapse= "+")))
} else {
formula.yz <- as.formula(paste("y ~ ", paste(znames, collapse= "+"), " + ", paste(xnames, collapse= "+")))
formula.Eywz <- as.formula(paste("E.y.w ~ ", paste(znames, collapse= "+"), " + ", paste(xnames, collapse= "+")))
}
## Landweber-Fridman
## We begin the iteration computing phi.prime.0
console <- printClear(console)
console <- printPop(console)
if(is.null(x)) {
console <- printPush(paste("Computing optimal smoothing for f(z) and S(z) for iteration 1...",sep=""),console)
} else {
console <- printPush(paste("Computing optimal smoothing f(z) and S(z) for iteration 1...",sep=""),console)
}
## Note - here I am only treating the univariate case, so let's
## throw a stop with warning for now...
if(NCOL(z) > 1) stop(" This version supports univariate z only")
## For all results we need the density function for Z and the
## survivor function for Z (1-CDF of Z)
# require(np)
cat(paste("\rIteration ", 1, " of at most ", iterate.max,sep=""))
## Let's compute the bandwidth object for the unconditional density
## for the moment. Use the normal-reference rule for speed
## considerations (sensitivity analysis indicates this is not
## problematic).
bw <- npudensbw(dat=z,
bwmethod="normal-reference",
...)
model.fz <- npudens(tdat=z,
bws=bw$bw,
...)
f.z <- predict(model.fz,newdata=evaldata)
model.Sz <- npudist(tdat=z,
bws=bw$bw,
...)
S.z <- 1-predict(model.Sz,newdata=evaldata)
if(is.null(starting.values)) {
console <- printClear(console)
console <- printPop(console)
if(is.null(x)) {
console <- printPush(paste("Computing optimal smoothing for E(y|z) for iteration 1...",sep=""),console)
} else {
console <- printPush(paste("Computing optimal smoothing for E(y|z,x) for iteration 1...",sep=""),console)
}
if(start.from == "Eyz") {
## Start from E(Y|z)
if(crs.messages) options(crs.messages=FALSE)
model.E.y.z <- crs(formula.yz,
opts=opts,
data=traindata,
deriv=1,
...)
if(crs.messages) options(crs.messages=TRUE)
E.y.z <- predict(model.E.y.z,newdata=evaldata)
phi.prime <- attr(E.y.z,"deriv.mat")[,1]
} else {
## Start from E(E(Y|w)|z)
E.y.w <- fitted(crs(formula.yw,opts=opts,data=traindata,...))
model.E.E.y.w.z <- crs(formula.Eywz,opts=opts,data=traindata,deriv=1,...)
E.E.y.w.z <- predict(model.E.E.y.w.z,newdata=evaldata,...)
phi.prime <- attr(E.E.y.w.z,"deriv.mat")[,1]
}
} else {
phi.prime <- starting.values
}
## Step 1 - begin iteration - for this we require \varphi_0. To
## compute \varphi_{0,i}, we require \mu_{0,i}. For j=0 (first
## term in the series), \mu_{0,i} is Y_i.
console <- printClear(console)
console <- printPop(console)
if(is.null(x)) {
console <- printPush(paste("Computing optimal smoothing for E(y|w) (stopping rule) for iteration 1...",sep=""),console)
} else {
console <- printPush(paste("Computing optimal smoothing for E(y|w) (stopping rule) for iteration 1...",sep=""),console)
}
## NOTE - this presumes univariate z case... in general this would
## be a continuous variable's index
phi <- integrate.trapezoidal(z[,1],phi.prime)
## In the definition of phi we have the integral minus the mean of
## the integral with respect to z, so subtract the mean here
phi <- phi - mean(phi) + mean(y)
starting.values.phi <- phi
starting.values.phi.prime <- phi.prime
## For stopping rule...
if(crs.messages) options(crs.messages=FALSE)
model.E.y.w <- crs(formula.yw,
opts=opts,
data=traindata,
...)
if(crs.messages) options(crs.messages=TRUE)
E.y.w <- predict(model.E.y.w,newdata=evaldata)
norm.stop <- numeric()
## For the stopping rule, we require E.phi.w
if(crs.messages) options(crs.messages=FALSE)
model.E.phi.w <- crs(formula.phiw,
opts=opts,
data=traindata,
...)
if(crs.messages) options(crs.messages=TRUE)
E.phi.w <- predict(model.E.phi.w,newdata=evaldata)
## Now we compute mu.0 (a residual of sorts)
mu <- y - phi
## Now we repeat this entire process using mu = y - phi.0 rather
## than y
mean.mu <- mean(mu)
if(smooth.residuals) {
## Smooth residuals (smooth of (y-phi) on w)
if(crs.messages) options(crs.messages=FALSE)
model.E.mu.w <- crs(formula.muw,
opts=opts,
data=traindata,
...)
## We require the fitted values...
predicted.model.E.mu.w <- predict(model.E.mu.w,newdata=evaldata)
if(crs.messages) options(crs.messages=TRUE)
## We again require the mean of the fitted values
mean.predicted.model.E.mu.w <- mean(predicted.model.E.mu.w)
} else {
## Not smoothing residuals (difference of E(Y|w) and smooth of phi
## on w)
if(crs.messages) options(crs.messages=FALSE)
model.E.phi.w <- crs(formula.phiw,
opts=opts,
data=traindata,
...)
## We require the fitted values...
predicted.model.E.mu.w <- E.y.w - predict(model.E.phi.w,newdata=evaldata)
if(crs.messages) options(crs.messages=TRUE)
## We again require the mean of the fitted values
mean.predicted.model.E.mu.w <- mean(E.y.w) - mean(predicted.model.E.mu.w)
}
norm.stop[1] <- sum(predicted.model.E.mu.w^2)/sum(E.y.w^2)
## Now we compute T^* applied to mu
cdf.weighted.average <- npksum(txdat=z,
exdat=zeval,
tydat=as.matrix(predicted.model.E.mu.w),
operator="integral",
bws=bw$bw)$ksum/nrow(traindata)
survivor.weighted.average <- mean.predicted.model.E.mu.w - cdf.weighted.average
T.star.mu <- (survivor.weighted.average-S.z*mean.mu)/f.z
## Now we update phi.prime.0, this provides phi.prime.1, and now
## we can iterate until convergence... note we replace phi.prime.0
## with phi.prime.1 (i.e. overwrite phi.prime)
phi.prime <- phi.prime + constant*T.star.mu
phi.prime.mat <- phi.prime
phi.mat <- phi
## This we iterate...
for(j in 2:iterate.max) {
## Save previous run in case stop norm increases
cat(paste("\rIteration ", j, " of at most ", iterate.max,sep=""))
console <- printClear(console)
console <- printPop(console)
if(is.null(x)) {
console <- printPush(paste("Computing optimal smoothing and phi(z) for iteration ", j,"...",sep=""),console)
} else {
console <- printPush(paste("Computing optimal smoothing and phi(z,x) for iteration ", j,"...",sep=""),console)
}
## NOTE - this presumes univariate z case... in general this would
## be a continuous variable's index
phi <- integrate.trapezoidal(z[,1],phi.prime)
## In the definition of phi we have the integral minus the mean of
## the integral with respect to z, so subtract the mean here
phi <- phi - mean(phi) + mean(y)
## Now we compute mu.0 (a residual of sorts)
mu <- y - phi
## Now we repeat this entire process using mu = y = phi.0 rather than y
## Next, we regress require \mu_{0,i} W
if(smooth.residuals) {
## Smooth residuals (smooth of (y-phi) on w)
if(crs.messages) options(crs.messages=FALSE)
model.E.mu.w <- crs(formula.muw,
opts=opts,
data=traindata,
...)
## We require the fitted values...
predicted.model.E.mu.w <- predict(model.E.mu.w,newdata=evaldata)
if(crs.messages) options(crs.messages=TRUE)
## We again require the mean of the fitted values
mean.predicted.model.E.mu.w <- mean(predicted.model.E.mu.w)
} else {
## Not smoothing residuals (difference of E(Y|w) and smooth of
## phi on w)
if(crs.messages) options(crs.messages=FALSE)
model.E.phi.w <- crs(formula.phiw,
opts=opts,
data=traindata,
...)
## We require the fitted values...
predicted.model.E.mu.w <- E.y.w - predict(model.E.phi.w,newdata=evaldata)
if(crs.messages) options(crs.messages=TRUE)
## We again require the mean of the fitted values
mean.predicted.model.E.mu.w <- mean(E.y.w) - mean(predicted.model.E.mu.w)
}
norm.stop[j] <- ifelse(penalize.iteration,j*sum(predicted.model.E.mu.w^2)/sum(E.y.w^2),sum(predicted.model.E.mu.w^2)/sum(E.y.w^2))
## Now we compute T^* applied to mu
cdf.weighted.average <- npksum(txdat=z,
exdat=zeval,
tydat=as.matrix(predicted.model.E.mu.w),
operator="integral",
bws=bw$bw)$ksum/nrow(traindata)
survivor.weighted.average <- mean.predicted.model.E.mu.w - cdf.weighted.average
T.star.mu <- (survivor.weighted.average-S.z*mean.predicted.model.E.mu.w)/f.z
## Now we update, this provides phi.prime.1, and now we can iterate until convergence...
phi.prime <- phi.prime + constant*T.star.mu
phi.prime.mat <- cbind(phi.prime.mat,phi.prime)
phi.mat <- cbind(phi.mat,phi)
## The number of iterations in LF is asymptotically equivalent to
## 1/alpha (where alpha is the regularization parameter in
## Tikhonov). Plus the criterion function we use is increasing
## for very small number of iterations. So we need a threshold
## after which we can pretty much confidently say that the
## stopping criterion is decreasing. In Darolles et al. (2011)
## \alpha ~ O(N^(-1/(min(beta,2)+2)), where beta is the so called
## qualification of your regularization method. Take the worst
## case in which beta = 0 and then the number of iterations is ~
## N^0.5. Note that derivative estimation seems to require more
## iterations hence the heuristic sqrt(N)
if(j > round(sqrt(nrow(traindata))) && !is.monotone.increasing(norm.stop)) {
## If stopping rule criterion increases or we are below stopping
## tolerance then break
if(stop.on.increase && norm.stop[j] > norm.stop[j-1]) {
convergence <- "STOP_ON_INCREASE"
break()
}
if(abs(norm.stop[j-1]-norm.stop[j]) < iterate.diff.tol) {
convergence <- "ITERATE_DIFF_TOL"
break()
}
}
convergence <- "ITERATE_MAX"
}
## Extract minimum, and check for monotone increasing function and
## issue warning in that case. Otherwise allow for an increasing
## then decreasing (and potentially increasing thereafter) portion
## of the stopping function, ignore the initial increasing portion,
## and take the min from where the initial inflection point occurs
## to the length of norm.stop
norm.value <- norm.stop/(1:length(norm.stop))
if(which.min(norm.stop) == 1 && is.monotone.increasing(norm.stop)) {
warning("Stopping rule increases monotonically (consult model$norm.stop):\nThis could be the result of an inspired initial value (unlikely)\nNote: we suggest manually choosing phi.0 and restarting (e.g. instead set `starting.values' to E[E(Y|w)|z])")
convergence <- "FAILURE_MONOTONE_INCREASING"
# phi <- starting.values.phi
# phi.prime <- starting.values.phi.prime
j <- 1
while(norm.value[j+1] > norm.value[j]) j <- j + 1
j <- j-1 + which.min(norm.value[j:length(norm.value)])
phi <- phi.mat[,j]
phi.prime <- phi.prime.mat[,j]
} else {
## Ignore the initial increasing portion, take the min to the
## right of where the initial inflection point occurs
j <- 1
while(norm.stop[j+1] > norm.stop[j]) j <- j + 1
j <- j-1 + which.min(norm.stop[j:length(norm.stop)])
phi <- phi.mat[,j]
phi.prime <- phi.prime.mat[,j]
}
console <- printClear(console)
console <- printPop(console)
if(j == iterate.max) warning(" iterate.max reached: increase iterate.max or inspect norm.stop vector")
return(list(phi=phi,
phi.prime=phi.prime,
phi.mat=phi.mat,
phi.prime.mat=phi.prime.mat,
num.iterations=j,
norm.stop=norm.stop,
norm.value=norm.value,
convergence=convergence,
starting.values.phi=starting.values.phi,
starting.values.phi.prime=starting.values.phi.prime))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.