R/CVdeconv.r

CVdeconv <- function(n, W, phiU, phiK, muK2, RK, deltat, tt) {

# Authors: Aurore Delaigle
# This function computes the cross-validation (CV) bandwidth for kernel 
# deconvolution estimator as in Stefanski, L., Carroll, R.J. (1990). 
# Deconvoluting kernel density estimators. Statistics 2, 169–184. 
# Delaigle, A. and I. Gijbels (2004). Practical bandwidth selection in 
# deconvolution kernel density estimation, Computational Statistics and Data 
# Analysis, 45, 249-267

#----------------------------
# arguments:
#----------------------------

# n: sample size
# W: vector of univariate contaminated data
# phiU:function that gives the characteristic function of the error
#phiK: Fourier transfrom of the kernel. The default is (1-t^2)^3 on the interval [-1,1]
#muK2: second moment of the kernel, i.e. \int x^2 K(x) dx
#RK: integral of the square of the kernel, i.e. \int K^2(x) dx
#tt: vector of discrete t values on which you approximate the integrals in the Fourier domain. 
#	If phiK is compactly supported, the first and last elements of t must be the lower and upper bound of the support of phiK.
#	If phiK is not compactly supported, the first and last elements of t must be larger enough for your discretisation of the intergals to be accurate
#deltat: distance between two points of the t grid 


#------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#								WARNINGS:
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#In case of multiple bandwidth solutions, by default this code takes the largest solution: you can change this to your preferred way of breaking ties.
#Often if you plot CV you will see that the first few solutions seem unreasonable (CV fluctuates widely). You can take the first minimum that looks reasonable.
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------


# --------------------------------------------------------
# Preliminary calculations and initialisation of functions
# --------------------------------------------------------

	W  <- as.vector(W)

	#make sure t is a vector in the right format
	dim(tt) <- c(length(tt),1)

	#Define hgrid, the grid of h values where to search for a solution: you can change the default grid if no solution is found on this grid.
	maxh <- (max(W)-min(W))/10

	#normal reference bandwidth of the naive KDE estimator (estimator that ignores the errors) using the same kernel as above
	hnaive <- ((8*sqrt(pi)*RK/3/muK2^2)^0.2)*sqrt( stats::var(W) )*n^(-1/5)

	#grid of h values on which we will look for hCV, If you did not find a minimum on that grid you can redefine it
	hgrid <- seq(hnaive/3,maxh,(maxh-hnaive/3)/100)
	lh <- length(hgrid)
	dim(hgrid) <- c(1,lh)

	#Quantities that will be needed several times in the computations below
	toverh <- tt%*%(1/hgrid)
	phiU2 <- phiU(toverh)^2
	phiKt <- phiK(tt)

	#----------------------
	#Compute CV criterion
	#----------------------

	longh <- length(hgrid)
	CVcrit <- rep(0,longh)
	OO <- outer(tt, t(W))

	#Compute CV criterion for all values of h on the grid of h values

	for (j in seq_len(longh)) {
		h <- hgrid[j]

		#Estimate the square of the norm of the empirical characteristic function of W
		rehatphiW <- apply(cos(OO/h),1,sum)/n
		imhatphiW <- apply(sin(OO/h),1,sum)/n
		normhatphiW2 <- rehatphiW^2+imhatphiW^2
		
		#Compute CV
		CVcrit[j] <- sum(phiKt/phiU2[,j]*(normhatphiW2*((n-1)*phiKt-2*n)+2))
		CVcrit[j] <- CVcrit[j]/h
	}

	#----------------------
	#Find CV bandwidth
	#----------------------

	#Find the indices corresponding to all local minima of the CV curve
	indh <- which((CVcrit[2:(longh-1)] < CVcrit[1:(longh-2)]) & (CVcrit[2:(longh-1)] < CVcrit[3:(longh)]))

	#if did not find a local minimum, take the global minimun on the boundaries of the h grid
	if (length(indh)<1) {
		hCV <- which.min(CVcrit)
	} else {
		#In case of multiple solutions, take the largest bandwidth: you can change this to your preferred way of breaking ties
		hCV <- max(hgrid[indh])
	}

	return (hCV)
}
TimothyHyndman/deconvolve documentation built on May 13, 2019, 11:51 p.m.