R/selector.R
In ks: Kernel Smoothing

Documented in Hbcv Hbcv.diag hlscv Hlscv Hlscv.diag hnm Hnm Hnm.diag hns Hns Hns.diag hpi Hpi Hpi.diag hscv Hscv Hscv.diag hucv Hucv Hucv.diag

#############################################################################
## Estimate g_AMSE pilot bandwidths for even orders - 2-dim
##
## Parameters
## r - (r1, r2) partial derivative
## n - sample size
## psi1 - psi_(r + (2,0))
## psi2 - psi_(r + (0,2))
##
## Returns
## g_AMSE pilot bandwidths for even orders
#############################################################################
gamse.even.2d <- function(r, n, psi1, psi2)
{
    d <- 2
    num <- -2 * dmvnorm.deriv(x=c(0,0), deriv.order=r, Sigma=diag(2), deriv.vec=FALSE)
    den <- (psi1 + psi2) * n   
    g.amse <- (num/den)^(1/(2 + d + sum(r)))

    return(g.amse)
}

#############################################################################
## Estimate g_AMSE pilot bandwidths for odd orders - 2-dim
##
## Parameters
##
## r - (r1, r2) partial derivative
## n - sample size
## psi1 - psi_(r + (2,0))
## psi2 - psi_(r + (0,2))
## psi00 - psi_(0,0)
## RK - R(K^(r))
## 
## Returns
## g_AMSE pilot bandwidths for odd orders
#############################################################################
gamse.odd.2d <- function(r, n, psi1, psi2, psi00, RK)
{  
    d <- 2
    num <- 2 * psi00 * (2 * sum(r) + d) * RK
    den <- (psi1 + psi2)^2 * n^2
    g.amse <- (num/den)^(1/(2*sum(r) + d + 4))

    return(g.amse)
}

#############################################################################
## Estimate g_SAMSE pilot bandwidth - 2- to 6-dim 
##
## Parameters
## Sigma.star - scaled variance matrix
## n - sample size
##
## Returns
## g_SAMSE pilot bandwidth
#############################################################################
gsamse <- function(Sigma.star, n, modr, nstage=1, psihat=NULL)
{
    d <- ncol(Sigma.star)
    K <- numeric(); psi <- numeric()

    ## 4th order g_SAMSE
    K <- dmvnorm.deriv(x=rep(0,d), deriv.order=modr, Sigma=diag(d), add.index=TRUE, deriv.vec=FALSE)
    K <- K$deriv[apply(K$deriv.ind, 1, is.even)]

    if (modr==4)
    {
        derivt4 <- dmvnorm.deriv(x=rep(0,d), deriv.order=4, add.index=TRUE, deriv.vec=FALSE, only.index=TRUE)
        derivt6 <- dmvnorm.deriv(x=rep(0,d), deriv.order=6, add.index=TRUE, deriv.vec=FALSE, only.index=TRUE)

        for (i in 1:nrow(derivt4))
        {
            r <- derivt4[i,]
            if (is.even(r))
            {
                A3psi <- 0
                for (j in 1:d)
                {
                    if (nstage==1) A3psi <- A3psi + psins(r=r+2*elem(j,d), Sigma=Sigma.star)
                    else if (nstage==2) A3psi <- A3psi + psihat[which.mat(r=r+2*elem(j,d), mat=derivt6)]
                }
                psi <- c(psi, A3psi)    
            }
        }
    }
    ## 6th order g_SAMSE
    else if (modr==6)
    {
        derivt6 <- dmvnorm.deriv(x=rep(0,d), deriv.order=6, add.index=TRUE, deriv.vec=FALSE, only.index=TRUE)

        for (i in 1:nrow(derivt6))
        {
            r <- derivt6[i,]        
            if (is.even(r))
            {
                A3psi <- 0
                for (j in 1:d)
                A3psi <- A3psi + psins(r=r+2*elem(j,d), Sigma=Sigma.star)
                psi <- c(psi, A3psi)
            }
        }
    }

    ## see thesis for formula
    A1 <- sum(K^2)
    A2 <- sum(K * psi)  
    A3 <- sum(psi^2)
    B1 <- (2*modr + 2*d)*A1
    B2 <- (modr + d - 2)*A2
    B3 <- A3
    gamma1 <- (-B2 + sqrt(B2^2 + 4*B1*B3)) / (2*B1)
    g.samse <- (gamma1 * n)^(-1/(modr + d + 2))

    return (g.samse)      
}

############################################################################
## Scalar pilot selector for derivatives r>0 from Chacon & Duong (2011)
## Generalisation of gsamse for r>0
############################################################################
gdscalar <- function(x, d, r, n, verbose, binned=FALSE, nstage=1, scv=FALSE)
{
    if (scv) cf <- c(2^(-d), 2^(-d/2+1), 4)
    else cf <- c(1,1,1)
    S <- var(x)
    Sinv <- chol2inv(chol(S))

    if (nstage==1)
    {
        psi2r6.ns <- psins(r=2*r+6, Sigma=S)
        B3 <- norm(Matrix(psi2r6.ns, nrow=1) %*% (Matrix(vec(diag(d)), ncol=1) %*% Matrix(vec(diag(d)), nrow=1) %x% Diagonal(d^(2*r+4))) %*% Matrix(psi2r6.ns, ncol=1), type="1")
        if (!scv)
        {
            B1 <- 2*(2*pi)^(-d)*OF(2*r+4)*prod(d+2*(0:(r+2)))  
            B2 <- -(d+2*r+2)*2^(-d/2-r)*(2*pi)^(-d)*det(S)^(-1/2)*OF(2*r+4)*nu(r=r+2, A=Sinv)
        }
        else
        {
            B1 <- 2^(-2*r-3)*(4*pi)^(-d)*OF(2*r+6)*prod(d+2*(0:(r+2)))
            B2 <- -(d+2*r+2)*2^(-2*r-4)*(4*pi)^(-d)*det(S)^(-1/2)*OF(2*r+4)*nu(r=r+2, A=Sinv)
            B3 <- 4*B3
        }
        g2r4 <- (2*B1/(-B2 + sqrt(B2^2 + 4*B1*B3)))^(1/(d+2*r+8))*n^(-1/(d+2*r+8))
    }
    else if (nstage==2)
    {
        psi2r8.ns <- psins(r=2*r+8, Sigma=S)
        B3 <- norm(Matrix(psi2r8.ns, nrow=1) %*% (Matrix(vec(diag(d)), ncol=1) %*% Matrix(vec(diag(d)), nrow=1) %x% Diagonal(d^(2*r+6))) %*% Matrix(psi2r8.ns, ncol=1), type="1")
        if (!scv)
        {
            B1 <- 2*(2*pi)^(-d)*OF(2*r+6)*prod(d+2*(0:(r+3)))  
            B2 <- -(d+2*r+4)*2^(-d/2-r+2)*(2*pi)^(-d)*det(S)^(-1/2)*OF(2*r+6)*nu(r=r+4, A=Sinv)
        }
        else
        {
            B1 <- 2^(-2*r-5)*(4*pi)^(-d)*OF(2*r+6)*prod(d+2*(0:(r+3)))
            B2 <- -(d+2*r+4)*2^(-2*r-6)*(4*pi)^(-d)*det(S)^(-1/2)*OF(2*r+6)*nu(r=r+4, A=Sinv)
            B3 <- 4*B3
        }
        g2r6.ns <- (2*B1/(-B2 + sqrt(B2^2 + 4*B1*B3)))^(1/(d+2*r+8))*n^(-1/(d+2*r+8))
        L0 <- dmvnorm.mixt(x=rep(0,d), mus=rep(0,d), Sigmas=diag(d), props=1)
        if (binned)
        eta2r6 <- drop(kfe(x=x, G=g2r6.ns^2*diag(d), inc=1, binned=binned, deriv.order=2*r+6, add.index=FALSE, verbose=verbose) %*% vec(diag(d^(r+3))))
        else
        eta2r6 <- Qr(x=x, deriv.order=2*r+6, Sigma=g2r6.ns^2*diag(d), inc=1)
        
        A1 <- cf[1]*(2*d+4*r+8)*L0^2*OF(2*r+4)*nu(r=r+2, A=diag(d))
        A2 <- cf[2]*(d+2*r+2)*L0*OF(2*r+4)*eta2r6
        A3 <- cf[3]*eta2r6^2
        
        g2r4 <- (2*A1/((-A2+ sqrt(A2^2 +4*A1*A3))*n))^(1/(d+2*r+6))
    }
    return(g2r4)
}

############################################################################
## Unconstrained pilot selector for derivatives r>0 from Chacon & Duong (2011)
## Generalisation of Gunconstr for r>0
############################################################################
Gdunconstr <- function(x, d, r, n, nstage=1, verbose, binned=FALSE, scv=FALSE, optim.fun="optim")
{
    if (scv) cf <- c(2^(-(d/2+r+2)), 2)
    else cf <- c(1,1)
    S <- var(x) 
    S12 <- matrix.sqrt(S)

    optim.fun <- match.arg(optim.fun, c("nlm", "optim")) 
    if (nstage==2)
    {
        # G2r6.NR <- Gns(r=2*r+6,n=n,Sigma=S, scv=scv)
        # vecPsi2r6 <- kfe(x=x, G=G2r6.NR, binned=binned, deriv.order=2*r+6, deriv.vec=TRUE, add.index=FALSE, verbose=verbose)   
        x.star <- pre.sphere(x)
        G2r6.NR <- Gns(r=2*r+6,n=n,Sigma=var(x.star), scv=scv)
        vecPsi2r6 <- kfe(x=x.star, G=G2r6.NR, binned=binned, deriv.order=2*r+6, deriv.vec=TRUE, add.index=FALSE, verbose=verbose)   

        dls <- (0:(d^2-1))*d^(2*r+4) 

        AB2 <- function(vechG)
        {
            G <- invvech(vechG) %*% invvech(vechG)
            Ginv <- chol2inv(chol(G))
            ## direct computation
            v1 <- n^(-1)*det(G)^(-1/2)*(2*pi)^(-d/2)*(-1)^(r+2)*OF(2*r+4)* Sdrv(d=d,r=2*r+4,v=Kpow(vec(Ginv),r+2))

            v2 <- numeric(d^(2*r+4))
            for(k in 1:d^(2*r+4)) { v2[k] <- sum(vec(G)*vecPsi2r6[dls+k]) }

            AB <- cf[1]*v1 + cf[2]*(1/2)*v2
            AB2.val <- sum(AB^2)
            return(AB2.val)
        }

        Gstart <- Gns(r=2*r+4, n=n, Sigma=var(x.star), scv=scv)
        Gstart <- matrix.sqrt(Gstart)
        
        if (optim.fun=="nlm")
        {
            result <- nlm(p=vech(Gstart), f=AB2, print.level=2*as.logical(verbose))
            G2r4 <- result$estimate
        } 
        else if (optim.fun=="optim")
        { 
            result <- optim(par=vech(Gstart), fn=AB2, method="BFGS", control=list(trace=as.numeric(verbose), REPORT=1))
            G2r4 <- result$par
        }
        G2r4 <- invvech(G2r4) %*% invvech(G2r4)
        G2r4 <- S12 %*% G2r4 %*% S12
    }
    else
    {
        G2r4 <- Gns(r=2*r+4, n=n, Sigma=S, scv=scv)
    }
    
    return(G2r4)
}

############################################################################
## Estimate psi functionals using 1-stage plug-in 
##
## Parameters
## x.star - pre-transformed data points
## pilot - "amse" = different AMSE pilot bandwidths
##       - "samse" = optimal SAMSE pilot bandwidth
##
## Returns
## estimated psi functionals
#############################################################################
psifun1 <- function(x.star, pilot="samse", binned, bin.par, deriv.order=0, verbose=FALSE)
{
    d <- ncol(x.star)
    r <- deriv.order
    S.star <- var(x.star)
    n <- nrow(x.star)

    ## pilots are based on (2r+4)-th order derivatives
    ## compute 1 pilot for SAMSE
    if (pilot=="samse")
    {
        g.star <- gsamse(S.star, n, 4)
        psihat.star <- kfe(x=x.star, G=g.star^2*diag(d), deriv.order=4, deriv.vec=TRUE, binned=binned, add.index=TRUE, verbose=verbose)
    }
    ## compute 5 different pilots for AMSE
    else if ((pilot=="amse") & (d==2))
    {
        derivt4 <- dmvnorm.deriv(x=rep(0,d), deriv.order=4, add.index=TRUE, deriv.vec=FALSE, only.index=TRUE)
        derivt4.vec <- dmvnorm.deriv(x=rep(0,d), deriv.order=4, add.index=TRUE, deriv.vec=TRUE, only.index=TRUE)

        RK31 <- 15/(64*pi)
        psi00 <- psins(r=c(0,0), Sigma=S.star) 
        psihat.star <- vector()
        g.star <- vector()

        for (k in 1:nrow(derivt4))
        { 
            r <- derivt4[k,]
            psi1 <- psins(r=r + 2*elem(1, 2), Sigma=S.star)
            psi2 <- psins(r=r + 2*elem(2, 2), Sigma=S.star)

            ## odd order
            if (prod(r) == 3) g.star[k] <- gamse.odd.2d(r=4, n, psi1, psi2, psi00, RK31)
            ## even order
            else g.star[k] <- gamse.even.2d(r=4, n, psi1, psi2)[k]
            psihat.star[k] <- kfe.scalar(x=x.star, deriv.order=r, g=g.star[k], binned=binned, bin.par=bin.par)
        }

        ## create replicated form of psihat
        psihat.star.vec <- rep(0, nrow(derivt4.vec))
        for (k in 1:nrow(derivt4.vec))
        psihat.star.vec[k] <- psihat.star[which.mat(r=derivt4.vec[k,], mat=derivt4)]

        psihat.star <- list(psir=psihat.star.vec, deriv.ind=derivt4.vec)
    }

    return(psihat.star)
}

#############################################################################
## Estimate psi functionals using 2-stage plug-in 
##
## Parameters
## x - pre-transformed data points
## pilot - "amse" - different AMSE pilot
##       - "samse" - SAMSE pilot
## Returns
## estimated psi functionals
#############################################################################
psifun2 <- function(x.star, pilot="samse", binned, bin.par, deriv.order=0, verbose=FALSE)
{ 
    d <- ncol(x.star)
    r <- deriv.order
    S.star <- var(x.star)
    n <- nrow(x.star)

    ## pilots are based on (2r+4)-th order derivatives
    ## compute 1 pilot for SAMSE    
    if (pilot=="samse")
    {
        g6.star <- gsamse(S.star, n=n, modr=6)
        psihat6.star <- kfe(x=x.star, G=g6.star^2*diag(d), deriv.order=6, deriv.vec=TRUE, binned=binned, add.index=FALSE, verbose=verbose)
        g.star <- gsamse(S.star, n=n, modr=4, nstage=2, psihat=psihat6.star)
        psihat.star <- kfe(x=x.star, G=g.star^2*diag(d), deriv.order=4, deriv.vec=TRUE, binned=binned, add.index=TRUE, verbose=verbose)
    }
    ## compute different pilots for AMSE
    else if ((pilot=="amse") & (d==2))
    {
        derivt4 <- dmvnorm.deriv(x=rep(0,d), deriv.order=4, add.index=TRUE, deriv.vec=FALSE, only.index=TRUE)
        derivt4.vec <- dmvnorm.deriv(x=rep(0,d), deriv.order=4, add.index=TRUE, deriv.vec=TRUE, only.index=TRUE)
        derivt6 <- dmvnorm.deriv(x=rep(0,d), deriv.order=6, add.index=TRUE, deriv.vec=FALSE, only.index=TRUE)

        RK31 <- 15/(64*pi)
        RK51 <- 945/(256*pi)
        RK33 <- 225/(256*pi)
        psi00 <- psins(r=rep(0,d), Sigma=S.star) 

        psihat6.star <- vector()
        g6.star <- vector()
        psihat.star <- vector()
        g.star <- vector()

        for (k in 1:nrow(derivt6))
        {
            r <- derivt6[k,]
            psi1 <- psins(r=r + 2*elem(1, 2), Sigma=S.star)
            psi2 <- psins(r=r + 2*elem(2, 2), Sigma=S.star)
            if (prod(r) == 5)
            g6.star[k] <- gamse.odd.2d(r=6, n, psi1, psi2, psi00, RK51)
            else if (prod(r) == 9)
            g6.star[k] <- gamse.odd.2d(r=6, n, psi1, psi2, psi00, RK33) 
            else  
            g6.star[k] <- gamse.even.2d(r=6, n, psi1, psi2)[k]

            psihat6.star[k] <- kfe.scalar(x=x.star, deriv.order=r, g=g6.star[k], binned=binned, bin.par=bin.par) 
        }

        ## pilots are based on 4th order derivatives using 6th order psi functionals
        ## computed above 'psihat6.star'
        for (k in 1:nrow(derivt4))
        {
            r <- derivt4[k,]
            psi1 <- psihat6.star[7 - (r + 2*elem(1,2))[1]]
            psi2 <- psihat6.star[7 - (r + 2*elem(2,2))[1]]

            if (prod(r) == 3)
            g.star[k] <- gamse.odd.2d(r=4, n, psi1, psi2, psi00, RK31)
            else
            g.star[k] <- gamse.even.2d(r=4, n, psi1, psi2)[k]

            psihat.star[k] <- kfe.scalar(x=x.star, deriv.order=r, g=g.star[k],  binned=binned, bin.par=bin.par) 
        }

        ## create replicated form of psihat
        psihat.star.vec <- rep(0, nrow(derivt4.vec))
        for (k in 1:nrow(derivt4.vec))
        psihat.star.vec[k] <- psihat.star[which.mat(r=derivt4.vec[k,], mat=derivt4)]

        psihat.star <- list(psir=psihat.star.vec, deriv.ind=derivt4.vec)
    }

    return(psihat.star)
}

#############################################################################
## Estimate psi functionals for 6-variate data using 1-stage plug-in 
## with unconstrained pilot
##
## Parameters
## x - data points
## Sd4, Sd6 - symmetrizer matrices of order 4 and 6
##
## Returns
## estimated psi functionals
#############################################################################

psifun1.unconstr <- function(x, binned, bgridsize, deriv.order=0, verbose=FALSE)
{
    n <- nrow(x)
    r <- deriv.order
    S <- var(x)

    ## stage 1 of plug-in
    G2r4 <- Gns(r=2*r+4,n=n,Sigma=S) 
    vecPsi2r4 <- kfe(x=x, G=G2r4, deriv.order=2*r+4, binned=binned, bgridsize=bgridsize, deriv.vec=TRUE, add.index=FALSE, verbose=verbose) 
    return (vecPsi2r4)
}

#############################################################################
## Estimate psi functionals for 6-variate data using 2-stage plug-in 
## with unconstrained pilot
##
## Parameters
## x - data points
## Sd4, Sd6 - symmetrizer matrices of order 4 and 6
##
## Returns
## estimated psi functionals
############################################################################

psifun2.unconstr <- function(x, binned, bgridsize, deriv.order=0, verbose=FALSE, optim.fun="optim")
{
    d <- ncol(x)
    n <- nrow(x)
    S <- var(x)
    S12 <- matrix.sqrt(S)
    r <- deriv.order
    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    ## stage 1 of plug-in
    x.star <- pre.sphere(x)
    G2r6 <- Gns(r=2*r+6,n=n,Sigma=var(x.star)) 
    vecPsi2r6 <- kfe(x=x.star, G=G2r6, binned=binned, bgridsize=bgridsize, deriv.order=2*r+6, deriv.vec=TRUE, add.index=FALSE, verbose=verbose)

    ## asymptotic squared bias for r = 4 for MSE-optimal G
    D2r4phi0 <- DrL0(d=d, r=2*r+4)
    Id2r4 <- diag(d^(2*r+4))

    AB2 <- function(vechG)
    {
        rr <- 2*r+4
        G <- invvech(vechG)%*%invvech(vechG)
        G12 <- matrix.sqrt(G)
        Ginv12 <- chol2inv(chol(G12))
        AB <- n^(-1)*det(Ginv12)*(Kpow(A=Ginv12,pow=rr)%*%D2r4phi0)+(1/2)*(t(vec(G))%x%Id2r4) %*% vecPsi2r6
        AB2.val <- sum(AB^2)
        return (AB2.val)
    }

    Gstart <- Gns(r=2*r+4,n=n,Sigma=var(x.star))
    Gstart <- matrix.sqrt(Gstart)

    if (optim.fun=="nlm")
    {
        res <- nlm(p=vech(Gstart), f=AB2, print.level=2*as.logical(verbose))    
        G2r4 <- res$estimate
    }
    else if (optim.fun=="optim")
    {
        res <- optim(vech(Gstart), AB2, method="BFGS", control=list(trace=as.numeric(verbose), REPORT=1))
        G2r4 <- res$par
    }
    G2r4 <- invvech(G2r4)%*%invvech(G2r4)
    G2r4 <- S12 %*% G2r4 %*% S12

    ## stage 2 of plug-in
    vecPsi2r4 <- kfe(x=x, G=G2r4, binned=binned, bgridsize=bgridsize, deriv.order=2*r+4, deriv.vec=TRUE, add.index=FALSE, verbose=verbose)

    return (vecPsi2r4)
}

#############################################################################
## Plug-in bandwidth selectors
#############################################################################

############################################################################
## Computes plug-in full bandwidth matrix - 2 to 6 dim
##
## Parameters
## x - data points
## Hstart - initial value for minimisation
## nstage - number of plug-in stages (1 or 2)
## pilot - "amse" - different AMSE pilot
##       - "samse" - SAMSE pilot
##       - "unconstr" - unconstrained pilot
## pre - "scale" - pre-scaled data
##     - "sphere"- pre-sphered data 
##
## Returns
## Plug-in full bandwidth matrix
#############################################################################
hpi <- function(x, nstage=2, binned=TRUE, bgridsize, deriv.order=0)
{
    ## 1-d selector is taken from KernSmooth's dpik
    d <- 1 
    if (missing(bgridsize)) bgridsize <- default.gridsize(d)
    if (deriv.order==0) h <- dpik(x=x, level=nstage, gridsize=bgridsize)
    else
    {
        n <- length(x)
        d <- 1
        r <- deriv.order  
        K2r4 <- dnorm.deriv(x=0, mu=0, sigma=1, deriv.order=2*r+4)
        K2r6 <- dnorm.deriv(x=0, mu=0, sigma=1, deriv.order=2*r+6) 
        m2 <- 1  
        mr <- psins.1d(r=2*r, sigma=1)

        ## formula for bias annihilating bandwidths from Wand & Jones (1995, p.70)
        if (nstage==2)
        {
            psi2r8.hat <- psins.1d(r=2*r+8, sigma=sd(x))
            gamse2r6 <- (2*K2r6/(-m2*psi2r8.hat*n))^(1/(2*r+9)) 
            psi2r6.hat <- kfe.1d(x=x, g=gamse2r6, deriv.order=2*r+6, inc=1, binned=binned)
            gamse2r4 <- (2*K2r4/(-m2*psi2r6.hat*n))^(1/(2*r+7))
            psi2r4.hat <- kfe.1d(x=x, g=gamse2r4, deriv.order=2*r+4, inc=1, binned=binned)
        }
        else 
        {
            psi2r6.hat <- psins.1d(r=2*r+6, sigma=sd(x))
            gamse2r4 <- (2*K2r4/(-m2*psi2r6.hat*n))^(1/(2*r+7))
            psi2r4.hat <- kfe.1d(x=x, g=gamse2r4, deriv.order=2*r+4, inc=1, binned=binned)
        }

        ## formula form Wand & Jones (1995, p.49)
        h <- ((2*r+1)*mr/(m2^2*psi2r4.hat*n))^(1/(2*r+5))
    }

    return(h)
}

Hpi <- function(x, nstage=2, pilot, pre="sphere", Hstart, binned, bgridsize, amise=FALSE, deriv.order=0, verbose=FALSE, optim.fun="optim")
{
    n <- nrow(x)
    d <- ncol(x)
    r <- deriv.order
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (d > 4) binned <- FALSE
    if (missing(bgridsize)) bgridsize <- default.bgridsize(d)
    if (!is.matrix(x)) x <- as.matrix(x)

    if (missing(pilot)) { if (d==2 & r==0) pilot <- "samse" else pilot <- "dscalar" }
    pilot1 <- match.arg(pilot, c("amse", "samse", "unconstr", "dunconstr", "dscalar"))
    pre1 <- match.arg(pre, c("scale", "sphere"))
    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    if (pilot1=="amse" & (d>2 | r>0)) stop("amse pilot selectors not defined for d>2 and/or r>0")
    if ((pilot1=="samse" | pilot1=="unconstr") & r>0) stop("dscalar or dunconstr pilot selectors are better for derivatives r>0")
    if (pilot1=="unconstr" & d>=6) stop("Unconstrained pilots are not implemented for d>6")

    if (pre1=="scale")
    {
        x.star <- pre.scale(x)
        S12 <- diag(sqrt(diag(var(x))))
        Sinv12 <- chol2inv(chol(S12))
    }
    else if (pre1=="sphere")
    {
        x.star <- pre.sphere(x)
        S12 <- matrix.sqrt(var(x))
        Sinv12 <- chol2inv(chol(S12))
    }

    Idr <- diag(d^r)
    RKr <- nu(r=r, diag(d))*2^(-d-r)*pi^(-d/2)

    if (binned)
    {
        H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RKr)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x)
        bin.par <- binning(x=x, bgridsize=bgridsize, H=H.max)
        H.max.star <- (((d+8)^((d+6)/2)*pi^(d/2)*RKr)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)
        bin.par.star <- binning(x=x.star, bgridsize=bgridsize, H=H.max.star)    
    }

    if (pilot1=="unconstr")
    {
        ## psi4.mat is on data scale
        if (nstage==1)
        psi.fun <- (-1)^r*psifun1.unconstr(x=x, binned=binned, bgridsize=bgridsize, deriv.order=r, verbose=verbose)
        else if (nstage==2)
        psi.fun <- psifun2.unconstr(x=x, binned=binned, bgridsize=bgridsize, deriv.order=r, verbose=verbose)
        psi2r4.mat <- (-1)^r*invvec(psi.fun)   

        ## use normal reference bandwidth as initial condition 
        if (missing(Hstart)) Hstart <- Hns(x=x, deriv.order=r)
    }
    else if (pilot1=="dunconstr")
    {
        ## G2r4 is on data scale
        G2r4 <- Gdunconstr(x=x, d=d, r=r, n=n, nstage=nstage, verbose=verbose, binned=binned, optim.fun=optim.fun)
        vecPsi2r4 <- kfe(x=x, G=G2r4, binned=binned, deriv.order=2*r+4, deriv.vec=TRUE, add.index=FALSE, verbose=verbose, bin.par=bin.par)
        if (missing(Hstart)) Hstart <-  Hns(x=x, deriv.order=r)
    }
    else if (pilot1=="dscalar")
    {
        ## g2r4 is on pre-transformed data scale
        g2r4 <- gdscalar(x=x.star, r=r, n=n, d=d, verbose=verbose, nstage=nstage, binned=binned)

        G2r4 <- g2r4^2 * diag(d)
        vecPsi2r4 <- kfe(x=x.star, G=G2r4, binned=binned, deriv.order=2*r+4, deriv.vec=TRUE, add.index=FALSE, verbose=verbose, bin.par=bin.par.star)
        if (missing(Hstart)) Hstart <-  Hns(x=x.star, deriv.order=r)
    }
    else
    {      
        ## psi4.mat is on pre-transformed data scale
        if (nstage==1)
        psi.fun <- psifun1(x.star, pilot=pilot, binned=binned, bin.par=bin.par.star, deriv.order=r, verbose=verbose)$psir
        else if (nstage==2)
        psi.fun <- psifun2(x.star, pilot=pilot, binned=binned, bin.par=bin.par.star, deriv.order=r, verbose=verbose)$psir
        psi2r4.mat <- invvec(psi.fun)

        ## use normal reference bandwidth as initial condition 
        if (missing(Hstart)) Hstart <- Hns(x=x.star, deriv.order=r)
        else Hstart <- Sinv12 %*% Hstart %*% Sinv12
    }

    ## PI is estimate of AMISE
    pi.temp <- function(vechH)
    { 
        H <- invvech(vechH) %*% invvech(vechH)
        Hinv <- chol2inv(chol(H))
        IdrvH <- Idr%x%vec(H)
        int.var <- 1/(det(H)^(1/2)*n)*nur(r=r, A=Hinv, mu=rep(0,d), Sigma=diag(d))*2^(-d-r)*pi^(-d/2)

        if (pilot1=="dunconstr" | pilot1=="dscalar") 
        pi.val <- int.var + (-1)^r*1/4*vecPsi2r4 %*% (vec(diag(d^r) %x% vec(H) %x% vec(H)))
        else
        pi.val <- int.var + (-1)^r*1/4* sum(diag(t(IdrvH) %*% psi2r4.mat %*% IdrvH))
        pi.val <- drop(pi.val)
        return(pi.val)
    }

    Hstart <- matrix.sqrt(Hstart)
    if (optim.fun=="nlm")
    {
        result <- nlm(p=vech(Hstart), f=pi.temp, print.level=2*as.numeric(verbose))    
        H <- invvech(result$estimate) %*% invvech(result$estimate)
        amise.star <- result$minimum
    }
    else if (optim.fun=="optim")
    {
        result <- optim(vech(Hstart), pi.temp, method="BFGS", control=list(trace=as.numeric(verbose), REPORT=1))
        H <- invvech(result$par) %*% invvech(result$par)
        amise.star <- result$value
    }
    if (!(pilot1 %in% c("dunconstr","unconstr")))  H <- S12 %*% H %*% S12   ## back-transform

    if (!amise) return(H)
    else return(list(H = H, PI.star=amise.star))
}     

#############################################################################
## Computes plug-in diagonal bandwidth matrix for 2 to 6-dim
##
## Parameters
## x - data points
## nstage - number of plug-in stages (1 or 2)
## pre - "scale" - pre-scaled data
##     - "sphere"- pre-sphered data 
##
## Returns
## Plug-in diagonal bandwidth matrix
#############################################################################
Hpi.diag <- function(x, nstage=2, pilot, pre="scale", Hstart, binned, bgridsize, amise=FALSE, deriv.order=0, verbose=FALSE, optim.fun="optim")
{
    n <- nrow(x)
    d <- ncol(x)
    r <- deriv.order
    RK <- (4*pi)^(-d/2)
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (d > 4) binned <- FALSE
    if (missing(bgridsize)) bgridsize <- default.bgridsize(d)
    if (!is.matrix(x)) x <- as.matrix(x)

    if (missing(pilot)) { if (d==2 & r==0) pilot <- "samse" else pilot <- "dscalar" }
    pilot1 <- match.arg(pilot, c("amse", "samse", "unconstr", "dunconstr", "dscalar"))
    pre1 <- match.arg(pre, c("scale", "sphere"))
    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    if (pre1=="sphere") stop("Using pre-sphering won't give diagonal bandwidth matrix")
    if (pilot1=="amse" & (d>2 | r>0)) stop("samse pilot selectors are better for higher dimensions and/or deriv.order>0")
    if (pilot1=="samse" & r>0) stop("dscalar or dunconstr pilot selectors are better for derivatives r>0")
    if (pilot1=="unconstr" | pilot1=="dunconstr") stop("Unconstrained pilot selectors are not suitable for Hpi.diag")

    if (pre1=="scale")
    {
        x.star <- pre.scale(x)
        S12 <- diag(sqrt(diag(var(x))))
        Sinv12 <- chol2inv(chol(S12))
    }
    else if (pre1=="sphere")
    {
        x.star <- pre.sphere(x)
        S12 <- matrix.sqrt(var(x))
        Sinv12 <- chol2inv(chol(S12))
    }

    if (binned)
    {
        H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)
        bin.par <- binning(x=x.star, bgridsize=bgridsize, H=H.max) 
    }

    Idr <- diag(d^r)  
    if (pilot1=="amse" | pilot1=="samse")
    {
        if (nstage==1)
        psi.fun <- psifun1(x.star, pilot=pilot, binned=binned, bin.par=bin.par, deriv.order=r, verbose=verbose)$psir
        else if (nstage==2)
        psi.fun <- psifun2(x.star, pilot=pilot, binned=binned, bin.par=bin.par, deriv.order=r, verbose=verbose)$psir
        psi2r4.mat <- invvec(psi.fun)
    }
    else if (pilot1=="dscalar")
    {
        g2r4 <- gdscalar(x=x.star, r=r, n=n, d=d, verbose=verbose, nstage=nstage, binned=binned)
        G2r4 <- g2r4^2 * diag(d)
        vecPsi2r4 <- kfe(x=x.star, G=G2r4, binned=binned, deriv.order=2*r+4, deriv.vec=TRUE, add.index=FALSE, verbose=verbose)
    }

    if (d==2 & r==0 & (pilot1=="amse" | pilot1=="samse"))
    {
        ## diagonal bandwidth matrix for 2-dim has exact formula 
        psi40 <- psi.fun[1]
        psi22 <- psi.fun[6]
        psi04 <- psi.fun[16]
        s1 <- sd(x[,1])
        s2 <- sd(x[,2])
        h1 <- (psi04^(3/4)*RK/(psi40^(3/4)*(sqrt(psi40*psi04)+psi22)*n))^(1/6)
        h2 <- (psi40/psi04)^(1/4) * h1
        H <- diag(c(s1^2*h1^2, s2^2*h2^2))
        psimat4.D <- invvech(c(psi40, psi22, psi04))
        amise.star <- drop(n^(-1)*RK*(h1*h2)^(-1) + 1/4*c(h1,h2)^2 %*% psimat4.D %*% c(h1,h2)^2)
    }
    else
    {  
        ## PI is estimate of AMISE
        pi.temp <- function(diagH)
        { 
            H <- diag(diagH) %*% diag(diagH)
            Hinv <- chol2inv(chol(H))
            IdrvH <- Idr%x%vec(H)
            int.var <- 1/(det(H)^(1/2)*n)*nu(r=r, Hinv)*2^(-d-r)*pi^(-d/2)

            if (pilot1=="dscalar")
            pi.val <- int.var + (-1)^r*1/4*vecPsi2r4 %*% (vec(diag(d^r) %x% vec(H) %x% vec(H)))
            else
            pi.val <- int.var + (-1)^r*1/4* sum(diag(t(IdrvH) %*% psi2r4.mat %*% IdrvH))

            return(drop(pi.val))
        }

        ## use normal reference bandwidth as initial condition
        if (missing(Hstart)) Hstart <- Hns(x=x.star, deriv.order=r)
        else Hstart <- Sinv12 %*% Hstart %*% Sinv12
        Hstart <- matrix.sqrt(Hstart)
        
        if (optim.fun=="nlm")
        {
            result <- nlm(p=diag(Hstart), f=pi.temp, print.level=2*as.numeric(verbose))    
            H <- diag(result$estimate^2)
            amise.star <- result$minimum
        }
        else if (optim.fun=="optim")
        {  
            result <- optim(diag(Hstart), pi.temp, method="BFGS", control=list(trace=as.numeric(verbose), REPORT=1))
            H <- diag(result$par) %*% diag(result$par)
            amise.star <- result$value
        }
        H <- S12 %*% H %*% S12  ## back-transform
    }

    if (!amise) return(H)
    else return(list(H = H, PI.star=amise.star))
}

#############################################################################
## Cross-validation bandwidth selectors
#############################################################################

#############################################################################
## Computes the least squares cross validation LSCV function for 2 to 6 dim
## 
## Parameters
## x - data values
## H - bandwidth matrix
##
## Returns
## LSCV(H)
#############################################################################
lscv.1d <- function(x, h, binned, bin.par, deriv.order=0)
{
    r <- deriv.order
    lscv1 <- kfe.1d(x=x, g=sqrt(2)*h, inc=1, binned=binned, bin.par=bin.par, deriv.order=2*r) 
    lscv2 <- kfe.1d(x=x, g=h, inc=0, binned=binned, bin.par=bin.par, deriv.order=2*r) 
    return((-1)^r*(lscv1 - 2*lscv2))     
}

lscv.mat <- function(x, H, binned=FALSE, bin.par, bgridsize, deriv.order=0)
{
    r <- deriv.order
    d <- ncol(x)
    n <- nrow(x)

    if (!binned)
    {
        lscv1 <- Qr(x=x, deriv.order=2*r, Sigma=2*H, inc=1)
        lscv2 <- Qr(x=x, deriv.order=2*r, Sigma=H, inc=0)
        lscv <- drop(lscv1 - 2*lscv2)
    }
    else
    {
        lscv1 <- kfe(x=x, G=2*H, inc=1, binned=binned, bin.par=bin.par, bgridsize=bgridsize, deriv.order=2*r, add.index=FALSE)
        lscv2 <- kfe(x=x, G=H, inc=0, binned=binned, bin.par=bin.par, bgridsize=bgridsize, deriv.order=2*r, add.index=FALSE)
        lscv <- (-1)^2*drop((lscv1 - 2*lscv2) %*% vec(diag(d^r)))
    }

    return(lscv)  
}

#############################################################################
## Finds the bandwidth matrix that minimises LSCV for 2 to 6 dim
## 
## Parameters
## x - data values
## Hstart - initial bandwidth matrix
##
## Returns
## H_LSCV
#############################################################################
hlscv <- function(x, binned=TRUE, bgridsize, amise=FALSE, deriv.order=0, bw.ucv=TRUE)
{
    ## adapted from versions supplied by J.E. Chacon 19/02/2021
    if (any(duplicated(x)))
    warning("Data contain duplicated values: LSCV is not well-behaved in this case")
    n <- length(x)
    d <- 1
    r <- deriv.order
    hnorm <- sqrt((4/(n*(d + 2)))^(2/(d + 4)) * var(x))
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (missing(bgridsize)) if (bw.ucv) bgridsize <- 10000 else bgridsize <- 10001 

    ## use stats::bw.ucv function
    if (bw.ucv)
    {
        difs <- dist(x)
        lower <- min(difs[difs>0])
        opt <- list()
        opt$minimum <- stats::bw.ucv(x=x, nb=bgridsize, lower=lower)
        opt$objective <- NA
    }
    else 
    {
        difs <- x%*%t(rep(1,n))-rep(1,n)%*%t(x)
        difs <- difs[lower.tri(difs)]  

        if (binned)
        {
            bin.par <- binning(x, bgridsize=bgridsize, h=hnorm)
            lscv.1d.temp <- function(h) { return(lscv.1d(x=x, h=h, binned=binned, bin.par=bin.par, deriv.order=r)) }
        }
        else
        {
            if (r>0) stop("Unbinned hlscv not yet implemented for deriv.order>0") 
            edifs <- exp(-difs^2/2)
            RK <- 1/(2*sqrt(pi))

            lscv.1d.temp <- function(h)
            {
                lscv1 <- (1-1/n)*sum(edifs^(1/(2*h^2)))/(h*sqrt(2)*sqrt(2*pi))
                lscv2 <- 2*sum(edifs^(1/h^2))/(h*sqrt(2*pi))
                return(RK/(n*h)+2*(lscv1-lscv2)/(n^2-n))
            }   
        }
        lower <- min(difs[difs>0]) 
        opt <- optimise(lscv.1d.temp, interval=c(lower, 2*hnorm), tol=.Machine$double.eps)
    }

    if (!amise) return(opt$minimum)
    else return(list(h=opt$minimum, LSCV=opt$objective))
}

Hlscv <- function(x, Hstart, binned=FALSE, bgridsize, amise=FALSE, deriv.order=0, verbose=FALSE, optim.fun="optim", trunc)
{
    if (any(duplicated(x))) warning("Data contain duplicated values: LSCV is not well-behaved in this case")
    if (!is.matrix(x)) x <- as.matrix(x)
    n <- nrow(x)
    d <- ncol(x)
    r <- deriv.order 
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (d>4) binned <- FALSE
    if (missing(bgridsize)) bgridsize <- default.bgridsize(d)

    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    ## use normal reference selector as initial condn
    Hnorm <- Hns(x=x, deriv.order=r)
    if (missing(Hstart)) Hstart <- Hnorm
    Hstart <- matrix.sqrt(Hstart)
    if (binned) bin.par <- binning(x=x, H=Hnorm, bgridsize=bgridsize)
    if (missing(trunc)) { if (deriv.order==0) trunc <- 1e10 else trunc <- 4 }

    lscv.init <- lscv.mat(x=x, H=Hnorm, binned=binned, bin.par=bin.par, deriv.order=r)
    lscv.mat.temp <- function(vechH)
    {
        H <- invvech(vechH) %*% invvech(vechH)
        lscv <- lscv.mat(x=x, H=H, binned=binned, bin.par=bin.par, deriv.order=r)
        if (det(H) < 1/trunc*det(Hnorm) | det(H) > trunc*det(Hnorm) | abs(lscv) > trunc*abs(lscv.init)) lscv <- lscv.init 
        return(lscv)  
    }

    if (optim.fun=="nlm")
    {
        result <- nlm(p=vech(Hstart), f=lscv.mat.temp, print.level=2*as.numeric(verbose))   
        H <- invvech(result$estimate) %*% invvech(result$estimate)
        amise.opt <- result$minimum
    }
    else if (optim.fun=="optim")
    {
        result <- optim(vech(Hstart), lscv.mat.temp, method="Nelder-Mead", control=list(trace=as.numeric(verbose), REPORT=1))     
        H <- invvech(result$par) %*% invvech(result$par)
        amise.opt <- result$value
    }

    if (!amise) return(H)
    else return(list(H=H, LSCV=amise.opt))
}

#############################################################################
## Finds the diagonal bandwidth matrix that minimises LSCV for 2 to 6 dim
## 
## Parameters
## x - data values
## Hstart - initial bandwidth matrix
##
## Returns
## H_LSCV,diag
#############################################################################
Hlscv.diag <- function(x, Hstart, binned=FALSE, bgridsize, amise=FALSE, deriv.order=0, verbose=FALSE, optim.fun="optim", trunc)
{
    if (any(duplicated(x))) warning("Data contain duplicated values: LSCV is not well-behaved in this case")
    if (!is.matrix(x)) x <- as.matrix(x)
    n <- nrow(x)
    d <- ncol(x)
    r <- deriv.order
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (d>4) binned <- FALSE
    if (missing(bgridsize)) bgridsize <- default.bgridsize(d)
    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    Hnorm <- Hns(x=x, deriv.order=r)
    if (missing(Hstart)) Hstart <- Hnorm

    ## don't truncate optimisation for deriv.order==0
    if (missing(trunc)) { if (deriv.order==0) trunc <- 1e10 else trunc <- 4 }

    ## linear binning
    if (binned)
    {
        bin.par <- binning(x=x, bgridsize=bgridsize, H=Hnorm)
    }

    lscv.init <- lscv.mat(x=x, H=Hnorm, binned=binned, bin.par=bin.par, deriv.order=r)

    lscv.mat.temp <- function(diagH)
    {
        H <- diag(diagH^2)
        lscv <- lscv.mat(x=x, H=H, binned=binned, bin.par=bin.par, deriv.order=r)
        if (det(H) < 1/trunc*det(Hnorm) | det(H) > trunc*det(Hnorm) | abs(lscv) > trunc*abs(lscv.init)) lscv <- lscv.init 
        return(lscv)  
    }

    Hstart <- matrix.sqrt(Hstart)
    if (optim.fun=="nlm")
    {
        result <- nlm(p=diag(Hstart), f=lscv.mat.temp, print.level=2*as.numeric(verbose))
        H <- diag(result$estimate^2)
        amise.opt <- result$minimum
    }
    else if (optim.fun=="optim")
    {
        result <- optim(diag(Hstart), lscv.mat.temp, method="Nelder-Mead", control=list(trace=as.numeric(verbose), REPORT=1))
        H <- diag(result$par^2)
        amise.opt <- result$value
    }

    if (!amise) return(H)
    else return(list(H=H, LSCV=amise.opt))
}

## aliases using "UCV" instead of "LSCV"

hucv <- function(...) { hlscv(...) }
Hucv <- function(...) { Hlscv(...) }
Hucv.diag <- function(...) { Hlscv.diag(...) }

#############################################################################
## Computes the biased cross validation BCV function for 2-dim
## 
## Parameters
## x - data values
## H1, H2 - bandwidth matrices
##
## Returns
## BCV(H)
#############################################################################
bcv.mat <- function(x, H1, H2, binned=FALSE)
{
    n <- nrow(x)
    d <- 2

    psi <- kfe(x, G=H2, deriv.order=4, add.index=TRUE, deriv.vec=TRUE, inc=0, binned=binned)
    psi40 <- psi$psir[1]
    psi31 <- psi$psir[2]
    psi22 <- psi$psir[4]
    psi13 <- psi$psir[8]
    psi04 <- psi$psir[16]

    coeff <- c(1, 2, 1, 2, 4, 2, 1, 2, 1)
    psi.fun <- c(psi40, psi31, psi22, psi31, psi22, psi13, psi22, psi13,psi04)/(n*(n-1))
    psi4.mat <- matrix(coeff * psi.fun, ncol=3, nrow=3)

    RK <- (4*pi)^(-d/2) 
    bcv <- drop(n^(-1)*det(H1)^(-1/2)*RK + 1/4*t(vech(H1)) %*% psi4.mat %*% vech(H1))

    return(list(bcv=bcv, psimat=psi4.mat))
}

#############################################################################
## Find the bandwidth matrix that minimises the BCV for 2-dim
## 
## Parameters
## x - data values
## whichbcv - 1 = BCV1
##          - 2 = BCV2 
## Hstart - initial bandwidth matrix
##
## Returns
## H_BCV
#############################################################################
Hbcv <- function(x, whichbcv=1, Hstart, binned=FALSE, amise=FALSE, verbose=FALSE)
{
    n <- nrow(x)
    d <- ncol(x)
    RK <- (4*pi)^(-d/2)
    if(!is.matrix(x)) x <- as.matrix(x)

    ## use normal reference b/w matrix for bounds
    k <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*n*gamma((d+8)/2)*(d+2)))^(2/(d+4))
    Hmax <- k * abs(var(x))
    up.bound <- Hmax
    if (missing(Hstart)) Hstart <- matrix.sqrt(0.9*Hmax)

    bcv.mat.temp <- function(vechH)
    {
        H <- invvech(vechH) %*% invvech(vechH)
        bcv <- bcv.mat(x=x, H1=H, H2=whichbcv*H, binned=binned)$bcv
        return(bcv)  
    }

    result <- optim(vech(Hstart), bcv.mat.temp, method="L-BFGS-B", upper=vech(matrix.sqrt(up.bound)), lower=-vech(matrix.sqrt(up.bound)), control=list(trace=as.numeric(verbose), REPORT=1))

    H <- invvech(result$par) %*% invvech(result$par)
    amise.opt <- result$value

    if (!amise) return(H)
    else return(list(H = H, BCV=amise.opt))
}

#############################################################################
## Find the diagonal bandwidth matrix that minimises the BCV for 2-dim
## 
## Parameters
## x - data values
## whichbcv - 1 = BCV1
##          - 2 = BCV2
## Hstart - initial bandwidth matrix
##
## Returns
## H_BCV, diag
#############################################################################
Hbcv.diag <- function(x, whichbcv=1, Hstart, binned=FALSE, amise=FALSE, verbose=FALSE)
{
    n <- nrow(x)
    d <- ncol(x)
    RK <- (4*pi)^(-d/2)
    if(!is.matrix(x)) x <- as.matrix(x)

    ## use maximally smoothed b/w matrix for bounds
    k <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*n*gamma((d+8)/2)*(d+2)))^(2/(d+4))
    Hmax <- k * abs(var(x))
    up.bound <- diag(Hmax)

    if (missing(Hstart)) Hstart <- 0.9*matrix.sqrt(Hmax)

    bcv.mat.temp <- function(diagH)
    {
        H <- diag(diagH) %*% diag(diagH) 
        return(bcv.mat(x, H, whichbcv*H, binned=binned)$bcv)
    } 

    result <- optim(diag(Hstart), bcv.mat.temp, method="L-BFGS-B", upper=sqrt(up.bound), control=list(trace=as.numeric(verbose), REPORT=1))
    H <- diag(result$par) %*% diag(result$par)
    amise.opt <- result$value
    
    if (!amise) return(H)
    else return(list(H = H, BCV=amise.opt))
}

#############################################################################
## Estimate scalar g_AMSE pilot bandwidth for SCV for 2 to 6 dim
##
## Parameters
## Sigma.star - scaled/ sphered variance matrix
## Hamise - (estimate) of H_AMISE 
## n - sample size
##
## Returns
## g_AMSE pilot bandwidth
#############################################################################
Theta6.elem <- function(d)
{
    Theta6.mat <- list()
    Theta6.mat[[d]] <- list()
    for (i in 1:d)
    Theta6.mat[[i]] <- list()

    for (i in 1:d)
    for (j in 1:d)
    {  
        temp <- numeric()
        for (k in 1:d)     
        for (ell in 1:d)    
        temp <- rbind(temp, elem(i,d)+2*elem(k,d)+2*elem(ell,d)+elem(j,d))

        Theta6.mat[[i]][[j]] <- temp
    }

    return(Theta6.mat)
}

gamse.scv <- function(x.star, d, Sigma.star, Hamise, n, binned=FALSE, bin.par, bgridsize, verbose=FALSE, nstage=1, Theta6=FALSE)
{
    if (nstage==0)
    {
        psihat6.star <- psins(r=6, Sigma=Sigma.star, deriv.vec=TRUE) 
    }
    else if (nstage==1)
    {  
        g6.star <- gsamse(Sigma.star, n, 6) 
        G6.star <- g6.star^2 * diag(d)
        if (Theta6) psihat6.star <- kfe(x=x.star, bin.par=bin.par, deriv.order=6, G=G6.star, deriv.vec=FALSE, add.index=FALSE, binned=binned, bgridsize=bgridsize, verbose=verbose)
        else psihat6.star <- kfe(x=x.star, bin.par=bin.par, deriv.order=6, G=G6.star, deriv.vec=TRUE, add.index=FALSE, binned=binned, bgridsize=bgridsize, verbose=verbose)
    }

    if (Theta6)
    {
        derivt6 <- dmvnorm.deriv(x=rep(0,d), deriv.order=6, add.index=TRUE, deriv.vec=FALSE, only.index=TRUE)
        Theta6.mat <- matrix(0, ncol=d, nrow=d)
        Theta6.mat.ind <- Theta6.elem(d)
        for (i in 1:d)
        for (j in 1:d)
        {
            temp <- Theta6.mat.ind[[i]][[j]]
            temp.sum <- 0
            for (k in 1:nrow(temp))
            temp.sum <- temp.sum + psihat6.star[which.mat(temp[k,], derivt6)]
            Theta6.mat[i,j] <- temp.sum 
        }

        eye3 <- diag(d)
        D4 <- dupl(d)$d
        trHamise <- tr(Hamise) 

        ## required constants - see thesis
        Cmu1 <- 1/2*t(D4) %*% vec(Theta6.mat %*% Hamise)
        Cmu2 <- 1/8*(4*pi)^(-d/2) * (2*t(D4)%*% vec(Hamise) + trHamise * t(D4) %*% vec(eye3))
        num <- 2 * (d+4) * sum(Cmu2*Cmu2)
        den <- -(d+2) * sum(Cmu1*Cmu2) + sqrt((d+2)^2 * sum(Cmu1*Cmu2)^2 + 8*(d+4)*sum(Cmu1*Cmu1) * sum(Cmu2*Cmu2))
        gamse <- (num/(den*n))^(1/(d+6)) 
    }
    else
    {  
        ## updated constants using Chacon & Duong (2010) notation
        Cmu1Cmu1 <- drop(1/4*psihat6.star %*% (Hamise %x% diag(d^4) %x% Hamise) %*% psihat6.star)
        Cmu1Cmu2 <- 3/4*(4*pi)^(-d/2)*drop(vec(Hamise %x% diag(d) %x% Hamise) %*% psihat6.star)
        Cmu2Cmu2 <- 1/64*(4*pi)^(-d)*(4*tr(Hamise%*%Hamise) + (d+8)*tr(Hamise)^2)
        num <- 2 * (d+4) * Cmu2Cmu2
        den <- -(d+2) * Cmu1Cmu2 + sqrt((d+2)^2 * Cmu1Cmu2^2 + 8*(d+4)*Cmu1Cmu1 * Cmu2Cmu2)
        gamse <- (num/(den*n))^(1/(d+6)) 
    }
    
    return(gamse)
}

#############################################################################
## Estimate unconstrained G_AMSE pilot bandwidth for SCV for 2 to 6 dim
## Code by J.E. Chacon
##
## Returns
## G_AMSE pilot bandwidth
#############################################################################
Gunconstr.scv <- function(x, binned=FALSE, bin.par, bgridsize, verbose=FALSE, nstage=1, optim.fun="optim")
{
    d <- ncol(x)
    n <- nrow(x)
    S <- var(x)
    S12 <- matrix.sqrt(S)
    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    ## stage 1 of plug-in
    x.star <- pre.sphere(x)
    if (nstage==1)
    {  
        G6 <- Gns(r=6,n=n,Sigma=var(x.star), scv=TRUE) 
        psihat6 <- kfe(x=x.star, deriv.order=6, G=G6, deriv.vec=TRUE, add.index=FALSE, binned=binned, bgridsize=bgridsize, verbose=verbose)
    }
    else if (nstage==0)
    {
        psihat6 <- psins(r=6, Sigma=var(x.star), deriv.vec=TRUE) 
    }

    ## constants for normal reference
    D4phi0 <- DrL0(d=d,r=4)
    Id4 <- diag(d^4)

    ## asymptotic squared bias for r = 4
    AB2 <- function(vechG)
    {
        G <- invvech(vechG)%*%invvech(vechG)
        G12 <- matrix.sqrt(G)
        Ginv12 <- chol2inv(chol(G12))
        AB <- n^(-1)*det(Ginv12)*(Kpow(A=Ginv12,pow=4)%*%D4phi0)*2^(-(d+4)/2) + (t(vec(G))%x%Id4)%*%psihat6
        AB2.val <- sum(AB^2)
        return (AB2.val)
    }

    Gstart <- Gns(r=4,n=n,Sigma=S,scv=TRUE)
    Gstart <- matrix.sqrt(Gstart)
    if (optim.fun=="nlm")
    {
        result <- nlm(p=vech(Gstart), f=AB2, print.level=2*as.logical(verbose))    
        G4 <- result$estimate
    }
    else if (optim.fun=="optim")    
    { 
        result <- optim(vech(Gstart), AB2, method="BFGS", control=list(trace=as.numeric(verbose), REPORT=1))
        G4 <- result$par
    }   
    G4 <- invvech(G4)%*%invvech(G4)
    G4 <- S12 %*% G4 %*% S12

    return(G4) 
}

#############################################################################
## Computes the smoothed cross validation function for 2 to 6 dim
## 
## Parameters
## x - data values
## H - bandwidth matrix
## G - pilot bandwidth matrix
##
## Returns
## SCV(H)
#############################################################################
scv.1d <- function(x, h, g, binned=TRUE, bin.par, inc=1, deriv.order=0)
{
    r <- deriv.order
    if (!missing(x)) n <- length(x)
    if (!missing(bin.par)) n <- sum(bin.par$counts)
    scv1 <- kfe.1d(x=x, deriv.order=2*r, bin.par=bin.par, g=sqrt(2*h^2+2*g^2), binned=binned, inc=inc)
    scv2 <- kfe.1d(x=x, deriv.order=2*r, bin.par=bin.par, g=sqrt(h^2+2*g^2), binned=binned, inc=inc)
    scv3 <- kfe.1d(x=x, deriv.order=2*r, bin.par=bin.par, g=sqrt(2*g^2), binned=binned, inc=inc)

    bias2 <- (-1)^r*(scv1 - 2*scv2 + scv3)
    if (bias2 < 0) bias2 <- 0
    scv <- (n*h)^(-1)*(4*pi)^(-1/2)*2^(-r)*OF(2*r) + bias2

    return(scv)
}

scv.mat <- function(x, H, G, binned=FALSE, bin.par, bgridsize, verbose=FALSE, deriv.order=0)
{
    d <- ncol(x)
    n <- nrow(x)
    r <- deriv.order
    vId <- vec(diag(d))
    Hinv <- chol2inv(chol(H))

    if (!binned)
    {
        scv1 <- Qr(x=x, deriv.order=2*r, Sigma=2*H+2*G, inc=1)
        scv2 <- Qr(x=x, deriv.order=2*r, Sigma=H+2*G, inc=1)
        scv3 <- Qr(x=x, deriv.order=2*r, Sigma=2*G, inc=1)
        bias2 <- (-1)^r*(scv1 - 2*scv2 + scv3)
        if (bias2 < 0) bias2 <- 0
    }
    else
    {
        scv1 <- kfe(x=x, G=2*H + 2*G, deriv.order=2*r, inc=1, binned=binned, bin.par=bin.par, bgridsize=bgridsize, verbose=verbose, add.index=FALSE)
        scv2 <- kfe(x=x, G=H + 2*G, deriv.order=2*r, inc=1, binned=binned, bin.par=bin.par, bgridsize=bgridsize, verbose=verbose, add.index=FALSE)
        scv3 <- kfe(x=x, G=2*G, deriv.order=2*r, inc=1, binned=binned, bin.par=bin.par, bgridsize=bgridsize, verbose=verbose, add.index=FALSE)

        bias2 <- drop((-1)^r*Kpow(vId,r) %*% (scv1 - 2*scv2 + scv3))
        if (bias2 < 0) bias2 <- 0
    }
    scvmat <- 1/(det(H)^(1/2)*n)*nur(r=r, A=Hinv, mu=rep(0,d), Sigma=diag(d), type="direct")*2^(-d-r)*pi^(-d/2) + bias2
    
    return (scvmat)
}

#############################################################################
## Find the bandwidth that minimises the SCV for 1 to 6 dim
## 
## Parameters
## x - data values
## pre - "scale" - pre-scaled data
##     - "sphere"- pre-sphered data
## Hstart - initial bandwidth matrix
##
## Returns
## H_SCV
#############################################################################
hscv <- function(x, nstage=2, binned=TRUE, bgridsize, plot=FALSE)
{
    sigma <- sd(x)
    n <- length(x)
    d <- 1
    hnorm <- sqrt((4/(n*(d + 2)))^(2/(d + 4)) * var(x))
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (missing(bgridsize)) bgridsize <- default.bgridsize(d)
    hmin <- 0.1*hnorm
    hmax <- 2*hnorm

    bin.par <- binning(x=x, bgridsize=bgridsize, h=hnorm)
    if (nstage==1)
    {
        psihat6 <- psins.1d(r=6, sigma=sigma)
        psihat10 <- psins.1d(r=10, sigma=sigma)
    }
    else if (nstage==2)
    {
        g1 <- (2/(7*n))^(1/9)*2^(1/2)*sigma
        g2 <- (2/(11*n))^(1/13)*2^(1/2)*sigma

        psihat6 <- kfe.1d(x=x, bin.par=bin.par, binned=binned, deriv.order=6, g=g1, inc=1)
        psihat10 <- kfe.1d(x=x, bin.par=bin.par, binned=binned, deriv.order=10, g=g2, inc=1)
    }

    g3 <- (-6/((2*pi)^(1/2)*psihat6*n))^(1/7) 
    g4 <- (-210/((2*pi)^(1/2)*psihat10*n))^(1/11)
    psihat4 <- kfe.1d(x=x, bin.par=bin.par, binned=binned, deriv.order=4, g=g3, inc=1)
    psihat8 <- kfe.1d(x=x, bin.par=bin.par, binned=binned, deriv.order=8, g=g4, inc=1)

    C <- (441/(64*pi))^(1/18) * (4*pi)^(-1/5) * psihat4^(-2/5) * psihat8^(-1/9)

    scv.1d.temp <- function(h)
    {
        return(scv.1d(x=x, bin.par=bin.par, h=h, g=C*n^(-23/45)*h^(-2), binned=binned, inc=1))
    }

    if (plot)
    {  
        hseq <- seq(hmin,hmax, length=400)
        hscv.seq <- rep(0, length=length(hseq))
        for (i in 1:length(hseq))
        hscv.seq[i] <- scv.1d.temp(hseq[i])
        plot(hseq, hscv.seq, type="l", xlab="h", ylab="SCV(h)")
    }

    opt <- optimise(f=scv.1d.temp, interval=c(hmin, hmax))$minimum
    if (n >= 1e5) warning("hscv is not always stable for large samples")

    return(opt)
}

Hscv <- function(x, nstage=2, pre="sphere", pilot, Hstart, binned, bgridsize, amise=FALSE, deriv.order=0, verbose=FALSE, optim.fun="optim")
{
    n <- nrow(x)
    d <- ncol(x)
    r <- deriv.order
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (d > 4) binned <- FALSE
    if (missing(bgridsize)) bgridsize <- default.bgridsize(d)
 
    if(!is.matrix(x)) x <- as.matrix(x)
    if (missing(pilot)) { if (d==2 & r==0) pilot <- "samse" else pilot <- "dscalar" }
    pilot1 <- match.arg(pilot, c("amse", "samse", "unconstr", "dunconstr", "dscalar"))
    pre1 <- match.arg(pre, c("scale", "sphere"))
    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    if (pilot1=="amse" & (d>2 | r>0)) stop("amse pilot selectors not defined for d>2 and/or r>0")
    if ((pilot1=="samse" | pilot1=="unconstr") & r>0) stop("dscalar or dunconstr pilot selectors are better for deriv.order>0")

    if (pre1=="scale")
    {
        x.star <- pre.scale(x)
        S12 <- diag(sqrt(diag(var(x))))
        Sinv12 <- chol2inv(chol(S12))
    }
    else if (pre1=="sphere")
    {
        x.star <- pre.sphere(x)
        S12 <- matrix.sqrt(var(x))
        Sinv12 <- chol2inv(chol(S12))
    }

    RK <- (4*pi)^(-d/2)
    if (binned)
    {
        if (pilot1=="unconstr" | pilot1=="dunconstr")
        H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x)
        else
        H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)
        if (default.bflag(d=d, n=n))
        bin.par <- binning(x=x.star, bgridsize=bgridsize, H=matrix.sqrt(H.max))
    }
    if (pilot1=="unconstr")
    {
        ## Gu pilot matrix is on data scale
        Gu <- Gunconstr.scv(x=x, binned=binned, bgridsize=bgridsize, verbose=verbose, nstage=nstage-1, optim.fun=optim.fun)
        if (missing(Hstart)) Hstart <- Hns(x=x, deriv.order=r)
    }
    else if (pilot1=="dunconstr")
    {
        ## Gu pilot matrix is on data scale
        Gu <- Gdunconstr(x=x, d=d, r=r, n=n, nstage=nstage, verbose=verbose, binned=binned, scv=TRUE, optim.fun=optim.fun)
        if (missing(Hstart)) Hstart <- Hns(x=x, deriv.order=r)
    }
    else if (pilot1=="dscalar")
    {
        ## Gs is on pre-transformed data scale
        g2r4 <- gdscalar(x=x.star, d=d, r=r, n=n, nstage=nstage, verbose=verbose, scv=TRUE, binned=binned)
        Gs <- g2r4^2*diag(d)
        if (missing(Hstart)) Hstart <- Hns(x=x.star, deriv.order=r)
    }
    else
    {
        ## Gs is on transformed data scale    
        Hamise <- Hpi(x=x.star, nstage=1, deriv.order=r, pilot=pilot, pre="sphere", binned=TRUE, bgridsize=bgridsize, verbose=verbose, optim.fun=optim.fun) 
        if (any(is.na(Hamise)))
        {
            warning("Pilot bandwidth matrix is NA - replaced with maximally smoothed")
            Hamise <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)
        }
        gs <- gamse.scv(x.star=x.star, d=d, Sigma.star=var(x.star), Hamise=Hamise, n=n, binned=binned, bgridsize=bgridsize, verbose=verbose, nstage=nstage-1)
        Gs <- gs^2*diag(d)

        ## use normal reference bandwidth as initial condition 
        if (missing(Hstart)) Hstart <- Hns(x=x.star, deriv.order=r) 
        else Hstart <- Sinv12 %*% Hstart %*% Sinv12
    }

    ## SCV is estimate of AMISE
    scv.mat.temp <- function(vechH)
    {
        H <- invvech(vechH) %*% invvech(vechH)
        if (pilot1=="samse" | pilot1=="amse" | pilot1=="dscalar") { Gpilot <- Gs; xx <- x.star }
        else if (pilot1=="unconstr" | pilot1=="dunconstr") { Gpilot <- Gu; xx <- x }

        if (default.bflag(d=d, n=n))
        scvm <- scv.mat(x=xx, H=H, G=Gpilot, binned=binned, bin.par=bin.par, verbose=FALSE, deriv.order=r)
        else
        scvm <- scv.mat(x=xx, H=H, G=Gpilot, binned=binned, verbose=FALSE, deriv.order=r)
        return(scvm)
    }

    Hstart <- matrix.sqrt(Hstart)
    if (optim.fun=="nlm")
    {
        result <- nlm(p=vech(Hstart), f=scv.mat.temp, print.level=2*as.numeric(verbose))    
        H <- invvech(result$estimate) %*% invvech(result$estimate)
        amise.star <- result$minimum
    }
    else if (optim.fun=="optim")
    {
        result <- optim(vech(Hstart), scv.mat.temp, method="BFGS", control=list(trace=as.numeric(verbose), REPORT=1))
        H <- invvech(result$par) %*% invvech(result$par)
        amise.star <- result$value
    }
    if (!(pilot1 %in% c("dunconstr","unconstr")))  H <- S12 %*% H %*% S12   ## back-transform

    if (!amise) return(H)
    else return(list(H = H, SCV.star=amise.star))
}

Hscv.diag <- function(x, nstage=2, pre="scale", pilot, Hstart, binned, bgridsize, amise=FALSE, deriv.order=0, verbose=FALSE, optim.fun="optim")
{
    n <- nrow(x)
    d <- ncol(x)
    r <- deriv.order
    RK <- (4*pi)^(-d/2)
    if (missing(binned)) binned <- default.bflag(d=d,n=n)
    if (d > 4) binned <- FALSE
    if (missing(bgridsize)) bgridsize <- default.bgridsize(d)
    if(!is.matrix(x)) x <- as.matrix(x)

    if (missing(pilot)) { if (d==2 & r==0) pilot <- "samse" else pilot <- "dscalar" }
    pilot1 <- match.arg(pilot, c("amse", "samse", "unconstr", "dunconstr", "dscalar"))
    pre1 <- match.arg(pre, c("scale", "sphere"))
    optim.fun <- match.arg(optim.fun, c("nlm", "optim"))

    if (pilot1=="amse" & (d>2 | r>0)) stop("samse pilot selectors are better for higher dimensions and/or deriv.order>0")
    if (pilot1=="samse" & r>0) stop("dscalar pilot selectors are better for deriv.order>0")
    if (pilot1=="unconstr" | pilot1=="dunconstr") stop("Unconstrained pilot selectors are not suitable for Hscv.diag")

    if (pre1=="sphere") stop("Using pre-sphering doesn't give a diagonal bandwidth matrix")

    if (pre1=="scale")
    {
        x.star <- pre.scale(x)
        S12 <- diag(sqrt(diag(var(x))))
        Sinv12 <- chol2inv(chol(S12))
    }
    else if (pre1=="sphere")
    {
        x.star <- pre.sphere(x)
        S12 <- matrix.sqrt(var(x))
        Sinv12 <- chol2inv(chol(S12))
    }

    if (binned)
    {
        H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)
        bin.par.star <- binning(x=x.star, bgridsize=bgridsize, H=H.max) 
    }

    if (pilot1=="dscalar")
    {
        ## Gs is on pre-transformed data scale
        g2r4 <- gdscalar(x=x.star, r=r, n=n, d=d, verbose=verbose, nstage=nstage, scv=TRUE, binned=binned)
        Gs <- g2r4^2*diag(d)
        if (missing(Hstart)) Hstart <- Hns(x=x.star, deriv.order=r)
    }
    else
    {
          ## Gs is on transformed data scale
          Hamise <- Hpi(x=x.star, nstage=1, pilot=pilot, pre="sphere", binned=binned, bgridsize=bgridsize, verbose=verbose, optim.fun=optim.fun) 
          if (any(is.na(Hamise)))
          {
            warning("Pilot bandwidth matrix is NA - replaced with maximally smoothed")
            Hamise <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)
          }
          gs <- gamse.scv(x.star=x.star, d=d, Sigma.star=var(x.star), Hamise=Hamise, n=n, binned=binned, bgridsize=bgridsize, verbose=verbose, nstage=nstage-1)

          Gs <- gs^2*diag(d)

        ## use normal reference bandwidth as initial condition 
        if (missing(Hstart)) Hstart <- Hns(x=x.star, deriv.order=r)
        else Hstart <- Sinv12 %*% Hstart %*% Sinv12
    }

    scv.mat.temp <- function(diagH)
    {
        H <- diag(diagH) %*% diag(diagH)
        if (default.bflag(d=d, n=n)) scvm <- scv.mat(x.star, H, Gs, binned=binned, verbose=FALSE, bin.par=bin.par.star, deriv.order=r)
        else scvm <- scv.mat(x.star, H, Gs, binned=binned, verbose=FALSE, deriv.order=r)
        return(scvm)
    }

    Hstart <- matrix.sqrt(Hstart)
    if (optim.fun=="nlm")
    {
        result <- nlm(p=diag(Hstart), f=scv.mat.temp, print.level=2*as.numeric(verbose))    
        H <- diag(result$estimate) %*% diag(result$estimate)
        amise.star <- result$minimum
    }
    else if (optim.fun=="optim")
    {  
        result <- optim(diag(Hstart), scv.mat.temp, method="Nelder-Mead", control=list(trace=as.numeric(verbose), REPORT=1))
        H <- diag(result$par) %*% diag(result$par)
        amise.star <- result$value
    }
    ## back-transform
    H <- S12 %*% H %*% S12

    if (!amise) return(H)
    else return(list(H = H, SCV.star=amise.star))
}

############################################################################
## Normal scale selector H_ns for kernel density derivate estimators
############################################################################

Hns <- function(x, deriv.order=0)
{
    if (is.vector(x)) { n <- 1; d <- length(x) } 
    else { n <- nrow(x); d <- ncol(x) }
    r <- deriv.order
    H <- (4/(n*(d+2*r+2)))^(2/(d+2*r+4)) * var(x)
    return(H)
}

Hns.diag <- function(x)
{
    if (is.vector(x)) { n <- 1; d <- length(x) } 
    else { n <- nrow(x); d <- ncol(x) }
    S <- var(x)
    Delta <- chol2inv(chol(diag(diag(S))))%*%S
    svD <- svd(Delta)
    Deltainv <- svD$v %*% diag(1/svD$d) %*% t(svD$u)
    H <- ((4*d*det(Delta)^(1/2))/(2*tr(Deltainv%*%Deltainv) + (tr(Deltainv))^2))^(2/(d+4))*diag(diag(S))*n^(-2/(d+4))
    
    return(H)
}

hns <- function(x, deriv.order=0)
{
    n <- length(x)
    d <- 1
    r <- deriv.order
    h <- (4/(n*(d+2*r+2)))^(1/(d+2*r+4))*sd(x)

    return(h)
}

#######################################################################
## Normal scale G_ns for kernel functional estimators
## scv = flag for SCV bias annihiliation constant
#######################################################################

Gns <- function(r,n,Sigma,scv=FALSE)
{
    d <- ncol(Sigma)
    const <- ifelse(scv,1,2)
    G <- (2/((n*(d+r))))^(2/(d+r+2))*const*Sigma

    return(G)
}

Gns.search <- function(G, f, n=10)
{
    Gstart.vec <- numeric(n)
    for (i in 1:length(Gstart.vec)) 
    { 
        Gstart <- matrix.sqrt(i/length(Gstart.vec)*G)
        Gstart.vec[i] <- f(vech(Gstart))
    }
    i <- which.min(Gstart.vec)/length(Gstart.vec)
    Gstart <- i*G

    return(Gstart)
}

############################################################################
## Normal mixture selector 
############################################################################

Hnm <- function(x, deriv.order=0, G=1:9, subset.ind, mise.flag=FALSE, verbose=FALSE, ...)
{
    if (!requireNamespace("mclust", quietly=TRUE)) stop("Install the mclust package as it is required.", call.=FALSE)
    modelNames <- "VVV" 
    ## set local seed
    current.seed <- .Random.seed
    on.exit( { .Random.seed <<- current.seed } )
    set.seed(8192)

    if (!missing(subset.ind)) nmixt.fit <- mclust::Mclust(x[subset.ind,], G=G, verbose=verbose, modelNames=modelNames, ...)
    else nmixt.fit <- mclust::Mclust(x, G=G, verbose=verbose, modelNames=modelNames, ...)

    if (is.vector(x)) { d <- length(x); n <- 1 } else { d <- ncol(x); n <- nrow(x) } 
    mus <- t(nmixt.fit$parameters$mean)
    Sigmas <- matrix(nmixt.fit$parameters$variance$sigma, byrow=TRUE, ncol=d)
    props <- nmixt.fit$parameters$pro
    if (mise.flag) H.nm <- Hmise.mixt(samp=n, mus=mus, Sigmas=Sigmas, props=props, deriv.order=deriv.order)
    else H.nm <- Hamise.mixt(samp=n, mus=mus, Sigmas=Sigmas, props=props, deriv.order=deriv.order)

    return(H.nm)
}

Hnm.diag <- function(x, deriv.order=0, G=1:9, subset.ind, mise.flag=FALSE, verbose=FALSE, ...)
{
    if (!requireNamespace("mclust", quietly=TRUE)) stop("Install the mclust package as it is required.", call.=FALSE)
    modelNames <- "VVI" 
    if (!missing(subset.ind)) nmixt.fit <- mclust::Mclust(x[subset.ind,], G=G, verbose=verbose, modelNames=modelNames, ...)
    else nmixt.fit <- mclust::Mclust(x, G=G, verbose=verbose, modelNames=modelNames, ...)

    if (is.vector(x)) { d <- length(x); n <- 1 } else { d <- ncol(x); n <- nrow(x) } 
    mus <- t(nmixt.fit$parameters$mean)
    Sigmas <- matrix(nmixt.fit$parameters$variance$sigma, byrow=TRUE, ncol=d)
    props <- nmixt.fit$parameters$pro
    if (mise.flag) H.nm <- Hmise.mixt.diag(samp=n, mus=mus, Sigmas=Sigmas, props=props, deriv.order=deriv.order)
    else H.nm <- Hamise.mixt.diag(samp=n, mus=mus, Sigmas=Sigmas, props=props, deriv.order=deriv.order)

    return(H.nm)
}

hnm <- function(x, deriv.order=0, G=1:9, subset.ind, mise.flag=FALSE, verbose=FALSE, ...)
{
    if (!missing(subset.ind)) nmixt.fit <- mclust::Mclust(x[subset.ind], G=G, verbose=verbose, ...)
    else nmixt.fit <- mclust::Mclust(x, G=G, verbose=verbose, ...)

    mus <- nmixt.fit$parameters$mean
    sigmas <- sqrt(nmixt.fit$parameters$variance$sigma)
    props <- nmixt.fit$parameters$pro
    n <- length(x)
    if (mise.flag) h.nm <- hmise.mixt(samp=n, mus=mus, sigmas=sigmas, props=props, deriv.order=deriv.order)
    else h.nm <- hamise.mixt(samp=n, mus=mus, sigmas=sigmas, props=props, deriv.order=deriv.order)

    return(h.nm)
}