### Pseudo-poisson informtion criterion (PPIC) for selecting the
### number of FPCs
PPIC <- function(K, f_locpoly, t, N, f_mu, G.eigen_v, xi, xgrid, delta) {
if (K == 1) {
tmp <- f_mu + outer(G.eigen_v[, 1], xi[1, ]) / delta # density functions
} else {
tmp <- f_mu + (G.eigen_v[, 1:K] / delta) %*% xi[1:K, ] # density functions
}
## make adjustments to get valid density functions (nonnegative+integrate to 1)
tmp <- apply(tmp, 2, function(x) {
x[x < 0] <- 0 # non-negative
x <- x / sum(x) # integrate to delta^2
return(x)
})
tmp <- tmp / delta^2
n <- length(N)
tmp <- lapply(seq(1, n), function(i) approx(xgrid, tmp[, i], t[(sum(N[1:i]) - N[i] + 1):sum(N[1:i])])$y)
f_locpoly <- unlist(f_locpoly)
if (sum(f_locpoly <= 0) > 0) f_locpoly[f_locpoly < 1e-8] <- 1e-8
tmp <- unlist(tmp)
tmp[tmp < 1e-8] <- 1e-8
D_K <- 2 * sum(f_locpoly * log(f_locpoly / tmp) - (f_locpoly - tmp)) + 2 * K
return(D_K)
}
### local linear regression
den_locpoly <- function(partition, t, N) {
n <- length(N)
l <- length(partition)
partition_x <- (partition[-1] + partition[-l]) / 2
cumsumN <- c(0, cumsum(N))
f_locpoly <- lapply(seq(1, n), function(i) {
a <- (cumsumN[i] + 1) : cumsumN[i + 1]
### the histogram density estimate for the ith subject
f_H <- hist(t[a], breaks = partition, plot = FALSE)$counts / N[i] / diff(partition)
### leave one out cross validation, local linear regression
GCV_loclinear <- function(hh) {
D <- sapply(partition_x, function(x) partition_x - x)
W <- dnorm(D / hh)
S0 <- apply(W, 1, sum)
S1 <- apply(W * D, 1, sum)
S2 <- apply(W * D^2, 1, sum)
L <- W *
{
t(VTM(S2, l - 1)) - D * t(VTM(S1, l - 1))
} / t(VTM(S0 * S2 - S1^2, l - 1))
f_oneout <- L %*% f_H
return(sum({(f_H - f_oneout) / (1 - mean(diag(L)))}^2))
}
### find bandwidth that minimize generalized cross validation
# here set lower limit to be the min(diff(partition_x)) to avoid error
hh <- optimize(GCV_loclinear, lower = min(diff(partition_x)), upper = IQR(t) * 2)$minimum
### use the obtained GCV bandwidth to get local linear regression estimates
D <- t(sapply(t[a], function(x) partition_x - x))
W <- dnorm(D / hh)
S0 <- apply(W, 1, sum)
S1 <- apply(W * D, 1, sum)
S2 <- apply(W * D^2, 1, sum)
L <- W *
{
t(VTM(S2, l - 1)) - D * t(VTM(S1, l - 1))
} / t(VTM(S0 * S2 - S1^2, l - 1))
return(L %*% f_H)
})
}
### local linear regression
den_locpoly2 <- function(t, N) {
n <- length(N)
nbreak <- numeric(n)
cumsumN <- c(0, cumsum(N))
f_locpoly <- lapply(seq(1, n), function(i) {
a <- (cumsumN[i] + 1) : cumsumN[i + 1]
### the histogram density estimate for the ith subject
f_H <- hist(t[a], plot = FALSE)
partition <- f_H$breaks
nbreak[i] <- length(partition)
f_H <- f_H$counts / N[i] / diff(partition)
partition_x <- {
partition[-1] + partition[-length(partition)]
} / 2
### leave one out cross validation, local linear regression
GCV_loclinear <- function(hh) {
D <- sapply(partition_x, function(x) partition_x - x)
W <- dnorm(D / hh)
S0 <- apply(W, 1, sum)
S1 <- apply(W * D, 1, sum)
S2 <- apply(W * D^2, 1, sum)
L <- W *
{
t(VTM(S2, nbreak[i] - 1)) - D * t(VTM(S1, nbreak[i] - 1))
} / t(VTM(S0 * S2 - S1^2, nbreak[i] - 1))
f_oneout <- L %*% f_H
return(sum({( f_H - f_oneout) / (1 - mean(diag(L)))}^2))
}
### find bandwidth that minimize generalized cross validation
# here set lower limit to be the min(diff(partition_x)) to avoid error
hh <- optimize(GCV_loclinear, lower = min(diff(partition_x)), upper = IQR(t[a]) * 2)$minimum
### use the obtained GCV bandwidth to get local linear regression estimates
D <- t(sapply(t[a], function(x) partition_x - x))
W <- dnorm(D / hh)
S0 <- apply(W, 1, sum)
S1 <- apply(W * D, 1, sum)
S2 <- apply(W * D^2, 1, sum)
L <- W *
{
t(VTM(S2, nbreak[i] - 1)) - D * t(VTM(S1, nbreak[i] - 1))
} / t(VTM(S0 * S2 - S1^2, nbreak[i] - 1))
return(L %*% f_H)
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.