############################################
### Simulation data generation functions
### Delaigle et al. (2020) simulation
############################################
#' Generate partially observed functional data from Delaigle et al.(2021).
#'
#' Partially observed functional data is generated with 51 regular grids by using the setting of Delaigle et al.(2021) with model 2.
#'
#' @param n a number of curves
#' @param type the type of generated data. "partial" means the option for partially observed data, "snippet" is short fragmented data, and "dense" means the fully observed curves.
#' @param out.prop a proportion of outlying curves of total n curves. Only used for dist = "normal".
#' @param out.type a outlier type, 1~3 are supported. Only used for dist = "normal".
#' @param dist a distribution which the data is generated. "normal"(Normal distribution) and "tdist"(t-distribution) are supported. If dist = "tdist", the option of \code{out.prop} and \code{out.type} are ignored.
#' @param noise a numeric value which is added random gaussian noises. Default is 0(No random noise).
#' @param d a parameter for missingness when \code{type} is "partial" (See Kraus(2015))
#' @param f a parameter for missingness when \code{type} is "partial" (See Kraus(2015))
#'
#' @return a list contatining as follows:
#' \item{Ly}{a list of n vectors containing the observed values for each individual.}
#' \item{Lt}{a list of n vectors containing the observation time points for each individual corresponding to \code{Ly}}
#' \item{out.ind}{a vector containing outlier index. 0 is non-outlier and 1 is the outlier.}
#' \item{x.full}{a n x 51 dense matrix with n observations per 51 timepoints before making partially observed.}
#'
#' @examples
#' set.seed(100)
#' x.list <- sim_delaigle(n = 100,
#' type = "partial",
#' out.prop = 0.2,
#' dist = "normal")
#' x <- list2matrix(x.list)
#' matplot(t(x), type = "l")
#'
#' @references
#' \cite{Delaigle, A., Hall, P., Huang, W., & Kneip, A. (2021). Estimating the covariance of fragmented and other related types of functional data. Journal of the American Statistical Association, 116(535), 1383-1401.}
#' \cite{Kraus, D. (2015). Components and completion of partially observed functional data. Journal of the Royal Statistical Society: Series B: Statistical Methodology, 777-801.}
#'
#' @export
sim_delaigle <- function(n = 100,
type = c("partial","snippet","dense"),
out.prop = 0.2,
out.type = 1,
dist = "normal",
noise = 0,
d = 1.4,
f = 0.2) {
gr <- seq(0, 1, length.out = 51) # equispaced points
# generate dense curves
m <- length(gr) # legnth of observed grids
cov_sim <- get_delaigle_cov(gr, model = 2)
if (dist == 'normal') {
y <- mvtnorm::rmvnorm(n, rep(0, m), cov_sim)
} else if (dist == 'tdist') {
out.prop <- 0 # for heavy-tailed distrubution, we do not set outlier index
y <- LaplacesDemon::rmvt(n = n,
mu = rep(0, m),
S = lqmm::make.positive.definite(cov_sim),
df = 3)
} else if (dist == 'laplace') {
out.prop <- 0 # for heavy-tailed distrubution, we do not set outlier index
y <- LaplacesDemon::rmvl(n = n,
mu = rep(0, m),
Sigma = lqmm::make.positive.definite(cov_sim))
}
# random noise
if (noise > 0) {
y <- y + matrix(stats::rnorm(n*m, 0, sqrt(noise)), n, m)
}
x <- list()
x$Ly <- lapply(1:n, function(i) { y[i, ] })
x$Lt <- lapply(1:n, function(i) { gr })
x$out.ind <- rep(0, n) # indicator of outlier
x.full <- t(sapply(x$Ly, cbind)) # matrix containing the fully observed data
# Check type option
if (type == "dense") { # Nothing do
x$x.full <- x.full
} else if (type == "partial") { # generate partially obseved data
# generate observation periods (Kraus(2015) setting)
# curve 1 will be missing on (.4,.7), other curves on random subsets
x.obs <- rbind((gr <= .4) | (gr >= .7),
simul.obs(n = n-1, grid = gr,
d = d, f = f)) # TRUE if observed
# remove missing periods
x.partial <- x.full
x.partial[!x.obs] <- NA
x <- list(Ly = apply(x.partial, 1, function(y){ y[!is.na(y)] }),
Lt = apply(x.obs, 1, function(y){ gr[y] }),
out.ind = x$out.ind,
x.full = x.full)
} else if (type == "snippet") { # generate functional snippets
# Lin & Wang(2020) setting
len.frag = c(0.1, 0.3) # length of domain for each curves
a_l <- len.frag[1]
b_l <- len.frag[2]
Ly <- list()
Lt <- list()
for (n_i in 1:n) {
l_i <- stats::runif(1, a_l, b_l)
M_i <- stats::runif(1, a_l/2, 1-a_l/2)
A_i <- max(0, M_i-l_i/2)
B_i <- min(1, M_i+l_i/2)
t <- gr[which(gr >= A_i & gr <= B_i)] # observed grids
m <- length(t) # legnth of observed grids
idx <- which(gr %in% t)
Ly[[n_i]] <- x$Ly[[n_i]][idx]
Lt[[n_i]] <- t
}
x <- list(Ly = Ly,
Lt = Lt,
out.ind = x$out.ind,
x.full = x.full)
} else {
stop(paste(type, "is not an appropriate argument of type"))
}
# no outliers
if (out.prop == 0) {
return(x)
}
# generate outlier curves
n.outlier <- ceiling(n*out.prop) # number of outliers
if (out.type %in% 1:3) {
x.outlier <- list(Ly = x$Ly[(n-n.outlier+1):n],
Lt = x$Lt[(n-n.outlier+1):n])
x.outlier <- make_outlier(x.outlier, out.type = out.type)
x$Ly[(n-n.outlier+1):n] <- x.outlier$Ly
x$out.ind[(n-n.outlier+1):n] <- 1 # outlier indicator
# x$Lt[(n-n.outlier+1):n] <- x.outlier$Lt
} else {
stop(paste(out.type, "is not correct value of argument out.type! Just integer value between 1~3."))
}
return(x)
}
### Generate outlying curves
make_outlier <- function(x, out.type = 1) {
n <- length(x$Lt) # number of outlying curves
d <- 0.3
sigma.exp <- 1
for (k in 1:n) {
t <- x$Lt[[k]]
m <- length(t) # length of time points
tmp.mat <- matrix(NA, m, m)
for (j in 1:m){
tmp.mat[j, ] <- abs(t - t[j])
}
Sigma <- exp(-tmp.mat/d) * sigma.exp^2
mu <- rep(0, m)
I <- matrix(0, m, m)
diag(I) <- rep(1, m)
Sig_norm <- matrix(0, m, m)
diag(Sig_norm) <- rep(100, m)
if (out.type == 1) {
err.out <- LaplacesDemon::rmvt(1, mu, I, df = 3) * LaplacesDemon::rmvn(1, rep(2, m), Sig_norm) # t with df=3
} else if (out.type == 2) {
err.out <- LaplacesDemon::rmvc(1, mu, I) # cauchy
} else if (out.type == 3) {
err.out <- LaplacesDemon::rmvc(1, mu, Sigma) # cauchy
}
# x_i <- rmvn(1, mu, Sigma) * 2 + err.out
x_i <- err.out
x$Ly[[k]] <- as.numeric(x_i)
}
return(x)
}
### Get covariance function for simulation
# model = 1~2 avaliable (In the paper, 1~4 models are shown)
get_K <- function(s, t, model = 2) {
if (model == 1) {
K <- sqrt(5)*(6*t^2-6*t+1) * sqrt(5)*(6*s^2-6*s+1) +
0.8 * sqrt(2)*log(t+0.5)*ifelse(t <= 0.5, 1, 0) * sqrt(2)*log(s+0.5)*ifelse(s <= 0.5, 1, 0) +
0.3 * sqrt(22)*(252*t^5-630*t^4+560*t^3-210*t^2+30*t-1)*ifelse(t > 0.5, 1, 0) *
sqrt(22)*(252*s^5-630*s^4+560*s^3-210*s^2+30*s-1)*ifelse(s > 0.5, 1, 0)
} else if (model == 2) {
K <- 1 + 0.5*(2*t-1)*(2*s-1)*3 + 0.5^2*(6*t^2-6*t+1)*(6*s^2-6*s+1)*5 +
0.5^3*(20*t^3-30*t^2+12*t-1)*(20*s^3-30*s^2+12*s-1)*7
}else if (model == 3) {
K <- 2*sin(2*1*pi*t)*sin(2*1*pi*s)+ (2^{-2})*2*cos(2*1*pi*t)*cos(2*1*pi*s)
+ (3^{-2})* 2*sin(2*2*pi*t)*sin(2*2*pi*s)+ 2*cos(2*2*pi*t)*cos(2*2*pi*s)
+(4^{-2})* 2*sin(2*2*pi*t)*sin(2*2*pi*s)+ 2*cos(2*2*pi*t)*cos(2*2*pi*s)
+(5^{-2})* 2*sin(2*3*pi*t)*sin(2*3*pi*s)+ 2*cos(2*3*pi*t)*cos(2*3*pi*s)
+(6^{-2})* 2*sin(2*3*pi*t)*sin(2*3*pi*s)+ 2*cos(2*3*pi*t)*cos(2*3*pi*s)
}
return(K)
}
#' Get True eigenfunction for simulation in Delaigle et al.(2021).
#'
#' True eigenfunctions are obtained for given grids from the setting of Delaigle et al.(2021) with model 2.
#'
#' @param grid a vector containing the observed timepoints.
#' @param model The model option for Delaigle et al.(2021).
#'
#' @return a p x k matrix containing the eigenfunctions where p is the length of grids and k is the number of true components.
#'
#' @examples
#' gr <- seq(0, 1, length.out = 51)
#' eig.true <- get_delaigle_eigen(gr, model = 2)
#' matplot(eig.true, type = "l")
#'
#' @references
#' \cite{Delaigle, A., Hall, P., Huang, W., & Kneip, A. (2021). Estimating the covariance of fragmented and other related types of functional data. Journal of the American Statistical Association, 116(535), 1383-1401.}
#'
#' @export
get_delaigle_eigen <- function(grid, model = 2) {
t <- grid
if (model == 1) {
eig_ftn <- matrix(0, length(t), 3)
eig_ftn[, 1] <-sqrt(5)*(6*t^2-6*t+1)
eig_ftn[, 2] <- sqrt(2)*log(t+0.5)*ifelse(t <= 0.5, 1, 0)
eig_ftn[, 3] <- sqrt(22)*(252*t^5-630*t^4+560*t^3-210*t^2+30*t-1)*ifelse(t > 0.5, 1, 0)
} else if (model == 2) {
eig_ftn <- matrix(0, length(t), 4)
eig_ftn[, 1] <- rep(1, length(t))
eig_ftn[, 2] <- (2*t-1)*sqrt(3)
eig_ftn[, 3] <- (6*t^2-6*t+1)*sqrt(5)
eig_ftn[, 4] <- (20*t^3-30*t^2+12*t-1)*sqrt(7)
}else if (model == 3) {
eig_ftn <- matrix(0, length(t), 6)
k=1;eig_ftn[, 1] <- sqrt(2)*sin(2*k*pi*t)
eig_ftn[, 2] <- sqrt(2)*cos(2*k*pi*t)
k=2;eig_ftn[, 3] <- sqrt(2)*sin(2*k*pi*t)
eig_ftn[, 4] <- sqrt(2)*cos(2*k*pi*t)
k=3;eig_ftn[, 5] <- sqrt(2)*sin(2*k*pi*t)
eig_ftn[, 6] <- sqrt(2)*cos(2*k*pi*t)
}
return(eig_ftn)
}
#' Get true covariance function from Delaigle et al.(2021).
#'
#' True covariance function is obtained for given grids from the setting of Delaigle et al.(2021) with model 2.
#'
#' @param grid a vector containing the observed timepoints.
#' @param model The model option for Delaigle et al.(2021).
#'
#' @return a covariance matrix with the dimension, length of grids.
#'
#' @examples
#' gr <- seq(0, 1, length.out = 51)
#' cov.true <- get_delaigle_cov(gr, model = 2)
#' library(GA)
#' persp3D(gr, gr, cov.true,
#' theta = -70, phi = 30, expand = 1)
#'
#' @references
#' \cite{Delaigle, A., Hall, P., Huang, W., & Kneip, A. (2021). Estimating the covariance of fragmented and other related types of functional data. Journal of the American Statistical Association, 116(535), 1383-1401.}
#'
#' @export
get_delaigle_cov <- function(grid, model = 2) {
m <- length(grid) # legnth of observed grids
cov_sim <- matrix(NA, m, m)
for (i in 1:m) {
for (j in 1:m) {
# If upper triangular than compute, else substitute transposed value
if (i <= j) {
cov_sim[i, j] <- get_K(grid[i], grid[j], model = model)
} else {
cov_sim[i, j] <- cov_sim[j, i]
}
}
}
return(cov_sim)
}
###################################
### Functions from Kraus (2015)
###################################
### Functions for partially observed case
norm.randcoef = function(n) stats::rnorm(n,0,1)
unif.randcoef = function(n) stats::runif(n,-1,1)*sqrt(3)
t5.randcoef = function(n) stats::rt(n,5)/sqrt(5/3)
simul.obs <- function(n = 100, grid = seq(0, 1, len = 200), d = 1.4, f = .2) {
out = matrix(TRUE,n,length(grid))
for (i in 1:n) {
cen = d*sqrt(stats::runif(1))
e = f*stats::runif(1)
out[i,(cen-e<grid)&(grid<cen+e)] = FALSE # missing interval = (cen-u,cen+u)
}
out
}
# ### get design grids
# get_ind_inter <- function(data.list) {
# gr <- data.list$gr
# tt <- lapply(data.list$x$t, function(t) {
# val <- cbind(rep(t, length(t)),
# rep(t, each = length(t)))
# ind <- cbind(rep(which(gr %in% val[, 1]), length(t)),
# rep(which(gr %in% val[, 2]), each = length(t)))
# return(ind)
# })
# tt <- do.call("rbind", tt) # rbind the argument(matrix type) in list
# tt <- unique(tt)
#
# return(tt)
# }
### extrapolation parts of covariance
cov_extra <- function(cov, ind) {
cov[ind] <- 0
return(cov)
}
### intrapolation parts of covariance
cov_inter <- function(cov, ind) {
cov_ext <- cov_extra(cov, ind)
cov <- cov - cov_ext
return(cov)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.