#' AFSSEN
#'
#' Selects important predictors and produces smooth estimates of their
#' coefficient functions in a function-on-scalar linear model
#' with sub-Gaussian errors and high-dimensional predictors.
#'
#' The procedure runs in two stages: a non-adaptive step (all weights equal
#' to 1) used to screen predictors and build adaptive weights, followed by an
#' adaptive step with k-fold cross-validation over \eqn{\lambda_H} and
#' \eqn{\lambda_K}, and a final fit on the whole data set at the optimal
#' penalty values.
#'
#' @param Y matrix. \code{N} by \code{m} matrix of pointwise observations.
#' @param X matrix. \code{N} by \code{I} design matrix. It has
#' standardized columns.
#' @param T_domain vector. Time domain for evaluation of \code{Y} and generating kernel.
#' Default is \code{T_domain = seq(0, 1, length = 50)}.
#' @param type_kernel string. Three possible choices are implemented:
#' \code{"gaussian"}, \code{"exponential"}, \code{"sobolev"}.
#' Default is \code{"exponential"}.
#' @param param_kernel scalar. Value of the characteristic smoothing parameter of the kernel.
#' It is the \eqn{\sigma} parameter of the Gaussian and the Exponential kernel,
#' and the \eqn{\sigma} parameter of the Sobolev kernel.
#' Default is \code{8}.
#' @param thres scalar. Stopping criterion: beta increment threshold
#' \deqn{
#' || \beta^{(T)} - \beta^{(T-1)} ||_{H} < thres
#' }
#' Default is \code{0.02}.
#' @param number_non_zeros scalar. Stopping criterion: kill switch; maximum number
#' of nonzero predictors.
#' Default is \code{20}.
#' @param ratio_lambda_H scalar. \eqn{\lambda_{Hmax}/\lambda_{Hmin}}.
#' Default is \code{0.01}.
#' @param number_lambda_H scalar. Number of log-equally spaced \eqn{\lambda_{H}}
#' generated in \eqn{[\lambda_{Hmin},\lambda_{Hmax}]}.
#' Default is \code{100}.
#' @param num_lambda_H_NONad scalar. Number of \eqn{\lambda_H} in the non-adaptive step.
#' Default is \code{50}.
#' @param lambda_H vector. Option to insert directly a vector of \eqn{\lambda_H}.
#' Default is \code{numeric()} (the grid is generated automatically).
#' @param lambda_K vector. Vector of \eqn{\lambda_{K}}. No default; must be supplied.
#'
#' @param early_CV binary. 0 or 1: whether to apply the \code{early_CV_thres}
#' stopping criterion.
#' Default is \code{0}.
#' @param early_CV_thres scalar. Stopping criterion: breaking point in the CV plot.
#' \deqn{
#' |CV(h-1,k)-CV(h,k)| / |CV(h-1,k)| < early_CV_thres
#' }
#' Default is \code{0.001}.
#' @param max_ite_nadp scalar. Stopping criterion: maximum number of iterations of the
#' coordinate descent algorithm in the non-adaptive step.
#' Default is \code{10}.
#' @param max_ite_adp scalar. Stopping criterion: maximum number of iterations of the
#' coordinate descent algorithm in the adaptive step.
#' Default is \code{30}.
#' @param max_ite_final scalar. Stopping criterion: maximum number of iterations of the
#' coordinate descent algorithm in the final step.
#' Default is \code{50}.
#' @param target_inc binary. Stopping criterion: 0 or 1; if 1, stop when the target
#' function increases.
#' Default is \code{1}.
#' @param proportion_training_set scalar. Value in (0,1): the
#' proportion of data used as training set for the cross-validation in the
#' non-adaptive step.
#' Default is \code{0.75}.
#' @param verbose logical. If \code{TRUE}, print progress messages for each step.
#' Default is \code{FALSE}.
#' @param fold_ad scalar. Number of folds for the CV in the adaptive step, used to find
#' the optimum \eqn{\lambda_{H}} and \eqn{\lambda_{K}} and then the coefficient estimates.
#' Default is \code{10}.
#'
#' @return list containing (among additional diagnostic fields such as
#' \code{Mean_Error_CV}, \code{index_lambdas}, the \eqn{\lambda} grids and
#' per-step convergence traces):
#' \itemize{
#' \item \code{beta : } matrix. Final estimation of the coefficients (as
#' coefficients of the kernel eigenbasis expansion).
#' \item \code{beta_no_adaptive : } matrix. Estimation of the coefficients after
#' the non-adaptive step.
#' \item \code{predictors : } vector. Final significant predictors.
#' \item \code{predictors_no_adaptive : } vector. Significant predictors after
#' the non-adaptive step.
#' \item \code{lambda_H_opt : } scalar. Optimum \eqn{\lambda_{H}}.
#' \item \code{lambda_K_opt : } scalar. Optimum \eqn{\lambda_{K}}.
#' }
#'
#' @export
#'
#' @import Rcpp MASS caret nloptr kernlab
#'
#' @examples
#' \dontrun{
#' data(simulation)
#' data(SobolevKernel)
#' time <- proc.time()
#' AFSSEN_estimation <- AFSSEN(X = X, Y = Y, lambda_K = c(1, 0.1, 0.01))
#' duration <- proc.time() - time
#' duration
#' names(AFSSEN_estimation)
#' }
#'
AFSSEN <- function(X,Y, T_domain = seq(0, 1, length = 50),
# X (a N*I numerical matrix) N = #observations, I = #all_predictors
# Y = Y_full = XB + E (a N*m matrix of pointwise evaluations), m = #time_points
type_kernel="exponential",param_kernel=8,
thres=0.02, number_non_zeros=20,
ratio_lambda_H=0.01, number_lambda_H=100, num_lambda_H_NONad=50,
lambda_H=numeric(), lambda_K,
early_CV=0, early_CV_thres=0.001, max_ite_nadp=10,
max_ite_adp=30, max_ite_final=50, target_inc=1,
proportion_training_set=0.75, verbose=FALSE, fold_ad=10)
{
# Development-time sourcing of the compiled/auxiliary helpers, kept for reference:
# sourceCpp('src/FLAME_functions_cpp.cpp')
# source("R/covMaterniso.R")
# source("R/generation_kernel.R")
# source("R/generation_kernel.R")
# source("R/sobolev_kernel_generation.R")
# source("R/norm_matrix_H.R")
# source("R/projection_basis.R")
# source("R/Matern_kernel_generation.R")
# source("R/generation_kernel.R")
############################################################################
#
# Part 0: Defining the function to numerically estimate the norm of beta in H
#
############################################################################
# Numerically solves, via COBYLA, for the H-norm x of a coefficient function
# given its basis coefficients B, kernel eigenvalues tau, penalty weights omega
# and the two penalty parameters. Returned as a scalar (nloptr solution).
estimation_norm_R <- function(lambda_H, lambda_K, omega, B, tau)
{
## @A: the function we want to optimize in AFSSEN is:
## 1 = \sum_{j=1}^{\infty} \dfrac{tau_j < \beta_j , v_j >^2 }{ tau_j \norm{\beta_j}_K + \lambda omega_j}^2
## @A: nloptr is an R package for nonlinear optimization.
## ?@A: why do you optimize |1 - sum| here while the commented line used |1 - 1/sum|?
optimization_function <- function(x, lambda_H, lambda_K, omega, B, tau)
{
numerat <- (B^2) * (tau^2)
denom <- ((tau+lambda_K)*x + lambda_H*omega*tau)^2
# tot <- 1 - 1 / (sum(numerat/denom))
tot <- 1 - sum(numerat/denom)
return(abs(tot))
}
opts= list("algorithm"= "NLOPT_LN_COBYLA", "xtol_rel"=1.0e-16)
## Start from 0 with lower bound 0: the H-norm is non-negative.
ott_model <- nloptr(0, optimization_function, opts=opts, lb=0,
lambda_H=lambda_H,lambda_K=lambda_K, omega=omega, B=B, tau=tau);
return(ott_model$solution)
}
# Elapsed-time clock (seconds), passed to the C++ routines for timing.
Proc_Time <- function()
{
r=as.numeric(proc.time()[3])
return(r)
}
function_computation <- estimation_norm_R
function_time <- Proc_Time
############################################################################
#
# Part 1: kernel generation
#
############################################################################
#m <- 50 # total number of points
#T_domain <- seq(0, 1, length = m) # time points, length = m
# M_integ: number of grid points per unit length, used as the integration
# weight when projecting pointwise curves onto the kernel eigenbasis.
M_integ <- length(T_domain)/diff(range(T_domain))
M <- 50
kernel_here <- generation_kernel(type = type_kernel,parameter = param_kernel,
domain = T_domain, thres = 0.99, return.derivatives = TRUE)
eigenval <- kernel_here$eigenval
eigenvect <- kernel_here$eigenvect
derivatives <- kernel_here$derivatives
# From here on Y holds the basis coefficients (J x N), not pointwise values.
Y <- projection_basis(Y, eigenvect, M_integ)
############################################################################
#
# Part 2: RUN non-adaptive version to estimate weights for the adaptive step
#
############################################################################
#######################################################
#
# Part 2.1: estimation_first
#
#######################################################
weights = rep(1, dim(X)[2])
# First step: all the weights are set to 1
# first run to identify the possible values of lambda
# (from lambda_max which makes all the predictors set to 0, to the lambda value at which
# number_non_zeros predictors are different from 0)
# num_lambda_H_NONad = number_lambda_H # NONad = Non adaptive
#num_lambda_NONad <- round(number_lambda/5) # for this first step (the most
# computationally expensive) we can reduce the number of lambdas examined.
# Then (in the Second: Adaptive Step)
# we will consider the whole set of lambdas to define the final estimation
#######################
#######################
####################### Non-adaptive
#######################
#######################
if (verbose) print('Non-adaptive step')
# Label each observation 2 (training) or 1 (test); ceiling() on both parts may
# produce one more label than rows, the extra labels are simply never drawn.
subset <- c(rep(2, ceiling(dim(X)[1]*proportion_training_set)),
rep(1, ceiling(dim(X)[1]*(1-proportion_training_set))))
# proportion_training_set is the percentage of the data to use to
# fit the model (2), the remaining part to compute the prediction
# error: left_out (1)
random_groups <- subset[sample(1:dim(X)[1])]
# training and test set
i <- 1
left_out <- which(random_groups==i)
# fitting of the model with the proportion_training_set% of the data and
# computation of the CV error
#sourceCpp("FLAME_functions_cpp.cpp")
eigenvect_for_der=eigenvect[-dim(eigenvect)[1],]
# Note: Y is stored with curves in columns, hence rows of X / columns of Y
# are subset together.
estimation_first_CV <- definition_beta_CV(X[-left_out,], Y[, -left_out],
X[left_out,], Y[, left_out],
eigenval, weights,
function_computation, function_time,
thres, number_non_zeros,
lambda_H,lambda_K,
ratio_lambda_H, num_lambda_H_NONad,
early_CV, early_CV_thres, max_ite_nadp, target_inc,
verbose, base::sum)
#projection_basis, M_integ-1,
#derivatives, eigenvect_for_der, XBd[,left_out])
## @A: estimation_first_CV$error is the \norm{Y_pred - Y_test}_H for different lambdas.
## @A: lambda_first_selected is not the optimum lambda. It is the index of the lambda.
## @A: estimation_first_CV$error is a matrix (#lambda_H)*(#lambda_K)
lambda_H_first_selected <- which(estimation_first_CV$error==min(estimation_first_CV$error[estimation_first_CV$error>0]) , arr.ind = TRUE)[1] # row index of optimum lambda_H
lambda_K_first_selected <- which(estimation_first_CV$error==min(estimation_first_CV$error[estimation_first_CV$error>0]) , arr.ind = TRUE)[2] # column index of optimum lambda_K
#######################################################
#
# Part 2.3: estimation_first_definite, weights_new
#
#######################################################
if (length(estimation_first_CV$Pred_opt)==0)
{
print('No significant predictors indentified.')
result <- list(beta=NULL,
beta_no_adaptive=NULL,
predictors=NULL,
predictors_no_adaptive=NULL)
return(result)
}else
{
if(length(estimation_first_CV$Pred_opt)==1)
{
## @A: In the following matrices, we have just one column because length(Pred_opt)==1;
## matrix() keeps the single selected column from collapsing to a plain vector.
beta_selected <- matrix(estimation_first_CV$Beta_opt[,estimation_first_CV$Pred_opt],
length(estimation_first_CV$Beta_opt[,estimation_first_CV$Pred_opt]), 1)
X2_data <- matrix(X[,estimation_first_CV$Pred_opt], length(X[,estimation_first_CV$Pred_opt]),1)
}
else{
beta_selected <- estimation_first_CV$Beta_opt[,estimation_first_CV$Pred_opt]
X2_data <- X[,estimation_first_CV$Pred_opt]
}
}
# definition of the adaptive weights: reciprocal of the H-norm of each
# first-step estimate, so strong predictors are penalized less.
weights_new <- 1/norm_matrix_H(beta_selected)
## @A: For the adaptive step, we consider only the nonzero predictors and drop the zero ones,
## so the length of weights_new can be less than that of the initial weights.
############################################################################
#
# Part 3: RUN adaptive step
#
############################################################################
#######################################################
#
# Part 3.1: estimation_second
#
#######################################################
if (verbose) print('Adaptive step')
flds <- createFolds(1:dim(X)[1], k = fold_ad, list = TRUE, returnTrain = FALSE)
SUM_MAT_CV = matrix(0,number_lambda_H,length(lambda_K))
SUM_MAT_der = matrix(0,number_lambda_H,length(lambda_K))
#error_der = rep(0,fold_ad)
for( f in 1:fold_ad){
left_out <- flds[[f]]
estimation_second_CV <- definition_beta_CV(as.matrix(X2_data[-left_out,]), Y[, -left_out],
as.matrix(X2_data[left_out,]), Y[, left_out],
eigenval, weights_new,
function_computation, function_time,
thres, number_non_zeros,
lambda_H, lambda_K,
ratio_lambda_H, number_lambda_H,
early_CV, early_CV_thres, max_ite_adp,target_inc,
verbose, base::sum)
#projection_basis, M_integ-1,
#derivatives, eigenvect_for_der, XBd[,left_out])
# Accumulate CV error across folds; zero entries mark (lambda_H, lambda_K)
# combinations never computed, so mask them with Inf before summing.
A <- estimation_second_CV$error
A [A ==0] <- Inf
SUM_MAT_CV = SUM_MAT_CV + A
# B <- estimation_second_CV$error_der
#
# B [B ==0] <- Inf
#
# SUM_MAT_der = SUM_MAT_der + B
# # # #derivatives
# B_pred_der_time <- t(derivatives %*%estimation_second_CV$Beta_opt[,estimation_second_CV$Pred_opt])
# Y_true_der_time <- X[,estimation_second_CV$Pred_opt]%*%B_pred_der_time
# Y_pred_der <- projection_basis(Y_true_der_time, eigenvect[-dim(eigenvect)[1],], M_integ)
#
# diff_der= Y_true_der - Y_pred_der
#
# error_der[f]=sqrt(sum(apply(diff_der,2,function(x) sum(x^2))))
print(paste("fold",f," ,"))
}
Mean_Error_CV=as.matrix(SUM_MAT_CV/fold_ad)
# Mean_Error_der=as.matrix(SUM_MAT_der/fold_ad)
# (row, col) index of the minimum positive mean CV error.
ind_min_cv = as.vector(which(Mean_Error_CV==min(Mean_Error_CV[which(Mean_Error_CV>0)]) , arr.ind = TRUE)[1,])
min_pred_CV=min(Mean_Error_CV)
# if(length(lambda_K)==1)
# corres_pred_der=Mean_Error_der[ind_min_cv[1]]
# else
# corres_pred_der=Mean_Error_der[ind_min_cv]
#
# min_pred_der=min(Mean_Error_der)
# NOTE(review): estimation_second_CV here is the fit from the LAST fold only;
# its lambda_H grid is assumed identical across folds — confirm.
lambda_K_opt=lambda_K[ind_min_cv[2]]
lambda_H_opt=estimation_second_CV$lambda_H[ind_min_cv[1]]
###################################
#
# Run on the whole data with the best lambda_H and lambda_K
#
###################################
if (verbose) print('Adaptive step for optimum parameters')
estimation_second <- definition_beta(as.matrix(X2_data), Y,
eigenval, weights_new,
function_computation, function_time,
thres, number_non_zeros,
lambda_H_opt, lambda_K_opt,
ratio_lambda_H, number_lambda_H,
verbose,base::sum)
#######################################################
#
# Part 3.3: estimation_second_definite and finding the lambda which gives minimum CV
#
#######################################################
#######################################################
#
# Part 3.3: The output values
#
#######################################################
predictors_2=estimation_second$Pred # final set of
# predictors different from 0 (among the ones isolated in the
# First: Non-Adaptive step)
beta_2=estimation_second$Beta[,predictors_2] # estimated betas,
# it is the matrix of the coefficients of the basis expansion
# of the betas (with respect to the basis defined by the eigenvectors
# of the kernel)
## @A: Pay attention: predictors_2 are the labels of nonzero predictors among the
## ones isolated in the First: Non-Adaptive step. So to find the actual labels among the
## whole set of predictors we need to define "predictor_def" as follows
predictor_def <- estimation_first_CV$Pred_opt[predictors_2] # index of
# the non-zero predictors computed in the estimation
beta_def <- matrix(0, dim(estimation_first_CV$Beta_opt)[1],
dim(estimation_first_CV$Beta_opt)[2])
## @A: "beta_def" is a J*I matrix whose nonzero columns are estimated by estimation_second and
## the rest of the columns are zero.
lambda_H=estimation_second_CV$lambda_H
lambda_K=estimation_second_CV$lambda_K
beta_def[, predictor_def] <- beta_2 # final matrix of the estimated betas
#XBhat=(beta_2)%*%t(X[,predictor_def])
# # #derivatives
#Bhat_true_der_time <- t(kernel_here$derivatives %*% beta_2)
#Yhat_true_der_time <- X[,predictor_def]%*%Bhat_true_der_time
#XBhatd <- projection_basis(Yhat_true_der_time, eigenvect[-dim(eigenvect)[1],], M_integ-1)
#pred_error=sum(norm_matrix_H(XB-XBhat))
#pred_error_der=sum(norm_matrix_H(XBd-XBhatd))
# (still as coefficients of the kernel basis)
# NOTE(review): lambda_H_opt/lambda_K_opt in the returned list are read from
# estimation_second_CV fields (note lower-case "lambda_k_opt"), not from the
# locally computed lambda_H_opt/lambda_K_opt above — verify they agree.
result <- list(beta=beta_def,
beta_no_adaptive=estimation_first_CV$Beta_opt,
predictors=predictor_def,
predictors_no_adaptive=estimation_first_CV$Pred_opt,
Mean_Error_CV=Mean_Error_CV,
min_pred_CV=min_pred_CV,
index_lambdas=ind_min_cv,
lambda_H=lambda_H,
lambda_K=lambda_K,
lambda_K_opt=estimation_second_CV$lambda_k_opt,
lambda_H_opt=estimation_second_CV$lambda_H_opt,
L_matrix_nonad=estimation_first_CV$L_matrix,
L_matrix_ad=estimation_second_CV$L_matrix,
Mat_target_inc=estimation_second_CV$Mat_target_inc,
vec_number_lambda_H_computed_adp=estimation_second_CV$vec_number_lambda_H_computed,
vec_break_in_number_pred_adp=estimation_second_CV$vec_break_in_number_pred,
vec_break_in_early_cv_adp=estimation_second_CV$vec_break_in_early_cv,
vec_norm_H_diff_beta_inc_nadp=estimation_first_CV$vec_norm_H_diff_beta_inc,
vec_norm_H_diff_beta_inc_adp=estimation_second_CV$vec_norm_H_diff_beta_inc,
X=X,
Y=Y,
#beta_time=beta,
#XB=XB,
#XBd=XBd,
#XBhat=XBhat,
#XBhatd=XBhatd,
#pred_error=pred_error,
#pred_error_der=pred_error_der,
kernel_vec_der=derivatives,
kernel_vec=eigenvect,
kernel_val=eigenval,
T_domain=T_domain)
return(result)
}
###############
###############
############### Test
###############
###############
#
# N=200
# I=200
# I0=10
# nu_beta=2.5
# range=1/4
# variance=1 # for the rough beta in Matern
# nu_eps = 1.5
# range_eps=1/4 # for epsilon in Matern
#
#
# m <- 110 # total number of points
# T_domain <- seq(0, 1, length = m) # time points, length = m
# M_integ <- length(T_domain)/diff(range(T_domain))
#
# # # # # # #######
# # # #
# # # defintion of the design matrix X, in this specific case the
# # # covariance matrix C is the identity matrix
# mu_x <- rep(0, I)
# C <- diag(I)
# X <- mvrnorm(n=N, mu=mu_x, Sigma=C) # X is a N*I matrix
# X <- scale(X) # normalization
#
# # # # # # #######
# # # # #
# # # defintion of the coefficients
#
# hyp <- c(log(range), log(variance)/2) # set of parameters for the # Matern Covariance operator of beta
# mu_beta <- rep(0,m) # mean of the beta
# Sig_beta <- covMaterniso(nu_beta, rho = range, sigma = sqrt(variance) , T_domain)
# beta <- mvrnorm(mu=mu_beta, Sigma=Sig_beta, n=I0) # beta is a I0*m matrix. Each row is made by a matern process.
#
# # # # # # # I0 significant coefficients
# # # # # # # defintion of the random errors
#
# mu_eps <- rep(0, m)
# Sig_eps <- covMaterniso(nu_eps, rho = range_eps, sigma = sqrt(variance), T_domain)
# eps <- mvrnorm(mu=mu_eps, Sigma=Sig_eps, n=N) # eps is a N*m matrix
#
# # # # # # random errors
# # # #
# I_X <- sort(sample(1:I, I0)) # index of the I0 significant predictors
#
# Y_true <- X[,I_X]%*%beta
# Y_full <- X[,I_X]%*%beta + eps # Y_n observations (pointwise evaluation)
#
# M <- 50
#
# kernel_here <- generation_kernel(type = "exponential",parameter = 8,
# domain = T_domain, thres = 0.99, return.derivatives = TRUE)
#
# eigenval <- kernel_here$eigenval
# eigenvect <- kernel_here$eigenvect
# derivatives <- kernel_here$derivatives
#
#
# # # # # preojection on the kernel basis of y and beta
# Y_matrix <- projection_basis(Y_full, eigenvect, M_integ)
# XB <- projection_basis(Y_true, eigenvect, M_integ)
#
#
# B_true <- projection_basis(beta, eigenvect, M_integ)
# matrix_beta_true_full <- matrix(0, dim(B_true)[1], I)
# matrix_beta_true_full[,I_X] <- B_true
#
# # # #derivatives
# B_true_der_time <- t(kernel_here$derivatives %*% B_true)
# Y_true_der_time <- X[,I_X]%*%B_true_der_time
# XBd <- projection_basis(Y_true_der_time, eigenvect[-dim(eigenvect)[1],], M_integ-1)
#
# Y <- Y_matrix
#
# ##############
#
# Y_total <- Y_full
# X_total <- X
#
# ##############
# # # #
# # #
# # #
# # #
# # #
# Output <- AFSSEN(X=X_total,Y=Y_total,T_domain = seq(0, 1, length = 50),
# # X (a N*I numerical matrix) N = #observations I = #all_predictors
# # Y = Y_ful=XB+E (a N*m matrix of pointwise evaluation!) m = #time_points
# type_kernel="exponential",param_kernel=8,
# thres=0.02, number_non_zeros=20,
# ratio_lambda_H=0.001, number_lambda_H=100, num_lambda_H_NONad=50,
# lambda_H=numeric(), lambda_K=c(1,0.1,0.01),
# early_CV=0, early_CV_thres=0.01, max_ite_nadp=5, max_ite_adp=10, max_ite_final=10, target_inc=1,
# proportion_training_set=0.75, verbose=FALSE, fold_ad=2)
# NOTE(review): the two lines below are website-embed boilerplate (rdrr.io-style
# scraping residue) accidentally included in the source; commented out so the
# file parses as valid R.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.