#' SSM function
#' This function is used to ...
#' @param data_ss contains all information, and only selected variables in formula_var enters the statespace model
#' @param formula_var select variables from \code{data_ss} into the statespace model
#' @param ss_param_temp a list of parameters to set up state-space model
#' \itemize{
#' \item{\code{m0}: }{initial values for states}
#' \item{\code{C0}: }{initial values for variance of states}
#' \item{\code{inits}: }{initial values for the estimating of all NA terms, via maximizing likelihood}
#' \item{\code{AR1_coeffi}: }{variables, whose coefficient is a AR(1) process; if none, then is NULL}
#' \item{\code{rw_coeffi}: }{variables, whose coefficient is a random walk process;if none, then is NULL}
#' \item{\code{w_cp_param}: }{variables, whose coefficients are periodic fixed (may shift to other levels over time, but fixed within periods)}
#' \item{\code{v_cp_param}: }{information about periodic observational variance V (may decrease or increase over time, but fixed within periods)}
#' }
#' @param max_iteration control for the convergence of changepoints, a positive integer
#' @param cpt_learning_param a list of variable for change point learning
#' \itemize{
#' \item{\code{cpt_method}: }{either "mean" or "meanvar"}
#' \item{\code{burnin}: }{a positive number in (0,1)}
#' \item{\code{mergeband}: }{a positive integer}
#' \item{\code{AR1_coeffi}: }{variables, whose coefficient is a AR(1) process; if none, then is NULL}
#' \item{\code{rw_coeffi}: }{variables, whose coefficient is a random walk process;if none, then is NULL}
#' \item{\code{w_cp_param}: }{variables, whose coefficients are periodic fixed (may shift to other levels over time, but fixed within periods)}
#' \item{\code{v_cp_param}: }{information about periodic observational variance V (may decrease or increase over time, but fixed within periods)}
#' }
#'
#' @param dlm_option using kalman filter or smoothing
#' @param printFlag print data while processing
#'
#' @return
#' @export
#' @importFrom stats rnorm
#' @importFrom utils head
#' @importFrom MASS mvrnorm
#' @importFrom crayon red
#' @importFrom crayon blue
#' @import changepoint dlm dplyr tidyverse imputeTS
run.SSM_unanimous_cpts=function(data_ss,formula_var,ss_param_temp,max_iteration=100,
cpt_learning_param=list(cpt_method="mean",burnin=1/10,mergeband=20,convergence_cri=10),
dlm_option="smooth",
printFlag=T){
# !! Please read this paragraph first !!
# Limitation 1: this current version of function has only tested on models, no more complicated than y ~ y_1 + x + x_1 + c
# for a continuous y, a binary/continuous x, and a continuous c,
# other scenarios, including (i) interactions and (ii) binary y and c, are not allowed and haven't been tested.
# Limitation 2: this current version of function only allow for simutaniously learning of the shared changepoints for w_cpt_param.
# Thus, it only works for (i) a initial collection of changepoints are given for a collection of variables,
# those changpoints are fixed, and no more updating
# (ii) a initial collection of changepoints are given for a collection of variables,
# but those changepoints are not for sure, and need updating
# (iii) no initial guess is given in terms of changepoints, but only how many
# the changepoints learnt need to be the same across all variables, whose coefficient shifts levels
# Working flowchart for [changepoints]
# (i) if changepoints are given in <changepoints> and <fixed_cpts>=T for all variabels in [w_cp_param]
# -> 1) no initailization
# 2) no estimation for the location of changepoints
# 3) no iteration until convergence for changepoints
# we allow changepoints differ for each variables, when they are all given in advance in this case
# (ii) if changepoints are given in <changepoints> and <fixed_cpts>=F for all variabels in [w_cp_param]
# -> 1) no initailization
# 2) estimate the location of changepoints for those variables with fixed_cpts=F
# 3) update changepoints via iteration, until convergence achieved for those variables with fixed_cpts=F
# changepoints should be same for variables for with fixed_cpts=F
# (iii) if changepoints are not given and thus <fixed_cpts> do not exist in [w_cp_param]
# -> 1) initialization
# 2) require estimation for the location of changepoints
# 3) require iteration until convergence for changepoints
# we requirechangepoints should be same across all variables with shifed levels
# Otherwise, all other cases are not permitted for the moment, including
# -> [not allowed] 1) changepoints are partially given
# -> [not allowed] 2) <fixed_cpts> are partially T
# Limitation 3: currently the initiation of changepoints parts miss the update of data with interaction between y_1 and x
# Limitation 4[to be changed]: simple and no iterative "StructTS" imputation for learning cpts in V, only for W
# estimated_cpts and iter in the return result only reflects those for W, not for V
# Explanation for parameters:
# <data_ss>: contains all information, and only selected variables in formula_var enters the statespace model
# <formula_var>: select variables from <data_ss> into the statespace model
# <ss_param>:
# <m0>: initial values for states
# <C0>: initial values for variance of states
# <inits>: initial values for the estimating of all NA terms, via maximizing likelihood
# <AR1_coeffi>: variables, whose coefficient is a AR(1) process;
# if none, then is NULL
# <rw_coeffi>: variables, whose coefficient is a random walk process;
# if none, then is NULL
# <w_cp_param>: variables, whose coefficients are periodic fixed (may shift to other levels over time, but fixed within periods)
# [structure] a list of lists, containing <variable>, <segments>, <changepoints>, <fixed_cpts> for each variable whose coefficient level shifts to different values
# - <variable> the name of the variable [must exist]
# - <segments> how many segments of constant coefficient [must exist]
# - <changepoints> the corresponding changepoints for the separated segments
# [note]: <changepoints> can be learnt from <segments>, or directly given
# - <fixed_cpts>: only exist when <changepoints> exists
# if none, then is NULL
# [Requirement] <changepoints> + <fixed_cpts> exist either for all variables or for none -> [may be futher altered]
# [Requirement] when <changepoints> is estimated, all changepoints should be the same across different variables -> [may be futher altered]
# [Requirement] when <changepoints> is given, changepoints for different variables may differ
# <v_cp_param>: information about periodic observational variance V (may decrease or increase over time, but fixed within periods)
# [structure] only one list containing <segments>, <changepoints>, and <fixed_cpts>
# - <segments>: how many segments of constant coefficient [must exist]
# - <changepoints>: the corresponding changepoints for the separated segments
# [note]: <changepoints> can be learnt from <segments>, or directly given
# - <fixed_cpts>=T: only exist when <changepoints> exists
# if none, then is NULL
# <max_iteration>: control for the convergence of changepoints, a positive integer
# <cpt_learning_param>: <cpt_method> either "mean" or "meanvar"
# <burnin> a positive number in (0,1)
# <mergeband> a positive integer
# <convergence_cri> a positive integer
# Caution: all used for learning changepoints in coefficients, not for changepoints in observational variance
# Example 1: (fixed effects for all coefficients)
# ss_param=list(inits=c(log(1)),m0=c(40,0.5,-1.5,-0.5,1),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi=NULL,v_cp_param=NULL,w_cp_param=NULL,max_iteration=50)
# Example 2: (AR(1) for C's coefficient and fixed for all other coefficients)
# ss_param=list(inits=c(0,-4.60517,0), # 0 for autocorrelation, log(0.01) for W_i, last 0 for V
# m0=c(40,0.5,-1.5,-0.5,1,0.5),C0=diag(rep(10^3),6),
# AR1_coeffi="c",rw_coeffi=NULL,v_cp_param=NULL,w_cp_param=NULL,max_iteration=50)
# Example 3: (random walk for C's coefficient and fixed for all other coefficients)
# ss_param=list(inits=c(0,-4.60517),m0=c(40,0.5,-0.5,-1.5,1.5),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi="c",v_cp_param=NULL,w_cp_param=NULL,max_iteration=50)
# Example 4: (AR(1) for both X and X_1, and fixed for all other coefficients)
# ss_param=list(inits=c(0,0,-4.60517,-4.60517,0), # first and second 0 for 2 autocorrelations, 3rd and 4th for W_i of X and X_1, and last 0 for V
# m0=c(40,0.5,-1.5,-0.5,1,-0.75,-0.25),C0=diag(rep(10^3),7),
# AR1_coeffi=c("x","x_1"),rw_coeffi=NULL,v_cp_param=NULL,w_cp_param=NULL,max_iteration=50)
# Example 5: (periodic V of 3 periods)
# ss_param=list(inits=c(log(1),log(20),log(1)),m0=c(40,0.5,-1.5,-0.5,1),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi=NULL,
# v_cp_param=list(segments=3),w_cp_param=NULL,max_iteration=50)
# Example 5.1: (periodic V of 3 known periods)
# ss_param=list(inits=log(c(1,20,1)),m0=c(40,0.5,-1.5,-0.5,1),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi=NULL,
# v_cp_param=list(segments=3,changepoints=c(400,700),fixed_cpts=T),w_cp_param=NULL,max_iteration=max_iteration)
# Example 6: (periodic coefficient for X of 3 periods)
# ss_param=list(inits=c(log(1)),m0=c(40,0.5,-1.5,-0.5,1),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi=NULL,v_cp_param=NULL,
# w_cp_param=list(list(variable="x",segments=3)),max_iteration=50)
# Example 6.1: (periodic coefficient for X of 3 known periods)
# ss_param=list(inits=c(log(1)),m0=c(40,0.5,-1.5,-0.5,1),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi=NULL,
# v_cp_param=NULL,
# w_cp_param=list(list(variable="x","segments"=3,changepoints=c(400,700),fixed_cpts=T)),
# max_iteration=max_iteration)
# Example 6.2: (periodic coefficient for X and X_1 of 3 uncertain known perios)
# ss_param=list(inits=c(log(1)),m0=c(40,0.5,-1.5,-0.5,1),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi=NULL,
# v_cp_param=NULL,
# w_cp_param=list(list(variable="x","segments"=3,changepoints=c(400,700),fixed_cpts=F),
# list(variable="x_1","segments"=3,changepoints=c(400,700),fixed_cpts=F)),
# max_iteration=max_iteration)
# Example 7: (period v of 3 periods and periodic coefficient for X of 3 periods)
# ss_param=list(inits=c(log(1),log(20),log(1)),m0=c(40,0.5,-1.5,-0.5,1),C0=diag(rep(10^3),5),
# AR1_coeffi=NULL,rw_coeffi=NULL,
# v_cp_param=list(segments=3),
# w_cp_param=list(list(variable="x",segments=3)),max_iteration=50)
# ------------------- Preparation ---------------------- #
# check: 1. all variables specified in the model exist the data_ss set
# 2. ss_param is a list
# 1) m0 is either NULL(use the default in dlm), or given (contains no NA, all numeric, same length as states)
# 2) C0 is either NULL(use the default in dlm), or given (contains no NA, all numeric, same dim as n_states*n_states)
# 3) the variables, whose coefficiens is AR(1) process, are char string, no NA, and in both model and data_ss set
# 4) the variables, whose coefficiens is randeom walk process, is char string and in both model and data_ss set
# 5) check if changepoints of V need to be estimated
# -> as no imputation is applied for y -> no change on data_ss -> no iteration needed
# In short, changepoints of V is only estimated once, in the initialization part
# Otherwise, if changepoints are given, no action is needed.
# 6) check if changepoints of W for certain variables need to be estimated
# 7) check cpt_learning_param under w_param
# 8) check if the length of inits is correct
# 3. <max_iteration>: a positive integer
if(!all(formula_var %in% colnames(data_ss))){
stop("The variables specified in the formula are not contained in the data_ss set.")
}
if(!is.list(ss_param_temp)){
stop("ss_param must be a list.")
}else{
m0=ss_param_temp$m0 # initial for states -> checked
C0=ss_param_temp$C0 # initial for variance of states -> checked
inits=ss_param_temp$inits # initial for NA terms in mle
AR1_coeffi=ss_param_temp$AR1_coeffi # add 1 additional state and 2 additional NA (autocorrelation and W) -> checked
rw_coeffi=ss_param_temp$rw_coeffi # add 1 additional NA (W) -> checked
w_cp_param=ss_param_temp$w_cp_param # change points to be estimated -> change W_t to be 10
v_cp_param=ss_param_temp$v_cp_param # change points estimated once in the begining, and add #(segments-1) NA for V -> checked
# n_states includes: 1 for intercept,
# n_variable for all selected variable,
# length(AR1_coeffi) for additional baselines for the AR(1) process
# Note: random walk only change W, and do not add states
# shift in beta only change W to 10, do not add anything
# varying V only change segments of V_t, do not add states
n_states=1+length(formula_var)+length(AR1_coeffi)
if(!is.null(m0)){ # m0 is either NULL(use the default in dlm), or given (contains no NA, all numeric, same length as states)
if(!is.numeric(m0)){stop("Error in m0: m0 is not all numeric.")}
if(!all(!is.na(m0))){stop("Error in m0: m0 contains NA.")}
if(length(m0)!=n_states){stop("Error in m0: m0 has different length as that of the states.")}
}
if(!is.null(C0)){ # C0 is either NULL(use the default in dlm), or given (contains no NA, all numeric, same dim as n_states*n_states)
if(!is.numeric(C0)){stop("Error in C0: C0 is not numeric.")}
if(!all(!is.na(C0))){stop("Error in C0: C0 contains NA.")}
if(!all(dim(C0)==n_states)){stop("Error in C0: C0 has different dimention than n_states*n_states.")}
}
if(!is.null(AR1_coeffi)){ # variables are of type 'char' string, no NA, and in both model and data_ss set
if(!is.character(AR1_coeffi)){stop("Error in AR1_coeffi: the specified variables in AR1_coeffi are not all char string.")}
if(!all(!is.na(AR1_coeffi))){stop("Error in AR1_coeffi: AR1_coeffi contains NA.")}
if(!all(AR1_coeffi %in% formula_var)){stop("Error in AR1_coeffi: the selected time-varying coefficient has no corresponding variable in the regression formula.")}
if(!all(AR1_coeffi %in% colnames(data_ss))){stop("Error in AR1_coeffi: the selected time-varying coefficient has no corresponding variable in the data_ss set.")}
}
if(!is.null(rw_coeffi)){ # variables are char string, no NA, and in both model and data_ss set
if(!is.character(rw_coeffi)){stop("Error in rw_coeffi: the specified variables in rw_coeffi are not all char string")}
if(!all(!is.na(rw_coeffi))){stop("Error in rw_coeffi: rw_coeffi contains NA.")}
if(!all(rw_coeffi %in% c("intercept",formula_var))){stop("Error in rw_coeffi: the selected time-varying coefficient has no corresponding variable in the regression formula.")}
if(!all(rw_coeffi %in% c("intercept",colnames(data_ss)))){stop("Error in rw_coeffi: the selected time-varying coefficient has no corresponding variable in the data_ss set.")}
}
if(!is.null(v_cp_param)){
v_cp_param_learncps=v_cp_param
# check 1) only allow [segments], [changepoints], or [fixed_cpts]
# 2) #elements is 1-3.
# 3-4) [segments] must exist, and must be a positive number
# 5) when either [changepoints] or [fixed_cpts] are detected: (for the case when cpts are given)
# 5.1) both [changepoints] and [fixed_cpts] exist.
# 5.2) changepoints are all positive numbers, and within the range of timeline
# when [changepoints] doesn't exist, (for the case when cpts aren't given)
# as NA is not allowed in the program of finding change points,
# if there are NA in the original outcome -> do one time imputation "StructTS"
# -> no iteration is needed, without iterative imputation
# 6) [segments]-1 must equals to #changepoints
if(!all(names(v_cp_param_learncps) %in% c("segments","changepoints","fixed_cpts"))){stop("Error in v_cp_param: only [segments], [changepoints], [fixed_cpts] are allowed.")}
if(!(length(v_cp_param_learncps)>0 & length(v_cp_param_learncps)<4)){stop("Error in v_cp_param: length is [1,3].")}
if(!("segments" %in% names(v_cp_param_learncps))){stop("Error in v_cp_param: <segments> must exist for v_cp_param.")}
if(!(is.numeric(v_cp_param_learncps$segments) &
length(v_cp_param_learncps$segments)==1 &
v_cp_param_learncps$segments>0)){stop("Error in v_cp_param: <segments> is not a positive number.")}
if(!all(!names(v_cp_param_learncps) %in% c("changepoints","fixed_cpts"))){ # if detected one of changepoints or fixed_cpts
if(!all(c("changepoints","fixed_cpts") %in% names(v_cp_param_learncps))){stop("Error in v_cp_param: [changepoints] and [fixed_cpts] must exist at the same time.")}
if(!is.numeric(v_cp_param_learncps$changepoints)){stop("Error in v_cp_param: [changepoints] is not all numeric")}
if(!all(v_cp_param_learncps$changepoints > 1 & v_cp_param_learncps$changepoints < nrow(data_ss))){stop("Error in v_cp_param: [changepoints] is not range of [1,nrow(data_ss)-1].")}
if(printFlag){
if(v_cp_param_learncps$fixed_cpts){
cat(red("Note: ''changepoints'' of V is given and fixed.\n"))
}else{
cat(red("Note: ''changepoints'' of V is given a initial guess, but need to be updated according to the fitting result.\n"))
}
}
}else{
# when initial guess of changepoints of V is unkwown
######### initialize changepoints for V (part I of initialization) #########
if(sum(is.na(data_ss$y))!=0){
# for the ignore case ?
cpt_v_temp=cpt.var(na_kalman(data_ss$y,model="StructTS"),penalty="Manual",Q=v_cp_param_learncps$segments-1, method="BinSeg")
}else{
# for the full and cc case
cpt_v_temp=cpt.var(data_ss$y,penalty="Manual",Q=v_cp_param_learncps$segments-1, method="BinSeg")
}
v_cp_param[["changepoints"]]=cpts(cpt_v_temp)
v_cp_param[["fixed_cpts"]]=T
if(printFlag){
invisible(plot(cpt_v_temp))
cat("Estimated ''changepoints'' for V are",cpts(cpt_v_temp),".\n")
}
}
if(length(v_cp_param[["changepoints"]]) != (v_cp_param[["segments"]]-1)){
stop("For varying V, the number <changepoints> doesn't agree with those indicated by <segments>")
}
}
if(!is.null(w_cp_param)){
# only when w_cp_param (shifted levels of coefficients) exists and cpts for W need to be estimated (either not given, or given but not fixed)
# -> we need [cpt_learning_param]
if(!is.null(cpt_learning_param) & is.list(cpt_learning_param)){
if(all(c("cpt_method","burnin","mergeband","convergence_cri") %in% names(cpt_learning_param)) & length(unique(names(cpt_learning_param)))==4){
if(!all(cpt_learning_param$cpt_method %in% c("mean","meanvar"))){stop("Error in cpt_learning_param$cpt_method: should be mean or meanvar.")}
if(!(length(cpt_learning_param$burnin)==1 & is.numeric(cpt_learning_param$burnin) & cpt_learning_param$burnin>0 & cpt_learning_param$burnin<1)){stop("Error in cpt_learning_param$burnin: should be number in (0,1).")}
if(!(length(cpt_learning_param$mergeband)==1 & is.numeric(cpt_learning_param$mergeband) & cpt_learning_param$mergeband>0)){stop("Error in cpt_learning_param$mergeband: should be a positive number.")}
if(!(length(cpt_learning_param$convergence_cri)==1 & is.numeric(cpt_learning_param$convergence_cri) & cpt_learning_param$convergence_cri>0)){stop("Error in cpt_learning_param$mergeband: should be a positive number.")}
cpt_method=cpt_learning_param$cpt_method
burnin=cpt_learning_param$burnin
mergeband=cpt_learning_param$mergeband
cpts_convergence_cri=cpt_learning_param$convergence_cri
}else{
stop("Error in cpt_learning_param: this is a list of 5 elements of [cpt_method],[burnin],[mergeband],[convergence_cri], and [max_iteration].")
}
}else{
stop("Error in cpt_learning_param: cpt_learning_param (list) is needed when some variable has shifted levles of coefficient.")
}
}
if(!is.null(w_cp_param)){
w_cp_param_learncps=w_cp_param
# Check: 1) The variables with changepoints must be in data_ss and in formula
# the order of variables aligns with the order in the formula
# 2) for each variable with changepoints,
# check 2.1) [variable], [segments], [changepoints], [fixed_cpts] may exist
# 2.2) #elements for each sublist is 1-4.
# 2.3) [variable] and [segments] must exist
# [variable] must be a char
# [segments] must be a positive number
# [segments] for all variables must be the same
# 2.4) when [changepoints] exists -> [fixed_cpts] must exist and be logical
# 2.5) when [changepoints] doesn't exist, it need to be estimated only once from the data_ss
# -> no iteration is needed, without iterative imputation
# 2.6) [variable], [segments], [changepoints] must exist for all variables after possibly estimation
# Note: # changepoints need to strictly equal to [segments]-1 eventually, but not necessary in the intialization
# check 2.1)-2.3)
for(aa in 1:length(w_cp_param_learncps)){
if(!all(names(w_cp_param_learncps[[aa]]) %in% c("variable","segments","changepoints","fixed_cpts"))){stop("Error in w_cp_param: only [variable], [segments], [changepoints], [fixed_cpts] for each variable are allowed.")}
if(!(length(w_cp_param_learncps[[aa]])>0 & length(w_cp_param_learncps[[aa]])<5)){stop("Error in w_cp_param: the length of each list is wrong.")}
if(!all(c("variable","segments") %in% names(w_cp_param_learncps[[aa]]))){stop("Error in w_cp_param: <variable> and <segments> must exist for each level")}
if(!(is.character(w_cp_param_learncps[[aa]]$variable) &
length(w_cp_param_learncps[[aa]]$variable)==1)){stop("Error in w_cp_param: <variable> is not a char.")}
if(!(is.numeric(w_cp_param_learncps[[aa]]$segments) &
length(w_cp_param_learncps[[aa]]$segments)==1 &
w_cp_param_learncps[[aa]]$segments>0)){stop("Error in w_cp_param: <segments> is not a positive integer.")}
}
if(length(unique(unlist(lapply(1:length(w_cp_param_learncps),function(aa1){w_cp_param_learncps[[aa1]][["segments"]]}))))>1){
stop("Error in w_cp_param: the segments value for different variables need to be the same")
}
# check 1)
w_cp_param_variables=unlist(lapply(1:length(w_cp_param_learncps),function(bb){w_cp_param_learncps[[bb]][["variable"]]}))
if(!all(w_cp_param_variables %in% formula_var)){stop("The selected level-shifted coefficient has no corresponding variable in the regression formula.")}
if(!all(w_cp_param_variables %in% colnames(data_ss))){stop("The selected level-shifted coefficient has no corresponding variable in the data_ss set.")}
if(length(w_cp_param_variables)>1){
w_cp_param_variables_order=c()
for(bb in w_cp_param_variables){
w_cp_param_variables_order=c(w_cp_param_variables_order,which(bb == c("intercept",formula_var)))
}
if(!all(diff(w_cp_param_variables_order)>0)){stop("Error in w_cp_param: the variable order should be the same as in formula.")}
}
# check 2.4)-2.5): if <changepoints> exist and "fixed_cpts"=T for all variables -> no need to learn changespoints and no need for iteration
# if <changepoints> exist and "fixed_cpts"=F for all variables -> no need to learn changespoints and need further iteration
# if <changepoints> do not exist -> "fixed_cpts" shouldn't exist either
# -> generate changepoints and iterate
# Caution: For now, we require changepoints+fixed_cpts to exist for all variables or for none
# when changepoints+fixed_cpts exist, fixed_cpts =T for all or =F for all
# Caution 2: For initialization, we do not require # segments=1 to be equal to the length of the changepoints
if(all(unlist(lapply(1:length(w_cp_param_learncps), function(cc1){ "changepoints" %in% names(w_cp_param_learncps[[cc1]]) & "fixed_cpts" %in% names(w_cp_param_learncps[[cc1]])})))){
# if [changepoints] and [fixed_cpts] both exist for each variable
if(!all(unlist(lapply(1:length(w_cp_param_learncps),function(cc3){is.numeric(w_cp_param_learncps[[cc3]]$changepoints) &
all(w_cp_param_learncps[[cc3]]$changepoints>1 & w_cp_param_learncps[[cc3]]$changepoints < nrow(data_ss))})))){
stop("Error in w_cp_param: [changepoints] for all variables are not all numeric and within the right range.")
}
if(length(unique(unlist(lapply(1:length(w_cp_param_learncps),function(aa2){w_cp_param_learncps[[aa2]][["changepoints"]]}))))!=w_cp_param_learncps[[1]]$segments-1){
stop("Error in w_cp_param: given [changepoints] may not be same for all variables")
}
if(all(unlist(lapply(1:length(w_cp_param_learncps), function(cc4){w_cp_param_learncps[[cc4]]$fixed_cpts})))){
cat(red("Note: ''changepoints'' of W is given and fixed.\n"))
}else if(all(unlist(lapply(1:length(w_cp_param_learncps), function(cc4){!w_cp_param_learncps[[cc4]]$fixed_cpts})))){
cat(red("Note: ''changepoints'' of W is given a initial guess, but need to be updated according to the fitting result.\n"))
}else{
stop("Error in w_cp_param: [fixed_cpts] need to be all T or all F when changepoints exist.")
}
}else if(all(unlist(lapply(1:length(w_cp_param_learncps), function(cc2){ (!"changepoints" %in% names(w_cp_param_learncps[[cc2]])) & (!"fixed_cpts" %in% names(w_cp_param_learncps[[cc2]]))})))){
# if [changepoints] and [fixed_cpts] both do not exist for each varible
if(printFlag){cat("The changepoints of variables, whose coefficients shift over time, are not given.\n")}
######### initialization of changepoints of W (part II of initialization) ##########
# if the changepoints don't exist, we ask the corresponding coeffi be randome walk, and then learn changepoints from the results
ss_param_learncps=ss_param_temp
# 1) remove the w_cp_param part
ss_param_learncps$w_cp_param=NULL
# 2) adjust the random walk part
# w_cp_param_variables=unlist(lapply(1:length(w_cp_param_learncps),function(ee){w_cp_param_learncps[[ee]][["variable"]]}))
ss_param_learncps$rw_coeffi=unique(c(ss_param_learncps$rw_coeffi,w_cp_param_variables))
# 3) generate new inits
v_part_length=ifelse(is.null(ss_param_learncps$v_cp_param),1,ss_param_learncps$v_cp_param$segments)
v_part=ss_param_learncps$inits[(length(ss_param_learncps$inits)-v_part_length+1):length(ss_param_learncps$inits)]
ar1_part_length=length(ss_param_learncps$AR1_coeffi)
if(ar1_part_length>0){
ar1_part=ss_param_learncps$inits[1:(2*ar1_part_length)]
}else{
ar1_part=NULL
}
ss_param_learncps$inits=c(ar1_part, rep(0,length(ss_param_learncps$rw_coeffi)),v_part)
# fit dlm model
# [Unique point] (begin)
data_ss_temp_init_cpts_w=data_ss
out_filter_init_cpts_w=run.SSM_unanimous_cpts(data_ss=data_ss_temp_init_cpts_w,formula_var=formula_var,ss_param_temp=ss_param_learncps,
max_iteration=max_iteration,cpt_learning_param=cpt_learning_param,
dlm_option=dlm_option,printFlag=printFlag)$out_filter
out_smooth_init_cpts_w=dlmSmooth(out_filter_init_cpts_w)
# [Unique point] (end)
# get the changepoints
w_cps_new=c()
start_pt=floor(nrow(out_filter_init_cpts_w$m)*burnin)
for(ee in 1:length(w_cp_param)){
if(dlm_option=="filter"){
temp=out_filter_init_cpts_w$m[start_pt:nrow(out_filter_init_cpts_w$m),which(colnames(out_filter_init_cpts_w$mod$GG)==w_cp_param[[ee]]$variable)]
}else if(dlm_option=="smooth"){
temp=out_smooth_init_cpts_w$s[start_pt:nrow(out_smooth_init_cpts_w$s),which(colnames(out_filter_init_cpts_w$mod$GG)==w_cp_param[[ee]]$variable)]
}
cpt_temp=cpt.meanvar(temp,penalty="Manual",Q=ss_param_temp$w_cp_param[[ee]]$segments-1,method="BinSeg") # changepoints learning method cannot be chosen
if(printFlag){plot(cpt_temp)}
w_cps_new=c(w_cps_new,cpts(cpt_temp)+start_pt-1)
}
w_cps_new=merge_closepoints(points=w_cps_new,band=mergeband)
if(printFlag){cat(red("The inital guess of changepoints are:"),w_cps_new,"\n")}
for(ff in 1:length(w_cp_param)){
w_cp_param[[ff]][["changepoints"]]=w_cps_new
w_cp_param[[ff]][["fixed_cpts"]]=F
}
}else{
stop("Error in w_cp_param: [changepoints] and [fixed_cpts] must both exist or not exist for all variables")
}
# final check 2.6): <variable>, <segments> and <changpoints> all exist for each variable
for(gg in 1:length(w_cp_param)){
if(!all(c("variable","segments","changepoints","fixed_cpts") %in% names(w_cp_param[[gg]]))){
stop("<variable>, <segments>, <changepoints>, and <fixed_cpts> must exist for each level shift coefficient.")
}
}
}
if(is.null(inits)){
stop("inits is required before estimating NA in dlm model.")
}else{
if(!all(!is.na(inits))){stop("Error in inits: NA in inits.")}
if(!is.numeric(inits)){stop("Error in inits: inits is not numeric")}
if(length(inits)!=(length(AR1_coeffi)*2+length(rw_coeffi)+ifelse(is.null(v_cp_param),1,v_cp_param$segments))){stop("Error in inits: Error in the length of inits.")}
}
}
if(!(length(max_iteration)==1 & is.numeric(max_iteration) & max_iteration>0 & max_iteration%%1==0)){stop("max_iteration is not a positive integer!")}
if(!(dlm_option %in% c("smooth","filter") & length(dlm_option)==1)){stop("dlm_option can either be filter or smooth")}
# functions to build up statespace model, which certains terms to be estimates as NA
# the items in u are: [1:length(AR1_coeffi)] terms -> logit(autocorrelation) for all AR(1) coefficients
# default value is 0=exp(0.5)/(1+exp(0.5))
# [length(AR1_coeffi)+1: 2*length(AR1_coeffi)] -> log(variance for Wj) for all AR(1) coefficients
# [2*length(AR1_coeffi)+1:2*length(AR1_coeffi)+length(rw_coeffi)] -> log(variance for Wj) for random walk coefficients
# [2*length(AR1_coeffi)+length(rw_coeffi)+1:2*length(AR1_coeffi)+length(rw_coeffi)+length(v_changepoint)] -> (possibly sereis of) log(variance for V)
# [2*length(AR1_coeffi)+1:3*length(AR1_coeffi)] -> log(variance for Wj) for random walk coefficients
# page 124
buildCamp=function(u){
model=dlmModReg(data_ss[,formula_var],dV=exp(u[2*length(AR1_coeffi)+length(rw_coeffi)+1]))
FF=matrix(c(model$FF,rep(0,length(AR1_coeffi))),nrow=1)
if(length(AR1_coeffi)>0){
states_name=c("intercept",formula_var,paste("rho.",AR1_coeffi,sep=""))
}else{
states_name=c("intercept",formula_var)
}
colnames(FF)=states_name
GG=diag(rep(1,length(states_name)))
colnames(GG)=rownames(GG)=states_name
W=diag(rep(0,length(states_name)))
colnames(W)=rownames(W)=states_name
if(length(AR1_coeffi)>0){
for(l in 1:length(AR1_coeffi)){
variable=AR1_coeffi[l]
GG[variable,grep(paste("^rho.",variable,"$",sep=""),colnames(GG))]=1
GG[variable,variable]=exp(u[l])/(1+exp(u[l]))
W[variable,variable]=exp(u[l+length(AR1_coeffi)])
}
}
if(length(rw_coeffi)>0){
for(k in 1:length(rw_coeffi)){
variable=rw_coeffi[k]
W[variable,variable]=exp(u[2*length(AR1_coeffi)+k])
}
}
JFF=matrix(c(model$JFF,rep(0,ncol(FF)-ncol(model$JFF))),nrow=1)
model$FF=FF
model$JFF=JFF
model$GG=GG
model$W=W
model$C0=diag(rep(1e07,ncol(model$FF)))
model$m0=rep(0,ncol(model$FF))
if(!is.null(m0) & all(!is.na(m0))){
model$m0=m0
}
if(!is.null(C0) & all(!is.na(C0))){
model$C0=C0
}
# adjust model for time-varying V
if(!is.null(v_cp_param)){
JV(model)=ncol(model$X)+1
v_changepoint=c(1,v_cp_param$changepoints,nrow(model$X))
X_JV=rep(NA,nrow(model$X))
for(jv in 1:(length(sort(v_changepoint))-1)){
X_JV[v_changepoint[jv]:v_changepoint[jv+1]]=exp(u[2*length(AR1_coeffi)+length(rw_coeffi)+jv])
}
model$X=cbind(model$X,X_JV)
}
# adjust model for sudden shift in coefficients
if(!is.null(w_cp_param)){
JW=model$W
JW[JW!=0]=0
for(jw in 1:length(w_cp_param_variables)){
JW[w_cp_param_variables[jw],w_cp_param_variables[jw]]=ncol(model$X)+jw
}
w_changepoint=matrix(0,nrow=nrow(model$X),ncol=length(w_cp_param_variables))
colnames(w_changepoint)=w_cp_param_variables
for(jw in 1:length(w_cp_param_variables)){
jw_cps_temp=unique(unlist(lapply(w_cp_param[[jw]]$changepoints,function(jwpoint){(jwpoint-10):(jwpoint+10)})))
jw_cps_temp=jw_cps_temp[jw_cps_temp>0 & jw_cps_temp<=nrow(model$X)]
w_changepoint[jw_cps_temp,w_cp_param_variables[jw]]=10
}
X(model)=cbind(model$X,w_changepoint)
JW(model)=JW
}
dlm(model)
return(model)
}
get.filter_result=function(out_filter,formula_var,AR1_coeffi,rw_coeffi,w_cp_param){
# organize result
result_var_names=c("(Intercept)",formula_var)
if(is.null(w_cp_param)){
Estimate=Std.Error=rep(NA,length(formula_var)+1)
w_cp_param_variables=NULL
result_var_names_new=result_var_names
}else{
Estimate=Std.Error=rep(NA,length(formula_var)+1+length(unlist(lapply(1:length(w_cp_param),function(i){w_cp_param[[i]][["changepoints"]]}))))
result_var_names_new=result_var_names
for(rr in 1:length(w_cp_param)){
pointer_id=grep(paste("^",w_cp_param[[rr]]$variable,"$",sep=""),result_var_names)
temp_id=grep(paste("^",w_cp_param[[rr]]$variable,"$",sep=""),result_var_names_new)
result_var_names_new=c(result_var_names_new[1:(temp_id-1)],
paste(w_cp_param[[rr]]$variable,"(period",1:(length(w_cp_param[[rr]]$changepoints)+1),")",sep=""))
if((pointer_id+1)<=length(result_var_names)){
result_var_names_new=c(result_var_names_new,result_var_names[(pointer_id+1):length(result_var_names)])
}
}
}
last_id=nrow(out_filter$m) # one more than nrow(data_ss_new)
# for fixed coefficient -> pick the last timepoint estimation?
# for AR(1) coefficient -> pick the average of 1/6 to 6/6 timepoints
# for random walk -> pick the last timepoint estimation, since any point doesn't make sense
pos=1
for(a in 1:(length(formula_var)+1)){
if(result_var_names[a] %in% AR1_coeffi){
Estimate[pos]=mean(out_filter$m[round(last_id/6,0):last_id,a]) # if it's an AR(1) process, return the mean of 1/6 to 6/6
pos=pos+1
}else if(result_var_names[a] %in% rw_coeffi){
Estimate[pos]=NA
pos=pos+1
}else if(result_var_names[a] %in% w_cp_param_variables){
changes_id=c(w_cp_param[[which(result_var_names[a]==w_cp_param_variables)]][["changepoints"]]-10,last_id)
Estimate[pos:(pos+length(changes_id)-1)]=out_filter$m[changes_id,a]
Std.Error[pos:(pos+length(changes_id)-1)]=unlist(lapply(changes_id,function(temp_id){sqrt(diag(dlmSvd2var(out_filter$U.C[[temp_id]],out_filter$D.C[temp_id,])))[a]}))
pos=pos+length(changes_id)
}else{
Estimate[pos]=out_filter$m[last_id,a]
Std.Error[pos]=sqrt(diag(dlmSvd2var(out_filter$U.C[[last_id]],out_filter$D.C[last_id,])))[a]
pos=pos+1
}
}
result=as.data.frame(cbind(Estimate=Estimate,Std.Error=Std.Error))
rownames(result)=result_var_names_new
return(result)
}
get.smooth_result=function(out_filter,formula_var,AR1_coeffi,rw_coeffi,w_cp_param){
out_smooth=dlmSmooth(out_filter)
# organize result
result_var_names=c("(Intercept)",formula_var)
if(is.null(w_cp_param)){
Estimate=Std.Error=rep(NA,length(formula_var)+1)
w_cp_param_variables=NULL
result_var_names_new=result_var_names
}else{
Estimate=Std.Error=rep(NA,length(formula_var)+1+length(unlist(lapply(1:length(w_cp_param),function(i){w_cp_param[[i]][["changepoints"]]}))))
result_var_names_new=result_var_names
for(rr in 1:length(w_cp_param)){
pointer_id=grep(paste("^",w_cp_param[[rr]]$variable,"$",sep=""),result_var_names)
temp_id=grep(paste("^",w_cp_param[[rr]]$variable,"$",sep=""),result_var_names_new)
result_var_names_new=c(result_var_names_new[1:(temp_id-1)],
paste(w_cp_param[[rr]]$variable,"(period",1:(length(w_cp_param[[rr]]$changepoints)+1),")",sep=""))
if((pointer_id+1)<=length(result_var_names)){
result_var_names_new=c(result_var_names_new,result_var_names[(pointer_id+1):length(result_var_names)])
}
}
}
last_id=nrow(out_smooth$s) # one more than nrow(data_ss)
# for fixed coefficient -> pick the last timepoint estimation
# for AR(1) coefficient -> pick the average of 1/6 to 6/6 timepoints
# for random walk -> pick the last timepoint estimation, since any point doesn't make sense
pos=1
for(a in 1:(length(formula_var)+1)){
if(result_var_names[a] %in% AR1_coeffi){
Estimate[pos]=mean(out_smooth$s[round(last_id/6,0):last_id,a]) # if it's an AR(1) process, return the mean of 1/6 to 6/6
pos=pos+1
}else if(result_var_names[a] %in% rw_coeffi){
Estimate[pos]=NA
pos=pos+1
}else if(result_var_names[a] %in% w_cp_param_variables){
changes_id=c(w_cp_param[[which(result_var_names[a]==w_cp_param_variables)]][["changepoints"]]-10,last_id)
Estimate[pos:(pos+length(changes_id)-1)]=out_smooth$s[changes_id,a]
Std.Error[pos:(pos+length(changes_id)-1)]=unlist(lapply(changes_id,function(temp_id){sqrt(diag(dlmSvd2var(out_filter$U.C[[temp_id]],out_filter$D.C[temp_id,])))[a]}))
pos=pos+length(changes_id)
}else{
Estimate[pos]=out_smooth$s[last_id,a]
Std.Error[pos]=sqrt(diag(dlmSvd2var(out_filter$U.C[[last_id]],out_filter$D.C[last_id,])))[a]
pos=pos+1
}
}
result=as.data.frame(cbind(Estimate=Estimate,Std.Error=Std.Error))
rownames(result)=result_var_names_new
return(result)
}
# ------------------- main part (begin)---------------------- #
# 1. given data without missing values in the covariates -> fit state space model
# 2. if cpts is not given, alternate between i) learn cpts
# ii) refit state sapce model
# until convergence
# 1. find MLE for init, build up spacestate model, and get filter states
if(printFlag){
cat("==============================================================================\n")
cat("Build statespace model with unknown variance and auto-correlation terms estimated.\n")
cat("\n The initial m0, C0, and inits given by the users is:\n m0: ",m0,
"\n C0: ",C0,
"\n inits: ",inits,
", or after transformation is: ")
if(length(AR1_coeffi)>0){
cat(c(exp(inits[1:length(AR1_coeffi)])/(1+exp(inits[1:length(AR1_coeffi)])),exp(inits[(length(AR1_coeffi)+1):length(inits)])))
}else{
cat(exp(inits[(length(AR1_coeffi)+1):length(inits)]))
}
cat("\n")
}
#if(printFlag){print(buildCamp(inits))}
outMLE=dlmMLE(data_ss$y,parm=inits,buildCamp)
out_filter=dlmFilter(data_ss$y,buildCamp(outMLE$par))
out_smooth=dlmSmooth(out_filter)
iter=1
# added for changepoints(begin)
if(!is.null(w_cp_param)){
# if fixed_cpts = False for all variables
if(all(unlist(lapply(1:length(w_cp_param), function(cc5){w_cp_param[[cc5]]$fixed_cpts==F})))){
cat(red("The changepoints are all not fixed, and need to be updated."))
out_filter_temp=out_filter
out_smooth_temp=out_smooth
start_pt=floor(nrow(out_filter_temp$m)*burnin)
convergence=F
while(convergence==F){
w_cps_new=c()
for(hh in 1:length(w_cp_param)){
if(dlm_option=="filter"){
temp=out_filter_temp$m[start_pt:nrow(out_filter_temp$m),which(colnames(out_filter_temp$mod$GG)==w_cp_param[[hh]]$variable)]
}else if(dlm_option=="smooth"){
temp=out_smooth_temp$s[start_pt:nrow(out_smooth_temp$s),which(colnames(out_filter_temp$mod$GG)==w_cp_param[[hh]]$variable)]
}
if("meanvar" %in% cpt_method){
cpt_temp=cpt.meanvar(temp,penalty="Manual",Q=ss_param_temp$w_cp_param[[hh]]$segments-1,method="BinSeg")
w_cps_new=c(w_cps_new,cpts(cpt_temp)+start_pt-1)
if(printFlag){
cat(red("The changepoints are:",w_cps_new,"\n"))
plot(cpt_temp)
}
}
if("mean" %in% cpt_method){
cpt_temp=cpt.mean(temp,penalty="Manual",Q=ss_param_temp$w_cp_param[[hh]]$segments-1,method="BinSeg")
w_cps_new=c(w_cps_new,cpts(cpt_temp)+start_pt-1)
if(printFlag){
cat(red("The changepoints are:"),w_cps_new,"\n")
plot(cpt_temp)
}
}
}
w_cps_new=merge_closepoints(w_cps_new,band=mergeband)
if(length(w_cps_new)==w_cp_param[[1]]$segments-1){
if(all(abs(w_cp_param[[1]][["changepoints"]]-w_cps_new)<cpts_convergence_cri)){
convergence=T
}
}
for(ii in 1:length(w_cp_param)){
w_cp_param[[ii]][["changepoints"]]=w_cps_new
}
outMLE_temp=dlmMLE(data_ss$y,parm=inits,buildCamp)
out_filter_temp=dlmFilter(data_ss$y,buildCamp(outMLE_temp$par))#Build
iter=iter+1
# check if it never converges
if(convergence==F & iter > max_iteration){
cat(red("Error in convergence."))
return(NULL)
}
}
out_filter=out_filter_temp
}else if(all(unlist(lapply(1:length(w_cp_param), function(cc5){w_cp_param[[cc5]]$fixed_cpts==T})))){
if(printFlag){cat("All changepoints are fixed, and no need to update after fitting.\n")}
}else{stop("Error in w_cp_param after fitting: fixed_cpts are not all T or all F.")}
}
# added for changepoints (end)
if(dlm_option=="smooth"){
result=get.smooth_result(out_filter=out_filter,formula_var=formula_var,AR1_coeffi=AR1_coeffi,rw_coeffi=rw_coeffi,w_cp_param=w_cp_param)
}else if(dlm_option=="filter"){
result=get.filter_result(out_filter=out_filter,formula_var=formula_var,AR1_coeffi=AR1_coeffi,rw_coeffi=rw_coeffi,w_cp_param=w_cp_param)
}
if(printFlag){print(result)}
# organize return
if(!is.null(w_cp_param) & all(unlist(lapply(1:length(w_cp_param), function(cc5){w_cp_param[[cc5]]$fixed_cpts==F})))){
estimated_cpts=w_cps_new
}else{
estimated_cpts=NULL
}
# output result
return(list(result=result,estimated_cpts=estimated_cpts,out_filter=out_filter,iter=iter))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.