R/cv.R
In fect: Fixed Effects Counterfactuals

###################################################################
## Cross-validation
###################################################################
fect.cv <- function(Y, # Outcome variable, (T*N) matrix
                    X, # Explanatory variables:  (T*N*p) array
                    D, #  Indicator for treated unit (tr==1) 
                    I,
                    II, 
                    T.on, 
                    T.off = NULL, 
                    T.on.carry = NULL, 
                    T.on.balance = NULL,
                    balance.period = NULL,
                    method = "ife",
                    criterion = "mspe",  
                    k = 5, # CV time
                    cv.prop = 0.1,
                    cv.treat = TRUE, 
                    cv.nobs = 3,
                    cv.donut = 1,
                    min.T0 = 5,
                    r = 0, # initial number of factors considered if CV==1
                    r.end,
                    proportion = 0,
                    nlambda = 10, 
                    lambda = NULL,
                    force, 
                    hasRevs = 1,
                    tol, # tolerance level
                    norm.para = NULL,
                    group.level = NULL,
                    group = NULL
                    ) {  
    
    ##-------------------------------##
    ## Parsing data
    ##-------------------------------##  
    placebo.pos <- na.pos <- NULL

    ## unit id and time
    TT <- dim(Y)[1]
    N <- dim(Y)[2]
    if (is.null(X) == FALSE) {
        p <- dim(X)[3]
    } else {
        p <- 0 
        X <- array(0, dim = c(1, 1, 0))
    }

    ## replicate data
    YY <- Y
    YY[which(II == 0)] <- 0 ## reset to 0 
    t.on <- c(T.on)
    T0.min <- min(apply(II, 2, sum))

    D.c <- apply(D, 2, function(vec){cumsum(vec)})
    D.c <- ifelse(D.c > 0, 1, 0)
    D.sum <- colSums(D.c)
    tr <- which(D.sum>=1)
    Ntr <- length(tr)
    co <- which(D.sum==0)
    Nco <- length(co)

    ##  --------- initial fit using fastplm --------- ##
    data.ini <- matrix(NA, (TT*N), (2 + 1 + p))
    data.ini[, 2] <- rep(1:N, each = TT)         ## unit fe
    data.ini[, 3] <- rep(1:TT, N)                ## time fe
    data.ini[, 1] <- c(Y)                       ## outcome
    if (p > 0) {                                ## covar
        for (i in 1:p) {
            data.ini[, (3 + i)] <- c(X[, , i])
        }
    }
    ## observed Y0 indicator:
    oci <- which(c(II) == 1)
    initialOut <- initialFit(data = data.ini, 
                             force = force, 
                             oci = oci)
    Y0 <- initialOut$Y0
    beta0 <- initialOut$beta0
    if (p > 0 && sum(is.na(beta0)) > 0) {
        beta0[which(is.na(beta0))] <- 0
    }

    ## ------------- restrictions on candidate hyper parameters ---------- ##
    obs.con <- (sum(II) - r.end * (N + TT) + r.end^2 - p) <= 0
    if (obs.con) {
        while((sum(II) - r.end * (N + TT) + r.end^2 - p) <= 0) {
            r.end <- r.end - 1
        }
    }
    if (r.end >= T0.min) {
        if (method %in% c("both", "ife", "gsynth")) {
            message("Factor number should not be greater than ", T0.min-1, "\n", sep = "")
        }
        r.end <- T0.min-1
    } 
    else {
        if (obs.con) {
            if (method %in% c("both", "ife", "gsynth")) {
                message("Factor number should not be greater than ", r.end, "\n", sep = "")
            }
        }
    }

        ##-------------------------------##
    ## ----------- Main Algorithm ----------- ##
        ##-------------------------------##
    
    validX <- 1 ## no multi-colinearity 
    CV.out.ife <- CV.out.mc <- NULL
    
    ##----------------------------------------------------##
    ##         Cross-validation of r and lambda           ##
    ##----------------------------------------------------##
    
    r.max <- min(TT, r.end)
    r.cv <- 0 ## initial value
    
    if(method %in% c("ife", "both", "gsynth") && FALSE){
        r.cv <- 0
        est.best <- inter_fe_ub(YY, Y0, 
                                X, II, beta0, 
                                0, force = force, 
                                tol)
        message("Cross validation cannot be performed since available pre-treatment records of treated units are too few. So set r.cv = 0.\n ")
    }
    else {

        r.old <- r ## save the minimal number of factors 
        message("Cross-validating ...","\n") 
        if(criterion=='mspe'){
            message("Criterion: Mean Squared Prediction Error\n")
        }
        else if(criterion=='wmspe'){
            message("Criterion: Weighted Mean Squared Prediction Error\n")
        }
        else if(criterion=='gmspe'){
            message("Criterion: Geometric Mean Squared Prediction Error\n")
        }
        else if(criterion=='wgmspe'){
            message("Criterion: Weighted Geometric Mean Squared Prediction Error\n")
        }
        else if(criterion=='mad'){
            message("Criterion: Median Absolute Deviation\n")
        }
        else if(criterion=='moment'){
            message("Criterion: Moment Conditions\n")
        }
        else if(criterion=='gmoment'){
            message("Criterion: Geometric Moment Conditions\n")
        }
        else if(criterion=='pc'){
            message("Criterion: PC\n")
        }

        ## for gsynth, use the cross-validation function in fect.gsynth
        if(method == "gsynth"){
            message("Interactive fixed effects model...\n")
            out <- fect.gsynth(Y = Y, D = D, X = X, I = I, II = II,
                               T.on = T.on, T.off = T.off, r = r, r.end = r.end, CV = 1,
                               force = force, hasRevs = hasRevs, 
                               tol = tol, boot = 0,
                               norm.para = norm.para,
                               group.level = group.level, group = group)
            return(out)    
        }

        ## ----- ##
        ## ------------- initialize ------------ ##
        ## ----- ##
        
        cv.pos <- which(t.on<=0)
        t.on.cv <- t.on[cv.pos]
        count.on.cv <- as.numeric(table(t.on.cv))
        ## tot.id <- which(c(II)==1) ## observed control data
        ## cv.count <- ceiling((sum(II)*sum(II))/sum(I))
        rm.count <- floor(sum(II)*cv.prop)
        cv.count <- sum(II) - rm.count

        #ociCV <- matrix(NA, cv.count, k) ## store indicator
        #rmCV <- matrix(NA, rm.count, k) ## removed indicator
        ociCV <- list()
        rmCV <- list()
        estCV <- NULL ## used for mspe 


        Y0CV <- array(NA, dim = c(TT, N, k)) ## store initial Y0
        if (p > 0) {
           beta0CV <- array(NA, dim = c(p, 1, k)) 
        } else {
            beta0CV <- array(0, dim = c(1, 0, k)) ## store initial beta0
        }
        
        ## cv.id.all <- c()
        flag <- 0
        for (i in 1:k) {
            cv.n <- 0
            repeat{
                cv.n <- cv.n + 1
                #cv.id <- cv.sample(II, as.integer(sum(II) - cv.count))
                get.cv <- cv.sample(II, D, 
                                    count = rm.count, 
                                    cv.count = cv.nobs, 
                                    cv.treat = cv.treat, 
                                    cv.donut = cv.donut)
                cv.id <- get.cv$cv.id
                ## cv.id <- sample(oci, as.integer(sum(II) - cv.count), replace = FALSE)
                II.cv.valid <- II.cv <- II
                II.cv[cv.id] <- 0
                II.cv.valid[cv.id] <- -1
                ## ziyi: if certain rows or columns doesn't satisfy con1 or con2,
                ## replace the row or column of II.cv using the corresponding rows or columns in II
                
                con1 <- sum(apply(II.cv, 1, sum) >= 1) == TT
                con2 <- sum(apply(II.cv, 2, sum) >= min.T0) == N

                if(con1==TRUE & con2==TRUE) {
                    break
                }

                if (cv.n>=200) {
                    flag <- 1
                    #message("Some units have too few pre-treatment observations. Remove them automatically in Cross-Validation.")
                    keep.1 <- which(apply(II.cv, 1, sum) < 1)
                    keep.2 <- which(apply(II.cv, 2, sum) < min.T0)
                    II.cv[keep.1,] <- II[keep.1,]
                    II.cv[,keep.2] <- II[,keep.2]
                    II.cv.valid[keep.1,] <- II[keep.1,]
                    II.cv.valid[,keep.2] <- II[,keep.2]
                    cv.id <- which(II.cv.valid!=II)
                    break
                }
            }

            if(length(cv.id)==0){
                stop("Some units have too few pre-treatment observations. Set a larger \"cv.prop\" or set \"cv.treat\" to FALSE.")
            }

            rmCV[[i]] <- cv.id
            ocicv <- setdiff(oci, cv.id)
            ociCV[[i]] <- ocicv
            
            if(cv.n<200){
                estCV <- c(estCV, list(get.cv$est.id))
            }
            else{
                cv.id.old <- get.cv$cv.id
                cv.diff <- setdiff(cv.id.old, cv.id)
                estCV <- c(estCV,list(setdiff(get.cv$est.id,cv.diff)))
            }

            initialOutCv <- initialFit(data = data.ini, 
                                       force = force, 
                                       oci = ocicv)
            Y0CV[,,i] <- initialOutCv$Y0
            
            if (p > 0) {
                beta0cv <- initialOutCv$beta0
                if (sum(is.na(beta0cv)) > 0) {
                    beta0cv[which(is.na(beta0cv))] <- 0
                }
                beta0CV[,,i] <- beta0cv
            }
        }

        if(flag == 1){
            message("Some units have too few pre-treatment observations. Remove them automatically in Cross-Validation.\n")
        }
    
        ## get weighted matrix
        count.T.cv <- count.T.cv.old <- table(T.on)
        count.T.cv.old <- count.T.cv <- count.T.cv[which(as.numeric(names(count.T.cv))<=0)]
        cv.prop.cut <- max(count.T.cv.old)*proportion
        cv.drop.index <- which(count.T.cv.old<=cv.prop.cut)

        count.T.cv <- count.T.cv/mean(count.T.cv)
        name.count.T.cv <- names(count.T.cv)
        count.T.cv <- c(count.T.cv,median(count.T.cv))
        names(count.T.cv) <- c(name.count.T.cv,"Control")
        count.T.cv[cv.drop.index] <- 0 # set weights to 0 for the periods when the number of treated observations is less than ..

        ##  --------------------------------------------- ##
##  ---------------- cross validation for ife model ------------------  ##
        ##  --------------------------------------------- ##
        
        if (method %in% c("ife", "both")) {
            
            message("Interactive fixed effects model...\n")
            
            r.pc <- est.pc.best <- MSPE.best <- WMSPE.best <- MSPE.pc.best <- NULL
            gmoment.best <- moment.best <- MAD.best <- GMSPE.best <- WGMSPE.best <- NULL
            
            if (criterion == "PC") {
                CV.out.ife <- matrix(NA, (r.max - r.old + 1), 6)
                colnames(CV.out.ife) <- c("r", "sigma2", "IC", "PC", "MSPTATT", "MSE")
            } 
            else {
                CV.out.ife <- matrix(NA, (r.max - r.old + 1), 13)
                colnames(CV.out.ife) <- c("r", "sigma2", "IC", "PC", 
                                          "MSPE","WMSPE","GMSPE","WGMSPE", "MAD", "Moment", "GMoment", "MSPTATT", "MSE")
            }
            
            CV.out.ife[,"r"] <- c(r.old:r.max)
            CV.out.ife[,"PC"] <- CV.out.ife[,"GMoment"] <- CV.out.ife[,"Moment"] <- CV.out.ife[,"MAD"] <- CV.out.ife[,"MSPE"] <- CV.out.ife[,"WMSPE"] <- CV.out.ife[,"GMSPE"] <- CV.out.ife[,"WGMSPE"] <- 1e20

            for (i in 1:dim(CV.out.ife)[1]) { ## cross-validation loop starts 
                ## inter FE based on control, before & after 
                r <- CV.out.ife[i, "r"]  
                ## k <- 5
                if (criterion %in% c("mspe","wmspe","gmspe","wgmspe","mad","moment")) {
                    SSE <- 0
                    WSSE <- 0
                    GSSE <- 0
                    WGSSE <- 0
                    ll.length <- 0
                    moment.list <- c()
                    index.moment.list <- c()
                    MAD.list <- c()
                    for (ii in 1:k) {
                        II.cv <- II
                        II.cv[rmCV[[ii]]] <- 0
                        YY.cv <- YY
                        YY.cv[rmCV[[ii]]] <- 0
                        est.cv.fit <- inter_fe_ub(YY.cv, as.matrix(Y0CV[,,ii]), X, II.cv, as.matrix(beta0CV[,,ii]), r, force, tol)$fit
                        index.cv <- as.character(T.on[estCV[[ii]]])
                        index.cv[which(is.na(index.cv))] <- "Control"
                        weight.cv <- count.T.cv[index.cv]
                        names(weight.cv) <- NULL
                        SSE <- SSE + sum((YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2) 
                        WSSE <- WSSE + sum(weight.cv*(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2)
                        GSSE <- GSSE + sum(log((YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2))
                        ll <- weight.cv*(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2
                        ll <- ll[which(ll>0)] 
                        WGSSE <- WGSSE + sum(log(ll))
                        ll.length <- ll.length + length(ll)
                        MAD.list <- c(MAD.list,(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2)
                        # moment conditions
                        moment.list <- c(moment.list,(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]]))
                        index.moment.list <- c(index.moment.list,index.cv)
                        #resid.mean <- tapply((YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]]), index.cv, mean)
                        #resid.mean <- abs(resid.mean)
                        #weight.cv <- count.T.cv[names(resid.mean)]
                        #names(weight.cv) <- NULL
                        #moment <- c(moment, sum(weight.cv*resid.mean)/sum(weight.cv))
                    }
                    MSPE <- SSE/(length(unlist(estCV)))
                    WMSPE <- WSSE/(length(unlist(estCV)))
                    GMSPE <- exp(GSSE/(length(unlist(estCV))))
                    WGMSPE <- exp(WGSSE/ll.length)
                    MAD <- median(abs(MAD.list-median(MAD.list)))
                    
                    # moment
                    resid.mean <- tapply(moment.list,index.moment.list, mean)
                    resid.mean <- abs(resid.mean)
                    # g-moment
                    gm_mean = function(x){
                        exp(sum(log(x)) / length(x))
                    }
                    resid.g.mean <- tapply(abs(moment.list),index.moment.list, gm_mean)
                    weight.cv.g <- count.T.cv[names(resid.g.mean)]
                    weight.cv <- count.T.cv[names(resid.mean)]
                    names(weight.cv) <- NULL
                    names(weight.cv.g) <- NULL
                    moment <- sum(weight.cv*resid.mean)/sum(weight.cv)
                    gmoment <- sum(weight.cv.g*resid.g.mean)/sum(weight.cv)
                     
                }

                est.cv <- inter_fe_ub(YY, 
                                      Y0, 
                                      X, 
                                      II, 
                                      beta0, 
                                      r, 
                                      force, 
                                      tol) ## overall
                sigma2 <- est.cv$sigma2 
                IC <- est.cv$IC
                PC <- est.cv$PC

                eff.v.cv <- c(Y - est.cv$fit)[cv.pos]
                meff <- as.numeric(tapply(eff.v.cv, t.on.cv, mean))
                MSPTATT <- sum(meff^2*count.on.cv)/sum(count.on.cv)
                MSE <- sum(eff.v.cv^2)/length(eff.v.cv)

                if(!is.null(norm.para)) {
                    if (criterion %in% c("mspe","wmspe","gmspe","wgmspe","mad","moment","gmoment")) {
                        MSPE <- MSPE*(norm.para[1]^2)
                        WMSPE <- WMSPE*(norm.para[1]^2)
                        GMSPE <- GMSPE*(norm.para[1]^2)
                        WGMSPE <- WGMSPE*(norm.para[1]^2)
                        MAD <- MAD*(norm.para[1]^2)
                        moment <- moment*(norm.para[1]^2)
                        gmoment <- gmoment*(norm.para[1]^2)
                    }
                    sigma2 <- sigma2*(norm.para[1]^2)
                    IC <- est.cv$IC - log(est.cv$sigma2) + log(sigma2)
                    PC <- PC*(norm.para[1]^2)
                }

                if (criterion == "mspe") {
                    if ((min(CV.out.ife[,"MSPE"]) - MSPE) > 0.01*min(CV.out.ife[,"MSPE"])) {
                        ## at least 1% improvement for MPSE
                        MSPE.best <- MSPE
                        est.best <- est.cv  
                        r.cv <- r
                    } else {
                        if (r == r.cv + 1) message("*")
                    }
                }
                else if(criterion == 'wmspe'){
                    if ((min(CV.out.ife[,"WMSPE"]) - WMSPE) > 0.01*min(CV.out.ife[,"WMSPE"])) {
                        ## at least 1% improvement for MPSE
                        WMSPE.best <- WMSPE
                        est.best <- est.cv  
                        r.cv <- r
                    } 
                    else {
                        if (r == r.cv + 1) message("*")
                    }
                }
                else if(criterion == 'gmspe'){
                    if ((min(CV.out.ife[,"GMSPE"]) - GMSPE) > 0.01*min(CV.out.ife[,"GMSPE"])) {
                        ## at least 1% improvement for MPSE
                        GMSPE.best <- GMSPE
                        est.best <- est.cv  
                        r.cv <- r
                    } 
                    else {
                        if (r == r.cv + 1) message("*")
                    }
                }
                else if(criterion == 'wgmspe'){
                    if ((min(CV.out.ife[,"WGMSPE"]) - WGMSPE) > 0.01*min(CV.out.ife[,"WGMSPE"])) {
                        ## at least 1% improvement for MPSE
                        WGMSPE.best <- WGMSPE
                        est.best <- est.cv  
                        r.cv <- r
                    } 
                    else {
                        if (r == r.cv + 1) message("*")
                    }
                }
                else if(criterion == 'mad'){
                    if ((min(CV.out.ife[,"MAD"]) - MAD) > 0.01*min(CV.out.ife[,"MAD"])) {
                        MAD.best <- MAD
                        est.best <- est.cv  
                        r.cv <- r
                    } 
                    else {
                        if (r == r.cv + 1) message("*")
                    }
                }
                else if(criterion == 'moment'){
                    if ((min(CV.out.ife[,"Moment"]) - moment) > 0.01*min(CV.out.ife[,"Moment"])) {
                        moment.best <- moment
                        est.best <- est.cv  
                        r.cv <- r
                    } 
                    else {
                        if (r == r.cv + 1) message("*")
                    }
                }
                else if(criterion == 'gmoment'){
                    if ((min(CV.out.ife[,"GMoment"]) - gmoment) > 0.01*min(CV.out.ife[,"GMoment"])) {
                        gmoment.best <- gmoment
                        est.best <- est.cv  
                        r.cv <- r
                    } 
                    else {
                        if (r == r.cv + 1) message("*")
                    }
                }
                else if(criterion == "pc"){
                    if (PC < min(CV.out.ife[,"PC"])) {
                        est.pc.best <- est.cv  
                        r.pc <- r
                    }
                }

                if (criterion != "pc") {
                    CV.out.ife[i, 2:12] <- c(sigma2, IC, PC, MSPE, WMSPE, GMSPE, WGMSPE, MAD, moment, MSPTATT, MSE)
                } else {
                    CV.out.ife[i, 2:6] <- c(sigma2, IC, PC, MSPTATT, MSE)
                }
                
                if (criterion == "pc") {
                    message("\n r = ",r, "; sigma2 = ",
                        sprintf("%.5f",sigma2), "; IC = ",
                        sprintf("%.5f",IC), "; PC = ",
                        sprintf("%.5f",PC), "; MSPTATT = ",
                        sprintf("%.5f",MSPTATT), "; MSE = ",
                        sprintf("%.5f",MSE), sep="")

                } else {
                    #message("\n r = ",r, "; sigma2 = ",
                    #    sprintf("%.5f",sigma2), "; IC = ",
                    #    sprintf("%.5f",IC), "; PC = ",
                    #    sprintf("%.5f",PC), "; MSPE = ",
                    #    sprintf("%.5f",MSPE), "; GMSPE = ",
                    #    sprintf("%.5f",GMSPE), "; Moment = ",
                    #    sprintf("%.5f",moment), "; MSPTATT = ",
                    #    sprintf("%.5f",MSPTATT), "; MSE = ",
                    #    sprintf("%.5f",MSE), sep="")
                    message("\n r = ",r, "; sigma2 = ",
                        sprintf("%.5f",sigma2), "; IC = ",
                        sprintf("%.5f",IC), "; PC = ",
                        sprintf("%.5f",PC), "; MSPE = ",
                        sprintf("%.5f",MSPE))
                }
            } ## end of while: search for r_star over  

            #MSPE.best <- min(CV.out[,"MSPE"])
            #PC.best <- min(CV.out[,"PC"])

            ## compare 
            if (criterion == "both") {
                if (r.cv > r.pc) {
                    message("\n\n Factor number selected via cross validation may be larger than the true number. Using the PC criterion.\n\n ")
                    r.cv <- r.pc 
                    est.best <- est.pc.best
                    MSPE.best <- MSPE.pc.best
                }
                est.best.ife <- est.best
                MSPE.best.ife <- MSPE.best
            }
            else if (criterion == "pc") {
                est.best.ife <- est.pc.best  
                r.cv <- r.pc
            }
            else {
                est.best.ife <- est.best
                MSPE.best.ife <- MSPE.best
                WMSPE.best.ife <- WMSPE.best
                GMSPE.best.ife <- GMSPE.best
                WGMSPE.best.ife <- WGMSPE.best
                MAD.best.ife <- MAD.best
                moment.best.ife <- moment.best
                gmoment.best.ife <- gmoment.best
            }
            
            if (r > (TT-1)) {message(" (r hits maximum)")}
            message("\n\n r* = ",r.cv, sep="")
            message("\n\n") 
        }

        ##  ------------------------------------- ##
##  ---------------- cross validation for mc ---------------  ##
        ##  ------------------------------------- ##
        if (method %in% c("mc", "both")) {
            message("Matrix completion method...\n")
            eigen.all <- NULL
            if (is.null(lambda) || length(lambda) == 1) {
                ## create the hyper-parameter sequence
                ## biggest candidate lambda 
                ## Y.lambda <- YY 
                Y.lambda <- YY - Y0
                ## Y.lambda[which(II == 0)] <- Y0[which(II == 0)]
                Y.lambda[which(II == 0)] <- 0
                eigen.all <- svd( Y.lambda / (TT * N) )$d
                lambda.max <- log10(max(eigen.all))
                lambda <- rep(NA, nlambda)
                lambda.by <- 3/(nlambda - 2)
                for (i in 1:(nlambda - 1)) {
                    lambda[i] <- 10^(lambda.max - (i - 1) * lambda.by)
                }
                lambda[nlambda] <- 0
            } 
            else {
                Y.lambda <- YY - Y0
                Y.lambda[which(II == 0)] <- 0
                eigen.all <- svd( Y.lambda / (TT * N) )$d
            }

            ## store all MSPE
            MSPE.best <- WMSPE.best <- GMSPE.best <- WGMSPE.best <- MAD.best <- moment.best <- gmoment.best <- NULL
            CV.out.mc <- matrix(NA, length(lambda), 10)
            colnames(CV.out.mc) <- c("lambda.norm", "MSPE", "WMSPE","GMSPE", "WGMSPE", "MAD", "Moment", "GMoment", "MSPTATT", "MSE")
            CV.out.mc[,"lambda.norm"] <- c(lambda/max(eigen.all))
            CV.out.mc[,"GMoment"] <- CV.out.mc[,"Moment"] <- CV.out.mc[,"MAD"] <- CV.out.mc[,"WGMSPE"] <- CV.out.mc[,"WGMSPE"] <- CV.out.mc[,"GMSPE"] <- CV.out.mc[,"WMSPE"] <- CV.out.mc[,"MSPE"] <- 1e20

            break_count <- 0
            break_check <- 0
            for (i in 1:length(lambda)) {    
                ## k <- 5
                SSE <- 0
                WSSE <- 0
                GSSE <- 0
                WGSSE <- 0
                ll.length <- 0
                moment.list <- c()
                index.moment.list <- c()
                MAD.list <- c()
                for (ii in 1:k) {
                    II.cv <- II
                    II.cv[rmCV[[ii]]] <- 0
                    YY.cv <- YY
                    YY.cv[rmCV[[ii]]] <- 0
                    est.cv.fit <- inter_fe_mc(YY.cv, as.matrix(Y0CV[,,ii]), 
                                              X, II.cv, as.matrix(beta0CV[,,ii]), 
                                              1, lambda[i], force, tol)$fit
                    index.cv <- as.character(T.on[estCV[[ii]]])
                    index.cv[which(is.na(index.cv))] <- "Control"
                    weight.cv <- count.T.cv[index.cv]
                    names(weight.cv) <- NULL
                    SSE <- SSE + sum((YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2) 
                    WSSE <- WSSE + sum(weight.cv*(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2)
                    GSSE <- GSSE + sum(log((YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2))
                    ll <- weight.cv*(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2
                    ll <- ll[which(ll>0)] 
                    WGSSE <- WGSSE + sum(log(ll))
                    ll.length <- ll.length + length(ll)
                    MAD.list <- c(MAD.list,(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]])^2)
                    # moment conditions
                    moment.list <- c(moment.list,(YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]]))
                    index.moment.list <- c(index.moment.list,index.cv)
                    #resid.mean <- tapply((YY[estCV[[ii]]]-est.cv.fit[estCV[[ii]]]), index.cv, mean)
                    #resid.mean <- abs(resid.mean)
                    #weight.cv <- count.T.cv[names(resid.mean)]
                    #names(weight.cv) <- NULL
                    #moment <- c(moment, sum(weight.cv*resid.mean)/sum(weight.cv))
                }
                MSPE <- SSE/(length(unlist(estCV)))
                WMSPE <- WSSE/(length(unlist(estCV)))
                GMSPE <- exp(GSSE/(length(unlist(estCV))))
                WGMSPE <- exp(WGSSE/ll.length)
                MAD <- median(abs(MAD.list-median(MAD.list)))
                    
                # moment
                resid.mean <- tapply(moment.list,index.moment.list, mean)
                resid.mean <- abs(resid.mean)
                # g-moment
                gm_mean = function(x){
                    exp(sum(log(x)) / length(x))
                }
                resid.g.mean <- tapply(abs(moment.list),index.moment.list, gm_mean)
                weight.cv.g <- count.T.cv[names(resid.g.mean)]
                weight.cv <- count.T.cv[names(resid.mean)]
                names(weight.cv) <- NULL
                names(weight.cv.g) <- NULL
                moment <- sum(weight.cv*resid.mean)/sum(weight.cv)
                gmoment <- sum(weight.cv.g*resid.g.mean)/sum(weight.cv)

                est.cv <- inter_fe_mc(YY, Y0, X, II, beta0, 
                                      1, lambda[i], 
                                      force, tol) ## overall

                eff.v.cv <- c(Y - est.cv$fit)[cv.pos]
                meff <- as.numeric(tapply(eff.v.cv, t.on.cv, mean))
                MSPTATT <- sum(meff^2*count.on.cv)/sum(count.on.cv) 
                MSE <- sum(eff.v.cv^2)/length(eff.v.cv)

                if(!is.null(norm.para)) {
                    MSPE <- MSPE*(norm.para[1]^2)
                    WMSPE <- WMSPE*(norm.para[1]^2)
                    GMSPE <- GMSPE*(norm.para[1]^2)
                    WGMSPE <- WGMSPE*(norm.para[1]^2)
                    MAD <- MAD*(norm.para[1]^2)
                    moment <- moment*(norm.para[1]^2)
                    gmoment <- gmoment*(norm.para[1]^2)
                }

                if (criterion == "mspe") {
                    if ((min(CV.out.mc[,"MSPE"]) - MSPE) > 0.01*min(CV.out.mc[,"MSPE"])) {
                        ## at least 1% improvement for MPSE
                        MSPE.best <- MSPE
                        est.best <- est.cv  
                        lambda.cv <- lambda[i]
                        break_count <- 0
                        break_check <- 0
                    } else {
                        if (i > 1) {
                            if (lambda.cv == lambda[i-1]){
                                message("*")
                                break_check <- 1
                                break_count <- 0  
                            } 
                        }
                    }
                }
                else if(criterion == 'wmspe'){
                    if ((min(CV.out.mc[,"WMSPE"]) - WMSPE) > 0.01*min(CV.out.mc[,"WMSPE"])) {
                        ## at least 1% improvement for MPSE
                        WMSPE.best <- WMSPE
                        est.best <- est.cv  
                        lambda.cv <- lambda[i]
                        break_check <- 0
                        break_count <- 0
                    } 
                    else {
                        if (i > 1) {
                            if (lambda.cv == lambda[i-1]){
                                message("*")
                                break_check <- 1
                                break_count <- 0  
                            }
                        }
                    }
                }
                else if(criterion == 'gmspe'){
                    if ((min(CV.out.mc[,"GMSPE"]) - GMSPE) > 0.01*min(CV.out.mc[,"GMSPE"])) {
                        ## at least 1% improvement for MPSE
                        GMSPE.best <- GMSPE
                        est.best <- est.cv  
                        lambda.cv <- lambda[i]
                        break_check <- 0
                        break_count <- 0
                    } 
                    else {
                        if (i > 1) {
                            if (lambda.cv == lambda[i-1]){
                                message("*")
                                break_check <- 1
                                break_count <- 0  
                            }
                        }
                    }
                }
                else if(criterion == 'wgmspe'){
                    if ((min(CV.out.mc[,"WGMSPE"]) - WGMSPE) > 0.01*min(CV.out.mc[,"WGMSPE"])) {
                        ## at least 1% improvement for MPSE
                        WGMSPE.best <- WGMSPE
                        est.best <- est.cv  
                        lambda.cv <- lambda[i]
                        break_check <- 0
                        break_count <- 0
                    } 
                    else {
                        if (i > 1) {
                            if (lambda.cv == lambda[i-1]){
                                message("*")
                                break_check <- 1
                                break_count <- 0  
                            }
                        }
                    }
                }
                else if(criterion == 'mad'){
                    if ((min(CV.out.mc[,"MAD"]) - MAD) > 0.01*min(CV.out.mc[,"MAD"])) {
                        ## at least 1% improvement for MPSE
                        MAD.best <- MAD
                        est.best <- est.cv  
                        lambda.cv <- lambda[i]
                        break_check <- 0
                        break_count <- 0
                    } 
                    else {
                        if (i > 1) {
                            if (lambda.cv == lambda[i-1]){
                                message("*")
                                break_check <- 1
                                break_count <- 0  
                            }
                        }
                    }
                }
                else if(criterion == 'moment'){
                    if ((min(CV.out.mc[,"Moment"]) - moment) > 0.01*min(CV.out.mc[,"Moment"])) {
                        ## at least 1% improvement for MPSE
                        moment.best <- moment
                        est.best <- est.cv  
                        lambda.cv <- lambda[i]
                        break_check <- 0
                        break_count <- 0
                    } 
                    else {
                        if (i > 1) {
                            if (lambda.cv == lambda[i-1]){
                                message("*")
                                break_check <- 1
                                break_count <- 0  
                            }
                        }
                    }
                }
                else if(criterion == 'gmoment'){
                    if ((min(CV.out.mc[,"GMoment"]) - gmoment) > 0.01*min(CV.out.mc[,"GMoment"])) {
                        ## at least 1% improvement for MPSE
                        gmoment.best <- gmoment
                        est.best <- est.cv  
                        lambda.cv <- lambda[i]
                        break_check <- 0
                        break_count <- 0
                    } 
                    else {
                        if (i > 1) {
                            if (lambda.cv == lambda[i-1]){
                                message("*")
                                break_check <- 1
                                break_count <- 0  
                            }
                        }
                    }
                }

                if(break_check == 1){
                    break_count <-  break_count + 1
                }

                CV.out.mc[i, 2:10] <- c(MSPE, WMSPE, GMSPE, WGMSPE, MAD, moment, gmoment, MSPTATT, MSE)
                message("\n lambda.norm = ",
                sprintf("%.5f",lambda[i]/max(eigen.all)),"; MSPE = ",
                sprintf("%.5f",MSPE),   "; MSPTATT = ",
                sprintf("%.5f",MSPTATT), "; MSE = ", 
                sprintf("%.5f",MSE), sep="")
                if(break_count == 3){
                    break
                }
            }
            est.best.mc <- est.best 
            MSPE.best.mc <- MSPE.best
            WMSPE.best.mc <- WMSPE.best
            GMSPE.best.mc <- GMSPE.best
            WGMSPE.best.mc <- WGMSPE.best
            MAD.best.mc <- MAD.best
            moment.best.mc <- moment.best
            gmoment.best.mc <- gmoment.best
            message("\n\n lambda.norm* = ",lambda.cv/max(eigen.all), sep="")
            message("\n\n")
        }
    }    ## End of Cross-Validation 


    if (method == "ife") {
        est.best <- est.best.ife
        validF <- ifelse(r.cv > 0, 1, 0)
    } 
    else if (method == "mc") {
        est.best <- est.best.mc
        validF <- est.best$validF
    }
    else {
        if(criterion == 'mspe'){
            if (MSPE.best.ife <= MSPE.best.mc) {
                est.best <- est.best.ife
                validF <- ifelse(r.cv > 0, 1, 0) 
                method <- "ife"
            } else {
                est.best <- est.best.mc
                validF <- est.best$validF
                method <- "mc"
            }
        }
        if(criterion == 'wmspe'){
            if (WMSPE.best.ife <= WMSPE.best.mc) {
                est.best <- est.best.ife
                validF <- ifelse(r.cv > 0, 1, 0) 
                method <- "ife"
            } else {
                est.best <- est.best.mc
                validF <- est.best$validF
                method <- "mc"
            }
        }
        if(criterion == 'gmspe'){
            if (GMSPE.best.ife <= GMSPE.best.mc) {
                est.best <- est.best.ife
                validF <- ifelse(r.cv > 0, 1, 0) 
                method <- "ife"
            } else {
                est.best <- est.best.mc
                validF <- est.best$validF
                method <- "mc"
            }
        }
        if(criterion == 'wgmspe'){
            if (WGMSPE.best.ife <= WGMSPE.best.mc) {
                est.best <- est.best.ife
                validF <- ifelse(r.cv > 0, 1, 0) 
                method <- "ife"
            } else {
                est.best <- est.best.mc
                validF <- est.best$validF
                method <- "mc"
            }
        }
        if(criterion == 'mad'){
            if (MAD.best.ife <= MAD.best.mc) {
                est.best <- est.best.ife
                validF <- ifelse(r.cv > 0, 1, 0) 
                method <- "ife"
            } else {
                est.best <- est.best.mc
                validF <- est.best$validF
                method <- "mc"
            }
        }
        if(criterion == 'moment'){
            if (moment.best.ife <= moment.best.mc) {
                est.best <- est.best.ife
                validF <- ifelse(r.cv > 0, 1, 0) 
                method <- "ife"
            } else {
                est.best <- est.best.mc
                validF <- est.best$validF
                method <- "mc"
            }
        }
        if(criterion == 'gmoment'){
            if (gmoment.best.ife <= gmoment.best.mc) {
                est.best <- est.best.ife
                validF <- ifelse(r.cv > 0, 1, 0) 
                method <- "ife"
            } else {
                est.best <- est.best.mc
                validF <- est.best$validF
                method <- "mc"
            }
        }
        message("\n\n Recommended method through cross-validation: ", method, sep = "")
        message("\n\n")
    }    
    validX <- est.best$validX 

        ##------------------------------##
    ## ----------- Summarize -------------- ##
        ##------------------------------## 

    ## 00. run a fect to obtain residuals
    if (method == "ife") {
        if (r.cv == 0) {
            est.fect <- est.best
        } 
        else {
            est.fect <- inter_fe_ub(YY, Y0, X, II, beta0, 0, force = force, tol)
        }
    } 
    else {
        est.fect <- inter_fe_ub(YY, Y0, X, II, beta0, 0, force = force, tol)
    }
           

    ##-------------------------------##
    ##   ATT and Counterfactuals     ##
    ##-------------------------------##

    ## we first adjustment for normalization 
    if (!is.null(norm.para)) {

        Y <- Y * norm.para[1]
        if (method == "ife") {
            ## variance of the error term 
            sigma2 <- est.best$sigma2 * (norm.para[1]^2)
            IC <- est.best$IC - log(est.best$sigma2) + log(sigma2)
            PC <- est.best$PC * (norm.para[1]^2)
            est.best$sigma2 <- sigma2
            est.best$IC <- IC
            est.best$PC <- PC
        }

        ## output of estimates
        est.best$mu <- est.best$mu * norm.para[1] 
        if (method == "ife" && r.cv > 0) {
            est.best$lambda <- est.best$lambda * norm.para[1]
            est.best$VNT <- est.best$VNT * norm.para[1]
        }
        if (force%in%c(1, 3)) {
            est.best$alpha <- est.best$alpha * norm.para[1] 
        }
        if (force%in%c(2,3)) {
            est.best$xi <- est.best$xi * norm.para[1] 
        }
        #if (p>0) {
        #    est.best$beta <- est.best$beta * norm.para[1]
        #}
        est.best$residuals <- est.best$residuals * norm.para[1] 
        est.best$fit <- est.best$fit * norm.para[1] 
        est.fect$fit <- est.fect$fit * norm.para[1]
        est.fect$sigma2 <- est.fect$sigma2 * norm.para[1]
    }

    ## 0. revelant parameters
    sigma2 <- IC <- PC <- NULL
    if (method == "ife") {
        sigma2 <- est.best$sigma2   
        IC <- est.best$IC
        PC <- est.best$PC
    }

    if (p>0) {
        na.pos <- is.nan(est.best$beta)
        beta <- est.best$beta
        if( sum(na.pos) > 0 ) {
            beta[na.pos] <- NA
        }
    } else {
        beta <- NA
    }
   
    ## 1. estimated att and counterfactuals
    Y.ct.equiv <- Y.ct <- NULL
    Y.ct <- est.best$fit
    eff <- Y - Y.ct   
    missing.index <- which(is.na(eff))
    if(length(missing.index)>0){
        I[missing.index] <- 0
        II[missing.index] <- 0
    }
    if (0 %in% I) {
        eff[which(I == 0)] <- NA
    } 
    complete.index <- which(!is.na(eff))
    att.avg <- sum(eff[complete.index] * D[complete.index])/(sum(D[complete.index]))

    att.avg.balance <- NA
    if(!is.null(balance.period)){
        complete.index2 <- which(!is.na(T.on.balance))
        att.avg.balance <- sum(eff[complete.index2] * D[complete.index2])/(sum(D[complete.index2]))
    }


    ## att.avg.unit
    tr.pos <- which(apply(D, 2, sum) > 0)
    att.unit <- sapply(1:length(tr.pos), function(vec){return(sum(eff[, tr.pos[vec]] * D[, tr.pos[vec]]) / sum(D[, tr.pos[vec]]))})
    att.avg.unit <- mean(att.unit)

    eff.equiv <- Y - est.fect$fit
    equiv.att.avg <- sum(eff.equiv * D) / (sum(D))

    ## 2. rmse for treated units' observations under control
    tr <- which(apply(D, 2, sum) > 0)
    tr.co <- which((as.matrix(1 - D[,tr]) * as.matrix(II[,tr])) == 1)
    eff.tr <- as.matrix(eff[,tr])
    v.eff.tr <- eff.tr[tr.co]
    rmse <- sqrt(mean(v.eff.tr^2))

    ## 3. unbalanced output
    if (0%in%I) {
        eff[which(I == 0)] <- NA
        est.best$fit[which(I == 0)] <- NA
        ## eff.equiv[which(I == 0)] <- NA
    }
    est.best$residuals[which(II == 0)] <- NA
    if (method == "mc") {
        est.best$sigma2 <- mean(c(est.best$residuals[which(II == 1)])^2) ## mean squared error of residuals    
    }    

    ## 4. dynamic effects
    t.on <- c(T.on)
    eff.v <- c(eff) ## a vector
    rm.pos1 <- which(is.na(eff.v))
    rm.pos2 <- which(is.na(t.on))
    eff.v.use1 <- eff.v
    t.on.use <- t.on
    n.on.use <- rep(1:N, each = TT)

    eff.equiv.v <- c(eff.equiv)

    if (NA %in% eff.v | NA %in% t.on) {
        eff.v.use1 <- eff.v[-c(rm.pos1, rm.pos2)]
        t.on.use <- t.on[-c(rm.pos1, rm.pos2)]
        n.on.use <- n.on.use[-c(rm.pos1, rm.pos2)]
        eff.equiv.v <- eff.equiv.v[-c(rm.pos1, rm.pos2)]
    }

    pre.pos <- which(t.on.use <= 0)
    eff.pre <- cbind(eff.v.use1[pre.pos], t.on.use[pre.pos], n.on.use[pre.pos])
    colnames(eff.pre) <- c("eff", "period", "unit")

    eff.pre.equiv <- cbind(eff.equiv.v[pre.pos], t.on.use[pre.pos], n.on.use[pre.pos])
    colnames(eff.pre.equiv) <- c("eff.equiv", "period", "unit")

    pre.sd <- tapply(eff.pre.equiv[,1], eff.pre.equiv[,2], sd)
    pre.sd <- cbind(pre.sd, sort(unique(eff.pre.equiv[, 2])), table(eff.pre.equiv[, 2]))
    colnames(pre.sd) <- c("sd", "period", "count")

    time.on <- sort(unique(t.on.use))
    att.on <- as.numeric(tapply(eff.v.use1, t.on.use, mean)) ## NA already removed
    count.on <- as.numeric(table(t.on.use))

    ## 4.2 balance effect 
    balance.att <- NULL 
    if (!is.null(balance.period)) {
        t.on.balance <- c(T.on.balance)
        rm.pos4 <- which(is.na(t.on.balance)) 
        t.on.balance.use <- t.on.balance

        if (NA %in% eff.v | NA %in% t.on.balance) {
            eff.v.use3  <- eff.v[-c(rm.pos1, rm.pos4)]
            t.on.balance.use <- t.on.balance[-c(rm.pos1, rm.pos4)]        
        }

        balance.time <- sort(unique(t.on.balance.use))
        balance.att <- as.numeric(tapply(eff.v.use3, t.on.balance.use, mean)) ## NA already removed
        balance.count <- as.numeric(table(t.on.balance.use))

    }


    
    ## 5 carryover effect 
    carry.att <- NULL 
    if (!is.null(T.on.carry)) {
        t.on.carry <- c(T.on.carry)
        rm.pos4 <- which(is.na(t.on.carry)) 
        t.on.carry.use <- t.on.carry

        if (NA %in% eff.v | NA %in% t.on.carry) {
            eff.v.use3  <- eff.v[-c(rm.pos1, rm.pos4)]
            t.on.carry.use <- t.on.carry[-c(rm.pos1, rm.pos4)]        
        }

        carry.time <- sort(unique(t.on.carry.use))
        carry.att <- as.numeric(tapply(eff.v.use3, t.on.carry.use, mean)) ## NA already removed

        #if (!is.null(time.on.carry.seq)) {
        #    carry.att.med <- rep(NA, length(time.on.carry.seq))
        #    carry.att.med[which(time.on.carry.seq %in% carry.time)] <- carry.att
        #    carry.att <- carry.att.med
            
        #}
    }



    ## 6. switch-off effects
    eff.off <- eff.equiv <- off.sd <- NULL
    if (hasRevs == 1) {    
        t.off <- c(T.off)
        rm.pos3 <- which(is.na(t.off))
        eff.v.use2 <- eff.v
        t.off.use <- t.off

        if (NA %in% eff.v | NA %in% t.off) {
            eff.v.use2 <- eff.v[-c(rm.pos1, rm.pos3)]
            t.off.use <- t.off[-c(rm.pos1, rm.pos3)]
        }

        off.pos <- which(t.off.use > 0)
        eff.off <- cbind(eff.v.use2[off.pos], t.off.use[off.pos], n.on.use[off.pos])
        colnames(eff.off) <- c("eff", "period", "unit")

        eff.off.equiv <- cbind(eff.equiv.v[off.pos], t.off.use[off.pos], n.on.use[off.pos])
        colnames(eff.off.equiv) <- c("off.equiv", "period", "unit")

        off.sd <- tapply(eff.off.equiv[,1], eff.off.equiv[,2], sd)
        off.sd <- cbind(off.sd, sort(unique(eff.off.equiv[, 2])), table(eff.off.equiv[, 2]))
        colnames(off.sd) <- c("sd", "period", "count")

        time.off <- sort(unique(t.off.use))
        att.off <- as.numeric(tapply(eff.v.use2, t.off.use, mean)) ## NA already removed
        count.off <- as.numeric(table(t.off.use))
    }

    ## 8. loess HTE by time
    D.missing <- D
    D.missing[which(D==0)] <- NA
    eff.calendar <- apply(eff*D.missing,1,mean,na.rm=TRUE)
    N.calendar <- apply(!is.na(eff*D.missing),1,sum)
    T.calendar <- c(1:TT)
    if(sum(!is.na(eff.calendar))>1){
        #loess fit
        loess.fit <- suppressWarnings(try(loess(eff.calendar~T.calendar,weights = N.calendar),silent=TRUE))       
        if('try-error' %in% class(loess.fit)){
            eff.calendar.fit <- eff.calendar
            calendar.enp <- NULL
        }
        else{
            eff.calendar.fit <- eff.calendar
            eff.calendar.fit[which(!is.na(eff.calendar))] <- loess.fit$fit
            calendar.enp <- loess.fit$enp              
        }
    }
    else{
        eff.calendar.fit <- eff.calendar
        calendar.enp <- NULL
    }

    ## 7. cohort effects
    if (!is.null(group)) {
        cohort <- cbind(c(group), c(D), c(eff.v))
        rm.pos <- unique(c(rm.pos1, which(cohort[, 2] == 0)))
        cohort <- cohort[-rm.pos, ]

        g.level <- sort(unique(cohort[, 1]))
        raw.group.att <- as.numeric(tapply(cohort[, 3], cohort[, 1], mean))

        group.att <- rep(NA, length(group.level))
        group.att[which(group.level %in% g.level)] <- raw.group.att

        # by-group dynamic effects
        group.level.name <- names(group.level)

        group.output <- list()
        for(i in c(1:length(group.level))){
            sub.group <- group.level[i]
            sub.group.name <- group.level.name[i]

            ## by-group dynamic effects
            t.on.sub <- c(T.on[which(group==sub.group)])
            eff.v.sub <- c(eff[which(group==sub.group)]) ## a vector
            rm.pos1.sub <- which(is.na(eff.v.sub))
            rm.pos2.sub <- which(is.na(t.on.sub)) 
            eff.v.use1.sub <- eff.v.sub
            t.on.use.sub <- t.on.sub
            if (NA %in% eff.v.sub | NA %in% t.on.sub) {
                eff.v.use1.sub <- eff.v.sub[-c(rm.pos1.sub, rm.pos2.sub)]
                t.on.use.sub <- t.on.sub[-c(rm.pos1.sub, rm.pos2.sub)]
            }
            if(length(t.on.use.sub)>0){
                time.on.sub <- sort(unique(t.on.use.sub))
                att.on.sub <- as.numeric(tapply(eff.v.use1.sub, 
                                            t.on.use.sub, 
                                            mean)) ## NA already removed
                count.on.sub <- as.numeric(table(t.on.use.sub))
            }else{
                time.on.sub <- att.on.sub <- count.on.sub <- NULL
            }
            
            suboutput <- list(att.on=att.on.sub,
                              time.on=time.on.sub,
                              count.on=count.on.sub)


            ## T.off
            if (hasRevs == 1) {    
                t.off.sub <- c(T.off[which(group==sub.group)])
                rm.pos3.sub <- which(is.na(t.off.sub))
                eff.v.use2.sub <- eff.v.sub
                t.off.use.sub <- t.off.sub
                if (NA %in% eff.v.sub | NA %in% t.off.sub) {
                    eff.v.use2.sub <- eff.v.sub[-c(rm.pos1.sub, rm.pos3.sub)]
                    t.off.use.sub <- t.off.sub[-c(rm.pos1.sub, rm.pos3.sub)]
                }
                if(length(t.off.use.sub)>0){
                    time.off.sub <- sort(unique(t.off.use.sub))
                    att.off.sub <- as.numeric(tapply(eff.v.use2.sub, t.off.use.sub, mean)) ## NA already removed
                    count.off.sub <- as.numeric(table(t.off.use.sub))
                }else{
                    time.off.sub <- att.off.sub <- count.off.sub <- NULL
                }

                suboutput <- c(suboutput, list(att.off = att.off.sub,
                                               count.off = count.off.sub,
                                               time.off = time.off.sub))

            }
            group.output[[sub.group.name]] <- suboutput
        }
    }

    

    ##-------------------------------##
    ##           Storage 
    ##-------------------------------##  

    ##control group residuals
    out<-list(
        ## main results 
        sigma2 = est.best$sigma2,
        sigma2.fect = est.fect$sigma2,
        T.on = T.on,
        Y.ct = est.best$fit,
        eff = eff,
        att.avg = att.avg,
        att.avg.unit = att.avg.unit,
        ## supporting
        force = force,
        T = TT,
        N = N,
        Ntr = Ntr,
        Nco = Nco,
        p = p, 
        D = D,
        Y = Y,
        X = X,
        I = I,
        II = II,
        tr = tr,
        co = co,
        est = est.best,
        method = method,
        mu = est.best$mu,
        beta = beta, 
        validX = validX,
        validF = validF,
        niter = est.best$niter, 
        time = time.on,
        att = att.on,
        count = count.on,
        eff.calendar = eff.calendar,
        N.calendar = N.calendar,
        eff.calendar.fit = eff.calendar.fit,
        calendar.enp = calendar.enp,
        eff.pre = eff.pre,
        eff.pre.equiv = eff.pre.equiv,
        pre.sd = pre.sd,
        rmse = rmse,
        rmCV = rmCV,
        estCV = estCV,
        res = est.best$res)

    if (hasRevs == 1) {
        out <- c(out, list(time.off = time.off, 
                           att.off = att.off,
                           count.off = count.off,
                           eff.off = eff.off,
                           eff.off.equiv = eff.off.equiv,
                           off.sd = off.sd))
    }

    if (!is.null(T.on.carry)) {
        out <- c(out, list(carry.att = carry.att, carry.time = carry.time))
    }

    if(!is.null(balance.period)){
        out <- c(out, list(balance.att = balance.att, balance.time = balance.time,balance.count = balance.count,balance.avg.att = att.avg.balance))        
    }

    if (force == 1) {
        out<-c(out, list(alpha = est.best$alpha,
                         alpha.tr = as.matrix(est.best$alpha[tr,]),
                         alpha.co = as.matrix(est.best$alpha[co,])))
    } else if (force == 2) {
        out<-c(out,list(xi = est.best$xi))
    } else if (force == 3) {
        out<-c(out,list(alpha = est.best$alpha, xi = est.best$xi,
                        alpha.tr = as.matrix(est.best$alpha[tr,]),
                        alpha.co = as.matrix(est.best$alpha[co,])))
    }

    if (method == "ife") {
        out <- c(out, list(r.cv = r.cv, IC = IC, PC = PC))
        if (r.cv > 0) {
            out <- c(out, list(factor = as.matrix(est.best$factor),
                               lambda = as.matrix(est.best$lambda),
                               lambda.tr = as.matrix(est.best$lambda[tr,]),
                               lambda.co = as.matrix(est.best$lambda[co,]))) 
        }
    }

    if (method == "mc") {
        out <- c(out, list(lambda.cv = lambda.cv, lambda.seq = lambda,
                           lambda.norm = lambda.cv / max(eigen.all), 
                           eigen.all = eigen.all))
    }

    ## CV results
    if (!is.null(CV.out.ife)) {
        out <- c(out, list(CV.out.ife = CV.out.ife))
    }

    if (!is.null(CV.out.mc)) {
        out <- c(out, list(CV.out.mc = CV.out.mc))
    }

    if (!is.null(group)) {
        out <- c(out, list(group.att = group.att,
                           group.output = group.output))
    }
 
    return(out)
} ## cross-validation function ends