Run_simulation/RUN_GIC/run_GIC.R

df_beta = 5
p = 100 #100 #30 #100 
beta_C_true = matrix(0, nrow = p, ncol = df_beta)
beta_C_true[3, ] <- c(-1, 0, 0, 0, -0.5) #c(-0.5, 0, 0, 0, -0.5)
beta_C_true[1, ] <- c(1, 0, 1 , 0, -0.5) #c(0.5, 0, 1 , 0, -0.5)
beta_C_true[2, ] <- c(0, 0,  -1,  0,  1)

k_list <- c(4,5,6)
n_train = 100 #100
n_test = 500

Data <- Model2(n = n_train, p = p, m = 0, intercept = TRUE,
               SNR = 4, sigma = 3,
               rho_X = 0, rho_W = 0,
               Corr_X = "CorrCS", Corr_W = "CorrAR",
               df_W = 5, df_beta = df_beta,
               ns = 20, obs_spar = 1, theta.add = FALSE, #c(0,0,0),
               beta_C = as.vector(t(beta_C_true)))
y <- drop(Data$data$y)
n <- length(y)
X <- Data$data$Comp
Zc <- Data$data$Zc
intercept <- Data$data$intercept
m <- ifelse(is.null(Zc), 0, dim(Zc)[2]) #+ as.integer(intercept)
m1 <- m + as.integer(intercept)
sseq <- Data$basis.info[,1]
beta_C.true <- matrix(Data$beta[1:(p*(df_beta))],
                      nrow = p, ncol = df_beta, byrow = TRUE)
beta_curve.true <- Data$basis.info[,-1] %*% t(beta_C.true)
Non_zero.true <- (1:p)[apply(beta_C.true, 1, function(x) max(abs(x)) > 0)]
foldid <- sample(rep(seq(nfolds), length = n))

arg_list <- as.list(Data$call)[-1]
arg_list$n <- n_test
Test <- do.call(Model2, arg_list)
y_test <- drop(Test$data$y)

GIC_arg <- list(basis_fun = "bs", degree = 3, sseq = Data$basis.info[, 1])
dfmax = ceiling(p/3*2)
lambda.factor = ifelse(n_train < (p * k_list[1]), 0.01, 0.005)
#lambda.factor = ifelse(n_train < (p * k_list[1]), 0.05, 0.005)

# # y = y
# # X = X
# # Zc = Zc
# # intercept = intercept
# # W = rep(1, p)
# # k = k_list
# # nfolds = 10
# # trim = 0
# # tol = 0
# # inner_eps = 1e-6
# # inner_maxiter = 1E3
# # dfmax = 20
# # lambda.factor = 1e-20
# # mu_ratio = 1
# # outer_eps = 1e-6
# # keep = TRUE
# # Trange = c(0,1)
 


GIC_m1 <- GIC.FuncompCGL( y = y, X = X, Zc = Zc, ref = NULL,
                          inner_eps = 1e-8, outer_eps = 1e-8, tol = 1e-8,
                          k = k_list)

temp <- get.GIC(p = p, df_list = k_list, lower_tri = 0.01,
                GIC_obj = GIC_m1, GIC_arg = GIC_arg,
                cut_type = "Strict", GIC_type = "GIC2",
                method_type = "cgl", refit = FALSE)
GIC_curve <- temp$GIC_curve
k_opt <- temp$k_opt
beta_GIC <- temp$beta

plot.args = list(x = seq(length(GIC_m1$lam)), #GIC_m1$lam, #log(GIC_m1$lam),
                 y = GIC_curve[1, ],
                 ylim = range(GIC_curve),
                 xlab= "lambda Index",#"lambda", #"log(lambda)",
                 ylab="GIC",
                 type="n")
#do.call("plot",plot.args)
# for(i in 1:length(k_list)) {
#
#   points(x = seq(length(GIC_m1$lam)), #GIC_m1$lam, #log(GIC_m1$lam),
#          y = GIC_curve[i, ], col = rainbow(length(k_list))[i])
#   text(length(GIC_m1$lam), #tail(log(GIC_m1$lam), 1),
#        GIC_curve[i, length(GIC_m1$lam)], labels=paste(k_list[i]),
#        cex= 1, pos= 4, col = rainbow(length(k_list))[i])
# }
# axis(3, at = pretty(seq(length(GIC_m1$lam))), labels = rev(pretty(GIC_m1$lam)))
# loc  = which(GIC_curve == min(GIC_curve), arr.ind = TRUE)




beta_C <- matrix(beta_GIC[1:(p*k_opt)], byrow = TRUE, nrow = p)
cat("colSums:", colSums(beta_C) , "\r\n")
#Non.zero <- which(abs(beta_C[,1]) > 0)
Non.zero <- apply(beta_C, 1, function(x) ifelse(max(abs(x)) >0, TRUE, FALSE))
Non.zero <- (1:p)[Non.zero]
cat("None zero groups:", Non.zero)
#vet(beta, p = p, k = k_opt)

par(mfrow=c(1,4))
do.call("plot",plot.args)
for(i in 1:length(k_list)) {
  points(x = seq(length(GIC_m1$lam)), #log(GIC_m1$lam),
         y = GIC_curve[i, ], col = rainbow(length(k_list))[i], pch = seq(length(k_list))[i])
  text(length(GIC_m1$lam), #tail(log(GIC_m1$lam), 1),
       GIC_curve[i, length(GIC_m1$lam)], labels=paste(k_list[i]),
       cex= 1, pos= 4, col = rainbow(length(k_list))[i])
}
#axis(3, at = pretty(seq(length(GIC_m1$lam))), labels = rev(pretty(GIC_m1$lam)))

matplot(sseq, beta_curve.true,
        ylab = "coeffcients curve", xlab = "TIME", #main = "TRUE",
        ylim = range(Data$beta[1:(p*df_beta)]),
        type = "l")
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("TRUE", line = 0.5)
text(0, beta_curve.true[1, Non_zero.true], labels = paste(Non_zero.true))

B <- splines::bs(Data$basis.info[,1], df = k_opt, intercept = TRUE)
beta_curve <- B %*% t(beta_C)
matplot(sseq, beta_curve,
        ylab = "coef", xlab = "TIME", #main = "ESTI",
        ylim = range(Data$beta[1:(p*df_beta)])#,
        #type = "l"
)
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("Estimate", line = 0.5)
text(0, beta_curve[1, Non.zero], labels = paste(Non.zero))
text(tail(sseq, 1), beta_curve[dim(beta_curve)[1], Non.zero], labels = paste(Non.zero))
plot(apply(abs(beta_C),1,sum))
text(seq(length(GIC_m1$lam))[which(apply(abs(beta_C),1,sum) > 0)], #tail(log(GIC_m1$lam), 1),
     apply(abs(beta_C),1,sum)[which(apply(abs(beta_C),1,sum) > 0)],
     labels=paste(seq(length(GIC_m1$lam))[which(apply(abs(beta_C),1,sum) > 0)]),
     cex= 1, pos= 4)
Non.zero <- apply(beta_C, 1, function(x)
  ifelse(sqrt(sum(x^2)) > sqrt(sum(beta_C^2))/100, TRUE, FALSE))
Non.zero <- (1:p)[Non.zero]
Non.zero

text(seq(length(GIC_m1$lam))[Non.zero], #tail(log(GIC_m1$lam), 1),
     apply(abs(beta_C_temp),1,sum)[Non.zero],
     labels=paste(seq(length(GIC_m1$lam))[Non.zero]),
     cex= 1, pos= 2, col = "red")

title(paste0("k=", k_opt), line = 0.5)
title(paste0("Method cgl"), outer=TRUE, line = -2)
par(mfrow=c(1,1))

##set a cutoff when you compute nonzeros
Non.zero <- apply(beta_C, 1, function(x)
  ifelse(sqrt(sum(x^2)) > sqrt(sum(beta_C^2))/100, TRUE, FALSE))
Non.zero <- (1:p)[Non.zero]
Non.zero







GIC_curve_temp <- matrix(NA, nrow = length(k_list), ncol = length(GIC_m1$lam))
GIC_min_temp <- vector()
for(k in k_list){
  cat("k = ", k, ",")
  
  #scaler <- log(log(n)) * log(max(p * k, n)) / n #GIC1
  #scaler <- log(max(p * k, n)) / n #GIC2
  #scaler <- log(n) / n # BIC
  
  scaler <- switch(temp$GIC_type,
                   "GIC1" = log(max(p * k, n)),
                   "GIC2" = log(max(p * k, n)) * log(log(n)),
                   "BIC" = log(n),
                   "AIC" = 2) / n
  
  N_zero <- apply(GIC_m1$Funcomp.CGL.fit[[k- k_list[1] + 1]]$beta, 2, 
                  function(x, p, k) {
                  A = matrix(x[1:(p*k)], nrow = p, byrow = TRUE)
                  a = apply(A, 1, function(x) sum(abs(x)))
                  return(length(which(a > 0)))
                  },p = p, k = k)
  
  part2 <- scaler * ifelse(N_zero >= 2, (N_zero - 1), 0)
  #part2 <- scaler * ifelse(N_zero >= 2, ( (N_zero - 1) * k), 0)
  #plot(log(GIC_m1$MSE[k - k_list[1] + 1, ]) + part2)
  id <- which.min( (log(GIC_m1$MSE[k - k_list[1] + 1, ])+part2)[N_zero >= 2] )
  # id 
  # GIC_m1$Funcomp.CGL.fit[[k- k_list[1] + 1]]$beta[, id]
  GIC_curve_temp[k-k_list[1] + 1, ] <- log(GIC_m1$MSE[k - k_list[1] + 1, ]) + part2
  GIC_min_temp[k - k_list[1] + 1] <- GIC_curve_temp[k-k_list[1] + 1, id]
    
}
cat("\r\n")
rm(k)
GIC_min_temp
k_loc_temp <- which.min(GIC_min_temp)
lam_loc_temp <- which(GIC_curve_temp[k_loc_temp, ] == GIC_min_temp[k_loc_temp])
beta_GIC_temp <- GIC_m1$Funcomp.CGL.fit[[k_loc_temp]]$beta[, lam_loc_temp]
k_opt_temp <- k_list[k_loc_temp]



beta_C_temp <- matrix(beta_GIC_temp[1:(p*k_opt_temp)], byrow = TRUE, nrow = p)
cat("colSums:", colSums(beta_C_temp) , "\r\n")
#Non.zero <- which(abs(beta_C[,1]) > 0)
Non.zero_temp <- apply(beta_C_temp, 1, function(x) ifelse(max(abs(x)) >0, TRUE, FALSE))
Non.zero_temp <- (1:p)[Non.zero_temp]
cat("None zero groups:", Non.zero_temp)
#vet(beta, p = p, k = k_opt)

par(mfrow=c(1,4))
plot.args_temp = list(x = seq(length(GIC_m1$lam)), #GIC_m1$lam, #log(GIC_m1$lam),
                 y = GIC_curve_temp[1, ],
                 ylim = range(GIC_curve_temp),
                 xlab= "lambda Index",#"lambda", #"log(lambda)",
                 ylab="GIC",
                 type="n")
do.call("plot",plot.args_temp)
for(i in 1:length(k_list)) {
  points(x = seq(length(GIC_m1$lam)), #log(GIC_m1$lam),
         y = GIC_curve_temp[i, ], col = rainbow(length(k_list))[i], pch = seq(length(k_list))[i])
  text(length(GIC_m1$lam), #tail(log(GIC_m1$lam), 1),
       GIC_curve_temp[i, length(GIC_m1$lam)], labels=paste(k_list[i]),
       cex= 1, pos= 4, col = rainbow(length(k_list))[i])
}
#axis(3, at = pretty(seq(length(GIC_m1$lam))), labels = rev(pretty(GIC_m1$lam)))

matplot(sseq, beta_curve.true,
        ylab = "coeffcients curve", xlab = "TIME", #main = "TRUE",
        ylim = range(Data$beta[1:(p*df_beta)]),
        type = "l")
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("TRUE", line = 0.5)
text(0, beta_curve.true[1, Non_zero.true], labels = paste(Non_zero.true))

B_temp <- splines::bs(Data$basis.info[,1], df = k_opt_temp, intercept = TRUE)
beta_curve_temp <- B_temp %*% t(beta_C_temp)
matplot(sseq, beta_curve_temp,
        ylab = "coef", xlab = "TIME", #main = "ESTI",
        ylim = range(Data$beta[1:(p*df_beta)])#,
        #type = "l"
)
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("Estimate", line = 0.5)
text(0, beta_curve_temp[1, Non.zero], labels = paste(Non.zero))
text(tail(sseq, 1), beta_curve_temp[dim(beta_curve_temp)[1], Non.zero], 
     labels = paste(Non.zero))

plot(apply(abs(beta_C_temp),1,sum))

text(seq(length(GIC_m1$lam))[which(apply(abs(beta_C_temp),1,sum) > 0)], #tail(log(GIC_m1$lam), 1),
     apply(abs(beta_C_temp),1,sum)[which(apply(abs(beta_C_temp),1,sum) > 0)],
     labels=paste(seq(length(GIC_m1$lam))[which(apply(abs(beta_C_temp),1,sum) > 0)]),
     cex= 1, pos= 4)

Non.zero_temp <- apply(beta_C_temp, 1, function(x)
  ifelse(sqrt(sum(x^2)) > sqrt(sum(beta_C_temp^2))/100, TRUE, FALSE))
Non.zero_temp <- (1:p)[Non.zero_temp]
Non.zero_temp

text(seq(length(GIC_m1$lam))[Non.zero_temp], #tail(log(GIC_m1$lam), 1),
     apply(abs(beta_C_temp),1,sum)[Non.zero_temp],
     labels=paste(seq(length(GIC_m1$lam))[Non.zero_temp]),
     cex= 1, pos= 2, col = "red")

title(paste0("k=", k_opt_temp), line = 0.5)
title(paste0("Method cgl"), outer=TRUE, line = -2)

par(mfrow=c(1,1))

##set a cutoff when you compute nonzeros
Non.zero_temp <- apply(beta_C_temp, 1, function(x)
                       ifelse(sqrt(sum(x^2)) > sqrt(sum(beta_C_temp^2))/100, TRUE, FALSE))
Non.zero_temp <- (1:p)[Non.zero_temp]
Non.zero_temp


# cgl_GIC <- list()
# MSE <- temp$MSE[temp$k_opt - k_list[1] + 1, temp$lam_loc]
# R_sqr <- 1 - MSE * length(y) / crossprod(y -  mean(y))
# 
# obj <- FuncompCGL(y = y_test, X = Test$data$Comp, k = k_opt, nlam = 1, outer_maxiter = 0)
# X_test <- cbind2(cbind(obj$Z, Test$data$Zc), 1)
# PE <- sum((y_test - X_test %*% beta_GIC)^2) / length(y_test)
# cgl_GIC$pred_error <- c(MSE = MSE, PE = PE, Rsqr_train = R_sqr)
# 
# cgl_GIC$Non.zero_cut <- Non.zero
# cgl_GIC <- c(cgl_GIC,
#              ERROR_fun(beta_fit = beta_GIC, beta_true = Data$beta,
#                        basis_fit = B, basis_true = Data$basis.info[,-1],
#                        sseq = Data$basis.info[, 1],
#                        m = m, p = p, Nzero_group = length(Non_zero.true), tol = 0),
#              k = k_opt)
# cgl_GIC$coef <- list(beta_C = beta_C, beta_c = tail(beta_GIC, m1))
jiji6454/Rpac_compReg documentation built on May 31, 2019, 5:01 a.m.