# Run_simulation/RUN_CV/run_cv.R

# Simulation settings: p compositional predictors, each with a df_beta-column
# B-spline coefficient vector; only components 1-3 are truly active, and each
# active row sums to zero (consistent with the compositional constraint).
df_beta <- 5
p <- 30
beta_C_true <- matrix(0, nrow = p, ncol = df_beta)
beta_C_true[1:3, ] <- rbind(c( 1, 0,  1, 0, -0.5),
                            c( 0, 0, -1, 0,  1),
                            c(-1, 0,  0, 0, -0.5))

# Cross-validation and sample-size settings.
nfolds <- 10
k_list <- c(4, 5)
n_train <- 100
n_test <- 500

# Generate training data from the package's simulation Model2: n = 100
# subjects, p compositional covariates observed at ns = 20 time points
# (obs_spar = 1 means fully observed), response built from the true
# coefficient curves spanned by df_beta B-spline basis functions.
Data <- Model2(n = 100, p = p, m = 0, intercept = TRUE,
               SNR = 2, sigma = 2,
               rho_X = 0, rho_W = 0.5,
               Corr_X = "CorrCS", Corr_W = "CorrAR",
               df_W = 5, df_beta = df_beta,
               ns = 20, obs_spar = 1, theta.add = FALSE, #c(0,0,0),
               beta_C = as.vector(t(beta_C_true)))
y <- drop(Data$data$y)
n <- length(y)
X <- Data$data$Comp
Zc <- Data$data$Zc
intercept <- Data$data$intercept
# Number of time-invariant covariates (the intercept is counted separately in
# m1). Plain if/else instead of ifelse(): ifelse() is a vectorized helper and
# is the wrong tool for a scalar NULL check.
m <- if (is.null(Zc)) 0L else ncol(Zc) #+ as.integer(intercept)
m1 <- m + as.integer(intercept)
# Time grid and true coefficient curves reconstructed from the basis info
# matrix (column 1 = time points, remaining columns = basis evaluations).
sseq <- Data$basis.info[, 1]
beta_C.true <- matrix(Data$beta[1:(p * df_beta)],
                      nrow = p, ncol = df_beta, byrow = TRUE)
beta_curve.true <- Data$basis.info[, -1] %*% t(beta_C.true)
# Indices of the truly active components (any non-zero basis coefficient).
Non_zero.true <- (1:p)[apply(beta_C.true, 1, function(x) max(abs(x)) > 0)]
# Fold assignment shared with the constrained fit below.
# NOTE(review): no set.seed() anywhere in this script, so folds, data, and the
# baseline reference drawn later are all irreproducible across runs — confirm
# whether that is intended.
foldid <- sample(rep(seq(nfolds), length = n))

# Independent test set: replay the recorded Model2 call with n = n_test.
arg_list <- as.list(Data$call)[-1]
arg_list$n <- n_test
Test <- do.call(Model2, arg_list)
y_test <- drop(Test$data$y)

# y = y
# X = X
# Zc = Zc
# intercept = intercept
# W = rep(1, p)
# k = k_list
# nfolds = 10
# trim = 0
# tol = 0
# inner_eps = 1e-6
# inner_maxiter = 1E3
# dfmax = 20
# lambda.factor = 1e-20
# mu_ratio = 1
# outer_eps = 1e-6
# keep = TRUE
# Trange = c(0,1)


# Model-selection rule used for every method below: the CV-minimizing lambda
# ("lam.min"); "lam.1se" would pick the sparser one-standard-error model.
rule <- "lam.min"
# cv_cgl, Constrained group lasso
# Cross-validated functional compositional group lasso with the linear
# (zero-sum) constraint enforced via the augmented Lagrangian (mu_ratio = 1;
# cf. the mu_ratio = 0 "naive" fit further down). Searches jointly over the
# spline df values in k_list and an internally generated lambda path;
# keep = TRUE retains fold-wise fitted values for later inspection.
cv_cgl <-  cv.FuncompCGL(y = y, X = X, Zc = Zc, intercept = intercept,
                         W = rep(1, p), #W = function(x){ diag( 1 / apply(x, 2, sd) ) },
                         k = k_list, trim = 0,
                         foldid = foldid, #nfolds = 10,
                         tol = 0, inner_eps = 1e-6, inner_maxiter = 1E3,
                         dfmax = 30, lambda.factor = 1e-3,
                         mu_ratio = 1, outer_eps = 1e-6,
                         keep = TRUE, Trange = c(0,1)
                         #lam = c(exp(seq(log(0.01),log(0.00001),length=100)),0)
)
plot(cv_cgl,k_list = k_list)
cv_cgl$Ftrim[c("lam.min", "lam.1se")]
# Coefficient vector at the selected (k, lambda), from the untrimmed CV curve.
beta <-  coef(cv_cgl, trim = FALSE, s = rule)
# Recover the selected spline df: beta stacks p*k spline coefficients plus m
# covariate terms plus 1 intercept.
# NOTE(review): the "+ 1" assumes intercept = TRUE (true for this Model2 call).
k_opt <- (length(drop(beta)) - (m + 1)) / p
#cv_cgl$Funcomp.CGL.fit[[as.character(k_opt)]]


# Diagnostic panel for the cgl fit: CV curves, coefficient solution paths at
# the selected df, CV error along the lambda path, and fitted vs. true
# stacked coefficient vectors.
par(mfrow=c(1,4))
plot(cv_cgl,k_list = k_list)
plot(cv_cgl$Funcomp.CGL.fit[[as.character(k_opt)]],
     p = p, k = k_opt, ylab = "L2")
# Rows of Ftrim$cvm index k_list, hence the offset by k_list[1].
plot(cv_cgl$Ftrim$cvm[k_opt - k_list[1] + 1, ])
title(paste0("k=", k_opt), line = 0.5)
# apply(cv_cgl$Funcomp.CGL.fit[[k_opt - 3]]$beta, 2,
#       function(x, p, k) {
#         #which(abs(x[seq(1, (p-1)*k+1, by = k)])>0),
#         (1:p)[apply(matrix(x[1:(p*k)], byrow = TRUE, nrow = p), 1,
#                     function(x) max(abs(x)) > 0)]
#       },p = p , k = k_opt)
if(k_opt == df_beta) {
  # Selected df matches the truth: overlay the estimate (open circles) on the
  # true stacked coefficients (red filled), with group separators every
  # df_beta entries.
  plot(Data$beta, col = "red", pch = 19,
       ylim = range(c(range(Data$beta), range(beta)))) #range(Data$beta))
  abline(v= seq(from = 0, to = (p*df_beta), by = df_beta ))
  abline(h = 0)
  points(beta)
  # Time-invariant covariate / intercept coefficients plotted in blue.
  if(m1 > 0) points(p*df_beta + 1:m1, tail(Data$beta, m1),
                    col = "blue", pch = 19)
} else {
  # Different df: stacked vectors are not comparable entry-wise, so plot the
  # estimate alone with separators every k_opt entries.
  plot(beta, ylim = range(c(range(Data$beta), range(beta))) )
  abline(v= seq(from = 0, to = (p*k_opt), by = k_opt ))
  abline(h = 0, col = "red")
  if(m1 > 0) points(p*k_opt + 1:m1, tail(Data$beta, m1),
                    col = "blue", pch = 19)
}
title(paste0("Method cgl"), outer=TRUE, line = -2)
par(mfrow=c(1,1))

# Reshape the fitted vector into a p x k_opt coefficient matrix, one row per
# compositional component. colSums should be ~0 under the zero-sum constraint.
beta_C <- matrix(beta[seq_len(p * k_opt)], byrow = TRUE, nrow = p)
cat("colSums:", colSums(beta_C))
# A component counts as selected when any of its k_opt basis coefficients is
# non-zero (exact zero test; cutoff-based versions appear further down).
nz_mask <- apply(beta_C, 1, function(row) max(abs(row)) > 0)
Non.zero <- (1:p)[nz_mask]
cat("None zero groups:", Non.zero)

# Curve-level comparison for the cgl fit: true vs. estimated coefficient
# curves over time, per-group L1 norms, and two sparsity cutoffs.
par(mfrow=c(1,4))
plot(cv_cgl) #plot(cv_cgl,k_list = k_list)
# True coefficient curves on the time grid, labelled by active component.
matplot(sseq, beta_curve.true,
        ylab = "coeffcients curve", xlab = "TIME", #main = "TRUE",
        ylim = range(Data$beta[1:(p*df_beta)]),
        type = "l")
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("TRUE", line = 0.5)
text(0, beta_curve.true[1, Non_zero.true], labels = paste(Non_zero.true))

# Rebuild a B-spline basis at the selected df and turn the fitted
# coefficients into curves.
B <- splines::bs(Data$basis.info[,1], df = k_opt, intercept = TRUE)
beta_curve <- B %*% t(beta_C)
matplot(sseq, beta_curve,
        ylab = "coef", xlab = "TIME", #main = "ESTI",
        ylim = range(Data$beta[1:(p*df_beta)])#,
        #type = "l"
)
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("Estimate", line = 0.5)
# Label estimated curves at both ends of the time range.
text(0, beta_curve[1, Non.zero], labels = paste(Non.zero))
text(tail(sseq, 1), beta_curve[dim(beta_curve)[1], Non.zero], labels = paste(Non.zero))
# Per-component L1 norm of the spline coefficients.
plot(apply(abs(beta_C),1,sum))
title(paste0("k=", k_opt), line = 0.5)
title(paste0("Method cgl"), outer=TRUE, line = -2)
par(mfrow=c(1,1))
##set a cutoff when you compute nonzeros
# Cutoff 1: keep a group when its coefficient L2 norm exceeds 1% of the
# overall L2 norm.
Non.zero <- apply(beta_C, 1, function(x)
  ifelse(sqrt(sum(x^2)) > sqrt(sum(beta_C^2))/100, TRUE, FALSE))
Non.zero <- (1:p)[Non.zero]
Non.zero
# Cutoff 2: same idea on squared norms (1% of total sum of squares) — a
# stricter threshold than cutoff 1; this result overwrites Non.zero and is
# the one stored in cgl$Non.zero_cut below.
Non.zero <- apply(beta_C, 1, function(x)
  ifelse(sum(x^2) > sum(beta_C^2)/100, TRUE, FALSE))
Non.zero <- (1:p)[Non.zero]
Non.zero
## cut by curve
# Cutoff 3 (exploratory, not stored): squared L2 norm of each fitted curve,
# integrated over time by the trapezoidal rule (endpoint terms halved, then
# scaled by the grid spacing — assumes a uniform grid).
Curve_L2 <- colSums(beta_curve^2)
Curve_L2 <- Curve_L2 - colSums(beta_curve[c(1, nrow(Data$basis.info)), ]^2) / 2
Curve_L2 <- Curve_L2 * (Data$basis.info[2,1] - Data$basis.info[1,1])
plot(Curve_L2)
which(sqrt(Curve_L2) > sqrt(sum(Curve_L2)) / 100)

# Collect performance summaries for the constrained group-lasso fit.
cgl <- list()
# Training design at the selected df: integrated compositional features from
# the stored fit, time-invariant covariates, and an intercept column of ones.
fit_opt <- cv_cgl$Funcomp.CGL.fit[[k_opt - k_list[1] + 1]]
X_train <- cbind2(cbind(fit_opt$Z, Zc), 1)
res_train <- y - X_train %*% beta
MSE <- mean(res_train^2)
R_sqr <- 1 - sum(res_train^2) / crossprod(y - mean(y))

# outer_maxiter = 0 runs no outer optimization — this call appears to be used
# only to construct the test-set design matrix obj$Z for the test compositions.
obj <- FuncompCGL(y = y_test, X = Test$data$Comp, k = k_opt, nlam = 1, outer_maxiter = 0)
X_test <- cbind2(cbind(obj$Z, Test$data$Zc), 1)
PE <- mean((y_test - X_test %*% beta)^2)
cgl$pred_error <- c(MSE = MSE, PE = PE, Rsqr_train = R_sqr)

# Non.zero here is the cutoff-based selection computed above.
cgl$Non.zero_cut <- Non.zero
# Estimation / selection error metrics from the package helper.
cgl <- c(cgl,
         ERROR_fun(beta_fit = beta, beta_true = Data$beta,
                   basis_fit = B, basis_true = Data$basis.info[,-1],
                   sseq = Data$basis.info[, 1],
                   m = m, p = p, Nzero_group = length(Non_zero.true), tol = 0),
         k = k_opt)
cgl$coef <- list(beta_C = beta_C, beta_c = tail(beta, m1))

## Not run: 
# naive model
# set mu_raio = 0 to identifying without linear constraints,
# no outer_loop for Lagrange augmented multiplier
# mu_ratio = 0
# Same group-lasso fit but with the zero-sum constraint switched off
# (mu_ratio = 0), i.e. an unconstrained functional group lasso.
# NOTE(review): this uses fresh random folds (nfolds = 10) rather than the
# shared foldid used for cv_cgl, so the CV errors of the methods are not
# computed on identical partitions — confirm whether that is intended.
cv_naive <-  cv.FuncompCGL(y = y, X = X, Zc = Zc, intercept = intercept,
                           W = rep(1, p), #W = function(x){ diag( 1 / apply(x, 2, sd) ) },
                           k = k_list, nfolds = 10, trim = 0,
                           tol = 0, inner_eps = 1e-6, inner_maxiter = 1E3,
                           dfmax = 30, lambda.factor = 1e-3,
                           mu_ratio = 0, outer_eps = 1e-6,
                           keep = FALSE, Trange = c(0,1)
                           #lam = c(exp(seq(log(0.01),log(0.00001),length=100)),0)
)

plot(cv_naive,k_list = k_list)
cv_naive$Ftrim[c("lam.min", "lam.1se")]
# Coefficients at the selected (k, lambda); k_opt recovered as for cgl.
beta <-  coef(cv_naive, trim = FALSE, s = rule)
k_opt <- (length(drop(beta)) - (m + 1)) / p
#cv_naive$Funcomp.CGL.fit[[as.character(k_opt)]]


# Diagnostic panel for the naive (unconstrained) fit; same layout as the cgl
# panel above.
par(mfrow=c(1,4))
plot(cv_naive,k_list = k_list)
plot(cv_naive$Funcomp.CGL.fit[[as.character(k_opt)]],
     p = p, k = k_opt, ylab = "L2")
# Rows of Ftrim$cvm index k_list, hence the offset by k_list[1].
plot(cv_naive$Ftrim$cvm[k_opt - k_list[1] + 1, ])
title(paste0("k=", k_opt), line = 0.5)
# apply(cv_naive$Funcomp.CGL.fit[[k_opt - 3]]$beta, 2,
#       function(x, p, k) {
#         #which(abs(x[seq(1, (p-1)*k+1, by = k)])>0),
#         (1:p)[apply(matrix(x[1:(p*k)], byrow = TRUE, nrow = p), 1,
#                     function(x) max(abs(x)) > 0)]
#       },p = p , k = k_opt)
if(k_opt == df_beta) {
  # Selected df matches the truth: overlay estimate (circles) on truth (red).
  plot(Data$beta, col = "red", pch = 19,
       ylim = range(c(range(Data$beta), range(beta)))) #range(Data$beta))
  abline(v= seq(from = 0, to = (p*df_beta), by = df_beta ))
  abline(h = 0)
  points(beta)
  if(m1 > 0) points(p*df_beta + 1:m1, tail(Data$beta, m1),
                    col = "blue", pch = 19)
} else {
  # Different df: plot the estimate alone with separators every k_opt entries.
  plot(beta, ylim = range(c(range(Data$beta), range(beta))) )
  abline(v= seq(from = 0, to = (p*k_opt), by = k_opt ))
  abline(h = 0, col = "red")
  if(m1 > 0) points(p*k_opt + 1:m1, tail(Data$beta, m1),
                    col = "blue", pch = 19)
}
title(paste0("Method naive"), outer=TRUE, line = -2)
par(mfrow=c(1,1))

# Reshape the naive fit's coefficient vector into a p x k_opt matrix, one row
# per compositional component. Without the constraint, colSums need not be 0.
beta_C <- matrix(beta[seq_len(p * k_opt)], byrow = TRUE, nrow = p)
cat("colSums:", colSums(beta_C))
# A component counts as selected when any of its k_opt coefficients is
# non-zero (exact zero test; a cutoff-based version follows below).
nz_mask <- apply(beta_C, 1, function(row) max(abs(row)) > 0)
Non.zero <- (1:p)[nz_mask]
cat("None zero groups:", Non.zero)

# Curve-level comparison for the naive fit; same layout as the cgl version.
par(mfrow=c(1,4))
plot(cv_naive) #plot(cv_naive,k_list = k_list)
# True coefficient curves on the time grid, labelled by active component.
matplot(sseq, beta_curve.true,
        ylab = "coeffcients curve", xlab = "TIME", #main = "TRUE",
        ylim = range(Data$beta[1:(p*df_beta)]),
        type = "l")
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("TRUE", line = 0.5)
text(0, beta_curve.true[1, Non_zero.true], labels = paste(Non_zero.true))

# Rebuild the basis at the selected df and form the estimated curves.
B <- splines::bs(Data$basis.info[,1], df = k_opt, intercept = TRUE)
beta_curve <- B %*% t(beta_C)
matplot(sseq, beta_curve,
        ylab = "coef", xlab = "TIME", #main = "ESTI",
        ylim = range(Data$beta[1:(p*df_beta)])#,
        #type = "l"
)
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("Estimate", line = 0.5)
text(0, beta_curve[1, Non.zero], labels = paste(Non.zero))
text(tail(sseq, 1), beta_curve[dim(beta_curve)[1], Non.zero], labels = paste(Non.zero))
# Per-component L1 norm of the spline coefficients.
plot(apply(abs(beta_C),1,sum))
title(paste0("k=", k_opt), line = 0.5)
title(paste0("Method naive"), outer=TRUE, line = -2)
par(mfrow=c(1,1))
##set a cutoff when you compute nonzeros
# L2-norm cutoff: keep a group when its norm exceeds 1% of the overall norm;
# this overwrites the exact-zero selection and is stored in naive$Non.zero_cut.
Non.zero <- apply(beta_C, 1, function(x)
  ifelse(sqrt(sum(x^2)) > sqrt(sum(beta_C^2))/100, TRUE, FALSE))
Non.zero <- (1:p)[Non.zero]
Non.zero


# Collect performance summaries for the naive (unconstrained) fit.
naive <- list()
# Training design at the selected df: stored integrated features plus the
# time-invariant covariates and an intercept column of ones.
fit_opt <- cv_naive$Funcomp.CGL.fit[[k_opt - k_list[1] + 1]]
X_train <- cbind2(cbind(fit_opt$Z, Zc), 1)
res_train <- y - X_train %*% beta
MSE <- mean(res_train^2)
R_sqr <- 1 - sum(res_train^2) / crossprod(y - mean(y))

# outer_maxiter = 0 runs no outer optimization — this call appears to be used
# only to construct the test-set design matrix obj$Z.
obj <- FuncompCGL(y = Test$data$y, X = Test$data$Comp, k = k_opt, nlam = 1, outer_maxiter = 0)
X_test <- cbind2(cbind(obj$Z, Test$data$Zc), 1)
PE <- mean((y_test - X_test %*% beta)^2)
naive$pred_error <- c(MSE = MSE, PE = PE, Rsqr_train = R_sqr)
# Non.zero here is the cutoff-based selection computed above.
naive$Non.zero_cut <- Non.zero
# Estimation / selection error metrics from the package helper.
naive <- c(naive,
           ERROR_fun(beta_fit = beta, beta_true = Data$beta,
                     basis_fit = B, basis_true = Data$basis.info[,-1],
                     sseq = Data$basis.info[, 1],
                     m = m, p = p, Nzero_group = length(Non_zero.true), tol = 0),
           k = k_opt)
naive$coef <- list(beta_C = beta_C, beta_c = tail(beta, m1))



# log contract model
# set reference variable and once ref is set to a scalar in range,
# mu_ratio is set to 0 automatically
# ref = sample(1:p, 1)
# Baseline (log-contrast) parameterization: one randomly drawn component is
# the reference, so W has length p - 1 (the reference is dropped).
# NOTE(review): sample() without set.seed() means the reference differs on
# every run; also fresh nfolds = 10 folds are used here, not the shared
# foldid — confirm both are intended.
cv_base <- cv.FuncompCGL(y = y, X = X, Zc = Zc, intercept = intercept, ref = sample(1:p, 1),
                         W = rep(1, p - 1), #W = function(x){ diag( 1 / apply(x, 2, sd) ) },
                         k = k_list, nfolds = 10, trim = 0,
                         tol = 0, inner_eps = 1e-6, inner_maxiter = 1E3,
                         dfmax = 30, lambda.factor = 1e-3,
                         mu_ratio = 0, outer_eps = 1e-6,
                         keep = FALSE, Trange = c(0,1)
                         #,lam = c(exp(seq(log(0.01),log(0.00001),length=100)),0)
)

plot(cv_base,k_list = k_list)
cv_base$Ftrim[c("lam.min", "lam.1se")]
# Coefficients at the selected (k, lambda). k_opt is recovered by dividing by
# p, which presumes coef() returns the reconstructed full-composition
# coefficients (p groups) even under the reference parameterization —
# NOTE(review): confirm against the package's coef method.
beta <-  coef(cv_base, trim = FALSE, s = rule)
k_opt <- (length(drop(beta)) - (m + 1)) / p
#cv_base$Funcomp.CGL.fit[[as.character(k_opt)]]


# Diagnostic panel for the baseline (reference) fit; same layout as above.
par(mfrow=c(1,4))
plot(cv_base,k_list = k_list)
plot(cv_base$Funcomp.CGL.fit[[as.character(k_opt)]],
     p = p, k = k_opt, ylab = "L2")
# Rows of Ftrim$cvm index k_list, hence the offset by k_list[1].
plot(cv_base$Ftrim$cvm[k_opt - k_list[1] + 1, ])
title(paste0("k=", k_opt), line = 0.5)
# apply(cv_base$Funcomp.CGL.fit[[k_opt - 3]]$beta, 2,
#       function(x, p, k) {
#         #which(abs(x[seq(1, (p-1)*k+1, by = k)])>0),
#         (1:p)[apply(matrix(x[1:(p*k)], byrow = TRUE, nrow = p), 1,
#                     function(x) max(abs(x)) > 0)]
#       },p = p , k = k_opt)
if(k_opt == df_beta) {
  # Selected df matches the truth: overlay estimate (circles) on truth (red).
  plot(Data$beta, col = "red", pch = 19,
       ylim = range(c(range(Data$beta), range(beta)))) #range(Data$beta))
  abline(v= seq(from = 0, to = (p*df_beta), by = df_beta ))
  abline(h = 0)
  points(beta)
  if(m1 > 0) points(p*df_beta + 1:m1, tail(Data$beta, m1),
                    col = "blue", pch = 19)
} else {
  # Different df: plot the estimate alone with separators every k_opt entries.
  plot(beta, ylim = range(c(range(Data$beta), range(beta))) )
  abline(v= seq(from = 0, to = (p*k_opt), by = k_opt ))
  abline(h = 0, col = "red")
  if(m1 > 0) points(p*k_opt + 1:m1, tail(Data$beta, m1),
                    col = "blue", pch = 19)
}
# Title records which component served as the random reference.
title(paste0("Method baseline=", cv_base$Funcomp.CGL.fit[[1]]$ref), outer=TRUE, line = -2)
par(mfrow=c(1,1))

# Reshape the baseline fit's (reconstructed full-composition) coefficients
# into a p x k_opt matrix, one row per component.
beta_C <- matrix(beta[seq_len(p * k_opt)], byrow = TRUE, nrow = p)
cat("colSums:", colSums(beta_C))
# A component counts as selected when any of its k_opt coefficients is
# non-zero (exact zero test; a cutoff-based version follows below).
nz_mask <- apply(beta_C, 1, function(row) max(abs(row)) > 0)
Non.zero <- (1:p)[nz_mask]
cat("None zero groups:", Non.zero)

# Curve-level comparison for the baseline fit; same layout as the cgl version.
par(mfrow=c(1,4))
plot(cv_base) #plot(cv_base,k_list = k_list)
# True coefficient curves on the time grid, labelled by active component.
matplot(sseq, beta_curve.true,
        ylab = "coeffcients curve", xlab = "TIME", #main = "TRUE",
        ylim = range(Data$beta[1:(p*df_beta)]),
        type = "l")
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("TRUE", line = 0.5)
text(0, beta_curve.true[1, Non_zero.true], labels = paste(Non_zero.true))

# Rebuild the basis at the selected df and form the estimated curves.
B <- splines::bs(Data$basis.info[,1], df = k_opt, intercept = TRUE)
beta_curve <- B %*% t(beta_C)
matplot(sseq, beta_curve,
        ylab = "coef", xlab = "TIME", #main = "ESTI",
        ylim = range(Data$beta[1:(p*df_beta)])#,
        #type = "l"
)
abline(a = 0, b = 0, col = "grey", lwd = 2)
title("Estimate", line = 0.5)
text(0, beta_curve[1, Non.zero], labels = paste(Non.zero))
text(tail(sseq, 1), beta_curve[dim(beta_curve)[1], Non.zero], labels = paste(Non.zero))
# Per-component L1 norm of the spline coefficients.
plot(apply(abs(beta_C),1,sum))
title(paste0("k=", k_opt), line = 0.5)
title(paste0("Method baseline=", cv_base$Funcomp.CGL.fit[[1]]$ref), outer=TRUE, line = -2)
par(mfrow=c(1,1))
##set a cutoff when you compute nonzeros
# L2-norm cutoff: keep a group when its norm exceeds 1% of the overall norm;
# this overwrites the exact-zero selection and is stored in base$Non.zero_cut.
Non.zero <- apply(beta_C, 1, function(x)
  ifelse(sqrt(sum(x^2)) > sqrt(sum(beta_C^2))/100, TRUE, FALSE))
Non.zero <- (1:p)[Non.zero]
Non.zero


# Collect performance summaries for the baseline (reference) fit.
base <- list()
# MSE <- crossprod(y -  cbind2(cbind(cv_cgl$Funcomp.CGL.fit[[k_opt - k_list[1]+ 1]]$Z,
# Zc), 1) %*% beta) / length(y)
# NOTE(review): X_train is built from cv_cgl's stored design, not cv_base's.
# This looks like a copy-paste from the cgl section, but it may be deliberate:
# if coef(cv_base) returns the reconstructed full-composition coefficients
# (length p*k + m + 1, as the k_opt formula above presumes), they conform
# with cv_cgl's full p*k-column Z but NOT with cv_base's (p-1)*k-column
# log-ratio design — confirm against the package internals before "fixing".
X_train <- cbind2(cbind(cv_cgl$Funcomp.CGL.fit[[k_opt - k_list[1]+ 1]]$Z, Zc), 1)
MSE <- crossprod(y -  X_train %*% beta) / length(y)
# R_sqr <- 1 - crossprod(y -  cbind2(cbind(cv_cgl$Funcomp.CGL.fit[[k_opt - k_list[1]+ 1]]$Z,
# Zc), 1) %*% beta) / crossprod(y -  mean(y))
R_sqr <- crossprod(y -  X_train %*% beta)
R_sqr <- 1 - R_sqr / crossprod(y -  mean(y))

# outer_maxiter = 0 runs no outer optimization — this call appears to be used
# only to construct the full-composition test design obj$Z.
obj <- FuncompCGL(y = Test$data$y, X = Test$data$Comp, k = k_opt, nlam = 1, outer_maxiter = 0)
# PE <- sum((Test$data$y - cbind2(cbind(obj$Z, Test$data$Zc), 1) %*% beta)^2)
# / length(drop(Test$data$y))
X_test <- cbind2(cbind(obj$Z, Test$data$Zc), 1)
PE <- sum((y_test - X_test %*% beta)^2) / length(y_test)
base$pred_error <- c(MSE = MSE, PE = PE, Rsqr_train = R_sqr)

# Non.zero here is the cutoff-based selection computed above.
base$Non.zero_cut <- Non.zero
# Estimation / selection error metrics from the package helper.
base <- c(base,
          ERROR_fun(beta_fit = beta, beta_true = Data$beta,
                    basis_fit = B, basis_true = Data$basis.info[,-1],
                    sseq = Data$basis.info[, 1],
                    m = m, p = p, Nzero_group = length(Non_zero.true), tol = 0),
          k = k_opt)
base$coef <- list(beta_C = beta_C, beta_c = tail(beta, m1))


## End(Not run)
# Source: jiji6454/Rpac_compReg documentation (built on May 31, 2019, 5:01 a.m.)