Run_simulation/compCL_test/naive/glmnet_str=TRUE.R

devtools::use_build_ignore("Run_simulation")
library("compReg")
library("glmnet")
#simulation 1

#parameter set up
n = 50
p = 30
rho = 0.2
sigma = 0.5
gamma = 0.5
add.on = 1:5
beta = c(1, -0.8, 0.6, 0, 0, -1.5, -0.5, 1.2)
beta = c( beta, rep(0, times = p - length(beta)) )
pos_group = c(1:3, 6:8)
neg_group = (1:p)[-pos_group]
intercept = FALSE
# Comp_data = comp_simulation(n = n, p = p, rho = rho, sigma = sigma, gamma  = gamma, add.on = add.on,
#                             beta = beta, intercept = intercept)
#
#
# m1 <- compCL(y = Comp_data$y, Z = Comp_data$X.comp, Zc = Comp_data$Zc, intercept = Comp_data$intercept,
#              pf = rep(1, times = p),
#              lam = NULL, nlam = 100,lambda.factor = ifelse(n < p, 0.05, 0.001),
#              dfmax = 20,
#              #pfmax = min(dfmax * 1.5, p),
#              #u = u,
#              mu_ratio = 1, tol = 1e-10,
#              outer_maxiter = 1e8, outer_eps = 1e-10,
#              inner_maxiter = 1e3, inner_eps = 1e-6)

nfolds = 10
nlam = 100
trim= 0.05
mu_ratio = 0
tol = 1e-10
dfmax = p

#table set up
rep <- 100
result_minlam <- matrix(NA, nrow = 100, ncol = 6)
colnames(result_minlam) <- c("PE", "L1", "L2", "Linif", "FP", "FN")
result_min1se <- matrix(NA, nrow = 100, ncol = 6)
colnames(result_min1se) <- c("PE", "L1", "L2", "Linif", "FP", "FN")
result_GIC <- matrix(NA, nrow = 100, ncol = 6)
colnames(result_GIC) <- c("PE", "L1", "L2", "Linif", "FP", "FN")
B_minlam <- matrix(NA, nrow = 100, ncol = p + 1)
B_GIC <- matrix(NA, nrow = 100, ncol = p + 1)
B_min1se <- matrix(NA, nrow = 100, ncol = p + 1)

#100 simulation
glmnet.control(fdev = 0, devmax = 100)
a <- log(log(n)) / n * log(max(p, n))
for(l in 1:rep) {
  set.seed(l)
  Comp_data = comp_simulation(n = n, p = p, rho = rho, sigma = sigma, gamma  = gamma, add.on = add.on,
                              beta = beta, intercept = intercept)
  
  
  #cv.compCL
  foldid <- sample(rep(seq(nfolds), length = n))
  comp_cvm <- cv.glmnet(x = log(Comp_data$X.comp),  y = Comp_data$y,
                                  weights = rep(1, times = n), penalty.factor = rep(1, times = p),
                                  alpha = 1,
                                  nlambda = 100, 
                                  lambda.min.ratio = 0.001,
                                  standardize = TRUE,
                                  intercept = FALSE,
                                  #thresh = 1e-20,
                                  type.gaussian = "na",
                                  offset = NULL,
                                  type.measure = "mse", 
                                  #nfolds = 10, 
                                  foldid = foldid,
                                  grouped = FALSE, keep = TRUE
                        #,dfmax = dfmax
  )
  
  
   
  B_minlam[l, ] <- as.vector(coef(comp_cvm, s = "lambda.min"))
  B_min1se[l, ] <- as.vector(coef(comp_cvm, s = "lambda.1se"))
  
  pos_select <- which(abs(B_minlam[l, ]) > 0 )
  FP_select <- pos_select[which( pos_select %in% neg_group )]
  FN_select <- pos_group[which( !(pos_group %in% pos_select) )]
  result_minlam[l, 5] <- length(FP_select)
  result_minlam[l, 6] <- length(FN_select)
  result_minlam[l, 1] <- sum( (Comp_data$y - cbind2(1, log(Comp_data$X.comp)) %*% B_minlam[l, ])^2) / n
  
  diff <- B_minlam[l, 1:p] - beta
  result_minlam[l, 2] <- sum(abs(diff))
  result_minlam[l, 3] <- sqrt(sum(diff^2))
  result_minlam[l, 4] <- max(abs(diff))
  
  
  #B_min1se[l, ] <- coef(comp_cvm, trim = FALSE, s = "lam.1se")$beta
  
  pos_select <- which(abs(B_min1se[l, ]) > 0 )
  FP_select <- pos_select[which( pos_select %in% neg_group )]
  FN_select <- pos_group[which( !(pos_group %in% pos_select) )]
  result_min1se[l, 5] <- length(FP_select)
  result_min1se[l, 6] <- length(FN_select)
  result_min1se[l, 1] <- sum( (Comp_data$y - cbind2(1, log(Comp_data$X.comp)) %*% B_min1se[l, ])^2) / n
  
  diff <- B_min1se[l, 1:p] - beta
  result_min1se[l, 2] <- sum(abs(diff))
  result_min1se[l, 3] <- sqrt(sum(diff^2))
  result_min1se[l, 4] <- max(abs(diff))
  
  fit <- predict(comp_cvm$glmnet.fit, newx = log(Comp_data$X.comp))
  GIC <- vector()
  for(j in 1:dim(fit)[2]) {
    SSE <- sum((Comp_data$y - fit[j])^2) / n
    GIC[j] <- log(SSE)
    GIC[j] <- GIC[j] + length(which(abs(comp_cvm$glmnet.fit$beta[, j]) > 0)) * a
  }
  
  id <- which.min(GIC)
  cat("id", id, "\r\n")
  B_GIC[l, ] <- c(0, as.vector(comp_cvm$glmnet.fit$beta[, id]))
  
  pos_select <- which(abs(B_GIC[l, ]) > 0 )
  FP_select <- pos_select[which( pos_select %in% neg_group )]
  FN_select <- pos_group[which( !(pos_group %in% pos_select) )]
  result_GIC[l, 5] <- length(FP_select)
  result_GIC[l, 6] <- length(FN_select)
  result_GIC[l, 1] <- sum( (Comp_data$y - cbind2(1, log(Comp_data$X.comp)) %*%  B_GIC[l, ])^2) / n
  
  diff <-  B_GIC[l, 1:p] - beta
  result_GIC[l, 2] <- sum(abs(diff))
  result_GIC[l, 3] <- sqrt(sum(diff^2))
  result_GIC[l, 4] <- max(abs(diff))
}


#formulate tables
table_GIC <- matrix(NA, nrow = 2, ncol = 6)
table_minlam <- matrix(NA, nrow = 2, ncol = 6)
table_min1se <- matrix(NA, nrow = 2, ncol = 6)
colnames(table_GIC) <- c("MSE", "L1", "L2", "Linif", "FP", "FN")
colnames(table_minlam) <- c("MSE", "L1", "L2", "Linif", "FP", "FN")
colnames(table_min1se) <- c("MSE", "L1", "L2", "Linif", "FP", "FN")
rownames(table_GIC) <- c("MEAN", "SD")
rownames(table_minlam) <- c("MEAN", "SD")
rownames(table_min1se) <- c("MEAN", "SD")

table_GIC[1, ] <- apply(result_GIC, 2, mean)
table_GIC[2, ] <- apply(result_GIC, 2, sd)
table_minlam[1, ] <- apply(result_minlam, 2, mean)
table_minlam[2, ] <- apply(result_minlam, 2, sd)
table_min1se[1, ] <- apply(result_min1se, 2, mean)
table_min1se[2, ] <- apply(result_min1se, 2, sd)


PE <- matrix(NA, nrow = 100, ncol = 3)
for(l in 1:rep) {
  set.seed(l + 100)
  Comp_data = comp_simulation(n = n, p = p, rho = rho, sigma = sigma, gamma  = gamma, add.on = add.on,
                              beta = beta, intercept = intercept)
  
  PE[l, 1] <- sum((Comp_data$y - log(Comp_data$X.com) %*% B_minlam[l, -1])^2)/n
  PE[l, 2] <- sum((Comp_data$y - log(Comp_data$X.com) %*% B_min1se[l, -1])^2)/n
  PE[l, 3] <- sum((Comp_data$y - log(Comp_data$X.com) %*% B_GIC[l, -1])^2)/n
}


table_GIC <- cbind(table_GIC, c(MEAN = mean(PE[, 3]), SD = sd(PE[, 3])))
table_minlam <- cbind(table_minlam, c(MEAN = mean(PE[, 1]), SD = sd(PE[, 1])))
table_min1se <- cbind(table_min1se, c(MEAN = mean(PE[, 2]), SD = sd(PE[, 2])))

colnames(table_GIC)[7] <- "PE"
colnames(table_minlam)[7] <- "PE"
colnames(table_min1se)[7] <- "PE"

result <- list(minlam = table_minlam,
               min1se = table_min1se, 
               GIC = table_GIC)
save(result, file = "Run_simulation/compCL_test/naive/Lin_setting1-1_glmnet_str=TRUE.Rda")
jiji6454/Rpac_compReg documentation built on May 31, 2019, 5:01 a.m.