#!/home/statsadmin/R/bin/Rscript
#####package load ######
library(methods)
library(stats)
library(MASS)
library(splines)
library(plyr)
library(dplyr)
library(Rcpp)
library(RcppArmadillo)
library(gglasso)
library(compReg)
library(fda)
library(orthogonalsplinebasis)
# library("compReg", lib.loc = "/home/zsun/Rlibs")
# library("fda", lib.loc = "/home/zsun/Rlibs")
# library("orthogonalsplinebasis", lib.loc = "/home/zsun/Rlibs")
source("ax.R")
W_fun <- function(x){
x <- apply(x, 2, sd)
x <- 1/x
x <- diag(x)
return(x)
}
#####package load ######
###### parameter read ######
args <- commandArgs(trailingOnly = TRUE)
seed <- as.numeric(args[1]) + 1
###### parameter read ######
######Parameter for data generate ####
#####Parameter for cross validation #####
nfolds <- 10
k_list <- 4:6
a <- k_list[1]-1
n = 50
p = 30
intercept = TRUE
m = 0
SNR = 1
sigma = 2
obs_spar = 0.6
df_beta = 5
ns = 100
theta.add = c(1,2,5,6)
gamma = 0.5
beta_C_true = matrix(0, nrow = p, ncol = df_beta)
beta_C_true[3, ] <- c(-0.8, -0.8 , 0.4 , 1 , 1)
beta_C_true[4, ] <- c(0.5, 0.5, -0.6 ,-0.6, -0.6)
beta_C_true[1, ] <- c(-0.5, -0.5, -0.5 , -1, -1)
beta_C_true[2, ] <- c(0.8, 0.8, 0.7, 0.6, 0.6)
beta_0_true <- 1
rho_W = 0
rho_X = 0
df_W = 5
ns = 100
#####Parameter for cross validation #####
#####Parameter for regression #####
dfmax <- floor(p * 2/3)
tol <- 1e-6
nlam <- 100
lambda.factor <- ifelse(n < p * k_list[1], 0.01, 0.001)
pfmax = min(dfmax * 1.5, p)
trim = 0.05
inner_eps = 1e-6
inner_maxiter = 1e+3
outer_eps = 1e-10
outer_maxiter = 1e+6
mu_ratio = 1
#####Parameter for regression #####
#####parameter variable generate ######
#set.seed(seed)
rule1 = "lam.1se"
rule2 = "lam.min"
data <- Model(n = n, p = p, intercept = intercept, m = m,
ns = ns, obs_spar = obs_spar,
SNR = SNR, sigma = sigma,
theta.add = theta.add, gamma = gamma,
df_W = df_W,
range_beta_c = beta_0_true,
rho_X = rho_X, rho_W = rho_W,
beta_C = as.vector(t(beta_C_true)))
data2 <- Model(n = n, p = p, intercept = intercept, m = m,
ns = ns, obs_spar = obs_spar,
SNR = SNR, sigma = sigma,
theta.add = theta.add, gamma = gamma,
df_W = df_W,
range_beta_c = beta_0_true,
rho_X = rho_X, rho_W = rho_W,
beta_C = as.vector(t(beta_C_true)))
foldid <- sample(rep(seq(nfolds), length = n))
ref <- sample(p, 1)
sseq_complete <- seq(from = 0, to = 1, length.out = ns)
basis_true <- Genbasis(sseq = sseq_complete, df = df_beta, degree = 3, type = "bs")
naive <- list()
bl <- list()
object <- as.list(1:length(k_list))
#####parameter variable generate ######
#####clg #####
cat("cgl method \r\n")
cgl <- list()
cvm2.6 <- cv.FuncompCGL(y = data$data$y #/ sd(data$data$y)
, X =data$data$Comp #data$data.raw$Z_ITG #data$data$Comp#data$data.raw$Z_t.full #data$data.raw$Z_ITG#
, Zc = data$data$Zc
, intercept = intercept,
W = W_fun,
#W = rep(1, times = p),
foldid = foldid, #nfolds = nfolds,
trim = trim,
dfmax = dfmax, pfmax = pfmax,
lambda.factor = lambda.factor, nlam = nlam,
k = k_list, tol = tol, basis_fun = "bs",
outer_eps = outer_eps , outer_maxiter = outer_maxiter,
inner_maxiter = inner_maxiter, inner_eps = inner_eps
,mu_ratio = mu_ratio
)
beta1 <- coef(cvm2.6, s = rule1)
m <- ifelse(is.null(m), 0, m)
k_opt <- (length(drop(beta1)) - (m + 1)) / p
beta_C <- vet(beta1, p = p, k = k_opt)$C
if(max(abs(colSums(beta_C)) > 1e-8)) cat("Column sum of coef matrix is nonn zero \r\n")
beta_c <- vet(beta1, p = p, k = k_opt)$b
basis_fit <- Genbasis(sseq = sseq_complete, df = k_opt, degree = 3, type = "bs")
MSE <- crossprod(data$data$y - cbind2(cbind(cvm2.6$Funcomp.CGL.fit[[k_opt - a]]$Z, data$data$Zc), 1) %*% beta1) / n
m2.6 <- FuncompCGL(y = data2$data$y, X = data2$data$Comp, Zc = data2$data$Zc, intercept = intercept, k = k_opt,
dfmax = floor(p/3*2), lambda.factor = 0.001, mu_ratio = 1, W = W_fun, nlam = 1)
PE <- sum((data2$data$y - cbind2(cbind(m2.6$Z, data2$data$Zc), 1) %*% beta1)^2) / length(drop(data2$data$y))
cgl$pred_error <- c(MSE = MSE, PE = PE)
cgl <- c(cgl,
ERROR_fun(beta_fit = beta1, beta_true = data$beta,
basis_fit = basis_fit, basis_true = basis_true,
sseq = sseq_complete,
m = m, p = p, Nzero_group = 4),
k = k_opt)
cgl$coef <- list(beta_C = beta_C, beta_c = beta_c)
matplot(sseq_complete,
basis_fit %*% t(matrix(beta1[1:(p*k_opt)], nrow = p, ncol = k_opt, byrow = TRUE))
,ylab = "coef", xlab = "TIME", main = "ESTI" #ylim = range(data$beta[1:(p*df_beta)])
)
cgl.1se <- cgl
cgl <- list()
beta1 <- coef(cvm2.6, s = rule2)
m <- ifelse(is.null(m), 0, m)
k_opt <- (length(drop(beta1)) - (m + 1)) / p
beta_C <- vet(beta1, p = p, k = k_opt)$C
if(max(abs(colSums(beta_C)) > 1e-8)) cat("Column sum of coef matrix is nonn zero \r\n")
beta_c <- vet(beta1, p = p, k = k_opt)$b
basis_fit <- Genbasis(sseq = sseq_complete, df = k_opt, degree = 3, type = "bs")
MSE <- crossprod(data$data$y - cbind2(cbind(cvm2.6$Funcomp.CGL.fit[[k_opt - a]]$Z, data$data$Zc), 1) %*% beta1) / n
m2.6 <- FuncompCGL(y = data2$data$y, X = data2$data$Comp, Zc = data2$data$Zc, intercept = intercept, k = k_opt,
dfmax = floor(p/3*2), lambda.factor = 0.001, mu_ratio = 1, W = W_fun, nlam = 1)
PE <- sum((data2$data$y - cbind2(cbind(m2.6$Z, data2$data$Zc), 1) %*% beta1)^2) / length(drop(data2$data$y))
cgl$pred_error <- c(MSE = MSE, PE = PE)
cgl <- c(cgl,
ERROR_fun(beta_fit = beta1, beta_true = data$beta,
basis_fit = basis_fit, basis_true = basis_true,
sseq = sseq_complete,
m = m, p = p, Nzero_group = 4),
k = k_opt)
cgl$coef <- list(beta_C = beta_C, beta_c = beta_c)
matplot(sseq_complete,
basis_fit %*% t(matrix(beta1[1:(p*k_opt)], nrow = p, ncol = k_opt, byrow = TRUE))
,ylab = "coef", xlab = "TIME", main = "ESTI" #ylim = range(data$beta[1:(p*df_beta)])
)
cgl.min <- cgl
#####clg #####
#####naive #####
cat("naive method \r\n")
naive <- list()
cvm2.7 <- cv.FuncompCGL(y = data$data$y #/ sd(data$data$y)
, X =data$data$Comp #data$data.raw$Z_ITG #data$data$Comp#data$data.raw$Z_t.full #data$data.raw$Z_ITG#
, Zc = data$data$Zc, intercept = intercept,
W = W_fun,
#W = rep(1, times = p),
foldid = foldid, #nfolds = nfolds,
trim = trim,
dfmax = dfmax, pfmax = pfmax,
lambda.factor = lambda.factor, nlam = nlam,
k = k_list,
tol = tol, basis_fun = "bs",
outer_maxiter = as.integer(1),
inner_maxiter = outer_maxiter * inner_maxiter, inner_eps = outer_eps,
mu_ratio = 0
)
beta2 <- coef(cvm2.7, s = rule1)
#m <- ifelse(is.null(m), 0, m)
k_opt <- (length(drop(beta2)) - (m + 1)) / p
beta_C <- vet(beta2, p = p, k = k_opt)$C
#if(max(abs(colSums(beta_C)) > 1e-8)) cat("Column sum of coef matrix is nonn zero \r\n")
beta_c <- vet(beta2, p = p, k = k_opt)$b
basis_fit <- Genbasis(sseq = sseq_complete, df = k_opt, degree = 3, type = "bs")
MSE <- crossprod(data$data$y - cbind2(cbind(cvm2.7$Funcomp.CGL.fit[[k_opt - a]]$Z, data$data$Zc), 1) %*% beta2) / n
m2.7 <- FuncompCGL(y = data2$data$y, X = data2$data$Comp, Zc = data2$data$Zc, intercept = intercept, k = k_opt,
dfmax = floor(p/3*2), lambda.factor = 0.001, mu_ratio = 1, W = W_fun, nlam = 1)
PE <- sum((data2$data$y - cbind2(cbind(m2.7$Z, data2$data$Zc), 1) %*% beta2)^2) / length(drop(data2$data$y))
naive$pred_error <- c(MSE = MSE, PE = PE)
naive <- c(naive,
ERROR_fun(beta_fit = beta2, beta_true = data$beta,
basis_fit = basis_fit, basis_true = basis_true,
sseq = sseq_complete,
m = m, p = p, Nzero_group = 4),
k = k_opt)
naive$coef <- list(beta_C = beta_C, beta_c = beta_c)
naive.1se <- naive
naive <- list()
beta2 <- coef(cvm2.7, s = rule2)
#m <- ifelse(is.null(m), 0, m)
k_opt <- (length(drop(beta2)) - (m + 1)) / p
beta_C <- vet(beta2, p = p, k = k_opt)$C
#if(max(abs(colSums(beta_C)) > 1e-8)) cat("Column sum of coef matrix is nonn zero \r\n")
beta_c <- vet(beta2, p = p, k = k_opt)$b
basis_fit <- Genbasis(sseq = sseq_complete, df = k_opt, degree = 3, type = "bs")
MSE <- crossprod(data$data$y - cbind2(cbind(cvm2.7$Funcomp.CGL.fit[[k_opt - a]]$Z, data$data$Zc), 1) %*% beta2) / n
m2.7 <- FuncompCGL(y = data2$data$y, X = data2$data$Comp, Zc = data2$data$Zc, intercept = intercept, k = k_opt,
dfmax = floor(p/3*2), lambda.factor = 0.001, mu_ratio = 1, W = W_fun, nlam = 1)
PE <- sum((data2$data$y - cbind2(cbind(m2.7$Z, data2$data$Zc), 1) %*% beta2)^2) / length(drop(data2$data$y))
naive$pred_error <- c(MSE = MSE, PE = PE)
naive <- c(naive,
ERROR_fun(beta_fit = beta2, beta_true = data$beta,
basis_fit = basis_fit, basis_true = basis_true,
sseq = sseq_complete,
m = m, p = p, Nzero_group = 4),
k = k_opt)
naive$coef <- list(beta_C = beta_C, beta_c = beta_c)
naive.min <- naive
#####naive #####
#### baseline #####
cat("baseline method \r\n")
P_bl <- c(ref, 1)
Baseline.1se <- as.list(1:2)
Baseline.min <- as.list(1:2)
names(Baseline) <- paste0("bl", P_bl)
for(q in 1:2) {
for(l in 1:length(k_list)){
object[[l]] <- cvm2.6$Funcomp.CGL.fit[[k_list[l]-a]]$Z
group_index <- matrix(1:((p)*k_list[l]), nrow = k_list[l])
Z_ref <- object[[l]][, group_index[, P_bl[q] ]]
object[[l]] <- object[[l]][, -group_index[, P_bl[q]]]
group_index <- matrix(1:((p-1)*k_list[l]), nrow = k_list[l])
for(j in 1:(p-1)) {
object[[l]][, group_index[, j]] <- object[[l]][, group_index[, j]] - Z_ref
}
object[[l]] <- FuncompCGL(y = data$data$y, X = object[[l]], Zc = data$data$Zc, lam = NULL, intercept = intercept,
#W = f1,
nlam = 1, outer_maxiter = 0, mu_ratio = 0,
k = k_list[l],
#dfmax = dfmax -1, pfmax = pfmax - 1,
tol = tol, W = W_fun)
}
lam0 <- max(sapply(object, "[[", "lam"))
for(l in 1:length(k_list)){
object[[l]] <- FuncompCGL(y = data$data$y, X = object[[l]]$Z, Zc = data$data$Zc,intercept = intercept,
W = object[[l]]$W,
nlam = nlam, lambda.factor = lambda.factor,
inner_eps = outer_eps, inner_maxiter = inner_maxiter * outer_maxiter, mu_ratio = as.integer(0),
lam = lam0,
k = k_list[l],
dfmax = dfmax - 1, pfmax = pfmax - 1,
tol = tol)
}
cv.nlam <- sapply(object, function(x) length(drop(x$lam)))
cv.nlam_id <- which.min(cv.nlam)
cv.lam <- object[[cv.nlam_id]]$lam
cv.nlam <- cv.nlam[cv.nlam_id]
cvm <- matrix(NA, nrow = length(k_list), ncol = max(cv.nlam))
rownames(cvm) <- paste0("df=", k_list)
colnames(cvm) <- seq(cv.nlam)
cvsd <- cvm
cvm.trim <- cvm
cvsd.trim <- cvm
for(l in 1:length(k_list)) {
outlist <- as.list(seq(nfolds))
for(j in 1:nfolds) {
cat("folds", j, "\r\n")
which <- foldid == j
y_train <- data$data$y[!which]
Z <- object[[l]]$Z
Z_train <- object[[l]]$Z[!which, , drop = FALSE]
Zc_train <- data$data$Zc[!which, , drop = FALSE]
outlist[[j]] <- FuncompCGL(y = y_train, X = Z_train, Zc = Zc_train, k = k_list[l], lam = cv.lam, W = object[[l]]$W,
intercept = intercept,
inner_eps = outer_eps, inner_maxiter * outer_maxiter, mu_ratio = as.integer(0),
dfmax = dfmax - 1, pfmax = pfmax - 1, tol = tol)
}
#X <- cbind(Z, Zc)
cvstuff <- cv.test(outlist, drop(data$data$y), X = cbind(object[[l]]$Z, Zc = data$data$Zc), foldid, lam = cv.lam, trim = 0)
cvm[l, ] <- cvstuff$cvm
cvsd[l, ] <- cvstuff$cvsd
}
lammin <- ggetmin(lam = drop(cv.lam), cvm = cvm, cvsd =cvsd, k_list = k_list)
switch(rule1,
"lam.1se"={
s <- lammin$lam.1se[1]
k_se <- lammin$lam.1se[2]
},
"lam.min" = {
s <- lammin$lam.min[1]
k_se <- lammin$lam.min[2]
})
beta3 <- coef(object[[k_se - a]], s = s)
MSE <- crossprod(data$data$y - cbind2(cbind(object[[k_se - a]]$Z, data$data$Zc), 1) %*% beta3) / n
beta3 <- c(beta3[ifelse(P_bl[q]==1,0,1):((P_bl[q]-1)*k_se)],
-colSums(matrix(beta3[1:((p-1) *(k_se))], byrow = TRUE, nrow = p-1))
,beta3[((P_bl[q]-1)*k_se+1):length(beta3)])
beta_C <- vet(beta3, p = p, k = k_se)$C
if(max(abs(colSums(beta_C)) > 1e-8)) cat("Column sum of coef matrix is nonn zero \r\n")
beta_c <- vet(beta3, p = p, k = k_se)$b
basis_fit <- Genbasis(sseq = sseq_complete, df = k_se, degree = 3, type = "bs")
m2.8 <- FuncompCGL(y = data2$data$y, X = data2$data$Comp, Zc = data2$data$Zc, intercept = intercept, k = k_se,
dfmax = floor(p/3*2), lambda.factor = 0.001, mu_ratio = 1, W = W_fun, nlam = 1)
PE <- sum((data2$data$y - cbind2(cbind(m2.8$Z, data2$data$Zc), 1) %*% beta3)^2) / length(drop(data2$data$y))
naive$pred_error <- c(MSE = MSE, PE = PE)
Baseline.1se[[q]] <- list(pred_error = c(MSE = MSE, PE = PE))
Baseline.1se[[q]] <- c(Baseline.1se[[q]],
ERROR_fun(beta_fit = beta3, beta_true = data$beta,
basis_fit = basis_fit, basis_true = basis_true,
sseq = sseq_complete,
m = m, p = p, Nzero_group = 4),
k = k_se)
Baseline.1se[[q]]$coef <- list(beta_C = beta_C, beta_c = beta_c)
switch(rule2,
"lam.1se"={
s <- lammin$lam.1se[1]
k_se <- lammin$lam.1se[2]
},
"lam.min" = {
s <- lammin$lam.min[1]
k_se <- lammin$lam.min[2]
})
beta3 <- coef(object[[k_se - a]], s = s)
MSE <- crossprod(data$data$y - cbind2(cbind(object[[k_se - a]]$Z, data$data$Zc), 1) %*% beta3) / n
beta3 <- c(beta3[ifelse(P_bl[q]==1,0,1):((P_bl[q]-1)*k_se)],
-colSums(matrix(beta3[1:((p-1) *(k_se))], byrow = TRUE, nrow = p-1))
,beta3[((P_bl[q]-1)*k_se+1):length(beta3)])
beta_C <- vet(beta3, p = p, k = k_se)$C
if(max(abs(colSums(beta_C)) > 1e-8)) cat("Column sum of coef matrix is nonn zero \r\n")
beta_c <- vet(beta3, p = p, k = k_se)$b
basis_fit <- Genbasis(sseq = sseq_complete, df = k_se, degree = 3, type = "bs")
m2.8 <- FuncompCGL(y = data2$data$y, X = data2$data$Comp, Zc = data2$data$Zc, intercept = intercept, k = k_se,
dfmax = floor(p/3*2), lambda.factor = 0.001, mu_ratio = 1, W = W_fun, nlam = 1)
PE <- sum((data2$data$y - cbind2(cbind(m2.8$Z, data2$data$Zc), 1) %*% beta3)^2) / length(drop(data2$data$y))
naive$pred_error <- c(MSE = MSE, PE = PE)
Baseline.min[[q]] <- list(pred_error = c(MSE = MSE, PE = PE))
Baseline.min[[q]] <- c(Baseline.min[[q]],
ERROR_fun(beta_fit = beta3, beta_true = data$beta,
basis_fit = basis_fit, basis_true = basis_true,
sseq = sseq_complete,
m = m, p = p, Nzero_group = 4),
k = k_se)
Baseline.min[[q]]$coef <- list(beta_C = beta_C, beta_c = beta_c)
}
#### baseline #####
# train_data <- list(y = y, X = X, Zc = Zc, intercept = intercept)
# test_data <- list(y = y_test, X = X_test, Zc = Zc_test, intercept = intercept)
# true_data <- list(train = train_data, test = test_data, beta = Data$beta, df_beta = df_beta)
out_1se <- list(naive = naive.1se, cgl = cgl.1se, bl1 = Baseline.1se[[1]], bl2 = Baseline.1se[[2]])
out_min <- list(naive = naive.min, cgl = cgl.min, bl1 = Baseline.min[[1]], bl2 = Baseline.min[[2]])
out <- list("1se" = out_1se, "min" = out_min,
data_train = data, data_test = data2)
out$parameters.info = c(seed = seed, foldid = foldid, baseline = P_bl)
save(out, file = args[2])
proc.time()
out_1se$naive[1:5]
out_1se$cgl[1:5]
out_1se$bl1[1:5]
out_1se$bl2[1:5]
out_min$naive[1:5]
out_min$cgl[1:5]
out_min$bl1[1:5]
out_min$bl2[1:5]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.