#' @title
#' Fits regularization paths for longitudinal compositional data with group-lasso penalty.
#' @description
#' Fits regularization paths for longitudinal compositional data with group-lasso penalty at a sequence of regularization parameters lambda and fixed degree of freedom of basis.
#'
# @usage
# FuncompCGL <- function(y, X, Zc = NULL, intercept = TRUE,
# ref = NULL, k, degree = 3,
# basis_fun = c("bs", "OBasis", "fourier"),
# insert = c("FALSE", "X", "basis"),
# method = c("trapezoidal", "step"),
# interval = c("Original", "Standard"), Trange,
# T.name = "TIME", ID.name = "Subject_ID",
# W = rep(1,times = p - length(ref)),
# dfmax = p - length(ref),
# pfmax = min(dfmax * 1.5, p - length(ref)),
# lam = NULL, nlam = 100,
# lambda.factor = ifelse(n < p1, 0.05, 0.001),
# tol = 0, mu_ratio = 1.01,
# outer_maxiter = 1e+08, outer_eps = 1e-8,
# inner_maxiter = 1e+4, inner_eps = 1e-8)
#
#' @param y a vector of response variable.
#'
#' @param X a data frame or matrix.
#' \itemize{
#' \item If \code{dim(X)[1]} > \eqn{n}, \eqn{n} is the sample size,
#' \code{X} should be a data frame of longitudinal compositinal predictors with number \eqn{p},
#' including subject ID and time variable. Order of subject ID should be the same as that of \code{y}.
#' \item If \code{dim(X)[1]}=\eqn{n}, \code{X} is considered as after taken integration, a \eqn{n*(p*k)} matrix.
#' }
#'
#' @param T.name,ID.name characters specifying names of time varaible and Subject ID respectively in X,
#' only needed as X is data frame of longitudinal compositinal varaibles.
#' Default are \code{"TIME"} and \code{"Subject_ID"}.
#'
#' @param Zc A design matrix for control variables, could be missing. Default is NULL. No penalty is imposed.
#'
#' @param k a scaler, degree of freedom of basis.
#' @param ref reference variable. If \code{ref} is set to a scalar between \code{[1,p]}, log-contract method is applied with the variable
#' \code{ref} as baseline. If \code{ref} = \code{NULL} (default value), constrained group lasso method is applied
#' @param degree degree of basis - default value is 3.
#'
#' @param basis_fun a function of basis. For now one of the following three types,
#' \itemize{
#' \item \code{bs} B-splines see \code{\link{bs}}.
#' \item \code{OBasis} Orthoganal B-splies, see \code{\link{orthogonalsplinebasis}}.
#' \item \code{fourier} Fourier basis, see \code{\link{fda}}
#' }
#' Default is \code{"bs"}.
#'
#' @param interval a character string sepcifying domain of integral
#' \itemize{
#' \item "Original" On original time scale, interval = range(Time).
#' \item "Standard" Time points are mapped onto [0,1], interval = (0,1).
#' }
#' Default is \code{"Original"}
#'
#' @param insert way to interpolation. If \code{insert} = \code{"X"} or \code{"basis"}, dense time sequence is generated, equally space
#' by \code{min(diff(sseq))/20)}, where \code{sseq} is sorted set of all observed time points.
#' \itemize{
#' \item \code{"FALSE"} no interpolation.
#' \item \code{"X"} linear interpolation of compositional data.
#' \item \code{"basis"} compositional data is considered as step function, imposing basis on un-observed time points for each subject.
#' }
#' Default is \code{"FALSE"}
#'
#' @param method method used to approximate integral.
#' \itemize{
#' \item \code{"trapezoidal"} Sum up area under trapezoidal formulated by values of function at two adjacent observed time points. See \code{\link{ITG_trap}}.
#' \item \code{"step"} Sum up area under rectangle formulated by step function at observed time points. See \code{\link{ITG_step}}.
#' }
#' Default is \code{"trapezoidal"}
#'
#' @param W a vector in length of p (the total number of groups), matrix with dimension \code{p1*p1} or character specifying function
#' used to calculate inverted weight matrix for each group.
#' \itemize{
#' \item If vector, works as penalty factor. Separate penalty weights can be applied to each group of beta'ss.
#' to allow differential shrinkage. Can be 0 for some groups, which implies no shrinkage, and results in that group
#' always being included in the model.
#' \item If matrix, a block diagonal matrix. Diagonal elements are inverted weights matrics for each group.
#' \item if character, user should provide the function for inverted weights matrics.
#' }
#' Default value is rep(1, times = p).
#' @param Trange range of time points
#' @inheritParams cglasso
#'
#' @return An object with S3 calss \code{\link{FuncompCGL}}
#' \item{Z}{integral matrix for longitudinal compositinal predictors with dimension \eqn{n*(p*k)}.}
#' \item{lam}{the actual sequence of \code{lam} values used.}
#' \item{df}{the number of non-zero groups in estimated coefficients for \code{Z} at each value of \code{lam}}
#' \item{beta}{a matrix of coefficients for \code{cbind{Z, Zc, 1_n}}, with \code{nlam} rows.}
#' \item{dim}{dimension of coefficient matrix}
#' \item{call}{the call that produced this object.}
#'
#'
#' @examples
#'
#' df_beta = 5
#' p = 30
#' beta_C_true = matrix(0, nrow = p, ncol = df_beta)
#' beta_C_true[3, ] <- c(-0.8, -0.8 , 0.4 , 1 , 1)
#' beta_C_true[4, ] <- c(0.5, 0.5, -0.6 ,-0.6, -0.6)
#' beta_C_true[1, ] <- c(-0.5, -0.5, -0.5 , -1, -1)
#' beta_C_true[2, ] <- c(0.8, 0.8, 0.7, 0.6, 0.6)
#' Data <- Model(n = 50, p = p, m = 2, intercept = TRUE,
#' SNR = 2, sigma = 2,
#' rho_X = 0, rho_W = 0,
#' df_W = 5, df_beta = df_beta,
#' ns = 100, obs_spar = 0.2, theta.add = c(3,4,5),
#' beta_C = as.vector(t(beta_C_true)))
#' y <- Data$data$y
#' X <- Data$data$Comp
#' Zc <- Data$data$Zc
#' intercept <- Data$data$intercept
#'
#' k_use <- df_beta
#' m1 <- FuncompCGL(y = y, X = X , Zc = Zc, intercept = intercept,
#' k = k_use, basis_fun = "bs",
#' insert = "FALSE", method = "t",
#' dfmax = p, tol = 1e-6)
#'
#' beta <- coef(m1, s = m1$lam[20])
#' #beta <- coef(m1)
#'
#' beta_C <- matrix(beta[1:(p*k_use)], nrow = p, byrow = TRUE)
#' colSums(beta_C)
#' Non.zero <- apply(beta_C, 1, function(x) ifelse(max(abs(x)) == 0, FALSE, TRUE))
#' Non.zero <- (1:p)[Non.zero]
#' Non.zero
#' plot(m1, ylab = "L2", p = p , k = k_use)
#'
#' @export
#'
#' @import stats
#' @importFrom splines bs
#' @importFrom fda eval.basis create.fourier.basis
#' @importFrom orthogonalsplinebasis evaluate OBasis expand.knots
#'
#'
FuncompCGL <- function(y, X, Zc = NULL, intercept = TRUE, ref = NULL,
k, degree = 3, basis_fun = c("bs", "OBasis", "fourier"),
insert = c("FALSE", "X", "basis"), method = c("trapezoidal", "step"),
interval = c("Original", "Standard"), Trange,
T.name = "TIME", ID.name = "Subject_ID",
W = rep(1,times = p - length(ref)),
dfmax = p - length(ref), pfmax = min(dfmax * 1.5, p - length(ref)),
lam = NULL, nlam = 100, lambda.factor = ifelse(n < p1, 0.05, 0.001),
tol = 0, mu_ratio = 1.01,
outer_maxiter = 1e+08, outer_eps = 1e-8,
inner_maxiter = 1e+4, inner_eps = 1e-8) {
n <- length(y)
this.call <- match.call()
basis_fun <- match.arg(basis_fun)
method <- match.arg(method)
insert <- match.arg(insert)
interval <- match.arg(interval)
if(!is.null(lam) && TRUE %in% (lam < 0)) stop("User provided lambda must be positive vector")
if(dim(X)[1] == n) {
# Case I: already take integral and ready to go
## 1.
Z <- as.matrix(X)
p1 <- ncol(Z)
p <- p1 / k + length(ref)
if(length(ref)>0) mu_ratio = 0
#p <- p1 / k
# ## 2.Subtracting baseline integral ??????
# if(is.numeric(ref)) {
# if( !(ref %in% 1:p) ) stop("Reference variable is out of range")
# mu_ratio = 0
# group.index <- matrix(1:p1, nrow = k)
# Z_ref <- Z[, group.index[, ref]]
# Z <- Z[, -group.index[, ref]]
# for(j in 1:(p-1)) Z[, group.index[, j]] <- Z[, group.index[, j]] - Z_ref
# p1 <- ncol(Z)
# }
} else {
# Case II: take integral
X <- as.data.frame(X)
X.names <- colnames(X)[!colnames(X) %in% c(T.name, ID.name)]
if( any(X[, X.names] == 0) ) stop("There is entry with value 0")
X[, X.names] <- log(X[, X.names] / rowSums(X[, X.names]))
p <- length(X.names)
#if(is.numeric(ref) && !(ref %in% 1:p)) stop("Reference variable is out of range")
if(is.numeric(ref)) {
if( !(ref %in% 1:p) ) stop("Reference variable is out of range")
mu_ratio = 0
}
# Time range provided in case that sample do not cover the entire range
if(missing(Trange)) Trange <- range(X[, T.name])
switch(interval,
"Standard" = {
#mapping time sequence on to [0,1]
X[, T.name] = (X[, T.name] - min(Trange)) / diff(Trange) #(X[, T.name] - min(Trange[1])) / diff(Trange)
interval = c(0, 1)
Trange = c(0, 1)
},
"Original" = {
interval = Trange
})
# In case that X is not represented in numerical order of Subject_ID
X[, ID.name] <- factor(X[, ID.name], levels = unique(X[, ID.name]))
# generate discrete basis matrix
sseq <- round(sort(unique(c(Trange, as.vector(X[, T.name])))), 10)
if(insert != "FALSE") sseq <- round(seq(from = interval[1], to = interval[2],
#by = length(sseq) * 2,
by = min(diff(sseq))/10), #### digit = 10
10)
# generate knots equally
nknots <- k - (degree + 1)
if(nknots > 0) {
knots <- ((1:nknots) / (nknots + 1)) * diff(interval) + interval[1]
} else knots <- NULL
basis <- switch(basis_fun,
"bs" = bs(x = sseq, df = k, degree = degree,
Boundary.knots = interval, intercept = TRUE),
"fourier" = eval.basis(sseq,
basisobj = create.fourier.basis(rangeval = interval, nbasis = k )),
"OBasis" = evaluate(OBasis(expand.knots(c(interval[1], knots, interval[2])),
order = degree + 1),
sseq))
X[, X.names] <- apply(X[, X.names], 2, function(x, ref) x - ref,
ref = if(is.null(ref)) 0 else X[, X.names[ref], drop = TRUE])
D <- split(X[, c(T.name, X.names[-ifelse(is.null(ref), p + 1, ref)])], X[, ID.name])
p1 <- (p - length(ref)) * k
Z <- matrix(NA, nrow = n, ncol = p1)
for(i in 1:n) Z[i, ] <- ITG(D[[i]], basis, sseq, T.name, interval, insert, method)$integral
}
group.index <- matrix(1:p1, nrow = k)
if( is.vector(W) ) {
###cat(" ,length(W) = ", length(W))
if(length(W) != (p1 / k) ) stop("W shoulde be a vector of length p=", p1/ k)
} else if( is.matrix(W) ) {
if(any(dim(W) - c(p1, p1) != 0)) stop('Wrong dimensions of Weights matrix')
} else if( is.function(W) ){
W_fun <- W
W <- diag(p1)
for(i in 1:(p1/k)) W[group.index[, i], group.index[, i]] <- W_fun(Z[, group.index[, i]])
}
###cat(" ,is.null(ref) = ", is.null(ref))
###cat(" ,dfmax = ", dfmax, "\r\n")
output <- cglasso(y = y, Z = Z, Zc = Zc, k = k, W = W, intercept = intercept,
mu_ratio = mu_ratio,
lam = lam, nlam = nlam, lambda.factor = lambda.factor,
dfmax = dfmax, pfmax = pfmax,
tol = tol,
outer_maxiter = outer_maxiter, outer_eps = outer_eps,
inner_maxiter = inner_maxiter, inner_eps = inner_eps)
output$Z <- Z
output$W <- W
output$call <- this.call
output$ref <- ref
class(output) <- "FuncompCGL"
output$dim <- dim(output$beta)
return(output)
}
# FuncompCGL <- function(y, X, Zc = NULL, intercept = TRUE, k,
# T.name = "TIME", ID.name = "Subject_ID",
# degree = 3, basis_fun = c("bs", "OBasis", "fourier"),
# insert = c("FALSE", "X", "basis"), method = c("trapezoidal", "step"),
# interval = c("Original", "Standard"), Trange,
# W = rep(1, times = p), #pf = rep(1, times = p),
# dfmax = p, pfmax = min(dfmax * 1.5, p),
# lam = NULL, nlam = 100, lambda.factor = ifelse(n < p1, 0.05, 0.001),
# tol = 0, mu_ratio = 1.01,
# outer_maxiter = 1e+08, outer_eps = 1e-8,
# inner_maxiter = 1e+4, inner_eps = 1e-8
# #, ...
# ) {
#
# n <- length(y)
# this.call <- match.call()
#
#
# basis_fun <- match.arg(basis_fun)
# method <- match.arg(method)
# insert <- match.arg(insert)
# interval <- match.arg(interval)
#
# if(dim(X)[1] == n) {
# # already take integral
# Z <- X
# p1 <- dim(X)[2]
# p <- p1 / k
# } else {
# # take integral
# X <- as.data.frame(X)
# X.names <- colnames(X)[!colnames(X) %in% c(T.name, ID.name)]
# p <- length(X.names)
# if( any(X[, X.names] == 0) ) stop("There is entry with value 0")
# p1 <- p * k
# #if(missing(Time)) Time <- X[, T.name]
# Time <- X[, T.name]
# #if(missing(Subject_ID))
# Subject_ID <- unique(X[, ID.name])
# X[, ID.name] <- factor(X[, ID.name], levels = Subject_ID )
#
# if(missing(Trange)) Trange <- range(Time)
# if(interval == "Standard") {
# interval <- c(0,1)
# X[, T.name] <- (Time - min(Trange[1])) / diff(Trange)
# #X[, T.name] <- (Time - min(Time)) / diff(range(Time))
# } else {
# interval <- Trange
# }
#
# sseq <- round(sort(unique(c(Trange, as.vector(X[, T.name])))), 10)
# if(insert != FALSE) sseq <- round(seq(from = interval[1], to = interval[2], by = min(diff(sseq))/20), 10)
#
#
#
# nknots <- k - (degree + 1)
# if(nknots > 0) {
# knots <- ((1:nknots) / (nknots + 1)) * diff(interval) + interval[1]
# } else {
# knots <- NULL
# }
#
# basis <- switch(basis_fun,
#
# "bs" = bs(x = sseq, df = k, degree = degree,
# Boundary.knots = interval, intercept = TRUE),
#
# "fourier" = eval.basis(sseq,
# basisobj = create.fourier.basis(rangeval = interval, nbasis = k )),
#
# "OBasis" = evaluate(OBasis(expand.knots(c(interval[1], knots, interval[2])),
# order = degree + 1),
# sseq)
#
# )
#
#
#
#
#
# X[, X.names] <- log(X[, X.names]/ rowSums(X[, X.names]))
# D <- split(X[, c(T.name, X.names)], X[, ID.name])
#
# Z <- matrix(NA, nrow = n, ncol = p1)
# for(i in 1:n) {
# #cat(i)
# d <- D[[i]]
# Z[i, ] <- ITG(d, basis, sseq, T.name, interval, insert, method)$integral
#
# #Z <- sapply(D, function(x, basis, sseq, T.name, interval, insert, method)
# # ITG(x, basis, sseq, T.name, interval, insert, method)
# # ,sseq = sseq, basis = basis, T.name = T.name, interval = interval, insert = insert, method = method)
# #Z <- t(Z)
#
# }
#
#
#
# }
#
#
# Z <- as.matrix(Z)
# group.index <- matrix(1:p1, nrow = k)
# if( is.vector(W) ) {
# if(length(W) != p) stop("W shoulde be a vector of length p")
# } else if( is.matrix(W) ) {
# if(any(dim(W) - c(p1, p1) != 0)) stop('Wrong dimensions of Weights matrix')
# } else if( is.function(W) ){
# W_fun <- W
# W <- diag(p1)
# for(i in 1:p) W[group.index[, i], group.index[, i]] <- W_fun(Z[, group.index[, i]])
# }
#
# output <- cglasso(y = y, Z = Z, Zc = Zc, k = k, W = W, intercept = intercept,
# mu_ratio = mu_ratio,
# lam = lam, nlam = nlam, lambda.factor = lambda.factor,
# dfmax = dfmax, pfmax = pfmax,
# tol = tol,
# outer_maxiter = outer_maxiter, outer_eps = outer_eps,
# inner_maxiter = inner_maxiter, inner_eps = inner_eps
# #, ...
# )
# output$Z <- Z
# output$W <- W
# output$call <- this.call
# class(output) <- "FuncompCGL"
# output$dim <- dim(output$beta)
# return(output)
# }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.