Nothing
#' Penalized Regression with Lasso and Weighted Fusion Penalties with Given Parameters
#'
#' Performs penalized regression with Lasso penalty and weighted fusion penalty
#' for a given pair of tuning parameters (lambda1 and lambda2), which is
#' determined by the user based on prior knowledge or use any number just for
#' testing purpose.
#'
#' @param data An object of class "WTsmth.data" as generated by prep()
#' @param lambda1 A scalar numeric. Lambda_1 value to be considered. Provided
#' value will be transformed to 2^(lambda1).
#' @param lambda2 A scalar numeric Lambda_2 value to be considered. Provided
#' value will be transformed to 2^(lambda2).
#' @param weight A character. The type of weighting. Must be one of
#' eql, keql, wcs, kwcs, wif, kwif indicating
#' equal weight, K x equal weight, Cosine similarity, K x cosine similarity,
#' inverse frequency, and K x inverse frequency, where K is the number of
#' individuals in each CNV-active region.
#' `eql` and `keql` gives equal weight to adjacent CNVs.
#' `wcs` and `kwcs` allow similar CNV fragments to have more similar effect size.
#' `wif` and `kwif` will encourage CNV with lower frequency to borrow
#' information from nearby more frequent CNV fragments.
#' Considering that CNVs usually present in some CNV-active regions and there are
#' large regions in between with no CNV at all. K will describe the number of individuals
#' having any CNV activities in a CNV-active region, and varying the weight according
#' to the sample size across regions.
#' @param family A character. The family of the outcome. Must be one of
#' "gaussian" (Y is continuous) or "binomial" (Y is binary).
#' @param iter.control A list object. Allows user to control iterative
#' update procedure. Allowed elements are "max.iter", the maximum number
#' of iterations; "tol.beta", the difference between consecutive beta
#' updates below which the procedure is deemed converged; and "tol.loss",
#' the difference in consecutive loss updates below which the procedure
#' is deemed converged.
#' @param ... Ignored.
#'
#'
#' @returns A numeric vector. The estimated model parameters
#'
#'
#' @include ctnsSolution.R helpful_tests.R rwlsSolution.R utils.R
#'
#' @export
#'
#' @examples
#' # Note we use here a very small example data set to expedite examples.
#'
#' # load toy dataset
#' data("CNVCOVY")
#'
#' # prepare data format for regression analysis
#'
#' ## Continuous outcome Y_QT
#' frag_data <- prep(CNV = CNV, Y = Y_QT, Z = Cov, rare.out = 0.05)
#' QT_fit <- fit_WTSMTH(frag_data,
#' lambda1 = -5,
#' lambda2 = 21,
#' weight = "eql",
#' family = "gaussian")
#'
#' ## Binary outcome Y_BT
#'
#' # We can directly replace frag_data$Y with Y_BT in the correct format,
#' # ensuring that the ordering matches that of the prepared object.
#'
#' rownames(Y_BT) <- Y_BT$ID
#' frag_data$Y <- Y_BT[names(frag_data$Y), "Y"] |> drop()
#' names(frag_data$Y) <- rownames(frag_data$Z)
#'
#' # Or, we can also repeat the prep() call
#' # frag_data <- prep(CNV = CNV, Y = Y_BT, Z = Cov, rare.out = 0.05)
#'
#' BT_fit <- fit_WTSMTH(frag_data,
#' lambda1 = -5,
#' lambda2 = 6,
#' weight = "eql",
#' family = "binomial")
fit_WTSMTH <- function(data, lambda1, lambda2, weight = NULL,
family = c("gaussian", "binomial"),
iter.control = list(max.iter = 8L,
tol.beta = 10^(-3),
tol.loss = 10^(-6)),
...) {
extras <- list(...)
# take the first value as default
family <- match.arg(family)
stopifnot(
"`data` must be a 'WTsmth.data' object" = !missing(data) && inherits(data, "WTsmth.data"),
"`lambda1 must be a numeric vector" = !missing(lambda1) && .isNumericVector(lambda1, 1L),
"`lambda2 must be a numeric vector" = !missing(lambda2) && .isNumericVector(lambda2, 1L),
"`weight` must be one of eql, keql, wcs, kwcs, wif, kwif" =
is.null(weight) || {.isCharacterVector(weight, 1L) &&
weight %in% c("eql", "keql", "wcs", "kwcs", "wif", "kwif")},
"`iter.control` must be a list; allowed elements are max.iter, tol.beta, and tol.loss" =
.isNamedList(iter.control, c("max.iter", "tol.beta", "tol.loss"))
)
iter.control <- .testIterControl(iter.control)
#if fit one lmd1 and lmd2 pair, data is not expanded with "weight"
if (is.null(data$XZ)) {
if (is.null(weight)) stop("`weight` must be provided", call. = FALSE)
data <- .expandWTsmth(data, weight = weight)
} else {
if (!is.null(weight)) warning("`weight` input ignored; data already expanded", call. = FALSE)
}
if(family == "binomial") {
data$Y <- .confirmBinary(data$Y)
#subset for CV
}
if (family == "gaussian") {
data$Y <- .confirmContinuous(data$Y)
}
##for updating iteratively -- mainly useful in binory outcomes
data$XZ_update <- data$XZ
data$Y_update <- data$Y
## for crossvalidation
if (!is.null(extras$subset)) {
data$Y_update <- data$Y[extras$subset]
data$XZ_update <- data$XZ[extras$subset, , drop = FALSE]
data$Y <- data$Y[extras$subset]
data$XZ <- data$XZ[extras$subset, , drop = FALSE]
}
intercept_name <- "(Intercept)"
while (intercept_name %in% colnames(data$XZ)) {
intercept_name <- sample(LETTERS, 10, TRUE)
}
XZ_colnames <- c(intercept_name, colnames(data$XZ))
Y_app <- rep.int(0L, nrow(data$A))
names(Y_app) <- rownames(data$A)
if (family == "gaussian") {
# Update XZp1 and send to data$XZ
data$XZ_update <- cbind(1.0, data$XZ_update)
colnames(data$XZ_update) <- XZ_colnames
# Update XZ_app
XZ_app <- cbind(0.0, sqrt(2.0^(lambda2)) * data$A)
rownames(XZ_app) <- rownames(data$A)
b_coef = .ctnsSolution(data = data, X.app = XZ_app, Y.app = Y_app, lambda1 = lambda1)
#data = data; X.app = XZ_app; Y.app = Y_app; lambda1 = lambda1
} else {
XN = nrow(data$XZ)
AN = nrow(data$A)
XZ_app <- cbind(0.0, sqrt(2*(XN+AN)*(2^(lambda2))) * data$A)
#dim(XZ_app)
rownames(XZ_app) <- rownames(data$A)
b_coef = .rwlsSolution(data = data,
X.app = XZ_app, Y.app = Y_app,
lambda1 = lambda1,
iter.control = iter.control)
}
# data = data; X.app = XZ_app; Y.app = Y_app; lambda1 = lambda1; iter.control = iter.control
b_intercept <- c("(Intercept)", "", "", "", "", b_coef[1])
b_cnv <- data.frame(Vnames = names(b_coef[-1]),
coef= b_coef[-1])
cnvr_info <- data$CNVR.info
cnvr_info$Vnames <- paste0(cnvr_info$deldup, cnvr_info$grid.id)
b_cnv <- merge(cnvr_info[ ,c("Vnames", "CHR", "CNV.start", "CNV.end", "deldup")],
b_cnv, by = "Vnames")
b_cnv <- b_cnv[order(b_cnv$deldup, b_cnv$CNV.start),]
b_x <- cbind(names(b_coef[(1+ncol(data$design)+1):(1+ncol(data$design)+ncol(data$Z))]), matrix("", nrow=ncol(data$Z), ncol=4), b_coef[(1+ncol(data$design)+1):(1+ncol(data$design)+ncol(data$Z))])
colnames(b_x) <- c("Vnames", "CHR", "CNV.start", "CNV.end", "deldup", "coef")
b_cnv <- rbind(b_intercept, b_cnv, b_x)
b_cnv <- data.frame("Vnames" = b_cnv$Vnames ,
"CHR" = b_cnv$CHR |> as.integer(),
"CNV.start" = b_cnv$CNV.start |> as.numeric(),
"CNV.end" = b_cnv$CNV.end |> as.numeric(),
"deldup" = b_cnv$deldup ,
"coef" = b_cnv$coef |> as.numeric())
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.