Nothing
## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
# load the package
library("CNVreg")
## ----echo=FALSE, warning=FALSE, message=FALSE---------------------------------
# load packages required for Rmd
library("kableExtra")
library("tidyverse")
library("ggplot2")
library("patchwork")
## -----------------------------------------------------------------------------
# load the example dataset
data("CNVCOVY", package = "CNVreg")
## -----------------------------------------------------------------------------
# view the dataset
summary(CNV)
## -----------------------------------------------------------------------------
# view the dataset
summary(Cov)
## -----------------------------------------------------------------------------
# view the dataset
summary(Y_QT)
## -----------------------------------------------------------------------------
# view the dataset
summary(Y_BT)
## ----warning=FALSE------------------------------------------------------------
# data preprocessing for a quantitative(continuous) outcome Y_QT
frag_data_QT <- prep(CNV = CNV, Y = Y_QT, Z = Cov, rare.out = 0.05)
## -----------------------------------------------------------------------------
# Format of `prep()` funtion output
str(frag_data_QT)
## ----warning=FALSE------------------------------------------------------------
# data preprocessing with a binary trait
frag_data_BT <- prep(CNV = CNV, Y = Y_BT, Z = Cov, rare.out = 0.05)
## ----eval = FALSE-------------------------------------------------------------
# ## copy frag_data_QT
# frag_data_BT <- frag_data_QT
#
# ### replace Y with Y_BT in the correct format: ordered named vector
# ### order the sample in Y_BT as in frag_data_QT$Y
# rownames(Y_BT) <- Y_BT$ID
#
# frag_data_BT$Y <- Y_BT[names(frag_data_QT$Y), "Y"] |> drop()
# names(frag_data_QT$Y) <- rownames(frag_data_QT$Y)
## -----------------------------------------------------------------------------
QT_TUNE <- withr::with_seed(
12345,
cvfit_WTSMTH(data = frag_data_QT,
lambda1 = seq(-8, -3, 1),
lambda2 = seq(12, 25, 2),
weight = "eql",
family = "gaussian",
cv.control = list(n.fold = 5L,
n.core = 1L,
stratified = FALSE),
verbose = FALSE))
## ----echo=FALSE---------------------------------------------------------------
# loss matrix of candidate tuning parameters
QT_TUNE$Loss %>% format( digits = 6) %>%
mutate(
across(2:ncol(QT_TUNE$Loss),
~ cell_spec(.x,
color = ifelse(.x > min(QT_TUNE$Loss[,2:ncol(QT_TUNE$Loss)])+0.0001, "black", "white"),
background = ifelse(.x <= min(QT_TUNE$Loss[, 2:ncol(QT_TUNE$Loss)])+0.0001, "red", "white")
)
)
)%>%
kable(booktabs = FALSE, linesep = "", align = "c", format = "html", escape = F, caption = "Average loss for each pair of candidate tuning parameters") %>%add_header_above(c(" " = 1, "Lambda 1" = ncol(QT_TUNE$Loss)-1))
## -----------------------------------------------------------------------------
# selected optimal tuning parameters with minimum loss
QT_TUNE$selected.lambda
## -----------------------------------------------------------------------------
##coefficients of intercept and covariates
QT_TUNE$coef[c(1, 21:22), c("Vnames", "coef") ]
## -----------------------------------------------------------------------------
# estimated coefficents for CNV
QT_TUNE$coef[2:20, ]
# non-zero coefficients
# QT_TUNE$coef[which(abs(QT_TUNE$coef$coef)>0), ]
## ----warning=FALSE, echo=FALSE------------------------------------------------
# plot the coefficients
# keep CNV fragments and exclude intercept and covariates(Age, Sex) in ploting, row(2:20)
CNVR <- frag_data_QT$CNVR.info %>% group_by(CNV.id, deldup)%>%
summarise(CNV.start = min(CNV.start),
CNV.end = max(CNV.end),
nfrag = length(CNV.id),
.groups = 'drop')
CNVR_adj <- CNVR[CNVR$nfrag > 1, ]
CNV_coef <- QT_TUNE$coef[2:20,]
P <- ggplot(CNV_coef, aes(x= CNV_coef$CNV.start , y=CNV_coef$coef*1000))+theme_bw()+
geom_point(aes(color = ifelse(abs(CNV_coef$coef*1000) > 1, "red", ifelse(abs(CNV_coef$coef*1000) < 10^(-5), "white", "black")))) +
scale_color_identity()+
theme(axis.text.x = element_text(size = 10, angle = 25, vjust = 1, hjust = 1))+
scale_x_continuous("Genomic position", labels = as.character(CNVR_adj$CNV.start), breaks = CNVR_adj$CNV.start)+
ylab("CNV coefficients (×10^(-3))")+
geom_segment(x=CNV_coef$CNV.start, xend=CNV_coef$CNV.end, y=CNV_coef$coef*1000, yend=CNV_coef$coef*1000)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[1]-1000000, xmax=CNVR_adj$CNV.end[1]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x = CNVR_adj$CNV.start[1], y = max(CNV_coef$coef*1000) +0.2, label = "A", color="red")+
geom_rect(aes(xmin=CNVR_adj$CNV.start[2]-1000000, xmax=CNVR_adj$CNV.end[2]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[3]-1000000, xmax=CNVR_adj$CNV.end[3]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x = CNVR_adj$CNV.start[3], y = max(CNV_coef$coef*1000)+0.2, label = "B", color="red")+
geom_rect(aes(xmin=CNVR_adj$CNV.start[4]-1000000, xmax=CNVR_adj$CNV.end[4]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)
PA <- ggplot(CNV_coef[2:5,], aes(x= 1/2*(CNV_coef$CNV.start[2:5]+CNV_coef$CNV.end[2:5]) , y=CNV_coef$coef[2:5]*1000))+theme_bw()+
ylab("")+
geom_point(aes(color = ifelse(abs(CNV_coef$coef[2:5]*1000) > 1, "red", ifelse(abs(CNV_coef$coef[2:5]) < 10^(-5), "white", "black")))) +
scale_color_identity()+
theme(axis.text.x = element_text(size = 10, angle = 25, vjust = 1, hjust = 1))+
scale_x_continuous("", labels = as.character(CNV_coef$CNV.start[2:5]), breaks = CNV_coef$CNV.start[2:5])+
geom_segment(x=CNV_coef$CNV.start[2:5], xend=CNV_coef$CNV.end[2:5], y=CNV_coef$coef[2:5]*1000, yend=CNV_coef$coef[2:5]*1000)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[1]-10, xmax=CNVR_adj$CNV.end[1]+10, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x =1/2*( CNVR_adj$CNV.start[1]+CNVR_adj$CNV.end[1]), y = max(CNV_coef$coef*1000) +0.0002, label = "Zoom in on region A", color="red")
PB <- ggplot(CNV_coef[11:14,], aes(x= 1/2*(CNV_coef$CNV.start[11:14]+CNV_coef$CNV.end[11:14]) , y=CNV_coef$coef[11:14]*1000))+theme_bw()+
ylab("")+
geom_point(aes(color = ifelse(abs(CNV_coef$coef[11:14]*1000) > 1, "red", ifelse(abs(CNV_coef$coef[11:14]*1000) < 10^(-5), "white", "black")))) +
scale_color_identity()+
theme(axis.text.x = element_text(size = 10, angle = 25, vjust = 1, hjust = 1))+
scale_x_continuous("Genomic position", labels = as.character(CNV_coef$CNV.start[11:14]), breaks = CNV_coef$CNV.start[11:14])+
geom_segment(x=CNV_coef$CNV.start[11:14], xend=CNV_coef$CNV.end[11:14], y=CNV_coef$coef[11:14]*1000, yend=CNV_coef$coef[11:14]*1000)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[3]-10, xmax=CNVR_adj$CNV.end[3]+10, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x =1/2*( CNVR_adj$CNV.start[3]+CNVR_adj$CNV.end[3]), y = max(CNV_coef$coef*1000) + 0.2, label = "Zoom in on region B", color="red")
P + (PA/PB) +
plot_annotation(title = "CNV coefficient estiamte across the genomic region - A fine-tuned model")+
plot_layout(axes = "collect", widths = c(2,1)) +
plot_layout( guide = "collect") & theme(legend.position="Top",
legend.text = element_text(size=12), legend.title = element_text(size=12))
## -----------------------------------------------------------------------------
BT_TUNE <- withr::with_seed(
12345,
cvfit_WTSMTH(frag_data_BT,
lambda1 = seq(-5.25, -4.75, 0.25),
lambda2 = seq(2, 8, 2),
weight = "eql",
family = "binomial",
cv.control = list(n.fold = 5L,
n.core = 1L,
stratified = FALSE),
iter.control = list(max.iter = 8L,
tol.beta = 10^(-3),
tol.loss = 10^(-6)),
verbose = FALSE))
## ----echo=FALSE---------------------------------------------------------------
# loss matrix of candidate tuning parameters
BT_TUNE$Loss %>% format( digits = 6) %>%
mutate(
across(2:ncol(BT_TUNE$Loss),
~ cell_spec(.x,
color = ifelse(.x > min(BT_TUNE$Loss[,2:ncol(BT_TUNE$Loss)])+0.000001, "black", "white"),
background = ifelse(.x <= min(BT_TUNE$Loss[, 2:ncol(BT_TUNE$Loss)])+0.000001, "red", "white")
)
)
)%>%
kable(booktabs = FALSE, linesep = "", align = "c", format = "html", escape = F, caption = "Average loss for each pair of candidate tuning parameters") %>%add_header_above(c(" " = 1, "Lambda 1" = ncol(BT_TUNE$Loss)-1))
## -----------------------------------------------------------------------------
# selected optimal tuning parameters with minimum loss
BT_TUNE$selected.lambda
## -----------------------------------------------------------------------------
BT_TUNE$coef[c(1, 21:22), c("Vnames", "coef") ]
## -----------------------------------------------------------------------------
BT_TUNE$coef[2:20, ]
## ----warning=FALSE, echo=FALSE------------------------------------------------
CNV_coef <- BT_TUNE$coef[2:20,]
P <- ggplot(CNV_coef, aes(x= CNV_coef$CNV.start , y=CNV_coef$coef*1000))+theme_bw()+
geom_point(aes(color = ifelse(abs(CNV_coef$coef*1000) > 1, "red", ifelse(abs(CNV_coef$coef*1000) < 10^(-5), "white", "black")))) +
scale_color_identity()+
theme(axis.text.x = element_text(size = 10, angle = 25, vjust = 1, hjust = 1))+
scale_x_continuous("Genomic position", labels = as.character(CNVR_adj$CNV.start), breaks = CNVR_adj$CNV.start)+
ylab("CNV coefficients (×10^(-3))")+
geom_segment(x=CNV_coef$CNV.start, xend=CNV_coef$CNV.end, y=CNV_coef$coef*1000, yend=CNV_coef$coef*1000)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[1]-1000000, xmax=CNVR_adj$CNV.end[1]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x = CNVR_adj$CNV.start[1], y = max(CNV_coef$coef*1000)+0.2, label = "A", color="red")+
geom_rect(aes(xmin=CNVR_adj$CNV.start[2]-1000000, xmax=CNVR_adj$CNV.end[2]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[3]-1000000, xmax=CNVR_adj$CNV.end[3]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x = CNVR_adj$CNV.start[3], y = max(CNV_coef$coef*1000)+0.2, label = "B", color="red")+
geom_rect(aes(xmin=CNVR_adj$CNV.start[4]-1000000, xmax=CNVR_adj$CNV.end[4]+1000000, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)
PA <- ggplot(CNV_coef[2:5,], aes(x= 1/2*(CNV_coef$CNV.start[2:5]+CNV_coef$CNV.end[2:5]) , y=CNV_coef$coef[2:5]*1000))+theme_bw()+
ylab("")+
geom_point(aes(color = ifelse(abs(CNV_coef$coef[2:5]*1000) > 0.001, "red", ifelse(abs(CNV_coef$coef[2:5]*1000) < 10^(-8), "white", "black")))) +
scale_color_identity()+
theme(axis.text.x = element_text(size = 10, angle = 25, vjust = 1, hjust = 1))+
scale_x_continuous("", labels = as.character(CNV_coef$CNV.start[2:5]), breaks = CNV_coef$CNV.start[2:5])+
geom_segment(x=CNV_coef$CNV.start[2:5], xend=CNV_coef$CNV.end[2:5], y=CNV_coef$coef[2:5]*1000, yend=CNV_coef$coef[2:5]*1000)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[1]-10, xmax=CNVR_adj$CNV.end[1]+10, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x =1/2*( CNVR_adj$CNV.start[1]+CNVR_adj$CNV.end[1]), y = max(CNV_coef$coef*1000) + 0.4, label = "Zoom in on region A", color="red")
PB <- ggplot(CNV_coef[11:14,], aes(x= 1/2*(CNV_coef$CNV.start[11:14]+CNV_coef$CNV.end[11:14]) , y=CNV_coef$coef[11:14]*1000))+theme_bw()+
ylab("")+
geom_point(aes(color = ifelse(abs(CNV_coef$coef[11:14]*1000) > 1, "red", ifelse(abs(CNV_coef$coef[11:14]*1000) < 10^(-5), "white", "black")))) +
scale_color_identity()+
theme(axis.text.x = element_text(size = 10, angle = 25, vjust = 1, hjust = 1))+
scale_x_continuous("Genomic position", labels = as.character(CNV_coef$CNV.start[11:14]), breaks = CNV_coef$CNV.start[11:14])+
geom_segment(x=CNV_coef$CNV.start[11:14], xend=CNV_coef$CNV.end[11:14], y=CNV_coef$coef[11:14]*1000, yend=CNV_coef$coef[11:14]*1000)+
geom_rect(aes(xmin=CNVR_adj$CNV.start[3]-10, xmax=CNVR_adj$CNV.end[3]+10, ymin=min(CNV_coef$coef*1000), ymax = max(CNV_coef$coef*1000)), fill="yellow", alpha = 0.02)+
annotate("text", x =1/2*( CNVR_adj$CNV.start[3]+CNVR_adj$CNV.end[3]), y = max(CNV_coef$coef*1000) +0.4, label = "Zoom in on region B", color="red")
P + (PA/PB) +plot_annotation(title = "CNV coefficient estiamte across the genomic region - A fine-tuned model")+
plot_layout(axes = "collect", widths = c(2,1)) + plot_layout( guide = "collect") & theme(legend.position="Top",
legend.text = element_text(size=12), legend.title = element_text(size=12))
## -----------------------------------------------------------------------------
# we know the optimal tuning parameters and directly apply it to reproduce the analysis for a continuous outcome.
QT_fit <- fit_WTSMTH(frag_data_QT,
lambda1 = -5,
lambda2 = 20,
weight="eql",
family="gaussian")
## -----------------------------------------------------------------------------
QT_fit[c(1, 21:22), c("Vnames", "coef") ]
## -----------------------------------------------------------------------------
QT_fit[2:20, ]
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.