Nothing
#' Inner Function `.breakCNV()` and the dependent functions
#'
#' Use CNV breakpoints (BP1 and BP2) on the same chromosome to construct CNV fragments/grids.
#' Process CNV data in PLINK format 1 chromosome at a time using some dependent functions.
#' Create CNV fragments (grids) and define the boundary of each fragment.
#'
#' @noRd
#' @param CNV A data.frame in PLINK format. Specifically, must contain
#' columns:
#' \itemize{
#' \item "ID": character, unique identity for each sample
#' \item "CHR": integer, range 1-22
#' \item "BP1": integer, CNV event starting position
#' \item "BP2": integer, CNV event ending position, each record must have BP1 <= BP2, i.e., data at least 1bp (or data can be other unit length)
#' \item "TYPE": integer, range 0, 1, 3, 4, and larger allowed, 2 is not allowed.
#' }
#'
#' @returns A list object containing
#' \item{CHR}{The chromosome under analysis}
#' \item{id}{An integer vector of ids for each grid unit}
#' \item{CNV.start}{An integer vector of the lower boundary of each grid}
#' \item{CNV.end}{An integer vector of the upper boundary of each grid}
#' @keywords internal
#'
.createGrid <- function(CNV) {
# Identify boundaries of all grid units
BPs <- c(CNV$BP1, CNV$BP2) |> unique() |> sort()
# Track boundaries for each grid unit
n_units <- length(BPs) - 1L
ITV_info <- data.frame("CHR" = rep(CNV$CHR[1L], n_units))
ITV_info$grid.id <- seq_len(n_units)
ITV_info$CNV.start <- BPs[-{n_units + 1L}]
ITV_info$CNV.end <- BPs[-1L]
ITV_info
}
#'
#' Construct a long-shaped matrix linking CNV BP1/BP2 to units of the fragments(grid),
#' One record of CNV can match to >=1 grid(s).
#'
#' @noRd
#' @param CNV A data.frame in PLINK format. Specifically, must contain
#' columns:
#' \itemize{
#' \item "ID": character, unique identity for each sample
#' \item "CHR": integer, range 1-22
#' \item "BP1": integer, CNV event starting position
#' \item "BP2": integer, CNV event ending position, each record must have BP1 <= BP2, i.e., data at least 1bp (or data can be other unit length)
#' \item "TYPE": integer, range 0, 1, 3, 4, and larger allowed, 2 is not allowed.
#' }
#' @param grid A list object containing the grid details
#' \itemize{
#' \item CHR The chromosome under analysis
#' \item id An integer vector of ids for each grid unit
#' \item CNV.start An integer vector of the lower boundary of each grid
#' \item CNV.end An integer vector of the upper boundary of each grid
#' }
#'
#' @returns A data.frame ordered by grid.id containing
#' \item{ID}{The sample ID}
#' \item{CHR}{The chromosome under analysis}
#' \item{grid.id}{The id of a grid unit}
#' \item{TYPE}{The CNV value for the grid unit}
#' \item{deldup}{A character, del/dup, indicating if deletion or duplication}
#' \item{AUC}{Numeric AUC value}
#'
#' @keywords internal
.createLongData <- function(CNV, grid) {
BPs <- c(grid$CNV.start, max(grid$CNV.end))
# idx of Match starting BP of each record to its grid unit
starts <- findInterval(CNV$BP1, BPs)
# idx of Match ending BP of each record to its grid unit
ends <- findInterval(CNV$BP2, BPs, left.open = TRUE)
# create long data format, for each idx of interval from ITVs to ITVe
long <- data.frame("ID" = rep(CNV$ID, times = ends - starts + 1L))
long$CHR <- CNV$CHR[1L]
long$grid.id <- mapply(seq, starts, ends, MoreArgs = list(by = 1L)) |> unlist()
long$TYPE <- rep(CNV$TYPE, times = ends - starts + 1L)
long$deldup <- ifelse(long$TYPE < 2L, "del", "dup")
lower_boundary <- grid$CNV.start[long$grid.id]
upper_boundary <- grid$CNV.end[long$grid.id]
long$AUC <- abs(2.0 - long$TYPE) * abs(upper_boundary - lower_boundary)
long <- long[!duplicated(long), ]
long[order(long$deldup, long$grid.id), ]
}
#' Convert CNV Records to Grids/fragments
#'
#' @noRd
#' @param CNV A data.frame in PLINK format. Specifically, must contain
#' columns:
#' \itemize{
#' \item "ID": character, unique identity for each sample
#' \item "CHR": integer, range 1-22
#' \item "BP1": integer, CNV event starting position
#' \item "BP2": integer, CNV event ending position, each record must have BP1 <= BP2, i.e., data at least 1bp (or data can be other unit length)
#' \item "TYPE": integer, range 0, 1, 3, 4, and larger allowed, 2 is not allowed.
#' }
#'
#' @returns A list object with elements
#' \itemize{
#' \item long.cnv CNV data converted to long format
#' \itemize{
#' \item{ID}{The sample ID}
#' \item{CHR}{The chromosome under analysis}
#' \item{grid.id}{The id of a grid unit}
#' \item{TYPE}{The CNV value for the grid unit}
#' \item{deldup}{A character, del/dup, indicating if deletion or duplication}
#' \item{AUC}{Numeric AUC value}
#' }
#' \item grid.info A data.frame. Links the grid information to the CNV data
#' \itemize{
#' \item CHR The chromosome
#' \item id The grid unit id
#' \item CNV.start The BP value of the left grid boundary
#' \item CNV.end The BP value of the right grid boundary
#'. }
#' }
#'
#' @include helpful_tests.R
#' @keywords internal
#'
#'
#' Construct CNV fragments for CNVs in PLINK format, spread CNV records to a long table,
#' and output the long-shaped data.frame for all CNV fragments and the fragment boundary information.
.breakCNV <- function(CNV) {
stopifnot(
"`CNV` data.frame in PLINK format with columns: `ID`, `CHR`, `BP1`, `BP2`,`TYPE`" =
!missing(CNV) && .isCNV(CNV)
)
# create grid
grid_info <- .createGrid(CNV)
# create long data format
CNV_frag_l <- .createLongData(CNV = CNV, grid = grid_info)
list("long.cnv" = CNV_frag_l,
"grid.info" = grid_info)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.