Nothing
# a utility function for adding slop to BED coordinates
add.slop <- function(bed, slop) {
bed$start <- bed$start - slop;
bed$end <- bed$end + slop;
# check for negative start coordinates, replace with 0, and issue a warning
if (any(bed$start < 0)) {
bed$start[bed$start < 0] <- 0;
warning('Slop caused negative start coordinates; replacing with 0.');
}
return(bed);
}
#' @title Convert PGS data to BED format
#' @description Convert imported and formatted PGS compnent SNP coordinate data to BED format.
#' @param pgs.weight.data A data.frame containing SNP coordinate data with standardized CHROM and POS columns.
#' @param chr.prefix A logical indicating whether the 'chr' prefix should be used when formatting chromosome name.
#' @param numeric.sex.chr A logical indicating whether the sex chromosomes should be formatted numerically, as opposed to alphabetically.
#' @param slop An integer indicating the number of base pairs to add to the BED interval on either side.
#' @return A data.frame containing the PGS component SNP coordinate data in BED format and any other columns provided in pgs.weight.data.
#' @examples
#' pgs.weight.data <- data.frame(
#' CHROM = c('chr1', 'chrX'),
#' POS = c(10, 20)
#' );
#' convert.pgs.to.bed(pgs.weight.data);
#'
#' # Switch between different chromosome notations
#' convert.pgs.to.bed(pgs.weight.data, chr.prefix = FALSE, numeric.sex.chr = TRUE);
#'
#' # Add slop to BED intervals
#' convert.pgs.to.bed(pgs.weight.data, slop = 10);
#' @export
convert.pgs.to.bed <- function(pgs.weight.data, chr.prefix = TRUE, numeric.sex.chr = FALSE, slop = 0) {
# check that data is a data.frame
if (!is.data.frame(pgs.weight.data)) {
stop('data must be a data.frame');
}
# check that data has CHROM and POS columns
if (!all(c('CHROM', 'POS') %in% colnames(pgs.weight.data))) {
stop('data must have CHROM and POS columns');
}
# check that slop is a non-negative integer
if (!is.numeric(slop) | slop < 0) {
stop('slop must be a non-negative integer');
}
# format chromosome names according to user specifications
pgs.weight.data$CHROM <- ApplyPolygenicScore::format.chromosome.notation(
chromosome = pgs.weight.data$CHROM,
chr.prefix = chr.prefix,
numeric.sex.chr = numeric.sex.chr
);
## assemble BED file ##
# 0-index coordinates
pgs.bed <- data.frame(
chr = pgs.weight.data$CHROM,
start = pgs.weight.data$POS - 1,
end = pgs.weight.data$POS
);
# check for negative start coordinates, report an error
if (any(pgs.bed$start < 0)) {
stop('0-indexing caused negative start coordinates.');
}
# add slop
if (slop > 0) {
pgs.bed <- add.slop(bed = pgs.bed, slop = slop);
}
# concat with the rest of the prs columns
pgs.bed <- cbind(pgs.bed, pgs.weight.data[, !(colnames(pgs.weight.data) %in% c('CHROM', 'POS'))]);
return(pgs.bed);
}
#' @title Combine PGS BED files
#' @description Merge overlapping PGS coordinates in multiple BED files.
#' @param pgs.bed.list A named list of data.frames containing PGS coordinates in BED format.
#' @param add.annotation.data A logical indicating whether an additional annotation data column should be added to the annotation column.
#' @param annotation.column.index An integer indicating the index of the column in the data frames in \code{pgs.bed.list} that should be added to the annotation column.
#' @param slop An integer indicating the number of base pairs to add to the BED interval on either side.
#' @return A data.frame containing the merged PGS coordinates in BED format with an extra annotation column containing the name of the PGS and data from one additional column optionally selected by the user.
#' @examples
#' bed1 <- data.frame(
#' chr = c(1, 2, 3),
#' start = c(1, 2, 3),
#' end = c(2, 3, 4),
#' annotation = c('a', 'b', 'c')
#' );
#' bed2 <- data.frame(
#' chr = c(1, 2, 3),
#' start = c(1, 20, 30),
#' end = c(2, 21, 31),
#' annotation = c('d', 'e', 'f')
#' );
#' bed.input <- list(bed1 = bed1, bed2 = bed2);
#' combine.pgs.bed(bed.input);
#' @export
combine.pgs.bed <- function(pgs.bed.list, add.annotation.data = FALSE, annotation.column.index = 4, slop = 0) {
# check that pgs.bed.list is a named list
if (!is.list(pgs.bed.list) | is.null(names(pgs.bed.list))) {
stop('pgs.bed.list must be a named list');
}
# check that all elements of pgs.bed.list are data.frames
if (!all(sapply(pgs.bed.list, is.data.frame))) {
stop('all elements of pgs.bed.list must be data.frames');
}
# check that all elements of pgs.bed.list have the same column names
if (!all(sapply(pgs.bed.list, function(x) all(colnames(x) == colnames(pgs.bed.list[[1]]))))) {
stop('all elements of pgs.bed.list must have the same column names');
}
# check that each element of pgs.bed.list is in BED format (chr, start, end)
if (!all(sapply(pgs.bed.list, function(x) all( c('chr', 'start', 'end') %in% colnames(x))))) {
stop('all elements of pgs.bed.list must have columns named chr, start, and end');
}
# check that all intervals specified in pgs.bed.list are one bp in length
if (!all(sapply(pgs.bed.list, function(x) all(x$start == x$end - 1)))) {
stop('all intervals specified in pgs.bed.list must represent one SNP and be one bp in length');
}
# check that slop is a non-negative integer
if (!is.numeric(slop) | slop < 0) {
stop('slop must be a non-negative integer');
}
if (add.annotation.data) {
# check that annotation.column.index is within the range of the number of columns in the data.frames in pgs.bed.list
if (annotation.column.index > ncol(pgs.bed.list[[1]])) {
stop('annotation.column.index must be within the range of the number of columns in the data.frames in pgs.bed.list');
}
}
# Annotate each PGS BED file with the name of the PGS
for (i in 1:length(pgs.bed.list)) {
pgs.bed.list[[i]]$annotation <- names(pgs.bed.list)[i];
}
# concatenate all BED files in list
concatenated.bed <- do.call(rbind, pgs.bed.list);
# add requested annotation data to annotation column
if (add.annotation.data) {
concatenated.bed$annotation <- paste(concatenated.bed[ , annotation.column.index], concatenated.bed$annotation, sep = '|');
}
# sort by chromosome and position
concatenated.bed <- concatenated.bed[order(concatenated.bed$chr, concatenated.bed$start), ];
# merge overlapping SNP intervals and combine overlapping annotations using aggregate
merged.bed <- aggregate(annotation ~ chr + start + end, data = concatenated.bed, FUN = paste, collapse = ',');
# sort by chromosome and position
merged.bed <- merged.bed[order(merged.bed$chr, merged.bed$start), ];
# add slop
if (slop > 0) {
merged.bed <- add.slop(bed = merged.bed, slop = slop);
}
# return merged BED file
return(merged.bed);
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.