#' Add repeat masker table annotation
#'
#' Annotates every peak of every sample with the first repeat masker record
#' it overlaps by at least 3 bp. Three columns are appended to each peak
#' table: `repetitive_sequence` (`"yes"` when a repeat overlaps the peak,
#' `NA` otherwise), `rep_name` and `rep_class` (copied from the matching
#' repeat masker record, `NA` when there is none).
#'
#' @param samples list of annotated peaks; every element must be convertible
#'   with `GRanges()` and accepted by `cbind()`. The list may be named or
#'   unnamed.
#' @param table_folder folder where the repeat masker table is, default is
#'   'utils'. The file must be called `repeat_masker_table`, be tab-separated
#'   with a header line, and hold seven columns in this order: sequence name,
#'   start, end, strand, repeat name, repeat class, repeat family.
#' @return `samples`, with the columns `repetitive_sequence`, `rep_name` and
#'   `rep_class` appended to every element.
#' @keywords repeat masker table
#' @import data.table tidyverse GenomicRanges
#' @export
#' @examples
#' \dontrun{
#' add_repeat_masker_table(samples, table_folder)
#' }
add_repeat_masker_table <- function(samples, table_folder = "utils") {
  # Repeat masker annotation table downloaded from the UCSC Table Browser:
  # https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=861624999_IsHRXOsBEeXzFUKTc2Yxnwu3wDiL&clade=mammal&org=Human&db=hg38&hgta_group=allTracks&hgta_track=rmsk&hgta_table=0&hgta_regionType=genome&position=chr12%3A54%2C594%2C719-54%2C595%2C565&hgta_outputType=selectedFields&hgta_outFileName=repeat_masker_table
  path <- file.path(table_folder, "repeat_masker_table")
  if (!file.exists(path)) {
    stop("repeat masker table not found: ", path, call. = FALSE)
  }

  # NOTE(review): if the downloaded header line starts with '#', read.table()
  # treats it as a comment and consumes the first record as the header --
  # confirm against the actual file.
  repeat_masker_table <- read.table(file = path, sep = "\t", header = TRUE)

  column_names <- c(
    "seqnames", "start", "end", "strand",
    "rep_name", "rep_class", "rep_family"
  )
  if (ncol(repeat_masker_table) != length(column_names)) {
    stop(
      "repeat masker table must have ", length(column_names),
      " columns, found ", ncol(repeat_masker_table), ": ", path,
      call. = FALSE
    )
  }
  names(repeat_masker_table) <- column_names

  # NOTE(review): UCSC tables presumably store 0-based starts while GRanges
  # is 1-based; starts are used unchanged here, as before -- confirm whether
  # a shift is wanted.
  repeat_ranges <- GRanges(repeat_masker_table)

  # Iterate by position so unnamed lists and duplicated names are handled;
  # looping over names(samples) would silently skip an unnamed list.
  for (i in seq_along(samples)) {
    peaks <- samples[[i]]

    # select = "first" yields one integer per peak: the row of the first
    # overlapping repeat, or NA when no repeat overlaps by >= 3 bp.
    first_hit <- findOverlaps(
      GRanges(peaks), repeat_ranges,
      minoverlap = 3, select = "first"
    )

    is_repeat <- rep(NA_character_, length(first_hit))
    is_repeat[!is.na(first_hit)] <- "yes"

    # Indexing with NA propagates NA, so peaks without a hit get NA in
    # rep_name / rep_class. Built without row names so that cbind() keeps the
    # row names of the peak table.
    repeat_annotation <- data.frame(
      repetitive_sequence = is_repeat,
      rep_name = repeat_masker_table$rep_name[first_hit],
      rep_class = repeat_masker_table$rep_class[first_hit],
      stringsAsFactors = FALSE
    )

    samples[[i]] <- cbind(peaks, repeat_annotation)
  }

  samples
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.