#' prepare_annotations
#'
#' Separates the SNPeff annotations found in an annotated and rearranged VCF
#' dataframe (arranged using arrange_data).
#'
#' The `ANN` column is split on the `|` delimiter. Rows are classified by the
#' number of `|` delimiters they contain, relative to `length(snpeff_info())`:
#' rows holding a single annotation are split into the `snpeff_info()` columns
#' plus an `errors` column; rows holding two annotations are split into two
#' sets of columns which are then stacked, so such a row yields two output
#' rows. Rows with `3 * length(snpeff_info())` or more delimiters are not
#' included in the output. A message lists the values of the `sample` column
#' for rows with more than `2 * length(snpeff_info())` delimiters.
#'
#' @name prepare_annotations
#' @param df A rearranged and annotated VCF dataframe
#' @return A dataframe containing each annotation on a separate column, with
#'   duplicated rows removed. If `df` has no `ANN` column a message is emitted
#'   and `NULL` is returned invisibly.
#' @export
#' @examples
#' # Example: Shows the separation of the ANN column based on | delimiter.
#' test <- data.frame(ANN = c("A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P"))
#'
#' # The ANN column will be split based on the strings in `snpeff_info()` and
#' # an additional "errors" column.
#' snpeff_info()
#'
#' # Split the SNPeff annotations in "ANN" column and save to dataframe `df`
#' df <- prepare_annotations(test)
#'
#' # The one "ANN" column is split into 16 columns
#' dim(test)
#' dim(df)
#'
prepare_annotations <- function(df) {
  if (!"ANN" %in% colnames(df)) {
    message("No annotations present in dataframe - to annotate use SNPeff")
    return(invisible(NULL))
  }

  # Names of the annotation fields (selected from the SNPeff website).
  snpeff <- snpeff_info()
  # Column names for the second annotation of a row: same names suffixed "2".
  snpeff2 <- paste0(snpeff, "2")
  snpeff_multi <- c(snpeff, snpeff2, "errors")
  snpeff_length <- length(snpeff)

  # Delimiter count per row, computed once and reused by every filter below.
  # Note: gregexpr() returns a single -1 when nothing matches, so a value
  # without any "|" is counted as 1 here.
  n_pipes <- lengths(gregexpr("[|]", df$ANN))

  # Report which samples carry more than two annotations.
  over_two <- droplevels(dplyr::filter(df, n_pipes > snpeff_length * 2))
  message(">2 annotations: ", list(levels(factor(over_two$sample))))

  # Rows with one annotation: split straight into the SNPeff fields.
  single_anno <- droplevels(
    dplyr::filter(df, n_pipes <= snpeff_length + 1)
  )
  single_anno <- droplevels(
    tidyr::separate(single_anno, ANN, c(snpeff, "errors"), "[|]")
  )

  # Rows with two annotations: more delimiters than a single annotation but
  # fewer than three annotations would need.
  multi_anno <- droplevels(
    dplyr::filter(
      df,
      n_pipes > snpeff_length + 1 & n_pipes < snpeff_length * 3
    )
  )
  multi_anno <- droplevels(
    tidyr::separate(multi_anno, ANN, snpeff_multi, "[|]")
  )

  # Split the wide two-annotation frame into one frame per annotation.
  first_anno <- dplyr::select(multi_anno, !tidyselect::all_of(snpeff2))
  second_anno <- dplyr::select(multi_anno, !tidyselect::all_of(snpeff))
  # Rename the second annotation's columns so the two frames can be stacked.
  colnames(second_anno) <- colnames(first_anno)

  # Long format: first annotations, then second annotations, then prepend the
  # single-annotation rows so every annotation is present.
  all_anno <- rbind(single_anno, rbind(first_anno, second_anno))

  # Deduplicate just in case.
  droplevels(all_anno[!duplicated(all_anno), ])
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.