#' Count number of overlapping recombination events at each genomic position
#'
#' This function reads a GFF file, or a data frame, containing the
#' recombination events identified in the genome by Gubbins, and counts the frequency of recombination
#' events at each genomic position, i.e. the number of recombination events overlapping that position. The
#' data frame can be generated using the "load.gubbins.rec.events.gff" function
#'
#' @param gubbins.gff.file Path to the input Gubbins GFF recombination file or data frame
#' @param recom.input.type Type of input recombination data, either "Gubbins" GFF or "BRATNextGen" tabular data.
#'
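#' @return A data frame (tibble) with one row per genomic position covered by at least one recombination event, sorted by position, with columns \code{FRQ} (number of recombination events overlapping the position), \code{POS} (genomic position) and \code{GRP} (constant value 1)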
#'
#' @examples
#' \dontrun{
#' # Read the recombination events from a Gubbins GFF file and count the
#' # number of overlapping recombination events at each genomic position
#'
#' # This function may take some minutes to finish depending on the number
#' # of recombination events identified and the genome size
#'
#' gubbins.gff<-system.file("extdata", "ST320.recombination_predictions.gff",
#' package = "RCandy",mustWork = TRUE)
#'
#' rec.freq<-count.rec.events.per.base(gubbins.gff)
#' }
#'
#' @export
#'
#' @import magrittr
#' @import dplyr
#'
#' @author Chrispin Chaguza, \email{Chrispin.Chaguza@@gmail.com}
#' @references \url{https://github.com/ChrispinChaguza/RCandy}
#'
#### Function to count the number of recombination events per base in the genome ####
count.rec.events.per.base<-function(gubbins.gff.file,recom.input.type="Gubbins"){
  # Count, for every genomic position covered by at least one recombination
  # event, how many events overlap that position.
  #
  # Arguments:
  #   gubbins.gff.file  Either a character path handed to load.gubbins.GFF(),
  #                     or a data frame with one row per recombination event
  #                     and numeric START and END columns.
  #   recom.input.type  "Gubbins" (default) or "BRATNextGen"; only used when
  #                     the events are read from a file.
  #
  # Returns a tibble with columns FRQ (number of overlapping events), POS
  # (genomic position) and GRP (constant 1), sorted by POS. An input with no
  # events gives a zero-row tibble with the same columns.

  # Validate the requested input type (a scalar test, so use || rather than |)
  if( length(recom.input.type)!=1L || !recom.input.type %in% c("Gubbins","BRATNextGen") ){
    stop("Invalid recombination data specified. Choose from 'Gubbins' or 'BRATNextGen'")
  }

  if( is.character(gubbins.gff.file) ){
    # A file name was provided: read the recombination events from the file
    # (is.character() is already FALSE for NULL, so no separate NULL check)
    if( recom.input.type=="Gubbins" ){
      rec.count.tmp<-load.gubbins.GFF(gubbins.gff.file)
    }else{
      rec.count.tmp<-load.gubbins.GFF(gubbins.gff.file,recom.input.type="BRATNextGen")
    }
  }else if( is.data.frame(gubbins.gff.file) ){
    # The events were provided as a data frame. is.data.frame() accepts any
    # data frame subclass (tibble, grouped_df, spec_tbl_df, ...) instead of
    # only a fixed list of class names
    rec.count.tmp<-gubbins.gff.file
  }else{
    # Exit the program, invalid recombination event data provided
    stop("Something is wrong with the Gubbins recombination file")
  }

  # The event coordinates are required below; fail early with a clear message
  missing.cols<-setdiff(c("START","END"),colnames(rec.count.tmp))
  if( length(missing.cols)>0 ){
    stop("Recombination data is missing required column(s): ",
         paste(missing.cols,collapse=", "))
  }

  # No recombination events: nothing to count, return an empty result
  if( nrow(rec.count.tmp)==0 ){
    return(dplyr::as_tibble(data.frame(FRQ=integer(0),POS=numeric(0),GRP=numeric(0))))
  }

  # Expand every event into the genomic positions it spans and concatenate
  # them once, rather than growing a data frame with rbind() inside a loop
  rec.positions<-unlist(
    Map(function(rec.start,rec.end) seq(rec.start,rec.end,1),
        rec.count.tmp[["START"]],rec.count.tmp[["END"]]),
    use.names=FALSE)

  # Count number of overlapping recombination events at each genomic position
  temp.vals.fr<-data.frame(POS=rec.positions) %>% dplyr::as_tibble() %>%
    dplyr::count(POS=.data$POS,name="FRQ") %>%
    dplyr::mutate(GRP=1) %>%
    dplyr::select("FRQ","POS","GRP") %>%
    dplyr::arrange(.data$POS)

  # Return a data frame containing genomic position and number of recombination events identified
  return(temp.vals.fr)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.