#' Count number of recombination events identified in each genome (taxon)
#'
#' This function reads a recombination file (Gubbins GFF or BRATNextGen tabular
#' output) or a data frame of already loaded recombination events, and counts
#' how many recombination events list each of the requested taxa. The data
#' frame must contain a \code{gene} column whose entries hold, for every
#' recombination event, the names of the taxa assigned to that event.
#' Taxa that are requested but not listed by any event get a count of zero.
#'
#' @param gubbins.gff.file Path to the input recombination file, or a data frame of recombination events
#' @param recom.input.type Type of input recombination data, either "Gubbins" GFF or "BRATNextGen" tabular data.
#' @param taxon.names Vector containing the names of the taxa to report. The result follows the order of this vector.
#'
#' @return A one-dimensional table, named by \code{taxon.names}, containing the number of recombination events identified in each taxon
#'
#' @examples
#' \dontrun{
#' # Locate the Gubbins recombination predictions (GFF file) in the RCandy package
#' #
#' # This function may take some time to finish depending on the number
#' # of recombination events identified
#'
#' gubbins.gff<-system.file("extdata", "ST320.recombination_predictions.gff",
#' package = "RCandy",mustWork = TRUE)
#'
#' # Placeholder taxon names; replace with the taxa of interest in your data
#' taxa<-c("taxon1","taxon2")
#'
#' rec.freq<-count.rec.events.per.genome(gubbins.gff,taxon.names=taxa)
#' }
#'
#' @export
#'
#' @import magrittr
#' @import dplyr
#'
#' @author Chrispin Chaguza, \email{Chrispin.Chaguza@@gmail.com}
#' @references \url{https://github.com/ChrispinChaguza/RCandy}
#'
#### Function to count the number of recombination events per genome (taxon) ####
count.rec.events.per.genome<-function(gubbins.gff.file,recom.input.type="Gubbins",taxon.names){
  # Check if valid taxon names are specified. missing() is tested first so that an
  # omitted argument produces the error below rather than a generic R error, and
  # the scalar operator || is used because this is a single if() condition
  if( missing(taxon.names) || is.null(taxon.names) || !is.vector(taxon.names) ){
    stop("Invalid taxon names specified")
  }

  # Check type of recombination events; exactly one of the supported types is required
  if( length(recom.input.type)!=1 || !(recom.input.type %in% c("Gubbins","BRATNextGen")) ){
    stop("Invalid recombination data specified. Choose from 'Gubbins' or 'BRATNextGen'")
  }

  if( is.character(gubbins.gff.file) ){
    # The input is a file name: read the recombination events from the file
    # generated by Gubbins (GFF file) or BRATNextGen (tabular file)
    if( recom.input.type=="Gubbins" ){
      rec.count.tmp<-load.gubbins.GFF(gubbins.gff.file)
    }else{
      rec.count.tmp<-load.gubbins.GFF(gubbins.gff.file,recom.input.type="BRATNextGen")
    }
  }else if( inherits(gubbins.gff.file,"data.frame") ){
    # The recombination events are provided via a data frame (or any subclass of
    # it, e.g. a tibble) rather than a file
    rec.count.tmp<-gubbins.gff.file
  }else{
    # Exit the program, invalid recombination event data provided
    stop("Something is wrong with the Gubbins recombination file")
  }

  # Pool the taxon names listed by every recombination event into a single vector.
  # unlist() also copes with zero events, where an index loop over 1:0 would fail
  temp.vals<-unlist(rec.count.tmp$gene,use.names=FALSE)

  # Names are compared as character strings, matching how table() labels its counts
  taxon.labels<-as.character(taxon.names)

  # Count number of recombinations detected in each genome. Using the requested
  # taxa as factor levels gives taxa without any recombination event a count of
  # zero instead of an out-of-bounds error when they are looked up by name
  temp.vals<-factor(temp.vals,levels=unique(taxon.labels))
  temp.vals.fr<-table(temp.vals)

  # Report the counts in the order (and multiplicity) of the requested taxon names
  temp.vals.fr<-temp.vals.fr[taxon.labels]

  # Return a table containing the number of recombination events identified per taxon
  return(temp.vals.fr)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.