R/draw.genome.annotations.R

Defines functions show.genome.annotation.plot

Documented in show.genome.annotation.plot

#' Draw genome annotation features
#'
#' This function reads a reference genome in GFF format and then plots the
#' genetic features (coding sequences) on both forward and reverse strands.
#'
#' @param genome.name Path to the input file name.
#' @param genome.start Start position of the genome to show in the plot.
#' @param genome.end End position of the genome to show in the plot.
#' @param genome.start.upstream Start drawing the genome plot from the specified bases upstream of the genome.
#' @param genome.end.downstream End drawing the genome plot from the specified bases upstream of the genome.
#' @param show.gene.label Show genetic feature label.
#' @param gene.feature.width Width of the genetic features.
#'
#' @examples
#' \dontrun{
#' Read genome in GFF formatted file (generated usign readseq) and plot the genomic features
#'
#' ref.genome.gff<-system.file("extdata", "Hungary19A-6.gff", package = "RCandy",mustWork = TRUE)
#'
#' show.genome.annotation.plot(ref.genome.gff)
#' }
#'
#' @return None
#'
#' @import viridis
#' @import shape
#' @import magrittr
#' @import dplyr
#' @import grDevices
#' @import graphics
#'
#' @author Chrispin Chaguza, \email{Chrispin.Chaguza@@gmail.com}
#' @references \url{https://github.com/ChrispinChaguza/RCandy}
#'
## Generate a genome annotation plot using a reference genome GFF file
## (ideally generated by readseq)

show.genome.annotation.plot<-function(genome.name,
                                      genome.start=NULL,
                                      genome.end=NULL,
                                      genome.start.upstream=0,
                                      genome.end.downstream=0,
                                      show.gene.label=FALSE,
                                      gene.feature.width=1.5){

  # Set default value for genome length (assume it's not explicitly specified)
  ref.genome.length<-NULL
  # Check if a valid reference genome name is provided
  if( !is.null(genome.name) & is.character(genome.name) ){
    # Same coordinates for the genome region to show, default whole genome
    reference.genome.obj1<-load.genome.GFF(genome.name)
    if( "source" %in% reference.genome.obj1$feature ){
      # Extract reference genome length from the source line in the GFF file
      ref.genome.length<-reference.genome.obj1[reference.genome.obj1$feature=="source",]$end
      # Filter out lines not containing information about the genetic features
      reference.genome.obj<-reference.genome.obj1 %>% dplyr::filter(.data$feature!="source") # %>% dplyr::filter(feature %in% c("locus_tag","CDS","gene"))
    }else{
      # Exit the program when valid genome length is found
      stop("Could not find a feature labelled 'source' in the genome annotation file")
    }
  }else{
    # Check if the reference genome GFF annotation data is provided in a data frame
    if( length(setdiff(class(genome.name),c("tbl_df","tbl","data.frame","rowwise_df","grouped_df")))==0 ){
      if( "source" %in% genome.name$feature ){
        # Extract reference genome length from the source line in the GFF file
        ref.genome.length<-genome.name[genome.name$feature=="source",]$end

        # Filter out lines not containing information about the genetic features
        reference.genome.obj<-genome.name %>% dplyr::filter(.data$feature!="source") # %>% dplyr::filter(feature %in% c("locus_tag","CDS","gene"))
      }else{
        # Exit the program, when no valid genome length is found
        stop("Could not find a feature labelled 'source' in the genome annotation file")
      }
    }else{
      # Exit the program, possibly invalid GFF annotation file
      stop("Something is wrong with the GFF genome anotation file")
    }
  }

  # Specify the genomic regions to show genomic features
  if( is.null(genome.start) | is.null(genome.end) ){
    genome.start<-0
    genome.end<-ref.genome.length
  }

  xlim.vals<-c(genome.start-genome.start.upstream,genome.end+genome.end.downstream)

  # Plot the genome annotation features
  plot(genome.start,genome.end,las=1,xlim=xlim.vals,ylim=c(0,5),xaxs="i",yaxs="r",
       bty="n",xaxt="n",yaxt="n",xlab="",ylab="",col=rgb(0,0,0,alpha=0))

  # Plot the gene features on different lines for the forward and reverse strand
  # Genes randomly assigned different colours for clarity
  Arrows(x0=ifelse(reference.genome.obj$strand=="+",reference.genome.obj$start,reference.genome.obj$end),
         y0=ifelse(reference.genome.obj$strand=="+",4.0,0.5),
         x1=ifelse(reference.genome.obj$strand=="+",reference.genome.obj$end,reference.genome.obj$start),
         y1=ifelse(reference.genome.obj$strand=="+",4.0,0.5),
         arr.type="triangle",arr.width=0.25,arr.length=0.10,
         col=sample(viridis::inferno(length(reference.genome.obj$start)),
                    size=length(reference.genome.obj$start),replace=TRUE),
         lty=1,lwd=gene.feature.width,arr.lwd=gene.feature.width)

  # Show a horizontal line (genome) between the forward and reverse strands
  segments(xlim.vals[1],2.25,xlim.vals[2],2.25,lwd=1.01,lty=4)

  # Show gene labels in the plot
  if( show.gene.label ){
    reference.genome.obj2<-reference.genome.obj %>%
      dplyr::filter(.data$start>=as.vector(xlim.vals[1]) & .data$end<=as.vector(xlim.vals[2]))
    text((reference.genome.obj2$start+reference.genome.obj2$end)/2,4,
         labels=reference.genome.obj2$gene,col="black",srt=45,adj=0)
  }
}
ChrispinChaguza/RCandy documentation built on June 23, 2022, 1:03 p.m.