R/IGVsnap.R

Defines functions IGVsnap

Documented in IGVsnap

#' @title Script generator for Integrative Genomics Viewer (IGV) batch tasks.
#'
#' @description The function builds a script file that can be run on IGV to generate multiple screenshots at specific genomic regions.
#'
#' @param loci_vector Either a gene name vector (e.g. \code{c("Gapdh", "Spi1", ...)}) or a regions vector (eg. \code{c('chr1:253000-256503', ...)}. All IGV formats are allowed.
#' @param input_type Define the input type. Allowed values are \code{genes} and \code{regions}.
#' @param biomart Defines the \code{biomart} parameter for \code{biomaRt} package, by default \code{ensembl}.
#' @param dataset Defines the \code{dataset} parameter for \code{biomaRt} package, by default \code{mmusculus_gene_ensembl}.
#' @param reference_genome [optional] Defines the genome to use, e.g. "mm9", "mm10", "hg19", "hg38", ... . By default \code{NULL}.
#' @param fivePrime Numeric value to define of how many base-pairs (bp) expand from full gene position at it's 5'-end, default 1000bp.
#' @param threePrime Numeric value to define of how many base-pairs (bp) expand from full gene position at it's 3'-end, default 1000bp.
#' @param snap_names [optional] String vector to define the names of images (without extention), by default uses \code{loci_vector}.
#' @param IGV_batch_file String for the batch_script_file_name/path, by default \code{<working_directory>/IGV_batch.txt}.
#' @param snap_image_format String to define the format of the images, e.g. "png", "jpeg", "svg", ... . By default \code{png}.
#' @param snap_directory String for the output directory for the snapshoots. By default <working_directory>.
#' @param maxPanelHeight Numeric value to define the height in pixel of the IGV pannel that will be captured on IGV. By default \code{1000}.
#' @param delay.interval Sets a delay (sleep) time in milliseconds. The sleep interval is invoked between successive commands. By default \code{10}. helps to give the time to IGV to adapt the view before the snap (such as the autoscale).
#' @param session [optional] FULL path to an IGV session file (session.xml) to use for the images. By default \code{NULL}.
#' @param exit Logical value to indicate whether exit IGV after image capture ended. By default \code{FALSE}.
#'
#' @return Exports a .txt file ready-to-use on IGV.
#'
#' @details To run the script on IGV: Tools > Run Batch Script... > choose the .txt output file from this function. \cr For more info on how batch tasks work on IGV see: \cr \url{https://software.broadinstitute.org/software/igv/PortCommands}.
#'
#' @export IGVsnap
#'
# @import biomaRt
# @import dplyr

######################
## IGVsnap function ##
######################
IGVsnap = function(loci_vector,
                   input_type, # 'genes' or 'regions'
                   biomart = "ensembl",
                   dataset = "mmusculus_gene_ensembl",
                   reference_genome = NULL,
                   fivePrime = 1000, #bp
                   threePrime = 1000,
                   snap_names = NULL,
                   IGV_batch_file = paste(getwd(), "/IGV_batch.txt", sep = ""),
                   snap_image_format = "png",
                   snap_directory = getwd(),
                   maxPanelHeight = 1000,
                   delay.interval = 10,
                   session = NULL,
                   exit = FALSE) {

  #-----------------------------#
  # Check if Rseb is up-to-date #
  Rseb::actualize(update = F, verbose = F)   #
  #-----------------------------#

  # check parameters
    help_message = c(
    "Help for 'IGVsnap' function from 'Rseb' package: \n", "\n",
    "      loci_vector   either a gene name vector [e.g. c('Gapdh', 'Spi1', ...)] or a regions vector [eg. c('chr1:253000-256503', ...). All IGV formats are allowed. \n",
    "       input_type   define whether 'genes' or 'regions'. \n",
    "          biomart   'biomart' parameter for biomaRt package, by default 'ensembl'. \n",
    "          dataset   'dataset' parameter for biomaRt package, by default 'mmusculus_gene_ensembl'. \n",
    " reference_genome   [optional] to define the genome to use 'mm10', 'hg19, ..., 'NULL' by default. \n",
    "        fivePrime   how many bases [bp] exapand from full gene position at it's 5'-end, default 1000bp. \n",
    "       threePrime   how many bases [bp] exapand from full gene position at it's 3'-end, default 1000bp. \n",
    "       snap_names   [optional] names of images (without extention), by default uses 'loci_vector'. \n",
    "   IGV_batch_file   batch_script_file name/position, by default <working_directory>/IGV_batch.txt. \n",
    "snap_image_format   image format such as 'png', 'jpeg', 'svg', ..., by default 'png'. \n",
    "   snap_directory   output directory for snapshoots, by default <working_directory>. \n",
    "   maxPanelHeight   heigth in pixel of the IGV pannel that will be captured. \n",
    "   delay.interval   Sets a delay (sleep) time in milliseconds. The sleep interval is invoked between successive commands, default 10. \n",
    "          session   [optional] define an IGV session file .xml to use for the images, 'NULL' by default - USE FULL PATH -. \n",
    "             exit   logical to indicate whether exit IGV after image capture ended, FALSE by default. \n",
    "             help   logical to indicate whether display the help, FALSE by default. \n",
    "\n", "More info at https://software.broadinstitute.org/software/igv/PortCommands")

  if (exists("input_type") == F | exists("loci_vector") == F | (!(input_type %in% c("genes", "regions")))) {return(message(help_message))}

  # Retrieves regions
  if (input_type == "genes") {
    loci_vector = sort(unique(loci_vector))

    # Install packages from bioconductor
    pkg = "biomaRt"
    if (!require(pkg, character.only = TRUE)) {
      BiocManager::install(pkg)
      if(!require(pkg, character.only = TRUE)) stop(paste(pkg,"package not found."))}

    require(biomaRt)
    # to list all the datasets availables
    # listDatasets(useMart("ensembl"))

    # for getBM():
    # EXAMPLES
    # mm10 = useMart("ensembl", dataset="mmusculus_gene_ensembl")
    # listFilters(mm10)     /to filter output
    # listAttributes(mm10)  /output columns

    genome = useMart(biomart = biomart,
                     dataset = dataset)

    # loading gene list
    require(dplyr)
    gene_positions = getBM(mart = genome, # mart object, see "useMart" function
                           attributes = c("chromosome_name", "start_position", "end_position", "strand", "external_gene_name"), # columns in output
                           filters = "external_gene_name", # select only certain values,
                           values = loci_vector) %>% # values for the filter
    arrange(factor(external_gene_name, levels = loci_vector))

    gene_positions$start_position = gene_positions$start_position-fivePrime
    gene_positions$end_position = gene_positions$end_position+threePrime

    IGV_positions = data.frame(position = paste("chr", gene_positions$chromosome_name, ":",
                                                gene_positions$start_position, "-",
                                                gene_positions$end_position,
                                                sep = ""),
                               gene = gene_positions$external_gene_name)

    if (is.null(snap_names)) {snap_names = IGV_positions$gene}
    list = IGV_positions$position

    } else if (input_type == "regions") {
      if (is.null(snap_names)) {snap_names = loci_vector}
      list = loci_vector
    }

  # Initialization/Creation of the batch_file
  write(file = IGV_batch_file,
        x = paste("# IGV_batch_file,", as.character(date())))

  # Insert loading exiting session if required
  if (!is.null(session)) {
    write(file = IGV_batch_file,
          x = paste("load", session),
          append = T)
  } else if (!is.null(reference_genome)){
    write(file = IGV_batch_file,
          x = paste("genome", tolower(reference_genome)),
          append = T)
  }


  # Set pannel size (height in pixel)
  write(file = IGV_batch_file,
        x = paste("maxPanelHeight", maxPanelHeight),
        append = T)


  # Insert output directory for the snapshots
  write(file = IGV_batch_file,
        x = paste("snapshotDirectory", snap_directory),
        append = T)

  for (i in 1:length(list)) {
    # Write the position where to go
    write(file = IGV_batch_file,
          x = paste("goto", list[i]),
          append = T)

    # Write the command to delay interval
    write(file = IGV_batch_file,
          x = paste("setSleepInterval", delay.interval),
          append = T)

    # Write the command to take a snapshot with the name of the gene/position with the chosen extension
    write(file = IGV_batch_file,
          x = paste("snapshot ", snap_names[i], "_snapshot.", snap_image_format, sep =""),
          append = T)
  }

  # Add command to go to whole genome (to indicate that finished)
  write(file = IGV_batch_file,
        x = "goto all",
        append = T)

  # Exit command if required
  if (exit == TRUE) {
    write(file = IGV_batch_file,
          x = "exit",
          append = T)
  }

  message("The following batch script have been generated: \n",
          IGV_batch_file,
          "\n", "\n",
          "The final snapshoot will be generated in the following folder: \n",
          snap_directory)

} # END function
sebastian-gregoricchio/Rseb documentation built on June 14, 2024, 12:22 p.m.