R/annovar.R

Defines functions vcf.annovar.annotation download.annovar.db is.annovar.installed

Documented in download.annovar.db is.annovar.installed vcf.annovar.annotation

#' check if ANNOVAR is installed
#' otherwise open the website of ANNOVAR
#'
#' @param path_to_annovar define path to ANNOVAR binaries
#' @param open_browser booleen if TRUE, when ANNOVAR is not detected a web-browser is open to the ANNOVAR's website
#'
#' @importFrom utils browseURL
#'
#' @export
#'
is.annovar.installed <- function(path_to_annovar=NA, open_browser=FALSE){
  message("Detection of ANNOVAR...")

  if(is.na(path_to_annovar)){
    test.annovar <- ((Sys.which("annotate_variation.pl") == "") | (Sys.which("table_annovar.pl") == ""))
  }else{
    test.annovar <- ( !file.exists(paste(path_to_annovar,"annotate_variation.pl",sep="/")) |
                      !file.exists(paste(path_to_annovar,"table_annovar.pl",sep="/"))      )
  }

  if(test.annovar){
    message("Please install ANNOVAR (annotate_variation and table_annovar) in your PATH!")

    if(open_browser){
      browseURL("http://annovar.openbioinformatics.org/")
    }

    return(FALSE)
  }else{
    message("ANNOVAR is detected in your PATH!")
    return(TRUE)
  }
}

#' download ANNOVAR DB for annotation
#' https://doc-openbio.readthedocs.io/projects/annovar/en/latest/user-guide/download/#additional-databases
#'
#' @param force a boolean wheter the function should check if the database is already stored before to download them
#' @param path_to_annovar define path to ANNOVAR binaries
#' @param path_annovar_db a path where the database will be stored
#'
#' @importFrom R.utils gunzip
#' @importFrom data.table fread
#' @importFrom data.table fwrite
#'
#' @export
#'
download.annovar.db <- function(force=FALSE, path_to_annovar=NA, path_annovar_db="DB_annovar"){

  if(!is.annovar.installed(path_to_annovar)){
    stop("ANNOVAR is not installed!")
  }

  if(is.na(path_to_annovar)){
    annotate_variation <- "annotate_variation.pl"
  }else{
    annotate_variation <- paste(path_to_annovar,"annotate_variation.pl", sep="/")
  }

  if(!dir.exists(path_annovar_db)){
    dir.create(path_annovar_db)
  }

  ## RefGene
  if(!file.exists(paste(path_annovar_db,"hg19_refGene.txt", sep="/"))){

    my_annovar_cmd_A <- paste(annotate_variation,
                                "-buildver hg19",
                                "-downdb -webfrom annovar refGene",
                                path_annovar_db)

    system(my_annovar_cmd_A)
  }

  ## dbSNP 150
  if(!file.exists(paste(path_annovar_db,"hg19_avsnp150.txt", sep="/"))){

    my_annovar_cmd_B <- paste(annotate_variation,
                                "-buildver hg19",
                                "-downdb -webfrom annovar avsnp150",
                                path_annovar_db)

    system(my_annovar_cmd_B)
  }

  ## gnomAD 2.1.1
  if(!file.exists(paste(path_annovar_db,"hg19_gnomad211_genome.txt", sep="/"))){

    my_annovar_cmd_C <- paste(annotate_variation,
                                "-buildver hg19",
                                "-downdb -webfrom annovar gnomad211_genome",
                                path_annovar_db)

    system(my_annovar_cmd_C)
  }

  ## clinvar 20190305
  if(!file.exists(paste(path_annovar_db,"hg19_clinvar_20190305.txt", sep="/"))){

    my_annovar_cmd_D <- paste(annotate_variation,
                                "-buildver hg19",
                                "-downdb -webfrom annovar clinvar_20190305",
                                path_annovar_db)

    system(my_annovar_cmd_D)
  }

  ## dbNSFP 3.5a (for PhyloP score)
  if(!file.exists(paste(path_annovar_db,"hg19_dbnsfp35a.txt", sep="/"))){

    my_annovar_cmd_D <- paste(annotate_variation,
                                "-buildver hg19",
                                "-downdb -webfrom annovar dbnsfp35a",
                                path_annovar_db)

    system(my_annovar_cmd_D)
  }

  ## gwasCatalog
  if(!file.exists(paste(path_annovar_db,"hg19_gwasCatalog.txt", sep="/"))){

    my_annovar_cmd_E <- paste(annotate_variation,
                                "-buildver hg19",
                                "-downdb gwasCatalog",
                                path_annovar_db)

    system(my_annovar_cmd_E)
  }

  ## Constraint pLoF Metrics (gnomAD)
  if(!file.exists(paste(path_annovar_db,"morfee.lof_metrics.by_gene.txt", sep="/"))){

    download.file("https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz",
                  paste(path_annovar_db,"gnomad.v2.1.1.lof_metrics.by_gene.txt.gz", sep="/"))

    gunzip(paste(path_annovar_db,"gnomad.v2.1.1.lof_metrics.by_gene.txt.gz", sep="/"))

    lof_gene <- fread(paste(path_annovar_db,"gnomad.v2.1.1.lof_metrics.by_gene.txt", sep="/"))

    colnames(lof_gene)[colnames(lof_gene)=="gene"] <- "#Gene_name" # Rename first column to allow ANNOVAR to detect the header

    col2keep <- c("#Gene_name","exp_lof","pLI","oe_lof","oe_lof_lower","oe_lof_upper")

    fwrite(lof_gene[, col2keep, with = FALSE], sep="\t", na = "NA", quote = FALSE,
           file=paste(path_annovar_db,"morfee.lof_metrics.by_gene.txt", sep="/"))

  }

}


#' vcf annotation with ANNOVAR
#'
#' @param my_raw_vcf a string to define name of raw VCF to annotate
#' @param path_results a path where the annoated VCF will be saved
#' @param path_to_annovar define path to ANNOVAR binaries
#' @param path_annovar_db a path where the database will be stored
#'
#' @export
#'
vcf.annovar.annotation <- function(my_raw_vcf, path_results="MORFEE_results", path_to_annovar=NA, path_annovar_db="DB_annovar"){

  if(!is.annovar.installed(path_to_annovar)){
    stop("ANNOVAR is not installed!")
  }

  if(is.na(path_to_annovar)){
    table_annovar <- "table_annovar.pl"
  }else{
    table_annovar <- paste(path_to_annovar,"table_annovar.pl", sep="/")
  }

  if(!dir.exists(path_results)){
    dir.create(path_results)
  }

  my_annovar_cmd_Z <- paste(table_annovar, my_raw_vcf, paste0(path_annovar_db,"/"),
                              "-buildver hg19",
                              "-out", paste(path_results,gsub(".vcf", "", basename(my_raw_vcf)), sep="/"),
                              "-remove -protocol refGene,gwasCatalog,avsnp150,clinvar_20190305,gnomad211_genome,dbnsfp35a -operation gx,r,f,f,f,f -nastring . -vcfinput",
                              "-xref",paste(path_annovar_db,"morfee.lof_metrics.by_gene.txt", sep="/"))

  print(my_annovar_cmd_Z)

  system(my_annovar_cmd_Z)

}
daissi/MORFEE documentation built on Jan. 9, 2021, 10:52 a.m.