R/rsb.R

Defines functions rsb

Documented in rsb

#' @title Computes Rsb Statistics (standardized cross-population iES)
#'
#' @description
#' This function computes Rsb Statistics (standardized cross-population EHH) using two populations
#' and target-SNPs, then performs the SNPs' annotation and generate appropriate tables and graphics.
#'
#' @usage rsb(pop1, pop2, popname1, popname2, snp.list = "all", filter = 2, method = "both",
#' annot = T, write.xls = "both", plot = T)
#'
#' @param pop1 Scalar character with the name of the first population to be compared to the second.
#' @param pop2 Scalar character with the name of the second population to be compared to the first one.
#' @param popname1 Scalar character with an optional name for the first population.
#' @param popname2 Scalar character with an optional name for the second population.
#' @param snp.list Character vector. It could be a scalar named "all" (default) for loading all SNPs
#' present in 'snps' directory, or a character vector with the exact names from the target-SNP files
#' (also supposed to be in 'snps' directory).
#' @param filter Numerical value for filtering significant Rsb scores (default = 2).
#' @param method Scalar character. Indicates if applied method must be "bilateral", "unilateral",
#' or "both" (default).
#' @param annot Logical scalar. If TRUE (default), SNP annotation in xls files will be written with
#' correspondent SNP's genes, as well as ancestral and derived allele present in the dataset.
#' FALSE turns off this feature.
#' @param write.xls Scalar character. It could be set to "all.snps" for annotating all SNPs included
#' in this analysis, and "ss.snps" for annotating only SNPs statistically significants
#' (defined by 'filter' argument), or 'both' (default) for both annotation outputs.
#' @param plot Logical scalar. If TRUE (default), then Rsb-related plots will be generated and saved
#' as png figures.
#'
#' @details
#' R binary file named 'scanhh.RData' must be loaded before running this function. Populations going
#' to be analyzed must be provided as 'scanhh_list$NAME', where 'NAME' is the population name (see @@Examples
#' section). This function creates a new directory in 'rehh_out' folder, named 'rsb', with two sub-directories:
#' 'rsb/graphics' for the plots, and 'rsb/tables' for csv and xls files.
#'
#' The 'method' parameter indicates which method will be applied ('bilateral', 'unilateral', or 'both';
#' default = 'both'). The default value for 'filter' parameter has been set to 2 accordingly the
#' standard value described on original paper.
#'
#' The 'annot' parameter turns on or off annotation procedure (default is TRUE, meaning "on"), while argument
#' passed in 'write.xls' parameter tells which SNPs must be included on analysis ("all.snps", "ss.snps"
#' or "both"). Plot argument enables the generation of two plots/figures per population.
#'
#' If all parameter's arguments are used as they are by default, then a total of four simple files (.csv)
#' containing Rsb results will be generated by analysis to both methods, as well as four .xls files from
#' 'write.xls' parameter.
#'
#' Except by annotation argument ('annot = T'), this function usually runs very fast. However,
#' depending on how many target-SNPs are present in the dataset, the annotation velocity may vary considerably.
#'
#' @return Rsb files: simple result files (.csv), SNP annotation (.xls) and/or Rsb plots (.png).
#' @seealso \code{\link{make.scanhh}}
#'
#' @examples
#' \dontrun{
#' load('scanhh.RData')
#' rsb(pop1 = scanhh_list$Africans, pop2 = scanhh_list$Europeans)}
#'
#' \dontrun{
#' load('scanhh.RData')
#' rsb(pop1 = scanhh_list$Africans, pop2 = scanhh_list$Europeans), snp.list = 'all', filter = 2,
#' method = 'unilateral', annot = T, write.xls = 'ss.snps', plot = T)
#' }
#'
#' @export
#'

rsb <- function(pop1, pop2, popname1, popname2, snp.list = NULL, filter = 2L, method = "bilateral", annot = T, write.xls = "ss.snps",
                 plot = T, plot.format = "png") {
  
  if(!plot %in% c(T,F))
    stop("plot must be TRUE or FALSE.")
  
  if(!annot %in% c(T,F))
    stop("annot must be TRUE or FALSE.")
  
  if (missing(pop1)) {
    stop("You must specify the populations in 'scanhh.list' file.\nExample:\n> load(scanhh.list)\n> pop1 = scanhh_list$population1")
  }
  
  if (missing(pop2)) {
    stop("You must specify the populations in 'scanhh.list' file.\nExample:\n> load(scanhh.list)\n> pop2 = scanhh_list$population2")
  }
  
  if (missing(popname1)) {
    popname1 <- comment(pop1)
  }
  
  if (missing(popname2)) {
    popname2 <- comment(pop2)
  }
  
  if (!method %in% c("unilateral","bilateral","both"))
    stop('method must be "unilateral", "bilateral", or "both".')
  
  if (!plot.format %in% c("png", "jpeg", "tiff", "pdf", "svg", "ps", "win"))
    stop('File type not supported! Supporterd formats: "png", "jpeg", "tiff", "pdf", "svg", "ps", and "win".')
  
  # Loading Target-SNPs
  if (is.null(snp.list)) {
    snp.list <- list.files(path = "snps/", all.files = TRUE)
    snp.list <- grep(pattern = "[[:alnum:]]", x = snp.list, value = TRUE)
    snp.list.data <- unlist(lapply(X = snp.list, FUN = function(x) readLines(paste0("snps/", x))))
  } else {
    snp.list.data <- unlist(lapply(X = snp.list, FUN = function(x) readLines(paste0("snps/", x))))
  }
  
  # Check if there are any target-SNP on dataset
  dataSNPs <- row.names(scanhh.list[[1]])
  available_SNPs <- intersect(dataSNPs, snp.list.data)
  if ( length(available_SNPs) == 0L ) stop("There are no target-SNPs in the dataset.")
  
  # Computing Rsb Statistics as described by Tang et al. 2007
  cat("\n Computing Rsb Statistics... \n")
  dir.create(path = "rehh_out/rsb", showWarnings = F)
  dir.create(path = "rehh_out/rsb/tables", showWarnings = F)
  
  if (method == "bilateral" || method == "both") {
    
    rsb.bi <- rehh::ies2rsb(pop1, pop2, popname1, popname2, method = "bilateral")
    
    rsb.bi.snp.list <- rsb.bi[row.names(rsb.bi) %in% snp.list.data, ]
    if(nrow(rsb.bi.snp.list) == 0L) stop("\tNo target-SNPs present in this dataset.")
    rsb.bi.snp.list <- rsb.bi.snp.list[complete.cases(rsb.bi.snp.list), ]
    if(nrow(rsb.bi.snp.list) == 0L) stop("\tNo target-SNPs with available statistics are present in this dataset.")
    
    rsb.bi.snp.list.stat <- rsb.bi.snp.list[rsb.bi.snp.list[, 3] >= filter | rsb.bi.snp.list[, 3] <= -filter, ]
    
    data.table::fwrite(x = rsb.bi.snp.list, file = paste0("./rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.bi.csv"),
                       quote = F, row.names = T, col.names = T)
    data.table::fwrite(x = rsb.bi.snp.list.stat, file = paste0("./rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.bi.stat.csv"),
                       quote = F, row.names = T, col.names = T)
  }
  
  if (method == "unilateral" || method == "both") {
    
    rsb.uni <- rehh::ies2rsb(pop1, pop2, popname1, popname2, method = "unilateral")
    
    rsb.uni.snp.list <- rsb.uni[row.names(rsb.uni) %in% snp.list.data, ]
    if(nrow(rsb.uni.snp.list) == 0L) stop("\tNo target-SNPs present in this dataset.")
    rsb.uni.snp.list <- rsb.uni.snp.list[complete.cases(rsb.uni.snp.list), ]
    if(nrow(rsb.uni.snp.list) == 0L) stop("\tNo target-SNPs with available statistics are present in this dataset.")
    
    rsb.uni.snp.list.stat <- rsb.uni.snp.list[rsb.uni.snp.list[, 3] >= filter | rsb.uni.snp.list[, 3] <= -filter, ]
    
    data.table::fwrite(rsb.uni.snp.list, file = paste0("./rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.uni.csv"),
                       quote = F, row.names = T, col.names = T)
    data.table::fwrite(rsb.uni.snp.list.stat, file = paste0("./rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.uni.stat.csv"),
                       quote = F, row.names = T, col.names = T)
  }
  
  ### Generating graphics ###
  
  if (plot) {
    
    cat(" Plotting rsb results... \n\n")
    dir.create(path = "rehh_out/rsb/graphics", showWarnings = FALSE)
    
    if(exists("rsb.bi")) {
      filename <- paste0("rehh_out/rsb/graphics/", popname1, ".vs.", popname2, ".rsbplot.bi")
      rsbplot(rsb.data = rsb.bi, file.name = filename, file.type = plot.format, main = names(rsb.bi.snp.list)[3])
    }
    
    if(exists("rsb.uni")) {
      filename <- paste0("rehh_out/rsb/graphics/", popname1, ".vs.", popname2, ".rsbplot.uni")
      rsbplot(rsb.data = rsb.uni, file.name = filename, file.type = plot.format, main = names(rsb.uni.snp.list)[3])
    }
    
  }
  
  # SNP Annotation
  
  if (annot) {
    load("haplotypes/haps-sample.RData")
    haps.info.alleles <- data.table::rbindlist(haps.info)
    
    if (method == "bilateral" || method == "both") {
      if(exists("rsb.bi.snp.list")) {
        
        if (write.xls == "all.snps" || write.xls == "both") {
          if( nrow(rsb.bi.snp.list) > 0L ) {
            alleles.state <- haps.info.alleles[haps.info.alleles$V2 %in% rownames(rsb.bi.snp.list), ]
            snpgenes <- snp.annot(rownames(rsb.bi.snp.list))
            rsb.bi.snp.list.ncbi.xls <- cbind(CHR = rsb.bi.snp.list$CHR, SNP = rownames(rsb.bi.snp.list),
                                              Allele_A = alleles.state$V4, Allele_D = alleles.state$V5, GENE = snpgenes, rsb.bi.snp.list[2:4])
            file.name <- paste0("rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.bi.all.xls")
            WriteXLS::WriteXLS(rsb.bi.snp.list.ncbi.xls, ExcelFileName = file.name, SheetNames = "Rsb Analysis", AdjWidth = T, BoldHeaderRow = T)
            cat(paste0(paste0("File '", popname1, ".vs.", popname2, ".rsb.bi.all.xls"),"' successfully saved into 'rehh_out/rsb/tables/' directory \n"))
          } else {
            cat("\tNo SNPs have been found in this dataset (method = \"bilateral\" & write.xls = \"all.snps\")\n")
          } 
        }
        
        if (write.xls == "ss.snps" || write.xls == "both") {
          if( nrow(rsb.bi.snp.list.stat) > 0L ) {
            alleles.state <- haps.info.alleles[haps.info.alleles$V2 %in% rownames(rsb.bi.snp.list.stat), ]
            snpgenes <- snp.annot(rownames(rsb.bi.snp.list.stat))
            rsb.bi.snp.list.stat.ncbi.xls <- cbind(CHR = rsb.bi.snp.list.stat$CHR, SNP = rownames(rsb.bi.snp.list.stat),
                                                   Allele_A = alleles.state$V4, Allele_D = alleles.state$V5, GENE = snpgenes, rsb.bi.snp.list.stat[2:4])
            file.name <- paste0("rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.bi.stat.xls")
            WriteXLS::WriteXLS(rsb.bi.snp.list.stat.ncbi.xls, ExcelFileName = file.name, SheetNames = "Rsb Analysis", AdjWidth = T, BoldHeaderRow = T)
            cat(paste0(paste0("File '", popname1, ".vs.", popname2, ".rsb.bi.all.xls"),"' successfully saved into 'rehh_out/rsb/tables/' directory \n"))
          } else {
            cat("\tNo SNPs have been found in this dataset (method = \"bilateral\" & write.xls = \"all.snps\")\n")
          }
          
        }
      }
    }
    
    if (method == "unilateral" || method == "both") {
      if(exists("rsb.uni.snp.list")) {
        
        if (write.xls == "all.snps" || write.xls == "both") {
          if( nrow(rsb.uni.snp.list) > 0L ) {
            alleles.state <- haps.info.alleles[haps.info.alleles$V2 %in% rownames(rsb.uni.snp.list), ]
            snpgenes <- snp.annot(rownames(rsb.uni.snp.list))
            rsb.uni.snp.list.ncbi.xls <- cbind(CHR = rsb.uni.snp.list$CHR, SNP = rownames(rsb.uni.snp.list),
                                               Allele_A = alleles.state$V4, Allele_D = alleles.state$V5, GENE = snpgenes, rsb.uni.snp.list[2:4])
            file.name <- paste0("rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.uni.all.xls")
            WriteXLS::WriteXLS(rsb.uni.snp.list.ncbi.xls, ExcelFileName = file.name, SheetNames = "Rsb Analysis", AdjWidth = T, BoldHeaderRow = T)
            cat(paste0(paste0("File '", popname1, ".vs.", popname2, ".rsb.uni.all.xls"),"' successfully saved into 'rehh_out/rsb/tables/' directory \n"))
          } else {
            cat("\tNo SNPs have been found in this dataset (method = \"unilateral\" & write.xls = \"all.snps\")\n")
          }
        }
        
        if (write.xls == "ss.snps" || write.xls == "both") {
          if( nrow(rsb.uni.snp.list.stat) > 0L ) {
            alleles.state <- haps.info.alleles[haps.info.alleles$V2 %in% rownames(rsb.uni.snp.list.stat), ]
            snpgenes <- snp.annot(rownames(rsb.uni.snp.list.stat))
            rsb.uni.snp.list.stat.ncbi.xls <- cbind(CHR = rsb.uni.snp.list.stat$CHR, SNP = rownames(rsb.uni.snp.list.stat),
                                                    Allele_A = alleles.state$V4, Allele_D = alleles.state$V5, GENE = snpgenes, rsb.uni.snp.list.stat[2:4])
            file.name <- paste0("rehh_out/rsb/tables/", popname1, ".vs.", popname2, ".rsb.uni.stat.xls")
            WriteXLS::WriteXLS(rsb.uni.snp.list.stat.ncbi.xls, ExcelFileName = file.name, SheetNames = "Rsb Analysis", AdjWidth = T, BoldHeaderRow = T)
            cat(paste0(paste0("File '", popname1, ".vs.", popname2, ".rsb.uni.stat.xls"),"' successfully saved into 'rehh_out/rsb/tables/' directory \n"))
          } else {
            cat("\tNo SNPs have been found in this dataset (method = \"unilateral\" & write.xls = \"all.snps\")\n")
          }
        }
      }
    }
  }
}
cmcouto-silva/LD documentation built on Sept. 20, 2020, 2:46 p.m.