iMKT: Integrative McDonald and Kreitman Test

Documented in DGRP

#' @title DGRP correction method
#'
#' @description MKT calculation corrected using DGRP method (Mackay et al. 2012 Nature).
#'
#' @details In the standard McDonald and Kreitman test, the estimate of adaptive evolution (alpha) can be easily biased by the segregation of slightly deleterious non-synonymous substitutions. Specifically, slightly deleterious mutations contribute more to polymorphism than they do to divergence, and thus, lead to an underestimation of alpha. Because adaptive mutations and weakly deleterious selection act in opposite directions on the MKT, alpha and the fraction of substitutions that are slighlty deleterious, b, will be both underestimated when both selection regimes occur. To take adaptive and slighlty deleterious mutations mutually into account, Pi, the count off segregatning sites in class i, should be separated into the number of neutral variants and the number of weakly deleterious variants, Pi = Pineutral + Pi weak del. Alpha is then estimated as 1-(Pineutral/P0)(D0/Di). As weakly deleterious mutations tend to segregate at low frequencies, neutral and weakly deleterious fractions from Pi can be estimated based on any frequency cutoff established.
#'
#' @param daf data frame containing DAF, Pi and P0 values
#' @param divergence data frame containing divergent and analyzed sites for selected (i) and neutral (0) classes
#' @param listCutoffs list of cutoffs to use (optional). Default cutoffs are: 0, 0.05, 0.1
#' @param plot report plot (optional). Default is FALSE
#' 
#' @return MKT corrected by the DGRP method. List with alpha results, graph (optional), divergence metrics, MKT tables and negative selection fractions
#'
#' @examples
#' ## Using default cutoffs
#' DGRP(myDafData, myDivergenceData)
#' ## Using custom cutoffs and rendering plot
#' DGRP(myDafData, myDivergenceData, c(0.05, 0.1, 0.15), plot=TRUE)
#'
#' @import utils
#' @import stats
#' @import ggplot2
#' @importFrom ggthemes theme_foundation
#' @importFrom cowplot plot_grid
#' @importFrom reshape2 melt 
#'
#' @keywords MKT
#' @export


####################################################
################# MKT-FWW function #################
####################################################

DGRP <- function(daf, divergence, listCutoffs=c(0,0.05,0.1), plot=FALSE) {
  
  ## Check data
  check <- checkInput(daf, divergence, 0, 1)
  if(check$data == FALSE) {
     stop(check$print_errors) }

  ## Declare output lists and data frames
  output <- list()
  mkt_tables <- list()
  fractions <- data.frame(row.names = c("d","f","b"))
  div_metrics <- list()
  div_cutoff <- list()
  
  ## Divergence metrics
  Ka <- divergence$Di/divergence$mi
  Ks <- divergence$D0/divergence$m0
  omega <- Ka/Ks
  div_table <- data.frame(Ka, Ks, omega)
  names(div_table) <- c("Ka", "Ks", "omega")
  
  ## Iterate along cutoffs
  for (cutoff in listCutoffs) {
    
    daf_below_cutoff <- daf[daf$daf <= cutoff, ] ## below DAF
    daf_above_cutoff <- daf[daf$daf > cutoff, ] ## over DAF 
    P0 <- sum(daf$P0) 
    Pi <- sum(daf$Pi) 

    ## Create MKT table 
    mkt_table_standard <- data.frame(Polymorphism = c(sum(daf$P0), sum(daf$Pi)), Divergence=c(divergence$D0,divergence$Di),row.names = c("Neutral class","Selected class"))
    mkt_table <- data.frame(`DAF below cutoff` = c(sum(daf_below_cutoff$P0), sum(daf_below_cutoff$Pi)), `DAF above cutoff`=c(sum(daf_above_cutoff$P0), sum(daf_above_cutoff$Pi)),row.names = c("Neutral class","Selected class"))
    
    ## Estimate fractions
    f_neutral <- mkt_table[1,1]/sum(daf$P0)
    Pi_neutral_below_cutoff <- Pi * f_neutral
    Pi_wd <- mkt_table[2,1] - Pi_neutral_below_cutoff
    Pi_neutral <- round(Pi_neutral_below_cutoff + mkt_table[2,2])
    
    ## Estimation of alpha
    alpha <- 1-((Pi_neutral/P0)*(mkt_table_standard[1,2]/mkt_table_standard[2,2]))
    
    ## Estimation of b: weakly deleterious
    b <- (Pi_wd/P0)*(divergence$m0/divergence$mi)
    
    ## Estimation of f: neutral sites
    f <- (divergence$m0*Pi_neutral)/(as.numeric(divergence$mi)*as.numeric(P0))
    
    ## Estimation of d, strongly deleterious sites
    d <- 1-(f+b)
    
    ## Fisher exact test p-value from the MKT
    m <- matrix(c(P0,Pi_neutral,divergence$D0,divergence$Di), ncol=2)
    pvalue <- fisher.test(m)$p.value
    
    ## Omega A and Omega D
    omegaA <- omega*alpha
    omegaD <- omega-omegaA
    
    ## Fractions
    fraction <-data.frame(c(d,f,b))
    names(fraction) <- cutoff
    fractions <- cbind(fractions,fraction)
    
    ## Store output  
    output[[paste("Cutoff = ",cutoff)]] <- c(cutoff, alpha, pvalue)
    div_cutoff[[paste("Cutoff = ",cutoff)]] <- c(cutoff, omegaA, omegaD)
    mkt_tables[[paste("Number of segregating sites by DAF category - Cutoff = ",cutoff)]]  <- mkt_table
  } 

  ## MKT tables
  mkt_tables[[paste("MKT standard table")]]  <- mkt_table_standard
  
  ## Results table
  output <- as.data.frame(do.call("rbind",output))
  colnames(output) <- c("cutoff", "alpha", "pvalue")
  
  ## Divergence metrics
  div_cutoff <- as.data.frame(do.call("rbind",div_cutoff))
  names(div_cutoff) <- c("cutoff", "omegaA", "omegaD")
  
  ## Render plot
  if (plot == TRUE) {
    
    ## Cut-offs graph
    plot <- ggplot(output, aes(x=as.factor(cutoff), y=alpha, group=1)) +
      geom_line(color="#386cb0") + 
      geom_point(size=2.5, color="#386cb0")+
      themePublication() +
      xlab("Cut-off") + ylab(expression(bold(paste("Adaptation (",alpha,")")))) 
  
    ## Re-format outputs
    output <- output[,c(2,3)]
    names(output) <- c("alpha.symbol","Fishers exact test P-value")
    div_cutoff <- div_cutoff[,c(2,3)]
    colnames(div_cutoff) <- c("omegaA.symbol", "omegaD.symbol")
    div_metrics <- list(div_table, div_cutoff)
    names(div_metrics) <- c("Global metrics", "Estimates by cutoff")
  
    ## Melt fractions data
    fractions_melt <- melt(fractions, id.vars=NULL) 
    fractions_melt$Fraction <-  rep(c("d", "f", "b"),length(fractions_melt$variable)/3)
  
    ## Fractions graph
    plotfraction <- ggplot(fractions_melt) + geom_bar(stat="identity", aes_string(x="variable", y="value", fill="Fraction"), color="black") +
      coord_flip() + themePublication() + ylab(label="Fraction") + xlab(label="Cut-off") +
      scale_fill_manual(values=c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33"), breaks=c("f","d","b"), labels=c(expression(italic("f")),expression(italic("d")),expression(italic("b")))) +
      theme(axis.line=element_blank())  + scale_y_discrete(limit=seq(0,1,0.25), expand=c(0,0))
  
    plot <- plot_grid(plot, plotfraction, nrow=2, labels=c("A","B"), rel_heights=c(2,1))
  
    ## Return list output  
    list_output <- list(output, plot, div_metrics, mkt_tables, fractions)
    names(list_output) <- c("Results","Graph", "Divergence metrics", "MKT tables","Fractions")
  
  ## If no plot to render
  } else if (plot==FALSE) {
      ## Re-format outputs
      output <- output[,c(2,3)]
      names(output) <- c("alpha.symbol","Fishers exact test P-value")
      div_cutoff <- div_cutoff[,c(2,3)]
      colnames(div_cutoff) <- c("omegaA.symbol", "omegaD.symbol")
      div_metrics <- list(div_table, div_cutoff)
      names(div_metrics) <- c("Global metrics", "Estimates by cutoff")
    
      ## Melt fractions data
      fractions_melt <- melt(fractions, id.vars=NULL) 
      fractions_melt$Fraction <-  rep(c("d", "f", "b"),length(fractions_melt$variable)/3)
      
      ## Return list output  
      list_output <- list(output, div_metrics, mkt_tables, fractions)
      names(list_output) <- c("Results", "Divergence metrics", "MKT tables","Fractions")
  }
  
  ## Return output
  return(list_output)
}