our: genetic data analysis

Documented in plot_phenotypes

#' @title plot phenotypes
#' @description A function that should be chained with get_phenotypes, to
#' produce phenotype relevant knockout plots. For a single phenotype it draws
#' one box plot per gene (with the individual observations jittered
#' underneath), flipped so that genes run along the vertical axis, and a dotted
#' reference line at value = 0.
#' @param df a data.frame with columns gene, value, sample, phenotype. The
#' phenotype column must contain exactly one distinct value. An optional
#' column 'mus' (per-gene means) enables \code{top} and \code{subset.dev}.
#' @param top Numeric. If the data contains means (col 'mus'), only the
#' \code{top} genes with the largest means are plotted. Ignored when
#' \code{NULL} or when there is no 'mus' column.
#' @param subset.dev Numeric. If the data contains means (col 'mus') and this
#' value is positive, only rows whose absolute mean is strictly greater than
#' it are kept. Ignored when \code{NULL}, non-positive, or when there is no
#' 'mus' column.
#' @return A ggplot object.
#' @export
plot_phenotypes <- function(df, top = NULL, subset.dev = NULL) {

  # Fail loudly if ggplot2 is missing instead of continuing after a silent
  # FALSE from require(); calls are namespaced so nothing is attached.
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("Package 'ggplot2' is required by plot_phenotypes().", call. = FALSE)
  }

  # check input
  stopifnot(is.data.frame(df))
  stopifnot(all(c("gene", "value", "sample", "phenotype") %in% colnames(df)))
  pheno <- unique(df$phenotype)
  stopifnot(length(pheno) == 1)

  # ensure correct type
  # (a factor must go through character first, otherwise as.numeric() would
  # return the internal level codes rather than the recorded values)
  if (is.factor(df$value)) {
    df$value <- as.character(df$value)
  }
  df$value <- as.numeric(df$value)
  df$gene <- as.factor(df$gene)

  ## optional filtering on the per-gene means
  if ("mus" %in% colnames(df)) {

    if (!is.null(subset.dev)) {
      stopifnot(
        is.numeric(subset.dev), length(subset.dev) == 1L, !is.na(subset.dev)
      )
      if (subset.dev > 0) {
        # rows with a missing mean are dropped rather than turned into
        # all-NA rows by logical subsetting
        keep <- !is.na(df$mus) & abs(df$mus) > subset.dev
        df <- df[keep, , drop = FALSE]
      }
    }

    if (!is.null(top)) {
      stopifnot(is.numeric(top), length(top) == 1L, !is.na(top), top >= 1)
      # rank genes by their mean, largest first, and keep the first `top`
      gene_means <- unique(data.frame(
        gene = as.character(df$gene),
        mus = df$mus,
        stringsAsFactors = FALSE
      ))
      ranked <- gene_means[order(gene_means$mus, decreasing = TRUE), ,
                           drop = FALSE]
      top_genes <- utils::head(unique(ranked$gene), as.integer(top))
      df <- df[df$gene %in% top_genes, , drop = FALSE]
    }
  }

  ## plot the data
  # coord_flip() swaps the axes of every layer, so the reference line at
  # value == 0 has to be declared on the y aesthetic (geom_hline); it is then
  # rendered as a vertical line in the flipped plot.
  ggplot2::ggplot(df, ggplot2::aes(x = gene, y = value)) +
    ggplot2::geom_jitter(alpha = 0.4) +
    ggplot2::geom_boxplot(fill = "lightblue", alpha = 0.8) +
    ggplot2::coord_flip() +
    ggplot2::ggtitle(as.character(pheno)) +
    ggplot2::geom_hline(yintercept = 0, linetype = "dotted") +
    ggplot2::theme_bw()
}