R/gen_exploratory_figures.R

Defines functions gen_exploratory_figures

Documented in gen_exploratory_figures

#' Generate exploratory figures and summary data for KOMP analysis input data
#'
#' This function generates figures and a summary text log from the input data
#' used for KOMP downstream analysis (e.g. data generated from
#' gen_downstream_analyis_input()). One PDF is written per figure and a short
#' block of counts is appended to \code{data_summary_log.txt}.
#'
#' @param data The preprocessed data (data.frame), outliers removed and
#'   containing only (phenotypes and predictors). The columns
#'   \code{Strain.Name}, \code{GenotypeSymbol}, \code{Sex} and
#'   \code{Date.of.test.New} are always used; every optional predictor named
#'   in \code{predictor.list} that is plotted must be present as a column too.
#' @param domain.name The domain name (string); used as the file name prefix.
#' @param predictor.list The vector containing all predictors for the
#'   phenotype domain. Extra figures are produced when it contains
#'   \code{Room.origin}, \code{Experimenter.ID}, \code{Time.categ},
#'   \code{DayOfWeek}, \code{Month}, \code{Arena} or \code{BW}.
#' @param path.output The path to the directory, where all figures and the
#'   summary text file will be stored. It is pasted directly in front of the
#'   file names, so it must end with a path separator.
#' @param pheno.list The vector of phenotype column names to plot against the
#'   test date. When \code{NULL} (the default) an object called
#'   \code{pheno.list} is looked up on the search path, which is what earlier
#'   versions of this function silently relied on.
#' @param control.strain The value of \code{Strain.Name} that identifies
#'   control animals. Defaults to \code{"C57BL/6NJ"}.
#'
#' @return \code{NULL}, invisibly. The function is called for its side
#'   effects (PDF files and the summary log).
#'
#' @examples
#' OFA <- read.table("OFAOut2.csv", sep=",", header=TRUE)
#' predictor.list <- as.character(meta.data[meta.data$pheno.group=="OFA",]$predictor.list)
#' pheno.list <- c("phenotype.1", "phenotype.2") # phenotype columns of OFA
#' path.output <- "/path/to/output/"
#' gen_exploratory_figures(OFA, "OFA", predictor.list, path.output, pheno.list)
#' @export
gen_exploratory_figures <- function(data, domain.name, predictor.list,
                                    path.output, pheno.list = NULL,
                                    control.strain = "C57BL/6NJ") {

  # Resolve the phenotype columns before any file is written, so that a
  # missing list fails immediately instead of after a dozen PDFs.
  if (is.null(pheno.list)) {
    # Legacy behaviour: the function used to read a global `pheno.list`.
    pheno.list <- get0("pheno.list", envir = globalenv(), inherits = TRUE)
  }
  if (is.null(pheno.list)) {
    stop("`pheno.list` must be supplied (phenotype column names to plot).",
         call. = FALSE)
  }
  pheno.list <- as.character(pheno.list)

  # Split animals into controls and mutants once. Rows with a missing strain
  # belong to neither group, which matches what subset() did.
  strain <- as.character(data$Strain.Name)
  is.control <- !is.na(strain) & strain == control.strain
  is.mutant <- !is.na(strain) & strain != control.strain
  mutants <- data[is.mutant, , drop = FALSE]

  # Print a list of plots into one PDF; the device is closed even when a plot
  # fails to render, so no graphics device is leaked.
  save_pdf <- function(suffix, plots, ...) {
    pdf(file = paste0(path.output, domain.name, suffix), ...)
    on.exit(dev.off(), add = TRUE)
    for (p in plots) {
      print(p)
    }
    invisible(NULL)
  }

  # Horizontal bar chart of mutant-line counts, filled by the column `fill`.
  mutant_bar <- function(fill, ...) {
    ggplot(mutants, aes(Strain.Name)) +
      geom_bar(aes_string(fill = fill), ...) +
      coord_flip()
  }

  # Scatter plot of one phenotype against the test date.
  date_scatter <- function(pheno, colour) {
    ggplot(data, aes_string(x = "Date.of.test.New", y = pheno)) +
      geom_point(aes_string(colour = colour)) +
      theme(axis.text.x = element_text(angle = 90, hjust = 1))
  }

  save_pdf(
    "_date_vs_genotype.pdf",
    list(ggplot(data, aes(Date.of.test.New)) +
           geom_bar(aes(fill = GenotypeSymbol)) +
           coord_flip()),
    height = 25
  )
  save_pdf("_mutantline_vs_sex.pdf", list(mutant_bar("Sex")), height = 100)
  save_pdf("_mutantline_vs_genotype.pdf", list(mutant_bar("GenotypeSymbol")),
           height = 100)
  save_pdf("_mutantline_vs_date.pdf",
           list(mutant_bar("Date.of.test.New", colour = "black")),
           height = 100, width = 30)

  # Optional predictors: column name -> output file suffix.
  optional.fills <- c(
    Room.origin = "_mutantline_vs_room.pdf",
    Experimenter.ID = "_mutantline_vs_experimenter.pdf",
    Time.categ = "_mutantline_vs_timecateg.pdf",
    DayOfWeek = "_mutantline_vs_dayofweek.pdf",
    Month = "_mutantline_vs_month.pdf",
    Arena = "_mutantline_vs_arena.pdf"
  )
  for (predictor in intersect(names(optional.fills), predictor.list)) {
    save_pdf(optional.fills[[predictor]], list(mutant_bar(predictor)),
             height = 100)
  }

  if ("BW" %in% predictor.list) {
    save_pdf(
      "_mutantline_vs_bw_by_sex.pdf",
      list(ggplot(mutants, aes(x = Strain.Name, y = BW)) +
             geom_boxplot(aes(fill = Sex)) +
             coord_flip()),
      height = 100
    )
  }

  # One page per phenotype in each of the two scatter PDFs.
  save_pdf("_date_scatter_by_genotype.pdf",
           lapply(pheno.list, date_scatter, colour = "factor(GenotypeSymbol)"),
           width = 30)
  save_pdf("_date_scatter_by_sex.pdf",
           lapply(pheno.list, date_scatter, colour = "Sex"),
           width = 30)

  # Count lines from the strains actually present among the mutants. Relying
  # on factor levels would count the control level (and unused levels) and
  # would break for character columns.
  n.mutants <- nrow(mutants)
  n.lines <- length(unique(strain[is.mutant]))
  avg.per.line <- if (n.lines > 0) n.mutants / n.lines else NA_real_

  # Append the summary to the log; the sink is removed even if cat() fails.
  write_summary <- function() {
    sink(paste0(path.output, "data_summary_log.txt"), split = TRUE,
         append = TRUE)
    on.exit(sink(), add = TRUE)
    cat("Phenotype Domain:", domain.name, "\n")
    cat("# of control animals:", sum(is.control), "\n")
    cat("# of mutant animals:", n.mutants, "\n")
    cat("# of mutant lines:", n.lines, "\n")
    cat("avg # of mutant animals per line:", avg.per.line, "\n")
    cat("\n\n\n")
  }
  write_summary()

  invisible(NULL)
}
dleelab/KompUtils documentation built on May 13, 2017, 3:31 a.m.