# Evan Henrich
# ehenrich@fredhutch.org
# Gottardo Lab - Fred Hutchinson Cancer Research Center
# January 2017
#----------------------HELPER METHODS--------------------------------------
#' Generates TSV text tables for use in the hipc pipeline from ImmuneSpace data
#'
#' @param studies a vector of characters holding the study names for which to generate eSets
#' @param hai_dir a path for the preprocessed HAI data
#' @param ge_dir a path for the preprocessed GE and demographic data
#' @param rawdata_dir a path for directory to download rawdata into
#' @export
hipc_preprocess <- function(studies, hai_dir, ge_dir, rawdata_dir, orig_params = T){
if(orig_params == F){
yale.anno <- readline(prompt =
"Yale Studies' Gene Annotation: [H = HIPC manuscript, l = R library, m = Illumina manifest] ")
yale.anno <- ifelse(yale.anno %in% c("H", "h", ""), "o", yale.anno)
sdy80.anno <- readline(prompt = "SDY80 Gene Annotation: [H = HIPC manuscript, l = R library] ")
sdy80.anno <- ifelse(sdy80.anno %in% c("H", "h", ""), "o", sdy80.anno)
sdy80.norm <- readline(prompt = "Re-normalize SDY80 GE data? [T / f] ")
sdy80.norm <- ifelse(sdy80.norm %in% c(T, "", "t", "T"), TRUE, FALSE)
}else{
yale.anno <- "o"
sdy80.anno <- "o"
sdy80.norm <- F
}
input_map <- list()
input_map$o <- "original"
input_map$l <- "library"
input_map$m <- "manifest"
yale.anno <- input_map[[yale.anno]]
sdy80.anno <- input_map[[sdy80.anno]]
for(sdy in studies){
print(paste0("Generating GE for ", sdy))
makeGE(sdy,
yale.anno = yale.anno,
sdy80.anno = sdy80.anno,
sdy80.norm = sdy80.norm,
output_dir = ge_dir,
rawdata_dir = rawdata_dir)
print(paste0("Generating HAI for ", sdy))
makeHAI(sdy, output_dir = hai_dir)
print(paste0("Generating Demo for ", sdy))
makeDemo(sdy, output_dir = ge_dir)
}
}
#' Generates rds objects from studies given the study names and directories holding the rawdata.
#'
#' @param studies a vector of characters holding the study names for which to generate eSets
#' @param hai_dir a path for the preprocessed HAI data
#' @param ge_dir a path for the preprocessed GE and demographic data
#' @param rds_dir a path for the output files (rds)
#' @export
hipc_make_rds <- function(studies, hai_dir, ge_dir, rds_dir){
message("Now combining files for each study into rds / eset file")
message("\n")
combined_hai <- combine_hai_data(hai_dir, output_dir = rds_dir)
for(sdy in studies){
make_rds(sdy, ge_dir, combined_hai, output_dir = rds_dir)
}
}
#' Perform HIPC Meta Analysis using given parameters
#'
#' Takes in various parameters related to the cutoff points for gene pathway selection, HAI endpoint selection,
#' adjustments to gene expression data, and output types.
#' @param rds_dir the path for the directory holding the RDS files with esets of each study
#' @param cohort the age cohort, either 'young' or 'old', to use in analysis
#' @param orig_params a boolean that defaults to TRUE, but when set to false allows the user to specify non-original parameters for the analysis
#' @param output_dir the path for the pdf and text outputs of the analysis
#' @export
hipc_meta_analysis <- function(rds_dir, cohort, orig_params = T, output_dir){
if(orig_params == F){
FDR.cutoff <- as.numeric(readline(prompt = "Input FDR.cutoff (orig = 0.5): "))
pvalue.cutoff <- as.numeric(readline(prompt = "Input pvalue.cutoff (orig = 0.01): "))
endPoint <- as.integer(readline(prompt = "Input HAI discretization low % cutoff [20 or 30]: "))
endPoint <- paste0("fc_res_max_d", endPoint)
adjusted <- readline(prompt = "Adjusted = [T / f] ")
adjusted <- ifelse(adjusted %in% c(T, TRUE, "t", t), TRUE, FALSE)
baselineOnly <- readline(prompt = "baselineOnly = [T / f] ")
baselineOnly <- ifelse(baselineOnly %in% c(T, TRUE, "t", t), TRUE, FALSE)
indiv_rds <- readline(prompt = "output individual Rds of gene module analysis? [T / F] ")
indiv_rds <- ifelse(indiv_rds %in% c(T, TRUE, "t", t), TRUE, FALSE)
}else{
FDR.cutoff <- 0.5
pvalue.cutoff <- 0.01
endPoint <- "fc_res_max_d30"
adjusted <- FALSE
baselineOnly <- TRUE
indiv_rds <- FALSE
}
message(paste0("Running meta analysis for ", cohort, " cohort"))
message("\n")
meta_analysis(geneSetDB = geneSetDB,
rds_data_dir = rds_dir,
cohort = cohort,
FDR.cutoff = FDR.cutoff,
pvalue.cutoff = pvalue.cutoff,
endPoint = endPoint,
adjusted = adjusted,
baselineOnly = baselineOnly,
indiv_rds = indiv_rds,
markdown = F,
output_dir = output_dir)
}
#----------------MAIN METHOD-----------------------------------------
#' Runs full pipeline from start to finish with user input required at certain points
#' @importFrom plyr ldply l_ply llply quickdf
#' @importFrom httr GET write_disk
#' @importFrom stringr str_sub str_match str_trim
#' @importFrom data.table fread setnames
#' @importFrom hash hash has.key
#' @importFrom tibble as_tibble
#' @importFrom RCurl getCurlHandle basicTextGatherer curlPerform
#' @importFrom DT datatable
#' @import knitr
#' @import dplyr
#'
#' @return text and pdf files wtih significant gene pathways
#' @export
hipc_full_pipeline <- function(){
bioc_install <- readline(prompt = "Install BioConductor dependencies? [ T / f ] ")
if(bioc_install %in% c(T, "T", "t", "")){
source("https://bioconductor.org/biocLite.R")
biocLite("Biobase")
biocLite("qusage")
biocLite("illuminaHumanv4.db")
biocLite("hugene10sttranscriptcluster.db")
biocLite("hgu133plus2.db")
biocLite("DESeq")
biocLite("preprocessCore")
biocLite("GEOquery")
}
studies <- c("SDY212", "SDY63", "SDY404", "SDY400", "SDY80", "SDY67")
message(paste0("Your working directory is ", getwd()))
directory <- readline(prompt = "Use current working directory or different path? [ T / f ] ")
if(directory %in% c(T, "T", "t", "")){
ImmSig_dir <- file.path(getwd(),"ImmSig_Analysis")
}else if(directory %in% c(F, "F", "f")){
wkdir <- readline(prompt = "Please specify full path for directory where you want to save files: ")
if(!dir.exists(wkdir)){
stop("Directory does not exist. Exiting analysis")
}else{
ImmSig_dir <- file.path(wkdir,"ImmSig_Analysis")
}
}else{
stop("Terminating: direction not understood")
}
message("Creating Subdirectory 'ImmSig_Analysis' for output files")
dir.create(ImmSig_dir)
message(paste0("path to 'ImmSig_Analysis: ", ImmSig_dir))
# 1. Extract and pre-process data from ImmuneSpace, ImmPort, or GEO databases
gen_files <- readline(prompt = "Generate and preprocess rawdata? [T / f] ")
if(gen_files %in% c(T, "T", "t", "")){
message("Creating PreProc_Data directory and sub-directories ... ")
# create preproc dir and subdirectories
pp_dir <- file.path(ImmSig_dir,"PreProc_Data")
dir.create(path = pp_dir)
hai_dir <- file.path(pp_dir,"HAI")
dir.create(path = hai_dir)
ge_dir <- file.path(pp_dir,"GE")
dir.create(path = ge_dir)
rawdata_dir <- file.path(pp_dir,"rawdata")
dir.create(path = rawdata_dir)
orig_params <- readline(prompt = "Use original parameters from HIPC manuscript? [ T / f ] ")
orig_params <- ifelse(orig_params %in% c(T, "", "t", "T"), TRUE, FALSE)
hipc_preprocess(studies, hai_dir, ge_dir, rawdata_dir, orig_params)
}
cat("Moving on to RDS Generation")
# Step 2: Combine pre-processed files into Rds (BioConductor eset)
run_rds <- readline(prompt = "Are you ready to run rds generation file? [T / f] ")
rds_dir <- file.path(ImmSig_dir, "Rds_data")
dir.create(path = rds_dir)
if(run_rds %in% c(T, "T", "t", "")){
hipc_make_rds(studies, hai_dir, ge_dir, rds_dir)
}
cat("Moving on to Meta Analysis")
# Step 3: Run meta analysis script
run_meta <- readline(prompt = "Are you ready to run meta analysis? [T / f] ")
if(run_meta %in% c(T, "T", "t", "")){
output_dir <- file.path(ImmSig_dir, "Meta_analysis_output")
dir.create(path = output_dir)
hipc_meta_analysis(rds_dir, cohort = "young", orig_params = T, output_dir = output_dir)
hipc_meta_analysis(rds_dir, cohort = "old", orig_params = T, output_dir = output_dir)
}else{
stop("process stopped")
}
cat(paste0("Output saved to ", output_dir))
}
#-----------------------------------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.