#main functions
clearAllSettings<-function(study){
study@studyMetaData@settings = list()
for(n in names(study@results)){
study@results[[n]]$settings = list()
}
return(study)
}
changeStudyNamePrompt<-function(study){
name = ""
while(T){
nname = readline("Please enter a new study name: ")
if(nname != "") break
}
study = changeStudyName(study=study, newName = nname)
return(study)
}
changeStudyName<-function(study, newName){
study@studyMetaData@studyName = newName
if(grepl(pattern="^test", x=newName)){
study@studyMetaData@RootFile = paste("./output/", newName, "/", sep="", collapse="")
}else{
study@studyMetaData@RootFile = paste("./output/study_", newName, "/", sep="", collapse="")
}
return(study)
}
#autoRunFromSettings
#runs all analyses described in settings found in meta data
#to make the data file, run the code below in examples,
#make sure the working directory is set to
#setwd("../packageDir/")
#then run this
# abacavirSettings = sres@studyMetaData@settings
# save(abacavirSettings, file="./inst/extdata/abacavirSettings.rda")
#'@title Automatically run set of analyses from a set of loaded settings.
#'@description Function searches for settings objects in the STUDY-studyMetaData-settings slot, then automatically executes the analysis for each arm described by the settings.
#'@param study A \code{Study} object with settings available.
#'@param verbose A flag indicating if analyses should be run in interactive/verbose mode, prompting user to check intermediate settings.
#'@return A \code{Study} object with analysis described by settings complete.
#'@export
#'@examples
#'#load a set of settings
#'abacavirSettings = getBasicSettings()
#'
#'#initialize the study
#'
#'s1 = getStudyObject(path_detail=getDefaultPaths(),
#' settings=abacavirSettings,
#' study.name="integrationTestAvacavirMetabolism")
#'s1 = loadBasicArms(STUDY=s1)
#'sres = autoRunFromSettings(study=s1)
autoRunFromSettings<-function(study, verbose=T){
#pull each analysis type out
allanalyses = names(study@studyMetaData@settings)
analyses = allanalyses[!grepl(pattern="^default|^overlap_analysis", x=allanalyses, ignore.case=T)]#remove any settings starting with the string "default"
if( length(analyses) ){
cat("Settings were found for these intput arms:\n")
cat(analyses, "\n")
for(a in analyses){
print(a)
study = runArm(armDescription=a, study=study,
fromDescription=F, interactive=F)
}
if("overlap_analysis"%in%allanalyses){
study = runArm(armDescription="overlap_analysis", study=study, fromDescription=F, interactive=F)
}
}else if(verbose){
tmp = readline("Sorry, no settings could be found.\nSelect 2 from main menu to load settings from another study.\nPress enter to continue. ")
}
return(study)
}#autoRunFromSettings
checkForceRowNames<-function(tab){
if(sum(rownames(tab) != as.character(1:nrow(tab)))&!is.null(rownames(tab))) return(tab)
rn = tab[,1]
tab = tab[,2:ncol(tab),drop=F]
rownames(tab)<-rn
return(tab)
}
#'@title Get settings object pre-loaded for example analysis
#'@description Returns set of setting for analysis of example somatic mutation data, example drug screen data, overlap analysis, and path analysis.
#'@return A \code{Settings}
#'@export
getBasicSettings<-function(){
#pull up the settings
varname = load(system.file("extdata/abacavirSettings.rda",package="packageDir"), verbose=TRUE)
abacavirSettings = get(varname[1])
if(is.data.frame(abacavirSettings)) abacavirSettings = dfToList(abacavirSettings)
#swap out locations of any files, getting
for( sl in names(abacavirSettings) ){
fileSlots = grep(pattern = "file", x = names( abacavirSettings[[sl]] ) )
for(si in fileSlots ){
fileTmp = basename( abacavirSettings[[sl]][[si]] )
if(fileTmp!="") abacavirSettings[[sl]][[si]] = system.file(paste0("extdata/", fileTmp), package="packageDir")
}
}
return(abacavirSettings)
}
#runArm
#function that executes the armsMain interface
#if NULL is returned by the arm, NULL will be assigned to the slot in the study and settings
#takes:
# study: the study object
# arm description: the arm description string or the arm name (if arm name, the fromDescription flag must be set to F)
# interactive: flag for interactive mode
#returns: study
#'@title runArm
#'@description Main function for taking and running a study arm. Provides needed settings and study object, implementing the study arm interface.
#'@param armDescription the desription slot from a DataArm object
#'@param study A Study object.
#'@param fromDescription Indicates if arm should be retrieved by its description or by its title.
#'@param interactive Indicates if arm should be run in interactive mode.
#'@return Returns a Study object with results from running the study arm.
#'@export
runArm<-function(armDescription,
study,
fromDescription=T,
interactive=NULL){
#pull out settings and arms
arms = study@arms
settings = study@studyMetaData@settings
#establish which arm's settings to extract
if(fromDescription){
armName = arms$dictionary[armDescription]
}else{
armName = armDescription
}
#load settings here incase generic arm is found in next section
curSettings = settings[[armName]]
if( grepl(pattern="_generic_", x=armName, ignore.case=T) ){
print("Setting up for generic/arbitrary gene data input..")
armName="arbitrary_gene_data_input"
}
if(is.data.frame(curSettings)) curSettings = dfToList(df=curSettings)
if( is.null(interactive)&!is.null(curSettings) ){
inter_option = readline("Would you like to process the data interactively so that\nnew settings or files can be selected? (enter y or n)\n")
interactive = inter_option=="y"
if(interactive){
changeFiles = readline("Would you like to only change the files and leave all the settings the same? (y/n)")
if(changeFiles=="y"){
interactive=F
curSettings$changeFiles = T
}
}
}
if(is.null(curSettings)){
curSettings = list()
curSettings$interactive=T
interactive = T
}else{
if(is.null(interactive)) interactive = T
curSettings$interactive = interactive
}
#pull out arm's main function
armMain = arms[[armName]]@mainFunction
#run the arm's main function
res = armMain(settings=curSettings, study=study)
if(!is.null(res)){
if(is.null(res$resTypeName)){
study@results[[armName]] = res
study@studyMetaData@settings[[armName]] = res$settings
}else{#if the arm is generic gene data input arm, there will be something in the resTypeName slot
study@results[[res$resTypeName]] = res$summary
study@studyMetaData@settings[[res$resTypeName]] = res$settings
}
}else{
cat("\nNothing returned from analysis. Returning to main menu.\n")
}
return(study)
}
#printProgramState
#displays a summary of loaded data
#takes: results list, study_name string and path_detail list object
#returns: nothing
#'@title printProgramState
#'@description Displays to standard out general information about the state of the program, including pathways, settings and data currently loaed.
#'@param stud A Study object from which the program state is to be determined.
#'@export
printProgramState<-function(stud){#results, study_name, path_detail){
results = stud@results
study_name = studyName(stud)
path_detail = getPaths(stud)
########################################################################
############## Current state of loaded data #################
########################################################################
cat("\n#########################################################")
cat("\nStudy name:",study_name,"\n")
cat("#########################################################")
if(is.null(path_detail)){
cat("\n***No paths loaded***\n")
}else{
cat("\nCellular pathway set from: ", path_detail$info, "containing ", as.integer(ncol(path_detail$paths)), "genes across", as.integer(nrow(path_detail$paths)), "pathways.\n" )
}
cat("######### Currently loaded data processing settings:")
cat("\n")
if(length(stud@studyMetaData@settings)){
cat(paste("-",names(stud@studyMetaData@settings), "\n", sep=""), sep="")
}else{
cat("\n***There are currently no data processing settings loaded***\n")
}
cat("######### Currently loaded data summaries:\n")
###show loaded data
if(length(results)){
# summaries:
# summis = grepl(pattern="summary", x=names(results))
# sumnames = names(results)[summis]
# # processed data:
# odnames = names(results)[!summis]
sumnames = names(results)
for(n in sumnames){
curnamev = strsplit(n,split="_")[[1]]
curname = paste(curnamev[1:(length(curnamev)-1)])
cursum = results[[n]]
if(length(grep(pattern="drug",x=curname,ignore.case=T))){
cat("-",curname,"data loaded.\t", as.character(nrow(cursum$coverage_summary$pathsummary)),"paths analyzed\n")
}else if(grepl(pattern="^overlap", x=n,ignore.case=T)){
cat("-",curname,"data loaded.\t", as.character(nrow(cursum$"Aberrationally enriched, containing drug targets")),"paths functionally targeted and significantly aberrational\n")
}else{
cat("-",curname,"data loaded.\t", as.character(nrow(cursum$pathsummary)),"paths affected\n")
}
}
# cat("######### Other loaded data:\n")
# for(n in odnames) cat("-",gsub(pattern="_",replacement=" ",x=n),"\n")
}else{
cat("\n***There is currently no aberration or drug screen data loaded***\n")
}
cat("\n#########################################################\n")
# cat("#########################################################")
# ###indicated if patients will be limited to some subset
# if(is.null(aberration_patient_subset)){
# cat("\nUsing full set of patients\n")
# }else if(length(grep(pattern="TCGA",x=patient_set_name,ignore.case=T))){
# cat("\nSingle patient loaded, pid:",patient_set_name,"\n")
# }else{
# cat("\nLoaded",patient_set_name,"cohort consisting of ",length(aberration_patient_subset)," patients out of ",length(all_patients)," patients.\n")
# }
# cat("#########################################################\n")
}#printProgramState
# darkPaths = results$overlap_analysis$'Aberration enriched, not drug targeted'$path_id
#determine list of genes found most often in pathways
#'@title Get pathway overlaps.
#'@description Determine the set of overlapping genes and number of pathway overlaps for a set of pathways.
#'@param darkPaths The paths to search for overlaps.
#'@param path_detail A Path_Detail object.
#'@return A data frame with columns "Number of paths" and "Path names", and a row for each gene in all the submitted pathways.
#'@export
BangForBuck<-function(darkPaths, path_detail){
cat("\nIn bang for buck analysis...")
if(!is.vector(darkPaths)){
darkPaths = darkPaths[,1]
}
pths = path_detail$paths
#1 extract the paths from path_detail
#check if there are paths that can't be found
notFound = setdiff(darkPaths, rownames(pths))
if(length(notFound)){
message("Warning, could not find all paths. Was the path repository changed?")
warning("Warning, could not find all paths. Was the path repository changed between data inputs?")
warning(paste("These paths were not found and could not be used in the BangForBuck analysis of path overlaps:\n",
paste(notFound,collapse="\n ")),
"\n(end list of paths not found)")
darkPaths = intersect(darkPaths, rownames(pths))
}
xpaths = pths[darkPaths,,drop=F]
genecounts = rep(1, nrow(xpaths))%*%xpaths
genecounts = genecounts[1,]
genecounts = genecounts[genecounts!=0] #after this length(genecounts)=440
genecounts = genecounts[order(genecounts, decreasing=T)]
pathcross = c()
for(gn in names(genecounts)){
# print(gn)
cur = paste(rownames(xpaths)[xpaths[,gn]], sep=" | ", collapse=" | ")
pathcross = c(pathcross, cur)
}
pathsPerGene = cbind.data.frame(genecounts, pathcross, stringsAsFactors=F)
colnames(pathsPerGene)<-c("Number of paths", "Path names")
print("..done\n")
return(pathsPerGene)
}
#next, report which of the pathways interact with
#'@title Display summaries of loaded data.
#'@description This function prints to the screen summary statistics for data sets from each data arm.
#'@param study A \code{Study} object.
#'@export
#'@examples
#'study=getTestStudyObject()
#'DataSummary(study=study)
DataSummary<-function(study){
results=study@results
cat("\nDisplaying summaries for all loaded data:\n")
sumin = grep("summary", x=names(results),ignore.case=T)
for(i in sumin){
titleTxt = gsub(pattern="summary", replacement="", x=names(results)[i])
titleTxt = gsub(pattern="_", replacement=" ", x=titleTxt)
if(length(grep(pattern="coverage", names(results[[i]]),ignore.case=T))){
cat("\nOverview of",titleTxt, "coverage:\n")
print(results[[i]]$coverage_summary$summarystats)
}
cat("\nOverview of",titleTxt,"data:\n")
print(results[[i]]$summarystats)
}
}
#options(warn=-1)
bmerge<-function(x,y){
if(is.null(x)){
return(y)
}
#merge by rowname
#make rownames rownames again
#
out=merge(x=x,y=y,by=0,all=T,incomparable=NaN)
rownames(out)<-out[,1]
out = out[,2:ncol(out)]
#fill in NAs with zeros:
return(out)
}
#'@title Allows comparrison of summary statistics for all loaded aberration data types.
#'@description The function examines and compares the pathway summaries for all loaded aberration data types
#'@param study A \code{Study} object. More than one aberration data type needs to have been loaded before this function is run.
#'@return A \code{Study} object with a slot named, \code{"Aberration data type comparrison"} in the results list filled with a table comparring impacts of each aberration data type for each pathway.
#'@export
#'@examples
#' stud = getTestStudyObject()
#' stud = compareSources(study=stud)
#' resslot = slot(object=stud, name="results")
#' \dontrun{
#' View(resslot$'Aberration data type comparrison')
#' }
compareSources<-function(study){
#takes: the results list
#returns: Two item list: list(outtable=outtable, results=results)
# outtable slot: table containing comparisons of aberration data
# results slot: the original results list object
if(is.null(study@results[["combined_aberrations_summary"]])){
study@results[["combined_aberrations_summary"]] = combineAberrationTypes(study=study)
}
results = study@results
cat("\nSelecting columns for output table..\n")
#columns in output: <the overall analysis> <number of nodes contributed from each> <pvalue from each
#put first 10 columns of the onto the outtable
outtable = results$combined_aberrations_summary$pathsummary[,c(1,2,3,4,12)]
colnames(outtable)[4:ncol(outtable)] = paste(colnames(outtable)[4:ncol(outtable)], "combined", sep="_")
#go through the aberration summaries and
#append on the number of aberrational genes and the hyperg p w/FDR
abi = grep(pattern="_aberration_",x=names(results))
for(i in abi){ # for each slot in results holding aberration data
cur = results[[i]]
extract = cur$pathsummary[,c(1,4,12)]
#rename the extract columns
cname = gsub(pattern="_aberration_summary", replacement="", x=names(results)[i])
colnames(extract)[2:ncol(extract)] = paste(colnames(extract)[2:ncol(extract)], cname, sep="_")
#now append
outtable = merge(outtable,
extract,
by="path_id",
all=F,
sort=F)
}
cat("\nReordering by p. aberrational...\n")
#reorder outtable by the proportion aberrational:
outtable = outtable[order(outtable[,5],decreasing=F),]
cat("\nReturning from compareSources()\n")
cat("\nComparrison of aberration sources complete. \n")
study@results = results
study@results[["Aberration data type comparrison"]] = outtable
return(study)
}#compareSources
combineAberrationTypes2<-function(study, s, results, overlapPatients=NULL){
results=study@results
if(VERBOSE) print("inside combineAberrationTypes2()....................................")
if(is.null(s)) s = pickSettings(study=study, settingType="aberration")
big_pgm = NULL
abtypenames = "Summary of"
if(is.null(s)) s=study@studyMetaData@settings$defaultSummaryTable
#get the indexes for the aberration data
ri = grep(pattern="aberration_",x=names(results))
if(!length(ri)&length(grep(pattern="aberrations_", x=names(results)))&(length(overlapPatients)==1)){
#if we're looking at a single patient and they already have a combined aberrations analysis done, return it
return(results$combined_aberrations_summary)
}
if(VERBOSE){
print(overlapPatients)
print(names(results))
}
#next: find the overlap of patients in each ab type
# #if overlap patients are not yet provided , check if there are overlap patients
# if(is.null(overlapPatients)){
# for(i in ri){
# curres = results[[i]]
# print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
# if(VERBOSE) print(names(results)[i])
# if(VERBOSE) print(curres$patientsums)
# if(is.null(overlapPatients)){
# overlapPatients = rownames(curres$patientsums)
# if(VERBOSE) print("overlap patients rout 1:")
# if(VERBOSE) print(overlapPatients)
# }else{
# numover = length(overlapPatients)
# overlapPatients = intersect(overlapPatients, rownames(curres$patientsums))
# nlost = numover - length(overlapPatients)
# if(VERBOSE) print("overlap patients rout 2:")
# if(VERBOSE) print(overlapPatients)
# if(nlost > 0){
# cat("Note: ",nlost,"patients were found not to have been analyzed by all platforms.\n")
# }
#
# }
# }
# }#if(is.null(overlapPatients))
if(VERBOSE) cat("Total patients analyzed using all platforms:",length(overlapPatients),"\n")
if(!length(overlapPatients)){
cat("No patients were found to be analyzed on all platforms.\n",
"Re-run analysis using fewer aberration data types or data sets\n",
"with matching patient IDs to examine aberration overlap or \n",
"combination of aberration types. If an error does not occur,",
"any results that follow may be spurious.\n")
readline("Press any key to continue.\n")
}
coverage_summary=NULL
coverageSet = NULL
if(VERBOSE) print(ri)
#Establish joint coverage of all aberration types
#for each aberration type
cat("\nNote, limiting coverage to inlude only those genes\nanalyzed on all aberration analysis platforms\n")
for(i in ri){
curname = names(results)[i]
if(VERBOSE){
print("for(i in ri){ iteration:")
print(i)
print(curname)
}
curres = results[[i]]
if("coverage_summary"%in%names(curres)){
if(VERBOSE) cat(" ..coverage summary found.. ")
#there is a coverage summary, so extract the coverage set
covsum=curres$coverage_summary
if(is.null(coverageSet)){
if(VERBOSE) print("coverageSet is null")
coverageSet = rownames(covsum$genesummary)
}else{
if(VERBOSE) print("coverageSet is not null")
coverageSet = intersect(x=coverageSet, y=rownames(covsum$genesummary))
}
}
abtypenames = paste(abtypenames,curres$summarystats[1,2], sep=" ")
if(VERBOSE) print("abtypenames:")
if(VERBOSE) print(abtypenames)
trimGeneMatrix=curres$patientGeneMatrix[,overlapPatients,drop=F]#make sure only patients who have representation in each data type are being represented here
big_pgm = as.matrix(joinTables(t1=big_pgm, t2=trimGeneMatrix, name_prefix=curname, fill=0))
cat(".")
}#for
if(VERBOSE) print("infunction 2")
if(!is.null(coverageSet)){
#1: trim the big_pgm so there are no un-covered genes
goodPGMRows = rownames(big_pgm)%in%coverageSet
big_pgm = big_pgm[goodPGMRows,,drop=F]
}
if(VERBOSE){
print("summaryTable from combineAberrationTypes, regular")
cat("dim pgm going into summaryTable:", dim(big_pgm), "\n")
print(typeof(big_pgm))
print(ncol(big_pgm))
print(nrow(big_pgm))
print(length(big_pgm))
}
if( !is.null(big_pgm)&!is.null(coverageSet) ){
combined_aberrations_summary = summaryTable(study=study, individualEnrichment=TRUE,
activeGeneDescription="aberrational",
pgm=big_pgm,
coverage=coverageSet,
settings=s,
coverageDataSetDescription="analysis of coverage by poor-coverage genomic aberration analysis platform(s)",
coverageGeneDescription="aberration_analysis_covered",
dataSetDescription=abtypenames)
}else if(!is.null(big_pgm)){
combined_aberrations_summary = summaryTable(study=study,
individualEnrichment=TRUE,
activeGeneDescription="aberrational",
pgm=big_pgm,
settings=s,
dataSetDescription=abtypenames)
}else{
warning("Cannot find patient gene matrix combined for all patients...")
}
return(combined_aberrations_summary)
}#combineAberrationTypes2()
combineAberrationTypes<-function(study, s=NULL, results=NULL, runEachPatient=F, overlapPatients=NULL){
if(is.null(results)) results = study@results
if(is.null(s)){
s = pickSettings(study=study, settingType="aberration")
}
cat("\nCombining aberration types ....................................\n")
big_pgm = NULL
abtypenames = "Summary of"
if(is.null(s)) s=study@studyMetaData@settings$defaultSummaryTable
#get the indexes for the aberration data
ri = grep(pattern="aberration_",x=names(results))
if(!length(ri)&length(grep(pattern="aberrations_", x=names(results)))&(length(overlapPatients)==1)){
#if we're looking at a single patient and they already have a combined aberrations analysis done, return it
return(results$combined_aberrations_summary)
}
if(VERBOSE){
print(overlapPatients)
print(names(results))
}
#next: find the overlap of patients in each ab type
#if overlap patients are not provided , check if there are overlap patients
if(is.null(overlapPatients)){
for(i in ri){
curres = results[[i]]
cat("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n")
if(VERBOSE) print(names(results)[i])
if(VERBOSE) print(curres$patientsums)
if(is.null(overlapPatients)){
overlapPatients = rownames(curres$patientsums)
if(VERBOSE) print("overlap patients rout 1:")
if(VERBOSE) print(overlapPatients)
}else{
numover = length(overlapPatients)
overlapPatients = intersect(overlapPatients,rownames(curres$patientsums))
nlost = numover - length(overlapPatients)
if(VERBOSE) print("overlap patients rout 2:")
if(VERBOSE) print(overlapPatients)
if(nlost > 0){
cat("Note: ",nlost,"patients were found not to have been analyzed by all platforms.\n")
}
}
}
}#if(is.null(overlapPatients))
if(VERBOSE) cat("Total patients analyzed using all platforms:",length(overlapPatients),"\n")
if(!length(overlapPatients)){
cat("No patients were found to be analyzed on all platforms.\n",
"Re-run analysis using fewer aberration data types or data sets\n",
"with matching patient IDs to examine aberration overlap or \n",
"combination of aberration types. If an error does not occur,",
"any results that follow may be spurious.\n")
readline("Press any key to continue.\n")
}
coverage_summary=NULL
coverageSet = NULL
tm=NULL
print(ri)
#Establish joint coverage of all aberration types
#for each aberration type
cat("\nNote, limiting coverage to inlude only those genes\nanalyzed on all aberration analysis platforms\n")
for(i in ri){
curname = names(results)[i]
if(VERBOSE){
print("for(i in ri){ iteration:")
print(i)
print(curname)
print("point x")
}
curres = results[[i]]
if("coverage_summary"%in%names(curres)){
if(VERBOSE) print("coverage summary found")
#there is a coverage summary, so extract the coverage set
covsum=curres$coverage_summary
if(is.null(coverageSet)){
if(VERBOSE) print("coverageSet is null")
coverageSet = rownames(covsum$genesummary)
}else{
if(VERBOSE) print("coverageSet is not null")
coverageSet = intersect(x=coverageSet, y=rownames(covsum$genesummary))
}
}
abtypenames = paste(abtypenames,curres$summarystats[1,2], sep=" ")
if(VERBOSE) print("abtypenames:")
if(VERBOSE) print(abtypenames)
trimGeneMatrix=curres$patientGeneMatrix[,overlapPatients,drop=F]#make sure only patients who have representation in each data type are being represented here
big_pgm = as.matrix(joinTables(t1=big_pgm, t2=trimGeneMatrix, name_prefix=curname, fill=0))
if(VERBOSE) print("point z")
}#for
if(VERBOSE) print("infunction 2")
if(!is.null(coverageSet)&is.null(big_pgm)){
#1: trim the big_pgm so there are no un-covered genes
goodPGMRows = rownames(big_pgm)%in%coverageSet
big_pgm = big_pgm[goodPGMRows,,drop=F]
}
if(VERBOSE){
print("summaryTable from combineAberrationTypes, regular")
cat("dim pgm going into summaryTable:", dim(big_pgm), "\n")
print(typeof(big_pgm))
print(ncol(big_pgm))
print(nrow(big_pgm))
print(length(big_pgm))
}
if(!(is.null(big_pgm)&is.null(coverageSet))){
combined_aberrations_summary = summaryTable(study=study, individualEnrichment=runEachPatient,
activeGeneDescription="aberrational",
pgm=big_pgm,
coverage=coverageSet,
settings=s,
coverageDataSetDescription="analysis of coverage by poor-coverage genomic aberration analysis platform(s)",
coverageGeneDescription="analyzed",
dataSetDescription=abtypenames)
}else{
combined_aberrations_summary = list()
}
return(combined_aberrations_summary)
}#combineAberrationTypes()
#results$overlap_analysis$combined_aberrations_summary$path_summary_each_patient$X08.00173$
getPatientSubset<-function(){
############################################
############################################
############### load patient subset
prompt = paste("Enter f to load a subset of patients from a file\n",
"Enter s to load a subset of patients from a clinical data table\n",
"Enter r to reset and use all patients: ",sep="")
pidsource = readline(prompt)
if(pidsource=="r"){ #reset and use all patients
aberration_patient_subset = NULL
}
if(pidsource=="f"){#load a subset of patients from a file
cat("\nPlease select file of patient identifiers\n")
fname=file.choose()
pids = read.delim(file=fname,sep="\t", quote=F, stringsAsFactors=F)
pids = unlist(lapply(pids, extract_pid))
aberration_patient_subset=pids
}else if(pidsource=="s"){#load a subset of patients from a clinical data table
cat("\nPlease select the clinical data file. The first column must contain patient IDs.\n")
fname=file.choose()
clinical = read.delim(file=fname,sep="\t", stringsAsFactors=F)
pidcol = 1
all_patients = unlist(lapply(clinical[,pidcol], extract_pid))
extract_col = NULL
while(T){
extract_col = readline("Please enter the name of the column over which to divide the patient IDs: ")
if(extract_col%in%colnames(clinical)){
tmp_patient_set_name = extract_col
break
}
cat("Sorry, that column name did not match any of the columns")
}
summed_col = summarize_by(col=clinical[,extract_col],display=F)
cat("\nHere are the values found in that column:\n")
print(summed_col)
val = NULL
while(T){ #### Find the column to sort on.
val = readline("Please enter the value in that column you would like to extract patient IDs by: ")
if(val%in%summed_col[,"types"]){
tmp_patient_set_name = paste(tmp_patient_set_name, val, sep="_", collapse="_")
break
}#if
cat("Sorry, that value name did not match any of the values")
}##while
###############################################################################################
### with column in extract_col, and taking the pids with val in extract_col, pull the trigger,
### and put the set of pids into aberration_patient_subset
ecol = clinical[,extract_col]
ecol[is.na(ecol)] = paste(val, 0, sep="_", collapse="_")#this is a hack to deal with NAs
aberration_patient_subset = all_patients[ecol==val]
cat("The subset of patients selected is", tmp_patient_set_name,
". This subset contains", length(aberration_patient_subset), "patients.")
if("y"==readline("Is this the correct cohort? (please enter y or n) ")){
patient_set_name=tmp_patient_set_name
break
}##if
}
return(aberration_patient_subset)
}#getPatientSubset
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.