#' Write a configuration to file. Generally used by gen.config()
#'
#' The config file holds one tab-separated key/value pair per line: first the
#' settings shared by all model types (\code{data.fn}, \code{acc},
#' \code{taskname} and optionally \code{view.name}), then the settings that are
#' specific to the model named in \code{x$model}.
#'
#' @param x Parameter sweep results generated by single.predictor(). The fields
#'   read here are \code{Accuracy}, \code{model} and, depending on the model,
#'   \code{alpha} (glmnet), \code{mtry} (rf) or \code{C} (svmRadialCost).
#' @param v Value written as \code{data.fn}; gen.config() passes the path of
#'   the file holding the view data
#' @param task Task name
#' @param fn.config Filename of the config to be created (overwritten if it
#'   already exists)
#' @param view.name Optional name of the view. The \code{view.name} line is
#'   only written for a single non-missing value; NA (default) and NULL skip it.
#'
#' @return NULL, invisibly. Called for its side effect of writing
#'   \code{fn.config}.
write.config <- function(x,v,task,fn.config='config_TEST.txt',view.name=NA) {
  # One "key<TAB>value" config line (or several, if value is a vector)
  entry <- function(key, value) paste(key, value, sep='\t')

  ## Things in all view model types
  config.lines <- c(
    entry('data.fn', v),
    entry('acc', x$Accuracy),
    entry('taskname', task)
  )
  # length() guard: is.na(NULL) is logical(0), which if() cannot evaluate
  if (length(view.name) == 1L && !is.na(view.name)) {
    config.lines <- c(config.lines, entry('view.name', view.name))
  }

  ## Things specific to this model type (e.g. ntrees)
  ## TODO: seems like this should be automated
  # as.character() copes with factor columns, unique() with a repeated model
  # label; a missing x$model becomes character(0) and falls through to the
  # "unknown" branch instead of raising "argument is of length zero".
  model <- unique(as.character(x$model))
  if (identical(model, 'glmnet')) {
    config.lines <- c(config.lines,
                      entry('type', 'en'),
                      entry('alpha', x$alpha),
                      entry('measure', 'auc'))
  } else if (identical(model, 'rf')) {
    config.lines <- c(config.lines,
                      entry('type', 'rf'),
                      entry('mtry', x$mtry))
  } else if (identical(model, 'svmRadialCost')) {
    config.lines <- c(config.lines,
                      entry('type', 'svm'),
                      entry('C', x$C),
                      entry('kernel', 'svmRadialCost')) # TODO: allow different kernel types
  } else {
    message('WARNING: Model type unknown to platypus.')
  }

  # Single write instead of re-opening the file for every line
  base::write(config.lines, file=fn.config, append=FALSE)
  invisible(NULL)
}
# TODO: the ignore.label argument of gen.config() is currently not used by the function
#' Generate configuration files for platypus
#'
#' For every view/task pair a parameter sweep is run with single.predictor()
#' and, when \code{store=TRUE}, the result is written to a config file with
#' write.config(). Views without a known data file are written to
#' \code{config.loc} as \code{<view>.tsv}, and that path is what the config
#' records as the data location.
#'
#' @param view.data List of view data matrices. Must be named. Rows are
#'   selected by the row names of \code{tasks}.
#' @param tasks Table (e.g. data frame) of task labels, one column per task,
#'   with samples as row names
#' @param config.loc Where the config files should be stored. Created if it
#'   does not exist yet and something has to be written there.
#' @param model.type Type of classifier to use (select from en, rf, svm)
#' @param view.filenames List of files containing view feature data, in the
#'   same order as \code{view.data}. Use NA (default) for views whose data
#'   should be written to \code{config.loc}.
#' @param view.names List of names for each view, in the same order as
#'   \code{view.data}. Defaults to View1, View2, ...
#' @param ignore.label Label to ignore in the task file (default 'intermediate').
#'   Currently not used by the function.
#' @param store Whether to store configs to file or not. Default FALSE.
#'
#' @examples
#' # Create several view/task pairs, including some missing labels
#' X1 <- matrix(rnorm(10000), nrow=100)
#' rownames(X1) <- paste0('Sample',seq(nrow(X1)))
#' colnames(X1) <- paste0('Feature',seq(ncol(X1)))
#' X2 <- matrix(rnorm(10000), nrow=100)
#' rownames(X2) <- paste0('Sample',seq(nrow(X2)))
#' colnames(X2) <- paste0('Feature',seq(ncol(X2)))
#' X3 <- matrix(rnorm(10000), nrow=100)
#' rownames(X3) <- paste0('Sample',seq(nrow(X3)))
#' colnames(X3) <- paste0('Feature',seq(ncol(X3)))
#' y <- sample(c('MOO','OINK',NA),100,replace=TRUE)
#' names(y) <- paste0('Sample',seq(max(nrow(X1), nrow(X2),nrow(X3))))
#'
#' # Group together view and task data
#' view.data <- list(Farm=X1, Lion=X2, Cat=X3)
#' tasks <- as.data.frame(y)
#'
#' # Pretend we loaded this data from some files
#' fn.view.names <- list(Farm='Farm.tsv', Lion='Lion.tsv', Cat='Cat.tsv')
#'
#' # Generate config files
#' # If the data files don't already exist, use this
#' configs <- gen.config(view.data, tasks[,1,drop=FALSE], model.type='en', config.loc='config')
#'
#' # Generate config files
#' gen.config(view.data[1], tasks[,1,drop=FALSE], model.type='en',config.loc='.')
#' gen.config(view.data, tasks, model.type='en', view.filenames=fn.view.names,config.loc='.')
#' gen.config(view.data, tasks, model.type='en',config.loc='.')
#' gen.config(view.data, tasks, model.type='svm',config.loc='.')
#' gen.config(view.data, tasks, model.type='rf',config.loc='.')
#'
#' @return Character vector of config filenames, for use in platypus
#'
#' @export
gen.config <- function(view.data, tasks, config.loc='config', model.type=c('en','rf','svm'), view.filenames=NA, view.names=NA, ignore.label='intermediate', store=FALSE) {
  ## Goals:
  ##   For each view - take the view data (and make sure it exists on file)
  ##     For each task - take the task labels
  ##       Find optimal parameters for view/task pair
  ##       generate config file
  ##       add config filename to return list
  ##   return list of config filenames

  ## Make sure model type is in our current list of options
  model.type <- match.arg(model.type)

  ## Views are looked up by name everywhere below, so every view needs one
  v.names <- names(view.data)
  if (length(view.data) > 0 && (is.null(v.names) || any(is.na(v.names) | !nzchar(v.names)))) {
    stop('view.data must be a named list.', call.=FALSE)
  }

  ## If view names not provided, fall back to View1, View2, ...
  if (all(is.na(view.names))) {
    view.names <- paste0('View', seq_along(view.data))
  }
  names(view.names) <- v.names

  ## If view filenames not provided, use NA for every view. NA means the data
  ## matrix gets written next to the configs further down.
  if (all(is.na(view.filenames))) {
    view.filenames <- rep(NA, length(view.data))
  }
  names(view.filenames) <- v.names

  ## Create the output directory lazily, only once something is written there
  ensure.config.loc <- function() {
    if (!dir.exists(config.loc)) {
      dir.create(config.loc, recursive=TRUE)
    }
  }

  task.names <- colnames(tasks)

  ## Store list of config filenames for returning (one per view/task pair)
  fns.config <- vector('list', length(view.data) * length(task.names))
  n.configs <- 0L

  ## Main loop
  for (v in v.names) {
    print(paste('View',v))
    X <- view.data[[v]]

    ## If view data filenames provided, use those. Otherwise store data to file
    ## in same directory as config files, and use that location
    if (is.na(view.filenames[[v]])) {
      message('Data filepath not in config, writing data matrix to file in same location as configs.')
      v.fn <- file.path(config.loc, paste0(v,'.tsv'))
      ensure.config.loc()
      # Write to the very path the config will point at
      utils::write.table(X, file=v.fn, sep="\t", row.names=TRUE, col.names=TRUE, quote=FALSE)
    } else {
      v.fn <- view.filenames[[v]]
    }

    for (task in task.names) {
      print(paste('Task',task))

      ## Use the current task labels - for multiview learning there's just the 1 task
      y <- tasks[,colnames(tasks)==task]
      names(y) <- rownames(tasks)

      ## Set up filename for this config
      fn.config <- file.path(config.loc, paste0('config_',model.type,'_',task,'_',v,'.txt'))
      print( paste('Generating config ',fn.config) )
      n.configs <- n.configs + 1L
      fns.config[[n.configs]] <- fn.config

      ## Parameter sweep based on task type
      res <- single.predictor(X[names(y),],y,model=model.type)
      if (store) {
        ensure.config.loc()
        write.config(res, v.fn, task, fn.config=fn.config, view.name=view.names[[v]])
      }
      # TODO: keep res in a list and return it instead of discarding it
    } # end for tasks
  } # end for views

  print('Finished, success!')
  return(unlist(fns.config)) # TODO: Change this to return view objects, not filenames. Goal is to make filenames optional.
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.