# R/gen.config.R
#
# Defines functions write.config gen.config
#
# Documented in gen.config write.config

#' Write a single view/task configuration file
#'
#' Emits one tab-separated `key<TAB>value` record per line. The file is
#' truncated by the first record and every later record is appended.
#' Normally called from gen.config().
#'
#' @param x Parameter sweep results generated by single.predictor(). The
#'   fields read here are `Accuracy`, `model`, and one of `alpha`, `mtry`
#'   or `C` depending on `model`.
#' @param v Value stored under the `data.fn` key (the view data file)
#' @param task Task name, stored under the `taskname` key
#' @param fn.config Filename of the config to be created
#' @param view.name Optional view name; a `view.name` record is written
#'   only when this is not NA
write.config <- function(x,v,task,fn.config='config_TEST.txt',view.name=NA) {
  ## Emit one key/value record into the config file
  put <- function(key, value, append=TRUE) {
    base::write(paste(key, value, sep='\t'), file=fn.config, append=append)
  }

  ## Records shared by every model type; the first one starts a fresh file
  put('data.fn', v, append=FALSE)
  put('acc', x$Accuracy)
  put('taskname', task)
  if(!is.na(view.name)) {
    put('view.name', view.name)
  }

  ## Records specific to the fitted model type (e.g. mtry for random forests)
  ## TODO: seems like this should be automated
  fitted.model <- x$model
  if(fitted.model=='glmnet') {
    put('type', 'en')
    put('alpha', x$alpha)
    put('measure', 'auc')
  } else if(fitted.model=='rf') {
    put('type', 'rf')
    put('mtry', x$mtry)
  } else if(fitted.model=='svmRadialCost') {
    put('type', 'svm')
    put('C', x$C)
    put('kernel', 'svmRadialCost') # TODO: allow different kernel types
  } else {
    ## Unrecognised model: only the shared records end up in the file
    message('WARNING: Model type unknown to platypus.')
  }
}

# TODO: ignore.label currently isn't used in the fxn
#' Generate configuration files for platypus
#'
#' For every view/task pair, runs a parameter sweep with single.predictor()
#' and, when `store` is TRUE, writes the result to a config file through
#' write.config(). Views without a known data file are written to
#' `config.loc` as `<view>.tsv`, and that path is recorded in the config.
#'
#' @param view.data List of view data matrices. Must be named.
#' @param tasks Table (e.g. a data frame) of task labels, one column per task.
#'   Its row names are used to select the matching rows of each view.
#' @param config.loc Where the config files should be stored. Created if it
#'   does not exist and something has to be written there.
#' @param model.type Type of classifier to use (select from en, rf, svm)
#' @param view.filenames List of files containing view feature data. When it is
#'   named with the view names it is matched by name, otherwise by position.
#'   NA entries mean "no file yet": the view matrix is then written to
#'   `config.loc`.
#' @param view.names List of names for each view, matched like
#'   `view.filenames`. Defaults to View1, View2, ...
#' @param ignore.label Label to ignore in the task file (default 'intermediate').
#'   Currently unused.
#' @param store Whether to store configs to file or not. Default FALSE.
#'
#' @examples
#' # Create several view/task pairs, including some missing labels
#' X1 <- matrix(rnorm(10000), nrow=100)
#' rownames(X1) <- paste0('Sample',seq(nrow(X1)))
#' colnames(X1) <- paste0('Feature',seq(ncol(X1)))
#' X2 <- matrix(rnorm(10000), nrow=100)
#' rownames(X2) <- paste0('Sample',seq(nrow(X2)))
#' colnames(X2) <- paste0('Feature',seq(ncol(X2)))
#' X3 <- matrix(rnorm(10000), nrow=100)
#' rownames(X3) <- paste0('Sample',seq(nrow(X3)))
#' colnames(X3) <- paste0('Feature',seq(ncol(X3)))
#' y <- sample(c('MOO','OINK',NA),100,replace=TRUE)
#' names(y) <- paste0('Sample',seq(max(nrow(X1), nrow(X2),nrow(X3))))
#' 
#' # Group together view and task data
#' view.data <- list(Farm=X1, Lion=X2, Cat=X3)
#' tasks <- as.data.frame(y)
#' 
#' # Pretend we loaded this data from some files
#' fn.view.names <- list(Farm='Farm.tsv', Lion='Lion.tsv', Cat='Cat.tsv')
#' 
#' # Generate config files
#' # If the data files don't already exist, use this
#' configs <- gen.config(view.data, tasks[,1,drop=FALSE], model.type='en', config.loc='config')
#' 
#' # Generate config files
#' gen.config(view.data[1], tasks[,1,drop=FALSE], model.type='en',config.loc='.')
#' gen.config(view.data, tasks, model.type='en', view.filenames=fn.view.names,config.loc='.')
#' gen.config(view.data, tasks, model.type='en',config.loc='.') 
#' gen.config(view.data, tasks, model.type='svm',config.loc='.')
#' gen.config(view.data, tasks, model.type='rf',config.loc='.')
#' 
#' @return Character vector of config filenames, for use in platypus
#'   (NULL when there are no view/task pairs)
#'
#' @export
gen.config <- function(view.data, tasks, config.loc='config', model.type=c('en','rf','svm'), view.filenames=NA, view.names=NA, ignore.label='intermediate', store=FALSE) {

  ## Outline:
  ##   For each view - fetch the view matrix and settle on its data file
  ##     For each task - fetch the task labels
  ##       Find optimal parameters for the view/task pair
  ##       Optionally write the config file
  ##       Add the config filename to the return list
  ##   Return the config filenames

  ## Make sure model type is in our current list of options
  model.type <- match.arg(model.type)

  ## Views are addressed by name everywhere below, so unnamed input cannot work
  views <- names(view.data)
  if(length(view.data) > 0 && (is.null(views) || any(is.na(views)) || any(views == ''))) {
    stop('view.data must be a named list with one non-empty name per view.', call.=FALSE)
  }

  ## Line a per-view argument up with the views: by name when the caller's
  ## names cover every view, otherwise by position (extra entries are ignored)
  align.to.views <- function(values, arg) {
    if(!is.null(names(values)) && all(views %in% names(values))) {
      return(values[views])
    }
    if(length(values) < length(views)) {
      stop(arg, ' must provide one entry per view in view.data.', call.=FALSE)
    }
    values <- values[seq_along(views)]
    names(values) <- views
    values
  }

  ## Create the output directory only when something is about to be written
  ensure.config.loc <- function() {
    if(!dir.exists(config.loc)) {
      dir.create(config.loc, recursive=TRUE, showWarnings=FALSE)
    }
  }

  ## If view names not provided, fall back to View1, View2, ...
  if(all(is.na(view.names))) {
    view.names <- paste0('View', seq_along(view.data))
  }
  view.names <- align.to.views(view.names, 'view.names')

  ## If view filenames not provided, use NA for every view so the check in the
  ## main loop triggers writing the data matrix to file
  if(all(is.na(view.filenames))) {
    view.filenames <- rep(NA, length(view.data))
  }
  view.filenames <- align.to.views(view.filenames, 'view.filenames')

  if(store) {
    ensure.config.loc()
  }

  ## TODO: ignore.label is accepted but not applied to the task labels yet

  ## Config filenames for returning, one slot per view/task pair
  task.names <- colnames(tasks)
  fns.config <- vector('list', length(views) * length(task.names))
  n.done <- 0L

  ## Main loop
  for( v in views ) {
    print(paste('View',v))
    X <- view.data[[v]]

    ## If a data filename was provided for this view, use it. Otherwise store
    ## the data next to the config files and reference that location.
    if(is.na(view.filenames[[v]])) {
      message('Data filepath not in config, writing data matrix to file in same location as configs.')
      v.fn <- file.path(config.loc, paste0(v,'.tsv'))
      ensure.config.loc()
      utils::write.table(X, file=v.fn, sep="\t", row.names=TRUE, col.names=TRUE, quote=FALSE)
    } else {
      v.fn <- view.filenames[[v]]
    }

    for( task in task.names ) {
      print(paste('Task',task))

      ## Labels of the current task, named by sample
      y <- tasks[, task]
      names(y) <- rownames(tasks)

      ## Filename for this config: config_<model>_<task>_<view>.txt
      fn.config <- file.path(config.loc, paste0('config_',model.type,'_',task,'_',v,'.txt'))
      print( paste('Generating config ',fn.config) )
      n.done <- n.done + 1L
      fns.config[[n.done]] <- fn.config

      ## Parameter sweep on the rows of the view that have a label entry;
      ## drop=FALSE keeps a single-feature or single-sample view two-dimensional
      res <- single.predictor(X[names(y), , drop=FALSE], y, model=model.type)
      if(store) {
        write.config(res, v.fn, task, fn.config=fn.config, view.name=view.names[[v]])
      }
      # TODO: collect res and return it instead of discarding it

    } # end for tasks
  } # end for views

  print('Finished, success!')
  # TODO: Change this to return view objects, not filenames. Goal is to make filenames optional.
  unlist(fns.config)
}
# graim/PLATYPUS documentation built on Oct. 4, 2019, 2:05 p.m.