# pseudo-code:
# for (v in views)
# load v
# load labels
#
# while( not converged on labels OR no changes anymore )
# for( v in views )
# train v on labelled data
# predict unlabelled data
# check for instances where predictions agree -> new labels
# add newly found labels to labelled data
#OLD NOT USED @param w flag for weighting the predictions by accuracy, default=FALSE # TODO REMOVE
#' platypus multiview learning
#'
#' The standard platypus function for multiview learning
#' first pass the filepath for the labs file and the column name or number of the class (if not given, first column is default)
#' pass the filepath of a parameter-file for each view
#' @param view.list List of view objects
#' @param fn.labs File containing outcome labels
#' @param b Label class to ignore, if any. Defaults to 'intermediate'
#' @param i Maximal number of iterations for each platypus run, default=100
#' @param m Percent agreement required to learn a sample's class label, default=100
#' @param e Expanded output: returned result list contains a list of trained views after each iteration, default=FALSE
#' @param u Updating the accuracies of the single views in each iteration, default=FALSE
#' @param classcol.labs Optional argument. Which column from the labels file to use for learning
#' @param nfolds Number of cross-validation folds
#' @return final.result.list
#' @keywords platypus
#' @import glmnet
#' @import randomForest
#' @import methods
#' @export
platypus <- function(fn.labs, view.list, b='intermediate', i=100, m=100, e=FALSE,u=FALSE,classcol.labs=1, nfolds=10) {
## Debug flag can be manually activated, for testing purposes
flag.debug <- TRUE
#flag.debug <- FALSE
if(flag.debug) { print('Debug is on') }
## Set more readable names
majority.threshold.percent<-m
n.iters<-i
#classcol.labs <- 1 # The column containing the output labels. Often is the first column, not including rownames
updating <- u
expanded.output <- e
ignore.label <- b
## Load the data for labs and each view
# labs <- load.label.data(fn.labs,classcol.labs)
labs <- utils::read.table(fn.labs, sep='\t',header=TRUE, row.names=1, check.names=FALSE, stringsAsFactors = FALSE)
if(flag.debug) {print(table(labs[,classcol.labs]))}
# Get the two unique labels, ignoring the ignore.label TODO minor changes to remove fxn for now
unique.labels <- setdiff(unique(labs[,classcol.labs]),ignore.label)
#unique.labels <- get.unique.labels(labs[,classcol.labs],ignore.label)
#view.list <- lapply(view.list, load.data) # moved this outside platypus so it takes in already loaded view list
if(flag.debug) { print(lapply(view.list, function(x){length(intersect(rownames(labs),rownames(x$data.matrix)))} )) }
## Take all IDs for each data type
all.ids <- unique( unlist(lapply(view.list, function(x) {rownames(x$data.matrix)} )) )
labs <- labs[rownames(labs) %in% all.ids,,drop=FALSE] # labs without data in any view can not be used for training and/or prediction
## Sort out labelled and unlabelled IDs
# filter all ids marked with "ignore.label"
# if this method is called by cv.platypus, the hold-out fold is marked with 'testing' - has to be excluded from ids.labelled and ids.unlabelled!
ids.labelled <- rownames(labs[!(is.na(labs[,classcol.labs])) & labs[,classcol.labs] != 'testing' & labs[,classcol.labs] != ignore.label,,drop=FALSE])
labels <- labs[ids.labelled,classcol.labs,drop=F]
labels[,classcol.labs] <- factor(labels[,classcol.labs])
known.labels <- labels[,classcol.labs,drop=F]
# important to take all IDs, which are in the feature data, not just the ones which have 'NA' as class label in labs
ids.unlabelled <- all.ids[!(all.ids %in% ids.labelled)
& !(all.ids %in% rownames(labs[labs[,classcol.labs] == 'testing',,drop=F]))
& !(all.ids %in% rownames(labs[labs[,classcol.labs] == ignore.label,,drop=F]))]
# summary of newly found labels
new.labels <- c()
# number of views, which have to agree on a prediction, to take it into the training data
#view.list <- normalize.accuracies(view.list)
for(view.i in seq(length(view.list))){
view.list[[view.i]]$acc.norm <- normalize.accuracy.log(view.list[[view.i]]$acc)
}
sum.acc <- 0
for(view.i in seq(length(view.list))){
sum.acc = sum.acc + view.list[[view.i]]$acc.norm
}
weighting.threshold <- majority.threshold.percent*sum.acc/100
#starting values for all views agree
weighting.threshold.upper <- sum.acc
weighting.threshold.lower <- 0
# counter
iterations.woChange <- 0
# collect information about each iteration for expanded output
if(expanded.output){
collect.iteration.lists <- list()
unlabelled.matrix <- matrix(data=NA,nrow=length(ids.unlabelled),ncol=n.iters,dimnames=list(ids.unlabelled))
unlabelled.matrices.list <- list()
for (view.i in seq(length(view.list))) {
unlabelled.matrices.list[[view.i]] <- matrix(data=NA,nrow=length(ids.unlabelled),ncol=n.iters,dimnames=list(ids.unlabelled))
}
}
## Repeat until all labels are 'known' OR we hit the iterations limit OR no new labels are added anymore
for (i in seq(n.iters)) {
print(paste('Iteration',i))
## Quit if there aren't any unlabelled IDs left
if (length(ids.unlabelled) <= 0) {
print('No new labels to learn, stopping label learning.')
break
}
## Collect starting information for the iteration
if(expanded.output){
iteration.list <- list(iteration=i,weighting.threshold=weighting.threshold
,no.ids.labelled=dim(labels)[[1]],no.ids.unlabelled=length(ids.unlabelled))
}
## Train each view
view.list <- lapply(view.list, function(x) { view.train(labels,x,nfolds=nfolds) } )
## Test on the unknown labels
predictions <- matrix(data=NA, nrow=length(ids.unlabelled), ncol=length(view.list),dimnames=list(ids.unlabelled, seq(length(view.list))))
for(view.i in seq(length(view.list))){
ids <- intersect(ids.unlabelled,rownames(view.list[[view.i]]$data.matrix))
predictions[ids,view.i] <- view.predict(ids.unlabelled,view.list[[view.i]])
}
## Add unknown labels to known, where view agreement meets requirements
if(updating){
## update accuracies
view.list <- update.accuracies(view.list, known.labels)
## update weighting.threshold
sum.acc <- 0
for(view.i in seq(length(view.list))){
sum.acc = sum.acc + view.list[[view.i]]$acc.norm
}
weighting.threshold <- majority.threshold.percent*sum.acc/100
}
new.labelled.list <- get.new.labels.majorityWeighted(predictions,view.list,unique.labels)
# Quit, if the maximal prediction value reached is lower than the given threshold
if(new.labelled.list$weighting.threshold.upper < weighting.threshold){
print('No longer learning new labels, stopping label learning.')
break
} else{
new.labelled <- new.labelled.list$new.labelled
weighting.threshold.upper <- new.labelled.list$weighting.threshold.upper
weighting.threshold.lower <- new.labelled.list$weighting.threshold.lower
}
colnames(new.labelled) <- colnames(labels)
## Add new labels
labels <- rbind(labels,new.labelled)
new.labels <- rbind(new.labels, new.labelled)
if(flag.debug) {print( paste('Number labeled samples', length(ids.labelled)) ) }
## TODO: If only 1 class in labels list, quit with a warning UNTESTED
if(nrow(table(labels))!=2) {
print('No longer 2 classes, stopping iterations.')
break
}
## Remove newly found labels from unlabelled IDs
ids.unlabelled <- ids.unlabelled[!(ids.unlabelled %in% rownames(labels))]
print( paste('Remaining unlabeled IDs:', length(ids.unlabelled)) )
## Collect information about the iterations for expanded output
if(expanded.output){
iteration.list$weighting.threshold.upper <- weighting.threshold.upper
iteration.list$weighting.threshold.lower <- weighting.threshold.lower
iteration.list$no.new.labelled <- length(new.labelled)
iteration.list$no.ids.left.unlabelled <- length(ids.unlabelled)
iteration.list$view.list <- view.list
collect.iteration.lists[[i]] <- iteration.list
unlabelled.matrix[rownames(new.labels),i] <- new.labels
for (view.i in seq(length(view.list))) {
unlabelled.matrices.list[[view.i]][rownames(predictions),i] <- predictions[,view.i]
}
}
} # end for n.iters
## Collect information for the returned result
final.result.list <- list(final.views=view.list,labels.complete=labels,labels.new=new.labels,unlabelled.ids=ids.unlabelled
,weighting.threshold=weighting.threshold,n.iters=n.iters,expanded.output=expanded.output)
if(expanded.output){
final.result.list$iteration.information <- collect.iteration.lists
final.result.list$labelling.matrix <- unlabelled.matrix
final.result.list$labelling.matrices.views <- unlabelled.matrices.list
}
return(final.result.list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.