R/obliqueRF.default.R

##--------------------------------------
## oblique random forest 
##--------------------------------------
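
## Example usage (an illustrative sketch, wrapped in if(FALSE) so it is not
## executed when this file is sourced; it assumes the obliqueRF package is
## installed and that its generic obliqueRF() dispatches to this method):
if(FALSE){
    set.seed(1)
    x <- matrix(rnorm(200*10), 200, 10)
    y <- as.integer(x[,1] + x[,2] + rnorm(200) > 0)  # binary 0/1 labels
    fit <- obliqueRF(x, y, ntree=50, training_method="ridge")
    table(predicted=fit$pred$pred.test, truth=fit$pred$y.test)
}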

obliqueRF.default<- function(
                    x,
                    y,
                    x.test=NULL,
                    y.test=NULL,
                    mtry=NULL,
                    ntree=100,
                    training_method="ridge",                    
                    bImportance=FALSE,
                    bProximity=FALSE,
                    verbose=FALSE,
                    ...
                    )
{

    num_split_vars <- mtry

    # always check out-of-bag error
    do_oob<-TRUE

    if(verbose)
      cat("obliqueRF: WARNING: status messages enabled; set \"verbose\" to false to disable\n");

	# --- importance ---

	impStruct<-list(bImportance=bImportance,importance_pvalue=0,importance_weight="");

    if(bImportance){
        impStruct$importance_pvalue <- 0.05 # p value
        impStruct$importance_weight <- "none"
        do_oob<-FALSE  
        
        if(bProximity){
            cat("obliqueRF: WARNING: importance and proximity cannot be calculated at the same time. Will set \'bProximity = FALSE\' and continue.\n")
            bProximity = FALSE
        }

        if(!(training_method=="log")){
            cat("obliqueRF: WARNING: importance requires training_method=\"log\". Adjusting training_method...\n");
            training_method="log"    
        }

    }
    
    # --- check training method selection ---

    node_train_method=NULL;    
    if(training_method=="ridge"){        
        node_train_method=ridge_svd_wrapper
        cat("obliqueRF: using fast ridge regression with SVD as node model,\n does not scale data at the node,\n tests regularization parameter \"lambda\" for 10^(c(-5:5)).\n In case of problems with the SVD variant try training_method=\"ridge_slow\" for separate ridge regressions (slower).\n")
    }
    if(training_method=="ridge_slow"){        
        node_train_method=ridge_slow_wrapper
        cat("obliqueRF: using slow ridge regression as node model,\n does not scale data at the node,\n tests regularization parameter \"lambda\" for 10^(c(-5:5)) with separate explicit ridge regressions.\n")
    }
    if(training_method=="log"){
        node_train_method=log_wrapper
        cat("obliqueRF: using logistic regression as node model.\n")        
    }
    if(training_method=="svm"){
        node_train_method=svm_wrapper        
        cat("obliqueRF: using linear SVM as node model,\n scales data at the node,\n tests regularization parameter \"cost\" for 10^(c(-3,-1,1)).\n")        
    }
    if(training_method=="pls"){
        node_train_method=pls_wrapper        
        cat("obliqueRF: using partial least squares regression as node model,\n does not scale data at the node,\n optimizes regularization parameter \"ncomp\" (number of components) for 1:min(20, mtry).\n")
    }        
    if(training_method=="rnd"){        
        node_train_method=rnd_wrapper
        cat("obliqueRF: using a random hyperplane as node model,\n scales data to unit variance at the node,\n coefficients of random subspace projection are chosen from a normal distribution with unit variance.\n")
    }
	if( is.null(node_train_method) )
	{
		stop("obliqueRF: the training method is not valid, choose one of the following:\n  ridge, ridge_slow, log, svm, pls or rnd.\n  See documentation for more details. Aborting");
	}

    node_pred_method=predict

    # generic impurity support
    node_impurity_method=gini_impurity_wrapper;


    # ------------- DATA -------------

    # make sure we have valid data
    if(
      any(is.nan(as.vector(x))) |
      any(is.infinite(as.vector(x))) |
      any(is.nan(as.vector(y))) |
      any(is.infinite(as.vector(y)))
      )
    stop("obliqueRF: Training data contains NaNs or Inf values. Please correct and try again. Aborting...");

    if(is.null(y.test)){
        cat("obliqueRF: no test set defined. Will use training data.\n")
        x.test<-x
        y.test<-y    
    }
    if(
      any(is.nan(as.vector(x.test))) |
      any(is.infinite(as.vector(x.test))) |
      any(is.nan(as.vector(y.test))) |
      any(is.infinite(as.vector(y.test)))
      )
      stop("obliqueRF: Test data contains NaNs or Inf values. Please correct and try again. Aborting...");

    # labels: transform factors etc. to numerical values in {0,1}
    clnames <- sort(unique(paste(y)))
    if(!all(y==0|y==1)){
        
         yout<-rep(0,length(y))
         ytestout<-rep(0,length(y.test))
         
         yout[y==clnames[1]]<-0
         ytestout[y.test==clnames[1]]<-0     
         yout[y==clnames[2]]<-1
         ytestout[y.test==clnames[2]]<-1     
        
         y<-yout
         y.test<-ytestout
    }    

    # get a data.frame from x, y
    num_vars=ncol(x);
    num_samples=nrow(x);    

    if(length(y)!=num_samples)
        stop("obliqueRF: number of rows in x and length of y do not match. Aborting...")

    test_set<-cbind(as.matrix(x.test),y.test)
    
    d<-cbind(as.matrix(x),y)
    cl_id=num_vars+1;
    cl<- y

    # number of variables to split on at each node
    if(is.null(num_split_vars))
        num_split_vars=max(as.integer(sqrt(num_vars)),2); # default
    if(num_split_vars > num_vars){
        stop('obliqueRF: mtry is larger than the number of variables. Aborting...')
    }

    # check number of classes: only two-class problems with labels 0/1 are supported
    num_classes<-length(unique(cl));
    if((num_classes!=2) || (min(cl)!=0))
        stop("obliqueRF: wrong number of classes; expecting two classes labelled 0 and 1. Aborting...")

    # ------------- VARIABLES -----------------

    # allocate variables
      nr_nodes=2*num_samples+1; #total number of possible nodes
      nd_min_size=1; #minimum size of a node
      oob_glob=rep(0, num_samples); #indicates how often a sample was oob

      oob_class_votes <- matrix(0, num_classes, nrow(d));
      test_set_class_votes<-matrix(0, num_classes, nrow(test_set));

      if(bImportance){
		  imp<-rep(0,ncol(x))
		  imp_class_votes<-matrix(0, num_classes, nrow(d));
      }

      oob_prox <- matrix(NA, nrow(d), ntree)

    # ------------- TRAIN -------------

     err<-c()
     err_0<-c()
     err_1<-c()    
    
    # main loop
    trees=vector("list",ntree);
	if(verbose)
        cat("obliqueRF: starting.",date(),"\n")
    
    for(curr_num_tree in 1:ntree)
        {   
			if(verbose)
			  if((curr_num_tree%%10)==1) cat("", curr_num_tree,"out of",ntree,"trees so far.",date(),"\n");

            class_n=rep(0, num_classes); #vector containing the class distribution in sample

            # sample with replacement
            k<-sample(1:num_samples, num_samples, replace=TRUE);

            # "bag" statistic, how often is a sample in the bag?
            sample_pop=as.numeric(table(factor(k,levels=1:num_samples)));
            # count the class distribution in the bootstrap sample
            # (keep both levels even if one is absent from the sample)
            class_n=as.numeric(table(factor(cl[k], levels=0:1)));

            dframe<-d
            b_sample=sample_pop; # sampling frequency and index vector for oob
            nr_diff_cases=length(sample_pop); #all rows are passed to the tree; out-of-bag rows carry zero weight via b_sample
            n_case=1:num_samples;   #translation to original case numbers (needed for class population, etc.)

            # ---------------------- BUILDTREE ------------------------
            # build the tree, i.e. nr of columns in dframe (-1 for cl)

            tree<-buildtree(
                dframe,
                b_sample,
                class_n,
                n_case,
                nr_nodes,
                nd_min_size,
                nr_diff_cases,
                num_vars,
                num_split_vars,
                num_classes,
                node_train_method,
                node_pred_method,
				node_impurity_method,
                num_samples,
                impStruct
                );
            trees[[curr_num_tree]]=tree;

            # ---------------------- PREDICT ------------------------

			# convert the node_class from 0/1 to 1/2
			node_classIdx=tree$node_class+1;

            # -----------------
            # oob error
            if(do_oob){
				#global: count the oob's
				oob_glob=oob_glob+(sample_pop==0)*1;

				for(M in which(sample_pop==0)) # only oob samples
				{
					curr_node=1;
					for(i in 1:(tree$total_nr_nodes))
					{
						if(tree$node_status[curr_node]==-1) #==terminal node
						{
							# increment class count by 1
							oob_class_votes[node_classIdx[curr_node],M]=
							  oob_class_votes[node_classIdx[curr_node],M]+1;
							  
							oob_prox[M,curr_num_tree]<-curr_node
							break;
						}
						dset<-d[M, tree$best_vars[,curr_node]]; # *NOT* test_set
						score=sum(tree$training_stats[,curr_node]*dset);
						if(score<tree$best_split_values[curr_node])
							curr_node=tree$treemap[1, curr_node]
						else
							curr_node=tree$treemap[2, curr_node];
					}
				}
            }
            # -----------------
            # importance

            if(bImportance){
                for(pi in 1:ncol(tree$significance))
                    imp[tree$best_vars[,pi]]<-imp[tree$best_vars[,pi]] + tree$significance[,pi]
            }

            # -----------------
            # test-set predictions (the test set defaults to the training data when none was supplied)
            
                for(M in 1:nrow(test_set))
                {
                    curr_node=1;
                    for(i in 1:(tree$total_nr_nodes))
                    {
                        if(tree$node_status[curr_node]==-1) #==terminal node
                        {
						    # increment class count by 1
                            test_set_class_votes[node_classIdx[curr_node],M]=
						      test_set_class_votes[node_classIdx[curr_node],M]+1;
                            break;
                        }
                        dset<-test_set[M, tree$best_vars[,curr_node]];
                        score=sum(tree$training_stats[,curr_node]*dset);
                        if(score<tree$best_split_values[curr_node])
                            curr_node=tree$treemap[1, curr_node]
                        else
                            curr_node=tree$treemap[2, curr_node];
                    }
                }

            # -------------------- EVAL --------------------

            # each tree casts one vote per sample, so the column sum equals
            # the number of trees grown so far
            v<- 1*(test_set_class_votes[2,]/sum(test_set_class_votes[,1])>.5)
            m<-table(factor(y.test,levels=c(0,1)),factor(v,levels=c(0,1)))
            err_0[curr_num_tree]<- m[1,2]/sum(m[1,])
            err_1[curr_num_tree]<- m[2,1]/sum(m[2,])
            err[curr_num_tree]<- (m[1,2]+m[2,1])/sum(m)


        
     } # loop over trees

    test.votes<-test_set_class_votes[2,]/sum(test_set_class_votes[,1]) # fraction of trees voting for class 1

    # calc proximity
    if(bProximity){
		oob_counts<-matrix(0,nrow(d),nrow(d)) # diag(oob_counts) is oob_glob
		#count how often each pair of samples was oob in the same tree
		for(op in 1:ncol(oob_prox)){                
			wh<-which(!is.na(oob_prox[,op]))
			oob_counts[wh, wh]<-oob_counts[wh, wh] +1
		}

		node_counts<-matrix(0,nrow(d),nrow(d)) #how often an oob pair ends in the same terminal node
		for(op in 1:ncol(oob_prox)){
			tabl<-table(oob_prox[,op])
			mults<-as.numeric(names(tabl)[tabl>1])
			for(mi in mults){
			wh<-which(oob_prox[,op]==mi)
			node_counts[wh, wh]<-node_counts[wh, wh] +1
			}
		} 
		dst<-1 - node_counts/oob_counts
		diag(dst)<-0
    }



    oob.votes<-NULL; thr<-NULL
    if(do_oob){
		oob.votes<-oob_class_votes[2,]/(oob_class_votes[1,]+oob_class_votes[2,])
		# accuracy-maximising vote threshold on the oob votes, via ROCR
		pred <- prediction(oob.votes, y)
		perf <- performance(pred,"acc")
		thr<-perf@x.values[[1]][which.max(perf@y.values[[1]])]
    }

    cl <- match.call()
    cl[[1]] <- as.name("obliqueRF")

    res<-list(
    	call=cl,
    	type = "classification",
        class_names = clnames,
    	errs=list(err_0 = err_0, err_1 = err_1, err = err),
    	pred=list(  pred.test = 1*(test.votes>.5),
    		    votes.test = test.votes,
    		    y.test = y.test,
    		    votes.oob = oob.votes,
    		    y = y),
    	lab=paste('oRF - node:', tree$lab),
    	ntree = ntree,
    	mtry = num_split_vars,
    	importance = if(bImportance) imp else NULL,
    	proximity = if(bProximity) dst else NULL,
    	num_classes = num_classes,
    	trees = trees
    )

    class(res) <- "obliqueRF"
    return(res);
}
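
## The per-tree prediction above walks the flattened tree arrays returned
## by buildtree(). A minimal standalone sketch of the same traversal
## (a hypothetical helper, not part of the package API, not run):
if(FALSE){
    classify_one <- function(tr, xrow){
        curr <- 1
        repeat{
            if(tr$node_status[curr] == -1)     # terminal node
                return(tr$node_class[curr])    # class label 0 or 1
            score <- sum(tr$training_stats[,curr] * xrow[tr$best_vars[,curr]])
            curr <- if(score < tr$best_split_values[curr])
                tr$treemap[1,curr] else tr$treemap[2,curr]
        }
    }
}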


##--------------------------------------------
## buildtree
##--------------------------------------------


buildtree <- function(
    dframe,
    b_sample,
    class_n,
    n_case,
    nr_nodes,
    nd_min_size,
    nr_diff_cases,
    num_vars,
    num_split_vars,
    num_classes,
    node_train_method,
    node_pred_method,
    node_impurity_method,
    num_samples,
    impStruct
    )
{
    class_distr_nodes=matrix(0, num_classes, nr_nodes); #class distribution per node
    node_status=rep(0, nr_nodes); #indicates what type a node is: 1: parent node, 2: has not been processed yet, -1: terminal node
    node_start=rep(0, nr_nodes);  #start index for a node in the data matrix
    node_length=rep(0, nr_nodes); #nr of cases belonging to that node, starting from node_start

    var_imp=rep(0, num_vars);

    best_vars=matrix(0, num_split_vars, nr_nodes); #holds the split variables
    treemap<-matrix(0,2,nr_nodes); #holds the indexes of the child nodes
    dgini=rep(0, nr_nodes);
    dgini_perc=rep(0, nr_nodes);
    significance=matrix(0, num_split_vars, nr_nodes);
    best_split_values=rep(0,nr_nodes);
    ridge_values=rep(NA,nr_nodes);
    training_stats=matrix(0, num_split_vars,nr_nodes);

    ncur=1; #index of the last allocated node; children are written to ncur+1 and ncur+2
    node_status[1]=2; #first node has not been split yet...
    node_start[1]=1;
    node_length[1]=nr_diff_cases;

    #copy the class-numbers
    class_distr_nodes[,1]=class_n;

    n_temp_case=n_case;
    #iterate over the node_status array and check if there are unprocessed nodes;
    #split if necessary until terminal nodes are reached
    nr_parent_nds=0;
    curr_node=0;
    lab<-" "
    
    for(curr_node in 1:nr_nodes){

        if(node_status[curr_node]==0) break;
        if(node_status[curr_node]!=2) next; #the current node is either a parent or a terminal node
        
        #start and end index of the cases in the data matrix
        nd_start=node_start[curr_node];
        nd_length=node_length[curr_node];
        nd_end=nd_start+nd_length-1;

        #get the class distribution for the current node
        class_n=class_distr_nodes[,curr_node];

        res<-try(findbestsplit(
           dframe=dframe[n_case[nd_start:nd_end],],
            b_sample=b_sample[n_case[nd_start:nd_end]],
            class_n,
            nd_start,
            nd_length,
            num_vars,
            num_split_vars,
            num_classes,
            node_train_method,
            node_pred_method,
		    node_impurity_method,
            impStruct
            ),TRUE);

        if((res$jstat==1)|(class(res)=="try-error"))
        {
            node_status[curr_node]=-1;
            next;
        }

        lab<-res$lab

        best_vars[,curr_node]=res$variables;
        dgini[curr_node]=res$decsplit;
        dgini_perc[curr_node]=res$decsplit_perc;
        significance[,curr_node]=res$signif;
        best_split_values[curr_node]=res$best_split_value;
        ridge_values[curr_node]=res$ridge_value;
        training_stats[,curr_node]=res$training_stats;
        var_imp[res$variables]=var_imp[res$variables]+abs(res$training_stats)*res$decsplit;

        #count the parent nodes
        nr_parent_nds=nr_parent_nds+1;

        #save size, etc. for the two child nodes
        node_length[ncur+1]=res$isplit;
        node_length[ncur+2]=node_length[curr_node]-res$isplit;
        node_start[ncur+1]=nd_start;
        node_start[ncur+2]=nd_start+res$isplit;

        #update n_case
        true_left_ind=(nd_start-1)+((1:node_length[curr_node])[res$split_vect==1]);
        true_right_ind=(nd_start-1)+((1:node_length[curr_node])[res$split_vect==0]);
        n_temp_case[nd_start:(nd_start+res$isplit-1)]=n_case[true_left_ind];
        n_temp_case[(nd_start+res$isplit):nd_end]=n_case[true_right_ind];
        n_case[nd_start:nd_end]=n_temp_case[nd_start:nd_end];

        #class population/distribution  # sum over b_sample
        for(n in nd_start:(nd_start+res$isplit-1))
        {
            nc=n_case[n];
            j=dframe[nc, num_vars+1]+1;
            class_distr_nodes[j,ncur+1]=class_distr_nodes[j,ncur+1]+b_sample[nc];
        }
        for(n in (nd_start+res$isplit):nd_end)
        {
            nc=n_case[n];
            j=dframe[nc, num_vars+1]+1;
            class_distr_nodes[j,ncur+2]=class_distr_nodes[j,ncur+2]+b_sample[nc];
        }

        #adjust the node status
        node_status[ncur+1]=2;
        node_status[ncur+2]=2;
        if (node_length[ncur+1]<=nd_min_size) node_status[ncur+1]=-1;
        if (node_length[ncur+2]<=nd_min_size) node_status[ncur+2]=-1;

        #if pure node then terminal node
        if(sum(class_distr_nodes[,ncur+1]>0)==1) node_status[ncur+1]=-1;
        if(sum(class_distr_nodes[,ncur+2]>0)==1) node_status[ncur+2]=-1;
        nr_samples=sum(class_distr_nodes[,ncur+1])+sum(class_distr_nodes[,ncur+2]);

        #remember the child nodes
        treemap[1,curr_node]=ncur+1;
        treemap[2,curr_node]=ncur+2;

        node_status[curr_node]=1; #current node has been processed
        ncur=ncur+2;
    } #for(curr_node in 1:nr_nodes)

    #normalise the variable importance by the number of split (parent) nodes
    if(nr_parent_nds>0) var_imp=var_imp/nr_parent_nds;

    total_nr_nodes=curr_node;
    node_status[node_status==2]=-1;

    #get the class for each node
    node_class=apply(class_distr_nodes[,1:total_nr_nodes],2,which.max)-1;

    res2<-list(
        best_vars=best_vars[,1:total_nr_nodes], 
        best_split_values=best_split_values[1:total_nr_nodes], 
        ridge_values=ridge_values[1:total_nr_nodes], 
        training_stats=training_stats[,1:total_nr_nodes], 
        significance=significance[,1:total_nr_nodes],
        node_class=node_class, 
        dgini=dgini[1:total_nr_nodes], 
        dgini_perc=dgini_perc[1:total_nr_nodes], 
        total_nr_nodes=total_nr_nodes, 
        treemap=treemap[,1:total_nr_nodes], 
        node_status=node_status[1:total_nr_nodes], 
        var_imp=var_imp,
        lab=lab
        );

    return(res2);
}
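
## Layout of the flattened tree returned by buildtree(): node 1 is the
## root; treemap[1,k] and treemap[2,k] hold the left/right children of
## node k; node_status[k] is 1 for a split node and -1 for a terminal
## node; a sample goes left when the projection
## sum(training_stats[,k] * x[best_vars[,k]]) falls below
## best_split_values[k].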


##--------------------------------------------
## findbestsplit
##--------------------------------------------


findbestsplit <- function(
    dframe,
    b_sample,
    class_n,
    nd_start,
    nd_length,
    num_vars,
    num_split_vars,
    num_classes,
    node_train_method,
    node_pred_method,
    node_impurity_method,
    impStruct
    )
{
    #--------

    #draw some variable indices without replacement
    vars<- sample(1:num_vars, num_split_vars);

    #construct the subset (don't forget the labels!)
    temp_matrix<-dframe[,vars];
    cls<-dframe[,num_vars+1];



    #do the training 
    #we can get warnings from the training function - ignore for now
      ow<-options("warn");
	  options(warn=-1);
      z<- try(node_train_method(x=temp_matrix, y=cls, ind = b_sample, impStruct = impStruct),TRUE);

      options(ow);

      lab<-" "
      
      if((class(z)=="try-error") || (sum(is.na(z$coef))>0) || (table(cls)==1) || is.null(z$coef) )
      {
	  jstat=1;
	  res<-list(jstat=jstat);
	  return(res);
      }

    #call some impurity function
    impurity_meas<-node_impurity_method(temp_matrix, cls, b_sample, class_n, num_classes, nd_length, z);

    #if something went wrong we have no valid impurity data;
    #jstat is checked by the calling function as well
    if( impurity_meas$jstat==1 )
        return(impurity_meas);


    #assign the results
    res<-list(
        jstat=impurity_meas$jstat,
        variables=vars,
        isplit=impurity_meas$isplit,
        split_vect=impurity_meas$split_vect,
        decsplit=impurity_meas$decsplit,
        decsplit_perc=impurity_meas$decsplit_perc,
        best_split_value=impurity_meas$best_split_value,
        ridge_value=z$ridge_value,
        training_stats=z$coef,
        signif=z$signif,
        lab=z$lab
        );
    return(res);
}
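
## findbestsplit() returns either list(jstat=1) on failure or a full
## result holding the sampled variable indices, the oblique coefficients
## (training_stats), the cut position isplit in the score ordering, the
## left/right assignment split_vect, and the impurity decrease decsplit.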



##--------------------------------------------
## Impurity wrapper
##--------------------------------------------
gini_impurity_wrapper<-function(dmatrix, cls, b_sample, class_n, num_classes, nd_length, z,...)
{
    #Gini criterion of the unsplit node: crit0 = sum(class_n^2)/sum(class_n)
    pno<-sum(class_n^2); #numerator
    pdo<-sum(class_n);   #denominator

    crit0<-pno/pdo;
    critmax<-(-1.0e20);
    jstat<-0;
    isplit<-0;


    #get the scores: project each sample onto the node's split direction
    pred<-dmatrix%*%z$coef;
    #sort them
    pred_sort_matching<-sort.list(pred);
    p<-pred[pred_sort_matching];

    uv<-b_sample[pred_sort_matching];
    clv<-cls[pred_sort_matching]+1;

    rldv<-cumsum(uv);
    rrdv<-pdo-rldv;

    cum_class<-array(0,c(num_classes,nd_length,2));
    temp_uv<-numeric(nd_length);
    for(i in 1:num_classes)
    {
        idv<-clv==i;
        temp_uv[idv]<-uv[idv];
        temp_cumsum<-cumsum(temp_uv);
        cum_class[i,,1]<-(temp_cumsum^2);
        cum_class[i,,2]<-((class_n[i]-temp_cumsum)^2);
        temp_uv<-numeric(nd_length);
    }
    rlnv<-apply(cum_class[,,1], 2, sum);
    rrnv<-apply(cum_class[,,2], 2, sum);

    crit<-(rlnv/rldv)+(rrnv/rrdv);
    isplit<-which.max(crit)
    critmax<-crit[isplit];

    if (critmax<(-1.0e10))
    {
        jstat<-1;
        res<-list(jstat=jstat);
        return(res);
    }

    split_vect<-numeric(nd_length);
    #assign 1 to the first group, the 2nd will stay 0
    split_vect[pred_sort_matching[1:isplit]]<-1;
    decsplit<-((critmax-crit0));

    #calculate the offset
    best_split_value<-(p[isplit]+p[isplit+1])/2;
    #assign the results
    res<-list(
	jstat=jstat,
        isplit=isplit,
        split_vect=split_vect,
        decsplit=decsplit,
        best_split_value=best_split_value,
        decsplit_perc=(decsplit/crit0)
        );
    return(res);
}
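
## Toy illustration of the split criterion above (a sketch, not run):
## with unit weights the maximised quantity reduces to
##   crit(i) = (n_L0^2 + n_L1^2)/n_L + (n_R0^2 + n_R1^2)/n_R
## over cut positions i in the score ordering; a perfectly separable
## node attains the maximum at the class boundary.
if(FALSE){
    cls <- c(0,0,1,1)                   # two samples per class
    z   <- list(coef=1)                 # identity projection
    gini_impurity_wrapper(matrix(c(-2,-1,1,2)), cls, b_sample=rep(1,4),
                          class_n=c(2,2), num_classes=2, nd_length=4, z)
    # expected: isplit=2, best_split_value=0, decsplit=2 (crit 4 vs crit0 2)
}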





## -------------------------------------------
## NODE MODELS
## -------------------------------------------


##--------------------------------------------
## Logistic wrapper
## if "IMPORTANCE" is chosen ANOVA is calculated at every split
##--------------------------------------------


log_wrapper<-function(x=NULL, y=NULL, ind, impStruct,...) {

   ## what samples: get bootstrapped data set 
   wh<-c();
   for(i in 1:max(ind)) wh<-c(wh,which(ind>=i))

   ## what features: remove constant variables
   wp<-which(unlist(lapply(1:ncol(x),function(A) length(unique(x[wh,A]))>1)))

    X<-scale(x[wh,wp])
    z<-try(glm(y[wh]~.,data=as.data.frame(X),family=binomial(link = "logit")),TRUE)

    if(class(z)[1]!="try-error")
    {
        COEF<-coef(z)[-1]/attr(X,"scaled:scale")  

        v_norm=sqrt(COEF%*%COEF);
        z$coef<-rep(0,ncol(x))
        z$coef[wp]<-COEF/v_norm;

	    z$signif<-rep(0,ncol(x))
		if(impStruct$bImportance)
		{
		    if(impStruct$importance_weight == "none")  
		     z$signif[wp]<- c(-1,1)[1+(anova(z, test="Chisq")[-1,5]<impStruct$importance_pvalue)*1]
		    if(impStruct$importance_weight == "length")  
		     z$signif[wp]<- c(-1,1)[1+(anova(z, test="Chisq")[-1,5]<impStruct$importance_pvalue)*1]*length(wh)
		}

        z$lab<-"logit"
        z$ridge_value = 0
    }
    return(z);
}
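
## All node models expand the per-sample bag counts in `ind` into a row
## index vector with repetitions. A quick equivalence check of that idiom
## (a sketch, not run):
if(FALSE){
    ind <- c(2,0,1,3)                          # bag counts for 4 samples
    wh <- c(); for(i in 1:max(ind)) wh <- c(wh, which(ind>=i))
    all(sort(wh) == rep(seq_along(ind), ind))  # TRUE
}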



##--------------------------------------------
## PLS  wrapper 
##--------------------------------------------



pls_wrapper<-function(x=NULL, y=NULL, ind,...) {

       ## what samples: get bootstrapped data set 
       wh<-c();
       for(i in 1:max(ind)) wh<-c(wh,which(ind>=i))
    
       ## what features: remove constant variables
       wp<-which(unlist(lapply(1:ncol(x),function(A) length(unique(x[wh,A]))>1)))

    m_steps<-min(20,length(wp));

    lbd<-c()
    lab<-c(-1,1)[y+1]
    
    X<-scale(x[wh,wp], scale=F)

    obj<-kernelpls.fit(X=as.matrix(X), Y=lab[wh], ncomp =m_steps, stripped = T)

    # find the optimal number of components on the (local) oob samples
    Xt<-scale(x[ind==0,wp], center=attr(X,"scaled:center"), scale=F)
    for(l in 1:m_steps){
      if(sum(ind==0)>1)
        lbd[l]<-sum((as.matrix(Xt)%*%coef(obj)[,,l]-as.numeric(obj$Xmeans%*%coef(obj)[,,l])>0)*1==y[ind==0])
    }

    wm<-max(which.max(as.numeric(lbd))[1],2) # use at least two components
    z<-obj

    COEF<-coef(z)[,,wm]

    v_norm=sqrt(COEF%*%COEF);
    z$coef<-rep(0,ncol(x))
    z$coef[wp]<-COEF/v_norm;

    z$signif<-rep(0,ncol(x))
    z$signif[wp]<-0;

    z$ridge_value = wm
    z$lab<-"PLS"
        
    return(z);
}


##--------------------------------------------
## ridge wrapper
##--------------------------------------------


ridge_slow_wrapper<-function(x=NULL, y=NULL, ind,...) {
   ## what samples: get bootstrapped data set 
    # ind contains the sample population;
    # build the bootstrap vector with repetitions
    wh<-c();
    for(i in 1:max(ind)) wh<-c(wh,which(ind>=i))

   ## what features: remove constant variables
    wp<-which(unlist(lapply(1:ncol(x),function(A) length(unique(x[wh,A]))>1)))

    numLambdas=11;
    ridgeValues=c(-5:5);
    lambdas=10^(ridgeValues); #vector of possible lambdas
    
    bestRidgeValue=ridgeValues[1];

    # the current sample and feature subset for the node
    X<-scale(x[wh,wp], scale=F)

    lbd<-rep(0, numLambdas); #vector for number of matches per lambda
    lab<-c(-1,1)[y+1] # convert two class case into fit to -1 and 1

   ## if there are no oobs left there is no need for testing different lambdas
    if(sum(ind==0)==0)
    {
      z<-gen.ridge(x=t(t(X)), y=lab[wh], lambda=lambdas[1]);
    }
    else #test different lambdas
    {
      #save objects
      objs<-vector("list",numLambdas);
      
      # the oobs with the current features
      Xt<-scale(x[ind==0,wp],  center=attr(X,"scaled:center"), scale=F)

      # loop over lambda
      for(l in 1:numLambdas){
	  
	  objs[[l]]<-gen.ridge(x=t(t(X)), y=lab[wh], lambda=lambdas[l])
	  p<-predict(objs[[l]], t(t(Xt)));

	  # find optimal lambda from (local) oob: check how many classes match
	  lbd[l]<-sum((p>0)*1==y[ind==0])
	  
      }
      # find (first) best lambda (with best matching)
      wm<-which.max(as.numeric(lbd))[1]
      # take the saved fit for the best lambda
      z<-objs[[wm]];
      bestRidgeValue=ridgeValues[wm];
    }

    COEF<-z$coefficients[,1]

    v_norm=sqrt(COEF%*%COEF);
    z$coef<-rep(0,ncol(x))
    z$coef[wp]<-COEF/v_norm;

    z$signif<-rep(0,ncol(x))
    z$signif[wp]<-0;

    z$ridge_value = bestRidgeValue;
    z$lab<-"ridge"

    return(z);
}


##--------------------------------------------
## ridge wrapper using SVD
##--------------------------------------------


ridge_svd_wrapper<-function(x=NULL, y=NULL, ind,...) {
   ## what samples: get bootstrapped data set 
    # ind contains the sample population;
    # build the bootstrap vector with repetitions
    wh<-c();
    for(i in 1:max(ind)) wh<-c(wh,which(ind>=i))

   ## what features: remove constant variables
    wp<-which(unlist(lapply(1:ncol(x),function(A) length(unique(x[wh,A]))>1)))

    numLambdas=11;
    ridgeValues=c(-5:5);
    lambdas=10^(ridgeValues); #vector of possible lambdas
    
    bestRidgeValue=ridgeValues[1];

    # the current sample and feature subset for the node
    X<-scale(x[wh,wp], scale=F)

    lbd<-rep(0, numLambdas); #vector for number of matches per lambda
    lab<-c(-1,1)[y+1] # convert two class case into fit to -1 and 1

   ## if there are no oobs left there is no need for testing different lambdas
    if(sum(ind==0)==0)
    {
      z<-gen.ridge(x=t(t(X)), y=lab[wh], lambda=lambdas[1]);
    }
    else #test different lambdas
    {
      #save objects
      objs<-vector("list",numLambdas);
      
      # the oobs with the current features
      Xt<-scale(x[ind==0,wp],  center=attr(X,"scaled:center"), scale=F)

      #SVD
      A=t(t(X));
      B=t(t(Xt));
      s<-svd(x=A, nu=0, nv=ncol(A));
      #precalculate everything we can: by the ridge identity
      #  beta(lambda) = V (Sigma^2 + lambda*I)^{-1} V^T A^T y
      #the factor V^T A^T y does not depend on lambda
      vH_Ab=Conj(t(s$v)) %*% Conj(t(A)) %*% lab[wh];

      #make sure we don't run into trouble because of
      #low rank
      sigmas<-rep(0,ncol(A));
      sigmas[1:length(s$d)]<-s$d;
      sigmasSquared<-sigmas^2;
      # loop over lambda
      for(l in 1:numLambdas){
	  
	  #the regularised pseudo inverse
	  svdCoeffs<-s$v%*%diag(1/((sigmasSquared+lambdas[l]) ) ) %*% vH_Ab;
	  objs[[l]]$coefficients<-svdCoeffs;
	  
	  #calc distance to hyperplane
	  p<-B %*% svdCoeffs;

	  # find optimal lambda from (local) oob: check how many classes match
	  lbd[l]<-sum((p>0)*1==y[ind==0])

      }
      # find (first) best lambda (with best matching)
      wm<-which.max(as.numeric(lbd))[1]
      # take the saved fit for the best lambda
      z<-objs[[wm]];
      bestRidgeValue=ridgeValues[wm];
    }

    COEF<-z$coefficients[,1]

    v_norm=sqrt(COEF%*%COEF);
    z$coef<-rep(0,ncol(x))
    z$coef[wp]<-COEF/v_norm;

    z$signif<-rep(0,ncol(x))
    z$signif[wp]<-0;

    z$ridge_value = bestRidgeValue;
    z$lab<-"ridge"

    return(z);
}
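
## Sanity check for the SVD ridge identity used above,
##   beta(lambda) = V (Sigma^2 + lambda I)^{-1} V^T A^T y
##                = (A^T A + lambda I)^{-1} A^T y
## (a sketch, not run):
if(FALSE){
    set.seed(2)
    A <- matrix(rnorm(30), 10, 3); yv <- rnorm(10); lambda <- 0.1
    s  <- svd(A, nu=0, nv=ncol(A))
    b1 <- s$v %*% diag(1/(s$d^2 + lambda)) %*% t(s$v) %*% t(A) %*% yv
    b2 <- solve(t(A) %*% A + lambda*diag(ncol(A)), t(A) %*% yv)
    all.equal(as.numeric(b1), as.numeric(b2))   # TRUE
}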



##--------------------------------------------
## svm wrapper
##--------------------------------------------

svm_wrapper<-function(x=NULL, y=NULL, ind,...) {

   ## what samples: get bootstrapped data set 
   wh<-c();
   for(i in 1:max(ind)) wh<-c(wh,which(ind>=i))

   ## what features: remove constant variables
   wp<-which(unlist(lapply(1:ncol(x),function(A) length(unique(x[wh,A]))>1)))


    # loop over the cost grid 10^(-3,-1,1)
    lbd<-c()
     
    X<-scale(x[wh,wp])
    Xt<-scale(x[ind==0,wp], scale=attr(X,"scaled:scale"), center=attr(X,"scaled:center"))

    for(l in 1:3){
        
        obj<-try(svm(y=as.factor(y[wh]),x=as.matrix(X),kernel='linear', scale=T, tolerance=1, cost=10^(c(-3,-1,1))[l]),T)
        
        # count correct (local) oob predictions for this cost
        if(sum(ind==0)>1)
            lbd[l]<-sum(predict(obj, newdata=Xt)==y[ind==0])

    }
    wm<-which.max(as.numeric(lbd))[1]
    z<-try(svm(y=as.factor(y[wh]),x=as.matrix(X),kernel='linear', scale=T, cost=10^(c(-3,-1,1))[wm],tolerance=1),TRUE)

    if(class(z)!="try-error")
    {
        COEF<-as.numeric(t(z$coefs)%*%as.matrix((X)[z$index,]))/attr(X,"scaled:scale")  

        v_norm=sqrt(COEF%*%COEF);
        z$coef<-rep(0,ncol(x))
        z$coef[wp]<-COEF/v_norm;

        z$signif<-rep(0,ncol(x))
        z$signif[wp]<-0;

        z$lab<-"svm"
        z$ridge_value = wm # index of the selected cost (a scalar, matching the other wrappers)
    }
    return(z);
}


##--------------------------------------------
## random coefficients
##--------------------------------------------

rnd_wrapper<-function(x=NULL, y=NULL, ind,...) {

   ## what samples: get bootstrapped data set 
   wh<-c();
   for(i in 1:max(ind)) wh<-c(wh,which(ind>=i))

   ## what features: remove constant variables
   wp<-which(unlist(lapply(1:ncol(x),function(A) length(unique(x[wh,A]))>1)))

    z<-list()

    X<-scale(x[wh,wp])

    # draw one random coefficient per selected (non-constant) variable and
    # undo the unit-variance scaling so the hyperplane applies to raw data
    COEF<-rnorm(length(wp))/attr(X,"scaled:scale")

    v_norm=sqrt(COEF%*%COEF);
    z$coef<-rep(0,ncol(x))
    z$coef[wp]<-COEF/v_norm;

    z$signif<-rep(0,ncol(x))
    z$ridge_value = 0    
    z$lab<-"rnorm"
    
    return(z);
}
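
## All node-model wrappers share the same contract: given (x, y, ind) they
## return a list with $coef (length ncol(x), unit norm), $signif,
## $ridge_value and $lab. A minimal custom wrapper might look like this
## (a hypothetical sketch, not part of the package, not run):
if(FALSE){
    mean_diff_wrapper <- function(x=NULL, y=NULL, ind, ...){
        wh <- c(); for(i in 1:max(ind)) wh <- c(wh, which(ind>=i))
        xb <- x[wh,,drop=FALSE]; yb <- y[wh]
        # class-mean difference as the split direction
        COEF <- colMeans(xb[yb==1,,drop=FALSE]) - colMeans(xb[yb==0,,drop=FALSE])
        list(coef=COEF/sqrt(sum(COEF^2)), signif=rep(0,ncol(x)),
             ridge_value=0, lab="mean difference")
    }
}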
