R/rfVisualize.R
In CORElearn: Classification, Regression and Feature Evaluation

Documented in classPrototypes getRpartModel rfAttrEvalClustering rfClustering rfOutliers rfProximity

getRpartModel <- function(model, dataset) {
    m <- getRpart(model);
    ee <-list();
    class(ee)<-"rpart";
    if (model$model == "regTree") 
       ee$method <- "anova" 
    else ee$method<-"class";
    ff<-m[[1]];
    ff<-matrix(as.numeric(ff),m[[4]],m[[5]],TRUE);
    dim(m[[3]])<-c(m[[4]],3);
    attrDesc<-matrix(m[[12]],m[[8]],m[[13]], TRUE)
    var<-m[[3]][,2];
	suppressWarnings(idv<-as.integer(var))
    var[!is.na(idv)]<-attrDesc[,2];
    dim(var)<-c(m[[4]],1);
    dfr<-data.frame(var,ff);
    
    desc<-m[[3]][,3]!= ""
    yvalue<-dfr[,9];
    yvalue[desc]<-m[[3]][desc,3]
    classLev<-length(model$class.lev);
    #special case if class has levels - 
    #it is not a regression tree.
    if(classLev > 0){
        stat<-matrix(m[[14]], m[[4]],classLev,TRUE);
        stat<-unlist(stat);
        dim(stat)<-c(m[[4]],classLev);
        stat1<-stat/dfr[,2];
        dim(stat)<-c(1,m[[4]]*classLev);
        dim(stat1)<-c(1,m[[4]]*classLev);
		y2 <- c()
		for (i in 1:length(yvalue)) {
			if (yvalue[i] %in% model$class.lev)
				y2[i] <- as.integer(factor(yvalue[i],levels=model$class.lev))
		    else y2[i] <- as.integer(yvalue[i])
		}
		dfryval2<-c(y2,stat,stat1);
		# dfryval2<-c(yvalue,stat,stat1);
        dim(dfryval2)<-c(m[[4]],2*classLev+1);
		method <- "class"
    }
    else{
        av<-mat.or.vec(m[[4]],1);
        ps<-strsplit(yvalue, " ", fixed=TRUE);
        isOldType<-length(unlist(ps)) == length(yvalue);
        if(isOldType){
            yvalue<-as.numeric(yvalue);
        }
        dfryval2<-c(yvalue);
        dim(dfryval2)<-c(m[[4]],1);
    }
    dfr[,9]<-dfryval2;
    ee$frame<-dfr;
    attr(ee$frame, "row.names")<-as.integer(c(m[[3]][,1]));
    attr(ee$frame, "names")<-m[[2]];
    splits <-matrix(as.numeric(m[[6]]),m[[8]],m[[9]],TRUE);
    splits<-splits[,2:dim(splits)[2]];
    dim(splits)<-c(m[[8]],as.integer(m[[9]])-1);
    splCol<-attrDesc[,2]
    attr(splits, "dimnames")<-list(splCol, m[[7]]);
    #replace count column with values from frame$n
    attrIndex<-ee$frame$var!="<leaf>"
    leftchild<-attr(ee$frame[attrIndex,],"row.names")*2
    i<-1
    while(i<=length(leftchild)){
        selected<-attr(ee$frame,"row.names")==leftchild[i]
        splits[i,1]<-ee$frame$n[selected]
        i<-i+1;
    }
    ee$splits<-splits;
    search <- all.vars(model$formula)[1]
    b<-lapply(attr(dataset, 'names'),function(x){x!=search});
    #we can use levels onto dataset, 
    #because rpart.labels matches attributes by name.
    cs<-lapply(dataset,levels);
    #cs is a list of attributes each of them has a list of values.
    cs<-cs[unlist(b)]
    d<-max(unlist(lapply(cs, length)));
    d<-min(d, as.numeric(m[[11]]));
    if(d > 0){
        csplit<-matrix(as.numeric(m[[10]]),m[[8]],m[[11]],TRUE);
        #csplit==3 is the default, 
        #if min is 1 there is at least one discrete attribute
        if(min(csplit) == 1){
            csplit<-csplit[,1:d];
            dim(csplit)<-c(m[[8]],d);
            ee$csplit<-csplit;
        }
    }
    attr(ee, "xlevels")<-cs;
    attr(ee, "ylevels")<-model$class.lev;
    if(classLev > 0){
        #function used for formatting discrete class
        textFunction <- function (yval, dev, wt, ylevel, 
                digits, n, use.n){
            nclass <- (ncol(yval) - 1L)/2
            group <- yval[, 1L]
            counts <- yval[, 1L + (1L:nclass)]
            dimCounts<-dim(counts);
            counts<-matrix(as.numeric(counts),dimCounts[1],dimCounts[2])
            temp1 <- rpart.formatg(counts, digits)
            if (nclass > 1) {
                temp1 <- apply(matrix(temp1, ncol = nclass), 
                        1, paste, collapse = "/")
            }
            if (use.n) {
                out <- paste(format(group, justify = "left"), 
                        "\n", temp1, sep = "")
            }
            else {
                out <- format(group, justify = "left")
            }
            return(out);
        }
    }
    else{
        #function used for formatting continuous class
        textFunction <- function (yval, dev, wt, ylevel, 
                digits, n, use.n){
            if(!is.numeric(yval)){
                ps<-strsplit(yval, "+", fixed=TRUE);
                for(ij in 1:length(ps)){
                    yval[ij]<-paste(ps[[ij]], collapse = "+\n");
                }
                if (use.n) {
                    yval<-paste(yval, "\nn=", n, sep = "")
                }
                return(yval);
            }
            else{
                if (use.n) {
                    paste(rpart.formatg(yval, digits), 
                            "\nn=", n, sep = "")
                }
                else {
                    paste(rpart.formatg(yval, digits))
                }
            }
        }
    }
    #rpartNamespace <- asNamespace("rpart");
    #environment(textFunction) <- rpartNamespace;
    ee$functions$text <- textFunction;
    ee;
}
getRpart <- function(model) {
    #regression tree
    if(model$model == "regTree"){
        .Call(C_exportModelRT, as.integer(model$modelID))
    }
    #classification tree
    else if(model$model == "tree"){
        .Call(C_exportModelT, as.integer(model$modelID))
    }
    else{
        stop("The model must be a regresion or a decision tree.");
    }
}
rfProximity <- function(model, outProximity=TRUE){
    if (model$model == "rf"){
        .Call(C_exportProximity, as.integer(model$modelID),as.integer(outProximity==FALSE))
    }
    else{
        stop("The model must be a random forest.");
    }
}

getVarImportanceCluster <-function(model, cluster){
    modelID <- model$modelID;
    tmp <- .C(C_exportVarImportanceCluster,
        	   	 as.integer(modelID),
           		 clusterData = as.integer(cluster),
     		     var = double(model$noNumeric + model$noDiscrete-1),
                 NAOK=TRUE)
}
spaceScale <- function(pr, component){
    space<-cmdscale(pr, component, add=TRUE);
}
rfClustering <- function(model, noClusters=4){
    covMatrix<-rfProximity(model, outProximity=F);
    cluster<-cluster::pam(covMatrix, noClusters, diss=TRUE)
}

rfOutliers <- function(model, dataset){
    pr <- rfProximity(model, outProximity=TRUE);
    search <- all.vars(model$formula)[1];
    b<-lapply(attr(dataset, 'names'),function(x){x==search});
    attrClass<-c(1:length(b))[b==TRUE]
    caseIndex<-matrix(c(1:dim(pr)[2]), dim(pr)[1], dim(pr)[2], TRUE)
    el<-caseIndex;
    i<-1;
    prSum<-matrix(0);
    while(i<=dim(pr)[2]){
        rclass<-dataset[el[i,], attrClass]==dataset[i,attrClass]
        prSumMedian<-median(pr[i,rclass])
        prSumSd<-sd(pr[i,rclass]);
        prSum[i]<-sum(pr[i,rclass]^2)^(-1)-prSumMedian
        if(is.na(prSumSd)){
            prSum[i] = 10;
            warning(paste("Element ",i," is the only one in his class. Setting output to 10."))
        }
        else if(is.numeric(prSumSd) && prSumSd>0){
            prSum[i]<-prSum[i]/prSumSd
        }
        i<-i+1
    }
    outliers<-prSum
}
classPrototypes<-function(model, dataset, noPrototypes=10){
    search <- all.vars(model$formula)[1];
    b<-unlist(lapply(attr(dataset, 'names'),function(x){x==search}));
    lev<-levels(dataset[, b]);
    nclass<-length(lev);
    n<-length(dataset[, 1]);
    pre<-predict(model, dataset);
    cluster<-NULL;
    index<-matrix(1:n, n, nclass);
    out<-matrix(0, nclass, noPrototypes);
    for(j in 1:nclass){
        p<-pre$probabilities;
        maxelem<-NULL;
        i<-1;
        while(i<=n && length(p[p!=0])>0 && length(maxelem) < noPrototypes){
            tmp<-index[p==max(p)];
            ntmp<-length(tmp);
            outPos<-list();
            for(k in 1:ntmp){
                outPos[length(outPos)+1]<-c(1:nclass)[lev==dataset[tmp[k],b]]
            }
            outPos<-unlist(outPos);
            #       if(length(outPos==j) > 0)
            #       {
            maxelem<-c(maxelem, tmp[outPos==j]);
            #       }
            p[tmp,]<-0;
            i<-i+1;
        }
        nmaxelem<-length(maxelem);
        if(nmaxelem>=noPrototypes) {
            out[j, ]<-maxelem[1:noPrototypes];
        }
        else{
            out[j, c(1:nmaxelem)]<-maxelem[1:nmaxelem];
        }
        cluster<-c(cluster, (c(1:noPrototypes)*0+j)[out[j,]>0]);
    }
    out<-t(out);
    dim(out)<-c(1, noPrototypes*nclass);
    o<-list();
    o$prototypes<-as.numeric(out[out>0]);
    o$clustering<-as.numeric(cluster);
    o$levels<-as.character(lev);
    impPredictedExample<-o;
}
# the data.frame set is converted to a form such that all the attributes have values between 0 and 1
# this is useful in visualization
varNormalization<-function(md, set){
    #d-discrete, a-attribure, n-names
    column<-length(set[1,]);
    n<-length(set[,1]);
    colPos<-matrix(FALSE, column, column);
    dan<-md$discAttrNames;
    nd<-length(dan);
    ian<-0;
    if(nd>0){
        int<-vector("numeric",nd);
        for(ian in 1:nd)
        {
            search<-dan[ian];
            colPos[ian,]<-unlist(lapply(attr(set, 'names'),function(x){x==search}));
            int[ian]<-1/(length(levels(set[, colPos[ian, ]]))-1);
        }
    }
    nan<-md$numAttrNames;
    nn<-length(nan);
    if(nn > 0){
        offset<-ian;
        mi<-vector("numeric", nn);
        sigma<-vector("numeric", nn);
        maxnorm<-vector("numeric", nn);
        moveup<-vector("numeric", nn);
        for(ian in 1:nn){
            search<-nan[ian];
            tmp<-unlist(lapply(attr(set, 'names'),function(x){x==search}));
            colPos[ian+offset,]<-tmp
            mi[ian]<-as.numeric(mean(set[,tmp]));
            sigma[ian]<-as.numeric(sd(set[,tmp]));
            allcurval<-(set[, tmp]-mi[ian])/sigma[ian];
            moveup[ian]<-min(c(0,allcurval));
            maxnorm[ian]<-max(allcurval-moveup[ian])
        }
    }
    out<-NULL;
    classV<- all.vars(md$formula)[1];
    classV<-unlist(lapply(attr(set, 'names'),function(x){x!=classV}));
    for(ex in 1:n){
        pos<-vector("numeric", column);
        if(nd>0){
            for(da in 1:nd) {
                lev<-levels(set[, colPos[da, ]]);
                val<-set[ex, colPos[da, ]];
                if(is.na(val)){
                    index<-1;
                }
                else{
                    index<-c(1:length(lev))[lev==val];
                }
                pos[colPos[da, ]]<-(index-1)*int[da];
            }
        }
        if(nn>0){
            for(na in 1:nn){
                normal<-((set[ex, colPos[na+offset, ]]-mi[na])/sigma[na]);
                val<-(normal-moveup[na])/maxnorm[na];
                pos[colPos[na+offset, ]]<-val;
            }
        }
        out<-c(out,pos[classV]);
    }
    out<-matrix(out, n, column-1, TRUE);
}

getQuartils<-function(examples){
    int<-0.25;
    i<-1
    minOld<-FALSE
    while(i<5){
        minIndex<-examples<i*int
        aMedian<-median(examples[!minOld & minIndex]);
        examples[!minOld & minIndex]<-aMedian;
        minOld<- minOld | minIndex
        i<-i+1
    }
    getQuartils<-examples;
}

rfAttrEvalClustering<-function(model, dataset, clustering=NULL){
    search <- all.vars(model$formula)[1];
    b<-lapply(attr(dataset, 'names'),function(x){x==search})
    b<-unlist(b);
    i<-1;
    imp<-NULL;
    lev<-NULL;
    if(is.null(clustering)){
        cl2<-as.numeric(dataset[,b]);
        levSet<-dataset[,b];
    }
    else{
        cl2<-clustering;
        levSet<-cl2;
    }
    cl<-cl2;
    stopme<-length(levels(levSet));
    while(i<=stopme){
        cl[cl!=i]<-0;
        cl[cl==i]<-1;
        lev<-c(lev, as.character((levSet[cl==1])[1]));
        d<-getVarImportanceCluster(model, cl);
        imp<-c(imp,d$var);
        cl<-cl2;
        i<-i+1;
    }
    temp<-list();
    ncolumn<-length(b[b==FALSE]);
    imptemp<-matrix(imp, stopme, ncolumn, TRUE);
  	colnames(imptemp) <- names(dataset)[b==FALSE]
	  rownames(imptemp) <- levels(levSet)
    temp$imp<-imptemp
    temp$levels<-lev;
    rfAttrEvalClustering<-temp;
	
}

plotRFStats <- function(point, cluster=FALSE, plotLine=FALSE, 
        lOffset=0, myCount=7, myAxes=FALSE)
{
    pointLen <- length(point);
    if(is.null(dim(point)) || length(dim(point)) == 1)
    {
        tmpPoint <- point;
        point<-matrix(0, pointLen, 2);
        point[,1] <- c(1:pointLen);
        point[,2] <- tmpPoint;
    }
    noVar <- pointLen;
    ylim<-c(min(point[,2]), max(point[,2])+lOffset)
    xlim<-c(min(point[,1]), max(point[,1]))
    if(is.logical(myAxes)){
        axesShow<-TRUE;
    }
    else{
        axesShow<-FALSE;
    }
    plot(1, 1, xlim=xlim, ylim=ylim, type="n", ann=FALSE, frame=TRUE, axes=axesShow);
    if(!is.logical(myAxes) && length(myAxes) > 0){
        axis(2);
        axis(1, at=1:noVar, labels=myAxes);
    }
    if(length(cluster) > 1 )
    {
        tmpCluster <- cluster;
        clusterLevelNames <- list();
        clusterLevels <- 0;
        i <- 1;
        while(length(tmpCluster) > 0 && i < 13)
        {
            clusterLevels[i] <- i;
            clusterLevelNames[i]<-tmpCluster[1]
            cluster[cluster==tmpCluster[1]]<-i;
            tmpCluster <- tmpCluster[tmpCluster!=tmpCluster[1]];
            i <- i+1;
        }
        clusterLevels <- clusterLevels[clusterLevels>0];
        myPch <-0;
        myColor <-0;
        for(value in clusterLevels)
        {
            #mod
            myPch[value]<-floor(value/myCount);
            myColor[value]<- 1+value - myCount*floor(value/myCount);
            
            points(point[cluster==value,], col=myColor[value], pch=myPch[value]);
            if(plotLine == TRUE)
            {
                lines(point[cluster==value,], col=myColor[value], pch=myPch[value]);
            }
        }
        prefix<-""
        if(is.integer(clusterLevelNames[1]))
        {
            prefix<-"skupina "
        }
        clusterNames <- paste(prefix, clusterLevelNames, sep = "");
        legend(xlim[1], ylim[2], clusterNames, cex=0.8, col=myColor, pch=myPch);
    }
    else
    {
        points(point);
        if(plotLine == TRUE)
        {
            lines(point);
        }
    }
}

plotRFMulti<-function(point, legendNames=FALSE, lOffset=0, 
        myCount=7, myHoriz=FALSE, myAxes=FALSE)
{
    noVar<-dim(point)[2]
    noCluster<-dim(point)[1]
    ylim<-c(min(point), max(point)+lOffset)
    xlim<-c(1, noVar)
    if(is.logical(myAxes)){
        axesShow<-TRUE;
    }
    else{
        axesShow<-FALSE;
    }
    plot(1, 1, xlim=xlim, ylim=ylim, type="n", ann=FALSE, frame=TRUE, axes=axesShow);
    if(!is.logical(myAxes) && length(myAxes) > 0){
        axis(2);
        axis(1, at=1:noVar, labels=myAxes);
    }
    myPch <-0;
    myColor <-0;
    pPoint<- matrix(0.0, noVar, 2)
    pPoint[,1]<-c(1:noVar)
    for(i in c(1:noCluster))
    {
        myPch[i]<-floor(i/myCount);
        myColor[i]<- 1+i - myCount*floor(i/myCount);
        
        pPoint[,2]<-point[i,]
        points(pPoint, col=myColor[i], pch=myPch[i]);
        lines(pPoint, col=myColor[i], pch=myPch[i]);
    }
    prefix<-""
    if(is.logical(legendNames) && legendNames==FALSE)
    {
        legendNames<-c(1:noCluster)
        prefix <-"cluster"
    }
    clusterNames <- paste(prefix, legendNames, sep = "");
    color<-c(1:noCluster);
    legend(xlim[1], ylim[2], clusterNames, cex=0.8, col=myColor, pch=myPch, horiz=myHoriz);
}

plotRFNorm<-function(point, cluster, somnames, lOffset, 
        myHoriz=FALSE, myAxes=FALSE)
{
    noVar<-dim(point)[2];
    ylim<-c(min(point), max(point)+lOffset)
    xlim<-c(1, noVar)
    if(is.logical(myAxes)){
        axesShow<-TRUE;
    }
    else{
        axesShow<-FALSE;
    }
    plot(1, 1, xlim=xlim, ylim=ylim, type="n", ann=FALSE, frame=TRUE, axes=axesShow);
    if(!is.logical(myAxes) && length(myAxes) > 0){
        axis(2);
        axis(1, at=1:noVar, labels=myAxes);
    }
    tmpCluster <- cluster;
    clusterLevelNames <- list();
    clusterLevels <- 0;
    i <- 1;
    while(length(tmpCluster) > 0 && i < 13){
        clusterLevels[i] <- i;
        clusterLevelNames[i]<-tmpCluster[1]
        cluster[cluster==tmpCluster[1]]<-i;
        tmpCluster <- tmpCluster[tmpCluster!=tmpCluster[1]];
        i <- i+1;
    }
    clusterLevels <- clusterLevels[clusterLevels>0];
    myPch <-0;
    myColor <-0;
    myCount<-7
    nexamples<-length(point[,1]);
    for(value in 1:nexamples){
        #mod
        myPch[cluster[value]]<-floor(cluster[value]/myCount);
        myColor[cluster[value]]<- 1+cluster[value] - myCount*floor(cluster[value]/myCount);
        points(point[value,], col=myColor[cluster[value]], pch=myPch[cluster[value]]);
        lines(point[value,], col=myColor[cluster[value]], pch=myPch[cluster[value]]);}
    legend(xlim[1], ylim[2], somnames, cex=0.8, col=myColor, pch=myPch, horiz=myHoriz);
}

# taken from rpart package, because it does not export it anymore
## format a set of numbers using C's "g" format
rpart.formatg <- function(x, digits = getOption("digits"),
		format = paste0("%.", digits, "g"))
{
	if (!is.numeric(x)) stop("'x' must be a numeric vector")
	
	temp <- sprintf(format, x)
	if (is.matrix(x)) matrix(temp, nrow = nrow(x)) else temp
}
Any scripts or data that you put into this service are public.
CORElearn documentation built on April 4, 2025, 4:34 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
CORElearn
Classification, Regression and Feature Evaluation

R/rfVisualize.R
In CORElearn: Classification, Regression and Feature Evaluation

Defines functions rpart.formatg plotRFNorm plotRFMulti plotRFStats rfAttrEvalClustering getQuartils varNormalization classPrototypes rfOutliers rfClustering spaceScale getVarImportanceCluster rfProximity getRpart getRpartModel

Documented in classPrototypes getRpartModel rfAttrEvalClustering rfClustering rfOutliers rfProximity

Try the CORElearn package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CORElearn Classification, Regression and Feature Evaluation

R/rfVisualize.R In CORElearn: Classification, Regression and Feature Evaluation

Defines functions rpart.formatg plotRFNorm plotRFMulti plotRFStats rfAttrEvalClustering getQuartils varNormalization classPrototypes rfOutliers rfClustering spaceScale getVarImportanceCluster rfProximity getRpart getRpartModel

Documented in classPrototypes getRpartModel rfAttrEvalClustering rfClustering rfOutliers rfProximity

Try the CORElearn package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CORElearn
Classification, Regression and Feature Evaluation

R/rfVisualize.R
In CORElearn: Classification, Regression and Feature Evaluation