# R/ubSMOTE.R

# Rebalance a binary data set: add synthetic minority examples (SMOTE) and
# draw a sample, with replacement, from the majority class.
#
# Arguments:
#   X           predictors, typically a data frame; a plain vector is rejected.
#   Y           factor response. Rows where Y == 1 form the minority class,
#               every other row is treated as majority.
#   perc.over   forwarded unchanged to ubSmoteExs() (presumably the percentage
#               of over-sampling -- confirm against that helper).
#   k           forwarded unchanged to ubSmoteExs() (presumably the number of
#               nearest neighbours -- confirm against that helper).
#   perc.under  number of majority rows drawn, expressed as a percentage of
#               the number of synthetic rows kept.
#   verbose     currently unused; kept so existing callers keep working.
#
# Value:
#   list(X = ..., Y = ...) holding the shuffled union of the sampled majority
#   rows, the original minority rows and the synthetic rows. Y is the last
#   column of that union, X is everything else.
ubSMOTE <-
function(X, Y, perc.over = 200, k = 5, perc.under = 200, verbose = TRUE) {

  if (!is.factor(Y))
    stop("Y has to be a factor")
  if (is.vector(X))
    stop("X cannot be a vector")

  data <- cbind(X, Y)
  id.1 <- which(Y == 1)
  if (length(id.1) == 0L)
    stop("Y has no instances of the minority class (Y == 1)")

  # generate synthetic cases from the minority rows
  newExs <- ubSmoteExs(data[id.1, , drop = FALSE], "Y", perc.over, k)

  # discard synthetic rows that contain at least one NA
  row.is.na <- rowSums(is.na(newExs)) > 0
  if (any(row.is.na)) {
    newExs <- newExs[!row.is.na, , drop = FALSE]
    colnames(newExs) <- colnames(data)
    cat("WARNING: NAs generated by SMOTE removed \n")
  }

  # get the undersample of the "majority class" examples
  id.maj <- setdiff(seq_len(NROW(data)), id.1)
  n.maj <- as.integer((perc.under / 100) * nrow(newExs))
  if (length(id.maj) == 0L && isTRUE(n.maj > 0L))
    stop("no majority class instances to sample from")
  # Index through sample.int(): sample(x, ...) would treat a single majority
  # index i as the range 1:i and could return minority rows.
  selMaj <- id.maj[sample.int(length(id.maj), n.maj, replace = TRUE)]

  # the final data set (the undersample + the rare cases + the smoted exs)
  newdataset <- rbind(data[selMaj, , drop = FALSE],
                      data[id.1, , drop = FALSE],
                      newExs)
  # shuffle the order of instances
  newdataset <- newdataset[sample.int(NROW(newdataset)), , drop = FALSE]

  n.col <- ncol(newdataset)
  list(X = newdataset[, -n.col], Y = newdataset[, n.col])
}

# Try the unbalanced package in your browser

# Any scripts or data that you put into this service are public.

# unbalanced documentation built on May 2, 2019, 7:01 a.m.