classif.svm_pruebas_type_1vsAll.R
In fda.usc: Functional Data Analysis and Utilities for Statistical Computing

# For multiclass-classification with k levels, k>2, libsvm uses the ‘one-against-one’-approach, in which k(k-1)/2 binary classifiers are trained; the appropriate class is found by a voting scheme.

wlda2<-function (x, grouping, weights = rep(1, nrow(x)), 
                 method = c("unbiased", "ML"), ...) 
{
  if (is.null(dim(x))) 
    stop("'x' is not a matrix")
  x <- data.matrix(x)
  if (any(!is.finite(x))) 
    stop("infinite, NA or NaN values in 'x'")
  n <- nrow(x)
  if (n != length(weights)) 
    stop("nrow(x) and length(weights) are different")
  if (any(weights < 0)) 
    stop("weights have to be larger or equal to zero")
  if (all(weights == 0)) 
    stop("all weights are zero")
  names(weights) <- rownames(x)
  if (n != length(grouping)) 
    stop("'nrow(x)' and 'length(grouping)' are different")
  x <- x[weights > 0, , drop = FALSE]
  grouping <- grouping[weights > 0]
  w <- weights[weights > 0]
  n <- nrow(x)
  if (!is.factor(grouping)) 
    warning("'grouping' was coerced to a factor")
  g <- as.factor(grouping)
  lev <- lev1 <- levels(g)
  counts <- as.vector(table(g))
  if (any(counts == 0)) {
    empty <- lev[counts == 0]
    warning(sprintf(ngettext(length(empty), "group %s is empty or weights in this group are all zero", 
                             "groups %s are empty or weights in these groups are all zero"), 
                    paste(empty, collapse = ", ")), domain = NA)
    lev1 <- lev[counts > 0]
    g <- factor(g, levels = lev1)
    counts <- as.vector(table(g))
  }
  if (length(lev1) == 1L) 
    stop("training data from only one group given")
  method <- match.arg(method)
  class.weights <- tapply(w, g, sum)
  prior <- class.weights/sum(w)
  print(w)
  print(prior)
  print(class.weights)
  print("a")
  ng <- length(prior)
  names(counts) <- lev1
  xwt <- w * x
  print(dim(x))
  print(dim(xwt))
  center <- t(matrix(sapply(lev1, function(z) colSums(xwt[g == 
                                                            z, , drop = FALSE])), ncol = ng, dimnames = list(colnames(x), 
                                                                                                             lev1)))/as.numeric(class.weights)
  z <- x - center[g, , drop = FALSE]
  cov <- crossprod(w * z, z)/sum(w)
  
  if (method == "unbiased") {
    norm.weights <- w/class.weights[g]
    cov <- cov/(1 - sum(prior * tapply(norm.weights^2, g, 
                                       sum)))
  }
  cl <- match.call()
  cl[[1L]] <- as.name("wlda")
  return(structure(list(prior = prior, counts = counts, means = center, 
                        cov = cov, lev = lev, N = n, weights = weights, method = method, 
                        call = cl), class = "wlda"))
}
# https://github.com/rforge/locclass/tree/master/locClassMlr
#install.packages("mlbench")
library(mlbench)
data(PimaIndiansDiabetes)

train <- sample(nrow(PimaIndiansDiabetes), 500)

# weighting observations from classes pos and neg according to their
# frequency in the data set:
ws <- as.numeric(1/table(PimaIndiansDiabetes$diabetes)
                 [PimaIndiansDiabetes$diabetes])
sum(ws[train])
fit <- locClass:::wlda.default(data.matrix(PimaIndiansDiabetes[train,-9]),
                               PimaIndiansDiabetes[train,"diabetes"],
            , weights = ws[train])

fit2 <- wlda2(data.matrix(PimaIndiansDiabetes[train,-9]),
              PimaIndiansDiabetes[train,"diabetes"],
              , weights = rev(ws[train]))
pred <- predict(fit, newdata = PimaIndiansDiabetes[-train,-9])
pred2 <- predict(fit2, newdata = PimaIndiansDiabetes[-train,-9])
ytest<-  PimaIndiansDiabetes[-train,9]
mean(pred$class==ytest)
mean(pred2$class==ytest)
pred$posterior[1:11,];pred2$posterior[1:11,]
pred$posterior[1:11,]-pred2$posterior[1:11,]


install.packages("flexmix")
install.packages("locClass", repos="https://R-Forge.R-project.org")
library(locClass)
?wlda
# Hand, D. J., Vinciotti, V. (2003), Local versus global models for classification problems: Fitting models where it matters, The American Statistician, 57(2) 124–130.
?dalda

#' @export classif.gbm
classif.gbm=function(formula, data, basis.x=NULL, 
                              weights = "equal", type = "1vsall", ...) 
{
  rqr <- "randomForest"
  if (!(rqr %in% rownames(installed.packages()))) {
    stop("Please install package 'randomForest'") }
  
  #require(eval(rqr)[1], quietly = TRUE, warn.conflicts = FALSE)
  suppressWarnings(rqr2<-require(eval(rqr), 
                                 character.only = TRUE,quietly = TRUE, 
                                 warn.conflicts = FALSE))
  if (!rqr2) 
    stop("Please, load the namespace of the package for  method")
  
  prob=0.5
  C <- match.call()  
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula","data","basis.x","weights","type"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  #mf[[1L]] <- quote(stats::model.frame)
  #mf <- eval(mf, parent.frame())
  #if (method == "model.frame")     return(mf)
  tf <- terms.formula(formula)
  terms <- attr(tf, "term.labels")
  nt <- length(terms)
  if (attr(tf, "response") > 0) {
    response <- as.character(attr(tf, "variables")[2])
    pf <- rf <- paste(response, "~", sep = "")
  } else pf <- rf <- "~"
  vtab<-rownames(attr(tf,"factors"))
  vnf=intersect(terms,names(data$df))
  # vnf2=intersect(vtab[-1],names(data$df)[-1])
  vfunc2=setdiff(terms,vnf)
  vint=setdiff(terms,vtab)
  vfunc=setdiff(vfunc2,vint)
  off<-attr(tf,"offset")
  name.coef=nam=beta.l=list()
  y <- data$df[[response]]
  lev <- levels(y)
  prob2<-prob1 <- ny <- nlevels(y)
  
  # 2019/04/24
  out.func <- fdata2model(vfunc,vnf,response, data, basis.x,pf=pf,tf=tf)  
  pf <- out.func$pf          
  basis.x <- out.func$basis.x
  XX <- out.func$XX
  vs.list <- out.func$vs.list
  mean.list=out.func$mean.list
  rm(out.func)
  n <- NROW(XX)
  
  par.method <- as.list(substitute(list(...)))[-1L]
  if (weights[1] =="equal") 
    class.weights=NULL
  
  if (weights[1] == "inverse")   {
    #print("inverse")
    weights<-weights4class(y,type=weights)
    wtab<-tapply(weights,y,mean)
    class.weights<-wtab/sum(wtab)
    names(class.weights)<- lev
  }
  
  if (any(weights < 0)) 
    stop("negative weights not allowed")
  
  if (length(weights)==ny){
    #print("weights como clas lev")
    class.weights <- weights
    names(class.weights) <- lev
  } 
  # if (is.null(names(class.weights)))     names(class.weights) <- lev
  if (!is.null(par.method$classwt)) 
    class.weights=classwt
  
  par.method <- c(list(formula=pf, data=XX,classwt=class.weights),par.method)
  #par.method<-c(list(formula=pf, data=XX),par.method)
  if (is.null(par.method$votes)) par.method$votes=TRUE
  
  
  out<-list()
  out$formula.ini=formula
  out$data=data
  out$XX=XX
  out$C <- C[1:2]
  
  
  if (type == "majority" |  ny==2){
    z=do.call("randomForest",par.method)
    out$fit<-z
    out$group.est = z$predicted
    out$prob.group <- z$votes
    
  }    else { # One vs Other
    # 2019/08/30
    prob.group<-matrix(NA,n,ny)
    colnames(prob.group)<-lev
    z<-list()
    for (i in 1:ny) {
      igroup  <- y==lev[i]
      newy<-ifelse(igroup, 0,1)
      par.method$data[,response]<-factor(newy)
      newx<- par.method$data
      z[[i]] <-  do.call("randomForest",par.method)
      prob.group[,i] <- z[[i]]$votes[,1]
    }
    out$prob.group <- prob.group
    out2glm<-classifKgroups(y,prob.group,lev) # hacer una par prob<0 y >0
    out$group.est = out2glm$yest
    out$fit <- z
  }
  out$prob <- prob
  out$group <- y
  out$max.prob <- mean(y==out$group.est,na.rm=T) 
  out$fit <- z
  out$basis.x=basis.x
  out$mean=mean.list
  out$formula=pf
  out$vs.list=vs.list
  #out$method <- method
  #out$par.method <- par.method
  
  tab <- table(out$group.est,y)
  prob.group2 <- array(NA, dim = c(n, ny))
  prob.group2 <- prob.group2/apply(prob.group2, 1, sum)
  for (i in 1:ny) {
    prob1[i] = tab[i, i]/sum(tab[, i])
  }
  names(prob1) <- z$levels
  #colnames(prob.group) <- z$levels
  out$prob.classification <- prob1
  out$type <- type
  # If norm.votes=TRUE, the fraction is given, which can be taken as predicted probabilities for the classes.
  
  #out$group.pred <- out$group.est
  #class(out)<-c("classif",class(z))
  class(out)<-"classif"
  out
}

# install.packages("gbm")
library(gbm)

?gbm



#############################
ii<-c(1:150,191:210)
dat$df<-data.frame(glearn=phoneme$classlearn[ii])
dat$x<-phoneme$learn[ii,]
# plot(dat$x)
newdat<-list("df"=data.frame("glearn"=phoneme$classtest),"x"=phoneme$test)
tp<-"majority"
tp1<-"1vsall"
wt<-"inverse"
wt2<-"equal"
set.seed(1)
args(classif.multinom)
a5<-classif.multinom(glearn~x, data = dat, weights = wt2)

set.seed(1)
a6<-classif.nnet(glearn~x, data = dat, type=tp,weights = wt2)

a5$max.prob;a6$max.prob
a5$prob.classification;a6$prob.classification

names(a5)
a5$type
tp2<-"class"
p5 <- predict(a5,newdat,type=tp2) #no va probs!!
p5
p6<- predict(a6,newdat,type=tp2) #no va probs!!
a5$max.prob;a6$max.prob
a5$prob.classification;a6$prob.classification
mean(p5==newdat$df$glearn);mean(p6==newdat$df$glearn)


set.seed(1)
a6<-classif.rpart(glearn~x, data = dat, type=tp,weights="inverse")

set.seed(1)
a6<-classif.rpart(glearn~x, data = dat, type=tp,weights=c(1,1,1,1,1))

p5<- predict(a5,newdat);p6<- predict(a6,newdat)
a5$max.prob;a6$max.prob
a5$prob.classification;a6$prob.classification
mean(p5==newdat$df$glearn);mean(p6==newdat$df$glearn)


table(p5,a5$group.est)
mean(p5==dat$df$glearn)

a5<-classif.randomForest(glearn~x, data = dat, weights="equal",ntree=10)
set.seed(1)
p5<- predict(a5,dat)

p5<- predict(a5,dat,type="prob")
names(p5)
table(p5,a5$group.est)
a5$type
mean(p5==dat$df$glearn)

set.seed(1)
a5<-classif.randomForest(glearn~x, data = dat, weights="equal", type="majority")
set.seed(1)
p5<- predict.classif(a5,dat)
table(p5,a5$group.est)

####################################
a4<-classif.randomForest(glearn~x, data = dat, weights="equal")
a4$prob.group
a5<-classif.randomForest(glearn~x, data = dat, weights="equal", type="majority")
a5$prob.group
a4$max.prob;a5$max.prob

p5<- predict(a5$fit, newx = a3$XX[,-1])
table(p5,a5$group.est)


p4<- predict(a4,dat)
p5<- predict(a5,dat)
p6<- predict.classif(a5,dat)
a5$C;class(a5)
table(p5,a5$group.est)
table(p6,a5$group.est)
table(p6,p5)


a<- predict(a3$fit[[1]], newx = ,a3$fit[[1]]$x,type ='linear.predictor')
a<- predict(a3$fit[[1]], newx = a3$fit[[1]]$x,type ='votes')
a
####################################
#fdata2model<-fda.usc:::fdata2model
#weights4class<-fda.usc:::weights4class

 require(fda.usc)
data(phoneme)
mlearn<-phoneme[["learn"]]
glearn<-phoneme[["classlearn"]]
mtest<-phoneme[["test"]]
gtest<-phoneme[["classtest"]]
dataf<-data.frame(glearn)
dat=list("df"=dataf,"x"=mlearn)
a1<-classif.glm(glearn~x, data = dat)
a2<-classif.svm(glearn~x, data = dat)
#a3<-classif.ksvm(glearn~x, data = dat, weights="equal")
a4<-classif.lda(glearn~x, data = dat)
a5<-classif.lda(glearn~x, data = dat, weights=c(.1,.2,.2,.25,.25))
a4$max.prob
a5$max.prob
names(a4) 
names(a4$fit[[1]])
summary(a4)
a4$fit

class(a4$fit)

# 2019/08/29
# cbind(round(a2$fit$decision.values,3),a2$fit$fitted)[200:250,]
# 1/2    1/3    1/4    1/5    2/3    2/4    2/5    3/4    3/5    4/5  
# svm hace majority voting por defecto
# se incluye "1vsall" para que sea homogeneo con el resto
#################################
classifKgroups<-fda.usc:::classifKgroups

type="inverse" 
type="equal"
a1<-classif.glm(glearn~x, data = dat,weights=type)
a2<-classif.glm(glearn~x, data = dat,weights=type,type="majority")
a3<-classif.svm(glearn~x, data = dat,weights=type)
a4<-classif.svm(glearn~x, data = dat,weights=type,type="majority")

a1<-classif.lda(glearn~x, data = dat)
a2<-classif.lda(glearn~x, data = dat, weights="inverse")
a3<-classif.lda(glearn~x, data = dat, type="majority")

a1<-classif.qda(glearn~x, data = dat)
a2<-classif.qda(glearn~x, data = dat, weights="inverse")
a3<-classif.qda(glearn~x, data = dat, type="majority")
a4<-classif.lda(glearn~x, data = dat)


a4$group.est
a3$prob.classification 
a4$prob.classification 
a3$prob.group[1:3,]
a4$prob.group[1:3,]
a1$max.prob;a2$max.prob;a3$max.prob;a4$max.prob
newdat<-list("x"=phoneme$test)
ytest<-phoneme$classtest
ytrain<-dat$df$glearn
newdat<-dat;ytest<-ytrain

p1<-predict.classif(a1,newdat)
p2<-predict.classif(a2,newdat)
p3<-predict.classif(a3,newdat)
p4<-predict.classif(a4,newdat)

mean(p1==ytest);mean(p2==ytest);mean(p3==ytest);mean(p4==ytest);
mean(p1==ytrain);mean(p2==ytrain);mean(p3==ytrain);


newdat<-list(df=data.frame(glearn=phoneme$classlearn),"x"=phoneme$test)
ytest<-phoneme$classtest
ytrain<-phoneme$classtest

a1<-classif.qda(glearn~x, data = newdat)
a2<-classif.qda(glearn~x, data = newdat, weights=c(.3,.3,.2,.1,.1))

p1<-predict(a1$fit[[1]],a1$XX)$class
p2<-predict(a2$fit[[1]],a2$XX)$class
p3<-predict.classif(a1,newdat)
mean(p1==p2)
mean(p1==ytest);mean(p2==ytest)
mean(p1==ytrain);mean(p2==p3)
table(p1,p2)
table(p2,p3)
#################################
Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]),
                   Sp = rep(c("s","c","v"), rep(50,3)))
train <- sample(1:150, 75)
table(Iris$Sp[train])
## your answer may differ
##  c  s  v
## 22 23 30
z <- lda(Sp ~ ., Iris, prior = c(1,1,1)/3, subset = train)
p<-predict(z, Iris[train, ])$class

names(Iris)
Iris2<-Iris[1:100,]
Iris2$Sp2<-as.numeric(Iris2$Sp)-1
z2 <- gbm(formula("Sp2 ~ Sepal.L.+Sepal.W.+Petal.L.+Petal.W."), data=Iris2,distribution="bernoulli")
z2
summary(z2)

predict.gbm(z2, Iris2,n.trees=10, single.tree=F )>0
predict.gbm(z2, Iris2,n.trees=10, single.tree=F,type="response" )>0.5

install.packages("xgboost")
library(xgboost)
?xgboost


install.packages("h2o")
library(h2o)
#https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
#cuenta de usuario (como aemet!)
h2o.init()
#For H2O package documentation, ask for help:
?h2o

h2o.no_progress()
h2o.init(max_mem_size = "5g")

names(Iris2)
y <- "Sp2"
x <- setdiff(names(Iris2), y)

# turn training set into h2o object
train.h2o <- as.h2o(Iris2)

# training basic GBM model with defaults
h2o.fit1 <- h2o.gbm(
  x = x,
  y = y,
  training_frame = train.h2o,
  nfolds = 5
)

# assess model results
(h2o.fit1)


# training basic GBM model with defaults
h2o.fit2 <- h2o.gbm(
  x = x,
  y = y,
  training_frame = train.h2o,
  nfolds = 5,
  ntrees = 5000,
  stopping_rounds = 10,
  stopping_tolerance = 0,
  seed = 123
)

# model stopped after xx trees
h2o.fit2@parameters$ntrees

# cross validated RMSE
h2o.rmse(h2o.fit2, xval = TRUE)




#split <- h2o.splitFrame(train.h2o, ratios = 0.75)
#train <- split[[1]]
#valid <- split[[2]]

train <- train.h2o
valid <- train.h2o

# create hyperparameter grid
hyper_grid <- list(
  max_depth = c(1, 3, 5),
  min_rows = c(1, 5, 10),
  learn_rate = c(0.01, 0.05, 0.1),
  learn_rate_annealing = c(.99, 1),
  sample_rate = c(.5, .75, 1),
  col_sample_rate = c(.8, .9, 1)
)

# perform grid search 
grid <- h2o.grid(
  algorithm = "gbm",
  grid_id = "gbm_grid1",
  x = x, 
  y = y, 
  training_frame = train,
  validation_frame = valid,
  hyper_params = hyper_grid,
  ntrees = 5000,
  stopping_rounds = 10,
  stopping_tolerance = 0,
  seed = 123
)

# collect the results and sort by our model performance metric of choice
grid_perf <- h2o.getGrid(
  grid_id = "gbm_grid1", 
  sort_by = "mse", 
  decreasing = FALSE
)
grid_perf
proc.time()


#' \dontrun{
#' require(fda.usc)
#' data(phoneme)
#' mlearn<-phoneme[["learn"]]
#' glearn<-phoneme[["classlearn"]]
#' mtest<-phoneme[["test"]]
#' gtest<-phoneme[["classtest"]]
#' dataf<-data.frame(glearn)
#' dat=list("df"=dataf,"x"=mlearn)
#' a1<-classif.glm(glearn~x, data = dat)
#' a2<-classif.svm(glearn~x, data = dat)
#' a3<-classif.svm2(glearn~x, data = dat)
#' newdat<-list("x"=mtest)
#' p1<-predict.classif(a1,newdat)
#' table(gtest,p1)
#' sum(p1==gtest)/250
#' }
#' @export


rp
 
 data(tecator)
 ab=tecator$absorp.fdata
 ab1=fdata.deriv(ab,nderiv=1)
 ab2=fdata.deriv(ab,nderiv=2)
 gfat<-cut(tecator$y$Fat,breaks=3)

 dat1<-data.frame(y=gfat,x=ab$data[,c(1,50,100)])
 
 a0<-rpart(y~.,data=dat1)
 predict(a0,dat1,type="prob")
 
 
 a0<-svm(y~.,data=dat1,probability=T,fitted=T)
 predict(a0,dat1,probability=T,decision.values=T)
 
 
 a4<- weighted.ksvm(y=gfat,x=ab$data[,c(1,50,100)],weights=rep(1,len=length(gfat)))
 names(a4)
 range(predict(a4,ab$data[,c(1,50,100)],type="benefit.score"))
 
 
 a1<-ksvm(y~.,dat1,prob.model=T)
 
 predict(a1,dat1,type = 'probabilities')
 predict(a1,dat1,type = 'votes')
 
 
 a0$decision.values[1:11,]
 
table(a0$fitted,gfat)
Any scripts or data that you put into this service are public.
fda.usc documentation built on Oct. 17, 2022, 9:06 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
fda.usc
Functional Data Analysis and Utilities for Statistical Computing

inst/script/classif.svm_pruebas_type_1vsAll.R
In fda.usc: Functional Data Analysis and Utilities for Statistical Computing

Try the fda.usc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fda.usc Functional Data Analysis and Utilities for Statistical Computing

inst/script/classif.svm_pruebas_type_1vsAll.R In fda.usc: Functional Data Analysis and Utilities for Statistical Computing

Try the fda.usc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fda.usc
Functional Data Analysis and Utilities for Statistical Computing

inst/script/classif.svm_pruebas_type_1vsAll.R
In fda.usc: Functional Data Analysis and Utilities for Statistical Computing