# R/exercise4.R

library(mlr)
data(iris)

#-----------------exercise 2
#2
#learner: A learner in mlr is generated by calling makeLearner.
# 1) set hyperparameters. 2) control the output for later prediction
# 3) set an ID to name the object
#task
#Learning tasks encapsulate the data set and further relevant information
#about a machine learning problem, for example the name of the target variable for supervised problems

#how can I access details from a trained model
#4 plotLearnerPrediction()
#5 How can I get an overview on available performance measures
#6 How can I access the details of such a measure
#7 what is k-fold cross validation
#8 makeResampleDesc()
#9 makeParamSet()
#10 makeTuneControl*()
#11 What does the benchmark function do



#---------------Exercise 3
# 3-1-1
# fit a regression tree on the Boston Housing data with the learner
#"regr.rpart." Hint: The BostonHousing task comes with mlr and
# called bh.task

library(mlr)

# 3-1-1: fit a regression tree ("regr.rpart") on the Boston Housing data.
# The task wraps the data together with the target variable ("medv").
data(BostonHousing, package = "mlbench")
bh.task <- makeRegrTask(data = BostonHousing, target = "medv")

# Build the learner, then train it on the task; printing the fitted
# WrappedModel shows the learner, task and training time.
lrn <- makeLearner("regr.rpart")
fit <- train(lrn, bh.task)
print(fit)

#recursive partitioning
#mse mean squared error
#regr.gbm gradient boosting machine
#n = getTaskSize(bh.task)
#train.set = seq(1,n,by = 2)
#test.set = seq(2,n,by = 2)
#(rpart) classification tree
#fit = train(regr.lrn,bh.task,subset = train.set)

#task.pred = predict(fit,task=bh.task,subset=test.set)
#head(as.data.frame(task.pred))
#plotLearnerPrediction(regr.lrn,task = bh.task)

#names(fit)
#3-2
#extract the rpart model
#getLearnerModel(fit)
library(rpart.plot)

# 3-2: unwrap the fitted mlr WrappedModel down to the raw rpart object
# (more.unwrap strips any nested wrappers) and plot the tree.
models <- getLearnerModel(fit, more.unwrap = TRUE)
rpart.plot(models, digits = 2, varlen = 0)
#rm : average number of rooms per dwelling
#Istat: percentage of lower status of the population
# crime :per capita crime rate by town
# dis weighted distances to five Boston employment centres
#4 features
#3-1-3
#Prune the tree to a depth of 3 with prune()
# and plot it again. Which variables survived the pruning?
#getDefaultMeasure(makeLearner("regr.lm"))
#str(mmce)
#plot(prune(dend , c("Alaska", "California")), main = "tree without Alaska and California")
# rpart::prune() prunes by the complexity parameter (cp), NOT by depth;
# cp = 3 is > 1, which collapses the tree all the way to the root node.
# To actually limit the tree to a depth of 3, refit with maxdepth = 3
# and plot the resulting tree.
fit3 <- train(makeLearner("regr.rpart", maxdepth = 3), bh.task)
rpart.plot(getLearnerModel(fit3, more.unwrap = TRUE), digits = 2, varlen = 0)

#3-2
#1
# 3-2-1: load the Ionosphere data and drop constant features.
data(Ionosphere, package = "mlbench")
str(Ionosphere)

# removeConstantFeatures() RETURNS the filtered data; the original call
# discarded the result, so the constant column (V2) was never removed.
# perc = 0 removes only features that are exactly constant.
Ionosphere <- removeConstantFeatures(Ionosphere, perc = 0)

# 3-2
# 3-2-2: tune a gradient boosting machine on the Ionosphere task.
classif.task <- makeClassifTask(id = "Ionosphere", data = Ionosphere, target = "Class")

# A trafo maps the RAW sampled value to the actual hyperparameter value,
# so when trafo = 10^x is used, the bounds must be on the log10 scale.
# The original bounds were on the natural scale (e.g. n.trees in
# [100, 5000] with 10^x gave values around 10^100), which is unusable.
# n.trees and interaction.depth are integer-valued, so declare them as
# integer params on the natural scale; only shrinkage is searched on a
# log scale.
discrete_ps = makeParamSet(
  makeIntegerParam("n.trees", lower = 100L, upper = 5000L),
  makeIntegerParam("interaction.depth", lower = 1L, upper = 4L),
  # shrinkage in [10^-4, 10^-1]
  makeNumericParam("shrinkage", lower = -4, upper = -1, trafo = function(x) 10^x)
)
ctrl = makeTuneControlRandom(maxit = 50L)
rdesc = makeResampleDesc("CV", iters = 3L)
res = tuneParams("classif.gbm", classif.task, resampling = rdesc,
                 par.set = discrete_ps, control = ctrl)



#--------------------------------4
#1
# Exercise 4-1: benchmark LDA against a classification tree on the
# diabetes data using a single holdout split.
diabete <- read.csv(file = "data/train.csv")
classif.task <- makeClassifTask(data = diabete, target = "diabetes")

learner_list <- list(makeLearner("classif.lda"), makeLearner("classif.rpart"))
holdout_desc <- makeResampleDesc("Holdout")

bmr <- benchmark(learner_list, classif.task, holdout_desc)
getBMRPredictions(bmr)

#2
# Exercise 4-2: grid-search the LDA tolerance parameter with 3-fold CV.
# (Inspect tunable parameters with getParamSet("classif.lda").)
discrete_ps <- makeParamSet(
  makeNumericParam("tol", lower = 0, upper = 0.0001)
)
ctrl <- makeTuneControlGrid()
rdesc <- makeResampleDesc("CV", iters = 3L)
res1 <- tuneParams("classif.lda",
                   task = classif.task,
                   resampling = rdesc,
                   par.set = discrete_ps,
                   control = ctrl)
#getParamSet("classif.lda")


#3
# Exercise 4-3: parallelize resampling with parallelMap.
# detectCores() lives in the `parallel` package; parallelMap imports it
# but does not attach it, so `parallel` must be loaded BEFORE the first
# detectCores() call (the original script called it before library(parallel)).
library(parallel)
library(parallelMap)
detectCores()                      # report how many cores are available
parallelStartSocket(2)             # start a 2-worker socket cluster
rdesc = makeResampleDesc("CV", iters = 3)
r = resample("classif.lda", classif.task, rdesc)
parallelStop()                     # always shut the workers down again
#4
# Exercise 4-4: extract the per-iteration performances from the
# benchmark result and save them.
perf = getBMRPerformances(bmr, as.df = TRUE)
write.csv(perf, file = "data/perf.csv")
head(perf)

# performance() requires a Prediction object, not the performance
# data.frame, and the original `model = mode` accidentally passed base
# R's mode() function instead of a fitted model. Pull the prediction
# and the fitted model for the first learner/task from the benchmark
# result, then compute the training time measure.
preds = getBMRPredictions(bmr)
mods = getBMRModels(bmr)
performance(preds[[1]][[1]], measures = timetrain,
            model = mods[[1]][[1]][[1]])


# Combine several candidate learners into one tunable learner: the model
# multiplexer exposes "selected.learner" as a hyperparameter, so model
# selection can be done inside a normal tuning loop.
base.learners <- list(
  makeLearner("classif.ksvm"),
  makeLearner("classif.randomForest")
)
lrn <- makeModelMultiplexer(base.learners)
# haijunXue/mrr documentation built on May 28, 2019, 8:54 p.m.