R/predict.R In mathurshikhar/visualisation: Visualisation

```r
library(randomForest)
library(ggplot2)
library(gridExtra)
library(corrplot)
library(caret)
library(tree)

# Prepare the training data: collapse the one-hot wilderness-area and
# soil-type indicator columns into two integer-coded columns, move the
# response to the last position, and keep a copy in `forestTrain`.
forest$Id <- NULL

# Positions below refer to the layout left after dropping `Id`:
# columns 11-14 are the wilderness-area indicators, 15-54 the soil-type
# indicators (presumably followed by Cover_Type -- confirm against the loader).
soil <- forest[, 15:54]
area <- forest[, 11:14]
forest <- forest[, -(11:54)]

# Each row must flag exactly one soil type and one wilderness area; fail
# loudly instead of silently assigning an arbitrary code.
soil_flag <- as.matrix(soil) == 1
area_flag <- as.matrix(area) == 1
stopifnot(
  all(rowSums(soil_flag) == 1),
  all(rowSums(area_flag) == 1)
)

# Code each row by the position of its indicator column. The levels are fixed
# to the full range so that a category missing from this data set does not
# shift the codes of the others; this keeps the encoding identical to the one
# used for the test set.
fact <- factor(max.col(soil_flag, ties.method = "first"),
               levels = seq_len(ncol(soil)))
forest$Soil_Type <- as.integer(fact)
fact2 <- factor(max.col(area_flag, ties.method = "first"),
                levels = seq_len(ncol(area)))
forest$Wilderness_Area <- as.integer(fact2)

# Column 11 is what remained after the indicator columns were dropped;
# move it behind the two new columns so it ends up last.
forest <- forest[, c(1:10, 12, 13, 11)]
forestTrain <- forest

# Draw a reproducible random subsample of the training data (rows whose
# uniform draw exceeds 0.8) for quick experiments.
set.seed(1)
forest1 <- forest[runif(nrow(forest)) > 0.8, ]
forest1$Id <- NULL  # no-op when Id has already been removed

# Remove predictors with zero variance. The response is always kept: the
# variance test is only meant for predictors, and the models below need
# Cover_Type to be present.
sub <- vapply(forest1, function(col) var(col) != 0, logical(1))
sub[names(forest1) == "Cover_Type"] <- TRUE
forestSub <- forest1[, sub, drop = FALSE]
n <- dim(forestSub)

# Hold out the rows whose uniform draw is <= 0.2 for testing; the seed is
# reset so the split is reproducible on its own.
set.seed(1)
split <- runif(nrow(forestSub)) > 0.2
train <- forestSub[split, ]
test <- forestSub[!split, ]

# Tree prediction: fit a single classification tree on the training split,
# plot it, and predict the class of every hold-out row.
train1 <- train
test1 <- test
tree.forests <- tree(factor(Cover_Type) ~ ., data = train1)
plot(tree.forests)
text(tree.forests, cex = 1.3)

# Locate the response by name rather than by position: the zero-variance
# filter applied earlier can drop columns, after which Cover_Type is no
# longer guaranteed to sit in column 13.
response_col <- names(test1) == "Cover_Type"
tree.prediction <- predict(
  tree.forests,
  test1[, !response_col, drop = FALSE],
  type = "class"
)

# Observed vs. predicted cover type for the hold-out rows.
sa <- data.frame(cover = test[["Cover_Type"]], pred = tree.prediction)

# Use randomForest for prediction on the hold-out split.
# mtry is capped at the number of available predictors (all columns except
# Cover_Type) so that a column removed by the zero-variance filter does not
# make the requested value invalid.
n_predictors <- ncol(train) - 1L
rf <- randomForest(
  factor(Cover_Type) ~ .,
  data = train,
  mtry = min(12L, n_predictors),
  ntree = 1000
)
predictions <- predict(rf, test)

# Observed vs. predicted cover type, without the inherited row names.
pred <- data.frame(Cover_Type = test$Cover_Type, Prediction = predictions)
rownames(pred) <- NULL

# Next step: after training the model, move on to the general case and
# prepare the test set with the same encoding as the training data.
forestTest$Id <- NULL

# Positions refer to the layout left after dropping `Id`: columns 11-14 are
# the wilderness-area indicators and 15-54 the soil-type indicators.
soil <- forestTest[, 15:54]
area <- forestTest[, 11:14]
# `forest` is deliberately left untouched here: overwriting it with test-set
# columns served no purpose and destroyed the training data it held.

# Each row must flag exactly one soil type and one wilderness area; fail
# loudly instead of silently assigning an arbitrary code.
soil_flag <- as.matrix(soil) == 1
area_flag <- as.matrix(area) == 1
stopifnot(
  all(rowSums(soil_flag) == 1),
  all(rowSums(area_flag) == 1)
)

# Code each row by the position of its indicator column, with the levels
# fixed to the full range so the codes do not depend on which categories
# happen to occur in the test data.
Newfactor <- factor(max.col(soil_flag, ties.method = "first"),
                    levels = seq_len(ncol(soil)))
forestTest$Soil_Type <- as.integer(Newfactor)
Newfactor2 <- factor(max.col(area_flag, ties.method = "first"),
                     levels = seq_len(ncol(area)))
forestTest$Wilderness_Area <- as.integer(Newfactor2)

# Keep the first ten columns plus the two collapsed codes; the new columns
# are selected by name so the result does not hinge on their position.
forestTest <- forestTest[
  ,
  c(names(forestTest)[1:10], "Wilderness_Area", "Soil_Type")
]

# Remove columns with zero variance from the test set: `sub` flags, per
# column, whether its variance differs from zero.
sub <- apply(forestTest, 2, var) != 0
TestSub <- forestTest[, sub]
n <- dim(TestSub)

# Final model: use the full preprocessed training set (`forestTrain`) to
# predict the cover type of every test row and write the result to disk.
forestTest$Id <- NULL   # no-op when Id has already been removed
forestTrain$Id <- NULL  # no-op when Id has already been removed

# Use randomForest for prediction.
# The tree count must be passed as `ntree`; the previous spelling `ntrees`
# is not an argument of randomForest, so the default forest size was used.
# mtry is capped at the number of predictors (all columns but Cover_Type).
rf1 <- randomForest(
  factor(Cover_Type) ~ .,
  data = forestTrain,
  mtry = min(12L, ncol(forestTrain) - 1L),
  ntree = 1000,
  importance = TRUE
)

# Predict on the complete test frame: the model was trained on every
# predictor in `forestTrain`, so a test frame with columns filtered out
# (such as `TestSub`) may lack variables the model needs.
predictions <- predict(rf1, forestTest)

# Append the predictions as a `Forest_Cover_Type` column and save.
Forest_Cover_Type <- data.frame(predictions)
names(Forest_Cover_Type) <- "Forest_Cover_Type"
forestTest <- cbind(forestTest, Forest_Cover_Type)
write.csv(forestTest, file = "../Data/out.csv")
```
mathurshikhar/visualisation documentation built on May 21, 2019, 12:55 p.m.