# app.R
# In mathurshikhar/visualisation: Visualisation

library(shiny)
library(ggplot2)
library(plotly)
library(gridExtra)
library(randomForest)

# Load the two data sets used by the app.
#   forestCover (Data/out.csv)  - sampled for the interactive scatter plot and
#                                 used for the histogram; its column names
#                                 (names1) fill the X / Y / Color selectors.
#   forest (Data/train.csv)     - used for the box plot, the density grid and
#                                 the random-forest importance plot.
forestCover <- read.csv("Data/out.csv")
names1 <- names(forestCover)
forest <- read.csv("Data/train.csv")

# Collapse a block of 0/1 indicator columns into a single integer vector that
# holds, for every row, the position of the column equal to 1.
#
# indicators - data frame or matrix of indicator columns.
# Returns an integer vector with one entry per row.
# Stops with an error unless every row has exactly one indicator set, so a
# malformed file is reported here instead of producing a misleading code.
oneHotIndex <- function(indicators) {
  hits <- as.matrix(indicators) == 1
  stopifnot(all(rowSums(hits) == 1))
  max.col(hits + 0L, ties.method = "first")
}

# Taken from predict.r
# NOTE(review): the positional indices below assume that, once Id is dropped,
# columns 11:14 are the wilderness-area indicators and 15:54 the soil-type
# indicators - confirm against the header of Data/train.csv.
forest$Id <- NULL
soil <- forest[, 15:54]
area <- forest[, 11:14]
forest <- forest[, -(11:54)]

# Store the indicator position itself. The previous factor(..., labels = 1:38)
# relabelling failed unless exactly 38 distinct soil types were present and
# replaced the real positions with dense ranks.
forest$Soil_Type <- oneHotIndex(soil)
forest$Wilderness_Area <- oneHotIndex(area)

# Move the two new columns ahead of the former 11th column (presumably
# Cover_Type, which the model formula uses as the response) so it ends up last.
forest <- forest[, c(1:10, 12, 13, 11)]

# Copy of the reshaped training data taken before any subsampling.
forestTrain <- forest
# Fit the random forest whose variable importance is shown in the app.
# Seed the generator before the first random draw so that both the subsample
# and the hold-out split below are reproducible from run to run.
set.seed(1)

# Work on a random subsample (about 20% of the rows) of the training data.
forest1 <- forest[runif(nrow(forest)) > 0.8, ]

# Keep only the columns that actually vary within the subsample; a column
# whose variance is zero or cannot be computed is dropped.
sub <- vapply(forest1, function(col) isTRUE(var(col) > 0), logical(1))
forestSub <- forest1[, sub, drop = FALSE]

# Hold out roughly 20% of the subsample; the remaining rows are used to fit.
inTrain <- runif(nrow(forestSub)) > 0.2
train <- forestSub[inTrain, ]

# Every column except Cover_Type is a predictor. mtry is capped at the number
# of predictors in case constant columns were removed above.
rf <- randomForest(
  factor(Cover_Type) ~ .,
  data = train,
  mtry = max(1, min(12, ncol(train) - 1)),
  ntree = 1000
)

# Alias under which the fitted model is handed to varImpPlot() in the server.
Importance <- rf
  
# Axis settings passed to plotly's layout() as the histogram's x axis.
pfc <- list(title = "Predicted Forest Cover Type")

# Page layout: a header, a sidebar of controls next to the interactive
# scatter plot, then the histogram, the box plot / importance plot pair and
# finally the density grid.
ui <- local({
  # Drop-down listing every column name of forestCover.
  columnPicker <- function(id, label, default) {
    selectInput(
      inputId = id,
      label = label,
      choices = names1,
      selected = default
    )
  }

  # Controls that drive the scatter plot.
  controls <- sidebarPanel(
    columnPicker("x", "X", "Soil_Type"),
    tags$hr(),
    columnPicker("y", "Y", "Elevation"),
    tags$hr(),
    columnPicker("color", "Color", "Forest_Cover_Type"),
    tags$hr(),
    sliderInput(
      inputId = "sampleSize",
      label = "Sample Size",
      min = 1,
      max = 2000,
      value = 1000,
      step = 500,
      round = 0
    )
  )

  # Interactive scatter plot.
  scatter <- mainPanel(
    plotlyOutput(outputId = "trendPlot", width = "850px", height = "650px")
  )

  # Histogram, indented by two grid columns.
  histogramRow <- fluidRow(
    column(width = 7, plotlyOutput(outputId = "hist"), offset = 2)
  )

  # Box plot and variable-importance plot side by side.
  summaryRow <- fluidRow(
    column(width = 5, plotOutput(outputId = "boxplot")),
    column(width = 5, plotOutput(outputId = "impPlot"))
  )

  fluidPage(
    headerPanel("Forest Cover Data Visualisation"),
    controls,
    scatter,
    histogramRow,
    summaryRow,
    plotOutput(outputId = "grid")
  )
})


# Server logic: fills the five outputs declared in `ui`.
#
# input  - Shiny inputs; reads x, y, color and sampleSize.
# output - Shiny outputs; assigns trendPlot, hist, boxplot, impPlot and grid.
server <- function(input, output) {

  # Use one base font size for every ggplot drawn by the app. This used to be
  # set from inside the box plot renderer, so whether the other plots picked
  # it up depended on the order in which the outputs happened to render.
  theme_set(theme_gray(base_size = 20))

  # Random rows of forestCover, redrawn whenever the sample-size slider moves.
  # The request is capped at the number of available rows because sampling
  # without replacement fails when asked for more rows than exist.
  dataset <- reactive({
    available <- nrow(forestCover)
    forestCover[sample.int(available, min(input$sampleSize, available)), ]
  })

  # Build a dynamic ggplot scatter plot from the selected columns.
  output$trendPlot <- renderPlotly({
    # ggplot() ignores unknown arguments such as `main`, so the title has to
    # be added as a layer for it to be displayed.
    p <- ggplot(
      dataset(),
      aes_string(x = input$x, y = input$y, color = input$color)
    ) +
      geom_point() +
      ggtitle("Brush and double click to zoom")
    ggplotly(p)
  })

  # Build a histogram comparing the different forest cover types.
  output$hist <- renderPlotly({
    plot_ly(forestCover, x = ~Forest_Cover_Type, type = "histogram") %>%
      layout(xaxis = pfc)
  })

  # Build a box plot of the measurements available in the train data set.
  output$boxplot <- renderPlot({
    # las = 3 draws the axis labels vertically, so the bottom margin is
    # enlarged to make room for the long names. par() is called on its own
    # line instead of being passed to boxplot() as a stray argument.
    par(mar = c(15, 4, 2, 2))
    boxplot(
      forest[, -c(7, 8, 9, 11, 12, 13)],
      las = 3,
      col = "darkseagreen4",
      main = "General Analysis",
      names = c(
        "Elevation", "Aspect", "Slope", "Hor. Dist to Hydrology",
        "Ver. Dist to Hydrology", "Hor. Dist to Roadways",
        "Hor. Dist to Fire Points"
      )
    )
  })

  # Create a varImpPlot from the fitted random forest showing the importance
  # of each predictor.
  output$impPlot <- renderPlot({
    varImpPlot(Importance, col = "darkblue", pch = 19)
  })

  # Build a 2 x 2 grid of density plots, one per variable, split by cover type.
  output$grid <- renderPlot({
    # Shared colour / fill mapping and geometry; only x differs per panel.
    base <- ggplot(
      forest,
      aes(color = factor(Cover_Type), fill = factor(Cover_Type))
    ) +
      geom_density(alpha = 0.2)

    panels <- list(
      base + aes(x = Elevation),
      base + aes(x = Aspect),
      base + aes(x = Horizontal_Distance_To_Roadways),
      base + aes(x = Horizontal_Distance_To_Fire_Points)
    )
    grid.arrange(grobs = panels, ncol = 2, nrow = 2)
  })
}
# Create the Shiny app object from the UI definition and the server function.
shinyApp(ui = ui, server = server)