knitr::opts_chunk$set(echo = F, warning = F, message = F, eval = T)
library(knitr)
library(ISLR)
library(tabplot)
library(tidyverse)
library(tabplot)
library(randomForest)
library(stringr)
library(GGally)
library(forcats)
library(caret)
library(ggcorrplot)
library(Amelia)
library(e1071)

# setwd("C:/Users/erbla/OneDrive/R/multiview shiny")



source( stringr::str_c(getwd(),'/mod_load_data v01.R') )

source( stringr::str_c(getwd(),'/mod_summary v01.R') )

source( stringr::str_c(getwd(), '/mod_clean_data v01.R') )

Data

Load Data

# load two ui_elements and the save_plot() function
# and the rea_load reactive element which returns 
# the data



mod_load_data_ui()

rea_load = mod_load_rea(input)
print('i ran')

# f_data  = function(params) {
#   
#   return(params$data)
#   
# }

rea_load = function(){

  temp_data
}

Summary

mod_summary_ui(rea_clean)

Clean Data

mod_clean_ui(rea_load)

rea_clean = mod_clean_rea(input, rea_load)

rea_ana   = mod_ana_rea(input, rea_clean)

Analysis

Group Statistics

renderUI({

  d_clean = rea_ana()
  categoricals = d_clean$categoricals

  inputPanel(

  selectInput("group_stats", label = "select grouping variable",
              choices = categoricals, selected = categoricals[1])
  )

})

f_bring_to_pos_range = function(x){


  if( min(x)< 0) x = x + abs(min(x))


  return(x)
}

f_diff_of_means_medians = function(df, group, variable){


  data = df %>%
    select( group = one_of(group), variable = one_of(variable) )%>%
    mutate ( variable = f_bring_to_pos_range(variable) ) %>%
    group_by( group ) %>%
    summarise( means     = mean(variable,   na.rm = T)
               , medians = median(variable, na.rm = T) ) %>%
    ungroup() %>%
    summarise( diff_of_means        = max(means) - min(means)
               , diff_of_means_perc = ( ( max(means) - min(means) ) /max(means) ) *100
               , diff_of_medians      = max(medians) - min(medians)
               , diff_of_medians_perc = ( ( max(medians) - min(medians) ) /max(means) ) *100    )


  return(data)
}

f_max_diff_of_freq = function(df, var1, var2){

  t = table( df[[var1]], df[[var2]] ) %>%
    as_tibble() %>%
    group_by( Var1 ) %>% 
    mutate ( diff_var1          = ( max(n)-min(n) ) 
               , diff_var1_perc = ( ( max(n)-min(n))/ max(n) *100) 
             ) %>%
    group_by( Var2 ) %>%
    mutate ( diff_var2          = ( max(n)-min(n) ) 
               , diff_var2_perc = ( ( max(n)-min(n))/ max(n) *100) 
              ) %>%
    ungroup()%>%
    summarise( max_diff_freq          = max( c(diff_var1, diff_var2) )
               , max_diff_freq_perc  = max( c(diff_var1_perc, diff_var2_perc) ) 
    )
}

f_anova_stats = function(df, group, variables) {
  # returns a dataframe with anova stats
  # df        : dataframe
  # group     : grouping variable as character vector, must indicate factor variable
  # variables : numerical variables to be analyzed as character vector, must indicate numerical variable

  data = df

  formula = stringr::str_c('value~',group) %>%
    as.formula()

  df_anova = data %>%
    as_tibble() %>%
    select( one_of( c(group, variables) ) ) %>%
    gather(key = 'variable', value = 'value', one_of( variables ) ) %>%
    group_by( variable ) %>%
    nest( one_of(group), value) %>%
    mutate( model_anova     = purrr::map( data, ~aov( formula, data = .)) 
            , summary_anova = purrr::map( model_anova, summary)
            , summary_anova = purrr::map( summary_anova
                                          , function(x) x[[1]])
            , anova_pval    = purrr::map(summary_anova, 'Pr(>F)')
            , anova_pval    = purrr::map_dbl(anova_pval
                                             , function(x) x[1])
            , model_kruskal = purrr::map( data, ~kruskal.test(formula, data = .) )
            , kruskal_pval  = purrr::map_dbl(model_kruskal, 'p.value')
            , model_shapiro = purrr::map(data
                                      , function(x) shapiro.test(sample(x$value, 5000, replace = T) ) )
            , shapiro_stat  = purrr::map_dbl(model_shapiro,'statistic')
            , shapiro_pval  = purrr::map_dbl(model_shapiro,'p.value') 
            , diff_df       = purrr::map( data, f_diff_of_means_medians, group = group, variable = 'value') ) %>%
    unnest(diff_df) %>%
    select(variable
           , shapiro_stat
           , shapiro_pval
           , anova_pval
           , kruskal_pval
           , diff_of_means
           , diff_of_means_perc
           , diff_of_medians
           , diff_of_medians_perc
           )

  return(df_anova)
}

f_chi_square = function(df, group, variables) {
  # returns a dataframe with anova stats
  # df        : dataframe
  # group     : grouping variable as character vector, must indicate factor variable
  # variables : numerical variables to be analyzed as character vector, must indicate factor variable

  data = df

  variables = variables[!variables == group]

  if(purrr::is_empty(variables)) return()

  df_chi = data %>%
    as_tibble() %>%
    select( one_of( c(group, variables) ) ) %>%
    gather(key = 'variable', value = 'value', one_of( variables  ) ) %>%
    group_by( variable ) %>%
    nest( one_of( group ), value) %>%
    mutate(  model_chi = purrr::map( data, ~chisq.test(x = .[[group]]
                                                       , y = .[['value']]
                                                       ) )
            ,chi_pval  = purrr::map_dbl(model_chi, 'p.value')
            ,diff_df   = purrr::map(data, f_max_diff_of_freq, group, 'value')  ) %>%
    unnest(diff_df) %>%
    select(variable, chi_pval, max_diff_freq, max_diff_freq_perc)

  return(df_chi)
}

rea_group_stats = reactive({

  d_clean    = rea_ana()
  data       = d_clean$data
  boxcox     = d_clean$boxcox
  numericals = d_clean$numericals
  categoricals = d_clean$categoricals

  if(! is_empty(numericals)) {


    df_num    = f_anova_stats(df = data
                             ,group = input$group_stats
                             ,variables = numericals
                             )

    df_boxcox = f_anova_stats(df = data
                              ,group = input$group_stats
                              ,variables = boxcox
                              )

    sig_num = df_num %>%
      mutate( p_val = ifelse(shapiro_stat >= 0.9
                             , anova_pval
                             , kruskal_pval) 
              ) %>%
      filter(p_val <= 0.001) %>%
      .[['variable']]

    sig_boxcox = df_boxcox %>%
      mutate( p_val = ifelse(shapiro_stat >= 0.9
                             , anova_pval
                             , kruskal_pval) 
              ) %>%
      filter(p_val <= 0.001) %>%
      .[['variable']]


  } else {

    df_num     = NULL
    df_boxcox  = NULL
    sig_num    = NULL
    sig_boxcox = NULL

  }



  # only works with minimum two categoricals vars
  if( length(categoricals) >= 2 ){

    df_chi    = f_chi_square(df = data
                              ,group = input$group_stats
                              ,variables = categoricals
                             )

    sig_chi = df_chi %>%
      filter(chi_pval <= 0.001) %>%
      .[['variable']]


  } else{

    df_chi  = NULL
    sig_chi = NULL
  }

  return( list( df = list(df_num     = df_num
                          ,df_boxcox = df_boxcox
                          ,df_chi    = df_chi
                          )
                , sig = list(sig_num     = sig_num
                           ,sig_boxcox = sig_boxcox
                           ,sig_chi    = sig_chi
                           ) 
                ) 
          )

})

Numericals (ANOVA)

render_table_num_group_stat = function( boxcox = F) {

  DT::renderDataTable(
   extensions = 'Buttons'
   , options = list( dom       = 'Bftrip'
                    , buttons = c('copy', 'excel')
                    , pageLength = 1
                    )
  ,{

    group = input$group_stats

    if( boxcox == F ){ 

      df = rea_group_stats()$df$df_num

    }else{

      df = rea_group_stats()$df$df_boxcox

    }

      df = DT::datatable(df
                         , extensions = 'Buttons'
                         , options    = list( dom       = 'Bftrip'
                                              , buttons = c('copy', 'excel')
                                              )
                     ) %>%
      DT::formatStyle('shapiro_stat'
                      , color = DT::styleInterval(0.9, c('red','green') )
                      ) %>%
      DT::formatStyle(3:5
                      , color = DT::styleInterval(0.01, c('green','red') )
                      ) %>%
      DT::formatRound(2:5, 6) %>%
      DT::formatRound(c(6,8), 3) %>%
      DT::formatRound(c(7,9), 2)

  })

}

render_table_num_group_stat( boxcox = F )

Numericals boxcox (ANOVA)

render_table_num_group_stat( boxcox = T )

Categoricals (Chi-Square)

DT::renderDataTable({

  d_clean    = rea_ana()
  data       = d_clean$data
  boxcox     = d_clean$boxcox
  numericals = d_clean$numericals
  categoricals = d_clean$categoricals


  group = input$group_stats

  df = rea_group_stats()$df$df_chi 

  df = DT::datatable(df
                     , extensions = 'Buttons'
                     , options    = list( dom = 'Bftrip'
                                          , buttons = c('copy', 'excel')
                                          )
                     ) %>%
    DT::formatStyle('chi_pval'
                    , color = DT::styleInterval(0.01, c('green','red') )
                    ) %>%
    DT::formatRound( 2, 6)   %>%
    DT::formatRound( 4, 2)

  return(df)

})

Predictive Capacity

renderUI({

  d_clean = rea_ana()
  categoricals = d_clean$categoricals
  numericals   = d_clean$numericals

  inputPanel(

    selectInput("group_imp"
                , label = "select predicted variable"
                , choices = c(categoricals, numericals)
                , selected = categoricals[1])

  )

})

rea_imp = reactive({

  d_clean = rea_ana()
  data = d_clean$data
  all_variables = d_clean$all_variables

  form = as.formula( stringr::str_c(input$group_imp,'~.') )

  m = randomForest::randomForest(form, data[, all_variables] )

  imp = tibble( mean_decrease_gini = m$importance[,1]
                ,variable = names(m$importance[,1])
                ,group = input$group_imp) %>%
    arrange(desc(mean_decrease_gini))

  return(imp)

})

renderPlot({

  imp = rea_imp()

  p = ggplot(imp) +
    geom_bar(aes(x = fct_reorder(variable
                             , mean_decrease_gini) 
                , y = mean_decrease_gini
                , fill = fct_reorder(variable
                                 , mean_decrease_gini
                                 , .desc = T)
                )
            , stat = 'identity'
            , show.legend = F) +
        coord_flip() +
        labs( x = 'Variable'
              , y = 'Predictive capacity (Mean decrease in Gini)')


  name = stringr::str_c( input$sql_or_lib,'_predictive_capacity_', imp[[1,'group']] )


  save_plot(plot = p
            , path = input$path
            , name = name
            , yes = input$save
            )

  return(p)

})

Decision Tree

renderUI({

  d_clean = rea_ana()
  categoricals = d_clean$categoricals
  numericals   = d_clean$numericals

  inputPanel(

    selectInput("group_tree"
                , label    = "select predicted variable"
                , choices  = c(categoricals, numericals)
                , selected = categoricals[1]

                )

    , numericInput('min_split'
                  ,label = 'minimum node size to attempt split'
                  , min   = 2
                  , max   = 10000
                  , step  = 1
                  , value = 20
                  )

    , numericInput('max_depth'
                  , label = 'maximum tree depth'
                  , min   = 2
                  , max   = 30
                  , step  = 1
                  , value = 30
                  )

    , numericInput('cp'
                   ,label = 'minimum complexity reduction to attempt split'
                   , min   = 0.0001
                   , max   = 0.99
                   , step  = 0.001
                   , value = 0.01
                   )

    , numericInput('xval'
                   ,label = 'k-fold cross validation'
                   , min   = 0
                   , max   = 100
                   , step  = 1
                   , value = 10
                   )

  )

})

rea_tree = reactive({

  d_clean = rea_ana()
  data = d_clean$data

  pred = input$group_tree

  form = stringr::str_c(pred, '~.') %>%
    as.formula()

  m = rpart::rpart(form
                   , data
                   , minsplit = input$min_split
                   , cp       = input$cp
                   #, maxcompete = 4
                   #, maxsurrogate = 5
                   #, usesurrogate = 2
                   #, xval = 10
                   #, surrogatestyle = 0
                   , maxdepth = input$max_depth
                   , xval     = input$xval)

  return(m)

})


renderPlot({

  m = rea_tree()

  rpart.plot::prp(m
                  , branch.type   = 5
                  , box.palette   ="RdYlGn"
                  , faclen        = 0
                  , extra         = 6
                  , fallen.leaves = input$fallen_leaves
                  , tweak         = input$tweak
                  , gap           = input$gap
                  , space         = input$space
                  )

  name = stringr::str_c( input$sql_or_lib,'_tree1_', input$group_tree )

  save_plot(plot = p
            , path = input$path
            , name = name
            , yes = input$save
            , excel = F
            )
})

renderPlot({

  m = rea_tree()

  rpart.plot::rpart.plot(m
                  , fallen.leaves = T
                  , tweak         = input$tweak
                  , gap           = input$gap
                  , space         = input$space
                  )

  name = stringr::str_c( input$sql_or_lib,'_tree2_', input$group_tree )

  save_plot(plot = p
            , path = input$path
            , name = name
            , yes = input$save
            , excel = F
            )
})


inputPanel(

checkboxInput('fallen_leaves'
              , label = 'fallen_leaves'
              , value = T)

, numericInput('tweak'
              ,label = 'increase_text_size'
              , min   = 0.1
              , max   = 10
              , step  = 0.1
              , value = 1.2
              )

, numericInput('gap'
              , label = 'gap'
              , min   = 0
              , max   = 100
              , step  = 1
              , value = 2
              )

, numericInput('space'
              , label = 'space'
              , min   = 0
              , max   = 100
              , step  = 1
              , value = 2
               )
)

Principle Component Analysis

Plot Principle Components

rea_pca = reactive({

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  if(is_empty(numericals) | is_empty(boxcox)) return()

  if(input$boxcox_pca == T) {

    vars = boxcox

  }
  else{

    vars = numericals
  }

  pca = prcomp(x = select(data, one_of(vars) )
               , scale. = input$scale_pca
               , center = input$center_pca)

  pca$cos2 = pca$rotation^2

  pca$contrib = lmap( as_tibble(pca$cos2 ), function(y) y/ sum(y) *100 )
  row.names(pca$contrib) = row.names(pca$cos2)

  pca$vae = as_tibble (t( pca$sdev / sum(pca$sdev) *100 ) )
  colnames(pca$vae) = colnames(pca$contrib)

  pca$contrib_abs_perc = t( t( apply( pca$contrib/100, 1, function(x,y) x*pca$vae ) ) )
  pca$contrib_abs_perc = unnest( as.data.frame( pca$contrib_abs_perc) )
  row.names(pca$contrib_abs_perc) = row.names(pca$contrib)

  pca$contrib_abs_perc = as.data.frame( t(pca$contrib_abs_perc) )
  pca$contrib_abs_perc$var = row.names(pca$contrib_abs_perc)

  pca$contrib_abs_perc = pca$contrib_abs_perc %>%
    gather(key = 'key', value = 'value', everything(), -var) 

  #group variables with less than 2.5% contribution
  pca$contrib_abs_perc_reduced = pca$contrib_abs_perc  %>%
    mutate(key = ifelse(value < 2.5
                        , ' sum contrib < 2.5%'
                        , key)) %>%
    group_by( var, key) %>%
    summarise( value = sum(value) )


  # filter principle components that explain less than
  # 2.5% of the variance

  pca$x                        = pca$x[, pca$vae > 2.5]

  pca$contrib_abs_perc         = pca$contrib_abs_perc %>%
    filter(var %in% colnames(pca$x))

  pca$contrib_abs_perc_reduced = pca$contrib_abs_perc_reduced %>%
    filter(var %in% colnames(pca$x))

  data = data %>%
    cbind(pca$x)

  return(list(data = data, pca = pca) )

})

renderUI({

  inputPanel(


     checkboxInput('center_pca', label = 'center', value = T)  

    , checkboxInput('scale_pca', label = 'scale', value = T)  

    , checkboxInput('boxcox_pca', label = 'boxcox', value = T)  



  )

})

Plot Principle Components

# rotation plot
renderPlot({

  data = rea_pca()$data
  pca  = rea_pca()$pca

  group  = input$group_pca
  x_axis = input$x_axis_pca
  y_axis = input$y_axis_pca


  if(input$group_pca == 'None') group = NULL 

  p = ggplot(data) +
    geom_point( aes_string(x = x_axis, y = y_axis, color = group)
                ,alpha=0.4 ) +
    labs(title = 'plot1')


  name = stringr::str_c( input$sql_or_lib,'_principle_component_')%>%
    stringr::str_c( input$group_pca ) %>%
    stringr::str_c('_boxcox', input$boxcox_pca) %>% 
    stringr::str_c('_scale',  input$scale_pca) %>% 
    stringr::str_c('_center', input$center_pca) 

  save_plot(plot = p
        , path = input$path
        , name = name
        , yes = input$save
        )

  return( p )

})

renderUI({

  d_clean       = rea_ana()
  d_pca         = rea_pca()
  data          = d_pca$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  prin_comp = select(data, starts_with('PC', ignore.case = F)) %>%
    names()

  inputPanel(


    selectInput("x_axis_pca"
                , label = "select x_axis"
                , choices = prin_comp 
                , selected = prin_comp[2]
                )

    , selectInput("y_axis_pca"
                , label = "select y_axis"
                , choices = prin_comp 
                , selected = prin_comp[1]
                )

    , selectInput("group_pca"
                , label = "select grouping variable"
                , choices = c(categoricals, 'None')
                , selected = categoricals[1]
                )

  )

})

Plot Variance Explained

# variance explained

renderPlot({

  pca = rea_pca()$pca

  vae = tibble( value = pca$sdev
                , pca_n = str_c('pca', 1:length(pca$sdev))
            ) 

  p =   ggplot(pca$contrib_abs_perc_reduced) +
    geom_bar(aes(x = fct_reorder(var, value, sum, .desc = T )
                 , y = value, fill = key)
             , stat = 'identity'
             , position='stack') +
    scale_fill_brewer(palette = 'Paired') 

  name = stringr::str_c( input$sql_or_lib,'_variance_explained_')%>%
    stringr::str_c( input$group_pca ) %>%
    stringr::str_c('_boxcox', input$boxcox_pca) %>% 
    stringr::str_c('_scale',  input$scale_pca) %>% 
    stringr::str_c('_center', input$center_pca)  

    save_plot(plot = p
          , path = input$path
          , name = name
          , yes = input$save
          )

  return(p)

})

Correlation

rea_corr = reactive({

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  if(is_empty(boxcox)) return()

  corr = round( cor(data[, boxcox]), 1)

  p_val_corr = ggcorrplot::cor_pmat( data[, boxcox] )

  return( list(corr = corr,
               p_val_corr = p_val_corr) )

})

renderPlot(width = 1024, height = 1024, {

  corr = rea_corr()$corr
  p_val_corr = rea_corr()$p_val_corr

  p = ggcorrplot::ggcorrplot( corr
                              ,hc.order = TRUE
                              , type = "lower"
                              , lab = TRUE
                              , p.mat = p_val_corr
                              )

  name = stringr::str_c( input$sql_or_lib,'_correlation')

  save_plot(plot = p
        , path = input$path
        , name = name
        , yes = input$save
        )

  return(p)

})

Data Visualisation

Tabplot

preselect = c('all'
              ,'none'
              ,'categoricals'
              ,'numericals'
              ,'top_10_importance'
              ,'boxcox'
              ,'none_boxcox'
              ,'pc1+pc2'
              ,'pc1+pc2 contrib >2.5%'
              ,'correlation'
              ,'group_stat_P<0.001'
              ,'2fac_2num'
              )

renderUI({

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  inputPanel(


      sliderInput('n_breaks_tab'
                  , label = "Number of bins:"
                  , min = 1
                  , max = 500
                  , step = 1
                  , value = 30)

    , selectInput("sort_col_tab"
                  , label = "Sort_by"
                  , choices = names(data) 
                  , selected = numericals[1])

    , checkboxInput('decreasing_tab'
                    , label = 'decreasing'
                    , value = T)  

    , sliderInput('max_levels_tab'
                  , label = "Maximum number of levels for categorical variables"
                  , min = 3
                  , max = 30
                  , step = 1
                  , value = 10)

    , selectInput("preselect", label = "preselect"
                  ,choices = preselect 
                  ,selected = 'top_10_importance')


    , selectInput("preselect_correlation", label = "select_correlation"
                  ,choices = boxcox 
                  ,selected = boxcox[1])



  )

})
renderPlot( {

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  imp = rea_imp()

  p = tabplot::tableplot(data
                , select_string = input$select_vars_tab
                , sortCol       = input$sort_col_tab
                , decreasing    = input$decreasing_tab
                , nBins         = input$n_breaks_tab
                , max_levels    = input$max_levels_tab
                , scales = 'lin')

  index_str = stringr::str_c( which(all_variables %in% input$select_vars_tab)
                              , collapse = '+'  )

  name = stringr::str_c( input$sql_or_lib,'_tabplot_')%>%
  stringr::str_c('sort_', input$sort_col_tab, '_') %>% 
  stringr::str_c( index_str) 

  save_plot(plot = p
        , path = input$path
        , name = name
        , yes = input$save
        , excel = F
        )

  return(p)

  })


chk_bx_grp_ui = function(x,y, name, label) { renderUI({


  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  imp        = rea_imp()
  p_val_corr = rea_corr()$p_val_corr
  pca        = rea_pca()$pca

  if(input[[x]] == 'all')               preselect = names(data)
  if(input[[x]] == 'none')              preselect = NULL
  if(input[[x]] == 'top_10_importance') preselect = imp$variable[1:10]
  if(input[[x]] == 'boxcox')            preselect = boxcox
  if(input[[x]] == 'none_boxcox')       preselect = names(data)[!names(data) %in% boxcox]
  if(input[[x]] == 'numericals')        preselect = numericals
  if(input[[x]] == 'categoricals')      preselect = categoricals

  if(input[[x]] == 'group_stat_P<0.001') {

    sig = rea_group_stats()$sig

    #remove boxcox that are also in numericals
    sig_boxcox = sig$sig_boxcox %>%
      stringr::str_extract('^[A-Za-z]*') %>%
      .[!. %in% sig$sig_num] %>%
      stringr::str_c('_boxcox')

    preselect = stringr::str_c( c(sig$sig_num, sig_boxcox, sig$sig_chi ) )
  }

  if(input[[x]] == 'correlation') {

    correlating = p_val_corr %>%
      as_tibble() %>%
      mutate( vars = names(.) ) %>%
      select( filter_var = one_of(input[[y]]), vars) %>%
      filter( filter_var <= 0.05  ) %>%
      select( vars )


    preselect = correlating$vars
  }

  if( startsWith( input[[x]], 'pc1+pc2')) {

    if(endsWith( input[[x]], '>2.5%')){

      pca_1_2 = pca$contrib_abs_perc_reduced %>%
        filter(var %in% c('PC1', 'PC2') )

      preselect = pca_1_2$key

    }else {

      pca_1_2 = pca$contrib_abs_perc %>%
        filter(var %in% c('PC1', 'PC2') )

      preselect = pca_1_2$key

    }

  }


  if(input[[x]] == '2fac_2num')  {
    preselect = c(numericals[1], categoricals[1]
                  ,numericals[2], categoricals[2]
                  ,numericals[3], categoricals[3]
                  ,numericals[4], categoricals[4]
                  )[1:4]
  }


  checkboxGroupInput( name
                    , label = label
                    , choices = names(data)
                    , selected = preselect
                    , inline = F )
 })
}

chk_bx_grp_ui('preselect', 'preselect_correlation', name = 'select_vars_tab', label = 'Select Variables')

ggduo

renderUI({

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  inputPanel(

    selectInput("preselect_duo_col"
                , label = "Preselect Column"
              ,choices = preselect 
              ,selected = '2fac_2num')

    , selectInput("preselect_duo_row"
                , label = "Preselect Row"
              ,choices = preselect 
              ,selected = '2fac_2num')


  )
})

renderUI({

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  inputPanel(

   selectInput("preselect_corr_duo_col"
                , label = "Select Correlation Column"
                ,choices = boxcox 
                ,selected = boxcox[1])

  , selectInput("preselect_corr_duo_row"
                , label = "Select Correlation Row"
                ,choices = boxcox 
                ,selected = boxcox[1])

  )

})

rea_duo = eventReactive(eventExpr     =input$but_duo
                        , {
                          return( list(col    = input$select_vars_duo_col
                                       ,row   = input$select_vars_duo_row
                                       ,group = input$group_duo
                                       ,corr  = input$corr_duo) )
                        } )


renderPlot({

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals

  ret   = rea_duo()
  col   = ret$col
  row   = ret$row
  group = ret$group
  corr  = ret$corr

  if(group == 'None') group = NULL

  lm_with_cor <- function(data, mapping, ..., method = "pearson") {
    x <- data[[deparse(mapping$x)]]
    y <- data[[deparse(mapping$y)]]
    cor <- cor(x, y, method = method)
    GGally::ggally_smooth_lm(data, mapping, ...) +
      ggplot2::geom_label(
        data = data.frame(
          x = min(x, na.rm = TRUE),
          y = max(y, na.rm = TRUE),
          lab = round(cor, digits = 3)
        ),
        mapping = ggplot2::aes(x = x, y = y, label = lab, color = NULL),
        hjust = 0, vjust = 1,
        size = 5, fontface = "bold"
      )
  }


  if(corr == T)  {  
    p =GGally::ggduo(
      data, col, row,
      mapping = aes_string(color = group),
      types = list(continuous = GGally::wrap(lm_with_cor, alpha = 0.25)),
      showStrips = FALSE
    ) +
      theme(legend.position = "bottom")
  }

  else{
    p = GGally::ggduo(
      data, col, row,
      mapping = aes_string(color = group),
      #types = list(continuous = wrap(lm_with_cor, alpha = 0.25)),
      showStrips = FALSE
    ) +
      theme(legend.position = "bottom")
  }

  index_str_row = stringr::str_c( which(all_variables %in% row)
                              , collapse = '+'  )

  index_str_col = stringr::str_c( which(all_variables %in% col)
                              , collapse = '+'  )

  name = stringr::str_c( input$sql_or_lib,'_ggduo')%>%
  stringr::str_c('_sort', input$sort_col_tab ) %>% 
  stringr::str_c( '_col', index_str_col) %>%
  stringr::str_c( '_row', index_str_row) 

  save_plot(plot = p
        , path = input$path
        , name = name
        , yes = input$save
        , excel = F
        )

  return(p)
})

renderUI({

  d_clean       = rea_ana()
  data          = d_clean$data
  boxcox        = d_clean$boxcox
  categoricals  = d_clean$categoricals
  all_variables = d_clean$all_variables
  numericals    = d_clean$numericals


  inputPanel(

    selectInput("group_duo"
                , label = "select grouping variable"
                , choices = c(categoricals, 'None')
                , selected = categoricals[length(categoricals)]
                )

    , checkboxInput('corr_duo'
                    , label = 'show corr coef'
                    , value = T)  


    , actionButton('but_duo', 'Render Plot')

    #, cellArgs = list(width = '50%')
  )



})

# chk_bx_grp_ui has its own gui rendering function

splitLayout(


   chk_bx_grp_ui('preselect_duo_col', 'preselect_corr_duo_col'
                   , name = 'select_vars_duo_col'
                   , label = 'Select Variables Column')


   , chk_bx_grp_ui('preselect_duo_row', 'preselect_corr_duo_row'
                   , name = 'select_vars_duo_row'
                   , label = 'Select Variables Row')


 )

Histograms

Histogram A

input_panel_histo = function(suffix) {

  renderUI({

    d_clean       = rea_ana()
    data          = d_clean$data
    boxcox        = d_clean$boxcox
    categoricals  = d_clean$categoricals
    all_variables = d_clean$all_variables
    numericals    = d_clean$numericals



    inputPanel(

      selectInput(str_c("n_breaks", suffix)
                  , label = "Number of bins:"
                  , choices = c(10, 20, 30, 50, 75, 100, 150, 200, 300, 500, 750, 1000)
                  , selected = 30
                  )

      , selectInput(str_c("variable", suffix)
                    , label = "Select variable"
                    , choices = names(data) 
                    , selected = numericals[1]
                    )

      , selectInput(str_c( "group", suffix)
                    , label = "Select grouping variable"
                    , choices = c(categoricals , 'None')
                    , selected = categoricals[1]
                    )

      , selectInput(str_c("graph_type", suffix)
                    , label = "Graph Type"
                    , choices = c('bar', 'line', 'violin') 
                    , selected = 'line')

      , selectInput(str_c("y_axis", suffix)
                    , label = "Y-Axis"
                    , choices = c('density', 'counts') 
                    , selected = 'density')

      , checkboxInput(str_c( 'auto_range', suffix )
                      , label = 'use automatic range'
                      , value = T)  

      , textInput(str_c('x_min', suffix)
                  , label = 'X-min'
                  , value = '0')

      , textInput(str_c('x_max', suffix)
                  , label = 'X-max'
                  , value = '1000')

    )

  })
}

renderPlot_histo = function( suffix ){


  renderPlot({

    d_clean       = rea_ana()
    data          = d_clean$data
    boxcox        = d_clean$boxcox
    categoricals  = d_clean$categoricals
    all_variables = d_clean$all_variables
    numericals    = d_clean$numericals


    # get input

    n_breaks   = input[[ str_c("n_breaks"   , suffix) ]]
    variable   = input[[ str_c("variable"   , suffix) ]]
    group      = input[[ str_c( "group"     , suffix) ]]
    graph_type = input[[ str_c("graph_type" , suffix) ]]
    y_axis     = input[[ str_c("y_axis"     , suffix) ]]
    auto_range = input[[ str_c( 'auto_range', suffix) ]]
    x_min      = input[[ str_c('x_min'      , suffix) ]]
    x_max      = input[[ str_c('x_max'      , suffix) ]]


    #y-axis
    if(y_axis == 'density') {
      y_axis = '..density..'
    }
    else{
      y_axis = '..count..'
    }

    #group

    if(group == 'None') group = NULL 

    # numericals ----------------------------------------------------------------------------

    numericals = c(numericals, boxcox)

    #geom_freqpoly 
    if(variable %in% numericals & graph_type == 'line'){

      p = data %>%
      ggplot() +
        geom_freqpoly( aes_string(x = variable, y = y_axis, color = group)
                       , bins =  as.numeric(n_breaks))
    }


     #geom_histo 
    if(variable %in% numericals & graph_type == 'bar' ){


      p = data %>%
      ggplot() +
        geom_histogram( aes_string(x = variable, y = y_axis, fill = group)
                       , bins =  as.numeric(n_breaks), alpha = 0.6
                       , position = 'identity')
    }

    #geom_violin 
    if(variable %in% numericals & graph_type == 'violin'){

      medians = data %>%
        group_by_(as.symbol(group)) %>%
        select( one_of(numericals ) ) %>%
        summarise_all(  median )


      p = data %>%
      ggplot() +
        geom_violin( aes_string(x = group
                                , y = variable
                                , fill = group)
                      ) +
        geom_crossbar( data = medians,
                       mapping = aes_string(x = group
                                  , y = variable
                                  , ymin = variable
                                  , ymax = variable) )
    }


    # add x range

    if(variable %in% numericals & auto_range == F & !graph_type == 'violin'){

      p = p +
        xlim( c( as.numeric(x_min), as.numeric(x_max)) )
    }

    # add y range

    if(variable %in% numericals & auto_range == F & graph_type == 'violin'){

      p = p +
        ylim( c( as.numeric(x_min), as.numeric(x_max)) )
    }


    # categoricals ----------------------------------------------------------------------------

    #geom_bar
    if(variable %in% categoricals ){

      if(y_axis == '..density..')   y_axis = '..prop..'

      p = data %>%
      ggplot() +
        geom_bar( aes_string(x = variable, y = y_axis, fill = group, group = group)
                       , position = 'dodge')
      }

  y_axis_str = stringr::str_extract_all(y_axis
                                        , '[A-Za-z]') %>%
    unlist()%>%
    stringr::str_c(collapse = '')

  name = stringr::str_c( input$sql_or_lib, '_')%>%
  stringr::str_c(suffix ) %>% 
  stringr::str_c('_', graph_type ) %>%
  stringr::str_c('_', y_axis_str)  %>%
  stringr::str_c( '_group', group) %>%
  stringr::str_c( '_var', variable) %>%
  stringr::str_c( '_', n_breaks)

  save_plot(plot = p
        , path = input$path
        , name = name
        , yes = input$save
        )

    return(p)

    })
}
input_panel_histo('histo_a')

renderPlot_histo('histo_a')

Histogram B

input_panel_histo('histo_b')

renderPlot_histo('histo_b')

Histogram C

input_panel_histo('histo_c')

renderPlot_histo('histo_c')

Histogram D

input_panel_histo('histo_d')

renderPlot_histo('histo_d')


erblast/oetteR documentation built on May 27, 2019, 12:11 p.m.