# oetteR: Collection of personal R functions

#'@title analyse group difference of dataset
#'@description Deprecated, use f_stat_group_ana() instead. Returns a full
#'  analysis as a taglist including all features with p_values, medians, means,
#'  percentages and counts, as well as plots for the features passing the
#'  threshold values.
#'@param data_ls data_ls object generated by f_clean_data(), or a named list
#'  list( data = <dataframe>, numericals = < vector with column names of
#'  numerical columns>)
#'@param col_group character vector denoting grouping columns
#'@param tresh_p_val p value threshold for plots, only features with a p value
#'  less than or equal to this value are plotted, Default: 0.05
#'@param thresh_diff_perc minimum percent difference threshold for plots, only
#'  features with a difference greater than or equal to this value are plotted,
#'  Default: 3
#'@return htmltools taglist with eight entries: main heading, table of all
#'  features, plot section heading, list of plots, line breaks, summary
#'  heading, table of means and medians, table of counts and percentages
#' @examples
#' \dontrun{
#'  data_ls = f_clean_data(mtcars)
#'  taglist = f_stat_group_ana_taglist(data_ls, 'cyl')
#'  f_plot_obj_2_html(taglist, type = "taglist", output_file = 'test_me', title = 'Plots')
#'  file.remove('test_me.html')
#' }
#'@seealso \code{\link[plotly]{ggplotly}}
#'  \code{\link[htmltools]{tagList}},\code{\link[htmltools]{h1}},\code{\link[htmltools]{h2}}
#'
#'
#'@rdname f_stat_group_ana_taglist
#'@export
#'@importFrom plotly ggplotly
#'@importFrom htmltools tagList h1 h2 h3 h4 h5 h6

f_stat_group_ana_taglist = function(data_ls, col_group, tresh_p_val = 0.05, thresh_diff_perc = 3 ){

  # signal the deprecation as a proper warning instead of printing to stdout,
  # callers can silence it with suppressWarnings()
  .Deprecated( new = 'f_stat_group_ana', old = 'f_stat_group_ana_taglist' )

  # statistics -----------------------------------------------------------------

  # one row per feature with significance stars, p value and percent difference
  df_anova = f_stat_anova( data_ls, col_group )
  df_chi   = f_stat_chi_square( data_ls, col_group )
  df_comb  = f_stat_combine_anova_with_chi_square( df_anova, df_chi ) %>%
    mutate( stars = f_stat_stars(p_value) ) %>%
    select( variable, stars, p_value, diff_perc )

  df_means = f_stat_group_mean_medians(data_ls, col_group)
  df_perc  = f_stat_group_counts_percentages(data_ls, col_group)

  # plots ----------------------------------------------------------------------

  # builds the padded html block for a single feature:
  # numerical features get one violin plot, all other features get a bar plot
  # of the counts followed by a bar plot of the densities
  f_plot = function( var, title, col_group, data_ls ){

    # legend for the stars appended to the title
    # NOTE(review): presumably mirrors the cut-offs of f_stat_stars() - verify
    caption = '* P:0.05, ** P:0.005, *** P:0.001'

    # all plots share the same conversion to an interactive plotly widget
    f_interactive = function(...){
      f_plot_hist( var, data_ls, col_group, ... ) %>%
        plotly::ggplotly( tooltip = c('y','fill') )
    }

    if( var %in% data_ls$numericals ){

      p = f_interactive( graph_type = 'violin' )

      return( f_html_padding( p, 4, title, caption = caption ) )
    }

    p_count = f_interactive( graph_type = 'bar', y_axis = 'count' )
    l1      = f_html_padding( p_count, 4, title, subtitle = 'Counts' )

    p_dens  = f_interactive( graph_type = 'bar', y_axis = 'density' )
    l2      = f_html_padding( p_dens, subtitle = 'Probabilities', caption = caption )

    return( htmltools::tagList( l1, l2 ) )
  }

  # only features passing both thresholds are plotted, df_comb already carries
  # the stars column so it is reused for the plot titles; rows with a missing
  # p_value or diff_perc are dropped by filter()
  df_plots = df_comb %>%
    filter( p_value <= tresh_p_val & diff_perc >= thresh_diff_perc ) %>%
    mutate( title = paste( variable, stars ),
            plot  = map2( variable, title, f_plot,
                          col_group = col_group, data_ls = data_ls ) )

  plots = df_plots$plot

  # tables ---------------------------------------------------------------------

  group_subtitle = paste( 'grouped by:', col_group )

  tab_all = f_datatable_universal( df_comb, round_other_nums = 2 ) %>%
    f_html_padding( 3, title = 'All features',
                    subtitle = group_subtitle, pad_after = 3 )

  tab_mean = f_datatable_universal( df_means, round_other_nums = 2 ) %>%
    f_html_padding( 3, title = 'Means and Medians of numerical features',
                    subtitle = group_subtitle )

  tab_perc = f_datatable_universal( df_perc, round_other_nums = 2 ) %>%
    f_html_padding( title = 'Counts and percentages of categorical features',
                    subtitle = group_subtitle, pad_after = 2 )

  # assemble report ------------------------------------------------------------

  # paste0() keeps the group name flush inside the quotes
  taglist = htmltools::tagList()
  taglist[[1]] = htmltools::h1( paste0( 'Differences Between "', col_group, '" Groups' ) )
  taglist[[2]] = tab_all
  taglist[[3]] = htmltools::h2( paste( 'Plots for features with significant differences of minimum',
                                       thresh_diff_perc, '% sorted by P Value' ) )
  # the plots keep the row order of df_comb, no sorting is applied here
  taglist[[4]] = plots
  taglist[[5]] = f_html_breaks(5)
  taglist[[6]] = htmltools::h2( 'Summary Tables' )
  taglist[[7]] = tab_mean
  taglist[[8]] = tab_perc

  return( taglist )

}