#' @title Analyse group differences of a dataset
#' @description Deprecated, use \code{f_stat_group_ana()} instead. Returns a
#'  full analysis as a taglist including all features with p values, medians,
#'  means, percentages and counts, as well as plots for the features passing
#'  the threshold values.
#' @param data_ls data_ls object generated by f_clean_data(), or a named list
#'  list( data = <dataframe>, numericals = < vector with column names of
#'  numerical columns>)
#' @param col_group character vector denoting grouping columns
#' @param tresh_p_val p value threshold for plots, only features with a
#'  p value less than or equal to this value are plotted, Default: 0.05
#' @param thresh_diff_perc minimum percent difference threshold for plots,
#'  only features with a percent difference greater than or equal to this
#'  value are plotted, Default: 3
#' @return htmltools taglist with, in this order: a main heading, a table of
#'  all features, a heading for the plots, the list of plots, line breaks,
#'  a heading for the summary tables, the table of means and medians and the
#'  table of counts and percentages.
#' @examples
#' \dontrun{
#' data_ls = f_clean_data(mtcars)
#' taglist = f_stat_group_ana_taglist(data_ls, 'cyl')
#' f_plot_obj_2_html(taglist, type = "taglist", output_file = 'test_me', title = 'Plots')
#' file.remove('test_me.html')
#' }
#' @seealso \code{\link[plotly]{ggplotly}}
#'  \code{\link[htmltools]{tagList}},\code{\link[htmltools]{h1}},\code{\link[htmltools]{h2}}
#'
#'
#' @rdname f_stat_group_ana_taglist
#' @export
#' @importFrom plotly ggplotly
#' @importFrom htmltools tagList h1 h2 h3 h4 h5 h6
f_stat_group_ana_taglist <- function(data_ls,
                                     col_group,
                                     tresh_p_val = 0.05,
                                     thresh_diff_perc = 3) {
  # Signal the deprecation as a proper (catchable, suppressible) condition
  # instead of printing to stdout.
  .Deprecated(new = "f_stat_group_ana", old = "f_stat_group_ana_taglist")

  # col_group may name several columns; collapse it so that every heading and
  # subtitle stays a single string.
  group_label <- paste(col_group, collapse = ", ")
  grouped_by <- paste("grouped by:", group_label)

  df_anova <- f_stat_anova(data_ls, col_group)
  df_chi <- f_stat_chi_square(data_ls, col_group)

  df_comb <- f_stat_combine_anova_with_chi_square(df_anova, df_chi) %>%
    mutate(stars = f_stat_stars(p_value)) %>%
    select(variable, stars, p_value, diff_perc)

  df_means <- f_stat_group_mean_medians(data_ls, col_group)
  df_perc <- f_stat_group_counts_percentages(data_ls, col_group)

  # Builds the html block for one feature: a violin plot for numerical
  # features, a count bar plot followed by a density bar plot otherwise.
  f_plot <- function(var, title, col_group, data_ls) {
    caption <- "* P:0.05, ** P:0.005, *** P:0.001"
    tooltip <- c("y", "fill")

    if (var %in% data_ls$numericals) {
      p <- f_plot_hist(var, data_ls, col_group, graph_type = "violin") %>%
        plotly::ggplotly(tooltip = tooltip)

      return(f_html_padding(p, 4, title, caption = caption))
    }

    p <- f_plot_hist(var, data_ls, col_group,
                     graph_type = "bar", y_axis = "count") %>%
      plotly::ggplotly(tooltip = tooltip)
    l1 <- f_html_padding(p, 4, title, subtitle = "Counts")

    p <- f_plot_hist(var, data_ls, col_group,
                     graph_type = "bar", y_axis = "density") %>%
      plotly::ggplotly(tooltip = tooltip)
    l2 <- f_html_padding(p, subtitle = "Probabilities", caption = caption)

    htmltools::tagList(l1, l2)
  }

  # Only features passing both thresholds get a plot; df_comb already carries
  # the stars column, so it is not recomputed here.
  df_plots <- df_comb %>%
    filter(p_value <= tresh_p_val & diff_perc >= thresh_diff_perc) %>%
    mutate(
      title = paste(variable, stars),
      plot = map2(variable, title, f_plot, col_group, data_ls)
    )
  plots <- df_plots$plot

  tab_all <- f_datatable_universal(df_comb, round_other_nums = 2) %>%
    f_html_padding(3, title = "All features",
                   subtitle = grouped_by, pad_after = 3)

  tab_mean <- f_datatable_universal(df_means, round_other_nums = 2) %>%
    f_html_padding(3, title = "Means and Medians of numerical features",
                   subtitle = grouped_by)

  tab_perc <- f_datatable_universal(df_perc, round_other_nums = 2) %>%
    f_html_padding(title = "Counts and percentages of categorical features",
                   subtitle = grouped_by, pad_after = 2)

  # paste0() keeps the quotes tight around the group name; paste() would
  # render '" cyl "' with stray spaces.
  heading_main <- htmltools::h1(
    paste0('Differences Between "', group_label, '" Groups')
  )
  heading_plots <- htmltools::h2(
    paste("Plots for features with significant differences of minimum",
          thresh_diff_perc, "% sorted by P Value")
  )

  htmltools::tagList(
    heading_main,
    tab_all,
    heading_plots,
    plots,
    f_html_breaks(5),
    htmltools::h2("Summary Tables"),
    tab_mean,
    tab_perc
  )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.