f_plot_alluvial_1v1: plot alluvial of gathered data

Description Usage Arguments Value See Also Examples

Description

Plots two variables of a dataframe on an alluvial plot. A third variable can be added either to the left or to the right of the alluvial plot to provide coloring of the flows. All numerical variables are scaled, centered and Yeo-Johnson transformed before binning.

Usage

1
2
3
4
5
6
7
8
f_plot_alluvial_1v1(data, col_x, col_y, col_id, col_fill = NULL,
  fill_right = T, bins = 5, bin_labels = c("LL", "ML", "M", "MH",
  "HH"), NA_label = "NA", order_levels_y = NULL,
  order_levels_x = NULL, order_levels_fill = NULL, complete = TRUE,
  fill_by = "first_variable",
  col_vector_flow = f_plot_col_vector74(faint = F, greys = F),
  col_vector_value = RColorBrewer::brewer.pal(9, "Greys")[c(3, 6, 4, 7,
  5)])

Arguments

data

a dataframe

col_x

character vector denoting column for the x axis variable

col_y

character vector denoting column for the y axis variable

col_id

character vector denoting id column

col_fill

character vector denoting color fill variable for flows, Default: NULL

fill_right

logical; if TRUE the fill variable is added to the right, if FALSE to the left, Default: T

bins

number of bins for automatic binning of numerical variables, Default: 5

bin_labels

labels for bins, Default: c("LL", "ML", "M", "MH", "HH")

NA_label

character vector defining the label for missing data, Default: "NA"

order_levels_y

character vector denoting the order of y levels from low to high; it does not have to be complete and can also be used just to bring selected levels to the front, Default: NULL

order_levels_x

character vector denoting the order of x levels from low to high; it does not have to be complete and can also be used just to bring selected levels to the front, Default: NULL

order_levels_fill

character vector denoting the order of the color fill variable levels from low to high; it does not have to be complete and can also be used just to bring selected levels to the front, Default: NULL

complete

boolean, insert implicitly missing observations, Default: TRUE

fill_by

one_of(c('first_variable', 'last_variable', 'all_flows', 'values')), Default: 'first_variable'

col_vector_flow

HEX colors for flows, Default: f_plot_col_vector74(faint = F, greys = F)

col_vector_value

Hex colors for y levels/values, Default: RColorBrewer::brewer.pal(9, "Greys")[c(3, 6, 4, 7, 5)]

Value

plot

See Also

brewer.pal fct_relevel,fct_rev UQ geom_flow,geom_stratum

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
## Not run: 
if(interactive()){
# sample data
# Builds one row per id and quarter, labelled 'on_time' or 'late'. Only
# tailnum/origin/dest/carrier combinations that occur in all 12 months of
# nycflights13::flights are kept.
 monthly_flights = nycflights13::flights %>%
 # one row per month and tailnum/origin/dest/carrier combination
 group_by(month, tailnum, origin, dest, carrier) %>%
 summarise() %>%
 # count the months per combination and keep those present in all 12
 group_by( tailnum, origin, dest, carrier) %>%
 count() %>%
 filter( n == 12 ) %>%
 select( - n ) %>%
 # join the flight records back on; no `by` is given, so the columns shared
 # by both tables are used as keys
 left_join( nycflights13::flights ) %>%
 # drop rows containing any missing value
 .[complete.cases(.), ] %>%
 ungroup() %>%
 # paste tailnum, origin, dest and carrier into a single id string and cut
 # month into 4 intervals
 mutate( tailnum = pmap_chr(list(tailnum, origin, dest, carrier), paste )
         , qu = cut(month, 4)) %>%
 # mean arrival delay per id and interval
 group_by(tailnum, carrier, origin, dest, qu ) %>%
 summarise( mean_arr_delay = mean(arr_delay) ) %>%
 ungroup() %>%
 # binarise: a mean arrival delay below 10 is labelled 'on_time', else 'late'
 mutate( mean_arr_delay = ifelse( mean_arr_delay < 10, 'on_time', 'late' ) )

# rename the 4 interval levels produced by cut() to quarter labels
levels(monthly_flights$qu) = c('Q1', 'Q2', 'Q3', 'Q4')

data = monthly_flights

# column names passed to f_plot_alluvial_1v1() as character strings
col_x = 'qu'
col_y = 'mean_arr_delay'
col_fill = 'carrier'
col_id = 'tailnum'

# flow coloring variants
f_plot_alluvial_1v1( data, col_x, col_y, col_id, col_fill )
f_plot_alluvial_1v1( data, col_x, col_y, col_id, fill_by = 'last_variable' )
f_plot_alluvial_1v1( data, col_x, col_y, col_id, fill_by = 'first_variable' )
f_plot_alluvial_1v1( data, col_x, col_y, col_id, fill_by = 'all_flows' )
# NOTE(review): the fill_by argument description lists 'values' as the option,
# while this call passes 'value' -- confirm which spelling the function accepts
f_plot_alluvial_1v1( data, col_x, col_y, col_id, fill_by = 'value' )

# use same color coding for flows and y levels
f_plot_alluvial_1v1( data, col_x, col_y, col_id, fill_by = 'last_variable'
                    , col_vector_flow = f_plot_col_vector74()
                    , col_vector_value = f_plot_col_vector74() )

# move fill variable to the left
f_plot_alluvial_1v1( data, col_x, col_y, col_id, col_fill, fill_right = F )

# reorder levels
f_plot_alluvial_1v1( data, col_x, col_y, col_id, fill_by = 'first_variable'
                    , order_levels_y = c('on_time', 'late') )

f_plot_alluvial_1v1( data, col_x, col_y, col_id, fill_by = 'first_variable'
                    , order_levels_x = c('Q4', 'Q3', 'Q2', 'Q1') )

# carriers sorted by descending number of rows in data, used below to order
# the levels of the fill variable
order_by_carrier_size = data %>%
 group_by(carrier) %>%
 count() %>%
 arrange( desc(n) ) %>%
 .[['carrier']]

f_plot_alluvial_1v1( data, col_x, col_y, col_id, col_fill
                    , order_levels_fill = order_by_carrier_size )

}

## End(Not run)

erblast/oetteR documentation built on May 27, 2019, 12:11 p.m.