coin: Interfaces for the coin package for data science pipelines.

Description Usage Arguments Details Value Author(s) Examples

Description

Interfaces to coin functions that can be used in a pipeline implemented by magrittr.

Usage

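ntbt_ansari_test(data, ...)
ntbt_chisq_test(data, ...)
ntbt_cmh_test(data, ...)
ntbt_conover_test(data, ...)
ntbt_fisyat_test(data, ...)
ntbt_fligner_test(data, ...)
ntbt_friedman_test(data, ...)
ntbt_independence_test(data, ...)
ntbt_klotz_test(data, ...)
ntbt_koziol_test(data, ...)
ntbt_kruskal_test(data, ...)
ntbt_lbl_test(data, ...)
ntbt_logrank_test(data, ...)
ntbt_maxstat_test(data, ...)
ntbt_median_test(data, ...)
ntbt_mh_test(data, ...)
ntbt_mood_test(data, ...)
ntbt_normal_test(data, ...)
ntbt_oneway_test(data, ...)
ntbt_quade_test(data, ...)
ntbt_quadrant_test(data, ...)
ntbt_savage_test(data, ...)
ntbt_sign_test(data, ...)
ntbt_spearman_test(data, ...)
ntbt_symmetry_test(data, ...)
ntbt_taha_test(data, ...)
ntbt_wilcox_test(data, ...)
ntbt_wilcoxsign_test(data, ...)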

Arguments

data

data frame, tibble, list, ...

...

Other arguments passed to the corresponding interfaced function.

Details

Each interface calls its corresponding interfaced function.

Value

The object returned by the interfaced function.

Author(s)

Roberto Bertolusso

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
## Not run: 
library(intubate)
library(magrittr)
library(coin)

## Tests of Independence in Two- or Three-Way Contingency Tables
## (placeholder example on the CO2 data; a better one is welcome)

## Direct calls to the coin functions: formula first, data passed by name.
chisq_test(Plant ~ Type, data = CO2)
cmh_test(Plant ~ Type, data = CO2)
lbl_test(Plant ~ Type, data = CO2)

## The same tests through the interfaces, which take the data first ...
ntbt_chisq_test(CO2, Plant ~ Type)
ntbt_cmh_test(CO2, Plant ~ Type)
ntbt_lbl_test(CO2, Plant ~ Type)

## ... and therefore slot straight into a magrittr pipeline.
CO2 %>% ntbt_chisq_test(Plant ~ Type)
CO2 %>% ntbt_cmh_test(Plant ~ Type)
CO2 %>% ntbt_lbl_test(Plant ~ Type)


## Correlation Tests
## Four asymptotic tests (Spearman, Fisher-Yates, quadrant, Koziol-Nemec),
## each run on CONT ~ INTG from the USJudgeRatings data.

## Direct calls to the coin functions.
## Spearman
spearman_test(CONT ~ INTG, data = USJudgeRatings)
## Fisher-Yates
fisyat_test(CONT ~ INTG, data = USJudgeRatings)
## Quadrant
quadrant_test(CONT ~ INTG, data = USJudgeRatings)
## Koziol-Nemec
koziol_test(CONT ~ INTG, data = USJudgeRatings)

## The same tests through the interfaces, which take the data first ...
## Spearman
ntbt_spearman_test(USJudgeRatings, CONT ~ INTG)
## Fisher-Yates
ntbt_fisyat_test(USJudgeRatings, CONT ~ INTG)
## Quadrant
ntbt_quadrant_test(USJudgeRatings, CONT ~ INTG)
## Koziol-Nemec
ntbt_koziol_test(USJudgeRatings, CONT ~ INTG)

## ... and therefore slot straight into a magrittr pipeline.
## Spearman
USJudgeRatings %>% ntbt_spearman_test(CONT ~ INTG)
## Fisher-Yates
USJudgeRatings %>% ntbt_fisyat_test(CONT ~ INTG)
## Quadrant
USJudgeRatings %>% ntbt_quadrant_test(CONT ~ INTG)
## Koziol-Nemec
USJudgeRatings %>% ntbt_koziol_test(CONT ~ INTG)

## ntbt_independence_test: General Independence Test
## The same exact, one-sided ("greater") test is run three ways; only the
## position of the data argument changes between the calls.
## ytrafo applies normal_trafo to numeric variables; xtrafo replaces a
## factor by a one-column indicator of its first level.

## Direct call to the coin function.
independence_test(
  asat ~ group,
  data = asat,
  distribution = "exact",
  alternative = "greater",
  ytrafo = function(data) {
    trafo(data, numeric_trafo = normal_trafo)
  },
  xtrafo = function(data) {
    trafo(
      data,
      factor_trafo = function(x) {
        matrix(x == levels(x)[1], ncol = 1)
      }
    )
  }
)

## The interface takes the data first ...
ntbt_independence_test(
  asat,
  asat ~ group,
  distribution = "exact",
  alternative = "greater",
  ytrafo = function(data) {
    trafo(data, numeric_trafo = normal_trafo)
  },
  xtrafo = function(data) {
    trafo(
      data,
      factor_trafo = function(x) {
        matrix(x == levels(x)[1], ncol = 1)
      }
    )
  }
)

## ... and therefore slots straight into a magrittr pipeline.
asat %>%
  ntbt_independence_test(
    asat ~ group,
    distribution = "exact",
    alternative = "greater",
    ytrafo = function(data) {
      trafo(data, numeric_trafo = normal_trafo)
    },
    xtrafo = function(data) {
      trafo(
        data,
        factor_trafo = function(x) {
          matrix(x == levels(x)[1], ncol = 1)
        }
      )
    }
  )


## Two- and K-Sample Location Tests

## Tritiated Water Diffusion Across Human Chorioamnion
## Hollander and Wolfe (1999, p. 110, Tab. 4.1)
## The first 10 pd values belong to "At term", the last 5 to "12-26 Weeks".
diffusion <- data.frame(
  pd = c(
    0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46,
    1.15, 0.88, 0.90, 0.74, 1.21
  ),
  age = factor(rep(c("At term", "12-26 Weeks"), times = c(10, 5)))
)

## Small two-group data set (4 "no", 5 "yes") used for the median test.
ex <- data.frame(
  y = c(3, 4, 8, 9, 1, 2, 5, 6, 7),
  x = factor(rep(c("no", "yes"), times = c(4, 5)))
)

## Direct calls to the coin functions.
kruskal_test(pd ~ age, data = diffusion, distribution = "exact")
median_test(y ~ x, data = ex, distribution = "exact")
normal_test(
  pd ~ age, data = diffusion,
  distribution = "exact", conf.int = TRUE
)
oneway_test(pd ~ age, data = diffusion)
savage_test(
  pd ~ age, data = diffusion,
  distribution = "exact", conf.int = TRUE
)
wilcox_test(
  pd ~ age, data = diffusion,
  distribution = "exact", conf.int = TRUE
)

## The same tests through the interfaces, which take the data first ...
ntbt_kruskal_test(diffusion, pd ~ age, distribution = "exact")
ntbt_median_test(ex, y ~ x, distribution = "exact")
ntbt_normal_test(
  diffusion, pd ~ age,
  distribution = "exact", conf.int = TRUE
)
ntbt_oneway_test(diffusion, pd ~ age)
ntbt_savage_test(
  diffusion, pd ~ age,
  distribution = "exact", conf.int = TRUE
)
ntbt_wilcox_test(
  diffusion, pd ~ age,
  distribution = "exact", conf.int = TRUE
)

## ... and therefore slot straight into a magrittr pipeline.
diffusion %>% ntbt_kruskal_test(pd ~ age, distribution = "exact")
ex %>% ntbt_median_test(y ~ x, distribution = "exact")
diffusion %>%
  ntbt_normal_test(pd ~ age, distribution = "exact", conf.int = TRUE)
diffusion %>% ntbt_oneway_test(pd ~ age)
diffusion %>%
  ntbt_savage_test(pd ~ age, distribution = "exact", conf.int = TRUE)
diffusion %>%
  ntbt_wilcox_test(pd ~ age, distribution = "exact", conf.int = TRUE)

## ntbt_mh_test: Marginal Homogeneity Tests
## Effectiveness of different media for the growth of diphtheria
## Cochran (1950, Tab. 2)

## `cases` holds how many specimens showed each of the five response
## patterns listed below; `n` is the total number of specimens.
cases <- c(4, 2, 3, 1, 59)
n <- sum(cases)

## Long format: four rows per specimen, one for each medium A-D.
cochran <- data.frame(
    ## Each pattern is a length-4 vector of 0/1 outcomes and is repeated
    ## as many times as its entry in `cases`.
    diphtheria = factor(
        unlist(rep(list(c(1, 1, 1, 1),
                        c(1, 1, 0, 1),
                        c(0, 1, 1, 1),
                        c(0, 1, 0, 1),
                        c(0, 0, 0, 0)),
                   cases))
    ),
    ## Media labels cycle A, B, C, D once per specimen.
    media = factor(rep(LETTERS[1:4], n)),
    ## Specimen identifier, constant over the four rows of a specimen;
    ## it is the blocking variable in the formulas below.
    case =  factor(rep(seq_len(n), each = 4))
)

## Original function to interface
mh_test(diphtheria ~ media | case, data = cochran)

## The interface puts data as first parameter
ntbt_mh_test(cochran, diphtheria ~ media | case)

## so it can be used easily in a pipeline.
cochran %>%
  ntbt_mh_test(diphtheria ~ media | case)

## ntbt_maxstat_test: Generalized Maximally Selected Statistics

## Direct call to the coin function.
maxstat_test(counts ~ coverstorey, data = treepipit)

## The interface takes the data first ...
ntbt_maxstat_test(treepipit, counts ~ coverstorey)

## ... and therefore slots straight into a magrittr pipeline.
treepipit %>% ntbt_maxstat_test(counts ~ coverstorey)


## Two- and K-Sample Scale Tests

## Serum Iron Determination Using Hyland Control Sera
## Hollander and Wolfe (1999, p. 147, Tab 5.1)
## 40 measurements: the first 20 are labelled "Ramsay", the last 20
## "Jung-Parekh".
sid <- data.frame(
  serum = c(
    111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
    101, 96, 97, 102, 107, 113, 116, 113, 110, 98,
    107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
    96, 108, 103, 104, 114, 114, 113, 108, 106, 99
  ),
  method = gl(n = 2, k = 20, labels = c("Ramsay", "Jung-Parekh"))
)

## Direct calls to the coin functions.
ansari_test(serum ~ method, data = sid)
conover_test(serum ~ method, data = sid)
fligner_test(serum ~ method, data = sid)
klotz_test(serum ~ method, data = sid)
mood_test(serum ~ method, data = sid)
taha_test(serum ~ method, data = sid)

## The same tests through the interfaces, which take the data first ...
ntbt_ansari_test(sid, serum ~ method)
ntbt_conover_test(sid, serum ~ method)
ntbt_fligner_test(sid, serum ~ method)
ntbt_klotz_test(sid, serum ~ method)
ntbt_mood_test(sid, serum ~ method)
ntbt_taha_test(sid, serum ~ method)

## ... and therefore slot straight into a magrittr pipeline.
sid %>% ntbt_ansari_test(serum ~ method)
sid %>% ntbt_conover_test(serum ~ method)
sid %>% ntbt_fligner_test(serum ~ method)
sid %>% ntbt_klotz_test(serum ~ method)
sid %>% ntbt_mood_test(serum ~ method)
sid %>% ntbt_taha_test(serum ~ method)

## ntbt_logrank_test: Two- and K-Sample Tests for Censored Data

## Example data (Callaert, 2003, Tab.1)
## 15 times: the first 7 fall in group 0, the remaining 8 in group 1.
callaert <- data.frame(
  time = c(
    1, 1, 5, 6, 6, 6, 6,
    2, 2, 2, 3, 4, 4, 5, 5
  ),
  group = factor(rep(0:1, times = c(7, 8)))
)

## Direct call to the coin function.
## NOTE(review): Surv() is not defined in this script; presumably it is
## made available by the packages loaded at the top -- confirm.
logrank_test(Surv(time) ~ group, data = callaert, distribution = "exact")

## The interface takes the data first ...
ntbt_logrank_test(callaert, Surv(time) ~ group, distribution = "exact")

## ... and therefore slots straight into a magrittr pipeline.
callaert %>%
  ntbt_logrank_test(Surv(time) ~ group, distribution = "exact")


## ntbt_symmetry_test: General Symmetry Test
## One-sided exact Fisher-Pitman test for paired observations

## Two measurements per subject, stored as parallel vectors.
y1 <- c(1.83,  0.50,  1.62,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
y2 <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)

## Long format: x marks which vector a value came from, block marks the
## position (pair) it occupied in that vector.
dta <- data.frame(
  y = c(y1, y2),
  x = gl(n = 2, k = length(y1)),
  block = factor(rep(seq_along(y1), times = 2))
)

## Direct call to the coin function.
symmetry_test(
  y ~ x | block, data = dta,
  distribution = "exact", alternative = "greater"
)

## The interface takes the data first ...
ntbt_symmetry_test(
  dta, y ~ x | block,
  distribution = "exact", alternative = "greater"
)

## ... and therefore slots straight into a magrittr pipeline.
dta %>%
  ntbt_symmetry_test(
    y ~ x | block,
    distribution = "exact", alternative = "greater"
  )


## Symmetry Tests
## Data with explicit group and block information: these examples reuse
## `dta` (response y, group x, block) exactly as built from y1 and y2 for
## the general symmetry test above, so it is not rebuilt here.

## Original function to interface
## For two samples, the sign test is equivalent to the Friedman test...
sign_test(y ~ x | block, data = dta, distribution = "exact")
friedman_test(y ~ x | block, data = dta, distribution = "exact")
## ...and the signed-rank test is equivalent to the Quade test
wilcoxsign_test(y ~ x | block, data = dta, distribution = "exact")
quade_test(y ~ x | block, data = dta, distribution = "exact")

## The interface puts data as first parameter
ntbt_sign_test(dta, y ~ x | block, distribution = "exact")
ntbt_friedman_test(dta, y ~ x | block, distribution = "exact")
ntbt_wilcoxsign_test(dta, y ~ x | block, distribution = "exact")
ntbt_quade_test(dta, y ~ x | block, distribution = "exact")

## so it can be used easily in a pipeline.
dta %>%
  ntbt_sign_test(y ~ x | block, distribution = "exact")
dta %>%
  ntbt_friedman_test(y ~ x | block, distribution = "exact")
dta %>%
  ntbt_wilcoxsign_test(y ~ x | block, distribution = "exact")
dta %>%
  ntbt_quade_test(y ~ x | block, distribution = "exact")

## End(Not run)

rbertolusso/intubate documentation built on May 27, 2019, 3 a.m.