dapply: apply a function to a data.frame

Description Usage Arguments Value Author(s) Examples

Description

Splits a data.frame by one or more variables, applies a function to each subset, and returns a data.frame with the results.

Usage

1
dapply ( data , split.vars = NULL , fun = mean , wide = TRUE , drop = TRUE , all.level = FALSE , push.data.frame = FALSE , verbose = FALSE , ... )

Arguments

data

a data frame

split.vars

character vector with the column names of the variables by which the data.frame is split; if NULL (default), the data.frame is not split

fun

a function, or a named list of functions (see Examples), to be applied

wide

logical, if TRUE (default) a data.frame in wide format is returned, if FALSE a data.frame in long format is returned

drop

logical, if TRUE (default) empty combinations will be dropped, if FALSE all combinations are kept and NAs are generated

all.level

logical, if TRUE all levels of factors are incorporated, if FALSE (default) only factor levels existing in the data are used

push.data.frame

logical, if TRUE the split data.frame with all response variables is pushed to the function, if FALSE (default) the split data.frame is processed column-wise

verbose

logical, if TRUE progress is printed, if FALSE (default) no console output is generated

...

arguments to be passed to fun

Value

A data.frame with results, in either wide or long format depending on the argument wide.

Author(s)

Martin Hecht

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
### Example data
# Every combination of sex (2 values) and age (3 values), i.e. 6 rows,
# plus two standard-normal response variables drawn with a fixed seed.

set.seed(123)
x1 <- data.frame(sex = c("m", "f"), stringsAsFactors = FALSE)
x2 <- data.frame(age = c(20, 30, 40))
# x1 and x2 share no columns, so merge() returns their Cartesian product
d <- merge(x1, x2)
d[["resp1"]] <- rnorm(nrow(d), mean = 0, sd = 1)
d[["resp2"]] <- rnorm(nrow(d), mean = 0, sd = 1)


### Results in wide format (wide = TRUE is the default)

# one response variable, no split
dapply(data = d[, "resp1", drop = FALSE], fun = mean)

# two response variables, no split
dapply(data = d[, c("resp1", "resp2"), drop = FALSE], fun = mean)

# two response variables, two functions passed as a named list
twofun1 <- list(mean = mean, median = median)
dapply(data = d[, c("resp1", "resp2")], fun = twofun1)

# one response variable, split by one variable
dapply(data = d[, c("sex", "resp1")], split.vars = "sex", fun = mean)

# one response variable, split by two variables
dapply(
  data = d[, c("sex", "age", "resp1")],
  split.vars = c("sex", "age"),
  fun = mean
)

# two response variables, split by two variables
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = mean
)

# two response variables, split by two variables, two functions
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = twofun1
)


### Results in long format (wide = FALSE)

# one response variable, no split
dapply(data = d[, "resp1", drop = FALSE], fun = mean, wide = FALSE)

# two response variables, no split
dapply(data = d[, c("resp1", "resp2")], fun = mean, wide = FALSE)

# two response variables, two functions
dapply(data = d[, c("resp1", "resp2")], fun = twofun1, wide = FALSE)

# one response variable, split by one variable
dapply(
  data = d[, c("sex", "resp1")],
  split.vars = "sex",
  fun = mean,
  wide = FALSE
)

# one response variable, split by two variables
dapply(
  data = d[, c("sex", "age", "resp1")],
  split.vars = c("sex", "age"),
  fun = mean,
  wide = FALSE
)

# two response variables, split by two variables
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = mean,
  wide = FALSE
)

# two response variables, split by two variables, two functions
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = twofun1,
  wide = FALSE
)


### Functions whose result has more than one column after data.frame()
# Example: table() yields the category names and the category frequencies.
# In the output, suffix .1 is the category and suffix .2 is the frequency.

# one response variable, no split
dapply(data = d[, "resp1", drop = FALSE], fun = table, wide = TRUE)
dapply(data = d[, "resp1", drop = FALSE], fun = table, wide = FALSE)

# two response variables, no split
dapply(data = d[, c("resp1", "resp2")], fun = table, wide = TRUE)
dapply(data = d[, c("resp1", "resp2")], fun = table, wide = FALSE)

# two response variables, two functions (both entries are named "table")
twofun2 <- list(table = table, table = table)
dapply(data = d[, c("resp1", "resp2")], fun = twofun2, wide = TRUE)
dapply(data = d[, c("resp1", "resp2")], fun = twofun2, wide = FALSE)

# one response variable, split by one variable
dapply(
  data = d[, c("sex", "resp1")],
  split.vars = "sex",
  fun = table,
  wide = TRUE
)
dapply(
  data = d[, c("sex", "resp1")],
  split.vars = "sex",
  fun = table,
  wide = FALSE
)

# one response variable, split by two variables
dapply(
  data = d[, c("sex", "age", "resp1")],
  split.vars = c("sex", "age"),
  fun = table,
  wide = TRUE
)
dapply(
  data = d[, c("sex", "age", "resp1")],
  split.vars = c("sex", "age"),
  fun = table,
  wide = FALSE
)

# two response variables, split by two variables
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = table,
  wide = TRUE
)
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = table,
  wide = FALSE
)

# two response variables, split by two variables, two functions
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = twofun2,
  wide = TRUE
)
dapply(
  data = d[, c("sex", "age", "resp1", "resp2")],
  split.vars = c("sex", "age"),
  fun = twofun2,
  wide = FALSE
)


### push.data.frame = TRUE
# The whole data.frame is handed to the function instead of one column at a
# time. Here this is used to compute the correlation of resp1 and resp2,
# split by sex.

# Correlation between the first and the second column of x.
calc.cor <- function(x) {
  first_col <- x[, 1]
  second_col <- x[, 2]
  cor(first_col, second_col)
}
# calc.cor receives the resp1/resp2 data of each sex group as one data.frame
dapply(
  data = d[, c("sex", "resp1", "resp2")],
  split.vars = c("sex"),
  fun = calc.cor,
  push.data.frame = TRUE
)

eatTools documentation built on May 2, 2019, 4:44 p.m.

Related to dapply in eatTools...