count_if: Count/sum/average/other functions on values that meet a...

Description Usage Arguments Details Value Examples

View source: R/count_if.R

Description

These functions calculate count/sum/average/etc. on values that meet a criterion that you specify. apply_if_* apply custom functions. There are different flavors of these functions: *_if work on entire dataset/matrix/vector, *_row_if works on each row and *_col_if works on each column.

Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
count_if(criterion, ...)

count_row_if(criterion, ...)

count_col_if(criterion, ...)

x %row_in% criterion

x %has% criterion

x %col_in% criterion

sum_if(criterion, ..., data = NULL)

sum_row_if(criterion, ..., data = NULL)

sum_col_if(criterion, ..., data = NULL)

mean_if(criterion, ..., data = NULL)

mean_row_if(criterion, ..., data = NULL)

mean_col_if(criterion, ..., data = NULL)

sd_if(criterion, ..., data = NULL)

sd_row_if(criterion, ..., data = NULL)

sd_col_if(criterion, ..., data = NULL)

median_if(criterion, ..., data = NULL)

median_row_if(criterion, ..., data = NULL)

median_col_if(criterion, ..., data = NULL)

max_if(criterion, ..., data = NULL)

max_row_if(criterion, ..., data = NULL)

max_col_if(criterion, ..., data = NULL)

min_if(criterion, ..., data = NULL)

min_row_if(criterion, ..., data = NULL)

min_col_if(criterion, ..., data = NULL)

apply_row_if(fun, criterion, ..., data = NULL)

apply_col_if(fun, criterion, ..., data = NULL)

Arguments

criterion

Vector with counted values or function. See details and examples.

...

Data on which criterion will be applied. Vector, matrix, data.frame, list.

x

Data on which criterion will be applied. Vector, matrix, data.frame, list.

data

Data on which function will be applied. Doesn't applicable to count_*_if functions. If omitted then function will be applied on the ... argument.

fun

Custom function that will be applied based on criterion.

Details

Possible type for criterion argument:

count* and %in*% never returns NA's. Other functions remove NA's before calculations (as na.rm = TRUE in base R functions).

Function criterion should return logical vector of same size and shape as its argument. This function will be applied to each column of supplied data and TRUE results will be used. There is asymmetrical behavior in *_row_if and *_col_if for function criterion: in both cases function criterion will be applied columnwise.

Value

*_if return single value (vector of length 1). *_row_if returns vector for each row of supplied arguments. *_col_if returns vector for each column of supplied arguments. %row_in%/%col_in% return logical vector - indicator of presence of criterion in each row/column. %has% is an alias for %row_in%.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
set.seed(123)
sheet1 = as.sheet(
       matrix(sample(c(1:10,NA), 30, replace = TRUE), 10)
)

result  = compute(sheet1, {
             # count 8
             exact = count_row_if(8, V1, V2, V3)
             # count values greater than 8
             greater = count_row_if(gt(8), V1, V2, V3)
             # count integer values between 5 and 8, e. g. 5, 6, 7, 8
             integer_range = count_row_if(5:8, V1, V2, V3)
             # count values between 5 and 8 
             range = count_row_if(5 %thru% 8, V1, V2, V3)
             # count NA 
             na = count_row_if(is.na, V1, V2, V3)
             # count not-NA 
             not_na = count_row_if(not_na, V1, V2, V3) 
             # are there any 5 in each row?
             has_five = cbind(V1, V2, V3) %row_in% 5   
         })  
result
 
mean_row_if(6, sheet1$V1, data = sheet1)
median_row_if(gt(2), sheet1$V1, sheet1$V2, sheet1$V3) 
sd_row_if(5 %thru% 8, sheet1$V1, sheet1$V2, sheet1$V3)
 
if_na(sheet1) = 5 # replace NA 

# custom apply
apply_col_if(prod, gt(2), sheet1$V1, data = sheet1) # product of all elements by columns
apply_row_if(prod, gt(2), sheet1$V1, data = sheet1) # product of all elements by rows
 
# Examples borrowed from Microsoft Excel help for COUNTIF
sheet1 = text_to_columns(
    "
       a       b
    apples    32
    oranges   54
    peaches   75
    apples    86
    "
)

count_if("apples", sheet1$a) # 2

count_if("apples", sheet1) # 2

calc(sheet1, count_if("apples", a, b)) # 2

count_if(gt(55), sheet1$b) # greater than 55 = 2

count_if(ne(75), sheet1$b) # not equal 75 = 3

count_if(ge(32), sheet1$b) # greater than or equal 32 = 4

count_if(gt(32) & lt(86), sheet1$b) # 2

# count only integer values between 33 and 85
count_if(33:85, sheet1$b) # 2

# values with letters
count_if(regex("^[A-z]+$"), sheet1) # 4

# values that started on 'a'
count_if(regex("^a"), sheet1) # 2

# count_row_if
count_row_if(regex("^a"), sheet1) # c(1,0,0,1)

sheet1 %row_in% 'apples'  # c(TRUE,FALSE,FALSE,TRUE)

# Some of Microsoft Excel examples for SUMIF/AVERAGEIF/etc 
sheet1 = text_to_columns(
    "
    property_value commission data
    100000              7000  250000
    200000             14000	
    300000             21000	
    400000             28000
    "
)

# Sum of commision for property value greater than 160000
calc(sheet1, sum_if(gt(160000), property_value, data = commission)) # 63000
    
# Sum of property value greater than 160000
calc(sheet1, sum_if(gt(160000), property_value)) # 900000

# Sum of commision for property value equals to 300000
calc(sheet1, sum_if(300000, property_value, data = commission)) # 21000
    
# Sum of commision for property value greater than first value of data
calc(sheet1, sum_if(gt(data[1]), property_value, data = commission)) # 49000
    
sheet1 = text_to_columns(
       "
         category     food sales
       Vegetables Tomatoes  2300
       Vegetables   Celery  5500
           Fruits  Oranges   800
             NA     Butter   400
       Vegetables  Carrots  4200
           Fruits   Apples  1200
       "
       )

# Sum of sales for Fruits
calc(sheet1, sum_if("Fruits", category, data = sales)) # 2000

# Sum of sales for Vegetables    
calc(sheet1, sum_if("Vegetables", category, data = sales)) # 12000

# Sum of sales for food which is ending on 'es' 
calc(sheet1, sum_if(perl("es$"), food, data = sales)) # 4300

# Sum of sales for empty category
calc(sheet1, sum_if(NA, category, data = sales))  # 400


sheet1 = text_to_columns(
    "
    property_value commission data
    100000              7000  250000
    200000             14000	
    300000             21000	
    400000             28000
    "
)

# Commision average for comission less than 23000
calc(sheet1, mean_if(lt(23000), commission)) # 14000


# Property value average for property value less than 95000
calc(sheet1, mean_if(lt(95000), property_value)) #  NaN

# Commision average for property value greater than 250000
calc(sheet1, mean_if(gt(250000), property_value, data = commission)) # 24500


sheet1 = text_to_columns(
    '
                region  profits
                  East   45678
                  West   23789
                 North   -4789
    "South (New Office)"     0
               MidWest    9678
    ',
    quote = '"'
)


# Mean profits for 'west' regions
calc(sheet1, mean_if(contains("West"), region, data = profits)) # 16733.5


# Mean profits for regions wich doesn't contain New Office
calc(sheet1, mean_if(not(contains("New Office")), region, data = profits))  # 18589


sheet1 = text_to_columns(
    "
    grade weight 
    89      1
    93      2
    96      2
    85      3
    91      1
    88      1
    "
)

# Minimum grade for weight equals to 1
calc(sheet1, min_if(1, weight, data = grade)) # 88


# Maximum grade for weight equals to 1
calc(sheet1, max_if(1, weight, data = grade)) #91


# Example with offset
sheet1 = text_to_columns(
    "
    weight grade 
       10    b
       11    a
      100    a
      111    b
        1    a
        1    a
    "
)

calc(sheet1, min_if("a", grade[2:5], data = weight[1:4])) # 10

expss documentation built on Jan. 8, 2021, 5:38 p.m.