View source: R/dataManipulation-filterData.R
filterData | R Documentation |
A dataset can be filtered:
- on a specific value of interest
- on a function of a variable (valueFct parameter, e.g. the maximum of the variable)
- to retain only non-missing values of a variable (keepNA set to FALSE)
- by groups (varsBy parameter)
Note that by default, missing values in the filtering variable are retained
(which differs from the default behaviour in R).
To filter out missing records, please set the keepNA parameter to FALSE.
filterData( data, filters, keepNA = TRUE, returnAll = FALSE, verbose = FALSE, labelVars = NULL, labelData = "data" )
data |
Data.frame with data. |
filters |
Unique filter or list of filters.
If a list of filters is specified, the different filters are independently
executed on the entire dataset to identify the records to retain for
each filtering condition. |
keepNA |
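Logical, if TRUE (by default) missing values in the filtering variable are retained; if FALSE, records with a missing value in the filtering variable are filtered out. |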
returnAll |
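Logical: if FALSE (by default), only the records of data fulfilling the specified filters are returned; if TRUE, all records of data are returned, with an additional logical column (keep, or varNew if specified) flagging the records which fulfill the specified condition(s). |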
verbose |
Logical, if TRUE (FALSE by default) progress messages are printed in the current console. For the visualizations, progress messages during download of subject-specific report are displayed in the browser console. |
labelVars |
Named character vector containing variable labels. |
labelData |
(optional) String with label for the input data ("data" by default). |
- If returnAll is FALSE: data filtered with the specified filters.
- If returnAll is TRUE: data with the additional column: keep or varNew (if specified), containing TRUE for records which fulfill the specified condition(s) and FALSE otherwise.
The output contains the additional attribute: msg, which contains a message describing the filtered records.
Laure Cougnaud
# Usage examples for filterData(), run on the example ADaM datasets
# shipped with the clinUtils package (dataADaMCDISCP01).

library(clinUtils)

data(dataADaMCDISCP01)
labelVars <- attr(dataADaMCDISCP01, "labelVars")

dataDM <- dataADaMCDISCP01$ADSL

## single filter

# filter with inclusion criteria:
filterData(
  data = dataDM,
  filters = list(var = "SEX", value = "M"),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# filter with non-inclusion criteria
filterData(
  data = dataDM,
  filters = list(var = "SEX", value = "M", rev = TRUE),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# filter based on inequality operator
filterData(
  data = dataDM,
  filters = list(var = "AGE", value = 75, op = "<="),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# missing values are retained by default!
dataDMNA <- dataDM
dataDMNA[1:2, "AGE"] <- NA
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", value = 75, op = "<="),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# filter missing values on variable
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", value = 75, op = "<=", keepNA = FALSE),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# retain only missing values
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", value = NA, keepNA = TRUE),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# filter missing values
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", keepNA = FALSE),
  # optional
  labelVars = labelVars, verbose = TRUE
)

## multiple filters

# by default the records fulfilling all conditions are retained ('AND')
filterData(
  data = dataDM,
  filters = list(
    list(var = "AGE", value = 75, op = "<="),
    list(var = "SEX", value = "M")
  ),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# custom operator:
filterData(
  data = dataDM,
  filters = list(
    list(var = "AGE", value = 75, op = "<="),
    "|",
    list(var = "SEX", value = "M")
  ),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# filter by group

# only retain adverse event records with worst-case severity
dataAE <- dataADaMCDISCP01$ADAE
dataAE$AESEV <- factor(dataAE$AESEV, levels = c("MILD", "MODERATE", "SEVERE"))
# numeric version of the ordered severity levels, so that max() can be applied
dataAE$AESEVN <- as.numeric(dataAE$AESEV)
nrow(dataAE)
dataAEWorst <- filterData(
  data = dataAE,
  filters = list(
    var = "AESEVN",
    valueFct = max,
    varsBy = c("USUBJID", "AEDECOD"),
    keepNA = FALSE
  ),
  # optional
  labelVars = labelVars, verbose = TRUE
)
nrow(dataAEWorst)

# post-processing function
# keep subjects with at least one severe AE:
dataSubjectWithSevereAE <- filterData(
  data = dataAE,
  filters = list(
    var = "AESEV",
    value = "SEVERE",
    varsBy = "USUBJID",
    postFct = any
  ),
  # optional
  labelVars = labelVars, verbose = TRUE
)

# for each laboratory parameter: keep only subjects which have at least one
# measurement classified as low or high
dataLB <- subset(dataADaMCDISCP01$ADLBC, !grepl("change", PARAM))
dataLBFiltered <- filterData(
  data = dataLB,
  filters = list(
    var = "LBNRIND",
    value = c("LOW", "HIGH"),
    varsBy = c("PARAMCD", "USUBJID"),
    postFct = any
  ),
  # optional
  labelVars = labelVars, verbose = TRUE
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.