filterData {clinDataReview} | R Documentation |
Filter a dataset based on specified filters.
Description
A dataset can be filtered:
- on a specific value of interest
- on a function of a variable (valueFct parameter), e.g. the maximum of the variable
- to retain only non-missing values of a variable (keepNA set to FALSE)
- by groups (varsBy parameter)
Note that by default, missing values in the filtering variable are retained
(which differs from the default behaviour in R).
To remove records with missing values, please set the keepNA parameter to FALSE.
Usage
filterData(
data,
filters,
keepNA = TRUE,
returnAll = FALSE,
verbose = FALSE,
labelVars = NULL,
labelData = "data"
)
Arguments
data |
Data.frame with data. |
filters |
Unique filter or list of filters.
If a list of filters is specified, the different filters are independently
executed on the entire dataset to identify the records to retain for
each filtering condition. |
keepNA |
Logical, if TRUE (by default) missing values in the filtering variable are retained. |
returnAll |
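Logical:
- if FALSE (by default): the data filtered with the specified filters is returned
- if TRUE: the data is returned with an additional column (keep, or varNew if specified) containing TRUE for records which fulfill the specified condition(s) and FALSE otherwise
|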
verbose |
Logical, if TRUE (FALSE by default) progress messages are printed in the current console. For the visualizations, progress messages during download of subject-specific report are displayed in the browser console. |
labelVars |
Named character vector containing variable labels. |
labelData |
(optional) String with label for the input data ("data" by default). |
Value
If returnAll is:
- FALSE: data filtered with the specified filters
- TRUE: data with the additional column keep, or varNew (if specified), containing TRUE for records which fulfill the specified condition(s) and FALSE otherwise.
The output contains the additional attribute msg, which contains a message describing the filtered records.
Author(s)
Laure Cougnaud
Examples
library(clinUtils)

# Example data: the variable labels are stored in the 'labelVars' attribute,
# the subject-level records in the 'ADSL' element.
data(dataADaMCDISCP01)
labelVars <- attr(dataADaMCDISCP01, "labelVars")
dataDM <- dataADaMCDISCP01$ADSL

## Single filter

# Inclusion criterion on a specific value
filterData(
  data = dataDM,
  filters = list(var = "SEX", value = "M"),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# Non-inclusion criterion: same condition, reversed with 'rev'
filterData(
  data = dataDM,
  filters = list(var = "SEX", value = "M", rev = TRUE),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# Condition based on an inequality operator
filterData(
  data = dataDM,
  filters = list(var = "AGE", value = 75, op = "<="),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# Copy of the data with AGE set to missing for the first two records
dataDMNA <- dataDM
dataDMNA[1:2, "AGE"] <- NA

# Careful: missing values are retained by default!
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", value = 75, op = "<="),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# Same condition, additionally filtering missing values of the variable
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", value = 75, op = "<=", keepNA = FALSE),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# Retain only the missing values
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", value = NA, keepNA = TRUE),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# Filter the missing values (no condition on the value itself)
filterData(
  data = dataDMNA,
  filters = list(var = "AGE", keepNA = FALSE),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)
## Multiple filters

# Without an explicit operator, only the records fulfilling all
# conditions are retained ('AND')
filterData(
  data = dataDM,
  filters = list(
    list(var = "AGE", value = 75, op = "<="),
    list(var = "SEX", value = "M")
  ),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# A custom operator is specified between the filters:
filterData(
  data = dataDM,
  filters = list(
    list(var = "AGE", value = 75, op = "<="),
    "|",
    list(var = "SEX", value = "M")
  ),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)
## Filter by group

# Only retain the adverse event records with worst-case severity.
dataAE <- dataADaMCDISCP01$ADAE

# Order the severity levels, and derive the numeric code of each level
# (1 = MILD, 2 = MODERATE, 3 = SEVERE) so that a maximum can be computed.
dataAE$AESEV <- factor(dataAE$AESEV, levels = c("MILD", "MODERATE", "SEVERE"))
dataAE$AESEVN <- as.numeric(dataAE$AESEV)

# number of records before filtering
nrow(dataAE)

dataAEWorst <- filterData(
  data = dataAE,
  filters = list(
    var = "AESEVN",
    valueFct = max,
    varsBy = c("USUBJID", "AEDECOD"),
    keepNA = FALSE
  ),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# number of records after filtering
nrow(dataAEWorst)
## Post-processing function

# Keep the subjects with at least one severe AE:
dataSubjectWithSevereAE <- filterData(
  data = dataAE,
  filters = list(
    var = "AESEV",
    value = "SEVERE",
    varsBy = "USUBJID",
    postFct = any
  ),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)

# For each laboratory parameter: keep only the subjects which have
# at least one measurement classified as low or high.
# Parameters whose name contains "change" are excluded beforehand.
dataLB <- subset(dataADaMCDISCP01$ADLBC, !grepl("change", PARAM))
dataLBFiltered <- filterData(
  data = dataLB,
  filters = list(
    var = "LBNRIND",
    value = c("LOW", "HIGH"),
    varsBy = c("PARAMCD", "USUBJID"),
    postFct = any
  ),
  # optional arguments
  labelVars = labelVars,
  verbose = TRUE
)