This is a brief example report using dataquieR’s functions. For a longer and better elaborated example, please also consider our online example with data from SHIP.
# Load the example study data shipped in the dataquieR package's extdata
# folder and keep a working copy under the name sd1.
load(system.file("extdata", "study_data.RData", package = "dataquieR"))
sd1 <- study_data
The imported study data consist of:
# Load the example metadata shipped in the dataquieR package's extdata
# folder and keep a working copy under the name md1.
load(system.file("extdata", "meta_data.RData", package = "dataquieR"))
md1 <- meta_data
The imported meta data provide information for:
The call of this R-function requires two inputs only:
# Build the applicability matrix from the sd1/md1 working copies.
# NOTE(review): LABEL is passed unquoted here, while the other calls in this
# report pass "LABEL" as a string -- confirm both forms are accepted.
appmatrix <- pro_applicability_matrix(
  study_data = sd1,
  meta_data = md1,
  label_col = LABEL
)
Heatmap-like plot:
# Print the applicability plot stored in the result.
appmatrix$ApplicabilityPlot
# Run the unit-missingness check on the working copies, passing CENTER_0 and
# PSEUDO_ID as id variables and CENTER_0 as the strata variable.
my_unit_missings2 <- com_unit_missingness(
  study_data = sd1,
  meta_data = md1,
  id_vars = c("CENTER_0", "PSEUDO_ID"),
  strata_vars = "CENTER_0",
  label_col = "LABEL"
)

# Show the summary data of the result.
my_unit_missings2$SummaryData
# Run the segment-missingness check on the working copies with a threshold
# value of 5 and direction "high"; the roles "secondary" and "process" are
# passed as exclude_roles.
MissSegs <- com_segment_missingness(
  study_data = sd1,
  meta_data = md1,
  label_col = "LABEL",
  threshold_value = 5,
  direction = "high",
  exclude_roles = c("secondary", "process")
)

# Show the summary plot of the result.
MissSegs$SummaryPlot
For some analyses, it is necessary to add new or transformed variables to the study data.
# Use the month function of the lubridate package to extract the month of the
# exam date. library() is used instead of require() so that a missing package
# stops with an error here rather than failing later at the month() call.
library(lubridate)

# Apply the changes to a copy of the data so that sd1 stays untouched.
sd2 <- sd1

# Store the month extracted from variable v00013 as the new column `month`.
sd2$month <- month(sd2$v00013)
Static metadata of the variable must be added to the respective metadata.
# Extend the metadata md1 with an entry for the new variable `month`:
# integer data type, label EXAM_MONTH and value labels for the twelve months.
# The result is stored separately as MD_TMP; md1 itself is not reassigned.
MD_TMP <- prep_add_to_meta(
  VAR_NAMES = "month",
  DATA_TYPE = "integer",
  LABEL = "EXAM_MONTH",
  VALUE_LABELS = "1 = January | 2 = February | 3 = March |
4 = April | 5 = May | 6 = June | 7 = July |
8 = August | 9 = September | 10 = October |
11 = November | 12 = December",
  meta_data = md1
)
Subsequent call of the R-function may include the new variable.
# Repeat the segment-missingness check on the extended data (sd2) and the
# extended metadata (MD_TMP), with EXAM_MONTH as grouping variable and a
# threshold value of 1. This overwrites the earlier MissSegs result.
MissSegs <- com_segment_missingness(
  study_data = sd2,
  meta_data = MD_TMP,
  group_vars = "EXAM_MONTH",
  label_col = "LABEL",
  threshold_value = 1,
  direction = "high",
  exclude_roles = c("secondary", "process")
)

# Show the summary plot of the result.
MissSegs$SummaryPlot
The following implementation also considers labeled missing codes. The use of such a table is optional but recommended. Missing code labels used in the simulated study data are loaded as follows:
# Read the missing-code labels from the CSV file shipped in the dataquieR
# package's extdata folder. Strings are kept as character vectors, and
# na.strings is empty so that no cell text is converted to NA on import.
code_labels <- read.csv2(
  system.file("extdata",
    "Missing-Codes-2020.csv",
    package = "dataquieR"
  ),
  stringsAsFactors = FALSE, na.strings = c()
)
# Run the item-missingness check with show_causes enabled, the code label
# table passed as cause_label_df, include_sysmiss enabled and a threshold
# value of 80. The metadata is passed as md1, the working copy of meta_data,
# like in the other calls of this report.
item_miss <- com_item_missingness(
  study_data = sd1,
  meta_data = md1,
  label_col = "LABEL",
  show_causes = TRUE,
  cause_label_df = code_labels,
  include_sysmiss = TRUE,
  threshold_value = 80
)
The function call above enables the analysis of causes for missing values, includes system missings with their own code, and sets the threshold to 80%.
# Show the summary table and the summary plot of the item-missingness result.
item_miss$SummaryTable
item_miss$SummaryPlot
# Check limit deviations against the HARD_LIMITS; resp_vars is passed as
# NULL, i.e. no specific response variables are named in this call.
MyValueLimits <- con_limit_deviations(
  resp_vars = NULL,
  label_col = "LABEL",
  study_data = sd1,
  meta_data = md1,
  limits = "HARD_LIMITS"
)

# Show the summary table of the result.
MyValueLimits$SummaryTable
# Select the variables with deviations, i.e. the rows of the summary table
# whose GRADING equals 1. which() drops rows where GRADING is NA, so no NA
# names end up in the selection; plain logical indexing would keep them and
# produce NULL entries in the plot list below.
limit_summary <- MyValueLimits$SummaryTable
whichdeviate <- as.character(limit_summary$Variables)[
  which(limit_summary$GRADING == 1)
]

# Arrange the plots of the selected variables in two columns.
ggpubr::ggarrange(
  plotlist = MyValueLimits$SummaryPlotList[whichdeviate],
  ncol = 2
)
# Run the check for inadmissible categorical values on the working copies.
IAVCatAll <- con_inadmissible_categorical(
  study_data = sd1,
  meta_data = md1,
  label_col = "LABEL"
)
# Read the contradiction check table from the CSV file shipped in the
# dataquieR package's extdata folder; the file has a header row and uses "#"
# as field separator.
checks <- read.csv(
  system.file("extdata",
    "contradiction_checks.csv",
    package = "dataquieR"
  ),
  header = TRUE, sep = "#"
)
# Run the contradiction checks defined in the check table on the working
# copies, with a threshold value of 1.
AnyContradictions <- con_contradictions(
  study_data = sd1,
  meta_data = md1,
  label_col = "LABEL",
  check_table = checks,
  threshold_value = 1
)

# Show the summary table and the summary plot of the result.
AnyContradictions$SummaryTable
AnyContradictions$SummaryPlot
# Run the robust univariate outlier check on the working copies.
# NOTE(review): the function is reached via `:::` (internal access), while
# acc_loess further down is called via `::` -- confirm whether this function
# is exported and `::` can be used here as well.
# NOTE(review): LABEL is passed unquoted here, while most other calls pass
# "LABEL" as a string -- confirm both forms are accepted.
ruol <- dataquieR:::acc_robust_univariate_outlier(
  study_data = sd1,
  meta_data = md1,
  label_col = LABEL
)

# Print the list of summary plots (one entry per variable).
ruol$SummaryPlotList
## $AGE_0
##
## $AGE_1
##
## $SBP_0
##
## $DBP_0
##
## $GLOBAL_HEALTH_VAS_0
##
## $ARM_CIRC_0
##
## $CRP_0
##
## $BSG_0
##
## $DEV_NO_0
##
## $N_CHILD_0
##
## $N_INJURIES_0
##
## $N_BIRTH_0
##
## $N_ATC_CODES_0
##
## $ITEM_1_0
##
## $ITEM_2_0
##
## $ITEM_3_0
##
## $ITEM_4_0
##
## $ITEM_5_0
##
## $ITEM_6_0
##
## $ITEM_7_0
##
## $ITEM_8_0
# Run acc_loess for the response variable SBP_0, with USR_BP_0 as grouping
# variable and EXAM_DT_0 as time variable.
myloess <- dataquieR::acc_loess(
  resp_vars = "SBP_0",
  group_vars = "USR_BP_0",
  time_vars = "EXAM_DT_0",
  label_col = "LABEL",
  study_data = sd1,
  meta_data = md1
)

# Print the list of summary plots of the result.
myloess$SummaryPlotList
## $SBP_0