# Nothing
## ----setup--------------------------------------------------------------------
library(clinCompare)
## ----compare-datasets---------------------------------------------------------
# Reference version of a small three-subject demographics table.
baseline <- data.frame(
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 52, 38),
  SEX = c("M", "F", "M"),
  RACE = c("WHITE", "WHITE", "ASIAN"),
  stringsAsFactors = FALSE
)

# The revised version is the baseline with exactly two cells changed:
# the age of the second subject and the sex of the third subject.
updated <- baseline
updated$AGE[2] <- 53
updated$SEX[3] <- "F"

# Run the comparison, keep the returned object, and print it.
result <- compare_datasets(baseline, updated)
result
## ----drill-into-result--------------------------------------------------------
# Per-column difference counts, read from the `discrepancies` element of the
# `observation_comparison` component of `result`.
result$observation_comparison$discrepancies
# Row-level details for a specific variable: the `SEX` entry of `details`.
# NOTE(review): `baseline` and `updated` differ in SEX only for SUBJ03, so a
# single differing row is presumably reported here -- confirm against output.
result$observation_comparison$details$SEX
## ----compare-variables--------------------------------------------------------
# First table: subject id, AGE stored as double, and a SEX column.
df_a <- data.frame(
  USUBJID = c("SUBJ01", "SUBJ02"),
  AGE = c(45, 52),
  SEX = c("M", "F"),
  stringsAsFactors = FALSE
)

# Second table: same subjects, but AGE is stored as integer (note the L
# suffix), SEX is absent, and a WEIGHT column is present instead.
df_b <- data.frame(
  USUBJID = c("SUBJ01", "SUBJ02"),
  AGE = c(45L, 52L),
  WEIGHT = c(75.5, 80.2),
  stringsAsFactors = FALSE
)

# Hand both tables to compare_variables() and print whatever it returns.
compare_variables(df_a, df_b)
## ----compare-observations-----------------------------------------------------
# Three scored records keyed by ID.
df1 <- data.frame(
  ID = c(1, 2, 3),
  SCORE = c(80, 90, 70),
  stringsAsFactors = FALSE
)

# Second version: identical to the first except that the score for the
# second record is 95 instead of 90.
df2 <- df1
df2$SCORE[2] <- 95

# Compare the two versions value by value and print the result.
compare_observations(df1, df2)
## ----clean-dataset------------------------------------------------------------
# Input with two kinds of redundancy: "Alice"/"alice" differ only in letter
# case, and the two "Bob" rows are exact duplicates of each other.
messy <- data.frame(
  NAME = c("Alice", "alice", "Bob", "Bob"),
  SCORE = c(100, 100, 85, 85),
  stringsAsFactors = FALSE
)

# Call clean_dataset() with duplicate removal switched on and "upper" as the
# requested case; the returned value is printed.
clean_dataset(
  messy,
  remove_duplicates = TRUE,
  convert_to_case = "upper"
)
## ----prepare-datasets---------------------------------------------------------
# Two sales tables holding the same three regions in different row order.
# stringsAsFactors = FALSE is set explicitly, matching the other data frames
# built in this script, so REGION is a character column on R versions before
# 4.0.0 as well (where the default was still TRUE).
df_unsorted1 <- data.frame(
  REGION = c("West", "East", "North"),
  SALES = c(150, 200, 180),
  stringsAsFactors = FALSE
)
df_unsorted2 <- data.frame(
  REGION = c("East", "North", "West"),
  SALES = c(210, 185, 160),
  stringsAsFactors = FALSE
)

# Pass both tables to prepare_datasets() with REGION as the sort column, then
# print the `df1` and `df2` elements of the returned object.
prepped <- prepare_datasets(df_unsorted1, df_unsorted2, sort_columns = "REGION")
prepped$df1
prepped$df2
## ----compare-by-group---------------------------------------------------------
# Two versions of a four-subject table spread over two sites. The versions
# differ only in AGE for subjects S02 (52 vs 53) and S04 (61 vs 62).
# stringsAsFactors = FALSE is set explicitly, matching the other data frames
# built in this script, so SITEID and SUBJID are character columns on R
# versions before 4.0.0 as well (where the default was still TRUE).
site_data_v1 <- data.frame(
  SITEID = c("SITE01", "SITE01", "SITE02", "SITE02"),
  SUBJID = c("S01", "S02", "S03", "S04"),
  AGE = c(45, 52, 38, 61),
  stringsAsFactors = FALSE
)
site_data_v2 <- data.frame(
  SITEID = c("SITE01", "SITE01", "SITE02", "SITE02"),
  SUBJID = c("S01", "S02", "S03", "S04"),
  AGE = c(45, 53, 38, 62),
  stringsAsFactors = FALSE
)

# Run the comparison with SITEID as the grouping variable and list the names
# of the returned object.
by_site <- compare_by_group(site_data_v1, site_data_v2, group_vars = "SITEID")
names(by_site)
## ----detect-domain------------------------------------------------------------
# A three-subject table carrying study/subject identifiers, basic
# demographics, and arm code plus arm description.
dm_data <- data.frame(
  STUDYID = rep("STUDY01", times = 3),
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 62, 51),
  SEX = c("M", "F", "M"),
  RACE = c("WHITE", "BLACK", "ASIAN"),
  ARMCD = c("TRT", "PBO", "TRT"),
  ARM = c("Treatment", "Placebo", "Treatment"),
  stringsAsFactors = FALSE
)

# Hand the table to detect_cdisc_domain() and print what it returns.
detect_cdisc_domain(dm_data)
## ----cdisc-compare------------------------------------------------------------
# First version of the demographics table, including a reference start date
# stored as character.
dm_v1 <- data.frame(
  STUDYID = rep("STUDY01", times = 3),
  USUBJID = c("SUBJ01", "SUBJ02", "SUBJ03"),
  AGE = c(45, 62, 51),
  SEX = c("M", "F", "M"),
  RACE = c("WHITE", "BLACK", "ASIAN"),
  ARMCD = c("TRT", "PBO", "TRT"),
  ARM = c("Treatment", "Placebo", "Treatment"),
  RFSTDTC = c("2024-01-15", "2024-01-16", "2024-01-17"),
  stringsAsFactors = FALSE
)

# Second version: a copy of the first in which only the age of the third
# subject changes, from 51 to 52.
dm_v2 <- dm_v1
dm_v2$AGE[3] <- 52

# Compare the two versions, passing "DM" as the domain and "SDTM" as the
# standard; keep the returned object and print it.
cdisc_result <- cdisc_compare(
  dm_v1,
  dm_v2,
  domain = "DM",
  standard = "SDTM"
)
cdisc_result
## ----validate-cdisc-----------------------------------------------------------
# Run validate_cdisc() on the first DM version with the same domain/standard
# pair used for the comparison. The value is stored only; the assignment
# itself does not auto-print it.
validation <- validate_cdisc(
  dm_v1,
  domain = "DM",
  standard = "SDTM"
)
## ----get-all-diffs------------------------------------------------------------
# Store the value returned by get_all_differences() for the CDISC comparison;
# the surrounding parentheses make the assigned value print as well.
(diffs <- get_all_differences(cdisc_result))
## ----export-report------------------------------------------------------------
# Both reports go to the session's temporary directory; the two target paths
# differ only in file extension.
# NOTE(review): presumably export_report() chooses the output format from the
# extension -- confirm against the package documentation.

# HTML report
export_report(
  cdisc_result,
  file.path(tempdir(), "dm_report.html")
)

# Text report
export_report(
  cdisc_result,
  file.path(tempdir(), "dm_report.txt")
)
## ----export-excel, eval=FALSE-------------------------------------------------
# # Excel workbook with Summary, Variable Diffs, Value Diffs, and CDISC tabs
# export_report(cdisc_result, file.path(tempdir(), "dm_report.xlsx"))
## ----batch-compare, eval=FALSE------------------------------------------------
# results <- compare_submission(
# base_dir = "submission_v1/",
# compare_dir = "submission_v2/",
# output_file = "submission_diff.xlsx"
# )
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.