Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.height = 5
)
## ----setup--------------------------------------------------------------------
library(autodb)
## -----------------------------------------------------------------------------
summary(ChickWeight)
## -----------------------------------------------------------------------------
db <- autodb(ChickWeight)
db
## -----------------------------------------------------------------------------
db_text <- gv(db)
cat(db_text)
## ----check_diagrammer---------------------------------------------------------
if (requireNamespace("DiagrammeR", quietly = TRUE)) {
show <- function(x) DiagrammeR::grViz(gv(x))
maybe_plot <- function(x) DiagrammeR::grViz(gv(x))
}else{
show <- print
maybe_plot <- function(x) invisible(NULL)
}
## ----db_plot------------------------------------------------------------------
maybe_plot(db)
## -----------------------------------------------------------------------------
deps <- discover(ChickWeight, accuracy = 1, progress = TRUE)
## -----------------------------------------------------------------------------
deps
## -----------------------------------------------------------------------------
detset(deps)
dependant(deps)
attrs_order(deps)
## -----------------------------------------------------------------------------
schema <- synthesise(deps)
schema
## -----------------------------------------------------------------------------
maybe_plot(schema)
## -----------------------------------------------------------------------------
knitr::kable(as.data.frame(Titanic))
## -----------------------------------------------------------------------------
show(autodb(as.data.frame(Titanic)))
## -----------------------------------------------------------------------------
titanic_deps_freqonly <- discover(as.data.frame(Titanic), 1, exclude = "Freq")
titanic_deps_freqonly
## -----------------------------------------------------------------------------
identical(titanic_deps_freqonly, discover(as.data.frame(Titanic), 1, exclude_class = "numeric"))
## -----------------------------------------------------------------------------
show(autodb(as.data.frame(Titanic), exclude = "Freq"))
## -----------------------------------------------------------------------------
titanic_deps <- discover(as.data.frame(Titanic), 1)
titanic_deps
## -----------------------------------------------------------------------------
titanic_deps[dependant(titanic_deps) == "Freq"]
## -----------------------------------------------------------------------------
linked_schema <- autoref(schema)
linked_schema
## -----------------------------------------------------------------------------
normalise(deps)
## -----------------------------------------------------------------------------
maybe_plot(linked_schema)
## ----chickWeight_db2_plot-----------------------------------------------------
db2 <- decompose(ChickWeight, linked_schema)
show(db2)
## ----chickWeights_rejoin------------------------------------------------------
rejoined <- rejoin(db)
summary(rejoined)
identical(rejoined, ChickWeight)
df_equiv(rejoined, ChickWeight)
## ----nudge_classes------------------------------------------------------------
if (requireNamespace("DiagrammeR", quietly = TRUE)) {
DiagrammeR::grViz(gv(nudge, name = "nudge"))
}else{
summary(nudge)
}
## ----nudge_database-----------------------------------------------------------
nudge_deps <- discover(
nudge,
accuracy = 1,
exclude = c("n_study", "n_comparison", "n_control", "n_intervention"),
exclude_class = "numeric"
)
nudge_schema <- normalise(nudge_deps, remove_avoidable = TRUE)
show(nudge_schema)
## ----nudge_publication_check--------------------------------------------------
nudge_database <- decompose(nudge, nudge_schema)
nudge_title_relation <- records(nudge_database)$title
nudge_pid_duplicates <- unique(nudge_title_relation$publication_id[
duplicated(nudge_title_relation$publication_id)
])
knitr::kable(subset(nudge_title_relation, publication_id %in% nudge_pid_duplicates))
## ----nudge_reference_check----------------------------------------------------
nudge_reference_duplicates <- unique(nudge_title_relation$reference[
duplicated(nudge_title_relation$reference)
])
knitr::kable(subset(nudge_title_relation, reference %in% nudge_reference_duplicates))
## ----nudge_filter-------------------------------------------------------------
nudge_deps_filtered <- nudge_deps[
lengths(detset(nudge_deps)) == 1 |
vapply(
detset(nudge_deps),
\(ds) length(setdiff(ds, c("publication_id", "reference"))) != 1,
logical(1)
)
]
nudge_schema_filtered <- normalise(nudge_deps_filtered, remove_avoidable = TRUE)
show(nudge_schema_filtered)
## ----nudge_sizes--------------------------------------------------------------
nudge_deps_size <- discover(nudge[, startsWith(names(nudge), "n_")], 1)
nudge_deps_size
nudge_deps_final <- c(nudge_deps_filtered, nudge_deps_size)
nudge_schema_final <- normalise(nudge_deps_final, remove_avoidable = TRUE)
nudge_database_final <- decompose(nudge, nudge_schema_final)
show(nudge_schema_final)
## ----nudge_size_check---------------------------------------------------------
knitr::kable(unique(subset(
nudge,
n_comparison != n_control + n_intervention,
c(
es_id,
reference,
title,
n_study,
n_comparison,
n_control,
n_intervention
)
)))
## ----nudge_clean_database-----------------------------------------------------
show(nudge_database_final)
## ----nudge_publication_badfilter----------------------------------------------
nudge_schema_relfiltered <- nudge_schema[
!grepl("publication_id_", names(nudge_schema), fixed = TRUE) &
!grepl("_publication_id", names(nudge_schema), fixed = TRUE) &
!grepl("reference_", names(nudge_schema), fixed = TRUE) &
!grepl("_reference", names(nudge_schema), fixed = TRUE)
]
## ----nudge_publication_badfilter_gv-------------------------------------------
show(nudge_schema_relfiltered)
## ----nudge_publication_badfilter_autoref--------------------------------------
identical(autoref(nudge_schema_relfiltered), nudge_schema_relfiltered)
## ----nudge_badfilter_example_fds----------------------------------------------
example_fds <- functional_dependency(
list(
list("title", "reference"),
list(c("reference", "type_experiment"), "location"),
list(c("title", "type_experiment"), "location")
),
c("title", "reference", "type_experiment", "location")
)
example_fds
## ----nudge_badfilter_example_transitive---------------------------------------
show(normalise(example_fds, ensure_lossless = FALSE))
## ----nudge_badfilter_example_nontransitive------------------------------------
show(normalise(example_fds[-2], ensure_lossless = FALSE))
## ----nudge_approximate_cheat--------------------------------------------------
nudge_approx_cheat_database_schema <- discover(
nudge,
accuracy = 1 - 2/nrow(nudge),
exclude = c("n_study", "n_comparison", "n_control", "n_intervention"),
exclude_class = "numeric"
) |>
normalise()
show(nudge_approx_cheat_database_schema)
## ----nudge_approximate--------------------------------------------------------
nudge_approx_database_schema <- discover(
nudge,
accuracy = 0.99,
exclude = c("n_study", "n_comparison", "n_control", "n_intervention"),
exclude_class = "numeric"
) |>
normalise()
show(nudge_approx_database_schema)
## ----nudge_approximate_reduced------------------------------------------------
show(reduce(nudge_approx_database_schema, "es_id"))
## ----avoid_setup--------------------------------------------------------------
avoid_deps <- functional_dependency(
list(
list("A", "B"),
list("B", "A"),
list(c("A", "C"), "D"),
list(c("A", "C"), "E"),
list(c("B", "D"), "C")
),
attrs_order = c("A", "B", "C", "D", "E")
)
avoid_deps
avoid_schema <- normalise(avoid_deps)
show(avoid_schema)
## ----avoid_remove-------------------------------------------------------------
avoid_schema_removed <- normalise(
avoid_deps,
remove_avoidable = TRUE
)
show(avoid_schema_removed)
## ----example_data_frame_with_interval_option----------------------------------
df_options <- data.frame(
id = 1:20,
value = c(2.3, 2.3, 5.7, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
lower_bound = c(NA_real_, NA_real_, NA_real_, 2.4, 0, 1, 0, 5.6, 2.4, 5.3, 5.3, 2.4, 2.4, 2.4, 2.4, 2.4, 2.4, 2.4, 5.6, 2.4),
upper_bound = c(NA_real_, NA_real_, NA_real_, 7.1, 10, 10, 13.1, 25.8, 10, 13.1, 10, 25.8, 25.8, 25.8, 25.8,13.1, 13.1, 25.8, 25.8, 25.8),
interval_distribution = c(NA, NA, NA, "uniform", "uniform", "uniform", "uniform", "uniform", "Beta", "Beta", "Beta", "Beta", "Kumaraswamy", "Kumaraswamy", "Kumaraswamy", "Kumaraswamy", "PERT", "PERT", "PERT", "PERT"),
param1 = c(NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, 1, 2, 2, 2.1, 2, 2, 2, 1, 2, 2),
param2 = c(NA, NA, NA, NA, NA, NA, NA, NA, 1, 2, 2, 2, 2, 1, 1, 1, NA, NA, NA, NA)
)
df_options$interval_distribution <- factor(df_options$interval_distribution)
knitr::kable(df_options)
## ----example_data_frame_with_interval_option_db-------------------------------
db_options <- autodb(df_options)
show(db_options)
## ----example_data_frame_with_interval_option_nulls----------------------------
df_options_with_presence <- data.frame(
id = df_options$id,
value = df_options$value,
value_present = !is.na(df_options$value),
lower_bound = df_options$lower_bound,
lower_bound_present = !is.na(df_options$lower_bound),
upper_bound = df_options$upper_bound,
upper_bound_present = !is.na(df_options$upper_bound),
interval_distribution = df_options$interval_distribution,
interval_distribution_present = !is.na(df_options$interval_distribution),
param1 = df_options$param1,
param1_present = !is.na(df_options$param1),
param2 = df_options$param2,
param2_present = !is.na(df_options$param2)
)
## ----example_data_frame_with_interval_option_nulls_db-------------------------
db_options_with_presence <- autodb(df_options_with_presence)
show(db_options_with_presence)
## ----example_data_frame_with_interval_option_nulls_rel------------------------
knitr::kable(records(db_options_with_presence)$value_present)
## ----example_data_frame_with_interval_option_nulls_distribution_rel-----------
knitr::kable(records(db_options_with_presence)$interval_distribution)
## ----example_data_frame_with_interval_option_p1absent_db----------------------
db_options_with_presence_p1absent <- autodb(subset(
df_options_with_presence,
!param1_present
))
show(db_options_with_presence_p1absent)
## ----example_data_frame_with_interval_option_p1present_db---------------------
show(autodb(subset(
df_options_with_presence,
param1_present
)))
## ----example_data_frame_with_interval_option_p1absent_constants---------------
knitr::kable(records(db_options_with_presence_p1absent)$constants)
## ----example_data_frame_with_NAs----------------------------------------------
df_nas <- data.frame(
patient = c(1L, 2L, 3L, 4L),
trial_entry_date = as.Date(c("2022/05/02", "2022/06/06", "2022/04/01", "2022/03/19")),
death_date = as.Date(c(NA, NA, "2022/10/07", NA))
)
knitr::kable(df_nas)
## ----example_data_frame_with_NAs_autodb---------------------------------------
show(autodb(df_nas))
## ----example_data_frame_with_NAs_nullably_normalised--------------------------
ideal_db <- decompose(
df_nas,
database_schema(
relation_schema(
list(
patient = list(c("patient", "trial_entry_date"), list("patient")),
patient_death = list(c("patient", "death_date"), list("patient"))
),
names(df_nas)
),
list(list("patient_death", "patient", "patient", "patient"))
)
)
records(ideal_db)$patient_death <- subset(records(ideal_db)$patient_death, !is.na(death_date))
show(ideal_db)
## ----factor_example_datasets--------------------------------------------------
df_badmerge_int <- cbind(
expand.grid(
a = c(NA, 0L, 1L),
b = c(NA, FALSE, TRUE)
),
row = 1:9
)
df_badmerge_factor <- df_badmerge_int
df_badmerge_factor$a <- as.factor(df_badmerge_factor$a)
knitr::kable(df_badmerge_int)
df_badmerge_logical <- df_badmerge_int
df_badmerge_logical$a <- as.logical(df_badmerge_logical$a)
names(df_badmerge_logical)[[3]] <- "row2"
knitr::kable(df_badmerge_logical)
## ----factor_example_int_single------------------------------------------------
knitr::kable(merge(
df_badmerge_int[, c("a", "row")],
df_badmerge_logical[, c("a", "row2")]
))
## ----factor_example_int_single_symmetric--------------------------------------
knitr::kable(merge(
df_badmerge_logical[, c("a", "row2")],
df_badmerge_int[, c("a", "row")]
))
## ----factor_example_single----------------------------------------------------
knitr::kable(merge(
df_badmerge_factor[, c("a", "row")],
df_badmerge_logical[, c("a", "row2")]
))
## ----factor_example_single_symmetric------------------------------------------
knitr::kable(merge(
df_badmerge_logical[, c("a", "row2")],
df_badmerge_factor[, c("a", "row")]
))
## ----factor_example-----------------------------------------------------------
knitr::kable(merge(
df_badmerge_factor,
df_badmerge_logical
))
## ----factor_example_asymmetric------------------------------------------------
knitr::kable(merge(
df_badmerge_logical,
df_badmerge_factor
))
## ----redundant_keys_example---------------------------------------------------
fds_redkey <- functional_dependency(
list(
list("a", "b"),
list("d", "c"),
list(c("b", "d"), "a"),
list("a", "c"),
list(c("b", "c"), "d")
),
letters[1:4]
)
fds_redkey
## ----redundant_keys_schema----------------------------------------------------
schema_redkey <- normalise(fds_redkey, remove_avoidable = TRUE)
show(schema_redkey)
## ----redundant_keys_fix-------------------------------------------------------
fds_redkey_fix <- functional_dependency(
list(
list("a", "b"),
list("d", "c"),
list(c("b", "c"), "a"),
list("a", "d")
),
letters[1:4]
)
fds_redkey_fix
schema_redkey_fix <- normalise(fds_redkey_fix, remove_avoidable = TRUE)
show(schema_redkey_fix)
## ----dup_example--------------------------------------------------------------
dup_db <- autodb(ChickWeight)
show(dup_db)
## ----dup_example_dup----------------------------------------------------------
show(dup_db[c(1, 1, 2, 2, 2)])
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.