In leppott/ContDataSumViz: Create Visualizations of Continuous Monitoring Data

# Report-wide chunk defaults: do not echo the R source, print output without
# a comment prefix, and switch off row-name printing.
knitr::opts_chunk$set(
  echo = FALSE,
  comment = NA,
  rownames.print = FALSE
)

```{css, echo=FALSE}
h1.title {
  font-size: 28px;
}
p.compact {
  margin-bottom: 0px;
  margin-top: 0px;
}
```

<p class="compact">
Dataset: **`r encodeString(params$sourcedata$dataset_shortdesc)`**
</p>

<p class="compact">
Aggregated by: **`r params$aggregatedata$aggregation_timeunit`**
</p>

<p class="compact">
Report created on: `r Sys.time()`; Package version `r utils::packageVersion(utils::packageName())`; `r R.Version()$version.string`
</p>

# {.tabset .tabset-pills}

## Source data {.tabset}


```r
# Summarise the source data supplied through the report parameters, with
# progress output switched off.
sourcesummary <- summarise_source_data(params$sourcedata, showprogress = FALSE)
sourcedatafields <- sourcesummary$datafields
# Record each field's row position so it can be shown as a column in the
# tables below. seq_len() returns an empty vector for a zero-row table,
# whereas seq.int(0) returns c(1, 0) and would make this assignment fail.
sourcedatafields$ordinal_position <- seq_len(nrow(sourcedatafields))

```

### Data fields imported

```r

# Table of the fields whose type is anything other than "ignore".
# NOTE: trimws() needed as summarise_source_data() is optimised for console printout
imported_rows <- which(trimws(sourcesummary$datafields$fieldtype) != "ignore")

reactable::reactable(
  sourcedatafields[imported_rows, ],
  sortable = TRUE,
  filterable = FALSE,
  searchable = FALSE,
  rownames = FALSE,
  pagination = FALSE,
  striped = TRUE,
  highlight = TRUE,
  columns = list(
    fieldname = reactable::colDef(
      name = "Field name",
      style = list(fontWeight = "bold")
    ),
    fieldtype = reactable::colDef(name = "Field type"),
    # datatype is kept in the data but hidden from the rendered table
    datatype = reactable::colDef(show = FALSE),
    count = reactable::colDef(name = "Total values"),
    missing = reactable::colDef(name = "Missing values"),
    min = reactable::colDef(name = "Min value"),
    max = reactable::colDef(name = "Max value"),
    validation_warnings = reactable::colDef(name = "Validation warnings"),
    ordinal_position = reactable::colDef(name = "Column position")
  )
)

```

### Data fields ignored

```r

# Compact table of the fields whose type is "ignore", showing only their
# name, type and position.
# NOTE: trimws() needed as summarise_source_data() is optimised for console printout
ignored_rows <- which(trimws(sourcesummary$datafields$fieldtype) == "ignore")
ignored_cols <- c("fieldname", "fieldtype", "ordinal_position")

reactable::reactable(
  sourcedatafields[ignored_rows, ignored_cols],
  sortable = TRUE,
  filterable = FALSE,
  searchable = FALSE,
  pagination = FALSE,
  rownames = FALSE,
  compact = TRUE,
  fullWidth = FALSE,
  columns = list(
    fieldname = reactable::colDef(name = "Field name", minWidth = 200),
    fieldtype = reactable::colDef(name = "Field type", minWidth = 100),
    ordinal_position = reactable::colDef(name = "Column position")
  )
)

```

### Validation warnings

```r

# Table of the validation warnings reported by the source summary.
warning_columns <- list(
  fieldname = reactable::colDef(
    name = "Field name",
    style = list(fontWeight = "bold"),
    minWidth = 200
  ),
  message = reactable::colDef(name = "Details", minWidth = 500),
  instances = reactable::colDef(name = "Instances")
)

reactable::reactable(
  sourcesummary$validation_warnings,
  sortable = TRUE,
  filterable = FALSE,
  searchable = FALSE,
  rownames = FALSE,
  pagination = FALSE,
  striped = TRUE,
  highlight = TRUE,
  compact = TRUE,
  fullWidth = FALSE,
  columns = warning_columns
)

```

### Summary

```r

# Two-column (Item / Value) overview of the source data import.

# Total validation warnings: the recorded instance counts, plus one for every
# warning whose instance count is NA.
warning_instances <- sourcesummary$validation_warnings$instances
total_warnings <-
  sum(warning_instances, na.rm = TRUE) + sum(is.na(warning_instances))

# Entries of sourcesummary$overall to report, in display order; each lines up
# with the label at the same position in `Item` below.
overall_keys <- c(
  "cols_source_n",
  "cols_imported_n",
  "timepoint_fieldname",
  "timepoint_min",
  "timepoint_max",
  "rows_source_n",
  "rows_duplicates_n",
  "timepoint_missing_n",
  "rows_imported_n",
  "na_values"
)

summarydf <- data.frame(
  Item = c(
    "Columns in source",
    "Columns imported",
    "Column used for timepoint",
    "Min timepoint value",
    "Max timepoint value",
    "Rows in source",
    "Duplicate rows",
    "Rows missing timepoint values",
    "Rows imported",
    "Strings interpreted as missing values",
    "Total validation warnings"
  ),
  Value = c(sourcesummary$overall[overall_keys], total_warnings),
  stringsAsFactors = FALSE
)

# Render the source summary as a static, header-less key/value table.
summary_columns <- list(
  Item = reactable::colDef(
    name = "",
    style = list(fontWeight = "bold"),
    minWidth = 300
  ),
  Value = reactable::colDef(name = "", minWidth = 200)
)

reactable::reactable(
  summarydf,
  sortable = FALSE,
  filterable = FALSE,
  searchable = FALSE,
  pagination = FALSE,
  rownames = FALSE,
  striped = TRUE,
  compact = TRUE,
  fullWidth = FALSE,
  columns = summary_columns
)

```

## Aggregated data {.tabset}

```r

# Summarise the aggregated data object passed in via the report parameters.
aggsummary <- summarise_aggregated_data(params[["aggregatedata"]])

```

### Values present

```r

# NOTE(review): the next line looks like a disabled chunk option for a dynamic
# figure height -- confirm before removing.
#, fig.height = 0.5*as.numeric(aggsummary$overall["n_fields"])+3

# Overview plot of the "n" aggregation type for all aggregated fields; the
# line plot uses the timepoint field.
agg <- params$aggregatedata
plot_overview_combo_static(
  aggfields = agg$aggregatefields,
  aggtype = "n",
  lineplot_fieldname = agg$timepoint_fieldname,
  lineplot_fillcolour = "pink",
  heatmap_fillcolour = "darkred",
  title = paste("Records per", agg$aggregation_timeunit)
)

```

### Missing values

```r

# Overview plot of the "missing_n" aggregation type; the line plot uses the
# "[ALLFIELDSCOMBINED]" field.
agg <- params$aggregatedata
missing_title <- paste(
  "Total missing values per",
  agg$aggregation_timeunit
)
plot_overview_combo_static(
  aggfields = agg$aggregatefields,
  aggtype = "missing_n",
  lineplot_fieldname = "[ALLFIELDSCOMBINED]",
  lineplot_fillcolour = "lightblue",
  heatmap_fillcolour = "darkblue",
  title = missing_title
)

```

### Non-conformant values

```r

# Overview plot of the "nonconformant_n" aggregation type; the line plot uses
# the "[ALLFIELDSCOMBINED]" field.
agg <- params$aggregatedata
nonconformant_title <- paste(
  "Total nonconformant values per",
  agg$aggregation_timeunit
)
plot_overview_combo_static(
  aggfields = agg$aggregatefields,
  aggtype = "nonconformant_n",
  lineplot_fieldname = "[ALLFIELDSCOMBINED]",
  lineplot_fillcolour = "lightgreen",
  heatmap_fillcolour = "darkgreen",
  title = nonconformant_title
)

```

### Duplicate records

```r

# Totals plot of the "sum" aggregation type for the "[DUPLICATES]" field.
agg <- params$aggregatedata
duplicates_title <- paste(
  "Total duplicate records per",
  agg$aggregation_timeunit
)
plot_overview_totals_static(
  aggfield = agg$aggregatefields[["[DUPLICATES]"]],
  aggtype = "sum",
  fillcolour = "yellow",
  title = duplicates_title
)

```

### Summary

```r

# Two-column (Item / Value) overview of the aggregated data.

# Entries of aggsummary$overall to report, in display order; each lines up
# with the label at the same position in `Item` below.
# NOTE: Partitionfield functionality disabled until we work out how to present it
# ("partitionfield_fieldnames" / "Column(s) used as partitionfield")
agg_overall_keys <- c(
  "timepoint_fieldname",
  "timepoint_min",
  "timepoint_max",
  "aggregation_timeunit",
  "n_timepoints",
  "n_empty_timepoints",
  "n_fields"
)

aggsummarydf <- data.frame(
  Item = c(
    "Column used for timepoint",
    "Min timepoint value",
    "Max timepoint value",
    "Timepoint aggregation unit",
    "Total number of timepoints",
    "Number of empty timepoints",
    "Number of data fields"
  ),
  Value = c(aggsummary$overall[agg_overall_keys]),
  stringsAsFactors = FALSE
)

# Render the aggregated-data summary as a static, header-less key/value table.
aggsummary_columns <- list(
  Item = reactable::colDef(
    name = "",
    style = list(fontWeight = "bold"),
    minWidth = 300
  ),
  Value = reactable::colDef(name = "", minWidth = 200)
)

reactable::reactable(
  aggsummarydf,
  sortable = FALSE,
  filterable = FALSE,
  searchable = FALSE,
  pagination = FALSE,
  rownames = FALSE,
  striped = TRUE,
  compact = TRUE,
  fullWidth = FALSE,
  columns = aggsummary_columns
)

```

## Individual data fields {.tabset}

```r

# Debugging leftovers, kept for reference:
#params$aggregatedata <- testcpddata_byday
#aggtype <- "n"

# Write one level-3 tab per aggregated field and, inside it, one level-4 tab
# per aggregation type holding that field's time series plot. The headings
# are written with cat() as raw markdown (assumes the enclosing chunk uses
# results = "asis" -- TODO confirm).
aggfields <- params$aggregatedata$aggregatefields
fieldnames <- names(aggfields)
for (i in seq_along(fieldnames)) {
  cat("\n###", fieldnames[i], " {.tabset}\n")
  aggfield <- aggfields[[i]]
  # The first name in `values` is skipped; the remaining names are the
  # aggregation types to plot.
  aggtypes <- names(aggfield$values)[-1]
  for (aggtype in aggtypes) {
    cat("\n####", aggtype_friendlyname(aggtype, "short"), "\n")
    p <- plot_timeseries_static(aggfield = aggfield, aggtype = aggtype)

    #p <- plotly::ggplotly(p) # not working
    # Plots are not auto-printed inside a loop, so print explicitly.
    print(p)
    cat("\n")
  }
}