In clinDataReview: Clinical Data Review Tool

library(knitr)
knitr::opts_chunk$set(
    echo = FALSE, 
    warning = FALSE, error = FALSE, message = FALSE, 
    results = "asis"
)

knitr::knit_hooks$set(
    message = function(x, options) {
        paste('\n\n<div class="alert alert-info">',
            gsub('##', '\n', x),
            '</div>', sep = '\n')
    }
)

# print warnings where they occur (warn = 0 by default)
options(warn = 1)

# Note: find a way to specify defaults for variables
attach(params)

# TODO: find a way to specify named vector in YAML
if(exists("colorVar"))  colorVar <- unlist(colorVar)    else    colorVar <- NULL
if(!exists("typePlot")) typePlot <- c("treemap", "sunburst")
if(!exists("patientProfilePath"))   patientProfilePath <- NULL
if(!exists("loopingVar")) loopingVar <- NULL
if(!exists("loopingTotal")) loopingTotal <- TRUE
if(!exists("parentVar"))    parentVar <- NULL

library(clinDataReview)
library(clinUtils)
library(inTextSummaryTable)
library(plyr) # for dlply, rbind.fill
library(htmltools)

if(!exists("reportTitleLevel")) reportTitleLevel <- 1

# Create a header at the wanted depth
cat(getMdHeader(title = reportTitle, level = reportTitleLevel))

# Load data
pathData <- file.path(pathDataFolder, dataFileName)

dataAll <- clinUtils::loadDataADaMSDTM(pathData, verbose = FALSE)
data <- do.call(plyr::rbind.fill, dataAll)

# Extract label information
labelVars <- attr(dataAll, "labelVars")

# Data processing 
if(exists("dataProcessing")){
    data <- processData(
        data = data, 
        dataPath = pathDataFolder,
        processing = dataProcessing,
        verbose = TRUE,
        labelVars = labelVars
    )
    # Labels updated with extra annotation:
    labelVars <- attr(data, "labelVars")
}

# Create URL to patient profiles
if(!is.null(patientProfilePath))
    data <- createPatientProfileVar(
        data = data, 
        patientProfilePath = patientProfilePath,
        checkExist = FALSE
    )

## total

if(exists("dataTotalFileName")){

    # Load data
    pathDataTotal <- file.path(pathDataFolder, dataTotalFileName)

    dataTotalAll <- clinUtils::loadDataADaMSDTM(pathDataTotal, verbose = FALSE)
    dataTotal <- dataTotalAll[[1]]

    # Extract label information
    labelVarsTotal <- attr(dataTotalAll, "labelVars")

    # Total data processing
    if(exists("dataTotalProcessing")){
        dataTotal <- processData(
            data = dataTotal, 
            dataPath = pathDataFolder, 
            processing = dataTotalProcessing,
            verbose = TRUE,
            labelVars = labelVarsTotal,
            labelData = "total data"
        )
    }

}else   dataTotal <- data


# only subset of the data if requested
if(!is.null(loopingVar) && exists("loopingNMax") && is.integer(loopingNMax)){

    data <- merge(
        x = data,
        y = unique(data[, loopingVar, drop = FALSE])[seq_len(loopingNMax), , drop = FALSE],
        all = FALSE
    )

}

    ## compute counts

    ## extra 'statistics' per group:
    # combine all paths across patients (should be collapsed with: ', ')
    # + links
    statsExtraPP <- if(!is.null(patientProfilePath)){
        list(
            statPatientProfilePath = function(data) 
                toString(sort(unique(data$patientProfilePath))),
            statPatientProfileLink = function(data)
                toString(sort(unique(data$patientProfileLink)))
        )
    }

    ## statistics of interest
    # get default counts + stats with subjects profiles path + color variable
    statsPP <- c(
        getStats(c("n", "m", "%")),
        if(!is.null(colorVar))  getStats(type = "Mean"),
        if(!is.null(patientProfilePath))
            list(
                patientProfilePath = quote(statPatientProfilePath),
                patientProfileLink = quote(statPatientProfileLink)
            )
    )

    # all variable of interest:
    vars <- c(parentVar, countVar)

    varWithTotal <- vars

    # variable to summarize
    # if a color variable is specified:
    # - the mean of this variable is computed (var = [color var])
    # - as well as the counts per group (var = 'all')
    if(!is.null(colorVar)){
        if(packageVersion("inTextSummaryTable") < "2.11.0")
            stop("'inTextSummaryTable' version >= 2.11.0 is required.")
        colorVarCountVar <- colorVar[countVar]
        var <- c("all", colorVarCountVar)
        varInclude0 <- colorVarCountVar
    }else{
        var <- NULL
        varInclude0 <- FALSE
    }

    # Extract input data for plot: summary table
    dataPlot <- ddply(data, loopingVar, function(dataI){

        # extract total dataset
        dataTotalI <- dataTotal
        if(!is.null(loopingVar) && loopingTotal){
            if(any(!loopingVar %in% colnames(dataTotalI)))
                stop("Looping variable(s) are not available in the total dataset. ",
                    "Are you sure you want to compute the total by 'loopingVar'? (see 'loopingTotal').")
            dataTotalI <- merge(
                x = unique(dataI[, loopingVar, drop = FALSE]), 
                y = dataTotalI, 
                by = loopingVar, 
                all = FALSE # only include data if present in y
            )
        }

        # The variable(s) used for coloring should be included into the same data column
        # So for the computation of the data total across rows
        # the corresponding specified colorVar of the PARENT variable should be used
        if(!is.null(colorVar)){
            if(!is.null(names(colorVar))){
                dataTotalRow <- sapply(varWithTotal, function(varI){

                    # extract color variable for the parent variable
                    parentVarI <- vars[match(varI, vars)-1]
                    colorParentVar <- colorVar[parentVarI]

                    dataTotalRow <- ddply(dataI, c("USUBJID", parentVar), function(x){
                        # if color var not specified for this variable: set to NA
                        x[[colorVar[countVar]]] <- if(length(colorParentVar) == 0 || is.na(colorParentVar)){
                            NA
                        }else   x[[colorParentVar]]
                        x
                    })

                }, simplify = FALSE)
            }else   dataTotalRow <- NULL
        }else   dataTotalRow <- NULL

        # compute summary table
        tableStats <- computeSummaryStatisticsTable(

            data = dataI,
            rowVar = vars,
            var = var,
            # total for column header
            # contains all subjects (even the one haven't presented an AE)
            dataTotal = dataTotalI, 

            # plotly treemap requires records (rows) for each group
            rowVarTotalInclude = if(length(varWithTotal) > 0)   varWithTotal,
            # data considered to computed the total per parent sector
            dataTotalRow = dataTotalRow,

            rowOrder = "total",
            labelVars = labelVars,

            # in case color variable not specified for all variables
            varInclude0 = varInclude0, 

            # statistics of interest
            # for DT output, include columns with patients
            stats = statsPP, 
            # add extra 'statistic': concatenate subject IDs
            statsExtra = statsExtraPP

        )

        # combine counts of adverse events and stat of color variable in the same row
        if(!is.null(colorVar)){
            colCounts <- c(vars, "statN", "statPercN", "statm", "n", "%", "m", if(!is.null(patientProfilePath)) "patientProfileLink")
            tableStatsCounts <- subset(tableStats, variable == "all" & !isTotal)
            tableStatsCounts <- tableStatsCounts[, colCounts]
            tableStatsColorVar <- subset(tableStats, variable != "all" & !isTotal)[, c(vars, "statMean", "Mean")]
            dataPlot <- merge(tableStatsCounts, tableStatsColorVar, all.x = TRUE, by = vars)
        }else   dataPlot <- subset(tableStats, !isTotal)

        dataPlot

    })

    # extract plot variables
    hoverVars <- c(parentVar, countVar, "n", "%", "m", if(!is.null(colorVar))   "Mean")

    # extract variables displayed in the table
    tableVars <- c(parentVar, countVar, "statN", "statPercN", "statm", if(!is.null(colorVar))   "statMean")

    # set labels for all variables
    labelVars[c("n", "statN")] <- "Number of patients"
    labelVars[c("%", "statPercN")] <- paste(c(
        "Percentage of patients",
        if(!is.null(loopingVar) && loopingTotal)    
            paste("by", toString(clinUtils::getLabelVar(loopingVar, labelVars = labelVars)))
    ), collapse = " ")
    labelVars[c("m", "statm")] <- "Number of events"
    labelVars[c("Mean", "statMean")] <- paste("Mean", clinUtils::getLabelVar(colorVar[countVar], labelVars = labelVars))

    # format label for 'knitPrintClinDataReview'
    if(!is.null(loopingVar))
        dataPlot[, loopingVar] <- colwise(function(x) gsub("\\.", "", x))(dataPlot[, loopingVar, drop = FALSE])

    # for each plot type ...
    listPlots <- sapply(typePlot, function(typePlotI){

        # ... and each looping variable:
        listPlots <- dlply(dataPlot, loopingVar, function(dataI){

            # create plot unique ID
            argsFormatLabelChunk <- c(
                list("countsVisualizationTemplate", typePlotI),
                if(!is.null(loopingVar))    unique(dataI[, loopingVar, drop = FALSE])
            )       
            id <- do.call(formatLabelChunk, argsFormatLabelChunk)

            # create plot
            colorLab <- toString(clinUtils::getLabelVar(colorVar[countVar], data = dataI, labelVars = labelVars))
            pathLab <- unname(clinUtils::getLabelVar(var = "USUBJID", labelVars = labelVars))
            plotCountClinData(
                data = dataI,
                vars = c(parentVar, countVar),
                valueVar = "statm", valueLab = "Number of events",
                colorVar = if(!is.null(colorVar))   "statMean",
                colorRange = if(exists("colorRange"))   colorRange,
                colorLab = colorLab, 
                hoverVars = hoverVars, 
                pathVar = if(!is.null(patientProfilePath))  "patientProfileLink", 
                pathLab = pathLab,
                table = TRUE, 
                tableVars = tableVars, 
                labelVars = labelVars,
                id = id,
                verbose = TRUE,
                typePlot = typePlotI
            )

        })

    }, simplify = FALSE)
    names(listPlots) <- paste(clinUtils::simpleCap(typePlot), "visualization")

    knitPrintClinDataReview(
        list = listPlots,
        level = reportTitleLevel + 1
    )