###################################################
# s03_summary_excluded_data.R
# Description: Summary of data excluded with C="C"
# Dependencies: s01_dataset_preparation.R / s01.RData
###################################################

# Settings to knit in top directory:
# Everything after this chunk works with paths relative to the top level
library(rprojroot)
# Knit from the project root: locate the directory that contains
# "OpenProject.Rproj" and use it as knitr's root directory.
knitr::opts_knit$set(root.dir = find_root(has_file("OpenProject.Rproj")))
# Do not echo chunk source in the rendered output. FALSE is spelled out
# because the shorthand `F` is an ordinary variable that can be reassigned.
knitr::opts_chunk$set(echo = FALSE)

# Note: R markdown opens a new R session, your global environment is not available.

Load the dataset as prepared by "s01_dataset_preparation.R"

# -----------------------------------------------
# Prepare environment
# -----------------------------------------------
# Run the environment setup script, then load the objects stored in s01.RData
source(file.path("./Scripts", "Setup", "setup01_rEnvironment.R"))
load(file.path("./Scripts", "s01.RData"))

The outputs from this script are:

  1. Summary table of number of excluded data and reasons (split by >/< LLOQ)

  2. Table listing each excluded record

Both are intended to be used in the report (appendices)

Summary of excluded subjects, concentrations and doses

Number of subjects excluded and total number in final data:

# The data frame 'rawdata' contains all rows.
# In the data frame 'data', rows with C == "C" have been removed.

# Number of distinct subjects in the full dataset
summarise(rawdata, nSubTot = n_distinct(NMSEQSID))

# Number of distinct subjects remaining after exclusions
summarise(data, nSubIncl = n_distinct(NMSEQSID))

Total number of excluded rows:

# All rows flagged for exclusion (C == "C"), and how many there are
excluded <- filter(rawdata, C == "C")
nrow(excluded)

# Split the excluded rows by event type:
# EVID == 0 -> concentrations, EVID == 1 -> doses
excl_conc  <- filter(excluded, EVID == 0)
excl_doses <- filter(excluded, EVID == 1)

Out of which `r nrow(excl_doses)` rows are doses and `r nrow(excl_conc)` concentrations.

Is anything other than EVID=1 or EVID=0 excluded?

# TRUE when the dose and concentration rows together do not account for
# every excluded row, i.e. some excluded row has another EVID value
test <- (nrow(excl_doses) + nrow(excl_conc)) != nrow(excluded)

# check_message() is defined outside this file's visible code
check_message(test)

How much of the excluded concentration data is missing or BLQ?

# Excluded concentration rows flagged with MDV == 1
excl_conc_missing <- filter(excl_conc, MDV == 1)

nrow(excl_conc_missing) # rows
100 * (nrow(excl_conc_missing) / nrow(excl_conc)) # percent

Summarize excluded data

Review reasons for exclusion

Excluded doses

# Distinct COMMENT entries among the excluded dose rows.
# `[, "COMMENT"]` yields a plain vector for a base data.frame but a
# one-column tibble for a tibble, so the printed shape depends on the class
# of excl_doses.
unique(excl_doses[,"COMMENT"])

Excluded concentrations

# Distinct COMMENT entries among the excluded concentration rows.
# `[, "COMMENT"]` yields a plain vector for a base data.frame but a
# one-column tibble for a tibble, so the printed shape depends on the class
# of excl_conc.
unique(excl_conc[,"COMMENT"])

Primary reason for exclusion split by BLQ/non-BLQ:

# In this dataset there are not multiple comments in one row/cell.
# If that is the case, you need to be careful not to count it twice/
# overwrite the reason/decide which is the primary cause for exclusion.

# Add flag: derive the primary exclusion REASON from the free-text COMMENT.
# case_when() evaluates conditions top to bottom, so a comment matching both
# patterns is labelled "Randomly selected for illustration" (the same
# precedence as overwriting one ifelse() result with the next). Rows whose
# COMMENT is missing or matches neither pattern get NA_character_, which keeps
# REASON a character column even when nothing matches at all.
excl_conc <- excl_conc %>%
  mutate(
    REASON = case_when(
      str_detect(COMMENT, "Randomly selected for illustration") ~
        "Randomly selected for illustration",
      str_detect(COMMENT, "Pre first dose sample") ~
        "Pre first dose sample",
      TRUE ~ NA_character_
    )
  )

# Total n concentration: every concentration record (EVID == 0) in the raw
# data, whether excluded or not
total_conc <- filter(rawdata, EVID == 0)

# Count the excluded concentrations per exclusion reason and BLQ status,
# express each count as a percentage of all concentration records, and
# relabel BLQ (0/1) for presentation
summaryExclConc <-
  group_by(excl_conc, REASON, BLQ) %>%
  summarise(
    n = n(),
    `(%)` = round(100 * (n / nrow(total_conc)), digits = 1)
  ) %>%
  ungroup() %>%
  mutate(BLQ = factor(BLQ, levels = c(0, 1), labels = c("Non-BLQ", "BLQ")))

summaryExclConc
# Write summary table to file, only when the print_results parameter is set
if (params$print_results) {
  write.csv(
    summaryExclConc,
    file = file.path(directories[["res_eda_dir"]], "omittedConcDataSummary.csv"),
    row.names = FALSE # spelled out: `F` is a reassignable variable
  )
}

# Full listing to file: one row per excluded record, restricted to the
# columns needed for the listing
listingExcluded <-
  excl_conc %>%  # in this case using excl_conc as no doses were excluded
  # excluded %>%
  select(NMSEQSID, TIME, TAPD, DV, AMT, EVID, MDV, BLQ, DOSE, REASON)

# Written only when the print_results parameter is set
if (params$print_results) {
  write.csv(
    listingExcluded,
    file = file.path(directories[["res_eda_dir"]], "omittedDataListing.csv"),
    row.names = FALSE # spelled out: `F` is a reassignable variable
  )
}

How much of the non-BLQ excluded data is from sparse samples?

# Note, in this example the sparse data are those without an OCC assignment.
# Count the non-BLQ excluded concentrations that lack an OCC value, per
# exclusion reason, as a percentage of all concentration records.
summarySparse <-
  filter(excl_conc, is.na(OCC), BLQ == 0) %>%
  group_by(REASON) %>%
  summarise(
    n = n(),
    `(%)` = round(100 * (n / nrow(total_conc)), digits = 1)
  )
summarySparse

Review of comments not leading to exclusion

# Use the dataset 'data' for this, since all rows with C == "C" have been
# removed from it
unique(data[["COMMENT"]])

# Tabulate the remaining non-empty comments per event type, with each count
# also given as a percentage of all rows in 'data'
data %>%
  filter(!is.na(COMMENT), !(COMMENT %in% c(" ", ""))) %>%
  group_by(EVID, COMMENT) %>%
  summarise(
    n = n(),
    percentOfTotal = round(100 * (n / nrow(data)), digits = 2)
  )


AstraZeneca/pmworkbench documentation built on Nov. 18, 2019, 12:51 a.m.