###################################################
# s03_summary_excluded_data.R
# Description: Summary of data excluded with C="C"
# Dependencies: s01_dataset_preparation.R / s01.RData
###################################################

# Settings to knit in top directory:
# everything after this chunk works with paths relative to the top level.
library(rprojroot)

# Use the directory that contains "OpenProject.Rproj" as knitr's root
# directory.
knitr::opts_knit$set(root.dir = find_root(has_file("OpenProject.Rproj")))

# Default chunk option echo = FALSE. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
knitr::opts_chunk$set(echo = FALSE)

# Note: R markdown opens a new R session, your global environment is not
# available.
Load the dataset as prepared by "s01_dataset_preparation.R"
# -----------------------------------------------
# Prepare environment
# -----------------------------------------------
# Run the environment setup script, then load the objects stored in s01.RData.
# Both paths are relative to the project top level.
source(file.path("./Scripts", "Setup", "setup01_rEnvironment.R"))
load(file.path("./Scripts", "s01.RData"))
The outputs from this script are:
Summary table of the number of excluded records and the reasons for exclusion (split by >/< LLOQ)
Table listing each excluded record
Both are intended to be used in the report (appendices)
Number of subjects excluded and total number in final data:
# The data frame 'rawdata' contains all rows.
# In the data frame 'data' the rows with C == "C" have been removed.
# Count the distinct subject identifiers (NMSEQSID) in each of them.
summarize(rawdata, nSubTot = n_distinct(NMSEQSID))
summarize(data, nSubIncl = n_distinct(NMSEQSID))
Total number of excluded rows:
# All rows of 'rawdata' that carry the exclusion flag C == "C".
excluded <- filter(rawdata, C == "C")
nrow(excluded)

# Split the excluded rows by event type: EVID == 1 (doses) and
# EVID == 0 (concentrations).
excl_doses <- filter(excluded, EVID == 1)
excl_conc <- filter(excluded, EVID == 0)
Out of which `r nrow(excl_doses)`
rows are doses and `r nrow(excl_conc)`
concentrations.
Is anything other than EVID=1 or EVID=0 excluded?
# TRUE when the excluded dose and concentration rows do not add up to all
# excluded rows, i.e. at least one excluded row has an EVID that is neither
# 1 nor 0 (or is missing).
test <- (nrow(excl_doses) + nrow(excl_conc)) != nrow(excluded)

# check_message() is defined outside this section; it receives the flag.
check_message(test)
How much of the excluded concentration data is missing or BLQ?
# Excluded concentration rows flagged with MDV == 1.
excl_conc_missing <- filter(excl_conc, MDV == 1)

# Number of such rows
nrow(excl_conc_missing)

# Their share of all excluded concentration rows, in percent
(nrow(excl_conc_missing) / nrow(excl_conc)) * 100
# Distinct values of the COMMENT column among the excluded dose rows.
# NOTE(review): `[, "COMMENT"]` yields a vector for a plain data.frame but a
# one-column tibble for a tibble - confirm which class these objects have.
unique(excl_doses[,"COMMENT"])
# Distinct values of the COMMENT column among the excluded concentration rows.
unique(excl_conc[,"COMMENT"])
Primary reason for exclusion split by BLQ/non-BLQ:
# In this dataset there are not multiple comments in one row/cell.
# If that is the case, you need to be careful not to count it twice/
# overwrite the reason/decide which is the primary cause for exclusion.

# Add flag: REASON is the primary reason for exclusion, taken from COMMENT.
# case_when() returns the first matching branch, so the branches are listed in
# order of precedence: a comment containing both texts is classified as
# "Randomly selected for illustration". Rows matching neither text (or with a
# missing COMMENT) get NA. REASON is always a character column, also when no
# row matches at all.
excl_conc <- excl_conc %>%
  mutate(
    REASON = case_when(
      str_detect(COMMENT, "Randomly selected for illustration") ~
        "Randomly selected for illustration",
      str_detect(COMMENT, "Pre first dose sample") ~
        "Pre first dose sample"
    )
  )

# Total n concentration: every EVID == 0 row of 'rawdata'. Its row count is
# the denominator of the percentage column.
total_conc <- rawdata %>%
  filter(EVID == 0)

# Generate summary and tweak to presentable table:
# one row per REASON/BLQ combination with the count and the percentage of all
# concentration rows; BLQ is relabelled from 0/1 to "Non-BLQ"/"BLQ".
summaryExclConc <- excl_conc %>%
  group_by(REASON, BLQ) %>%
  summarize(
    n = n(),
    `(%)` = round(100 * (n / nrow(total_conc)), digits = 1)
  ) %>%
  ungroup() %>%
  mutate(BLQ = factor(BLQ, levels = c(0, 1), labels = c("Non-BLQ", "BLQ")))
summaryExclConc
# Write summary table to file (only when the report parameter print_results
# is set).
if (params$print_results) {
  write.csv(
    summaryExclConc,
    file = file.path(directories[["res_eda_dir"]], "omittedConcDataSummary.csv"),
    row.names = FALSE
  )
}

# Full listing to file
# In this case excl_conc is used as no doses were excluded.
# Alternative starting point kept from the original script:
# excluded %>%
listingExcluded <- excl_conc %>%
  select(NMSEQSID, TIME, TAPD, DV, AMT, EVID, MDV, BLQ, DOSE, REASON)

if (params$print_results) {
  write.csv(
    listingExcluded,
    file = file.path(directories[["res_eda_dir"]], "omittedDataListing.csv"),
    row.names = FALSE
  )
}
# Note: in this example the sparse data are the rows without an OCC
# assignment. Only non-BLQ rows (BLQ == 0) are counted, per REASON, and
# expressed as a percentage of all concentration rows (total_conc).
summarySparse <- excl_conc %>%
  filter(is.na(OCC), BLQ == 0) %>%
  group_by(REASON) %>%
  summarize(
    n = n(),
    `(%)` = round(100 * (n / nrow(total_conc)), digits = 1)
  )
summarySparse
# Use the dataset 'data' for this, since all C == "C" rows are commented out
# of it.
unique(data$COMMENT)

# Keep only rows with a non-empty, non-missing COMMENT, then count them per
# EVID/COMMENT combination and express the count as a percentage of all rows
# in 'data'.
data %>%
  filter(!(COMMENT %in% c(" ", "")), !is.na(COMMENT)) %>%
  group_by(EVID, COMMENT) %>%
  summarize(
    n = n(),
    percentOfTotal = round(100 * (n / nrow(data)), digits = 2)
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.