################################################### # s05_eda_conc_time.Rmd # Description: Exploratory data analysis of concentrations vs time for population PK analysis # Dependencies: s01_datasetPrep.R / s01.RData ################################################### # Settings to knit in top directory: # Everything after this chunk works with paths relative top level library(rprojroot) knitr::opts_knit$set(root.dir=find_root(has_file("OpenProject.Rproj"))) knitr::opts_chunk$set(echo=F) # Note: R markdown opens a new R session, your global environment is not available.
This script uses the data.frame "data", loaded from s01.RData. That is, all rows with C=="C" has been excluded for exploratory data analysis.
# ----------------------------------------------- # Prepare environment # ----------------------------------------------- source(file = file.path("./Scripts","Setup","setup01_rEnvironment.R")) load(file = file.path("./Scripts","s01.RData"))
Are the plots and tables also being written to file?
params$print_results
Number of observations in the dataset by BLQ.
conc_data %>% group_by(BLQ) %>% summarize(n = n()) %>% mutate(percent = signif( 100 * n / sum(n), digits = 3))
Number of samples and occasions per subject (with/without BLQ)
# total conc_per_pat <- conc_data %>% group_by(NMSEQSID) %>% summarize(nDV=length(DV), nOcc = ifelse(all(is.na(OCC)), 0, max(as.numeric(as.character(OCC)), na.rm=T)), maxTIME = max(TIME, na.rm=T)) %>% ungroup() %>% select(nDV, nOcc, maxTIME) summary(conc_per_pat) # >BLQ conc_per_pat_noBLQ <- conc_data %>% filter(BLQ == "Non-BLQ") %>% group_by(NMSEQSID) %>% summarize(nDV=length(DV), nOcc = ifelse(all(is.na(OCC)), 0, max(as.numeric(as.character(OCC)), na.rm=T)), maxTIME = max(TIME, na.rm=T)) %>% ungroup() %>% select(nDV, nOcc, maxTIME) summary(conc_per_pat_noBLQ)
Number of individuals and samples stratified by XXX
# The pre-writted code is stratifying on study. Can easily be changes to e.g. dose level # Please note: if you are stratifying on DOSE and subjects # are gived multiple doses they are counted several times # Including BLQ samples conc_per_strata <- conc_data %>% group_by(STUDYID) %>% summarize(nSubjects = length(unique(NMSEQSID)), nConc = length(DV)) %>% ungroup() %>% mutate(pConcStrat = signif(100 * nConc / sum(nConc), digits=3)) # Excluding BLQ samples conc_per_strata_noBLQ <- conc_data %>% filter(BLQ == "Non-BLQ") %>% group_by(STUDYID) %>% summarize(nSubjects = length(unique(NMSEQSID)), nConc = length(DV)) %>% ungroup() %>% mutate(pConcStrat = signif(100 * nConc / sum(nConc), digits=3)) # % BQL per strata conc_per_strata <- conc_per_strata %>% bind_cols(nConc_NoBlq = conc_per_strata_noBLQ$nConc) %>% mutate(pBlqByStrat = signif(100 - (100 * nConc_NoBlq / nConc), digits=3)) %>% select(STUDYID, nSubjects, nConc, pConcStrat, pBlqByStrat) %>% arrange(rev(STUDYID)) %>% rename(Study = STUDYID, `Subjects (n)` = nSubjects, `Concentrations (n)` = nConc, `Concentrations (%)` = pConcStrat, `BLQ (%)` = pBlqByStrat) kable(conc_per_strata, caption = "Number of individuals and samples per study")
# --------- Save to file if(params$print_results){ write.csv(conc_per_strata, file = file.path(directories[["res_eda_dir"]], "conc_by_strat.csv"), row.names = F) }
Add code for histograms of BLQ/non-blq histograms for TAD
Lines connect data from one occasion within a subject. Colour indicate subject. Points indicate measured data. Dashed blue line show the lower limit of quantification. BLQ data is prited as LLOQ/2.
# Used to set the same x-axes in plots range_axes <- conc_data %>% summarize(minDV = min(DV, na.rm = T), maxDV = max(DV, na.rm = T), maxTIME = max(TIME, na.rm = T), maxTAPD = max(TAPD, na.rm = T)) # List of plots plots <- vector("list", length(conc_data_strat_split)) for(i in 1:length(conc_data_strat_split)){ p <- arrangeGrob( # linear gg_conc_time(conc_data_strat_split[[i]], x=TIME, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, range_axes$maxTIME)) + guides(colour="none", shape="none") + labs(x=labs_TIME, y=labs_conc), # log gg_conc_time(conc_data_strat_split[[i]], x=TIME, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, range_axes$maxTIME)) + guides(colour="none", shape="none") + labs(x=labs_TIME, y=labs_conc) + scale_y_log10(), nrow=2) plots[[i]] <- p } # Print all the elements in the list on a separate page walk(plots, grob_draw) # Print to file if(params$print_results){ pdf(file = file.path(directories[["res_eda_dir"]], paste0("conc_time_by_strat_", delivery_date,".pdf")), height=6.5, width=8.5) walk(plots, grob_draw) dev.off() }
Right: un-transformed scale. Left: log-transformed y-axis. Lines connect data from one occasion within a subject. Colour indicate subject. Points indicate measured data. Dashed blue line show the lower limit of quantification. BLQ data is prited as LLOQ/2.
# List of plots plots <- vector("list", length(conc_data_strat_split)) for(i in 1:length(conc_data_strat_split)){ p <- arrangeGrob( # linear gg_conc_time(conc_data_strat_split[[i]], x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, range_axes$maxTAPD)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc), # log gg_conc_time(conc_data_strat_split[[i]], x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, range_axes$maxTAPD)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc) + scale_y_log10(), nrow=1) plots[[i]] <- p } # Print all the elements in the list on a separate page walk(plots, grob_draw) # Print to file if(params$print_results){ pdf(file = file.path(directories[["res_eda_dir"]], paste0("conc_vs_tad_by_strat_", delivery_date,".pdf")), height = 3, width = 8.5) walk(plots, grob_draw) dev.off() }
Right: un-transformed scale. Left: log-transformed y-axis. Lines connect data from one occasion within a subject. Colour indicate subject. Points indicate measured data. Dashed blue line show the lower limit of quantification. BLQ data is prited as LLOQ/2.
# If you want to zoom in on a specific time scale e.g. absorption phase or first 12/24 hr x_max <- 12 # List of plots plots <- vector("list", length(conc_data_strat_split)) for(i in 1:length(conc_data_strat_split)){ p <- arrangeGrob( # linear gg_conc_time(conc_data_strat_split[[i]], x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, x_max)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc), # log gg_conc_time(conc_data_strat_split[[i]], x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, x_max)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc) + scale_y_log10(), nrow=1) plots[[i]] <- p } # Print all the elements in the list on a separate page walk(plots, grob_draw) if(params$print_results){ pdf(file=file.path(directories[["res_eda_dir"]], paste0("conc_vs_tad_by_strat_", x_max, "_", delivery_date,".pdf")), height=3, width=8.5) walk(plots, grob_draw) dev.off() }
Right: un-transformed scale. Left: log-transformed y-axis. Lines connect data from one occasion within a subject. Colour indicate subject. Points indicate measured data. Dashed blue line show the lower limit of quantification. BLQ data is prited at LLOQ/2.
# Zoom in on absorption phase x_max <- 4 # List of plots plots <- vector("list", length(conc_data_strat_split)) for(i in 1:length(conc_data_strat_split)){ p <- arrangeGrob( # linear gg_conc_time(conc_data_strat_split[[i]], x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, x_max)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc), # log gg_conc_time(conc_data_strat_split[[i]], x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT) + coord_cartesian(xlim=c(0, x_max)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc) + scale_y_log10(), nrow=1) plots[[i]] <- p } # Print all the elements in the list on a separate page walk(plots, grob_draw) if(params$print_results){ pdf(file=file.path(directories[["res_eda_dir"]], paste0("absorption_phase_", x_max,"_", delivery_date,".pdf")), height=3, width=8.5) walk(plots, grob_draw) dev.off() }
Right: un-transformed scale. Left: log-transformed y-axis. Lines connect data from one occasion within a subject. Colour indicate subject. Points indicate measured data. Dashed blue line show the lower limit of quantification. BLQ data is prited at LLOQ/2.
# First 24 hours after dose, compare single and steady state # (Requires that DOSEFLAG has been added to the dataset in s01_dataset_preparation.R) x_max <- 24 # List of plots plots <- vector("list", length(conc_data_strat_split)) for(i in 1:length(conc_data_strat_split)){ # remove sparse occasions (DOSEFLAG = NA) dat <- conc_data_strat_split[[i]] %>% filter(!is.na(DOSEFLAG)) p <- arrangeGrob( # linear gg_conc_time(dat, x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT+DOSEFLAG, nrow = 2) + coord_cartesian(xlim=c(0, x_max)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc), # log gg_conc_time(dat, x=TAPD, y=DV, color=NMSEQSID, occ=OCC) + facet_wrap(~STRATSPLIT+DOSEFLAG, nrow = 2) + coord_cartesian(xlim=c(0, x_max)) + guides(colour="none", shape="none") + labs(x=labs_TAPD, y=labs_conc) + scale_y_log10(), nrow=1) plots[[i]] <- p } # Print all the elements in the list on a separate page walk(plots, grob_draw) # Print to file if(params$print_results){ pdf(file = file.path(directories[["res_eda_dir"]], paste0("single_vs_multiple_dose_", x_max,"_", delivery_date,".pdf")), height=6, width=8.5) walk(plots, grob_draw) dev.off() }
The section below generates a list of individual plots with different combinations of the following settings:
Concentration versus time after first dose or time after dose
Log scale and untransformed scale
Free/fixed scales for axes across individuals
Not all plots are going to be useful, you can just comment out/delete the ones you do not want to keep.
# 1. Conc vs TAFD. Same axes across subjects. individualPlots <- list( gg_title_plot("Concentration vs. time after first dose \n Same axes across subjects")) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAFD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4) + # set scales for axes coord_cartesian(ylim=c(0, range_axes$maxDV), xlim=c(0, range_axes$maxTAFD)) + guides(shape="none") + labs(x=labs_TAFD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) # 2. Log Conc vs. TAFD. Same axes across subjects individualPlots <- c(individualPlots, list( gg_title_plot("Concentrations vs. time after first dose \n Same axes across subjects \n\n Semi-log"))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAFD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4) + # set scales for axes coord_cartesian(ylim=c(range_axes$minDV, range_axes$maxDV), xlim=c(0, range_axes$maxTAFD)) + scale_y_log10(breaks=c(1,10,100,1000)) + guides(shape="none") + labs(x=labs_TAFD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) # 3. Conc vs TAFD. Free scales on both axes individualPlots <- c(individualPlots, list( gg_title_plot("Concentration. vs time after first dose \n Free scales on axes"))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAFD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4, scales="free") + guides(shape="none") + labs(x=labs_TAFD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) # 4. Log Conc vs TAFD. Free scales on axes individualPlots <- c(individualPlots,list( gg_title_plot("Concentration vs. time after first dose \n Free scales on axes \n\n Semi-log"))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAFD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4, scales="free") + scale_y_log10(breaks=c(1,10,100,1000)) + guides(shape="none") + labs(x=labs_TAFD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) # 5. Conc vs. TAPD. Same y-axis across subjects. individualPlots <- c(individualPlots, list( gg_title_plot("Concentration vs. time after dose \n Same axes across subjects"))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAPD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4) + # set scales for axes coord_cartesian(ylim=c(0, range_axes$maxDV), xlim=c(0, range_axes$maxTAPD)) + guides(shape="none") + labs(x=labs_TAPD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) # 6. Log Conc vs. TAPD. Same y-axis across subjects individualPlots <- c(individualPlots,list( gg_title_plot("Concentration vs. time after dose \n Same axes across subjects \n\n Semi-log"))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAPD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4) + # set scales for axes coord_cartesian(ylim=c(range_axes$minDV, range_axes$maxDV), xlim=c(0, range_axes$maxTAPD)) + scale_y_log10(breaks=c(1,10,100,1000)) + guides(shape="none") + labs(x=labs_TAPD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) # 7. Conc vs TAPD - Free scales individualPlots <- c(individualPlots,list( gg_title_plot("Concentration vs. time after dose \n Free scales on axes"))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAPD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4, scales="free") + guides(shape="none") + labs(x=labs_TAPD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) ## 8. Log conc vs TAPD - Free scales individualPlots <- c(individualPlots,list( gg_title_plot("Concentration vs. time after dose \n Free scales on axes \n Semi-log"))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAPD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4, scales="free") + scale_y_log10(breaks=c(1,10,100,1000)) + guides(shape="none") + labs(x=labs_TAPD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) # 9. Log Conc vs. TAPD. Same y-axis across subjects, TAD <12 h x_max <- 12 individualPlots <- c(individualPlots,list( gg_title_plot(paste("Concentration vs. time after dose \n TAD <", x_max ,"\n\n Semi-log")))) p1 <- vector("list", length(conc_data_id_splits)) for(i in 1:(length(conc_data_id_splits))){ p <- gg_conc_time(conc_data_id_splits[[i]], x=TAPD, y=DV, color=REGIMEN, occ=OCC) + facet_wrap(~OSID+COHORT, labeller="label_both", nrow=3, ncol=4) + # set scales for axes coord_cartesian(ylim=c(range_axes$minDV, range_axes$maxDV), xlim=c(0, x_max)) + scale_y_log10(breaks=c(1,10,100,1000)) + guides(shape="none") + labs(x=labs_TAPD, y=labs_conc) p1[[i]] <- p } individualPlots <- c(individualPlots, p1) ## This list takes quite some time to print... # Print all the elements in the list on a separate page walk(individualPlots, grob_draw) if(params$print_results){ pdf(file = file.path(directories[["res_eda_dir"]], paste0("individual_conc_vs_time_", delivery_date,".pdf")), height=8.5, width=11) walk(individualPlots, grob_draw) dev.off() }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.