###################################################
# s07_nm_datasets.Rmd
# Description: Dataset preparation for NONMEM
# Dependencies: s01_dataset_preparation.R / s01.RData
###################################################
# Settings to knit in top directory:
# Everything after this chunk works with paths relative to the top level
library(rprojroot)
knitr::opts_knit$set(root.dir = find_root(has_file("OpenProject.Rproj")))
# Hide code in the rendered report; use TRUE/FALSE rather than the
# reassignable shorthand T/F
knitr::opts_chunk$set(echo = FALSE)
# Note: R Markdown opens a new R session; your global environment is not available.
This script uses the data.frame `rawdata`; that is, rows flagged with C == "C" are still included (as required by regulatory agencies). These rows are excluded in the NONMEM scripts instead.
# -----------------------------------------------
# Prepare environment
# -----------------------------------------------
# Load project-wide settings, packages and helper functions
source(file = file.path(".", "Scripts", "Setup", "setup01_rEnvironment.R"))

# Restore the objects prepared in step s01 (expected to provide
# rawdata and related variables used below)
load(file = file.path(".", "Scripts", "s01.RData"))
Should the prepared datasets be written to file?
# Echo the knit-time parameter that controls whether the prepared
# datasets are written to .csv files (set via the R Markdown YAML `params`).
params$print_csv
# List numeric columns to include for the NONMEM dataset.
# NOTE: the raw data carries TAFD (time after first dose), not TIME --
# TAFD is selected here and renamed to TIME below. The trailing block of
# covariates is required by the imputation chunk further down
# (TODO: confirm they exist as numeric columns in rawdata).
nm_columns <- c("C", "NMSEQSID", "TAFD", "TAPD", "AMT", "DV", "MDV", "EVID",
                "ADDL", "II", "CMT", "BLQ", "OCC", "STUDYID", "DOSE", "DAY",
                "AGE", "BCRCL", "BWT", "SEXM", "RACE",
                "BRENAL", "BHT", "BBMI", "BEGFR", "BSCR")

# Use rawdata: contains all "C"-flagged rows and numeric versions of columns.
# all_of() errors loudly if any listed column is missing.
nm_data <- rawdata %>%
  select(all_of(nm_columns))

# Comment out rows with negative time (pre-first-dose samples):
# the rows are kept but flagged with C == "C"
nm_data <- nm_data %>%
  mutate(C = ifelse(TAFD < 0, "C", C))

# Rename NMSEQSID to ID and TAFD to TIME (NONMEM reserved names).
# BUGFIX: a prior duplicate rename(ID = NMSEQSID) was removed -- renaming
# twice errors because NMSEQSID no longer exists the second time.
nm_data <- nm_data %>%
  rename(ID = NMSEQSID,
         TIME = TAFD)

# Use ln-transformed data?
# nm_data <- nm_data %>%
#   rename(NORMDV = DV,
#          DV = LNDV)

## Add a RATE column with value -2 at dosing records
# for estimation of zero-order input (for models with zero-order input)
nm_data <- nm_data %>%
  mutate(RATE = ifelse(!is.na(AMT), -2, NA))
## Categorical covariates: model as separate groups and potentially merge with other
# Missing renal-status category is coded as -99 for NONMEM
nm_data <- nm_data %>%
  mutate(BRENAL = ifelse(is.na(BRENAL), -99, BRENAL))

## Continuous covariates: impute based on median (stratified by sex if appropriate)
# One baseline row per subject, so medians are not weighted by the number
# of records each subject contributes
impute_baseline <- nm_data %>%
  filter(!duplicated(ID))

nm_data <- nm_data %>%
  mutate(
    # Height
    BHT = ifelse(is.na(BHT) & SEXM == 0,
                 median(impute_baseline$BHT[impute_baseline$SEXM == 0], na.rm = TRUE),
                 BHT),
    BHT = ifelse(is.na(BHT) & SEXM == 1,
                 median(impute_baseline$BHT[impute_baseline$SEXM == 1], na.rm = TRUE),
                 BHT),
    # Weight
    BWT = ifelse(is.na(BWT) & SEXM == 0,
                 median(impute_baseline$BWT[impute_baseline$SEXM == 0], na.rm = TRUE),
                 BWT),
    BWT = ifelse(is.na(BWT) & SEXM == 1,
                 median(impute_baseline$BWT[impute_baseline$SEXM == 1], na.rm = TRUE),
                 BWT),
    # Calculate BMI based on imputed (and reported) values.
    # BUGFIX: use the BWT/BHT columns directly (already imputed by the
    # statements above within this mutate()). The previous
    # nm_data$BWT[is.na(nm_data$BBMI)] form was a shorter subset that
    # ifelse() silently recycles, misaligning values across rows, and it
    # read the pre-imputation data.
    BBMI = ifelse(is.na(BBMI), BWT / (BHT / 100)^2, BBMI),
    # # Calculate BSA based on imputed (and reported) values (Dubois & Dubois)
    # BBSA = ifelse(is.na(BBSA),
    #               0.007184 * (BHT^0.725) * (BWT^0.425),
    #               BBSA),
    # # CrCL
    # BCRCL = ifelse(is.na(BCRCL) & SEXM == 0,
    #                median(impute_baseline$BCRCL[impute_baseline$SEXM == 0], na.rm = TRUE),
    #                BCRCL),
    # BCRCL = ifelse(is.na(BCRCL) & SEXM == 1,
    #                median(impute_baseline$BCRCL[impute_baseline$SEXM == 1], na.rm = TRUE),
    #                BCRCL),
    # eGFR
    BEGFR = ifelse(is.na(BEGFR) & SEXM == 0,
                   median(impute_baseline$BEGFR[impute_baseline$SEXM == 0], na.rm = TRUE),
                   BEGFR),
    BEGFR = ifelse(is.na(BEGFR) & SEXM == 1,
                   median(impute_baseline$BEGFR[impute_baseline$SEXM == 1], na.rm = TRUE),
                   BEGFR),
    # Serum creatinine (not sex-stratified)
    BSCR = ifelse(is.na(BSCR), median(impute_baseline$BSCR, na.rm = TRUE), BSCR)
  )
The dataset names are saved in variables that are used to update the NONMEM control files later on.
# Build the dataset file name and its output path
nm_data_filename <- paste0(dv_name, "_nm_", delivery_date, ".csv")
out_file <- file.path(directories[["derived_data_dir"]], nm_data_filename)

# Write the dataset (NONMEM conventions: "." for missing, no quoting)
if (params$print_csv) {
  write.csv(nm_data,
            file      = out_file,
            row.names = FALSE,
            quote     = FALSE,
            na        = ".")
}
nm_data_par <- nm_data

# Duplicate all dose records for estimation of zero-order input into the
# central compartment (CMT == 2). Captured BEFORE RATE is blanked below,
# so these copies keep RATE = -2.
doses <- nm_data_par %>%
  filter(EVID == 1) %>%
  mutate(CMT = 2)

# The original dose records are the ones using first-order absorption (Ka):
# clear RATE on those so only the duplicated doses carry RATE = -2
nm_data_par <- nm_data_par %>%
  mutate(RATE = ifelse(EVID == 1, NA, RATE))

# Merge the duplicated doses back in and sort by ID and time
nm_data_par <- rbind(nm_data_par, doses) %>%
  arrange(ID, TIME)

# Build the dataset file name and its output path
nm_data_par_filename <- paste0(dv_name, "_nm_", delivery_date, "_par", ".csv")
out_file <- file.path(directories[["derived_data_dir"]], nm_data_par_filename)

# Write the dataset
if (params$print_csv) {
  write.csv(nm_data_par,
            file      = out_file,
            row.names = FALSE,
            quote     = FALSE,
            na        = ".")
}
## Add dummy records at the time of dosing in order to
## avoid negative TAD for the transit compartment model
nm_data_transit <- nm_data

# Take the dose records and turn them into dummy observations placed
# just before the next dose (not needed for the first dose)
transit_dummy <- nm_data_transit %>%
  filter(!is.na(AMT) & TIME != 0) %>%
  # EVID = 2 (other-type event), drop the dose amount, shift the time to
  # 10 minutes before the dose; covariates are baseline-only so they
  # do not need to change
  mutate(EVID = 2,
         AMT  = NA,
         TIME = TIME - (10 / 60),
         TAPD = NA,
         CMT  = 2)

# Bind with the original data and sort by ID and time
nm_data_transit <- rbind(nm_data_transit, transit_dummy) %>%
  arrange(ID, TIME)
rownames(nm_data_transit) <- NULL

# Build the dataset file name and its output path
nm_data_transit_filename <- paste0(dv_name, "_nm_", delivery_date, "_transit", ".csv")
out_file <- file.path(directories[["derived_data_dir"]], nm_data_transit_filename)

# Write the dataset
if (params$print_csv) {
  write.csv(nm_data_transit,
            file      = out_file,
            row.names = FALSE,
            quote     = FALSE,
            na        = ".")
}
# Collect the generated dataset names together with a description of the
# modifications applied to each, then render the table in the report
nm_datasets <- data.frame(
  filename = c(nm_data_filename,
               nm_data_par_filename,
               nm_data_transit_filename),
  Description = c(
    "Dataset prepared for NONMEM use: (i) only numerical columns selected, (ii) missing covariates imputed, (iii) NMSEQSID renamed to ID and TAFD renamed to TIME, (iv) RATE column added to estimate duration of zero-order absorption",
    "Duplicated dose record added for estimation of sequential or parallel zero- and first order absorption",
    "Dummy variable added at 10 min before each dose in order to avoid negative time after dose results for transit compartment model"
  )
)
kable(nm_datasets)
# Write the dataset inventory (quoted: the descriptions contain commas)
if (params$print_csv) {
  write.csv(nm_datasets,
            file = file.path(directories[["derived_data_dir"]],
                             "prepared_datasets.csv"),
            row.names = FALSE,
            quote = TRUE)
}
# Save the environment to be used in the development scripts.
# Remove the rmarkdown `params` object first, or it will cause issues in the
# next script. NOTE: this rm() call will generate an error when knitting
# (presumably because `params` is a locked binding in the rmarkdown
# environment -- TODO confirm), but all output is still generated as it should.
rm("params")
# NOTE(review): the file is saved as "s06.RData" although this script is
# s07_nm_datasets.Rmd -- confirm the downstream script expects this name.
save.image(file = file.path(directories[["scripts_dir"]],"s06.RData"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.