Nothing
#Copyright (c) 2009-2022 Sebastien Bihorel
#All rights reserved.
#
#This file is part of scaRabee.
#
# scaRabee is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# scaRabee is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with scaRabee. If not, see <http://www.gnu.org/licenses/>.
#
scarabee.read.data <- function(files=NULL, method=NULL){
## Checks inputs
if (is.null(files)){
stop('files argument is NULL.')
}
if (is.null(files$data)){
stop('files argument does not have any data level or files$data is NULL.')
}
if (!file.exists(files$data)){
stop('data file does not exist.')
}
if (is.null(method)){
stop('method argument is NULL.')
}
if (length(method)!=1 | !method%in%c('population','subject')){
stop('method argument should be \'population\' or \'subject\'.')
}
## Read-in data file
cat('Processing data file:\n')
data <- read.csv(file=files$data,
header=TRUE,
as.is=TRUE,
na.string=c('.',NA))
# Check data names
if (paste(names(data),collapse='')!=paste(toupper(names(data)),collapse=''))
stop('headers in data file should be in upper case.')
names(data) <- toupper(names(data))
## Check dimension
if (size(data,1)<1)
stop('the data file does not contain any data.')
if (length(names(data))!=length(unique(names(data))))
stop('the data file should not contain duplicate variables names.')
## Check variable headers
if (!'OMIT'%in%names(data))
data <- cbind(data.frame(omit=0),data)
required.vars <- c('OMIT','TRT','ID','TIME','AMT','RATE',
'CMT','EVID','DV','DVID','MDV')
if(any(is.na(match(required.vars,names(data))))){
which.missing <- required.vars[is.na(match(required.vars,names(data)))]
stop(paste('the dataset does not contain the required variable(s):\n ',
paste(which.missing,collapse=' '),sep=''))
}
## Check for reserved variable names
reserved.names <- c('firstTIME','newTIME')
if (any(names(data)%in%reserved.names)){
stop(paste('the dataset should not contain variables using',
'reserved names:\n ',
paste(reserved.names,collapse=' '),sep=''))
}
## Check numerical variables
num.vars <- required.vars[-c(1,2)]
numvar.class <- sapply(num.vars,function(x,...) class(data[,x]),data)
if (!all(numvar.class=="numeric" | numvar.class=="integer")){
nonnumvars.index <- which(!(numvar.class=="numeric" |
numvar.class=="integer"))
stop(paste('\ non numerical values are contained in the ',
'following variable(s):\n ',
paste(num.vars[nonnumvars.index],collapse=' '),sep=''))
}
## Coerced reserved variables to integers
data$OMIT <- as.integer(data$OMIT)
data$ID <- as.integer(data$ID)
data$CMT <- as.integer(data$CMT)
data$EVID <- as.integer(data$EVID)
data$MDV <- as.integer(data$MDV)
data$DVID <- as.integer(data$DVID)
## Check for illegal missing
if (sum(is.na(data$ID))>0)
stop('ID variable cannot be NA or \'.\'.')
if (sum(is.na(data$CMT))>0)
stop('CMT variable cannot be NA or \'.\'.')
if (sum(is.na(data$EVID))>0)
stop('EVID variable cannot be NA or \'.\'.')
if (sum(is.na(data$MDV))>0)
stop('MDV variable cannot be NA or \'.\'.')
## Check ID column contain sequence of continuous integers
uniqueIDs <- unique(data$ID)
minID <- min(uniqueIDs)
maxID <- max(uniqueIDs)
if (minID!=1 | length(uniqueIDs)!=maxID)
stop(paste('the subject IDs do not consist of a ',
'continuous sequence of integers starting from 1.',sep=''))
## Check EVID
uniqueEVIDs <- sort(unique(data$EVID))
if (!all(uniqueEVIDs%in%c(0,1)))
stop('EVID variable should be set to 0 or 1.')
## Check DVID content
if (length(which(is.na(data$DVID)))> 0)
data$DVID[which(is.na(data$DVID))] <- 0
dosingDVID <- unique(data$DVID[which(data$EVID==1)])
obsDVID <- unique(data$DVID[which(data$EVID==0)])
if (length(which(data$EVID==1)) > 0 &&
(length(dosingDVID)!=1 | dosingDVID[1]!=0))
stop('DVID variable must be set to 0 for dosing events.')
if (any(obsDVID[1]==0))
stop('DVID variable must not be set to 0 for observation events.')
## Check MDV
uniqueMDVs <- sort(unique(data$MDV))
if (!all(uniqueMDVs%in%c(0,1)))
stop('MDV variable should be set to 0 or 1.')
dosingMDV <- unique(data$MDV[which(data$EVID==1)])
if (length(which(data$EVID==1)) > 0 &&
(length(dosingMDV)!=1 | dosingMDV[1]!=1))
stop('MDV variable must be to 1 for dosing events.')
## Exclude any data record with OMIT not equal to 0 or NA
if (length(which(is.na(data$OMIT))) > 0)
data$OMIT[which(is.na(data$OMIT))] <- 0
if (length(which(data$OMIT==0)) > 0) {
data <- data[which(data$OMIT==0),]
} else {
stop('all records set for omission.')
}
data <- data[,-which(names(data)=="OMIT")]
required.vars <- required.vars[-1]
## Check that rate is not set to -1 and -2 in the same dataset
if (all(c(-1,-2)%in%unique(data$RATE))){
stop('the RATE variable cannot be set to -1 and -2 in the dataset.')
}
# Sort data by subject, treatment, time, and DVID
data <- data[order(data$ID,
data$TRT,
data$TIME,
data$DVID),]
row.names(data) <- 1:size(data,1)
# Assess time variable (assume the time of the first record with same ID and
# TRT should be set to 0)
uniques <- data[!duplicated(data[,c('ID','TRT')]),]
names(uniques)[which(names(uniques)=='TIME')] <- 'firstTIME'
data$unique <- paste(data$ID,data$TRT)
uniques$unique <- paste(uniques$ID,uniques$TRT)
data <- merge(data,uniques[,c('firstTIME','unique')],by='unique')[,-1]
data$newTIME <- data$TIME - data$firstTIME
# Create a new data file if TIME and newTIME are different
new.data.file <- FALSE
if (!all(data$TIME==data$newTIME)){
cat(paste(' Note: The time of the first event in at least one ',
'treatment period for at\n least one individual was not 0.',
' The modified dataset created for this\n analysis ',
'was stored in the working directory:\n ',files$data,
'.new\n', sep=''))
data$TIME <- data$newTIME
data <- data[,-which(names(data)%in%c('firstTIME','newTIME'))]
write.csv(data,
file=paste(files$data,'.new',sep=''),
quote=FALSE,
row.names=FALSE)
new.data.file <- TRUE
}
if (length(which(names(data)%in%c('firstTIME','newTIME'))>0))
data <- data[,-which(names(data)%in%c('firstTIME','newTIME'))]
# Find variable indices
itt.index <- match(c('ID','TRT','TIME'),names(data))
data.index <- match(c('DVID','DV'),names(data))
dose.index <- match(c('CMT','AMT','RATE'),names(data))
cmt.index <- match('CMT',names(data))
cov.index <- which(!names(data)%in%required.vars)
## Process data according to method
###### Start of subject-based data processing
if (method=='subject') {
# Sort data by subject, treatment, DVID, and time
data <- data[order(data$ID,
data$TRT,
data$DVID,
data$TIME),]
## Start point for real data processing
uniqueIDs <- unique(data$ID)
# Create backbone of function output list (with one slot per id)
out.data <- sapply(1:length(uniqueIDs),
function(x,tmp) c(tmp,list()),
tmp=list())
names(out.data) <- uniqueIDs
out.data <- c(out.data, list(ids=uniqueIDs))
for (id in uniqueIDs){
id.data <- data[which(data$ID==id),]
idTRTs <- unique(id.data$TRT)
out.data[[as.character(id)]] <- sapply(1:length(idTRTs),
function(x,tmp) c(tmp,list()),
tmp=list())
names(out.data[[as.character(id)]]) <- idTRTs
out.data[[as.character(id)]] <- c(out.data[[as.character(id)]],
list(trts=idTRTs))
out.data[[as.character(id)]]$id <- id
}
## Extract dosing information
dose.data <- data[which(data$EVID==1),]
data <- data[which(data$EVID!=1),]
if (size(data,1)>1) {
row.names(data) <- 1:size(data,1)
} else {
stop(paste('the dataset does not contain any data ',
'for the dependent variable(s).',sep=''))
}
dose.data <- dose.data[,c(itt.index,dose.index,cov.index)]
# Exclude sample records with MDV not set to 0
if (length(which(data$MDV==0)) > 0) {
data <- data[which(data$MDV==0),]
} else {
stop('no sample record with MDV=0 was found.')
}
# Split data between analysis data and covariates data by ID and TRT
ana.data <- data[,c(itt.index,data.index)]
cov.data <- data[,c(itt.index,cmt.index,cov.index)]
for (id in uniqueIDs){
id.ana.data <- ana.data[which(ana.data$ID==id),]
id.cov.data <- cov.data[which(cov.data$ID==id),]
uniqueTRTs <- unique(id.ana.data$TRT)
for (treat in uniqueTRTs){
browser()
out.data[[as.character(id)]][[as.character(treat)]]$ana <-
id.ana.data[which(id.ana.data$TRT==treat), names(id.ana.data)[-c(1,2)]]
tmp <- id.cov.data[which(id.cov.data$TRT==treat),names(id.cov.data)]
names(tmp) <- toupper(names(tmp))
out.data[[as.character(id)]][[as.character(treat)]]$cov <- tmp
out.data[[as.character(id)]][[as.character(treat)]]$trt <- treat
}
}
# Convert dosing information to scarabee-readable format
for (id in uniqueIDs) {
id.data <- dose.data[which(dose.data$ID==id),]
id.data <- id.data[,-1]
if (size(id.data,1) > 0) { # There is dosing information for id
for (treat in uniqueTRTs) {
trt.data <- id.data[which(id.data$TRT==treat),]
if (size(trt.data,1) > 0) { # There is dosing information for treat
bol.data <- trt.data[which(trt.data$RATE==0),
c('TIME','CMT','AMT','RATE')]
inf.data <- trt.data[which(trt.data$RATE!=0),-1]
bol.data <- bol.data[order(bol.data$TIME,bol.data$CMT),]
inf.data <- inf.data[order(inf.data$TIME,inf.data$CMT),]
if (size(bol.data,1) > 0){
out.data[[as.character(id)]][[as.character(treat)]]$bolus <-
bol.data
} else {
out.data[[as.character(id)]][[as.character(treat)]]$bolus <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
if (size(inf.data,1) > 0){
out.data[[as.character(id)]][[as.character(treat)]]$infusion <-
inf.data
} else {
out.data[[as.character(id)]][[as.character(treat)]]$infusion <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
} else { # There is no dosing information for treat
out.data[[as.character(id)]][[as.character(treat)]]$bolus <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
out.data[[as.character(id)]][[as.character(treat)]]$infusion <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
}
} else { # There is no dosing information for id
for (treat in uniqueTRTs) {
out.data[[as.character(id)]][[as.character(treat)]]$bolus <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
out.data[[as.character(id)]][[as.character(treat)]]$infusion <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
}
}
###### Start of population-based data processing
} else {
# Sort data by treatment, DVID, and time
data <- data[order(data$TRT,
data$DVID,
data$TIME),]
# Create backbone of function output list (with one slot per id)
out.data <- list(list())
names(out.data) <- '1'
out.data <- c(out.data, list(ids=1))
idTRTs <- unique(data$TRT)
out.data[[1]] <- sapply(1:length(idTRTs),
function(x,tmp) c(tmp,list()),
tmp=list())
names(out.data[[1]]) <- idTRTs
out.data[[1]] <- c(out.data[[1]],list(trts=idTRTs))
out.data[[1]]$id <- 1
## Extract dosing information
dose.data <- data[which(data$EVID==1),]
data <- data[which(data$EVID!=1),]
if (size(data,1)>1) {
row.names(data) <- 1:size(data,1)
} else {
stop(paste('the dataset does not contain any data ',
'for the dependent variable(s).',sep=''))
}
dose.data <- dose.data[,c(itt.index,dose.index,cov.index)]
# Exclude sample records with MDV not set to 0
if (length(which(data$MDV==0)) > 0) {
data <- data[which(data$MDV==0),]
} else {
stop('no sample record with MDV=0 was found.')
}
# Split data between analysis data and covariates data by ID and TRT
ana.data <- data[,c(itt.index,data.index)]
cov.data <- data[,c(itt.index,cmt.index,cov.index)]
uniqueTRTs <- unique(ana.data$TRT)
for (treat in uniqueTRTs){
if (length(which(ana.data$TRT==treat)) < 2 ) {
stop(paste('there was less than two samples for treatment ',
treat, '.',sep=''))
}
out.data[[1]][[as.character(treat)]]$ana <-
ana.data[which(ana.data$TRT==treat), names(ana.data)[-c(1,2)]]
tmp <- cov.data[which(cov.data$TRT==treat), names(cov.data)]
names(tmp) <- toupper(names(tmp))
out.data[[1]][[as.character(treat)]]$cov <- tmp
out.data[[1]][[as.character(treat)]]$trt <- treat
}
## Convert dosing information to scarabee-readable format
dose.data <- dose.data[,c('ID','TRT','TIME','CMT','AMT','RATE')]
id.data <- dose.data[which(dose.data$ID==1),]
id.data <- id.data[,-1]
if (size(id.data,1) > 0) { # There is dosing information
for (treat in uniqueTRTs) {
trt.data <- id.data[which(id.data$TRT==treat),]
if (size(trt.data,1) > 0) { # There is dosing information for treat
bol.data <- trt.data[which(trt.data$RATE==0),
c('TIME','CMT','AMT','RATE')]
inf.data <- trt.data[which(trt.data$RATE!=0),-1]
bol.data <- bol.data[order(bol.data$TIME,bol.data$CMT),]
inf.data <- inf.data[order(inf.data$TIME,inf.data$CMT),]
if (size(bol.data,1) > 0){
out.data[[1]][[as.character(treat)]]$bolus <- bol.data
} else {
out.data[[1]][[as.character(treat)]]$bolus <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
if (size(inf.data,1) > 0){
out.data[[1]][[as.character(treat)]]$infusion <- inf.data
} else {
out.data[[1]][[as.character(treat)]]$infusion <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
} else { # There is no dosing information for treat
out.data[[1]][[as.character(treat)]]$bolus <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
out.data[[1]][[as.character(treat)]]$infusion <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
}
} else { # There is no dosing information
for (treat in uniqueTRTs) {
out.data[[1]][[as.character(treat)]]$bolus <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
out.data[[1]][[as.character(treat)]]$infusion <-
data.frame(TIME=numeric(0),
CMT=numeric(0),
AMT=numeric(0),
RATE=numeric(0))
}
}
}
cat(' Done\n\n')
return(list(data=out.data,new=new.data.file))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.