functions.to.prep.data.R

#------ 1. function to rename data ------
rename_data <- function(){

}

#------ 2. functions to clean data ------

# What to exclude from the analysis, delete rows that
# 1. NA's in response (r)
# 2. NA's in sample size (n)
# 3. studies report with same dose in all arms
# 4. NA's in dose
# 5. single arm studies

#*** Exclude flexible dose (ASK Georgia)
removeNAdosresdata.fun=function(dataset,var.names="all")
{
  # this function takes a database and exclude studies that will cause problems in fiting the dosres model for a particular outcome
  # It return the same database but in the end it has a variable that tells you which studies to exclude
  # Give a name to that column in the nametoexclude variable
if(var.names=='all'){
  out.studies1 <- unique(dataset$studyid[apply(dataset,1,function(x)any(is.na(x)))])
}else{
  dataset.only.var <- subset(dataset,select=var.names)
  out.studies1 <- unique(dataset.only.var$studyid[apply(dataset.only.var,1,function(x)any(is.na(x)))])
}
  # 1. exclude studies with 0 events
  #out1=unique(studyid[is.na(r)|r ==0])
  out.studies2 <- unique(dataset$studyid[dataset$r==0])

  # 5. exclude studies with similar reported dose
  out.studies3=unique(dataset$studyid)[sapply(unique(dataset$studyid), function(x) sum(table(dataset$dose[which(dataset$studyid==x)])>1))==1]

  # the index of all excluded studies
  out.studies <- c(out.studies1,out.studies2,out.studies3)
  # add column with include/exclude studies T/F
  dataset$exclude <- ifelse(dataset$studyid%in%out.studies,TRUE,FALSE)

  dataset_na.rm <- dataset[!dataset$studyid%in%out.studies,]
  return(list(dataset=dataset_na.rm,out=out.studies))
}

# exclude single arm trials ( remove NA's first)
exludesinglearmsdata.fun<-function(dataset,studyid)
{
  studyid=eval(substitute(studyid), dataset)
  singlearmstudies=names(table(studyid))[table(studyid)<2]
  dataset2=dataset[is.na(match(studyid,singlearmstudies)),]
  dataset2
  #returns a dataset with the same columns after excluding single arms
}



#------ 2. functions to make data in jags formatting ------

# 1. the function that find the RCS transformation for each drug
myf <- function(dose.per.drug){
  max.dose <- max(dose.per.drug)
  knots <- quantile(0:max.dose,probs = c(0.25,0.50,0.75))
  rcs.dose.per.drug <-  rcspline.eval(dose.per.drug,knots = knots,inclx = TRUE)
  return(rcs.dose.per.drug)
}

# 2. create a matrix of any variable 'var' where  studies in rows and arms in columns
fun.mat <- function(data,var){
  ns <-length(unique(data$studyid))
  na <- as.numeric(table(data$studyid)) # number of arms per study
  max.na <- max(na)
  data$studyID <- as.numeric(as.factor(data$studyid))         # transform studyid to ordered numeric values
  study_id <- unique(data$studyID)
  varmat <- matrix(NA,ns,max.na)
  for (i in 1:ns) {
    varmat[i,1:as.numeric(table(data$studyID)[i])] <- var[data$studyID == study_id[i]]
  }
  return(varmat)
}

# 3. determine the direct head-to-head comparisons
direct.comp.index <- function(data)
{
  data <- dplyr::arrange(data, data$studyid, data$dose)
  t1 <- vector()
  t2 <- vector()
  for (i in seq_along(unique(data[["studyid"]]))) {
    subset <- subset(data, studyid==unique(data[["studyid"]])[i])
    for (k in 2:nrow(subset)) {
      t1 <- append(t1, subset[["drug"]][1])
      t2 <- append(t2, subset[["drug"]][k])
      if (is.na(subset[["drug"]][k])) {
        stop()
      }
    }
  }

  comparisons <- data.frame(t1 = t1, t2 = t2)
  comparisons <- comparisons %>% dplyr::group_by(t1, t2) %>%
    dplyr::mutate(nr = dplyr::n())
  comparisons <- unique(comparisons)
  comparisons <- dplyr::arrange(comparisons, t1, t2)
  row_name = comparisons$row_name
  comparisons %<>% select(-row_name) %>% as.matrix
  rownames(comparisons) = row_name
  return(comparisons)
}
htx-r/doseresNMA documentation built on Jan. 28, 2021, 5:32 a.m.