R/trialanalysis_input_functions.R

Defines functions get_trial_arm_details get_gender_details get_age_details get_timepoint_details get_outcome_details get_eq5d_details get_colnames_codedvalues

Documented in get_age_details get_colnames_codedvalues get_eq5d_details get_gender_details get_outcome_details get_timepoint_details get_trial_arm_details

#######################################################################
# 1. Get the packages required
#######################################################################
#2 Get the trial data ready
#2.1 Load the data

#######################################################################
# 2.2 Get the required fields and codes for timepoints, demography
# -age and gender, qol measure -EQ5D and any other analysis requires
#######################################################################
# 2.2.1 Get the details of the trial arm
#' Function to get the details of the trial arm
#' @param trialdata, data containing individual level trial data
#' @return the name of the variable related to trial arm and the unique contents
#' if success, else error
#' @examples
#' get_trial_arm_details(data.frame("Age" = c(21,15),
#' "arm" = c("control", "intervention")))
#' @importFrom IPDFileCheck  check_colno_pattern_colname
#' @importFrom IPDFileCheck get_colno_pattern_colname
#' @export
get_trial_arm_details  <-  function(trialdata) {
  trialdata <- data.table(trialdata, stringsAsFactors = FALSE)
  names  <-  colnames(trialdata)
  related_words  <-  c("arm", "trial", "trialarm")
  exists  <-  unlist(lapply(related_words,
                            IPDFileCheck::check_colno_pattern_colname, names))
  ind  <-  which(exists  ==  TRUE)
  colnumbers  <-  unlist(lapply(related_words[ind],
                                IPDFileCheck::get_colno_pattern_colname, names))
  if (sum(colnumbers > 0)  ==  1) {
    index  <-  which(colnumbers > 0)
    this_name  <-  names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) > 1) {
    index  <-  which(colnumbers > 0)[1]
    this_name  <-  names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) < 1) {
    stop("no matching columns found")
  }
  codes  <-  unique(trialdata[[this_name]])
  result  <-  list(name = this_name, codes = codes)
  return(result)
}
#######################################################################
#' Function to get the details of the gender column
#' @param trialdata, data containing individual level trial data
#' @return the name of the variable related to gender and the unique contents
#' if success, else error
#' @examples
#' get_gender_details(data.frame("Age" = c(21,15), "sex" = c("m", "f")))
#' @importFrom IPDFileCheck  check_colno_pattern_colname
#' @importFrom IPDFileCheck get_colno_pattern_colname
#' @export
get_gender_details  <-  function(trialdata) {
  names  <-  colnames(trialdata)
  related_words  <-  c("sex", "gender", "female", "male")
  exists  <-  unlist(lapply(related_words,
                            IPDFileCheck::check_colno_pattern_colname, names))
  ind  <-  which(exists  ==  TRUE)
  colnumbers  <-  unlist(lapply(related_words[ind],
                                IPDFileCheck::get_colno_pattern_colname, names))
  if (sum(colnumbers > 0)  ==  1) {
    index  <-  which(colnumbers > 0)
    this_name  <-  names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) > 1) {
    index  <-  which(colnumbers > 0)[1]
    this_name  <-  names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) < 1) {
    stop("no matching columns found")
  }
  codes  <-  unique(trialdata[[this_name]])
  result  <-  list(name = this_name, codes = codes)
  return(result)
}
#######################################################################
# 2.2.3 Get the colnames of age column
#' Function to get the details of the age column
#' @param trialdata, data containing individual level trial data
#' @return the name of the variable related to age and the unique contents
#' if success, else error
#' @examples
#' get_age_details(data.frame("Age" = c(21,15), "arm" = c("control", "intervention")))
#' @importFrom IPDFileCheck  check_colno_pattern_colname
#' @importFrom IPDFileCheck get_colno_pattern_colname
#' @export
get_age_details  <-  function(trialdata) {
  names  <-  colnames(trialdata)
  related_words  <-  c("age", "dob", "yob", "date of birth", "year of birth", "birth year")
  exists  <-  unlist(lapply(related_words, IPDFileCheck::check_colno_pattern_colname,
                            names))
  ind  <-  which(exists  ==  TRUE)
  colnumbers  <-  unlist(lapply(related_words[ind], IPDFileCheck::get_colno_pattern_colname,
                                names))
  if (sum(colnumbers > 0)  ==  1) {
    index  <-  which(colnumbers > 0)
    this_name  <-  names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) > 1) {
    index  <-  which(colnumbers > 0)[1]
    this_name  <-  names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) < 1) {
    stop("no matching columns found")
  }
  codes  <-  unique(trialdata[[this_name]])
  result  <-  list(name = this_name, codes = codes)
  return(result)
}
#######################################################################
# 2.2.4 Get the colnames of "time point" column
#' Function to get the details of the time point column
#' @param trialdata, data containing individual level trial data
#' @return the name of the variable related to time point and the unique contents
#' if success, else error
#' @examples
#' get_timepoint_details(data.frame("time" = c(21,15), "arm" = c("control", "intervention")))
#' @importFrom IPDFileCheck  check_colno_pattern_colname
#' @importFrom IPDFileCheck get_colno_pattern_colname
#' @export
get_timepoint_details <- function(trialdata) {
  names <- colnames(trialdata)
  related_words <- c("time point", "times", "time", "timepoint")
  exists <- unlist(lapply(related_words, IPDFileCheck::check_colno_pattern_colname,
                          names))
  ind <- which(exists == TRUE)
  colnumbers <- unlist(lapply(related_words[ind], IPDFileCheck::get_colno_pattern_colname,
                              names))
  if (sum(colnumbers > 0) == 1) {
    index <- which(colnumbers > 0)
    this_name <- names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) > 1) {
    index <- which(colnumbers > 0)[1]
    this_name <- names[colnumbers[index]]
  }
  if (sum(colnumbers > 0) < 1) {
    stop("no matching columns found")
  }
  codes <- unique(trialdata[[this_name]])
  result <- list(name = this_name, codes = codes)
  return(result)
}
#######################################################################
# 2.3 Get the required fields and codes for qol measure -EQ5D and
# anyother the analysis requires
#######################################################################
# 2.3,1 Get the colnames of outcome column
#' Function to get the details of the outcome column
#' @param trialdata, data containing individual level trial data
#' @param name, name of the variable
#' @param related_words, probable column names
#' @param multiple, indicates true if there are multiple columns
#' @return the name of the variable related to health outcome (any) and
#' the unique contents if success, else error
#' @examples
#' get_outcome_details(data.frame("qol.MO"=c(1,2), "qol.PD"=c(1,2), "qol.AD"= c(1,2)),
#' "eq5d", "qol",TRUE)
#' @importFrom IPDFileCheck  check_colno_pattern_colname
#' @importFrom IPDFileCheck get_colno_pattern_colname
#' @export
get_outcome_details <- function(trialdata, name, related_words, multiple=FALSE) {
  names <- colnames(trialdata)
  exists <- unlist(lapply(related_words, IPDFileCheck::check_colno_pattern_colname, names))
  ind <- which(exists == TRUE)
  colnumbers <- unlist(lapply(related_words[ind], IPDFileCheck::get_colno_pattern_colname,
                              names))
  if (sum(colnumbers > 0) == 1) {
    index <- which(colnumbers > 0)
    this_name <- names[colnumbers[index]]
    codes <- sort(unique(trialdata[[this_name]]))
  }
  if (sum(colnumbers > 0) > 1) {
    if (multiple == TRUE) {
      index <- which(colnumbers > 0)
      this_name <- names[colnumbers[index]]
      all.codes <- c(0)
      for (i in seq_len(length(this_name))) {
        this_ind <- this_name[i]
        codes <- sort(unique(trialdata[[this_ind]]))
        all.codes <- list(all.codes, codes)
        if (i == 1) {
          all.codes <- all.codes[-1]
        }
      }
      unlist_all <- unlist(all.codes)
      codes <- sort(unique(unlist_all))
    }else{
      index <- which(colnumbers > 0)[1]
      this_name <- names[colnumbers[index]]
      codes <- sort(unique(trialdata[[this_name]]))
    }
  }
  if (sum(colnumbers > 0) < 1) {
    stop("no matching columns found")
  }
  result <- list(name = this_name, codes = codes)
  return(result)
}
#######################################################################
# 2.3.2 Get the colnames of eq5d column
#' Function to get the details of the EQ5D column
#' @param trialdata, data containing individual level trial data
#' @return the name of the variable related to EQ5D and the unique contents if success, else error
#' @examples
#' get_eq5d_details(data.frame("MO"= c(1,2), "SC"= c(1,2), "UA"= c(1,2), "PD"= c(1,2), "AD"= c(1,2)))
#' @importFrom IPDFileCheck  check_colno_pattern_colname
#' @importFrom IPDFileCheck get_colno_pattern_colname
#' @export
get_eq5d_details <- function(trialdata) {
  names <- colnames(trialdata)
  related_words <- c("MO", "SC", "UA", "PD", "AD", "mobility", "self care", "usual activities",
                     "pain depression", "anxiety")
  exists <- unlist(lapply(related_words, IPDFileCheck::check_colno_pattern_colname, names))
  ind <- which(exists == TRUE)
  colnumbers <- unlist(lapply(related_words[ind], IPDFileCheck::get_colno_pattern_colname,
                              names))
  if (sum(colnumbers > 0) == 1) {
    warning("Need to match 5 columns")
    return(-2)
  }
  if (sum(colnumbers > 0) > 1) {
    index <- which(colnumbers > 0)
    this_name <- names[colnumbers[index]]
    codes <- 0
    for (j in seq_len(length(this_name))) {
      this_ind <- this_name[j]
      this_codes <- sort(unique(trialdata[[this_ind]]))
      codes <- append(codes, this_codes)
    }
    codes <- sort(unique(codes[-1]))
  }
  if (sum(colnumbers > 0) < 1) {
    stop("no matching columns found")
  }
  result <- list(name = this_name, codes = codes)
  return(result)
}
#######################################################################
# 3 Miscellanoues
#######################################################################
# 3.1 Keep the column name, coded values and non response code into a dataframe
#' Function to keep the column name, coded values and non response code into a dataframe
#' @param variable, name of the variable in the column
#' @param name, column name
#' @param code, coded values
#' @param nrcode, code for non response
#' @return data frame with all the above information
#' @examples get_colnames_codedvalues("arm", "pat_trial_arm",c("Y", "N"))
#' @export
get_colnames_codedvalues <- function(variable, name, code, nrcode=NA) {
  if (!is.null(name)) {
    colname <- name
    nrcode <- nrcode
    if (is.null(code)) {
      df <- data.frame(c(variable, colname, nrcode), stringsAsFactors = FALSE)
      the_names <- (c("variable", "colname", "nonrescode"))
    }else{
      coded_values <- code
      lizt <- seq(1, length(coded_values))
      coded_value_names  <-  sapply(lizt, paste0, "_coded_value")
      df <- data.frame(c(variable, colname, coded_values, nrcode), stringsAsFactors = FALSE)
      the_names <- (c("variable", "colname", unlist(coded_value_names), "nonrescode"))
    }
    rownames(df) <- the_names
    colnames(df)  <-  variable
    return((df))
  }else{
    stop("column name or coded values may be missing")
  }
}
#######################################################################
sheejamk/MarkovModel documentation built on Jan. 23, 2020, 2:44 a.m.