R/outcome_cost_analysis_functions.R

Defines functions get_mean_sd_age value_eq5d5L_IPD map_eq5d5Lto3L_VanHout value_ADL_scores_IPD value_Shows_IPD

########################################################################################
#' Function to return mean age from a data frame
#' @param this_data the data containing column with age
#' @param age_nrcode non response code
#' @return mean and sd, if success -1, if failure
#' @examples
#' this_data = as.data.frame (cbind(num=c(1,2,3,4), age = c(14,25,26,30)))
#' get_mean_sd_age(this_data,NA)
#' @export
get_mean_sd_age = function(this_data,age_nrcode){
  # Assumption is that age data is complete or incomplete data is denoted by empty
  # entry or a valid non response code.
  # Non response code is taken from config file
  # if age format is not right throw error
  age_details <- get_age_details(this_data)
  if (IPDFileCheck::test_age(this_data,age_details$name,age_nrcode) != 0) {
    stop("Error- age data format")
  }else{
    #else read the age column
    age_data = this_data[[age_details$name]]
    if (!is.na(age_nrcode))
      age_data = age_data[age_data!=age_nrcode]
    age_data = age_data[age_data != " "]
    meanage = mean(as.numeric(age_data[!is.na(age_data)]))
    sdage=stats::sd(as.numeric(age_data[!is.na(age_data)]))
    results = list(mean=meanage, sd=sdage)
    return(results)
  }
}

#####################################################################################
#' Function to add EQ5D5L scores to IPD data
#' @param ind_part_data a dataframe
#' @param eq5d_nrcode non response code for EQ5D5L, default is NA
#' @return qaly included modified data, if success -1, if failure
#' @examples
#' datafile = system.file("extdata", "trial_data.csv", package = "AnalyseOutcomesCost")
#' trial_data = load_trial_data(datafile)
#' value_eq5d5L_IPD(trial_data,NA)
#' @export
#' @source
#' http://eprints.whiterose.ac.uk/121473/1/Devlin_et_al-2017-Health_Economics.pdf
value_eq5d5L_IPD=function(ind_part_data,eq5d_nrcode){
  ind_part_data <- data.frame(ind_part_data)
  eq5d_details <- get_eq5d_details(ind_part_data)
  eq5d_columnnames <- eq5d_details$name
  timepoint_details <- get_timepoint_details(ind_part_data)
  if(sum(is.na(timepoint_details))==0){
    timepointscol <- timepoint_details$name
    timepoints <- unique(ind_part_data[[timepointscol]])
    nooftimepoints = length(timepoints)
  }else{
    timepointscol <- NA
    timepoints <- NA
    nooftimepoints = 1
  }
  for(j in 1:nooftimepoints){
    if(is.na(timepointscol) || timepointscol=="NA"){
      rows_needed = seq(1:nrow(ind_part_data))
    }else{
      rows_needed = which(ind_part_data[[timepointscol]]==timepoints[j])
    }
    #pick the responses assumes the order
    eq5d_responses=ind_part_data[rows_needed,eq5d_columnnames]
    #Check if the responses are numeric with range 1 to 5
    results = sapply(eq5d_columnnames,IPDFileCheck::test_data_numeric,eq5d_responses,eq5d_nrcode,1,5)
    if(any(results < 0)){
      stop("eq5d responses do not seem right")
    }else{
      index5L <- rep(0,nrow(eq5d_responses))
      for(i in seq(nrow(eq5d_responses))){
        index5L[i] = valueEQ5D::value5LInd("England",eq5d_responses[i,1],
                                         eq5d_responses[i,2],eq5d_responses[i,3],
                                         eq5d_responses[i,4],eq5d_responses[i,5])
      }
      new_colname=paste("EQ5D5LIndex")
      ind_part_data[rows_needed,new_colname]=index5L
    }
  }
  return(ind_part_data)
}
##########################################################################################################
#' Function to map EQ5D5L scores to EQ5D3L scores and then add to IPD data
#' @param ind_part_data a dataframe
#' @param eq5d_nrcode non response code for EQ5D5L, default is NA
#' @return qaly included modified data, if success -1, if failure
#' @examples
#' datafile = system.file("extdata", "trial_data.csv", package = "AnalyseOutcomesCost")
#' trial_data = load_trial_data(datafile)
#  map_eq5d5Lto3L_VanHout(trial_data, NA)
#' @description Function to add EQ5D5L scores to IPD data based on
#' http://eprints.whiterose.ac.uk/121473/1/Devlin_et_al-2017-Health_Economics.pdf
#' @export
map_eq5d5Lto3L_VanHout=function(ind_part_data,eq5d_nrcode){
  eq5d_details <- get_eq5d_details(ind_part_data)
  eq5d_columnnames <- eq5d_details$name
  ind_part_data <- data.frame(ind_part_data)
  timepoint_details <- get_timepoint_details(ind_part_data)
  if(sum(is.na(timepoint_details))==0){
    timepointscol <- timepoint_details$name
    timepoints <- unique(ind_part_data[[timepointscol]])
    nooftimepoints = length(timepoints)
  }else{
    timepointscol <- NA
    timepoints <- NA
    nooftimepoints = 1
  }
  for(j in 1:nooftimepoints){
    if(is.na(timepointscol) || timepointscol == "NA"){
      rows_needed = seq(1:nrow(ind_part_data))
    }else{
      rows_needed = which(ind_part_data[[timepointscol]] == timepoints[j])
    }
    #pick the responses assumes the order
    eq5d_responses = ind_part_data[rows_needed,eq5d_columnnames]
    #Check if the responses are numeric with range 1 to 5
    results = sapply(eq5d_columnnames,IPDFileCheck::test_data_numeric,eq5d_responses,eq5d_nrcode,1,5)
    if(any(results != 0)){
      stop("eq5d responses do not seem right")
    }else{
      index5L <- rep(0,nrow(eq5d_responses))
      for(i in seq(nrow(eq5d_responses))) {
        score_5L=as.numeric(paste(eq5d_responses[i,1],
                                  eq5d_responses[i,2], eq5d_responses[i,3],
                                  eq5d_responses[i,4], eq5d_responses[i,5], sep =""))
        index5L[i] = valueEQ5D::map5Lto3LInd("UK","CW",score_5L)
      }
      new_colname = paste("EQ5D3L_from5L")
      ind_part_data[rows_needed,new_colname] = index5L
    }
  }
  return(ind_part_data)
}

##########################################################################################################
#' Function to convert ADL scores to a T score
#' @param ind_part_data a dataframe containing IPD data
#' @param adl_related_words reltaed words to find out which columns contain adl data
#' @param adl_scoring ADL scoring table
#' @param adl_scoring_data_columns, ADL scoring table columnnames
#' @param multiple boolean to indicate there are mulitplevalues
#' @param timepointscol, timepoints measured
#' @param adl_nrcode non response code for ADL
#' @return ADLscores converted to T score included modified data, if success -1, if failure
#' @examples
#' datafile = system.file("extdata", "trial_data.csv", package = "AnalyseOutcomesCost")
#' trial_data = load_trial_data(datafile)
#  value_ADL_scores_IPD(trial_data,c("tpi"),adl_scoring,colnames(adl_scoring),"tbCodeQtnTimePoint",NA)
#' @description
#' Function to convert ADL scores to a T score based on file:///C:/Users/smk543/Downloads/PROMIS%20Pain%20Interference%20Scoring%20Manual.pdf
#' @export
value_ADL_scores_IPD=function(ind_part_data,adl_related_words,adl_scoring,adl_scoring_data_columns, multiple =TRUE,timepointscol=NA,adl_nrcode){
  adl_details <-  get_outcome_details(ind_part_data,"adl",adl_related_words,multiple = TRUE)
  adl_columnnames <- adl_details$name
  ind_part_data<-data.frame(ind_part_data)
  timepoint_details <- get_timepoint_details(ind_part_data)
  if(sum(is.na(timepoint_details))==0){
    timepointscol <- timepoint_details$name
    timepoints <- unique(ind_part_data[[timepointscol]])
    nooftimepoints = length(timepoints)
  }else{
    timepointscol <- NA
    timepoints <- NA
    nooftimepoints = 1
  }
  for(j in 1:nooftimepoints){
    if(is.na(timepointscol) | timepointscol=="NA"){
      rows_needed=seq(1:nrow(ind_part_data))
    }else{
      rows_needed=which(ind_part_data[[timepointscol]]==timepoints[j])
    }
    #get ADL responses
    adl_responses=ind_part_data[rows_needed,adl_columnnames]
    #Check if the responses are 8 for anindividual
    if(length(adl_columnnames)!=8){
      stop("error- ADL should have 8 columns")
    }else{
      #Check if the responses are numeric with range 1 to 5
      results=sapply(adl_columnnames,IPDFileCheck::test_data_numeric,adl_responses,adl_nrcode,1,5)
    }
    if(any(results<0)){
      stop("ADL responses do not seem right")
    }else{
      #Check if ADL scoring table has columns defined in the config file
      if(IPDFileCheck::test_columnnames(adl_scoring_data_columns,adl_scoring)==0){
        #Replace NA with 0
        adl_scoring[is.na(adl_scoring)] <- 0
        #Find the sum of scores
        sumADL=rowSums(adl_responses)
        TscoreADL=rep(0,length(sumADL))
        for(i in 1:length(sumADL)){
          ithrow=which(adl_scoring$Raw.score==sumADL[i])
          #Get the T score correspong to raw sum
          TscoreADL[i]=adl_scoring$T.Score[ithrow]
        }
        #Add the T score to data , save and return
        new_colname=paste("ADLTscore")
        data[rows_needed,new_colname]=TscoreADL
      }else{
        stop("Error ADL scoring column names are not equal to what specified in configuration file")
      }
    }
  }
  return(ind_part_data)
}
##########################################################################################################
#' Function to convert Shows scores
#' @param ind_part_data a dataframe containing IPD
#' @param shows_related_words a dataframe containing IPD
#' @param shows_nrcode non response code for ADL, default is NA
#' @return sum of scores, if success -1, if failure
#' @examples
#' datafile = system.file("extdata", "trial_data.csv", package = "AnalyseOutcomesCost")
#' trial_data = load_trial_data(datafile)
#' value_Shows_IPD(trial_data,"qsy",NA)
#' @export
value_Shows_IPD=function(ind_part_data,shows_related_words,shows_nrcode){
  shows_details <-  get_outcome_details(ind_part_data,"shows",shows_related_words,multiple = TRUE)
  shows_columnnames <- shows_details$name
  ind_part_data<-data.frame(ind_part_data)
  timepoint_details <- get_timepoint_details(ind_part_data)
  if(!is.na(timepoint_details)){
    timepointscol <- timepoint_details$name
    timepoints <- unique(ind_part_data[[timepointscol]])
    nooftimepoints = length(timepoints)
  }else{
    timepointscol <- NA
    timepoints <- NA
    nooftimepoints = 1
  }
  for(j in 1:nooftimepoints){
    if(is.na(timepointscol) || timepointscol=="NA"){
      rows_needed=seq(1:nrow(ind_part_data))
    }else{
      rows_needed=which(ind_part_data[[timepointscol]]==timepoints[j])
    }
    #get shows responses
    shows_responses=ind_part_data[rows_needed,shows_columnnames]
    #Check if the responses are 8 for anindividual
    if(length(shows_columnnames)!=10){
      stop("error- ShOWS should have 10 columns")
    }else{
      #Check if the responses are numeric with range 0 to 3 qctually --in the data it is coded from 1to 4.
      results=sapply(shows_columnnames,IPDFileCheck::test_data_numeric,shows_responses,shows_nrcode,1,4)
    }
    if(any(results<0)){
      stop("ShOWS responses do not seem right")
    }else{
      #Check if shows scoring table has columns defined in the config file
        sumShows=rowSums(shows_responses)-10
        #Add the score to data , save and return
        new_colname=paste("ShOWSscore")
        ind_part_data[rows_needed,new_colname]=sumShows
    }
  }
  return(ind_part_data)
}
sheejamk/MarkovModel documentation built on Jan. 23, 2020, 2:44 a.m.