# R/lhc_analysis.R

#' Summarises results of runs for parameter sets generated by a latin-hypercube
#'
#' Only to be applied for simulations that are stochastic, and responses are
#' supplied in the folder structure detailed in the R Journal paper, useful for
#' cases where the simulation is agent-based. Takes each parameter value set
#' generated by the hypercube in turn, and analyses the replicate simulation
#' results for that set. Produces a CSV file containing the parameters of the
#' run and the median of each simulation response for each run. In cases where,
#' for example, 300 runs have been performed for a parameter set, this file
#' will contain 300 rows for that set, each accompanied by the median of each
#' simulation response for that run. This file will be named as specified by
#' parameter LHC_ALL_SIM_RESULTS_FILE. This method can be performed for a number
#' of simulation timepoints, producing CSV files for each timepoint taken.
#'
#' @param FILEPATH Directory where the simulation runs or single CSV file can
#' be found
#' @param SPARTAN_PARAMETER_FILE Location of the file output by the
#' latin-hypercube sampling method. Note if providing a single CSV file
#' with parameter/response pairings, you do not need to provide this file,
#' and can thus enter this parameter as NULL.
#' @param PARAMETERS Array containing the names of the parameters of which
#' parameter samples will be generated
#' @param NUMSAMPLES The number of parameter subsets that were generated in the
#'  LHC design. Only required if analysing results provided within Folder
#'  structure setup.
#' @param NUMRUNSPERSAMPLE The number of runs performed for each parameter
#' subset. This figure is generated through Aleatory Analysis. Only required
#' if analysing results provided within Folder structure setup.
#' @param MEASURES Array containing the names of the output measures which are
#'  used to analyse the simulation
#' @param RESULTFILENAME Name of the simulation results file. In the current
#' version, XML and CSV files can be processed. If performing this analysis
#' over multiple timepoints, it is assumed that the timepoint follows the
#' file name, e.g. trackedCells_Close_12.csv.
#' @param ALTFILENAME In some cases, it may be relevant to read from
#' a further results file if the initial file contains no results. This
#' filename is set here. In the current version, XML and CSV files can be
#' processed.
#' @param OUTPUTCOLSTART Column number in the simulation results file where
#' output begins - saves (a) reading in unnecessary data, and (b) errors where
#' the first column is a label, and therefore could contain duplicates.
#' @param OUTPUTCOLEND Column number in the simulation results file where the
#' last output measure is.
#' @param LHC_ALL_SIM_RESULTS_FILE Name to be given to the CSV file that
#' summarises all simulation runs for all parameter sets
#' @param TIMEPOINTS Implemented so this method can be used when analysing
#' multiple simulation timepoints. If only analysing one timepoint, this
#' should be set to NULL. If not, this should be an array of timepoints,
#' e.g. c(12,36,48,60)
#' @param TIMEPOINTSCALE Implemented so this method can be used when analysing
#'  multiple simulation timepoints. Sets the scale of the timepoints being
#'  analysed, e.g. "Hours"
#' @param check_done If multiple timepoints, whether the input has been checked
#' @param write_csv_file Whether the analysis should be written to CSV file. Used
#' with spartanDB, where results are submitted to analysis database
#'
#' @export
lhc_process_sample_run_subsets <- function(
  FILEPATH, SPARTAN_PARAMETER_FILE, PARAMETERS, NUMSAMPLES, NUMRUNSPERSAMPLE,
  MEASURES, RESULTFILENAME, ALTFILENAME, OUTPUTCOLSTART, OUTPUTCOLEND,
  LHC_ALL_SIM_RESULTS_FILE, TIMEPOINTS = NULL, TIMEPOINTSCALE = NULL,
  check_done = FALSE, write_csv_file = TRUE) {

  # Record the call as it was made, so the argument checker receives both the
  # names of the supplied arguments and the expressions given for them
  input_check <- list(
    "arguments" = as.list(match.call()),
    "names" = names(match.call())[-1]
  )

  # The analysis only proceeds when the argument checks pass
  if (check_input_args(input_check$names, input_check$arguments)) {

    # Several timepoints: hand over to the wrapper, which calls this function
    # again once per timepoint with timepoint-specific file names
    if (!is.null(TIMEPOINTS)) {
      return(lhc_process_sample_run_subsets_overTime(
        FILEPATH, SPARTAN_PARAMETER_FILE, PARAMETERS, NUMSAMPLES,
        NUMRUNSPERSAMPLE, MEASURES, RESULTFILENAME,
        ALTFILENAME, OUTPUTCOLSTART, OUTPUTCOLEND,
        LHC_ALL_SIM_RESULTS_FILE, TIMEPOINTS, TIMEPOINTSCALE))
    }

    message("Generating Simulation Median Responses (process_sample_run_subsets)")

    # Parameter value sets produced by the latin-hypercube sampling
    parameter_sets <- read_from_csv(file.path(FILEPATH, SPARTAN_PARAMETER_FILE))

    # One row per replicate run: the parameter values plus the responses
    median_responses <- summarise_lhc_sweep_responses(
      FILEPATH, NUMRUNSPERSAMPLE, PARAMETERS, MEASURES, RESULTFILENAME,
      ALTFILENAME, NUMSAMPLES, parameter_sets, OUTPUTCOLSTART, OUTPUTCOLEND)

    # Only write a file when there is something to write and the caller
    # asked for CSV output
    if (!is.null(median_responses)) {
      if (write_csv_file) {
        write_data_to_csv(median_responses,
                          file.path(FILEPATH, LHC_ALL_SIM_RESULTS_FILE))
      }
    }
    return(median_responses)
  }
}

#' Pre-process analysis settings if multiple timepoints are being considered
#'
#' @inheritParams lhc_process_sample_run_subsets
lhc_process_sample_run_subsets_overTime <- function(FILEPATH, SPARTAN_PARAMETER_FILE,
                                           PARAMETERS, NUMSAMPLES,
                                           NUMRUNSPERSAMPLE, MEASURES,
                                           RESULTFILENAME, ALTFILENAME,
                                           OUTPUTCOLSTART, OUTPUTCOLEND,
                                           LHC_ALL_SIM_RESULTS_FILE,
                                           TIMEPOINTS, TIMEPOINTSCALE)
{
  # Process each timepoint by appending the timepoint to every file name and
  # re-calling the single-timepoint analysis. seq_along() is used rather than
  # 1:length() so that an empty TIMEPOINTS vector performs no iterations
  # instead of iterating over c(1, 0).
  for (n in seq_along(TIMEPOINTS)) {

    current_time <- TIMEPOINTS[n]
    message(paste0("Processing Timepoint: ", current_time))

    simresultfilename <- append_time_to_argument(
      RESULTFILENAME, current_time,
      check_file_extension(RESULTFILENAME))

    # The alternative results file is optional, so only amend it if given
    altfilename_full <- NULL
    if (!is.null(ALTFILENAME)) {
      altfilename_full <- append_time_to_argument(
        ALTFILENAME, current_time,
        check_file_extension(ALTFILENAME))
    }

    lhcallsimresultsfile_full <- append_time_to_argument(
      LHC_ALL_SIM_RESULTS_FILE, current_time,
      check_file_extension(LHC_ALL_SIM_RESULTS_FILE))

    # TIMEPOINTS and TIMEPOINTSCALE are passed as NULL so the callee takes its
    # single-timepoint branch
    lhc_process_sample_run_subsets(FILEPATH, SPARTAN_PARAMETER_FILE,
                                   PARAMETERS, NUMSAMPLES,
                                   NUMRUNSPERSAMPLE, MEASURES,
                                   simresultfilename,
                                   altfilename_full,
                                   OUTPUTCOLSTART, OUTPUTCOLEND,
                                   lhcallsimresultsfile_full,
                                   NULL, NULL, check_done = TRUE)
  }

  # As before, nothing is returned: results are written per timepoint
  invisible(NULL)
}

#' Processes an LHC sample, returning summary stats for all parameter sets
#'
#' @param filepath Directory where the simulation runs or single CSV file can
#' be found
#' @param numrunspersample The number of runs performed for each parameter
#' subset. This figure is generated through Aleatory Analysis. Only required
#' if analysing results provided within Folder structure setup.
#' @param parameters Simulation parameters being analysed / perturbed
#' @param measures Array containing the names of the output measures which are
#'  used to analyse the simulation
#' @param resultfilename Name of the simulation results file. In the current
#' version, XML and CSV files can be processed. If performing this analysis
#' over multiple timepoints, it is assumed that the timepoint follows the
#' file name, e.g. trackedCells_Close_12.csv.
#' @param altfilename In some cases, it may be relevant to read from
#' a further results file if the initial file contains no results. This
#' filename is set here. In the current version, XML and CSV files can be
#' processed.
#' @param num_samples The number of parameter subsets that were generated in the
#'  LHC design. Only required if analysing results provided within Folder
#'  structure setup.
#' @param lhctable Parameter sets generated by LHC sampling
#' @param outputcolstart Column number in the simulation results file where
#' output begins - saves (a) reading in unnecessary data, and (b) errors where
#' the first column is a label, and therefore could contain duplicates.
#' @param outputcolend Column number in the simulation results file where the
#' last output measure is.
#' @return Summary stats for all parameter sets
summarise_lhc_sweep_responses <- function(
  filepath, numrunspersample, parameters, measures, resultfilename, altfilename,
  num_samples, lhctable, outputcolstart,outputcolend) {

  all_sim_median_results <- NULL

  # seq_len() rather than 1:num_samples, so that zero samples performs no
  # iterations instead of iterating over c(1, 0)
  for (k in seq_len(num_samples)) {
    message(paste0("Summarising Responses for Parameter Set ", k))

    # Get parameters for this set
    param_row <- as.numeric(lhctable[k, ])

    # Get the median responses for the runs stored in sub-folder k
    median_results <- getMediansSubset(paste(filepath, "/", k, "/",
                                             sep = ""),
                                       numrunspersample, measures,
                                       resultfilename,
                                       altfilename,
                                       outputcolstart,outputcolend)

    # Repeat the parameter values once per result row, so each row of results
    # can be paired with the parameter set that produced it
    run_params <- matrix(param_row, nrow = nrow(median_results),
                         ncol = length(param_row), byrow = TRUE)

    # Bind parameters to results
    param_result <- cbind(run_params, median_results)

    # Add this to the set of all results being processed in this analysis
    all_sim_median_results <- rbind(all_sim_median_results, param_result)
  }

  # With no samples processed the result stays NULL, which the caller checks
  # for; setting column names on NULL would be an error
  if (!is.null(all_sim_median_results)) {
    colnames(all_sim_median_results) <- c(parameters, measures)
  }

  all_sim_median_results
}



#' Summarises simulation behaviour for each parameter set, by median of
#' distribution of replicate runs
#'
#' Processes either the CSV file generated by lhc_process_sample_run_subsets
#' or one that has been supplied, going through each line of that file and
#' generating a file that summarises simulation responses under each parameter
#' set. This CSV file, named as specified by parameter LHCSUMMARYFILENAME,
#' will contain one row for each parameter set, accompanied by the median of
#' all the responses contained in the LHC_ALL_SIM_RESULTS_FILE. This method
#' can also be performed for a number of simulation timepoints
#' @param FILEPATH Directory where the simulation runs or single CSV file can
#' be found
#' @param PARAMETERS Array containing the names of the parameters of which
#' parameter samples will be generated
#' @param MEASURES Array containing the names of the output measures which are
#' used to analyse the simulation
#' @param LHC_ALL_SIM_RESULTS_FILE If lhc_process_sample_run_subsets is used
#' (i.e. results processed by folder structure), this will contain the output
#' of that method. If specifying responses using a single CSV file, this will
#' contain the name of that file (which should be in the FILEPATH folder).
#' @param LHCSUMMARYFILENAME Name of the LHC Summary file to be generated.
#' Contains each parameter set alongside the result gained when the simulation
#' was run under that criteria.
#' @param SPARTAN_PARAMETER_FILE Location of the file output by the
#' latin-hypercube sampling method (list of parameters). Note if providing a
#' single CSV file with parameter/response pairings, you do not need to provide
#' this file, and can thus enter this parameter as NULL.
#' @param TIMEPOINTS Implemented so this method can be used when analysing
#' multiple simulation timepoints. If only analysing one timepoint, this should
#' be set to NULL. If not, this should be an array of timepoints,
#' e.g. c(12,36,48,60)
#' @param TIMEPOINTSCALE Implemented so this method can be used when analysing
#'  multiple simulation timepoints. Sets the scale of the timepoints being
#'  analysed, e.g. "Hours"
#' @param check_done If using multiple timepoints, whether data entry has been
#' checked
#' @param write_csv_file Whether the analysis should be written to CSV file. Used
#' with spartanDB, where results are submitted to analysis database
#'
#' @export
lhc_generateLHCSummary <- function(FILEPATH, PARAMETERS, MEASURES,
                                   LHC_ALL_SIM_RESULTS_FILE,
                                   LHCSUMMARYFILENAME,
                                   SPARTAN_PARAMETER_FILE = NULL,
                                   TIMEPOINTS = NULL, TIMEPOINTSCALE = NULL,
                                   check_done=FALSE, write_csv_file=TRUE) {

  # Record the call as it was made, so the argument checker receives both the
  # names of the supplied arguments and the expressions given for them
  input_check <- list(
    "arguments" = as.list(match.call()),
    "names" = names(match.call())[-1]
  )

  # The analysis only proceeds when the argument checks pass
  if (check_input_args(input_check$names, input_check$arguments)) {

    # Several timepoints: the wrapper re-calls this function per timepoint
    if (!is.null(TIMEPOINTS)) {
      return(lhc_generateLHCSummary_overTime(
        FILEPATH, PARAMETERS, MEASURES, LHC_ALL_SIM_RESULTS_FILE,
        LHCSUMMARYFILENAME, SPARTAN_PARAMETER_FILE = NULL, TIMEPOINTS,
        TIMEPOINTSCALE))
    }

    message("Generating LHC summary file from median simulation results (lhc_generateLHCSummary)")

    # All replicate results, one row per run
    replicate_results <- read_from_csv(
      file.path(FILEPATH, LHC_ALL_SIM_RESULTS_FILE))

    # One row per parameter set: parameter values and median responses
    summary_table <- summarise_replicate_runs(replicate_results, PARAMETERS,
                                              MEASURES)

    if (write_csv_file) {
      summary_path <- file.path(FILEPATH, LHCSUMMARYFILENAME)
      write_data_to_csv(summary_table, summary_path)
      message(paste0("LHC Summary file output to ", summary_path))
    }
    return(summary_table)
  }
}

#' Pre-process analysis settings if multiple timepoints are being considered
#'
#' @inheritParams lhc_generateLHCSummary
lhc_generateLHCSummary_overTime <- function(
  FILEPATH, PARAMETERS, MEASURES, LHC_ALL_SIM_RESULTS_FILE, LHCSUMMARYFILENAME,
  SPARTAN_PARAMETER_FILE = NULL, TIMEPOINTS, TIMEPOINTSCALE) {

  # Process each timepoint by appending the timepoint to the input and output
  # file names and re-calling the single-timepoint analysis. seq_along() is
  # used rather than 1:length() so that an empty TIMEPOINTS vector performs
  # no iterations instead of iterating over c(1, 0).
  for (n in seq_along(TIMEPOINTS)) {
    current_time <- TIMEPOINTS[n]
    message(paste0("Processing Timepoint: ", current_time))

    lhc_allsim_results_full <- append_time_to_argument(
      LHC_ALL_SIM_RESULTS_FILE, current_time,
      check_file_extension(LHC_ALL_SIM_RESULTS_FILE))

    lhc_summaryfilename_full <- append_time_to_argument(
      LHCSUMMARYFILENAME, current_time,
      check_file_extension(LHCSUMMARYFILENAME))

    # TIMEPOINTS and TIMEPOINTSCALE are passed as NULL so the callee takes its
    # single-timepoint branch
    lhc_generateLHCSummary(
      FILEPATH, PARAMETERS, MEASURES, lhc_allsim_results_full,
      lhc_summaryfilename_full, SPARTAN_PARAMETER_FILE, NULL, NULL,
      check_done=TRUE)
  }

  # As before, nothing is returned: results are written per timepoint
  invisible(NULL)
}

#' Summarises replicate runs of a parameter set. Used by LHC and eFAST
#'
#' @param lhc_all_sim_results All sim results for all parameter sets
#' @param PARAMETERS Array containing the names of the parameters of which
#' parameter samples will be generated
#' @param MEASURES Array containing the names of the output measures which are
#' used to analyse the simulation
#' @param bind_params Whether to include the parameter values in the set of results
#' (eFAST doesn't)
#' @return Summary of responses under each parameter set
summarise_replicate_runs <- function(lhc_all_sim_results, PARAMETERS, MEASURES, bind_params=TRUE)
{
  # Reads parameters from the result file rather than the spartan file, in
  # case the latter is not available. Assumes the rows are ordered: when a
  # different parameter set is found, the assumption is made that all the
  # simulations under the previous conditions have been processed.
  string_last_params_seen <- ""
  summary_table <- NULL

  # An absent or empty result table has nothing to summarise. Without this
  # guard 1:nrow() would iterate over c(1, 0) and process phantom rows.
  n_rows <- nrow(lhc_all_sim_results)
  if (is.null(n_rows) || n_rows == 0) {
    return(NULL)
  }

  # The parameter values occupy the leading columns of the result table
  param_cols <- seq_along(PARAMETERS)

  # Now process each row of the result file
  for (row in seq_len(n_rows)) {
    # Get the parameters from the result file
    sim_params <- lhc_all_sim_results[row, param_cols]
    # Convert to string so comparison can be made:
    sim_params_string <- paste(sim_params, collapse = " ")

    # Process if a new parameter set
    if (sim_params_string != string_last_params_seen) {
      string_last_params_seen <- sim_params_string

      # Subset the results to just this set of parameters
      param_result <- subset_results_by_param_value_set(
        PARAMETERS, lhc_all_sim_results, sim_params)

      # Now calculate medians for each measure and bind to result set
      summary_table <- rbind(summary_table,
                             calculate_medians_for_all_measures(
                               sim_params, param_result, MEASURES, bind_params))
    }
  }

  # Add headers; only valid when the parameter columns were bound and there is
  # a table to name (setting column names on NULL is an error)
  if (bind_params && !is.null(summary_table)) {
    colnames(summary_table) <- c(PARAMETERS, MEASURES)
  }

  summary_table
}

#' Calculate medians for all measures for a simulation parameter result
#' @param sim_params Current parameter set
#' @param param_result Set of results under those conditions
#' @param measures Simulation output responses
#' @param bind_params Whether to bind the parameter values to the output
#' @return Summary statistics for this set of parameters (with parameter values)
calculate_medians_for_all_measures <- function(sim_params, param_result,
                                               measures, bind_params = TRUE) {
  # Start the summary row with the parameter values if these are to be
  # included, otherwise start empty
  if (bind_params) {
    summary_sim_row <- sim_params
  } else {
    summary_sim_row <- NULL
  }

  # Append the median of each measure's column in turn. seq_along() is used
  # rather than 1:length() so that an empty set of measures appends nothing
  # instead of iterating over c(1, 0). The cbind() expression is deliberately
  # left as in the original, as the column names of the result can be derived
  # from it when sim_params is a data frame.
  for (l in seq_along(measures)) {
    summary_sim_row <- cbind(summary_sim_row,
                             median(param_result[[measures[l]]]))
  }
  return(summary_sim_row)
}


#' Generate Partial Rank Correlation Coefficients for parameter/response pairs
#'
#' For each parameter, and each simulation output measure, calculates the
#' Partial Rank Correlation Coefficient between the parameter value and the
#' simulation results, giving a statistical measurement of any effect that
#' is present.  This is output to a CSV file. Can be performed for a
#' number of timepoints if required.
#'
#' @inheritParams lhc_generateLHCSummary
#' @param CORCOEFFSOUTPUTFILE Name of the CSV file to be generated
#' @param cor_calc_method Way to calculate the correlation coefficient: Pearson's
#' ("p"), Spearman's ("s"), and Kendall's ("k"). Default is s
#' @param check_done If multiple timepoints, whether the input has been checked
#' @param write_csv_files Whether results should be output to CSV file. Used with spartanDB
#' @param lhc_summary_object If not specified in a CSV file, results can be specified in an
#' R object. In this case LHCSUMMARYFILENAME will be NULL
#' @return If no CSV file output, PRCC values returned as an R object
#' @export
#'
lhc_generatePRCoEffs <- function(
  FILEPATH, PARAMETERS, MEASURES, LHCSUMMARYFILENAME, CORCOEFFSOUTPUTFILE,
  TIMEPOINTS = NULL, TIMEPOINTSCALE = NULL, cor_calc_method=c("s"), check_done = FALSE,
  write_csv_files = TRUE, lhc_summary_object=NULL) {

  input_check <- list("arguments"=as.list(match.call()),"names"=names(match.call())[-1])

  # Run if all checks pass:
  if(check_input_args(input_check$names, input_check$arguments)) {

    if (is.null(TIMEPOINTS)) {

      # The summary can come from a CSV file or from an R object; a file name
      # takes precedence when both are given
      if (!is.null(LHCSUMMARYFILENAME)) {
        lhc_result_file <- read_from_csv(file.path(FILEPATH,LHCSUMMARYFILENAME))
      } else if (!is.null(lhc_summary_object)) {
        lhc_result_file <- lhc_summary_object
      } else {
        # Previously this fell through and failed later with
        # "object 'lhc_result_file' not found"
        stop("Either LHCSUMMARYFILENAME or lhc_summary_object must be provided",
             call. = FALSE)
      }

      message("Generating Partial Rank Correlation Coefficients (lhc_generatePRCoEffs)")

      COEFFRESULTS <- calculate_prccs_all_parameters(PARAMETERS, lhc_result_file,
                                                     MEASURES, cor_calc_method)

      if (write_csv_files) {
        write_data_to_csv(COEFFRESULTS,file.path(FILEPATH,CORCOEFFSOUTPUTFILE),row_names=TRUE)
        message(paste("File of PRCCs output to ", file.path(FILEPATH,CORCOEFFSOUTPUTFILE),
                      sep=""))
      } else {
        message("Calculated PRCCs returned as R Object")
        return(COEFFRESULTS)
      }

    } else {
      # Forward the correlation method so that the caller's choice is also
      # used when analysing multiple timepoints (it was previously dropped,
      # silently falling back to the default)
      lhc_generatePRCoEffs_overTime(
        FILEPATH, PARAMETERS, MEASURES,LHCSUMMARYFILENAME, CORCOEFFSOUTPUTFILE,
        TIMEPOINTS, TIMEPOINTSCALE, cor_calc_method = cor_calc_method)
    }
  }
}

#' Generate Partial Rank Correlation Coefficients for parameter/response pairs for results in database
#'
#' For each parameter, and each simulation output measure, calculates the
#' Partial Rank Correlation Coefficient between the parameter value and the
#' simulation results, giving a statistical measurement of any effect that
#' is present. In this case, results are mined from a database, as created by
#' the spartanDB package, and the statistics returned for adding back to the DB.
#'
#' @param db_results Set of experiment results from the DB
#' @param parameters Simulation parameters
#' @param measures Simulation measures
#' @param cor_calc_method Way to calculate the correlation coefficient: Pearson's
#' ("p"), Spearman's ("s"), and Kendall's ("k"). Default is s
#' @return Correlation coefficients for all parameter/measure pairings
#' @export
#'
lhc_generatePRCoEffs_db_link <- function(
  db_results, parameters, measures, cor_calc_method=c("s")) {

  message("Generating Partial Rank Correlation Coefficients (lhc_generatePRCoEffs)")

  COEFFRESULTS <- calculate_prccs_all_parameters(parameters, db_results,
                                                 measures, cor_calc_method)

  return(COEFFRESULTS)

}

#' Pre-process analysis settings if multiple timepoints are being considered
#'
#' @inheritParams lhc_generatePRCoEffs
lhc_generatePRCoEffs_overTime <- function(FILEPATH, PARAMETERS, MEASURES,
                                 LHCSUMMARYFILENAME, CORCOEFFSOUTPUTFILE,
                                 TIMEPOINTS, TIMEPOINTSCALE,
                                 cor_calc_method = c("s")) {

  # Process each timepoint, by amending the filenames and re-calling the
  # single-timepoint analysis. seq_along() is used rather than 1:length() so
  # that an empty TIMEPOINTS vector performs no iterations instead of
  # iterating over c(1, 0).
  for (n in seq_along(TIMEPOINTS)) {
    current_time <- TIMEPOINTS[n]
    message(paste0("Processing Timepoint: ", current_time))

    lhcsummaryfilename_full <- append_time_to_argument(
      LHCSUMMARYFILENAME, current_time,
      check_file_extension(LHCSUMMARYFILENAME))

    corcoeffsfile_full <- append_time_to_argument(
      CORCOEFFSOUTPUTFILE, current_time,
      check_file_extension(CORCOEFFSOUTPUTFILE))

    lhc_generatePRCoEffs(FILEPATH, PARAMETERS, MEASURES,
                         lhcsummaryfilename_full, corcoeffsfile_full,
                         TIMEPOINTS = NULL, TIMEPOINTSCALE = NULL,
                         cor_calc_method = cor_calc_method,
                         check_done = TRUE)
  }

  # As before, nothing is returned: results are written per timepoint
  invisible(NULL)
}

#' Calculate PRCC values for all parameter-measure pairs
#' @param PARAMETERS Simulation parameters
#' @param LHCRESULTFILE Summary statistics for all LHC parameter sets
#' @param MEASURES Simulation output responses
#' @param cor_calc_method Way to calculate the correlation coefficient: Pearson's
#' ("p"), Spearman's ("s"), and Kendall's ("k"). Default is s
#' @return Correlation coefficients for all pairings
calculate_prccs_all_parameters <- function(PARAMETERS, LHCRESULTFILE, MEASURES,
                                           cor_calc_method=c("s"))
{

  COEFFRESULTS <- NULL

  # Calculate coefficients for each parameter in turn, one result row per
  # parameter. seq_along() is used rather than 1:length() so that an empty
  # PARAMETERS vector performs no iterations.
  for (k in seq_along(PARAMETERS)) {

    # Get coefficient set
    COEFFDATA <- lhc_constructcoeff_dataset(LHCRESULTFILE, PARAMETERS[k],
                                            PARAMETERS)
    # Retrieve the values this parameter took in each parameter set
    COEFFPARAMCOL <- as.numeric(LHCRESULTFILE[, PARAMETERS[k]])

    # Calculate coefficients for this parameter against every measure
    COEFFRESULTS <- rbind(COEFFRESULTS, calculate_prcc_for_all_measures(
      MEASURES, COEFFPARAMCOL, COEFFDATA, LHCRESULTFILE, cor_calc_method))
  }

  # Nothing was calculated: either no parameters were supplied or the
  # per-measure calculation returned NULL for every parameter. Fail with a
  # clear message rather than the error raised by naming a NULL object.
  if (is.null(COEFFRESULTS)) {
    stop("No correlation coefficients could be calculated: check PARAMETERS, ",
         "MEASURES and the calculation method", call. = FALSE)
  }

  colnames(COEFFRESULTS) <- generate_prcc_results_header(MEASURES)
  rownames(COEFFRESULTS) <- PARAMETERS

  COEFFRESULTS
}

#' Generates the CSV file header for the prcc results file
#' @param measures The simulation output responses
#' @return Header object for CSV file
generate_prcc_results_header <- function(measures) {

  # No measures, no header. The original 1:length() loop would instead have
  # produced names built from NA and from an empty string.
  if (length(measures) == 0) {
    return(NULL)
  }

  # Name the columns for ease of reference later: each measure contributes an
  # estimate column followed by a p-value column
  estimate_names <- paste0(measures, "_Estimate")
  pvalue_names <- paste0(measures, "_PValue")

  # Stacking the two name vectors as rows and flattening (column-major)
  # interleaves them: m1_Estimate, m1_PValue, m2_Estimate, ...
  # The header is returned as a one-row character matrix, as before.
  matrix(as.vector(rbind(estimate_names, pvalue_names)), nrow = 1)
}

#' For all measures, calculate the prcc for each parameter
#' @param MEASURES Simulation output responses
#' @param COEFFPARAMCOL Results for the current simulation parameter
#' @param COEFFDATA Coefficient data object being created
#' @param LHCRESULTFILE Complete simulation results for all parameter sets
#' @param cor_calc_method Way to calculate the correlation coefficient: Pearson's
#' ("p"), Spearman's ("s"), and Kendall's ("k"). Default is s
#' @param prcc_method Method to calculate the partial correlation coefficient, either
#' variance-covariance matrix ("mat") or recursive formula ("rec"). Default mat
#' @return Updated set of parameter correlation coefficient results
calculate_prcc_for_all_measures <- function(MEASURES, COEFFPARAMCOL, COEFFDATA,
                                            LHCRESULTFILE, cor_calc_method=c("s"),
                                            prcc_method="mat")
{
  PARAM_RESULTS <- NULL

  # One estimate / p-value pair is appended per measure. seq_along() is used
  # rather than 1:length() so that an empty MEASURES vector performs no
  # iterations instead of iterating over c(1, 0).
  for (l in seq_along(MEASURES)) {
    # Responses recorded for this measure across all parameter sets
    COEFFMEASURERESULT <- as.numeric(LHCRESULTFILE[, MEASURES[l]])

    PARAMCOEFF <- pcor.test(COEFFPARAMCOL, COEFFMEASURERESULT,
                          COEFFDATA, calc_method=cor_calc_method, use=prcc_method)

    if (!is.null(PARAMCOEFF)) {
      PARAM_RESULTS <- cbind(PARAM_RESULTS, PARAMCOEFF$estimate,
                            PARAMCOEFF$p.value)
    } else {
      # A NULL result is reported as an invalid choice of method, and the
      # whole calculation for this parameter is abandoned
      message("Correlation Calculation method needs to be either s,p,or k, and prcc calculation method either rec or mat")
      return(NULL)
    }
  }
  return(PARAM_RESULTS)
}
# kalden/spartan documentation built on May 31, 2019, 11:52 p.m.