templates/group2group_FDA.R

# group2group_FDA.R

# This script demonstrates group comparison functional data analysis using the
# bayesFDA package. As the simplest form of valuable FDA, this does NOT
# account for repeated measures, covariates, or other ANOVA components. However,
# it does handle an arbitrary number of groups and all desired  pairwise
# comparisons are handled efficiently via one model.

#-------------------------------------------------------------------------------
# Setup Information #
#-------------------------------------------------------------------------------

# Provide paths to data files.
# Data from each group need to be placed in different files. Each csv file is
# organized such that a column is an observed curve. Prior time normalization is
# currently required, thus each csv must have the same number of rows. Missing
# values or NA's are only allowed if the entire column is missing, other than
# header information like subject label or treatment. Add as many as desired,
# note that order will be used later. The names will be used for output labels.

data_paths <- c(
  groupA = "path/to/group1.csv",
  groupB = "path/to/group2.csv",
  groupR = "another/path/to/data3.csv"
)

# Provide the number of header rows. This must be common for all files.

num_header_rows = 3

# Provide desired comparisons as a matrix of couplets. Order matters as the
# second value will be subtracted from the first. Provide numbers corresponding
# to the ordering of the above paths. For example, to compare group R minus
# group A in the example above, you would enter 3,1 as a couplet line. The names
# will be used for output labels.

comparison_couplets <- matrix(c(
  2,1,
  3,1
), ncol = 2, byrow = TRUE)
rownames(comparison_couplets) <- c("Comp_BminusA", "Comp_RminusA")

# Provide a path to a folder where output will be saved. Optionally, provide a
# naming scheme (do not include file extensions). Output contains so-called
# vector triplets, where, for any given curve of interest, a lower bound, mean,
# and upper bound are given. Output generated by this script organizes triplets
# into two files: a csv for group averages and a csv of requested comparisons.


output_path = "path/to/folder"
output_prefix = "output"

# The final user inputs are model fitting parameters. The defaults are generally
# safe assuming the data are smooth and there are at least 2*num_spline_basis
# points per curve.

num_spline_basis = 25
num_mcmc_iterations = 10000
num_mcmc_burnin = round(num_mcmc_iterations*0.1)

#-------------------------------------------------------------------------------
# Package Installation Comments #
#-------------------------------------------------------------------------------

# This script depends on the bayesFDA package, which is available at
# github.com/wzhorton/bayesFDA. Installation needs only to happen once. The
# following methods may be successful depending on your system.
# Method 1: > devtools::install_github("wzhorton/bayesFDA")
# Method 2: go to github.com/wzhorton/bayesFDA/releases/latest and download the binary
#   corresponding to your system. Install with > install.packages("path/to/binary", repos = NULL)
# Method 3: Same as method 2 except download source files and add type = "source" to install code.

#-------------------------------------------------------------------------------
# Main Script (Modify at your own risk) #
#-------------------------------------------------------------------------------

library(bayesFDA)

#----------
# Read data
#----------
data_list <- lapply(data_paths, function(dp){
  tmp <- read.csv(dp, header = FALSE, skip = num_header_rows)
  tmp <- tmp[,apply(tmp, 2, function(cc) !any(is.na(cc)))]
})

#----------
# Fit model
#----------
model_fit <- multi_group_fda(data_list, num_spline_basis, num_mcmc_iterations, num_mcmc_burnin)

#---------------
# Process output
#---------------

# Constants
ngrp <- length(data_list)
ncomp <- nrow(comparison_couplets)
npts <- nrow(data_list[[1]])

basis <- splines::bs(seq(0,1,len=npts), df = num_spline_basis, intercept = TRUE)
n_vec <- sapply(data_list, ncol)
sd_curves <- sapply(data_list, function(crvs){
  apply(crvs, 1, sd)
})

# Group triplets
grp_fit_array <- apply(model_fit$beta_grp, c(1,2), function(x){
  c(lower = quantile(x, 0.025), est = mean(x), upper = quantile(x, 0.975))
})
grp_fit_list <- lapply(1:dim(grp_fit_array)[3], function(x) basis%*%t(grp_fit_array[,,x]))

# Comparison triplets
comp_fit_list <- lapply(1:ncomp, function(compi){
  coup <- comparison_couplets[compi,]
  diff_data <- model_fit$beta_grp[,coup[1],] - model_fit$beta_grp[,coup[2],]
  diff_fit_mat <- apply(diff_data, 1, function(x){
    c(lower = quantile(x, 0.025), diff = mean(x), upper = quantile(x, 0.975))
  })
  return(basis%*%t(diff_fit_mat))
})

# Areas of significance
grp_area_list <- lapply(grp_fit_list, function(trip){
  area_mat <- extract_sig_area(trip)
  return(area_mat)
})

comp_area_list <- lapply(1:ncomp, function(trip_ind){
  trip <- comp_fit_list[[trip_ind]]
  ns <- n_vec[comparison_couplets[trip_ind,]]
  area_mat <- extract_sig_area(trip)
  if(all(is.na(area_mat))){
    es_mat <- NA
  } else {
    es_mat <- t(apply(area_mat, 1, function(area_vec){
      max_loc <- area_vec[3]
      cohend(diff = trip[,2][max_loc], ns, sd_curves[max_loc, comparison_couplets[trip_ind,]])
    }))
  }
  return(cbind(area_mat, es_mat))
})

#-------------------------------------------------------------------------------
# Write Output (Modify at your own risk) #
#-------------------------------------------------------------------------------

setwd(output_path)

for(i in 1:ngrp){
  write.csv(grp_fit_list[[i]], paste0(output_prefix,"_", names(data_paths)[i],"_triplets.csv"),
            row.names = FALSE)
  write.csv(grp_area_list[[i]], paste0(output_prefix,"_", names(data_paths)[i],"_areas.csv"),
            row.names = FALSE)
}

for(i in 1:ncomp){
  write.csv(comp_fit_list[[i]], paste0(output_prefix,"_", rownames(comparison_couplets)[i],"_triplets.csv"),
            row.names = FALSE)
  write.csv(comp_area_list[[i]], paste0(output_prefix,"_", rownames(comparison_couplets)[i],"_areas.csv"),
            row.names = FALSE)
}
wzhorton/bayesFDA documentation built on Dec. 23, 2021, 6:15 p.m.