SlicerMorphR: Import SlicerMorph data into R

Documented in read.malpaca.estimates

#' Read MALPACA estimates
#'
#' This function returns store MALPACA median estimates and corresponding manual landmarks in 3D arrays.
#' It also returns a 4D array that contains all landmark estimates of individual templates
#'
#' @param MALPACA_outputDir The output directory of MALPACA
#' @param template_number The number of templates
#' @return A list that contains median estimates, manual landmarks and estimates of individual templates \cr
#' The returned value contains: \cr
#' \itemize{
#'   \item $MALPACA_medians = a 3d array for the median estimates of MALPACA
#'   \item $allEstimates = A 4d array for all estimates of each template. The 1st dimension is the landmark number. The 2nd dimension specifies x, y, z coordinates. The 3rd dimension is the template number. The 4th dimension is the sample size.
#' }
#'
#' @examples
#' #The example below shows how this function reads MALPACA output.
#' #The output estimates are then submitted to the 'extract.outliers' for removing outlier estimates.
#' #Finally, 'get.medians'is used to calculate a new set of median estimates based on the result of 'extract.outliers'
#' #Please see the help file of 'extract.outliers' and 'get.medians' in SlicerMorphR for more detail.
#'
#' #Clone the Github repository that contains the sample ape MALPACA output and template landmark files using the url 'https://github.com/SlicerMorph/Mouse_Models.git'
#' #The ape MALPACA output is in the 'ape_MALPACA' folder. See 'https://github.com/SlicerMorph/Mouse_Models' for details
#'
#' # Users can also download and unzip the file in the Github repository using the following R code:
#' save.dir='C:/tmp'  #enter your directory to download the MALPACA data sample
#' setwd(save.dir)
#' zip_url <- "https://github.com/SlicerMorph/Mouse_Models/archive/refs/heads/main.zip"
#' download.file(url = zip_url, destfile = "Mouse_Models.zip")
#' unzip(zipfile = "Mouse_Models.zip")
#'
#' MAL_outDir_git<- paste(save.dir, 'Mouse_Models-main/ape_malpaca', sep="/")
#' #If cloned the repository, MAL_outDir_git <- 'your_directory_for_the_Cloned_folder/ape_malpaca'
#'
#' templates_path_git<- paste(save.dir, 'Mouse_Models-main/ape_malpaca/templates_LMs', sep='/')
#' #If cloned the repository, templates_path_git <- 'your_directory_for_the_Cloned_folder/ape_malpaca/templates_LMs'
#'
#' #Read MALPACA median estimates and estimates by individual templates
#' LMs <- read.malpaca.estimates(MALPACA_outputDir = MAL_outDir_git, templates_Dir = templates_path_git)
#'
#' allEst <- LMs$allEstimates #Store all individual estimates in a 4D array
#' #For the ape data
#' allEst[1:2, , 1, 3] #The LM1 and LM2 estimated by template 1 for target specimen 3
#'           [,1]    [,2]    [,3]
#' [1,] -15.37080 428.172 159.411
#' [2,]  -1.88649 365.697 180.744
#'
#' MAL_medians <- LMs$MALPACA_medians #Store all median estimates in a 3D array
#' MAL_medians[1:2, , 3] #The median estimates for LM 1 and LM3 of the target specimen 3
#'           [,1]     [,2]     [,3]
#'[1,] -7.218665 417.9610 158.4400
#'[2,] -1.850612 366.3141 180.6046
#'
#'
#' #########Run 'extract.outliers'
#' outPath <- the_output_path_for_boxplots_that_mark_outlier_estimates
#' outliers <- extract.outliers(allEstimates = allEst, MALPACA_medians = MAL_medians, outputPath = outPath)
#'
#' outliers$estimates_no_out[18:23, , 2, 7]
#' #Show LM18 to LM23 estimated by template 2 for the 7th target specimen.
#' #Note the coordinates of LM21 are marked as NA because it is an outlier
#'             x        y        z
#' 18   3.123262 441.5676 226.7014
#' 19   1.254022 417.1126 206.4035
#' 20  -2.445040 385.6000 193.9190
#' 21         NA       NA       NA
#' 22 -17.194445 457.1618 238.1673
#' 23  28.504499 455.5570 234.8030
#'
#' outliers$outlier_info[[7]]
#' #Print out the outlier LMs generated by each template for the 7th target specimen. "NULL" means no outlier generated.
#' $`USNM084655-Cranium_merged_1`
#' NULL
#'
#' $`USNM142185-Cranium`
#' [1] 21 39
#'
#' $`USNM153830-Cranium`
#' [1] 21
#'
#' $`USNM176236-Cranium_merged_1`
#' NULL
#'
#' $USNM590953_CRANIUM
#' [1] 19 36 38
#' $USNM599167_CRANIUM
#' [1] 19 25 36 37
#'
#'
#' #######Run 'get.medians'
#' medians <- get.medians(AllLMs = outliers$estimates_no_out, outlier.NA = TRUE)
#' #Because therer are NAs in outliers$estimates_no_out, assign "TRUE" to "outlier.NA"
#' medians[1:2, , 3]
#' [,1]     [,2]     [,3]
#' [1,] -7.218665 416.3260 158.4400
#' [2,] -1.850612 366.3141 180.6046
#' @export

read.malpaca.estimates <- function(MALPACA_outputDir, templates_Dir){
  #Read MALPACA median estimates and manual LMs
  medians_dir <- paste(MALPACA_outputDir, "medianEstimates", sep = "/")
  medianFiles <- list.files(medians_dir)
  
  #Fetch file type
  words <- strsplit(medianFiles[1], split = ".", fixed = TRUE)
  dataType <- words[[1]][length(words[[1]])]
  
  target_names <- NULL
  for (file in medianFiles){
    name <- strsplit(file, split = ".", fixed = TRUE)
    name <- name[[1]][1]
    name <- strsplit(name, split = "_", fixed = TRUE)
    name <- name[[1]][-length(name[[1]])] #Remove "_median" from the file name
    name <- paste(name, collapse = "_")
    target_names <- append(target_names, name)
  }
  
  template_files <- list.files(templates_Dir)
  template_number <- length(template_files)
  template_fileName <- vector()
  for (file in template_files){
    name <- strsplit(file, split = ".", fixed = TRUE)
    template_fileName <- append(template_fileName, name[[1]][1])
  }
  
  #Fetch sample size and landmark numbers
  if (dataType == 'fcsv'){
    temp <- read.markups.fcsv(file = paste(medians_dir, medianFiles[1], sep = "/"), forceLPS = TRUE)
  } else if (dataType == 'json'){
    temp <- read.markups.json(file = paste(medians_dir, medianFiles[1], sep = "/"))
  } else {
    print("The landmark files are neither mrk.json nor fcsv. Please check the proper data types")
  }
  LM_number <- dim(temp)[1]
  sample_size <- length(medianFiles)
  
  #Read MALPACA median estimates and cooresponding manual landmark files for the targets
  MALPACA_medians <- array(dim = c(LM_number, 3, sample_size))
  for (i in 1: sample_size){
    if (dataType == 'fcsv'){
      MALPACA_medians[, , i] <- read.markups.fcsv(file = paste(medians_dir, medianFiles[i], sep = "/"), forceLPS = TRUE)
    } else if (dataType == 'json'){
      MALPACA_medians[, , i] <- read.markups.json(file = paste(medians_dir, medianFiles[i], sep = "/"))
    } else {
      print("The landmark files are neither mrk.json nor fcsv. Please check the proper data types")
    }
  }
  dimnames(MALPACA_medians)[[1]] <- c(1:LM_number)
  dimnames(MALPACA_medians)[[2]] <- c("x", "y", "z")
  dimnames(MALPACA_medians)[[3]] <- target_names
  
  
  #Read individual Estimates
  allEstimates_dir <- paste(MALPACA_outputDir, "individualEstimates", sep = "/")
  files_all <- list.files(allEstimates_dir)
  allEstimates <- array(dim = c(LM_number, 3, template_number, sample_size))
  file_counter = 1
  
  for (i in 1:sample_size){
    for (j in 1:template_number){
      if (dataType == 'fcsv'){
        allEstimates[, , j, i] <- read.markups.fcsv(file = paste(allEstimates_dir, files_all[file_counter], sep = "/"), forceLPS = TRUE)
      } else if (dataType == 'json'){
        allEstimates[, , j, i] <- read.markups.json(file = paste(allEstimates_dir, files_all[file_counter], sep = "/"))
      } else {
        print("The landmark files are neither mrk.json nor fcsv. Please check the proper data types")
      }
      file_counter = file_counter + 1
    }
  }
  
  dimnames(allEstimates)[[1]] <- c(1:LM_number)
  dimnames(allEstimates)[[2]] <- c("x", "y", "z")
  dimnames(allEstimates)[[3]] <- template_fileName
  dimnames(allEstimates)[[4]] <- target_names
  
  results <- list()
  results$MALPACA_medians <- MALPACA_medians
  results$allEstimates <- allEstimates
  
  return (results)
  
}