# R/gt3x_2_csv.R
#
# Defines functions: gt3x_2_csv, save_accel, header_csv, save_header, read_info
#
# Documented in: gt3x_2_csv, header_csv, read_info, save_accel, save_header

#' @title Read metadata from a GT3X file
#' 
#' @description Read the metadata from a GT3X file as a data.frame.
#' @details Given a complete path to a file with extension ".gt3x" (exported from Actilife's software), import its internal "info.txt" file, extract its metadata and format it into an R data.frame. Every non-blank "key: value" line becomes one column, named with make.names() (e.g. "Start Date" becomes Start.Date). Only the first ": " of a line is treated as the separator, so values that themselves contain ": " are kept whole. The Download.Date, Last.Sample.Time, Start.Date and Stop.Date columns are converted to POSIXct (UTC) by dividing the stored number by 1e7 and reading the result as seconds since 0001-01-01; these four keys must be present in "info.txt".
#' @param origin path to a GT3X file from which to read the metadata
#' @param verbose logical: whether to show detailed log messages or not (default: FALSE). Currently not used by this function; kept so that all functions of this file share the same interface.
#' @return A data.frame with one column per metadata key. All columns are character, except the four date columns listed in the details, which are POSIXct.
#' @importFrom tidyr separate spread
#' @importFrom dplyr rename_all mutate_at vars
#' @importFrom magrittr divide_by %>%
#' @export
read_info <- function(origin, verbose = FALSE) {
  # Connection to the info.txt file stored inside the GT3X (zip) archive:
  info <- unz(origin, filename = "info.txt")

  # Release the connection when leaving this function, even on error:
  on.exit(close(info), add = TRUE)

  # Read the raw lines and drop blank ones, which would otherwise end up as
  # an empty-named key after reshaping:
  info_lines <- readLines(info)
  info_lines <- info_lines[nzchar(trimws(info_lines))]

  # Format the lines and return them:
  data.frame(key = info_lines, stringsAsFactors = FALSE) %>%
    # Breaking the lines into variables. 'extra = "merge"' keeps everything
    # after the first ": " in 'value' instead of silently discarding it:
    separate(key, c("key", "value"), ": ", extra = "merge") %>%
    # Changing to wide format:
    spread(key, value) %>%
    # Renaming all columns to syntactic names:
    rename_all(make.names) %>%
    # Formatting dates properly:
    mutate_at(vars(Download.Date,
                   Last.Sample.Time,
                   Start.Date,
                   Stop.Date),
              . %>%
                as.numeric %>%
                divide_by(1e7) %>%
                as.POSIXct(origin = "0001-01-01", tz = "UTC"))
}

#' @title Export GT3X metadata to CSV 
#' 
#' @description Formats the data.frame obtained from the read_info function into Actilife's header format, then exports it to a CSV file.
#'
#' @details Saves the header extracted from the .gt3x file with the read_info function in the .csv extension (look at read_info function). The output file is created or overwritten and contains the ten header lines only. The firmware version and the sample rate written in the title line are taken from the 'Firmware' and 'Sample.Rate' columns of 'df_file' when they are present; otherwise the previous fixed values ("1.9.2" and "30") are used.
#' @param df_file data.frame with metadata generated by the read_info function. Must provide Serial.Number, Start.Date and Download.Date (date-time objects) and Battery.Voltage.
#' @param outfile_csv  complete path to the output CSV file the header will be written to
#' @param verbose logical: whether to show detailed log messages or not (default: FALSE)
#' @importFrom hms as_hms
#' @importFrom stringr str_sub
#' @importFrom magrittr %>%
#' @importFrom logger log_trace
#' @export
save_header <- function(df_file, outfile_csv, verbose = FALSE){
  # Formatting metadata to Actilife's header format:
  if(verbose){
    log_trace("Formatting metadata to Actilife's header format.")
  }
  
  # Returns the single, non-empty value stored in column 'name' of 'df_file',
  # or 'fallback' when that column is absent, empty or NA:
  field_or <- function(name, fallback){
    value <- df_file[[name]]
    if(is.null(value) || length(value) != 1 || is.na(value) ||
       !nzchar(as.character(value))){
      fallback
    } else{
      as.character(value)
    }
  }
  
  firmware    <- field_or("Firmware", "1.9.2")
  sample_rate <- field_or("Sample.Rate", "30")
  
  # The clock time is produced with an explicit format. Cutting it out of the
  # default character form of a date-time is unreliable, because that form has
  # no time part at exact midnight and may carry fractional seconds.
  header_txt <- paste0("------------ Data File Created By ActiGraph GT3X+ ActiLife v6.13.4 Firmware v", firmware, " date format dd/MM/yyyy at ", sample_rate, " Hz  Filter Normal -----------\n",
                       "Serial Number: ", df_file$Serial.Number, "\n",
                       "Start Time ", format(df_file$Start.Date, "%H:%M:%S"), "\n",
                       "Start Date ", format(df_file$Start.Date, "%d/%m/%Y"), "\n",
                       "Epoch Period (hh:mm:ss) 00:00:00\n",
                       "Download Time ", format(df_file$Download.Date, "%H:%M:%S"), "\n",
                       "Download Date ", format(df_file$Download.Date, "%d/%m/%Y"), "\n",
                       "Current Memory Address: 0\n",
                       # Decimal comma (if any) is replaced by a decimal point:
                       "Current Battery Voltage: ", sub(",", ".", df_file$Battery.Voltage),"     Mode = 12\n",
                       "--------------------------------------------------\n")
  
  # Writing the .csv document with the header
  if(verbose){
    log_trace("Exporting header information to '", outfile_csv, "'.")
  }
  
  # No 'append': an existing file at 'outfile_csv' is replaced.
  cat(header_txt,
      file = outfile_csv)
}

#' @title GT3X header to Actilife's format
#' 
#' @description Reads metadata from ``.gt3x`` files, formats it to Actilife's standard format (for compatibility with GGIR) and exports it to a CSV file.
#' 
#' @details Reads the metadata from a GT3X file's internal "info.txt" file using the read_info function, then exports it as the header to a CSV file using the save_header function. The header is written to "<outdir>/csv/<name>RAW.csv", where <name> is the input file name without its ".gt3x" extension. The "csv" directory is created when missing, and an existing output file is overwritten.
#' @param origin path to a GT3X file to convert
#' @param outdir path for outputting the CSV formatted file. If NULL, the directory of 'origin' is used.
#' @param verbose logical: whether to show detailed log messages or not (default: FALSE)
#' @importFrom stringr str_sub str_trim
#' @importFrom magrittr %>%
#' @importFrom logger log_trace log_info log_success
#' @export
header_csv <- function(origin, outdir, verbose = FALSE){
  # Without an output directory, write next to the input file (the same rule
  # save_accel applies) instead of building the path "/csv":
  if(is.null(outdir)){
    outdir <- dirname(origin)
  }
  
  # Input file name without its 5-character ".gt3x" extension:
  file <- basename(origin) %>%
            str_sub(1, -6)
  
  # Output directory:
  outdir_csv <- paste0(str_trim(outdir), "/csv")
  
  # Output file:
  outfile_csv <- paste0(outdir_csv, "/", file, "RAW.csv")
  
  # Create output directory (and any missing parent) if it doesn't already exist:
  if(!dir.exists(outdir_csv)){
    dir.create(outdir_csv, recursive = TRUE)
    log_info("Output directory '", outdir_csv, "' created.")
  }else if(file.exists(outfile_csv)){
    # 'outfile_csv' is already a complete path; save_header replaces the file.
    log_info("File '", outfile_csv, "' already exists and will be overwritten.")
  }
  
  # Reading info.txt file and formatting it as a dataframe:
  if(verbose){
    log_trace("Reading 'info.txt' file from '", origin, "'.")
  }
  
  info_filedf <- read_info(origin, verbose = verbose)
  
  # Trace message:
  if(verbose){
    log_success("Read 'info.txt' file, exporting it in CSV format.")
  }
  
  # Exporting header information as a CSV file:
  save_header(df_file = info_filedf, outfile_csv = outfile_csv, verbose = verbose)
  
  if(verbose){
    log_success("Exported header information to '", outfile_csv, "'.")
  }
}

#' @title Export GT3X contents
#' 
#' @description  Converts acceleration data from a ".gt3x" file to a data.frame and exports it to CSV.
#' 
#' @details This function reads the acceleration data of a ".gt3x" file with read.gt3x::read.gt3x and appends it, in CSV format and with a column-name row, to "<outdir>/csv/<name>RAW.csv", where <name> is the input file name without its ".gt3x" extension. The output column "Accelerometer X" is filled from the input's Y column and "Accelerometer Y" from its X column; all three columns are written as character. This function does not create the "csv" directory itself (header_csv does).
#' @param acc.file path to a GT3X file to convert
#' @param outdir path for outputting the CSV formatted file (default: NULL, meaning the directory of 'acc.file'). The file is written to the "csv" directory inside this one.
#' @param verbose logical: whether to show detailed log messages or not (default: FALSE)
#' @importFrom read.gt3x read.gt3x
#' @importFrom stringr str_sub
#' @importFrom vroom vroom_write
#' @importFrom magrittr %>%
#' @importFrom logger log_trace log_success
#' @importFrom dplyr transmute
#' @importFrom data.table as.data.table
#' @export
save_accel <- function(acc.file, outdir = NULL, verbose = FALSE){
  # Initializing runtime counter:
  t_accel <- Sys.time()
  
  # Time elapsed since the start of this call, always expressed in seconds.
  # A plain 'Sys.time() - t_accel' picks its own unit (minutes once the run
  # exceeds a minute), which made the "seconds" figure in the log wrong.
  elapsed_secs <- function(){
    Sys.time() %>%
      difftime(t_accel, units = "secs") %>%
      as.numeric %>%
      round(2)
  }
  
  # Trace message:
  if(verbose){
    log_trace("Reading accelerometer information from '", acc.file, "'.")
  }
  
  # Filename without its 5-character ".gt3x" extension:
  file_id <- basename(acc.file) %>% str_sub(1, -6)

  # Output CSV file (next to the input file when no 'outdir' is given):
  target_dir <- if(is.null(outdir)) dirname(acc.file) else outdir
  csv_file <- paste0(target_dir, "/csv/", file_id, "RAW.csv")
  
  # Reading accelerometer data and processing variables:
  accel_df <- read.gt3x(acc.file,
                        imputeZeroes = TRUE) %>%
                # Here, we drop the 'timestamp' column:
                # NOTE(review): the pipe still passes the data as the first
                # argument, so '.[, -4]' is received as the second one --
                # confirm this really drops the timestamp as intended.
                as.data.table(.[, -4]) %>%
                # X and Y are deliberately crossed over here:
                transmute(`Accelerometer X` = Y %>% as.character,
                          `Accelerometer Y` = X %>% as.character,
                          `Accelerometer Z` = Z %>% as.character)

  # Writing acceleration data in csv:
  if(verbose){
    log_success("Read accelerometer data from '", file_id, ".gt3x:log.bin'. Took ", 
                elapsed_secs(),
                " seconds.")
    log_trace("Appending accelerometer information to '", csv_file, "'.")
  }

  # 'append = TRUE' keeps whatever is already in the file (the header) and
  # adds the column names plus the samples after it:
  vroom_write(x = accel_df,
              path = csv_file, 
              append = TRUE,
              delim = ",",
              col_names = TRUE)
  
  if(verbose){
    log_success("Exported all information from '", file_id, ".gt3x' successfully. Took ",
                elapsed_secs(),
                " seconds.")
  }
}

#' @title Export GT3X as CSV
#' 
#' @description Convert GT3X files to CSV format in Actilife's default export format.
#' 
#' @details Reads the "info.txt" and "log.bin" internal files from ".gt3x" files exported by Actilife's software and converts it to a CSV file in the same format of the CSV file extracted directly from the software. All inputs are validated first (existence of every path, presence of "info.txt" and "log.bin" in every archive, plausibility of 'outdir'); every problem found is logged and, if there is any, no file is converted. Each valid file is then processed with header_csv followed by save_accel. The function is called for its side effects (files written and log messages).
#' @param gt3x_files complete path to a GT3X file to convert. Can also be a vector of complete filenames or a directory with ".gt3x" files in it. A single path given without the ".gt3x" extension is also accepted when the file with that extension exists.
#' @param outdir path to a directory for outputting the CSV formatted file (default: NULL, meaning the directory of each input file). There will be another directory created inside this one, called "/csv".
#' @param progress logical: whether or not to show TCLTK progress bar (default: FALSE)
#' @param parallel logical: whether or not to use parallel processing (default: FALSE)
#' @param cores integer: number of cores to use for parallelization. If parallel = FALSE, this argument will be ignored.
#' @param logfile path to use for outputting logfile. If FALSE, even when verbose = TRUE only print basic info messages and directly to R log. Default (NULL) outputs to R's log when parallel = FALSE and to the same directory as the CSV output when parallel = TRUE.
#' @param verbose logical: whether or not to show detailed log messages (default: FALSE)
#' @export
#' @importFrom magrittr %>%
#' @importFrom utils unzip
#' @importFrom gdata humanReadable
#' @importFrom stringr str_sub str_detect
#' @importFrom logger log_layout layout_glue_colors log_threshold TRACE log_error log_trace log_info log_success
#' @importFrom doSNOW registerDoSNOW
#' @importFrom snow makeSOCKcluster
#' @importFrom foreach foreach %dopar% registerDoSEQ
#' @importFrom tcltk tkProgressBar setTkProgressBar
#' @importFrom parallel stopCluster detectCores
gt3x_2_csv <- function(gt3x_files = NULL, outdir = NULL, progress = FALSE,
                       parallel = FALSE, cores = detectCores(), logfile = NULL,
                       verbose = FALSE){
  # Configuring logger:
  log_layout(layout_glue_colors)
  log_threshold(TRACE)
  
  # Setting up error messages:
  #   - Made it so that all errors found are printed, to ease debugging
  quit <- FALSE
  
  # TRUE when 'path' can be listed as a zip archive and holds both members a
  # GT3X file is expected to have. An unreadable archive counts as FALSE
  # instead of aborting the whole validation:
  has_gt3x_structure <- function(path){
    members <- tryCatch(unzip(path, list = TRUE)$Name,
                        error = function(e) character())
    all(c("info.txt", "log.bin") %in% members)
  }
  
  #   - If there's no specified .gt3x file/folder:
  if(is.null(gt3x_files) || length(gt3x_files) == 0){
    quit <- TRUE
    log_error("Please specify the complete path to a GT3X file, a vector of complete paths to GT3X files or a folder containing GT3X files with the parameter 'gt3x_files' in the function call.")
  #   - If there is a specified .gt3x file/folder:
  } else{
    #   - If any filepaths have a "/" at the end, strip it out:
    gt3x_files <- sub("/$", "", gt3x_files)
    
    #   - A single path may be a file, a file given without its extension,
    #     or a directory to scan:
    if(length(gt3x_files) == 1){
      if(!file.exists(gt3x_files)){
        with_ext <- paste0(gt3x_files, ".gt3x")
        
        #   - If even pasting the extension on it doesn't work, quit with an error:
        if(!file.exists(with_ext)){
          quit <- TRUE
          log_error("Path '", gt3x_files, "' doesn't seem to be an existing file or directory.")
        #   - Else, append the extension and keep going:
        } else{
          if(verbose){
            log_trace("File '", gt3x_files, "' not found. Using '", paste0(gt3x_files, ".gt3x"), "' as a filename.")
          }
          gt3x_files <- with_ext
        }
      #   - If it exists and is a folder (asked to the file system, so that
      #     dots anywhere in the path don't matter):
      } else if(dir.exists(gt3x_files)){
        found <- list.files(gt3x_files,
                            pattern = "\\.gt3x$",
                            full.names = TRUE,
                            ignore.case = TRUE)
        
        #   - If there are no GT3X files in it, quit:
        if(length(found) < 1){
          quit <- TRUE
          log_error("No GT3X files found at '", gt3x_files, "'.")
        #   - If there are GT3X files in it, use them:
        } else {
          gt3x_files <- found
        }
      }
    }
    
    # Checking every resolved path for existence and expected content:
    if(!quit){
      found_ok <- file.exists(gt3x_files)
      files_error <- gt3x_files[!found_ok]
      
      structure_ok <- vapply(gt3x_files[found_ok], has_gt3x_structure, logical(1))
      contents_error <- gt3x_files[found_ok][!structure_ok]
      
      if(length(files_error) > 1){
        log_error("Couldn't find these files: '", 
                  paste(files_error, collapse = "', '"),
                  "'.")
      } else if(length(files_error) > 0){
        log_error("File '", files_error, "' couldn't be found.")
      }
      
      if(length(contents_error) > 1){
        log_error("These files don't have the expected GT3X structure: '",
                  paste(contents_error, collapse = "', '"),
                  "'.")
      } else if(length(contents_error) > 0){
        log_error("File '", contents_error, "' doesn't have the expected GT3X structure.")
      }
      
      if(length(files_error) > 0 || length(contents_error) > 0){
        quit <- TRUE
      }
    }
  }
  
  #   - If outdir is specified:
  if(!is.null(outdir)){
    # Checking if outdir exists:
    if(!dir.exists(outdir)){
      if(verbose){
        log_trace("Directory '", outdir, "' doesn't exist. Checking if it can be created.")
      }
      
      # If its last component has no extension, take it for a directory and
      # create it (only the last component is inspected, so "./out" is fine):
      if(!str_detect(basename(outdir), "\\..*$")){
        dir.create(outdir, recursive = TRUE)
        if(verbose){
          log_success("Directory '", outdir, "' created.")
        }
        
      # If it's a filename, return an error:
      } else{
        quit <- TRUE
        log_error("Output directory '", outdir, "' seems to be a filename.")
      }
    }
  #   - If there's no outdir specified, warn the user of the outdir used:
  } else {
    if(length(gt3x_files) == 1){
      log_info("Outputting to '", dirname(gt3x_files), "'. Specify a directory in the 'outdir' parameter if you wish to change this.")
    } else{
      log_info("Outputting each processed file to it's parent '.gt3x' directory.")
    }
  }
  
  # If there are no errors:
  if(!quit){
    # Initializing runtime counter (with trace message):
    if(verbose){
      log_trace(paste0("Initializing runtime counter."))
    }
    
    startTime <- Sys.time()

    # Creating cluster with number of cores to use:
    if(parallel){
      cluster <- makeSOCKcluster(cores)
      # Release the workers however this function ends:
      on.exit(stopCluster(cluster), add = TRUE)
      registerDoSNOW(cluster)
      
      if(verbose){
        log_success("Cluster allocated for parallelization. Using ", cores, " cores.")
      }
    } else {
      registerDoSEQ()
      
      if(verbose){
        log_trace("Processing files sequentially.")
      }
    }
    
    # If user asked for progress bar:
    if(progress){
      bar <- tkProgressBar(title = "Converting GT3X files to CSV. Progress:",
                           min = 0,
                           max = length(gt3x_files),
                           width = 500) 
      # Close the progress window however this function ends:
      on.exit(close(bar), add = TRUE)
      
      updateProgress <- function(n){
        setTkProgressBar(bar,
                         n,
                         label = paste0(round(n/length(gt3x_files)*100, 0), "% done"))
      }
      
      opts <- list(progress = updateProgress)
    } else{
      opts <- list()
    }
    
    # Looping through file list with %dopar%:
    foreach(
      it_file = seq_along(gt3x_files),
      .export = c("save_accel", "read_info", "header_csv", "save_header"),
      .inorder = FALSE,
      .options.snow = opts
    ) %dopar% {
      # Initialize runtime counter for this particular file:
      fileStartTime <- Sys.time()
      
      # Get this iteration's filepath:
      gt3x_file <- gt3x_files[it_file]
      
      # Auxiliary variables;
      #   - Output directory of THIS file. Kept under its own name so that
      #     the 'outdir' argument is never overwritten between iterations:
      file_outdir <- if(is.null(outdir)) dirname(gt3x_file) else outdir
      
      #   - Output file (trimming file extension out):
      outfile <- str_sub(basename(gt3x_file), 1, -6)
      
      # Logfile: decide where (if anywhere) output is diverted to.
      #   - NULL  : per-file text file when running in parallel, else no sink
      #   - FALSE : no sink
      #   - path  : that file
      log_target <- NULL
      if(is.null(logfile)){
        if(parallel){
          log_target <- paste0(file_outdir, "/", outfile, ".txt")
        }
      } else if(!identical(logfile, FALSE)){
        log_target <- logfile
      }
      
      if(!is.null(log_target)){
        sink(log_target)
      }
      
      tryCatch({
        # Trace message:
        if(verbose){
          log_trace(paste0("Started processing file '", gt3x_file, "'."))
          log_trace(paste0("Using '", file_outdir, "' as the output directory and '", outfile, ".gt3x' as the output filename."))
          log_trace(paste0("Attempting to extract contents from '.gt3x' file."))
        }
        
        # Extracting activity header from the 'info.txt' file and exporting content from 
        # the 'log.bin' file as CSV:
        header_csv(gt3x_file, file_outdir, verbose = verbose)
        save_accel(gt3x_file, file_outdir, verbose = verbose)
    
        # Trace message with elapsed time:
        log_trace(paste0("File '",
                         outfile,
                         ".gt3x' processed. Took ",
                         Sys.time() %>%
                           difftime(fileStartTime, units = "secs") %>%
                           as.numeric %>%
                           round(2), 
                         " seconds. Approximate file size: ", 
                         file.info(paste0(file_outdir, "/csv/", outfile, "RAW.csv"))$size %>%
                           humanReadable(standard = "SI")))
      }, finally = {
        # Log back to R, only if a sink was actually opened above:
        if(!is.null(log_target)){
          sink()
        }
      })
    }
    
    # Trace message with total elapsed time:
    if(verbose && length(gt3x_files) > 1){
      log_trace(paste0("Total elapsed time: ", 
                       Sys.time() %>%
                         difftime(startTime, units = "secs") %>%
                         as.numeric %>%
                         round(2),
                       " seconds."))
    }
  }
}
# danilodpsantos/gt3x2csv documentation built on March 13, 2021, 3:20 a.m.