IFC: Tools for Imaging Flow Cytometry

Documented in data_to_DAF

################################################################################
# This file is released under the GNU General Public License, Version 3, GPL-3 #
# Copyright (C) 2020 Yohann Demont                                             #
#                                                                              #
# It is part of IFC package, please cite:                                      #
# -IFC: An R Package for Imaging Flow Cytometry                                #
# -YEAR: 2020                                                                  #
# -COPYRIGHT HOLDERS: Yohann Demont, Gautier Stoll, Guido Kroemer,             #
#                     Jean-Pierre Marolleau, Loïc Garçon,                      #
#                     INSERM, UPD, CHU Amiens                                  #
#                                                                              #
# DISCLAIMER:                                                                  #
# -You are using this package on your own risk!                                #
# -We do not guarantee privacy nor confidentiality.                            #
# -This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or        #
# FITNESS FOR A PARTICULAR PURPOSE. In no event shall the copyright holders or #
# contributors be liable for any direct, indirect, incidental, special,        #
# exemplary, or consequential damages (including, but not limited to,          #
# procurement of substitute goods or services; loss of use, data, or profits;  #
# or business interruption) however caused and on any theory of liability,     #
# whether in contract, strict liability, or tort (including negligence or      #
# otherwise) arising in any way out of the use of this software, even if       #
# advised of the possibility of such damage.                                   #
#                                                                              #
# You should have received a copy of the GNU General Public License            #
# along with IFC. If not, see <http://www.gnu.org/licenses/>.                  #
################################################################################

#' @title DAF File Writer
#' @description
#' Writes an `IFC_data` object to a daf file
#' @param obj an `IFC_data` object extracted with features extracted.
#' @param write_to pattern used to export file.
#' Placeholders, like "\%d/\%s_fromR.\%e", will be substituted:\cr
#' -\%d: with full path directory of 'obj$fileName'\cr
#' -\%p: with first parent directory of 'obj$fileName'\cr
#' -\%e: with extension of 'obj$fileName' (without leading .)\cr
#' -\%s: with shortname from 'obj$fileName' (i.e. basename without extension).\cr
#' Exported file extension will be deduced from this pattern. Note that it has to be a .daf.
#' @param viewing_pop Character String. Allow user to change displayed population. Default is 'All'.
#' @param overwrite whether to overwrite file or not. Default is FALSE.
#' Note that if TRUE, it will overwrite exported file if path of 'obj$fileName' and deduced from 'write_to' arguments are different.
#' Otherwise, you will get an error saying that overwriting source file is not allowed.\cr
#' Note also that an original file, i.e. generated by IDEAS(R) or INSPIRE(R), will never be overwritten.
#' Otherwise, you will get an error saying that overwriting original file is not allowed.\cr
#' @param binary whether to write object to file in binary mode or not. Default is TRUE.\cr
#' Note that it can represent a convenient way to make file written in binary mode back-compatible with former version of IDEAS software.\cr
#' /!\ However unexpected behaviour may happen if features, regions, pops, ... are depending on masks (e.g. AdaptiveErode, Component, LevelSet, Watershed) introduced in newer version of IDEAS software.\cr
#' /!\ Important please note that conversion from binary to non-binary and back to binary may create some rounding adjustment resulting in some features/image values changes.\cr
#' Finally, if data originate from FCS, 'binary' will be forced to FALSE. 
#' @param endianness The endian-ness ("big" or "little") of the target system for the file. Default is .Platform$endian.\cr
#' Endianness describes the bytes order of data stored within the files. This parameter may not be modified.
#' @param display_progress whether to display a progress bar. Default is TRUE.
#' @param verbose whether to display information (use for debugging purpose). Default is FALSE.
#' @param fullname whether to export daf file with full name of its corresponding cif, if found. Default is TRUE.
#' If cif can't be found, daf file will be exported with the original cif file name.
#' @param cifdir the path of the directory to initially look to cif file. Default is dirname(obj$fileName). Only apply when 'fullname' is set to TRUE.
#' @param ntry number of times \code{\link{data_to_DAF}} will be allowed to find corresponding cif file. Default is +Inf. Only apply when 'fullname' is set to TRUE.
#' @param ... other arguments to be passed.
#' @examples
#' if(requireNamespace("IFCdata", quietly = TRUE)) {
#'   tmp <- tempdir(check = TRUE)
#'   ## use a daf file
#'   file_daf <- system.file("extdata", "example.daf", package = "IFCdata")
#'   daf <- ExtractFromDAF(fileName = file_daf)
#'   ## add a new population to daf
#'   dafnew <- data_add_pops(daf, list(buildPopulation(name = "test", type = "T", obj = 0)))
#'   ## export obj to file using binary mode
#'   data_to_DAF(obj = dafnew, write_to = paste0(tmp, "\\test_bin.daf"),
#'               overwrite = TRUE, binary = TRUE)
#'   ## exporting to non binary mode
#'   data_to_DAF(obj = dafnew, write_to = paste0(tmp, "\\test_notbin.daf"),
#'               overwrite = TRUE, binary = FALSE)
#' } else {
#'   message(sprintf('Please run `install.packages("IFCdata", repos = "%s", type = "source")` %s',
#'                   'https://gitdemont.github.io/IFCdata/',
#'                   'to install extra files required to run this example.'))
#' }
#' @return It invisibly returns full path of exported file.
#' @export
data_to_DAF = function(obj, write_to, viewing_pop = "All", overwrite = FALSE,
                       binary = TRUE, endianness = .Platform$endian, 
                       display_progress = TRUE, verbose = FALSE,
                       fullname = TRUE, cifdir = dirname(obj$fileName), ntry = +Inf, ...) {
  dots = list(...)
  # change locale
  locale_back <- setloc(c("LC_ALL" = "en_US.UTF-8"))
  enc_back <- options("encoding" = "UTF-8")
  on.exit(suspendInterrupts({setloc(locale_back); options(enc_back)}), add = TRUE)
  now = format(Sys.time(), format = "%d-%b-%y %H:%M:%S")
  
  # check mandatory param
  assert(obj, cla = "IFC_data")
  if(length(obj$pops)==0) stop("please use argument 'extract_features' = TRUE with ExtractFromDAF() or ExtractFromXIF() and ensure that features were correctly extracted")
  if(missing(write_to)) stop("'write_to' can't be missing")
  assert(write_to, len = 1, typ = "character")
  assert(overwrite, len = 1, alw = c(TRUE, FALSE))
  assert(binary, c(TRUE, FALSE))
  assert(endianness, len = 1, alw = c("little", "big"))
  assert(display_progress, c(TRUE, FALSE))
  assert(verbose, c(TRUE, FALSE))
  cifdir = na.omit(as.character(cifdir)); assert(cifdir, len = 1, typ = "character")
  ntry = na.omit(as.numeric(ntry)); assert(ntry, len = 1, typ = "numeric")
  if(ntry < 0) ntry = 0
  assert(fullname, len=1, alw=c(TRUE, FALSE))
  
  # tests if file can be written
  fileName = normalizePath(obj$fileName, winslash = "/", mustWork = FALSE)
  title_progress = basename(fileName)
  splitf_obj = splitf(fileName)
  splitp_obj = splitp(write_to)
  write_to = formatn(splitp_obj, splitf_obj)
  file_extension = getFileExt(write_to)
  assert(file_extension, len = 1, alw = "daf")
  if(any(splitp_obj$channel > 0)) message("'write_to' has %c argument but channel information can't be retrieved with data_to_DAF()")
  if(any(splitp_obj$object > 0)) message("'write_to' has %o argument but channel information can't be retrieved with data_to_DAF()")
  
  overwritten = FALSE
  if(file.exists(write_to)) {
    write_to = enc2native(normalizePath(write_to, winslash = "/", mustWork = FALSE))
    if(!overwrite) stop(paste0("file ",write_to," already exists"))
    if(tolower(fileName) == tolower(write_to)) stop("you are trying to overwrite source file which is not allowed")
    xmlEND_export = cpp_scanFirst(write_to, charToRaw('</Assay>'), start = 0, end = 0)
    if(xmlEND_export > 0) {
      xml_export = read_xml(readBin(con = write_to, what = "raw", n = xmlEND_export + nchar("</Assay>") - 1), options=c("HUGE","RECOVER","NOENT","NOBLANKS","NSCLEAN"))
      tryCatch({
        is_fromR = as.character(na.omit(xml_attr(xml_find_first(xml_export, "//Assay"), attr = "IFC_version")))
      }, finally = rm(xml_export))
      if(length(is_fromR)==0) stop("you are trying to overwrite an original file which is not allowed")
    } else {
      stop(paste0(write_to, "\ndoes not seem to be well formatted: </Assay> not found")) 
    }
    tmp_file = normalizePath(tempfile(), winslash = "/", mustWork = FALSE)
    overwritten = TRUE
  }
  dir_name = dirname(write_to)
  if(!dir.exists(dir_name)) if(!dir.create(dir_name, recursive = TRUE, showWarnings = FALSE)) stop(paste0("can't create\n", dir_name))
  file_w = ifelse(overwritten, tmp_file, write_to)
  tryCatch(suppressWarnings({
    towrite = file(description = file_w, open = "wb")
  }), error = function(e) {
    stop(paste0(ifelse(overwritten,"temp ","'write_to' "), "file: ", file_w, "\ncan't be created: check name ?"))
  })
  close(towrite)
  write_to = normalizePath(write_to, winslash = "/", mustWork = FALSE)
  
  # defines some variables
  now = format(Sys.time(), format = "%d-%b-%y %H:%M:%S")
  pkg_ver = paste0(unlist(packageVersion("IFC")), collapse = ".")
  channels = obj$description$Images
  is_fcs = length(obj$description$FCS)!=0
  # TODO ask AMNIS about Raw Number, it seems to be a problem when this feature is part of daf file
  obj = suppressWarnings(data_rm_features(obj, "Raw Number", list_only = FALSE)) # ensures to remove "Raw Number"
  # when obj is from rif or cif, number of objects can be different from actual number of collected object
  # e.g. merged of subset of file, there is no way to link object number and feature value so the only solution
  # is to remove all features and only keep Object Number
  obj = checkObj(obj)
  
  if(!is_fcs) {
    if(length(obj$images)==0) stop("please use argument 'extract_images' = TRUE with ExtractFromDAF() or ExtractFromXIF() and ensure that images were correctly extracted")
    # changes to DAF compatible colors
    channels$color = map_color(channels$color, FALSE)
    # removes gamma
    channels = channels[, !grepl("gamma", names(channels))]
    channels[, "physicalChannel"] = channels[, "physicalChannel"] - 1
    bgm = grep("^bgmean", names(obj$images))
    bgs = grep("^bgstd", names(obj$images))
    satc = grep("^satcount", names(obj$images))
    satp = grep("^satpercent", names(obj$images))
    
    if(fullname) {
      found = FALSE
      checksum = obj$checksum
      
      fileName_image = file.path(cifdir, basename(obj$description$ID$file)) # look in cifdir 1st
      if(file.exists(fileName_image)) {
        if(checksumXIF(fileName_image) == checksum) found = TRUE
      } else {
        fileName_image = obj$description$ID$file
      }
      if((!found)&& file.exists(fileName_image)) {
        if(checksumXIF(fileName_image) == checksum) found = TRUE
      }
      
      while((interactive() && (ntry > 0) && (!found))) {
        message(paste0("daf file does not refer to: ", fileName_image))
        old_wd = getwd()
        on.exit(setwd(old_wd), add= TRUE)
        setwd(dirname(obj$fileName))
        if(.Platform$OS.type == "windows") {
          fileName_image = choose.files(caption = paste0("Looking for: ", basename(obj$description$ID$file)), multi = FALSE, filters = cbind("Compensated Image File (*.cif)", "*.cif"))
        } else {
          fileName_image = file.choose()
        }
        if(file.exists(fileName_image)) if(getFileExt(fileName_image)=="cif") if(checksumXIF(fileName_image) == checksum) {
          found = TRUE
          break;
        } 
        ntry = ntry - 1
      }
      fileName_image = normalizePath(fileName_image, winslash = "/", mustWork = FALSE) # /!\ ask AMNIS using full path produces error while trying to retrieve compensation
    } else {
      fileName_image = basename(obj$description$ID$file)
    } 
    obj$description$ID$file <- fileName_image
  } else {
    if(binary) {
      binary = FALSE
      message("'binary' has been forced to FALSE because .daf originates from .fcs file.")
    }
  }

  # creates shared nodes
  sub_nodes = list()
  if(!is_fcs) sub_nodes = c(sub_nodes,
                            list(xml_new_node(name = "ChannelPresets",
                                              attrs = list(count = nrow(channels), bits = "12"),
                                              .children = xml_new_node(name = "gallery",
                                                                       attrs = list(viewmode="All Channels",
                                                                                    orderByFeature="Object Number",
                                                                                    ascendingOrder="true",
                                                                                    population=ifelse(viewing_pop %in% names(obj$pops), viewing_pop, "All"),
                                                                                    showMasks="false",
                                                                                    showColor="true",
                                                                                    showSaturationColor="false"))),
                                 xml_new_node(name = "Images", .children = lapply(1:nrow(channels), FUN=function(i) {
                                   xml_new_node(name = "image", attrs = channels[i, ])
                                 })),
                                 toXML2_masks(obj$description$masks, verbose = verbose)))
  
  sub_nodes = c(sub_nodes, list(toXML2_features_def(obj$features_def, verbose = verbose),
                                toXML2_regions(obj$regions, verbose = verbose),
                                toXML2_pops(obj$pops, verbose = verbose, display_progress = display_progress, title_progress = title_progress, ...)))
  
  # defines root node "Assay"
  root <- xml_new_root("Assay")
  # adds attributes to root node
  IDEAS_version = obj$description$Assay$IDEAS_version
  if(binary) {
    if(length(IDEAS_version) == 0) IDEAS_version = "6.2.64.0"
    root %>% xml_set_attrs(value = c(IFC_version = pkg_ver, date = now, IDEAS_version = IDEAS_version, binaryfeatures = "True"))
  } else {
    if(length(IDEAS_version) == 0) IDEAS_version = "6.1.822.0"
    root %>% xml_set_attrs(value = c(IFC_version = pkg_ver, date = now, IDEAS_version = IDEAS_version))
  }
  # adds first children
  xml_add_child(root, .value = xml_new_node(name = "SampleName", text = splitf_obj["short"]))
  xml_add_child(root, .value = xml_new_node(name = "ShowSampleName", text = "False"))
  # adds shared children subnodes
  if(is_fcs) {
    xml_add_child(root, .value = xml_new_node(name = "FCS", attrs = obj$description$ID, .children = sub_nodes))
  } else {
    xml_add_child(root, .value = xml_new_node(name = "SOD", attrs = obj$description$ID, .children = sub_nodes))
  }
  # add stats
  stats = dots$stats
  if(length(stats) == 0) stats = list(list(type="COUNT",title="Count",def=""),
                                      list(type="PERCENT_GATED",title="%Gated",def=""))
  yloc = unlist(lapply(obj$graphs, FUN = function(g) g[["ylocation"]]))
  size = unlist(lapply(obj$graphs, FUN = function(g) g[["ysize"]]))
  stats = buildStats(obj, stats, width = 80 * (1 + length(stats)), height = 240, xlocation = 0, ylocation = max(c(yloc + size, 0)))
  disp_nodes = list(toXML2_stats(stats, verbose = verbose))
  if(length(obj$graphs) > 0) disp_nodes = c(disp_nodes, toXML2_graphs(obj$graphs, verbose = verbose))
  xml_add_child(root, .value =   xml_new_node(name = "Displays", attrs = list(count = num_to_string(1+length(obj$graphs)), 
                                                                              layout="1", entriesPerRow="12", lightDarkMode="1"),
                                              .children = disp_nodes))
  tryCatch({
    if(binary) {
      # directly writes to temp file
      towrite <- file(file_w, open = "wb")
      tryCatch({
        write_xml(root, file = towrite, encoding = "utf-8")
        seek(con = towrite, where = seek(towrite)-1, origin = "start")
        # writing features
        toBIN_features(features = obj$features, w_con = towrite, endianness = endianness,
                       display_progress = display_progress, verbose = verbose, title_progress = title_progress)
        # writing images
        toBIN_images(images = obj$images, w_con = towrite, endianness = endianness,
                     display_progress = display_progress, verbose = verbose, title_progress = title_progress)
      }, error = function(e) {
        stop(e$message, call. = FALSE)
      }, finally = close(towrite))
    } else {
      write_xml(root, file = file_w, encoding = "utf-8")
      # write shared nodes to temporary character vector
      .tempXMLOutput = readLines(file_w)
      # detects where features nodes are beginning
      pos1 = grep("</DefinedFeatures>", .tempXMLOutput, perl = FALSE, fixed = TRUE)
      # detects where tags start to get indents length
      pos2 = regexpr("</DefinedFeatures>", .tempXMLOutput[pos1], perl = FALSE, fixed = TRUE)
      # defines indents
      indent1 = paste0(rep(" ", times = pos2-2), collapse = "")
      indent2 = c(" ", indent1)
      indent3 = c("  ", indent2)
      
      # now writes to file
      # writes 1st part of shared nodes (i.e. upto the end of features definition) from .tempXMLOutput to temporary file
      cat(.tempXMLOutput[1], file = file_w, append = FALSE, "\n", sep="")
      lapply(.tempXMLOutput[2:pos1], FUN=function(i_text) cat(i_text, file = file_w, append = TRUE, "\n", sep=""))
      
      # writes features nodes
      if(verbose) message("writing features nodes")
      L = length(obj$features)
      cat(indent1, file = file_w, append = TRUE, "<FeatureValues>\n")
      if(display_progress) {
        pb_fen = newPB(min = 0, max = L, initial = 0, style = 3)
        tryCatch({
        lapply(1:L, FUN=function(i_feat) {
          setPB(pb_fen, value = i_feat, title = title_progress, label = "writing features values (xml)")
          cat(indent3, sep = "", file = file_w, append = TRUE, 
              sprintf('<UDFValues fid="%s" fv="%s" />\n', num_to_string(i_feat-1), paste0(num_to_string(obj$features[[i_feat]]), collapse = "|")))
        })
      }, error = function(e) {
        stop(e$message)
      }, finally = endPB(pb_fen))
      } else {
        lapply(1:L, FUN=function(i_feat) {
          cat(indent3, sep = "", file = file_w, append = TRUE, 
              sprintf('<UDFValues fid="%s" fv="%s" />\n', num_to_string(i_feat-1), paste0(num_to_string(obj$features[[i_feat]]), collapse = "|")))
        })
      }
      cat(indent1, file = file_w, append = TRUE, "</FeatureValues>\n")
      
      # writes images nodes
      if(verbose) message("writing images nodes")
      L = nrow(obj$images)
      if(is_fcs) {
        L = as.integer(obj$description$ID$objcount)
        if(display_progress) {
          pb_imn = newPB(min = 0, max = L, initial = 0, style = 3)
          tryCatch({
            lapply(0:(L-1), FUN=function(i_img) {
              setPB(pb_imn, value = i_img, title = title_progress, label = "writing images values (xml)")
              cat(indent2, file = file_w, append = TRUE, sep = "",
                  sprintf('<SO id="%i" imgIFD="-1" mskIFD="-1" spIFD="-1" w="0" l="0" fs="0" cl="0" ct="0" objCenterX="0" objCenterY="0" />\n',i_img))
            })
          }, error = function(e) {
            stop(e$message)
          }, finally = endPB(pb_imn))
        } else {
          lapply(0:(L-1), FUN=function(i_img) {
            cat(indent2, file = file_w, append = TRUE, sep = "",
                sprintf('<SO id="%i" imgIFD="-1" mskIFD="-1" spIFD="-1" w="0" l="0" fs="0" cl="0" ct="0" objCenterX="0" objCenterY="0" />\n',i_img))
          })
        }
      } else {
        if(display_progress) {
          pb_imn = newPB(min = 0, max = L, initial = 0, style = 3)
          tryCatch({
            lapply(1:L, FUN=function(i_img) {
              setPB(pb_imn, value = i_img, title = title_progress, label = "writing images values (xml)")
              cat(indent2, file = file_w, append = TRUE, sep = "",
                  sprintf('<SO id="%s" imgIFD="%s" mskIFD="%s" spIFD="%s" w="%s" l="%s" fs="%s" cl="%s" ct="%s" objCenterX="%s" objCenterY="%s" bgmean="%s" bgstd="%s" satcount="%s" satpercent="%s" />\n',
                          num_to_string(obj$images[i_img, 'id']),
                          num_to_string(obj$images[i_img, 'imgIFD']),
                          num_to_string(obj$images[i_img, 'mskIFD']),
                          num_to_string(obj$images[i_img, 'spIFD']),
                          num_to_string(obj$images[i_img, 'w']),
                          num_to_string(obj$images[i_img, 'l']),
                          num_to_string(obj$images[i_img, 'fs']),
                          num_to_string(obj$images[i_img, 'cl']),
                          num_to_string(obj$images[i_img, 'ct']),
                          num_to_string(obj$images[i_img, 'objCenterX']),
                          num_to_string(obj$images[i_img, 'objCenterY']),
                          paste0(num_to_string(unlist(obj$images[i_img, bgm])), collapse = "|"),
                          paste0(num_to_string(unlist(obj$images[i_img, bgs])), collapse = "|"),
                          paste0(num_to_string(unlist(obj$images[i_img, satc])), collapse = "|"),
                          paste0(num_to_string(unlist(obj$images[i_img, satp])), collapse = "|")))   
            })
          }, error = function(e) {
            stop(e$message)
          }, finally = endPB(pb_imn))
        } else {
          lapply(1:L, FUN=function(i_img) {
            cat(indent2, file = file_w, append = TRUE, sep = "",
                sprintf('<SO id="%s" imgIFD="%s" mskIFD="%s" spIFD="%s" w="%s" l="%s" fs="%s" cl="%s" ct="%s" objCenterX="%s" objCenterY="%s" bgmean="%s" bgstd="%s" satcount="%s" satpercent="%s" />\n',
                        num_to_string(obj$images[i_img, 'id']),
                        num_to_string(obj$images[i_img, 'imgIFD']),
                        num_to_string(obj$images[i_img, 'mskIFD']),
                        num_to_string(obj$images[i_img, 'spIFD']),
                        num_to_string(obj$images[i_img, 'w']),
                        num_to_string(obj$images[i_img, 'l']),
                        num_to_string(obj$images[i_img, 'fs']),
                        num_to_string(obj$images[i_img, 'cl']),
                        num_to_string(obj$images[i_img, 'ct']),
                        num_to_string(obj$images[i_img, 'objCenterX']),
                        num_to_string(obj$images[i_img, 'objCenterY']),
                        paste0(num_to_string(unlist(obj$images[i_img, bgm])), collapse = "|"),
                        paste0(num_to_string(unlist(obj$images[i_img, bgs])), collapse = "|"),
                        paste0(num_to_string(unlist(obj$images[i_img, satc])), collapse = "|"),
                        paste0(num_to_string(unlist(obj$images[i_img, satp])), collapse = "|")))
          })
        }
      }
      # finalizes temporary file by adding remaining shared nodes from .tempXMLOutput
      lapply(.tempXMLOutput[(pos1+1):length(.tempXMLOutput)], FUN=function(i_text) cat(i_text, file = file_w, append = TRUE, "\n", sep=""))
    }
  }, error = function(e) {
    stop(paste0("Can't create 'write_to' file.\n", write_to,
                ifelse(overwritten,"\nFile was not modified.\n","\n"),
                "See pre-file @\n", normalizePath(file_w, winslash = "/"), "\n",
                e$message), call. = FALSE)
  })
  if(overwritten) {
    mess = paste0("\n######################\n", write_to, "\nhas been successfully overwritten\n")
    if(!suppressWarnings(file.rename(to = write_to, from = file_w))) { # try file renaming which is faster
      if(!file.copy(to = write_to, from = file_w, overwrite = TRUE)) { # try file.copy if renaming is not possible
        stop(paste0("Can't copy temp file@\n", normalizePath(file_w, winslash = "/"), "\n",
                    "Can't create 'write_to' file.\n", write_to,
                    "\nFile was not modified.\n"), call. = FALSE)
      } else {
        file.remove(file_w, showWarnings = FALSE)
      }
    }
  } else {
    mess = paste0("\n######################\n", write_to, "\nhas been successfully exported\n")
  }
  message(mess)
  return(invisible(write_to))
}