R/readutils.R
In hadron: Analysis Framework for Monte Carlo Simulation Data in Physics

Documented in extract.loop extract.obs getorderedconfignumbers getorderedfilelist readbinarycf readbinarydisc readbinarysamples readcmicor readcmidatafiles readcmidisc readcmifiles readcmiloopfiles readgradflow readhlcor readnissatextcf readoutputdata readtextcf

#' @export
readcmicor <- function(filename, colClasses=c("integer","integer","integer","numeric","numeric","integer"),
                       skip=0) {
  data <- read.table(filename, header=F, skip=skip,
                     colClasses=colClasses)
  attr(data, "class") <- c("cmicor", class(data))  
  return(invisible(data))
}



#' Creates an ordered filelist from a basename and a path
#' 
#' These functions generate an ordered filelist and an order list of config
#' numbers by using a path and a basename and '*'.
#' 
#' All filenames are assumend to have equal length.
#' 
#' @param path the path to be searched
#' @param basename the basename of the files
#' @param last.digits the number of last characters in each filename to be used
#' for ordering the list.
#' @param ending the file extension after the digits.
#' @return returns the ordered list of strings.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmidatafiles}}, \code{\link{extract.obs}}
#' @keywords file
#' @export
#' @examples
#'
#' filelist <- getorderedfilelist(path=paste0(system.file(package="hadron"), "/extdata/"),
#'                                basename="testfile", last.digits=3, ending=".dat")
#' filelist
#' 
getorderedfilelist <- function(path="./", basename="onlinemeas", last.digits=4, ending="") {
  ofiles <- Sys.glob( sprintf( "%s/%s*%s", path, basename,ending ) ) 
  ii <- getorderedconfigindices(path=path, basename=basename, last.digits=last.digits, ending=ending)
  return(invisible(ofiles[ii]))
}

getconfignumbers <- function(ofiles, basename="onlinemeas", last.digits=4, ending="") {
  if(any(nchar(ofiles) != nchar(ofiles[1]))) {
    stop("getconfigurationnumbers: all filenames need to have the same length, aborting...\n")
  }
  lending <- nchar(ending)
  
  ## sort input files using the last last.digits characters of each filename
  e <- nchar(ofiles[1]) - lending
  s <- e-last.digits+1
  ## sorted config numbers
  return(invisible(as.integer(substr(ofiles,s,e))))
}

getorderedconfigindices <- function(path="./", basename="onlinemeas", last.digits=4, ending="") {
  ofiles <- Sys.glob( sprintf( "%s/%s*%s", path, basename, ending ) ) 
  if(any(nchar(ofiles) != nchar(ofiles[1]))) {
    stop("getconfigurationnumbers: all filenames need to have the same length, aborting...\n")
  }
  lending <- nchar(ending)
  
  ## sort input files using the last last.digits characters of each filename
  e <- nchar(ofiles[1]) - lending
  s <- e-last.digits+1
  ## sort-index vector
  gaugeno <- as.integer(substr(ofiles,s,e))
  return(invisible(order(gaugeno)))
}

#' Creates an ordered vector of gauge config file numbers
#' 
#' These functions generate an ordered list of config
#' numbers by using a path and a basename and '*'.
#' 
#' All filenames are assumend to have equal length.
#' 
#' @param path the path to be searched
#' @param basename the basename of the files
#' @param last.digits the number of last characters in each filename to be used
#' for ordering the list.
#' @param ending the file extension after the digits.
#' @return returns the ordered list of gauge config numbers as a numeric vector.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmidatafiles}}, \code{\link{extract.obs}}
#' @keywords file
#'
#' @examples
#' confignumbers <- getorderedconfignumbers(path=paste0(system.file(package="hadron"), "/extdata/"),
#'                                basename="testfile", last.digits=3, ending=".dat")
#' confignumbers
#' @export
getorderedconfignumbers <- function(path="./", basename="onlinemeas", last.digits=4, ending="") {
  ofiles <- Sys.glob( sprintf( "%s/%s*%s", path, basename, ending ) ) 
  if(any(nchar(ofiles) != nchar(ofiles[1]))) {
    stop("getconfigurationnumbers: all filenames need to have the same length, aborting...\n")
  }
  lending <- nchar(ending)
  
  ## sort input files using the last last.digits characters of each filename
  e <- nchar(ofiles[1]) - lending
  s <- e-last.digits+1
  ## sort-index vector
  gaugeno <- as.integer(substr(ofiles,s,e))
  return(invisible(sort(gaugeno)))
}



#' Read Single Data Files in Chris Michael Format
#' 
#' reads data from single files in Chris Michael format
#' 
#' These functions reads data from single data files. It is assumed that every
#' file has the same number of columns.
#' 
#' The cmi (Chris Michael) format for connected correlators comprises 6 colums
#' per file: 1) the observable type number (itype); 2) the operator type number
#' (iobs); 3) the time difference from source going from 0 to \eqn{Time/2} for
#' each operator type; 4) \eqn{c_1}{c1} correlator value at time value forward
#' in time; 5) \eqn{c_2}{c2} correlator value at time value backward in time;
#' 6) number of gauge configuration.
#' 
#' There are scripts shipped with the package converting the output written
#' into seperate files for each gauge configuration into the expected format.
#' They are called \code{puttogether.sh} and \code{puttogether_reverse.sh}
#' which will sort with increasing and with decreasing gauge configuration
#' number, respectively.
#' 
#' Note, that the normalisation of correlators needs multiplication by factor
#' of \eqn{0.5} (and possible \eqn{(2*\kappa)^2}{(2*k)^2} and \eqn{L^3} factors
#' dependent on your conventions).
#' 
#' The values of \code{itype} run from \code{1} to the total number of gamma
#' matrix combinations available. \code{iobs} equals \code{1} for local-local
#' correlators, \code{3} for local-smeared, \code{5} for smeared-local and
#' \code{7} for smeared-smeared
#' 
#' For charged mesons the order of gamma-matrix combinations is as follows:\cr
#' order PP PA AP AA 44 P4 4P A4 4A for pion like \eqn{P=\gamma_5}{P=g5}
#' \eqn{A=\gamma_4\gamma_5}{A=g4g5} \eqn{4=\gamma_4}{4=g4}\cr order 44 VV AA 4V
#' V4 4A A4 VA AV for rho-a1 like \eqn{4=\gamma_i\gamma_4}{4=gig4}
#' \eqn{V=\gamma_i}{V=gi} \eqn{A=\gamma_i\gamma_5}{A=gig5}\cr order BB SS -
#' total 20 \eqn{\gamma_i\gamma_4\gamma_5}{B=gig4g5} \eqn{S=I}\cr itype=21 is
#' conserved vector current at sink, \eqn{\gamma_5}{g5} at source
#' 
#' For neutral mesons the order of gamma-matrix combinations is as follows:\cr
#' order PP PA AP AA II PI IP AI IA for pion like \eqn{P=\gamma_5}{P=g5}
#' \eqn{A=\gamma_4\gamma_5}{A=g4g5} \eqn{I=1}{1=1}\cr order 44 VV BB 4V V4 4B
#' B4 VB BV for rho-b1 like \eqn{4=\gamma_i\gamma_4}{4=gig4}
#' \eqn{V=\gamma_i}{V=gi} \eqn{B=\gamma_i\gamma_4\gamma_5}{B=gig4g5}\cr order
#' XX AA - total 20 for a0-X like \eqn{A=\gamma_i\gamma_5}{A=gig5}
#' \eqn{X=\gamma_4}{X=g4}
#' 
#' For loops (disconnected contributions to neutral mesons) the convention is
#' as follows: files are assumed to have eight columns with gauge, gamma, t,
#' sample, ReTL, ImTL, ReTF, ImTF, where gamma is 1 to 16 as list of
#' (hermitian) gamma matrices: order g_5 g_1 g_2 g_3\cr -ig_4* g_5 g_1 g_2
#' g_3\cr -ig_5* i*g_5 g_1 g_2 g_3 ie 1,..\cr -ig_5g_4* -i*g_5 g_1 g_2 g_3 ie
#' g_4, g_5*row 2\cr (so P is 1; A4 is 5; S is 9; A_i is 10,11,12 etc)
#' 
#' t is t-value of trace (here spatial momentum is zero) sample is sample
#' number 1,...24 (or 96) ReTL is real part of trace at time t, with gamma
#' combination given and Local operator (F is Fuzzed == non-local) operator).
#' 
#' Normalisation is trace M^-1 with M=1+...
#' 
#' To make a disconnected correlator, one combines these traces for different t
#' (and different sample number) as a product. Note only Re Gamma=1 and Im
#' Gamma=gamma_5 have VEV's, see \code{\link{computeDisc}}
#' 
#' @aliases readcmicor readcmifiles readcmidatafiles readcmiloopfiles
#' @param files list of filenames to be read. Can be created using
#' \code{getorderedfilelist}.
#' @param skip Number of lines to be skipped at the beginning of each file
#' @param excludelist files to exclude from reading.
#' @param verbose Increases verbosity of the function.
#' @param colClasses The column data type classes, the \code{read.table}.
#' @param obs To reduce memory consumption it is possible to extract only one
#' of the observales. The column in which to match \code{obs} is to be given
#' with \code{obs.index}. This will only be affective if \code{obs} is not
#' \code{NULL}.
#' @param obs.index The column in which to match \code{obs} is to be given with
#' \code{obs.index}.
#' @param avg Integer. Average over successive number samples
#' @param stride Integer. Read only subset of files with corresponding stride.
#' @return \code{readcmicor} returns an object of class \code{cmicor}, read
#' from a single file.
#' 
#' \code{readcmidatafiles} returns an object of class \code{cmicor}, which is
#' an \code{rbind} of all \code{data.frame}s read from the single files in the
#' filelist.
#' 
#' \code{readcmiloopfiles} returns an object of class \code{cmiloop}, which is
#' an \code{rbind} of all \code{data.frame}s read from the single files in the
#' filelist.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{getorderedfilelist}}, \code{\link{extract.obs}},
#' \code{\link{readcmidisc}}
#' @keywords file
#' @examples
#'
#' ## a running toy example
#' files <- paste0(system.file(package="hadron"), "/extdata/outprcvn.dddd.00.0000")
#' X <- readcmifiles(files, skip=0,
#'                   colClasses=c("integer", "integer","integer","numeric","numeric"))
#' X
#'
#' ## a more realistic example
#' \dontrun{filelist <- getorderedfilelist("ouptrc", last.digits=3, ending=".dat")}
#' \dontrun{cmicor <- readcmidatafiles(filelist, skip=1)}
#' 
#' @export readcmifiles
readcmifiles <- function(files, excludelist=c(""), skip, verbose=FALSE,
                         colClasses, obs=NULL, obs.index, avg=1, stride=1) {
  if(missing(files)) {
    stop("readcmifiles: filelist missing, aborting...\n")
  }
  reduce <- FALSE
  if(!(is.null(obs) || missing(obs.index))) {
    reduce <- TRUE
  }
  tmpdata <- read.table(files[1], colClasses=colClasses, skip=skip)
  if(reduce) {
    tmpdata <- tmpdata[tmpdata[,obs.index] %in% obs,]
  }
  fLength <- length(tmpdata$V1)
  nFiles <- length(files)
  # when stride is > 1, we only read a subset of files
  nFilesToLoad <- as.integer(nFiles/stride)
  nCols <- length(tmpdata)
  ## we generate the full size data.frame first
  tmpdata[,] <- NA
  ldata <- tmpdata
  ldata[((nFilesToLoad-1)*fLength+1):(nFilesToLoad*fLength),] <- tmpdata
  # create a progress bar
  
  pb <- NULL
  if(!verbose) {
    pb <- txtProgressBar(min = 0, max = nFiles, style = 3)
  }
  for(i in c(1:nFiles)) {
    # update progress bar
    if(!verbose) {
      setTxtProgressBar(pb, i)
    }
    if( !(files[i] %in% excludelist) && file.exists(files[i]) && (i-1) %% stride == 0) {
      
      if(verbose) {
        message("Reading from file ", files[i], "\n")
      }
      ## read the data
      tmpdata <- read.table(files[i], colClasses=colClasses, skip=skip)
      if(reduce) {
        tmpdata <- tmpdata[tmpdata[,obs.index] %in% obs,]
      }
      ## sanity checks
      if(fLength < length(tmpdata$V1)) {
        warning(paste("readcmifiles: file ", files[i], " does not have the same length as the other files. We will cut and hope...\n"))
      }
      else if(fLength > length(tmpdata$V1)) {
        stop(paste("readcmifiles: file", files[i], "is too short. Aborting...\n"))
      }
      if(nCols != length(tmpdata)) {
        stop(paste("readcmifiles: file", files[i], "does not have the same number of columns as the other files. Aborting...\n"))
      }
      
      dat_idx_start <- ((i-1)/stride*fLength) + 1
      dat_idx_stop <- dat_idx_start+fLength-1
      ldata[dat_idx_start:dat_idx_stop,] <- tmpdata
      rm(tmpdata)
    }
    else if(!file.exists(files[i])) {
      warning(paste("readcmifiles: dropped file", files[i], "because it's missing\n"))
    }
  }
  if(!verbose) {
    close(pb)
  }

  # if we want to average over successive samples, we do this here
  if(avg > 1){
    for(i in seq(1,nFilesToLoad,by=avg)){
      out_idx_start <- (i-1)*fLength + 1
      out_idx_stop <- i*fLength
      for(j in seq(i+1,i+avg-1)){
        print(j)
        avg_idx_start <- (j-1)*fLength + 1
        avg_idx_stop <- j*fLength
        # add the next sample to the sample that we use as an output base
        ldata[out_idx_start:out_idx_stop,] <- ldata[out_idx_start:out_idx_stop,] +
                                              ldata[avg_idx_start:avg_idx_stop,]
        # invalidate the sample that we just added
        ldata[avg_idx_start:avg_idx_stop,] <- NA 
      }
      # take the average over the samples
      ldata[out_idx_start:out_idx_stop,] <- ldata[out_idx_start:out_idx_stop,]/avg
    }
  }
  ## remove NAs from missing files
  ldata <- na.omit(ldata)

  return(invisible(ldata))
}

#' @export
readcmidatafiles <- function(files, excludelist=c(""), skip=1, verbose=FALSE,
                             colClasses=c("integer", "integer","integer","numeric","numeric"),
                             obs=NULL, obs.index=1, avg=1, stride=1) {

  data <- readcmifiles(files, excludelist=excludelist, skip=skip, verbose=verbose,
                       colClasses=colClasses, obs=obs, obs.index=obs.index, avg=avg, stride=stride)
  attr(data, "class") <- c("cmicor", class(data))  
  return(invisible(data))
}

#' @export
readcmiloopfiles <- function(files, excludelist=c(""), skip=0, verbose=FALSE,
                             colClasses=c("integer", "integer","integer","integer",
                               "numeric","numeric","numeric","numeric"),
                             obs=NULL, obs.index=2) {
  data <- readcmifiles(files, excludelist=excludelist, skip=skip, verbose=verbose,
                       colClasses=colClasses, obs=obs, obs.index=obs.index, avg=1, stride=1)
  attr(data, "class") <- c("cmiloop", class(data))
  return(invisible(data))
}



#' Extract a single loop from an object of class \code{cmiloop}
#' 
#' Extracts all loop values from an object of class \code{cmiloop} for all
#' available times, samples and configurations.
#' 
#' 
#' @param cmiloop input object of class \code{cmiloop} generated for instance
#' with \code{readcmiloopfiles}.
#' @param obs the observable to extract
#' @param ind.vec index vector to be used during extraction with
#' \code{ind.vec[1]} the column with the observable number, \code{ind.vec[2]}
#' the time values, \code{ind.vec[3]} the sample numbers, \code{ind.vec[4]} the
#' real part of the local loop, \code{ind.vec[5]} the imaginary part of the
#' local loop, \code{ind.vec[6]} and \code{ind.vec[7]} the same for fuzzed (or
#' smeared) loops and \code{ind.vec[8]} for the configuraton number.
#' @param L The spatial lattice extent needed for normalisation. If not given
#' set to \code{Time/2}.
#' @return a list with elements as follows:
#' 
#' \code{cf}: real part of the local loop
#' 
#' \code{icf}: imaginary part of the local loop
#' 
#' \code{scf}: real part of the smeared loop
#' 
#' \code{iscf}: imaginary part of the smeared loop
#' 
#' \code{Time=Time}, \code{nrSamples}, \code{nrObs=1}, \code{nrStypes=2},
#' \code{obs=obs} and \code{conf.index}. The last is the list of configurations
#' corresponding to the loops.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmiloopfiles}}
#' @export extract.loop
extract.loop <- function(cmiloop, obs=9, ind.vec=c(2,3,4,5,6,7,8,1), L) {
  ldata <- cmiloop[cmiloop[,ind.vec[1]] == obs,] 
  Time <- max(ldata[,ind.vec[2]])
  nrSamples <- max(ldata[,ind.vec[3]])
  if(missing(L)) {
    L <- Time/2
  }

  cf <- cf_meta(nrObs = 1, Time = Time, nrStypes = 2)
  cf <- cf_orig(cf,
                cf = array(ldata[,ind.vec[4]], dim=c(Time, nrSamples, length(ldata[,ind.vec[4]])/Time/nrSamples))/sqrt(L^3),
                icf = array(ldata[,ind.vec[5]], dim=c(Time, nrSamples, length(ldata[,ind.vec[5]])/Time/nrSamples))/sqrt(L^3))
  cf <- cf_smeared(cf,
                   scf = array(ldata[,ind.vec[6]], dim=c(Time, nrSamples, length(ldata[,ind.vec[6]])/Time/nrSamples))/sqrt(L^3),
                   iscf =  array(ldata[,ind.vec[7]], dim=c(Time, nrSamples, length(ldata[,ind.vec[7]])/Time/nrSamples))/sqrt(L^3),
                   nrSamples = nrSamples,
                   obs = obs)

  # TODO: This should be set via a constructor.
  cf$conf.index <- unique(ldata[,ind.vec[8]])

  return (invisible(cf))
}



#' Extract One or More Gamma Combinations from am CMI Correlator
#' 
#' Extracts one or more gamma matrix combinations (observables) from a
#' correlator stored in cmi format
#' 
#' C(t) and C(-t) are averaged as indicated by \code{sym.vec}.
#' 
#' @param cmicor an correlator object in cmi format
#' @param vec.obs vector containing the numbers of observables to be extracted.
#' @param ind.vec Index vector indexing the column numbers in cmicor to be
#' used. The first must be the observable index, the second the smearing type
#' index, the third the time, the fourth C(+t) and the fifth C(-t).
#' 
#' Index vector indexing the column numbers in cmiloop to be used. The first
#' must be the observable index, the second the smearing type index, the third
#' the time, the fourth ReTL, the fifth ImTL, the sixth ReTF and the seventh
#' ImTF.
#' @param verbose Increases verbosity of the function.
#' @param sym.vec a vector of bools of length equal to the number of
#' observables indicating whether C(t) is symmetric in t, i.e. whether C(+t)
#' and C(-t) should be added or subtracted. If not given C(+t) and C(-t) will
#' be assumed to be symmetric.
#' @param sign.vec a sign vector of length equal to the number of observables
#' indicating whether the corresponding correlation function should be
#' multiplied by +-1.
#' @param symmetrise if set to \code{TRUE}, the correlation function will be
#' averaged for \code{t} and \code{Time-t}, with the sign depending on the value
#' of \code{sym}.  Note that currently the correlator with t-values larger than
#' \code{Time/2} will be discarded.
#' @return returns a list containing \item{cf}{ for \code{extract.obs}: array
#' containing the correlation function with dimension number of files times
#' (nrObs*nrStypes*(Time/2+1)). C(t) and C(-t) are averaged according to
#' \code{sym.vec}.
#' 
#' for \code{extract.loop}: ReTL } \item{icf}{ for \code{extract.loop} only:
#' ImTL } \item{scf}{ for \code{extract.loop} only: ReTF } \item{sicf}{ for
#' \code{extract.loop} only: ImTF } \item{Time}{ The time extent of the
#' correlation functions.  } \item{nrStypes}{ The number of smearing
#' combinations.  } \item{nrObs}{ The number of observables.  }
#' \item{nrSamples}{ for \code{extrac.loop} only: the number of samples found
#' in the files.  }
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmicor}}, \code{\link{readcmidatafiles}},
#' @keywords ts
#' @examples
#' 
#' files <- paste0(system.file(package="hadron"), "/extdata/outprcvn.dddd.00.0000")
#' X <- readcmifiles(files, skip=0,
#'                   colClasses=c("integer", "integer","integer","numeric","numeric"))
#' Y <- extract.obs(X)
#' Y
#' 
#' @export extract.obs
extract.obs <- function(cmicor, vec.obs=c(1), ind.vec=c(1,2,3,4,5),
                        sym.vec, sign.vec, verbose=FALSE, symmetrise=TRUE) {
  if(missing(cmicor)) {
    stop("extract.obs: data missing in extract.obs\n")
  }
  ## consistency check for data in the data
  if( !all( unique(vec.obs) %in% unique(cmicor[,ind.vec[1]]) )) {
    stop("extract.obs: vec.obs does not match or is not fully included in the observable list in the data\n")
  }
  nrObs <- length(vec.obs)
  nrStypes <- length(unique(cmicor[,ind.vec[2]]))
  Time <-  2*max(cmicor[,ind.vec[3]])
  Thalf <- max(cmicor[,ind.vec[3]])+1
  if(Thalf != length(unique(cmicor[,ind.vec[3]]))) {
    stop("extract.obs: data inconsistent, Time not equal to what was found in the input data\n")
  }
  if(verbose) message("extract.obs: nrObs=",nrObs, " nrStypes=",nrStypes, " Time=", Time, "\n")

  data <- cmicor[cmicor[,ind.vec[1]] %in% vec.obs,]
  cf <- NULL
  
  if(symmetrise){
    ## we divide everything by 2 apart from t=0 and t=Time/2
    data[(data[,ind.vec[3]]!=0 & (data[,ind.vec[3]]!=(Thalf-1))),ind.vec[c(4,5)]] <-
        data[(data[,ind.vec[3]]!=0 & (data[,ind.vec[3]]!=(Thalf-1))),ind.vec[c(4,5)]]/2
    ## symmetrise or anti-symmetrise for given observable?
    isym.vec <- rep(+1, times= nrObs*Thalf*nrStypes)
    isign.vec <- rep(+1, times= nrObs*Thalf*nrStypes)
    if(!missing(sym.vec)) {
      if(length(sym.vec) != nrObs) {
        stop("extract.obs: sym.vec was given, but does not have the correct length")
      }
      for(i in c(1:nrObs)) {
        if(!sym.vec[i]) {
          isym.vec[((i-1)*Thalf*nrStypes+1):((i)*Thalf*nrStypes)] <- -1
        }
      }
    }
    if(!missing(sign.vec)) {
      if(length(sign.vec) != nrObs) {
        stop("extract.obs: sign.vec was given, but does not have the correct length")
      }
      for(i in c(1:nrObs)) {
        if(sign.vec[i] < 0) {
          isign.vec[((i-1)*Thalf*nrStypes+1):((i)*Thalf*nrStypes)] <- -1
        }
      }
    }

    cf <- t(array(isign.vec*(data[,ind.vec[4]] + isym.vec*data[,ind.vec[5]]),
                dim=c(nrObs*Thalf*nrStypes, length(data[,1])/(nrObs*Thalf*nrStypes))))
  }else{ # no symmetrisation
    cf <- t(array(0,dim=c( nrObs*Time*nrStypes, length(data[,1])/(nrObs*Thalf*nrStypes) ) ) )
    for(bw in c(0:1)){
      # select forward or backward correlator
      tmp <-  t(array(data=data[,ind.vec[4+bw]],dim=c(nrObs*Thalf*nrStypes, length(data[,1])/(nrObs*Thalf*nrStypes))))
      for(i in c(1:nrObs)){
        for(s in c(1:nrStypes)){
          # since we are not symmetrising, the individual correlators have Time entries 
          lhs <- c((bw+1):(Thalf-bw)) + (i-1)*nrStypes*Time  + (s-1)*Time  + bw*(Thalf-1)
          # in the cmi format, the backward correlator is on time-slices 1 to Time/2-1 (indices 2 to Time/2)
          rhs <- c((bw+1):(Thalf-bw)) + (i-1)*nrStypes*Thalf + (s-1)*Thalf
          # but in "reverse" order (to ease averaging)
          if( bw == 1 ) rhs <- rev(rhs)
          cf[,lhs] <- tmp[,rhs]
        }
      }
    }
  }
  
  ret <- cf_meta(nrObs = nrObs, Time = Time, nrStypes = nrStypes, symmetrised = symmetrise)
  ret <- cf_orig(ret, cf = cf, icf = NULL)

  return (invisible(ret))
}

#' readhlcor
#'
#' @param filename String. Filename of the heavy light correlator data file. The
#'    file is expected to have nine columns, the first four integer, the second four numeric
#'    and the last integer valued again.
#'
#' @return
#' Invisibly returns a \link{data.frame} object containing the file content.
#' @export
readhlcor <- function(filename) {
  return(invisible(read.table(filename, header=FALSE,
                              colClasses=c("integer", "integer","integer","integer","numeric","numeric","numeric","numeric","integer"))))
}



#' Read Data In output.data Format of tmLQCD
#' 
#' reads data from an output.data file written by tmLQCD
#' 
#' The data can be plotted directly using \dQuote{plot}.
#' 
#' @param filename filename of the data file
#' @return returns a data frame of class \dQuote{outputdata} containing the
#' data.
#' @author Carsten Urbach \email{curbach@@gmx.de}
#' @keywords file
#' @return
#' Returns an object of class `outputdata` derived from a data.frame
#' as generated by \link{read.table} applied to the input file.
#' 
#' @examples
#' 
#' plaq <- readoutputdata(paste0(system.file(package="hadron"), "/extdata/output.data"))
#' plot(plaq)
#' 
#' @export readoutputdata
readoutputdata <- function(filename) {
  data <- read.table(filename, header=FALSE)
  attr(data, "class") <- c("outputdata", "data.frame")  
  return(invisible(data))
}



#' Read correlator data from single file
#' 
#' Reads arbitrary number of samples for a complex correlation function from a
#' text file.
#' 
#' 
#' @param file filename of file to read from.
#' @param Time time extent of the correlation function
#' @param sym if \code{TRUE} average C(+t) and C(-t), otherwise C(+t) and
#' -C(-t). Averaging can be switched off using the \code{symmetrise} option.
#' @param skip number of lines to skip at beginning of file
#' @param check.t if set to an integer value larger than zero the function will
#' assume that in the corresponding column of the file the Euclidean time is
#' counted and it will check whether the maximum in this column is identical to
#' Time-1.
#' @param ind.vector index vector of length 2 with the indices of real and
#' imaginary values of correlator, respectivley.
#' @param symmetrise if set to \code{TRUE}, the correlation function will be
#' averaged for \code{t} and \code{Time-t}, with the sign depending on the value
#' of \code{sym}. Note that currently the correlator with t-values larger than
#' \code{Time/2} will be discarded.
#' @param path the path to the files.
#' @param autotruncate Boolean. Whether to autotruncate or not
#' @param avg Integer. Average over successive number samples
#' @param stride Integer. Read only subset of files with corresponding stride.
#' @param Nmin Integer. Minimal number of measurements that must remain after
#' sparsification and averaging. Default equals to 4.
#' @return returns a list with two arrays \code{cf} and \code{icf} with real
#' and imaginary parts of the correlator, and integers \code{Time},
#' \code{nrStypes=1} and \code{nrObs=1}. Both of the arrays have dimension
#' \code{c(N, (Time/2+1))}, where \code{N} is the number of measurements
#' (gauges).  \code{Time} is the time extent, \code{nrStypes} the number of
#' smearing levels and \code{nrObs} the number of operators, both of which are
#' currently fixed to 1.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmidatafiles}}, \code{\link{readbinarydisc}},
#' \code{\link{readcmidisc}}, \code{\link{readcmicor}},
#' \code{\link{readbinarycf}}
#' @keywords file
#' @export readtextcf
readtextcf <- function(file, Time=48, sym=TRUE, path="", skip=1, check.t=0, ind.vector=c(2,3), symmetrise=TRUE,
                       stride=1, avg=1, Nmin=4, autotruncate=TRUE) {
  stopifnot(!missing(file))
  stopifnot(Time >= 1)

  tmp <- read.table(paste(path, file, sep=""), skip=skip)
  stopifnot(!((length(ind.vector) < 2) || (max(ind.vector) > length(tmp)) || (min(ind.vector) < 1)))
  
  if(check.t > 0 && max(tmp[[check.t]]) != Time-1) {
    stop("Time in function call does not match the one in the file, aborting...\n")
  }

  if(length(tmp[[ind.vector[1]]]) %% Time != 0) {
    stop("Time does not devide the number of rows in file, aborting... check value of paramter skip to readtextcf!\n")
  }
  
  ii <- c(1:(Time/2+1))

  tmp <- array(tmp[[ind.vector[1]]] + 1i*tmp[[ind.vector[2]]], dim=c(Time, length(tmp[[ind.vector[1]]])/Time))
  if( (stride > 1 | avg > 1) & (ncol(tmp) %% (stride*avg) != 0) ){
    if(autotruncate){
      warning(sprintf("stride=%d, avg=%d, ncol=%d\n",stride,avg,ncol(tmp)))
      warning("readtextcf: Sparsification and/or averaging requested, but their product does not divide the number of measurements!\n")
      warning("readtextcf: Reducing the number of total measurements to fit!\n")
      nmeas <- as.integer( (stride*avg)*floor( ncol(tmp)/(stride*avg) ))
      if( nmeas/(stride*avg) >= Nmin ){
        tmp <- tmp[,1:nmeas]
      } else {
        warning(sprintf("readtextcf: After sparsification and averaging, less than %d measurements remain, disabling sparsification and averaging!\n",Nmin))
        stride <- 1
        avg <- 1
      }
    } else {
      stop("readtextcf: Sparsification and/or averaging requested, but their product does not divide the number of measurements!\n")
    }
  }

  ## sparsify data
  if(stride > 1){
    sp.idx <- seq(from=1,to=ncol(tmp),by=stride)
    tmp <- tmp[,sp.idx]
  } 
  # average over 'avg' measurements sequentially
  if(avg > 1){
    tmp2 <- tmp
    tmp <- array(0, dim=c(Time,ncol(tmp2)/avg))
    for( i in c(1:ncol(tmp)) ){
      tmp[,i] <- (1.0/avg)*apply(X=tmp2[,((i-1)*avg+1):(i*avg)],
                                 MARGIN=1,
                                 FUN=sum)
    }
  }
  
  ret <- cf_meta(nrObs = 1, Time=Time, nrStypes = 1)
  ret <- cf_orig(ret, cf = t(Re(tmp)), icf = t(Im(tmp)))

  if (symmetrise) {
    sign <- +1
    if (!sym) sign <- -1

    ret <- symmetrise.cf(ret, sign)
  }

  return (invisible(ret))
}

#' @title reader for Nissa text format correlation functions
#' @param file_basenames_to_read Character vector of file names without the
#'                              smearing combination suffixes (such as 'll', 'ls', 'sl', 'ss')
#'                              which will be added in the reading routine accordign to what was 
#'                              passed via `smear_combs_to_read`. An example would be
#'                              '0001/mes_contr_2pts', not the lack of the smearing suffix.
#' @param smear_combs_to_read Character vector containing the smearing cominations that are to be read.
#'                            These will be attached to the `file_basenames_to_read` in the reading routine.
#' @param Time Integer, time extent of the lattice.
#' @param combs_to_read Data frame containing the indices of the masses and r-paramter combinations to
#'                      be read as well as the name of the spin combination.
#'                      For a two-point function using the second and third mass (0-indexed), 
#'                      the (+^dag,+) r-combination and the pseudoscalar-pseudoscalar spin combination
#'                      would look as follows:
#'                      \tabular{rrrrr}{
#'                        m1_idx \tab m2_idx \tab r1_idx \tab r2_idx \tab spin_comb \cr
#'                        1      \tab 2      \tab 0      \tab 0      \tab "P5P5"
#'                      }
#' @param sym.vec Integer or numeric vector. Specifies whether the correlator at
#'                the given position is symmetric (+1.0) or anti-symmetric (-1.0 )
#'                under time reflection. This is passed to \code{symmetrise.cf}. This
#'                should be of sufficient length to cover all correlators that are
#'                going to be read (one number per row of \code{combs_to_read} and
#'                per entry of \code{smear_combs_to_read})
#' @param symmetrise Boolean, specifies whether averaging over backward and forward
#'                   correlators should be done after the correlator has been read in.
#' @param nts Integer, number of time slices to be read from the correlator files.
#'
#' @return
#' Returns an object of class `cf`.
#' 
#' @export
readnissatextcf <- function(file_basenames_to_read,
                            smear_combs_to_read,
                            Time,
                            combs_to_read,
                            nts = Time, 
                            sym.vec = c(1),
                            symmetrise = FALSE)
{
  tmp <- read_nissa_textcf_kernel(file_basenames_to_read,
                                  smear_combs_to_read,
                                  nts,
                                  combs_to_read)

  total_nts <- nts*length(smear_combs_to_read)*nrow(combs_to_read)

  realcols <- seq(1,2*total_nts,2)
  imagcols <- seq(2,2*total_nts,2)

  cf <- cf_meta(nrObs = nrow(combs_to_read), Time=Time, nrStypes = length(smear_combs_to_read), symmetrised = FALSE)
  cf <- cf_orig(cf, cf = tmp[,realcols,drop=FALSE], icf = tmp[,imagcols,drop=FALSE])

  if(symmetrise){
    # in some cases it makes sense to store only a subset of the time slices of a
    # correlation function. In this case, symmetrisation is not possible unless
    # the missing time slices are reconstructed or added manually somehow.
    if( nts != Time ){
      stop("The time extent and the number of time slices in the correlator do not agree, cannot symmetrise!")
    }
    cf <- symmetrise.cf(cf, sym.vec)
  }
  return (invisible(cf))
}




#' read correlation function from binary files
#' 
#' Reads a correlation function from binary files, including hdf5 formatted
#' files.
#' 
#' It is assumend that each file contains at least \code{(obs+N)*Time} complex
#' doubles, where \code{Time} is the time extent, \code{obs} is the number of the
#' observable to read in and \code{Nop} the number of replicas for this
#' observable. It is assumed that complex is the fastest running index, next
#' time and then obs. The filelist is assumed to be ordered according to the
#' gauge configuration MC history.
#' 
#' @param files list of filenames to be read. Can be created using
#' \code{getorderedfilelist}. The filelist is assumed to be order according to
#' ascending gauge fields.
#' @param Time time extent of correlation functions.
#' @param obs each file may contain many correlation functions. With 'obs'
#' one choses which observable to read in. To be precise, in each file the
#' reading will start at point Time*obs*sizeof(complex\code{<double>}) and read
#' Nop*Time*sizeof(complex\code{<double>}).
#' @param symmetrise symmetrise the correlation function or not
#' @param Nop number of replicas for the correlator to read in.
#' @param endian the endianess of the binary file.
#' @param excludelist files to exclude from reading.
#' @param sym if \code{TRUE} average C(+t) and C(-t), otherwise C(+t) and
#' -C(-t).
#' @param op the N replicas can be either averaged (\code{op="aver"}) or summed
#' (\code{op="sum"}).
#' @param path path to be prepended to every filename.
#' @param hdf5format if \code{TRUE}, try to read from an hdf5 file.
#' @param hdf5name Name of the data set as a string.
#' @param hdf5index The data might be an array of size n x Time. \code{hdf5index}
#' is used to convert two columns of the data to a complex valued vector using
#' the first and second index for real and imaginary part, respectively. If
#' \code{hdf5index} has length smaller than 2 the first index is reused.
#' @return returns a list with two arrays \code{cf} and \code{icf} with real
#' and imaginary parts of the correlator, and integers \code{Time},
#' \code{nrStypes=1} and \code{nrObs=1}. Both of the arrays have dimension
#' \code{c(N, (Time/2+1))}, where \code{N} is the number of measurements
#' (gauges).  \code{Time} is the time extent, \code{nrStypes} the number of
#' smearing levels and \code{nrObs} the number of operators, both of which are
#' currently fixed to 1.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmidatafiles}}, \code{\link{readbinarydisc}},
#' \code{\link{readcmidisc}}, \code{\link{readcmicor}}
#' @keywords file
#' @examples
#'
#' X <- readbinarycf(path=paste0(system.file(package="hadron"), "/extdata/"),
#'                   files="C2_bin.dat", Time=64, obs=0)
#' X
#' X$cf
#' 
#' @export readbinarycf
readbinarycf <- function(files, 
                         Time, 
                         obs=5, 
                         Nop=1,
                         symmetrise=TRUE,
                         endian="little",
                         op="aver",
                         excludelist=c(""), 
                         sym=TRUE, 
                         path="",
                         hdf5format=FALSE, 
                         hdf5name, 
                         hdf5index=c(1,2)) {
  if(missing(files)) {
    stop("files must be given! Aborting...\n")
  }
  if(Nop < 1) {
    stop("Nop must be larger than 0 and integer, aborting...\n")
  }
  if(obs < 0) {
    stop("obs must be a positive integer, aborting...\n")
  }
  if(Time < 1) {
    stop("Time must be larger than 0 and integer, aborting...\n")
  }
  if(endian != "little" && endian != "big") {
    stop("endian must be either little or big, aborting...\n")
  }
  if(hdf5format) {
    if(missing(hdf5name)) stop("hdf5name must be given, aborting...\n")
    h5avail <- requireNamespace('rhdf5')
    if(!h5avail) stop("rhdf5 package not installed, aborting...\n")
    obs <- 1
    Nop <- 1
    if(length(hdf5index)<2) hdf5index <- c(hdf5index, hdf5index)
  }
  ii <- c(1:(Nop*Time))+obs*Time

  Cf <- complex()
  for(f in files) {
    ifs <- paste(path, f, sep="")
    if( !(ifs %in% excludelist) && file.exists(ifs)) {
      tmp <- numeric()
      if(!hdf5format) {
        to.read <- file(ifs, "rb")
        tmp <- readBin(to.read, what=complex(), n=(obs+Nop)*Time, endian = endian)[ii]
      }
      else {
        LS <- rhdf5::h5ls(ifs)
        if(as.integer(LS[LS$name == hdf5name,]$dim) != Time) {
          stop(paste(hdf5name, "in file", ifs, "has not the correct length, aborting...\n"))
        }
        tmp <- rhdf5::h5read(file=ifs, name=hdf5name)
        tmp <- tmp[,hdf5index[1]] + 1i*tmp[,hdf5index[2]]
      }
      ## we average the replica
      if(Nop > 1) {
        if(op == "aver") {
          tmp <- apply(array(tmp, dim=c(Time, Nop)), 1, mean)
        }
        else {
          tmp <- apply(array(tmp, dim=c(Time, Nop)), 1, sum)
        }
      }

      Cf <- cbind(Cf, tmp[1:Time])
      
      if(!hdf5format) {
        close(to.read)
      }
      else {
        if(exists("rhdf5::H5close")) rhdf5::H5close()
      }
    }
    else if(!file.exists(ifs)) {
      warning("file ", ifs, " does not exist...\n")
    }
  }

  ret <- cf_meta(nrObs = 1, Time=Time, nrStypes = 1, symmetrised = FALSE)
  ret <- cf_orig(ret, cf = t(Re(Cf)), icf = t(Im(Cf)))

  if (symmetrise) {
    sign <- +1
    if (!sym) sign <- -1
    ret <- symmetrise.cf(ret, sign)
  }

  return (invisible(ret))
}


#' Read binary correlation function by sample
#'
#' Read binary correlation functions sample by sample, return as a list of
#' length `nosamples` where increasing indices refer to averaging over
#' increasing numbers of samples.
#'
#' @param files character vector. Paths to the file to read. As `path` is
#' prepended to each element, one can also just pass the filenames here.
#' @param Time numeric. Time extent.
#' @param endian character, either `little` or `big`.
#' @param path character. Path that is prefixed to each of the paths given in
#' `files`.
#' @param excludelist character vector. Elements in `files` that are specified
#' in `excludelist` are skipped. The caller could also just pass
#' `setdiff(files, excludelist)`.
#' @param sym logical. Whether the read data shall be symmetrized in the end.
#' @param ftype numeric type. As the data is read in binary this type has to
#' match exactly the one in the file.
#' @param nosamples number of samples
#'
#' @return
#' Returns a \link{list} of `cf` objects.
#' 
#' @export
readbinarysamples <- function(files, Time=48, nosamples=2, endian="little",
                              excludelist=c(""), sym=TRUE, path="", ftype=double() ){

  if(missing(files)) {
    stop("files must be given! Aborting...\n")
  }
  stopifnot(length(files) > 0)

  if(Time < 1) {
    stop("Time must be larger than 0 and integer, aborting...\n")
  }
  if(endian != "little" && endian != "big") {
    stop("endian must be either little or big, abroting...\n")
  }
  
  Cf <- list()
  for( i in 1:nosamples ){
    Cf[[i]] <- ftype
  }

  for(f in files){
    ifs <- paste(path, f, sep="")
    if( !(ifs %in% excludelist) && file.exists(ifs)){
      to.read <- file(ifs,"rb")
      tmp <- array(readBin(to.read, what=ftype, n=Time*nosamples, endian=endian), dim=c(Time, nosamples))
      close(to.read)
      # average over increasing numbers of samples
      for( i in 1:nosamples ){
        tmp2 <- ftype
        if( i == 1 ){
          tmp2 <- tmp[,1]
        } else {
          tmp2 <- apply(X=tmp[,1:i],MARGIN=1,FUN=mean)
        }
        Cf[[i]] <- cbind(Cf[[i]], tmp2)
      }
    } else if(!file.exists(ifs)) {
      warning("file ", ifs, " does not exist...\n")
    }
  }

  ret <- list()
  for (i in 1:nosamples) {
    ret[[i]] <- cf_meta(nrObs = 1, Time = Time, nrStypes = 1, symmetrised = FALSE)
    ret[[i]] <- cf_orig(ret[[i]], cf = t(Re(Cf[[i]])), icf = t(Im(Cf[[i]])))

    sign <- +1
    if (!sym) sign <- -1
    ret[[i]] <- symmetrise.cf(ret[[i]], sign)
  }

  return (invisible(ret))
}




#' read disconnected loops from binary files
#' 
#' Reads disconnected loops from binary files.
#' 
#' It is assumend that each file contains O*Time complex doubles, where Time is the
#' time extent and O the number of observables in the file. It is assumed that
#' complex is the fastest running index, next time and then observables. The
#' different samples are assumend to be in different files. The file list is
#' assumed to be ordered with number of samples running fastest, and then
#' number of gauges.
#' 
#' @param files list of filenames to be read. Can be created for instance using
#' \code{getorderedfilelist}. The filelist is assumed to be ordered with number
#' of samples running fastest, and the next to fastest nubmer of gauges.
#' @param Time time extent of correlation functions.
#' @param obs each file may contain Time*obs correlation functions. With
#' \code{obs} one choses which observable to read in.
#' @param endian the endianess of the binary file.
#' @param excludelist files to exclude from reading.
#' @param nrSamples the number of samples
#' @param path path to be prepended to every filename.
#' @return returns a list with two arrays \code{cf} and \code{icf} with real
#' and imaginary parts of the loops, and integers \code{Time},
#' \code{nrStypes=1}, \code{nrSamples} and \code{nrObs=1}. Both of the arrays
#' have dimension \code{c(Time, N)}, where \code{N} is the number of measurements
#' (gauges) and \code{Time} the time extent, \code{nrStypes} the number of smearing
#' levels and \code{nrObs} the number of operators, both of which are currently fixed to 1.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmidatafiles}}, \code{\link{readbinarycf}},
#' \code{\link{readcmidisc}}, \code{\link{readcmicor}}
#' @keywords file
#' @examples
#'
#' ## running toy example
#' file <- paste0(system.file("extdata", package = "hadron"), "/C2_pi0.dat")
#' X <- readbinarydisc(files=file, Time=64, obs=0)
#' X$cf
#'
#' ## more realistic example
#' \dontrun{files <- character()}
#' \dontrun{for(i in seq(600,1744,8)) }
#' \dontrun{  files <- c(files, "C2_dis_u_conf", sprintf("%.04d", i), ".dat", sep="")}
#' \dontrun{cf <- readbinarydisc(files, obs=4, excludelist=c("C2_pi0_conf0632.dat"))}
#' 
#' @export readbinarydisc
readbinarydisc <- function(files, Time=48, obs=5, endian="little",
                           excludelist=c(""), nrSamples=1, path="") {
  Cf <- complex()

  N <- length(files)/nrSamples
  if(nrSamples*N != length(files)) {
    stop("readbinarydisc: not the same number of samples per gauge")
  }
  for(f in files) {
    ifs <- paste(path,f,sep="")
    if( !(ifs %in% excludelist) && file.exists(ifs)) {
      to.read <- file(ifs, "rb")
      tmp <- readBin(to.read, complex(), n=(obs+1)*Time, endian = endian)
      Cf <- cbind(Cf, tmp[c((obs*Time+1):((obs+1)*Time))])
      close(to.read)
    }
  }
  Cf <- array(Cf, dim=c(Time, nrSamples, N))

  cf <- cf_meta(Time = Time)
  cf <- cf_orig(cf, cf = Re(Cf), icf = Im(Cf))
  cf <- cf_smeared(cf, scf = NA, iscf = NA, nrSamples = nrSamples, obs = obs)

  return (invisible(cf))
}



#' reads disconnected loops in cmi format
#' 
#' reads disconnected loops in cmi (Chris Michael) format from a list of files.
#' 
#' 
#' @param files list of filenames to be read. Can be created using
#' \code{getorderedfilelist}.
#' @param obs index of operator to parse from files
#' @param ind.vec vector containing the index (column in file) of obs, t,
#' samples, Re(local), Im(local, Re(smeared), Im(smeared).
#' @param excludelist files to exclude from reading.
#' @param skip lines to skip at beginning of each file.
#' @param colClasses The column data type classes, the \code{read.table}.
#' @param L the spatial lattice extent, set to \code{Time/2} if missing.
#' @param debug setting debug to TRUE makes the routine more verbose by
#' spilling out separate filenames.
#' @return returns a list with four arrays \code{cf}, \code{icf} \code{scf} and
#' \code{sicf} containing real and imaginary parts of the local and smeared
#' loops, respectively, and integers \code{Time}, \code{nrStypes=2},
#' \code{nrSamples} and \code{nrObs=1}. The four arrays have dimension
#' \code{c(Time, S, N)}, where \code{S} is the nubmer of samples, \code{Time} is the
#' time extent and \code{N} is the number of measurements (gauges).
#' \code{Time} is the time extent, \code{nrStypes} the number of smearing
#' levels and \code{nrObs} the number of operators, which are currently fixed
#' to 1 and 2, respectively. \code{nrSamples} is the number of samples.
#' 
#' Note that the arrays are normalised by \code{1/sqrt(L^2)}.
#' 
#' The routine expects that all files have identical content. Otherwise the
#' routine will stop.
#' @author Carsten Urbach, \email{curbach@@gmx.de}
#' @seealso \code{\link{readcmidatafiles}}, \code{\link{readbinarycf}},
#' \code{\link{readbinarydisc}}, \code{\link{readcmicor}}
#' @keywords file
#' @examples
#'
#' # a running toy example
#' hpath <- system.file(package="hadron")
#' files <- paste0(hpath, "/extdata/newdisc.0.1373.0.006.k0v4.10")
#' X <- readcmidisc(files=files)
#' X
#'
#' ## a more realistic example
#' \dontrun{v4files <- character()}
#' \dontrun{for(i in seq(600,1744,8))}
#' \dontrun{  v4files <- }
#' \dontrun{   c(v4files, paste("disc.0.163265.0.006.k0v4.", sprintf("%.04d", i), sep=""))}
#' \dontrun{v4data <- readcmidisc(v4files)}
#' @export readcmidisc
readcmidisc <- function(files, obs=9, ind.vec=c(2,3,4,5,6,7,8),
                        excludelist=c(""), skip=0, L,
                        colClasses=c("integer", "integer","integer","integer",
                          "numeric","numeric","numeric","numeric"),
                        debug = FALSE) {
  if(missing(files)) {
    stop("readcmidisc: filelist missing, aborting...\n")
  }
  if(length(ind.vec) != 7) {
    stop("readcmidisc: ind.vec must have length 7, aborting...\n")
  }
  nFiles<-0
  for(f in files) {
    if( !(f %in% excludelist) && file.exists(f)) {
      nFiles<-nFiles+1
      lastFile<-f
    }
  }
  tmp <- read.table(lastFile, colClasses=colClasses, skip=skip)
  tmp <- tmp[tmp[,ind.vec[1]] %in% obs, ]
  nRows <- nrow(tmp)
  nCols <- ncol(tmp)
  ldata <- matrix(0,nFiles*nRows, nCols)

  n <- 0
  for(f in files) {
    if( !(f %in% excludelist) && file.exists(f)) {
      if(debug) print(f)
      tmp <- read.table(f, colClasses=colClasses, skip=skip)

      ldata[c((n*nRows+1):((n+1)*nRows)),] <- as.matrix(tmp[tmp[,ind.vec[1]] %in% obs, ])
      n <- n+1
    }
  }
  Time <- max(ldata[,ind.vec[2]])
  nrSamples <- max(ldata[,ind.vec[3]])

  if(missing(L)) L <- Time/2

  cf <- cf_meta(nrObs = 1, Time = Time, nrStypes = 2)
  cf <- cf_orig(cf,
                cf = array(ldata[, ind.vec[4]], dim=c(Time, nrSamples, nFiles))/sqrt(L^3),
                icf = array(ldata[, ind.vec[5]], dim=c(Time, nrSamples, nFiles))/sqrt(L^3))
  cf <- cf_smeared(cf,
                   scf = array(ldata[, ind.vec[6]], dim=c(Time, nrSamples, nFiles))/sqrt(L^3),
                   iscf= array(ldata[, ind.vec[7]], dim=c(Time, nrSamples, nFiles))/sqrt(L^3),
                   nrSamples = nrSamples,
                   obs = obs)

  return (invisible(cf))
}



#' Read Gradient Flow Output Files in tmLQCD format
#' 
#' given a pathname, reads all gradient flow output files in that directory
#' 
#' This function reads all tmLQCD gradient flow files in the given path and
#' returns a data frame which concatenates them all.
#' 
#' The single files are expected to be in the tmLQCD format which consists of a
#' header with the column names "traj t P Eplaq Esym tsqEplaq tsqEsym Wsym" and
#' the measurement for each flow time in rows. The columns can be ordered
#' arbitrarily as long as the header and the data are consistent.
#' 
#' @param path the path into which the function should descend
#' @param skip number of measurements to skip.
#' @param basename basename of the files to be read.
#' @param col.names column names of the columns in the files to be read. If not
#' given it will be infered from the files, if possible.
#' @return The function returns a data frame ordered first by the flow time and
#' then by the the trajectory number (so the trajectory number is the index
#' which runs fastest). The data frame has column names \itemize{ \item t -
#' flow time \item traj - trajectory number \item P - plaquette expectation
#' value (at flow time t) \item Eplaq - energy density from plaquette
#' definition (at flow time t) \item Esym - energy density from clover
#' definition (at flow time t) \item tsqEplaq - flow time squared multiplied by
#' plaquette energy density \item tsqEsym - flow time squared multiplied by
#' clover energy density \item Wsym - BMW 'w(t)' observable }.
#' @author Bartosz Kostrzewa, \email{bartosz.kostrzewa@@desy.de}
#' @keywords file
#' @examples
#' 
#' path <- system.file("extdata/", package="hadron")
#' raw.gf <- readgradflow(path)
#' 
#' @export readgradflow
readgradflow <- function(path, skip=0, basename="gradflow", col.names) {
  files <- getorderedfilelist(path=path, basename=basename, last.digits=6)
  # the trajectory numbers deduced from the filename
  gaugeno <- getconfignumbers(files, basename=basename, last.digits=6)
  files <- files[(skip+1):length(files)]
  if(length(files)==0) stop(sprintf("readgradflow: no tmlqcd gradient flow files found in path %s",path))

  tmpdata <- data.frame()
  if(missing(col.names)) {
    tmpdata <- read.table(file=files[1],colClasses="numeric",header=TRUE,stringsAsFactors=FALSE)
  }
  else {
    tmpdata <- read.table(file=files[1],colClasses="numeric",col.names=col.names,stringsAsFactors=FALSE)
  }
  ## add the trajectory number, if not present
  if(is.null(tmpdata$traj)) tmpdata$traj <- gaugeno[1]
  
  fLength <- length(tmpdata$t)
  nFiles <- length(files)
  nCols <- ncol(tmpdata)
  ## we generate the full size data.frame first
  tmpdata[,] <- NA
  ldata <- tmpdata
  ldata[((nFiles-1)*fLength+1):(nFiles*fLength),] <- tmpdata
  rm(tmpdata)
  
  pb <- NULL
  pb <- txtProgressBar(min = 0, max = length(files), style = 3)
  for( i in 1:length(files) ){
    setTxtProgressBar(pb, i)
    tmp <- data.frame()
    if(missing(col.names)) {
      tmp <- read.table(file=files[i], colClasses="numeric", header=TRUE, stringsAsFactors=FALSE)
    }
    else {
      tmp <- read.table(file=files[i], colClasses="numeric", col.names=col.names, stringsAsFactors=FALSE)
    }
    if(is.null(tmp$traj)) tmp$traj <- gaugeno[i]
    # the tmlqcd gradient flow routine has the tendency to crash, so we check if the files
    # are complete 
    if( dim( tmp )[1] != fLength ) {
      warning(sprintf("For file %s, number of rows is not correct: %d instead of %d\n",files[i],dim(tmp)[1],fLength) )
      ldata[((i-1)*fLength+1):(i*fLength),] <- NA
    } else {
      ldata[((i-1)*fLength+1):(i*fLength),] <- tmp
    }
  }
  close(pb)

  # keep only rows which contain all data
  ldata <- ldata[complete.cases(ldata),]

  # order by t as outermost index
  ldata <- ldata[order(ldata$t,ldata$traj),]
  rownames(ldata) <- NULL
  return(invisible(ldata))
}
Any scripts or data that you put into this service are public.
hadron documentation built on Sept. 9, 2022, 5:06 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
hadron
Analysis Framework for Monte Carlo Simulation Data in Physics

R/readutils.R
In hadron: Analysis Framework for Monte Carlo Simulation Data in Physics

Documented in extract.loop extract.obs getorderedconfignumbers getorderedfilelist readbinarycf readbinarydisc readbinarysamples readcmicor readcmidatafiles readcmidisc readcmifiles readcmiloopfiles readgradflow readhlcor readnissatextcf readoutputdata readtextcf

Try the hadron package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hadron Analysis Framework for Monte Carlo Simulation Data in Physics

R/readutils.R In hadron: Analysis Framework for Monte Carlo Simulation Data in Physics

Documented in extract.loop extract.obs getorderedconfignumbers getorderedfilelist readbinarycf readbinarydisc readbinarysamples readcmicor readcmidatafiles readcmidisc readcmifiles readcmiloopfiles readgradflow readhlcor readnissatextcf readoutputdata readtextcf

Try the hadron package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hadron
Analysis Framework for Monte Carlo Simulation Data in Physics

R/readutils.R
In hadron: Analysis Framework for Monte Carlo Simulation Data in Physics