R/sliceTS.R

Defines functions sliceTS

Documented in sliceTS

#' @title Slice time series
#'
#' @description Cut slices out of previously generated time series located in
#' the input folder and store the slices in the output folder.
#'
#' @details The input folder is expected to contain the folder structure as generated by
#' generateTS.R (timeseries subfolder containing experiment subfolders).
#' For each experiment identifier \code{id}, the file
#' \code{<input.folder>/timeseries/<id>_timeseries/<id>_timeseries.txt} is read as a
#' tab-delimited table without header; time points are the columns of that table.
#' Sliced time series are stored in the output folder without subfolders, as
#' \code{<id>_sliced_timeseries.txt}. The output folder is created if it does not exist.
#' A slice has to span at least two time points (the start must be smaller than the end).
#'
#' @param slices a matrix (or data frame) with one row per time series and 2 columns; the row is selected by the experiment identifier, the first column gives the start and the second the end time point; an NA end (or an end beyond the last time point) means until end of the time series
#' @param slice.def.path path to a slice definition file, which has two tab-delimited columns for the start and end time points, respectively; NA as end means until end of the time series; ignored (with a warning) if slices is also given
#' @param input.folder location of time series and settings sub folders
#' @param output.folder folder in which all slices go (no sub-folders)
#' @param expIds set of experiment identifiers to process
#' @return NULL, invisibly; the function is called for its side effect of writing files.
#' @export
sliceTS <- function(slices = NULL, slice.def.path = "", input.folder = "", output.folder = "", expIds = c()) {
  # ---- check input folder structure ----
  if (input.folder == "") {
    stop("Please provide the input folder!")
  }
  if (!file.exists(input.folder)) {
    stop(paste("The input folder", input.folder, "does not exist!"))
  }
  input.timeseries.folder <- file.path(input.folder, "timeseries")
  if (!file.exists(input.timeseries.folder)) {
    stop("The input folder does not have a time series subfolder!")
  }

  # ---- check / create output folder ----
  if (output.folder == "") {
    stop("Please provide the output folder!")
  }
  if (!file.exists(output.folder)) {
    # recursive, so that a nested output path whose parents are missing works too
    dir.create(output.folder, recursive = TRUE)
  }

  # ---- resolve slice definitions (matrix takes precedence over file) ----
  if (is.null(slices) && slice.def.path == "") {
    stop("Please provide either the matrix with slice definitions or the path to the slice definition file.")
  }
  if (!is.null(slices) && slice.def.path != "") {
    warning("Both the matrix with slice definitions and the path to the slice definition file is provided. The latter is ignored.")
    slice.def.path <- ""
  }
  if (slice.def.path != "") {
    print(paste("Reading slice definitions from", slice.def.path))
    slices <- read.table(slice.def.path, header = FALSE)
  }

  for (expId in expIds) {
    print(paste("Processing identifier", expId))

    input.timeseries.name <- paste(expId, "timeseries", sep = "_")
    input.timeseries.expId.folder <- file.path(input.timeseries.folder, input.timeseries.name)
    if (!file.exists(input.timeseries.expId.folder)) {
      stop("The input time series folder does not have a subfolder for the input experiment identifier!")
    }

    # read time series file
    ts.name <- paste(expId, "timeseries.txt", sep = "_")
    input.path.ts <- file.path(input.timeseries.expId.folder, ts.name)
    print(paste("Reading time series from:", input.path.ts, sep = " "))
    series <- as.matrix(read.table(file = input.path.ts, sep = "\t", header = FALSE))
    n.timepoints <- ncol(series)

    startSlice <- slices[expId, 1]
    endSlice <- slices[expId, 2]

    # An NA start cannot be interpreted; it also shows up when a data frame of
    # slice definitions has no row for this identifier. Fail with a clear message
    # instead of the cryptic "missing value where TRUE/FALSE needed".
    if (is.na(startSlice)) {
      stop(paste("The start of the subset definition for time series", expId, "is missing (NA) or no slice is defined for this identifier."))
    }

    # NA or too large an end means: slice until the last time point
    if (is.na(endSlice) || endSlice > n.timepoints) {
      endSlice <- n.timepoints
    }

    if (startSlice >= endSlice) {
      stop(paste("The start of the subset definition for time series", expId, "is equal to or larger than the end."))
    }

    # drop = FALSE keeps a matrix even when the time series has a single row,
    # so that ncol() below stays defined
    slicedTS <- series[, startSlice:endSlice, drop = FALSE]
    print(paste("Length of the slice", ncol(slicedTS)))

    # save time series (write() fills row-wise, hence the transpose)
    ts.name <- paste(expId, "sliced_timeseries.txt", sep = "_")
    ts.path <- file.path(output.folder, ts.name)
    write(t(slicedTS), file = ts.path, ncolumns = ncol(slicedTS), sep = "\t")
  } # end loop expIds

  invisible(NULL)
}
hallucigenia-sparsa/seqtime documentation built on Jan. 9, 2023, 11:53 p.m.