R/blender.R

## Blender data import and processing script
# Functionalized version of blenderprocess and blenderprocessfile
# Eric Ruud, MEI Research Ltd. eruud@meinergy.com
#
# Requires: data$BlenderData for processing data
# Optional: params$files$blenderfile to save filename
# Outputs:

# mark script for NAMESPACE inclusion
#'@export
blender <- function(data, params) {

  ## Settings
  # Derivative window for change detection
  derivative_window <- 2
  # Number of times above mean for break detection
  num_mean <- 1.5
  # Number of points above deriv thold to count as a break
  deriv_thold <- 2
  # How close values need to be to fullscale to avoid cropping
  cutoffpercent <- .02
  # Min number of points for measurement after data cleansing
  minpoints <- 5

  # Check to see if we have been passed the raw file
  # If true, extract filename
  # If false, filename = NULL
  if(!is.null(params$files$blenderfile))
  {
    # Decode file date to char from b64
    b64_decoded_raw <- rawToChar(base64enc::base64decode(params$files$blenderfile))
    # Get row that has filename
    filename <- read.csv(textConnection(b64_decoded_raw), check.names=FALSE, nrows = 1, skip = 0, header = FALSE, stringsAsFactors = FALSE)
    filename <- basename(filename[1,2])
  } else {
    filename <- "NULL"
  }

  # Extract datasets
  import <- data$BlenderData

  # remove notes and alarms (assume enabled for now)
  import <- import[,1:(length(import[1,])-2)]

  # Get offset of MFC data
   MFCoff <- min(which(colnames(import) == "MFCFlow_1"))-1

  # Change the date/time to POSIX format
  # Assume format and time column name (CalRQ files)
  import$Time <- as.POSIXct(import$Time, format = "%Y-%m-%dT%H:%M:%SZ")

  # Remove data with BIOSFlow as a NaN since we can't use it
  # We don't want to do this for MFCs yet since we don't know
  # when each MFC is being tested
  import <- import[complete.cases(import$BIOSFlow),]

  # Calc a data interval for the derivative (seconds)
  data_interval <- median(diff(as.numeric(import$Time)))

  # Find out how many MFCs are present in file (up to 4, sequentially)
  # return as numMFC
  for (i in 1:4)
  {
    if (!is.null(eval(parse(text = paste("import$MFCFlow_",i,sep = "")))))
    {
      numMFC <- i
    }
  }

  # Add an index vector based on rownames
  # (keeps track of BIOs rows lost to NaN)
  import$index <- as.numeric(rownames(import))

  # Initalize a vector for storing data about breakpoints we find in the MFC data
  processing <- matrix(FALSE, length(import[,1]),numMFC)

  # For each MFC, find out when it is in use
  for (i in 1:numMFC)
  {
    # We'll try to figure out where the data intervals are now
    # Take the derivative, assume values above the average of the
    # derivative mean the value has switched.
    mfcderiv <- as.numeric(abs(derivative(import[MFCoff+i], data_interval, derivative_window)))

    # Initialize a vector for counting changes in mfcderiv
    k <- 0

    # Find sequences of mfcderiv above our cutoff for detecting a change in flow
    # (constant * mean deriv)
    cutoff <- mfcderiv > num_mean*mean(mfcderiv)
    for (j in 1:length(mfcderiv))
    {
      if (cutoff[j] == TRUE)
      {
        # start counting as above mean thold
        k <- k + 1
      }
      else {
        # stop counting and check if count > count thold
        if (k >= deriv_thold)
        {
          # record as a break
          processing[j,i] <- TRUE
        }
        # reset count to find next break
        k <- 0
      }
    }
    }
  # Create matric for tracking MFC data
  MFCpts <- matrix(data=NA,nrow=length(processing[,1]),ncol=numMFC)

  # Scan data from each MFC and record breakpoints
  for (i in 1:numMFC)
  {
    # where does the flow change?
    changes <- which(processing[,i])

    # need at least 2 points
    if (length(changes)>1)
    {
      for (j in 1:(length(changes)-1))
      {
        # Store data
        MFCpts[j,i] <- changes[j]
        # Will overwrite if > 1 point set because endpoint is shared with startpoint
        MFCpts[j+1,i] <- changes[j+1]
      }
    }
  }

  # Initialize results matrix
  # not sure how long this will be but it won't be longer than import
  # maybe there is a better way to do this
  # create an index so we can cut it later and keep track of where we are
  results <- matrix(data = NA, nrow = length(import[,1]), ncol = MFCoff + 8, dimnames = list(1:length(import[,1]),c(colnames(import[,1:MFCoff]),"MFC mean","BIOS mean","MFC sdev","Bios sdev", "MFC","Number of Points","Start Point","End point")))
  resultsindex <- 1

  # Calibration processing
  # Cycle through MFCs
  for (i in 1:length(MFCpts[1,]))
  {
    # non-NA data
    breaks <- MFCpts[!is.na(MFCpts[,i]),i]

    # get lengths of breaks
    lengths <- breaks[2:length(breaks)]-breaks[1:length(breaks)-1]

    # make sure we have data to process
    if (!length(lengths) == 0)
    {
      # define limits for outliers as percent of fullscale
      mfcdata <- import[breaks[1]:breaks[length(breaks)],MFCoff+i]
      biosdata <- import[breaks[1]:breaks[length(breaks)],2]

      limit <- max(mfcdata[!is.na(mfcdata)])*cutoffpercent
      bioslimit <- max(biosdata[!is.na(biosdata)])*cutoffpercent

      # Cycle through breaks
      for (j in 1:(length(breaks)-1))
      {
        # get indicies of data in this segment
        points <- breaks[j]:breaks[j+1]

        # If this is the warm up period, omit first half of data
        if (lengths[j] > 1.5 * median(lengths))
        {
          points <- points[round(length(points)/2):length(points)]
        }

        # exclude MFC data with NA
        points <- points[!is.na(import[points,MFCoff+i])]

        # calc mean for cutoff test
        mfcmean = median(import[points,MFCoff+i])
        biosmean = median(import[points,2])

        # Crop out points that fail our cutoff tests
        # MFC test
        mfccut <- abs(import[points,MFCoff+i] - mfcmean) > limit
        bioscut <- abs(import[points,2] - biosmean) > bioslimit

        # save to points vector
        points <- points[!mfccut & !bioscut]

        # combine data for these points
        # exclude Notes and Time for avg
        # check that we have enough points
        if (length(points) > minpoints)
        {
          # OK to add to results matrix
          # first add all non MFC data

          # Date is first value
          results[resultsindex,1] <- import[points[1],1]

          # All others are avg
          results[resultsindex,2:MFCoff] <- colMeans(import[points,2:MFCoff])

          results[resultsindex,(MFCoff+1):(MFCoff+8)] <- c(mean(import[points,MFCoff+i]), mean(import[points,2]), sd(import[points,MFCoff+i]),  sd(import[points,2]), i, length(points), points[1], points[length(points)])

          resultsindex <- resultsindex + 1
        }
      }
    }
  }

  resultstest <- results[!is.na(results[,1]),]

  results <- matrix(0, length(changes)-1, 4)

  results$MFCavg[j] <- mean(import[chages[j]:changes[j+1],4+i])
  results$biosavg[j] <-  mean(import[chages[j]:changes[j+1],4+i])
  results$sdev[j] <-
  results$numpoints[j] <-




}
###
{
  # Check if deriv is above mean
  if (biosderiv[i] > 15*median(biosderiv))
  {
    # if it is, add to the counter vector
    counter <- counter + 1
  }
  else
  {
    # if it's not above the average, check to see if it was previously, for at least 2 points.
    if (counter > 1)
    {
      # if so, note this breakpoint and zero the counter vector to find the next break
      breaks <- append(breaks,i-2)
      counter <- 0
    }
  }
}
eruud/eric.pilr.r documentation built on May 16, 2019, 8:49 a.m.