R/mm_mea.R
In sjemea: Multi electrode analysis

## mm_mea.R --- Code for reading Markus Meister's MEA data.
## Author: Stephen Eglen
## Copyright: GPL
## Mon 15 Jan 2007
## This is code for the array data from Markus Meister (MM)
## not likely to be useful for CARMEN, as it was a handbuilt array
## used for the data that appeared in (Meister et al., 1991) on retinal waves
## in developing retina.


## * Markus's functions.

## These are some constants assumed in Markus's functions.
mm.WrapTime <- 16** 4 * 128        #Clock wraparound in tics, ca 419s.
mm.sample.rate <- 20000.0               #20KHz sample rate.
mm.burst.sep <- 10
mm.num.electrodes <- 63                 #of these, 61 usuable...
## Size of the various data types.
mm.longsize <- 4; mm.floatsize <- 4; mm.intsize <- 2; mm.unsize <- 2


mm.readpos <- function(posfile) {
  ## Read in a .pos file that has been generated by Markus' MAC program.
  x <- read.table(posfile,sep="\t", skip=5, header=FALSE)
  res <- cbind(x$V2, x$V3)
  if (dim(x)[2] == 4) rownames(res) <- as.character(x$V4)
  class(res) <- "mm.pos"
  res
}

## File ~ms/ms_sje_pos.text has been converted into a data file using:
## mmpos <- mm.readpos("~/ms/ms_sje_pos.text")
## save(mmpos, file = "mmpos.rda", ascii=TRUE)
## This file was then put in the data subdirectory.

mm.readpos.compare <- function(NCells, boxes, posfile) {
  ## Generate the multisite positions.
  ## Read in the position file if it was given to compare with my
  ## assignment of channels to electrode positions.
  guess.pos <- array(0, dim=c(NCells,2))
  ##mm.pos <- mm.readpos("~/ms/ms_sje_pos.text")
  ## shouldn't have to load data() each time...
  data(mmpos)
  for (i in 1:NCells) {
    matches <- which(boxes[,1] == i)
    ##cat(paste("matches for cell",i,":",matches, "\n"))
    if (length(matches) == 0) {
      stop(paste("no matches found for cell",i))
    }
    channel <- boxes[matches[1],2]
    guess.pos[i,] <- mmpos[channel,]
  }
  if (is.character(posfile)) {

    if (!file.exists(posfile))
      stop(paste("Position file",posfile,"does not exist"))
    
    ## now read in the spike position file.  Always return this
    ## if it is available.
    pos <- mm.readpos(posfile)
    if (NCells != dim(pos)[1]) {
      stop(paste("NCells against size in this.pos differs",
                 NCells, dim(pos)[1], "\n"))
    }

    ## compare the computed positions with those points read in from
    ## Markus' program.
    diffs <- pos - guess.pos
    dists <- apply(diffs, 1, function(x) { sqrt(sum(x**2))})
    if (any(dists)) {
      warning(paste("some cell positions wrong\n",
                    paste(which(dists >0),
                          signif(dists[which(dists>0)],4), "\n",
                          collapse=" "),"\n"))
    }
  } else {
    ## no data file, so just take guess.
    pos <- guess.pos
  }

  class(pos) <- "mm.pos"
  pos
}

plot.mm.pos <- function(x, use.rownames=FALSE) {
  ## Show the layout of Markus' electrodes within the array.
  range <- c(-300,300)
  plot(x[,1], x[,2], asp=1,
       xlim=range, ylim=range, xlab="", ylab="", type="n")
  if (use.rownames)
    text(x[,1], x[,2], rownames(x))
  else
    text(x[,1], x[,2])
}

read.ms.mm.data <- function(cellname, posfile=NULL) {
  ## Read in the multisite data and return a list with all the relevant
  ## data.  Determine the format of the file then call the appropriate
  ## routine (format1, format2).

  if(is.null(posfile) ) {
    posfile <- paste(cellname, ".pos", sep='')
    if (!file.exists(posfile))
      posfile <- NULL
    else
      cat(paste("guess posfile:",posfile, "\n"))
  }
    
  fp <- file(cellname , 'rb')
  Format <- readBin(fp, integer(), 1, mm.longsize, endian="big")
  close(fp)

  if (Format == 2) {
    cat(paste("Guessing",cellname, "is format 2\n"))
    res <- read.ms.mm.data.format2(cellname, posfile)
  } else {
    cat(paste("Guessing",cellname, "is format 1\n"))
    res <- read.ms.mm.data.format1(cellname, posfile)
  }

  ## meanfiring rate is the number of spikes divided by the (time of
  ## last spike - time of first spike).  
  meanfiringrate <- res$nspikes /
    ( sapply(res$spikes, max) - sapply(res$spikes, min))

  ## Do some things common to both formats.
  dists <- make.distances(res$pos)

  ##mm.distance.breaks <- c(0, 35, 105, 175, 245, 315, 385, 455, 525, 595)
  mm.distance.breaks <- c(0, seq(35, by=70, length=9))
  mm.distance.breaks.strings <-
    levels(cut(0, mm.distance.breaks, right=FALSE, include.lowest=TRUE))

  dists.bins   <- bin.distances(dists, mm.distance.breaks)
  corr.indexes.dt <- 0.05
  corr.indexes <- make.corr.indexes(res$spikes, corr.indexes.dt)
  res$dists <- dists
  res$dists.bins <- dists.bins
  res$corr.indexes <- corr.indexes
  res$corr.indexes.dt <- corr.indexes.dt
  corr.id <- cbind(my.upper(dists), my.upper(corr.indexes))  
  corr.id.means <- corr.get.means(corr.id)
  res$corr.id <- corr.id
  res$corr.id.means <- corr.id.means
  res$distance.breaks <- mm.distance.breaks
  res$distance.breaks.strings <- mm.distance.breaks.strings
  res$rates <- make.spikes.to.frate(res$spikes)
  class(res) <- "mm.s"
  res
}

read.ms.mm.data.format2 <- function(cellname, posfile=NULL) {
  ## Read in the multisite data and return a list with all the relevant
  ## data (format 2).

  ## Get the total size of the file so it can be compared with value
  ## of seek() once all the data have been read in.
  filesize <- file.info(cellname)$size
  
  fp <- file(cellname , 'rb')
  seek(fp,0)
  Format <- readBin(fp, integer(), 1, mm.longsize, endian="big")
  t <- readBin(fp, integer(), 4, mm.longsize, endian="big")
  FileIndex <- t[1]; BoxIndex <- t[2]; RecIndex <- t[3]; StatIndex <- t[4]

  ## Now read the NFiles...
  seek(fp, 64)
  t <- readBin(fp, integer(), 4, mm.intsize, endian="big")
  NFiles <- t[1]; NBoxes <- t[2]; NRecords <- t[3]; NCells <- t[4]

  if (NFiles>1)
    warning(paste("NFiles larger than 1 - check ok? - e.g. endTimes",
                  NFiles, "\n"))
  
  t <- readBin(fp, integer(), 2, mm.longsize, endian="big")
  NEvents <- t[1]; NSpikes <- t[2]
  cat(paste("NEvents", NEvents, "NSpikes", NSpikes, "\n"))

  ## Read in the fileinfo.
  if (seek(fp) != FileIndex)
    stop("error - current file position different from expected FileIndex")

  seek(fp, FileIndex)
  for (r in 1:NFiles) {
    vrn <- readBin(fp, integer(), 1, mm.intsize, endian="big")
    pfilename <- readChar(fp, 64)
    pdirname <- readChar(fp, 64)
    flcrdat  <- readBin(fp, integer(), 1, mm.longsize, endian="big")
    t <- readBin(fp, integer(), 3, mm.intsize, endian="big")
    LowRecord <- t[1]; nrec <- t[2]; LastRecord <- t[3];
    cat(paste("File", r, "name", pfilename, "dir", pdirname,
              "nrec", nrec, "LastRecord", LastRecord, "\n"))
  }

  ## Read in the Box Info ####################
  if (seek(fp) != BoxIndex)
    stop("error - current file position different from BoxIndex")

  ## Make an array to store the boxes.
  boxes <- array(0, dim= c(NBoxes, 7))
  ## todo -- determine how these boxes relate to position of neurons.
  seek(fp, BoxIndex)
  for (r in 1:NBoxes) {
    t <- readBin(fp, integer(), 7, mm.intsize, endian="big")
    group <- t[1]; channel <- t[2]; plott <- t[3]
    ##cat(paste("Box", r, "Group", group, "Chan", channel,
    ##"Plot", plott, "bounds", t[4], t[5], t[6], t[7], "\n"))
    boxes[r,] <- t
  }


  ## now parse the RecIndex... ####################

  if (seek(fp) != RecIndex)
    stop(paste("seek position different from expected RecIndex",
               seek(fp), RecIndex))

  ## RecIndex points to an array of length NRecords, each which points
  ## to the start of the rth record.

  seek(fp,RecIndex)
  RecordIndexes <- readBin(fp, integer(), NRecords, mm.longsize, endian="big")

  ## Parse each record...

  startclock <- integer(NRecords)
  endclock   <- integer(NRecords)
  starttimes <- integer(NRecords)       #to be calculated...
  endtimes   <- integer(NRecords)
  nevents    <- integer(NRecords)
  nspikes.rec<- integer(NRecords)

  spikecount <- 0
  eventcount <- 0
  laststop   <- 0

  ## Make an empty list of size NCells.  Each element will be a list.
  allspikes <- list()
  for (i in 1:NCells) {
    allspikes[i] <- list()
  }
  EndTime <- 0;                        #todo: start of each file?

  for (r in 1:NRecords) {

    if ((laststop >0) && (seek(fp) != laststop)) {
      stop(paste("Error: RecordIndex and position of last byte differ",
                 "Record", r, "start", start, "laststop", laststop))
    }
    
    seek(fp, RecordIndexes[r])
    startclock[r] <- readBin(fp, integer(), 1, mm.unsize, signed=FALSE,
                             endian="big")
    endclock[r]   <- readBin(fp, integer(), 1, mm.unsize, signed=FALSE,
                             endian="big")
    nevents[r]    <- readBin(fp, integer(), 1, mm.longsize, endian="big")
    nspikes.rec[r]<- readBin(fp, integer(), 1, mm.longsize, endian="big")


    ShiftTime <- (EndTime %/% mm.WrapTime) * mm.WrapTime
    while ( ( (startclock[r] * 128) + ShiftTime) < EndTime)
      ShiftTime <- ShiftTime + mm.WrapTime

    starttimes[r] <- (startclock[r] * 128) + ShiftTime
    
    cat(paste(r, "clock", startclock[r], endclock[r], "#events", nevents[r],
              "#spikes", nspikes.rec[r], "\n"))
    spikecount <- spikecount + nspikes.rec[r]
    eventcount <- eventcount + nevents[r]
    ## Read in the number of spikes from each cell in record r.
    spikespercell <- readBin(fp, integer(), NCells, mm.longsize, endian="big")

    ## Read in the time of event.
    eventsinrecord <- readBin(fp, integer(), nevents[r],mm.longsize, endian="big")

    ## width of events
    we <- readBin(fp, integer(), nevents[r], mm.intsize, endian="big")

    ## peak of events
    pe <- readBin(fp, integer(), nevents[r], mm.intsize, endian="big")

    ## time of each spike from each cell in record r
    TLast <- -1
    for (cell in 1:NCells) {
      nspikescell <- spikespercell[cell]
      spiketimes <- readBin(fp, integer(), nspikescell, mm.longsize, endian="big")


      if (nspikescell >0 ) {
        spiketimes <- spiketimes + ShiftTime
        lastspiketime <- spiketimes[nspikescell]
        if ( TLast < lastspiketime)
          TLast <- lastspiketime
      }
      
      if (r == 1)
        allspikes[cell] <- list(spiketimes)
      else
        allspikes[cell] <- list(c(allspikes[[cell]],spiketimes))
    }


    ## Now check the end time.
    ##cat(paste("before: EndTime", EndTime, "TLast", TLast, "\n"))
    if (EndTime < TLast)
      EndTime <- TLast

    endtimes[r] <- ( (endclock[r]+1)* 128) +
      ((EndTime %/% mm.WrapTime) * mm.WrapTime)

    if (endtimes[r] < EndTime) {
      endtimes[r] <- endtimes[r] + mm.WrapTime
      cat(paste("wraptime added", mm.WrapTime, "\n"))
    }

    EndTime <- endtimes[r]
    
    ## this is the end of the loop for this record.
    laststop <- seek(fp)                     # used for counting purposes.



  }
  if (spikecount != NSpikes)
    stop(paste ("spikecount differs from expected value",
                spikecount, NSpikes))

  if (eventcount != NEvents)
    stop(paste ("eventcount differs from expected value",
                eventcount, NEvents))

  ## Check the C values
  if (seek(fp) != StatIndex)
    stop(paste ("StatIndex problem", stop, StatIndex))

  C <- readBin(fp, integer(), NCells, mm.intsize, endian="big")
  SpikesInCell <- readBin(fp, integer(), NCells, mm.longsize, endian="big")

  if (( sum(SpikesInCell) != NSpikes))
    stop("Error in the total number of spikes in cell")

  ## Can also check SpikesInCell with the sum of spikes
  count.allspikes <- sapply(allspikes, length)
  if (sum(abs(count.allspikes - SpikesInCell)) > 0)
    stop("Counts of spikes differs...")

  Pe <- readBin(fp, numeric(), NCells, mm.floatsize, endian="big")
  Wi <- readBin(fp, numeric(), NCells, mm.floatsize, endian="big")
  PP <- readBin(fp, numeric(), NCells, mm.floatsize, endian="big")
  WP <- readBin(fp, numeric(), NCells, mm.floatsize, endian="big")
  WW <- readBin(fp, numeric(), NCells, mm.floatsize, endian="big")
  CrossF <- readBin(fp, numeric(), NCells*mm.num.electrodes, mm.floatsize, endian="big")
  CrossR <- readBin(fp, numeric(), NCells*mm.num.electrodes, mm.floatsize, endian="big")

  dim(CrossF) <- c(NCells,mm.num.electrodes)
  dim(CrossR) <- c(NCells,mm.num.electrodes)
  if ( seek(fp) != filesize)
    stop(paste("difference at end of file", seek(fp), filesize))

  ## End of processing this file.
  close(fp)

  pos <- mm.readpos.compare(NCells, boxes, posfile)

  ## Convert spike times into seconds.
  allspikes <- lapply(allspikes, function(x) { x / mm.sample.rate})

  ## check that the spikes are monotonic.
  check.spikes.monotonic(allspikes)
  bursts <- lapply(allspikes, function(x) spikes.to.bursts(x, mm.burst.sep))

  ## Check that the number of spikes matches the number we return in "spikes"
  if (NSpikes != sum(sapply(allspikes,length)))
    stop("NSpikes and actual number of spikes differ")

  
  res <- list (NFiles=NFiles, NBoxes=NBoxes, NRecords = NRecords,
               NSpikes=NSpikes, NEvents=NEvents,
               startclock=startclock, endclock=endclock,
               nevents=nevents, nspikes.rec=nspikes.rec,
               starttimes=starttimes,
               endtimes=endtimes,
               NCells=NCells, boxes=boxes, C=C,
               spikes=allspikes,
               nspikes=sapply(allspikes, length),
               bursts=bursts,
               CrossF=CrossF, CrossR=CrossR, Pe=Pe,
               file=cellname,
               pos=pos)
  class(res) <- "mm.s"
  res
}

read.ms.mm.data.format1 <- function(cellname, posfile=NULL) {
  ## Read in the multisite data and return a list with all the relevant
  ## data.  This assumes the data is in format 1.

  ## Get the total size of the file so it can be compared with value
  ## of seek() once all the data have been read in.
  filesize <- file.info(cellname)$size

  fp <- file(cellname , 'rb')
  seek(fp,0)
  t <- readBin(fp, integer(), 4, mm.intsize, endian="big")
  
  NFiles <- t[1]; NBoxes <- t[2]; NRecords <- t[3]; NCells <- t[4]

  t <- readBin(fp, integer(), 2, mm.longsize, endian="big")
  NSpikes <- t[1]; NEvents <- t[2];
  
  cat(paste("NFiles", NFiles, "NBoxes", NBoxes, "NRecords", NRecords,
            "NCells", NCells, "\n"))
  cat(paste("NEvents", NEvents, "NSpikes", NSpikes, "\n"))


  ## Read in the file information
  for (r in 1:NFiles) {
    vrn <- readBin(fp, integer(), 1, mm.intsize, endian="big")
    pfilename <- readChar(fp, 64)
    pdirname <- readChar(fp, 64)
    flcrdat  <- readBin(fp, integer(), 1, mm.longsize, endian="big")
    t <- readBin(fp, integer(), 3, mm.intsize, endian="big")
    LowRecord <- t[1]; nrec <- t[2]; LastRecord <- t[3];
    cat(paste("File", r, "name", pfilename, "dir", pdirname,
              "nrec", nrec, "LastRecord", LastRecord, "\n"))
  }

  ## Read in the Box Info ####################
  boxes <- array(0, dim= c(NBoxes, 7))
  for (r in 1:NBoxes) {
    t <- readBin(fp, integer(), 7, mm.intsize, endian="big")
    group <- t[1]; channel <- t[2]; plott <- t[3]
    ##cat(paste("Box", r, "Group", group, "Chan", channel,
    ##"Plot", plott, "bounds", t[4], t[5], t[6], t[7], "\n"))
    boxes[r,] <- t
  }

  startclock <- readBin(fp, integer(), NRecords,mm.unsize,signed=FALSE,
                        endian="big")
  endclock   <- readBin(fp, integer(), NRecords,mm.unsize,signed=FALSE,
                        endian="big")

  cat("start and end times\n")
  print(startclock); print(endclock)
  SpikesInRecords <- readBin(fp, integer(), NRecords, mm.longsize, endian="big")
  SpikesInCell    <- readBin(fp, integer(),   NCells, mm.longsize, endian="big")


  ## This seems to duplicate the information in the boxes.
  C              <- readBin(fp, integer(),   NCells,  mm.intsize, endian="big")
  ## cat("about to print C\n");   print(C)
  ##cat(paste("after reading C, file pos is", seek(fp), "\n"))


  ##   Pe <- readBin(fp, numeric(), NCells, doublesize, endian="big")
  ##   print(Pe[1:20])
  ##   Wi <- readBin(fp, numeric(), NCells, doublesize, endian="big")
  ##   PP <- readBin(fp, numeric(), NCells, doublesize, endian="big")
  ##   WP <- readBin(fp, numeric(), NCells, doublesize, endian="big")
  ##   WW <- readBin(fp, numeric(), NCells, doublesize, endian="big")

  ##   CrossF <- readBin(fp, numeric(), NCells*mm.num.electrodes, doublesize, endian="big")
  ##   CrossR <- readBin(fp, numeric(), NCells*mm.num.electrodes, doublesize, endian="big")
  ##   dim(CrossF) <- c(NCells,mm.num.electrodes)
  ##   dim(CrossR) <- c(NCells,mm.num.electrodes)

  ## double is 10 bytes according to my calculations.
  ## Markus acknowledges that the double format in ThinkC (MAC) is curious
  ## so  for now, I'm just reading in blocks of 10 bytes.  131 is
  ## derived from (mm.num.electrodes + mm.num.electrodes) + 5 
  tempstuff <- readBin(fp, integer(), NCells*131*10/2, 2, endian="big")
  
  ## Number of spikes from each cell in each record
  N <- readBin(fp, integer(), NCells*NRecords, mm.longsize, endian="big")
  dim(N) <- c(NCells,NRecords)
  ##cat("N\n");print(N)

  ## The N array is useful for knowing which spikes belong to which cell
  ## and which record.
  if (any(apply(N, 2, sum) != SpikesInRecords))
    stop("SpikesInRecords and Col sum of N differ")

  ## All spike times for each cell (and for each record) are read
  ## in at once, and stored in a big T array.
  ## Time of each spike from each cell in each record
  my.num.spikes <- sum(N)
  cat(paste("my.num.spikes", my.num.spikes, "\n"))
  T <- readBin(fp, integer(), my.num.spikes, mm.longsize, endian="big")

  breaks <- as.vector(N)
  high <- cumsum(breaks)
  low <- c(1, high[1:(length(high)-1)]+1)

  spikes <- apply(cbind(low, breaks), 1,
                  function (i) {
                    num <- i[2];
                    if (num > 0)        #return spikes if there are some
                      res <- T[i[1]:(i[1]+(num-1))]
                    else                #return empty vector
                      res <- numeric(0)
                    res
                  })

  starttimes <- integer(NRecords)       #to be calculated...
  endtimes   <- integer(NRecords)
  nevents    <- integer(NRecords)
  nspikes.rec<- integer(NRecords)

  ## Make an empty list of size NCells.  Each element will be a list.
  allspikes <- list()
  for (i in 1:NCells) allspikes[i] <- list()
  EndTime <- 0;                        #todo: start of each file?

  for (r in 1:NRecords) {

    ShiftTime <- (EndTime %/% mm.WrapTime) * mm.WrapTime
    while ( ( (startclock[r] * 128) + ShiftTime) < EndTime)
      ShiftTime <- ShiftTime + mm.WrapTime

    starttimes[r] <- (startclock[r] * 128) + ShiftTime
    
    cat(paste(r, "clock", startclock[r], endclock[r], "\n"))

    ## time of each spike from each cell in record r
    TLast <- -1
    for (cell in 1:NCells) {
      ## spikes for this record and cell.
      spiketimes <- spikes[[((r-1)*NCells)+cell]]
      nspikescell <- length(spiketimes)

      if (nspikescell >0 ) {
        spiketimes <- spiketimes + ShiftTime
        lastspiketime <- spiketimes[nspikescell]
        if ( TLast < lastspiketime)
          TLast <- lastspiketime
      }
      if (r == 1)
        allspikes[cell] <- list(spiketimes)
      else
        allspikes[cell] <- list(c(allspikes[[cell]],spiketimes))
    }

    ## Now check the end time.
    ##cat(paste("before: EndTime", EndTime, "TLast", TLast, "\n"))
    if (EndTime < TLast)
      EndTime <- TLast

    endtimes[r] <- ( (endclock[r]+1)* 128) +
      ((EndTime %/% mm.WrapTime) * mm.WrapTime)

    if (endtimes[r] < EndTime) {
      endtimes[r] <- endtimes[r] + mm.WrapTime
      cat(paste("wraptime added", mm.WrapTime, "\n"))
    }

    EndTime <- endtimes[r]
    
  }                                   #next record.
  
  ## Number of events in each record
  EventsInRecord <- readBin(fp, integer(), NRecords, mm.longsize, endian="big")


  my.num.events <- sum(EventsInRecord)
  if (my.num.events > 0) {
    warning(paste("we have some events... oh oh!", my.num.events, "\n"))
    ## todo: need to read in TE, WE, PE if we have any events.
  }
  TE <- readBin(fp, integer(), my.num.events, mm.longsize, endian="big")
  WE <- readBin(fp, integer(), my.num.events, mm.longsize, endian="big")
  PE <- readBin(fp, integer(), my.num.events, mm.longsize, endian="big")

  ## should now be at the end of the file, so can check file length.

  if ( seek(fp) != filesize)
    stop(paste("difference at end of file", seek(fp), filesize))

  ## End of processing this file.
  close(fp)

  pos <- mm.readpos.compare(NCells, boxes, posfile)
  
  allspikes <- lapply(allspikes, function(x) { x / mm.sample.rate})
  
  ## check that the spikes are monotonic.
  check.spikes.monotonic(allspikes)

  ## Check that the number of spikes matches the number we return in "spikes"
  if (NSpikes != sum(sapply(spikes,length)))
    warning("NSpikes and actual number of spikes differ")
  
  bursts <- lapply(allspikes, function(x) spikes.to.bursts(x, mm.burst.sep))
  
  res <- list (NFiles=NFiles, NBoxes=NBoxes, NRecords = NRecords,
               NSpikes=NSpikes, NEvents=NEvents,
               startclock=startclock, endclock=endclock,
               nevents=NEvents,
               nspikes.rec=nspikes.rec, # #of spikes per record.
               starttimes=starttimes,
               endtimes=endtimes,
               NCells=NCells, boxes=boxes, C=C,
               spikes=allspikes,
               nspikes=sapply(allspikes, length), # #of spikes/cell.
               bursts=bursts,
               T=T,
               ##CrossF=CrossF, CrossR=CrossR, Pe=Pe,
               file=cellname,
               N=N,
               SpikesInRecords=SpikesInRecords,
               SpikesInCell=SpikesInCell,
               pos=pos)
  class(res) <- "mm.s"
  res
}