R/beadarrayMSV.R

Defines functions readBeadSummaryOutput getNormInd scatterArrays shearRawSignal normalizeIllumina getNoiseDistributions transformChannels plotEstimatedNoise transformSEs normalizeShearedChannels createAlleleSet cart2pol createAlleleSetFromFiles createMultiSetFromFiles callGenotypes callGenotypes.verboseTest callGenotypes.interactive findSeTheta generatePolyCenters getCenters getSpecificCenters findClusters findPolyploidClusters testHardyWeinberg validateCallsPedigree countFailedSNP plotGenotypes manualCall validateSingleCall setNormOptions setGenoOptions setMergeOptions unmixParalogues getSingleCalls assignToAlleleSet translateTheta makeDiploidCalls translateThetaCombined translateThetaFromFiles resolveInheritanceSNP locateParalogues plotCountsChrom assignParalogues preprocessBeadSet plotPreprocessing makeFilenames modifyExistingFilenames writeAlleleSet

Documented in assignParalogues assignToAlleleSet callGenotypes callGenotypes.interactive callGenotypes.verboseTest cart2pol countFailedSNP createAlleleSet createAlleleSetFromFiles createMultiSetFromFiles findClusters findPolyploidClusters findSeTheta generatePolyCenters getCenters getNoiseDistributions getNormInd getSingleCalls getSpecificCenters locateParalogues makeDiploidCalls makeFilenames manualCall modifyExistingFilenames normalizeIllumina normalizeShearedChannels plotCountsChrom plotEstimatedNoise plotGenotypes plotPreprocessing preprocessBeadSet readBeadSummaryOutput resolveInheritanceSNP scatterArrays setGenoOptions setMergeOptions setNormOptions shearRawSignal testHardyWeinberg transformChannels transformSEs translateTheta translateThetaCombined translateThetaFromFiles unmixParalogues validateCallsPedigree validateSingleCall writeAlleleSet

#New class-definitions
setClass('BeadSetIllumina',contains='eSet', prototype =
         prototype(new("VersionedBiobase",
                       versions = c(classVersion("eSet"), BeadSetIllumina = "1.0.0"))))
setMethod("initialize", "BeadSetIllumina",
          function(.Object,
                   assayData = assayDataNew(R = R, se.R = se.R, G = G, se.G = se.G,
                     no.beads = no.beads, storage.mode = 'list'),
                   R = new("matrix"), se.R = new("matrix"), G = new("matrix"),
                   se.G = new("matrix"), no.beads = new("matrix"), ...) {
            if (!missing(assayData) &&
                any(!missing(R),!missing(G),!missing(se.R),!missing(se.G),!missing(no.beads))){
              warning("Using 'assayData'; ignoring 'R', 'G', 'se.R', 'se.G', 'no.beads'")
            }
            callNextMethod(.Object, assayData = assayData, ...)
          })
setValidity("BeadSetIllumina", function(object) {
  val <- assayDataValidMembers(assayData(object), c("R", "se.R", "G", "se.G", "no.beads"))
  if (is.logical(val)){
    val <- storageMode(object) %in% "list"
    if (!val){
      val <- "\nRequired storageMode: 'list'"
    }
  }
  val
})

setClass('AlleleSetIllumina',contains='eSet', prototype =
         prototype(new("VersionedBiobase",
                       versions = c(classVersion("eSet"), AlleleSetIllumina = "1.0.0"))))
setMethod("initialize", "AlleleSetIllumina",
          function(.Object, intensity = new("matrix"), theta = new("matrix"),
                   SE = new("matrix"), ...) {
            callNextMethod(.Object, intensity = intensity, theta = theta, SE = SE,
                           storage.mode = 'list', ...)
          })
setValidity("AlleleSetIllumina", function(object) {
  val <- assayDataValidMembers(assayData(object), c("intensity", "theta", "SE"))
  if (is.logical(val)){
    val <- storageMode(object) %in% "list"
    if (!val){
      val <- "\nRequired storageMode: 'list'"
    }
  }
  val
})



#Reads iScan's summary files. Uses code from 'readIllumina()' {beadarray}
readBeadSummaryOutput <- function(arrayNames=NULL,path='.',pattern='beadTypeFile.txt',recursive=FALSE,sep=',',fullPaths=NULL,sepchar='_',prList=NULL){
  #Locate files
  if (is.null(fullPaths))
    fullPaths <- dir(path=path,pattern=pattern,recursive=recursive)
  if (length(fullPaths) == 0) 
    stop(paste("No files with extension", pattern, "found"))
  tmp <- unlist(strsplit(fullPaths,'/'))
  nRec <- length(tmp)/length(fullPaths)
  indNames <- vector(length=length(tmp))
  indNames[seq(nRec,length(tmp),nRec)] <- TRUE
  rFiles <- tmp[indNames]
  rPaths <- rep('',length(fullPaths))
  if (length(grep('/',fullPaths)) == length(fullPaths)){
    if (nRec>1){
      for (i in 1:(nRec-1)){
        indi <- seq(i,length(tmp),nRec)
        rPaths <- paste(rPaths,tmp[indi],'/',sep='')
      }
    }
  }
  arrays <- strtrim(rFiles, nchar(rFiles) - nchar(pattern) - 1)
  if (!is.null(arrayNames)) {
    arrayNames <- gsub(paste('_',pattern,sep=''), "", arrayNames)
    ind <- which(arrays %in% arrayNames)
    if (length(ind) == 0) 
      stop("'arrayNames' did not match with files in specified path(s)")
    arrays <- arrays[ind]
    rFiles <- rFiles[ind]
    rPaths <- rPaths[ind]
  }
  nArrays <- length(arrays)
  message('Found ', nArrays, ' arrays')
  
  #Prepare phenoData
  arrayInfo <- data.frame(arrayNames=as.character(arrays),no.beads=rep(0,nArrays),stringsAsFactors=FALSE,row.names=arrays)
  if (length(grep(sepchar,arrays)) == nArrays){
    tmp <- unlist(strsplit(arrays,sepchar))
    if (length(tmp) == nArrays*3){
      arrayInfo$chip <- tmp[seq(1, length(tmp), by=3)]
      arrayInfo$row <- tmp[seq(2, length(tmp), by=3)]
      arrayInfo$col <- tmp[seq(3, length(tmp), by=3)]
    }else{
      arrayInfo$chip <- tmp[seq(1, length(tmp), by=2)]
      arrayInfo$row <- tmp[seq(2, length(tmp), by=2)]
      arrayInfo$col <- rep(1, length(arrayInfo$chip))
    }
  }
  labelDesc <- c('Array ID','No. detected beads','Chip ID','Array ID within chip','Strip ID within array')
  phenoMetadata <- data.frame(labelDescription=labelDesc,row.names=colnames(arrayInfo))
  
  #Read assayData
  filesFull <- file.path(path, paste(rPaths,rFiles,sep=""))
  fc <- file(filesFull[1],open="r")
  header <- strsplit(readLines(fc,n=1),sep)
  if (!all(unlist(header)==c("Illumicode","N","Mean GRN","Dev GRN","Mean RED","Dev RED")))
    stop(paste('Unknown fields in header:',header))
  dat <- scan(file=fc,sep=sep,quiet=TRUE,
              what=list(ProbeID=integer(0),no.beads=integer(0),G=integer(0),sd.G=integer(0),
                R=integer(0),sd.R=integer(0)))
  close(fc)
  if (is.null(prList)){
    prList <- dat$ProbeID
  }
  if (length(prList)>length(dat$ProbeID)){
    stop(paste('The array',arrays[1],'contains fewer probes than specified in "prList"'))
  }else if (length(prList)<length(dat$ProbeID)){
    warning(paste('The array',arrays[1],'contains more probes than specified in "prList"'))
  }
  indP <- dat$ProbeID %in% prList #sapply(prList,function(x,ProbeID) which(ProbeID%in%x)[1],dat$ProbeID)
  if (!identical(prList,dat$ProbeID[indP])){
    stop(paste('The markers in array',arrays[1],'deviate from those in "prList"\n or are sorted differently.'))
  }
  nProbes <- length(prList)
  G <- se.G <- R <- se.R <- no.beads <-
    matrix(nrow=nProbes,ncol=nArrays,dimnames=list(prList,arrays))
  G[,1] <- dat$G[indP]
  se.G[,1] <- dat$sd.G[indP]/sqrt(dat$no.beads[indP])
  R[,1] <- dat$R[indP]
  se.R[,1] <- dat$sd.R[indP]/sqrt(dat$no.beads[indP])
  no.beads[,1] <- dat$no.beads[indP]
  if (nArrays>1){
    for (i in 2:nArrays){
      fc <- file(filesFull[i],open="r")
      dat <- scan(file=fc,sep=sep,quiet=TRUE,skip=1,
        what=list(ProbeID=integer(0),no.beads=integer(0),G=integer(0),sd.G=integer(0),
          R=integer(0),sd.R=integer(0)))
      close(fc)
      if (identical(prList,dat$ProbeID)){
        indP <- rep(TRUE,nProbes)
      }else{
        indP <- dat$ProbeID %in% prList
        if (!identical(prList,dat$ProbeID[indP])){
          stop(paste('Different probe-sets!\n',length(prList),'probes in "prList",',length(dat$ProbeID),'probes in array',arrays[i]))
        }
        warning(paste('The array',arrays[i],'contains more probes than specified in "prList"'))
      }
      G[,i] <- dat$G[indP]
      se.G[,i] <- dat$sd.G[indP]/sqrt(dat$no.beads[indP])
      R[,i] <- dat$R[indP]
      se.R[,i] <- dat$sd.R[indP]/sqrt(dat$no.beads[indP])
      no.beads[,i] <- dat$no.beads[indP]
    }
  }
  arrayInfo$no.beads <- apply(no.beads,2,sum)
  phenoData = new("AnnotatedDataFrame", data=arrayInfo, varMetadata=phenoMetadata)
  featureInfo <- data.frame(Address=prList,row.names=prList)
  featureMetadata <- data.frame(labelDescription="Unique strand identifier",row.names=colnames(featureInfo))
  featureData = new("AnnotatedDataFrame", data=featureInfo, varMetadata=featureMetadata)
  
  #Create summary data
  BSData = new('BeadSetIllumina', R=R, se.R=se.R, G=G, se.G=se.G, no.beads=no.beads,
    phenoData=phenoData, annotation='illuminaProbeIDs',
    featureData=featureData)
  sampleNames(BSData) <- make.names(sampleNames(BSData))
  featureNames(BSData) <- make.names(featureNames(BSData))
  rm(G,R,se.G,se.R,no.beads,arrayInfo,featureInfo,phenoData,featureData); gc()
  BSData
}



#Retrieves indexes to probes corresponding to specified Norm.ID's
getNormInd <- function(beadInfo,featureNames,normID=NULL,verbose=TRUE){
  if (is.null(normID))
    normID <- unique(beadInfo$Norm.ID)   #use all
  normInd <- matrix(FALSE,nrow=length(featureNames),ncol=length(normID),
                    dimnames=list(sub('X','',featureNames),normID))
  for (i in 1:length(normID)){
    indi <- beadInfo$Norm.ID==normID[i]
    probeIDs <- beadInfo$Address[indi]
    if (nchar(normID[i])>1)
      probeIDs <- c(probeIDs,beadInfo$Address2[indi])
    normInd[as.character(probeIDs),i] <- TRUE
  }
  rownames(normInd) <- featureNames
  if (verbose){
    cat('Sum Norm.IDs:\n')
    print(apply(normInd,2,sum))
  }
  normInd
}



#Plots A vs. B, or R vs. G, depending on BSData
scatterArrays <- function(BSData,arrays,markers=seq(1,nrow(BSData),5),smooth=TRUE,newFigure=TRUE,maxPlots=72,...){
  an <- sampleNames(BSData)[arrays]
  nSelect <- length(an)
  if (newFigure){
    if (nSelect>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(nSelect))
    nR <- ceiling(nSelect/nC)
    par(mfrow=c(nR,nC),mai=c(1.2,1,1,.5)/sqrt(nR),...)
  }else if (nSelect>1){
    stop("Only one array allowed when 'newFigure'==FALSE")
  }
  if (all(c('A','B') %in% names(assayData(BSData)))){
    x <- 'A'
    y <- 'B'
  }else if (all(c('R','G') %in% names(assayData(BSData)))){
    x <- 'R'
    y <- 'G'
  }else{
    stop('Cartesian coordinate intensities required in assayData.')
  }
  for (i in 1:nSelect){
    if (smooth){
      #require('geneplotter')
      smoothScatter(assayData(BSData)[[x]][markers,an[i]],assayData(BSData)[[y]][markers,an[i]],main=an[i],xlab=x,ylab=y)
    }else{
      plot(assayData(BSData)[[x]][markers,an[i]],assayData(BSData)[[y]][markers,an[i]],main=an[i],xlab=x,ylab=y,pch='.')
    }
    xy <- range(par('usr'))
    lines(xy,xy,col='green')
    lines(xy,c(0,0),col='green')
    lines(c(0,0),xy,col='green')
  }
  if (newFigure){
    par(pp[c('mfcol','mai')])
  }
}



#Rotate and shear BEFORE any other transf.
shearRawSignal <- function(BSData,plot=FALSE,newFigure=plot,normOpts=setNormOptions(),maxPlots=72,...){
  nArrays <- ncol(BSData)
  if (newFigure){
    if (nArrays>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(nArrays))
    nR <- ceiling(nArrays/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
  }else if (plot & nArrays>1){
    stop("Only one array allowed when 'plot'==TRUE and 'newFigure'==FALSE")
  }
  message('Rotating and shearing raw signal...')
  for (i in 1:nArrays){
    rawData <- list()
    rawData$x <- assayData(BSData)$R[,i]
    rawData$y <- assayData(BSData)$G[,i]
    trData <- normalizeIllumina(rawData,normOpts=normOpts,plot=plot,...)
    assayData(BSData)$R[,i] <- trData$x
    assayData(BSData)$G[,i] <- trData$y
    gc()
  }
  if (newFigure) par(pp[c('mfcol','mai')])
  validObject(BSData)
  BSData
}



#If 'indTrain' is a list containing indexes to red and green infinium-I
#probes, lines are fitted through these data-points only. Otherwise, same
#probes are used for finding both red and grean shear. Default: all probes.
#NB! Be aware of how xBase and yBase is calculated
normalizeIllumina <- function(rawData,indTrain=rep(TRUE,length(rawData$x)),normOpts=setNormOptions(),plot=FALSE,xylim=NULL,verbose=FALSE){
  if (is.list(indTrain)){ #2-element list with indexes to infinium-I probes
    xTrainR <- rawData$x[indTrain$'10r']
    xTrainG <- rawData$x[indTrain$'20g']
    yTrainR <- rawData$y[indTrain$'10r']
    yTrainG <- rawData$y[indTrain$'20g']
    iInorm <- TRUE
  }else{
    xTrainR <- xTrainG <- rawData$x[indTrain]
    yTrainR <- yTrainG <- rawData$y[indTrain]
    iInorm <- FALSE
  }
    
  #Fit line through AA
  rngx <- range(xTrainR,na.rm=TRUE)
  step <- diff(rngx)/(normOpts$nBins-1)
  breaks <- seq(rngx[1],rngx[2],step)
  midsx <- breaks[-normOpts$nBins] + step/2
  xBase <- rep(NA,length=normOpts$nBins-1)
  for (i in 1:(normOpts$nBins-1)){
    indi <- xTrainR>breaks[i] & xTrainR<breaks[i+1]
    if (sum(indi,na.rm=T)>normOpts$minSize){
      if (iInorm){
        xBase[i] <- quantile(yTrainR[indi],prob=normOpts$prob,na.rm=TRUE)  #Best for inf-I
      }else{
        xBase[i] <- quantile(sort(yTrainR[indi])[1:normOpts$minSize],prob=normOpts$prob,na.rm=TRUE)
      }
    }
  }
  if (sum(!is.na(xBase))>2){
    abx <- coef(lm(xBase~midsx))
  }else{
    abx <- rep(NA,2)
    names(abx) <- c('(Intercept)','midsx')
  }
  
  #Fit line through BB 
  rngy <- range(yTrainG,na.rm=TRUE)
  step <- diff(rngy)/(normOpts$nBins-1)
  breaks <- seq(rngy[1],rngy[2],step)
  midsy <- breaks[-normOpts$nBins] + step/2
  yBase <- rep(NA,length=normOpts$nBins-1)
  for (i in 1:(normOpts$nBins-1)){
    indi <- yTrainG>breaks[i] & yTrainG<breaks[i+1]
    if (sum(indi,na.rm=T)>normOpts$minSize){
      if (iInorm){
        yBase[i] <- quantile(xTrainG[indi],prob=normOpts$prob,na.rm=TRUE)
      }else{
        yBase[i] <- quantile(sort(xTrainG[indi])[1:normOpts$minSize],prob=normOpts$prob,na.rm=TRUE)
      }
    }
  }
  if (sum(!is.na(yBase))>2){
    aby <- coef(lm(yBase~midsy))
    aby <- c(-aby[1]/aby[2],1/aby[2])
  }else{
    aby <- rep(NA,2)
    names(aby) <- c('(Intercept)','midsy')
  }
  
  #Rotate and shear 
  normData <- rawData
  normData$x0 <- (aby[1]-abx[1])/(abx[2]-aby[2])
  normData$y0 <- abx[1] + abx[2]*normData$x0
  normData$x <- normData$x - normData$x0
  normData$y <- normData$y - normData$y0
  xx <- cbind(c(1,1),c(0,1))
  xAngle <- atan2(diff(xx%*%abx),1)
  yAngle <- atan2(diff(xx%*%aby),1)
  tmpX2 <- cos(xAngle)*normData$x + sin(xAngle)*normData$y
  normData$y <- -sin(xAngle)*normData$x + cos(xAngle)*normData$y
  normData$x <- tmpX2-normData$y/tan(yAngle-xAngle)
  
  #Plot input data with fitted lines
  if (plot){
    if (is.null(xylim)){
      plot(rawData$x,rawData$y,pch='.',main=paste('nBins=',normOpts$nBins,', prob quantile=',normOpts$prob),xlab='R',ylab='G')
      if (iInorm){
        points(xTrainR,yTrainR,pch='.',col='red')
        points(xTrainG,yTrainG,pch='.',col='green')
      }else{
        points(rawData$x[-which(indTrain)],rawData$y[-which(indTrain)],pch='.',col='red')
      }
      xylim <- par('usr')
    }else{
      plot.new()
      plot.window(xylim[1:2],xylim[3:4])
      axis(1); axis(2)
      points(rawData$x,rawData$y,pch='.',main=paste('nBins=',normOpts$nBins,', prob quantile=',normOpts$prob),xlab='R',ylab='G')
      if (iInorm){
        points(xTrainR,yTrainR,pch='.',col='red')
        points(xTrainG,yTrainG,pch='.',col='green')
      }else{
        points(rawData$x[-which(indTrain)],rawData$y[-which(indTrain)],pch='.',col='red')
      }
   }
    xy <- range(xylim)
    points(midsx,xBase,col='blue',pch=16)
    lines(xy,abx[1]+xylim[1:2]*abx[2],col='red')
    points(yBase,midsy,col='blue',pch=16)
    lines(xy,aby[1]+xy*aby[2],col='green')
    #points(x0,y0,col='blue',pch=16)
    lines(xy,rep(normData$y0,2),col='blue')
    lines(rep(normData$x0,2),xy,col='blue')
  }
  if (verbose){
    message(sprintf('\tOffset\tSlope'))
    message(sprintf('R:\t%.4g\t%.4g',abx[1],abx[2]))
    message(sprintf('G:\t%.4g\t%.4g',aby[1],aby[2]))
    #message(sprintf('%.4g\t',aby))
  }
  normData
}



#Returns a matrix of parametrized noise for each array
getNoiseDistributions <- function(BSData,subBeadPool=NULL,normInd,normOpts=setNormOptions(),plot=FALSE,newFigure=plot,maxPlots=72,xlim=NULL,...){
  nArrays <- ncol(BSData)
  if (newFigure){
    if (nArrays>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(nArrays))
    nR <- ceiling(nArrays/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
  }else if (plot & nArrays>1){
    stop("Only one array allowed when 'plot'==TRUE and 'newFigure'==FALSE")
  }
  an <- sampleNames(BSData)
  
  sPools <- colnames(normInd)
  if (is.null(subBeadPool))
    subBeadPool <- sPools[nchar(sPools)==1]
  #b10x <- normInd[,'101']
  #b20x <- normInd[,'201']
  #if (length(subBeadPool)>1){
    #for (i in 2:length(subBeadPool)){
      #b10x <- b10x | normInd[,as.character(100+i)]
      #b20x <- b20x | normInd[,as.character(200+i)]
    #}
  #}
  b10x <- normInd[,as.character(100+as.numeric(subBeadPool[1]))]
  b20x <- normInd[,as.character(200+as.numeric(subBeadPool[1]))]
  for (i in subBeadPool[-1]){
    b10x <- b10x | normInd[,as.character(100+as.numeric(i))]
    b20x <- b20x | normInd[,as.character(200+as.numeric(i))]
  }
  
  noiseDist <- matrix(nrow=nArrays,ncol=4,dimnames=list(an,c('medianR','medianG','madR','madG')))
  trChannel <- transformChannels(assayData(BSData)$R[b20x,],assayData(BSData)$G[b10x,],normOpts=normOpts)
  noiseDist[,'medianR'] <- apply(trChannel$X,2,median,na.rm=TRUE)
  noiseDist[,'medianG'] <- apply(trChannel$Y,2,median,na.rm=TRUE)
  noiseDist[,'madR'] <- apply(trChannel$X,2,mad,na.rm=TRUE)
  noiseDist[,'madG'] <- apply(trChannel$Y,2,mad,na.rm=TRUE)
  if (plot){
    if (is.null(xlim)){
      meanD <- apply(noiseDist,2,mean,na.rm=TRUE)
      xlim <- c(floor(min(meanD[1:2]-4*max(meanD[3:4]))),ceiling(max(meanD[1:2]+4*max(meanD[3:4]))))
    }
    for (i in 1:nArrays){
      hx <- hist(trChannel$X[,i],breaks=normOpts$breaks,plot=FALSE)
      hy <- hist(trChannel$Y[,i],breaks=normOpts$breaks,plot=FALSE)
      plot(hx$mids,hx$density,type='l',main=paste('Noise level,',an[[i]]),xlab='',ylab='',col='gray',xlim=xlim,...)
      lines(hx$mids,dnorm(hx$mids,noiseDist[i,1],noiseDist[i,3]),col='red')
      lines(hy$mids,hy$density,col='gray')
      lines(hy$mids,dnorm(hy$mids,noiseDist[i,2],noiseDist[i,4]),col='seagreen')
    }
  }
  if (newFigure){par(pp[c('mfcol','mai')])}
  noiseDist
}



#Transform BSData
transformChannels <- function(X,Y=NULL,normOpts=setNormOptions()){
  str1 <- '%d of %d %s-intensities below %d set to NA'
  switch(normOpts$transf,
         none={lstr=' '},
         log={
           X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(X)),length(X),'X',-normOpts$offset))
           X <- log2(X+normOpts$offset)
           lstr <- 'Log2'
           if (normOpts$offset!=0)
             lstr <- paste(lstr,normOpts$offset,'+')
           if (!is.null(Y)){
             Y[Y<=-normOpts$offset] <- NA
             message(sprintf(str1,sum(is.na(Y)),length(Y),'Y',-normOpts$offset))
             Y <- log2(Y+normOpts$offset)
           }},
         root={
           X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(X)),length(X),'X',-normOpts$offset))
           X <- (X+normOpts$offset)^(1/normOpts$nthRoot)
           lstr <- paste('nth-root (n=',normOpts$nthRoot,')',sep='')
           if (normOpts$offset!=0)
             lstr <- paste(lstr,', ',normOpts$offset,' +',sep='')
           if (!is.null(Y)){
             Y[Y<=-normOpts$offset] <- NA
             message(sprintf(str1,sum(is.na(Y)),length(Y),'Y',-normOpts$offset))
             Y <- (Y+normOpts$offset)^(1/normOpts$nthRoot)
           }}
         )
  trChannel <- list()
  trChannel$X <- as.matrix(X)
  trChannel$Y <- as.matrix(Y)
  trChannel$lstr <- lstr
  trChannel
}



#Scatter-plot with origin and noise indicated
#NB! 'noiseDist' must be transformed according to 'normOpts'. normInd = single index-vector
plotEstimatedNoise <- function(BSData,noiseDist,normInd=rep(TRUE,nrow(BSData)),normOpts=setNormOptions(),newFigure=TRUE,maxPlots=72,...){
  if (!identical(sampleNames(BSData),rownames(noiseDist)))
    stop("'BSData' and 'noiseDist' contain different samples")
  if (newFigure){
    if (ncol(BSData)>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(ncol(BSData)))
    nR <- ceiling(ncol(BSData)/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
  }else if (ncol(BSData)>1){
    stop("Only one array allowed when 'newFigure'==FALSE")
  }
  an <- sampleNames(BSData)
  for (i in 1:ncol(BSData)){
    trData <- transformChannels(assayData(BSData)$R[normInd,an[i]],assayData(BSData)$G[normInd,an[i]],normOpts=normOpts)
    plot(trData$X,trData$Y,pch='.',main=paste(an[i],', nSD =',normOpts$nSD),...)
    xy <- range(par('usr'))
    lines(rep(noiseDist[an[i],'medianR'],2),xy,col='green')
    lines(xy,rep(noiseDist[an[i],'medianG'],2),col='green')
    points(c(noiseDist[an[i],'medianR']-2*noiseDist[an[i],'madR'],
             noiseDist[an[i],'medianR']+2*noiseDist[an[i],'madR']),
           rep(noiseDist[an[i],'medianG'],2),type='b',pch=16,col='red')
    points(rep(noiseDist[an[i],'medianR'],2),
           c(noiseDist[an[i],'medianG']-2*noiseDist[an[i],'madG'],
             noiseDist[an[i],'medianG']+2*noiseDist[an[i],'madG']),type='b',pch=16,col='red')
  }
  if (newFigure) par(pp[c('mfcol','mai')])
}



#Transform SE's using a first order Taylor-expansion around the mean
transformSEs <- function(X,se.X,normOpts=setNormOptions()){
  str1 <- '%d of %d intensities below %d set to NA'
  switch(normOpts$transf,
         none={lstr=' '},
         log={
           se.X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(se.X)),length(se.X),-normOpts$offset))
           se.X <- se.X/(X+normOpts$offset)/log(2)
           lstr <- 'Log2'
           if (normOpts$offset!=0){
             warning('transformSEs() not properly tested for transf==\'log\' and offset!=0')
             lstr <- paste(lstr,normOpts$offset,'+')
           }
         },
         root={
           se.X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(se.X)),length(se.X),-normOpts$offset))
           se.X <- se.X*(X+normOpts$offset)^((1-normOpts$nthRoot)/normOpts$nthRoot)/normOpts$nthRoot
           lstr <- paste('nth-root (n=',normOpts$nthRoot,')',sep='')
           if (normOpts$offset!=0)
             lstr <- paste(lstr,', ',normOpts$offset,' +',sep='')
         })
  trSE <- list()
  trSE$SE <- se.X
  trSE$lstr <- lstr
  trSE
}



#Centres and scales based on percentile, assumes already rotated and sheared
normalizeShearedChannels <- function(trChannel,noiseDist,normOpts=setNormOptions()){#,b101=NULL,b201=NULL){
  nProbes <- nrow(trChannel$X)
  trChannel$X0 <- noiseDist[,'medianR']
  trChannel$Y0 <- noiseDist[,'medianG']
  trChannel$X.SE <- noiseDist[,'madR']
  trChannel$Y.SE <- noiseDist[,'madG']
  trChannel$X <- trChannel$X - matrix(1,nProbes,1)%*%trChannel$X0
  trChannel$Y <- trChannel$Y - matrix(1,nProbes,1)%*%trChannel$Y0
  
  switch(normOpts$method,
         none={trChannel$method <- 'The channels are not normalized'},
         quantNorm={   #Quantile normalization
           #require('limma')
           #iX <-  apply(trChannel$X,2,function(x) all(!is.na(x)))
           #iY <-  apply(trChannel$Y,2,function(x) all(!is.na(x)))
           iX <-  apply(trChannel$X,2,function(x) sum(!is.na(x))>1)
           iY <-  apply(trChannel$Y,2,function(x) sum(!is.na(x))>1)
           for (i in which(iX & iY)){
             normXY <- normalizeQuantiles(cbind(trChannel$X[,i],trChannel$Y[,i]))
             trChannel$X[,i] <- normXY[,1]
             trChannel$Y[,i] <- normXY[,2]
           }
           #trChannel$X.SE <- trChannel$Y.SE <- sqrt(trChannel$X.SE^2+trChannel$Y.SE^2)/2   #NB!
           trChannel$method <- 'The data have been quantile normalized (SE\'s not modified)'
         },
         medianAF={  #Scales red channel such that median(AF) is close to 0.5
           message('Median(AF)-estimates for a few arrays:')
           nDisp <- min(4,ncol(trChannel$X))
           message(sprintf('%s\t',colnames(trChannel$X)[1:nDisp]))
           for (rep in 1:8){
             PAF <- trChannel$X/(trChannel$X+trChannel$Y)
             medianAF <- apply(PAF,2,median,na.rm=TRUE)
             message(sprintf('%.6f\t',medianAF[1:nDisp]))
             trChannel$X <- trChannel$X/matrix(1,nProbes,1)%*%medianAF/2
             trChannel$X.SE <- trChannel$X.SE/medianAF/2
             #gc()
           }
           trChannel$method <- 'The red channel scaled such that median(AF) is close to 0.5'
         },
         linPeak={  #Scales each channel by its scale-th quantile
           trChannel$XScale <- apply(trChannel$X,2,quantile,normOpts$scale,na.rm=TRUE)
           trChannel$YScale <- apply(trChannel$Y,2,quantile,normOpts$scale,na.rm=TRUE)
           trChannel$X <- trChannel$X/matrix(1,nProbes,1)%*%trChannel$XScale
           trChannel$Y <- trChannel$Y/matrix(1,nProbes,1)%*%trChannel$YScale
           trChannel$X.SE <- trChannel$X.SE/trChannel$XScale
           trChannel$Y.SE <- trChannel$Y.SE/trChannel$YScale
           trChannel$method <- paste('Both channels scaled using the ',normOpts$scale,'-th quantile',sep='')
         },
         #linHomo={  #Scales each channel using quantile of homozygotes
         #  stop('linHomo not implemented for matrices')
         #  trChannel$XScale <- quantile(trChannel$X[b101],normOpts$scale,na.rm=TRUE)
         #  trChannel$YScale <- quantile(trChannel$Y[b201],normOpts$scale,na.rm=TRUE)
         #  trChannel$X <- trChannel$X/trChannel$XScale
         #  trChannel$Y <- trChannel$Y/trChannel$YScale
         #  trChannel$X.SE <- trChannel$X.SE/trChannel$XScale
         #  trChannel$Y.SE <- trChannel$Y.SE/trChannel$YScale
         #  trChannel$method <- paste('Each channel scaled using the ',normOpts$scale,'-th quantile of its infinium I homozygotes',sep='')
         #}
  )
  trChannel
}



#Merge BSData (R/G) into BSRed (A/B)
#beadInfo must contain 'Name', 'SNP', 'ILMN.Strand', 'Address', 'Address2', 'Norm.ID'
createAlleleSet <- function(BSData,beadInfo,normOpts,includeAB=FALSE){
  nSNP <- nrow(beadInfo)
  sPools <- unique(beadInfo$Norm.ID)
  subBeadPool <- sPools[nchar(sPools)==1]
  A <- B <- Var.pooled <- Min.Beads <- 
    matrix(nrow=nSNP,ncol=ncol(BSData),dimnames=list(rownames(beadInfo),sampleNames(BSData)))
  
  #Addresses are sorted ascendingly
  allProbes <- featureData(BSData)[[1]]
  indA1 <- which(is.element(allProbes,beadInfo$Address))
  indA2 <- which(is.element(allProbes,beadInfo$Address2))
  #Addresses ordered to follow beadInfo
  sortAdd1 <- sort(beadInfo$Address,index.return=TRUE)
  unSortAdd1 <- sort(sortAdd1$ix,index.return=TRUE)  #same as 'order()'
  Add2 <- beadInfo$Address2
  Add2[Add2==0] <- NA
  sortAdd2 <- sort(Add2,index.return=TRUE)
  unSortAdd2 <- sort(sortAdd2$ix[1:length(indA2)],index.return=TRUE)
  #Indexes used with 'Address'
  norm1 <- norm101 <- norm201 <- rep(FALSE,nSNP)
  for (i in subBeadPool){
    norm1 <- norm1 | beadInfo$Norm.ID==as.character(i)
    norm101 <- norm101 | beadInfo$Norm.ID==as.character(100+i)
    norm201 <- norm201 | beadInfo$Norm.ID==as.character(200+i)
  }
  #Indexes used with 'Address2'
  norm101_2 <- norm201_2 <- rep(FALSE,length(indA2))
  for (i in subBeadPool){
    norm101_2 <- norm101_2 | beadInfo[norm201|norm101,'Norm.ID']==as.character(100+i)
    norm201_2 <- norm201_2 | beadInfo[norm201|norm101,'Norm.ID']==as.character(200+i)
  }

  A[norm1,] <- assayData(BSData)$R[indA1,][unSortAdd1$ix,][norm1,]
  B[norm1,] <- assayData(BSData)$G[indA1,][unSortAdd1$ix,][norm1,]
  A[norm101,] <- assayData(BSData)$R[indA1,][unSortAdd1$ix,][norm101,]
  A[norm201,] <- assayData(BSData)$G[indA1,][unSortAdd1$ix,][norm201,]
  B[norm101,] <- assayData(BSData)$R[indA2,][unSortAdd2$ix,][norm101_2,]
  B[norm201,] <- assayData(BSData)$G[indA2,][unSortAdd2$ix,][norm201_2,]
  Min.Beads[norm1,] <- assayData(BSData)$no.beads[indA1,][unSortAdd1$ix,][norm1,]
  Min.Beads[norm101,] <- 
    pmin(assayData(BSData)$no.beads[indA1,][unSortAdd1$ix,][norm101,],
         assayData(BSData)$no.beads[indA2,][unSortAdd2$ix,][norm101_2,])
  Min.Beads[norm201,] <- 
    pmin(assayData(BSData)$no.beads[indA1,][unSortAdd1$ix,][norm201,],
         assayData(BSData)$no.beads[indA2,][unSortAdd2$ix,][norm201_2,])
  pol <- cart2pol(A,B,dist=normOpts$dist,pNorm=normOpts$pNorm)
  Var.pooled[norm1,] <- 
    (assayData(BSData)$se.R[indA1,][unSortAdd1$ix,][norm1,]^2 +
     assayData(BSData)$se.G[indA1,][unSortAdd1$ix,][norm1,]^2)/2
  Var.pooled[norm101,] <- 
    (assayData(BSData)$se.R[indA1,][unSortAdd1$ix,][norm101,]^2 +
     assayData(BSData)$se.R[indA2,][unSortAdd2$ix,][norm101_2,]^2)/2
  Var.pooled[norm201,] <- 
    (assayData(BSData)$se.G[indA1,][unSortAdd1$ix,][norm201,]^2 +
     assayData(BSData)$se.G[indA2,][unSortAdd2$ix,][norm201_2,]^2)/2
  SE <- findSeTheta(sqrt(Var.pooled),pol$r,dist=normOpts$dist,pNorm=normOpts$pNorm)
  
  beadMetadata = data.frame(labelDescription=character(ncol(beadInfo)),
    row.names=colnames(beadInfo),stringsAsFactors=FALSE)
  beadMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID'),1] <- 
    c('Marker ID', 'Polymorphism', 'TOP/BOT-category', 'Illumina strand ID',
      'Second strand ID (Infinium I)', 'Sub-bead pool ID')
  featureData <- new("AnnotatedDataFrame", data=beadInfo, varMetadata=beadMetadata,
                     dimLabels=c('featureNames','featureColumns'))
  BSRed <- new('AlleleSetIllumina', intensity=pol$r, theta=pol$th*2/pi,
               SE=SE, min.beads=Min.Beads, phenoData=phenoData(BSData),
               featureData=featureData)
  if (includeAB){
    assayData(BSRed)$A <- A
    assayData(BSRed)$B <- B
  } #end if
  validObject(BSRed)
  rm(pol,A,B,Var.pooled,SE,Min.Beads,featureData,beadInfo,BSData); gc()
  BSRed
}



#Transform cartesian coordinates to polar
cart2pol <- function(x,y,dist='euclidean',pNorm=NULL){
  pol <- list()
  switch(dist,
         euclidean={pol$r <- sqrt(x^2 + y^2)},
         manhattan={pol$r <- x+y},
         minkowski={pol$r <- (abs(x)^pNorm + abs(y)^pNorm)^(1/pNorm)}
         )
  pol$th <- atan2(y,x)
  pol
}



#Make sure 'dataFiles' points to the relevant data. NB! 'beadInfo' MUST contain
#total number of SNPs in the dataFiles, and 'markers' contains row-indexes to
#beadInfo. The
#vectors 'markers' and 'arrays' are indexes to rows and columns, i.e. they do NOT
#depend on the order in which the markers and arrays are given in the files.
createAlleleSetFromFiles <- function(dataFiles,markers,arrays,phenoInfo=NULL,beadInfo=NULL,sep='\t',quote=''){
  if (!all(c('intFile','thFile','seFile') %in% names(dataFiles)))
    stop("The filenames 'intFile', 'thFile', and 'seFile' must all be provided")
  if ('resFile' %in% names(dataFiles)){
    message("Loading featureData (ignoring 'beadInfo'-argument) ...")
    beadInfo <- read.table(dataFiles[['resFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo), stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID','Classification',  #try()?
                'Cent.Deviation','Within.SD','HW.Chi2','HW.P','BAF.Locus1','BAF.Locus2',
                'Call.Rate'),1] <- 
                  c('Marker ID','Polymorphism','TOP/BOT-category','Illumina strand ID',
                    'Second strand ID (Infinium I)','Sub-bead pool ID',
                    'Genotype call from automatic clustering',
                    'Largest distance from cluster-centre to its ideal position',
                    'Largest within-cluster spread',
                    'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                    'Probability of Hardy-Weinberg equilibrium',
                    'Estimated B-allele frequency',
                    'Estimated BAF of second paralogue (if exists)',
                    'Ratio of samples assigned to clusters')
  }else if(!is.null(beadInfo)){
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo),stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID'),1] <- 
      c('Marker ID', 'Polymorphism', 'TOP/BOT-category', 'Illumina strand ID',
        'Second strand ID (Infinium I)', 'Sub-bead pool ID')
  }else{
    stop("No available featureData. Provide 'beadInfo'-table or 'resFile'-filename")
  }
  allMarkers <- unlist(read.table(dataFiles[['intFile']],nrows=1,as.is=TRUE,sep=sep))
  if (missing(markers)){
    markers <- 1:min(nrow(beadInfo),144)
  }else if (is.character(markers)){
    markers <- which(allMarkers %in% markers)
  }else{
    markers <- (1:length(allMarkers))[markers]
  }
  featureData <- new("AnnotatedDataFrame", data=beadInfo[markers,], varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  
  if ('phFile' %in% names(dataFiles)){
    message("Loading phenoData (ignoring 'phenoInfo'-argument) ...")
    phenoInfo <- read.table(dataFiles[['phFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
    phenoInfo$noiseIntensity <- as.numeric(phenoInfo$noiseIntensity)
  }else if(is.null(phenoInfo)){
    stop("No available phenoData. Provide 'phenoInfo'-table or 'phFile'-filename")
  }
  pMetadata <- data.frame(labelDescription=character(ncol(phenoInfo)), row.names=colnames(phenoInfo), stringsAsFactors=FALSE)
  pMetadata[c('arrayNames','no.beads','chip','row','col','noiseIntensity'),1] <-
    c('Array ID','No. detected beads','Chip ID','Array ID within chip',
      'Strip ID within array','Estimated signal detection-limit')
  if (missing(arrays)){
    #arrays <- 1:min(nrow(phenoInfo),500)
    arrays <- 1:nrow(phenoInfo)
  }else if(is.character(arrays)){
    arrays <- which(rownames(phenoInfo) %in% arrays)
  }else{
    arrays <- (1:nrow(phenoInfo))[arrays]
  }
  phenoData <- new("AnnotatedDataFrame", data=phenoInfo[arrays,], varMetadata=pMetadata, dimLabels=c('sampleNames','sampleColumns'))

  colClasses <- rep('NULL',length(allMarkers)+1)
  colClasses[1] <- 'character'
  colClasses[markers+1] <- 'numeric'
  orderM <- order(sort(markers,index.return=TRUE)$ix)
  message('Loading intensities...')
  intensity <- t(read.table(dataFiles[['intFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
  intensity <- intensity[orderM,arrays]
  message('Loading thetas...')
  theta <- t(read.table(dataFiles[['thFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
  theta <- theta[orderM,arrays]
  message('Loading standard errors...')
  SE <- t(read.table(dataFiles[['seFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
  SE <- SE[orderM,arrays]
  BSRed <- new('AlleleSetIllumina', intensity=intensity, theta=theta, SE=SE, phenoData=phenoData, featureData=featureData)
  rm('intensity','theta','SE','phenoData','featureData'); gc()

  if ('callFile' %in% names(dataFiles)){
    callHeader <- c('',unlist(read.table(dataFiles[['callFile']],nrows=1,as.is=TRUE,sep=sep)))
    callCols <- rep('NULL',length(callHeader))
    callCols[1] <- 'character'
    callCols[callHeader %in% sampleNames(BSRed)] <- 'numeric'
    indFound <- sampleNames(BSRed) %in% callHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    call <- matrix(nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
    message('Loading calls...')
    skip <- min(markers)
    callFrame <- read.table(dataFiles[['callFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=callCols,skip=skip,row.names=1,col.names=callHeader)
    call[,indFound] <- as.matrix(callFrame[markers+1-skip,orderA])
    assayData(BSRed)$call <- call
    validObject(BSRed)
  }
  if ('genoFile' %in% names(dataFiles)){
    genoHeader <- c('',unlist(read.table(dataFiles[['genoFile']],nrows=1,as.is=TRUE,sep=sep)))
    genoCols <- rep('NULL',length(genoHeader))
    genoCols[1] <- 'character'
    genoCols[genoHeader %in% sampleNames(BSRed)] <- 'character'
    indFound <- sampleNames(BSRed) %in% genoHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    genotype <- matrix('',nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
    message('Loading genotypes...')
    skip <- min(markers)
    genoFrame <- read.table(dataFiles[['genoFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=genoCols,skip=skip,row.names=1,col.names=genoHeader)
    genotype[,indFound] <- gsub('\"','',as.matrix(genoFrame[markers+1-skip,orderA]))
    assayData(BSRed)$genotype <- genotype
    validObject(BSRed)
  }
  BSRed
}



createMultiSetFromFiles <- function(dataFiles,markers,arrays,phenoInfo=NULL,beadInfo=NULL,sep='\t',quote=''){
  if ('resFile' %in% names(dataFiles)){
    message("Loading featureData (ignoring 'beadInfo'-argument) ...")
    beadInfo <- read.table(dataFiles[['resFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo), stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID','Classification',  #try()?
                'Cent.Deviation','Within.SD','HW.Chi2','HW.P','BAF.Locus1','BAF.Locus2',
                'Call.Rate'),1] <- 
                  c('Marker ID','Polymorphism','TOP/BOT-category','Illumina strand ID',
                    'Second strand ID (Infinium I)','Sub-bead pool ID',
                    'Genotype call from automatic clustering',
                    'Largest distance from cluster-centre to its ideal position',
                    'Largest within-cluster spread',
                    'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                    'Probability of Hardy-Weinberg equilibrium',
                    'Estimated B-allele frequency',
                    'Estimated BAF of second paralogue (if exists)',
                    'Ratio of samples assigned to clusters')
  }else if(!is.null(beadInfo)){
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo),stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID'),1] <- 
      c('Marker ID', 'Polymorphism', 'TOP/BOT-category', 'Illumina strand ID',
        'Second strand ID (Infinium I)', 'Sub-bead pool ID')
  }else{
    stop("No available featureData. Provide 'beadInfo'-table or 'resFile'-filename")
  }
  allMarkers <- rownames(beadInfo)
  if (missing(markers)){
    markers <- 1:min(nrow(beadInfo),128)
  }else if (is.character(markers)){
    markers <- which(allMarkers %in% markers)
  }else{
    markers <- (1:length(allMarkers))[markers]
  }
  featureData <- new("AnnotatedDataFrame", data=beadInfo[markers,], varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  
  if ('phFile' %in% names(dataFiles)){
    message("Loading phenoData (ignoring 'phenoInfo'-argument) ...")
    phenoInfo <- read.table(dataFiles[['phFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
  }else if(is.null(phenoInfo)){
    stop("No available phenoData. Provide 'phenoInfo'-table or 'phFile'-filename")
  }
  pMetadata <- data.frame(labelDescription=character(ncol(phenoInfo)), row.names=colnames(phenoInfo), stringsAsFactors=FALSE)
  pMetadata[c('arrayNames','no.beads','chip','row','col','noiseIntensity'),1] <-
    c('Array ID','No. detected beads','Chip ID','Array ID within chip',
      'Strip ID within array','Estimated signal detection-limit')
  if (missing(arrays)){
    #arrays <- 1:min(nrow(phenoInfo),500)
    arrays <- 1:nrow(phenoInfo)
  }else if(is.character(arrays)){
    arrays <- which(rownames(phenoInfo) %in% arrays)
  }else{
    arrays <- (1:nrow(phenoInfo))[arrays]
  }
  phenoData <- new("AnnotatedDataFrame", data=phenoInfo[arrays,], varMetadata=pMetadata, dimLabels=c('sampleNames','sampleColumns'))

  colClasses <- rep('NULL',length(allMarkers)+1)
  colClasses[1] <- 'character'
  colClasses[markers+1] <- 'numeric'
  orderM <- order(sort(markers,index.return=TRUE)$ix)
  if ('intFile' %in% names(dataFiles)){
    message('Loading intensities...')
    intensity <- t(read.table(dataFiles[['intFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
    intensity <- intensity[orderM,arrays]
  }else{
    intensity <- NULL
  }
  if ('thFile' %in% names(dataFiles)){
    message('Loading thetas...')
    theta <- t(read.table(dataFiles[['thFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
    theta <- theta[orderM,arrays]
  }else{
    theta <- NULL
  }
  if ('seFile' %in% names(dataFiles)){
    message('Loading standard errors...')
    SE <- t(read.table(dataFiles[['seFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
    SE <- SE[orderM,arrays]
  }else{
    SE <- NULL
  }
  BSRed <- new('MultiSet', intensity=intensity, theta=theta, SE=SE, phenoData=phenoData, featureData=featureData, storage.mode='list')
  sampleNames <- rownames(phenoData@data)
  featureNames <- rownames(featureData@data)
  rm('intensity','theta','SE','phenoData','featureData'); gc()
  
  if ('callFile' %in% names(dataFiles)){
    callHeader <- c('',unlist(read.table(dataFiles[['callFile']],nrows=1,as.is=TRUE,sep=sep)))
    callCols <- rep('NULL',length(callHeader))
    callCols[1] <- 'character'
    callCols[callHeader %in% sampleNames] <- 'numeric'
    indFound <- sampleNames %in% callHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    call <- matrix(nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames,sampleNames))
    message('Loading calls...')
    skip <- min(markers)
    callFrame <- read.table(dataFiles[['callFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=callCols,skip=skip,row.names=1,col.names=callHeader)
    call[,indFound] <- as.matrix(callFrame[markers+1-skip,orderA])
    assayData(BSRed)$call <- call
    validObject(BSRed)
  }
  if ('genoFile' %in% names(dataFiles)){
    genoHeader <- c('',unlist(read.table(dataFiles[['genoFile']],nrows=1,as.is=TRUE,sep=sep)))
    genoCols <- rep('NULL',length(genoHeader))
    genoCols[1] <- 'character'
    genoCols[genoHeader %in% sampleNames] <- 'character'
    indFound <- sampleNames %in% genoHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    genotype <- matrix('',nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames,sampleNames))
    message('Loading genotypes...')
    skip <- min(markers)
    genoFrame <- read.table(dataFiles[['genoFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=genoCols,skip=skip,row.names=1,col.names=genoHeader)
    genotype[,indFound] <- gsub('\"','',as.matrix(genoFrame[markers+1-skip,orderA]))
    assayData(BSRed)$genotype <- genotype
    validObject(BSRed)
  }
  BSRed
}



callGenotypes <- function(BSRed,gO=setGenoOptions(largeSample=ncol(BSRed)>250)){
  #Quality control, SNP- and array-filtering
  if ('call' %in% names(assayData(BSRed))){
    OK = FALSE
    while (!OK){
      ovrwrt <- readline(prompt='Warning! Call-matrix already exists. Overwrite? (yes/no) ')
      OK <- ovrwrt %in% c('yes','no')
    }
    if (ovrwrt %in% 'yes'){
      message("Matrix 'assayData(BSRed)$call' will be replaced!")
      pData(BSRed)$passRatio <- NULL
      fData(BSRed)$Ped.Errors <- NULL
      assayData(BSRed)[grep('ped.check',names(assayData(BSRed)))] <- NULL
    }else{
      stop("Exits, previous calls are retained")
    }
  }
  descNames <- c('Classification','Cent.Deviation','Within.SD','HW.Chi2','HW.P','BAF.Locus1',
                 'BAF.Locus2','Call.Rate')
  if (any(descNames %in% colnames(fData(BSRed)))){
    OK = FALSE
    while (!OK){
      ovrwrt <- readline(prompt='Warning! Overlapping column-names in featureData. Overwrite? (yes/no) ')
      OK <- ovrwrt %in% c('yes','no')
    }
    if (ovrwrt %in% 'yes'){
      message("Overlapping descriptors will be replaced!")
      featureData(BSRed) <- featureData(BSRed)[,-which(colnames(fData(BSRed)) %in% descNames)]
    }else{
      stop("Exits, previous descriptors are retained")
    }
  }
  iSnps <- apply(assayData(BSRed)$theta,1,function(x,cc) sum(!is.na(x))>cc,cc=gO$arrayPerSnpLim*ncol(BSRed))
  message(sum(!iSnps),' SNP(s) with more than ',100-gO$arrayPerSnpLim*100,'% missings disregarded')
  iArrays <- apply(assayData(BSRed)$theta,2,function(x,cc) sum(!is.na(x))>cc,cc=gO$snpPerArrayLim*nrow(BSRed))
  message(sum(!iArrays),' array(s) with more than ',100-gO$snpPerArrayLim*100,'% missings disregarded')
  mNoise <- mean(pData(BSRed)$noiseIntensity[iArrays],na.rm=TRUE)
  if (sum(iSnps)==1){
    R <- t(as.matrix(assayData(BSRed)$intensity[iSnps,iArrays]))
    Theta <- t(as.matrix(assayData(BSRed)$theta[iSnps,iArrays]))
    SE <- t(as.matrix(assayData(BSRed)$SE[iSnps,iArrays]))
    dimnames(R)[1] <- dimnames(Theta)[1] <- dimnames(SE)[1] <- names(iSnps)
  }else{
    R <- assayData(BSRed)$intensity[iSnps,iArrays]
    Theta <- assayData(BSRed)$theta[iSnps,iArrays]
    SE <- assayData(BSRed)$SE[iSnps,iArrays]
  }
  iFilter <- diff(apply(Theta,1,range,na.rm=TRUE))>gO$filterLim
  nKeep <- sum(iFilter)
  message(sum(!iFilter),' monomorphs with range(theta)<',gO$filterLim,' are detected')
  
  polyCent <- generatePolyCenters(ploidy=gO$ploidy)
  assayData(BSRed)$call <- matrix(nrow=nrow(BSRed),ncol=ncol(BSRed),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
  classVec <- character(nrow(BSRed))
  classVec[!iSnps] <- 'NA'
  classVec[iSnps][!iFilter] <- 'MONO-filt'
  fData <- data.frame(Classification=classVec,Cent.Deviation=rep(NA,nrow(BSRed)),Within.SD=rep(NA,nrow(BSRed)),HW.Chi2=rep(NA,nrow(BSRed)),HW.P=rep(NA,nrow(BSRed)),BAF.Locus1=rep(NA,nrow(BSRed)),BAF.Locus2=rep(NA,nrow(BSRed)),Call.Rate=rep(NA,nrow(BSRed)),stringsAsFactors=FALSE,row.names=featureNames(BSRed))
  fDesc <-  c('Genotype call from automatic clustering',
              'Largest distance from cluster-centre to its ideal position',
              'Largest within-cluster spread',
              'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
              'Probability of Hardy-Weinberg equilibrium',
              'Estimated B-allele frequency',
              'Estimated BAF of second paralogue (if exists)',
              'Ratio of samples assigned to clusters')
  fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)
  count <- 0
  for (i in which(iFilter)){ 
    count <- count + 1
    if (count %in% seq(min(100,nKeep),nKeep,100))
      message('Called ',count,' of ',nKeep,' genotypes')
    iMiss <- is.na(R[i,])|is.na(SE[i,])|is.na(Theta[i,])
    iDet <- R[i,]>mNoise & !iMiss & Theta[i,]>-.5 & Theta[i,]<1.5    #Filter bad samples
    #if (sum(iDet)<=sum(iArrays)*gO$detectLim){
    if (sum(iDet)<=sum(!iMiss)*gO$detectLim){
      fData$Classification[iSnps][[i]] <- 'FAIL'
    }else{
      sclR <- median(R[i,iDet],na.rm=TRUE)*2*gO$rPenalty                   #Factor by which R is divided before clustering
      X <- matrix(cbind(Theta[i,],R[i,]/sclR,SE[i,])[iDet,],ncol=3)
      if (is.null(gO$probsIndSE)){
        indSE <- rep(TRUE,sum(iDet))
      }else{
        indSE <- X[,3]<quantile(X[,3],probs=gO$probsIndSE,na.rm=TRUE)      #Uncertain points removed from clustering
      }
      #qRange <- getQRange(X[indSE,1],minScl=3,probs=gO$probsQRange)     #Initial cluster-centers
      #sConf <- distLikelyConfiguration(X[indSE,1:2],polyCent,qRange=qRange)  #Ordered list of likely classifications
      #maxConf <- sum(sConf$x<gO$mseLim,na.rm=TRUE)
      sConf <- getCenters(X[indSE,1],gO=gO,polyCent=polyCent)
      maxConf <- length(sConf$ix)
      OK <- FALSE
      iConf <- 1
      while (!OK){
        testCallRate <- FALSE
        goodSNP <- FALSE
        nCl <- polyCent$size[sConf$ix[iConf]]
        cntIdeal <- polyCent$centers[[sConf$ix[iConf]]]
        centers <- sConf$centers[[iConf]]
        clObj <- findPolyploidClusters(X,indSE=indSE,centers=centers,wss.update=FALSE)
        tmpRes <- rep(NA,7)
        if (any(clObj$size>0)){
          iCl <- which(clObj$size>0)
          tmpRes[1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
          #tmpRes[1] <- mean(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
          if (tmpRes[1]<gO$devCentLim & sum(clObj$size<1)<2){
            wSpread <- tapply(X[,1],clObj$cluster,sd)   #NB! MAD?
            wSpread[clObj$size[iCl]<2] <- 0   #use gO$minClLim instead?
            tmpRes[2] <- max(wSpread)  #Max within-cluster spread (Theta)
            if (tmpRes[2]<gO$wSpreadLim){
              if (nCl==1){
                tmpRes[5:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=sConf$ix[iConf],polyCent=polyCent,afList=gO$afList)[4:5])
                testCallRate <- TRUE
              }else{
                tmpRes[3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=sConf$ix[iConf],polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
                minB <- (clObj$centers[iCl,1] - gO$nSdOverlap*wSpread)[-1]
                maxB <- (clObj$centers[iCl,1] + gO$nSdOverlap*wSpread)[-length(iCl)]
                ovrlp <- any(round(minB*100)<round(maxB*100))   #Overlapping clusters?
                                        #goodSNP <- tmpRes[1]+2*tmpRes[2]<gO$goodSnpLim  & sConf$ix[iConf]==which(polyCent$classification=='SNP') & sum(clObj$size>=min(gO$minClLim,ncol(BSRed)/100))==3
                                        #if (tmpRes[4]>gO$hwAlpha | goodSNP){
                if (tmpRes[4]>gO$hwAlpha & !ovrlp){
                  testCallRate <- TRUE
                }else{
                  iConf <- iConf + 1
                }
              }
              if (testCallRate){
                nClRed <- length(iCl)
                clCtrs <- matrix(clObj$centers[iCl,],nrow=nClRed)
                indOut <- matrix(FALSE,nrow=nrow(X),ncol=nCl)
                indKeep <- rep(FALSE,nrow(X))
                indOverlap <- rep(FALSE,nrow(X))
                Sratio <- rep(1,nCl)
                for (j in iCl){
                  indj <- clObj$cluster==j
                  nj <- sum(indj)
                  if (nj>=gO$minClLim){    #Hotellings T^2-ellipse superimposed on clusters
                    ctrX <- apply(X[indj,1:2],2,median,na.rm=TRUE)
                    Xj <- X[indj,1:2]-matrix(ctrX,nrow=nj,ncol=2,byrow=TRUE)
                    Sj <- t(Xj)%*%Xj/(nj-1)
                    Sratio[j] <- Sj[1,1]/(Sj[2,2]*gO$rPenalty)  #Cluster 'angle', given comparable axes
                    Xall <- X[,1:2]-matrix(ctrX,nrow=nrow(X),ncol=2,byrow=TRUE)
                    T2j <- diag(Xall%*%solve(Sj)%*%t(Xall))
                    f.alpha <- T2j*(nj-2)*nj/(2*(nj-1)*(nj+1))
                    pVal <- 1 - pf(f.alpha,2,nj-2)
                  }else{
                    pVal <- rep(0,nrow(X))
                    pVal[indj] <- 1
                  }
                  indOut[,j] <- pVal < gO$clAlpha
                  indKeep[indj] <- !indOut[indj,j]
                  indOverlap[!indOut[,j]][clObj$cluster[!indOut[,j]]!=j] <- TRUE
                }
                plot <- FALSE
                if (plot){
                  plot.new()
                  plot.window(c(-.2,1.2),c(0,max(max(X[,2]),1/gO$rPenalty)))
                  axis(1); axis(2)
                  points(X[,1],X[,2],pch=16)
                  points(X[!indKeep,1],X[!indKeep,2],pch=16,col='red')   #outside ellipses
                  points(X[indOverlap,1],X[indOverlap,2],pch=16,col='orange')  #overlapping ellipses
                  points(clCtrs[,1],clCtrs[,2],col='green',pch=16)    #cluster-centres
                }
                indKeep <- indKeep &! indOverlap
                #tmpRes[7] <- sum(indKeep)/sum(iArrays)
                tmpRes[7] <- sum(indKeep)/sum(!iMiss)
                                        #psRat <- prod(Sratio[clObj$size>ncol(BSRed)/10])
                psRat <- sum(Sratio*clObj$size)/(ncol(BSRed)*nClRed)
                                        #goodSNP <- goodSNP & sum(!indOverlap)==sum(iDet)
                
                if ((tmpRes[7]>gO$detectLim & psRat<=gO$rotationLim)){# | goodSNP){   #Success!
                  if (sum(iSnps)==1){
                    assayData(BSRed)$call[iSnps,iArrays][iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
                  }else{
                    assayData(BSRed)$call[iSnps,iArrays][i,iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
                  }
                  fData$Classification[iSnps][[i]] <- polyCent$classification[[sConf$ix[iConf]]]
                  fData$Cent.Deviation[iSnps][[i]] <- tmpRes[1]
                  fData$Within.SD[iSnps][i] <- tmpRes[2]
                  fData$HW.Chi2[iSnps][i] <- tmpRes[3]
                  fData$HW.P[iSnps][i] <- tmpRes[4]
                  fData$BAF.Locus1[iSnps][i] <- 1-tmpRes[5]  #NB! B-af, not A-af
                  fData$BAF.Locus2[iSnps][i] <- 1-tmpRes[6]  #NB!
                  fData$Call.Rate[iSnps][i] <- tmpRes[7]
                  #fData$Max.Rotation[iSnps][i] <- max(Sratio)
                  OK <- TRUE
                }else{
                  iConf <- iConf + 1
                } #end if success
              } #end if callrate
            }else{
              iConf <- iConf + 1
            } #end if wSpread
          }else{
            iConf <- iConf + 1
          } #end if devCent
          if (iConf>maxConf){
            fData$Classification[iSnps][[i]] <- 'FAIL'
            OK <- TRUE
          }
        }else{
          fData$Classification[iSnps][[i]] <- 'FAIL'
          OK <- TRUE
        } #end if any(clObj$size)
      } #end while !OK
    } #end if gO$detectLim
  } #end for
  featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  featureData(BSRed) <- combine(featureData(BSRed), featureData)
  validObject(BSRed)
  BSRed
}


#Tests automatic genocalling for a single marker, plots different outcomes. Outputs all results from single marker
callGenotypes.verboseTest <- function(BSRed,singleMarker=1,gO=setGenoOptions(largeSample=ncol(BSRed)>250)){
  message(sprintf("Verbosely analyzing marker '%s'",featureNames(BSRed)[singleMarker]))
  iSnps <- apply(assayData(BSRed)$theta,1,function(x,cc) sum(!is.na(x))>cc,cc=gO$arrayPerSnpLim*ncol(BSRed))
  if (!iSnps[singleMarker]){
    message('Failure as the ratio of missing arrays exceeds (1 - "arrayPerSnpLim")')
  }
  iSnps <- singleMarker
  iArrays <- apply(assayData(BSRed)$theta,2,function(x,cc) sum(!is.na(x))>cc,cc=gO$snpPerArrayLim*nrow(BSRed))
  message(sum(!iArrays),' array(s) with more than ',100-gO$snpPerArrayLim*100,'% missings found')
  mNoise <- mean(pData(BSRed)$noiseIntensity[iArrays],na.rm=TRUE)
  R <- as.matrix(assayData(BSRed)$intensity[singleMarker,iArrays])
  Theta <- as.matrix(assayData(BSRed)$theta[singleMarker,iArrays])
  SE <- as.matrix(assayData(BSRed)$SE[singleMarker,iArrays])
  dimnames(R)[2] <- dimnames(Theta)[2] <- dimnames(SE)[2] <- featureNames(BSRed)[singleMarker]
  iFilter <- diff(range(Theta,na.rm=TRUE))>gO$filterLim
  if (!iFilter){
    message('The selected marker would be filtered away as a monomorph based on range(Theta)')
    return()
  }
  BSRed <- BSRed[singleMarker,]
  polyCent <- generatePolyCenters(ploidy=gO$ploidy)
  #nTest <- length(polyCent$size)
  #message(sprintf('%d allowed cluster-combinations',nTest))
  #call <- matrix(nrow=nTest,ncol=ncol(BSRed),dimnames=list(polyCent$classification,sampleNames(BSRed)))
  #fData <- data.frame(Cent.Deviation=rep(NA,nTest),Within.SD=rep(NA,nTest),HW.Chi2=rep(NA,nTest),HW.P=rep(NA,nTest),BAF.Locus1=rep(NA,nTest),BAF.Locus2=rep(NA,nTest),Call.Rate=rep(NA,nTest),Overlap=rep(NA,nTest),Rotation=rep(NA,nTest),stringsAsFactors=FALSE,row.names=polyCent$classification)
  #fDesc <-  c('Largest distance from cluster-centre to its ideal position',
  #            'Largest within-cluster spread',
  #            'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
  #            'Probability of Hardy-Weinberg equilibrium',
  #            'Estimated B-allele frequency',
  #            'Estimated BAF of second paralogue (if exists)',
  #            'Ratio of samples assigned to clusters',
  #            'TRUE if there is an initial cluster overlap (along \'Theta\')',
  #            'Large value (>1) means increasingly slanting clusters')
  #fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)

  iMiss <- is.na(R)|is.na(SE)|is.na(Theta)
  iDet <- R>mNoise & !iMiss & Theta>-.5 & Theta<1.5    #Filter bad arrays
  message(sprintf('%d low-quality or missing array(s) filtered away for this marker',sum(!iDet)))
  #if (sum(iDet)<=sum(iArrays)*gO$detectLim){
  if (sum(iDet)<=sum(!iMiss)*gO$detectLim){
    str <- 'Failure as rate of high quality to non-missing arrays (%.2f) smaller than "detectLim"'
    message(sprintf(str,sum(iDet)/sum(!iMiss)))
  }else{
    sclR <- median(R[iDet],na.rm=TRUE)*2*gO$rPenalty                   #Factor by which R is divided before clustering
    X <- matrix(cbind(Theta,R/sclR,SE)[iDet,],ncol=3)
    if (is.null(gO$probsIndSE)){
      indSE <- rep(TRUE,sum(iDet))
    }else{
      indSE <- X[,3]<quantile(X[,3],probs=gO$probsIndSE,na.rm=TRUE)      #Uncertain points removed from clustering
    }
    #qRange <- getQRange(X[indSE,1],minScl=3,probs=gO$probsQRange)     #Initial cluster-centers
    #sConf <- distLikelyConfiguration(X[indSE,1:2],polyCent,qRange=qRange)  #Ordered list of likely classifications
    sConf <- getCenters(X[indSE,1],gO=gO,polyCent=polyCent)
    print(sConf)
    nTest <- length(sConf$ix)
    message(sprintf('%d cluster-combinations tested',nTest))
    rNames <- unlist(polyCent$classification[sConf$ix])
    if (nTest>1){
      for (i in 2:length(rNames)){
        sumRep <- sum(rNames[1:(i-1)]%in%rNames[i])
        if (sumRep>0){
          rNames[i] <- paste(rNames[i],sumRep,sep='.')
        }
      }
    }
    call <- matrix(nrow=nTest,ncol=ncol(BSRed),dimnames=list(rNames,sampleNames(BSRed)))
    fData <- data.frame(Cent.Deviation=rep(NA,nTest),Within.SD=rep(NA,nTest),HW.Chi2=rep(NA,nTest),HW.P=rep(NA,nTest),BAF.Locus1=rep(NA,nTest),BAF.Locus2=rep(NA,nTest),Call.Rate=rep(NA,nTest),Overlap=rep(NA,nTest),Rotation=rep(NA,nTest),stringsAsFactors=FALSE,row.names=rNames)
    fDesc <-  c('Largest distance from cluster-centre to its ideal position',
                'Largest within-cluster spread',
                'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                'Probability of Hardy-Weinberg equilibrium',
                'Estimated B-allele frequency',
                'Estimated BAF of second paralogue (if exists)',
                'Ratio of samples assigned to clusters',
                'TRUE if there is an initial cluster overlap (along \'Theta\')',
                'Large value (>1) means increasingly slanting clusters')
    fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)

    pp <- par()
    nC <- ceiling(sqrt(nTest))
    nR <- ceiling(nTest/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
    
    for (iConf in 1:nTest){
      current.i <- sConf$ix[iConf]
      nCl <- polyCent$size[current.i]
      cntIdeal <- polyCent$centers[[current.i]]
      centers <- sConf$centers[[iConf]]
      clObj <- findPolyploidClusters(X,indSE=indSE,centers=centers,wss.update=FALSE)
      if (!any(clObj$size>0)){
        message(sprintf("No clusters assigned for '%s'",polyCent$classification[current.i]))
      }else{
        iCl <- which(clObj$size>0)
        #fData[current.i,1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
        fData[iConf,1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
        wSpread <- tapply(X[,1],clObj$cluster,sd)   #NB! MAD?
        wSpread[clObj$size[iCl]<2] <- 0   #use gO$minClLim instead?
        #fData[current.i,2] <- max(wSpread)  #Max within-cluster spread (Theta)
        fData[iConf,2] <- max(wSpread)  #Max within-cluster spread (Theta)
        if (nCl==1){
          #fData[current.i,5:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[4:5])
          fData[iConf,5:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[4:5])
        }else{
          #fData[current.i,3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
          fData[iConf,3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
          minB <- (clObj$centers[iCl,1] - gO$nSdOverlap*wSpread)[-1]
          maxB <- (clObj$centers[iCl,1] + gO$nSdOverlap*wSpread)[-length(iCl)]
          #fData[current.i,8] <- any(round(minB*100)<round(maxB*100))
          fData[iConf,8] <- any(round(minB*100)<round(maxB*100))
        }
        nClRed <- length(iCl)
        clCtrs <- matrix(clObj$centers[iCl,],nrow=nClRed)
        indOut <- matrix(FALSE,nrow=nrow(X),ncol=nCl)
        indKeep <- rep(FALSE,nrow(X))
        indOverlap <- rep(FALSE,nrow(X))
        Sratio <- rep(1,nCl)
        for (j in iCl){
          indj <- clObj$cluster==j
          nj <- sum(indj)
          if (nj>=gO$minClLim){    #Hotellings T^2-ellipse superimposed on clusters
            ctrX <- apply(X[indj,1:2],2,median,na.rm=TRUE)
            Xj <- X[indj,1:2]-matrix(ctrX,nrow=nj,ncol=2,byrow=TRUE)
            Sj <- t(Xj)%*%Xj/(nj-1)
            Sratio[j] <- Sj[1,1]/(Sj[2,2]*gO$rPenalty)  #Cluster 'angle', given comparable axes
            Xall <- X[,1:2]-matrix(ctrX,nrow=nrow(X),ncol=2,byrow=TRUE)
            T2j <- diag(Xall%*%solve(Sj)%*%t(Xall))
            f.alpha <- T2j*(nj-2)*nj/(2*(nj-1)*(nj+1))
            pVal <- 1 - pf(f.alpha,2,nj-2)
          }else{
            pVal <- rep(0,nrow(X))
            pVal[indj] <- 1
          }
          indOut[,j] <- pVal < gO$clAlpha
          indKeep[indj] <- !indOut[indj,j]
          indOverlap[!indOut[,j]][clObj$cluster[!indOut[,j]]!=j] <- TRUE
        }
        
        plot.new()
        plot.window(c(-.2,1.2),c(0,max(max(X[,2]),1/gO$rPenalty)))
        axis(1); axis(2)
        title(main=polyCent$classification[current.i],xlab='Theta',ylab='R')
        points(X[,1],X[,2],pch=16)
        points(X[!indKeep,1],X[!indKeep,2],pch=16,col='red')   #outside ellipses
        points(X[indOverlap,1],X[indOverlap,2],pch=16,col='orange')  #overlapping ellipses
        points(clCtrs[,1],clCtrs[,2],col='green',pch=16)    #cluster-centres
        
        indKeep <- indKeep &! indOverlap
        #fData[current.i,7] <- sum(indKeep)/sum(iArrays)
        #fData[current.i,9] <- sum(Sratio*clObj$size)/(ncol(BSRed)*nClRed)
        #call[current.i,iArrays][iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
        #fData[iConf,7] <- sum(indKeep)/sum(iArrays)
        fData[iConf,7] <- sum(indKeep)/sum(!iMiss)
        fData[iConf,9] <- sum(Sratio*clObj$size)/(ncol(BSRed)*nClRed)
        call[iConf,iArrays][iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
      } #end if (!any(clObj$size))
    } #for
    par(pp[c('mfcol','mai')])
    verboseResults <- list()
    verboseResults$call <- call
    verboseResults$fData <- fData
    verboseResults$fData[,5:6] <- 1-verboseResults$fData[,5:6]  #NB! B-af, not A-af
    verboseResults$fMetadata <- fMetadata
    test <- data.frame(Cent.Deviation=fData[,1]<gO$devCentLim,
                       WithinSD=fData[,2]<gO$wSpreadLim,
                       HW.P=fData[,4]>gO$hwAlpha,Call.Rate=fData[,7]>gO$detectLim,
                       Overlap=!fData[,8],Rotation=fData[,9]<=gO$rotationLim,
                       row.names=rNames)
    verboseResults$test <- test
    verboseResults$test[test==TRUE] <- 'PASS'
    verboseResults$test[test==FALSE] <- 'FAIL'
    verboseResults$test$sumFAIL <- apply(!test,1,sum,na.rm=TRUE)
    #verboseResults$test <- verboseResults$test[sConf$ix,]
    indPass <- verboseResults$test$sumFAIL==0
    if (any(indPass)){
      clss <- rownames(verboseResults$test)[which(indPass)[1]]
    }else{
      clss <- 'FAIL'
    }
    message(sprintf("Marker would be classified as '%s'",clss))
    verboseResults
  } #end if low qual.
}



#Manual genotype calling
callGenotypes.interactive <- function(BSRed,gO=setGenoOptions(largeSample=ncol(BSRed)>250)){
  #require('rggobi')
  mNoise <- mean(pData(BSRed)$noiseIntensity, na.rm=TRUE)
  R <- assayData(BSRed)$intensity
  Theta <- assayData(BSRed)$theta
  SE <- assayData(BSRed)$SE
  polyCent <- generatePolyCenters(ploidy=gO$ploidy)
  if (is.null(assayData(BSRed)$call)){
    assayData(BSRed)$call <-  matrix(nrow=nrow(BSRed), ncol=ncol(BSRed), dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
    isCalled <- FALSE
  }else{
    isCalled <- TRUE
  }
  if (is.null(assayData(BSRed)$ped.check.parents) | !isCalled)
    assayData(BSRed)$ped.check.parents <- matrix(nrow=nrow(BSRed), ncol=ncol(BSRed), dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
  rNames <- c("Classification", "Cent.Deviation", "Within.SD", "HW.Chi2", "HW.P", "BAF.Locus1", "BAF.Locus2", "Call.Rate", "Ped.Errors")   
  if (all(rNames[1:8] %in% colnames(fData(BSRed))) & isCalled){
    prevCalls <- fData(BSRed)$Classification
    if (!rNames[9] %in% colnames(fData(BSRed))){
      fData <- data.frame(Ped.Errors=rep(NA,nrow(BSRed)), stringsAsFactors=FALSE, row.names=featureNames(BSRed))
      fMetadata <- data.frame(labelDescription='Number of pedigree-errors, summed over offspring', row.names=colnames(fData), stringsAsFactors=FALSE)
      featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
      featureData(BSRed) <- combine(featureData(BSRed), featureData)
    }
    if ('Manual.Calls.R' %in% colnames(fData(BSRed))){
      indMiss <- fData(BSRed)$Manual.Calls.R %in% ''
      if (any(!indMiss)){
        message('Column \'Manual.Calls.R\' will be overwritten!')
        prevCalls[!indMiss] <- fData(BSRed)$Manual.Calls.R[!indMiss]
      }
    }else{
      fData <- data.frame(Manual.Calls.R=character(nrow(BSRed)), stringsAsFactors=FALSE, row.names=featureNames(BSRed))
      fMetadata <- data.frame(labelDescription='Genotype call from interactive clustering', row.names=colnames(fData), stringsAsFactors=FALSE)
      featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
      featureData(BSRed) <- combine(featureData(BSRed), featureData)
    }
  }else{
    iKeep <- !colnames(fData(BSRed)) %in% c(rNames,'Manual.Calls.R')
    fData <- data.frame(Cent.Deviation=rep(NA,nrow(BSRed)), Within.SD=rep(NA,nrow(BSRed)), HW.Chi2=rep(NA,nrow(BSRed)), HW.P=rep(NA,nrow(BSRed)), BAF.Locus1=rep(NA,nrow(BSRed)), BAF.Locus2=rep(NA,nrow(BSRed)), Call.Rate=rep(NA,nrow(BSRed)), Ped.Errors=rep(NA,nrow(BSRed)), Manual.Calls.R=character(nrow(BSRed)), stringsAsFactors=FALSE, row.names=featureNames(BSRed))
    fDesc <-  c('Largest distance from cluster-centre to its ideal position',
                'Largest within-cluster spread',
                'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                'Probability of Hardy-Weinberg equilibrium',
                'Estimated B-allele frequency',
                'Estimated BAF of second paralogue (if exists)',
                'Ratio of samples assigned to clusters',
                'Number of pedigree-errors, summed over offspring',
                'Genotype call from interactive clustering')
    fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)
    featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
    featureData(BSRed) <- combine(featureData(BSRed)[,iKeep], featureData)
    prevCalls <- 'FAIL'
  }
  str0 <- 'Calling marker no. %d of %d...'
  for (i in 1:nrow(BSRed)){
    message(sprintf(str0,i,nrow(BSRed)))
    if (isCalled & !prevCalls[i]%in%'FAIL'){
      marker <- data.frame(Theta=Theta[i,], R=R[i,], Call=assayData(BSRed)$call[i,], PedigreeID=pData(BSRed)$PedigreeID, stringsAsFactors=FALSE)
      gg <- ggobi(marker[,c('Theta','R')],name='marker')
      glc <- marker$Call*4 + 1
      glc[is.na(glc)] <- 8
      marker$PedCheck <- validateSingleCall(marker,prevCalls[i])
      iErr <- marker$PedCheck>0
      glc[iErr] <- 5 + unlist(marker$PedCheck[iErr])
      glyph_colour(gg$marker) <- glc
      glyph_type(gg$marker) <-  rep(6,nrow(marker))
      glyph_type(gg$marker)[iErr] <- 7
      message(sprintf('Number of pedigree-errors: %d',sum(iErr)))
      finished <- readline(prompt='Keep current clusters? (y/n) ')
      OK <- finished %in% c('y','Y','yes','Yes','YES','1','T','TRUE')
      close(gg)
    }else{
      OK <- FALSE
    } #end if isCalled
    if (!OK){
      iMiss <- is.na(R[i,])|is.na(SE[i,])
      iDet <- R[i,]>mNoise & !iMiss & Theta[i,]>-.5 & Theta[i,]<1.5    #Filter bad markers
      if (sum(iDet)<=ncol(BSRed)*gO$detectLim){
        message(sprintf('Marker %s failed due to many missing samples',featureNames(BSRed)[i]))
        featureData(BSRed)@data$Manual.Calls.R[[i]] <- 'FAIL'
        assayData(BSRed)$call[i,] <- NA
        assayData(BSRed)$ped.check.parents[i,] <- NA
        assayData(BSRed)$ped.check[i,] <- NA
        featureData(BSRed)@data$Cent.Deviation[[i]] <- NA
        featureData(BSRed)@data$Within.SD[i] <- NA
        featureData(BSRed)@data$HW.Chi2[i] <- NA
        featureData(BSRed)@data$HW.P[i] <- NA
        featureData(BSRed)@data$BAF.Locus1[i] <- NA
        featureData(BSRed)@data$BAF.Locus2[i] <- NA
        featureData(BSRed)@data$Call.Rate[i] <- 0
        #featureData(BSRed)@data$Max.Rotation[i] <- NA
        featureData(BSRed)@data$Ped.Errors[i] <- 0
      }else{
        sclR <- median(R[i,iDet],na.rm=TRUE)*2*gO$rPenalty                   #Factor by which R is divided before clustering
        X <- matrix(cbind(Theta[i,],R[i,]/sclR,SE[i,])[iDet,],ncol=3)
        if (is.null(gO$probsIndSE)){
          indSE <- rep(TRUE,sum(iDet))
        }else{
          indSE <- X[,3]<quantile(X[,3],probs=gO$probsIndSE,na.rm=TRUE)      #Uncertain points removed from clustering
        } #end if is.null
        #qRange <- getQRange(X[indSE,1],minScl=3,probs=gO$probsQRange)     #Initial cluster-centers
        #sConf <- distLikelyConfiguration(X[indSE,1:2],polyCent)  #Ordered list of likely classifications
        #sConf <- getCenters(X[indSE,1],gO=gO,polyCent=polyCent)
        #maxConf <- length(sConf$ix)
        while (!OK){
          marker <- data.frame(Theta=Theta[i,],R=R[i,],PedigreeID=pData(BSRed)$PedigreeID,stringsAsFactors=FALSE)[iDet,]
          gg <- ggobi(marker[,c('Theta','R')],name='marker')
          glyph_colour(gg$marker) <- 8
          clList <- unlist(polyCent$classification)#[sConf$ix])
          nCand <- length(clList)
          message('Genotype-calling categories: ')
          for (k in 1:nCand)
            message(sprintf('%d: %s',k,clList[k]))
          message(sprintf('%d: FAIL',k+1))
          rngT <- range(marker$Theta)
          message(sprintf('(Range Theta: [%.2f, %.2f])',rngT[1],rngT[2]))
          iConf <- NA
          while (!iConf %in% 1:(nCand+1)){
            str1 <- 'Please indicate the correct marker-type: (in 1:8) '
            iConf <- as.numeric(readline(prompt=str1))
          }
          if (iConf!=(nCand+1)){
            #Suggest clusters
            nCl <- polyCent$size[iConf]#[sConf$ix[iConf]]
            cntIdeal <- polyCent$centers[[iConf]]#[[sConf$ix[iConf]]]
            centers <- getSpecificCenters(X[indSE,1], classification=polyCent$classification[[iConf]], gO=gO, breaks=seq(-.25,1.25,gO$binWidth), polyCent=polyCent)
            clObj <- findPolyploidClusters(X,indSE=indSE,centers=centers,wss.update=FALSE)
            
            #Quality-check and update clusters
            marker$Call <- cntIdeal[clObj$cluster]
            gg <- manualCall(marker,cntIdeal,clList[iConf],gg,close.gg=FALSE)
            marker$Call <- gg$marker$Call
            marker$PedCheck <- gg$marker$PedCheck
            if (nCl==1){
              clObj$cluster <- gg$marker$Call-cntIdeal+1
            }else{
              clObj$cluster <- (gg$marker$Call-cntIdeal[1])/diff(range(cntIdeal))*(nCl-1)+1
            }
            glc <- gg$marker$Call*4 + 1
            glc[is.na(glc)] <- 8
            glt <- rep(6,nrow(marker))
            newCent1 <- tapply(X[,1],clObj$cluster,median,na.rm=TRUE) #NB! median
            newCent2 <- tapply(X[,2],clObj$cluster,median,na.rm=TRUE) #NB! median
            clObj$centers[as.numeric(names(newCent1)),] <- cbind(newCent1,newCent2)
            clObj$centers[-as.numeric(names(newCent1)),] <- NA
            clObj$size <- rep(0,nCl)
            newSizes <- tapply(rep(1,nrow(X)),clObj$cluster,sum)
            clObj$size[as.numeric(names(newSizes))] <- newSizes
            
            tmpRes <- rep(NA,7)
            iCl <- which(clObj$size>0)
            tmpRes[1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
            tmpRes[2] <- max(tapply(X[,1],clObj$cluster,sd))  #Max within-cluster spread (Theta)
            if (nCl>1){
              tmpRes[3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=iConf,polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
            } #end if nCl
            nClRed <- length(iCl)
            clCtrs <- matrix(clObj$centers[iCl,],nrow=nClRed)
            indOut <- matrix(FALSE,nrow=nrow(X),ncol=nCl)
            indKeep <- rep(FALSE,nrow(X))
            indOverlap <- rep(FALSE,nrow(X))
            Sratio <- rep(1,nCl)
            for (j in iCl){
              indj <- clObj$cluster%in%j
              nj <- sum(indj)
              if (nj>gO$minClLim){    #Hotellings T^2-ellipse superimposed on clusters
                ctrX <- apply(X[indj,1:2],2,median,na.rm=TRUE)
                Xj <- X[indj,1:2]-matrix(ctrX,nrow=nj,ncol=2,byrow=TRUE)
                Sj <- t(Xj)%*%Xj/(nj-1)
                Sratio[j] <- Sj[1,1]/(Sj[2,2]*gO$rPenalty)  #Cluster 'angle', given comparable axes
                Xall <- X[,1:2]-matrix(ctrX,nrow=nrow(X),ncol=2,byrow=TRUE)
                T2j <- diag(Xall%*%solve(Sj)%*%t(Xall))
                f.alpha <- T2j*(nj-2)*nj/(2*(nj-1)*(nj+1))
                pVal <- 1 - pf(f.alpha,2,nj-2)
              }else{
                pVal <- rep(0,nrow(X))
                pVal[indj] <- 1
              }
              indOut[,j] <- pVal < gO$clAlpha
              indKeep[indj] <- !indOut[indj,j]
              indOverlap[!indOut[,j]][clObj$cluster[!indOut[,j]]!=j] <- TRUE
            } #end for j
            indKeep <- indKeep &! indOverlap
            tmpRes[7] <- sum(indKeep)/ncol(BSRed)
            
            #Update calls, pedCheck, and scatterplot 
            glc[!indKeep] <- rep(8,sum(!indKeep))
            glyph_colour(gg$marker) <- glc
            marker$Call[!indKeep] <- gg$marker$Call[!indKeep] <- clObj$cluster[!indKeep] <- NA
            indSel <- marker$PedCheck>0
            marker$PedCheck[indSel] <- gg$marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],clList[iConf])
            iErr <- marker$PedCheck>0
            glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
            glyph_type(gg$marker) <- glt
            glyph_type(gg$marker)[iErr] <- 7
            
            message(sprintf('Hardy-Weinberg p-value: %.2g',tmpRes[4]))
            message(sprintf('Call-rate: %.2g',tmpRes[7]))
            message(sprintf('Number of pedigree-errors: %d',sum(iErr)))
            finished <- readline(prompt='Save results and move on to next marker? (y/n) ')
          }else{
            finished <- readline(prompt='Fail marker and move on? (y/n) ')
          } #end if iConf!=8
          OK <- finished %in% c('y','Y','yes','Yes','YES','1','T','TRUE')
          close(gg)
        } #end while !OK
        if (iConf!=8){
          assayData(BSRed)$call[i,iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
          assayData(BSRed)$call[i,!iDet] <- NA
          assayData(BSRed)$call[i,iDet][!indKeep] <- NA
          assayData(BSRed)$ped.check.parents[i,iDet] <- marker$PedCheck
          assayData(BSRed)$ped.check.parents[i,!iDet] <- NA
          assayData(BSRed)$ped.check[i,] <- NA
          assayData(BSRed)$ped.check[i,iDet][marker$PedCheck%in%0] <- TRUE
          assayData(BSRed)$ped.check[i,iDet][marker$PedCheck%in%1] <- FALSE
          featureData(BSRed)@data$Manual.Calls.R[[i]] <- polyCent$classification[[iConf]]#[[sConf$ix[iConf]]]
          featureData(BSRed)@data$Cent.Deviation[[i]] <- tmpRes[1]
          featureData(BSRed)@data$Within.SD[i] <- tmpRes[2]
          featureData(BSRed)@data$HW.Chi2[i] <- tmpRes[3]
          featureData(BSRed)@data$HW.P[i] <- tmpRes[4]
          featureData(BSRed)@data$BAF.Locus1[i] <- tmpRes[5]
          featureData(BSRed)@data$BAF.Locus2[i] <- tmpRes[6]
          featureData(BSRed)@data$Call.Rate[i] <- tmpRes[7]
          #featureData(BSRed)@data$Max.Rotation[i] <- max(Sratio)
          featureData(BSRed)@data$Ped.Errors[i] <- sum(iErr)
        }else{
          featureData(BSRed)@data$Manual.Calls.R[[i]] <- 'FAIL'
          assayData(BSRed)$call[i,] <- NA
          assayData(BSRed)$ped.check.parents[i,] <- NA
          assayData(BSRed)$ped.check[i,] <- NA
          featureData(BSRed)@data$Cent.Deviation[[i]] <- NA
          featureData(BSRed)@data$Within.SD[i] <- NA
          featureData(BSRed)@data$HW.Chi2[i] <- NA
          featureData(BSRed)@data$HW.P[i] <- NA
          featureData(BSRed)@data$BAF.Locus1[i] <- NA
          featureData(BSRed)@data$BAF.Locus2[i] <- NA
          featureData(BSRed)@data$Call.Rate[i] <- 0
          #featureData(BSRed)@data$Max.Rotation[i] <- NA
          featureData(BSRed)@data$Ped.Errors[i] <- 0
        } #end if iConf!=8
      } #end if sum(iDet)
    }else{
      featureData(BSRed)@data$Manual.Calls.R[[i]] <- featureData(BSRed)@data$Classification[[i]]
      assayData(BSRed)$ped.check.parents[i,] <- marker$PedCheck
    } #end if !OK
  } #end for i
  validObject(BSRed)
  BSRed
}



#Scales each value of the pooled SE with the arclength of the first quadrant
#semi-circle of radius 'R', as the arclength is one for all values of
#'R' in the theta-R plot. The arclength is defined by 'p' <- the norm of the
#distance measure used. If p=1 (manhattan) a=sqrt(2)*r, if p=2 (euclidean)
#a=pi/2*r, if p=inf (infinity-norm) a=2r.
findSeTheta <- function(pooledSE.raw,R,dist='manhattan',pNorm=NULL){
  integrand <- function(th,pNorm){
    2/pNorm*sqrt(cos(th)^(4/pNorm-2)*sin(th)^2+sin(th)^(4/pNorm-2)*cos(th)^2)
  }
  switch(dist,
         euclidean={scale <- pi*R/2},
         manhattan={scale <- sqrt(2)*R},
         minkowski={
           unitLength <- integrate(integrand,lower=0,upper=pi/2,pNorm=pNorm)
           scale <- unitLength$value*R
         })
  pooledSE.theta <- pooledSE.raw/scale
  pooledSE.theta
}
  


#Generates a list of possible cluster-alternatives
generatePolyCenters <- function(ploidy){
  centers <- list()
  classification <- list()
  if (ploidy=='di'){
    centers[[1]] <- 0               #Only A's
    centers[[2]] <- 1               #Only B's
    centers[[3]] <- c(0,.5,1)            #Regular SNP
    classification[1] <- 'MONO-a'
    classification[2] <- 'MONO-b'
    classification[3] <- 'SNP'
  }else if(ploidy=='tetra'){
    centers[[1]] <- 0               #Only A's
    centers[[2]] <- .5              #PSV or MSV with small minor AF
    centers[[3]] <- 1               #Only B's
    centers[[4]] <- c(0,.5,1)            #Regular SNP
    centers[[5]] <- c(0,.25,.5)          #MSV with one paralogue fixed AA
    centers[[6]] <- c(.5,.75,1)          #MSV with one paralogue fixed BB
    centers[[7]] <- c(0,.25,.5,.75,1)    #MSV with segregation in both paralogues
    classification[1] <- 'MONO-a'
    classification[2] <- 'PSV'
    classification[3] <- 'MONO-b'
    classification[4] <- 'SNP'
    classification[5] <- 'MSV-a'
    classification[6] <- 'MSV-b'
    classification[7] <- 'MSV-5'
  }else if(ploidy=='tetra.red'){
    centers[[1]] <- 0               #Only A's
    centers[[2]] <- .5              #PSV or MSV with small minor AF
    centers[[3]] <- 1               #Only B's
    centers[[4]] <- c(0,.5,1)            #Regular SNP
    centers[[5]] <- c(0,.25,.5)          #MSV with one paralogue fixed AA
    centers[[6]] <- c(.5,.75,1)          #MSV with one paralogue fixed BB
    classification[1] <- 'MONO-a'
    classification[2] <- 'PSV'
    classification[3] <- 'MONO-b'
    classification[4] <- 'SNP'
    classification[5] <- 'MSV-a'
    classification[6] <- 'MSV-b'
  }else{
    stop("Allowed values for ploidy are 'di', 'tetra', or 'tetra.red'")
  }
  polyCent <- list()
  polyCent$centers <- centers
  polyCent$classification <- classification
  polyCent$size <- sapply(polyCent$centers,length)
  polyCent
}



#Estimates likely range and cluster-centres
getCenters <- function(theta,gO=setGenoOptions(),breaks=seq(-.25,1.25,gO$binWidth),polyCent=generatePolyCenters(ploidy=gO$ploidy)){
##### for testing:
  #breaks <- seq(-.25,1.25,.1)
  #theta <- assayData(BSRed)$theta[125,]; theta <- theta[theta>breaks[1]&theta<max(breaks)]
  #hX <- hist(theta,breaks=breaks,plot=TRUE,ylim=c(0,50));abline(h=gO$minBin-1)
#####
  
  theta <- theta[theta>breaks[1] & theta<max(breaks)]
  nSamp <- length(theta)
  polyCl <- findClusters(theta=theta,breaks=breaks,minBin=gO$minBin)
  if (polyCl$nCl<3){
    breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
    polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
    if (all(polyCl1$clSizes>nSamp/100)){
      polyCl <- polyCl1
    }
  }
  centers <- list()
  if (polyCl$nCl==0){
    ix <- NULL
  }else if (gO$ploidy%in%'tetra'){
    i1 <- polyCent$size==1
    i3 <- polyCent$size==3
    i5 <- polyCent$size==5
    if (polyCl$nCl==1){
      vec1 <- unlist(polyCent$centers[i1])
      centers[[1]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
      sqDev1 <- (vec1 - centers[[1]])^2/c(2,1,2)   #Disfavour PSVs
      cent3 <- polyCent$centers[[4]]
      indClose <- which(cent3 %in% (round(polyCl$clPeaks*2)/2))
      cent3[indClose] <- polyCl$clPeaks
      centers[[2]] <- cent3
      ix <- c(which(i1)[order(sqDev1)[1]],4)
    }else if (polyCl$nCl==2){
      ix <- numeric(3)
      cent5 <- polyCent$centers[[7]]
      indClose <- cent5 %in% (round(polyCl$clPeaks*4)/4)
      if (sum(indClose)==1){
        cDist <- sapply(polyCl$clPeaks,function(x,cent5) (x-cent5)^2, cent5)
        orderC <- order(cDist[indClose,])
        cDist[indClose,orderC[2]] <- 10
        indClose[order(cDist[,orderC[2]])[1]] <- TRUE
      }
      indClose <- which(indClose)
      closerAA <- sum((polyCl$clPeaks)^2) < sum((polyCl$clPeaks-1)^2)
      
      if (sum(c(1,3,5)%in%indClose)==2){  #1-3, 3-5, 1-5
        cent5[indClose] <- polyCl$clPeaks
        if (closerAA){
          ii2 <- 5
        }else{
          ii2 <- 6
        }
        cent3 <- polyCent$centers[[ii2]]
        indClose1 <- which(rank((cent3-polyCl$clPeaks[order(polyCl$clSizes)[2]])^2)==1)
        cent3[indClose1] <- polyCl$clPeaks[order(polyCl$clSizes)[2]]
        centers[1:2] <- list(cent5[c(1,3,5)],cent3)
        ix[1:2] <- c(4,ii2)
      }else if(sum(1:3%in%indClose)==2){   #1-2, 2-3
        cent5[indClose] <- polyCl$clPeaks
        cent3 <- polyCent$centers[[4]]
        cent3[1:2] <- polyCl$clPeaks
        centers[1:2] <- list(cent5[1:3],cent3)
        ix[1:2] <- c(5,4)
      }else if(sum(3:5%in%indClose)==2){   #4-5, 3-4
        cent5[indClose] <- polyCl$clPeaks
        cent3 <- polyCent$centers[[4]]
        cent3[2:3] <- polyCl$clPeaks
        centers[1:2] <- list(cent5[3:5],cent3)
        ix[1:2] <- c(6,4)
      }else{                               #1-4, 2-4, 2-5
        cent3 <- polyCent$centers[[4]]
        indClose <- which(cent3 %in% (round(polyCl$clPeaks*2)/2))
        if (length(indClose)==1){
          if (closerAA){                                #clusters closer to theta=0
            indClose <- which(cent3 %in% (floor(pmax(polyCl$clPeaks,0)*2)/2))
          }else{                                        #clusters closer to theta=1
            indClose <- which(cent3 %in% (ceiling(pmin(polyCl$clPeaks,1)*2)/2))
          }
        }
        cent3[indClose] <- polyCl$clPeaks
        if (closerAA){
          ii2 <- 5
        }else{
          ii2 <- 6
        }
        cent3a <- polyCent$centers[[ii2]]
        indClose1 <- which(rank((cent3a-polyCl$clPeaks[order(polyCl$clSizes)[2]])^2)==1)
        cent3a[indClose1] <- polyCl$clPeaks[order(polyCl$clSizes)[2]]
        centers[1:2] <- list(cent3,cent3a)
        ix[1:2] <- c(4,ii2)
      }
      vec1 <- unlist(polyCent$centers[i1])
      centers[[3]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
      sqDev1 <- (vec1 - centers[[3]])^2
      ix[3] <- which(i1)[order(sqDev1)[1]]
    }else if (polyCl$nCl==3){
      ix <- numeric(3)
      arr3 <- t(array(unlist(polyCent$centers[i3]),dim=c(sum(i3),3)))
      indx <- apply(arr3,1,function(x,clPeaks,lim) max(abs(x-clPeaks))<=lim,polyCl$clPeaks,gO$devCentLim)  #Don't attempt if devCentLim is exceeded
      if (any(indx)){
        arr3 <- matrix(arr3[indx,],nrow=sum(indx),byrow=TRUE)
        sqDev3 <- apply(arr3,1,function(x) sum((x-polyCl$clPeaks)^2))/c(2,1,1)[indx] #alt: apply(arr3,1,function(x) diff(range(x)))
        ix[1] <- which(i3)[indx][order(sqDev3)[1]]
        centers[[1]] <- polyCl$clPeaks
      }else if (diff(range(theta))<.25){
        vec1 <- unlist(polyCent$centers[i1])
        centers[[1]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
        sqDev1 <- (vec1 - centers[[1]])^2/c(2,1,2)   #Disfavour PSVs
        ix[1] <- which(i1)[order(sqDev1)[1]]
      }else{
        ix[1] <- 7
        centers[[1]] <- c(0,polyCl$clPeaks,1)
      }
      breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
      polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
      if (polyCl1$nCl==5){
        centers[[2]] <- polyCl1$clPeaks
      }else{
        centers[[2]] <- seq(polyCl$clPeaks[1],polyCl$clPeaks[3],diff(range(polyCl$clPeaks))/4)
      }
      ix[2] <- 7
      
      diffPeaks <- diff(polyCl$clPeaks)
      iSame <- c(diffPeaks<=min(diffPeaks),FALSE)
      if (sum(iSame)==1){
        cent3 <- polyCent$centers[[4]]
        clPeaksRed <- polyCl$clPeaks[!iSame]
        indx <- which(iSame)
        clPeaksRed[indx] <- mean(polyCl$clPeaks[indx:(indx+1)])
        indClose <- which(cent3 %in% (round(clPeaksRed*2)/2))
        if (length(indClose)==1){
          closerAA <- sum((clPeaksRed)^2) < sum((clPeaksRed-1)^2)
          if (closerAA){                                #clusters closer to theta=0
            indClose <- c(1,2)
          }else{                                        #clusters closer to theta=1
            indClose <- c(2,3)
          }
        }
        cent3[indClose] <- clPeaksRed
        centers[[3]] <- cent3
        ix[3] <- 4
      }else{
        vec1 <- unlist(polyCent$centers[i1])
        centers[[3]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
        sqDev1 <- (vec1 -  centers[[3]])^2
        ix[3] <- which(i1)[order(sqDev1)[1]]
      }
    }else if (polyCl$nCl==4){
      cent5 <- polyCent$centers[[7]]
      #minDist <- tapply(cent5,1:5,function(x,clPeaks) min((x-clPeaks)^2),polyCl$clPeaks)
      minDist <- sapply(cent5,function(x,clPeaks) min((x-clPeaks)^2),polyCl$clPeaks)
      indClose <- !rank(minDist,ties.method='max')==5
      if (sum(indClose)<4){
        if (sum((polyCl$clPeaks)^2) < sum((polyCl$clPeaks-1)^2)){   #clusters closer to theta=0
          indClose <- c(rep(TRUE,4),FALSE)
        }else{                                        #clusters closer to theta=1
          indClose <- c(FALSE,rep(TRUE,4))
        }
      }
      cent5[indClose] <- polyCl$clPeaks
      centers[[1]] <- cent5
      breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
      polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
      if (polyCl1$nCl==5){
        centers[[2]] <- polyCl1$clPeaks
      }else{
        centers[[2]] <- seq(polyCl$clPeaks[1],polyCl$clPeaks[4],diff(range(polyCl$clPeaks))/4)
      }
      diffPeaks <- diff(polyCl$clPeaks)
      iSame <- c(diffPeaks<=min(diffPeaks),FALSE)
      if (sum(iSame)==1){
        clPeaksRed <- polyCl$clPeaks[!iSame]
        indx <- which(iSame)
        clPeaksRed[indx] <- mean(polyCl$clPeaks[indx:(indx+1)])
      }else{
        clPeaksRed <- c(polyCl$clPeaks[1],mean(polyCl$clPeaks[2:3]),polyCl$clPeaks[4])
      }
      centers[[3]] <- clPeaksRed
      arr3 <- t(array(unlist(polyCent$centers[i3]),dim=c(sum(i3),3)))
      sqDev3 <- apply(arr3,1,function(x) sum((x-clPeaksRed)^2))/c(2,1,1) #alt: apply(arr3,1,function(x) diff(range(x)))
      ix <- c(7,7,which(i3)[order(sqDev3)[1]])
    }else if (polyCl$nCl==5){
      centers[[1]] <- polyCl$clPeaks
      #centers[[2]] <- seq(0,1,.25)
      centers[[2]] <- c(polyCl$clPeaks[1],mean(polyCl$clPeaks[2:4]),polyCl$clPeaks[5])
      ix <- c(7,4)
    }else{
      ix <- c(4,7)
      centers <- polyCent$centers[ix]
    } #end if polyCl$nCl
  }else{
    ix <- 1:length(polyCent$classification)
    centers <- polyCent$centers
    warning('Only ploidy="tetra" is implemented. A non-ranked list of starting points
is returned for other ploidy, however accurate clustering is NOT expected')
  #}else if (gO$ploidy%in%'di'){
  #  i1 <- polyCent$size==1
  #  i3 <- polyCent$size==3
  #  if (polyCl$nCl==1){
  #    
  #  }else if (polyCl$nCl==2){
  #    
  #  }else if (polyCl$nCl==3){
  #    
  #  }else{
  #    
  #  } #end if polyCl$nCl==1
  } #end if polyCl$nCl==0
  sConf <- list(ix=ix,centers=centers)
  sConf
}



getSpecificCenters <- function(theta,classification,gO=setGenoOptions(),breaks=seq(-.25,1.25,gO$binWidth),polyCent=generatePolyCenters(ploidy=gO$ploidy)){
##### for testing:
  #breaks <- seq(-.25,1.25,.1)
  #theta <- assayData(BSRed)$theta[8,]; theta <- theta[theta>breaks[1]&theta<max(breaks)]
  #hX <- hist(theta,breaks=breaks,plot=TRUE,ylim=c(0,50));abline(h=gO$minBin-1)
#####
  
  theta <- theta[theta>breaks[1] & theta<max(breaks)]
  nSamp <- length(theta)
  polyCl <- findClusters(theta=theta,breaks=breaks,minBin=gO$minBin)
  iConf <- which(polyCent$classification %in% classification)
  if (polyCl$nCl==polyCent$size[iConf]){
    return(polyCl$clPeaks)
  }else if (polyCl$nCl<polyCent$size[iConf]){
    breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
    polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
    if (polyCl1$nCl==polyCent$size[iConf]){
      return(polyCl1$clPeaks)
    }
  }else{
    breaks1 <- seq(breaks[1],max(breaks),gO$binWidth*2)
    polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
    if (polyCl1$nCl==polyCent$size[iConf]){
      return(polyCl1$clPeaks)
    }
  }
  return(polyCent$centers[[iConf]])
}


  
#Suggests clusters based on histograms
findClusters <- function(theta,breaks=seq(-.25,1.25,.05),minBin=2,plot=FALSE){
  hX <- hist(theta,breaks=breaks,plot=plot)
  spec <- hX$counts
  spec[spec<minBin] <- 0
  spec <- c(0,spec,0)
  indPeaks <- which(apply(embed(spec,3),1,function(x) rank(x)[2]>2))
  clPeaks <- hX$mids[indPeaks]
  iSame <- c(diff(indPeaks)==1,FALSE)
  if (any(iSame)){
    clPeaksRed <- clPeaks[!iSame]
    indx <- which(iSame)
    for (i in 1:sum(iSame)){
      clPeaksRed[indx[1]+1-i] <- mean(clPeaks[indx[1]:(indx[1]+1)])
      indx <- indx[-1]
    }
    clPeaks <- clPeaksRed
  }
  nCl <- length(clPeaks)
  clSizes <- apply(matrix(embed(c(0,hX$counts,0),3)[indPeaks[!iSame],],nrow=nCl),1,sum)
  polyCl <- list(clPeaks=clPeaks,clSizes=clSizes,nCl=nCl)
  polyCl
}



findPolyploidClusters <- function(X,indSE=rep(TRUE,nrow(X)),centers,plot=FALSE,wss.update=TRUE,...){
  clObj <- NULL
  nCl <- length(centers)
  if (sum(indSE)>=nCl+1){   #Requires more samples than groups
    breaks <- c(-2,-.25,centers[-nCl]+diff(centers)/2,1.25,2)
    hX <- hist(X[indSE,1],breaks=breaks,plot=plot,...)
    indCall <- hX$counts[-c(1,nCl+2)]>0
  }else{
    stop('k-means algorithm requires more samples than cluster-centers')
  }
  centers <- cbind(centers,rep(NA,nCl))
  if (sum(indCall)>1){
    for (i in which(indCall)){
      indi <- X[indSE,1]>hX$breaks[i+1] & X[indSE,1]<=hX$breaks[i+2]+1e-7  #Accounts for numerical tolerance of 'hist'
      centers[i,2] <- median(X[indSE,2][indi],na.rm=TRUE)
    }
    clObj <- try(kmeans(X[indSE,1:2],centers[indCall,]),silent=TRUE)
    if (inherits(clObj, "try-error")){
      message(clObj)
      warning('Hartigan-Wong clustering failed for one marker (kmeans).\n  Will attempt MacQueen-algorithm instead.')
      clObj <- kmeans(X[indSE,1:2],centers[indCall,],algorithm='MacQueen')
    }
  }else if (sum(indCall)==1){
    clObj$centers <- matrix(apply(X[indSE,1:2],2,median,na.rm=TRUE),nrow=1)
    clObj$cluster <- rep(1,sum(indSE))
  }else{
    stop('No clusters found')
  }

  ## Assign left-outs to clusters
  clCluster <- rep(NA,nrow(X))
  clCluster[indSE] <- which(indCall)[clObj$cluster]
  centers[indCall,] <- clObj$centers
  centers[!indCall,2] <- mean(X[!indSE,2],na.rm=TRUE)
  if (!all(indSE)){
    #dAll <- (matrix(clObj$centers[,1],nrow=sum(!indSE),ncol=sum(indCall),byrow=TRUE) - matrix(X[!indSE,1],nrow=sum(!indSE),ncol=sum(indCall)))^2 + (matrix(clObj$centers[,2],nrow=sum(!indSE),ncol=sum(indCall),byrow=TRUE) - matrix(X[!indSE,2],nrow=sum(!indSE),ncol=sum(indCall)))^2
    dAll <- (matrix(centers[,1],nrow=sum(!indSE),ncol=nCl,byrow=TRUE) - matrix(X[!indSE,1],nrow=sum(!indSE),ncol=nCl))^2 + (matrix(centers[,2],nrow=sum(!indSE),ncol=nCl,byrow=TRUE) - matrix(X[!indSE,2],nrow=sum(!indSE),ncol=nCl))^2
    clCluster[!indSE] <- apply(dAll,1,function(x) which(rank(x,ties.method='random')==1))
  }
  #for (a in 1:nCl){
  #  if (!indCall[a]){
  #    clCluster[clCluster>=a] <- clCluster[clCluster>=a] + 1
  #  }
  #}
  clObj$cluster <- clCluster

  ## Update cluster-centers etc.
  clObj$centers <- matrix(NA,nrow=nCl,ncol=2)
  newCent1 <- tapply(X[,1],clCluster,median,na.rm=TRUE) #NB! median
  newCent2 <- tapply(X[,2],clCluster,median,na.rm=TRUE) #NB! median
  clObj$centers[as.numeric(names(newCent1)),] <- cbind(newCent1,newCent2)
  
  clObj$size <- rep(0,nCl)
  newSizes <- tapply(clCluster,clCluster,length)
  clObj$size[as.numeric(names(newSizes))] <- newSizes

  if (sum(!indSE)>0){
    clObj$withinss <- rep(NA,nCl)
    if (wss.update){   #Save time by not calculating withinss
      ws1 <- tapply(X[,1],clCluster,function(x) (x-rep(mean(x,na.rm=TRUE),length(x)))^2)
      ws2 <- tapply(X[,2],clCluster,function(x) (x-rep(mean(x,na.rm=TRUE),length(x)))^2)
      newWss <- rep(NA,nCl)
      for (i in 1:nCl){
        newWss[i] <- sum(ws1[[i]] + ws2[[i]])
      }
      clObj$withinss[as.numeric(names(ws1))] <- newWss
    }
  }
  class(clObj) <- 'kmeans'
  clObj
}



#Null hypothesis: The population is in Hardy-Weinberg frequencies.
#Different allele-frequencies for testing tetraploid loci given in afList.
#A value of afList=.5 means that the AF is the same at both loci.
testHardyWeinberg <- function(sizes,bestConf,polyCent=generatePolyCenters(ploidy='di'),afList=seq(0,.5,.05)){
  if (is.character(bestConf))
    bestConf <- which(polyCent$classification==bestConf)
  nPheno <- polyCent$size[bestConf]
  nTot <- sum(sizes)
  af <- rep(NA,2)
  if (nPheno==1){
    chiSq <- 0
    df <- 1   #dummy-variable
    if (bestConf==1){
      af[1] <- 1
    }else if (bestConf==2){
      af <- 0:1
    }else if (bestConf==3){
      af[1] <- 0
    }
  }else if (nPheno==3){
    p <- (2*sizes[1]+sizes[2])/(2*nTot)
    if (any(p==c(0,1))){
      chiSq <- 0
    }else{
      q <- 1-p
      expCounts <- rep(NA,nPheno)
      expCounts[1] <- p^2*nTot          #AA
      expCounts[2] <- 2*p*q*nTot        #AB
      expCounts[3] <- q^2*nTot          #BB
      chiSq <- sum((sizes-expCounts)^2/expCounts)
    }
    df <- 1
    af[1] <- p
    if (bestConf==5){
      af[2] <- 1
    }else if (bestConf==6){
      af[2] <- 0
    }
  }else if (nPheno==5){
    p <- (4*sizes[1]+3*sizes[2]+2*sizes[3]+sizes[4])/(4*nTot)  #'sizes' contains the 5 cluster-sizes
    if (any(p==c(0,1))){
      chiSq <- 0
      af <- rep(p,2)
    }else{
      iLim <- min(1-1/(2*p),1/(2*p))
      if (any(afList<iLim))  #Negative af's not allowed
        afList <- c(iLim,afList[afList>iLim])
      nTry <- length(afList)   #Try different allowed weights between p1 and p2
      expCounts <- matrix(nrow=nTry,ncol=nPheno)
      chiSqs <- rep(Inf,nTry)
      afs <- matrix(nrow=nTry,ncol=2)
      for (i in 1:nTry){
        p1 <- 2*p*afList[i]
        p2 <- 2*p*(1-afList[i])   #mean(AFs)=p (AFs sum to 2*p)
        if (all(c(p1,p2)<=1)){
          q1 <- 1-p1
          q2 <- 1-p2
          expCounts[i,1] <- p1^2*p2^2*nTot              #AAAA
          expCounts[i,2] <- 2*p1*p2*(p1*q2+p2*q1)*nTot      #AAAB
          expCounts[i,3] <- (p1^2*q2^2+p2^2*q1^2+4*p1*q1*p2*q2)*nTot #AABB
          expCounts[i,4] <- 2*q1*q2*(p1*q2+p2*q1)*nTot      #ABBB
          expCounts[i,5] <- q1^2*q2^2*nTot          #BBBB
          indi <- expCounts[i,]>0
          chiSqs[i] <- sum((sizes[indi]-expCounts[i,indi])^2/expCounts[i,indi])
          afs[i,] <- c(p1,p2)
        }
      }
      sChi <- sort(chiSqs,index.return=TRUE)
      chiSq <- chiSqs[sChi$ix[1]]
      af <- afs[sChi$ix[1],]
    }
    df <- 3    #Number of genotypes/clusters - number of alleles
  }else{
    stop(paste('HW-testing not implemented for',nPheno,'cluster(s)'))
  }
  pVal <- pchisq(chiSq,df=df,lower.tail=FALSE)
  HWstats <- data.frame(chiSq=chiSq,df=df,pVal=pVal,af1=af[1],af2=af[2])
  HWstats
}



#Validates BSRed, returns updated BSRed
#PedigreeID given as '<p><mmm><fff><oo>', where 'p', 'mmm', 'fff' and 'oo' are unique
#identifiers for population, mother, father, and individual within full-sib group,
#respectively. '000' means founding parent, '999' means unknown.
validateCallsPedigree <- function(BSRed){
  assayData(BSRed)$ped.check <- matrix(nrow=nrow(BSRed),ncol=ncol(BSRed),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
  if (!'PedigreeID' %in% colnames(pData(BSRed))){
    stop('PedigreeID not provided!')
  }
  sampIDs <- pData(BSRed)$PedigreeID
  indFamily <- unique(substr(sampIDs,1,7)[!sampIDs%in%''])
  indFounding <- substr(indFamily,2,4)=='000' | substr(indFamily,5,7)=='000'
  indFamily <- indFamily[!indFounding]
  nFull <- length(indFamily)
  
  #Establish correct classification and adjust MSV3-calls to diploid
  callType <- fData(BSRed)$Classification
  if (!is.null(fData(BSRed)$Manual.Calls.R)){
    iManual <- !fData(BSRed)$Manual.Calls.R%in%''
    callType[iManual] <- fData(BSRed)$Manual.Calls.R[iManual]
  }
  callMatr <- assayData(BSRed)$call
  callMatr[callType%in%'MSV-a',] <- callMatr[callType%in%'MSV-a',]*2
  callMatr[callType%in%'MSV-b',] <- callMatr[callType%in%'MSV-b',]*2-1

  for (j in 1:nFull){
    jf <- sampIDs %in% paste(substr(indFamily[j],1,1),'000',substr(indFamily[j],5,7),'00',sep='')
    jm <- sampIDs %in% paste(substr(indFamily[j],1,4),'00000',sep='')
    j.fam <- substr(sampIDs,1,7) %in% indFamily[j] &! (jf|jm)
    pCalls <- matrix(callMatr[,jf|jm],nrow=nrow(BSRed))
    oCalls <- matrix(callMatr[,j.fam],nrow=nrow(BSRed))
    if (ncol(pCalls)==1){
      assayData(BSRed)$ped.check[,j.fam] <- oCalls>=rep(pCalls,sum(j.fam))-.5 & oCalls<=rep(pCalls,sum(j.fam))+.5
    }else if (ncol(pCalls)==2){
      indNA <- apply(pCalls,1,function(x) any(is.na(x)))
      indLH <- pCalls[,1]<=.5 & pCalls[,2]>=.5 & !indNA
      assayData(BSRed)$ped.check[indLH,j.fam] <- oCalls[indLH,]<=rep(pCalls[indLH,1],sum(j.fam))+.5 & oCalls[indLH,]>=rep(pCalls[indLH,2],sum(j.fam))-.5
      indHL <- pCalls[,2]<=.5 & pCalls[,1]>=.5 & !indLH & !indNA
      assayData(BSRed)$ped.check[indHL,j.fam] <- oCalls[indHL,]<=rep(pCalls[indHL,2],sum(j.fam))+.5 & oCalls[indHL,]>=rep(pCalls[indHL,1],sum(j.fam))-.5
      indL <- which(apply(pCalls,1,function(x) all(x<.5)))
      assayData(BSRed)$ped.check[indL,j.fam] <- oCalls[indL,]<=apply(matrix(pCalls[indL,],nrow=length(indL),ncol=2),1,sum)
      indH <- which(apply(pCalls,1,function(x) all(x>.5)))
      assayData(BSRed)$ped.check[indH,j.fam] <- oCalls[indH,]>=apply(matrix(pCalls[indH,],nrow=length(indH),ncol=2),1,sum)-1
      indNA1 <- apply(pCalls,1,function(x) sum(is.na(x))==1)
      pRed <- t(pCalls[indNA1,])[!is.na(t(pCalls[indNA1,]))]
      assayData(BSRed)$ped.check[indNA1,j.fam] <- oCalls[indNA1,]>=rep(pRed,sum(j.fam))-.5 & oCalls[indNA1,]<=rep(pRed,sum(j.fam))+.5
    }else if (ncol(pCalls)>2){
      stop('More than two parents. Genotyped repeatedly, or ped-error?')
    }
  }
  nErrors <- apply(assayData(BSRed)$ped.check,1,function(x) sum(!x,na.rm=TRUE))
  fData <- data.frame(Ped.Errors=nErrors)
  fMetadata <- data.frame(labelDescription='Number of pedigree-errors, summed over offspring')
  featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  indx <- !colnames(fData(BSRed))%in%'Ped.Errors'
  featureData(BSRed) <- combine(featureData(BSRed)[,indx], featureData)
  validObject(BSRed)
  BSRed
}



#Calculates the ratio of non-FAILed markers which are called for each array.
#If ped-error flag is TRUE, calls violating pedigree count as missing. 
countFailedSNP <- function(BSRed,inclPedErrors=TRUE){
  classMatr <- assayData(BSRed)$call
  if (inclPedErrors & ! is.null(assayData(BSRed)$ped.check)){
    classMatr[!assayData(BSRed)$ped.check] <- NA
    lStr <- 'Ratio of passed markers (ped-errors count as failed)'
  }else{
    lStr <- 'Ratio of passed markers (ped-errors not considered)'
  }
  iPass <- !fData(BSRed)$Classification %in% c('FAIL','MONO-filt')
  nErrors <- apply(classMatr[iPass,],2,function(x) sum(is.na(x)))
  pData <- data.frame(passRatio = 1-nErrors/sum(iPass))
  pMetadata <- data.frame(labelDescription = lStr)
  phenoData <- new('AnnotatedDataFrame', data=pData, varMetadata=pMetadata, dimLabels=c('sampleNames','sampleColumns'))
  phenoData(BSRed) <- combine(phenoData(BSRed),phenoData)
  validObject(BSRed)
  BSRed
}



#Plotting function
plotGenotypes <- function(BSRed,markers=1:min(nrow(BSRed),64),indHighlight=NULL,ploidy='tetra',indicate.SE=FALSE,retFrames=FALSE,nC=NULL,mai=NULL,mNoise=NULL,main=NULL){
  nSNPs <- length(markers)
  pp <- par()
  if (is.null(nC)){
    nC <- ceiling(sqrt(nSNPs))
  }
  nR <- ceiling(nSNPs/nC)
  if (is.null(mai)){
    mai <- c(1,1,1,.5)/sqrt(min(nC,nR))
  }
  par(mfrow=c(nR,nC),mai=mai,bg='white')
  
  Theta <- matrix(assayData(BSRed)$theta[markers,],nrow=nSNPs)
  R <- matrix(assayData(BSRed)$intensity[markers,],nrow=nSNPs)
  Call <- matrix(assayData(BSRed)$call[markers,],nrow=nSNPs)
  if (indicate.SE)
    callProb <- 1/matrix(assayData(BSRed)$SE[markers,],nrow=nSNPs)
  pedCheckP <- assayData(BSRed)$ped.check.parents[markers,]
  if (!is.null(pedCheckP)){  #Use 'ped.check.parents' only if valid for all markers
    classR <- fData(BSRed)$Manual.Calls.R[markers]
    if (is.null(classR)){
      ii <- rep(TRUE,nSNPs)
    }else{
      ii <- !classR%in%'FAIL'
    }
    useP <- !any(apply(is.na(pedCheckP[ii,]),1,all))
  }else{
    useP <- FALSE
  }
  if (!useP){ #Use 'ped.check' rather than 'ped.check.parents'
    pedCheck <- assayData(BSRed)$ped.check[markers,]
    if (!is.null(pedCheck)){
      pedCheck <- matrix(pedCheck,nrow=nSNPs)
      pedCheck[is.na(pedCheck)] <- TRUE
      pedParents <- matrix(0,nrow=nSNPs,ncol=ncol(BSRed))
      pedParents[!pedCheck] <- 1
    }else{
      pedParents <- NULL
    }
  }else{
    pedParents <- NULL
  }
  if (is.null(mNoise)){
    mNoise <- rep(mean(pData(BSRed)$noiseIntensity,na.rm=TRUE),nSNPs)
  }
  hetLine <- c(.5,.5,0,1e6)
  cex <- 50/max(50,min(ncol(BSRed),200))   #No larger than one, no smaller than 1/4
  if (!is.null(indHighlight))
    cexhl <- 50/max(50,min(length(indHighlight),100))
  
  switch(ploidy,
         di={
           stop('ploidy=\'di\' not implemented')
         },
         tetra={
           palette(c('red','magenta','blue','cyan','seagreen','black','purple','lightblue'))
           indTriplets <- Call==1/3 | Call==2/3 | is.na(Call)
           Call[!indTriplets] <- Call[!indTriplets]*4 + 1
           Call[is.na(Call)] <- 6
           Call[Call==1/3] <- 7
           Call[Call==2/3] <- 8
           for (i in 1:nSNPs){
             xylim <- c(-0.6,1.6,0,max(R[i,],na.rm=TRUE))
             plot.new()
             plot.window(xylim[1:2],xylim[3:4]); axis(1); axis(2)
             if (indicate.SE){
               points(Theta[i,],R[i,],pch=16,col=Call[i,],cex=callProb[i,]*2/max(callProb[1,],na.rm=TRUE))
             }else if (!is.null(indHighlight)){
               points(Theta[i,-indHighlight],R[i,-indHighlight],pch=16,cex=cex,col=Call[i,-indHighlight])
               points(Theta[i,indHighlight],R[i,indHighlight],pch=16,cex=cexhl,col='yellow2')
             }else{
               points(Theta[i,],R[i,],pch=16,cex=cex,col=Call[i,])
               if (useP){
                 points(Theta[i,pedCheckP[i,]%in%1],R[i,pedCheckP[i,]%in%1],pch=4,lwd=3,col='yellow2')
                 points(Theta[i,pedCheckP[i,]%in%2],R[i,pedCheckP[i,]%in%2],pch=1,lwd=3,col='yellow2')
               }else if (!is.null(pedCheck)){
                 points(Theta[i,!pedCheck[i,]],R[i,!pedCheck[i,]],pch=4,lwd=3,col='yellow2')
                 famInd <- unique(substr(pData(BSRed)$PedigreeID[!pedCheck[i,]],1,7))
                 for (j in 1:length(famInd)){
                   jf <- pData(BSRed)$PedigreeID %in% paste(substr(famInd[j],1,1),'000',substr(famInd[j],5,7),'00',sep='')
                   jm <- pData(BSRed)$PedigreeID %in% paste(substr(famInd[j],1,4),'00000',sep='')
                   points(Theta[i,jf|jm],R[i,jf|jm],pch=1,lwd=3,col='yellow2')
                   pedParents[i,jf|jm] <- 2
                 }
               }
             }
             if (is.null(main)){
               title(main=featureNames(BSRed)[markers[i]],xlab='theta',ylab='intensity')
             }else{
               title(main=main[i],xlab='theta',ylab='intensity')
             }
             lines(c(0,0),xylim[3:4],col='gray')
             lines(c(1,1),xylim[3:4],col='gray')
             lines(hetLine[1:2],hetLine[3:4],col='gray')
             lines(xylim[1:2],rep(mNoise[i],2),col='red')
           }},
         pooled={
           stop('ploidy=\'pooled\' not implemented')
         }
         ) #end switch
  par(pp[c('mfrow','mai')])
  if (retFrames){
    xy <- list()
    for (i in 1:nSNPs){
      xy[[i]] <- data.frame(Theta=Theta[i,],R=R[i,],Call=Call[i,],pedErrors=pedParents[i,])
    }
    xy
  }
}



#Interactively define clusters for a given marker.
#Optional parameter gg is an instance of a GObject
manualCall <- function(marker,cntIdeal,classification,gg=NULL,close.gg=TRUE){
  nCl <- length(cntIdeal)
  if (is.null(marker$Call))
    marker$Call <- rep(NA,nrow(marker))
  if (is.null(marker$PedCheck) & !is.null(marker$PedigreeID))
    marker$PedCheck <- validateSingleCall(marker,classification)
  if (is.null(gg)){
    gg <- ggobi(marker[,c('Theta','R')],name='marker')
  }else{
    if (!'GGobi'%in%class(gg) | nrow(gg$marker)!=nrow(marker))
      stop('Arguments \'marker\' and \'gg\' not compatible')
  }
  dd <- displays(gg)[[1]]
  imode(dd) <- 'Brush'
  str.3 <- sprintf('Modify which cluster? (in [1,%d], or use "8" to assign NAs) ',nCl)
  str.8 <- sprintf("Brush calls to erase, OR
press <1> to return: ")
  str.end <- 'Satisfied with assignments? (y/n) '
  glc <- unlist(as.numeric(marker$Call)*4+1)
  glc[is.na(glc)] <- rep(8,sum(is.na(glc)))
  glyph_colour(gg$marker) <- glc
  glt <- glyph_type(gg$marker) <- rep(6,nrow(marker))
  if (!is.null(marker$PedigreeID)){
    iErr <- marker$PedCheck>0
    glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
    glyph_type(gg$marker)[iErr] <- 7
  }
  OK <- FALSE
  while (!OK){
    cl.i <- 1
    shadowed(gg$marker) <- rep(FALSE,nrow(marker))
    while (cl.i<=nCl){
      str.i <- sprintf("Brush points belonging to cluster %d (of %d), OR
press <1> to skip cluster, OR
press <2> to skip remaining clusters, OR
press <3> to modify a specific cluster or set points to NA, OR
press <4> to start from scratch: ",cl.i,nCl)
      actn <- readline(prompt=str.i)
      if (actn=='1'){
        cl.i <- cl.i + 1
      }else if (actn=='2'){
        cl.i <- nCl + 1
      }else if (actn=='3'){
        OK.3 <- FALSE
        while (!OK.3){
          cl.i <- as.numeric(readline(str.3))
          OK.3 <- cl.i %in% c(1:nCl,8)
        } #end while OK.3
      }else if (actn=='4'){
        cl.i <- 1
        glc <- glyph_colour(gg$marker) <- rep(8,nrow(marker))
        marker$Call <- rep(NA,nrow(marker))
        if (!is.null(marker$PedCheck))
          marker$PedCheck <- rep(0,nrow(marker))
      }else{
        iCluster <- selected(gg$marker)
        marker$Call[iCluster] <- rep(cntIdeal[cl.i],sum(iCluster)) #(glc[iCluster]-1)/4
        glc[iCluster] <- rep(cntIdeal[cl.i]*4+1,sum(iCluster))
        assignedCl <- unique(glc[glc<=5])
        glyph_colour(gg$marker) <- glc
        if (!is.null(marker$PedigreeID) & length(assignedCl)>1){
          selM <- unique(substr(marker$PedigreeID[iCluster],1,4))
          selM <- selM[!substr(selM,2,4)%in%c('000','999')]
          redF <- paste(substr(marker$PedigreeID,1,1),substr(marker$PedigreeID,5,7),sep='')
          selF <- unique(redF[iCluster])
          selF <- selF[!substr(selF,2,4)%in%c('000','999')]
          indSel <- substr(marker$PedigreeID,1,4)%in%selM | redF%in%selF | marker$PedCheck>0
          marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],classification)
          iErr <- marker$PedCheck>0
          glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
          glyph_type(gg$marker) <- glt
          glyph_type(gg$marker)[iErr] <- 7
        } #end if is.null
        cl.i <- cl.i + 1
      } #end if actn
    } #end while cl.i
    
    if (cl.i==8){
      actn <- readline(prompt=str.8)
      if (actn=='1'){
      }else{
        iErase <- selected(gg$marker)
        glc[iErase] <- rep(8,sum(iErase))
        marker$Call[iErase] <- rep(NA,sum(iErase))
        glyph_colour(gg$marker) <- glc
        if (!is.null(marker$PedCheck)){
          indSel <- marker$PedCheck>0
          marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],classification)
          iErr <- marker$PedCheck>0
          glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
          glyph_type(gg$marker) <- glt
          glyph_type(gg$marker)[iErr] <- 7
        } #end if is.null
      } #end if actn
    }else{
      nErr <- sum(marker$PedCheck>0)
      if (nErr>0){
        shadowed(gg$marker) <- glyph_colour(gg$marker)%in%c(1:5,8)
        str.ped <- sprintf('Warning: %d ped-errors!
Brush points to un-assign, OR
press <1> to un-assign all erroneous offspring, OR
press <2> to modify clusters, OR
press <3> to disregard errors and finish: ',nErr)
        actn <- readline(prompt=str.ped)
        if (actn=='1'){
          iErase <- marker$PedCheck==1
          glc[iErase] <- rep(8,sum(iErase))
          marker$Call[iErase] <- rep(NA,sum(iErase))
          marker$PedCheck[marker$PedCheck>0] <- 0
          glyph_colour(gg$marker) <- glc
          glyph_type(gg$marker) <- glt
          #OK <- TRUE
        }else if (actn=='2'){
        }else if (actn=='3'){
          OK <- TRUE
        }else{
          iErase <- selected(gg$marker)
          glc[iErase] <- rep(8,sum(iErase))
          marker$Call[iErase] <- rep(NA,sum(iErase))
          glyph_colour(gg$marker) <- glc
          if (!is.null(marker$PedCheck)){
            indSel <- marker$PedCheck>0
            marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],classification)
            iErr <- marker$PedCheck>0
            glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
            glyph_type(gg$marker) <- glt
            glyph_type(gg$marker)[iErr] <- 7
          } #end if is.null
        } #end if actn
      }else{
        finished <- readline(prompt=str.end)
        OK <- finished %in% c('y','Y','yes','Yes','YES','1','T','TRUE')
      } #end if nErr
    } #end if cl.i
  } #while !OK
  if (close.gg){
    close(gg)
    return(marker)
  }else{
    shadowed(gg$marker) <- rep(FALSE,nrow(marker))
    gg$marker$Call <- as.numeric(marker$Call)
    gg$marker$PedCheck <- marker$PedCheck
    return(gg)
  } #end if close.gg
}



#Validates a single SNP/marker, returns vector of pedErrors (o=1,p=2) 
validateSingleCall <- function(marker,classification){
  #indFamily <- unique(substr(marker$PedigreeID,2,7))
  #indFounding <- substr(indFamily,1,3)=='000' | substr(indFamily,4,6)=='000'
  indFamily <- unique(substr(marker$PedigreeID,1,7))
  indFamily <- indFamily[!indFamily%in%'']
  indFounding <- substr(indFamily,2,4)=='000' | substr(indFamily,5,7)=='000'
  indFamily <- indFamily[!indFounding]
  nFull <- length(indFamily)
  pedErrors <- rep(0,nrow(marker))
  if (classification%in%'MSV-a'){
    callVec <- marker$Call*2
  }else if (classification%in%'MSV-b'){
    callVec <- marker$Call*2-1
  }else{
    callVec <- marker$Call
  }
  for (j in 1:nFull){
    #jf <- marker$PedigreeID==paste('1000',substr(indFamily[j],4,6),'00',sep='')
    #jm <- marker$PedigreeID==paste('1',substr(indFamily[j],1,3),'00000',sep='')
    #j.fam <- substr(marker$PedigreeID,2,7)==indFamily[j] &! (jf|jm)
    jf <- marker$PedigreeID %in% paste(substr(indFamily[j],1,1),'000',substr(indFamily[j],5,7),'00',sep='')
    jm <- marker$PedigreeID %in% paste(substr(indFamily[j],1,4),'00000',sep='')
    j.fam <- substr(marker$PedigreeID,1,7) %in% indFamily[j] &! (jf|jm)
    bothP <- c(which(jf),which(jm))
    pCalls <- sort(callVec[bothP][!is.na(callVec[bothP])],index.return=TRUE)
    oCalls <- callVec[j.fam]
    if (length(pCalls$x)==1){
      iErr <- (oCalls<pCalls$x-.5 | oCalls>pCalls$x+.5) &! is.na(oCalls)
      if (any(iErr)){
        pedErrors[j.fam][iErr] <- 1
        pedErrors[bothP] <- 2
      } #if any
    }else if (length(pCalls$x)==2){
      if (pCalls$x[1]<=.5 & pCalls$x[2]>=.5){    #Alt. 1
        iHi <- oCalls>pCalls$x[1]+.5 &! is.na(oCalls)
        if (any(iHi)){
          pedErrors[j.fam][iHi] <- 1
          pedErrors[bothP[pCalls$ix[1]]] <- 2
        } #if any
        iLo <- oCalls<pCalls$x[2]-.5 &! is.na(oCalls)
        if (any(iLo)){
          pedErrors[j.fam][iLo] <- 1
          pedErrors[bothP[pCalls$ix[2]]] <- 2
        } #if any
      }else if(all(pCalls$x<.5)){    #Alt. 2
        iHi <- oCalls>sum(pCalls$x) &! is.na(oCalls)
        if (any(iHi)){
          pedErrors[j.fam][iHi] <- 1
          pedErrors[bothP] <- 2
        } #if any
      }else if(all(pCalls$x>.5)){      #Alt. 3
        iLo <- oCalls<sum(pCalls$x)-1 &! is.na(oCalls)
        if (any(iLo)){
          pedErrors[j.fam][iLo] <- 1
          pedErrors[bothP] <- 2
        } #if any
      } #if alternatives
    }else if (length(pCalls$x)>2){
      stop('More than two parents. Genotyped repeatedly, or ped-error?')
    } #end if length(pCalls)
  } #end for j
  pedErrors
}


  
#Set values for transformation/normalisation and return in a list. Suggests default values.
setNormOptions <- function(shearInf1=TRUE,transf='root',method='medianAF',minSize=suggestSh(shearInf1)$minSize,prob=suggestSh(shearInf1)$prob,nBins=suggestSh(shearInf1)$nBins,dist=suggestTr(transf)$dist,pNorm=suggestTr(transf)$pNorm,nthRoot=suggestTr(transf)$nthRoot,offset=suggestTr(transf)$offset,scale=suggestNo(method)$scale,nSD=3,breaks=200){
  suggestSh <- function(shearInf1){
    opt <- NULL
    if (shearInf1){
      opt$minSize <- 20; opt$prob <- .1; opt$nBins <- 50
    }else{
      opt$minSize <- 40; opt$prob <- .05; opt$nBins <- 50
    }
    opt
  }
  suggestTr <- function(transf){
    opt <- NULL
    if (transf=='none'){
      opt$dist <- 'manhattan'; opt$pNorm <- NULL; opt$nthRoot <- NULL; opt$offset <- NULL
    }else if (transf=='log'){
      opt$dist <- 'minkowski'; opt$pNorm <- 5; opt$nthRoot <- NULL; opt$offset <- 200
    }else if (transf=='root'){
      opt$dist <- 'minkowski'; opt$pNorm <- 5; opt$nthRoot <- 4; opt$offset <- 200
    }
    opt
  }
  suggestNo <- function(method){
    opt <- NULL
    if (method=='medianAF'){
      opt$scale <- NULL
    }else if (method=='linPeak'){
      opt$scale <- 0.75
    }
    opt
  }
  normOpts <- list(shearInf1=shearInf1,minSize=minSize,prob=prob,nBins=nBins,transf=transf,nthRoot=nthRoot,offset=offset,dist=dist,pNorm=pNorm,method=method,scale=scale,nSD=nSD,breaks=breaks)
  normOpts
}



#Set parameters for genotype-calling and return in a list. Suggests default values.
#Default values for largeSample==TRUE calibrated for more than 3000 objects,
#largeSample==FALSE calibrated for 100 objects.
#Calibrated for transf='root'; if transf='none', increase devCentLim and wSpreadLim
#Earlier versions: mseLim=1, probsQRange=c(.01,.5,.99)
setGenoOptions <- function(largeSample=FALSE,snpPerArrayLim=.8,arrayPerSnpLim=0,ploidy='tetra',filterLim=0,detectLim=.8,wSpreadLim=suggestGeno(largeSample)$wSpreadLim,devCentLim=.35,hwAlpha=suggestGeno(largeSample)$hwAlpha,probsIndSE=suggestGeno(largeSample)$probsIndSE,afList=seq(0,.5,.05),clAlpha=suggestGeno(largeSample)$clAlpha,rPenalty=2,rotationLim=suggestGeno(largeSample)$rotationLim,minClLim=5,nSdOverlap=2,minBin=suggestGeno(largeSample)$minBin,binWidth=suggestGeno(largeSample)$binWidth){
  suggestGeno <- function(largeSample){
    opt <- NULL
    if (largeSample){
      opt$probsIndSE=.9; opt$clAlpha=.01; opt$rotationLim=2; opt$wSpreadLim=.07; opt$hwAlpha=1e-10; opt$minBin=2; opt$binWidth=.05
    }else{
      opt$probsIndSE=NULL; opt$clAlpha=.05; opt$rotationLim=5; opt$wSpreadLim=.1; opt$hwAlpha=1e-4; opt$minBin=2; opt$binWidth=.1
    }
    opt
  }
  genoOpts <- list(snpPerArrayLim=snpPerArrayLim,arrayPerSnpLim=arrayPerSnpLim,ploidy=ploidy,filterLim=filterLim,detectLim=detectLim,wSpreadLim=wSpreadLim,devCentLim=devCentLim,hwAlpha=hwAlpha,probsIndSE=probsIndSE,afList=afList,clAlpha=clAlpha,rPenalty=rPenalty,rotationLim=rotationLim,minClLim=minClLim,nSdOverlap=nSdOverlap,minBin=minBin,binWidth=binWidth)
  genoOpts
}


#Set parameters for merging of genotypes and return in a list. Suggests default values.
setMergeOptions <- function(minC=NULL,noiseQuantile=.75,offspringLim=7,ratioLim=.9,rngLD=5){
  if (!is.null(minC))
    noiseQuantile <- NULL
  mergeOpts <- list(minC=minC,noiseQuantile=noiseQuantile,offspringLim=offspringLim,ratioLim=ratioLim,rngLD=rngLD)
  mergeOpts
}



#Takes MSV5-calls as input and partially resolves the true calls for each paralog.
#'Partially' because the actual chromosome number is not known, and the duplicate
#calls therefore have to be assigned assuming known location of paralogs of only
#one parent at the time. Where informative meiosis is found, each offspring is
#called as {Para1,Para2} <- {0,0}, {0,1}, or {1,1}. Can theoretically be used for
#LA by assuming only one known parent.
unmixParalogues <- function(BSRed,singleCalls=getSingleCalls(BSRed)){
  unmixTheta <- function(x){
    X <- rep(NA,2)
    if (x!=.5){
      X[1] <- round(x)
      X[2] <- 2*x - X[1]
      X <- sort(X)
    }
    X
  }
  lookupOffspring <- function(x,LookUp){
    X <- cbind(LookUp[x[-c(1,2)],x[1],x[2]],LookUp[x[-c(1,2)],x[1],x[2]])
    iHalf <- X[,1]%in%.5
    X[iHalf,1] <- 0
    X[iHalf,2] <- 1
    X <- as.vector(X)
    X
  }

  #Input control
  callType <- fData(BSRed)$Classification
  if (!is.null(fData(BSRed)$Manual.Calls.R)){
    iManual <- !fData(BSRed)$Manual.Calls.R %in% ''
    callType[iManual] <- fData(BSRed)$Manual.Calls.R[iManual]
  }
  if (!all(callType %in% 'MSV-5')){
    stop('This function accepts nothing but MSV-5 markers as input')
  }
  
  #Expand call-tables for father and mother to include both paralogues
  str1 <- paste(rownames(singleCalls),'Para1',sep='_')
  str2 <- paste(rownames(singleCalls),'Para2',sep='_')
  rNames <- as.vector(t(cbind(str1,str2)))
  nRows <- 2*nrow(BSRed)
  paraCalls <- NULL
  paraCalls$father <- paraCalls$mother <- matrix(nrow=nRows,ncol=ncol(BSRed),dimnames=list(rNames,sampleNames(BSRed)))
  paraCalls$mother[seq(1,nRows,2),] <- paraCalls$mother[seq(2,nRows,2),] <- paraCalls$father[seq(1,nRows,2),] <- paraCalls$father[seq(2,nRows,2),] <- singleCalls

  #Create look-up table for valid meioses (dim: 5(offspring) x 5(parent1) x 5(parent2))
  LookUp <- array(dim=rep(5,3))
  LookUp[c(1,6,11,26,27,31,36,51:53,56,61,77,78,82,87,103,108,113)] <- 0
  LookUp[c(7,17,33,42,59,67,84,93,109,119)] <- .5
  LookUp[c(13,18,23,39,44,48,49,65,70,73:75,90,95,99,100,115,120,125)] <- 1

  #Index including only <p><fff> (population + father-ID)
  pp <- paste(substr(pData(BSRed)$'PedigreeID',1,1),substr(pData(BSRed)$'PedigreeID',5,7),sep='')
  
  #Inheritance from mothers, fathers not fixed to paralogues yet
  indMothers <- unique(substr(pData(BSRed)$PedigreeID,1,4))
  indMothers <- indMothers[!substr(indMothers,2,4) %in% c('000','999')]
  for (j in 1:length(indMothers)){
    jm <- pData(BSRed)$PedigreeID %in% paste(indMothers[j],'00000',sep='')
    mCalls <- assayData(BSRed)$call[,jm]
    iNA <- is.na(singleCalls[,jm]) &! is.na(mCalls)
    if (sum(iNA)>0){
      paraCalls$mother[sort(c(which(iNA)*2,which(iNA)*2-1)),jm] <- as.vector(sapply(mCalls[iNA],unmixTheta))
      #Don't bother with homozygotes and unknown 0.5's:
      iCand.m <- !is.na(paraCalls$mother[seq(1,nRows,2),jm]) &! mCalls%in%c(0,1)
      j.fam <- substr(pData(BSRed)$PedigreeID,1,4) %in% indMothers[j] &! jm
      if (sum(j.fam)>2){   #Which minimum family-size to allow?
        indMates <- unique(pp[j.fam])
        indMates <- indMates[!substr(indMates,2,4) %in% '999']
        for (i in 1:length(indMates)){
          jf <- pData(BSRed)$PedigreeID %in% paste(substr(indMates[i],1,1),'000',substr(indMates[i],2,4),'00',sep='')
          fCalls <- assayData(BSRed)$call[,jf]
          i.fam <- pp[j.fam] %in% indMates[i]
          oCalls <- assayData(BSRed)$call[,j.fam][,i.fam]
          iCand.f <- fCalls!=.5 | !is.na(paraCalls$mother[seq(1,nRows,2),jf])
          iCand <- which(iCand.m & iCand.f)
          if (length(iCand)>0){
            indLookup <- matrix(cbind(mCalls,fCalls,oCalls)[iCand,]*4+1,nrow=length(iCand))
            cc <- apply(indLookup,1,lookupOffspring,LookUp)
            dim(cc) <- c(ncol(oCalls),length(iCand)*2)
            paraCalls$mother[sort(c(iCand*2-1,iCand*2)),j.fam][,i.fam] <- t(cc)
          }
        } #for i
      } #if sum(j.fam)
    } #if sum(iNA)
  } #for j
  
  #Inheritance from fathers, mothers not fixed to paralogues yet
  indFathers <- unique(pp)
  indFathers <- indFathers[!substr(indFathers,2,4) %in% c('000','999')]
  for (j in 1:length(indFathers)){
    jf <- pData(BSRed)$PedigreeID %in% paste(substr(indFathers[j],1,1),'000',substr(indFathers[j],2,4),'00',sep='')
    fCalls <- assayData(BSRed)$call[,jf]
    iNA <- is.na(singleCalls[,jf]) &! is.na(fCalls)
    if (sum(iNA)>0){
      paraCalls$father[sort(c(which(iNA)*2,which(iNA)*2-1)),jf] <- as.vector(sapply(fCalls[iNA],unmixTheta))
      #Don't bother with homozygotes and unknown 0.5's:
      iCand.f <- !is.na(paraCalls$father[seq(1,nRows,2),jf]) &! fCalls%in%c(0,1)
      j.fam <- pp %in% indFathers[j] &! jf
      if (sum(j.fam)>2){   #Which minimum family-size to allow?
        indMates <- unique(substr(pData(BSRed)$PedigreeID[j.fam],1,4))
        indMates <- indMates[!substr(indMates,2,4) %in% '999']
        for (i in 1:length(indMates)){
          jm <- pData(BSRed)$PedigreeID %in% paste(indMates[i],'00000',sep='')
          mCalls <- assayData(BSRed)$call[,jm]
          i.fam <- substr(pData(BSRed)$PedigreeID[j.fam],1,4) %in% indMates[i]
          oCalls <- assayData(BSRed)$call[,j.fam][,i.fam]
          iCand.m <- mCalls!=.5 | !is.na(paraCalls$father[seq(1,nRows,2),jm])
          iCand <- which(iCand.m & iCand.f)
          if (length(iCand)>0){
            indLookup <- matrix(cbind(fCalls,mCalls,oCalls)[iCand,]*4+1,nrow=length(iCand))
            cc <- apply(indLookup,1,lookupOffspring,LookUp)
            dim(cc) <- c(ncol(oCalls),length(iCand)*2)
            paraCalls$father[sort(c(iCand*2-1,iCand*2)),j.fam][,i.fam] <- t(cc)
          }
        } #for i
      } #if sum(j.fam)
    } #if sum(iNA)
  } #for j
  j.allFathers <- substr(pData(BSRed)$PedigreeID,2,4)%in%'000' & substr(pData(BSRed)$PedigreeID,8,9)%in%'00'
  paraCalls$mother[,j.allFathers] <- NA
  j.allMothers <- substr(pData(BSRed)$PedigreeID,5,9)%in%'00000'
  paraCalls$father[,j.allMothers] <- NA
  paraCalls
}



#Identify MSV-5's for which both paralogues are either AA, BB, or AB
getSingleCalls <- function(BSRed){
  compTheta <- function(x){
    rng <- c(max(0,x[1]-.5),min(1,x[1]+.5))
    indAB <- any(rng %in% x[-1]) &! is.na(x[1])
    indAB
  }
  singleCalls <- matrix(nrow=nrow(BSRed),ncol=ncol(BSRed),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
  singleCalls[assayData(BSRed)$call==0] <- 0
  singleCalls[assayData(BSRed)$call==1] <- 1
  indFamily <- unique(substr(pData(BSRed)$PedigreeID,1,7))
  indFounding <- substr(indFamily,2,4)=='000' | substr(indFamily,5,7)=='000'
  indFamily <- indFamily[!indFounding]
  nFull <- length(indFamily)
  for (j in 1:nFull){ #Parents with [AB,AB]-markers
    jf <- pData(BSRed)$PedigreeID %in% paste(substr(indFamily[j],1,1),'000',substr(indFamily[j],5,7),'00',sep='')
    jm <- pData(BSRed)$PedigreeID %in% paste(substr(indFamily[j],1,4),'00000',sep='')
    j.fam <- substr(pData(BSRed)$PedigreeID,1,7) %in% indFamily[j] &! (jf|jm)
    if (sum(jf)==1 & sum(jm)==1 & sum(j.fam)>=1){
      fCalls <- assayData(BSRed)$call[,jf]
      mCalls <- assayData(BSRed)$call[,jm]
      oCalls <- assayData(BSRed)$call[,j.fam]
      indF <- fCalls==.5 & !is.na(fCalls)
      if (any(indF)){
        indF[indF] <- apply(matrix(cbind(mCalls,oCalls)[indF,],nrow=sum(indF)),1,compTheta)
        singleCalls[indF,jf] <- .5
      }
      indM <- mCalls==.5 & !is.na(mCalls)
      if (any(indM)){
        indM[indM] <- apply(matrix(cbind(fCalls,oCalls)[indM,],nrow=sum(indM)),1,compTheta)
        singleCalls[indM,jm] <- .5
      }
    }
  }
  singleCalls
}



#Enables assignments to markers of 'AlleleSetIllumina', given phenoData are identical
assignToAlleleSet <- function(BSRed,BSAdd){
  if (!identical(phenoData(BSRed),phenoData(BSAdd)))
    stop('The phenoData of the AlleleSets differ. Will not merge!')
  indReplace <- which(featureNames(BSRed) %in% featureNames(BSAdd))
  orderRed <- order(featureNames(BSRed)[indReplace])
  indReplace <- indReplace[orderRed]
  orderAdd <- order(fData(BSAdd)$Name)
  if (!identical(featureData(BSRed),featureData(BSAdd))){
    varRed <- colnames(fData(BSRed))
    varAdd <- colnames(fData(BSAdd))
    if (!all(varAdd %in% varRed)){
      fData <- data.frame(stringsAsFactors=FALSE, row.names=featureNames(BSRed))
      varExtra <- varAdd[!varAdd %in% varRed]
      for (i in 1:length(varExtra)){
        fData[[varExtra[i]]] <- as('',class(fData(BSAdd)[[varExtra[i]]]))
      }
      fMetadata <- data.frame(labelDescription=featureData(BSAdd)@varMetadata[varExtra,], row.names=varExtra)
      featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
      featureData(BSRed) <- combine(featureData(BSRed), featureData)
      message(sprintf('%d new column(s) of featureData added to target',i))
      varRed <- colnames(fData(BSRed))
   }
    featureData(BSRed)@data[indReplace, varRed %in% varAdd] <- fData(BSAdd)[orderAdd, varRed[varRed %in% varAdd]]
    message('New featureData assigned to target')
  } #end if
  assayRed <- names(assayData(BSRed))
  assayAdd <- names(assayData(BSAdd))
  if (!all(assayAdd%in%assayRed)){
    assayExtra <- assayAdd[!assayAdd%in%assayRed]
    for (i in 1:length(assayExtra)){
      assayData(BSRed)[[assayExtra[i]]] <- matrix(nrow=nrow(BSRed), ncol=ncol(BSRed), dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
      message(sprintf('Added assayData \'%s\' to target',assayExtra[i]))
    } #end for
  } #end if
  if (!all(assayRed%in%assayAdd))
    warning('One or more assayData-arrays in target not found in added AlleleSetIllumina-object')
  assayNames <- intersect(assayAdd, names(assayData(BSRed)))
  nAssays <- length(assayNames)
  for (i in 1:nAssays){
    Xred <- assayData(BSRed)[[assayNames[i]]][indReplace,]
    Xadd <- assayData(BSAdd)[[assayNames[i]]][orderAdd,]
    if (!identical(Xred,Xadd)){
      assayData(BSRed)[[assayNames[i]]][indReplace,] <- Xadd
      message(sprintf('New %s-data assigned to target',assayNames[i]))
    } #end if
  } #end for
  validObject(BSRed)
  BSRed
}



#Translates calls (in {0,1/2,1}) into true genotypes (A,T,C,G). 'type' is either
#'regular', 'single' (markernames w. 'Para'), or 'merged' (markernames w. 'Chrom')
#Output is a matrix of the same size as 'calls', each element given as "x y",
#where x and y are the two alleles. Missing: "- -"
#If type is 'regular', and non-diploid markers are found, these are attempted
#translated into a diploid representation where meaningful.
translateTheta <- function(calls,resInfo,type='regular'){
  subTrans <- function(x,alleles){
    tG <- character(length(x))
    tG[x%in%0] <- paste(alleles[,1],alleles[,1])[x%in%0]
    tG[x%in%0.5] <- paste(alleles[,1],alleles[,2])[x%in%0.5]
    tG[x%in%1] <- paste(alleles[,2],alleles[,2])[x%in%1]
    tG
  }
  if (is.vector(calls))
    calls <- as.matrix(calls)
  if (type%in%'regular'){
    markerNames <- rownames(calls)
    fData <- data.frame(Classification=resInfo[markerNames,'Classification'], row.names=markerNames)
    #if (any(c(.25,.75) %in% unique(as.vector(calls)))){
    if (any(!unique(fData$Classification) %in% c('MONO-a','MONO-b','SNP','FAIL','MONO-filt'))){
      message('Non-diploid calls found. Will be converted to a diploid representation where possible')
      calls <- makeDiploidCalls(calls,fData)
    }
  }else if (type%in%'single'){
    markerNames <- sub('_Para[12]','',rownames(calls))
  }else if (type%in%'merged'){
    markerNames <- sub('_Chrom[0-9XY]+','',rownames(calls))
  }
  cc <- data.frame(Base=c('A','T','C','G'),Compl=c('t','a','g','c'))
  snpInfo <- resInfo[markerNames,c('SNP','ILMN.Strand')]
  topSNP <- snpInfo[,1]
  for (i in 1:4)
    topSNP[snpInfo[,2]%in%'BOT'] <- sub(cc[i,1],cc[i,2],topSNP[snpInfo[,2]%in%'BOT'])
  alleles <- toupper(do.call('rbind',strsplit(substr(topSNP,2,4),'/')))
  genotype <- apply(calls,2,subTrans,alleles)
  dimnames(genotype) <- dimnames(calls)
  genotype[is.na(calls)] <- '- -'
  genotype
}



#Will convert non-diploid calls to a diploid representation where possible (i.e. for MSV-a and -b's)
makeDiploidCalls <- function(calls,fData){
  diploidCalls <- calls
  callType <- fData$Classification
  if (!is.null(fData$Manual.Calls.R)){
    iManual <- !fData$Manual.Calls.R %in% ''
    callType[iManual] <- fData$Manual.Calls.R[iManual]
  }
  diploidCalls[callType%in%'MSV-a'] <- diploidCalls[callType%in%'MSV-a']*2
  diploidCalls[callType%in%'MSV-b'] <- diploidCalls[callType%in%'MSV-b']*2-1
  diploidCalls[callType%in%'MSV-5'] <- NA
  diploidCalls[callType%in%'PSV'] <- NA
  diploidCalls
}



#Translates calls (in {0, 1/4, 1/2, 3/4, 1}) into true genotypes (A,T,C,G). Genotypes
#for duplicated markers given as 4 letters, 2 letters for diploid loci. Resolved
#paralogs separated by comma.
translateThetaCombined <- function(BSRed,mergedCalls=NULL){
  subTransDiploid <- function(x,alleles){
    tG <- character(length(x))
    tG[x%in%0] <- paste(alleles[,1],alleles[,1],sep='')[x%in%0]
    tG[x%in%0.5] <- paste(alleles[,1],alleles[,2],sep='')[x%in%0.5]
    tG[x%in%1] <- paste(alleles[,2],alleles[,2],sep='')[x%in%1]
    tG[tG %in% ""] <- '--'
    tG
  }
  subTransMSVa <- function(x,alleles){
    tG <- character(length(x))
    tG[x%in%0] <- paste(alleles[,1],alleles[,1],',',alleles[,1],alleles[,1],sep='')[x%in%0]
    tG[x%in%0.25] <- paste(alleles[,1],alleles[,2],',',alleles[,1],alleles[,1],sep='')[x%in%0.25]
    tG[x%in%.5] <- paste(alleles[,2],alleles[,2],',',alleles[,1],alleles[,1],sep='')[x%in%.5]
    tG[tG %in% ""] <- '--,--'
    tG
  }
  subTransMSVb <- function(x,alleles){
    tG <- character(length(x))
    tG[x%in%0.5] <- paste(alleles[,1],alleles[,1],',',alleles[,2],alleles[,2],sep='')[x%in%0.5]
    tG[x%in%0.75] <- paste(alleles[,1],alleles[,2],',',alleles[,2],alleles[,2],sep='')[x%in%0.75]
    tG[x%in%1] <- paste(alleles[,2],alleles[,2],',',alleles[,2],alleles[,2],sep='')[x%in%1]
    tG[tG %in% ""] <- '--,--'
    tG
  }
  subTransMSV5 <- function(x,alleles){
    tG <- character(length(x))
    tG[x%in%0] <- paste(alleles[,1],alleles[,1],',',alleles[,1],alleles[,1],sep='')[x%in%0]
    tG[x%in%0.25] <- paste(alleles[,1],alleles[,1],alleles[,1],alleles[,2],sep='')[x%in%0.25]
    tG[x%in%0.5] <- paste(alleles[,1],alleles[,1],alleles[,2],alleles[,2],sep='')[x%in%0.5]
    tG[x%in%0.75] =paste(alleles[,1],alleles[,2],alleles[,2],alleles[,2],sep='')[x%in%0.75]
    tG[x%in%1] <- paste(alleles[,2],alleles[,2],',',alleles[,2],alleles[,2],sep='')[x%in%1]
    tG[tG %in% ""] <- '--,--'
    tG
  }
  if (!is.null(mergedCalls)){
    if (!identical(colnames(mergedCalls),sampleNames(BSRed))){
      stop('"mergedCalls" must be either NULL or a matrix with sample-info in the columns')
    }
    mergedNames <- sub('_Chrom[0-9XY]+','',rownames(mergedCalls))
  }
  
  call <- assayData(BSRed)$call
  callType <- fData(BSRed)$Classification
  if (!is.null(fData(BSRed)$Manual.Calls.R)){
    iManual <- !fData(BSRed)$Manual.Calls.R%in%''
    callType[iManual] <- fData(BSRed)$Manual.Calls.R[iManual]
  }
  cc <- data.frame(Base=c('A','T','C','G'),Compl=c('t','a','g','c'))
  topSNP <- fData(BSRed)$SNP
  for (i in 1:4)
    topSNP[fData(BSRed)$ILMN.Strand%in%'BOT'] <- sub(cc[i,1],cc[i,2],topSNP[fData(BSRed)$ILMN.Strand%in%'BOT'])
  alleles <- toupper(do.call('rbind',strsplit(substr(topSNP,2,4),'/')))
  rownames(alleles) <- featureNames(BSRed)
  genotype <- matrix('',nrow=nrow(call),ncol=ncol(call),dimnames=dimnames(call))
  iDiploid <- callType %in% c('MONO-a','MONO-b','SNP')
  call.i <- matrix(call[iDiploid,],nrow=sum(iDiploid),ncol=ncol(call))
  alleles.i <- matrix(alleles[iDiploid,],nrow=sum(iDiploid),ncol=2)
  genotype[iDiploid,] <- apply(call.i,2,subTransDiploid,alleles.i)
  iPSV <- callType %in% 'PSV'
  genotype[iPSV,] <- paste(alleles[iPSV,1],alleles[iPSV,1],',',alleles[iPSV,2],alleles[iPSV,2],sep='')
  iMSVa <- callType %in% 'MSV-a'
  call.i <- matrix(call[iMSVa,],nrow=sum(iMSVa),ncol=ncol(call))
  alleles.i <- matrix(alleles[iMSVa,],nrow=sum(iMSVa),ncol=2)
  genotype[iMSVa,] <- apply(call.i,2,subTransMSVa,alleles.i)
  iMSVb <- callType %in% 'MSV-b'
  call.i <- matrix(call[iMSVb,],nrow=sum(iMSVb),ncol=ncol(call))
  alleles.i <- matrix(alleles[iMSVb,],nrow=sum(iMSVb),ncol=2)
  genotype[iMSVb,] <- apply(call.i,2,subTransMSVb,alleles.i)
  iMSV5 <- callType %in% 'MSV-5'
  call.i <- matrix(call[iMSV5,],nrow=sum(iMSV5),ncol=ncol(call))
  alleles.i <- matrix(alleles[iMSV5,],nrow=sum(iMSV5),ncol=2)
  genotype[iMSV5,] <- apply(call.i,2,subTransMSV5,alleles.i)
  iFAIL <- callType %in% 'FAIL'
  genotype[iFAIL,] <- '--'
  if (!is.null(mergedCalls)){
    alleles.i <- matrix(alleles[mergedNames,],nrow=nrow(mergedCalls),ncol=2)
    trueSplit <- apply(mergedCalls,2,subTransDiploid,alleles.i)
    rownames(trueSplit) <- mergedNames
    trueMerged <- genotype[iMSV5,]
    for (i in 1:sum(iMSV5)){
      #i=i+1
      marker.i <- rownames(trueMerged)[i]
      ii <- which(rownames(trueSplit)%in%marker.i)
      split.i <- matrix(trueSplit[ii,],nrow=length(ii),ncol=ncol(trueSplit))
      #tt <- rbind(mergedCalls=mergedCalls[ii,],trueSplit=trueSplit[ii,],call=call[marker.i,],genotype=trueMerged[marker.i,],oldGeno=oldGeno[marker.i,])
      #print(tt[,101:110])
      if (nrow(split.i)==2){
        iRes <- apply(split.i,2,function(x) !all(x %in% '--'))
        trueMerged[i,iRes] <- apply(split.i[,iRes],2,function(x) paste(x[1],x[2],sep=','))
      }else if (nrow(split.i)==1){
        iRes <- !(split.i%in%'--' | call[marker.i,]%in%c(0,1))
        split.i2 <- sub(split.i[iRes],'',genotype[marker.i,iRes])
        trueMerged[i,iRes] <- paste(split.i[iRes],split.i2,sep=',')
      }
    }
    genotype[iMSV5,] <- trueMerged
  }
  
  assayData(BSRed)$genotype <- genotype
  validObject(BSRed)
  BSRed
}



#Reads call-data from file and writes genotype to a file of similar name
translateThetaFromFiles <- function(dataFiles,mergedCalls=NULL,markerStep=1000,sep='\t',quote=''){
  message("Loading featureData...")
  beadInfo <- read.table(dataFiles[['resFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
  callHeader <- c('',unlist(read.table(dataFiles[['callFile']],nrows=1,as.is=TRUE,sep=sep)))
  callCols <- rep('numeric',length(callHeader))
  callCols[1] <- 'character'
  file <- sub('Call','Alleles',dataFiles[['callFile']])
  for (i in seq(1,nrow(beadInfo),markerStep)){
    minM <- min(nrow(beadInfo),i+markerStep-1)
    message(paste('Translating markers ',i,':',minM,' (of ',nrow(beadInfo),') ...',sep=''))
    call <- read.table(dataFiles[['callFile']],header=FALSE,sep=sep,quote=quote,nrows=minM+1-i,colClasses=callCols,skip=i,row.names=1,col.names=callHeader)
    BSRed <- new('MultiSet',call=as.matrix(call),featureData=as(beadInfo[i:minM,],'AnnotatedDataFrame'),storage.mode='list')
    BSRed <- translateThetaCombined(BSRed,mergedCalls)
    write.table(assayData(BSRed)$genotype,file=file,append=i>1,sep=sep,col.names=i==1)
  }
}



#Resolves the within half-sib family inheritance from heterozygous parents. NB! No MSV's accepted
resolveInheritanceSNP <- function(BSSnp){
  uCalls <- unique(as.vector(assayData(BSSnp)$call))
  if (any(!uCalls %in% c(0,.5,1,NA))){
    stop('Only SNPs, no MSVs, are allowed. Calls found with illegal values')
  }
  inheritance <- NULL
  inheritance$mother <- inheritance$father <- matrix(nrow=nrow(BSSnp),ncol=ncol(BSSnp),dimnames=list(featureNames(BSSnp),sampleNames(BSSnp)))
  i0 <- assayData(BSSnp)$call==0
  inheritance$mother[i0] <- inheritance$father[i0] <- 0
  i1 <- assayData(BSSnp)$call==1
  inheritance$mother[i1] <- inheritance$father[i1] <- 1
  #LookUp <- matrix(c(0,0,NA,1,NA,0,NA,1,1),nrow=3,byrow=T) # Offspring x P2 (P1 is AB)

  #Index including only <p><fff> (population + father-ID)
  pp <- paste(substr(pData(BSSnp)$'PedigreeID',1,1),substr(pData(BSSnp)$'PedigreeID',5,7),sep='')
  
  #Inheritance from mothers
  indMothers <- unique(substr(pData(BSSnp)$PedigreeID,1,4))
  indMothers <- indMothers[!substr(indMothers,2,4) %in% c('000','999')]
  jm <- pData(BSSnp)$PedigreeID %in% paste(indMothers,'00000',sep='')
  inheritance$mother[,jm] <- inheritance$father[,jm] <- assayData(BSSnp)$call[,jm]
  for (j in 1:length(indMothers)){
    jm <- pData(BSSnp)$PedigreeID %in% paste(indMothers[j],'00000',sep='')
    mCalls <- assayData(BSSnp)$call[,jm]
    iCand.m <- mCalls %in% .5
    j.fam <- substr(pData(BSSnp)$PedigreeID,1,4) %in% indMothers[j] &! jm
    if (sum(j.fam)>2){   #Which minimum family-size to allow?
      indMates <- unique(pp[j.fam])
      indMates <- indMates[!substr(indMates,2,4) %in% '999']
      for (i in 1:length(indMates)){
        jf <- pData(BSSnp)$PedigreeID %in% paste(substr(indMates[i],1,1),'000',substr(indMates[i],2,4),'00',sep='')
        fCalls <- assayData(BSSnp)$call[iCand.m,jf]
        iCand.f <- fCalls!=.5  & !is.na(fCalls)
        i.fam <- pp[j.fam] %in% indMates[i]
        oCalls <- assayData(BSSnp)$call[iCand.m,j.fam][iCand.f,i.fam]
        iCand.o <- oCalls %in% .5
        cc <- 1 - fCalls[iCand.f]%*%t(rep(1,sum(i.fam)))
        inheritance$mother[iCand.m,j.fam][iCand.f,i.fam][iCand.o] <- cc[iCand.o]
        } #for i
    } #if sum
  } #for j
  
  #Inheritance from fathers
  indFathers <- unique(pp)
  indFathers <- indFathers[!substr(indFathers,2,4) %in% c('000','999')]
  jf <- pData(BSSnp)$PedigreeID %in% paste(substr(indFathers,1,1),'000',substr(indFathers,2,4),'00',sep='')
  inheritance$mother[,jf] <- inheritance$father[,jf] <- assayData(BSSnp)$call[,jf]
  for (j in 1:length(indFathers)){
    jf <- pData(BSSnp)$PedigreeID %in% paste(substr(indFathers[j],1,1),'000',substr(indFathers[j],2,4),'00',sep='')
    fCalls <- assayData(BSSnp)$call[,jf]
    iCand.f <- fCalls %in% .5
    j.fam <- pp %in% indFathers[j] &! jf
    if (sum(j.fam)>2){   #Which minimum family-size to allow?
      indMates <- unique(substr(pData(BSSnp)$PedigreeID[j.fam],1,4))
      indMates <- indMates[!substr(indMates,2,4) %in% '999']
      for (i in 1:length(indMates)){
        jm <- pData(BSSnp)$PedigreeID %in% paste(indMates[i],'00000',sep='')
        mCalls <- assayData(BSSnp)$call[iCand.f,jm]
        iCand.m <- mCalls!=.5  & !is.na(mCalls)
        i.fam <- substr(pData(BSSnp)$PedigreeID[j.fam],1,4) %in% indMates[i]
        oCalls <- assayData(BSSnp)$call[iCand.f,j.fam][iCand.m,i.fam]
        iCand.o <- oCalls %in% .5
        cc <- 1 - mCalls[iCand.m]%*%t(rep(1,sum(i.fam)))
        inheritance$father[iCand.f,j.fam][iCand.m,i.fam][iCand.o] <- cc[iCand.o]
       } #for i
    } #if sum
  } #for j
  inheritance
}



#Compares each split MSV-5 with known markers within each father and mother half-sib family
#and counts the IBD-occurences. Returns a matrix of dim(#MSV-5, #Chromosomes, 2 parents)
#from which the peaks can be used to assign each homeolog to its respective chromosome.
locateParalogues <- function(BSSnp,paraCalls,inheritP,offspringLim=7,ratioLim=.9){
  indMothers <- unique(substr(pData(BSSnp)$PedigreeID,1,4))
  indMothers <- indMothers[!substr(indMothers,2,4) %in% c('000','999')]
  nMothers <- length(indMothers)
  pp <- paste(substr(pData(BSSnp)$'PedigreeID',1,1),substr(pData(BSSnp)$'PedigreeID',5,7),sep='')
  indFathers <- unique(pp)
  indFathers <- indFathers[!substr(indFathers,2,4) %in% c('000','999')]
  nFathers <- length(indFathers)
  nPara <- nrow(paraCalls$m)
  countsGood <- NULL
  countsGood$M <- countsGood$F <- matrix(0,nrow=nPara/2,ncol=nrow(BSSnp),dimnames=list(unique(sub('_Para[12]','',rownames(paraCalls$m))),fData(BSSnp)$Name))
  for (i in 1:nPara){    #One paralogue at the time
    if (i %in% seq(1,nPara,2))
      message(sprintf('Mapping paralogue %d/%d: %s',i,nPara,rownames(paraCalls$m)[i]))
    nDen <- nNum <- matrix(nrow=nMothers,ncol=nrow(BSSnp))
    for (j in 1:nMothers){    #Inheritance from mothers
      jm <- pData(BSSnp)$PedigreeID %in% paste(indMothers[j],'00000',sep='')
      if (paraCalls$m[i,jm] %in% .5){    #Mother j heterozygous in paralogue i
        iHetSnp <- assayData(BSSnp)$call[,jm] %in% .5   #Polymorphic markers
        j.fam <- substr(pData(BSSnp)$PedigreeID,1,4) %in% indMothers[j] &! jm
        iSamp <- !is.na(paraCalls$m[i,j.fam])
        if (sum(iSamp)>1){   #Minimum 2 offspring w. known genotype
          snps <- as.matrix(inheritP$m[iHetSnp,j.fam][,iSamp],nrow=sum(iHetSnp))
          nDen[j,iHetSnp] <- apply(snps,1,function(x) sum(!is.na(x)))
          nNum[j,iHetSnp] <- apply(snps,1,function(x,p) max(sum(x==p,na.rm=TRUE),sum(x==1-p,na.rm=TRUE)),paraCalls$m[i,j.fam][iSamp])
        } #if sum
      } #if polymorphic
    } #for j
    indGood <- matrix(nNum>=offspringLim & nNum/nDen>=ratioLim,nrow=nrow(nNum))
    cGood <- apply(indGood,2,function(x) sum(x[!is.na(x)]))
    ii <- sub('_Para[12]','',rownames(paraCalls$m)[i])
    countsGood$M[ii,] <- countsGood$M[ii,] + cGood
      
    nDen <- nNum <- matrix(nrow=nFathers,ncol=nrow(BSSnp))
    for (j in 1:nFathers){    #Inheritance from fathers
      jf <- pData(BSSnp)$PedigreeID %in% paste(substr(indFathers[j],1,1),'000',substr(indFathers[j],2,4),'00',sep='')
      if (paraCalls$f[i,jf] %in% .5){    #Father j heterozygous in paralogue i
        iHetSnp <- assayData(BSSnp)$call[,jf] %in% .5   #Polymorphic markers
        j.fam <- pp %in% indFathers[j] &! jf
        iSamp <- !is.na(paraCalls$f[i,j.fam])
        if (sum(iSamp)>1){   #Minimum 2 offspring w. known genotype
          snps <- as.matrix(inheritP$f[iHetSnp,j.fam][,iSamp],nrow=sum(iHetSnp))
          nDen[j,iHetSnp] <- apply(snps,1,function(x) sum(!is.na(x)))
          nNum[j,iHetSnp] <- apply(snps,1,function(x,p) max(sum(x==p,na.rm=TRUE),sum(x==1-p,na.rm=TRUE)),paraCalls$f[i,j.fam][iSamp])
        } #if sum
      } #if polymorphic
    } #for j
    indGood <- matrix(nNum>=offspringLim & nNum/nDen>=ratioLim,nrow=nrow(nNum))
    cGood <- apply(indGood,2,function(x) sum(x[!is.na(x)]))
    ii <- sub('_Para[12]','',rownames(paraCalls$f)[i])
    countsGood$F[ii,] <- countsGood$F[ii,] + cGood
  } #for i
  #chromNames <- sort(unique(lMap$Ssa))
  chromNames <- sort(unique(fData(BSSnp)$Chr.Name))
  nChroms <- length(chromNames)
  chromHits <- array(0,dim=c(nPara/2,nChroms,2),dimnames=list(rownames(countsGood$F),chromNames,c('mother','father')))
  for (i in 1:(nPara/2)){
    #rr <- data.frame(Chr.Name=fData(BSSnp)$Chr.Name, mCounts=countsGood$M[i,], fCounts=countsGood$F[i,])
    #chromHits[i,,1] <- tapply(rr$mCounts,rr$Chr.Name,sum)
    #chromHits[i,,2] <- tapply(rr$fCounts,rr$Chr.Name,sum)
    #rr <- data.frame(mCounts=countsGood$M[i,], fCounts=countsGood$F[i,])
    chromHits[i,,1] <- tapply(countsGood$M[i,],fData(BSSnp)$Chr.Name,sum)
    chromHits[i,,2] <- tapply(countsGood$F[i,],fData(BSSnp)$Chr.Name,sum)
  } #for
  lenChrom <- tapply(rep(1,nrow(BSSnp)),fData(BSSnp)$Chr.Name,sum)
  cHitsPerMarker <- chromHits/array(rep(rep(1,nPara/2)%*%t(lenChrom),2),dim=dim(chromHits))
  nCountsTot <- cbind(mother=apply(countsGood$M,1,sum), father=apply(countsGood$F,1,sum))
  
  testP <- FALSE
  if (testP){
    plotCountsChrom(cHitsPerMarker,1:min(16,(nPara/2)))
    countG <- countsGood$F[1,]
    rr <- cbind(fData(BSSnp)[,c('Chr.Name','Chr.Index')],countG)
    rr <- rr[order(rr$Chr.Index),]
    rr <- rr[order(rr$Chr.Name),]
    print(rr[order(rr$countG),][-(1:(nrow(BSSnp)-30)),])
  }
  chromHits <- NULL
  chromHits$cPerMarker <- cHitsPerMarker
  chromHits$nCountsTot <- nCountsTot
  chromHits
}



#For each marker, plot the number of hits per chromosome for each chromosome
plotCountsChrom <- function(chromHits,markers=1:16,...){
  nSnp <- length(markers)
  pp <- par()
  nC <- ceiling(sqrt(nSnp))
  nR <- ceiling(nSnp/nC)
  par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
  xlim <- c(1,dim(chromHits)[2])
  for (i in 1:nSnp){
    plot.new()
    ylim <- c(0,max(chromHits[markers[i],,]))
    plot.window(xlim,ylim); axis(1,...); axis(2)
    lines(xlim[1]:xlim[2],chromHits[markers[i],,1],col='black')
    lines(xlim[1]:xlim[2],chromHits[markers[i],,2],col='red')
    title(main=dimnames(chromHits)[[1]][markers[i]],xlab='Chrom. #',ylab='Counts')
  }
  par(pp[c('mfrow','mai')])
}



#Assign paralogs to specific chromosomes
assignParalogues <- function(BSSnp,BSRed,paraCalls=unmixParalogues(BSRed,singleCalls),inheritP=resolveInheritanceSNP(BSSnp),singleCalls=getSingleCalls(BSRed),cHits=locateParalogues(BSSnp,paraCalls,inheritP,mO$offspringLim,mO$ratioLim)$cPerMarker,mO=setMergeOptions()){
  indMothers <- unique(substr(pData(BSSnp)$PedigreeID,1,4))
  indMothers <- indMothers[!substr(indMothers,2,4) %in% c('000','999')]
  nMothers <- length(indMothers)
  pp <- paste(substr(pData(BSSnp)$'PedigreeID',1,1),substr(pData(BSSnp)$'PedigreeID',5,7),sep='')
  indFathers <- unique(pp)
  indFathers <- indFathers[!substr(indFathers,2,4) %in% c('000','999')]
  nFathers <- length(indFathers)

  #Select markers that can be assigned to chromosomes and name them accordingly
  dimCH =dim(cHits)
  bestCH <- t(apply(cHits,1,function(x) apply(x,1,max)))    #Use largest value betw. parents
  orderC <- apply(bestCH,1,function(x) order(x,decreasing=TRUE))[1:3,]
  if (is.null(mO$minC)){
    noiseLevels <- apply(cbind(bestCH,orderC[3,]),1,function(x,n) x[x[n]],n=dimCH[2]+1)
    mO$minC <- quantile(noiseLevels, probs=mO$noiseQuantile)   #Quantile of 3rd largest peak across markers
    str <- 'The %.0fth percentile of the 3rd largest chromosome peaks gives minC = %.3f'
    message(sprintf(str, mO$noiseQuantile*100, mO$minC))
  }
  maxCounts <- apply(cHits,1,max)
  iOK <- which(maxCounts>mO$minC)
  iRunning <- sort(c(2*iOK-1,2*iOK))
  rNames <- NULL
  step <- 1
  altC <- matrix(nrow=dimCH[1],ncol=2,dimnames=list(names(maxCounts),c('P1','P2')))
  cNumbers <- sub('[a-z0]+','',colnames(cHits))
  for (i in iOK){
    altInd <- orderC[,i]
    ii <- bestCH[i,altInd]>mO$minC & c(TRUE,TRUE,FALSE)   #More than two matches not allowed
    altCH <- bestCH[i,altInd]
    if (ii[2]){
      ii[2] <- altCH[2]>2*altCH[3]   #Requires twice as many matches as max(noise)
    }
    altCH <- altCH[ii]
    #print(i);print(altInd);print(altCH)
    if (length(altCH)==2){
      sAltInd <- sort(altInd[1:2])
      rNames[step:(step+1)] <- paste(colnames(orderC)[i],'_Chrom',cNumbers[sAltInd],sep='')
      altC[i,] <- cNumbers[sAltInd]
      step <- step + 2
    }else{
      rNames[step] <- paste(colnames(orderC)[i],'_Chrom',cNumbers[altInd[1]],sep='')
      altC[i,1] <- cNumbers[altInd[1]]
      iAlt <- which(names(iRunning)%in%colnames(orderC)[i])
      iRunning <- iRunning[-iAlt[2]]
      step <- step + 1
    } #if
  } #for
  nNew <- length(rNames)
  nPara <- nrow(paraCalls$m)
  markerNames <- sub('_Chrom[0-9XY]+','',rNames)
  mergedCalls <- singleCalls[markerNames,]
  filledIn <- !is.na(singleCalls)
  rownames(mergedCalls) <- rNames
  indSCalls <- NULL    #each element is the corresponding element-index in paraCalls
  colOffset <- rep(1,nNew) %*% t(seq(0,ncol(mergedCalls)*nPara-1,nPara))
  indSCalls$running <- indSCalls$m <- indSCalls$f <- matrix(iRunning,nrow=nNew,ncol=ncol(paraCalls$m),dimnames=dimnames(mergedCalls)) + colOffset
  indSCalls$m[!filledIn[markerNames,]] <- indSCalls$f[!filledIn[markerNames,]] <- NA
  #partnerCandidates <- list()   #Keeps ranking of chromosomes where both paralogues not known
  positionFemale <- matrix(nrow=nNew,ncol=nMothers,dimnames=list(rNames,indMothers))
  positionMale <- matrix(nrow=nNew,ncol=nFathers,dimnames=list(rNames,indFathers))
  
  #Fill in first for mother, then for father
  str1 <- 'Inheritance each parent, paralogue %d/%d: %s'
  for (i in 1:nPara){    #One paralogue at the time
    if (i %in% seq(1,nPara,10))
      message(sprintf(str1,i,nPara,rownames(paraCalls$m)[i]))
    for (j in 1:nMothers){    #Inheritance from mothers
      jm <- pData(BSSnp)$PedigreeID %in% paste(indMothers[j],'00000',sep='')#; paraCalls$m[i,jm] %in% .5
      if (paraCalls$m[i,jm] %in% .5){    #Mother j heterozygous in paralogue i
        iHetSnp <- assayData(BSSnp)$call[,jm] %in% .5   #Polymorphic markers
        j.fam <- substr(pData(BSSnp)$PedigreeID,1,4) %in% indMothers[j] &! jm
        iSamp <- !is.na(paraCalls$m[i,j.fam])
        iName <- strsplit(rownames(paraCalls$m)[i],'_Para')[[1]]
        nHits <- sum(!is.na(altC[iName[1],]))
        #sum(iSamp);nHits;filledIn[iName[1],jm]
        if (sum(iSamp)>=mO$offspringLim & nHits>0 &! filledIn[iName[1],jm]){
          snps <- as.matrix(inheritP$m[iHetSnp,j.fam][,iSamp],nrow=sum(iHetSnp))
          nDen <- apply(snps,1,function(x) sum(!is.na(x)))
          nNum <- apply(snps,1,function(x,p) max(sum(x==p,na.rm=TRUE),sum(x==1-p,na.rm=TRUE)),paraCalls$m[i,j.fam][iSamp])
          #rr <- cbind(lMap[iHetSnp,],nNum,nDen,Ratio=nNum/nDen)
          rr <- cbind(fData(BSSnp)[iHetSnp,c('Chromosome','Female')],nNum,nDen,Ratio=nNum/nDen)
          if (nHits==2){
            bestNum <- totalDen <- medianRF <- linkRatio <- rep(0,nHits)
            for (k in 1:nHits){   #Test both alternatives
              #indC <- rr$Ssa %in% paste('ssa',sprintf('%.02d',altC[iName[1],k]),sep='')#; rr[indC,]
              indC <- rr$Chromosome %in% altC[iName[1],k]#; rr[indC,]
              indGood <- rr[indC,'Ratio']>=mO$ratioLim & rr[indC,'nNum']>=mO$offspringLim#; rr[indC,][indGood,]
              while (any(indGood)){
                medGPos <- quantile(rr[indC,'Female'][indGood],na.rm=TRUE,probs=.5,type=1)#;medGPos #No averaging!
                indCand <- rr[indC,'Female']>medGPos-mO$rngLD & rr[indC,'Female']<medGPos+mO$rngLD#; rr[indC,][indCand,]
                lRat <- sum(rr[indC,'nNum'][indCand])/sum(rr[indC,'nDen'][indCand])#;lRat
                bNum <- max(rr[indC,'nNum'][indCand][rr[indC,'Ratio'][indCand]>=mO$ratioLim],na.rm=TRUE)#;bNum
                #snps[indC,][indCand,]; assayData(BSSnp)$call[iHetSnp,j.fam][indC,iSamp][indCand,]
                tDen <- sum(apply(!is.na(matrix(matrix(snps[indC,],nrow=sum(indC))[indCand,],nrow=sum(indCand))),2,any))#;tDen
                if (lRat>linkRatio[k] | (lRat==linkRatio[k] & tDen>totalDen[k])){
                  linkRatio[k] <- lRat
                  medianRF[k] <- medGPos
                  totalDen[k] <- tDen
                  bestNum[k] <- bNum
                } #if
                indGood[indGood]=!indCand[indGood]#; rr[indC,][indGood,]
              } #while
            } #for k
            #totalDen;bestNum;medianRF;linkRatio
            if (any(linkRatio>=mO$ratioLim & totalDen>=mO$offspringLim) &! (diff(linkRatio)==0 & diff(totalDen)==0)){
              rankFit <- order(linkRatio,totalDen,decreasing=TRUE)
              iMerge <- paste(iName[1],'_Chrom',altC[iName[1],rankFit],sep='')#;iMerge
              iSingle1 <- which(names(iRunning) %in% iName[1])#;iSingle1
              iOrder1 <- c(iSingle1[as.numeric(iName[2])],iSingle1[-as.numeric(iName[2])])#;iOrder1
              indSCalls$m[iMerge,jm|j.fam] <- indSCalls$running[iOrder1,jm|j.fam]
              iSingle2 <- which(sub('_Para[12]','',rownames(paraCalls$m)) %in% iName[1])
              iOrder2 <- c(iSingle2[as.numeric(iName[2])],iSingle2[-as.numeric(iName[2])])
              mergedCalls[iMerge,jm] <- paraCalls$m[iOrder2,jm]
              if (any(!is.na(positionFemale[iMerge,j])))
                stop('Attempt to overwrite positionFemale[iMerge,j]...')
              positionFemale[iMerge[1],j] <- medianRF[rankFit[1]]
             } #if any
          }else if(nHits==1){
            sumGoodC <- tapply(rr$Ratio>=mO$ratioLim & rr$nNum>=mO$offspringLim,rr$Chromosome,sum)#; sumGoodC
            sumGoodC <- sumGoodC[order(as.numeric(names(sumGoodC)))]   #order chromosomes increasingly
            if (any(sumGoodC)){
              iCand <- which(sumGoodC>0)
              totalDen <- bestNum <- medianRF <- linkRatio <- rep(0,length(iCand))
              for (k in 1:length(iCand)){   #Test all possible alternatives
                #indC <- rr$Ssa %in% paste('ssa',sprintf('%.02d',iCand[k]),sep='')#; rr[indC,]
                indC <- rr$Chromosome %in% iCand[k]#; rr[indC,]
                indGood <- rr[indC,'Ratio']>=mO$ratioLim & rr[indC,'nNum']>=mO$offspringLim#; rr[indC,][indGood,]
                while (any(indGood)){
                  medGPos <- quantile(rr[indC,'Female'][indGood],na.rm=TRUE,probs=.5,type=1)#;medGPos #No averaging!
                  indCand <- rr[indC,'Female']>medGPos-mO$rngLD & rr[indC,'Female']<medGPos+mO$rngLD#; rr[indC,][indCand,]
                  lRat <- sum(rr[indC,'nNum'][indCand])/sum(rr[indC,'nDen'][indCand])#; lRat
                  bNum <- max(rr[indC,'nNum'][indCand][rr[indC,'Ratio'][indCand]>=mO$ratioLim],na.rm=TRUE)#; bNum
                  #snps[indC,][indCand,]
                  tDen <- sum(apply(!is.na(matrix(matrix(snps[indC,],nrow=sum(indC))[indCand,],nrow=sum(indCand))),2,any))#;tDen
                  if (lRat>linkRatio[k] | (lRat==linkRatio[k] & tDen>totalDen[k])){
                    linkRatio[k] <- lRat
                    medianRF[k] <- medGPos
                    totalDen[k] <- tDen
                    bestNum[k] <- bNum
                  } #if
                  indGood[indGood]=!indCand[indGood]#; rr[indC,][indGood,]
                } #while
              } #for k
              #totalDen;bestNum;medianRF;linkRatio
              compFrame <- data.frame(cbind(linkRatio,totalDen,bestNum,medianRF),row.names=names(iCand))
              if (any(linkRatio>=mO$ratioLim & totalDen>=mO$offspringLim)){
                expChrom <- as.numeric(iCand%in%altC[iName[1],1])#;expChrom
                rankFit <- order(linkRatio,totalDen,expChrom,decreasing=TRUE)#;rankFit  #NB! Favours exp. chrom.
                #partnerCandidates[[iName[1]]][[indMothers[j]]] <- compFrame[rankFit,]
                if (iCand[rankFit[1]]%in%altC[iName[1],1]){
                  iMerge <- paste(iName[1],'_Chrom',altC[iName[1],nHits],sep='')
                  indSCalls$m[iMerge,jm|j.fam] <- indSCalls$running[iMerge,jm|j.fam]+as.numeric(iName[2])-1
                  mergedCalls[iMerge,jm] <- paraCalls$m[i,jm]
                  if (any(!is.na(positionFemale[iMerge,j])))
                    stop('Attempt to overwrite positionFemale[iMerge,j]...')
                  positionFemale[iMerge,j] <- medianRF[rankFit[1]]
                } #if iCand
              } #if any linkRatio
            } #if any sumGoodC
          } #if nHits
        } #if sum
      } #if polymorphic
    } #for j
    
    for (j in 1:nFathers){    #Inheritance from fathers
      jf <- pData(BSSnp)$PedigreeID %in% paste(substr(indFathers[j],1,1),'000',substr(indFathers[j],2,4),'00',sep='')#; paraCalls$f[i,jf] %in% .5
      if (paraCalls$f[i,jf] %in% .5){    #Father j heterozygous in paralogue i
        iHetSnp <- assayData(BSSnp)$call[,jf] %in% .5   #Polymorphic markers
        j.fam <- pp %in% indFathers[j] &! jf
        iSamp <- !is.na(paraCalls$f[i,j.fam])
        iName <- strsplit(rownames(paraCalls$f)[i],'_Para')[[1]]
        nHits <- sum(!is.na(altC[iName[1],]))
        #sum(iSamp);nHits;filledIn[iName[1],jf]
        if (sum(iSamp)>=mO$offspringLim & nHits>0 &! filledIn[iName[1],jf]){
          snps <- as.matrix(inheritP$f[iHetSnp,j.fam][,iSamp],nrow=sum(iHetSnp))
          nDen <- apply(snps,1,function(x) sum(!is.na(x)))
          nNum <- apply(snps,1,function(x,p) max(sum(x==p,na.rm=TRUE),sum(x==1-p,na.rm=TRUE)),paraCalls$f[i,j.fam][iSamp])
          #rr <- cbind(lMap[iHetSnp,],nNum,nDen,Ratio=nNum/nDen)
          rr <- cbind(fData(BSSnp)[iHetSnp,c('Chromosome','Male')],nNum,nDen,Ratio=nNum/nDen)
          if (nHits==2){
            bestNum <- totalDen <- medianRF <- linkRatio <- rep(0,nHits)
            for (k in 1:nHits){   #Test both alternatives
              #indC <- rr$Ssa %in% paste('ssa',sprintf('%.02d',altC[iName[1],k]),sep='')#; rr[indC,]
              indC <- rr$Chromosome %in% altC[iName[1],k]#; rr[indC,]
              indGood <- rr[indC,'Ratio']>=mO$ratioLim & rr[indC,'nNum']>=mO$offspringLim#; rr[indC,][indGood,]
              while (any(indGood)){
                medGPos <- quantile(rr[indC,'Male'][indGood],na.rm=TRUE,probs=.5,type=1)#;medGPos #No averaging!
                indCand <- rr[indC,'Male']>medGPos-mO$rngLD & rr[indC,'Male']<medGPos+mO$rngLD#; rr[indC,][indCand,]
                lRat <- sum(rr[indC,'nNum'][indCand])/sum(rr[indC,'nDen'][indCand])#;lRat
                bNum <- max(rr[indC,'nNum'][indCand][rr[indC,'Ratio'][indCand]>=mO$ratioLim],na.rm=TRUE)#;bNum
                #snps[indC,][indCand,]
                tDen <- sum(apply(!is.na(matrix(matrix(snps[indC,],nrow=sum(indC))[indCand,],nrow=sum(indCand))),2,any))#;tDen
                if (lRat>linkRatio[k] | (lRat==linkRatio[k] & tDen>totalDen[k]) | (lRat==linkRatio[k] & tDen==totalDen[k] & bNum>bestNum[k])){
                  linkRatio[k] <- lRat
                  medianRF[k] <- medGPos
                  totalDen[k] <- tDen
                  bestNum[k] <- bNum
                } #if
                indGood[indGood]=!indCand[indGood]; rr[indC,][indGood,]
              } #while
            } #for k
            #totalDen;bestNum;medianRF;linkRatio
            if (any(linkRatio>=mO$ratioLim & totalDen>=mO$offspringLim) &! (diff(linkRatio)==0 & diff(totalDen)==0)){
              rankFit <- order(linkRatio,totalDen,decreasing=TRUE)#;rankFit
              iMerge <- paste(iName[1],'_Chrom',altC[iName[1],rankFit],sep='')#;iMerge
              iSingle1 <- which(names(iRunning) %in% iName[1])#;iSingle1
              iOrder1 <- c(iSingle1[as.numeric(iName[2])],iSingle1[-as.numeric(iName[2])])#;iOrder1
              indSCalls$f[iMerge,jf|j.fam] <- indSCalls$running[iOrder1,jf|j.fam]
              iSingle2 <- which(sub('_Para[12]','',rownames(paraCalls$f)) %in% iName[1])#;iSingle2
              iOrder2 <- c(iSingle2[as.numeric(iName[2])],iSingle2[-as.numeric(iName[2])])#;iOrder2
              mergedCalls[iMerge,jf] <- paraCalls$f[iOrder2,jf]
              if (any(!is.na(positionMale[iMerge,j])))
                stop('Attempt to overwrite positionMale[iMerge,j]...')
              positionMale[iMerge[1],j] <- medianRF[rankFit[1]]
          } #if any
          }else if(nHits==1){
            sumGoodC <- tapply(rr$Ratio>=mO$ratioLim & rr$nNum>=mO$offspringLim,rr$Chromosome,sum)#; sumGoodC
            sumGoodC <- sumGoodC[order(as.numeric(names(sumGoodC)))]   #order chromosomes increasingly
            if (any(sumGoodC)){
              iCand <- which(sumGoodC>0)
              totalDen <- bestNum <- medianRF <- linkRatio <- rep(0,length(iCand))
              for (k in 1:length(iCand)){   #Test all possible alternatives
                #indC <- rr$Ssa %in% paste('ssa',sprintf('%.02d',iCand[k]),sep='')#; rr[indC,]
                indC <- rr$Chromosome %in% iCand[k]
                indGood <- rr[indC,'Ratio']>=mO$ratioLim & rr[indC,'nNum']>=mO$offspringLim#; rr[indC,][indGood,]
                while (any(indGood)){
                  medGPos <- quantile(rr[indC,'Male'][indGood],na.rm=TRUE,probs=.5,type=1)#;medGPos #No averaging!
                  indCand <- rr[indC,'Male']>medGPos-mO$rngLD & rr[indC,'Male']<medGPos+mO$rngLD#; rr[indC,][indCand,]
                  lRat <- sum(rr[indC,'nNum'][indCand])/sum(rr[indC,'nDen'][indCand])#; lRat
                  bNum <- max(rr[indC,'nNum'][indCand][rr[indC,'Ratio'][indCand]>=mO$ratioLim],na.rm=TRUE)#; bNum
                  #snps[indC,][indCand,]
                  tDen <- sum(apply(!is.na(matrix(matrix(snps[indC,],nrow=sum(indC))[indCand,],nrow=sum(indCand))),2,any))#;tDen
                  if (lRat>linkRatio[k] | (lRat==linkRatio[k] & tDen>totalDen[k])){
                    linkRatio[k] <- lRat
                    medianRF[k] <- medGPos
                    totalDen[k] <- tDen
                    bestNum[k] <- bNum
                  } #if
                  indGood[indGood]=!indCand[indGood]; rr[indC,][indGood,]
                } #while
              } #for k
              #totalDen;bestNum;medianRF;linkRatio
              compFrame <- data.frame(cbind(linkRatio,totalDen,bestNum,medianRF),row.names=names(iCand))
              if (any(linkRatio>=mO$ratioLim & totalDen>=mO$offspringLim)){
                expChrom <- as.numeric(iCand%in%altC[iName[1],1])#;expChrom
                rankFit <- order(linkRatio,totalDen,expChrom,decreasing=TRUE)#;rankFit  #NB! Favours exp. chrom.
                #partnerCandidates[[iName[1]]][[indFathers[j]]] <- compFrame[rankFit,]
                if (iCand[rankFit[1]]%in%altC[iName[1],1]){
                  iMerge <- paste(iName[1],'_Chrom',altC[iName[1],nHits],sep='')
                  indSCalls$f[iMerge,jf|j.fam] <- indSCalls$running[iMerge,jf|j.fam]+as.numeric(iName[2])-1
                  mergedCalls[iMerge,jf] <- paraCalls$f[i,jf]
                  if (any(!is.na(positionMale[iMerge,j])))
                    stop('Attempt to overwrite positionMale[iMerge,j]...')
                  positionMale[iMerge,j] <- medianRF[rankFit[1]]
                } #if iCand
              } #if any linkRatio
            } #if any sumGoodC
          } #if nHits
        } #if sum
      } #if polymorphic
    } #for j
  } #for i

  #Fill in for offspring within each full-sib family
  indOffspring <- !substr(pData(BSSnp)$PedigreeID,8,9)%in%'00'
  indFullsib <- unique(substr(pData(BSSnp)$PedigreeID[indOffspring],1,7))
  nFullsib <- length(indFullsib)
  str1 <- 'Merging calls for full-sib family %d/%d'
  for (j in 1:nFullsib){
    if (j %in% seq(1,nFullsib,10))
      message(sprintf(str1,j,nFullsib))
    j.fam <- substr(pData(BSSnp)$PedigreeID,1,7)%in%indFullsib[j]
    indCurr <- substr(pData(BSSnp)$PedigreeID[j.fam][1],1,7)
    jm <- pData(BSSnp)$PedigreeID %in% paste(substr(indCurr,1,4),'00000',sep='')
    jf <- pData(BSSnp)$PedigreeID %in% paste(substr(indCurr,1,1),'000',substr(indCurr,5,7),'00',sep='')
    if (any(jm) & any(jf)){
      indM <- as.vector(indSCalls$m[,j.fam])
      indF <- as.vector(indSCalls$f[,j.fam])
      mergedCalls[,j.fam] <- (paraCalls$m[indM]+paraCalls$f[indF])/2
      pCalls <- mergedCalls[,c(which(jm),which(jf))]#;pCalls[1:10,]
      indAA <- apply(pCalls==0,1,sum)%in%2  #NB! The following may mask NA's which are due to ped-error
      mergedCalls[indAA,j.fam][is.na(mergedCalls[indAA,j.fam])] <- 0  #all AA
      indBB <- apply(pCalls==1,1,sum)%in%2
      mergedCalls[indBB,j.fam][is.na(mergedCalls[indBB,j.fam])] <- 1  #all BB
      indAB <- (pCalls[,1]%in%0 & pCalls[,2]%in%1) | (pCalls[,1]%in%1 & pCalls[,2]%in%0)
      mergedCalls[indAB,j.fam][is.na(mergedCalls[indAB,j.fam])] <- .5   #all AB
      for (k in 1:nNew){   #Improve speed by looping through MSV-5's instead
        pairInd <- rNames[markerNames%in%markerNames[k]]#;pairInd
        step <- which(pairInd%in%rNames[k])#;step
        #mergedCalls[pairInd,c(which(jm),which(jf),which(j.fam))]
        #assayData(BSRed)$call[markerNames[k],c(which(jm),which(jf),which(j.fam))]
        if (length(pairInd)==2){
          indNA <- is.na(mergedCalls[pairInd[step],j.fam]) &! is.na(mergedCalls[pairInd[-step],j.fam])#;indNA
          mergedCalls[k,j.fam][indNA] <- 2*assayData(BSRed)$call[markerNames[k],j.fam][indNA]-mergedCalls[pairInd[-step],j.fam][indNA]  #NB! Possibility of calls >1 or <0 ???
        } #if length
        indNA1 <- is.na(mergedCalls[k,j.fam])#;indNA1
        if (any(indNA1)){
          ii <- is.na(pCalls[k,])#;ii
          if (any(ii)){ # &step!=2
            sInd <- c(sub('Chrom[0-9XY]+','Para1',pairInd[1]),sub('Chrom[0-9XY]+','Para2',pairInd[1]))
            #paraCalls$m[sInd,c(which(jm),which(jf),which(j.fam))]
            #paraCalls$m[indSCalls$m[pairInd,jm]]
            rankFitM <- order(indSCalls$m[pairInd,jm])#;rankFitM
            if (ii[1] | length(pairInd)==1){
              iim <- apply(matrix(matrix(paraCalls$m[sInd,j.fam],nrow=length(pairInd))[,indNA1],nrow=length(pairInd)),2,function(x) diff(x)%in%0)
            }else{
              iim <- !is.na(paraCalls$m[sInd[1],j.fam][indNA1])
            } #if ii[1]
            #paraCalls$f[sInd,c(which(jm),which(jf),which(j.fam))]
            #paraCalls$f[indSCalls$f[pairInd,jf]]
            rankFitF <- order(indSCalls$f[pairInd,jf])#;rankFitF
            if (ii[2] | length(pairInd)==1){
              iif <- apply(matrix(matrix(paraCalls$f[sInd,j.fam],nrow=length(pairInd))[,indNA1],nrow=length(pairInd)),2,function(x) diff(x)%in%0)
            }else{
              iif <- !is.na(paraCalls$f[sInd[1],j.fam][indNA1])
            } #if ii[2]
            if (any(iif&iim) & step==2) stop('Cannot include step==1 -criterion...')
            mergedCalls[k,j.fam][indNA1][iim&iif] <- (paraCalls$m[sInd[rankFitM[step]],j.fam]+paraCalls$f[sInd[rankFitF[step]],j.fam])[iim&iif]/2
          }else{
            pp <- assayData(BSRed)$call[markerNames[k],c(which(jm),which(jf))]#;pp
            rngParents <- c(max(0,max(pp)-.5),min(1,min(pp)+.5))
            if (all(pp<=.25)){
              rngParents[2] <- sum(pp)
            }else if (all(pp>=.75)){
              rngParents[1] <- sum(pp)-1
            }
            #rngParents
            indLow <- assayData(BSRed)$call[markerNames[k],j.fam][indNA1] %in% rngParents[1]#;indLow
            indHigh <- assayData(BSRed)$call[markerNames[k],j.fam][indNA1] %in% rngParents[2]#;indHigh
            if (any(indLow|indHigh) & step==2) stop('Cannot include step==1 -criterion...')
            mergedCalls[k,j.fam][indNA1][indLow] <- max(pCalls[k,])-.5  #NB! Possibility of calls >1 or <0 ???
            mergedCalls[k,j.fam][indNA1][indHigh] <- min(pCalls[k,])+.5  #NB! Possibility of calls >1 or <0 ???
          } #if any ii
        } #if any indNA1
      } #for k
    } #if any
  } #for j
  x <- NULL
  x$x <- mergedCalls
  x$chromPairs <- altC
  x$positionFemale <- positionFemale
  x$positionMale <- positionMale
  x
}



preprocessBeadSet <- function(BSData,normInd,normOpts=setNormOptions(shearInf1=!is.null(normInd))){
  if ('noiseIntensity'%in%colnames(pData(BSData)))
    stop("Column 'noiseIntensity' found in phenoData. Will not pre-process.")
  if (normOpts$shearInf1){
    indTrain <- list()
    indRed <- grepl('10[0-9]',colnames(normInd))
    indTrain$'10r' <- apply(as.matrix(normInd[,indRed]),1,any)
    indGreen <- grepl('20[0-9]',colnames(normInd))
    indTrain$'20g' <- apply(as.matrix(normInd[,indGreen]),1,any)
    #indTrain$'10r' <- normInd[,'102']|normInd[,'101']  #Inf-I using red channel
    #indTrain$'20g' <- normInd[,'202']|normInd[,'201']  #Inf-I using green channel
  }else{
    indTrain <- rep(TRUE,nrow(BSData))
  } #end if
  BSData <- shearRawSignal(BSData,normOpts=normOpts,indTrain=indTrain)
  message('Estimating origin and noise-levels...')
  noiseDist <- getNoiseDistributions(BSData,normInd=normInd,normOpts=normOpts)
  message(sprintf('Transformation of signal: %s...',normOpts$trans))
  trChannel <- transformChannels(assayData(BSData)$R,assayData(BSData)$G,normOpts=normOpts)
  message(sprintf('Transformation of red SE: %s...',normOpts$trans))
  trSE.R <- transformSEs(assayData(BSData)$R,assayData(BSData)$se.R,normOpts=normOpts)
  message(sprintf('Transformation of green SE: %s...',normOpts$trans))
  trSE.G <- transformSEs(assayData(BSData)$G,assayData(BSData)$se.G,normOpts=normOpts)
  message(sprintf('Normalize channels: %s...',normOpts$method))
  trChannel <- normalizeShearedChannels(trChannel,noiseDist,normOpts=normOpts)
  assayData(BSData)$R <- trChannel$X
  assayData(BSData)$se.R <- trSE.R$SE
  assayData(BSData)$G <- trChannel$Y
  assayData(BSData)$se.G <- trSE.G$SE
  noiseData <- data.frame(noiseIntensity=normOpts$nSD*trChannel$X.SE,row.names=sampleNames(BSData))
  noiseMetadata <- data.frame(labelDescription=paste('Estimated red channel SD*',normOpts$nSD,sep=''),row.names=colnames(noiseData))
  noiseInt <- new('AnnotatedDataFrame',data=noiseData,varMetadata=noiseMetadata,dimLabels=dimLabels(phenoData(BSData)))
  phenoData(BSData) <- combine(phenoData(BSData),noiseInt)
  validObject(BSData)
  BSData
}



plotPreprocessing <- function(BSData,normInd,normOpts=setNormOptions(shearInf1=!is.null(normInd)),plotArray=1,...){
  pp <- par()
  par(mfrow=c(2,2),mai=c(.8,.8,.8,.4))
  if (normOpts$shearInf1){
    indTrain <- list()
    indRed <- grepl('10[0-9]',colnames(normInd))
    indTrain$'10r' <- apply(as.matrix(normInd[,indRed]),1,any)
    indGreen <- grepl('20[0-9]',colnames(normInd))
    indTrain$'20g' <- apply(as.matrix(normInd[,indGreen]),1,any)
    #indTrain$'10r' <- normInd[,'102']|normInd[,'101']  #Inf-I using red channel
    #indTrain$'20g' <- normInd[,'202']|normInd[,'201']  #Inf-I using green channel
  }else{
    indTrain <- rep(TRUE,nrow(BSData))
  } #end if
  scatterArrays(BSData,arrays=plotArray,smooth=TRUE,newFigure=FALSE,probeInd=1:nrow(BSData))
  BSsheared = shearRawSignal(BSData[,plotArray],plot=TRUE,newFigure=FALSE,normOpts=normOpts,indTrain=indTrain,verbose=TRUE)
  noiseDist = getNoiseDistributions(BSsheared,normInd=normInd,normOpts=normOpts,plot=TRUE,newFigure=FALSE)
  plotEstimatedNoise(BSsheared,noiseDist,normOpts=normOpts,newFigure=FALSE,xlab='R',ylab='G',...)
  par(pp[c('mfcol','mai')])
}



makeFilenames <- function(tag,normOpts,path='.'){
  if (!substr(path,nchar(path),nchar(path)) %in% '/'){
    path <- paste(path,'/',sep='')
  }
  intFile <- paste(path,'Int_',normOpts$transf,'_',normOpts$method,'_',tag,'.txt',sep='')
  thFile <- paste(path,'Theta_',normOpts$transf,'_',normOpts$method,'_',tag,'.txt',sep='')
  seFile <- paste(path,'SE_',normOpts$transf,'_',normOpts$method,'_',tag,'.txt',sep='')
  phFile <- paste(path,'Pheno_',normOpts$transf,'_',normOpts$method,'_',tag,'.txt',sep='')
  callFile <- paste(path,'Call_',normOpts$transf,'_',normOpts$method,'_',tag,'.txt',sep='')
  resFile <- paste(path,'Res_',normOpts$transf,'_',normOpts$method,'_',tag,'.txt',sep='')
  genoFile <- paste(path,'Alleles_',normOpts$transf,'_',normOpts$method,'_',tag,'.txt',sep='')
  dataFiles <- c(intFile=intFile,thFile=thFile,seFile=seFile,phFile=phFile,callFile=callFile,resFile=resFile,genoFile=genoFile)
  ind <- substr(dataFiles,nchar(path)+1,nchar(dataFiles)) %in% dir(path)
  if (any(ind)){
    OK = FALSE
    while (!OK){
      ovrwrt <- readline(prompt='Warning! Existing files may be overwritten. Continue? (yes/no) ')
      OK <- ovrwrt %in% c('yes','no')
    }
    if (ovrwrt %in% 'yes'){
      message('Saving to \'dataFiles\' may overwrite existing files!')
    }else{
      message("Please provide a unique tag for the filenames")
      dataFiles <- NULL
    }
  }
  dataFiles
}


modifyExistingFilenames <- function(dataFiles,oldtag,newtag){
  dataFiles['callFile'] <- sub(oldtag,newtag,dataFiles['callFile'])
  dataFiles['resFile'] <- sub(oldtag,newtag,dataFiles['resFile'])
  dataFiles['genoFile'] <- sub(oldtag,newtag,dataFiles['genoFile'])
  print(dataFiles)
  dataFiles
}



#Saves data with valid row/column names
writeAlleleSet <- function(dataFiles,BSRed,append=TRUE,sep='\t',quote=FALSE){
  sampleNames(BSRed) <- make.names(sampleNames(BSRed))
  featureNames(BSRed) <- make.names(featureNames(BSRed))
  re <- gregexpr('/',dataFiles)
  nDir <- length(re[[1]])
  path <- substr(dataFiles[[1]],1,re[[1]][nDir]-1)
  if (any(c('callFile','resFile','genoFile') %in% names(dataFiles)) & any(c('call','genotype') %in% names(assayData(BSRed)))){
    if ('callFile' %in% names(dataFiles)){
      fName <- substr(dataFiles[['callFile']],re[[1]][nDir]+1,nchar(dataFiles[['callFile']]))
      if (!fName %in% dir(path,fName)){
        message(sprintf("Writing calls to '%s'...",dataFiles['callFile']))
      }else if (append){
        message(sprintf("Appending calls to '%s'...",dataFiles['callFile']))
      }else{
        message(sprintf("Overwriting '%s'...",dataFiles['callFile']))
      }
      write.table(formatC(assayData(BSRed)$call,digits=2,format='f'),file=dataFiles['callFile'],append=append,sep=sep,quote=quote,col.names=!append)
    }
    if ('resFile' %in% names(dataFiles)){
      fName <- substr(dataFiles[['resFile']],re[[1]][nDir]+1,nchar(dataFiles[['resFile']]))
      if (!fName %in% dir(path,fName)){
        message(sprintf("Writing featureData to '%s'...",dataFiles['resFile']))
      }else if (append){
        message(sprintf("Appending featureData to '%s'...",dataFiles['resFile']))
      }else{
        message(sprintf("Overwriting '%s'...",dataFiles['resFile']))
      }
      write.table(format(fData(BSRed)),file=dataFiles['resFile'],append=append,sep=sep,quote=quote,col.names=!append)
    }
    if ('genoFile' %in% names(dataFiles)){
      fName <- substr(dataFiles[['genoFile']],re[[1]][nDir]+1,nchar(dataFiles[['genoFile']]))
      if (!fName %in% dir(path,fName)){
        message(sprintf("Writing genotypes to '%s'...",dataFiles['genoFile']))
      }else if (append){
        message(sprintf("Appending genotypes to '%s'...",dataFiles['genoFile']))
      }else{
        message(sprintf("Overwriting '%s'...",dataFiles['genoFile']))
      }
       write.table(formatC(assayData(BSRed)$genotype,digits=2,format='f'),file=dataFiles['genoFile'],append=append,sep=sep,quote=quote,col.names=!append)
    }
    iExtra = !names(dataFiles) %in% c('callFile','resFile','genoFile')
    if (any(iExtra)){
      message('Will NOT write the following data to file(s) (to force - save separately):')
      message(sprintf("'%s'\t",names(dataFiles)[iExtra]))
    }
  }else if (any(c('callFile','resFile','genoFile') %in% names(dataFiles))){
    message('No calls or genotypes available. No files written')
  }else if (any(c('intFile','thFile','seFile','phFile') %in% names(dataFiles))){
    if ('intFile' %in% names(dataFiles)){
      fName <- substr(dataFiles[['intFile']],re[[1]][nDir]+1,nchar(dataFiles[['intFile']]))
      if (!fName %in% dir(path,fName)){
        message(sprintf("Writing intensities to '%s'...",dataFiles['intFile']))
      }else if (append){
        message(sprintf("Appending intensities to '%s'...",dataFiles['intFile']))
      }else{
        message(sprintf("Overwriting '%s'...",dataFiles['intFile']))
      }
      write.table(formatC(t(assayData(BSRed)$intensity),format='f'),file=dataFiles['intFile'],append=append,sep=sep,quote=quote,col.names=!append)
    }
    if ('thFile' %in% names(dataFiles)){
      fName <- substr(dataFiles[['thFile']],re[[1]][nDir]+1,nchar(dataFiles[['thFile']]))
      if (!fName %in% dir(path,fName)){
        message(sprintf("Writing thetas to '%s'...",dataFiles['thFile']))
      }else if (append){
        message(sprintf("Appending thetas to '%s'...",dataFiles['thFile']))
      }else{
        message(sprintf("Overwriting '%s'...",dataFiles['thFile']))
      }
      write.table(formatC(t(assayData(BSRed)$theta),format='f'),file=dataFiles['thFile'],append=append,sep=sep,quote=quote,col.names=!append)
    }
    if ('seFile' %in% names(dataFiles)){
      fName <- substr(dataFiles[['seFile']],re[[1]][nDir]+1,nchar(dataFiles[['seFile']]))
      if (!fName %in% dir(path,fName)){
        message(sprintf("Writing standard errors to '%s'...",dataFiles['seFile']))
      }else if (append){
        message(sprintf("Appending standard errors to '%s'...",dataFiles['seFile']))
      }else{
        message(sprintf("Overwriting '%s'...",dataFiles['seFile']))
      }
      write.table(formatC(t(assayData(BSRed)$SE),format='f'),file=dataFiles['seFile'],append=append,sep=sep,quote=quote,col.names=!append)
    }
    if ('phFile' %in% names(dataFiles)){
      fName <- substr(dataFiles[['phFile']],re[[1]][nDir]+1,nchar(dataFiles[['phFile']]))
      if (!fName %in% dir(path,fName)){
        message(sprintf("Writing phenoData to '%s'...",dataFiles['phFile']))
      }else if (append){
        message(sprintf("Appending phenoData to '%s'...",dataFiles['phFile']))
      }else{
        message(sprintf("Overwriting '%s'...",dataFiles['phFile']))
      }
      write.table(format(pData(BSRed)),file=dataFiles['phFile'],append=append,sep=sep,quote=quote,col.names=!append)
    }
  }else{
    message('No valid file specified')
  }
}

Try the beadarrayMSV package in your browser

Any scripts or data that you put into this service are public.

beadarrayMSV documentation built on May 1, 2019, 6:33 p.m.