R/beadarrayMSV.R


#New class-definitions
setClass('BeadSetIllumina',contains='eSet', prototype =
         prototype(new("VersionedBiobase",
                       versions = c(classVersion("eSet"), BeadSetIllumina = "1.0.0"))))
setMethod("initialize", "BeadSetIllumina",
          function(.Object,
                   assayData = assayDataNew(R = R, se.R = se.R, G = G, se.G = se.G,
                     no.beads = no.beads, storage.mode = 'list'),
                   R = new("matrix"), se.R = new("matrix"), G = new("matrix"),
                   se.G = new("matrix"), no.beads = new("matrix"), ...) {
            if (!missing(assayData) &&
                any(!missing(R),!missing(G),!missing(se.R),!missing(se.G),!missing(no.beads))){
              warning("Using 'assayData'; ignoring 'R', 'G', 'se.R', 'se.G', 'no.beads'")
            }
            callNextMethod(.Object, assayData = assayData, ...)
          })
setValidity("BeadSetIllumina", function(object) {
  val <- assayDataValidMembers(assayData(object), c("R", "se.R", "G", "se.G", "no.beads"))
  if (is.logical(val)){
    val <- storageMode(object) %in% "list"
    if (!val){
      val <- "\nRequired storageMode: 'list'"
    }
  }
  val
})

setClass('AlleleSetIllumina',contains='eSet', prototype =
         prototype(new("VersionedBiobase",
                       versions = c(classVersion("eSet"), AlleleSetIllumina = "1.0.0"))))
setMethod("initialize", "AlleleSetIllumina",
          function(.Object, intensity = new("matrix"), theta = new("matrix"),
                   SE = new("matrix"), ...) {
            callNextMethod(.Object, intensity = intensity, theta = theta, SE = SE,
                           storage.mode = 'list', ...)
          })
setValidity("AlleleSetIllumina", function(object) {
  val <- assayDataValidMembers(assayData(object), c("intensity", "theta", "SE"))
  if (is.logical(val)){
    val <- storageMode(object) %in% "list"
    if (!val){
      val <- "\nRequired storageMode: 'list'"
    }
  }
  val
})
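
#Minimal usage sketch (not run) for the classes above. Both classes require
#storageMode 'list' and the assayData members checked by their validity methods;
#empty objects can be created directly with new().
if (FALSE) {
  BSRaw <- new('BeadSetIllumina')           #empty object; assayData holds R, se.R, G, se.G, no.beads
  BSRed <- new('AlleleSetIllumina')         #empty object; assayData holds intensity, theta, SE
  storageMode(BSRed)                        #"list"
  validObject(BSRaw) && validObject(BSRed)  #TRUE when the required members are present
}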



#Reads iScan's summary files. Uses code from 'readIllumina()' {beadarray}
readBeadSummaryOutput <- function(arrayNames=NULL,path='.',pattern='beadTypeFile.txt',recursive=FALSE,sep=',',fullPaths=NULL,sepchar='_',prList=NULL){
  #Locate files
  if (is.null(fullPaths))
    fullPaths <- dir(path=path,pattern=pattern,recursive=recursive)
  if (length(fullPaths) == 0) 
    stop(paste("No files with extension", pattern, "found"))
  tmp <- unlist(strsplit(fullPaths,'/'))
  nRec <- length(tmp)/length(fullPaths)
  indNames <- vector(length=length(tmp))
  indNames[seq(nRec,length(tmp),nRec)] <- TRUE
  rFiles <- tmp[indNames]
  rPaths <- rep('',length(fullPaths))
  if (length(grep('/',fullPaths)) == length(fullPaths)){
    if (nRec>1){
      for (i in 1:(nRec-1)){
        indi <- seq(i,length(tmp),nRec)
        rPaths <- paste(rPaths,tmp[indi],'/',sep='')
      }
    }
  }
  arrays <- strtrim(rFiles, nchar(rFiles) - nchar(pattern) - 1)
  if (!is.null(arrayNames)) {
    arrayNames <- gsub(paste('_',pattern,sep=''), "", arrayNames)
    ind <- which(arrays %in% arrayNames)
    if (length(ind) == 0) 
      stop("'arrayNames' did not match with files in specified path(s)")
    arrays <- arrays[ind]
    rFiles <- rFiles[ind]
    rPaths <- rPaths[ind]
  }
  nArrays <- length(arrays)
  message('Found ', nArrays, ' arrays')
  
  #Prepare phenoData
  arrayInfo <- data.frame(arrayNames=as.character(arrays),no.beads=rep(0,nArrays),stringsAsFactors=FALSE,row.names=arrays)
  if (length(grep(sepchar,arrays)) == nArrays){
    tmp <- unlist(strsplit(arrays,sepchar))
    if (length(tmp) == nArrays*3){
      arrayInfo$chip <- tmp[seq(1, length(tmp), by=3)]
      arrayInfo$row <- tmp[seq(2, length(tmp), by=3)]
      arrayInfo$col <- tmp[seq(3, length(tmp), by=3)]
    }else{
      arrayInfo$chip <- tmp[seq(1, length(tmp), by=2)]
      arrayInfo$row <- tmp[seq(2, length(tmp), by=2)]
      arrayInfo$col <- rep(1, length(arrayInfo$chip))
    }
  }
  labelDesc <- c('Array ID','No. detected beads','Chip ID','Array ID within chip','Strip ID within array')
  phenoMetadata <- data.frame(labelDescription=labelDesc[1:ncol(arrayInfo)],row.names=colnames(arrayInfo))  #keep descriptions only for columns actually present
  
  #Read assayData
  filesFull <- file.path(path, paste(rPaths,rFiles,sep=""))
  fc <- file(filesFull[1],open="r")
  header <- strsplit(readLines(fc,n=1),sep)
  if (!all(unlist(header)==c("Illumicode","N","Mean GRN","Dev GRN","Mean RED","Dev RED")))
    stop(paste('Unknown fields in header:',header))
  dat <- scan(file=fc,sep=sep,quiet=TRUE,
              what=list(ProbeID=integer(0),no.beads=integer(0),G=integer(0),sd.G=integer(0),
                R=integer(0),sd.R=integer(0)))
  close(fc)
  if (is.null(prList)){
    prList <- dat$ProbeID
  }
  if (length(prList)>length(dat$ProbeID)){
    stop(paste('The array',arrays[1],'contains fewer probes than specified in "prList"'))
  }else if (length(prList)<length(dat$ProbeID)){
    warning(paste('The array',arrays[1],'contains more probes than specified in "prList"'))
  }
  indP <- dat$ProbeID %in% prList #sapply(prList,function(x,ProbeID) which(ProbeID%in%x)[1],dat$ProbeID)
  if (!identical(prList,dat$ProbeID[indP])){
    stop(paste('The markers in array',arrays[1],'deviate from those in "prList"\n or are sorted differently.'))
  }
  nProbes <- length(prList)
  G <- se.G <- R <- se.R <- no.beads <-
    matrix(nrow=nProbes,ncol=nArrays,dimnames=list(prList,arrays))
  G[,1] <- dat$G[indP]
  se.G[,1] <- dat$sd.G[indP]/sqrt(dat$no.beads[indP])
  R[,1] <- dat$R[indP]
  se.R[,1] <- dat$sd.R[indP]/sqrt(dat$no.beads[indP])
  no.beads[,1] <- dat$no.beads[indP]
  if (nArrays>1){
    for (i in 2:nArrays){
      fc <- file(filesFull[i],open="r")
      dat <- scan(file=fc,sep=sep,quiet=TRUE,skip=1,
        what=list(ProbeID=integer(0),no.beads=integer(0),G=integer(0),sd.G=integer(0),
          R=integer(0),sd.R=integer(0)))
      close(fc)
      if (identical(prList,dat$ProbeID)){
        indP <- rep(TRUE,nProbes)
      }else{
        indP <- dat$ProbeID %in% prList
        if (!identical(prList,dat$ProbeID[indP])){
          stop(paste('Different probe-sets!\n',length(prList),'probes in "prList",',length(dat$ProbeID),'probes in array',arrays[i]))
        }
        warning(paste('The array',arrays[i],'contains more probes than specified in "prList"'))
      }
      G[,i] <- dat$G[indP]
      se.G[,i] <- dat$sd.G[indP]/sqrt(dat$no.beads[indP])
      R[,i] <- dat$R[indP]
      se.R[,i] <- dat$sd.R[indP]/sqrt(dat$no.beads[indP])
      no.beads[,i] <- dat$no.beads[indP]
    }
  }
  arrayInfo$no.beads <- apply(no.beads,2,sum)
  phenoData = new("AnnotatedDataFrame", data=arrayInfo, varMetadata=phenoMetadata)
  featureInfo <- data.frame(Address=prList,row.names=prList)
  featureMetadata <- data.frame(labelDescription="Unique strand identifier",row.names=colnames(featureInfo))
  featureData = new("AnnotatedDataFrame", data=featureInfo, varMetadata=featureMetadata)
  
  #Create summary data
  BSData = new('BeadSetIllumina', R=R, se.R=se.R, G=G, se.G=se.G, no.beads=no.beads,
    phenoData=phenoData, annotation='illuminaProbeIDs',
    featureData=featureData)
  sampleNames(BSData) <- make.names(sampleNames(BSData))
  featureNames(BSData) <- make.names(featureNames(BSData))
  rm(G,R,se.G,se.R,no.beads,arrayInfo,featureInfo,phenoData,featureData); gc()
  BSData
}
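
#Hedged usage sketch (not run): the directory layout below is hypothetical;
#'beadTypeFile.txt' is the default summary-file pattern used above.
if (FALSE) {
  BSData <- readBeadSummaryOutput(path='rawData',pattern='beadTypeFile.txt',recursive=TRUE)
  dim(BSData)          #probes x arrays
  head(pData(BSData))  #arrayNames, no.beads and (if parsed) chip, row, col
}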



#Retrieves indexes to probes corresponding to specified Norm.ID's
getNormInd <- function(beadInfo,featureNames,normID=NULL,verbose=TRUE){
  if (is.null(normID))
    normID <- unique(beadInfo$Norm.ID)   #use all
  normInd <- matrix(FALSE,nrow=length(featureNames),ncol=length(normID),
                    dimnames=list(sub('X','',featureNames),normID))
  for (i in 1:length(normID)){
    indi <- beadInfo$Norm.ID==normID[i]
    probeIDs <- beadInfo$Address[indi]
    if (nchar(normID[i])>1)
      probeIDs <- c(probeIDs,beadInfo$Address2[indi])
    normInd[as.character(probeIDs),i] <- TRUE
  }
  rownames(normInd) <- featureNames
  if (verbose){
    cat('Sum Norm.IDs:\n')
    print(apply(normInd,2,sum))
  }
  normInd
}
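
#Hedged usage sketch (not run): 'beadData.txt' is a hypothetical manifest file
#with the columns used above ('Norm.ID', 'Address' and, for Infinium I probes,
#'Address2').
if (FALSE) {
  beadInfo <- read.table('beadData.txt',header=TRUE,sep='\t',as.is=TRUE)
  normInd <- getNormInd(beadInfo,featureNames(BSData))
  colnames(normInd)    #one column per Norm.ID
}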



#Plots A vs. B, or R vs. G, depending on BSData
scatterArrays <- function(BSData,arrays,markers=seq(1,nrow(BSData),5),smooth=TRUE,newFigure=TRUE,maxPlots=72,...){
  an <- sampleNames(BSData)[arrays]
  nSelect <- length(an)
  if (newFigure){
    if (nSelect>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(nSelect))
    nR <- ceiling(nSelect/nC)
    par(mfrow=c(nR,nC),mai=c(1.2,1,1,.5)/sqrt(nR),...)
  }else if (nSelect>1){
    stop("Only one array allowed when 'newFigure'==FALSE")
  }
  if (all(c('A','B') %in% names(assayData(BSData)))){
    x <- 'A'
    y <- 'B'
  }else if (all(c('R','G') %in% names(assayData(BSData)))){
    x <- 'R'
    y <- 'G'
  }else{
    stop('Cartesian coordinate intensities required in assayData.')
  }
  for (i in 1:nSelect){
    if (smooth){
      #require('geneplotter')
      smoothScatter(assayData(BSData)[[x]][markers,an[i]],assayData(BSData)[[y]][markers,an[i]],main=an[i],xlab=x,ylab=y)
    }else{
      plot(assayData(BSData)[[x]][markers,an[i]],assayData(BSData)[[y]][markers,an[i]],main=an[i],xlab=x,ylab=y,pch='.')
    }
    xy <- range(par('usr'))
    lines(xy,xy,col='green')
    lines(xy,c(0,0),col='green')
    lines(c(0,0),xy,col='green')
  }
  if (newFigure){
    par(pp[c('mfcol','mai')])
  }
}
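
#Hedged usage sketch (not run): plot the first four arrays on a new figure,
#then a single array without smoothing into the current figure region.
if (FALSE) {
  scatterArrays(BSData,arrays=1:4)
  scatterArrays(BSData,arrays=2,smooth=FALSE,newFigure=FALSE)
}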



#Rotate and shear BEFORE any other transf.
shearRawSignal <- function(BSData,plot=FALSE,newFigure=plot,normOpts=setNormOptions(),maxPlots=72,...){
  nArrays <- ncol(BSData)
  if (newFigure){
    if (nArrays>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(nArrays))
    nR <- ceiling(nArrays/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
  }else if (plot & nArrays>1){
    stop("Only one array allowed when 'plot'==TRUE and 'newFigure'==FALSE")
  }
  message('Rotating and shearing raw signal...')
  for (i in 1:nArrays){
    rawData <- list()
    rawData$x <- assayData(BSData)$R[,i]
    rawData$y <- assayData(BSData)$G[,i]
    trData <- normalizeIllumina(rawData,normOpts=normOpts,plot=plot,...)
    assayData(BSData)$R[,i] <- trData$x
    assayData(BSData)$G[,i] <- trData$y
    gc()
  }
  if (newFigure) par(pp[c('mfcol','mai')])
  validObject(BSData)
  BSData
}
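
#Hedged usage sketch (not run): rotate and shear all arrays with the default
#normalization options, then inspect one array.
if (FALSE) {
  BSData <- shearRawSignal(BSData,normOpts=setNormOptions())
  scatterArrays(BSData,arrays=1)
}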



#If 'indTrain' is a list containing indexes to red and green Infinium I
#probes, lines are fitted through these data-points only. Otherwise, the same
#probes are used for finding both the red and the green shear. Default: all probes.
#NB! Be aware of how xBase and yBase are calculated
normalizeIllumina <- function(rawData,indTrain=rep(TRUE,length(rawData$x)),normOpts=setNormOptions(),plot=FALSE,xylim=NULL,verbose=FALSE){
  if (is.list(indTrain)){ #2-element list with indexes to infinium-I probes
    xTrainR <- rawData$x[indTrain$'10r']
    xTrainG <- rawData$x[indTrain$'20g']
    yTrainR <- rawData$y[indTrain$'10r']
    yTrainG <- rawData$y[indTrain$'20g']
    iInorm <- TRUE
  }else{
    xTrainR <- xTrainG <- rawData$x[indTrain]
    yTrainR <- yTrainG <- rawData$y[indTrain]
    iInorm <- FALSE
  }
    
  #Fit line through AA
  rngx <- range(xTrainR,na.rm=TRUE)
  step <- diff(rngx)/(normOpts$nBins-1)
  breaks <- seq(rngx[1],rngx[2],step)
  midsx <- breaks[-normOpts$nBins] + step/2
  xBase <- rep(NA,length=normOpts$nBins-1)
  for (i in 1:(normOpts$nBins-1)){
    indi <- xTrainR>breaks[i] & xTrainR<breaks[i+1]
    if (sum(indi,na.rm=T)>normOpts$minSize){
      if (iInorm){
        xBase[i] <- quantile(yTrainR[indi],prob=normOpts$prob,na.rm=TRUE)  #Best for inf-I
      }else{
        xBase[i] <- quantile(sort(yTrainR[indi])[1:normOpts$minSize],prob=normOpts$prob,na.rm=TRUE)
      }
    }
  }
  if (sum(!is.na(xBase))>2){
    abx <- coef(lm(xBase~midsx))
  }else{
    abx <- rep(NA,2)
    names(abx) <- c('(Intercept)','midsx')
  }
  
  #Fit line through BB 
  rngy <- range(yTrainG,na.rm=TRUE)
  step <- diff(rngy)/(normOpts$nBins-1)
  breaks <- seq(rngy[1],rngy[2],step)
  midsy <- breaks[-normOpts$nBins] + step/2
  yBase <- rep(NA,length=normOpts$nBins-1)
  for (i in 1:(normOpts$nBins-1)){
    indi <- yTrainG>breaks[i] & yTrainG<breaks[i+1]
    if (sum(indi,na.rm=T)>normOpts$minSize){
      if (iInorm){
        yBase[i] <- quantile(xTrainG[indi],prob=normOpts$prob,na.rm=TRUE)
      }else{
        yBase[i] <- quantile(sort(xTrainG[indi])[1:normOpts$minSize],prob=normOpts$prob,na.rm=TRUE)
      }
    }
  }
  if (sum(!is.na(yBase))>2){
    aby <- coef(lm(yBase~midsy))
    aby <- c(-aby[1]/aby[2],1/aby[2])
  }else{
    aby <- rep(NA,2)
    names(aby) <- c('(Intercept)','midsy')
  }
  
  #Rotate and shear 
  normData <- rawData
  normData$x0 <- (aby[1]-abx[1])/(abx[2]-aby[2])
  normData$y0 <- abx[1] + abx[2]*normData$x0
  normData$x <- normData$x - normData$x0
  normData$y <- normData$y - normData$y0
  xx <- cbind(c(1,1),c(0,1))
  xAngle <- atan2(diff(xx%*%abx),1)
  yAngle <- atan2(diff(xx%*%aby),1)
  tmpX2 <- cos(xAngle)*normData$x + sin(xAngle)*normData$y
  normData$y <- -sin(xAngle)*normData$x + cos(xAngle)*normData$y
  normData$x <- tmpX2-normData$y/tan(yAngle-xAngle)
  
  #Plot input data with fitted lines
  if (plot){
    if (is.null(xylim)){
      plot(rawData$x,rawData$y,pch='.',main=paste('nBins=',normOpts$nBins,', prob quantile=',normOpts$prob),xlab='R',ylab='G')
      if (iInorm){
        points(xTrainR,yTrainR,pch='.',col='red')
        points(xTrainG,yTrainG,pch='.',col='green')
      }else{
        points(rawData$x[-which(indTrain)],rawData$y[-which(indTrain)],pch='.',col='red')
      }
      xylim <- par('usr')
    }else{
      plot.new()
      plot.window(xylim[1:2],xylim[3:4])
      axis(1); axis(2)
      points(rawData$x,rawData$y,pch='.',main=paste('nBins=',normOpts$nBins,', prob quantile=',normOpts$prob),xlab='R',ylab='G')
      if (iInorm){
        points(xTrainR,yTrainR,pch='.',col='red')
        points(xTrainG,yTrainG,pch='.',col='green')
      }else{
        points(rawData$x[-which(indTrain)],rawData$y[-which(indTrain)],pch='.',col='red')
      }
   }
    xy <- range(xylim)
    points(midsx,xBase,col='blue',pch=16)
    lines(xy,abx[1]+xylim[1:2]*abx[2],col='red')
    points(yBase,midsy,col='blue',pch=16)
    lines(xy,aby[1]+xy*aby[2],col='green')
    #points(x0,y0,col='blue',pch=16)
    lines(xy,rep(normData$y0,2),col='blue')
    lines(rep(normData$x0,2),xy,col='blue')
  }
  if (verbose){
    message(sprintf('\tOffset\tSlope'))
    message(sprintf('R:\t%.4g\t%.4g',abx[1],abx[2]))
    message(sprintf('G:\t%.4g\t%.4g',aby[1],aby[2]))
    #message(sprintf('%.4g\t',aby))
  }
  normData
}
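
#Hedged sketch (not run) of calling normalizeIllumina() directly for a single
#array, the way shearRawSignal() does above; 'rawData' is a plain list with
#elements 'x' (red) and 'y' (green).
if (FALSE) {
  rawData <- list(x=assayData(BSData)$R[,1],y=assayData(BSData)$G[,1])
  trData <- normalizeIllumina(rawData,normOpts=setNormOptions(),plot=TRUE,verbose=TRUE)
  c(trData$x0,trData$y0)  #estimated origin removed before rotation and shearing
}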



#Returns a matrix of estimated noise parameters (median and MAD per channel) for each array
getNoiseDistributions <- function(BSData,subBeadPool=NULL,normInd,normOpts=setNormOptions(),plot=FALSE,newFigure=plot,maxPlots=72,xlim=NULL,...){
  nArrays <- ncol(BSData)
  if (newFigure){
    if (nArrays>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(nArrays))
    nR <- ceiling(nArrays/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
  }else if (plot & nArrays>1){
    stop("Only one array allowed when 'plot'==TRUE and 'newFigure'==FALSE")
  }
  an <- sampleNames(BSData)
  
  sPools <- colnames(normInd)
  if (is.null(subBeadPool))
    subBeadPool <- sPools[nchar(sPools)==1]
  #b10x <- normInd[,'101']
  #b20x <- normInd[,'201']
  #if (length(subBeadPool)>1){
    #for (i in 2:length(subBeadPool)){
      #b10x <- b10x | normInd[,as.character(100+i)]
      #b20x <- b20x | normInd[,as.character(200+i)]
    #}
  #}
  b10x <- normInd[,as.character(100+as.numeric(subBeadPool[1]))]
  b20x <- normInd[,as.character(200+as.numeric(subBeadPool[1]))]
  for (i in subBeadPool[-1]){
    b10x <- b10x | normInd[,as.character(100+as.numeric(i))]
    b20x <- b20x | normInd[,as.character(200+as.numeric(i))]
  }
  
  noiseDist <- matrix(nrow=nArrays,ncol=4,dimnames=list(an,c('medianR','medianG','madR','madG')))
  trChannel <- transformChannels(assayData(BSData)$R[b20x,],assayData(BSData)$G[b10x,],normOpts=normOpts)
  noiseDist[,'medianR'] <- apply(trChannel$X,2,median,na.rm=TRUE)
  noiseDist[,'medianG'] <- apply(trChannel$Y,2,median,na.rm=TRUE)
  noiseDist[,'madR'] <- apply(trChannel$X,2,mad,na.rm=TRUE)
  noiseDist[,'madG'] <- apply(trChannel$Y,2,mad,na.rm=TRUE)
  if (plot){
    if (is.null(xlim)){
      meanD <- apply(noiseDist,2,mean,na.rm=TRUE)
      xlim <- c(floor(min(meanD[1:2]-4*max(meanD[3:4]))),ceiling(max(meanD[1:2]+4*max(meanD[3:4]))))
    }
    for (i in 1:nArrays){
      hx <- hist(trChannel$X[,i],breaks=normOpts$breaks,plot=FALSE)
      hy <- hist(trChannel$Y[,i],breaks=normOpts$breaks,plot=FALSE)
      plot(hx$mids,hx$density,type='l',main=paste('Noise level,',an[[i]]),xlab='',ylab='',col='gray',xlim=xlim,...)
      lines(hx$mids,dnorm(hx$mids,noiseDist[i,1],noiseDist[i,3]),col='red')
      lines(hy$mids,hy$density,col='gray')
      lines(hy$mids,dnorm(hy$mids,noiseDist[i,2],noiseDist[i,4]),col='seagreen')
    }
  }
  if (newFigure){par(pp[c('mfcol','mai')])}
  noiseDist
}
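
#Hedged usage sketch (not run): estimate the per-array noise level from the
#bead pools indexed by 'normInd' (see getNormInd() above), using the same
#normalization options as the rest of the pipeline.
if (FALSE) {
  noiseDist <- getNoiseDistributions(BSData,normInd=normInd,normOpts=setNormOptions(),plot=FALSE)
  head(noiseDist)      #medianR, medianG, madR, madG per array
}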



#Transform the intensity channels (X, and optionally Y) according to 'normOpts'
transformChannels <- function(X,Y=NULL,normOpts=setNormOptions()){
  str1 <- '%d of %d %s-intensities below %d set to NA'
  switch(normOpts$transf,
         none={lstr=' '},
         log={
           X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(X)),length(X),'X',-normOpts$offset))
           X <- log2(X+normOpts$offset)
           lstr <- 'Log2'
           if (normOpts$offset!=0)
             lstr <- paste(lstr,normOpts$offset,'+')
           if (!is.null(Y)){
             Y[Y<=-normOpts$offset] <- NA
             message(sprintf(str1,sum(is.na(Y)),length(Y),'Y',-normOpts$offset))
             Y <- log2(Y+normOpts$offset)
           }},
         root={
           X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(X)),length(X),'X',-normOpts$offset))
           X <- (X+normOpts$offset)^(1/normOpts$nthRoot)
           lstr <- paste('nth-root (n=',normOpts$nthRoot,')',sep='')
           if (normOpts$offset!=0)
             lstr <- paste(lstr,', ',normOpts$offset,' +',sep='')
           if (!is.null(Y)){
             Y[Y<=-normOpts$offset] <- NA
             message(sprintf(str1,sum(is.na(Y)),length(Y),'Y',-normOpts$offset))
             Y <- (Y+normOpts$offset)^(1/normOpts$nthRoot)
           }}
         )
  trChannel <- list()
  trChannel$X <- as.matrix(X)
  trChannel$Y <- as.matrix(Y)
  trChannel$lstr <- lstr
  trChannel
}
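
#Hedged sketch (not run): transform both channels of one array with the default
#options; the returned list holds the transformed matrices plus a label string
#describing the transformation.
if (FALSE) {
  nO <- setNormOptions()
  trChannel <- transformChannels(assayData(BSData)$R[,1],assayData(BSData)$G[,1],normOpts=nO)
  str(trChannel)       #$X, $Y, $lstr
}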



#Scatter-plot with origin and noise indicated
#NB! 'noiseDist' must be transformed according to 'normOpts'. normInd = single index-vector
plotEstimatedNoise <- function(BSData,noiseDist,normInd=rep(TRUE,nrow(BSData)),normOpts=setNormOptions(),newFigure=TRUE,maxPlots=72,...){
  if (!identical(sampleNames(BSData),rownames(noiseDist)))
    stop("'BSData' and 'noiseDist' contain different samples")
  if (newFigure){
    if (ncol(BSData)>maxPlots)
      stop("Number of arrays for plotting exceeds 'maxPlots'")
    pp <- par()
    nC <- ceiling(sqrt(ncol(BSData)))
    nR <- ceiling(ncol(BSData)/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
  }else if (ncol(BSData)>1){
    stop("Only one array allowed when 'newFigure'==FALSE")
  }
  an <- sampleNames(BSData)
  for (i in 1:ncol(BSData)){
    trData <- transformChannels(assayData(BSData)$R[normInd,an[i]],assayData(BSData)$G[normInd,an[i]],normOpts=normOpts)
    plot(trData$X,trData$Y,pch='.',main=paste(an[i],', nSD =',normOpts$nSD),...)
    xy <- range(par('usr'))
    lines(rep(noiseDist[an[i],'medianR'],2),xy,col='green')
    lines(xy,rep(noiseDist[an[i],'medianG'],2),col='green')
    points(c(noiseDist[an[i],'medianR']-2*noiseDist[an[i],'madR'],
             noiseDist[an[i],'medianR']+2*noiseDist[an[i],'madR']),
           rep(noiseDist[an[i],'medianG'],2),type='b',pch=16,col='red')
    points(rep(noiseDist[an[i],'medianR'],2),
           c(noiseDist[an[i],'medianG']-2*noiseDist[an[i],'madG'],
             noiseDist[an[i],'medianG']+2*noiseDist[an[i],'madG']),type='b',pch=16,col='red')
  }
  if (newFigure) par(pp[c('mfcol','mai')])
}
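
#Hedged usage sketch (not run): 'noiseDist' must come from getNoiseDistributions()
#with the same 'normOpts'; 'normInd' is here a single logical vector, e.g. one
#column of the matrix returned by getNormInd() (the column name '1' is hypothetical
#and depends on the manifest's Norm.ID values).
if (FALSE) {
  plotEstimatedNoise(BSData,noiseDist,normInd=normInd[,'1'],normOpts=setNormOptions())
}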



#Transform SE's using a first order Taylor-expansion around the mean
transformSEs <- function(X,se.X,normOpts=setNormOptions()){
  str1 <- '%d of %d intensities below %d set to NA'
  switch(normOpts$transf,
         none={lstr=' '},
         log={
           se.X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(se.X)),length(se.X),-normOpts$offset))
           se.X <- se.X/(X+normOpts$offset)/log(2)
           lstr <- 'Log2'
           if (normOpts$offset!=0){
             warning('transformSEs() not properly tested for transf==\'log\' and offset!=0')
             lstr <- paste(lstr,normOpts$offset,'+')
           }
         },
         root={
           se.X[X<=-normOpts$offset] <- NA
           message(sprintf(str1,sum(is.na(se.X)),length(se.X),-normOpts$offset))
           se.X <- se.X*(X+normOpts$offset)^((1-normOpts$nthRoot)/normOpts$nthRoot)/normOpts$nthRoot
           lstr <- paste('nth-root (n=',normOpts$nthRoot,')',sep='')
           if (normOpts$offset!=0)
             lstr <- paste(lstr,', ',normOpts$offset,' +',sep='')
         })
  trSE <- list()
  trSE$SE <- se.X
  trSE$lstr <- lstr
  trSE
}
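
#Rationale for the expressions above (first-order Taylor/delta method): for a
#transformation f, se(f(X)) is approximated by |f'(X)|*se(X). With
#f(X) = log2(X+offset), f'(X) = 1/((X+offset)*log(2)), giving se.X/(X+offset)/log(2);
#with f(X) = (X+offset)^(1/n), f'(X) = (X+offset)^((1-n)/n)/n, giving the 'root' case.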



#Centres and scales based on percentile, assumes already rotated and sheared
normalizeShearedChannels <- function(trChannel,noiseDist,normOpts=setNormOptions()){#,b101=NULL,b201=NULL){
  nProbes <- nrow(trChannel$X)
  trChannel$X0 <- noiseDist[,'medianR']
  trChannel$Y0 <- noiseDist[,'medianG']
  trChannel$X.SE <- noiseDist[,'madR']
  trChannel$Y.SE <- noiseDist[,'madG']
  trChannel$X <- trChannel$X - matrix(1,nProbes,1)%*%trChannel$X0
  trChannel$Y <- trChannel$Y - matrix(1,nProbes,1)%*%trChannel$Y0
  
  switch(normOpts$method,
         none={trChannel$method <- 'The channels are not normalized'},
         quantNorm={   #Quantile normalization
           #require('limma')
           #iX <-  apply(trChannel$X,2,function(x) all(!is.na(x)))
           #iY <-  apply(trChannel$Y,2,function(x) all(!is.na(x)))
           iX <-  apply(trChannel$X,2,function(x) sum(!is.na(x))>1)
           iY <-  apply(trChannel$Y,2,function(x) sum(!is.na(x))>1)
           for (i in which(iX & iY)){
             normXY <- normalizeQuantiles(cbind(trChannel$X[,i],trChannel$Y[,i]))
             trChannel$X[,i] <- normXY[,1]
             trChannel$Y[,i] <- normXY[,2]
           }
           #trChannel$X.SE <- trChannel$Y.SE <- sqrt(trChannel$X.SE^2+trChannel$Y.SE^2)/2   #NB!
           trChannel$method <- 'The data have been quantile normalized (SE\'s not modified)'
         },
         medianAF={  #Scales red channel such that median(AF) is close to 0.5
           message('Median(AF)-estimates for a few arrays:')
           nDisp <- min(4,ncol(trChannel$X))
           message(sprintf('%s\t',colnames(trChannel$X)[1:nDisp]))
           for (rep in 1:8){
             PAF <- trChannel$X/(trChannel$X+trChannel$Y)
             medianAF <- apply(PAF,2,median,na.rm=TRUE)
             message(sprintf('%.6f\t',medianAF[1:nDisp]))
             trChannel$X <- trChannel$X/matrix(1,nProbes,1)%*%medianAF/2
             trChannel$X.SE <- trChannel$X.SE/medianAF/2
             #gc()
           }
           trChannel$method <- 'The red channel scaled such that median(AF) is close to 0.5'
         },
         linPeak={  #Scales each channel by its scale-th quantile
           trChannel$XScale <- apply(trChannel$X,2,quantile,normOpts$scale,na.rm=TRUE)
           trChannel$YScale <- apply(trChannel$Y,2,quantile,normOpts$scale,na.rm=TRUE)
           trChannel$X <- trChannel$X/matrix(1,nProbes,1)%*%trChannel$XScale
           trChannel$Y <- trChannel$Y/matrix(1,nProbes,1)%*%trChannel$YScale
           trChannel$X.SE <- trChannel$X.SE/trChannel$XScale
           trChannel$Y.SE <- trChannel$Y.SE/trChannel$YScale
           trChannel$method <- paste('Both channels scaled using the ',normOpts$scale,'-th quantile',sep='')
         },
         #linHomo={  #Scales each channel using quantile of homozygotes
         #  stop('linHomo not implemented for matrices')
         #  trChannel$XScale <- quantile(trChannel$X[b101],normOpts$scale,na.rm=TRUE)
         #  trChannel$YScale <- quantile(trChannel$Y[b201],normOpts$scale,na.rm=TRUE)
         #  trChannel$X <- trChannel$X/trChannel$XScale
         #  trChannel$Y <- trChannel$Y/trChannel$YScale
         #  trChannel$X.SE <- trChannel$X.SE/trChannel$XScale
         #  trChannel$Y.SE <- trChannel$Y.SE/trChannel$YScale
         #  trChannel$method <- paste('Each channel scaled using the ',normOpts$scale,'-th quantile of its infinium I homozygotes',sep='')
         #}
  )
  trChannel
}



#Merge BSData (R/G) into BSRed (A/B)
#beadInfo must contain 'Name', 'SNP', 'ILMN.Strand', 'Address', 'Address2', 'Norm.ID'
createAlleleSet <- function(BSData,beadInfo,normOpts,includeAB=FALSE){
  nSNP <- nrow(beadInfo)
  sPools <- unique(beadInfo$Norm.ID)
  subBeadPool <- sPools[nchar(sPools)==1]
  A <- B <- Var.pooled <- Min.Beads <- 
    matrix(nrow=nSNP,ncol=ncol(BSData),dimnames=list(rownames(beadInfo),sampleNames(BSData)))
  
  #Addresses are sorted in ascending order
  allProbes <- featureData(BSData)[[1]]
  indA1 <- which(is.element(allProbes,beadInfo$Address))
  indA2 <- which(is.element(allProbes,beadInfo$Address2))
  #Addresses ordered to follow beadInfo
  sortAdd1 <- sort(beadInfo$Address,index.return=TRUE)
  unSortAdd1 <- sort(sortAdd1$ix,index.return=TRUE)  #same as 'order()'
  Add2 <- beadInfo$Address2
  Add2[Add2==0] <- NA
  sortAdd2 <- sort(Add2,index.return=TRUE)
  unSortAdd2 <- sort(sortAdd2$ix[1:length(indA2)],index.return=TRUE)
  #Indexes used with 'Address'
  norm1 <- norm101 <- norm201 <- rep(FALSE,nSNP)
  for (i in subBeadPool){
    norm1 <- norm1 | beadInfo$Norm.ID==as.character(i)
    norm101 <- norm101 | beadInfo$Norm.ID==as.character(100+i)
    norm201 <- norm201 | beadInfo$Norm.ID==as.character(200+i)
  }
  #Indexes used with 'Address2'
  norm101_2 <- norm201_2 <- rep(FALSE,length(indA2))
  for (i in subBeadPool){
    norm101_2 <- norm101_2 | beadInfo[norm201|norm101,'Norm.ID']==as.character(100+i)
    norm201_2 <- norm201_2 | beadInfo[norm201|norm101,'Norm.ID']==as.character(200+i)
  }

  A[norm1,] <- assayData(BSData)$R[indA1,][unSortAdd1$ix,][norm1,]
  B[norm1,] <- assayData(BSData)$G[indA1,][unSortAdd1$ix,][norm1,]
  A[norm101,] <- assayData(BSData)$R[indA1,][unSortAdd1$ix,][norm101,]
  A[norm201,] <- assayData(BSData)$G[indA1,][unSortAdd1$ix,][norm201,]
  B[norm101,] <- assayData(BSData)$R[indA2,][unSortAdd2$ix,][norm101_2,]
  B[norm201,] <- assayData(BSData)$G[indA2,][unSortAdd2$ix,][norm201_2,]
  Min.Beads[norm1,] <- assayData(BSData)$no.beads[indA1,][unSortAdd1$ix,][norm1,]
  Min.Beads[norm101,] <- 
    pmin(assayData(BSData)$no.beads[indA1,][unSortAdd1$ix,][norm101,],
         assayData(BSData)$no.beads[indA2,][unSortAdd2$ix,][norm101_2,])
  Min.Beads[norm201,] <- 
    pmin(assayData(BSData)$no.beads[indA1,][unSortAdd1$ix,][norm201,],
         assayData(BSData)$no.beads[indA2,][unSortAdd2$ix,][norm201_2,])
  pol <- cart2pol(A,B,dist=normOpts$dist,pNorm=normOpts$pNorm)
  Var.pooled[norm1,] <- 
    (assayData(BSData)$se.R[indA1,][unSortAdd1$ix,][norm1,]^2 +
     assayData(BSData)$se.G[indA1,][unSortAdd1$ix,][norm1,]^2)/2
  Var.pooled[norm101,] <- 
    (assayData(BSData)$se.R[indA1,][unSortAdd1$ix,][norm101,]^2 +
     assayData(BSData)$se.R[indA2,][unSortAdd2$ix,][norm101_2,]^2)/2
  Var.pooled[norm201,] <- 
    (assayData(BSData)$se.G[indA1,][unSortAdd1$ix,][norm201,]^2 +
     assayData(BSData)$se.G[indA2,][unSortAdd2$ix,][norm201_2,]^2)/2
  SE <- findSeTheta(sqrt(Var.pooled),pol$r,dist=normOpts$dist,pNorm=normOpts$pNorm)
  
  beadMetadata = data.frame(labelDescription=character(ncol(beadInfo)),
    row.names=colnames(beadInfo),stringsAsFactors=FALSE)
  beadMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID'),1] <- 
    c('Marker ID', 'Polymorphism', 'TOP/BOT-category', 'Illumina strand ID',
      'Second strand ID (Infinium I)', 'Sub-bead pool ID')
  featureData <- new("AnnotatedDataFrame", data=beadInfo, varMetadata=beadMetadata,
                     dimLabels=c('featureNames','featureColumns'))
  BSRed <- new('AlleleSetIllumina', intensity=pol$r, theta=pol$th*2/pi,
               SE=SE, min.beads=Min.Beads, phenoData=phenoData(BSData),
               featureData=featureData)
  if (includeAB){
    assayData(BSRed)$A <- A
    assayData(BSRed)$B <- B
  } #end if
  validObject(BSRed)
  rm(pol,A,B,Var.pooled,SE,Min.Beads,featureData,beadInfo,BSData); gc()
  BSRed
}
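
#Hedged usage sketch (not run): 'beadInfo' holds one row per SNP with the columns
#listed above ('Name', 'SNP', 'ILMN.Strand', 'Address', 'Address2', 'Norm.ID'),
#matching the manifest used for getNormInd().
if (FALSE) {
  BSRed <- createAlleleSet(BSData,beadInfo,normOpts=setNormOptions())
  assayDataElementNames(BSRed)  #includes "intensity", "theta", "SE" and "min.beads"
}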



#Transform Cartesian coordinates to polar
cart2pol <- function(x,y,dist='euclidean',pNorm=NULL){
  pol <- list()
  switch(dist,
         euclidean={pol$r <- sqrt(x^2 + y^2)},
         manhattan={pol$r <- x+y},
         minkowski={pol$r <- (abs(x)^pNorm + abs(y)^pNorm)^(1/pNorm)}
         )
  pol$th <- atan2(y,x)
  pol
}
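
#Small worked example: with the default Euclidean distance, the point x = y = 1
#has r = sqrt(2) and th = pi/4; createAlleleSet() above rescales th by 2/pi so
#that theta runs from 0 (pure A) to 1 (pure B).
if (FALSE) {
  cart2pol(1,1)                    #$r = 1.414..., $th = 0.785...
  cart2pol(1,1,dist='manhattan')   #$r = 2
}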



#Make sure 'dataFiles' points to the relevant data. NB! 'beadInfo' MUST contain
#the total number of SNPs in the dataFiles, and 'markers' contains row-indexes
#to beadInfo. The vectors 'markers' and 'arrays' are indexes to rows and
#columns, i.e. they do NOT depend on the order in which the markers and arrays
#are given in the files.
createAlleleSetFromFiles <- function(dataFiles,markers,arrays,phenoInfo=NULL,beadInfo=NULL,sep='\t',quote=''){
  if (!all(c('intFile','thFile','seFile') %in% names(dataFiles)))
    stop("The filenames 'intFile', 'thFile', and 'seFile' must all be provided")
  if ('resFile' %in% names(dataFiles)){
    message("Loading featureData (ignoring 'beadInfo'-argument) ...")
    beadInfo <- read.table(dataFiles[['resFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo), stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID','Classification',  #try()?
                'Cent.Deviation','Within.SD','HW.Chi2','HW.P','BAF.Locus1','BAF.Locus2',
                'Call.Rate'),1] <- 
                  c('Marker ID','Polymorphism','TOP/BOT-category','Illumina strand ID',
                    'Second strand ID (Infinium I)','Sub-bead pool ID',
                    'Genotype call from automatic clustering',
                    'Largest distance from cluster-centre to its ideal position',
                    'Largest within-cluster spread',
                    'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                    'Probability of Hardy-Weinberg equilibrium',
                    'Estimated B-allele frequency',
                    'Estimated BAF of second paralogue (if exists)',
                    'Ratio of samples assigned to clusters')
  }else if(!is.null(beadInfo)){
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo),stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID'),1] <- 
      c('Marker ID', 'Polymorphism', 'TOP/BOT-category', 'Illumina strand ID',
        'Second strand ID (Infinium I)', 'Sub-bead pool ID')
  }else{
    stop("No available featureData. Provide 'beadInfo'-table or 'resFile'-filename")
  }
  allMarkers <- unlist(read.table(dataFiles[['intFile']],nrows=1,as.is=TRUE,sep=sep))
  if (missing(markers)){
    markers <- 1:min(nrow(beadInfo),144)
  }else if (is.character(markers)){
    markers <- which(allMarkers %in% markers)
  }else{
    markers <- (1:length(allMarkers))[markers]
  }
  featureData <- new("AnnotatedDataFrame", data=beadInfo[markers,], varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  
  if ('phFile' %in% names(dataFiles)){
    message("Loading phenoData (ignoring 'phenoInfo'-argument) ...")
    phenoInfo <- read.table(dataFiles[['phFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
    phenoInfo$noiseIntensity <- as.numeric(phenoInfo$noiseIntensity)
  }else if(is.null(phenoInfo)){
    stop("No available phenoData. Provide 'phenoInfo'-table or 'phFile'-filename")
  }
  pMetadata <- data.frame(labelDescription=character(ncol(phenoInfo)), row.names=colnames(phenoInfo), stringsAsFactors=FALSE)
  pMetadata[c('arrayNames','no.beads','chip','row','col','noiseIntensity'),1] <-
    c('Array ID','No. detected beads','Chip ID','Array ID within chip',
      'Strip ID within array','Estimated signal detection-limit')
  if (missing(arrays)){
    #arrays <- 1:min(nrow(phenoInfo),500)
    arrays <- 1:nrow(phenoInfo)
  }else if(is.character(arrays)){
    arrays <- which(rownames(phenoInfo) %in% arrays)
  }else{
    arrays <- (1:nrow(phenoInfo))[arrays]
  }
  phenoData <- new("AnnotatedDataFrame", data=phenoInfo[arrays,], varMetadata=pMetadata, dimLabels=c('sampleNames','sampleColumns'))

  colClasses <- rep('NULL',length(allMarkers)+1)
  colClasses[1] <- 'character'
  colClasses[markers+1] <- 'numeric'
  orderM <- order(sort(markers,index.return=TRUE)$ix)
  message('Loading intensities...')
  intensity <- t(read.table(dataFiles[['intFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
  intensity <- intensity[orderM,arrays]
  message('Loading thetas...')
  theta <- t(read.table(dataFiles[['thFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
  theta <- theta[orderM,arrays]
  message('Loading standard errors...')
  SE <- t(read.table(dataFiles[['seFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
  SE <- SE[orderM,arrays]
  BSRed <- new('AlleleSetIllumina', intensity=intensity, theta=theta, SE=SE, phenoData=phenoData, featureData=featureData)
  rm('intensity','theta','SE','phenoData','featureData'); gc()

  if ('callFile' %in% names(dataFiles)){
    callHeader <- c('',unlist(read.table(dataFiles[['callFile']],nrows=1,as.is=TRUE,sep=sep)))
    callCols <- rep('NULL',length(callHeader))
    callCols[1] <- 'character'
    callCols[callHeader %in% sampleNames(BSRed)] <- 'numeric'
    indFound <- sampleNames(BSRed) %in% callHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    call <- matrix(nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
    message('Loading calls...')
    skip <- min(markers)
    callFrame <- read.table(dataFiles[['callFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=callCols,skip=skip,row.names=1,col.names=callHeader)
    call[,indFound] <- as.matrix(callFrame[markers+1-skip,orderA])
    assayData(BSRed)$call <- call
    validObject(BSRed)
  }
  if ('genoFile' %in% names(dataFiles)){
    genoHeader <- c('',unlist(read.table(dataFiles[['genoFile']],nrows=1,as.is=TRUE,sep=sep)))
    genoCols <- rep('NULL',length(genoHeader))
    genoCols[1] <- 'character'
    genoCols[genoHeader %in% sampleNames(BSRed)] <- 'character'
    indFound <- sampleNames(BSRed) %in% genoHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    genotype <- matrix('',nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
    message('Loading genotypes...')
    skip <- min(markers)
    genoFrame <- read.table(dataFiles[['genoFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=genoCols,skip=skip,row.names=1,col.names=genoHeader)
    genotype[,indFound] <- gsub('\"','',as.matrix(genoFrame[markers+1-skip,orderA]))
    assayData(BSRed)$genotype <- genotype
    validObject(BSRed)
  }
  BSRed
}
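
#Hedged usage sketch (not run): the file names below are hypothetical; 'intFile',
#'thFile' and 'seFile' are mandatory, the remaining entries are optional.
if (FALSE) {
  dataFiles <- c(intFile='intensity.txt',thFile='theta.txt',seFile='SE.txt',
                 resFile='beadData.txt',phFile='phenoData.txt')
  BSRed <- createAlleleSetFromFiles(dataFiles,markers=1:100,arrays=1:48)
}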



createMultiSetFromFiles <- function(dataFiles,markers,arrays,phenoInfo=NULL,beadInfo=NULL,sep='\t',quote=''){
  if ('resFile' %in% names(dataFiles)){
    message("Loading featureData (ignoring 'beadInfo'-argument) ...")
    beadInfo <- read.table(dataFiles[['resFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo), stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID','Classification',  #try()?
                'Cent.Deviation','Within.SD','HW.Chi2','HW.P','BAF.Locus1','BAF.Locus2',
                'Call.Rate'),1] <- 
                  c('Marker ID','Polymorphism','TOP/BOT-category','Illumina strand ID',
                    'Second strand ID (Infinium I)','Sub-bead pool ID',
                    'Genotype call from automatic clustering',
                    'Largest distance from cluster-centre to its ideal position',
                    'Largest within-cluster spread',
                    'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                    'Probability of Hardy-Weinberg equilibrium',
                    'Estimated B-allele frequency',
                    'Estimated BAF of second paralogue (if exists)',
                    'Ratio of samples assigned to clusters')
  }else if(!is.null(beadInfo)){
    fMetadata <- data.frame(labelDescription=character(ncol(beadInfo)), row.names=colnames(beadInfo),stringsAsFactors=FALSE)
    fMetadata[c('Name','SNP','ILMN.Strand','Address','Address2','Norm.ID'),1] <- 
      c('Marker ID', 'Polymorphism', 'TOP/BOT-category', 'Illumina strand ID',
        'Second strand ID (Infinium I)', 'Sub-bead pool ID')
  }else{
    stop("No available featureData. Provide 'beadInfo'-table or 'resFile'-filename")
  }
  allMarkers <- rownames(beadInfo)
  if (missing(markers)){
    markers <- 1:min(nrow(beadInfo),128)
  }else if (is.character(markers)){
    markers <- which(allMarkers %in% markers)
  }else{
    markers <- (1:length(allMarkers))[markers]
  }
  featureData <- new("AnnotatedDataFrame", data=beadInfo[markers,], varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  
  if ('phFile' %in% names(dataFiles)){
    message("Loading phenoData (ignoring 'phenoInfo'-argument) ...")
    phenoInfo <- read.table(dataFiles[['phFile']],header=TRUE,sep=sep,quote=quote,as.is=TRUE)
  }else if(is.null(phenoInfo)){
    stop("No available phenoData. Provide 'phenoInfo'-table or 'phFile'-filename")
  }
  pMetadata <- data.frame(labelDescription=character(ncol(phenoInfo)), row.names=colnames(phenoInfo), stringsAsFactors=FALSE)
  pMetadata[c('arrayNames','no.beads','chip','row','col','noiseIntensity'),1] <-
    c('Array ID','No. detected beads','Chip ID','Array ID within chip',
      'Strip ID within array','Estimated signal detection-limit')
  if (missing(arrays)){
    #arrays <- 1:min(nrow(phenoInfo),500)
    arrays <- 1:nrow(phenoInfo)
  }else if(is.character(arrays)){
    arrays <- which(rownames(phenoInfo) %in% arrays)
  }else{
    arrays <- (1:nrow(phenoInfo))[arrays]
  }
  phenoData <- new("AnnotatedDataFrame", data=phenoInfo[arrays,], varMetadata=pMetadata, dimLabels=c('sampleNames','sampleColumns'))

  colClasses <- rep('NULL',length(allMarkers)+1)
  colClasses[1] <- 'character'
  colClasses[markers+1] <- 'numeric'
  orderM <- order(sort(markers,index.return=TRUE)$ix)
  if ('intFile' %in% names(dataFiles)){
    message('Loading intensities...')
    intensity <- t(read.table(dataFiles[['intFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
    intensity <- intensity[orderM,arrays]
  }else{
    intensity <- NULL
  }
  if ('thFile' %in% names(dataFiles)){
    message('Loading thetas...')
    theta <- t(read.table(dataFiles[['thFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
    theta <- theta[orderM,arrays]
  }else{
    theta <- NULL
  }
  if ('seFile' %in% names(dataFiles)){
    message('Loading standard errors...')
    SE <- t(read.table(dataFiles[['seFile']],header=TRUE,sep=sep,quote=quote,colClasses=colClasses,nrows=max(arrays)))
    SE <- SE[orderM,arrays]
  }else{
    SE <- NULL
  }
  BSRed <- new('MultiSet', intensity=intensity, theta=theta, SE=SE, phenoData=phenoData, featureData=featureData, storage.mode='list')
  sampleNames <- rownames(phenoData@data)
  featureNames <- rownames(featureData@data)
  rm('intensity','theta','SE','phenoData','featureData'); gc()
  
  if ('callFile' %in% names(dataFiles)){
    callHeader <- c('',unlist(read.table(dataFiles[['callFile']],nrows=1,as.is=TRUE,sep=sep)))
    callCols <- rep('NULL',length(callHeader))
    callCols[1] <- 'character'
    callCols[callHeader %in% sampleNames] <- 'numeric'
    indFound <- sampleNames %in% callHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    call <- matrix(nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames,sampleNames))
    message('Loading calls...')
    skip <- min(markers)
    callFrame <- read.table(dataFiles[['callFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=callCols,skip=skip,row.names=1,col.names=callHeader)
    call[,indFound] <- as.matrix(callFrame[markers+1-skip,orderA])
    assayData(BSRed)$call <- call
    validObject(BSRed)
  }
  if ('genoFile' %in% names(dataFiles)){
    genoHeader <- c('',unlist(read.table(dataFiles[['genoFile']],nrows=1,as.is=TRUE,sep=sep)))
    genoCols <- rep('NULL',length(genoHeader))
    genoCols[1] <- 'character'
    genoCols[genoHeader %in% sampleNames] <- 'character'
    indFound <- sampleNames %in% genoHeader
    orderA <- order(sort(arrays[indFound],index.return=TRUE)$ix)
    genotype <- matrix('',nrow=length(markers),ncol=length(arrays),dimnames=list(featureNames,sampleNames))
    message('Loading genotypes...')
    skip <- min(markers)
    genoFrame <- read.table(dataFiles[['genoFile']],header=FALSE,sep=sep,quote=quote,nrows=max(markers)+1-skip,colClasses=genoCols,skip=skip,row.names=1,col.names=genoHeader)
    genotype[,indFound] <- gsub('\"','',as.matrix(genoFrame[markers+1-skip,orderA]))
    assayData(BSRed)$genotype <- genotype
    validObject(BSRed)
  }
  BSRed
}



callGenotypes <- function(BSRed,gO=setGenoOptions(largeSample=ncol(BSRed)>250)){
  #Quality control, SNP- and array-filtering
  if ('call' %in% names(assayData(BSRed))){
    OK = FALSE
    while (!OK){
      ovrwrt <- readline(prompt='Warning! Call-matrix already exists. Overwrite? (yes/no) ')
      OK <- ovrwrt %in% c('yes','no')
    }
    if (ovrwrt %in% 'yes'){
      message("Matrix 'assayData(BSRed)$call' will be replaced!")
      pData(BSRed)$passRatio <- NULL
      fData(BSRed)$Ped.Errors <- NULL
      assayData(BSRed)[grep('ped.check',names(assayData(BSRed)))] <- NULL
    }else{
      stop("Exits, previous calls are retained")
    }
  }
  descNames <- c('Classification','Cent.Deviation','Within.SD','HW.Chi2','HW.P','BAF.Locus1',
                 'BAF.Locus2','Call.Rate')
  if (any(descNames %in% colnames(fData(BSRed)))){
    OK = FALSE
    while (!OK){
      ovrwrt <- readline(prompt='Warning! Overlapping column-names in featureData. Overwrite? (yes/no) ')
      OK <- ovrwrt %in% c('yes','no')
    }
    if (ovrwrt %in% 'yes'){
      message("Overlapping descriptors will be replaced!")
      featureData(BSRed) <- featureData(BSRed)[,-which(colnames(fData(BSRed)) %in% descNames)]
    }else{
      stop("Exits, previous descriptors are retained")
    }
  }
  iSnps <- apply(assayData(BSRed)$theta,1,function(x,cc) sum(!is.na(x))>cc,cc=gO$arrayPerSnpLim*ncol(BSRed))
  message(sum(!iSnps),' SNP(s) with more than ',100-gO$arrayPerSnpLim*100,'% missings disregarded')
  iArrays <- apply(assayData(BSRed)$theta,2,function(x,cc) sum(!is.na(x))>cc,cc=gO$snpPerArrayLim*nrow(BSRed))
  message(sum(!iArrays),' array(s) with more than ',100-gO$snpPerArrayLim*100,'% missings disregarded')
  mNoise <- mean(pData(BSRed)$noiseIntensity[iArrays],na.rm=TRUE)
  if (sum(iSnps)==1){
    R <- t(as.matrix(assayData(BSRed)$intensity[iSnps,iArrays]))
    Theta <- t(as.matrix(assayData(BSRed)$theta[iSnps,iArrays]))
    SE <- t(as.matrix(assayData(BSRed)$SE[iSnps,iArrays]))
    dimnames(R)[1] <- dimnames(Theta)[1] <- dimnames(SE)[1] <- names(iSnps)
  }else{
    R <- assayData(BSRed)$intensity[iSnps,iArrays]
    Theta <- assayData(BSRed)$theta[iSnps,iArrays]
    SE <- assayData(BSRed)$SE[iSnps,iArrays]
  }
  iFilter <- diff(apply(Theta,1,range,na.rm=TRUE))>gO$filterLim
  nKeep <- sum(iFilter)
  message(sum(!iFilter),' monomorphs with range(theta)<',gO$filterLim,' are detected')
  
  polyCent <- generatePolyCenters(ploidy=gO$ploidy)
  assayData(BSRed)$call <- matrix(nrow=nrow(BSRed),ncol=ncol(BSRed),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
  classVec <- character(nrow(BSRed))
  classVec[!iSnps] <- 'NA'
  classVec[iSnps][!iFilter] <- 'MONO-filt'
  fData <- data.frame(Classification=classVec,Cent.Deviation=rep(NA,nrow(BSRed)),Within.SD=rep(NA,nrow(BSRed)),HW.Chi2=rep(NA,nrow(BSRed)),HW.P=rep(NA,nrow(BSRed)),BAF.Locus1=rep(NA,nrow(BSRed)),BAF.Locus2=rep(NA,nrow(BSRed)),Call.Rate=rep(NA,nrow(BSRed)),stringsAsFactors=FALSE,row.names=featureNames(BSRed))
  fDesc <-  c('Genotype call from automatic clustering',
              'Largest distance from cluster-centre to its ideal position',
              'Largest within-cluster spread',
              'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
              'Probability of Hardy-Weinberg equilibrium',
              'Estimated B-allele frequency',
              'Estimated BAF of second paralogue (if exists)',
              'Ratio of samples assigned to clusters')
  fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)
  count <- 0
  for (i in which(iFilter)){ 
    count <- count + 1
    if (count %in% seq(min(100,nKeep),nKeep,100))
      message('Called ',count,' of ',nKeep,' genotypes')
    iMiss <- is.na(R[i,])|is.na(SE[i,])|is.na(Theta[i,])
    iDet <- R[i,]>mNoise & !iMiss & Theta[i,]>-.5 & Theta[i,]<1.5    #Filter bad samples
    #if (sum(iDet)<=sum(iArrays)*gO$detectLim){
    if (sum(iDet)<=sum(!iMiss)*gO$detectLim){
      fData$Classification[iSnps][[i]] <- 'FAIL'
    }else{
      sclR <- median(R[i,iDet],na.rm=TRUE)*2*gO$rPenalty                   #Factor by which R is divided before clustering
      X <- matrix(cbind(Theta[i,],R[i,]/sclR,SE[i,])[iDet,],ncol=3)
      if (is.null(gO$probsIndSE)){
        indSE <- rep(TRUE,sum(iDet))
      }else{
        indSE <- X[,3]<quantile(X[,3],probs=gO$probsIndSE,na.rm=TRUE)      #Uncertain points removed from clustering
      }
      #qRange <- getQRange(X[indSE,1],minScl=3,probs=gO$probsQRange)     #Initial cluster-centers
      #sConf <- distLikelyConfiguration(X[indSE,1:2],polyCent,qRange=qRange)  #Ordered list of likely classifications
      #maxConf <- sum(sConf$x<gO$mseLim,na.rm=TRUE)
      sConf <- getCenters(X[indSE,1],gO=gO,polyCent=polyCent)
      maxConf <- length(sConf$ix)
      OK <- FALSE
      iConf <- 1
      while (!OK){
        testCallRate <- FALSE
        goodSNP <- FALSE
        nCl <- polyCent$size[sConf$ix[iConf]]
        cntIdeal <- polyCent$centers[[sConf$ix[iConf]]]
        centers <- sConf$centers[[iConf]]
        clObj <- findPolyploidClusters(X,indSE=indSE,centers=centers,wss.update=FALSE)
        tmpRes <- rep(NA,7)
        if (any(clObj$size>0)){
          iCl <- which(clObj$size>0)
          tmpRes[1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
          #tmpRes[1] <- mean(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
          if (tmpRes[1]<gO$devCentLim & sum(clObj$size<1)<2){
            wSpread <- tapply(X[,1],clObj$cluster,sd)   #NB! MAD?
            wSpread[clObj$size[iCl]<2] <- 0   #use gO$minClLim instead?
            tmpRes[2] <- max(wSpread)  #Max within-cluster spread (Theta)
            if (tmpRes[2]<gO$wSpreadLim){
              if (nCl==1){
                tmpRes[5:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=sConf$ix[iConf],polyCent=polyCent,afList=gO$afList)[4:5])
                testCallRate <- TRUE
              }else{
                tmpRes[3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=sConf$ix[iConf],polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
                minB <- (clObj$centers[iCl,1] - gO$nSdOverlap*wSpread)[-1]
                maxB <- (clObj$centers[iCl,1] + gO$nSdOverlap*wSpread)[-length(iCl)]
                ovrlp <- any(round(minB*100)<round(maxB*100))   #Overlapping clusters?
                                        #goodSNP <- tmpRes[1]+2*tmpRes[2]<gO$goodSnpLim  & sConf$ix[iConf]==which(polyCent$classification=='SNP') & sum(clObj$size>=min(gO$minClLim,ncol(BSRed)/100))==3
                                        #if (tmpRes[4]>gO$hwAlpha | goodSNP){
                if (tmpRes[4]>gO$hwAlpha & !ovrlp){
                  testCallRate <- TRUE
                }else{
                  iConf <- iConf + 1
                }
              }
              if (testCallRate){
                nClRed <- length(iCl)
                clCtrs <- matrix(clObj$centers[iCl,],nrow=nClRed)
                indOut <- matrix(FALSE,nrow=nrow(X),ncol=nCl)
                indKeep <- rep(FALSE,nrow(X))
                indOverlap <- rep(FALSE,nrow(X))
                Sratio <- rep(1,nCl)
                for (j in iCl){
                  indj <- clObj$cluster==j
                  nj <- sum(indj)
                  if (nj>=gO$minClLim){    #Hotellings T^2-ellipse superimposed on clusters
                    ctrX <- apply(X[indj,1:2],2,median,na.rm=TRUE)
                    Xj <- X[indj,1:2]-matrix(ctrX,nrow=nj,ncol=2,byrow=TRUE)
                    Sj <- t(Xj)%*%Xj/(nj-1)
                    Sratio[j] <- Sj[1,1]/(Sj[2,2]*gO$rPenalty)  #Cluster 'angle', given comparable axes
                    Xall <- X[,1:2]-matrix(ctrX,nrow=nrow(X),ncol=2,byrow=TRUE)
                    T2j <- diag(Xall%*%solve(Sj)%*%t(Xall))
                    f.alpha <- T2j*(nj-2)*nj/(2*(nj-1)*(nj+1))
                    pVal <- 1 - pf(f.alpha,2,nj-2)
                  }else{
                    pVal <- rep(0,nrow(X))
                    pVal[indj] <- 1
                  }
                  indOut[,j] <- pVal < gO$clAlpha
                  indKeep[indj] <- !indOut[indj,j]
                  indOverlap[!indOut[,j]][clObj$cluster[!indOut[,j]]!=j] <- TRUE
                }
                plot <- FALSE
                if (plot){
                  plot.new()
                  plot.window(c(-.2,1.2),c(0,max(max(X[,2]),1/gO$rPenalty)))
                  axis(1); axis(2)
                  points(X[,1],X[,2],pch=16)
                  points(X[!indKeep,1],X[!indKeep,2],pch=16,col='red')   #outside ellipses
                  points(X[indOverlap,1],X[indOverlap,2],pch=16,col='orange')  #overlapping ellipses
                  points(clCtrs[,1],clCtrs[,2],col='green',pch=16)    #cluster-centres
                }
                indKeep <- indKeep &! indOverlap
                #tmpRes[7] <- sum(indKeep)/sum(iArrays)
                tmpRes[7] <- sum(indKeep)/sum(!iMiss)
                                        #psRat <- prod(Sratio[clObj$size>ncol(BSRed)/10])
                psRat <- sum(Sratio*clObj$size)/(ncol(BSRed)*nClRed)
                                        #goodSNP <- goodSNP & sum(!indOverlap)==sum(iDet)
                
                if ((tmpRes[7]>gO$detectLim & psRat<=gO$rotationLim)){# | goodSNP){   #Success!
                  if (sum(iSnps)==1){
                    assayData(BSRed)$call[iSnps,iArrays][iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
                  }else{
                    assayData(BSRed)$call[iSnps,iArrays][i,iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
                  }
                  fData$Classification[iSnps][[i]] <- polyCent$classification[[sConf$ix[iConf]]]
                  fData$Cent.Deviation[iSnps][[i]] <- tmpRes[1]
                  fData$Within.SD[iSnps][i] <- tmpRes[2]
                  fData$HW.Chi2[iSnps][i] <- tmpRes[3]
                  fData$HW.P[iSnps][i] <- tmpRes[4]
                  fData$BAF.Locus1[iSnps][i] <- 1-tmpRes[5]  #NB! B-af, not A-af
                  fData$BAF.Locus2[iSnps][i] <- 1-tmpRes[6]  #NB!
                  fData$Call.Rate[iSnps][i] <- tmpRes[7]
                  #fData$Max.Rotation[iSnps][i] <- max(Sratio)
                  OK <- TRUE
                }else{
                  iConf <- iConf + 1
                } #end if success
              } #end if callrate
            }else{
              iConf <- iConf + 1
            } #end if wSpread
          }else{
            iConf <- iConf + 1
          } #end if devCent
          if (iConf>maxConf){
            fData$Classification[iSnps][[i]] <- 'FAIL'
            OK <- TRUE
          }
        }else{
          fData$Classification[iSnps][[i]] <- 'FAIL'
          OK <- TRUE
        } #end if any(clObj$size)
      } #end while !OK
    } #end if gO$detectLim
  } #end for
  featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  featureData(BSRed) <- combine(featureData(BSRed), featureData)
  validObject(BSRed)
  BSRed
}
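
#Hedged usage sketch (not run): call genotypes with the default options object;
#the 'largeSample' switch mirrors the function's own default above.
if (FALSE) {
  BSRed <- callGenotypes(BSRed,gO=setGenoOptions(largeSample=ncol(BSRed)>250))
  table(fData(BSRed)$Classification)
}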


#Tests automatic genotype calling for a single marker and plots the different outcomes. Outputs all results for that marker
callGenotypes.verboseTest <- function(BSRed,singleMarker=1,gO=setGenoOptions(largeSample=ncol(BSRed)>250)){
  message(sprintf("Verbosely analyzing marker '%s'",featureNames(BSRed)[singleMarker]))
  iSnps <- apply(assayData(BSRed)$theta,1,function(x,cc) sum(!is.na(x))>cc,cc=gO$arrayPerSnpLim*ncol(BSRed))
  if (!iSnps[singleMarker]){
    message('Failure as the ratio of missing arrays exceeds (1 - "arrayPerSnpLim")')
  }
  iSnps <- singleMarker
  iArrays <- apply(assayData(BSRed)$theta,2,function(x,cc) sum(!is.na(x))>cc,cc=gO$snpPerArrayLim*nrow(BSRed))
  message(sum(!iArrays),' array(s) with more than ',100-gO$snpPerArrayLim*100,'% missings found')
  mNoise <- mean(pData(BSRed)$noiseIntensity[iArrays],na.rm=TRUE)
  R <- as.matrix(assayData(BSRed)$intensity[singleMarker,iArrays])
  Theta <- as.matrix(assayData(BSRed)$theta[singleMarker,iArrays])
  SE <- as.matrix(assayData(BSRed)$SE[singleMarker,iArrays])
  dimnames(R)[2] <- dimnames(Theta)[2] <- dimnames(SE)[2] <- featureNames(BSRed)[singleMarker]
  iFilter <- diff(range(Theta,na.rm=TRUE))>gO$filterLim
  if (!iFilter){
    message('The selected marker would be filtered away as a monomorph based on range(Theta)')
    return()
  }
  BSRed <- BSRed[singleMarker,]
  polyCent <- generatePolyCenters(ploidy=gO$ploidy)
  #nTest <- length(polyCent$size)
  #message(sprintf('%d allowed cluster-combinations',nTest))
  #call <- matrix(nrow=nTest,ncol=ncol(BSRed),dimnames=list(polyCent$classification,sampleNames(BSRed)))
  #fData <- data.frame(Cent.Deviation=rep(NA,nTest),Within.SD=rep(NA,nTest),HW.Chi2=rep(NA,nTest),HW.P=rep(NA,nTest),BAF.Locus1=rep(NA,nTest),BAF.Locus2=rep(NA,nTest),Call.Rate=rep(NA,nTest),Overlap=rep(NA,nTest),Rotation=rep(NA,nTest),stringsAsFactors=FALSE,row.names=polyCent$classification)
  #fDesc <-  c('Largest distance from cluster-centre to its ideal position',
  #            'Largest within-cluster spread',
  #            'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
  #            'Probability of Hardy-Weinberg equilibrium',
  #            'Estimated B-allele frequency',
  #            'Estimated BAF of second paralogue (if exists)',
  #            'Ratio of samples assigned to clusters',
  #            'TRUE if there is an initial cluster overlap (along \'Theta\')',
  #            'Large value (>1) means increasingly slanting clusters')
  #fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)

  iMiss <- is.na(R)|is.na(SE)|is.na(Theta)
  iDet <- R>mNoise & !iMiss & Theta>-.5 & Theta<1.5    #Filter bad arrays
  message(sprintf('%d low-quality or missing array(s) filtered away for this marker',sum(!iDet)))
  #if (sum(iDet)<=sum(iArrays)*gO$detectLim){
  if (sum(iDet)<=sum(!iMiss)*gO$detectLim){
    str <- 'Failure as the ratio of high-quality to non-missing arrays (%.2f) is smaller than "detectLim"'
    message(sprintf(str,sum(iDet)/sum(!iMiss)))
  }else{
    sclR <- median(R[iDet],na.rm=TRUE)*2*gO$rPenalty                   #Factor by which R is divided before clustering
    X <- matrix(cbind(Theta,R/sclR,SE)[iDet,],ncol=3)
    if (is.null(gO$probsIndSE)){
      indSE <- rep(TRUE,sum(iDet))
    }else{
      indSE <- X[,3]<quantile(X[,3],probs=gO$probsIndSE,na.rm=TRUE)      #Uncertain points removed from clustering
    }
    #qRange <- getQRange(X[indSE,1],minScl=3,probs=gO$probsQRange)     #Initial cluster-centers
    #sConf <- distLikelyConfiguration(X[indSE,1:2],polyCent,qRange=qRange)  #Ordered list of likely classifications
    sConf <- getCenters(X[indSE,1],gO=gO,polyCent=polyCent)
    print(sConf)
    nTest <- length(sConf$ix)
    message(sprintf('%d cluster-combinations tested',nTest))
    rNames <- unlist(polyCent$classification[sConf$ix])
    if (nTest>1){
      for (i in 2:length(rNames)){
        sumRep <- sum(rNames[1:(i-1)]%in%rNames[i])
        if (sumRep>0){
          rNames[i] <- paste(rNames[i],sumRep,sep='.')
        }
      }
    }
    call <- matrix(nrow=nTest,ncol=ncol(BSRed),dimnames=list(rNames,sampleNames(BSRed)))
    fData <- data.frame(Cent.Deviation=rep(NA,nTest),Within.SD=rep(NA,nTest),HW.Chi2=rep(NA,nTest),HW.P=rep(NA,nTest),BAF.Locus1=rep(NA,nTest),BAF.Locus2=rep(NA,nTest),Call.Rate=rep(NA,nTest),Overlap=rep(NA,nTest),Rotation=rep(NA,nTest),stringsAsFactors=FALSE,row.names=rNames)
    fDesc <-  c('Largest distance from cluster-centre to its ideal position',
                'Largest within-cluster spread',
                'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                'Probability of Hardy-Weinberg equilibrium',
                'Estimated B-allele frequency',
                'Estimated BAF of second paralogue (if exists)',
                'Ratio of samples assigned to clusters',
                'TRUE if there is an initial cluster overlap (along \'Theta\')',
                'Large value (>1) means increasingly slanting clusters')
    fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)

    pp <- par()
    nC <- ceiling(sqrt(nTest))
    nR <- ceiling(nTest/nC)
    par(mfrow=c(nR,nC),mai=c(1,1,1,.5)/sqrt(nC))
    
    for (iConf in 1:nTest){
      current.i <- sConf$ix[iConf]
      nCl <- polyCent$size[current.i]
      cntIdeal <- polyCent$centers[[current.i]]
      centers <- sConf$centers[[iConf]]
      clObj <- findPolyploidClusters(X,indSE=indSE,centers=centers,wss.update=FALSE)
      if (!any(clObj$size>0)){
        message(sprintf("No clusters assigned for '%s'",polyCent$classification[current.i]))
      }else{
        iCl <- which(clObj$size>0)
        #fData[current.i,1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
        fData[iConf,1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
        wSpread <- tapply(X[,1],clObj$cluster,sd)   #NB! MAD?
        wSpread[clObj$size[iCl]<2] <- 0   #use gO$minClLim instead?
        #fData[current.i,2] <- max(wSpread)  #Max within-cluster spread (Theta)
        fData[iConf,2] <- max(wSpread)  #Max within-cluster spread (Theta)
        if (nCl==1){
          #fData[current.i,5:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[4:5])
          fData[iConf,5:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[4:5])
        }else{
          #fData[current.i,3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
          fData[iConf,3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=current.i,polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
          minB <- (clObj$centers[iCl,1] - gO$nSdOverlap*wSpread)[-1]
          maxB <- (clObj$centers[iCl,1] + gO$nSdOverlap*wSpread)[-length(iCl)]
          #fData[current.i,8] <- any(round(minB*100)<round(maxB*100))
          fData[iConf,8] <- any(round(minB*100)<round(maxB*100))
        }
        nClRed <- length(iCl)
        clCtrs <- matrix(clObj$centers[iCl,],nrow=nClRed)
        indOut <- matrix(FALSE,nrow=nrow(X),ncol=nCl)
        indKeep <- rep(FALSE,nrow(X))
        indOverlap <- rep(FALSE,nrow(X))
        Sratio <- rep(1,nCl)
        for (j in iCl){
          indj <- clObj$cluster==j
          nj <- sum(indj)
          if (nj>=gO$minClLim){    #Hotellings T^2-ellipse superimposed on clusters
            ctrX <- apply(X[indj,1:2],2,median,na.rm=TRUE)
            Xj <- X[indj,1:2]-matrix(ctrX,nrow=nj,ncol=2,byrow=TRUE)
            Sj <- t(Xj)%*%Xj/(nj-1)
            Sratio[j] <- Sj[1,1]/(Sj[2,2]*gO$rPenalty)  #Cluster 'angle', given comparable axes
            Xall <- X[,1:2]-matrix(ctrX,nrow=nrow(X),ncol=2,byrow=TRUE)
            T2j <- diag(Xall%*%solve(Sj)%*%t(Xall))
            f.alpha <- T2j*(nj-2)*nj/(2*(nj-1)*(nj+1))
            pVal <- 1 - pf(f.alpha,2,nj-2)
          }else{
            pVal <- rep(0,nrow(X))
            pVal[indj] <- 1
          }
          indOut[,j] <- pVal < gO$clAlpha
          indKeep[indj] <- !indOut[indj,j]
          indOverlap[!indOut[,j]][clObj$cluster[!indOut[,j]]!=j] <- TRUE
        }
        
        plot.new()
        plot.window(c(-.2,1.2),c(0,max(max(X[,2]),1/gO$rPenalty)))
        axis(1); axis(2)
        title(main=polyCent$classification[current.i],xlab='Theta',ylab='R')
        points(X[,1],X[,2],pch=16)
        points(X[!indKeep,1],X[!indKeep,2],pch=16,col='red')   #outside ellipses
        points(X[indOverlap,1],X[indOverlap,2],pch=16,col='orange')  #overlapping ellipses
        points(clCtrs[,1],clCtrs[,2],col='green',pch=16)    #cluster-centres
        
        indKeep <- indKeep &! indOverlap
        #fData[current.i,7] <- sum(indKeep)/sum(iArrays)
        #fData[current.i,9] <- sum(Sratio*clObj$size)/(ncol(BSRed)*nClRed)
        #call[current.i,iArrays][iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
        #fData[iConf,7] <- sum(indKeep)/sum(iArrays)
        fData[iConf,7] <- sum(indKeep)/sum(!iMiss)
        fData[iConf,9] <- sum(Sratio*clObj$size)/(ncol(BSRed)*nClRed)
        call[iConf,iArrays][iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
      } #end if (!any(clObj$size))
    } #for
    par(pp[c('mfcol','mai')])
    verboseResults <- list()
    verboseResults$call <- call
    verboseResults$fData <- fData
    verboseResults$fData[,5:6] <- 1-verboseResults$fData[,5:6]  #NB! B-af, not A-af
    verboseResults$fMetadata <- fMetadata
    test <- data.frame(Cent.Deviation=fData[,1]<gO$devCentLim,
                       WithinSD=fData[,2]<gO$wSpreadLim,
                       HW.P=fData[,4]>gO$hwAlpha,Call.Rate=fData[,7]>gO$detectLim,
                       Overlap=!fData[,8],Rotation=fData[,9]<=gO$rotationLim,
                       row.names=rNames)
    verboseResults$test <- test
    verboseResults$test[test==TRUE] <- 'PASS'
    verboseResults$test[test==FALSE] <- 'FAIL'
    verboseResults$test$sumFAIL <- apply(!test,1,sum,na.rm=TRUE)
    #verboseResults$test <- verboseResults$test[sConf$ix,]
    indPass <- verboseResults$test$sumFAIL==0
    if (any(indPass)){
      clss <- rownames(verboseResults$test)[which(indPass)[1]]
    }else{
      clss <- 'FAIL'
    }
    message(sprintf("Marker would be classified as '%s'",clss))
    verboseResults
  } #end if low qual.
}
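
#Example (illustrative sketch, not run; assumes 'BSRed' is an 'AlleleSetIllumina'-style
#object with 'theta', 'intensity' and 'SE' assay-data):
#  gO <- setGenoOptions(largeSample=ncol(BSRed)>250)
#  res <- callGenotypes.verboseTest(BSRed, singleMarker=5, gO=gO)
#  res$test    #PASS/FAIL per test-criterion for each cluster-configuration tried
#  res$fData   #the corresponding summary statistics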



#Manual genotype calling via interactive clustering (requires the 'rggobi' package)
callGenotypes.interactive <- function(BSRed,gO=setGenoOptions(largeSample=ncol(BSRed)>250)){
  #require('rggobi')
  mNoise <- mean(pData(BSRed)$noiseIntensity, na.rm=TRUE)
  R <- assayData(BSRed)$intensity
  Theta <- assayData(BSRed)$theta
  SE <- assayData(BSRed)$SE
  polyCent <- generatePolyCenters(ploidy=gO$ploidy)
  if (is.null(assayData(BSRed)$call)){
    assayData(BSRed)$call <-  matrix(nrow=nrow(BSRed), ncol=ncol(BSRed), dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
    isCalled <- FALSE
  }else{
    isCalled <- TRUE
  }
  if (is.null(assayData(BSRed)$ped.check.parents) | !isCalled)
    assayData(BSRed)$ped.check.parents <- matrix(nrow=nrow(BSRed), ncol=ncol(BSRed), dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
  rNames <- c("Classification", "Cent.Deviation", "Within.SD", "HW.Chi2", "HW.P", "BAF.Locus1", "BAF.Locus2", "Call.Rate", "Ped.Errors")   
  if (all(rNames[1:8] %in% colnames(fData(BSRed))) & isCalled){
    prevCalls <- fData(BSRed)$Classification
    if (!rNames[9] %in% colnames(fData(BSRed))){
      fData <- data.frame(Ped.Errors=rep(NA,nrow(BSRed)), stringsAsFactors=FALSE, row.names=featureNames(BSRed))
      fMetadata <- data.frame(labelDescription='Number of pedigree-errors, summed over offspring', row.names=colnames(fData), stringsAsFactors=FALSE)
      featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
      featureData(BSRed) <- combine(featureData(BSRed), featureData)
    }
    if ('Manual.Calls.R' %in% colnames(fData(BSRed))){
      indMiss <- fData(BSRed)$Manual.Calls.R %in% ''
      if (any(!indMiss)){
        message('Column \'Manual.Calls.R\' will be overwritten!')
        prevCalls[!indMiss] <- fData(BSRed)$Manual.Calls.R[!indMiss]
      }
    }else{
      fData <- data.frame(Manual.Calls.R=character(nrow(BSRed)), stringsAsFactors=FALSE, row.names=featureNames(BSRed))
      fMetadata <- data.frame(labelDescription='Genotype call from interactive clustering', row.names=colnames(fData), stringsAsFactors=FALSE)
      featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
      featureData(BSRed) <- combine(featureData(BSRed), featureData)
    }
  }else{
    iKeep <- !colnames(fData(BSRed)) %in% c(rNames,'Manual.Calls.R')
    fData <- data.frame(Cent.Deviation=rep(NA,nrow(BSRed)), Within.SD=rep(NA,nrow(BSRed)), HW.Chi2=rep(NA,nrow(BSRed)), HW.P=rep(NA,nrow(BSRed)), BAF.Locus1=rep(NA,nrow(BSRed)), BAF.Locus2=rep(NA,nrow(BSRed)), Call.Rate=rep(NA,nrow(BSRed)), Ped.Errors=rep(NA,nrow(BSRed)), Manual.Calls.R=character(nrow(BSRed)), stringsAsFactors=FALSE, row.names=featureNames(BSRed))
    fDesc <-  c('Largest distance from cluster-centre to its ideal position',
                'Largest within-cluster spread',
                'Chi-squared statistic from test of Hardy-Weinberg equilibrium',
                'Probability of Hardy-Weinberg equilibrium',
                'Estimated B-allele frequency',
                'Estimated BAF of second paralogue (if exists)',
                'Ratio of samples assigned to clusters',
                'Number of pedigree-errors, summed over offspring',
                'Genotype call from interactive clustering')
    fMetadata <- data.frame(labelDescription=fDesc, row.names=colnames(fData), stringsAsFactors=FALSE)
    featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
    featureData(BSRed) <- combine(featureData(BSRed)[,iKeep], featureData)
    prevCalls <- 'FAIL'
  }
  str0 <- 'Calling marker no. %d of %d...'
  for (i in 1:nrow(BSRed)){
    message(sprintf(str0,i,nrow(BSRed)))
    if (isCalled & !prevCalls[i]%in%'FAIL'){
      marker <- data.frame(Theta=Theta[i,], R=R[i,], Call=assayData(BSRed)$call[i,], PedigreeID=pData(BSRed)$PedigreeID, stringsAsFactors=FALSE)
      gg <- ggobi(marker[,c('Theta','R')],name='marker')
      glc <- marker$Call*4 + 1
      glc[is.na(glc)] <- 8
      marker$PedCheck <- validateSingleCall(marker,prevCalls[i])
      iErr <- marker$PedCheck>0
      glc[iErr] <- 5 + unlist(marker$PedCheck[iErr])
      glyph_colour(gg$marker) <- glc
      glyph_type(gg$marker) <-  rep(6,nrow(marker))
      glyph_type(gg$marker)[iErr] <- 7
      message(sprintf('Number of pedigree-errors: %d',sum(iErr)))
      finished <- readline(prompt='Keep current clusters? (y/n) ')
      OK <- finished %in% c('y','Y','yes','Yes','YES','1','T','TRUE')
      close(gg)
    }else{
      OK <- FALSE
    } #end if isCalled
    if (!OK){
      iMiss <- is.na(R[i,])|is.na(SE[i,])
      iDet <- R[i,]>mNoise & !iMiss & Theta[i,]>-.5 & Theta[i,]<1.5    #Filter bad arrays
      if (sum(iDet)<=ncol(BSRed)*gO$detectLim){
        message(sprintf('Marker %s failed due to many missing samples',featureNames(BSRed)[i]))
        featureData(BSRed)@data$Manual.Calls.R[[i]] <- 'FAIL'
        assayData(BSRed)$call[i,] <- NA
        assayData(BSRed)$ped.check.parents[i,] <- NA
        assayData(BSRed)$ped.check[i,] <- NA
        featureData(BSRed)@data$Cent.Deviation[[i]] <- NA
        featureData(BSRed)@data$Within.SD[i] <- NA
        featureData(BSRed)@data$HW.Chi2[i] <- NA
        featureData(BSRed)@data$HW.P[i] <- NA
        featureData(BSRed)@data$BAF.Locus1[i] <- NA
        featureData(BSRed)@data$BAF.Locus2[i] <- NA
        featureData(BSRed)@data$Call.Rate[i] <- 0
        #featureData(BSRed)@data$Max.Rotation[i] <- NA
        featureData(BSRed)@data$Ped.Errors[i] <- 0
      }else{
        sclR <- median(R[i,iDet],na.rm=TRUE)*2*gO$rPenalty                   #Factor by which R is divided before clustering
        X <- matrix(cbind(Theta[i,],R[i,]/sclR,SE[i,])[iDet,],ncol=3)
        if (is.null(gO$probsIndSE)){
          indSE <- rep(TRUE,sum(iDet))
        }else{
          indSE <- X[,3]<quantile(X[,3],probs=gO$probsIndSE,na.rm=TRUE)      #Uncertain points removed from clustering
        } #end if is.null
        #qRange <- getQRange(X[indSE,1],minScl=3,probs=gO$probsQRange)     #Initial cluster-centers
        #sConf <- distLikelyConfiguration(X[indSE,1:2],polyCent)  #Ordered list of likely classifications
        #sConf <- getCenters(X[indSE,1],gO=gO,polyCent=polyCent)
        #maxConf <- length(sConf$ix)
        while (!OK){
          marker <- data.frame(Theta=Theta[i,],R=R[i,],PedigreeID=pData(BSRed)$PedigreeID,stringsAsFactors=FALSE)[iDet,]
          gg <- ggobi(marker[,c('Theta','R')],name='marker')
          glyph_colour(gg$marker) <- 8
          clList <- unlist(polyCent$classification)#[sConf$ix])
          nCand <- length(clList)
          message('Genotype-calling categories: ')
          for (k in 1:nCand)
            message(sprintf('%d: %s',k,clList[k]))
          message(sprintf('%d: FAIL',k+1))
          rngT <- range(marker$Theta)
          message(sprintf('(Range Theta: [%.2f, %.2f])',rngT[1],rngT[2]))
          iConf <- NA
          while (!iConf %in% 1:(nCand+1)){
            str1 <- sprintf('Please indicate the correct marker-type: (in 1:%d) ',nCand+1)
            iConf <- as.numeric(readline(prompt=str1))
          }
          if (iConf!=(nCand+1)){
            #Suggest clusters
            nCl <- polyCent$size[iConf]#[sConf$ix[iConf]]
            cntIdeal <- polyCent$centers[[iConf]]#[[sConf$ix[iConf]]]
            centers <- getSpecificCenters(X[indSE,1], classification=polyCent$classification[[iConf]], gO=gO, breaks=seq(-.25,1.25,gO$binWidth), polyCent=polyCent)
            clObj <- findPolyploidClusters(X,indSE=indSE,centers=centers,wss.update=FALSE)
            
            #Quality-check and update clusters
            marker$Call <- cntIdeal[clObj$cluster]
            gg <- manualCall(marker,cntIdeal,clList[iConf],gg,close.gg=FALSE)
            marker$Call <- gg$marker$Call
            marker$PedCheck <- gg$marker$PedCheck
            if (nCl==1){
              clObj$cluster <- gg$marker$Call-cntIdeal+1
            }else{
              clObj$cluster <- (gg$marker$Call-cntIdeal[1])/diff(range(cntIdeal))*(nCl-1)+1
            }
            glc <- gg$marker$Call*4 + 1
            glc[is.na(glc)] <- 8
            glt <- rep(6,nrow(marker))
            newCent1 <- tapply(X[,1],clObj$cluster,median,na.rm=TRUE) #NB! median
            newCent2 <- tapply(X[,2],clObj$cluster,median,na.rm=TRUE) #NB! median
            clObj$centers[as.numeric(names(newCent1)),] <- cbind(newCent1,newCent2)
            clObj$centers[-as.numeric(names(newCent1)),] <- NA
            clObj$size <- rep(0,nCl)
            newSizes <- tapply(rep(1,nrow(X)),clObj$cluster,sum)
            clObj$size[as.numeric(names(newSizes))] <- newSizes
            
            tmpRes <- rep(NA,7)
            iCl <- which(clObj$size>0)
            tmpRes[1] <- max(abs(cntIdeal[iCl]-clObj$centers[iCl,1]))   #Max cluster-centre deviation (Theta)
            tmpRes[2] <- max(tapply(X[,1],clObj$cluster,sd))  #Max within-cluster spread (Theta)
            if (nCl>1){
              tmpRes[3:6] <- as.numeric(testHardyWeinberg(sizes=clObj$size,bestConf=iConf,polyCent=polyCent,afList=gO$afList)[c(1,3:5)])
            } #end if nCl
            nClRed <- length(iCl)
            clCtrs <- matrix(clObj$centers[iCl,],nrow=nClRed)
            indOut <- matrix(FALSE,nrow=nrow(X),ncol=nCl)
            indKeep <- rep(FALSE,nrow(X))
            indOverlap <- rep(FALSE,nrow(X))
            Sratio <- rep(1,nCl)
            for (j in iCl){
              indj <- clObj$cluster%in%j
              nj <- sum(indj)
              if (nj>gO$minClLim){    #Hotellings T^2-ellipse superimposed on clusters
                ctrX <- apply(X[indj,1:2],2,median,na.rm=TRUE)
                Xj <- X[indj,1:2]-matrix(ctrX,nrow=nj,ncol=2,byrow=TRUE)
                Sj <- t(Xj)%*%Xj/(nj-1)
                Sratio[j] <- Sj[1,1]/(Sj[2,2]*gO$rPenalty)  #Cluster 'angle', given comparable axes
                Xall <- X[,1:2]-matrix(ctrX,nrow=nrow(X),ncol=2,byrow=TRUE)
                T2j <- diag(Xall%*%solve(Sj)%*%t(Xall))
                f.alpha <- T2j*(nj-2)*nj/(2*(nj-1)*(nj+1))
                pVal <- 1 - pf(f.alpha,2,nj-2)
              }else{
                pVal <- rep(0,nrow(X))
                pVal[indj] <- 1
              }
              indOut[,j] <- pVal < gO$clAlpha
              indKeep[indj] <- !indOut[indj,j]
              indOverlap[!indOut[,j]][clObj$cluster[!indOut[,j]]!=j] <- TRUE
            } #end for j
            indKeep <- indKeep &! indOverlap
            tmpRes[7] <- sum(indKeep)/ncol(BSRed)
            
            #Update calls, pedCheck, and scatterplot 
            glc[!indKeep] <- rep(8,sum(!indKeep))
            glyph_colour(gg$marker) <- glc
            marker$Call[!indKeep] <- gg$marker$Call[!indKeep] <- clObj$cluster[!indKeep] <- NA
            indSel <- marker$PedCheck>0
            marker$PedCheck[indSel] <- gg$marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],clList[iConf])
            iErr <- marker$PedCheck>0
            glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
            glyph_type(gg$marker) <- glt
            glyph_type(gg$marker)[iErr] <- 7
            
            message(sprintf('Hardy-Weinberg p-value: %.2g',tmpRes[4]))
            message(sprintf('Call-rate: %.2g',tmpRes[7]))
            message(sprintf('Number of pedigree-errors: %d',sum(iErr)))
            finished <- readline(prompt='Save results and move on to next marker? (y/n) ')
          }else{
            finished <- readline(prompt='Fail marker and move on? (y/n) ')
          } #end if iConf!=(nCand+1)
          OK <- finished %in% c('y','Y','yes','Yes','YES','1','T','TRUE')
          close(gg)
        } #end while !OK
        if (iConf!=(nCand+1)){
          assayData(BSRed)$call[i,iDet][indKeep] <- cntIdeal[clObj$cluster][indKeep]
          assayData(BSRed)$call[i,!iDet] <- NA
          assayData(BSRed)$call[i,iDet][!indKeep] <- NA
          assayData(BSRed)$ped.check.parents[i,iDet] <- marker$PedCheck
          assayData(BSRed)$ped.check.parents[i,!iDet] <- NA
          assayData(BSRed)$ped.check[i,] <- NA
          assayData(BSRed)$ped.check[i,iDet][marker$PedCheck%in%0] <- TRUE
          assayData(BSRed)$ped.check[i,iDet][marker$PedCheck%in%1] <- FALSE
          featureData(BSRed)@data$Manual.Calls.R[[i]] <- polyCent$classification[[iConf]]#[[sConf$ix[iConf]]]
          featureData(BSRed)@data$Cent.Deviation[[i]] <- tmpRes[1]
          featureData(BSRed)@data$Within.SD[i] <- tmpRes[2]
          featureData(BSRed)@data$HW.Chi2[i] <- tmpRes[3]
          featureData(BSRed)@data$HW.P[i] <- tmpRes[4]
          featureData(BSRed)@data$BAF.Locus1[i] <- tmpRes[5]
          featureData(BSRed)@data$BAF.Locus2[i] <- tmpRes[6]
          featureData(BSRed)@data$Call.Rate[i] <- tmpRes[7]
          #featureData(BSRed)@data$Max.Rotation[i] <- max(Sratio)
          featureData(BSRed)@data$Ped.Errors[i] <- sum(iErr)
        }else{
          featureData(BSRed)@data$Manual.Calls.R[[i]] <- 'FAIL'
          assayData(BSRed)$call[i,] <- NA
          assayData(BSRed)$ped.check.parents[i,] <- NA
          assayData(BSRed)$ped.check[i,] <- NA
          featureData(BSRed)@data$Cent.Deviation[[i]] <- NA
          featureData(BSRed)@data$Within.SD[i] <- NA
          featureData(BSRed)@data$HW.Chi2[i] <- NA
          featureData(BSRed)@data$HW.P[i] <- NA
          featureData(BSRed)@data$BAF.Locus1[i] <- NA
          featureData(BSRed)@data$BAF.Locus2[i] <- NA
          featureData(BSRed)@data$Call.Rate[i] <- 0
          #featureData(BSRed)@data$Max.Rotation[i] <- NA
          featureData(BSRed)@data$Ped.Errors[i] <- 0
        } #end if iConf!=(nCand+1)
      } #end if sum(iDet)
    }else{
      featureData(BSRed)@data$Manual.Calls.R[[i]] <- featureData(BSRed)@data$Classification[[i]]
      assayData(BSRed)$ped.check.parents[i,] <- marker$PedCheck
    } #end if !OK
  } #end for i
  validObject(BSRed)
  BSRed
}
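
#Example (illustrative sketch, not run; interactive, requires the 'rggobi' package and
#a 'PedigreeID' column in pData(BSRed)):
#  gO <- setGenoOptions(largeSample=ncol(BSRed)>250)
#  BSRed <- callGenotypes.interactive(BSRed, gO=gO)
#  table(fData(BSRed)$Manual.Calls.R)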



#Scales each value of the pooled SE by the arclength of the first-quadrant arc of
#'radius' R, since that arc always spans a theta-interval of length one in the
#theta-R plot, regardless of R. The arclength depends on 'p', the norm of the
#distance measure used: if p=1 (manhattan) a=sqrt(2)*R, if p=2 (euclidean)
#a=pi/2*R, if p=inf (infinity-norm) a=2*R.
findSeTheta <- function(pooledSE.raw,R,dist='manhattan',pNorm=NULL){
  integrand <- function(th,pNorm){
    2/pNorm*sqrt(cos(th)^(4/pNorm-2)*sin(th)^2+sin(th)^(4/pNorm-2)*cos(th)^2)
  }
  switch(dist,
         euclidean={scale <- pi*R/2},
         manhattan={scale <- sqrt(2)*R},
         minkowski={
           unitLength <- integrate(integrand,lower=0,upper=pi/2,pNorm=pNorm)
           scale <- unitLength$value*R
         })
  pooledSE.theta <- pooledSE.raw/scale
  pooledSE.theta
}
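
#Example (illustrative; the closed-form scalings above agree with the numerical
#'minkowski' integral, e.g. pNorm=2 reproduces the euclidean arclength pi/2*R):
#  findSeTheta(pooledSE.raw=0.1, R=2, dist='manhattan')            #0.1/(sqrt(2)*2)
#  findSeTheta(pooledSE.raw=0.1, R=2, dist='euclidean')            #0.1/(pi*2/2)
#  findSeTheta(pooledSE.raw=0.1, R=2, dist='minkowski', pNorm=2)   #~same as 'euclidean'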
  


#Generates a list of possible cluster-alternatives
generatePolyCenters <- function(ploidy){
  centers <- list()
  classification <- list()
  if (ploidy=='di'){
    centers[[1]] <- 0               #Only A's
    centers[[2]] <- 1               #Only B's
    centers[[3]] <- c(0,.5,1)            #Regular SNP
    classification[1] <- 'MONO-a'
    classification[2] <- 'MONO-b'
    classification[3] <- 'SNP'
  }else if(ploidy=='tetra'){
    centers[[1]] <- 0               #Only A's
    centers[[2]] <- .5              #PSV or MSV with small minor AF
    centers[[3]] <- 1               #Only B's
    centers[[4]] <- c(0,.5,1)            #Regular SNP
    centers[[5]] <- c(0,.25,.5)          #MSV with one paralogue fixed AA
    centers[[6]] <- c(.5,.75,1)          #MSV with one paralogue fixed BB
    centers[[7]] <- c(0,.25,.5,.75,1)    #MSV with segregation in both paralogues
    classification[1] <- 'MONO-a'
    classification[2] <- 'PSV'
    classification[3] <- 'MONO-b'
    classification[4] <- 'SNP'
    classification[5] <- 'MSV-a'
    classification[6] <- 'MSV-b'
    classification[7] <- 'MSV-5'
  }else if(ploidy=='tetra.red'){
    centers[[1]] <- 0               #Only A's
    centers[[2]] <- .5              #PSV or MSV with small minor AF
    centers[[3]] <- 1               #Only B's
    centers[[4]] <- c(0,.5,1)            #Regular SNP
    centers[[5]] <- c(0,.25,.5)          #MSV with one paralogue fixed AA
    centers[[6]] <- c(.5,.75,1)          #MSV with one paralogue fixed BB
    classification[1] <- 'MONO-a'
    classification[2] <- 'PSV'
    classification[3] <- 'MONO-b'
    classification[4] <- 'SNP'
    classification[5] <- 'MSV-a'
    classification[6] <- 'MSV-b'
  }else{
    stop("Allowed values for ploidy are 'di', 'tetra', or 'tetra.red'")
  }
  polyCent <- list()
  polyCent$centers <- centers
  polyCent$classification <- classification
  polyCent$size <- sapply(polyCent$centers,length)
  polyCent
}
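
#Example (illustrative, not run): for tetraploid data the seven allowed configurations
#and their cluster-counts are
#  pC <- generatePolyCenters(ploidy='tetra')
#  unlist(pC$classification)   #"MONO-a" "PSV" "MONO-b" "SNP" "MSV-a" "MSV-b" "MSV-5"
#  pC$size                     #1 1 1 3 3 3 5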



#Estimates likely range and cluster-centres
getCenters <- function(theta,gO=setGenoOptions(),breaks=seq(-.25,1.25,gO$binWidth),polyCent=generatePolyCenters(ploidy=gO$ploidy)){
##### for testing:
  #breaks <- seq(-.25,1.25,.1)
  #theta <- assayData(BSRed)$theta[125,]; theta <- theta[theta>breaks[1]&theta<max(breaks)]
  #hX <- hist(theta,breaks=breaks,plot=TRUE,ylim=c(0,50));abline(h=gO$minBin-1)
#####
  
  theta <- theta[theta>breaks[1] & theta<max(breaks)]
  nSamp <- length(theta)
  polyCl <- findClusters(theta=theta,breaks=breaks,minBin=gO$minBin)
  if (polyCl$nCl<3){
    breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
    polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
    if (all(polyCl1$clSizes>nSamp/100)){
      polyCl <- polyCl1
    }
  }
  centers <- list()
  if (polyCl$nCl==0){
    ix <- NULL
  }else if (gO$ploidy%in%'tetra'){
    i1 <- polyCent$size==1
    i3 <- polyCent$size==3
    i5 <- polyCent$size==5
    if (polyCl$nCl==1){
      vec1 <- unlist(polyCent$centers[i1])
      centers[[1]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
      sqDev1 <- (vec1 - centers[[1]])^2/c(2,1,2)   #Disfavour PSVs
      cent3 <- polyCent$centers[[4]]
      indClose <- which(cent3 %in% (round(polyCl$clPeaks*2)/2))
      cent3[indClose] <- polyCl$clPeaks
      centers[[2]] <- cent3
      ix <- c(which(i1)[order(sqDev1)[1]],4)
    }else if (polyCl$nCl==2){
      ix <- numeric(3)
      cent5 <- polyCent$centers[[7]]
      indClose <- cent5 %in% (round(polyCl$clPeaks*4)/4)
      if (sum(indClose)==1){
        cDist <- sapply(polyCl$clPeaks,function(x,cent5) (x-cent5)^2, cent5)
        orderC <- order(cDist[indClose,])
        cDist[indClose,orderC[2]] <- 10
        indClose[order(cDist[,orderC[2]])[1]] <- TRUE
      }
      indClose <- which(indClose)
      closerAA <- sum((polyCl$clPeaks)^2) < sum((polyCl$clPeaks-1)^2)
      
      if (sum(c(1,3,5)%in%indClose)==2){  #1-3, 3-5, 1-5
        cent5[indClose] <- polyCl$clPeaks
        if (closerAA){
          ii2 <- 5
        }else{
          ii2 <- 6
        }
        cent3 <- polyCent$centers[[ii2]]
        indClose1 <- which(rank((cent3-polyCl$clPeaks[order(polyCl$clSizes)[2]])^2)==1)
        cent3[indClose1] <- polyCl$clPeaks[order(polyCl$clSizes)[2]]
        centers[1:2] <- list(cent5[c(1,3,5)],cent3)
        ix[1:2] <- c(4,ii2)
      }else if(sum(1:3%in%indClose)==2){   #1-2, 2-3
        cent5[indClose] <- polyCl$clPeaks
        cent3 <- polyCent$centers[[4]]
        cent3[1:2] <- polyCl$clPeaks
        centers[1:2] <- list(cent5[1:3],cent3)
        ix[1:2] <- c(5,4)
      }else if(sum(3:5%in%indClose)==2){   #4-5, 3-4
        cent5[indClose] <- polyCl$clPeaks
        cent3 <- polyCent$centers[[4]]
        cent3[2:3] <- polyCl$clPeaks
        centers[1:2] <- list(cent5[3:5],cent3)
        ix[1:2] <- c(6,4)
      }else{                               #1-4, 2-4, 2-5
        cent3 <- polyCent$centers[[4]]
        indClose <- which(cent3 %in% (round(polyCl$clPeaks*2)/2))
        if (length(indClose)==1){
          if (closerAA){                                #clusters closer to theta=0
            indClose <- which(cent3 %in% (floor(pmax(polyCl$clPeaks,0)*2)/2))
          }else{                                        #clusters closer to theta=1
            indClose <- which(cent3 %in% (ceiling(pmin(polyCl$clPeaks,1)*2)/2))
          }
        }
        cent3[indClose] <- polyCl$clPeaks
        if (closerAA){
          ii2 <- 5
        }else{
          ii2 <- 6
        }
        cent3a <- polyCent$centers[[ii2]]
        indClose1 <- which(rank((cent3a-polyCl$clPeaks[order(polyCl$clSizes)[2]])^2)==1)
        cent3a[indClose1] <- polyCl$clPeaks[order(polyCl$clSizes)[2]]
        centers[1:2] <- list(cent3,cent3a)
        ix[1:2] <- c(4,ii2)
      }
      vec1 <- unlist(polyCent$centers[i1])
      centers[[3]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
      sqDev1 <- (vec1 - centers[[3]])^2
      ix[3] <- which(i1)[order(sqDev1)[1]]
    }else if (polyCl$nCl==3){
      ix <- numeric(3)
      arr3 <- t(array(unlist(polyCent$centers[i3]),dim=c(sum(i3),3)))
      indx <- apply(arr3,1,function(x,clPeaks,lim) max(abs(x-clPeaks))<=lim,polyCl$clPeaks,gO$devCentLim)  #Don't attempt if devCentLim is exceeded
      if (any(indx)){
        arr3 <- matrix(arr3[indx,],nrow=sum(indx),byrow=TRUE)
        sqDev3 <- apply(arr3,1,function(x) sum((x-polyCl$clPeaks)^2))/c(2,1,1)[indx] #alt: apply(arr3,1,function(x) diff(range(x)))
        ix[1] <- which(i3)[indx][order(sqDev3)[1]]
        centers[[1]] <- polyCl$clPeaks
      }else if (diff(range(theta))<.25){
        vec1 <- unlist(polyCent$centers[i1])
        centers[[1]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
        sqDev1 <- (vec1 - centers[[1]])^2/c(2,1,2)   #Disfavour PSVs
        ix[1] <- which(i1)[order(sqDev1)[1]]
      }else{
        ix[1] <- 7
        centers[[1]] <- c(0,polyCl$clPeaks,1)
      }
      breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
      polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
      if (polyCl1$nCl==5){
        centers[[2]] <- polyCl1$clPeaks
      }else{
        centers[[2]] <- seq(polyCl$clPeaks[1],polyCl$clPeaks[3],diff(range(polyCl$clPeaks))/4)
      }
      ix[2] <- 7
      
      diffPeaks <- diff(polyCl$clPeaks)
      iSame <- c(diffPeaks<=min(diffPeaks),FALSE)
      if (sum(iSame)==1){
        cent3 <- polyCent$centers[[4]]
        clPeaksRed <- polyCl$clPeaks[!iSame]
        indx <- which(iSame)
        clPeaksRed[indx] <- mean(polyCl$clPeaks[indx:(indx+1)])
        indClose <- which(cent3 %in% (round(clPeaksRed*2)/2))
        if (length(indClose)==1){
          closerAA <- sum((clPeaksRed)^2) < sum((clPeaksRed-1)^2)
          if (closerAA){                                #clusters closer to theta=0
            indClose <- c(1,2)
          }else{                                        #clusters closer to theta=1
            indClose <- c(2,3)
          }
        }
        cent3[indClose] <- clPeaksRed
        centers[[3]] <- cent3
        ix[3] <- 4
      }else{
        vec1 <- unlist(polyCent$centers[i1])
        centers[[3]] <- sum(polyCl$clPeaks*polyCl$clSizes)/sum(polyCl$clSizes)
        sqDev1 <- (vec1 -  centers[[3]])^2
        ix[3] <- which(i1)[order(sqDev1)[1]]
      }
    }else if (polyCl$nCl==4){
      cent5 <- polyCent$centers[[7]]
      #minDist <- tapply(cent5,1:5,function(x,clPeaks) min((x-clPeaks)^2),polyCl$clPeaks)
      minDist <- sapply(cent5,function(x,clPeaks) min((x-clPeaks)^2),polyCl$clPeaks)
      indClose <- !rank(minDist,ties.method='max')==5
      if (sum(indClose)<4){
        if (sum((polyCl$clPeaks)^2) < sum((polyCl$clPeaks-1)^2)){   #clusters closer to theta=0
          indClose <- c(rep(TRUE,4),FALSE)
        }else{                                        #clusters closer to theta=1
          indClose <- c(FALSE,rep(TRUE,4))
        }
      }
      cent5[indClose] <- polyCl$clPeaks
      centers[[1]] <- cent5
      breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
      polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
      if (polyCl1$nCl==5){
        centers[[2]] <- polyCl1$clPeaks
      }else{
        centers[[2]] <- seq(polyCl$clPeaks[1],polyCl$clPeaks[4],diff(range(polyCl$clPeaks))/4)
      }
      diffPeaks <- diff(polyCl$clPeaks)
      iSame <- c(diffPeaks<=min(diffPeaks),FALSE)
      if (sum(iSame)==1){
        clPeaksRed <- polyCl$clPeaks[!iSame]
        indx <- which(iSame)
        clPeaksRed[indx] <- mean(polyCl$clPeaks[indx:(indx+1)])
      }else{
        clPeaksRed <- c(polyCl$clPeaks[1],mean(polyCl$clPeaks[2:3]),polyCl$clPeaks[4])
      }
      centers[[3]] <- clPeaksRed
      arr3 <- t(array(unlist(polyCent$centers[i3]),dim=c(sum(i3),3)))
      sqDev3 <- apply(arr3,1,function(x) sum((x-clPeaksRed)^2))/c(2,1,1) #alt: apply(arr3,1,function(x) diff(range(x)))
      ix <- c(7,7,which(i3)[order(sqDev3)[1]])
    }else if (polyCl$nCl==5){
      centers[[1]] <- polyCl$clPeaks
      #centers[[2]] <- seq(0,1,.25)
      centers[[2]] <- c(polyCl$clPeaks[1],mean(polyCl$clPeaks[2:4]),polyCl$clPeaks[5])
      ix <- c(7,4)
    }else{
      ix <- c(4,7)
      centers <- polyCent$centers[ix]
    } #end if polyCl$nCl
  }else{
    ix <- 1:length(polyCent$classification)
    centers <- polyCent$centers
    warning('Only ploidy="tetra" is implemented. A non-ranked list of starting points
is returned for other ploidies; accurate clustering is NOT expected')
  #}else if (gO$ploidy%in%'di'){
  #  i1 <- polyCent$size==1
  #  i3 <- polyCent$size==3
  #  if (polyCl$nCl==1){
  #    
  #  }else if (polyCl$nCl==2){
  #    
  #  }else if (polyCl$nCl==3){
  #    
  #  }else{
  #    
  #  } #end if polyCl$nCl==1
  } #end if polyCl$nCl==0
  sConf <- list(ix=ix,centers=centers)
  sConf
}
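
#Example (illustrative sketch, not run; with three well-separated clusters the 'SNP'
#configuration (index 4) is expected to rank first, but the exact ranking depends on
#the data and on the histogram settings in 'gO'):
#  theta <- c(rnorm(40,0,.02), rnorm(40,.5,.02), rnorm(40,1,.02))
#  sConf <- getCenters(theta, gO=setGenoOptions())
#  sConf$ix        #ranked configuration indices into generatePolyCenters('tetra')
#  sConf$centers   #suggested starting centres for each candidate configuration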



getSpecificCenters <- function(theta,classification,gO=setGenoOptions(),breaks=seq(-.25,1.25,gO$binWidth),polyCent=generatePolyCenters(ploidy=gO$ploidy)){
##### for testing:
  #breaks <- seq(-.25,1.25,.1)
  #theta <- assayData(BSRed)$theta[8,]; theta <- theta[theta>breaks[1]&theta<max(breaks)]
  #hX <- hist(theta,breaks=breaks,plot=TRUE,ylim=c(0,50));abline(h=gO$minBin-1)
#####
  
  theta <- theta[theta>breaks[1] & theta<max(breaks)]
  nSamp <- length(theta)
  polyCl <- findClusters(theta=theta,breaks=breaks,minBin=gO$minBin)
  iConf <- which(polyCent$classification %in% classification)
  if (polyCl$nCl==polyCent$size[iConf]){
    return(polyCl$clPeaks)
  }else if (polyCl$nCl<polyCent$size[iConf]){
    breaks1 <- seq(breaks[1],max(breaks),gO$binWidth/2)
    polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
    if (polyCl1$nCl==polyCent$size[iConf]){
      return(polyCl1$clPeaks)
    }
  }else{
    breaks1 <- seq(breaks[1],max(breaks),gO$binWidth*2)
    polyCl1 <- findClusters(theta=theta,breaks=breaks1,minBin=gO$minBin)
    if (polyCl1$nCl==polyCent$size[iConf]){
      return(polyCl1$clPeaks)
    }
  }
  return(polyCent$centers[[iConf]])
}


  
#Suggests clusters based on histograms
findClusters <- function(theta,breaks=seq(-.25,1.25,.05),minBin=2,plot=FALSE){
  hX <- hist(theta,breaks=breaks,plot=plot)
  spec <- hX$counts
  spec[spec<minBin] <- 0
  spec <- c(0,spec,0)
  indPeaks <- which(apply(embed(spec,3),1,function(x) rank(x)[2]>2))
  clPeaks <- hX$mids[indPeaks]
  iSame <- c(diff(indPeaks)==1,FALSE)
  if (any(iSame)){
    clPeaksRed <- clPeaks[!iSame]
    indx <- which(iSame)
    for (i in 1:sum(iSame)){
      clPeaksRed[indx[1]+1-i] <- mean(clPeaks[indx[1]:(indx[1]+1)])
      indx <- indx[-1]
    }
    clPeaks <- clPeaksRed
  }
  nCl <- length(clPeaks)
  clSizes <- apply(matrix(embed(c(0,hX$counts,0),3)[indPeaks[!iSame],],nrow=nCl),1,sum)
  polyCl <- list(clPeaks=clPeaks,clSizes=clSizes,nCl=nCl)
  polyCl
}
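
#Example (illustrative, not run; peak positions are histogram bin mid-points, so they
#are only approximate):
#  theta <- c(rnorm(50,0,.02), rnorm(50,.5,.02), rnorm(50,1,.02))
#  polyCl <- findClusters(theta, breaks=seq(-.25,1.25,.05), minBin=2)
#  polyCl$nCl   #expected to be 3, with clPeaks near 0, 0.5 and 1 (depends on the draw)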



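#Clusters points in the (theta, scaled-R) plane by k-means, seeded at the supplied
#theta-centres (the R-coordinate of each seed is the median within its theta-slice).
#Points excluded by 'indSE' are afterwards assigned to the nearest cluster, and
#centres, sizes and (optionally) within-cluster sums of squares are recomputed.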
findPolyploidClusters <- function(X,indSE=rep(TRUE,nrow(X)),centers,plot=FALSE,wss.update=TRUE,...){
  clObj <- NULL
  nCl <- length(centers)
  if (sum(indSE)>=nCl+1){   #Requires more samples than groups
    breaks <- c(-2,-.25,centers[-nCl]+diff(centers)/2,1.25,2)
    hX <- hist(X[indSE,1],breaks=breaks,plot=plot,...)
    indCall <- hX$counts[-c(1,nCl+2)]>0
  }else{
    stop('k-means algorithm requires more samples than cluster-centers')
  }
  centers <- cbind(centers,rep(NA,nCl))
  if (sum(indCall)>1){
    for (i in which(indCall)){
      indi <- X[indSE,1]>hX$breaks[i+1] & X[indSE,1]<=hX$breaks[i+2]+1e-7  #Accounts for numerical tolerance of 'hist'
      centers[i,2] <- median(X[indSE,2][indi],na.rm=TRUE)
    }
    clObj <- try(kmeans(X[indSE,1:2],centers[indCall,]),silent=TRUE)
    if (inherits(clObj, "try-error")){
      message(clObj)
      warning('Hartigan-Wong clustering failed for one marker (kmeans).\n  Will attempt MacQueen-algorithm instead.')
      clObj <- kmeans(X[indSE,1:2],centers[indCall,],algorithm='MacQueen')
    }
  }else if (sum(indCall)==1){
    clObj$centers <- matrix(apply(X[indSE,1:2],2,median,na.rm=TRUE),nrow=1)
    clObj$cluster <- rep(1,sum(indSE))
  }else{
    stop('No clusters found')
  }

  ## Assign left-outs to clusters
  clCluster <- rep(NA,nrow(X))
  clCluster[indSE] <- which(indCall)[clObj$cluster]
  centers[indCall,] <- clObj$centers
  centers[!indCall,2] <- mean(X[!indSE,2],na.rm=TRUE)
  if (!all(indSE)){
    #dAll <- (matrix(clObj$centers[,1],nrow=sum(!indSE),ncol=sum(indCall),byrow=TRUE) - matrix(X[!indSE,1],nrow=sum(!indSE),ncol=sum(indCall)))^2 + (matrix(clObj$centers[,2],nrow=sum(!indSE),ncol=sum(indCall),byrow=TRUE) - matrix(X[!indSE,2],nrow=sum(!indSE),ncol=sum(indCall)))^2
    dAll <- (matrix(centers[,1],nrow=sum(!indSE),ncol=nCl,byrow=TRUE) - matrix(X[!indSE,1],nrow=sum(!indSE),ncol=nCl))^2 + (matrix(centers[,2],nrow=sum(!indSE),ncol=nCl,byrow=TRUE) - matrix(X[!indSE,2],nrow=sum(!indSE),ncol=nCl))^2
    clCluster[!indSE] <- apply(dAll,1,function(x) which(rank(x,ties.method='random')==1))
  }
  #for (a in 1:nCl){
  #  if (!indCall[a]){
  #    clCluster[clCluster>=a] <- clCluster[clCluster>=a] + 1
  #  }
  #}
  clObj$cluster <- clCluster

  ## Update cluster-centers etc.
  clObj$centers <- matrix(NA,nrow=nCl,ncol=2)
  newCent1 <- tapply(X[,1],clCluster,median,na.rm=TRUE) #NB! median
  newCent2 <- tapply(X[,2],clCluster,median,na.rm=TRUE) #NB! median
  clObj$centers[as.numeric(names(newCent1)),] <- cbind(newCent1,newCent2)
  
  clObj$size <- rep(0,nCl)
  newSizes <- tapply(clCluster,clCluster,length)
  clObj$size[as.numeric(names(newSizes))] <- newSizes

  if (sum(!indSE)>0){
    clObj$withinss <- rep(NA,nCl)
    if (wss.update){   #Save time by not calculating withinss
      ws1 <- tapply(X[,1],clCluster,function(x) (x-rep(mean(x,na.rm=TRUE),length(x)))^2)
      ws2 <- tapply(X[,2],clCluster,function(x) (x-rep(mean(x,na.rm=TRUE),length(x)))^2)
      newWss <- rep(NA,nCl)
      for (i in 1:nCl){
        newWss[i] <- sum(ws1[[i]] + ws2[[i]])
      }
      clObj$withinss[as.numeric(names(ws1))] <- newWss
    }
  }
  class(clObj) <- 'kmeans'
  clObj
}



#Null hypothesis: the population is in Hardy-Weinberg equilibrium.
#'afList' gives the candidate splits of the allele frequency between the two paralogues
#when testing tetraploid loci; a value of .5 means the AF is the same at both loci.
testHardyWeinberg <- function(sizes,bestConf,polyCent=generatePolyCenters(ploidy='di'),afList=seq(0,.5,.05)){
  if (is.character(bestConf))
    bestConf <- which(polyCent$classification==bestConf)
  nPheno <- polyCent$size[bestConf]
  nTot <- sum(sizes)
  af <- rep(NA,2)
  if (nPheno==1){
    chiSq <- 0
    df <- 1   #dummy-variable
    if (bestConf==1){
      af[1] <- 1
    }else if (bestConf==2){
      af <- 0:1
    }else if (bestConf==3){
      af[1] <- 0
    }
  }else if (nPheno==3){
    p <- (2*sizes[1]+sizes[2])/(2*nTot)
    if (any(p==c(0,1))){
      chiSq <- 0
    }else{
      q <- 1-p
      expCounts <- rep(NA,nPheno)
      expCounts[1] <- p^2*nTot          #AA
      expCounts[2] <- 2*p*q*nTot        #AB
      expCounts[3] <- q^2*nTot          #BB
      chiSq <- sum((sizes-expCounts)^2/expCounts)
    }
    df <- 1
    af[1] <- p
    if (bestConf==5){
      af[2] <- 1
    }else if (bestConf==6){
      af[2] <- 0
    }
  }else if (nPheno==5){
    p <- (4*sizes[1]+3*sizes[2]+2*sizes[3]+sizes[4])/(4*nTot)  #'sizes' contains the 5 cluster-sizes
    if (any(p==c(0,1))){
      chiSq <- 0
      af <- rep(p,2)
    }else{
      iLim <- min(1-1/(2*p),1/(2*p))
      if (any(afList<iLim))  #Negative af's not allowed
        afList <- c(iLim,afList[afList>iLim])
      nTry <- length(afList)   #Try different allowed weights between p1 and p2
      expCounts <- matrix(nrow=nTry,ncol=nPheno)
      chiSqs <- rep(Inf,nTry)
      afs <- matrix(nrow=nTry,ncol=2)
      for (i in 1:nTry){
        p1 <- 2*p*afList[i]
        p2 <- 2*p*(1-afList[i])   #mean(AFs)=p (AFs sum to 2*p)
        if (all(c(p1,p2)<=1)){
          q1 <- 1-p1
          q2 <- 1-p2
          expCounts[i,1] <- p1^2*p2^2*nTot              #AAAA
          expCounts[i,2] <- 2*p1*p2*(p1*q2+p2*q1)*nTot      #AAAB
          expCounts[i,3] <- (p1^2*q2^2+p2^2*q1^2+4*p1*q1*p2*q2)*nTot #AABB
          expCounts[i,4] <- 2*q1*q2*(p1*q2+p2*q1)*nTot      #ABBB
          expCounts[i,5] <- q1^2*q2^2*nTot          #BBBB
          indi <- expCounts[i,]>0
          chiSqs[i] <- sum((sizes[indi]-expCounts[i,indi])^2/expCounts[i,indi])
          afs[i,] <- c(p1,p2)
        }
      }
      sChi <- sort(chiSqs,index.return=TRUE)
      chiSq <- chiSqs[sChi$ix[1]]
      af <- afs[sChi$ix[1],]
    }
    df <- 3    #Number of genotypes/clusters - number of alleles
  }else{
    stop(paste('HW-testing not implemented for',nPheno,'cluster(s)'))
  }
  pVal <- pchisq(chiSq,df=df,lower.tail=FALSE)
  HWstats <- data.frame(chiSq=chiSq,df=df,pVal=pVal,af1=af[1],af2=af[2])
  HWstats
}
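
#Example (illustrative): a diploid marker with genotype counts in exact HW-proportions
#gives a chi-squared statistic of zero and an estimated allele frequency of 0.5
#  testHardyWeinberg(sizes=c(25,50,25), bestConf='SNP',
#                    polyCent=generatePolyCenters(ploidy='di'))
#  #returns chiSq=0, df=1, pVal=1, af1=0.5, af2=NA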



#Validates genotype calls against the pedigree; returns 'BSRed' updated with a 'ped.check' assay-data matrix and a 'Ped.Errors' feature column.
#PedigreeID given as '<p><mmm><fff><oo>', where 'p', 'mmm', 'fff' and 'oo' are unique
#identifiers for population, mother, father, and individual within full-sib group,
#respectively. '000' means founding parent, '999' means unknown.
validateCallsPedigree <- function(BSRed){
  assayData(BSRed)$ped.check <- matrix(nrow=nrow(BSRed),ncol=ncol(BSRed),dimnames=list(featureNames(BSRed),sampleNames(BSRed)))
  if (!'PedigreeID' %in% colnames(pData(BSRed))){
    stop('PedigreeID not provided!')
  }
  sampIDs <- pData(BSRed)$PedigreeID
  indFamily <- unique(substr(sampIDs,1,7)[!sampIDs%in%''])
  indFounding <- substr(indFamily,2,4)=='000' | substr(indFamily,5,7)=='000'
  indFamily <- indFamily[!indFounding]
  nFull <- length(indFamily)
  
  #Establish correct classification and adjust MSV3-calls to diploid
  callType <- fData(BSRed)$Classification
  if (!is.null(fData(BSRed)$Manual.Calls.R)){
    iManual <- !fData(BSRed)$Manual.Calls.R%in%''
    callType[iManual] <- fData(BSRed)$Manual.Calls.R[iManual]
  }
  callMatr <- assayData(BSRed)$call
  callMatr[callType%in%'MSV-a',] <- callMatr[callType%in%'MSV-a',]*2
  callMatr[callType%in%'MSV-b',] <- callMatr[callType%in%'MSV-b',]*2-1

  for (j in 1:nFull){
    jf <- sampIDs %in% paste(substr(indFamily[j],1,1),'000',substr(indFamily[j],5,7),'00',sep='')
    jm <- sampIDs %in% paste(substr(indFamily[j],1,4),'00000',sep='')
    j.fam <- substr(sampIDs,1,7) %in% indFamily[j] &! (jf|jm)
    pCalls <- matrix(callMatr[,jf|jm],nrow=nrow(BSRed))
    oCalls <- matrix(callMatr[,j.fam],nrow=nrow(BSRed))
    if (ncol(pCalls)==1){
      assayData(BSRed)$ped.check[,j.fam] <- oCalls>=rep(pCalls,sum(j.fam))-.5 & oCalls<=rep(pCalls,sum(j.fam))+.5
    }else if (ncol(pCalls)==2){
      indNA <- apply(pCalls,1,function(x) any(is.na(x)))
      indLH <- pCalls[,1]<=.5 & pCalls[,2]>=.5 & !indNA
      assayData(BSRed)$ped.check[indLH,j.fam] <- oCalls[indLH,]<=rep(pCalls[indLH,1],sum(j.fam))+.5 & oCalls[indLH,]>=rep(pCalls[indLH,2],sum(j.fam))-.5
      indHL <- pCalls[,2]<=.5 & pCalls[,1]>=.5 & !indLH & !indNA
      assayData(BSRed)$ped.check[indHL,j.fam] <- oCalls[indHL,]<=rep(pCalls[indHL,2],sum(j.fam))+.5 & oCalls[indHL,]>=rep(pCalls[indHL,1],sum(j.fam))-.5
      indL <- which(apply(pCalls,1,function(x) all(x<.5)))
      assayData(BSRed)$ped.check[indL,j.fam] <- oCalls[indL,]<=apply(matrix(pCalls[indL,],nrow=length(indL),ncol=2),1,sum)
      indH <- which(apply(pCalls,1,function(x) all(x>.5)))
      assayData(BSRed)$ped.check[indH,j.fam] <- oCalls[indH,]>=apply(matrix(pCalls[indH,],nrow=length(indH),ncol=2),1,sum)-1
      indNA1 <- apply(pCalls,1,function(x) sum(is.na(x))==1)
      pRed <- t(pCalls[indNA1,])[!is.na(t(pCalls[indNA1,]))]
      assayData(BSRed)$ped.check[indNA1,j.fam] <- oCalls[indNA1,]>=rep(pRed,sum(j.fam))-.5 & oCalls[indNA1,]<=rep(pRed,sum(j.fam))+.5
    }else if (ncol(pCalls)>2){
      stop('More than two parents. Genotyped repeatedly, or ped-error?')
    }
  }
  nErrors <- apply(assayData(BSRed)$ped.check,1,function(x) sum(!x,na.rm=TRUE))
  fData <- data.frame(Ped.Errors=nErrors)
  fMetadata <- data.frame(labelDescription='Number of pedigree-errors, summed over offspring')
  featureData <- new('AnnotatedDataFrame', data=fData, varMetadata=fMetadata, dimLabels=c('featureNames','featureColumns'))
  indx <- !colnames(fData(BSRed))%in%'Ped.Errors'
  featureData(BSRed) <- combine(featureData(BSRed)[,indx], featureData)
  validObject(BSRed)
  BSRed
}
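
#Example (illustrative sketch, not run; assumes genotypes have been called and that the
#PedigreeID scheme above is used, e.g. offspring '100200301' has mother '100200000'
#and father '100000300'):
#  BSRed <- validateCallsPedigree(BSRed)
#  table(assayData(BSRed)$ped.check, useNA='ifany')   #TRUE = call consistent with parents
#  summary(fData(BSRed)$Ped.Errors)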



#Calculates, for each array, the ratio of non-failed markers that are called.
#If 'inclPedErrors' is TRUE, calls violating the pedigree count as missing.
countFailedSNP <- function(BSRed,inclPedErrors=TRUE){
  classMatr <- assayData(BSRed)$call
  if (inclPedErrors & ! is.null(assayData(BSRed)$ped.check)){
    classMatr[!assayData(BSRed)$ped.check] <- NA
    lStr <- 'Ratio of passed markers (ped-errors count as failed)'
  }else{
    lStr <- 'Ratio of passed markers (ped-errors not considered)'
  }
  iPass <- !fData(BSRed)$Classification %in% c('FAIL','MONO-filt')
  nErrors <- apply(classMatr[iPass,],2,function(x) sum(is.na(x)))
  pData <- data.frame(passRatio = 1-nErrors/sum(iPass))
  pMetadata <- data.frame(labelDescription = lStr)
  phenoData <- new('AnnotatedDataFrame', data=pData, varMetadata=pMetadata, dimLabels=c('sampleNames','sampleColumns'))
  phenoData(BSRed) <- combine(phenoData(BSRed),phenoData)
  validObject(BSRed)
  BSRed
}
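
#Example (illustrative sketch, not run):
#  BSRed <- countFailedSNP(BSRed, inclPedErrors=TRUE)
#  summary(pData(BSRed)$passRatio)   #per-array ratio of called, non-failed markers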



#Plots theta against intensity for the selected markers, coloured by genotype call; pedigree-errors are highlighted.
plotGenotypes <- function(BSRed,markers=1:min(nrow(BSRed),64),indHighlight=NULL,ploidy='tetra',indicate.SE=FALSE,retFrames=FALSE,nC=NULL,mai=NULL,mNoise=NULL,main=NULL){
  nSNPs <- length(markers)
  pp <- par()
  if (is.null(nC)){
    nC <- ceiling(sqrt(nSNPs))
  }
  nR <- ceiling(nSNPs/nC)
  if (is.null(mai)){
    mai <- c(1,1,1,.5)/sqrt(min(nC,nR))
  }
  par(mfrow=c(nR,nC),mai=mai,bg='white')
  
  Theta <- matrix(assayData(BSRed)$theta[markers,],nrow=nSNPs)
  R <- matrix(assayData(BSRed)$intensity[markers,],nrow=nSNPs)
  Call <- matrix(assayData(BSRed)$call[markers,],nrow=nSNPs)
  if (indicate.SE)
    callProb <- 1/matrix(assayData(BSRed)$SE[markers,],nrow=nSNPs)
  pedCheckP <- assayData(BSRed)$ped.check.parents[markers,]
  if (!is.null(pedCheckP)){  #Use 'ped.check.parents' only if valid for all markers
    classR <- fData(BSRed)$Manual.Calls.R[markers]
    if (is.null(classR)){
      ii <- rep(TRUE,nSNPs)
    }else{
      ii <- !classR%in%'FAIL'
    }
    useP <- !any(apply(is.na(pedCheckP[ii,]),1,all))
  }else{
    useP <- FALSE
  }
  if (!useP){ #Use 'ped.check' rather than 'ped.check.parents'
    pedCheck <- assayData(BSRed)$ped.check[markers,]
    if (!is.null(pedCheck)){
      pedCheck <- matrix(pedCheck,nrow=nSNPs)
      pedCheck[is.na(pedCheck)] <- TRUE
      pedParents <- matrix(0,nrow=nSNPs,ncol=ncol(BSRed))
      pedParents[!pedCheck] <- 1
    }else{
      pedParents <- NULL
    }
  }else{
    pedParents <- NULL
  }
  if (is.null(mNoise)){
    mNoise <- rep(mean(pData(BSRed)$noiseIntensity,na.rm=TRUE),nSNPs)
  }
  hetLine <- c(.5,.5,0,1e6)
  cex <- 50/max(50,min(ncol(BSRed),200))   #No larger than one, no smaller than 1/4
  if (!is.null(indHighlight))
    cexhl <- 50/max(50,min(length(indHighlight),100))
  
  switch(ploidy,
         di={
           stop('ploidy=\'di\' not implemented')
         },
         tetra={
           palette(c('red','magenta','blue','cyan','seagreen','black','purple','lightblue'))
           indTriplets <- Call==1/3 | Call==2/3 | is.na(Call)
           Call[!indTriplets] <- Call[!indTriplets]*4 + 1
           Call[is.na(Call)] <- 6
           Call[Call==1/3] <- 7
           Call[Call==2/3] <- 8
           for (i in 1:nSNPs){
             xylim <- c(-0.6,1.6,0,max(R[i,],na.rm=TRUE))
             plot.new()
             plot.window(xylim[1:2],xylim[3:4]); axis(1); axis(2)
             if (indicate.SE){
               points(Theta[i,],R[i,],pch=16,col=Call[i,],cex=callProb[i,]*2/max(callProb[1,],na.rm=TRUE))
             }else if (!is.null(indHighlight)){
               points(Theta[i,-indHighlight],R[i,-indHighlight],pch=16,cex=cex,col=Call[i,-indHighlight])
               points(Theta[i,indHighlight],R[i,indHighlight],pch=16,cex=cexhl,col='yellow2')
             }else{
               points(Theta[i,],R[i,],pch=16,cex=cex,col=Call[i,])
               if (useP){
                 points(Theta[i,pedCheckP[i,]%in%1],R[i,pedCheckP[i,]%in%1],pch=4,lwd=3,col='yellow2')
                 points(Theta[i,pedCheckP[i,]%in%2],R[i,pedCheckP[i,]%in%2],pch=1,lwd=3,col='yellow2')
               }else if (!is.null(pedCheck)){
                 points(Theta[i,!pedCheck[i,]],R[i,!pedCheck[i,]],pch=4,lwd=3,col='yellow2')
                 famInd <- unique(substr(pData(BSRed)$PedigreeID[!pedCheck[i,]],1,7))
                 for (j in 1:length(famInd)){
                   jf <- pData(BSRed)$PedigreeID %in% paste(substr(famInd[j],1,1),'000',substr(famInd[j],5,7),'00',sep='')
                   jm <- pData(BSRed)$PedigreeID %in% paste(substr(famInd[j],1,4),'00000',sep='')
                   points(Theta[i,jf|jm],R[i,jf|jm],pch=1,lwd=3,col='yellow2')
                   pedParents[i,jf|jm] <- 2
                 }
               }
             }
             if (is.null(main)){
               title(main=featureNames(BSRed)[markers[i]],xlab='theta',ylab='intensity')
             }else{
               title(main=main[i],xlab='theta',ylab='intensity')
             }
             lines(c(0,0),xylim[3:4],col='gray')
             lines(c(1,1),xylim[3:4],col='gray')
             lines(hetLine[1:2],hetLine[3:4],col='gray')
             lines(xylim[1:2],rep(mNoise[i],2),col='red')
           }},
         pooled={
           stop('ploidy=\'pooled\' not implemented')
         }
         ) #end switch
  par(pp[c('mfrow','mai')])
  if (retFrames){
    xy <- list()
    for (i in 1:nSNPs){
      xy[[i]] <- data.frame(Theta=Theta[i,],R=R[i,],Call=Call[i,],pedErrors=pedParents[i,])
    }
    xy
  }
}
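
#Example (illustrative sketch, not run; assumes genotypes have been called so that the
#'call' assay-data element and the 'Classification' feature column exist):
#  plotGenotypes(BSRed, markers=1:16)
#  plotGenotypes(BSRed, markers=which(fData(BSRed)$Classification%in%'MSV-5'))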



#Interactively define clusters for a given marker.
#The optional parameter 'gg' is an instance of class 'GGobi'
manualCall <- function(marker,cntIdeal,classification,gg=NULL,close.gg=TRUE){
  nCl <- length(cntIdeal)
  if (is.null(marker$Call))
    marker$Call <- rep(NA,nrow(marker))
  if (is.null(marker$PedCheck) & !is.null(marker$PedigreeID))
    marker$PedCheck <- validateSingleCall(marker,classification)
  if (is.null(gg)){
    gg <- ggobi(marker[,c('Theta','R')],name='marker')
  }else{
    if (!'GGobi'%in%class(gg) | nrow(gg$marker)!=nrow(marker))
      stop('Arguments \'marker\' and \'gg\' not compatible')
  }
  dd <- displays(gg)[[1]]
  imode(dd) <- 'Brush'
  str.3 <- sprintf('Modify which cluster? (in [1,%d], or use "8" to assign NAs) ',nCl)
  str.8 <- sprintf("Brush calls to erase, OR
press <1> to return: ")
  str.end <- 'Satisfied with assignments? (y/n) '
  glc <- unlist(as.numeric(marker$Call)*4+1)
  glc[is.na(glc)] <- rep(8,sum(is.na(glc)))
  glyph_colour(gg$marker) <- glc
  glt <- glyph_type(gg$marker) <- rep(6,nrow(marker))
  if (!is.null(marker$PedigreeID)){
    iErr <- marker$PedCheck>0
    glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
    glyph_type(gg$marker)[iErr] <- 7
  }
  OK <- FALSE
  while (!OK){
    cl.i <- 1
    shadowed(gg$marker) <- rep(FALSE,nrow(marker))
    while (cl.i<=nCl){
      str.i <- sprintf("Brush points belonging to cluster %d (of %d), OR
press <1> to skip cluster, OR
press <2> to skip remaining clusters, OR
press <3> to modify a specific cluster or set points to NA, OR
press <4> to start from scratch: ",cl.i,nCl)
      actn <- readline(prompt=str.i)
      if (actn=='1'){
        cl.i <- cl.i + 1
      }else if (actn=='2'){
        cl.i <- nCl + 1
      }else if (actn=='3'){
        OK.3 <- FALSE
        while (!OK.3){
          cl.i <- as.numeric(readline(str.3))
          OK.3 <- cl.i %in% c(1:nCl,8)
        } #end while OK.3
      }else if (actn=='4'){
        cl.i <- 1
        glc <- glyph_colour(gg$marker) <- rep(8,nrow(marker))
        marker$Call <- rep(NA,nrow(marker))
        if (!is.null(marker$PedCheck))
          marker$PedCheck <- rep(0,nrow(marker))
      }else{
        iCluster <- selected(gg$marker)
        marker$Call[iCluster] <- rep(cntIdeal[cl.i],sum(iCluster)) #(glc[iCluster]-1)/4
        glc[iCluster] <- rep(cntIdeal[cl.i]*4+1,sum(iCluster))
        assignedCl <- unique(glc[glc<=5])
        glyph_colour(gg$marker) <- glc
        if (!is.null(marker$PedigreeID) & length(assignedCl)>1){
          selM <- unique(substr(marker$PedigreeID[iCluster],1,4))
          selM <- selM[!substr(selM,2,4)%in%c('000','999')]
          redF <- paste(substr(marker$PedigreeID,1,1),substr(marker$PedigreeID,5,7),sep='')
          selF <- unique(redF[iCluster])
          selF <- selF[!substr(selF,2,4)%in%c('000','999')]
          indSel <- substr(marker$PedigreeID,1,4)%in%selM | redF%in%selF | marker$PedCheck>0
          marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],classification)
          iErr <- marker$PedCheck>0
          glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
          glyph_type(gg$marker) <- glt
          glyph_type(gg$marker)[iErr] <- 7
        } #end if is.null
        cl.i <- cl.i + 1
      } #end if actn
    } #end while cl.i
    
    if (cl.i==8){
      actn <- readline(prompt=str.8)
      if (actn=='1'){
      }else{
        iErase <- selected(gg$marker)
        glc[iErase] <- rep(8,sum(iErase))
        marker$Call[iErase] <- rep(NA,sum(iErase))
        glyph_colour(gg$marker) <- glc
        if (!is.null(marker$PedCheck)){
          indSel <- marker$PedCheck>0
          marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],classification)
          iErr <- marker$PedCheck>0
          glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
          glyph_type(gg$marker) <- glt
          glyph_type(gg$marker)[iErr] <- 7
        } #end if is.null
      } #end if actn
    }else{
      nErr <- sum(marker$PedCheck>0)
      if (nErr>0){
        shadowed(gg$marker) <- glyph_colour(gg$marker)%in%c(1:5,8)
        str.ped <- sprintf('Warning: %d ped-errors!
Brush points to un-assign, OR
press <1> to un-assign all erroneous offspring, OR
press <2> to modify clusters, OR
press <3> to disregard errors and finish: ',nErr)
        actn <- readline(prompt=str.ped)
        if (actn=='1'){
          iErase <- marker$PedCheck==1
          glc[iErase] <- rep(8,sum(iErase))
          marker$Call[iErase] <- rep(NA,sum(iErase))
          marker$PedCheck[marker$PedCheck>0] <- 0
          glyph_colour(gg$marker) <- glc
          glyph_type(gg$marker) <- glt
          #OK <- TRUE
        }else if (actn=='2'){
        }else if (actn=='3'){
          OK <- TRUE
        }else{
          iErase <- selected(gg$marker)
          glc[iErase] <- rep(8,sum(iErase))
          marker$Call[iErase] <- rep(NA,sum(iErase))
          glyph_colour(gg$marker) <- glc
          if (!is.null(marker$PedCheck)){
            indSel <- marker$PedCheck>0
            marker$PedCheck[indSel] <- validateSingleCall(marker[indSel,],classification)
            iErr <- marker$PedCheck>0
            glyph_colour(gg$marker)[iErr] <- 5 + unlist(marker$PedCheck[iErr])
            glyph_type(gg$marker) <- glt
            glyph_type(gg$marker)[iErr] <- 7
          } #end if is.null
        } #end if actn
      }else{
        finished <- readline(prompt=str.end)
        OK <- finished %in% c('y','Y','yes','Yes','YES','1','T','TRUE')
      } #end if nErr
    } #end if cl.i
  } #while !OK
  if (close.gg){
    close(gg)
    return(marker)
  }else{
    shadowed(gg$marker) <- rep(FALSE,nrow(marker))
    gg$marker$Call <- as.numeric(marker$Call)
    gg$marker$PedCheck <- marker$PedCheck
    return(gg)
  } #end if close.gg
}
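
#Example (illustrative sketch, not run; interactive, requires 'rggobi'. 'cntIdeal' and
#'classification' must match one of the configurations from generatePolyCenters):
#  marker <- data.frame(Theta=assayData(BSRed)$theta[1,], R=assayData(BSRed)$intensity[1,],
#                       PedigreeID=pData(BSRed)$PedigreeID, stringsAsFactors=FALSE)
#  marker <- manualCall(marker, cntIdeal=c(0,.5,1), classification='SNP')
#  table(marker$Call, useNA='ifany')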



#Validates a single SNP/marker against the pedigree; returns a vector of error-flags (offspring in error = 1, implicated parent = 2)
validateSingleCall <- function(marker,classification){
  #indFamily <- unique(substr(marker$PedigreeID,2,7))
  #indFounding <- substr(indFamily,1,3)=='000' | substr(indFamily,4,6)=='000'
  indFamily <- unique(substr(marker$PedigreeID,1,7))
  indFamily <- indFamily[!indFamily%in%'']
  indFounding <- substr(indFamily,2,4)=='000' | substr(indFamily,5,7)=='000'
  indFamily <- indFamily[!indFounding]
  nFull <- length(indFamily)
  pedErrors <- rep(0,nrow(marker))
  if (classification%in%'MSV-a'){
    callVec <- marker$Call*2
  }else if (classification%in%'MSV-b'){
    callVec <- marker$Call*2-1
  }else{
    callVec <- marker$Call
  }
  for (j in 1:nFull){
    #jf <- marker$PedigreeID==paste('1000',substr(indFamily[j],4,6),'00',sep='')
    #jm <- marker$PedigreeID==paste('1',substr(indFamily[j],1,3),'00000',sep='')
    #j.fam <- substr(marker$PedigreeID,2,7)==indFamily[j] &! (jf|jm)
    jf <- marker$PedigreeID %in% paste(substr(indFamily[j],1,1),'000',substr(indFamily[j],5,7),'00',sep='')
    jm <- marker$PedigreeID %in% paste(substr(indFamily[j],1,4),'00000',sep='')
    j.fam <- substr(marker$PedigreeID,1,7) %in% indFamily[j] &! (jf|jm)
    bothP <- c(which(jf),which(jm))
    pCalls <- sort(callVec[bothP][!is.na(callVec[bothP])],index.return=TRUE)
    oCalls <- callVec[j.fam]
    if (length(pCalls$x)==1){
      iErr <- (oCalls<pCalls$x-.5 | oCalls>pCalls$x+.5) &! is.na(oCalls)
      if (any(iErr)){
        pedErrors[j.fam][iErr] <- 1
        pedErrors[bothP] <- 2
      } #if any
    }else if (length(pCalls$x)==2){
      if (pCalls$x[1]<=.5 & pCalls$x[2]>=.5){    #Alt. 1
        iHi <- oCalls>pCalls$x[1]+.5 &! is.na(oCalls)
        if (any(iHi)){
          pedErrors[j.fam][iHi] <- 1
          pedErrors[bothP[pCalls$ix[1]]] <- 2
        } #if any
        iLo <- oCalls<pCalls$x[2]-.5 &! is.na(oCalls)
        if (any(iLo)){
          pedErrors[j.fam][iLo] <- 1
          pedErrors[bothP[pCalls$ix[2]]] <- 2
        } #if any
      }else if(all(pCalls$x<.5)){    #Alt. 2
        iHi <- oCalls>sum(pCalls$x) &! is.na(oCalls)
        if (any(iHi)){
          pedErrors[j.fam][iHi] <- 1
          pedErrors[bothP] <- 2
        } #if any
      }else if(all(pCalls$x>.5)){      #Alt. 3
        iLo <- oCalls<sum(pCalls$x)-1 &! is.na(oCalls)
        if (any(iLo)){
          pedErrors[j.fam][iLo] <- 1
          pedErrors[bothP] <- 2
        } #if any
      } #if alternatives
    }else if (length(pCalls$x)>2){
      stop('More than two parents. Genotyped repeatedly, or ped-error?')
    } #end if length(pCalls)
  } #end for j
  pedErrors
}
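#Usage sketch (not run): 'marker' is assumed to be a data frame holding the columns
#used above ('PedigreeID', 'Call') for one marker, as in the interactive caller;
#the value .3 and the classification label are only illustrative.
#  pedErrors <- validateSingleCall(marker, classification='MSV-a')
#  table(pedErrors)   #0 = consistent, 1 = conflicting offspring, 2 = implicated parent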


  
#Set values for transformation/normalisation and return them in a list. Suggests default values.
setNormOptions <- function(shearInf1=TRUE, transf='root', method='medianAF',
                           minSize=suggestSh(shearInf1)$minSize, prob=suggestSh(shearInf1)$prob,
                           nBins=suggestSh(shearInf1)$nBins, dist=suggestTr(transf)$dist,
                           pNorm=suggestTr(transf)$pNorm, nthRoot=suggestTr(transf)$nthRoot,
                           offset=suggestTr(transf)$offset, scale=suggestNo(method)$scale,
                           nSD=3, breaks=200){
  suggestSh <- function(shearInf1){
    opt <- NULL
    if (shearInf1){
      opt$minSize <- 20; opt$prob <- .1; opt$nBins <- 50
    }else{
      opt$minSize <- 40; opt$prob <- .05; opt$nBins <- 50
    }
    opt
  }
  suggestTr <- function(transf){
    opt <- NULL
    if (transf=='none'){
      opt$dist <- 'manhattan'; opt$pNorm <- NULL; opt$nthRoot <- NULL; opt$offset <- NULL
    }else if (transf=='log'){
      opt$dist <- 'minkowski'; opt$pNorm <- 5; opt$nthRoot <- NULL; opt$offset <- 200
    }else if (transf=='root'){
      opt$dist <- 'minkowski'; opt$pNorm <- 5; opt$nthRoot <- 4; opt$offset <- 200
    }
    opt
  }
  suggestNo <- function(method){
    opt <- NULL
    if (method=='medianAF'){
      opt$scale <- NULL
    }else if (method=='linPeak'){
      opt$scale <- 0.75
    }
    opt
  }
  normOpts <- list(shearInf1=shearInf1, minSize=minSize, prob=prob, nBins=nBins,
                   transf=transf, nthRoot=nthRoot, offset=offset, dist=dist,
                   pNorm=pNorm, method=method, scale=scale, nSD=nSD, breaks=breaks)
  normOpts
}
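#Usage sketch (not run): defaults are suggested from the 'shearInf1', 'transf' and
#'method' arguments; any individual value can be overridden explicitly. The
#overridden values below are only illustrative.
#  normOpts <- setNormOptions()                          #all suggested defaults
#  normOpts <- setNormOptions(transf='log', breaks=100)  #override selected values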



#Set parameters for genotype-calling and return them in a list. Suggests default values.
#Defaults for largeSample==TRUE are calibrated for more than 3000 objects;
#defaults for largeSample==FALSE are calibrated for 100 objects.
#Calibrated for transf='root'; if transf='none', increase devCentLim and wSpreadLim.
#Earlier versions: mseLim=1, probsQRange=c(.01,.5,.99)
setGenoOptions <- function(largeSample=FALSE, snpPerArrayLim=.8, arrayPerSnpLim=0,
                           ploidy='tetra', filterLim=0, detectLim=.8,
                           wSpreadLim=suggestGeno(largeSample)$wSpreadLim, devCentLim=.35,
                           hwAlpha=suggestGeno(largeSample)$hwAlpha,
                           probsIndSE=suggestGeno(largeSample)$probsIndSE,
                           afList=seq(0,.5,.05), clAlpha=suggestGeno(largeSample)$clAlpha,
                           rPenalty=2, rotationLim=suggestGeno(largeSample)$rotationLim,
                           minClLim=5, nSdOverlap=2,
                           minBin=suggestGeno(largeSample)$minBin,
                           binWidth=suggestGeno(largeSample)$binWidth){
  suggestGeno <- function(largeSample){
    opt <- NULL
    if (largeSample){
      opt$probsIndSE=.9; opt$clAlpha=.01; opt$rotationLim=2; opt$wSpreadLim=.07; opt$hwAlpha=1e-10; opt$minBin=2; opt$binWidth=.05
    }else{
      opt$probsIndSE=NULL; opt$clAlpha=.05; opt$rotationLim=5; opt$wSpreadLim=.1; opt$hwAlpha=1e-4; opt$minBin=2; opt$binWidth=.1
    }
    opt
  }
  genoOpts <- list(snpPerArrayLim=snpPerArrayLim, arrayPerSnpLim=arrayPerSnpLim,
                   ploidy=ploidy, filterLim=filterLim, detectLim=detectLim,
                   wSpreadLim=wSpreadLim, devCentLim=devCentLim, hwAlpha=hwAlpha,
                   probsIndSE=probsIndSE, afList=afList, clAlpha=clAlpha,
                   rPenalty=rPenalty, rotationLim=rotationLim, minClLim=minClLim,
                   nSdOverlap=nSdOverlap, minBin=minBin, binWidth=binWidth)
  genoOpts
}
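#Usage sketch (not run): the 'largeSample' switch picks between the two sets of
#suggested defaults described above; any argument can still be overridden. The
#override below is only illustrative.
#  gOpts <- setGenoOptions(largeSample=TRUE)
#  gOpts <- setGenoOptions(largeSample=FALSE, wSpreadLim=.12)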


#Set parameters for merging of genotypes and return them in a list. Suggests default values.
setMergeOptions <- function(minC=NULL,noiseQuantile=.75,offspringLim=7,ratioLim=.9,rngLD=5){
  if (!is.null(minC))
    noiseQuantile <- NULL
  mergeOpts <- list(minC=minC,noiseQuantile=noiseQuantile,offspringLim=offspringLim,ratioLim=ratioLim,rngLD=rngLD)
  mergeOpts
}
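#Usage sketch (not run): giving 'minC' explicitly disables the quantile-based
#alternative (noiseQuantile is then set to NULL); the value .3 is only an illustration.
#  mOpts <- setMergeOptions()          #threshold derived from noiseQuantile=.75
#  mOpts <- setMergeOptions(minC=.3)   #fixed threshold instead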



#Takes MSV-5 calls as input and partially resolves the true calls for each paralogue.
#'Partially' because the actual chromosome number is not known, and the duplicate
#calls therefore have to be assigned assuming a known paralogue location for only
#one parent at a time. Where an informative meiosis is found, each offspring is
#called as {Para1,Para2} <- {0,0}, {0,1}, or {1,1}. Can theoretically be used for
#LA by assuming only one known parent.
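#Usage sketch (not run): 'BSRed' is assumed to be an AlleleSetIllumina-type object
#containing MSV-5 markers only, with genotype calls in assayData(BSRed)$call and
#pedigree IDs in pData(BSRed).
#  paraCalls <- unmixParalogues(BSRed)
#  dim(paraCalls$mother)   #2*nrow(BSRed) rows, one per paralogue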
unmixParalogues <- function(BSRed,singleCalls=getSingleCalls(BSRed)){
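  #unmixTheta: split a combined call x (mean dosage over the two paralogues) into
  #two per-paralogue calls, round(x) and 2*x-round(x), sorted; x==0.5 is ambiguous
  #and is left as c(NA,NA)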
  unmixTheta <- function(x){
    X <- rep(NA,2)
    if (x!=.5){
      X[1] <- round(x)
      X[2] <- 2*x - X[1]
      X <- sort(X)
    }
    X
  }
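  #lookupOffspring: x holds the parental indices x[1], x[2] and the offspring
  #indices x[-c(1,2)] into LookUp; a looked-up value of 0.5 is split into the
  #paralogue pair {0,1}, all other values are duplicated for both paralogues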
  lookupOffspring <- function(x,LookUp){
    X <- cbind(LookUp[x[-c(1,2)],x[1],x[2]],LookUp[x[-c(1,2)],x[1],x[2]])
    iHalf <- X[,1]%in%.5
    X[iHalf,1] <- 0
    X[iHalf,2] <- 1
    X <- as.vector(X)
    X
  }

  #Input control
  callType <- fData(BSRed)$Classification
  if (!is.null(fData(BSRed)$Manual.Calls.R)){
    iManual <- !fData(BSRed)$Manual.Calls.R %in% ''
    callType[iManual] <- fData(BSRed)$Manual.Calls.R[iManual]
  }
  if (!all(callType %in% 'MSV-5')){
    stop('This function accepts nothing but MSV-5 markers as input')
  }
  
  #Expand call-tables for father and mother to include both paralogues
  str1 <- paste(rownames(singleCalls),'Para1',sep='_')
  str2 <- paste(rownames(singleCalls),'Para2',sep='_')
  rNames <- as.vector(t(cbind(str1,str2)))
  nRows <- 2*nrow(BSRed)
  paraCalls <- NULL
  paraCalls$father <- paraCalls$mother <- matrix(nrow=nRows,ncol=ncol(BSRed),dimnames=list(rNames,sampleNames(BSRed)))
  paraCalls$mother[seq(1,nRows,2),] <- paraCalls$mother[seq(2,nRows,2),] <- paraCalls$father[seq(1,nRows,2),] <- paraCalls$father[seq(2,nRows,2),] <- singleCalls

  #Create look-up table for valid meioses (dim: 5(offspring) x 5(parent1) x 5(parent2))
  LookUp <- array(dim=rep(5,3))
  LookUp[c(1,6,11,26,27,31,36,51:53,56,61,77,78,82,87,103,108,113)] <- 0
  LookUp[c(7,17,33,42,59,67,84,93,109,119)] <- .5
  LookUp[c(13,18,23,39,44,48,49,65,70,73:75,90,95,99,100,115,120,125)] <- 1
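  #Calls {0, .25, .5, .75, 1} map to array indices 1..5 via call*4+1 (see the
  #construction of 'indLookup' below); entries left NA are parent/offspring
  #combinations that cannot be resolved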

  #Index including only <p><fff> (population + father-ID)
  pp <- paste(substr(pData(BSRed)$'PedigreeID',1,1),substr(pData(BSRed)$'PedigreeID',5,7),sep='')
  
  #Inheritance from mothers, fathers not fixed to paralogues yet
  indMothers <- unique(substr(pData(BSRed)$PedigreeID,1,4))
  indMothers <- indMothers[!substr(indMothers,2,4) %in% c('000','999')]
  for (j in 1:length(indMothers)){
    jm <- pData(BSRed)$PedigreeID %in% paste(indMothers[j],'00000',sep='')
    mCalls <- assayData(BSRed)$call[,jm]
    iNA <- is.na(singleCalls[,jm]) &! is.na(mCalls)
    if (sum(iNA)>0){
      paraCalls$mother[sort(c(which(iNA)*2,which(iNA)*2-1)),jm] <- as.vector(sapply(mCalls[iNA],unmixTheta))
      #Don't bother with homozygotes and unknown 0.5's:
      iCand.m <- !is.na(paraCalls$mother[seq(1,nRows,2),jm]) &! mCalls%in%c(0,1)
      j.fam <- substr(pData(BSRed)$PedigreeID,1,4) %in% indMothers[j] &! jm
      if (sum(j.fam)>2){   #Which minimum family-size to allow?
        indMates <- unique(pp[j.fam])
        indMates <- indMates[!substr(indMates,2,4) %in% '999']
        for (i in 1:length(indMates)){
          jf <- pData(BSRed)$PedigreeID %in% paste(substr(indMates[i],1,1),'000',substr(indMates[i],2,4),'00',sep='')
          fCalls <- assayData(BSRed)$call[,jf]
          i.fam <- pp[j.fam] %in% indMates[i]
          oCalls <- assayData(BSRed)$call[,j.fam][,i.fam]
          iCand.f <- fCalls!=.5 | !is.na(paraCalls$mother[seq(1,nRows,2),jf])
          iCand <- which(iCand.m & iCand.f)
          if (length(iCand)>0){
            indLookup <- matrix(cbind(mCalls,fCalls,oCalls)[iCand,]*4+1,nrow=length(iCand))
            cc <- apply(indLookup,1,lookupOffspring,LookUp)
            dim(cc) <- c(ncol(oCalls),length(iCand)*2)
            paraCalls$mother[sort(c(iCand*2-1,iCand*2)),j.fam][,i.fam] <- t(cc)
          }
        } #for i
      } #if sum(j.fam)
    } #if sum(iNA)
  } #for j
  
  #Inheritance from fathers, mothers not fixed to paralogues yet
  indFathers <- unique(pp)
  indFathers <- indFathers[!substr(indFathers,2,4) %in% c('000','999')]
  for (j in 1:length(indFathers)){
    jf <- pData(BSRed)$PedigreeID %in% paste(substr(indFathers[j],1,1),'000',substr(indFathers[j],2,4),'00',sep