R/qingInternal.R

Defines functions wrComTbl util.vec.replace util.vec.orderPair util.vec.matchVecIdx util.vec.matchVec util.vec.exByKey util.vec.2maIdx util.tbl.exFactorInfo util.str.tokenPicker util.str.splitEx util.str.splitAndOrNot util.str.seqCutter util.str.rmSpacePadder util.str.replace util.str.2CharArray util.sql.groupby util.matrix.rmSparseRow util.matrix.merge util.matrix.insertCol util.matrix.exByKeyInRow util.matrix.delCol util.matrix.csvText util.matrix.colIdx4Match util.matrix.colComp util.matrix.col.shuffle2 util.matrix.col.shuffle util.matrix.clone util.matrix.catSave util.matrix.catm util.matrix.cat util.matrix.2list util.listMatrix.2matrix util.list.rmByKeyVal util.list.ex util.list.2matrix util.it.upTriCombIdx util.it.triMatch2 util.it.triMatch util.it.smallLargeIdxOOLD util.it.smallLargeIdx util.findBrace util.dataframe.round util.dataframe.merge util.dataframe.findColNo util.char.1stStrIdx util.char.1stIdx util.array3d.2matrix util.array.rmEmptyStr txtToHapBkMap txtToGenoMap trioMerge trioFile.proc trio.simuOLD trio.simuDev trio.simu.proposed trio.match.single trio.impuDev trio.impu toolbox.load snpPREFileMatchTrio simuHapMap.build signalRule.setInter signalRule.contr signalRule.addSignal signalRule.2strata.build signal.new setTrioMissingSNP semiAugBkFrame sampleIdxOutsideList sampleHapSemiAugMap2 sampleHapExclude sampleDipSemiAugMap sam.reject sam.Hap.old resample qTraceback qstrsplit qStr.pattern qp qing.mulMatch qing.cut qExpandTable procSemiAugMap moveDir matTBCondOnChild3.finalProc ls.list listExtractor linkageFile.proc isDigitAtLociIdx imputGeno impuBkTDT.scheduler impuBk.scheduler HRCBSpGrp.sp HRCBSpGrp.cons HRCB.famMap.spTrio HRCB.Esp1Rule.spTrioOnBase HRCB.Esp1Rule.sampleKid HRCB.applyRule hapPair.match hapGenoBlockProc hapBkGenoMap2HapMap hapBk2AlleleSeq hap2genotype.m hap2genotype.bk hap2GenoBlock hap.2geno grp.palette grp.kmStep getP.logodds getHapProb2 getHapProb.semiMapFrame getBackParentGeno geno2hapFreq geno.2dStr2BinaMa genMatingTBCondOnChild3 genGenoProb freqmap.reconstruct freqbuild.haponly freq.build freq.allele findMissing findLastPreviousDate find.PsudoControlHap filterHaps2SHaps.check filterHaps2SHaps filterHapIdx2SuperHapSet fHapBkIdx2DipTb exParentDip exIdxFromHap exhaustHapExp exDipProbSemiAugMap exchangeDigit ESp.imputBlock ESp.impuParent.homoKid ESp.impuParent.heterKid ESp.impu1Par.E.sp ESp.impu1Par.E ESp.impu1Par.D.sp ESp.impu1Par.D ESp.impu1Par.C.sp ESp.impu1Par.C ESp.impu1Par.B.sp ESp.impu1Par.B ESp.impu1Par.A.sp ESp.impu1Par.A ESp.impu1Par dfToHapBkMap dfToGenoMap covDipStr2CodedGeno covDipBinaMa2CodedGeno checkMendelianError calHapIdx2SHapSet calHapIdx2SHap bkMap.updateHapFreq bkMap.updateByRule bkMap.superHRCB bkMap.spHap bkMap.spGenoSeq bkMap.shuffle bkMap.LRCB.spTrio bkMap.HRCB.famMap bkMap.HRCB.Esp1Rule.genoSeq bkMap.HRCB.Esp1Rule.Base bkMap.genoFreq bkMap.findHRCBIdx bkMap.ESp.apply1Rule.stepBy bkMap.ESp.apply1Rule bkMap.constr binaTree.toStr binaTree.shuffleNode binaTree.shuffle.findNodePar binaTree.shuffle.findElmPar binaTree.setApply binaTree.patternFormOLD binaTree.patternForm binaTree.parser.pVarElm binaTree.parser.proc binaTree.parser binaTree.merge binaTree.constr binaTree.closeNode binaTree.apply binaTree.addNode binaTree.addElm binaTree.1Level.changeSignal augBkFrame

augBkFrame <-
function(hapBkMap, key, probLeftover = .01){
	keyIndex = which(hapBkMap$keys==key)
	bk = hapBkMap$bks[[keyIndex]]
	
	## extract the original exp and prob, then restandard prob
	estBkExp = as.character(bk[,hapBkMap$expCol])
	estBkProbRe = bk[,hapBkMap$probCol]*(1-probLeftover)
	
	## create the exhaust haplotypes 
	bkSnpLen = bk[1,hapBkMap$hapLenCol]
	exhaustExp = exhaustHapExp(lociCt=bkSnpLen, snpCoding=c(1,2))$hapStr
	exhaustExpCt = length(exhaustExp)
	
	## augmate the additional expression
	rematch = match(estBkExp, exhaustExp)
	
	resiProb = probLeftover / (exhaustExpCt-length(estBkExp))
	resiProb = rep(resiProb, times=exhaustExpCt)
	resiProb[rematch]=estBkProbRe
	
	base = bk[1,]
	newBk = NULL
	for( i in 1:exhaustExpCt ){
		newBk = rbind(newBk, base)
	}
	## replace the expression and probability
	newBk[,hapBkMap$probCol]=resiProb
	newBk[,hapBkMap$expCol]=as.integer(exhaustExp)
	
	## replace the necessary part in hapBkMap
	hapBkMap$bks[[keyIndex]]=newBk
	
	hapBkMap$bkLens[keyIndex]=exhaustExpCt
	
	## other parts, like df, dfStr,  in hapBkMap is not updated because it is not necessary
	
	return(hapBkMap)
	
}

binaTree.1Level.changeSignal <-
function(bina.col, model.signal.cur ){

    bina.col.num = sapply(bina.col, FUN=function(item) {as.numeric(substr(item, 1, nchar(item)-1))})
    bina.col.ct = table(bina.col.num)
    bina.snp.single = paste(dimnames(bina.col.ct)[[1]][bina.col.ct==1], "b", sep="")

    bTree = binaTree.parser(str=model.signal.cur)
    leaves.all = unlist(bTree$elm.vlist)
    leaves.comb = unique(leaves.all[  is.element(unlist(bTree$elm.vlist), bina.snp.single)  ])
    if(length(leaves.comb)>=1){
      for (i in 1:length(leaves.comb)){
        tmp.str = leaves.comb[i]
        ##print(tmp.str)
        model.signal.cur = util.str.replace(str=model.signal.cur,
                           replaced=tmp.str,
                           new=paste(substr(tmp.str, 1, nchar(tmp.str)-1), "a", sep=""),
                           replace.all=TRUE)
        ##print(model.signal.cur)
      }
      
    }
    return(model.signal.cur)
  }

binaTree.addElm <-
function(binaTree,  elm.level, elm.var,  elm.bool){
  # objects for variable only elment for memory reason
  # 1)elm.vlist: a list of vector of variable names, one vector for one element
  # 2)elm.vMa: A matrix for variable only element
  #   matrix col 1:id: list id
  #          col 2:level: level
  #          col 3:bool: boolean term: not(-1) vs 1 ## CHANGED 8JAN09 before:[and(0); or(1); not(-1)]
  
  # two objects for state info
  # curElm: whether current element is a variable only(0) or node(1), or missing(NA)
  # curElmId: the index of current element, or missing(0)
  
  # when the element is a "not" element, the level has to be -1
  ifD = FALSE
 
  if (ifD) print("binaTree.addElm")
  if (ifD) print(qp("elm.level=", elm.level))
  if (ifD) print(qp("elm.var=", elm.var))
  if (ifD) print(qp("elm.bool=", elm.bool))
  if(elm.bool==-1) elm.level=elm.level-1
  
  elm.vlist = binaTree$elm.vlist
  elm.vMa = binaTree$elm.vMa
  
  elm.vlist = c(elm.vlist, list(elm.var))
  
  if(binaTree$curElmId!=0){
    
    elm.lastIdx = max(elm.vMa[,1])
    elm.vMa = rbind(elm.vMa, c(elm.lastIdx+1, elm.level, elm.bool))
  }else{
    ## initialize the empty variables
    elm.lastIdx=0
    elm.vMa[1,]= c(elm.lastIdx+1, elm.level, elm.bool)
  }
  binaTree$elm.vlist = elm.vlist
  binaTree$elm.vMa = elm.vMa

  binaTree$curElm = 0;  #variable element
  binaTree$curElmId = elm.lastIdx+1 # id
  if (ifD) print("after addElm, binaTree")
  if (ifD) print(binaTree)
  return(binaTree)
}

binaTree.addNode <-
function(binaTree,  elm.level, elm.bool){
  ifD = FALSE
  if (ifD) print("binaTree.addNode")
  if (ifD) print(qp("elm.level=", elm.level))
  if (ifD) print(qp("elm.bool=", elm.bool))
   
  # objects for node element. (contain node itself or variable only element)
  # 1)elm.nlist: a list of matrix, each vector for one element
  #    matrix col 1: idx for node or variable element
  #    matrix col 2: is a node(1) or variable element(0)
  # 2)elm.nMa: A matrix for node
  #    matrix col 1:id: list id
  #           col 2:level: levle
  #           col 3:bool: boolean term: and(0); or(1)
  #           col 4:open: open(1)/close(0)
  # two objects for state info
  # curElm: whether current element is a variable only(0) or node(1), or missing(NA)
  # curElmId: the index of current element
  
  ## called when the node is open or adding element
  elm.nlist = binaTree$elm.nlist
  elm.nMa = binaTree$elm.nMa

  ## if elm.nMa has the node open
  if (length(elm.nlist)>=1){
      fil = elm.nMa[,2]==elm.level & elm.nMa[,4]==1
      if(sum(fil, na.rm=TRUE)==1){
        # append to old node
        open.id = elm.nMa[,1][fil]
        
        nMa = elm.nlist[[fil]]
        nMa = rbind(nMa, c(binaTree$curElmId, binaTree$curElm))
        elm.nlist[fil]=list(nMa)
      }else{
        # new node
        elm.lastIdx = max(elm.nMa[,1])
        open.id = elm.lastIdx + 1
        elm.nMa = rbind(elm.nMa, c(open.id, elm.level, elm.bool, 1))

        nMa = matrix(c(binaTree$curElmId, binaTree$curElm), nrow=1, ncol=2)
        elm.nlist=c(elm.nlist, list(nMa))
      }
  }else{
      ## initialize the empty variables
      open.id = 1
      elm.nMa[1,]=c(open.id, elm.level, elm.bool, 1)
      nMa = matrix( c(binaTree$curElmId, binaTree$curElm), nrow=1, ncol=2)
      elm.nlist = c(elm.nlist, list(nMa))
  }
  
  binaTree$elm.nlist = elm.nlist
  binaTree$elm.nMa = elm.nMa
  
  binaTree$curElm = 1;  #node element
  binaTree$curElmId = open.id # id
  if (ifD) print("after addNode, binaTree:")
  if (ifD) print(binaTree)
  return(binaTree)
}

binaTree.apply <-
function(binaTree, data, colnames, re.final=TRUE){
  ifD = FALSE
  ori.rowCt = nrow(data)
  col.idx = 1:ncol(data)
  
  ## data.v: a matrix of boolean (0/1) for each row in binaTree$vMa, ordered by id
  ## data.n: a matrix of boolean (0/1) for each row in binaTree$nMa, ordered by id
  data.v=NULL
  data.n=NULL

  ## data.f: a vector of boolean (0/1) for the node at level 0
  ## or for the variable element if no node is defined.
  data.f=NULL

  data.f = rep(NA, ori.rowCt)
  
  ## process the variable only element first, regardless the level
  vCt = nrow(binaTree$elm.vMa)
  data.v = matrix(NA, ncol=vCt, nrow=ori.rowCt) 
  for( i in 1:vCt){
    cur.row = binaTree$elm.vMa[i,]
    ## col seq= id, level, bool
    ## find the var list
    cur.elm = binaTree$elm.vlist[[ i ]]
    cur.col = col.idx[is.element(colnames, cur.elm)]
    if (length(cur.col)!=length(cur.elm)) stop()

    cur.bool = cur.row[3]
    if(length(cur.col)>1){
     
      if(cur.bool==0){ ## and
        data.v[,i]=as.numeric(apply(data[,cur.col], 1, all))
      }
      if(cur.bool==1){ ## or
        data.v[,i]=as.numeric(apply(data[,cur.col], 1, any))
      }
    }else{
      if(cur.bool==-1){ ## not
        data.v[,i]=as.numeric(1-data[,cur.col])
      }else{
        data.v[,i]=data[,cur.col]
      }
    }
  } # for( i in 1:vCt){

  ## if have node element, process the node from deeper level to zero
  data.n=NULL
  nCt = length(binaTree$elm.nlist)
  if(nCt>=1){
    nMa = binaTree$elm.nMa
    
    data.n=matrix(NA, ncol=nCt, nrow=ori.rowCt)

    new.order = order(nMa[,2], decreasing = TRUE)
    if(ifD) print(nMa)
    nMa = nMa[new.order, ,drop=FALSE]
    if(ifD) print(nMa)
    #nlist = binaTree$elm.nlist[new.order]
    nlist = binaTree$elm.nlist
    if(ifD) print(nlist)
    for( i in 1:nCt ){
      cur.row = nMa[i, ,drop=FALSE]
      if(ifD) print("cur node row after sorting")
      if(ifD) print(cur.row)
      ## col seq= id, level, bool, open
      
      ## find the var list
      cur.elm = nlist[[ cur.row[1] ]]
      if(ifD) print("cur node element")
      if(ifD) print(cur.elm)
      dd.velm=NULL
      dd.nelm=NULL
      # if variable element, get from data.v
      if(sum(cur.elm[,2]==0)>=1){
        dd.velm = data.v[, cur.elm[,1][cur.elm[,2]==0]]
        
        if (ifD) {
          if(is.null(dim(dd.velm))){
            print(dd.velm[1:5])
          }else{
            print(dd.velm[1:5,])
          }
        }
      }
      # if node element, get from data.n
      if(sum(cur.elm[,2]==1)>=1){
        dd.nelm = data.n[, cur.elm[,1][cur.elm[,2]==1]]
        
        if (ifD) {
          if(is.null(dim(dd.nelm))){
            print(dd.nelm[1:5])
          }else{
            print(dd.nelm[1:5,])
          }
        }
      }

      dd.cur = cbind(dd.velm, dd.nelm)
      if(ifD) print(cur.row)
      cur.bool = cur.row[3]
     
      if(cur.bool==0){ ## and
        data.n[,cur.row[1]]=as.numeric(apply(dd.cur, 1, all))
      }
      if(cur.bool==1){ ## or
        data.n[,cur.row[1]]=as.numeric(apply(dd.cur, 1, any))
      }

      if( (cur.bool!=1) & (cur.bool!=0)) stop("Wrong boolean value.")
    } ## for( i in 1:nCt ){
    
  } ##  if(nCt>=1){

  if(re.final){
    if(nCt==0){
      # only one variable only element
      return(data.v[,1])
    }else{
      # return the last element (must be a node)
      t.id = binaTree$curElmId
      re = data.n[,t.id]
      return(re)
    }

  }else{
    return(list(data.v=data.v, data.n=data.n))
  }
}

binaTree.closeNode <-
function(binaTree,  elm.level){
  ## if the current element is an element with not as boolean,
  ## it will have have row in the node map

  # objects for node element. (contain node itself or variable only element)
  # 1)elm.nlist: a list of vector, each vector for one element
  #    matrix col 1: idx for node or variable element
  #    matrix col 2: is a node(1) or variable element(0)
  # 2)elm.nMa: A matrix for node
  #    matrix col 1:id: list id
  #           col 2:level: levle
  #           col 3:bool: boolean term: and(0); or(1)
  #           col 4:open: open(1)/close(0)
  # two objects for state info
  # curElm: whether current element is a variable only(0) or node(1), or missing(NA)
  # curElmId: the index of current element

  ifD = FALSE
  if (ifD) print("binaTree.closeNode")
  if (ifD) print(qp("elm.level=", elm.level))
  if (ifD) print("binaTree::")
  if (ifD) print(binaTree)
  
  elm.nlist = binaTree$elm.nlist
  elm.nMa = binaTree$elm.nMa
  open.id = NA

  ## print(elm.nMa)
  ## see if any node is open (should be)
  if (nrow(elm.nMa)>=1){
      fil = (elm.nMa[,2]==elm.level) & (elm.nMa[,4]==1)
      if (ifD) print(fil)
      if(sum(fil, na.rm=TRUE)==1){
        # close the old node
        open.id = elm.nMa[,1][fil]
        elm.nMa[fil, 4]=0

        nMa = elm.nlist[[which(fil)]]
        if(ifD) print(nMa)
        nMa = rbind(nMa, c(binaTree$curElmId, binaTree$curElm))
        elm.nlist[which(fil)]=list(nMa)
      }else{
        stop("no node match the requirement")
      }
  }else{
      stop("no open node")
  }
  
  binaTree$elm.nlist = elm.nlist
  binaTree$elm.nMa = elm.nMa
  binaTree$curElmId = open.id#
  binaTree$curElm = 1 #node

  if(ifD) print("after close node, binaTree:")
  if(ifD) print(binaTree)
  
  return(binaTree)
}

binaTree.constr <-
function(){

  elm.vlist = list()
  elm.vMa = matrix(NA, ncol=3, nrow=1)
  colnames(elm.vMa) = c("id", "level", "bool")

  elm.nlist = list()
  elm.nMa = matrix(NA, ncol=4, nrow=1)
  colnames(elm.nMa) = c("id", "level", "bool", "open")

  binaTree = list(elm.vlist=elm.vlist, elm.vMa=elm.vMa, elm.nlist=elm.nlist, elm.nMa=elm.nMa,
               curElm=NA, curElmId=0)
  return(binaTree)
}

binaTree.merge <-
function(treeForSearch){
 fN = "binaTree.merge:"
 ifD = FALSE
 
 searchWholeTree = TRUE
 tt = 0
 search = FALSE
 # do an iterative search for ( or ) and , keep search until all the "or" have no parent with "and" operator
 while(searchWholeTree){
     tt = tt+1
     if(tt>100) stop(paste(fN, "iterative procdure exceed 100 times. Sometime is wrong!"))
     
     # if the tree is updated, use the updated one. Otherwise, search the next possible choice
     if(!search){
       if(ifD) {
         print("Updated the tree:")
         #print(binaTree.toStr(treeForSearch))
         print(treeForSearch)
       }
       tree = treeForSearch
       elm.nMa = tree$elm.nMa
  
       ## search for "and"
       filter = elm.nMa[,3]==0
       if(sum(filter)==0) {
  #          search = FALSE
  #          searchWholeTree = FALSE
           return(treeForSearch)
       }
       search = TRUE
     }

     if( sum(filter)>=1){
       
       newMa = elm.nMa[filter, , drop=FALSE]

       if(ifD) print(newMa)
       
       j = 1 # loop though node with "and" operator
       remove.list = NULL
       while ( j <= sum(filter) & search){
         if(ifD) print(paste("j=", j))
         curCh.idx = newMa[j, 1]
         curChRow = match(curCh.idx, elm.nMa[,1])
         curNode.level = newMa[j, 2]
         par.idx = binaTree.shuffle.findNodePar(tree, curCh.idx)

         if(is.null(par.idx)) stop (paste(fN, " No parent for a child node!"))

         if(par.idx != 0) {

            parRow = match(par.idx, elm.nMa[,1])
            if(ifD){
              
              print(paste("Matched parent row =", parRow, " parent id =", par.idx))
            }
            # find parent node is using "and"
            if( elm.nMa[parRow, 3] == 0){
              if(ifD) print("find a need for merge")
              search = FALSE
       
              ## find out the element of current node and the other node id
              curNodeMa = tree$elm.nlist[[curCh.idx]]
       
              parNodeMa = tree$elm.nlist[[par.idx]]

              ## need to be the same id as well a node
              filter.curkidRow = parNodeMa[,1]==curCh.idx & parNodeMa[,2]==1

              othNode.row = parNodeMa[!filter.curkidRow,]
              curNode.rows = curNodeMa
              tree$elm.nlist[[par.idx]] = rbind(othNode.row, curNode.rows)

              ## remove from the nMa the current node, will NOT remove the node from nlist for simplicity
              remove.list = c(remove.list, curChRow)

              if(ifD) {
                print("elm.nlist and nMa after updating the partent of old node")
                #print(tree$elm.nlist)
                #print(tree$elm.nMa)
              }
   
              treeForSearch = tree
              tree = NULL
            
            } ## if( elm.nMa[match.id, 3] == 0){
          } # if(par != 0) {
         j = j +1
       } ##  while ( j <= sum(filter) & search){

       if(length(remove.list)>0){
         elm.nMa = elm.nMa[-c(remove.list),,drop=FALSE]
         print(paste("removed:", paste(c(remove.list), collapse=";")))
         if(ifD) print(elm.nMa)
         treeForSearch$elm.nMa = elm.nMa
# 
#          remove.nlist  = elm.nMa[remove.list,1]
#          treeForSearch$elm.nlist = treeForSearch$elm.nlist [-c(remove.nlist)]
       }

       if(j > sum(filter) & search) searchWholeTree = FALSE
       if(ifD & j > sum(filter) & search ) print("stopped")
     } ## if( sum(filter)>=1){
     
   } ## while(searchWholeTree){

 ##return(NULL)
 return(treeForSearch)
}

binaTree.parser <-
function(str){

  ifD = FALSE

  ## the first character is "(", "not", "and", "or", or variable name
  b.lev=0
  b.open = FALSE
  cur.idx = 0
  cur.str = str

  binaTree = binaTree.constr()

  binaTree = binaTree.parser.proc(str, binaTree, curLevel=0, first.seg=TRUE)

  # check wether the level 0 is closed
  nMa = binaTree$elm.nMa

  if (length(binaTree$elm.nlist)>0){
    cur.level = nMa[, 2]
    if(sum(cur.level==0)>1) stop()
    zero.level = which(cur.level==0)
    if( nMa[zero.level, 4]==1){
      ## close the last node
      binaTree=binaTree.closeNode(binaTree,  elm.level=0)
    }
    
  }
  
  return(binaTree)

}

binaTree.parser.proc <-
function(str, binaTree, curLevel, first.seg=FALSE){
  ## need to grow the list
  ifD = FALSE
  
  str = util.str.rmSpacePadder(str, rm=c("b", "e"))
  
  if(ifD) print(qp("binaTree.parser.proc::str=", str, "|end"))
  if(ifD) print(qp("binaTree.parser.proc::curLevel=", curLevel, "|end"))
  #if(ifD) print(binaTree)
  
   ## patterns: parse out the seg separated by (/)
   ## "" hides the part has already been processed, ')' hides the part not necessary there
   ## 7 "("A and B and C')'
   ## 5  and A and B')'
   ## 8 ")") and A 
   ## 1 "((A and "(B and C)) and
   ## 2   A and (
   ## 4  and (
   
   b.level = util.findBrace(str)
   if(ifD) print(b.level)
   ## if no more brace, it is ending!!!
   t.elm=NULL
   if(is.null(b.level)){

     if(first.seg){
       ## #7:"("A and B and C')' only one elm
       ## can only be and/or/not
       ## add elm, elment only
       if(ifD) print("first.seg")
       t.elm = binaTree.parser.pVarElm(str, type="middle")
       if(is.null(t.elm)) stop()

       ## changed, so it would not allow two elments as one element variable
       if(t.elm$mixed==2){
         if(ifD) print("t.elm$mixed==2")
         # add the node
         
         ## add the element and add the node as well. Note that at the maximum, only two elements is allowed
         binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx[1], 0)
         binaTree = binaTree.addNode(binaTree, curLevel, t.elm$bool)
         binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx[2], 0)
         ## close the node
         binaTree = binaTree.closeNode(binaTree, curLevel)
       }else{       
         binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx, t.elm$bool)
       }
       
       return(binaTree)
     }else{
       ## #5: and A and C')', ending
       ## can be and/or,  must be mix node and
       ## ENDING
       ## add elm, add node, close node
       t.elm = binaTree.parser.pVarElm(str, type="start")
       if(is.null(t.elm)) stop("binaTree.parser.pVarElm(str, type='start')")
       if(t.elm$mixed==1){
         ## add the node
         binaTree = binaTree.addNode(binaTree, curLevel, t.elm$bool)
         ## add elm first
         binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx, t.elm$bool)
         ## close the node
         binaTree = binaTree.closeNode(binaTree, curLevel)
       }else{
         stop("binaTree.parser.pVarElm(str, type='start'):: not node" )
       }
       return(binaTree)
     } # if(first.seg){

   } # if(is.null(b.level)){
  
   ## if more brace, keep subsetting
   if(!is.null(b.level)){
     if (b.level$brace==1){
       ## #8 or #5 or #7 or having ")"
       if(first.seg){
         stop(" Can not have ')' before '('.")
       }
       
       if(b.level$idx==1){
           ## #8
           ## process node, then subset
           ## close the node
           
           binaTree = binaTree.closeNode(binaTree, curLevel)
           curLevel = curLevel-1
           
           ## subset
           if(b.level$id < nchar(str)){
             curStr = substr(str, b.level$idx+1, nchar(str))
             binaTree = binaTree.parser.proc(curStr, binaTree, curLevel, first.seg=FALSE)
           }
           return(binaTree)
       }else{ # if(b.level$idx==1){
         ## #7 or #5
         ## #7: "("A and B and C')', #5:  and A and B')'
         ## depends on the first is boolean or factor
         ## process node, then subset
         segStr = substr(str, 1, b.level$idx-1)
         t.elm = binaTree.parser.pVarElm(segStr, type="un")
         if(ifD) print(t.elm) 
         if(is.null(t.elm)) stop( "binaTree.parser.pVarElm(str, type='un')" )
         if(t.elm$mixed==1){
           ## add the node
           binaTree = binaTree.addNode(binaTree, curLevel, t.elm$bool)           
           ## print(t.elm)
           ## add elm first
           binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx, t.elm$bool)
           
           binaTree = binaTree.closeNode(binaTree, curLevel)
           curLevel = curLevel-1
         }else{
           if(t.elm$mixed==2){
             if(ifD) print("second:: t.elm$mixed==2")
             # add the node
             
             ## add the element and add the node as well. Note that at the maximum, only two elements is allowed
             binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx[1], 0)
             binaTree = binaTree.addNode(binaTree, curLevel, t.elm$bool)
             binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx[2], 0)
             ## close the node
             binaTree = binaTree.closeNode(binaTree, curLevel)
             curLevel = curLevel - 1
           }else{
               
             binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx, t.elm$bool)
             ##print(" binaTree.addElm(binaTree, curLevel, t.elm$idx, t.elm$bool) "  )
             ##print(binaTree)
             curLevel = curLevel-1
             ## if the last is an variable only element, need to close the node
           }
         }

         ## subset
         if(b.level$id < nchar(str)){
             curStr = substr(str, b.level$idx+1, nchar(str))
             binaTree = binaTree.parser.proc(curStr, binaTree, curLevel, first.seg=FALSE)
         }else{
             if( curLevel==0){
               binaTree = binaTree.closeNode(binaTree, curLevel)
             }else{
               stop(qp("curLevel=", curLevel, ", should be 0"))
             }
         }
         return(binaTree)
       } # if(b.level$idx==1){
     } # if (b.level$brace==1){

     if(b.level$brace==0){
       ## having "("
       if(b.level$idx==1){
         ## #1: "((A and "(B and C)) and
         ## see how many levels it opens, then subset
           add.lev = util.char.1stIdx(str, "(", match=FALSE)
           curLevel = curLevel+add.lev
           curStr = substr(str, add.lev+1, nchar(str))
           binaTree = binaTree.parser.proc(curStr, binaTree, curLevel, first.seg=FALSE)
           return(binaTree)
       }else{ # if(b.level$idx==1){
         ## #4: and (, #2: A and (

         ## process the node, then subset
         segStr = substr(str, 1, b.level$idx-1)
         t.elm = binaTree.parser.pVarElm(segStr, type="end")
         ##print(paste("t.elm=", t.elm))
         if(is.null(t.elm)) stop( " binaTree.parser.pVarElm(str, type='end')" )
         if(t.elm$mixed==1){
           ## if the boolean term itself
           if(is.null(t.elm$idx)){
             binaTree = binaTree.addNode(binaTree, curLevel, t.elm$bool)
           }else{
             binaTree = binaTree.addElm(binaTree, curLevel, t.elm$idx, t.elm$bool)
             binaTree = binaTree.addNode(binaTree, curLevel, t.elm$bool)
           }

         }else{
           stop( " binaTree.parser.pVarElm(str, type='end'):: not node")        
         }           
         
         ## subset
         if(b.level$id < nchar(str)){
             curStr = substr(str, b.level$idx, nchar(str))
             binaTree = binaTree.parser.proc(curStr, binaTree, curLevel, first.seg=FALSE)
         }else{
           stop ("find ( without )")
         }
         return(binaTree)
       }  # if(b.level$idx==1){
     }# if(b.level$brace==0){
   } # if(!is.null(b.level)){

}

binaTree.parser.pVarElm <-
function(str, type="un", check.format=TRUE){
    ifD = FALSE
    fN = "binaTree.parser.pVarElm:"

    str = util.str.rmSpacePadder(str, rm=c("b", "e"))
    if(ifD) print(qp("binaTree.parser.pVarElm::str=", str, "|begin"))
    if(ifD) print(qp("binaTree.parser.pVarElm::type=", type, "|begin"))
    
    idx = NULL
    bool = NA
    mixed = 0
    
    if(type=="un"){
      # see if starting with 'and' or 'or'or 'not'
      idx.and = util.char.1stStrIdx(str, find="and ")
      idx.or = util.char.1stStrIdx(str, find="or ")
      idx.not = util.char.1stStrIdx(str, find="not ")
      if(idx.not==1){ ## start with not
          mixed = 0
          bool = -1
          idx = util.str.splitAndOrNot(str, "not", check.space=check.format)
          if(!is.null(idx)){
            if(check.format){
              if(length(idx$items)==1){
                return(list(idx=idx$items, bool=bool, mixed=mixed))
              }
            }else{
              if (length(idx$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))
  
              return(list(idx=idx$items, bool=bool, mixed=mixed))
            }
          }
      }

      if(idx.and==1){ ## start with and
          mixed = 1
          bool = 0
          idx = util.str.splitAndOrNot(str, "and", check.space=check.format)

          if(is.null(idx)) return(NULL)

          if (length(idx$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))
  
          return(list(idx=idx$items, bool=bool, mixed=mixed))
      }
      if(idx.or==1){ ## start with or
          mixed = 1
          bool = 1
          idx = util.str.splitAndOrNot(str, "or", check.space=check.format)

          if(is.null(idx)) return(NULL)
          if (length(idx$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))
          return(list(idx=idx$items, bool=bool, mixed=mixed))
      }

      list.and = util.str.splitAndOrNot(str, " and ", check.space=check.format)
      list.or  = util.str.splitAndOrNot(str, " or ", check.space=check.format)

      ## operator is in the middle
      #print(list.and)
      #print(list.or)
      
      if (!is.null(list.and)){
        if (length(list.and$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))
        return(list(idx=list.and$items, bool=0, mixed=2))
      }
      
      if (!is.null(list.or)){
        if (length(list.or$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))
        return(list(idx=list.or$items, bool=1, mixed=2))
      }
    }
    
    if(type=="start"){
      list.and = util.str.splitAndOrNot(str, "and ", check.space=check.format)
      list.or  = util.str.splitAndOrNot(str, "or ", check.space=check.format)

      if (!is.null(list.and)){
        if (length(list.and$items)>1) stop(paste(fN, "do not allow more than two variables without a brace."))
        return(list(idx=list.and$items, bool=0, mixed=1))
      }
      if (!is.null(list.or)){
        if (length(list.or$items)>1) stop(paste(fN, "do not allow more than two variables without a brace."))
        return(list(idx=list.or$items, bool=1, mixed=1))  
      }
    }

    if(type=="end"){
      if(str=="and")
        return(list(idx=NULL, bool=0, mixed=1))
      
      if(str=="or")
        return(list(idx=NULL, bool=1, mixed=1))
      
      list.and = util.str.splitAndOrNot(str, "and", check.space=check.format)
      list.or  = util.str.splitAndOrNot(str, "or", check.space=check.format)

      if (!is.null(list.and)){
        if (length(list.and$items)>1) stop(paste(fN, "do not allow more than two variables without a brace."))
        return(list(idx=list.and$items, bool=0, mixed=1))
      }
      
      if (!is.null(list.or)){
        #print(list.or)
        if (length(list.or$items)>1) stop(paste(fN, "do not allow more than two variables without a brace."))
        return(list(idx=list.or$items, bool=1, mixed=1))  
      }
    }    

    
    if(type=="middle"){
      idx.not = util.char.1stStrIdx(str, find="not ")
      if(idx.not==1){ ## start with not
          mixed = 0
          bool = -1
          idx = util.str.splitAndOrNot(str, "not", check.space=check.format)
          if(!is.null(idx)){
            if(check.format){
              if(length(idx$items)==1) return(list(idx=idx$items, bool=bool, mixed=mixed))
            }else{
              if (length(idx$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))
              return(list(idx=idx$items, bool=bool, mixed=mixed))
            }
          }
      }
      list.and = util.str.splitAndOrNot(str, " and ", check.space=check.format)
      list.or  = util.str.splitAndOrNot(str, " or ", check.space=check.format)

      #print(list.and)
      #print(list.or)
      if (!is.null(list.and)){
        if (length(list.and$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))      
        return(list(idx=list.and$items, bool=0, mixed=2))
      }
      if (!is.null(list.or)){
        if (length(list.or$items)>2) stop(paste(fN, "do not allow more than two variables in an element."))      
        return(list(idx=list.or$items, bool=1, mixed=2))
      }
      if(is.null(list.and) & is.null(list.or)){
        # could be just one element
         if(util.char.1stIdx(str, " ")==0){
           return(list(idx=str, bool=0, mixed=0))
         }
      }
                
    }    

    return(NULL) 
}

binaTree.patternForm <-
function(binaTree, elmMList, bkIdx.RuleOrder){
  ifD = FALSE
  fN = "binaTree.patternForm:"

  if(ifD) {
    print("binaTree")
    print(binaTree)
    print("elmMList")
    print(elmMList)
    print(bkIdx.RuleOrder)

  }


  # place to hold the matching hap pairs
  mgrid = NULL

  unique.bk = unique(bkIdx.RuleOrder)
  unique.bk = sort(unique.bk)
  
  # total number of HRCB
  bkCt = length(unique.bk)

  nMa = binaTree$elm.nMa
  nCt = nrow(binaTree$elm.nMa)

  ## in binaTree, all the op among node is "OR".
  ## but within a node, it is one element or a boolean term of multiple element
  ## with more than one element.

  mgrid = NULL
  ##process node with "and" op
  filter = nMa[,3]==0
  # find out the node with "and" as operator
  range = (1:nCt)[filter]
  ## need to parse the node with "and" as operator
  if(sum(filter)>=1){
    for ( i in range){
      # for each node 
      node.idx = nMa[i,1]
      nodeMa = binaTree$elm.nlist[[node.idx]]
      if(sum(nodeMa[,2])>=1) stop(paste(fN, "'and' node with node as element."))

      # nodeMa[,1] gives the element's id, which follow the elmBkIdx
      bkIdx.seqInRule = nodeMa[,1]

      bkm.ct = table(bkIdx.seqInRule)
      bkm.bench = NULL
      bkm.bench = as.integer(dimnames(bkm.ct)[1][[1]])

      ## if two elements in one bk need to be meet, need to update elmMList and bkIds
      newMList = NULL
      
      for(j in 1:length(bkm.bench)){
        ## for each bkIdx related to the element in the node
        tmp.ct = bkm.ct[j]
        elm.m.ids =  bkm.bench[j]
        if(tmp.ct>1){ # if more than one element from the same bk
          stop (paste(fN, "not implemented"))
#           elm.m.ids = nodeMa[ bkm.idx==bkm.cur.idx, 1]
#           #obtain all the elmMlist for this set of elm
#           tmp.allMatch = elmMList[elm.m.ids]
#           ## keep the first set of meeting one
#           tmp.finalSet = tmp.allMatch[[1]]
#           final.fit = rep(1, times=nrow(tmp.allMatch))
#           for(ii in 2:tmp.ct){
#             ## do an intersection operation, first assume all are in
#             ## then match two indexes
#             tmp.cur = tmp.allMatch[[ii]]
#             filter = is.element(tmp.finalSet[,1], tmp.cur[,1])
#             ## match the first idx
#             final.fit = final.fit & filter
#             ## match the second idx
#             filter = match(tmp.finalSet[,2], tmp.cur[,2])
#             final.fit = final.fit & filter            
#           } # for(ii in 2:tmp.ct){
#           if(sum(filter)<=0) stop (paste(fN, "no fitted hap pairs for the more than two matching elment in a block."))
# 
#           tmp.ma = tmp.finalSet[filter,, drop=FALSE ]
#           newMList = c(newMList, list(tmp.ma))
        }else{
          newMList = c(newMList, elmMList[elm.m.ids])
        } # if(tmp.ct>1){ # one element for one bk
        
      } # for(j in 1:length(bkm.bench))
      ## form the pattern for add node, add to the match.grid1 and 2
      bkMVec = bkIdx.RuleOrder[ bkm.bench]
      # match the bkm.cur.idx to a position in the whole pattern
      mat.idx = match(bkMVec, unique.bk)
      hapPair.matched=hapPair.match(mat.idx, bkCt, newMList)
      if(ifD) print(hapPair.matched)
      mgrid = rbind(mgrid, hapPair.matched)
    } # for ( i in range){ parse node with "and" as operator
  } # if(sum(filter)>=1){

  ## parse all the element in "or" node

  for (i in 1:nCt){
     curNode = nMa[i,]
     ## only for "or" node
     if(curNode[3]==1){
      node.idx = curNode[1]
      nodeMa = binaTree$elm.nlist[[node.idx]]
      ## only considering the element variables
      if(sum(nodeMa[,2])<nrow(nodeMa)){
        if(ifD) print("find the node linked with one element!!")
        elm.ids = nodeMa[nodeMa[,2]==0,1]
        #if(length(elm.ids)!=1) stop(paste(fN, "number of element in a node is not one"))

        # it is OK to have more than one element
        for (ij in 1:(length(elm.ids))){
          bkm.idx = bkIdx.RuleOrder [ elm.ids[ij] ]
          mat.idx = match(bkm.idx, unique.bk)
          if (ifD) print(bkm.idx)
          if (ifD) print(unique.bk)
          #print(elmMList[elm.ids[ij]])
          hapPair.matched=hapPair.match(mat.idx, bkCt, elmMList[elm.ids[ij]])
          mgrid = rbind(mgrid, hapPair.matched)
        }
        
        if(ifD) print(mgrid)
      }# else it is a all node node      
     } # if(curNode[3]==1){
  } # for i in (1:nCt){
  
  return( mgrid )
}

binaTree.patternFormOLD <-
function(binaTree, elmMList, bkIdx.RuleOrder){
  ifD = TRUE
  fN = "binaTree.patternForm:"

  if(ifD) {
    print("binaTree")
    print(binaTree)
    print("elmMList")
    print(elmMList)
    print(bkIdx.RuleOrder)

  }
  # place to hold the matching hap pairs
  mgrid = NULL

  unique.bk = unique(bkIdx.RuleOrder)
  unique.bk = sort(unique.bk)
  
  # total number of HRCB
  bkCt = length(unique.bk)

  nMa = binaTree$elm.nMa
  nCt = nrow(binaTree$elm.nMa)

  ## in binaTree, all the op among node is "OR".
  ## but within a node, it is one element or a boolean term of multiple element
  ## with more than one element.

  mgrid = NULL
  ##process node with "and" op
  filter = nMa[,3]==0
  # find out the node with "and" as operator
  range = (1:nCt)[filter]
  ## need to parse the node with "and" as operator
  if(sum(filter)>=1){
    for ( i in range){
      # for each node 
      node.idx = nMa[i,1]
      nodeMa = binaTree$elm.nlist[[node.idx]]
      if(sum(nodeMa[,2])>=1) stop(paste(fN, "'and' node with node as element."))

      # nodeMa[,1] gives the element's id, which follow the elmBkIdx
      bkIdx.seqInRule = nodeMa[,1]

      bkm.ct = table(bkIdx.seqInRule)
      bkm.bench = NULL
      bkm.bench = as.integer(dimnames(bkm.ct)[1][[1]])

      ## if two elements in one bk need to be meet, need to update elmMList and bkIds
      newMList = NULL
      
      for(j in 1:length(bkm.bench)){
        ## for each bkIdx related to the element in the node
        tmp.ct = bkm.ct[j]
        elm.m.ids =  bkm.bench[j]
        if(tmp.ct>1){ # if more than one element from the same bk
          stop (paste(fN, "not implemented"))
#           elm.m.ids = nodeMa[ bkm.idx==bkm.cur.idx, 1]
#           #obtain all the elmMlist for this set of elm
#           tmp.allMatch = elmMList[elm.m.ids]
#           ## keep the first set of meeting one
#           tmp.finalSet = tmp.allMatch[[1]]
#           final.fit = rep(1, times=nrow(tmp.allMatch))
#           for(ii in 2:tmp.ct){
#             ## do an intersection operation, first assume all are in
#             ## then match two indexes
#             tmp.cur = tmp.allMatch[[ii]]
#             filter = is.element(tmp.finalSet[,1], tmp.cur[,1])
#             ## match the first idx
#             final.fit = final.fit & filter
#             ## match the second idx
#             filter = match(tmp.finalSet[,2], tmp.cur[,2])
#             final.fit = final.fit & filter            
#           } # for(ii in 2:tmp.ct){
#           if(sum(filter)<=0) stop (paste(fN, "no fitted hap pairs for the more than two matching elment in a block."))
# 
#           tmp.ma = tmp.finalSet[filter,, drop=FALSE ]
#           newMList = c(newMList, list(tmp.ma))
        }else{
          newMList = c(newMList, elmMList[elm.m.ids])
        } # if(tmp.ct>1){ # one element for one bk
        
      } # for(j in 1:length(bkm.bench))
      ## form the pattern for add node, add to the match.grid1 and 2
      bkMVec = bkIdx.RuleOrder[ bkm.bench]
      # match the bkm.cur.idx to a position in the whole pattern
      mat.idx = match(bkMVec, unique.bk)
      hapPair.matched=hapPair.match(mat.idx, bkCt, newMList)
      if(ifD) print(hapPair.matched)
      mgrid = rbind(mgrid, hapPair.matched)
    } # for ( i in range){ parse node with "and" as operator
  } # if(sum(filter)>=1){

  ## parse all the element in "or" node

  for (i in 1:nCt){
     curNode = nMa[i,]
     ## only for "or" node
     if(curNode[3]==1){
      node.idx = curNode[1]
      nodeMa = binaTree$elm.nlist[[node.idx]]
      ## only considering the element variables
      if(sum(nodeMa[,2])<nrow(nodeMa)){
        if(ifD) print("find the node linked with one element!!")
        elm.ids = nodeMa[nodeMa[,2]==0,1]
        if(length(elm.ids)!=1) stop(paste(fN, "number of element in a node is not one"))

        bkm.idx = bkIdx.RuleOrder [ elm.ids ]
        mat.idx = match(bkm.idx, unique.bk)
        if (ifD) print(bkm.idx)
        if (ifD) print(unique.bk)
        hapPair.matched=hapPair.match(mat.idx, bkCt, elmMList[elm.ids])
        mgrid = rbind(mgrid, hapPair.matched)
      }# else it is a all node node      
     } # if(curNode[3]==1){
  } # for i in (1:nCt){
  
  return( mgrid )
}

binaTree.setApply <-
function(binaTree, setList){
  ifD = FALSE
  # the setList already contain the set for each variable

  data.n=NULL
  
  nCt = length(binaTree$elm.nlist)
  if(nCt>=1){
    nMa = binaTree$elm.nMa
    
    data.n = rep(list(NULL), nrow(nMa))

    new.order = order(nMa[,2], decreasing = TRUE)
    if(ifD) print(nMa)
    nMa = nMa[new.order, ,drop=FALSE]
    if(ifD) print(nMa)
    #nlist = binaTree$elm.nlist[new.order]
    nlist = binaTree$elm.nlist
    if(ifD) print(nlist)
    for( i in 1:nCt ){
      cur.row = nMa[i, ,drop=FALSE]
      if(ifD) print("cur node row after sorting")
      if(ifD) print(cur.row)
      ## col seq= id, level, bool, open
      
      ## find the var list
      cur.elm = nlist[[ cur.row[1] ]]
      if(ifD) print("cur node element")
      if(ifD) print(cur.elm)
      dd.velm=NULL
      dd.nelm=NULL
      
      # if variable element, get from data.v
      ## only three possibilties: both are variable, both are node, one variable and one node
      if( sum(cur.elm[,2]==0)  >=1){
        # dd.velm = data.v[, cur.elm[,1][cur.elm[,2]==0]]
        dd.velm = setList[ c(cur.elm[,1][cur.elm[,2]==0]) ]
        
        if (ifD) {
          if(is.null(dim(dd.velm))){
            print(dd.velm[1:5])
          }else{
            print(dd.velm[1:5,])
          }
        }
      }
      # if node element, get from data.n
      if(sum(cur.elm[,2]==1)>=1){
        dd.nelm = data.n[ c(cur.elm[,1][cur.elm[,2]==1]) ]
        
        if (ifD) {
          if(is.null(dim(dd.nelm))){
            print(dd.nelm[1:5])
          }else{
            print(dd.nelm[1:5,])
          }
        }
      }

      dd.cur = c(dd.velm, dd.nelm)
      if(ifD) print(cur.row)
      cur.bool = cur.row[3]
     
      if(cur.bool==0){ ## and -> intersection

        if(length( dd.cur[[1]] )==0) {
          warning("length( dd.cur[[1]] )==0")
          data.n[[ cur.row[1]   ]] = NULL
        }
        if(length( dd.cur[[2]] )==0) {
          warning("length( dd.cur[[2]] )==0")
          data.n[[ cur.row[1]   ]] = NULL
        }
        if(length(dd.cur[[1]])!=0 & length(dd.cur[[2]])!=0){
          filter = match(dd.cur[[1]], dd.cur[[2]], nomatch=0)
          filter = filter[filter>0]
          data.n[[  cur.row[1]  ]] = dd.cur[[2]][ filter ]
        }
      }
      if(cur.bool==1){ ## or -> union
        
        data.n [[  cur.row[1]  ]] = unique(c(dd.cur[[1]], dd.cur[[2]]))
        if( length(dd.cur[[1]])==0 & length(dd.cur[[2]])==0 ){
          data.n[[  cur.row[1]  ]] = NULL
        }
      }
    } ## for( i in 1:nCt ){
    
  } ##  if(nCt>=1){

    if(nCt==0){
      # only one variable only element
      return(setList[[1]])
    }else{
      # return the last element (must be a node)
      t.id = binaTree$curElmId
      re = data.n[[t.id]]
      return(re)
    }

}

binaTree.shuffle.findElmPar <-
function(tree, id.elm){
  fN = "binaTree.shuffle.findElmPar:"
  elm.nMa = tree$elm.nMa

  ## loop through each node in nMa
  search = TRUE
  i = 1
  child.node=NULL
  while ( i <= length(tree$elm.nlist) & search){
    curMa = tree$elm.nlist[[i]]
    filter = curMa[ ,2]==0 & curMa[,1]==id.elm
    if(sum(filter)>=1){
      search = FALSE
      child.node = i
    }
    i = i+1
  }

  if(search){
    stop(paste(fN, " cannot find the node using the current element."))
  }

  return(child.node)

}

binaTree.shuffle.findNodePar <-
function(tree, id.node){
  fN = "binaTree.shuffle.findNodePar:"
  elm.nMa = tree$elm.nMa

  par.level = elm.nMa[elm.nMa[,1]==id.node,2]-1
  if(par.level<0) {
    #warning(paste(fN, " top level has no parent for id =", id.node))
    return(0)
  }

  par.posiId = elm.nMa[elm.nMa[,2]==par.level,1]

  if(length(par.posiId)<=0 ) stop( paste(fN, " no node belongs to the upper level"))

  i = 1

  posId = NULL
  ## multiple parents may have it as a child
  search =TRUE
  while(i<=length(par.posiId) & search){

    posParId = par.posiId[i]
    nodeMa = tree$elm.nlist[[ posParId ]]
    yesNode = nodeMa[ ,2]==1
    childId = nodeMa[yesNode,1]
    if(sum(is.element(id.node, childId))==1){
      posId = posParId
      search = FALSE
    }
    i = i +1;
  }

  return(posId)
 
}

binaTree.shuffleNode <-
function(treeForSearch){
 fN = "binaTree.shuffleNode:"
 ifD = FALSE
 
 searchWholeTree = TRUE
 tt = 0
 search = FALSE
 # do an iterative search for ( or ) and , keep search until all the "or" have no parent with "and" operator
 while(searchWholeTree){
     tt = tt+1
     if(tt>100) stop(paste(fN, "iterative procdure exceed 100 times. Sometime is wrong!"))
     
     # if the tree is updated, use the updated one. Otherwise, search the next possible choice
     if(!search){
       if(ifD) {
         print("Updated the tree:")
         print(treeForSearch)
       }
       tree = treeForSearch
       elm.nMa = tree$elm.nMa
  
       ## search for "or"
       filter = elm.nMa[,3]==1
       if(sum(filter)==0) {
  #          search = FALSE
  #          searchWholeTree = FALSE
           return(treeForSearch)
       }
       search = TRUE
     }

     if( sum(filter)>=1){
       
       newMa = elm.nMa[filter, , drop=FALSE]

       if(ifD) print(newMa)
       
       j = 1 # loop though node with "or" operator
       while ( j <= sum(filter) & search){
         if(ifD) print(paste("j=", j))
         curCh.idx = newMa[j, 1]
         curNode.level = newMa[j, 2]
         par.idx = binaTree.shuffle.findNodePar(tree, curCh.idx)

         if(is.null(par.idx)) stop (paste(fN, " No parent for a child node!"))

         if(par.idx != 0) {
           
            parRow = match(par.idx, elm.nMa[,1])
            if(ifD){
              
              print(paste("Matched parent row =", parRow, " parent id =", par.idx))
            }
            # find parent node is using "and"
            if( elm.nMa[parRow, 3] == 0){
              if(ifD) print("find a case")
              search = FALSE
       
              ## find out the element of current node and the other node id
              curNodeMa = tree$elm.nlist[[curCh.idx]]
       
              parNodeMa = tree$elm.nlist[[par.idx]]

              filter.curKid = parNodeMa[,1]==curCh.idx & parNodeMa[,2]==1
              
              twoKid = parNodeMa[,1]
              othNode.idx = twoKid[!filter.curKid]
              othNode.type = parNodeMa[!filter.curKid, 2]

              ## if the other node is a nodel
              ## change the other kid level one down
              if(othNode.type==1){
                othNodeRow = match(othNode.idx, elm.nMa[,1])
                elm.nMa[othNodeRow, 2] =  curNode.level+1

                ## create a duplicate of the othNode
                othNodeMaDup  = tree$elm.nlist[[othNode.idx]]
                othNodeDup.New = length(tree$elm.nlist)+1
                tree$elm.nlist = c(tree$elm.nlist, list(othNodeMaDup))
                elm.nMa = rbind(elm.nMa, c(othNodeDup.New, curNode.level+1, elm.nMa[othNode.idx, 3], 0))
                ma =  matrix(c(curNodeMa[2,1], othNodeDup.New, curNodeMa[2,2], othNode.type), ncol=2, nrow=2, byrow=FALSE)
              }else{
                ma =  matrix(c(curNodeMa[2,1], othNode.idx, curNodeMa[2,2], othNode.type), ncol=2, nrow=2, byrow=FALSE)
              }
              
              ## create the new node
              othNode.New = length(tree$elm.nlist)+1
              tree$elm.nlist = c(tree$elm.nlist, list(ma))
   
              if(ifD) {
                print("elm.nlist after adding the new node")
                #print(tree$elm.nlist)
   
              }
   
              ## added the new node to the nMa
              elm.nMa = rbind(elm.nMa, c(othNode.New, curNode.level, 0, 0))
              
       
              ## update the old node with "and"
              curNodeMa[2,] = c(othNode.idx, othNode.type)
              elm.nMa[curCh.idx, 3]=0
              tree$elm.nlist[[curCh.idx]]=curNodeMa
   
              if(ifD){
                print("elm.nlist after updating the current node")
                #print(tree$elm.nlist)
              }
              
       
              ## update the parent of old node
              parNodeMa =  matrix(c(curCh.idx, othNode.New, 1, 1), ncol=2, nrow=2, byrow=FALSE)
              elm.nMa[parRow, 3]=1
              tree$elm.nlist[[par.idx]]=parNodeMa
              tree$elm.nMa = elm.nMa
              
              if(ifD) {
                print("elm.nlist and nMa after updating the partent of old node")
                #print(tree$elm.nlist)
                #print(tree$elm.nMa)
              }
   
              treeForSearch = tree
              tree = NULL
            
            } ## if( elm.nMa[match.id, 3] == 0){
          } # if(par != 0) {
         j = j +1
       } ##  while ( j <= sum(filter) & search){
       if(j > sum(filter) & search) searchWholeTree = FALSE
       if(ifD & j > sum(filter) & search ) print("stopped")
     } ## if( sum(filter)>=1){
     
   } ## while(searchWholeTree){

 ##return(NULL)
 return(treeForSearch)

}

binaTree.toStr <-
function(binaTree){
  
  ifD = FALSE

  ## process the variable only element first, regardless the level
  vCt = nrow(binaTree$elm.vMa)
  data.v = rep("", times=vCt)
  
  for( i in 1:vCt){
    # print(i)
    cur.row = binaTree$elm.vMa[i,]
    ## col seq= id, level, bool
    ## find the var list
    cur.elm = binaTree$elm.vlist[[ i ]]

    cur.bool = cur.row[3]
    if(length(cur.elm)>1){
     
      if(cur.bool==0){ ## and
        data.v[i]= paste(cur.elm, collapse=" and ")
      }
      if(cur.bool==1){ ## or
        data.v[i]= paste(cur.elm, collapse=" or ")
      }
    }else{
      if(cur.bool==-1){ ## not
        data.v[i]= paste("(not ", cur.elm, ")", sep="")
      }else{
        data.v[i]= cur.elm
      }
    }
  } # for( i in 1:vCt){

  ## if have node element, process the node from deeper level to zero
  data.n=NULL
  nCt = nrow(binaTree$elm.nMa)
  if(nCt>=1){
    nMa = binaTree$elm.nMa
    
    data.n=rep("", times=nCt)

    new.order = order(nMa[,2], decreasing = TRUE)
    if(ifD) print(nMa)
    nMa = nMa[new.order, ,drop=FALSE]
    if(ifD) print(nMa)
    #nlist = binaTree$elm.nlist[new.order]
    nlist = binaTree$elm.nlist
    if(ifD) print(nlist)
    for( i in 1:nCt ){
      cur.row = nMa[i, ,drop=FALSE]
      if(ifD) print("cur node row after sorting")
      if(ifD) print(cur.row)
      ## col seq= id, level, bool, open
      
      ## find the var list
      cur.elm = nlist[[ cur.row[1] ]]
      if(ifD) print("cur node element")
      if(ifD) print(cur.elm)
      dd.velm=NULL
      dd.nelm=NULL
      # if variable element, get from data.v
      if(sum(cur.elm[,2]==0)>=1){
        dd.velm = data.v[cur.elm[,1][cur.elm[,2]==0]]
      }
      # if node element, get from data.n
      if(sum(cur.elm[,2]==1)>=1){
        dd.nelm = data.n[cur.elm[,1][cur.elm[,2]==1]]
      }

      
      dd.cur = cbind(dd.velm, dd.nelm)
      if(ifD) print(cur.row)
      cur.bool = cur.row[3]

      if(i!=nCt){
        if(cur.bool==0){ ## and
          data.n[cur.row[1]]= paste("(" , paste(dd.cur, collapse= " and "), ")", sep="")
        }
        if(cur.bool==1){ ## or
          data.n[cur.row[1]]= paste("(" , paste(dd.cur, collapse= " or "), ")", sep="")
        }
      }else{ # for the top node, no need for outside brace
        if(cur.bool==0){ ## and
          data.n[cur.row[1]]= paste(dd.cur, collapse= " and ")
        }
        if(cur.bool==1){ ## or
          data.n[cur.row[1]]= paste(dd.cur, collapse= " or ")
        }  
      }
    } ## for( i in 1:nCt ){
    
  } ##  if(nCt>=1){

    if(nCt==0){
      # only one variable only element
      return(data.v[,1])
    }else{
      # return the last element (must be a node)
      t.id = binaTree$curElmId
      re = data.n[t.id]
      if(ifD) print(data.n)
      return(re)
    }

}

bindHapBkGenoMaps <-
function (hapBkMap=NULL, genoMap) 
{
	if(is.null(hapBkMap)){
		# when all the marker are single marker
		snpCtNum = nrow(genoMap$df)/3
		genomeMarkerInfo = matrix(NA, ncol=5, nrow=snpCtNum)
		genomeMarkerInfo[,1]=rep(1, snpCtNum)
		genomeMarkerInfo[,2]=1:snpCtNum
		genomeMarkerInfo[,3]=1:snpCtNum
		genomeMarkerInfo[,4]=1:snpCtNum
		genomeMarkerInfo[,5]=rep(1, snpCtNum)

		genoIndex = which(genomeMarkerInfo[, 5] == 1)
		
		genoMapDf = cbind(genoMap$df, hapLens = rep(1, nrow(genoMap$df)))
		genoOnlyMap = dfToHapBkMap(genoMapDf, keyCol = NULL, chCol = genoMap$chCol, 
				blockCol = genoMap$genomeSeqCol, expCol = genoMap$expCol, 
				probCol = genoMap$probCol, hapLenCol = genoMap$genomeSeqCol, 
				beginCol = NULL, endCol = NULL, snpBase = genoMap$snpBase, 
				re.bf = TRUE, re.javaGUI = TRUE)		
		
		re = list(hapBkOnlyMap=NULL, genoOnlyMap = genoOnlyMap, genomeMarkerInfo = genomeMarkerInfo, genoIndex = genoIndex)
		return(re)
		
	}

	if (is.null(hapBkMap$beginCol)) {
		stop("hapBkMap$beginCol is null, methods not implemented.")
	}
	if (is.null(hapBkMap$dfStr)) {
		stop("hapBkMap$dfStr is null, methods not implemented.")
	}
	hapOnlyMap = hapBkMap
	genoMapDf = cbind(genoMap$df, hapLens = rep(1, nrow(genoMap$df)))
	genoOnlyMap = dfToHapBkMap(genoMapDf, keyCol = NULL, chCol = genoMap$chCol, 
			blockCol = genoMap$genomeSeqCol, expCol = genoMap$expCol, 
			probCol = genoMap$probCol, hapLenCol = genoMap$genomeSeqCol, 
			beginCol = NULL, endCol = NULL, snpBase = genoMap$snpBase, 
			re.bf = TRUE, re.javaGUI = TRUE)
	qu = NULL
	qu.type = NULL
	markers_gb = NULL
	markers_ge = NULL
	markersIndex_genome = 0
	oldCh = "ooo"
	newCh = "ooo"
	hapIdx = 0
	chkey = unlist(lapply(genoOnlyMap$keys, FUN = function(item) {
						re = util.str.seqCutter(item, delims = "-")[1]
						re
					}))
	chkey.hap = unlist(lapply(hapOnlyMap$keys, FUN = function(item) {
						re = util.str.seqCutter(item, delims = "-")[1]
						re
					}))
	chkeyUni = unique(chkey)
	chkeyUni.hap = unique(chkey.hap)
	chkeyDiff = setdiff(chkeyUni, chkeyUni.hap)
	if (length(chkeyDiff) == 0) {
		chkeyDiff = NULL
	}
	genomeMarkerInfo = NULL
	
	for (ikey in chkeyUni) {
		#print(ikey)
		snp.allCt = sum(chkey == ikey)
		if (is.element(ikey, chkeyDiff)) {
			tmp.endCt = sum(chkey == ikey)
			tmp.ma = NULL
			if (tmp.endCt > 0) {
				for (tt2 in 1:tmp.endCt) {
					tmp.ma = rbind(tmp.ma,
							c(rep(markersIndex_genome + tt2, 3), 1))
				}
				tmp.ma = matrix(tmp.ma, ncol = 4)
				hap.ma = data.frame(ch = I(rep(ikey, times = nrow(tmp.ma))), 
						markers_gb = tmp.ma[, 1], markers_ge = tmp.ma[, 1], 
						qu = tmp.ma[, 1], qu.type = tmp.ma[, 4])
			}
			else {
				stop("This is wrong.")
			}
			hap.ma = hap.ma[order(hap.ma[, 2]), ]
			genomeMarkerInfo = rbind(genomeMarkerInfo, hap.ma)
			markersIndex_genome = markersIndex_genome + snp.allCt
		}
		else {
			ibks.f <- (chkey.hap == ikey)
			
			tmp.ma = NULL
			## need to know whether there are some singleton appearing at the beginning
			if( hapOnlyMap$markers_b[ibks.f][1] > 1){
				for (tt in 1:(hapOnlyMap$markers_b[ibks.f][1]-1) ) {
					tmp.ma = rbind(tmp.ma, c(rep(markersIndex_genome+tt, 3), 1))
				}
			}
			
			markers_gb = markersIndex_genome + hapOnlyMap$markers_b[ibks.f]
			markers_ge = markersIndex_genome + hapOnlyMap$markers_e[ibks.f]
			qu = (hapIdx + 1):(hapIdx + sum(ibks.f))
			qu.type = rep(0, times = sum(ibks.f))
			hap.ma = data.frame(ch = I(rep(ikey, times = sum(ibks.f))), 
					markers_gb, markers_ge, qu, qu.type)
			hapIdx = hapIdx + sum(ibks.f)
			
			if (sum(ibks.f) > 1) {
				tmp.diff = markers_gb[-1] -
						markers_ge[-(sum(ibks.f))] - 1
				tmp.pos1 = which(tmp.diff > 0)
				if (length(tmp.pos1)>0) {
					tmp.pos = hap.ma[tmp.pos1, c(1, 3), drop = FALSE]
					for (tt in 1:(length(tmp.pos1))) {
						for (tt2 in 1:(tmp.diff[tmp.pos1][tt])) {
							tmp.ma = rbind(tmp.ma, c(rep(tmp.pos[tt, 
															2] + tt2, 3), 1))
						}
					}
				}
			}
			tmp.endCt = snp.allCt - hapOnlyMap$markers_e[ibks.f][sum(ibks.f)]
			if (tmp.endCt > 0) {
				for (tt2 in 1:tmp.endCt) {
					tmp.ma = rbind(tmp.ma, c(
									rep(markers_ge[sum(ibks.f)] + 
													tt2, 3), 1))
				}
			}
			if (!is.null(tmp.ma)) {
				tmp.ma = matrix(tmp.ma, ncol = 4)
				single.ma = data.frame(ch = I(rep(ikey, times = nrow(tmp.ma))), 
						markers_gb = tmp.ma[, 1], markers_ge = tmp.ma[, 1], 
						qu = tmp.ma[, 1], qu.type = tmp.ma[, 4])
				hap.ma = rbind(hap.ma, single.ma)
				hap.ma = hap.ma[order(hap.ma[, 2]), ]
			}
			markersIndex_genome = markersIndex_genome + snp.allCt
			genomeMarkerInfo = rbind(genomeMarkerInfo, hap.ma)
		}
	}
	genomeMarkerInfo = genomeMarkerInfo[order(genomeMarkerInfo[, 2]), ]
	re = list(hapBkOnlyMap = hapOnlyMap, genoOnlyMap = genoOnlyMap, 
			genomeMarkerInfo = genomeMarkerInfo)
	hapIndex = which(genomeMarkerInfo[, 5] == 0)
	genoIndex = which(genomeMarkerInfo[, 5] == 1)
	re = c(re, list(hapIndex = hapIndex, genoIndex = genoIndex))
	return(re)
}

bkMap.constr <-
function(data, keyCol, hapLenCol=NULL, expCol, probCol, alleleCode=1:2, ...){
	fN = "bkMap.constr"
	
	if( is.character(data)){
		data = read.csv(data, ...)
		#print(qp(fN, ": Info on the ", data, " file."))
		#print(str(data))
	}
	
	colNum = ncol(data)
	#print(keyVal)
	keyVal = unique(data[,keyCol]) # not reordered, following the original order
	#print(keyVal)
	
	bks = NULL
	snpLen = 0
	bkLens = NULL
	keys = NULL
	bkEndingIdx = NULL
	if(is.null(hapLenCol)){
		# if the input dataset does NOT contain info. on haplotype block length
		for( i in 1:length(keyVal)){
			if(i==1){
				r = data[data[,keyCol]==keyVal[i], ]
				
				## normalize the prob and check the haplen
				tmp.expLen  = unlist(lapply(r[,expCol], FUN=function(i){nchar(as.character(i))}  ))
				snpCt1 = tmp.expLen[1]
				if (min(tmp.expLen==snpCt1)==0) stop("At least one of the block has different lengths for haplotypes.")
				
				
				tmp.prob = r[,probCol]
				if (max(tmp.prob<0)==1) stop("At least one of the block has negative haplotype frequencies.")
				if (sum(tmp.prob)==0) stop("At least one of the block has haplotype frequencies sum up to 0.")          
				tmp.prob = tmp.prob/sum(tmp.prob)          
				r[,probCol]=tmp.prob
				
				bkLens = dim(r)[1]
				snpCtVec = rep(snpCt1, bkLens)
				
				r = cbind(r, snpCtVec)
				
				bks = list(r)          
				snpLen = snpLen + snpCt1
				keys = as.character(r[1,keyCol])
				snpCt = snpCt1
			}else{
				
				r = data[data[,keyCol]==keyVal[i], ]
				
				## normalize the prob and check the haplen
				tmp.expLen  = unlist(lapply(r[,expCol], FUN=function(i){nchar(as.character(i))}  ))
				snpCt1 = tmp.expLen[1]
				if (min(tmp.expLen==snpCt1)==0) stop("At least one of the block has different lengths for haplotypes.")
				
				tmp.prob = r[,probCol]
				if (max(tmp.prob<0)==1) stop("At least one of the block has negative haplotype frequencies.")
				if (sum(tmp.prob)==0) stop("At least one of the block has haplotype frequencies sum up to 0.")
				tmp.prob = tmp.prob/sum(tmp.prob)
				r[,probCol]=tmp.prob
				
				bkLens = c(bkLens, dim(r)[1])
				snpCtVec = rep(snpCt1, dim(r)[1])
				
				r = cbind(r, snpCtVec)
				
				bks = c(bks, list(r))
				snpLen = snpLen + snpCt1
				keys = c(keys, as.character(r[1,keyCol]))
				snpCt = c(snpCt, snpCt1)
			}
			
			hapLenCol = dim(r)[2]
		}
	}else{
		# if the input dataset contain info. on haplotype block length
		for( i in 1:length(keyVal)){
			if(i==1){
				r = data[data[,keyCol]==keyVal[i], ]
				
				## normalize the prob and check the haplen
				tmp.expLen  = unlist(lapply(r[,expCol], FUN=function(i){nchar(as.character(i))}  ))
				snpCt1 = tmp.expLen[1]
				if (min(tmp.expLen==snpCt1)==0) stop("At least one of the block has different lengths for haplotypes.")
				if (min(r[,hapLenCol]==snpCt1)==0) stop("At least one of the block provides inconsistant haplotype size.")
				
				tmp.prob = r[,probCol]
				if (max(tmp.prob<0)==1) stop("At least one of the block has negative haplotype frequencies.")
				if (sum(tmp.prob)==0) stop("At least one of the block has haplotype frequencies sum up to 0.")
				tmp.prob = tmp.prob/sum(tmp.prob)
				
				r[,probCol]=tmp.prob
				
				bks = list(r)
				bkLens = dim(r)[1]
				snpLen = snpLen + snpCt1
				keys = as.character(r[1,keyCol])
				snpCt = snpCt1
			}else{
				r = data[data[,keyCol]==keyVal[i], ]
				
				## normalize the prob and check the haplen
				tmp.expLen  = unlist(lapply(r[,expCol], FUN=function(i){nchar(as.character(i))}  ))
				snpCt1 = tmp.expLen[1]
				if (min(tmp.expLen==snpCt1)==0) stop("At least one of the block has different lengths for haplotypes.")
				if (min(r[,hapLenCol]==snpCt1)==0) stop("At least one of the block provides inconsistant haplotype size.")
				
				tmp.prob = r[,probCol]
				if (max(tmp.prob<0)==1) stop("At least one of the block has negative haplotype frequencies.")
				if (sum(tmp.prob)==0) stop("At least one of the block has haplotype frequencies sum up to 0.")
				tmp.prob = tmp.prob/sum(tmp.prob)
				
				r[,probCol]=tmp.prob
				
				bks = c(bks, list(r))
				bkLens = c(bkLens, dim(r)[1])
				snpLen = snpLen + snpCt1
				keys = c(keys, as.character(r[1,keyCol]))
				snpCt = c(snpCt, snpCt1)
			}
		}
	}
	bkMap = list(bks = bks, bkLens = bkLens, keys = keys, snpLen = snpLen, expCol = expCol, hapLenCol=hapLenCol, probCol = probCol, snpCt=snpCt, alleleCode=alleleCode )
	return(bkMap)
	
}

bkMap.ESp.apply1Rule <-
function(bkMap, signalRule){
	ifD = FALSE
	fN = "bkMap.ESp.apply1Rule"
	if(ifD) print(paste(fN, "begin::"))
	
	signal = signalRule$signal
	
	total.bkct = length( bkMap$snpCt )
	
	# construct the beginning/ending index of snp in original map
	idx.be = matrix(NA, nrow=total.bkct, ncol=2)
	idx.be[,2]=cumsum(bkMap$snpCt)
	idx.be[,1]=c(1, cumsum(bkMap$snpCt)+1)[1:total.bkct]
	
	
	causalBkUniqOrderedIdx = bkMap.findHRCBIdx(bkMap, as.integer(signalRule$var.idx), re.keys=FALSE, unique=TRUE)
	## find out # of haplotype in each HRCB
	HRCB.hapCt = bkMap$bkLens [causalBkUniqOrderedIdx]
	HRCB.hapCtProd = cumprod(HRCB.hapCt)[length(causalBkUniqOrderedIdx)]
	
	causalBkIdx.ruleOrder = bkMap.findHRCBIdx(bkMap, as.integer(signalRule$var.idx), re.keys=FALSE, unique=FALSE)
	
	#if(length(causalBkUniqOrderedIdx)==1){
	
	if(length(causalBkIdx.ruleOrder)==1){
		
	}else{
		## need to update the rule a little
		if(ifD) print( binaTree.toStr(signal))
		signal.n = binaTree.shuffleNode(signal)    
		if(ifD) print( binaTree.toStr(signal.n))
		signal.f = binaTree.merge(signal.n)
		if(ifD) print( binaTree.toStr(signal.f))
		
	}
	
	## have to find the matching with the HRCB only bkMap
	#causalBkIdx.ruleOrder = match(causalBkIdx.ruleOrder, causalBkIdx)
	
	elmMList = list()
	# for each block, need to find the hap pairs that are associated with higher risk
	
	for ( j in 1:length(causalBkIdx.ruleOrder)){
		bkIdx = causalBkIdx.ruleOrder[ j ]
		if(ifD) print(paste("bkIdx inside the bkMap =", bkIdx ))
		
		##  20Dec08Change!!!: straighten coding: 1, 2, 3 for 1-digitCoding, and 1, 2 for 2-digit
		
		genoMap =  hap2GenoBlock(hapExp = as.character(bkMap$bks[[bkIdx]][, bkMap$expCol]),
				hapProb = bkMap$bks[[bkIdx]][, bkMap$probCol], snpCt = bkMap$snpCt[bkIdx],
				alleleCode = bkMap$alleleCode )
		if(ifD) print(genoMap)
		item = idx.be[bkIdx,]
		## use recycling rules
		re = paste("v", rep(item[1]:item[2], each=2), sep="")
		newData.col = paste(re,  c("a", "b"), sep="")
		
		## singleSNPRule follow the original order in the genome
		## HARD CODE!!!HARD CODE: genoMa from hap2GenoBlock keep digits  1, 2 for 2-digit, need to change to 0/1 binary coding
		## 20Dec08Change!!! to use 2- for 0/1 binay coding, so the first alleleCode (changed to 1) correspond to the minor allele
		## and the second  (changed to 2) for major allele. Just need to coordinate with signal Rule.
		
		varIdx = match (signalRule$signal$elm.vlist[[j]], newData.col)
		treePred = (2-genoMap$genoMa)[,varIdx]
		## if the variable is not proceed by "not" operator
		## CHANGED on 8JAN09, from ==0 to !=-1. -1 means not
		if (signalRule$signal$elm.vMa[j,3]!=-1){
			nct = sum(treePred==1)
			if(ifD) print("not proceed by not")
			pairHRCB = genoMap$tb[  treePred==1, c(1,2), drop=FALSE ]
		}else{
			## if the variable is proceed by "not" operator
			nct = sum(treePred==0)
			if(ifD) print("proceed by not")
			pairHRCB = genoMap$tb[  treePred==0, c(1,2), drop=FALSE ]
		}
		if (ifD){
			print(paste("matching hap pairs in block ", j))
			print(pairHRCB)
		}
		elmMList=c(elmMList, list(pairHRCB))
	} # for ( j in 1:length(causalBkIdx.new)){
	#print("This is elmMList")
	#print(elmMList)
	#print(causalBkIdx.ruleOrder)
	#print("qing mark")
	
	## simplied if only one HRCB
	
	if (length(causalBkIdx.ruleOrder)==1){
		
		superDipIdx = apply(pairHRCB, 1, FUN=util.it.triMatch, len=HRCB.hapCtProd)
		
		superDipIdx = sort(superDipIdx)
		
		## FIX LATER!!!FIX : can further reduce the workload
		HRCBGrp = t(sapply(superDipIdx, FUN=util.it.triMatch2, len=HRCB.hapCtProd, re.homo=TRUE))
		
		# print(str(HRCBGrp))
		
		HRCBGrp.A = HRCBGrp[HRCBGrp[,3]==1, c(1,2), drop=FALSE]
		
		HRCBGrp.BIdx = HRCBGrp[HRCBGrp[,3]==0,1]
		
		#print(HRCBGrp.A)
		#print(HRCBGrp.BIdx)
		return(list(A=HRCBGrp.A, B=HRCBGrp.BIdx))
		
	}
	
	hapGrids = binaTree.patternForm(binaTree=signal.f, elmMList=elmMList, bkIdx.RuleOrder=causalBkIdx.ruleOrder)
	if(ifD) print(str(hapGrids))
	##write.csv(hapGrids, file="hapGrids.csv") 
	
	## check whether no matching BK
	apply(hapGrids, 2, FUN=function(coo){
				qing.check = mean(is.na(coo))
				if(qing.check==1) stop(qp(fN, ": no matching pattern for one block"))
			})
	
	
	## need to find super-hap index for the matching patterns
	uniBkCt = ncol(hapGrids) / 2
	
	HRCB.hapCt2 = HRCB.hapCt
	
	if(ifD) print(hapGrids)
	supIdx = NULL
	
	for(n in 1:nrow(hapGrids)){
		
		if(ifD) {
			print(paste("proc row for pattern: row=", nrow(hapGrids)))
			#print(n)
			#print(hapGrids[n,])
		}
		if( n>240) {
			#print(hapGrids[n,])
		}
		hap1 = hapGrids[n, 1:uniBkCt] 
		m1 = filterHaps2SHaps(hap1, HRCB.hapCt2) 
		
		hap2 = hapGrids[n, (uniBkCt+1):(2*uniBkCt)]
		m2 = filterHaps2SHaps(hap2, HRCB.hapCt2)  
		
		hapPairs = qExpandTable(listOfFactor =list(m1, m2), removedRowIdx=NULL, re.row=FALSE)
		hapPairs = t(apply(hapPairs, 1, FUN=range))
		superDipIdx = apply(hapPairs, 1, FUN=util.it.triMatch, len=HRCB.hapCtProd)
		superDipIdx = unique(unlist(superDipIdx))
		
		supIdx = c(supIdx, superDipIdx)
		supIdx = unique(supIdx)
		
		if(length(supIdx)>10^10) stop( qp(fN,": Too many matching super-haplotypes, exceeding 10^10."))
		#if(ifD) print(supIdx)
	}
	
	supDipAll = NULL
	if( HRCB.hapCtProd < 250 ){
		supDipAll = supIdx[sort.list(as.integer(supIdx), method="radix")]
	}else{
		supDipAll = supIdx[sort.list(as.integer(supIdx), method="quick", na.last=NA)]
	}
	
	
	#print(paste(fN, "QingMark2::sorted superDip:"))
	#print(length(supDipAll))
	## parse out the four stratum
	if (length(supDipAll)==0){
		warning(paste(fN, ":no matching allele on the HRCB"))
	}
	
	#print(supDipAll)
	## FIX LATER!!!FIX : can further reduce the workload
	HRCBGrp = t(sapply(supDipAll, FUN=util.it.triMatch2, len=HRCB.hapCtProd, re.homo=TRUE))
	
	
	#print(HRCBGrp)
	# print(str(HRCBGrp))
	
	HRCBGrp.A = HRCBGrp[HRCBGrp[,3]==1, c(1,2), drop=FALSE]
	
	HRCBGrp.BIdx = HRCBGrp[HRCBGrp[,3]==0,1]
	#save(HRCBGrp.A, file="HRCBGrp.A.RData")
	#save(HRCBGrp.BIdx, file="HRCBGrp.BIdx.RData")
	
	return(list(A=HRCBGrp.A, B=HRCBGrp.BIdx))
	
}

bkMap.ESp.apply1Rule.stepBy <-
function(bkMap, signalRule){
	ifD = FALSE
	fN = "bkMap.ESp.apply1Rule"
	if(ifD) print(paste(fN, "begin::"))
	
	signal = signalRule$signal
	
	total.bkct = length( bkMap$snpCt )
	
	# construct the beginning/ending index of snp in original map
	idx.be = matrix(NA, nrow=total.bkct, ncol=2)
	idx.be[,2]=cumsum(bkMap$snpCt)
	idx.be[,1]=c(1, cumsum(bkMap$snpCt)+1)[1:total.bkct]
	
	
	causalBkUniqOrderedIdx = bkMap.findHRCBIdx(bkMap, as.integer(signalRule$var.idx), re.keys=FALSE, unique=TRUE)
	## find out # of haplotype in each HRCB
	HRCB.hapCt = bkMap$bkLens [causalBkUniqOrderedIdx]
	HRCB.hapCtProd = cumprod(HRCB.hapCt)[length(causalBkUniqOrderedIdx)]
	
	causalBkIdx.ruleOrder = bkMap.findHRCBIdx(bkMap, as.integer(signalRule$var.idx), re.keys=FALSE, unique=FALSE)
	
	#if(length(causalBkUniqOrderedIdx)==1){
	
	if(length(causalBkIdx.ruleOrder)==1){
		
	}else{
		## need to update the rule a little
		if(ifD) print( binaTree.toStr(signal))
		signal.n = binaTree.shuffleNode(signal)    
		if(ifD) print( binaTree.toStr(signal.n))
		signal.f = binaTree.merge(signal.n)
		if(ifD) print( binaTree.toStr(signal.f))
		
	}
	
	## have to find the matching with the HRCB only bkMap
	#causalBkIdx.ruleOrder = match(causalBkIdx.ruleOrder, causalBkIdx)
	
	elmMList = list()
	# for each block, need to find the hap pairs that are associated with higher risk
	
	for ( j in 1:length(causalBkIdx.ruleOrder)){
		bkIdx = causalBkIdx.ruleOrder[ j ]
		if(ifD) print(paste("bkIdx inside the bkMap =", bkIdx ))
		
		##  20Dec08Change!!!: straighten coding: 1, 2, 3 for 1-digitCoding, and 1, 2 for 2-digit
		
		genoMap =  hap2GenoBlock(hapExp = as.character(bkMap$bks[[bkIdx]][, bkMap$expCol]),
				hapProb = bkMap$bks[[bkIdx]][, bkMap$probCol], snpCt = bkMap$snpCt[bkIdx],
				alleleCode = bkMap$alleleCode )
		if(ifD) print(genoMap)
		item = idx.be[bkIdx,]
		## use recycling rules
		re = paste("v", rep(item[1]:item[2], each=2), sep="")
		newData.col = paste(re,  c("a", "b"), sep="")
		
		## singleSNPRule follow the original order in the genome
		## HARD CODE!!!HARD CODE: genoMa from hap2GenoBlock keep digits  1, 2 for 2-digit, need to change to 0/1 binary coding
		## 20Dec08Change!!! to use 2- for 0/1 binay coding, so the first alleleCode (changed to 1) correspond to the minor allele
		## and the second  (changed to 2) for major allele. Just need to coordinate with signal Rule.
		
		varIdx = match (signalRule$signal$elm.vlist[[j]], newData.col)
		treePred = (2-genoMap$genoMa)[,varIdx]
		## if the variable is not proceed by "not" operator
		## CHANGED on 8JAN09, from ==0 to !=-1. -1 means not
		if (signalRule$signal$elm.vMa[j,3]!=-1){
			nct = sum(treePred==1)
			if(ifD) print("not proceed by not")
			pairHRCB = genoMap$tb[  treePred==1, c(1,2), drop=FALSE ]
		}else{
			## if the variable is proceed by "not" operator
			nct = sum(treePred==0)
			if(ifD) print("proceed by not")
			pairHRCB = genoMap$tb[  treePred==0, c(1,2), drop=FALSE ]
		}
		
		#print(paste("matching hap pairs in block ", bkIdx))
		#print(pairHRCB)
		
		
		elmMList=c(elmMList, list(pairHRCB))
	} # for ( j in 1:length(causalBkIdx.new)){
	#print("This is elmMList")
	#print(elmMList)
	#print(causalBkIdx.ruleOrder)
	#print("qing mark")
	
	## simplied if only one HRCB
	
	if (length(causalBkIdx.ruleOrder)==1){
		
		superDipIdx = apply(pairHRCB, 1, FUN=util.it.triMatch, len=HRCB.hapCtProd)
		
		superDipIdx = sort(superDipIdx)
		
		## FIX LATER!!!FIX : can further reduce the workload
		HRCBGrp = t(sapply(superDipIdx, FUN=util.it.triMatch2, len=HRCB.hapCtProd, re.homo=TRUE))
		
		# print(str(HRCBGrp))
		
		HRCBGrp.A = HRCBGrp[HRCBGrp[,3]==1, c(1,2), drop=FALSE]
		
		HRCBGrp.BIdx = HRCBGrp[HRCBGrp[,3]==0,1]
		
		#print(HRCBGrp.A)
		#print(HRCBGrp.BIdx)
		return(list(A=HRCBGrp.A, B=HRCBGrp.BIdx))
		
	}
	
	hapGrids = binaTree.patternForm(binaTree=signal.f, elmMList=elmMList, bkIdx.RuleOrder=causalBkIdx.ruleOrder)
	if(ifD) print(str(hapGrids))
	##write.csv(hapGrids, file="hapGrids.csv") 
	
	## check whether no matching BK
	apply(hapGrids, 2, FUN=function(coo){
				qing.check = mean(is.na(coo))
				if(qing.check==1) stop(qp(fN, ": no matching pattern for one block"))
			})
	
	
	## need to find super-hap index for the matching patterns
	uniBkCt = ncol(hapGrids) / 2
	
	HRCB.hapCt2 = HRCB.hapCt
	
	if(ifD) print(hapGrids)
	supIdx = NULL
	
	for(n in 1:nrow(hapGrids)){
		
		if(ifD) {
			print(paste("proc row for pattern: row=", nrow(hapGrids)))
			#print(n)
			#print(hapGrids[n,])
		}
		if( n>240) {
			#print(hapGrids[n,])
		}
		hap1 = hapGrids[n, 1:uniBkCt] 
		m1 = filterHaps2SHaps(hap1, HRCB.hapCt2) 
		
		hap2 = hapGrids[n, (uniBkCt+1):(2*uniBkCt)]
		m2 = filterHaps2SHaps(hap2, HRCB.hapCt2)  
		
		hapPairs = qExpandTable(listOfFactor =list(m1, m2), removedRowIdx=NULL, re.row=FALSE)
		hapPairs = t(apply(hapPairs, 1, FUN=range))
		superDipIdx = apply(hapPairs, 1, FUN=util.it.triMatch, len=HRCB.hapCtProd)
		superDipIdx = unique(unlist(superDipIdx))
		
		supIdx = c(supIdx, superDipIdx)
		supIdx = unique(supIdx)
		
		if(length(supIdx)>10^10) stop( qp(fN,": Too many matching super-haplotypes, exceeding 10^10."))
		#if(ifD) print(supIdx)
	}
	
	supDipAll = NULL
	if( HRCB.hapCtProd < 250 ){
		supDipAll = supIdx[sort.list(as.integer(supIdx), method="radix")]
	}else{
		supDipAll = supIdx[sort.list(as.integer(supIdx), method="quick", na.last=NA)]
	}
	
	
	#print(paste(fN, "QingMark2::sorted superDip:"))
	#print(length(supDipAll))
	## parse out the four stratum
	if (length(supDipAll)==0){
		warning(paste(fN, ":no matching allele on the HRCB"))
	}
	
	#print(supDipAll)
	## FIX LATER!!!FIX : can further reduce the workload
	HRCBGrp = t(sapply(supDipAll, FUN=util.it.triMatch2, len=HRCB.hapCtProd, re.homo=TRUE))
	
	
	#print(HRCBGrp)
	# print(str(HRCBGrp))
	
	HRCBGrp.A = HRCBGrp[HRCBGrp[,3]==1, c(1,2), drop=FALSE]
	
	HRCBGrp.BIdx = HRCBGrp[HRCBGrp[,3]==0,1]
	#save(HRCBGrp.A, file="HRCBGrp.A.RData")
	#save(HRCBGrp.BIdx, file="HRCBGrp.BIdx.RData")
	
	return(list(A=HRCBGrp.A, B=HRCBGrp.BIdx))
	
}

bkMap.findHRCBIdx <-
function(bkMap, snpIdx, re.keys=TRUE, unique=TRUE, complement=FALSE){
  if(!is.integer(snpIdx)) stop()
  # find out the bk bin the snp fall into by comparing the idx with the ending idx of block
  endIdx = cumsum(bkMap$snpCt)
  idx.bk.belong = qing.cut(val=snpIdx, cutPt=endIdx, cutPt.ordered = TRUE, right.include=TRUE)

  if ((max(idx.bk.belong<0)==1) | (max(idx.bk.belong>length(endIdx))==1)) stop("One of the SNP index in sigStr is out of range.")

  if(unique){
    idx.bk.belong = sort(unique(idx.bk.belong)) 
  }
  if(complement){
    total = 1:(length(bkMap$keys))
    idx.bk.belong.unique =  sort(unique(idx.bk.belong)) 
    idx.bk.belong = total[ -idx.bk.belong.unique ]
  }
  
  if(re.keys){
    idx.bk.belong = bkMap$keys[idx.bk.belong]
  }
  return(idx.bk.belong)
}

bkMap.genoFreq <-
function(bkMap, keys){

  # only one key
  idx = match(keys, bkMap$keys)

  genoFreq = NULL

  for( i in idx){
    if( is.na(i) ) stop()
    if(i>1){
      snpBIdx = sum(bkMap$snpCt[ 1:(i-1) ])
    }else{
      snpBIdx = 0
    }
    bks = bkMap$bks[[ i ]]
    genoFreq.byBk = hap.2geno( as.character(bks[, bkMap$expCol]), bks[,bkMap$probCol], snpBIdx)
    genoFreq = rbind(genoFreq, genoFreq.byBk)
  }
  return(genoFreq)

}

bkMap.HRCB.Esp1Rule.Base <-
function(bkMap, rule, baseName=NULL, dig1Code=0:3){
  ifD = FALSE
  fn = "bkMap.HRCB.Esp1Rule.Base::"
  if(ifD) print(paste(fn, "begin"))

  if (length( rule$slist)>1) stop("This function work only with one SignalRule list")

  
  signalRule = rule$slist[[1]]

  HRCBIdx = bkMap.findHRCBIdx(bkMap, as.integer(signalRule$var.idx), re.keys=FALSE, unique=TRUE)
  superHRCBMap = bkMap.superHRCB(bkMap, uniBkIndexes=HRCBIdx, re.probOnly = TRUE)
  
  # obtain super-hap and their probabilities
  supHapProb = superHRCBMap
  supLen = length(supHapProb)

    
  # if only one term in the rule, it doesn't matter whether the coef is positive or negative
  # if more than one term, need to choose the negative as the reference
  ## 20Dec08Change!!!
  HRCBGrps = bkMap.ESp.apply1Rule(bkMap, signalRule)

  if(ifD){
    print( HRCBGrps )
    #return(NULL)
  }

  ## construct the objects for four sampling stratum
  
  if (nrow(HRCBGrps$A)==0){
    ## if no heter pairs associated with high risk
    #print("check")
    #print(HRCBGrps$A)
  }
  
  HRCBStra.AD = HRCBSpGrp.cons(supHapProb, HRCBGrps$A, type="A")
  #print(HRCBStra.AD)

  HRCBStra.A = HRCBStra.AD$grpA

  HRCBStra.D = HRCBStra.AD$grpD

  #print(supHapProb)
  #print(HRCBGrps$B)
  HRCBStra.B = HRCBSpGrp.cons(supHapProb, HRCBGrps$B, type="B")
  #print(HRCBStra.B)
  
  if(ifD) {
    print(HRCBStra.A)
    print(HRCBStra.D)
    print(HRCBStra.B)
  }
  if(length( HRCBGrps$B )>0){
    tmp.Cidx = (1:supLen)[-HRCBGrps$B]
  }else{
    tmp.Cidx = (1:supLen)
  }
  HRCBStra.C = HRCBSpGrp.cons(supHapProb, tmp.Cidx, type="C")

  if(ifD){
    # check the sum of prob
    print(paste(fn, " check the sum of prob:"))
    print(sum(HRCBStra.A$idProb[,2],
        HRCBStra.B$idProb[,2],
        HRCBStra.C$idProb[,2],
        HRCBStra.D$idProb[,2]))
     print(HRCBStra.A)
     print(HRCBStra.B)
     print(HRCBStra.C)
     print(HRCBStra.D)
  }

  ## restandadize the prob
  straCt = c(HRCBStra.A$ct, HRCBStra.B$ct, HRCBStra.C$ct, HRCBStra.D$ct)
  straCumCt = cumsum(straCt)
  sttProb = c(HRCBStra.A$idProb[,2],
        HRCBStra.B$idProb[,2],
        HRCBStra.C$idProb[,2],
        HRCBStra.D$idProb[,2])
  sttProb = sttProb/sum(sttProb)

  #print(paste(fn, " risk allele freq = ", sum( sttProb [1: straCumCt[2] ])))

  
  if(HRCBStra.A$ct>0) HRCBStra.A$idProb[,2] = sttProb[1:straCumCt[1]]
  if(HRCBStra.B$ct>0) HRCBStra.B$idProb[,2] = sttProb[(straCumCt[1]+1): straCumCt[2] ]
  if(HRCBStra.C$ct>0) HRCBStra.C$idProb[,2] = sttProb[(straCumCt[2]+1): straCumCt[3] ]
  HRCBStra.D$idProb[,2] = sttProb[(straCumCt[3]+1): straCumCt[4] ]

  HRCBStra = list(HRCBStra.A = HRCBStra.A, HRCBStra.B = HRCBStra.B,
                  HRCBStra.C = HRCBStra.C, HRCBStra.D = HRCBStra.D)

  if(!is.null(baseName))  save(HRCBStra, file=paste(baseName, ".RData", sep=""))

  return(HRCBStra)

}

bkMap.HRCB.Esp1Rule.genoSeq <-
function(bkMap, rule, re.probOnly = TRUE){

  ifD = FALSE
  signalRule = rule$slist[[1]]

  HRCBIdx = bkMap.findHRCBIdx(bkMap, as.integer(signalRule$var.idx), re.keys=FALSE, unique=TRUE)

  causalInfo = bkMap.updateByRule(bkMap, rule)
  
  # after process signalRule, 
  keys.new = bkMap$keys[c(causalInfo$causalBkIdx)]
  bkMapS = bkMap.shuffle(bkMap, bkCt=NULL, keys.new=keys.new, exclude=FALSE)

  superHRCBMap = bkMap.superHRCB(bkMap, uniBkIndexes=HRCBIdx, re.probOnly=re.probOnly)

  if(re.probOnly){
    supHapProb = superHRCBMap
    #print(paste("# of prob: ", length( superHRCBMap)))

    return(list(bkMapS=bkMapS, supHapProb=supHapProb))
  }else{
   #print("Re additional stuff")
   #print(superHRCBMap)
   supHapProb = superHRCBMap$prob
   superHapExp = apply(superHRCBMap$linkBkExp, 1, paste, collapse="")

   if(ifD) print( cbind(superHapExp, superHRCBMap$prob, cumsum(superHRCBMap$prob)))
  
   # obtain super-hap and their probabilities

   #print(paste("# of prob: ", length( superHRCBMap$prob)))

   return(list(bkMapS=bkMapS, supHapExp=superHapExp, supHapProb=supHapProb))

  }

}

bkMap.HRCB.famMap <-
function(bkMapS, rule, newColName,  ifS="simuDirectInfo", baseName=NULL){
     ifD = FALSE

     ct.shap = cumprod(bkMapS$bkLens)[length(bkMapS$bkLens)]
     ct.sdip = .5*ct.shap*(1+ct.shap)
     ct.mrow = .5*ct.sdip*(1+ct.sdip)

     if(ct.mrow > 5*10^6) warning(qp("Total number of super-haplotype exceed 5*10^6. It is likely to exceed the memory limit."))

     if(ct.mrow > 5*10^7) stop(qp("Total number of super-haplotype exceed 5*10^7. It exceeds the memory limit."))

     linkedHap = bkMap.superHRCB(bkMapS)
     hapExp = apply(linkedHap$linkBkExp, 1, paste, collapse="")
     prob = linkedHap$prob/sum(linkedHap$prob)
   
     hapCt = length(prob)
     
     ab = hap2GenoBlock(hapExp, prob, snpCt = bkMapS$snpLen, alleleCode = bkMapS$alleleCode)

     risk = HRCB.applyRule(ab$tb$genoExp, rule, newColName)

     ## ruleMet only return the applied result for the last rule in the list
     ## associate the hap pair index with prob is used for the kids
     if(length(rule$slist)==1){
       tb = cbind(ab$tb, ruleMet=risk$treePred, risk.Prob=risk$riskProb)
     }else{
       tb = data.frame(ab$tb, risk.Prob=risk$riskProb)
     }

     if(ifD) print(str(tb))
     if(ifD) print(tb)

     #print("test")

     ## generat kids hap ids pair
     rowCt = nrow(tb)

     if(ifD) print(rowCt)
     matRowCt = (rowCt^2-rowCt)/2+rowCt
     matingRowIdxCorn = util.it.smallLargeIdx(rowCt, keep.same=FALSE)
     if(ifD) print(str(matingRowIdxCorn))
     matingRowIdxDiag = matrix(rep(1:rowCt, each=2), ncol=2, byrow=TRUE)
     if(ifD) print(str(matingRowIdxDiag))

     matingRowIdx = rbind(matingRowIdxCorn, matingRowIdxDiag)
     if(ifD) print(matingRowIdx[1:20,])

     tmpProb = tb$prob
     matingPCor = matrix(tmpProb[matingRowIdxCorn], ncol=2, byrow=FALSE)
     matingPCorn = 2*matingPCor[,1]*matingPCor[,2]
     matingPDiag = tmpProb^2
     matingP = c(matingPCorn, matingPDiag)
     if(ifD) print(matingP[1:20])
     
     ## HARD CODE!!! NEED to be blocked
##****  subs = 1:10
##****  matingRowIdx = matingRowIdx[subs,]
##****  matingP = matingP[subs]

     tmpTb = matrix(unlist(tb[,c(1,2)]), ncol=2, byrow=FALSE)
     if(ifD) print(str(tmpTb))
    
     fa.hapIdx = tmpTb[matingRowIdx[,1], ]
     ma.hapIdx = tmpTb[matingRowIdx[,2], ]

     if(ifD) {
       print("Sample parents hap idxes:")
       print(fa.hapIdx[1:10,])
       print(ma.hapIdx[1:10,])
     }

     fam.map = matrix(NA, ncol=12, nrow=matRowCt )
     fam.map[,c(1,2)]=fa.hapIdx
     fam.map[,c(3,4)]=ma.hapIdx
     fam.map[,5]=pmin( fa.hapIdx[,1], ma.hapIdx[,1])
     fam.map[,6]=pmax( fa.hapIdx[,1], ma.hapIdx[,1])
     fam.map[,7]=pmin( fa.hapIdx[,1], ma.hapIdx[,2])
     fam.map[,8]=pmax( fa.hapIdx[,1], ma.hapIdx[,2])
     fam.map[,9]=pmin( fa.hapIdx[,2], ma.hapIdx[,1])
     fam.map[,10]=pmax( fa.hapIdx[,2], ma.hapIdx[,1])
     fam.map[,11]=pmin( fa.hapIdx[,2], ma.hapIdx[,2])
     fam.map[,12]=pmax( fa.hapIdx[,2], ma.hapIdx[,2])

     if(ifD) print(fam.map[1:10,])

     ## obtain the disease prob given the hap idx
     kids.hapIdx = matrix(t(fam.map[,5:12]), nrow=2, byrow=FALSE, ncol=4*matRowCt)
     if(ifD) print(kids.hapIdx[,1:10])
     kids.hapIdx = t(kids.hapIdx)
     if(ifD) print(kids.hapIdx[1:10,])
     if(ifD) print(str(kids.hapIdx))

### instead of using the inefficient matching, we will use the virtual mapping function 
#     kids.matchedRow = unlist(apply(kids.hapIdx[1:20,], 1, util.vec.matchVecIdx, vec=t( tmpTb ),  vecLen=rowCt*2, #benchLen=2))

     kids.matchedRow = unlist(apply(kids.hapIdx, 1, util.it.triMatch, len=hapCt))

     if(ifD) print(kids.matchedRow[1:20])
     # kids.p = matrix(  rep(.25*matingP, each=4), ncol=4, byrow=TRUE)
     # print( paste("Check:: kids p sum (before standardization)=", sum(kids.p)))
     kids.p = matingP/sum(matingP)
  
     ## check
     #if(ifD)  print( paste("Check:: kids p sum=", sum(kids.p)))
     #fam.map = data.frame(fa.hapIdx, ma.hapIdx, ch1.h, ch2.h, ch3.h, ch4.h, rowProb = matingP, kids.p)
     #if(ifD) print(fam.map[1:10,])
  
     kids.risk = matrix( risk$riskProb [kids.matchedRow], ncol=4, byrow=TRUE)
     kids.matchedRow = matrix(kids.matchedRow, ncol=4, byrow=TRUE)
     if(ifD) {
       print("kids risk")
       print(str(kids.risk))
     }
     kids.risk[,1] = (.25*kids.p)* kids.risk[,1]
     kids.risk[,2] = (.25*kids.p)* kids.risk[,2]
     kids.risk[,3] = (.25*kids.p)* kids.risk[,3]
     kids.risk[,4] = (.25*kids.p)* kids.risk[,4]
  
     ## check
     #print( paste("Check:: kids risk sum=", sum(kids.risk)))
     
     fam.map = cbind(fam.map, matingP, kids.risk)
     colnames(fam.map) = c("f.hap1", "f.hap2", "m.hap1", "m.hap2",
                         "c1.hap1", "c1.hap2", "c2.hap1", "c2.hap2",
                         "c3.hap1", "c3.hap2", "c4.hap1", "c4.hap2",
                         "rowProb", 
                         "c1Risk", "c2Risk", "c3Risk", "c4Risk")
  
     if(ifD) print(round(fam.map[1:10,],5))

     if(!is.null(ifS)) {
       
       #write.csv(tb, file=paste(ifS, "hap2geno.csv", sep=""))
       #write.csv(fam.map,  file=paste(ifS, "hapMating.csv", sep=""))
     }
  
     matingTbInfo = list(matRowCt=matRowCt, kids.risk=kids.risk,
                 matingRowIdx = matingRowIdx,
                 hap2genoMap = ab$tb,
                 snpLen = bkMapS$snpLen,
                 kids.matchedRow = kids.matchedRow)
     #print(str(matingTbInfo))

     if(!is.null(baseName))  save( matingTbInfo, file=paste(baseName, ".RData", sep=""))

     return( matingTbInfo )
}

bkMap.HRCB.LRCB.split <-
function (bkMap, rule){

  causalInfo = bkMap.updateByRule(bkMap=bkMap, rule=rule)
  col.shuffled=causalInfo$new.colname

  nocausalInfo = bkMap.updateByRule(bkMap=bkMap, rule=rule,  complement=TRUE )
  col.shuffled.nocausal =nocausalInfo$new.colname

  
  # after process signalRule, 
  keys.new = bkMap$keys[c(causalInfo$causalBkIdx)]
  
  # one map for associated blocks
  bkMapS  = bkMap.shuffle(bkMap, bkCt=NULL, keys.new=keys.new, exclude=FALSE)


  bkMapNS = bkMap.shuffle(bkMap, bkCt=NULL, keys.new=keys.new, exclude=TRUE)
        
  return(bkMaps = list(bkMapS=bkMapS,  bkMapNS=bkMapNS, newColName=col.shuffled, noCausalColName=col.shuffled.nocausal))
}

bkMap.LRCB.spTrio <-
function(bkMap, caseNo, ifS = NULL, reControl=FALSE){

   ifD = FALSE
   # first sample parents
   f.samples = bkMap.spHap(bkMap, caseNo)
   f.samples2 = bkMap.spHap(bkMap, caseNo)

   m.samples = bkMap.spHap(bkMap, caseNo)
   m.samples2 = bkMap.spHap(bkMap, caseNo)

   #dim(par)
   fa = covDipStr2CodedGeno(f.samples$subjects, f.samples2$subjects, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))
   ma = covDipStr2CodedGeno(m.samples$subjects, m.samples2$subjects, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))

   ## shuffle the 
   par = cbind(as.vector(f.samples$subjects), as.vector(f.samples2$subjects),
               as.vector(m.samples$subjects), as.vector(m.samples2$subjects))

   parIn =  cbind(as.vector(f.samples$subjectsIn), as.vector(f.samples2$subjectsIn),
               as.vector(m.samples$subjectsIn), as.vector(m.samples2$subjectsIn))

   #print(paste("!reControl: par dim:", paste(dim(par), collapse=" by ", sep="")))

   
   ## shuffle the random kid
   ## get index, !!!take the same happair combination for all blocks and all trio
   baseIdx =  matrix(c(1,3,2,3,1,4,2,4), nrow=2, byrow=FALSE)

   randomRowIdx = sample(1:4, size=1, replace=FALSE)
   newchild = baseIdx[,randomRowIdx]

   chExp = par[, newchild]
   chIn = parIn[, newchild]

   #print(paste("!reControl: chExp dim:", paste(dim(chExp), collapse=" by ", sep="")))

   #print(dim(chExp))
   #print(dim(chIn))
   #print(dim(newchild))

   child1 = matrix(chExp[,1], nrow=caseNo, byrow=FALSE)
   child2 = matrix(chExp[,2], nrow=caseNo, byrow=FALSE)

   affChild = covDipStr2CodedGeno(child1, child2, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))


   trioSetts = rbind(fa, ma, affChild)
   #print(paste("!reControl: trioSetts dim:", paste(dim(trioSetts), collapse=" by ", sep="")))

   if(!reControl){     
     rowNewSeq = t(matrix(1:(caseNo*3), ncol=3, nrow=caseNo, byrow=FALSE))
     geno.FMCMa = trioSetts[rowNewSeq,]
   }else{

     othRowIdx = (1:4)[ -randomRowIdx ]
     
     exHapIdx = baseIdx[,othRowIdx]

     tt = FALSE
     if(tt) print(paste("othRowIdx=", paste(othRowIdx, collapse=";")))
     if(tt) print(paste("expHapIdx=", paste(exHapIdx, collapse=";")))

     for( i in 1:3){
          childExpIdx = exHapIdx[,i]
          #print(childExpIdx)
          othChildExp = par[,childExpIdx]
          #print(paste("!reControl: othChildExp dim:", paste(dim(othChildExp), collapse=" by ", sep="")))
          childOth1 = matrix(othChildExp[,1], nrow=caseNo, byrow=FALSE)
          childOth2 = matrix(othChildExp[,2], nrow=caseNo, byrow=FALSE)

          affChild =
            covDipStr2CodedGeno(childOth1, childOth2, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))
          trioSetts = rbind(trioSetts, affChild)
     }
     #print(paste("!reControl: trioSetts dim:", paste(dim(trioSetts), collapse=" by ", sep="")))
     
     rowNewSeq = t(matrix(1:(caseNo*6), ncol=6, nrow=caseNo, byrow=FALSE))
     #print(trioSetts)
     geno.FMCMa = trioSetts[rowNewSeq,]
     

   }

     ## rearrange the data, so the three subject in a family goes together.
   if(!is.null(ifS)){
       ## retain trio Index
       childIn1 = matrix(chIn[,1], nrow=caseNo, byrow=FALSE)
       childIn2 = matrix(chIn[,2], nrow=caseNo, byrow=FALSE)
       faIn1 = matrix(parIn[,1],  nrow=caseNo, byrow=FALSE)
       faIn2 = matrix(parIn[,2],  nrow=caseNo, byrow=FALSE)
       maIn1 = matrix(parIn[,3],  nrow=caseNo, byrow=FALSE)
       maIn2 = matrix(parIn[,4],  nrow=caseNo, byrow=FALSE)
  
       childIn = util.matrix.col.shuffle2(childIn1, childIn2)
       faIn = util.matrix.col.shuffle2(faIn1, faIn2)
       maIn = util.matrix.col.shuffle2(maIn1, maIn2)
  
       allIn = rbind(faIn, maIn, childIn)

       rowNewSeq = t(matrix(1:(caseNo*3), ncol=3, nrow=caseNo, byrow=FALSE))
       allIn = allIn[rowNewSeq,]

       write.table(allIn,  file=paste(ifS, "supHap.csv", sep=""), col.names=FALSE, row.names=FALSE, sep=",")         
   }
   return(geno.FMCMa)
   
#      ## not implemented
#      ## shuffle the random kid
#      child = apply(par, 1, FUN=function(row){
#        idx = matrix(c(1,3,2,3,1,4,2,4), nrow=2, byrow=FALSE)
#        randomChildIdx = sample(1:4, size=4, replace=FALSE)
#        idx = idx[,randomChildIdx]
#   
#        newchild = row[idx]
#      })
#      child1.1 = matrix(child[1,], nrow=caseNo, byrow=FALSE)
#      child1.2 = matrix(child[2,], nrow=caseNo, byrow=FALSE)
# 
#      child2.1 = matrix(child[3,], nrow=caseNo, byrow=FALSE)
#      child2.2 = matrix(child[4,], nrow=caseNo, byrow=FALSE)
# 
#      child3.1 = matrix(child[5,], nrow=caseNo, byrow=FALSE)
#      child3.2 = matrix(child[6,], nrow=caseNo, byrow=FALSE)
# 
#      child4.1 = matrix(child[7,], nrow=caseNo, byrow=FALSE)
#      child4.2 = matrix(child[8,], nrow=caseNo, byrow=FALSE)
# 
#      affChild1 = covDipStr2CodedGeno(child1.1, child1.2, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))
#      affChild2 = covDipStr2CodedGeno(child2.1, child2.2, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))
#      affChild3 = covDipStr2CodedGeno(child3.1, child3.2, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))
#      affChild4 = covDipStr2CodedGeno(child4.1, child4.2, subjectCt=caseNo, snpLen=bkMap$snpLen, snpCoding=0:3, snpBase=c(0, bkMap$alleleCode))
#   
#      allChild = rbind(affChild1, affChild2, affChild3, affChild4)
# 
#      # child is SNPct*4 by caseNo matrix
#      newSeq = matrix(1:(4*caseNo), ncol=caseNo, byrow=TRUE)
#      newChild = allChild[newSeq,]
# 
#      trioSetts = rbind(fa, ma, affChild1)
#      rowNewSeq = t(matrix(1:(caseNo*3), ncol=3, nrow=caseNo, byrow=FALSE))
# 
#      geno.FMCMa = trioSetts[rowNewSeq,]
#      return(trio=list(geno.FMCMa=geno.FMCMa, cc = newChild))

}

bkMap.shuffle <-
function(bkMap, bkCt=1, keys.new=NULL, exclude=FALSE){
  keys.ori = bkMap$keys
  if(is.null(keys.new)){
    keys.new =  resample(keys.ori, size=bkCt, replace=FALSE)
  }

  # order are index in the keys.ori
  if(!exclude){
    keys.neworder = match(keys.new, keys.ori)
    leftkeys.neworder = (1:length(keys.ori)) [-keys.neworder]

    ## CHANGE. do not think keep the left over blocks is ever be used.
    #if(!del.left){
    #  keys.neworder = c(keys.neworder, leftkeys.neworder)
    #}else{
      keys.neworder = keys.neworder
    #}
  }else{
    # if want to exclude the keys.new
    keys.neworder = match(keys.new, keys.ori)
    keptkeys.neworder = (1:length(keys.ori)) [-keys.neworder]

    #if(!del.left){
    #  keys.neworder = c(keptkeys.neworder, keys.neworder)
    #}else{
      keys.neworder = keptkeys.neworder
    #}
  }

  ##print(keys.neworder)
  newBkMap = bkMap
  newBkMap$bks = bkMap$bks[keys.neworder]
  newBkMap$bkLens = bkMap$bkLens[keys.neworder]
  newBkMap$keys = keys.ori[keys.neworder]
  newBkMap$snpCt = bkMap$snpCt[keys.neworder]
  newBkMap$snpLen = sum(newBkMap$snpCt)
  newBkMap$alleleCode = bkMap$alleleCode

  if(exclude){
    newBkMap$selBkCt = length(keys.ori) - length(keys.new)
  }else{
    newBkMap$selBkCt = length(keys.new)
  }
  return(newBkMap)
  
}

bkMap.spGenoSeq <-
function(bkMap, subjectCt){

   # first set of hap
   samples = bkMap.spHap(bkMap, subjectCt)
   samples2 = bkMap.spHap(bkMap, subjectCt)
   if(F) print(cbind(samples[[1]], samples2[[1]]))

   # change hap to genotype
   samplesBina = hapBk2AlleleSeq(subjects=samples$subjects, subjectCt, snpLen=bkMap$snpLen)
   samplesBina2 = hapBk2AlleleSeq(subjects=samples2$subjects, subjectCt, snpLen=bkMap$snpLen)
  
   ## construct the genotypic data
   binaNew1 = pmin(samplesBina, samplesBina2)
   binaNew2 = pmax(samplesBina, samplesBina2)
   bina = util.matrix.col.shuffle2(binaNew1, binaNew2)

   return(bina)
}

bkMap.spHap <-
function(bkMap, subjectCt){

  len = length(bkMap$bkLens)

  re = NULL
  people = NULL
  people.in = NULL
  for( i.bk in 1:len ){
    curIn = sample(bkMap$bkLens[i.bk], size = subjectCt, replace = TRUE, prob = bkMap$bks[[i.bk]][,bkMap$probCol])

    curExp = lapply(curIn, FUN=function(item, bkMap, i.bk){
                              as.character(bkMap$bks[[i.bk]][item,bkMap$expCol])
                            }, bkMap = bkMap, i.bk=i.bk)
    people = c(people, list(unlist(curExp)))
    people.in = c(people.in, list(curIn))
  }
  subjects = matrix(unlist(people), ncol=length(people), byrow=FALSE)
  subjects.in = matrix(unlist(people.in), ncol=length(people.in), byrow=FALSE)

  re = list(subjects=subjects, subjectsIn=subjects.in)
  return(re)
}

bkMap.superHRCB <-
function(bkMap, uniBkIndexes=c(1:length(bkMap$keys)), re.probOnly = FALSE){

  ifD = FALSE
  ## find every block expression for every block
  linked = lapply(uniBkIndexes, FUN=function(index, bkMap){
                                            bkMap$bks[[index]]}, bkMap=bkMap)

  hapLen = bkMap$snpCt
  
  len = length(uniBkIndexes)
  if(ifD) print(paste("block len=", len, sep=""))
  
  gridProb = lapply(linked, FUN=function(bk, probCol){
                                 re = bk[, probCol]}, probCol=bkMap$probCol)


    ## only calculate the probability
     gridProb = lapply(linked, FUN=function(bk, probCol){
                                   re = bk[, probCol]}, probCol=bkMap$probCol)
    if(length(uniBkIndexes)==1){
       prob = as.matrix(gridProb[[1]])
    }else{
      prob = qExpandTable(listOfFactor = gridProb)
      prob = apply(prob, 1, prod)
    }
    prob = prob/sum(prob)


  if(re.probOnly){
    return(prob)
  }
  
  gridBase = lapply(linked, FUN=function(bk, expCol){
                                 re = as.character(bk[, expCol])}, expCol=bkMap$expCol)
  gridBaseIndex = lapply(bkMap$bkLens[uniBkIndexes], FUN=function(bkLen){
                                 re = 1:bkLen})
  
  if(length(uniBkIndexes)==1){
     mas = as.matrix(gridBase[[1]])
     mashIn = as.matrix(gridBaseIndex[[1]])
  }else{
    mas = qExpandTable(listOfFactor =  gridBase )
    mashIn = qExpandTable(listOfFactor =  gridBaseIndex )

  }

  if(ifD) {
    print(paste("mas index dim=", ""))
    print(str(mas))
    print(mas[1:2,])
    print(mashIn[1:2,])
    print(prob[1:2])
  }
  #updateBkMap = specifyBks(oriData, uniBkIndexes, bkMap, keyCol, hapLenCol)
  #print(mas)

  re = list(linkBkExp=mas, linkBkIn = mashIn, prob=prob)

  ## print(mashIn)
  return(re)  
}

bkMap.updateByRule <-
function(bkMap, rule, complement=FALSE, is.hapGenoMap = FALSE){

  # shuffle the genotype blocks, but keep the columns
  # so need to shuffle the column names

  if(is.hapGenoMap) stop("Method not implemented for hapGenoMap listing yet!")

  # based on the snp idx, map them with block idx
  if(!is.hapGenoMap){
    causalBkIdx.new = bkMap.findHRCBIdx(bkMap, as.integer(rule$snpIdx), re.keys=FALSE, unique=TRUE, complement=complement)
    total.bkct = length(bkMap$keys)
  
    # construct the beginning/ending index of snp in original map
    idx.be = matrix(NA, nrow=total.bkct, ncol=2)
    idx.be[,2]=cumsum(bkMap$snpCt)
    idx.be[,1]=c(1, cumsum(bkMap$snpCt)+1)[1:total.bkct]
  
    id.be.shuffled = idx.be[ c(causalBkIdx.new,  (1:total.bkct)[!is.element(causalBkIdx.new, 1:total.bkct)]),, drop=FALSE ]
  
    newData.col = apply(id.be.shuffled, 1, FUN= function(item){
      ## use recycling rules
      re = paste("v", rep(item[1]:item[2], each=2), sep="")
      re = paste(re,  c("a", "b"), sep="")
      re})

  }else{
    ## not implemented. Because later we added the argu, complement. 
    ## causalBkIdx.new = hapBkGenoMap.findHRCBIdx(allmap=bkMap, snpIdx=as.integer(rule$snpIdx), re.key=FALSE, unique=TRUE)

    ## snp.idxSeq = hapBkGenoMap.findHRCBSnpIdx(allmap=bkMap, snpIdx=as.integer(rule$snpIdx), re.1digit=TRUE)
    ## newData.col = t( paste( "v", rep(snp.idxSeq, each=2), c("a", "b"), sep=""))

  }

  info = list(causalBkIdx = causalBkIdx.new,
            new.colname = unlist(newData.col))
  return(info)
  

}

bkMap.updateHapFreq <-
function(bkMap, bkIndex, expression=NULL, freq){
  bkFrame = bkMap$bks[[bkIndex]]

  if( length(freq)!= nrow(bkFrame)) stop(paste("updateBlockBinaFreq:: freq length doesn't match the ", bkIndex, " block config.", sep=""))

  newfreq = freq/(sum(freq))

  if(!is.null(expression)) {
    if( length(expression)!= nrow(bkFrame)) stop(paste("updateBlockBinaFreq:: expression length doesn't match the ", bkIndex, " block config.", sep=""))

    bkFrame[, bkMap$expCol ] = expression
  }

  bkFrame[, bkMap$probCol ] = newfreq

  bkMap$bks[[bkIndex]] = bkFrame

  return(bkMap)
}

calHapIdx2SHap <-
function(hapIdxes, hapCts){

  # return one index
  bkCt = length(hapCts)

  hapCts= c(1, hapCts[-bkCt])
  cumRows = cumprod(hapCts)
  enuStart = cumRows*(hapIdxes-1)
  idx = cumsum(enuStart)[bkCt] + 1

  return(idx)

}

calHapIdx2SHapSet <-
function( bkIdx, hapIdx, hapCts){

  ifD = FALSE
  
  ## find the number of row for each stratum
  cumRows = c(1, hapCts[1:bkIdx])
  cumRows = cumprod(cumRows)

  it = cumRows[bkIdx]
  matchSet = 1:it
  if(ifD) print(paste("matchSet=", paste(matchSet, collapse=";")))
  set = it*(hapIdx-1)+matchSet
  if(ifD) print(paste("set=", paste(set, collapse=";")))
  
  strataOffset = seq.int(from=0, to=cumprod(hapCts)[length(hapCts)]-1, by = it*hapCts[bkIdx])
  if(ifD) print(paste("strataOffset=", paste(strataOffset, collapse=";")))
  set = rep(strataOffset, each=it) + set;
  return(set)

}

checkMendelianError <-
function(codedSNPTrio, snpCoding=c(0,1,2,3)){

  # is the child homo with 1?
  if(codedSNPTrio[3]==snpCoding[2]){
    onePHaveNone = FALSE
    if(codedSNPTrio[1]==snpCoding[3]) onePHaveNone = TRUE
    othPHaveNone = FALSE
    if(codedSNPTrio[2]==snpCoding[3]) othPHaveNone = TRUE
    if(onePHaveNone | othPHaveNone)
      stop("Medelian error for homozygous (1) child with at least one parent homozygous (2)")
  # is the child homo with 2?
  }else if(codedSNPTrio[3]==snpCoding[3]){
    onePHaveNone = FALSE
    if(codedSNPTrio[1]==snpCoding[2]) onePHaveNone = TRUE
    othPHaveNone = FALSE
    if(codedSNPTrio[2]==snpCoding[2]) othPHaveNone = TRUE
    if(onePHaveNone | othPHaveNone)
      stop("Medelian error for homozygous (2) child with at least one parent homozygous (1)")
  # is the child hetero
  }else if(codedSNPTrio[3]==snpCoding[4]){

    if(codedSNPTrio[1]==snpCoding[2]){
      if(codedSNPTrio[2]==snpCoding[2]){
        stop("Medelian error for heterozygous child with two parents homozygous (1)") 
      }
    }else if(codedSNPTrio[1]==snpCoding[3]){
      if(codedSNPTrio[2]==snpCoding[3]){
        stop("Medelian error for heterozygous child with two parents homozygous (2)") 
      }
    }
  # is the child missing
  }

  return(NULL)
}

covDipBinaMa2CodedGeno <-
function(dip1, dip2, subjectCt, snpCoding=c(0,1,2,3), snpBase=c(0,1,2)){

  dipSum = dip1+dip2

  ##  print(dipSum)
  a1 = sum(snpBase[2:3] * c(2,0)) # 11-> to 1
  a2 = sum(snpBase[2:3] * c(1,1)) # 12-> to 3
  a3 = sum(snpBase[2:3] * c(0,2)) # 22-> to 2

  snp1d.f = factor(dipSum, levels=c(a1, a3, a2), labels = snpCoding[2:4])
  snp1d.f = as.character(snp1d.f)

  if(is.null(dim(dipSum))){
    ncol = length(dipSum)
    nrow = 1
  }else{
    ncol = dim(dipSum)[2]
    nrow = dim(dipSum)[1]
  }

  codedGeno = matrix(as.integer(snp1d.f), nrow=subjectCt, ncol=ncol)

  
##   codedGeno = matrix(NA, nrow=subjectCt, ncol=ncol)
##   for( i in 1:dim(dipSum)[1]){
##     for( j in 1:dim(dipSum)[2]){
##        if (dipSum[i,j] == 2*snpBase[1]) codedGeno[i,j] = snpCoding[1]
##        if (dipSum[i,j] == 2*snpBase[2]) codedGeno[i,j] = snpCoding[2]
##        if (dipSum[i,j] == 2*snpBase[3]) codedGeno[i,j] = snpCoding[3]
##        if (dipSum[i,j] == snpBase[2]+snpBase[3]) codedGeno[i,j] = snpCoding[4]
##     }
##   }
  return(codedGeno)
}

covDipStr2CodedGeno <-
function(dipStr1, dipStr2, subjectCt, snpLen, snpCoding=c(0,1,2,3), snpBase=c(0,1,2)){
  dip1 =  hapBk2AlleleSeq(dipStr1, subjectCt, snpLen, markdownOne = FALSE)
  dip2 =  hapBk2AlleleSeq(dipStr2, subjectCt, snpLen, markdownOne = FALSE)
  re = covDipBinaMa2CodedGeno(dip1, dip2, subjectCt, snpCoding=snpCoding, snpBase=snpBase)
  return(re)
}

dfToGenoMap <-
function(df, dataHeaders=NULL, genotype=c("11", "12", "22"), snpBase=1){
	##txtF = "tblBlock.csv"
	##dataHeaders = NULL
	
	if(is.null(dataHeaders)){
		## assume that txtF has the first row as column names
		data = df
	}else{
		## assume that column names of the data is passed from outside
		data = df
		colnames(data) = dataHeaders
	}
	
	m = match(c("prekey", "seq", "freq", "genotype"), colnames(data), 0)
	
	## assume except for markers_b and marekers_e, other variable should be presented
	if(min(m[2:3])==0) stop("One or more required variable(s) missing")
	
	
	if( !is.element("prekey", colnames(data)) ){
		## assume the seq of homo, hetero, homo
		data$prekey = I(rep("prekey", times=nrow(data)))
	}
	if( !is.element("genotype", colnames(data)) ){
		## assume the seq of homo, hetero, homo
		data$genotype = I(rep(genotype, times=nrow(data)/3))
	}
	mheader=c("prekey", "seq", "freq", "genotype")
	
	m = match(c("prekey", "seq", "freq", "genotype"), colnames(data), 0)
	## cast the data into the right format
	for ( i in 1:4 ){
		if(!is.na(m[i])){
			if (i==1){
				if(class(data[,m[i]])=="factor") data[,i]=as.character(data[,m[i]])
			}
			if (i==4){
				if(class(data[,m[i]])=="factor") data[,i]=as.numeric(as.character(data[,m[i]]))
			}
			if( i==2 | i==3){
				if(class(data[,m[i]])=="factor") data[,i]=as.numeric(as.character(data[,m[i]]))
			}
		}
	}
	
	chLastSNPIndex = util.sql.groupby(data[,m], groupCols=1,  varCol=2, type=c("max"))
	
	genoMap = list(df = data[,m], chLastSNPIndex=chLastSNPIndex, chCol=1, genomeSeqCol=2, probCol=3, expCol=4, snpBase=snpBase)
	return(genoMap)
	
}

dfToHapBkMap <-
function(data, keyCol=NULL, chCol, blockCol, expCol, probCol, hapLenCol, beginCol=NULL, endCol=NULL, snpBase=1, re.bf = TRUE, re.javaGUI = TRUE){
	
	if(is.null(keyCol)){
		data = cbind(data, key=paste(data[,chCol], data[,blockCol], sep="-"))
		keyCol = ncol(data)
	}
	
	allKeys = unique(data[,keyCol])
	keyCt = length(allKeys)
	
	bks = NULL
	snpLen = 0
	bkLens = NULL
	bkSnpLens = NULL
	markers_b = NULL
	markers_e = NULL
	
	df = data[,c( keyCol, chCol, blockCol, expCol, probCol, hapLenCol, beginCol, endCol)]
	
	if(!is.null(beginCol)){
		for( i in 1:keyCt){
			r = df[data[,keyCol]==allKeys[i], ]
			bks = c(bks, list(r))
			bkLens = c(bkLens, dim(r)[1])
			bkSnpLens = c(bkSnpLens, r[1, 6])
			snpLen = snpLen + r[1,6]
			markers_b = c(markers_b, r[1, 7])
			markers_e = c(markers_e, r[1, 8])
		}
	}else{
		for( i in 1:keyCt){
			r = df[data[,keyCol]==allKeys[i], ]
			bks = c(bks, list(r))
			bkLens = c(bkLens, dim(r)[1])
			bkSnpLens = c(bkSnpLens, r[1, 6])
			snpLen = snpLen + r[1, 6]
		}
	}
	
	
	dfStr = cbind(as.character(data[,keyCol]),
			as.character(data[,expCol]),
			as.character(data[,probCol]))
	
	dfMaster = cbind(as.character(allKeys),
			as.character(bkLens),
			as.character(bkSnpLens),
			
			as.character(markers_b),
			as.character(markers_e))
	
	hapBkMap = NULL
	if(re.bf){
		hapBkMap = c(hapBkMap, list(df=df))
	}
	
	if(re.javaGUI){
		hapBkMap = c(hapBkMap, list(dfStr=dfStr,       dfStrDim    = c(nrow(dfStr), ncol(dfStr)),
						dfMaster=dfMaster, dfMasterDim = c(nrow(dfMaster), ncol(dfMaster)),
						bkCumIndex=cumsum(bkLens)))
	}
	
	hapBkMap = c(hapBkMap, list(
					bks = bks,
					bkLens = bkLens,
					keys = as.character(allKeys),
					bkSnpLens = bkSnpLens, 
					snpBase =snpBase, snpLen = snpLen, keyCol=1, chCol=2, blockCol=3,
					expCol = 4, probCol = 5, hapLenCol=6))
	
	if(!is.null(beginCol)){
		hapBkMap = c(hapBkMap, list(markers_b = markers_b, markers_e = markers_e,
						beginCol=7, endCol=8))
	}
	
	return(hapBkMap)
}

ESp.impu1Par <-
function(othParPairs, childPairs,  semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, reType=FALSE, job=1 ){
  ifD = FALSE
  fStr = "[ESp.impu1Par]"
  if(ifD) {
    print(fStr)
    print(semiMapFrame)
    print(paste("snpLen=", snpLen))
  }
  
  if(is.null(dim(othParPairs))) othParPairs = matrix(othParPairs, ncol=2)
  if(is.null(dim(childPairs))) childPairs = matrix(childPairs, ncol=2)
  
  allParIdx= unique(as.vector(othParPairs))

  if(ifD){
    print(othParPairs)
    print(childPairs)

  }
  
  
  ## generate all combination of par and child pairs.
  prob.ps = getHapProb2(selIdx=allParIdx,
    semiMapFrame,
    resiProbCol=resiProbCol,
    augIdxCol=augIdxCol,
    probCol= probCol,
    snpLen, restandard=TRUE)
  if(ifD) {
    print(prob.ps)
  }
  
  n.par = length(othParPairs)/2
  n.ch = length(childPairs)/2

  sp.ma = NULL
  for( i in 1:n.par){
    par = othParPairs[i,]
    
    for( j in 1:n.ch){
      ch = childPairs[j,]
      
      ## find whether the pair match on at lease one hap
      if(sum(is.na(match(par, ch)))==2) {
        if(ifD) print( paste("Ignor par=", paste(par, collapse=";"), " and ch=", paste(ch, collapse=";") ))

      }else{
          sp.prob=NULL
          type = 0
          
          ##1) B/E/C vs. A/D
          if (par[1]!=par[2]){
            ##2) B/E/C, B/E vs. C
            if(ch[1]!=ch[2]){
              
              ch = sort(ch)
              par= sort(par)
              ##3) B/E, B vs. E
              if (sum( ch==par )==2){
                if(ifD) print("B")
                # B
                sp.prob = ESp.impu1Par.B(hap=ch, prob.p=prob.ps[match(par, allParIdx)], semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
                type=2

                ## additional col, no use
                add.col = rep(NA, 5)
                
              }else{
                if(ifD) print("E")
                # E: need to keep the common one at the front
                match.t = is.na(match(ch, par))
                if(match.t[1]){ ch = c(ch[2], ch[1])}
                match.t = is.na(match(par, ch))
                if(match.t[1]){ par = c(par[2], par[1]) }             
                
                sp.prob = ESp.impu1Par.E(hap=ch[1], prob.p=prob.ps[match(par, allParIdx)], semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
                type=5
                add.col = rep(par[2], 2)
                if(ifD) print(sp.prob)
                
              } ##B vs E::if (sum( ch==par )==2){
              
            }else{ ##2) B/E/C, B/E vs. C
              # C: need to keep the common one at front
              if(ifD) print("C")
              match.t = is.na(match(par, ch))
              if(match.t[1]){par = c(par[2], par[1]) }

              #print(par)
              #print( prob.ps[match(par, allParIdx)] )

              sp.prob = ESp.impu1Par.C(hap=par, prob.p=prob.ps[match(par, allParIdx)], semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
              # print(sp.prob)
              type=3
              add.col = rep(par[2], 3)

              
            } ## B/E vs C::if(ch[1]!=ch[2]){
            
          }else{ ## B/E/C vs D/A if (par[1]!=par[2]){
            ##2) D/A, D vs. A
            if(ch[1]!=ch[2]){
              # D need to keep the common one at front
              if(ifD) print("D")
              match.t = is.na(match(ch, par))
              if(match.t[1]){
                  ch = c(ch[2], ch[1])              
              }
              sp.prob = ESp.impu1Par.D(hap=ch[2], prob.p=prob.ps[match(par[1], allParIdx)], semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
              type=4
              
              add.col = rep(NA, 2)
            }else{
              # A
              if(ifD) print("A")
              sp.prob = ESp.impu1Par.A(hap=ch[1], prob.p=prob.ps[match(par[1], allParIdx)], semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
              type=1
              add.col = rep(NA, 2)
              
            } ## D vs. A::if(ch[1]!=ch[2]){
    
          } ## B/E/C vs D/A if (par[1]!=par[2]){
          ch.colA = matrix(rep(ch, length(sp.prob)), ncol=2, byrow=TRUE)
          
          rows = cbind(rep(type, length(sp.prob)), 1:length(sp.prob), sp.prob, add.col, ch.colA)
          if(ifD) print(rows)
          
          sp.ma = rbind(sp.ma, rows)
          # print(sp.ma)

        } # if(sum(is.na(match(par, ch)))<2) {
    } # for( j in 1:n.ch){
  } # for( i in 1:n.par){

  colnames(sp.ma)=c("type", "straSeq", "prob", "add", "ch1", "ch2")


  hap6idx = matrix(NA, nrow=job, ncol=7)
  
  for(ss in 1:job){
    # now sample it
    row.sp = sample(1:nrow(sp.ma), size=1, prob=sp.ma[,3])
  
    type.sp = sp.ma[row.sp,1]
    straSeq = sp.ma[row.sp,2]
    
    if(ifD) {
      print("Sampling")
      print(sp.ma)
    }
    if(type.sp==1){
      sp6 = ESp.impu1Par.A.sp(hap=sp.ma[row.sp, 5],  straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
    }
    if(type.sp==2){
      sp6 = ESp.impu1Par.B.sp(hap=sp.ma[row.sp, 5:6],  straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
    }
    if(type.sp==3){
      #print("type.sp==3")
      sp6 = ESp.impu1Par.C.sp(hap=sp.ma[row.sp, 5], hap.p=sp.ma[row.sp, 4], straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
      #print(sp6)
    }
    if(type.sp==4){
      sp6 = ESp.impu1Par.D.sp(hap=sp.ma[row.sp, 5:6],  straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
    }
    if(type.sp==5){
      sp6 = ESp.impu1Par.E.sp(hap=sp.ma[row.sp, 5:6], hap.p=sp.ma[row.sp, 4] ,  straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen )
    }

    hap6idx[ss, ]=c(sp6, type.sp)
  }

  
  if(reType){
    return(hap6idx )
  }else{
    return(hap6idx[,1:6,drop=FALSE])
  }
}

ESp.impu1Par.A <-
function(hap, prob.p, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	
	prob.c = getHapProb2(selIdx=hap, semiMapFrame,
			resiProbCol=resiProbCol,
			augIdxCol=augIdxCol,
			probCol=probCol,
			snpLen, restandard=FALSE)
	stra.1 = (prob.c^2)*(prob.p^2)
	
	stra.2 = (prob.c)*(1-prob.c)*(prob.p^2)
	
	return(c(stra.1, stra.2))
	
}

ESp.impu1Par.A.sp <-
function(hap,  straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	
	## give out 6 item vectors for the hap pairs for imputed par, given par and child
	if(straSeq==1){
		## i,i|ii|ii
		re = rep(hap, 6)
	}
	
	if(straSeq==2){
		## i, !=i|ii|ii
		sp = sampleHapSemiAugMap2(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, exHapIdx=hap)
		re = c(sort(c(hap, sp)), rep(sort(hap), 4))
	}
	
	return(re)
	
}

ESp.impu1Par.B <-
function(hap, prob.p, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen){
	
	prob.c1 = getHapProb2(selIdx=hap[1], semiMapFrame,
			resiProbCol=resiProbCol,
			augIdxCol=augIdxCol,
			probCol=probCol,
			snpLen, restandard=FALSE)
	prob.c2 = getHapProb2(selIdx=hap[2], semiMapFrame,
			resiProbCol=resiProbCol,
			augIdxCol=augIdxCol,
			probCol=probCol,
			snpLen, restandard=FALSE)
	
	stra.1 = (prob.c1^2)*(prob.p[1]*prob.p[2])
	stra.2 = (prob.c1)*(1-prob.c1-prob.c2)*(prob.p[1]*prob.p[2])
	stra.3 = 2*(prob.c1*prob.c2)*(prob.p[1]*prob.p[2])
	stra.4 = (prob.c2)*(1-prob.c1-prob.c2)*(prob.p[1]*prob.p[2])
	stra.5 = (prob.c2^2)*(prob.p[1]*prob.p[2])
	
	return(c(stra.1, stra.2, stra.3, stra.4, stra.5))
	
}

ESp.impu1Par.B.sp <-
function(hap,  straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	
	## give out 6 item vectors for the hap pairs for imputed par, given par and child
	if(straSeq==1){
		## i,i|ik|ik
		re = c(rep(hap[1], 2), sort(hap), sort(hap))
	}
	
	if(straSeq==2){
		## i, !=ik|ik|ik
		sp = sampleHapSemiAugMap2(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, exHapIdx=hap)
		re = c(sort(c(hap[1], sp)), sort(hap), sort(hap))
	}
	if(straSeq==3){
		## ik|ik|ik
		re = rep(sort(hap), 3)
	}
	
	if(straSeq==4){
		## k, !=ik|ik|ik
		sp =  sampleHapSemiAugMap2(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, exHapIdx=hap)    
		re = c(sort(c(hap[2], sp)),  sort(hap), sort(hap))   
	}
	if(straSeq==5){
		## k,k|ik|ik
		re = c(rep(hap[2], 2),  sort(hap), sort(hap))
	}
	
	return(re)
	
}

ESp.impu1Par.C <-
function(hap, prob.p, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	
	prob.c1 = getHapProb2(selIdx=hap[1], semiMapFrame,
			resiProbCol=resiProbCol,
			augIdxCol=augIdxCol,
			probCol= probCol,
			snpLen, restandard=FALSE)
	prob.c2 = getHapProb2(selIdx=hap[2], semiMapFrame,
			resiProbCol= resiProbCol,
			augIdxCol= augIdxCol,
			probCol=  probCol,
			snpLen, restandard=FALSE)
	
	stra.1 = (prob.c1^2)*(prob.p[1]*prob.p[2])
	stra.2 = (prob.c1)*(1-prob.c1-prob.c2)*(prob.p[1]*prob.p[2])
	stra.3 = (prob.c1*prob.c2)*(prob.p[1]*prob.p[2])
	
	return(c(stra.1, stra.2, stra.3))
	
}

ESp.impu1Par.C.sp <-
function(hap, hap.p, straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen){
	#print(paste("straSeq=", straSeq))
	## give out 6 item vectors for the hap pairs for imputed par, given par and child
	if(straSeq==1){
		## i,i|ij|ii
		re = c(hap, hap,   sort(c(hap, hap.p)), hap, hap)
	}
	
	if(straSeq==2){
		## i, !=ij|ij|ii
		sp = sampleHapSemiAugMap2(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, exHapIdx=c(hap, hap.p))
		#print(paste("sp=", sp))
		re = c(sort(c(hap, sp)), sort(c(hap, hap.p)), hap, hap)
	}
	if(straSeq==3){
		## ij|ij|ii
		re = c(sort(c(hap, hap.p)), sort(c(hap, hap.p)), hap, hap)
	}
	
	return(re)
	
}

ESp.impu1Par.D <-
function(hap, prob.p, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	
	prob.c = getHapProb2(selIdx=hap, semiMapFrame,
			resiProbCol=resiProbCol,
			augIdxCol=augIdxCol,
			probCol= probCol,
			snpLen, restandard=FALSE)
	
	
	stra.1 = (prob.c^2)*(prob.p^2)
	stra.2 = (prob.c)*(1-prob.c)*(prob.p^2)
	
	return(c(stra.1, stra.2))
	
}

ESp.impu1Par.D.sp <-
function(hap,  straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	
	## give out 6 item vectors for the hap pairs for imputed par, given par and child
	if(straSeq==1){
		## kk|ii|ik
		re = c(hap[2], hap[2], hap[1], hap[1], sort(hap))
	}
	
	if(straSeq==2){
		## k, !=k|ii|ik
		sp = sampleHapSemiAugMap2(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, exHapIdx=c(hap[2]))
		re = c(sort(c(hap[2], sp)),     hap[1], hap[1], sort(hap))
	}
	
	return(re)
	
}

ESp.impu1Par.E <-
function(hap, prob.p, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	
	prob.c = getHapProb2(selIdx=hap, semiMapFrame,
			resiProbCol=resiProbCol,
			augIdxCol=augIdxCol,
			probCol=probCol,
			snpLen, restandard=FALSE)
	
	
	stra.1 = (prob.c^2)*(prob.p[1]*prob.p[2])
	stra.2 = (prob.c)*(1-prob.c)*(prob.p[1]*prob.p[2])
	
	return(c(stra.1, stra.2))
	
}

ESp.impu1Par.E.sp <-
function(hap, hap.p, straSeq, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen ){
	# print(straSeq)
	re = NULL
	## give out 6 item vectors for the hap pairs for imputed par, given par and child
	if(straSeq==1){
		## k,k|ij|ik
		re = c(hap[2], hap[2], sort(c(hap[1], hap.p)), sort(hap))
	}
	
	if(straSeq==2){
		## k, !=k|ij|ik
		sp = sampleHapSemiAugMap2(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, exHapIdx=c(hap[2]))
		re = c(sort(c(hap[2], sp)),     sort(c(hap[1], hap.p)), sort(hap))
	}
	
	
	return(re)
	
}

ESp.impuParent.heterKid <-
function(hapPair, hapProb, test=FALSE){
	ifD = FALSE
	fN = "ESp.impuParent.heterKid"
	hapCt = length(hapProb)
	
	## matrix to store the set info
	# first 2 col are hap idx, third is the probability
	setB = matrix(NA, ncol=3, nrow=5)
	setB[2,c(1,2)]=hapPair
	setB[3,c(1,2)]=rep(hapPair[1], 2)
	setB[5,c(1,2)]=rep(hapPair[2], 2)
	
	setB[2,3] = 2*hapProb[hapPair[1]]*hapProb[hapPair[2]] 
	setB[3,3] = (hapProb[hapPair[1]])^2
	setB[5,3] = (hapProb[hapPair[2]])^2
	setB[1,3] = 2*hapProb[hapPair[1]]*(1-sum(hapProb[hapPair]))
	setB[4,3] = 2*hapProb[hapPair[2]]*(1-sum(hapProb[hapPair]))
	
	
	sampleTb = matrix(NA, nrow=9, ncol=5)
	## first 2 col ae idx matched with setB
	sampleTb[,1]=c(1,1,1,2,2,2,3,3,3)
	sampleTb[,2]=c(4,2,5,4,2,5,4,2,5)
	## mating ratio
	sampleTb[,3]=c(2,2,2,2,1,2,2,2,2)
	## kids ratio
	sampleTb[,4]=c(.25, .25, .5, .25, .5, .5, .5, .5, 1)
	## last col is the sample prob
	
	## get the sample prob
	sampleTb[,5]= (setB[ sampleTb[,1], 3]) * (setB[ sampleTb[,2], 3]) * (sampleTb[,3]) * (sampleTb[,4])
	if(test){
		return(sampleTb)
	}
	if(ifD){
		print(paste(fN, "::sampling probability"))
		print(sampleTb)
	}
	
	sampleTb[,5]=sampleTb[,5]/sum(sampleTb[,5])
	chooseRow = sample(1:9, size=1, prob=sampleTb[,5])
	
	hap4idx = rep(NA, times=4)
	if(is.element(chooseRow, c(5,6,8,9))){
		#no need for resampling, the hapPair should come in as ordered
		hap4idx = c( setB[ sampleTb[chooseRow,1], c(1,2)], setB[ sampleTb[chooseRow,2], c(1,2)] )
		return(hap4idx)
	}else{
		hapProb[ hapPair ]=0
		sample.idx =  sample(1:hapCt, size=2, replace=TRUE, prob=hapProb)
		# need to resampling A:A2
		if (chooseRow==1){
			hap4idx[ c(1,3) ] = hapPair
			hap4idx[ c(2,4) ] = sample.idx
			hap4idx = c(range(hap4idx[c(1,2)]), range(hap4idx[c(3,4)]))
			return(hap4idx)
		}
		# A:B
		if (chooseRow==2){
			hap4idx[ 1 ] = hapPair[1]
			hap4idx[ 2 ] = sample.idx[1]
			hap4idx[ c(3,4) ] = setB[2, c(1,2)]
			hap4idx = c(range(hap4idx[c(1,2)]), hap4idx[c(3,4)])
			return(hap4idx)
		}
		# A:C2
		if (chooseRow==3){
			hap4idx[ 1 ] = hapPair[1]
			hap4idx[ 2 ] = sample.idx[1]
			hap4idx[ c(3,4) ] = setB[5, c(1,2)]
			hap4idx = c(range(hap4idx[c(1,2)]), hap4idx[c(3,4)])
			return(hap4idx)
		}
		# B:A2
		if (chooseRow==4){
			hap4idx[ 3 ] = hapPair[2]
			hap4idx[ 4 ] = sample.idx[1]
			hap4idx[ c(1,2) ] = setB[2, c(1,2)]
			hap4idx = c(hap4idx[c(1,2)], range(hap4idx[c(3,4)]))
			return(hap4idx)
		}
		# C:A2
		if (chooseRow==7){
			hap4idx[ 3 ] = hapPair[2]
			hap4idx[ 4 ] = sample.idx[1]
			hap4idx[ c(1,2) ] = setB[3, c(1,2)]
			hap4idx = c(hap4idx[c(1,2)], range(hap4idx[c(3,4)]))
			return(hap4idx)
		}   
		
		
	}
	
	
}

ESp.impuParent.homoKid <-
function(hapPair, hapProb, test=FALSE){
	
	hapCt = length(hapProb)
	
	## matrix to store the set info, no need anymore
	# first 2 col are hap idx, third is the probability
	
	sampleTb = matrix(NA, nrow=3, ncol=5)
	## first 2 col ae idx matched with setB
	sampleTb[,1]=c(1, 1, 2)
	sampleTb[,2]=c(1, 2, 2)
	## mating ratio, #not used anymore
	
	## kids ratio
	sampleTb[,4]=c(.25, .5, 1)
	## last col is the sample prob
	
	## get the sample prob
	
	## current approach: get the mating prob
	tp = hapProb[hapPair]
	matingProb = c(4*tp^2*(1-tp)^2, 4*tp^3*(1-tp), tp^4  )
	
	sampleTb[,5]= matingProb*sampleTb[,4]
	if(test){
		return(sampleTb)
	}
	sampleTb[,5]=sampleTb[,5]/sum(sampleTb[,5])
	chooseRow = sample(1:3, size=1, prob=sampleTb[,5])
	
	hap4idx = rep(NA, times=4)
	if(chooseRow==3){
		#no need for resampling
		hap4idx = rep(hapPair, times=4)
		return(hap4idx)
	}else{
		
		hapProb[ hapPair ]=0
		sample.idx =  sample(1:hapCt, size=2, replace=TRUE, prob=hapProb)
		# need to resampling A:A2
		if (chooseRow==1){
			hap4idx[ c(1,3) ] = rep(hapPair, 2)
			hap4idx[ c(2,4) ] = sample.idx
			hap4idx = c(range(hap4idx[c(1,2)]), range(hap4idx[c(3,4)]))
			return(hap4idx)
		}
		# A:B
		if (chooseRow==2){
			hap4idx[ c(1,2,3) ] = rep(hapPair, 3)
			hap4idx[ 4 ] = sample.idx[1]
			hap4idx = c(hap4idx[c(1,2)],  range(hap4idx[c(3,4)]))
			return(hap4idx)
		}
	}
	
}

ESp.imputBlock <-
function(appVarNames,  trioBlock, snpLen=ncol(trioBlock),  bkIdx, job=1, snpCoding, snpBase, reType=FALSE, logF=NULL,  hapBkOnlyMap.vars){

  ## TODO!!! making missed only disappear
  fStr ="[ESp.imputBlock:]"
  ifD = FALSE
  # inside the function, assume snpCoding as c( 0, 1, 2, 3 )for NA, homo, homo, heter and snpBase as c(0, 1, 2) for NA, allele1, allele2

  
  if(ifD) print( paste(fStr, " processing block index:", bkIdx))

  if( min( c(snpCoding==c(0,1,2,3),  snpBase ==c(0,1,2))) <1 )  
     stop (paste("\nData configuration is not right:\n", "snpCoding=[", paste(snpCoding, collapse=";", sep=""),
                                                       "] snpBase=[", paste(snpBase, collapse=";", sep=""), "]", sep=""))
  allhapKeys = get(appVarNames$freqMap)$hapIndex
  semiMapFrame = get(appVarNames$freqMap)$hapBkOnlyMap$bks[[ match(bkIdx, allhapKeys)]]

  ## the third row is the child
  child = trioBlock[3,]
  father = trioBlock[1,]
  mother  = trioBlock[2,]

  compMissing.trio = as.logical(apply(trioBlock, 1, sum)==0)

  cHapBkInfoMap = hapGenoBlockProc(child,  snpCoding=snpCoding)
  reqIn =  cHapBkInfoMap$homoIn
  reqDig = cHapBkInfoMap$homoDigit
  

  ## if no parents is completely missing
  if( !(compMissing.trio[1] | compMissing.trio[2])   ){
      
      fHapBkInfoMap = hapGenoBlockProc(father,  snpCoding=snpCoding)
      mHapBkInfoMap = hapGenoBlockProc(mother,  snpCoding=snpCoding)
    
    
      fHapFiltered = procSemiAugMap(appVarNames, fHapBkInfoMap, snpLen)
      mHapFiltered = procSemiAugMap(appVarNames, mHapBkInfoMap, snpLen)
      
      ## obtain completed parent filtered dip, knock off the one doesn't match with homozygous index
      fDipTb = fHapBkIdx2DipTb(appVarNames, fHapBkInfoMap, idxList=fHapFiltered, snpCoding,
                                      reqIn=reqIn, reqDigits = reqDig, expression=fHapBkInfoMap$ori, snpLen)
     
      mDipTb = fHapBkIdx2DipTb(appVarNames, mHapBkInfoMap, idxList=mHapFiltered, snpCoding,
                                      reqIn=reqIn, reqDigits = reqDig, expression=mHapBkInfoMap$ori, snpLen)


    
    if (compMissing.trio[3]){

      if(ifD) print( "no parents is compMiss, child is compMiss, sample parents.")
      if(ifD) print( "Type 2: F(d)M(d)C(CM)")
      ## no parents is compMiss, child is compMiss, sample parents.

      ## sample the non missing parent
      fProb = exDipProbSemiAugMap(fDipTb, semiMapFrame=semiMapFrame,
                                  hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)
      ## sample the non missing parent
      mProb = exDipProbSemiAugMap(mDipTb, semiMapFrame=semiMapFrame,
                                  hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)

      trioHapIdx = matrix(NA, nrow=job, ncol=13)
      trioHapIdx[,13]=rep(2, job)
      for( ss in 1:job){
         ## sample fa then sample mo...
         chooseRow = sample(length(fProb), size=1, prob=fProb)
         tmpFather = fDipTb[chooseRow,]
   
         chooseRow = sample(length(mProb), size=1, prob=mProb)
         tmpMother = mDipTb[chooseRow,]
   
         tmpChild = matrix( c(tmpFather[1], tmpMother[1], tmpFather[1], tmpMother[2],
                              tmpFather[2], tmpMother[1], tmpFather[2], tmpMother[2]), nrow=2, byrow=FALSE)
         t.choice = sample(4, size=1)
   
         trioHapIdx[ss,1:12] =  c(tmpFather, tmpMother, as.vector(tmpChild[, c(t.choice, (1:4)[-t.choice])]))
   
       }
      if(reType){
        return(trioHapIdx)
      }else{
        return(trioHapIdx[,1:12, drop=FALSE])
      }
      
    }else{
      ## no parents is compMiss, child is NOT compMiss, get all and exclude the fDipTb and mDipTb for those doesn't fit cDipTb

      if(ifD) print( "no parents is compMiss, child is NOT compMiss, get all and exclude the fDipTb and mDipTb for those doesn't fit cDipTb")
      if(ifD) print( "Type 6: F(d)M(d)C(d)")
      ## if none is completed missing
      cHapFiltered = procSemiAugMap(appVarNames, cHapBkInfoMap, snpLen)

      cDipTb = fHapBkIdx2DipTb(appVarNames, cHapBkInfoMap, idxList=cHapFiltered, snpCoding,
                                      reqIn=NULL, reqDigits = NULL, expression=cHapBkInfoMap$ori, snpLen)

##       print("###")
##       print(cDipTb)
##       print(fDipTb)
##       print(mDipTb)
      
      ## need to exclude the fDipTb and mDipTb for those doesn't fit cDipTb 
      tmpDip = exParentDip(cDipTb, fDipTb, fHapBkInfoMap,  snpLen)
      cDipTb = tmpDip$childTb
      fDipTb = tmpDip$parTb
#      print(tmpDip)
    
      tmpDip = exParentDip(cDipTb, mDipTb, mHapBkInfoMap, snpLen)
      cDipTb = tmpDip$childTb
      mDipTb = tmpDip$parTb
#      print(tmpDip)
      
      if(!is.null(logF)){
           logl(logF, paste("Possible dip for father with some data: row=", length(fDipTb)/2))
           logl(logF, paste(util.matrix.cat(fDipTb, 1:2, sep="."), collapse="; ", sep=""))
      
           logl(logF, paste("Possible dip for mother with some data: row=", length(mDipTb)/2))
           logl(logF, paste(util.matrix.cat(mDipTb, 1:2, sep="."), collapse="; ", sep=""))
    
           logl(logF, paste("Possible dip for child with some data: row=", length(cDipTb)/2))
           logl(logF, paste(util.matrix.cat(cDipTb, 1:2, sep="."), collapse="; ", sep=""))
      }
    
      ## need to refilter/standandize the pair probability
      ## obtain the diplotype map and probability
      
      prob1 = exDipProbSemiAugMap(fDipTb, semiMapFrame,
                         hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)
      
      prob2 = exDipProbSemiAugMap(mDipTb, semiMapFrame,
                         hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)
    
      if(ifD) {
        print("Possible diplotype (hap pair) indexes for father (fDipTb); and probability")
        print(fDipTb)
        print(prob1)
        print("Possible diplotype (hap pair) indexes for mother (mDipTb); and probability")
        print(mDipTb)
        print(prob2)
        print("Possible diplotype (hap pair) indexes for child  (cDipTb); ")
        print(cDipTb)
      }

      ## otherwise, build the mating table based on children geno      
      mating6hap = genMatingTBCondOnChild3(appVarNames,  child= cDipTb,
                             par1=fDipTb, par2=mDipTb, prob1, prob2, logF=logF, job=job)

      othChildtt = apply(mating6hap, 1, FUN=find.PsudoControlHap)
      
      trioHapIdx =  cbind(mating6hap[ ,1:4, drop=FALSE],  t(othChildtt))

      #if (ifD) print("@@@@@ return obj@@@@@")
      if(reType){
        #if(ifD) print(  cbind(trioHapIdx, rep(6, job)) )
        return(cbind(trioHapIdx, rep(6, job)))
      }else{
        #if(ifD) print(trioHapIdx)
        return(trioHapIdx)
      }
      
    }
  }

  ## if both parents are completely missing
  if( compMissing.trio[1] & compMissing.trio[2] ){
    if (compMissing.trio[3]){

      if(ifD) print("both parents are compMiss, child is compMiss, sample parents")
      if(ifD) print("Type 1: F(CM)M(CM)C(CM)")

      trioHapIdx = matrix(NA, nrow=job, ncol=13)
      trioHapIdx[,13]=rep(1, job)
      for(ss in 1:job){
        ## both parents are compMiss, child is compMiss, sample parents.
        tmpFather = sampleDipSemiAugMap(semiMapFrame, hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)
        tmpMother = sampleDipSemiAugMap(semiMapFrame, hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)
  
        tmpChild = matrix( c(tmpFather[1], tmpMother[1], tmpFather[1], tmpMother[2],
                             tmpFather[2], tmpMother[1], tmpFather[2], tmpMother[2]), nrow=2, byrow=FALSE)
        t.choice = sample(1:4, size=1)
  
        trioHapIdx[ss,1:12] =  c(tmpFather, tmpMother, as.vector(tmpChild[, c(t.choice, (1:4)[-t.choice]) ]))
  
      }

      
      if(reType){
        return(trioHapIdx)
      }else{
        return(trioHapIdx[, 1:12, drop=FALSE])
      }
      
    }else{

      if(ifD) print("both parents are compMiss, child is NOT compMiss, sample kids first, then imput parent (ESp method)")
      if(ifD) print("Type 4: F(CM)M(CM)C(d)")
      ## both parents are compMiss, child is NOT compMiss, sample kids first, then imput parent (ESp method)

      supHapProb = getHapProb.semiMapFrame( semiMapFrame, hapBkOnlyMap.vars$resiProbCol,
                           hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)

      ## for child not completely missing, need to restandardize the other parents' hap freq
      cHapFiltered = procSemiAugMap(appVarNames, cHapBkInfoMap, snpLen)
      ## need one function to FIX!!!FIX it 
      cDipTb = fHapBkIdx2DipTb(appVarNames, cHapBkInfoMap, idxList=cHapFiltered, snpCoding,
                                  reqIn=NULL, reqDigits = NULL, expression=cHapBkInfoMap$ori, snpLen)

      ## sample child
      cProb = exDipProbSemiAugMap(cDipTb, semiMapFrame=semiMapFrame,
                                  hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol,
                                  hapBkOnlyMap.vars$probCol, snpLen)

      trioHapIdx = matrix(NA, nrow=job, ncol=13)
      trioHapIdx[,13]=rep(4, job)
      for(ss in 1:job){
        chooseRow = sample(length(cProb), size=1, prob=cProb)
        hapPair = cDipTb[chooseRow,]
  
        if (hapPair[1]==hapPair[2]){
          matingTbl = ESp.impuParent.homoKid(hapPair[1], supHapProb)
        }else{
          matingTbl = ESp.impuParent.heterKid(hapPair, supHapProb)
        }
  
        trioHapIdxtt = c(matingTbl, hapPair)
        trioHapIdx[ss,1:12] =  c(matingTbl,  find.PsudoControlHap(trioHap6=trioHapIdxtt))
      }

      if(reType){
        return(trioHapIdx)
      }else{
        return(trioHapIdx[,1:12,drop=FALSE])
      }
    }
  }
  

  ## if only one parent is completely missing
  if( sum(compMissing.trio[1:2])==1 ){
    if(ifD) print( "only one parent is completely missing, need to sample the non-comp-missing parents from a restricted list")
    
    ## need to sample the non-comp-missing parents from a restricted list
    nonMparent = trioBlock[!compMissing.trio, ,drop=FALSE][1, ]
    pHapBkInfoMap = hapGenoBlockProc(nonMparent,  snpCoding=snpCoding)
    pHapFiltered = procSemiAugMap(appVarNames, pHapBkInfoMap, snpLen)

#     print(pHapBkInfoMap)
#     print(pHapFiltered)
    ## obtain completed parent filtered dip, knock off the one doesn't match with homozygous index
    pDipTb = fHapBkIdx2DipTb(appVarNames, pHapBkInfoMap,
                                     idxList=pHapFiltered, snpCoding = snpCoding,
                                     reqIn=reqIn, reqDigits = reqDig, expression=pHapBkInfoMap$ori, snpLen)

    if (compMissing.trio[3]){
      ## one parents is compMiss, child is compMiss, sample parents.
      ## just use the popu hap freq for the missing parent
      if(ifD) print( "one parents is compMiss, child is compMiss, sample parents ")
      if(ifD) print("Type 3: F(CM)M(d)C(CM)")
      missingParent = sampleDipSemiAugMap(semiMapFrame, hapBkOnlyMap.vars$resiProbCol, hapBkOnlyMap.vars$augIdxCol, hapBkOnlyMap.vars$probCol, snpLen)

      ## sample the non missing parent
      pProb = exDipProbSemiAugMap(pDipTb, semiMapFrame=semiMapFrame,
                                  hapBkOnlyMap.vars$resiProbCol,
                                  hapBkOnlyMap.vars$augIdxCol,
                                  hapBkOnlyMap.vars$probCol, snpLen)

      trioHapIdx = matrix(NA, nrow=job, ncol=13)
      trioHapIdx[,13]=rep(3, job)
      for(ss in 1:job){
        
        chooseRow = sample(length(pProb), size=1, prob=pProb)
        nomissingParent = pDipTb[chooseRow,]
  
        if(compMissing.trio[1]){
          tmpFather = missingParent
          tmpMother = nomissingParent
        }else{
          tmpFather = nomissingParent
          tmpMother = missingParent
        }
  
        tmpChild = matrix(  c(tmpFather[1], tmpMother[1], tmpFather[1], tmpMother[2],
                              tmpFather[2], tmpMother[1], tmpFather[2], tmpMother[2]), nrow=2, byrow=FALSE)
        
        t.choice = sample(4, size=1)
  
        trioHapIdx[ss,1:12] =  c(tmpFather, tmpMother, as.vector(tmpChild[, c(t.choice, (1:4)[-t.choice]) ]))
      }
      
      if(reType){
        return(trioHapIdx)
      }else{
        return(trioHapIdx[,1:12,drop=FALSE])
      }
      
    }else{

      if(ifD) print( "one parents is compMiss, child is NOT compMiss, use ESp method")
      if(ifD) print("Type 5: F(CM)M(d)C(d)")
      ## one parents is compMiss, child is NOT compMiss, use ESp method.

      ## for child not completely missing, need to restandardize the other parents' hap freq
      cHapFiltered = procSemiAugMap(appVarNames, cHapBkInfoMap, snpLen)
      ## need one function to FIX!!!FIX it 
      cDipTb = fHapBkIdx2DipTb(appVarNames, cHapBkInfoMap, idxList=cHapFiltered, snpCoding,
                                  reqIn=NULL, reqDigits = NULL, expression=cHapBkInfoMap$ori, snpLen)

      #print(cHapFiltered)
      #print(cDipTb) 
      trioHapIdx.f = ESp.impu1Par(
        othParPairs=pDipTb, childPairs=cDipTb,  semiMapFrame,
        resiProbCol=hapBkOnlyMap.vars$resiProbCol,
        augIdxCol=hapBkOnlyMap.vars$augIdxCol,
        probCol=hapBkOnlyMap.vars$probCol,
        snpLen, reType=reType, job=job)

      if(ifD) print(paste("Outcome hap:", paste(trioHapIdx.f, collapse=";")))
      
      ## need to know switch parents if the mom is missing
      if( compMissing.trio[1] ){
        othChildtt = apply(trioHapIdx.f, 1, FUN=find.PsudoControlHap)      
        trioHapIdx =  cbind(trioHapIdx.f[, 1:4, drop=FALSE],  t(othChildtt))
      }else{
        othChildtt = apply(trioHapIdx.f, 1, FUN=find.PsudoControlHap)
        trioHapIdx =  cbind(trioHapIdx.f[, c(3,4, 1,2), drop=FALSE],  t(othChildtt))
      }

      if(reType){
        return(cbind(trioHapIdx, 5+trioHapIdx.f[,7]/10))
      }else{
        return(trioHapIdx)
      }
 
    }
  }
  
}

exchangeDigit <-
function(ma, cols=NULL, dig1Code=c(0, 1, 3, 2), dig2Code = c(0, 1, 2), action=c("1to2", "2to1")){
	ifD = FALSE
	
	if(is.null(cols)){
		cols = c(1, ncol(ma))
	}
	
	fun.error = FALSE
	if(length(cols)!=2) fun.error=TRUE
	if(cols[2]>ncol(ma)) fun.error=TRUE
	
	outputColCt = cols[2]-cols[1]
	if( outputColCt <=0) fun.error=TRUE
	
	if(length(action)>=2) {
		
		if(length(action)>1) warning(paste("Two or more actions is request (", paste(action, collapse="; "),
							"). Only the first requested is performed."), sep="")
		action=action[1]
		
		if ((action!="1to2") & (action!="2to1"))
			stop(paste("Requested action, (",  action, "), is not implemented.", sep="" ))
	}
	ma.change = ma[, cols[1]:cols[2], drop=FALSE]
	## internally, do not use NA to represent missing,
	code.0 = 0
	code.012 = dig2Code
	code.0123 = dig1Code
	
	if( max( is.na(dig2Code[2:3]))==1) stop("NA is not allow to represent the non-missing allele!")
	if( max( is.na(dig1Code[2:4]))==1) stop("NA is not allow to represent the non-missing genotype!")
	
	if(action=="1to2"){
		if( is.na(dig1Code[1]) ){
			code.123 = dig1Code[2:4]
			## if the min is the same as the default code for NA(=0), then use the min -1 
			if( max(code.123==0)==1) code.0 = min(code.123)-1
			code.0123 = c(code.0, code.123)
			ma.change = apply(ma.change, 1:2, FUN= util.vec.replace, orignal = dig1Code, replaceBy=code.0123)
		}
		outputColCt = outputColCt/2
		tmpBridge = dig2Code[c(1, 2, 2, 3)]
		tmpBridge2 = dig2Code[c(1, 2, 3, 3)]
		if(ifD) print(paste("tmpBridge:", paste(tmpBridge, collapse=";")))
		if(ifD) print(paste("tmpBridge2:", paste(tmpBridge2, collapse=";")))   
	}
	if(action=="2to1"){
		if( is.na(dig2Code[1]) ){
			#need to replace the given NA with 0 or others
			code.12 = dig2Code[2:3]
			if( max(code.12==0)==1) code.0 = min(code.12)-1
			code.012 = c(code.0, code.12)
		}
		ma.change = apply(ma.change, 1:2, FUN= util.vec.replace, orignal = dig2Code, replaceBy=code.012)
		outputColCt = outputColCt*2
		sumBase = matrix(c(2, 0, 0,
						0, 2, 0,
						0, 1, 1,
						0, 0, 2), byrow=FALSE, nrow=3)
		sumCode = as.vector(matrix(code.012, ncol=3)%*%sumBase)
		if(ifD) print(paste("sumCode:", paste(sumCode, collapse=";")))
	}
	
	
	if (fun.error){
		stop("Argu, cols, refer to the ranges of the index for the columns in argu, ma.
						The current values are not valid.")
	}
	
	
	
	if(cols[1]>1) {
		ma.kept1 = ma[, 1:(cols[1]-1)]
	}else{
		ma.kept1 = NULL
	}
	
	ma.kept2=NULL
	if(cols[2]<ncol(ma)) ma.kept2 = ma[, (cols[2]+1):ncol(ma)]
	
	ma.ex=NULL
	if(action=="1to2"){
		
		
		ma.2dig1 = apply(ma.change, 1:2, FUN= util.vec.replace, orignal = code.0123, replaceBy=tmpBridge)
		ma.2dig2 = apply(ma.change, 1:2, FUN= util.vec.replace, orignal = code.0123, replaceBy=tmpBridge2)
		
		## make sure the small digit is at the front
		if(dig2Code[2]<dig2Code[3]){
			ma.ex = util.matrix.col.shuffle2(ma.2dig1, ma.2dig2)
		}else{
			ma.ex = util.matrix.col.shuffle2(ma.2dig2, ma.2dig1)
		}
		
	}
	
	if(action=="2to1"){
		
		seq1 = seq.int(from=1, to=ncol(ma.change), by=2)
		
		ma.sum = matrix(ma.change[, seq1]+ma.change[,seq1+1], ncol=length(seq1), byrow=FALSE)
		
		## convert to Qing's 1-digit coding system. 3 is for hetero
		
		ma.ex = apply(ma.sum, 1:2, FUN= util.vec.replace, orignal = sumCode, replaceBy=dig1Code)
		
	}
	if(!is.null( ma.kept1)){
		all = cbind(ma.kept1, ma.ex)
	}else{
		all = ma.ex
	}
	if(!is.null( ma.kept2)){
		all = cbind(all, ma.kept2)
	}
	return(all)
	
}

exDipProbSemiAugMap <-
function(dipMap, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen){
	## SemiAugMap only have the major haplotypes' expression and prob,
	## only a number for prob for each of the other haplotypes 
	## SemiAugMap also has a column showing the index of the haplotype in the fixed AugMap
	ifD = FALSE
	
	dipProb = as.vector(dipMap)
	
	leftOverProb = semiMapFrame[1, resiProbCol]
	
	mappedAugIdx = semiMapFrame[ , augIdxCol]
	
	mapProb = semiMapFrame[ , probCol]
	
	commonProbIdx = length(mapProb) + 1
	
	curIdx = match(dipProb, mappedAugIdx, nomatch=commonProbIdx )
	fittedFilter = curIdx==commonProbIdx
	
	## find out how many un/specified haplotype will fit the observed data
	fittedInMap= unique(curIdx[!fittedFilter])
	fittedOutMap = unique(dipProb[fittedFilter])
	
	if(ifD) print("fittedInMap")
	if(ifD) print(fittedInMap)
	if(ifD) print("fittedOutMap")
	if(ifD) print(fittedOutMap)
	
	## restandarize the prob
	if(length(fittedOutMap)>0){
		mapProbRe=rep(0, commonProbIdx)
		mapProbRe[commonProbIdx]=leftOverProb/length(fittedOutMap)
		mapProbRe[fittedInMap] = mapProb[fittedInMap]/sum(mapProb[fittedInMap])*(1-leftOverProb)
		
	}else{
		mapProbRe =rep(0, commonProbIdx-1)
		mapProbRe[fittedInMap] = mapProb[fittedInMap]/sum(mapProb[fittedInMap])
		
	}
	
	
	if(ifD) print(mapProbRe)
	
	dipProb = mapProbRe[curIdx]
	
	if(ifD) print(dipProb)
	
	## remember the dipMap is passed in as a vector filted by column
	idxSeq = 1:(length(curIdx)/2)
	reDipProb = dipProb[idxSeq] * dipProb[idxSeq+ (length(curIdx)/2) ]
	
	return(reDipProb)
	
}

exhaustHapExp <-
function(lociCt=3, re.str=TRUE,  snpCoding=c(1,2)){

  ma = matrix(snpCoding, ncol=1)

  ## grow the matrix on both sides
  if(lociCt==1) return(list(hap=ma, hapStr=as.character(ma)))
  for( i in 1:(lociCt-1) ){
    growing = util.matrix.clone(ma, 2)    
    addedBit = c(rep(snpCoding[1], times=2^i), rep(snpCoding[2], times=2^i))
    ma = cbind(growing, " "=addedBit)
  }
  if(re.str) {
    hapStr = util.matrix.cat(ma, 1:lociCt, sep="")
    return(list(hap=ma, hapStr=hapStr))
  }else{
    return(list(hap=ma))
  }
  
}

exIdxFromHap <-
function(lociCt){
  ## know the internal sequency of haplotype
  digitMap1 = matrix(NA, ncol=lociCt, nrow=2^(lociCt-1) );
  digitMap2 = matrix(NA, ncol=lociCt, nrow=2^(lociCt-1) );

  for( n in 1:lociCt ){
     col = seq.int(from=1, to=2^lociCt, by = 2^n)
     col2 = col+2^(n-1)

     colNext = col
     colNext2 = col2

     for( j in 1:(2^(n-1))){
       if( j > 1){
         newCol = col + j - 1
         colNext = rbind(colNext, newCol)

         newCol2 = col2 + j - 1
         colNext2 = rbind(colNext2, newCol2)
         ## print(colNext)
       }
     }

     digitMap1[,n]=as.vector(colNext)
     digitMap2[,n]=as.vector(colNext2)
   }

  
  return (list(digitMap1=digitMap1, digitMap2 = digitMap2))
}

exParentDip <-
function(childTb, parTb, pHapBkInfoMap, snpLen){
	## if a haplotype doesn't include in the parents or child, exclude
	
	if(is.null(dim(childTb)) | is.null(dim(parTb))  | length(pHapBkInfoMap$missingIn)==snpLen){
		return(list(childTb=matrix(childTb, ncol=2), parTb=matrix(parTb, ncol=2)))
	}
	
	child = unique(childTb)
	parent = unique(parTb)
	
	childLeft = child[is.element(child, parent)]
	parentLeft = parent[is.element(parent, child)]
	
	if(length(childLeft)==0 | length(parentLeft)==0 ) {
		## print(paste("ChildTb:\n", childTb, "parTb:\n", parTb, "\n Cannot find the pair matched"))
		stop(paste("\n ChildTb:\n", childTb, "parTb:\n", parTb, "\n Cannot find the pair matched"))
	}
	
	
	childIdx = apply(childTb, 1, FUN = function(rowItem, bench){
				re = as.logical(max(is.element(rowItem, bench)))
			}, bench = childLeft)
	
	
	parIdx = apply(parTb, 1, FUN = function(rowItem, bench){
				re = as.logical(max(is.element(rowItem, bench)))
			}, bench = parentLeft)
	
	if(sum(childIdx)==0 | sum(parIdx)==0 ) {
		## print(paste("childTb:\n", childTb, "parTb:\n", parTb, "\n Cannot find the pair matched"))
		stop(paste("\n childTb:\n", childTb, "parTb:\n", parTb, "\n Cannot find the pair matched"))
	}
	return(list(childTb=childTb[childIdx,, drop=FALSE], parTb=parTb[parIdx,,drop=FALSE]))
	
}

fHapBkIdx2DipTb <-
function(appVarNames, filteredBkInfo, idxList, snpCoding, reqIn=NULL, reqDigits = NULL, expression, snpLen=nchar(expression)){
	ifD = FALSE
	fStr = "[fHapBkIdx2DipTb]:"
	if(ifD) {
		print(qp(fStr, "start"))
		print(filteredBkInfo)
	}
	if( min( snpCoding==c(0,1,2,3)) <1 )  
		stop (paste("\nData configuration is not right:\n", "snpCoding=[", paste(snpCoding, collapse=";", sep=""), "]", sep=""))
	
	
	hetoIn = filteredBkInfo$hetoIn
	
	if(!is.null(hetoIn)){
		## need to build up the dip pair following the heto digit requirment\
		bkCt = length(idxList)
		
		if(bkCt<2)
			stop(paste("\nCannot find more than two haplotype to build heto digit for data:(", expression, ").", sep=""))
		hetoSeq = hetoIn
	}else{
		## only need to process the digit requirement posed by child
		hetoSeq = NULL
		bkCt = length(idxList)
	}
	
	if(ifD & (!is.null(hetoSeq))) print(paste("hetoSeq:", paste(hetoSeq, collapse=";", sep="")))
	
	## if no heto digit and no requirment posed by child, just return the all possible dip pair
	if(is.null(reqIn) & is.null(hetoIn) ){
		dipMap = util.it.upTriCombIdx(idxList, diag=TRUE, re.ordered=TRUE)
		#print(dipMap)
		return(dipMap)
	}
	
	
	## if no heto digit but some requirement posed by child
	if(is.null(hetoIn) & (!is.null(reqIn)) ){
		dipMap = NULL
		for ( i in 1:bkCt ){
			curBkIdx = idxList[i]
			## because no heto digits restriction, then dip built by the same hap should be considered.
			j = i
			
			while ( j <= bkCt ){
				## compare the heto digits
				othBkIdx = idxList[j]
				
				## check the required digits
				meetReq = TRUE
				tmpReq = 1
				while( tmpReq <= length(reqIn)){
					curDigitReq = reqDigits[tmpReq]
					tmpSeq = seq.int(from=1, to=2^(reqIn[tmpReq]-1), by=1)
					oneMeetReq = isDigitAtLociIdx(hapIdx=curBkIdx, digit=curDigitReq,
							lociCt=snpLen, lociIdx=reqIn[tmpReq], intVec = tmpSeq)
					othMeetReq = isDigitAtLociIdx(hapIdx=othBkIdx, digit=curDigitReq,
							lociCt=snpLen, lociIdx=reqIn[tmpReq], intVec = tmpSeq)
					
					meetReq = meetReq & (oneMeetReq | othMeetReq)
					tmpReq = tmpReq + 1
					if(!meetReq) tmpReq = length(reqIn)+10
					
				}
				## keep the matched pair
				if(meetReq)  dipMap = rbind(dipMap, range(curBkIdx, othBkIdx))
				j = j + 1
			} # while ( j <= bkCt ){
		} # for ( i in 1:bkCt ){
		if(is.null(dipMap))
			stop(paste("\nCannot find the haplotype pairs for parent meet digit requirement, exp=(", expression, ").", sep=""))
		#print(dipMap)
		return(dipMap)   
	}
	
	## if( !is.null(hetoIn) & [ is.null(reqIn) | !is.null(reqIn) ] )
	
	idx4hapDigit = NULL
	## check out the global variables
	tryCatch({
				
				tmpGetObj = NULL
				tmpGetObj = get(appVarNames$digit, envir=baseenv() )
				
				idx4hapDigit$digitMap1 = tmpGetObj$digitMap1[1:(2^(snpLen-1)), 1:snpLen]
				idx4hapDigit$digitMap2 = tmpGetObj$digitMap2[1:(2^(snpLen-1)), 1:snpLen]
				
				rm(tmpGetObj)
				##gc()
			}, error=function(e){
				errTrace = paste(e, collapse=";", sep="") 
				stop(paste("\n", fStr, errTrace, "\nApp-wise Global Variable ", appVarNames$digit, " does not exisit."))
			})
	
	
	dipMap = NULL
	## check for all required heto
	
	## all the idxList meet the homo digit requirement, but for heto digits
	## we need to match up the pair for all the heto digits
	#print(idxList)
	#print(idx4hapDigit)
	for ( i in 1:bkCt ){
		curBkIdx = idxList[i]
		## because heto digits restriction, then dip built by the same hap should be excluded.
		j = i + 1 
		while( j <= bkCt ){
			## compare the heto digits
			othBkIdx = idxList[j]
			
			if(ifD) print(paste("i=", i, "; j=", j, sep=""))
			
			## suppose cur bk contribute a digit 1
			curLociMatch1 = util.matrix.colIdx4Match(ma=idx4hapDigit$digitMap1[, hetoSeq, drop=FALSE], val=curBkIdx)
			## suppose oth bk contribute a digit 2
			othLociMatch2 = util.matrix.colIdx4Match(ma=idx4hapDigit$digitMap2[, hetoSeq, drop=FALSE], val=othBkIdx)
			
			## suppose oth bk contribute a digit 1
			othLociMatch1 = util.matrix.colIdx4Match(ma=idx4hapDigit$digitMap1[, hetoSeq, drop=FALSE], val=othBkIdx)
			## suppose cur bk contribute a digit 2
			curLociMatch2 = util.matrix.colIdx4Match(ma=idx4hapDigit$digitMap2[, hetoSeq, drop=FALSE], val=curBkIdx)               
			
			#print(curLociMatch1)
			#print(othLociMatch2)
			
			#print(othLociMatch1)
			#print(curLociMatch2)
			
			## change code, may cause trouble      
			##        if(length(curLociMatch1)==0) curLociMatch1 = 100
			##        if(length(curLociMatch2)==0) curLociMatch2 = 101
			##        if(length(othLociMatch1)==0) othLociMatch1 = 102
			##        if(length(othLociMatch2)==0) othLociMatch2 = 103
			##        tmp = sum(c( suppressWarnings(curLociMatch1==othLociMatch2),
			##                    suppressWarnings(curLociMatch2==othLociMatch1)))
			
			
			## check same length
			tmp=0
			
			if(length(curLociMatch1)==length(othLociMatch2) ){
				if(length(curLociMatch1)!=0 ){
					tmp=sum(curLociMatch1==othLociMatch2)
				}
			}
			
			if(length(curLociMatch2)==length(othLociMatch1) ){
				if(length(curLociMatch2)!=0 ){
					tmp=tmp+sum(curLociMatch2==othLociMatch1)
				}
			}
			
			
			if(tmp!=0){
				
				if(tmp == length(hetoSeq)){
					## find the matched pair
					## check the required digit
					if(is.null(reqIn)){
						## keep the matched pair
						dipMap = rbind(dipMap, range(curBkIdx, othBkIdx))
						if(ifD) print(paste("curBkIdx=", curBkIdx, ": othBkIdx=", othBkIdx, sep=""))
					}else{
						## check the required digits
						meetReq = TRUE
						tmpReq = 1
						while( tmpReq <= length(reqIn)){
							curDigitReq = reqDigits[tmpReq]
							tmpSeq = seq.int(from=1, to=2^(reqIn[tmpReq]-1), by=1)
							oneMeetReq = isDigitAtLociIdx(hapIdx=curBkIdx, digit=curDigitReq,
									lociCt=snpLen, lociIdx=reqIn[tmpReq], intVec = tmpSeq)
							othMeetReq = isDigitAtLociIdx(hapIdx=othBkIdx, digit=curDigitReq,
									lociCt=snpLen, lociIdx=reqIn[tmpReq], intVec = tmpSeq)
							
							meetReq = meetReq & (oneMeetReq | othMeetReq)
							tmpReq = tmpReq + 1
							if(meetReq) tmpReq = length(reqIn)+10
							
						}
						## keep the matched pair
						if(meetReq)  {
							dipMap = rbind(dipMap, range(curBkIdx, othBkIdx))
							if(ifD) print(paste("curBkIdx=", curBkIdx, "; othBkIdx=", othBkIdx, sep=""))
						}
					} # if(is.null(reqIn)){
				} # if(tmp == length(hetoSeq)){
			} #  if(tmp!=0){
			
			j = j + 1
			
		} # while( j <= bkCt ){
	} # for ( i in 1:bkCt ){
	rm(idx4hapDigit)
	##gc()
	if(is.null(dipMap))
		stop(paste("\n Cannot find the haplotype pairs for parent meet heto/digit requirement, exp=(", expression, ").", sep=""))
	
	#print(dipMap)
	
	return(dipMap)
	
}

filterHapIdx2SuperHapSet <-
function( bkIdxes, hapIdxes, hapCts){

  it = lapply(hapCts, 1, FUN=seq, from=1)
  # first build the whole list
  idxComb = qExpandTable(listOfFactor = it, removedRowIdx=NULL, re.row=FALSE )
  
  # eliminate the impossible rows.
  left = 1: (cumprod(hapCts)[length(hapCts)]  )
  for ( i in 1:length(bkIdxes)){
    matchedVal = hapIdxes[[i]]
    set = left[ is.element(idxComb [left ,bkIdxes[i]], matchedVal) ]
    #print(set)
    left = set
  }
  return(set)

}

filterHaps2SHaps <-
function(hapForBk, hapCts){
  ifD = FALSE
  fN = "filterHaps2SHaps:"
  if(ifD) {
    print(paste(fN, "begin"))
    print(hapCts)
  }
  bkCt = length(hapCts)

  if(bkCt==1) {
    if (is.na(hapForBk)) {
      return(1:hapCts)
    }else{
      return (hapForBk)
    }
  }

  segLen = cumprod(hapCts)
  
  filter = is.na(hapForBk)

  cut.idx = (1:bkCt)[filter]
  endWithNA = TRUE

  # if the whole pattern doesn't end with NA
  if(!filter[bkCt]){
    endWithNA = FALSE
    cut.idx.end = c(cut.idx, bkCt)
    cut.idx.sta = c(1, cut.idx+1)
  }else{
    # if the whole pattern ends with NA
    cut.idx.end = cut.idx
    cut.idx.sta = c(1, cut.idx[-length(cut.idx)]+1) 
  }

  if(ifD) print(cbind(cut.idx.sta, cut.idx.end))
  segCt = length(cut.idx.sta)
  j = 1
  grp.idx = NULL
  
  while( j <=segCt){
    if(ifD) print(paste("j=", j))
    seg = hapForBk[cut.idx.sta[j]:cut.idx.end[j]]
    if(ifD) print(seg)
    offset.q = 0
    segSeq = 0
    ## calculate the offset
    if( (cut.idx.end[j]-cut.idx.sta[j])!=0){
      ## if the seg has other than NA, we have offset
      if(j==1 & j==segCt & !endWithNA){
        hapCc = hapCts[ (cut.idx.sta[j]):(cut.idx.end[j]) ]
        nums = seg
        offset.q = calHapIdx2SHap(nums, hapCts=hapCc)
        return(offset.q)
      }
      if(j==1 & j==segCt & endWithNA){
        hapCc = hapCts[ (cut.idx.sta[j]):(cut.idx.end[j]-1) ]
        nums = seg[ - length(seg)]
        offset.q = calHapIdx2SHap(nums, hapCts=hapCc)
        n.prec = segLen[cut.idx.end[j]-1]
        segSeq = seq.int(from=0, to=segLen[cut.idx.end[j]]-1, by = n.prec)
        return(segSeq+offset.q)
      }      
      if(j==1){
        ## if it is the first segment and with length >= 2, must have an offset
        hapCc = hapCts[ (cut.idx.sta[j]):(cut.idx.end[j]-1) ]
        nums = seg[ - length(seg)]
        offset.q = calHapIdx2SHap(nums, hapCts=hapCc)
      }
      if(j>1 & j< segCt){
        ## if it is the middle segment and with length >= 2, must have an offset
        hapCc = hapCts[(cut.idx.sta[j]):(cut.idx.end[j]-1) ]
        nums = c(1, seg[ - length(seg)])
        hapCc = c(segLen[(cut.idx.sta[j]-1) ], hapCc)
        ## the out of box offset added one already, but we want to add another offset on it 
        offset.q = calHapIdx2SHap(nums, hapCts=hapCc)-1
      }
      if(j==segCt){
        if(endWithNA){
          ## if it is the last segment and with length >= 2 and have a sequence cutoff
          hapCc = hapCts[(cut.idx.sta[j]):(cut.idx.end[j]-1) ]
          nums = c(1, seg[ - length(seg)])
          hapCc = c(segLen[(cut.idx.sta[j]-1) ], hapCc)
          ## the out of box offset added one already, but we want to add another offset on it 
          offset.q = calHapIdx2SHap(nums, hapCts=hapCc)-1
        }else{
          ## if it is the last segment and with length >= 2 and have no sequence cutoff
          hapCc = hapCts[(cut.idx.sta[j]):(cut.idx.end[j]) ]
          nums = c(1, seg)
          hapCc = c(segLen[(cut.idx.sta[j]-1) ], hapCc)
          ## the out of box offset added one already, but we want to add another offset on it 
          offset.q = calHapIdx2SHap(nums, hapCts=hapCc)-1
        } ## if(endWithNA){
      } ## if(j==segCt){
      if(ifD) print(offset.q)

      if(j!=segCt | endWithNA){
        ## it has a sequence as well
        if(cut.idx.end[j]==1){
          n.prec = 1
          
        }else{
          n.prec = segLen[cut.idx.end[j]-1]
        }        
        segSeq = seq.int(from=0, to=segLen[cut.idx.end[j]]-1, by = n.prec)
      }
        
    }else{ ###  if( (cut.idx.end[j]-cut.idx.sta[j])!=0){
      ## if only one element in the segment

      if (j==segCt & !endWithNA){
          ## if it is the last segment and with length >= 2 and have no sequence cutoff
          hapCc = hapCts[(cut.idx.sta[j]):(cut.idx.end[j]) ]
          nums = c(1, seg)
          hapCc = c(segLen[(cut.idx.sta[j]-1) ], hapCc)
          ## the out of box offset added one already, but we want to add another offset on it 
          offset.q = calHapIdx2SHap(nums, hapCts=hapCc)-1
          segSeq = 0
      }else{
        ## it has a sequence only
        if(cut.idx.end[j]==1){
          n.prec = 1
          ## for the first element to be a NA
          offset.q = 1
        }else{
          n.prec = segLen[cut.idx.end[j]-1]
        }        
        segSeq = seq.int(from=0, to=segLen[cut.idx.end[j]]-1, by = n.prec)
      } ## if (j==segCt & !endWithNA){

    } ###  if( (cut.idx.end[j]-cut.idx.sta[j])!=0){

    if(ifD) print("segSeq")
    if(ifD) print(segSeq)
    if(ifD) print(offset.q)

    if(j==1){
      ## first seg
      grp.idx = segSeq + offset.q
    }else{
      oth.idx = segSeq + offset.q
      all.idx = qExpandTable(listOfFactor =list(grp.idx, oth.idx), removedRowIdx=NULL, re.row=FALSE)
      if(ifD) print(all.idx)
      grp.idx = all.idx[,1]+all.idx[,2]
    }
    if(ifD) print(grp.idx)
    j = j+1
  }
  return(grp.idx)

}

filterHaps2SHaps.check <-
function(hapForBk, hapCts){
  ifD = FALSE
  fN = "filterHaps2SHaps.check::"
  hapList = lapply(hapCts, FUN=function(i) {1:i})
  aa = qExpandTable(listOfFactor =hapList, removedRowIdx=NULL, re.row=FALSE)

  filter = !is.na(hapForBk)
  comp = rep(TRUE, times=nrow(aa))

  
  for( i in 1:length(hapForBk)){
    if(!is.na(hapForBk[i])){
      ff = aa[,i]==hapForBk[i]
      comp =  comp & ff
    }
  }
  row.idx = (1:nrow(aa))[comp]
  if(ifD) print(row.idx)
  
  my.idx = filterHaps2SHaps(hapForBk, hapCts)

  dis = sum(is.na(match(my.idx, row.idx)))
  len.m = length(my.idx)-length(row.idx)

  if (sum(dis, len.m)==0) return (NULL)
  print("compare:my.idx with row.idx")
  print(my.idx)
  print(row.idx)
  stop(paste(fN, "ERROR: not matched index."))
  return(NULL)
}

find.PsudoControlHap <-
function(trioHap6){
  
  child.sort = sort(trioHap6[5:6])
  
  othChild = matrix(trioHap6[1:4][c(1, 3, 1, 4, 2, 3, 2, 4)], ncol=2, byrow=TRUE)
  othChild = apply(othChild, 1, sort)
  
  child.m = NULL
  for( i in 1:4){
    oth = othChild[,i]
    if(sum(child.sort==oth)==2){
      child.m = c(child.m, i)
    }
  }
  if(length(child.m)<1) stop("No match for the child")

  t.choice = child.m[sample(length(child.m), size=1)]
 
  re = othChild[, c(t.choice, (1:4)[-t.choice]) ]

  return(re)
}

findLastPreviousDate <-
function(timeVec, probVec, cutoff){
  if(max(timeVec)< cutoff) return(NA)
  filter = which(timeVec<= cutoff)
  re = probVec[filter[length(filter)]]
  return(re)
}

findMissing <-
function(df, is.1digit=FALSE, snpStartLeftIndex, snpEndRightIndex, dig1Code=c(0, 1, 2, 3), dig2Code=c(0, 1, 2) ){
  ## use c(0,1,3,2) as the 1-digit coding, if not, exchange it
  snpEndLeftIndex = ncol(df)-snpEndRightIndex+1

  if(!is.1digit){
     snpNum = (snpEndLeftIndex - snpStartLeftIndex +1) /2
     snps = df[, snpStartLeftIndex:snpEndLeftIndex]

     snp1digit = exchangeDigit(ma=snps, cols=c(1,snpNum*2), dig1Code=c(0, 1, 3, 2), dig2Code =dig2Code, action=c("2to1"))
 
   }else{
     snpNum = snpEndLeftIndex - snpStartLeftIndex +1
     snp1digit = df[, snpStartLeftIndex:snpEndLeftIndex]
     if(min(dig1Code==c(0,1,3,2))==0){
       snp1digit = apply(snp1digit, 1:2,  FUN= util.vec.replace, orignal = dig1Code, replaceBy=c(0,1,3,2))
     }
   }
   nrow = nrow(df)
   missingPos = which(snp1digit==0)

   if(length(missingPos)==0) return(NULL)

   missingCol = (missingPos - (missingPos %% nrow))/nrow + 1

   miss.cord = matrix(NA, nrow=length(missingPos), ncol=2)

   miss.cord[,1]= (missingPos %% nrow)
   ## adjust the one at the last row
   miss.cord[  miss.cord[,1]==0, 1]= nrow
  
   miss.cord[,2]= (missingPos - miss.cord[,1])/nrow + 1

   colnames(miss.cord)=c("row", "snp")
   return(miss.cord)

}

freq.allele <-
function(ct, order = c("majorHM", "heter", "minorHM")){

  if ( sum(is.na(match(order, c("majorHM", "heter", "minorHM")))) != 0) stop ("Wrong order of count.")
  freq = (2*ct[1]+ct[2])/( 2*sum(ct) )
  freqs = c(freq, 1-freq)
  #print(ct)
  if ( freq>1 ) warning("Allele frequency is greater than 1")
  return(freqs)

}

freq.build <-
function(hap=NULL, geno){
	
	## make it can take .csv file
	if(!is.null(hap)){
		if(is.character(hap)){
			hap = read.csv(hap, header=TRUE, sep=",", as.is=TRUE)
		}
	}
	
	## make it can take .csv file
	if(is.character(geno)){
		geno = read.csv(geno, header=TRUE, sep=",", as.is=TRUE)
	}
	
	## check input data
	test = match(c("prekey", "seq", "freq"), colnames(geno))
	if (sum(is.na(test))>=1)
		stop("Input argument, geno, does not have the required columns, i.e., prekey, seq, and freq.") 
	
	if(is.null(hap)){
		warning("NULL value is provided for input argument, hap.")
	}
	
	freq = c(genoMap.info = list(geno[, match(c("prekey", "seq", "freq"), colnames(geno))]),
			hapMap.info = list(hap))
	
	return(freq)
}

freqbuild.haponly <-
function(hap, alleleCode = 1:2){
	## only hap is provided.
	## need to find out the hap with 1 loci, and get the geno freq from the 1 loci
	ifD = FALSE
	
	## check the maximum length for imputation
	bkMap = bkMap.constr(data=hap, keyCol=1, hapLenCol=NULL, expCol=2, probCol=3, alleleCode=alleleCode )
	if(sum(bkMap$snpCt >=8 )>0)  stop("At least one of the block size exceeds the maximum value of 7 loci.") 
	
	prekey = rep("ch", nrow(hap))
	cumidx = cumsum(bkMap$snpCt)
	markers.e = cumidx
	
	markers.b = c(1, (cumidx+1)[-length(cumidx)])
	others = lapply(  1:(length(bkMap$keys)), FUN=function(i, rep1, rep2, rep3, reptimes){
				outData1 = rep(rep1[i], times=reptimes[i])
				outData2 = rep(rep2[i], times=reptimes[i])
				outData3 = rep(rep3[i], times=reptimes[i])
				ooo = cbind(outData1, outData2, outData3)
			},
			rep1=bkMap$snpCt, rep2=markers.b, rep3=markers.e, reptimes=bkMap$bkLens)
	
	others.ma =NULL
	for( dd in others){
		others.ma = rbind(others.ma, dd)
	}
	others = matrix(others, nrow=nrow(hap), ncol=3, byrow=TRUE)
	
	hapFrame = cbind(prekey, hap, others.ma)
	colnames(hapFrame)=c("prekey", "block", "hap", "freq", "hapLen", "markers_b", "markers_e")
	
	## need to remove the singletones
	hapFrame = hapFrame[ (hapFrame[,5]!=1), ]
	
	
	genoFrame = data.frame(prekey=rep("ch", bkMap$snpLen*3),
			seq=rep(1:bkMap$snpLen, each=3),
			freq=rep(NA, bkMap$snpLen*3))
	
	single = which(bkMap$snpCt == 1)
	if(length(single)>0){
		
		## find singletons.
		for( ss in single ){
			freq.info = bkMap$bks[[ss]]
			allele.freq=freq.info[,3][match(freq.info[,2], alleleCode)]
			geno1 = allele.freq[1]^2
			geno2 = 2*allele.freq[1]*allele.freq[2]
			geno3 = allele.freq[2]^2
			genoFrame[ ((ss-1)*3+1):(ss*3) , 3]=c(geno1, geno2, geno3)
		}
		
		
	}
	if(nrow(hapFrame)>0){
		freq.haponly = freq.build(hap=hapFrame, geno=genoFrame)
	}else{
		freq.haponly = freq.build(hap=NULL, geno=genoFrame)
	}
	if(ifD) {
		print(hapFrame)
		print(dim(genoFrame))
		print(genoFrame[1:6,])
	}
	return(freq.haponly)
}

freqmap.reconstruct <-
function(data, cols=NULL, loci.ct, is.1digit=FALSE, dig1Code=c(0, 1, 2, 3), dig2Code = c(0, 1, 2), key.prefix="", start.base=1, ...){
   ifD = FALSE

   ## is NA is error used to represent the missing, change it to 0, or the max(allele code)+1
   
   if (is.1digit){
     if(is.null(cols)) cols=c(1, ncol(data))
     data = exchangeDigit(ma=data, cols=cols, dig1Code=dig1Code,
       dig2Code = dig2Code, action=c("1to2")) [,(cols[1]: ((cols[2]-cols[1]+1)*2+cols[1]-1))]
     miss.code = dig1Code[1]
     #str(data)
   }else{
     data = data[, (cols[1]:cols[2])]
     miss.code = dig2Code[1]
   }
   
   ## need to construct the haplotype freq file
   ## with hap key, haplotype, haplotype prob 
   ct = length(loci.ct)
   geno.code = as.vector(matrix(dig2Code[2:3], ncol=2) %*% matrix(c(2,0, 1,1, 0,2), ncol=3, byrow=FALSE))

   #if(sum(loci.ct>7)>=1) stop("Cannot process haplotype block with 8 or more loci.")
   if(sum(loci.ct)!=(ncol(data)/2)) {
     if(ifD) print(sum(loci.ct))
     if(ifD) print(ncol(data)/2)
     stop("Dismatching number of loci with the number of column in data.")

   }

   hapMap.info= NULL
   genoMap.info=NULL
   
   startId = start.base
   
   ## process the file if only one block exist
   if(ct==1){

     if(loci.ct>=2){
       hap.re = haplo.stats::haplo.em(geno=data, ...)
       
       hap.prob = hap.re$hap.prob
    
       hap.exp =  hap.re$haplotype
    
       hap.expVec = as.integer(apply(hap.exp, 1, paste, collapse=""))
       #m = match(c("ch", "block", "hap", "freq", "hapLen","markers_b", "markers_e"), 
       df = data.frame( prekey= rep(key.prefix, length(hap.prob)),
                   block =rep(1, length(hap.prob)),
                   hap=as.vector(hap.expVec),
                   freq = hap.prob,
                   hapLen = rep(loci.ct, length(hap.prob)),
                   markers_b=rep(startId, length(hap.prob)),
                   markers_e=rep(startId+loci.ct-1, length(hap.prob))
                  )
       hapMap.info=df
     }

     dd = data[,1]+data[,2]
       
     tb = table(factor(unlist(dd), levels=geno.code))
     tb.freq = tb/sum(tb)

     #ch	seq	freq	genotype
     df = data.frame(prekey= I(rep(key.prefix, 3)),
                   seq =rep(startId, 3),
                   freq = as.vector(tb.freq)
                  )
     genoMap.info=df
     
     
     return(re=list(hapMap.info=hapMap.info, genoMap.info=genoMap.info))
     
   }

    ## process the file if only more than one block exist
   idx.end = cumsum(loci.ct)*2
   idx.start = c(1, (idx.end+1)[-length(loci.ct)])
   cut.rg = cbind(idx.start, idx.end)


   all.df.key2 = NULL
   all.df.oth2 = NULL

   snp.b = startId
   snp.e = startId-1
   tmp1 = 1
   
   allsnp = TRUE
   for( i in 1:ct){
      keys =NULL
      oth=NULL
      snp.b = snp.e+1
      snp.e = snp.b+loci.ct[i]-1
      if(loci.ct[i]!=1){
		  allsnp = FALSE
          ## for Hap
          hap.re = haplo.stats::haplo.em(geno=data[, ((cut.rg[i,1]):(cut.rg[i,2]))], ...)

          # print(hap.re)
          hap.prob = hap.re$hap.prob
       
          hap.exp  = hap.re$haplotype
       
          hap.expVec = as.integer(apply(hap.exp, 1, paste, collapse=""))

          keys = rep(key.prefix, length(hap.prob))
          
          oth =cbind(block =  rep(i, length(hap.prob)),
                   hap=hap.expVec,
                   freq = hap.prob,
                   hapLen = rep(loci.ct[i], length(hap.prob)),
                   markers_b=rep(snp.b, length(hap.prob)),
                   markers_e=rep(snp.e,length(hap.prob))
                  )

          all.df.key2 = c(all.df.key2, keys)
          all.df.oth2 = rbind(prekey=all.df.oth2, oth)
          #print(all.df.oth2[1:3,])
          
      } #if(loci.ct[i]!=1){
    } #for( i in 1:ct){

	if(!allsnp){
	    rownames(all.df.key2)=NULL
	    rownames(all.df.oth2)=NULL
	    hapMap.info = data.frame(prekey=I(all.df.key2), all.df.oth2)
	}else{
		hapMap.info = NULL
	}
 
    ## gen one genomap for every SNP
    dd.t = data[, (min(cut.rg)):(max(cut.rg))]
    # convert into two columns
    dd.ma = matrix(unlist(dd.t), ncol=2, byrow=FALSE)
    dd = dd.ma[,1]+dd.ma[,2]

    ## remove NA first
    #print(table(dd))
    ## convert into n columns each column is for one SNP
    dd = matrix(dd, nrow=nrow(data), byrow=FALSE)
    tb = apply(dd, 2, FUN= function(col, geno.code){
      tt = table(factor(unlist(col), levels=geno.code))
      tt.freq = tt/sum(tt)
      tt.freq
    }, geno.code=geno.code)

    #print(tb[1:3,])

    #ch	seq	freq	genotype
 
    genoMap.info = data.frame(prekey= I(rep(key.prefix,  3*ncol(dd))),
        seq =rep(startId : (startId + cumsum(loci.ct)[length(loci.ct)] -1), each=3),
        freq = as.vector(tb) )

   	return(re=list(hapMap.info=hapMap.info, genoMap.info=genoMap.info))
   
}

genGenoProb <-
function(){
   genoProb = matrix(c(1,1, 1, 0, 0,
                       2,1, 0, 0, 1,
                       3,1,.5, 0,.5,
                       1,2, 0, 0, 1,
                       2,2, 0, 1, 0,
                       3,2, 0,.5,.5,
                       1,3,.5, 0,.5,
                       2,3, 0,.5,.5,
                       3,3,.25,.25, .5), ncol=5, byrow=TRUE)
   ## test
   ## apply(genoProb[,3:5], 1, sum)
   ## apply(genoProb[,3:5], 2, sum)
   return (genoProb)
}

genMatingTBCondOnChild3 <-
function(appVarNames, child, par1, par2, prob1, prob2, logF = NULL, job=1){

  fStr ="[genMatingTBCondOnChild3]:"
  ifD = FALSE
  maxTbl = NULL
  if(ifD) print(par1)
  if(ifD) print(par2)
  tryCatch({
    maxTbl = get(appVarNames$tbl, envir=baseenv())
    maxRow = nrow(maxTbl)
  }, error=function(e){
    errTrace = paste(e, collapse=";", sep="")
    stop(paste("\n", fStr, errTrace, "\nApp-wise Global Variable ", appVarNames$freqMap, " does not exisit."))
  })
  
  tryCatch({
    maxMateTbl = get(appVarNames$mateTbl, envir=baseenv())
  }, error=function(e){
    errTrace = paste(e, collapse=";", sep="")
    stop(paste("\n", fStr, errTrace, "\nApp-wise Global Variable ", appVarNames$freqMap, " does not exisit."))
  })

  childPairNo = nrow(child)

  if(is.null(childPairNo)) {
    child = matrix(child, ncol=2)
    childPairNo = 1
  }

  if(is.null(dim(par1))){
    par1 = matrix(par1, ncol=2)
  }
  if(is.null(dim(par2))){
    par2 = matrix(par2, ncol=2)
  }
  #print("OOOOOLLLDDDDD")
  
  #print(par1)
  #print(par2)
  
  ## get the most probable ones, order the pair by their prob
  par1 = par1[order(prob1,decreasing=TRUE),,drop=FALSE]
  par2 = par2[order(prob2,decreasing=TRUE),,drop=FALSE]

  prob1 = sort(prob1,decreasing=TRUE)
  prob2 = sort(prob2,decreasing=TRUE)
  
  #print(par1)
  #print(par2)
  #print(prob1)
  #print(prob2)
  
  ## would assume that all pairs with different indexes will come at the beginning of the list
  ## and pairs with same indexes will come at the end of the list

  par1short = nrow(par1)<=nrow(par2)

  if(par1short){
    baseNo = nrow(par1)
    othNo = nrow(par2)
    basePair = par1
    othPair = par2
    baseCol = 1:2
    othCol = 3:4
    mateCol = 1
    
  }else{
    baseNo = nrow(par2)
    othNo = nrow(par1)
    basePair = par2
    othPair = par1
    baseCol = 3:4
    othCol = 1:2
    mateCol = 2
  }

  probVec = rep(NA, times=maxRow)
  counter = 0
  ## iterate choice of one parent and the child
  for ( i in 1:baseNo){
    if(ifD) print(paste("i=", i, " out of ", baseNo))
    
    childMeet = apply(child, 1, FUN = function(rowItem, bench){
        re = as.logical(max(is.element(rowItem, bench)))
    }, bench = basePair[i,])

    childSelRowIdx = NULL
    if( sum(childMeet) >=1 ){
        childSelRowIdx = (1:childPairNo)[childMeet]
        for(j in childSelRowIdx){
             ## if two hap in child is the same
             sameHapChild = child[j,1]==child[j,2]
             if(sameHapChild) {
                par2Meet = apply(othPair, 1, FUN = function(rowItem, bench){
                   re = as.logical(max(is.element(rowItem, bench)))
                }, bench = child[j,1])               

             }else{

                childMatch = is.element(child[j,], basePair[i,])
                if(sum(childMatch)==2){
                  othHapReq = child[j,]
                }else if(sum(childMatch)==1){
                  othHapReq = child[j,][!childMatch]
                }else{
                  stop ("programming error")
                }
                if(ifD) print(childMatch)

                par2Meet = apply(othPair, 1, FUN = function(rowItem, bench){
                   re = as.logical(max(is.element(rowItem, bench)))
                }, bench = othHapReq)                  
             }

             if(ifD) print(par2Meet)
             if( sum(par2Meet) >=1 ){
               par2SelRowIdx = (1:othNo)[par2Meet]
               
               if(ifD)  print(paste("par2SelRowIdx=", par2SelRowIdx))
               addedRow = length(par2SelRowIdx)
               if(ifD) print(paste("addedRow=", addedRow))
               if( addedRow != 0){
                  if( (counter + addedRow) > maxRow ) {
                       #rm(maxTbl)
                       #gc()
                       # stop (paste("\n", fStr, "List length (", counter + addedRow , ") exceed the maximum (", maxRow, ") for i =", i, se
                       simpleResult = matTBCondOnChild3.finalProc(maxTbl=maxTbl, counter=counter, maxMateTbl=maxMateTbl,
                               probVec=probVec, prob1=prob1, prob2=prob2, job=job, logF=logF  )
                       return(simpleResult)

                  }
                  tmpPrb = rep(.25, times=addedRow)
                  othSameHap = NULL
                  othSameHap = othPair[par2SelRowIdx,1]==othPair[par2SelRowIdx,2]
  
                  if(ifD)  print(sum(othSameHap))
                  if(sum(othSameHap)>0) {
                    # if other parent are homo, double =.5
                    tmpPrb[othSameHap]=tmpPrb[othSameHap]*2
                  }
                  # if base parent are homo, double for the second time =1
                  if(basePair[i,1]==basePair[i,2]) tmpPrb = tmpPrb*2

                  ## depend on the child is heto, same pairs for parents and child has different prob than different pairs for parents
                  ## parent must be same heto, homo kid will have .25, heter kid will have .5 
                  if( (sum(othSameHap)<addedRow) && (basePair[i,1]!=basePair[i,2]) && ( child[j,1]!=child[j,2])  ){
                    ## 1st rule, there are some hapPairs not the same for the other parent
                    ## 2nd rule, hapPairs for base parent are not the same
                    ## 3nd rule, hapPairs for child are not the same 
                    tmpMaRow = (1:addedRow)[!othSameHap]
                    othPairMa = othPair[par2SelRowIdx[tmpMaRow],,drop=FALSE]

                    ## compare the hapPairs for the other parents with the hapPairs for the base parents.
                    rowTmpMeet = apply(othPairMa, 1, FUN = function(rowItem, bench){
                      re = as.logical(sum(is.element(rowItem, bench))==2)
                    }, bench = basePair[i,])
                    if(sum(rowTmpMeet)>0) tmpPrb[tmpMaRow[rowTmpMeet]]=.5
                  }
                  
                  probVec[(counter+1):(counter+addedRow)]= tmpPrb
                  
                  ##print(matrix(.Internal(rep(par1[i,], addedRow)), ncol=2, byrow=TRUE))
                  ##print( matrix(par2[par2Meet, ], ncol=2))
                  maxTbl[(counter+1):(counter+addedRow), baseCol] = matrix(rep(basePair[i,], addedRow), ncol=2, byrow=TRUE)
                  maxTbl[(counter+1):(counter+addedRow), othCol] = othPair[par2SelRowIdx, , drop=FALSE]
                  ## newly added
                  maxTbl[(counter+1):(counter+addedRow), 5:6] = matrix(rep(child[j,], addedRow), ncol=2, byrow=TRUE)

                  if(ifD) print(maxTbl[(counter+1):(counter+addedRow),])
                  maxMateTbl[(counter+1):(counter+addedRow), mateCol] = rep(i, addedRow)
                  maxMateTbl[(counter+1):(counter+addedRow), 3-mateCol] = par2SelRowIdx
         
                  counter = counter + addedRow
                  if(ifD)print(paste("counter=", counter))
               }
           } ## if( sum(par2Meet) >=1 ){

        } ## for(j in 1:childSelRowIdx){
    } ## if( sum(childMeet) >=1 ){
  }
  simpleResult = matTBCondOnChild3.finalProc(maxTbl=maxTbl, counter=counter, maxMateTbl=maxMateTbl,
                               probVec=probVec, prob1=prob1, prob2=prob2, job=job, logF=logF  )
  return(simpleResult)
}

geno.2dStr2BinaMa <-
function(expStr, subjectCt=length(expStr), snpLen){

  samplesBina = hapBk2AlleleSeq(subjects=expStr, subjectCt=subjectCt, snpLen=snpLen*2)
  bina1= samplesBina[, seq.int(from=1,to=snpLen*2, by=2), drop=FALSE]
  bina2 = samplesBina[, seq.int(from=2,to=snpLen*2, by=2), drop=FALSE]
   binaNew1 = pmin(bina1, bina2)
   binaNew2 = pmax(bina1, bina2)
   bina = util.matrix.col.shuffle2(binaNew1, binaNew2)

   if(subjectCt==1) bina=matrix(bina, nrow=1)
   return(bina)  

}

geno2hapFreq <-
function(prekey, block, genoFreq, snpBase=1, snpSeq){
	df = data.frame( key=I(rep(paste(prekey, block, sep="-"), 2)),
			hapF.ch = rep(prekey, 2),
			hapF.block = rep(block, 2),
			hapF.hap = c(snpBase, snpBase+1),
			freq = c(genoFreq[1]+genoFreq[2]/2, 1- (genoFreq[1]+genoFreq[2]/2) ),
			hapF.hapLen = rep(1, 2),
			hapF.markers_b = rep(snpSeq, 2),
			hapF.markers_e = rep(snpSeq, 2))
	
	return(df)
}

getBackParentGeno <-
function(trioDf, famCol=1, memCol=2, snpIdx=NULL, prefix=NULL, re.child=FALSE){

    if(is.null(snpIdx)){
      snpIdx = (1:ncol(trioDf))[-c(famCol, memCol)]
    }
      ## HARD CODE!!!HARD CODE: know the last three digits are for covariate values
     rowCt = nrow(trioDf)
     trioSNP=trioDf[, c(famCol, memCol, snpIdx)]

     if(re.child) {
       selMa = seq.int(from=3, to=rowCt, by = 3)

     }else{
       ## we know the first two rows in every three rows are parents
       selSeq = seq.int(from=1, to=rowCt, by = 3)
       selSeq2 = selSeq + 1
       selMa = matrix(c(selSeq, selSeq2), ncol=2, byrow = FALSE)
       selMa = as.vector(t(selMa))
     }

     #print(length(selMa))
     trioSNP.id = paste(trioSNP[selMa, 1], trioSNP[selMa, 2], sep="-")
     unique.par = unique(trioSNP.id)

     unique.par.idx = match(unique.par, trioSNP.id)

     #print(length(unique.par))
     
     if(is.null(prefix)){
       if(re.child) {
         re = trioSNP[unique.par.idx,]
       }else{
         re = trioSNP[unique.par.idx,]
       }
       return(re)
     }
     
     if(re.child) {
       write.table(trioSNP[unique.par.idx,], file=paste(prefix, "_child.txt", sep=""), sep=" ",
                   append = FALSE, row.names = FALSE, col.names = FALSE)
     }else{
       write.table(trioSNP[unique.par.idx,], file=paste(prefix, "_parent.txt", sep=""), sep=" ",
                   append = FALSE, row.names = FALSE, col.names = FALSE)
     }
     return(NULL)
}

getHapProb.semiMapFrame <-
function( semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen){
	## SemiAugMap only have the major haplotypes' expression and prob,
	## only a number for prob for each of the other haplotypes 
	## SemiAugMap also has a column showing the index of the haplotype in the fixed AugMap
	ifD = FALSE
	
	leftOverProb = semiMapFrame[1, resiProbCol]
	mapProb = semiMapFrame[ , probCol]
	mappedAugIdx = semiMapFrame[ , augIdxCol]
	
	commonProbIdx = length(mapProb)
	
	lef = leftOverProb/(2^snpLen-commonProbIdx)
	allProb = rep(lef, length=2^snpLen)
	
	allProb[mappedAugIdx]=mapProb
	
	return(allProb)
}

getHapProb2 <-
function(selIdx, semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, restandard=FALSE){
	## SemiAugMap only have the major haplotypes' expression and prob,
	## only a number for prob for each of the other haplotypes 
	## SemiAugMap also has a column showing the index of the haplotype in the fixed AugMap
	ifD = FALSE
	
	leftOverProb = semiMapFrame[1, resiProbCol]
	mapProb = semiMapFrame[ , probCol]
	mappedAugIdx = semiMapFrame[ , augIdxCol]
	
	commonProbIdx = length(mapProb)
	
	findMatchMajor = match(selIdx, mappedAugIdx)
	selMajor = findMatchMajor[!is.na(findMatchMajor)]
	if(ifD) print(semiMapFrame)
	if(ifD) print(selMajor)
	
	selMajor.ct = sum(!is.na(findMatchMajor))
	selMinor.ct = sum(is.na(findMatchMajor))
	
	if(commonProbIdx == (2^snpLen)){
		## if the map is complete
		
		if(selMinor.ct>0){
			stop (paste("non existing selIdx=[", paste(selIdx, collapse=";", sep=""), "]", sep=""))
		}else if(selMajor.ct>0){
			newProb = mapProb[ selMajor ]
		}else {
			stop (paste("not valide selIdx=[", paste(selIdx, collapse=";", sep=""), "]", sep=""))
		}
	}else{
		## if the map is incomplete
		
		augProb = c(mapProb, leftOverProb/(2^snpLen - commonProbIdx))
		idxMatched = selMajor
		if(selMinor.ct>0){
			validIdx = selIdx[is.na(findMatchMajor)]
			tt = (validIdx>=1 & validIdx <=2^snpLen)
			if(sum(tt)!=length(validIdx)){
				stop (paste("non existing selIdx=[", paste(selIdx, collapse=";", sep=""), "]", sep=""))
			}
			
			idxMatched = findMatchMajor
			idxMatched[is.na(findMatchMajor)] = commonProbIdx+1
			
		}
		newProb = augProb[idxMatched]
	}
	
	if(restandard){
		newProb = newProb/sum(newProb)
	}
	
	return(newProb)
}

getP.logodds <-
function(logodds){
	return (exp(logodds)/(1+exp(logodds)))
}

grp.CI <-
function (maUp, maLow, position, barLen, col=NULL,...) 
{
    vLen = length(position)
    lapply(1:vLen, FUN = function(i, up, low, xPos, barLen, col,...) {
        segments((xPos - barLen/2)[i], low[i], (xPos + barLen/2)[i], 
            low[i], col,...)
        segments(xPos[i], low[i], xPos[i], up[i],col, ...)
        segments((xPos - barLen/2)[i], up[i], (xPos + barLen/2)[i], 
            up[i],col, ...)
    }, up = maUp, low = maLow, xPos = position, barLen = barLen, col)
    return(NULL)
}

grp.kmStep <-
function(timeVec, probVec, lty=1, lwd=1,  col ="black", plot.dot=FALSE){

  len = length(timeVec)

  for( i in 1:(len-1)){
    segments(timeVec[i], probVec[i], timeVec[i+1], probVec[i], col=col, lty=lty, lwd=lwd)
    segments(timeVec[i+1], probVec[i], timeVec[i+1], probVec[i+1], col=col, lty=lty, lwd=lwd)
    if(plot.dot) points(timeVec[i+1], probVec[i], pch=1, col="black")
  }

  return(NULL)
}

grp.palette <-
function(name, width = 500, height =500){
     jpeg(filename = name, width = width, height = height,
          pointsize = 10, quality = 100, bg = "white", res = NA)
     
     
     recL = 1
     colMa = matrix(colors(), ncol = 25, nrow = 27, byrow = TRUE)
     
     my.xlim = c(0, 25)
     my.ylim = c(0, 27)
     plot(my.xlim, my.ylim, type = "n", xlab="", ylab="", axes=FALSE)
     
     
     for(row in 1:27){
       for(col in 1:25){
         rect( (col-1)* recL + .1, (row-1)* recL + .1, col*recL - .1, row*recL - .1, col = colMa[row, col] )
         #print(paste("x=", (col-1)* recL + .1, ", y=", (row-1)* recL + .1))
         #print(paste("col=", col, "row=", row))
         
       }
     
     }
     axis(2, seq(.5,27.5, by=2), seq(0,670, by=50), cex=.5)
     axis(1)
     grid(nx = NULL, ny = NULL, col = colors()[31], lty = "dotted",
          lwd = NULL, equilogs = TRUE)
     
     dev.off()
     return(NULL)
}

hap.2geno <-
function(hapExp, hapProb, snpBIdx=NA){

    if(is.na(snpBIdx)) snpBIdx = 0
    bina1 = hapBk2AlleleSeq(subjects=hapExp,
                            subjectCt=length(hapExp),
                            snpLen=nchar(hapExp[1]), markdownOne = FALSE)
    
    geno1dFreq = lapply(1:ncol(bina1), FUN=function(col, prob, snpMa){
      snpF = factor(snpMa[,col],levels=1:2)
      
      gFreq = tapply( prob, snpF, sum)
      #print(gFreq)
      gFreq[ is.na(gFreq) ] = 0
      gFreq1 = c(gFreq, 2*gFreq[1])
      gFreq2 = c(gFreq, gFreq[2])
      re = gFreq1*gFreq2
      #print(re)
    }, prob=hapProb  , snpMa = bina1)
    
    genoFreq = util.list.2matrix(list=geno1dFreq, byRow = TRUE)
    colnames(genoFreq)=c("11","22","12")
    rownames(genoFreq)=paste("snp", snpBIdx+1:ncol(bina1), sep="")
    return(genoFreq)
}

hap2GenoBlock <-
function(hapExp = c("11", "12", "21", "22"), hapProb = rep(1/length(hapExp), length(hapExp)), snpCt = nchar(hapExp[1]), alleleCode = c(1, 2)){

     ## first standardize??
     
     ct = length(hapExp)
#      hapIdxCorner=NULL
#      ## 2 hap combination to form the diplotype, identify each variation by its index
#      for ( i in 1:ct ){
#        for ( j in 1:ct){
#          if(i<j) hapIdxCorner = rbind(hapIdxCorner, c(hap1=i, hap2=j))
#        }
#      }
#      
# 
     ifD = FALSE
     if(ifD) print(paste("hapLen:", ct))
     hapIdxCorner = util.it.smallLargeIdx(ct, keep.same=FALSE)
     hapIdxDiag = matrix(rep(1:ct, times=2), ncol=2, byrow=FALSE)
  
     probCorner = matrix(as.vector(hapProb)[as.vector(hapIdxCorner)], ncol=2, byrow=FALSE)
     probDiag = matrix(as.vector(hapProb)[as.vector(hapIdxDiag)], ncol=2, byrow=FALSE)
     joinCorner = 2*probCorner[,1]*probCorner[,2]
     joinDiag = probDiag[,1]*probDiag[,2]
     
     expCorner = matrix(hapExp[hapIdxCorner], ncol=2, byrow=FALSE)
     expDiag = matrix(hapExp[hapIdxDiag], ncol=2, byrow=FALSE)
     
     hapIdx = rbind(hapIdxCorner, hapIdxDiag)
     colnames(hapIdx) = c("id1", "id2")
     hapExp = rbind(expCorner, expDiag)
     colnames(hapExp) = c("hExp1", "hExp2")

     if(ifD) print(hapExp)
     # get the one-digit coding
     # when add to digit, get c(2,3,4)-1=c(1,2,3) the common SNP coding
     # HARD CODE!!!HARD CODE
     bina1 = hapBk2AlleleSeq(hapExp[,1], subjectCt=dim(hapExp)[1], snpLen=snpCt, markdownOne = FALSE)
     bina2 = hapBk2AlleleSeq(hapExp[,2], subjectCt=dim(hapExp)[1], snpLen=snpCt, markdownOne = FALSE)
      if(ifD) print(bina1)
     
     ##  20Dec08Change!!!: straighten coding: output must be of digit 1, 2, 3 for 1-digitCoding, and 1, 2 for 2-digit
     a1 = sum(alleleCode * c(2,0)) # 11-> to 1
     a2 = sum(alleleCode * c(1,1)) # 12-> to 3
     a3 = sum(alleleCode * c(0,2)) # 22-> to 2

     snp1d = bina1+bina2
     snp1d.f = factor(snp1d, levels=c(a1, a3, a2))

     if(ifD) {
       print(c(a1, a2, a3))
       print(";;")
       print(snp1d)
       print(snp1d.f)
       print(is.na(snp1d.f))
     }
     
     if( max(is.na(snp1d.f))==1) stop("Input allelCode does not match with other input.")
     
     snp1d = as.integer(snp1d.f)
     snp1d = matrix(snp1d, ncol=ncol(bina1), nrow=nrow(bina1))

     # get the two-digit coding
     bina1.f = factor(bina1, levels=alleleCode)
     if( max(is.na(bina1.f))==1) stop("Input allelCode does not match with other input.")
     bina1.f = as.integer(bina1.f)

     bina2.f = factor(bina2, levels=alleleCode)
     if( max(is.na(bina2.f))==1) stop("Input allelCode does not match with other input.")
     bina2.f = as.integer(bina2.f)

     bina1 = matrix(bina1.f, ncol=ncol(bina1), nrow=nrow(bina1))
     bina2 = matrix(bina2.f, ncol=ncol(bina1), nrow=nrow(bina1))
     
     binaNew1 = pmin(bina1, bina2)
     binaNew2 = pmax(bina1, bina2)

     if(ifD) print(binaNew1)
     
     genoTypes.m = util.matrix.col.shuffle2(binaNew1, binaNew2)
     genoExp = util.matrix.cat(genoTypes.m, 1:(snpCt*2), sep="")
      
     prob = matrix(c(joinCorner, joinDiag), ncol=1)

     tb = data.frame(hapIdx, hapExp, prob, genoExp)
     
     return(list(tb=tb, genoMa=genoTypes.m, geno1d = snp1d))
}

hap2genotype.bk <-
function(bkDframe, chCol, blockCol, keyCol, expCol, probCol, hapLenCol){
  hapExp = bkDframe[,expCol]
  prob = bkDframe[,probCol]
  dipExp =expand.grid(hapExp, hapExp)
  probExp = expand.grid(prob, prob)
  varCt =  bkDframe[1, hapLenCol]
  genotype = hap2genotype.m(as.character(dipExp[,1]), as.character(dipExp[,2]), nrow(dipExp), varCt)
  probExp = probExp[,1]*probExp[,2]
  geno = util.matrix.cat( genotype, 1:(2*varCt))
  collaps = tapply(probExp, geno, sum)

  nrow = length(collaps)
  
  re = data.frame(ch=rep(bkDframe[1,chCol], nrow), block=rep(bkDframe[1,blockCol], nrow),
                  exp=names(collaps), varCt=rep(2*varCt, nrow), prob=collaps,
                  key=rep(bkDframe[1,keyCol], nrow))
 ## re = list(probList=collaps, varCt = varCt)
  return(re)
}

hap2genotype.m <-
function(hapMa1, hapMa2, subjectCt, snpLen){
  bina1 = hapBk2AlleleSeq(hapMa1, subjectCt, snpLen, markdownOne = FALSE)
  bina2 = hapBk2AlleleSeq(hapMa2, subjectCt, snpLen, markdownOne = FALSE)
  
  binaNew1 = pmin(bina1, bina2)
  binaNew2 = pmax(bina1, bina2)
  re = util.matrix.col.shuffle2(binaNew1, binaNew2)

  return(re)
}

hapBk2AlleleSeq <-
function(subjects, subjectCt, snpLen, markdownOne=TRUE){

  if(is.null(dim(subjects))){
     subjects.snp = lapply(subjects, FUN=util.str.2CharArray,  len=snpLen)    
  }else{     
     bkCt = dim(subjects)[2]
     
     subjects.hap = util.matrix.cat(subjects, 1:bkCt, sep="")
     subjects.snp = lapply(subjects.hap, FUN=util.str.2CharArray,  len=snpLen)
   }
  
  if(markdownOne){
    subjects.snp = matrix(as.numeric(unlist(subjects.snp))-1, ncol = snpLen, byrow = TRUE)
  }else{
    subjects.snp = matrix(as.numeric(unlist(subjects.snp)), ncol = snpLen, byrow = TRUE)
  }
  return(subjects.snp)
  
}

hapBkGenoMap2HapMap <-
function(hapBkGenoMap, reSimuMap=TRUE){
	
	singleton = hapBkGenoMap$genomeMarkerInfo[hapBkGenoMap$genomeMarkerInfo$qu.type==1, 2 ]
	singletonGeno = hapBkGenoMap$genoOnlyMap$bks[ singleton ]
	newFrame = NULL
	for( i in 1: (length(singletonGeno))){
		g = singletonGeno[[i]]
		newF = geno2hapFreq(prekey=g[1,hapBkGenoMap$genoOnlyMap$chCol] ,
				block =g[1,hapBkGenoMap$genoOnlyMap$blockCol] ,
				genoFreq=g[,hapBkGenoMap$genoOnlyMap$probCol] ,
				snpBase=hapBkGenoMap$hapBkOnlyMap$snpBase,
				snpSeq=singleton[i])
		newFrame=c(newFrame, list(newF))
	}
	
	singletonSeqId = which(hapBkGenoMap$genomeMarkerInfo$qu.type==1)
	allSeqId = 1: (nrow(hapBkGenoMap$genomeMarkerInfo))
	leftSeqId = allSeqId[ -singletonSeqId ]
	
	## bind with old
	allBkMap = c(hapBkGenoMap$hapBkOnlyMap$bks, newFrame)
	## reshuffle the bks
	newBkMap = allBkMap[ match(allSeqId, c(leftSeqId, singletonSeqId)) ]
	
	
	## get new DF
	newDf = matrix(NA, nrow=length(singletonGeno)*2+nrow(hapBkGenoMap$hapBkOnlyMap$df), ncol=7)
	newDf = data.frame(key=I(rep("-", nrow(newDf))), newDf)
	
	rowIdx = 0
	for( j in 1:(length(newBkMap))){
		hapDf = newBkMap[[j]]
		newDf[ (rowIdx+1):(rowIdx+nrow(hapDf)), ]= hapDf
		rowIdx = rowIdx+nrow(hapDf)
	}
	
	#str(newDf)
	if(reSimuMap){
		simuDf = newDf[ , c(1, 4, 5)]
		simuMap = bkMap.constr(data=simuDf, keyCol=1, hapLenCol=NULL, expCol=2, probCol=3)
		return(list( newBkMap=newBkMap, newDf=newDf, simuMap=simuMap))
	}
	
	return(list(  newBkMap=newBkMap, newDf=newDf ))
}

hapGenoBlockProc <-
function(hapGenoSub,  snpCoding=c(0, 1, 2, 3)){
	ifD = FALSE
	if(ifD) print(hapGenoSub)
	
	snpCt = length(hapGenoSub)
	
	homoIn = NULL
	homoDigit = NULL
	missingIn = NULL
	hetoIn = NULL
	for ( i in 1: snpCt ){
		if(ifD) print(hapGenoSub[i])
		matchIndex = which(as.numeric(hapGenoSub[i])==snpCoding)
		if(matchIndex==1){
			missingIn = c(missingIn, i)
		}else if(matchIndex==2 | matchIndex==3){
			homoIn = c(homoIn, i)
			homoDigit = c(homoDigit, snpCoding[matchIndex])
		}else if(matchIndex==4){
			hetoIn = c(hetoIn, i)
		}  
	}
	return(list(homoIn=homoIn, homoDigit = homoDigit, missingIn=missingIn, hetoIn=hetoIn, ori=hapGenoSub))
}

hapPair.match <-
function(listNewOrder, bkMax, pairList){
  ifD=FALSE
  fN = "hapPair.match:"
  if (ifD)   print(paste(fN, " start"))
  if (ifD)   print(listNewOrder)
  if (ifD)   print(bkMax)
  if (ifD)   print(pairList)
    
  bkCt = length(listNewOrder)

  map.row = 2^(bkCt-1)
  if(bkCt ==1) {
    row.ct = nrow(pairList[[1]])
    mgrid1 = matrix(NA, nrow=row.ct, ncol=bkMax)
    mgrid2 = matrix(NA, nrow=row.ct, ncol=bkMax)

    mgrid1[, listNewOrder]= pairList[[1]][,1]
    mgrid2[, listNewOrder]= pairList[[1]][,2]
    #print(cbind(mgrid1=mgrid1, mgrid2=mgrid2))
    return(cbind(mgrid1=mgrid1, mgrid2=mgrid2))
  }else{
    row.ct = cumprod( unlist(lapply(pairList, FUN=nrow)) )[bkCt]
  }

  # map of index for which hap to pull: each column for each index in bkIds,
  idx.map = exhaustHapExp(lociCt=bkCt)$hap
  idx.map = idx.map[1:(nrow(idx.map)/2),,drop=FALSE]

  if(ifD) print(paste("map.row=", map.row, " row.ct=", row.ct))
  
  mgrid1 = matrix(NA, nrow=row.ct*map.row, ncol=bkMax)
  mgrid2 = matrix(NA, nrow=row.ct*map.row, ncol=bkMax)
  
  for(i in 1:map.row){
    if(ifD) print(i)
    # for each row in the map, grap the hap list and do an expand
    grab.hapSet = idx.map[i,]
    ex.list = lapply(1:bkCt, FUN=function(ii, maps, pList){
      ma = pList[[ii]] 
      re = ma[,maps[ii]]
      return(re)
    }, maps=grab.hapSet, pList = pairList)
    ex.grid = qExpandTable(listOfFactor =ex.list, removedRowIdx=NULL, re.row=FALSE)
    if(ifD) print(ex.grid)
    
    row.seq = ((i-1)*row.ct+1) : (i*row.ct) 
    mgrid1[ row.seq,listNewOrder ] = ex.grid

    grab.hapSet = 3-idx.map[i,]
    ex.list = lapply(1:bkCt, FUN=function(ii, maps, pList){
      ma = pList[[ii]] 
      re = ma[,maps[ii]]
      return(re)
    }, maps=grab.hapSet, pList = pairList)
    if(ifD) print(ex.list)
    ex.grid = qExpandTable(listOfFactor =ex.list, removedRowIdx=NULL, re.row=FALSE)
    if(ifD) print(ex.grid)
    
    mgrid2[ row.seq,listNewOrder ] = ex.grid

  }
  #print(cbind(mgrid1=mgrid1, mgrid2=mgrid2))
  return(cbind(mgrid1=mgrid1, mgrid2=mgrid2))

}

HRCB.applyRule <-
function(subjectsExpMa, rule, col.shuffled){
  ifD = FALSE
  len = length(rule)
  if(is.null(dim(subjectsExpMa))){
    nsub = length(subjectsExpMa)
  }else{
    nsub = nrow(subjectsExpMa)
  }
  varCt = length(col.shuffled)

  if(ifD) print("subjectsExpMa:")
  if(ifD) print(subjectsExpMa[1:10])
  subjectListMa = hapBk2AlleleSeq(subjectsExpMa, nsub, varCt, markdownOne=TRUE)
  print(str(subjectListMa))

  subjectList = util.matrix.2list(subjectListMa)
  
  linear = unlist(rep(rule$inter, nsub))
  slist = rule$slist
  
  for ( i in 1:length(slist)){
    curRule = slist[[i]]
    treePred = binaTree.apply(binaTree=curRule$signal, data=subjectListMa, colnames=col.shuffled, re.final=TRUE)
    #print(treePred)
    linear = linear + treePred * unlist(rep(curRule$coef, nsub))
  }

  if(ifD){
    aaaa = cbind(subjectsExpMa, subjectListMa, treePred )
    write.csv(aaaa, file="HRCB.applyRule.diagnostic.csv")

  }
  #print(linear)
  riskProb =  getP.logodds(linear)
  attributes(riskProb)=NULL
  #print(riskProb)
  return(list(riskProb=riskProb, treePred=treePred))
}

HRCB.Esp1Rule.sampleKid <-
function(rule, caseNo, spStrata, supHapProb){
	ifD =FALSE
	fn = "HRCB.Esp1Rule.sampleKid::"
	
	tmpObj = NULL
	if( is.character(spStrata)){
		
		a = load(paste(spStrata, ".RData", sep=""))
		tmpObj = get(a[1])
		
	}else{
		tmpObj = spStrata
	}
	
	
	HRCBStra.A = tmpObj$HRCBStra.A
	HRCBStra.B = tmpObj$HRCBStra.B
	HRCBStra.C = tmpObj$HRCBStra.C
	HRCBStra.D = tmpObj$HRCBStra.D
	
	signalRule = rule$slist[[1]]
	inter = rule$inter
	coef = signalRule$coef
	
	HR = exp(inter+coef)/(1+exp(inter+coef))
	LR = exp(inter)/(1+exp(inter))
	
	
	if(HRCBStra.A$ct>0) {
		risk.A = HRCBStra.A$idProb[,2]*unlist(HR)
	}else{
		risk.A = 0
	}
	if(HRCBStra.B$ct>0) {
		risk.B = HRCBStra.B$idProb[,2]*unlist(HR)
	}else{
		risk.B = 0
	}
	if(HRCBStra.C$ct>0) {
		risk.C = HRCBStra.C$idProb[,2]*unlist(LR)
	}else{
		risk.C = 0
	}
	risk.D = HRCBStra.D$idProb[,2]*unlist(LR)
	
	risk.sumHR = lapply(list(risk.A, risk.B), FUN=sum)
	risk.sumLR = lapply(list(risk.C, risk.D), FUN=sum)
	
	
	# check the sum of prob
	#print(paste(fn, " population risk =", sum(unlist(c(risk.sumHR, risk.sumLR)))))
	
	
	stra.sampled = rmultinom(1, size=caseNo, prob=c(risk.sumHR, risk.sumLR) )
	stra.cumsam = cumsum(stra.sampled)  
	if(ifD){
		print(paste("sampled group:", paste(stra.sampled, collapse="; ")))
	}
	
	
	simMa = matrix(NA, nrow=caseNo, ncol=2)
	## simu for group A
	if(stra.sampled[1]>0){
		#print("a")
		simMa[1:stra.cumsam[1],] = HRCBSpGrp.sp(grp=HRCBStra.A, size=stra.sampled[1], hapProb=supHapProb, riskProb=risk.A)
		#print("a")
	}
	## simu for group B
	if(stra.sampled[2]>0){
		#print("b")
		simMa[(stra.cumsam[1]+1):stra.cumsam[2] ,] = HRCBSpGrp.sp(grp=HRCBStra.B, size=stra.sampled[2], hapProb=supHapProb, riskProb=risk.B)
		
	}
	## simu for group C
	if(stra.sampled[3]>0){
		#print("c")
		simMa[(stra.cumsam[2]+1):stra.cumsam[3], ] =
				HRCBSpGrp.sp(grp=HRCBStra.C, size=stra.sampled[3], hapProb=supHapProb)
		
	}
	## simu for group D
	if(stra.sampled[4]>0){
		#print("d")
		simMa[(stra.cumsam[3]+1):stra.cumsam[4], ] =
				HRCBSpGrp.sp(grp=HRCBStra.D, size=stra.sampled[4], hapProb=supHapProb, grpA =HRCBStra.A )
		
	}
	
	return(simMa)
}

HRCB.Esp1Rule.spTrioOnBase <-
function(bkMap=NULL, preObj=NULL, spStrata, rule, caseNo, ifS="simuInfo",  reControl=FALSE){
  FN = "HRCB.Esp1Rule.spTrioOnBase"
  if(is.null(preObj)){
    if(is.null(bkMap)) stop(" No object is passed as bkMap nor as preObj. The function need at least one to work.")
    warning( "No object is passed as preObj. The function will need to generate the preObj.")
    
    preObj = bkMap.HRCB.Esp1Rule.genoSeq(bkMap, rule, re.probOnly = FALSE)
  }

  #print(FN)
  spStrataObj  = get(spStrata)
  #print(str(spStrataObj))

  
  bkMapS=preObj$bkMapS
  supHapExp=preObj$supHapExp
  supHapProb=preObj$supHapProb

  if(is.null(preObj$supHapExp)) stop("Object, preObj, has the variable $supHapExp as NULL!")
  
  kids = HRCB.Esp1Rule.sampleKid(rule=rule, caseNo=caseNo, spStrata=spStrataObj, supHapProb=supHapProb)
    
#  print("return kids")
#  return(kids)
  parIdx = matrix(NA, nrow = caseNo, ncol=4)
  for( eachChild in 1:caseNo){
     hapPair = kids[eachChild,]
     if (hapPair[1]==hapPair[2]){
       parIdx[eachChild,] = ESp.impuParent.homoKid(hapPair[1], supHapProb)
     }else{
       ## testing  hapPair=c(1,2)
       parIdx[eachChild,] = ESp.impuParent.heterKid(hapPair, supHapProb)
     }
   }

    ## shuffle the trio, so risk groups are mixed
    tmpRandomShu = sample(1:caseNo, size=caseNo, replace=FALSE)

   if(!reControl){
     ## need to combine family and run.
     fam.hapIdx.unsort = rbind(parIdx[,1:2], parIdx[,3:4], kids)
     fam.hapIdx = fam.hapIdx.unsort[t( matrix(1:(caseNo*3), ncol=3, nrow=caseNo, byrow=FALSE)), ]
  
     ## find out the exact string expression
     hap.str1 = supHapExp[fam.hapIdx[,1]]
     hap.str2 = supHapExp[fam.hapIdx[,2]]
  
     ## convert string into digits
     geno.FMCMa = covDipStr2CodedGeno(hap.str1, hap.str2, subjectCt=caseNo*3, snpLen=bkMapS$snpLen, snpCoding=0:3, snpBase=c(0, bkMapS$alleleCode))

     ## shuffle the trio, so risk groups are mixed
     allRowIdx = matrix(1:(3*caseNo), nrow=3, byrow=FALSE) 
     trioShuffledIdx = allRowIdx[,tmpRandomShu]
     geno.FMCMa=geno.FMCMa[trioShuffledIdx,]

     if(!is.null(ifS)) {
       ## check!!!
       supDipIdx = matrix(unlist(apply(fam.hapIdx.unsort, 1, FUN=util.it.triMatch, len=length(supHapProb))),
                    ncol=3, nrow=caseNo, byrow=FALSE)
  
       sample.idx = data.frame(parIdx, kids, supDipIdx)[tmpRandomShu,]
       colnames(sample.idx) = c("hapIdx_f1", "hapIdx_f2", "hapIdx_m1", "hapIdx_m2",
                                "hapIdx_c1", "hapIdx_c2", "dipIdx_f", "dipIdx_m", "dipIdx_c")
       write.csv(sample.idx,  file=paste(ifS, "supHap.csv", sep=""))
     }
   }else{ # if(!reControl){
     ## need to find out which one is for the affected child
     othKids =  matrix(NA, nrow = caseNo, ncol=6)
     for( eachChild in 1:caseNo){
       find.idx1 = which( kids[eachChild, 1] == parIdx[eachChild, 1:4])
       if(length(find.idx1)==0) stop("Affected child hap indexes do not match with parents'.")
       find.idx2 = which( kids[eachChild, 2] == parIdx[eachChild, 1:4])
       if(length(find.idx2)==0) stop("Affected child hap indexes do not match with parents'.")
   
       if(max(find.idx1)<=2){
         # idx1 can only come from F, then idx2 must come from M
         fix.idx1 = find.idx1[1]
         fix.idx2 = find.idx2[ find.idx2>=3 ][1]
         comp.idx1 = (1:2)[-fix.idx1]
         comp.idx2 = (3:4)[-(fix.idx2-2)]
       }else{ ## if(max(find.idx1)<=2){
         if( min(find.idx1)>=3){
           # idx1 can only come from M, then idx2 must come from F
           fix.idx1 = find.idx1[1]
           fix.idx2 = find.idx2[ find.idx2<=2 ][1]
           comp.idx1 = (3:4)[-(fix.idx1-2)]
           comp.idx2 = (1:2)[-fix.idx2]
         }else{ ### if( min(find.idx1)>=3){
           # idx1 can come from either F or M, then need to see where idx2 come from
           if(max(find.idx2)<=2){
             # idx2 must come from F, then idx1 come from M
             fix.idx1 = find.idx1[ find.idx1>=3 ][1]
             fix.idx2 = find.idx2[1]
             comp.idx1 = (3:4)[-(fix.idx1-2)]
             comp.idx2 = (1:2)[-fix.idx2] 
           }else{ #### if(max(find.idx2)<=2){
#             if(min(find.idx2)>=3){
#               # idx2 must come from M, then idx1 come from F
#               fix.idx1 = find.idx1[ find.idx1<=2 ][1]
#               fix.idx2 = find.idx2[1]
#               comp.idx1 = (1:2)[-fix.idx1]
#               comp.idx2 = (3:4)[-fix.idx2]
#             }else{
               # set idx1 come from F, then idx2 come from M
               fix.idx1 = find.idx1[ find.idx1<=2 ][1]
               fix.idx2 = find.idx2[ find.idx2>=3 ][1]
               comp.idx1 = (1:2)[-fix.idx1]
               comp.idx2 = (3:4)[-(fix.idx2-2)]               
#             }
           } #### if(max(find.idx2)<=2){
         } ### if( min(find.idx1)>=3){
       } ## if(max(find.idx1)<=2){
       # fill in the other three
       
       if(  max( is.na(c(fix.idx1, comp.idx2, comp.idx1, comp.idx2, comp.idx1, fix.idx2)))==1 ) print("##############")
       othKids[eachChild, ] =  parIdx[eachChild,1:4] [c(fix.idx1, comp.idx2, comp.idx1, comp.idx2, comp.idx1, fix.idx2)]
       #print( c(parIdx[eachChild,1:4], kids[eachChild,], othKids[eachChild,]) )
       
     } # for( eachChild in 1:caseNo){

     ## need to combine family and run.
     fam.hapIdx.unsort = rbind(parIdx[,1:2], parIdx[,3:4], kids, othKids[,1:2], othKids[,3:4], othKids[,5:6])
     fam.hapIdx = fam.hapIdx.unsort[t( matrix(1:(caseNo*6), ncol=6, nrow=caseNo, byrow=FALSE)), ]
  
     ## find out the exact string expression
     hap.str1 = supHapExp[fam.hapIdx[,1]]
     hap.str2 = supHapExp[fam.hapIdx[,2]]
  
     ## convert string into digits
     geno.FMCMa = covDipStr2CodedGeno(hap.str1, hap.str2, subjectCt=caseNo*6, snpLen=bkMapS$snpLen, snpCoding=0:3, snpBase=c(0, bkMapS$alleleCode))

     ## shuffle the trio, so risk groups are mixed
     allRowIdx = matrix(1:(6*caseNo), nrow=6, byrow=FALSE) 
     trioShuffledIdx = allRowIdx[,tmpRandomShu]
     geno.FMCMa=geno.FMCMa[trioShuffledIdx,]

     if(!is.null(ifS)) {
       ## check!!!
       supDipIdx = matrix(unlist(apply(fam.hapIdx.unsort, 1, FUN=util.it.triMatch, len=length(supHapProb))),
                    ncol=6, nrow=caseNo, byrow=FALSE)
  
       sample.idx = data.frame(parIdx, kids, supDipIdx)[tmpRandomShu,]
       colnames(sample.idx) = c("hapIdx_f1", "hapIdx_f2", "hapIdx_m1", "hapIdx_m2",
                                "hapIdx_c1", "hapIdx_c2", "dipIdx_f", "dipIdx_m", "dipIdx_c", "dipIdx_cA", "dipIdx_cB" ,"dipIdx_cC" )
       write.csv(sample.idx,  file=paste(ifS, "supHap.csv", sep=""))
     }
   } # if(!reControl){
  
     
   return(geno.FMCMa)

}

HRCB.famMap.spTrio <-
function(caseNo, matingTbName, ifS="simuDirectInfo", reControl =FALSE ){
     ifD = FALSE

     FN = "HRCB.famMap.spTrio" 
     #print(FN)
     matingTbInfo  = get(matingTbName)
     #print(str(matingTbInfo))

     
     matRowCt = matingTbInfo$matRowCt
     #print(qp("matRowCt=", matRowCt*4))
     #print(str(matingTbInfo))
     
     kids.risk = matingTbInfo$kids.risk
     matingRowIdx = matingTbInfo$matingRowIdx
     genoMap = matingTbInfo$hap2genoMap
     snpLen = matingTbInfo$snpLen
     kids.matchedRow = matingTbInfo$kids.matchedRow

     ## sample row
     ##caseNo = 5
     kid.idx.sample = sample( 1:(matRowCt*4), size = caseNo, prob=kids.risk, replace=TRUE)

     ## find the matrix idx for the kids
     row.sample = kid.idx.sample%%matRowCt
     row.sample[row.sample==0]=matRowCt
     case.idx.matchedRow = kids.matchedRow[kid.idx.sample]
     
     if(reControl){
       ## need to calculate the control id
       ## recreat the child id for sampled rows
       controlIdx = unlist(lapply(1:caseNo, FUN=function(i, totalRow, rowIdx, cIdx){
             childIDX = rep(rowIdx[i], 4)+ (0:3)*totalRow
             leftControl = childIDX[ childIDX!=cIdx[i] ]
           }, totalRow = matRowCt, rowIdx=row.sample, cIdx=kid.idx.sample))

       tt.newIdxSeq = cbind( case.idx.matchedRow, matrix( kids.matchedRow[controlIdx], ncol=3, byrow=TRUE))
       genoOth = genoMap[,6][ tt.newIdxSeq  ]
       geno.CC = genoOth[t( matrix(1:(caseNo*4), ncol=4, byrow=FALSE) )   ]
       geno.CCMa = t(sapply(geno.CC, FUN=geno.2dStr2BinaMa, subjectCt=caseNo*4, snpLen=snpLen))
       
     }


     ## generate parents, kids, pesudo controls
     fa.idx.matchedRow = matingRowIdx[,1][row.sample]
     ma.idx.matchedRow = matingRowIdx[,2][row.sample]
     

     if(!is.null(ifS)) {
       sample.idx = data.frame(kid.idx.sample, row.sample, fa.idx.matchedRow, ma.idx.matchedRow, case.idx.matchedRow)
       colnames(sample.idx) = c("idx_kids", "rowIdx_matingTb", "fRowIdx_genoTb",
                                 "mRowIdx_genoTb", "cRowIdx_genoTb")
       write.csv(sample.idx,  file=paste(ifS, "supHap.csv", sep=""))
     }

     ## convert string into digits
     hap.str1 = genoMap[,3][ c(fa.idx.matchedRow, ma.idx.matchedRow, case.idx.matchedRow) ]
     hap.str2 = genoMap[,4][ c(fa.idx.matchedRow, ma.idx.matchedRow, case.idx.matchedRow) ]

     geno = covDipStr2CodedGeno(hap.str1, hap.str2, subjectCt=caseNo*3, snpLen=snpLen, snpCoding=0:3, snpBase=c(0, 1, 2))

     #geno = genoMap[,6][c(fa.idx.matchedRow, ma.idx.matchedRow, case.idx.matchedRow)]

     geno.FMCMa = geno[ t( matrix(1:(caseNo*3), ncol=3, byrow=FALSE) ),   ]
     if(ifD) print(matrix(geno.FMCMa, ncol=1)[1:10,])
     ## convert string into digits
     # geno.FMCMa = t(sapply(geno.FMC, FUN=geno.2dStr2BinaMa, subjectCt=caseNo*3, snpLen=snpLen))
   
     if(!reControl){
        return(geno.FMCMa)
     }

     
    stop ("NOT implemented yet")
    return(list(cc=geno.CCMa, geno.FMCMa=geno.FMCMa))

}

HRCBSpGrp.cons <-
function(hapProb, ids, type="A"){
	ifD = FALSE
	## contain the first index and the haplotype probabilities for hap1, hap2 and joint
	idProb = NULL
	id2 = NULL
	
	if (type == "A"){
		## for type A, the ids are two column indexes
		## need to find the list of second indexes for the same first index
		tmp.row = nrow(ids)
		if (tmp.row==0) {
			grpA = list(type="A",  ct=0)
			
		}else{
			
			idsAll = matrix(NA, nrow=tmp.row*2, ncol=2)
			idsAll[1:tmp.row,]=ids
			idsAll[(1:tmp.row)+tmp.row, 1]= ids[,2]
			idsAll[(1:tmp.row)+tmp.row, 2]= ids[,1]
			
			new.order = order(idsAll[,1])
			ids = NULL
			ids = idsAll[new.order,]
			
			uniqueIdx1 = unique(ids[,1])
			if(ifD) {
				print("unique idx:")
				print(uniqueIdx1)
			}
			beginIdx = match(uniqueIdx1, table=ids[,1])
			
			## rely on the natural order of ids[,2]
			id2 = ids[,2]
			ids = NULL
			
			senCt = length(uniqueIdx1)
			idProb = matrix(NA, nrow=senCt, ncol=3)
			idProb[,1]=uniqueIdx1
			idProb[,3]=beginIdx
			
			tmp = matrix(NA, nrow=senCt, ncol=2)
			tmp[,1]=hapProb[ uniqueIdx1 ]
			
			
			## to avoid processing too many if, leave the last one out for now
			
			tmp[1:(senCt-1), 2] = sapply(1:(senCt-1), FUN=function(i, idxcut, allmatch, prob){
						
						prob.idxR = (idxcut[i]):(idxcut[i+1]-1)
						prob.sum = sum(prob[  allmatch[ prob.idxR ]  ])
						return(prob.sum)
					}, idxcut=idProb[,3], allmatch=id2, prob=hapProb)
			
			## append the last one
			tmp[senCt, 2]= sum(hapProb[ id2[  (idProb[senCt,3]:length(id2))  ] ])
			
			idProb[,2]= tmp[,1]*tmp[,2]
			
			idProb[  (idProb[,2] > ((-1)*10^(-15))) &  (idProb[,2] < 0), 2]=0
			
			if(max(idProb[,2]<0)==1) stop("Sampling probability for strata A is wrong.")
			
			grpA = list(type="A", idProb=idProb, id2=id2, ct=senCt)
		}
		
		## also get D
		idProb=NULL
		len = length(hapProb)
		
		idProb = matrix(NA, nrow = len, ncol=2)
		idProb[,1] = 1:(len)
		
		tmp2 = rep(0, times=len)
		
		## if the first id is found in group A, we need to take out the probabilities
		if (grpA$ct > 0)  tmp2[ grpA$idProb[,1] ] = tmp[,2]
		
		idProb[,2] = hapProb*(1-hapProb-tmp2)
		tmp=NULL
		tmp2=NULL
		
		idProb[  (idProb[,2] > ((-1)*10^(-15))) &  (idProb[,2] < 0), 2]=0
		if(max(idProb[,2]<0)==1) stop("Sampling probability for strata D is wrong.")
		
		grpD = list(type="D", idProb=idProb, ct = len)
		
		return(list(grpA=grpA, grpD=grpD))
	}
	
	if (type == "B" | type=="C"){
		if(length(ids)==0) return(list(type=type, ct = 0))
		
		idProb = matrix(NA, nrow=length(ids) , ncol=2)
		# for type B or C, the ids are one column for homogenuous pair
		idProb[,1] = ids
		idProb[,2] = hapProb[ids]^2
		
		idProb[  (idProb[,2] > ((-1)*10^(-15))) &  (idProb[,2] < 0), 2]=0
		
		if(max(idProb[,2]<0)==1) stop("Sampling probability for strata B/C is wrong.")
		
		
		return(list(type=type, idProb=idProb, ct=length(ids)))
	}
	
}

HRCBSpGrp.sp <-
function(grp, size, hapProb, riskProb=NULL, grpA=NULL){
	ifD = FALSE
	samMa = matrix(NA, nrow=size, ncol=2)
	#print(grp)
	#print(hapProb)
	
	
	straCt = nrow(grp$idProb)
	stra.sampled=NULL
	if(grp$type=="A" | grp$type=="B"){
		if (is.null(riskProb))   riskProb=grp$idProb[,2]
		stra.sampled = rmultinom(1, size=size, prob= riskProb )
	}else{
		stra.sampled = rmultinom(1, size=size, prob= grp$idProb[,2])
	}
	if(ifD) print(grp$type)
	if(ifD) print("strata ct")
	if(ifD) print(stra.sampled)
	
	#print("sampled")
	tmp.ma = cbind(1:straCt, stra.sampled)
	tmp.ma = tmp.ma[tmp.ma[,2]>0, ,drop=FALSE]
	#print(tmp.ma)
	
	samMa[,1] =  unlist(apply(tmp.ma, 1, FUN = function(row, ma){
						rep(ma[row[1]], times=row[2])}, ma=grp$idProb[,1]))
	
	#print(samMa)
	
	if (grp$type=="A"){
		#id2 = grp$id2
		samMa[,2] = unlist(apply(tmp.ma, 1, FUN=function(row, id2, hapProb, idxct ){
							#print(hapProb)
							if(ifD) print(row)
							if(row[1]!=length(idxct)){
								tmp.range = idxct[row[1]]:(idxct[row[1]+1]-1)
							}else{
								tmp.range = idxct[row[1]]:length(id2)
							}
							tmp.sam = id2 [tmp.range]
							
							id2.sel = resample(tmp.sam, size=row[2], prob = hapProb[tmp.sam], replace=TRUE)
							#print(id2.sel)
							if(length(id2.sel)==1) id2.sel = rep(id2.sel, times=row[2])
							#print(id2.sel)
							return(id2.sel)
							
						},  id2 = grp$id2, hapProb=hapProb, idxct = grp$idProb[,3]))
		if(ifD) print(samMa)
		samMa = cbind(pmin(samMa[,1], samMa[,2]), pmax(samMa[,1], samMa[,2]))
	}
	if (grp$type=="B" | grp$type=="C"){
		samMa[,2] = samMa[,1]
	}
	
	if (grp$type=="D"){
		## sample the second idx by rejection
		if(is.null(grpA)) stop("HRCBSpGrp.sam::error")
		samMa[,2] = unlist(
				apply(tmp.ma, 1, FUN = function(row, grpAid, idxct, id2){
							maByIdx = match(row[1], grpAid)
							if( !is.na(maByIdx)){
								if(ifD) print(row)
								if(maByIdx!=length(idxct)){
									tmp.range = idxct[maByIdx]:(idxct[maByIdx+1]-1)
								}else{
									tmp.range = idxct[maByIdx]:length(id2)
								}
								## ask the program to trace up
								tmp.sam = id2 [tmp.range]
								removeList = c(tmp.sam, row[1])
								
								# sample for the entire list, except the one already in A
								idx2 = sam.reject(allIdx=grp$idProb[,1], idxProb = hapProb, rejectIdx = removeList, size=row[2])
							}else{
								# sample for the others, exept the one that is the same as the column 1           
								idx2 = sam.reject(allIdx=grp$idProb[,1], idxProb = hapProb, rejectIdx = row[1], size=row[2])
							}
							return(idx2)
						}, grpAid = grpA$idProb[,1], idxct=grpA$idProb[,3], id2=grpA$id2))
		
		if(ifD) print(samMa)
		samMa = cbind(pmin(samMa[,1], samMa[,2]), pmax(samMa[,1], samMa[,2]))
		if(ifD) print(samMa)
	}  
	
	return(samMa)
	
}

impuBk.scheduler <-
function(raw, idx, job=1, toolname=NULL, freqMaps=NULL, dir="", is.1digit=TRUE, dig1Code, dig2Code, reType=FALSE, reHap=NULL, logF=NULL, logErr=""){

  fStr ="[impuBk.scheduler:]"
  ifD = FALSE

  if(!is.null(dir)){
    if(dir==""){
      print(paste("Imputation information files(s) will be saved under current working directory:",  getwd()))
    }else{
      print(paste("Create directory:", dir, ", where imputation information file(s) are saved.", sep=""))
      dir.create(path=dir, showWarnings = TRUE)
      if (!is.null(logF)){
        logF = file.path(dir, qp(logF, ".txt"))
      }
      if (!is.null(reHap)){
        reHap = file.path(dir, reHap)
      }
      logErr = file.path(dir, logErr)
    }
  }else{
    if(job==1) {
#      print(
#          paste("Value for argument dir is NULL. Function will not save any information about imputation. In case of error, log file will be saved under current working directory:", getwd()))
#       paste("Function will not save any information about imputation. In case of error, log file will be saved under current working directory:", getwd()))
      reHap = NULL
      logF = NULL
    }else{
      if (!is.null(logF)){
        logF = qp(logF, ".txt")
      }
#      print(
#          paste("Value for argument dir is NULL. However, users ask to do multiple imputation. ",
#                "Imputation information files(s) will be saved under current working directory:",  getwd()))
    }
  }

  # inside the function, assume 0 1 2 3 for NA, homo, homo, heter and 0 1 2 for NA, allele1, allele2

  ## first check the existance of required parameters
  if(is.null(toolname)){
    if(is.null(freqMaps)){
      stop("Values for both arguments, toolboxName and freqMaps, are missing.")
    }else{
      toolname = toolbox.load(freqMaps=freqMaps)
    }
  }

  ## find the start and end position for the blocks of interest

  all.genomeMarkerInfo = get(toolname$freqMap)$genomeMarkerInfo

  bd.start = all.genomeMarkerInfo[idx[1], 2, drop=TRUE]
  bd.end   = all.genomeMarkerInfo[idx[ length(idx) ], 3, drop=TRUE]

  snpOffset =bd.start-1

  ## change digit to 1 digit coding using Qing's coding scheme
  if(!is.1digit){
      ## excluding the parents cols
      genos  = raw[, 2+( ((bd.start-1)*2+1):(bd.end*2) ),drop=FALSE  ]
      snpNum = ncol(genos)/2
      snp1digit = exchangeDigit(ma=genos,
        cols=c(1,snpNum*2), dig1Code=c(0, 1, 3, 2), dig2Code =dig2Code, action=c("2to1"))
  }else{
      ## excluding the parents cols
      genos  = raw[, 2+(bd.start:bd.end), drop=FALSE]
      snpNum = ncol(genos)
      snp1digit = genos
      #dig1Code.inside = dig1Code[c(1, 2, 4, 3)]
      
      ## change to inside Qing's coding scheme
      if(min(dig1Code==c(0,1,3,2))==0){
        snp1digit = apply(snp1digit, 1:2,  FUN= util.vec.replace, orignal = dig1Code, replaceBy=c(0,1,3,2))
      }
  }

  trioCt = nrow(snp1digit)/3
  maxRow = trioCt*job

  ## 18 columns: bk, trio, x1, x2, y1, y2, hap_f, hap_m, hap_c1-4
  if(reType){
    imputBkRecord = matrix(NA, ncol = 19, nrow=maxRow)
    impuDummy = imputBkRecord 
  }else{
    imputBkRecord = matrix(NA, ncol = 18, nrow=maxRow)
    impuDummy = imputBkRecord 
  }
  imputBkRecord.ct = 0
  errorTrap = NULL
  
  tryCatchEnv = new.env(parent=baseenv())
  assign("trapID", 0, envir=tryCatchEnv)
  assign("errorTrap", errorTrap, envir=tryCatchEnv)

  if(!is.null(get(toolname$freqMap)$hapBkOnlyMap)){
	  all.hapIndex = get(toolname$freqMap)$hapIndex
	  
	  hapBkOnlyMap.vars=list()
	  tmp.hapMap =  get(toolname$freqMap)$hapBkOnlyMap
	  hapBkOnlyMap.vars$resiProbCol= tmp.hapMap$resiProbCol
	  hapBkOnlyMap.vars$augIdxCol= tmp.hapMap$augIdxCol
	  hapBkOnlyMap.vars$probCol= tmp.hapMap$probCol

  }else{
	  all.hapIndex = c(-1)
	  hapBkOnlyMap.vars=list()
  }
  snpCoding = 0:3
  snpBase = 0:2
  genoProb = genGenoProb()

    for( unit in idx){
      if (is.element(unit, all.hapIndex)){
        ## haplotype, for each block, search every trio for missingness
        ## find block boundary
        bk.bd = unlist(all.genomeMarkerInfo[unit, c(2,3)])
        bk.geno = snp1digit[, (bk.bd[1]:bk.bd[2])]

        snpCt = bk.bd[2]-bk.bd[1]+1

        exhaustHap = get(toolname$exp)
        exhaustHap = exhaustHap[1:2^snpCt, 1:snpCt]

        bk.genoRowComp = rowSums(bk.geno!=0) == snpCt 
        bk.genoTrioComp = matrix(bk.genoRowComp, ncol=3, byrow=TRUE)
        bk.missTrioIdx = (1:trioCt) [ rowSums(bk.genoTrioComp) < 3 ]
    
        for (famId in bk.missTrioIdx){
          x1 = bk.bd[1]
          x2 = bk.bd[2]
          y1 = (famId-1)*3+1
          y2 = famId*3
          trioBlock = snp1digit[y1:y2, x1:x2 -  snpOffset]
          if(ifD) print( paste(fStr, " processing fam index:", famId))
          if(ifD) print(trioBlock)
          
          tryCatch({
            replace = ESp.imputBlock(appVarNames=toolname, 
                           trioBlock=trioBlock, snpLen=snpCt, bkIdx = unit, job=job,
                           snpCoding = snpCoding, snpBase=snpBase, reType = reType,  logF=logF, 
                           hapBkOnlyMap.vars=hapBkOnlyMap.vars
                            )

            imputBkRecord.ct = imputBkRecord.ct + 1
            imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job) , 1:6 ] = matrix(
                           rep(c(unit, raw[y1, 1], y1, y2, x1, x2), times=job), ncol=6, byrow=TRUE)
            
            imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job) ,
                          7:ncol( imputBkRecord ) ] = replace
            
            fam.hapIdx1 = replace[1, c(1,3,5)]
            fam.hapIdx2 = replace[1, c(2,4,6)]
            hap.str1 = exhaustHap[fam.hapIdx1, ]
            hap.str2 = exhaustHap[fam.hapIdx2, ]

            ## convert string into digits
            geno.FMCMa = covDipStr2CodedGeno(hap.str1, hap.str2, subjectCt=3, snpLen=snpCt)
 
            if(ifD) print(geno.FMCMa)
            if(ifD) print(snp1digit[y1:y2, x1:x2-  snpOffset])

            if( sum(abs(geno.FMCMa - trioBlock)[ trioBlock!=0 ])!=0 ) stop("No matching")

            snp1digit[y1:y2, x1:x2-  snpOffset] = geno.FMCMa

            if(!is.null(logF)){
              logl(logF, paste("For trio #", y2/3, ", Choose hap idx=[",
                               paste(replace[1:6], collapse=".", sep=""),
                               "]"))
              logl(logF, paste("Choose hap exp=[",
                    paste( apply(geno.FMCMa, 1, FUN=paste, collapse=""), collapse=".", sep=""), "]",
                               sep=""))
              logl(logF, paste("missing bk data:",
                    paste( apply(trioBlock, 1, FUN=paste, collapse=""), collapse="."),
                               sep=""))
      
            }
            if(ifD){
              print("-------------------")
              print(  c(y1, y2, x1, x2)  )              
            }

#            if( (imputBkRecord.ct!=0) & (imputBkRecord.ct %%cutpt==0) & (!is.null(reHap)) ){  
#                  ## if there is record and upto certain number, need to be outputed
#                  ## evaluated after each trio
#                  write.table(imputBkRecord[1:(imputBkRecord.ct*job) ,,drop=FALSE],
#                              file=paste(reHap, "_imputBkRecord.csv", sep=""), sep=",",
#                              append = TRUE, row.names = FALSE, col.names=FALSE)
#                  imputBkRecord.ct = 0
#                  imputBkRecord = impuDummy
#             }                       
           },  error = function(e) {
               ##print(qTraceback())
               traceback()
			   b = get("trapID", envir=tryCatchEnv)
			   b = b +1 
			   assign("trapID", b, envir=tryCatchEnv)
               #trapID <<- trapID+1
	
               ## HARD CODE!!!HARD CODE: trio id is assumed to the be first one
			   #errorTrap <<- rbind(errorTrap, errorInfo)
	           errorInfo = c(trapID=b, bkIdx=unit, pedgree=raw[y1,1], case=raw[(y1+2),2], c(y1, y2, x1, x2))
			   a = get("errorTrap", envir=tryCatchEnv)
			   a = rbind(a, errorInfo)
			   assign("errorTrap", a, envir=tryCatchEnv)
			   
               
                if(!is.null(logF)){
                  logl(logF, paste("\nError trap id=(", b, ") and details for errors:", sep=""))
                  logl(logF, paste("Error block index {idx=", unit, "}------------", sep=""))
                  logl(logF, paste("Error trap famId=(", famId, ") and details for errors:", sep=""))
                  
                  logl(logF, paste("missing bk data:",
                               paste( apply(trioBlock, 1, FUN=paste, collapse=""), collapse="."),
                               sep=""))
                  
                  #logl(logF, errTrace)
                }         
               }, warn =function(w) {
                 print("ImpuBlock::Warnings")
                 
            } ) ## tryCatch

           if(!is.null(logF)){
             logl(logF, paste("end imputing block index {idx=", unit, "}------------\n", sep=""))
           }
              
        } ## for (famId in bk.missTrioIdx){
         if( (imputBkRecord.ct!=0) & (!is.null(reHap)) ){  
                  ## if there is record and upto certain number, need to be outputed
                  ## evaluated when the block is over
                 write.table(imputBkRecord[1:(imputBkRecord.ct*job) ,,drop=FALSE],
                             file=paste(reHap, "_imputBkRecord.csv", sep=""), sep=",",
                             append = TRUE, row.names = FALSE, col.names=FALSE)
         }
         imputBkRecord.ct = 0
         imputBkRecord = impuDummy
      }else{
        # print("A genotype")  ## genotype
        bk.bd = unlist(all.genomeMarkerInfo[unit, 2])
        tmp.Map =  get(toolname$freqMap)$genoOnlyMap
        popuProb =  tmp.Map$bks[[bk.bd]][,tmp.Map$probCol]

        snpCt = 1

        bk.geno = snp1digit[,bk.bd -  snpOffset]        
        bk.genoRowComp = as.integer(bk.geno!=0) 
        bk.genoTrioComp = matrix(bk.genoRowComp, ncol=3, byrow=TRUE)
        bk.missTrioIdx = (1:trioCt) [ rowSums(bk.genoTrioComp) < 3 ]

        for (famId in bk.missTrioIdx){
           x1 = bk.bd
           x2 = bk.bd
           y1 = (famId-1)*3+1
           y2 = famId*3
           trioBlock = snp1digit[y1:y2, x1:x2 -  snpOffset]
           if(ifD) print( paste(fStr, " processing fam index:", famId))
           if(ifD) print(trioBlock)
           
           tryCatch({
 
             replace = imputGeno( trioBlock, job=job, genoProb=genoProb,
                    popuProb = popuProb, data.order="FMC", snpCoding=snpCoding)

             #print(replace)
             imputBkRecord.ct = imputBkRecord.ct + 1
             imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job) , 1:6 ]  =  matrix(
                            rep(c(unit, raw[y1, 1], y1, y2, x1, x2), times=job), ncol=6, byrow=TRUE)

             imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job),
                           c(7, 9, 11, 13, 15, 17) ] = replace
             
             geno.FMCMa = replace[1, 1:3]
             if( sum(abs(geno.FMCMa - trioBlock)[ trioBlock!=0 ])!=0 ) stop("No matching")

             snp1digit[y1:y2, x1:x2 -  snpOffset] = replace[1,1:3]

#              if( (imputBkRecord.ct!=0) & (imputBkRecord.ct %%cutpt==0) & (!is.null(reHap)) ){  
#                   ## if there is record and upto certain number, need to be outputed
#                   ## evaluated after each trio                
#                 write.table(imputBkRecord[1:(imputBkRecord.ct*job) ,,drop=FALSE],
#                             file=paste(reHap, "_imputBkRecord.csv", sep=""), sep=",",
#                             append = TRUE, row.names = FALSE, col.names=FALSE)
#                 imputBkRecord.ct = 0
#                 imputBkRecord = impuDummy
#              }
             
           },  error = function(e) {
               ##print(qTraceback())
               traceback()
			   
			   b = get("trapID", envir=tryCatchEnv)
			   b = b +1 
			   assign("trapID", b, envir=tryCatchEnv)
			   #trapID <<- trapID+1
			   
	           ## HARD CODE!!!HARD CODE: trio id is assumed to the be first one
			   #errorTrap <<- rbind(errorTrap, errorInfo)
			   errorInfo = c(trapID=b, bkIdx=unit, pedgree=raw[y1,1], case=raw[(y1+2),2], c(y1, y2, x1, x2))
    		   a = get("errorTrap", envir=tryCatchEnv)
			   a = rbind(a, errorInfo)
			   assign("errorTrap", a, envir=tryCatchEnv)			   
     
                if(!is.null(logF)){
                  logl(logF, paste("\nError trap id=(", b, ") and details for errors:", sep=""))
                  logl(logF, paste("Error block index {idx=", unit, "}------------", sep=""))
                  logl(logF, paste("Error trap famId=(", famId, ") and details for errors:", sep=""))
                  
                  logl(logF, paste("missing bk data:",
                               paste(trioBlock, collapse="."),
                               sep=""))
                }
               }, warn =function(w) {
                 # print("ImpuBlock::Warnings")                 
            } ) ## tryCatch

           if(!is.null(logF)){
             logl(logF, paste("end imputing block index {idx=", unit, "}------------\n", sep=""))
           }
         } ## for (famId in bk.missTrioIdx){
         if( (imputBkRecord.ct!=0) & (!is.null(reHap)) ){  
                ## if there is record, write out after each singletonn SNP 
                write.table(imputBkRecord[1:(imputBkRecord.ct*job) ,,drop=FALSE],
                            file=paste(reHap, "_imputBkRecord.csv", sep=""), sep=",",
                            append = TRUE, row.names = FALSE, col.names=FALSE)
         }
         imputBkRecord.ct = 0
         imputBkRecord = impuDummy
     
      } ## if (is.element(unit, all.hapIndex)){
    } ## for( unit in idx){

	errorTrap=get("errorTrap", envir=tryCatchEnv)
    if(!is.null(errorTrap)){
      write.table(errorTrap, file=paste(logErr, "_errorTrap.csv", sep=""), sep=",",
                 append = FALSE, row.names = FALSE, col.names = TRUE)
     
    }
    
    return (snp1digit)
}

impuBkTDT.scheduler <-
function(raw=data, idx, job=1, toolname=NULL, freqMaps=NULL, dir=NULL, is.1digit=TRUE, dig1Code, dig2Code, reType=FALSE, reHap=NULL, logF=NULL, logErr=""){

  fStr ="[impuBkTDT.scheduler:]"
  ifD = FALSE
  #logF = "test"
  if(ifD) print(fStr)

  if(!is.null(dir)){
    if(dir==""){
      print(paste("Imputation information files(s) will be saved under current working directory:",  getwd()))

    }else{
      print(paste("Create directory:", dir, ", where imputation information file(s) are saved.", sep=""))
      dir.create(path=dir, showWarnings = TRUE)

      if (!is.null(logF)){
        logF = file.path(dir, qp(logF, ".txt"))
      }

      if (!is.null(reHap)){
        reHap = file.path(dir, reHap)
      }

      logErr = file.path(dir, logErr)

    }
  }else{

    if(job==1) {
#      print(
#          paste("Value for argument dir is NULL. Function will not save any information about imputation. In case of error, log file will be saved under current working directory:", getwd()))

#       paste("Function will not save any information about imputation. In case of error, log file will be saved under current working directory:", getwd()))
      reHap = NULL
      logF = NULL
    }else{
      if (!is.null(logF)){
        logF = qp(logF, ".txt")
      }
      print(
          paste("Value for argument dir is NULL. However, users ask to do multiple imputation. ",
                "Imputation information files(s) will be saved under current working directory:",  getwd()))
    }

  }


  # inside the function, assume 0 1 2 3 for NA, homo, homo, heter and 0 1 2 for NA, allele1, allele2

  ## first check the existance of required parameters
  if(is.null(toolname)){
    if(is.null(freqMaps)){
      stop("Values for both arguments, toolboxName and freqMaps, are missing.")
    }else{
      toolname = toolbox.load(freqMaps=freqMaps)
    }
  }
  
  ## find the start and end position for the blocks of interest

  all.genomeMarkerInfo = get(toolname$freqMap)$genomeMarkerInfo

  bd.start = all.genomeMarkerInfo[idx[1], 2, drop=TRUE]
  bd.end   = all.genomeMarkerInfo[idx[ length(idx) ], 3, drop=TRUE]

  snpOffset =bd.start-1
  #print("Get here")
  ## change digit to 1 digit coding using Qing's coding scheme
  if(!is.1digit){
      ## excluding the parents cols
      genos  = raw[, 2+( ((bd.start-1)*2+1):(bd.end*2) ), drop=FALSE  ]
      snpNum = ncol(genos)/2
      snp1digit = exchangeDigit(ma=genos,
        cols=c(1,snpNum*2), dig1Code=c(0, 1, 3, 2), dig2Code =dig2Code, action=c("2to1"))
  }else{
      ## excluding the parents cols
      genos  = raw[, 2+(bd.start:bd.end), drop=FALSE]
      snpNum = ncol(genos)
      snp1digit = genos
      ## change to inside Qing's coding scheme
      if(min(dig1Code==c(0,1,3,2))==0){
        snp1digit = apply(snp1digit, 1:2,  FUN= util.vec.replace, orignal = dig1Code, replaceBy=c(0,1,3,2))
      }
  }

  #print("Get snp1digit")
  trioCt = nrow(snp1digit)/3
  maxRow = trioCt*job

  snp1digitTDT = matrix(NA, nrow=trioCt*6, ncol=(bd.end-bd.start+1))
  
  ## 18 columns: bk, trio, x1, x2, y1, y2, hap_f, hap_m, hap_c1-4
 
  if(reType){
    imputBkRecord = matrix(NA, ncol = 19, nrow=maxRow)
    impuDummy = imputBkRecord
  }else{
    imputBkRecord = matrix(NA, ncol = 18, nrow=maxRow)
    impuDummy = imputBkRecord
  }
  imputBkRecord.ct = 0
  errorTrap = NULL
  
  tryCatchEnv = new.env(parent=baseenv())
  assign("trapID", 0, envir=tryCatchEnv)
  assign("errorTrap", errorTrap, envir=tryCatchEnv)
  
  #print("Get imputBkRecord.ct")
  #print(dim(imputBkRecord))
  
	if(!is.null(get(toolname$freqMap)$hapBkOnlyMap)){
		all.hapIndex = get(toolname$freqMap)$hapIndex
		
		hapBkOnlyMap.vars=list()
		tmp.hapMap =  get(toolname$freqMap)$hapBkOnlyMap
		hapBkOnlyMap.vars$resiProbCol= tmp.hapMap$resiProbCol
		hapBkOnlyMap.vars$augIdxCol= tmp.hapMap$augIdxCol
		hapBkOnlyMap.vars$probCol= tmp.hapMap$probCol
		
	}else{
		all.hapIndex = c(-1)
		hapBkOnlyMap.vars=list()
	}


  snpCoding = 0:3
  snpBase = 0:2
  genoProb = genGenoProb()

  #print("Get loaded item")
  #print(idx)
  for( unit in idx){
      if (ifD) print( paste(fStr, " processing block index:", unit))
      if (is.element(unit, all.hapIndex)){
        ## haplotype, for each block, search every trio for missingness
        ## find block boundary
        bk.bd = unlist(all.genomeMarkerInfo[unit, c(2,3)])

        snpCt = bk.bd[2]-bk.bd[1]+1

        exhaustHap = get(toolname$exp)
        exhaustHap = exhaustHap[1:(2^snpCt), 1:snpCt]

        bk.missTrioIdx = (1:trioCt)
    
        for (famId in bk.missTrioIdx){
          x1 = bk.bd[1]
          x2 = bk.bd[2]
          y1 = (famId-1)*3+1
          y2 = famId*3
          trioBlock = snp1digit[y1:y2, x1:x2 -  snpOffset]
          if(ifD) print( paste(fStr, " processing fam index:", famId))
          if(ifD) print(trioBlock)
          
          tryCatch({

            replace = ESp.imputBlock(appVarNames=toolname, 
                           trioBlock=trioBlock, snpLen=snpCt, bkIdx = unit, job=job,
                           snpCoding = snpCoding, snpBase=snpBase, reType = reType,  logF=logF, 
                           hapBkOnlyMap.vars=hapBkOnlyMap.vars
                            )
            
            imputBkRecord.ct = imputBkRecord.ct + 1
            imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job), 1:6 ] = matrix(
                           rep(c(unit, raw[y1, 1], y1, y2, x1, x2), times=job), ncol=6, byrow=TRUE)

#             print(raw[, 1:4])
#             print(y1)
#             print(raw[y1, 1])
#             print(      matrix(rep(c(unit, raw[y1, 1], y1, y2, x1, x2), times=job), ncol=6, byrow=TRUE)[,1:4]
#                   )

            imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job), 7:ncol( imputBkRecord ) ] = replace
            fam.hapIdx1 = replace[1, c(1, 3, 5, 7, 9, 11 )]
            fam.hapIdx2 = replace[1, c(2, 4, 6, 8,10, 12 )]
            hap.str1 = exhaustHap[fam.hapIdx1, ]
            hap.str2 = exhaustHap[fam.hapIdx2, ]

            ## convert string into digits
            geno.FMCMa = covDipStr2CodedGeno(hap.str1, hap.str2, subjectCt=6, snpLen=snpCt)
 
            if(ifD) print(geno.FMCMa)
            if(ifD) print(snp1digit[y1:y2, x1:x2-  snpOffset])

            if( sum(abs(geno.FMCMa[1:3,] - trioBlock)[ trioBlock!=0 ])!=0 ) stop("No matching")

            snp1digitTDT[ ((famId-1)*6+1) :( famId*6  ), x1:x2-  snpOffset] =
              geno.FMCMa[ c(1,2,3, sample(1:3, size=3, replace=FALSE)+3), ]
            
            if(!is.null(logF)){
      
              logl(logF, paste("For trio #", y2/3, ", Choose hap idx=[",
                               paste(replace[1:6], collapse=".", sep=""),
                               "]"))
              logl(logF, paste("Choose hap exp=[",
                               paste( apply(geno.FMCMa, 1, FUN=paste, collapse=""), collapse=".", sep=""), "]",
                               sep=""))
              logl(logF, paste("missing bk data:",
                               paste( apply(trioBlock, 1, FUN=paste, collapse=""), collapse="."),
                               sep=""))
      
            }
            if(ifD){
              print("-------------------")
              print(  c(y1, y2, x1, x2)  )              
            }

           },  error = function(e) {
               ##print(qTraceback())
               traceback()
			   
			   b = get("trapID", envir=tryCatchEnv)
			   b = b +1 
			   assign("trapID", b, envir=tryCatchEnv)			   
			   #trapID <<- trapID+1
		
				## HARD CODE!!!HARD CODE: trio id is assumed to the be first one
				#errorTrap <<- rbind(errorTrap, errorInfo)	
	            errorInfo = c(trapID=b, bkIdx=unit, pedgree=raw[y1,1], case=raw[(y1+2),2], c(y1, y2, x1, x2))
				a = get("errorTrap", envir=tryCatchEnv)
				a = rbind(a, errorInfo)
				assign("errorTrap", a, envir=tryCatchEnv)
	
                if(!is.null(logF)){
                  logl(logF, paste("\nError trap id=(", b, ") and details for errors:", sep=""))
                  logl(logF, paste("Error block index {idx=", unit, "}------------", sep=""))
                  logl(logF, paste("Error trap famId=(", famId, ") and details for errors:", sep=""))
                  
                  logl(logF, paste("missing bk data:",
                               paste( apply(trioBlock, 1, FUN=paste, collapse=""), collapse="."),
                               sep=""))
                  
                  #logl(logF, errTrace)
                }         
               }, warn =function(w) {
                 print("ImpuBlock::Warnings")
                 
            } ) ## tryCatch

           if(!is.null(logF)){
             logl(logF, paste("end imputing block index {idx=", unit, "}------------\n", sep=""))
           }
              
        } ## for (famId in bk.missTrioIdx){

        if( (imputBkRecord.ct!=0) & (!is.null(reHap)) ){
          #print("Write")
          write.table(imputBkRecord[1:(imputBkRecord.ct*job) , , drop=FALSE],
                      file=paste(reHap, "_imputBkRecord.csv", sep=""), sep=",",
                      append = TRUE, row.names = FALSE, col.names=FALSE)
        }
        imputBkRecord.ct  = 0
        imputBkRecord = impuDummy

      }else{
        ## genotype
        # print("A genotype")

        bk.bd = unlist(all.genomeMarkerInfo[unit, 2])

        tmp.Map =  get(toolname$freqMap)$genoOnlyMap
        popuProb =  tmp.Map$bks[[bk.bd]][,tmp.Map$probCol]

        snpCt = 1
        bk.missTrioIdx = (1:trioCt) 

        for (famId in bk.missTrioIdx){
           x1 = bk.bd
           x2 = bk.bd
           y1 = (famId-1)*3+1
           y2 = famId*3
           trioBlock = snp1digit[y1:y2, x1:x2 -  snpOffset]
           if(ifD) print( paste(fStr, " processing fam index:", famId))
           if(ifD) print(trioBlock)
           
           tryCatch({
 
             replace = imputGeno( trioBlock, job=job, genoProb=genoProb,
                    popuProb = popuProb, data.order="FMC", snpCoding=snpCoding)

             #print(replace)
             imputBkRecord.ct = imputBkRecord.ct + 1
             imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job) ,
                           1:6 ] = matrix(rep(c(unit, raw[y1, 1], y1, y2, x1, x2), times=job), ncol=6, byrow=TRUE)

             imputBkRecord[((imputBkRecord.ct-1)*job+1):(imputBkRecord.ct*job) ,
                           c(7, 9, 11, 13, 15, 17) ] = replace
             geno.FMCMa = replace[1, 1:6]

             if( sum(abs(geno.FMCMa[1:3] - trioBlock)[ trioBlock!=0 ])!=0 ) stop("No matching")

             snp1digitTDT[ ((famId-1)*6+1):( famId*6  ), x1:x2-  snpOffset] =
               geno.FMCMa [ c(1,2,3, sample(1:3, size=3, replace=FALSE)+3)]
                     
           },  error = function(e) {
               ##print(qTraceback())
               traceback()
			   b = get("trapID", envir=tryCatchEnv)
			   b = b +1 
			   assign("trapID", b, envir=tryCatchEnv)			   
			   #trapID <<- trapID+1
			
               ## HARD CODE!!!HARD CODE: trio id is assumed to the be first one
			   #errorTrap <<- rbind(errorTrap, errorInfo)
			   errorInfo = c(trapID=b, bkIdx=unit, pedgree=raw[y1,1], case=raw[(y1+2),2], c(y1, y2, x1, x2))
			   a = get("errorTrap", envir=tryCatchEnv)
			   a = rbind(a, errorInfo)
			   assign("errorTrap", a, envir=tryCatchEnv)
			   
                if(!is.null(logF)){
                  logl(logF, paste("\nError trap id=(", b, ") and details for errors:", sep=""))
                  logl(logF, paste("Error block index {idx=", unit, "}------------", sep=""))
                  logl(logF, paste("Error trap famId=(", famId, ") and details for errors:", sep=""))
                  
                  logl(logF, paste("missing bk data:",
                               paste(trioBlock, collapse="."),
                               sep=""))
                  
                  #logl(logF, errTrace)
                }         
               }, warn =function(w) {
                 # print("ImpuBlock::Warnings")
                 
            } ) ## tryCatch

           if(!is.null(logF)){
             logl(logF, paste("end imputing block index {idx=", unit, "}------------\n", sep=""))
           }
         } ## for (famId in bk.missTrioIdx){

         if( (imputBkRecord.ct!=0) & (!is.null(reHap)) ){
          #print("Write")
          write.table(imputBkRecord[1:(imputBkRecord.ct*job) , , drop=FALSE],
                      file=paste(reHap, "_imputBkRecord.csv", sep=""), sep=",",
                      append = TRUE, row.names = FALSE, col.names=FALSE)
        }
        imputBkRecord.ct  = 0
        imputBkRecord = impuDummy
        
      } ## if (is.element(unit, all.hapIndex)){

#       print( qp("unit=", unit))
#       print( imputBkRecord[1:(imputBkRecord.ct*job), 1:4])
    } ## for( unit in idx){

	errorTrap=get("errorTrap", envir=tryCatchEnv)
    if(!is.null(errorTrap)){
      write.table(errorTrap, file=paste(logErr, "_errorTrap.csv", sep=""), sep=",",
                 append = FALSE, row.names = FALSE, col.names = TRUE)
     
    }
    return (snp1digitTDT)
}

imputGeno <-
function(trioBlock, job=1, genoProb, popuProb, data.order, snpCoding){
  fStr = "[imputGeno]:"
  ifD = FALSE
  if(ifD) print(qp(fStr, "start"))
  if( min( c(snpCoding==c(0,1,2,3),data.order=="FMC")) <1 )
     stop (paste(fStr, "Data configuration is not right:\n", "snpCoding=[", paste(snpCoding, collapse=";", sep=""),
                 "]", "trioBlock order=[", data.order, "]", sep=""))

  ##CAUTION, need to flip the popuProb, it is in the sequence for 11, 22, 12
  ## and re-standardize if freq==0
  tmpPop = popuProb/sum(popuProb)
  zeroPop = tmpPop==0
  #print(zeroPop)

  if(sum(zeroPop)>0){
    tmpPop = tmpPop/1.0001
    
    tmpPop[zeroPop]=.0001/sum(zeroPop)

#    if(ifD){
#      print(popuProb)
#      print(tmpPop)
#      print(sum(tmpPop))
#    }
    popuProb=tmpPop
  }
  
  popuProb=popuProb[c(1,3,2)]
  
## first process the block with complete genotypes
  if( sum(trioBlock==snpCoding[1])==0 ){
    seqpar = paste(trioBlock[1], trioBlock[2], sep="-")
    bench = paste(genoProb[,1], genoProb[,2], sep="-")
    matchPos = match(seqpar, bench)

    famRe = matrix(NA, nrow=job, ncol=6)

    
    for( i in 1:job){
      famRe[i, 1:2]= trioBlock[1:2]
      kid = unlist(lapply(1:3, FUN=function(item, repp){
        rep(item, repp[item])
      }, repp=genoProb[matchPos, 3:5]*4))
  
      newlyM = match(trioBlock[3], kid)
  
      allKid = kid[ c(newlyM[1], kid[-newlyM][sample(1:3, size=3, replace=FALSE)])  ]

      famRe[i, 3:6]=allKid      
    }

    return(famRe)
  }

  
  finalIdx = rep(T, nrow(genoProb))
  ## filter out the parents
  if(trioBlock[1]!=snpCoding[1]){
    matched = genoProb[,1]==trioBlock[1]
    finalIdx = finalIdx & matched
  }
  if(ifD) print(finalIdx)
  if(trioBlock[2]!=snpCoding[1]){
    matched = genoProb[,2]==trioBlock[2]
    finalIdx = finalIdx & matched
  }
  if(ifD) print(finalIdx)
  cProb = rep(1, nrow(genoProb))
  if(trioBlock[3]!=snpCoding[1]){
    ## hard code!!! hard code geno coding need to be 1,2,3 to correspond to the child geno
    ## indicated by genoProb
    matched = genoProb[, trioBlock[3]+2]!=0

    cProb = genoProb[, trioBlock[3]+2]
    
    finalIdx = finalIdx & matched
  }
  if(ifD) print(finalIdx)

  if(sum(finalIdx)==0) stop(paste(fStr,
          "Mendelian error! trio geno=[", paste(trioBlock, collapse=".", sep=""),
          "] for order =[", order, "]", sep=""))
#  print("pop")
#  print(popuProb)
  fProb = popuProb[genoProb[,1]]
  mProb = popuProb[genoProb[,2]]
  jProb = fProb*mProb
  jProb = jProb/sum(jProb)

  ## check if estimated genotype return no genotype for this one
  jointProb = sum(jProb[finalIdx])

  if(jointProb==0) {
    jProb[!finalIdx]=0
    jProb = jProb*cProb 
    jProb[finalIdx]=rep(1/sum(finalIdx), sum(finalIdx))
    print("should not happen")
    #print(trioBlock)
    stop("should not happen")
    
  }else{
    jProb[!finalIdx]=0
    jProb = jProb*cProb
    jProb = jProb/sum(jProb)
  }
  
  if(ifD) print(cbind(fProb, mProb, jProb, finalIdx, genoProb[,1:2]))

  allKid = t(apply(genoProb[, 3:5]*4, 1, FUN=function(row){
                  a = rep(c(1, 2, 3), times=row); a}))
  #print(allKid)
  #print(jProb)
  
  re6Geno = matrix(NA, ncol=6, nrow=job)

  for( ss in 1:job){
    
    chooseRow = sample(1:9, size=1, prob = jProb)
  
    re = genoProb[chooseRow, 1:2]
    if(trioBlock[3]!=snpCoding[1]){
      childGeno = trioBlock[3]
    }else{
      ## hard code!!! hard code, geno coding need to be 1,2,3 to correspond to the child geno
      ## indicated by genoProb
      childGeno = sample(1:3, size=1, prob = genoProb[chooseRow, 3:5])
  
    }

    oth = allKid[chooseRow,]
    #print(oth)
    othleft = oth [- which(oth==childGeno)[1] ]
    re6Geno[ss,] = c(re, childGeno, othleft)
  
  }
  #print(re6Geno)
  return(re6Geno)
}

isDigitAtLociIdx <-
function(hapIdx, digit=1, lociCt, lociIdx, intVec=seq.int(from=1, to=2^(lociIdx-1), by=1)){

  ifD = FALSE
  
  if(digit!=1 & digit!=2) stop(paste("Invalide digit imput: ", digit, sep=""))
  
  leftSide = (hapIdx - intVec)/2^lociIdx + 1

  if(ifD) print(leftSide)
  
  roundLeftSide = as.integer(leftSide)

  if(ifD) print(roundLeftSide)

  integerLeft = leftSide[roundLeftSide == leftSide]

  if(ifD) print(integerLeft)

  one = integerLeft >= 1
  oth = integerLeft <= 2^(lociCt-lociIdx)

  fittedCt = one & oth

  if(ifD) print(fittedCt)

  if(length(fittedCt)>1) stop("Error!. Should left with only one or none choice")

  if(length(fittedCt)==1){
    if(digit==2) fittedCt = !fittedCt
    return(fittedCt)
  }else{
    if(digit==2) {
      return(TRUE)
    }else{
      return(FALSE)
    }
  }
}

linkageFile.proc <-
function(data, snpIdxRange=NULL, key.prefix="", bk.sizes=NULL, action = c("outputTrio", "formTrio1digit", "missingReport", "Mendelian check", "freq estimate"), dig2Code=0:2, dig1Code=c(0,1,3,2), ... ){
	
	#dig2Code=0:2
	#dig1Code=c(NA,0,1,2)
	
	pedCol =1
	memCol=2
	affectCol = 6
	dadCol=3
	momCol=4
	txt.affect=2
	
	sep=""
	header=FALSE
	
	
	trioTwoDigit = snpPREFileMatchTrio(txtF=data, sep=sep, header=header,
			pedCol =pedCol, memCol=memCol, affectCol =affectCol,
			dadCol=dadCol, momCol=momCol, txt.affect=txt.affect,
			logF=NULL)
	#print(dim(trioTwoDigit))
	
	if(is.null(snpIdxRange)) snpIdxRange = c(7, ncol(trioTwoDigit))
	
	snpStartLeftIndex=snpIdxRange[1]
	snpEndRightIndex =ncol(trioTwoDigit) - snpIdxRange[2] + 1
	
	re = list()
	if(is.element("outputTrio", action)){
		re = c(re, trio2digit=list(trioTwoDigit))
	}
	
	genos  = trioTwoDigit[, snpStartLeftIndex:(ncol(trioTwoDigit)-snpEndRightIndex+1)]
	snpNum = ncol(genos)/2
	
	
	## check linkage file code
	linkqing = unique(as.vector(unlist(genos)))
	tt.check = match(linkqing, dig2Code) 
	if(max(is.na(tt.check))==1)
		stop(paste("dig2Code is wrong.", "Existing codes in data:[",
						paste(linkqing, collapse=";", sep=""), "].")
		)
	
	snp1digit = exchangeDigit(ma=genos,
			cols=c(1,snpNum*2), dig1Code=dig1Code, dig2Code =dig2Code, action=c("2to1"))
	
	snp1digit.inside = exchangeDigit(ma=genos,
			cols=c(1,snpNum*2), dig1Code=c(0, 1, 3, 2), dig2Code =dig2Code, action=c("2to1"))
	
	trio1digit = cbind( trioTwoDigit[, c(pedCol, memCol)], snp1digit)
	if(is.element("formTrio1digit", action)){
		
		re = c(re, trio=list(trio1digit))
	}
	
	if(is.element("missingReport", action)){
		## check necessary parameters
		if( is.element(snpStartLeftIndex, c(pedCol, memCol, affectCol, dadCol, momCol))){
			warning("Cannot report missing information. The argument, snpIdxRange, includes one of the special column for linkage file.")
		}else{
			missSNPPos = findMissing(df=trioTwoDigit, is.1digit=FALSE, snpStartLeftIndex=snpStartLeftIndex,
					snpEndRightIndex=snpEndRightIndex, dig1Code=NULL, dig2Code=dig2Code )
			re = c(re, missIdx = list(missSNPPos))
		}
	}
	
	if(is.element("Mendelian check", action)){
		snpTrio = matrix(snp1digit.inside, nrow=3, byrow=FALSE)
		
		MedErr = matrix(NA, ncol=4, nrow=ncol(snpTrio))
		colnames(MedErr)=c("y", "x", "trio", "SNP")
		
		tryCatchEnv = new.env(parent=baseenv())
		assign("MedErr.ct", 0, envir=tryCatchEnv)	
		assign("MedErr", MedErr, envir=tryCatchEnv)
		
		trioCt = nrow(snp1digit)/3
		
		## CHANGED: 2009: report the index in the trio1digit
		tmpDigit = 1
		#print(str(snpTrio))
	
		for ( i in 1:ncol(snpTrio)){
			tryCatch({
						tt = checkMendelianError(codedSNPTrio=snpTrio[,i], snpCoding=c(0,1,2,3))
					}, error = function(e){
						#print(paste("trioCt=", trioCt))
						#print(paste("snpStartLeftIndex=", snpStartLeftIndex))
					    a = get("MedErr.ct", envir=tryCatchEnv)
					    a = a+1
					    assign("MedErr.ct", a, envir=tryCatchEnv)
					    #MedErr.ct <<- MedErr.ct +1
						#print(paste("MedErr.ct=", MedErr.ct, " i=", i))
						tttx = ceiling(i/trioCt)
						ttty = i%%trioCt
						if(ttty==0) ttty = trioCt
						## CHANGED: 2009: report the index in the trio1digit
						b = get("MedErr", envir=tryCatchEnv)
						b[a,] = c( (ttty-1)*3+1,  (tttx-1)*tmpDigit+3,  ttty,    tttx)
						assign("MedErr", b, envir=tryCatchEnv)
						#print( MedErr[MedErr.ct,,drop=FALSE] )
					})          
		}
		MedErr.ct = get("MedErr.ct", envir=tryCatchEnv)
		MedErr = get("MedErr", envir=tryCatchEnv)
		
		if(MedErr.ct==0) {
			MedErr=NULL
			#print("No Mendelian error.")
			re = c(trio=list(trio1digit), MedErr=list(MedErr[1:MedErr.ct,,drop=FALSE]))
		}else{
			#print("Found Mendelian error(s).")
			re = c(MedErr=list(MedErr[1:MedErr.ct,,drop=FALSE]), trio.err=list(trio1digit))
		}
	}
	
	if(is.element("freq estimate", action)){
		## check necessary parameters
		if( is.element(snpStartLeftIndex, c(pedCol, memCol, affectCol, dadCol, momCol))){
			warning("Cannot provide frequencies estimation. The argument,  snpIdxRange, includes one of the special column for linkage file.")
		}else{
			
			tmp = ncol(trioTwoDigit)
			parTwoDigit = getBackParentGeno(trioDf=trioTwoDigit, famCol=pedCol, memCol=memCol,
					snpIdx=snpStartLeftIndex:(tmp-snpEndRightIndex+1), re.child=FALSE, prefix=NULL)
			#print("###")
			#print(dim(parTwoDigit))
			
			if( is.null(bk.sizes) ){
				warning("Cannot provide frequencies estimation. The argument, bk.sizes, is not provided for user-specify option.")
			}else{
				
				map=freqmap.reconstruct(data=parTwoDigit, cols=c(3, ncol(parTwoDigit)), loci.ct=bk.sizes, is.1digit=FALSE,
						dig1Code=NULL, dig2Code = dig2Code, key.prefix=key.prefix, start.base=1, ...)
				
				re = c(re, freq=list(map))
			}
			
		} ##if( is.element(snpStartLeftIndex, c(pedCol, memCol, affectCol, dadCol, momCol))){
		
	}
	
	return(re)
	
}

listExtractor <-
function(nameList, varList, na.replace = NULL){
  ifD = FALSE

  varNames = names(varList)
  varLen = length(varList)
  exLen = length(nameList)

  m = match(nameList, varNames, 0)
  if(min(m)==0 & is.null(na.replace)){
    stop(paste("Variable(s) not found in the varList. NULL not allowed. Names in varList: ", paste(varNames, collapse=", "), sep=""))
  }

  re = NULL
  for(i in 1:length(m)){
    if(m[i]==0){
      re = c(re, na.replace)
    }else{
      re = c(re, varList[m[i]])
    }
  }
  return (re)
}

logBe <-
function (fileName, str=NULL){
       cat(paste("\n\n=============Begin of the Script::" , Sys.time(), "==================") , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       if(!is.null(str)) cat(str , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       return(NULL)
}

loge <-
function (fileName, str=NULL){
       if(!is.null(str)) cat(str , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       cat(paste("-------------End of the Script::" , Sys.time(), "--------------------") , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       return(NULL)
}

logErr <-
function (fileName, str){
       cat("!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!!!! ERROR !!!!!!!!!!!!" , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       cat(str , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       cat("!!!!!!!!!!!! ERROR ------------------- ERROR !!!!!!!!!!!" , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       return(NULL)
}

logl <-
function (fileName, str){
       cat(str , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       return(NULL)
}

logs <-
function (fileName, str){
       cat(str , file = fileName, sep = " ", fill = FALSE, labels = NULL, append = TRUE)
       return(NULL)
}

logWarn <-
function (fileName, str){
       cat("************ WARNING ***************** WARNING ***********" , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       cat(str , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       cat("************ WARNING ----------------- WARNING ***********" , file = fileName, sep = " ", fill = TRUE, labels = NULL, append = TRUE)
       return(NULL)
}

ls.list <-
function(path=NULL, objname,  exam.ext = .3){
  
  myobj = NULL
 
  
  if( (!is.null(path)) & (!is.character(objname))){
    assign(objname, NULL)
    load(path)
    myobj = get(objname)
  }else{
    myobj = objname
  }

  if( !is.list(myobj)){
    stop(paste("Object name (", objname, ") is not a list!", sep=""))
  }
  element = NULL
  l.len = length(myobj)
  names.all = names(myobj)
  for( i in 1:l.len){
    #print(i)
    #print(element)
    elm = myobj[[i]]
    # str(elm)
    if( !is.list(elm)){
      element = c(element, paste("Object", i, ") name=", names.all[i], "; value=", elm, ";", sep=""))
    }else{
      # check structure
      #str(elm[[1]])
      if (length(elm)==1){
        element = c(element, paste("Object", i, ") List::name=", names.all[i], "; length=1", sep=""))
      }else if(length(elm)>1){
        is.allSame = qStr.pattern(elm, exam.ext=exam.ext)
        ##print(is.allSame)
        if( is.null(is.allSame)){
          element = c(element, paste("Object", i, ") List::name=", names.all[i], "; length=", length(elm), ";", sep=""))
        }else{
          ## same objects 
          element = c(element, paste("Object", i, ") List::name=", names.all[i], "; length=", length(elm), ";", sep=""))
      
          element = c(element, paste("            structure::", is.allSame, ";", sep=""))

        }
      }else{
        element = c(element, paste("Object", i, ") List::name=", names.all[i], "; length=0", sep=""))
      }

    }
  }
  return(element)
}

matTBCondOnChild3.finalProc <-
function(maxTbl, counter, maxMateTbl, probVec, prob1, prob2, job, logF){
  #print("Run finalProc")
  ifD = FALSE

  cutoff = min(counter, 20)
  ## in the maxMateTbl, the first column is index within par1 and the second column is index within par2
  if (ifD) print(maxTbl[1:100,])
  if (ifD) print(cbind(maxMateTbl[1:cutoff, ], probVec[1:cutoff]))
  prob1n = rep(0, length(prob1))
  prob1n[unique(maxMateTbl[1:counter,1])]= prob1[unique(maxMateTbl[1:counter,1])]

  prob1n = prob1n/sum(prob1n)
  onePDipProb = prob1n[maxMateTbl[1:counter,1]]
  if(ifD) print(paste("onePDipProb=", paste(round(onePDipProb,3)[1:cutoff], collapse=";", sep="")))


  prob2n = rep(0, length(prob2))
  prob2n[unique(maxMateTbl[1:counter,2])]= prob2[unique(maxMateTbl[1:counter,2])]

  prob2n = prob2n/sum(prob2n)
  othPDipProb = prob2n[maxMateTbl[1:counter,2]]
  if(ifD) print(paste("othPDipProb=", paste(round(othPDipProb,3)[1:cutoff], collapse=";", sep="")))

  prob = onePDipProb * othPDipProb *  probVec[1:counter]
  
  prob = prob/sum(prob)

  if(ifD) print(paste("final prob=", paste(round(prob,3)[1:cutoff], collapse=";", sep="")))

  ## sample the row
  hap6idx = matrix(NA, nrow=job, ncol=6)
  if(ifD) print(hap6idx)
  for(ss in 1:job){
    chooseRow = sample(1:counter, size=1, prob=prob)
    #print(chooseRow)
    hap6idx[ss,] = maxTbl[chooseRow, , drop=FALSE]
    #print(hap6idx)
  }

#   if(!is.null(logF)){
#     if(cutoff==20){
#       bkMapStr = paste(fStr, "\nCutoffed possible dip mating table:\n",
#                      paste(wrComTbl( cbind(maxTbl[1:cutoff, ,drop=FALSE], prob[1:cutoff], probVec[1:cutoff]),
#                               colNames=c("f1", "f2", "m1", "m2", "c1", "c2", "prob", "probVec")), collapse="\n", sep=""),
#                      sep="")
#     }else{
#       bkMapStr = paste(fStr, "\nCompleted poss dip mating table:\n",
#                      paste(wrComTbl( cbind(maxTbl[1:cutoff, ,drop=FALSE], prob[1:cutoff], probVec[1:cutoff]),
#                               colNames=c("f1", "f2", "m1", "m2", "c1", "c2", "prob", "probVec")), collapse="\n", sep=""),
#                      sep="")
# 
#     }
#     logl(logF, bkMapStr)
#     logl(logF, paste(fStr, "choose row num=", chooseRow, sep=""))
#     #logl(logF, paste("Parents index=[", paste(as.vector(hap6idx), collapse=".", sep=""), "]", sep=""))
#   }

  if(ifD) {
    tttt = cbind(maxTbl[1:cutoff, ,drop=FALSE], prob[1:cutoff])
    print(tttt)
    print(hap6idx)
  }
  rm(maxTbl)
  rm(maxMateTbl)
  rm(probVec) 

  return( hap6idx )

}

moveDir <-
function(par=getwd(), fromRoot, toRoot, subDir=NULL){


  fListdir = list.files(path = file.path(par, fromRoot))
  dir.index = file.info(file.path(par, fromRoot, fListdir))$isdir
  dirs = fListdir[dir.index]
  for( j in dirs){
    newdir = file.path(par, toRoot, j)
    dir.create(path=newdir, showWarnings = TRUE)
  }
  ## addtional subdirectory
  if(!is.null(subDir))
    dir.create(path=file.path(par, toRoot, subDir[1], subDir[2]), showWarnings = TRUE)
  
  fList = list.files(path = file.path(par, fromRoot), all.files = TRUE,
           full.names = FALSE, recursive = TRUE)

  for( i in fList){
    f1 = file.path( par, fromRoot, i)
    f2 = file.path( par, toRoot, i)
    print(f1)
    print(f2)
    file.copy(from=f1, to=f2, overwrite=TRUE )

  }
}

procSemiAugMap <-
function(appVarNames, homoHetoInfo, snpLen){
	
	fStr="[procSemiAugMap]:"
	
	if(is.null(homoHetoInfo$homoIn)){
		## if not homo digit requirment, every hap is possible
		retainList = 1:(2^snpLen)
		return(retainList)
	}
	
	## if there is homo digit requirement
	idx4hapDigit = NULL
	## check out the global variables
	tryCatch({
				
				tmpGetObj = NULL
				tmpGetObj = get(appVarNames$digit, envir=baseenv())
				idx4hapDigit$digitMap1 = tmpGetObj$digitMap1[1:(2^(snpLen-1)), 1:snpLen]
				idx4hapDigit$digitMap2 = tmpGetObj$digitMap2[1:(2^(snpLen-1)), 1:snpLen]
				
				rm(tmpGetObj)
				##gc()
			}, error=function(e){
				## traceback()
				## print(e)
				errTrace = paste(e, collapse=";", sep="")
				stop(paste("\n", fStr, errTrace, "\nApp-wise Global Variable ", appVarNames$digit, " does not exisit."))
			})
	
	
	
	## first process the homo digit
	
	retainList = NULL
	matchedIdx = NULL
	for( i in 1:length(homoHetoInfo$homoIn)){
		curDigit =  homoHetoInfo$homoDigit[i]
		tmpPos = homoHetoInfo$homoIn[i]
		if(curDigit == 1) {
			matchedIdx = idx4hapDigit$digitMap1[, tmpPos, drop=FALSE]
			if(is.null(retainList)){
				retainList = matchedIdx
			}else{
				retainList = retainList[is.element(retainList, matchedIdx)]
			}
			## print(retainList)
		}else{
			matchedIdx = idx4hapDigit$digitMap2[, tmpPos, drop=FALSE]
			if(is.null(retainList)){
				retainList = matchedIdx
			}else{
				retainList = retainList[is.element(retainList, matchedIdx)]
			}
			##  print(retainList)
		} ## if(curDigit == 1) {
	} ## for( i in homoHetoInfo$homoIn){
	
	rm(idx4hapDigit)
	##gc()
	if( is.null (retainList) )
		stop(paste("\nBlock infomation error. No population haplotype matches existing data:(", expression, ").", sep=""))
	
	return (retainList)
	
}

qExpandTable <-
function(listOfFactor =  list( 1:3, 10:13), removedRowIdx=NULL, re.row=FALSE ){
   tblDim = length(listOfFactor)
   tblDimSeq = unlist(lapply(listOfFactor, FUN=length))

   recyMa = matrix(listOfFactor[[1]], ncol=1, nrow=tblDimSeq[1])
   recyRow = tblDimSeq[1]
   ## grow the index list  
   for ( i in 2:tblDim ){
     growing = util.matrix.clone(recyMa, tblDimSeq[i])
     ## addecCol =  rep(listOfFactor[[i]], each=recyRow)
     recyMa = cbind(growing,  rep(listOfFactor[[i]], each=recyRow))
     recyRow = recyRow * tblDimSeq[i]  
   }

   rowIdx = NULL
   if ( (!is.null(removedRowIdx)) | re.row){
     lengthRev = c(0, cumprod(tblDimSeq)) [ tblDim:1 ]
     tmp = ( removedRowIdx[tblDim:1] - 1)* lengthRev
     rowIdx = sum(tmp)+removedRowIdx[1]
     ## print(lengthRev)
     ## print(tmp)
   }

   ## print(rowIdx)
   ## print(recyMa)
   if(re.row){
     return(list(ma=recyMa, rowNum = rowIdx))
   }else{
     if(!is.null(removedRowIdx)){
       recyMa = recyMa[-rowIdx, , drop=FALSE]
     }
     return(recyMa)
   }
}

qing.cut <-
function(val, cutPt, cutPt.ordered = TRUE, right.include=TRUE){

  ## !!! HARD CODE !!! -1 means the val >/>= the maximum value in the cutPt
  ## return the matched index
  if(!cutPt.ordered) cutPt = order(cutPt)

  cutPt.ct = length(cutPt)

  if(cutPt.ct<1) stop("Zero length cutPt.")
  if(cutPt.ct<1) stop("Zero length val.")
  
  val.ct = length(val)

  re = rep(NA, length=val.ct)
  
  numFalse = re
  cellMatchedIdx = re

  if(right.include){
    for( i in 1:val.ct){
      cVal = val[i]
      falseMat = cVal > cutPt
      numFalse[i] = sum(falseMat)
    }
  }else{
    for( i in 1:val.ct){
      cVal = val[i]
      falseMat = cVal >= cutPt
      numFalse[i] = sum(falseMat)
    }
  }
    
  cellMatchedIdx [ numFalse==cutPt.ct ] = -1
  cellMatchedIdx [ numFalse!=cutPt.ct ] = numFalse[ numFalse!=cutPt.ct ]+1  

  return(cellMatchedIdx)

}

qing.mulMatch <-
function(val, bench){
  
  matched = bench==val
  bench.seq = 1:length(bench)
  re = bench.seq[matched]
  if(length(re)>=1){
    return(re)
  }else{
    return(0)
  }
}

qp <-
function(..., sep=""){
  str = paste(..., sep=sep)
  return(str)
}

qStr.pattern <-
function(objList,  exam.ext = .3){
  l.len = length(objList)

  class.last = NULL
  len.last = NULL
  names.last = NULL
  i = 1
  search=TRUE
  while( i <= min((round(l.len*exam.ext, 0)+1), l.len) & search ){
    #print(i)
    elm = objList[[i]]
    names = names(elm)
    class = unlist(lapply(elm, FUN=class))
    len = unlist(lapply(elm, FUN=length))

    if(!is.null(names.last)){
      ## see if the names matched
      if(length(names.last)==length(names)){
        all.m = names.last==names
        if(sum(all.m)!=length(names)) search=FALSE
      }else{
        search=FALSE  
      }
      if(length(class.last)==length(class)){
        all.m = class.last==class
        if(sum(all.m)!=length(class)) search=FALSE
      }else{
        search=FALSE  
      }
      if(length(len.last)==length(len)){
        all.m = len.last==len
        if(sum(all.m)!=length(len)) search=FALSE  
      }else{
         search=FALSE  
      }      
      
    }
    names.last=names
    names=NULL
    class.last=class
    class=NULL
    len.last=len
    len=NULL    
    i = i+1
  }

  if(search){
    if (is.null(names.last[1])){
      reStr = paste(c("name", "class", "length"),
                  c("",  class.last[1],  len.last[1]), 
                  sep="=", collapse="; ")
    }else{
      reStr = paste(c("name", "class", "length"),
                  c(names.last[1],  class.last[1],  len.last[1]), 
                  sep="=", collapse="; ")
    }
  }else{
    reStr = NULL
  }
  return(reStr)
}

qstrsplit <-
function(str, delim, re.1st = FALSE){
  re = unlist(strsplit(str, delim))
  if (length(re)==1){
    # possible not find the delim
    if (nchar(re[1])==nchar(str)){
      # not find the delim
      return(NA)
    }else  if(nchar(re[1])==0){
      # str contain delim itself
      if(re.1st ){
        return(re)
      }else{
        return(NULL)
      }
    }else{
      # str ends with delim
      return(re)
    }
  }else{
    if(nchar(re[1])==0){
      # str starts with delim
      if(re.1st){
        return(re)
      }else{
        return(re[-1])      
      }
    }else{
      return(re)
    }
  }
}

qTraceback <-
function(x = NULL){
    if (is.null(x) && (exists(".Traceback", envir = baseenv()))) 
        x <- get(".Traceback", envir = baseenv())

    reStr = NULL
    if (is.null(x) || length(x) == 0) 
        cat(gettext("No traceback available"), "\n")
    else {
        n <- length(x)
        m0 <- getOption("deparse.max.lines")
        for (i in 1:n) {
            label <- paste(n - i + 1, ": ", sep = "")
            m <- length(x[[i]])
            if (m > 1) 
                label <- c(label, rep(substr("          ", 1, 
                  nchar(label, type = "w")), m - 1))
            if (is.numeric(m0) && m0 > 0 && m0 < m) {
                cat(paste(label[1:m0], x[[i]][1:m0], sep = ""), 
                  sep = "\n")
                cat(label[m0 + 1], " ...\n")
                reStr = c(reStr, paste(label[1:m0], x[[i]][1:m0], sep = "") )
                reStr = c(reStr, label[m0 + 1], " ...\n")
            }
            else {
              cat(paste(label, x[[i]], sep = ""), sep = "\n")
              reStr = c(reStr, paste(label, x[[i]], sep = ""))
            }
        }
    }
    return(reStr)

}

resample <-
function(x, size, ...)
  if(length(x) <= 1) { if(!missing(size) && size == 0) x[FALSE] else x
  } else sample(x, size, ...)

sam.Hap.old <-
function(exp, prob, subjectCt){
	
	if(length(exp)==1){
		reExp = rep(exp, subjectCt)
	}else{
		reExp = sample(exp, size=subjectCt, prob=prob, replace=TRUE)
	}
	
	return(reExp)
	
}

sam.reject <-
function(allIdx=NULL, idxProb=NULL, rejectIdx, size=1){
	
	if (is.null(allIdx)){
		## if allIdx==NULL, then allIdx = 1:length(idxProb)
		length = length
		allIdx = 1:length
	}else{
		## if idxProb==NUL, then prob are the same for all idx,
		length = length(allIdx)
		if(is.null(idxProb)) idxProb = rep(1/length, length)
	}
	
	if(length<=0) stop(paste("Invalid input value for allList with length=[", length, "]", sep=""))
	
	meet = match(rejectIdx, allIdx)
	idxProb[meet]=0
	idx = resample(allIdx, size=size, prob=idxProb, replace=TRUE)
	
	return(idx)
}

sampleDipSemiAugMap <-
function(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen){
	## SemiAugMap only have the major haplotypes' expression and prob,
	## only a number for prob for each of the other haplotypes 
	## SemiAugMap also has a column showing the index of the haplotype in the fixed AugMap
	ifD = FALSE
	
	leftOverProb = semiMapFrame[1, resiProbCol]
	mapProb = semiMapFrame[ , probCol]
	mappedAugIdx = semiMapFrame[ , augIdxCol]
	
	commonProbIdx = length(mapProb) 
	
	## need to take account the situation where all the hap is presented.
	if(commonProbIdx == 2^snpLen){
		chooseDip1 = sample(1:commonProbIdx, size=1, prob = mapProb)
		chooseDip2 = sample(1:commonProbIdx, size=1, prob = mapProb)
		
		chooseDip1 = mappedAugIdx[chooseDip1]
		chooseDip2 = mappedAugIdx[chooseDip2]
	}else{
		commonProbIdx = length(mapProb) + 1
		chooseDip1 = sample(1:commonProbIdx, size=1, prob = c(mapProb, leftOverProb))
		chooseDip2 = sample(1:commonProbIdx, size=1, prob = c(mapProb, leftOverProb))
		
		allIdx = NULL
		
		if(chooseDip1 == commonProbIdx){
			allIdx = 1 :(2^snpLen)
			chooseDip1 = sampleIdxOutsideList(allIdx, mappedAugIdx)
		}else{
			chooseDip1 = mappedAugIdx[chooseDip1]
		}
		
		if(chooseDip2 == commonProbIdx){
			if(is.null(allIdx)) allIdx = 1:(2^snpLen)
			chooseDip2 = sampleIdxOutsideList(allIdx, mappedAugIdx)
		}else{
			chooseDip2 = mappedAugIdx[chooseDip2]
		}
		
	}
	
	reDip = range(chooseDip1, chooseDip2)
	
	return(reDip)
	
}

sampleHapExclude <-
function(idxs, hapProb){
	newHapProb = hapProb
	newHapProb[idxs]=0
	idx = sample(1:length(newHapProb), prob=newHapProb, size=1)
	return(idx)
}

sampleHapSemiAugMap2 <-
function(semiMapFrame, resiProbCol, augIdxCol, probCol, snpLen, exHapIdx=NULL){
	## SemiAugMap only have the major haplotypes' expression and prob,
	## only a number for prob for each of the other haplotypes 
	## SemiAugMap also has a column showing the index of the haplotype in the fixed AugMap
	ifD = FALSE
	fStr = "[sampleHapSemiAugMap2]:"
	if(ifD) print(fStr)
	
	leftOverProb = semiMapFrame[1, resiProbCol]
	mapProb = semiMapFrame[ , probCol]
	mappedAugIdx = semiMapFrame[ , augIdxCol]
	
	exMajor = NULL
	exMinor = NULL
	exMajor.ct = 0
	exMinor.ct = 0
	commonProbIdx = length(mapProb)
	chooseHapIdx = NULL
	
	if(!is.null(exHapIdx)){
		
		matched = (  (exHapIdx >=1) & (exHapIdx <= (2^snpLen)))
		
		if(sum(matched)!=length(exHapIdx)) stop( paste("not valid exHapIdx=[", paste(exHapIdx, collapse=";", sep=""), "]", sep=""))
		
		majorMatch = is.element(exHapIdx, mappedAugIdx)
		exMajor = exHapIdx[majorMatch]
		exMinor = exHapIdx[!majorMatch]
		
		leftMajorRow = (1:commonProbIdx)[!is.element(mappedAugIdx, exMajor)]
		exMajor.ct = sum(majorMatch)
		exMinor.ct = sum(!majorMatch)
		
	}
	
	if(ifD){
		print(semiMapFrame)
		if(!is.null(exHapIdx)){
			print(paste("exMajor=[", paste(exMajor, collapse=";", sep=""), "]", sep=""))
			print(paste("leftMajorRow=[", paste(leftMajorRow, collapse=";", sep=""), "]", sep=""))
			print(paste("exMinor=[", paste(exMinor, collapse=";", sep=""), "]", sep=""))
			print(paste("majorMatch=[", paste(majorMatch, collapse=";", sep=""), "]", sep=""))
		}
	}
	
	
	## need to take account the situation where all the hap is presented.
	if(commonProbIdx == 2^snpLen){
		if(exMinor.ct>=1) stop(paste(fStr, " impossible to exclude minor when the map is completed."))
		## the only excluded are major
		if(exMajor.ct>0){
			## readjust the prob
			mapProbN = mapProb[leftMajorRow]
			mappedAugIdxN = mappedAugIdx[leftMajorRow]
			
			if(ifD) print(paste("mapProb=[", paste(round(mapProbN, 2), collapse=";", sep=""), "]", sep=""))
			if(ifD) print(paste("length=[", commonProbIdx-exMajor.ct , "]", sep=""))
			
			chooseRow = sample(1:(commonProbIdx-exMajor.ct), size=1, prob=mapProbN)
			chooseHapIdx = mappedAugIdxN[chooseRow]
			
		}else{
			## sample from completed map
			if(ifD) print(paste("mapProb=[", paste(round(mapProb, 2), collapse=";", sep=""), "]", sep=""))
			if(ifD) print(paste("length=[", commonProbIdx , "]", sep=""))
			
			chooseRow = sample(1:commonProbIdx, size=1, prob=mapProb)
			chooseHapIdx = mappedAugIdx[chooseRow]
		}
		
	}else{
		## not a complete map
		if(exMajor.ct==0 & exMinor.ct==0){
			## no exclusion
			
			mapProbN = c(mapProb,  leftOverProb)
			
			if(ifD) print("(exMajor.ct==0 & exMinor.ct==0):")
			if(ifD) print(paste("mapProbN=[", paste(round(mapProbN, 2), collapse=";", sep=""), "]", sep=""))
			if(ifD) print(paste("length=[", commonProbIdx+1 , "]", sep=""))
			
			chooseRow = sample(1:(commonProbIdx+1), size=1, prob=mapProbN)
			
			if(chooseRow==commonProbIdx+1){
				allIdx = 1:(2^snpLen)
				chooseHapIdx = sampleIdxOutsideList(allIdx, mappedAugIdx)
			}else{
				chooseHapIdx = mappedAugIdx[chooseRow]
			}
			
		}else if(exMajor.ct==0 & exMinor.ct>0){
			## only exclude minor
			before.minor = 2^snpLen - commonProbIdx
			mapProbN = c(mapProb,  leftOverProb/before.minor*(before.minor-exMinor.ct))
			
			if(ifD) print("(exMajor.ct==0 & exMinor.ct>0):")
			if(ifD) print(paste("mapProbN=[", paste(round(mapProbN, 2), collapse=";", sep=""), "]", sep=""))
			if(ifD) print(paste("length=[", commonProbIdx+1 , "]", sep=""))
			
			chooseRow = sample(1:(commonProbIdx+1), size=1, prob=mapProbN)
			
			
			if(chooseRow==commonProbIdx+1){
				allIdx = 1:(2^snpLen)
				chooseHapIdx = sampleIdxOutsideList(allIdx, c(mappedAugIdx, exMinor))
			}else{
				chooseHapIdx = mappedAugIdx[chooseRow]
			}      
			
		}else if(exMajor.ct>0 & exMinor.ct==0){
			## only exclude major
			mapProbN = mapProb[leftMajorRow]
			mappedAugIdxN = mappedAugIdx[leftMajorRow]
			mapProbN = c(mapProbN,  leftOverProb)
			
			if(ifD) print("(exMajor.ct>0 & exMinor.ct==0):")
			if(ifD) print(paste("mapProbN=[", paste(round(mapProbN, 2), collapse=";", sep=""), "]", sep=""))
			if(ifD) print(paste("length=[", commonProbIdx-exMajor.ct+1 , "]", sep=""))
			
			chooseRow = sample(1:(commonProbIdx-exMajor.ct+1), size=1, prob=mapProbN)
			
			if(chooseRow==(commonProbIdx-exMajor.ct+1)){
				allIdx = 1:(2^snpLen)
				## tricky, can only sample minor, augIdx pool are not reduced
				chooseHapIdx = sampleIdxOutsideList(allIdx, c(mappedAugIdx))
			}else{
				chooseHapIdx = mappedAugIdxN[chooseRow]
			}      
		}else if(exMajor.ct>0 & exMinor.ct>0){
			## exlude both major and minor
			mapProbN = mapProb[leftMajorRow]
			mappedAugIdxN = mappedAugIdx[leftMajorRow]
			
			before.minor = 2^snpLen - commonProbIdx
			
			mapProbN = c(mapProbN,  leftOverProb/before.minor*(before.minor-exMinor.ct))
			
			
			if(ifD) print("(exMajor.ct>0 & exMinor.ct>0):")
			if(ifD) print(paste("mapProbN=[", paste(round(mapProbN, 2), collapse=";", sep=""), "]", sep=""))
			if(ifD) print(paste("length=[", commonProbIdx-exMajor.ct+1 , "]", sep=""))
			
			chooseRow = sample(1:(commonProbIdx-exMajor.ct+1), size=1, prob=mapProbN)
			
			if(chooseRow==(commonProbIdx-exMajor.ct+1)){
				allIdx = 1:(2^snpLen)
				## tricky, can only sample minor, augIdx pool are not reduced
				chooseHapIdx = sampleIdxOutsideList(allIdx, c(mappedAugIdx, exMinor))
			}else{
				chooseHapIdx = mappedAugIdxN[chooseRow]
			}    
			
			
		}else{
			stop("Nothing here")
		}
		
	}
	
	return(chooseHapIdx)
	
}

sampleIdxOutsideList <-
function(allList, listVec, maxIt = 1000){
	
	length = length(allList)
	if(length<=0) stop(paste("Invalid input value for allList with length=[", length, "]", sep=""))
	
	keepSearch = TRUE
	idx = NULL
	count = 1 
	while(keepSearch & count<= maxIt){
		idx = sample(1:length, size=1)
		if(!is.element(allList[idx], listVec)){
			keepSearch = FALSE
			return(allList[idx])
		}   
		count = count+1
	}
	
	stop(paste("\nInefficient sampling. Iteration count exceeds maximum limit=[", maxIt, "].",
					"\nallList = [", paste(allList, collapse=";", sep=""),
					"] and listVect = [", paste(listVec, collapse=";", sep=""), "].",
					sep=""))
	return(NULL)
}

semiAugBkFrame <-
function(hapBkMap, key, probLeftOver = .01){
	
	ifD = FALSE
	
	keyIndex = which(hapBkMap$keys==key)
	bk = hapBkMap$bks[[keyIndex]]
	
	## extract the original exp and prob, then restandard prob
	estBkExp = as.character(bk[,hapBkMap$expCol])
	estBkProbRe = bk[,hapBkMap$probCol]/(sum( bk[,hapBkMap$probCol] ))*(1-probLeftOver)
	
	## create the exhaust haplotypes 
	bkSnpLen = bk[1,hapBkMap$hapLenCol]
	exhaustExp = exhaustHapExp(lociCt=bkSnpLen, snpCoding=c(1,2))$hapStr
	exhaustExpCt = length(exhaustExp)
	
	## match the expression in the short list to the exhaustive list
	rematch = match(estBkExp, exhaustExp)
	
	## replace the expression's probability
	bk[,hapBkMap$probCol]=estBkProbRe
	bk$augIdx = rematch
	bk$resiProb = rep(probLeftOver, times=nrow(bk))
	
	hapBkMap$bks[[keyIndex]] = bk
	
	hapBkMap$augIdxCol = ncol(bk)-1
	hapBkMap$resiProbCol = ncol(bk)
	
	#if(ifD) print(bkFrame)
	
	## other parts, like df, dfStr,  in hapBkMap is not updated because it is not necessary
	return(hapBkMap)
	
}

setTrioMissingSNP <-
function(trioDf, cord, snp1digit=FALSE, missingDigit = 0){
  ## cord has the beginning row number and col numbers for the trio with missing data

  cord = matrix(cord, ncol=4, byrow=FALSE)
  rowCt = nrow(cord)

  dataNew = trioDf
  if(!snp1digit){
    for( i in 1:rowCt){
      x.y = cord[i,1:2]
      dataNew[  x.y[1]: (x.y[1]+2),  x.y[2]: (x.y[2]+1) ] = missingDigit
      #print(paste("i=", i, " x.y=[", paste(x.y, collapse=";", sep=""), "]", sep=""))
      #print(paste(    x.y[1]: (x.y[1]+2), collapse=";", sep=""))
      #print(paste(    x.y[2]: (x.y[2]+1), collapse=";", sep=""))
    }
  }else{
    for( i in 1:rowCt){
      x.y = cord[i,3:4]
      #dataNew[  ((x.y[1]-1)*3+1): (x.y[1]*3),  x.y[2]+2 ] = 0
      #print(paste("i=", i, " x.y=[", paste(x.y, collapse=";", sep=""), "]", sep=""))
      #print(paste( ((x.y[1]-1)*3+1): (x.y[1]*3) , collapse=";", sep=""))
      #print(paste( x.y[2]+2, collapse=";", sep=""))

      dataNew[  ((x.y[1]-1)*3+1): (x.y[1]*3),  x.y[2]+2 ] = missingDigit
    }
  }
  return(dataNew)
}

signal.new <-
function(coef, type, signalStr){

  # var used internally
  dig2Code=c("00", "11", "12", "22")
  
  ifD = FALSE
  vars = NULL
  segReplace = NULL

  
  signal=binaTree.parser(str=signalStr)
  #signal =binaTree.parser.proc(str=signalStr, binaTree=binaTree, curLevel=0, first.seg=TRUE)
  vars = unique(unlist(signal$elm.vlist))
  #boolOp = signal$elm.vMa[,3]

  ori.vars = unlist(signal$elm.vlist)
  ori.match = match(ori.vars, vars)
  
  
  if(type=="geno2d"){
    ## translate into snp 2-digit coding
    pos.equalSign = sapply(vars, FUN=util.char.1stIdx, find="=")

    snpIdx = sapply(1:length(vars), FUN=function(i, name, pos) {
      ## variable name starts with g, followed by snp idx and "="
      a = substr(name[i], 2, pos[i]-1)
      a
    }, name=vars, pos=pos.equalSign)
    
    segReplace = sapply(1:length(vars), FUN=function(i, name, pos, dig2Code, snpId){
      ##  Dec08Change!!! Change the sigStr to two-digit string representation
      a = substr(name[i], pos[i]+1, pos[i]+2)
      ## change!!!change
      if(a== dig2Code[2]){
        ## less common homo is "11", (a) is dominant for the less common, (b) is recessive for the less common
        ## 11 is equivalent to recessive 
        re = paste("v", snpId[i], "b", sep="")  
      }
      if(a== dig2Code[4]){
        ## "22" is equivalent to not dominant
        re =  paste("(not v", snpId[i] ,"a)", sep="")
      }      
      if(a== dig2Code[3]){
        ## heto: "12" 
        re = paste("( (not v", snpId[i],"b) and v", snpId[i], "a )", sep="")    
      }
      re
    }, name=vars, pos=pos.equalSign, dig2Code=dig2Code, snpId = snpIdx)
  } ## if(type=="geno2d"){

  if(type=="D/R"){
    ## translate into snp 2-digit coding
    snpIdx = sapply(1:length(vars), FUN=function(i, name) {
      ## variable name starts with snp idx, followed by one character "D"/"R"
      a = substr(name[i], 1, nchar(name[i])-1)
      a
    }, name=vars)

    snpDR = sapply(1:length(vars), FUN=function(i, name) {
      ## variable name starts with snp idx, followed by one character "D"/"R"
      a = substr(name[i], nchar(name[i]), nchar(name[i]))
      a
    }, name=vars)


    ## need to take care of not
    #if (length(vars)!=length( boolOp )) stop("Error in signal.new: not unique markers in string.")
    
    segReplace = sapply(1:length(vars), FUN=function(i,  snpDR, snpId) {
      re = ""
      if(snpDR[i]== "D"){
        ## dominant:
        re = paste( "v", snpId[i] ,"a", sep="")
      }
      if(snpDR[i]== "R"){
        ## recessive:
        re = paste( "v", snpId[i] ,"b", sep="")
      }
      re
    },  snpDR=snpDR,  snpId = snpIdx)
  } ## if(type=="d/r"){
  
    ## replace the original str with new element within str
    changedStr = signalStr
    if(length(vars)>=1){
      for (i in 1:length(vars)){
        tmp.str = vars[i]
        ##print(tmp.str)
        changedStr = util.str.replace(str=changedStr,
                           replaced=vars[i],
                           new=segReplace[i],
                           replace.all=TRUE)
        ##print(model.signal.cur)
      }
    }

  if(ifD) print(changedStr)
  if( length(unlist(signal$elm.vlist))==1 & substr(changedStr,1,1)=="(")
    changedStr = substr(changedStr, 2, nchar(changedStr)-1)
  
  ## reconstruct the single
  #signal =binaTree.parser.proc(str=changedStr, binaTree=binaTree, curLevel=0, first.seg=TRUE)
  signal = binaTree.parser(str=changedStr)
  
  ## also return the snpIdx so reshuffle the column can be done later
  all.varSnp = snpIdx[ ori.match ]
  
  return(list(coef=coef, signal=signal, var.idx = all.varSnp, str=changedStr)) 

}

signalRule.2strata.build <-
function(sigStr="g9=11 and g13=11", sigType="geno2d", para=c(-5, 1)){

  ## Dec08Change!!!: allow D/R coding
  #if (sigType !="geno1d") stop( paste("sigType=", sigType, " is not implemented.", sep=""))
  if( !is.element( sigType, c("geno2d", "D/R") ) ) stop( paste("Wrong input: sigType=", sigType, ". It is not implemented.", sep=""))
  
  if( length(para)!=2) stop( paste("Wrong length of input para:", length(para), sep=""))
        
  rule = signalRule.contr()
  rule = signalRule.setInter(rule, para[1])
  sig1 = signal.new(para[2], type=sigType, signalStr = sigStr)
  rule = signalRule.addSignal(rule, sig1)

  return (rule)
}

signalRule.addSignal <-
function(rule, signal){

  rule$slist = c(rule$slist, list(signal))
  rule$snpIdx = c(rule$snpIdx, signal$var.idx)
  
  return (rule)
}

signalRule.contr <-
function(){
  rule = list()
  rule$inter = NA
  rule$slist = NULL
  rule$snpIdx = NULL
  return(rule)
}

signalRule.setInter <-
function(rule, inter){
  rule$inter = inter;
  return (rule)
}

simuHapMap.build <-
function(hapInfoFrame=NULL, genoInfoFrame){
	
	## make it can take .csv file
	if(!is.null(hapInfoFrame)){
		if(is.character(hapInfoFrame)){
			hapInfoFrame = read.csv(hapInfoFrame, header=TRUE, sep=",", as.is=TRUE)
		}
	}
	
	## make it can take .csv file
	if(is.character(genoInfoFrame)){
		genoInfoFrame = read.csv(genoInfoFrame, header=TRUE, sep=",", as.is=TRUE)
	}
	
	## check input data
	test = match(c("prekey", "seq", "freq"), colnames(genoInfoFrame))
	if (sum(is.na(test))>=1)
		stop("Input argument, genoInfoFrame, does not have the required columns, i.e., prekey, seq, and freq.") 
	
#  if(is.null(hapInfoFrame)){
#    warning("NULL value is provided for input argument, hapInfoFrame.")
#  }
	
	genoMap.info = genoInfoFrame[, match(c("prekey", "seq", "freq"), colnames(genoInfoFrame))]
	
	genoMap = dfToGenoMap(df = genoMap.info, dataHeaders = c("prekey", 
					"seq", "freq"), genotype = c("11", "12", "22"), snpBase = 1)
	
	
	if(is.null(hapInfoFrame)){
		#print("here")
		
		onlyGeno = genoInfoFrame
		## build the df first
		key = paste( onlyGeno[,1], onlyGeno[,2], sep="-")
		key = unique(key)
		
		allGenoBk = NULL
		for( i in 1:(length(key))){
			x.b = (i-1)*3+1
			x.e = i*3
			oneBk = geno2hapFreq(prekey=onlyGeno[x.b,1], block=onlyGeno[x.b,2],
					genoFreq = onlyGeno[x.b:x.e, 3], snpBase=1,
					snpSeq = i)
			allGenoBk = rbind(allGenoBk, oneBk[,c(1,4,5)])
		}
		colnames(allGenoBk)=c("key","haploytype","frequency")
		#print(allGenoBk)
		
		simuMap = bkMap.constr(data=allGenoBk, keyCol=1, hapLenCol=NULL, expCol=2, probCol=3)
		simuSetup = list(NULL)
		simuSetup = c(simuSetup, newDf=list(allGenoBk), simuMap=list(simuMap)) 
	}else{
		bkFrame = hapInfoFrame
		bkFrame$key = paste(bkFrame[, 1], bkFrame[, 2], sep = "-h")
		
		hapBkMap = dfToHapBkMap(data = bkFrame, keyCol = ncol(bkFrame), 
				chCol = 1, blockCol = 2, expCol = 3, probCol = 4, 
				hapLenCol = 5, beginCol = 6, endCol = 7, snpBase = 1, 
				re.bf = TRUE, re.javaGUI = TRUE)
		hapBkGenoMap = bindHapBkGenoMaps(hapBkMap = hapBkMap, genoMap = genoMap)
		simuSetup = hapBkGenoMap2HapMap(hapBkGenoMap=hapBkGenoMap, reSimuMap=TRUE)
	}
	return(simuSetup)
}

snpPREFileMatchTrio <-
function(txtF, sep=" ", header = FALSE, pedCol=1, memCol=2, affectCol=6, dadCol=3, momCol=4, txt.affect=2, logF = NULL){

      if(is.character(txtF)){
        ## by default the text file has no header 
        df = read.table(file=txtF, header = header, sep=sep)
      }else{
        df = txtF
      }
      filter = df[, affectCol]==txt.affect 
      caseDf = df[filter,]

      ## it is ok for the parent to be affected
      ##controlDf = df[!filter,]
      controlDf = df

      ## don't sort the case, keep the original order
      #caseDf = caseDf[order(caseDf[,pedCol], caseDf[,memCol]),]
      #controlDf = controlDf[order(controlDf[,pedCol], controlDf[,memCol]),]

      numCase = nrow(caseDf)

      trioDf = NULL
      for( i in 1:numCase){
        caseRow = caseDf[i,]
        
        tryCatch({
             trioDf = rbind(trioDf, trio.match.single(caseRow, controlDf, pedCol, memCol, dadCol, momCol))
           },
             warning = function(warn){
                  if(!is.null(logF)){
                    #print(warn)
                    logWarn(logF, as.character(warn))
                  }else{
                    print("Throw warnings by case")
                    print(warn)
                }
        }) ## tryCatch({
      }
      return(trioDf)
}

toolbox.load <-
function(freqMaps){

	## HARD CODE!!!HARD CODE assuming the genotype is never used in the genoMap
	genoMap = dfToGenoMap(df=freqMaps$genoMap.info, dataHeaders=c("prekey", "seq", "freq"),  genotype=c("11", "12", "22"), snpBase=1)
	
    ## need to build the overall map
    if(!is.null(freqMaps$hapMap.info)){
	  if(max(freqMaps$hapMap.info$hapLen>7)==1) stop("Cannot process haplotype block with 8 or more loci.")  
      bkFrame = freqMaps$hapMap.info
      bkFrame$key = paste(bkFrame[,1], bkFrame[,2], sep="-") 

      hapBkMap = dfToHapBkMap(data=bkFrame,  keyCol=ncol(bkFrame),
                   chCol=1, blockCol=2,  expCol=3, probCol=4, hapLenCol=5, beginCol=6, endCol=7, snpBase=1, re.bf = TRUE, re.javaGUI = TRUE)
      
	  hapBkGenoMap = bindHapBkGenoMaps(hapBkMap=hapBkMap, genoMap=genoMap)
		   
      hapBkMap = hapBkGenoMap$hapBkOnlyMap
	  changedKey =  hapBkMap$keys
		   
	  newHapBkMap = hapBkMap
	  for( i in changedKey){
			   newHapBkMap =  semiAugBkFrame(newHapBkMap, key=i, probLeftOver = .01)
	  }
	  semiAugHapBkGenoMap = hapBkGenoMap
	  semiAugHapBkGenoMap$hapBkOnlyMap = newHapBkMap
    }else{
	  hapBkGenoMap = bindHapBkGenoMaps(hapBkMap=NULL, genoMap=genoMap)
	  semiAugHapBkGenoMap = hapBkGenoMap
	}
   
    ## HARD CODE!!!HARD CODE
    maxSNP=7
    idx4hapDigitAll = exIdxFromHap(maxSNP)
    exhaustHapExpAll = exhaustHapExp(maxSNP, re.str=FALSE)[[1]]
    
    maxRow = 4000
    maxTbl = matrix(NA, ncol =6, nrow = maxRow)
    maxMateTbl = matrix(NA, ncol =2, nrow = maxRow)
    
    varPrefix = paste(sample(letters[1:26], size=5), sep="", collapse="")
    varOriginalName = c("semiAugHapBkGenoMapSZ", "idx4hapDigitAll", "exhaustHapExpAll", "maxTbl", "maxMateTbl")
    appVarNames = paste(varPrefix, varOriginalName, sep="_")
    names(appVarNames) = c("freqMap", "digit", "exp", "tbl", "mateTbl")
    appVarNames = as.list(appVarNames)
    
    #print(paste("Application wise global environment var with names as", paste(appVarNames, sep="", collapse=";")))
    
    lapply(1:length(appVarNames), FUN=function(item, varNames, varList){
              assign(varNames[[item]], varList[[item]], envir=baseenv()); return(NULL)},
           varNames=appVarNames,
           varList = list(semiAugHapBkGenoMap, idx4hapDigitAll, exhaustHapExpAll, maxTbl, maxMateTbl))
    return(appVarNames)
}

trio.impu <-
function(triodd, freq,  impu.missingOnly=TRUE){
  dir=NULL

  dig1Code=c(NA,0,1,2)


  ## check number of loci match with map or not
  if( (ncol(triodd)-2) != nrow(freq$genoMap.info)/3 )
    stop("The number of SNPs in the frequency file does not equal the number of SNPs in trio dataset.")

  
  #if( subInF!="PPC" ) stop("Subject in the data should follow Father, Mother and child sequence.")
  
  if(is.character(triodd) ) {
    data = read.csv(triodd, header=FALSE)
  }else{
    data = triodd
  }
  code.Factor = apply( data[,c(-1,-2)], 2, FUN=is.factor)
  if(max(code.Factor)==1) stop("Genotypes cannot be factors.")

  code.Factor = apply( data[,c(-1,-2)], 2, FUN=is.numeric)
  if(min(code.Factor)==0) stop("Genotypes must be numerical.")
  
  ## check the coding for trio
  code.Check = unique(unlist(data[,c(-1,-2)]))
  
  if( sum(is.na(code.Check))==0 ){
    if (impu.missingOnly) {
      print("No missing genotype. Return original data.")
      return(triodd[,-c(1,2)])
    }
  }

  code.Left = code.Check[!is.na(code.Check)]

  code.Match = match(code.Left, dig1Code[2:4])
  if( sum(!is.na(code.Match)) != length(code.Match)){
    stop("Genotypes are not coded as 0, 1, nor 2")
  }

  toolboxNames = toolbox.load(freqMaps=freq)
  
  if(!is.null(  get(toolboxNames$freqMap)$hapBkOnlyMap )){
	  bk.ssize =  get(toolboxNames$freqMap)$hapBkOnlyMap$bkSnpLens
	  if (max(bk.ssize>=8)==1) stop("At least one haplotype block has 8 or more SNPs in the block. Method fails.")	  
  }

  # inside the function, assume 0 1 3 2 for NA, homo, hetero, homo, and 0 1 2 for NA, allele1, allele2
  dig1Default = c(0, 1, 3, 2)
  ## if the given code for 1-digit is not (0 1, 3, 2), change it.

  if (is.na(dig1Code[1])){
    ttt = data[,c(-1,-2)]
    ttt[is.na(ttt)]=max(dig1Code, na.rm=TRUE)+1

    data[,c(-1,-2)]=ttt
    dig1Code[1]=max(dig1Code, na.rm=TRUE)+1
  }

  
  if( sum(dig1Default == dig1Code)!=4 ){
    data.geno = apply(data[, c(-1, -2)], 1:2, FUN= util.vec.replace, orignal = dig1Code, replaceBy=dig1Default)
    data = cbind(data[, 1:2], data.geno)
  }
  ##TODO!!! confirm the number
  #print(toolboxNames)
  
  bkCt = nrow(get(toolboxNames$freqMap)$genomeMarkerInfo)

  if(impu.missingOnly){

    ## get the hapPair without saving.
    imputed = impuBk.scheduler(raw=data, idx=1:bkCt, job=1,
                toolname=toolboxNames,
                freqMaps=NULL, dir=dir,
                is.1digit=TRUE, dig1Code=dig1Default, dig2Code=0:2,
                reType=FALSE, reHap=NULL, logF=NULL, logErr="")
    if( sum(dig1Default == dig1Code)!=4 ){
      imputed = apply(imputed, 1:2, FUN= util.vec.replace, orignal = dig1Default, replaceBy=dig1Code)
    }
     return(imputed)
  }

  #print("get impuBkTDT.scheduler")
  ## get the hapPair without saving.
  imputed = impuBkTDT.scheduler(raw=data, idx=1:bkCt, job=1,
                toolname=toolboxNames,
                freqMaps=NULL, dir=dir,
                is.1digit=TRUE, dig1Code=dig1Default, dig2Code=0:2,
                reType=FALSE, reHap=NULL, logF=NULL, logErr="")

  if( sum(dig1Default == dig1Code)!=4 ){
     imputed = apply(imputed, 1:2, FUN= util.vec.replace, orignal = dig1Default, replaceBy=dig1Code)
  }
  return(imputed)
 
}

trio.impuDev <-
function(trio1digit, freqMaps, bk.seq=NULL, dig1Code=0:3, subInF = "PPC", dir=NULL, job=1, prefix="", impu.missingOnly=TRUE){

  if( subInF!="PPC" ) stop("Subject in the data should follow Father, Mother and child sequence.")
  
  if(is.character(trio1digit) ) {
    data = read.csv(trio1digit, header=FALSE)
  }else{
    data = trio1digit
  }
  
  toolboxNames = toolbox.load(freqMaps=freqMaps)
  
  if(!is.null(  get(toolboxNames$freqMap)$hapBkOnlyMap )){
	  bk.ssize =  get(toolboxNames$freqMap)$hapBkOnlyMap$bkSnpLens
	  if (max(bk.ssize>=8)==1) stop("At least one haplotype block has 8 or more SNPs in the block. Method fails.")	  
  }  
  
  
  # inside the function, assume 0 1 3 2 for NA, homo, hetero, homo, and 0 1 2 for NA, allele1, allele2
  dig1Default = c(0, 1, 3, 2)
  ## if the given code for 1-digit is not (0 1, 3, 2), change it.

  if( sum(dig1Default == dig1Code)!=4 ){
    data.geno = apply(data[, c(-1, -2)], 1:2, FUN= util.vec.replace, orignal = dig1Code, replaceBy=dig1Default)
    data = cbind(data[, 1:2], data.geno)
    
  }
  ##TODO!!! confirm the number
  #print(toolboxNames)
  
  bkCt = nrow(get(toolboxNames$freqMap)$genomeMarkerInfo)

  #print(data[1:3, 1:10])
  if(is.null(bk.seq)) bk.seq = 1:bkCt
  if(impu.missingOnly){

    ## get the hapPair without saving.
    imputed = impuBk.scheduler(raw=data, idx=bk.seq, job=job,
                toolname=toolboxNames,
                freqMaps=NULL, dir=dir,
                is.1digit=TRUE, dig1Code=dig1Default, dig2Code=0:2,
                reType=FALSE,
                reHap=qp(prefix, "impuHap_bk", bk.seq[1]),
                logF=NULL,
                logErr=qp(prefix, "impuErr_bk", bk.seq[1])
      )
    if( sum(dig1Default == dig1Code)!=4 ){
      imputed = apply(imputed, 1:2, FUN= util.vec.replace, orignal = dig1Default, replaceBy=dig1Code)
    }
     return(imputed)
  }

  ## get the hapPair without saving.
  imputed = impuBkTDT.scheduler(raw=data, idx=bk.seq, job=job,
                toolname=toolboxNames,
                freqMaps=NULL, dir=dir,
                is.1digit=TRUE, dig1Code=dig1Default, dig2Code=0:2,
                reType=FALSE,
                reHap=qp(prefix, "impuHap_bk", bk.seq[1]),
                logF=NULL,
                logErr=qp(prefix, "impuErr_bk", bk.seq[1])
  )

  if( sum(dig1Default == dig1Code)!=4 ){
     imputed = apply(imputed, 1:2, FUN= util.vec.replace, orignal = dig1Default, replaceBy=dig1Code)
  }
  return(imputed)
 
}

trio.match.single <-
function(caseRow, controlDf, pedCol, memCol, dadCol, momCol){
   dadRow = which(controlDf[,pedCol]==unlist(caseRow[pedCol]) & controlDf[,memCol]==unlist(caseRow[dadCol]))
   momRow = which(controlDf[,pedCol]==unlist(caseRow[pedCol]) & controlDf[,memCol]==unlist(caseRow[momCol]))

   if( length(dadRow)!=1 ){
     warning(paste("\n The individual is affected, but we have no data on the father. Case information::\n",
                   paste(wrComTbl(caseRow[1:7], colNames=colnames(caseRow)[1:7]), collapse="\n"), sep=""))
     return(NULL)
   }
   if( length(momRow)!=1 ){
     warning(paste("\n The individual is affected, but we have no data on the mother. Case information::\n",
                   paste(wrComTbl(caseRow[1:7], colNames=colnames(caseRow)[1:7]), collapse="\n"), sep=""))
     return(NULL)
   }
  
   re = rbind(controlDf[dadRow,], controlDf[momRow,], caseRow)
   return(re)
 }

trio.simu.direct <-
function (bkMap, rule, caseNo, datasetCt=1, infoS="simuDirInfo", ddir=NULL, baseObj.saveFN=NULL, baseObj.name=NULL,  reControl=FALSE, dig1Code=0:3 ){

  info = bkMap.HRCB.LRCB.split(bkMap, rule)

  bkMapNS = info$bkMapNS

  varPrefix = paste(sample(letters[1:26], size=10), sep="", collapse="")
  varOriginalName = "stepstone"
  finalUse = paste(varPrefix, varOriginalName, sep="_")
  #print(qp("finalUse=", finalUse))

  if(is.null(baseObj.saveFN)){
    ## if the spStrata is not saved earlier, need to generate
    if(!is.null(baseObj.name)){
      ## then we can choose to save it
     
      print(qp("No stepstone object is saved. Choose to generate and save the object as:", baseObj.name))
      
      matingTbInfo = bkMap.HRCB.famMap(info$bkMapS, rule, newColName=info$newColName,  ifS=infoS, baseName=baseObj.name)

      #finalUse =  baseObj.name
      assign(finalUse, matingTbInfo, envir=baseenv())
      
    }else{
      ## or not save it, just used. Not recommend.
      print(qp("No stepstone object is saved. Choose to generate but not save the object."))
      matingTbInfo = bkMap.HRCB.famMap(info$bkMapS, rule, newColName=info$newColName,  ifS=infoS, baseName=NULL)

      #finalUse = matingTblInfo
      assign(finalUse, matingTbInfo, envir=baseenv())
    }
    
  }else{
    ## if the spStrata is saved earlier, just load the saved one
   
    if( is.character(baseObj.saveFN)){
      print(qp("Stepstone object is saved before as:", baseObj.saveFN, ". Do not need to generate it."))
      tmp = load(paste(baseObj.saveFN, ".RData", sep=""))
      assign(finalUse, get(tmp[1]), envir=baseenv())
      
    }else{
      print(qp("Stepstone object is given."))
      assign(finalUse, baseObj.saveFN, envir=baseenv())
    }
  }

  
  simuTrio = rep(list(NA), length=datasetCt)

  if(is.null(ddir) &  (datasetCt!=1)){
    print( "Request to generate multipe datasets, but do not give path for save. Will return as a list.")
  }

  for( i in 1:datasetCt){
      if(is.null(infoS)){
        trioData1 = HRCB.famMap.spTrio(caseNo=caseNo, matingTbName=finalUse,
          ifS =NULL , reControl=reControl)
      }else{
        trioData1 = HRCB.famMap.spTrio(caseNo=caseNo, matingTbName=finalUse,
          ifS = qp(infoS, "_", i) , reControl=reControl)
      }
      
      trioData2 = bkMap.LRCB.spTrio(bkMapNS, caseNo=caseNo, reControl=reControl)
      
      simuTrioData = trioMerge(trioData1, trioData2, colName1=info$newColName, colName2=info$noCausalColName)

      if( is.null(dim(simuTrioData))){
        snp1recode =  util.vec.replace(simuTrioData, orignal = c(0,1,3,2), replaceBy= dig1Code)
      }else{
        snp1recode =  apply(simuTrioData, 1:2, FUN= util.vec.replace, orignal = c(0,1,3,2), replaceBy=dig1Code)
      }

      if(is.null(ddir)){
        simuTrio[[i]]=snp1recode
      }else{
         if (ddir==""){
           save(snp1recode, file=qp("trioDirSimu_", i, ".RData"))
         }else{
           save(snp1recode, file=file.path(ddir, qp("trioDirSimu_", i, ".RData")))
         } 
      }
    }

  if(is.null(ddir)){
    return(simuTrio)
  }else{
    return(NULL)
  }
}

trio.simu.proposed <-
function(bkMap, rule, caseNo, datasetCt=1, infoS="simuPropInfo", exInfoS="exSimuPropInfo", ddir=NULL, startIdx=NULL, spStrata.saveFN = NULL,  spStrata.name=NULL, reControl =FALSE, dig1Code=0:3 ){

  if (is.null(ddir)) {
    infoS=NULL
    exInfoS = NULL
  }
  ifD = FALSE
  
  info = bkMap.HRCB.LRCB.split(bkMap, rule)

  bkMapNS = info$bkMapNS

  varPrefix = paste(sample(letters[1:26], size=10), sep="", collapse="")
  varOriginalName = "stepstone"
  finalUse = paste(varPrefix, varOriginalName, sep="_")
  if(ifD) print(qp("finalUse=", finalUse))

  if(is.null(spStrata.saveFN)){
    ## if the spStrata is not saved earlier, need to generate
    if(!is.null(spStrata.name)){
      ## then we can choose to save it
      spStrata = bkMap.HRCB.Esp1Rule.Base(bkMap, rule, baseName=spStrata.name)
      if(ifD) print(qp("No stepstone object is previously saved. Generate and save the object as:", spStrata.name, ".RData"))

      #finalUse = spStrata.name
      assign(finalUse, spStrata, envir=baseenv())
      
    }else{
      ## or not save it, just used. Not recommend.
      spStrata = bkMap.HRCB.Esp1Rule.Base(bkMap, rule, baseName=NULL)

      #finalUse = spStrata
      assign(finalUse, spStrata, envir=baseenv())
      #print(qp("No stepstone object is previously saved. Generate but not save the object."))
    }
    
  }else{
    ## if the spStrata is saved earlier, just load the saved one
    
    if( is.character(spStrata.saveFN)){
      if(ifD) print(qp("Stepstone object is previously saved as:", spStrata.saveFN, ".RData. Do not need to generate it."))

      tryCatch({tmp = load(paste(spStrata.saveFN, ".RData", sep=""))
               assign(finalUse, get(tmp[1]), envir=baseenv())  },
             error = function(e){
               stop(paste("Cannot open step-stone file '", spStrata.saveFN, ".RData'.", sep=""))
             })

    }else{
      #print(qp("Stepstone object is given."))
      #finalUse = spStrata.saveFN
      assign(finalUse, spStrata.saveFN, envir=baseenv())
    }
  }
  
  preObj = bkMap.HRCB.Esp1Rule.genoSeq(bkMap, rule, re.probOnly = FALSE)

  
  simuTrio = rep(list(NA), length=datasetCt)

  if(is.null(ddir) &  (datasetCt!=1))
    if(ifD) print( "Request to generate multiple datasets, and will return as a list.")

  for( i in 1:datasetCt){

        if(!is.null(infoS)){
          trioData1 = HRCB.Esp1Rule.spTrioOnBase(bkMap=NULL, preObj=preObj, spStrata=finalUse,
                                       rule=rule, caseNo,
                                       ifS = qp(infoS, "_", i) , reControl=reControl)
        }else{
          trioData1 = HRCB.Esp1Rule.spTrioOnBase(bkMap=NULL, preObj=preObj, spStrata=finalUse,
                                       rule=rule, caseNo,
                                       ifS = NULL , reControl=reControl)
        }
    #tryCatch({
        if(!is.null(exInfoS)){
          trioData2 = bkMap.LRCB.spTrio(bkMapNS, caseNo=caseNo, ifS = qp(exInfoS, "_", i), reControl=reControl)
        }else{
          trioData2 = bkMap.LRCB.spTrio(bkMapNS, caseNo=caseNo, ifS = NULL, reControl=reControl)
        }
     #}, warning=function(w){print(w)})
        
        simuTrioData = trioMerge(trioData1, trioData2, colName1=info$newColName, colName2=info$noCausalColName)

        ## get back to common coding scheme, 3 for heter
        if( is.null(dim(simuTrioData))){
          snp1recode =  util.vec.replace(simuTrioData, orignal = c(0,1,3,2), replaceBy= dig1Code)
        }else{
          snp1recode =  apply(simuTrioData, 1:2, FUN= util.vec.replace, orignal = c(0,1,3,2), replaceBy=dig1Code)
        }

        ## adding other stuff
        if(ifD) print("A")
        if(ifD) print(dim(snp1recode))
        famid = rep(1:caseNo, each=3)
        pid = rep(1:3, times=caseNo)
        trio.snp = cbind(famid, pid, snp1recode)
        colnames(trio.snp) = c("famid", "pid", paste("snp", 1:(ncol(snp1recode)), sep=""))
        
        if(is.null(ddir)){
          simuTrio[[i]]=trio.snp
        }else{

         if (ddir==""){
           if (is.null(startIdx)){
             save(trio.snp, file=qp("trioSimu_", i, ".RData"))
           }else{
             save(trio.snp, file=qp("trioSimu_", startIdx+i-1, ".RData"))
           }
         }else{
           if (is.null(startIdx)){
             save(trio.snp, file=file.path(ddir, qp("trioSimu_", i, ".RData")))
           }else{
             save(trio.snp, file=file.path(ddir, qp("trioSimu_", startIdx+i-1, ".RData")))
           }
         } 
       }

  }

  if(is.null(ddir)){
    return(simuTrio)
  }else{
    return(NULL)
  }

}

trio.simuDev <-
function(bkMap=NULL,  sigStr="g9=11 and g13=11", sigType, para=c(-1, .5),  caseNo=10, datasetCt=1,  dig1Code=0:3, ddF=NULL, startIdx=NULL, stepstone.saveFN=NULL, stepstone.name=NULL, spSupHap.namePrefix=NULL, verbose=TRUE, reControl=FALSE){

  print(paste("Try to simulated trio data: # datasets=", datasetCt, "; # trio =", caseNo, sep=""))


  if(!is.null(ddF)){
    if(ddF==""){
      print(paste("Simulated data file(s) and other result file(s) are saved under current working directory."))
      infoS.s = spSupHap.namePrefix
      exInfoS.s = paste(spSupHap.namePrefix, "LRCB", sep="")
    }else{
      print(paste("Create directory:", ddF, ", where simulated data file(s) and other result file(s) are saved.", sep=""))
      dir.create(path=ddF, showWarnings = TRUE)
      infoS.s = file.path(ddF, spSupHap.namePrefix)
      exInfoS.s = file.path(ddF, paste(spSupHap.namePrefix, "LRCB", sep=""))
    }

    if(is.null(spSupHap.namePrefix))  infoS.s = NULL
  }else{
    print(paste("Value for argument ddF is NULL. Function will return data as a list, ignoring input for argument, spSupHap.namePrefix"))
    infoS.s = NULL
    exInfoS.s = NULL
  }

  if(is.null(bkMap)){
    print(paste("Value for argument bkMap is NULL. Function will use package default object, simuBkMap."))
    data(simuBkMap, envir = environment())
    bkdata = get("simuBkMap")
    bkMap = bkMap.constr(data=bkdata, keyCol=1, hapLenCol=NULL, expCol=2, probCol=3, alleleCode=1:2)
  }

  #print("Info on data object for haplotype block frequencies:")
  #print(str(bkMap, max.level=1))

  ##  Dec08Change!!! allow D/R coding
  rule =signalRule.2strata.build(sigStr=sigStr, sigType=sigType, para=para)

  #print("Info on data object for risk factor, signalRule:")
  #print(rule$slist[[1]]$str)
  if(is.null(spSupHap.namePrefix)){
    infoS.s = NULL
    exInfoS.s = NULL
  }else{
    if(!is.null(ddF)){
      if(ddF==""){
        infoS.s = spSupHap.namePrefix
        exInfoS.s = paste(spSupHap.namePrefix, "LRCB", sep="")
      }else{
        infoS.s = file.path(ddF, spSupHap.namePrefix)
        exInfoS.s = file.path(ddF, paste(spSupHap.namePrefix, "LRCB", sep=""))
      }
    }else{
      infoS.s = NULL
      exInfoS.s = NULL
    }
  }

  ptm=proc.time()


  trioData = trio.simu.proposed(bkMap=bkMap,
                    rule=rule,
                    caseNo=caseNo,
                    datasetCt=datasetCt,
                    infoS=infoS.s,
                    exInfoS=exInfoS.s,
                    ddir=ddF,
                    startIdx=startIdx,
                    spStrata.saveFN = stepstone.saveFN,
                    spStrata.name = stepstone.name,
                    reControl =reControl,  dig1Code=dig1Code 
                    )
  
  if(verbose) print(paste("Time used to generate ", datasetCt,  " dataset(s).", sep=""))
  if(verbose) print(proc.time() - ptm)
  gc.e = gc()
 
  if(verbose) print("Info on memory usage:")
  if(verbose) print(gc.e)

  print(paste("Finished generating ", datasetCt,  " dataset(s).", sep=""))

  return(trioData)
}

trio.simuOLD <-
function(bkMap=NULL,  interaction="9R and 13R", alpha, beta, n=10, rep=1,  stepstone.saveFN=NULL, stepstone.name=NULL, verbose=TRUE){

  sigType="D/R"
  para = c(alpha, beta)
  dig1Code=c(4,0,1,2)
  ddF = NULL
  spSupHap.namePrefix=NULL
  alleleCode=1:2

  print(paste("Try to simulated trio data: # datasets=", rep, "; # trio =", n, sep=""))
  
  if(!is.null(ddF)){
    if(ddF==""){
      print(paste("Simulated data file(s) and other result file(s) are saved under current working directory:",  getwd()))
      infoS.s = spSupHap.namePrefix
    }else{
      print(paste("Create directory:", ddF, ", where simulated data file(s) and other result file(s) are saved.", sep=""))
      dir.create(path=ddF, showWarnings = TRUE)
      infoS.s = file.path(ddF, spSupHap.namePrefix)
    }

    if(is.null(spSupHap.namePrefix))  infoS.s = NULL
  }else{
    #print(paste("Value for argument ddF is NULL. Function will return data as a list, ignoring input for argument, spSupHap.namePrefix"))
    infoS.s = NULL
  }

  if(is.null(bkMap)){
    print(paste("Value for argument bkMap is NULL. Function will use package default object, simuBkMap."))
    data(simuBkMap, envir = environment())
    bkdata = get("simuBkMap")
    bkMap = bkMap.constr(data=bkdata, keyCol=1, hapLenCol=NULL, expCol=2, probCol=3, alleleCode=alleleCode)
  }

  #print("Info on data object for haplotype block frequencies:")
  #print(str(bkMap, max.level=1))
   ##  Dec08Change!!! allow D/R coding
  rule =signalRule.2strata.build(sigStr=interaction, sigType=sigType, para=para)

  #print("Info on data object for risk factor, signalRule:")
  #print(rule$slist[[1]]$str)

  ptm=proc.time()

  trioData = trio.simu.proposed(bkMap=bkMap,
                    rule=rule,
                    caseNo=n,
                    datasetCt=rep,
                    infoS=infoS.s,
                    exInfoS=NULL,
                    ddir=ddF,
                    spStrata.saveFN = stepstone.saveFN,
                    spStrata.name = stepstone.name,
                    reControl =FALSE,  dig1Code=dig1Code 
                    )
  
  if(verbose) print(paste("Time used to generate ", rep,  " dataset(s).", sep=""))
  if(verbose) print(proc.time() - ptm)
  gc.e = gc()
 
  if(verbose) print("Info on memory usage:")
  if(verbose) print(gc.e)

  print(paste("Finished generating ", rep,  " dataset(s).", sep=""))

  return(trioData)
}

trioFile.proc <-
function(data, key.prefix="", bk.sizes=NULL, dig2Code=0:2, dig1Code=c(0,1,3,2),
		action = c("missingReport", "Mendelian check", "freq estimate"), ... ){
	
	
	sep=""
	header =FALSE
	pedCol=1
	memCol=2
	snpIdxRange = c(3, ncol(data))
	is.1digit=TRUE
	
#   if(!is.null(txtF)){
#     if(is.character(txtF)){
#          ## by default the text file has no header 
#          #trioTwoDigit = read.csv(file=data, header = header, sep=sep)
#          stop("The argument, data, cannot be a string.")
#     }else{
#          trioTwoDigit = data
#     }
#   }
	
	trioTwoDigit = data
	
	snpStartLeftIndex=snpIdxRange[1]
	snpEndRightIndex =ncol(trioTwoDigit) - snpIdxRange[2] + 1
	
	
	if(!is.1digit){
		genos  = trioTwoDigit[, snpStartLeftIndex:(ncol(trioTwoDigit)-snpEndRightIndex+1)]
		## check input, if is.1digit=TRUE, dig2Code must be c(0,1,2), if is.1digit=FALSE, dig1Code must be c(NA, 0,1,2,)
		code.Factor = apply( genos, 2, FUN=is.factor)
		if(max(code.Factor)==1) stop("Genotypes cannot be factors.")
		
		code.Factor = apply( genos, 2, FUN=is.numeric)
		if(min(code.Factor)==0) stop("Genotypes must be numerical.")
		
		code.Check = sort(unique(unlist(genos)))
		
		if( min(code.Check==dig2Code)==0 ){
			stop("Trio data is in 2-digit coding, but alleles are not represented by 0, 1, and 2.")
		}
		
		
		snpNum = ncol(genos)/2
		snp1digit = exchangeDigit(ma=genos,
				cols=c(1,snpNum*2), dig1Code=dig1Code, dig2Code =dig2Code, action=c("2to1"))
		
	}else{
		genos  = trioTwoDigit[, snpStartLeftIndex:(ncol(trioTwoDigit)-snpEndRightIndex+1)]
		
		## check input, if is.1digit=TRUE, dig2Code must be c(0,1,2), if is.1digit=FALSE, dig1Code must be c(NA, 0,1,2,)
		code.Factor = apply( genos, 2, FUN=is.factor)
		if(max(code.Factor)==1) stop("Genotypes cannot be factors.")
		
		code.Factor = apply( genos, 2, FUN=is.numeric)
		if(min(code.Factor)==0) stop("Genotypes must be numerical.")
		
		## check the coding for trio
		code.Check = unique(unlist(genos))
		
		code.Left = code.Check[!is.na(code.Check)]
		
		code.Match = match(code.Left, dig1Code[2:4])
		if( sum(!is.na(code.Match)) != length(code.Match)){
			stop("Non-missing genotypes are not coded as 0, 1, and 2")
		}
		
		snpNum = ncol(genos)
		snp1digit = genos
		## change to inside Qing's coding scheme, now assume the input code is c(NA, 0,1,2), no change, and no checking
		
#      if(min(dig1Code==c(0,1,2,3))==0){
#        snp1digit = apply(snp1digit, 1:2,  FUN= util.vec.replace, orignal = dig1Code, replaceBy=c(0,1,2,3))
		
	}
	
	snp1digit.inside = apply(snp1digit, 1:2,  FUN= util.vec.replace, orignal = dig1Code,
			replaceBy=c(0,1,3,2))
	
	re = list()
	
#   if(is.element("formTrio1digit", action)){
#     trio1digit = cbind( trioTwoDigit[, c(pedCol, memCol)], snp1digit)
#     re = c(re, trio1digit=list(trio1digit))
#   }
	
	if(is.element("missingReport", action)){
		## check necessary parameters
		if( is.element(snpStartLeftIndex, c(pedCol, memCol))){
			warning("Cannot report missing information. The argument, snpStartLeftIndex, is one of the special column for the trio file.")
		}else{
			
			if(is.1digit){
				#print(here)
				#print(snp1digit.inside)
				#print(snpStartLeftIndex)
				#print(snpEndRightIddex)
				missSNPPos = findMissing(df=snp1digit.inside, is.1digit=TRUE, snpStartLeftIndex=snpStartLeftIndex-2,
						snpEndRightIndex=snpEndRightIndex, dig1Code=c(0,1,3,2), dig2Code=dig2Code)
			}else{
				missSNPPos = findMissing(df=trioTwoDigit, is.1digit=FALSE, snpStartLeftIndex=snpStartLeftIndex,
						snpEndRightIndex=snpEndRightIndex, dig1Code=NULL, dig2Code=dig2Code )
			}
			
			if(is.null(missSNPPos)){
				
				re = c(re, missIdx = list(NULL))
			}else{
				re = c(re, missIdx = list(missSNPPos))
			}
		}
	}
	
	if(is.element("Mendelian check", action)){
		tmpDigit=2
		if(is.1digit) tmpDigit = 1
		snpTrio = matrix(snp1digit.inside, nrow=3, byrow=FALSE)
		
		MedErr = matrix(NA, ncol=4, nrow=ncol(snpTrio))
		colnames(MedErr)=c("y", "x", "trio", "SNP")
		tryCatchEnv = new.env(parent=baseenv())
		assign("MedErr.ct", 0, envir=tryCatchEnv)	
		assign("MedErr", MedErr, envir=tryCatchEnv)

		trioCt = nrow(snp1digit)/3
		
		#print(str(snpTrio))
		for ( i in 1:ncol(snpTrio)){
			tryCatch({
						tt = checkMendelianError(codedSNPTrio=snpTrio[,i], snpCoding=c(0,1,2,3))
					}, error = function(e){
						a = get("MedErr.ct", envir=tryCatchEnv)
						a = a+1
						assign("MedErr.ct", a, envir=tryCatchEnv)
						tttx = ceiling(i/trioCt)
						ttty = i%%trioCt
						if(ttty==0) ttty = trioCt
						b = get("MedErr", envir=tryCatchEnv)
						b[a,] = c( (ttty-1)*3+1,  (tttx-1)*tmpDigit+1+snpStartLeftIndex-1,  ttty,    tttx)
						assign("MedErr", b, envir=tryCatchEnv)
						#print( MedErr[MedErr.ct,,drop=FALSE] )
					})          
		}
		MedErr.ct = get("MedErr.ct", envir=tryCatchEnv)
		MedErr = get("MedErr", envir=tryCatchEnv)
				
		if(MedErr.ct==0) {
			MedErr=NULL
			#print("No Mendelian error.")
		}else{
			#print("Found Mendelian error(s).")
		}
		re = c(re, MedErr=list(MedErr[1:MedErr.ct,,drop=FALSE]))
		
	}
	
	if(is.element("freq estimate", action)){
		## check necessary parameters
		if(is.null(bk.sizes)) warning("Cannot provide frequencies estimation. The argument, bk.sizes, is missing.")
		
		if( is.element(snpStartLeftIndex, c(pedCol, memCol))){
			warning("Cannot provide frequencies estimation. The argument, snpStartLeftIndex, is one of the special column for linkage file.")
		}else{
			
			
			tmp = ncol(trioTwoDigit)
			parTwoDigit = getBackParentGeno(trioDf=trioTwoDigit, famCol=pedCol, memCol=memCol,
					snpIdx=snpStartLeftIndex:(tmp-snpEndRightIndex+1), re.child=FALSE, prefix=NULL)
			#print(dim(parTwoDigit))
			
			if( is.null(bk.sizes) ){
				warning("Cannot provide frequencies estimation. The argument, bk.sizes, is not provided for user-specify option.")
			}else{
				
				map=freqmap.reconstruct(data=parTwoDigit, cols=c(3, ncol(parTwoDigit)), loci.ct=bk.sizes, is.1digit=is.1digit,
						dig1Code=dig1Code, dig2Code = dig2Code, key.prefix=key.prefix, start.base=1, ...)
				
				re = c(re, freq=list(map))
			}
			
		} ##if( is.element(snpStartLeftIndex, c(pedCol, memCol, affectCol, dadCol, momCol))){
		
	}
	
	return(re)
	
}

trioMerge <-
function(trioData1, trioData2, colName1, colName2,snpCoding=0:3){

  ifD = FALSE

  # print("trioMerge")
  # print(colName1)
  # print(colName2)
  # 
  # print(str(trioData1))
  # print(str(trioData2))
  # 
  # print(trioData1[1:6, 1:5])
  # print(trioData2[1:6, 1:5])

  if( sum(snpCoding == (0:3))!=4)
    stop ("Function use 1-digit coding for genotypes as the following, integer 1 to 3 to
                    represent the three genotypes: less common homozygous, common homozygous, and heterzygous.
                     And zero for missing value")

  trioData = cbind(trioData1, trioData2)

  ## note this is 1-digit coding.
  colName1.first = colName1[ seq.int(from=1, to=length(colName1), by=2) ]
  colName2.first = colName2[ seq.int(from=1, to=length(colName2), by=2) ]

  
  trioCol = c(colName1.first, colName2.first)

  id.all = 1:(length(trioCol)/2)
  
  ori.col = paste("v", id.all, "a", sep="")
  #print(ori.col)

  shuffle.order = match(ori.col, trioCol)

  #print( cbind(trioCol, ori.col, shuffle.order))

  trioData.shuffled = trioData[, shuffle.order]

  #print(shuffle.order)
  #print(colnames(trioData.shuffled))

  if(ifD) print(str(trioData.shuffled))
  
  return(trioData.shuffled)

}

txtToGenoMap <-
function(txtF, delim=",", dataHeaders=NULL, genotype=c("11", "12", "22"), snpBase=1){
	##txtF = "tblBlock.csv"
	##dataHeaders = NULL
	
	if(is.null(dataHeaders)){
		## assume that txtF has the first row as column names
		data = read.csv(file=txtF, header = TRUE, sep=delim, as.is=TRUE)
	}else{
		## assume that column names of the data is passed from outside
		data = read.csv(file=txtF, header = FALSE, sep=delim, as.is=TRUE)
		colnames(data) = dataHeaders
	}
	
	genoMap = dfToGenoMap(df=data, dataHeaders=dataHeaders,  genotype=genotype, snpBase=snpBase)
	
	return(genoMap)
	
}

txtToHapBkMap <-
function(txtF, delim=",", dataHeaders=NULL, sorted=FALSE, ...){
	##txtF = "tblBlock.csv"
	##dataHeaders = NULL
	
	if(is.null(dataHeaders)){
		## assume that txtF has the first row as column names
		data = read.csv(file=txtF, header = TRUE, sep=delim, na="missing")
	}else{
		## assume that column names of the data is passed from outside
		data = read.csv(file=txtF, header = FALSE, sep=delim, na="missing")
		colnames(data) = dataHeaders
	}
	
	m = match(c("ch", "block", "hap", "freq", "hapLen","markers_b", "markers_e"), 
			colnames(data), 0)
	
	## assume except for markers_b and marekers_e, other variable should be presented
	if(min(m[1:5])==0) stop("One or more required variable(s) missing")
	
	## cast the data into the right format
	for ( i in m ){
		if(class(data[,i])=="factor") data[,i]=as.numeric(as.character(data[,i]))
	}
	
	## create key as a combination of chromosome and block
	key = paste(data[,m[1]], data[,m[2]], sep="-")
	
	
	df = cbind(key, data[,m])
	
	if(!sorted){
		df = df[ order(df$ch, df$block),]
	}
	
	hapBkMap = NULL
	if(m[6]==0){
		hapBkMap = dfToHapBkMap(df, keyCol=1, chCol=2, blockCol=3,
				expCol=4, probCol=5, hapLenCol=6, ...)
	}else{
		hapBkMap = dfToHapBkMap(df, keyCol=1, chCol=2, blockCol=3,
				expCol=4, probCol=5, hapLenCol=6,
				beginCol=7, endCol=8, ...)
	}
	return(hapBkMap)             
}

util.array.rmEmptyStr <-
function(vec){
  lens= nchar(vec)
  re = vec[lens>=1]
  return(re)
}

util.array3d.2matrix <-
function(arr, dimLevel=dimnames(arr)[[3]], showDim3 = FALSE, re.num=TRUE, appAsRow = FALSE){
  re = NULL   
  for( i in 1:length(dimLevel)){
    ma = arr[,,i]
    if(showDim3){
      if(re.num){
        ma = cbind(ma, rep(as.numeric(dimLevel[i]), nrow(ma)))
      }else{
        mat = data.frame(ma)
        dimnames(mat)=dimnames(ma)
        ma = cbind(mat, rep(dimLevel[i], nrow(ma)))
      }
    }
    if(appAsRow){
      re = rbind(re, ma)
    }else{
      re = cbind(re, ma)
    }
  }
  return(re)
}

util.char.1stIdx <-
function(str, find, match=TRUE, is.array=FALSE){
   
   if(!is.array){
     charArray = util.str.2CharArray(str, len=nchar(str))
   }else{
     charArray=str
   }
   if(match){
     pos = which(charArray==find)
   }else{
     pos = which(charArray!=find)

     #print(pos)
   }
   if( length(pos)>=1){
     if(match){
       return(pos[1])
     }else{
       return(pos[1]-1)
     }
   }else{
     return (0)
   }
}

util.char.1stStrIdx <-
function(str, find){

   result = qstrsplit(str, find, re.1st=TRUE)
   if(length(result)==1){
     # NA==not found it
     if(is.na(result)) return(0)
     # NULL==contain find only
     if(is.null(result)) return(1)
   }

   return(nchar(result[1])+1)
}

util.dataframe.findColNo <-
function(data, colNameSearched){
  colNms = colnames(data)
  matchIdx = match(colNameSearched, colNms)
  return(matchIdx)
}

util.dataframe.merge <-
function(list, vertical=TRUE){

  len = length(list)
  data = NULL
  for( i in 1:len){
    cur = list[[i]]
    if(vertical){
      data = rbind(data, cur)
    }else{
      data = cbind(data, cur)
    }
  }
  return(data)
}

util.dataframe.round <-
function(vecData, keepZero=FALSE, na.replace=NULL, digits=getOption("digits"), ...){

  re = sapply(vecData, FUN=function(x, na.rep, digits, ...){
                                num = x
                                if(is.numeric(num)){

                                    if(is.na(num) & (!is.null(na.rep))){
                                      num = na.replace
                                    }else if(is.na(num) & is.null(na.rep)){
                                      num = NA
                                    }else{
                                      if(keepZero){
                                        if(digits==0){
                                          num = format(num, nsmall=0)
                                        }else{
                                          num = format(num, digits=digits, nsmall=digits)
                                        }
                                      }else{
                                        num = round(num, digits=digits, ...)
                                      }
                                    }
                                                                 
                                }else{
                                  if(num=="NA" & is.null(na.rep)){
                                    num = as.character(num)
                                  }else if(num=="NA" & (!is.null(na.rep))){
                                    num = na.rep
                                  }else{
                                    num = num
                                  }
                                }
                                  
                              }, na.rep = na.replace, digits=digits, ...)
  return(re)

}

util.findBrace <-
function(str){

  #left ==0, right=1
  brace=-1
  idx=0
  
  arr = util.str.2CharArray(str, len=nchar(str))
  l.find =util.char.1stIdx(arr, "(", match=TRUE, is.array=TRUE)
  r.find =util.char.1stIdx(arr, ")", match=TRUE, is.array=TRUE)

  if(l.find==0){
    if(r.find!=0)
      return (list(brace=1, idx=r.find))
    else
      return(NULL)
  }
  if(r.find==0){
    if(l.find!=0)
      return (list(brace=0, idx=l.find))
    else
      return(NULL)
  }
  if(l.find<r.find) return (list(brace=0, idx=l.find))
  if(l.find>r.find) return (list(brace=1, idx=r.find))
  
}

util.it.smallLargeIdx <-
function(len, keep.same=FALSE){

      if(keep.same){
               init = 1:len
                      idx.ma = matrix(NA, ncol=2 , nrow=(len^2-len)/2+len )
                      idx.init = unlist(lapply(init, FUN=function(i, ct){
                                 rep(i, times=ct-i+1)}, ct=len))
                      idx.next = unlist(lapply(init, FUN=function(i, ct){
                                 i:ct
                               }, ct=len))
                      idx.ma[,1]=idx.init
                      idx.ma[,2]=idx.next

             }else{
                      init=1:(len-1)
                      #print("util.it.smallLargeIdx2")
                      #print(init)
                      #print(len)
                             idx.ma = matrix(NA, ncol=2 , nrow=(len^2-len)/2 )
                             idx.init = unlist(lapply(init, FUN=function(i, ct){
                                        rep(i, times=ct-i)}, ct=len))
                             idx.next = unlist(lapply(init, FUN=function(i, ct){
                                        (i+1):ct
                                      }, ct=len))
                             idx.ma[,1]=idx.init
                             idx.ma[,2]=idx.next
                    }

          return(idx.ma)
    }

util.it.smallLargeIdxOOLD <-
function(idx, keep.same=FALSE){
    ct = length(idx)
    idxPair=NULL
    if(keep.same){
     for ( i in 1:ct ){
       for ( j in 1:ct){
         #if(i<j) IdxPair = rbind(hapIdxCorner, c(hap1=i, hap2=j))
         if(i<=j) idxPair = rbind(idxPair, c(idx1=i, idx2=j))
       }
     }
    return(idxPair)
   }else{
     for ( i in 1:ct ){
       for ( j in 1:ct){
         #if(i<j) IdxPair = rbind(hapIdxCorner, c(hap1=i, hap2=j))
         if(i<j) idxPair = rbind(idxPair, c(idx1=i, idx2=j))
       }
     }
     return(idxPair)
   }
}

util.it.triMatch <-
function(idxPair, len){
  ## rely on the structure of tri idx pair
  ## rely on the order to the pair
  if(idxPair[1]-idxPair[2]==0){
     ## two idx are the same
     endIdx = (len^2-len)/2
              rowIdx = endIdx + idxPair[1]
   }else{
              # two idx are not the same, old approach
              #idxLen = c(0, (len-1):1)
              #idxLen.upper = idxLen[1:idxPair[1]]
              #idx.added = idxPair[2]-idxPair[1]
              #rowIdx = sum(idxLen.upper)+idx.added

              # new math approach
              idx.added = idxPair[2]-idxPair[1]
              rowIdx = len/2*(len-1)-(1+len-idxPair[1])/2*(len-idxPair[1]) + idx.added
   }
   return(rowIdx)
}

util.it.triMatch2 <-
function(dipIdx, len, re.homo=FALSE){
       ## rely on the structure of tri idx pair
       ## rely on the order to the pair

       upper = .5*len*(len-1)
       if ((dipIdx)>upper){
         if(re.homo){
           return(c(rep(dipIdx-upper, 2), 0))
         }else{
           return(rep(dipIdx-upper, 2))
         }
       }

       cutoff = (len-1):1
       cutoff = cumsum(cutoff)

       block = qing.cut(dipIdx, cutPt=cutoff, cutPt.ordered = TRUE, right.include=TRUE)
       if(re.homo){
         return(c(block, dipIdx - c(0, cutoff)[block] + block, 1))
       }else{
         return(c(block, dipIdx - c(0, cutoff)[block] + block))
       }
#       if (block==1){
#         return( c(block, dipIdx+1))
#       }else{
#         return( c(block, dipIdx - cutoff[block-1] + block))
#       }

}

util.it.upTriCombIdx <-
function(idx, diag=TRUE, re.ordered = FALSE){
     ct = length(idx)
     reRow = (ct^2 - ct)/2

     if(diag){
       reRow = reRow + ct
       re = matrix(NA, ncol=2, nrow=reRow)
       if(re.ordered){
          it = 0
          for ( i in 1:ct ){
            for ( j in i:ct){
                it = it + 1
                re[it,] = range(c(idx[i], idx[j]))
            }
          }
       }else{
          it = 0
          for ( i in 1:ct ){
            for ( j in i:ct){
                it = it + 1
                re[it,] = c(idx[i], idx[j])
            }
          }
       }
       colnames(re) = c("id1", "id2")
       return(re)
     }

     re = matrix(NA, ncol=2, nrow=reRow)
     if(re.ordered){
        it = 0
        for ( i in 1:ct ){
          j = i + 1 
          while ( j <= ct){
              ## print(i)
              ## print(j)
              it = it + 1
              re[it,] = range(c(idx[i], idx[j]))
              j = j + 1
          }
        }
     }else{
        it = 0
        for ( i in 1:ct ){
          j = i + 1
          while( j <= ct){
              it = it + 1
              re[it,] = c(idx[i], idx[j])
              j = j + 1
          }
        }
     }
     colnames(re) = c("id1", "id2")
     return(re)

}

util.list.2matrix <-
function(list, byRow = TRUE, add.dimnames=FALSE) 
{
    colCt = length(list)
    rowCt = length(list[[1]])
    re = matrix(unlist(list), ncol = colCt, nrow = rowCt, byrow = FALSE)
    if(add.dimnames){
      header = dimnames( list[[1]] )[[1]]
      if(is.null(header)) header = paste("v", 1:colCt, sep="")
      rownames(re)=header
    }
    if (byRow) 
        re = t(re)
    return(re)
}

util.list.ex <-
function(nameList, varList, na.allow = TRUE, na.replace = NULL){
  ifD = FALSE

  varNames = names(varList)
  varLen = length(varList)
  exLen = length(nameList)

  if(varLen!=exLen) warning(paste("Two lists are of different length. nameList of length(", exLen, "); varList of length(", varLen,").", sep="") )
  ## create a map with key = var name, item = var position 
  varMap = data.frame(item = seq.int(from=1, to=varLen, by=1), key = varNames)

  namePos = rep(0, length=exLen)
  
  tryCatchEnv = new.env(parent=baseenv())
  assign("namePos", namePos, envir=tryCatchEnv)
  
  ## based on this map, find the right sequence of positions to extract values
  for ( i in 1:exLen){
    tryCatch({namePos[i] = varMap$item[varMap$key == nameList[i]]},
             error = function(e){
               if(!na.allow){
                 stop(paste("var with name=[", nameList[i], "] not found in the varList.", sep=""))
               }else{
				 a = get("namePos", envir=tryCatchEnv)
				 a[i]=0
				 assign("namePos", a, envir=tryCatchEnv)
                 #namePos[i] <<- 0
               }
             })

  }

  ## assign default value
  varEx = rep(NA, length=exLen)
  if(!is.null(na.replace)){
    varEx = rep(na.replace, length=exLen)
  }
  
  for ( i in 1:exLen ){
    if(namePos[i]!=0){
      varEx[i]= unlist(varList[namePos[i]])
      if(ifD) print(paste("i=(", i, ") namePos=(", namePos[i], ")."))
    }
  }
    
  return (varEx)
}

util.list.rmByKeyVal <-
function(dataList, keys, keyRemoved){
    seqOrder = is.element(keys, keyRemoved)
    return(dataList[!seqOrder])
}

util.listMatrix.2matrix <-
function(list, rbind=TRUE){

  rowCt = length(list)

  re = NULL
  if(rbind){
      for ( i in 1:rowCt ){
         re = rbind(re, list[[i]])
      }
  }else{
      for ( i in 1:rowCt ){
        re = cbind(re, list[[i]])
      }
  }

  return(re)
}

util.matrix.2list <-
function(ma, byRow=TRUE){

  if(byRow){
    colNum = dim(ma)[2]
    rowNum = dim(ma)[1]
    ma = t(ma)
  }else{
    colNum = dim(ma)[1]
    rowNum = dim(ma)[2]
  }

  maItem = unlist(as.list(ma))
  re = NULL
  for(row in 1:rowNum){
    cur = maItem[seq.int(from=(row-1)*colNum+1, to=row*colNum, by=1)]
    re = c(re, list(cur))
  }
  return(re)
}

util.matrix.cat <-
function(data, cols, sep=""){
  len = length(cols)
  for(i in 1:len){
    if(i==1) {
      re = data[,cols[i]]
    }else{
      re = paste(re, data[,cols[i]], sep=sep)
    }
  }
  return(re)
}

util.matrix.catm <-
function(inMatrix, colVec, discIn=NULL, discVec=NULL, delimVec, digitVec, missingVec=NULL){

     anyMatrix = inMatrix[,colVec]
     mRow = dim(anyMatrix)[1]
     mCol = dim(anyMatrix)[2]
     
     re = matrix(ncol =2, nrow = mRow)

     if(!is.null(discIn)){
       re[,1]= inMatrix[,discIn]
     }else{
       re[,1]=discVec
     }
     for(row in 1:mRow) {
       curRow = NULL
       for(col in 1:mCol){
         num = anyMatrix[row, col]
         if(is.na(num)){
           if(is.null(missingVec)){
             num = "NA"
           }else{
             num = missingVec[col]
           } ## if(is.null(missingVec)){           
         }else{
           ##num = round(num, digitVec[col])
           if(digitVec[col]==0){
             num=format(num, nsmall=0)
           }else{
             num = format(num, digits=digitVec[col], nsmall=digitVec[col])
           }
         } ##if(is.na(num)){
         
         if(col == 1){
           curRow = paste(delimVec[(0+col)], num, sep="")
         }else{
           if(col == mCol){
             curRow = paste(curRow, delimVec[col], num, delimVec[1+col], sep="")
           }else{
             curRow = paste(curRow, delimVec[col], num, sep="")
           } ##if(col = mCol){
         } ## if(col = 1){
         
       } ##for(col in 1:mCol){
       re[row,2] = curRow
     } ##for(row in 1:mRow) {
     return(re)
}

util.matrix.catSave <-
function(data, cols, sep ="-"){
  colNum = dim(data)[2]
  keys = util.matrix.cat(data, cols, sep)
  data[,colNum+1]=keys
  return(data)
}

util.matrix.clone <-
function(ma, n, rowAppend=TRUE){
  rnm = dim(ma)[1]
  cnm = dim(ma)[2]
  if(rowAppend){
    t1 = replicate(n, t(ma))
    re = t(matrix(t1, nrow=cnm, byrow=FALSE))
    re
  }else{
    t1 = replicate(n, ma)
    re = matrix(t1, nrow=rnm, byrow=FALSE)
    re
  }
  return(re)
}

util.matrix.col.shuffle <-
function(ma){

  if(!is.null(dim(ma)))  return(ma)

  colNum = dim(ma)[2]
 
  filterSeq = matrix(1:colNum, ncol=2)
  filterSeq = as.vector(t(filterSeq))

  re = ma[,filterSeq]

  return(re)
}

util.matrix.col.shuffle2 <-
function(ma1, ma2){

  re = cbind(ma1, ma2)

  colNum = 1
  if(!is.null(dim(ma1))){
    colNum = dim(ma1)[2]
  }
  filterSeq = matrix(1:(2*colNum), ncol=2)
  filterSeq = as.vector(t(filterSeq))

  re = re[,filterSeq]

  return(re)
}

util.matrix.colComp <-
function(ma, values, operator="and"){
  ifD = FALSE
  vCt = length(values)

  ## if "and" operator is chosen, the original list will decrease to speed up the comparison
  matchId = 1:(dim(ma)[1])
  if(ifD) print(matchId)
  for (i in 1:vCt ){
    if(ifD) print(i)
    compIdx = which(ma[matchId,i]==values[i])
    matchId = matchId[compIdx]
    if(ifD) print(paste("compIdx=", compIdx, collapse=", ", sep=""))
    if(ifD) print(paste("matchId=", matchId, collapse=", ", sep=""))
    
    if(length(matchId)==0) return (NULL)
  }
  return(matchId)

  ## if "or" operator is chose, the original list will remain the same to capture all that meet it.
  ## not implemented
}

util.matrix.colIdx4Match <-
function(ma, val){

  rowCt = nrow(ma)

  #print("util.matrix.colIdx4Match")
  #print(dim(ma))

  matchIdx = qing.mulMatch(val, ma)
  ##matchIdx = which (ma == val)
  
  if(length(matchIdx)>0 & matchIdx[1]!=0){
    
    re = qing.cut(matchIdx,
                  cutPt = seq.int(from=rowCt, to=rowCt*ncol(ma), by=rowCt), 
                  cutPt.ordered = TRUE, right.include=TRUE)

    ##  remove because it cause memeory problems
    ##  as.numeric(cut(matchIdx, breaks = c(0, seq(rowCt, rowCt*ncol(ma), by=rowCt)), include.lowest=TRUE, right=TRUE))
    
  }else{
    
    re =  NULL
  }

  return(re)
}

util.matrix.csvText <-
function(numericMa, fileName=NULL, colNames=NULL, rowNames=NULL, digit = NULL,  keepZero=FALSE){
     mRow = dim(numericMa)[1]
     mCol = dim(numericMa)[2]
     i = 0

     lineComment = ""
     
     comm = vector( )

            if(!is.null(colNames)){
              if(!is.null(rowNames))     lineComment = " \t"
              for( col in 1:mCol ){
                lineComment = paste(lineComment, colNames[col], sep="\t")  
              }
              i = i+1
              comm[i] = lineComment
            }     

     lineComment = ""
     
     for( row in 1:mRow ){
       i = i+1
          for( col in 1:mCol ){
          
                if(col == 1){
     
                  if(!is.null(rowNames)){
                        lineComment = paste(lineComment, rowNames[row], sep="\t")  
                      }
                  }
                  tmp = numericMa[row,col]
                  if(is.numeric(tmp) & (!is.null(digit))){
                    if(!keepZero){
                      if(digit==0){
                          num = format(tmp, nsmall=0)
                      }else{
                          num = format(tmp, digits=digit, nsmall=digit)
                      }
                      lineComment = paste(lineComment, num, sep="\t")
                    }else{
                      lineComment = paste(lineComment, round(tmp, digit), sep="\t")
                    }
                  }else{
                    lineComment = paste(lineComment, tmp, sep="\t") 
                  }
     
            }  ## for( col in 1:mCol ){
          comm[i] = lineComment
          lineComment = ""
     }  ## for( row in 1:mRow ){

     if(!is.null(fileName)) write(comm, file = fileName,  append = TRUE)
     return(comm)
}

util.matrix.delCol <-
function(ma, colPos){
	rnm = dim(ma)[1]
	cnm = dim(ma)[2]
	cutPt = (colPos-1)*rnm
	if(colPos==cnm){
	    re = matrix(ma[1:cutPt], nrow = rnm, ncol=(cnm-1))
	    return(re)
	}
        if(colPos == 1){
            re = matrix(ma[(rnm+1):(rnm*cnm)], nrow = rnm, ncol=(cnm-1))
	    return(re)
        }else{	
	    re = c(ma[1:cutPt], ma[(cutPt+rnm+1):(rnm*cnm)])
	    re = matrix(re, nrow = rnm, ncol=(cnm-1))									
	    return(re)
	}																			
}

util.matrix.exByKeyInRow <-
function(ma, keyCol, keys){

    maNew = ma[ is.element( ma[,keyCol], keys), ]

    return(maNew)
}

util.matrix.insertCol <-
function(ma, insertVec, afterCol){
	rnm = dim(ma)[1]
	cnm = dim(ma)[2]
	startnm = afterCol*rnm+1
        re = NULL
	if(afterCol==cnm){
	    re = matrix(c(ma, insertVec), ncol=cnm+1)
	}else{	
	    re = c(ma[1:startnm-1], insertVec, ma[startnm:(rnm*cnm)])
	    re = matrix(re, nrow = rnm, ncol=cnm+1)									
	}
        return(re)
      }

util.matrix.merge <-
function(ma, ma2){
  maVec = as.vector(ma)
  maVec2 = as.vector(ma2)
  re = c(maVec, maVec2)
  renames = NULL
  if(is.matrix(ma)) {
    maLen = dim(ma)[2]
    renames = colnames(ma)
  }else{
    maLen = 1
    if(is.null(names(ma))){
      renames = ""
    }else{
      renames = names(ma)
    }
  }
  

  if(is.matrix(ma2)) {
    maLen2 = dim(ma2)[2]
    renames = c(renames, colnames(ma2))

  }else{
    maLen2 = 1
    if(is.null(names(ma2))){
      renames = c(renames, "")
    }else{
      renames= c(renames, names(ma2))
    }
  }    
  if(F)print(renames)
  colNum = maLen + maLen2
  if(F)print(colNum)
  re = matrix(re, ncol=colNum)
  colnames(re) = renames
  
  return(re)
}

util.matrix.rmSparseRow <-
function(ma, colChecked, minNumInCol=1){

  ifD = FALSE
  checked = ma[,colChecked]
  if(ifD) print(checked)

  len = length(checked)
  rmList = NULL
  reList = NULL
  for( i in 1:len ){
    if(ifD) print(checked[i])
    if( checked[i] <= minNumInCol ){
      rmList = c(rmList, i)
    }else{
      reList = c(reList, i)

    }
  }
  if(ifD) print(rmList)
  if(is.null(rmList)) return(NULL)
  len = length(rmList)
  re = ma
    for( j in 1:len ){
      re = t(util.matrix.delCol (t(re), rmList[j]))
      if(ifD) print(re)
      rmList = rmList - 1
      if(ifD) print(rmList)
    }

  return(list(re, reList))
}

util.sql.groupby <-
function(data, groupCols, sep="-", varCol, type=c("max", "min", "sum", "mean"), na.rm=FALSE){

  m = match(c("max", "min", "sum", "mean"), type, 0)
  matchFun = which(m==1)
  
  
  if(length(groupCols)>1){
    key = util.matrix.cat(data, cols=groupCols, sep=sep)
  }else{
    key = data[,groupCols]
  }

  keyVal = unique(key)
  grpMax=NULL
  grpMin=NULL
  grpSum=NULL
  grpMean= NULL
  varVec=NULL
  for( i in keyVal){
    filter = key==i
    varVec = unlist(data[filter, varCol])
    for(j in matchFun){
      if(j==1) grpMax = c(grpMax, max(varVec, na.rm=na.rm))
      if(j==2) grpMin = c(grpMin, min(varVec, na.rm=na.rm))
      if(j==3) grpSum = c(grpSum, sum(varVec, na.rm=na.rm))
      if(j==4) grpMean = c(grpMean, mean(varVec, na.rm=na.rm))
    }

  }
  
  re = NULL
  re = cbind(re, key=keyVal) 
  for (j in matchFun){
    if(j==1) re = cbind(re, max=grpMax)
    if(j==2) re = cbind(re, min=grpMin)
    if(j==3) re = cbind(re, sum=grpSum)
    if(j==4) re = cbind(re, mean=grpMean)
  }

  return(re)
}

util.str.2CharArray <-
function(str, len=nchar(str)){

  startSeq = 1:len
  re = lapply(startSeq, FUN = function(inStr, startSeq){
    char = substr(inStr, startSeq, startSeq)
    char
  }, inStr = str)
  re = unlist(re)
  return(re)
}

util.str.replace <-
function(str, replaced, new, replace.all=FALSE){

  pos = util.char.1stStrIdx(str, find=replaced)
  if(pos==0){
    # if not found, return the original
    return(str)
  }

  if(  (pos+nchar(replaced))<=nchar(str)){
    str.left = substr(str, pos+nchar(replaced), nchar(str))
  
    if(replace.all){
      # iteratively replace the left-over part
      str.left = util.str.replace(str.left, replaced=replaced, new=new, replace.all=replace.all)
    }
    str.re = paste(substr(str, 1, pos-1), new, str.left, sep="")
  }else{
    str.re = paste(substr(str, 1, pos-1), new, sep="")
  }
  return(str.re)

}

util.str.rmSpacePadder <-
function(str, rm=c("b", "e")){

    charArray = util.str.2CharArray(str, len=nchar(str))
    blankPos = which(charArray!=" ")
    if(length(blankPos)==0) return("")
    if(is.element("b", rm)){
       if(is.element("e", rm)){
         re = substr(str, blankPos[1], blankPos[length(blankPos)])
       }else{
         re = substr(str, blankPos[1], nchar(str))
       }
    }else{
       if(is.element("e", rm)){
         re = substr(str, 1, blankPos[length(blankPos)])
       }else{
         re = str
       }
    }
    return(re)
}

util.str.seqCutter <-
function(str, delims){
   i = 1
   j = 1
   stopS = FALSE
   re = NULL
   len = length(delims)
   curStr = str
   lastIdx = 0
   while (i<=len & !stopS){
     idx.f = util.char.1stStrIdx(curStr, delims[i])
     if (idx.f==0){
       stopS = TRUE
       return(NA)
     }else{
       # find the current one
       #print(curStr)
       #print(idx.f)
       seg = substr(curStr, 1, idx.f-1)
       if(i==len) lastIdx = nchar(curStr)
       curStr = substr(curStr, idx.f+nchar(delims[i]), nchar(curStr))
       #print(curStr)
       re = c(re, seg)
     }
     i = i +1 
   }
   # if(idx.f+nchar(delims[i])<lastIdx) re = c(re, curStr) 
   if(idx.f + 2 < lastIdx) re = c(re, curStr)
   return(re)
 }

util.str.splitAndOrNot <-
function(str, split="and", check.space=FALSE){
  split.ex = util.str.splitEx(str, split)
  ## for example "and"
  if(length(split.ex)==1){
    ## check whether no "and" is found
    if (nchar(split.ex[1])==nchar(str)) {
      re = NULL
      return(re)
    }
  }
  ## remove space
  split.ex.rm= unlist(lapply(split.ex, util.str.rmSpacePadder, rm=c("b", "e")))
  items=util.array.rmEmptyStr(split.ex.rm)

  ## if check.space, return NULL if any of the item contains space in it
  if(check.space){
    found=FALSE
    i=1
    while(i<=length(items) & !found){
      tmp.idx = util.char.1stIdx(items[i], " ")
      if(tmp.idx>0) found=TRUE
      i = i + 1
    }
    if(found) return(NULL)
  }
  ## create the list for return
  re = list(op=split, items=util.array.rmEmptyStr(split.ex.rm))
  return(re)
 
}

util.str.splitEx <-
function(str, split, case.sen=TRUE){

  if(!case.sen){
    str = tolower(str)
    split = tolower(split)
  }
  split.re = strsplit(str, split=split)
  split.re=split.re[[1]]

  len.s = nchar(split.re)
  re = split.re[len.s!=0]
  return(re)
}

util.str.tokenPicker <-
function(str, delim, indexPicked){
  tokens = unlist(strsplit(str, delim))
  return(tokens[indexPicked])
}

util.tbl.exFactorInfo <-
function(tbl){
  rnum = dim(tbl)[1]
  cnum = dim(tbl)[2]

  headList = dimnames(tbl)[[2]]
  numList  = dimnames(tbl)[[1]]
  
  num = NULL
  head = NULL
  ct = NULL
  for ( i in 1:rnum){
    for (j in 1:cnum){
      cur = tbl[i,j]

      num = c(num, numList[i])
      head = c(head, headList[j])
      ct = c(ct, cur)
        
    }
  }
  re = data.frame( row.level=num, col.level=head, ct=as.numeric(ct))
  return(re)
}

util.vec.2maIdx <-
function(rowCt, idx){
  ## assuming putted in  column by column
  rowIdx = idx%%rowCt
  colIdx = (idx-rowIdx)/rowCt+1
  if(rowIdx==0) 
    rowIdx=rowCt
    
  return(c(rowIdx, colIdx))
}

util.vec.exByKey <-
function(vecKey, vec, keysOrder){
    filter =  match(keysOrder, vecKey)

    ex = vec[filter]

    return(ex)

}

util.vec.matchVec <-
function(vec, benchMark, vecLen, benchLen){

  dup = vecLen/benchLen

  bench = rep(benchMark, dup)

  matchset = vec == bench

  re = NULL
  for( i in 0:(dup-1)){
    pos = i*benchLen + 1
    re = c(re, as.logical(min(matchset[pos:(pos+benchLen-1)])))
  }

  return(re)
}

util.vec.matchVecIdx <-
function(vec, benchMark, vecLen, benchLen){

  aa = util.vec.matchVec(vec, benchMark, vecLen, benchLen)
 
  return(which(aa))
}

util.vec.orderPair <-
function(pair){
  front = min(pair)
  back = max(pair)
  return(c(front, back))
}

util.vec.replace <-
function(vec, orignal, replaceBy){
     vec.idx = match(vec, orignal)
     re = replaceBy[vec.idx]
     return(re)
}

wrComTbl <-
function(numericMa,       fileName=NULL, colNames=NULL, rowNames=NULL, digit = NULL){
     mRow = dim(numericMa)[1]
     mCol = dim(numericMa)[2]
     i = 0

     lineComment = ""
     
     comm = vector( )

            if(!is.null(colNames)){
              if(!is.null(rowNames))     lineComment = " \t"
              for( col in 1:mCol ){
                lineComment = paste(lineComment, colNames[col], sep="\t")  
              }
              i = i+1
              comm[i] = lineComment
            }     

     lineComment = ""
     
     for( row in 1:mRow ){
       i = i+1
          for( col in 1:mCol ){
          
                if(col == 1){
     
                  if(!is.null(rowNames)){
                        lineComment = paste(lineComment, rowNames[row], sep="\t")  
                      }
                  }
                  tmp = numericMa[row,col]
                  if(is.numeric(tmp) & (!is.null(digit))){
                    lineComment = paste(lineComment, round(tmp, digit), sep="\t") 
                  }else{
                    lineComment = paste(lineComment, tmp, sep="\t") 
                  }
     
            }  ## for( col in 1:mCol ){
          comm[i] = lineComment
          lineComment = ""
     }  ## for( row in 1:mRow ){

     if(!is.null(fileName)) write(comm, file = fileName,  append = FALSE)
     return(comm)
}

Try the trio package in your browser

Any scripts or data that you put into this service are public.

trio documentation built on Nov. 8, 2020, 7:41 p.m.