R/MultiPhen-internal.R

Defines functions .calcExpectedGeno .allFreq .calcOffset .findIn .readHeader .centralise .standardise .destandardise .cnt .cntNa .cntVec .convAvgToUnc .convAvgToUncM .convAvgToUncMat .convertFactorToMatrix .convertMatrixToFactor .estSigma .expand .expand2 .expandCoeff .expandDat .expDist .extractIds .extractIdsMat .extractIdsVec .extractSig .extractSig1 .extractSig1P .extractSigExact .extractSigP .pseudoInv .fillMissing .findInd .findmax .fisherPvalue .fixGenoCols .fixNonNumeric .getArray .getDim .getFamily .abfSig .abf .length .mabf .calcPPA .lrt .getGlm .glm1 .backwardSelection .getHist .getIMatrix .getin .getin1 .getin2 .getIncl .getMaf .getHWE .countIn .getOnes .getPvLrr .getPvLrrAllGen .getPvLrrMult .getPvLrrMultiGenVS .getPvLrrMultiGenNoVS .getPvLrrMultiGen .ordTestUnivariate .getPvRev .getPvLrrMultiGen1 .getPvRevPleio_ .getPvRevPleio .getPvRevPleioVS .getPvTable .getUNum1Matrix .hwestat .iscase .joinRes .hwep .hwepall .joinResVec .mafCount .makeFactor .makeNumeric .makeQuantile .makeThresh .makeTopTail .mergeAlleleCols .metaresFisher .metaresInvVarianceFixed .metaresHetMeasure .pFromSEBeta .seFromPBeta .getSuffix .fixDuplicatesInTodo .minOneMinus .mod .mod1 .expandGenoToDist .expandGenoToDistAll .reduceWeightsToGeno .applyTrans .applySidakCorrection .nyholdtSidak .readPhen .as.numeric1 .getLast .findInclusion .extractNames .trL .mergeFiles .rescale .getAvgWeights .makeGenoWeights .entropy .mPhen .splitImputed .getAvg .getAvgAll .nonNumeric .nonNumeric1 .nullDim .numLev .ordTest .ordTest1 .ordTest2 .chol .genRandCovar .reopenConnection .openOutputConnection .flatten .subSample .flattenBetaX .formatTable .writeTable .writeBed .summariseBootstrap .parseOpts .getLimitMatrixFromList .readLimitFile .updateSampleQC .getSumBetaX .sample1 .sampDirichlet .cholesky .pow .unchanged .cnvdistr .bafratio .bafratio .plotBaf .projOut .gs .sampJoint .qb .getPv .manhattanBed .manhattan .qqplot .qq .manh .heatm .fprint .plotFingerprint .makeColsForBed .getCNProfile .orthogonalise .readGenoConnection .closeGenoConnection .openGenoConnection .openVCFConnection .openZipConnection .openPlinkConnection .readLinesPlink .readPlinkConnection .readVCFConnection .readZip .getVCFSplit .sum1 .getVCFProcess .getVCFDimNames .readZipConnection .metaAnalysis .updateRelatedness .proc .proc1 .tabulateWithWeights

##helper function that reduces imputed matrix to expected genotypes
.calcExpectedGeno<-function(genoData){
  apply(genoData,c(1,2),.mod,as.numeric(dimnames(genoData)[[3]]))
}


.allFreq <-
function(y){
  vecm = c(2,1,0) 
  (vecm[1:length(y)] %*% y) / (2*(sum(y)))
}
.calcOffset <-
function(phenv,family,pheno_cov){
  offsets1 = rep(NA,dim(pheno_cov)[2])
  incl = phenv[2,]==1
  dimres = dim(pheno_cov)[2]
  if(dimres>0) 
    offsets1[incl] =  glm(phenv[1,incl]~pheno_cov[incl,],family=family)$lin else offsets1[incl] = 0
  offsets1
}

.findIn<-function(header,str, critical=FALSE, grep=TRUE){
 for(k in 1:length(str)){
  index =  which(header==str[k])
  if(length(index)>0) break
 }
 index = index[!is.na(index)]
 if(grep & length(index)==0){
   for(k in 1:length(str)){
    index = grep(str[k], header) 
    if(length(index)>0) break
   } 
 }
 index = index[!is.na(index)]
 if(critical & length(index)==0){
   print(head(header))  
 stop(paste('could not match',paste(str,collapse=","), 'in header'))
 }
 if(length(index)>1) index = index[1]
 index
}

###MODIFIED 29-11
.readHeader<-function(header,type){
   formatIndex = .findIn(header,c("FORMAT","imputed"))
   if(length(formatIndex)==0) formatIndex = .findIn(header,c("FORMAT","regionId"))

   pos_index = .findIn(header,c("POS","FirstProbe","start"), critical=TRUE)
   chr_index = .findIn(header,c("CHROM","Chr","chr"), critical=FALSE)
   if(length(chr_index)==0) chr_index = -1
   rs_index = .findIn(header,c("snpid","rsid","regionId","ID","id"), critical=TRUE)
   lastIndex = .findIn(header,"LastProbe")
   NbSnpIndex =.findIn(header,"NbSnp")
   if(length(lastIndex)==0) lastIndex = pos_index
   if(length(NbSnpIndex)==0) NbSnpIndex=-1
   if(length(formatIndex)==0){
       firstGenoIndex = rs_index+1
       if(type=='impute')  firstGenoIndex =1+ .findIn(header,c("ALT"))
   }else  firstGenoIndex = formatIndex+1
   list(formatIndex = formatIndex,pos_index = pos_index,chr_index = chr_index, rs_index = rs_index,lastIndex = lastIndex,
	NbSnpIndex = NbSnpIndex, firstGenoIndex=firstGenoIndex,
	posi = c(pos_index,chr_index,rs_index,lastIndex,NbSnpIndex))
}

.centralise <-
  function(vec,inds=1:length(vec), na.replace = NA){
    vec1 = (vec-mean(vec[inds],na.rm=TRUE))
    vec1[is.na(vec1)] = na.replace
    vec1
  }

#.centralise <-
#function(vec,na.replace = NA){
#  vec1 = (vec-mean(vec,na.rm=TRUE))
#  vec1[is.na(vec1)] = na.replace
#  vec1
#}

.standardise <-
function(vec, na.replace=NA){
  vec1 = (vec-mean(vec,na.rm=TRUE))/sd(vec,na.rm=TRUE)
  vec1[is.na(vec1)] = na.replace
  vec1
}

.destandardise <-
function(vec,mean,sd) vec*sd + mean

.cnt <-
function(str) {
  if(length(grep("N",str))>0) 
    NA 
  else 
    nchar(gsub("A","",str))
}
.cntNa<-function(vec) length(which(is.na(vec)))

.cntVec <-
function(str) apply(as.matrix(str,nrow = length(str)),1,.cnt)
.convAvgToUnc <-
function(vec,n,sc=1){
  res = rep(0,n)
  fl = floor(vec)
  if(fl==vec) 
    res[vec+1] = sc
  if(fl!=vec){
    ceil = fl+1
    val = round(sc*(ceil-vec))
    res[fl+1] = val
    res[ceil+1] = sc-val
  }
  res
}
.convAvgToUncM <-
function(vec,sc,maximum){
 matr = apply(as.matrix(vec),1,.convAvgToUnc,maximum)
 as.vector(t(apply(as.matrix(vec),1,.convAvgToUnc,maximum,sc)))
}
.convAvgToUncMat <-
function(mat,sc){
 maximum = max(mat,na.rm=TRUE)+1
 if(maximum%%1>0) maximum = floor(maximum)+1
 apply(mat,2,.convAvgToUncM,sc,maximum)
}
.convertFactorToMatrix <-
function(f,nme){
   res= apply(as.matrix(levels(as.factor(f))),1,.getOnes,f)
   dimnames(res) = list(names(f),paste(nme,levels(as.factor(f)),sep="_"))
   res
}
.convertMatrixToFactor <-
function(mat){
 nme = as.matrix(data.frame(strsplit(dimnames(mat)[[2]],"_")))
 res = rep("",dim(mat)[1])
 for(i in 1:length(res)) 
   res[i] = paste(nme[2,mat[i,]==1],collase="")
 res
}
.estSigma <-
function(x){
  d = ncol(x)
  v = matrix(ncol=d,nrow=d)
  for( i in 1:d ){
    ptrI = !is.na(x[,i])
    v[i,i] = var(x[ptrI,i])
    if( i < d ){
      for( j in (i+1):d ){
        ptrJ = !is.na(x[,j])
        ptr = as.logical(ptrI*ptrJ)
        v[i,j] = cov( x[ptr,i], x[ptr,j] )
        v[j,i] = v[i,j]
      }
    }
  }
  return(v)
}
.expand <-
function(matr, repl){
 res = matr
 for(i in 2:repl) res = rbind(res,matr)
 res
}
.expand2 <-
function(matr, repl){
  res = matr
  for(i in 2:repl)
    res = abind(res,matr,along = 3)
  res
}
.expandCoeff <-
function(coeff,ind){
  diff =  max(ind) - dim(coeff)[1]
  if(diff>0) coeff = rbind(coeff,matrix(NA, nrow = diff,ncol = dim(coeff)[2]))
  coeff
}
.expandDat <-
function(data,repl){
  if(repl==1) {return(data)}

  inds = sapply(data,.nullDim)
  data[inds==2] = lapply(data[inds==2],.expand,repl)
  data[inds==3] = lapply(data[inds==3],.expand2,repl)
  data
}
.expDist <-
function(y){
   p = (y%*%2:0)/(2*sum(y))
   c(p^2, 2*p*(1-p), (1-p)^2)*sum(y)
}

.extractIds <-
function(vec,nme) {paste(nme[vec],collapse=",")[[1]]}
.extractIdsMat <-
function(mat,spl,caseInd) apply(mat,1,.extractIdsVec,spl,caseInd,dimnames(mat)[[3]])
.extractIdsVec <-
function(vec,spl,caseInd,nme) apply(.getin2(as.numeric(vec),spl,caseInd),2,.extractIds,nme)
.extractSig <-
function(res,ind = 2, totweight = 1,log.p=FALSE){
  vec = summary(res)$coeff[ind,]
  ppvs = if(log.p) log(2) + pt(abs(vec[3]),1,lower.tail=FALSE,log.p=TRUE) else 2*pt(abs(vec[3]),1,lower.tail=FALSE,log.p=FALSE)
  if(length(vec)==3) vec = c(vec, ppvs) 

  c(vec[c(1,4)],  sum(res$weights)/totweight)
}
.extractSig1 <-
function(res,ind=2,totweight=1,hasCov=FALSE,family="binomial",log.p=FALSE,useBF=FALSE){

  if(sum(is.na(res)) == 2){
    res1 = res
  }
  else {
    beta = summary(res)$coeff[ind,1]
   if(useBF){
        pv =  .abfSig(res,log.p=log.p)
        print(pv)
        if(hasCov){
           pv2 = .abfSig(update(res, ~covar,family=family) ,log.p=log.p)
           pv = if(log) pv-pv2 else pv/pv2
	  ###SHOULD CHECK THIS
        }
     }else{
    ll1 = logLik(res)

    nullfam = family=="ordinal"
    if(hasCov & !nullfam) 
      ll2 = logLik(update(res, ~covar,family=family)) 
    else 
      if(!nullfam) 
        ll2 = logLik(update(res, ~1,family=family)) 
      else 
        if(hasCov) ll2 = logLik(update(res, ~covar)) 
        else 
          ll2 = logLik(update(res, ~1))
    df = attr(ll1,"df")[1]-attr(ll2,"df")[1]
    pv = pchisq((2*(ll1 - ll2))/(totweight),df,lower.tail=FALSE,log.p=log.p)
     }
    res1 = c(beta,pv)
  }
  res1
}
.extractSig1P <-
function(res,ind=2,totweight=1,hasCov=FALSE,family="binomial",log.p=FALSE, bf = FALSE){
  if(sum(is.na(res)) == 2){
    res1 = res
  }
  else {
    ll1 = logLik(res)
    coeff = summary(res)$coeff
    beta = coeff[ind,1]
    ve = abs(as.numeric(coeff[ind,3]))
    pvs = if(log.p) log(2) + pt(ve,1,lower.tail=FALSE,log.p=TRUE) else 2*pt(ve,1,lower.tail=FALSE,log.p=FALSE)
    nullfam = family=="ordinal"
    if(!nullfam){
     if(hasCov)       ll2 = logLik(update(res, ~covar,family=family)) 
     else   ll2 = logLik(update(res, ~1,family=family))  
    }  
    else{
	 if(hasCov)	        ll2 = logLik(update(res, ~covar)) 
         else       ll2 = logLik(update(res, ~1))
    }
    pv = pchisq((2*(ll1 - ll2))/(totweight),attr(ll1,"df")[1]-attr(ll2,"df")[1],lower.tail=FALSE,log.p=log.p)
    res1 = cbind(c(beta,NA),c(pvs,pv))
  }
  res1
}
.extractSigExact <-
function(t){
  exactMethod = "wald"
  or1 = fisher.test(t)
  or = oddsratio(t,method = exactMethod)
  c(log(or$measure[2,1]),or1$p.value, sum(t))
}
.extractSigP <-
function(res,ind,totweight = 1,hasCov=FALSE,family="binomial",log.p = FALSE){
  #nobs= sum(res$weights)/totweight
  coeff = .expandCoeff(summary(res)$coeff,ind)
  vec  = coeff[ind,,drop=FALSE]
  ll1 = logLik(res)
#  print(ind)
#  print(vec)
#  print(vec[ind,])
  ppvs = if(log.p) log(2) + pt(abs(vec[,3]),1,lower.tail=FALSE,log.p=TRUE) else 2*pt(abs(vec[,3]),1,lower.tail=FALSE,log.p=FALSE)
  if(dim(vec)[2]==3){
    vec = cbind(vec, ppvs)
  }else{
    vec[,4] = ppvs
  }
  if(hasCov) 
    ll2 = logLik(update(res, ~covar,family=family)) 
  else 
    ll2 = logLik(update(res, ~1,family=family))
  pv = pchisq((2*(ll1 - ll2))/(totweight),attr(ll1,"df")[1]-attr(ll2,"df")[1],lower.tail=FALSE,log.p=log.p)
  res1 = cbind(c(vec[,1],NA),c(vec[,4],pv))

   #cbind(res1,rep(nobs,dim(res1)[[1]]))
  res1
}

.pseudoInv<-function(P){
svd = svd(P)
U = svd$u
d = svd$d
V = svd$u
d1 = diag(1/svd$d)
return(V %*% (d1 %*% t(U)))
}


.fillMissing <-
function( x){
  ptr.ind = which( apply(is.na(x),1,sum)>0 )
  if(length(ptr.ind)==0) return (x)

  v = .estSigma(x)
  m = apply(x,2,mean,na.rm=TRUE)
  lambda = .pseudoInv(v)
  xx = x
  for( ii in 1:length(ptr.ind) ){
    i = ptr.ind[ii]
    ptr.miss = which(is.na(x[i,]))
    fill =  m[ptr.miss] - .pseudoInv(lambda[ptr.miss,ptr.miss]) %*% lambda[ptr.miss,-ptr.miss] %*% as.matrix(x[i,-ptr.miss] - m[-ptr.miss])
    xx[i,ptr.miss] = fill
  }
  return(xx)
 
}
.findInd <-
function(vec,header,ind){x = grep(vec[ind],header)[1]; x}
.findmax <-
function(vec) max(0,max(as.numeric(vec),na.rm=TRUE))
.fisherPvalue <-
function(p){ 
	if(getOption("mPhen.log10p",FALSE)) pchisq(-2*log(10)*sum(p),df = 2*length(p),lower.tail=FALSE,log.p=TRUE)/log(10)
	else  pchisq(-2*sum(log(p)),df = 2*length(p),lower.tail=FALSE)
}
.fixGenoCols <-
function(geno, gCols){
 geno1 = matrix(0,nrow=dim(geno)[1],ncol=dim(geno)[2])
 geno1[,gCols] = apply(geno[,gCols,drop=FALSE],2,.cntVec)
 geno1[,-gCols] = apply(geno[,-gCols,drop=FALSE],2,as.numeric)
 geno1
}
.fixNonNumeric <-
function(pheno){
 nonnumvec = .nonNumeric1(pheno)
 pheno1 = matrix(0,nrow=dim(pheno)[1],ncol=dim(pheno)[2])
 pheno1[,!nonnumvec] = apply(as.matrix(pheno[,!nonnumvec]),2,as.numeric)
 pheno1[,nonnumvec] = apply(as.matrix(pheno[,nonnumvec]),2,.makeNumeric)
 dimnames(pheno1) = dimnames(pheno)
 pheno = pheno1
}
.getArray <-
function(arraynames,v=NA) array(v,dim= .getDim(arraynames), dimnames = arraynames)
.getDim <-
function(arraynames) lapply(arraynames,length)
#c(length(arraynames[[1]]), length(arraynames[[2]]), length(arraynames[[3]]))

.getFamily <-
function(phenv,incl){
 if(length(levels(as.factor(phenv[incl])))>2) "gaussian" else "binomial" 
}

#.getGlmNoCovar <-
#function(phenvec,vars, fam1,weights,offset){
#
#}
#.getGlmRev <-
#function(vars,phenvec,covar,weights, family) .glm1(vars ~ phenvec + covar,family=family,weights=weights)

#.getGlmRevNoCov <-
#function( vars, phenvec, weights, family){
#  .glm1(vars ~ phenvec,family=family,weights=weights)
#}

.abfSig<-function(res, w = .21 * .21,log.p=TRUE){
  fit <- summary(res)$coefficients[2,]
  v <- fit[2]
  z <- fit[3]
  labf <- 0.5*(log(v) - log(w+v)) + (0.5*z*z*w/(v+w))
  if(log.p) return(labf) else return(exp(labf))
}

##log bayes factor
.abf <- function( y, x, w=.21*.21 ){
  .abfSig(glm( y ~ x ,family=gaussian()))
}

.length<-function(x){
 if(is.null(x) )return(0)
 if(is.array(x)) return(dim(x)[2]) else return(length(x))
}

#log bayes factor
.mabf <- function(y1,x,delta =1, tau = 1,log.p=TRUE){
  n = length(y1)
  y = matrix(.standardise(y1),ncol=1,nrow =n)
  p = dim(x)[2]
  if(p==0) return(sum(dnorm(y1,log=T)))
  x = apply(x,2,.standardise)
  k = p 
  numerator = lgamma((n+delta+k)/2)
  denominator1 = (n/2)*log(pi) +  ((n-k)/2)*log(tau) +  lgamma((delta+k)/2)
  Xg = x
  Mg = diag(rep(tau,k)) + t(Xg) %*% Xg
  qg = t(y) %*% y -  t(y) %*% Xg%*% solve(Mg) %*% t(Xg) %*% y
  denominator2 =  (1/2)*log(det(Mg)) +  ((n+delta+k)/2)*log(1+qg/tau) 
  labf = numerator - denominator1 - denominator2
  if(log.p) return(labf) else return(exp(log.p))
}

##logP1 comes from model with more variables (model 1)
## returns posterior probability of model 1 versus model 2
## pi is prior odds (of bigger model)
.calcPPA<-function(logP1,logP2,totweight,pi= 0.05, log.p=TRUE){

logBF = (logP1-logP2)/totweight
PO = exp(logBF + log((pi/(1-pi))))
pv = PO/(1+PO)
if(log.p) pv = log(pv)
pv
}

.lrt<-function(ll1,ll2, totweight){
 df1 =  attr(ll1,"df")[1]
 df2 = attr(ll2,"df")[1]
 if(df2>df1) stop(paste("df2 should be bigger than df1 ",df2,df1))
 pchisq((2*(ll1 - ll2))/(totweight),df1-df2,lower.tail=FALSE,log.p=TRUE)
}

.getGlm <-
function(phenvec,vars, fam1,weights,covar=NULL,offset=NULL){

  if(is.null(covar)) .glm1(phenvec~vars,weights = weights,offset = offset, family = fam1)
  else {
    .glm1(phenvec~vars+covar,family = fam1,weights = weights, offset = offset)
  }
}

.glm1<-function(string, family, weights, offset=NULL){
  resu = NULL
  if(family[1]=="ordinal"){
    if(!is.null(offset)) stop('error')

    resu =  polr(string,Hess=TRUE,method="logistic",weights = weights)

  }else{
    resu = glm(string,family=family, weights = weights,offset = offset)
  }
  if(family=="gaussian") attr(resu,"totweight") = 1
  else attr(resu,"totweight") = mean(weights)
  resu
}


.backwardSelection<-function(vars,phenvec,covar,weights, family, totweight, iterationsMax = 100,minVarLength =1){
hascov=!is.null(covar)
 logthresh = log(getOption("mPhen.defaultBackwardThresh",default=0.01))
 frac1 =getOption("mPhen.defaultBackwardFrac", default = 0.1)
 bf = getOption("mPhen.defaultUseBF",default = FALSE)

if(family=="gaussian") totweight=1
else totweight = mean(weights)
 res = .getGlm(vars,phenvec,family,weights, covar)
 res2 = .getGlm(vars,rep(1, dim(phenvec)[1]),family,weights, covar)
 ll1 = logLik(res)
 ll2 = logLik(res2)

 if(bf){
	abf1 = .mabf(vars,phenvec)  ## should add covar
 }
 pv = .lrt(ll1,ll2,totweight)


 noP = dim(phenvec)[2]
 torem = c()
 pvl = rep(1.0,noP)

 inds = 1:noP
 toremnew = c()

 endloop<-FALSE
 cnt =1

 while(!endloop ){
   frac = frac1[1]
   pvl = rep(0,length(inds))
  # pvlg = rep(0,length(inds))
  # abf = rep(0,length(inds))
  
   for(i in 1:length(inds)){
      if(bf){
	    abfi = .mabf(vars,phenvec[,-c(torem,inds[i]),drop=F])
           pvl[i] =.calcPPA(abfi,abf1,totweight,pi= 0.5)
      }
      else{
          
            
          if(length(inds[-i])==0) lli =ll2
          else{
          
             res2 = .getGlm(vars, phenvec[,-c(torem,inds[i])], family, weights, covar)
             lli = logLik(res2)
	  
          }
	  pvl[i] = .lrt(ll1,lli,totweight)
      }
   }
   ordp = rev(order(pvl))
   names(pvl) = dimnames(phenvec)[[2]][inds]
   len = length(which(pvl[ordp]>logthresh))
   step = max(1,floor(frac*length(pvl)))
   if(len==0) toremnew = c() 
   else toremnew = inds[ordp[1:min(step,len,length(ordp)-minVarLength)]]


  if(cnt <=iterationsMax 
	& length(toremnew)>0
        & length(c(torem,toremnew)) <= noP - minVarLength 
	##& length((1:noP)[-c(torem,toremnew)])>=minVarLength
       ){
          torem = c(torem,toremnew)
          
           res = .getGlm(vars,phenvec[,-torem,drop=F],family,weights, covar)
	   #res = .glm1(vars ~ covar + phenvec[,-torem,drop=F],family=family,weights=weights)

          ll1 = logLik(res)

          if(bf){
           abf1 = .mabf(vars,phenvec[,-torem,drop=F])
	  }

	  pv_new = .lrt(ll1,ll2,totweight)
 	        inds = (1:noP)[-torem]
		cnt = cnt+1
	        pv = pv_new
   }else{
	endloop=TRUE
   }
   
 }
 summ = summary(res)
if(length(torem)==0) torem = c(noP+1)
#without intercept

lastind = 1
if(hascov)  lastind = (lastind+dim(covar)[[2]])
 diff =length(summ$alias)- dim(summ$coeff)[1] 

 coeff = summ$coeff[-(1:lastind),,drop=F]
 alias = summ$alias[-(1:(lastind+diff))]
 pv = .lrt(ll1,ll2,totweight)

 indSig = which(pvl<=logthresh)

 ind = (1:noP)
 beta = rep(0,noP)
 pvs = rep(1,noP) 
if(is.null(alias)){
 beta[-torem][indSig] = coeff[,1][indSig]
}else{
 beta[-torem][which(!alias)][indSig] = coeff[,1][indSig]
}
 pvs[-torem][indSig] = exp(pvl[indSig])
 res1 = cbind(c(beta,NA),c(pvs,exp(pv)))
  res1
}

#.getGlmRevOrd <-
#function( vars, phenvec, covar,weights) {
# tryCatch(polr(vars ~ phenvec + covar,Hess=TRUE,method="logistic",weights = weights), error = function(e) print(NULL))
#}
#.getGlmRevOrdNoCov <-
#function( vars, phenvec, weights){
#  tryCatch(polr(vars ~ phenvec,Hess=TRUE,method="logistic",weights = weights), error = function(e) print(NULL))
#}
.getHist <-
function(vec, spl){ 
  le = vec < spl[1]
  gt = vec > spl[length(spl)]
  c(length(which(le)),hist(as.numeric(vec[!le & !gt]),br=spl,plot=FALSE)$count,length(which(gt)))
}
.getIMatrix <-
function(d,phi,cats,num1,x){
  II = matrix(ncol=d,nrow=d)
  for(j in 1:d){
    for(k in j:d){
      num = 0
      for( i in 1:cats ) num = num + (i-1)*(i-1)*x[,j]*x[,k]*exp(phi[i])
      II[j,k] = sum(num1[[j]]*num1[[k]])
      II[j,k] = II[j,k] - sum(num)
      II[k,j] = II[j,k]
    }
  }
  II
}
.getin <-
function(stend,vec) vec>=stend[1] & vec<stend[2]
.getin1 <-
function(genvec,spl) apply(cbind(spl[1:(length(spl)-1)],spl[2:length(spl)]),1,.getin, genvec)
.getin2 <-
function(genvec,spl,cc) apply(cbind(spl[1:(length(spl)-1)],spl[2:length(spl)]),1,.getin, genvec) & cc
.getIncl <-
function(phenv, na_cov){incl = !((is.na(phenv) | na_cov)); incl}
.getMaf <-
function(matr,base,noallele){
index = !is.na(matr[1,1,]) & matr[1,2,]>0 
vec = matr[1,1,index]
weights = matr[1,2,index]
res=  .minOneMinus(sum(weights*abs(vec -base))/(sum(weights)*noallele))
res
}
.getHWE<-function(matr,totweight){
 index = !is.na(matr[1,1,]) & matr[1,2,]>0 
 vec = matr[1,1,index]
 weights = matr[1,2,index]
 vec1 = .countIn(vec,weights,-2.5:2.5)/totweight
# vec1 = round(vec1) 
min(.hwep(vec1[3:5]),.hwep(vec1[1:3]))
}

.countIn<-function(vec,weights,br){
 
 res = rep(NA, length(br)-2)
 for(i in 1:(length(br-1))){
  min = br[i]
  max = br[i+1]
  ind = vec>=min & vec < max
   res[i] = sum(weights[ind])
 }
res
}
.getOnes <-function(vec,f) f==vec[1]

.getPvLrr <-
function(phend, gvar, family,inclu,covar,totweight){
  log.p = getOption("mPhen.log10p",FALSE)
  phenvec = phend[1,]
  vars = gvar[1,]
  weights = gvar[2,]
  offset = phend[4,]
  include = phend[2,] & inclu & gvar[3,]>0 & weights > 0 & sd(vars,na.rm=T)>0
  if(dim(covar)[2]==0) covar = NULL   else covar = covar[include,]

  if(length(which(include))>0 & var(phenvec[include],na.rm=TRUE)>0){
      glm.res = .getGlm(phenvec[include] ,as.numeric(vars[include]),family,weights[include],covar = covar, offset = offset[include])
    totweight1 = attr(glm.res,"totweight")
    res =  .extractSig1(glm.res,ind=2,totweight = totweight1,hasCov = !is.null(covar),family=family,log.p=log.p)
  }
  else res = rep(NA,2) 
  c(res,sum(weights[include])/totweight)
}

.getPvLrrAllGen <-
function(vars,phen,families,include,covar,totweight,functiAll,functi,jointModel){
 dimv = dimnames(vars)[[1]]
 resn = dimnames(phen)[[1]]
 if(jointModel) resn = c(resn,"all")
  x = .getArray(list(dimv,resn,c("beta","pvalue","Nobs")))
  for(i in 1:length(dimv)){
        ress = functiAll(vars[i,,],phen,families,include,covar,totweight[i],functi)
	  x[i,,] = ress
  }
  x
}
.getPvLrrMult <-
function(vars,phen,fams,include,covar,totweight, functi){
  phenN = dimnames(phen)[[1]]
  res = .getArray(list(phenN,c("beta","pvalue","Nobs")))
  for(k in 1:length(phenN)){   
    res[k,] = functi(phen[k,,],vars, fams[k],include,covar,totweight)
  }
  res
}


.getPvLrrMultiGenVS<-
function(gvar,phend, family,inclu,covar,totweight,functiAll,functi,jointModel){
 .getPvLrrMultiGen(gvar,phend, family,inclu,covar,totweight,functiAll,functi,jointModel, var_sel=TRUE)
}
.getPvLrrMultiGenNoVS<-
function(gvar,phend, family,inclu,covar,totweight,functiAll,functi,jointModel){
.getPvLrrMultiGen(gvar,phend, family,inclu,covar,totweight,functiAll,functi,jointModel,var_sel=FALSE)
}

.getPvLrrMultiGen <-
function(gvar,phend, family,inclu,covar,totweight,functiAll,functi,jointModel, var_sel = FALSE){
  phenN = dimnames(phend)[[1]]
  res = .getArray(list(c(dimnames(gvar)[[1]],"combined"),phenN,c("beta","pvalue","Nobs")))
 for(k in 1:length(phenN)){
  res[,k,] = .getPvLrrMultiGen1(phend[k,,],gvar,family[k],inclu,covar,mean(totweight),functiAll,functi, var_sel = var_sel)
  }
  res
}

.ordTestUnivariate <-
function(phend,gvar, family, inclu,covar,totweight){
 if(dim(covar)[2]>0) stop('ord test not coded up for covariates, use residuals')
  weights = as.numeric(gvar[2,])
  include = phend[2,]>0 & inclu & weights > 0 & gvar[3,]>0
  y = as.numeric(gvar[1,include])
  x = (as.matrix(phend[1,include] - phend[4,include]))
  #print(y)
  #print(head(x))
  resu  = .ordTest2(y,x)
  resu = c(resu[1,], dim(covar)[1])
  resu
}




.getPvRev <-
function(phend, gvar, family, inclu,covar,totweight){ 
 log.p = getOption("mPhen.log10p",FALSE)
  vars = gvar[1,]
  weights = as.numeric(gvar[2,])
  include = phend[2,]>0 & inclu & weights > 0 & gvar[3,]>0

  phenvec = phend[1,include] - phend[4,include]
  emptyCov = dim(covar)[2]==0
  if(emptyCov) covar = NULL else covar = covar[include,]
  var1 = vars[include]
  cont = (family=="gaussian")
  binom = (family=="binomial")
  ord = (family=="ordinal")
  if(ord){
     var1 = as.factor(var1)
   if(length(levels(var1))<=2){
    binom = TRUE
    family = "binomial"
    ord = FALSE
  }
  }
  todo = TRUE
  if(length(which(include))==0 | all(duplicated(var1)[-1L])) 
    todo=FALSE
  if(todo){


    glm.res = tryCatch(.getGlm(var1,phenvec,family, weights[include], covar = covar,offset = NULL),error = function(e) print(NULL))
      if(is.null(glm.res)){
	family = "gaussian"
        print("polr failed")
	glm.res = .getGlm(as.numeric(as.character(var1)),phenvec,family, weights[include],covar = covar, offset = NULL)
      }

       totweight1 = attr(glm.res,"totweight")
     c(.extractSig1(glm.res,ind=1, totweight = totweight1,hasCov=!emptyCov,family=family,log.p=log.p),sum(weights[include]/totweight))
 }
  else if(!todo) rep(NA,3) 
 }

.getPvLrrMultiGen1 <-
function(phend, gvar,family,inclu,covar,totweight,functiAll,functi, var_sel = FALSE){
 log.p = getOption("mPhen.log10p",FALSE)
  phenvec = phend[1,]
  vars = gvar[,1,]
  weights1 = gvar[1,2,] 
  nainds =  apply(as.matrix(gvar[,3,]),2,min)
  offset = phend[4,]
  include = phend[2,] & inclu & nainds >0 & weights1 > 0
  # & nainds # & weights > 0
  emptyCov = dim(covar)[2]==0
  if(emptyCov) covar = NULL else covar = covar[include,,drop=F]
  ind = 1:(dim(vars)[1]) + 1
  tobind = sum(weights1[include])/totweight   #,length(families)+1)
  v = var(phenvec[include])
  if(length(which(include))>0  & !is.na(v) & v>0){
    if(var_sel){
      res = .backwardSelection(phenvec[include]-offset[include],t(vars[,include]),covar=covar,weights=weights1[include], family = family, totweight=totweight)
      res = cbind(res,tobind)
     return(res)
    }
    glm.res = .getGlm(phenvec[include],t(vars[,include]),family,weights1[include],covar = covar, offset = offset[include]) 
     totweight1 = attr(glm.res,"totweight")
    cbind(.extractSigP(glm.res,ind=ind,totweight=totweight1,family=family,hasCov=!emptyCov,log.p=log.p),sum(weights1[include])/totweight)
  }
  else rep(NA,3)
}



###uses the first family
.getPvRevPleio_ <-
function(gvar,phen,families,inclu,covar,totweight,functi, variable_selection){
 #print(families)
  log.p = getOption("mPhen.log10p",FALSE)
  phenvec = phen[,1,]-phen[,4,]
  family = families[1]
   vars = gvar[1,]
  weights = gvar[2,]
  nainds  =   gvar[3,] 
  phenincl = apply(as.matrix(phen[,2,]),2,min)
  include =  phenincl>0 & inclu & weights>0 & nainds >0
  var1 = vars[include]
  cont = (family=="gaussian")
  binom = (family=="binomial")
  ord = (family=="ordinal")
  if(ord){
     var1 = as.factor(var1)
   if(length(levels(var1))<=2){
    binom = TRUE
    family = "binomial"
    ord = FALSE
  }
  }
  emptyCov = dim(covar)[2]==0
  if(emptyCov)covar = NULL else covar = covar[include,,drop=F]
  ind = 1:(dim(phenvec)[1])
  if(!ord) ind = ind+1
  todo = TRUE
  tobind = rep(sum(weights[include])/totweight,length(families)+1)
  if(length(which(include))==0 | all(duplicated(var1)[-1L])) 
    todo=FALSE
  if(todo){
    if(variable_selection){
      res = .backwardSelection(var1,t(phenvec[,include]),covar=covar,weights=weights[include], family = family, totweight=totweight)
      res = cbind(res,tobind)
      return(res)
    }else{
      glm.res = NULL
      glm.res = tryCatch(.getGlm(var1,t(phenvec[,include]),family, weights[include],covar = covar, offset = NULL),error = function(e) print(NULL))
      if(is.null(glm.res)){
	family = "gaussian"
        print("polr failed, using Gaussian")
	glm.res = .getGlm(as.numeric(as.character(var1)),t(phenvec[,include]),family, weights[include],covar = covar, offset = NULL)
      }
      totweight1 = attr(glm.res,"totweight")
      resu= .extractSig1P(glm.res, ind = ind, totweight = totweight1, hasCov = !emptyCov, family = family,log.p = log.p)
      return(cbind(resu,tobind))
    }
  }  
  if(!todo) 
    return(rep(NA,3) )
}


.getPvRevPleio<-function(gvar,phen,families,inclu,covar,totweight,functi) .getPvRevPleio_(gvar,phen,families,inclu,covar,totweight,functi, FALSE)
.getPvRevPleioVS<-function(gvar,phen,families,inclu,covar,totweight,functi) .getPvRevPleio_(gvar,phen,families,inclu,covar,totweight,functi, TRUE)


.getPvTable <-
function(phend, gvar, family,inclu,covar,totweight){
  phenvec = phend[3,]
  vars = as.factor(gvar[1,])
  weights = gvar[2,]
  offset = phend[4,]
  include = phend[2,] & inclu & weights > 0 & gvar[3,]>0
  emptyCov = dim(covar)[2]==0
  todo = TRUE
  if(length(which(include))==0 | var(vars[include],na.rm=TRUE)==0 | family!="binomial") 
    todo=FALSE
  if(!todo) 
    rep(NA,3) 
  else 
    .extractSigExact(.tabulateWithWeights(phenvec[include],vars[include],weights[include],totweight))
}
.getUNum1Matrix <-
function(d,y,x,phi,num1,cats){
  U = matrix(ncol=1,nrow=d)
  num1 = list(length=d)
  for( j in 1:d ){
    U[j] = sum(y*x[,j])    
    num1[[j]]=0 
    for( i in 1:cats ) 
      num1[[j]] = num1[[j]] + x[,j] * (i-1) * exp(phi[i]) 
    U[j,1] = U[j,1] - sum(num1[[j]])
  }
  list(U,num1)
}
.hwestat <-
function(obs,exp) sum((obs-exp)^2/exp)
.iscase <-
function(vec) !is.na(vec) & vec>mean(vec,na.rm=TRUE)
.joinRes <-
function(mat,ncol,form){
 if(is.null(dim(mat))) 
   mat = matrix(mat,ncol=ncol)
 res1 =  apply(mat,2,.joinResVec,form)
 res1
}

.hwep<-function(vec){
 if(max(vec)==0){
   p = 1.0
 }
 else if(min(vec)<5){
   p = HWExact(vec)$pv
 }
 else{
 p=pchisq(.hwestat(vec, .expDist(vec)),1,lower.tail=F)
}
p
}
.hwepall<-function(y){
h=hist(y,plot=F,br=-2.5:2.5)
min(.hwep(h$counts[3:5]),.hwep(h$counts[1:3]))
}



.joinResVec <-
function(vec,form) paste(sprintf(form,vec),collapse=";")
.mafCount <-
function(y) max(.minOneMinus(.allFreq(y[1:min(3,length(y))])), .minOneMinus(.allFreq(y[3:min(5,length(y))])),na.rm=TRUE)
.makeFactor <-
function(mat1,nme){
  #if(dim(mat1)[2]>0){
  # for(k in 1:(dim(mat1)[2])){
  #   if(length(levels(as.factor(mat1[,k])))>3){
  #       mat1[,k] = .makeTopTail(mat1[,k],c(0.33,0.66))
  #   }
  # }
  #}
  append = dim(mat1)[2]>0
  fact = factor(apply(mat1,1,paste,collapse=".")) 
  lev = levels(fact) 

  matrix = .convertFactorToMatrix(fact,paste(nme,sep="."))

  nmes = dimnames(matrix)[[2]]
  txt = dimnames(matrix)[[2]]
  index = setdiff(seq(length(txt)),grep("_NA",txt))
  res = matrix[,index,drop=FALSE]
  dimnames(res) = list(NULL,nmes[index])
  if(append)  
    res = cbind(res, rep(TRUE,dim(res)[1]))
  if(append)   
    dimnames(res) = list(NULL,c(nmes[index],"all")) 
  res 
}
.makeNumeric <-
function(vec) as.numeric(as.factor(vec))



.makeQuantile <-
function(vec)  qqnorm(vec,plot=FALSE)$x

.makeThresh <-
function(vec,thresh){
 vec1 = rep(NA,length(vec))
 vec1[vec<=thresh[1]] = 0
 vec1[vec>=thresh[2]] = 1
 vec1
}
.makeTopTail <-
function(vec,perc1){
 perc = as.numeric(perc1)/100
 vec1 = rep(NA,length(vec))
 quants = ecdf(sort(vec))(vec)
 vec1[quants<=perc[1]] = 0
 vec1[quants>=perc[2]] = 1
 vec1
}

 ##HAS BEEN MODIFIED 29-11


.mergeAlleleCols <-
function(snp,cols){
  len = 1+ dim(snp)[2] - length(cols) 
  snp1 = matrix(0,nrow = dim(snp)[1],ncol =len) 
  snp1[,1] = .cntVec(apply(snp[,cols],1,paste,collapse=""))
  matr = snp[,-cols,drop=FALSE]
  nme1 = rep("",len)
  nme1[1] = "geno"
  if(len>1) 
    nme1[2:len] = dimnames(snp)[[2]][-cols]
  if(len>1) 
    for(i in 2:len) 
    snp1[,i] = matr[,i-1]
  dimnames(snp1) = list(dimnames(snp)[[1]],nme1)
  snp1
}
.metaresFisher <-
function(matr){
  inds = !is.na(matr[,2] )
  fisherp = .fisherPvalue(matr[inds,2])
  return(c(mean(matr[inds,1]),fisherp,sum(matr[,3])))
}
.metaresInvVarianceFixed <-
function(matr){
  inds = which(!is.na(matr[,2]))
  teSe = apply(matr[inds,,drop=FALSE],1,.seFromPBeta)
  if(length(inds)<getOption("mPhen.metaMinCohortNum",2)) return(c(NA,NA,NA))
  else if(length(inds)==1) return(matr[inds,])
  te = matr[inds,1]
  if(getOption("mPhen.useFisherIfAllBetaNA",FALSE) & .cntNa(te)==length(te)){
    return(.metaresFisher(matr))
  }
  metres = metagen(te,teSe)
  pv = .pFromSEBeta(c(metres$TE.fixed,metres$seTE.fixed))
  ##c(metres$Q,pchisq(metres$Q,1,lower.tail=FALSE)) 
  c(metres$TE.fixed,pv,sum(matr[,3]))
}

.metaresHetMeasure<-function(matr){

  inds = which(!is.na(matr[,2]))
  if(length(inds)<getOption("mPhen.metaMinCohortNum",2)) return(c(NA,NA,NA))
  teSe = apply(matr[inds,,drop=FALSE],1,.seFromPBeta)
  te = matr[inds,1,drop=FALSE]
  metres = metagen(te,teSe)
  pv = .pFromSEBeta(c(metres$TE.fixed,metres$seTE.fixed))
  c(metres$Q,pchisq(metres$Q,1,lower.tail=FALSE),sum(matr[,3]))
}
.pFromSEBeta <-function(v){
  if(getOption("mPhen.log10p",FALSE)){
       x =  (pnorm(abs(v[1]),sd = v[2],lower.tail=FALSE,log.p=TRUE)+log(2))/log(10);
   }else{
	x =  pnorm(abs(v[1]),sd = v[2],lower.tail=FALSE)*2;
   }
   x
}
.seFromPBeta <-function(v){
  if(getOption("mPhen.log10p",FALSE)){
    x = abs(v[1])/qnorm((v[2]*log(10))/2,lower.tail=FALSE,log.p=TRUE); 
  }else{
   x = abs(v[1])/qnorm(v[2]/2,lower.tail=FALSE); 
  }
x
}


.getSuffix<-function(todo,sub=""){
  todo_ = todo[grep(sub,todo[,1]),,drop=F]
    l=levels(as.factor(todo_[,1]))
  if(length(l)>1){

    res = .getSuffix(todo_,sub=l[1])
    for(k in 2:length(l)){
      res = paste(res,"_",.getSuffix(todo_,sub=l[k]),sep="")
    }
    return(res)
  }else{
   l2=levels(as.factor(todo_[,2]))
   if(length(l2)>2){
     return(paste(sub,"_",length(l2),sep=""))
   }else{
     return(paste(sub,"_",paste(l2,collapse="."),sep=""))
}

}
}

.fixDuplicatesInTodo<-function(todo1_){
ph_i = grep("pheno",todo1_[,1])
todo_ = rbind(todo1_[-ph_i,,drop=F],todo1_[ph_i,,drop=F])
type = todo_[,2]
a = levels(as.factor(type))
toexcl = c()
for(i in 1:length(a)){
  match = which(type==a[i])
  if(length(match)>1){
    toexcl = c(toexcl,match[2:(length(match))])
  }
}
if(length(toexcl)>0){
return(todo_[-toexcl,,drop=F])
}else{
return(todo_)
}
}
.minOneMinus <-
function(x) min(x,1-x)
.mod <-
function(vec,vals){
  #vec[vec==""] = 0; 
  #vec[vec=="-"] = 1000 - sum(as.numeric(vec[vec!="-"]),na.rm=TRUE); 
  (as.numeric(vec)%*% vals)/sum(vec)
}
.mod1 <-
function(vec, rescale){
  #vec[vec==""] = 0
  #vec[vec=="-"] = 1000 - sum(as.numeric(vec[vec!="-"]),na.rm=TRUE);
  #vec[vec<200]  =0
  vec1 = floor(rescale * as.numeric(vec))#floor(rescale*(as.numeric(vec)/1000))
  ind = vec1==max(vec1)
  vec1[ind] = vec1[ind]+ rescale - sum(vec1)
  vec1
}

#this function expands a genotype into a distribution over genotypes (just used for testing)
.expandGenoToDist<-function(geno,vals,scale,perc){
	res=rep(perc*scale,length(vals));
       
	res[which(vals==geno)] =res[which(vals==geno)]+scale*(1-perc)
	res
}
.expandGenoToDistAll<-function(geno,spl,scale, perc=0.0){
vals = floor(min(spl)):(floor(max(spl))+1)
vals[[1]]
res = apply(geno,c(1,2),.expandGenoToDist,vals,scale,perc)
res1=aperm(res,c(2,3,1))
dimnames(res1)[[3]] = vals
res1
}

.reduceWeightsToGeno<-function(geno){
geno=data$geno
inds=which(geno[1,2,]>0)
geno1 = geno[1,,inds]
t(geno1[1,,drop=F])
}

.applyTrans<-function(phenoData,limit){
pheno1 = phenoData[,unique(match(limit[,2],dimnames(phenoData)[[2]])),drop=F]


  if(dim(limit)[2]>2)phenoTrans = limit[,3] else phenoTrans = NULL
if(!is.null(phenoTrans)){
    qqfam =  grep("^quantile",phenoTrans)
    if(length(qqfam)>0) pheno1[,qqfam] = apply(pheno1[,qqfam,drop=FALSE],2,.makeQuantile)
    stdfam =  grep("^standard",phenoTrans)
    if(length(stdfam)>0) pheno1[,stdfam] = apply(pheno1[,stdfam,drop=FALSE],2,.standardise)

    toptail = grep("^toptail",phenoTrans)
    if(length(toptail)>0) pheno1[,toptail] = apply(pheno1[,toptail,drop=FALSE],2,.makeTopTail,strsplit(phenoTrans[toptail[1]],"_")[[1]][2])
    thresh = grep("^thresh",phenoTrans)
    if(length(thresh)>0) pheno1[,thresh] = apply(pheno1[,thresh,drop=FALSE],2,.makeThresh,strsplit(phenoTrans[thresh[1]],"_")[[1]][1:2])
    #dimnames(pheno1)[[2]] = dimnames(phenoData)[[2]]
    factor = grep("^factor",phenoTrans)[1]
    while(length(factor)>0 & !is.na(factor[1])){
       k= factor[1]
       newM =  .makeFactor(as.matrix(pheno1[,k]),limit[k,2])
       newM = newM[,-dim(newM)[[2]]]
       pheno1 = cbind(pheno1[,-k,drop=F],newM)
       newLim = cbind(limit[k,1],dimnames(newM)[[2]],"")
        limit = rbind(limit[-k,,drop=F],newLim)
            phenoTrans = limit[,3]
       factor = grep("^factor",phenoTrans)[1]
   }
  }
#    if(!is.null(phenoTrans)) phenN = apply(rbind(phenN,phenoTrans),2,paste,collapse=".")
list(pheno1=pheno1,limit=limit)
}


.applySidakCorrection<-function(pvs,effPhe){
  pv = min(pvs,na.rm=T)
if(getOption("mPhen.log10p",FALSE)){
   if(pv<1e-15) res = pv+log10(effPhe)
   else res = 1-(1-(10^pv))^effPhe
}else{
  if(pv<1e-15) res = pv*effPhe
  else res = 1-(1-pv)^effPhe
}
res
}
.nyholdtSidak<-function(pheno){
M = dim(pheno)[[2]]
if(M==1){
  return(1)
}else{
cor = cor(pheno,use="pairwise.complete.obs")
excl_ind = (apply(apply(cor,c(1,2),is.na),1,max)>0)
v =var(eigen(cor[!excl_ind,!excl_ind])$val)
return((M-1)*(1-v/M)+1)
}
}

.readPhen<-function(phenFile,sep="\t",numHeaderRows = 1){
  if(length(grep("zip",phenFile))>0) phen3 = .readZip(phenFile)
  else{
    phen2 = read.table(phenFile,header=numHeaderRows>0,as.is=T,sep=sep,comment.char="")
    inds = match(unique(phen2[,1]),phen2[,1])
     phen3 = phen2[inds,-1,drop=F]
     rown =     phen2[inds,1] 
    if(numHeaderRows==0){
	coln = paste("phen",1:(dim(phen2)[2]-1),sep="")
       dimnames(phen3) = list(phen2[inds,1],coln)
    }else{
      dimnames(phen3)[[1]] = rown
    }
  }
  if(numHeaderRows>1){
    toincl = (numHeaderRows:(dim(phen3)[1]))
     phen3 = phen3[toincl,,drop=F]
  }
  p4 = as.matrix(apply(phen3,2,.as.numeric1))
  dimnames(p4) = dimnames(phen3)

  return(p4)
}
.as.numeric1<-function(v){
 f = as.factor(v)
 le = levels(f)
 if(length(le)<=3){
     return(as.numeric(f))
 }else{
    return(as.numeric(v))
 }
}
.getLast<-function(vec) vec[length(vec)]

.findInclusion<-function(sampleids, match, any=FALSE){
  if(any){
   rn = c()
   for(k in 1:length(sampleids)){
      rn = c(rn,sampleids[[k]])
   } 
   rn = unique(rn)
   pos = as.numeric(unlist(lapply(strsplit(rn,"_"),.getLast)))
   ord = order(pos)
   rn = rn[ord]
   }else  rn = sampleids[[1]]
  inclrow = list()
  for(i in 1:length(sampleids)){
        if(is.null(match)) inclrow[[i]] = match(rn,sampleids[[i]])
        else if(match) inclrow[[i]] = which(!is.na(match(rn,sampleids[[i]])))
        else{
            if(i==1) inclrow[[i]] = which(!is.na(match(rn,sampleids[[i]])))
            else inclrow[[i]] = which(is.na(match(sampleids[[i]],rn)))
        }
  }
  attr(inclrow,"rn")<-rn
  inclrow
}

.extractNames<-function(phenos,ind){
  sampleids = list()
  for(k in 1:length(phenos)){
    sampleids[[k]] = dimnames(phenos[[k]])[[ind]]
  }
  sampleids
}

.trL<-function(phenos){
res = list()
for(k in 1:length(phenos)){
  res[[k]] = t(phenos[[k]])
  #dimnames(res[[k]]) = rev(dimnames(phenos[[k]]))
}
res
}

###MOD 29-11
.mergeFiles<-function(phenos, 
      markerCol=FALSE,
      rownames = .extractNames(phenos,1),
      colnames = .extractNames(phenos,2),
      inclrow = NULL,
      ind2 = NULL, anyCol=FALSE	      
){
  if(length(phenos)==1) return(phenos[[1]])
  if(is.null(inclrow))       inclrow = .findInclusion(rownames,match=FALSE)
  if(is.null(ind2)) ind2 = .findInclusion(colnames,match=NULL, any=anyCol)
  rn =  attr(ind2,"rn")
  phen1 = array(dim = c(0,length(rn)))
  Cohort_ = c()
  indiv = list()
  for(i in 1:length(phenos)){
     phen2 = phenos[[i]][inclrow[[i]],ind2[[i]],drop=F]
    phen1 = rbind(phen1,phen2)
    indiv[[i]] = rownames[[i]][inclrow[[i]]]
    Cohort_ = c(Cohort_,rep(i,dim(phen2)[[1]]))
 }
 dimnames(phen1)[[2]] =rn
 if(markerCol & length(phenos)>1) phen1 = cbind(phen1,Cohort_)

 if(length(indiv)==1) indiv = indiv[[1]]
 attr(phen1,"indiv")<-indiv
 phen1
}




.rescale<-function(genoData,.rescaleV, integer = FALSE,min = 0.01){
dim= dim(genoData)
for(i in 1:dim[[1]]){
  for(j in 1:dim[[2]]){
    vec = genoData[i,j,]
    vec[vec<min] = 0.0
    vec  = vec * (.rescaleV/sum(vec))
    if(integer){ 
      vec = round(vec)
      maxind = which(vec==max(vec))
      vec[maxind] = vec[maxind] + (.rescaleV - sum(vec))
    }
        genoData[i,j,]=vec
  }
}
genoData
}

.getAvgWeights<- function(x1){
mean((x1[2,x1[3,]>0]))
}


.makeGenoWeights<-function(genoData,rescale = 1, ordinal=FALSE){
   dimg = dim(genoData)
   geno_header = dimnames(genoData)[[2]]
   indiv = dimnames(genoData)[[1]]  
  if(length(dimg)==2){
      genoweights = .getArray(list(geno_header,c("geno","weights","include"),indiv))
      genoweights[,1,] = t(genoData)
      genoweights[,2,] =  t( matrix(1,nrow = length(indiv),ncol = length(geno_header)))
      genoweights[,3,] = !apply(as.matrix(genoweights[,1,]),c(1,2),is.na)
         resc = rep(1,dim(genoData)[2])
  }else{

    genoData = .rescale(genoData,rescale,integer = (rescale>1 | ordinal),min = 0)
    valsg = as.numeric(dimnames(genoData)[[3]])
    len = length(indiv)
    genoweights = .getArray(list(geno_header,c("geno","weights","include"),rep(indiv,length(valsg))))
    for(i in 1:(length(valsg))){
	 genoweights[,1,((i-1)*len +1):(i*len)] = rep(valsg[i],length(indiv))
	 genoweights[,2,((i-1)*len +1):(i*len)] = t(genoData[,,i])
         genoweights[,3,((i-1)*len +1):(i*len)] = t(genoData[,,i]>0)
    }
    # resc = apply(genoweights,1,.getAvgWeights)
     ###else{
          resc = rep(rescale,dim(genoData)[2])
###      }
     #apply(apply(genoData,c(1,2),.entropy),2,sum)/dim(genoData)[1]

     
  }

  attr(genoweights,"dimg")<-dim(genoData)
  attr(genoweights,"rescale")<-resc
#  attr(genoweights,"levels")<-apply(genoweights,1,.getLevels)
#  print(resc)
   genoweights
}

.entropy<-function(v){
v = v/sum(v)
e = 0
for(k in 1:length(v)){
 if(v[k]>0)  e = e+-v[k]*log(v[k])

}
1+e
}

#assumes as baseLevel of 0, so should subtract 2 for CN
#note order of genoData is preserved
.mPhen <-
function(genoweights, pheno_, opts, subinds = (1:dim(genoweights)[3])){
  multiGen = FALSE
  rescale = attr(genoweights,"rescale")[subinds]
  JointModel=opts$mPhen.JointModel
  inverseRegress = opts$mPhen.inverseRegress
  
  if(JointModel){
    if(!inverseRegress) multiGen = TRUE
    else if(length(pheno_$phenN)==1) 	JointModel=FALSE
  }
  if(multiGen) JointModel=FALSE

  geno_header = dimnames(genoweights)[[1]]
  exactTest = !is.null(opts$mPhen.exactMethod)
#  exactMethod = "wald"
  multiPhen = JointModel
  singleOutput=TRUE
  zipOutput=FALSE
  step = 1
  if(length(pheno_$index)==1) multiPhen = FALSE
  if(length(pheno_$index_strat)>0 | length(pheno_$index_cov) > 0) exactTest = FALSE
  if(exactTest){
    multiPhen = FALSE
    multiGen = FALSE
  }
  funct = .getPvLrr
  pvFunct = .getPvLrrMult
  pvFunctMult = .getPvLrrAllGen


  if(inverseRegress) funct = .getPvRev
  if(multiPhen) {
   if(opts$mPhen.variable.selection) pvFunct = .getPvRevPleioVS
   else  pvFunct = .getPvRevPleio
  }
  if(opts$mPhen.scoreTest) {
      if(multiPhen) pvFunct = .ordTest1 
      else funct = .ordTestUnivariate
  }
  if(multiGen){
     if(opts$mPhen.variable.selection) pvFunctMult = .getPvLrrMultiGenVS
     else  pvFunctMult = .getPvLrrMultiGenNoVS
  }
  if(exactTest) funct = .getPvTable
  if(multiGen & dim(genoweights)[1]<=1) stop('need at least two genotypes for MultiGen model')
  geno2 = genoweights[,,subinds,drop=F]
  maxg = apply(geno2[,1,,drop=F],1,max,na.rm=TRUE)
  ming = apply(geno2[,1,,drop=F],1,min,na.rm=TRUE)
  nonint = maxg%%1 > 0
  nonint1 = ming%%1 > 0
  maxg[nonint] = floor(maxg[nonint])+1
  ming[nonint1] = floor(ming[nonint1])
  mini = min(max(maxg,na.rm=T),min(ming,na.rm=T)+1,na.rm=T)
  maxi = (max(maxg,na.rm=T))
  if(mini==-Inf) mini=0
  if(maxi==-Inf) maxi=3
  spl = seq(mini,maxi,step) - 0.5

  if(length(geno_header)==1) multiGen = FALSE
  fams <- pheno_$families
  if(inverseRegress & !is.null(opts$mPhen.link.geno)) fams<-rep(opts$mPhen.link.geno, length(fams))

  stratificNames = dimnames(pheno_$stratMatrix1)[[2]]
  if(multiGen) geno_header=c(geno_header,"comb") 
  if(length(stratificNames)==1) stratificNames = c("all")
  else stratificNames = c(stratificNames,"comb")
  phenN = pheno_$phenN
  if(multiPhen) phenN = c(phenN,"JointModel")

  res = .getArray(list(stratificNames,  geno_header,phenN,c("beta","pvalue","Nobs")))
  
 
  maf=NULL; 
   maf = rep(NA,dim(geno2)[1])
  for(k in 1:length(maf)){
     maf[k] = .getMaf(geno2[k,,,drop=F],0,2)
  }
hwe=NULL;
  if(opts$mPhen.calcHWE){
  hwe = rep(NA,dim(geno2)[1])
for(k in 1:length(hwe)){
     hwe[k] = .getHWE(geno2[k,,,drop=F],rescale[k])
   }
}
  strat = pheno_$stratMatrix1[subinds,,drop=F]
  for(j in 1:(dim(strat)[2])){
    inclu = strat[,j]
    res[j,,,] =  pvFunctMult(geno2,pheno_$phendata[,,subinds,drop=F],fams,inclu,pheno_$pheno_cov[subinds,,drop=F],rescale,pvFunct,funct,JointModel)
    if(getOption("mPhen.log10p",FALSE)){
    ## convert from loge to log10
      res[,,,2] = res[,,,2]/log(10)
     }
 }
  if(length(pheno_$index_strat)>0){ 
    res[dim(strat)[2]+1,,,] =apply(apply(res[1:(dim(strat)[2]-1),,,,drop=FALSE],c(2,3),.metaresInvVarianceFixed),c(3,1),t)
  }
 
  #pheno = pheno_$phendata
  #countsCase = array(NA,dim=dimcounts)
  #countsControl = array(NA,dim=dimcounts)
  #hwe_control = array(NA,dim=dimcounts[1:3])
  #hwe_case = array(NA,dim=dimcounts[1:3])
  #maf_control = array(NA,dim=dimcounts[1:3])
  #maf_case = array(NA,dim=dimcounts[1:3])
  #for(j in 1:(dimcounts[1])){
  #  stra = strat[,j]
  #  for(k in 1:length(families)){
  #     # print(paste(j,k))
  #    inclu = pheno[k,2,] & stra
  #    incluCont = inclu &  pheno[k,3,]
  #    incluCase = inclu &  pheno[k,3,]
  #    if(length(which(incluCont))>0)  countsControl[j,k,,] = t(apply(geno2[,1,incluCont,drop=FALSE],1,.getHist,spl))
  #    if(length(which(incluCont))>0)  countsCase[j,k,,] = t(apply(geno2[,1,incluCase,drop=FALSE],1,.getHist,spl))
  #    hwe_control[j,k,] =  apply(countsControl[j,k,,,drop=FALSE],3,.hwepall)
  #    hwe_case[j,k,] =  apply(countsCase[j,k,,,drop=FALSE],3,.hwepall)
  #    maf_control[j,k,] =  apply(countsControl[j,k,,,drop=FALSE],3,.mafCount)
  #    maf_case[j,k,] =  apply(countsCase[j,k,,,drop=FALSE],3,.mafCount)
  #  }
  #}
    list(Results=res,maf=maf,hwe=hwe)
}


##splits a string into component values
.splitImputed<-function(v1,tot=1000){
v = as.numeric(v1)   ####2014-1-1
ind = which(is.na(v))
v[ind] = tot -sum(v,na.rm=T)
return(v)
}



.getAvg<-function(line,vec) (vec%*%line)/sum(line)
.getAvgAll<-function(gen) apply(gen,c(1,2),.getAvg,as.numeric(dimnames(gen)[[3]]))


.nonNumeric <-
function(vec) sum(is.na(as.numeric(vec)))==length(vec)
.nonNumeric1 <-
function(mat) apply(mat,2,.nonNumeric)
.nullDim <-
function(matr){x = length(dim(matr)); x}
.numLev <-
function(vec) length(levels(as.factor(vec)))
.ordTest <-
function(gvar,phend,families, inclu,covar,totweight,functi){
  include =  apply(as.matrix(phend[,2,]),2,min) & inclu & as.numeric(gvar[2,])>0 &  gvar[3,]
  y = as.numeric(gvar[1,include])
  x = t(as.matrix(phend[,1,include] - phend[,4,include]))
  cats = max(y) + 1
  m = apply(x,2,mean,na.rm=TRUE)
  x = t(t(x) - m)
  d = ncol(x)
  gamma = as.vector(table(y)/length(y))
  phi = log(gamma)
  Unum1 = .getUNum1Matrix(d,y,x,phi,num1,cats)
  U = Unum1[[1]]
  num1 = Unum1[[2]]
  II = .getIMatrix(d,phi,cats,num1,x)
  zstats  = U/sqrt(diag(-II))
  pvs = 2*(1-pnorm(abs(U/sqrt(diag(-II)))))
  overallp = 	1 - pchisq(t(U) %*% solve(-II) %*% U, d)
  results = cbind(c(zstats, NA),c(pvs, overallp))
  results
}
.ordTest1 <-
function(gvar,phend,families, inclu,covar,totweight,functi){
 if(dim(covar)[2]>0) stop('ord test not coded up for covariates, use residuals')
  include =  apply(as.matrix(phend[,2,]),2,min) & inclu & as.numeric(gvar[2,])>0 &  gvar[3,]
  y = as.numeric(gvar[1,include])
  x = t(as.matrix(phend[,1,include] - phend[,4,include]))
  resu  = .ordTest2(y,x)
  resu = cbind(resu, rep(dim(covar)[1],dim(resu)[1]))
  resu
}




.ordTest2 <-
function(y,x,log.p=FALSE){
  cats = max(y) + 1
  m = apply(x,2,mean,na.rm=TRUE)
  x = t(t(x) - m)
  d = ncol(x)
  gamma = as.vector(table(y)/length(y))
  phi = log(gamma)
  U = matrix(ncol=1,nrow=d)
  num1 = list(length=d)
  for( j in 1:d ){
    U[j] = sum(y*x[,j])
    num1[[j]]=0
    for( i in 1:cats ){
      num1[[j]] = num1[[j]] + x[,j] * (i-1) * exp(phi[i])
    }
    U[j,1] = U[j,1] - sum(num1[[j]])
  }

  II = matrix(ncol=d,nrow=d)
  num2 = matrix(ncol=d,nrow=d)
  for( j in 1:d ){
    for( k in j:d ){
      num = 0
      for( i in 1:cats ){
        num = num + (i-1)*(i-1)*x[,j]*x[,k]*exp(phi[i])
      }
      II[j,k] = sum(num1[[j]]*num1[[k]])
      II[j,k] = II[j,k] - sum(num)
      II[k,j] = II[j,k]
    }
  }
  zstats  = U/sqrt(diag(-II))
  pvs = 2*(pnorm(abs(U/sqrt(diag(-II))),lower.tail=F,log.p=log.p))
  chisqv = t(U) %*% solve(-II) %*% U
  overallp = 	pchisq(chisqv, d,lower.tail=F,log.p=log.p)
  results = cbind(c(zstats, NA),c(pvs, overallp))
  results
}

##modified cholesky to keep column order
.chol<-function(cor,pivot=T){
ch = chol(cor,pivot=pivot)
if(pivot){
	 ch = (ch[,match(1:dim(ch)[[2]],attr(ch,"pivot"))])
}
if(!is.matrix(ch)) ch = matrix(ch)
dimnames(ch) = dimnames(cor)
ch
}

.genRandCovar<-function(noPhenos, orthogAll = 0.5, betav = 5, orthog_i = rbeta(noPhenos,orthogAll*betav,(1-orthogAll)*betav), sd = rgamma(noPhenos,shape=10,rate = 10), resample = FALSE,cor1 = NULL,dirichletScale = 5){
if(min(orthog_i)<0) stop(paste("orthog must be <1",min(orthog_i)))
if(noPhenos==1) return(as.matrix(1))

M = array(0,dim = c(noPhenos,noPhenos))

M[1,1] = 1.0
for(i in 2:noPhenos){

     nonorthog = 1-orthog_i[i]
     if(is.null(cor1)){
         v1 = .sampDirichlet(rep(1,i-1),weight=dirichletScale)*nonorthog
	 v = c(v1,1-nonorthog,rep(0,noPhenos - i))
     }else{
	     cor1[i] = min(nonorthog,cor1[i])
             v1 = .sampDirichlet(rep(1,i-2),weight=dirichletScale)*(nonorthog-cor1[i])
	     v = c(cor1[i], v1,1- nonorthog,rep(0,noPhenos - i))
     }
     M[,i] = sqrt(v) * sample(c(-1,1),length(v),replace=T)
}
res = (t(M) %*% M )* outer(sd,sd)
pnames = paste("pheno",(1:noPhenos),sep="")
if(resample){ 
 ord = sample(1:noPhenos)
 res = res[ord,ord]
}
dimnames(res) = list(pnames,pnames)
res
}

.reopenConnection<-function(outC){
  files = outC$files
  for(k in 1:length(files)){
      if(!is.null(files[k])) {
         nme = attr(files[k],"name")
	 files[[k]]<- file(nme, 'a')
  	 attr(files[[k]],"name")<-nme
 	 attr(files[[k]],"first")<-FALSE
 	 #attr(files[[k]],"open")<-TRUE
   }
  }
  outC$files = files
  outC$open=TRUE
  outC
}

##Prepares output files and directories.
##limit is obtained from mPhen.readLimitFile
.openOutputConnection<-function(outFiles="results/",genName,write, plot, limits=NULL, regopts = NULL
  ){
 if(!is.null(regopts)){
    nme = paste(outFiles,"/.opts_",genName,sep='')
    nmes = names(regopts) 
    charc = which(lapply(regopts,class)=="character")
    for(k in charc){
     regopts[[k]] = paste("\'",regopts[[k]],"\'",sep="")
    }
    x =  apply(cbind(paste("opts$mPhen.",nmes,sep=""),regopts),1,paste,collapse="= ")
    for(k in x)     write(k,file = nme)

 }

   nme = paste(paste(outFiles,"/",genName,sep=''),names(write),sep=".")
   files = list()
   for(k in 1:length(write)){
      if(write[[k]]) {
	 files[[k]]<- file(nme[k], 'w')
  	 attr(files[[k]],"name")<-nme[k]
 	 attr(files[[k]],"first")<-TRUE
 	 attr(files[[k]],"open")<-TRUE
      }else {
	files[[k]] = NULL
      }
  }
  
 names(files) = names(write)[1:length(files)]
  nmesplot = names(plot)

  plotFiles = paste(paste(paste(outFiles,"/",genName,sep=''),nmesplot,sep=""),".pdf",sep="")

   
   if(!is.null(limits)){
 	outFileLimit=paste(outFiles,"/limit_",genName,".txt",sep='')
	  write.table(limits,file=outFileLimit,col.names=FALSE,row.names=FALSE,sep="\t",append=F,quote=F)
   }
   list(files = files, plotFiles=plotFiles,pvs=list(), beta = list(), sampleQC=NULL,open=TRUE)
}



#converts multi-dimensional output array into a printable table
.flatten<-function(results, extra=dimnames(results$Res)[[2]],extra_head="pos"){
 res = results$Results
 hwe = results$hwe
 if(is.null(hwe)) hwe = rep(NA,length(results$maf))
 maf = results$maf
 dn = dimnames(res)
 dims=dim(res)
 norows=dims[1]*dims[2]*dims[3]
 matr = .getArray(list(1:norows,c(extra_head,"strata","snpid","phen","maf","hwe","beta","pv","nobs")))
 n=1
 for(i in 1:dims[1]){
     for(j in 1:dims[2]){
      for(k in 1:dims[3]){ 
      line =         c(extra[j],dn[[1]][i], dn[[2]][j],dn[[3]][k],sprintf("%2.3g",c(maf[j],hwe[j],res[i,j,k,])))
          matr[n,] = line
	  n =n+1
      }
    }
 }
matr
}

## method for sub-sampling positions for bootstrap which 
## makes sure it preserves same imputed samples
.subSample<-function(samples, dim3){
  # samples = 1:dim1
  # if(resample) {
  #    samples = sample(samples, dim1,replace=TRUE)
  # }
  dim1 = length(samples)
   samples1 = rep(NA, dim3*length(samples))
   for(k in 1:dim3){
     samples1[(k-1)*(dim1) + (1:(dim1))] = (k-1)*dim1 + samples
   }
     return(samples1)
}





#converts abs(beta*X) to printable form
.flattenBetaX<-function(sumBetaX){
   dims=dim(sumBetaX)
   dn = dimnames(sumBetaX)
   nrows=dims[1]*dims[2]
   matr =.getArray(list(1:nrows,c("PATIENT",dn[[3]])))
   n=1
    for(i in 1:dims[1]){
    for(j in 1:dims[2]){

	matr[n,] = c(paste(dn[[1]][i],dn[[2]][j],sep="_"),round(10*sumBetaX[i,j,])/10)
	n=n+1
    }
   }
   matr
}

##formats table for printing
##table is a table of  numerical values. 
## format is a string in sprintf format (e.g. "%5.3f")
## replaces null values with '-' for ease of reading
.formatTable<-function(table,format,nullv=0,nullrepl = "   -    ",  firstColHeader="SNP"){
  if(!is.matrix(table)) table = t(as.matrix(table))
  nmes=dimnames(table)[[1]]
  dim = dim(table)
  res = array(dim = c(dim[1],dim[2]+1))
  res[,1] = nmes
  for(i in 1:dim[1]){
   for(j in 1:dim[2]){
     if(is.na(table[i,j])){
		res[i,j+1] = sprintf(format,table[i,j])
     }else if(table[i,j]==nullv){
	     res[i,j+1] =  nullrepl

     } else {
	res[i,j+1] = sprintf(format,table[i,j])
     }
   }
  }
  dimnames(res) = list(NULL,c(firstColHeader,dimnames(table)[[2]]))
  res
}


##function for printing out results of simulation with a defined correlation
##matrix cor, effect direction v and betaAll
.writeTable<-function(resultsJ,file, rsid =dimnames(resultsJ$Res)[[2]],  format = "%3.2e",betaAll=NULL, v=NULL,cor=NULL){
  append=!attr(file,"first")
  index = 1
  resu = resultsJ$Res
  strats = dimnames(resultsJ$Res)[[1]]
  len =  dim(resu)[2]
  nmes = dimnames(resu)[[2]]
  numSnps = dim(resu)[[2]]
  numPheno = dim(resu)[[3]]
  label = rep("pval",len)
  label1 = rep("beta",len)
  for(k in 1:length(strats)){
   strata = rep(strats[k],len)
   beta = resu[k,,,1]
   pvalue = resu[k,,,2]
   if(is.null(dim(pvalue))){
    pvalue = array(data = pvalue,dim = c(dim(resu)[2],ncol = dim(resu)[3]),dimnames = dimnames(resu)[2:3])
   }
   if(is.null(dim(beta))){
     beta = array(data = beta,dim = c(dim(resu)[2],ncol = dim(resu)[3]),dimnames = dimnames(resu)[2:3])
   }

   if(!is.null(betaAll)){
     pvalue = cbind(betaAll,pvalue)
     beta = cbind(betaAll,beta)
  }
  write.table(file=file,cbind(strata,label,rsid,.formatTable(pvalue,format,nullv=1.0,nullrepl="   1    ")),row.names=F,quote=F,append=append, col.names=!append)
label = rep("beta",len)
 write.table(file=file,cbind(strata,label,rsid,.formatTable(beta,format,nullv=1.0,nullrepl="   1    ")),row.names=F,quote=F,append=append, col.names=FALSE)

  }
}

##Writes CNV genotype information in a bed format
## input is object returned by mPhen.readGenoConnection
## output is object returned by mPhen.openOutputConnection
##pheno_ is object returend by mPhen.preparePheno
##bed output is split into different groups based on the pheno values in pheno_, according to pre-specified quantiles
.writeBed<-function(pheno_,input,output,index, .quantiles = c(1/3,2/3,1),.base=getOption("mPhen.baseValue",2),
nme=paste(getOption("mPhen.resultsName","result"),index,sep="."),
phenN = pheno_$phenN){

 fil1=file(paste(output$outFileBed,nme,"bed",sep="."),'w')
  g0 = input$genoData
 cols = .makeColsForBed()
 pos = t(as.matrix(data.frame(strsplit(dimnames(g0)[[2]],"_"))))
 if(length(levels(as.factor(pos[,1])))>1) stop('only one chrom at time')
 chr = pos[1,1]
 starts = as.numeric(pos[,2])
 minpos = input$firstPos-1000
 lastP = max(starts[length(starts)]+1000,input$lastPos)
 maxpos = lastP +1000
 linestart=paste("browser position ",paste("chr",chr,sep=""),":",minpos,"-",maxpos,sep="")
    write(linestart,file=fil1,append = FALSE)
 nmes = dimnames(g0)[[1]]
 for(k1 in 1:length(phenN)){
 # phenN = phenN[k1]
    k = which(pheno_$phenN==phenN[k1])
    typenme = phenN[k]
 phend = pheno_$phendata[k,1,]
 quantiles = unique(quantile(phend,.quantiles))
 counts = rep(0,length(quantiles))
 prevj = -Inf
 for(j in 1:length(counts)){
  indsj = which(phend<=quantiles[j] & phend>prevj)
  if(length(indsj)>0){
  
    counts[j] = length(indsj)
    prevj = quantiles[j]
    if(j==1) dirnme = "Controls" 
    else if(j==length(quantiles)) dirnme = "Cases" 
    else dirnme = paste("Q",quantiles[j],sep="")
    line1 = "track name=\"Controls\" description=\"Controls\" itemRgb=\"On\"" 
     trackname = paste(nme,typenme,dirnme,sep="_")
    line1=gsub("Controls",trackname, line1)
 
    write(line1,file=fil1,append=T)

    for(k in indsj){

     arr1 = .getCNProfile(g0[k,],starts,lastP,.base,cols,nmes[k],paste("chr",chr,sep=""))
      write.table(arr1,row.names=F,col.names=F,quote=F,sep="\t",file=fil1,append=T)
    }
    }
 }
}
if(FALSE){
  snps = pos
  bedSnp = cbind(rep(paste("chr",chr,sep=""),dim(snps)[1]),snps[,2],snps[,2])
   typenme = paste("expt","probes",sep="_")
   dirnme = ""
   line1 = "track name=\"Controls\" description=\"Controls\" itemRgb=\"On\"" 
   line1=gsub("Controls",paste(typenme,dirnme,sep="_"), line1)
   write(linestart,file=fil1,append = T)
   write(line1,file=fil1,append=T)
   write.table(bedSnp,row.names=F,col.names=F,quote=F,sep="\t",file=fil1,append=T)
}
stratNames=1:length(output$pvs)
 for(kk in 1:length(output$pvs)){
  topl = output$pvs[[kk]]
  beta = output$beta[[kk]]
  stratN = stratNames[kk]
  .manhattanBed(topl,beta, fil1,paste("pvals",nme,stratN,sep="_"))
  
  }
close(fil1)
}


.summariseBootstrap<-function(res, thresh = 0.05){
resAll = array(0,dim = dim(res[[1]]$Res), dimnames = dimnames(res[[1]]$Res))
for(k in 1:(dim(resAll)[[2]])){
  for(j in 1:length(res)){
    summ = which(res[[j]]$Res[,k,,2]<thresh)
    resAll[,k,summ,2] =  resAll[,k,summ,2]+1
    if(j==1) resAll[,k,summ,1] =  res[[j]]$Res[,k,summ,1]
    else     resAll[,k,summ,1] =  resAll[,k,summ,1]+res[[j]]$Res[,k,summ,1]
  }
  for(m in 1:(dim(resAll)[[3]])){
    resAll[,k,m,1] = resAll[,k,m,1]/resAll[,k,m,2]
  }
   resAll[,k,,2] = resAll[,k,,2] / length(res)
}
resAll[,,,3] = res[[1]]$Res[,,,3]
resAll
}


.parseOpts<-function(){
args = commandArgs()
argsst=which(args=="--args")
if(length(argsst)>0){
  if( argsst <= length(args)){
  args = args[(argsst+1):length(args)]
  print(args)
  eqind = grep("=",args)
  if(length(eqind)>0){
    argsM = as.matrix(data.frame(strsplit(args[eqind],"=")))
    toassign = as.list(argsM[2,])
    names(toassign) = argsM[1,]
    options(toassign)
  }
 }
}
}


.getLimitMatrixFromList<-function(l1){
  limit = cbind(rep('pheno', .length(l1$phenotypes)), l1$phenotypes)
  if(.length(l1$covariates>0)) limit = rbind(limit, cbind(rep('covar', .length(l1$covariates)), l1$covariates))
  if(.length(l1$resids>0)) limit = rbind(limit, cbind(rep('resid', .length(l1$resids)), l1$resids))
  if(.length(l1$strats>0)) limit = rbind(limit, cbind(rep('strat', .length(l1$strats)), l1$strats))
  if(.length(l1$excls>0)) limit = rbind(limit, cbind(rep('excl', .length(l1$excls)), l1$excls))
  limit
}

.readLimitFile<-function(limitfile="./limit.txt", phenNames=NULL){
 todo_ = read.table(limitfile,as.is=T,fill=T,header=F,comment.char='')[,1:3,drop=F]
 hashinds = grep('^#',todo_[,1])
 if(length(hashinds)>0)   todo_ = todo_[-hashinds,,drop=F]
  ind2 = grep(':',todo_[,2])
  while(length(ind2)>0){
        k= ind2[1]
	ln = strsplit(todo_[k,2],':')[[1]]
	#pref = strsplit(ln[1],"[0-9]")[[1]][1]
        #indin = nchar(pref)
        #if(indin>0) ln = substr(ln,start=(indin+1),stop=200)
        if(nchar(ln[[1]])==0)  newt=phenNames[as.numeric(ln[2]):as.numeric(ln[3])]
        else  newt=paste(ln[[1]],as.numeric(ln[2]):as.numeric(ln[3]),sep="")
     
        todo_ = rbind(as.matrix(todo_[-k,]),as.matrix(cbind(todo_[k,1],newt,todo_[k,3])))
        ind2 = grep(':',todo_[,2])
  }
  ind2 = grep('\\*',todo_[,2])
  while(length(ind2)>0){
        k= ind2[1]
	ln = strsplit(todo_[k,2],'\\*')[[1]][1]
        newt = grep(ln,phenNames,value=T)
        todo_ = rbind(as.matrix(todo_[-k,]),as.matrix(cbind(todo_[k,1],newt,todo_[k,3])))
        ind2 = grep(':',todo_[,2])
  }

  todo_ = unique(todo_)##
  
  return(todo_)
#  todo_ = .fixDuplicatesInTodo(todo_)
#  print(todo_)
#outn= .getSuffix(todo_)
#  outn=substr(outn,1,50)
#list(todo = todo_,outn =outn)
}




## updates sample qc based on current set of results
.updateSampleQC<-function(genoData,results,sampleQC = NULL, pvthresh =0.01,maf_thresh = 0.05, hwe_thresh = 1e-4){
   res1 = results$Results
   sumBetaXNew =.getSumBetaX(genoData,res1,results$maf,results$hwe,pvthresh = pvthresh,maf_thresh = maf_thresh,hwe_thresh = hwe_thresh)
   if(is.null(sampleQC)) sampleQC = sumBetaXNew else sampleQC= sampleQC+sumBetaXNew
   sampleQC
}


#calculates sum(abs(beta*x)) for each sample over all snps to identify
#individuals driving the result
## not used if  using inverse regression
.getSumBetaX<-function(genD,res,maf,hwe,pvthresh=0.05,maf_thresh=0.05, hwe_thresh = 1e-4){
 sumBetaX=NULL;
 nosnp = length(dimnames(res)[[2]])
 if(is.null(maf)) maf = rep(0,nosnp)
 if(is.null(hwe)) hwe = rep(1,nosnp)
  geno_header = dimnames(genD)[[1]]
 if(length(dim(genD))>2) genoData=.getAvgAll(genD)
 else genoData = genD
  if(!is.null(maf) & ! is.null(hwe)){
  li = dimnames(res)[c(1,3)]
  li[[3]] = geno_header
  sumBetaX = .getArray(li,v=0)
   dm = dim(sumBetaX)
      for(k in 1:nosnp){
	    nonNAind = !is.na(genoData[,k])
       if( maf[k] < maf_thresh & hwe[k] > hwe_thresh){
       for(i in 1:dm[1]){
        for(j in 1:dm[2]){
           pv = res[i,k,j,2]
           beta = res[i,k,j,1]
	if(!is.na(beta)){
 	   if(pv<pvthresh){
                 sumBetaX[i,j,nonNAind] = sumBetaX[i,j,nonNAind]+abs(beta*(genoData[nonNAind,k]))
              	   }
          }
       }
       }

      }
  }
 }
  sumBetaX
}

  .sample1<-function(probv)  sample(0:1, 1, prob = probv)

##simply samples from a dirichlet with probability distribution p
## and weight w.
.sampDirichlet<-function(p,weight=1){
alpha = p*weight
scale = 1
vec = rep(0,length(alpha))
for(i in 1:length(vec)){
  vec[i] = rgamma(1,shape=alpha[i],scale=scale)
}
vec = vec/sum(vec)
vec
}



###HAS BEEN MODIFIED 29-11
##calculates correlation matrices and effect sizes
#beta0 is expressed as proportion of covar[1,1,]
#v is a rotational vector such that after rotation main effect is in direction of rotation
#returns cholesky decomposition of rotated and non-rotated phenotypes
## phenos is a simulated dataset from the covariance matrix, unless specified (in which case cov has no effect and can be NULL)
.cholesky<-function(cov, .nsamp = 1e5,      
    v =NULL,
    phen = if(is.null(v))NULL else  .sampJoint(.chol(cov2cor(cov),pivot=T),num=.nsamp,sd=sqrt(diag(cov))),
    means= if(is.null(phen)) rep(0,dim(cov)[[2]]) else apply(phen,2,mean))
 { 
   if(!is.null(v)){
       nonzero = which(v!=0)
       if(length(nonzero)==0) v = NULL
       else if(length(nonzero)==1 & nonzero[1]==1) v= NULL
   }
   if(is.null(v)){
      M = diag(rep(1,dim(cov)[2]))
      covT = cov
   }else{
     M = .gs(v) 
  #   Minv = t(M)
 ### note v %*% M is on x axis
     phenosT = phen %*% M  ###t(Minv%*%t(phen))
     covT = cov(phenosT,use="complete.obs")
   }
    corT = cov2cor(covT)
    dimnames(corT) = list(dimnames(cov)[[2]],dimnames(cov)[[2]])
    cholT = .chol(corT,pivot=T)
    list(cholT = cholT,  Minv = t(M),sdT = sqrt(diag(covT)),means=means,corT=corT)
}


.pow<-function(x,base=10,normalise=TRUE,multFactor=1000){
   v = (10^as.numeric(x))
   if(normalise) v = v/sum(v)
   if(multFactor>1){
	 v = round(v*multFactor)
	 maxind = which(v==max(v))
	 v[maxind] = v[maxind] + multFactor - sum(v)
   }
   return(v)
}

.unchanged<-function(v1) as.numeric(v1)
.cnvdistr<-function(v1){
 cn = rep(0,5)
 for(k in seq(1,length(v1),2)){
    p = as.numeric(v1[k+1])
    CN =nchar(sub(",","",v1[k]))+1 
    cn[CN] = round(1000*p)
 }
 cn
}
  
#  if(length(v)==3){
#     v = v[!(v==min(v))]
#   }
#   v
#}



.bafratio<-function(v) v[2]/(v[2]+v[1])
.bafratio<-function(v) v[2]/(v[2]+v[1])

.plotBaf<-function(geno,  usesum=FALSE, thresh = 50){
x = as.numeric(as.matrix(data.frame(strsplit(dimnames(geno)[[2]],'_')))[2,])
sumy = apply(geno,c(1,2),sum)
if(usesum) { plot(x,sumy)
}else{
  y = apply(geno,c(1,2),.bafratio)
  
 plot(x[sumy>thresh],y[sumy>thresh])
}
}

##FOR SIMULATION ######
##Aim is to project out all variations corresponding to vars variables (this is just indices)
## from the matrix Dall.
## removes all variation in P from Dall
## returns the vector v such that Dall - P%*%v is orthogonal to all columns
## in P 
.projOut<-function(Dall, vars = NULL, P = Dall[,vars,drop=F]){
  ncol = dim(Dall)[[2]]
  nrow = dim(Dall)[[1]]
  svd = svd(P)
  U = svd$u
  V = t(svd$v)
  if(dim(P)[2]==1) {
      D = as.matrix(svd$d)
  }else {
      D = diag(svd$d)
  }
  Vinv = solve(V)
  Dinv = solve(D)
  colU = dim(U)[[2]]
  proj = matrix(0,nrow = dim(U)[[2]],ncol = ncol)
  for(j in 1:ncol){ 	
	 for(k in 1:colU){
           proj[k,j] = U[,k] %*% Dall[,j]
	}	
  }  
  Vinv %*% Dinv %*% proj
}

## returns an orthogonal matrix M, such that 
## M %*% v  = c(v%*%v,0,0,0)
.gs<-function(v, thresh = 1e-8){
 ###res = svd(cbind(v,basis))$u  ### not in same direction
 v1 = v/sqrt(v%*%v)
 basis=cbind(v1,diag(rep(1,length(v))))
 W = .projOut(basis,c(1))
 basis1 = basis - basis[,1,drop=F] %*% W
 svd = svd(basis1[,-1,drop=F])
 res = cbind(v1,svd$u[,svd$d>thresh,drop=F])
 res
 ###t(res) ## this is same as inverse as orthonormal
}

#basic function for simulating effect of genotype of phenotype in a linear direction
.sampJoint<-function(ch,x=NULL,num=length(x),varexp=0,
                ncol =  dim(ch)[2],
		means=rep(0,ncol),
			sd = rep(1,dim(ch)[2])){

	
        res = matrix(rnorm(num*ncol,0,1),nrow =num, ncol = ncol)
        if(!is.null(x) & varexp>0){
             beta = sqrt(((varexp/(1-varexp)))/var(x))
             res[,1] =res[,1] + x*beta
        }
	res = res %*% ch     
              #t(t(ch)%*%t(z1))

        for(i in 1:(dim(res)[2])){
          res[,i] = .destandardise(res[,i],means[i],sd[i])
	}

        dimnames(res)[[2]] =  dimnames(ch)[[2]] 
res
}

##END FOR SIMULATION ### 


#### FOR QQ PLOTTING MULTIPLE VARIABLES ###


.qb<-function(np,conf,ntot,sigThresh=1.0){
  logp = getOption("mPhen.log10p",FALSE)
  Obs = rep(0,np)
  if(logp) sigThresh = 10^sigThresh;
  for(s in 1:np)
              {
	        prop = (s/(np+1))*sigThresh
                s1 = ceiling(prop*(ntot+1))
                Obs[s]=qbeta(conf, s1, ntot-s1+1)
              }
 if(logp) return(log10(Obs))
else return( Obs)
}


.getPv<-function(pvs1,effPhe, opts, beta=NULL){
logP = getOption("mPhen.log10p",FALSE)
.sigThresh =opts$mPhen.sigThresh
.limitP = opts$mPhen.limitP
if(logP){
	 .limitP = log10(.limitP)
	.sigThresh = log10(.sigThresh)
}
pool=opts$mPhen.pool
pvs = pvs1
dimp = dim(pvs)
nosnp = dimp[[1]]
nophe = dimp[[2]]
np = nosnp*nophe
ntot = effPhe*nosnp
pv = matrix(0,nrow = np,ncol=7)
nmes = rep(dimnames(pvs)[[1]],nophe)
dimnames(pv)= list(nmes,c("pv","index",0.5,0.95,0.05,0.5,"beta"))
for(i in 1:nophe){
   indexes = ((i-1)*nosnp+1):(i*nosnp)
   pv[indexes ,2] = rep(i,nosnp)
   pv[indexes ,1] = pvs[,i]
   if(!is.null(beta)) pv[indexes, 7] = beta[,i]
}
pv = pv[!is.na(pv[,1]),,drop=F]
pv = pv[pv[,1]<=.sigThresh,,drop=F]
pv[pv[,1]<.limitP,1] = .limitP
pv = pv[order(pv[,1]),,drop=F]
np = dim(pv)[[1]]
if(pool){
 pv[,3] = .qb(np,0.5, ntot,.sigThresh)  #(1:np)/(np+1)
 pv[,4] = .qb(np,0.95, ntot,.sigThresh)
 pv[,5] = .qb(np,0.05, ntot,.sigThresh)
}else{
 for(i in 1:nophe){
        indexes = which(pv[,2]==i)
	nosnp1 = length(indexes)
    	 pv[indexes,3] = .qb(nosnp1,0.5, nosnp1,.sigThresh)  #(1:np)/(np+1)
         pv[indexes,4] = .qb(nosnp1,0.95, nosnp1,.sigThresh)
         pv[indexes,5] = .qb(nosnp1,0.05, nosnp1,.sigThresh)
    }
}
pv[,6] = pv[,3]

pv
}
#.getPvAll<-function(pvs1,effPhe,.sigThresh=1.0){
#  pv = .getPv(pvs1[,1,drop=F],.sigThresh)
#  for(k in 2:(dim(pvs1)[[2]])){
#    pv1 = .getPv(pvs1[,k,drop=F],.sigThresh)
#    pv1[,2] = rep(k,dim(pv1)[[1]])
#    pv = rbind(pv,pv1)
#  }
#  pv=pv[order(pv[,1]),]
#  pv
#}


.manhattanBed<-function(pvs1,logfun,beta, title,fil1,opts){
effPhe = 1.0
 order = order(apply(pvs1,2,min,na.rm=T))
pv <-.getPv(pvs1,effPhe, opts,beta)
if(dim(pv)[[1]]>0){
nophe = dim(pvs1)[[2]]
pos_info = as.matrix(((data.frame(strsplit(dimnames(pv)[[1]],"_")))))
pos=as.numeric(pos_info[2,])
end=as.numeric(pos_info[3,])
chroms=pos_info[1,]
chromsl = levels(as.factor(chroms))
chr = paste("chr",chromsl[1],sep="")
if(length(chromsl)>1) stop('only works for one chrom')
 for(i in 1:nophe){
       subind = pv[,2]==order[i]
       data = pv[subind,1]
       posj = pos[subind]
       endj = end[subind]
       betaj = pv[subind,7]
    line1 = "track type=bedGraph name=\"Controls\" description=\"Controls\" itemRgb=\"On\"  graphType=bar" 
    line1=gsub("Controls",paste(title,dimnames(pvs1)[[2]][order[i]],sep="_"), line1)
 
    write(line1,file=fil1,append=T)
    for(j in 1:length(data)){
     
      logpv = -logfun(data[j])
      if(betaj[j]>0) logpv = -logpv
      posjj = posj[j]
      write(paste(chr,posjj,endj[j],sprintf("%5.3g",logpv),sep=" "),file=fil1,append=T)
    }
 
 }

}
}



### Assumes snp names have format chr_pos
.manhattan<-function(pvs1,logfun, beta1, title=expression(paste("-log10","Manhattan")),opts){
 order = order(apply(pvs1,2,min,na.rm=T))
 effPhe = 1
step=opts$mPhen.noPhenosPerPlot
chromsToDo = opts$mPhen.chromsToDo
epsilon = opts$mPhen.epsilon
pv =.getPv(pvs1,effPhe,opts, beta1)
if(dim(pv)[[1]]>0){
nophe = dim(pvs1)[[2]]
ymax=max(8,logfun(min(pv[,1])))
norep = ceiling(nophe/step)
pos_info = as.matrix(((data.frame(strsplit(dimnames(pv)[[1]],"_")))))
#print(pos_info)
#print(dimnames(pv))
pos=as.numeric(pos_info[2,])
chroms=pos_info[1,]
toincl = !is.na(pos)
if(length(which(toincl))==0){
  pos = 1:length(pos)
  toincl = rep(TRUE,length(pos))
}
if(!is.null(chromsToDo)){
  toincl = chroms%in%chromsToDo & toincl
}

  pos_info = pos_info[,toincl]
  pos= pos[toincl]
  chroms = chroms[toincl]
  pv = pv[toincl,]

chromsl = levels(as.factor(chroms))

if(length(chromsl)>1){
for(k in chromsl){
    indk= which(chroms==k)
    posk = pos[indk]
    mink = 0  #
    maxk=max(posk)
    pos[indk] = ((posk - mink)/(maxk-mink))*(1-2*epsilon)+epsilon

}

   if(length(grep("_",chroms))>0){
     chroms = as.matrix(((data.frame(strsplit(chroms,"_")))))[1,]
   }
   chroms[chroms=="X"]=23
   chroms[chroms=="Y"]=24
   chroms[chroms=="XY"]=25
   chroms = as.numeric(chroms)
   pos = pos + chroms   
}

xstart = min(pos)
xend = max(pos)
xstart = xstart  - 0.1*(xend-xstart)

for(jk in 1:length(chromsl)){
 for(kk in 1:norep){
 start = ((kk-1)*step) 
 index=(start+1):min(kk*step,nophe)
 for(i in index){
       subind = pv[,2]==order[i]
       data = pv[subind,1]
       chrs = chroms[subind]
       subind1 = which(chrs==chromsl[jk])
         col = jk
         if(!opts$mPhen.colourByChroms) col = i-start
       if(jk==1 & i==(start+1)){
	 plot(pos[subind][subind1],logfun(data[subind1]),   pch=i-start, cex=opts$mPhen.cex,col=col,type="p",
		xlim = c(xstart,xend),ylim=c(0,ymax), ylab=expression(paste("-log"[10]," (Expected p-value)")), 
	   xlab=paste("Position on",pos_info[1,1]), main=title)
       }
       else  lines(pos[subind][subind1],logfun(data[subind1]),   pch=i-start, cex=opts$mPhen.cex,col=col,type="p")
 
 }
txtsize = min(0.5, 25/length(index))
legend("topleft", legend =dimnames(pvs1)[[2]][order[index]],cex=txtsize,col=(1:step),pch=1:step)
}
}
}
}



.qqplot<-function(pvs1,effPhe, logfun, beta1, title=expression(paste("-log"[10],"QQ")),opts){
 order = order(apply(pvs1,2,min,na.rm=T))
pv =.getPv(pvs1,effPhe, beta=NULL, opts)
step = opts$mPhen.noPhenosPerPlot
nophe = dim(pvs1)[[2]]


 ymax=logfun(min(pv[,1]))
 xmax=logfun(min(pv[pv[,3]>0,3]))

xmax = min(50,xmax)
norep = ceiling(nophe/step)
if(length(pv[,1])==0){
  xmax = 1
  ymax = 1
}
xmax = max(xmax,ymax)
ymax = xmax
for(kk in 1:norep){
 ## first plot all invisibly
 plot(logfun(pv[,c(3,1)]),xlim = c(0,min(opts$mPhen.minlogpv,xmax)), ylim=c(0,min(opts$mPhen.minlogpv,ymax)), xlab=expression(paste("-log"," (Expected p-value)")), ylab=expression(paste("-log"," (Observed p-value)")), main=title, pch=20, cex=0.5,col=0)

 lines(logfun(pv[,6]), logfun(pv[,6]), col="grey", lw=2)
 lines(logfun(pv[,4]), logfun(pv[,3]), col="grey", lw=2)
 lines(logfun(pv[,5]), logfun(pv[,3]), col="grey", lw=2)
 start = ((kk-1)*step) 
 index=(start+1):min(kk*step,nophe)
 for(i in index){
       data = pv[pv[,2]==order[i],c(3,1),drop=F]
       lines(logfun(data),   pch=i-start, col=i-start,type="p",cex=opts$mPhen.cex)
 }

txtsize = min(0.5, 25/length(index))
legend("bottomright", legend =dimnames(pvs1)[[2]][order[index]],cex=txtsize,col=(1:step),pch=1:step)
}
}


## Makes a QQ plot from a table of pvalues.  Each column is pvalue from a different phenotype, and each row is a different marker
##pvs can also be a list of pvalue tables (e.g. by strata), in which case multiple qqplots are calculated
# dimnames of table is in format ("position\tchr")
.qq<-function(pvs,effPhe,logfun,
              opts =list( 
                         sigThresh=getOption("mPhen.sigThresh",1.0),
                         step=getOption("mPhen.noPhenosPerPlot",20),
                         limitP = 1e-200,
                         title = "",
                         minlogpv=10,
                         cex=0.5,pool=TRUE))
{
  if(!is.list(pvs)) pvs = list(pvs)
  stratNames=names(pvs)
  if(is.null(stratNames)) stratNames = 1:length(pvs)
  for(kk in 1:length(pvs)){
    topl = pvs[[kk]]
    .qqplot(topl,effPhe,logfun,beta1 = NULL,  paste(opts$mPhen.title,stratNames[kk],"logQQ Pool = ",opts$mPhen.pool, "effPhe ",effPhe),opts = opts)
  }
}

## Makes a manhattan plot from a table of pvalues.  Each column is pvalue from a different phenotype, and each row is a different marker
##pvs can also be a list of pvalue tables (e.g. by strata), in which case multiple qqplots are calculated
##dimnames(pv)[[1]]) must be of  format ("position\tchr")
## only p-values less than .sigThresh are plotted
## only 'step' phenotypes are plotted on each manhattan plot
#.sigThresh=.sigThresh,
.manh<-function(pvs,effPhe,logfun, opts = list(
                                               sigThresh=getOption("mPhen.sigThresh",1.0),
                                               step=getOption("mPhen.noPhenosPerPlot",20),
                                               limitP = 1e-200,
                                               title="", 
                                               minlogpv=10,
                                               cex=0.25,
                                               chromsToDo = NULL,colourByChroms=FALSE))

{
  if(!is.list(pvs)) pvs = list(pvs)
  stratNames=names(pvs)
  if(is.null(stratNames)) stratNames = 1:length(pvs)
  for(kk in 1:length(pvs)){
    topl = pvs[[kk]]
    stratN = stratNames[kk]
    .manhattan(topl,logfun, beta1=NULL, paste(opts$mPhen.title,stratN,"Manhattan"),opts = opts)
  }
}

##pv heatmap fuction
.heatm<-function(pvs,effPhe, logfun, opts =list(title="",indexMatch = NULL, Colv=TRUE),symbreaks = FALSE, file ="", xlab="phenotypes",ylab="snp"){
  if(!is.list(pvs)) pvs = list(pvs)
  title=opts$mPhen.title
  if(symbreaks) col = c(rev(colorRampPalette(brewer.pal(9,"Reds"),bias = 0.5)(250)), colorRampPalette(brewer.pal(9,"Blues"), bias = 0.5)(250))
  else col=colorRampPalette(brewer.pal(9,"Blues"))(250)
  Colv=opts$mPhen.Colv
  indexMatch = opts$mPhen.indexMatch
  dendro = Colv
  for(k in 1:length(pvs)){
    main = paste(title,names(pvs)[[k]], if(symbreaks) "beta" else "pv")
    pvm = pvs[[k]]
    if(is.null(pvm)){
      return(NULL)
    }
    if(!is.matrix(pvm)) pvm = as.matrix(pvm)
    heatMatrix66 = logfun(t(pvm))
    d = dim(heatMatrix66)
    if(d[1]==1) heatMatrix66 = rbind(heatMatrix66,rep(0,d[2]))
    d = dim(heatMatrix66)
    if(d[2] ==1) heatMatrix66 = cbind(heatMatrix66,rep(0,d[1]))  #1e-10*runif(d[1]))
    d = dim(heatMatrix66)
    #if(k==1){
     #if(!is.null(Colv) & (is.numeric(Colv) | is.logical(Colv))){
      # if(!is.na(Colv)){
      #  naind = length(which(!is.na(as.vector(heatMatrix66))))
      #   if(Colv & naind==0) dendro<-NA
      # }
      #}

    #}
    dn = dimnames(heatMatrix66)
    cols66<-rep("white",ncol(heatMatrix66))
    if(!is.null(indexMatch)){
      indexChange<-grep(indexMatch,colnames(heatMatrix66))
      cols66[indexChange]<-"red"
    }
    cexCol = 0.5*min(1,0.2 + 1/log10(d[1]))
    cexRow =  0.5*min(1,0.2 + 1/log10(d[2]))
 
      p3 <- tryCatch(heatmap(t(heatMatrix66),Rowv=NA,
                    na.rm=TRUE,
                    na.color=getOption("mPhen.nacolor","black"), 
                    Colv=dendro, col = col,dendrogram = "column",
                    density.info="none",trace="none",xlab=xlab,ylab=ylab,key=T,margins=c(8,8),
                    RowSideColors = c(cols66),scale="none",cexRow = cexRow,cexCol=cexCol,main = main,
                    symbreaks = symbreaks),error = function(e) NULL)
    #legend(x="topleft",legend="-log(p)")
     if(!is.null(p3)){

      #  print(heatMatrix66)
    if(symbreaks) legend(x="topleft",legend="beta")
    else legend(x="topleft",legend="-log(p)")
    #legend(x="bottomleft",legend="1a",bty="n",cex=2.5)
    if(k==1)dendro = p3$colDendrogram
    }else{
#	print(heatMatrix66[1:5,1:5])	
	heatmap(t(heatMatrix66),Rowv=NA,
                    na.rm=TRUE,
                    na.color=getOption("mPhen.nacolor","black"), 
                    Colv=NA, col = col,dendrogram = "none",
                    density.info="none",trace="none",xlab=xlab,ylab=ylab,key=FALSE,margins=c(8,8),
                    RowSideColors = c(cols66),scale="none",cexRow = cexRow,cexCol=cexCol,main = main,
                    symbreaks = symbreaks)

#	print(dendro)
	warning(paste('unable to make heatmap',file[1], main[1]))	
   }
  }
  invisible(dendro)
}

##fingerprint plot
.fprint<-function(genD,
                  Res,
                  opts,
                  snpinds = 1:(dim(genD)[[2]]),
                  phenoinds =1:(dim(Res$Results)[[3]]), 
                  plot=TRUE){
  colsc = 2  
  #colsc = opts$mPhen.colsc
  res = Res$Results[,,phenoinds,,drop=F]
  maf = Res$maf
  nosnp = length(snpinds)
  geno_header = dimnames(genD)[[1]]
  if(length(dim(genD))>2) genoData=.getAvgAll(genD)   else genoData = genD
  li = dimnames(res)[c(1,2,3)]
  li[[2]] = dimnames(res)[[2]][snpinds]
  li1 = dimnames(res)[c(1,3)]
  li[[4]] = geno_header
  li1[[3]] = geno_header
  sumBetaX = .getArray(li1,v=0)
  intens = .getArray(li,v=0)
  dm = dim(sumBetaX)
  pvs = as.vector(res[,snpinds,,2])
  inds = which(pvs < 0.05)
  if(length(inds)==0){
    sig = colsc*max(abs(as.vector(res[,snpinds,,1])),na.rm=T)  # 
  }else{
    sig = colsc*max(abs(as.vector(res[,snpinds,,1]))[pvs<0.05],na.rm=T)  # 
  }
  sig = max(0.1,sig) 
  colsall = c()

  for(k1 in snpinds){
    k = snpinds[k1]

    nonNAind = !is.na(genoData[,k])
    gx = genoData[nonNAind,k]
    levs = levels(as.factor(gx))
    for(i in 1:dm[1]){
      for(j in 1:dm[2]){
        beta = res[i,k,j,1]
        if(is.na(beta)) beta =0
        sumBetaX[i,j,nonNAind] = beta*gx + sumBetaX[i,j,nonNAind] 
        intensj  =2*(pnorm(as.numeric(levs)*abs(beta),0,sig)-0.5)
        colj = if(beta<0) rgb(0,intensj,0) else rgb(intensj,0,0)
        colsgx = rep(0,length(gx))
        for(jj in 1:length(levs)){
          indjj = gx==levs[jj]
          colsgx[indjj] = jj + length(colsall)
        }
        intens[i,k1,j,nonNAind] = colsgx 
        colsall = c(colsall,colj)            
      }
    }
  }
  resuall = list()
  m = 1
  dmn = dimnames(sumBetaX)
  for(i in 1:dm[1]){
    for(j in 1:dm[2]){
      risk  = sumBetaX[i,j,]
      betas = res[i,snpinds,j,1]
      pvs = res[i,snpinds,j,2]
      ord = rev(order(risk))
      gt = betas>0
      lt = betas<=0        
      ordy = c(which(lt)[(order(pvs[lt]))], which(gt)[rev(order(pvs[gt]))])
      intensij = intens[i,,j,] 
      if(is.null(dim(intensij))) intensij = matrix(intensij,ncol = dim(genD)[[1]],nrow = length(snpinds))
      resu = list(risk =risk,cols=colsall,intens = intensij,
                  ord = ord, ordy = ordy,
                  ynme = dimnames(genoData)[[2]][snpinds], xnme = dimnames(genoData)[[1]], nme =paste(dmn[[1]][i], dmn[[2]][j],sep="_"))
      resuall[[m]] = resu
      m = m+1
    }

  }
  if(plot) {
    .plotFingerprint(resuall)
  }
  invisible(resuall)
}



.plotFingerprint<-function(resuall){
for(i in 1:length(resuall)){
resu = resuall[[i]]
nmes = resu$ynme
risk = resu$risk
intens = resu$intens
cols = resu$cols
ord = resu$ord
ordy = resu$ordy
nmey = resu$ynme
dm = dim(intens)
    x = t(intens)[ord,ordy,drop=F]
    dmx = dim(x)
 if(length(nmes)==dmx[2]) {
 par(mfrow = c(2,1))
    image(1:dmx[1],1:dmx[2],x,col = cols,main = resu$nme,xlab="indiv", ylab="snps",col.axis='white')
   axis(4,1:dmx[2], nmes,cex.axis = 0.5)
    plot(1:dmx[1], exp(risk[ord])/(1+exp(risk[ord])))
 }
}
}




##USED IN MAKING BED



.makeColsForBed<-function(){
red = c(255,0,0)
blue = c(0,0,255)
maxCN=4
minCN=0
cols = list()
for(i in minCN:maxCN){
  p = (maxCN-i)/(maxCN-minCN)
  colv = red * p +  blue * (1-p) 
  cols[[i+1]] = paste(colv,collapse=",")
}
cols[[1]] =  "250,218,221"
cols[[2]] = "255,0,0"
cols[[3]] = "0,0,0"
cols[[4]] = "0,255,0"
cols[[5]] = "0,0,255"

names(cols) =minCN:maxCN
cols
}

.getCNProfile<-function(row,starts, end,.base,cols,nme,chr){
 start = starts[1]
 row0 = row[2:length(row)]
 row1 = row[1:(length(row)-1)]
 br = which(row0!=row1)
 res = array(0,dim = c(1+length(br),9))
 st=1
   col = cols[[round(1+row[st] +.base)]]
 cn = rep(0,1+length(br))
 if(length(br)>0){
  for(k in 1:length(br)){
   en = br[k]+1 
   end1 = starts[en]
   cn[k] = round(row[st] )
   res[k,] = (c(chr,start,end1,nme,1000,'+',  start,end1,col))
   st = en
   start = starts[st]
   col = cols[[1+round(row[st] +.base)]]
 }
 }

  res[length(br)+1,] = (c(chr,start,end,nme,1000,'+',  start,end,col))
  cn[length(br)+1] = round(row[st])
 res[cn!=0,,drop=F]
}



###END PLOTTING ##


####COMMANDS FOR MULTIPHEN-BIGP ###

.orthogonalise<-function(pheno){
W = diag(rep(1,dim(pheno)[2]))
pheno1 = array(dim = dim(pheno))
pheno1[,1] = pheno[,1]
for(k in 2:(dim(pheno)[[2]])){
  ind = apply(pheno[,1:k,drop=F],1,.cntNa)==0
  W[1:(k-1),k] = -.projOut(pheno[ind,k,drop=F],P = pheno[ind,1:(k-1),drop=F])
  pheno1[,k] = pheno[,1:k] %*% W[1:k,k]
 }
dimnames(pheno1) = list(dimnames(pheno1)[[1]],paste(dimnames(pheno)[[2]],"orth",sep="_"))
list(pheno = pheno1, W = W)
}

#.projOutRem<-function(Dall,P){
#   W = .projOut(Dall, P = P)
#  R = P %*% W
#  return(Dall - R)
#}



### END BIGP ###


## genoInputs is a list of connections from different datasets, obtained from mPhen.openGenoConnection.  If there is more than one entry, 
##.batch is number of entries to read i
##.baseValue is the base genotype value, usually 0 for genotypes and 2 for copy number
## the results will be merged
## returns NULL if no lines of input are left
## .thin is used to only select a subset of lines. If value is 10, then only every 10th line included
###Returns a list, which includes $genoData, which is a matrix of genotypes
.readGenoConnection<-function(genoInput,opts=mPhen.options("geno.input")){
  .batch = opts$mPhen.batch
  .baseValue = opts$mPhen.baseValue
  .format = opts$mPhen.format
  .starting = opts$mPhen.starting
  .ending = opts$mPhen.ending
  .thin = opts$mPhen.thin
  rsidToDo = opts$mPhen.rsidToDo
  if(is.null(rsidToDo)) rsidToDo = attr(genoInput,"rsid") 
  if(!is.null(genoInput$genoFiles)) genoInput = list(genoInput)
  res = list()
  for(k in 1:length(genoInput)){
    type = genoInput[[k]]$type
    if(type=="zip"){
      res[[k]] = (.readZipConnection(genoInput[[k]],opts, .batch=.batch,.baseValue=.baseValue,
                                     .format = .format,.starting = .starting,.ending = .ending,
                                     .thin = .thin,rsidToDo = rsidToDo))
    }
    else   
      if(type=="bed"){
        res[[k]] = (.readPlinkConnection(genoInput[[k]],opts, .batch=.batch,.baseValue=.baseValue,
                                       .format = .format,.starting = .starting,.ending = .ending,
                                       .thin = .thin,rsidToDo = rsidToDo))
    }
    else {
      res[[k]] = (.readVCFConnection(genoInput[[k]],opts, .batch=.batch,.baseValue=.baseValue,
                                     .format = .format,.starting = .starting,.ending = .ending,
                                     .thin = .thin,rsidToDo = rsidToDo,type = type))
    }
    if(length(res)<k) break
  }
  if(length(res)!=length(genoInput)){
    #	print(length(names(res)))
    #        print(length(names(genoInput)))
    #     warning(paste("names length mismatch", paste(names(res),collapse=".")
    #        ,paste(names(genoInput),collapse=".")))
    return(NULL)
  }
  names(res) = names(genoInput)
  if(length(res)>1){
    allGenoData = list()
    for(k in 1:length(res)){
      allGenoData[[k]] = res[[k]]$genoData
      ###if(res[[k]]$chrom!=res[[1]]$chrom) stop('chroms should match')
    }
    res[[1]]$genoData = .mergeFiles(allGenoData, markerCol=FALSE, inclrow = attr(genoInput,"inclrow"), 
                                    anyCol = !opts$mPhen.onlyCommon)
  }
  res[[1]]
}


#Closes genotype connection.  genoInput is obtained from mPhen.openVCFConnection
.closeGenoConnection<-function(genoInput){
 if(!is.null(genoInput$type))  genoInput = list(genoInput)

 for(k in length(genoInput)){
  if(genoInput[[k]]$type!="zip"){
   conn = genoInput[[k]]$con
   close(conn)
  }
 }
}

# Will merge across multiple .genoFiles (i.e. if you have two cohorts measured on overlapping markers, you wish to merge in-silico)
## Can currently read 'GT' and 'GL' fields
#Outputs the header and re-ordering to be consistent with ordering of 'indiv' (if provided')
.openGenoConnection<-function(genoFiles,opts, indiv = NULL){
  
   if(is.null(names(genoFiles))) names(genoFiles) = paste("cohort",1:length(genoFiles),sep="")
  usePosition = opts$mPhen.matchPosition

  info_ind = if(usePosition) 1 else 3
  if(!is.list(genoFiles)) genoFiles = list(genoFiles)
  if(!is.list(indiv) & !is.null(indiv)) indiv = list(indiv)
  res = list()
  snpids = NULL
  rsmatch = NULL
  sampleids = list()
  for(k in 1:length(genoFiles)){
   indivk = NULL
   if(!is.null(indiv)) indivk = indiv[[k]]
   bedf = paste(genoFiles[[k]],"bed",sep=".")
   if(length(grep('.zip$',genoFiles[[k]]))>0){
            res[[k]] = (.openZipConnection(genoFiles[[k]],indiv=indivk))
            snpids1 = (res[[k]]$snps[,res[[k]]$posi[info_ind]])
        if(k==1){
              		 snpids = snpids1
                 rsmatch = array(dim = c(length(snpids), length(genoFiles)), dimnames = list(snpids, genoFiles))

        }
                 rsmatch[,k] = match(snpids, snpids1)
   }
   else if(file.exists(bedf)) res[[k]] = (.openPlinkConnection(genoFiles[[k]],indiv=indivk))
   else res[[k]] = (.openVCFConnection(genoFiles[[k]],indiv=indivk))
   sampleids[[k]] = res[[k]]$sampleids
  }
  if(length(res)==1){
     attr(res[[1]],"sampleids")<- res[[1]]$sampleids
     attr(res[[1]],"inclrow")<- 1:length(res[[1]]$sampleids)
     return(res[[1]])
  }
  else{
    names(res) = names(genoFiles)
    onlyCommon = opts$mPhen.onlyCommon
     
      if(onlyCommon & !is.null(rsmatch)){
            print("matching rsids")
            attr(res,"rsid") = dimnames(rsmatch)[[1]][apply(rsmatch,1,.cntNa)==0]
            print(attr(res,"rsid"))
      }
      inclrow = .findInclusion(sampleids,match=FALSE)
      sampleidsall = sampleids[[1]]
      for(i in 2:length(sampleids)){
	   sampleidsall = c(sampleidsall,sampleids[[i]][inclrow[[i]]])
      }
   attr(res,"inclrow")<-inclrow
   attr(res,"sampleids") = sampleidsall
      return(res)
   }
}



### READING ZIP AND VCF FILES
.openVCFConnection<-function(.genoFile, indiv = NULL){
 sampids = c()
 type = 'txt'
 if(length(grep('vcf',.genoFile))>0) type = 'vcf'
 if(length(grep('impute',.genoFile))>0) type = 'impute'
  con<- file(.genoFile, 'r')
if(type=='impute'){
  if(is.null(indiv)) stop('must specify sample order to use impute format via the indiv variable')
  header = c("IMP-STATUS","ID","POS","REF","ALT",indiv)
}else{
  header="##"
  while(length(grep('^##', header))>0){
   header = readLines(con,n=1)[1]
 }
   header = strsplit(gsub("#","",header),'\t')[[1]]
}
   vcf  = .readHeader(header,type)
   if(!is.null(indiv) & type!='impute'){
	   index_sample_inGenoFile<-match(indiv,header)
	 #  index_sample_inGenoFile <- index_sample_inGenoFile[!is.na(index_sample_inGenoFile)]
   }else{
	   index_sample_inGenoFile<-(vcf$firstGenoIndex:length(header))
   }
   sampids = c(sampids,header[index_sample_inGenoFile])
   res =    list(con=con,genoFiles = .genoFile, header=header,index=index_sample_inGenoFile, sampleids = header[index_sample_inGenoFile],
 	posi = vcf$posi, formatIndex=vcf$formatIndex,sampleids = sampids,type = type, firstGenoIndex = vcf$firstGenoIndex)
   res
}


.openZipConnection<-function(genoFiles, indiv= NULL,zipsep="\t"){
  res = list() 
  geno_header = NULL
  SNPs_all = NULL
  marker = c()
  sampleids= NULL
  posi=NULL
  formatIndex = NULL
  header= NULL
  start=c()
  end = c()
  for(i in 1:length(genoFiles)){
   genoFile = genoFiles[[i]]
   SNPs <- read.table(unz(genoFile, "SNPS"),as.is=T,sep=zipsep,header=FALSE)
   Samples<- read.table(unz(genoFile, "Samples"),as.is=T,sep=zipsep,header=FALSE)
   Name<- read.table(unz(genoFile, "Name"),as.is=T,fill=T,sep=zipsep,header=FALSE)
   names(Samples) = grep('^$',Name[3,],value=TRUE,invert=TRUE)[1:dim(Samples)[2]]
   names(SNPs) = grep('^$',Name[2,] ,value=TRUE,invert=TRUE)[1:dim(SNPs)[2]]
   header1 = Samples[,1]
   vcf = .readHeader(Name[2,],'zip')
   SNPs[,vcf$posi[2]] = sub("chr","", SNPs[,vcf$posi[2]])
   start = c(start, SNPs[1,vcf$posi[1]])
   end = c(end, SNPs[dim(SNPs)[1],vcf$posi[1]])
    geno_header1 = grep('^$',Name[1,],invert=T,value=TRUE)
    marker = c(marker,rep(i,dim(SNPs)[[1]]))
    posi1 = vcf$posi
    formatIndex = vcf$formatIndex
    if(i==1){ 
         geno_header = geno_header1
         SNPs_all = SNPs
	 header = header1
         posi=posi1
    }
    else if(length(which(geno_header!=geno_header1))>0) stop('zip files need to match')
    else if(length(which(header!=header1))>0) stop('zip files need to match')
    else if(length(which(posi!=posi1))>0) stop('zip files need to match')
    else{
         SNPs_all = rbind(SNPs_all,SNPs)
    }
   }
   if(!is.null(indiv)){
 	   index_sample_inGenoFile<-match(indiv,header)
 	  # index_sample_inGenoFile <- index_sample_inGenoFile[!is.na(index_sample_inGenoFile)]
    }else{
 	   index_sample_inGenoFile<-(1:length(header))
    }
      ord = order(SNPs_all[,posi[1]])
      list(con = NULL, genoFiles = genoFiles, header =geno_header,snps = SNPs_all[ord,], sampleids =header[index_sample_inGenoFile],
  	posi = posi, formatIndex=formatIndex, index = index_sample_inGenoFile, type="zip", marker=marker[ord], startend =cbind(start,end))
}

#genoConnection <- setClass("genoConnection",  
#       slots = c(genoFiles="character", header="character",
#			snps="data.frame", sampleids = "character", posi="numeric",
#			formatIndex="integer", index ="integer",type="character",
#			marker="integer",startend="matrix"  ))
            


.openPlinkConnection<-function(root, indiv = NULL){
 sampids = c()
  bed.file = paste(root, '.bed', sep = '')   
  bed.file.size = file.info(bed.file)$size # the bed file size in bytes
  famtable = read.table(paste(root, '.fam', sep = ''),as.is=T) # number of individuals
  header = famtable[,1]
  sample.size = dim(famtable)[1]
  pheni = 5:(dim(famtable)[2])
  pheno = famtable[,pheni,drop=F]
  phenN = paste("Phenotype",(pheni-5),sep="")
  phenN[1] = "Sex"
  dimnames(pheno) = list(header,phenN)
  snp.size = ceiling(sample.size/4) # no. bytes storing the data for 1 SNP for all individuals; each byte stores 4 people
  n.snps = round((bed.file.size-3)/snp.size) # the total .bed file size is 3 plus size of ids x snps combo hence removing 3
  bim = read.table(paste(root,'.bim', sep = ''), header = FALSE, colClasses = c('character', 'character', 'character', 'character', 'NULL', 'NULL'))
#  snp.names = as.factor(bim[,2]) # the bim file has the snps names in the 2nd column
#  snp.names = as.vector(snp.names)
  bin.connection = file(bed.file, 'rb') # opens a connection with .bed file
  test.bytes = readBin(bin.connection, what = "raw", n = 3) # bytes 1 and 2 store infor about file type, byte 3 about array type
  if(!identical(as.character(test.bytes), c('6c', '1b', '01'))) {
    stop('BED file not a v0.99 SNP-major BED file, please re-encode the data as v0.99 SNP-major file')
  }
   if(!is.null(indiv)){
	   index_sample_inGenoFile<-match(indiv,header)
#	   #index_sample_inGenoFile <- index_sample_inGenoFile[!is.na(index_sample_inGenoFile)]
   }else{
	   index_sample_inGenoFile<-(1:length(header))
   }
   sampids = c(sampids,header[index_sample_inGenoFile])
   res =    list(con=bin.connection,genoFiles = root, header=header,index=index_sample_inGenoFile, sampleids = header[index_sample_inGenoFile],
 	posi = bim[,c(4,1,2)], formatIndex=NULL,snp.size = snp.size,n.snps = n.snps, sampleids=sampids, type="bed",pheno=as.matrix(pheno))
  res
}

.readLinesPlink<-function(bin.connection,n,snp.size,sample.size){
 genotype = array(NA, dim = c(sample.size,n))
 for(k in 1:n){
    r.bin.snp = readBin(bin.connection, what = 'raw', n = snp.size)
    bin.snp = matrix(as.numeric(rawToBits(r.bin.snp)), ncol = 2, byrow = TRUE)
    if(dim(bin.snp)[1]==0){
      genotype[genotype == -9] = NA 
      return(genotype[,1:k-1,drop=F])
    }
    
   bin.snp = bin.snp[1:sample.size,]
    genotype[,k] = bin.snp[,1] + bin.snp[,2] - 10 * ((bin.snp[,1] == 1) & (bin.snp[,2] == 0))
  }
  genotype[genotype == -9] = NA 
  genotype
}


###modified 29-11
.readPlinkConnection<-function(genoInput,opts,.batch=20,.baseValue=0,.format="GT",.starting = 0, .ending = 900000000,.thin = 1, rsidToDo = NULL){
  usePosition = opts$mPhen.matchPosition
  info_ind = if(usePosition) 1 else 3 
  subinds = genoInput$index
  sample.size = length(genoInput$sampleids)
  posi = NULL
  if(is.null(rsidToDo)){
    input = matrix(1)
    lastPos = -1
    kj =0
    while(dim(input)[2]>0 & lastPos < .starting & kj <genoInput$snp.size){
      toRead = min(.batch,genoInput$n.snps - kj)
      input<-.readLinesPlink(genoInput$con, n=toRead,snp.size = genoInput$snp.size,sample.size = sample.size)
   ##  print(dim(input))
  ##   print(max(subinds))
    
      input = input[subinds,,drop=F]
      
      if(.thin>1) input<-input[,seq(1,length(input),by=.thin),drop=F ]
      len<-dim(input)[2]     
      firstPos = genoInput$posi[kj+1,1]
      lastPos = genoInput$posi[kj+toRead,1]
      posi = genoInput$posi[(kj+1):(kj+toRead),]
      if(firstPos > .ending){
        input = NULL
        break;
      }
      kj = kj+toRead
    }
  }
  else {
    if(length(rsidToDo)==0) return(NULL)
    indsToDo = match(rsidToDo,genoInput$pos[,info_ind])
    indsToDo = sort(indsToDo[!is.na(indsToDo)])
    len<-length(indsToDo)
    posi = genoInput$posi[indsToDo,]
    input =array(NA, dim = c(sample.size,length(indsToDo)))
    for(kj in 1:(genoInput$n.snps)){
      indsI = which(indsToDo==kj)
      if(length(indsI)>0){
        input[,indsI[1]]<-.readLinesPlink(genoInput$con, n=1,snp.size = genoInput$snp.size,sample.size = sample.size)[subinds,,drop=F]
      }
      else {
        tmp<- readBin(genoInput$con, what = 'raw', n = genoInput$snp.size)
      }
    }
    firstPos = genoInput$posi[1,indsToDo[1]]
    lastPos = genoInput$posi[1,indsToDo[length(indsToDo)]]
  }
  nsamp = sample.size
  genoData = input
  dimnames(genoData)[[1]] = genoInput$sampleids 
  chrom=posi[,2]
  rsid = posi[,3]
  #pos_info = apply(posi,1,paste,collapse="\t")
  nbsnp = rep(NA,len)
  snpids=posi[,3]
  indsToKeep = 1:len
  print(dim(genoData))
  if(dim(genoData)[[2]]==0) return (NULL)
  dimnames(genoData)[[2]] = apply(posi[,2:1],1,paste,collapse="_")[1:(dim(genoData)[2])]
  
  res1 = list(len=length(indsToKeep),nbsnp = nbsnp[indsToKeep], firstPos=firstPos,lastPos=lastPos, genoData=genoData[,indsToKeep,drop=F],
              chrom=chrom, rsids=posi[,info_ind])
  res1
}



###HAS BEEN MODIFIED 29-11
.readVCFConnection<-function(genoInput,opts, .batch=20,.baseValue=0,.format="GT",.starting = 0, .ending = 900000000,.thin = 1, rsidToDo = NULL, type = 'vcf'){
  sep_ = '\t'
  if (type=='vcf') split1 =  '[\t:]' 
  else if(type=='impute'){
    split1 = ' '
    .format = "IMP"
    sep_=' '
  }
  else split1 = '\t'
  vcf = type=='vcf'
  if(is.null(rsidToDo)){
    input = c(1)
    lastPos = -1
    while(length(input)>0 & (lastPos < .starting)){
      input<-readLines(genoInput$con, n=.batch)
      len<-length(input)     
      if( len==0) break
      if( is.null(input[1])) break
      if(.thin>1) input<-input[seq(1,length(input),by=.thin) ]
      splits=strsplit(input[1],split1)[[1]]
      firstPos = as.numeric(splits[genoInput$posi[1]])
      splits=strsplit(input[len],split1)[[1]]
      lastPos = as.numeric(splits[genoInput$posi[4]])
      if(firstPos > .ending){
        input = c()
        break;
      }
    }
  }
  else {
    if(length(rsidToDo)==0) return(NULL)
    for(k in 1:length(rsidToDo)) rsidToDo[k] = paste(rsidToDo[k],sep_,sep="")
    input = rep("",length(rsidToDo))
    done = rep(FALSE,length(rsidToDo))
    while(length(which(!done))>0){
      input1<-readLines(genoInput$con, n=.batch)
      if(length(input1)==0){
        break;
      }
      notd = which(!done)
      for(k in notd){
        subl = grep(rsidToDo[k], input1,value=TRUE)
        if(length(subl)>0){
          input[k] = subl[1]
          done[k] = TRUE
        }
      }
    }
    input = input[which(done)]
    len = length(input)
    splits=strsplit(input[1],'\t')[[1]]
    firstPos = as.numeric(splits[genoInput$posi[1]])
    splits=strsplit(input[len],'\t')[[1]]
    lastPos = as.numeric(splits[genoInput$posi[4]])
    #rsidToDo = c()   #rsidToDo[!done]  
  }
  nsamp = length(genoInput$sampleids)
  if(length(input)==0) return(NULL)
  if(is.null(input[1])) return(NULL)
  chrom=splits[genoInput$posi[2]]
  rsids = rep("",len)
  nbsnp = rep(NA,len)
  snpids=rep("",len)
  posi = genoInput$posi
  formatInd = 1
  .split = .getVCFSplit(.format[1])
  .process = .getVCFProcess(.format[1])
  dn = .getVCFDimNames(genoInput$sampleids,snpids,.format[1])
  genoData = .getArray(dn[[1]])
  formatIndex = genoInput$formatIndex
  firstIndex = if(vcf) formatIndex else formatIndex +1
  if(type=='impute') firstIndex = genoInput$firstGenoIndex
  nsamp1 =  length(genoInput$header)-(firstIndex-1)
  chrpos = rep(0,len)
  subinds = genoInput$index
  dimg = dim(genoData)
  #if(length(formatIndex)==1)
  for (i in 1:len){
    line = strsplit(input[i],split1)[[1]]
    formatLen = (length(line) - (firstIndex-1))/(nsamp1)
    rsids[i] = line[posi[3]]  #paste(line[posi[1:3]],collapse='\t')
    if(opts$mPhen.matchPosition) rsids[i] = line[posi[1]]
    chrpos[i] = as.numeric(line[posi[1]])
    if(posi[2]<0) {
      fileline =  strsplit(genoInput$genoFiles,"/")[[1]]

      chrom_ = strsplit(fileline[length(fileline)],"\\.")[[1]][1]
      snpi = paste(chrom_,line[unique(posi[1])],sep='_')
    }
    else snpi = paste(line[unique(posi[c(2,1)])],collapse='_')
    snpids[i] = snpi 
    if(posi[5]>=0) nbsnp[i]  =  as.numeric(line[posi[5]])
    if(type=='impute'){
      line1 = 1000*t(apply(matrix(line[firstIndex:length(line)],ncol = 3,byrow = TRUE),c(1,2),as.numeric))
    }
    else {
      if(formatLen>1){
        .formatV = line[seq(formatIndex,formatIndex+formatLen-1)]
        formatInd = which(.formatV==.format)
        if(length(formatInd)==0){
          warning('did not find format id')
          next
        }
        subinds = (genoInput$index - firstIndex)*formatLen+firstIndex+(formatInd-1)
      }
      if(.split==""){
        line1 = as.numeric(line[subinds])
      } 
      else {
        m = as.matrix(data.frame(strsplit(line[subinds],.split)))
        line1 = apply(m,2,.process)
      }
    }
    if(length(dimg)>2) genoData[,i,] = t(line1) else genoData[,i] = line1 - .baseValue
  }
  indsToKeep = which(chrpos>=.starting & chrpos<=.ending)
  firstPos = chrpos[indsToKeep[1]]
  lastPos =chrpos[indsToKeep[length(indsToKeep)]]
  print(paste("first","last",firstPos,lastPos))
  dimnames(genoData)[[2]] = snpids
  list(len=length(indsToKeep),nbsnp = nbsnp[indsToKeep], firstPos=firstPos,lastPos=lastPos, 
       genoData=if(length(dimg)>2) genoData[,indsToKeep,,drop=F] else genoData[,indsToKeep,drop=F] 
       ,rsids=rsids[indsToKeep],chrom=chrom)
}


##reads full zip file
.readZip<-function(genoFile,zipsep="\t"){
  SNPs <- read.table(unz(genoFile, "SNPS"),as.is=T,sep=zipsep,header=FALSE)
  Samples<- read.table(unz(genoFile, "Samples"),as.is=T,sep=zipsep,header=FALSE)
  Name<- read.table(unz(genoFile, "Name"),as.is=T,fill=T,sep=zipsep,header=FALSE)
  types = grep('^$',Name[1,],invert=T,value=T)
   names(Samples) = grep('^$',Name[3,],value=TRUE,invert=TRUE)[1:dim(Samples)[2]]
   names(SNPs) = grep('^$',Name[2,] ,value=TRUE,invert=TRUE)[1:dim(SNPs)[2]]
  res = array(dim = c(dim(Samples)[[1]],dim(SNPs)[[1]],length(types) ),dimnames = list(Samples[,1],SNPs[,4],types))
  res = res[,,]
  len = length(dim(res))
  for(k in 1:(dim(SNPs)[[1]])){
      t = read.table(unz(genoFile, SNPs[k,4]),as.is=T,fill=T,sep=zipsep,header=FALSE)
      if(len==2) res[,k] = t[,]
      else res[,k,]  = t 
  }
  res
}


.getVCFSplit<-function(.format){
 .split = rep("",length(format))
 for(k in 1:length(.format)){
    if (.format[k] == "GT"){
     .split[k] =   "[\\|\\/]"
   }else if (.format[k] == "GL"){
     .split[k] =  ","
   }else if(.format[k] =="IMP"){
     .split[k] =  ","
   }else if(.format[k] =="AD"){
     .split[k] =  ","
   }else if(.format[k] =="distr"){
     .split[k] =  "[;=]"
   }
  }
  .split
}

.sum1<-function(vec) sum(as.numeric(vec))

.getVCFProcess<-function(.format){
 # .process = rep(NA, length(format))
 k = 1
# for(k in 1:length(.format)){
    if (.format[k] == "GT"){
      .process =.sum1
   }else if (.format[k] == "GL"){
      .process = .pow
   }else if(.format[k] =="IMP"){
      .process = .splitImputed
   }else if(.format[k] =="AD"){
      .process = .unchanged
   }else if(.format[k]=="distr"){
     .process = .cnvdistr
   }else{
	.process = .unchanged
   }
 #}
.process
}

 .getVCFDimNames<-function(sampleids, snpids,.formats){
   dn = vector("list",length(.formats))
   for(k in 1:length(.formats)){
     .format = .formats[k]
    if(length(which(c("GL","IMP")==.format))>0){
   ### NOTE, ASSUMES 3 GENOTYPES HERE
     dn[[k]] = list(sampleids,snpids,c(0,1,2))
   }else if("distr"==.format){
   ### NOTE, ASSUMES 5 CNV GENOTYPES HERE
     dn[[k]] = list(sampleids,snpids,c(0,1,2,3,4))
   }else if(length(which(c("AD")==.format))>0){
   ### NOTE, ASSUMES 3 GENOTYPES HERE
     dn[[k]] = list(sampleids,snpids,c(0,1))
   }else{
        dn[[k]] = list(sampleids,snpids)
   }
   }
dn
}

##modified 1/2014
.readZipConnection<-function(genoInput,opts, .batch=10,.baseValue=0,.format="geno",.starting = 0, .ending = Inf,.thin = 1, rsidToDo = NULL, zipsep="\t"){
  genoFile = genoInput$genoFiles
  posi = genoInput$posi
  if(length(genoInput$header)==1) genoInd = 1 
  else{
    genoInd = .findIn(genoInput$header,.format,critical=TRUE)
  }
  .format = genoInput$header[genoInd]
  snps = genoInput$snps
  pos =  snps[,posi[1]]
  rsids = snps[,posi[3]]
  if(is.null(rsidToDo) ){
    todo =  which(pos>=.starting & pos<=.ending)
    if(length(todo)==0){
      return(NULL)
    }
    if(.thin>1) todo<-todo[seq(1,length(input),by=.thin) ]
    todo = todo[1:min(.batch,length(todo))]
  }
  else {
    if(opts$mPhen.matchPosition) tomatch = pos else tomatch = rsids
    todo = match(rsidToDo,tomatch)
    todo = todo[!is.na(todo)]
    todo = todo[pos[todo]>=.starting & pos[todo]<=.ending]
    if(length(todo)==0){
      return(NULL)
    }
  }
  marker = genoInput$marker[todo]
  #startend = genoInput$startend
  pos = pos[todo]
  snps = snps[todo,,drop=F]
  rsids = rsids[todo]   
  len = length(todo)
  #if(len==0){
  #  next     
  #}
  firstPos = pos[1]
  lastPos =  pos[len]
  nsamp = length(genoInput$sampleids)
  input = array(NA, dim = c(len,nsamp))
  todrop = rep(FALSE, length(todo))
  for(j in 1:length(todo)){
    rsidj = rsids[j]
    if(length(grep(':',rsidj))>0){
      t1 = system(paste("unzip -p",genoFile[marker[j]],rsidj),intern=TRUE)
      t1 = t(as.matrix(data.frame(strsplit(t1,"\t"))))
    }
    else t1 = read.table(unz(genoFile[marker[j]], rsidj),as.is=T,fill=T,sep=zipsep,header=FALSE)
    if(dim(t1)[2]>0)  input[j,]<- t1[genoInput$index,min(genoInd, dim(t1)[2])]	
    else todrop[j] = TRUE
  }
  input = input[,!todrop,drop=FALSE]
  pos = pos[!todrop]
  snps = snps[!todrop,,drop=F]
  rsids = rsids[!todrop]   
  chrom=snps[1,2]
  pos_info = rep("",len)
  nbsnp = rep(NA,len)
  snpids=rep("",len)
  formatInd = 1
  .split = .getVCFSplit(.format[1])
  .process = .getVCFProcess(.format[1])
  dn = .getVCFDimNames(genoInput$sampleids,snpids,.format[1])
  formatIndex = genoInput$formatIndex
  chrpos = rep(0,len)
  #subinds = genoInput$index
  #if(length(formatIndex)==1)
  genoData = .getArray(dn[[1]])
  dimg = dim(genoData)
  for (i in 1:len){
    line = input[i,]
    line_ = snps[i,]
    formatLen = 1
    pos_info[i] = line_[[posi[3]]] #paste(line_[posi[1:3]],collapse='\t')
    if(opts$mPhen.matchPosition) pos_info[i] = line_[[posi[1]]]
    chrpos[i] = as.numeric(line_[posi[1]])
    snpids[i] = paste(line_[unique(posi[c(2,1)])],collapse='_')
    if(posi[5]>=0) nbsnp[i]  =  as.numeric(line_[posi[5]])
    if(.split==""){
      line1 = as.numeric(line)
    }
    else {
      line2 = sub(',,',',0,',sub("^,","0,",sub(",$",",0",line)))
      line1 = as.matrix(data.frame(lapply(strsplit(line2,.split),.process)))
      #	   line1 = apply(m,2,.process)
    }
    if(length(dim(genoData))>2) genoData[,i,] = t(line1) else genoData[,i] = line1 - .baseValue
  }
  print(paste("first","last",firstPos,lastPos))
  #   if(length(unique(snpids)) < length(snpids)) snpids = pos_info
  dimnames(genoData)[[2]] = snpids
  list(len=length(todo),nbsnp = nbsnp, firstPos=firstPos,lastPos=lastPos, genoData=genoData,rsids=pos_info,chrom=chrom)
}

## END ZIP FILES

##Carries out a meta-analysis on a list of pvalue tables
##pvs is a list of tables. 
##pvs can be obtained from output$pvs where output is obtained from mPhen.writeToOutputConnection
##NEED TO UPDATE THIS WITH BETAS
.metaAnalysis<-function(pvs, betas){
#   print(names(pvs))
  arr = .getArray(list(names(pvs), pvs[[1]][,3],dimnames(pvs[[1]])[[2]][9:11]))
  for(j in 1:length(pvs)){
    arr[j,,] = as.matrix(pvs[[j]][,9:11])
  }
  results = pvs[[1]]
  for(j in 1:(dim(results)[[1]])){
  results[j,9:11] = .metaresInvVarianceFixed(arr[,j,])
 }
 results
}


.updateRelatedness<-function(geno, K,
na.replace=0,standardise = TRUE){
 if(standardise){
	 geno1 = apply(geno,2,.standardise,na.replace)
 }else{
	 geno1 = apply(geno,2,.centralise,na.replace)
 }
 K = K + geno1 %*% t(geno1)
 K
 }



.proc <-
function(vec) (vec - 0.5)*vec*(1-vec)
.proc1 <-
function(vec) vec





.tabulateWithWeights <- function(phen,vars,weights,totweight){
  tab = table(list(vars,phen))
  nme = dimnames(tab)
  nme1 = as.numeric(nme[[1]])
  nme2 = as.numeric(nme[[2]]) 
  tab1 = matrix(0,nrow = length(nme1), ncol  = length(nme2))
  for(i in 1:length(nme1)) 
    for(j in 1:length(nme2)) 
      tab1[i,j] = sum(weights[vars==nme1[i] & phen==nme2[j]])
  tab2 = tab1/totweight
  tab2
}

Try the MultiPhen package in your browser

Any scripts or data that you put into this service are public.

MultiPhen documentation built on Feb. 9, 2020, 5:07 p.m.