R/gpart.R

Defines functions LDblockHeatmap GPART BigLD CLQD combineOverlapSamegene geneinfoExt namingRegion2 mergeSmallRegion splitBigLD LDblockGeneMerge mergeOverlapGene LDblockSplit appendcutByForce subBigLD constructLDblock findMaximumIndept intervalCliqueList cutsequence dataPreparation heuristicCLQ newSplitCliques CIDp_strLD haploview_blocks_classify em_phase_hethet calc_lnlike_quantile cubic_real_roots calc_lnlike genoDp2 genoCubeDp genoDp CIDp estiD matCubeX2 matCubeX pairCubeX VCFtogeno

Documented in BigLD CLQD GPART LDblockHeatmap

#' @useDynLib gpart, .registration = TRUE
#'

VCFtogeno <- function(vcf) {
  .Call(`_gpart_VCFtogeno`, vcf)
}

pairCubeX <- function(b1, b2) {
  .Call(`_gpart_pairCubeX`, b1, b2)
}

matCubeX <- function(geno) {
  .Call(`_gpart_matCubeX`, geno)
}

matCubeX2 <- function(geno1, geno2) {
  .Call(`_gpart_matCubeX2`, geno1, geno2)
}

estiD <- function(pA, pB, n11, n12, n21, n22, n1212) {
  .Call(`_gpart_estiD`, pA, pB, n11, n12, n21, n22, n1212)
}

CIDp <- function(b1, b2) {
  .Call(`_gpart_CIDp`, b1, b2)
}

genoDp <- function(geno, strLD = TRUE, lower = 0.7, upper = 0.98) {
  .Call(`_gpart_genoDp`, geno, strLD, lower, upper)
}

genoCubeDp <- function(geno) {
  .Call(`_gpart_genoCubeDp`, geno)
}

genoDp2 <- function(geno1, geno2, strLD = TRUE, lower = 0.7, upper = 0.98) {
  .Call(`_gpart_genoDp2`, geno1, geno2, strLD, lower, upper)
}

calc_lnlike <- function(known11, known12, known21, known22, center_ct_d, freq11, freq12, freq21, freq22, half_hethet_share, freq11_incr) {
  .Call(`_gpart_calc_lnlike`, known11, known12, known21, known22, center_ct_d, freq11, freq12, freq21, freq22, half_hethet_share, freq11_incr)
}

cubic_real_roots <- function(coef_a, coef_b, coef_c, solutions) {
  .Call(`_gpart_cubic_real_roots`, coef_a, coef_b, coef_c, solutions)
}

calc_lnlike_quantile <- function(known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile) {
  .Call(`_gpart_calc_lnlike_quantile`, known11, known12, known21, known22, unknown_dh, freqx1, freq1x, freq2x, freq11_expected, denom, quantile)
}

em_phase_hethet <- function(known11, known12, known21, known22, center_ct, onside_sol_ct_ptr) {
  .Call(`_gpart_em_phase_hethet`, known11, known12, known21, known22, center_ct, onside_sol_ct_ptr)
}

haploview_blocks_classify <- function(counts, lowci_max, lowci_min, recomb_highci, strong_highci, strong_lowci, strong_lowci_outer, is_x, recomb_fast_ln_thresh) {
  .Call(`_gpart_haploview_blocks_classify`, counts, lowci_max, lowci_min, recomb_highci, strong_highci, strong_lowci, strong_lowci_outer, is_x, recomb_fast_ln_thresh)
}

CIDp_strLD <- function(b1, b2, lower, upper) {
  .Call(`_gpart_CIDp_strLD`, b1, b2, lower, upper)
}


################# CLQD subFUNCTIONS ################
# subfunctions : 1.newSplitCliques 2. heuristicCLQ
#1
newSplitCliques <- function(cliques.bp, gapdist)
{
  nowlist <- lapply(cliques.bp, sort)
  fixlist <- NULL
  repeat {
    need.split <- which(lapply(nowlist, function(x) max(diff(x)) > gapdist) == TRUE)
    need.fix <- which(lapply(nowlist, function(x) max(diff(x)) > gapdist) == FALSE)
    addlist <- nowlist[need.fix]
    fixlist <- c(fixlist, addlist)
    if (length(need.split) == 0) {
      break
    }
    nowlist <- nowlist[need.split]
    nowlength <- length(nowlist)
    newlist <- as.list(rep(NA, nowlength))
    for (i in seq_len(nowlength)) {
      gap <- diff(nowlist[[i]])
      frontpart <- nowlist[[i]][seq_len(min(which(gap > gapdist)))]
      restpart <- nowlist[[i]][-(seq_len(min(which(gap > gapdist))))]
      nowlist[[i]] <- frontpart
      newlist[[i]] <- restpart
    }
    addlist <- nowlist[vapply(nowlist, function(x) length(x) > 1, TRUE)]
    fixlist <- c(fixlist, addlist)
    nowlist <- newlist[vapply(newlist, function(x) length(x) > 1, TRUE)]
  }
  return(fixlist)
}
#2
heuristicCLQ <- function(subOCM, hrstParam)
{
  degs <- apply(subOCM, 1, sum)
  top5pctDegVs <- which(degs>=quantile(degs, 0.95))
  top5pctDegVs <- top5pctDegVs[order(degs[top5pctDegVs],decreasing=TRUE)]
  heuristicBinbasket <- NULL
  total_density <- NULL
  v_dens_all <- c()
  find_opt <- FALSE
  for(v in top5pctDegVs){
    v_nbds <- as.integer(which(subOCM[v,]!=0))
    candibin <- c(v, v_nbds)
    v_density <- sum(subOCM[candibin,candibin])/(length(candibin)^2 - length(candibin))
    v_dens <- c(v_density, candibin)
    if(v_density>=0.95) {
      return(candibin)
      find_opt <- TRUE
      break
    }else{
      new_v_density <- 1
      while((v_density+0.0001)<new_v_density){
        newdeg <- apply(subOCM[v_nbds,], 1, sum)
        v_nbds <- v_nbds[-(which(newdeg==min(newdeg)))]
        candibin <- c(v, v_nbds)
        new_v_density <- sum(subOCM[candibin,candibin])/(length(candibin)^2 - length(candibin))
        if(new_v_density>=0.95){
          return(candibin)
        }else{
          v_dens_all <- c(v_dens_all, new_v_density)
          heuristicBinbasket<- c(heuristicBinbasket, list(candibin))
        }
        if(length(v_nbds) < (2*hrstParam/3)) break
      }#end while
    }#end else
  }#end for
  ## return results
  max_den_bin = heuristicBinbasket[which(v_dens_all == max(v_dens_all))]
  if(length(max_den_bin) == 1) {
    return(max_den_bin[[1]])
  }else{
    max_den_bin_leng = vapply(max_den_bin, length, 1)
    candibin = max_den_bin[[which(max_den_bin_leng == max(max_den_bin_leng))[1]]]
    return(candibin)
  }
}


################# BigLD subFUNCTIONS ##########################
# subfunctions : CLQD,
# dataPreparation , cutsequence, intervalCliqueList,
# findMaximumIndept, constructLDblock, subBigLD, appendcutByForce
dataPreparation <- function(genofile, SNPinfofile, geno, SNPinfo,
                            chrN = NULL , startbp = -Inf, endbp = Inf)
{
  #chrN : vector of chromosome
  #startbp, endbp : integer
  # filetype <- txt, the input data have header
  # if(is.null(genofile)) stop("Please input genofile (and SNPinfofile)")
  filetype <- tail(strsplit(genofile, "\\.")[[1]], 1)
  if(filetype == "txt"){
    if(is.null(SNPinfo)){
      SNPinfo <- data.table::fread(SNPinfofile, header=TRUE)
    }
    class(SNPinfo)<-"data.frame"
    if(dim(SNPinfo)[2]==4){
      SNPinfo <- SNPinfo[,c(1,3,4)] # chrN, rsID, bp
    }
    geno <- data.table::fread(genofile, header = TRUE) # rsID
    geno <- as.matrix(geno)
    # header generation
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
    colnames(geno)<- SNPinfo$rsID
    if(dim(geno)[2] != dim(SNPinfo)[1]) stop("Please check the input file format!")
  }else if(filetype == "raw"){
    if(is.null(SNPinfofile) & is.null(SNPinfo)) stop("Please input map file for SNPinfofile")
    if(!is.null(SNPinfofile)){
      if((tail(strsplit(SNPinfofile, "\\.")[[1]],1)!="map")) stop("Please input map file for SNPinfofile")
      SNPinfo <- data.table::fread(SNPinfofile, header=FALSE, sep = "\t")
      class(SNPinfo)<-"data.frame"
      SNPinfo <- SNPinfo[,c(1,2,4)]
      colnames(SNPinfo) <- c("chrN", "rsID", "bp")
      geno <- data.table::fread(genofile, header = TRUE, sep = " ")
      geno <- geno[, -seq_len(6)]
      geno <- as.matrix(geno)
      if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
      if(startbp != -Inf | endbp != Inf){
        if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
          stop("If your input data include more than one chromosome or you choose more than one chromosome,
               then do not specify the startbp and endbp!")
        SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
        SNPinfo <- SNPinfo[SNPloca,]
        geno <- geno[,SNPloca]
      }
      colnames(geno)<- SNPinfo$rsID
    }else{
      stop("Please check your input files")
    }
  }else if(filetype == "traw"){
    genodata <- data.table::fread(genofile, header = TRUE, sep = "\t")
    SNPinfo <- genodata[,c(1,2,4)]
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    class(SNPinfo)<-"data.frame"
    geno <- t(genodata[,-seq_len(6)])
    geno <- as.matrix(geno)
    colnames(geno) <- SNPinfo$rsID
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      takenIndex <- which((SNPinfo$bp >= startbp) & (SNPinfo$bp <= endbp) & (SNPinfo$chrN == chrN))
      SNPinfo <- SNPinfo[takenIndex,]
      geno <- geno[, takenIndex]
    }
  }else if(filetype == "ped"){
    if(is.null(SNPinfofile) & is.null(SNPinfo)) stop("Please input map file for SNPinfofile")
    if(!is.null(SNPinfofile)){
      # system.time({
      if((tail(strsplit(SNPinfofile, "\\.")[[1]],1)!="map")) stop("Please input map file for SNPinfofile")
      SNPinfo <-  data.table::fread(SNPinfofile, header=FALSE, sep = "\t")
      SNPinfo <- SNPinfo[,c(1,2,4)]
      colnames(SNPinfo) <- c("chrN", "rsID", "bp")
      class(SNPinfo)<-"data.frame"
      geno <- data.table::fread(genofile, header=FALSE, sep = " ", colClasses = "character")
      geno <- geno[,-seq_len(6)]
      geno <- as.matrix(geno)
      # message(dim(geno))
      if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
      if(startbp != -Inf | endbp != Inf){
        if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
          stop("If your input data include more than one chromosome or you choose more than one chromosome,
               then do not specify the startbp and endbp!")
        SNPloca <- which(SNPinfo$bp >= startbp & SNPinfo$bp <= endbp & SNPinfo$chrN == chrN)
        SNPinfo <- SNPinfo[SNPloca,]
        genoloca1 <- min(SNPloca) * 2 - 1
        genoloca2 <- max(SNPloca) * 2
        geno <- geno[, genoloca1:genoloca2]
      }
      multiAllele<-c()
      nInd = nrow(geno)
      for(i in seq_len(ncol(geno)/2)){
        nowseq <- c(geno[, (i*2-1)], geno[, (i*2)])
        naInd = unique(c(which(geno[, (i*2-1)] == "0"), which(geno[, (i*2)] == "0")))
        uniqueAllele <- setdiff(unique(nowseq), "0")
        if(length(uniqueAllele)>2) multiAllele <- c(multiAllele, i)
        line1 <- rep(NA, nInd)
        line2 <- rep(NA, nInd)
        line1[geno[,(i*2-1)] == uniqueAllele[1]] <- 0
        line1[geno[,(i*2-1)] == uniqueAllele[2]] <- 1
        line2[geno[,(i*2)] == uniqueAllele[1]] <- 0
        line2[geno[,(i*2)] == uniqueAllele[2]] <- 1
        nline = (line1+line2)
        if(sum(nline, na.rm = TRUE) > nInd) nline = 2-nline
        geno[,(i)]<- nline
      }
      geno<-geno[,seq_len(ncol(geno)/2)]
      # message(dim(geno))
      class(geno)<-"integer"
      # remove multi-allelic
      if(length(multiAllele)>0){
        SNPinfo <- SNPinfo[-multiAllele,]
        geno <- geno[, -multiAllele]
      }
      # })
    }
  }else if(filetype == "vcf"){
    rawgeno <- data.table::fread(genofile, header=TRUE, sep = "\t",
                                 colClasses = "character", skip = "#CHROM")
    SNPinfo <- rawgeno[,c(1,3,2)]
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    class(SNPinfo)<-"data.frame"
    infocol <- max(which(colnames(rawgeno) == "INFO"),which(colnames(rawgeno) == "FORMAT"))
    rawgeno <- rawgeno[, -seq_len(infocol)]
    rawgeno <- as.matrix(rawgeno)
    geno <- VCFtogeno(rawgeno)
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp >= startbp & SNPinfo$bp <= endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
  }else {
    stop("Please check your input files.\n(supporting file format: txt, ped, map, raw, traw, vcf)")
  }
  #choose chrN
  message("data reformating done!")
  reverseind <- which(colSums(geno)>nrow(geno))
  geno[,reverseind] <- 2-geno[,reverseind]
  class(SNPinfo$bp)<-"numeric"
  class(SNPinfo$rsID) <- "character"
  #choose chrN
  index <- (SNPinfo$chrN %in% chrN)
  if(all(index == TRUE) == FALSE){
    geno <- geno[,index]
    SNPinfo <- SNPinfo[,index]
  }
  return(list(geno = geno, SNPinfo = SNPinfo))
}
cutsequence <- function(geno, SNPinfo, leng, subTaskSize, LD, clstgap, CLQcut, cutByForce)
{
  message("start to split the whole sequence into sub-task regions")
  modeNum <- 1
  lastnum <- 0
  nSNPs <- dim(SNPinfo)[1]
  geno <- as.matrix(geno)
  mincut <- max(0.5, CLQcut^2)
  # region length<=3000
  SNPbpgap <- diff(SNPinfo[,3])
  if(is.null(cutByForce)){
    precutpoints <- which(SNPbpgap>clstgap)
  }else{
    cutByForce <- vapply(cutByForce[,3], function(x) max(which(SNPinfo[,3]<=x)), 1)
    precutpoints <- sort(unique(cutByForce))
    precutpoints1 <- which(SNPbpgap>clstgap)
    precutpoints <- c(precutpoints, precutpoints1)
  }
  if (dim(geno)[2] <= subTaskSize) {
    message("there is only one sub-region!")
    return(list(dim(geno)[2], NULL))
  } else {
    calterms <- c(1, 10, leng)
    cutpoints <- NULL
    i <- leng # i :current candidate cutposition
    while (i <= (dim(geno)[2] - leng)) {
      if((i-lastnum) > 5*subTaskSize){
        modeNum <- 2
        break;
      }
      if(i %in% precutpoints){
        cutpoints <- c(cutpoints, i)
        lastnum <- i
        cat("total SNPs = ",nSNPs, " | cutpoint = ", i, "\r")
        i<-i+(leng/2)
        # message(i)
        cutnow <- FALSE
      }else{
        for(j in calterms){
          if(LD == "r2"){
            nowcm <- suppressWarnings(cor(geno[,(i-j+1):(i)], geno[,((i+1):(i+j))]
                                          ,use="pairwise.complete.obs"))
            nowr2 <- nowcm^2
            nowr2[which(nowr2 < mincut)] <- 0
          }else{
            nowr2 <- genoDp2(geno[,(i-j+1):(i), drop=FALSE], geno[,((i+1):(i+j)), drop=FALSE])
          }
          if(sum(nowr2,na.rm=TRUE)>0){
            i <- i+1
            cutnow <- FALSE
            break
          }
          if(j == leng){
            cutnow <- TRUE
            # message(i)
          }
        }
        if(cutnow == TRUE){
          cutpoints <- c(cutpoints, i)
          lastnum <- i
          cat("total SNPs = ",nSNPs, " | cutpoint = ", i, "\r")
          i<-i+(leng/2)
          cutnow <- FALSE
        }
      }
    }##end while
    if(modeNum == 1){
      cutpoints <- c(0,cutpoints, dim(geno)[2])
      # separate too big regions candi.cutpoints return(cutpoints,candi.cutpoints)
      atfcut <- NULL
      while (max(diff(cutpoints)) > subTaskSize) {
        diffseq <- diff(cutpoints)
        recutpoint <- which(diffseq > subTaskSize)
        nowmaxsize <- max(diff(cutpoints))
        tt <- cbind((cutpoints[recutpoint] + 1), cutpoints[recutpoint + 1])
        numvec <- NULL
        for (i in seq_len(dim(tt)[1])){
          st <- tt[i, 1]
          ed <- tt[i, 2]
          if (ed > (dim(geno)[2] - leng)) {
            ed <- dim(geno)[2] - leng
          }
          weakcount <- vapply(c((st + leng):(ed - leng)), function(x) {
            tick <- as.integer(leng/5)
            if(LD == "r2"){
              nowCM <- suppressWarnings(cor(geno[, (x - tick + 1):(x)], geno[, (x+ 1):(x + tick)]
                                            ,use="pairwise.complete.obs"))
              nowr2 <- nowCM^2
              length(which(nowr2>= mincut))
            }else{
              nowr2 <- genoDp2(geno[, (x - tick + 1):(x), drop=FALSE], geno[, (x+ 1):(x + tick), drop=FALSE])
              length(which(nowr2>= mincut))
            }
          }, 1)
          weakcount.s <- sort(weakcount)
          weaks <- weakcount.s[10]
          weakpoint <- which(weakcount <= weaks)
          weakpoint <- weakpoint + st + leng - 1
          nearcenter <- vapply(weakpoint, function(x) abs((ed - x) - (x - st)), 1)
          addcut <- weakpoint[which(nearcenter == min(nearcenter))][1]
          cat("total SNPs = ",nSNPs, " | additional cutpoint = ", addcut, "\r")
          numvec <- c(numvec, addcut)
          atfcut <- c(atfcut, addcut)
        }  ##end for
        cutpoints <- sort(c(cutpoints, numvec))
        newcandi <- which(diff(cutpoints) > subTaskSize)
        # remaxsize <- max(diff(cutpoints))
        # message(remaxsize) message(newcandi)
        if (length(newcandi) == 0) {
          break
        }
      }
    }
    ##end while
    if(modeNum == 2){
      message(paste("split the whole sequence into sub-task regions of length", subTaskSize))
      precutpoints <- which(SNPbpgap>clstgap)
      cutpoints <- seq(subTaskSize, dim(geno)[2], subTaskSize)
      remaincutpoints <- vapply(cutpoints, function(x) sum(abs(x-precutpoints)<(leng/2))<1, TRUE)
      atfcut <- cutpoints[remaincutpoints]
      cutpoints <- sort(c(cutpoints[remaincutpoints], precutpoints))
      if(max(atfcut) == dim(geno)[2]){
        atfcut <- atfcut[-(length(atfcut))]
      }else{
        cutpoints <- c(cutpoints, dim(geno)[2])
      }
    }
  }
  cat("\n")
  message("splitting sequence, done!")
  if(!is.null(atfcut)) atfcut <- sort(atfcut)
  cutpoints <- unique(c(cutpoints, precutpoints))
  return(list(sort(cutpoints), atfcut))
}
intervalCliqueList <- function(clstlist)
{
  clstlist <- clstlist[order(vapply(clstlist, min, 1))]
  bp.clstlist <- t(vapply(clstlist, function(x) range(x), c(1,2)))  ###
  bp.clstlist <- bp.clstlist[order(bp.clstlist[,1]),]
  IMsize <- dim(bp.clstlist)[1]  ## adjacency matrix of intervals in interval graph
  adjacencyM <- matrix(0, IMsize, IMsize)
  for (i in seq_len(IMsize-1)) {
    for (j in (i+1):IMsize) {
      if(bp.clstlist[i,2]>bp.clstlist[j,1]){
        adjacencyM[j, i]<-adjacencyM[i, j] <- 1
      }else{
        next
      }
    }
  }
  diag(adjacencyM) <- 0
  interval.graph <- igraph::graph.adjacency(adjacencyM, mode="undirected",
                                            weighted=TRUE, diag=FALSE, add.colnames=NULL)
  # message(paste("max coreness", max(coreness(interval.graph))))
  # message(paste("ecount", ecount(interval.graph), "vertex*5 ", 5*IMsize))
  if(max(igraph::coreness(interval.graph))>10){ #ecount(interval.graph)> 5*IMsize|
    interval.cliques <- igraph::maximal.cliques(interval.graph, min=1)
  }else{
    interval.cliques <- igraph::cliques(interval.graph, min=1)
  }
  interval.cliques <- interval.cliques[order(vapply(interval.cliques, min, 1))]

  intervals <- lapply(interval.cliques, function(x) unlist(clstlist[x]))
  intervals <- lapply(intervals, sort)
  intervals <- lapply(intervals, unique)
  weight.itv <- vapply(intervals, length, 1)

  intervals.range <- t(vapply(intervals, range, c(1,2)))
  unique.intervals.range <- unique(intervals.range)

  rangeinfo <- cbind(intervals.range, weight.itv)

  interval.info <- apply(unique.intervals.range, 1, function(x) {
    sameitv <- which(rangeinfo[, 1] == x[1] & rangeinfo[, 2] == x[2])
    maxweight <- max(rangeinfo[sameitv, 3])
    sameitv[which(maxweight == rangeinfo[sameitv, 3])][1]
  })
  res <- rangeinfo[interval.info,,drop=FALSE]
  return(res)
  # final.intervals <- intervals[interval.info]
  # final.intervals.w <- rangeinfo[interval.info, 3]
  # return(list(final.intervals, final.intervals.w))
}
findMaximumIndept <- function(interval.range, sample.weight)
{
  n.of.sample <- length(sample.weight)
  pre.range <- as.list(rep(NA, n.of.sample))  #pre.range : range of predecessor
  ## pre.range : n by 2, i row (x,y) : possible predecessors of i interval are from x interval to y interval
  for (i in seq_len(n.of.sample)) {
    nowstart <- interval.range[i, 1]
    if (sum(interval.range[, 2] < nowstart) > 0) {
      pre.range[[i]] <- which(interval.range[, 2] < nowstart)
    }
  }
  sources <- seq_len(n.of.sample)[(vapply(pre.range, function(x) all(is.na(x)) == TRUE, TRUE))]

  ## source of comparability graph of complement of Interval graph
  if (length(sources) < n.of.sample) {
    not.s <- setdiff(seq_len(n.of.sample), sources)
    for (i in not.s) {
      pre.pre <- sort(unique(unlist(pre.range[pre.range[[i]]])))
      pre.range[[i]] <- setdiff(pre.range[[i]], pre.pre)
    }
    names(pre.range) <- sample.weight
    n.interval <- seq_len(n.of.sample)
    route.weights <- rep(0, n.of.sample)  ##cumulative weights
    route.weights[sources] <- sample.weight[sources]
    pointers <- rep(0, n.of.sample)  ## predecessor of current interval
    pointers[sources] <- NA
    explored <- rep(0, n.of.sample)
    explored[sources] <- 1
    info <- cbind(n.interval, route.weights, pointers, explored)

    for (i in not.s) {
      maybe.pred <- pre.range[[i]]
      now.info <- info[maybe.pred, , drop=FALSE]
      max.info <- now.info[which(now.info[, 2] == max(now.info[, 2])), , drop=FALSE]
      if (dim(max.info)[1] > 1)
        max.info <- max.info[1, , drop=FALSE]
      info[i, 2] <- sample.weight[i] + max.info[2]
      info[i, 3] <- max.info[1]
      info[i, 4] <- 1
    }

    #### trace maximum independent set
    start.itv <- which(info[, 2] == max(info[, 2]))[1]
    predecessor <- info[start.itv, 3]
    indept.set <- c(predecessor, start.itv)
    while (!is.na(predecessor)) {
      predecessor <- info[predecessor, 3]
      indept.set <- c(predecessor, indept.set)
    }

    indept.set <- as.vector(indept.set)
    indept.set <- indept.set[-which(is.na(indept.set))]
    indept.set.weight <- max(info[, 2])
  } else {
    indept.set <- which(sample.weight == max(sample.weight))
    indept.set.weight <- max(sample.weight)
  }

  final.result <- list(indept.set=indept.set, indept.set.weight=indept.set.weight)
  return(final.result)
}
constructLDblock <- function(clstlist, subSNPinfo)
{
  # subfunction: intervalCliqueList, findMaximumIndept
  Totalblocks <- NULL
  while (length(clstlist) > 0) {
    if(length(clstlist)==1){
      Totalblocks <- rbind(Totalblocks, range(clstlist[[1]]))
      break
    }else{
      allsnps <- lapply(clstlist, function(x) c(min(x):max(x)))
      candi.interval <- intervalCliqueList(clstlist)
      intervals <- candi.interval[,seq_len(2),drop=FALSE]  ## list of SNPs in each cliques
      weight.itv <- candi.interval[,3]  ## weights of each cliques
      MWIS <- findMaximumIndept(intervals, weight.itv)  ##find independent set
      subLDblocks <- intervals[MWIS[[1]],, drop=FALSE]
      Totalblocks <- rbind(Totalblocks, subLDblocks)
      takenSNPs <- apply(subLDblocks, 1, function(x) c(min(x):max(x)))
      takenSNPs <- as.integer(unlist(takenSNPs))
      clstlist <- lapply(clstlist, function(x) setdiff(x, takenSNPs))
      clstlist <- clstlist[vapply(clstlist, length, 1)>0]
      if (length(clstlist) == 0) break
      # when a taken cluster located in the middle of the other cluster, we split the other cluster
      while(length(clstlist)>0){
        addinglist <- NULL
        for (n in seq_len(length(clstlist))) {
          nowbin <- clstlist[[n]]
          intersection <- intersect(c(min(nowbin):max(nowbin)), takenSNPs)
          if (length(intersection) > 0) {
            part1 <- nowbin[which(nowbin < min(intersection))]
            part2 <- setdiff(nowbin, c(min(part1):max(intersection)))
            clstlist[[n]] <- part1
            addinglist <- c(addinglist, list(part2))
          }
        }
        clstlist <- c(clstlist, addinglist)
        clstlist <- clstlist[vapply(clstlist, function(x) length(x) > 1, TRUE)]
        clstlist <- clstlist[order(vapply(clstlist, min, 1))]
        if(length(addinglist) ==0) break;
      }
    }

  }
  return(Totalblocks)
}
subBigLD <- function(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
{
  subbinvec <- CLQD(geno=subgeno, SNPinfo=subSNPinfo, CLQcut=CLQcut, clstgap=clstgap,
                    hrstType=hrstType, hrstParam=hrstParam, CLQmode=CLQmode, LD=LD)
  if(all(is.na(subbinvec))){return(NULL)}
  bins <- seq_len(max(subbinvec[which(!is.na(subbinvec))]))
  clstlist <- lapply(bins, function(x) which(subbinvec == x))
  clstlist <- lapply(clstlist, sort)  ###
  clstlist <- clstlist[order(vapply(clstlist, min, 1))]  ###
  nowLDblocks <- constructLDblock(clstlist, subSNPinfo)
  # message('constructLDblock done!')
  nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop=FALSE]
  return(nowLDblocks)
}

appendcutByForce <- function(LDblocks, Ogeno, OSNPinfo, CLQcut, clstgap,
                             CLQmode, hrstParam, LD, hrstType)
{
  expandB <- NULL
  snp1 <- which(OSNPinfo[,3]<LDblocks[1,5])
  if(length(snp1)>2){
    OSNPs <- seq_len(max(snp1))
    firstB <- LDblocks[1,]
    secondSNPs <- (as.numeric(firstB[1]):as.numeric(firstB[2]))
    if(LD == "r2"){
      cor2 <- suppressWarnings(cor(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE],
                                   use="pairwise.complete.obs")^2)
    }else if(LD == "Dprime"){
      cor2 <- genoDp2(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE])
    }
    cor2num <- apply(cor2, 1, function(x) {
      sum(x>CLQcut^2, na.rm = TRUE)
    })
    cor2ratio <- cor2num/(dim(cor2)[2])
    cor2numT <- cor2ratio>0.6
    cor2numT <- c(cor2numT, 1)
    points2 <- min(which(cor2numT>0))
    NsecondSNPs <- points2:max(secondSNPs)
    reOSNPs <- setdiff(seq_len(max(NsecondSNPs)), NsecondSNPs)
    if(length(reOSNPs)>1){
      subgeno <- Ogeno[, reOSNPs]
      subSNPinfo <- OSNPinfo[reOSNPs,]
      subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
      subBlocks <- subBlocks+min(reOSNPs)-1
      expandB <- rbind(expandB, subBlocks)
    }
    firstSNPs <- NsecondSNPs
  }else{
    firstB <- LDblocks[1,]
    firstSNPs <- (as.numeric(firstB[1]):as.numeric(firstB[2]))
  }
  if(dim(LDblocks)[1]>1){
    for(i in seq_len(dim(LDblocks)[1]-1)){
      # if(i==2192) stop("stop")
      secondB <- LDblocks[(i+1),]
      secondSNPs <- (as.numeric(secondB[1]):as.numeric(secondB[2]))
      OSNPs <- setdiff(max(firstSNPs):min(secondSNPs), c(max(firstSNPs), min(secondSNPs)))
      if(length(OSNPs)==0){
        expandB <- rbind(expandB, range(firstSNPs))
        firstSNPs <- secondSNPs
      }else{
        if(LD == "r2"){
          cor1 <- suppressWarnings(cor(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE],
                                       use="pairwise.complete.obs")^2)
        }else if(LD == "Dprime"){
          cor1 <- genoDp2(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE])
        }
        cor1num <- apply(cor1, 2, function(x) {
          sum(x>CLQcut^2)
        })
        cor1ratio <- cor1num/(dim(cor1)[1])
        # cor1num <- apply(cor1r2, 2, function(x) length(which(x>CLQcut^2))/length(firstSNPs))
        if(LD == "r2"){
          cor2 <- suppressWarnings(cor(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE],
                                       use="pairwise.complete.obs")^2)
        }else if (LD == "Dprime"){
          cor2 <- genoDp2(Ogeno[,OSNPs,drop=FALSE], Ogeno[,secondSNPs,drop=FALSE])
        }
        cor2num <- apply(cor2, 1, function(x) {
          sum(x>CLQcut^2)
        })
        cor2ratio <- cor2num/(dim(cor2)[2])
        cor1numT <- cor1ratio>0.6
        cor2numT <- cor2ratio>0.6
        cor1numT <- c(1, cor1numT, 0)
        cor2numT <- c(0, cor2numT, 1)
        points1 <- max(firstSNPs)+max(which(cor1numT>0))-1
        NfirstSNPs <- min(firstSNPs):points1
        points2 <- max(firstSNPs)+max(which(cor2numT>0))-1
        NsecondSNPs <- points2:max(secondSNPs)
        if(max(NfirstSNPs)<min(NsecondSNPs)){
          expandB <- rbind(expandB, range(NfirstSNPs))
          reOSNPs <- setdiff(c(min(NfirstSNPs):max(NsecondSNPs)), c(NfirstSNPs, NsecondSNPs))
          if(length(reOSNPs)>1){
            subgeno <- Ogeno[, reOSNPs]
            subSNPinfo <- OSNPinfo[reOSNPs,]
            subBlocks <- subBigLD(subgeno, subSNPinfo, CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
            subBlocks <- subBlocks+min(reOSNPs)-1
            expandB <- rbind(expandB, subBlocks)
          }
          firstSNPs <- NsecondSNPs
        }else{
          #merge two blocks
          # message(paste("i", i,"now!!!!!!"))
          subgeno <- Ogeno[, c(min(firstSNPs):max(secondSNPs))]
          subSNPinfo <- OSNPinfo[c(min(firstSNPs):max(secondSNPs)),]
          if(dim(subgeno)[2]<=3000){
            subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
            subBlocks <- subBlocks+min(firstSNPs)-1
          }else{
            # Too big LD blocks
            subgeno <- Ogeno[, c((max(firstSNPs)-1500):(min(secondSNPs)+1500))]
            subSNPinfo <- Ogeno[ c((max(firstSNPs)-1500):(min(secondSNPs)+1500)),]
            subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
            for(i in seq_len(dim(subBlocks)[1]-1)){
              nowB <- subBlocks[i,]
              nextB <- subBlocks[(i+1),]
              # merging steps!!
              if(nowB[2]<nextB[1]){
                next
              }else{
                subBlocks[i,]<-c(NA, NA)
                subBlocks[(i+1),]<-range(c(nowB, nextB))
              }
            }
            subBlocks<-subBlocks[!is.na(subBlocks[,1]),]
            subBlocks <- subBlocks+min(firstSNPs)-1
          }
          if(dim(subBlocks)[1]==1) {
            firstSNPs <- subBlocks[1,1]:subBlocks[1,2]
          }else{
            expandB <- rbind(expandB, subBlocks[-(dim(subBlocks)[1]),])
            firstSNPs <- subBlocks[(dim(subBlocks)[1]),1]:subBlocks[(dim(subBlocks)[1]),2]
          }
        }
        cat(paste(i," | ", dim(LDblocks)[1], "\r", sep = ""))
        # message(tail(expandB))
        # if(i >= 30) break
      }
    }
  }
  # firstSNPs
  if(max(firstSNPs)<(dim(Ogeno)[2]-1)){
    OSNPs <- (max(firstSNPs)+1):(dim(Ogeno)[2])
    if(LD == "r2"){
      cor1 <- suppressWarnings(cor(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE],
                                   use="pairwise.complete.obs")^2)
    }else if (LD == "Dprime"){
      cor1 <- genoDp2(Ogeno[,firstSNPs,drop=FALSE], Ogeno[,OSNPs,drop=FALSE])
    }
    cor1num <- apply(cor1, 2, function(x) {
      sum(x>CLQcut^2)
    })
    cor1ratio <- cor1num/(dim(cor1)[1])
    cor1numT <- cor1ratio>0.6
    cor1numT <- c(1, cor1numT, 0)
    points1 <- max(firstSNPs)+max(which(cor1numT>0))-1
    NfirstSNPs <- min(firstSNPs):points1
    expandB <- rbind(expandB, range(NfirstSNPs))
    reOSNPs <- setdiff(c(min(NfirstSNPs):dim(Ogeno)[2]), c(NfirstSNPs))
    if(length(reOSNPs)>1){
      subgeno <- Ogeno[, reOSNPs]
      subSNPinfo <- OSNPinfo[reOSNPs,]
      subBlocks <- subBigLD(subgeno, subSNPinfo,  CLQcut, clstgap, CLQmode, hrstParam, LD, hrstType)
      subBlocks <- subBlocks+min(reOSNPs)-1
      expandB <- rbind(expandB, subBlocks)
    }
  }else{
    expandB <- rbind(expandB, range(firstSNPs))
  }
  # LDblocks <- expandB
  expandB <- expandB[(expandB[,1]!=expandB[,2]),,drop=FALSE]
  start.bp <- OSNPinfo[, 3][expandB[, 1]]
  end.bp <- OSNPinfo[, 3][expandB[, 2]]
  start.rsID <- as.character(OSNPinfo[, 2][expandB[, 1]])
  end.rsID <- as.character(OSNPinfo[, 2][expandB[, 2]])
  TexpandB <- data.frame(expandB, start.rsID, end.rsID, start.bp, end.bp)
  colnames(TexpandB) <- c("start.index", "end.index", "start.rsID", "end.rsID", "start.bp", "end.bp")
  return(TexpandB)
}

#################  GPART subFUNCTIONS ########################################################################
# sub-Functions
# BigLD,CLQD, LDblockSplit, dataPreparation,
# mergeOverlapGene, LDblockGeneMerge, splitBigLD, mergeSmallRegion, namingRegion2
#
# blocks with Big-LD result
LDblockSplit <- function(geno, LDblocks, maxsize, LD){
  LDblockSizes <- apply(LDblocks, 1, diff) +1
  LargeBlocksN <- which(LDblockSizes> maxsize)
  LargeBlocks <- LDblocks[LargeBlocksN,,drop=FALSE]
  Newblocks <- NULL
  btwr2 <- NULL
  if(length(LargeBlocksN)!=0){
    while(dim(LargeBlocks)[1]>0){
      # message(dim(LargeBlocks))
      nowblocks <- LargeBlocks[1,]
      # if(nowblocks[1]>4000) break
      if(is.null(btwr2)){
        nowgeno <- geno[,nowblocks[1]:nowblocks[2]]
        if(LD == "r2"){
          nowr2 <- suppressWarnings(cor(nowgeno, use="pairwise.complete.obs")^2)
          nowr2[is.na(nowr2)] <- 0
        }else if(LD == "Dprime"){
          nowr2 <- genoDp(nowgeno, strLD = FALSE)
          nowr2[is.na(nowr2)] <- 0
          nowr2[(nowr2==Inf)|(nowr2==-Inf)] <- 0
        }
        btwr2 <- vapply(seq_len(dim(nowr2)[1]-1), function(x) mean(nowr2[seq_len(x), (x+1):(dim(nowr2)[1]-1)]), 1)
      }
      if(length(btwr2)< ceiling(maxsize*0.8*2)){
        subbtwr2 <- btwr2[(ceiling(length(btwr2)/2)-(maxsize*0.2)):(floor(length(btwr2)/2)+(maxsize*0.2))]
        addN <- which(subbtwr2 == min(subbtwr2))[1]+ (ceiling(length(btwr2)/2)-(maxsize*0.2)) - 1
        Newblocks <- rbind(Newblocks, c(nowblocks[1], (nowblocks[1]+addN-1)))
        if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
          # message(Newblocks[dim(Newblocks)[1], ])
          break}
        Newblocks <- rbind(Newblocks, c(nowblocks[1]+addN, nowblocks[2]))
        if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
          # message(Newblocks[dim(Newblocks)[1], ])
          break}
        LargeBlocks <- LargeBlocks[-1,, drop=FALSE]
        btwr2<- NULL
      }else{
        # message(LargeBlocks[1,]) Error in LargeBlocks[1, 1] <- (nowblocks[1] + addN)
        subbtwr2 <- btwr2[(maxsize*0.4):maxsize]
        addN <- which(subbtwr2 == min(subbtwr2))[1]+ (maxsize*0.4) - 1
        Newblocks <- rbind(Newblocks, c(nowblocks[1], (nowblocks[1]+addN-1)))
        LargeBlocks[1,1] <- (nowblocks[1]+addN)
        btwr2<-btwr2[-seq_len(addN)]
        if(diff(LargeBlocks[1,])<maxsize) {
          Newblocks <- rbind(Newblocks, LargeBlocks[1,])
          if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
            # message(Newblocks[dim(Newblocks)[1], ])
            break}
          LargeBlocks <- LargeBlocks[-1,, drop=FALSE]
          btwr2<- NULL
        }
      }
      if(Newblocks[dim(Newblocks)[1], 2]-Newblocks[dim(Newblocks)[1], 1]>=maxsize) {
        # message(Newblocks[dim(Newblocks)[1], ])
        break}
    } # end while
    FinalLDblocks <- rbind(LDblocks[-LargeBlocksN,], Newblocks)
    FinalLDblocks <- FinalLDblocks[order(FinalLDblocks[,1]),]

    return(FinalLDblocks)
  }else{
    return(LDblocks)
  }
}
# merge overlapped gene regions
mergeOverlapGene <- function(Geneblocks) {
  overlaplist <- as.list(rep(NA, dim(Geneblocks)[1]))
  Geneblocks<-Geneblocks[order(Geneblocks[,1]),]
  for (i in seq_len(dim(Geneblocks)[1] - 1)) {
    overlaplist[[i]] <- i
    for (j in (i + 1):dim(Geneblocks)[1]) {
      if(Geneblocks[i,2]>=min(Geneblocks[j,1])){
        overlaplist[[i]] <- c(overlaplist[[i]], j)
      }else{
        next
      }
    }
  }
  overlaplist[[length(overlaplist)]]<- length(overlaplist)
  if(is.null(overlaplist)){
    return(Geneblocks)
  }else{
    for(i in seq_len(length(overlaplist)-1)){
      nowbin <- overlaplist[[i]]
      if(all(is.na(nowbin))) next
      for(j in (i+1):length(overlaplist)){
        if(all(overlaplist[[j]] %in% nowbin)){
          overlaplist[[j]]<-NA
        }else if(max(nowbin)<max(overlaplist[[j]])){
          break
        }
      }
    }
    overlaplist <- overlaplist[!vapply(overlaplist, function(x) all(is.na(x)), TRUE)]
    newblocks <- matrix(NA, nrow=length(overlaplist), ncol=2)
    rownames(newblocks)<-seq_len(length(overlaplist))
    for(i in seq_len(length(overlaplist))){
      if(length(overlaplist[[i]])==1){
        newblocks[i,]<-Geneblocks[overlaplist[[i]],,drop=FALSE]
        rownames(newblocks)[i]<-rownames(Geneblocks[overlaplist[[i]],,drop=FALSE])
      }else{
        newblocks[i,]<-c(min(Geneblocks[overlaplist[[i]],,drop=FALSE]),
                         max(Geneblocks[overlaplist[[i]],,drop=FALSE]))
        rownames(newblocks)[i]<-paste(rownames(Geneblocks[overlaplist[[i]],,drop=FALSE]), collapse="/")
      }

    }
    return(newblocks)
  }
}
# LDblock construction and split Large regions
LDblockGeneMerge <- function(LDblocks.T, Geneblocks.M) {
  LDblocks.T <- LDblocks.T[order(LDblocks.T[,1]),]
  Geneblocks.M <- Geneblocks.M[order(Geneblocks.M[,1]),]
  # for each gene blocks, we find the overlapping LD blocks
  LDwithGene.list <- matrix(NA, nrow=dim(Geneblocks.M)[1], ncol=2)
  rownames(LDwithGene.list) <-rownames(Geneblocks.M)
  overlapLDBlocks <- NULL
  for(i in seq_len(dim(Geneblocks.M)[1])){
    LD1 <- max(which(LDblocks.T[,1] <= Geneblocks.M[i,1]))
    LD2 <- min(which(LDblocks.T[,2] >= Geneblocks.M[i,2]))
    LDwithGene.list[i,] <- c(LD1, LD2)
    overlapLDBlocks <- c(overlapLDBlocks, LD1:LD2)
  }
  NonOverlapLDBlocks <- LDblocks.T[-overlapLDBlocks,]
  ## LDblocks which are not overlapped with gene regions

  overlapLDGene <- NULL
  NonoverlapLDgene <- NULL
  i <- 1
  while(i <= (dim(LDwithGene.list)[1])){
    nowGeneR <- LDwithGene.list[i,, drop=FALSE]
    if((LDwithGene.list[i,2] < LDwithGene.list[i+1,1])){
      NonoverlapLDgene <- rbind(NonoverlapLDgene, nowGeneR)
      # message(i);message(nowGeneR)
      i <- i+1
    }else{
      merge.candi <- nowGeneR
      for(j in (i+1):dim(LDwithGene.list)[1]){
        if(max(LDwithGene.list[i:(j-1),])>=LDwithGene.list[(j),1]){
          merge.candi<- rbind(merge.candi, LDwithGene.list[(j),,drop=FALSE])
          if(j == dim(LDwithGene.list)[1]){
            i <- j+1
          }
        }else{
          i <- j
          break
        }
      }
      overlapLDGene <- c(overlapLDGene, list(merge.candi))
    }
    if(i == dim(LDwithGene.list)[1]){
      nowGeneR <- LDwithGene.list[i,, drop=FALSE]
      NonoverlapLDgene <- rbind(NonoverlapLDgene, nowGeneR)
      # message(i);message(nowGeneR)
      break
    }
  }
  mergeLDgene <- NULL
  if(length(overlapLDGene)>0){
    mergeLDgene <- matrix(NA, length(overlapLDGene), 2)
    rownames(mergeLDgene) <- seq_len(length(overlapLDGene))
    for(i in seq_len(length(overlapLDGene))){
      nowmerge <- overlapLDGene[[i]]
      rownames(mergeLDgene)[i] <- paste(rownames(nowmerge), collapse="+")
      mergeLDgene[i,1] <- min(nowmerge)
      mergeLDgene[i,2] <- max(nowmerge)
    }
  }

  finalGeneBlocks <- rbind(mergeLDgene, NonoverlapLDgene)
  finalGeneBlocks <- finalGeneBlocks[order(finalGeneBlocks[,1]),]

  for(i in seq_len(dim(finalGeneBlocks)[1])){
    st <- LDblocks.T[finalGeneBlocks[i,1],1]
    ed <- LDblocks.T[finalGeneBlocks[i,2],2]
    finalGeneBlocks[i,] <-c(st, ed)
  }

  rownames(NonOverlapLDBlocks) <-rep("No", dim(NonOverlapLDBlocks)[1])
  res <- rbind(finalGeneBlocks, NonOverlapLDBlocks)
  res <- res[order(res[,1]),]
  return(res)
}
# split big Gene region
splitBigLD <- function(GeneLDblocks, LDblocks.T, Geneblocks,maxsize){
  BigN <- which(GeneLDblocks[,3]>maxsize)
  BigGeneblocks <- GeneLDblocks[BigN,,drop=FALSE]
  GeneLDblocks <- GeneLDblocks[-BigN,,drop=FALSE]
  newblocks <- NULL
  for (i in seq_len(dim(BigGeneblocks)[1])){
    nowBlock <- BigGeneblocks[i,,drop=FALSE]
    # nowSNPs<- nowBlock[1,1]:nowBlock[1,2]
    intersectLD <- LDblocks.T[max(which(nowBlock[1,1]>=LDblocks.T[,1])):min(which(nowBlock[1,2]<=LDblocks.T[,2])),]
    intersectLD[1,1] <- nowBlock[1,1]
    intersectLD[dim(intersectLD)[1],2] <- nowBlock[1,2]
    NintersectLD <- NULL
    while(dim(intersectLD)[1]>0){
      st <- intersectLD[1,1]
      edposi <- max(which(intersectLD[,2]<(st+maxsize)))
      ed <- intersectLD[edposi,2]
      NintersectLD <- rbind(NintersectLD, c(st, ed))
      intersectLD <- intersectLD[-seq_len(edposi),,drop=FALSE]
    }
    intersectLD <- NintersectLD
    # if(all(apply(NintersectLD, 1, diff)<50)==FALSE) message(i)
    rownames(intersectLD) <- rep(rownames(nowBlock), dim(intersectLD)[1])
    blockL <- apply(intersectLD, 1, diff)+1
    Addblocks <- cbind(intersectLD, blockL)
    newblocks <- rbind(newblocks, Addblocks)
  }
  GeneLDblocks <- rbind(GeneLDblocks, newblocks)
  GeneLDblocks <- GeneLDblocks[order(GeneLDblocks[,1]),]
  return(GeneLDblocks)
}
# small region merging
mergeSmallRegion <- function(GeneLDblocks, maxsize, minsize) {
  gname <- rownames(GeneLDblocks)
  GeneLDblocks <- as.data.frame(GeneLDblocks, stringsAsFactors=FALSE)
  # row.names(GeneLDblocks)<- NULL
  # GeneLDblocks<- cbind(GeneLDblocks, gname, stringsAsFactors<- F)
  smallbin <- NULL
  completeBin <- NULL
  while (dim(GeneLDblocks)[1] > 0) {
    # message(dim(GeneLDblocks))
    nowbin <- GeneLDblocks[1, ,drop=FALSE]
    # if(nowbin[1,1]>75530) break
    if (is.null(smallbin) & nowbin[1,3] >= minsize) {
      completeBin <- rbind(completeBin, nowbin)
      GeneLDblocks <- GeneLDblocks[-1, ,drop=FALSE]
    } else if (is.null(smallbin) & nowbin[1,3] < minsize) {
      smallbin <- nowbin
      GeneLDblocks<- GeneLDblocks[-1, ,drop=FALSE]
    } else if (smallbin[1, 3] + nowbin[1,3] < minsize) {
      st <- min(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      ed <- max(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      blockL <- smallbin[1, 3] + nowbin[1,3]
      smallbin <- data.frame(st, ed, blockL)
      GeneLDblocks <- GeneLDblocks[-1, ,drop=FALSE]
    } else if (smallbin[1, 3] + nowbin[1,3] >= minsize & smallbin[1, 3] + nowbin[1,3] <= maxsize) {
      st <- min(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      ed <- max(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2])
      blockL <- smallbin[1, 3] + nowbin[1,3]
      completeBin <- rbind(completeBin, data.frame(st, ed, blockL))
      smallbin <- NULL
      GeneLDblocks <- GeneLDblocks[-1,,drop=FALSE]
    } else if (smallbin[1, 3] + nowbin[1,3] > maxsize) {
      lastbin <- completeBin[dim(completeBin)[1], ]
      if(is.null(lastbin)){
        message(c("Large-small-Large"))
        message(rbind(lastbin, smallbin, nowbin))
        completeBin <- rbind(completeBin, smallbin, nowbin)
        smallbin <- NULL
      }else if (lastbin[1,3] + smallbin[1,3] <= maxsize) {
        st <- min(smallbin[1, 1], smallbin[1, 2], lastbin[1, 1], lastbin[1, 2])
        ed <- max(smallbin[1, 1], smallbin[1, 2], lastbin[1, 1], lastbin[1, 2])
        blockL <- smallbin[1, 3] + lastbin[1, 3]
        completeBin <- completeBin[-dim(completeBin)[1], ,drop=FALSE]
        completeBin <- rbind(completeBin, data.frame(st, ed, blockL))
        smallbin <- NULL
      } else {
        message(c("Large-small-Large"))
        message(rbind(lastbin, smallbin, nowbin))
        completeBin <- rbind(completeBin, smallbin, nowbin)
        smallbin <- NULL
        GeneLDblocks <- GeneLDblocks[-1, ,drop=FALSE]
      }
    }
  }
  if(!is.null(smallbin) & is.null(completeBin)){
    completeBin < -smallbin
  }
  if(!is.null(smallbin) & !is.null(completeBin)){
    lastindex <- dim(completeBin)[1]
    if(completeBin[lastindex,3,drop=FALSE]+smallbin[1,3] > maxsize){
      completeBin <- rbind(completeBin, smallbin)
    }else{
      nowbin <- c(completeBin[lastindex, 1], smallbin[1,2], completeBin[lastindex, 3] + smallbin[1,3])
      completeBin[lastindex,seq_len(3)] <- nowbin
    }
  }
  return(completeBin)
}
# naming each region
namingRegion2 <- function(GeneLDblocks, genelist, chrSNPinfo) {
  st.bp <- chrSNPinfo[GeneLDblocks[,1],3]
  ed.bp <- chrSNPinfo[GeneLDblocks[,2],3]
  bpBlocks <- cbind(GeneLDblocks[,seq_len(3), drop=FALSE], st.bp, ed.bp)
  FinalGeneLDblocks <- NULL
  PartN <- 1
  Pgene <- NULL
  gene <- NULL
  Ngene <- NULL
  gname <- rep(NA, dim(bpBlocks)[1])
  FinalGeneLDblocks<- data.frame(bpBlocks, gname)

  for (i in seq_len(dim(bpBlocks)[1])){
    # if (i%%10 == 0)
    # message(i)
    nowloca <- bpBlocks[i,4:5,drop=FALSE]
    intro.index <- (which(genelist[,4]<nowloca[1,1]))
    intro.index1 <- setdiff(seq_len(dim(genelist)[1]), intro.index)
    outro.index <- (which(genelist[,3]>nowloca[1,2]))
    outro.index1 <- setdiff(seq_len(dim(genelist)[1]), outro.index)
    common.index <- intersect(intro.index1, outro.index1)

    if(length(common.index)==0){
      if(length(intro.index)==0){
        #before gene
        gname <- paste("before-", as.character(genelist[1, 1]), sep="")
      }else if(length(outro.index)==0){
        #after gene
        gname <- paste("after-", as.character(genelist[dim(genelist)[1], 1]), sep="")
      }else{
        intronum <- which(genelist[,4]==max(genelist[intro.index,4]))
        outronum <- which(genelist[,3]==min(genelist[outro.index,3]))
        gname <- paste("inter-", paste(as.character(genelist[intronum, 1]),collapse = "/"),":",paste(as.character(genelist[outronum, 1]), collapse = "/"), sep="")
      }
    }else{
      subgenelist <- genelist[common.index,,drop=FALSE]
      gname <- as.character(subgenelist[1,1])
      nowregion <- c(subgenelist[1,3], subgenelist[1,4])
      if(length(common.index)>1){
        for(k in 2:dim(subgenelist)[1]){
          nextregion <- subgenelist[k,3:4]
          if(max(nowregion)<min(nextregion)){
            gname <- paste(c(gname, as.character(subgenelist[k,1])), collapse="+")
            nowregion <- subgenelist[k,3:4]
          }else{
            gname <- paste(c(gname, as.character(subgenelist[k,1])), collapse="/")
            candi.r <- rbind(nowregion, nextregion)
            nowregion <- c(min(candi.r)[1], max(candi.r)[1])
          }
        }
      }
    }
    FinalGeneLDblocks$gname[i]<-gname
  }
  # part numbering
  FinalGeneLDblocks <- data.frame(FinalGeneLDblocks, 0)
  FinalGeneLDblocks[1, 7] <- 1
  for (i in 2:dim(FinalGeneLDblocks)[1]) {
    ifelse(FinalGeneLDblocks[i - 1, 6] == FinalGeneLDblocks[i, 6],
           FinalGeneLDblocks[i, 7] <- FinalGeneLDblocks[(i - 1), 7] + 1, FinalGeneLDblocks[i, 7] <- 1)
    # if(i%%10==0) message(i)
  }

  Finalnames <- apply(FinalGeneLDblocks, 1, function(x) {
    # paste(as.character(x[6]), '-part', as.character(x[7]), sep='')
    paste(c(x[6], "-part", as.numeric(x[7])), collapse="")
  })
  FinalGeneLDblocks <- cbind(FinalGeneLDblocks[, seq_len(5)], Finalnames)
  return(FinalGeneLDblocks)
}

geneinfoExt <- function(geneinfofile=geneinfofile, geneDB = geneDB, assembly = assembly, geneid = geneid,
                         ensbversion = ensbversion, chrNs = chrNs){
  if(!is.null(geneinfofile)){
    message("load gene information from inputed file")
    geneinfo <- data.table::fread(geneinfofile, stringsAsFactors = FALSE)
    geneinfo <- data.frame(geneinfo)
    colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
  }else if(geneDB == "ensembl"){
    if(assembly=="GRCh38"){
      message("load gene information from ensembl (assembly GRCh38)")
      grch <- 38
      ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", version=ensbversion)#, version=version
      if(geneid!="ensembl_gene_id"){
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'ensembl_gene_id', 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
        geneinfo[(geneinfo[,1]==""),1]<-geneinfo[(geneinfo[,1]==""),2]
        geneinfo<-geneinfo[,c(1,3,4,5)]
      }else{
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
      }
      # geneinfo <- geneinfo[geneinfo[,1]!="",]
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
    }else if(assembly=="GRCh37"){
      message("load gene information from ensembl (assembly GRCh37)")
      ensembl <- biomaRt::useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh = 37)#, version=version

      if(geneid!="ensembl_gene_id"){
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'ensembl_gene_id', 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
        geneinfo[(geneinfo[,1]==""),1]<-geneinfo[(geneinfo[,1]==""),2]
        geneinfo<-geneinfo[,c(1,3,4,5)]
      }else{
        geneinfo <- biomaRt::getBM(attributes = c(geneid, 'chromosome_name','start_position', 'end_position'), #external_gene_name
                                   filters = 'chromosome_name',
                                   value = chrNs,
                                   mart = ensembl)
      }
      # geneinfo <- geneinfo[geneinfo[,1]!="",]
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
    }else{
      stop("wrong assembly")
    }
  }else if(geneDB == "ucsc"){
    if(assembly == "GRCh37"){
      message("load gene information from UCSC genome browser (assembly GRCh37)")
      Homo.sapiens <- Homo.sapiens::Homo.sapiens
      cls <- AnnotationDbi::columns(Homo.sapiens::Homo.sapiens)
      cls <- cls[match(c("TXCHROM","TXEND","TXSTART"), cls)]
      kts <- AnnotationDbi::keytypes(Homo.sapiens)
      if(geneid == 'hgnc_symbol'){
        gene_id <- "SYMBOL"
      }else{
        gene_id <- geneid
      }
      kt <- kts[match(c(gene_id), kts)]
      ks <- AnnotationDbi::keys(Homo.sapiens, keytype=kt)
      res <- AnnotationDbi::select(Homo.sapiens, keys=ks, columns=cls, keytype=kt)
      geneinfo = res[res$TXCHROM %in% paste("chr", chrNs, sep = ""),]
      geneinfo$TXCHROM <- gsub(pattern = "chr", replacement = "", geneinfo$TXCHROM)
      class(geneinfo$TXCHROM)<-"integer"
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")

    } else if(assembly == "GRCh38"){
      message("load gene information from UCSC genome browser (assembly GRCh38)")
      Homo.sapiens <- Homo.sapiens::Homo.sapiens
      OrganismDbi::TxDb(Homo.sapiens) <- TxDb.Hsapiens.UCSC.hg38.knownGene::TxDb.Hsapiens.UCSC.hg38.knownGene
      cls <- AnnotationDbi::columns(Homo.sapiens)
      cls <- cls[match(c("TXCHROM","TXEND","TXSTART"), cls)]
      kts <- AnnotationDbi::keytypes(Homo.sapiens)
      if(geneid == 'hgnc_symbol'){
        gene_id <- "SYMBOL"
      }else{
        gene_id <- geneid
      }
      kt <- kts[match(c(gene_id), kts)]
      ks <- AnnotationDbi::keys(Homo.sapiens, keytype=kt)
      res <- AnnotationDbi::select(Homo.sapiens, keys=ks, columns=cls, keytype=kt)
      geneinfo = res[res$TXCHROM %in% paste("chr", chrNs, sep = ""),]
      geneinfo$TXCHROM <- gsub(pattern = "chr", replacement = "", geneinfo$TXCHROM)
      class(geneinfo$TXCHROM)<-"integer"
      colnames(geneinfo) <- c("genename", "chr", "start.bp", "end.bp")
    }else{
      stop("wrong assembly")
    }
  }else{
    stop("wrong DB name!")
  }
  return(geneinfo)
}


#gene overlap merging : combine gene region if there are genes with same name
combineOverlapSamegene = function(genelist){
  combgene <- names(table(genelist[,1])[table(genelist[,1])>1])
  for(cgene in combgene){
    subgeneinfo <- genelist[genelist[,1]==cgene,]
    subgeneinfo <- subgeneinfo[order(subgeneinfo[,3]),]
    genelist <- genelist[-which(genelist[,1]==cgene),]
    addgene <- subgeneinfo[1,,drop=FALSE]
    for(i in 2:dim(subgeneinfo)[1]){
      if(addgene[nrow(addgene),4]<subgeneinfo[i,3]){
        addgene <- rbind(addgene, subgeneinfo[i,])
      }else{
        addgene[nrow(addgene),4] <- max(subgeneinfo[i,4])
      }
    }
    genelist <- rbind(genelist, addgene)
  }
  genelist <- genelist[order(genelist[,3]),,drop=FALSE]
  return(genelist)
}

# MAIN FUNCTIONS ------------------------------------------
# CLQD < input >
#' @title partitioning into cliques
#' @name CLQD
#' @aliases CLQD
#' @description \code{CLQD} partitioning the given data into subgroups that contain SNPs which are highly correlated.
#' @usage CLQD(geno, SNPinfo, CLQcut=0.5, clstgap=40000,
#' hrstType=c("near-nonhrst", "fast", "nonhrst"), hrstParam=200,
#' CLQmode=c("density", "maximal"), LD=c("r2", "Dprime"))
#' @param geno  Data frame or matrix of additive genotype data, each column is additive genotype of each SNP. (Use data of non-monomorphic SNPs)
#' @param SNPinfo  Data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position.
#' @param CLQcut Numeric constant; a threshold for the LD measure value |r|, between 0 to 1. Default 0.5.
#' @param clstgap Numeric constant; a threshold of physical distance (bp) between two consecutive SNPs
#' which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique
#' greater than \code{clstgap}, then the algorithm split the cliques satisfying each
#' clique do not contain such consecutive SNPs. Default 40000.
#' @param hrstType  Character constant; heuristic methods.
#' If you want to do not use heuristic algorithm, set \code{hrstType = "nonhrst"}.
#' If you want to use heuristic algorithm suggested in Kim et al.,(2017), set \code{hrstType = "fast"}.
#' That algorithm is fastest heuristic algorithm and suitable when your memory capacity is not greater than 8GB.
#' If you want to obtain the results similar to the that of non-heuristic algorithm, set \code{hrstType = "near-nonhrst"}.
#' @param hrstParam Numeric constant; parameter for heuristic algorithm "near-nonhrst".
#'  Default is \code{200}. It is recommended that you set the parameter to greater than 150.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. The default is "density".
#' @param LD Character constant; LD measure to use, "r2" or "Dprime". Default "r2".
# <output>
#' @return A vector of cluster numbers of all  SNPs (\code{NA} represents singleton cluster).
#'
#' @seealso \code{\link{BigLD}}
#' @examples
#'
#' data(geno)
#' data(SNPinfo)
#' CLQD(geno=geno[,1:100],SNPinfo=SNPinfo[1:100,])
#' \dontrun{
#' CLQD(geno=geno[,1:100],SNPinfo=SNPinfo[1:100,], CLQmode = 'maximal')
#' CLQD(geno=geno[,1:100],SNPinfo=SNPinfo[1:100,], LD='Dprime')
#'}
#' @author Sun-Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @importFrom Rcpp sourceCpp
#' @export
CLQD <- function(geno, SNPinfo, CLQcut=0.5, clstgap=40000, hrstType=c("near-nonhrst", "fast", "nonhrst"), hrstParam=200,
                 CLQmode=c("density", "maximal"), LD=c("r2", "Dprime"))
{
  ########################################################################################################
  skipRatio <- 0
  CLQmode <- match.arg(CLQmode)
  LD <- match.arg(LD)
  hrstType <- match.arg(hrstType)
  geno <- as.matrix(geno)
  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]
  # Main Function
  SNPbps <- SNPinfo[, 3]
  if(LD == "r2"){
    OCM <- suppressWarnings(cor(geno, use="pairwise.complete.obs"))
    diag(OCM) <- 0
    OCM[is.na(OCM)]<-0
    OCM[abs(OCM) < CLQcut] <- 0
    OCM[abs(OCM) >= CLQcut] <- 1
  }else if(LD == "Dprime"){
    OCM <- genoDp(geno)
    OCM[(OCM==Inf)|(OCM==-Inf)] <- 0
  }
  binvector <- rep(NA, dim(OCM)[2])
  binnum <- 1
  # re.SNPbps <- SNPbps
  if(all(OCM==0)) return(binvector)

  # test graph complexity
  OCM1 <- OCM
  # bothhigh <- (degs>=400 & cores>=400)
  if(hrstType == "nonhrst"){
    heuristic <- FALSE
  }else if(hrstType == "near-nonhrst"){
    heuristic <- TRUE
    heuristicNum <- 0
    while(heuristic == TRUE){
      g <- igraph::graph_from_adjacency_matrix(OCM1, mode="undirected", weighted=TRUE, diag=FALSE, add.colnames=NA)
      cores <- igraph::coreness(g)
      highcore <- sum(cores>=hrstParam)
      local_cores <- table(cores[cores>=(hrstParam)])
      local_cores <- local_cores[local_cores>=(hrstParam)]
      if(length(local_cores>0)){
        if(heuristicNum==0){
          message("Use near-CLQ heuristic procedure!!")
          heuristicNum <- heuristicNum+1
          message(paste("hueristic loop", heuristicNum))
        }else{
          heuristicNum <- heuristicNum+1
          message(paste("hueristic loop", heuristicNum))
        }
        # find dense region
        # local_cores <- table(cores[cores>=(hrstParam)])
        # local_cores <- local_cores[local_cores>=(hrstParam)]
        local_hrstParam <- names(local_cores[which(as.integer(names(local_cores))==max(as.integer(names(local_cores))))])
        local_hrstParam <- as.numeric(local_hrstParam)
        bothhighSNPs <- which(cores == local_hrstParam)
        SNPset1 <- which(is.na(binvector))[bothhighSNPs]
        nowOCM <- OCM[SNPset1, SNPset1]
        heuristicBins <- heuristicCLQ(nowOCM, hrstParam)
        binvector[SNPset1[heuristicBins]]<-binnum
        binnum <- binnum+1
        OCM1 <- OCM[is.na(binvector), is.na(binvector)]
      }else{
        # if(heuristicNum ==0){
        #   # message("We do not need heuristic procedure")
        # }else{
        #   message("End new heuristic procedure")
        # }
        heuristic <- FALSE
      }
    } #end while
  }else if(hrstType == "fast"){
    checkLargest <- TRUE
    r2Mat <- OCM
    re.SNPbps <- SNPinfo[,3]
    # firstTerm = T
    while(checkLargest == TRUE){
      g <- igraph::graph_from_adjacency_matrix(r2Mat, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NA)
      compo = igraph::components(g)
      componum = which(compo$csize==max(compo$csize))[1]
      compov = which(compo$membership==componum)
      compadjM = OCM1[compov, compov]
      cg = igraph::graph_from_adjacency_matrix(compadjM, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NA)
      if((median(igraph::coreness(cg))>80 & max(igraph::coreness(cg))>100)| (quantile(igraph::coreness(cg), 0.75)>100 & max(igraph::coreness(cg))>100)){
        message("use fast heuristic procedure!")
        # if(quantile(degrees, 0.7) == 1) break
        # message(head((quantile(degrees, 0.7))))
        degrees = apply(r2Mat, 1, sum)
        maxdegv = which(degrees >=max(quantile(degrees, 0.7), 80))
        # if(length(maxdegv)>=1){
        maxdegvs = maxdegv
        edgeDens = NULL
        for(maxdegv in maxdegvs){
          Bignbds = which(r2Mat[maxdegv,, drop = FALSE]>0, arr.ind = TRUE)
          Bignbds.c = unique(Bignbds[,2])
          newr2Mat = r2Mat[Bignbds.c,Bignbds.c]
          EdgeDen = sum(newr2Mat)/((dim(newr2Mat)[1])*(dim(newr2Mat)[1]-1))
          edgeDens = c(edgeDens, EdgeDen)
        }
        maxdegvs = maxdegvs[order(edgeDens, decreasing = TRUE)]
        edgeDens = edgeDens[order(edgeDens, decreasing = TRUE)]
        degv = maxdegvs[1]
        edgeD = edgeDens[1]
        Bignbds = which(r2Mat[degv,, drop = FALSE]>0, arr.ind = TRUE)
        Bignbds.c = unique(Bignbds[,2])
        # maxiC = maximal.cliques(g, min = dim(OCM1)[1]*0.9)
        # largestOneRange = range(Bignbds.c)
        # largestSNPn = diff(largestOneRange)
        # largestCsize = length(Bignbds.c)
        nowSNPsbp = re.SNPbps[Bignbds.c]
        nowSNPsbploca = match(nowSNPsbp, SNPbps)
        binvector[nowSNPsbploca] <- binnum
        binnum = binnum + 1
        r2Mat <- r2Mat[-Bignbds.c, -Bignbds.c, drop = FALSE]
        OCM1 <- OCM1[-Bignbds.c, -Bignbds.c, drop = FALSE]
        re.SNPbps <- re.SNPbps[-Bignbds.c]
        # message("case2")
        checkLargest = TRUE
        if(length(re.SNPbps)<500)  checkLargest = FALSE
      }else{
        checkLargest = FALSE
      }

    }
  }
  # binvector splitting by clstgap
  if(all(is.na(binvector))==FALSE){
    binveclist <- lapply(seq_len(max(binvector, na.rm = TRUE)),
                         function(x) SNPinfo[,3][which(binvector == x)])
    binvecbplist <- newSplitCliques(binveclist, clstgap)
    binvecbpindex <- lapply(binvecbplist, function(x) match(x, SNPinfo[,3]))
    binvecbpindex <- binvecbpindex[vapply(binvecbpindex, length, c(1))>1]
    binvector <- rep(NA, length(binvector))
    for(i in seq_len(length(binvecbpindex))){
      binvector[binvecbpindex[[i]]]<-i
    }
  }
  # take Toooo Big block First!
  r2Mat <- OCM[is.na(binvector), is.na(binvector)]
  if(sum(is.na(binvector))<=1 | (sum(r2Mat)==0)) return(binvector)
  g <- igraph::graph_from_adjacency_matrix(r2Mat, mode="undirected", weighted=TRUE, diag=FALSE, add.colnames=NA)
  max.cliques <- igraph::max_cliques(g, min=2)
  if(length(max.cliques)==0) stop("max.cliques is empty")
  re.SNPbps <- SNPbps[is.na(binvector)]
  bp.cliques <- lapply(max.cliques, function(x) re.SNPbps[x])
  split.bp.cliques <- newSplitCliques(bp.cliques, clstgap)
  # reduce the number of maximal clique? (candidate) or
  # modify density function. narrow SNP distance, so small cliques are chosen preferencely.
  repeat {
    # message(binnum)
    if (all(is.na(binvector) == FALSE)) {
      break
    }
    if(length(split.bp.cliques)==0) break
    now.split.bp.cliques <- split.bp.cliques
    if(CLQmode =="density"){
      density.v <- vapply(now.split.bp.cliques, function(x) ((length(x)))/(diff(range(x))/1000), 1)
    }else{
      density.v <- vapply(now.split.bp.cliques, length, 1)
    }

    max.d <- which(density.v == max(density.v))
    max.cluster <- now.split.bp.cliques[max.d]
    if (length(max.cluster) > 1) {
      # if there are two bins of same density, then we choose the bigger one.
      max.cluster <- max.cluster[order(vapply(max.cluster, length, 1), decreasing=TRUE)]
    }
    max.cluster <- max.cluster[[1]]
    max.cluster.od <- match(max.cluster, re.SNPbps)
    # if (codechange == TRUE) {
    #   max.cluster.od <- ChooseMaximal(max.cluster.od, CLQcut, OCM)
    #   max.cluster <- re.SNPbps[max.cluster.od]
    # }
    ## excluding all SNPs in max.cluster from re.SNPbps
    split.bp.cliques <- lapply(split.bp.cliques, function(x) setdiff(x, max.cluster))
    split.bp.cliques <- unique(split.bp.cliques)
    split.bp.cliques <- split.bp.cliques[which(vapply(split.bp.cliques, length, 1) > 1)]
    binvector[match(max.cluster, SNPbps)] <- binnum
    binnum=binnum + 1
    # r2Mat <- r2Mat[-max.cluster.od, -max.cluster.od]
    # OCM <- OCM[-max.cluster.od, -max.cluster.od]
    # re.SNPbps <- setdiff(re.SNPbps, max.cluster)
    if (length(re.SNPbps) < 2) {
      break
    }
    if(length(split.bp.cliques)==0) break
    # message(sum(is.na(binvector)))
  }  ##end repeat
  return(binvector)
}


# Big-LD <input>
#' @title Estimation of LD block regions
#' @name BigLD
#' @aliases BigLD
#' @description \code{BigLD} returns the estimation of LD block regions of given data.
#' @usage BigLD(geno=NULL, SNPinfo=NULL,genofile=NULL, SNPinfofile=NULL,
#' cutByForce=NULL, LD=c("r2", "Dprime"), CLQcut=0.5,
#' clstgap=40000, CLQmode=c("density", "maximal"),
#' leng=200, subTaskSize=1500, MAFcut=0.05, appendRare=FALSE,
#' hrstType=c("near-nonhrst", "fast", "nonhrst"),
#' hrstParam=200, chrN=NULL, startbp=-Inf, endbp=Inf)
#' @param geno Data frame or matrix of additive genotype data, each column is additive genotype of each SNP.
#' @param SNPinfo Data frame or matrix of SNPs information.  1st column is rsID and 2nd column is bp position.
#' @param genofile Character constant; Genotype data file name (supporting format: .txt, .ped, .raw, .traw, .vcf).
#' @param SNPinfofile Character constant; SNPinfo data file name (supporting format: .txt, .map).
#' @param cutByForce Data frame; information of SNPs which are forced to be the last SNP LD block boundary.
#' @param LD Character constant; LD measure to use, r2 or Dprime. Default "r2".
#' @param CLQcut Numeric constant; a threshold for the LD measure value |r|, between 0 to 1. Default 0.5.
#' @param clstgap Numeric constant; a threshold of physical distance (bp) between two consecutive SNPs
#' which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique
#' greater than \code{clstgap}, then the algorithm split the cliques satisfying each
#' clique do not contain such consecutive SNPs. Default 40000.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. The default is "density".
#' @param leng Numeric constant; the number of SNPs in a preceding and a following region
#' of each sub-region boundary, every SNP in a preceding and every SNP in a following region need to be in weak LD. Default 200.
#' @param subTaskSize  Numeric constant; upper bound of the number of SNPs in a one-take sub-region. Default 1500.
#' @param MAFcut Numeric constant; the MAF threshold. Default 0.05.
#' @param appendRare logical; if TRUE, the function append rare variants with MAF<MAFcut to the constructed blocks.
#' @param hrstType  Character constant; heuristic methods.
#' If you want to do not use heuristic algorithm, set \code{hrstType = "nonhrst"}.
#' If you want to use heuristic algorithm suggested in Kim et al.,(2017), set \code{hrstType = "fast"}.
#' That algorithm is fastest heuristic algorithm and suitable when your memory capacity is not greater than 8GB.
#' If you want to obtain the results similar to the that of non-heuristic algorithm, set \code{hrstType = "near-nonhrst"}.
#' @param hrstParam Numeric constant; parameter for heuristic algorithm "near-nonhrst".
#'  Default is \code{200}. It is recommended that you set the parameter to greater than 150.
#' @param chrN Numeric(or Character) constant (or vector); chromosome number to use.
#' @param startbp Numeric constant; starting bp position of the \code{chrN}. Default -Inf.
#' @param endbp Numeric constant; last bp position of the \code{chrN}. Default Inf.
# <output>
#' @return  A data frame of block estimation result.
#' Each row of data frame shows the starting SNP and end SNP of each estimated LD block.
#'
#' @author Sun-Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#'
#'
#'
#' @seealso \code{\link{CLQD}}, \code{\link{LDblockHeatmap}}
#'
#' @examples
#'
#' data(geno)
#' data(SNPinfo)
#' BigLD(geno=geno, SNPinfo=SNPinfo, startbp = 16058400, endbp = 16076500)
#'
#' \dontrun{
#' BigLD(geno, SNPinfo, LD = "Dprime")
#' BigLD(geno, SNPinfo, CLQcut = 0.5, clstgap = 40000, leng = 200, subTaskSize = 1500)
#' }
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @importFrom Rcpp sourceCpp
#' @export

BigLD <- function(geno=NULL, SNPinfo=NULL,genofile=NULL, SNPinfofile=NULL, cutByForce=NULL,
                   LD=c("r2", "Dprime"), CLQcut=0.5, clstgap=40000, CLQmode=c("density", "maximal"),
                   leng=200, subTaskSize=1500, MAFcut=0.05, appendRare=FALSE,
                  hrstType=c("near-nonhrst", "fast", "nonhrst"), hrstParam=200,
                   chrN=NULL, startbp=-Inf, endbp=Inf)
{
  skipRatio=0.0
  CLQmode <- match.arg(CLQmode)
  hrstType <- match.arg(hrstType)
  LD <- match.arg(LD)
  # data preparation
  if(!is.null(genofile)){
    inputdata <- dataPreparation(genofile, SNPinfofile, geno, SNPinfo, chrN=chrN, startbp=startbp, endbp=endbp)
    geno <- inputdata[[1]]
    SNPinfo <- inputdata[[2]]
    chrNs <- unique(SNPinfo[,1])
  }else{
    if(is.null(geno) |is.null(SNPinfo)){
      stop("Need input data (or files)")
    }
    if(ncol(geno) != nrow(SNPinfo)){
      stop("The number of SNPs in geno and SNPinfo are not matched")
    }
    geno <- as.matrix(geno)
    # header generation
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
    colnames(geno)<- SNPinfo$rsID
    chrNs <- unique(SNPinfo[,1])
  }
  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]

  totalLDBres <- NULL
  totalCLQres <- rep(NA,dim(SNPinfo)[1])
  stindex <- 0

  if(ncol(geno)!=nrow(SNPinfo)) stop("The number of SNPs in geno data and SNPinfo data does not match")
  # for each chrN, seperately apply BigLD algorithm
  for(chrN in chrNs){
    message(paste("working chromosome name:", chrN))
    chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
    chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
    if(dim(chrSNPinfo)[1]==1)next
    # pruning by MAF ---
    message(paste("remove SNPs with MAF <", MAFcut))
    MAF <- apply(chrgeno, 2, function(x) mean(x,na.rm=TRUE)/2)
    MAF_ok <- ifelse(MAF>=0.5,1-MAF,MAF)
    MAF <- MAF_ok
    message(paste("Number of SNPs after prunning SNPs with MAF<", MAFcut, ":", sum(MAF>=MAFcut)))
    underMAFgeno <- chrgeno[,MAF<MAFcut]
    chrgeno <- chrgeno[,MAF>=MAFcut]
    underMAFSNPinfo <- chrSNPinfo[MAF<MAFcut,]
    chrSNPinfo <- chrSNPinfo[MAF>=MAFcut,]

    chrLDBres <- matrix(NA, dim(chrSNPinfo)[1],2)
    chrCLQres <- rep(NA, dim(chrSNPinfo)[1])
    # divide whole sequence into sub-task size segments.
    chrcutbyforce = cutByForce[cutByForce[,1] == chrN, ]
    cutpoints.all<-cutsequence(geno = chrgeno, SNPinfo = chrSNPinfo, leng = leng,
                               subTaskSize = subTaskSize, LD=LD, clstgap = clstgap, CLQcut = CLQcut, cutByForce = chrcutbyforce)
    cutpoints <- cutpoints.all[[1]]
    atfcut <- (cutpoints.all[[2]])
    if (!is.null(atfcut)){
      atfcut <- sort(atfcut)
    }
    cutpoints <- setdiff(cutpoints, 0)
    cutblock <- cbind(c(1, cutpoints + 1), c(cutpoints, dim(chrgeno)[2]))
    cutblock <- cutblock[-(dim(cutblock)[1]), , drop=FALSE]
    cutblock <- cutblock[cutblock[,1]!=cutblock[,2],, drop=FALSE]
    # for each subtaskregion, adapt BigLD algorithm ------------------
    for(i in seq_len(dim(cutblock)[1])){
      nowst <- cutblock[i, 1]
      nowed <- cutblock[i, 2]
      subgeno <- chrgeno[, nowst:nowed]
      subSNPinfo <- chrSNPinfo[nowst:nowed, ]
      message(paste(Sys.time()," | ", "chr",chrN,":",min(subSNPinfo[,3]), "-" ,max(subSNPinfo[,3]), " | sub-region: ", i,"/",dim(cutblock)[1],  sep = ""))
      subbinvec <- CLQD(geno = subgeno, SNPinfo = subSNPinfo, CLQcut = CLQcut, clstgap = clstgap, hrstType=hrstType,
                        hrstParam = hrstParam, CLQmode = CLQmode, LD = LD)
      # chrCLQres <- c(chrCLQres, subbinvec)
      chrCLQres[nowst: nowed]<-subbinvec
      cat('CLQ done!\r')
      if(all(is.na(subbinvec) == TRUE)) next;
      bins <- seq_len(max(subbinvec[which(!is.na(subbinvec))]))
      clstlist <- lapply(bins, function(x) which(subbinvec  == x))
      clstlist <- lapply(clstlist, sort)  ###
      clstlist <- clstlist[order(vapply(clstlist, min, 1))]  ###
      nowLDblocks <- constructLDblock(clstlist, subSNPinfo)
      nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop = FALSE]
      cat('constructLDblock done!\r')
      nowLDblocks.bp <- cbind(subSNPinfo[nowLDblocks[,1],3], subSNPinfo[nowLDblocks[,2],3])
      nowLDblocks <- nowLDblocks + (cutblock[i, 1] - 1)
      nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop = FALSE]
      preleng1 <- length(which(!is.na(chrLDBres[, 1])))
      chrLDBres[(preleng1 + 1):(preleng1 + dim(nowLDblocks)[1]), ] <- nowLDblocks

    }

    doneLDblocks <- chrLDBres[which(!is.na(chrLDBres[, 1])), , drop = FALSE]
    largeLDblocks <- doneLDblocks[apply(doneLDblocks, 1, diff)>5,, drop = FALSE]

    # atfcut -- LD block reconst ----------
    # newLDblocks <- matrix(NA, dim(chrSNPinfo)[1], 2)
    if(length(atfcut)!=0){
      addingBlocks <- NULL
      message("handling the ambiguous sub-task regions..")
      for(i in atfcut){
        print(paste("ambiguous boundary", i, "\r"))
        if(length(which(largeLDblocks[,1]>i))==0) break
        st <- largeLDblocks[max(which(largeLDblocks[,2]<=i)),1]
        ed <- largeLDblocks[min(which(largeLDblocks[,1]>i)),2]

        # saveBlocks <- doneLDblocks[doneLDblocks[,1]<st,, drop = FALSE]
        delIndex <- which(doneLDblocks[,1]>=st & doneLDblocks[,2]<=ed)
        doneLDblocks <- doneLDblocks[-delIndex,]
        # if(dim(saveBlocks)[1] != 0){
        #   newLDblocks[which(is.na(newLDblocks)==TRUE)[1]:(which(is.na(newLDblocks)==TRUE)[1] +dim(saveBlocks)[1]),] <- saveBlocks
        # }
        subgeno <- chrgeno[, st:ed]
        subSNPinfo <- chrSNPinfo[st:ed, ]
        subbinvec <- CLQD(geno = subgeno, SNPinfo = subSNPinfo, CLQcut = CLQcut, clstgap = clstgap, LD = LD, hrstType=hrstType,
                          hrstParam = hrstParam)
        # chrCLQres <- c(chrCLQres, subbinvec)
        chrCLQres[st: ed] <- subbinvec  #### change vector procedure
        cat('CLQ done!\r')
        bins <- seq_len(max(subbinvec[which(!is.na(subbinvec))]))
        clstlist <- lapply(bins, function(x) which(subbinvec == x))
        clstlist <- lapply(clstlist, sort)  ###
        clstlist <- clstlist[order(vapply(clstlist, min, 1))]  ###
        nowLDblocks <- constructLDblock(clstlist, subSNPinfo)
        nowLDblocks <- nowLDblocks + (st - 1)
        nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop=FALSE]
        addingBlocks <- rbind(addingBlocks, nowLDblocks)
      }
      addingBlocks <- rbind(doneLDblocks, addingBlocks)
      addingBlocks <- addingBlocks[order(addingBlocks[, 2]), , drop=FALSE]
      addingBlocks <- addingBlocks[order(addingBlocks[, 1]), , drop=FALSE]
      for(i in 1:(dim(addingBlocks)[1]-1)){
        if(addingBlocks[i,2]>=addingBlocks[(i+1),2]){
          addingBlocks[(i+1),]<-c(min(addingBlocks[i:(i+1),]), max(addingBlocks[i:(i+1),]))
          addingBlocks[i,1] <- NA
          next
        }
        if(addingBlocks[i,2]>=addingBlocks[(i+1),1]){
          addingBlocks[(i+1),] <-c(min(addingBlocks[i:(i+1),]), max(addingBlocks[i:(i+1),]))
          addingBlocks[i,1] <- NA
          next;
        }
      }
      newLDblocks <- addingBlocks[!is.na(addingBlocks[,1]),]
    }else{
      newLDblocks<-doneLDblocks
    } # LDblock reconst ...atfcut

    start.index <- newLDblocks[,1]
    end.index <- newLDblocks[,2]
    start.rsID <- as.character(chrSNPinfo[newLDblocks[,1],2])
    end.rsID <- as.character(chrSNPinfo[newLDblocks[,2],2])
    start.bp <- chrSNPinfo[newLDblocks[,1],3]
    end.bp <- chrSNPinfo[newLDblocks[,2],3]
    chrLDblocks <- data.frame(start.index, end.index, start.rsID, end.rsID, start.bp, end.bp)
    chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
    chrLDblocks$start.index<- match(chrLDblocks[,5], chrSNPinfo[,3])
    chrLDblocks$end.index<- match(chrLDblocks[,6], chrSNPinfo[,3])
    #append rare variants --------------------
    if(appendRare==TRUE){
      message( "Append rare variants!")
      chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
      chrLDblocks<-appendcutByForce(chrLDblocks, chrgeno, chrSNPinfo, CLQcut=CLQcut, clstgap=clstgap,
                                    CLQmode=CLQmode, hrstType = hrstType, hrstParam=hrstParam, LD=LD)
    }
    chrLDblocks <- data.frame(chr=chrN, chrLDblocks)
    chrLDblocks$start.index <- chrLDblocks$start.index+stindex
    chrLDblocks$end.index <- chrLDblocks$end.index+stindex
    stindex <- stindex + dim(chrSNPinfo)[1]
    totalLDBres <- rbind(totalLDBres, chrLDblocks)
  } # end for current chr
  message("\nBigLD done!")
  return(totalLDBres)
}

#-GPART
# GPART < input >
#' @title Partitioning genodata based on the result obtained by using Big-LD and gene region information.
#' @name GPART
#' @aliases GPART
#' @description
#' \code{GPART} partition the given genodata using the result obtained by Big-LD and gene region information.
#' The algorithm partition the whole sequence into sub sequences of which size do not exceed the given threshold.
#' @usage GPART(geno=NULL, SNPinfo=NULL, geneinfo=NULL, genofile=NULL,
#' SNPinfofile=NULL, geneinfofile=NULL, geneDB = c("ensembl","ucsc"),
#' assembly = c("GRCh38", "GRCh37"), geneid = "hgnc_symbol",ensbversion = NULL,
#' chrN=NULL, startbp=-Inf, endbp=Inf, BigLDresult=NULL, minsize=4, maxsize=50,
#' LD=c("r2", "Dprime"), CLQcut=0.5, CLQmode=c("density", "maximal"), MAFcut = 0.05,
#' GPARTmode=c("geneBased", "LDblockBased"),
#' Blockbasedmode=c("onlyBlocks", "useGeneRegions"))
#' @param geno Data frame or matrix of additive genotype data, each column is additive genotype of each SNP.
#' @param SNPinfo Data frame or matrix of SNPs information.  1st column is rsID and 2nd column is bp position.
#' @param geneinfo Data frame or matrix of Gene info data.
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param genofile Character constant; Genotype data file name (supporting format: .txt, .ped, .raw, .traw, .vcf).
#' @param SNPinfofile Character constant; SNPinfo data file name (supporting format: .txt, .map).
#' @param geneinfofile A Character constant; file containing the gene information
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param geneDB A Character constant; database type for gene information. Set \code{"ensembl"} to get gene info from "Ensembl",
#'  or set \code{"ucsc"} to get gene info from "UCSC genome browser" (See package "biomaRt" or
#'  package "homo.sapiens"/"TxDb.Hsapiens.UCSC.hg38.knownGene"/ "TxDb.Hsapiens.UCSC.hg19.knownGene" for details.)
#' @param assembly A character constant; set \code{"GRCh37"} for GRCh37, or set \code{"GRCh38"} for GRCh38
#' @param geneid A character constant; When you use the gene information by \code{geneDB}. specity the symbol for gene name to use.
#' default is "hgnc_symbol".
#' (eg. 'ensembl_gene_id' for \code{geneDB = "ensembl"}, "ENTREZID"/"ENSEMBL"/"REFSEQ"/"TXNAME" for \code{geneDB="ucsc"}.
#' See package 'biomaRt' or package 'Homo.sapiens' for details)
#' @param ensbversion a integer constant; you can set the release version of ensembl
#' when you use the gene information by using \code{geneDB='emsembl'} and \code{assembly='GRCh38'}
#' @param chrN Numeric(or Character) constant (or vector); chromosome number to use. If \code{NULL}(default), we use all chromosome.
#' @param startbp Numeric constant; starting bp position of the \code{chrN}. Default -Inf.
#' @param endbp Numeric constant; last bp position of the \code{chrN}. Default Inf.
#' @param BigLDresult Data frame; a result obtained by \code{BigLD} function. If \code{NULL}(default),
#' the \code{GPART} function first excute \code{BigLD} function to obtain LD blocks estimation result.
#' @param minsize Numeric constant; the lower bound of number of SNPs in a partition.
#' @param maxsize Numeric constant; the upper bound of number of SNPs in a partition.
#' @param LD Character constant; LD measure to use, r2 or Dprime.
#' @param CLQcut Numeric constant; threshold for the correlation value |r|, between 0 to 1.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. The default is "density".
#' @param MAFcut Numeric constant; the MAF threshold. Default 0.05.
#' @param GPARTmode Character constant; GPART algorithm methods to use, "geneBased" or "LDblockBased". Default is ??geneBased??
#' @param Blockbasedmode Character constant; When you set \code{GPARTmode = "LDblockBased"}, specify LDblock based method as
#' \code{"onlyBlocks"}("LDblock based only" algorithm) or \code{"useGeneRegions"}(LDblock based and also use gene info algorithm).
#'
# < output >
#' @return \code{GPART} returns data frame which contains 9 information of each partition
#' (chromosome, index number of the first SNP and last SNP, rsID of the first SNP and last SNP,
#' basepair position of the first SNP and last SNP, blocksize, Name of a block)
#'
#' @author Sun Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#' @seealso \code{\link{BigLD}}
#'
#'
#' @examples
#' data(geno)
#' data(SNPinfo)
#' data(geneinfo)
#' GPART(geno=geno, SNPinfo=SNPinfo, geneinfo=geneinfo, startbp = 16058400, endbp = 16076500)
#'
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @importFrom Rcpp sourceCpp
#' @import Homo.sapiens
#' @import TxDb.Hsapiens.UCSC.hg38.knownGene
#' @export
GPART <- function(geno=NULL, SNPinfo=NULL, geneinfo=NULL, genofile=NULL, SNPinfofile=NULL, geneinfofile=NULL,
                  geneDB = c("ensembl","ucsc"), assembly = c("GRCh38", "GRCh37"), geneid = "hgnc_symbol",
                  ensbversion = NULL,
                  chrN=NULL, startbp=-Inf, endbp=Inf, BigLDresult=NULL, minsize=4, maxsize=50,
                  LD=c("r2", "Dprime"), CLQcut=0.5, CLQmode=c("density", "maximal"), MAFcut = 0.05,
                  GPARTmode=c("geneBased", "LDblockBased"), Blockbasedmode=c("onlyBlocks", "useGeneRegions"))
{
  # Main part
  GPARTmode <- match.arg(GPARTmode)
  geneDB <- match.arg(geneDB)
  assembly <- match.arg(assembly)
  CLQmode <- match.arg(CLQmode)
  LD <- match.arg(LD)# if null choose "null"
  # data preparation
  # input data preparation

  if(!is.null(genofile)){
    inputdata <- dataPreparation(genofile, SNPinfofile, geno, SNPinfo, chrN=chrN, startbp=startbp, endbp=endbp)
    geno <- inputdata[[1]]
    SNPinfo <- inputdata[[2]]
  }else{
    geno <- geno
    geno <- as.matrix(geno)
    SNPinfo <- SNPinfo
    if(is.null(geno) |is.null(SNPinfo)){
      stop("Need input data (or files)")
    }
    if(ncol(geno) != nrow(SNPinfo)){
      stop("The number of SNPs in geno and SNPinfo are not matched")
    }
    if(is.null(chrN)) chrN <- unique(SNPinfo[,1])
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    if(startbp != -Inf | endbp != Inf){
      if((length(chrN)>1) | (length(unique(SNPinfo[,1]))>1))
        stop("If your input data include more than one chromosome or you choose more than one chromosome,
             then do not specify the startbp and endbp!")
      SNPloca <- which(SNPinfo$bp>=startbp & SNPinfo$bp<=endbp & SNPinfo$chrN == chrN)
      SNPinfo <- SNPinfo[SNPloca,]
      geno <- geno[,SNPloca]
    }
    colnames(geno)<- SNPinfo$rsID
  }
  chrNs <- unique(SNPinfo[,1])

  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]
  #SNP filtering
  MAF <- apply(geno, 2, function(x) mean(x,na.rm=TRUE)/2)
  MAF_ok <- ifelse(MAF>=0.5,1-MAF,MAF)
  MAF <- MAF_ok
  message(paste("Number of SNPs after prunning SNPs with MAF<", MAFcut, ":", sum(MAF>=MAFcut)))
  geno <- geno[,MAF>=MAFcut]
  SNPinfo <- SNPinfo[MAF>=MAFcut,]

  if(is.null(BigLDresult)){
    message("Start to execute BigLD function!")
    BigLDresult <- BigLD(geno, SNPinfo, CLQcut=CLQcut, CLQmode=CLQmode, LD=LD, MAFcut=MAFcut)#, MAFcut=MAFcut
    message("Big-LD, done!")
  }else{
    message("Use the inputted BigLD result")
    # BigLDblocks <- BigLDresult #[BigLDresult$chr==chrN,]
    newindex.st <- match(BigLDresult$start.rsID, SNPinfo[,2])
    newindex.ed <- match(BigLDresult$end.rsID, SNPinfo[,2])
    #BigLDresult check
    if(sum(is.na(newindex.st))!=0 |sum(is.na(newindex.ed))!=0){
      stop("The inputted data and the bigLD results are not paired.
           \nPlease check whether the BigLD results are obtained from the input data")
    }
    BigLDresult$start.index<-newindex.st
    BigLDresult$end.index<-newindex.ed
    }
  # gene based
  # load gene info
  if(is.null(geneinfo)){
    geneinfo <- geneinfoExt(geneinfofile=geneinfofile, geneDB = geneDB, assembly = assembly, geneid = geneid,
                             ensbversion = ensbversion, chrNs = chrNs)
  }
  if(GPARTmode == "geneBased"){
    message(paste("GPART algorithm: geneBased"))
    TotalRes <- NULL
    startindex <- 0
    for(chrN in unique(SNPinfo[,1])){
      message(paste("working chromosome name:", chrN))
      chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
      chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
      BigLDblocks <- BigLDresult[BigLDresult$chr==chrN,,drop=FALSE]

      genelist <- geneinfo[which(geneinfo[,2] == chrN), ]
      colnames(genelist)<- c("genename", "chr", "start.bp", "end.bp")
      genelist <- genelist[order(genelist[,3]),,drop=FALSE]
      genelist <- combineOverlapSamegene(genelist)
      GeneRegionSNPs1 <- NULL
      for (i in seq_len(dim(genelist)[1])){
        test <- (which(chrSNPinfo[, 3] >= genelist[i, 3] & chrSNPinfo[, 3] <= genelist[i, 4]))
        ifelse(length(test) > 0, test <- range(test), test <- c(0, 0))
        GeneRegionSNPs1 <- rbind(GeneRegionSNPs1, test)
        # if(i%%10) message(i)
      }
      rownames(GeneRegionSNPs1) <- genelist[, 1]
      SNPexist <- apply(GeneRegionSNPs1, 1, function(x) (x[1] != 0 & x[2] != 0))
      SNPexist <- unlist(SNPexist)
      Geneblocks <- GeneRegionSNPs1[SNPexist, ,drop=FALSE]
      if(dim(Geneblocks)[1]>0){
        Geneblocks.M <- mergeOverlapGene(Geneblocks)
      }else{
        Geneblocks.M <- Geneblocks
      }
      if(dim(BigLDblocks)[1]>0){
        newindex.st <- match(BigLDblocks$start.rsID, chrSNPinfo[,2])
        newindex.ed <- match(BigLDblocks$end.rsID, chrSNPinfo[,2])
        #BigLDresult check
        # if(sum(is.na(newindex.st))!=0 |sum(is.na(newindex.ed))!=0){
        #   stop("The inputted data and the bigLD results are not paired.
        #      \nPlease check whether the BigLD results are obtained from the input data")
        # }
        BigLDblocks$start.index<-newindex.st
        BigLDblocks$end.index<-newindex.ed
        nowLDblocks <- BigLDblocks
        LDblocks <- cbind(nowLDblocks[, 2], nowLDblocks[, 3])
        # Split Large LDblocks
        FinalLDblocks <- LDblockSplit(chrgeno, LDblocks, maxsize, LD)
        LDblocks.sgt <- setdiff(seq_len(dim(chrSNPinfo)[1]), unlist(apply(FinalLDblocks, 1, function(x) min(x):max(x))))
        LDblocks.sgt <- cbind(LDblocks.sgt, LDblocks.sgt)
        LDblocks.T <- rbind(FinalLDblocks, LDblocks.sgt)
        LDblocks.T <- LDblocks.T[order(LDblocks.T[, 1]), ]
        colnames(LDblocks.T) <- c("st", "ed")
        # gene-base block partitioning
      }else{
        LDblocks.T <- cbind(seq_len(dim(chrSNPinfo)[1]), seq_len(dim(chrSNPinfo)[1]))
        colnames(LDblocks.T) <- c("st", "ed")
      }
      if(dim(Geneblocks.M)[1]>0){
        GeneLDblocks2 <- LDblockGeneMerge(LDblocks.T, Geneblocks.M)
      }else{
        GeneLDblocks2 <- LDblocks.T
        rownames(GeneLDblocks2) <- rep("No", dim(GeneLDblocks2)[1])
      }
      blockL <- GeneLDblocks2[, 2] - GeneLDblocks2[, 1] + 1
      GeneLDblocks3 <- cbind(GeneLDblocks2, blockL)
      # split big block
      if(sum(blockL>maxsize)>0){
        GeneLDblocks4 <- splitBigLD(GeneLDblocks3, LDblocks.T, Geneblocks, maxsize)
      }else{
        GeneLDblocks4 <- GeneLDblocks3
        # GeneLDblocks4 <- data.frame(GeneLDblocks4, rep("No", dim(GeneLDblocks4)[1]))
        # colnames(GeneLDblocks4)[4] <-"gname"
      }
      GeneLDblocks5 <- mergeSmallRegion(GeneLDblocks4, maxsize, minsize)
      # naming regions
      GeneLDblocks <- namingRegion2(GeneLDblocks5, genelist, chrSNPinfo)
      st.rsID <- chrSNPinfo[GeneLDblocks[, 1], 2]
      ed.rsID <- chrSNPinfo[GeneLDblocks[, 2], 2]
      Finalresult <- data.frame(chrN,GeneLDblocks$st, GeneLDblocks$ed, st.rsID, ed.rsID, GeneLDblocks$st.bp, GeneLDblocks$ed.bp,
                                GeneLDblocks$blockL, GeneLDblocks$Finalnames)
      colnames(Finalresult) <- c("chr","st", "ed", "st.rsID", "ed.rsID", "st.bp", "ed.bp", "blocksize", "Name")
      Finalresult$st <- Finalresult$st+startindex
      Finalresult$ed <- Finalresult$ed+startindex
      startindex <- startindex + dim(chrgeno)[2]
      message(paste("chr", chrN, "done!"))
      TotalRes <- rbind(TotalRes, Finalresult)
    }
    # LD block based
  }else if (GPARTmode == "LDblockBased"){
    Blockbasedmode <- match.arg(Blockbasedmode)
    message(paste("GPART algorithm: LDblockBased-", Blockbasedmode, sep = ""))
    TotalRes <- NULL
    startindex <- 0
    for(chrN in unique(SNPinfo[,1])){
      message(paste("working chromosome name:", chrN))
      chrSNPinfo <- SNPinfo[(SNPinfo[,1]==chrN),,drop=FALSE]
      chrgeno <- geno[,(SNPinfo[,1]==chrN),drop=FALSE]
      BigLDblocks <- BigLDresult[BigLDresult$chr==chrN,,drop=FALSE]

      genelist <- geneinfo[which(geneinfo[,2] == chrN), ]
      colnames(genelist)<- c("genename", "chr", "start.bp", "end.bp")
      genelist <- genelist[order(genelist[,3]),]
      genelist <- combineOverlapSamegene(genelist)
      # gene-region merging
      GeneRegionSNPs1 <- NULL
      for (i in seq_len(dim(genelist)[1])){
        test <- (which(chrSNPinfo[, 3] >= genelist[i, 3] & chrSNPinfo[, 3] <= genelist[i, 4]))
        ifelse(length(test) > 0, test <- range(test), test <- c(0, 0))
        GeneRegionSNPs1 <- rbind(GeneRegionSNPs1, test)
        # if(i%%10) message(i)
      }
      rownames(GeneRegionSNPs1) <- genelist[, 1]
      SNPexist <- apply(GeneRegionSNPs1, 1, function(x) (x[1] != 0 & x[2] != 0))
      SNPexist <- unlist(SNPexist)
      Geneblocks <- GeneRegionSNPs1[SNPexist, ,drop=FALSE]
      if(dim(Geneblocks)[1]>0){
        Geneblocks.M <- mergeOverlapGene(Geneblocks)
      }else{
        Geneblocks.M <- Geneblocks
      }

      if(dim(BigLDblocks)[1]>0){
        newindex.st <- match(BigLDblocks$start.rsID, chrSNPinfo[,2])
        newindex.ed <- match(BigLDblocks$end.rsID, chrSNPinfo[,2])
        BigLDblocks$start.index <- newindex.st
        BigLDblocks$end.index <- newindex.ed
        nowLDblocks <- BigLDblocks
        LDblocks <- cbind(nowLDblocks[, 2], nowLDblocks[, 3])
        # Split Large LDblocks
        FinalLDblocks <- LDblockSplit(chrgeno, LDblocks, maxsize, LD)
        LDblocks.sgt <- setdiff(seq_len(dim(chrSNPinfo)[1]), unlist(apply(FinalLDblocks, 1, function(x) min(x):max(x))))
        LDblocks.sgt <- cbind(LDblocks.sgt, LDblocks.sgt)
        LDblocks.T <- rbind(FinalLDblocks, LDblocks.sgt)
        LDblocks.T <- LDblocks.T[order(LDblocks.T[, 1]), ]
        colnames(LDblocks.T) <- c("st", "ed")
      }else{
        LDblocks.T <- cbind(seq_len(dim(chrSNPinfo)[1]), seq_len(dim(chrSNPinfo)[1]))
        colnames(LDblocks.T) <- c("st", "ed")
      }
      # onlyblocks? or useGeneRegions?
      if(Blockbasedmode == "onlyBlocks"){
        blockL <- apply(LDblocks.T, 1, function(x) diff(x)+1)
        LDblocks.T <- data.frame(LDblocks.T, blockL)
        LDblocks1 <- mergeSmallRegion(LDblocks.T, maxsize, minsize)
        GeneLDblocks <- namingRegion2(LDblocks1, genelist, chrSNPinfo)
        # end: if(Blockbasedmode == "onlyBlocks")
      }else if(Blockbasedmode == "useGeneRegions"){
        blockL <- apply(LDblocks.T, 1, function(x) diff(x)+1)
        LDblocks.T <- data.frame(LDblocks.T, blockL)
        gLDblocks <- data.frame(LDblocks.T, NA)
        for(k in seq_len(dim(gLDblocks)[1])){
          nowblock <- gLDblocks[k,,drop=FALSE]
          intro.index <- (which(Geneblocks[,2]<nowblock[1,1]))
          intro.index1 <- setdiff(seq_len(dim(Geneblocks)[1]), intro.index)
          outro.index <- (which(Geneblocks[,1]>nowblock[1,2]))
          outro.index1 <- setdiff(seq_len(dim(Geneblocks)[1]), outro.index)
          common.index <- intersect(intro.index1, outro.index1)
          if(length(common.index)==0) {
            gLDblocks[k,4]<-NA
          }else{
            gnames <- rownames(Geneblocks[common.index,1, drop=FALSE])
            gname <- paste(gnames, collapse="/")
            gLDblocks[k,4]<-gname
          }
        }
        ## LD block
        bigblocks <- gLDblocks[gLDblocks$blockL>=minsize,]
        smallblocks <- gLDblocks[gLDblocks$blockL<minsize,]
        small.gene <- smallblocks[!is.na(smallblocks[,4]),]
        small.nongene <- smallblocks[is.na(smallblocks[,4]),]
        #merge small blocks which are in gene regions
        addedblocks <- NULL
        generegions <- unique(small.gene[,4])
        for(g in generegions){
          # message(g)
          nowgeneblocks <- small.gene[(small.gene[,4]==g),,drop=FALSE]
          nowbin <- NULL
          while(dim(nowgeneblocks)[1]>0){
            if(is.null(nowbin)){
              nowbin <- nowgeneblocks[1,,drop=FALSE]
              nowgeneblocks <- nowgeneblocks[-1,,drop=FALSE]
            }
            if(dim(nowgeneblocks)[1]==0 & !is.null(nowbin)){
              addedblocks<- rbind(addedblocks, nowbin)
              break
            }
            newbin <- nowgeneblocks[1,,drop=FALSE]
            nowgeneblocks <- nowgeneblocks[-1,,drop=FALSE]
            if(nowbin[1,3]+newbin[1,3]>maxsize){
              addedblocks<- rbind(addedblocks, nowbin)
              nowbin <- newbin
            }else if((nowbin[1,2]+1)!=newbin[1,1]){
              addedblocks<- rbind(addedblocks, nowbin)
              nowbin <- newbin
            }else if((nowbin[1,2]+1)==newbin[1,1]){
              nowbin[1,2] <- newbin[1,2]
              nowbin[1,3] <- nowbin[1,3]+newbin[1,3]
            }
            if(dim(nowgeneblocks)[1]==0 & !is.null(nowbin)){
              addedblocks<- rbind(addedblocks, nowbin)
              break
            }
          }
        }
        GeneLDblocks <- rbind(bigblocks, addedblocks, small.nongene)
        GeneLDblocks <- GeneLDblocks[order(GeneLDblocks[,1]),]
        LDblocks1 <- mergeSmallRegion(GeneLDblocks[,seq_len(3)], maxsize, minsize)
        GeneLDblocks <- namingRegion2(LDblocks1, genelist, chrSNPinfo)
      } # end if(Blockbasedmode == "useGeneRegions")
      st.rsID <- chrSNPinfo[GeneLDblocks[, 1], 2]
      ed.rsID <- chrSNPinfo[GeneLDblocks[, 2], 2]
      Finalresult <- data.frame(chrN,GeneLDblocks$st, GeneLDblocks$ed, st.rsID, ed.rsID, GeneLDblocks$st.bp, GeneLDblocks$ed.bp,
                                GeneLDblocks$blockL, GeneLDblocks$Finalnames)
      colnames(Finalresult) <- c("chr","st", "ed", "st.rsID", "ed.rsID", "st.bp", "ed.bp", "blocksize", "Name")
      Finalresult$st <- Finalresult$st+startindex
      Finalresult$ed <- Finalresult$ed+startindex
      startindex <- startindex + dim(chrgeno)[2]
      message(paste("chr", chrN, "done!"))
      TotalRes <- rbind(TotalRes, Finalresult)
    }
  }
  colnames(TotalRes)[seq_len(7)] <- c( "chr","start.index", "end.index", "start.rsID","end.rsID", "start.bp", "end.bp")
  return(TotalRes)
}


#' @title visualization of LD block structure
#' @name LDblockHeatmap
#' @aliases LDblockHeatmap
#' @description
#' \code{LDblockHeatmap} visualize the LD structure or LD block results of the inputed data.
#' LDblockHeatmap <- function(geno=NULL, SNPinfo=NULL, genofile=NULL, SNPinfofile=NULL,geneinfo=NULL, geneinfofile = NULL,
#' geneDB = c("ensembl","ucsc","file"), assembly = c("GRCh38", "GRCh37"), geneid = "hgnc_symbol",
#' ensbversion = NULL, chrN=NULL,startbp=-Inf, endbp=Inf, blockresult=NULL, blocktype=c("bigld", "gpart"),
#' minsize=4, maxsize=50, LD=c("r2", "Dprime", "Dp-str"),  MAFcut=0.05,CLQcut=0.5,
#' CLQmode=c("density", "maximal"),  CLQshow=FALSE,
#' type=c("png", "tif"), filename="heatmap", res=300, onlyHeatmap=FALSE)
#' @param geno Data frame or matrix of additive genotype data, each column is additive genotype of each SNP.
#' @param SNPinfo Data frame or matrix of SNPs information.  1st column is rsID and 2nd column is bp position.
#' @param genofile Character constant; Genotype data file name (supporting format: .txt, .ped, .raw, .traw, .vcf).
#' @param SNPinfofile Character constant; SNPinfo data file name (supporting format: .txt, .map).
#' @param geneinfo Data frame or matrix of Gene info data.
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param geneinfofile A Character constant; file containing the gene information
#' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp,  4th col : end bp)
#' @param geneDB A Character constant; database type for gene information. Set \code{"ensembl"} to get gene info from "Ensembl",
#'  or set \code{"ucsc"} to get gene info from "UCSC genome browser" (See package "biomaRt" or
#'  package "homo.sapiens"/"TxDb.Hsapiens.UCSC.hg38.knownGene"/ "TxDb.Hsapiens.UCSC.hg19.knownGene" for details.)
#' @param assembly A character constant; set \code{"GRCh37"} for GRCh37, or set \code{"GRCh38"} for GRCh38
#' @param geneid A character constant; When you use the gene information by \code{geneDB}. specity the symbol for gene name to use.
#' default is "hgnc_symbol".
#' (eg. 'ensembl_gene_id' for \code{geneDB = "ensembl"}, "ENTREZID"/"ENSEMBL"/"REFSEQ"/"TXNAME" for \code{geneDB="ucsc"}.
#' See package 'biomaRt' or package 'Homo.sapiens' for details)
#' @param ensbversion a integer constant; you can set the release version of ensembl
#' when you use the gene information by using \code{geneDB='emsembl'} and \code{assembly='GRCh38'}
#' @param geneshow logical; do not show the gene information if \code{geneshow=FALSE}.
#' Default is \code{geneshow=TRUE}
#' @param chrN Numeric(or Character) constant ; chromosome number to use.
#' If the data contains more than one chromosome, you need to specify the chromosome to show.
#' @param startbp Numeric constant; starting bp position of the \code{chrN}.
#' @param endbp Numeric constant; last bp position of the \code{chrN}.
#' @param blockresult Data frame; a result obtained by \code{BigLD} function or \code{GPART}. If \code{NULL}(default),
#' the function first excute \code{BigLD} or\code{GPART} to obtain block estimation result denpending on the \code{blocktype}.
#' @param blocktype Character constant; "bigld" for \code{Big-LD} or "gpart" for \code{GPART}. Default is \code{"gpart"}.
#' @param minsize Integer constant; when \code{blockresult=NULL, blocktype="gpart"}
#' specify the threshold for minsize of a block obtained by \code{GPART}
#' @param maxsize Integer constant; when \code{blockresult=NULL, blocktype="gpart"}
#' specify the threshold for maxsize of a block obtained by \code{GPART}
#' @param LD Character constant; LD measure to use, "r2" or "Dprime" or "Dp-str".
#' LD measures for LD heatmap (and BigLD execution when \code{BigLDresult=NULL}). When \code{LD = "Dp-str"},
#' heatmap shows only two cases, "weak LD or not-informative" and "strong LD". When \code{LD= Dprime} heatmap shows the estimated D' measures.
#' @param MAFcut Numeric constant; MAF threshold of SNPs to use.  Default 0.05
#' @param CLQcut Numeric constant; threshold for the correlation value |r|, between 0 to 1. Default 0.5.
#' @param CLQmode Character constant; the way to give priority among detected cliques.
#' if \code{CLQmode = "density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)},
#' else if \code{CLQmode = "maximal"}, then the algorithm gives priority to the largest clique. Default "density".
#' @param CLQshow logical; Show the LD bin structures, i.e. CLQ results, in each LD blocks.
#' Notice that if the region to show includes more than 200 SNPs, the function do now show LD bin structures automatically. Default false.
#' @param type Character constant; file format of output image file. set \code{png} for PNG format file, or \code{tif} for TIFF format file.
#' @param filename Character constant; filename of output image file. Default "heatmap".
#' @param res Numeric constant; resolution of image. Default 300.
#' @param onlyHeatmap logical; show the LD heatmap without the LD block boundaries. Default false.

# < output >
#' @return \code{GPART} returns data frame which contains 9 information of each partition
#' (chromosome, index number of the first SNP and last SNP, rsID of the first SNP and last SNP,
#' basepair position of the first SNP and last SNP, blocksize, Name of a block)
#'
#' @author Sun-Ah Kim <sunny03@snu.ac.kr>, Yun Joo Yoo <yyoo@snu.ac.kr>
#' @seealso \code{\link{BigLD}}
#'
#' @examples
#'
#' LDblockHeatmap(geno=geno[,1:100], SNPinfo=SNPinfo[1:100,], geneinfo=geneinfo,
#' filename="chr21Heatmap")
#'
#' @importFrom grDevices adjustcolor colorRampPalette dev.off heat.colors png tiff
#' @importFrom stats cor median quantile
#' @importFrom utils tail
#' @import grid
#' @importFrom Rcpp sourceCpp
#' @export
#-LDblockHeatmap2
LDblockHeatmap <- function(geno=NULL, SNPinfo=NULL, genofile=NULL, SNPinfofile=NULL,geneinfo=NULL, geneinfofile=NULL,
                           geneDB=c("ensembl","ucsc","file"), assembly=c("GRCh38", "GRCh37"), geneid="hgnc_symbol",
                           ensbversion=NULL, geneshow=TRUE, chrN=NULL,startbp=-Inf, endbp=Inf, blockresult=NULL, blocktype=c("bigld", "gpart"),
                           minsize=4, maxsize=50, LD=c("r2", "Dprime", "Dp-str"),  MAFcut=0.05,CLQcut=0.5,
                           CLQmode=c("density", "maximal"),  CLQshow=FALSE,
                           type=c("png", "tif"), filename="heatmap", res=300, onlyHeatmap=FALSE)
{
  # -set data and parameters ------
  tick <- "both"
  maxisize <- 20000
  longleng <- 2000
  shortleng <- ifelse(is.null(geneinfo), 1000, 100)
  CLQmode <- match.arg(CLQmode)
  blocktype <- match.arg(blocktype)
  type <- match.arg(type)
  LD <- match.arg(LD)
  LDcal <- ifelse(LD=="r2", "r2", "Dprime")
  # tick <- match.arg(tick)
  ## data preparation ----------

  # input data preparation
  if(!is.null(genofile)){
    inputdata <- dataPreparation(genofile, SNPinfofile, geno, SNPinfo, chrN=chrN, startbp=startbp, endbp=endbp)
    geno <- inputdata[[1]]
    SNPinfo <- inputdata[[2]]
  }else{
    if(is.null(geno) |is.null(SNPinfo)){
      stop("Need input data (or files)")
    }
    if(ncol(geno) != nrow(SNPinfo)){
      stop("The number of SNPs in geno and SNPinfo are not matched")
    }
    if(is.null(chrN)){
      if(length(unique(SNPinfo[,1]))==1){
        chrN <- unique(SNPinfo[,1])
      }else{
        stop("Please choose a chromosome!")
      }
    }
    colnames(SNPinfo) <- c("chrN", "rsID", "bp")
    geno <- as.matrix(geno)
  }
  if(is.null(geneinfo)){
    if(geneshow == TRUE){
      geneinfo <- geneinfoExt(geneinfofile=geneinfofile, geneDB = geneDB, assembly = assembly, geneid = geneid,
                               ensbversion = ensbversion, chrNs = chrN)
    }
  }

  # filtering all NA SNPs
  allNASNPs = apply(geno, 2, function(x) all(is.na(x)))
  geno<-geno[, !allNASNPs]
  SNPinfo<-SNPinfo[!allNASNPs, ]
  ## MAF filtering
  MAF <- apply(geno, 2, function(x) mean(x,na.rm=TRUE)/2)
  MAF_ok <- ifelse(MAF>=0.5,1-MAF,MAF)
  MAF <- MAF_ok
  message(paste("Number of SNPs after prunning SNPs with MAF<", MAFcut, ":", sum(MAF>=MAFcut)))
  # underMAFgeno <- geno[,MAF<MAFcut]
  geno <- geno[,MAF>=MAFcut]
  # underMAFSNPinfo <- SNPinfo[MAF<MAFcut,]
  SNPinfo <- SNPinfo[MAF>=MAFcut,]

  trascolor <-  adjustcolor( "red", alpha.f=0)
  col1 <- c('#7D0112','#820920','#87112B','#8C1834','#911E3D','#952445','#9A294C','#9E2F54','#A2345B','#A73962',
            '#AA3F69','#AE4470','#B24977','#B64E7D','#B95483','#BC598A','#C05E90','#C36396','#C6689C','#C86EA1',
            '#CB73A7','#CE78AC','#D07DB2','#D282B7','#D587BC','#D78CC1','#D991C5','#DB96CA','#DC9BCE','#DEA0D3',
            '#E0A5D7','#E1AADB','#E3AFDF','#E4B3E2','#E5B8E6','#E7BCE9','#E8C1EC','#E9C5EF','#EACAF1','#EBCEF4',
            '#ECD2F6','#EDD6F8','#EEDAF9','#EFDDFB','#F0E1FC','#F1E4FC','#F1E8FC','#F2EBFC','#F2EDFA','#F2F0F6')
  col2 <- c('#2D3184','#283C87','#23458A','#1E4E8E','#185692','#125E95','#0A6699','#026E9D','#0075A0','#007CA3',
            '#0083A6','#008AA9','#0790AC','#1296AE','#1C9CB0','#26A2B2','#2FA8B4','#38ADB6','#40B3B7','#49B8B9',
            '#51BCBA','#5AC1BB','#62C5BC','#6AC9BD','#73CDBE','#7BD1BE','#83D5BF','#8AD8C0','#92DBC1','#99DEC1',
            '#A0E0C2','#A7E3C3','#AEE5C4','#B5E7C5','#BBE9C6','#C1EBC7','#C7ECC9','#CCEECA','#D2EFCB','#D7F0CD',
            '#DBF0CF','#DFF1D0','#E3F2D2','#E7F2D4','#EAF2D6','#EDF2D9','#EFF2DB','#F1F2DD','#F2F2E0','#F3F1E4')
  col3 <- c('#D33F6A','#D44468','#D54866','#D74C63','#D85161','#D9545F','#DB585C','#DC5C5A','#DD6057','#DE6455',
            '#DF6752','#E06B50','#E16F4D','#E2724A','#E37647','#E47A45','#E57D42','#E5813F','#E6843C','#E78839',
            '#E78B37','#E88E34','#E89231','#E8952F','#E9992D','#E99C2B','#E99F2A','#E9A328','#EAA628','#EAAA28',
            '#EAAD29','#EAB02A','#E9B42C','#E9B72E','#E9BA31','#E9BE35','#E8C139','#E8C43D','#E8C742','#E7CB46',
            '#E7CE4C','#E6D151','#E6D457','#E5D75D','#E4DA64','#E4DD6B','#E3E073','#E2E37C','#E2E688','#E2E6BD')
  col4 <- c('#035F33','#176438','#23693C','#2D6D41','#367245','#3E764A','#457B4F','#4D7F53','#538458','#5A885C',
            '#608C61','#679165','#6D9569','#73996E','#799D72','#7EA176','#84A57A','#8AA97F','#8FAD83','#94B087',
            '#99B48B','#9EB88F','#A3BB93','#A8BE97','#ADC29B','#B1C59F','#B6C8A2','#BACBA6','#BECEAA','#C3D1AD',
            '#C7D4B1','#CAD6B4','#CED9B7','#D2DBBB','#D5DEBE','#D8E0C1','#DBE2C4','#DEE4C7','#E1E6CA','#E4E8CD',
            '#E6E9CF','#E9EBD2','#EBECD5','#EDEDD7','#EEEFD9','#F0EFDC','#F1F0DE','#F2F1E0','#F3F1E2','#F3F1E4')
  col5 <- c("#8B4497","#8D499B","#8F4E9F","#9052A4","#9257A8","#945CAC","#9560B0","#9765B4","#9869B8","#9A6DBC",
            "#9B72C0","#9D76C4","#9E7AC8","#9F7FCC","#A083D0","#A187D4","#A28BD8","#A390DB","#A494DF","#A598E2",
            "#A69CE6","#A7A0E9","#A7A4ED","#A8A8F0","#A8ACF3","#A9B0F7","#A9B4FA","#AAB8FD","#AABCFF","#AABFFF",
            "#ABC3FF","#ABC7FF","#ABCBFF","#ABCEFF","#ABD2FF","#ABD5FF","#ABD9FF","#ABDCFF","#ABDFFF","#AAE3FF",
            "#AAE6FF","#AAE9FF","#A9ECFF","#A9EFFF","#A8F1FF","#A7F4FF","#A7F6FF","#A6F9FF","#A4FBFF","#A3FCFF")


  allcol <- cbind(col5, col2, col3, col4)
  blockcol <- c(col2[5], col3[5], col4[5],col5[5])
  deepblockcol <- c(col2[1], col3[1], col4[1],col5[1])

  if(is.null(chrN)){
    chrN <- unique(SNPinfo[,1])
    if(length(chrN)>1) stop("The data contains two or more chromosomes. Please specify the chromosome.")
  }
  SNPloca <- which(SNPinfo$chrN==chrN & SNPinfo$bp>=startbp & SNPinfo$bp<=endbp)
  NSNPs <- length(SNPloca)
  if(NSNPs<5){
    stop("There are less than 5 SNPs in the chosen region!")
  }
  if(NSNPs>maxisize){
    Pmessage <- paste("There are too many SNPs in the chosen region. \n We use the first ",maxisize,"SNPs in the region!")
    message(Pmessage)
    SNPloca <- min(SNPloca):(min(SNPloca)+maxisize-1)
    NSNPs <- length(SNPloca)
  }
  if(NSNPs>200){
    if(CLQshow == TRUE){
      message("CLQ results can not be displayed because the number of SNPs in the region is too large.")
    }
    CLQshow <- FALSE
  }
  geno <- geno[,SNPloca]
  geno <- as.matrix(geno)
  SNPinfo <- SNPinfo[SNPloca,]

  SNPstbp <- min(SNPinfo[,3])
  SNPedbp <- max(SNPinfo[,3])

  SNPbp_cordi <- (SNPinfo[,3]-SNPstbp)/(SNPedbp - SNPstbp)

  ## matching BigLD index and SNP/genodata index
  if((!is.null(blockresult)) & (onlyHeatmap == FALSE)){
    if(blocktype == "gpart"){
      blockresult <- blockresult[,seq_len(7)]
      colnames(blockresult)[seq_len(7)] <- c( "chr","start.index", "end.index", "start.rsID","end.rsID", "start.bp", "end.bp")
    }
    blockresult <- blockresult[which(blockresult$chr==chrN & blockresult$end.bp>=SNPstbp & blockresult$start.bp<=SNPedbp),,drop=FALSE]
    blockresult$start.bp[blockresult$start.bp < SNPstbp] <- min(SNPinfo[,3])
    blockresult$end.bp[blockresult$end.bp > SNPedbp] <- max(SNPinfo[,3])
    stindex <- match(blockresult$start.bp, SNPinfo[,3])
    edindex <- match(blockresult$end.bp, SNPinfo[,3])
    blockresult$start.index <- stindex
    blockresult$end.index <- edindex
    blockresult$start.rsID <- SNPinfo[stindex,2]
    blockresult$end.rsID <- SNPinfo[edindex,2]
    blockresult <- blockresult[blockresult[,2]!=blockresult[,3],,drop=FALSE]
  }else if ((is.null(blockresult)) & (onlyHeatmap == FALSE)){
    if(blocktype == "bigld"){
      oblockresult <- BigLD(geno=geno, SNPinfo=SNPinfo, CLQcut=CLQcut, CLQmode=CLQmode, LD=LDcal, MAFcut=MAFcut)
      blockresult <- oblockresult
    }else if(blocktype == "gpart"){
      oblockresult <- GPART(geno=geno, SNPinfo=SNPinfo, geneinfo = geneinfo, CLQcut=CLQcut, CLQmode=CLQmode, LD=LDcal,
                            minsize = minsize, maxsize = maxsize, MAFcut = MAFcut)
      blockresult <- oblockresult[,seq_len(7)]
      colnames(blockresult)[seq_len(7)] <- c( "chr","start.index", "end.index", "start.rsID","end.rsID", "start.bp", "end.bp")
    }
  }

  if(NSNPs < 1000){
    unitleng <- 1.1
    unitlwd <- 0.4
  }else if(NSNPs >= 1000 & NSNPs < 4000){
    unitleng <- 2
    unitlwd <- 0.3
  }else if(NSNPs >= 4000 & NSNPs < 7000){
    unitleng <- 3
    unitlwd <- 0.2
  }else if(NSNPs >= 7000 & NSNPs <= maxisize){
    unitleng <- 4
    unitlwd <- 0.1
  }

  if(!is.null(geneinfo)){
    nowgeneinfo <- geneinfo[geneinfo[,2]==chrN,,drop=FALSE]
    nowgeneinfo <- nowgeneinfo[which(nowgeneinfo[,3]<SNPedbp & nowgeneinfo[,4]>SNPstbp),,drop=FALSE]
    nowgeneinfo <- nowgeneinfo[order(nowgeneinfo[,3]),,drop=FALSE]
    nowgeneinfo <- combineOverlapSamegene(nowgeneinfo)
  }else{
    message("There is no gene region information!")
    nowgeneinfo <- matrix(1,1,1)[-1,,drop=FALSE]
  }
  stratifyGene <- list()
  if(dim(nowgeneinfo)[1]>0){
    nowgeneinfo[which(nowgeneinfo[,3]<SNPstbp),3] <- SNPstbp
    nowgeneinfo[which(nowgeneinfo[,4]>SNPedbp),4] <- SNPedbp
    nowgeneinfo[,3:4] <- (nowgeneinfo[,3:4]-SNPstbp)/(SNPedbp - SNPstbp)
    stratifyGene <- list()
    # stratifyGene[[1]]<-nowgeneinfo[1,,drop=F]
    # nowgeneinfo<-nowgeneinfo[-1,]
    for(i in seq_len(dim(nowgeneinfo)[1])){
      nowgene <- nowgeneinfo[i,,drop=FALSE]
      appendlistnum <- which(vapply(stratifyGene, function(x) (max(x[,4])+0.01) < nowgene[1,3], TRUE)==TRUE)
      if(length(appendlistnum)==0){
        stratifyGene[[length(stratifyGene)+1]]<-nowgene
      }else{
        stratifyGene[[appendlistnum[1]]]<-rbind(stratifyGene[[appendlistnum[1]]], nowgene)
      }
    }
  }
  if(CLQshow == TRUE){
    geneheight <- min(0.05*length(stratifyGene), 0.35)
    bottomloca <- 0.33 + geneheight
  }else{
    geneheight <- min(0.05*length(stratifyGene), 0.35)
    bottomloca <- 0.25 + geneheight
  }
  if(NSNPs<300){
    nowcex <- 0.5
  }else if(NSNPs > 300 & NSNPs<1000) {
    nowcex <- 0.4
  }else if(NSNPs >= 1000 & NSNPs<=2000) {
    nowcex <- 0.4
  }else if(NSNPs >= 2000 & NSNPs<=3000) {
    nowcex <- 0.4
  }else if(NSNPs>=3000) {
    nowcex <- 0.3
  }
  # start drawing part ---------------------------------------------------------------------------------
  # heatmap construct---------------------------------------------------------------------------------
  ##color
  if(LD == 'Dprime'){
    RWcolor <- colorRampPalette(c("red", "white"))
    color <- RWcolor(50)
  }else if (LD == 'r2'){
    color=heat.colors(50)
  }else if (LD == "Dp-str"){
    RWcolor <- colorRampPalette(c("red", "white"))
    color <- RWcolor(50)[c(1,50)]
  }
  mybreak<-0:length(color)/length(color)
  VPheatmap<-viewport(x=0,y=0,width=unit(0.85/1.414,"snpc"),height=unit(0.85/1.414,"snpc"),just=c("left","bottom"), name="VPheatmap")
  rot.vp <- viewport(x=.11, y=bottomloca, width=unit(unitleng,"snpc"), height=unit(unitleng,"snpc"), just=c("left", "bottom"), angle=-45)
  # genodata
  if(NSNPs > longleng){
    # M<-Matrix::Matrix(0, dim(SNPinfo)[1], dim(SNPinfo)[1])
    startseq <- seq(1, dim(SNPinfo)[1],shortleng)
    # system.time({
    rectlist <- NULL
    for(i in startseq){
      if(LD == "r2"){
        imgLDmatrix <- suppressWarnings(cor(geno[,(i:min(i+shortleng-1, NSNPs)),drop=FALSE], geno[,(i:min(i+longleng+shortleng, NSNPs)),drop=FALSE],
                                            use="pairwise.complete.obs")^2)
      }else if(LD == "Dprime"){
        imgLDmatrix <- matCubeX2(geno[,(i:min(i+shortleng-1, NSNPs)),drop=FALSE], geno[,(i:min(i+longleng+shortleng, NSNPs)),drop=FALSE])
      }else if(LD == "Dp-str"){
        imgLDmatrix <- genoDp2(geno[,(i:min(i+shortleng-1, NSNPs)),drop=FALSE], geno[,(i:min(i+longleng+shortleng, NSNPs)),drop=FALSE],
                               strLD=TRUE)
      }
      imgLDmatrix[(imgLDmatrix==Inf)|(imgLDmatrix==-Inf)] <- 0
      imgLDmatrix[lower.tri(imgLDmatrix)] <- NA
      colcut <- as.character(cut(1-imgLDmatrix,mybreak,labels=as.character(color), include.lowest=TRUE, include.highest=FALSE))
      xx <- ((i:min(i+shortleng-1, NSNPs)))/NSNPs
      yy <- ((i):min(i+longleng+shortleng, NSNPs))/NSNPs
      right <- rep(xx, length((i):min(i+longleng+shortleng, NSNPs)))
      top <- rep(yy, each=length((i:min(i+shortleng-1, NSNPs))))
      nowrect <- rectGrob(x=right,y=top,width=1/NSNPs,height=1/NSNPs,just=c("right","top"),gp=gpar(col=NA,fill=colcut))
      cat(paste("Heatmap generating",i, "|", NSNPs, "\r"))
      rectlist <- gList(rectlist, nowrect)
    }
    LD.heat.map<-gTree(children=rectlist,name="LDheatmap")
    # }) #system.time
  }else{
    if(LD == "r2"){
      imgLDmatrix <- (cor(geno,use="pairwise.complete.obs"))^2
    }else if(LD == "Dprime"){
      imgLDmatrix <- matCubeX(geno)
    }else if(LD == "Dp-str"){
      imgLDmatrix <- genoDp(geno,strLD=TRUE)
    }
    imgLDmatrix[(imgLDmatrix==Inf)|(imgLDmatrix==-Inf)] <- 0
    imgLDmatrix[lower.tri(imgLDmatrix)] <- NA
    colcut <- as.character(cut(1-imgLDmatrix,mybreak,labels=as.character(color), include.lowest=TRUE, include.highest=FALSE))
    xx <- seq_len(NSNPs)/NSNPs
    yy <- seq_len(NSNPs)/NSNPs
    right <- rep(xx, NSNPs)
    top <- rep(yy, each=NSNPs)
    nowrect <- rectGrob(x=right,y=top,width=1/NSNPs,height=1/NSNPs,just=c("right","top"),gp=gpar(col=NA,fill=colcut))
    rectlist <- gList(nowrect)
    LD.heat.map <- gTree(children=rectlist,name="LDheatmap")
  }

  if(onlyHeatmap == FALSE){
    ### block boundaries
    BlockstP <- blockresult[,2]-1
    BlockedP <- blockresult[,3]
    x1s <- BlockstP*(1/NSNPs)
    x2s <- BlockstP*(1/NSNPs)
    x3s <- BlockedP*(1/NSNPs)
    y1s <- BlockstP*(1/NSNPs)
    y2s <- BlockedP*(1/NSNPs)
    y3s <- BlockedP*(1/NSNPs)
    BlockBoundaries <- polygonGrob(x=c(x1s, x2s, x3s), y=c(y1s, y2s, y3s), id=rep(seq_len(length(x1s)),3),
                                   gp=gpar(fill=trascolor, lwd=unitlwd, col="black"))

    LDblockHeatmap2 <- gTree(children=gList(LD.heat.map, BlockBoundaries),name="LDHeatmap", vp=VPheatmap)
    blocksizes <- blockresult[,3]-blockresult[,2]+1
    blocksizes <- sort(blocksizes, decreasing=TRUE)
    if(length(blocksizes) > (15*unitleng)){
      showLDsize <- blocksizes[(15*unitleng)]
      if(length(blocksizes[blocksizes>=showLDsize])>(15*unitleng)) showLDsize <- showLDsize+1
    }else{
      showLDsize <- min(blocksizes)
    }
    regionstbp <- (blockresult[1,6])
    regionedbp <- (blockresult[dim(blockresult)[1],7])
    rotLDtest <- gTree(children=gList(LDblockHeatmap2), name="rorLDheatmap", vp=rot.vp)
    # CLQ res show
    if(CLQshow == TRUE){
      CLQres <- NULL
      for(bn in seq_len(dim(blockresult)[1])){
        nowblock <- blockresult[bn,,drop=FALSE]
        nowgeno <- geno[,nowblock[1,2]:nowblock[1,3]]
        nowSNPinfo <- SNPinfo[nowblock[1,2]:nowblock[1,3],]
        nowCLQres <- CLQD(geno=nowgeno, SNPinfo=nowSNPinfo, CLQcut=0.5, clstgap=40000, hrstParam=200, hrstType="nonhrst",
                          CLQmode=CLQmode, LD=LDcal)
        CLQres <- c(CLQres, list(nowCLQres))
      }

      CLQmaxnum <- max(unlist(CLQres), na.rm=TRUE)
      CLQwidth <- 1/NSNPs
      CLQboxgL <- gList()

      CLQregionheight <- (0.015-0.002*unitleng)*CLQmaxnum
      vpCLQ <- viewport(x=0.11,y=bottomloca-CLQregionheight,width=unit(unitleng*0.85,"snpc"),height=unit(CLQregionheight,"snpc"),just=c("left","bottom"), name="vpCLQ")

      totalbox <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"),
                           gp=gpar(fill="white",lwd=unitlwd, col="black"), vp=vpCLQ)
      CLQboxgL <- gList(CLQboxgL, totalbox)
      ifelse(CLQmaxnum<25, colgap <- (50%/%CLQmaxnum), colgap <- 1)
      bgcol <- c("gray60", "gray90")
      for(strt in seq_len(length(CLQres))){
        nowbgcol <- bgcol[strt%%2+1]
        nowCLQres <- CLQres[[strt]]
        stpoint <- blockresult[strt, 2]-1
        edpoint <- blockresult[strt, 3]
        leftbottomP <- (stpoint:edpoint)/(NSNPs)
        totalbox <- rectGrob(x=leftbottomP[1], y=0, width=CLQwidth*length(nowCLQres), height=1,
                             just=c("left","bottom"), gp=gpar(fill=nowbgcol, lwd=unitlwd, col="black"), vp=vpCLQ)
        CLQboxgL <- gList(CLQboxgL, totalbox)
        nowcols <- allcol[,(strt%%4+1)]
        clqnum <- max(nowCLQres, na.rm=TRUE)
        for(CLQn in seq_len(clqnum)){
          nowx <- leftbottomP[which((nowCLQres==CLQn)==TRUE)]
          nowx <- nowx[!is.na(nowx)]

          nownowcol <- nowcols[1+(colgap*(CLQn-1))]
          nowbox <- rectGrob(x=nowx, y=(1/CLQmaxnum)*(CLQn-1), width=CLQwidth, height=(1/CLQmaxnum),
                             just=c("left","bottom"), gp=gpar(fill=nownowcol ,lwd=unitlwd, col="black"), vp=vpCLQ)
          CLQboxgL <- gList(CLQboxgL, nowbox)
        }
      }
      #write.snpnumber
      binvector <- rep(0, NSNPs)
      for(strt in seq_len(length(CLQres))){
        stpoint <- blockresult[strt, 2]
        edpoint <- blockresult[strt, 3]
        binvector[stpoint:edpoint]<-CLQres[[strt]]
      }
      binvector[is.na(binvector)]<-0
      textx <- seq_len(NSNPs)/(NSNPs)-(0.5/NSNPs)
      texty <- binvector*(1/max(binvector)) - (0.5/max(binvector))
      textcol <- rep(NA, NSNPs)
      textcol[which(binvector <= round(max(binvector)/2))] <- "white"
      textcol[which(binvector > round(max(binvector)/2))] <- "black"
      clqtextsize =0.4
      if(NSNPs>50) {clqtextsize <- 0.3}
      if(NSNPs>100) {clqtextsize <- 0.2}
      CLQtext <- textGrob(seq_len(NSNPs), x=textx, y=texty, just=c("centre", "centre"), rot=90,
                          gp=gpar(col=textcol, cex=clqtextsize), vp=vpCLQ)
      CLQboxgL <- gList(CLQboxgL, CLQtext)
    }else{
      CLQboxgL <- gList()
      CLQregionheight <- 0
    }

    # oriblockresult <- blockresult
    vpSNPinfo <- viewport(x=0.11,y=0.90,width=unit(unitleng*0.85,"snpc"),height=unit(0.10,"snpc"),just=c("left","bottom"), name="vpSNPinfo")
    if(tick == "rsID"){
      tickname.st <- as.character(blockresult$start.rsID)
      tickname.ed <- as.character(blockresult$end.rsID)
      ticknames <- paste(tickname.st,"\n" ,tickname.ed, sep="")
    } else if(tick =="bp"){
      tickname.st <- (blockresult$start.bp)
      tickname.ed <- (blockresult$end.bp)
      ticknames <- paste(tickname.st,"\n" ,tickname.ed, sep="")
    }else if(tick == "both"){
      tickname.st1 <- as.character(blockresult$start.rsID)
      tickname.ed1 <- as.character(blockresult$end.rsID)
      tickname.st2 <- round(blockresult$start.bp*0.001)
      tickname.ed2 <- round(blockresult$end.bp*0.001)
      tickname.st <- paste(tickname.st1, "\n(",tickname.st2,"kb)", sep="")
      tickname.ed <- paste(tickname.ed1, "\n(",tickname.ed2,"kb)", sep="")
      ticknames <- paste(tickname.st,"\n" ,tickname.ed, sep="")
    }

    #SNP name write
    # blockresult <- blockresult[(blockresult[,3]-blockresult[,2]+1)>=showLDsize,]
    oriblockresult <- blockresult
    ticknames <- ticknames[(blockresult[,3]-blockresult[,2]+1)>=showLDsize]
    rectGrobposi <- seq_len(length(ticknames))/length(ticknames)
    Blockcol <- rep(blockcol, dim(blockresult)[1])[seq_len(nrow(blockresult))]
    Blockcol <- Blockcol[(blockresult[,3]-blockresult[,2]+1)>=showLDsize]
    LDSNPticksBox <- rectGrob(x=rectGrobposi, y=0.01, height=0.05, width=unit( 0.45, "snpc"),
                              gp=gpar(fill =Blockcol, col=Blockcol, alpha=0.7), just=c("centre","bottom"), vp=vpSNPinfo)
    # SNPnameposi=sort(c(rectGrobposi-min(2/length(rectGrobposi),0.01), rectGrobposi+min(2/length(rectGrobposi),0.01)))
    LDSNPticks <- textGrob(ticknames, x=rectGrobposi, y=0.1, just=c("left", "centre"),
                           rot=90, gp=gpar(cex=0.4, lineheight=0.8), vp= vpSNPinfo)
    #SNPinfo box mappingline draw
    vpSNPinfomapping <- viewport(x=0.11,y=bottomloca,width=unit(unitleng*0.85,"snpc"),height=unit(0.9-bottomloca,"snpc"),
                                 just=c("left","bottom"), name="vpSNPinfo")
    sblockresult <- blockresult[(blockresult[,3]-blockresult[,2]+1)>=showLDsize,]
    Blockcenter.x <- apply(cbind((sblockresult[,2]-1)*(1/NSNPs), sblockresult[,3]*(1/NSNPs)), 1, mean)
    Blockcenter.y <- apply(cbind((sblockresult[,2]-1)*(1/NSNPs), sblockresult[,3]*(1/NSNPs)), 1, function(x) (diff(x)/2))
    blocktop <- max(Blockcenter.y*unitleng*0.85)/(0.9-bottomloca)
    SNPblockmapping <- segmentsGrob(x0=rectGrobposi, x1=Blockcenter.x,
                                    y0=1, y1=blocktop, vp=vpSNPinfomapping, gp=gpar(lwd=unitlwd, col=Blockcol))
    SNPblockmapping1 <- segmentsGrob(x0=Blockcenter.x, x1=Blockcenter.x,
                                     y0=blocktop, y1=(Blockcenter.y*unitleng*0.85)/(0.9-bottomloca),
                                     vp=vpSNPinfomapping, gp=gpar(lwd=unitlwd, col=Blockcol))
    #SNP box name wright
    Blocknames <- paste("B-", which(oriblockresult[,3]-oriblockresult[,2]+1>=showLDsize), sep="")
    LDblockticks <- textGrob(Blocknames, x=rectGrobposi, y=-0.01, just=c("centre", "top"),
                             gp=gpar(cex=0.4), vp=vpSNPinfo)

    # (Blockcenter.y*unitleng*0.85)/(0.35)
    #
    # grid.draw(Locatick)
    #rotate LD heatmap
    #mapping lines
    #2
    mappingheight<- ifelse(CLQshow==FALSE, 0.13, 0.13)
    vpMappingLine <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight),width=unit(unitleng*0.85,"snpc"),
                              height=unit(mappingheight,"snpc"),just=c("left","bottom"), name="vpmappingLine")
    # x0 <- c((blockresult[,2]-1)*(1/NSNPs), blockresult[,3]*(1/NSNPs))
    x0 <- apply(cbind((oriblockresult[,2]-1)*(1/NSNPs), oriblockresult[,3]*(1/NSNPs)), 1, mean)
    y0 <- rep(1, length(x0))
    # x1ori <- c(blockresult[,6], blockresult[,7])
    x1ori <- apply(cbind(oriblockresult[,6], oriblockresult[,7]), 1, mean)
    x1 <- (x1ori-SNPstbp)/(SNPedbp-SNPstbp)
    y1 <- rep(0, length(x1))
    mappingLine <- segmentsGrob(x0=x0, y0=y0, x1=x1, y1=y1,vp=vpMappingLine,gp=gpar(lwd=unitlwd, col=blockcol))
    # blockname on loca bar wright
    Blocknames <- paste("B-", which(oriblockresult[,3]-oriblockresult[,2]+1>=showLDsize), sep="")
    x1ori <- apply(cbind(sblockresult[,6], sblockresult[,7]), 1, mean)
    x1 <- (x1ori-SNPstbp)/(SNPedbp-SNPstbp)
    LDblocktickslocaBar <- textGrob(Blocknames, x=x1, y=0, just=c("left", "centre"), rot=90,
                                    gp=gpar(cex=0.4), vp=vpMappingLine)
    #location Bar
    locabarheight <- 0.02
    vpLocaBar <- viewport(x=0.11,y=(bottomloca-(CLQregionheight+mappingheight+locabarheight)),
                          width=unit(unitleng*0.85,"snpc"),height=unit(locabarheight,"snpc"),just=c("left","bottom"), name="vpLocaBar")
    locaBar <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"), vp=vpLocaBar, gp=gpar(col="gray50", fill="gray50"))
    #draw LD blocks on the location bar
    x1 <- (blockresult[,6]-SNPstbp)/(SNPedbp-SNPstbp)
    blockwidth <- (blockresult[,7]-blockresult[,6])/(SNPedbp-SNPstbp)
    blockonBar <- rectGrob(x=x1, y=rep(0, dim(blockresult)[1]), height=1, width=blockwidth,
                           just=c("left","bottom"), gp=gpar(col=deepblockcol, fill=blockcol, lwd=0.1), vp=vpLocaBar)

    x0 <- seq_len(NSNPs)/NSNPs - (1/(2*NSNPs))
    y0 <- rep(1, length(x0))
    # x1ori <- c(blockresult[,6], blockresult[,7])
    x1 <- SNPbp_cordi
    y1 <- rep(0, length(x1))
    blocksizes <- blockresult[,3]-blockresult[,2]+1
    sgtblocks <- setdiff(seq_len(NSNPs), unlist(apply(blockresult[,2:3], 1, function(x) x[1]:x[2])))
    sgtblocks <- cbind(sgtblocks, sgtblocks)
    totalBigLDres <- rbind(as.matrix(blockresult[,2:3]), sgtblocks)
    totalBigLDres <- totalBigLDres[order(totalBigLDres[,1]),, drop=FALSE]
    SNPonBlockcol <- c()
    j<-0
    for(i in seq_len(dim(totalBigLDres)[1])){
      if(totalBigLDres[i, 2] == totalBigLDres[i, 1] ){
        SNPonBlockcol <-c(SNPonBlockcol, "gray50")
      }else{
        j<-j+1
        jj = ifelse(j%%4==0, 4, j%%4)
        SNPonBlockcol <-c(SNPonBlockcol, rep(blockcol[jj], (totalBigLDres[i, 2] - totalBigLDres[i, 1] + 1)))
      }
    }
    if(NSNPs>1000){
      x0 <- x0[seq(1,length(x0), 10)]
      x1 <- x1[seq(1,length(x1), 10)]
      y0 <- y0[seq(1,length(y0), 10)]
      y1 <- y1[seq(1,length(y1), 10)]
      SNPonBlockcol=SNPonBlockcol[seq(1,length(SNPonBlockcol), 10)]
    }

    deepSNPonBlockcol <- c()
    j<-0
    for(i in seq_len(dim(totalBigLDres)[1])){
      if(totalBigLDres[i, 2] == totalBigLDres[i, 1] ){
        deepSNPonBlockcol <-c(deepSNPonBlockcol, "gray50")
      }else{
        j<-j+1
        jj = ifelse(j%%4==0, 4, j%%4)
        deepSNPonBlockcol <-c(deepSNPonBlockcol, rep(deepblockcol[jj], (totalBigLDres[i, 2] - totalBigLDres[i, 1] + 1)))
      }
    }
    if(NSNPs>1000){
      deepSNPonBlockcol=deepSNPonBlockcol[seq(1,length(deepSNPonBlockcol), 10)]
    }

    SNPmappingLine <- segmentsGrob(x0=x0, y0=y0, x1=x1, y1=y1,vp=vpMappingLine,gp=gpar(lwd=unitlwd, col=SNPonBlockcol))
    # blockname on loca bar wright
    locaBar <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"), vp=vpLocaBar, gp=gpar(col="gray50", fill="gray50"))
    SNPsonBar <- segmentsGrob(x0 =x1, x1=x1, y0=0.05, y1=0.95, vp=vpLocaBar, gp=gpar(lwd=unitlwd, col=deepSNPonBlockcol))
  }else{
    # no block boundaries, only heatmap
    LDblockHeatmap2 <- gTree(children=gList(LD.heat.map),name="LDHeatmap", vp=VPheatmap)
    rotLDtest <- gTree(children=gList(LDblockHeatmap2), name="rorLDheatmap", vp=rot.vp)
    CLQshow = FALSE
    CLQregionheight <- 0
    mappingheight <- 0.13
    locabarheight <- 0.02

    mappingheight<- ifelse(CLQshow==FALSE, 0.13, 0.05)
    vpMappingLine <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight),width=unit(unitleng*0.85,"snpc"),
                              height=unit(mappingheight,"snpc"),just=c("left","bottom"), name="vpmappingLine")
    # x0 <- c((blockresult[,2]-1)*(1/NSNPs), blockresult[,3]*(1/NSNPs))
    x0 <- seq_len(NSNPs)/NSNPs - (1/(2*NSNPs))
    y0 <- rep(1, length(x0))
    # x1ori <- c(blockresult[,6], blockresult[,7])
    x1 <- SNPbp_cordi
    y1 <- rep(0, length(x1))
    if(NSNPs>1000){
      x0 <- x0[seq(1,length(x0), 10)]
      x1 <- x1[seq(1,length(x1), 10)]
      y0 <- y0[seq(1,length(y0), 10)]
      y1 <- y1[seq(1,length(y1), 10)]
      # SNPonBlockcol=SNPonBlockcol[seq(1,length(y1), 10)]
    }
    SNPmappingLine <- segmentsGrob(x0=x0, y0=y0, x1=x1, y1=y1,vp=vpMappingLine,gp=gpar(lwd=unitlwd, col="gray50"))
    # blockname on loca bar wright
    vpLocaBar <- viewport(x=0.11,y=(bottomloca-(CLQregionheight+mappingheight+locabarheight)),width=unit(unitleng*0.85,"snpc"),
                          height=unit(locabarheight,"snpc"),just=c("left","bottom"), name="vpLocaBar")
    locaBar <- rectGrob(x=0, y=0, width=1, height=1, just=c("left","bottom"), vp=vpLocaBar, gp=gpar(col="gray50", fill="gray50"))
    SNPsonBar <- segmentsGrob(x0 =x1, x1=x1, y0=0.05, y1=0.95, vp=vpLocaBar, gp=gpar(lwd=unitlwd, col="black"))
    CLQboxgL <- gList()
    LDSNPticksBox <- gList()
    LDSNPticks <- gList()
    SNPblockmapping <- gList()
    SNPblockmapping1 <- gList()
    LDblockticks <- gList()
    LDblocktickslocaBar <- gList()
    blockonBar <- gList()
    mappingLine <- gList()
  }

  # gene loca info ---------

  if(dim(nowgeneinfo)[1]>0){
    generegionH <- bottomloca-(CLQregionheight+mappingheight+locabarheight)-0.045
    stratNum <- length(stratifyGene)
    stratheight <- min(generegionH/stratNum, 0.05)
    geneinfoList <- gList()
    for(i in seq_len(stratNum)){
      # each info region : height 0.5
      nowvp <- viewport(x=0.11,y=(bottomloca-(CLQregionheight+mappingheight+locabarheight)-(stratheight*i)),
                        width=unit(unitleng*0.85,"snpc"),height=unit((stratheight*0.9),"snpc"),just=c("left","bottom"))
      nowgenes <- stratifyGene[[i]]
      x0s <- nowgenes[,3]
      x1s <- nowgenes[,4]
      nowwidth <- x1s-x0s
      # nowwidth[which(nowwidth<0.01)]<-0.008
      nowcol <- c("blue", "green3", "darkmagenta")
      geneinfoList <- gList(geneinfoList,
                            rectGrob(x=x0s, y=rep(0.8, length(x0s)), width=nowwidth, height=0.2,
                                     vp=nowvp, gp=gpar(fill=nowcol, col=nowcol, cex=nowcex), just=c("left","bottom")),
                            textGrob(as.character(nowgenes[,1]),x=x0s, y=c(0.75, 0.45), vp=nowvp, gp=gpar(col=nowcol, cex=0.4),
                                     just=c("left", "top")),
                            segmentsGrob(x0=x0s, x1=x0s, y0=0.8, y1=c(0.75, 0.45),vp=nowvp,
                                         gp=gpar(col=nowcol, cex=0.4, lty="dotted", alpha=0.5))
      )
    }
    geneMap <- gTree(children=geneinfoList,name="Genemap")
    # grid.draw(geneMap)
  }else{
    if(!is.null(geneinfo)){message("This region does not overlap any gene region!")}
    stratheight <- 0;stratNum <- 0
    geneMap<-NULL
    text_gene <- gList()
  }
  vpLocationTick <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight),width=unit(unitleng*0.85,"snpc"),height=unit((stratheight*stratNum)+0.03,"snpc"),
                             just=c("left","top"), name="vpLocaTick")
  vpLocationTickname <- viewport(x=0.11,y=bottomloca-(CLQregionheight+mappingheight+locabarheight)-(stratheight*stratNum)-0.05,width=unit(unitleng*0.85,"snpc"),height=unit(0.05,"snpc"),
                                 just=c("left","bottom"), name="vpLocaTick")
  x0range <- SNPedbp-SNPstbp
  x0num <- length(strsplit(as.character(x0range), split="")[[1]])-1
  x0tickround <- seq(round(SNPstbp/(10^x0num)), round(SNPedbp/(10^x0num)),0.5)
  x0tickloca <- x0tickround*10^x0num
  x0tickloca <- x0tickloca[which(x0tickloca>=SNPstbp & x0tickloca<=SNPedbp)]
  x0tickloca <- c(SNPstbp, x0tickloca, SNPedbp)
  x0tickloca.adj <- (x0tickloca-SNPstbp)/x0range
  Locatick <- segmentsGrob(x0=x0tickloca.adj, y0=rep(1, length(x0tickloca.adj)),
                           x1=x0tickloca.adj, y1=rep(0, length(x0tickloca.adj)),
                           vp= vpLocationTick, gp=gpar(lty="dashed", col="gray50"))
  # locationTickname

  if(x0num >= 6){
    Locatickname <- textGrob(paste(round(x0tickloca/10^6,1), "Mb", sep=""),
                             x=x0tickloca.adj, y=0.9, rot=-45, just=c("left", "top"),vp=vpLocationTickname,gp=gpar(cex=0.5) )
  }else if((x0num >= 3) & (x0num < 6)){
    Locatickname <- textGrob(paste(round(x0tickloca/10^3,1), "Kb", sep=""),
                             x=x0tickloca.adj, y=0.9, rot=-45, just=c("left", "top"),vp=vpLocationTickname,gp=gpar(cex=0.5))
  }else if(x0num < 3){
    Locatickname <- textGrob(paste(x0tickloca, "bp", sep=""),
                             x=x0tickloca.adj, y=0.9, rot=-45, just=c("left", "top"),vp=vpLocationTickname,gp=gpar(cex=0.5))
  }

  #Locatick2
  x0tickround1 <- seq(round(SNPstbp/(10^(x0num-1))), round(SNPedbp/(10^(x0num-1))),1)
  x0tickloca1 <- x0tickround1*(10^(x0num-1))
  x0tickloca1 <- x0tickloca1[which(x0tickloca1 >= SNPstbp & x0tickloca1 <= SNPedbp)]
  x0tickloca1 <- c(SNPstbp, x0tickloca1, SNPedbp)
  x0tickloca1<-setdiff(x0tickloca1, x0tickloca)
  x0tickloca.adj1 <- (x0tickloca1-SNPstbp)/x0range
  Locatick1 <- segmentsGrob(x0=x0tickloca.adj1, y0=rep(1, length(x0tickloca.adj1)),
                            x1=x0tickloca.adj1, y1=rep(0, length(x0tickloca.adj1)),
                            vp= vpLocationTick, gp=gpar(lty="dotted", col="gray50"))

  #color key ----------------
  LDheatmapLegend<- function(color, LD) {
    colorrect <- rectGrob(x=(length(color):1/length(color)),y=0.7,width=1/length(color),height=0.3,just=c("right","top"),gp=gpar(col=NA,fill=color))
    if(LD == "r2"){
      ttt <-expression(paste("r"^{2}, " Color Key"))
    }else {
      ttt <-"D' Color Key"
    }
    title <- textGrob(ttt, x=0.5, y=0.9, name="title",
                      gp=gpar(cex=0.5))
    if(LD != "Dp-str"){
      labels <- textGrob(paste(0.2 * 0:5), x=0.2 * 0:5, y=0.2,
                         gp=gpar(cex=0.3), name="labels")
      ticks <- segmentsGrob(x0=c(0:5) * 0.2, y0=rep(0.4,6), x1=c(0:5) * 0.2, y1=rep(0.3, 6), name="ticks")
    }else{
      labels <- textGrob(c("weak LD\nor\nnot informative", "strong LD"), x=c(0.25, 0.75), y=0.35,
                         gp=gpar(cex=0.3,lineheight=0.8), name="labels",just=c("centre","top"))
      ticks=nullGrob()
    }
    box <- rectGrob(x=0, y=0.7, height=0.3, width=1,gp=gpar(col="black",fill=  trascolor), name="box", just=c("left","top"))
    key <- gTree(children=gList(colorrect, title, labels,ticks, box), name="Key")
    key
  }

  keyVP <- viewport(x=0.02, y=0.98, height=unit(0.1, "snpc"), width=unit(0.09, "snpc"), ######need modify
                    just=c("left", "top"), name="keyVP")

  ## mapname--------------
  vpmapname <- viewport(x=0.095, y=0, height=unit(1, "snpc"), width=unit(0.1, "snpc"), ######need modify
                        just=c("right", "bottom"), name="keyVP")
  text_heatmap <- textGrob("LD Heat Map", x=1, y=0.75, just=c("right", "centre"), gp=gpar(cex=0.5),  vp=vpmapname)
  if(CLQshow == TRUE){
    text_clq <- textGrob("CLQ results", x=1, y=bottomloca, just=c("right", "top"),gp=gpar(cex=0.5),  vp=vpmapname)
  }else{
    text_clq <- nullGrob(vp=vpmapname)
  }

  if(onlyHeatmap == FALSE){
    blockregionname <- ifelse(blocktype == "bigld", "block regions\n(Big-LD)", "block regions\n(GPART)")
    text_LD <- textGrob(blockregionname, x=1, y=bottomloca-(CLQregionheight+mappingheight), just=c("right", "centre"),
                        gp=gpar(cex=0.5), vp=vpmapname)
  }else{
    text_LD <- textGrob("SNP locations", x=1, y=bottomloca-(CLQregionheight+mappingheight), just=c("right", "top"),
                        gp=gpar(cex=0.5), vp=vpmapname)
  }
  if(dim(nowgeneinfo)[1]>0){
    text_gene <- textGrob("Genes", x=1, y= (bottomloca-(CLQregionheight+mappingheight+locabarheight)-0.02),
                          just=c("right", "top"), gp=gpar(cex=0.5),
                          vp=vpmapname)
  }else{
    text_gene <-gList()
  }



  mapname <- gTree(children=gList(text_heatmap,text_LD, text_clq, text_gene))

  # --------------------------------------------end ------
  filename <- filename
  if(unitleng == 1.1){
    widthleng <- unitleng *1.1
  }else  if(unitleng == 2){
    widthleng <- unitleng *1.1
  }else  if(unitleng == 3){
    widthleng <- unitleng
  }else  if(unitleng == 4){
    widthleng <- unitleng
  }

  if((NSNPs > 200) & (onlyHeatmap == FALSE)){
    SNPmappingLine <- gList()
    # SNPsonBar <- gList()
  }

  if((NSNPs <= 200) & (onlyHeatmap == FALSE)){
    mappingLine <- gList()
    # SNPsonBar <- gList()
  }

  cat("\n")
  message("Generating file...." )
  if(type == "tif"){
    filename <- paste(filename, ".tif", sep="")
    tiff(filename,res=res, units="cm", height=15, width=15*widthleng)
    grid.draw(mappingLine)
    grid.draw(SNPmappingLine)
    grid.draw(rotLDtest)
    grid.draw(CLQboxgL)#
    grid.draw(LDSNPticksBox)#
    grid.draw(Locatick)
    grid.draw(Locatick1)
    grid.draw(locaBar)
    grid.draw(Locatickname)
    grid.draw(blockonBar)
    grid.draw(SNPsonBar)
    grid.draw(geneMap)
    grid.draw(LDSNPticks)#
    grid.draw(SNPblockmapping)#
    grid.draw(SNPblockmapping1)#
    grid.draw(LDblockticks)#
    grid.draw(LDblocktickslocaBar)#
    grid.draw(mapname)

    pushViewport(keyVP)
    grid.draw(LDheatmapLegend(color, LD))
    upViewport()

    dev.off()
    message(filename)
  }else if(type == "png"){
    filename <- paste(filename, ".png", sep="")
    png(filename,res=res, units="cm", height=15, width=15*widthleng)
    grid.draw(mappingLine)
    grid.draw(SNPmappingLine)
    grid.draw(rotLDtest)
    grid.draw(CLQboxgL)#
    grid.draw(LDSNPticksBox)#
    grid.draw(Locatick)
    grid.draw(Locatick1)
    grid.draw(locaBar)
    grid.draw(Locatickname)
    grid.draw(blockonBar)
    grid.draw(SNPsonBar)
    grid.draw(geneMap)
    grid.draw(LDSNPticks)#
    grid.draw(SNPblockmapping)#
    grid.draw(SNPblockmapping1)#
    grid.draw(LDblockticks)#
    grid.draw(LDblocktickslocaBar)#
    grid.draw(mapname)

    pushViewport(keyVP)
    grid.draw(LDheatmapLegend(color, LD))
    upViewport()

    dev.off()
    message(filename)
  }
}

# data ------------
#' @title genotype data
#' @name geno
#' @docType data
#' @aliases geno
#' @description This data set gives genotype data that are subset of 1000 Genomes Project phase1 release 3 genotype data
#' with 286 individuals from JPT, CHB and CHS populations (1000 Genomes Project Consortium, 2012).
#' The dataset contains 9000 SNPs in chromosome 21
#' @usage data(geno)
#' @format A data frame with 286 rows and 9000 columns
#' @source 1000 genomes project Phase 1 dataset <http://www.internationalgenome.org/>
#' @references {
#'  1000 Genomes Project Consortium.
#'  "An integrated map of genetic variation from 1,092 human genomes." \emph{Nature} 491.7422 (2012): 56.
#'  }
#' @keywords datasets
NULL

#' @title SNP information data
#' @name SNPinfo
#' @docType data
#' @aliases SNPinfo
#' @description This data set gives information data of SNPs in \code{geno}
#' The dataset contains chromosome name, rsID and bp information of the 9000 SNPs in \code{geno}
#' @usage data(SNPinfo)
#' @format A data frame with 9000 rows and 3 columns for chromosome name, rsID and bp
#' @source 1000 genomes project Phase 1 dataset <http://www.internationalgenome.org/>
#' @references {
#'  1000 Genomes Project Consortium.
#'  "An integrated map of genetic variation from 1,092 human genomes." \emph{Nature} 491.7422 (2012): 56.
#'  }
#'  @keywords datasets
NULL

#' @title gene information data
#' @name geneinfo
#' @docType data
#' @aliases geneinfo
#' @description This data set gives gene information in chromosome 21.
#' @usage data(geneinfo)
#' @format A data frame with 736 rows and 4 columns for genename, chromosome name, start bp and end bp of each gene.
#' @source <http://grch37.ensembl.org/index.html>
#' @references {
#' Zerbino, Daniel R., et al. "Ensembl 2018." \emph{Nucleic acids research 46.D1} (2017): D754-D761.
#' }
#'
#' @keywords datasets
NULL

Try the gpart package in your browser

Any scripts or data that you put into this service are public.

gpart documentation built on Nov. 8, 2020, 5:16 p.m.