R/humarray.R

Defines functions get.nearby.snp.lists nearest.snp snps.in.range AB Band.pos Gene.pos Band.gene Band Pos.band Pos.gene ids.by.pos Pos Chr manifest rs.to.id id.to.rs chip.support meta.me compact.gene.list convert.textpos.to.data lambda_1000 invGRanges set.chr.to.numeric set.chr.to.char plotRanges in.window force.chr.pos startSnp rangeSnp endSnp expand.nsnp chrNums rranges chrSelect df.to.ranged df.to.GRanges ranged.to.data.frame select.autosomes ranged.to.txt recomWindow conv.36.37 conv.37.38 conv.38.37 conv.37.36 makeGRanges get.genic.subset get.t1d.subset get.t1d.regions get.chr.lens get.telomere.locs get.gene.annot get.exon.annot get.recombination.map get.cyto get.centromere.locs get.immunog.locs plotGeneAnnot nearest.gene GENE.to.ENS ENS.to.GENE get.GO.for.genes get.immunobase.snps rownames3 makePrettyMatrixForCompactPrinting2 makeNakedMatFromChipInfo extraColumnSlots2 showChipInfo ChipInfo updateAllele validate.dir.for gene.duplicate.report tidy.extra.chr chrnames.to.num chrnums.to.txt plot_get_area plotdf make.divisor chrGS chr2 chrIndices2 chrIndicesGS chrInfoGS chrPartitioning2 chrInfo2 toGenomeOrder2 TGOGR TGORD chrNames2 chrOrder2 emd.rmv rsnpid rsampid add.trail rmv.trail substitute.36s hard.coded.conv clean.snp.ids sort_chr order.chr comma pduplicated ucsc.sanitizer pt2 Cor l10 sortna sumna sdna medianna meanna maxna minna finitize immunobase.has.changed onLoad onAttach

Documented in AB add.trail Band Band.gene Band.pos ChipInfo chip.support Chr chr2 chrGS chrIndices2 chrIndicesGS chrInfo2 chrInfoGS chrNames2 chrnames.to.num chrNums chrnums.to.txt chrOrder2 chrPartitioning2 chrSelect clean.snp.ids comma compact.gene.list conv.36.37 conv.37.36 conv.37.38 conv.38.37 convert.textpos.to.data Cor df.to.GRanges df.to.ranged emd.rmv endSnp ENS.to.GENE expand.nsnp extraColumnSlots2 finitize force.chr.pos gene.duplicate.report Gene.pos GENE.to.ENS get.centromere.locs get.chr.lens get.cyto get.exon.annot get.gene.annot get.genic.subset get.GO.for.genes get.immunobase.snps get.immunog.locs get.nearby.snp.lists get.recombination.map get.t1d.regions get.t1d.subset get.telomere.locs hard.coded.conv ids.by.pos id.to.rs immunobase.has.changed invGRanges in.window l10 lambda_1000 make.divisor makeGRanges makePrettyMatrixForCompactPrinting2 manifest maxna meanna medianna meta.me minna nearest.gene nearest.snp order.chr pduplicated plotdf plotGeneAnnot plot_get_area plotRanges Pos Pos.band Pos.gene pt2 ranged.to.data.frame ranged.to.txt rangeSnp recomWindow rmv.trail rranges rsampid rsnpid rs.to.id sdna select.autosomes set.chr.to.char set.chr.to.numeric showChipInfo snps.in.range sort_chr sortna startSnp substitute.36s sumna TGOGR TGORD tidy.extra.chr toGenomeOrder2 ucsc.sanitizer validate.dir.for

###NAMESPACE ADDITIONS###
# import(Rcpp, NCmisc)
# importFrom(BiocInstaller, biocVersion)
# importFrom(stats, family, pnorm, pt, qnorm, rchisq, rnorm, runif, median, cor, sd)
# importFrom(reader, cat.path, reader, shift.rownames )
# importFrom(grDevices, dev.off, pdf)
# importFrom(graphics, abline, lines, points, rect, text, plot)
# importFrom(methods, as, callNextMethod, is, new, prototype, representation, setAs, setClass, setGeneric, setMethod, setValidity)
# importClassesFrom(GenomicRanges, GNCList, GRanges, GenomicRanges)
# importFrom(GenomicRanges, GRanges, GRangesList)
# importMethodsFrom(GenomicRanges, "names<-", length, names, start, end)
# importMethodsFrom(GenomicRanges, width, strand, show, findOverlaps)
# importMethodsFrom("GenomicRanges", "length", "names", "names<-", "[", "[<-", "[[<-", "$", "$<-")
# importMethodsFrom(GenomeInfoDb, "seqlevels", "seqlevels<-", "genome<-", "genome", seqinfo, "seqinfo<-", seqnames, "seqnames<-", "seqlengths")
# importClassesFrom(IRanges, RangedData)
# importFrom(IRanges, "%over%", IRanges, RangedData, showAsCell, PartitioningByEnd)
# importMethodsFrom(IRanges, "colnames<-", "rownames<-", "universe<-", showAsCell)
# importMethodsFrom(IRanges, as.data.frame, as.list, as.matrix, cbind, rbind, colnames)
# importMethodsFrom(IRanges, end, findOverlaps, subsetByOverlaps, gsub, intersect, lapply)
# importMethodsFrom(IRanges, mean, nrow, ncol, order, as.list)
# importMethodsFrom(IRanges, ranges, rownames, runLength, space, flank, reduce, resize)
# importMethodsFrom(IRanges, start, universe, unlist, width, "start<-", "width<-", "end<-", ranges, "ranges<-")
# importFrom("GenomicFeatures", makeTxDbFromUCSC, exonsBy, transcriptsBy)
# importMethodsFrom("GenomicFeatures", exonsBy, transcriptsBy, as.list)
# importClassesFrom("rtracklayer", ChainFile)
# importMethodsFrom("rtracklayer", liftOver, import.chain)
# importFrom("biomaRt", useMart, useDataset, getBM)
# importClassesFrom(S4Vectors, DataFrame, Rle, Hits)
# importFrom(S4Vectors, subjectHits, queryHits, mcols, "mcols<-", head, tail, DataFrame, Rle, runValue)
# importClassesFrom("biomaRt", Mart)
# importFrom(parallel, mclapply)
# importFrom(utils, capture.output, download.file, read.table, write.table, read.delim)
# importFrom(graphics, par)
# importFrom("genoset", chrIndices, chrInfo, chrNames, "chrNames<-")
# importFrom(methods, slot, "slot<-")
# importFrom(BiocGenerics, relist)
###END NAMESPACE###

#DataFrame
#seqlevels
#seqlevels<-
#genome<-
# BiocGenerics 

# , genoset (>= 1.16.2)   # took from DESCRIPTION file
# importClassesFrom(GenomicRanges, GNCList, GRanges, GenomicRanges, GenomicRanges_OR_missing)
# importFrom "genoset"  chr  chrIndices  chrInfo  chrNames  chrOrder   "chrNames<-"
# importFrom "genoset"  "isGenomeOrder"  "locData"   "locData<-"
# importMethodsFrom GenomicRanges length names  "names<-" "dimnames<-" "["  "[<-"  "[["  "[[<-"  "$"  "$<-" cbind rbind  "mcols<-" mcols subsetByOverlaps
# importMethodsFrom IRanges as.data.frame as.list as.matrix cbind rbind colnames elementLengths queryHits subjectHits
# importMethodsFrom "genoset"  genome "genome<-"
# importFrom "genoset"  "toGenomeOrder"
# doNotimportFrom utils capture.output download.file read.table write.table head tail data  
# dontimportClassesFrom "genoset" RangedDataOrGenomicRanges
# importNoClassesFrom("GenomicRanges", GRanges)
# importNoClassesFrom("IRanges", Rle, RangedData)
# doNotimportFrom AnnotationDbi head tail ncol as.list colnames get exists sample 
# doNotimportFrom(BiocGenerics,strand, "strand<-", colnames, cbind, rbind, unlist, order, rownames, ncol, as.vector, paste, as.data.frame)
# doNotimportFrom GenomicRanges "seqlevels"  "seqlevels<-" Seqinfo seqlengths
# importNoClassesFrom "GenomicFeatures" TranscriptDb

.onAttach <- function(libname, pkgname) {
  packageStartupMessage("humarray version 1.2\n")
}

.onLoad <- function(libname, pkgname) {
  # library.dynam("humarray", pkgname, libname)
  #~/github/iChip/ImmunoChip_ChipInfo_New.RData
  options(chip.info="") # if you can access the file, you won't need to change this path
  options(ucsc="hg19") # depends on which analysis, need to set something though
  #data("iChipRegionsB36", "egSymb", "ImmunoChipB37", "hg18ToHg19","hg38ToHg19","hg19ToHg18","hg19ToHg38",
  #     package=pkgname, envir=parent.env(environment()))
  options(save.annot.in.current=1)  # 1 = TRUE, stores annotation in current folder to speed up subsequent lookups
}



#require(GenomicRanges); require(IRanges); require(reader); require(genoset)


########################
## internal functions ##
########################

# immunobase doesn't allow regions to be downloaded automatically anymore :(
immunobase.has.changed <- function(type=c("message","text","warning","error"),ret.val=NULL) {
	  msg <- "Unfortunately this table can no longer be downloaded programmatically. Please obtain manually from: https://www.immunobase.org/disease/T1D/"
	  type <- type[1]
	  if(type=="error") {
	  	stop(msg)
	  } else if(type=="warning") {
	  	warnings(msg)
	  } else if(type=="text") {
	  	print(msg)
	  } else {
	  	message(msg)
	  }
	return(ret.val)
}

finitize <- function(X) {
  if(is.data.frame(X)) { X <- as.matrix(X) }
  return(X[is.finite(X)])
}

minna <- function(...) {
  if(length(list(...))==1) { 
    min(finitize(...),na.rm=TRUE)
  } else {
    min(...,na.rm=TRUE)
  }
}

maxna <- function(...) {
  if(length(list(...))==1) { 
    max(finitize(...),na.rm=TRUE)
  } else {
    max(...,na.rm=TRUE)
  }
}

meanna <- function(...) {
  if(length(list(...))==1) { 
    mean(finitize(...),na.rm=TRUE)
  } else {
    mean(...,na.rm=TRUE)
  }
}

medianna <- function(...) {
  if(length(list(...))==1) { 
    median(finitize(...),na.rm=TRUE)
  } else {
    median(...,na.rm=TRUE)
  }
}

sdna <- function(...) {
  if(length(list(...))==1) { 
    sd(finitize(...),na.rm=TRUE)
  } else {
    sd(...,na.rm=TRUE)
  }
}

sumna <- function(...) {
  if(length(list(...))==1) { 
    sum(finitize(...),na.rm=TRUE)
  } else {
    sum(...,na.rm=TRUE)
  }
}

sortna <- function(...) {
  sort(..., na.last=TRUE)
}


# internal
l10 <- function(x) { O <- log10(x); O[!is.finite(O)] <- NA; return(O) }
# internal
Cor <- function(...) { cor(...,use="pairwise.complete") }
# internal
pt2 <- function(q, df, log.p=FALSE) {  2*pt(-abs(q), df, log.p=log.p) }




#' Manage flexible input for the build parameter
#' 
#' The genome annotation version for internals in this package should always be
#' of the form 'hgXX', where XX can be 15,16,17,18,19,38. However most functions
#' allow flexible entry of this parameter as a build number, e.g, 36,37,38, or as
#' 'build36', 'b36', etc. This function sanitizes various forms of input to the 
#' correct format for internal operations. 
#' @param build the input to be sanitized. 
#' @param allow.multiple logical, whether to force a single value, or allow a vector
#' of build strings as input
#' @param show.valid logical, if TRUE, show a list of supported values.
#' @return build string in the correct 'hgXX' format.
#' @export
#' @examples
#' ucsc.sanitizer(36)
#' ucsc.sanitizer("build38")
#' ucsc.sanitizer("b37")
#' ucsc.sanitizer(show.valid=TRUE)
ucsc.sanitizer <- function(build,allow.multiple=FALSE,show.valid=FALSE) {
  build.alt <- c("hg15","hg20","hg17","hg18","hg19","hg38",17,18,19,20,35,36,37,38,
                 "build35","build36","build37","build38","b35","b36","b37","b38")
  build.new <- c("hg15","hg38",rep(c("hg17","hg18","hg19","hg38"),times=5))
  if(show.valid) { return(cbind(valid=build.alt,mapsTo=build.new)) }
  build <- build.new[match(tolower(build),build.alt)]
  if(is.null(build)) { build <- "hg19"; warning("build was NULL (see getOption('ucsc')), set to hg19") }
  if(any(is.na(build))) { 
    warning("Illegal build parameter '",build[1],"', defaulting to hg19") 
    build[is.na(build)] <- "hg19" 
  }
  if(allow.multiple) {
    return(build)
  } else {
    return(build[1])
  }
}


# ok as long as at least one non-missing snp in the summary
#' See snpStats::col.summary. Same in every way, except for the undesirable
#' behaviour of snpStats when a SNP has 100% missing values it is ignored
#' in the call-rate summary (rather than given a zero). This can unintentionally
#' mean that call-rate filters do not filter SNPs with 100% missing values.
#' This function is simply a wrapper that cleans up this problem.
# col.summary2 <- function(object,...) {
#   if(!is(object)[1]=="SnpMatrix")   { stop("'object' must be a SnpMatrix (snpStats package)") } 
#   if(any(!names(list(...)) %in% c("rules","uncertain"))) { 
#     stop("... contained invalid arguments to snpStats::col.summary") }
#   
# }

#internal
pduplicated <- function(X) {
  if(length(Dim(X))>1) {  stop("can only enter a vector into this function") }
  return((duplicated(X,fromLast=T) | duplicated(X,fromLast=F)))
}


#internal
comma <- function(...) {
  paste(...,collapse=",")
}






#internal function to properly sort chromosome labels as text
order.chr <- function(chrs) {
  # sort chr nms
  if(is.numeric(chrs)) { chrs <- paste(chrs) }
  if(!is.character(chrs)) { stop("chrs should be a character or integer vector") }
  asn <- function(X) { suppressWarnings(as.numeric(X)) }
  textz <- is.na(asn(chrs))
  nums <- chrs[!textz]
  txts <- chrs[textz]
  ns <- which(!textz)[order(asn(nums))]
  #print(max(ns,na.rm=TRUE))
  #print(order(txts)); print(txts)
  #print(which(textz))
  ts <- which(textz)[order(txts)]
  out <- c(ns,ts)
  return(out)
}

#internal
sort_chr <- function(chr) { chr[order.chr(chr)] }


#internal
# standardize snp ids so they would always appear the same, all _,.;, etc replaced with _
# all names leading with a number preceeded with X. mainly 'make.names' standard R-conventions
clean.snp.ids <- function(snpid.list) {
  snpid.list <- make.names(snpid.list)
  snpid.list <- gsub(".","_",snpid.list,fixed=T)
  return(snpid.list)
}


## internal function with extra mapping hits for immunochip that aren't in the chain file for 36-37
hard.coded.conv <- function() {
  chrzM <- c("7","7","9","5","7","14","17","4","8","8","15","7","6","6","2","2","4","17","19")
  pos36M <- c("142154515","142160115","132183222","17767156","141943232","27591752","41560151",
              "103951975","17510484","17501697","81350958","141911612","74644736","74644390",
              "1203295","21043693","4020119","78644427","52569727")
  pos37M <- c("142474939","142480539","135153668","17731427","142224511","28521898","44204373",
              "103732866","17466212","17457420","83559954","142108941","74588007","74587661",
              "1213294","21190209","3969218","81051007","47877928")
  rsidM <- c("rs10952532","rs10952534","rs11243704","rs11953245","rs17274","rs1952843",
             "rs2016730","rs223413","rs2427715","rs2517168","rs2621228",
             "rs2855938","rs2917890","rs2917891","rs4971417","rs6547409","rs6842556",
             "rs7502442","rs755327")
  chrzI <- c("3","3","3","3","3","6","7","7","8","8","8","8","17","17","X")
  pos36I <- c("50875374","50882163","50885514","50908888","195567372","119257505",
              "50323690","67383261","10961083","10961130","10975096","10975127",
              "21628754","59781521","75211826")
  pos37I <- c("50900354","50907147","50910499","50908888","194086083","119150813",
              "50353144","67745402","10923673","10923720","10937686","10937717",
              "21704627","59781521","75295444")
  rsidI <- c("rs12639243","rs62717061","rs4346541","imm_3_50908888","rs4974514","rs284919",
             "rs7804185","rs3113138","rs2898255","rs2409687","rs7827367","rs6601557",
             "rs17052332","rs1131012","rs929032")
  chrz <- c(chrzI,chrzM)
  pos36 <- c(pos36I,pos36M)
  pos37 <- c(pos37I,pos37M)
  rsid <- c(rsidI,rsidM)
  return(list(chr=chrz,pos36=pos36,pos37=pos37,rs.id=rsid))
}

substitute.36s <- function(granges=NULL) {
 pos36 <- matrix(c(c(3,"imm_3_50875337",50875337),
 c(3,"imm_3_50882163",50882163),
 c(3,"imm_3_50908888",50908888),
 c(7,"rs1574660",141711704),
 c(17,"rs1131012",59781521),
 c("X","rs16994803",147800037),
 c("X","rs12013571",148519953)),ncol=3,byrow=T)
 if(is.null(granges)) { return(pos36) }
 typ <- is(granges)[1]
 X <- as(granges,"GRanges")
 if(!is(X)[1]=="GRanges") { stop("conversion to GRanges failed") }
 ii <- narm(match(pos36[,2],rownames(X)))
 start(X)[ii] <- rep(1,length(ii))
 end(X)[ii] <- as.numeric(pos36[,3])
 start(X)[ii] <- as.numeric(pos36[,3])
 X <- as(X,typ)
 return(X)
}




# internal
# Remove trailing letter from non-unique rs-ids
#
# #@examples
# snp.ids <- rsnpid(25)
# snp.ids[1:2] <- paste0(snp.ids[1:2],"b")
# snp.ids[19:20] <- paste0(snp.ids[19:20],"c")
# snp.ids[6:7] <- paste0(snp.ids[6:7],"d")
# snp.ids[11:12] <- paste0(snp.ids[11:12],"a")
# snp.ids
# rmv.trail(snp.ids)
rmv.trail <- function(rs.ids,suffix=c("b","c","d","a")) {
  if(!is.character(suffix)) { stop("suffix must a character vector") }
  ind <- NULL
  for (cc in 1:length(suffix)) {
    ind <- c(ind,grep(suffix[cc],rs.ids))
  }
  ind <- unique(ind)
  X <- rs.ids[ind]
  nX <- nchar(X)
  last.chars <- substr(X,nX,nX)
  sufz <- (last.chars %in% c("a","b","c","d"))
  X[sufz] <- substr(X[sufz],1,nX[sufz]-1)
  rs.ids[ind] <- X
  return(rs.ids)
}

# internal
# Add trailing letter(s) to non-unique rs-ids
# #@examples
# snp.ids <- rsnpid(15)
# snp.ids
# add.trail(snp.ids)
# snp.ids <- snp.ids[sample(15,30,replace=TRUE)]
# snp.ids
# add.trail(snp.ids)
add.trail <- function(rs.ids,suffix=c("b","c","d","a")) {
  rs.ids <- rmv.trail(rs.ids)
  for (txt in suffix) {
    dupz <- duplicated(rs.ids)
    if(length(which(dupz))>0) {
      rs.ids[dupz] <- paste0(rmv.trail(rs.ids[dupz]),txt)
    }
  }
  dupz <- duplicated(rs.ids)
  if(length(which(dupz))>0) { warning("more than ",length(suffix),
                                      " duplications of at least 1 individial rs-id, suffixes exhausted, duplicates remain")
  }
  return(rs.ids)
}

# internal
rsampid <- function(n,pref="ID0") { paste0(pref,pad.left(1:n,"0")) }


# internal
rsnpid <- function(n) { 
  id.len <- sample(c(3:8),n,replace=T,prob=c(0.01, 0.01, 0.01, 0.10, 0.50, 0.37))
  each.id <- function(l) { sapply(l,function(n) { paste(replicate(n,sample(1:9,1)),collapse="",sep="") }) }
  sufz <- each.id(id.len)
  ids <- paste0("rs",sufz)
  return(ids)
}


# internal
# Remove that pesky 'elementmetadata.' prefix from column names that have been converted from GRanges
emd.rmv <- function(X, rmv.genome=TRUE) {
  requireNamespace("GenomicRanges"); requireNamespace("IRanges")
  if(has.method("mcols",X, where=environment(emd.rmv))) {
    colnames(mcols(X)) <- gsub("elementMetadata.","",colnames(mcols(X)))
    ii <- which(colnames(mcols(X))=="genome")
    if(length(ii)>0) {
      gn <- mcols(X)[,ii[1]]
      if(length(unique(gn))==1) {
        mcols(X) <- mcols(X)[,-ii[1]]
      }
    }
  } else {
    if(has.method("colnames",X, where=environment(emd.rmv))) {
      colnames(X) <- gsub("elementMetadata.","",colnames(X))
      ii <- which(colnames(X)=="genome")
      if(length(ii)>0) {
        gn <- X[,ii[1]]
        if(length(unique(gn))==1) {
          X <- X[,-ii[1]]
        }
      }
    } else {
      stop("X must have column names, expecting GRanges, RangedData or data.frame")
    }
  }
  return(X)
}



chrOrder2 <- function (chr.names) {
  if(!is.character(chr.names)) { warning("expecting character() type for chr.names argument") }
  simple.names = gsub("^chr", "", chr.names)
  name.is.numeric = grepl("^[0-9]+$", simple.names, perl = T)
  numeric.names = chr.names[name.is.numeric][order(as.numeric(simple.names[name.is.numeric]))]
  non.numeric.names = chr.names[!name.is.numeric][order(chr.names[!name.is.numeric])]
  all.names = c(numeric.names, non.numeric.names)
  return(all.names)
}


# internal# iFunctions
chrNames2 <- function(X) {
  #requireNamespace("GenomicRanges"); requireNamespace("IRanges")
  if(nrow(X)==0) { return(character(0)) }
  out <- as.character(unique(seqnames(X)))
  return(out)
}


# internal from genoset
TGORD <- function (ds, strict = TRUE) {
  if (strict == TRUE) {
    if (!isTRUE(all.equal(chrOrder2(chrNames2(ds)), chrNames2(ds)))) {
      ds = ds[chrOrder2(chrNames2(ds))]
    }
  }
  row.order = order(as.integer(space(ds)), start(ds))
  if (is.unsorted(row.order)) {
    return(ds[row.order, , drop = FALSE])
  }
  else {
    return(ds)
  }
}


# internal from genoset
TGOGR <- function (ds, strict = TRUE) {
  if (strict == TRUE) {
    if (!isTRUE(all.equal(chrOrder2(seqlevels(ds)), seqlevels(ds)))) {
      seqlevels(ds) = chrOrder2(seqlevels(ds))
    }
  }
  row.order = order(as.integer(seqnames(ds)), start(ds))
  if (is.unsorted(row.order)) {
    ds = ds[row.order, , drop = FALSE]
  }
  return(ds)
}



# internal # iFunctions
# version of toGenomeOrder() that is guaranteed to work for IRanges or GRanges
toGenomeOrder2 <- function(X,...,strict=TRUE) {
  requireNamespace("GenomicRanges"); requireNamespace("IRanges"); #requireNamespace("genoset")
  if(is(X)[1] %in% c("GRanges","RangedData","ChipInfo")) {
    if(is(X)[1]=="RangedData") {
      return(TGORD(X,strict=strict))
    } else {
      return(TGOGR(X,strict=strict))
    }
  } else {
    typ <- is(X)[1]
    if(!typ %in% c("GRanges","RangedData","ChipInfo")) { warning("unsupported type '",typ,"' for toGenomeOrder(), failure likely") }
    alreadyThere <-("strand" %in% colnames(X))
    out <- TGOGR(as(X,"GRanges"),strict=T) #genoset::
    X <- as(out,"RangedData")
    if(("strand" %in% colnames(X)) & !alreadyThere) {
      X <- X[,-which(colnames(X) %in% "strand")]
    }
    return(X)
  }
}

#internal # iFunctions
# version of chrInfo() that is guaranteed to work for IRanges or GRanges
chrInfo2 <- function(X) {
  requireNamespace("GenomicRanges"); requireNamespace("IRanges")
  if(is(X)[1] %in% c("GRanges","ChipInfo")) {
    return(genoset::chrInfo(X))
  } else {
    typ <- is(X)[1]
    if(!typ %in% c("GRanges","RangedData","ChipInfo")) { warning("unacceptable type '",typ,"' for chrInfo2(), failure likely") }
    out <- genoset::chrInfo(as(X,"GRanges"))
    return(out)
  }
}

#from genoset
chrPartitioning2 <- function (object) 
{
    rle = Rle(seqnames(object))
    ends = structure(cumsum(runLength(rle)), names = as.character(runValue(rle)))
    return(PartitioningByEnd(ends))
}

#from genoset
chrInfoGS  <- function (object) 
{
    if (is(object, "GenomicRanges") && !any(is.na(seqlengths(object)))) {
        max.val = seqlengths(object)
    }
    else {
        max.val = max(relist(end(object), chrPartitioning2(object)))
    }
    if (length(max.val) == 1) {
        names(max.val) = chrNames2(object)
    }
    else {
        max.val = max.val[chrOrder2(chrNames2(object))]
    }
    chr.info = matrix(ncol = 3, nrow = length(max.val), dimnames = list(names(max.val), 
        c("start", "stop", "offset")))
    chr.info[, "stop"] = cumsum(as.numeric(max.val))
    chr.info[, "offset"] = c(0, chr.info[-nrow(chr.info), "stop"])
    chr.info[, "start"] = chr.info[, "offset"] + 1
    return(chr.info)
}


# from genoset
chrIndicesGS <- function (object, chr = NULL) 
{
    partitions = chrPartitioning2(object)
    chr.first = start(partitions)
    chr.last = end(partitions)
    chr.info = matrix(c(chr.first, chr.last, chr.first - 1), 
        ncol = 3, nrow = length(chr.first), dimnames = list(names(partitions), 
            c("first", "last", "offset")))
    if (!is.null(chr)) {
        if (!chr %in% rownames(chr.info)) {
            stop("Must specify a valid chromosome name in chrIndices.\n")
        }
        return(seq.int(chr.info[chr, "first"], chr.info[chr, 
            "last"]))
    }
    else {
        return(chr.info)
    }
}


#internal # iFunctions
# version of chrIndices() that is guaranteed to work for IRanges or GRanges
chrIndices2 <- function(X,...) {
  requireNamespace("GenomicRanges"); requireNamespace("IRanges")
  if(is(X)[1] %in% c("GRanges","ChipInfo")) {
    return(genoset::chrIndices(X,...))
  } else {
    typ <- is(X)[1]
    if(!typ %in% c("GRanges","RangedData","ChipInfo")) { warning("unacceptable type '",typ,"' for chrIndices2(), failure likely") }
    out <- genoset::chrIndices(as(X,"GRanges"))
    return(out)
  }
}


#internal # iFunctions
# version of chr() that is guaranteed to work for IRanges or GRanges
chr2 <- function(X) {
  requireNamespace("GenomicRanges"); requireNamespace("IRanges")
  if(is(X)[1] %in% c("GRanges","ChipInfo")) {
    return(chrGS(X))
  } else {
    if(is(X)[1]=="RangedData") {
      return(space(X))
    } else {
      if(is.null(X)) { warning("X was NULL, expecting RangedData/GRanges"); return(NULL) }
      warning("chr2() function applies only to RangedData objects, attempting to pass ",is(X)[1]," to chr()")
      return(chrGS(X))
    }
  }
}

#from genoset
chrGS <- function (object) 
{
    return(as.character(seqnames(object)))
}


#internal
make.divisor <- function(unit=c("b","kb","mb","gb"), par.name="scale (scl)") {
  valid.units <- c("k","m","g","b")
  unit <- tolower(unit[1]);
  unit <- substr(unit,1,1)
  if(!unit %in% valid.units) { warning("invalid entry to ",par.name," defaulting to base-pairs") ; unit <- "b" }
  divisor <- switch(unit,k=1000,m=10^6, g=10^9, b=1)
  return(divisor)
}

#internal
plotdf <- function(expr,fn="myTempPlot.pdf") {
  pdf(fn)
{ expr }
dev.off()
cat("wrote plot to",cat.path(getwd(),fn),"\n")
}


# gets limits of a plot space on current device
plot_get_area <- function() {
  success <- tryCatch(mine <- par("usr"),error=function(e) { F } )
  if(all(!success)) { warning("could not get plot limits - no plot open perhaps?"); return(NULL) }
  xlim=mine[1:2]
  ylim=mine[3:4]
  return(list(xlim=xlim,ylim=ylim))
}


#internal function
chrnums.to.txt <- function(X,do.x.y=TRUE) {
  cond <- paste(X) %in% paste(1:22)
  if(any(cond)) { X[cond] <-  paste0("chr",X[cond]) }
  if(do.x.y) {
    X <- gsub("X","chrX",X)
    X <- gsub("Y","chrY",X)
    X <- gsub("23","chrX",X)
    X <- gsub("24","chrY",X)
    X <- gsub("25","chrXY",X)
    X <- gsub("26","chrM",X)
    X <- gsub("chrXchrY","XY",X)
    X <- gsub("chrYchrX","YX",X) 
    X <- gsub("M","chrM",X)
    X <- gsub("XY","chrXY",X)
    X <- gsub("chrchr","chr",X)
  } else {
    X[X %in% paste(23:100)] <- paste0("chr",X[X %in% paste(23:100)])
  }
  return(X)
}

#internal function
chrnames.to.num <- function(X,keep.let=TRUE) {
  X <- tolower(X)
  if(!keep.let) {
    X <- gsub("chrM","26",X)
    X <- gsub("chrXY","25",X) 
    X <- gsub("chrY","24",X)
    X <- gsub("chrX","23",X)
  } else {
    X <- gsub("chrM","M",X)
    X <- gsub("chrXY","XY",X) 
    X <- gsub("chrY","Y",X)
    X <- gsub("chrX","X",X)
  }
  X <- gsub("chrchr","",X) 
  X <- gsub("chr","",X)
  X <- toupper(X)
  return(X)
}


# iFunctions
# internal, tidy chromosome names using extra chromosomal annotation into rough chromosomes
tidy.extra.chr <- function(chr,select=FALSE) {
  # most relevant to hg18
  chr <- paste(chr)
  SEL_c6 <- grep("c6",chr,ignore.case=T)
  SEL_c5 <- grep("c5",chr,ignore.case=T)
  SEL_NT <- grep("NT",chr,ignore.case=T)
  # most relevant to hg19
  SEL_LRG <- grep("LRG",chr,ignore.case=T)
  SEL_HG <- grep("HG",chr,ignore.case=T)
  SEL_GL <- grep("GL",chr,ignore.case=T)
  SEL_HS <- grep("HSCHR",chr,ignore.case=T)
  if(select) {
    # create TRUE/FALSE as to whether list elements have weird chromosome codes
    all <- unique(c(SEL_c6,SEL_c5,SEL_NT,SEL_LRG,SEL_HG,SEL_GL,SEL_HS))
    return(!1:length(chr) %in% all)
  } else {
    # transform weird chromosomes into more palatable codes
    chr[SEL_c6] <- 6  # prevent issues with c6_COX, c6_QBL  
    chr[SEL_c5] <- 5  # prevent issues with c5_H2  
    chr[SEL_NT] <- "Z_NT"  # merge all NT regions to one label
    chr[SEL_LRG] <- "Z_LRG"  # merge all NT regions to one label
    chr[SEL_HG] <- "Z_HG"  # merge all NT regions to one label
    chr[SEL_GL] <- "Z_GL"  # merge all NT regions to one label
    X <- names(table(chr))
    X <- X[grep("HSCHR",X)]
    if(length(X)>0) {
      HSC <- gsub("_","",substr(gsub("HSCHR","",X),1,2))
      for(cc in 1:length(X)) {
        #cat("replacing ",X[cc]," with ",HSC[cc],"\n",sep="")
        chr[chr==X[cc]] <- HSC[cc]
      }
    }
    return(chr)
  }  
}


#internal
gene.duplicate.report <- function(ga,full.listing=F,colname="gene",silent=FALSE) {
  # for a RangedData object, report on any multiple listings for the same gene
  if(is(ga)[1]!="RangedData") { warning("not a RangedData object") ; return(NULL) }
  if(colname=="gene") {
    if("gene" %in% tolower(colnames(ga)))
    { 
      gene.col <- (which(tolower(colnames(ga)) %in% c("gene","genes","geneid")))
    } else {
      gene.col <- 0
    }
  } else {
    if(colname %in% colnames(ga)) { 
      gene.col <- which(colnames(ga)==colname) 
    } else { 
      stop("colname not found in ga") 
    } 
  }
  if(length(gene.col)>0) { gene.col <- gene.col[1] } else { warning("no 'gene' column"); return(NULL) }
  colnames(ga)[gene.col] <- "gene" #force this colname
  duplicate.report <- T  ### when would this be FALSE???
  if(duplicate.report) {
    culprits <- unique(ga$gene[which(duplicated(ga$gene))])
    n.gene.multi.row <- length(culprits)
    culprit.ranges <- ga[ga$gene %in% culprits,]
    total.culprit.rows <- nrow(culprit.ranges)
    start.same.ct <- end.same.ct <- 0; which.ss <- NULL
    for (cc in 1:length(culprits)) { 
      mini <- (ga[ga$gene %in% culprits[cc],]) 
      if(full.listing) {
        cat(colname,":",culprits[cc],"# same start:",anyDuplicated(start(mini)),
            "# same end:",anyDuplicated(end(mini)),"\n") }
      start.same.ct <- start.same.ct+anyDuplicated(start(mini))
      end.same.ct <- end.same.ct+anyDuplicated(end(mini))
      if(anyDuplicated(start(mini)) | anyDuplicated(end(mini))) { which.ss <- c(which.ss,cc) }
    }
    if(!silent) {
      cat(" ",colname,"s with split ranges:\n"); print(culprits,quote=F); cat("\n")
      cat(" ",colname,"s with same start or end:\n"); print(culprits[which.ss],quote=F); cat("\n")
      cat(" total ",colname,"-segments with same start",start.same.ct,"; total with same end:",end.same.ct,"\n")
    }
  }
  return(culprits)
}



# internal function
validate.dir.for <- function(dir,elements,warn=F) {
  # in case the 'dir' input list object is not the standardised list form, convert
  # allows flexible use of list or regular directory specifications in plumbCNV functions
  if(is.null(dir)) { cat("directory empty\n"); return(NULL) }
  if(!is.list(dir)) {
    if(warn) { cat(elements[cc],"'dir' object wasn't a list\n")}
    dir <- as.list(dir); names(dir)[1:length(dir)] <- elements[1:length(dir)] 
  }
  for (cc in 1:length(elements)) {
    if(!elements[cc] %in% names(dir)) { 
      dir[[paste(elements[cc])]] <- "" ;
      if(warn) { stop(paste("dir$",elements[cc]," was empty.. set to current\n",sep="")) } 
    }
  }
  return(dir)
}



################# end internals #################
#if(getwd()!= "/home/ncooper"){
#  require(genoset)
#}

# examples for package
# 
# all.support <- chip.support()
# snp.info <- ChipInfo(chr=all.support[,"Chr"],pos=all.support[,"Pos"],ids=rownames(all.support),chip="ImmunoChip",rs.id=all.support[,"dbSNP"],
#                      A1=all.support[,"A1"], A2=all.support[,"A2"])
# 
# gr.snp.info <- with(all.support,makeGRanges(chr=Chr,pos=Pos,row.names=rownames(all.support)))
# 
# snp.info <- ChipInfo(GRanges=gr.snp.info,chip="ImmunoChip",rs.id=all.support[,"dbSNP"],
#                      A1=all.support[,"A1"], A2=all.support[,"A2"])
# 
# snp.info[chr(snp.info)=="MT",] # look at the mitochondrial SNP
# QCcode(snp.info)[chr(snp.info)=="MT"] <- 1 # exclude it by changing the fail code
# snp.info[["MT"]] # revisit and see it now registers as 'fail'
# QCfail(snp.info)
# xx <- conv.36.37(chr=all.support[,"Chr"],pos=all.support[,"Pos"],ids=rownames(all.support))
# ucsc(si36[["XY"]])
# si37 <- convTo37(snp.info)
# si36 <- convTo36(si37)
# snp.info <- ChipInfo(chr=all.support[,"Chr"],pos=all.support[,"Pos37"],ids=rownames(all.support),chip="ImmunoChip",rs.id=all.support[,"dbSNP"],
#                     A1=all.support[,"A1"], A2=all.support[,"A2"],build=37)


#' Class to represent SNP annotation for a microarray
#' 
#' This class annotates a microarray SNP chip with data for each SNP including chromosome,
#' id, position, strand, 'rs' id, allele 1, allele 2 for each SNP of a microarray chip,
#' in either hg18, hg19 or hg38 (build 36/37/38) coordinates.
#' This package makes extension use of this class of annotation object for the working
#' microarray chip, e.g, default is ImmunoChip, 
#' and you can also load your own annotation if using a different chip. The class
#' is basically a GRanges object, modified to always have columns for A1, A2 (alleles), 
#' rs.id, and a quality control flag. The default display is tidier than GRanges, it has
#' nice coersion to and frame data.frame and subsetting by chromosome using [[n]] has been
#' added, in addition to normal [i,j] indexing native to GRanges.
#' Note that with this package the first time annotation is used it might be slow, but
#' subsequent calls should be fast.
#' METHODS
#'  "[[", show, print, length, dim, rownames, initialize
#'  build, chip, rs.id, A1, A2, QCcode, QCcode<-, QCpass, QCfail
#'  convTo36, convTo37, convTo38
#' COERCION
#'  can use 'as' to convert to and from: GRanges, RangedData, data.frame
#'@section Fields: 
#'  \describe{
#'    \item{\code{seqnames}:}{Object of class \code{"Rle"}, containing chromosomes for each range, see GRanges.}
#'    \item{\code{ranges}:}{Object of class \code{"IRanges"}, containing genomic start and end, see GRanges.}
#'    \item{\code{strand}:}{Object of class \code{"Rle"}, containing plus or minus coding for forward or reverse strand, see GRanges.}
#'    \item{\code{seqinfo}:}{Object of class \code{"Seqinfo"}, containing chromosome listing, see GRanges.}
#'    \item{\code{chip}:}{Name, class \code{"character"}, containing user description of the chip, e.g, 'immunoChip'.}
#'    \item{\code{build}:}{Object of class \code{"character"}, annotation version, e.g, hg18, hg19, hg38, etc.}
#'    \item{\code{elementMetaData}:}{Object of class \code{"DataFrame"}, see GRanges, but with specific column names:
#'  A1, A2, QCcode and rs.id.}
#'  }
# @name ChipInfo-class
#' @aliases ChipInfo-method
#' @rdname ChipInfo-class
#' @exportClass ChipInfo
#' @author Nick Cooper
setClass("ChipInfo",
         contains="GRanges",
         slots=list(chip="character", 
                    build="character"),
         prototype=prototype(
              seqnames=Rle(factor()),
              ranges=IRanges::IRanges(),
              strand=Rle(GenomicRanges::strand()),
              elementMetadata=DataFrame(A1=NULL, A2=NULL,  QCcode=integer(), rs.id=NULL, chip.id=NULL),
              seqinfo=Seqinfo(),
              metadata=list(),
              chip=character(), 
              build=character()
            )
)



#' rownames method for GRanges objects
#' 
#' rownames: Returns the row names.
# @name rownames
#' @param x a GRanges object
#' @return rownames: Character vector of row names (e.g, SNP IDs).
#' @rdname GRanges-methods
#' @exportMethod rownames
setMethod("rownames", "GRanges", function(x) names(x@ranges))
#' @rdname GRanges-methods
#' @exportMethod "rownames<-"
#' @param value a character string
setMethod("rownames<-", "GRanges", function(x,value) { names(x@ranges) <- value; return(x) })



# seqnames="Rle",
# ranges="IRanges",
# strand="Rle",
# elementMetadata="DataFrame",
# seqinfo="Seqinfo",
# metadata="list",
# ,
# prototype=prototype(
#   seqnames=IRanges::Rle(factor()),
#   ranges=IRanges::IRanges(),
#   strand=IRanges::Rle(GenomicRanges::strand()),
#   elementMetadata=IRanges::DataFrame(A1=NULL, A2=NULL,  QCcode=integer(), rs.id=NULL),
#   seqinfo=GenomicRanges::Seqinfo(),
#   chip=character(), 
#   build=character(),
# )

#' rownames method for ChipInfo objects
#' 
#' rownames: Returns the row names.
# @name rownames
#' @param x a ChipInfo object
#' @return rownames: Character vector of row names (SNP IDs).
#' @rdname ChipInfo-methods
#' @exportMethod rownames
setMethod("rownames", "ChipInfo", function(x) names(x@ranges))


#' dim method for ChipInfo objects
#' 
#' dim: Returns the dimension
# @name dim
#' @return dim: same as length
#' @rdname ChipInfo-methods
#' @exportMethod dim
setMethod("dim", "ChipInfo", function(x) dim(mcols(x)))


#' Length method for ChipInfo objects
#' 
#' length: Returns the number of rows
# @name length
#' @return length: integer, number of rows, same as inherited nrow()
#' @rdname ChipInfo-methods
#' @exportMethod length
setMethod("length", "ChipInfo", function(x) length(x@ranges))


#' Retrieve the Chip name for ChipInfo
#' 
#' Simply returns the name of the chip, e.g, 'ImmunoChip'
# @name chip
#' @param x a ChipInfo object
#' @return character string
#' @rdname chip-methods
#' @export
setGeneric("chip", function(x) standardGeneric("chip"))


#' @exportMethod chip
#' @rdname chip-methods
setMethod("chip", "ChipInfo", function(x) x@chip)


#' Retrieve the UCSC build for a ChipInfo object
#' 
#' Returns the UCSC build of the chip object, e.g, 'hg18', 'hg19', or 'hg38'
# @name ucsc
#' @param x a ChipInfo object
#' @return character, 'hg18', 'hg19', or 'hg38'
#' @rdname ucsc-methods
#' @export
setGeneric("ucsc", function(x) standardGeneric("ucsc") )

#' @rdname ucsc-methods
#' @exportMethod ucsc
setMethod("ucsc", "ChipInfo", function(x) x@build)


#' Access rs-ids for ChipInfo
#' 
#' Returns the rs-ids for the chip object, e.g, "rs689", etc
#' Only if these are annotated internally, or else a vector of NAs
# @name rs.id
#' @param x a ChipInfo object
#' @param b logical, whether to show 'b' suffixes on rs.ids which
#' are created in the background to allow duplicate ids to be uniquely
#' represented for lookup and reference purposes.
#' @return rs-ids: character vector of IDs (or NAs)
#' @rdname rs.id-methods
#' @export
setGeneric("rs.id", function(x,b=TRUE) standardGeneric("rs.id") )

#' @rdname rs.id-methods
#' @exportMethod rs.id
setMethod("rs.id", "ChipInfo", function(x,b=TRUE) { 
  u <- mcols(x) ;  
  if("rs.id" %in% colnames(u)) { 
    U <- u[,"rs.id"] 
    if(!b) { U <- gsub("b","",U) }
    return(U)
  } else { return(NULL) } 
})


#' Access chip-ids for ChipInfo
#' 
#' Returns the chip-ids for the chip object, e.g, "imm_1_898835", etc
#' Only if these are annotated internally, or else a vector of NAs
#' Note that the main purpose of this is because sometimes chip-ids
#' do not satisfy conditions to be an R column/row name, e.g, start
#' with a number, illegal characters, etc. So this allows certain
#' functions to return the actual chip names that would match the 
#' official manifest. These will largely be the same as the rownames,
#' but the rownames will always be valid R column names, converted from
#' the original using clean.snp.ids() [internal function]
# @name chip-id
#' @param x a ChipInfo object
#' @return chip ids: character vector of IDs (or NAs)
#' @rdname chipId-methods
#' @export
setGeneric("chipId", function(x) standardGeneric("chipId") )

#' @rdname chipId-methods
#' @exportMethod chipId
setMethod("chipId", "ChipInfo", function(x) { 
  u <- mcols(x) ;  
  if("chip.id" %in% colnames(u)) { 
    U <- u[,"chip.id"] 
    return(U)
  } else { return(NULL) } 
})

#' Access alleles for ChipInfo
#' 
#' A1/A2: Returns the letter for the A1/A2 alleles for the chip object, 
#' e.g, 'A','C','G','T', etc
#' Only if these are annotated internally, or else a vector of NAs
#' @param x a ChipInfo object
#' @rdname allele-methods
#' @family Alleles
#' @export
setGeneric("A1", function(x) standardGeneric("A1") )

#' @rdname allele-methods
#' @family Alleles
#' @exportMethod A1
setMethod("A1", "ChipInfo", function(x) { u <- mcols(x) ;  if("A1" %in% colnames(u)) { u[,"A1"] } else { NULL } })


#' @return character vector of allele codes (or NAs)
#' @rdname allele-methods
#' @family Alleles
#' @export
setGeneric("A2", function(x) standardGeneric("A2") )

#' @rdname allele-methods
#' @family Alleles
#' @exportMethod A2
setMethod("A2", "ChipInfo", function(x) { u <- mcols(x) ;  if("A2" %in% colnames(u)) { u[,"A2"] } else { NULL } })


#' Set quality control pass or fail codes for ChipInfo
#' 
#' A1<-/A2<-: Allows user to set the allele codes for each SNP of the chip object, 
#' e.g, A,C,G,T,K, etc. If you are using allele codes this is likely to 
#' necessary as each genotyping produces a different set of allele codes.
#' If using in conjunction with snpStats, remember that allele codes are
#' always flipped to be alphabetical, so the reference allele is the later
#' letter in the alphabet. Note, assignment to A2 needs to be done separately.
#' @param value new allele codes, e.g, A,C,G,T
#' @return A1<-: updates the ChipInfo object specified with new allele codes for the 'A1' slot
#' @rdname allele-methods
#' @family Alleles
#' @export
setGeneric("A1<-", function(x,value) standardGeneric("A1<-") )


#' @rdname allele-methods
#' @family Alleles
#' @exportMethod "A1<-"
setMethod("A1<-", "ChipInfo", function(x,value) {
  return(.updateAllele(x,value,"A1"))
} )




#' @return A2<-: updates the ChipInfo object specified with new allele codes for the 'A2' slot
#' @rdname allele-methods
#' @family Alleles
#' @export
setGeneric("A2<-", function(x,value) standardGeneric("A2<-") )


#' @rdname allele-methods
#' @family Alleles
#' @exportMethod "A2<-"
setMethod("A2<-", "ChipInfo", function(x,value) {
  return(.updateAllele(x,value,"A2"))
} )


#internal
.updateAllele <- function(x,value, allele="A1") {
  if(length(Dim(value))!=1) { stop("value must be a vector") }
  if(length(x)==length(value)) {
    if(is.character(value)) {
      mcols(x)[,allele] <- paste(value)
    } else {
      stop("only character values can be inserted into the ",allele," column, A,C,G,T, etc")
    }
  } else {
    stop("mismatching lengths, tried to insert ",length(value),"new allele codes into ChipInfo with ",length(x)," rows")
  }
  return(x)
}


#' Access quality control pass or fail codes for ChipInfo
#' 
#' Returns the pass or fail codes for each SNP of the chip object, 
#' e.g, 0,1,..,n etc
#' Only if these are added manually, or else all will be 'pass' (=0)
# @name QCcode
# why not? param x a ChipInfo object
#' @return integer vector of pass/fail codes
#' @rdname QC-methods
#' @family QC
#' @export
setGeneric("QCcode", function(x) standardGeneric("QCcode") )

#' @rdname QC-methods
#' @family QC
#' @exportMethod QCcode
setMethod("QCcode", "ChipInfo", function(x) { 
  u <- mcols(x) ;  if("QCcode" %in% colnames(u)) { u[,"QCcode"] } else { NULL } 
})


#' Set quality control pass or fail codes for ChipInfo
#' 
#' QCcode<-: Allows user to set the pass or fail codes for each SNP of the chip object, 
#' e.g, 0,1,..,n etc. 0 is always pass, >0 is always fail, but each integer
#' can be used to represent a different failure type, or for simplicity, stick
#' to 0 and 1, ie, just pass and fail.
#' @param x a ChipInfo object
#' @param value new pass/fail codes, e.g, 0,1,...,n
#' @return QCcode<-: updates the object specified with new pass/fail codes for the 'QCcode' slot
#' @rdname QC-methods
#' @family QC
#' @export
setGeneric("QCcode<-", function(x,value) standardGeneric("QCcode<-") )


#' @rdname QC-methods
#' @family QC
#' @exportMethod QCcode
setMethod("QCcode<-", "ChipInfo", function(x,value) {
  if(length(x)==length(value)) {
    if(is.numeric(value)) {
      mcols(x)[,"QCcode"] <- as.integer(value)
    } else {
      stop("only numeric values can be inserted into the QCcode column, 0=pass, higher integers are failure codes")
    }
  } else {
    stop("mismatching lengths, tried to insert ",length(value),"new values into ChipInfo with ",length(x)," rows")
  }
  return(x)
} )


#' Filter ChipInfo to for only SNPs passing QC
#' 
#' QCpass: Returns the subset of the ChipInfo object for which SNPs pass quality
#' control, according to the QCcodes() slot == 0.
# @name QCpass
# @param x a ChipInfo object
#' @return QCpass: ChipInfo object for which SNPs pass quality control
#' @rdname QC-methods
#' @family QC
#' @export
setGeneric("QCpass", function(x) standardGeneric("QCpass") )


#' Filter ChipInfo to for only SNPs failing QC
#' 
#' QCfail: Returns the subset of the ChipInfo object for which SNPs fail quality
#' control, according to the QCcodes() slot > 0.
# @name QCfail
# @param x a ChipInfo object
#' @param type integer between 1 and 100, failure type (user can assign own coding scheme)
#' @return QCfail: ChipInfo object for which SNPs fail quality control
#' @rdname QC-methods
#' @family QC
#' @export
setGeneric("QCfail", function(x,type=NA) standardGeneric("QCfail") )


#' @rdname QC-methods
#' @family QC
#' @exportMethod QCpass
setMethod("QCpass", "ChipInfo", function(x) { 
  ii <- which(QCcode(x)==0)
  if(length(ii)>0) { return(x[ii,]) } else { warning("No SNPs passed QC"); return(NULL) } })


#' @rdname QC-methods
#' @family QC
#' @exportMethod QCfail
setMethod("QCfail", "ChipInfo", function(x,type=NA) { 
  ii <- which(QCcode(x)!=0)
  if(is.numeric(type)) { 
    if(type %in% 1:100) {
      ii <- which(QCcode(x)==type) 
    } else { 
      warning("type must be an integer between 1 and 100, returning all failures") 
    }
  }
  if(length(ii)>0) { return(x[ii,]) } else { warning("All SNPs passed QC"); return(NULL) } })


#' Subset ChipInfo by chromosome
#' 
#' Returns the subset of the ChipInfo object for which SNPs are on
#' the chromosome specified, by either number or character.
#' @param x a ChipInfo object
#' @param i a chromosome number or letter, i.e, one of seqlevels(x)
#' @param j always leave missing, not applicable for this method.
#' @param ... further arguments - again there should not be any
#' @return ChipInfo object for the subset of SNPs on chromosome i
#' @rdname ChipInfo-subset
#' @exportMethod "[["
setMethod("[[", "ChipInfo", function(x,i,j,...) { 
  dotArgs <- list(...)
  if (length(dotArgs) > 0)
    dotArgs <- dotArgs[names(dotArgs) != "exact"]
  if (!missing(j) || length(dotArgs) > 0)
    stop("invalid subsetting")
  if (missing(i))
    stop("subscript is missing")
  if (!is.character(i) && !is.numeric(i)) 
    stop("invalid subscript type")
  if (length(i) < 1L)
    stop("attempt to select less than one element")
  if (length(i) > 1L)
    stop("attempt to select more than one element")
  cn <- chrNames(x)
  if (is.numeric(i) && !is.na(i) && (i < 1L || i > length(cn)))
    stop("subscript out of bounds")
  # do the selection #
  if(i %in% paste(chrm(x))) {
    out <- x[chrm(x)==i,]
  } else {
    if(is.numeric(i)) {
      out <- x[match(chrm(x),chrNames(x))==i,]
    } else {
      stop("unknown index")
    }
  }
  out@build <- x@build
  out@chip <- x@chip
  return(out)
} )


#' Convert ChipInfo between build 36/37/38 coordinates
#' 
#' Returns the a ChipInfo object with positions updated to build 36/37/38
#' coordinates, assuming that the build() slot was entered correctly. 
#' Ensure that the value of ucsc(x) is correct before
#' running this function for conversion; for instance, if the coordinates 
#' are already build 37/hg19, but ucsc(x)!="hg19" (incorrect value), then
#' these coordinates will be transformed in a relative manner rendering the
#' result meaningless.
# @name convTo37
#' @param x a ChipInfo object
#' @return convTo37: Returns a ChipInfo object with the build updated to hg19 coordinates
#' @family conversion
#' @rdname conv-methods
#' @export
setGeneric("convTo37", function(x) standardGeneric("convTo37"))
          
#' @family conversion
#' @aliases convTo37
#' @rdname conv-methods
#' @exportMethod convTo37
setMethod("convTo37", "ChipInfo", function(x) {
  if(ucsc.sanitizer(ucsc(x)) %in% c("hg18","hg38")) {
    if(ucsc.sanitizer(ucsc(x)) == c("hg18")) {
      u <- conv.36.37(ranges=as(x,"GRanges"))
    } else {
      u <- conv.38.37(ranges=as(x,"GRanges"))
    }
    if(length(u)==length(x)) { 
      x@ranges <- u@ranges
      all.eq <- TRUE
      if(all.eq) { all.eq <- length(seqlevels(x))==length(seqlevels(u)) }
      if(all.eq) { all.eq <- all(sort(seqlevels(x))==sort(seqlevels(u))) }
      if(!all.eq) {
        #print(seqlevels(x)); print(seqlevels(u))
        #warning("conversion altered the chromosomes"); 
        seqlevels(x) <- c(seqlevels(x),seqlevels(u)[!seqlevels(u) %in% seqlevels(x)])  #[email protected] <- [email protected] 
      }
      xx <- as(x@seqnames,"character")
      uu <- as(u@seqnames,"character")
      if(any(xx!=uu)) { x@seqnames <- u@seqnames }
      x@build <- "hg19"
    } else { 
      stop("conversion to build37/hg19 failed, input had length ",length(x),"; output had length ",length(u)) 
    } 
  } else {
    if(ucsc.sanitizer(ucsc(x))!="hg19") { 
      warning("input object was not tagged as hg18/build36/hg38/build38 [@build], left unchanged") 
    } else {
      warning("object is already using hg19/build37, no change")
    }
  }
  return(x)
})



# @name convTo36
# @param x a ChipInfo object
#' @return convTo36: Returns a ChipInfo object with the build updated to hg18 coordinates
#' @family conversion
#' @rdname conv-methods
#' @export
setGeneric("convTo36", function(x) standardGeneric("convTo36"))


#' @family conversion
#' @aliases convTo36
#' @rdname conv-methods
#' @exportMethod convTo36
setMethod("convTo36", "ChipInfo", function(x) {
  if(ucsc.sanitizer(ucsc(x))=="hg19") {
    u <- conv.37.36(ranges=as(x,"GRanges"))
    if(length(u)==length(x)) { 
      x@ranges <- u@ranges
      all.eq <- TRUE
      if(all.eq) { all.eq <- length(seqlevels(x))==length(seqlevels(u)) }
      if(all.eq) { all.eq <- any(sort(seqlevels(x))==sort(seqlevels(u))) }
      if(!all.eq) {
        #warning("conversion altered the chromosomes"); 
        seqlevels(x) <- c(seqlevels(x),seqlevels(u)[!seqlevels(u) %in% seqlevels(x)])  #[email protected] <- [email protected] 
      }
      xx <- as(x@seqnames,"character")
      uu <- as(u@seqnames,"character")
      if(any(xx!=uu)) { x@seqnames <- u@seqnames }
      x@build <- "hg18"
    } else { 
      stop("conversion to build36/hg18 failed") 
    } 
  } else {
    if(ucsc.sanitizer(ucsc(x))!="hg18") { 
      warning("input object was not tagged as hg19/build37 [@build], left unchanged") 
    } else {
      warning("object is already using hg18/build36, no change")
    }
  }
  return(x)
})



# @name convTo38
# @param x a ChipInfo object
#' @return convTo38: Returns a ChipInfo object with the build updated to hg38 coordinates
#' @family conversion
#' @rdname conv-methods
#' @export
setGeneric("convTo38", function(x) standardGeneric("convTo38"))


#' @family conversion
#' @aliases convTo38
#' @rdname conv-methods
#' @exportMethod convTo38
setMethod("convTo38", "ChipInfo", function(x) {
  if(ucsc.sanitizer(ucsc(x))=="hg18") { stop("can't convert 36 to 38; must convert from 36 to 37, then convert from 37 to 38") }
  if(ucsc.sanitizer(ucsc(x))=="hg19") {
    u <- conv.37.38(ranges=as(x,"GRanges"))
    if(length(u)==length(x)) { 
      x@ranges <- u@ranges
      all.eq <- TRUE
      if(all.eq) { all.eq <- length(seqlevels(x))==length(seqlevels(u)) }
      if(all.eq) { all.eq <- any(sort(seqlevels(x))==sort(seqlevels(u))) }
      if(!all.eq) {
        #warning("conversion altered the chromosomes"); 
        seqlevels(x) <- c(seqlevels(x),seqlevels(u)[!seqlevels(u) %in% seqlevels(x)])  #[email protected] <- [email protected] 
      }
      xx <- as(x@seqnames,"character")
      uu <- as(u@seqnames,"character")
      if(any(xx!=uu)) { x@seqnames <- u@seqnames }
      x@build <- "hg38"
    } else { 
      stop("conversion to build38/hg38 failed") 
    } 
  } else {
    if(ucsc.sanitizer(ucsc(x))!="hg38") { 
      warning("input object was not tagged as hg19/build37 [@build], left unchanged") 
    } else {
      warning("object is already using hg18/build36, no change")
    }
  }
  return(x)
})


#' Display method for ChipInfo objects
#' 
#' show: Displays a preview of the object
# @name show
#' @param object a ChipInfo object
#' @return show: Displays a preview of the object
#' @rdname ChipInfo-methods
#' @exportMethod show
setMethod("show", "ChipInfo", 
          function(object) { showChipInfo(object,up.to=10,head.tail=5,show.strand=FALSE) } )


#' Print a ChipInfo object to the console
#' 
#' print: See 'show' as the behaviour is very similar and ... are just arguments of 'show'.
#' The key difference with 'print' instead of 'show' is that by default the parameter
#' 'up.to' is set to 50, so that any ChipInfo object (or subset) of less than or equal
#' to 50 rows will be displayed in its entirety, rather than just the top/bottom 5 rows. 
# @name print
# @param x a ChipInfo object
#' @param ... further arguments to showChipInfo()
#' @return print: Prints the object to terminal using 'showChipInfo()'.
#' @rdname ChipInfo-methods
#' @exportMethod print
setMethod("print", "ChipInfo", 
          function(x,...) { showChipInfo(x,...) } )


#' Constructor (wrapper) for ChipInfo annotation object
#' 
#' This class annotates a microarray SNP chip with data for each SNP including chromosome,
#' id, position, strand, 'rs' id, allele 1, allele 2 for each SNP of a microarray chip,
#' in either hg18, hg19 or hg38 (build 36/37/38) coordinates.
#' This package makes extension use of this class of annotation object for the working
#' microarray chip, e.g, default is ImmunoChip, but Metabochip is also built-in,
#' and you can also load your own annotation if using a different chip. The class
#' is basically a GRanges object, modified to always have columns for A1, A2 (alleles), 
#' rs.id, and a quality control flag. The default display is tidier than GRanges, it has
#' nice coersion to and frame data.frame and subsetting by chromosome using [[n]] has been
#' added, in addition to normal [i,j] indexing native to GRanges.
# @name ChipInfo
#' @param GRanges a GRanges object containing chromosome, start/end = position, and strand
#' information for the chip object to be created, also rownames should be used to code
#' the chip-ids for each SNP.
#' @param chr optional, alternative to using 'GRanges' to input SNP locations, enter here 
#' a vector of chromosome numbers/letters for each SNP. The recommended coding is: 
#' 1:22, X, Y, XY, MT
#' @param pos optional, vector of positions (integers), use in conjunction with 'chr' and
#'  'ids' as an alternative way to input SNP position information instead of GRanges.
#' @param ids optional, vector of SNP chip-ids, use in conjunction with 'chr' and
#'  'pos' as an alternative way to input SNP position information instead of GRanges.
#' @param chip character, name of the chip you are making this annotation for (only used
#' for labelling purposes)
#' @param build character, either "hg18" or "hg19". Will also accept build number, 36 or 37.
#' This indicates what coordinates the object is using, and will be taken into account by
#' conversion functions, and annotation lookup functions throughout this package.
#' @param rs.id 'rs' ids are standardized ids for SNPs, these usually differ from each chips'
#' own IDs for each snp. If you don't know these, or can't find them, they can be left blank,
#' but will render the functions 'rs.to.id()' and 'id.to.rs()' useless for this ChipInfo object.
#' @param chip.id chip ids are the chip-specific ids for SNPs, these usually differ between chips'
#' even for the same snp. If you don't know these, or can't find them, they can be left blank,
#' but will render the function 'chip.id()' useless for this ChipInfo object. The main purpose
#' of this parameter is for when the real chip ids are not valid R row/column name strings, and
#' by using this column, some functions can return the real chip ids instead of the sanitized 
#' version
#' @param A1 the first allele letter code for each SNP, e.g, usually "A","C","G", or "T", but
#' you can use any scheme you like. Can be left blank.
#' @param A2, as for A1, but for allele 2.
#' @param QCcode optional column to keep track of SNPs passing and failing QC. You can completely
#' ignore this column. It works based on integer codes, 0,1,2, you may wish to use simple 0 and 1,
#' for pass and fail respectively, or else 0 can be pass, and 1,2,... can indicate failure for 
#' different criteria. 0 will always be treated as a pass and anything else as a fail, so you
#' can code fails however you wish.
#' @export
ChipInfo <- function(GRanges=NULL, chr=NULL, pos=NULL, ids=NULL, chip="unknown chip", build="",
                     rs.id=NULL, chip.id=NULL, A1=NULL, A2=NULL, QCcode=NULL) {
  if(build!="") { build <- ucsc.sanitizer(build) }
  LL <- max(c(length(chr),length(GRanges)),na.rm=T)
  if(length(A1)!=LL | length(A2)!=LL) { A1 <- A2 <- rep(NA,times=LL) }
  if(length(rs.id)!=LL) { rs.id <- rep(NA,times=LL) } else { 
    if(any(duplicated(rs.id))) { rs.id <- add.trail(rs.id) } # appends letters to stop duplicates
  }
  if(length(chip.id)!=LL) { chip.id <- rep(NA,times=LL) } else { 
    if(any(duplicated(narm(chip.id)))) { stop("chip.id shouldn't contain duplicates") } # appends letters to stop duplicates
  }
  if(length(QCcode)!=LL) { QCcode <- rep(0,LL) }
  if(is.null(GRanges)) {
    GRanges <- makeGRanges(chr=chr,pos=pos,row.names=ids)
  } else {
    if(is(GRanges)[1]!="GRanges") { GRanges <- as(GRanges,"GRanges") }
  }
  df <- DataFrame(A1=A1,A2=A2,QCcode=QCcode,rs.id=rs.id,chip.id=chip.id)
  #print(build)
  return(new("ChipInfo", seqnames=GRanges@seqnames, ranges=GRanges@ranges,  strand=GRanges@strand,
            elementMetadata=df, seqinfo=GRanges@seqinfo,
             chip=chip, build=build))
}


#' Initialize (constructor) method for ChipInfo
#' 
#' Use the 'ChipInfo()' wrapper to construct ChipInfo objects from scratch
#  @name ChipInfo
#' @param .Object An object generated from the ChipInfo class prototype,
#'  see methods:initialize
#' @param ... Additional arguments to initialize. None recommended.
#' @rdname ChipInfo-class
#' @exportMethod initialize
setMethod("initialize", "ChipInfo",
              function(.Object, ...){
          		  callNextMethod(.Object, ...)
          	  })


#' As("ChipInfo", "GRanges")
#'
#' @name as
# @rdname ChipInfo-class
#' @export
setAs("ChipInfo", "GRanges",
      function(from) { 
        #print(is(from)); print([email protected])
        out <- GRanges(from@seqnames,ranges=from@ranges,strand=from@strand,
                       seqinfo=from@seqinfo,elementMetadata=from@elementMetadata,genome=ucsc(from))
        return(out)
      }
)

#' As("ChipInfo", "GRanges")
#'
#' @name as
# @rdname ChipInfo-class
#' @export
setAs("ChipInfo", "RangedData",
      function(from) { 
        out <- as(as(from,"GRanges"),"RangedData")
        if("strand" %in% colnames(out)) { out <- out[,-which(colnames(out) %in% "strand")] }
        colnames(out) <- gsub("elementMetadata.","",colnames(out),fixed=TRUE)
        colnames(out) <- gsub("elementMetadata","",colnames(out),fixed=TRUE)
        return(out)
      }
)


#' As("ChipInfo", "GRanges")
#'
#' @name as
# @rdname ChipInfo-class
#' @export
setAs("ChipInfo", "data.frame", function(from) { ranged.to.data.frame(as(from,"GRanges"),include.cols=TRUE) })


#' As("GRanges", "ChipInfo")
#'
#' @name as
# @rdname ChipInfo-class
#' @export
setAs("GRanges", "ChipInfo", 
      function(from) { 
        bb <- genome(from)
        if(all(is.na(bb))) { build <- "" } else {
          if(length(unique(bb))==1) { build <- ucsc.sanitizer(bb[1]) } else { build <- "" }  
        }
        cN <- colnames(mcols(from))
        ii <- match(toupper("A1"), toupper(cN))
        if(!is.na(ii)) { a1 <- mcols(from)[,ii] } else { a1 <- NULL }
        ii <- match(toupper("A2"), toupper(cN))
        if(!is.na(ii)) { a2 <- mcols(from)[,ii] } else { a2 <- NULL }
        ii <- match(toupper("rs.id"), toupper(cN))
        if(!is.na(ii)) { rss <- mcols(from)[,ii] } else { rss <- NULL }
        ii <- match(toupper("chip.id"), toupper(cN))
        if(!is.na(ii)) { cid <- mcols(from)[,ii] } else { cid <- NULL }
        ii <- match(toupper("QCcode"), toupper(cN))
        if(!is.na(ii)) { qcc <- mcols(from)[,ii] } else { qcc <- NULL }
        ChipInfo("ChipInfo",GRanges=from,chip="unknown chip",build=build,rs.id=rss,chip.id=cid,A1=a1,A2=a2,QCcode=qcc)
      }
)

#' As("RangedData", "ChipInfo")
#'
#' @name as
# @rdname ChipInfo-class
#' @export
setAs("RangedData", "ChipInfo", function(from) { as(as(from,"GRanges"),"ChipInfo") } )

#' As("data.frame", "ChipInfo")
#'
#' @name as
# @rdname ChipInfo-class
#' @export
setAs("data.frame", "ChipInfo", 
      function(from) { 
        rr <- df.to.GRanges(from,chr="seqnames") 
        return(as(as(rr,"GRanges"),"ChipInfo"))
      } 
)



# improved coersion functions for data.frames to RangedData/GRanges

#' As("data.frame", "RangedData")
#'
#' @name as
#' @export
setAs("data.frame", "RangedData", function(from) { return(df.to.ranged(from,GRanges=FALSE))  } )

#' As("data.frame", "GRanges")
#'
#' @name as
#' @export
setAs("data.frame", "GRanges", function(from) { return(df.to.ranged(from,GRanges=TRUE))  } )

#' As("RangedData", "data.frame")
#'
#' Note that for automatic conversion of a data.frame to RangedData/GRanges, a column named 'chr' 
#' or 'seqnames' in the data.frame is expected/required to make the conversion effectively. 
#' Otherwise use 'ranged.to.data.frame()'
#' @name as
#' @export
setAs("RangedData", "data.frame", function(from) { return(ranged.to.data.frame(from))  } )

#' As("GRanges", "data.frame")
#'
#' @name as
#' @export
setAs("GRanges", "data.frame", function(from) { return(ranged.to.data.frame(from))  } )



# No roxygen required i think?
setValidity("ChipInfo",
            function(object) {
              if (!is.character(chip(object)) || length(chip(object)) != 1 || is.na(chip(object))) {
                return("'chip' slot must be a single string") 
              }
              if (!is.character(ucsc(object)) || length(ucsc(object)) != 1 || is.na(ucsc(object))) {
                return("'build' slot must be a single string") 
              } else {
               # requireNamespace(humarray)
                if(!paste(ucsc(object)) %in% c("",paste(as.vector(ucsc.sanitizer(show.valid=T)[,1])))) {
                  return("'build' must be a string, 36/37/38 or hg18/hg19/hg38") 
                }
              }
            }
)

#' Display a ChipInfo object
#' 
#' Returns a preview of a ChipInfo object to the console. This
#' is similar to a GRanges preview, but the seqlevels are hidden, the UCSC
#' build and chip name are displayed, start and end are merged to the virtual
#' label 'position' (as it's assume we are dealing with SNPs, not ranges), the strand
#' by default is hidden, and the integer codes for pass/fail in QCcodes() are 
#' displayed as 'pass' or 'fail', even though this is not how they are represented internally.
#' This is called by the default 'show' method for ChipInfo objects. 
#' @param x a ChipInfo object
#' @param margin margin for display, usually ""
#' @param with.classinfo logical, whether to display class information
#' @param print.seqlengths logical, whether to display sequence lengths below
#' the main output listing (e.g, chromsomes). Usually tidier when this is FALSE.
#' @param ... hidden arguments including: 'head.tail'; number of SNPs to display 
#' at start/end (only the head and tail are shown as these objects are generally
#' very large with >100K SNPs); 'up.to'; only SNPs at the start and end are generally
#' displayed, however this parameter specifies that when there are <= 'up.to' SNPs,
#' then all SNPs will be displayed; 'show.strand'; logical, by default the strand is 
#' hidden, particularly given that the strand can vary between different datasets 
#' of the same chip. Setting to TRUE will display the strand.
#' @return print compact preview of the object to the standard output (terminal)
#' @seealso \code{\link{ChipInfo}}
#' @export
showChipInfo <- function (x, margin = "", with.classinfo = FALSE, print.seqlengths = FALSE,...) 
{
  lx <- length(x)
  nc <- ncol(mcols(x))
  qc <- QCcode(x)
  bb <- ucsc(x)
  if(bb=="") { bb <- "unknown" }
  if(length(qc)==lx) { QC <- rep("pass",lx); QC[qc>0] <- paste0("fail",QC[qc>0]) ; x$QCcode <- QC }
  cat("ChipInfo for ",chip(x)," with ", lx, " ", ifelse(lx == 1L, "SNP", 
                   "SNPs")," using ",bb," coordinates",":\n", sep = "")
  out <- makePrettyMatrixForCompactPrinting2(x, .makeNakedMatFromChipInfo,...)
  if (nrow(out) != 0L) 
    rownames(out) <- paste0(margin, rownames(out))
  print(out, quote = FALSE, right = TRUE)
}


#internal
extraColumnSlots2 <- function(x) {
  sapply(extraColumnSlotNames2(x), slot, object = x, simplify = FALSE)
}

#internal
setGeneric("extraColumnSlotNames2",
           function(x) standardGeneric("extraColumnSlotNames2"))
           
#' Get extra column slot names
#' 
#' Return the list of chromosome start and end indexes from a RangedData object
#' @param x any object
#' @return internal function
#' @rdname extraColumnSlotNames2-methods
#' @exportMethod extraColumnSlotNames2
setMethod("extraColumnSlotNames2", "ANY", function(x) character())


#internal
.makeNakedMatFromChipInfo <- function (x,show.strand=TRUE) 
{
  lx <- length(x)
  nc <- ncol(mcols(x))
  if(!show.strand) {
    ans <- cbind(seqnames = as.character(seqnames(x)), ranges = showAsCell(ranges(x)))
  } else { 
    ans <- cbind(seqnames = as.character(seqnames(x)), ranges = showAsCell(ranges(x)),strand = as.character(strand(x)))
  }
  extraColumnNames <- extraColumnSlotNames2(x)
  if (length(extraColumnNames) > 0L) {
    ans <- do.call(cbind, c(list(ans), lapply(extraColumnSlots2(x), 
                                              showAsCell)))
  }
  if (nc > 0L) {
    df <- mcols(x)
    if(tail(colnames(df),1)=="chip.id") { df <- df[,-ncol(df)] }
    tmp <- do.call(data.frame, c(lapply(df, showAsCell), 
                                 list(check.names = FALSE)))  ### hide chip.id here!
    ans <- cbind(ans, `|` = rep.int("|", lx), as.matrix(tmp))
    if(all(colnames(ans)[1:2]==c("seqnames","ranges"))) { colnames(ans)[1:2] <- c("chr","pos") }
  }
  ans
}


#internal
makePrettyMatrixForCompactPrinting2 <- function (x, makeNakedMat.FUN,head.tail=6,up.to=50, show.strand=TRUE) 
{
  lx <- NROW(x)
  if(lx <= up.to) { head.tail <- up.to }
  nhead <- head.tail
  ntail <- head.tail
  if (lx < (nhead + ntail + 1L)) {
    ans <- makeNakedMat.FUN(x,show.strand=show.strand)
    ans_rownames <- .rownames3(names(x), lx)
  }
  else {
    top_idx <- 1:nhead
    if (nhead == 0) 
      top_idx <- 0
    bottom_idx = (lx - ntail + 1L):lx
    if (ntail == 0) 
      bottom_idx <- 0
    ans_top <- makeNakedMat.FUN(x[top_idx, , drop = FALSE],show.strand=show.strand)
    ans_bottom <- makeNakedMat.FUN(x[bottom_idx, , drop = FALSE],show.strand=show.strand)
    ans <- rbind(ans_top, matrix(rep.int("...", ncol(ans_top)), 
                                 nrow = 1L), ans_bottom)
    ans_rownames <- .rownames3(names(x), lx, top_idx, bottom_idx)
  }
  rownames(ans) <- format(ans_rownames, justify = "right")
  ans
}

#internal
.rownames3 <- function (names = NULL, len = NULL, tindex = NULL, bindex = NULL) 
{
  if (is.null(tindex) && is.null(bindex)) {
    if (len == 0L) 
      character(0)
    else if (is.null(names)) 
      paste0("[", seq_len(len), "]")
    else names
  }
  else {
    if (!is.null(names)) {
      c(names[tindex], "...", names[bindex])
    }
    else {
      s1 <- paste0("[", tindex, "]")
      s2 <- paste0("[", bindex, "]")
      if (all(tindex == 0)) 
        s1 <- character(0)
      if (all(bindex == 0)) 
        s2 <- character(0)
      c(s1, "...", s2)
    }
  }
}



#' Select chromosome subset for ranged objects
#' 
#' Returns the object filtered for specific chromosomes for a ranged object
#' @param object a ChipInfo, GRanges or RangedData object
#' @param chr vector, string or numeric of which chromosome(s) to select
#' @return vector of chromosome values for each range/SNP
#' @rdname chrSel-methods
#' @export
setGeneric("chrSel", function(object,chr) standardGeneric("chrSel"))


#' Select chromosome subset for RangedData objects
#' 
#' Returns the object filtered for specific chromosomes for a RangedData object
#' @rdname chrSel-methods
#' @exportMethod chrSel
setMethod("chrSel", "RangedData", function(object,chr) {
  return(humarray::chrSelect(object,chr))
})


#' Select chromosome subset for GRanges objects
#' 
#' Returns the object filtered for specific chromosomes for a GRanges object
#' @rdname chrSel-methods
#' @exportMethod chrSel
setMethod("chrSel", "GRanges", function(object,chr) {
  return(humarray::chrSelect(object,chr))
})

#' Select chromosome subset for ChipInfo objects
#' 
#' Returns the object filtered for specific chromosomes for a GRanges object
#' @rdname chrSel-methods
#' @exportMethod chrSel
setMethod("chrSel", "ChipInfo", function(object,chr) {
  return(humarray::chrSelect(object,chr))
})



#' Chromosome method for RangedData objects
#' 
#' Return the list of chromosome values from a RangedData object
#' @param object RangedData object
#' @return vector of chromosome values for each range/SNP
#' @rdname chrm-methods
#' @export
setGeneric("chrm",function(object) standardGeneric("chrm"))

#' @rdname chrm-methods
#' @exportMethod chrm
setMethod("chrm", "RangedData", function(object) {
  return(chr2(object))
})

#' @rdname chrm-methods
#' @exportMethod chrm
setMethod("chrm", "GRanges", function(object) {
  return(chr2(object))
})

#' @rdname chrm-methods
#' @exportMethod chrm
setMethod("chrm", "ChipInfo", function(object) {
  return(chr2(object))
})



# #' Genome order method for RangedData objects
# #' 
# #' Return the list of chromosome values from a RangedData object
# #' @param strict for compatibility with genoset toGenomeOrder, recommend setting TRUE
# #' @param object RangedData object
# #' @return vector of chromosome values for each range/SNP
# #' @rdname toGenomeOrder2-methods
# #' @export
# setGeneric("toGenomeOrder2",function(object,strict) standardGeneric("toGenomeOrder2"))

# # importMethodsFrom "genoset"  toGenomeOrder  
# #' @rdname toGenomeOrder2-methods
# #' @exportMethod toGenomeOrder2
# setMethod("toGenomeOrder2", "RangedData", function(object,strict) {
  # return(TGORD(object))
# })
# #' @rdname toGenomeOrder2-methods
# #' @exportMethod toGenomeOrder2
# setMethod("toGenomeOrder2", "GRanges", function(object,strict) {
  # return(TGOGR(object))
# })


#' Chromosome indices method for ranged objects
#' 
#' Return the list of chromosome start and end indexes from a RangedData object
#' @param object RangedData or GRanges object
#' @return matrix of indexes, colnames  first, last, offset
#' @rdname chrIndices-methods
# #' @export
# setGeneric("chrIndices",function(object) standardGeneric("chrIndices"))
# #' @rdname chrIndices-methods
#' @exportMethod chrIndices
setMethod("chrIndices", "RangedData", function(object) {
  return(chrIndices2(object))
})

# #' @rdname chrIndices-methods
# #' @exportMethod chrIndices
# setMethod("chrIndices", "GRanges", function(object) {
  # return(chrIndices2(object))
# })


#' Chromosome info method for ranged objects
#' 
#' Return the list of chromosome start and end ranges from a RangedData object
#' @param object RangedData or GRanges object
#' @return matrix of ranges, colnames start, stop, offset
#' @rdname chrInfo-methods
# #' @export
# setGeneric("chrInfo",function(object) standardGeneric("chrInfo"))
# #' @rdname chrInfo-methods
#' @exportMethod chrInfo
setMethod("chrInfo", "RangedData", function(object) {
  return(chrInfo2(object))
})

# #' @rdname chrInfo-methods
# #' @exportMethod chrInfo
# setMethod("chrInfo", "GRanges", function(object) {
  # return(chrInfo2(object))
# })


#' Chromosome names method for ranged objects
#' 
#' Return the list of chromosome labels from a RangedData object
#' @param object RangedData or GRanges object
#' @return vector of names
#' @rdname chrNames-methods
# #' @export
# setGeneric("chrNames",function(object) standardGeneric("chrNames"))
# #' @rdname chrNames-methods
#' @exportMethod chrNames
setMethod("chrNames", "RangedData", function(object) {
  return(chrNames2(object))
})

# #' @rdname chrNames-methods
# #' @exportMethod chrNames
# setMethod("chrNames", "GRanges", function(object) {
  # return(chrNames2(object))
# })


#' Plot method for GRanges objects
#' 
#' See plotRanges()
# @name plot
#' @param x a GRanges or RangedData object
#' @param y not used for plotRanges
#' @param ... further arguments, see plotRanges()
#' @rdname plot-methods
#' @aliases GRanges GRanges-method
#' @seealso \code{\link{plotRanges}}
#' @exportMethod plot
setMethod("plot", "GRanges", function(x,y,...) {
  plotRanges(ranged=x,...)
})


#' Plot method for RangedData objects
#' 
# @name plot
#' @rdname plot-methods
#' @aliases RangedData RangedData-method
#' @exportMethod plot
setMethod("plot", "RangedData", function(x,y,...) {
  plotRanges(ranged=x,...)
})

##################################
## General Annotation Functions ##
##################################




#' Download GWAS hits from t1dbase.org
#' 
#' Deprecated as this data is no longer available online
#' Retrieve human disease top GWAS hits from t1dbase in build hg19 coords (37).
#' 28 Diseases currently available
#' @param disease integer (1-28), or character (abbreviation), or full name of one of the listed
#' diseases. A full list of options can be obtained by setting show.codes=TRUE.
#' @param snps.only logical, default is just to return a list of rs-ids. Setting FALSE gives a table
#' @param show.codes logical, if set to TRUE, instead of looking up t1dbase, will simply return
#' a table of available diseases with their index numbers and abbreviations.
#' @return A character vector of SNP rs-ids
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @references PMID: 20937630
#' @examples
#' get.immunobase.snps(show.codes=TRUE) # show codes/diseases available to download
#' \donttest{
#' # Deprecated as this data is no longer available online
#' # get.immunobase.snps(disease="CEL") # get SNP ids for celiac disease
#' # get.immunobase.snps(disease="AS") # get SNP ids for Ankylosing Spondylitis in build-37/hg19
#' # get.immunobase.snps(disease=27) # get SNP ids for Alopecia Areata
#' # get.immunobase.snps("Vitiligo")
#' }
get.immunobase.snps <- function(disease="T1D",snps.only=TRUE,show.codes=FALSE) {
  disease.codes <- c("Type 1 Diabetes", "Crohns Disease","Rheumatoid Arthritis",
                     "Systemic Scleroderma",  "Ulcerative Colitis","Inflammatory Bowel Disease",  "Multiple Sclerosis",
                     "Bipolar Disorder",  "Diabetes Mellitus",  "Coronary Artery Disease",  "Hypertension",  "Celiac Disease",
                     "Systemic Lupus Erythematosus",  "Ankylosing Spondylitis",  "Type 2 Diabetes",  "Sjogren Syndrome",
                     "Graves' Disease",  "Juvenile Rheumatoid Arthritis",  "Vitiligo",  "Primary Biliary Cirrhosis",
                     "Psoriasis",  "Idiopathic Membranous Nephropathy",  "Immunoglobulin A Deficiency",
                     "Autoimmune Thyroid Disease",  "Juvenile Idiopathic Arthritis",  "Narcolepsy",  "Alopecia Areata",
                     "Alzheimer's Disease")
  abbr <- c("T1D","CD","RA","SCL","UC","IBD","MS","BD","DM","CAD","HYP",
            "CEL","SLE","AS","T2D","SS","GD","JRA","VIT",
            "PBC","PSO","IMN","IGA","ATD","JIA","NAR","AA","AD")
  code.table <- cbind(Abbreviation=abbr,FullNames=disease.codes)
  if(show.codes) { cat("values for the 'disease' parameter can be specified by the following index numbers or abbreviations:\n")
                   print(code.table,quote=F) ; return() }
  disease <- disease[1]
  if(toupper(disease) %in% abbr) { 
    disN <- match(toupper(disease),toupper(abbr)) 
  } else {
    if(toupper(disease) %in% toupper(disease.codes)) {
      disN <- match(toupper(disease),toupper(disease.codes))
    } else {
      if(as.numeric(disease) %in% 1:length(disease.codes)) { 
        disN <- as.numeric(disease)
      } else {
        stop("Invalid input for 'disease', use show.codes=TRUE to see list of codes/abbreviations")
      }
    }
  }
 	
   # unfortunately must add this as immunobase will no longer allow autodownload
  immunobase.has.changed("error")
  
  #if(is.null(build)) { build <- getOption("ucsc") }
  build <- "hg19" # ucsc.sanitizer(build)
  if(!build %in% c("hg18","hg19")) { stop("only hg18 and hg19 are supported for this function") }
  filenm <- cat.path(dir=getwd(),pref=tolower(abbr[disN]),"hits",suf=build,ext="tab")
  cat("attempting to download",abbr[disN],"hits from t1dbase\n")
  url36 <- paste("http://www.immunobase.org/webservice/RegionDownloads/=/model/variantsTAB/species=Human&disease_id=",disN,"&type=assoc&build=GRCh36",sep="")
  url37 <- paste("http://www.immunobase.org/webservice/RegionDownloads/=/model/variantsTAB/species=Human&disease_id=",disN,"&type=assoc&build=GRCh37",sep="")
#  url36 <- paste("http://www.t1dbase.org/webservice/RegionDownloads/=/model/variantsTAB/species=Human&disease_id=",disN,"&type=assoc&build=GRCh36",sep="")
#  url37 <- paste("http://www.t1dbase.org/webservice/RegionDownloads/=/model/variantsTAB/species=Human&disease_id=",disN,"&type=assoc&build=GRCh37",sep="")
  urL <- switch(build, hg18=url36,  hg19=url37)
  success <- T
  success <- tryCatch(download.file(urL ,filenm ,quiet=T),error=function(e) { F } )
  #prv(filenm,urL,success)
  if(!is.logical(success)) { success <- T }
  if(success) {
    t1dh <- readLines(filenm)
    firsts <- substr(t1dh,1,2)
    t1dh <- t1dh[firsts!="##"]
    len.lst <- strsplit(t1dh,"\t")
    rsids <- sapply(len.lst,"[",3)
    if(substr(rsids[1],1,2)!="rs") { rsids <- rsids[-1] }
    if(length(rsids)<1) { 
      cat("download successful but the list of hits for",disease.codes[disN],"was empty\n") 
    } else {
      cat("download successful for",disease.codes[disN],"\n")
    }
    if(snps.only) {
      return(unique(rsids))
    } else {
      return(read.delim(filenm,comment.char="#"))
    } 
  } else {
    stop("couldn't reach t1dbase website at: ",urL)
  }
}




#' Retreive GO terms from biomart for a given gene list
#' 
#' Gene-ontology terms (GO-terms) are commonly used for testing for simple functional
#' enrichment for pathways, etc. This function can retrieve biological function, 
#' cellular component, or molecular description, depending on the parameters chosen.
#' @param gene.list a list of gene, use HGNC names, like COMT, HLA-C, CTLA4, etc.
#' @param bio logical, whether to return biological process GO terms
#' @param cel logical, whether to return cellular component GO terms
#' @param mol logical, whether to return molecular function GO terms
#' @param host.txt character, the argument to pass to biomaRt::useMart(). Default is 
#' 'may2009.archive.ensembl.org', but more recently the recommended link is 'www.ensembl.org'
#' @return data.frame containing the gene name in the first column, chromosome in the
#' second column, and the GO terms in the third column, where one gene has multiple
#' GO terms, this will produce multiple rows, so there will usually be more rows
#' than genes entered. The data.frame can have 3,4 or 5 columns depending on
#' how many GO terms are selected.
#' @export
#' @examples
#' get.GO.for.genes(c("CTLA4","PTPN2","PTPN22")) # biological terms (default)
#' get.GO.for.genes(c("CTLA4","PTPN2","PTPN22"),cel=TRUE) # add cellular GO terms
get.GO.for.genes <- function(gene.list,bio=T,cel=F,mol=F,host.txt="may2009.archive.ensembl.org") {
 # must.use.package(c("biomaRt","genoset","gage"),T)
  mart.txt <- "ENSEMBL_MART_ENSEMBL"
  ens <- biomaRt::useMart(mart.txt,
                 dataset="hsapiens_gene_ensembl",
                 host= host.txt,
                 path="/biomart/martservice",
                 archive=FALSE)
  ens <- biomaRt::useDataset("hsapiens_gene_ensembl",mart=ens)
  #egSymb <- reader("~/github/iChip/egSymb.rda")
  egSymb <- humarray::egSymb
  base.attr <- c("hgnc_symbol", "chromosome_name")
  if(bio) { base.attr <- c(base.attr,"go_biological_process_description") }
  if(cel) { base.attr <- c(base.attr,"go_cellular_component_description") }
  if(mol) { base.attr <- c(base.attr,"go_molecular_function_description") }
  #  dat <- getBM(attributes = c("hgnc_symbol", "chromosome_name",
  #                              "start_position", "end_position", "band"), filters = "hgnc_symbol",
  #               values = egSymb[,2], mart = ens)
  results <- biomaRt::getBM(attributes = base.attr, filters = "hgnc_symbol",
                   values = c(gene.list), mart = ens)
  return(results)
}




#' Convert ensembl ids to HGNC gene ids 
#' 
#' Retrieve the gene IDs (HGNC) corresponding to a list of ensembl gene ids.
#' Note that this will not find all IDs found on ensembl.org, as it uses bioMart which
#' seems to be incomplete, but this only pertains to a small minority of genes, so this
#' function should have general utility for most applications. This is of course the case
#' at the time of writing - bioMart is likely to be updated at some point.
#' @param ens character, a list of ensembl gene ids, of the form ENSG00xxxxxxxxx
#' @param ... further arguments to get.gene.annot()
#' @param dir character, 'dir' is the location to download gene and cytoband information; if
#' left as NULL, depending on the value of getOption("save.annot.in.current"), the annotation
#' will either be saved in the working directory to speed-up subsequent lookups, or deleted 
#' after use.
#' @param build character, "hg18" or "hg19" (or 36/37) to show which reference to retrieve. The 
#' default when build is NULL is to use the build from the current ChipInfo annotation
#' @param name.dups logical, if TRUE then duplicates will have a suffix appended to force the
#' list to be unique (e.g, so it would be usable as rownames, or in a lookup table). Otherwise
#' duplicate entries will just appear in the list multiple times
#' @param name.missing logical, if TRUE then missing values will be named as MISSING_n (n=1
#'  to # of missing), ensuring a valid unique name if the results are to be used as rownames,
#' etc. If FALSE then these will be left as NA. 
#' @return Returns a vector of HGNC gene ids corresponding to the 'ens' ensembl ids entered,
#' any ids not found will be returned as MISSING_n (n=1 to # of missing), if name.missing=TRUE.
#' If name.missing is FALSE then missing will be set to NA. Similarly with 'name.dups', if
#' duplicates are found and name.dups is true, each will be appended with suffix _n; else
#' their names will be left as is.
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @seealso \code{\link{GENE.to.ENS}}, \code{\link{rs.to.id}}, \code{\link{id.to.rs}}; eg2sym, sym2eg from package 'gage'
#' @examples
#' \donttest{
#' setwd(tempdir())
#' ENS.ids <- c("ENSG00000183214", "ENSG00000163599", "ENSG00000175354", "ENSG00000134460")
#' ENS.to.GENE(ENS.ids)
#' gene.ids <- c("HLA-B","IFIH1","fake_gene!","FUT2")
#' ENS.to.GENE(GENE.to.ENS(gene.ids)) # lookup fails for the fake id, gives warning
#' }
ENS.to.GENE <- function(ens,dir=NULL,build=NULL,name.dups=FALSE,name.missing=TRUE,...) {
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
#  must.use.package(c("biomaRt","genoset","gage"),T)
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  ga <- get.gene.annot(...,dir=dir,bioC=FALSE,ens.id=TRUE,GRanges=FALSE)
  ### now have the gene data with the ensembl ids ##
  #print(head(ga))
  indx <- match(ens,ga$ens.id)
  missin <- length(which(is.na(indx))); valid <- length(indx)-missin
  if(valid<1) { warning("did not find any ENSEMBL ids from 'ens' in the bioMart human gene reference"); return(NULL) }
  if(missin>0) { warning(out.of(missin,(valid+missin))," of 'ens' did not match any ENSEMBL ids in the bioMart human gene reference") }
  outData <- ga$gene[indx]
  if(name.missing & any(is.na(outData))) {
    outData[is.na(outData)] <- paste("MISSING",pad.left(1:length(which(is.na(outData))),"0"),sep="_")
  }
  if(any(duplicated(outData))) { 
    if(name.dups) { 
      cnt <- 2
      while(any(duplicated(outData))) { 
        if(cnt==2) {
          outData[duplicated(outData)] <- paste(outData[duplicated(outData)],cnt,sep="_")
        } else {
          outData[duplicated(outData)] <- gsub(paste("_",cnt-1,sep=""),paste("_",cnt,sep=""),outData[duplicated(outData)])
        }
        cnt <- cnt + 1
      }
    } else { 
      warning("duplicated gene names produced, select 'name.dups=TRUE' to append numbers to make these unique")
    }
  }
  return(outData)
}


#' Convert gene ids to ensembl ids
#' 
#' Retrieve the ensembl IDs corresponding to a list of common gene names (HGNC format).
#' @param genes character, gene labels, e.g, "APOE"
#' @param ... further arguments to get.gene.annot()
#' @param dir character, 'dir' is the location to download gene and cytoband information; if
#' left as NULL, depending on the value of getOption("save.annot.in.current"), the annotation
#' will either be saved in the working directory to speed-up subsequent lookups, or deleted 
#' after use.
#' @return Returns a vector of HGNC gene ids corresponding to the 'ens' ensembl ids entered,
#' any ids not found will be returned as NA.
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @seealso \code{\link{GENE.to.ENS}}, \code{\link{rs.to.id}}, \code{\link{id.to.rs}}; eg2sym, sym2eg from package 'gage'
#' @examples
#' \donttest{
#' setwd(tempdir())
#' gene.ids <- c("MYC","PTPN2","IL2RA","APOE")
#' GENE.to.ENS(gene.ids)
#' }
GENE.to.ENS <- function(genes,dir=NULL,...) {
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  ga <- get.gene.annot(...,dir=dir,bioC=FALSE,ens.id=TRUE,GRanges=FALSE)
  ### now have the gene data with the ensembl ids ##
  indx <- match(genes,ga$gene)
  missin <- length(which(is.na(indx))); valid <- length(indx)-missin
  if(valid<1) { warning("did not find any gene ids from 'genes' in the bioMart human gene reference"); return(NULL) }
  if(missin>0) { warning("at least one of 'genes' did not match any gene ids in the bioMart human gene reference") }
  outData <- ga$ens.id[indx]
  return(outData)
}



#' Retrieve the 'n' closest GENE labels or positions near specified locus
#' 
#' @param chr integer, chromosome, should be a number from 1 to 25, where 23,24,25 are X,Y,MT
#' @param pos integer, genomic position, should be between 1 and the length of the chromosome 'chr'
#' @param n integer, the number of nearest GENEs to seek, if there aren't enough in the annotation
#' then NAs will fill the gaps to force the return value length to equal 'n'
#' @param side character, can be 'either', 'left' or 'right' and specifies which side of the 'pos'
#' to look for nearest genes (where left is decreasing genomic position and right is increasing)
#' @param ids logical, if TRUE will return GENE labels, 
#' or if FALSE will return the chromosome positions of the genes
#' @param limit integer, a limit on the maximum distance from the position 'pos' can be specified
#' @param build integer whether to use build 36/37 parameters, 36/37 is preferred, but can enter
#' using any form recognised by ucsc.sanitizer()
#' @param ga RangedData object, e.g, result of get.gene.annot(); gene annotation to save download
#' time if repeatedly calling this function
#' @export
#' @seealso \code{\link{expand.nsnp}}, \code{\link{nearest.snp}}, \code{\link{get.gene.annot}}
#' @return Set of GENE ids (when ids=TRUE), or otherwise genomic positions within chromosome 'chr'.
#' If the number of gemes on the chromosome or the bounds of the 'side' and 'limit' parameters
#' restrict the number returned to less than 'n' then the return value will be padded with NAs.
#' @examples
#' \donttest{
#' nearest.gene(1,159000000,n=10) # return ids
#' nearest.gene(1,159000000,n=10,build=37)
#' nearest.gene(1,159000000,n=10,build=36,ids=FALSE) # return positions
#' nearest.gene(1,159000000,n=10,build=37,ids=FALSE)
#' nearest.gene(6,25000000,n=10,build=37,ids=FALSE,side="left")  # only genes to the left of the locus
#' nearest.gene(6,25000000,n=10,build=37,ids=FALSE,side="right") # only genes to the right of the locus
#' }
nearest.gene <- function(chr, pos, n=1, side=c("either","left","right"),ids=TRUE,limit=NULL,build=NULL, ga=NULL) { 
  # ids - whether to return ichip SNP ids or positions
  if(length(chr)>1) { warning("chr should be length 1, using only first entry"); chr <- chr[1] }
  if(length(pos)>1) { warning("pos should be length 1, using only first entry"); pos <- pos[1] }
  if(is.null(build)) { build <- getOption("ucsc") }
  chrom <- paste(chr)
  build <- ucsc.sanitizer(build)
  if(is(get.gene.annot)[1]=="RangedData") { 
    if(!"gene" %in% colnames(ga)) { ga <- NULL }
  } else { ga <- NULL }
  if(is.null(ga)) {  
    ga <- get.gene.annot(build=build,GRanges=F) 
    if(!exists("ga")) { stop("couldn't find gene annotation") }  ## load object: ga [gene database]
    ga <- ga[ga$gene!="",]
  }
  side <- tolower(side[1]); 
  if(!side %in% c("either","left","right")) {
    side <- "either"; warning("invalid side argument, defaulting to 'either'") }
  if(!is.null(limit)) { if(!is.numeric(limit)) { limit <- NULL; warning("invalid limit argument, defaulting to NULL") } }
  all.chr <- paste(chr2(ga))
  all.st <- start(ga)[all.chr %in% chrom]
  all.en <- end(ga)[all.chr %in% chrom]
  #prv(all.st,all.en)
  if(length(all.st)<1) { warning("no positions found for 'chr' specified"); return(NULL) }
  difzS <- pos-all.st
  difzE <- pos-all.en
  all.true <- difzS==difzS
  if(is.null(limit)) { lfilt <- all.true } else { lfilt <- abs(difzS)<=limit | abs(difzE)<=limit }
  within <- difzS>0 & difzE<0
  if(side=="left") { filt <- ( difzE>0 & lfilt ) | within }
  if(side=="right") { filt <- ( difzS<0 & lfilt ) | within }
  if(side=="either") { filt <- ( all.true & lfilt ) | within }
  #print(length(which(filt)))
  tab <- rbind(abs(difzS),abs(difzE))
  minz <- apply(tab,2,min,na.rm=T)
  Difz <- abs(minz[filt])
  if(length(Difz)<n)  { warning("fewer than ",n," genes found for 'chr' specified (within 'limit'), NAs returned") }
  indx <- order(Difz)[1:n]
  # prv(minz,Difz,filt,indx)
  subi <- ga[["gene"]][all.chr %in% chrom][filt]
  #prv(ga,subi,all.chr,chrom)
  if(ids) {
    out <- ga[["gene"]][all.chr %in% chrom][filt][indx]
  } else {
    out <- start(ga)[(all.chr %in% chrom)][filt][indx]
  }
  return(out)
}




#' Plot genes to annotate figures with genomic axes
#' 
#' Quite often it is helpful to visualize genomic locations in the context of Genes
#' in the same region. This function makes it simple to overlay genes on plots
#' where the x-axis is chromosomal location.
#' @param chr chromosome number/name that the plot-range lies on
#' @param scl character, the scale that the x axis uses, ie, "b","kb","mb", or "gb", meaning
#' base-pairs, kilobases, megabases or gigabase-pairs.
#' @param y.ofs numeric, y-axis-offset, depending on what units are on your y-axis,
#' you may prefer to specify an offset so that the gene annotation is drawn at an appropriate
#' level on the vertical axis, this value should be the centre of annotation
#' @param width depending on the range of your y-axis, you might want to expand or reduce
#' the vertical width of the gene annotation (in normal graph units), default
#' when width=NA is 10 percent of the y-axis size.
#' @param txt logical, TRUE to include the names of genes on top of their representation
#' on the plot, or if FALSE, genes are drawn without labels.
#' @param chr.pos.offset if for some reason zero on the x-axis is not equal to 'zero' on
#' the chromsome, then this offset can correct the offset. For instance if you were using
#' a graph of the whole genome and you were plotting genes on chromosome 10, you would
#' set this offset to the combined lengths of chromosomes 1-9 to get the start point
#' in the correct place.
#' @param gs GRanges or RangedData object, this is annotation for the location of genes.
#' This will be retrieved using get.gene.annot() if 'gs' is NULL. THere may be several reasons
#' for passing an object directly to 'gs'; firstly speed, if making many calls then you won't
#' need to load the annotation every time; secondly, if you want to use an alternative annotation
#' you can create your own so long as it is a GRanges/RangedData object and contains a column
#' called 'gene' (which doesn't strictly have to contain gene labels, it could be any feature
#' you require, eg., transcript names, etc).
#' @param dir character, location to store file with the gene annotation.
#' If NULL then getOption("save.annot.in.current")>=1 will result in
#' this file being stored in the current directory, or if <=0, then this file will not
#' be stored.
#' @param build string, currently 'hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is build-36/hg-18. Will also accept integers 36,37 as alternative arguments.
#' @param box.col genes are drawn as boxes, this sets the colour of the boxes
#' @param txt.col this sets the colour of the label text (Gene names)
#' @param join.col for exons, or multipart genes, joins are made between the sections with
#' a central line, this sets the colour of that line.
#' @param ... further arguments to 'rect', the graphics function used to plot the 'genes'.
#' @export
#' @return Returns a data.frame, GRanges or RangedData object, depending on input parameters. Contained
#' will be HGNC gene labels, chromosome and start and end positions, other information depends on 
#' specific parameters documented above
#' @export
#' @examples
#' # EXAMPLE PLOT OF SOME SIMULATED SNPS on chr21-p11.1 #
#' # do we need to require(GenomicRanges)? #
#' setwd(tempdir())
#' loc <- c(9.9,10.2)
#' Band(chr=21,pos=loc*10^6)
#' rr <- in.window(rranges(50000),chr=21,pos=loc,unit="mb") # make some random MHC ranges
#' # create some SNPs and plot
#' rr3 <- rr; end(rr3) <- start(rr3) 
#' rownames(rr3) <- paste0("rs",sample(10^6,nrow(rr3)))
#' plotRanges(rr3,col="blue",scl="mb",xlim=loc,xlab="Chr21 position (Mb)",ylab="")
#' # NOW add UCSC hg18 GENE annotation to the plot #
#' \donttest{ plotGeneAnnot(chr=21,pos=c(9.95,10.1),scl="mb",y.ofs=1,build=36) }
plotGeneAnnot <- function(chr=1, scl=c("b","kb","mb","gb"), y.ofs=0, width=NA, txt=T, chr.pos.offset=0,
                            gs=NULL, build=NULL, dir=NULL, box.col="green", txt.col="black", join.col="red", ...)
{
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  dir <- validate.dir.for(dir,"ano")
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  if(!is(gs)[1] %in% c("RangedData","GRanges")) { gs <- get.gene.annot(dir=dir,build=build,GRanges=FALSE) }
  if(is(gs)[1]=="GRanges") { gs <- as(gs,"RangedData") }
  if(!"gene" %in% colnames(gs)) { warning("didn't find 'gene' column in annotation") ; return(NULL) }
  Col <- c("green", "darkgreen")

  # get set of genes in range of the graph section + remove duplicate genes/exons
  plot.area <- plot_get_area();
  if(all(is.null(plot.area))) { pos <- c(1,Inf) } 
  x.lim <- plot.area$xlim;
  y.lim <- plot.area$ylim;
  pos <- x.lim*make.divisor(scl) # overrides pos
  if(any(is.na(pos))) { pos <- c(1,Inf) } 
  rng.genez <- in.window(gs,chr,pos,full.overlap=F, rmv.dup=T,unit=scl)
  if(nrow(rng.genez)<1) { warning("no genes found in range") ; return(NULL) }
  old.cc <- 1 ; tp <- 2
  # set vertical alignments for annotation
  old.auto <- F
  if(y.ofs==0) { y.ofs <- min(y.lim) + .1*(max(y.lim)-min(y.lim)) }
  if(is.na(width)) { width <- .1*(max(y.lim)-min(y.lim)) } 
  y.cent <- y.ofs
  y.bot <- y.ofs-(width/2)
  y.top <- y.ofs+(width/2)
  # text alignment
  tps <- y.bot + c(.18,.29,.46,.64,.82)[c(1,3,5)]*width
  # x position with scaling (e.g, Mb units = 10^6)
  #unit <- tolower(unit[1]) ; mult <- switch(unit,b=0,kb=3,mb=6,gb=9); pos <- pos*10^mult
  x.scl <- make.divisor(scl)
  cnrlo <- (start(rng.genez)/x.scl)+chr.pos.offset
  cnrhi <- (end(rng.genez)/x.scl)+chr.pos.offset
  gnnm <- (rng.genez$gene)
  n.genes <- length(cnrlo)
  txt.cex <- .75; if(n.genes>10) { txt.cex <- .5 } ; if(n.genes>100) { txt.cex <- .35 } # more genes = smaller labels
  #print(n.genes)
  for (cc in 1:n.genes) {
    # draw rectangle and label for each gene
    #prv(cnrlo[cc],y.top,cnrhi[cc], y.bot)
    rect(cnrlo[cc],y.top,cnrhi[cc], y.bot,border=box.col,...)
  }
  for (cc in 1:n.genes) {
    if (gnnm[old.cc]==gnnm[cc] & cc!=1)
    {
      link <- c(cnrhi[old.cc],cnrlo[cc])
      if(link[1]<link[2]) {
        lines(link,y=rep(y.cent,2),lwd=1,col=join.col,lty="dotted")
      }
    } else {
      if(txt) {
        if(cnrlo[cc] < min(pos/x.scl)) { 
          txt.x <- mean(c(min(pos/x.scl),min(cnrhi[cc],max(pos/x.scl))),na.rm=T)  
        } else { txt.x <- cnrlo[cc] }
        text(txt.x,tps[tp],gnnm[cc],col=txt.col,cex=txt.cex,pos=4,las=2,offset=0)
      }
    }
    old.cc <- cc  
    tp <- tp+1; if(tp==4) {tp <- 1}; #if(n.genes <=5) { tp <- 2 }
  }
  return(rng.genez)
}



#' Retrieve locations of Immunoglobin regions across the genome
#' 
#' Returns the locations of immunoglobin regions in the human genome, for a given build, as
#' a list by chromosome, text vector, or GRanges/RangedData object.
#' For instance, for CNV research, these regions are known to be highly structurally complex
#' and can lead to false positive CNV-calls, so are often excluded.
#' @param build string, currently 'hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is build-36/hg-18. Will also accept integers 36,37 as alternative arguments.
#' @param bioC logical, whether to return the annotation as a ranged S4 object (GRanges or
#' RangedData), or as a data.frame
#' @param text logical, whether to return locations as a text vector of the form: chrN:xxxx-xxxx
#' @param GRanges logical, whether to return a GRanges object, or FALSE to return RangedData
#' @export
#' @return Returns a list, GRanges or RangedData object, depending on input parameters. Contained
#' will be immunoglobin chromosome, start and end positions.
#' @examples
#' get.immunog.locs()
#' get.immunog.locs(bioC=FALSE)
#' get.immunog.locs(text=TRUE,build=37)
get.immunog.locs <- function(build=NULL,bioC=TRUE,text=FALSE,GRanges=TRUE) {
  # http://atlasgeneticsoncology.org/Genes/GC_IGH.html
  # ^ has list of CELL line breakpoints... for future filtering?
  nchr <- 22
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  if(build[1]=="hg18") {
    # hg18
    chr <- c(22,14,2,14)
    stz <- c(20715572,105065301,88937989,21159897)
    enz <- c(21595082,106352275,89411302,22090937)
  } else {
    if(build[1]=="hg38") {
      #hg38
      chr <- c(22,14,2,14)
      stz <- c(20668232,105594256,88857361,21621904)
      enz <- c(22922910,107281230,89917421,22552944)
    } else {
      # hg19
      chr <- c(22,14,2,14)
      stz <- c(22385572,105994256,89156874,22090057)
      enz <- c(23265082,107281230,89630187,23021097)
    }
  }
  nmz <- c("ig_c22","ig_c14_a","ig_c2","ig_c14_b")
  reg.dat <- rep("immunoglobin",length(chr))
  if(bioC | text) {
    #must.use.package(c("genoset","IRanges"),bioC=T)
    outData <- RangedData(ranges=IRanges(start=stz,end=enz,names=nmz),space=chr,
                          reg=reg.dat)  #,universe=build[1])
    outData <- toGenomeOrder2(outData,strict=T)
    if(text) { outData <- ranged.to.txt(outData) } else {
      if(GRanges) { outData <- as(outData,"GRanges") }
    }
  } else {
    outData <- vector("list",nchr); names(outData) <- paste("chr",1:nchr,sep="")
    for (cc in 1:nchr) {
      if(cc %in% chr) {
        outData[[cc]] <- list(start=stz[chr==cc],end=enz[chr==cc])
      }
    }
  }
  return(outData)
}


#' Return Centromere locations across the genome
#' 
#' Returns the locations of centromeres in the human genome, for a given build, as
#' a list by chromosome, text vector, or GRanges/RangedData object.
#' @param dir character, location to store file with the this annotation.
#' If NULL then getOption("save.annot.in.current")>=1 will result in
#' this file being stored in the current directory, or if <=0, then this file will not
#' be stored.
#' @param build string, currently 'hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is build-36/hg-18. Will also accept integers 36,37 as alternative arguments.
#' @param bioC logical, whether to return the annotation as a ranged S4 object (GRanges or
#' RangedData), or as a data.frame.
#' @param GRanges logical, whether to return a GRanges object, or FALSE to return RangedData
#' @param text logical, whether to return locations as a text vector of the form: chrN:xxxx-xxxx
#' @param autosomes logical, if TRUE, only return results for autosomes, if FALSE, also include
#' X and Y.
#' @export
#' @return Returns a list, GRanges or RangedData object, depending on input parameters. Contained
#' will be centromere chromosome and start and end positions.
#' @examples
#' setwd(tempdir())
#' get.centromere.locs()
#' get.centromere.locs(bioC=FALSE,autosomes=TRUE)
#' get.centromere.locs(text=TRUE)
get.centromere.locs <- function(dir=NULL,build=NULL,
                                bioC=TRUE,GRanges=TRUE,text=FALSE,autosomes=FALSE)
{
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  dir <- validate.dir.for(dir,c("ano"),warn=FALSE); success <- TRUE
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  local.file <- cat.path(dir$ano,"cyto")
  tt <- get.cyto(build=build,bioC=FALSE,dir=dir,GRanges=FALSE)
  chrn <- paste(1:22)
  if(!autosomes) { 
    chrn <- c(chrn,c("X","Y"))
  }
  nchr <- length(chrn)
  my.chr.range <- vector("list",nchr)
  names(my.chr.range) <- paste("chr",chrn,sep="")
  for (cc in 1:nchr) {
    just.centros <- tt[paste(tt[,5])=="acen",]
    just.chr <- just.centros[which(paste(just.centros[,1])==names(my.chr.range)[cc]),]
    my.chr.range[[cc]] <- list(start=min(just.chr[,2]), end=max(just.chr[,3]))
  }
  reg.dat <- rep("centromere",nchr)
  nmz <- paste(reg.dat,chrn,sep="_")
  stz <- sapply(my.chr.range,"[[",1)
  enz <- sapply(my.chr.range,"[[",2)
  if(bioC | text) {
    #must.use.package(c("genoset","IRanges"),bioC=TRUE)
    outData <- RangedData(ranges=IRanges(start=stz,end=enz,names=nmz),space=gsub("chr","",chrn),
                          reg=reg.dat) #,universe=build[1])
    outData <- toGenomeOrder2(outData,strict=TRUE)
    if(text) { 
      outData <- ranged.to.txt(outData) 
    } else {
      if(GRanges){
        outData <- as(outData,"GRanges")
      }
    }
  } else {
    outData <- my.chr.range 
  }
  return(outData)
}


#' Return Cytoband/Karyotype locations across the genome
#' 
#' Returns the locations of cytobands/karyotype-bands in the human genome, for a given build, as
#' a data.frame, or GRanges/RangedData object.
#' @param dir character, location to store file with the this annotation.
#' If NULL then getOption("save.annot.in.current")>=1 will result in
#' this file being stored in the current directory, or if <=0, then this file will not
#' be stored.
#' @param build string, currently 'hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is build-36/hg-18. Will also accept integers 36,37 as alternative arguments.
#' @param bioC logical, whether to return the annotation as a ranged S4 object (GRanges or
#' RangedData), or as a data.frame
#' @param GRanges logical, whether to return a GRanges object, or FALSE to return RangedData
#' @param refresh logical, whether to re-download the file if the existing file has become corrupted
#' @export
#' @return Returns a list, GRanges or RangedData object, depending on input parameters. Contained
#' will be centromere chromosome and start and end positions.
#' @examples
#' require(BiocInstaller)
#' setwd(tempdir())
#' get.cyto()
#' cyto.frame <- get.cyto(bioC=FALSE)
#' prv(cyto.frame)
#' get.cyto(build=36)
get.cyto <- function(build=NULL,dir=NULL,bioC=TRUE,GRanges=TRUE,refresh=FALSE) {
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  local.file="cyto"
  if(is.null(dir)) {
    local.file <- cat.path("",local.file,suf=build,ext="tar.gz")
  } else { 
    local.file <- cat.path(dir,local.file,suf=build,ext="tar.gz")
  }
  if(!file.exists(local.file) | refresh) {
    golden.path <- paste("http://hgdownload.cse.ucsc.edu/goldenPath/",build,"/database/cytoBand.txt.gz",sep="")
    success <- tryCatch( download.file(url=golden.path,local.file,quiet=T),error=function(e) { F } )
    if(is.logical(success)) {
      if(!success) { warning("couldn't reach ucsc website! try sourcing cytoband data elsewhere"); return(NULL) } }
    tt <- reader(local.file,header=FALSE)
    if(is.null(dir)) { unlink(local.file) }
  } else {
    tt <- reader(local.file)
  }
  colnames(tt) <- c("chr","start","end","band","negpos")
  write.table(tt,file=local.file,col.names=T,row.names=F,sep="\t",quote=F)
  mychr <- gsub("chr","",tt$chr,fixed=T)
  fullbands <- paste(mychr,tt$band,sep="")
  if(bioC ) {
    st <- as.numeric(tt$start)
    en <- as.numeric(tt$end)
   # must.use.package(c("genoset","IRanges"),bioC=T)
    outData <- RangedData(ranges=IRanges(start=st,end=en,names=fullbands),space=mychr,
                          negpos=tt$negpos) #,universe=build[1])
    #prv(outData)
    outData <- toGenomeOrder2(outData) ##,strict=T)
    if(GRanges) { outData <- as(outData,"GRanges") }
  } else {
    outData <- tt 
    if("band" %in% colnames(outData)) {
      ## make 'chr-band' rownames to be consistent with the RangedData object if bioC=T
      rownames(outData) <- fullbands
      #outData <- outData[,-which(colnames(outData) %in% "band")]
    }
  }
  return(outData)
}



#' Get HapMap recombination rates for hg18 (build 36)
#' 
#' Recombination rate files can be used to calculate recombination distances
#' for genome locations, in centimorgans. This function downloads these reference
#' files from the hapmap NCBI website. At the time of writing they were only 
#' availble for build 36. If using a more recent build I suggest using the
#' conversion function conv.37.36(), then recomWindow(), then conv.36.37() to 
#' get recombination distances for other builds. If getOption("save.annot.in.current")
#' is <=0 then no files will be kept. Otherwise an object containing this mapping data
#' will be saved in the local directory if dir=NULL, or else in the directory specified.
#' Allowing this reference to be saved will greatly increase the speed of this function
#' for subsequent lookups
#' @param dir character, location to store binary file with the recombination maps for
#' chromosomes 1-22. If NULL then getOption("save.annot.in.current")>=1 will result in
#' this file being stored in the current directory, or if <=0, then this file will not
#' be stored.
#' @param verbose logical, if the binary file is not already downloaded, when verbose
#' is TRUE, there will be some output to the console indicating the progress of the
#' download. If FALSE, all output is suppressed.
#' @param refresh logical, if you already have the binary file in the current directory,
#' this argument will let you re-download and re-generate this file, e.g, if the file
#' is modified or corrupted this will make a new one without having to manually delete it
#' @param compress logical, this argument is passed to 'save' and will result in a larger
#' binary file size, but quicker loading times, so 'FALSE' is recommended for faster retrieval.
#' @export
#' @return Returns a list object of length 22, containing the recombination map files
#' as 22 separate data.frame's.
#' @examples
#' \donttest{
#' ## not run as it takes roughly 2 minutes to download and read-in ##
#' setwd(tempdir())
#' rec.map <- get.recombination.map(getwd())
#' file.on.disk <- "rrates_genetic_map_chr_1_22_b36.RData"
#' if(file.exists(file.on.disk)) { unlink(file.on.disk) } # remove the downloaded file
#' }
get.recombination.map <- function(dir=NULL,verbose=TRUE,refresh=FALSE, compress=FALSE) {
  n.chr <- 22
#  hap.dir <- "http://hapmap.ncbi.nlm.nih.gov/downloads/recombination/latest/rates/"  # deprecated
  hap.dir <- "ftp://ftp.ncbi.nlm.nih.gov/hapmap/recombination/latest/rates/"
  temp.dir <- "recombinationratesGF13fDR1er119"
  local.file <- "rrates_genetic_map_chr_1_22_b36.RData"
  if(!file.exists(temp.dir)) { dir.create(temp.dir) } 
  local.files=paste0(temp.dir,"/genetic_map_chr",1:n.chr,"_b36.txt")
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  if(!is.null(dir)) {
    local.files <- cat.path(dir,local.files,ext="txt")
    local.file <- cat.path(dir,local.file,ext="RData")
  }
  if(!file.exists(local.file) | refresh) {
    if(verbose) { cat("Downloading recombination data from: ",hap.dir,"\n") }
    hapmap.urls <- cat.path(dir=hap.dir,fn=basename(local.files))
    success <- TRUE
    for (cc in 1:n.chr) {
      #print(hapmap.urls[cc])
      success <- tryCatch( download.file(url=hapmap.urls[cc],local.files[cc],quiet=T),error=function(e) { F } )
      if(verbose) { loop.tracker(cc,n.chr*2) }
    }
    if(is.logical(success)) {
      if(!success) { warning("couldn't download at least one of the files from: ",hap.dir); return(NULL) } }
    map.files.list <- vector("list",n.chr)
    for (cc in 1:n.chr) {
      map.files.list[[cc]] <- read.table(local.files[cc],header=TRUE)
      if(is.data.frame(map.files.list[[cc]])) { 
        unlink(local.files[cc]) 
      } else { warning("downloaded map file was corrupt for chr",cc) }
      if(verbose) { loop.tracker(n.chr+cc,n.chr*2) }
    }
    if(file.exists(temp.dir)) { file.remove(temp.dir) }   # delete the temporary directory
  } else {
    map.files.list <- reader(local.file)
  }
  if(!is.null(dir)) { save(map.files.list,file=local.file,compress=compress) }
  if(length(map.files.list)!=n.chr) { stop("Unfortunately the object derived seems corrupted") }
  names(map.files.list) <- paste0("chr",1:n.chr)
  return(map.files.list)
}


#' Get exon names and locations from UCSC
#' 
#' Various R packages assist in downloading exonic information but often the input required is 
#' complex, or several lines of code are required to initiate, returning an object that
#' might require some manipulation to be useful. This function simplifies the job 
#' considerably, not necessarily requiring any arguments. The object returned can be
#' a standard data.frame or a bioconductor GRanges/RangedData object. The raw annotation
#' file downloaded will be kept in the working directory so that subsequent calls to
#' this function run very quickly, and also allow use offline.
#' @param dir character, location to store file with the gene annotation.
#' If NULL then getOption("save.annot.in.current")>=1 will result in
#' this file being stored in the current directory, or if <=0, then this file will not
#' be stored.
#' @param build string, currently 'hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is build-36/hg-18. Will also accept integers 36,37 as alternative arguments.
#' @param bioC logical, whether to return the annotation as a ranged S4 object (GRanges or
#' RangedData), or as a data.frame
#' @param transcripts logical, if TRUE, return transcripts rather than exons
#' @param GRanges logical, if TRUE and bioC is also TRUE, then returned object will be GRanges, otherwise
#' it will be RangedData
#' @export
#' @return Returns a data.frame, GRanges or RangedData object, depending on input parameters. Contained
#' will be HGNC gene labels, chromosome, start and end positions, transcript id number and name
#' @examples
#' \donttest{
#' setwd(tempdir())
#' get.exon.annot()
#' }
get.exon.annot <- function(dir=NULL,build=NULL,bioC=T, transcripts=FALSE, GRanges=TRUE) {
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  ## load exon annotation (store locally if not there already)
  from.scr <- T
  txt <- if(transcripts) { "trans" } else { "exon" }
  if(!is.null(dir)) {
    dir <- validate.dir.for(dir,"ano")
    ex.fn <- cat.path(dir$ano,pref=txt,"Annot",suf=build,ext="RData")
    if(file.exists(ex.fn)) {
      tS <- reader(ex.fn)
      if(transcripts) {
        if(is(tS)[1]=="data.frame") { from.scr <- F }
      } else {
        if(is(tS)[1]=="GRanges") { from.scr <- F }
      }
    }
  }
 # must.use.package("GenomicFeatures",T)
  if(from.scr) {
    #must.use.package("gage",T) this is where egSymb came from, but is now not needed
    # get transcripts from build table 'knownGene'
    success <- tryCatch(txdb <- suppressWarnings(makeTxDbFromUCSC(genome=build,
                                                         tablename="knownGene"))  ,error=function(e) { F } )
    if(is.logical(success)) { 
      if(!success) {
        warning("Couldn't reach build website! try again later or, \n",
                "if in europe/uk, there may still be a bug in rtracklayer; \n",
                "Installing the latest version of R and bioconductor\n",
                "and running biocLite('rtracklayer'), should fix this")
        return(NULL) }
    }
    if(!transcripts) {
      ex = exonsBy(txdb, by="gene")
      tS <- toGenomeOrder2(unlist(ex),strict=T)
    } else {  
      tS = transcriptsBy(txdb, by="gene")
      #egSymb <- egSymb <- reader("~/github/iChip/egSymb.rda")
      egSymb <- humarray::egSymb
      select <- match(names(tS),egSymb[,1])
      names(tS)[!is.na(select)] <- egSymb[,2][select[!is.na(select)]]
      tS <- as.data.frame(tS)
    }
  }
  if(exists("ex.fn")) { save(tS,file=ex.fn) }
  if(transcripts) {
    chrs <- paste(tS$seqnames); chrs <- gsub("chr","",chrs)
    tS$seqnames <- chrs
    if(all(colnames(tS)==c("element","seqnames","start","end","width","strand","tx_id","tx_name"))) {
      colnames(tS) <- c("gene","chr","start","end","width","strand","txid","txname")
    } else {
      cat(" unexpected colnames found using makeTxDbFrombuild()\n")
      if(bioC) { cat(" therefore returning data.frame instead of RangedData object\n") ;
                 bioC <- F }
    }
    if(bioC) {
      tS <- RangedData(ranges=IRanges(start=tS$start,end=tS$end),
                       space=tS$chr,gene=tS$gene, strand=tS$strand,
                       txid=tS$txid, txname=tS$txname) #,universe=build)
      tS <- toGenomeOrder2(tS,strict=T)
      if(GRanges) { tS <- as(tS,"GRanges") }
    }
    return(tS)
  } else {
    rownames(tS) <- paste(1:nrow(tS))
    ei <- mcols(tS)[["exon_id"]]
    ei2 <- add.trail(ei,suffix=strsplit("abcdefghijklmnopqrstuvwxyz","")[[1]])
    mcols(tS)[["exon_name"]] <- ei2
    if(bioC) {
      if(GRanges) {
        return(tS)
      } else {
        return(as(tS,"RangedData"))
      }
    } else {
      return(ranged.to.data.frame(tS))
    }
  }
}



#' Get human gene names and locations from biomart
#' 
#' Various R packages assist in downloading genomic information but often the input required is 
#' complex, or several lines of code are required to initiate, returning an object that
#' might require some manipulation to be useful. This function simplifies the job 
#' considerably, not necessarily requiring any arguments. The object returned can be
#' a standard data.frame or a bioconductor GRanges/RangedData object. The raw annotation
#' file downloaded will be kept in the working directory so that subsequent calls to
#' this function run very quickly, and also allow use offline.
#' @param dir character, location to store file with the gene annotation.
#' If NULL then getOption("save.annot.in.current")>=1 will result in
#' this file being stored in the current directory, or if <=0, then this file will not
#' be stored.
#' @param build string, currently 'hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is build-36/hg-18. Will also accept integers 36,37 as alternative arguments.
#' @param bioC logical, whether to return the annotation as a ranged S4 object (GRanges or
#' RangedData), or as a data.frame
#' @param duplicate.report logical, whether to provide a report on the genes labels that are listed
#' in more than 1 row - this is because some genes span ranges with substantial gaps within them
#' @param one.to.one logical, as per above, some genes have duplicate entries, sometimes for simplicity
#' you want just one range per gene, if this parameter is set TRUE, one range per gene is enforced,
#' and only the widest range will be kept by default for each unique gene label
#' @param remap.extra logical, whether to remap chromosome annotation for alternative builds and
#' unconnected segments to the closest regular chromosome, e.g, mapping MHC mappings to chromosome 6
#' @param discard.extra logical, similar to above, but if TRUE, then any non-standard chromosome
#' genes will just be discarded
#' @param only.named logical, biomart annotation contains some gene segments without names, if TRUE, then
#' such will not be included in the returned object (note that this will happen also if one.to.one is TRUE)
#' @param ens.id logical, whether to include the ensembl id in the dataframe
#' @param refresh logical, if you already have the file in the current directory,
#' this argument will let you re-download and re-generate this file, e.g, if the file
#' is modified or corrupted this will make a new one without having to manually delete it
#' @param GRanges logical, if TRUE and bioC is also TRUE, then returned object will be GRanges, otherwise
#' it will be RangedData
#' @param host.txt character, the argument to pass to biomaRt::useMart(). Default for build 36 is 
#' 'may2009.archive.ensembl.org', and for build 37, "feb2014.archive.ensembl.org" but for recent builds
#'  the recommended link is 'www.ensembl.org'
#' @export
#' @return Returns a data.frame, GRanges or RangedData object, depending on input parameters. Contained
#' will be HGNC gene labels, chromosome and start and end positions, other information depends on 
#' specific parameters documented above
#' @examples
#' \donttest{
#' setwd(tempdir())
#' get.gene.annot()
#' }
get.gene.annot <- function(dir=NULL,build=NULL,bioC=TRUE,duplicate.report=FALSE,
                           one.to.one=FALSE,remap.extra=FALSE,discard.extra=TRUE,only.named=FALSE,
                           ens.id=FALSE,refresh=FALSE,GRanges=TRUE, host.txt="") {
  # faster than exon, but only contains whole gene ranges, not transcripts
  # allows report on duplicates as some might be confused as to why some genes
  # have more than one row in the listing (split across ranges usually)
  # run with dir as NULL to refresh changes in COX
  #must.use.package(c("biomaRt","genoset","gage"),T)
  mart.txt <- "ENSEMBL_MART_ENSEMBL"
  verbose <- TRUE # hard coded at this stage!!
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  if(build %in% c("hg15","hg16","hg17")) { stop("older builds, prior to may 2009 are not supported by this function")}
  from.scr <- T
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  if(!is.null(dir)) {
    dir <- validate.dir.for(dir,"ano")
    utxt <- ""; if(one.to.one) { utxt <- "_unq" }
    if(ens.id) { utxt <- paste(utxt,"ens",sep="_") }
    if(only.named) { utxt <- paste(utxt,"onm",sep="_") }
    gn.fn <- cat.path(dir$ano,"geneAnnot",pref=build,suf=utxt,ext="RData")
    if(file.exists(gn.fn) & !refresh) {
      dat <- get(paste(load(gn.fn)))
      from.scr <- F
    }
  }
  # colnames for output
  nm.list <- c("gene","chr","start","end","band")
  if(ens.id & bioC) { warning("ens.id=TRUE only has an effect when bioC=FALSE") }
  if(from.scr) {
    if(build=="hg18") {
      if(all(host.txt=="")) { host.txt <- "may2009.archive.ensembl.org" }
      ens <- biomaRt::useMart(mart.txt,
                              dataset="hsapiens_gene_ensembl",
                              host= host.txt,
                              path="/biomart/martservice",
                              archive=FALSE)
    } else {
      if(build=="hg19") {
        if(all(host.txt=="")) { host.txt <- "feb2014.archive.ensembl.org" }
        ens <- biomaRt::useMart(mart.txt,
                                dataset="hsapiens_gene_ensembl",
                                host= host.txt,
                                path="/biomart/martservice",
                                archive=FALSE)
      } else {
        # whatever the current mart is
        if(all(host.txt=="")) { host.txt <- "www.ensembl.org" }
        ens <- biomaRt::useMart(mart.txt,host=host.txt)
      }
    }
    ens <- biomaRt::useDataset("hsapiens_gene_ensembl",mart=ens)
    attr.list <- c("hgnc_symbol", "chromosome_name",
                   "start_position", "end_position", "band")
    if(ens.id) { attr.list <- c(attr.list,"ensembl_gene_id") }
    #    if(only.named & !ens.id & build=="hg18") {
    #      egSymb <- reader("~/github/iChip/egSymb.rda") # this is a hg18 list!
    #      dat <- biomaRt::getBM(attributes = attr.list, filters = "hgnc_symbol",
    #                   values = egSymb[,2], mart = ens)
    #    } else {
    dat <- biomaRt::getBM(attributes = attr.list, mart = ens)
    #    }
    if(exists("gn.fn")) { save(dat,file=gn.fn) }
  } 
  if(ens.id) { nm.list <- c(nm.list,"ens.id") }
  #return(dat)
  no.gene.names <- which(paste(dat[[1]])=="")
  #prv(no.gene.names)
  if((one.to.one | (only.named & !ens.id)) & length(no.gene.names)>0) { dat <- dat[-no.gene.names,] }
  if(remap.extra) {
    dat$chromosome_name <- tidy.extra.chr(dat$chromosome_name)
    #dat$chromosome_name[grep("c6",dat$chromosome_name,ignore.case=T)] <- 6  # prevent issues with c6_COX, c6_QBL  
    #dat$chromosome_name[grep("c5",dat$chromosome_name,ignore.case=T)] <- 5  # prevent issues with c5_H2  
    #dat$chromosome_name[grep("NT",dat$chromosome_name,ignore.case=T)] <- "Z_NT"  # merge all NT regions to one label
  }
  if(discard.extra) {
    ## http://www.lrg-sequence.org/ ##
    # note that if remapping is already done, then these won't be discarded unless remapping failed
    tt <- tidy.extra.chr(dat$chromosome_name,select=TRUE)
    badz <- which(!tt)
    #prv(badz)
    if(length(badz)>0) { dat <- dat[-badz,] } # remove LRG, GS, HG, NT, COX, etc annotation from set
  }
  if(bioC) {
    missin <- function(x) { is.na(x) | (x=="") | x=="NA" }
  #  keep <- rep(T,nrow(dat))
    stz <- dat$start_position; enz <- dat$end_position
    whichmis <- missin(stz) | missin(enz)
    if(any(whichmis)) { dat <- dat[!whichmis,]; warning("some start/end positions were missing") }
    stz <- dat$start_position; enz <- dat$end_position
    if(any(stz>enz)) { mm <- stz>enz; prv(cbind(stz,enz)[mm,]); tmp <- stz; stz[mm] <- enz[mm]; enz[mm] <- tmp[mm]; warning("some start positions were after end positions (swapped these)") }
    dat$start_position <- stz; dat$end_position <- enz
#    bad1s <- NULL
#    for (jj in 1:22) { ii <- ga[ga$chr==jj,"end"]>get.chr.lens()[jj]; if(any(ii)) { bad1s <- c(bad1s,which(ga$chr==jj)[ii]) } }
#    if(length(bad1s)>0) { dat <- dat[-bad1s,] ; warning(length(bad1s)," gene positions exceeded chromosome length") }
    outData <- RangedData(ranges=IRanges(start=dat$start_position,end=dat$end_position),
                          space=dat$chromosome_name,gene=dat$hgnc_symbol, band=dat$band) #, universe=build)
    outData <- toGenomeOrder2(outData,strict=T)
    if(duplicate.report | one.to.one) {
      genez <- outData$gene
      dG <- which(duplicated(genez))
      if(!duplicate.report) {
        dup.genes <- genez[dG]
      } else {
        dup.genes <- gene.duplicate.report(outData)
      }
      stz <- start(outData); enz <- end(outData); wdz <- width(outData)
      to.del <- to.ch <- ch.st <- ch.en <- NULL
      n.dup <- length(dup.genes)
      if(one.to.one & n.dup>0) {
        #return(dup.genes)
        indz <- sapply(as.list(unique(dup.genes)),function(X) { which(genez %in% X) })
        # keep the range that is widest
        st.en <- lapply(indz,function(X) { c(stz[X][wdz[X]==max(wdz[X])][1],enz[X][wdz[X]==max(wdz[X])][1]) } )
        to.del <- dG
        to.ch <- sapply(indz,min,na.rm=T)
        ch.st <- sapply(st.en,"[",1) 
        ch.en <- sapply(st.en,"[",2) 
        start(outData)[to.ch] <- 1
        end(outData)[to.ch] <- ch.en 
        start(outData)[to.ch] <- ch.st 
        outData <- outData[-to.del,]
        if(verbose) { warning(cat("kept widest ranges, merging",length(to.del)+length(to.ch),"duplicate gene labels to",length(to.ch),"labels\n")) }
      } else {
        #cat("no dups found")
      }
      # but haven't done anything about them or removed them! 
    }
    if(GRanges) { outData <- as(outData, "GRanges") }
  } else {
    outData <- dat; colnames(outData) <- nm.list
  }
  return(outData)
}



#' Derive Telomere locations across the genome
#' 
#' Returns the locations of telomeres in the human genome, for a given build, as
#' a list by chromosome, text vector, or GRanges/RangedData object.
#' @param dir character, location to store file with the this annotation.
#' If NULL then getOption("save.annot.in.current")>=1 will result in
#' this file being stored in the current directory, or if <=0, then this file will not
#' be stored.
#' @param kb The number of base pairs at the start and end of a chromosome that are defined as
#' belonging to the telomere can be a little arbitrary. This argument allows specification
#' of whatever threshold is required.
#' @param build string, currently 'hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is build-36/hg-18. Will also accept integers 36,37 as alternative arguments.
#' @param bioC logical, whether to return the annotation as a ranged S4 object (GRanges or
#' RangedData), or as a data.frame
#' @param GRanges logical, whether to return a GRanges object, or FALSE to return RangedData
#' @param text logical, whether to return locations as a text vector of the form: chrN:xxxx-xxxx
#' @param autosomes logical, if TRUE, only return results for autosomes, if FALSE, also include
#' X and Y.
#' @param mito.zeros logical, Mitochondria have no telomeres (are circular) but for some purposes you
#' might want zero values in order to match with other annotation that includes all chromosomes and MT.
#' TRUE adds zeros for chrMT, and FALSE excludes chrMT.
#' @export
#' @return Returns a text vector, GRanges or RangedData object, depending on input parameters. Contained
#' will be telomere chromosome and start and end positions.
#' @examples
#' setwd(tempdir())
#' get.telomere.locs()
#' get.telomere.locs(bioC=FALSE)
#' get.telomere.locs(text=TRUE)
get.telomere.locs <- function(dir=NULL,kb=10,build=NULL,bioC=TRUE,GRanges=TRUE,
                              text=FALSE,autosomes=FALSE,mito.zeros=FALSE)
{
  # the actual telomeres are typically about 10kb, but
  # for cnv-QC purposes want to exclude a larger region like 500kb
  # Mt have no telomeres, are circular, but for some purposes might want zero values in there
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  chr.lens <- get.chr.lens(dir=dir,build=build[1],autosomes=FALSE,mito=mito.zeros)
  n <- 1:22; if(!autosomes) { n <- c(n,"X","Y") } # Mt have no telomeres, are circular
  nchr <- length(n) #default
  if(mito.zeros) { n <- c(n,"M") }
  my.chr.range <- vector("list",length(n))
  names(my.chr.range) <- paste("chr",n,sep="")
  for (cc in 1:nchr) {
    one <- force.chr.pos(Pos=c(1,kb*1000),Chr=cc,build=build) # f..c..pos() makes sure is a valid range
    two <- force.chr.pos(Pos=chr.lens[cc]+c(-kb*1000,0),Chr=cc,build=build)
    my.chr.range[[cc]] <- list(start=c(one[1],two[1]),end=c(one[2],two[2]))
  }
  if(mito.zeros) {
    # add null values for the Mitochondrial chromosome
    cc <- cc+1; one <- c(1,1);  two <- chr.lens[cc]+c(0,0)
    my.chr.range[[cc]] <- list(start=c(one[1],two[1]),end=c(one[2],two[2]))
  }
  reg.dat <- rep("telomere",length(n)*2)
  chrz <- rep(n,each=2)
  nmz <- paste(reg.dat,chrz,rep(c("a","b"),times=length(n)),sep="_")
  stz <- as.vector(sapply(my.chr.range,"[[",1))
  enz <- as.vector(sapply(my.chr.range,"[[",2))
  if(bioC | text) {
    #must.use.package(c("genoset","IRanges"),bioC=T)
    outData <- RangedData(ranges=IRanges(start=stz,end=enz,names=nmz),space=chrz,
                          reg=reg.dat) #,universe=build[1])
    outData <- toGenomeOrder2(outData,strict=T)
    if(text) { outData <- ranged.to.txt(outData) } else { if(GRanges) { outData <- as(outData,"GRanges") } }
  } else {
    outData <- my.chr.range 
  }
  return(outData)
}



#' Get chromosome lengths from build database
#' 
#' Quick and easy way to retrieve human chromosome lengths. Can select from hg18/hg19 (ie, 
#'  build 36/37), or any future builds (hg20, etc) stored in the same location on the build website.
#'  Default is to return lengths for 22 autosomes, but can also retrieve X,Y 
#'  and Mitochondrial DNA lengths by 'autosomes=FALSE' or n=1:25. Even if not connected to 
#'  the internet can retrieve hard coded lengths for hg18 or hg19.
#'
#' @param dir directory to retrieve/download the annotation from/to (defaults to current getwd())
#'  if dir is NULL then will automatically delete the annotation text file from the local directory
#'   after downloading
#' @param build string, currently 'hg17','hg18' or 'hg19' to specify which annotation version to use. 
#'  Default is getOption("ucsc"). Will also accept integers 17,18,19,35,36,37 as alternative arguments.
#' @param autosomes logical, if TRUE, only load the lengths for the 22 autosomes, else load X,Y,[MT] as well
#' @param len.fn optional file name to keep the lengths in
#' @param mito logical, whether to include the length of the mitochondrial DNA (will not include unless
#'  autosomes is also FALSE)
#' @param names logical, whether to name the chromosomes in the resulting vector
#' @param delete.after logical, if TRUE then delete the text file that these lengths were downloaded to. 
#' @param verbose logical, if TRUE display extra information on progress of chromsome retrieval
#'  If FALSE, then the file will be kept, meaning future lookups will be faster, and available offline.
#' @export
#' @examples
#'  setwd(tempdir())
#'  get.chr.lens(delete.after=TRUE) # delete.after simply deletes the downloaded txt file after reading
#'  get.chr.lens(build=35,autosomes=TRUE,delete.after=TRUE) # only for autosomes
#'  get.chr.lens(build="hg19",mito=TRUE,delete.after=TRUE) # include mitochondrial DNA length
get.chr.lens <- function(dir=NULL,build=NULL,autosomes=FALSE,len.fn="humanChrLens.txt",
                         mito=FALSE,names=FALSE, delete.after=FALSE, verbose=FALSE)
{
  # retrieve chromosome lengths from local annotation file, else download from build
  if(is.null(dir)) { if(any(getOption("save.annot.in.current")<1)) { dir <- NULL } else { dir <- getwd() } }
  if(is.null(dir)) { dir <- getwd() ; delete.after <- TRUE }
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  dir <- validate.dir.for(dir,c("ano"),warn=F)
  chrlens.f <- cat.path(dir$ano,len.fn) # existing or future lengths file
  n <- 1:22; if(!autosomes) { n <- c(n,"X","Y","M") }
  hg18.backup <- c(247249719,242951149,199501827,191273063,180857866,170899992,158821424,
                   146274826,140273252,135374737,134452384,132349534,114142980,106368585,
                   100338915,88827254,78774742,76117153,63811651,62435964,46944323,
                   49691432,154913754,57772954,16571)
  hg19.backup <- c(249250621,243199373,198022430,191154276,180915260,171115067,159138663,
                   146364022,141213431,135534747,135006516,133851895,115169878,107349540,
                   102531392,90354753,81195210,78077248,59128983,63025520,48129895,
                   51304566,155270560,59373566,16571)
  hg38.backup <- c(248956422,242193529,198295559,190214555,181538259,170805979,159345973,
                   145138636,138394717,133797422,135086622,133275309,114364328,107043718,
                   101991189,90338345,83257441,80373285,58617616,64444167,46709983,
                   50818468,156040895,57227415,16571)
  # backups for offline use
  if(build=="hg18") { 
    offline.backup <- hg18.backup 
  } else {
    if(build=="hg19") {
      offline.backup <- hg19.backup 
    } else {
      offline.backup <- hg38.backup 
    }
  }
  if(file.exists(chrlens.f))
  {
    # file seems to be in annotation directory already
    chrLens <- readLines(chrlens.f)
    if (length(chrLens)!=length(n))
    {
      #warning("Length of existing chromosome file didn't match expected:",length(n))
      notGot <- T
    } else {
      notGot <- F
      # we have the right length, but do we have the right version?
      if(build=="hg18" & ( length(which(chrLens %in% hg38.backup))>2 | length(which(chrLens %in% hg19.backup))>2) ) { notGot <- T }
      if(build=="hg19" & ( length(which(chrLens %in% hg18.backup))>2 | length(which(chrLens %in% hg38.backup))>2) ) { notGot <- T }
      if(build=="hg38" & ( length(which(chrLens %in% hg18.backup))>2 | length(which(chrLens %in% hg19.backup))>2) ) { notGot <- T }
      names(chrLens) <- paste0("chr",n)
    }
  } else { notGot <- T }
  if (notGot | (!build %in% c("hg18","hg19","hg38"))) {
    #download from build
    if(verbose) { cat("attempting to download chromosome lengths from genome build ... ") }
    urL <- switch(build,
                  hg17="http://hgdownload.cse.ucsc.edu/goldenPath/hg17/database/chromInfo.txt.gz",
                  hg18="http://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz",
                  hg19="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz",
                  hg38="http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz")
    success <- T
    success <- tryCatch(download.file(urL, chrlens.f,quiet=T),error=function(e) { F } )
    if(!is.logical(success)) { success <- T }
    if(success) {
      if(verbose) {  cat("download successful\n") }
      chrL.f <- readLines(chrlens.f)
      len.lst <- strsplit(chrL.f,"\t")
      nmz <- sapply(len.lst,"[",1)
      lnz <- sapply(len.lst,"[",2)
      nnn <- paste0("chr",n)
      want.chr.names <- match(nnn,nmz)
      want.chr.names <- want.chr.names[!is.na(want.chr.names)]
      #print(want.chr.names)
      chrLens <- lnz[want.chr.names]
      names(chrLens) <- nnn
    } else {
      warning("couldn't reach build website, so have used offline versions of chr lengths")
      if(!build %in% c("hg18","hg19","hg38")) { warning("no offline version for build version:",build) }
      chrLens <- paste(offline.backup)[1:length(n)]; names(chrLens) <- paste0("chr",n)
      #print(n)
      delete.after <- F
    }
    if(length(dir)==1 & dir[1]=="" & delete.after) {
      unlink(chrlens.f)
    } else {
      writeLines(chrLens,con=chrlens.f) # save file for future use
    }
  }
  if(!mito & length(chrLens)>22) { chrLens <- chrLens[-grep("M",n)] }
  if(names) {
    return(chrLens)
  } else {
    return(as.numeric(chrLens))
  }
}



#' Obtain a listing of known T1D associated genomic regions
#'
#' Deprecated as this data is no longer available online
#' This function uses a full list of ichip dense regions combined with a list of t1d
#' SNPs to get the t1d regions. For type 1 diabetes researchers.
#' @param dense.reg GRanges or RangedData object, only use if you need to provide for a
#' build other than 36 or 37 (hg18/hg19).
#' @param build e.g, 36/hg18 or 37/hg19, if left as NULL current getOption('ucsc') will
#' be used.
#' @param invert logical, set to TRUE if you wish to get the set of NON-T1D regions.
#' @return a GRanges object with the specified type 1 diabetes (or inverse) ranges
#' @export
#' @examples
#' \donttest{
#' # t1d.reg <- get.t1d.regions()
#' # non.t1d <- get.t1d.regions(build=36,invert=TRUE)
#' }
get.t1d.regions <- function(dense.reg=NULL,build=NULL,invert=FALSE) {
  #source("~/github/iChip/iFunctions.R")
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  def.build <- getOption("ucsc")
  if(is.null(dense.reg)) { 
    #dense.reg <- reader("~/github/iChip/iChipFineMappingRegionsB36.RData") 
    dense.reg <- humarray::iChipRegionsB36
    if(build!="hg18") {
      if(build=="hg19") { 
        dense.reg <- conv.36.37(dense.reg) 
      } else { 
        if(build=="hg38") {
          dense.reg <- conv.37.38(conv.36.37(dense.reg))
        } else {
          stop("automatic loading for dense regions is only supported for builds 36, 37 or 38") 
        }
      }
    }
  }  
  if(is(dense.reg)[1]=="GRanges") { dense.reg <- as(dense.reg,"RangedData") }
  if(!is(dense.reg)[1]=="RangedData") { stop("dense.reg must be RangedData or GRanges") }
  ichip.regions <- dense.reg
  rs.ids <- get.immunobase.snps()
  locs <- Pos(rs.ids); chrs <- Chr(rs.ids)
  good <- !is.na(locs) & !is.na(chrs)
  locs <- locs[good]; chrs <- chrs[good]
  if(build!=def.build) {
    if(!all(c(build,def.build) %in% c("hg18","hg19"))) {
      stop("if build is not equal to getOption('ucsc') then build and this getOption('ucsc')",
           " parameter must both be equivalent to either hg18 or hg19. To use build 38,
           first set 'options(ucsc=38)'") 
    }
    if(build=="hg18") {
      #prv(chrs,locs)
      locs <- conv.37.36(chr=chrs,pos=locs)[,"start"]
    } else {
      locs <- conv.36.37(chr=chrs,pos=locs)[,"start"]
    }
  }
  t1dgr <- makeGRanges(chr=chrs,pos=locs,build=build)
  #prv(t1dgr,ichip.regions)
  t1d.regions <- suppressWarnings(subsetByOverlaps(set.chr.to.numeric(as(ichip.regions,"GRanges")),t1dgr))
  #  t1dgr <- as(makeGRanges(chr=chrs,pos=locs,build=build),"RangedData")
  #  t1d.regions <- find.overlaps(ichip.regions,ref=t1dgr,thresh=0.000000000001,ranges.out=TRUE)
  #prv(t1d.regions)
  if(invert) { t1d.regions <- invGRanges(t1d.regions,build=build) }
  return(t1d.regions)
}



#' Obtain subset of ranged object overlapping known T1D associated genomic regions
#'
#' Return subset of a ranged object that overlaps ichip dense mapped regions. For type 1 diabetes
#' and autoimmune disesase researchers.
#' @param X GRanges or RangedData object, ranged object for which you want the T1D subset
#' of ranges/SNPs.
#' @param ichip.regions GRanges or RangedData object, only use if you need to provide for a
#' build other than 36 or 37 (hg18/hg19), or for multiple lookups to avoid reloading each time
#' @param T1D.regions GRanges or RangedData object, only use if you need to provide for a
#' build other than 36 or 37 (hg18/hg19), or for multiple lookups to avoid reloading each time.
#' @param build e.g, 36/hg18 or 37/hg19, if left as NULL current getOption('ucsc') will
#' be used.
#' @param T1D.only logical, standard is to return type 1 diabetes (T1D) regions subset, but
#' if this parameter is set to FALSE, will return the subset for all 12 autoimmune diseases
#' mapped by the ImmunoChip consortium. (Cortes and Brown, 2010).
#' @param invert logical, set to TRUE if you wish to get the set of NON-T1D regions, or
#' non-immune dense regions when T1D.only=FALSE.
#' @return a GRanges object with the specified type 1 diabetes/autoimmune (or inverse) ranges
#' @export
#' @examples
#' \donttest{
#' # all.reg <- rranges(10000)
#' # t1d <- get.t1d.subset(all.reg) # T1D regions
#' # non.autoimmune <- get.t1d.subset(T1D.only=FALSE,build=36,invert=TRUE) # non-autoimmune regions
#' }
get.t1d.subset <- function(X,T1D.only=TRUE,build=NULL,ichip.regions=NULL,T1D.regions=NULL,invert=FALSE) {
  #source("~/github/iChip/iFunctions.R")
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  if(is.null(ichip.regions)) {
    #ichip.regions <- reader("~/github/iChip/iChipFineMappingRegionsB36.RData")
    ichip.regions <- humarray::iChipRegionsB36
  }
  if(T1D.only) {
    if(is.null(T1D.regions)) {
      T1D.regions <- get.t1d.regions(ichip.regions,build=build,invert=invert)
    }
    #T1D.regions <- as(T1D.regions,"RangedData")
    filt.sd <- suppressWarnings(subsetByOverlaps(set.chr.to.numeric(as(X,"GRanges")),set.chr.to.numeric(as(T1D.regions,"GRanges"))))
    #filt.sd <- find.overlaps(X,ref=T1D.regions,thresh=0.000000000000001,ranges.out=TRUE)
  } else {
#    filt.sd <- find.overlaps(X,ref=ichip.regions,thresh=0.000000000000001,ranges.out=TRUE)
    if(invert) { ichip.regions <- invGRanges(ichip.regions,build=build) }
    filt.sd <- suppressWarnings(subsetByOverlaps(set.chr.to.numeric(as(X,"GRanges")),set.chr.to.numeric(as(ichip.regions,"GRanges"))))
  }
  return(filt.sd)
}



#' Obtain subset of ranged object overlapping human genes
#'
#' Return subset of a ranged object that overlaps human genes (or custom ranges/exons).
#' A wrapper for subsetByOverlaps that tries to ensure equivalent builds and chromosome
#' labelling are taken care of automatically, and provides the default case of genic subsetting
#' where no explicit ref parameter is required.
#' @param X GRanges or RangedData object, ranged object for which you want the genic subset
#' of ranges/SNPs.
#' @param ref GRanges or RangedData object, only use if you need to provide for a
#' build other than 36 or 37 (hg18/hg19), or to use this function for another reference set,
#' for instance you could provide an object with exons
#' @param build e.g, 36/hg18 or 37/hg19, if left as NULL current getOption('ucsc') will
#' be used.
#' @return a GRanges object with the specified genic (or custom) ranges
#' @seealso \code{\link{get.gene.annot}}, \code{\link{get.exon.annot}}, \code{\link{subsetByOverlaps}}
#' @export
#' @examples
#' \donttest{
#' # all.reg <- rranges(1000) # random set of 1000 regions
#' # genic <- get.genic.subset(all.reg) # gene regions from the random set
#' # exonic <- get.genic.subset(all.reg,ref=get.exon.annot()) # exonic regions from the random set
#' }
get.genic.subset <- function(X,ref=NULL,build=NULL) {
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  if(!is(ref)[1] %in% c("RangedData","GRanges")) {
    if(!is.null(ref)) { warning("ref was not GRanges or RangedData, so reverted to get.gene.annot()") }
    ref <- get.gene.annot(build=build)
  }
  #if(!is.character(DB)) { stop() }
  #if(!DB %in% c("gene","exon"))
  #filt.sd <- find.overlaps(X,db=DB,thresh=0.00000000000001,ranges.out=TRUE,...)
  filt.sd <- suppressWarnings(subsetByOverlaps(set.chr.to.numeric(as(X,"GRanges")),set.chr.to.numeric(as(ref,"GRanges"))))
  return(filt.sd)
}


################## end annotation ##########################



######################
## Ranged Functions ##
######################



#' Wrapper to construct GRanges object from chr,pos or chr,start,end
#' 
#' Slightly simplifies the creation of a GRanges object, allowing flexible input of
#' chr, pos, or chr,start,end, and specification of rownames and the 'genome' parameter
#' for specifying the build/coordinate type, e.g, hg18, build 37, etc. Designed for
#' a simplified GRanges object without metadata, and where the 'strand' data is of
#' no interest, so if strand/metadata is to be used, use the original GRanges() constructor.
#' @param chr character, an optional vector of chromosomes to combine with 'pos' or 'start'+'end'
#' (enter in ...) to describe positions for the GRanges object
#' @param pos integer/numeric, for SNPs, can enter positions just once in 'pos' instead of 
#' entering the same value for start and end
#' @param build character, "hg18" or "hg19" (or 36/37) to show which reference to retrieve. The 
#' default when build is NULL is to use the build from the current ChipInfo annotation
#' @param start integer/numeric, specify the start position of ranges to encode in the new
#' GRanges object, alongside 'end' (do not use 'pos' if using start+end)
#' @param end integer/numeric, specify the end position of ranges to encode in the new
#' GRanges object, alongside 'start' (do not use 'pos' if using start+end)
#' @param row.names character, rownames for the output object, e.g, unique IDs describing the 
#' ranges
#' @param ... further arguments to df.to.GRanges, such as 'fill.missing'
#' @return Returns a GRanges object with the ranges, build and rownames specified. Rownames
#' will be 1:nrow if the 'row.names' parameter is empty. The strand information will default
#' to '+' for all entries, and the metadata will be empty (this function is only for creation
#' of a very basic GRanges object).
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @seealso \code{\link{Chr}}, \code{\link{Pos}}, \code{\link{Pos.gene}}, \code{\link{Band}},
#'  \code{\link{Band.gene}}, \code{\link{Band.pos}}, \code{\link{Gene.pos}}, \code{zlink{df.to.GRanges}}
#' @examples
#' g1 <- makeGRanges(chr=c(1,4,"X"),pos=c(132432,434342,232222))
#' g2 <- makeGRanges(chr=c(22,21,21),start=c(1,1,1),end=c(1000,10000,100000),
#'                                               row.names=c("1K","10K","100K"))
#' g1 ; g2
makeGRanges <- function(chr,pos=NULL,start=NULL,end=NULL,row.names=NULL,build=NULL,...) {
  if(is.null(build)) { build <- getOption("ucsc") }
  build <- ucsc.sanitizer(build)
  if(is.null(start) & is.null(end) & !is.null(pos)) {
    dF <- cbind(paste(chr),round(as.numeric(pos)))
  } else {
    if(!is.null(start) & !is.null(end) & is.null(pos)) {
      pos <- cbind(round(as.numeric(start)),round(as.numeric(end)))
      dF <- cbind(paste(chr),pos) 
    } else {
      stop("must use either 'pos' or 'start' and 'end'")
    }
  }
  #prv(paste(row.names)); prv(dF)
  if(is.character(row.names)) { if(length(row.names)==nrow(dF)) { rownames(dF) <- row.names } else { warning("row.names had an incorrect length")} }
  if(!any(Dim(pos)==length(chr))) { stop("chr and pos must be of the same length") }
  if(length(Dim(pos))>1) { 
    if(!ncol(pos) %in% c(1,2)) { 
      stop("pos must be a vector of SNP locations, or a 2-column object with start and end coordinates") 
    } else {
      if(ncol(pos)==2) {
        if(any(pos[,2]<pos[,1])) { warning("end coordinates should be equal or greater than start coordinates") }
      }
    }
  }
  if(ncol(dF)==3) { 
    colnames(dF) <- c("chr","start","end") } else { colnames(dF) <- c("chr","pos") }
  #return(dF)
  ranged <- df.to.GRanges(dF,start=colnames(dF)[2],end=tail(colnames(dF),1),build=build,...) 
  if(!any(rownames(ranged) %in% row.names)) {
    if(is.character(row.names)){ if(length(row.names)==nrow(ranged)) { rownames(ranged) <- row.names }}
  }
  return(ranged)
}




#' Convert from build 37 to build 36 SNP coordinates
#' 
#' Convert range or SNP coordinates between builds using a chain file. Depending on the chain file
#' this can do any conversion, but the default will use the hg19 to hg18 (37-->36) chain file
#' built into this package. The positions to convert can be entered using using chr, pos vectors,
#'  or a RangedData or GRanges object. This function is a wrapper for liftOver() from rtracklayer,
#' providing more control of input and output and 'defensive' preservation of order and length
#' of the output versus the input ranges/SNPs. 
#' @param chr character, an optional vector of chromosomes to combine with 'pos' to describe
#'  positions to convert from build hg19 to hg18
#' @param pos integer, an optional vector of chromosome positions (for SNPs), no need to enter
#' a ranges object if this is provided along with 'chr'
#' @param ranges optional GRanges or RangedData object describing positions for which conversion
#' should be performed. No need to enter chr, pos if using ranges
#' @param ids if the ranges have ids (e.g, SNP ids, CNV ids), then by including this parameter
#' when using chr, pos input, the output object will have these ids as rownames. For ranges input
#' these ids would already be in the rownames of the GRanges or RangedData object, so use of
#' this parameter should be unnecessary
#' @param ... additional arguments to makeGRanges(), so in other words, can use 'start' and
#' 'end' to specify ranges instead of 'pos'.
#' @return Returns positions converted from build 37 to 36. If using the 'ranges' parameter 
#' for position input, the object returned will be of the same format. If using chr and pos 
#' to input, then the object returned will be a data.frame with columns, chr and pos with 
#' rownames 'ids'. Output will be the same length as the input, which is not necessarily the
#'  case for liftOver() which does the core part of this conversion. Using vector or GRanges 
#'  input will give a resulting data.frame or GRanges object respectively that has the same
#'  order of rownames as the original input. Using RangedData will result in an output that
#'   is sorted by genome order, regardless of the original order.
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @seealso \code{\link{conv.36.37}}, \code{\link{conv.37.38}}, 
#' \code{\link{conv.38.37}}, \code{\link{convTo37}}, \code{\link{convTo36}}
#' @examples
#' \donttest{
#' gene.labs <- c("CTLA4","IL2RA","HLA-C")
#' pp <- Pos.gene(gene.labs,build=37)
#' gg <- GRanges(ranges=IRanges(start=pp$start,end=pp$end),seqnames=pp$chr)
#' conv.37.36(gg) # order of output is preserved   ### HERE!!! ###
#' rr <- as(gg,"RangedData")
#' conv.37.36(rr) # note the result is same as GRanges, but in genome order
#' }
conv.37.36 <- function(ranges=NULL,chr=NULL,pos=NULL,...,ids=NULL) {
  #chain.file <- "~/github/iChip/hg19ToHg18.over.chain"
  #chain.file <- system.file("extdata", "hg19ToHg18.over.chain", package="humarray")
  chain.file <- humarray::hg19ToHg18
  return(conv.36.37(ranges=ranges,chr=chr,pos=pos,...,ids=ids,chain.file=chain.file))
}

#' Convert from build 38 to build 37 SNP coordinates
#' 
#' Convert range or SNP coordinates between builds using a chain file. Depending on the chain file
#' this can do any conversion, but the default will use the hg38 to hg19 (38-->37) chain file
#' built into this package. The positions to convert can be entered using using chr, pos vectors,
#'  or a RangedData or GRanges object. This function is a wrapper for liftOver() from rtracklayer,
#' providing more control of input and output and 'defensive' preservation of order and length
#' of the output versus the input ranges/SNPs. 
#' @param chr character, an optional vector of chromosomes to combine with 'pos' to describe
#'  positions to convert from build hg38 to hg19
#' @param pos integer, an optional vector of chromosome positions (for SNPs), no need to enter
#' a ranges object if this is provided along with 'chr'
#' @param ranges optional GRanges or RangedData object describing positions for which conversion
#' should be performed. No need to enter chr, pos if using ranges
#' @param ids if the ranges have ids (e.g, SNP ids, CNV ids), then by including this parameter
#' when using chr, pos input, the output object will have these ids as rownames. For ranges input
#' these ids would already be in the rownames of the GRanges or RangedData object, so use of
#' this parameter should be unnecessary
#' @param ... additional arguments to makeGRanges(), so in other words, can use 'start' and
#' 'end' to specify ranges instead of 'pos'.
#' @return Returns positions converted from build 38 to 37. If using the 'ranges' parameter 
#' for position input, the object returned will be of the same format. If using chr and pos 
#' to input, then the object returned will be a data.frame with columns, chr and pos with 
#' rownames 'ids'. Output will be the same length as the input, which is not necessarily the
#'  case for liftOver() which does the core part of this conversion. Using vector or GRanges 
#'  input will give a resulting data.frame or GRanges object respectively that has the same
#'  order of rownames as the original input. Using RangedData will result in an output that
#'   is sorted by genome order, regardless of the original order.
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @seealso \code{\link{conv.36.37}}, \code{\link{conv.37.36}}, 
#' \code{\link{conv.37.38}}, \code{\link{convTo37}}, \code{\link{convTo36}}
#' @examples
#' \donttest{
#' gene.labs <- c("CTLA4","IL2RA","HLA-C")
#' pp <- Pos.gene(gene.labs,build=38)
#' gg <- GRanges(ranges=IRanges(start=pp$start,end=pp$end),seqnames=pp$chr)
#' conv.38.37(gg) # order of output is preserved   ### HERE!!! ###
#' rr <- as(gg,"RangedData")
#' conv.38.37(rr) # note the result is same as GRanges, but in genome order
#' }
conv.38.37 <- function(ranges=NULL,chr=NULL,pos=NULL,...,ids=NULL) {
  #chain.file <- "~/github/iChip/hg38ToHg19.over.chain"
  #chain.file <- humarray::hg38ToHg19.over.chain
  #chain.file <- system.file("extdata", "hg38ToHg19.over.chain", package="humarray")
  chain.file <- humarray::hg38ToHg19
  return(conv.36.37(ranges=ranges,chr=chr,pos=pos,...,ids=ids,chain.file=chain.file))
}


#' Convert from build 37 to build 38 SNP coordinates
#' 
#' Convert range or SNP coordinates between builds using a chain file. Depending on the chain file
#' this can do any conversion, but the default will use the hg19 to hg38 (37-->38) chain file
#' built into this package. The positions to convert can be entered using using chr, pos vectors,
#'  or a RangedData or GRanges object. This function is a wrapper for liftOver() from rtracklayer,
#' providing more control of input and output and 'defensive' preservation of order and length
#' of the output versus the input ranges/SNPs. 
#' @param chr character, an optional vector of chromosomes to combine with 'pos' to describe
#'  positions to convert from build hg19 to hg38
#' @param pos integer, an optional vector of chromosome positions (for SNPs), no need to enter
#' a ranges object if this is provided along with 'chr'
#' @param ranges optional GRanges or RangedData object describing positions for which conversion
#' should be performed. No need to enter chr, pos if using ranges
#' @param ids if the ranges have ids (e.g, SNP ids, CNV ids), then by including this parameter
#' when using chr, pos input, the output object will have these ids as rownames. For ranges input
#' these ids would already be in the rownames of the GRanges or RangedData object, so use of
#' this parameter should be unnecessary
#' @param ... additional arguments to makeGRanges(), so in other words, can use 'start' and
#' 'end' to specify ranges instead of 'pos'.
#' @return Returns positions converted from build 37 to 38. If using the 'ranges' parameter 
#' for position input, the object returned will be of the same format. If using chr and pos 
#' to input, then the object returned will be a data.frame with columns, chr and pos with 
#' rownames 'ids'. Output will be the same length as the input, which is not necessarily the
#'  case for liftOver() which does the core part of this conversion. Using vector or GRanges 
#'  input will give a resulting data.frame or GRanges object respectively that has the same
#'  order of rownames as the original input. Using RangedData will result in an output that
#'   is sorted by genome order, regardless of the original order.
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @seealso \code{\link{conv.36.37}}, \code{\link{conv.37.36}}, 
#' \code{\link{conv.37.38}}, \code{\link{convTo37}}, \code{\link{convTo36}}
#' @examples
#' \donttest{
#' gene.labs <- c("CTLA4","IL2RA","HLA-C")
#' pp <- Pos.gene(gene.labs,build=37)
#' gg <- GRanges(ranges=IRanges(start=pp$start,end=pp$end),seqnames=pp$chr)
#' conv.37.38(gg) # order of output is preserved   ### HERE!!! ###
#' rr <- as(gg,"RangedData")
#' conv.37.38(rr) # note the result is same as GRanges, but in genome order
#' }
conv.37.38 <- function(ranges=NULL,chr=NULL,pos=NULL,...,ids=NULL) {
  #chain.file <- "~/github/iChip/hg19ToHg38.over.chain"
  #chain.file <- humarray::hg19ToHg38.over.chain
  #chain.file <- system.file("extdata", "hg19ToHg38.over.chain", package="humarray")
  chain.file <- humarray::hg19ToHg38
  return(conv.36.37(ranges=ranges,chr=chr,pos=pos,...,ids=ids,chain.file=chain.file))
}



#' Convert from build 36 to build 37 SNP coordinates
#' 
#' Convert range or SNP coordinates between builds using a chain file. Depending on the chain file
#' this can do any conversion, but the default will use the hg18 to hg19 (36-->37) chain file
#' built into this package. The positions to convert can be entered using using chr, pos vectors,
#'  or a RangedData or GRanges object. This function is a wrapper for liftOver() from rtracklayer,
#' providing more control of input and output and 'defensive' preservation of order and length
#' of the output versus the input ranges/SNPs. 
#' @param chr character, an optional vector of chromosomes to combine with 'pos' to describe
#'  positions to convert to an alternative build
#' @param pos integer, an optional vector of chromosome positions (for SNPs), no need to enter
#' a ranges object if this is provided along with 'chr'
#' @param ranges optional GRanges or RangedData object describing positions for which conversion
#' should be performed. No need to enter chr, pos if using ranges
#' @param ids if the ranges have ids (e.g, SNP ids, CNV ids), then by including this parameter
#' when using chr, pos input, the output object will have these ids as rownames. For ranges input
#' these ids would already be in the rownames of the GRanges or RangedData object, so use of
#' this parameter should be unnecessary
#' @param chain.file character, a file location for the liftOver chain file to use for the
#' conversion. If this argument is left NULL the default UCSC file that converts from hg18
#' to hg19 will be used. Can also use a 'Chain' object from rtracklayer created using
#' import.chain(). Alternate chain files for other conversions are available from
#' http://crossmap.sourceforge.net/, and you could also customize these or create your own.
#' So this function can be used for conversion between any in-out build combination, using
#' this argument, not just 36--37.
#' @param include.cols logical, whether to include any extra columns (e.g, in addition to positional
#' information) in the output object.
#' @param ... additional arguments to makeGRanges(), so in other words, can use 'start' and
#' 'end' to specify ranges instead of 'pos'.
#' @return Returns positions converted from build 36 to 37 (or equivalent for alternative chain 
#' files). If using the 'ranges' parameter for position input, the object returned will be of
#' the same format. If using chr and pos to input, then the object returned will be a data.frame
#' with columns, chr and pos with rownames 'ids'. Output will be the same length as the input,
#' which is not necessarily the case for liftOver() which does the core part of this conversion.
#' Using vector or GRanges input will give a resulting data.frame or GRanges object respectively
#' that has the same order of rownames as the original input. Using RangedData will result in an
#' output that is sorted by genome order, regardless of the original order. If ranges has no
#' rownames, or if 'ids' is blank when using chr, pos, ids of the form rngXXXX will be generated
#' in order to preserve the original ordering of locations.
#' @export
#' @author Nicholas Cooper \email{[email protected]@cimr.cam.ac.uk}
#' @seealso \code{\link{conv.37.36}}, \code{\link{conv.37.38}},
#'  \code{\link{conv.38.37}}, \code{\link{convTo37}}, \code{\link{convTo36}}
#' @references http://crossmap.sourceforge.net/
#' @examples
#' \donttest{
#' # various chain files downloadable from http://crossmap.sourceforge.net/ #
#' options(ucsc="hg18")
#' gene.labs <- c("CTLA4","IL2RA","HLA-C")
#' snp.ids <- c("rs3842724","rs9729550","rs1815606","rs114582555","rs1240708","rs6603785")
#' pp <- Pos(snp.ids); cc <- Chr(snp.ids)
#' conv.36.37(chr=cc,pos=pp,ids=snp.ids)
#' pp <- Pos(gene.labs)
#' gg <- GRanges(ranges=IRanges(start=pp$start,end=pp$end),seqnames=pp$chr)
#' conv.36.37(gg) # order of output is preserved
#' rr <- as(gg,"RangedData")
#' conv.36.37(rr) # note the result is same as GRanges, but in genome order
#' }
conv.36.37 <- function(ranges=NULL,chr=NULL,pos=NULL,...,ids=NULL,chain.file=NULL,include.cols=TRUE) {
 # require(rtracklayer); #require(genoset) #require(GenomicRanges); 
  if(!is.character(chain.file)) {
    #chain.file <- humarray::hg18ToHg19.over.chain 
    if(is(chain.file)[1]=="Chain") { 
      chn <- chain.file 
    } else {
      #chain.file <- system.file("extdata", "hg18ToHg19.over.chain", package="humarray")
      chn <- humarray::hg18ToHg19
    }
  } else {
    if(!file.exists(chain.file)) { stop("couldn't find chain file: ",chain.file) }
    chn <- import.chain(chain.file)
  }
  #toranged <- F
  outType <- is(ranges)[1]
  inr <- is.null(ranges)
  used.st.en <- all(c("start","end") %in% names(list(...)))
  if(!is.null(chr) & (!is.null(pos) | used.st.en)) {
    if(is.null(pos) & length(chr)==1) {
      if(length(list(...)$start)>1) {
        warning("when using start/end, 'chr' must have the same length as 'start'") 
      }
    }
    if(is.null(ids)) { ids <- paste0("rng",1:(max(length(chr),length(pos)))) } 
    ranges <- makeGRanges(chr=chr,pos=pos,row.names=ids,...)
    orn <- ids
  } else {
  	#prv(ranges)
    if(is.null(rownames(ranges))) { rownames(ranges) <- paste0("rng",1:nrow(ranges)) }    
    orn <- rownames(ranges)
  }
  # return(ranges)
  if(is(ranges)[1]=="RangedData") { ranges <- as(ranges, "GRanges") }
  if(is(ranges)[1] %in% c("RangedData","GRanges")) {
    wd <- width(ranges)
    if(length(which(wd==1))/length(wd)>.95) { SNPs <- TRUE } else { SNPs <- FALSE }
    mcols(ranges)[["XMYINDEXX"]] <- rownames(ranges)
    mcols(ranges)[["XMYCHRXX"]] <- ocr <- chrm(ranges)
    if(include.cols) { 
      meta.data <- mcols(ranges); if(is.null(dim(meta.data))) { meta.data <- as.data.frame(meta.data) }
      rownames(meta.data) <- rownames(ranges)
      oo <- (colnames(meta.data) %in% c("XMYINDEXX","XMYCHRXX"))
      if(length(oo)>0) { meta.data <- meta.data[,-oo,drop=F] }
    }
    #prv(orn,ocr)
    opos <- start(ranges)
    ranges <- set.chr.to.char(ranges)
    #print(head(ranged))
    ranged.gr <- ranges # as(ranges,"GRanges"); #toranged <- T
  } else {
    stop("input specified resulted in an invalid GRanges/RangedData 'ranged' object, type ",is(ranges)[1]) 
  } 
  # change CHR-XY to CHR-X prior to liftOver, then change back #
#  xy.ind <- grep("XY",seqnames(ranged.gr))
  xy.ind <- grep("XY",as.character(seqnames(ranged.gr)))
  if(length(xy.ind)>0) {
    found.xy <- TRUE
    xy.id <- rownames(ranged.gr)[xy.ind]
    if(!"chrX" %in% seqlevels(ranged.gr)) { seqlevels(ranged.gr) <- c(seqlevels(ranged.gr),"chrX") }
    seqnames(ranged.gr)[xy.ind] <- "chrX"
  } else { found.xy <- FALSE }
  ranged.gr.37 <- liftOver(ranged.gr,chn)
  myfun <- function(x) { 
    data.frame(start=minna(start(x)),end=maxna(end(x))) 
  }
  if(!SNPs ) {
    new.coords.df <- do.call("rbind",lapply(ranged.gr.37,myfun))
    ranged.gr.37 <- ranged.gr
    if(!used.st.en & inr) { stop("need start and end arguments unless the dataset is all SNPs (width=1)" ) }
    ranges(ranged.gr.37) <- with(new.coords.df,IRanges(start=start,end=end))
    #seqlevels(ranged.gr.37) <- gsub("chr","",seqlevels(ranged.gr.37))
    seqlevels(ranged.gr.37) <- gsub("chr","",seqlevels(ranged.gr.37))
    out <- ranged.gr.37
  } else {
    seqlevels(ranged.gr.37)<-gsub("chr","",seqlevels(ranged.gr.37))
    #seqnames(ranged.gr.37)<-gsub("chr","",seqnames(ranged.gr.37))
    out <- as(ranged.gr.37,"IRangesList")
    #seqlevels(ranged.gr.37)<-gsub("chr","",seqlevels(ranged.gr.37))
    #new.coords.df <- as.data.frame(ranged.gr.37)
  }
  # seqlevels(ranged.gr.37)<-gsub("chr","",seqlevels(ranged.gr.37))
  # out <- as(ranged.gr.37,"IRangesList")
  out <- as(out,"RangedData")
  #return(ranged.gr.37)
  #ranged.gr.37 <- set.chr.to.numeric(ranged.gr.37)
  #if(!toranged | T) { return(ranged.gr.37) }
  ranged.gr.37 <- out #toGenomeOrder2(out)
  #return(ranged.gr.37)
  if(all(c("XMYINDEXX","XMYCHRXX") %in% colnames(ranged.gr.37))) {
    RN <- ranged.gr.37[["XMYINDEXX"]]
    nr <- nrow(ranged.gr.37)
    MAXDISPLAY <- 50
    if(length(orn)>length(RN)) { 
      cat("conversion failed for",length(orn[!orn %in% RN]),"rows, original positions kept:\n") ;  
      failz <- orn[!orn %in% RN]
      cat(comma(head(failz,MAXDISPLAY))) 
      if(length(failz)>MAXDISPLAY) { cat(", ... and",length(failz)-MAXDISPLAY,"more\n")  } else { cat("\n") }
      ln <- orn[!orn %in% RN]
      #return(ranges)
      newchr <- gsub("chr","",chrm(ranges[match(ln,ranges$XMYINDEXX),]))
      noopos <- start(ranges[match(ln,ranges$XMYINDEXX),])
      hcc <- hard.coded.conv()
      ifNAthen0 <- function(X) { X[is.na(X)] <- 0; return(X) }
      h36 <- which(noopos %in% hcc$pos36 & newchr==ifNAthen0(hcc$chr[match(noopos,hcc$pos36)])); l36 <- length(h36)
      h37 <- which(noopos %in% hcc$pos37 & newchr==ifNAthen0(hcc$chr[match(noopos,hcc$pos37)])); l37 <- length(h37)
      if(l36>=l37 & l36>0) {
        noopos[h36] <- hcc$pos37[match(noopos[h36],hcc$pos36)]
        cat("found",l36,"of the missing SNP hg18-hg19 lookups in an internal table\n")