R/transutils.R
In GGtools: software and data for analyses in genetics of gene expression

Documented in geneIndcol geneNames locusNames nthScores topGenes topScores transeqByChrom transeqByCluster

trimSnps = function( listOfSms, rsidlist ) {
  ans = list()
  #cat("making smlSets with common SNPs: \n")
  for (j in 1:length(listOfSms)) {
  #  cat("smlSet", j, "\nchrom ")
    tmp = listOfSms[[j]]
    for (k in 1:length(rsidlist)) {
  #    cat(k)
      tmp@smlEnv$smList[[k]] = listOfSms[[j]]@smlEnv$smList[[k]][, rsidlist[[k]]]
      }
    ans[[j]] = tmp
  #  cat("\n")
    }
  names(ans) = names(listOfSms)
  #cat("done.\n")
  gc()
  ans
}

intersectSnps = function( listOfSms ) {
  nsms = length(listOfSms)
  nchr = length(smList(listOfSms[[1]]))
  chnames = names(smList(listOfSms[[1]]))
  rsidlist = lapply(smList(listOfSms[[1]]), colnames)
  if (length(listOfSms) == 1) return(rsidlist)
  for (j in 2:nsms)
    for (k in 1:length(rsidlist))
      rsidlist[[k]]  = intersect(rsidlist[[k]], colnames(smList(listOfSms[[j]])[[k]]))
  rsidlist
}

makeCommonSNPs = function( listOfSms ) {
  rsidlist = intersectSnps( listOfSms )
  trimSnps( listOfSms, rsidlist )
}


topKfeats = function(mgr, K, fn="inds1.ff", batchsize=200,
   feat = c("score", "ind", "geneind"), ffind=1, ginds ) {
#
# given an eqtlTestsManager instance, presumably generated on one
# chromosome worth of SNP (thus ffind=1 by default), find the
# top K features per SNP in an nsnp x K ff matrix
# 
# to keep things simple, we have separate treatments of values (feat == "score") and
# names of features (feat == "geneind"), where the latter are assumed to be integer
# indices into a vector of probe names
#
# it would of course be very nice to compute the order once and apply it to both
# the scores and the names simultaneously.  i am not sure it will be beneficial
#
     intests = mgr@fffile
     #thevmode = "short"
     #if (feat == "ind" | feat == "geneind") thevmode = "integer"
     if (feat == "score") op = function(x)sort(x, decreasing=TRUE)[1:K]
#     else if (feat == "ind") op = function(x)order(x, decreasing=TRUE)[1:K]
     else if (feat == "geneind") op = function(x)ginds[
                                        order(x,decreasing=TRUE)[1:K]]
     else stop("feat not recognized")
     tmp = ffrowapply(
       t(apply(intests[i1:i2,,drop=FALSE],1,op)),
       X=intests, RETURN=TRUE, RETCOL=K,
       BATCHSIZE=batchsize, VMODE="integer")
     ff(tmp, filename=fn, dim=c(nrow(intests),K), overwrite=TRUE,
       vmode="integer")
       }

.updateKfeats = function( sco1, sco2, ind1, ind2, batchsize=500 ) {
#
# will overwrite sco1, ind1 with the improvements available in sco2, ind2
#
   snchunk = chunk(1, nrow(sco1), by=batchsize)
   K=ncol(sco1)
   for (i in 1:length(snchunk)) {
      i1 = snchunk[[i]][1]
      i2 = snchunk[[i]][2]
      scos = cbind(sco1[i1:i2,,drop=FALSE], sco2[i1:i2,,drop=FALSE])
      ginds = cbind(ind1[i1:i2,,drop=FALSE], ind2[i1:i2,,drop=FALSE])
      rowwiseExtract = function(x,y) t(sapply(1:nrow(x), function(row) x[row,][y[row,]]))
      chind = t(apply(scos, 1, function(x)order(x,decreasing=TRUE)[1:K]))
      sco1[i1:i2,] = rowwiseExtract( scos, chind )
      ind1[i1:i2,] = rowwiseExtract( ginds, chind )
      }
   invisible(NULL)
}


cisZero = function(mgr, snpRanges, geneRanges, radius) {
#
# this function addresses the problem of excluding same-chromosome cis tests
# up to a certain radius around each gene
#
# given a manager, all tests for SNP within radius of each gene are set to zero
#
        dimnt = dimnames(mgr@fffile)
        oks = gsub("rs", "", dimnt[[1]])
        okg = dimnt[[2]]
        srids = values(snpRanges)$RefSNP_id
        if (is.null(srids)) {# perhaps naming convention used
             srids = names(snpRanges)
             if (length(grep("rs", srids))>0) srids = gsub("rs", "", srids)
             }
        if (length(srids) > 0) warning("had to take snp names from names(snpRanges)")
        if (!isTRUE(all(okg %in% names(geneRanges))))  {
            warning("geneRanges does not include names/ranges for all colnames of mgr fffile")
            okg = intersect( okg, names(geneRanges))
            }
        if (!isTRUE(all(oks %in% srids)))  {
            warning("snpRanges does not include names/ranges for all rownames of mgr fffile")
 # match and numeric indexing much more efficient than names for large objects
            oks = match(oks, srids, nomatch=0)
            if (length(oks) == 0) stop("snpRanges does not include any snps in mgr")
            }
 # can't use zero indices in subsetting GRanges...
        ol = findOverlaps(snpRanges[oks[oks>0]], geneRanges[okg] + 
            radius)
        matm = as.matrix(ol)
        if (nrow(matm) > 0) {
            mgr@fffile[matm] = 0
        }
    }

transScores.legacy = function (smpack, snpchr = "chr1", rhs, K = 20, targdirpref = "tsco", 
    geneApply = lapply, chrnames = paste("chr", as.character(1:22), sep=""), 
    geneRanges = NULL, snpRanges = NULL, radius = 2e+06, renameChrs=NULL, 
    probesToKeep=NULL, batchsize=200, genegran=50, shortfac=10, wrapperEndo=NULL,
    geneannopk = "illuminaHumanv1.db", snpannopk = snplocsDefault(),
    gchrpref = "", schrpref="ch", exFilter=function(x)x, 
    smFilter=function(x)x, SSgen=GGBase::getSS)
{
  stopifnot(length(snpchr)==1)

#getCisMap = function( radius=50000, gchr="20",
#  schr="ch20", geneannopk="illuminaHumanv1.db",
#     snpannopk=snplocsDefault(), as.GRangesList=FALSE ) {

#
# objective is a small-footprint accumulation of trans-eQTL tests
#  smpack is a lightweight smlSet package name
#  snpchr is the chromosome for which SNPs will be tested
#  rhs is the right hand side of formula for snp.rhs.tests in snpTests
#  K is the number of best features to be retained (per SNP) as we explore the transcriptome
#  targdirpref is used to define the eqtlTests targdir setting for transient and persistent ff files
#  chrnames is used to enumerate chromosomes for which probes will be identified using the
#    annotation package named in the smlSet -- the "chr" will be stripped
#  geneRanges and snpRanges are used to identify cis tests (relative to radius), which are set to zero
#  renameChrs is available in case the smList elements of the smlSet need to be renamed
#  probesToKeep is used to define a hard restriction on the expression data in the smlSet
#  batchsize is for applications of ffrowapply
#  genegran is for reporting as eqtlTests runs, if options()$verbose is TRUE
#
    #if (length(chrnames) < 2) 
    #    stop("must have length(chrnames) >= 2")
    theCall = match.call()
#
# get an image of the expression+genotype data for SNP on specific chromosome snpchr
#
    sms = SSgen(smpack, snpchr, renameChrs=renameChrs, probesToKeep=probesToKeep, 
       wrapperEndo=wrapperEndo, exFilter=exFilter)
    sms = smFilter(sms)
    if (!is.null(renameChrs)) snpchr=renameChrs
    guniv = featureNames(sms)   # universe of probes
    smanno = gsub(".db", "", annotation(sms))
    require(paste(smanno, ".db", sep = ""), character.only = TRUE)
    clcnames = gsub("chr", "", chrnames)  # typical chrom nomenclature of bioconductor
    pnameList = mget(clcnames, revmap(get(paste(smanno, "CHR", 
        sep = ""))))
 # be sure to use only genes that are on arrays in sms
    pnameList = lapply(pnameList, function(x) intersect(x, guniv))
    names(pnameList) = chrnames  # now the universe of probes is split into chromsomes
    todrop = which(sapply(pnameList, length)==0)
    if (length(todrop)>0) pnameList = pnameList[-todrop]
    genemap = lapply(pnameList, function(x) match(x, guniv))  # numerical indices for probes
    nchr_genes = length(names(pnameList))
    targdir = paste(targdirpref, snpchr, sep="")
#
# start with first element of chrnames vector
#
    inimgr = eqtlTests(sms[probeId(pnameList[[chrnames[1]]]),   # start the sifting through transcriptome
        ], rhs, targdir = targdir, runname = paste("tsc_", chrnames[1],  # testing on genes in chrom 1
        sep = ""), geneApply = geneApply, shortfac=shortfac)
    if (gsub("chr", "", snpchr) == gsub("chr", "", chrnames[1])) {
        mapobj = getCisMap( radius = radius, gchr = paste(gchrpref, chrnames[1], sep=""),
                  schr = paste(schrpref, gsub("chr", "", snpchr), sep=""), geneannopk=geneannopk, snpannopk = snpannopk )
        cisZero(inimgr, mapobj@snplocs, mapobj@generanges, radius=0)   # if SNP are on chrom 1, exclude cis
                             # the gene ranges supplied are already augmented by radius
    }
    topKinds = topKfeats(inimgr, K = K, fn = paste(targdir, "/",  # sort and save
        snpchr, "_tsinds1_1.ff", sep = ""), feat = "geneind", 
        ginds = genemap[[1]], batchsize=batchsize)
    topKscores = topKfeats(inimgr, K = K, fn = paste(targdir, 
        "/", snpchr, "_tssco1_1.ff", sep = ""), feat = "score", 
        ginds = genemap[[1]], batchsize=batchsize)
    kpsnpnames = rownames(inimgr@fffile)
    unlink(filename(inimgr@fffile))
    if (nchr_genes > 1) for (j in 2:nchr_genes) {    # continue sifting through transcriptome
        cat(j)
        gc()
        nxtmgr = eqtlTests(sms[probeId(pnameList[[chrnames[j]]]), 
            ], rhs, targdir = targdir, runname = paste("tsctmp", 
            j, sep = ""), geneApply = geneApply, 
            shortfac=shortfac)
        if (gsub("chr", "", snpchr) == gsub("chr", "", chrnames[j])) {
            mapobj = getCisMap( radius = radius, gchr = paste(gchrpref, chrnames[j], sep=""),
                  schr = paste(schrpref, gsub("chr", "", snpchr), sep=""), geneannopk=geneannopk, snpannopk = snpannopk )
#
# huge bug below, nxtmgr was previously inimgr ... found Aug 21 2012
#
            cisZero(nxtmgr, mapobj@snplocs, mapobj@generanges, radius=0)   # if SNP are on chrom 1, exclude cis
                             # the gene ranges supplied are already augmented by radius
        }
        nxtKinds = topKfeats(nxtmgr, K = K, fn = paste(targdir, 
            "indscratch.ff", sep = ""), feat = "geneind", ginds = genemap[[j]], 		batchsize=batchsize)
        nxtKscores = topKfeats(nxtmgr, K = K, fn = paste(targdir, 
            "scoscratch.ff", sep = ""), feat = "score", ginds = genemap[[j]],
                batchsize=batchsize)
        updateKfeats(topKscores, nxtKscores, topKinds, nxtKinds, batchsize=batchsize)  
        unlink(filename(nxtmgr@fffile))   # kill off scratch materials
        unlink(paste(targdir, "indscratch.ff", sep = ""))
        unlink(paste(targdir, "scoscratch.ff", sep = ""))
    }
    baseout = list(scores = topKscores, inds = topKinds, guniv = guniv, K=K, snpchr=snpchr,
        chrnames=chrnames,
	smsanno = annotation(sms),
# previous to 21 aug 2012 snpnames were assigned here from rownames inimgr@fffile; these are now
# memoed above
        snpnames = kpsnpnames, call = theCall, date=date(), shortfac=shortfac)
    new("transManager", base=baseout)
}

updateKfeats = function( sco1, sco2, ind1, ind2, batchsize=200 ) {
#
# will overwrite sco1, ind1 with the improvements available in sco2, ind2
#  this version reduces reliance on ff for sorting tasks
#
   snchunk = chunk(1, nrow(sco1), by=batchsize)
   K=ncol(sco1)
      rsco1 = as.ram(sco1)
      rsco2 = as.ram(sco2)
      rind1 = as.ram(ind1)
      rind2 = as.ram(ind2)
   for (i in 1:length(snchunk)) {
      i1 = snchunk[[i]][1]
      i2 = snchunk[[i]][2]
      scos = cbind(rsco1[i1:i2,,drop=FALSE], rsco2[i1:i2,,drop=FALSE])
      ginds = cbind(rind1[i1:i2,,drop=FALSE], rind2[i1:i2,,drop=FALSE])
      rowwiseExtract = function(x,y) t(sapply(1:nrow(x), function(row) x[row,][y[row,]]))
      chind = t(apply(scos, 1, function(x)order(x,decreasing=TRUE)[1:K]))
      sco1[i1:i2,] = rowwiseExtract( scos, chind )
      ind1[i1:i2,] = rowwiseExtract( ginds, chind )
      }
   rm(rsco1)
   rm(rsco2)
   rm(rind1)
   rm(rind2)
   gc()
   invisible(NULL)
}


bindSnpRanges2mgr = function (mgr, snpRanges, badstart=0L, badend=1e9L) # longer than longest chr
{
#
# this will get a GRanges instance from snpRanges congruent to mgr snp info
# aug 20 2012
# when a SNP is not given location in snpRanges, it will be assumed cis to any gene on same chr
# as it will be given a long interval for findOverlaps
#
    dimnt = dimnames(mgr@fffile)
    snpsInMgr = dimnt[[1]]
    if (!is.null(values(snpRanges)$RefSNP_id)) {
        snpsInMgr = gsub("rs", "", snpsInMgr)
        names(snpRanges) = as.character(values(snpRanges)$RefSNP_id)
    }
    if (is.null(names(snpRanges)))
        stop("need names on snpRanges instances if not a SNPlocs.Hsapiens.* derivative")
    fullsr = IRanges(rep(badstart, length(snpsInMgr)), rep(badend, length(snpsInMgr)))
    sn = seqnames(snpRanges)[1]
    fullsr = GRanges(seqnames = sn, fullsr)
    names(fullsr) = snpsInMgr
    matup = match(names(fullsr), names(snpRanges), nomatch = 0)
    IRanges::end(fullsr[which(matup > 0)]) = IRanges::end(snpRanges[matup[matup>0]])
    IRanges::start(fullsr[which(matup > 0)]) = IRanges::start(snpRanges[matup[matup>0]])
    fullsr
}

bindGeneRanges2mgr = function (mgr, geneRanges, badstart=-6, badend=-5)
{
#
# this will get a GRanges instance from geneRanges congruent to mgr gene info
#
    dimnt = dimnames(mgr@fffile)
    genesInMgr = dimnt[[2]]
    fullgr = IRanges(rep(badstart, length(genesInMgr)), rep(badend, length(genesInMgr)))
    sn = seqnames(geneRanges)[1]
    fullgr = GRanges(seqnames = sn, fullgr)
    names(fullgr) = genesInMgr
    matup = match(names(fullgr), names(geneRanges), nomatch = 0)
    IRanges::end(fullgr[which(matup > 0)]) = IRanges::end(geneRanges[matup[matup>0]])
    IRanges::start(fullgr[which(matup > 0)]) = IRanges::start(geneRanges[matup[matup>0]])
    fullgr
}


cisZero = function (mgr, snpRanges, geneRanges, radius) 
{
#
# use geneRanges+radius to define cis to gene intervals
# find tests of SNPs in these intervals
# set their scores in mgr to zero
#

#getCisMap = function( radius=50000, gchr="20",
#  schr="ch20", geneannopk="illuminaHumanv1.db",
#     snpannopk=snplocsDefault(), as.GRangesList=FALSE ) {

    realSnpRanges = bindSnpRanges2mgr( mgr, snpRanges )
    realGeneRanges = bindGeneRanges2mgr( mgr, geneRanges )
    ol = findOverlaps(realSnpRanges, realGeneRanges +
        radius)
    matm = as.matrix(ol)
    if (nrow(matm) > 0) {
        mgr@fffile[matm] = 0
    }
}

topScores = function(tm) {
 tm@base$scores[,1]/tm@base$shortfac
}
topGenes = function(tm) {
 tm@base$guniv[ tm@base$inds[,1] ]
}
locusNames = function(tm) tm@base$snpnames
geneNames = function(tm) tm@base$guniv
geneIndcol = function(tm, col) tm@base$inds[,col]
nthScores = function(tm, n) {
 tm@base$scores[,n]/tm@base$shortfac
}



setGeneric("transTab", function(x, snps2keep, ...)standardGeneric("transTab"))
setMethod("transTab", c("transManager", "missing"), function(x, snps2keep, ...) {
 .transTab(x@base, NULL, ...)
})
setMethod("transTab", c("transManager", "character"), function(x, snps2keep, ...) {
 .transTab(x@base, snps2keep, ...)
})

.transTab = function( x, snps2keep=NULL, ... ) {
 gannopkname = x$smsanno
 K = x$K
 sids = rep(x$snpnames, each=K )
 gtfs = rep(x$min.gtf, each=K )
 mafs = rep(x$tr.maf, each=K )
 thescos = as.numeric(t(as.ram(x$sco)))/x$shortfac
 theinds = as.numeric(t(as.ram(x$inds)))
 require( gannopkname, character.only=TRUE)
 ganno = gsub(".db", "", gannopkname )
 gloc = sapply(mget(x$guniv, get(paste(ganno, "CHRLOC", sep="")), ifnotfound=NA), "[", 1)
 glocend = sapply(mget(x$guniv, get(paste(ganno, "CHRLOCEND", sep="")), ifnotfound=NA), "[", 1)
 gchr = sapply(mget(x$guniv, get(paste(ganno, "CHR", sep="")), ifnotfound=NA), "[", 1)
 gsym = sapply(mget(x$guniv, get(paste(ganno, "SYMBOL", sep="")), ifnotfound=NA), "[", 1)
 if (gannopkname != "org.Hs.eg.db") gent = sapply(mget(x$guniv, get(paste(ganno, "ENTREZID", sep="")), ifnotfound=NA), "[", 1)
 else gent = x$guniv
 gn = x$guniv[ theinds ]
 gchr = gchr[ theinds ]
 gsym = gsym[ theinds ]
 gent = gent[ theinds ]
 gloc = gloc[ theinds ]
 glocend = glocend[ theinds ]
 okinds = 1:length(sids)
 if (!is.null(snps2keep)) okinds = which(sids %in% snps2keep) 
#
# we need to retrieve the snp locations.  in the trans setting these are only necessary
# when the probe chromosome coincides with snp chromosome, so not always available
# 
 simple = data.frame(snp=sids[okinds], MAF=mafs, GTF=gtfs, chisq=thescos[okinds], probeid=gn[okinds] , probechr=gchr[okinds], snpchr=x$snpchr,
    sym=gsym[okinds], entrez=gent[okinds], geneloc=gloc[okinds],
    genelocend=glocend[okinds], stringsAsFactors=FALSE)
 sldef = snplocsDefault()
 require(sldef, character.only=TRUE)
 slocObj = get(sldef)
 stag=x$snpchr[1]  
 stopifnot(all(x$snpchr == stag))
 slpos = snpsBySeqname(slocObj, stag)
 sl = start(slpos)
 sln = mcols(slpos)$RefSNP_id
 names(sl) = sln
 okn = intersect(simple$snp, sln)
 simple = simple[which(simple$snp %in% okn),]
 starts = start(sl[simple$snp])
 simple$snploc = starts
 data.table(simple)
}


treloc = function (old = "@@REPLACE_BY_RELOCATE@@", new, obj) 
{
# not for export
    tmp = obj
    fref = attr(attributes(tmp@base[["scores"]])[["physical"]], 
        "filename")
    ans = gsub(old, new, fref)
    attr(attributes(tmp@base[["scores"]])[["physical"]], "filename") = ans
    fref = attr(attributes(tmp@base[["inds"]])[["physical"]], 
        "filename")
    ans = gsub(old, new, fref)
    attr(attributes(tmp@base[["inds"]])[["physical"]], "filename") = ans
    obj = tmp
    obj
}


tr1_perm = function () 
{
# for export
    ans = get(load(system.file("transObjs/ptr1.rda", package = "GGtools")))
    treloc("@@REPLACE_BY_RELOCATE@@", system.file(package = "GGtools"), 
        ans)
}

tr1_obs = function () 
{
# for export
    ans = get(load(system.file("transObjs/tr1.rda", package = "GGtools")))
    treloc("@@REPLACE_BY_RELOCATE@@", system.file(package = "GGtools"), 
        ans)
}


mtransScores = function (smpackvec, snpchr = "chr1", rhslist, K = 20, targdirpref = "multtsco", 
    geneApply = lapply, chrnames = paste("chr", as.character(1:22), sep=""), 
    geneRanges = NULL, snpRanges = NULL, radius = 2e+06, renameChrs=NULL,
    batchsize=200, genegran=50, probesToKeep=NULL, shortfac=10, 
    wrapperEndo=NULL, SSgen=GGBase::getSS) 
{
#
# objective is a small-footprint accumulation of trans-eQTL tests 
#      summed over different cohorts
#  smpackvec is a vector of lightweight smlSet package name
#  snpchr is the chromosome for which SNPs will be tested
#  rhslist is the right hand side of formula for snp.rhs.tests in snpTests
#  K is the number of best features to be retained as we explore the transcriptome
#
#
    if (length(chrnames) < 2) 
        stop("must have length(chrnames) >= 2")
    theCall = match.call()
#
# get an image of the expression+genotype data for SNP on specific chromosome snpchr
#
    smsl = lapply(smpackvec, function(x) SSgen(x, snpchr, renameChrs=renameChrs,
       probesToKeep=probesToKeep, wrapperEndo=wrapperEndo))
    if (!is.null(renameChrs)) snpchr=renameChrs
    smsl = makeCommonSNPs(smsl) # could be optional
    names(smsl) = smpackvec
    sms = smsl[[1]]
    guniv = featureNames(sms)   # universe of probes
    smanno = gsub(".db", "", annotation(sms))
    require(paste(smanno, ".db", sep = ""), character.only = TRUE)
    clcnames = gsub("chr", "", chrnames)  # typical chrom nomenclature of bioconductor
    pnameList = mget(clcnames, revmap(get(paste(smanno, "CHR", 
        sep = ""))))
 # be sure to use only genes that are on arrays in sms
    pnameList = lapply(pnameList, function(x) intersect(x, guniv))
    names(pnameList) = chrnames  # now the universe of probes is split into chromsomes
    todrop = which(sapply(pnameList, length)==0)
    if (length(todrop)>0) pnameList = pnameList[-todrop]
    genemap = lapply(pnameList, function(x) match(x, guniv))  # numerical indices for probes
    nchr_genes = length(names(pnameList))
    targdir = paste(targdirpref, snpchr, sep="")
    cursmsl = lapply(smsl, function(x) x[probeId(pnameList[[chrnames[1]]]),])
    inimgr = meqtlTests(cursmsl,   # start the sifting through transcriptome
         rhslist, targdir = targdir, runname = paste("tsc_", chrnames[1],  # testing on genes in chrom 1
        sep = ""), geneApply = geneApply, shortfac=shortfac)
    rm(cursmsl); gc()
    if (gsub("chr", "", snpchr) == gsub("chr", "", chrnames[1])) {
        if (is.null(geneRanges) || is.null(snpRanges)) 
            stop("ranges must be supplied to exclude cis tests")
        cisZero(inimgr, snpRanges, geneRanges, radius)   # if SNP are on chrom 1, exclude cis
    }
    topKinds = topKfeats(inimgr, K = K, fn = paste(targdir, "/",  # sort and save
        snpchr, "_tsinds1_1.ff", sep = ""), feat = "geneind", 
        ginds = genemap[[1]], batchsize=batchsize)
    topKscores = topKfeats(inimgr, K = K, fn = paste(targdir, 
        "/", snpchr, "_tssco1_1.ff", sep = ""), feat = "score", 
        ginds = genemap[[1]], batchsize=batchsize)
    unlink(filename(inimgr@fflist[[1]]))
    for (j in 2:nchr_genes) {    # continue sifting through transcriptome
        cat(j)
        gc()
        cursmsl = lapply(smsl, function(x) x[probeId(pnameList[[chrnames[j]]]),])
        nxtmgr = meqtlTests(cursmsl, rhslist, targdir = targdir, 
            runname = paste("tsctmp", 
            j, sep = ""), geneApply = geneApply, shortfac=shortfac)
        rm(cursmsl); gc()
        if (gsub("chr", "", snpchr) == gsub("chr", "", chrnames[j])) {
            if (is.null(geneRanges) || is.null(snpRanges)) 
                stop("ranges must be supplied to exclude cis tests")
            cisZero(nxtmgr, snpRanges, geneRanges, radius)
            }
        nxtKinds = topKfeats(nxtmgr, K = K, fn = paste(targdir, 
            "indscratch.ff", sep = ""), feat = "geneind", ginds = genemap[[j]], batchsize=batchsize)
        nxtKscores = topKfeats(nxtmgr, K = K, fn = paste(targdir, 
            "scoscratch.ff", sep = ""), feat = "score", ginds = genemap[[j]], batchsize=batchsize)
        updateKfeats(topKscores, nxtKscores, topKinds, nxtKinds, batchsize=batchsize)  
        unlink(filename(nxtmgr@fflist[[1]]))   # kill off scratch materials
        unlink(paste(targdir, "indscratch.ff", sep = ""))
        unlink(paste(targdir, "scoscratch.ff", sep = ""))
    }

    baseout = list(scores = topKscores, inds = topKinds, guniv = guniv, K=K, snpchr=snpchr,
        chrnames=chrnames,
	smsanno = annotation(sms),
        snpnames = rownames(inimgr@fffile), call = theCall, date=date(), shortfac=shortfac)
    new("transManager", base=baseout)
}

transScores = function ( tconfig ) {
  snpchr = snpchr(tconfig)
  stopifnot(length(snpchr)==1)
  smpack = smpack(tconfig)
  rhs = rhs(tconfig)
  K = gbufsize(tconfig)
  targdirpref = folderStem(tconfig)
  geneApply = geneApply(tconfig)
  chrnames = chrnames(tconfig)
  renameChrs = paste0(smchrpref(tconfig), chrnames)
  radius = radius(tconfig)
  geneRanges = snpRanges = NULL
  batchsize = batchsize(tconfig)
  shortfac = shortfac(tconfig)
  probesToKeep = wrapperEndo = NULL
  geneannopk = geneannopk(tconfig)
  snpannopk = snpannopk(tconfig)
  gchrpref = gchrpref(tconfig)
  schrpref = schrpref(tconfig)
  exFilter = exFilter(tconfig)
  smFilter = smFilter(tconfig)
  SSgen = SSgen(tconfig)
  
    theCall = match.call()
#
# get an image of the expression+genotype data for SNP on specific chromosome snpchr
#
    sms = SSgen(smpack, snpchr, renameChrs=snpchr, probesToKeep=probesToKeep, 
       wrapperEndo=wrapperEndo, exFilter=exFilter)
    sms = smFilter(sms)
#    if (!is.null(renameChrs)) snpchr=renameChrs
    guniv = featureNames(sms)   # universe of probes
    smanno = gsub(".db", "", annotation(sms))
    require(paste(smanno, ".db", sep = ""), character.only = TRUE)
    clcnames = gsub("chr", "", chrnames)  # typical chrom nomenclature of bioconductor
# NOOP if no prefix to begin with
    pnameList = mget(clcnames, revmap(get(paste(smanno, "CHR", 
        sep = ""))))
 # be sure to use only genes that are on arrays in sms
    pnameList = lapply(pnameList, function(x) intersect(x, guniv))
    names(pnameList) = chrnames  # now the universe of probes is split into chromsomes
    todrop = which(sapply(pnameList, length)==0)
    if (length(todrop)>0) pnameList = pnameList[-todrop]
    genemap = lapply(pnameList, function(x) match(x, guniv))  # numerical indices for probes
    nchr_genes = length(names(pnameList))
    targdir = paste(targdirpref, snpchr, sep="")
#
# start with first element of chrnames vector
#
    inimgr = eqtlTests(sms[probeId(pnameList[[chrnames[1]]]),   # start the sifting through transcriptome
        ], rhs, targdir = targdir, runname = paste("tsc_", chrnames[1],  # testing on genes in chrom 1
        sep = ""), geneApply = geneApply, shortfac=shortfac)
    if (gsub("chr", "", snpchr) == gsub("chr", "", chrnames[1])) {
        mapobj = getCisMap( radius = radius, gchr = paste(gchrpref, chrnames[1], sep=""),
                  schr = paste(schrpref, gsub("chr", "", snpchr), sep=""), geneannopk=geneannopk, snpannopk = snpannopk )
        cisZero(inimgr, mapobj@snplocs, mapobj@generanges, radius=0)   # if SNP are on chrom 1, exclude cis
                             # the gene ranges supplied are already augmented by radius
    }
    topKinds = topKfeats(inimgr, K = K, fn = paste(targdir, "/",  # sort and save
        snpchr, "_tsinds1_1.ff", sep = ""), feat = "geneind", 
        ginds = genemap[[1]], batchsize=batchsize)
    topKscores = topKfeats(inimgr, K = K, fn = paste(targdir, 
        "/", snpchr, "_tssco1_1.ff", sep = ""), feat = "score", 
        ginds = genemap[[1]], batchsize=batchsize)
    kpsnpnames = rownames(inimgr@fffile)
    unlink(filename(inimgr@fffile))
    if (nchr_genes > 1) for (j in 2:nchr_genes) {    # continue sifting through transcriptome
        cat(j)
        gc()
        nxtmgr = eqtlTests(sms[probeId(pnameList[[chrnames[j]]]), 
            ], rhs, targdir = targdir, runname = paste("tsctmp", 
            j, sep = ""), geneApply = geneApply, 
            shortfac=shortfac)
        if (gsub("chr", "", snpchr) == gsub("chr", "", chrnames[j])) {
            mapobj = getCisMap( radius = radius, gchr = paste(gchrpref, chrnames[j], sep=""),
                  schr = paste(schrpref, gsub("chr", "", snpchr), sep=""), geneannopk=geneannopk, snpannopk = snpannopk )
#
# huge bug below, nxtmgr was previously inimgr ... found Aug 21 2012
#
            cisZero(nxtmgr, mapobj@snplocs, mapobj@generanges, radius=0)   # if SNP are on chrom 1, exclude cis
                             # the gene ranges supplied are already augmented by radius
        }
        nxtKinds = topKfeats(nxtmgr, K = K, fn = paste(targdir, 
            "indscratch.ff", sep = ""), feat = "geneind", ginds = genemap[[j]], 		batchsize=batchsize)
        nxtKscores = topKfeats(nxtmgr, K = K, fn = paste(targdir, 
            "scoscratch.ff", sep = ""), feat = "score", ginds = genemap[[j]],
                batchsize=batchsize)
        updateKfeats(topKscores, nxtKscores, topKinds, nxtKinds, batchsize=batchsize)  
        unlink(filename(nxtmgr@fffile))   # kill off scratch materials
        unlink(paste(targdir, "indscratch.ff", sep = ""))
        unlink(paste(targdir, "scoscratch.ff", sep = ""))
    }
    ss.gtf = GGBase:::smlSummary(sms)[[1]]  # only one chrom
    ss.maf = ss.gtf[,"MAF"]
    names(ss.maf) = rownames(ss.gtf)
    mingtfs = apply(ss.gtf, 1, function(z) min(z[c("P.AA", "P.AB", "P.BB")]))
    names(mingtfs) = rownames(ss.gtf)
    if (length(setdiff(kpsnpnames, names(mingtfs)))>0) warning("some kept snp  had no GTF")
    min.gtf = mingtfs[kpsnpnames]
    tr.maf = ss.maf[kpsnpnames]

    baseout = list(scores = topKscores, inds = topKinds, guniv = guniv, K=K, snpchr=snpchr,
        chrnames=chrnames,
	smsanno = annotation(sms),
# previous to 21 aug 2012 snpnames were assigned here from rownames inimgr@fffile; these are now
# memoed above
        snpnames = kpsnpnames, call = theCall, date=date(), shortfac=shortfac, min.gtf=min.gtf, tr.maf=tr.maf,
        config = tconfig)
    new("transManager", base=baseout)  # note includes references to ff in baseout$scores
}


cleanup_transff = function(x) {
 fn = attr(attr(x@base$scores, "physical"), "filename")
 comps = strsplit(fn, "/")[[1]]
 nel = length(comps)
 unlink(comps[nel-1], recursive=TRUE)
}


transeqByCluster = function( cl, snpchrs=c("chr21", "chr22"), exchrs=1:22, baseconf, targname="transrun_", nperm=1, inseed=1234, ... ) {
 RNGkind("L'Ecuyer-CMRG")
 set.seed(inseed)
 baseconf <<- baseconf
 targname <<- targname
 clusterExport(cl, "baseconf")
 clusterExport(cl, "targname")
 assign(cfn <- paste0(targname, "baseconf"), baseconf)
 save(list=cfn, file=paste0(cfn, ".rda"))
 ttabs = clusterApplyLB( cl, snpchrs, function(x) {
    gc() 
    snpchr(baseconf) = x # only manipulation apart from init prior to cal
    tab <- transTab( tmp <- transScores( baseconf ) ) 
    cleanup_transff(tmp) 
    pscolist = vector("list", nperm)
    g = smFilter(baseconf)
    for (k in 1:nperm) {
       smFilter(baseconf) = function(x) g(permEx(x))  # ADD A PERMUTATION
       pscolist[[k]] = transTab( tmp <- transScores( baseconf ) )$chisq
       cleanup_transff(tmp) 
       }
    pnames = paste0("permScore_", 1:nperm)
    for (k in 1:nperm) tab[[pnames[k]]] = pscolist[[k]]
    obn = paste0(targname, x)
    assign(obn, tab)
    save(list=obn, file=paste0(obn, ".rda"))  # one data table per chrom
 } )
 invisible(NULL)
}

transeqByChrom = function( snpchr="chr22", 
   exchrs=1:22, baseconf, targname="transrun_", nperm=1, inseed=1234, ... ) {
 stopifnot(length(snpchr)==1)
 RNGkind("L'Ecuyer-CMRG")
 set.seed(inseed)
 assign(cfn <- paste0(targname, "baseconf"), baseconf)
 save(list=cfn, file=paste0(cfn, ".rda"))
    gc() 
    snpchr(baseconf) = snpchr # only manipulation apart from init prior to cal
    tab <- transTab( tmp <- transScores( baseconf ) ) 
    cleanup_transff(tmp) 
    pscolist = vector("list", nperm)
    g = smFilter(baseconf)
    for (k in 1:nperm) {
       smFilter(baseconf) = function(x) g(permEx(x))  # ADD A PERMUTATION
       pscolist[[k]] = transTab( tmp <- transScores( baseconf ) )$chisq
       cleanup_transff(tmp) 
       }
    pnames = paste0("permScore_", 1:nperm)
    for (k in 1:nperm) tab[[pnames[k]]] = pscolist[[k]]
    obn = paste0(targname, snpchr)
    assign(obn, tab)
    save(list=obn, file=paste0(obn, ".rda"))  # one data table per chrom
    tab
}
Any scripts or data that you put into this service are public.
GGtools documentation built on Nov. 8, 2020, 6:32 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
GGtools
software and data for analyses in genetics of gene expression

R/transutils.R
In GGtools: software and data for analyses in genetics of gene expression

Defines functions transeqByChrom transeqByCluster cleanup_transff .transTab nthScores geneIndcol geneNames locusNames topGenes topScores updateKfeats cisZero .updateKfeats topKfeats makeCommonSNPs intersectSnps trimSnps

Documented in geneIndcol geneNames locusNames nthScores topGenes topScores transeqByChrom transeqByCluster

Try the GGtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GGtools software and data for analyses in genetics of gene expression

R/transutils.R In GGtools: software and data for analyses in genetics of gene expression

Defines functions transeqByChrom transeqByCluster cleanup_transff .transTab nthScores geneIndcol geneNames locusNames topGenes topScores updateKfeats cisZero .updateKfeats topKfeats makeCommonSNPs intersectSnps trimSnps

Documented in geneIndcol geneNames locusNames nthScores topGenes topScores transeqByChrom transeqByCluster

Try the GGtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GGtools
software and data for analyses in genetics of gene expression

R/transutils.R
In GGtools: software and data for analyses in genetics of gene expression