#### Functions to interact with happy genomecache
#### Written by Will Valdar - from bagpipe.backend package
assert.happy <- function(h)
{
if (!inherits(h, "happy.genome"))
{
stop("Object must be of class happy.genome\n")
}
}
happy.get.allowed.models <- function()
{
c("genotype", "additive", "full", "full.asymmetric")
}
happy.get.bp <- function(ha, markers)
{
ha$genotype$genome$bp[
match(markers, ha$genotype$genome$marker)
]
}
happy.get.chromosome <- function(h, markers)
{
assert.happy(h)
h$genotype$genome$chromosome[match(markers, h$genotype$genome$marker)]
}
happy.get.interval.length <- function(h, markers, scale="bp", fudge.bp=FALSE)
# return the length of the interval
{
d <- rep(NA, length(markers))
i <- match(markers, h$genotype$genome$marker)
if ("bp"==scale | "Mb"==scale)
{
start <- h$genotype$genome$bp[i]
end <- h$genotype$genome$bp[i+1]
d <- end - start
ok=!is.na(d)
if (any(d[ok] < 0))
{
if (fudge.bp)
{
stop("Fudging is currently deprecated\n")
# find next bp on same chrom that is higher than current bp
fudges <- which(d < 0)
for (f in fudges)
{
i.end <- which( h$genotype$genome$bp > h$genotype$genome$bp[i[f]]
& h$genotype$genome$chromosome == h$genotype$genome$chromosome[i[f]]
)[1]
d[f] <- h$genotype$genome$bp[i.end] - start[f]
}
}
else
{
warning("Cannot calculate lengths for the following intervals",
" because their right-flank markers have a lower bp than their",
" left-flank markers:",
paste( markers[which(d < 0)], collapse=", "),
"\n")
d[ 0 > d ] <- NA
}
}
if ("Mb"==scale) d <- d/1e6
}
else
{
start <- h$genotype$genome$map[i]
end <- h$genotype$genome$map[i+1]
d <- end - start
d[ 0 > d ] <- NA # deals with intervals of negative length
}
return (d)
}
happy.get.markers <- function(h,
chromosome = NULL,
model = "genotype",
as.intervals = TRUE)
{
assert.happy(h)
if (!all(happy.has.model(h, unique(c(model, "genotype")))))
{
i <- happy.has.model(h, unique(c(model, "genotype")))
stop("Model ", model[!i], " not loaded\n")
}
# optimized case
if (is.null(chromosome))
{
if (!as.intervals)
{
return ( h$genotype$markers )
}
if ("genotype"!=model && as.intervals)
{
return ( h[[model]]$markers )
}
}
# inefficient but general
markers <- h$genotype$markers
terminii <- tapply(1:length(h$genotype$marker), h$genotype$chromosome, tail, 1)
is.terminus <- rep(FALSE,length(markers))
is.terminus[terminii] <- TRUE
if (is.null(chromosome)) chromosome <- unique(h$genotype$chromosome)
if ("genotype"==model)
{
if (as.intervals)
{
return (markers[
!is.terminus
& h$genotype$chromosome %in% chromosome
])
}
return (markers[ h$genotype$chromosome %in% chromosome ])
}
if (as.intervals)
{
return (h[[model]]$markers[ h[[model]]$chromosome %in% chromosome ])
}
return (
happy.get.markers(h,
chromosome=chromosome,
model="genotype",
as.intervals=FALSE)
)
}
happy.get.models <- function(h)
{
assert.happy(h)
intersect(names(h), happy.get.allowed.models())
}
happy.get.position <- function(h, markers)
{
assert.happy(h)
h$genotype$genome$map[
match(markers, h$genotype$genome$marker)
]
}
happy.get.subjects <- function(h)
{
h$subjects
}
happy.get.strains <- function(h)
{
h$strains
}
happy.has.model <- function(h, model)
{
assert.happy(h)
model %in% happy.get.models(h)
}
happy.load.genome <- function (dir, use.X = TRUE, chr = NULL, models = NULL)
{
if (!is.null(chr) & 0==length(grep("chr", chr)))
{
chr <- paste("chr",sep="",chr)
}
if (is.null(models))
{
models <- intersect(happy.get.allowed.models(), list.subdirs(dir))
if (0==length(models))
{
stop("No genome cache models present in ", dir, "\n")
}
if (!"genotype" %in% models)
{
stop("Required genotype model is absent from cache dir ", dir, "\n")
}
}
else
{
models <- unique(c(models, "genotype"))
}
g <- list()
old.subjects <- NULL
old.strains <- NULL
for (model in models)
{
pkgs <- c()
if (is.null(chr))
{
found.chr <- list.subdirs( paste(dir, "/", model, sep=""), pattern="^chr" )
if (0==length(found.chr))
{
stop("Found no chromosomes for model ", model, " in dir ", dir, "\n")
}
pkgs <- paste(dir, model, found.chr, sep = "/")
}
else
{
pkgs <- paste(dir, model, chr, sep = "/")
}
markers <- c()
chromosome <- c()
map <- c()
pkgname <- c()
bp <- c()
for (p in pkgs)
{
chromosome <- c(chromosome, happy.load.data("chromosome", p))
m <- happy.load.data("markers", p)
markers <- c(markers, m)
map <- c(map, happy.load.data("map", p))
bp <- c(bp, happy.load.data("bp", p))
pkgname <- c(pkgname, rep(p, length(m)))
subjects <- happy.load.data("subjects", p)
strains <- happy.load.data("strains", p)
if (is.null(old.subjects))
{
old.subjects <- subjects
}
if (any(subjects != old.subjects))
{
cat("ERROR - subject names are inconsistent for chromosome ",
tail(chromosome,1), "\n")
stop("FATAL HAPPY ERROR")
}
if (is.null(old.strains))
{
old.strains <- strains
}
if (any(strains != old.strains))
{
cat("ERROR - strain names are inconsistent for chromosome ",
chromosome, "\n")
stop("FATAL HAPPY ERROR")
}
}
genome <- data.frame(
marker = I(as.character(markers)),
map = as.numeric(map),
bp = as.numeric(bp),
ddp = I(as.character(pkgname)),
chromosome = I(as.character(chromosome)))
g[[model]] <- list(
genome = genome,
subjects = subjects,
strains = strains,
markers = as.character(genome$marker),
chromosome = as.character(genome$chromosome),
map = genome$map,
design.matrix.colnames = happy.make.colnames(strains, model=model))
}
g$subjects <- g$genotype$subjects
g$strains <- g$additive$strains
g$markers <- g$genotype$markers
g$haploid <- g$genotype$haploid
class(g) <- "happy.genome"
return(g)
}
happy.load.marker <- function(h, marker, model)
# internal function to load data for a single marker from genome cache
{
# Check for bad arguments
assert.happy(h)
if (1!=length(marker))
{
stop("Must specify only one marker in happy.load.marker()\n")
}
if (1!=length(model))
{
stop("Must specify only one model in happy.load.marker()\n")
}
if (!happy.has.model(h, model))
{
stop("No such model ", model, " in happy object\n")
}
marker <- as.character(marker)
model <- as.character(model)
# check whether marker is in DATA memory
retval <- NULL
if ( happy.has.reserved.marker(h, marker=marker, model=model) )
{
retval <- happy.get.reserved.marker(h, marker=marker, model=model)
}
else
{
i <- which(h[[model]]$genome$marker == marker )
if (1!=length(i))
{
string <- paste("marker", marker, "for ", model, " model.")
if (0==length(i))
{
stop("Could not find ", string, "\n")
}
if (1 < length(i))
{
stop("Found multiple markers matching", string, "\n")
}
}
pkg <- h[[model]]$genome$ddp[i]
max.tries <- 10
num.tries <- 0
sleep.time <- 1
has.loaded <- FALSE
retval <- NULL
while (!has.loaded & num.tries < max.tries)
{
## read marker data from the genome cache
num.tries <- num.tries + 1
retval <- try( happy.load.data(marker, pkg) )
if (!caught.error(retval))
{
if (!is.null(retval))
{
break
}
}
warning("Failed to load data for ", model, " ", marker,
". Retrying after ", sleep.time, "s sleep...\n")
system(paste("sleep", sleep.time))
}
if (caught.error(retval))
{
stop("Error retrieving information via g.data.get() for marker ", marker, "\n")
}
if (is.null(retval))
{
stop("Error: null data retrieved via g.data.get() for marker ", marker, "\n")
}
if (!is.array(retval)) retval <- as.array(retval) # ensure return value has a dim component
colnames(retval) <- happy.make.colnames(happy.get.strains(h), model)
if ("full"==model)
# bug fix for when cache contains incorrectly doubled values
{
if (2==round(sum(retval[1,]),1))
{
retval <- retval/2
}
}
}
if (happy.is.auto.reserve(h))
{
if (happy.get.reserve.limit(h) > happy.reserve.memory.usage(h))
{
happy.reserve.marker(h, marker=marker, model=model, marker.data=retval)
}
}
retval
}
happy.load.data <- function (item, dir) # replaces calls to g.data.get, to make things backwards compatible.
{
env <- new.env()
# determine which version of g.data was used to save the data
# assume happy pre 2009 and g.data pre 2009
filename.pre2009 <- file.path(dir, "data", paste(item, "RData", sep = "."))
if (file.exists(filename.pre2009))
{
load(filename.pre2009, env)
return ( get(item, envir = env) )
}
# assume happy 2009 and g.data 2009
item.safe <- make.names(item)
filename.post2009 <- file.path(dir,
paste(gsub("([[:upper:]])", "@\\1", item.safe), "RData", sep = ".")
)
if (file.exists(filename.post2009))
{
load(filename.post2009, env)
return ( get(item.safe, envir = env ) )
}
# assume happy 2009 and g.data pre 2009
filename.hybrid <- file.path(dir, "data", paste(item.safe, "RData", sep = "."))
if (file.exists(filename.hybrid))
{
load(filename.hybrid, env)
return ( get(item.safe, envir = env) )
}
stop("Could not find object file containing data for ", item, " in package ", dir,
". Tried ", filename.pre2009, ", ", filename.post2009, " and ", filename.hybrid, "\n")
}
happy.reserve.marker <- function(h, marker, model, marker.data=NULL)
{
if (!happy.reserve.has(h, category=model))
{
stop("Cannot reserve marker ", marker, " for model ", model, " because memory cache has not been initialized\n")
}
if (is.null(marker.data))
{
marker.data <- happy.load.marker(h, marker=marker, model=model)
}
happy.reserve.put(h, category=model, object.name=marker, object=marker.data)
}
happy.reserve.markers <- function(h, markers, models, verbose=TRUE)
{
assert.happy(h)
if (length(markers)!=length(models))
{
stop("Number of markers must match number of models\n")
}
markers <- as.character(markers)
models <- as.character(models)
cat("Reserving data for ", length(markers), " marker-model combinations in memory\n")
mem.size <- 0
memory.limit.Mb <- happy.get.reserve.limit(h)
if (0<length(h$DATA))
{
mem.size <- happy.reserve.memory.usage(h)/2^20
}
for (i in 1:length(markers))
{
if (happy.has.reserved.marker(h, marker=markers[i], model=models[i]))
{
next
}
marker.data <- happy.load.marker(h, markers[i], models[i])
mem.size <- mem.size + object.size(marker.data)/2^20
if (memory.limit.Mb <= mem.size)
{
warning(paste("Reached memory limit",round(mem.size,3),"Mb / ",
memory.limit.Mb,"Mb for marker reserve with",
i, "/",length(markers),"markers. The remaining", length(markers)-i,
"markers will be accessed through disk I/O\n"))
break
}
happy.reserve.marker(h, marker=markers[i], model=models[i], marker.data=marker.data)
if (verbose)
{
cat("[",i,"]",sep="")
}
}
if (verbose) cat("\n")
cat("Marker data consumes", round(mem.size,3), "Mb\n")
}
happy.has.reserved.marker <- function(h, marker, model)
{
happy.reserve.has(h, category=model, object.name=marker)
}
happy.get.reserved.marker <- function(h, marker, model)
{
happy.reserve.get(h, category=model, object.name=marker)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.