Nothing
gdsSubsetCheck <- function(parent.gds,
sub.gds,
sample.include=NULL,
snp.include=NULL,
sub.storage=NULL,
verbose=TRUE,
allow.fork=FALSE) {
# this assumes that sample.id is the only 1D sample variable in the GDS
gds <- openfn.gds(parent.gds, allow.fork=allow.fork)
gds.sub <- openfn.gds(sub.gds, allow.fork=allow.fork)
# check sampleID
sampID.parent <- read.gdsn(index.gdsn(gds, "sample.id"))
sampID.sub <- read.gdsn(index.gdsn(gds.sub, "sample.id"))
chk <- all(is.element(sampID.sub, sampID.parent))
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop("sample.id in sub GDS is not a subset of parent GDS")
}
if (is.null(sample.include)){
sample.include <- sampID.parent
}
chk <- setequal(sample.include, sampID.sub)
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop("samples in sub GDS are not the same as sample.include")
}
sampsel <- sampID.parent %in% sample.include
# check snp variables
snpID.parent <- read.gdsn(index.gdsn(gds, "snp.id"))
snpID.sub <- read.gdsn(index.gdsn(gds.sub, "snp.id"))
chk <- all(is.element(snpID.sub, snpID.parent))
if (!chk){
closefn.gds(gds)
closefn.gds(gds.sub)
stop("snp.id in sub GDS is not a subset of parent GDS")
}
if (is.null(snp.include)){
snp.include <- snpID.parent
}
chk <- setequal(snp.include, snpID.sub)
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop("snps in sub GDS are not the same as snp.include")
}
snpsel <- snpID.parent %in% snp.include
variables <- c("snp.id", "snp.position", "snp.chromosome")
chk <- all(variables %in% ls.gdsn(gds))
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop("parent GDS does not have snp.id, snp.position, and snp.chromosome")
}
chk <- all(variables %in% ls.gdsn(gds.sub))
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop("sub GDS does not have snp.id, snp.position, and snp.chromosome")
}
for (variable in variables){
node.parent <- index.gdsn(gds, variable)
node.sub <- index.gdsn(gds.sub, variable)
vals.parent <- read.gdsn(node.parent)
vals.sub <- read.gdsn(node.sub)
chk <- allequal(vals.parent[snpsel], vals.sub)
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop("snps in sub GDS are not the same as snp.include")
}
# TO DO: check attributes here.
if (variable == "snp.chromosome"){
attributes.parent <- get.attr.gdsn(node.parent)
attributes.sub <- get.attr.gdsn(node.sub)
chk <- setequal(names(attributes.parent), names(attributes.sub))
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop(paste("sub GDS has different attributes than parent GDS for", variable))
}
for (attribute in names(attributes.parent)){
chk <- allequal(attributes.parent[[attribute]], attributes.sub[[attribute]])
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop(paste("sub GDS has different attribute values than parent GDS for", variable))
}
}
}
}
# check other variables
node.names <- ls.gdsn(gds)
node.names <- node.names[!(node.names %in% c("sample.id", "snp.id", "snp.position", "snp.chromosome", "snp.allele", "snp.rs.id"))]
# check that they exist in the sub file
chk <- all(variables %in% ls.gdsn(gds.sub))
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop("sub GDS does not include the same variables as parent GDS")
}
# compare the variables node by node
for (node.name in node.names){
# node in parent file
node.parent <- index.gdsn(gds, node.name)
# check dimensions
desc <- objdesp.gdsn(node.parent)
# only check on 2-d variables
if (length(desc$dim) != 2) next
if (verbose) message(paste("working on", node.name))
# get the subset node
node.sub <- index.gdsn(gds.sub, node.name)
# check attributes
attributes.parent <- get.attr.gdsn(node.parent)
attributes.sub <- get.attr.gdsn(node.sub)
chk <- setequal(names(attributes.parent), names(attributes.sub))
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop(paste("sub GDS has different attributes than parent GDS for", variable))
}
# check attribute values
for (attribute in names(attributes.parent)[]) {
# these don't have values
if (attribute %in% c("snp.order", "sample.order", "missing.value")) next
if (attribute == "storage" & !is.null(sub.storage)) {
chk(attributes.sub[[attribute]] == sub.storage)
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop(paste("sub GDS has incorrect attribute for", attribute))
}
} else{
chk <- allequal(attributes.parent[[attribute]], attributes.sub[[attribute]])
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop(paste("sub GDS has different attribute values than parent GDS for", variable))
}
}
}
if ("missing.value" %in% names(attributes.parent)) {
parent.miss <- attributes.parent[["missing.value"]]
} else {
parent.miss <- NA
}
if ("missing.value" %in% names(attributes.sub)) {
sub.miss <- attributes.sub[["missing.value"]]
} else {
sub.miss <- NA
}
if ("sample.order" %in% names(attributes.sub)) dimType <- "scan,snp" else dimType <- "snp,scan"
if (dimType == "snp,scan"){
if (verbose) message(paste(node.name, "- looping over samples"))
id.parent <- sampID.parent
id.sub <- sampID.sub
selection <- snpsel
mod <- 100
} else if (dimType == "scan,snp") {
if (verbose) message(paste(node.name, "- looping over snps"))
id.parent <- snpID.parent
id.sub <- snpID.sub
selection <- sampsel
mod <- 5000
}
# check element by element
for (i in 1:length(id.sub)){
if (verbose & (i %% mod == 0)) message(paste(node.name, "- element", i, "of", length(id.sub)))
i.sub <- i
i.parent <- which(id.parent %in% id.sub[i])
start.parent <- c(1, i.parent)
start.sub <- c(1, i.sub)
count <- c(-1, 1)
vals.parent <- read.gdsn(node.parent, start=start.parent, count=count)[selection]
vals.sub <- read.gdsn(node.sub, start=start.sub, count=count)
# set missing values
vals.parent[which(vals.parent == parent.miss)] <- NA
vals.sub[which(vals.sub == sub.miss)] <- NA
if (length(vals.parent) != length(vals.sub)) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop(paste("lengths of variable", node.name, "are not the same"))
}
chk <- allequal(vals.parent, vals.sub)
if (!chk) {
closefn.gds(gds)
closefn.gds(gds.sub)
stop(paste("values of variable", node.name, "are not the same."))
}
}
# check data sample by sample
}
closefn.gds(gds)
closefn.gds(gds.sub)
message("All variables match.")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.