# Use the outputs of the <map2gene> function as inputs to get a subset of genotype matrix including just the specified subset of samples
# (Optionally) filter variants that have redundant IDs, or have no valid genotype calls
subsetSamples<-function(gt, sample.id, filter.variants=TRUE, remove.monosnp=TRUE) {
# gt Outputs from the <map2gene> function, including 4 elements:
# 'genotype': genotype call matrix
# 'annotation': GRanges object with variant annotation
# 'allele': all alleles of each variant
# 'map2gene': mapping information related to genes, with 6 elements generated by the <map2gene> function
# 'summary': variant-to-region summary table, rows match variants one-to-one
# 'location': details of variant-to-region mapping
# 'mutation': details of variant-to-mutation type mapping
# 'gene2variant': list of gene-to-variant mapping
# 'variant2gene': list of variant-to-gene mapping
# 'alternative.code': table of recoded calls, match the 'allele' table, see <recodeCalls> functio for details
# sample.id IDs of samples to be included in the outputs
# filter.variants If TRUE, filter variants to remove those with duplicated ID (keep ones with highest number of valid calls) and those having no valid calls
# remove.monosnp If TRUE, remove monomorphic SNPs
g<-gt$genotype;
sample.id<-unique(sample.id[sample.id %in% colnames(g)]);
if (length(sample.id) > 0) {
if (!filter.variants) gt[[1]]<-g[, sample.id] else { # only change the genotype matrix
g<-g[, sample.id, drop=FALSE];
# flag of filtering
keep<-rep(TRUE, nrow(g));
names(keep)<-rownames(g);
# valid calls per variant
n<-apply(g, 1, function(g) length(g[!is.na(g) & g>-1]));
keep[n==0]<-FALSE; # remove variants with no valid calls
# remove duplicated IDs
id<-rownames(g);
id.dup<-unique(id[duplicated(id)]);
if (length(id.dup) > 0) {
# index of duplicated ID to be removed
ind.out<-lapply(id.dup, function(i) {
ind<-which(id == i);
ct<-n[ind];
ind[order(ct)][-length(ct)];
});
keep[unique(unlist(ind.out))]<-FALSE;
}
if (remove.monosnp) {
ct<-apply(g, 1, function(g) {
x<-unique(g);
length(x[!is.na(x) & x>-1]);
});
keep[ct<=1]<-FALSE;
}
# variant IDs to be kept
keep.id<-names(keep[keep]);
gt[[1]]<-g[keep, , drop=FALSE];
gt[[2]]<-gt[[2]][keep];
gt[[3]]<-gt[[3]][keep, , drop=FALSE];
if (!is.null(gt$map2gene)) {
anno<-gt[[4]];
anno[[1]]<-anno[[1]][keep, , drop=FALSE];
anno[[2]]<-anno[[2]][as.vector(anno[[2]]$varID) %in% keep.id, , drop=FALSE];
anno[[3]]<-anno[[3]][as.vector(anno[[3]]$varID) %in% keep.id, , drop=FALSE];
anno[[5]]<-anno[[5]][names(anno[[5]]) %in% keep.id];
anno[[6]]<-anno[[6]][keep, , drop=FALSE];
gn<-rep(names(anno[[4]]), sapply(anno[[4]], length));
v<-unlist(anno[[4]], use.names=FALSE);
anno[[4]]<-split(v[v %in% keep.id], gn[v %in% keep.id]);
gt[[4]]<-anno;
}
}
}
gt;
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.