seqGetData | R Documentation |
Gets data from a SeqArray GDS file.
seqGetData(gdsfile, var.name, .useraw=FALSE, .padNA=TRUE, .tolist=FALSE,
.envir=NULL)
gdsfile |
a SeqVarGDSClass object |
var.name |
a variable name or a character vector, see details;
if character(), return NULL |
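.useraw |
TRUE, force to use RAW instead of INTEGER for genotypes and dosages; FALSE, use INTEGER; NA, use RAW instead of INTEGER if possible |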
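.padNA |
TRUE, pad a variable-length vector with NA if the number of data points for each variant is not greater than 1 |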
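.tolist |
if TRUE, return a list of vectors instead of the structure list(length, data) for variable-length data |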
.envir |
NULL, an environment object, a list or a data.frame |
The variable name should be "sample.id", "variant.id", "position",
"chromosome", "allele", "genotype", "annotation/id", "annotation/qual",
"annotation/filter", "annotation/info/VARIABLE_NAME", or
"annotation/format/VARIABLE_NAME".
"@genotype", "annotation/info/@VARIABLE_NAME" or
"annotation/format/@VARIABLE_NAME" are used to obtain the index
associated with these variables.
"$dosage"
is also allowed for the dosages of reference allele (integer:
0, 1, 2 and NA for diploid genotypes).
"$dosage_alt"
returns a RAW/INTEGER matrix for the dosages of alternative
allele without distinguishing different alternative alleles.
"$dosage_alt2"
allows the alleles to be partially missing (e.g., genotypes
on chromosome X for males).
"$dosage_sp"
returns a sparse matrix (dgCMatrix) for the dosages of
alternative allele without distinguishing different alternative alleles.
"$dosage_sp2"
allows the alleles to be partially missing (e.g., genotypes
on chromosome X for males).
"$num_allele"
returns an integer vector with the numbers of distinct
alleles.
"$ref"
returns a character vector of reference alleles.
"$alt"
returns a character vector of alternative alleles (delimited by
comma).
"$chrom_pos"
returns characters with the combination of chromosome and
position, e.g., "1:1272721". "$chrom_pos2"
is similar to
"$chrom_pos"
, except the suffix "_1" is added to the first duplicate
following the variant, "_2" is added to the second duplicate, and so on.
"$chrom_pos_allele"
returns characters with the combination of
chromosome, position and alleles, e.g., "1:1272721_A_G"
(i.e., chr:position_REF_ALT).
"$variant_index"
returns the indices of selected variants starting
from 1, and "$sample_index"
returns the indices of selected samples
starting from 1.
"$:VAR"
returns the variable "VAR" from .envir
according to the
selected variants.
Returns a vector, a matrix, or a list with length and data components;
such a list has the class name SeqVarDataList.
Xiuwen Zheng
seqSetFilter
, seqApply
,
seqNewVarData
, seqListVarData
# Example walkthrough: open the example GDS file, read several variables with
# seqGetData(), restrict the selection with seqSetFilter(), then close the file.
# the file name of the example GDS file
(gds.fn <- seqExampleFileName("gds"))
# open the file; the outer parentheses print the returned object
(f <- seqOpen(gds.fn))
# get 'sample.id'
(samp.id <- seqGetData(f, "sample.id"))
# "NA06984" "NA06985" "NA06986" ...
# get 'variant.id' (kept in `variant.id` for the variant filter below)
head(variant.id <- seqGetData(f, "variant.id"))
# get 'chromosome' and tabulate the values
table(seqGetData(f, "chromosome"))
# get 'allele'
head(seqGetData(f, "allele"))
# "T,C" "G,A" "G,A" ...
# get '$chrom_pos' (chromosome and position combined, e.g., "1:1272721")
head(seqGetData(f, "$chrom_pos"))
# get '$dosage' and show its first 6 rows and 10 columns
seqGetData(f, "$dosage")[1:6, 1:10]
# get a sparse matrix of dosages
seqGetData(f, "$dosage_sp")[1:6, 1:10]
# get '$num_allele'
head(seqGetData(f, "$num_allele"))
# set sample and variant filters
# fix the RNG seed so that the sample() draw below is reproducible
set.seed(100)
# keep five samples: the 2nd, 4th, 6th, 8th and 10th entries of samp.id
seqSetFilter(f, sample.id=samp.id[c(2,4,6,8,10)])
# keep 10 variants drawn at random from variant.id
seqSetFilter(f, variant.id=sample(variant.id, 10))
# get a list by passing a character vector of variable names
# (presumably the names chr/pos/allele label the list components -- confirm)
seqGetData(f, c(chr="chromosome", pos="position", allele="allele"))
# get the indices of selected variants/samples (starting from 1)
seqGetData(f, "$variant_index")
seqGetData(f, "$sample_index")
# get genotypic data
seqGetData(f, "genotype")
# get annotation/info/DP
seqGetData(f, "annotation/info/DP")
# get annotation/info/AA, a variable-length dataset
seqGetData(f, "annotation/info/AA", .padNA=FALSE)
# $length <- indicating the length of each variable-length data
# [1] 1 1 1 1 1 1 ...
# $data <- the data according to $length
# [1] "T" "C" "T" "C" "G" "C" ...
# or return a simplified vector
seqGetData(f, "annotation/info/AA", .padNA=TRUE)
# get annotation/format/DP, a variable-length dataset
seqGetData(f, "annotation/format/DP")
# $length <- indicating the length of each variable-length data
# [1] 1 1 1 1 1 1 ...
# $data <- the data according to $length
# variant
# sample [,1] [,2] [,3] [,4] [,5] [,6] ...
# [1,] 25 25 22 3 4 17 ...
# get values from R environment
env <- new.env()
# NOTE(review): 1348 is presumably the total number of variants in the
# example file, giving one value of x per variant -- TODO confirm
env$x <- 1:1348 / 10
# subset x manually by the indices of the selected variants
env$x[seqGetData(f, "$variant_index")]
# the same lookup via "$:x", which reads x from .envir for the selected variants
seqGetData(f, "$:x", .envir=env)
# close the GDS file
seqClose(f)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.