Description Usage Arguments Details Value Author(s) References See Also Examples
To calculate the eigenvectors and eigenvalues for principal component analysis in GWAS.
1 2 3 4 5 6 7 8 9 | snpgdsPCA(gdsobj, sample.id=NULL, snp.id=NULL,
autosome.only=TRUE, remove.monosnp=TRUE, maf=NaN, missing.rate=NaN,
algorithm=c("exact", "randomized"),
eigen.cnt=ifelse(identical(algorithm, "randomized"), 16L, 32L),
num.thread=1L, bayesian=FALSE, need.genmat=FALSE,
genmat.only=FALSE, eigen.method=c("DSPEVX", "DSPEV"),
aux.dim=eigen.cnt*2L, iter.num=10L, verbose=TRUE)
## S3 method for class 'snpgdsPCAClass'
plot(x, eig=c(1L,2L), ...)
|
gdsobj |
an object of class |
sample.id |
a vector of sample id specifying selected samples; if NULL, all samples are used |
snp.id |
a vector of snp id specifying selected SNPs; if NULL, all SNPs are used |
autosome.only |
if |
remove.monosnp |
if TRUE, remove monomorphic SNPs |
maf |
to use the SNPs with ">= maf" only; if NaN, no MAF threshold |
missing.rate |
to use the SNPs with "<= missing.rate" only; if NaN, no missing threshold |
eigen.cnt |
output the number of eigenvectors; if eigen.cnt <= 0, then return all eigenvectors |
algorithm |
"exact", traditional exact calculation; "randomized", fast PCA with randomized algorithm introduced in Galinsky et al. 2016 |
num.thread |
the number of (CPU) cores used; if |
bayesian |
if TRUE, use bayesian normalization |
need.genmat |
if TRUE, return the genetic covariance matrix |
genmat.only |
return the genetic covariance matrix only, do not compute the eigenvalues and eigenvectors |
eigen.method |
"DSPEVX" – compute the top |
aux.dim |
auxiliary dimension used in fast randomized algorithm |
iter.num |
iteration number used in fast randomized algorithm |
verbose |
if TRUE, show information |
x |
a |
eig |
indices of eigenvectors, like |
... |
the arguments passed to or from other methods, like
|
The minor allele frequency and missing rate for each SNP passed in
snp.id
are calculated over all the samples in sample.id
.
Return a snpgdsPCAClass
object, and it is a list:
sample.id |
the sample ids used in the analysis |
snp.id |
the SNP ids used in the analysis |
eigenval |
eigenvalues |
eigenvect |
eigenvactors, "# of samples" x "eigen.cnt" |
varprop |
variance proportion for each principal component |
TraceXTX |
the trace of the genetic covariance matrix |
Bayesian |
whether use bayerisan normalization |
genmat |
the genetic covariance matrix |
Xiuwen Zheng
Patterson N, Price AL, Reich D. Population structure and eigenanalysis. PLoS Genet. 2006 Dec;2(12):e190.
Galinsky KJ, Bhatia G, Loh PR, Georgiev S, Mukherjee S, Patterson NJ, Price AL. Fast Principal-Component Analysis Reveals Convergent Evolution of ADH1B in Europe and East Asia. Am J Hum Genet. 2016 Mar 3;98(3):456-72. doi: 10.1016/j.ajhg.2015.12.022. Epub 2016 Feb 25.
snpgdsPCACorr
, snpgdsPCASNPLoading
,
snpgdsPCASampLoading
, snpgdsAdmixProp
,
snpgdsEIGMIX
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | # open an example dataset (HapMap)
genofile <- snpgdsOpen(snpgdsExampleFileName())
# run PCA
RV <- snpgdsPCA(genofile)
RV
# eigenvalues
head(RV$eigenval)
# variance proportion (%)
head(round(RV$varprop*100, 2))
# [1] 12.23 5.84 1.01 0.95 0.84 0.74
# draw
plot(RV)
plot(RV, 1:4)
#### there is no population information ####
# make a data.frame
tab <- data.frame(sample.id = RV$sample.id,
EV1 = RV$eigenvect[,1], # the first eigenvector
EV2 = RV$eigenvect[,2], # the second eigenvector
stringsAsFactors = FALSE)
head(tab)
# sample.id EV1 EV2
# 1 NA19152 -0.08411287 -0.01226860
# 2 NA19139 -0.08360644 -0.01085849
# 3 NA18912 -0.08110808 -0.01184524
# 4 NA19160 -0.08680864 -0.01447106
# 5 NA07034 0.03109761 0.07709255
# 6 NA07055 0.03228450 0.08155730
# draw
plot(tab$EV2, tab$EV1, xlab="eigenvector 2", ylab="eigenvector 1")
#### there are population information ####
# get population information
# or pop_code <- scan("pop.txt", what=character())
# if it is stored in a text file "pop.txt"
pop_code <- read.gdsn(index.gdsn(genofile, "sample.annot/pop.group"))
# get sample id
samp.id <- read.gdsn(index.gdsn(genofile, "sample.id"))
# assume the order of sample IDs is as the same as population codes
cbind(samp.id, pop_code)
# samp.id pop_code
# [1,] "NA19152" "YRI"
# [2,] "NA19139" "YRI"
# [3,] "NA18912" "YRI"
# [4,] "NA19160" "YRI"
# [5,] "NA07034" "CEU"
# ...
# make a data.frame
tab <- data.frame(sample.id = RV$sample.id,
pop = factor(pop_code)[match(RV$sample.id, samp.id)],
EV1 = RV$eigenvect[,1], # the first eigenvector
EV2 = RV$eigenvect[,2], # the second eigenvector
stringsAsFactors = FALSE)
head(tab)
# sample.id pop EV1 EV2
# 1 NA19152 YRI -0.08411287 -0.01226860
# 2 NA19139 YRI -0.08360644 -0.01085849
# 3 NA18912 YRI -0.08110808 -0.01184524
# 4 NA19160 YRI -0.08680864 -0.01447106
# 5 NA07034 CEU 0.03109761 0.07709255
# 6 NA07055 CEU 0.03228450 0.08155730
# draw
plot(tab$EV2, tab$EV1, col=as.integer(tab$pop),
xlab="eigenvector 2", ylab="eigenvector 1")
legend("bottomright", legend=levels(tab$pop), pch="o", col=1:4)
# close the file
snpgdsClose(genofile)
|
Loading required package: gdsfmt
SNPRelate -- supported by Streaming SIMD Extensions 2 (SSE2)
Principal Component Analysis (PCA) on genotypes:
Excluding 365 SNPs on non-autosomes
Excluding 1 SNP (monomorphic: TRUE, MAF: NaN, missing rate: NaN)
# of samples: 279
# of SNPs: 8,722
using 1 thread
# of principal components: 32
PCA: the sum of all selected genotypes (0,1,2) = 2446510
CPU capabilities: Double-Precision SSE2
Wed Dec 9 10:21:04 2020 (internal increment: 5620)
[..................................................] 0%, ETC: ---
[==================================================] 100%, completed, 0s
Wed Dec 9 10:21:04 2020 Begin (eigenvalues and eigenvectors)
Wed Dec 9 10:21:04 2020 Done.
List of 8
$ sample.id: chr [1:279] "NA19152" "NA19139" "NA18912" "NA19160" ...
$ snp.id : int [1:8722] 1 2 3 4 5 6 7 8 9 10 ...
$ eigenval : num [1:279] 33.99 16.23 2.81 2.63 2.35 ...
$ eigenvect: num [1:279, 1:32] -0.0841 -0.0836 -0.0811 -0.0868 0.0311 ...
$ varprop : num [1:279] 0.12225 0.0584 0.01011 0.00948 0.00844 ...
$ TraceXTX : num 5298366
$ Bayesian : logi FALSE
$ genmat : NULL
- attr(*, "class")= chr "snpgdsPCAClass"
[1] 33.985988 16.234579 2.810976 2.634050 2.345686 2.047077
[1] 12.23 5.84 1.01 0.95 0.84 0.74
sample.id EV1 EV2
1 NA19152 -0.08411287 -0.01226860
2 NA19139 -0.08360644 -0.01085849
3 NA18912 -0.08110808 -0.01184524
4 NA19160 -0.08680864 -0.01447106
5 NA07034 0.03109761 0.07709255
6 NA07055 0.03228450 0.08155730
samp.id pop_code
[1,] "NA19152" "YRI"
[2,] "NA19139" "YRI"
[3,] "NA18912" "YRI"
[4,] "NA19160" "YRI"
[5,] "NA07034" "CEU"
[6,] "NA07055" "CEU"
[7,] "NA12814" "CEU"
[8,] "NA10847" "CEU"
[9,] "NA18532" "HCB"
[10,] "NA18561" "HCB"
[11,] "NA18942" "JPT"
[12,] "NA18940" "JPT"
[13,] "NA18515" "YRI"
[14,] "NA19222" "YRI"
[15,] "NA18508" "YRI"
[16,] "NA19129" "YRI"
[17,] "NA12056" "CEU"
[18,] "NA10863" "CEU"
[19,] "NA10857" "CEU"
[20,] "NA12006" "CEU"
[21,] "NA18605" "HCB"
[22,] "NA18542" "HCB"
[23,] "NA18945" "JPT"
[24,] "NA18949" "JPT"
[25,] "NA19238" "YRI"
[26,] "NA19194" "YRI"
[27,] "NA19142" "YRI"
[28,] "NA18913" "YRI"
[29,] "NA12145" "CEU"
[30,] "NA12763" "CEU"
[31,] "NA07357" "CEU"
[32,] "NA12144" "CEU"
[33,] "NA18550" "HCB"
[34,] "NA18608" "HCB"
[35,] "NA18964" "JPT"
[36,] "NA18953" "JPT"
[37,] "NA19154" "YRI"
[38,] "NA19138" "YRI"
[39,] "NA18852" "YRI"
[40,] "NA19120" "YRI"
[41,] "NA07019" "CEU"
[42,] "NA10831" "CEU"
[43,] "NA07000" "CEU"
[44,] "NA11832" "CEU"
[45,] "NA18564" "HCB"
[46,] "NA18961" "JPT"
[47,] "NA18972" "JPT"
[48,] "NA19210" "YRI"
[49,] "NA19204" "YRI"
[50,] "NA18507" "YRI"
[51,] "NA19159" "YRI"
[52,] "NA06991" "CEU"
[53,] "NA11840" "CEU"
[54,] "NA12802" "CEU"
[55,] "NA12813" "CEU"
[56,] "NA18571" "HCB"
[57,] "NA18620" "HCB"
[58,] "NA18967" "JPT"
[59,] "NA18976" "JPT"
[60,] "NA19211" "YRI"
[61,] "NA18516" "YRI"
[62,] "NA18523" "YRI"
[63,] "NA12761" "CEU"
[64,] "NA10830" "CEU"
[65,] "NA10855" "CEU"
[66,] "NA18623" "HCB"
[67,] "NA18576" "HCB"
[68,] "NA18981" "JPT"
[69,] "NA18971" "JPT"
[70,] "NA18862" "YRI"
[71,] "NA19205" "YRI"
[72,] "NA19101" "YRI"
[73,] "NA19102" "YRI"
[74,] "NA06994" "CEU"
[75,] "NA11993" "CEU"
[76,] "NA11995" "CEU"
[77,] "NA12891" "CEU"
[78,] "NA18582" "HCB"
[79,] "NA18633" "HCB"
[80,] "NA18994" "JPT"
[81,] "NA18872" "YRI"
[82,] "NA19192" "YRI"
[83,] "NA19172" "YRI"
[84,] "NA19092" "YRI"
[85,] "NA12864" "CEU"
[86,] "NA12751" "CEU"
[87,] "NA10839" "CEU"
[88,] "NA12717" "CEU"
[89,] "NA18637" "HCB"
[90,] "NA18594" "HCB"
[91,] "NA18998" "JPT"
[92,] "NA19000" "JPT"
[93,] "NA12753" "CEU"
[94,] "NA12707" "CEU"
[95,] "NA11839" "CEU"
[96,] "NA10859" "CEU"
[97,] "NA18526" "HCB"
[98,] "NA18524" "HCB"
[99,] "NA18529" "HCB"
[100,] "NA18558" "HCB"
[101,] "NA18502" "YRI"
[102,] "NA19145" "YRI"
[103,] "NA18505" "YRI"
[104,] "NA18856" "YRI"
[105,] "NA12875" "CEU"
[106,] "NA12156.dup" "CEU"
[107,] "NA12156" "CEU"
[108,] "NA07056" "CEU"
[109,] "NA18562" "HCB"
[110,] "NA18537" "HCB"
[111,] "NA18603" "HCB"
[112,] "NA18540" "HCB"
[113,] "NA19153" "YRI"
[114,] "NA19137" "YRI"
[115,] "NA19202" "YRI"
[116,] "NA19239" "YRI"
[117,] "NA12044" "CEU"
[118,] "NA11992" "CEU"
[119,] "NA11829" "CEU"
[120,] "NA07022" "CEU"
[121,] "NA18545" "HCB"
[122,] "NA18572" "HCB"
[123,] "NA18547" "HCB"
[124,] "NA18609.dup" "HCB"
[125,] "NA18857" "YRI"
[126,] "NA19238.dup" "YRI"
[127,] "NA18501" "YRI"
[128,] "NA19240" "YRI"
[129,] "NA06993.dup" "CEU"
[130,] "NA12239" "CEU"
[131,] "NA12154" "CEU"
[132,] "NA12762" "CEU"
[133,] "NA18609" "HCB"
[134,] "NA18552" "HCB"
[135,] "NA18611" "HCB"
[136,] "NA18555" "HCB"
[137,] "NA19223" "YRI"
[138,] "NA18500" "YRI"
[139,] "NA18861" "YRI"
[140,] "NA12716" "CEU"
[141,] "NA12878" "CEU"
[142,] "NA10856" "CEU"
[143,] "NA12874" "CEU"
[144,] "NA18566" "HCB"
[145,] "NA18563" "HCB"
[146,] "NA18570" "HCB"
[147,] "NA18612" "HCB"
[148,] "NA19201" "YRI"
[149,] "NA19144" "YRI"
[150,] "NA19193" "YRI"
[151,] "NA18503" "YRI"
[152,] "NA12760" "CEU"
[153,] "NA11993.dup" "CEU"
[154,] "NA06985" "CEU"
[155,] "NA12003" "CEU"
[156,] "NA18621" "HCB"
[157,] "NA18594.dup" "HCB"
[158,] "NA18622" "HCB"
[159,] "NA18573" "HCB"
[160,] "NA18504" "YRI"
[161,] "NA19203" "YRI"
[162,] "NA19143" "YRI"
[163,] "NA18871" "YRI"
[164,] "NA07348" "CEU"
[165,] "NA12750" "CEU"
[166,] "NA11831" "CEU"
[167,] "NA10835" "CEU"
[168,] "NA18577" "HCB"
[169,] "NA18624" "HCB"
[170,] "NA18579" "HCB"
[171,] "NA18632" "HCB"
[172,] "NA18870" "YRI"
[173,] "NA19200" "YRI"
[174,] "NA18517" "YRI"
[175,] "NA19221" "YRI"
[176,] "NA12146" "CEU"
[177,] "NA11882" "CEU"
[178,] "NA18635" "HCB"
[179,] "NA18592" "HCB"
[180,] "NA18636" "HCB"
[181,] "NA18593" "HCB"
[182,] "NA18863" "YRI"
[183,] "NA18855" "YRI"
[184,] "NA18862.dup" "YRI"
[185,] "NA19209" "YRI"
[186,] "NA18951.dup" "JPT"
[187,] "NA18943" "JPT"
[188,] "NA18947" "JPT"
[189,] "NA18944" "JPT"
[190,] "NA18521" "YRI"
[191,] "NA18858" "YRI"
[192,] "NA19141" "YRI"
[193,] "NA19093" "YRI"
[194,] "NA10861" "CEU"
[195,] "NA12005" "CEU"
[196,] "NA10851" "CEU"
[197,] "NA12234" "CEU"
[198,] "NA18948" "JPT"
[199,] "NA18951" "JPT"
[200,] "NA18952" "JPT"
[201,] "NA18956" "JPT"
[202,] "NA19099" "YRI"
[203,] "NA18854" "YRI"
[204,] "NA19132" "YRI"
[205,] "NA12004" "CEU"
[206,] "NA07345" "CEU"
[207,] "NA07029" "CEU"
[208,] "NA12892" "CEU"
[209,] "NA18968" "JPT"
[210,] "NA18959" "JPT"
[211,] "NA18969" "JPT"
[212,] "NA18960" "JPT"
[213,] "NA19206" "YRI"
[214,] "NA18506" "YRI"
[215,] "NA19098" "YRI"
[216,] "NA19130" "YRI"
[217,] "NA07048" "CEU"
[218,] "NA10854" "CEU"
[219,] "NA12248" "CEU"
[220,] "NA10846" "CEU"
[221,] "NA18965" "JPT"
[222,] "NA18973" "JPT"
[223,] "NA18966" "JPT"
[224,] "NA18975" "JPT"
[225,] "NA19128" "YRI"
[226,] "NA19131" "YRI"
[227,] "NA18522" "YRI"
[228,] "NA19208" "YRI"
[229,] "NA12801" "CEU"
[230,] "NA12872" "CEU"
[231,] "NA12155" "CEU"
[232,] "NA06993" "CEU"
[233,] "NA18978" "JPT"
[234,] "NA18970" "JPT"
[235,] "NA18980" "JPT"
[236,] "NA18995.dup" "JPT"
[237,] "NA18859.dup" "YRI"
[238,] "NA19116" "YRI"
[239,] "NA18914" "YRI"
[240,] "NA19127" "YRI"
[241,] "NA11830" "CEU"
[242,] "NA12865" "CEU"
[243,] "NA10838" "CEU"
[244,] "NA12249" "CEU"
[245,] "NA18974" "JPT"
[246,] "NA18987" "JPT"
[247,] "NA18990" "JPT"
[248,] "NA18991" "JPT"
[249,] "NA19094" "YRI"
[250,] "NA19161" "YRI"
[251,] "NA18853" "YRI"
[252,] "NA19173" "YRI"
[253,] "NA12057" "CEU"
[254,] "NA10860" "CEU"
[255,] "NA12812" "CEU"
[256,] "NA11881" "CEU"
[257,] "NA18992" "JPT"
[258,] "NA18995" "JPT"
[259,] "NA18997" "JPT"
[260,] "NA19012" "JPT"
[261,] "NA19171" "YRI"
[262,] "NA19140" "YRI"
[263,] "NA18859" "YRI"
[264,] "NA19119" "YRI"
[265,] "NA11994" "CEU"
[266,] "NA12873" "CEU"
[267,] "NA12815" "CEU"
[268,] "NA19005" "JPT"
[269,] "NA18999" "JPT"
[270,] "NA19007" "JPT"
[271,] "NA19003" "JPT"
[272,] "NA18860" "YRI"
[273,] "NA19207" "YRI"
[274,] "NA19100" "YRI"
[275,] "NA19103" "YRI"
[276,] "NA12740" "CEU"
[277,] "NA12752" "CEU"
[278,] "NA12043" "CEU"
[279,] "NA12264" "CEU"
sample.id pop EV1 EV2
1 NA19152 YRI -0.08411287 -0.01226860
2 NA19139 YRI -0.08360644 -0.01085849
3 NA18912 YRI -0.08110808 -0.01184524
4 NA19160 YRI -0.08680864 -0.01447106
5 NA07034 CEU 0.03109761 0.07709255
6 NA07055 CEU 0.03228450 0.08155730
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.