Nothing
## ----prelim, echo = FALSE, results="hide"-------------------------------------
library( knitr)
opts_chunk$set(cache=TRUE, message = FALSE, comment = "", dev="pdf",
dpi=300, fig.show = "hold", fig.align = "center")
## ----setup1, eval=FALSE-------------------------------------------------------
# install.packages( "stockR")
## ----setup2-------------------------------------------------------------------
library( stockR)
## ----setSeed------------------------------------------------------------------
set.seed( 747) #a 747 is a big plane
## ----simData------------------------------------------------------------------
#number of individuals
N <- 100
#number of markers to measure on each of the individuals
M <- 5000
#number of sampling groups (same as number of individuals)
S <- N
#number of stocks
K <- 3
#simulate the data
myData <- sim.stock.data( nAnimal=N, nSNP=M, nSampleGrps=N, K=K)
## ----lookAtData---------------------------------------------------------------
#the dimensions of the data
dim( myData)
#the number of fish
ncol( myData)
#the number of markers
nrow( myData)
#summary of the stock sizes
table( attributes( myData)$grps)
#the individuals are ordered by stock membership
attributes( myData)$grps
#in this case the third stock is under-represented in the entire data set. On
#average, there will be equal numbers though.
## ----lookAtData2--------------------------------------------------------------
#a quick look at the data -- first 5 markers and 3 individuals
myData[1:5,1:3]
#so, all individuals have two allele copies (homozygous) for the third loci
#the second fish is heterozygous for the allele at the first loci.
#and so on.
## ----findStocks---------------------------------------------------------------
#find the stocks in the data
stocks <- stockSTRUCTURE( myData, K=3) #K-EM estimation is default
#the (posterior) membership probability to each stock
stocks$postProb #shows high discrimination, which could be erroneous (bootstrap soon)
#hard classificaiton of individuals to stocks
stocks$hardClass <- apply( stocks$postProb, 1, which.max)
#Bootstrap to see about uncertainty in assignment
#for serious applications many more than B=25 will be needed.
#only 2 cores used to pass CRAN's arbitrary checks
stocks$boot <- stockBOOT( stocks, B=25, mc.cores=2)
#Assignment accounting for uncertainty.
#Could also look at other model quantities (e.g. allele frequencties)
stocks$uncertClass <- apply( stocks$boot$postProbs, 1:2,
quantile, probs=c(0.05,0.5,0.95))
#careful inspection of this object will give the lower and
#upper 90% confidence intervals and the median
#e.g. for the 99th individual
print( round( stocks$uncertClass[,99,], 3))
#showing that there is some uncertainty about which
#stock this indivudal belongs to.
## ----sampGrps-----------------------------------------------------------------
#number of sampling groups (same as number of individuals)
S <- 15 #there are now 15 sampling groups (100 individuals will be
#distributed between them).
#number of stocks
K <- 3
#simulate the data
myData <- sim.stock.data( nAnimal=N, nSNP=M, nSampleGrps=S, K=K)
#find stocks in data
stocks <- stockSTRUCTURE( myData, K=3,
sample.grps=attributes(myData)$sampleGrps)
#once again, the stocks have been found (up to labelling)
#the simulated values
attributes( myData)$grps
#the data-derived values (as hard clustered)
apply( stocks$postProbs, 1, which.max)
#Bootstrap to see about uncertainty in assignment
#for serious applications many more than B=25 will be needed.
#only 2 cores used to pass CRAN's arbitrary checks
stocks$boot <- stockBOOT( stocks, B=25, mc.cores=2)
#Assignment accounting for uncertainty.
#Could also look at other model quantities (e.g. allele frequencties)
stocks$uncertClass <- apply( stocks$boot$postProbs, 1:2,
quantile, probs=c(0.05,0.5,0.95))
#careful inspection of this object will give the lower and
#upper 90% confidence intervals and the median
print( round( stocks$uncertClass, 3))
#Showing that there is almost no uncertainty about which
#stock indivudals belong to.
#There is lots of information in this data, especially when
#genetic information between individuals in the same sampling
#group is utilised.
## ----miss---------------------------------------------------------------------
myData <- sim.stock.data( nAnimal=N, nSNP=M, nSampleGrps=N, K=K)
#add some missing data to simulation. There are 30% randomly missing scores.
totMark <- prod( dim( myData))
myData[sample( 1:totMark, size=floor( 0.3*totMark))] <- NA
#find stocks in data
stocks <- stockSTRUCTURE( myData, K=3)
#once again, the stocks have been found (up to labelling)
#the simulated values
attributes( myData)$grps
#the data-derived values (as hard clustered)
apply( stocks$postProbs, 1, which.max)
#bootstrap not performed for this example
## ----Tidy, eval=FALSE---------------------------------------------------------
# #THIS IS SUGGESTIVE ONLY.
# #You may wish to tidy your workspace.
# #rm( list=ls())
## ----sessionInfo, results = "asis", echo = FALSE------------------------------
toLatex(sessionInfo())
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.