This is just a little background on how and why I went about creating a few small data sets and variables and such that I have used in the course of testing while developing this package.
It is not intended that the test data be actually generated by knitting this
document while building. But it certainly could be run, and an assignment made
and saved to the data
directory.
The data described here have all been saved into the list fs_dev_test_data,
which
we will declare here:
# Empty list that the code in this document fills with the test data sets.
fs_dev_test_data <- list()
Here we make a data set of 2 SNPs and three individuals
# Simplest genotype set: 3 individuals (rows) by 4 allele columns, i.e. two
# columns for each of the 2 SNPs.
simp <- as.data.frame(rbind(
  c(1, 1, 2, 2),
  c(1, 2, 1, 1),
  c(2, 2, 1, 2)
))
rownames(simp) <- paste0("Indiv", 1:3)
# make.unique() disambiguates the repeated locus labels:
# Loc_1, Loc_1.1, Loc_2, Loc_2.1
names(simp) <- make.unique(paste0("Loc_", rep(1:2, each = 2)))
fs_dev_test_data$simplest_genos <- simp
Here we want something that we can use to test that our data input is working:
# Make SNPs that have two of various symbols that might be used
# to denote alleles.
set.seed(5)  # fixed seed so the generated genotypes are reproducible
num.inds <- 20
allele_symbols <- list(
  c("A", "G"),
  c("T", "C"),
  c(1, 2),
  c("X", "Y"),
  c("Vic", "Fam")
)
# One two-column matrix per locus; each column holds one sampled allele per
# individual.
dat <- lapply(allele_symbols, function(x) {
  cbind(
    sample(x, size = num.inds, replace = TRUE),
    sample(x, size = num.inds, replace = TRUE)
  )
})
# Add some missing data: at each locus, set both alleles of 3 randomly chosen
# individuals to NA.
dat <- lapply(dat, function(x) {
  x[sample(seq_len(num.inds), size = 3, replace = FALSE), ] <- NA
  x
})
dat <- as.data.frame(dat, stringsAsFactors = FALSE)
# Column labels: Locus1, Locus1.1, Locus2, Locus2.1, ...
colnames(dat) <- make.unique(paste0("Locus", rep(1:5, each = 2)))
rownames(dat) <- paste0("Ind", seq_len(num.inds))
fs_dev_test_data$plain_snp_data <- dat
I want to add in the SNP data that I used when Hans and I were looking at the psiblings
method. It is
a nice size, about 1400 fish at 95 SNPs or so. We will save the genotype data and the pedigree. This is a little
ugly now, and not portable, because the original is on my hard drive. I must clean this up before distribution,
but I wanted a record of it for now.
# Get the data.
# NOTE: this is an absolute path on the original author's machine, so stop
# early with a clear message when the file is not available.
mykiss_rda <- "/Users/eriq/Documents/work/prj/HansSib/PBT_Data/WSH/data/wsh_data_for_sibship_analysis.Rda"
if (!file.exists(mykiss_rda)) {
  stop("Cannot find the mykiss data file: ", mykiss_rda, call. = FALSE)
}
# The statements below rely on the .Rda file supplying the objects
# `ped.no.parents` and `genos.no.parents`.
load(mykiss_rda)
fs_dev_test_data$mykiss_pedigree <- ped.no.parents
fs_dev_test_data$mykiss_genos <- genos.no.parents
# Label the genotype columns in pairs (Loc_1, Loc_1.1, Loc_2, Loc_2.1, ...).
# seq_len() yields no labels at all for an empty genotype table, where
# 1:0 would have counted backwards.
names(fs_dev_test_data$mykiss_genos) <- make.unique(
  paste0("Loc_", rep(seq_len(ncol(genos.no.parents) %/% 2), each = 2))
)
First we will disregard half siblings and just make a list of integer identifiers of the full siblings.
# Here are the indices of the kids, starting from 0.
mykiss_ped <- fs_dev_test_data$mykiss_pedigree
kidx <- as.integer(
  factor(mykiss_ped$Kid, levels = as.character(mykiss_ped$Kid))
) - 1
# Group the kid indices by their (Pa, Ma) pair: one element per full sibship.
sib_list <- split(kidx, paste(mykiss_ped$Pa, mykiss_ped$Ma, sep = "--"))
# Largest sibships first; ties are broken by the smallest first member index
# (the negation turns the decreasing sort into an increasing one for that key).
sibship_sizes <- lengths(sib_list)
first_members <- vapply(sib_list, function(x) x[1], numeric(1))
ord_sib_list <- sib_list[
  order(sibship_sizes, -first_members, decreasing = TRUE)
]
# Now, put it in a list that holds the base-0 index of the sibship and also
# the base-0 indices of the sibship members.
tmp <- lapply(seq_along(ord_sib_list), function(i) {
  list(LMMI_Idx = i - 1, Indivs = ord_sib_list[[i]])
})
names(tmp) <- names(ord_sib_list)
fs_dev_test_data$mykiss_sib_list <- tmp
I am going to add one of the chinook data sets in which we inferred siblings from PBT. I really should get a more updated one from Anthony, but for now I am just going to use what I had from the HansSib project.
# NOTE: this is an absolute path on the original author's machine, so stop
# early with a clear message when the file is not available.
chinook_rda <- "/Users/eriq/Documents/work/prj/HansSib/PBT_Data/FRH/FRH_sib-data.Rda"
if (!file.exists(chinook_rda)) {
  stop("Cannot find the chinook data file: ", chinook_rda, call. = FALSE)
}
# The statements below rely on the .Rda file supplying the object `genos`,
# whose row names are split on "--" into Kid, Pa and Ma.
load(chinook_rda)
chinook_pedigree <- as.data.frame(
  matrix(
    unlist(strsplit(rownames(genos), split = "--")),
    byrow = TRUE,
    ncol = 3
  ),
  stringsAsFactors = FALSE
)
names(chinook_pedigree) <- c("Kid", "Pa", "Ma")

# Put the names in there as just the kids.
chinook_genos <- genos
rownames(chinook_genos) <- chinook_pedigree$Kid

# Here are the number of full sibships each parent is in: count how often each
# parent appears among the distinct Ma--Pa pairs.
parent_pairs <- unique(
  paste(chinook_pedigree$Ma, chinook_pedigree$Pa, sep = "--")
)
num.fss <- table(unlist(strsplit(parent_pairs, "--")))

# Here are the parents that are in more than one full sib group:
hssers <- names(num.fss)[num.fss > 1]

# These are the indices of the kids to keep as they are all full sibs
# (no half sibling relationships in there that we know of).
retain <- !((chinook_pedigree$Ma %in% hssers) |
  (chinook_pedigree$Pa %in% hssers))

# Now, make data sets that should have no half sibs in them.
# drop = FALSE keeps the two-dimensional shape even if only one row is retained.
chinook_full_sibs_pedigree <- chinook_pedigree[retain, , drop = FALSE]
chinook_full_sibs_genos <- chinook_genos[retain, , drop = FALSE]

fs_dev_test_data$chinook_genos <- chinook_genos
fs_dev_test_data$chinook_pedigree <- chinook_pedigree
fs_dev_test_data$chinook_full_sibs_genos <- chinook_full_sibs_genos
fs_dev_test_data$chinook_full_sibs_pedigree <- chinook_full_sibs_pedigree
Though I actually have this commented out:
#save(fs_dev_test_data, file="data/fs_dev_test_data.rda", compress="xz")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.