R/LocusExtractor.R

Defines functions extractLoci

Documented in extractLoci

## Locus Extractor -- Steven J. Mack April 10, 2020
## v1.0
## The function accepts the two column haps data-frame generated by LDWrap(),
## processes this data-frame and extracts all of the locus information, and then 
## retruns a list object that contains (1) the formatted locus-prefix ($prefix) 
## and (2) the list of unique loci interleaved with loci_1 ($loci) for that datset

#' Extract Locus Information from Supplied Haplotype Data
#' 
#' This function extracts locus information from the haplotype data, and structures it for LDWrap().
#' @param dataSet Data frame of two haplotypes extracted from the famData provided to LDWrap()
#' @note This function is for internal POULD use only.
#' @return List of two vector elements; the locus prefix (if any), e.g. "HLA-", and the interleaved unsuffixed and suffixed locus names (e.g., locus, locus_1)
#' @keywords extractLocus
#' @export
#' @examples #

 extractLoci <- function(dataSet){
 
 # Split the haplotype columns into alleles
 alleles <- rbind(as.data.frame(strsplit(dataSet$V1[1],"~",fixed = TRUE),stringsAsFactors = FALSE,col.names="X"),as.data.frame(strsplit(dataSet$V2[1],"~",fixed = TRUE),stringsAsFactors = FALSE,col.names = "X"))

 # Compile the alleles into a long list
 for(i in 2:nrow(dataSet)){
   alleles <- rbind(alleles, as.data.frame(strsplit(dataSet$V1[i],"~",fixed = TRUE),stringsAsFactors = FALSE,col.names="X"),as.data.frame(strsplit(dataSet$V2[i],"~",fixed = TRUE),stringsAsFactors = FALSE,col.names = "X"))
 }
 
 # Extract the list of unique locus names
 prefixed <- unique(unlist(as.data.frame(strsplit(alleles$X,"*",fixed = TRUE),stringsAsFactors = FALSE)[1,]))
 
 # Extract the locus prefix; assumes that all loci have the same structure, and that a prefix is "<string>-"
 if(regexpr("-",prefixed[1],fixed = TRUE)[1]!=-1){
 prefix <- paste(strsplit(prefixed[1],"-",fixed = TRUE)[[1]][1],"-",sep="")} else {prefix <- ""}
 ## Remove the prefix (if there is one) from the prefixed vector
 loci <- gsub(prefix,"",prefixed)
 # Interleave the unsuffixed locus names with locus_1
 loci <- as.vector(rbind(loci, paste(loci,"_1",sep="")))
 
 ## Create a return list object
 block <- list()
 block$prefix <- prefix
 block$loci <- loci
 
 block
 }

Try the pould package in your browser

Any scripts or data that you put into this service are public.

pould documentation built on Oct. 23, 2020, 7 p.m.