R/code.R

Defines functions nDom nRec CLCfile gDom gRec VCFfile annot prepvar varfilter commonvar prepvarpar

Documented in annot CLCfile commonvar gDom gRec nDom nRec prepvar prepvarpar varfilter VCFfile

############################################
# NUCLEOTIDE LEVEL                         #
# 1) Dominant (function: nDOM)             #
# 2) Recessive (function: nRec)            #
############################################
# function 1: nDOM (require: stringr!)
# requirements input files:
# Chromosome, Region and Allele, for family: also Zygosity
#' Variant filtering with dominant inheritance
#' @description nDom should be used to filter variants under a dominant mode of 
#'   inheritance.
#' @param x the case(s).
#' @param y optional, the control(s).
#' @param family optional, "P-F" or "Ps-F" mode, indicating a (healthy) 
#'   parent(s)- (affected) progeny relation.
#' @details nDom checks for common variants within cases. If variants from 
#'   control(s) are present, they can be used to filter variants from the cases.
#'   In addition, various detectance and penetrance parameters can be specified 
#'   during the processing.
#'   
#'   If family is specified, penetrance is assumed to be complete. In addition,
#'   the causal variant is assumed to be heterozygous in the case.
#' @author Bart Broeckx
#' @examples
#' output <- nDom("CLCfile1", "CLCfile2"  )
#' output
#' output <- nDom(c("CLCfile1","CLCfile2"))
#' output
#' 

nDom <- function(x,y,family)
{ # general prints
  Text <- paste("Number of cases: ", length(x), "\n", sep="")
  cat(Text)
  if (!missing(y)){
    Text <- paste("Number of controls: ", length(y), "\n", sep="")
  } else {
    Text <- paste("Number of controls: 0", "\n")
  }
  cat(Text)
  if (!missing(family)) { 
    if (family == "P-F" && length(x) == "1" && length(y) == "1" ){
      Text <- paste("P-F mode: 1 healthy parent, 1 affected progeny", "\n", sep="")
      cat(Text)
    } else if (family == "Ps-F" &&  length(x) == "1" && length(y) == "2" ) {
      Text <- paste("Ps-F mode: 2 healthy parents, 1 affected progeny", "\n", sep="")
      cat(Text)
    } else {
      stop("family not correct: check number of cases/controls or selected inheritance") 
    }
  }
  
  # first x-vector (cases!!)
  output=NULL
  for (i in 1:length(x)){
    nameValue<-get(x[[i]])
    nameValue$comb <-paste(nameValue$Chromosome,nameValue$Region, nameValue$Allele, sep=" ")
    output <-rbind(output, nameValue)
    Text <- paste("Finished case",i, "\n" )
    cat(Text)
  }
  Freq <-as.data.frame(table(output$comb), stringsAsFactors=FALSE)
  # which detectance level is required?
  if (missing(family)) {
    Text <- paste("Which detectance level is required?", "\n" )
    cat(Text)
    for (i in 1:length(x)) {
      Text <- paste("level (",i,"): ", i/length(x)*100,"%"," (",i,"/",length(x),")","\n", sep="")
      cat(Text)
      
    }
    leveldetect <- as.numeric(readline("Choose level:"))
  }
  # followed by y vector (controls!!)
  if (!missing(y) ) {
    output=NULL
    for (i in 1:length(y)){
      nameValuey<-get(y[[i]])
      nameValuey$comb <-paste(nameValuey$Chromosome,nameValuey$Region, nameValuey$Allele, sep=" ")
      output <-rbind(output, nameValuey)
      Text <- paste("Finished control",i, "\n" )
      cat(Text)
    }
    Freq2 <-as.data.frame(table(output$comb), stringsAsFactors=FALSE)
    
    # next, which penetrance level is required?
    if (missing(family)) {
      Text <- paste("Which penetrance level is required?", "\n" )
      cat(Text)
      for (i in 0:length(y)) {
        Text <- paste("level (",i,"): ", round(leveldetect/(leveldetect+i)*100),
                      "%", " (",leveldetect,"/(",leveldetect,"+",i,"))" ,"\n", sep="")
        cat(Text)
      }
      level <- as.numeric(readline("Choose level:"))
      id <-which(Freq2$Freq >= (level+1)) 
    } else if (family == "P-F" | family == "Ps-F") { 
      id <- which(Freq2$Freq >= "1") 
    }
    # exlcusion of those variants dependent on the penetrance level
    if (length(id) > 0) { Freq2 <- Freq2[id,]
                          
                          # exclusion of those variants present in the controls
                          id <- which(Freq$Var1 %in% Freq2$Var1)
                          if (length(id) > 0) {Freq<- Freq[-id,]}
    }
  }
  # application of previously chosen detectance level
  if (missing(family)) {
    id <-which(Freq$Freq >= leveldetect)
  } else if (family == "P-F" | family == "Ps-F") { 
    id <- which(nameValue$comb %in% Freq$Var1)
    Zygosity <-nameValue$Zygosity
    Zygosity <-Zygosity[id]
    Freq <-cbind(Freq, Zygosity)              
    id <-which(Freq$Zygosity== "Heterozygous")          
    Freq <-Freq[id,]
    id <- which(Freq$Freq >= "1")   
  }
  
  if (length(id) > 0){
    q <-as.data.frame(stringr::str_split_fixed(Freq[id,1]," ",3), stringsAsFactors=FALSE) 
    q <-cbind(q,Freq[id,2])
    colnames(q) <- c("Chromosome", "Region", "Allele","Number of Samples")
    
    q
  } else {
    Text <- "No variants retained"
    cat(Text)}
  
  
}


##############################################
# function 2: nREC (require stringr!)
# requirements input files:
# Chromosome, Region and Allele AND Zygosity
#' Variant filtering with recessive inheritance
#' @description nRec should be used to filter variants under a recessive mode of 
#'   inheritance.
#' @param x the case(s).
#' @param y optional, the control(s).
#' @param family optional, "P-F" or "Ps-F" mode, indicating a (healthy) 
#'   parent(s)- (affected) progeny relation.
#' @details nRec checks for common variants that are homozygous in the cases. If variants from  
#'   control(s) are present, they can be used to filter variants from the cases. However, only homozygous variants are used to filter.
#'   In addition, various detectance and penetrance parameters can be specified 
#'   during the processing.
#'   
#'   If family is specified, penetrance is assumed to be complete. In addition,
#'   the causal variant is assumed to be homozygous in the case and heterozygous in the parent(s).
#' @author Bart Broeckx
#' @examples
#' output <- nRec("CLCfile1", "CLCfile2")
#' output
#' output <- nRec(c("CLCfile1","CLCfile2"))
#' output


nRec <- function(x,y, family)
{
  Text <- paste("Number of cases: ", length(x), "\n", sep="")
  cat(Text)
  if (!missing(y)){
    Text <- paste("Number of controls: ", length(y), "\n", sep="")
  } else {
    Text <- paste("Number of controls: 0", "\n")
  }
  cat(Text)
  if (!missing(family)) {
    if (family == "P-F" && length(x) == "1" && length(y) == "1" ){
      Text <- paste("P-F mode: 1 healthy parent, 1 affected progeny", "\n", sep="")
      cat(Text)
    } else if (family == "Ps-F" &&  length(x) == "1" && length(y) == "2" ) {
      Text <- paste("Ps-F mode: 2 healthy parents, 1 affected progeny", "\n", sep="")
      cat(Text)
    } else {
      stop("family not correct: check number of cases/controls or selected inheritance") 
    }
  }
  # first x-vector (cases!!)
  output=NULL
  for (i in 1:length(x)){
    nameValue<-get(x[[i]])
    id <- which(nameValue$Zygosity == "Homozygous")
    nameValue <- nameValue[id,]
    nameValue$comb <-paste(nameValue$Chromosome,nameValue$Region, nameValue$Allele, sep=" ")
    output <-rbind(output, nameValue)
    Text <- paste("Finished case",i, "\n" )
    cat(Text)
  }
  
  Freq <-as.data.frame(table(output$comb), stringsAsFactors=FALSE)
  
  # which detectance level is required?
  if (missing(family)) {
    
    Text <- paste("Which detectance level is required?", "\n" )
    cat(Text)
    for (i in 1:length(x)) {
      Text <- paste("level (",i,"): ", i/length(x)*100,"%"," (",i,"/",length(x),")","\n", sep="")
      cat(Text)
      
    }
    leveldetect <- as.numeric(readline("Choose level:"))
  } 
  
  # followed by y vector (controls!!)
  if (!missing(y) ) {
    output=NULL
    for (i in 1:length(y)){
      nameValuey<-get(y[[i]])
      id <- which(nameValuey$Zygosity == "Homozygous")
      nameValuey <- nameValuey[id,]
      nameValuey$comb <-paste(nameValuey$Chromosome,nameValuey$Region, nameValuey$Allele, sep=" ")
      output <-rbind(output, nameValuey)
      Text <- paste("Finished control",i, "\n" )
      cat(Text)
    }
    Freq2 <-as.data.frame(table(output$comb), stringsAsFactors=FALSE)
    
    # next, which penetrance level is required?
    if (missing(family)) {
      Text <- paste("Which penetrance level is required?", "\n" )
      cat(Text)
      for (i in 0:length(y)) {
        Text <- paste("level (",i,"): ", round(leveldetect/(leveldetect+i)*100),
                      "%", " (",leveldetect,"/(",leveldetect,"+",i,"))" ,"\n", sep="")
        cat(Text)
      }
      level <- as.numeric(readline("Choose level:"))
      id <-which(Freq2$Freq >= (level+1)) 
      
    } else if (family == "P-F" | family == "Ps-F") { 
      id <- which(Freq2$Freq >= "1") 
    }
    
    # exclusion of those variants present in the controls
    if (length(id) > 0) { Freq2 <- Freq2[id,]
                          
                          # exclusion of those variants present in the controls
                          id <- which(Freq$Var1 %in% Freq2$Var1)
                          if (length(id) > 0) {Freq<- Freq[-id,]}
    }
  }
  
  # application of detectance level
  if (missing(family)) {
    id <-which(Freq$Freq >= leveldetect)
  } else if (family == "P-F" ) { 
    output=NULL
    for (i in 1:length(y)){
      nameValuez<-get(y[[i]])
      id <- which(nameValuez$Zygosity == "Heterozygous")
      nameValuez <- nameValuez[id,]
      nameValuez$comb <-paste(nameValuez$Chromosome,nameValuez$Region, nameValuez$Allele, sep=" ")
      output <-rbind(output, nameValuez)
    }  
    Freq2 <-as.data.frame(table(output$comb), stringsAsFactors=FALSE)
    id <- which(Freq$Var1 %in% Freq2$Var1)
    
  } else if (family == "Ps-F"){
    output=NULL
    for (i in 1:length(y)){
      nameValuez<-get(y[[i]])
      id <- which(nameValuez$Zygosity == "Heterozygous")
      nameValuez <- nameValuez[id,]
      nameValuez$comb <-paste(nameValuez$Chromosome,nameValuez$Region, nameValuez$Allele, sep=" ")
      output <-rbind(output, nameValuez)
    }  
    Freq2 <-as.data.frame(table(output$comb), stringsAsFactors=FALSE)
    id <- which(Freq2$Freq == "2")
    Freq2 <- Freq2[id,]
    id <- which(Freq$Var1 %in% Freq2$Var1)
  }
  
  if (length(id) > 0){
    q <-as.data.frame(stringr::str_split_fixed(Freq[id,1]," ",3), stringsAsFactors=FALSE) 
    q <-cbind(q,Freq[id,2])
    colnames(q) <- c("Chromosome", "Region", "Allele","Number of Samples")
    q
  } else {
    Text <- "No variants retained"
    cat(Text)}
  
}



############################################
# GENE LEVEL (or other grouping var)       #
# 1) CLCfile                               #
# 2) Dominant (function: gDOM)             #
# 3) Recessive (function: gRec)            #
# 4) vcffile                               #
############################################
# Function 1: CLCfile (require stringr!)
# preparation of CLC files for gDom and gRec
# should be done SAMPLE PER SAMPLE!
#' Prepare CLC output for filtering
#' 
#' @description CLCfile should be used to process CLC output prior downstream
#'   analysis with gDom and gRec.
#' @param x the data frame to be processed.
#' @param y the column name containing the grouping variable.
#' @param multiple does one line contain >1 group? If true, specify split.
#' @param split the split that separates the group in one line.
#' @details This function is specifically designed to process the output from 
#'   CLC genomics workbench. It should be used before the variant filters gDom 
#'   and gRec are used to emphasize the grouping (e.g. gene, exon) to be used in
#'   downstream analysis. As often is the case, a variant might belong to >1 
#'   group (e.g. 2 genes). If so, the file is automatically reformatted to 
#'   assure that each line contains only one variant per group, if multiple is 
#'   TRUE and the split is specified. If multiple is FALSE, only the first part 
#'   of the y-column is maintained.This function should not be used for nDom and
#'   nRec.
#' @author Bart Broeckx 
#' @examples
#' CLCfile1proc <-CLCfile(CLCfile1, "Coding.region.change", TRUE, "; ")
#' CLCfile2proc <-CLCfile(CLCfile2, "Coding.region.change", TRUE, "; ")

CLCfile <- function(x,y, multiple,split)
{ # splitting up lines  
  if (!missing(multiple)) {
    if (multiple == "TRUE") {
      if (!missing(split)) {
        group <-x[,y]  
        group <-unlist(strsplit(group, split))
        repeats  <- 1+stringr::str_count(x[,y], split)
        x <-x[rep(seq_len(nrow(x)),repeats),]
        group <-gsub(":.*", "", group)
        output <-cbind(x,group)
      }
    }
  }
  # without split 
  if (missing(multiple) | multiple == "FALSE") {
    group <-x[,y]
    group <-gsub(":.*", "", group)
    output<-cbind(x,group)
  }
  output
}

##############################################
# Function 2: gDOm (require stringr!)
# filtering of previously processed files
# can be done for groups of samples
#' Variant filtering with dominant inheritance considering functional unit.
#' @description gDom should be used to filter variants under a dominant mode of 
#'   inheritance when common functional units (a gene, an exon, ...) should be considered.
#' @param x the case(s).
#' @param y optional, the control(s).
#' @details gDom checks for common functional units within cases. If variants from 
#'   control(s) are present, they can be used to filter variants from the cases.
#'   In addition, various detectance and penetrance parameters can be specified 
#'   during the processing. 
#'   
#'   Before using this function, each file from each case should have been processed by CLCfile or VCFfile with annot.  
#' @author Bart Broeckx
#' @examples
#' CLCfile1proc <-CLCfile(CLCfile1, "Coding.region.change", TRUE, "; ")
#' CLCfile2proc <-CLCfile(CLCfile2, "Coding.region.change", TRUE, "; ")
#'  output <- gDom("CLCfile1proc", "CLCfile2proc")
#'  output
#'  output <- gDom(c("CLCfile1proc","CLCfile2proc"))
#'  output




gDom <- function(x,y)
{ # first some general remarks
  Text <- paste("Number of cases: ", length(x), "\n", sep="")
  cat(Text)
  if (!missing(y)){
    Text <- paste("Number of controls: ", length(y), "\n", sep="")
    cat(Text)
  }
  # which detectance level is required?
  
  Text <- paste("Which detectance level is required?", "\n" )
  cat(Text)
  for (i in 1:length(x)) {
    Text <- paste("level (",i,"): ", i/length(x)*100,"%"," (",i,"/",length(x),")","\n", sep="")
    cat(Text)
  }
  leveldetect <- as.numeric(readline("Choose level:"))
  if (!missing(y)){
    # followed by y vector (controls!!)
    outputy=NULL
    for (i in 1:length(y)){
      nameValuey<-get(y[[i]])
      nameValuey$comb <-paste(nameValuey$Chromosome,nameValuey$Region, nameValuey$Allele, sep=" ")
      id <- which(duplicated(nameValuey$comb))
      if (length(id) >0){
        nameValuey <- nameValuey[-id,]
      }
      outputy <-rbind(outputy, nameValuey)
      Text <- paste("Finished control",i, "\n" )
      cat(Text)
    }
    Freq2 <-as.data.frame(table(outputy$comb), stringsAsFactors=FALSE)
    # next, which penetrance level is required?
    Text <- paste("Which penetrance level is required?", "\n" )
    cat(Text)
    for (i in 0:length(y)) {
      Text <- paste("level (",i,"): ", round(leveldetect/(leveldetect+i)*100),
                    "%", " (",leveldetect,"/(",leveldetect,"+",i,"))" ,"\n", sep="")
      cat(Text)
    }
    levelpenet <- as.numeric(readline("Choose level:"))
    id <-which(Freq2$Freq >= (levelpenet+1)) 
    
    
    # exclusion of those variants dependent on the penetrance level
    Freq2 <- Freq2[id,]
    
  }else {
    Text <- paste("Number of controls: 0", "\n")
  }
  
  
  
  # filtering:
  # first x-vector (cases!!)
  output=NULL
  variantoutput=NULL
  for (i in 1:length(x)){
    nameValue<-get(x[[i]])
    nameValue$comb <-paste(nameValue$Chromosome,nameValue$Region, nameValue$Allele, sep=" ")
    
    
    # exclusion of those variants present in the controls
    if (!missing(y) && nrow(Freq2) != "0") {
      
      id <- which(nameValue$comb %in% Freq2$Var1)
      if (length(id) > 0) {nameValue<- nameValue[-id,]}
    }
    # intermezzo: grouping
    Freq <-as.data.frame(table(nameValue[,"group"]), stringsAsFactors=FALSE)
    id <- which(Freq$Freq >=1)
    Freq <- Freq[id,]
    output <-c(output, Freq$Var1)
    # variantsretained
    nameValue$overall <-paste(nameValue$comb, nameValue[,"group"], sep=" / ")
    variantoutput<-c(variantoutput, nameValue$overall)
    
    Text <- paste("Finished case",i, "\n" )
    cat(Text)
    
  }
  Freq <- as.data.frame(table(output))
  variants <-as.data.frame(table(variantoutput))
  # application of detectance 
  # followed by final filtering
  
  id <-which(Freq$Freq >= leveldetect)
  if (length(id) > 0){
    q <-Freq[id,"output"]
    r <-as.data.frame(stringr::str_split_fixed(variants[,1]," / ",2), stringsAsFactors=FALSE) 
    
    id <- which(r[,2] %in% q)
    r <- r[id,]
    r<-cbind(as.data.frame(stringr::str_split_fixed(r[,1]," ",3), stringsAsFactors=FALSE),r[,2]) 
    r <- cbind(r,variants[id,"Freq"])
    colnames(r) <- c("Chromosome", "Region", "Allele", "Group", "Number of samples")
    r
    Text <- paste("Number of variants retained: ", nrow(r), "\n", sep="")
    cat(Text)
    Text <- paste("Number of genes retained: ", nrow(as.data.frame(table(r$Group))), "\n", sep="")
    cat(Text)
    r
  } else {
    Text <- "None retained"
    cat(Text)}
}


##############################################
# Function 3: gRec
# to be used after filtering of previously processed files
#'Variant filtering with recessive inheritance considering functional unit.
#'@description gRec should be used to filter variants under a recessive mode of 
#'  inheritance when common functional units (a gene, an exon, ...) should be 
#'  considered.
#'@param x the case(s).
#'@param y optional, the control(s).
#'@param list logical, specify whether a list should be returned (TRUE) or not
#'  (false).
#'@details gRec checks for common functional units within cases. For a unit to 
#'  be retained, it should have at least one homozygous non-reference variant or
#'  at least be compound heterozygous.  If variants from control(s) are present,
#'  they can be used to filter variants from the cases. Homozygous variants 
#'  present in the controls will be used to filter variants from the cases 
#'  immediately. Pairwise combinations of heterozygous variants present in the
#'  controls can also be used to filter variants in the cases. In addition,
#'  various detectance and penetrance parameters can be specified during the
#'  processing.
#'  
#'  Before using this function, each file from each case should have been 
#'  processed by CLCfile or VCFfile with annot.
#'  
#'  The parameter list allows you to specify whether the output should contain
#'  only the retained variants and genes (FALSE) only or together with the
#'  compound heterozygous variants per sample (TRUE). This parameter is only
#'  useful when you have both case(s) and control(s) and when compound
#'  heterozygous variants were present in the controls. For further processing
#'  with commonvar, list should be FALSE.
#'@author Bart Broeckx
#' @examples 
#' CLCfile1proc <-CLCfile(CLCfile1, "Coding.region.change", TRUE, "; ")
#' CLCfile2proc <-CLCfile(CLCfile2, "Coding.region.change", TRUE, "; ")
#'output <- gRec("CLCfile1proc", "CLCfile2proc", TRUE)
#'output
#'output <- gRec(c("CLCfile1proc","CLCfile2proc"),, TRUE)
#'output
gRec <- function(x,y,list)
{ # first some general remarks
  if (missing(list)) {
    stop("specify list")
  }
  Text <- paste("Number of cases: ", length(x), "\n", sep="")
  cat(Text)
  if (!missing(y)){
    Text <- paste("Number of controls: ", length(y), "\n", sep="")
    cat(Text)
  }
  Text <- paste("Which detectance level is required?", "\n" )
  cat(Text)
  for (i in 1:length(x)) {
    Text <- paste("level (",i,"): ", i/length(x)*100,"%"," (",i,"/",length(x),")","\n", sep="")
    cat(Text)
  }
  leveldetect <- as.numeric(readline("Choose level:"))
  
  
  if (!missing(y)){
    # followed by y vector (controls!!)
    homo=NULL
    hetero=NULL
    for (i in 1:length(y)){
      nameValuey<-get(y[[i]])
      nameValuey$comb <-paste(nameValuey$Chromosome,nameValuey$Region, nameValuey$Allele, sep=" ")
      id <- which(nameValuey$Zygosity =="Homozygous")
      homozygous <- unique(nameValuey[id, "comb"])
      heterozygous <- nameValuey[-id,]
      # following line has been added
      heterozygous$group <- factor(heterozygous$group)
      # previous line has been added
      pairwisecontrol = NULL
      for (z in 1:length(levels(heterozygous[, "group"]))) {
        Test <- subset(heterozygous[, "comb"], heterozygous[,"group"]==levels(heterozygous[,"group"])[z])
        if (length(Test)>1){
          a <-combn(Test,2, paste, collapse=" / ")
          pairwisecontrol <- c(pairwisecontrol,a)}
        
      }
      hetero <-c(hetero, unique(pairwisecontrol))
      homo <-c(homo, homozygous)
      
      Text <- paste("Finished control",i, "\n" )
      cat(Text)
    }
    Freqhomo <-as.data.frame(table(homo), stringsAsFactors=FALSE)
    Freqhetero <-as.data.frame(table(hetero), stringsAsFactors=FALSE)
    
    # next, which penetrance level is required?
    Text <- paste("Which penetrance level is required?", "\n" )
    cat(Text)
    for (i in 0:length(y)) {
      Text <- paste("level (",i,"): ", round(leveldetect/(leveldetect+i)*100),
                    "%", " (",leveldetect,"/(",leveldetect,"+",i,"))" ,"\n", sep="")
      cat(Text)
    }
    levelpenet <- as.numeric(readline("Choose level:"))
    id <-which(Freqhomo$Freq >= (levelpenet+1)) 
    id2 <-which(Freqhetero$Freq >= (levelpenet+1)) 
    
    # exclusion of those variants dependent on the penetrance level
    Freqhomo <- Freqhomo[id,]
    Freqhetero <- Freqhetero[id2,]
    
  }else {
    Text <- paste("Number of controls: 0", "\n")
  }
  
  # filtering:
  # first x-vector (cases!!)
  output=NULL
  variantoutput=NULL
  pairwisecombinations <- vector(mode="list", length=length(x))
  names(pairwisecombinations) <- x
  for (s in 1:length(x)){
    nameValue<-get(x[[s]])
    nameValue$comb <-paste(nameValue$Chromosome,nameValue$Region, nameValue$Allele, sep=" ")
    # two step filtering: 
    # filter out all the homozygous variants present in the controls:
    if (!missing(y) && nrow(Freqhomo) != "0") {
      
      id <- which(nameValue$comb %in% Freqhomo$homo)
      if (length(id) > 0) {nameValue<- nameValue[-id,]}
    }
    # give homozy value 2, hetero value 1 for further filtering
    id <- which(nameValue$Zygosity == "Homozygous")
    Zygo <- rep(2, length(id))
    homo <- cbind(nameValue[id,], Zygo)
    Zygo <- rep(1, nrow(nameValue)-nrow(homo))
    hetero <- cbind(nameValue[-id,], Zygo)
    
    
    
    # second filtering: filter out pairwise combinations
    # generate pairwise combn
    
    if (!missing(y) && nrow(Freqhetero) != "0") { 
      pairwisecase = NULL
      # the following line has been added:
      hetero$group<- factor(hetero$group)
      # the previous line has been added
      for (t in 1:length(levels(hetero[, "group"]))) {
        Test <- subset(hetero[, "comb"], hetero[,"group"]==levels(hetero[,"group"])[t])
        if (length(Test)>1){
          a <-combn(Test,2, paste, collapse=" / ")
          pairwisecase <- c(pairwisecase,a)}
        
      }
      # filterstep:
      
      id <- which(pairwisecase %in% Freqhetero$hetero)
      if (length(id) == 0) {
        heteroretained <- unique(unlist(strsplit(pairwisecase, " / ")))
        id <- which(hetero$comb %in% heteroretained)
        hetero <- hetero[id,]
      } else if(length(id) > 0) {
        if (length(unique(id)) != length(pairwisecase)) {
          # the next line comma has been removed
          pairwisecase<- pairwisecase[-id]
          # the previous line comma has been removed
          heteroretained <- unique(unlist(strsplit(pairwisecase, " / ")))
          id <- which(hetero$comb %in% heteroretained)
          hetero <- hetero[id,]
        } else if (length(unique(id)) == length(pairwisecase)){
          hetero=NULL
          pairwisecase <- "none"
        }
      }
    }
    endvariants <-rbind(homo, hetero)
    if (!missing(y) && nrow(Freqhetero) != "0") {
      pairwisecombinations[[s]] <- pairwisecase }
    
    
    # followed by grouping per "gene" (defined as group)
    grouping = NULL
    for (i in 1:length(levels(endvariants[,"group"]))) {
      Test <- subset(endvariants, endvariants[,"group"]==levels(endvariants[,"group"])[i])
      if (nrow(Test) > 0) {
        count <-sum(Test$Zygo)
        Gene <-levels(endvariants[,"group"])[i]
        c <- data.frame(count,Gene)
        grouping <- rbind(grouping,c)}
      
    }
    
    # followed by selection of those "genes" with count >=2
    id <- which(grouping$count >= 2)
    grouping <- grouping[id,2]
    
    # selection of those genes with a count of >=2 from the original dataframe:
    id <- which(endvariants[,"group"] %in% grouping)
    endvariants <-endvariants[id,]
    
    Freq <-as.data.frame(table(endvariants[,"group"]), stringsAsFactors=FALSE)
    id <- which(Freq$Freq > 0)
    Freq <- Freq[id,]
    output <-c(output, Freq$Var1)
    # variantsretained
    endvariants$overall <-paste(endvariants$comb, endvariants[,"group"], sep=" / ")
    variantoutput<-c(variantoutput, endvariants$overall)
    
    Text <- paste("Finished case",s, "\n" )
    cat(Text)
    
  } # end of for loop x
  Freq <- as.data.frame(table(output))
  variants <-as.data.frame(table(variantoutput))
  
  id <-which(Freq$Freq >= leveldetect)
  if (length(id) > 0){
    q <-Freq[id,"output"]
    r <-as.data.frame(stringr::str_split_fixed(variants[,1]," / ",2), stringsAsFactors=FALSE) 
    
    id <- which(r[,2] %in% q)
    r <- r[id,]
    filteringpairwisecombinations<- r[,1]
    r<-cbind(as.data.frame(stringr::str_split_fixed(r[,1]," ",3), stringsAsFactors=FALSE),r[,2]) 
    r <- cbind(r,variants[id,"Freq"])
    colnames(r) <- c("Chromosome", "Region", "Allele", "Group", "Number of samples")
    r
    Text <- paste("Number of variants retained: ", nrow(r), "\n", sep="")
    cat(Text)
    Text <- paste("Number of genes retained: ", nrow(as.data.frame(table(r$Group))), "\n", sep="")
    cat(Text)
    
    if (list == "TRUE" && !missing(y) && nrow(Freqhetero) != "0"){
      resultlist <- vector(mode="list", length=length(pairwisecombinations)+1)
      resultlist[[1]] <- r
      
      varend <- vector(mode="list", length=length(x))
      
      for (i in 1:length(pairwisecombinations)){
        var<-pairwisecombinations[[i]]
        vardef=NULL
        for (w in 1:length(var)){
          varret<- var[w]
          substr<-unlist(strsplit(varret, " / "))
          presentone<-substr[1] %in% filteringpairwisecombinations
          presenttwo <-substr[2] %in% filteringpairwisecombinations
          if (presentone == TRUE | presenttwo == TRUE) {
            vardef <- c(vardef,varret)
          }
        }
        if (length(vardef) > 0) {
          varend[[i]] <- vardef
        } else {
          varend[[i]] <- "none" 
        }
      }
      
      resultlist[2:length(resultlist)] <- varend
      allnames<- c("endresult", names(pairwisecombinations))
      names(resultlist)<- allnames
      resultlist
      
    } else if (list == "FALSE" | missing(y)) {r }
  } else {
    Text <- "None retained"
    cat(Text)}
  
}


##############################################
# Function 4: vcffile
# to be used after filtering of previously processed files

#' Prepare VCF file for filtering.
#' @description VCFfile should be used to process VCF files prior downstream 
#'   analysis with any other function.
#' @param x the VCF file to be processed.
#' @param sample specify the column name containing the variant info for the 
#'   sample of interest.
#' @param filter logical, should the variants have a certain quality value in the 7th 
#'   mandatory column of the VCF format? 
#' @param value optional, which variants should be retained? (e.g. PASS)
#' @details This function should be used to process VCF files to make them 
#'   compatible for further filtering. If wanted, a preliminary filtering can be
#'   performed by specifiying filter = TRUE and a value in "value". As VCF files
#'   can contain variant information from >1 sample, the column name of the
#'   sample of interest should be specified.
#' @author Bart Broeckx
#'   @examples 
#'   # test 1: filter = TRUE
#'   x <- test
#'   sample <- "V10"
#'   filter <- TRUE
#'   value <- "PASS"
#'   VCFfile(x,sample, filter, value)
#'   
#'   # test 2: filter = FALSE
#'   x <- test
#'   sample <- "V10"
#'   filter <- FALSE
#'   VCFfile(x,sample, filter)


VCFfile <-function(x,sample,filter,value){ 
  id <- which(is.na(x[, sample]))
  if (length(id) >=1) {
    x <- x[-id,]
  } 
  if(filter == TRUE){
    if(!missing(value)){
      id <- which(x[,7] == value)
      x <-x[id,]
    }
  }
  # standard fields
  y <- x[,c(1,2,5,9)]
  # add the field containing the sample
  x <- x[,sample]
  x <- cbind(y,x)
  # start reformatting
  output = NULL
  for(i in 1:nrow(x)){
    var <- x[i,]
    format <-unlist(strsplit(as.character(var[,4]), ":"))
    id <-which(format == "GT")
    variant <-unlist(strsplit(as.character(var[,"x"]), ":"))
    variant <- variant[id]
    splitvariants <-unlist(strsplit(variant, "/"))
    variant1 <- as.numeric(splitvariants[1])
    variant2 <- as.numeric(splitvariants[2])
    Zygosity<- ifelse(variant1 == variant2,"Homozygous", "Heterozygous")
    allele <-unlist(strsplit(as.character(var[,3]),","))
    allele1 <- allele[variant1]
    allele2 <- allele[variant2]
    
    final2  <- cbind(var[,c(1:2)], allele2, Zygosity)
    colnames(final2) <- c("Chromosome", "Region", "Allele", "Zygosity")
    if (Zygosity == "Homozygous") {
      end <- final2
      
    } else  if (Zygosity == "Heterozygous"){ 
      if (variant1 != "0") {
        final1  <-cbind(var[,c(1:2)], allele1, Zygosity)   
        colnames(final1) <- c("Chromosome", "Region", "Allele", "Zygosity")
        end <-rbind(final1,final2)
      } else if (variant1 == "0"){
        end <- final2
      }
    } 
    
    
    output <- rbind(output, end)
  }
  output
}  

############################################
# Annotation of a file and variant filter  #
# 1) Annot                                 #
# 2) Varfilter                             #
############################################
# werkende ANNOT functie.
#'Annotate variant data frame.
#'
#'@description annot can be used to annotate a data frame containing sequencing 
#'  variants.
#'@param x the data frame to be annotated.
#'@param y the data frame containing the annotation.
#'@param type specify whether y is a BED or GTF file.
#'@param nomatch should variants located outside any of the regions found in the
#'  annotation data frame be removed (FALSE) or withheld (TRUE).
#'@param CLC logical, origin of the data frame to be annotated is CLC genomics 
#'  workbench?
#'@details This function can be used to annotate data frames. For further 
#'  processing by gDom, gRec and commonvar, annotation is necessary. For nDom 
#'  and nRec, annotation is optional. Both variant files from CLC Genomics 
#'  Workbench and VCF files (after VCFfile) can be processed. If a variant can 
#'  be allocated to >1 group (a gene, exon or other), the variant information is
#'  spread over n rows (with n the number of groups).
#'  
#'  If the file originates from the CLC genomics workbench (directly or after
#'  processing with nDom or nRec), always specify CLC is TRUE. This is important
#'  as CLC specifies chromosomal location for MNVs in a different way than VCF.
#'@author Bart Broeckx
#'  @examples 
#'  data(test)
#'  data(CLCfile1)
#'  data(genBED)
#'  # for VCF file
#'  # general remark: make sure the chromosome is a character, not a factor: e.g.: 
#'  test$V1<-as.character(test$V1)
#'  # prepare the VCF file
#'  out <-VCFfile(test,"V10", TRUE, "PASS")
#'  # produce an annotated VCF file with all variants retained:
#'  AnnotCLCfile1 <- annot(out, genBED, type="BED", nomatch=TRUE, CLC=FALSE)
#'  # produce an annotated VCF file with all variants retained:
#'  AnnotCLCfile2 <- annot(out, genBED, type="BED", nomatch=FALSE, CLC=FALSE)
#'  # produces an annotated CLC file with all variants retained.
#'  AnnotCLCfile3 <- annot(CLCfile1, genBED, type="BED", nomatch=TRUE, CLC=TRUE)
#'  # produces an annotated CLC file with only the variants that were allocated 
#'  # to a gene being retained.
#'  AnnotCLCfile4 <- annot(CLCfile1, genBED, type="BED", nomatch=FALSE, CLC=TRUE)
#'  

annot <- function(x, y, type, nomatch, CLC){
  if (missing(x) | missing(y) | missing(type)){
    stop("specify x, y and type")
  } else {
    # preparatory phase 
    if  (CLC == TRUE){
      
      idregion <-grep("[:^:]", x$Region)
      if (length(idregion) >=1){
        originalregion <- x[idregion,"Region"]
        x$Region <- gsub("[:^:].*", "", x$Region)  
      }
      
      idregion2 <- grep("[:..:]", x$Region)
      if (length(idregion2) >=1){
        originalregion2 <- x[idregion2, "Region"]
        x$Region <- gsub("[:..:].*", "", x$Region)
        
      }
      
    }
    
    x[,1]<-as.factor(x[,1])
    x[,2]<- as.numeric(x[,2])
    
    
    if (type == "BED") {
      y[,2]<- y[,2] + 1 
    }
    output <- data.frame()
    for (i in 1:nrow(x)){
      linei <- x[i,]
      id <- which(linei[,1] == y[,1])
      subset <- y[id,]
      if (type == "BED") {
        id <- which(linei[,2] >= subset[,2] & linei[,2] <= subset[,3]) 
        if  (CLC == TRUE){
          if (length(idregion) >=1 | length(idregion2) >=1 ){
            if (length(idregion)>=1){ 
            for (f in 1:length(idregion)){
              if (i == idregion[f]){ 
                linei[,2]<-originalregion[f]
                
              }  
            }
          }
          if (length(idregion2)>=1){
            for (g in 1:length(idregion2)){
              if (i == idregion2[g]){ 
                linei[,2]<-originalregion2[g]
                
              }  
            }
          }
          }
        }
        
        if (length(id)>0){
          annotation<-as.vector(unlist(subset[id, 4]))
          nrep <- length(annotation)  
          replicated<- linei[rep(seq_len(nrow(linei)),nrep),]
          result <-cbind(replicated, annotation)
          
          output <-rbind(output, result)
        } else {
          if (nomatch == "TRUE"){
            annotation  <- "NOT FOUND"
            result <- cbind(linei, annotation)
            output <-rbind(output, result)
          } 
        }
      } else if (type== "GTF"){
        id <- which(linei[,2] >= subset[,4] & linei[,2] <= subset[,5]) 
        if  (CLC == TRUE){
          if (length(idregion) >=1 | length(idregion2) >=1 ){
            if (length(idregion)>=1){ 
            for (f in 1:length(idregion)){
              if (i == idregion[f]){ 
                linei[,2]<-originalregion[f]
                
              }  
            }
          }
          if (length(idregion2)>=1){   
          for (g in 1:length(idregion2)){
              if (i == idregion2[g]){ 
                linei[,2]<-originalregion2[g]
                
              }  
            }
          }
          }        
        }
        if (length(id)>0){
          annotation<-as.vector(unlist(subset[id, 10]))
          nrep <- length(annotation)  
          replicated<- linei[rep(seq_len(nrow(linei)),nrep),]
          result <-cbind(replicated, annotation)
        } else {
          if (nomatch == "TRUE"){
            annotation  <- "NOT FOUND"
            result <- cbind(linei, annotation)
            output <-rbind(output, result)
          } 
        }
      }
      
    }
    
  }
  
  output
}

#########################################
#' Preparing variant database.
#'
#'@description prepvar is used to prepare a variant database for further
#'  filtering with varfilter.
#'@param y the data frame containing the variants.
#'@param MAF optional numeric, the MAF of the non-reference variants to be 
#'  retained for filtering.
#'@param reference the column containing the reference sequence in the y data 
#'  frame.
#'@details This function prepares the variants from a database like dbSNP to be
#'  used for filtering variants in cases. MAF is optional and specifies a
#'  minimum allele frequency for the database variants to be retained for
#'  variant filtering. The input of y should be the standard output of the UCSC
#'  table browser ("all fields from selected table"). For large files, running
#'  time can be quite long. An alternative supporting parallel computing is
#'  prepvarpar. For the actual filtering, use varfilter.
#'@author Bart Broeckx
#'@examples
#' y <- SNP
#' reference <- "refNCBI"
#' MAF <- 0.03
#' a <-prepvar(y, MAF, reference)
prepvar <- function(y, MAF, reference){
  if (missing(y) | missing(reference)) {
    stop("specify y and reference")}
  filtering <- data.frame()
  y <- y[, -c(1, 4:7, 10:22, 24, 26)]
  y[, 2] <- y[, 2] + 1
  for (i in 1:nrow(y)) {
    subset <- y[i, ]
    allele <- unlist(strsplit(as.character(subset[, "alleles"]), 
                              ","))
    id <- which(allele != subset[, reference])
    nonrefallele <- allele[id]
    allelefreq <- unlist(strsplit(as.character(subset[, "alleleFreqs"]), 
                                  ","))
    nonreffreq <- allelefreq[id]
    nonreffreq <- as.numeric(nonreffreq)
    nrep <- length(nonreffreq)
    subset <- subset[rep(seq_len(nrow(subset)), nrep), c(1:2)]
    subsetend <- cbind(subset, nonrefallele, nonreffreq)
    filtering <- rbind(filtering, subsetend)
    if (i ==round(nrow(y)/20) | i ==round(nrow(y)/10) | i ==round(nrow(y)/5) |
          i ==round(nrow(y)/3.33) |i ==round(nrow(y)/2.5) | i ==round(nrow(y)/2) | 
          i ==round(nrow(y)/1.66) | i ==round(nrow(y)/1.42) | i ==round(nrow(y)/1.25) | 
          i ==round(nrow(y)/1.11)) {
      Text <- paste(round(i/nrow(y)*100), "procent prepared", "\n", sep=" ")
      cat(Text)
    }
  }
  Text <- paste("Number of variants for filtering: ", nrow(filtering), 
                "\n", sep = "")
  cat(Text)
  if (!missing(MAF)) {
    id <- which(filtering$nonreffreq >= MAF)
    if (length(id) >= 1) {
      filtering <- filtering[id, ]
      Text <- paste("Number of variants with > MAF: ", 
                    nrow(filtering), "\n", sep = "")
      cat(Text)
    }
    else if (length(id) < 1) {
      Text <- paste("No variants for filtering retained!", 
                    "\n", sep = "")
      cat(Text)
    }
  }
  filtering
}

#####################################################
#' Filtering variants from a database.
#'
#'@description varfilter is used to filter case variants using a variant
#'  database (like dbSNP).
#'@param x the data frame to be filtered
#'@param y the data frame containing the variants.
#'@details After preprocessing the variant database with prepvar or
#'  prepvarpar, the actual filtering is done with varfilter.
#'@author Bart Broeckx
#'@examples
#' out <-VCFfile(test, "V10", TRUE, "PASS")
#' a <-prepvar(SNP, 0.03,"refNCBI")
#' filtered <-varfilter(out,a)
varfilter<- function(x,y){
if (missing(x) | missing(y)) {
  stop("specify x and y")}
seqvariants <- paste(x$Chromosome, x$Region, x$Allele, sep = " ")
filtering <- y
filtervariants <- paste(filtering$chrom, filtering$chromStart, 
                        filtering$nonrefallele, sep = " ")
id <- which(seqvariants %in% filtervariants)
orcount <- nrow(x)
Text <- paste("Number of variants to be filtered: ", orcount, 
              "\n", sep = "")
cat(Text)
if (length(id) >= 1) {
  x <- x[-id, ]
}
if (nrow(x) >= 1) {
  Text <- paste("Number of variants from x after filtering retained: ", 
                nrow(x), "\n", sep = "")
  cat(Text)
}
else {
  Text <- paste("No variants left from x after filtering", 
                "\n", sep = "")
  cat(Text)
}
x
}




############################################
# Final putting everything together:       #
# commonvar                                #
############################################
#'Combining the output of several filters.
#'
#'@description commonvar should be used to combine the output of nDom/nRec and
#'  gDom/gRec filters.
#'@param x the data frames to be filtered (> 1).
#'@param group the column names of the columns containing the functional units.
#'@details This function allows the combination of nDom/nRec and gDom/gRec
#'  output. It retains all functional units in common, whithout assumptions on
#'  zygosity (as this should already have been taken care of in the previous
#'  filtering steps). Within each functional unit, the variants present in each
#'  x object are retained. Detectance can be specified and will be used on the
#'  functional group instead of the individual variant.
#'@author Bart Broeckx
#'@examples 
#'output <- nRec("CLCfile1", "CLCfile2"  )
#'output
#'CLCfile1proc <-CLCfile(CLCfile1, "Coding.region.change", TRUE, "; ")
#'CLCfile2proc <-CLCfile(CLCfile2, "Coding.region.change", TRUE, "; ")
#'output2 <- gDom("CLCfile1proc", "CLCfile2proc")
#'#Annotoutput2 <- annot(output,genBED, type="BED", nomatch=FALSE, CLC=TRUE)
#'#x <- c("output2", "Annotoutput2")
#'#group <- c("Group", "annotation")
#'#a <-commonvar(x,group)

commonvar <- function(x, group){
  
  # QC
  if (missing(x) | missing(group)) {
    stop("specify x and group")
  }
  if (length(x) != length(group)) {
    stop("x and group not equal length")
  }
  if (length(x) < 2) {
    stop("length(x) should be > 1")
  }
  
  # actual function
  common <- vector()
  for (i in 1:length(x)){
    nameValue<-get(x[[i]])
    grouper <- group[i] 
    var <-as.character(unique(nameValue[,grouper]))
    common <- c(common,var)
  }
  
  commongroup <-as.data.frame(table(common))
  Text <- paste("Which detectance level is required?", "\n" )
  cat(Text)
  for (i in 1:length(x)) {
    Text <- paste("level (",i,"): ", i/length(x)*100,"%"," (",i,"/",length(x),")","\n", sep="")
    cat(Text)
    
  }
  level <- readline("Choose level:")
  id <-which(commongroup$Freq >= level)
  if (length(id) > 0){
    result <-as.character(commongroup[id,-2])
    Text <- paste(length(result), "groups are retained", "\n" )
    cat(Text)
    
    
    output <- vector()
    if (length(result) >=1 ) {
      for (i in 1:length(x)){
        nameValue<-get(x[[i]])
        grouper <- group[i] 
        id <- which(nameValue[,grouper] %in% result)
        selection <- nameValue[id, c("Chromosome", "Region", "Allele", grouper)]
        pasted<-paste(selection$Chromosome, selection$Region, selection$Allele, selection[,grouper], sep=" ")
        output <-c(output, pasted)
      }
    }
    res <-as.data.frame(table(output))
    res$output <- as.character(res$output)
    final<- cbind(as.data.frame(stringr::str_split_fixed(res[,1]," ",4)),res$Freq)
    colnames(final) <- c("Chromosome", "Region", "Allele", "Group", "Freq")
    final
  }else {
    Text <- paste("Nothing retained", "\n" )
    cat(Text)
  }
}

#' CLCfile1 documentation
#' A dataset containing 8 variants (output CLC genomics workbench)
#' 
"CLCfile1"
#' CLCfile2 documentation
#' A dataset containing 9 variants (output CLC genomics workbench)
#' 
"CLCfile2"
#' SNP documentation
#' A dataset containing 472 variants from the dbSNP database
#' 
#' @source \url{http://genome.ucsc.edu/cgi-bin/hgTables?command=start}
"SNP"
#'genBED documentation
#' A bed file containing genes used to annotate files
#' 
#' @source \url{http://genome.ucsc.edu/cgi-bin/hgTables?command=start}
"genBED"
#' test documentation
#' An example of a VCF file.
"test"


#' Preparing variant database (parallel).
#'
#'@description prepvarpar is used to prepare a large variant database for 
#'  further filtering with varfilter.
#'@param y the data frame containing the variants.
#'@param MAF optional numeric, the MAF of the non-reference variants to be 
#'  retained for filtering.
#'@param reference the column containing the reference sequence in the y data 
#'  frame.
#'@param nproc the number of cores or workers used for parallel computing.
#'@details This function prepares the variants from a large database like dbSNP
#'  to be used for filtering variants in cases. MAF is optional and specifies a 
#'  minimum allele frequency for the database variants to be retained for 
#'  variant filtering. The input of y should be the standard output of the UCSC 
#'  table browser ("all fields from selected table"). This function allows 
#'  parallel computing, leading to substantial time improvement when entire 
#'  databases are used. If parallel computing is not necessary or not possible, 
#'  use prepvar.Prior to running this function, call function registerDoParallel
#'  from the doParallel package to permit parallel computing.
#'  @examples
#' #library(doParallel)
#' #registerDoParallel()
#' # nproc <-getDoParWorkers()
#' # a <-prepvarpar(SNP, 0.03,"refNCBI", 1)
prepvarpar <- function(y, MAF, reference, nproc){
  "%dopar%" <- NA
  rm("%dopar%")
  if (missing(y) | missing(reference)) {
    stop("specify y and reference")}
  y <- y[, -c(1, 4:7, 10:22, 24, 26)]
  y[, 2] <- y[, 2] + 1
  indic <-rep(1:nproc, length.out=nrow(y))
  indic <- sort(indic)
  indic <- as.factor(indic)
  y <- cbind(y,indic)
  splits <- iterators::isplit(y,y$indic)
  subsety=NULL
  result<- foreach::foreach(subsety=splits, .combine='rbind') %dopar% {
    filtering <- data.frame()
    for (i in 1:nrow(subsety$value)) {
      subset <- subsety$value[i, ]
      allele <- unlist(strsplit(as.character(subset[, "alleles"]), 
                                ","))
      id <- which(allele != subset[, reference])
      nonrefallele <- allele[id]
      allelefreq <- unlist(strsplit(as.character(subset[, "alleleFreqs"]), 
                                    ","))
      nonreffreq <- allelefreq[id]
      nonreffreq <- as.numeric(nonreffreq)
      nrep <- length(nonreffreq)
      subset <- subset[rep(seq_len(nrow(subset)), nrep), c(1:2)]
      subsetend <- cbind(subset, nonrefallele, nonreffreq)
      filtering <- rbind(filtering, subsetend)
    }
    prelist <-list(value=filtering)
    presubset<- subsety[-1]
    result <- c(prelist,presubset)
  }
  empty=NULL
  for (i in 1:nproc) {
    prepdata<-as.data.frame(result[[i]])
    empty <- rbind(empty,prepdata)
    
  }
  filtering<- empty 
  Text <- paste("Number of variants for filtering: ", nrow(filtering), 
                "\n", sep = "")
  cat(Text)
  if (!missing(MAF)) {
    id <- which(filtering$nonreffreq >= MAF)
    if (length(id) >= 1) {
      filtering <- filtering[id, ]
      Text <- paste("Number of variants with > MAF: ", 
                    nrow(filtering), "\n", sep = "")
      cat(Text)
    }
    else if (length(id) < 1) {
      Text <- paste("No variants for filtering retained!", 
                    "\n", sep = "")
      cat(Text)
    }
  }
  filtering
}
BartBroeckx/Mendelian documentation built on May 5, 2019, 10:27 a.m.