R/accessory_functions.R

Defines functions write.logheader weighted.var TMfun.6x_HH TMfun.6x_HB TMfun.6x_BH TMfun.6x_BB TMfun.4x_QQ TMfun.4x_QB TMfun.4x_BQ TMfun.4x_BB TMfun.3x_QB TMfun.3x_BB TM.quad TM.hex TM.biv.6 TM.biv.4 TM.biv.2 test_probgeno_df test_IBD_list test_dosage_matrix state_fun quadTM probgeno_df_to_array plotQTL.single_LG plotQTL.genome_linear_list plotQTL.genome_linear Nstates.onepar Nstates.fun NettletonDoerge mapseq list.depth hexTM findQTLpeaks fast_permute fast_IBD colour.bar bivTM

#' Bivalent transition matrix function
#' @param r recombination frequency
#' @noRd
bivTM <- function(r) 
  matrix(c(1-r,r,r,1-r),ncol=2)


#' Add colour bar scale to heatplot
#' @description Function to generate a scale for heatplots
#' @param col.data vector of colours
#' @param min minimum colour
#' @param max maximum colour
#' @param cex.ticks size of ticks on colour bar
#' @param nticks number of ticks on colour bar
#' @param ticks vector of positions of ticks on colour bar
#' @param title optional title for colour bar
#' @param ylab optional y-axis label for colour bar
#' @param cex.lab size of labels on colour bar
#' @noRd
colour.bar <- function(col.data, min, max=-min, cex.ticks = 1.2, nticks=11,
                       ticks=seq(min, max, len=nticks), title='', ylab = '',
                       cex.lab = 1) {
  scale <- length(col.data)/(max-min)
  
  plot(c(0,10), c(min,max), type='n', bty='n', xaxt='n', xlab='', yaxt='n', ylab=ylab, main=title, cex.lab = cex.lab)
  axis(2, ticks, las=1, cex.axis = cex.ticks)
  for (i in 1:length(col.data)) {
    y = (i-1)/scale + min
    rect(0,y,10,y+1/scale, col=col.data[i], border=NA)
  }
}

#' Extremely fast estimation of identity-by-descent (IBD) probabilities.
#' @description The method of "quick-and-dirty" IBD estimation was originally developed by Bourke (2014) for tetraploid data only, and was subsequently
#' generalised by van Geest et al. (2017). Can be useful for a first quick analysis, particularly in large hexaploid datasets. However, the higher accuracy of IBD
#' probabilities generated by \code{\link{hmm_IBD}} makes that function the preferred choice.
#' @param phased_maplist A list of linkage maps calculated by \code{polymapR::create_phased_maplist}
#' @param dosage_matrix An integer matrix with markers in rows and individuals in columns
#' @param map_function The mapping function to calculate recombination frequency based on map distance (haldane or kosambi)
#' @param ploidy Ploidy level of parents or of the first parent
#' @param ploidy2 Ploidy level of the second parent. By default \code{NULL}, if parents have equal ploidy levels.
#' @param fix_threshold The threshold to fix the IBD probabilities while correcting for the sum of probabilities.
#' @param factor_dist Factor to increase or decrease the recombination frequencies as calculated from the map distances.
#' @param ncores Number of cores to use for multi-core processing.
#' @return
#' A nested list (with the same length as phased_maplist). Each list element contains the following items:
#' \item{IBDtype}{Always "haplotypeIBD" for the output of this function}
#' \item{IBDarray}{An array of IBD probabilities. The dimensions of the array are: markers, homologues and individuals.}
#' \item{map}{Integrated linkage map positions of markers used in IBD calculation}
#' \item{parental_phase}{The parental marker phasing, coded in 1 and 0's}
#' \item{biv_dec}{\code{NULL}}
#' \item{gap}{The gap size used in IBD interpolation, by default \code{NULL}. See \code{\link{spline_IBD}}}
#' \item{genocodes}{\code{NULL}}
#' \item{pairing}{\code{NULL}}
#' \item{ploidy}{ploidy of parent 1}
#' \item{ploidy2}{ploidy of parent 2}
#' \item{method}{The method used, here "heur" (heuristic)}
#' \item{error}{The error prior used, not relevant here thus \code{NULL}}
#' @references
#' Bourke P.M. (2014) QTL analysis in polyploids: Model testing and power calculations. Wageningen University (MSc thesis)
#' @examples
#' data("phased_maplist.4x","SNP_dosages.4x")
#' IBD_list.4x <- fast_IBD(phased_maplist = phased_maplist.4x,
#'                         dosage_matrix = SNP_dosages.4x,
#'                         ploidy = 4)
#' @noRd
fast_IBD <- function(phased_maplist,
                     dosage_matrix,
                     map_function = "haldane",
                     ploidy,
                     ploidy2 = NULL,
                     fix_threshold = 0.1,
                     factor_dist = 1,
                     ncores = 1){
  
  if(is.null(ploidy2)) ploidy2 <- ploidy
  ploidy1 <- ploidy
  
  map_function <- match.arg(map_function, choices = c("kosambi", "haldane"))
  
  # check compatibility of dosages and phased maps
  p1check <- function(n) all(rowSums(phased_maplist[[n]][,3:(2+ploidy1)]) ==
                               dosage_matrix[match(as.character(phased_maplist[[n]][,"marker"]),rownames(dosage_matrix)),"P1"],
                             na.rm = TRUE)
  p2check <- function(n) all(rowSums(phased_maplist[[n]][,(3+ploidy1):(2+ploidy1+ploidy2)]) ==
                               dosage_matrix[match(as.character(phased_maplist[[n]][,"marker"]),rownames(dosage_matrix)),"P2"],
                             na.rm = TRUE)
  
  if(!all(sapply(seq(phased_maplist), p1check)) | !all(sapply(seq(phased_maplist), p1check)))
    stop("Inconsistent input detected.\nIt is likely that unconverted dosages are being combined with a converted phased maplist, or vice versa.")
  
  if(map_function == "kosambi"){
    mapfun <- function(d, f = 1){
      d <- d/100/f
      0.5*((exp(4*d)-1)/(exp(4*d)+1))
    }
  } else if (map_function == "haldane"){
    mapfun <- function(d, f = 1){
      d <- d/100/f
      0.5*(1-exp(-2*d))
    }
  }
  
  # function to correct total sum of genotype probabilties to 0.5*ploidy
  correct_to_sumScore <- function(subg_P, threshold = 0.1, ploidy){
    sumScore <- 0.5*ploidy
    # for all markers in subg_P
    for(m in rownames(subg_P)){
      gp <- subg_P[m,]
      
      ## Make sure that the IBDs don't exceed 0.5*ploidy
      if(sum(gp) > sumScore) {
        gp <- gp*sumScore/(sum(gp))
        # message("Excessive IBD probabilities detected and corrected for")
      }
      
      # if half of homologues 0 -> fully informative
      if(sum(gp == 0) == 0.5*ploidy){
        gp[gp != 0] <- 1
        # if half of homologues 1 -> fully informative
      } else if(sum(gp == 1) == 0.5*ploidy){
        gp[gp < 1] <- 0
        # otherwise correct to 0.5*ploidy
      } else {
        # fix genotypes that approach 0 or 1 according to fix_threshold
        fix <- gp < threshold | gp > (1-threshold)
        
        if(sum(fix) == ploidy){
          fix <- gp == 0 | gp ==1
        }
        
        # find correction and correct to sum = 0.5*ploidy
        corr <- sumScore - sum(gp[fix])
        
        gp[!fix] <- gp[!fix]*corr/sum(gp[!fix])
        # Some probabilites can end up >1. Correct for this:
        while(any(gp > 1)){
          addp <-  gp < 1 & gp > 0#& gp>=0.5
          remp <- gp > 1
          gp[addp] <- gp[addp] + (sum(gp[gp > 1] - 1)/sum(addp))
          gp[remp] <- 1
        }
      }
      subg_P[m,] <- gp
    }
    return(subg_P)
  }
  
  win <- Sys.info()["sysname"] == "Windows"
  if (win) {
    cl <- parallel::makeCluster(ncores)
    doParallel::registerDoParallel(cl)
  } else {
    doParallel::registerDoParallel(cores = ncores)
  }
  
  ## R CMD CHECK work-around:
  i <- NULL
  
  outlist <-
    foreach::foreach(
      i = seq(phased_maplist),
      .inorder = TRUE
    ) %dopar% {
      ph <- phased_maplist[[i]]
      posdf <- ph[,c("marker", "position")]
      
      ## If marker data is conflicting and co-located, can lead to problems. Jitter positions if necessary
      while(length(unique(posdf$position)) < nrow(posdf)){
        warning("Non-unique marker positions detected. Jittering positions.")
        posdf[duplicated(posdf$position),"position"] <- jitter(posdf[duplicated(posdf$position),"position"],amount=0.1)
        #Map might no longer be in order:
        ph <- ph[order(posdf$position),]
        posdf <- posdf[order(posdf$position),]
      }
      
      posvec <- posdf$position
      mark <- as.character(posdf$marker)
      names(posvec) <- mark
      
      # get all marker combinations
      combs <- expand.grid(mark, mark, stringsAsFactors = FALSE)
      
      # make a full distance matrix
      pos1 <- posvec[combs[,1]]
      pos2 <- posvec[combs[,2]]
      
      distdf <- data.frame(marker_a = as.character(combs[,1]),
                           marker_b = as.character(combs[,2]),
                           distance = abs(pos2 - pos1))
      
      suppressMessages(distmat <- reshape2::acast(distdf, marker_a ~ marker_b))
      distmat <- distmat[names(posvec), names(posvec)]
      
      k0_mat <- mapfun(distmat, f = factor_dist)
      
      rownames(ph) <- ph$marker
      ph <- ph[,-which(colnames(ph) %in% c("marker", "position"))]
      ph <- as.matrix(ph)
      
      scd <- dosage_matrix[rownames(ph),]
      
      # get parental scores
      par <- scd[,c("P1", "P2")]
      progeny <- scd[,-which(colnames(scd) %in% c("P1", "P2"))]
      
      # make a garray (=genotype probability array) with only 0.5
      garray <- array(data = 0.5, dim = c(nrow(ph), ploidy1 + ploidy2, ncol(scd)-2),
                      dimnames = list(rownames(scd), paste0("h", 1:(ploidy1 + ploidy2)), colnames(scd)[3:ncol(scd)]))
      
      # get all markers with fully informative dosage
      P1_hp <- scd[,"P1"] == 0.5*ploidy1
      P2_hp <- scd[,"P2"] == 0.5*ploidy2
      
      # get all maximum scores per marker which are informative in offspring
      sumP <- rowSums(par)
      
      # fill in garray
      for(m in seq(nrow(scd))){
        # get progeny that is informative - this is the kernel of the original approach:
        # progeny score which equals (or exceeds, DR?) the sum of the parental scores have inherited
        # all the parental alleles, and so we assign all these homologues with a probability of 1
        progS <- colnames(progeny)[progeny[m,] >= sumP[m] & !is.na(progeny[m,])]
        
        # assign 1 if so.
        garray[m,ph[m,] == 1,progS] <- 1
        
        # special case where dosage is fully informative (dosage = 0.5*ploidy)
        # only if one of the P1 dosages should be 0.5*ploidy
        # and marker is assigned to 0.5*ploidy homologues
        # apply zeros to all others
        if((P1_hp[m] | P2_hp[m]) & sum(ph[m,]) == sumP[m]){
          if(P1_hp[m])
            garray[m,c(ph[m,1:ploidy1] == 0, rep(FALSE, ploidy2)),progS] <- 0
          if(P2_hp[m])
            garray[m,c(rep(FALSE, ploidy1) ,ph[m,(ploidy1+1):(ploidy1 + ploidy2)] == 0),progS] <- 0
        }
        # if allele is absent in progeny, also informative:
        prog0 <- colnames(progeny)[progeny[m,] == 0 & !is.na(progeny[m,])]
        garray[m,ph[m,] == 1,prog0] <- 0
      }
      
      # order garray
      garray <- garray[colnames(distmat),,]
      
      # find nearest informative and change genotype probability accordingly
      for(g in dimnames(garray)[[3]]){
        for(h in dimnames(garray)[[2]]){
          gp <- garray[,h,g]
          inf1 <- gp == 1
          inf0 <- gp == 0
          informative <- inf1 | inf0
          if(any(informative)){
            mdist <- k0_mat[names(gp)[!informative], names(gp)[informative], drop = FALSE]
            near_inf <- apply(mdist, 1, which.min)
            gps <- mdist
            if(any(inf1)){
              gps[,names(inf1)[inf1]] <- 1 - gps[,names(inf1)[inf1]] 
            }
            garray[rownames(mdist),h,g] <- gps[cbind(seq(near_inf), near_inf)]
          }
        }
      }
      
      # call correct_to_sumScore to array:
      for(g in dimnames(garray)[[3]]){
        corr_P1 <- correct_to_sumScore(subg_P = garray[,1:ploidy1,g],
                                       threshold = fix_threshold, ploidy = ploidy1)
        garray[,1:ploidy1,g] <- corr_P1
        
        corr_P2 <- correct_to_sumScore(subg_P = garray[,(ploidy1 +1):(ploidy1 + ploidy2),g],
                                       threshold = fix_threshold, ploidy = ploidy2)
        garray[,(ploidy1 +1):(ploidy1 + ploidy2),g] <- corr_P2
      }
      
      return(list("IBDtype" = "haplotypeIBD",
                  "IBDarray" = garray,
                  "map" = cbind("chromosome" = i,
                                posdf),
                  "parental_phase" = ph,
                  "marginal.likelihoods" = NULL,
                  "valency" = NULL,
                  "offspring" = dimnames(garray)[[3]],
                  "biv_dec" = NULL,
                  "gap" = NULL,
                  "genocodes" = NULL,
                  "pairing" = NULL,
                  "ploidy" = ploidy,
                  "ploidy2" = ploidy2,
                  "method" = "heur",
                  "error" = NULL))
    }
  
  if(win) parallel::stopCluster(cl)
  
  if(!is.null(names(phased_maplist))){
    names(outlist) <- names(phased_maplist)
  } else{
    warning(paste("phased_maplist had no LG names assigned. Assigning names",
                  paste0("'LG",seq(length(outlist)),"'", collapse = ", "),"to output list."))
    names(outlist) <- paste0("LG",seq(length(outlist)))
  }
  
  return(outlist)
} #fast_IBD

#' Extension of the \code{\link{QTLscan}} function, offering an optimised permutation test when the experimental setting (i.e. phenotype structure) is simple 
#' @description Function to run optimised QTL permutation test using IBD probabilities
#' @param IBD_list List of IBD probabilities
#' @param Phenotype.df A data.frame containing phenotypic values
#' @param genotype.ID The colname of \code{Phenotype.df} that contains the offspring identifiers (F1 names)
#' @param trait.ID The colname of \code{Phenotype.df} that contains the response variable to use in the model
#' @param ploidy Integer. The ploidy of parent 1
#' @param ploidy2 The ploidy of parent 2, by default \code{NULL} i.e. assumed (unless specified) to be equal to the ploidy of parent 1.
#' @param N_perm The number of permutations to run, by default this is 1000.
#' @param alpha The P-value to be used in the selection of a threshold, by default 0.05 (i.e. the 0.95 quantile).
#' @param allelic_interaction The QTL detection model can be for additive main effects only (this is the default, ie. argument is \code{FALSE}). If
#' \code{TRUE}, then the full model is used (all possible genotype combinations are predictors in the model). This runs the risk of overfitting, especially if
#' double reduction was also allowed. Both types of analyses can ideally be performed and compared
#' @param ncores Number of cores to use, by default 1 only. Works both for Windows and UNIX (using \code{doParallel}).
#' Use \code{parallel::detectCores()} to find out how many cores you have available.
#' @param verbose Logical, by default \code{TRUE}. Should messages be printed during running?
#' @param log Character string specifying the log filename to which standard output should be written. If \code{NULL} log is send to stdout.
#' @return
#' A nested list; each list element (per linkage group) contains the following items:
#' \describe{
#' \item{QTL.res}{Single matrix of QTL results with columns chromosome, position, LOD}
#' \item{Perm.res}{List of the results of the permutation test, with (at least) list items
#' "quantile","threshold" and "scores". Quantile refers to which quantile of scores was used to determine the threshold.
#' Note that scores are each of the maximal LOD scores across the entire genome scan per permutation, thus returning a
#' genome-wide threshold rather than a chromosome-specific threshold. If the latter is preferred, restricting the
#' \code{IBD_list} to a single chromosome and re-running the permutation test will provide the desired threshold.}
#' \item{Map}{Original map of genetic marker positions upon which the IBDs were based, most often used
#' for adding rug of marker positions to QTL plots.}
#' \item{LG_names}{Names of the linkage groups}
#' \item{allelic_interaction}{Whether argument \code{allelic_interaction} was \code{TRUE} or \code{FALSE} in the QTL scan}
#' }
#' @examples
#' data("IBD_4x","Phenotypes_4x")
#' qtl_LODs.4x <- fast_permute(IBD_list = IBD_4x,
#'                        Phenotype.df = Phenotypes_4x,
#'                        genotype.ID = "geno",
#'                        trait.ID = "pheno",
#'                        ploidy = 4)
#' @noRd                       
fast_permute <- function(IBD_list,
                         Phenotype.df,
                         genotype.ID,
                         trait.ID,
                         ploidy,
                         ploidy2 = NULL,
                         N_perm = 1000,
                         alpha = 0.05,
                         allelic_interaction = FALSE,
                         ncores = 1,
                         verbose = TRUE,
                         log = NULL){
  
  if(is.null(log)) {
    log.conn <- stdout()
  } else {
    matc <- match.call()
    write.logheader(matc, log)
    log.conn <- file(log, "a")
  }
  
  ## Run some initial checks:
  if(!trait.ID %in% colnames(Phenotype.df)) stop("Trait colname incorrectly specified.")
  IBD_list <- test_IBD_list(IBD_list)
  
  IBDarray <- IBD_list[[1]]$IBDarray
  IBDtype <- IBD_list[[1]]$IBDtype
  bivalent_decoding <- IBD_list[[1]]$biv_dec
  gap <- IBD_list[[1]]$gap
  ploidy <- IBD_list[[1]]$ploidy
  ploidy2 <- IBD_list[[1]]$ploidy2
  
  ## Danger of overfitting if using full model for QTL detection and DR model for IBD estimation (e.g. tetraploid = 100 genotype classes!)
  if(allelic_interaction & bivalent_decoding) warning("Danger of over-fitting if fitting full allelic interaction model with
                                                      double-reduction-aware IBDs. Suggest to also re-run using bivalent-only predictions!")
  
  if(IBDtype == "genotypeIBD"){
    if(is.null(IBD_list[[1]]$genocodes)){
      ## We are using the output of TetraOrigin
      Nstates <- Nstates.fun(biv_dec = bivalent_decoding, pl = ploidy, pl2 = ploidy2)
      mname <- paste0("GenotypeMat",Nstates)
      indicatorMatrix <- as.matrix(get(mname, envir = getNamespace("polyqtlR")))
    } else{
      ## We are using the output of hmm_IBD
      GenCodes <- t(sapply(sapply(IBD_list[[1]]$genocodes,function(x) strsplit(x,"")), function(y) sapply(y,function(z) which(letters == z))))
      Nstates <- nrow(GenCodes)
      indicatorMatrix <- matrix(0,nrow=Nstates, ncol=ploidy + ploidy2)
      for(r in 1:nrow(indicatorMatrix)){
        for(h in 1:ncol(GenCodes)){
          indicatorMatrix[r,GenCodes[r,h]] <- indicatorMatrix[r,GenCodes[r,h]] + 1
        }
      }
      
    }
    
    # indicatorMatrix <- as.matrix(indicatorMatrix[,-c(1,ploidy+1)])
  } else if(IBDtype == "haplotypeIBD"){
    Nstates <- ploidy + ploidy2 #this is not actually needed..
    indicatorMatrix <- NULL
  } else{
    stop("Unable to determine IBD type, please check IBD_list elements have a valid $IBDtype tag.")
  }
  
  ## Could just check for equality here also, modulo check was from older version...
  if(dim(IBDarray)[2] %% Nstates != 0) stop("Incompatible input detected..")
  
  ## First get the starting set - the ones that were phenotyped and genotyped:
  if(any(is.na(Phenotype.df[,genotype.ID]))){
    warning("Missing genotype.ID values detected. Removing and proceeding without...")
    Phenotype.df <- Phenotype.df[!is.na(Phenotype.df[,genotype.ID]),]
  }
  
  phenoGeno0 <- intersect(
    dimnames(IBDarray)[[3]],
    unique(Phenotype.df[,genotype.ID]))
  
  if(verbose) write(paste("There are",length(phenoGeno0),"individuals with matching identifiers between phenotypic and genotypic datasets.\n"),log.conn)
  
  pheno <- Phenotype.df[Phenotype.df[,genotype.ID] %in% phenoGeno0,c(genotype.ID,trait.ID)]
  
  ## Make sure there is only a single observation per genotype:
  if(length(which(duplicated(pheno[,genotype.ID]))) > 0){
    warning(paste("Duplicate phenotypes identified for",length(which(duplicated(pheno[,genotype.ID]))), "samples. Their mean values were used."))
    pheno <-
      do.call("rbind",
              sapply(unique(pheno[,genotype.ID]), function(gen){
                temp.mean <- mean(pheno[pheno[,genotype.ID] == gen,trait.ID], na.rm = TRUE)
                temp.out <- pheno[pheno[,genotype.ID] == gen,][1,,drop=FALSE]
                temp.out[,trait.ID] <- round(temp.mean,1)
                return(temp.out)}, simplify = FALSE))
  }
  
  if(length(which(is.na(pheno[,trait.ID]))) > 0){
    write(paste("A further",length(which(is.na(pheno[,trait.ID]))),
                "matched individuals had missing phenotypic values and were removed.\n"),
          log.conn)
    pheno <- pheno[!is.na(pheno[,trait.ID]),]
  }
  
  pheno <- pheno[order(pheno[,1]),]
  phenoGeno0 <- pheno[,1]
  
  colnames(pheno)[2] <- "Pheno"
  phenoGeno <- as.character(unique(pheno[,1]))
  popSize <- length(phenoGeno)
  
  if(popSize < ploidy + ploidy2) stop("Insufficient population size to fit QTL model with IBD probabilities. Suggest to use single marker approach")
  if(popSize <= 2*(ploidy + ploidy2)) warning("Population size may be too small for accurate model fitting. Results should be interpreted with caution.")
  
  RSS0 <- sum((pheno$Pheno - mean(pheno$Pheno))^2)
  
  if(verbose) write(paste("\nRunning QTL analysis with",popSize,"individuals...\n"),log.conn)
  
  if(length(IBD_list) > 1){
    IBDprobs <- abind::abind(lapply(seq(length(IBD_list)), function(i) IBD_list[[i]]$IBDarray[,,phenoGeno]),
                             along = 1)
  } else{
    IBDprobs <- IBD_list[[1]]$IBDarray[,,phenoGeno]
  }
  
  
  ## Turn the genotype IBDs into haplotype IBDs:
  if(!allelic_interaction){ # we only need to calculate this if we are running the main effects model
    
    if(IBDtype == "genotypeIBD"){
      haploProbs <- array(apply(IBDprobs,3,function(x) x%*%indicatorMatrix),
                          dim = c(dim(IBDprobs)[1],dim(indicatorMatrix)[2],dim(IBDprobs)[3]),
                          dimnames = list(dimnames(IBDprobs)[[1]],
                                          paste0("X",1:(ploidy+ploidy2)),
                                          dimnames(IBDprobs)[[3]])
      )
    } else{
      haploProbs <- IBDprobs #a bit inefficient, but patch for now...
      dimnames(haploProbs)[[2]] <- paste0("X",1:(ploidy+ploidy2)) #replaces h1, h2, h3...
    }
    
  } else{
    haploProbs <- IBDprobs #a bit inefficient, but patch for now...
    dimnames(haploProbs)[[2]] <-  paste0("X",1:dim(IBDprobs)[2])
  }
  
  ## Compile the original map positions
  map <- do.call("rbind",
                 lapply(seq(length(IBD_list)), function(c)
                   IBD_list[[c]]$map#[,c("chromosome","position")]
                 ))
  ## Compile the map positions of the IBDd positions also:
  if(!is.null(gap)) {map.1 <- do.call(rbind,
                                      lapply(seq(length(IBD_list)), function(c)
                                        data.frame("chromosome" = c,
                                                   "position" = as.numeric(setdiff(unlist(strsplit(dimnames(IBD_list[[c]]$IBDarray)[[1]],"M")),"c"))
                                        )#Just record chm and position.
                                      ))
  } else{
    map.1 <- map
  }
  
  if(verbose) message("Running permutation test....")
  time_start <- Sys.time()
  
  win <- Sys.info()["sysname"] == "Windows"
  if (win) {
    cl <- parallel::makeCluster(ncores)
    doParallel::registerDoParallel(cl)
  } else {
    doParallel::registerDoParallel(cores = ncores)
  }
  
  pheno.mat <- matrix(pheno$Pheno,ncol=1)
  
  row.split <- split(1:dim(haploProbs)[1], ceiling(round((1:dim(haploProbs)[1])/(dim(haploProbs)[1]/ncores),4)))
  
  perms <- sapply(1:N_perm, function(n) sample(pheno$Pheno)) 
  
  pheno.mat <- cbind(pheno.mat,perms)
  
  if(!allelic_interaction){
    modelterms <- paste0("X", setdiff(1:(ploidy + ploidy2),c(1,ploidy + 1))) #boundary condition applied
  } else{
    modelterms <- paste0("X", 1:(dim(IBDprobs)[2] - 1)) #boundary condition applied
  }
  
  run_lm.vect <- function(X = haploProbs[1,,],
                          phenos = pheno.mat,
                          tm = modelterms){
    # X is a 2d matrix (slice) of haplotype IBDs
    # phenos is a matrix of phenotypes, with at least 1 column
    lmRes <- lm(phenos ~ t(X)[,tm],na.action = na.exclude)
    RSS1 <- apply(as.matrix(lmRes$residuals),2,function(y) sum(y^2))
    
    LOD <- ifelse(RSS1 == 0,0,dim(X)[2]*log10(RSS0/RSS1)/2)
    
    return(LOD)
  }
  
  ## R CMD CHECK work-around:
  i <- NULL
  
  LOD.res <-
    foreach::foreach(
      i = 1:length(row.split),
      # .combine = c,
      .inorder = FALSE
    ) %dopar% {
      list(set = row.split[[i]],
           res = apply(haploProbs[row.split[[i]],,,drop = FALSE],1,
                       function(x) run_lm.vect(X=x, phenos = pheno.mat))
      )
    }
  
  if(win) parallel::stopCluster(cl)
  
  extracted.order <- do.call(c,lapply(LOD.res,function(x) x$set))
  LODResults <- do.call(c,lapply(LOD.res,function(x) x$res[1,]))
  LODResults <- LODResults[order(extracted.order)]
  
  permLODs <- do.call(cbind, lapply(1:ncores, function(l) LOD.res[[l]]$res[-1,]))
  threshold <- quantile(apply(permLODs,1,max), probs = 1 - alpha)
  
  output <- list(QTL.res =  suppressWarnings(cbind(map.1,"LOD" = LODResults)),
                 Perm.res = list(
                   quantile = 1 - alpha,
                   threshold = threshold,
                   threshold.bounds = c(threshold,threshold),#for now, no Nettleton Doerge approximation
                   scores = apply(permLODs,1,max),
                   Nperm = N_perm
                 ),
                 Map = map,
                 Residuals = pheno$Pheno,
                 LG_names = names(IBD_list),
                 allelic_interaction = allelic_interaction)
  
  time_end <- Sys.time()
  timediff <- as.numeric(time_end - time_start, units = "mins")
  
  write(
    paste(N_perm,"permutations on a",Sys.info()["sysname"],"machine using",ncores,
          ifelse(ncores>1,"cores","core"),"took",round(timediff, 2),"minutes."),
    log.conn
  )
  
  if(!is.null(log)) close(log.conn)
  
  return(output)
} #fast_permute


#' Extract QTL peak positions from the results of a QTL scan
#' @description Function to find QTL peaks from output of a QTL scan
#' @param LOD_data Output of \code{\link{QTLscan}} function
#' @noRd
findQTLpeaks <- function(LOD_data){
  if(is.null(LOD_data$Perm.res)) stop("findQTLpeaks :: Please re-run QTLscan with perm_test = TRUE to check significance.")
  if(!"allelic_interaction" %in% names(LOD_data)) stop("Since package v.1.0.0, QTLscan has argument allelic_interaction, which is also passed to its output. Please re-run to update LOD_data")
  
  thresh <- LOD_data$Perm.res$threshold
  chms <- unique(LOD_data$QTL.res$chromosome)
  
  QTL.df <- data.frame(LG=NULL,
                       cM=NULL,
                       deltaLOD=NULL,
                       model=NULL)
  for(lg in chms){
    lgdat <- LOD_data$QTL.res[LOD_data$QTL.res$chromosome == lg,]
    
    if(any(lgdat$LOD >= thresh)){
      QTL.df <- rbind(QTL.df,data.frame(LG = lg,
                                        cM = lgdat[lgdat$LOD == max(lgdat$LOD),"position"],
                                        deltaLOD = lgdat[lgdat$LOD == max(lgdat$LOD),"LOD"] - thresh,
                                        model = ifelse(LOD_data$allelic_interaction,"f","a")))
    }
  }
  
  return(QTL.df)
} #findQTLpeaks


#' Hexavalent transition matrix function
#' @param r recombination frequency
#' @noRd
hexTM <- function(r) 
  matrix(c(1-r,r/5,r/5,r/5,r/5,r/5,
           r/5,1-r,r/5,r/5,r/5,r/5,
           r/5,r/5,1-r,r/5,r/5,r/5,
           r/5,r/5,r/5,1-r,r/5,r/5,
           r/5,r/5,r/5,r/5,1-r,r/5,
           r/5,r/5,r/5,r/5,r/5,1-r),ncol=6)


#' Generate IBD probabilities from marker genotypes and a phased linkage map using HMM
#' @description \code{hmm_IBD} is a function for creating identity-by-descent (IBD) probabilities using hidden Markov models,
#' from marker genotypes (either discrete marker dosages (ie scores 0, 1, ..., ploidy representing the number of copies of the marker allele),
#' or the probabilities of these dosages) and a phased linkage map. Unlike the original TetraOrigin software, it does not re-estimate parental linkage phase,
#' and has been generalised for use in diploid, triploid, tetraploid and hexaploid populations.
#' @param input_type Can be either one of 'discrete' or 'probabilistic'. For the former (default), \code{dosage_matrix} must be supplied,
#' while for the latter \code{probgeno_df} must be supplied
#' @param genotypes Marker genotypes, either a 2d matrix of integer marker scores or a data.frame of dosage probabilities. 
#' Details are as follows:
#' \describe{
#' \item{discrete }{If \code{input_type} is 'discrete', \code{genotypes} is a matrix of marker dosage scores with markers in rows 
#' and individuals in columns. Both (marker) rownames and (individual or sample) colnames are needed.}
#' \item{probabilistic}{If \code{input_type} is 'probabilistic', \code{genotypes} is a data frame as read from the scores file produced 
#' by function \code{saveMarkerModels} of R package \code{fitPoly}, or alternatively, a data frame containing at least the following columns:
#' \describe{
#' \item{SampleName}{Name of the sample (individual)}
#' \item{MarkerName}{Name of the marker}
#' \item{P0}{Probabilities of dosage score '0'}
#' \item{P1, P2... etc.}{Probabilities of dosage score '1' etc. (up to max offspring dosage, e.g. P4 for tetraploid population)}
#' }
#' }
#' }
#' @param phased_maplist A list of phased linkage maps, the output of \code{polymapR::create_phased_maplist}
#' @param remove_markers Optional vector of marker names to remove from the maps. Default is \code{NULL}.
#' @param ploidy Integer. Ploidy of the organism.
#' @param ploidy2 Optional integer, by default \code{NULL}. Ploidy of parent 2, if different from parent 1.
#' @param parent1 Identifier of parent 1, by default assumed to be \code{"P1"}
#' @param parent2 Identifier of parent 2, by default assumed to be \code{"P2"}
#' @param individuals By default "all" offspring are included, but otherwise a subset can be selected, using a vector of offspring indexing numbers (1,2, etc.)
#' according to their order in \code{dosage_matrix}
#' @param log Character string specifying the log filename to which standard output should be written. If \code{NULL} log is send to stdout.
#' @param map_function Mapping function to use when converting map distances to recombination frequencies.
#' Currently only \code{"haldane"} or \code{"kosambi"} are allowed.
#' @param bivalent_decoding Option to consider only bivalent pairing during formation of gametes (ignored for diploid populations, as only bivalents possible there), by default \code{TRUE}
#' @param error The (prior) probability of errors in the offspring dosages, usually assumed to be small but non-zero
#' @param full_multivalent_hexa Option to allow multivalent pairing in both parents at the hexaploid level, by default \code{FALSE}. Note that if \code{TRUE},
#' a very large available RAM may be required (>= 32Gb) to process the data.  
#' @param verbose Logical, by default \code{TRUE}. Should progress messages be written?
#' @param ncores How many CPU cores should be used in the evaluation? By default 1 core is used.
#' @return A list of IBD probabilities, organised by linkage group (as given in the input \code{phased_maplist}). Each
#' list item is itself a list containing the following:
#' \describe{
#' \item{IBDtype}{The type of IBD; for this function only "genotypeIBD" are calculated.}
#' \item{IBDarray}{A 3d array of IBD probabilities, with dimensions marker, genotype-class and F1 individual.}
#' \item{map}{A 3-column data-frame specifying chromosome, marker and position (in cM)}
#' \item{parental_phase}{Phasing of the markers in the parents, as given in the input \code{phased_maplist}}
#' \item{biv_dec}{Logical, whether bivalent decoding was used in the estimation of the F1 IBD probabilities.}
#' \item{gap}{The size of the gap (in cM) used when interpolating the IBD probabilities, if performed.}
#' \item{genocodes}{Ordered list of genotype codes used to represent different genotype classes.}
#' \item{pairing}{log likelihoods of each of the different pairing scenarios considered (can be used e.g. for post-mapping check of preferential pairing)}
#' \item{ploidy}{ploidy of parent 1}
#' \item{ploidy2}{ploidy of parent 2}
#' \item{method}{The method used, here "hmm" (Hidden Markov Model)}
#' \item{error}{The error prior used}
#' }
#' @references
#' \itemize{
#' \item{Durbin R, Eddy S, Krogh A, Mitchison G (1998) Biological sequence analysis: Probabilistic models of proteins and nucleic acids. Cambridge: Cambridge University Press.}
#' \item{Hackett et al. (2013) Linkage analysis and QTL mapping using SNP dosage data in a tetraploid potato mapping population. PLoS One 8(5): e63939}
#' \item{Zheng et al. (2016) Probabilistic multilocus haplotype reconstruction in outcrossing tetraploids. Genetics 203: 119-131}
#' }
#' @examples
#' data("phased_maplist.4x", "SNP_dosages.4x")
#' hmm_IBD(phased_maplist=phased_maplist.4x,genotypes=SNP_dosages.4x,ploidy=4)
#' @noRd
hmm_IBD <- compiler::cmpfun(function(input_type = "discrete",
                                     genotypes,
                                     phased_maplist,
                                     remove_markers = NULL,
                                     ploidy,
                                     ploidy2 = NULL,
                                     parent1 = "P1",
                                     parent2 = "P2",
                                     individuals = "all",
                                     log = NULL,
                                     map_function = "haldane",
                                     bivalent_decoding = TRUE,
                                     error = 0.01,
                                     full_multivalent_hexa = FALSE,
                                     verbose = FALSE,
                                     ncores = 1){
  input_type <- match.arg(input_type, choices = c("discrete","probabilistic"))
  
  if(is.null(ploidy2)) ploidy2 <- ploidy
  if(ploidy %% 2 != 0 | ploidy2 %% 2 != 0) stop("Parental ploidy cannot be odd-numbered! Find new parents!")
  ploidyF1 <- (ploidy + ploidy2)/2
  
  # if(ploidy2 > ploidy) stop("Currently, ploidy2 cannot exceed ploidy. Please re-order parents (may become fully generalised later).")
  if(!(error > 0 & error < 1)) stop("Error prior probability must be a number > 0 and less than 1")
  
  map_function <- match.arg(map_function, choices = c("haldane","kosambi"))
  
  if(map_function == "kosambi"){
    mapfun <- function(d){
      d <- d/100
      0.5*((exp(4*d)-1)/(exp(4*d)+1))
    }
  } else if (map_function == "haldane"){
    mapfun <- function(d){
      d <- d/100
      0.5*(1-exp(-2*d))
    }
  }
  
  if (is.null(log)) {
    log.conn <- stdout()
  } else {
    matc <- match.call()
    write.logheader(matc, log)
    log.conn <- file(log, "a")
  }
  
  if(input_type == "discrete"){
    genotypes <- test_dosage_matrix(genotypes)
  } else{
    probgeno_df <- test_probgeno_df(genotypes,ploidyF1)
    genotypes <- probgeno_df_to_array(probgeno_df,ploidyF1)
  }
  
  phased_markers <- as.character(unlist(sapply(phased_maplist, function(x) x$marker)))
  
  if(length(phased_markers) != length(unique(phased_markers))) stop("Some markers found at multiple positions - please check phased_maplist!")
  
  if(verbose) write(paste0("There were ", length(phased_markers)," phased markers supplied, on ",
                           length(phased_maplist)," linkage group",
                           ifelse(length(phased_maplist) > 1,"s.",".")),
                    file = log.conn)
  
  if(!is.null(remove_markers)){
    if(length(intersect(phased_markers, remove_markers)) == 0)
      stop("Markers specified for removal are not present in phased_maplist!")
    maplist.names <- names(phased_maplist)
    phased_maplist <- lapply(seq(length(phased_maplist)), function(ch) phased_maplist[[ch]][!phased_maplist[[ch]]$marker %in% remove_markers,])
    names(phased_maplist) <- maplist.names
    
    phased_markers <- as.character(unlist(sapply(phased_maplist, function(x) x$marker)))
    if(verbose) write(paste("There are now", length(phased_markers),"phased markers remaining following marker removal."),
                      file = log.conn)
  }
  
  # Generate vector 'offspring' of numbers 1:(number of F1 offspring)
  if(input_type == "discrete"){
    if(individuals[1] == "all"){
      offspring <- 1:(ncol(genotypes[,-which(colnames(genotypes) %in% c(parent1,parent2)),drop=FALSE]))
      
    } else{
      foo <- 1:(ncol(genotypes[,-which(colnames(genotypes) %in% c(parent1,parent2))])) 
      if(!all(individuals %in% foo)) stop("Vector of offspring indices incorrectly supplied using individuals argument!")
      offspring <- individuals
    }
  } else{
    if(individuals[1] == "all"){
      offspring <- seq_along(setdiff(dimnames(genotypes)[[3]],c(parent1, parent1)))
    } else{
      foo <- 1:dim(genotypes)[3]
      if(!all(individuals %in% foo)) stop("Vector of offspring indices incorrectly supplied using individuals argument!")
      offspring <- individuals
    }
  }
  
  
  ## Check for consistency between dosages and phased marker information (discrete genotypes only at present..)
  if(input_type == "discrete"){
    phased.dose <- lapply(seq(length(phased_maplist)),
                          function(ch) matrix(c(rowSums(phased_maplist[[ch]][,paste0("h",1:ploidy)]),
                                                rowSums(phased_maplist[[ch]][,paste0("h",(ploidy + 1):(ploidy + ploidy2))])),
                                              ncol=2,
                                              dimnames = list(phased_maplist[[ch]]$marker,
                                                              c(parent1,parent2))))
    
    for(i in seq(length(phased_maplist))) {
      
      if(!all(rownames(phased.dose[[i]]) %in% rownames(genotypes))) stop("Issue matching marker names from phased map with supplied genotype matrix. Please check!")
      
      if(!all(apply(phased.dose[[i]] == genotypes[rownames(phased.dose[[i]]),c(parent1,parent2)],1,all))){
        message(paste("Inconsistent dosages on LG",i,"encountered, concerning the following markers:"))
        print(rownames(phased.dose[[i]])[which(!apply(phased.dose[[i]] == genotypes[rownames(phased.dose[[i]]),1:2],1,all))])
        stop("Cannot proceed. Suggest to check input - was unconverted or converted dosage coding used when generating phased mapfile?
           Alternatively, use remove_markers argument to screen out these problematic markers.")
      }
    }
    
  }
  
  ## Generate the list of valencies and genotype states
  statefun.ls <- state_fun(ploidy = ploidy, ploidy2 = ploidy2, 
                           bivalent_decoding = bivalent_decoding,
                           full_multivalent_hexa = full_multivalent_hexa)
  state.ls <- statefun.ls$state.ls
  all.states <- statefun.ls$all.states
  
  ##-----------------------------------------------------------------------
  ## Forward and Backward algorithms (from Durbin et al. 1998, pp. 58 - 59)
  ##-----------------------------------------------------------------------
  
  forward_backward <- function(F1num=1,
                               ploidy_fb,
                               state_matrix,
                               TMarray,
                               e_o,
                               par_phase,
                               progeny_dose,#progeny_dose has different forms depending on input_type
                               n_mark,
                               input.typ){
    
    Nstates <- nrow(state_matrix)
    
    if(input.typ == "discrete"){
      emmat.tf <- apply(state_matrix,1,function(x) rowSums(par_phase[,x]) == progeny_dose[,F1num,drop=FALSE])
      emission.mat <- emmat.tf
      emission.mat[emmat.tf] <- 1 - e_o
      emission.mat[!emmat.tf] <- e_o/ploidy_fb
      emission.mat[is.na(emission.mat)] <- 1/Nstates  #I am using the dimension of the smaller state matrix as the denominator
    } else{
      emission.mat_dosePr <- apply(state_matrix,1,function(x) 
        progeny_dose[cbind(1:dim(par_phase)[1],rowSums(par_phase[,x]) + 1,F1num)])
      ## This is without the possibility of errors - how to factor error into these numbers?
      ## A possible work-around is to take Pr >= 0.5 as compatible and Pr < 0.5 as incompatible:
      emission.mat <- emission.mat_dosePr
      emission.mat[emission.mat_dosePr >= 0.5] <- emission.mat_dosePr[emission.mat_dosePr >= 0.5] - e_o
      emission.mat[emission.mat_dosePr < 0.5] <- emission.mat_dosePr[emission.mat_dosePr < 0.5] + e_o/ploidy_fb
      emission.mat[is.na(emission.mat)] <- 1/Nstates 
    }
    
    
    ##---------------------
    ## Forward algorithm
    ##---------------------
    FORres <- fx(EM = emission.mat,TM = TMarray)
    forward.mat <- FORres$FM
    
    ## Termination step:
    logprob_x <- sum(log(FORres$s))
    
    ##---------------------
    ## Backward algorithm
    ##---------------------
    backward.mat <- bx(EM = emission.mat,TM = TMarray, s = FORres$s)
    
    ## Compute the posterior probability of each of the states at position i, where P(x) = prob_x was computed earlier
    IBDmatrix <- t(forward.mat[,2:ncol(forward.mat)]*backward.mat)
    
    ## In some cases if denom is a very small number, can lead to overflow errors (Inf) in the IBDmatrix
    if(length(is.infinite(IBDmatrix)) > 0){
      IBDmatrix[is.infinite(IBDmatrix)] <- 0 #replace these values with 0
    }
    colnames(IBDmatrix) <- rownames(state_matrix)
    
    # Normalise:
    norm.vect <- rowSums(IBDmatrix)
    
    ## Protect against division by zero:
    if(length(which(norm.vect == 0)) > 0) norm.vect[norm.vect==0] <- 1
    
    ## If denom was very small, can still have Infinite rowSums
    if(length(which(is.infinite(norm.vect))) > 0) warning("Error calculating IBD probabilities. Suggest to reduce number of markers per linkage group and re-try.")
    
    IBDmatrix <- IBDmatrix/norm.vect
    
    return(list("P.X" = IBDmatrix,
                "P.V" = logprob_x))
  } #forward_backward
  
  ## IBD function, per linkage group:
  getIBD_LG <- function(lg) {
    
    ## Check that marker positions are not overlapping:
    map <- phased_maplist[[lg]][,c("marker","position")]
    par.phase <- phased_maplist[[lg]][,paste0("h",1:(ploidy+ploidy2))]
    
    if(length(map$position) != length(unique(map$position))){
      warning(paste("Jittering overlapping SNP positions on linkage group",lg))
      while(length(map$position) != length(unique(map$position)))
        map$position[duplicated(map$position)] <- jitter(map$position[duplicated(map$position)],amount = 0.1)
      #re-order after jittering if necessary:
      par.phase <- par.phase[order(map$position),]
      map <- map[order(map$position),]
    }
    
    posvec <- map$position
    mark <- as.character(map$marker)
    nmark <- length(mark)
    names(posvec) <- rownames(par.phase) <- mark
    par.phase <- as.matrix(par.phase)
    
    combs <- cbind(mark[-length(mark)],mark[-1])
    
    pos1 <- posvec[combs[,1]]
    pos2 <- posvec[combs[,2]]
    
    r.vect <- mapfun(abs(pos2 - pos1))
    
    if(input_type == "discrete"){ #for some reason this is evaluating (sometimes) as TRUE within foreach loop
      progeny <- genotypes[mark,-which(colnames(genotypes) %in% c(parent1,parent2)),drop=FALSE]
    } else{
      progeny <- genotypes[mark,,setdiff(dimnames(genotypes)[[3]],c(parent1,parent2)),drop = FALSE] #this is still a 3d array
    }
    
    ## Generate transition matrix list here first:
    TM.ls <- lapply(names(state.ls), function(VAL){
      TMfun <- get(paste0("TMfun.",ploidyF1,"x_",VAL))
      matdim <- dim(TMfun(1,r.vect)) #simplest is to just record the TM dim
      array(sapply(1:nmark,function(n) TMfun(n-1,r.vect)),dim = c(matdim,nmark))
    })
    
    #######################################################################
    ## Function to call the forward_backward function and generate the IBDs
    generate_IBDs <- function(F1index,
                              state_ls,
                              allstates = all.states,
                              ploidy_F1 = ploidyF1,
                              TM_ls = TM.ls,
                              ...){ ## dots are arguments passed to forward_backward()
      
      valency_IBD.ls <- lapply(seq(length(state_ls)), function(v) {
        lapply(state_ls[[v]], function(StateMat)
          forward_backward(F1num = F1index,
                           state_matrix = StateMat,
                           TMarray = TM_ls[[v]],
                           ...)
        )
      }
      )
      
      ## Work-around, simply use the pairing type which contains the most likely pairing structure:
      Pval_type <- sapply(valency_IBD.ls, function(x) max(sapply(x, function(y) y$P.V)))
      
      ML_val <- which.max(Pval_type) #Select the most likely and proceed
      
      if(verbose) write(paste("Most likely pairing type:",names(state_ls)[ML_val]),log.conn)
      
      valency_IBDs <- valency_IBD.ls[[ML_val]] #Go one level deeper into nested list, ignoring other types..
      
      ## Next, need to incorporate the probability of each valency:
      P_D <- sum(sapply(valency_IBDs, function(x) exp(x$P.V - Pval_type[ML_val])/length(valency_IBDs)))
      P_V <- sapply(valency_IBDs, function(x) exp(x$P.V - Pval_type[ML_val])/((length(valency_IBDs)*P_D))) #This could also be returned...
      
      ## Need to incorporate these probabilities together. These are not in the correct dimension yet..
      IBD_incompat <- lapply(valency_IBDs, function(x) x$P.X)
      
      valencyIBDarray <- array(0,dim = c(nmark,length(allstates),length(valency_IBDs)))
      dimnames(valencyIBDarray) <- list(mark,allstates,NULL)
      
      # Work-around: use a for loop here, could be improved later..
      for(p in 1:length(valency_IBDs)){
        valencyIBDarray[,colnames(IBD_incompat[[p]]),p] <- IBD_incompat[[p]]
      }
      
      valencyIBDarray <- sweep(valencyIBDarray,3,P_V,FUN="*") #Multiply IBD probs by probability of each valency
      
      IBDs_output <- apply(valencyIBDarray,2,rowSums)
      
      return(list("IBD" = IBDs_output,
                  "allstates" = allstates,
                  "P_V" = P_V,
                  # "marginal.likelihood" = setNames(Pval_type[ML_val],names(state_ls)[ML_val]))
                  "marginal.likelihoods" = matrix(Pval_type, ncol = 1, dimnames = list(names(state_ls),"logL")))
      ) 
    } #generate_IBDs
    ################
    
    IBD.list <- lapply(X = offspring, FUN = generate_IBDs,
                       state_ls = state.ls,
                       e_o = error,
                       ploidy_fb = ploidyF1,
                       par_phase = par.phase,
                       progeny_dose = progeny,
                       n_mark = nmark,
                       input.typ = input_type)
    
    ## Convert list to a 3d array
    IBDs.ls <- lapply(IBD.list,function(x) x$IBD)
    pairing.probs <- lapply(IBD.list,function(x) x$P_V)
    IBDarray <- abind::abind(IBDs.ls,along = 3)
    marginal.L <- lapply(IBD.list,"[[","marginal.likelihoods")
    
    if(input_type == "discrete"){
      offspring.names <- colnames(progeny)[offspring]
    } else{
      offspring.names <- dimnames(progeny)[[3]][offspring]
    }
    
    names(pairing.probs) <- dimnames(IBDarray)[[3]] <- names(marginal.L) <- offspring.names
    
    return(list("IBDtype" = "genotypeIBD",
                "IBDarray" = IBDarray,
                "map" = cbind("chromosome" = lg,
                              map),
                "parental_phase" = par.phase,
                "marginal.likelihoods" = marginal.L,
                "valency" = sapply(marginal.L,function(x) rownames(x)[which.max(x)]),
                "offspring" = offspring.names,
                "biv_dec" = bivalent_decoding,
                "gap" = NULL, #Add gap later if splining...
                "genocodes" = dimnames(IBDarray)[[2]],
                "pairing" = pairing.probs,
                "ploidy" = ploidy,
                "ploidy2" = ploidy2,
                "method" = "hmm",
                "error" = error))
  } #getIBD_LG
  
  ## Run over multiple linkage groups, in parallel if required:
  if(ncores > 1){
    
    win <- Sys.info()["sysname"] == "Windows"
    if (win) {
      cl <- parallel::makePSOCKcluster(ncores)
      doParallel::registerDoParallel(cl)
    } else {
      doParallel::registerDoParallel(cores = ncores)
    }
    
    ## R CMD CHECK work-around:
    lgp <- NULL
    
    outlist <-
      foreach::foreach(
        lgp = seq(length(phased_maplist)),
        .inorder = TRUE,
        .packages = c("polyqtlR")
      ) %dopar% {
        getIBD_LG(lgp)
      }
    if(win) parallel::stopCluster(cl)
    
  } else{
    outlist <- lapply(seq(length(phased_maplist)), getIBD_LG)
  }
  
  if(!is.null(names(phased_maplist))){
    names(outlist) <- names(phased_maplist)
  } else{
    warning(paste("phased_maplist had no LG names assigned. Assigning names",
                  paste0("'LG",seq(length(outlist)),"'", collapse = ", "),"to output list."))
    names(outlist) <- paste0("LG",seq(length(outlist)))
  }
  
  return(outlist)
  
}) #hmm_IBD


#' Find depth of a list
#' @description Recursive function checking list depth to determine subsequent subsetting rules
#' @param obj An input object, may be a (nested) list
#' @param objdepth Counter to record how deep the recursion has gone
#' @noRd
list.depth <- function(obj, objdepth = 0) {
  if(!is.list(obj)) {
    return(objdepth)
  } else {
    return(max(unlist(lapply(obj, list.depth, objdepth = objdepth + 1))))
  }
}

#' Generate a sequence of map positions for splining function
#' @description Function to return a sequence of map positions at steps of size gap from given input
#' @param cMvect Vector of map positions
#' @param gap Gap size in cM
#' @noRd
mapseq <- function(cMvect, gap){
  ## Error checking input:
  if(length(cMvect) < 2) stop("No need to fit splines on a map with fewer than 2 positions!")
  if(max(cMvect) - min(cMvect) < gap) stop("Impossible to fit grid using gap specified. Suggest to decrease gap.")
  if(!all(sort(order(cMvect)) == order(cMvect))) stop("Marker positions not supplied in correct order according to map order.")
  
  if(cMvect[1] == ceiling(cMvect[1])){
    out <- seq(cMvect[1],max(cMvect),gap)
  } else{
    out <- c(cMvect[1], seq(ceiling(cMvect[1]),max(cMvect),gap))
  }
  
  if(max(cMvect) > max(out)) out <- c(out,max(cMvect))
  
  return(out)
}

#' Nettleton and Doerge 2000
#' @param N number of permutations
#' @param alpha The threshold level, ie. (1 - alpha) quantile of sorted LOD scores defines threshold
#' @param gamma The confidence interval specifier, usually 0.05
#' @noRd
NettletonDoerge <- function(N = 100,alpha = 0.05,gamma = 0.05){
  lower <- ceiling(N*(1-alpha) - (1/pnorm(1-(gamma/2)))*sqrt(N*(1-alpha)*alpha))
  upper <- ceiling(N*(1-alpha) + (1/pnorm(1-(gamma/2)))*sqrt(N*(1-alpha)*alpha))
  return(c(lower,upper))
}


#' Error handling on ploidy and ploidy2, and determine the number of genotype classes for general ploidy level and pairing behaviour
#' @param biv_dec Either TRUE for bivalents only or FALSE to also allow quadrivalents and double reduction
#' @param pl ploidy level of parent 1, assumed to be even
#' @param pl2 ploidy level of parent 2, assumed to be even
#' @noRd
Nstates.fun <- function(biv_dec, pl, pl2){
  
  if(pl < pl2) { #Updated in v.1.0.0, handles 2x4 as well as 4x2 triploid populations
    temp <- pl ## Simplest solution, reverse pl and pl2
    pl <- pl2
    pl2 <- temp
    # stop("pl should always be >= pl2. Please reverse and try again.")
  }
  
  if(any(c(pl,pl2)%%2 != 0)) stop("pl and pl2 must be even (positive) integers.")
  
  if(all(c(pl,pl2) == 2)) {
    out <- 4 #handle the case of diploidy separately
  } else{
    out <- ifelse(biv_dec,choose(pl,pl/2)*choose(pl2,pl2/2),
                  ifelse(pl2 == 2,(choose(pl,pl/2) + pl*(choose(pl-1,(pl/2 - 1))))*choose(pl2,pl2/2),
                         (choose(pl,pl/2) + pl*(choose(pl-1,(pl/2) - 2)))*(choose(pl2,pl2/2) + pl2*(choose(pl2-1,(pl2/2) - 2)))
                  ))
  }
  return(out)
}

#' Determine the number of genotype classes for single parental ploidy level and pairing behaviour
#' @param biv_dec Either TRUE for bivalents only or FALSE to also allow quadrivalents and double reduction
#' @param pl ploidy level of parent, assumed to be even
#' @noRd
Nstates.onepar <- function(biv_dec, pl){
  out <- 2 #handle the case of diploidy separately
  if(pl > 2) out <- ifelse(biv_dec,choose(pl,pl/2),choose(pl,pl/2) + pl*(choose(pl-1,(pl/2) - 2)))
  return(out)
}


#' Plot the results of genome-wide QTL analysis along a single track. Up to v.0.0.9 called polyqtlR::plotLinearQTL
#' @description QTL plotting function that plots output of \code{\link{QTLscan}} function along a single track, useful for overlaying plots. Only works for scan over multiple chromosomes.
#' @param LOD_data Output of \code{\link{QTLscan}} function.
#' @param inter_chm_gap The gap size (in cM) between successive chromosomes - by default a gap of 5 cM is used.
#' @param overlay Add to an existing plot (should be produced by a comparable call to this function) or not? By default \code{FALSE},
#' in which case a new plot is drawn. Can be useful for displaying results of multiple analyses together. 
#' @param ylimits Use to specify ylimits of plot region, though by default \code{NULL} in which case a suitable plot region is automatically used.
#' @param sig.unit Label to use on the y-axis for significance units, by default assumed to be LOD score.
#' @param plot_type Plots can be either in line drawings ("lines", default) or scatter plot format ("points").
#' @param colour Colour to be used for the plotting.
#' @param add_xaxis Should an x-axis be drawn? If multiple QTL analyses are performed on different traits, specifying this to be \code{FALSE}
#' and using \code{par(mar=c(0,4.1,4.1,2.1))} allows subsequent plots to be neatly stacked.
#' @param add_rug Logical, by default \code{TRUE} - should original marker points be added to plot?
#' @param add_thresh Logical, by default \code{TRUE} - should a significance threshold be added to plot?
#' @param override_thresh By default \code{NULL}. Can be used to specify a value for the significance threshold, overriding any stored in \code{LOD_data}.
#' @param thresh.lty Gives user control over the line type of the significance threshold to be drawn.
#' @param thresh.lwd Gives user control over the line width of the significance threshold to be drawn.
#' @param thresh.col Gives user control over the line colour of the significance threshold to be drawn.
#' @param return_plotData Logical, by default \code{FALSE}. If \code{TRUE}, then the x and y coordinates of the plot data are returned,
#' which can be useful for subsequent plot manipulations and overlays.
#' @param show_thresh_CI Logical, by default \code{TRUE}. Should confidence interval bounds around LOD threshold be shown?
#' @param use_LG_names Logical, by default \code{TRUE}. Should original character LG names be used as axis labels, or should numbering be used instead?
#' @param axis_label.cex Argument to adjust the size of the axis labels, can be useful if there are many linkage groups to plot
#' @param custom_LG_names Specify a vector that contains custom linkage group names. By default \code{NULL}
#' @param LGdiv.col Colour of dividing lines between linkage groups, by default grey.
#' @param ylab.at Position of y axis lablel
#' @param highlight_positions Option to hightlight positions. Should be a 2 column data.frame with columns
#' @param \dots Arguments passed to \code{\link{plot}}, and \code{\link{lines}} or \code{\link{points}} as appropriate (see argument \code{plot_type}).
#' @return The plot data, if return_plotData = TRUE. Otherwise \code{NULL}. Output is returned invisibly
#' @noRd
plotQTL.genome_linear <- function(LOD_data,
                                  inter_chm_gap = 5, #cM jump between chromosomes..
                                  overlay = FALSE,
                                  ylimits = NULL,
                                  sig.unit = "LOD",
                                  plot_type = "lines",
                                  colour = "black",
                                  add_xaxis = TRUE,
                                  add_rug = TRUE,
                                  add_thresh = TRUE,
                                  override_thresh = NULL,
                                  thresh.lty = 3,
                                  thresh.lwd = 2,
                                  thresh.col = "darkred",
                                  return_plotData = FALSE,
                                  show_thresh_CI = TRUE,
                                  use_LG_names = TRUE,
                                  axis_label.cex = 1,
                                  custom_LG_names = NULL,
                                  LGdiv.col = "gray42",
                                  ylab.at = 2.5,
                                  highlight_positions = NULL,
                                  ...){
  
  plot_type <- match.arg(plot_type,choices = c("lines","points"))
  
  ## Check whether there is a LOD threshold:
  LOD_threshold <- 0
  
  if(add_thresh){
    if(!is.null(override_thresh)){
      LOD_threshold <- as.numeric(override_thresh)
    } else if(!is.null(LOD_data$Perm.res)){
      LOD_threshold <- LOD_data$Perm.res$threshold
    }
  }
  
  ## Find out the x and y ranges:
  plotdata <- as.data.frame(LOD_data$QTL.res)
  
  if(!"chromosome" %in% colnames(plotdata)) stop("No positional information available for LOD scores! Unable to plot...")
  
  # Check whether there is chromosome 0 data, if so plot separately.
  if(0 %in% plotdata$chromosome){
    warning("Unassigned marker positions detected (chr. 0)")
    ## If overlaying, cannot plot separately
    if(!overlay){
      subdata <- plotdata[plotdata$chromosome == 0,]
      
      with(subdata,
           plot(position,LOD,main = "Unassigned markers (Chr. 0)",
                xlab = "Index", ...)
      )
    } else{
      warning("Cannot plot chr. 0 data separately during overlay plot. Suggest to plot independently using plot() function.")
    }
    LOD_data$Map <- LOD_data$Map[-which(LOD_data$Map[,"chromosome"] == 0),]
    plotdata <- plotdata[-which(plotdata$chromosome == 0),]
  }
  
  n.chms <- length(unique(plotdata$chromosome))
  chm.lens <- sapply(unique(plotdata$chromosome), function(ch) max(plotdata[plotdata$chromosome == ch,"position"]))
  
  if(n.chms == 1) stop("This layout is not appropriate for a single chromosome dataset. Use layout = 's' instead.")
  
  additions <- sapply(1:(n.chms - 1), function(n) sum(chm.lens[1:n]))
  add.reps <- c(rep(0,length(which(plotdata$chromosome == 1))),
                unlist(sapply(2:n.chms, function(n) rep(additions[n-1], length(which(plotdata$chromosome == unique(plotdata$chromosome)[n]))))))
  
  add.reps <- add.reps + (plotdata$chromosome - 1)*inter_chm_gap #add inter_chm_gaps..
  chm.ends <- additions + seq(0,inter_chm_gap*(n.chms-2),inter_chm_gap)
  chm.ends <- c(0,chm.ends, max(chm.ends) + chm.lens[length(chm.lens)])
  
  plotdata <- cbind(plotdata, plotdata$position + add.reps)
  colnames(plotdata)[ncol(plotdata)] <- "xposns"
  
  hp <- FALSE
  if(!is.null(highlight_positions)){
    hp <- TRUE
    
    if(list.depth(highlight_positions) == 2) highlight_positions <- highlight_positions[[1]]
    if(!is.data.frame(highlight_positions)) highlight_positions <- as.data.frame(highlight_positions)
    
    newstarts <- c(0,additions) + inter_chm_gap*(seq(0,n.chms-1))
    
    highlight_positions$xposn <- highlight_positions[,2] + newstarts[highlight_positions[,1]]
  }
  
  if(!overlay){
    if(is.null(ylimits)) ylimits <- c(0,extendrange(plotdata$LOD)[2])
    
    plot(NULL, xlim=range(plotdata$xposns),ylim = ylimits,
         axes=FALSE,xlab = ifelse(add_xaxis,"Linkage group",""), ylab = "", ...)
    
    if(add_xaxis) {
      if(use_LG_names){
        ## Detect LG names, if missing, make them
        if(is.null(LOD_data$LG_names)){
          message("LG_names not detected in LOD_data. Re-creating LG names for plot...")
          LOD_data$LG_names <- paste0("LG",unique(plotdata$chromosome))
        }
        
        axis(1,at = rowMeans(cbind(chm.ends[-1],chm.ends[-length(chm.ends)])),
             labels = LOD_data$LG_names, cex.axis = axis_label.cex)
      } else if(!is.null(custom_LG_names)){
        axis(1,at = rowMeans(cbind(chm.ends[-1],chm.ends[-length(chm.ends)])),
             labels = custom_LG_names, cex.axis = axis_label.cex)
      } else{
        axis(1,at = rowMeans(cbind(chm.ends[-1],chm.ends[-length(chm.ends)])),
             labels = unique(plotdata$chromosome), cex.axis = axis_label.cex)
      }
    }
    
    if(add_rug) {
      LOD_data$Map <- as.data.frame(LOD_data$Map)
      add.reps1 <- c(rep(0,length(which(LOD_data$Map$chromosome == 1))),
                     unlist(sapply(2:n.chms, function(n) rep(additions[n-1], length(which(LOD_data$Map$chromosome == n))))))
      add.reps1 <- add.reps1 + (LOD_data$Map$chromosome - 1)*inter_chm_gap #add inter_chm_gaps..
      rug(LOD_data$Map$position + add.reps1)
    }
    
    axis(2, las = 1, cex.axis = axis_label.cex)
    mtext(text = sig.unit,side = 2,line = ylab.at)
    
    for(yline in chm.ends[-c(1,length(chm.ends))] + inter_chm_gap/2) abline(v = yline, col = LGdiv.col,lty = 3)
    box()
    
    if(hp){
      abline(v = highlight_positions$xposn, col = colour[1])
      points(x = highlight_positions$xposn, y = rep(0,nrow(highlight_positions)),pch = 17, col = colour[1])
    }
    
  }
  ## Add lines
  for(ch in unique(plotdata$chromosome)) {
    
    if(plot_type == "lines"){
      lines(plotdata[plotdata$chromosome == ch,"xposns"],
            plotdata[plotdata$chromosome == ch,"LOD"],
            col = colour[1],...)
    } else{
      points(plotdata[plotdata$chromosome == ch,"xposns"],
             plotdata[plotdata$chromosome == ch,"LOD"],
             col = colour[1],...)
    }
  }
  
  if(LOD_threshold > 0) {
    segments(par("usr")[1],LOD_threshold,
             par("usr")[2],LOD_threshold,
             lty=thresh.lty,lwd=thresh.lwd,col=thresh.col)
    
    if(show_thresh_CI & !is.null(LOD_data$Perm.res)){
      thresh.rgb <- col2rgb(thresh.col)[,1]/255
      
      segments(par("usr")[1],LOD_data$Perm.res$threshold.bounds[1],
               par("usr")[2],LOD_data$Perm.res$threshold.bounds[1],
               lty=thresh.lty,lwd=thresh.lwd,col=rgb(red = thresh.rgb[1],
                                                     green = thresh.rgb[2],
                                                     blue = thresh.rgb[3],
                                                     alpha = 0.5))
      
      segments(par("usr")[1],LOD_data$Perm.res$threshold.bounds[2],
               par("usr")[2],LOD_data$Perm.res$threshold.bounds[2],
               lty=thresh.lty,lwd=thresh.lwd,col=rgb(red = thresh.rgb[1],
                                                     green = thresh.rgb[2],
                                                     blue = thresh.rgb[3],
                                                     alpha = 0.5))
    }
    
  }
  
  if(return_plotData) {
    return(invisible(plotdata))
  } else{
    return(invisible(NULL))
  }
  
} #plotLinearQTL



#' Overlay the results of a number of genome-wide QTL analysis for which significance thresholds are available.
#' Up to v.0.0.9 called polyqtlR::plotLinearQTL_list
#' @description Extension of the \code{\link{plotLinearQTL}} function, taking as input a list generated from combining the output of \code{\link{QTLscan}}.
#' Its distinguishing characteristic is that overlaid plots are re-scaled so that the significance thresholds overlap. This can be useful if there are multiple results
#' being plotted together for comparison, all of which may have different thresholds. The resulting plot can help quickly compare the power of different analyses. Warning - 
#' the y axis LOD scale is only correct for the first list element / set of results. Also as before, this function only works for QTL scan over multiple chromosomes.
#' @param LOD_data.ls A list, each element of which is a separate output of \code{\link{QTLscan}}, for which the setting \code{perm_test = TRUE} was used each time.
#' @param inter_chm_gap The gap size (in cM) between successive chromosomes - by default a gap of 5 cM is used.
#' @param ylimits Use to specify ylimits of plot region, though by default \code{NULL} in which case a suitable plot region is automatically used.
#' @param sig.unit Label to use on the y-axis for significance units, by default assumed to be "LOD".
#' @param plot_type Plots can be either in line or scatter plot format (so, specify as "lines" or "points"). If multiple types are required, supply as a vector of same length as LOD_data.ls
#' @param add_xaxis Should an x-axis be drawn? If multiple QTL analyses are performed on different traits, specifying this to be \code{FALSE}
#' and using \code{par(mar=c(0,4.1,4.1,2.1))} allows subsequent plots to be neatly stacked.
#' @param add_rug Logical, by default \code{TRUE} - should original marker points be added to plot?
#' @param colours Vector of colours to be used in the plotting. A default set of 4 colours is provided.
#' @param ylab.at Distance from the y-axis to place label (by default at 2.5 points)
#' @param thresh.lty Gives user control over the line type of the significance threshold to be drawn.
#' @param thresh.lwd Gives user control over the line width of the significance threshold to be drawn.
#' @param thresh.col Gives user control over the line colour of the significance threshold to be drawn, by default "darkred"
#' @param return_plotData Logical, by default \code{FALSE}. If \code{TRUE}, then the x and y coordinates of the plot data are returned,
#' which can be useful for subsequent plot manipulations and overlays.
#' @param highlight_positions Option to include a list of associated positions to highlight, of the same length and in the same order as \code{LOD_data.ls}.
#' Each set of positions should be provided in data.frame format with 3 columns corresponding to linkage group, type and ID (same format as the cofactor.df 
#' argument of function \code{\link{QTLscan}}. This can be useful if genetic co-factor analyses are being compared. 
#' If no position is to be highlighted, add the corresponding list element as \code{NULL}.
#' @param LGdiv.col Colour of dividing lines between linkage groups, by default grey.
#' @param use_LG_names Logical, by default \code{TRUE}. Should original character LG names be used as axis labels, or should numbering be used instead?
#' @param axis_label.cex Argument to adjust the size of the axis labels, can be useful if there are many linkage groups to plot
#' @param custom_LG_names Specify a vector that contains custom linkage group names. By default \code{NULL}
#' @param override_thresh By default \code{NULL}. Can be used to specify a (numeric) value for the significance threshold, overriding any stored in \code{LOD_data}.
#' @param \dots Arguments passed to \code{\link{lines}} or \code{\link{points}} as appropriate (see argument plot_type).
#' @return The plot data, if \code{return_plotData = TRUE}, otherwise \code{NULL}. Function output is returned invisibly.
#' @noRd
plotQTL.genome_linear_list <- function(LOD_data.ls,
                                       inter_chm_gap = 5,
                                       ylimits = NULL,
                                       sig.unit = "LOD",
                                       plot_type,
                                       add_xaxis = TRUE,
                                       add_rug = TRUE,
                                       colours = c("black","red","dodgerblue","sienna4"),
                                       ylab.at = 2.5, 
                                       thresh.lty = 3,
                                       thresh.lwd = 2,
                                       thresh.col = "darkred",
                                       return_plotData = FALSE,
                                       highlight_positions = NULL,
                                       LGdiv.col = "gray42",
                                       use_LG_names = TRUE,
                                       axis_label.cex = 1,
                                       custom_LG_names = NULL,
                                       override_thresh = NULL,
                                       ...){
  ## Check input:
  if(length(plot_type) == 1){
    plot_type <- sapply(seq(length(LOD_data.ls)),function(n) match.arg(plot_type, choices = c("lines","points")))
  } else if(length(plot_type) != length(LOD_data.ls)) {
    stop(paste("If multiple plot_type required, please supply as a vector of length",length(LOD_data.ls)))
  } else{
    plot_type <- sapply(plot_type,match.arg,choices = c("lines","points"))
  }
  
  if(!is.null(highlight_positions)){
    if(!is.list(highlight_positions) | length(highlight_positions) != length(LOD_data.ls)) 
      stop("highlight_positions must be a list of the same length as LOD_data.ls")
    hp <- TRUE
  } else{
    hp <- FALSE
  }
  
  # oldpar <- par(no.readonly = TRUE)    
  # on.exit(par(oldpar))            # Thanks to Gregor, CRAN
  
  if(list.depth(LOD_data.ls) != 3) stop("LOD_data input should be a nested list of outputs of the function QTLscan!")
  
  if(any(sapply(sapply(LOD_data.ls, function(x) x$Perm.res),is.null)) & is.null(override_thresh)) stop("All elements of LOD_data should have a significance threshold!")
  
  ## Remove any chromosome 0 data (possible with single marker regression data)
  LOD_data.ls <-  lapply(seq(length(LOD_data.ls)), function(l) {
    temp <- LOD_data.ls[[l]]
    temp$QTL.res <- temp$QTL.res[temp$QTL.res$chromosome != 0,]
    temp$Map <- temp$Map[temp$Map[,"chromosome"] != 0,]
    return(temp)
  })
  
  LOD_data1 <- LOD_data.ls[[1]] #Use the first list element for most of the default data (cM etc..)
  plotdata1 <- as.data.frame(LOD_data1$QTL.res)
  
  n.chms <- length(unique(plotdata1$chromosome))
  chm.lens <- sapply(unique(plotdata1$chromosome), function(ch) max(plotdata1[plotdata1$chromosome == ch,"position"]))
  
  if(n.chms == 1) stop("This layout is not appropriate for a single chromosome dataset. Use layout 's' instead.")
  
  additions <- sapply(1:(n.chms - 1), function(n) sum(chm.lens[1:n]))
  
  add.reps <- lapply(seq(length(LOD_data.ls)), function(ds) c(rep(0,length(which(LOD_data.ls[[ds]]$QTL.res$chromosome == 1))),
                                                              unlist(sapply(2:n.chms, function(n) rep(additions[n-1], 
                                                                                                      length(which(LOD_data.ls[[ds]]$QTL.res$chromosome == n))))))
                     + (LOD_data.ls[[ds]]$QTL.res$chromosome - 1)*inter_chm_gap
  )
  
  chm.ends <- additions + seq(0,inter_chm_gap*(n.chms-2),inter_chm_gap)
  chm.ends <- c(0,chm.ends, max(chm.ends) + chm.lens[length(chm.lens)])
  
  # Generate a list of the plot data with new column xposns for linear plotting over all LGs on x axis
  plotdata.ls <- lapply(seq(length(LOD_data.ls)), function(i){
    out <- cbind(as.data.frame(LOD_data.ls[[i]]$QTL.res), "xposns" = LOD_data.ls[[i]]$QTL.res$position + add.reps[[i]])
    out$LOD[out$LOD<0] <- 0
    return(out)
  })
  
  new.adds <- unique(add.reps[[1]])
  
  if(hp) hp.ls <- lapply(seq(length(highlight_positions)), function(i){
    if(!is.null(highlight_positions[[i]])){
      
      x <- highlight_positions[[i]]
      if(!is.data.frame(x)) x <- as.data.frame(x)
      
      ## Unworked update here... needs some tweaking..
      # if(length(unique(x[,2])) != 1) stop("Currently, mixing markers and cM positions to highlight is not catered for")
      
      # if(x[,2] == "position"){
      #   out <- plotdata.ls[[i]][plotdata.ls[[i]]$chromosome == x[,1] & 
      #                             plotdata.ls[[i]]$position == x[,3],]
      # } else{
      #   #this bit might cause an issue
      #   hit <- which(LOD_data.ls[[1]]$Map$marker == x[,3])
      #   if(length(hit) == 0) {
      #     warning(paste("Marker",x[,3],"was not found on Map!"))
      #     out <- NULL
      #   } else {
      #     pos <- LOD_data.ls[[1]]$Map[hit,"position"]
      #     out <- plotdata.ls[[i]][which.min(abs(plotdata.ls[[i]]$position - pos)),]
      #     }
      # }
      
      x$out <- x[,2] + new.adds[x[,1]]
      colnames(out)[ncol(out)] <- "xposn"
    } else{
      out <- NULL
    }
    return(out)
  })
  
  if(!is.null(ylimits)){
    ylimits.ls <- lapply(seq(length(LOD_data.ls)), function(i){
      ylimits #just repeat ylimits
    })
  } else{
    ylimits.ls <- lapply(seq(length(LOD_data.ls)), function(i){
      extendrange(c(0,plotdata.ls[[i]]$LOD,LOD_data.ls[[i]]$Perm.res$threshold))
    })#add threshold in case not significant, so thresh line always present
  }
  
  if(!is.null(override_thresh)){
    if(length(override_thresh) == 1){
      for(i in 1:length(LOD_data.ls)) LOD_data.ls[[i]]$Perm.res$threshold <- override_thresh
    } else if(length(override_thresh) != length(LOD_data.ls)) stop("If over-riding significance thresholds, either provide a single value or a vector the same length as LOD_data!")
  }
  
  threshpos <- sapply(seq(length(LOD_data.ls)), function(i){
    (LOD_data.ls[[i]]$Perm.res$threshold - ylimits.ls[[i]][1])/(ylimits.ls[[i]][2]-ylimits.ls[[i]][1])
  })
  
  p <- min(threshpos) 
  
  ## Change the ones with least space above. First find the plot with most space above:
  min.base <- ylimits.ls[[which.min(threshpos)]][1]
  orig.wmt <- which.min(threshpos)
  
  ## Rescale plot y limits:
  
  for(i in setdiff(seq(length(LOD_data.ls)),which.min(threshpos))){
    ylimits.ls[[i]][2] <- (LOD_data.ls[[i]]$Perm.res$threshold - (1-p)*min.base)/p
    ylimits.ls[[i]][1] <- min.base
  }
  
  ## Double check the scaling is now correct:
  threshpos <- sapply(seq(length(LOD_data.ls)), function(i){
    (LOD_data.ls[[i]]$Perm.res$threshold - ylimits.ls[[i]][1])/(ylimits.ls[[i]][2]-ylimits.ls[[i]][1])
  })
  
  if(mean(threshpos) - threshpos[1] > 0.001) {
    message("Rescaled plot limits issue!"); print(ylimits.ls)
    stop("Error in rescaling plots, suggest to check data...")
  }
  
  shrink.range <- function(r){
    return(c(
      (r[1] + (0.04*r[2] + (r[1]*0.04^2)/(1.04))/(1.04 - 0.04^2))/(1.04),
      (r[2] + (0.04*r[1])/(1.04))/(1.04 - 0.04^2)
    ))
  }
  
  plot(NULL, xlim=range(plotdata.ls[[1]]$xposns),ylim = shrink.range(ylimits.ls[[1]]), #ylim gets expanded by plot function
       axes=FALSE,xlab = ifelse(add_xaxis,"Linkage group",""), ylab = "", cex.lab = 1.5) #Add axes labels afterwards
  
  if(add_xaxis) {
    if(use_LG_names) {
      xlabs <- LOD_data1$LG_names
    } else if(!is.null(custom_LG_names)){
      xlabs <- custom_LG_names
    }else{
      xlabs <- unique(plotdata1$chromosome)
    }
    
    axis(1,at = rowMeans(cbind(chm.ends[-1],chm.ends[-length(chm.ends)])),
         labels = xlabs, cex.axis = axis_label.cex)
  }
  
  if(add_rug) {
    LOD_data1$Map <- as.data.frame(LOD_data1$Map)
    add.reps1 <- c(rep(0,length(which(LOD_data1$Map$chromosome == 1))),
                   unlist(sapply(2:n.chms, function(n) rep(additions[n-1], length(which(LOD_data1$Map$chromosome == n))))))
    add.reps1 <- add.reps1 + (LOD_data1$Map$chromosome - 1)*inter_chm_gap #add inter_chm_gaps..
    rug(LOD_data1$Map$position + add.reps1)
  }
  
  axis(2, las = 1, cex.axis = axis_label.cex)
  mtext(text = sig.unit,side = 2,line = ylab.at, col = colours[orig.wmt])
  
  for(yline in chm.ends[-c(1,length(chm.ends))] + inter_chm_gap/2) abline(v = yline, col = LGdiv.col,lty = 3)
  
  ## Now overlay with the other data:
  for(j in 2:length(LOD_data.ls)){
    
    par(new = TRUE)
    
    plot(NULL, xlim=range(plotdata.ls[[j]]$xposns),ylim = shrink.range(ylimits.ls[[j]]),
         axes=FALSE,xlab = "", ylab = "")
    
    for(ch in unique(plotdata.ls[[j]]$chromosome)) {
      
      if(plot_type[j] == "lines"){
        lines(plotdata.ls[[j]][plotdata.ls[[j]]$chromosome == ch,"xposns"],
              plotdata.ls[[j]][plotdata.ls[[j]]$chromosome == ch,"LOD"],col = colours[j],...)
        
      } else{
        points(plotdata.ls[[j]][plotdata.ls[[j]]$chromosome == ch,"xposns"],
               plotdata.ls[[j]][plotdata.ls[[j]]$chromosome == ch,"LOD"],col = colours[j],...)
        
      }
    }
    
    if(hp && !is.null(hp.ls[[j]])){
      abline(v = hp.ls[[j]]$xposn, col = colours[j])
      points(x = hp.ls[[j]]$xposn, y = rep(0,nrow(hp.ls[[j]])),
             pch = 17, col = colours[j])
    }
    
    segments(par("usr")[1], LOD_data.ls[[j]]$Perm.res$threshold,
             par("usr")[2], LOD_data.ls[[j]]$Perm.res$threshold,
             lty=thresh.lty,lwd=thresh.lwd,col=thresh.col)
    
  } #for (j...)
  
  ## Add data1 at the end for emphasis (overlay)
  par(new = TRUE)
  
  plot(NULL, xlim=range(plotdata.ls[[1]]$xposns),ylim = shrink.range(ylimits.ls[[1]]),
       axes=FALSE,xlab = "", ylab = "") 
  
  for(ch in unique(plotdata1$chromosome)) {
    if(plot_type[1] == "lines"){
      lines(plotdata.ls[[1]][plotdata.ls[[1]]$chromosome == ch,"xposns"],
            plotdata.ls[[1]][plotdata.ls[[1]]$chromosome == ch,"LOD"],
            col = colours[1]
            # ,
            # lwd = main.size, 
            # lty = main.lty
      )
    } else{
      points(plotdata.ls[[1]][plotdata.ls[[1]]$chromosome == ch,"xposns"],
             plotdata.ls[[1]][plotdata.ls[[1]]$chromosome == ch,"LOD"],
             col = colours[1],
             # cex = main.size, 
             ...)
    }
  }
  
  if(hp && !is.null(hp.ls[[1]])){
    abline(v = hp.ls[[1]]$xposn, col = colours[1])
    points(x = hp.ls[[1]]$xposn, y = rep(0,nrow(hp.ls[[1]])),
           pch = 17, col = colours[1])
  }
  
  ## Add the significance threshold (this is at the correct level for all plots now...)
  segments(par("usr")[1],LOD_data1$Perm.res$threshold,
           par("usr")[2],LOD_data1$Perm.res$threshold,
           lty=thresh.lty,lwd=thresh.lwd,col=thresh.col)
  
  box()
  
  
  
  if(return_plotData) {
    return(invisible(plotdata.ls))
  } else{
    return(invisible(NULL))
  }
  
} #plotLinearQTL_list


#' Plot the results of a previous QTL analysis. Up to v.0.0.9 called polyqtlR::plotQTL
#' @description Basic QTL plotting function, taking map positions and significance levels as input
#' @param LOD_data Output list from \code{\link{QTLscan}} with items QTL.res and Perm.res (the latter can be \code{NULL})
#' @param ylimits Use to specify ylimits of plot region, though by default \code{NULL} in which case a suitable plot region is automatically used.
#' @param multiplot Vector of integers. By default \code{NULL}. If \code{LOD_data} contains results from multiple linkage groups, you can
#' define the number of rows and columns in the plot layout.
#' @param plot_type How should be plots be drawn, either "lines" or "points" are possible
#' @param overlay Add to an existing plot (should be produced by a comparable call to this function) or not? By default \code{FALSE},
#' in which case a new plot is drawn. Can be useful for displaying results of multiple analyses together.
#' @param add_xaxis Should an x-axis be drawn? If multiple QTL analyses are performed on different traits, specifying this to be \code{FALSE}
#' and using \code{par(mar=c(0,4.1,4.1,2.1))} allows subsequent plots to be neatly stacked.
#' @param add_rug Logical, by default \code{TRUE} - should original marker points be added to plot?
#' @param mainTitle Vector of plot titles (single character vector also allowed and will be recycled). For no plot titles, leave as \code{FALSE}
#' @param override_thresh By default \code{NULL}. Can be used to specify a (numeric) value for the significance threshold, overriding any stored in \code{LOD_data}.
#' @param thresh.lty Gives user control over the line type of the significance threshold to be drawn. Default threshold lty is 3.
#' @param thresh.lwd Gives user control over the line width of the significance threshold to be drawn. Default threshold lwd is 2.
#' @param thresh.col Gives user control over the line colour of the significance threshold to be drawn. Default threshold colour is dark red.
#' @param colour Vector of plotting colours
#' @param axis_label.cex Argument to adjust the size of the axis labels, can be useful if there are many linkage groups to plot
#' @param \dots Extra arguments passed to plotting functions (plot, lines / points)
#' @return The cM bounds of the LOD support interval, if \code{support_interval} > 0.
#' @noRd
plotQTL.single_LG <- function(LOD_data,
                              ylimits = NULL,
                              multiplot = NULL,
                              plot_type = "lines",
                              overlay = FALSE,
                              add_xaxis = TRUE,
                              add_rug = TRUE,
                              mainTitle = FALSE,
                              override_thresh = NULL,
                              thresh.lty = 3,
                              thresh.lwd = 2,
                              thresh.col = "darkred",
                              colour = "black",
                              axis_label.cex = 1,
                              ...){
  
  plot_type <- match.arg(plot_type, choices = c("lines","points"))
  
  # oldpar <- par(no.readonly = TRUE)
  # on.exit(par(oldpar)) # Thanks Gregor 
  
  ## Check whether there is a LOD threshold (or whether another should be used)
  LOD_threshold <- 0
  if(!is.null(override_thresh)) {
    LOD_threshold <- override_thresh
  } else if(!is.null(LOD_data$Perm.res)) {
    LOD_threshold <- LOD_data$Perm.res$threshold
  }
  
  plot_LODcM <- function(cMpositions,LOD,chm,chm.ctr,colour,...){
    
    if(!overlay){
      if(is.null(ylimits)) ylimits <- range(LOD)
      
      plot.title <- ifelse(mainTitle[1] == FALSE,"",
                           ifelse(length(mainTitle) >= chm.ctr, mainTitle[chm.ctr],mainTitle[1]))
      
      plot(NULL,xlim=range(cMpositions),
           ylim = ylimits,xlab=ifelse(add_xaxis,"cM",""),ylab="LOD",
           main = plot.title,axes = FALSE,...)
      if(add_xaxis) axis(1, cex.axis = axis_label.cex)
      axis(2, las = 1, cex.axis = axis_label.cex)
      if(add_rug) rug(LOD_data$Map[LOD_data$Map[,"chromosome"] == chm,"position"])
      
    }
    
    if(plot_type == "lines") {
      lines(cMpositions,LOD,col = colour,...)
    } else{
      points(cMpositions,LOD,col = colour,...)
    }
    
    if(LOD_threshold > 0) segments(par("usr")[1],LOD_threshold,
                                   par("usr")[2],LOD_threshold,
                                   lty=thresh.lty,lwd=thresh.lwd,col=thresh.col)
    
    # LODsupport <- 0
    
    # if(support_interval > 0){
    #   maxLODmin <- max(LOD) - support_interval
    #   LODsupport <- cMpositions[which(LOD >= maxLODmin)]
    #   if(length(LODsupport) >= 2) {
    #     segments(x0=min(LODsupport),y0=maxLODmin,x1=max(LODsupport),y1=maxLODmin,lwd=3,col="red2")
    #     segments(x0=min(LODsupport),y0=par("usr")[3],x1=min(LODsupport),y1=maxLODmin,lwd=1,lty=2,col="red2")
    #     segments(x0=max(LODsupport),y0=par("usr")[3],x1=max(LODsupport),y1=maxLODmin,lwd=1,lty=2,col="red2")
    #   }
    # }
    
    
    # return(range(LODsupport))
  }
  
  linkage_group <- sort(unique(LOD_data$QTL.res[,"chromosome"]))
  
  if(!is.null(multiplot) & length(multiplot)==2) {
    layout(matrix(1:(prod(multiplot)),
                  nrow=multiplot[1],
                  ncol=multiplot[2],
                  byrow=TRUE))
  }
  
  # support <- sapply(linkage_group, function(chr){
  #   chmDat <- LOD_data$QTL.res[LOD_data$QTL.res[,"chromosome"] == chr,]
  #   plot_LODcM(cMpositions = chmDat[,"position"],
  #              LOD = chmDat[,"LOD"],
  #              chm = chr,
  #              ...)
  # }
  # )
  
  sapply(1:length(linkage_group), function(chr.ctr){
    chr <- linkage_group[chr.ctr]
    chmDat <- LOD_data$QTL.res[LOD_data$QTL.res[,"chromosome"] == chr,]
    plot_LODcM(cMpositions = chmDat[,"position"],
               LOD = chmDat[,"LOD"],
               chm = chr,
               chm.ctr = chr.ctr,
               colour = colour,
               ...)
  })
  
  # Fill in any remaining empty plot spaces:
  if(!is.null(multiplot) & prod(multiplot)>length(linkage_group)){
    over <- prod(multiplot) - length(linkage_group)
    for(i in seq(1:over)) plot(NULL,xlim=c(0,1),ylim=c(0,1),
                               xlab=NA,ylab=NA,axes=F)
  }
  
  return(invisible(NULL))
  
  # if (!is.null(log))
  #   close(log.conn)
  
  # if(!is.null(multiplot)) par(mfrow=c(1,1))
  
  # if(support_interval > 0) return(t(support))
} #plotQTL




#' Convert a progeno_df data.frame to a 3d array
#' @param probgeno_df A data frame as read from the scores file produced by function
#' \code{saveMarkerModels} of R package \code{fitPoly}, or alternatively, a data frame containing at least the following columns:
#' \describe{
#' \item{SampleName }{Name of the sample (individual)}
#' \item{MarkerName }{Name of the marker}
#' \item{P0}{Probabilities of dosage score '0'}
#' \item{P1, P2... etc.}{Probabilities of dosage score '1' etc. (up to max offspring dosage, e.g. P4 for tetraploid population)}
#' }
#' @param ploidy  Ploidy of the F1 population (can be 2, 3, 4 or 6)
#' @noRd
probgeno_df_to_array <- function(probgeno_df,
                                 ploidy){
  #Note: this function assumes the function test_probgeno_df has already been called 
  # all.samp <- levels(probgeno_df$SampleName) #this includes the parents for now
  pcolnames <- paste0("P",0:ploidy)
  out.array <- array(0,
                     dim = c(length(levels(probgeno_df$MarkerName)),
                             ploidy + 1,
                             length(levels(probgeno_df$SampleName))),
                     dimnames=list(levels(probgeno_df$MarkerName),
                                   pcolnames,
                                   levels(probgeno_df$SampleName)))
  for(ind in levels(probgeno_df$SampleName)){
    temp <- probgeno_df[probgeno_df$SampleName == ind,]
    out.array[,,ind] <- as.matrix(temp[match(levels(probgeno_df$MarkerName),temp$MarkerName),pcolnames])
  }
  
  return(out.array)
} #probgeno_df_to_array


#' Quadrivalent transition matrix function
#' @param r recombination frequency
#' @noRd
quadTM <- function(r) 
  matrix(c(1-r,r/3,r/3,r/3,
           r/3,1-r,r/3,r/3,
           r/3,r/3,1-r,r/3,
           r/3,r/3,r/3,1-r),ncol=4)



#' Function to return the list of possible pairing states, given parental ploidies and meiotic pairing model
#' @param ploidy Ploidy of parent 1
#' @param ploidy2 Ploidy of parent 2
#' @param bivalent_decoding Logical, if \code{FALSE} then multivalent pairing assumed
#' @param full_multivalent_hexa Should multivalent pairing be considered in both parents simultaneously in hexaploids?
#' @noRd
state_fun <- function(ploidy, ploidy2, bivalent_decoding, full_multivalent_hexa = FALSE){
  
  ploidyF1 <- (ploidy + ploidy2)/2
  
  gen_state.mat <- function(l1,l2,ploidy_F1 = ploidyF1,onlyNames=FALSE){
    states_b <- matrix(apply(l1,2,function(x) apply(l2,2,function(y) c(x,y))),
                       ncol = ploidy_F1,byrow=T)
    rownames(states_b) <- apply(states_b,1,function(x) paste0(letters[x],collapse=""))
    ifelse(onlyNames,return(rownames(states_b)),return(states_b))
  } #gen_state.mat
  
  v1 <- combn(1:ploidy,ploidy/2)
  v2 <- combn((ploidy+1):(ploidy+ploidy2),ploidy2/2)
  
  if(ploidyF1 == 2){
    
    state.ls <- list("B" = setNames(list(gen_state.mat(v1,v2)),c("AB-CD"))) #single bivalent pairing, needs to be in nested list structure for generate_IBD function
    all.states <- gen_state.mat(v1,v2,onlyNames = TRUE)
    
  } else if(ploidyF1 == 3){
    
    BB.mat <- gen_state.mat(v1,v2)
    
    ## Update to v.1.0.0 generalises triploids to allow both 2x4 and 4x2
    if(ploidy > ploidy2){
      BBval.mat <- do.call(rbind,lapply(1:(ncol(v1)/2), function(cl1)
        t(gen_state.mat(v1[,-c(cl1,ncol(v1) - cl1 + 1)],v2,onlyNames = TRUE))
      ))
      
      QB.mat <- gen_state.mat(t(expand.grid(1:ploidy,1:ploidy)[-rem.quad,]),v2)
      
      state.ls <- list("BB" = setNames(lapply(1:nrow(BBval.mat),function(r) BB.mat[BBval.mat[r,],]),
                                       c("AB-CD-EF","AC-BD-EF","AD-BC-EF")),
                       "QB" = list("ABCD-EF" = QB.mat))
      
    } else{
      BBval.mat <- do.call(rbind,lapply(1:(ncol(v2)/2), function(cl1)
        t(gen_state.mat(v1,v2[,-c(cl1,ncol(v2) - cl1 + 1)],onlyNames = TRUE))
      ))
      
      QB.mat <- gen_state.mat(v1,t(expand.grid((ploidy + 1):(ploidy + ploidy2),(ploidy + 1):(ploidy + ploidy2))[-rem.quad,]))
      
      state.ls <- list("BB" = setNames(lapply(1:nrow(BBval.mat),function(r) BB.mat[BBval.mat[r,],]),
                                       c("AB-CD-EF","AB-CE-DF","AB-CF-DE")),
                       "QB" = list("AB-CDEF" = QB.mat))
    }
    
    all.states <- rownames(QB.mat)
    
    if(bivalent_decoding){
      state.ls <- state.ls[1]
      all.states <- rownames(BB.mat)
    }
    
  } else if(ploidyF1 == 4 & ploidy == 4){ #this assumes we are not in a 6 x 2 population!! Need to cover this possibility later, perhaps?
    
    BB.mat <- gen_state.mat(v1,v2)
    
    BBval.mat <- do.call(rbind,lapply(1:max(ncol(v1)/2,1), function(cl1)
      t(sapply(1:max(ncol(v2)/2,1), function(cl2) gen_state.mat(v1[,-c(cl1,ncol(v1) - cl1 + 1)],
                                                                v2[,-c(cl2,ncol(v2) - cl2 + 1)],onlyNames = TRUE)
      ))))
    
    QQ.mat <- gen_state.mat(t(expand.grid(1:ploidy,1:ploidy)[-rem.quad,]),
                            t(expand.grid((ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2))[-rem.quad,]))
    
    BQ.mat <- setNames(lapply(1:(ncol(v1)/2), function(cl1) gen_state.mat(v1[,-c(cl1,ncol(v1) - cl1 + 1)],
                                                                          t(expand.grid((ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2))[-rem.quad,]))),
                       c("AB-CD-EFGH","AC-BD-EFGH","AD-BC-EFGH"))
    
    QB.mat <- setNames(lapply(1:(ncol(v2)/2), function(cl2) gen_state.mat(t(expand.grid(1:ploidy,1:ploidy)[-rem.quad,]),
                                                                          v2[,-c(cl2,ncol(v2) - cl2 + 1)])),
                       c("ABCD-EF-GH","ABCD-EG-FH","ABCD-EH-FG"))
    
    state.ls <- list("BB" = setNames(lapply(1:nrow(BBval.mat),function(r) BB.mat[BBval.mat[r,],]),
                                     c("AB-CD-EF-GH","AB-CD-EG-FH","AB-CD-EH-FG",
                                       "AC-BD-EF-GH","AC-BD-EG-FH","AC-BD-EH-FG",
                                       "AD-BC-EF-GH","AD-BC-EG-FH","AD-BC-EH-FG")),
                     "BQ" = BQ.mat,
                     "QB" = QB.mat,
                     "QQ" = list("ABCD-EFGH" = QQ.mat))
    
    all.states <- rownames(QQ.mat)
    
    if(bivalent_decoding) {
      state.ls <- state.ls[1]
      all.states <- rownames(BB.mat)
    }
    
  } else if(ploidyF1 == 6 & ploidy == 6){
    
    P1.Valnames <- sapply(hexa.list, function(hl) paste0(apply(apply(hl,1,function(x) LETTERS[x]),1,paste0,collapse=""),collapse="-"))
    P2.Valnames <- sapply(hexa.list, function(hl) paste0(apply(apply(hl+6,1,function(x) LETTERS[x]),1,paste0,collapse=""),collapse="-"))
    
    P1val.list <- setNames(lapply(hexa.list, function(hl) apply(expand.grid(hl[,1],hl[,2],hl[,3]),1,sort)),P1.Valnames)
    P2val.list <- setNames(lapply(hexa.list, function(hl) apply(expand.grid(hl[,1] + 6,hl[,2] + 6,hl[,3] + 6),1,sort)),P2.Valnames)
    
    BB.ls_6x <- do.call(c,lapply(P1val.list, function(cl1) lapply(P2val.list, function(cl2) gen_state.mat(cl1,cl2))))
    names(BB.ls_6x) <- gsub("\\.","-",names(BB.ls_6x))
    
    if(!bivalent_decoding){
      
      P1hexa.combs <- t(expand.grid(1:ploidy,1:ploidy,1:ploidy)[-rem.hex,])
      P2hexa.combs <- t(expand.grid((ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2))[-rem.hex,])
      
      BH.ls_6x <- lapply(P1val.list, function(cl1) gen_state.mat(cl1,P2hexa.combs))
      HB.ls_6x <- lapply(P2val.list, function(cl2) gen_state.mat(P1hexa.combs,cl2))
      
      names(BH.ls_6x) <- paste0(names(BH.ls_6x),"-GHIJKL")
      names(HB.ls_6x) <- paste0("ABCDEF-",names(HB.ls_6x))
      
      if(full_multivalent_hexa) {
        HH.ls_6x <- list("ABCDEF-GHIJKL" = gen_state.mat(P1hexa.combs,P2hexa.combs))
        state.ls <- list("BB" = BB.ls_6x,"BH" = BH.ls_6x,"HB" = HB.ls_6x,"HH" = HH.ls_6x)
        
        all.states <- sort(rownames(HH.ls_6x[[1]]))
        
      } else{
        state.ls <- list("BB" = BB.ls_6x,"BH" = BH.ls_6x,"HB" = HB.ls_6x)#,"HH" = HH.ls_6x)
        all.states <- sort(unique(c(sapply(BH.ls_6x,rownames),sapply(HB.ls_6x,rownames))))
      }
      
    } else{
      state.ls <- list("BB" = BB.ls_6x)
      all.states <- gen_state.mat(v1,v2,onlyNames = TRUE)
      
    }
  } else{
    stop("Currently that combination of parental ploidies is not handled. Suggest to contact Peter Bourke.")
  }
  
  return(list(state.ls = state.ls,
              all.states = all.states))
} #state_fun

#' Error and warning handling for dosage_matrix
#' @param dosage_matrix An integer matrix with markers in rows and individuals in columns
#' @noRd
test_dosage_matrix <- function(dosage_matrix){
  if(inherits(dosage_matrix,"data.frame")){
    warning("dosage_matrix should be a matrix, now it's a data.frame.")
    message("Trying to convert it to matrix, assuming markernames are in the first column..")
    rownames(dosage_matrix) <- dosage_matrix[,1]
    dosage_matrix <- as.matrix(dosage_matrix[,-1])
    class(dosage_matrix) <- "integer"
  } else if(inherits(dosage_matrix,"matrix")){
    rn <- rownames(dosage_matrix)
    cn <- colnames(dosage_matrix)
    if(is.null(rn)) stop("The rownames of dosage_matrix should contain markernames. Now NULL")
    if(is.null(cn)) stop("The columnnames of dosage_matrix should contain genotype names. Now NULL")
    if(!(typeof(dosage_matrix)=="integer" | typeof(dosage_matrix)=="double")){
      warning("dosage_matrix should be integer or numeric. Trying to convert it.. ")
      class(dosage_matrix) <- "integer"
    }
  } else {
    stop("dosage_matrix should be a matrix of integers.
         See the manual of this function for more information.")
  }
  return(dosage_matrix)
}

#' Error and warning handling for IBD_list as estimated by \code{estimate_IBD}
#' @param IBD_list List of IBD probabilities
#' @noRd
test_IBD_list <- function(IBD_list){
  if(!inherits(IBD_list,"list")) stop("IBD_list: list input expected but not found")
  if(!all(c("IBDtype","IBDarray","map","parental_phase","biv_dec","gap", "genocodes",
            "pairing","ploidy","ploidy2","method","error") %in% names(IBD_list[[1]]) )) {
    stop(paste0("The following required list elements were missing from IBD_list:\n\n",
                paste(setdiff(c("IBDtype","IBDarray","map","parental_phase","biv_dec","gap", "genocodes",
                                "pairing","ploidy","ploidy2","method","error"),
                              names(IBD_list[[1]])), collapse = ", "),
                ".\n\nSuggest to re-estimate IBD probabilities before proceeding.\n"))
  }
  
  listdepth <- list.depth(IBD_list) #polyqtlR:::list.depth(IBD_list)
  if(listdepth != 3) stop("Unexpected input - IBD_list is expected to be a nested list representing 1 or more chromosomes! Please check input.")
  
  return(IBD_list)
}

#' Error and warning handling for probgeno_df data-frame of probabilistic genotypes (scores)
#' @param probgeno_df A data frame as read from the scores file produced by function
#' \code{saveMarkerModels} of R package \code{fitPoly}, or alternatively, a data frame containing at least the following columns:
#' \describe{
#' \item{SampleName }{Name of the sample (individual)}
#' \item{MarkerName}{Name of the marker}
#' \item{P0}{Probabilities of dosage score '0'}
#' \item{P1, P2... etc.}{Probabilities of dosage score '1' etc. (up to max offspring dosage, e.g. P4 for tetraploid population)}
#' }
#' @param ploidy  Ploidy of the F1 population (can be 2, 3, 4 or 6)
#' @noRd
test_probgeno_df <- function(probgeno_df,
                             ploidy){
  if(!inherits(probgeno_df, "data.frame")) {
    warning("probgeno_df should be a data-frame. Attempting to coerce...")
    probgeno_df <- as.data.frame(probgeno_df)
  }
  
  pcolnames <- paste0("P",0:ploidy)
  
  if(!all(c("MarkerName", "SampleName",pcolnames) %in% colnames(probgeno_df))) stop(paste("colnames MarkerName, SampleName,",paste0(pcolnames, collapse=", "),"are required!"))
  
  if(!is.factor(probgeno_df$SampleName)) probgeno_df$SampleName <- as.factor(probgeno_df$SampleName)
  if(!is.factor(probgeno_df$MarkerName)) probgeno_df$MarkerName <- as.factor(probgeno_df$MarkerName)
  
  return(probgeno_df)
} #test_probgeno_df

#' Bivalent-pairing transition matrix function, diploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TM.biv.2 <- function(m1,r_vect) {
  if(m1 == 0){
    out <- bivTM(0.5)
  } else{
    out <- bivTM(r_vect[m1])
  }
  return(out)
}#TM.biv.2

#' Bivalent-pairing transition matrix function, tetraploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TM.biv.4 <- function(m1,r_vect) {
  if(m1 == 0){
    out <- kronecker(bivTM(0.5),bivTM(0.5)) #Starting transition matrix as one of the positions is 0 (i.e. in initialisation state)
  } else{
    out <- kronecker(bivTM(r_vect[m1]),bivTM(r_vect[m1]))
  }
  return(out)
} #TM.biv.4

#' Bivalent-pairing transition matrix function, hexaploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TM.biv.6 <- function(m1,r_vect) {
  if(m1 == 0){
    out <- kronecker(bivTM(0.5),kronecker(bivTM(0.5),bivTM(0.5)))
  } else{
    out <- kronecker(bivTM(r_vect[m1]),kronecker(bivTM(r_vect[m1]),bivTM(r_vect[m1])))
  }
  return(out)
} #TM.biv.6

#' Hexavalent-pairing transition matrix function, hexaploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @param rem_hex Index of genotype classes to remove, work-around from overly-general transition 
#' matrix with redundant classes
#' @noRd
TM.hex <- function(m1,r_vect,rem_hex = rem.hex) {
  if(m1 == 0){
    out <- kronecker(hexTM(0.5),kronecker(hexTM(0.5),hexTM(0.5)))
  } else{
    out <- kronecker(hexTM(r_vect[m1]),kronecker(hexTM(r_vect[m1]),hexTM(r_vect[m1])))
  }
  out <- out[-rem_hex,-rem_hex]
  return(out)
} #TM.hex

#' Quadrivalent-pairing transition matrix function, tetraploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @param rem_quad Index of genotype classes to remove, work-around from overly-general transition 
#' matrix with redundant classes
#' @noRd
TM.quad <- function(m1,r_vect,rem_quad = rem.quad) {
  if(m1 == 0){
    out <- kronecker(quadTM(0.5),quadTM(0.5)) #Starting transition matrix as one of the positions is 0 (i.e. in initialisation state)
  } else{
    out <- kronecker(quadTM(r_vect[m1]),quadTM(r_vect[m1]))
  }
  out <- out[-rem_quad,-rem_quad]
  return(out)
} #TM.quad

#' Diploid bi-parental transition matrix
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TMfun.2x_B <- TM.biv.4

#' Triploid bi-parental transition matrix, bivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.3x_BB <- function(...) kronecker(TM.biv.4(...),TM.biv.2(...))

#' Triploid bi-parental transition matrix, quadrivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.3x_QB <- function(...) kronecker(TM.quad(...),TM.biv.2(...))

#' Tetraploid bi-parental transition matrix, bivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_BB <- function(...) kronecker(TM.biv.4(...), TM.biv.4(...))

#' Tetraploid bi-parental transition matrix, bivalent-quadrivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_BQ <- function(...) kronecker(TM.biv.4(...), TM.quad(...))

#' Tetraploid bi-parental transition matrix, quadrivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_QB <- function(...) kronecker(TM.quad(...), TM.biv.4(...))

#' Tetraploid bi-parental transition matrix, quadrivalent-quadrivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_QQ <- function(...) kronecker(TM.quad(...), TM.quad(...))

#' Hexaploid bi-parental transition matrix, bivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_BB <- function(...) kronecker(TM.biv.6(...), TM.biv.6(...))

#' Hexaploid bi-parental transition matrix, bivalent-hexavalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_BH <- function(...) kronecker(TM.biv.6(...), TM.hex(...))

#' Hexaploid bi-parental transition matrix, hexavalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_HB <- function(...) kronecker(TM.hex(...), TM.biv.6(...))

#' Hexaploid bi-parental transition matrix, hexavalent-hexavalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_HH <- function(...) kronecker(TM.hex(...), TM.hex(...)) #this becomes computationally intractable


#' Calculate the weighted variance
#' @description Generalisation of the variance to include weights
#' @param x Vector of interest
#' @param w Vector of weights
#' @param na.rm Should missing values be removed? By default, \code{FALSE}
#' @noRd
weighted.var <- function(x, w, na.rm = FALSE) {
  if (na.rm) {
    w <- w[i <- !is.na(x)]
    x <- x[i]
  }
  sum.w <- sum(w)
  sum.w2 <- sum(w^2)
  mean.w <- sum(x * w) / sum(w)
  (sum.w / (sum.w^2 - sum.w2)) * sum(w * (x - mean.w)^2, na.rm =
                                       na.rm)
}

#' Write a header for the log file
#' @description Functionalized writing of function name and arguments as start for log paragraph.
#' @param matc A object of class \code{call}
#' @param log A character string specifying the log file
#' @noRd
write.logheader <- function(matc, log){
  args <- as.list(matc)
  #args.df <- data.frame(names=names(args), values=as.character(args))
  #args.df <- args.df[-1,]
  mod <- "w"
  if(file.exists(log)) mod <- "a"
  log.conn <- file(log, mod)
  if(mod=="w") write(c("<style type=\"text/css\">.table {width: 40%;}</style>",
                       "##polyqtlR log file",
                       "This file is written in [Rmarkdown](https://en.wikipedia.org/wiki/Markdown).",
                       "Use knitr to export it as a nicely formatted html or word file."),
                     log.conn)
  close(log.conn)
  mod <- "a"
  log.conn <- file(log, mod)
  write(c(#"\n-----------------------------------------------------------",
    paste0("\n###Log for function call of `", args[[1]], "`"),
    as.character(Sys.time())
  ),
  #"-----------------------------------------------------------"),
  file = log.conn)
  close(log.conn)
  log.conn <- file(log, "a")
  write("\nwith the following call:\n", log.conn)
  write("```", log.conn)
  sink(log.conn)
  print(matc)
  sink()
  write("```\n", log.conn)
  #write("With the following arguments:", file=log.conn)
  #write.table(args.df, quote=F, sep=" = ", file=log.conn, row.names=F, col.names=F)
  close(log.conn)
}

Try the polyqtlR package in your browser

Any scripts or data that you put into this service are public.

polyqtlR documentation built on May 29, 2024, 7:16 a.m.