R/accessory_functions.R

Defines functions write.logheader weighted.var TMfun.6x_HH TMfun.6x_HB TMfun.6x_BH TMfun.6x_BB TMfun.4x_QQ TMfun.4x_QB TMfun.4x_BQ TMfun.4x_BB TMfun.3x_QB TMfun.3x_BB TM.quad TM.hex TM.biv.6 TM.biv.4 TM.biv.2 test_probgeno_df test_IBD_list test_dosage_matrix state_fun quadTM probgeno_df_to_array Nstates.fun NettletonDoerge mapseq list.depth hexTM findQTLpeaks fast_permute fast_IBD colour.bar bivTM

#' Bivalent transition matrix function
#' @param r recombination frequency
#' @noRd
bivTM <- function(r) 
  matrix(c(1-r,r,r,1-r),ncol=2)


#' Add colour bar scale to heatplot
#' @description Function to generate a scale for heatplots
#' @param col.data vector of colours
#' @param min minimum colour
#' @param max maximum colour
#' @param cex.ticks size of ticks on colour bar
#' @param nticks number of ticks on colour bar
#' @param ticks vector of positions of ticks on colour bar
#' @param title optional title for colour bar
#' @param ylab optional y-axis label for colour bar
#' @param cex.lab size of labels on colour bar
#' @noRd
colour.bar <- function(col.data, min, max=-min, cex.ticks = 1.2, nticks=11,
                       ticks=seq(min, max, len=nticks), title='', ylab = '',
                       cex.lab = 1) {
  scale <- length(col.data)/(max-min)
  
  plot(c(0,10), c(min,max), type='n', bty='n', xaxt='n', xlab='', yaxt='n', ylab=ylab, main=title, cex.lab = cex.lab)
  axis(2, ticks, las=1, cex.axis = cex.ticks)
  for (i in 1:length(col.data)) {
    y = (i-1)/scale + min
    rect(0,y,10,y+1/scale, col=col.data[i], border=NA)
  }
}

#' Extremely fast estimation of identity-by-descent (IBD) probabilities.
#' @description The method of "quick-and-dirty" IBD estimation was originally developed by Bourke (2014) for tetraploid data only, and was subsequently
#' generalised by van Geest et al. (2017). Can be useful for a first quick analysis, particularly in large hexaploid datasets. However, the higher accuracy of IBD
#' probabilities generated by \code{\link{hmm_IBD}} makes that function the preferred choice.
#' @param phased_maplist A list of linkage maps calculated by \code{polymapR::create_phased_maplist}
#' @param dosage_matrix An integer matrix with markers in rows and individuals in columns
#' @param map_function The mapping function to calculate recombination frequency based on map distance (haldane or kosambi)
#' @param ploidy Ploidy level of parents or of the first parent
#' @param ploidy2 Ploidy level of the second parent. By default \code{NULL}, if parents have equal ploidy levels.
#' @param fix_threshold The threshold to fix the IBD probabilities while correcting for the sum of probabilities.
#' @param factor_dist Factor to increase or decrease the recombination frequencies as calculated from the map distances.
#' @param ncores Number of cores to use for multi-core processing.
#' @return
#' A nested list (with the same length as phased_maplist). Each list element contains the following items:
#' \item{IBDtype}{Always "haplotypeIBD" for the output of this function}
#' \item{IBDarray}{An array of IBD probabilities. The dimensions of the array are: markers, homologues and individuals.}
#' \item{map}{Integrated linkage map positions of markers used in IBD calculation}
#' \item{parental_phase}{The parental marker phasing, coded in 1 and 0's}
#' \item{biv_dec}{\code{NULL}}
#' \item{gap}{The gap size used in IBD interpolation, by default \code{NULL}. See \code{\link{spline_IBD}}}
#' \item{genocodes}{\code{NULL}}
#' \item{pairing}{\code{NULL}}
#' \item{ploidy}{ploidy of parent 1}
#' \item{ploidy2}{ploidy of parent 2}
#' \item{method}{The method used, here "heur" (heuristic)}
#' \item{error}{The error prior used, not relevant here thus \code{NULL}}
#' @references
#' Bourke P.M. (2014) QTL analysis in polyploids: Model testing and power calculations. Wageningen University (MSc thesis)
#' @examples
#' data("phased_maplist.4x","SNP_dosages.4x")
#' IBD_list.4x <- fast_IBD(phased_maplist = phased_maplist.4x,
#'                         dosage_matrix = SNP_dosages.4x,
#'                         ploidy = 4)
#' @noRd
fast_IBD <- function(phased_maplist,
                     dosage_matrix,
                     map_function = "haldane",
                     ploidy,
                     ploidy2 = NULL,
                     fix_threshold = 0.1,
                     factor_dist = 1,
                     ncores = 1){
  
  if(is.null(ploidy2)) ploidy2 <- ploidy
  ploidy1 <- ploidy
  
  map_function <- match.arg(map_function, choices = c("kosambi", "haldane"))
  
  # check compatibility of dosages and phased maps
  p1check <- function(n) all(rowSums(phased_maplist[[n]][,3:(2+ploidy1)]) ==
                               dosage_matrix[match(as.character(phased_maplist[[n]][,"marker"]),rownames(dosage_matrix)),"P1"],
                             na.rm = TRUE)
  p2check <- function(n) all(rowSums(phased_maplist[[n]][,(3+ploidy1):(2+ploidy1+ploidy2)]) ==
                               dosage_matrix[match(as.character(phased_maplist[[n]][,"marker"]),rownames(dosage_matrix)),"P2"],
                             na.rm = TRUE)
  
  if(!all(sapply(seq(phased_maplist), p1check)) | !all(sapply(seq(phased_maplist), p1check)))
    stop("Inconsistent input detected.\nIt is likely that unconverted dosages are being combined with a converted phased maplist, or vice versa.")
  
  if(map_function == "kosambi"){
    mapfun <- function(d, f = 1){
      d <- d/100/f
      0.5*((exp(4*d)-1)/(exp(4*d)+1))
    }
  } else if (map_function == "haldane"){
    mapfun <- function(d, f = 1){
      d <- d/100/f
      0.5*(1-exp(-2*d))
    }
  }
  
  # function to correct total sum of genotype probabilties to 0.5*ploidy
  correct_to_sumScore <- function(subg_P, threshold = 0.1, ploidy){
    sumScore <- 0.5*ploidy
    # for all markers in subg_P
    for(m in rownames(subg_P)){
      gp <- subg_P[m,]
      
      ## Make sure that the IBDs don't exceed 0.5*ploidy
      if(sum(gp) > sumScore) {
        gp <- gp*sumScore/(sum(gp))
        # message("Excessive IBD probabilities detected and corrected for")
      }
      
      # if half of homologues 0 -> fully informative
      if(sum(gp == 0) == 0.5*ploidy){
        gp[gp != 0] <- 1
        # if half of homologues 1 -> fully informative
      } else if(sum(gp == 1) == 0.5*ploidy){
        gp[gp < 1] <- 0
        # otherwise correct to 0.5*ploidy
      } else {
        # fix genotypes that approach 0 or 1 according to fix_threshold
        fix <- gp < threshold | gp > (1-threshold)
        
        if(sum(fix) == ploidy){
          fix <- gp == 0 | gp ==1
        }
        
        # find correction and correct to sum = 0.5*ploidy
        corr <- sumScore - sum(gp[fix])
        
        gp[!fix] <- gp[!fix]*corr/sum(gp[!fix])
        # Some probabilites can end up >1. Correct for this:
        while(any(gp > 1)){
          addp <-  gp < 1 & gp > 0#& gp>=0.5
          remp <- gp > 1
          gp[addp] <- gp[addp] + (sum(gp[gp > 1] - 1)/sum(addp))
          gp[remp] <- 1
        }
      }
      subg_P[m,] <- gp
    }
    return(subg_P)
  }
  
  win <- Sys.info()["sysname"] == "Windows"
  if (win) {
    cl <- parallel::makeCluster(ncores)
    doParallel::registerDoParallel(cl)
  } else {
    doParallel::registerDoParallel(cores = ncores)
  }
  
  ## R CMD CHECK work-around:
  i <- NULL
  
  outlist <-
    foreach::foreach(
      i = seq(phased_maplist),
      .inorder = TRUE
    ) %dopar% {
      ph <- phased_maplist[[i]]
      posdf <- ph[,c("marker", "position")]
      
      ## If marker data is conflicting and co-located, can lead to problems. Jitter positions if necessary
      while(length(unique(posdf$position)) < nrow(posdf)){
        warning("Non-unique marker positions detected. Jittering positions.")
        posdf[duplicated(posdf$position),"position"] <- jitter(posdf[duplicated(posdf$position),"position"],amount=0.1)
        #Map might no longer be in order:
        ph <- ph[order(posdf$position),]
        posdf <- posdf[order(posdf$position),]
      }
      
      posvec <- posdf$position
      mark <- as.character(posdf$marker)
      names(posvec) <- mark
      
      # get all marker combinations
      combs <- expand.grid(mark, mark, stringsAsFactors = FALSE)
      
      # make a full distance matrix
      pos1 <- posvec[combs[,1]]
      pos2 <- posvec[combs[,2]]
      
      distdf <- data.frame(marker_a = as.character(combs[,1]),
                           marker_b = as.character(combs[,2]),
                           distance = abs(pos2 - pos1))
      
      suppressMessages(distmat <- reshape2::acast(distdf, marker_a ~ marker_b))
      distmat <- distmat[names(posvec), names(posvec)]
      
      k0_mat <- mapfun(distmat, f = factor_dist)
      
      rownames(ph) <- ph$marker
      ph <- ph[,-which(colnames(ph) %in% c("marker", "position"))]
      ph <- as.matrix(ph)
      
      scd <- dosage_matrix[rownames(ph),]
      
      # get parental scores
      par <- scd[,c("P1", "P2")]
      progeny <- scd[,-which(colnames(scd) %in% c("P1", "P2"))]
      
      # make a garray (=genotype probability array) with only 0.5
      garray <- array(data = 0.5, dim = c(nrow(ph), ploidy1 + ploidy2, ncol(scd)-2),
                      dimnames = list(rownames(scd), paste0("h", 1:(ploidy1 + ploidy2)), colnames(scd)[3:ncol(scd)]))
      
      # get all markers with fully informative dosage
      P1_hp <- scd[,"P1"] == 0.5*ploidy1
      P2_hp <- scd[,"P2"] == 0.5*ploidy2
      
      # get all maximum scores per marker which are informative in offspring
      sumP <- rowSums(par)
      
      # fill in garray
      for(m in seq(nrow(scd))){
        # get progeny that is informative - this is the kernel of the original approach:
        # progeny score which equals (or exceeds, DR?) the sum of the parental scores have inherited
        # all the parental alleles, and so we assign all these homologues with a probability of 1
        progS <- colnames(progeny)[progeny[m,] >= sumP[m] & !is.na(progeny[m,])]
        
        # assign 1 if so.
        garray[m,ph[m,] == 1,progS] <- 1
        
        # special case where dosage is fully informative (dosage = 0.5*ploidy)
        # only if one of the P1 dosages should be 0.5*ploidy
        # and marker is assigned to 0.5*ploidy homologues
        # apply zeros to all others
        if((P1_hp[m] | P2_hp[m]) & sum(ph[m,]) == sumP[m]){
          if(P1_hp[m])
            garray[m,c(ph[m,1:ploidy1] == 0, rep(FALSE, ploidy2)),progS] <- 0
          if(P2_hp[m])
            garray[m,c(rep(FALSE, ploidy1) ,ph[m,(ploidy1+1):(ploidy1 + ploidy2)] == 0),progS] <- 0
        }
        # if allele is absent in progeny, also informative:
        prog0 <- colnames(progeny)[progeny[m,] == 0 & !is.na(progeny[m,])]
        garray[m,ph[m,] == 1,prog0] <- 0
      }
      
      # order garray
      garray <- garray[colnames(distmat),,]
      
      # find nearest informative and change genotype probability accordingly
      for(g in dimnames(garray)[[3]]){
        for(h in dimnames(garray)[[2]]){
          gp <- garray[,h,g]
          inf1 <- gp == 1
          inf0 <- gp == 0
          informative <- inf1 | inf0
          if(any(informative)){
            mdist <- k0_mat[names(gp)[!informative], names(gp)[informative], drop = FALSE]
            near_inf <- apply(mdist, 1, which.min)
            gps <- mdist
            if(any(inf1)){
              gps[,names(inf1)[inf1]] <- 1 - gps[,names(inf1)[inf1]] 
            }
            garray[rownames(mdist),h,g] <- gps[cbind(seq(near_inf), near_inf)]
          }
        }
      }
      
      # call correct_to_sumScore to array:
      for(g in dimnames(garray)[[3]]){
        corr_P1 <- correct_to_sumScore(subg_P = garray[,1:ploidy1,g],
                                       threshold = fix_threshold, ploidy = ploidy1)
        garray[,1:ploidy1,g] <- corr_P1
        
        corr_P2 <- correct_to_sumScore(subg_P = garray[,(ploidy1 +1):(ploidy1 + ploidy2),g],
                                       threshold = fix_threshold, ploidy = ploidy2)
        garray[,(ploidy1 +1):(ploidy1 + ploidy2),g] <- corr_P2
      }
      
      return(list("IBDtype" = "haplotypeIBD",
                  "IBDarray" = garray,
                  "map" = cbind("chromosome" = i,
                                posdf),
                  "parental_phase" = ph,
                  "marginal.likelihoods" = NULL,
                  "valency" = NULL,
                  "offspring" = dimnames(garray)[[3]],
                  "biv_dec" = NULL,
                  "gap" = NULL,
                  "genocodes" = NULL,
                  "pairing" = NULL,
                  "ploidy" = ploidy,
                  "ploidy2" = ploidy2,
                  "method" = "heur",
                  "error" = NULL))
    }
  
  if(win) parallel::stopCluster(cl)
  
  if(!is.null(names(phased_maplist))){
    names(outlist) <- names(phased_maplist)
  } else{
    warning(paste("phased_maplist had no LG names assigned. Assigning names",
                  paste0("'LG",seq(length(outlist)),"'", collapse = ", "),"to output list."))
    names(outlist) <- paste0("LG",seq(length(outlist)))
  }
  
  return(outlist)
} #fast_IBD

#' Extension of the \code{\link{QTLscan}} function, offering an optimised permutation test when the experimental setting (i.e. phenotype structure) is simple 
#' @description Function to run optimised QTL permutation test using IBD probabilities
#' @param IBD_list List of IBD probabilities
#' @param Phenotype.df A data.frame containing phenotypic values
#' @param genotype.ID The colname of \code{Phenotype.df} that contains the offspring identifiers (F1 names)
#' @param trait.ID The colname of \code{Phenotype.df} that contains the response variable to use in the model
#' @param ploidy Integer. The ploidy of parent 1
#' @param ploidy2 The ploidy of parent 2, by default \code{NULL} i.e. assumed (unless specified) to be equal to the ploidy of parent 1.
#' @param N_perm The number of permutations to run, by default this is 1000.
#' @param alpha The P-value to be used in the selection of a threshold, by default 0.05 (i.e. the 0.95 quantile).
#' @param ncores Number of cores to use, by default 1 only. Works both for Windows and UNIX (using \code{doParallel}).
#' Use \code{parallel::detectCores()} to find out how many cores you have available.
#' @param verbose Logical, by default \code{TRUE}. Should messages be printed during running?
#' @param log Character string specifying the log filename to which standard output should be written. If \code{NULL} log is send to stdout.
#' @return
#' A nested list; each list element (per linkage group) contains the following items:
#' \itemize{
#' \item{QTL.res : }{Single matrix of QTL results with columns chromosome, position, LOD}
#' \item{Perm.res : }{List of the results of the permutation test, with (at least) list items
#' "quantile","threshold" and "scores". Quantile refers to which quantile of scores was used to determine the threshold.
#' Note that scores are each of the maximal LOD scores across the entire genome scan per permutation, thus returning a
#' genome-wide threshold rather than a chromosome-specific threshold. If the latter is preferred, restricting the
#' \code{IBD_list} to a single chromosome and re-running the permutation test will provide the desired threshold.}
#' \item{Map : }{Original map of genetic marker positions upon which the IBDs were based, most often used
#' for adding rug of marker positions to QTL plots.}
#' }
#' @examples
#' data("IBD_4x","Phenotypes_4x")
#' qtl_LODs.4x <- fast_permute(IBD_list = IBD_4x,
#'                        Phenotype.df = Phenotypes_4x,
#'                        genotype.ID = "geno",
#'                        trait.ID = "pheno",
#'                        ploidy = 4)
#' @noRd                       
fast_permute <- function(IBD_list,
                         Phenotype.df,
                         genotype.ID,
                         trait.ID,
                         ploidy,
                         ploidy2 = NULL,
                         N_perm = 1000,
                         alpha = 0.05,
                         ncores = 1,
                         verbose = TRUE,
                         log = NULL){
  
  if(is.null(log)) {
    log.conn <- stdout()
  } else {
    matc <- match.call()
    write.logheader(matc, log)
    log.conn <- file(log, "a")
  }
  
  ## Run some initial checks:
  if(!trait.ID %in% colnames(Phenotype.df)) stop("Trait colname incorrectly specified.")
  if(is.null(ploidy2)) ploidy2 <- ploidy
  
  ## Check the list depth:
  listdepth <- list.depth(IBD_list) #polyqtlR:::list.depth(IBD_list)
  if(listdepth != 3) stop("Unexpected input - IBD_list is expected to be a nested list representing 1 or more chromosomes! Please check input.")
  
  IBDarray <- IBD_list[[1]]$IBDarray
  IBDtype <- IBD_list[[1]]$IBDtype
  bivalent_decoding <- IBD_list[[1]]$biv_dec
  gap <- IBD_list[[1]]$gap
  
  if(IBDtype == "genotypeIBD"){
    if(is.null(IBD_list[[1]]$genocodes)){
      ## We are using the output of TetraOrigin
      Nstates <- Nstates.fun(biv_dec = bivalent_decoding, pl = ploidy, pl2 = ploidy2)
      mname <- paste0("GenotypeMat",Nstates)
      indicatorMatrix <- as.matrix(get(mname, envir = getNamespace("polyqtlR")))
    } else{
      ## We are using the output of hmm_IBD
      GenCodes <- t(sapply(sapply(IBD_list[[1]]$genocodes,function(x) strsplit(x,"")), function(y) sapply(y,function(z) which(letters == z))))
      Nstates <- nrow(GenCodes)
      indicatorMatrix <- matrix(0,nrow=Nstates, ncol=ploidy + ploidy2)
      for(r in 1:nrow(indicatorMatrix)){
        for(h in 1:ncol(GenCodes)){
          indicatorMatrix[r,GenCodes[r,h]] <- indicatorMatrix[r,GenCodes[r,h]] + 1
        }
      }
      
    }
    
    # indicatorMatrix <- as.matrix(indicatorMatrix[,-c(1,ploidy+1)])
  } else if(IBDtype == "haplotypeIBD"){
    Nstates <- ploidy + ploidy2 #this is not actually needed..
    indicatorMatrix <- NULL
  } else{
    stop("Unable to determine IBD type, please check IBD_list elements have a valid $IBDtype tag.")
  }
  
  ## Could just check for equality here also, modulo check was from older version...
  if(dim(IBDarray)[2] %% Nstates != 0) stop("Incompatible input detected..")
  
  ## First get the starting set - the ones that were phenotyped and genotyped:
  if(any(is.na(Phenotype.df[,genotype.ID]))){
    warning("Missing genotype.ID values detected. Removing and proceeding without...")
    Phenotype.df <- Phenotype.df[!is.na(Phenotype.df[,genotype.ID]),]
  }
  
  phenoGeno0 <- intersect(
    dimnames(IBDarray)[[3]],
    unique(Phenotype.df[,genotype.ID]))
  
  if(verbose) write(paste("There are",length(phenoGeno0),"individuals with matching identifiers between phenotypic and genotypic datasets.\n"),log.conn)
  
  pheno <- Phenotype.df[Phenotype.df[,genotype.ID] %in% phenoGeno0,c(genotype.ID,trait.ID)]
  
  ## Make sure there is only a single observation per genotype:
  if(length(which(duplicated(pheno[,genotype.ID]))) > 0){
    warning(paste("Duplicate phenotypes identified for",length(which(duplicated(pheno[,genotype.ID]))), "samples. Their mean values were used."))
    pheno <-
      do.call("rbind",
              sapply(unique(pheno[,genotype.ID]), function(gen){
                temp.mean <- mean(pheno[pheno[,genotype.ID] == gen,trait.ID], na.rm = TRUE)
                temp.out <- pheno[pheno[,genotype.ID] == gen,][1,,drop=FALSE]
                temp.out[,trait.ID] <- round(temp.mean,1)
                return(temp.out)}, simplify = FALSE))
  }
  
  if(length(which(is.na(pheno[,trait.ID]))) > 0){
    write(paste("A further",length(which(is.na(pheno[,trait.ID]))),
                "matched individuals had missing phenotypic values and were removed.\n"),
          log.conn)
    pheno <- pheno[!is.na(pheno[,trait.ID]),]
  }
  
  pheno <- pheno[order(pheno[,1]),]
  phenoGeno0 <- pheno[,1]
  
  colnames(pheno)[2] <- "Pheno"
  phenoGeno <- as.character(unique(pheno[,1]))
  popSize <- length(phenoGeno)
  
  if(popSize < ploidy + ploidy2) stop("Insufficient population size to fit QTL model with IBD probabilities. Suggest to use single marker approach")
  if(popSize <= 2*(ploidy + ploidy2)) warning("Population size may be too small for accurate model fitting. Results should be interpreted with caution.")
  
  RSS0 <- sum((pheno$Pheno - mean(pheno$Pheno))^2)
  
  if(verbose) write(paste("\nRunning QTL analysis with",popSize,"individuals...\n"),log.conn)
  
  if(length(IBD_list) > 1){
    IBDprobs <- abind::abind(lapply(seq(length(IBD_list)), function(i) IBD_list[[i]]$IBDarray[,,phenoGeno]),
                             along = 1)
  } else{
    IBDprobs <- IBD_list[[1]]$IBDarray[,,phenoGeno]
  }
  
  ## Turn the genotype IBDs into haplotype IBDs:
  if(IBDtype == "genotypeIBD"){
    haploProbs <- array(apply(IBDprobs,3,function(x) x%*%indicatorMatrix),
                        dim = c(dim(IBDprobs)[1],dim(indicatorMatrix)[2],dim(IBDprobs)[3]),
                        dimnames = list(dimnames(IBDprobs)[[1]],
                                        paste0("X",1:(ploidy+ploidy2)),
                                        dimnames(IBDprobs)[[3]])
    )
  } else{
    haploProbs <- IBDprobs #a bit inefficient, but patch for now...
    dimnames(haploProbs)[[2]] <- paste0("X",1:(ploidy+ploidy2)) #replaces h1, h2, h3...
  }
  
  ## Compile the original map positions
  map <- do.call("rbind",
                 lapply(seq(length(IBD_list)), function(c)
                   IBD_list[[c]]$map#[,c("chromosome","position")]
                 ))
  ## Compile the map positions of the IBDd positions also:
  if(!is.null(gap)) {map.1 <- do.call(rbind,
                                      lapply(seq(length(IBD_list)), function(c)
                                        data.frame("chromosome" = c,
                                                   "position" = as.numeric(setdiff(unlist(strsplit(dimnames(IBD_list[[c]]$IBDarray)[[1]],"M")),"c"))
                                        )#Just record chm and position.
                                      ))
  } else{
    map.1 <- map
  }
  
  if(verbose) message("Running permutation test....")
  time_start <- Sys.time()
  
  win <- Sys.info()["sysname"] == "Windows"
  if (win) {
    cl <- parallel::makeCluster(ncores)
    doParallel::registerDoParallel(cl)
  } else {
    doParallel::registerDoParallel(cores = ncores)
  }
  
  pheno.mat <- matrix(pheno$Pheno,ncol=1)
  
  row.split <- split(1:dim(haploProbs)[1], ceiling(round((1:dim(haploProbs)[1])/(dim(haploProbs)[1]/ncores),4)))
  
  perms <- sapply(1:N_perm, function(n) sample(pheno$Pheno)) 
  
  pheno.mat <- cbind(pheno.mat,perms)
  
  modelterms <- paste0("X", setdiff(1:(ploidy + ploidy2),c(1,ploidy + 1))) #boundary condition applied
  
  run_lm.vect <- function(X = haploProbs[1,,],
                          phenos = pheno.mat,
                          tm = modelterms){
    # X is a 2d matrix (slice) of haplotype IBDs
    # phenos is a matrix of phenotypes, with at least 1 column
    lmRes <- lm(phenos ~ t(X)[,tm],na.action = na.exclude)
    RSS1 <- apply(as.matrix(lmRes$residuals),2,function(y) sum(y^2))
    
    LOD <- ifelse(RSS1 == 0,0,dim(X)[2]*log10(RSS0/RSS1)/2)
    
    return(LOD)
  }
  
  ## R CMD CHECK work-around:
  i <- NULL
  
  LOD.res <-
    foreach::foreach(
      i = 1:length(row.split),
      # .combine = c,
      .inorder = FALSE
    ) %dopar% {
      list(set = row.split[[i]],
           res = apply(haploProbs[row.split[[i]],,,drop = FALSE],1,
                       function(x) run_lm.vect(X=x, phenos = pheno.mat))
      )
    }
  
  if(win) parallel::stopCluster(cl)
  
  extracted.order <- do.call(c,lapply(LOD.res,function(x) x$set))
  LODResults <- do.call(c,lapply(LOD.res,function(x) x$res[1,]))
  LODResults <- LODResults[order(extracted.order)]
  
  permLODs <- do.call(cbind, lapply(1:ncores, function(l) LOD.res[[l]]$res[-1,]))
  threshold <- quantile(apply(permLODs,1,max), probs = 1 - alpha)
  
  output <- list("QTL.res" =  suppressWarnings(cbind(map.1,"LOD" = LODResults)),
                 "Perm.res" = list(
                   quantile = 1 - alpha,
                   threshold = threshold,
                   threshold.bounds = c(threshold,threshold),#for now, no Nettleton Doerge approximation
                   scores = apply(permLODs,1,max),
                   Nperm = N_perm
                 ),
                 "Map" = map,
                 "Residuals" = pheno$Pheno,
                 "LG_names" = names(IBD_list))
  
  time_end <- Sys.time()
  timediff <- as.numeric(time_end - time_start, units = "mins")
  
  write(
    paste(N_perm,"permutations on a",Sys.info()["sysname"],"machine using",ncores,
          ifelse(ncores>1,"cores","core"),"took",round(timediff, 2),"minutes."),
    log.conn
  )
  
  if(!is.null(log)) close(log.conn)
  
  return(output)
} #fast_permute


#' Extract QTL peak positions from the results of a QTL scan
#' @description Function to find QTL peaks from output of a QTL scan
#' @param LOD_data Output of \code{\link{QTLscan}} function
#' @noRd
findQTLpeaks <- function(LOD_data){
  if(is.null(LOD_data$Perm.res)) stop("findQTLpeaks :: Please re-run QTLscan with perm_test = TRUE to check significance.")
  
  thresh <- LOD_data$Perm.res$threshold
  chms <- unique(LOD_data$QTL.res$chromosome)
  
  QTL.df <- data.frame("LG"=NULL,
                       "cM"=NULL,
                       "deltaLOD"=NULL)
  for(lg in chms){
    lgdat <- LOD_data$QTL.res[LOD_data$QTL.res$chromosome == lg,]
    
    if(any(lgdat$LOD >= thresh)){
      QTL.df <- rbind(QTL.df,data.frame("LG" = lg,
                                        "cM" = lgdat[lgdat$LOD == max(lgdat$LOD),"position"],
                                        "deltaLOD" = lgdat[lgdat$LOD == max(lgdat$LOD),"LOD"] - thresh))
    }
  }
  
  return(QTL.df)
} #findQTLpeaks


#' Hexavalent transition matrix function
#' @param r recombination frequency
#' @noRd
hexTM <- function(r) 
  matrix(c(1-r,r/5,r/5,r/5,r/5,r/5,
           r/5,1-r,r/5,r/5,r/5,r/5,
           r/5,r/5,1-r,r/5,r/5,r/5,
           r/5,r/5,r/5,1-r,r/5,r/5,
           r/5,r/5,r/5,r/5,1-r,r/5,
           r/5,r/5,r/5,r/5,r/5,1-r),ncol=6)


#' Generate IBD probabilities from marker genotypes and a phased linkage map using HMM
#' @description \code{hmm_IBD} is a function for creating identity-by-descent (IBD) probabilities using hidden Markov models,
#' from marker genotypes (either discrete marker dosages (ie scores 0, 1, ..., ploidy representing the number of copies of the marker allele),
#' or the probabilities of these dosages) and a phased linkage map. Unlike the original TetraOrigin software, it does not re-estimate parental linkage phase,
#' and has been generalised for use in diploid, triploid, tetraploid and hexaploid populations.
#' @param input_type Can be either one of 'discrete' or 'probabilistic'. For the former (default), \code{dosage_matrix} must be supplied,
#' while for the latter \code{probgeno_df} must be supplied
#' @param genotypes Marker genotypes, either a 2d matrix of integer marker scores or a data.frame of dosage probabilities. 
#' Details are as follows:
#' \itemize{
#' \item{discrete : }{
#'  If \code{input_type} is 'discrete', \code{genotypes} is a matrix of marker dosage scores with markers in rows and individuals in columns.
#'  Both (marker) rownames and (individual or sample) colnames are needed.
#' }
#' \item{probabilistic : }{
#' If \code{input_type} is 'probabilistic', \code{genotypes} is a data frame as read from the scores file produced by function \code{saveMarkerModels} of R package 
#' \code{fitPoly}, or alternatively, a data frame containing at least the following columns:
#' \itemize{
#' \item{SampleName : }{
#' Name of the sample (individual)
#' }
#' \item{MarkerName : }{
#' Name of the marker
#' }
#' \item{P0 : }{
#' Probabilities of dosage score '0'
#' }
#' \item{P1, P2... etc. : }{
#' Probabilities of dosage score '1' etc. (up to max offspring dosage, e.g. P4 for tetraploid population)
#' }
#' }
#' }
#' }
#' @param phased_maplist A list of phased linkage maps, the output of \code{polymapR::create_phased_maplist}
#' @param remove_markers Optional vector of marker names to remove from the maps. Default is \code{NULL}.
#' @param ploidy Integer. Ploidy of the organism.
#' @param ploidy2 Optional integer, by default \code{NULL}. Ploidy of parent 2, if different from parent 1.
#' @param parent1 Identifier of parent 1, by default assumed to be \code{"P1"}
#' @param parent2 Identifier of parent 2, by default assumed to be \code{"P2"}
#' @param individuals By default "all" offspring are included, but otherwise a subset can be selected, using a vector of offspring indexing numbers (1,2, etc.)
#' according to their order in \code{dosage_matrix}
#' @param log Character string specifying the log filename to which standard output should be written. If \code{NULL} log is send to stdout.
#' @param map_function Mapping function to use when converting map distances to recombination frequencies.
#' Currently only \code{"haldane"} or \code{"kosambi"} are allowed.
#' @param bivalent_decoding Option to consider only bivalent pairing during formation of gametes (ignored for diploid populations, as only bivalents possible there), by default \code{TRUE}
#' @param error The (prior) probability of errors in the offspring dosages, usually assumed to be small but non-zero
#' @param full_multivalent_hexa Option to allow multivalent pairing in both parents at the hexaploid level, by default \code{FALSE}. Note that if \code{TRUE},
#' a very large available RAM may be required (>= 32Gb) to process the data.  
#' @param verbose Logical, by default \code{TRUE}. Should progress messages be written?
#' @param ncores How many CPU cores should be used in the evaluation? By default 1 core is used.
#' @return A list of IBD probabilities, organised by linkage group (as given in the input \code{phased_maplist}). Each
#' list item is itself a list containing the following:
#' \itemize{
#' \item{IBDtype: The type of IBD; for this function only "genotypeIBD" are calculated.}
#' \item{IBDarray: A 3d array of IBD probabilities, with dimensions marker, genotype-class and F1 individual.}
#' \item{map: A 3-column data-frame specifying chromosome, marker and position (in cM)}
#' \item{parental_phase: Phasing of the markers in the parents, as given in the input \code{phased_maplist}}
#' \item{biv_dec: Logical, whether bivalent decoding was used in the estimation of the F1 IBD probabilities.}
#' \item{gap: The size of the gap (in cM) used when interpolating the IBD probabilities, if performed.}
#' \item{genocodes: Ordered list of genotype codes used to represent different genotype classes.}
#' \item{pairing: log likelihoods of each of the different pairing scenarios considered (can be used e.g. for post-mapping check of preferential pairing)}
#' \item{ploidy: ploidy of parent 1}
#' \item{ploidy2: ploidy of parent 2}
#' \item{method : The method used, here "hmm" (Hidden Markov Model)}
#' \item{error : The error prior used}
#' }
#' @references
#' \itemize{
#' \item{Durbin R, Eddy S, Krogh A, Mitchison G (1998) Biological sequence analysis: Probabilistic models of proteins and nucleic acids. Cambridge: Cambridge University Press.}
#' \item{Hackett et al. (2013) Linkage analysis and QTL mapping using SNP dosage data in a tetraploid potato mapping population. PLoS One 8(5): e63939}
#' \item{Zheng et al. (2016) Probabilistic multilocus haplotype reconstruction in outcrossing tetraploids. Genetics 203: 119-131}
#' }
#' @examples
#' data("phased_maplist.4x", "SNP_dosages.4x")
#' hmm_IBD(phased_maplist=phased_maplist.4x,genotypes=SNP_dosages.4x,ploidy=4)
#' @noRd
hmm_IBD <- compiler::cmpfun(function(input_type = "discrete",
                                     genotypes,
                                     phased_maplist,
                                     remove_markers = NULL,
                                     ploidy,
                                     ploidy2 = NULL,
                                     parent1 = "P1",
                                     parent2 = "P2",
                                     individuals = "all",
                                     log = NULL,
                                     map_function = "haldane",
                                     bivalent_decoding = TRUE,
                                     error = 0.01,
                                     full_multivalent_hexa = FALSE,
                                     verbose = FALSE,
                                     ncores = 1){
  input_type <- match.arg(input_type, choices = c("discrete","probabilistic"))
  
  if(is.null(ploidy2)) ploidy2 <- ploidy
  if(ploidy %% 2 != 0 | ploidy2 %% 2 != 0) stop("Parental ploidy cannot be odd-numbered! Find new parents!")
  ploidyF1 <- (ploidy + ploidy2)/2
  
  if(ploidy2 > ploidy) stop("Currently, ploidy2 cannot exceed ploidy. Please re-order parents (may become fully generalised later).")
  if(!(error > 0 & error < 1)) stop("Error prior probability must be a number > 0 and less than 1")
  
  map_function <- match.arg(map_function, choices = c("haldane","kosambi"))
  
  if(map_function == "kosambi"){
    mapfun <- function(d){
      d <- d/100
      0.5*((exp(4*d)-1)/(exp(4*d)+1))
    }
  } else if (map_function == "haldane"){
    mapfun <- function(d){
      d <- d/100
      0.5*(1-exp(-2*d))
    }
  }
  
  if (is.null(log)) {
    log.conn <- stdout()
  } else {
    matc <- match.call()
    write.logheader(matc, log)
    log.conn <- file(log, "a")
  }
  
  if(input_type == "discrete"){
    # dosage_matrix <- test_dosage_matrix(dosage_matrix)
    genotypes <- test_dosage_matrix(genotypes)
  } else{
    # probgeno_df <- test_probgeno_df(probgeno_df,ploidyF1)
    # dosage_prob_array <- probgeno_df_to_array(probgeno_df,ploidyF1)
    probgeno_df <- test_probgeno_df(genotypes,ploidyF1)
    genotypes <- probgeno_df_to_array(probgeno_df,ploidyF1)
  }
  
  phased_markers <- as.character(unlist(sapply(phased_maplist, function(x) x$marker)))
  
  if(length(phased_markers) != length(unique(phased_markers))) stop("Some markers found at multiple positions - please check phased_maplist!")
  
  if(verbose) write(paste0("There were ", length(phased_markers)," phased markers supplied, on ",
                           length(phased_maplist)," linkage group",
                           ifelse(length(phased_maplist) > 1,"s.",".")),
                    file = log.conn)
  
  if(!is.null(remove_markers)){
    if(length(intersect(phased_markers, remove_markers)) == 0)
      stop("Markers specified for removal are not present in phased_maplist!")
    maplist.names <- names(phased_maplist)
    phased_maplist <- lapply(seq(length(phased_maplist)), function(ch) phased_maplist[[ch]][!phased_maplist[[ch]]$marker %in% remove_markers,])
    names(phased_maplist) <- maplist.names
    
    phased_markers <- as.character(unlist(sapply(phased_maplist, function(x) x$marker)))
    if(verbose) write(paste("There are now", length(phased_markers),"phased markers remaining following marker removal."),
                      file = log.conn)
  }
  
  # Generate vector 'offspring' of numbers 1:(number of F1 offspring)
  if(input_type == "discrete"){
    if(individuals[1] == "all"){
      # offspring <- 1:(ncol(dosage_matrix[,-which(colnames(dosage_matrix) %in% c(parent1,parent2)),drop=FALSE]))
      offspring <- 1:(ncol(genotypes[,-which(colnames(genotypes) %in% c(parent1,parent2)),drop=FALSE]))
      
    } else{
      # foo <- 1:(ncol(dosage_matrix[,-which(colnames(dosage_matrix) %in% c(parent1,parent2))]))
      foo <- 1:(ncol(genotypes[,-which(colnames(genotypes) %in% c(parent1,parent2))])) 
      if(!all(individuals %in% foo)) stop("Vector of offspring indices incorrectly supplied using individuals argument!")
      offspring <- individuals
    }
  } else{
    if(individuals[1] == "all"){
      # offspring <- seq_along(setdiff(dimnames(dosage_prob_array)[[3]],c(parent1, parent1)))
      offspring <- seq_along(setdiff(dimnames(genotypes)[[3]],c(parent1, parent1)))
    } else{
      # foo <- 1:dim(dosage_prob_array)[3]
      foo <- 1:dim(genotypes)[3]
      if(!all(individuals %in% foo)) stop("Vector of offspring indices incorrectly supplied using individuals argument!")
      offspring <- individuals
    }
  }
  
  
  ## Check for consistency between dosages and phased marker information (discrete genotypes only at present..)
  if(input_type == "discrete"){
    phased.dose <- lapply(seq(length(phased_maplist)),
                          function(ch) matrix(c(rowSums(phased_maplist[[ch]][,paste0("h",1:ploidy)]),
                                                rowSums(phased_maplist[[ch]][,paste0("h",(ploidy + 1):(ploidy + ploidy2))])),
                                              ncol=2,
                                              dimnames = list(phased_maplist[[ch]]$marker,
                                                              c(parent1,parent2))))
    # for(i in seq(length(phased_maplist))) {
    #   if(!all(apply(phased.dose[[i]] == dosage_matrix[rownames(phased.dose[[i]]),c(parent1,parent2)],1,all))){
    #     message(paste("Inconsistent dosages on LG",i,"encountered, concerning the following markers:"))
    #     print(rownames(phased.dose[[i]])[which(!apply(phased.dose[[i]] == dosage_matrix[rownames(phased.dose[[i]]),1:2],1,all))])
    #     stop("Cannot proceed. Suggest to check input - was unconverted or converted dosage coding used when generating phased mapfile?
    #        Alternatively, use remove_markers argument to screen out these problematic markers.")
    #   }
    # }
    for(i in seq(length(phased_maplist))) {
      
      if(!all(rownames(phased.dose[[i]]) %in% rownames(genotypes))) stop("Issue matching marker names from phased map with supplied genotype matrix. Please check!")
      
      if(!all(apply(phased.dose[[i]] == genotypes[rownames(phased.dose[[i]]),c(parent1,parent2)],1,all))){
        message(paste("Inconsistent dosages on LG",i,"encountered, concerning the following markers:"))
        print(rownames(phased.dose[[i]])[which(!apply(phased.dose[[i]] == genotypes[rownames(phased.dose[[i]]),1:2],1,all))])
        stop("Cannot proceed. Suggest to check input - was unconverted or converted dosage coding used when generating phased mapfile?
           Alternatively, use remove_markers argument to screen out these problematic markers.")
      }
    }
    
  }
  
  ## Generate the list of valencies and genotype states
  statefun.ls <- state_fun(ploidy = ploidy, ploidy2 = ploidy2, 
                           bivalent_decoding = bivalent_decoding,
                           full_multivalent_hexa = full_multivalent_hexa)
  state.ls <- statefun.ls$state.ls
  all.states <- statefun.ls$all.states
  
  ##-----------------------------------------------------------------------
  ## Forward and Backward algorithms (from Durbin et al. 1998, pp. 58 - 59)
  ##-----------------------------------------------------------------------

  forward_backward <- function(F1num=1,
                               ploidy_fb,
                               state_matrix,
                               TMarray,
                               e_o,
                               par_phase,
                               progeny_dose,#progeny_dose has different forms depending on input_type
                               n_mark,
                               input.typ){
    
    Nstates <- nrow(state_matrix)
    
    if(input.typ == "discrete"){
      emmat.tf <- apply(state_matrix,1,function(x) rowSums(par_phase[,x]) == progeny_dose[,F1num,drop=FALSE])
      emission.mat <- emmat.tf
      emission.mat[emmat.tf] <- 1 - e_o
      emission.mat[!emmat.tf] <- e_o/ploidy_fb
      emission.mat[is.na(emission.mat)] <- 1/Nstates  #I am using the dimension of the smaller state matrix as the denominator
    } else{
      emission.mat_dosePr <- apply(state_matrix,1,function(x) 
        progeny_dose[cbind(1:dim(par_phase)[1],rowSums(par_phase[,x]) + 1,F1num)])
      ## This is without the possibility of errors - how to factor error into these numbers?
      ## A possible work-around is to take Pr >= 0.5 as compatible and Pr < 0.5 as incompatible:
      emission.mat <- emission.mat_dosePr
      emission.mat[emission.mat_dosePr >= 0.5] <- emission.mat_dosePr[emission.mat_dosePr >= 0.5] - e_o
      emission.mat[emission.mat_dosePr < 0.5] <- emission.mat_dosePr[emission.mat_dosePr < 0.5] + e_o/ploidy_fb
      emission.mat[is.na(emission.mat)] <- 1/Nstates 
    }
    
    
    ##---------------------
    ## Forward algorithm
    ##---------------------
    FORres <- fx(EM = emission.mat,TM = TMarray)
    forward.mat <- FORres$FM
    
    ## Termination step:
    logprob_x <- sum(log(FORres$s))
    
    ##---------------------
    ## Backward algorithm
    ##---------------------
    backward.mat <- bx(EM = emission.mat,TM = TMarray, s = FORres$s)
    
    ## Compute the posterior probability of each of the states at position i, where P(x) = prob_x was computed earlier
    IBDmatrix <- t(forward.mat[,2:ncol(forward.mat)]*backward.mat)
    
    ## In some cases if denom is a very small number, can lead to overflow errors (Inf) in the IBDmatrix
    if(length(is.infinite(IBDmatrix)) > 0){
      IBDmatrix[is.infinite(IBDmatrix)] <- 0 #replace these values with 0
    }
    colnames(IBDmatrix) <- rownames(state_matrix)
    
    # Normalise:
    norm.vect <- rowSums(IBDmatrix)
    
    ## Protect against division by zero:
    if(length(which(norm.vect == 0)) > 0) norm.vect[norm.vect==0] <- 1
    
    ## If denom was very small, can still have Infinite rowSums
    if(length(which(is.infinite(norm.vect))) > 0) warning("Error calculating IBD probabilities. Suggest to reduce number of markers per linkage group and re-try.")
    
    IBDmatrix <- IBDmatrix/norm.vect
    
    return(list("P.X" = IBDmatrix,
                "P.V" = logprob_x))
  } #forward_backward
  
  ## IBD function, per linkage group:
  getIBD_LG <- function(lg) {
    
    ## Check that marker positions are not overlapping:
    map <- phased_maplist[[lg]][,c("marker","position")]
    # par.phase <- phased_maplist[[lg]][,3:ncol(phased_maplist[[lg]])]
    par.phase <- phased_maplist[[lg]][,paste0("h",1:(ploidy+ploidy2))]
    
    if(length(map$position) != length(unique(map$position))){
      warning(paste("Jittering overlapping SNP positions on linkage group",lg))
      while(length(map$position) != length(unique(map$position)))
        map$position[duplicated(map$position)] <- jitter(map$position[duplicated(map$position)],amount = 0.1)
      #re-order after jittering if necessary:
      par.phase <- par.phase[order(map$position),]
      map <- map[order(map$position),]
    }
    
    posvec <- map$position
    mark <- as.character(map$marker)
    nmark <- length(mark)
    names(posvec) <- rownames(par.phase) <- mark
    par.phase <- as.matrix(par.phase)
    
    combs <- cbind(mark[-length(mark)],mark[-1])
    
    pos1 <- posvec[combs[,1]]
    pos2 <- posvec[combs[,2]]
    
    r.vect <- mapfun(abs(pos2 - pos1))
    
    if(input_type == "discrete"){ #for some reason this is evaluating (sometimes) as TRUE within foreach loop
      # progeny <- dosage_matrix[mark,-which(colnames(dosage_matrix) %in% c(parent1,parent2)),drop=FALSE]
      progeny <- genotypes[mark,-which(colnames(genotypes) %in% c(parent1,parent2)),drop=FALSE]
    } else{
      # progeny <- dosage_prob_array[mark,,setdiff(dimnames(dosage_prob_array)[[3]],c(parent1,parent2)),drop = FALSE] #this is still a 3d array
      progeny <- genotypes[mark,,setdiff(dimnames(genotypes)[[3]],c(parent1,parent2)),drop = FALSE] #this is still a 3d array
    }
    
    ## Generate transition matrix list here first:
    TM.ls <- lapply(names(state.ls), function(VAL){
      TMfun <- get(paste0("TMfun.",ploidyF1,"x_",VAL))
      matdim <- dim(TMfun(1,r.vect)) #simplest is to just record the TM dim
      array(sapply(1:nmark,function(n) TMfun(n-1,r.vect)),dim = c(matdim,nmark))
    })
    
    #######################################################################
    ## Function to call the forward_backward function and generate the IBDs
    generate_IBDs <- function(F1index,
                              state_ls,
                              allstates = all.states,
                              ploidy_F1 = ploidyF1,
                              TM_ls = TM.ls,
                              ...){ ## dots are arguments passed to forward_backward()
      
      valency_IBD.ls <- lapply(seq(length(state_ls)), function(v) {
        lapply(state_ls[[v]], function(StateMat)
          forward_backward(F1num = F1index,
                           state_matrix = StateMat,
                           TMarray = TM_ls[[v]],
                           ...)
        )
      }
      )
      
      ## Work-around, simply use the pairing type which contains the most likely pairing structure:
      Pval_type <- sapply(valency_IBD.ls, function(x) max(sapply(x, function(y) y$P.V)))
      
      ML_val <- which.max(Pval_type) #Select the most likely and proceed
      
      if(verbose) write(paste("Most likely pairing type:",names(state_ls)[ML_val]),log.conn)
      
      valency_IBDs <- valency_IBD.ls[[ML_val]] #Go one level deeper into nested list, ignoring other types..
      
      ## Next, need to incorporate the probability of each valency:
      P_D <- sum(sapply(valency_IBDs, function(x) exp(x$P.V - Pval_type[ML_val])/length(valency_IBDs)))
      P_V <- sapply(valency_IBDs, function(x) exp(x$P.V - Pval_type[ML_val])/((length(valency_IBDs)*P_D))) #This could also be returned...
      
      ## Need to incorporate these probabilities together. These are not in the correct dimension yet..
      IBD_incompat <- lapply(valency_IBDs, function(x) x$P.X)
      
      valencyIBDarray <- array(0,dim = c(nmark,length(allstates),length(valency_IBDs)))
      dimnames(valencyIBDarray) <- list(mark,allstates,NULL)
      
      # Work-around: use a for loop here, could be improved later..
      for(p in 1:length(valency_IBDs)){
        valencyIBDarray[,colnames(IBD_incompat[[p]]),p] <- IBD_incompat[[p]]
      }
      
      valencyIBDarray <- sweep(valencyIBDarray,3,P_V,FUN="*") #Multiply IBD probs by probability of each valency
      
      IBDs_output <- apply(valencyIBDarray,2,rowSums)
      
      return(list("IBD" = IBDs_output,
                  "allstates" = allstates,
                  "P_V" = P_V,
                  # "marginal.likelihood" = setNames(Pval_type[ML_val],names(state_ls)[ML_val]))
                  "marginal.likelihoods" = matrix(Pval_type, ncol = 1, dimnames = list(names(state_ls),"logL")))
             ) 
    } #generate_IBDs
    ################
    
    IBD.list <- lapply(X = offspring, FUN = generate_IBDs,
                       state_ls = state.ls,
                       e_o = error,
                       ploidy_fb = ploidyF1,
                       par_phase = par.phase,
                       progeny_dose = progeny,
                       n_mark = nmark,
                       input.typ = input_type)
    
    ## Convert list to a 3d array
    IBDs.ls <- lapply(IBD.list,function(x) x$IBD)
    pairing.probs <- lapply(IBD.list,function(x) x$P_V)
    IBDarray <- abind::abind(IBDs.ls,along = 3)
    marginal.L <- lapply(IBD.list,"[[","marginal.likelihoods")
    
    if(input_type == "discrete"){
      offspring.names <- colnames(progeny)[offspring]
    } else{
      offspring.names <- dimnames(progeny)[[3]][offspring]
    }
    
    names(pairing.probs) <- dimnames(IBDarray)[[3]] <- names(marginal.L) <- offspring.names
      
    return(list("IBDtype" = "genotypeIBD",
                "IBDarray" = IBDarray,
                "map" = cbind("chromosome" = lg,
                              map),
                "parental_phase" = par.phase,
                "marginal.likelihoods" = marginal.L,
                "valency" = sapply(marginal.L,function(x) rownames(x)[which.max(x)]),
                "offspring" = offspring.names,
                "biv_dec" = bivalent_decoding,
                "gap" = NULL, #Add gap later if splining...
                "genocodes" = dimnames(IBDarray)[[2]],
                "pairing" = pairing.probs,
                "ploidy" = ploidy,
                "ploidy2" = ploidy2,
                "method" = "hmm",
                "error" = error))
  } #getIBD_LG
  
  ## Run over multiple linkage groups, in parallel if required:
  if(ncores > 1){
    
    win <- Sys.info()["sysname"] == "Windows"
    if (win) {
      cl <- parallel::makePSOCKcluster(ncores)
      doParallel::registerDoParallel(cl)
    } else {
      doParallel::registerDoParallel(cores = ncores)
    }
    
    ## R CMD CHECK work-around:
    lgp <- NULL
    
    outlist <-
      foreach::foreach(
        lgp = seq(length(phased_maplist)),
        .inorder = TRUE,
        .packages = c("polyqtlR")
      ) %dopar% {
        getIBD_LG(lgp)
      }
    if(win) parallel::stopCluster(cl)
    
  } else{
    outlist <- lapply(seq(length(phased_maplist)), getIBD_LG)
  }
  
  if(!is.null(names(phased_maplist))){
    names(outlist) <- names(phased_maplist)
  } else{
    warning(paste("phased_maplist had no LG names assigned. Assigning names",
                  paste0("'LG",seq(length(outlist)),"'", collapse = ", "),"to output list."))
    names(outlist) <- paste0("LG",seq(length(outlist)))
  }
  
  return(outlist)
  
}) #hmm_IBD


#' Find depth of a list
#' @description Recursive function checking list depth to determine subsequent subsetting rules
#' @param obj An input object, may be a (nested) list
#' @param objdepth Counter to record how deep the recursion has gone
#' @noRd
list.depth <- function(obj, objdepth = 0) {
  if(!is.list(obj)) {
    return(objdepth)
  } else {
    return(max(unlist(lapply(obj, list.depth, objdepth = objdepth + 1))))
  }
}

#' Generate a sequence of map positions for splining function
#' @description Function to return a sequence of map positions at steps of size gap from given input
#' @param cMvect Vector of map positions
#' @param gap Gap size in cM
#' @noRd
mapseq <- function(cMvect, gap){
  ## Error checking input:
  if(length(cMvect) < 2) stop("No need to fit splines on a map with fewer than 2 positions!")
  if(max(cMvect) - min(cMvect) < gap) stop("Impossible to fit grid using gap specified. Suggest to decrease gap.")
  if(!all(sort(order(cMvect)) == order(cMvect))) stop("Marker positions not supplied in correct order according to map order.")
  
  if(cMvect[1] == ceiling(cMvect[1])){
    out <- seq(cMvect[1],max(cMvect),gap)
  } else{
    out <- c(cMvect[1], seq(ceiling(cMvect[1]),max(cMvect),gap))
  }
  
  if(max(cMvect) > max(out)) out <- c(out,max(cMvect))
  
  return(out)
}

#' Nettleton and Doerge 2000
#' @param N number of permutations
#' @param alpha The threshold level, ie. (1 - alpha) quantile of sorted LOD scores defines threshold
#' @param gamma The confidence interval specifier, usually 0.05
#' @noRd
NettletonDoerge <- function(N = 100,alpha = 0.05,gamma = 0.05){
  lower <- ceiling(N*(1-alpha) - (1/pnorm(1-(gamma/2)))*sqrt(N*(1-alpha)*alpha))
  upper <- ceiling(N*(1-alpha) + (1/pnorm(1-(gamma/2)))*sqrt(N*(1-alpha)*alpha))
  return(c(lower,upper))
}


#' Error handling on ploidy and ploidy2, and determine the number of genotype classes for general ploidy level and pairing behaviour
#' @param biv_dec Either TRUE for bivalents only or FALSE to also allow quadrivalents and double reduction
#' @param pl ploidy level of parent 1, assumed to be even and greater than 4
#' @param pl2 ploidy level of parent 2, assumed to be even and greater than 2
#' @noRd
Nstates.fun <- function(biv_dec, pl, pl2){
  
  if(pl < pl2) stop("pl should always be >= pl2. Please reverse and try again.")
  if(any(c(pl,pl2)%%2 != 0)) stop("pl and pl2 must be even (positive) integers.")
  
  if(all(c(pl,pl2) == 2)) {
    out <- 4 #handle the case of diploidy separately
  } else{
    out <- ifelse(biv_dec,choose(pl,pl/2)*choose(pl2,pl2/2),
                  ifelse(pl2 == 2,(choose(pl,pl/2) + pl*(choose(pl-1,(pl/2 - 1))))*choose(pl2,pl2/2),
                         (choose(pl,pl/2) + pl*(choose(pl-1,(pl/2) - 2)))*(choose(pl2,pl2/2) + pl2*(choose(pl2-1,(pl2/2) - 2)))
                  ))
  }
  return(out)
}

#' Convert a progeno_df data.frame to a 3d array
#' @param probgeno_df A data frame as read from the scores file produced by function
#' \code{saveMarkerModels} of R package \code{fitPoly}, or alternatively, a data frame containing at least the following columns:
#' \itemize{
#' \item{SampleName : }{
#' Name of the sample (individual)
#' }
#' \item{MarkerName : }{
#' Name of the marker
#' }
#' \item{P0}{
#' Probabilities of dosage score '0'
#' }
#' \item{P1, P2... etc. : }{
#' Probabilities of dosage score '1' etc. (up to max offspring dosage, e.g. P4 for tetraploid population)
#' }
#' }
#' @param ploidy  Ploidy of the F1 population (can be 2, 3, 4 or 6)
#' @noRd
probgeno_df_to_array <- function(probgeno_df,
                                 ploidy){
  #Note: this function assumes the function test_probgeno_df has already been called 
  # all.samp <- levels(probgeno_df$SampleName) #this includes the parents for now
  pcolnames <- paste0("P",0:ploidy)
  out.array <- array(0,
                     dim = c(length(levels(probgeno_df$MarkerName)),
                             ploidy + 1,
                             length(levels(probgeno_df$SampleName))),
                     dimnames=list(levels(probgeno_df$MarkerName),
                                   pcolnames,
                                   levels(probgeno_df$SampleName)))
  for(ind in levels(probgeno_df$SampleName)){
    temp <- probgeno_df[probgeno_df$SampleName == ind,]
    out.array[,,ind] <- as.matrix(temp[match(levels(probgeno_df$MarkerName),temp$MarkerName),pcolnames])
  }
  
  return(out.array)
} #probgeno_df_to_array


#' Quadrivalent transition matrix function
#' @param r recombination frequency
#' @noRd
quadTM <- function(r) 
  matrix(c(1-r,r/3,r/3,r/3,
           r/3,1-r,r/3,r/3,
           r/3,r/3,1-r,r/3,
           r/3,r/3,r/3,1-r),ncol=4)



#' Function to return the list of possible pairing states, given parental ploidies and meiotic pairing model
#' @param ploidy Ploidy of parent 1
#' @param ploidy2 Ploidy of parent 2
#' @param bivalent_decoding Logical, if \code{FALSE} then multivalent pairing assumed
#' @param full_multivalent_hexa Should multivalent pairing be considered in both parents simultaneously in hexaploids?
#' @noRd
state_fun <- function(ploidy, ploidy2, bivalent_decoding, full_multivalent_hexa = FALSE){
  
  ploidyF1 <- (ploidy + ploidy2)/2
  
  gen_state.mat <- function(l1,l2,ploidy_F1 = ploidyF1,onlyNames=FALSE){
    states_b <- matrix(apply(l1,2,function(x) apply(l2,2,function(y) c(x,y))),
                       ncol = ploidy_F1,byrow=T)
    rownames(states_b) <- apply(states_b,1,function(x) paste0(letters[x],collapse=""))
    ifelse(onlyNames,return(rownames(states_b)),return(states_b))
  } #gen_state.mat
  
  v1 <- combn(1:ploidy,ploidy/2)
  v2 <- combn((ploidy+1):(ploidy+ploidy2),ploidy2/2)
  
  if(ploidyF1 == 2){
    
    state.ls <- list("B" = setNames(list(gen_state.mat(v1,v2)),c("AB-CD"))) #single bivalent pairing, needs to be in nested list structure for generate_IBD function
    all.states <- gen_state.mat(v1,v2,onlyNames = TRUE)
    
  } else if(ploidyF1 == 3){
    
    BB.mat <- gen_state.mat(v1,v2)
    
    BBval.mat <- do.call(rbind,lapply(1:(ncol(v1)/2), function(cl1)
      t(gen_state.mat(v1[,-c(cl1,ncol(v1) - cl1 + 1)],v2,onlyNames = TRUE))
    ))
    
    QB.mat <- gen_state.mat(t(expand.grid(1:ploidy,1:ploidy)[-rem.quad,]),v2)
    
    state.ls <- list("BB" = setNames(lapply(1:nrow(BBval.mat),function(r) BB.mat[BBval.mat[r,],]),
                                     c("AB-CD-EF","AC-BD-EF","AD-BC-EF")),
                     "QB" = list("ABCD-EF" = QB.mat))
    
    all.states <- rownames(QB.mat)
    
    if(bivalent_decoding){
      state.ls <- state.ls[1]
      all.states <- rownames(BB.mat)
    }
    
  } else if(ploidyF1 == 4 & ploidy == 4){ #this assumes we are not in a 6 x 2 population!! Need to cover this possibility later, perhaps?
    
    BB.mat <- gen_state.mat(v1,v2)
    
    BBval.mat <- do.call(rbind,lapply(1:max(ncol(v1)/2,1), function(cl1)
      t(sapply(1:max(ncol(v2)/2,1), function(cl2) gen_state.mat(v1[,-c(cl1,ncol(v1) - cl1 + 1)],
                                                                v2[,-c(cl2,ncol(v2) - cl2 + 1)],onlyNames = TRUE)
      ))))
    
    QQ.mat <- gen_state.mat(t(expand.grid(1:ploidy,1:ploidy)[-rem.quad,]),
                            t(expand.grid((ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2))[-rem.quad,]))
    
    BQ.mat <- setNames(lapply(1:(ncol(v1)/2), function(cl1) gen_state.mat(v1[,-c(cl1,ncol(v1) - cl1 + 1)],
                                                                          t(expand.grid((ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2))[-rem.quad,]))),
                       c("AB-CD-EFGH","AC-BD-EFGH","AD-BC-EFGH"))
    
    QB.mat <- setNames(lapply(1:(ncol(v2)/2), function(cl2) gen_state.mat(t(expand.grid(1:ploidy,1:ploidy)[-rem.quad,]),
                                                                          v2[,-c(cl2,ncol(v2) - cl2 + 1)])),
                       c("ABCD-EF-GH","ABCD-EG-FH","ABCD-EH-FG"))
    
    state.ls <- list("BB" = setNames(lapply(1:nrow(BBval.mat),function(r) BB.mat[BBval.mat[r,],]),
                                     c("AB-CD-EF-GH","AB-CD-EG-FH","AB-CD-EH-FG",
                                       "AC-BD-EF-GH","AC-BD-EG-FH","AC-BD-EH-FG",
                                       "AD-BC-EF-GH","AD-BC-EG-FH","AD-BC-EH-FG")),
                     "BQ" = BQ.mat,
                     "QB" = QB.mat,
                     "QQ" = list("ABCD-EFGH" = QQ.mat))
    
    all.states <- rownames(QQ.mat)
    
    if(bivalent_decoding) {
      state.ls <- state.ls[1]
      all.states <- rownames(BB.mat)
    }
    
  } else if(ploidyF1 == 6 & ploidy == 6){
    
    P1.Valnames <- sapply(hexa.list, function(hl) paste0(apply(apply(hl,1,function(x) LETTERS[x]),1,paste0,collapse=""),collapse="-"))
    P2.Valnames <- sapply(hexa.list, function(hl) paste0(apply(apply(hl+6,1,function(x) LETTERS[x]),1,paste0,collapse=""),collapse="-"))
    
    P1val.list <- setNames(lapply(hexa.list, function(hl) apply(expand.grid(hl[,1],hl[,2],hl[,3]),1,sort)),P1.Valnames)
    P2val.list <- setNames(lapply(hexa.list, function(hl) apply(expand.grid(hl[,1] + 6,hl[,2] + 6,hl[,3] + 6),1,sort)),P2.Valnames)
    
    BB.ls_6x <- do.call(c,lapply(P1val.list, function(cl1) lapply(P2val.list, function(cl2) gen_state.mat(cl1,cl2))))
    names(BB.ls_6x) <- gsub("\\.","-",names(BB.ls_6x))
    
    if(!bivalent_decoding){
      
      P1hexa.combs <- t(expand.grid(1:ploidy,1:ploidy,1:ploidy)[-rem.hex,])
      P2hexa.combs <- t(expand.grid((ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2),(ploidy+1):(ploidy+ploidy2))[-rem.hex,])
      
      BH.ls_6x <- lapply(P1val.list, function(cl1) gen_state.mat(cl1,P2hexa.combs))
      HB.ls_6x <- lapply(P2val.list, function(cl2) gen_state.mat(P1hexa.combs,cl2))
      
      names(BH.ls_6x) <- paste0(names(BH.ls_6x),"-GHIJKL")
      names(HB.ls_6x) <- paste0("ABCDEF-",names(HB.ls_6x))
      
      if(full_multivalent_hexa) {
        HH.ls_6x <- list("ABCDEF-GHIJKL" = gen_state.mat(P1hexa.combs,P2hexa.combs))
        state.ls <- list("BB" = BB.ls_6x,"BH" = BH.ls_6x,"HB" = HB.ls_6x,"HH" = HH.ls_6x)
        
        all.states <- sort(rownames(HH.ls_6x[[1]]))
        
      } else{
        state.ls <- list("BB" = BB.ls_6x,"BH" = BH.ls_6x,"HB" = HB.ls_6x)#,"HH" = HH.ls_6x)
        all.states <- sort(unique(c(sapply(BH.ls_6x,rownames),sapply(HB.ls_6x,rownames))))
        }
    
    } else{
      state.ls <- list("BB" = BB.ls_6x)
      all.states <- gen_state.mat(v1,v2,onlyNames = TRUE)
      
    }
  } else{
    stop("Currently that combination of parental ploidies is not handled. Suggest to contact Peter Bourke.")
  }
  
  return(list(state.ls = state.ls,
              all.states = all.states))
} #state_fun

#' Error and warning handling for dosage_matrix
#' @param dosage_matrix An integer matrix with markers in rows and individuals in columns
#' @noRd
test_dosage_matrix <- function(dosage_matrix){
  if(inherits(dosage_matrix,"data.frame")){
    warning("dosage_matrix should be a matrix, now it's a data.frame.")
    message("Trying to convert it to matrix, assuming markernames are in the first column..")
    rownames(dosage_matrix) <- dosage_matrix[,1]
    dosage_matrix <- as.matrix(dosage_matrix[,-1])
    class(dosage_matrix) <- "integer"
  } else if(inherits(dosage_matrix,"matrix")){
    rn <- rownames(dosage_matrix)
    cn <- colnames(dosage_matrix)
    if(is.null(rn)) stop("The rownames of dosage_matrix should contain markernames. Now NULL")
    if(is.null(cn)) stop("The columnnames of dosage_matrix should contain genotype names. Now NULL")
    if(!(typeof(dosage_matrix)=="integer" | typeof(dosage_matrix)=="double")){
      warning("dosage_matrix should be integer or numeric. Trying to convert it.. ")
      class(dosage_matrix) <- "integer"
    }
  } else {
    stop("dosage_matrix should be a matrix of integers.
         See the manual of this function for more information.")
  }
  return(dosage_matrix)
}

#' Error and warning handling for IBD_list as estimated by \code{estimate_IBD}
#' @param IBD_list List of IBD probabilities
#' @noRd
test_IBD_list <- function(IBD_list){
  if(!inherits(IBD_list,"list")) stop("IBD_list: list input expected but not found")
  if(!all(c("IBDtype","IBDarray","map","parental_phase","biv_dec","gap", "genocodes",
                              "pairing","ploidy","ploidy2","method","error") %in% names(IBD_list[[1]]) )) {
    stop(paste0("The following required list elements were missing from IBD_list:\n\n",
               paste(setdiff(c("IBDtype","IBDarray","map","parental_phase","biv_dec","gap", "genocodes",
                         "pairing","ploidy","ploidy2","method","error"),
                         names(IBD_list[[1]])), collapse = ", "),
               ".\n\nSuggest to re-estimate IBD probabilities before proceeding.\n"))
  }
  
  return(IBD_list)
}

#' Error and warning handling for probgeno_df data-frame of probabilistic genotypes (scores)
#' @param probgeno_df A data frame as read from the scores file produced by function
#' \code{saveMarkerModels} of R package \code{fitPoly}, or alternatively, a data frame containing at least the following columns:
#' \itemize{
#' \item{SampleName : }{
#' Name of the sample (individual)
#' }
#' \item{MarkerName : }{
#' Name of the marker
#' }
#' \item{P0 : }{
#' Probabilities of dosage score '0'
#' }
#' \item{P1, P2... etc. : }{
#' Probabilities of dosage score '1' etc. (up to max offspring dosage, e.g. P4 for tetraploid population)
#' }
#' }
#' @param ploidy  Ploidy of the F1 population (can be 2, 3, 4 or 6)
#' @noRd
test_probgeno_df <- function(probgeno_df,
                             ploidy){
  if(!inherits(probgeno_df, "data.frame")) {
    warning("probgeno_df should be a data-frame. Attempting to coerce...")
    probgeno_df <- as.data.frame(probgeno_df)
  }
  
  pcolnames <- paste0("P",0:ploidy)
  
  if(!all(c("MarkerName", "SampleName",pcolnames) %in% colnames(probgeno_df))) stop(paste("colnames MarkerName, SampleName,",paste0(pcolnames, collapse=", "),"are required!"))
  
  if(!is.factor(probgeno_df$SampleName)) probgeno_df$SampleName <- as.factor(probgeno_df$SampleName)
  if(!is.factor(probgeno_df$MarkerName)) probgeno_df$MarkerName <- as.factor(probgeno_df$MarkerName)
  
  return(probgeno_df)
} #test_probgeno_df

#' Bivalent-pairing transition matrix function, diploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TM.biv.2 <- function(m1,r_vect) {
  if(m1 == 0){
    out <- bivTM(0.5)
  } else{
    out <- bivTM(r_vect[m1])
  }
  return(out)
}#TM.biv.2

#' Bivalent-pairing transition matrix function, tetraploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TM.biv.4 <- function(m1,r_vect) {
  if(m1 == 0){
    out <- kronecker(bivTM(0.5),bivTM(0.5)) #Starting transition matrix as one of the positions is 0 (i.e. in initialisation state)
  } else{
    out <- kronecker(bivTM(r_vect[m1]),bivTM(r_vect[m1]))
  }
  return(out)
} #TM.biv.4

#' Bivalent-pairing transition matrix function, hexaploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TM.biv.6 <- function(m1,r_vect) {
  if(m1 == 0){
    out <- kronecker(bivTM(0.5),kronecker(bivTM(0.5),bivTM(0.5)))
  } else{
    out <- kronecker(bivTM(r_vect[m1]),kronecker(bivTM(r_vect[m1]),bivTM(r_vect[m1])))
  }
  return(out)
} #TM.biv.6

#' Hexavalent-pairing transition matrix function, hexaploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @param rem_hex Index of genotype classes to remove, work-around from overly-general transition 
#' matrix with redundant classes
#' @noRd
TM.hex <- function(m1,r_vect,rem_hex = rem.hex) {
  if(m1 == 0){
    out <- kronecker(hexTM(0.5),kronecker(hexTM(0.5),hexTM(0.5)))
  } else{
    out <- kronecker(hexTM(r_vect[m1]),kronecker(hexTM(r_vect[m1]),hexTM(r_vect[m1])))
  }
  out <- out[-rem_hex,-rem_hex]
  return(out)
} #TM.hex

#' Quadrivalent-pairing transition matrix function, tetraploid
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @param rem_quad Index of genotype classes to remove, work-around from overly-general transition 
#' matrix with redundant classes
#' @noRd
TM.quad <- function(m1,r_vect,rem_quad = rem.quad) {
  if(m1 == 0){
    out <- kronecker(quadTM(0.5),quadTM(0.5)) #Starting transition matrix as one of the positions is 0 (i.e. in initialisation state)
  } else{
    out <- kronecker(quadTM(r_vect[m1]),quadTM(r_vect[m1]))
  }
  out <- out[-rem_quad,-rem_quad]
  return(out)
} #TM.quad

#' Diploid bi-parental transition matrix
#' @param m1 marker
#' @param r_vect Vector of adjacent recombination frequencies
#' @noRd
TMfun.2x_B <- TM.biv.4

#' Triploid bi-parental transition matrix, bivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.3x_BB <- function(...) kronecker(TM.biv.4(...),TM.biv.2(...))

#' Triploid bi-parental transition matrix, quadrivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.3x_QB <- function(...) kronecker(TM.quad(...),TM.biv.2(...))

#' Tetraploid bi-parental transition matrix, bivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_BB <- function(...) kronecker(TM.biv.4(...), TM.biv.4(...))

#' Tetraploid bi-parental transition matrix, bivalent-quadrivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_BQ <- function(...) kronecker(TM.biv.4(...), TM.quad(...))

#' Tetraploid bi-parental transition matrix, quadrivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_QB <- function(...) kronecker(TM.quad(...), TM.biv.4(...))

#' Tetraploid bi-parental transition matrix, quadrivalent-quadrivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.4x_QQ <- function(...) kronecker(TM.quad(...), TM.quad(...))

#' Hexaploid bi-parental transition matrix, bivalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_BB <- function(...) kronecker(TM.biv.6(...), TM.biv.6(...))

#' Hexaploid bi-parental transition matrix, bivalent-hexavalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_BH <- function(...) kronecker(TM.biv.6(...), TM.hex(...))

#' Hexaploid bi-parental transition matrix, hexavalent-bivalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_HB <- function(...) kronecker(TM.hex(...), TM.biv.6(...))

#' Hexaploid bi-parental transition matrix, hexavalent-hexavalent pairing
#' @param \dots Arguments passed to parental TM functions
#' @noRd
TMfun.6x_HH <- function(...) kronecker(TM.hex(...), TM.hex(...)) #this becomes computationally intractable


#' Calculate the weighted variance
#' @description Generalisation of the variance to include weights
#' @param x Vector of interest
#' @param w Vector of weights
#' @param na.rm Should missing values be removed? By default, \code{FALSE}
#' @noRd
weighted.var <- function(x, w, na.rm = FALSE) {
  if (na.rm) {
    w <- w[i <- !is.na(x)]
    x <- x[i]
  }
  sum.w <- sum(w)
  sum.w2 <- sum(w^2)
  mean.w <- sum(x * w) / sum(w)
  (sum.w / (sum.w^2 - sum.w2)) * sum(w * (x - mean.w)^2, na.rm =
                                       na.rm)
}

#' Write a header for the log file
#' @description Functionalized writing of function name and arguments as start for log paragraph.
#' @param matc A object of class \code{call}
#' @param log A character string specifying the log file
#' @noRd
write.logheader <- function(matc, log){
  args <- as.list(matc)
  #args.df <- data.frame(names=names(args), values=as.character(args))
  #args.df <- args.df[-1,]
  mod <- "w"
  if(file.exists(log)) mod <- "a"
  log.conn <- file(log, mod)
  if(mod=="w") write(c("<style type=\"text/css\">.table {width: 40%;}</style>",
                       "##polyqtlR log file",
                       "This file is written in [markdown](https://en.wikipedia.org/wiki/Markdown).",
                       "Use knitr or [Markable](https://markable.in) to export it as a nicely formatted html or word file."),
                     log.conn)
  close(log.conn)
  mod <- "a"
  log.conn <- file(log, mod)
  write(c(#"\n-----------------------------------------------------------",
    paste0("\n###Log for function call of `", args[[1]], "`"),
    as.character(Sys.time())
  ),
  #"-----------------------------------------------------------"),
  file = log.conn)
  close(log.conn)
  log.conn <- file(log, "a")
  write("\nwith the following call:\n", log.conn)
  write("```", log.conn)
  sink(log.conn)
  print(matc)
  sink()
  write("```\n", log.conn)
  #write("With the following arguments:", file=log.conn)
  #write.table(args.df, quote=F, sep=" = ", file=log.conn, row.names=F, col.names=F)
  close(log.conn)
}

Try the polyqtlR package in your browser

Any scripts or data that you put into this service are public.

polyqtlR documentation built on Feb. 2, 2022, 5:09 p.m.