R/marker_info.R

Defines functions print_mrk plot_mrk_info

Documented in plot_mrk_info print_mrk

#' Plot marker information
#'
#' Plots summary statistics for a given marker
#' 
#' @param input.data an object of class \code{mappoly.data}
#' 
#' @param mrk marker name or position in the dataset
#'     
#' @examples
#'  plot_mrk_info(tetra.solcap.geno.dist, 2680)
#'  plot_mrk_info(tetra.solcap.geno.dist, "solcap_snp_c2_23828")
#'     
#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}
#'
#' @references
#'     Mollinari, M., and Garcia, A.  A. F. (2019) Linkage
#'     analysis and haplotype phasing in experimental autopolyploid
#'     populations with high ploidy level using hidden Markov
#'     models, _G3: Genes, Genomes, Genetics_. 
#'     \doi{10.1534/g3.119.400378}
#'     
#' @export
#' @importFrom graphics barplot layout mtext legend 
#' @importFrom stats chisq.test
#' @importFrom plot3D scatter3D
plot_mrk_info <- function(input.data, mrk)
  {
  input_classes <- c("mappoly.data")
  if (!inherits(input.data, input_classes)) {
    stop(deparse(substitute(input.data)), " is not an object of class 'mappoly.data'")
  }
  if(is.numeric(mrk))
  {
    mrk <- mrk[1]
    if(mrk > input.data$n.mrk)
      stop(mrk, " exceeds the number of markers in the dataset")
    mrk <- input.data$mrk.names[mrk]
  }
  oldpar <- par(mar = c(2,2,5,2), bg = "gray98")
  on.exit(par(oldpar))
  if(!mrk%in%input.data$mrk.names)
    stop(deparse(substitute(mrk)), " is not present in ", deparse(substitute(input.data)), " dataset")
  ## Parents dosage
  dp <- input.data$dosage.p1[input.data$mrk.names == mrk]
  dq <- input.data$dosage.p2[input.data$mrk.names == mrk]
  suppressWarnings({
    ## if no probabilities
    if(!is.prob.data(input.data)){
      layout(matrix(c(1,2), ncol = 2), widths = c(1, 2))
      ## Genotype frequencies
      x <- input.data$geno.dose[mrk, ]
      x[x == input.data$ploidy+1] <- NA
      x <- table(as.numeric(x), useNA = "always")
      names(x) <- c(names(x)[-length(x)], "NA") 
      ## empty plot area
      plot(0:100, type = "n", axes = FALSE, xlab = "", ylab = "")
      mtext(side = 3, text = mrk, adj = 0, cex = 1.2, font = 3)
      text(x = 0, y = 90 , labels = paste0("marker #: ", which(input.data$mrk.names == mrk)), adj = 0)
      ## parents dosage
      text(x = 0, y = 80, paste0("Dose in P1: ", dp), adj = 0)
      text(x = 0, y = 70, paste0("Dose in P2: ", dq), adj = 0)
      ## missing data 
      text(x = 0, y = 60 , labels = paste0("Missing: ", round(100*tail(x,1)/sum(x),1), "%"), adj = 0)
      ## p.value for chi-square test under Mendelian segregation
      seg.exp <- segreg_poly(ploidy = input.data$ploidy, 
                             dP = dp, 
                             dQ = dq)
      names(seg.exp) <- 0:input.data$ploidy
      seg.exp <- seg.exp[seg.exp != 0]
      seg.obs <- seg.exp
      seg.obs[names(x)[-length(x)]] <- x[-length(x)]
      pval <- chisq.test(x = seg.obs, p = seg.exp[names(seg.obs)])$p.value
      text(x = 0, y = 50 , labels = paste0("p-value: ", formatC(pval, format = "e", digits = 2)), adj = 0)
      ##genomic position and sequence
      text(x = 0, y = 40 , labels = paste0("sequence: ", input.data$chrom[input.data$mrk.names == mrk]), adj = 0)
      text(x = 0, y = 30 , labels = paste0("seq. position: ", input.data$genome.pos[input.data$mrk.names == mrk]), adj = 0)
      barplot(x, col = c(mp_pallet2(input.data$ploidy + 1)[1:(length(x)-1)], "#404040"))
    } 
    else { ## if genotype probabilities are available
      layout(matrix(c(1,2,3,3), ncol = 2, nrow = 2), widths = c(1, 2))
      ## Genotype frequencies
      x <- input.data$geno.dose[mrk, ]
      x[x == input.data$ploidy+1] <- NA
      x <- table(x, useNA = "always")
      names(x) <- c(names(x)[-length(x)], "NA") 
      ## empty plot area
      plot(0:100, type = "n", axes = FALSE, xlab = "", ylab = "")
      mtext(side = 3, text = mrk, adj = 0, cex = 1.2, font = 3)
      text(x = 0, y = 90 , labels = paste0("marker #: ", which(input.data$mrk.names == mrk)), adj = 0)
      ## parents dosage
      text(x = 0, y = 80, paste0("Dose in P1: ", 
                                 dp), adj = 0)
      text(x = 0, y = 70, paste0("Dose in P2: ",
                                 dq), adj = 0)
      ## missing data 
      text(x = 0, y = 60 , labels = paste0("Missing: ", round(100*tail(x,1)/sum(x),1), "%"), adj = 0)
      ## p.value for chi-square test under Mendelian segregation
      seg.exp <- segreg_poly(ploidy = input.data$ploidy, 
                             dP = dp, 
                             dQ = dq)
      names(seg.exp) <- 0:input.data$ploidy
      seg.exp <- seg.exp[seg.exp != 0]
      seg.obs <- seg.exp
      seg.obs[names(x)[-length(x)]] <- x[-length(x)]
      pval <- chisq.test(x = seg.obs, p = seg.exp[names(seg.obs)])$p.value
      text(x = 0, y = 50 , labels = paste0("p-value: ", formatC(pval, format = "e", digits = 2)), adj = 0)
      
      ##genomic position and sequence
      text(x = 0, y = 40 , labels = paste0("sequence: ", input.data$chrom[input.data$mrk.names == mrk]), adj = 0)
      text(x = 0, y = 30 , labels = paste0("seq. position: ", input.data$genome.pos[input.data$mrk.names == mrk]), adj = 0)
      text(x = 0, y = 20 , labels = paste0("prob. threshold: ", input.data$prob.thres), adj = 0)
      pal <- mp_pallet2(input.data$ploidy + 1)
      names(pal) <- 0:input.data$ploidy 
      op <- par(mar = c(5,3,0,2), cex = .7)
      barplot(x, col = c(na.omit(pal[names(x)]), "#404040"))
      
      
      ## probability distribution of the genotypes
      mrk.name<-mrk
      df<-subset(input.data$geno, 
                 mrk == mrk.name, 
                 select = c(as.character(0:input.data$ploidy), "ind"))
      rownames(df)<-df$ind
      z <- df[rev(do.call(order, as.data.frame(df[,1:(input.data$ploidy+1)]))),]
      z<-z[apply(z[,1:(input.data$ploidy + 1)], 1, function(x) !all(round(x,5)==round(segreg_poly(input.data$ploidy, dp, dq),5))),]
      w<-expand.grid(1:nrow(z), 0:(ncol(z)-2))
      x<-w[,1]
      y<-w[,2]
      pal<-rep(mp_pallet2(input.data$ploidy + 1), each = nrow(z))
      pal[as.numeric(as.matrix(z[,1:(input.data$ploidy + 1)])) < input.data$prob.thres] <- "#404040"
      op<-par(mar = c(2,2,2,2))
      plot3D::scatter3D(x,y, as.matrix(z[,1:(input.data$ploidy + 1)]),  theta = 30, phi = 30, bty = "g",  type = "h", lwd = .3 ,
                        ticktype = "detailed", pch = 19, cex = 0.5, 
                        colvar = NULL, 
                        col = pal,
                        xlab = "Offspring",
                        ylab ="Dose", 
                        zlab = "Genotype probability", 
                        cex.axis = .7, cex.lab = .7, clab = )
      
      ## probability distribution of the genotypes
      # mrk.name <- mrk
      # df <- subset(input.data$geno, 
      #            mrk  ==  mrk.name, 
      #            select = c(as.character(0:input.data$ploidy), "ind"))
      # rownames(df) <- df$ind
      # z <- df[rev(do.call(order, as.data.frame(df[,1:(input.data$ploidy+1)]))),]
      # z <- z[apply(z[,1:(input.data$ploidy + 1)], 1, function(x) !all(round(x,5) == round(segreg_poly(input.data$ploidy, dp, dq),5))),]
      # w <- expand.grid(1:nrow(z), 0:(ncol(z)-2))
      # x <- w[,1]
      # y <- w[,2]
      # pal <- rep(gg_color_hue(input.data$ploidy + 1), each = nrow(z))
      # pal[as.numeric(as.matrix(z[,1:(input.data$ploidy + 1)])) < input.data$prob.thres] <- "#404040"
      # op <- par(mar = c(2,2,2,2))
      # scatterplot3d::scatterplot3d(cbind(x,y, as.double(as.matrix(z[,1:(input.data$ploidy + 1)]))), 
      #                              type = 'h', color = pal, xlab = "Offspring",
      #                              ylab  = "Dose", pch = 20,
      #                              zlab = "Genotype probability", angle = 55,
      #                              xlim = c(0, input.data$n.ind))
    } 
  })
  par(mfrow = c(1,1))
}

#' Summary of a set of markers
#' 
#' Returns information related to a given set of markers
#'
#' @param input.data an object \code{'mappoly.data'}
#' @param mrks marker sequence index (integer vector)
#' @examples
#'  print_mrk(tetra.solcap.geno.dist, 1:5)
#'  print_mrk(hexafake, 256)
#' @export
print_mrk <- function(input.data, mrks){
  L<-vector("list", length(mrks))
  for(i in 1:length(mrks))
  {
    x <- input.data$geno.dose[mrks[i], ]
    x[x == input.data$ploidy+1] <- NA
    if(is.character(mrks[i]))
      cat("\n", mrks[i])
    else
      cat("\n", input.data$mrk.names[mrks[i]])
    cat("\n----------------------------------")
    cat("\n dosage P1: ", input.data$dosage.p1[mrks[i]])
    cat("\n dosage P2: ", input.data$dosage.p2[mrks[i]])
    cat("\n----")
    cat("\n dosage distribution\n")
    z <- table(as.numeric(x), useNA = "always")
    names(z)[is.na(names(z))] <- "mis"
    print(z)
    cat("----")
    cat("\n expected polysomic segregation\n")
    y <- segreg_poly(input.data$ploidy, dP = input.data$dosage.p1[mrks[i]], input.data$dosage.p2[mrks[i]])
    names(y) <- 0:input.data$ploidy
    print(y)
    cat("----------------------------------\n")
    L[[i]] <- list(p1p2 = c(input.data$dosage.p1[mrks[i]], input.data$dosage.p2[mrks[i]]),
                   freq  = z,
                   seg = y)
  }
  invisible(L)
}
mmollina/MAPPoly documentation built on March 8, 2024, 2:04 a.m.