#' @title Calculate gene frequency
#'
#' @description This function takes as input the output data frame of countAlleles
#' (more specifically, only uses its second and third columns) and computes the
#' frequency per gene given a two-column data frame of gene IDs and counts across
#' a bacterial population.
#'
#' @param x a two-column data frame where the first column stores gene names and
#' the second column stores allele counts.
#' Columns must not be factor variables.
#' @param n total number of bacterial isolates/strains in a population
#' @param ord a logical parameter specifying whether the output data frame should
#' be sorted by the gene frequency
#'
#' @examples
#' mapping <- countAlleles(...)
#' gf <- calcGeneFreq(x = mapping[, c("gene", "count")], n = 1125) # assuming
#' there are 1125 strains
#'
#' @author Yu Wan (\email{wanyuac@@126.com})
#' @export
#
# Copyright 2018 Yu Wan <wanyuac@126.com>
# Licensed under the Apache License, Version 2.0
# First edition: 26 May 2017; the latest edition: 4 Sep 2018
calcGeneFreq <- function(x, n, ord = TRUE) {
names(x) <- c("gene", "count")
genes <- unique(x$gene)
y <- data.frame(gene = character(0), count = integer(0), freq = numeric(0),
n_a = integer(0), stringsAsFactors = FALSE) # n_a: number of alleles
for (g in genes) {
r <- subset(x, gene == g) # There is at least one row being selected.
c <- sum(r$count) # total number of times observing the current gene
y.new <- data.frame(gene = g, count = c, freq = round(c / n * 100, digits = 2),
n_a = nrow(r), stringsAsFactors = FALSE)
y <- rbind.data.frame(y, y.new, stringsAsFactors = FALSE)
}
y$class <- getGeneClass(y$gene)
y <- y[, c("gene", "class", "count", "freq", "n_a")]
if (ord) {
y <- y[order(y$freq, decreasing = TRUE), ]
}
rownames(y) <- NULL
return(y)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.