R/RcppExports.R

Defines functions .compute_QmatfromF2samples .generateF4names .compute_blockDdenom .compute_F4DfromF2samples .compute_F4fromF2samples .compute_F4fromF2 .generateF3names .compute_F3fromF2samples .compute_F3fromF2 .compute_Q2 .compute_H1 .compute_Ddenom_bjmeans .compute_F2_bjmeans .compute_Q_bjmeans .compute_Ddenom .find_indelneighbor_idx .extract_allele_names .extract_nonvscan_counts .extract_vscan_counts .scan_allele_info

Documented in .compute_blockDdenom .compute_Ddenom .compute_Ddenom_bjmeans .compute_F2_bjmeans .compute_F3fromF2 .compute_F3fromF2samples .compute_F4DfromF2samples .compute_F4fromF2 .compute_F4fromF2samples .compute_H1 .compute_Q2 .compute_Q_bjmeans .compute_QmatfromF2samples .extract_allele_names .extract_nonvscan_counts .extract_vscan_counts .find_indelneighbor_idx .generateF3names .generateF4names .scan_allele_info

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @title scan_allele_info
#' @name scan_allele_info
#' @rdname scan_allele_info
#'
#' @description
#' Scan allele information in ALT field of a vcf
#'
#' @param allele_info a character string vector (ALT field of the vcf)
#'
#' @details
#' Scan allele information in the ALT field of a vcf to identify the number of alleles and whether an indel is present
#'
#' @return Return a vector with two elements consisting of i) the number of alleles (1+number of commas)
#' and ii) an indicator (0 or 1) of whether an indel is detected
#' 
#' @examples
#' .scan_allele_info(c("A,C","T","AAT"))
#' 
#' @export
.scan_allele_info <- function(allele_info) {
  # Thin wrapper: forward the argument unchanged to the compiled routine
  # '_poolfstat_scan_allele_info' looked up in the poolfstat package.
  .Call("_poolfstat_scan_allele_info", allele_info, PACKAGE = "poolfstat")
}

#' @title extract_vscan_counts
#' @name extract_vscan_counts
#' @rdname extract_vscan_counts
#'
#' @description
#' Extract VarScan counts
#'
#' @param vcf_data a matrix of String containing count information in VarScan format
#' @param ad_idx the index of the FORMAT AD field  
#' @param rd_idx the index of the FORMAT RD field 
#'
#' @details Extract VarScan counts and return read counts for the reference and alternate allele.
#' For VarScan generated vcf, SNPs with more than one alternate allele are discarded 
#' (because only a single count is then reported in the AD fields) making the min.rc unavailable (of vcf2pooldata).
#' The VarScan --min-reads2 option might replace to some extent the min.rc functionality although 
#' SNP where the two major alleles in the Pool-Seq data are different from the reference allele 
#' (e.g., expected to be more frequent when using a distantly related reference genome for mapping) 
#' will be disregarded.
#' @return A numeric matrix of read count with nsnp rows and 2*npools columns.
#' The first npools columns consist of read count for the reference allele (RD),
#' columns npools+1 to 2*npools consist of read coverage (RD+AD)
#' @examples
#' .extract_vscan_counts(rbind(c("0/0:0:20","1/1:18:1"),c("0/1:12:15","1/1:27:2")),3,2)
#' 
#' @export
.extract_vscan_counts <- function(vcf_data, ad_idx, rd_idx) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_extract_vscan_counts' looked up in the poolfstat package.
  .Call(
    "_poolfstat_extract_vscan_counts",
    vcf_data, ad_idx, rd_idx,
    PACKAGE = "poolfstat"
  )
}

#' @title extract_nonvscan_counts
#' @name extract_nonvscan_counts
#' @rdname extract_nonvscan_counts
#'
#' @description
#' Extract counts from a vcf produced by a caller other than VarScan (e.g., bcftools, FreeBayes, GATK)
#'
#' @param vcf_data a matrix of String containing count information
#' @param nb_all a vector containing the number of alleles for the different markers
#' @param ad_idx the index of the FORMAT AD field  
#' @param min_rc Minimal allowed read count per base (same as min.rc option in \code{\link{vcf2pooldata}}) 
#'
#' @details Extract counts from a vcf produced by a caller other than VarScan and return read counts for the reference and alternate allele
#' @return A numeric matrix of read count with nsnp rows and 2*npools+6 columns.
#' The first npools columns consist of read count for the reference allele,
#' columns npools+1 to 2*npools consist of read coverage. The last 6 columns correspond to 
#' the index of the two most frequent alleles (idx_all1 and idx_all2) and their count (cnt_all1 and cnt_all2);
#' the min_rc filtering criterion and count of variant (cnt_bases) other than two first most frequent. The min_rc crit is
#' set to -1 for polymorphisms with more than 2 alleles and with the third most frequent alleles having 
#' more than min_rc count 
#' @examples
#' .extract_nonvscan_counts(rbind(c("0/0:20,0","1/1:1,18"),c("0/2:12,1,15","1/1:27,1,0")),c(2,3),2,0)
#' .extract_nonvscan_counts(rbind(c("0/0:20,0","1/1:1,18"),c("0/2:12,1,15","1/1:27,1,0")),c(2,3),2,2)
#' @export
.extract_nonvscan_counts <- function(vcf_data, nb_all, ad_idx, min_rc) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_extract_nonvscan_counts' looked up in the poolfstat package.
  .Call(
    "_poolfstat_extract_nonvscan_counts",
    vcf_data, nb_all, ad_idx, min_rc,
    PACKAGE = "poolfstat"
  )
}

#' @title extract_allele_names
#' @name extract_allele_names
#' @rdname extract_allele_names
#'
#' @description
#' Extract the alleles from the REF and ALT fields
#'
#' @param allele_info a character string vector (concatenated REF and ALT field of the vcf)
#' @param allele_idx Matrix with indexes of the two alleles of interest for the different markers
#'
#' @details
#' Extract the alleles from the REF and ALT fields
#' 
#' @return Return a matrix with the two alleles after parsing the alleles info
#' 
#' @examples
#' .extract_allele_names(c("A,C","A,C,T"),rbind(c(1,2),c(1,3)))
#' 
#' @export
.extract_allele_names <- function(allele_info, allele_idx) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_extract_allele_names' looked up in the poolfstat package.
  .Call(
    "_poolfstat_extract_allele_names",
    allele_info, allele_idx,
    PACKAGE = "poolfstat"
  )
}

#' @title find_indelneighbor_idx
#' @name find_indelneighbor_idx
#' @rdname find_indelneighbor_idx
#'
#' @description
#' Search for the closest indels of the markers
#'
#' @param contig a character string vector corresponding to the CHR field value of the vcf for the markers
#' @param position an integer vector corresponding to the POSITION value for the markers 
#' @param indels_idx vector of (0-indexed) indices of indels
#' @param min_dist same as min.dist.from.indels option in \code{\link{vcf2pooldata}}
#' @param indels_size size of the indels (associated to indels_idx)
#'
#' @details
#' Identify if the SNPs are close to an indel
#' 
#' @return Return a vector consisting of 1 (if the marker is close to an indel) or 0 (if not)
#' 
#' @examples
#' .find_indelneighbor_idx(c("chr1","chr1","chr1"),c(1000,1004,1020),1,5,2)
#' 
#' @export
.find_indelneighbor_idx <- function(contig, position, indels_idx, min_dist, indels_size) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_find_indelneighbor_idx' looked up in the poolfstat package.
  .Call(
    "_poolfstat_find_indelneighbor_idx",
    contig, position, indels_idx, min_dist, indels_size,
    PACKAGE = "poolfstat"
  )
}

#' @title poppair_idx
#' @name poppair_idx
#' @rdname poppair_idx
#'
#' @description
#' Compute the index of the pairwise comparison from the idx of each pop
#'
#' @param idx_pop1 Integer giving the (0-indexed) index of the first pop 
#' @param idx_pop2 Integer giving the (0-indexed) index of the second pop 
#' @param nidx Integer giving the total number of indexes (i.e., number of pops)
#'
#' @details
#' If idx_pop2 < idx_pop1, indexes are reversed
#' 
#' @return Return the (0-indexed) index for the row associated to the pairwise comparison in the ordered flat list of all (npop*(npop-1))/2 pairwise stats
#' 
#' @examples
#' #
NULL

#' @title bjack_cov
#' @name bjack_cov
#' @rdname bjack_cov
#'
#' @description
#' Compute the block-jackknife covariance between two stats
#'
#' @param stat1 Vector of block-jackknife values for the first stat
#' @param stat2 Vector of block-jackknife values for the second stat
#'
#' @details
#'  Compute the block-jackknife covariance between two stats with correction
#' 
#' @return Covariance values
#' 
#' @examples
#' #
NULL

#' @title compute_Ddenom
#' @name compute_Ddenom
#' @rdname compute_Ddenom
#'
#' @description
#' Compute the denominator of Dstats
#'
#' @param snpQ2 the nsnp by (npop*(npop-1))/2 matrix of all pairwise Q2 estimates
#' @param f2idx a matrix of nDstat by 2 giving the index of the Q2 required to compute the denominator of the different F4 
#' @param verbose if TRUE progression bar is printed on the terminal
#'
#' @details
#' Compute the denominator of Dstats
#' 
#' @return Return a vector of the denominator of the nDstat
#' 
#' @examples
#' #
#' @export
.compute_Ddenom <- function(snpQ2, f2idx, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_Ddenom' looked up in the poolfstat package.
  .Call("_poolfstat_compute_Ddenom", snpQ2, f2idx, verbose, PACKAGE = "poolfstat")
}

#' @title compute_Q_bjmeans
#' @name compute_Q_bjmeans
#' @rdname compute_Q_bjmeans
#'
#' @description
#' Compute the block-jackknife mean of Q values
#'
#' @param snpQ matrix of nsnp by nQ estimates of Q (e.g., Q1 or Q2)
#' @param snp_bj_id integer vector of length nsnp giving the block index of each SNP 
#' @param verbose if TRUE progression bar is printed on the terminal
#'
#' @details
#' Compute the block-jackknife mean of Q values
#' 
#' @return Return a vector with the block-jackknife mean estimates for the nQ values
#' 
#' @examples
#' #
#' @export
.compute_Q_bjmeans <- function(snpQ, snp_bj_id, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_Q_bjmeans' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_Q_bjmeans",
    snpQ, snp_bj_id, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title compute_F2_bjmeans
#' @name compute_F2_bjmeans
#' @rdname compute_F2_bjmeans
#'
#' @description
#' Compute the block-jackknife mean of F2 values
#'
#' @param snpQ1 the nsnp by npop matrix of Q1 estimates
#' @param snpQ2 the nsnp by (npop*(npop-1))/2 matrix of all pairwise Q2 estimates
#' @param q1_idx the nsnp by 2 matrix with the indexes of the Q1 needed to compute each F2
#' @param snp_bj_id integer vector of length nsnp giving the block index of each SNP 
#' @param verbose if TRUE progression bar is printed on the terminal
#'
#' @details
#' Compute the block-jackknife mean of F2 values
#' 
#' @return Return a vector with the block-jackknife mean estimates of the F2 values
#' 
#' @examples
#' #
#' @export
.compute_F2_bjmeans <- function(snpQ1, snpQ2, q1_idx, snp_bj_id, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_F2_bjmeans' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_F2_bjmeans",
    snpQ1, snpQ2, q1_idx, snp_bj_id, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title compute_Ddenom_bjmeans
#' @name compute_Ddenom_bjmeans
#' @rdname compute_Ddenom_bjmeans
#'
#' @description
#' Compute the block-jackknife mean of Dstat denominator
#'
#' @param snpQ2 the nsnp by (npop*(npop-1))/2 matrix of all pairwise Q2 estimates
#' @param f2idx a matrix of nDstat by 2 giving the index of the Q2 required to compute the Dstat denominator 
#' @param snp_bj_id integer vector of length nsnp giving the block index of each SNP 
#' @param verbose if TRUE progression bar is printed on the terminal
#'
#' @details
#' Compute the block-jackknife mean of Dstat denominator
#' 
#' @return Return a vector with the block-jackknife mean estimates of the Dstat denominator
#' 
#' @examples
#' #
#' @export
.compute_Ddenom_bjmeans <- function(snpQ2, f2idx, snp_bj_id, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_Ddenom_bjmeans' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_Ddenom_bjmeans",
    snpQ2, f2idx, snp_bj_id, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title compute_H1
#' @name compute_H1
#' @rdname compute_H1
#'
#' @description
#' Compute (uncorrected) 1-Q1 for each block-jackknife block (if any) and over all the SNPs (i.e., either within or outside blocks)
#'
#' @param refcount Matrix of nsnpxnpop with counts (genotype or reads) for the reference allele
#' @param totcount Matrix of nsnpxnpop with total counts or read coverages
#' @param nblocks Integer giving the number of block-jackknife blocks (may be 0 if no block-jackknife)
#' @param block_id Integer vector of length nsnps with the (0-indexed) id of the block to which each SNP belongs (-1 for SNPs outside blocks)
#' @param verbose Logical (if TRUE progression bar is printed on the terminal)
#'
#' @details
#' Compute all the (uncorrected) H1=1-Q1 for each block-jackknife block (if any) and overall SNPs (within or outside blocks). 
#' It is indeed more convenient to compute H1 (rather than Q1) to apply corrections afterwards within R function 
#' 
#' @return Return a matrix with npops rows and nblocks+1 columns giving the mean H1 of each pop within each block and for all SNPs (last column)
#' 
#' @examples
#' #
#' @export
.compute_H1 <- function(refcount, totcount, nblocks, block_id, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_H1' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_H1",
    refcount, totcount, nblocks, block_id, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title compute_Q2
#' @name compute_Q2
#' @rdname compute_Q2
#'
#' @description
#' Compute all Q2 for each block-jackknife block (if any) and overall SNPs (within or outside blocks)
#'
#' @param refcount Matrix of nsnpxnpop with counts (genotype or reads) for the reference allele
#' @param totcount Matrix of nsnpxnpop with total counts or read coverages
#' @param nblocks Integer giving the number of block-jackknife blocks (may be 0 if no block-jackknife)
#' @param block_id Integer vector of length nsnps with the (0-indexed) id of the block to which each SNP belongs (-1 for SNPs outside blocks)
#' @param verbose Logical (if TRUE progression bar is printed on the terminal)
#'
#' @details
#' Compute all Q2 for each block-jackknife block (if any) and overall SNPs (within or outside blocks). 
#' 
#' @return Return a matrix with npops*(npops-1)/2 rows and nblocks+1 columns giving the mean Q2 of each pairwise pop comp. within each block and for all SNPs (last column)
#' 
#' @examples
#' #
#' @export
.compute_Q2 <- function(refcount, totcount, nblocks, block_id, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_Q2' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_Q2",
    refcount, totcount, nblocks, block_id, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title compute_F3fromF2
#' @name compute_F3fromF2
#' @rdname compute_F3fromF2
#'
#' @description
#' Compute all F3 from overall F2 values
#'
#' @param F2val Numeric vector of length nF2=(npop*(npop-1))/2 with all pairwise F2 estimates
#' @param Hval Numeric vector of length npop with all within pop heterozygosity estimates
#' @param npops Integer giving the number of populations
#'
#' @details
#' Compute F3 and F3star estimates from F2 (and heterozygosities)
#' 
#' @return Return a matrix with nF3=npops*(npops-1)*(npops-2)/2 rows and 2 columns corresponding to the F3 and F3star estimates
#' 
#' @examples
#' #
#' @export
.compute_F3fromF2 <- function(F2val, Hval, npops) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_F3fromF2' looked up in the poolfstat package.
  .Call("_poolfstat_compute_F3fromF2", F2val, Hval, npops, PACKAGE = "poolfstat")
}

#' @title compute_F3fromF2samples
#' @name compute_F3fromF2samples
#' @rdname compute_F3fromF2samples
#'
#' @description
#' Compute all F3 from F2 values obtained from each block-jackknife block
#'
#' @param blockF2 Numeric Matrix with nF2=(npop*(npop-1))/2 rows and nblocks columns matrix containing pairwise-pop F2 estimates for each block-jackknife sample (l.o.o.)
#' @param blockHet Numeric Matrix with npop rows and nblocks columns containing all within pop heterozygosity estimates for each block-jackknife sample (l.o.o.)
#' @param npops Integer giving the number of populations
#' @param verbose Logical (if TRUE progression bar is printed on the terminal)
#'
#' @details
#' Compute F3 and F3star estimates and their s.e. based on block-jackknife estimates of all F2 (and heterozygosities)
#' 
#' @return Return a matrix with nF3=npops*(npops-1)*(npops-2)/2 rows and four columns corresponding to the mean and the s.e. of F3 and the mean and s.e. of F3star
#' 
#' @examples
#' #
#' @export
.compute_F3fromF2samples <- function(blockF2, blockHet, npops, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_F3fromF2samples' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_F3fromF2samples",
    blockF2, blockHet, npops, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title generateF3names
#' @name generateF3names
#' @rdname generateF3names
#'
#' @description
#' Generate all names for F3 stats (same order as computation)
#'
#' @param popnames String vector with the names of all the pops
#'
#' @details
#' Generate all the npops*(npops-1)*(npops-2)/2 names for F3 stats (same order as computation)
#' 
#' @return Return a string matrix with 4 columns including the complete F3 configuration names (of the form Px;P1,P2), and the names of each pop involved in the configuration
#' 
#' @examples
#' #
#' @export
.generateF3names <- function(popnames) {
  # Thin wrapper: forward the argument unchanged to the compiled routine
  # '_poolfstat_generateF3names' looked up in the poolfstat package.
  .Call("_poolfstat_generateF3names", popnames, PACKAGE = "poolfstat")
}

#' @title compute_F4fromF2
#' @name compute_F4fromF2
#' @rdname compute_F4fromF2
#'
#' @description
#' Compute all F4 from overall F2 and Q2 values
#'
#' @param F2val Numeric vector of length nF2=(npop*(npop-1))/2 with all pairwise F2 estimates
#' @param npops Integer giving the number of populations
#'
#' @details
#' Compute F4 from F2 (and heterozygosities)
#' 
#' @return Return a vector of length nF4=(npops*(npops-1)/2) * ((npops-2)*(npops-3)/2) / 2 corresponding to all the F4 estimates for all possible configurations
#' 
#' @examples
#' #
#' @export
.compute_F4fromF2 <- function(F2val, npops) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_F4fromF2' looked up in the poolfstat package.
  .Call("_poolfstat_compute_F4fromF2", F2val, npops, PACKAGE = "poolfstat")
}

#' @title compute_F4fromF2samples
#' @name compute_F4fromF2samples
#' @rdname compute_F4fromF2samples
#'
#' @description
#' Compute all F4 from F2 values obtained from each block-jackknife block
#'
#' @param blockF2 Numeric Matrix with nF2=(npop*(npop-1))/2 rows and nblocks columns matrix containing pairwise-pop F2 estimates for each block-jackknife sample (l.o.o.)
#' @param npops Integer giving the number of populations
#' @param verbose Logical (if TRUE progression bar is printed on the terminal)
#'
#' @details
#' Compute F4 estimates and their s.e. based on block-jackknife estimates of all F2 (and heterozygosities)
#' 
#' @return Return a matrix with nF4=(npops*(npops-1)/2) * ((npops-2)*(npops-3)/2) / 2 rows and two columns corresponding to the mean and the s.e. of F4 estimates for all possible configurations
#' 
#' @examples
#' #
#' @export
.compute_F4fromF2samples <- function(blockF2, npops, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_F4fromF2samples' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_F4fromF2samples",
    blockF2, npops, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title compute_F4DfromF2samples
#' @name compute_F4DfromF2samples
#' @rdname compute_F4DfromF2samples
#'
#' @description
#' Compute all F4 and Dstat from F2 values obtained from each block-jackknife block
#'
#' @param blockF2 Numeric Matrix with nF2=(npop*(npop-1))/2 rows and nblocks columns matrix containing pairwise-pop F2 estimates for each block-jackknife sample (l.o.o.)
#' @param blockDenom Numeric Matrix with nF4=(npops*(npops-1)/2)*((npops-2)*(npops-3)/2)/2 rows and nblocks columns containing the estimates of the denominator of Dstat (see compute_blockDdenom) for each block-jackknife sample (l.o.o.)
#' @param npops Integer giving the number of populations
#' @param verbose Logical (if TRUE progression bar is printed on the terminal)
#'
#' @details
#' Compute F4 and D estimates and their s.e. based on block-jackknife estimates of all F2 (and heterozygosities)
#' 
#' @return Return a matrix with nF4=(npops*(npops-1)/2)*((npops-2)*(npops-3)/2)/2 rows and four columns corresponding to the mean and the s.e. of F4 and the mean and s.e. of Dstat
#' 
#' @examples
#' #
#' @export
.compute_F4DfromF2samples <- function(blockF2, blockDenom, npops, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_F4DfromF2samples' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_F4DfromF2samples",
    blockF2, blockDenom, npops, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title compute_blockDdenom
#' @name compute_blockDdenom
#' @rdname compute_blockDdenom
#'
#' @description
#' Compute the denominator of the Dstat for all quadruplet configuration and each block-jackknife block (if any) and overall SNPs (within or outside blocks)
#'
#' @param refcount Matrix of nsnpxnpop with counts (genotype or reads) for the reference allele
#' @param totcount Matrix of nsnpxnpop with total counts or read coverages
#' @param nblocks Integer giving the number of block-jackknife blocks (may be 0 if no block-jackknife)
#' @param block_id Integer vector of length nsnps with the (0-indexed) id of the block to which each SNP belongs (-1 for SNPs outside blocks)
#' @param verbose Logical (if TRUE progression bar is printed on the terminal)
#'
#' @details
#' Compute the denominator of the Dstat for all quadruplet configuration and each block-jackknife block (if any) and overall SNPs (within or outside blocks)
#' 
#' @return Return a matrix with nf4=(npops*(npops-1)/2)*((npops-2)*(npops-3)/2)/2 rows and nblocks+1 columns giving the mean Dstat-denominator (1-Q2ab)(1-Q2cd)
#'  for all quadruplet configuration and within each block-jackknife sample and over all SNPs (last column)
#' 
#' @examples
#' #
#' @export
.compute_blockDdenom <- function(refcount, totcount, nblocks, block_id, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_blockDdenom' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_blockDdenom",
    refcount, totcount, nblocks, block_id, verbose,
    PACKAGE = "poolfstat"
  )
}

#' @title generateF4names
#' @name generateF4names
#' @rdname generateF4names
#'
#' @description
#' Generate all names for F4 stats (same order as computation)
#'
#' @param popnames String vector with the names of all the pops
#'
#' @details
#' Generate all the nf4=(npops*(npops-1)/2)*((npops-2)*(npops-3)/2)/2 names for F4 stats (same order as computation)
#' 
#' @return Return a string matrix with 5 columns including the complete F4 configuration names (of the form P1,P2;P3,P4), and the names of each pop involved in the configuration
#' 
#' @examples
#' #
#' @export
.generateF4names <- function(popnames) {
  # Thin wrapper: forward the argument unchanged to the compiled routine
  # '_poolfstat_generateF4names' looked up in the poolfstat package.
  .Call("_poolfstat_generateF4names", popnames, PACKAGE = "poolfstat")
}

#' @title compute_QmatfromF2samples
#' @name compute_QmatfromF2samples
#' @rdname compute_QmatfromF2samples
#'
#' @description
#' Compute the Qmat matrix (error covariance between all F2 and F3 measures) from F2 block-jackknife estimates
#'
#' @param blockF2 Numeric Matrix with nF2=(npop*(npop-1))/2 rows and nblocks columns matrix containing pairwise-pop F2 estimates for each block-jackknife sample (l.o.o.)
#' @param npops Integer giving the number of populations
#' @param verbose Logical (if TRUE progression bar is printed on the terminal)
#'
#' @details
#' Compute the error covariance matrix Qmat (between all F2 and F3 measures) from F2 block-jackknife estimates (by recomputing all F3 for all blocks)
#' 
#' @return Return the (nF2+nF3)*(nF2+nF3) error covariance (symmetric) matrix
#' 
#' @examples
#' #
#' @export
.compute_QmatfromF2samples <- function(blockF2, npops, verbose) {
  # Thin wrapper: forward the arguments, in order, to the compiled routine
  # '_poolfstat_compute_QmatfromF2samples' looked up in the poolfstat package.
  .Call(
    "_poolfstat_compute_QmatfromF2samples",
    blockF2, npops, verbose,
    PACKAGE = "poolfstat"
  )
}

Try the poolfstat package in your browser

Any scripts or data that you put into this service are public.

poolfstat documentation built on Sept. 8, 2023, 5:49 p.m.