R/RcppExports.R

Defines functions get_window_length get_hash get_hash_for_word count_kmers_str count_kmer_num count_kmers_larger_than_one count_unigrams

Documented in count_kmer_num count_kmers_larger_than_one count_kmers_str count_unigrams get_hash get_hash_for_word get_window_length

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @name get_total_size_of_kmer
#' @title Get the total size of k-mer
#' 
#' @description Computes the number of characters of the result k-mer
#' taking into account the base alphabet. 
#' 
#' @param s  \code{integer} vector of encoded elements of a sequence (see Details)
#' @param d  \code{integer} vector which denotes the gaps in k-mer
#' @param begin_index  \code{integer} representing the begin index (in \code{s}) of the k-mer
#' @param num2str  a \code{hash map} representing the encoding between number and \code{string} representation of each alphabet's item
#' @return \code{int} denoting the total size (number of characters) of k-mer
#' @details Each element of a sequence is previously encoded to an integer in order to make hashing computation
#' more convenient
NULL

#' @name get_total_size_of_kmer
#' @title Get the total size (number of characters) of a k-mer
#' 
#' @description The number of characters of the result k-mer (after decoding from \code{integer} to \code{string})
#' 
#' @param kmer  \code{integer} vector representing the encoded kmer (\link{get_total_size_of_kmer})
#' @param num2str  \code{hash map} representing the encoding between the integer and string
#' @return the number of characters in the result \code{string} that is the result of decoding each \code{integer} from \code{kmer}
NULL

#' @name create_kmer
#' @title Create k-mer
#' 
#' @description Creates k-mer (of type \code{string}) from encoded (\code{integer}) vector
#' based on encoding described in \code{num2str} and kmer_decorator
#' 
#' @param s  \code{integer} vector representing an encoded sequence
#' @param d  \code{integer} vector representing the gaps in k-mer
#' @param begin_index  \code{integer} representing the start of k-mer (in \code{s})
#' @param num2str  \code{hash map} representing encoding of sequence items between \code{integer} and \code{string}
#' @param kmer_decorator  a \code{function} that can add extra characters to k-mer (for example position information)
#' 
#' @return a \code{string} representing a result k-mer (that is used for presentation)
NULL

#' @name create_kmer
#' @title Create k-mer
#' 
#' @description Creates k-mer (of type \code{string}) from encoded (\code{integer}) vector
#' based on encoding described in \code{num2str} and kmer_decorator
#' 
#' @param kmer  \code{integer} vector representing an encoded sequence
#' @param num2str  \code{hash map} representing encoding of sequence items between \code{integer} and \code{string}
#' @param kmer_decorator  a \code{function} that can add extra characters to k-mer (for example position information)
#' 
#' @return a \code{string} representing a result k-mer (that is used for presentation)
NULL

#' @name update_kmers
#' @title Update k-mers in a \code{hash map}
#' 
#' @param kmers  a \code{hash map} reference representing the found k-mers (see details)
#' @param d  \code{integer} vector representing gaps in k-mer
#' @param s  \code{integer} vector representing an encoded sequence
#' @param kmer_hash \code{integer} representing computed hash of k-mer
#' @param kmer_begin_index \code{integer} representing the begin index of k-mer in \code{s}
#' @param num2str  \code{hash map} representing encoding of sequence items between \code{integer} and \code{string}
#' @param kmer_decorator  \code{function} that can add extra characters to \code{string} k-mer (for example position information)
#' 
#' @details k-mers \code{hashmap} contains key-value pairs: key is an \code{integer} representing a hash of k-mer,
#' whereas the value represents a pair: (k-mer \code{string} for presentation, number of k-mer occurrences)
NULL

#' @name add_kmer_if_not_exists
#' @title Add k-mer to a \code{hash map} if it does not exist
#' 
#' @param kmers  \code{hash map} containing k-mers (see \link{update_kmers})
#' @param kmer  encoded \code{integer} vector representing a k-mer
#' @param num2str  \code{hash map} representing encoding between \code{integer} and \code{string}
#' @param kmer_decorator  \code{function} that can add extra characters to the \code{string} representation of k-mer
NULL

#' @name update_kmers_with_alphabet
#' @title Update k-mers with alphabet
#' 
#' @description Generates and adds k-mers (based on the given alphabet) that do not exist in the \code{hash map}.
#' 
#' @param kmers  \code{hash map} reference that stores k-mers (see \link{update_kmers})
#' @param alphabet  \code{integer} vector representing encoded alphabet
#' @param currentKmer \code{integer} vector representing the part of currently generated k-mer
#' @param k  \code{integer} representing the number of k-mer items
#' @param num2str  \code{hash map} representing the sequence encoding between \code{integer} and \code{string}
#' @param kmer_decorator  \code{function} that can add extra characters for \code{string} representation of k-mer (for presentation reasons)
NULL

#' @name is_kmer_allowed
#' @title Is k-mer allowed
#' 
#' @description Checks whether all elements of the given k-mer are contained in the alphabet set
#' @param s  \code{integer} vector of encoded sequence characters
#' @param d  \code{integer} vector representing gaps between elements of k-mer
#' @param begin_index  \code{integer} representing the start index of k-mer in \code{s}
#' @param is_item_allowed \code{hash map} that answers the question whether the element is in the alphabet
#' @return \code{logical} value denoting whether k-mer is valid (contains valid characters that are in the alphabet)
NULL

#' @name count_kmers_helper
#' @title Count k-mers 
#' @description Counts the occurrences of k-mers (the size of k-mer should be larger than one)
#' 
#' @param s  \code{integer} vector representing encoded input sequence
#' @param d  \code{integer} vector representing gaps in k-mer
#' @param alphabet  \code{integer} vector representing encoded alphabet
#' @param num2str  \code{hash map} representing the sequence elements encoding between \code{integer} and \code{string}
#' @param kmer_decorator \code{function} that can add extra characters in order to enhance the presentation of \code{string} k-mer
#' @param pos  \code{logical} value representing whether to count positional k-mers
#' 
#' @return \code{hash map} whose key is a \code{string} presentation of k-mer and value is the number of its occurrences
NULL

#' @name fill_items_coding_maps
#' @title Prepare encoding and decoding \code{hash maps} for sequence items
#' 
#' @details Enumerates each sequence item in order to convert \code{non-integer} values to \code{integer} ones
#' \code{B} is the template Rcpp input type
#' \code{S} is the template c++ input type of one element
#' @param elems  the input elements of a sequence
#' @param val2num  the reference to \code{hash map} representing the encoding to \code{integer} value
#' @param num2str the reference to \code{hash map} representing the (reversed to \code{val2num}) encoding to \code{string} value
#' @param lowest_not_used_num  the reference to \code{integer} value denoting current counter used to encode elements
#' @param val2str_converter  \code{function} that takes a sequence item and returns its string representation that is used for presentation
NULL

#' @name fill_encoded_int_vector
#' @title Encode sequence vector (replace items with numbers)
#' 
#' @details \code{SEQ_TYPE} - the (Rcpp) type of the input sequence
#' \code{ELEM_TYPE} - the type of an item of the input sequence (c++)
#' 
#' @param str_v  the input (Rcpp) sequence
#' @param res  the result (encoded) vector
#' @param val2int  the encoder
NULL

#' @name get_kmers
#' @title Get k-mers
#' 
#' @description Counts the occurrences of k-mers (the size of k-mer should be larger than one)
#' \code{B} - the (Rcpp) type of an input sequence
#' \code{S} - the (c++) type of an item of the sequence
#' 
#' @param s  input sequence
#' @param d  \code{integer} vector representing gaps in k-mer
#' @param alphabet  Rcpp sequence representing alphabet
#' @param val2str_converter  \code{function} representing the conversion to string representation of an item
#' @param kmer_decorator  \code{function} that can add extra characters in order to enhance the presentation of k-mer.
#' @return \code{hash map} containing string representations of k-mers with their occurrence counts 
NULL

#' @name get_window_length
#' @title Length of the window spanned by a k-mer
#'
#' @description Computes the window length of a k-mer, i.e. the total size it
#' occupies: the number of its elements plus the sizes of the gaps.
#'
#' @param d  \code{integer} vector with the distances between consecutive
#' elements of the k-mer
#' @return \code{integer} giving the total window length
#' @export
get_window_length <- function(d) {
  .Call(
    "_kmer_get_window_length",
    d,
    PACKAGE = "kmer"
  )
}

#' @name get_hash
#' @title Hash of a k-mer located in a given sequence
#'
#' @param s  \code{integer} vector representing the input sequence
#' @param d  \code{integer} vector with the gaps between consecutive elements
#' of the k-mer
#' @param begin_index  \code{integer} value giving the begin index of the k-mer
#' @param pos  \code{logical} value stating whether the k-mer is positional
#'
#' @return \code{integer} value that is the result of the hashing function
#' @export
get_hash <- function(s, d, begin_index, pos) {
  .Call(
    "_kmer_get_hash",
    s,
    d,
    begin_index,
    pos,
    PACKAGE = "kmer"
  )
}

#' @name get_hash_for_word
#' @title Hash of a whole sequence (word)
#'
#' @param kmer  \code{integer} vector representing the word to be hashed
#' @return \code{integer} value that is the result of the hashing function
#' @export
get_hash_for_word <- function(kmer) {
  .Call(
    "_kmer_get_hash_for_word",
    kmer,
    PACKAGE = "kmer"
  )
}

#' @name count_kmers_str
#' @title Count k-mers for string sequences (the size of k-mer should be larger than one)
#'
#' @param s  a \code{string} vector holding the input sequence
#' @param d  an \code{integer} vector with the gaps between consecutive
#' elements of the k-mer
#' @param alphabet  a \code{string} vector with the valid elements of a k-mer
#' @param pos  a \code{logical} value stating whether positional k-mers should
#' be generated
#' @return a named vector with counts of k-mers
#'
#' @details K-mers built from elements of \code{alphabet} that do not occur in
#' the input sequence are generated as well.
#'
#' @examples
#' count_kmers_str(
#'   c("a", "b", "c", "d", "x", "y", "z", "z", "a", "a"),
#'   d = c(0, 0),
#'   c("a", "b", "c", "z"),
#'   pos = FALSE
#' )
#' @export
count_kmers_str <- function(s, d, alphabet, pos) {
  .Call(
    "_kmer_count_kmers_str",
    s,
    d,
    alphabet,
    pos,
    PACKAGE = "kmer"
  )
}

#' @name count_kmer_num
#' @title Count k-mers for numeric sequences (the size of k-mer should be larger than one)
#'
#' @param s  a \code{numeric} vector representing an input sequence
#' @param d  an \code{integer} vector representing gaps between consecutive
#' elements of k-mer
#' @param alphabet  a \code{numeric} vector representing valid elements of k-mer
#' @param pos  a \code{logical} value that denotes whether positional k-mers
#' should be generated
#' @return a named vector with counts of k-mers
#'
#' @details K-mers that contain elements from \code{alphabet} but do not exist
#' in the input sequence are also generated.
#'
#' @examples
#' count_kmer_num(
#'   c(1, 2, 3, 5, 3, 7),
#'   d = c(0, 0),
#'   c(1, 2, 3, 4),
#'   pos = FALSE
#' )
#' @export
count_kmer_num <- function(s, d, alphabet, pos) {
  .Call(
    "_kmer_count_kmer_num",
    s,
    d,
    alphabet,
    pos,
    PACKAGE = "kmer"
  )
}

#' @name count_kmers_larger_than_one
#' @title Count k-mers that contain more than one item
#'
#' @param m  \code{character} matrix - each row represents one sequence
#' @param d  an \code{integer} vector representing gaps between consecutive
#' elements of k-mer
#' @param alphabet  a \code{character} vector representing valid elements of
#' k-mer
#' @param pos  a \code{logical} value that denotes whether positional k-mers
#' should be generated
#' @return a named vector with counts of k-mers
#' @examples
#' count_kmers_larger_than_one(
#'   matrix(data = c("a", "b", "c", "b", "c", "a"), nrow = 2),
#'   c(0),
#'   c("a", "b", "c"),
#'   FALSE
#' )
#' @importFrom  RcppParallel RcppParallelLibs
#' @export
count_kmers_larger_than_one <- function(m, d, alphabet, pos) {
  .Call(
    "_kmer_count_kmers_larger_than_one",
    m,
    d,
    alphabet,
    pos,
    PACKAGE = "kmer"
  )
}

#' @name count_unigrams
#' @title Count unigrams
#' @param m  \code{string} matrix holding one sequence per row
#' @param alphabet  \code{string} vector with the valid elements from which
#' unigrams are constructed
#' @param pos  \code{logical} vector stating whether positional k-mers are
#' counted
#' @return named \code{integer} vector with the counts of unigrams
#' @export
count_unigrams <- function(m, alphabet, pos) {
  .Call(
    "_kmer_count_unigrams",
    m,
    alphabet,
    pos,
    PACKAGE = "kmer"
  )
}
piotr-ole/kmer documentation built on Nov. 14, 2019, 8:10 p.m.