Nothing
#' @title copy_detect
#'
#' @description Separates two or more gene copies from a single subset of short reads.
#'
#' @param filename A fasta file contains short reads from a single subset generated by "subset_downsize".
#'
#' @param copy_number An integer (e.g. 2,3, or 4) giving the anticipated number of gene copies in the input file.
#'
#' @param verbose Turn on (verbose=1; default) or turn off (verbose=0) the output.
#'
#' @return A fasta alignment of the anticipated number of gene copies.
#'
#' @importFrom seqinr read.fasta write.fasta
#'
#' @importFrom stringr str_sort
#'
#' @importFrom kmer otu
#'
#' @importFrom Biostrings readDNAStringSet
#'
#' @importFrom ape as.character.DNAbin read.FASTA
#'
#' @importFrom DECIPHER ConsensusSequence
#'
#' @importFrom beepr beep
#'
#' @examples
#' \dontrun{
#' copy_detect("inst/extdata/toysubset.fasta",2,1)
#' }
#'
#' @export copy_detect
#'
copy_detect<-function(filename, copy_number, verbose=1)
{
filename_short <- gsub("[:.:].*","", filename)
sink(paste0(filename_short, "_log.txt"), append=FALSE, split=TRUE) # begin to record log
error_log_function <- function() {
cat(geterrmessage(), file="Error_log.txt", append=T)
}
if (copy_number<=1) stop ("The anticipated copy number must be a number larger than one!")
Sub_set <- ape::as.character.DNAbin(ape::read.FASTA(file=filename, type = "DNA"))
if (verbose) { cat(paste0("Clustering analyses for ", filename,"\n"))}
# find the threshold range for OTU to find the major clusters (number=copy_number) for each subset
for (m in seq(0.3,1, by = 0.1)) {
Subset_OTU <- kmer::otu(Sub_set, k = 5, threshold = m, method = "central", nstart = 20)
if (verbose) { cat(paste0("threshold = ",m),"\n")}
if (verbose) { cat(unique(Subset_OTU),"\n")}
if (length(unique(Subset_OTU))>=copy_number) {break}
}
# try different threshold values in the range found above
if (length(unique(Subset_OTU))>copy_number) {
for (j in seq(m-0.09,m, by = 0.01)) {
Subset_OTU <- kmer::otu(Sub_set, k = 5, threshold = j, method = "central", nstart = 20)
if (verbose) {cat(paste0("threshold = ",j),"\n")}
if (verbose) {cat( unique(Subset_OTU),"\n")}
if (length(unique(Subset_OTU))>=copy_number) {break}
}
}
reads_each_cluster <- sapply(unique(Subset_OTU), function(x) length(which(Subset_OTU==x)))
cluster_DF <- cbind(as.data.frame(unique(Subset_OTU)), as.data.frame(reads_each_cluster))[order(-reads_each_cluster),]
names(cluster_DF) <- c("cluster_num","cluster_total")
if (verbose) {cat("Number of reads in each cluster\n")}
if (verbose) {cat(reads_each_cluster,"\n")}
for (l in (1:length(cluster_DF$cluster_num))){
Picked_cluster <- Sub_set[which(Subset_OTU==cluster_DF$cluster_num[l])]
if (verbose) {cat(paste0("Number of reads in picked cluster ",l, " = ", length(Picked_cluster),"\n"))}
# calcuate the consensus sequence for the clusters of the subset
seqinr::write.fasta(sequences = Picked_cluster, names = labels(Picked_cluster), file.out = paste0(filename_short,"_cluster_",l,".fasta"))
seqinr::write.fasta(sequences = as.character(DECIPHER::ConsensusSequence(Biostrings::readDNAStringSet(paste0(filename_short,"_cluster_",l,".fasta"),format="fasta",nrec=-1L, skip=0L),threshold = 0.4,
ambiguity = TRUE, noConsensusChar = "N")[1]),names = paste0(filename_short,"_cluster_",l,"_consensus"), file.out = paste0(filename_short,"_cluster_",l,"_consensus.fasta"))
}
# put together all the consensus sequences into one file
All_consensus <- lapply(1:copy_number, function (x) seqinr::read.fasta(file = paste0(filename_short,"_cluster_",x,"_consensus.fasta"), seqtype = "DNA",
as.string = TRUE,forceDNAtolower = FALSE,set.attributes = FALSE, whole.header = TRUE))
seqinr::write.fasta(sequences=All_consensus, names=names(as.data.frame(All_consensus)), file.out=paste0(filename_short,"_consensus_list.fasta"))
cat("Run finished!\n")
beepr::beep(sound = 1, expr = NULL) # make a sound when run finishes
options("error" = error_log_function)
sink() # turn off log
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.