##' convert 454 summary file for generateGapFile function
##'
##'
##' @title convert_summary_file
##' @param summary_file summary_file
##' @param out_file out_file
##' @return NULL
##' @importFrom utils read.csv
##' @export
##' @author Guangchuang Yu
convert_summary_file <- function(summary_file, out_file="summary2.csv") {
## in summary file, sequence name contain something like CK_GY_4994_2014,
## which is use to match read file, fa.txt.
## but in 454 runs, read file only contain information like RL18.
## this function is to convert the filed, CK_GY_4994_2014, to RL18.
sf <- read.csv(summary_file)
sf[, "name"] <- gsub("(RL\\d+)", "_\\1", sub("_.*", "", sf[, "name"]))
write.csv(sf, file=out_file)
}
##' @importFrom Biostrings toString
##' @importFrom Biostrings DNAStringSet
identityRatio <- function(aln) {
fa <- DNAStringSet(aln)
seqs <- sapply(fa, toString)
seqs <- sapply(seqs, strsplit, split="")
sum(seqs[[1]] == seqs[[2]])/ length(seqs[[1]])
}
getIdx <- function(queryID, sourceID) {
unlist(sapply(queryID, function(x) which(x == sourceID)))
}
pause <- function() {
## print("press ENTER to continue...")
readline()
invisible()
}
aln2seqDF <- function(aln) {
## seqs <- aln$seqs[,2]
seqs <- sapply(aln, toString)
seq.df <- do.call("rbind", strsplit(seqs, ""))
return(seq.df)
}
printFormatSeq <- function(seq, window=80) {
jj <- seq(1, nchar(seq), window)
if (jj[length(jj)] < nchar(seq)) {
jj <- c(jj, nchar(seq))
}
for (p in 1:(length(jj)-1)) {
cat(substring(seq, jj[p], jj[p+1]), "\n")
}
}
addFooter <- function(file) {
sink(file, append=TRUE)
cat("\n")
cat("## Info\n")
cat("\nThis Report was generated by `skleid` version", as.character(packageVersion("skleid")),
".\nContact [Guangchuang](mailto:gcyu@connect.hku.hk) if you need helps.\n")
sink()
}
getFiles <- function(path) {
ff <- list.files(path=path)
ff <- paste(path, ff, sep="/")
return(ff)
}
moveUnknownFile <- function(contig.folder) {
contig <- getFiles(contig.folder)
sc <- getSampleID(contig)
idx <- which(sc == contig)
ufolder <- "uknown"
if (length(idx) >= 1) {
moveFile(contig[idx], contig.folder, ufolder)
}
}
moveFile <- function(files, from, to) {
if (!file.exists(to))
dir.create(to)
for (ff in files) {
file.rename(ff, to=sub(from, to, ff))
}
}
moveEmptyFile <- function(folder) {
ff <- getFiles(folder)
ii <- which(file.info(ff)$size == 0)
if (length(ii) > 0) {
moveFile(ff[ii], folder, "empty")
}
}
getMixedFileIndex <- function(files) {
which(sapply(files, isMixed))
}
isMixed <- function(file) {
sg <- get_sid_gn(file)
prot <- gsub("[SRL]+\\d+([HNMP][APSB]\\d*)", "\\1", sg)
prot.cutoff <- c(rep(2000, 4), 1100, rep(3000, 3))
names(prot.cutoff) <- c("HA", "NA", "MP", "NP", "NS", "PA", "PB1", "PB2")
res <- file.info(file)$size > prot.cutoff[prot]
## NDV that is not exists
if (is.na(res))
return(TRUE)
return(res)
}
getSampleID <- function(contig) {
sg <- get_sid_gn(contig)
sc <- gsub("([SRL]+\\d+)[HNMP][APSB]\\d*.*", "\\1", sg)
return(sc)
}
get_sid_gn <- function(files) {
sc <- files
files <- sub(".*/", "", files)
idx <- grep("[SRL]+\\d+[HNMP][APSB]*\\d*_", files)
if (length(idx) > 0) {
sc[idx] <- gsub("([SRL]+\\d+[HNMP][APSB]*\\d*)_.*", replacement="\\1", files[idx])
}
sc <- gsub(".*_([SRL]+\\d+[HNMP][APSB]*\\d*).*", replacement="\\1", sc)
return(sc)
}
moveMixedFile <- function(contig.folder) {
contig <- getFiles(contig.folder)
f454 <- contig[grep("_454M.fa[sta]$", contig)]
sc <- getSampleID(contig)
idx <- getMixedFileIndex(f454)
mix <- unique(getSampleID(f454[idx]))
if (length(mix) >= 1) {
if (!file.exists("mixed"))
dir.create("mixed")
ii <- which(sc %in% mix)
moveFile(contig[ii], contig.folder, "mixed")
}
}
getMixedStrain <- function() {
mix.sc <- NULL
if (file.exists("mixed")) {
mix <- getFiles("mixed")
if (length(mix) > 0)
mix.sc <- getSampleID(mix)
if (length(mix.sc) == 0)
mix.sc <- NULL
}
return(mix.sc)
}
moveNDVFile <- function(contig.folder) {
## S38-Yellow-CK-JX-10226-2014_S38NDV_454.fas
## S38-Yellow-CK-JX-10226-2014_S38NDV_Contigs.fna
## S38-Yellow-CK-JX-10226-2014_S38NDV_mira_delX.fasta
## S38-Yellow-CK-JX-10226-2014_S38NDV_mira.fasta
contig <- getFiles(contig.folder)
idx <- grep(".*/.*[SRL]+\\d+NDV_.*", contig)
if (length(idx) >= 1) {
if (!file.exists("NDV"))
dir.create("NDV")
moveFile(contig[idx], contig.folder, "NDV")
}
}
## `%>%` <- function(x, FUN) FUN(x)
utils::globalVariables(".")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.